1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 package org.openoffice.xmerge.converter.xml.sxw.pocketword;
25 
26 import org.openoffice.xmerge.ConvertData;
27 import org.openoffice.xmerge.ConvertException;
28 import org.openoffice.xmerge.Document;
29 import org.openoffice.xmerge.DocumentDeserializer;
30 
31 import org.openoffice.xmerge.converter.xml.OfficeConstants;
32 import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
33 
34 import org.openoffice.xmerge.converter.xml.ParaStyle;
35 import org.openoffice.xmerge.converter.xml.TextStyle;
36 import org.openoffice.xmerge.converter.xml.StyleCatalog;
37 
38 import org.openoffice.xmerge.util.OfficeUtil;
39 
40 import java.io.IOException;
41 
42 import java.util.Enumeration;
43 import java.util.Vector;
44 
45 import org.w3c.dom.NodeList;
46 import org.w3c.dom.Node;
47 import org.w3c.dom.Element;
48 
49 
50 /**
51  * <p>Pocket Word implementation of <code>DocumentDeserializer</code>
52  * for the {@link
53  * org.openoffice.xmerge.converter.xml.sxw.pocketword.PluginFactoryImpl
54  * PluginFactoryImpl}.</p>
55  *
56  * <p>This converts a Pocket Word file to an OpenOffice.org XML Writer DOM.</p>
57  *
58  * @author  Mark Murnane
59  * @version 1.1
60  */
61 public final class DocumentDeserializerImpl
62             implements DocumentDeserializer, OfficeConstants {
63 
64     private PocketWordDocument pswDoc = null;
65     private SxwDocument sxwDoc = null;
66     private String docName;
67 
68     private StyleCatalog styleCat = null;
69 
70 
71     /**
72      * <p>Initialises a new <code>DocumentDeserializerImpl</code> using the
73      * supplied <code>ConvertData</code>.</p>
74      *
75      * <p>The <code>Document</code> objects in the <code>ConvertData</code>
76      *    should be {@link
77      *    org.openoffice.xmerge.converter.xml.sxw.pocketword.PocketWordDocument
78      *    PocketWordDocument} objects.</p>
79      *
80      * @param   cd      ConvertData containing a <code>PocketWordDocument</code>
81      *                  for conversion.
82      */
DocumentDeserializerImpl(ConvertData cd)83     public DocumentDeserializerImpl(ConvertData cd) {
84         Enumeration e = cd.getDocumentEnumeration();
85 
86         // A Pocket Word file is composed of one binary file
87         while (e.hasMoreElements()) {
88             pswDoc = (PocketWordDocument)e.nextElement();
89         }
90 
91         docName = pswDoc.getName();
92     }
93 
94 
95     /**
96      *  <p>Convert the data passed into the <code>DocumentDeserializer</code>
97      *  constructor into the OpenOffice.org XML Writer <code>Document</code>
98      *  format.</p>
99      *
100      *  <p>This method may or may not be thread-safe.  It is expected
101      *  that the user code does not call this method in more than one
102      *  thread.  And for most cases, this method is only done once.</p>
103      *
104      *  @return  The resulting <code>Document</code> object from conversion.
105      *
106      *  @throws  ConvertException  If any Convert error occurs.
107      *  @throws  IOException       If any I/O error occurs.
108      */
deserialize()109     public Document deserialize() throws IOException, ConvertException {
110         Enumeration pe = pswDoc.getParagraphEnumeration();
111 
112         sxwDoc = new SxwDocument (docName);
113         sxwDoc.initContentDOM();
114 
115         // Default to an initial 5 entries in the catalog.
116         styleCat = new StyleCatalog(5);
117 
118         try {
119             buildDocument(pe);
120         }
121         catch (Exception e) {
122             e.printStackTrace();
123             throw new ConvertException("Error building OpenOffice.org XML Writer DOM: "
124                                         + e.toString());
125 
126         }
127 
128         return sxwDoc;
129     }
130 
131 
132     /**
133      * This method actually takes care of the conversion.
134      *
135      * @param   data    An Enumeration of all Paragraphs in the Pocket Word doc.
136      *
137      * @throws  IOException     If any I/O errors occur.
138      */
buildDocument(Enumeration data)139     private void buildDocument(Enumeration data) throws IOException {
140 
141         org.w3c.dom.Document doc = sxwDoc.getContentDOM();
142 
143         /*
144          * There should be only one each of office:body and
145          * office:automatic-styles in each document.
146          */
147         Node bodyNode = doc.getElementsByTagName(TAG_OFFICE_BODY).item(0);
148 
149         // Not every document has an automatic style tag
150         Node autoStylesNode = doc.getElementsByTagName(
151                                         TAG_OFFICE_AUTOMATIC_STYLES).item(0);
152         if (autoStylesNode == null) {
153             autoStylesNode = doc.createElement(TAG_OFFICE_AUTOMATIC_STYLES);
154             doc.insertBefore(autoStylesNode, bodyNode);
155         }
156 
157 
158         // Needed for naming new styles
159         int paraStyles = 1;
160         int textStyles = 1;
161 
162         // Pocket Word has no concept of a list.
163         Element listNode = null;
164 
165 
166         // Down to business ...
167         while (data.hasMoreElements()) {
168             Paragraph p = (Paragraph)data.nextElement();
169             Element paraNode  = doc.createElement(TAG_PARAGRAPH);
170 
171             // Set paragraph style information here
172             ParaStyle pStyle = p.makeStyle();
173             if (pStyle == null) {
174                 paraNode.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME,
175                                         PocketWordConstants.DEFAULT_STYLE);
176             }
177             else {
178                 // Create paragraph style
179                 pStyle.setName(new String("PS" + paraStyles++));
180                 paraNode.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, pStyle.getName());
181                 styleCat.add(pStyle);
182             }
183 
184 
185             /*
186              * For each of the paragraphs, process each segment.
187              * There will always be at least one.
188              */
189             Enumeration paraData = p.getSegmentsEnumerator();
190             Vector textSpans = new Vector(0, 1);
191 
192             do {
193                 ParagraphTextSegment pts = (ParagraphTextSegment)paraData.nextElement();
194                 Element span = doc.createElement(OfficeConstants.TAG_SPAN);
195 
196                 TextStyle ts = pts.getStyle();
197 
198                 if (ts != null) {
199                     ts.setName(new String("TS" + textStyles++));
200                     span.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, ts.getName());
201                     styleCat.add(ts);
202                 }
203                 else {
204                     span.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME,
205                                         PocketWordConstants.DEFAULT_STYLE);
206                 }
207 
208                 // If this isn't a blank paragraph
209                 if (pts.getText() != null && !pts.getText().equals("")) {
210                     Node[] children = OfficeUtil.parseText(pts.getText(), doc);
211 
212                     for (int j = 0; j < children.length; j++) {
213                         span.appendChild(children[j]);
214                     }
215                 }
216 
217                 textSpans.add(span);
218 
219             } while (paraData.hasMoreElements());
220 
221 
222             /*
223              * Special case for the first span.  If it has no style, then
224              * it shouldn't be a span, so just add its children with style
225              * set as standard.
226              */
227             Element firstSpan = (Element)textSpans.elementAt(0);
228             String  styleName = firstSpan.getAttribute(ATTRIBUTE_TEXT_STYLE_NAME);
229             if (styleName.equals(PocketWordConstants.DEFAULT_STYLE)) {
230                 NodeList nl = firstSpan.getChildNodes();
231                 int len = nl.getLength();
232 
233                 for (int i = 0; i < len; i++) {
234                     /*
235                      * Always take item 0 as the DOM tree event model will
236                      * cause the NodeList to shrink as each Node is reparented.
237                      *
238                      * By taking the first item from the list, we essentially
239                      * traverse the list in order.
240                      */
241                     paraNode.appendChild(nl.item(0));
242                 }
243             }
244             else {
245                 paraNode.appendChild(firstSpan);
246             }
247 
248             // The rest are spans, so just add them
249             for (int i = 1; i < textSpans.size(); i++) {
250                 paraNode.appendChild((Node)textSpans.elementAt(i));
251             }
252 
253 
254             /*
255              * Pocket Word doesn't support lists, but it does have bulleted
256              * paragraphs that are essentially the same thing.
257              *
258              * Unlike OpenOffice Writer, a blank paragraph can be bulleted
259              * as well.  This will be handled by inserting a blank paragraph
260              * into the unordered list, but OpenOffice Writer will not display
261              * an item at that point in the list.
262              */
263             if (p.isBulleted()) {
264                 if (listNode == null) {
265                     listNode = doc.createElement(TAG_UNORDERED_LIST);
266                 }
267                 Element listItem = doc.createElement(TAG_LIST_ITEM);
268                 listItem.appendChild(paraNode);
269                 listNode.appendChild(listItem);
270             }
271             else {
272                 if (listNode != null) {
273                     bodyNode.appendChild(listNode);
274                     listNode = null;
275                 }
276                 bodyNode.appendChild(paraNode);
277             }
278         } // End processing paragraphs
279 
280 
281         // Now write the style catalog to the document
282         NodeList nl = styleCat.writeNode(doc, "dummy").getChildNodes();
283         int nlLen = nl.getLength();     // nl.item reduces the length
284         for (int i = 0; i < nlLen; i++) {
285             autoStylesNode.appendChild(nl.item(0));
286         }
287     }
288 }
289