1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 package org.openoffice.xmerge.converter.xml.sxw.pocketword;
25 
26 import org.w3c.dom.Node;
27 import org.w3c.dom.NodeList;
28 import org.w3c.dom.NamedNodeMap;
29 
30 import org.openoffice.xmerge.ConvertData;
31 import org.openoffice.xmerge.ConvertException;
32 import org.openoffice.xmerge.Document;
33 import org.openoffice.xmerge.DocumentSerializer;
34 
35 import org.openoffice.xmerge.converter.xml.OfficeConstants;
36 import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
37 
38 import org.openoffice.xmerge.converter.xml.ParaStyle;
39 import org.openoffice.xmerge.converter.xml.TextStyle;
40 import org.openoffice.xmerge.converter.xml.StyleCatalog;
41 
42 import java.io.IOException;
43 
44 
45 /**
46  * <p>Pocket Word implementation of <code>DocumentDeserializer</code>
47  * for use by {@link
48  * org.openoffice.xmerge.converter.xml.sxw.pocketword.PluginFactoryImpl
49  * PluginFactoryImpl}.</p>
50  *
51  * <p>This converts an OpenOffice.org XML Writer files to a Pocket Word file.</p>
52  *
53  * @author      Mark Murnane
54  * @version 1.1
55  */
56 public final class DocumentSerializerImpl
57             implements DocumentSerializer, OfficeConstants {
58 
59     private PocketWordDocument pswDoc;
60     private SxwDocument        sxwDoc;
61 
62     private StyleCatalog styleCat = null;
63 
64     private boolean inList = false;
65 
66 
67     /**
68      *  <p>Initialises a new <code>DocumentSerializerImpl</code> using the.<br>
69      *     supplied <code>Document</code></p>
70      *
71      * <p>The supplied document should be an {@link
72      *    org.openoffice.xmerge.converter.xml.sxw.SxwDocument SxwDocument}
73      *    object.</p>
74      *
75      *  @param  doc  The <code>Document</code> to convert.
76      */
DocumentSerializerImpl(Document doc)77     public DocumentSerializerImpl(Document doc) {
78         sxwDoc = (SxwDocument)doc;
79         pswDoc = new PocketWordDocument(sxwDoc.getName());
80     }
81 
82 
83     /**
84      *  <p>Convert the data passed into the <code>DocumentSerializerImpl</code>
85      *  constructor into Pocket Word format.</p>
86      *
87      *  <p>This method may or may not be thread-safe.  It is expected
88      *  that the user code does not call this method in more than one
89      *  thread.  And for most cases, this method is only done once.</p>
90      *
91      *  @return  <code>ConvertData</code> object to pass back the
92      *           converted data.
93      *
94      *  @throws  ConvertException  If any conversion error occurs.
95      *  @throws  IOException       If any I/O error occurs.
96      */
serialize()97     public ConvertData serialize() throws IOException, ConvertException {
98         ConvertData cd = new ConvertData();
99 
100         org.w3c.dom.Document doc = sxwDoc.getContentDOM();
101 
102         // Load any style info before traversing the document content tree
103         loadStyles();
104 
105         NodeList list = doc.getElementsByTagName(TAG_OFFICE_BODY);
106 
107         int len = list.getLength();
108         if (len > 0) {
109             Node node = list.item(0);
110             traverseBody(node);
111         }
112 
113         cd.addDocument(pswDoc);
114 
115         return cd;
116     }
117 
118 
119     /*
120      * Handles the loading of defined styles from the style.xml file as well
121      * as automatic styles from the content.xml file.
122      *
123      * Any change to a defined style, such as a short bold section, falls into
124      * the latter category.
125      */
loadStyles()126     private void loadStyles() {
127         org.w3c.dom.Document contentDom = sxwDoc.getContentDOM();
128         org.w3c.dom.Document styleDom   = sxwDoc.getStyleDOM();
129 
130         styleCat = new StyleCatalog(25);
131 
132         NodeList nl = null;
133         String families[] = new String[] { PocketWordConstants.TEXT_STYLE_FAMILY,
134                                            PocketWordConstants.PARAGRAPH_STYLE_FAMILY,
135                                            PocketWordConstants.PARAGRAPH_STYLE_FAMILY };
136         Class classes[]   = new Class[] { TextStyle.class,
137                                           ParaStyle.class,
138                                           TextStyle.class };
139 
140         String[] styleTypes = new String[] { TAG_OFFICE_STYLES,
141                                              TAG_OFFICE_AUTOMATIC_STYLES,
142                                              TAG_OFFICE_MASTER_STYLES };
143 
144         /*
145          * Documents converted from PSW -> SXW will not have a style.xml when
146          * being converted back to PSW.  This would occur if a document was
147          * not modified within Writer between conversions.
148          *
149          * Any Writer modifications and saves create the style.xml and other
150          * portions of a complete Writer SXW file.
151          */
152         if (styleDom != null) {
153            // Process the Style XML tree
154            for (int i = 0; i < styleTypes.length; i++ ) {
155                nl = styleDom.getElementsByTagName(styleTypes[i]);
156                if (nl.getLength() != 0) {
157                    styleCat.add(nl.item(0), families, classes, null, false);
158                }
159            }
160         }
161 
162         /*
163          * Process the content XML for any other style info.
164          * Should only be automatic types here.
165          */
166         for (int i = 0; i < styleTypes.length; i++ ) {
167             nl = contentDom.getElementsByTagName(styleTypes[i]);
168             if (nl.getLength() != 0) {
169                 styleCat.add(nl.item(0), families, classes, null, false);
170             }
171         }
172     }
173 
174 
175     /*
176      * Process the office:body tag.
177      */
traverseBody(Node node)178     private void traverseBody(Node node) throws IOException, ConvertException {
179 
180         if (node.hasChildNodes()) {
181             NodeList nList = node.getChildNodes();
182             int len = nList.getLength();
183 
184             for (int i = 0; i < len; i++) {
185                 Node child = nList.item(i);
186 
187                 if (child.getNodeType() == Node.ELEMENT_NODE) {
188                     String nodeName = child.getNodeName();
189 
190                     if (nodeName.equals(TAG_PARAGRAPH)
191                             || nodeName.equals(TAG_HEADING)) {
192                         traverseParagraph(child);
193                     }
194 
195                     if (nodeName.equals(TAG_UNORDERED_LIST) ||
196                         nodeName.equals(TAG_ORDERED_LIST)) {
197                         traverseList(child);
198                     }
199                 }
200             }
201         }
202     }
203 
204 
205     /*
206      * Process a text:p tag
207      */
traverseParagraph(Node node)208     private void traverseParagraph(Node node) throws IOException, ConvertException {
209         String styleName = getAttribute(node, ATTRIBUTE_TEXT_STYLE_NAME);
210 
211         ParaStyle pstyle = (ParaStyle)styleCat.lookup(styleName,
212                                 PocketWordConstants.PARAGRAPH_STYLE_FAMILY, null,
213                                 ParaStyle.class);
214         if (pstyle != null) {
215             pstyle = (ParaStyle)pstyle.getResolved();
216         }
217 
218         TextStyle tstyle = (TextStyle)styleCat.lookup(styleName,
219                                 PocketWordConstants.PARAGRAPH_STYLE_FAMILY, null,
220                                 TextStyle.class);
221         if (pstyle != null) {
222             tstyle = (TextStyle)tstyle.getResolved();
223         }
224 
225         try {
226             pswDoc.addParagraph(pstyle, inList);
227         }
228         catch (Exception e) {
229             throw new ConvertException(
230                                 "Error adding paragraph to PocketWordDocument.\n"
231                                 + e.toString());
232         }
233 
234         traverseParagraphContents(node, tstyle);
235     }
236 
237 
238     /*
239      * Process the contents of a paragraph.  This method handles situations
240      * where the paragraph contains multiple children, each representing a
241      * differently formatted piece of text.
242      */
traverseParagraphContents(Node node, TextStyle defTextStyle)243     private void traverseParagraphContents (Node node, TextStyle defTextStyle)
244         throws IOException, ConvertException {
245         // First up, get the style of this little bit
246         String styleName = getAttribute(node, ATTRIBUTE_TEXT_STYLE_NAME);
247         TextStyle tStyle = (TextStyle)styleCat.lookup(styleName,
248                                 PocketWordConstants.TEXT_STYLE_FAMILY, null,
249                                 TextStyle.class);
250 
251         if (tStyle == null) {
252             tStyle = defTextStyle;
253         }
254 
255         if (node.hasChildNodes()) {
256             NodeList nList = node.getChildNodes();
257             int len = nList.getLength();
258 
259             for (int i = 0; i < len; i++) {
260 
261                 Node child = nList.item(i);
262                 short nodeType = child.getNodeType();
263 
264                 switch (nodeType) {
265                     case Node.TEXT_NODE:
266                         String s = child.getNodeValue();
267                         if (s.length() > 0) {
268                             try {
269                                 pswDoc.addParagraphData(s, tStyle);
270                             }
271                             catch (Exception e) {
272                                 throw new ConvertException(
273                                     "Error adding data to paragraph in " +
274                                     "PocketWordDocument.\n" + e.toString());
275 
276                             }
277                         }
278                         break;
279 
280                     case Node.ELEMENT_NODE:
281                         if (child.getNodeName().equals(TAG_SPACE)) {
282                             StringBuffer sb = new StringBuffer("");
283                             int count = 1;
284 
285                             NamedNodeMap map = child.getAttributes();
286 
287                             if (map.getLength() > 0) {
288                                 Node attr = map.getNamedItem(ATTRIBUTE_SPACE_COUNT);
289                                 count = Integer.parseInt(attr.getNodeValue().trim());
290                             }
291 
292                             for ( ; count > 0; count--) {
293                                 sb.append(" ");
294                             }
295 
296                             /*
297                              * May want to look at style info for spaces.  Could
298                              * be important when calculating font metrics.
299                              */
300                             try {
301                                 pswDoc.addParagraphData(sb.toString(), tStyle);
302                             }
303                             catch (Exception e) {
304                                 throw new ConvertException(
305                                     "Error adding data to paragraph in " +
306                                     "PocketWordDocument.\n" + e.toString());
307 
308                             }
309                         }
310                         else if (child.getNodeName().equals(TAG_TAB_STOP)) {
311                             try {
312                                 pswDoc.addParagraphData("\t", tStyle);
313                             }
314                             catch (Exception e) {
315                                 throw new ConvertException(
316                                     "Error adding data to paragraph in " +
317                                     "PocketWordDocument.\n" + e.toString());
318 
319                             }
320                         }
321                         else if (child.getNodeName().equals(TAG_LINE_BREAK)) {
322                             /*
323                              * Pocket Word does not support soft line breaks.
324                              * They are just new paragraphs.
325                              */
326                         }
327                         else if (child.getNodeName().equals(TAG_SPAN)) {
328                             /*
329                              * This is where the interesting ones, i.e. format
330                              * changes occur.
331                              */
332                             traverseParagraphContents (child, defTextStyle);
333                         }
334                         else if (child.getNodeName().equals(TAG_HYPERLINK)) {
335                             traverseParagraphContents (child, defTextStyle);
336                         }
337                         else {
338                             // Should maybe have a default in here.
339                         }
340                         break;
341                     default:
342                         // Do nothing
343                 }
344             }
345         }
346         else {
347             /*
348              * If the node has no children, then it is a blank paragraph, but
349              * they still require an entry in the Paragraph class to make sense.
350              */
351             pswDoc.addParagraphData("", tStyle);
352         }
353     }
354 
355 
356     /*
357      * Process a text:ordered-list or text:unordered-list tag.  Pocket Word has
358      * no concept of a list so there is no need to differentiate between the
359      * two.
360      *
361      * Each item on the list contains a text:p node.
362      */
traverseList(Node node)363     private void traverseList (Node node) throws IOException, ConvertException {
364         inList = true;
365 
366         if (node.hasChildNodes()) {
367             NodeList nList = node.getChildNodes();
368             int len = nList.getLength();
369 
370             for (int i = 0; i < len; i++) {
371                 Node child = nList.item(i);
372 
373                 if (child.getNodeType() == Node.ELEMENT_NODE) {
374                     String nodeName = child.getNodeName();
375 
376                     if (nodeName.equals(TAG_LIST_ITEM)) {
377                         traverseListItem(child);
378                     }
379                 }
380             }
381         }
382 
383         inList = false;
384     }
385 
386 
387     /*
388      * Process a text:list-item node.  They usually contain have a single
389      * text:p child but can also have sections or other lists.
390      *
391      * For this case, only paragraphs are supported.
392      */
traverseListItem(Node node)393     private void traverseListItem (Node node) throws IOException, ConvertException {
394         if (node.hasChildNodes()) {
395             NodeList nList = node.getChildNodes();
396             int len = nList.getLength();
397 
398             for (int i = 0; i < len; i++) {
399                 Node child = nList.item(i);
400 
401                 if (child.getNodeType() == Node.ELEMENT_NODE) {
402                     String nodeName = child.getNodeName();
403 
404                     if (nodeName.equals(TAG_PARAGRAPH)) {
405                         traverseParagraph(child);
406                     }
407                 }
408             }
409         }
410 
411     }
412 
413 
414     /*
415      * Utility method to retrieve a Node attribute.
416      */
getAttribute(Node node, String attribute)417     private String getAttribute (Node node, String attribute) {
418         NamedNodeMap attrNodes = node.getAttributes();
419 
420         if (attrNodes != null) {
421             Node attr = attrNodes.getNamedItem(attribute);
422             if (attr != null) {
423                 return attr.getNodeValue();
424             }
425         }
426 
427         return null;
428     }
429 }
430