1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 package org.openoffice.xmerge.converter.xml.sxw.wordsmith;
25 
26 import org.w3c.dom.NodeList;
27 import org.w3c.dom.Node;
28 import org.w3c.dom.NamedNodeMap;
29 import org.w3c.dom.Element;
30 
31 import java.io.IOException;
32 
33 import org.openoffice.xmerge.Document;
34 import org.openoffice.xmerge.ConvertData;
35 import org.openoffice.xmerge.ConvertException;
36 import org.openoffice.xmerge.DocumentSerializer;
37 import org.openoffice.xmerge.ConverterCapabilities;
38 import org.openoffice.xmerge.converter.xml.OfficeConstants;
39 import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
40 import org.openoffice.xmerge.converter.palm.PalmDB;
41 import org.openoffice.xmerge.converter.palm.PdbEncoder;
42 import org.openoffice.xmerge.converter.palm.Record;
43 import org.openoffice.xmerge.converter.palm.PdbUtil;
44 import org.openoffice.xmerge.converter.palm.PalmDocument;
45 import org.openoffice.xmerge.converter.xml.OfficeDocument;
46 import org.openoffice.xmerge.util.*;
47 import org.openoffice.xmerge.converter.xml.*;
48 
49 /**
50  *  <p>WordSmith implementation of
51  *  org.openoffice.xmerge.DocumentSerializer
52  *  for the {@link
53  *  org.openoffice.xmerge.converter.xml.sxw.wordsmith.PluginFactoryImpl
54  *  PluginFactoryImpl}.</p>
55  *
56  *  <p>The <code>serialize</code> method traverses the DOM
57  *  document from the given <code>Document</code> object.  It uses a
58  *  <code>DocEncoder</code> object for the actual conversion of
59  *  contents to the WordSmith format.</p>
60  *
61  *  @author      Herbie Ong, David Proulx
62  */
63 
64 // DJP: take out "implements OfficeConstants"
65 public final class DocumentSerializerImpl
66 implements OfficeConstants, DocumentSerializer {
67 
68     /**  A WSEncoder object for encoding to WordSmith. */
69     private WSEncoder encoder = null;
70 
71     /**  The <code>StyleCatalog</code>. */
72     private StyleCatalog styleCat = null;
73 
74     private WseFontTable fontTable = new WseFontTable();
75     private WseColorTable colorTable = new WseColorTable();
76 
77     /**
78      *  The <code>SxwDocument</code> object that this converter
79      *  processes.
80      */
81     private SxwDocument sxwDoc = null;
82 
83     /**
84      *  Constructor.
85      *
86      *  @param  doc  The <code>Document</code> to convert.
87      */
DocumentSerializerImpl(Document doc)88     public DocumentSerializerImpl(Document doc) {
89         sxwDoc = (SxwDocument) doc;
90     }
91 
92 
93     /**
94      *  <p>Method to convert a <code>Document</code> into a
95      *  <code>PalmDocument</code>.</p>
96      *
97      *  <p>This method is not thread safe for performance reasons.
98      *  This method should not be called from within two threads.
99      *  It would be best to call this method only once per object
100      *  instance.</p>
101      *
102      *  <p>Note that the doc parameter needs to be an XML
103      *  <code>Document</code>, else this method will throw a
104      *  <code>ClassCastException</code>.  I think this is a hack,
105      *  but this is the only way to not modify most of the existing
106      *  code right now.</p>
107      *
108      *  @param  doc  Input should be an XML <code>Document</code>
109      *               object
110      *  @param  os   Output of <code>PalmDB</code> object
111      *
112      *  @throws  ConvertException  If any conversion error occurs.
113      *  @throws  IOException       If any I/O error occurs.
114      */
serialize()115     public ConvertData serialize()
116     throws IOException {
117 
118 
119         // get the server document name
120         String docName = sxwDoc.getName();
121 
122         // get DOM document
123         org.w3c.dom.Document domDoc = sxwDoc.getContentDOM();
124 
125         // Create WordSmith encoder object.  Add WordSmith header,
126         // empty font table to it.
127         encoder = new WSEncoder();
128         encoder.addElement(fontTable);
129         encoder.addElement(colorTable);
130 
131         // Read the styles into the style catalog
132         String families[] = new String[3];
133         families[0] = "text";
134         families[1] = "paragraph";
135         families[2] = "paragraph";
136         Class classes[] = new Class[3];
137         classes[0] = TextStyle.class;
138         classes[1] = ParaStyle.class;
139         classes[2] = TextStyle.class;
140         styleCat = new StyleCatalog(25);
141 
142         // Parse the input document
143         // DJP todo: eliminate multiple calls to add() when it can
144         // recurse properly.
145         NodeList nl = domDoc.getElementsByTagName(TAG_OFFICE_STYLES);
146         styleCat.add(nl.item(0), families, classes, null, false);
147         nl = domDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES);
148         styleCat.add(nl.item(0), families, classes, null, false);
149         nl = domDoc.getElementsByTagName(TAG_OFFICE_MASTER_STYLES);
150         styleCat.add(nl.item(0), families, classes, null, false);
151 
152         // Traverse to the office:body element.
153         // There should only be one.
154         NodeList list = domDoc.getElementsByTagName(TAG_OFFICE_BODY);
155         int len = list.getLength();
156         if (len > 0) {
157             Node node = list.item(0);
158             traverseBody(node);
159         }
160 
161         // create a PalmDB object and ConvertData object.
162         //
163         Record records[] = encoder.getRecords();
164 
165         ConvertData cd = new ConvertData();
166         PalmDocument palmDoc = new PalmDocument(docName,
167             PdbUtil.intID("WrdS"), PdbUtil.intID("BDOC"), 0,
168             PalmDB.PDB_HEADER_ATTR_BACKUP, records);
169         cd.addDocument(palmDoc);
170         return cd;
171     }
172 
173 
174     /**
175      *  This method traverses <i>office:body</i> element.
176      *
177      *  @param  node  <i>office:body</i> <code>Node</code>.
178      *
179      *  @throws  IOException  If any I/O error occurs.
180      */
traverseBody(Node node)181     private void traverseBody(Node node) throws IOException {
182 
183         if (node.hasChildNodes()) {
184 
185             NodeList nodeList = node.getChildNodes();
186             int len = nodeList.getLength();
187 
188             for (int i = 0; i < len; i++) {
189 
190                 Node child = nodeList.item(i);
191 
192                 if (child.getNodeType() == Node.ELEMENT_NODE) {
193                     String nodeName = child.getNodeName();
194 
195                     if (nodeName.equals(TAG_PARAGRAPH) ||
196                     nodeName.equals(TAG_HEADING)) {
197 
198                         traverseParagraph(child);
199 
200                     } else if (nodeName.equals(TAG_UNORDERED_LIST)) {
201 
202                         traverseList(child);
203 
204                     } else if (nodeName.equals(TAG_ORDERED_LIST)) {
205 
206                         traverseList(child);
207 
208                     } else {
209 
210                         Debug.log(Debug.INFO, "<OTHERS " /* + XmlDebug.nodeInfo(child) */ + " />");
211                     }
212                 }
213             }
214         }
215 
216     }
217 
218 
219     /**
220      *  This method traverses the <i>text:p</i> and <i>text:h</i>
221      *  element <code>Node</code> objects.
222      *
223      *  @param  node  A <i>text:p</i> or <i>text:h</i> <code>Node</code>.
224      *
225      *  @throws  IOException  If any I/O error occurs.
226      */
traverseParagraph(Node node)227     private void traverseParagraph(Node node) throws IOException {
228 
229         String styleName = findAttribute(node, "text:style-name");
230         ParaStyle pstyle = (ParaStyle)styleCat.lookup(styleName, "paragraph",
231         null, ParaStyle.class);
232 
233         // If the style does not exist in the style catalog for some reason,
234         // make up a default style and use it.  We'll have to add this default
235         // style to the style catalog the first time it is used.
236         if (pstyle == null) {
237             styleName = "CONVERTER-DEFAULT";
238             pstyle = (ParaStyle)styleCat.lookup(styleName, "paragraph", null,
239                                                 ParaStyle.class);
240             if (pstyle == null) {
241                 pstyle = new ParaStyle(styleName, "paragraph", null,
242                    (String [])null, null, styleCat);
243                 styleCat.add(pstyle);
244                 styleCat.add(new TextStyle(styleName, "paragraph", null,
245                          0, 0, 12, "Times-Roman", styleCat));
246             }
247         }
248 
249         pstyle = (ParaStyle)pstyle.getResolved();
250         encoder.addElement(new WsePara(pstyle, styleCat));
251         TextStyle defParaTextStyle = (TextStyle)
252            styleCat.lookup(styleName, "paragraph", null, TextStyle.class);
253 
254         traverseParaContents(node, defParaTextStyle);
255     }
256 
257 
258     /**
259      *  This method traverses a paragraph content.  Note that this
260      *  method may recurse to call itself.
261      *
262      *  @param  node  A paragraph or content <code>Node</code>
263      */
traverseParaContents(Node node, TextStyle defTextStyle)264     private void traverseParaContents(Node node, TextStyle defTextStyle) {
265 
266         String styleName = findAttribute(node, "text:style-name");
267         TextStyle style = (TextStyle)
268            styleCat.lookup(styleName, "text", null, TextStyle.class);
269 
270         if (node.hasChildNodes()) {
271             NodeList nodeList = node.getChildNodes();
272             int nChildren = nodeList.getLength();
273 
274             for (int i = 0; i < nChildren; i++) {
275                 Node child = nodeList.item(i);
276 
277                 if (child.getNodeType() == Node.TEXT_NODE) {
278 
279                     // this is for grabbing text nodes.
280                     String s = child.getNodeValue();
281 
282                     if (s.length() > 0) {
283                         if (style != null)
284                             encoder.addElement(new WseTextRun(s, style, styleCat,
285                             fontTable, colorTable));
286                         else
287                             encoder.addElement(new WseTextRun(s, defTextStyle,
288                             styleCat, fontTable, colorTable));
289                     }
290 
291                 } else if (child.getNodeType() == Node.ELEMENT_NODE) {
292 
293                     String childNodeName = child.getNodeName();
294 
295                     if (childNodeName.equals(TAG_SPACE)) {
296 
297                         // this is for text:s tags.
298                         NamedNodeMap map = child.getAttributes();
299                         Node attr = map.getNamedItem(ATTRIBUTE_SPACE_COUNT);
300                         StringBuffer space = new StringBuffer(" ");
301                         int count = 1;
302 
303                         if (attr != null) {
304                             try {
305                                 String countStr = attr.getNodeValue();
306                                 count = Integer.parseInt(countStr.trim());
307                             } catch (NumberFormatException e) {
308                                 Debug.log(Debug.ERROR, "Problem parsing space tag", e);
309                             }
310                         }
311 
312                         for (int j = 1; j < count; j++)
313                             space.append(" ");
314 
315                         encoder.addElement(new WseTextRun(space.toString(),
316                                                        defTextStyle,
317                                                       styleCat, fontTable, colorTable));
318                         Debug.log(Debug.INFO, "<SPACE count=\"" + count + "\" />");
319 
320                     } else if (childNodeName.equals(TAG_TAB_STOP)) {
321 
322                         // this is for text:tab-stop
323                         encoder.addElement(new WseTextRun("\t", defTextStyle, styleCat,
324                             fontTable, colorTable));
325 
326                         Debug.log(Debug.INFO, "<TAB/>");
327 
328                     } else if (childNodeName.equals(TAG_LINE_BREAK)) {
329 
330                         // this is for text:line-break
331                         encoder.addElement(new WseTextRun("\n", defTextStyle,
332                                             styleCat, fontTable, colorTable));
333 
334                         Debug.log(Debug.INFO, "<LINE-BREAK/>");
335 
336                     } else if (childNodeName.equals(TAG_SPAN)) {
337 
338                         // this is for text:span
339                         Debug.log(Debug.INFO, "<SPAN>");
340                         traverseParaContents(child, defTextStyle);
341                         Debug.log(Debug.INFO, "</SPAN>");
342 
343                     } else if (childNodeName.equals(TAG_HYPERLINK)) {
344 
345                         // this is for text:a
346                         Debug.log(Debug.INFO, "<HYPERLINK>");
347                         traverseParaContents(child, defTextStyle);
348                         Debug.log(Debug.INFO, "<HYPERLINK/>");
349 
350                     } else if (childNodeName.equals(TAG_BOOKMARK) ||
351                     childNodeName.equals(TAG_BOOKMARK_START)) {
352 
353                         Debug.log(Debug.INFO, "<BOOKMARK/>");
354 
355                     } else {
356 
357                         Debug.log(Debug.INFO, "<OTHERS " /* + XmlDebug.nodeInfo(child) */ + " />");
358                     }
359 
360                 }
361 
362             }
363         }
364     }
365 
366 
367     /**
368      *  This method traverses list tags <i>text:unordered-list</i> and
369      *  <i>text:ordered-list</i>.  A list can only contain one optional
370      *  <i>text:list-header</i> and one or more <i>text:list-item</i>
371      *  elements.
372      *
373      *  @param  node  A list <code>Node</code>.
374      *
375      *  @throws  IOException  If any I/O error occurs.
376      */
traverseList(Node node)377     private void traverseList(Node node) throws IOException {
378 
379         Debug.log(Debug.TRACE, "<LIST>");
380 
381         if (node.hasChildNodes()) {
382 
383             NodeList nodeList = node.getChildNodes();
384             int len = nodeList.getLength();
385 
386             for (int i = 0; i < len; i++) {
387 
388                 Node child = nodeList.item(i);
389 
390                 if (child.getNodeType() == Node.ELEMENT_NODE) {
391 
392                     String nodeName = child.getNodeName();
393 
394                     if (nodeName.equals(TAG_LIST_ITEM)) {
395 
396                         traverseListItem(child);
397 
398                     } else if (nodeName.equals(TAG_LIST_HEADER)) {
399 
400                         traverseListHeader(child);
401 
402                     } else {
403 
404                         Debug.log(Debug.ERROR, "<INVALID-XML-BUG " + " />");
405                     }
406                 }
407             }
408         }
409 
410         Debug.log(Debug.TRACE, "</LIST>");
411     }
412 
413 
414     /**
415      *  This method traverses a <i>text:list-header</i> element.
416      *  It contains one or more <i>text:p</i> elements.
417      *
418      *  @param  node  A list header <code>Node</code>.
419      *
420      *  @throws  IOException  If any I/O error occurs.
421      */
traverseListHeader(Node node)422     private void traverseListHeader(Node node) throws IOException {
423 
424         Debug.log(Debug.TRACE, "<LIST-HEADER>");
425 
426         if (node.hasChildNodes()) {
427 
428             NodeList nodeList = node.getChildNodes();
429             int len = nodeList.getLength();
430 
431             for (int i = 0; i < len; i++) {
432 
433                 Node child = nodeList.item(i);
434 
435                 if (child.getNodeType() == Node.ELEMENT_NODE) {
436 
437                     String nodeName = child.getNodeName();
438 
439                     if (nodeName.equals(TAG_PARAGRAPH)) {
440 
441                         traverseParagraph(child);
442 
443                     } else {
444 
445                         Debug.log(Debug.TRACE, "<INVALID-XML-BUG " + " />");
446                     }
447                 }
448             }
449         }
450 
451         Debug.log(Debug.TRACE, "</LIST-HEADER>");
452     }
453 
454 
455     /**
456      *  This method will traverse a <i>text:list-item</i>.
457      *  A list item may contain one or more of <i>text:p</i>,
458      *  <i>text:h</i>, <i>text:section</i>,
459      *  <i>text:ordered-list</i> and <i>text:unordered-list</i>.
460      *
461      *  This method currently only implements grabbing <i>text:p</i>,
462      *  <i>text:h</i>, <i>text:unordered-list</i> and
463      *  <i>text:ordered-list</i>.
464      *
465      *  @param  node  <code>Node</code> to traverse.
466      *
467      *  @throws  IOException  If any I/O error occurs.
468      */
traverseListItem(Node node)469     private void traverseListItem(Node node) throws IOException {
470 
471         Debug.log(Debug.TRACE, "<LIST-ITEM>");
472 
473         if (node.hasChildNodes()) {
474 
475             NodeList nodeList = node.getChildNodes();
476             int len = nodeList.getLength();
477 
478             for (int i = 0; i < len; i++) {
479 
480                 Node child = nodeList.item(i);
481 
482                 if (child.getNodeType() == Node.ELEMENT_NODE) {
483 
484                     String nodeName = child.getNodeName();
485 
486                     if (nodeName.equals(TAG_PARAGRAPH)) {
487 
488                         traverseParagraph(child);
489 
490                     } else if (nodeName.equals(TAG_UNORDERED_LIST)) {
491 
492                         traverseList(child);
493 
494                     } else if (nodeName.equals(TAG_ORDERED_LIST)) {
495 
496                         traverseList(child);
497 
498                     } else {
499 
500                         Debug.log(Debug.ERROR, "<INVALID-XML-BUG " + " />");
501                     }
502                 }
503             }
504         }
505 
506         Debug.log(Debug.TRACE, "</LIST-ITEM>");
507     }
508 
509 
510     /**
511      *  Look up a <code>Node</code> object's named attribute and return
512      *  its value
513      *
514      *  @param node  The <code>Node</code>.
515      *  @param name  The attribute name.
516      *
517      *  @return  The value of the named attribute
518      */
findAttribute(Node node, String name)519     private String findAttribute(Node node, String name) {
520         NamedNodeMap attrNodes = node.getAttributes();
521         if (attrNodes != null) {
522             int len = attrNodes.getLength();
523             for (int i = 0; i < len; i++) {
524                 Node attr = attrNodes.item(i);
525                 if (attr.getNodeName().equals(name))
526                     return attr.getNodeValue();
527             }
528         }
529         return null;
530     }
531 }
532 
533