1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 package org.openoffice.xmerge.converter.xml.sxw.aportisdoc;
25 
26 import org.w3c.dom.NodeList;
27 import org.w3c.dom.Node;
28 import org.w3c.dom.NamedNodeMap;
29 
30 import java.io.IOException;
31 import java.net.URLDecoder;
32 
33 import org.openoffice.xmerge.Document;
34 import org.openoffice.xmerge.ConvertData;
35 import org.openoffice.xmerge.ConvertException;
36 import org.openoffice.xmerge.DocumentSerializer;
37 import org.openoffice.xmerge.converter.xml.OfficeConstants;
38 import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
39 import org.openoffice.xmerge.converter.xml.sxw.aportisdoc.DocConstants;
40 import org.openoffice.xmerge.converter.palm.PalmDB;
41 import org.openoffice.xmerge.converter.palm.Record;
42 import org.openoffice.xmerge.converter.palm.PalmDocument;
43 import org.openoffice.xmerge.util.Debug;
44 import org.openoffice.xmerge.util.XmlUtil;
45 
46 /**
47  *  <p>AportisDoc implementation of
48  *  org.openoffice.xmerge.DocumentSerializer
49  *  for the {@link
50  *  org.openoffice.xmerge.converter.xml.sxw.aportisdoc.PluginFactoryImpl
51  *  PluginFactoryImpl}.</p>
52  *
53  *  <p>The <code>serialize</code> method traverses the DOM
54  *  document from the given <code>Document</code> object.  It uses a
55  *  <code>DocEncoder</code> object for the actual conversion of
56  *  contents to the AportisDoc format.</p>
57  *
58  *  @author      Herbie Ong
59  */
60 
61 
62 public final class DocumentSerializerImpl
63     implements OfficeConstants, DocConstants, DocumentSerializer {
64 
65     /** A <code>DocEncoder</code> object for encoding to AportisDoc. */
66     private DocEncoder encoder = null;
67 
68     /** SXW <code>Document</code> object that this converter processes. */
69     private SxwDocument sxwDoc = null;
70 
71 
72     /**
73      *  Constructor.
74      *
75      *  @param  doc  A SXW <code>Document</code> to be converted.
76      */
DocumentSerializerImpl(Document doc)77     public DocumentSerializerImpl(Document doc) {
78         sxwDoc = (SxwDocument) doc;
79     }
80 
81 
82     /**
83      *  <p>Method to convert a <code>Document</code> into a PDB.
84      *  It passes back the converted data as a <code>ConvertData</code>
85      *  object.</p>
86      *
87      *  <p>This method is not thread safe for performance reasons.
88      *  This method should not be called from within two threads.
89      *  It would be best to call this method only once per object
90      *  instance.</p>
91      *
92      *  @return  The <code>ConvertData</code> object containing the output.
93      *
94      *  @throws  ConvertException  If any conversion error occurs.
95      *  @throws  IOException       If any I/O error occurs.
96      */
serialize()97     public ConvertData serialize() throws ConvertException, IOException {
98 
99 
100         // get the server document name
101 
102         String docName = URLDecoder.decode(sxwDoc.getName(), DocConstants.ENCODING);
103 
104         // get DOM document
105 
106         org.w3c.dom.Document domDoc = sxwDoc.getContentDOM();
107 
108         encoder = new DocEncoder();
109 
110         // Traverse to the office:body element.
111         // There should only be one.
112 
113         NodeList list = domDoc.getElementsByTagName(TAG_OFFICE_BODY);
114         int len = list.getLength();
115 
116         if (len > 0) {
117             Node node = list.item(0);
118             traverseBody(node);
119         }
120 
121         // create a ConvertData object.
122         //
123         Record records[] = encoder.getRecords();
124         ConvertData cd = new ConvertData();
125 
126         PalmDocument palmDoc = new PalmDocument(docName,
127             DocConstants.CREATOR_ID, DocConstants.TYPE_ID,
128             0, PalmDB.PDB_HEADER_ATTR_BACKUP, records);
129 
130         cd.addDocument(palmDoc);
131         return cd;
132     }
133 
134 
135     /**
136      *  This method traverses <i>office:body</i> element.
137      *
138      *  @param  node  <i>office:body</i> <code>Node</code>.
139      *
140      *  @throws  IOException  If any I/O error occurs.
141      */
traverseBody(Node node)142     private void traverseBody(Node node) throws IOException {
143 
144         log("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
145         log("<AportisDOC>");
146 
147         if (node.hasChildNodes()) {
148 
149             NodeList nodeList = node.getChildNodes();
150             int len = nodeList.getLength();
151 
152             for (int i = 0; i < len; i++) {
153                 Node child = nodeList.item(i);
154 
155                 if (child.getNodeType() == Node.ELEMENT_NODE) {
156                     String nodeName = child.getNodeName();
157 
158                     if (nodeName.equals(TAG_PARAGRAPH) ||
159                         nodeName.equals(TAG_HEADING)) {
160 
161                         traverseParagraph(child);
162 
163                     } else if (nodeName.equals(TAG_UNORDERED_LIST)) {
164 
165                         traverseList(child);
166 
167                     } else if (nodeName.equals(TAG_ORDERED_LIST)) {
168 
169                         traverseList(child);
170 
171                     } else {
172 
173                         log("<OTHERS " + XmlUtil.getNodeInfo(child) + " />");
174                     }
175                 }
176             }
177         }
178 
179         log("</AportisDOC>");
180     }
181 
182 
183     /**
184      *  This method traverses the <i>text:p</i> and <i>text:h</i>
185      *  element <code>Node</code> objects.
186      *
187      *  @param  node  A <i>text:p</i> or <i>text:h</i>
188      *                <code>Node</code>.
189      *
190      *  @throws  IOException  If any I/O error occurs.
191      */
traverseParagraph(Node node)192     private void traverseParagraph(Node node) throws IOException {
193 
194         log("<PARA>");
195         traverseParaContents(node);
196         encoder.addText(EOL_CHAR);
197         log("</PARA>");
198     }
199 
200 
201     /**
202      *  This method traverses a paragraph content.
203      *  It uses the <code>traverseParaElem</code> method to
204      *  traverse into Element <code>Node</code> objects.
205      *
206      *  @param  node  A paragraph or content <code>Node</code>.
207      *
208      *  @throws  IOException  If any I/O error occurs.
209      */
traverseParaContents(Node node)210     private void traverseParaContents(Node node) throws IOException {
211 
212         if (node.hasChildNodes()) {
213 
214             NodeList nodeList = node.getChildNodes();
215             int len = nodeList.getLength();
216 
217             for (int i = 0; i < len; i++) {
218 
219                 Node child = nodeList.item(i);
220                 short nodeType = child.getNodeType();
221 
222                 switch (nodeType) {
223 
224                     case Node.TEXT_NODE:
225                         // this is for grabbing text nodes.
226                         String s = child.getNodeValue();
227 
228                         if (s.length() > 0) {
229                             encoder.addText(s);
230                         }
231 
232                         log("<TEXT>");
233                         log(s);
234                         log("</TEXT>");
235 
236                         break;
237 
238                     case Node.ELEMENT_NODE:
239 
240                         traverseParaElem(child);
241                         break;
242 
243                     case Node.ENTITY_REFERENCE_NODE:
244 
245                         log("<ENTITY_REFERENCE>");
246                         traverseParaContents(child);
247                         log("<ENTITY_REFERENCE/>");
248                         break;
249 
250                     default:
251                         log("<OTHERS " + XmlUtil.getNodeInfo(node) + " />");
252                 }
253             }
254         }
255     }
256 
257 
258     /**
259      *  This method traverses an <code>Element</code> <code>Node</code>
260      *  within a paragraph.
261      *
262      *  @param  node  <code>Element</code> <code>Node</code> within a
263      *                paragraph.
264      *
265      *  @throws  IOException  If any I/O error occurs.
266      */
traverseParaElem(Node node)267     private void traverseParaElem(Node node) throws IOException {
268 
269         String nodeName = node.getNodeName();
270 
271         if (nodeName.equals(TAG_SPACE)) {
272 
273             // this is for text:s tags.
274             NamedNodeMap map = node.getAttributes();
275             Node attr = map.getNamedItem(ATTRIBUTE_SPACE_COUNT);
276             StringBuffer space = new StringBuffer(SPACE_CHAR);
277             int count = 1;
278 
279             if (attr != null) {
280 
281                 try {
282 
283                     String countStr = attr.getNodeValue();
284                     count = Integer.parseInt(countStr.trim());
285 
286                 } catch (NumberFormatException e) {
287 
288                     // TODO: for now, throw IOException.
289                     // later, perhaps will have to throw
290                     // some other conversion exception instead.
291                     throw new IOException(e.getMessage());
292                 }
293             }
294 
295             for (int j = 0; j < count; j++) {
296 
297                 space.append(SPACE_CHAR);
298             }
299 
300             encoder.addText(space.toString());
301 
302             log("<SPACE count=\"" + count + "\" />");
303 
304         } else if (nodeName.equals(TAG_TAB_STOP)) {
305 
306             // this is for text:tab-stop
307             encoder.addText(TAB_CHAR);
308 
309             log("<TAB/>");
310 
311         } else if (nodeName.equals(TAG_LINE_BREAK)) {
312 
313             // commented out by Csaba: There is no point to convert a linebreak
314             // into a EOL, because it messes up the number of XML nodes and the
315             // merge won't work properly. Other solution would be to implement such
316             // nodemerger, which would be able to merge embedded tags in a paragraph
317 
318             // this is for text:line-break
319             // encoder.addText(EOL_CHAR);
320 
321             log("skipped <LINE-BREAK/>");
322 
323         } else if (nodeName.equals(TAG_SPAN)) {
324 
325             // this is for text:span
326             log("<SPAN>");
327             traverseParaContents(node);
328             log("</SPAN>");
329 
330         } else if (nodeName.equals(TAG_HYPERLINK)) {
331 
332             // this is for text:a
333             log("<HYPERLINK>");
334             traverseParaContents(node);
335             log("<HYPERLINK/>");
336 
337         } else if (nodeName.equals(TAG_BOOKMARK) ||
338                    nodeName.equals(TAG_BOOKMARK_START)) {
339 
340             log("<BOOKMARK/>");
341 
342         } else if (nodeName.equals(TAG_TEXT_VARIABLE_SET)
343                    || nodeName.equals(TAG_TEXT_VARIABLE_GET)
344                    || nodeName.equals(TAG_TEXT_EXPRESSION)
345                    || nodeName.equals(TAG_TEXT_USER_FIELD_GET)
346                    || nodeName.equals(TAG_TEXT_PAGE_VARIABLE_GET)
347                    || nodeName.equals(TAG_TEXT_SEQUENCE)
348                    || nodeName.equals( TAG_TEXT_VARIABLE_INPUT)
349                    || nodeName.equals(TAG_TEXT_TIME)
350                    || nodeName.equals( TAG_TEXT_PAGE_COUNT)
351                    || nodeName.equals(TAG_TEXT_PAGE_NUMBER )
352                    || nodeName.equals(TAG_TEXT_SUBJECT)
353                    || nodeName.equals(TAG_TEXT_TITLE)
354                    || nodeName.equals(TAG_TEXT_CREATION_TIME)
355                    || nodeName.equals(TAG_TEXT_DATE)
356                    || nodeName.equals(TAG_TEXT_TEXT_INPUT)
357                    || nodeName.equals(TAG_TEXT_AUTHOR_INITIALS)) {
358             log("<FIELD>");
359             traverseParaContents(node);
360             log("</FIELD>");
361 
362         }else if (nodeName.startsWith(TAG_TEXT)) {
363             log("<Unknown text Field>");
364             traverseParaContents(node);
365             log("</Unknown text Field>");
366 
367         }else {
368 
369             log("<OTHERS " + XmlUtil.getNodeInfo(node) + " />");
370         }
371     }
372 
373 
374     /**
375      *  This method traverses list tags <i>text:unordered-list</i> and
376      *  <i>text:ordered-list</i>.  A list can only contain one optional
377      *  <i>text:list-header</i> and one or more <i>text:list-item</i>
378      *  elements.
379      *
380      *  @param  node  A list <code>Node</code>.
381      *
382      *  @throws  IOException  If any I/O error occurs.
383      */
traverseList(Node node)384     private void traverseList(Node node) throws IOException {
385 
386         log("<LIST>");
387 
388         if (node.hasChildNodes()) {
389 
390             NodeList nodeList = node.getChildNodes();
391             int len = nodeList.getLength();
392 
393             for (int i = 0; i < len; i++) {
394 
395                 Node child = nodeList.item(i);
396 
397                 if (child.getNodeType() == Node.ELEMENT_NODE) {
398 
399                     String nodeName = child.getNodeName();
400 
401                     if (nodeName.equals(TAG_LIST_ITEM)) {
402 
403                         traverseListItem(child);
404 
405                     } else if (nodeName.equals(TAG_LIST_HEADER)) {
406 
407                         traverseListHeader(child);
408 
409                     } else {
410 
411                         log("<INVALID-XML-BUG " + XmlUtil.getNodeInfo(child) + " />");
412                     }
413                 }
414             }
415         }
416 
417         log("</LIST>");
418     }
419 
420 
421     /**
422      *  This method traverses a <i>text:list-header</i> element.
423      *  It contains one or more <i>text:p</i> elements.
424      *
425      *  @param  node  A list header <code>Node</code>.
426      *
427      *  @throws  IOException  If any I/O error occurs.
428      */
traverseListHeader(Node node)429     private void traverseListHeader(Node node) throws IOException {
430 
431         log("<LIST-HEADER>");
432 
433         if (node.hasChildNodes()) {
434 
435             NodeList nodeList = node.getChildNodes();
436             int len = nodeList.getLength();
437 
438             for (int i = 0; i < len; i++) {
439 
440                 Node child = nodeList.item(i);
441 
442                 if (child.getNodeType() == Node.ELEMENT_NODE) {
443 
444                     String nodeName = child.getNodeName();
445 
446                     if (nodeName.equals(TAG_PARAGRAPH)) {
447 
448                         traverseParagraph(child);
449 
450                     } else {
451 
452                         log("<INVALID-XML-BUG " + XmlUtil.getNodeInfo(child) + " />");
453                     }
454                 }
455             }
456         }
457 
458         log("</LIST-HEADER>");
459     }
460 
461 
462     /**
463      *  <p>This method will traverse a <i>text:list-item</i>.
464      *  A list item may contain one or more of <i>text:p</i>,
465      *  <i>text:h</i>, <i>text:section</i>, <i>text:ordered-list</i>
466      *  and <i>text:unordered-list</i>.</p>
467      *
468      *  <p>This method currently only implements grabbing <i>text:p</i>,
469      *  <i>text:h</i>, <i>text:unordered-list</i> and
470      *  <i>text:ordered-list</i>.</p>
471      *
472      *  @param  node  The <code>Node</code>.
473      *
474      *  @throws  IOException  If any I/O error occurs.
475      */
traverseListItem(Node node)476     private void traverseListItem(Node node) throws IOException {
477 
478         log("<LIST-ITEM>");
479 
480         if (node.hasChildNodes()) {
481 
482             NodeList nodeList = node.getChildNodes();
483             int len = nodeList.getLength();
484 
485             for (int i = 0; i < len; i++) {
486 
487                 Node child = nodeList.item(i);
488 
489                 if (child.getNodeType() == Node.ELEMENT_NODE) {
490 
491                     String nodeName = child.getNodeName();
492 
493                     if (nodeName.equals(TAG_PARAGRAPH)) {
494 
495                         traverseParagraph(child);
496 
497                     } else if (nodeName.equals(TAG_UNORDERED_LIST)) {
498 
499                         traverseList(child);
500 
501                     } else if (nodeName.equals(TAG_ORDERED_LIST)) {
502 
503                         traverseList(child);
504 
505                     } else {
506 
507                         log("<INVALID-XML-BUG " + XmlUtil.getNodeInfo(child) + " />");
508                     }
509                 }
510             }
511         }
512 
513         log("</LIST-ITEM>");
514     }
515 
516 
517     /**
518      *  Logs debug messages.
519      *
520      *  @param  str  The debug message.
521      */
log(String str)522     private void log(String str) {
523 
524         Debug.log(Debug.TRACE, str);
525     }
526 }
527 
528