1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 package org.openoffice.xmerge.converter.xml.sxw.aportisdoc; 25 26 import org.w3c.dom.NodeList; 27 import org.w3c.dom.Node; 28 import org.w3c.dom.NamedNodeMap; 29 30 import java.io.IOException; 31 import java.net.URLDecoder; 32 33 import org.openoffice.xmerge.Document; 34 import org.openoffice.xmerge.ConvertData; 35 import org.openoffice.xmerge.ConvertException; 36 import org.openoffice.xmerge.DocumentSerializer; 37 import org.openoffice.xmerge.converter.xml.OfficeConstants; 38 import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; 39 import org.openoffice.xmerge.converter.xml.sxw.aportisdoc.DocConstants; 40 import org.openoffice.xmerge.converter.palm.PalmDB; 41 import org.openoffice.xmerge.converter.palm.Record; 42 import org.openoffice.xmerge.converter.palm.PalmDocument; 43 import org.openoffice.xmerge.util.Debug; 44 import org.openoffice.xmerge.util.XmlUtil; 45 46 /** 47 * <p>AportisDoc implementation of 48 * org.openoffice.xmerge.DocumentSerializer 49 * for the {@link 50 * org.openoffice.xmerge.converter.xml.sxw.aportisdoc.PluginFactoryImpl 51 * PluginFactoryImpl}.</p> 52 * 53 * <p>The <code>serialize</code> method traverses the DOM 54 * document from the given <code>Document</code> object. It uses a 55 * <code>DocEncoder</code> object for the actual conversion of 56 * contents to the AportisDoc format.</p> 57 * 58 * @author Herbie Ong 59 */ 60 61 62 public final class DocumentSerializerImpl 63 implements OfficeConstants, DocConstants, DocumentSerializer { 64 65 /** A <code>DocEncoder</code> object for encoding to AportisDoc. */ 66 private DocEncoder encoder = null; 67 68 /** SXW <code>Document</code> object that this converter processes. */ 69 private SxwDocument sxwDoc = null; 70 71 72 /** 73 * Constructor. 74 * 75 * @param doc A SXW <code>Document</code> to be converted. 76 */ DocumentSerializerImpl(Document doc)77 public DocumentSerializerImpl(Document doc) { 78 sxwDoc = (SxwDocument) doc; 79 } 80 81 82 /** 83 * <p>Method to convert a <code>Document</code> into a PDB. 84 * It passes back the converted data as a <code>ConvertData</code> 85 * object.</p> 86 * 87 * <p>This method is not thread safe for performance reasons. 88 * This method should not be called from within two threads. 89 * It would be best to call this method only once per object 90 * instance.</p> 91 * 92 * @return The <code>ConvertData</code> object containing the output. 93 * 94 * @throws ConvertException If any conversion error occurs. 95 * @throws IOException If any I/O error occurs. 96 */ serialize()97 public ConvertData serialize() throws ConvertException, IOException { 98 99 100 // get the server document name 101 102 String docName = URLDecoder.decode(sxwDoc.getName(), DocConstants.ENCODING); 103 104 // get DOM document 105 106 org.w3c.dom.Document domDoc = sxwDoc.getContentDOM(); 107 108 encoder = new DocEncoder(); 109 110 // Traverse to the office:body element. 111 // There should only be one. 112 113 NodeList list = domDoc.getElementsByTagName(TAG_OFFICE_BODY); 114 int len = list.getLength(); 115 116 if (len > 0) { 117 Node node = list.item(0); 118 traverseBody(node); 119 } 120 121 // create a ConvertData object. 122 // 123 Record records[] = encoder.getRecords(); 124 ConvertData cd = new ConvertData(); 125 126 PalmDocument palmDoc = new PalmDocument(docName, 127 DocConstants.CREATOR_ID, DocConstants.TYPE_ID, 128 0, PalmDB.PDB_HEADER_ATTR_BACKUP, records); 129 130 cd.addDocument(palmDoc); 131 return cd; 132 } 133 134 135 /** 136 * This method traverses <i>office:body</i> element. 137 * 138 * @param node <i>office:body</i> <code>Node</code>. 139 * 140 * @throws IOException If any I/O error occurs. 141 */ traverseBody(Node node)142 private void traverseBody(Node node) throws IOException { 143 144 log("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"); 145 log("<AportisDOC>"); 146 147 if (node.hasChildNodes()) { 148 149 NodeList nodeList = node.getChildNodes(); 150 int len = nodeList.getLength(); 151 152 for (int i = 0; i < len; i++) { 153 Node child = nodeList.item(i); 154 155 if (child.getNodeType() == Node.ELEMENT_NODE) { 156 String nodeName = child.getNodeName(); 157 158 if (nodeName.equals(TAG_PARAGRAPH) || 159 nodeName.equals(TAG_HEADING)) { 160 161 traverseParagraph(child); 162 163 } else if (nodeName.equals(TAG_UNORDERED_LIST)) { 164 165 traverseList(child); 166 167 } else if (nodeName.equals(TAG_ORDERED_LIST)) { 168 169 traverseList(child); 170 171 } else { 172 173 log("<OTHERS " + XmlUtil.getNodeInfo(child) + " />"); 174 } 175 } 176 } 177 } 178 179 log("</AportisDOC>"); 180 } 181 182 183 /** 184 * This method traverses the <i>text:p</i> and <i>text:h</i> 185 * element <code>Node</code> objects. 186 * 187 * @param node A <i>text:p</i> or <i>text:h</i> 188 * <code>Node</code>. 189 * 190 * @throws IOException If any I/O error occurs. 191 */ traverseParagraph(Node node)192 private void traverseParagraph(Node node) throws IOException { 193 194 log("<PARA>"); 195 traverseParaContents(node); 196 encoder.addText(EOL_CHAR); 197 log("</PARA>"); 198 } 199 200 201 /** 202 * This method traverses a paragraph content. 203 * It uses the <code>traverseParaElem</code> method to 204 * traverse into Element <code>Node</code> objects. 205 * 206 * @param node A paragraph or content <code>Node</code>. 207 * 208 * @throws IOException If any I/O error occurs. 209 */ traverseParaContents(Node node)210 private void traverseParaContents(Node node) throws IOException { 211 212 if (node.hasChildNodes()) { 213 214 NodeList nodeList = node.getChildNodes(); 215 int len = nodeList.getLength(); 216 217 for (int i = 0; i < len; i++) { 218 219 Node child = nodeList.item(i); 220 short nodeType = child.getNodeType(); 221 222 switch (nodeType) { 223 224 case Node.TEXT_NODE: 225 // this is for grabbing text nodes. 226 String s = child.getNodeValue(); 227 228 if (s.length() > 0) { 229 encoder.addText(s); 230 } 231 232 log("<TEXT>"); 233 log(s); 234 log("</TEXT>"); 235 236 break; 237 238 case Node.ELEMENT_NODE: 239 240 traverseParaElem(child); 241 break; 242 243 case Node.ENTITY_REFERENCE_NODE: 244 245 log("<ENTITY_REFERENCE>"); 246 traverseParaContents(child); 247 log("<ENTITY_REFERENCE/>"); 248 break; 249 250 default: 251 log("<OTHERS " + XmlUtil.getNodeInfo(node) + " />"); 252 } 253 } 254 } 255 } 256 257 258 /** 259 * This method traverses an <code>Element</code> <code>Node</code> 260 * within a paragraph. 261 * 262 * @param node <code>Element</code> <code>Node</code> within a 263 * paragraph. 264 * 265 * @throws IOException If any I/O error occurs. 266 */ traverseParaElem(Node node)267 private void traverseParaElem(Node node) throws IOException { 268 269 String nodeName = node.getNodeName(); 270 271 if (nodeName.equals(TAG_SPACE)) { 272 273 // this is for text:s tags. 274 NamedNodeMap map = node.getAttributes(); 275 Node attr = map.getNamedItem(ATTRIBUTE_SPACE_COUNT); 276 StringBuffer space = new StringBuffer(SPACE_CHAR); 277 int count = 1; 278 279 if (attr != null) { 280 281 try { 282 283 String countStr = attr.getNodeValue(); 284 count = Integer.parseInt(countStr.trim()); 285 286 } catch (NumberFormatException e) { 287 288 // TODO: for now, throw IOException. 289 // later, perhaps will have to throw 290 // some other conversion exception instead. 291 throw new IOException(e.getMessage()); 292 } 293 } 294 295 for (int j = 0; j < count; j++) { 296 297 space.append(SPACE_CHAR); 298 } 299 300 encoder.addText(space.toString()); 301 302 log("<SPACE count=\"" + count + "\" />"); 303 304 } else if (nodeName.equals(TAG_TAB_STOP)) { 305 306 // this is for text:tab-stop 307 encoder.addText(TAB_CHAR); 308 309 log("<TAB/>"); 310 311 } else if (nodeName.equals(TAG_LINE_BREAK)) { 312 313 // commented out by Csaba: There is no point to convert a linebreak 314 // into a EOL, because it messes up the number of XML nodes and the 315 // merge won't work properly. Other solution would be to implement such 316 // nodemerger, which would be able to merge embedded tags in a paragraph 317 318 // this is for text:line-break 319 // encoder.addText(EOL_CHAR); 320 321 log("skipped <LINE-BREAK/>"); 322 323 } else if (nodeName.equals(TAG_SPAN)) { 324 325 // this is for text:span 326 log("<SPAN>"); 327 traverseParaContents(node); 328 log("</SPAN>"); 329 330 } else if (nodeName.equals(TAG_HYPERLINK)) { 331 332 // this is for text:a 333 log("<HYPERLINK>"); 334 traverseParaContents(node); 335 log("<HYPERLINK/>"); 336 337 } else if (nodeName.equals(TAG_BOOKMARK) || 338 nodeName.equals(TAG_BOOKMARK_START)) { 339 340 log("<BOOKMARK/>"); 341 342 } else if (nodeName.equals(TAG_TEXT_VARIABLE_SET) 343 || nodeName.equals(TAG_TEXT_VARIABLE_GET) 344 || nodeName.equals(TAG_TEXT_EXPRESSION) 345 || nodeName.equals(TAG_TEXT_USER_FIELD_GET) 346 || nodeName.equals(TAG_TEXT_PAGE_VARIABLE_GET) 347 || nodeName.equals(TAG_TEXT_SEQUENCE) 348 || nodeName.equals( TAG_TEXT_VARIABLE_INPUT) 349 || nodeName.equals(TAG_TEXT_TIME) 350 || nodeName.equals( TAG_TEXT_PAGE_COUNT) 351 || nodeName.equals(TAG_TEXT_PAGE_NUMBER ) 352 || nodeName.equals(TAG_TEXT_SUBJECT) 353 || nodeName.equals(TAG_TEXT_TITLE) 354 || nodeName.equals(TAG_TEXT_CREATION_TIME) 355 || nodeName.equals(TAG_TEXT_DATE) 356 || nodeName.equals(TAG_TEXT_TEXT_INPUT) 357 || nodeName.equals(TAG_TEXT_AUTHOR_INITIALS)) { 358 log("<FIELD>"); 359 traverseParaContents(node); 360 log("</FIELD>"); 361 362 }else if (nodeName.startsWith(TAG_TEXT)) { 363 log("<Unknown text Field>"); 364 traverseParaContents(node); 365 log("</Unknown text Field>"); 366 367 }else { 368 369 log("<OTHERS " + XmlUtil.getNodeInfo(node) + " />"); 370 } 371 } 372 373 374 /** 375 * This method traverses list tags <i>text:unordered-list</i> and 376 * <i>text:ordered-list</i>. A list can only contain one optional 377 * <i>text:list-header</i> and one or more <i>text:list-item</i> 378 * elements. 379 * 380 * @param node A list <code>Node</code>. 381 * 382 * @throws IOException If any I/O error occurs. 383 */ traverseList(Node node)384 private void traverseList(Node node) throws IOException { 385 386 log("<LIST>"); 387 388 if (node.hasChildNodes()) { 389 390 NodeList nodeList = node.getChildNodes(); 391 int len = nodeList.getLength(); 392 393 for (int i = 0; i < len; i++) { 394 395 Node child = nodeList.item(i); 396 397 if (child.getNodeType() == Node.ELEMENT_NODE) { 398 399 String nodeName = child.getNodeName(); 400 401 if (nodeName.equals(TAG_LIST_ITEM)) { 402 403 traverseListItem(child); 404 405 } else if (nodeName.equals(TAG_LIST_HEADER)) { 406 407 traverseListHeader(child); 408 409 } else { 410 411 log("<INVALID-XML-BUG " + XmlUtil.getNodeInfo(child) + " />"); 412 } 413 } 414 } 415 } 416 417 log("</LIST>"); 418 } 419 420 421 /** 422 * This method traverses a <i>text:list-header</i> element. 423 * It contains one or more <i>text:p</i> elements. 424 * 425 * @param node A list header <code>Node</code>. 426 * 427 * @throws IOException If any I/O error occurs. 428 */ traverseListHeader(Node node)429 private void traverseListHeader(Node node) throws IOException { 430 431 log("<LIST-HEADER>"); 432 433 if (node.hasChildNodes()) { 434 435 NodeList nodeList = node.getChildNodes(); 436 int len = nodeList.getLength(); 437 438 for (int i = 0; i < len; i++) { 439 440 Node child = nodeList.item(i); 441 442 if (child.getNodeType() == Node.ELEMENT_NODE) { 443 444 String nodeName = child.getNodeName(); 445 446 if (nodeName.equals(TAG_PARAGRAPH)) { 447 448 traverseParagraph(child); 449 450 } else { 451 452 log("<INVALID-XML-BUG " + XmlUtil.getNodeInfo(child) + " />"); 453 } 454 } 455 } 456 } 457 458 log("</LIST-HEADER>"); 459 } 460 461 462 /** 463 * <p>This method will traverse a <i>text:list-item</i>. 464 * A list item may contain one or more of <i>text:p</i>, 465 * <i>text:h</i>, <i>text:section</i>, <i>text:ordered-list</i> 466 * and <i>text:unordered-list</i>.</p> 467 * 468 * <p>This method currently only implements grabbing <i>text:p</i>, 469 * <i>text:h</i>, <i>text:unordered-list</i> and 470 * <i>text:ordered-list</i>.</p> 471 * 472 * @param node The <code>Node</code>. 473 * 474 * @throws IOException If any I/O error occurs. 475 */ traverseListItem(Node node)476 private void traverseListItem(Node node) throws IOException { 477 478 log("<LIST-ITEM>"); 479 480 if (node.hasChildNodes()) { 481 482 NodeList nodeList = node.getChildNodes(); 483 int len = nodeList.getLength(); 484 485 for (int i = 0; i < len; i++) { 486 487 Node child = nodeList.item(i); 488 489 if (child.getNodeType() == Node.ELEMENT_NODE) { 490 491 String nodeName = child.getNodeName(); 492 493 if (nodeName.equals(TAG_PARAGRAPH)) { 494 495 traverseParagraph(child); 496 497 } else if (nodeName.equals(TAG_UNORDERED_LIST)) { 498 499 traverseList(child); 500 501 } else if (nodeName.equals(TAG_ORDERED_LIST)) { 502 503 traverseList(child); 504 505 } else { 506 507 log("<INVALID-XML-BUG " + XmlUtil.getNodeInfo(child) + " />"); 508 } 509 } 510 } 511 } 512 513 log("</LIST-ITEM>"); 514 } 515 516 517 /** 518 * Logs debug messages. 519 * 520 * @param str The debug message. 521 */ log(String str)522 private void log(String str) { 523 524 Debug.log(Debug.TRACE, str); 525 } 526 } 527 528