1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 package org.openoffice.xmerge.converter.xml.sxw.pocketword; 25 26 import org.w3c.dom.Node; 27 import org.w3c.dom.NodeList; 28 import org.w3c.dom.NamedNodeMap; 29 30 import org.openoffice.xmerge.ConvertData; 31 import org.openoffice.xmerge.ConvertException; 32 import org.openoffice.xmerge.Document; 33 import org.openoffice.xmerge.DocumentSerializer; 34 35 import org.openoffice.xmerge.converter.xml.OfficeConstants; 36 import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; 37 38 import org.openoffice.xmerge.converter.xml.ParaStyle; 39 import org.openoffice.xmerge.converter.xml.TextStyle; 40 import org.openoffice.xmerge.converter.xml.StyleCatalog; 41 42 import java.io.IOException; 43 44 45 /** 46 * <p>Pocket Word implementation of <code>DocumentDeserializer</code> 47 * for use by {@link 48 * org.openoffice.xmerge.converter.xml.sxw.pocketword.PluginFactoryImpl 49 * PluginFactoryImpl}.</p> 50 * 51 * <p>This converts an OpenOffice.org XML Writer files to a Pocket Word file.</p> 52 * 53 * @author Mark Murnane 54 * @version 1.1 55 */ 56 public final class DocumentSerializerImpl 57 implements DocumentSerializer, OfficeConstants { 58 59 private PocketWordDocument pswDoc; 60 private SxwDocument sxwDoc; 61 62 private StyleCatalog styleCat = null; 63 64 private boolean inList = false; 65 66 67 /** 68 * <p>Initialises a new <code>DocumentSerializerImpl</code> using the.<br> 69 * supplied <code>Document</code></p> 70 * 71 * <p>The supplied document should be an {@link 72 * org.openoffice.xmerge.converter.xml.sxw.SxwDocument SxwDocument} 73 * object.</p> 74 * 75 * @param doc The <code>Document</code> to convert. 76 */ DocumentSerializerImpl(Document doc)77 public DocumentSerializerImpl(Document doc) { 78 sxwDoc = (SxwDocument)doc; 79 pswDoc = new PocketWordDocument(sxwDoc.getName()); 80 } 81 82 83 /** 84 * <p>Convert the data passed into the <code>DocumentSerializerImpl</code> 85 * constructor into Pocket Word format.</p> 86 * 87 * <p>This method may or may not be thread-safe. It is expected 88 * that the user code does not call this method in more than one 89 * thread. And for most cases, this method is only done once.</p> 90 * 91 * @return <code>ConvertData</code> object to pass back the 92 * converted data. 93 * 94 * @throws ConvertException If any conversion error occurs. 95 * @throws IOException If any I/O error occurs. 96 */ serialize()97 public ConvertData serialize() throws IOException, ConvertException { 98 ConvertData cd = new ConvertData(); 99 100 org.w3c.dom.Document doc = sxwDoc.getContentDOM(); 101 102 // Load any style info before traversing the document content tree 103 loadStyles(); 104 105 NodeList list = doc.getElementsByTagName(TAG_OFFICE_BODY); 106 107 int len = list.getLength(); 108 if (len > 0) { 109 Node node = list.item(0); 110 traverseBody(node); 111 } 112 113 cd.addDocument(pswDoc); 114 115 return cd; 116 } 117 118 119 /* 120 * Handles the loading of defined styles from the style.xml file as well 121 * as automatic styles from the content.xml file. 122 * 123 * Any change to a defined style, such as a short bold section, falls into 124 * the latter category. 125 */ loadStyles()126 private void loadStyles() { 127 org.w3c.dom.Document contentDom = sxwDoc.getContentDOM(); 128 org.w3c.dom.Document styleDom = sxwDoc.getStyleDOM(); 129 130 styleCat = new StyleCatalog(25); 131 132 NodeList nl = null; 133 String families[] = new String[] { PocketWordConstants.TEXT_STYLE_FAMILY, 134 PocketWordConstants.PARAGRAPH_STYLE_FAMILY, 135 PocketWordConstants.PARAGRAPH_STYLE_FAMILY }; 136 Class classes[] = new Class[] { TextStyle.class, 137 ParaStyle.class, 138 TextStyle.class }; 139 140 String[] styleTypes = new String[] { TAG_OFFICE_STYLES, 141 TAG_OFFICE_AUTOMATIC_STYLES, 142 TAG_OFFICE_MASTER_STYLES }; 143 144 /* 145 * Documents converted from PSW -> SXW will not have a style.xml when 146 * being converted back to PSW. This would occur if a document was 147 * not modified within Writer between conversions. 148 * 149 * Any Writer modifications and saves create the style.xml and other 150 * portions of a complete Writer SXW file. 151 */ 152 if (styleDom != null) { 153 // Process the Style XML tree 154 for (int i = 0; i < styleTypes.length; i++ ) { 155 nl = styleDom.getElementsByTagName(styleTypes[i]); 156 if (nl.getLength() != 0) { 157 styleCat.add(nl.item(0), families, classes, null, false); 158 } 159 } 160 } 161 162 /* 163 * Process the content XML for any other style info. 164 * Should only be automatic types here. 165 */ 166 for (int i = 0; i < styleTypes.length; i++ ) { 167 nl = contentDom.getElementsByTagName(styleTypes[i]); 168 if (nl.getLength() != 0) { 169 styleCat.add(nl.item(0), families, classes, null, false); 170 } 171 } 172 } 173 174 175 /* 176 * Process the office:body tag. 177 */ traverseBody(Node node)178 private void traverseBody(Node node) throws IOException, ConvertException { 179 180 if (node.hasChildNodes()) { 181 NodeList nList = node.getChildNodes(); 182 int len = nList.getLength(); 183 184 for (int i = 0; i < len; i++) { 185 Node child = nList.item(i); 186 187 if (child.getNodeType() == Node.ELEMENT_NODE) { 188 String nodeName = child.getNodeName(); 189 190 if (nodeName.equals(TAG_PARAGRAPH) 191 || nodeName.equals(TAG_HEADING)) { 192 traverseParagraph(child); 193 } 194 195 if (nodeName.equals(TAG_UNORDERED_LIST) || 196 nodeName.equals(TAG_ORDERED_LIST)) { 197 traverseList(child); 198 } 199 } 200 } 201 } 202 } 203 204 205 /* 206 * Process a text:p tag 207 */ traverseParagraph(Node node)208 private void traverseParagraph(Node node) throws IOException, ConvertException { 209 String styleName = getAttribute(node, ATTRIBUTE_TEXT_STYLE_NAME); 210 211 ParaStyle pstyle = (ParaStyle)styleCat.lookup(styleName, 212 PocketWordConstants.PARAGRAPH_STYLE_FAMILY, null, 213 ParaStyle.class); 214 if (pstyle != null) { 215 pstyle = (ParaStyle)pstyle.getResolved(); 216 } 217 218 TextStyle tstyle = (TextStyle)styleCat.lookup(styleName, 219 PocketWordConstants.PARAGRAPH_STYLE_FAMILY, null, 220 TextStyle.class); 221 if (pstyle != null) { 222 tstyle = (TextStyle)tstyle.getResolved(); 223 } 224 225 try { 226 pswDoc.addParagraph(pstyle, inList); 227 } 228 catch (Exception e) { 229 throw new ConvertException( 230 "Error adding paragraph to PocketWordDocument.\n" 231 + e.toString()); 232 } 233 234 traverseParagraphContents(node, tstyle); 235 } 236 237 238 /* 239 * Process the contents of a paragraph. This method handles situations 240 * where the paragraph contains multiple children, each representing a 241 * differently formatted piece of text. 242 */ traverseParagraphContents(Node node, TextStyle defTextStyle)243 private void traverseParagraphContents (Node node, TextStyle defTextStyle) 244 throws IOException, ConvertException { 245 // First up, get the style of this little bit 246 String styleName = getAttribute(node, ATTRIBUTE_TEXT_STYLE_NAME); 247 TextStyle tStyle = (TextStyle)styleCat.lookup(styleName, 248 PocketWordConstants.TEXT_STYLE_FAMILY, null, 249 TextStyle.class); 250 251 if (tStyle == null) { 252 tStyle = defTextStyle; 253 } 254 255 if (node.hasChildNodes()) { 256 NodeList nList = node.getChildNodes(); 257 int len = nList.getLength(); 258 259 for (int i = 0; i < len; i++) { 260 261 Node child = nList.item(i); 262 short nodeType = child.getNodeType(); 263 264 switch (nodeType) { 265 case Node.TEXT_NODE: 266 String s = child.getNodeValue(); 267 if (s.length() > 0) { 268 try { 269 pswDoc.addParagraphData(s, tStyle); 270 } 271 catch (Exception e) { 272 throw new ConvertException( 273 "Error adding data to paragraph in " + 274 "PocketWordDocument.\n" + e.toString()); 275 276 } 277 } 278 break; 279 280 case Node.ELEMENT_NODE: 281 if (child.getNodeName().equals(TAG_SPACE)) { 282 StringBuffer sb = new StringBuffer(""); 283 int count = 1; 284 285 NamedNodeMap map = child.getAttributes(); 286 287 if (map.getLength() > 0) { 288 Node attr = map.getNamedItem(ATTRIBUTE_SPACE_COUNT); 289 count = Integer.parseInt(attr.getNodeValue().trim()); 290 } 291 292 for ( ; count > 0; count--) { 293 sb.append(" "); 294 } 295 296 /* 297 * May want to look at style info for spaces. Could 298 * be important when calculating font metrics. 299 */ 300 try { 301 pswDoc.addParagraphData(sb.toString(), tStyle); 302 } 303 catch (Exception e) { 304 throw new ConvertException( 305 "Error adding data to paragraph in " + 306 "PocketWordDocument.\n" + e.toString()); 307 308 } 309 } 310 else if (child.getNodeName().equals(TAG_TAB_STOP)) { 311 try { 312 pswDoc.addParagraphData("\t", tStyle); 313 } 314 catch (Exception e) { 315 throw new ConvertException( 316 "Error adding data to paragraph in " + 317 "PocketWordDocument.\n" + e.toString()); 318 319 } 320 } 321 else if (child.getNodeName().equals(TAG_LINE_BREAK)) { 322 /* 323 * Pocket Word does not support soft line breaks. 324 * They are just new paragraphs. 325 */ 326 } 327 else if (child.getNodeName().equals(TAG_SPAN)) { 328 /* 329 * This is where the interesting ones, i.e. format 330 * changes occur. 331 */ 332 traverseParagraphContents (child, defTextStyle); 333 } 334 else if (child.getNodeName().equals(TAG_HYPERLINK)) { 335 traverseParagraphContents (child, defTextStyle); 336 } 337 else { 338 // Should maybe have a default in here. 339 } 340 break; 341 default: 342 // Do nothing 343 } 344 } 345 } 346 else { 347 /* 348 * If the node has no children, then it is a blank paragraph, but 349 * they still require an entry in the Paragraph class to make sense. 350 */ 351 pswDoc.addParagraphData("", tStyle); 352 } 353 } 354 355 356 /* 357 * Process a text:ordered-list or text:unordered-list tag. Pocket Word has 358 * no concept of a list so there is no need to differentiate between the 359 * two. 360 * 361 * Each item on the list contains a text:p node. 362 */ traverseList(Node node)363 private void traverseList (Node node) throws IOException, ConvertException { 364 inList = true; 365 366 if (node.hasChildNodes()) { 367 NodeList nList = node.getChildNodes(); 368 int len = nList.getLength(); 369 370 for (int i = 0; i < len; i++) { 371 Node child = nList.item(i); 372 373 if (child.getNodeType() == Node.ELEMENT_NODE) { 374 String nodeName = child.getNodeName(); 375 376 if (nodeName.equals(TAG_LIST_ITEM)) { 377 traverseListItem(child); 378 } 379 } 380 } 381 } 382 383 inList = false; 384 } 385 386 387 /* 388 * Process a text:list-item node. They usually contain have a single 389 * text:p child but can also have sections or other lists. 390 * 391 * For this case, only paragraphs are supported. 392 */ traverseListItem(Node node)393 private void traverseListItem (Node node) throws IOException, ConvertException { 394 if (node.hasChildNodes()) { 395 NodeList nList = node.getChildNodes(); 396 int len = nList.getLength(); 397 398 for (int i = 0; i < len; i++) { 399 Node child = nList.item(i); 400 401 if (child.getNodeType() == Node.ELEMENT_NODE) { 402 String nodeName = child.getNodeName(); 403 404 if (nodeName.equals(TAG_PARAGRAPH)) { 405 traverseParagraph(child); 406 } 407 } 408 } 409 } 410 411 } 412 413 414 /* 415 * Utility method to retrieve a Node attribute. 416 */ getAttribute(Node node, String attribute)417 private String getAttribute (Node node, String attribute) { 418 NamedNodeMap attrNodes = node.getAttributes(); 419 420 if (attrNodes != null) { 421 Node attr = attrNodes.getNamedItem(attribute); 422 if (attr != null) { 423 return attr.getNodeValue(); 424 } 425 } 426 427 return null; 428 } 429 } 430