1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 package org.openoffice.xmerge.converter.xml.sxw.pocketword; 25 26 import org.openoffice.xmerge.ConvertData; 27 import org.openoffice.xmerge.ConvertException; 28 import org.openoffice.xmerge.Document; 29 import org.openoffice.xmerge.DocumentDeserializer; 30 31 import org.openoffice.xmerge.converter.xml.OfficeConstants; 32 import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; 33 34 import org.openoffice.xmerge.converter.xml.ParaStyle; 35 import org.openoffice.xmerge.converter.xml.TextStyle; 36 import org.openoffice.xmerge.converter.xml.StyleCatalog; 37 38 import org.openoffice.xmerge.util.OfficeUtil; 39 40 import java.io.IOException; 41 42 import java.util.Enumeration; 43 import java.util.Vector; 44 45 import org.w3c.dom.NodeList; 46 import org.w3c.dom.Node; 47 import org.w3c.dom.Element; 48 49 50 /** 51 * <p>Pocket Word implementation of <code>DocumentDeserializer</code> 52 * for the {@link 53 * org.openoffice.xmerge.converter.xml.sxw.pocketword.PluginFactoryImpl 54 * PluginFactoryImpl}.</p> 55 * 56 * <p>This converts a Pocket Word file to an OpenOffice.org XML Writer DOM.</p> 57 * 58 * @author Mark Murnane 59 * @version 1.1 60 */ 61 public final class DocumentDeserializerImpl 62 implements DocumentDeserializer, OfficeConstants { 63 64 private PocketWordDocument pswDoc = null; 65 private SxwDocument sxwDoc = null; 66 private String docName; 67 68 private StyleCatalog styleCat = null; 69 70 71 /** 72 * <p>Initialises a new <code>DocumentDeserializerImpl</code> using the 73 * supplied <code>ConvertData</code>.</p> 74 * 75 * <p>The <code>Document</code> objects in the <code>ConvertData</code> 76 * should be {@link 77 * org.openoffice.xmerge.converter.xml.sxw.pocketword.PocketWordDocument 78 * PocketWordDocument} objects.</p> 79 * 80 * @param cd ConvertData containing a <code>PocketWordDocument</code> 81 * for conversion. 82 */ DocumentDeserializerImpl(ConvertData cd)83 public DocumentDeserializerImpl(ConvertData cd) { 84 Enumeration e = cd.getDocumentEnumeration(); 85 86 // A Pocket Word file is composed of one binary file 87 while (e.hasMoreElements()) { 88 pswDoc = (PocketWordDocument)e.nextElement(); 89 } 90 91 docName = pswDoc.getName(); 92 } 93 94 95 /** 96 * <p>Convert the data passed into the <code>DocumentDeserializer</code> 97 * constructor into the OpenOffice.org XML Writer <code>Document</code> 98 * format.</p> 99 * 100 * <p>This method may or may not be thread-safe. It is expected 101 * that the user code does not call this method in more than one 102 * thread. And for most cases, this method is only done once.</p> 103 * 104 * @return The resulting <code>Document</code> object from conversion. 105 * 106 * @throws ConvertException If any Convert error occurs. 107 * @throws IOException If any I/O error occurs. 108 */ deserialize()109 public Document deserialize() throws IOException, ConvertException { 110 Enumeration pe = pswDoc.getParagraphEnumeration(); 111 112 sxwDoc = new SxwDocument (docName); 113 sxwDoc.initContentDOM(); 114 115 // Default to an initial 5 entries in the catalog. 116 styleCat = new StyleCatalog(5); 117 118 try { 119 buildDocument(pe); 120 } 121 catch (Exception e) { 122 e.printStackTrace(); 123 throw new ConvertException("Error building OpenOffice.org XML Writer DOM: " 124 + e.toString()); 125 126 } 127 128 return sxwDoc; 129 } 130 131 132 /** 133 * This method actually takes care of the conversion. 134 * 135 * @param data An Enumeration of all Paragraphs in the Pocket Word doc. 136 * 137 * @throws IOException If any I/O errors occur. 138 */ buildDocument(Enumeration data)139 private void buildDocument(Enumeration data) throws IOException { 140 141 org.w3c.dom.Document doc = sxwDoc.getContentDOM(); 142 143 /* 144 * There should be only one each of office:body and 145 * office:automatic-styles in each document. 146 */ 147 Node bodyNode = doc.getElementsByTagName(TAG_OFFICE_BODY).item(0); 148 149 // Not every document has an automatic style tag 150 Node autoStylesNode = doc.getElementsByTagName( 151 TAG_OFFICE_AUTOMATIC_STYLES).item(0); 152 if (autoStylesNode == null) { 153 autoStylesNode = doc.createElement(TAG_OFFICE_AUTOMATIC_STYLES); 154 doc.insertBefore(autoStylesNode, bodyNode); 155 } 156 157 158 // Needed for naming new styles 159 int paraStyles = 1; 160 int textStyles = 1; 161 162 // Pocket Word has no concept of a list. 163 Element listNode = null; 164 165 166 // Down to business ... 167 while (data.hasMoreElements()) { 168 Paragraph p = (Paragraph)data.nextElement(); 169 Element paraNode = doc.createElement(TAG_PARAGRAPH); 170 171 // Set paragraph style information here 172 ParaStyle pStyle = p.makeStyle(); 173 if (pStyle == null) { 174 paraNode.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, 175 PocketWordConstants.DEFAULT_STYLE); 176 } 177 else { 178 // Create paragraph style 179 pStyle.setName(new String("PS" + paraStyles++)); 180 paraNode.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, pStyle.getName()); 181 styleCat.add(pStyle); 182 } 183 184 185 /* 186 * For each of the paragraphs, process each segment. 187 * There will always be at least one. 188 */ 189 Enumeration paraData = p.getSegmentsEnumerator(); 190 Vector textSpans = new Vector(0, 1); 191 192 do { 193 ParagraphTextSegment pts = (ParagraphTextSegment)paraData.nextElement(); 194 Element span = doc.createElement(OfficeConstants.TAG_SPAN); 195 196 TextStyle ts = pts.getStyle(); 197 198 if (ts != null) { 199 ts.setName(new String("TS" + textStyles++)); 200 span.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, ts.getName()); 201 styleCat.add(ts); 202 } 203 else { 204 span.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, 205 PocketWordConstants.DEFAULT_STYLE); 206 } 207 208 // If this isn't a blank paragraph 209 if (pts.getText() != null && !pts.getText().equals("")) { 210 Node[] children = OfficeUtil.parseText(pts.getText(), doc); 211 212 for (int j = 0; j < children.length; j++) { 213 span.appendChild(children[j]); 214 } 215 } 216 217 textSpans.add(span); 218 219 } while (paraData.hasMoreElements()); 220 221 222 /* 223 * Special case for the first span. If it has no style, then 224 * it shouldn't be a span, so just add its children with style 225 * set as standard. 226 */ 227 Element firstSpan = (Element)textSpans.elementAt(0); 228 String styleName = firstSpan.getAttribute(ATTRIBUTE_TEXT_STYLE_NAME); 229 if (styleName.equals(PocketWordConstants.DEFAULT_STYLE)) { 230 NodeList nl = firstSpan.getChildNodes(); 231 int len = nl.getLength(); 232 233 for (int i = 0; i < len; i++) { 234 /* 235 * Always take item 0 as the DOM tree event model will 236 * cause the NodeList to shrink as each Node is reparented. 237 * 238 * By taking the first item from the list, we essentially 239 * traverse the list in order. 240 */ 241 paraNode.appendChild(nl.item(0)); 242 } 243 } 244 else { 245 paraNode.appendChild(firstSpan); 246 } 247 248 // The rest are spans, so just add them 249 for (int i = 1; i < textSpans.size(); i++) { 250 paraNode.appendChild((Node)textSpans.elementAt(i)); 251 } 252 253 254 /* 255 * Pocket Word doesn't support lists, but it does have bulleted 256 * paragraphs that are essentially the same thing. 257 * 258 * Unlike OpenOffice Writer, a blank paragraph can be bulleted 259 * as well. This will be handled by inserting a blank paragraph 260 * into the unordered list, but OpenOffice Writer will not display 261 * an item at that point in the list. 262 */ 263 if (p.isBulleted()) { 264 if (listNode == null) { 265 listNode = doc.createElement(TAG_UNORDERED_LIST); 266 } 267 Element listItem = doc.createElement(TAG_LIST_ITEM); 268 listItem.appendChild(paraNode); 269 listNode.appendChild(listItem); 270 } 271 else { 272 if (listNode != null) { 273 bodyNode.appendChild(listNode); 274 listNode = null; 275 } 276 bodyNode.appendChild(paraNode); 277 } 278 } // End processing paragraphs 279 280 281 // Now write the style catalog to the document 282 NodeList nl = styleCat.writeNode(doc, "dummy").getChildNodes(); 283 int nlLen = nl.getLength(); // nl.item reduces the length 284 for (int i = 0; i < nlLen; i++) { 285 autoStylesNode.appendChild(nl.item(0)); 286 } 287 } 288 } 289