/************************************************************** * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. * *************************************************************/ package org.openoffice.xmerge.converter.xml.sxw.pocketword; import org.openoffice.xmerge.ConvertData; import org.openoffice.xmerge.ConvertException; import org.openoffice.xmerge.Document; import org.openoffice.xmerge.DocumentDeserializer; import org.openoffice.xmerge.converter.xml.OfficeConstants; import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; import org.openoffice.xmerge.converter.xml.ParaStyle; import org.openoffice.xmerge.converter.xml.TextStyle; import org.openoffice.xmerge.converter.xml.StyleCatalog; import org.openoffice.xmerge.util.OfficeUtil; import java.io.IOException; import java.util.Enumeration; import java.util.Vector; import org.w3c.dom.NodeList; import org.w3c.dom.Node; import org.w3c.dom.Element; /** *

Pocket Word implementation of DocumentDeserializer * for the {@link * org.openoffice.xmerge.converter.xml.sxw.pocketword.PluginFactoryImpl * PluginFactoryImpl}.

* *

This converts a Pocket Word file to an OpenOffice.org XML Writer DOM.

*
* @author Mark Murnane
* @version 1.1
*/
public final class DocumentDeserializerImpl
        implements DocumentDeserializer, OfficeConstants {

    /** Source document; stays null when the ConvertData held no documents. */
    private PocketWordDocument pswDoc = null;

    /** Target Writer document, created afresh by each call to deserialize(). */
    private SxwDocument sxwDoc = null;

    /** Name taken from the source document and given to the target document. */
    private String docName;

    /** Collects the paragraph and text styles generated during conversion. */
    private StyleCatalog styleCat = null;


    /**
     * Initialises a new DocumentDeserializerImpl using the supplied
     * ConvertData.
     *
     * The Document objects in the ConvertData should be
     * {@link org.openoffice.xmerge.converter.xml.sxw.pocketword.PocketWordDocument
     * PocketWordDocument} objects. If several are present, the last one
     * enumerated is the one converted. If none are present, construction
     * still succeeds and {@link #deserialize()} reports the problem as a
     * ConvertException.
     *
     * @param   cd      ConvertData containing a PocketWordDocument
     *                  for conversion.
     */
    public DocumentDeserializerImpl(ConvertData cd) {
        Enumeration e = cd.getDocumentEnumeration();

        // A Pocket Word file is composed of one binary file
        while (e.hasMoreElements()) {
            pswDoc = (PocketWordDocument) e.nextElement();
        }

        // Guard against an empty ConvertData instead of failing with a
        // NullPointerException inside the constructor.
        if (pswDoc != null) {
            docName = pswDoc.getName();
        }
    }


    /**
     * Converts the data passed into the constructor into the
     * OpenOffice.org XML Writer Document format.
     *
     * This method keeps its working state in instance fields, so it is
     * expected that user code does not call it from more than one thread.
     *
     * @return  The resulting Document object from conversion.
     *
     * @throws  ConvertException    If no source document was supplied, or
     *                              if any error occurs while building the
     *                              Writer DOM. In the latter case the
     *                              original exception is attached as the
     *                              cause.
     * @throws  IOException         If any I/O error occurs.
     */
    public Document deserialize() throws IOException, ConvertException {
        if (pswDoc == null) {
            throw new ConvertException(
                "No Pocket Word document was supplied for conversion");
        }

        Enumeration pe = pswDoc.getParagraphEnumeration();

        sxwDoc = new SxwDocument(docName);
        sxwDoc.initContentDOM();

        // Default to an initial 5 entries in the catalog.
        styleCat = new StyleCatalog(5);

        try {
            buildDocument(pe);
        } catch (Exception e) {
            // Keep the original failure (and its stack trace) reachable via
            // getCause() rather than printing it to stderr and dropping it.
            ConvertException ce = new ConvertException(
                "Error building OpenOffice.org XML Writer DOM: "
                + e.toString());
            ce.initCause(e);
            throw ce;
        }

        return sxwDoc;
    }


    /**
     * This method actually takes care of the conversion: it appends one
     * paragraph element per Pocket Word paragraph to the office:body
     * element, groups consecutive bulleted paragraphs into unordered
     * lists, and finally writes the generated styles into the
     * office:automatic-styles element.
     *
     * @param   data    An Enumeration of all Paragraphs in the Pocket Word
     *                  doc.
     *
     * @throws  IOException     If any I/O errors occur.
     */
    private void buildDocument(Enumeration data) throws IOException {

        org.w3c.dom.Document doc = sxwDoc.getContentDOM();

        /*
         * There should be only one each of office:body and
         * office:automatic-styles in each document.
         */
        Node bodyNode = doc.getElementsByTagName(TAG_OFFICE_BODY).item(0);

        // Not every document has an automatic style tag
        Node autoStylesNode =
            doc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES).item(0);
        if (autoStylesNode == null) {
            autoStylesNode = doc.createElement(TAG_OFFICE_AUTOMATIC_STYLES);

            // insertBefore() must be invoked on the node that actually
            // contains the reference child, i.e. the parent of the body
            // element, not on the Document itself.
            bodyNode.getParentNode().insertBefore(autoStylesNode, bodyNode);
        }

        // Needed for naming new styles
        int paraStyles = 1;
        int textStyles = 1;

        // Pocket Word has no concept of a list; this holds the list being
        // assembled from a run of consecutive bulleted paragraphs.
        Element listNode = null;

        // Down to business ...
        while (data.hasMoreElements()) {
            Paragraph p = (Paragraph) data.nextElement();
            Element paraNode = doc.createElement(TAG_PARAGRAPH);

            // Set paragraph style information here
            ParaStyle pStyle = p.makeStyle();
            if (pStyle == null) {
                paraNode.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME,
                                      PocketWordConstants.DEFAULT_STYLE);
            } else {
                // Create paragraph style
                pStyle.setName("PS" + paraStyles++);
                paraNode.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME,
                                      pStyle.getName());
                styleCat.add(pStyle);
            }

            /*
             * For each of the paragraphs, process each segment. Normally
             * there is at least one, but a paragraph without segments is
             * tolerated and simply produces an empty paragraph element.
             */
            Enumeration paraData = p.getSegmentsEnumerator();
            Vector textSpans = new Vector(0, 1);

            while (paraData.hasMoreElements()) {
                ParagraphTextSegment pts =
                    (ParagraphTextSegment) paraData.nextElement();
                Element span = doc.createElement(OfficeConstants.TAG_SPAN);

                TextStyle ts = pts.getStyle();
                if (ts != null) {
                    ts.setName("TS" + textStyles++);
                    span.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, ts.getName());
                    styleCat.add(ts);
                } else {
                    span.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME,
                                      PocketWordConstants.DEFAULT_STYLE);
                }

                // If this isn't a blank paragraph
                String text = pts.getText();
                if (text != null && text.length() != 0) {
                    Node[] children = OfficeUtil.parseText(text, doc);
                    for (int j = 0; j < children.length; j++) {
                        span.appendChild(children[j]);
                    }
                }

                textSpans.add(span);
            }

            if (!textSpans.isEmpty()) {
                /*
                 * Special case for the first span.  If it has no style,
                 * then it shouldn't be a span, so just add its children
                 * with style set as standard.
                 */
                Element firstSpan = (Element) textSpans.elementAt(0);
                String styleName =
                    firstSpan.getAttribute(ATTRIBUTE_TEXT_STYLE_NAME);

                if (styleName.equals(PocketWordConstants.DEFAULT_STYLE)) {
                    // Appending a node elsewhere removes it from firstSpan,
                    // so repeatedly moving the first child walks the
                    // children in order until none are left.
                    while (firstSpan.hasChildNodes()) {
                        paraNode.appendChild(firstSpan.getFirstChild());
                    }
                } else {
                    paraNode.appendChild(firstSpan);
                }

                // The rest are spans, so just add them
                for (int i = 1; i < textSpans.size(); i++) {
                    paraNode.appendChild((Node) textSpans.elementAt(i));
                }
            }

            /*
             * Pocket Word doesn't support lists, but it does have bulleted
             * paragraphs that are essentially the same thing.
             *
             * Unlike OpenOffice Writer, a blank paragraph can be bulleted
             * as well.  This is handled by inserting a blank paragraph
             * into the unordered list.
             */
            if (p.isBulleted()) {
                if (listNode == null) {
                    listNode = doc.createElement(TAG_UNORDERED_LIST);
                }
                Element listItem = doc.createElement(TAG_LIST_ITEM);
                listItem.appendChild(paraNode);
                listNode.appendChild(listItem);
            } else {
                // A non-bulleted paragraph ends any list in progress.
                if (listNode != null) {
                    bodyNode.appendChild(listNode);
                    listNode = null;
                }
                bodyNode.appendChild(paraNode);
            }
        } // End processing paragraphs

        // A document that ends with bulleted paragraphs still has a pending
        // list at this point; without this it would never reach the body.
        if (listNode != null) {
            bodyNode.appendChild(listNode);
        }

        // Now write the style catalog to the document.  Each appendChild()
        // detaches the node from the temporary container, so keep moving
        // the first child until the container is empty.
        Node styles = styleCat.writeNode(doc, "dummy");
        while (styles.hasChildNodes()) {
            autoStylesNode.appendChild(styles.getFirstChild());
        }
    }
}