sxw/pocketword/DocumentSerializerImpl.java

/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/


package org.openoffice.xmerge.converter.xml.sxw.pocketword;

import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.NamedNodeMap;

import org.openoffice.xmerge.ConvertData;
import org.openoffice.xmerge.ConvertException;
import org.openoffice.xmerge.Document;
import org.openoffice.xmerge.DocumentSerializer;

import org.openoffice.xmerge.converter.xml.OfficeConstants;
import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;

import org.openoffice.xmerge.converter.xml.ParaStyle;
import org.openoffice.xmerge.converter.xml.TextStyle;
import org.openoffice.xmerge.converter.xml.StyleCatalog;

import java.io.IOException;


/**
 * <p>Pocket Word implementation of <code>DocumentSerializer</code>
 * for use by {@link
 * org.openoffice.xmerge.converter.xml.sxw.pocketword.PluginFactoryImpl
 * PluginFactoryImpl}.</p>
 *
 * <p>This converts an OpenOffice.org XML Writer file to a Pocket Word file.</p>
 *
 * <p>Only paragraphs, headings and (non-nested) list items that are direct
 * children of the <code>office:body</code> element are converted; any other
 * element is skipped.</p>
 *
 * @author      Mark Murnane
 * @version 1.1
 */
public final class DocumentSerializerImpl
            implements DocumentSerializer, OfficeConstants {

    /* Message prefix used whenever text cannot be appended to a paragraph. */
    private static final String ADD_DATA_ERROR =
        "Error adding data to paragraph in " + "PocketWordDocument.\n";

    /* Target document, filled in while the source tree is traversed. */
    private final PocketWordDocument pswDoc;

    /* Source document supplied to the constructor. */
    private final SxwDocument        sxwDoc;

    /* Catalogue of the styles found by loadStyles(). */
    private StyleCatalog styleCat = null;

    /* True while the paragraphs being added belong to a list. */
    private boolean inList = false;


    /**
     *  <p>Initialises a new <code>DocumentSerializerImpl</code> using the
     *     supplied <code>Document</code>.</p>
     *
     * <p>The supplied document should be an {@link
     *    org.openoffice.xmerge.converter.xml.sxw.SxwDocument SxwDocument}
     *    object; it is cast to that type without any check.</p>
     *
     *  @param  doc  The <code>Document</code> to convert.
     */
    public DocumentSerializerImpl(Document doc) {
        sxwDoc = (SxwDocument)doc;
        pswDoc = new PocketWordDocument(sxwDoc.getName());
    }


    /**
     *  <p>Convert the data passed into the <code>DocumentSerializerImpl</code>
     *  constructor into Pocket Word format.</p>
     *
     *  <p>This method may or may not be thread-safe.  It is expected
     *  that the user code does not call this method in more than one
     *  thread.  And for most cases, this method is only done once.</p>
     *
     *  @return  <code>ConvertData</code> object to pass back the
     *           converted data.
     *
     *  @throws  ConvertException  If any conversion error occurs, including
     *                             a source document without a content DOM.
     *  @throws  IOException       If any I/O error occurs.
     */
    public ConvertData serialize() throws IOException, ConvertException {
        ConvertData cd = new ConvertData();

        org.w3c.dom.Document doc = sxwDoc.getContentDOM();
        if (doc == null) {
            // Fail with the declared exception type instead of a bare
            // NullPointerException further down.
            throw new ConvertException(
                                "No content DOM available in SxwDocument.");
        }

        // Load any style info before traversing the document content tree
        loadStyles();

        // Only the first office:body element is converted.
        NodeList list = doc.getElementsByTagName(TAG_OFFICE_BODY);
        if (list.getLength() > 0) {
            traverseBody(list.item(0));
        }

        cd.addDocument(pswDoc);

        return cd;
    }


    /*
     * Handles the loading of defined styles from the style.xml file as well
     * as automatic styles from the content.xml file.
     *
     * Any change to a defined style, such as a short bold section, falls into
     * the latter category.
     */
    private void loadStyles() {
        org.w3c.dom.Document contentDom = sxwDoc.getContentDOM();
        org.w3c.dom.Document styleDom   = sxwDoc.getStyleDOM();

        styleCat = new StyleCatalog(25);

        /*
         * families[i] is paired with classes[i].  The paragraph family is
         * listed twice so that it is registered both as a ParaStyle and as a
         * TextStyle; traverseParagraph() looks it up under both classes.
         */
        String[] families = new String[] { PocketWordConstants.TEXT_STYLE_FAMILY,
                                           PocketWordConstants.PARAGRAPH_STYLE_FAMILY,
                                           PocketWordConstants.PARAGRAPH_STYLE_FAMILY };
        Class[] classes   = new Class[] { TextStyle.class,
                                          ParaStyle.class,
                                          TextStyle.class };

        /*
         * Documents converted from PSW -> SXW will not have a style.xml when
         * being converted back to PSW.  This would occur if a document was
         * not modified within Writer between conversions.
         *
         * Any Writer modifications and saves create the style.xml and other
         * portions of a complete Writer SXW file.
         */
        if (styleDom != null) {
            addStylesFrom(styleDom, families, classes);
        }

        /*
         * Process the content XML for any other style info.
         * Should only be automatic types here.
         */
        addStylesFrom(contentDom, families, classes);
    }


    /*
     * Adds to the style catalogue the first office:styles,
     * office:automatic-styles and office:master-styles element (in that
     * order) found in the supplied DOM.  Missing elements are skipped.
     */
    private void addStylesFrom(org.w3c.dom.Document dom, String[] families,
                               Class[] classes) {
        String[] styleTypes = new String[] { TAG_OFFICE_STYLES,
                                             TAG_OFFICE_AUTOMATIC_STYLES,
                                             TAG_OFFICE_MASTER_STYLES };

        for (int i = 0; i < styleTypes.length; i++) {
            NodeList nl = dom.getElementsByTagName(styleTypes[i]);
            if (nl.getLength() != 0) {
                styleCat.add(nl.item(0), families, classes, null, false);
            }
        }
    }


    /*
     * Process the office:body tag.  Paragraphs, headings and lists that are
     * direct children are converted; every other child is ignored.
     */
    private void traverseBody(Node node) throws IOException, ConvertException {
        if (!node.hasChildNodes()) {
            return;
        }

        NodeList nList = node.getChildNodes();
        int len = nList.getLength();

        for (int i = 0; i < len; i++) {
            Node child = nList.item(i);

            if (child.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }

            String nodeName = child.getNodeName();

            if (nodeName.equals(TAG_PARAGRAPH)
                    || nodeName.equals(TAG_HEADING)) {
                traverseParagraph(child);
            }
            else if (nodeName.equals(TAG_UNORDERED_LIST)
                    || nodeName.equals(TAG_ORDERED_LIST)) {
                traverseList(child);
            }
        }
    }


    /*
     * Process a text:p (or text:h) tag: start a new paragraph in the output
     * document and then add the paragraph's contents to it.
     */
    private void traverseParagraph(Node node) throws IOException, ConvertException {
        String styleName = getAttribute(node, ATTRIBUTE_TEXT_STYLE_NAME);

        ParaStyle pstyle = (ParaStyle)styleCat.lookup(styleName,
                                PocketWordConstants.PARAGRAPH_STYLE_FAMILY, null,
                                ParaStyle.class);
        if (pstyle != null) {
            pstyle = (ParaStyle)pstyle.getResolved();
        }

        /*
         * The paragraph style is also registered as a TextStyle; that is the
         * default text formatting for the paragraph's contents.
         */
        TextStyle tstyle = (TextStyle)styleCat.lookup(styleName,
                                PocketWordConstants.PARAGRAPH_STYLE_FAMILY, null,
                                TextStyle.class);
        // Guard on tstyle itself: the two lookups can succeed or fail
        // independently of each other.
        if (tstyle != null) {
            tstyle = (TextStyle)tstyle.getResolved();
        }

        try {
            pswDoc.addParagraph(pstyle, inList);
        }
        catch (Exception e) {
            throw new ConvertException(
                                "Error adding paragraph to PocketWordDocument.\n"
                                + e.toString());
        }

        traverseParagraphContents(node, tstyle);
    }


    /*
     * Process the contents of a paragraph.  This method handles situations
     * where the paragraph contains multiple children, each representing a
     * differently formatted piece of text.
     *
     * node         - the paragraph, span or hyperlink whose children are added.
     * defTextStyle - style used when node does not name a text style of its
     *                own; may be null.
     */
    private void traverseParagraphContents (Node node, TextStyle defTextStyle)
        throws IOException, ConvertException {
        // First up, get the style of this little bit
        String styleName = getAttribute(node, ATTRIBUTE_TEXT_STYLE_NAME);
        TextStyle tStyle = (TextStyle)styleCat.lookup(styleName,
                                PocketWordConstants.TEXT_STYLE_FAMILY, null,
                                TextStyle.class);

        if (tStyle == null) {
            tStyle = defTextStyle;
        }

        if (!node.hasChildNodes()) {
            /*
             * If the node has no children, then it is a blank paragraph, but
             * they still require an entry in the Paragraph class to make sense.
             */
            addParagraphData("", tStyle);
            return;
        }

        NodeList nList = node.getChildNodes();
        int len = nList.getLength();

        for (int i = 0; i < len; i++) {
            Node child = nList.item(i);

            switch (child.getNodeType()) {
                case Node.TEXT_NODE:
                    String text = child.getNodeValue();
                    if (text.length() > 0) {
                        addParagraphData(text, tStyle);
                    }
                    break;

                case Node.ELEMENT_NODE:
                    traverseInlineElement(child, tStyle, defTextStyle);
                    break;

                default:
                    // Do nothing
            }
        }
    }


    /*
     * Process one element found inside a paragraph, span or hyperlink.
     *
     * child        - the element to process.
     * tStyle       - style applied to text generated directly for child.
     * defTextStyle - paragraph default handed down to nested spans and
     *                hyperlinks, which resolve their own style from it.
     */
    private void traverseInlineElement (Node child, TextStyle tStyle,
                                        TextStyle defTextStyle)
        throws IOException, ConvertException {
        String name = child.getNodeName();

        if (name.equals(TAG_SPACE)) {
            /*
             * May want to look at style info for spaces.  Could
             * be important when calculating font metrics.
             */
            addParagraphData(expandSpaces(child), tStyle);
        }
        else if (name.equals(TAG_TAB_STOP)) {
            addParagraphData("\t", tStyle);
        }
        else if (name.equals(TAG_LINE_BREAK)) {
            /*
             * Pocket Word does not support soft line breaks, so nothing is
             * emitted for them here.
             */
        }
        else if (name.equals(TAG_SPAN) || name.equals(TAG_HYPERLINK)) {
            /*
             * This is where the interesting ones, i.e. format
             * changes occur.
             */
            traverseParagraphContents (child, defTextStyle);
        }
        // Any other element is ignored.
    }


    /*
     * Builds the run of blanks represented by a space element.  The run
     * length comes from the space-count attribute and defaults to one when
     * that attribute is absent; a count of zero or less yields an empty
     * string.
     *
     * Throws ConvertException if the count is not a valid integer.
     */
    private String expandSpaces (Node spaceNode) throws ConvertException {
        int count = 1;

        // The element may carry other attributes without carrying the count,
        // so look the count up by name rather than testing for "any attribute".
        String countValue = getAttribute(spaceNode, ATTRIBUTE_SPACE_COUNT);
        if (countValue != null) {
            try {
                count = Integer.parseInt(countValue.trim());
            }
            catch (NumberFormatException e) {
                throw new ConvertException(
                                "Invalid space count in paragraph: "
                                + countValue);
            }
        }

        StringBuffer sb = new StringBuffer();
        for ( ; count > 0; count--) {
            sb.append(' ');
        }

        return sb.toString();
    }


    /*
     * Appends text to the current paragraph of the output document,
     * reporting any failure as a ConvertException.
     */
    private void addParagraphData (String data, TextStyle style)
        throws ConvertException {
        try {
            pswDoc.addParagraphData(data, style);
        }
        catch (Exception e) {
            throw new ConvertException(ADD_DATA_ERROR + e.toString());
        }
    }


    /*
     * Process a text:ordered-list or text:unordered-list tag.  Pocket Word has
     * no concept of a list so there is no need to differentiate between the
     * two.
     *
     * Each item on the list contains a text:p node.
     */
    private void traverseList (Node node) throws IOException, ConvertException {
        boolean wasInList = inList;
        inList = true;

        try {
            if (node.hasChildNodes()) {
                NodeList nList = node.getChildNodes();
                int len = nList.getLength();

                for (int i = 0; i < len; i++) {
                    Node child = nList.item(i);

                    if (child.getNodeType() == Node.ELEMENT_NODE
                            && child.getNodeName().equals(TAG_LIST_ITEM)) {
                        traverseListItem(child);
                    }
                }
            }
        }
        finally {
            // Restore the flag even when an item fails to convert, so that a
            // failure cannot leave later paragraphs marked as list entries.
            inList = wasInList;
        }
    }


    /*
     * Process a text:list-item node.  They usually have a single
     * text:p child but can also have sections or other lists.
     *
     * For this case, only paragraphs are supported.
     */
    private void traverseListItem (Node node) throws IOException, ConvertException {
        if (!node.hasChildNodes()) {
            return;
        }

        NodeList nList = node.getChildNodes();
        int len = nList.getLength();

        for (int i = 0; i < len; i++) {
            Node child = nList.item(i);

            if (child.getNodeType() == Node.ELEMENT_NODE
                    && child.getNodeName().equals(TAG_PARAGRAPH)) {
                traverseParagraph(child);
            }
        }
    }


    /*
     * Utility method to retrieve a Node attribute.  Returns null when the
     * node has no attributes or does not carry the named attribute.
     */
    private String getAttribute (Node node, String attribute) {
        NamedNodeMap attrNodes = node.getAttributes();

        if (attrNodes != null) {
            Node attr = attrNodes.getNamedItem(attribute);
            if (attr != null) {
                return attr.getNodeValue();
            }
        }

        return null;
    }
}