1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 package org.openoffice.xmerge.util;
25 
26 import org.w3c.dom.Document;
27 import org.w3c.dom.Element;
28 import org.w3c.dom.Node;
29 import org.w3c.dom.Text;
30 
31 import org.openoffice.xmerge.converter.xml.OfficeConstants;
32 
33 import java.util.Vector;
34 
35 
36 /**
37  * Class providing utility methods for OpenOffice plugins.
38  *
39  * @author  Mark Murnane
40  * @version 1.1
41  */
42 public class OfficeUtil implements OfficeConstants {
43 
44     /**
45      * <p>Method to replace whitespace character within text with appropriate
46      *    OpenOffice.org XML tags.</p>
47      *
48      * @param   text      The text to parse for whitespace.
49      *
50      * @return  <code>Node</code> array containing OpenOffice XML nodes
51      *          representing the text.
52      */
parseText(String text, Document parentDoc)53     public static Node[] parseText(String text, Document parentDoc) {
54         Vector nodeVec = new Vector();
55 
56         /*
57          * Break up the text from the text run into Open
58          * Office text runs.  There may be more runs in OO because
59          * runs of 2 or more spaces map to nodes.
60          */
61         while ((text.indexOf("  ") != -1) || (text.indexOf("\t") != 1)) {
62 
63             /*
64              * Find the indices of tabs and multiple spaces, and
65              * figure out which of them occurs first in the string.
66              */
67             int spaceIndex = text.indexOf("  ");
68             int tabIndex = text.indexOf("\t");
69             if ((spaceIndex == -1) && (tabIndex == -1))
70                 break;  // DJP This should not be necessary.  What is wrong
71             // with the while() stmt up above?
72             int closerIndex;  // Index of the first of these
73             if (spaceIndex == -1)
74                 closerIndex = tabIndex;
75             else if (tabIndex == -1)
76                 closerIndex = spaceIndex;
77             else
78                 closerIndex = (spaceIndex > tabIndex) ? tabIndex : spaceIndex;
79 
80             /*
81              * If there is any text prior to the first occurrence of a
82              * tab or spaces, create a text node from it, then chop it
83              * off the string we're working with.
84              */
85             if (closerIndex > 0) {
86                 String beginningText = text.substring(0, closerIndex);
87                 Text textNode = parentDoc.createTextNode(beginningText);
88                 nodeVec.addElement(textNode);
89             }
90             text = text.substring(closerIndex);
91 
92             /*
93              * Handle either tab character or space sequence by creating
94              * an element for it, and then chopping out the text that
95              * represented it in "text".
96              */
97             if (closerIndex == tabIndex) {
98                 Element tabNode = parentDoc.createElement(TAG_TAB_STOP);
99                 nodeVec.add(tabNode);
100                 text = text.substring(1);  // tab is always a single character
101             } else {
102                 // Compute length of space sequence.
103                 int nrSpaces = 2;
104                 while ((nrSpaces < text.length())
105                 && text.substring(nrSpaces, nrSpaces + 1).equals(" "))
106                     nrSpaces++;
107 
108                 Element spaceNode = parentDoc.createElement(TAG_SPACE);
109                 spaceNode.setAttribute(ATTRIBUTE_SPACE_COUNT,
110                                        new Integer(nrSpaces).toString());
111                 nodeVec.add(spaceNode);
112                 text = text.substring(nrSpaces);
113             }
114         }
115 
116         /*
117          * No more tabs or space sequences.  If there's any remaining
118          * text create a text node for it.
119          */
120         if (text.length() > 0) {
121             Text textNode = parentDoc.createTextNode(text);
122             nodeVec.add(textNode);
123         }
124 
125         // Now create and populate an array to return the nodes in.
126         Node nodes[] = new Node[nodeVec.size()];
127         for (int i = 0; i < nodeVec.size(); i++)
128             nodes[i] = (Node)nodeVec.elementAt(i);
129         return nodes;
130     }
131 }