1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 package org.openoffice.xmerge.merger.merge;
25 
26 import java.util.List;
27 import org.w3c.dom.Node;
28 import org.openoffice.xmerge.merger.Difference;
29 import org.openoffice.xmerge.merger.NodeMergeAlgorithm;
30 import org.openoffice.xmerge.merger.diff.CharacterParser;
31 import org.openoffice.xmerge.merger.diff.CharArrayLCSAlgorithm;
32 import org.openoffice.xmerge.merger.diff.TextNodeEntry;
33 import org.openoffice.xmerge.util.Debug;
34 
35 /**
36  *  This is an implementation of the <code>NodeMergeAlgorithm</code>
37  *  interface.  It is used to merge two paragraph <code>Node</code>
38  *  objects based on character comparisons.
39  *
40  * @author smak
41  */
42 public final class CharacterBaseParagraphMerge
43                    implements NodeMergeAlgorithm {
44 
45 
46     private class cacheCharArray {
cacheCharArray(int cacheSize)47         public  cacheCharArray(int cacheSize) {
48         }
49     }
50 
51 
52     /**
53      *  Merge two paragraph <code>Node</code> by using Longest Common
54      *  Subsequence (LCS) character algorithm defined in {@link
55      *  org.openoffice.xmerge.merger.diff.CharArrayLCSAlgorithm
56      *  CharArrayLCSAlgorithm}
57      *
58      *  @param  orgPara  The original paragraph <code>Node</code>.
59      *  @param  modPara  The modified paragraph <code>Node</code>.
60      */
merge(Node orgPara, Node modPara)61     public void merge(Node orgPara, Node modPara) {
62         CharacterParser orgParser = new CharacterParser(orgPara);
63         CharacterParser modParser = new CharacterParser(modPara);
64 
65         char[] orgCharArray = orgParser.getCharArray();
66         char[] modCharArray = modParser.getCharArray();
67 
68         CharArrayLCSAlgorithm diffAlgo = new CharArrayLCSAlgorithm();
69 
70         Difference[] diffResult = diffAlgo.computeDiffs(orgCharArray,
71                                                         modCharArray);
72         // debug use
73         System.out.println("Diff Result: ");
74         for (int i = 0; i < diffResult.length; i++) {
75             Debug.log(Debug.INFO, diffResult[i].debug());
76         }
77 
78         applyDifference(orgParser, modParser, diffResult);
79     }
80 
81 
applyDifference(CharacterParser orgParser, CharacterParser modParser, Difference[] diffs)82     private void applyDifference(CharacterParser orgParser,
83                                  CharacterParser modParser,
84                                  Difference[] diffs) {
85 
86         List orgNodeList = orgParser.getNodeList();
87         List modNodeList = modParser.getNodeList();
88         int diffCount = 0;
89         int modNodeListCnt = 0;
90         int numNode = orgNodeList.size();
91 
92         for (int i = 0; i < numNode; i++) {
93 
94             int extraChar = 0;
95             int orgDiffCount = diffCount;
96             TextNodeEntry orgTextNode = (TextNodeEntry)(orgNodeList.get(i));
97 
98             Debug.log(Debug.INFO, "checking node " + (i + 1) + " of " + numNode);
99 
100             // check any difference in this node and estimate the new char num
101             for (; diffCount < diffs.length; diffCount++) {
102 
103                 Debug.log(Debug.INFO, "  checking diff " + (diffCount + 1) +
104                   " of " + diffs.length);
105                 Debug.log(Debug.INFO, "    OrgPosision <" +
106                   diffs[diffCount].getOrgPosition() + "> diffCount <" +
107                   diffCount + "> orgDiffCount <" + orgDiffCount + ">");
108 
109                 // don't need to check and diffs beyond the current node text
110                 // range except the last node
111                 if (diffs[diffCount].getOrgPosition() > orgTextNode.endChar() &&
112                     i < numNode - 1) {
113                     Debug.log(Debug.INFO, "    breaking!");
114                     break;
115                 }
116 
117                 if (diffs[diffCount].getOrgPosition()
118                                                   >= orgTextNode.startChar()) {
119                     if (diffs[diffCount].getOperation() == Difference.DELETE) {
120                         extraChar--;
121                     } else if (diffs[diffCount].getOperation()
122                                                         == Difference.ADD) {
123                         extraChar++;
124                     }
125 
126                 }
127             }
128 
129             Debug.log(Debug.INFO, "  final diffCount <" + diffCount +
130               "> final orgDiffCount <" + orgDiffCount + ">");
131 
132             // will only try to merge if there is a difference in this node
133             if (diffCount > orgDiffCount) {
134 
135                 Debug.log(Debug.INFO, "  There is a difference, doing merge");
136                 Debug.log(Debug.INFO, "  TextNode name <" +
137                   orgTextNode.node().getNodeName() + ">");
138                 Debug.log(Debug.INFO, "  TextNode value <" +
139                   orgTextNode.node().getNodeValue() + ">");
140                 Debug.log(Debug.INFO, "  TextNode start char <" +
141                   orgTextNode.startChar() + "> TextNode end char <" +
142                   orgTextNode.endChar() + ">");
143                 Debug.log(Debug.INFO, "  extraChar value <" + extraChar + ">");
144 
145                 coreMerge(orgDiffCount, diffCount, diffs, orgParser,
146                           modParser, orgTextNode, extraChar);
147             }
148         }
149     }
150 
coreMerge(int startDiffNum, int endDiffNum, Difference[] diffs, CharacterParser orgParser, CharacterParser modParser, TextNodeEntry orgTextNode, int extraChar)151     private void coreMerge(int startDiffNum, int endDiffNum, Difference[] diffs,
152                         CharacterParser orgParser, CharacterParser modParser,
153                         TextNodeEntry orgTextNode, int extraChar) {
154 
155         Node orgNode = orgTextNode.node();
156         char[] modTextArray = modParser.getCharArray();
157         String tmpString;
158 
159         // Handle situation where getNodeValue returns null
160         //
161         if (orgNode.getNodeValue() != null)
162            tmpString = orgNode.getNodeValue();
163         else
164            tmpString = "";
165 
166         char[] orgNodeText = tmpString.toCharArray();
167         char[] newNodeText;
168 
169         if (orgNodeText.length + extraChar > 0)
170            newNodeText = new char[orgNodeText.length + extraChar];
171         else
172            newNodeText = new char[0];
173 
174         int orgTextPosition = orgTextNode.startChar();   // used for block copy
175         int newTextPosition = 0;                         // used for block copy
176         int unChangedTextLength = 0;
177 
178         char[] cacheCharArray = new char[endDiffNum - startDiffNum];
179         int cacheLength = 0;
180         int lastDiffOperation = Difference.UNCHANGE;
181         int lastDiffPosition = -1;
182 
183         // starting to diff
184         //
185         for (int j = startDiffNum; j < endDiffNum; j++) {
186 
187             // copy any contents before the diff
188             //
189             if (diffs[j].getOrgPosition() > orgTextPosition) {
190                 // need to flush first
191                 if (cacheLength > 0) {
192                     System.arraycopy(cacheCharArray, 0,
193                                      newNodeText, newTextPosition, cacheLength);
194                     newTextPosition += cacheLength;
195 
196                     // reset the markers
197                     lastDiffPosition = -1;
198                     lastDiffOperation = Difference.UNCHANGE;
199                     cacheLength = 0;
200                 }
201 
202                 // find out the length how many characters are
203                 // untouched by the diff
204                 unChangedTextLength = diffs[j].getOrgPosition() -
205                                       orgTextPosition;
206                 System.arraycopy(orgNodeText,
207                                  orgTextPosition - orgTextNode.startChar(),
208                                  newNodeText, newTextPosition,
209                                  unChangedTextLength);
210                 orgTextPosition += unChangedTextLength;
211                 newTextPosition += unChangedTextLength;
212             }
213 
214             // for any deleted characters, just skip without copy
215             // but still need to take care the cached characters
216             //
217             if (diffs[j].getOperation() == Difference.DELETE) {
218                 orgTextPosition++;
219 
220                 // flush out the cache and copy the content to new Text
221                 if (cacheLength > 0) {
222                     System.arraycopy(cacheCharArray, 0,
223                                      newNodeText, newTextPosition, cacheLength);
224                     newTextPosition += cacheLength;
225 
226                     // reset the markers
227                     lastDiffPosition = -1;
228                     lastDiffOperation = Difference.UNCHANGE;
229                     cacheLength = 0;
230                 }
231 
232                 continue;
233 
234 
235             // check whether we should flush the cache.
236             // For changed diffs, only continuous changes can be cached
237             // For Add diffs, only same insertion point can be cached
238             // and for both changed/add diffs, need to have same operation
239             // as last cached diffs.
240 
241             } else {
242                 if (lastDiffOperation != diffs[j].getOperation()  ||
243                     (diffs[j].getOperation() == Difference.CHANGE &&
244                      diffs[j].getOrgPosition() != lastDiffPosition + 1) ||
245                     (diffs[j].getOperation() == Difference.ADD &&
246                      diffs[j].getOrgPosition() != lastDiffPosition)) {
247 
248                     // flush the cache
249                     if (cacheLength > 0) {
250                         System.arraycopy(cacheCharArray, 0, newNodeText,
251                                          newTextPosition, cacheLength);
252                         newTextPosition += cacheLength;
253 
254                         // reset the markers
255                         lastDiffPosition = -1;
256                         lastDiffOperation = Difference.UNCHANGE;
257                         cacheLength = 0;
258                     }
259                 }
260 
261                 // add the diffs to the cache, now the diffs will be either
262                 // a new 'changed' char or is an adjacent following change of
263                 // last difference
264                 cacheCharArray[cacheLength] =
265                                     modTextArray[diffs[j].getModPosition()];
266                 cacheLength++;
267                 lastDiffOperation = diffs[j].getOperation();
268                 lastDiffPosition = diffs[j].getOrgPosition();
269 
270                 // need to increment the original text position
271                 // after we cached it
272                 if (lastDiffOperation == Difference.CHANGE) {
273                     orgTextPosition++;
274                 }
275             }
276         }
277 
278         // flush any contents remaining in the cache
279         if (cacheLength > 0) {
280             System.arraycopy(cacheCharArray, 0, newNodeText,
281                              newTextPosition, cacheLength);
282             newTextPosition += cacheLength;
283             // no need to reset any cache-related info as this is a last flush
284         }
285 
286         // copy any contents after all the diffs
287         int orgStartPosition = orgTextNode.startChar();
288         if (orgNodeText.length + orgStartPosition > orgTextPosition) {
289             unChangedTextLength = orgNodeText.length + orgStartPosition
290                                   - orgTextPosition;
291             System.arraycopy(orgNodeText, orgTextPosition - orgStartPosition,
292                              newNodeText, newTextPosition,
293                              unChangedTextLength);
294         }
295 
296         // set the text to the original node if there are any diffs processed.
297         // can't use newNodeText.length to check as even it is empty, we may
298         // process a whole bunch of deletion already (i.e. the whole
299         // orgNodeText deleted).
300         if (endDiffNum > startDiffNum) {
301             String newString = new String(newNodeText);
302             orgNode.setNodeValue(newString);
303         }
304     }
305 }
306 
307