1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 package org.openoffice.xmerge.merger.merge; 25 26 import java.util.List; 27 import org.w3c.dom.Node; 28 import org.openoffice.xmerge.merger.Difference; 29 import org.openoffice.xmerge.merger.NodeMergeAlgorithm; 30 import org.openoffice.xmerge.merger.diff.CharacterParser; 31 import org.openoffice.xmerge.merger.diff.CharArrayLCSAlgorithm; 32 import org.openoffice.xmerge.merger.diff.TextNodeEntry; 33 import org.openoffice.xmerge.util.Debug; 34 35 /** 36 * This is an implementation of the <code>NodeMergeAlgorithm</code> 37 * interface. It is used to merge two paragraph <code>Node</code> 38 * objects based on character comparisons. 39 * 40 * @author smak 41 */ 42 public final class CharacterBaseParagraphMerge 43 implements NodeMergeAlgorithm { 44 45 46 private class cacheCharArray { cacheCharArray(int cacheSize)47 public cacheCharArray(int cacheSize) { 48 } 49 } 50 51 52 /** 53 * Merge two paragraph <code>Node</code> by using Longest Common 54 * Subsequence (LCS) character algorithm defined in {@link 55 * org.openoffice.xmerge.merger.diff.CharArrayLCSAlgorithm 56 * CharArrayLCSAlgorithm} 57 * 58 * @param orgPara The original paragraph <code>Node</code>. 59 * @param modPara The modified paragraph <code>Node</code>. 60 */ merge(Node orgPara, Node modPara)61 public void merge(Node orgPara, Node modPara) { 62 CharacterParser orgParser = new CharacterParser(orgPara); 63 CharacterParser modParser = new CharacterParser(modPara); 64 65 char[] orgCharArray = orgParser.getCharArray(); 66 char[] modCharArray = modParser.getCharArray(); 67 68 CharArrayLCSAlgorithm diffAlgo = new CharArrayLCSAlgorithm(); 69 70 Difference[] diffResult = diffAlgo.computeDiffs(orgCharArray, 71 modCharArray); 72 // debug use 73 System.out.println("Diff Result: "); 74 for (int i = 0; i < diffResult.length; i++) { 75 Debug.log(Debug.INFO, diffResult[i].debug()); 76 } 77 78 applyDifference(orgParser, modParser, diffResult); 79 } 80 81 applyDifference(CharacterParser orgParser, CharacterParser modParser, Difference[] diffs)82 private void applyDifference(CharacterParser orgParser, 83 CharacterParser modParser, 84 Difference[] diffs) { 85 86 List orgNodeList = orgParser.getNodeList(); 87 List modNodeList = modParser.getNodeList(); 88 int diffCount = 0; 89 int modNodeListCnt = 0; 90 int numNode = orgNodeList.size(); 91 92 for (int i = 0; i < numNode; i++) { 93 94 int extraChar = 0; 95 int orgDiffCount = diffCount; 96 TextNodeEntry orgTextNode = (TextNodeEntry)(orgNodeList.get(i)); 97 98 Debug.log(Debug.INFO, "checking node " + (i + 1) + " of " + numNode); 99 100 // check any difference in this node and estimate the new char num 101 for (; diffCount < diffs.length; diffCount++) { 102 103 Debug.log(Debug.INFO, " checking diff " + (diffCount + 1) + 104 " of " + diffs.length); 105 Debug.log(Debug.INFO, " OrgPosision <" + 106 diffs[diffCount].getOrgPosition() + "> diffCount <" + 107 diffCount + "> orgDiffCount <" + orgDiffCount + ">"); 108 109 // don't need to check and diffs beyond the current node text 110 // range except the last node 111 if (diffs[diffCount].getOrgPosition() > orgTextNode.endChar() && 112 i < numNode - 1) { 113 Debug.log(Debug.INFO, " breaking!"); 114 break; 115 } 116 117 if (diffs[diffCount].getOrgPosition() 118 >= orgTextNode.startChar()) { 119 if (diffs[diffCount].getOperation() == Difference.DELETE) { 120 extraChar--; 121 } else if (diffs[diffCount].getOperation() 122 == Difference.ADD) { 123 extraChar++; 124 } 125 126 } 127 } 128 129 Debug.log(Debug.INFO, " final diffCount <" + diffCount + 130 "> final orgDiffCount <" + orgDiffCount + ">"); 131 132 // will only try to merge if there is a difference in this node 133 if (diffCount > orgDiffCount) { 134 135 Debug.log(Debug.INFO, " There is a difference, doing merge"); 136 Debug.log(Debug.INFO, " TextNode name <" + 137 orgTextNode.node().getNodeName() + ">"); 138 Debug.log(Debug.INFO, " TextNode value <" + 139 orgTextNode.node().getNodeValue() + ">"); 140 Debug.log(Debug.INFO, " TextNode start char <" + 141 orgTextNode.startChar() + "> TextNode end char <" + 142 orgTextNode.endChar() + ">"); 143 Debug.log(Debug.INFO, " extraChar value <" + extraChar + ">"); 144 145 coreMerge(orgDiffCount, diffCount, diffs, orgParser, 146 modParser, orgTextNode, extraChar); 147 } 148 } 149 } 150 coreMerge(int startDiffNum, int endDiffNum, Difference[] diffs, CharacterParser orgParser, CharacterParser modParser, TextNodeEntry orgTextNode, int extraChar)151 private void coreMerge(int startDiffNum, int endDiffNum, Difference[] diffs, 152 CharacterParser orgParser, CharacterParser modParser, 153 TextNodeEntry orgTextNode, int extraChar) { 154 155 Node orgNode = orgTextNode.node(); 156 char[] modTextArray = modParser.getCharArray(); 157 String tmpString; 158 159 // Handle situation where getNodeValue returns null 160 // 161 if (orgNode.getNodeValue() != null) 162 tmpString = orgNode.getNodeValue(); 163 else 164 tmpString = ""; 165 166 char[] orgNodeText = tmpString.toCharArray(); 167 char[] newNodeText; 168 169 if (orgNodeText.length + extraChar > 0) 170 newNodeText = new char[orgNodeText.length + extraChar]; 171 else 172 newNodeText = new char[0]; 173 174 int orgTextPosition = orgTextNode.startChar(); // used for block copy 175 int newTextPosition = 0; // used for block copy 176 int unChangedTextLength = 0; 177 178 char[] cacheCharArray = new char[endDiffNum - startDiffNum]; 179 int cacheLength = 0; 180 int lastDiffOperation = Difference.UNCHANGE; 181 int lastDiffPosition = -1; 182 183 // starting to diff 184 // 185 for (int j = startDiffNum; j < endDiffNum; j++) { 186 187 // copy any contents before the diff 188 // 189 if (diffs[j].getOrgPosition() > orgTextPosition) { 190 // need to flush first 191 if (cacheLength > 0) { 192 System.arraycopy(cacheCharArray, 0, 193 newNodeText, newTextPosition, cacheLength); 194 newTextPosition += cacheLength; 195 196 // reset the markers 197 lastDiffPosition = -1; 198 lastDiffOperation = Difference.UNCHANGE; 199 cacheLength = 0; 200 } 201 202 // find out the length how many characters are 203 // untouched by the diff 204 unChangedTextLength = diffs[j].getOrgPosition() - 205 orgTextPosition; 206 System.arraycopy(orgNodeText, 207 orgTextPosition - orgTextNode.startChar(), 208 newNodeText, newTextPosition, 209 unChangedTextLength); 210 orgTextPosition += unChangedTextLength; 211 newTextPosition += unChangedTextLength; 212 } 213 214 // for any deleted characters, just skip without copy 215 // but still need to take care the cached characters 216 // 217 if (diffs[j].getOperation() == Difference.DELETE) { 218 orgTextPosition++; 219 220 // flush out the cache and copy the content to new Text 221 if (cacheLength > 0) { 222 System.arraycopy(cacheCharArray, 0, 223 newNodeText, newTextPosition, cacheLength); 224 newTextPosition += cacheLength; 225 226 // reset the markers 227 lastDiffPosition = -1; 228 lastDiffOperation = Difference.UNCHANGE; 229 cacheLength = 0; 230 } 231 232 continue; 233 234 235 // check whether we should flush the cache. 236 // For changed diffs, only continuous changes can be cached 237 // For Add diffs, only same insertion point can be cached 238 // and for both changed/add diffs, need to have same operation 239 // as last cached diffs. 240 241 } else { 242 if (lastDiffOperation != diffs[j].getOperation() || 243 (diffs[j].getOperation() == Difference.CHANGE && 244 diffs[j].getOrgPosition() != lastDiffPosition + 1) || 245 (diffs[j].getOperation() == Difference.ADD && 246 diffs[j].getOrgPosition() != lastDiffPosition)) { 247 248 // flush the cache 249 if (cacheLength > 0) { 250 System.arraycopy(cacheCharArray, 0, newNodeText, 251 newTextPosition, cacheLength); 252 newTextPosition += cacheLength; 253 254 // reset the markers 255 lastDiffPosition = -1; 256 lastDiffOperation = Difference.UNCHANGE; 257 cacheLength = 0; 258 } 259 } 260 261 // add the diffs to the cache, now the diffs will be either 262 // a new 'changed' char or is an adjacent following change of 263 // last difference 264 cacheCharArray[cacheLength] = 265 modTextArray[diffs[j].getModPosition()]; 266 cacheLength++; 267 lastDiffOperation = diffs[j].getOperation(); 268 lastDiffPosition = diffs[j].getOrgPosition(); 269 270 // need to increment the original text position 271 // after we cached it 272 if (lastDiffOperation == Difference.CHANGE) { 273 orgTextPosition++; 274 } 275 } 276 } 277 278 // flush any contents remaining in the cache 279 if (cacheLength > 0) { 280 System.arraycopy(cacheCharArray, 0, newNodeText, 281 newTextPosition, cacheLength); 282 newTextPosition += cacheLength; 283 // no need to reset any cache-related info as this is a last flush 284 } 285 286 // copy any contents after all the diffs 287 int orgStartPosition = orgTextNode.startChar(); 288 if (orgNodeText.length + orgStartPosition > orgTextPosition) { 289 unChangedTextLength = orgNodeText.length + orgStartPosition 290 - orgTextPosition; 291 System.arraycopy(orgNodeText, orgTextPosition - orgStartPosition, 292 newNodeText, newTextPosition, 293 unChangedTextLength); 294 } 295 296 // set the text to the original node if there are any diffs processed. 297 // can't use newNodeText.length to check as even it is empty, we may 298 // process a whole bunch of deletion already (i.e. the whole 299 // orgNodeText deleted). 300 if (endDiffNum > startDiffNum) { 301 String newString = new String(newNodeText); 302 orgNode.setNodeValue(newString); 303 } 304 } 305 } 306 307