1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 package org.openoffice.xmerge.merger.diff; 25 26 import org.w3c.dom.Node; 27 import org.w3c.dom.Element; 28 29 import java.util.Vector; 30 import org.openoffice.xmerge.merger.DiffAlgorithm; 31 import org.openoffice.xmerge.merger.Difference; 32 import org.openoffice.xmerge.merger.Iterator; 33 import org.openoffice.xmerge.converter.xml.OfficeConstants; 34 35 /** 36 * <p>A very simple and direct difference algorithm for row 37 * <code>Node</code> objects in a spreadsheet. Basically, it will 38 * compare objects in sequence and does not look ahead (unlike LCS).</p> 39 * 40 * <p><ol><li> 41 * If two objects are the same, skip to next one. 42 * </li><li> 43 * Otherwise check whether the row repeated attribute is the same. 44 * </li><li> 45 * If the row repeated attribute is the same, then compare two rows 46 * and mark it as <i>change</i> if those rows are different. 47 * </li><li> 48 * If the row repeated attribute is different, then split the rows and 49 * continue to compare. 50 * </li><li> 51 * If there are more objects in the modseq than the original sequence, 52 * then all of the extra ones in the modified sequence are marked as add. 53 * </li><li> 54 * If there are more objects in the original sequence than the modified 55 * sequence, then all the extra one in the modified sequence are marked 56 * as delete. 57 * </li></ol> 58 * 59 * <p>NOTE: The algorithm will have potential side effect to split rows.</p> 60 * 61 * @author smak 62 */ 63 64 public class IteratorRowCompare implements DiffAlgorithm { 65 66 /** 67 * Compute the differences of the given two sequences. 68 * Refer to the class description. 69 * 70 * Return an array of <code>Difference</code> objects. This method finds 71 * out the difference between two sequences. 72 * 73 * @param orgSeq The original sequence. 74 * @param modSeq The modified (or changed) sequence to 75 * compare against with the origial. 76 * 77 * @return An array of Difference objects. 78 */ computeDiffs(Iterator orgSeq, Iterator modSeq)79 public Difference[] computeDiffs(Iterator orgSeq, Iterator modSeq) { 80 81 int orgSeqlen = orgSeq.elementCount(); 82 int modSeqlen = modSeq.elementCount(); 83 84 Vector diffVector = new Vector(); 85 86 // i and j are counters to keep track the current position in the 87 // iterator 88 int i = 0; 89 int j = 0; 90 Object orgSeqObject = orgSeq.start(); 91 Object modSeqObject = modSeq.start(); 92 Element orgRow, modRow; 93 boolean different = false; 94 boolean orgSplited = false; 95 boolean modSplited = false; 96 97 while (orgSeqObject != null) { 98 99 different = true; 100 101 if (modSeqObject == null) { 102 // no more modsequence, all the remaining org sequence objs 103 // should consider as a delete. 104 Difference diff = new Difference(Difference.DELETE, i, j); 105 diffVector.add(diff); 106 orgSeqObject = orgSeq.next(); 107 108 } else { 109 if (!orgSeq.equivalent(orgSeqObject, modSeqObject)) { 110 111 orgRow = (Element)orgSeqObject; 112 modRow = (Element)modSeqObject; 113 114 // check whether the original Row with multiple row 115 // if so, need to split one out for merge 116 String orgRowRepeated = orgRow.getAttribute( 117 OfficeConstants.ATTRIBUTE_TABLE_NUM_ROWS_REPEATED); 118 String modRowRepeated = modRow.getAttribute( 119 OfficeConstants.ATTRIBUTE_TABLE_NUM_ROWS_REPEATED); 120 121 122 int orgRowNum = 1; 123 int modRowNum = 1; 124 125 if (orgRowRepeated.length() > 0) { 126 orgRowNum = 127 Integer.valueOf(orgRowRepeated).intValue(); 128 } 129 if (modRowRepeated.length() > 0) { 130 modRowNum = 131 Integer.valueOf(modRowRepeated).intValue(); 132 } 133 134 // try to find out the common number of repeated Rows 135 if (orgRowNum == modRowNum) { 136 orgSeqObject = orgSeq.next(); 137 modSeqObject = modSeq.next(); 138 139 // cut the original row into two halves, first half 140 // have the repeated attribute = modify row attr 141 } else if (orgRowNum > modRowNum) { 142 Element orgSplitRow = splitRepeatedRow( 143 orgRow, modRowNum, 144 orgRowNum - modRowNum); 145 // it may equal after the split! 146 if (orgSeq.equivalent(orgSplitRow, modRow)) { 147 different = false; 148 } 149 orgSplited = true; 150 modSeqObject = modSeq.next(); 151 152 // cut the modified Row into two halves, first half 153 // have the repeated attribute = original Row attr 154 } else { 155 Element modSplitRow = splitRepeatedRow( 156 modRow, orgRowNum, 157 modRowNum - orgRowNum); 158 159 // check whether rows are equal after the split 160 if (modSeq.equivalent(orgRow, modSplitRow)) { 161 different = false; 162 } 163 modSplited = true; 164 orgSeqObject = orgSeq.next(); 165 } 166 167 if (different) { 168 Difference diff = new Difference(Difference.CHANGE, 169 i, j); 170 diffVector.add(diff); 171 } 172 173 } else { 174 // Rows are equivalent, move on to next one. 175 orgSeqObject = orgSeq.next(); 176 modSeqObject = modSeq.next(); 177 } // end if-else 178 j++; 179 } // end if-else 180 i++; 181 } // end while loop 182 183 // any extra objects in modify sequence should consider as an add 184 // to the original sequence 185 for (; modSeqObject != null; modSeqObject = modSeq.next(), j++) { 186 Difference diff = new Difference(Difference.ADD, i, j); 187 diffVector.add(diff); 188 } 189 190 // need to refresh the iterator if we split the rows 191 if (orgSplited) { 192 orgSeq.refresh(); 193 } 194 195 if (modSplited) { 196 modSeq.refresh(); 197 } 198 199 200 // convert the vector to array 201 Difference[] diffArray = new Difference[diffVector.size()]; 202 diffVector.copyInto(diffArray); 203 204 return diffArray; 205 } 206 207 splitRepeatedRow(Element orgRow, int splitNum, int orgNum)208 private Element splitRepeatedRow(Element orgRow, int splitNum, int orgNum) { 209 // NOTE: should we really want to do deep clone? 210 // in most the case, it is an empty Row, but the 211 // specification didn't forbid any node to use multiple 212 // column attributes. i.e. the node can contain text 213 // nodes or other things under it. 214 Element splitRow = (Element)(orgRow.cloneNode(true)); 215 216 if (splitNum > 1) { 217 splitRow.setAttribute( 218 OfficeConstants.ATTRIBUTE_TABLE_NUM_ROWS_REPEATED, 219 String.valueOf(splitNum)); 220 } else if (splitNum == 1) { 221 splitRow.removeAttribute( 222 OfficeConstants.ATTRIBUTE_TABLE_NUM_ROWS_REPEATED); 223 } 224 if (orgNum > 1) { 225 orgRow.setAttribute( 226 OfficeConstants.ATTRIBUTE_TABLE_NUM_ROWS_REPEATED, 227 String.valueOf(orgNum)); 228 } else if (orgNum == 1) { 229 orgRow.removeAttribute( 230 OfficeConstants.ATTRIBUTE_TABLE_NUM_ROWS_REPEATED); 231 } 232 233 Node parentNode = orgRow.getParentNode(); 234 parentNode.insertBefore(splitRow, orgRow); 235 236 return splitRow; 237 } 238 } 239 240