1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 package org.openoffice.xmerge.merger.diff; 25 26 import java.util.Vector; 27 import org.openoffice.xmerge.merger.DiffAlgorithm; 28 import org.openoffice.xmerge.merger.Difference; 29 import org.openoffice.xmerge.merger.Iterator; 30 import org.openoffice.xmerge.util.Debug; 31 32 /** 33 * This is one of the implementations of <code>DiffAlgorithm</code> interface. 34 * Using Longest Common Subsequence (LCS). The algorithm here is based 35 * on the book "Introduction to Algorithms" by Thomas H.Cormen, 36 * Charles E.Leiserson and Ronald L.Riverst (MIT Press 1990) page 314. 37 * 38 * @author smak 39 */ 40 public class IteratorLCSAlgorithm implements DiffAlgorithm { 41 computeDiffs(Iterator orgSeq, Iterator modSeq)42 public Difference[] computeDiffs(Iterator orgSeq, Iterator modSeq) { 43 44 int orgSeqlen = orgSeq.elementCount(); 45 int modSeqlen = modSeq.elementCount(); 46 47 int[][] diffTable; 48 49 // Diff table is used to keep track which element is the same or not 50 // in those 2 sequences 51 diffTable = createDiffTable(orgSeq, modSeq); 52 53 // debug purpose... 54 if (Debug.isFlagSet(Debug.INFO)) { 55 printDiffTable(diffTable); 56 } 57 58 Vector diffResult = new Vector(); 59 60 generateResult(diffTable, orgSeqlen, modSeqlen, diffResult); 61 62 Difference[] diffArray = new Difference[0]; 63 64 // convert the vector to array, it has to do in here as 65 // generateResult is called recursively 66 if (diffResult.size() > 0) { 67 diffArray = new Difference[diffResult.size()]; 68 diffResult.copyInto(diffArray); 69 } 70 71 diffTable = null; 72 diffResult = null; 73 74 return diffArray; 75 } 76 77 78 /** 79 * Debug function used to print out the nicely formatted 80 * difference table. 81 * 82 * @param diffTable The difference table to display. 83 */ printDiffTable(int[][] diffTable)84 private void printDiffTable(int[][] diffTable) { 85 86 String tmpString = ""; 87 88 for (int i = 0; i < diffTable.length; i++) { 89 for (int j = 0; j < diffTable[i].length; j++) { 90 tmpString = tmpString + " " + diffTable[i][j] + " "; 91 } 92 Debug.log(Debug.INFO, tmpString); 93 tmpString = ""; 94 } 95 } 96 97 /** 98 * Create the difference table. 99 * The difference table is used internal to keep track what 100 * elements are common or different in the two sequences. 101 * 102 * @param orgSeq The original sequence to be used as a base. 103 * @param modSeq The modified sequence to compare. 104 * 105 * @return A difference table as a two-dimensional array of 106 * integers. 107 */ createDiffTable(Iterator orgSeq, Iterator modSeq)108 private int[][] createDiffTable(Iterator orgSeq, Iterator modSeq) { 109 int orgSeqlen = orgSeq.elementCount() + 1; 110 int modSeqlen = modSeq.elementCount() + 1; 111 int[][] diffTable; 112 113 // initialize the diffTable 114 diffTable = new int[orgSeqlen][]; 115 for (int i = 0; i < orgSeqlen; i++) { 116 diffTable[i] = new int[modSeqlen]; 117 } 118 119 // compute the diff Table using LCS algorithm, refer to the book 120 // mentioned at the top of the program 121 122 int i, j; 123 124 Object orgSeqObject, modSeqObject; 125 126 for (orgSeqObject = orgSeq.start(), i = 1; 127 orgSeqObject != null; 128 orgSeqObject = orgSeq.next(), i++) { 129 130 for (modSeqObject = modSeq.start(), j = 1; 131 modSeqObject != null; 132 modSeqObject = modSeq.next(), j++) { 133 134 if (orgSeq.equivalent(orgSeqObject, modSeqObject)) { 135 diffTable[i][j] = diffTable[i-1][j-1]+1; 136 } else { 137 if (diffTable[i-1][j] >= diffTable[i][j-1]) { 138 diffTable[i][j] = diffTable[i-1][j]; 139 } else { 140 diffTable[i][j] = diffTable[i][j-1]; 141 } 142 } 143 } 144 } 145 146 return diffTable; 147 } 148 149 150 /** 151 * Generate the <code>Difference</code> object result vector. 152 * This method will be called recursively to backtrack the difference 153 * table to get the difference result (and also the LCS). 154 * 155 * @param diffTable The difference table containing the 156 * <code>Difference</code> result. 157 * @param i The nth element in original sequence to 158 * compare. This method is called recursively 159 * with i and j decreased until 0. 160 * @param j The nth element in modified sequence to 161 * compare. 162 * @param diffVector A vector to output the <code>Difference</code> 163 * result. Can not use a return variable as it 164 * is a recursive method. The vector will contain 165 * <code>Difference</code> objects with operation 166 * and positions fill in. 167 */ generateResult(int[][] diffTable, int i, int j, Vector diffVector)168 private void generateResult(int[][] diffTable, 169 int i, int j, Vector diffVector) { 170 171 // handle the first element 172 if (i == 0 && j == 0) { 173 return; 174 175 } else if (j == 0) { 176 for (int cnt = 0; cnt < i; cnt++) { 177 Difference diff = 178 new Difference(Difference.DELETE, cnt, j); 179 diffVector.add(diff); 180 } 181 return; 182 183 } else if (i == 0) { 184 for (int cnt = 0; cnt < j; cnt++) { 185 Difference diff = 186 new Difference(Difference.ADD, i, cnt); 187 diffVector.add(diff); 188 } 189 return; 190 } 191 192 // for the detail of this algorithm, refer to the book mentioned on 193 // the top and page 317 and 318. 194 if ((diffTable[i-1][j-1] == diffTable[i][j] -1) && 195 (diffTable[i-1][j-1] == diffTable[i-1][j]) && 196 (diffTable[i-1][j-1] == diffTable[i][j-1])) { 197 198 // the element of ith and jth in org and mod sequence is the same 199 generateResult(diffTable, i-1, j-1, diffVector); 200 } else { 201 if (diffTable[i-1][j] > diffTable[i][j-1]) { 202 203 // recursively call first, then add the result so that 204 // the beginning of the diffs will be stored first 205 generateResult(diffTable, i-1, j, diffVector); 206 207 Difference diff = 208 new Difference(Difference.DELETE, i-1, j); 209 diffVector.add(diff); 210 } else if (diffTable[i-1][j] < diffTable[i][j-1]) { 211 212 // recursively call first, then add the result so that 213 // the beginning of the diffs will be stored first 214 generateResult(diffTable, i, j-1, diffVector); 215 216 Difference diff = 217 new Difference(Difference.ADD, i, j-1); 218 diffVector.add(diff); 219 } else { // diffTable[i-1][j] == diffTable[i][j-1] 220 // recursively call first, then add the result so that 221 // the beginning of the diffs will be stored first 222 generateResult(diffTable, i-1, j-1, diffVector); 223 224 Difference diff = 225 new Difference(Difference.CHANGE, i-1, j-1); 226 diffVector.add(diff); 227 228 } 229 } 230 } 231 } 232 233