1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 package org.openoffice.xmerge.merger.diff; 25 26 import java.util.Vector; 27 import org.openoffice.xmerge.merger.Difference; 28 29 /** 30 * <p>This is an implementations of <code>DiffAlgorithm</code> interface 31 * which will difference char arrays.</p> 32 * 33 * <p>It also use Longest Common Subsequence (LCS). The algorithm is based 34 * on the book "Introduction to Algorithms" by Thomas H.Cormen, 35 * Charles E.Leiserson, and Ronald L.Riverst (MIT Press 1990) page 314.</p> 36 * 37 * @author smak 38 */ 39 public class CharArrayLCSAlgorithm { 40 41 /** 42 * Return an <code>Difference</code> array. This method finds out 43 * the difference between two sequences. 44 * 45 * @param orgSeq The original sequence. 46 * @param modSeq The modified (or changed) sequence to 47 * compare against the origial. 48 * 49 * @return A <code>Difference</code> array. 50 */ computeDiffs(char[] orgSeq, char[] modSeq)51 public Difference[] computeDiffs(char[] orgSeq, char[] modSeq) { 52 53 int orgSeqlen = orgSeq.length; 54 int modSeqlen = modSeq.length; 55 56 int[][] diffTable; 57 58 // Diff table is used to keep track which element is the same or not 59 // in those 2 sequences 60 diffTable = createDiffTable(orgSeq, modSeq); 61 62 // debug purpose... 63 // printDiffTable(diffTable); 64 65 Vector diffResult = new Vector(); 66 67 generateResult(diffTable, orgSeqlen, modSeqlen, diffResult); 68 69 // don't need anymore if Difference do not contain content information 70 /* fillInDiffContent(diffResult, orgSeq, modSeq); */ 71 72 Difference[] diffArray = new Difference[0]; 73 74 // convert the vector to array, it has to do in here as 75 // generateResult is called recursively 76 if (diffResult.size() > 0) { 77 diffArray = new Difference[diffResult.size()]; 78 diffResult.copyInto(diffArray); 79 } 80 81 diffTable = null; 82 diffResult = null; 83 84 return diffArray; 85 } 86 87 88 /** 89 * Debug function Used to print out the nicely formatted 90 * difference table. 91 * 92 * @param diffTable The difference table to display. 93 */ printDiffTable(int[][] diffTable)94 private void printDiffTable(int[][] diffTable) { 95 96 for (int i = 0; i < diffTable.length; i++) { 97 for (int j = 0; j < diffTable[i].length; j++) { 98 System.out.print(" " + diffTable[i][j] + " "); 99 } 100 System.out.println(); 101 } 102 } 103 104 105 /** 106 * Create the difference table. 107 * The difference table is used internal to keep track what 108 * elements are common or different in the two sequences. 109 * 110 * @param orgSeq The original sequence to be used as a base. 111 * @param modSeq The modified sequence to compare. 112 * 113 * @return A difference table as a two-dimensional array of 114 * integers. 115 */ createDiffTable(char[] orgSeq, char[] modSeq)116 private int[][] createDiffTable(char[] orgSeq, char[] modSeq) { 117 int orgSeqlen = orgSeq.length + 1; 118 int modSeqlen = modSeq.length + 1; 119 int[][] diffTable; 120 121 // initialize the diffTable (it need to be 1 row/col bigger 122 // than the original str) 123 diffTable = new int[orgSeqlen][]; 124 for (int i = 0; i < orgSeqlen; i++) { 125 diffTable[i] = new int[modSeqlen]; 126 } 127 128 // compute the diff Table using LCS algorithm, refer to the book 129 // mentioned at the top of the program 130 for (int i = 1; i < orgSeqlen; i++) { 131 for (int j = 1; j < modSeqlen; j++) { 132 133 if (orgSeq[i-1] == modSeq[j-1]) { 134 diffTable[i][j] = diffTable[i-1][j-1]+1; 135 } else { 136 if (diffTable[i-1][j] >= diffTable[i][j-1]) { 137 diffTable[i][j] = diffTable[i-1][j]; 138 } else { 139 diffTable[i][j] = diffTable[i][j-1]; 140 } 141 } 142 } 143 } 144 145 return diffTable; 146 } 147 148 149 /** 150 * Generate the <code>Difference</code> result vector. 151 * This method will be called recursively to backtrack the difference 152 * table to get the difference result (and also the LCS). 153 * 154 * @param diffTable The difference table containing the 155 * <code>Difference</code> result. 156 * @param i The nth element in original sequence to 157 * compare. This method is called recursively 158 * with i and j decreased until 0. 159 * @param j The nth element in modified sequence to 160 * compare. 161 * @param diffVector A vector to output the <code>Difference</code> 162 * result. Can not use a return variable as it 163 * is a recursive method. The vector will contain 164 * <code>Difference</code> objects with operation 165 * and positions filled in. 166 */ generateResult(int[][] diffTable, int i, int j, Vector diffVector)167 private void generateResult(int[][] diffTable, 168 int i, int j, Vector diffVector) { 169 170 // handle the first element 171 if (i == 0 || j == 0) { 172 if (i == 0 && j == 0) { 173 // return 174 } else if (j == 0) { 175 for (int cnt = 0; cnt < i; cnt++) { 176 Difference diff = 177 new Difference(Difference.DELETE, cnt, j); 178 diffVector.add(diff); 179 } 180 } else { 181 for (int cnt = 0; cnt < j; cnt++) { 182 Difference diff = 183 new Difference(Difference.ADD, i, cnt); 184 diffVector.add(diff); 185 } 186 } 187 return; 188 } 189 190 // for the detail of this algorithm, refer to the book mentioned on 191 // the top and page 317 and 318. 192 if ((diffTable[i-1][j-1] == diffTable[i][j] -1) && 193 (diffTable[i-1][j-1] == diffTable[i-1][j]) && 194 (diffTable[i-1][j-1] == diffTable[i][j-1])) { 195 196 // the element of ith and jth in org and mod sequence is the same 197 generateResult(diffTable, i-1, j-1, diffVector); 198 } else { 199 if (diffTable[i-1][j] > diffTable[i][j-1]) { 200 201 // recursively call first, then add the result so that 202 // the beginning of the diffs will be stored first 203 generateResult(diffTable, i-1, j, diffVector); 204 205 Difference diff = 206 new Difference(Difference.DELETE, i-1, j); 207 diffVector.add(diff); 208 } else if (diffTable[i-1][j] < diffTable[i][j-1]) { 209 210 // recursively call first, then add the result so that 211 // the beginning of the diffs will be stored first 212 generateResult(diffTable, i, j-1, diffVector); 213 214 Difference diff = 215 new Difference(Difference.ADD, i, j-1); 216 diffVector.add(diff); 217 } else { // diffTable[i-1][j] == diffTable[i][j-1] 218 // recursively call first, then add the result so that 219 // the beginning of the diffs will be stored first 220 generateResult(diffTable, i-1, j-1, diffVector); 221 222 Difference diff = 223 new Difference(Difference.CHANGE, i-1, j-1); 224 diffVector.add(diff); 225 226 } 227 } 228 } 229 } 230 231