1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 package org.openoffice.xmerge.merger.diff;
25 
26 import java.util.Vector;
27 import org.openoffice.xmerge.merger.Difference;
28 
29 /**
30  *  <p>This is an implementations of <code>DiffAlgorithm</code> interface
31  *  which will difference char arrays.</p>
32  *
33  *  <p>It also use Longest Common Subsequence (LCS).  The algorithm is based
34  *  on the book "Introduction to Algorithms" by Thomas H.Cormen,
35  *  Charles E.Leiserson, and Ronald L.Riverst (MIT Press 1990) page 314.</p>
36  *
37  *  @author smak
38  */
39 public class CharArrayLCSAlgorithm {
40 
41     /**
42      *  Return an <code>Difference</code> array.  This method finds out
43      *  the difference between two sequences.
44      *
45      *  @param  orgSeq  The original sequence.
46      *  @param  modSeq  The modified (or changed) sequence to
47      *                  compare against the origial.
48      *
49      *  @return  A <code>Difference</code> array.
50      */
computeDiffs(char[] orgSeq, char[] modSeq)51     public Difference[] computeDiffs(char[] orgSeq, char[] modSeq) {
52 
53         int orgSeqlen = orgSeq.length;
54         int modSeqlen = modSeq.length;
55 
56         int[][] diffTable;
57 
58         // Diff table is used to keep track which element is the same or not
59         // in those 2 sequences
60         diffTable = createDiffTable(orgSeq, modSeq);
61 
62         // debug purpose...
63         // printDiffTable(diffTable);
64 
65         Vector diffResult = new Vector();
66 
67         generateResult(diffTable, orgSeqlen, modSeqlen, diffResult);
68 
69         // don't need anymore if Difference do not contain content information
70         /* fillInDiffContent(diffResult, orgSeq, modSeq); */
71 
72         Difference[] diffArray = new Difference[0];
73 
74         // convert the vector to array, it has to do in here as
75         // generateResult is called recursively
76         if (diffResult.size() > 0) {
77             diffArray = new Difference[diffResult.size()];
78             diffResult.copyInto(diffArray);
79         }
80 
81         diffTable = null;
82         diffResult = null;
83 
84         return diffArray;
85     }
86 
87 
88     /**
89      *  Debug function Used to print out the nicely formatted
90      *  difference table.
91      *
92      *  @param  diffTable  The difference table to display.
93      */
printDiffTable(int[][] diffTable)94     private void printDiffTable(int[][] diffTable) {
95 
96         for (int i = 0; i < diffTable.length; i++) {
97             for (int j = 0; j < diffTable[i].length; j++) {
98                 System.out.print(" " + diffTable[i][j] +  " ");
99             }
100             System.out.println();
101         }
102     }
103 
104 
105     /**
106      *  Create the difference table.
107      *  The difference table is used internal to keep track what
108      *  elements are common or different in the two sequences.
109      *
110      *  @param  orgSeq  The original sequence to be used as a base.
111      *  @param  modSeq  The modified sequence to compare.
112      *
113      *  @return  A difference table as a two-dimensional array of
114      *           integers.
115      */
createDiffTable(char[] orgSeq, char[] modSeq)116     private int[][] createDiffTable(char[] orgSeq, char[] modSeq) {
117         int orgSeqlen = orgSeq.length + 1;
118         int modSeqlen = modSeq.length + 1;
119         int[][] diffTable;
120 
121         // initialize the diffTable (it need to be 1 row/col bigger
122         // than the original str)
123         diffTable = new int[orgSeqlen][];
124         for (int i = 0; i < orgSeqlen; i++) {
125             diffTable[i] = new int[modSeqlen];
126         }
127 
128         // compute the diff Table using LCS algorithm, refer to the book
129         // mentioned at the top of the program
130         for (int i = 1; i < orgSeqlen; i++) {
131             for (int j = 1; j < modSeqlen; j++) {
132 
133                 if (orgSeq[i-1] == modSeq[j-1]) {
134                     diffTable[i][j] = diffTable[i-1][j-1]+1;
135                 } else {
136                     if (diffTable[i-1][j] >= diffTable[i][j-1]) {
137                         diffTable[i][j] = diffTable[i-1][j];
138                     } else {
139                         diffTable[i][j] = diffTable[i][j-1];
140                     }
141                 }
142             }
143         }
144 
145         return diffTable;
146     }
147 
148 
149     /**
150      *  Generate the <code>Difference</code> result vector.
151      *  This method will be called recursively to backtrack the difference
152      *  table to get the difference result (and also the LCS).
153      *
154      *  @param   diffTable   The difference table containing the
155      *                       <code>Difference</code> result.
156      *  @param   i           The nth element in original sequence to
157      *                       compare.  This method is called recursively
158      *                       with i and j decreased until 0.
159      *  @param   j           The nth element in modified sequence to
160      *                       compare.
161      *  @param   diffVector  A vector to output the <code>Difference</code>
162      *                       result. Can not use a return variable as it
163      *                       is a recursive method.  The vector will contain
164      *                       <code>Difference</code> objects with operation
165      *                       and positions filled in.
166      */
generateResult(int[][] diffTable, int i, int j, Vector diffVector)167     private void generateResult(int[][] diffTable,
168                                 int i, int j, Vector diffVector) {
169 
170         // handle the first element
171         if (i == 0 || j == 0) {
172             if (i == 0 && j == 0) {
173                 // return
174             } else if (j == 0) {
175                 for (int cnt = 0; cnt < i; cnt++) {
176                     Difference diff =
177                         new Difference(Difference.DELETE, cnt, j);
178                     diffVector.add(diff);
179                 }
180             } else {
181                 for (int cnt = 0; cnt < j; cnt++) {
182                     Difference diff =
183                         new Difference(Difference.ADD, i, cnt);
184                     diffVector.add(diff);
185                 }
186             }
187             return;
188         }
189 
190         // for the detail of this algorithm, refer to the book mentioned on
191         // the top and page 317 and 318.
192         if ((diffTable[i-1][j-1] == diffTable[i][j] -1) &&
193             (diffTable[i-1][j-1] == diffTable[i-1][j]) &&
194             (diffTable[i-1][j-1] == diffTable[i][j-1])) {
195 
196             // the element of ith and jth in org and mod sequence is the same
197             generateResult(diffTable, i-1, j-1, diffVector);
198         } else {
199             if (diffTable[i-1][j] > diffTable[i][j-1]) {
200 
201                 // recursively call first, then add the result so that
202                 // the beginning of the diffs will be stored first
203                 generateResult(diffTable, i-1, j, diffVector);
204 
205                 Difference diff =
206                     new Difference(Difference.DELETE, i-1, j);
207                 diffVector.add(diff);
208             } else if (diffTable[i-1][j] < diffTable[i][j-1]) {
209 
210                 // recursively call first, then add the result so that
211                 // the beginning of the diffs will be stored first
212                 generateResult(diffTable, i, j-1, diffVector);
213 
214                 Difference diff =
215                     new Difference(Difference.ADD, i, j-1);
216                 diffVector.add(diff);
217             } else { // diffTable[i-1][j] == diffTable[i][j-1]
218                 // recursively call first, then add the result so that
219                 // the beginning of the diffs will be stored first
220                 generateResult(diffTable, i-1, j-1, diffVector);
221 
222                 Difference diff =
223                     new Difference(Difference.CHANGE, i-1, j-1);
224                 diffVector.add(diff);
225 
226             }
227         }
228     }
229 }
230 
231