1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 package org.openoffice.xmerge.merger.diff;
25 
26 import java.util.Vector;
27 import org.openoffice.xmerge.merger.DiffAlgorithm;
28 import org.openoffice.xmerge.merger.Difference;
29 import org.openoffice.xmerge.merger.Iterator;
30 import org.openoffice.xmerge.util.Debug;
31 
32 /**
33  *  This is one of the implementations of <code>DiffAlgorithm</code> interface.
34  *  Using Longest Common Subsequence (LCS). The algorithm here is based
35  *  on the book "Introduction to Algorithms" by Thomas H.Cormen,
36  *  Charles E.Leiserson and Ronald L.Riverst (MIT Press 1990) page 314.
37  *
38  *  @author smak
39  */
40 public class IteratorLCSAlgorithm implements DiffAlgorithm {
41 
computeDiffs(Iterator orgSeq, Iterator modSeq)42     public Difference[] computeDiffs(Iterator orgSeq, Iterator modSeq) {
43 
44         int orgSeqlen = orgSeq.elementCount();
45         int modSeqlen = modSeq.elementCount();
46 
47         int[][] diffTable;
48 
49         // Diff table is used to keep track which element is the same or not
50         // in those 2 sequences
51         diffTable = createDiffTable(orgSeq, modSeq);
52 
53         // debug purpose...
54         if (Debug.isFlagSet(Debug.INFO)) {
55             printDiffTable(diffTable);
56         }
57 
58         Vector diffResult = new Vector();
59 
60         generateResult(diffTable, orgSeqlen, modSeqlen, diffResult);
61 
62         Difference[] diffArray = new Difference[0];
63 
64         // convert the vector to array, it has to do in here as
65         // generateResult is called recursively
66         if (diffResult.size() > 0) {
67             diffArray = new Difference[diffResult.size()];
68             diffResult.copyInto(diffArray);
69         }
70 
71         diffTable = null;
72         diffResult = null;
73 
74         return diffArray;
75     }
76 
77 
78     /**
79      *  Debug function used to print out the nicely formatted
80      *  difference table.
81      *
82      *  @param  diffTable  The difference table to display.
83      */
printDiffTable(int[][] diffTable)84     private void printDiffTable(int[][] diffTable) {
85 
86         String tmpString = "";
87 
88         for (int i = 0; i < diffTable.length; i++) {
89             for (int j = 0; j < diffTable[i].length; j++) {
90                tmpString = tmpString + " " + diffTable[i][j] +  " ";
91             }
92             Debug.log(Debug.INFO, tmpString);
93             tmpString = "";
94         }
95     }
96 
97     /**
98      *  Create the difference table.
99      *  The difference table is used internal to keep track what
100      *  elements are common or different in the two sequences.
101      *
102      *  @param  orgSeq  The original sequence to be used as a base.
103      *  @param  modSeq  The modified sequence to compare.
104      *
105      *  @return  A difference table as a two-dimensional array of
106      *           integers.
107      */
createDiffTable(Iterator orgSeq, Iterator modSeq)108     private int[][] createDiffTable(Iterator orgSeq, Iterator modSeq) {
109         int orgSeqlen = orgSeq.elementCount() + 1;
110         int modSeqlen = modSeq.elementCount() + 1;
111         int[][] diffTable;
112 
113         // initialize the diffTable
114         diffTable = new int[orgSeqlen][];
115         for (int i = 0; i < orgSeqlen; i++) {
116             diffTable[i] = new int[modSeqlen];
117         }
118 
119         // compute the diff Table using LCS algorithm, refer to the book
120         // mentioned at the top of the program
121 
122         int i, j;
123 
124         Object orgSeqObject, modSeqObject;
125 
126         for (orgSeqObject = orgSeq.start(), i = 1;
127              orgSeqObject != null;
128              orgSeqObject = orgSeq.next(), i++) {
129 
130             for (modSeqObject = modSeq.start(), j = 1;
131                  modSeqObject != null;
132                  modSeqObject = modSeq.next(), j++) {
133 
134                 if (orgSeq.equivalent(orgSeqObject, modSeqObject)) {
135                     diffTable[i][j] = diffTable[i-1][j-1]+1;
136                 } else {
137                     if (diffTable[i-1][j] >= diffTable[i][j-1]) {
138                         diffTable[i][j] = diffTable[i-1][j];
139                     } else {
140                         diffTable[i][j] = diffTable[i][j-1];
141                     }
142                 }
143             }
144         }
145 
146         return diffTable;
147     }
148 
149 
150     /**
151      *  Generate the <code>Difference</code> object result vector.
152      *  This method will be called recursively to backtrack the difference
153      *  table to get the difference result (and also the LCS).
154      *
155      *  @param   diffTable   The difference table containing the
156      *                       <code>Difference</code> result.
157      *  @param   i           The nth element in original sequence to
158      *                       compare.    This method is called recursively
159      *                       with i and j decreased until 0.
160      *  @param   j           The nth element in modified sequence to
161      *                       compare.
162      *  @param   diffVector  A vector to output the <code>Difference</code>
163      *                       result. Can not use a return variable as it
164      *                       is a recursive method.  The vector will contain
165      *                       <code>Difference</code> objects with operation
166      *                       and positions fill in.
167      */
generateResult(int[][] diffTable, int i, int j, Vector diffVector)168     private void generateResult(int[][] diffTable,
169                                 int i, int j, Vector diffVector) {
170 
171         // handle the first element
172         if (i == 0 && j == 0) {
173             return;
174 
175         } else if (j == 0) {
176             for (int cnt = 0; cnt < i; cnt++) {
177                 Difference diff =
178                     new Difference(Difference.DELETE, cnt, j);
179                 diffVector.add(diff);
180             }
181             return;
182 
183         } else if (i == 0) {
184             for (int cnt = 0; cnt < j; cnt++) {
185                 Difference diff =
186                     new Difference(Difference.ADD, i, cnt);
187                 diffVector.add(diff);
188             }
189             return;
190         }
191 
192         // for the detail of this algorithm, refer to the book mentioned on
193         // the top and page 317 and 318.
194         if ((diffTable[i-1][j-1] == diffTable[i][j] -1) &&
195             (diffTable[i-1][j-1] == diffTable[i-1][j]) &&
196             (diffTable[i-1][j-1] == diffTable[i][j-1])) {
197 
198             // the element of ith and jth in org and mod sequence is the same
199             generateResult(diffTable, i-1, j-1, diffVector);
200         } else {
201             if (diffTable[i-1][j] > diffTable[i][j-1]) {
202 
203                 // recursively call first, then add the result so that
204                 // the beginning of the diffs will be stored first
205                 generateResult(diffTable, i-1, j, diffVector);
206 
207                 Difference diff =
208                     new Difference(Difference.DELETE, i-1, j);
209                 diffVector.add(diff);
210             } else if (diffTable[i-1][j] < diffTable[i][j-1]) {
211 
212                 // recursively call first, then add the result so that
213                 // the beginning of the diffs will be stored first
214                 generateResult(diffTable, i, j-1, diffVector);
215 
216                 Difference diff =
217                     new Difference(Difference.ADD, i, j-1);
218                 diffVector.add(diff);
219             } else { // diffTable[i-1][j] == diffTable[i][j-1]
220                 // recursively call first, then add the result so that
221                 // the beginning of the diffs will be stored first
222                 generateResult(diffTable, i-1, j-1, diffVector);
223 
224                 Difference diff =
225                     new Difference(Difference.CHANGE, i-1, j-1);
226                 diffVector.add(diff);
227 
228             }
229         }
230     }
231 }
232 
233