1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 package org.openoffice.xmerge.merger.diff;
25 
26 import org.w3c.dom.Node;
27 import org.w3c.dom.Element;
28 
29 import java.util.Vector;
30 import org.openoffice.xmerge.merger.DiffAlgorithm;
31 import org.openoffice.xmerge.merger.Difference;
32 import org.openoffice.xmerge.merger.Iterator;
33 import org.openoffice.xmerge.converter.xml.OfficeConstants;
34 
35 /**
36  *  <p>A very simple and direct difference algorithm for row
37  *  <code>Node</code> objects in a spreadsheet.  Basically, it will
38  *  compare objects in sequence and does not look ahead (unlike LCS).</p>
39  *
40  *  <p><ol><li>
41  *    If two objects are the same, skip to next one.
42  *  </li><li>
43  *    Otherwise check whether the row repeated attribute is the same.
44  *  </li><li>
45  *    If the row repeated attribute is the same, then compare two rows
46  *    and mark it as <i>change</i> if those rows are different.
47  *  </li><li>
48  *    If the row repeated attribute is different, then split the rows and
49  *    continue to compare.
50  *  </li><li>
51  *    If there are more objects in the modseq than the original sequence,
52  *    then all of the extra ones in the modified sequence are marked as add.
53  *  </li><li>
54  *    If there are more objects in the original sequence than the modified
55  *    sequence, then all the extra one in the modified sequence are marked
56  *    as delete.
57  *  </li></ol>
58  *
59  *  <p>NOTE: The algorithm will have potential side effect to split rows.</p>
60  *
61  *  @author smak
62  */
63 
64 public class IteratorRowCompare implements DiffAlgorithm {
65 
66     /**
67      *  Compute the differences of the given two sequences.
68      *  Refer to the class description.
69      *
70      *  Return an array of <code>Difference</code> objects.  This method finds
71      *  out the difference between two sequences.
72      *
73      *  @param  orgSeq  The original sequence.
74      *  @param  modSeq  The modified (or changed) sequence to
75      * 			       compare against with the origial.
76      *
77      *  @return  An array of Difference objects.
78      */
computeDiffs(Iterator orgSeq, Iterator modSeq)79     public Difference[] computeDiffs(Iterator orgSeq, Iterator modSeq) {
80 
81         int orgSeqlen = orgSeq.elementCount();
82         int modSeqlen = modSeq.elementCount();
83 
84         Vector diffVector = new Vector();
85 
86         // i and j are counters to keep track the current position in the
87         // iterator
88         int i = 0;
89         int j = 0;
90         Object orgSeqObject = orgSeq.start();
91         Object modSeqObject = modSeq.start();
92         Element orgRow, modRow;
93         boolean different = false;
94         boolean orgSplited = false;
95         boolean modSplited = false;
96 
97         while (orgSeqObject != null) {
98 
99             different = true;
100 
101             if (modSeqObject ==  null)  {
102                 // no more modsequence, all the remaining org sequence objs
103                 // should consider as a delete.
104                 Difference diff = new Difference(Difference.DELETE, i, j);
105                 diffVector.add(diff);
106                 orgSeqObject = orgSeq.next();
107 
108             } else {
109                 if (!orgSeq.equivalent(orgSeqObject, modSeqObject)) {
110 
111                     orgRow = (Element)orgSeqObject;
112                     modRow = (Element)modSeqObject;
113 
114                     // check whether the original Row with multiple row
115                     // if so, need to split one out for merge
116                     String orgRowRepeated = orgRow.getAttribute(
117                         OfficeConstants.ATTRIBUTE_TABLE_NUM_ROWS_REPEATED);
118                     String modRowRepeated = modRow.getAttribute(
119                         OfficeConstants.ATTRIBUTE_TABLE_NUM_ROWS_REPEATED);
120 
121 
122                     int orgRowNum = 1;
123                     int modRowNum = 1;
124 
125                     if (orgRowRepeated.length() > 0) {
126                         orgRowNum =
127                             Integer.valueOf(orgRowRepeated).intValue();
128                     }
129                     if (modRowRepeated.length() > 0) {
130                         modRowNum =
131                             Integer.valueOf(modRowRepeated).intValue();
132                     }
133 
134                     // try to find out the common number of repeated Rows
135                     if (orgRowNum == modRowNum) {
136                         orgSeqObject = orgSeq.next();
137                         modSeqObject = modSeq.next();
138 
139                     // cut the original row into two halves, first half
140                     // have the repeated attribute = modify row attr
141                     } else if (orgRowNum > modRowNum) {
142                         Element orgSplitRow = splitRepeatedRow(
143                                     orgRow, modRowNum,
144                                     orgRowNum - modRowNum);
145                         // it may equal after the split!
146                         if (orgSeq.equivalent(orgSplitRow, modRow)) {
147                             different = false;
148                         }
149                         orgSplited = true;
150                         modSeqObject = modSeq.next();
151 
152                     // cut the modified Row into two halves, first half
153                     // have the repeated attribute = original Row attr
154                     } else {
155                         Element modSplitRow = splitRepeatedRow(
156                                     modRow, orgRowNum,
157                                     modRowNum - orgRowNum);
158 
159                         // check whether rows are equal after the split
160                         if (modSeq.equivalent(orgRow, modSplitRow)) {
161                             different = false;
162                         }
163                         modSplited = true;
164                         orgSeqObject = orgSeq.next();
165                     }
166 
167                     if (different) {
168                         Difference diff = new Difference(Difference.CHANGE,
169                                           i, j);
170                         diffVector.add(diff);
171                     }
172 
173                 } else {
174                     // Rows are equivalent, move on to next one.
175                     orgSeqObject = orgSeq.next();
176                     modSeqObject = modSeq.next();
177                 } // end if-else
178                 j++;
179             } // end if-else
180             i++;
181         } // end while loop
182 
183         // any extra objects in modify sequence should consider as an add
184         // to the original sequence
185         for (; modSeqObject != null; modSeqObject = modSeq.next(), j++) {
186             Difference diff = new Difference(Difference.ADD, i, j);
187             diffVector.add(diff);
188         }
189 
190         // need to refresh the iterator if we split the rows
191         if (orgSplited) {
192             orgSeq.refresh();
193         }
194 
195         if (modSplited) {
196             modSeq.refresh();
197         }
198 
199 
200         // convert the vector to array
201         Difference[] diffArray = new Difference[diffVector.size()];
202         diffVector.copyInto(diffArray);
203 
204         return diffArray;
205     }
206 
207 
splitRepeatedRow(Element orgRow, int splitNum, int orgNum)208     private Element splitRepeatedRow(Element orgRow, int splitNum, int orgNum) {
209         // NOTE: should we really want to do deep clone?
210         // in most the case, it is an empty Row, but the
211         // specification didn't forbid any node to use multiple
212         // column attributes. i.e. the node can contain text
213         // nodes or other things under it.
214         Element splitRow = (Element)(orgRow.cloneNode(true));
215 
216         if (splitNum > 1) {
217             splitRow.setAttribute(
218               OfficeConstants.ATTRIBUTE_TABLE_NUM_ROWS_REPEATED,
219               String.valueOf(splitNum));
220         } else if (splitNum == 1) {
221             splitRow.removeAttribute(
222               OfficeConstants.ATTRIBUTE_TABLE_NUM_ROWS_REPEATED);
223         }
224         if (orgNum > 1) {
225             orgRow.setAttribute(
226               OfficeConstants.ATTRIBUTE_TABLE_NUM_ROWS_REPEATED,
227               String.valueOf(orgNum));
228         } else if (orgNum == 1) {
229             orgRow.removeAttribute(
230               OfficeConstants.ATTRIBUTE_TABLE_NUM_ROWS_REPEATED);
231         }
232 
233         Node parentNode = orgRow.getParentNode();
234         parentNode.insertBefore(splitRow, orgRow);
235 
236         return splitRow;
237     }
238 }
239 
240