merger/merge/CharacterBaseParagraphMerge.java

/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/


package org.openoffice.xmerge.merger.merge;

import java.util.List;
import org.w3c.dom.Node;
import org.openoffice.xmerge.merger.Difference;
import org.openoffice.xmerge.merger.NodeMergeAlgorithm;
import org.openoffice.xmerge.merger.diff.CharacterParser;
import org.openoffice.xmerge.merger.diff.CharArrayLCSAlgorithm;
import org.openoffice.xmerge.merger.diff.TextNodeEntry;
import org.openoffice.xmerge.util.Debug;

/**
 *  This is an implementation of the <code>NodeMergeAlgorithm</code>
 *  interface.  It is used to merge two paragraph <code>Node</code>
 *  objects based on character comparisons.
 *
 * @author smak
 */
public final class CharacterBaseParagraphMerge
                   implements NodeMergeAlgorithm {


    private class cacheCharArray {
        public  cacheCharArray(int cacheSize) {
        }
    }


    /**
     *  Merge two paragraph <code>Node</code> by using Longest Common
     *  Subsequence (LCS) character algorithm defined in {@link
     *  org.openoffice.xmerge.merger.diff.CharArrayLCSAlgorithm
     *  CharArrayLCSAlgorithm}
     *
     *  @param  orgPara  The original paragraph <code>Node</code>.
     *  @param  modPara  The modified paragraph <code>Node</code>.
     */
    public void merge(Node orgPara, Node modPara) {
        CharacterParser orgParser = new CharacterParser(orgPara);
        CharacterParser modParser = new CharacterParser(modPara);

        char[] orgCharArray = orgParser.getCharArray();
        char[] modCharArray = modParser.getCharArray();

        CharArrayLCSAlgorithm diffAlgo = new CharArrayLCSAlgorithm();

        Difference[] diffResult = diffAlgo.computeDiffs(orgCharArray,
                                                        modCharArray);
        // debug use
        System.out.println("Diff Result: ");
        for (int i = 0; i < diffResult.length; i++) {
            Debug.log(Debug.INFO, diffResult[i].debug());
        }

        applyDifference(orgParser, modParser, diffResult);
    }


    private void applyDifference(CharacterParser orgParser,
                                 CharacterParser modParser,
                                 Difference[] diffs) {

        List orgNodeList = orgParser.getNodeList();
        List modNodeList = modParser.getNodeList();
        int diffCount = 0;
        int modNodeListCnt = 0;
        int numNode = orgNodeList.size();

        for (int i = 0; i < numNode; i++) {

            int extraChar = 0;
            int orgDiffCount = diffCount;
            TextNodeEntry orgTextNode = (TextNodeEntry)(orgNodeList.get(i));

            Debug.log(Debug.INFO, "checking node " + (i + 1) + " of " + numNode);

            // check any difference in this node and estimate the new char num
            for (; diffCount < diffs.length; diffCount++) {

                Debug.log(Debug.INFO, "  checking diff " + (diffCount + 1) +
                  " of " + diffs.length);
                Debug.log(Debug.INFO, "    OrgPosision <" +
                  diffs[diffCount].getOrgPosition() + "> diffCount <" +
                  diffCount + "> orgDiffCount <" + orgDiffCount + ">");

                // don't need to check and diffs beyond the current node text
                // range except the last node
                if (diffs[diffCount].getOrgPosition() > orgTextNode.endChar() &&
                    i < numNode - 1) {
                    Debug.log(Debug.INFO, "    breaking!");
                    break;
                }

                if (diffs[diffCount].getOrgPosition()
                                                  >= orgTextNode.startChar()) {
                    if (diffs[diffCount].getOperation() == Difference.DELETE) {
                        extraChar--;
                    } else if (diffs[diffCount].getOperation()
                                                        == Difference.ADD) {
                        extraChar++;
                    }

                }
            }

            Debug.log(Debug.INFO, "  final diffCount <" + diffCount +
              "> final orgDiffCount <" + orgDiffCount + ">");

            // will only try to merge if there is a difference in this node
            if (diffCount > orgDiffCount) {

                Debug.log(Debug.INFO, "  There is a difference, doing merge");
                Debug.log(Debug.INFO, "  TextNode name <" +
                  orgTextNode.node().getNodeName() + ">");
                Debug.log(Debug.INFO, "  TextNode value <" +
                  orgTextNode.node().getNodeValue() + ">");
                Debug.log(Debug.INFO, "  TextNode start char <" +
                  orgTextNode.startChar() + "> TextNode end char <" +
                  orgTextNode.endChar() + ">");
                Debug.log(Debug.INFO, "  extraChar value <" + extraChar + ">");

                coreMerge(orgDiffCount, diffCount, diffs, orgParser,
                          modParser, orgTextNode, extraChar);
            }
        }
    }

    private void coreMerge(int startDiffNum, int endDiffNum, Difference[] diffs,
                        CharacterParser orgParser, CharacterParser modParser,
                        TextNodeEntry orgTextNode, int extraChar) {

        Node orgNode = orgTextNode.node();
        char[] modTextArray = modParser.getCharArray();
        String tmpString;

        // Handle situation where getNodeValue returns null
        //
        if (orgNode.getNodeValue() != null)
           tmpString = orgNode.getNodeValue();
        else
           tmpString = "";

        char[] orgNodeText = tmpString.toCharArray();
        char[] newNodeText;

        if (orgNodeText.length + extraChar > 0)
           newNodeText = new char[orgNodeText.length + extraChar];
        else
           newNodeText = new char[0];

        int orgTextPosition = orgTextNode.startChar();   // used for block copy
        int newTextPosition = 0;                         // used for block copy
        int unChangedTextLength = 0;

        char[] cacheCharArray = new char[endDiffNum - startDiffNum];
        int cacheLength = 0;
        int lastDiffOperation = Difference.UNCHANGE;
        int lastDiffPosition = -1;

        // starting to diff
        //
        for (int j = startDiffNum; j < endDiffNum; j++) {

            // copy any contents before the diff
            //
            if (diffs[j].getOrgPosition() > orgTextPosition) {
                // need to flush first
                if (cacheLength > 0) {
                    System.arraycopy(cacheCharArray, 0,
                                     newNodeText, newTextPosition, cacheLength);
                    newTextPosition += cacheLength;

                    // reset the markers
                    lastDiffPosition = -1;
                    lastDiffOperation = Difference.UNCHANGE;
                    cacheLength = 0;
                }

                // find out the length how many characters are
                // untouched by the diff
                unChangedTextLength = diffs[j].getOrgPosition() -
                                      orgTextPosition;
                System.arraycopy(orgNodeText,
                                 orgTextPosition - orgTextNode.startChar(),
                                 newNodeText, newTextPosition,
                                 unChangedTextLength);
                orgTextPosition += unChangedTextLength;
                newTextPosition += unChangedTextLength;
            }

            // for any deleted characters, just skip without copy
            // but still need to take care the cached characters
            //
            if (diffs[j].getOperation() == Difference.DELETE) {
                orgTextPosition++;

                // flush out the cache and copy the content to new Text
                if (cacheLength > 0) {
                    System.arraycopy(cacheCharArray, 0,
                                     newNodeText, newTextPosition, cacheLength);
                    newTextPosition += cacheLength;

                    // reset the markers
                    lastDiffPosition = -1;
                    lastDiffOperation = Difference.UNCHANGE;
                    cacheLength = 0;
                }

                continue;


            // check whether we should flush the cache.
            // For changed diffs, only continuous changes can be cached
            // For Add diffs, only same insertion point can be cached
            // and for both changed/add diffs, need to have same operation
            // as last cached diffs.

            } else {
                if (lastDiffOperation != diffs[j].getOperation()  ||
                    (diffs[j].getOperation() == Difference.CHANGE &&
                     diffs[j].getOrgPosition() != lastDiffPosition + 1) ||
                    (diffs[j].getOperation() == Difference.ADD &&
                     diffs[j].getOrgPosition() != lastDiffPosition)) {

                    // flush the cache
                    if (cacheLength > 0) {
                        System.arraycopy(cacheCharArray, 0, newNodeText,
                                         newTextPosition, cacheLength);
                        newTextPosition += cacheLength;

                        // reset the markers
                        lastDiffPosition = -1;
                        lastDiffOperation = Difference.UNCHANGE;
                        cacheLength = 0;
                    }
                }

                // add the diffs to the cache, now the diffs will be either
                // a new 'changed' char or is an adjacent following change of
                // last difference
                cacheCharArray[cacheLength] =
                                    modTextArray[diffs[j].getModPosition()];
                cacheLength++;
                lastDiffOperation = diffs[j].getOperation();
                lastDiffPosition = diffs[j].getOrgPosition();

                // need to increment the original text position
                // after we cached it
                if (lastDiffOperation == Difference.CHANGE) {
                    orgTextPosition++;
                }
            }
        }

        // flush any contents remaining in the cache
        if (cacheLength > 0) {
            System.arraycopy(cacheCharArray, 0, newNodeText,
                             newTextPosition, cacheLength);
            newTextPosition += cacheLength;
            // no need to reset any cache-related info as this is a last flush
        }

        // copy any contents after all the diffs
        int orgStartPosition = orgTextNode.startChar();
        if (orgNodeText.length + orgStartPosition > orgTextPosition) {
            unChangedTextLength = orgNodeText.length + orgStartPosition
                                  - orgTextPosition;
            System.arraycopy(orgNodeText, orgTextPosition - orgStartPosition,
                             newNodeText, newTextPosition,
                             unChangedTextLength);
        }

        // set the text to the original node if there are any diffs processed.
        // can't use newNodeText.length to check as even it is empty, we may
        // process a whole bunch of deletion already (i.e. the whole
        // orgNodeText deleted).
        if (endDiffNum > startDiffNum) {
            String newString = new String(newNodeText);
            orgNode.setNodeValue(newString);
        }
    }
}