1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 package org.apache.openoffice.ooxml.schema.automaton; 23 24 import java.util.Collection; 25 import java.util.HashMap; 26 import java.util.HashSet; 27 import java.util.LinkedList; 28 import java.util.Map; 29 import java.util.Map.Entry; 30 import java.util.Queue; 31 import java.util.Set; 32 import java.util.TreeMap; 33 import java.util.TreeSet; 34 import java.util.Vector; 35 36 import org.apache.openoffice.ooxml.schema.model.attribute.Attribute; 37 import org.apache.openoffice.ooxml.schema.model.base.Location; 38 import org.apache.openoffice.ooxml.schema.model.base.QualifiedName; 39 40 /** Convert an NFA into a DFA via the powerset construction (also called subset 41 * construction). 42 */ 43 public class DFACreator 44 { 45 /** For a given non-deterministic finite automaton create an equivalent 46 * deterministic finite automaton. 47 */ CreateDFAforNFA( final StateContainer aDFAStateContainer, final StateContext aNFAStateContext, final Vector<Attribute> aAttributes, final QualifiedName aTypeName, final Location aLocation)48 public static FiniteAutomaton CreateDFAforNFA ( 49 final StateContainer aDFAStateContainer, 50 final StateContext aNFAStateContext, 51 final Vector<Attribute> aAttributes, 52 final QualifiedName aTypeName, 53 final Location aLocation) 54 { 55 final DFACreator aCreator = new DFACreator(aDFAStateContainer, aNFAStateContext, aTypeName); 56 aCreator.CreateDFAforNFA(); 57 return new FiniteAutomaton( 58 aCreator.maDFAStateContext, 59 aAttributes, 60 aLocation); 61 } 62 63 64 65 DFACreator( final StateContainer aDFAStateContainer, final StateContext aNFAStateContext, final QualifiedName aTypeName)66 private DFACreator ( 67 final StateContainer aDFAStateContainer, 68 final StateContext aNFAStateContext, 69 final QualifiedName aTypeName) 70 { 71 maNFAStateContext = aNFAStateContext; 72 73 // Create the set of state sets where each element corresponds to a 74 // state in the DFA. 75 maNFASetToDFAStateMap = new TreeMap<>(); 76 maDFAStateContext = new StateContext( 77 aDFAStateContainer, 78 aTypeName == null 79 ? "<TOP-LEVEL>" 80 : aTypeName.GetStateName()); 81 82 maDFATransitions = new HashSet<>(); 83 maAcceptingDFAStates = new Vector<>(); 84 } 85 86 87 88 CreateDFAforNFA()89 private void CreateDFAforNFA () 90 { 91 final State aNFAStartState = maNFAStateContext.GetStartState(); 92 93 // Initialize the creation process by adding the epsilon closure of the 94 // original start state to the work list. 95 final StateSet aStartSet = GetEpsilonClosure(new StateSet(aNFAStartState)); 96 maNFASetToDFAStateMap.put(aStartSet, maDFAStateContext.GetStartState()); 97 98 PropagateStateFlags(aStartSet, maDFAStateContext.GetStartState()); 99 100 final Queue<StateSet> aWorklist = new LinkedList<>(); 101 aWorklist.add(aStartSet); 102 103 while ( ! aWorklist.isEmpty()) 104 { 105 final Collection<StateSet> aAdditionalWorkList = ProcessTransitionFront( 106 aWorklist.poll()); 107 108 aWorklist.addAll(aAdditionalWorkList); 109 } 110 } 111 112 113 114 ProcessTransitionFront( final StateSet aSet)115 private Collection<StateSet> ProcessTransitionFront ( 116 final StateSet aSet) 117 { 118 final Set<StateSet> aLocalWorklist = new TreeSet<>(); 119 120 // Find all regular transitions that start from any state in the set. 121 final Map<String,Vector<Transition>> aTransitions = GetTransitionFront(aSet); 122 123 // Create new state sets for states that are reachable via the same element and 124 // the following epsilon transitions. 125 for (final Entry<String,Vector<Transition>> aEntry : aTransitions.entrySet()) 126 { 127 // Create new state sets for both the end state of the transition. 128 final StateSet aEpsilonClosure = GetEpsilonClosure(GetEndStateSet(aEntry.getValue())); 129 130 // When these are new state sets then add them to the worklist 131 // and the set of sets. 132 State aDFAState = maNFASetToDFAStateMap.get(aEpsilonClosure); 133 if (aDFAState == null) 134 { 135 aLocalWorklist.add(aEpsilonClosure); 136 aDFAState = aEpsilonClosure.CreateStateForStateSet(maDFAStateContext); 137 PropagateStateFlags(aEpsilonClosure, aDFAState); 138 maNFASetToDFAStateMap.put(aEpsilonClosure, aDFAState); 139 if (aDFAState.IsAccepting()) 140 maAcceptingDFAStates.add(aDFAState); 141 } 142 143 final State aStartState = maNFASetToDFAStateMap.get(aSet); 144 final QualifiedName aElementName = GetElementName(aEntry.getValue()); 145 final String sElementTypeName = GetElementTypeName(aEntry.getValue()); 146 assert(aElementName != null); 147 final Transition aTransition = new Transition( 148 aStartState, 149 aDFAState, 150 aElementName, 151 sElementTypeName); 152 aStartState.AddTransition(aTransition); 153 maDFATransitions.add(aTransition); 154 } 155 156 return aLocalWorklist; 157 } 158 159 160 161 GetElementName(final Vector<Transition> aTransitions)162 private QualifiedName GetElementName (final Vector<Transition> aTransitions) 163 { 164 for (final Transition aTransition : aTransitions) 165 return aTransition.GetElementName(); 166 return null; 167 } 168 169 170 171 GetElementTypeName(final Vector<Transition> aTransitions)172 private String GetElementTypeName (final Vector<Transition> aTransitions) 173 { 174 for (final Transition aTransition : aTransitions) 175 return aTransition.GetElementTypeName(); 176 return null; 177 } 178 179 180 181 182 /** Return the epsilon closure of the given set of states. 183 * The result is the set of all states that are reachable via zero, one or 184 * more epsilon transitions from at least one state in the given set of 185 * states. 186 */ GetEpsilonClosure( final StateSet aSet)187 private StateSet GetEpsilonClosure ( final StateSet aSet) 188 { 189 final StateSet aClosure = new StateSet(aSet); 190 191 final Queue<State> aWorkList = new LinkedList<>(); 192 for (final State aState : aSet.GetStates()) 193 aWorkList.add(aState); 194 195 while( ! aWorkList.isEmpty()) 196 { 197 final State aState = aWorkList.poll(); 198 for (final EpsilonTransition aTransition : aState.GetEpsilonTransitions()) 199 { 200 final State aEndState = aTransition.GetEndState(); 201 if ( ! aClosure.ContainsState(aEndState)) 202 { 203 aClosure.AddState(aEndState); 204 aWorkList.add(aEndState); 205 } 206 } 207 } 208 209 return aClosure; 210 } 211 212 213 214 215 /** Return the list of regular transitions (i.e. not epsilon transitions) 216 * that start from any of the states in the given set. 217 * The returned map is a partition of the transitions according to their 218 * triggering XML element. 219 */ GetTransitionFront(final StateSet aSet)220 private Map<String, Vector<Transition>> GetTransitionFront (final StateSet aSet) 221 { 222 final Map<String, Vector<Transition>> aTransitions = new HashMap<>(); 223 224 for (final State aState : aSet.GetStates()) 225 for (final Transition aTransition : aState.GetTransitions()) 226 { 227 final String sElementName; 228 final QualifiedName aElementName = aTransition.GetElementName(); 229 if (aElementName != null) 230 sElementName = aElementName.GetDisplayName(); 231 else 232 sElementName = null; // For skip transitions. 233 234 Vector<Transition> aElementTransitions = aTransitions.get(sElementName); 235 if (aElementTransitions == null) 236 { 237 aElementTransitions = new Vector<>(); 238 aTransitions.put(sElementName, aElementTransitions); 239 } 240 aElementTransitions.add(aTransition); 241 } 242 return aTransitions; 243 } 244 245 246 247 248 /** Return a state set that contains all end states of all the given transitions. 249 */ GetEndStateSet(final Iterable<Transition> aTransitions)250 private StateSet GetEndStateSet (final Iterable<Transition> aTransitions) 251 { 252 final StateSet aStateSet = new StateSet(); 253 for (final Transition aTransition : aTransitions) 254 aStateSet.AddState(aTransition.GetEndState()); 255 return aStateSet; 256 } 257 258 259 260 261 /** Propagate accepting state flag and skip data. 262 */ PropagateStateFlags( final StateSet aNFAStateSet, final State aDFAState)263 private void PropagateStateFlags ( 264 final StateSet aNFAStateSet, 265 final State aDFAState) 266 { 267 for (final State aNFAState : aNFAStateSet.GetStates()) 268 aDFAState.CopyFrom(aNFAState); 269 } 270 271 272 273 274 private final StateContext maNFAStateContext; 275 276 private final Map<StateSet,State> maNFASetToDFAStateMap; 277 private final StateContext maDFAStateContext; 278 private final Set<Transition> maDFATransitions; 279 private final Vector<State> maAcceptingDFAStates; 280 } 281