/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/

package org.apache.openoffice.ooxml.parser;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import javax.xml.stream.Location;

import org.apache.openoffice.ooxml.parser.action.ActionManager;
import org.apache.openoffice.ooxml.parser.action.ActionTrigger;
import org.apache.openoffice.ooxml.parser.action.IAction;

/** This OOXML parser is based on the output of the schema parser.
 *  It exists to debug the schema parser and as illustration and preparation of
 *  the C++ parser (yet to come).
 *  Because of this, the parser data (set of states and transitions) are
 *  read at runtime while a real parser would do that at compile time.
 */
public class OOXMLParser
{
    /** State shared between the actions registered by AddSomeActions() and
     *  the constructor, which prints it after parsing.
     *  Static because it never needs access to the enclosing parser object.
     */
    static class ActionContext
    {
        /** Maps a type name to the number of times the "*" element start
         *  action ran for it.  A TreeMap keeps the report sorted by name.
         */
        public final Map<String,Integer> TypeCounts = new TreeMap<>();
    }




    /** The parser is called with two or three arguments:
     *  - A path to where the parser tables with the states and transitions can
     *    be found.
     *  - The XML input file or Zip stream to parse.
     *    The syntax for a Zip stream contains a '#' that separates the filename
     *    to its left from the entry name to its right.
     *  - Optionally the name of a file that receives log output.  Without it
     *    no log data is written.
     *
     *  @throws RuntimeException
     *      with a usage message when the number of arguments is not 2 or 3.
     */
    public static void main (final String ... aArgumentList)
    {
        if (aArgumentList.length<2 || aArgumentList.length>3)
        {
            throw new RuntimeException("usage: OOXMLParser <parser-tables-path> <XML-input-file> <log-file>?");
        }

        if (aArgumentList.length == 3)
        {
            final File aLogFile = new File(aArgumentList[2]);
            Log.Dbg = new Log(aLogFile);
            System.out.printf("writing log data to %s\n", aLogFile.toString());
        }
        else
        {
            Log.Dbg = null;
            System.out.printf("writing no log data\n");
        }

        new OOXMLParser(aArgumentList[0], aArgumentList[1]);
    }




    /** Load the parse tables, parse the input and print timing information
     *  and per-type element counts to stdout.
     *  Exceptions raised while parsing are printed, not propagated.
     *
     *  @param sParseTableFilename
     *      Location of the parser tables; handed to the StateMachine.
     *  @param sInputFilename
     *      Plain XML file name or "<archive>#<entry>" for a zip entry.
     */
    private OOXMLParser (
        final String sParseTableFilename,
        final String sInputFilename)
    {
        long nStartTime = System.currentTimeMillis();
        final StateMachine aMachine = new StateMachine(new File(sParseTableFilename), null);
        final InputStream aIn = GetInputStream(sInputFilename);
        long nEndTime = System.currentTimeMillis();

        final ActionContext aActionContext = new ActionContext();
        AddSomeActions(aMachine.GetActionManager(), aActionContext);

        System.out.printf("initialzed parser in %fs\n", (nEndTime-nStartTime)/1000.0);

        if (aIn == null)
        {
            // GetInputStream() has already printed the cause.  Without input
            // there is nothing to parse, so do not hand null to the parser.
            System.err.printf("can not open input %s\n", sInputFilename);
            return;
        }

        // try-with-resources makes sure that the input (and, for zip input,
        // the archive behind it) is closed even when parsing fails.
        try (InputStream aInput = aIn)
        {
            nStartTime = System.currentTimeMillis();
            final Parser aParser = new Parser(aMachine, aInput);
            aParser.Parse();
            final int nElementCount = aParser.GetElementCount();
            nEndTime = System.currentTimeMillis();
            System.out.printf("parsed %d elements in %fs\n",
                nElementCount,
                (nEndTime-nStartTime)/1000.0);

            System.out.printf("%d different elements found:\n", aActionContext.TypeCounts.size());
            for (final Entry<String, Integer> aEntry : aActionContext.TypeCounts.entrySet())
            {
                System.out.printf("%-32s : %6d\n", aEntry.getKey(), aEntry.getValue());
            }
        }
        catch (final Exception aException)
        {
            aException.printStackTrace();
        }
    }




    /** Register a few demo actions at the given action manager:
     *  - an element start action for "*" that counts elements per type name,
     *  - an element start action for ".*CT_Shd" that prints the element and
     *    its attributes,
     *  - a text action for ".*CT_Text" that currently does nothing.
     *
     *  @param aActionManager
     *      The manager at which the actions are registered.
     *  @param aActionContext
     *      Receives the per-type counts.
     */
    private static void AddSomeActions (
        final ActionManager aActionManager,
        final ActionContext aActionContext)
    {
        aActionManager.AddElementStartAction(
            "*",
            new IAction()
            {
                @Override public void Run(
                    final ActionTrigger eTrigger,
                    final ElementContext aContext,
                    final String sText,
                    final Location aStartLocation,
                    final Location aEndLocation)
                {
                    // Start at one for a type that is seen for the first
                    // time, otherwise increase the existing count.
                    final String sTypeName = aContext.GetTypeName();
                    final Integer nOldCount = aActionContext.TypeCounts.get(sTypeName);
                    final int nNewCount = (nOldCount == null) ? 1 : nOldCount + 1;
                    aActionContext.TypeCounts.put(sTypeName, nNewCount);
                }
            }
        );
        aActionManager.AddElementStartAction(
            ".*CT_Shd",
            new IAction()
            {
                @Override public void Run(
                    final ActionTrigger eTrigger,
                    final ElementContext aContext,
                    final String sText,
                    final Location aStartLocation,
                    final Location aEndLocation)
                {
                    System.out.printf("processing %s of element %s at position %d\n",
                        eTrigger,
                        aContext.GetElementName(),
                        aStartLocation.getCharacterOffset());

                    if (aContext.GetAttributes().GetAttributeCount() == 0)
                    {
                        System.out.printf("    no attributes\n");
                    }
                    else
                    {
                        for (final Entry<String,String> aAttribute : aContext.GetAttributes().GetAttributes())
                        {
                            System.out.printf("    %s -> %s\n", aAttribute.getKey(), aAttribute.getValue());
                        }
                    }
                }
            }
        );
        aActionManager.AddTextAction(
            ".*CT_Text",
            new IAction()
            {
                @Override public void Run(
                    final ActionTrigger eTrigger,
                    final ElementContext aContext,
                    final String sText,
                    final Location aStartLocation,
                    final Location aEndLocation)
                {
                    // Disabled debug output; the action is registered but
                    // has no effect.
//                    System.out.printf("%s text \"%s\"\n", aContext.GetTypeName(), sText.replace("\n", "\\n"));
                }
            }
        );
    }




    /** Open the input that is described by the given name.
     *
     *  @param sInputName
     *      Either the name of a plain XML file or, when it contains a '#',
     *      the name of a zip archive (left of the first '#') followed by the
     *      name of an entry in that archive (right of it).
     *  @return
     *      A stream that the caller has to close, or null when the input
     *      can not be opened.  In the latter case the stack trace of the
     *      cause has been printed.
     */
    private static InputStream GetInputStream (final String sInputName)
    {
        try
        {
            final int nSeparator = sInputName.indexOf('#');
            if (nSeparator < 0)
            {
                // The input name points to a plain XML file.
                return new FileInputStream(sInputName);
            }

            // Split the input name into the file name of the archive and the
            // name of a zip entry.
            final String sArchiveName = sInputName.substring(0, nSeparator);
            String sEntryName = sInputName.substring(nSeparator+1);

            // Normalize and cleanup the entry name.
            sEntryName = sEntryName.replace('\\', '/');
            if (sEntryName.startsWith("/"))
            {
                sEntryName = sEntryName.substring(1);
            }

            return OpenZipEntry(sArchiveName, sEntryName);
        }
        catch (final Exception aException)
        {
            // Best effort: report the problem and let the caller decide.
            aException.printStackTrace();
            return null;
        }
    }




    /** Open one entry of a zip archive.
     *  The returned stream owns the archive: closing the stream also closes
     *  the ZipFile.  The archive can not be closed earlier because that would
     *  close the entry stream as well.
     *
     *  @param sArchiveName
     *      File name of the zip archive.
     *  @param sEntryName
     *      Name of the entry, with '/' as separator and no leading '/'.
     *  @return
     *      The content stream of the entry.
     *  @throws IOException
     *      when the archive can not be read or, as FileNotFoundException,
     *      when it has no entry of the given name.
     */
    private static InputStream OpenZipEntry (
        final String sArchiveName,
        final String sEntryName)
        throws IOException
    {
        final ZipFile aZipFile = new ZipFile(new File(sArchiveName));
        boolean bIsOwnershipTransferred = false;
        try
        {
            // getEntry() returns null for an unknown name.
            final ZipEntry aZipEntry = aZipFile.getEntry(sEntryName);
            if (aZipEntry == null)
            {
                throw new FileNotFoundException(
                    String.format("zip archive %s has no entry %s", sArchiveName, sEntryName));
            }

            final InputStream aEntryStream = aZipFile.getInputStream(aZipEntry);
            final InputStream aOwningStream = new FilterInputStream(aEntryStream)
            {
                @Override public void close () throws IOException
                {
                    try
                    {
                        super.close();
                    }
                    finally
                    {
                        aZipFile.close();
                    }
                }
            };
            bIsOwnershipTransferred = true;
            return aOwningStream;
        }
        finally
        {
            // Do not leak the archive when the entry could not be opened.
            if ( ! bIsOwnershipTransferred)
            {
                aZipFile.close();
            }
        }
    }
}