1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 package org.apache.openoffice.ooxml.schema; 23 24 import java.io.BufferedReader; 25 import java.io.File; 26 import java.io.FileReader; 27 import java.util.HashMap; 28 import java.util.HashSet; 29 import java.util.LinkedList; 30 import java.util.Map; 31 import java.util.Map.Entry; 32 import java.util.Queue; 33 import java.util.Set; 34 import java.util.Vector; 35 36 import javax.xml.stream.XMLStreamException; 37 38 import org.apache.openoffice.ooxml.schema.automaton.FiniteAutomatonContainer; 39 import org.apache.openoffice.ooxml.schema.automaton.NonValidatingCreator; 40 import org.apache.openoffice.ooxml.schema.automaton.ValidatingCreator; 41 import org.apache.openoffice.ooxml.schema.generator.LogGenerator; 42 import org.apache.openoffice.ooxml.schema.generator.ParserTablesGenerator; 43 import org.apache.openoffice.ooxml.schema.generator.html.HtmlGenerator; 44 import org.apache.openoffice.ooxml.schema.model.schema.Schema; 45 import org.apache.openoffice.ooxml.schema.model.schema.SchemaBase; 46 import org.apache.openoffice.ooxml.schema.parser.SchemaParser; 47 import org.apache.openoffice.ooxml.schema.simple.SimpleTypeContainer; 48 49 public class SchemaReader 50 { main(final String ... aArgumentList)51 public static void main (final String ... aArgumentList) 52 { 53 if (aArgumentList.length != 1) 54 { 55 System.err.printf("usage: SchemaParser <driver-file>\n"); 56 System.err.printf(" driver file can contain these lines:\n"); 57 System.err.printf("# Comments\n"); 58 System.err.printf(" are ignored\n"); 59 System.err.printf("schema <mark> <file-name>\n"); 60 System.err.printf(" specifies a top-level schema file to read\n"); 61 System.err.printf("output-schema <file-name>\n"); 62 System.err.printf(" write schema information to file\n"); 63 System.err.printf("output-optimized-schema <file-name>\n"); 64 System.err.printf(" write information about optimized schema to file\n"); 65 System.exit(1); 66 } 67 68 final SchemaReader aReader = new SchemaReader(new File(aArgumentList[0])); 69 aReader.Run(); 70 } 71 72 73 74 SchemaReader(final File aDriverFile)75 private SchemaReader (final File aDriverFile) 76 { 77 maSchemaBase = new SchemaBase(); 78 maTopLevelSchemas = new HashMap<>(); 79 maMainSchemaFiles = new Vector<>(); 80 maSchemaFiles = new HashSet<>(); 81 maWorkList = new LinkedList<>(); 82 maOutputOperations = new Vector<>(); 83 mnTotalLineCount = 0; 84 mnTotalByteCount = 0; 85 86 ParseDriverFile(aDriverFile); 87 } 88 89 90 91 92 /** Read and parse the driver file that specifies which schema files to read 93 * and where the output should go. 94 */ ParseDriverFile(final File aDriverFile)95 private void ParseDriverFile (final File aDriverFile) 96 { 97 if (aDriverFile == null || ! aDriverFile.exists() || ! aDriverFile.canRead()) 98 { 99 System.err.printf("can not read driver file\n"); 100 System.exit(1); 101 } 102 103 try 104 { 105 final BufferedReader aIn = new BufferedReader(new FileReader(aDriverFile)); 106 while(true) 107 { 108 String sLine = aIn.readLine(); 109 if (sLine == null) 110 break; 111 // Lines starting with # are comment lines and are ignored. 112 if (sLine.matches("^\\s*#.*")) 113 continue; 114 // Lines containing only whitespace are also ignored. 115 else if (sLine.matches("^\\s*$")) 116 continue; 117 118 // Handle line continuation. 119 while (sLine.endsWith("\\")) 120 sLine = sLine.substring(0, sLine.length()-1) + aIn.readLine(); 121 122 final Vector<String> aParts = SplitLine(sLine); 123 switch (aParts.get(0)) 124 { 125 case "schema": 126 maMainSchemaFiles.add(new String[]{aParts.get(1), aParts.get(2)}); 127 break; 128 129 case "output-schema": 130 maOutputOperations.add(new Runnable() 131 { 132 final File maFile = CreateCheckedOutputFile(aParts.get(1)); 133 @Override public void run() 134 { 135 WriteSchema(maFile); 136 } 137 }); 138 break; 139 140 case "output-optimized-schema": 141 maOutputOperations.add(new Runnable() 142 { 143 final File maFile = CreateCheckedOutputFile(aParts.get(1)); 144 @Override public void run() 145 { 146 WriteOptimizedSchema(maFile); 147 } 148 }); 149 break; 150 151 case "output-nonvalidating-parse-tables": 152 maOutputOperations.add(new Runnable() 153 { 154 final File aAutomatonLogFile = CreateCheckedOutputFile(aParts.get(1)); 155 final File aSimpleTypeLogFile = CreateCheckedOutputFile(aParts.get(2)); 156 final File aParseTableFile = CreateCheckedOutputFile(aParts.get(3)); 157 @Override public void run() 158 { 159 WriteNonValidatingParseTables( 160 aAutomatonLogFile, 161 aSimpleTypeLogFile, 162 aParseTableFile); 163 } 164 }); 165 break; 166 167 case "output-validating-parse-tables": 168 maOutputOperations.add(new Runnable() 169 { 170 final File aAutomatonLogFile = CreateCheckedOutputFile(aParts.get(1)); 171 final File aSimpleTypeLogFile = CreateCheckedOutputFile(aParts.get(2)); 172 final File aParseTableFile = CreateCheckedOutputFile(aParts.get(3)); 173 @Override public void run() 174 { 175 WriteValidatingParseTables( 176 aAutomatonLogFile, 177 aSimpleTypeLogFile, 178 aParseTableFile); 179 } 180 }); 181 break; 182 183 case "output-html-page": 184 maOutputOperations.add(new Runnable() 185 { 186 final File aHTMLPageFile = CreateCheckedOutputFile(aParts.get(1)); 187 @Override public void run() 188 { 189 WriteHTMLPage(aHTMLPageFile); 190 } 191 }); 192 break; 193 194 default: 195 System.err.printf("unknown command '%s' in driver file", aParts.get(0)); 196 System.exit(1); 197 } 198 } 199 aIn.close(); 200 } 201 catch (final Exception aException) 202 { 203 aException.printStackTrace(); 204 } 205 } 206 207 208 209 Run()210 private void Run () 211 { 212 try 213 { 214 ParseSchemaFiles(); 215 } 216 catch (final Exception aException) 217 { 218 aException.printStackTrace(); 219 } 220 221 maOptimizedSchemaBase = maSchemaBase.GetOptimizedSchema(maTopLevelSchemas.values()); 222 for (final Entry<String, Schema> aEntry : maTopLevelSchemas.entrySet()) 223 aEntry.setValue(aEntry.getValue().GetOptimizedSchema(maOptimizedSchemaBase)); 224 225 System.out.printf(" optimization left %d complex types and %d simple types\n", 226 maOptimizedSchemaBase.ComplexTypes.GetCount(), 227 maOptimizedSchemaBase.SimpleTypes.GetCount()); 228 229 for (final Runnable aOperation : maOutputOperations) 230 { 231 aOperation.run(); 232 } 233 } 234 235 236 237 ParseSchemaFiles()238 private void ParseSchemaFiles () 239 throws XMLStreamException 240 { 241 System.out.printf("parsing %d main schema files\n", maMainSchemaFiles.size()); 242 243 for (final String[] aEntry : maMainSchemaFiles) 244 { 245 final String sMainSchemaShortname = aEntry[0]; 246 final String sMainSchemaFile = aEntry[1]; 247 final File aMainSchemaFile = new File(sMainSchemaFile); 248 if ( ! aMainSchemaFile.exists()) 249 { 250 System.err.printf(" schema file does not exist\n"); 251 System.exit(1); 252 } 253 if ( ! aMainSchemaFile.canRead()) 254 { 255 System.err.printf("can not read schema file\n"); 256 System.exit(1); 257 } 258 259 final Schema aSchema = new Schema(sMainSchemaShortname, maSchemaBase); 260 ParseSchemaFile(sMainSchemaFile, aSchema); 261 maTopLevelSchemas.put(sMainSchemaShortname, aSchema); 262 } 263 264 long nStartTime = System.currentTimeMillis(); 265 while ( ! maWorkList.isEmpty()) 266 { 267 ParseSchemaFile(maWorkList.poll(), null); 268 } 269 long nEndTime = System.currentTimeMillis(); 270 271 System.out.printf("parsed %d schema files with a total of %d lines and %d bytes in %fs\n", 272 maSchemaFiles.size(), 273 mnTotalLineCount, 274 mnTotalByteCount, 275 (nEndTime-nStartTime)/1000.0); 276 System.out.printf(" found %d complex types and %d simple types\n", 277 maSchemaBase.ComplexTypes.GetCount(), 278 maSchemaBase.SimpleTypes.GetCount()); 279 280 int nTopLevelElementCount = 0; 281 for (final Schema aSchema : maTopLevelSchemas.values()) 282 nTopLevelElementCount += aSchema.TopLevelElements.GetCount(); 283 System.out.printf(" the %d top level schemas have %d elements\n", 284 maTopLevelSchemas.size(), 285 nTopLevelElementCount); 286 } 287 288 289 290 ParseSchemaFile( final String sSchemaFilename, final Schema aSchema)291 private void ParseSchemaFile ( 292 final String sSchemaFilename, 293 final Schema aSchema) 294 throws XMLStreamException 295 { 296 System.out.printf("parsing %s\n", sSchemaFilename); 297 maSchemaFiles.add(sSchemaFilename); 298 299 final SchemaParser aParser = new SchemaParser(new File(sSchemaFilename), aSchema, maSchemaBase); 300 aParser.Parse(); 301 302 mnTotalLineCount += aParser.GetLineCount(); 303 mnTotalByteCount += aParser.GetByteCount(); 304 for (final File aFile : aParser.GetImportedSchemaFilenames()) 305 AddSchemaReference(aFile.getAbsolutePath()); 306 } 307 308 309 310 AddSchemaReference(final String sSchemaFilename)311 private void AddSchemaReference (final String sSchemaFilename) 312 { 313 if ( ! maSchemaFiles.contains(sSchemaFilename)) 314 { 315 if (sSchemaFilename == null) 316 throw new RuntimeException(); 317 318 // We don't know yet the file name of the schema, so just store null to mark the schema name as 'known'. 319 maSchemaFiles.add(sSchemaFilename); 320 maWorkList.add(sSchemaFilename); 321 } 322 } 323 324 325 326 327 /** Split the given string at whitespace but not at whitespace inside double quotes. 328 * 329 */ SplitLine(final String sLine)330 private Vector<String> SplitLine (final String sLine) 331 { 332 final Vector<String> aParts = new Vector<>(); 333 334 boolean bIsInsideQuotes = false; 335 for (final String sPart : sLine.split("\"")) 336 { 337 if (bIsInsideQuotes) 338 aParts.add(sPart); 339 else 340 for (final String sInnerPart : sPart.split("\\s+")) 341 { 342 if (sInnerPart == null) 343 throw new RuntimeException(); 344 else if ( ! sInnerPart.isEmpty()) 345 aParts.add(sInnerPart); 346 } 347 348 bIsInsideQuotes = ! bIsInsideQuotes; 349 } 350 351 return aParts; 352 } 353 354 355 356 357 /** Create a File object for a given file name. 358 * Check that the file is writable, i.e. its directory exists and that if 359 * the file already exists it can be replaced. 360 * Throws a RuntimeException when a check fails. 361 */ CreateCheckedOutputFile(final String sFilename)362 private File CreateCheckedOutputFile (final String sFilename) 363 { 364 final File aFile = new File(sFilename); 365 if ( ! aFile.getParentFile().exists()) 366 throw new RuntimeException("directory of "+sFilename+" does not exist: can not create file"); 367 if (aFile.exists() && ! aFile.canWrite()) 368 throw new RuntimeException("file "+sFilename+" already exists and can not be replaced"); 369 return aFile; 370 } 371 372 373 374 WriteSchema(final File aOutputFile)375 private void WriteSchema (final File aOutputFile) 376 { 377 LogGenerator.Write(aOutputFile, maSchemaBase, maTopLevelSchemas.values()); 378 } 379 380 381 382 WriteOptimizedSchema(final File aOutputFile)383 private void WriteOptimizedSchema (final File aOutputFile) 384 { 385 LogGenerator.Write(aOutputFile, maOptimizedSchemaBase, maTopLevelSchemas.values()); 386 } 387 388 389 390 WriteNonValidatingParseTables( final File aAutomatonLogFile, final File aSimpleTypeLogFile, final File aParseTableFile)391 private void WriteNonValidatingParseTables ( 392 final File aAutomatonLogFile, 393 final File aSimpleTypeLogFile, 394 final File aParseTableFile) 395 { 396 long nStartTime = System.currentTimeMillis(); 397 final NonValidatingCreator aCreator = new NonValidatingCreator(maOptimizedSchemaBase, aAutomatonLogFile); 398 FiniteAutomatonContainer aAutomatons = aCreator.Create(maTopLevelSchemas.values()); 399 long nEndTime = System.currentTimeMillis(); 400 System.out.printf( 401 "created %d non-validating automatons with %d states and %d transitions in %fs\n", 402 aAutomatons.GetAutomatonCount(), 403 aAutomatons.GetStateCount(), 404 aAutomatons.GetTransitionCount(), 405 (nEndTime-nStartTime)/1000.0); 406 407 nStartTime = System.currentTimeMillis(); 408 final SimpleTypeContainer aSimpleTypes = SimpleTypeContainer.Create( 409 maOptimizedSchemaBase, 410 aSimpleTypeLogFile); 411 nEndTime = System.currentTimeMillis(); 412 System.out.printf( 413 "created %d simple type descriptions in %fs\n", 414 aSimpleTypes.GetSimpleTypeCount(), 415 (nEndTime-nStartTime)/1000.0); 416 417 new ParserTablesGenerator( 418 aAutomatons, 419 maOptimizedSchemaBase.Namespaces, 420 aSimpleTypes, 421 maOptimizedSchemaBase.AttributeValueToIdMap) 422 .Generate(aParseTableFile); 423 } 424 425 426 427 WriteValidatingParseTables( final File aAutomatonLogFile, final File aSimpleTypeLogFile, final File aParseTableFile)428 private void WriteValidatingParseTables ( 429 final File aAutomatonLogFile, 430 final File aSimpleTypeLogFile, 431 final File aParseTableFile) 432 { 433 long nStartTime = System.currentTimeMillis(); 434 final ValidatingCreator aCreator = new ValidatingCreator(maOptimizedSchemaBase, aAutomatonLogFile); 435 FiniteAutomatonContainer aAutomatons = aCreator.Create(); 436 long nEndTime = System.currentTimeMillis(); 437 System.out.printf( 438 "created %d validating stack automatons with %d states and %d transitions in %fs\n", 439 aAutomatons.GetAutomatonCount(), 440 aAutomatons.GetStateCount(), 441 aAutomatons.GetTransitionCount(), 442 (nEndTime-nStartTime)/1000.0); 443 444 445 nStartTime = System.currentTimeMillis(); 446 aAutomatons = aAutomatons.CreateDFAs(); 447 nEndTime = System.currentTimeMillis(); 448 System.out.printf( 449 "created %d deterministic automatons with %d states and %d transitions in %fs\n", 450 aAutomatons.GetAutomatonCount(), 451 aAutomatons.GetStateCount(), 452 aAutomatons.GetTransitionCount(), 453 (nEndTime-nStartTime)/1000.0); 454 455 nStartTime = System.currentTimeMillis(); 456 aAutomatons = aAutomatons.MinimizeDFAs(); 457 nEndTime = System.currentTimeMillis(); 458 System.out.printf( 459 "minimized automaton in %fs, there are now %d states and %d transitions\n", 460 (nEndTime-nStartTime)/1000.0, 461 aAutomatons.GetStateCount(), 462 aAutomatons.GetTransitionCount()); 463 464 nStartTime = System.currentTimeMillis(); 465 final SimpleTypeContainer aSimpleTypes = SimpleTypeContainer.Create( 466 maOptimizedSchemaBase, 467 aSimpleTypeLogFile); 468 nEndTime = System.currentTimeMillis(); 469 System.out.printf( 470 "created %d simple type descriptions in %fs\n", 471 aSimpleTypes.GetSimpleTypeCount(), 472 (nEndTime-nStartTime)/1000.0); 473 474 new ParserTablesGenerator( 475 aAutomatons, 476 maOptimizedSchemaBase.Namespaces, 477 aSimpleTypes, 478 maOptimizedSchemaBase.AttributeValueToIdMap) 479 .Generate(aParseTableFile); 480 } 481 482 483 484 WriteHTMLPage( final File aHTMLPageFile)485 private void WriteHTMLPage ( 486 final File aHTMLPageFile) 487 { 488 long nStartTime = System.currentTimeMillis(); 489 490 new HtmlGenerator(maOptimizedSchemaBase, maTopLevelSchemas, aHTMLPageFile).Generate(); 491 492 long nEndTime = System.currentTimeMillis(); 493 System.out.printf( 494 "created HTML page in %fs\n", 495 (nEndTime-nStartTime)/1000.0); 496 } 497 498 499 500 501 private final SchemaBase maSchemaBase; 502 private SchemaBase maOptimizedSchemaBase; 503 private final Map<String,Schema> maTopLevelSchemas; 504 private final Vector<String[]> maMainSchemaFiles; 505 private final Queue<String> maWorkList; 506 private final Vector<Runnable> maOutputOperations; 507 private final Set<String> maSchemaFiles; 508 private int mnTotalLineCount; 509 private int mnTotalByteCount; 510 } 511