1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements.  See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership.  The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License.  You may obtain a copy of the License at
10 *
11 *   http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied.  See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21 
22 package org.apache.openoffice.ooxml.schema;
23 
24 import java.io.BufferedReader;
25 import java.io.File;
26 import java.io.FileReader;
27 import java.util.HashMap;
28 import java.util.HashSet;
29 import java.util.LinkedList;
30 import java.util.Map;
31 import java.util.Map.Entry;
32 import java.util.Queue;
33 import java.util.Set;
34 import java.util.Vector;
35 
36 import javax.xml.stream.XMLStreamException;
37 
38 import org.apache.openoffice.ooxml.schema.automaton.FiniteAutomatonContainer;
39 import org.apache.openoffice.ooxml.schema.automaton.NonValidatingCreator;
40 import org.apache.openoffice.ooxml.schema.automaton.ValidatingCreator;
41 import org.apache.openoffice.ooxml.schema.generator.LogGenerator;
42 import org.apache.openoffice.ooxml.schema.generator.ParserTablesGenerator;
43 import org.apache.openoffice.ooxml.schema.generator.html.HtmlGenerator;
44 import org.apache.openoffice.ooxml.schema.model.schema.Schema;
45 import org.apache.openoffice.ooxml.schema.model.schema.SchemaBase;
46 import org.apache.openoffice.ooxml.schema.parser.SchemaParser;
47 import org.apache.openoffice.ooxml.schema.simple.SimpleTypeContainer;
48 
49 public class SchemaReader
50 {
main(final String ... aArgumentList)51     public static void main (final String ... aArgumentList)
52     {
53         if (aArgumentList.length != 1)
54         {
55             System.err.printf("usage: SchemaParser <driver-file>\n");
56             System.err.printf(" driver file can contain these lines:\n");
57             System.err.printf("# Comments\n");
58             System.err.printf("    are ignored\n");
59             System.err.printf("schema <mark> <file-name>\n");
60             System.err.printf("    specifies a top-level schema file to read\n");
61             System.err.printf("output-schema <file-name>\n");
62             System.err.printf("    write schema information to file\n");
63             System.err.printf("output-optimized-schema <file-name>\n");
64             System.err.printf("    write information about optimized schema to file\n");
65             System.exit(1);
66         }
67 
68         final SchemaReader aReader = new SchemaReader(new File(aArgumentList[0]));
69         aReader.Run();
70     }
71 
72 
73 
74 
SchemaReader(final File aDriverFile)75     private SchemaReader (final File aDriverFile)
76     {
77         maSchemaBase = new SchemaBase();
78         maTopLevelSchemas = new HashMap<>();
79         maMainSchemaFiles = new Vector<>();
80         maSchemaFiles = new HashSet<>();
81         maWorkList = new LinkedList<>();
82         maOutputOperations = new Vector<>();
83         mnTotalLineCount = 0;
84         mnTotalByteCount = 0;
85 
86         ParseDriverFile(aDriverFile);
87     }
88 
89 
90 
91 
92     /** Read and parse the driver file that specifies which schema files to read
93      *  and where the output should go.
94      */
ParseDriverFile(final File aDriverFile)95     private void ParseDriverFile (final File aDriverFile)
96     {
97         if (aDriverFile == null || ! aDriverFile.exists() || ! aDriverFile.canRead())
98         {
99             System.err.printf("can not read driver file\n");
100             System.exit(1);
101         }
102 
103         try
104         {
105             final BufferedReader aIn = new BufferedReader(new FileReader(aDriverFile));
106             while(true)
107             {
108                 String sLine = aIn.readLine();
109                 if (sLine == null)
110                     break;
111                 // Lines starting with # are comment lines and are ignored.
112                 if (sLine.matches("^\\s*#.*"))
113                     continue;
114                 // Lines containing only whitespace are also ignored.
115                 else if (sLine.matches("^\\s*$"))
116                     continue;
117 
118                 // Handle line continuation.
119                 while (sLine.endsWith("\\"))
120                     sLine = sLine.substring(0, sLine.length()-1) + aIn.readLine();
121 
122                 final Vector<String> aParts = SplitLine(sLine);
123                 switch (aParts.get(0))
124                 {
125                 	case "schema":
126                 		maMainSchemaFiles.add(new String[]{aParts.get(1), aParts.get(2)});
127                 		break;
128 
129                 	case "output-schema":
130                         maOutputOperations.add(new Runnable()
131                         {
132                             final File maFile = CreateCheckedOutputFile(aParts.get(1));
133                             @Override public void run()
134                             {
135                                 WriteSchema(maFile);
136                             }
137                         });
138                         break;
139 
140                     case "output-optimized-schema":
141                         maOutputOperations.add(new Runnable()
142                         {
143                             final File maFile = CreateCheckedOutputFile(aParts.get(1));
144                             @Override public void run()
145                             {
146                                 WriteOptimizedSchema(maFile);
147                             }
148                         });
149                         break;
150 
151                     case "output-nonvalidating-parse-tables":
152                         maOutputOperations.add(new Runnable()
153                         {
154                             final File aAutomatonLogFile = CreateCheckedOutputFile(aParts.get(1));
155                             final File aSimpleTypeLogFile = CreateCheckedOutputFile(aParts.get(2));
156                             final File aParseTableFile = CreateCheckedOutputFile(aParts.get(3));
157                             @Override public void run()
158                             {
159                                 WriteNonValidatingParseTables(
160                                     aAutomatonLogFile,
161                                     aSimpleTypeLogFile,
162                                     aParseTableFile);
163                             }
164                         });
165                         break;
166 
167                     case "output-validating-parse-tables":
168                         maOutputOperations.add(new Runnable()
169                         {
170                             final File aAutomatonLogFile = CreateCheckedOutputFile(aParts.get(1));
171                             final File aSimpleTypeLogFile = CreateCheckedOutputFile(aParts.get(2));
172                             final File aParseTableFile = CreateCheckedOutputFile(aParts.get(3));
173                             @Override public void run()
174                             {
175                                 WriteValidatingParseTables(
176                                     aAutomatonLogFile,
177                                     aSimpleTypeLogFile,
178                                     aParseTableFile);
179                             }
180                         });
181                         break;
182 
183                     case "output-html-page":
184                         maOutputOperations.add(new Runnable()
185                         {
186                             final File aHTMLPageFile = CreateCheckedOutputFile(aParts.get(1));
187                             @Override public void run()
188                             {
189                                 WriteHTMLPage(aHTMLPageFile);
190                             }
191                         });
192                         break;
193 
194                     default:
195                         System.err.printf("unknown command '%s' in driver file", aParts.get(0));
196                         System.exit(1);
197                 }
198             }
199             aIn.close();
200         }
201         catch (final Exception aException)
202         {
203             aException.printStackTrace();
204         }
205     }
206 
207 
208 
209 
Run()210     private void Run ()
211     {
212         try
213         {
214             ParseSchemaFiles();
215         }
216         catch (final Exception aException)
217         {
218             aException.printStackTrace();
219         }
220 
221         maOptimizedSchemaBase = maSchemaBase.GetOptimizedSchema(maTopLevelSchemas.values());
222         for (final Entry<String, Schema> aEntry : maTopLevelSchemas.entrySet())
223             aEntry.setValue(aEntry.getValue().GetOptimizedSchema(maOptimizedSchemaBase));
224 
225         System.out.printf("    optimization left %d complex types and %d simple types\n",
226             maOptimizedSchemaBase.ComplexTypes.GetCount(),
227             maOptimizedSchemaBase.SimpleTypes.GetCount());
228 
229         for (final Runnable aOperation : maOutputOperations)
230         {
231             aOperation.run();
232         }
233     }
234 
235 
236 
237 
ParseSchemaFiles()238     private void ParseSchemaFiles ()
239         throws XMLStreamException
240     {
241         System.out.printf("parsing %d main schema files\n", maMainSchemaFiles.size());
242 
243         for (final String[] aEntry : maMainSchemaFiles)
244         {
245             final String sMainSchemaShortname = aEntry[0];
246             final String sMainSchemaFile = aEntry[1];
247             final File aMainSchemaFile = new File(sMainSchemaFile);
248             if ( ! aMainSchemaFile.exists())
249             {
250                 System.err.printf("    schema file does not exist\n");
251                 System.exit(1);
252             }
253             if ( ! aMainSchemaFile.canRead())
254             {
255                 System.err.printf("can not read schema file\n");
256                 System.exit(1);
257             }
258 
259             final Schema aSchema = new Schema(sMainSchemaShortname, maSchemaBase);
260             ParseSchemaFile(sMainSchemaFile, aSchema);
261             maTopLevelSchemas.put(sMainSchemaShortname, aSchema);
262         }
263 
264         long nStartTime = System.currentTimeMillis();
265         while ( ! maWorkList.isEmpty())
266         {
267             ParseSchemaFile(maWorkList.poll(), null);
268         }
269         long nEndTime = System.currentTimeMillis();
270 
271         System.out.printf("parsed %d schema files with a total of %d lines and %d bytes in %fs\n",
272             maSchemaFiles.size(),
273             mnTotalLineCount,
274             mnTotalByteCount,
275             (nEndTime-nStartTime)/1000.0);
276         System.out.printf("    found %d complex types and %d simple types\n",
277             maSchemaBase.ComplexTypes.GetCount(),
278             maSchemaBase.SimpleTypes.GetCount());
279 
280         int nTopLevelElementCount = 0;
281         for (final Schema aSchema : maTopLevelSchemas.values())
282         	nTopLevelElementCount += aSchema.TopLevelElements.GetCount();
283         System.out.printf("    the %d top level schemas have %d elements\n",
284         		maTopLevelSchemas.size(),
285         		nTopLevelElementCount);
286     }
287 
288 
289 
290 
ParseSchemaFile( final String sSchemaFilename, final Schema aSchema)291     private void ParseSchemaFile (
292     		final String sSchemaFilename,
293     		final Schema aSchema)
294     				throws XMLStreamException
295     {
296         System.out.printf("parsing %s\n", sSchemaFilename);
297         maSchemaFiles.add(sSchemaFilename);
298 
299         final SchemaParser aParser = new SchemaParser(new File(sSchemaFilename), aSchema, maSchemaBase);
300         aParser.Parse();
301 
302         mnTotalLineCount += aParser.GetLineCount();
303         mnTotalByteCount += aParser.GetByteCount();
304         for (final File aFile : aParser.GetImportedSchemaFilenames())
305             AddSchemaReference(aFile.getAbsolutePath());
306     }
307 
308 
309 
310 
AddSchemaReference(final String sSchemaFilename)311     private void AddSchemaReference (final String sSchemaFilename)
312     {
313         if ( ! maSchemaFiles.contains(sSchemaFilename))
314         {
315             if (sSchemaFilename == null)
316                 throw new RuntimeException();
317 
318             // We don't know yet the file name of the schema, so just store null to mark the schema name as 'known'.
319             maSchemaFiles.add(sSchemaFilename);
320             maWorkList.add(sSchemaFilename);
321         }
322     }
323 
324 
325 
326 
327     /** Split the given string at whitespace but not at whitespace inside double quotes.
328      *
329      */
SplitLine(final String sLine)330     private Vector<String> SplitLine (final String sLine)
331     {
332     	final Vector<String> aParts = new Vector<>();
333 
334     	boolean bIsInsideQuotes = false;
335     	for (final String sPart : sLine.split("\""))
336     	{
337     		if (bIsInsideQuotes)
338     			aParts.add(sPart);
339     		else
340     	    	for (final String sInnerPart : sPart.split("\\s+"))
341     	    	{
342     	    		if (sInnerPart == null)
343     	    			throw new RuntimeException();
344     	    		else if ( ! sInnerPart.isEmpty())
345     	    			aParts.add(sInnerPart);
346     	    	}
347 
348     		bIsInsideQuotes = ! bIsInsideQuotes;
349     	}
350 
351     	return aParts;
352     }
353 
354 
355 
356 
357     /** Create a File object for a given file name.
358      *  Check that the file is writable, i.e. its directory exists and that if
359      *  the file already exists it can be replaced.
360      *  Throws a RuntimeException when a check fails.
361      */
CreateCheckedOutputFile(final String sFilename)362     private File CreateCheckedOutputFile (final String sFilename)
363     {
364         final File aFile = new File(sFilename);
365         if ( ! aFile.getParentFile().exists())
366             throw new RuntimeException("directory of "+sFilename+" does not exist: can not create file");
367         if (aFile.exists() && ! aFile.canWrite())
368             throw new RuntimeException("file "+sFilename+" already exists and can not be replaced");
369         return aFile;
370     }
371 
372 
373 
374 
WriteSchema(final File aOutputFile)375     private void WriteSchema (final File aOutputFile)
376     {
377         LogGenerator.Write(aOutputFile, maSchemaBase, maTopLevelSchemas.values());
378     }
379 
380 
381 
382 
WriteOptimizedSchema(final File aOutputFile)383     private void WriteOptimizedSchema (final File aOutputFile)
384     {
385         LogGenerator.Write(aOutputFile, maOptimizedSchemaBase, maTopLevelSchemas.values());
386     }
387 
388 
389 
390 
WriteNonValidatingParseTables( final File aAutomatonLogFile, final File aSimpleTypeLogFile, final File aParseTableFile)391     private void WriteNonValidatingParseTables (
392         final File aAutomatonLogFile,
393         final File aSimpleTypeLogFile,
394         final File aParseTableFile)
395     {
396         long nStartTime = System.currentTimeMillis();
397         final NonValidatingCreator aCreator = new NonValidatingCreator(maOptimizedSchemaBase, aAutomatonLogFile);
398         FiniteAutomatonContainer aAutomatons = aCreator.Create(maTopLevelSchemas.values());
399         long nEndTime = System.currentTimeMillis();
400         System.out.printf(
401             "created %d non-validating automatons with %d states and %d transitions in %fs\n",
402             aAutomatons.GetAutomatonCount(),
403             aAutomatons.GetStateCount(),
404             aAutomatons.GetTransitionCount(),
405             (nEndTime-nStartTime)/1000.0);
406 
407         nStartTime = System.currentTimeMillis();
408         final SimpleTypeContainer aSimpleTypes = SimpleTypeContainer.Create(
409             maOptimizedSchemaBase,
410             aSimpleTypeLogFile);
411         nEndTime = System.currentTimeMillis();
412         System.out.printf(
413             "created %d simple type descriptions in %fs\n",
414             aSimpleTypes.GetSimpleTypeCount(),
415             (nEndTime-nStartTime)/1000.0);
416 
417         new ParserTablesGenerator(
418             aAutomatons,
419             maOptimizedSchemaBase.Namespaces,
420             aSimpleTypes,
421             maOptimizedSchemaBase.AttributeValueToIdMap)
422             .Generate(aParseTableFile);
423     }
424 
425 
426 
427 
WriteValidatingParseTables( final File aAutomatonLogFile, final File aSimpleTypeLogFile, final File aParseTableFile)428     private void WriteValidatingParseTables (
429         final File aAutomatonLogFile,
430         final File aSimpleTypeLogFile,
431         final File aParseTableFile)
432     {
433         long nStartTime = System.currentTimeMillis();
434         final ValidatingCreator aCreator = new ValidatingCreator(maOptimizedSchemaBase, aAutomatonLogFile);
435         FiniteAutomatonContainer aAutomatons = aCreator.Create();
436         long nEndTime = System.currentTimeMillis();
437         System.out.printf(
438             "created %d validating stack automatons with %d states and %d transitions in %fs\n",
439             aAutomatons.GetAutomatonCount(),
440             aAutomatons.GetStateCount(),
441             aAutomatons.GetTransitionCount(),
442             (nEndTime-nStartTime)/1000.0);
443 
444 
445         nStartTime = System.currentTimeMillis();
446         aAutomatons = aAutomatons.CreateDFAs();
447         nEndTime = System.currentTimeMillis();
448         System.out.printf(
449             "created %d deterministic automatons with %d states and %d transitions in %fs\n",
450             aAutomatons.GetAutomatonCount(),
451             aAutomatons.GetStateCount(),
452             aAutomatons.GetTransitionCount(),
453             (nEndTime-nStartTime)/1000.0);
454 
455         nStartTime = System.currentTimeMillis();
456         aAutomatons = aAutomatons.MinimizeDFAs();
457         nEndTime = System.currentTimeMillis();
458         System.out.printf(
459             "minimized automaton in %fs, there are now %d states and %d transitions\n",
460             (nEndTime-nStartTime)/1000.0,
461             aAutomatons.GetStateCount(),
462             aAutomatons.GetTransitionCount());
463 
464         nStartTime = System.currentTimeMillis();
465         final SimpleTypeContainer aSimpleTypes = SimpleTypeContainer.Create(
466             maOptimizedSchemaBase,
467             aSimpleTypeLogFile);
468         nEndTime = System.currentTimeMillis();
469         System.out.printf(
470             "created %d simple type descriptions in %fs\n",
471             aSimpleTypes.GetSimpleTypeCount(),
472             (nEndTime-nStartTime)/1000.0);
473 
474         new ParserTablesGenerator(
475             aAutomatons,
476             maOptimizedSchemaBase.Namespaces,
477             aSimpleTypes,
478             maOptimizedSchemaBase.AttributeValueToIdMap)
479             .Generate(aParseTableFile);
480     }
481 
482 
483 
484 
WriteHTMLPage( final File aHTMLPageFile)485     private void WriteHTMLPage (
486         final File aHTMLPageFile)
487     {
488         long nStartTime = System.currentTimeMillis();
489 
490         new HtmlGenerator(maOptimizedSchemaBase, maTopLevelSchemas, aHTMLPageFile).Generate();
491 
492         long nEndTime = System.currentTimeMillis();
493         System.out.printf(
494             "created HTML page in %fs\n",
495             (nEndTime-nStartTime)/1000.0);
496     }
497 
498 
499 
500 
    // The schema base that is handed to every Schema and SchemaParser that this class creates.
    private final SchemaBase maSchemaBase;
    // Result of maSchemaBase.GetOptimizedSchema().  Assigned in Run(), i.e. not yet set while the driver file is parsed.
    private SchemaBase maOptimizedSchemaBase;
    // Top level schemas, keyed by the short name (mark) given in the driver file.
    // Run() replaces the values by their optimized counterparts.
    private final Map<String,Schema> maTopLevelSchemas;
    // One {mark, file name} pair for every "schema" line of the driver file.
    private final Vector<String[]> maMainSchemaFiles;
    // Names of referenced schema files that still have to be parsed.
    private final Queue<String> maWorkList;
    // Output operations from the driver file.  Run() executes them in the order in which they were added.
    private final Vector<Runnable> maOutputOperations;
    // Names of all schema files that have been parsed or have been added to the work list.
    private final Set<String> maSchemaFiles;
    // Sum of the line counts reported by the schema parsers.
    private int mnTotalLineCount;
    // Sum of the byte counts reported by the schema parsers.
    private int mnTotalByteCount;
510 }
511