1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 /*
24  * XLIFFReader.java
25  *
26  *
27  */
28 package com.sun.star.tooling.converter;
29 
30 import java.io.IOException;
31 import java.util.Hashtable;
32 import java.util.Map;
33 
34 import com.sun.star.tooling.languageResolver.LanguageResolver;
35 
36 import org.xml.sax.Attributes;
37 import org.xml.sax.SAXException;
38 import org.xml.sax.SAXParseException;
39 import org.xml.sax.helpers.AttributesImpl;
40 import org.xml.sax.helpers.DefaultHandler;
41 
42 
43 /**
44  * Parse the given file and extract the content needed.
45  * <br/>
46  * This Reader understands the parts of the
 * <a href="http://www.oasis-open.org/committees/xliff/documents/cs-xliff-core-1.1-20031031.htm">xliff</a> specification used to translate
48  *  the strings in Star-Office and Open-Office.
49  *  <br/>
50  *  The given file is parsed and the content is stored in a HashMap with those keys:
51  *  <br/>
52  *  "BlockNr" originally coming from reading the sdf file, contains 'block nr in sdf file'+'-'+'hash value of the sdf id fields'.<br/>
53  *   "Project"  first column in sdf file format.<br/>
54  *  "SourceFile" second column in sdf file format.<br/>
55  *  "Dummy" third column in sdf file format.<br/>
56  *  "ResType" 4. column in sdf file format.<br/>
57  *  "GID" 5. column in sdf file format. <br/>
58  *  "LID" 6. column in sdf file format.<br/>
59  *  "HID" 7. column in sdf file format.<br/>
60  *  "Platform" 8. column in sdf file format. <br/>
61  *  "Width", 9. column in sdf file format.<br/>
62  *  "SourceLanguageID" 10. column in sdf file format(in the line with the source language).<br/>
63  *  "SourceText"  11. column in sdf file format(in the line with the source language).<br/>
64  *  "SourceHText" 12. column in sdf file format(in the line with the source language).<br/>
65  *  "SourceQText" 13. column in sdf file format(in the line with the source language).<br/>
66  *  "SourceTitle" 14. column in sdf file format(in the line with the source language).<br/>
67  *  "TargetLanguageID" 10. column in sdf file format (in the line with the target language).<br/>
68  *  "TargetText" 11. column in sdf file format (in the line with the target language).<br/>
69  *  "TargetHText" 12. column in sdf file format (in the line with the target language).<br/>
70  *  "TargetQText"  13. column in sdf file format (in the line with the target language).<br/>
71  *  "TargetTitle", 14. column in sdf file format (in the line with the target language).<br/>
72  *  "TimeStamp" 15. column in sdf file format.<br/>
 *
74  * @author Christian Schmidt 2005
75  *
76  */
77 public class XLIFFReader extends DefaultHandler {
78 
79     /**
80      * A String array holding the keys used by the HashMap holding the Data
81      */
82     private final String[]      dataNames     = { "BlockNr", "Project",
83             "SourceFile", "Dummy", "ResType", "GID", "LID", "HID", "Platform",
84             "Width", "SourceLanguageID", "SourceText", "SourceHText",
85             "SourceQText", "SourceTitle", "TargetLanguageID", "TargetText",
86             "TargetHText", "TargetQText", "TargetTitle", "TimeStamp" };
87 
88     /**
89      * Used to index in the data array
90      */
91     static int                  index         = 0;
92 
93     /**
94      * The Map that holds the data returned by this class
95      */
96     private Map                 moveData      = new ExtMap();
97 
98     /**
99      * A Map that holds  yet incomplete data
100      * until all depending transunits are found
101      */
102     private Hashtable           DataStore     = new Hashtable();
103 
104     /**
105      * An Elements name
106      */
107     private String              name          = new String("");
108 
109     /**
110      *List of Attributes used by an Element
111      */
112     private Attributes          attrs;
113 
114 //    private String              tagElement    = new String("");
115 
116     /**
117      * Indicates whether the next found content string should be printed
118      */
119     private boolean             printThis     = false;
120     /**
121      * Indicates whether the next found content string should be stored
122      */
123     private boolean             storeIt       = false;
124 
125     /**
126      * data holds the information created while parsing
127      *
128      */
129     private String[]            data          = new String[26];
130 
131     /**
132      * The handler used by this class
133      */
134     private final DataHandler   handler;
135     /**
136      * The target used by this class
137      */
138     private final DataWriter    target;
139 
140 //    private boolean             searchForText = false;
141 
142     /**
143      * counts how many dots are made
144      */
145     private int                 dotCount;
146 
147     /**
148      * Counts how many Trans Units are read
149      */
150     private int                 transUnitCounter;
151 
152     /**
153      * used source Language
154      */
155     private String              sourceLanguage;
156 
157     /**
158      * used target language
159      */
160     private String              targetLanguage;
161 
162     /**
163      * indicates whether this is the first Transunit
164      */
165     private boolean             isFirst       = true;
166 
167     private static final String EMPTY         = new String("");
168 
169     /**
170      * the last index in data where something is written
171      */
172     private int                 oldindex;
173 
174 //    private boolean isBptEptTag;
175 
176 //    private String innerString;
177 //
178 //    private String key;
179 
180     /**
181      * Index for the BlockNr in the data array
182      */
183     private static final int BLOCKNR_IDX = 0;
184     /**
185      * Index for the Project in the data array
186      */
187     private static final int PROJECT_IDX = 1;
188     /**
189      * Index for the Sourcefile name in the data array
190      */
191     private static final int SOURCEFILE_IDX = 2;
192     /**
193      * Index for the 'dummy' in the data array
194      */
195     private static final int DUMMY_IDX = 3;
196     /**
197      * Index for the Group Id in the data array
198      */
199     private static final int GID_IDX = 4;
200     /**
201      * Index for the Local Id in the data array
202      */
203     private static final int LID_IDX = 5;
204     /**
205      * Index for the Help Id in the data array
206      */
207     private static final int HID_IDX = 6;
208     /**
209      * Index for the Platform in the data array
210      */
211     private static final int PLATFORM_IDX = 7;
212     /**
213      * Index for the 'Width' in the data array
214      */
215     private static final int WIDTH_IDX = 8;
216     /**
217      * Index for the Sourcelanguage Id in the data array
218      */
219     private static final int SOURCE_LANGUAGE_ID_IDX = 10;
220     /**
221      * Index for the Source Text in the data array
222      */
223     private static final int SOURCE_TEXT_IDX = 11;
224     /**
225      * Index for the Source Helptext in the data array
226      */
227     private static final int SOURCE_HELPTEXT_IDX = 12;
228     /**
229      * Index for the Source Quickhelp Text in the data array
230      */
231     private static final int SOURCE_QUICK_HELPTEXT_IDX = 13;
232     /**
233      * Index for the Source Titletext in the data array
234      */
235     private static final int SOURCE_TITLETEXT_IDX = 14;
236     /**
237      * Index for the Timestamp in the data array
238      */
239     private static final int TIMESTAMP_IDX = 15;
240     /**
241      * Index for the res type in the data array
242      */
243     private static final int RESTYPE_IDX = 16;
244     /**
245      * Index for the Target Language Id in the data array
246      */
247     private static final int TARGET_LANGUAGE_ID_IDX = 20;
248     /**
249      * Index for the Target Text in the data array
250      */
251     private static final int TARGET_TEXT_IDX = 21;
252     /**
253      * Index for the Target Helptext in the data array
254      */
255     private static final int TARGET_HELP_TEXT_IDX = 22;
256     /**
257      * Index for the Target Quickhelp Text in the data array
258      */
259     private static final int TARGET_QUICKHELP_TEXT_IDX = 23;
260     /**
261      * Index for the Target Titletext in the data array
262      */
263     private static final int TARGET_TITLE_TEXT_IDX = 24;
264     /**
265      * Index for the Found Parts Counter in the data array
266      */
267     private static final int FOUND_PARTS_COUNTER_IDX = 18;
268 
269     /**
270      * used to find the matching ISO or RFC3066 language code
271      */
272     LanguageResolver languageResolver;
273 
274     private boolean doBlockCompleteCheck=true;
275 
276 
277 
278     /**
279      * Create a new Instance of XLIFFReader
280      *
281      * @param handler the DataHandler to use
282      * @param target the target used
283      * @throws IOException
284      */
XLIFFReader(DataHandler handler, DataWriter target)285     public XLIFFReader(DataHandler handler, DataWriter target) throws IOException {
286         this.languageResolver = new LanguageResolver();
287         this.handler = handler;
288         this.target = target;
289     }
290 
291     /**
292      * Create a new Instance of XLIFFReader
293      *
294      * @param handler the DataHandler to use
295      * @param target the target used
296      * @param doBlockCompleteCheck indicates whether every single transunit should be returned or the whole block data is to be collected
297      *
298      * @throws IOException
299      */
XLIFFReader(DataHandler handler, DataWriter target,boolean doBlockCompleteCheck)300     public XLIFFReader(DataHandler handler, DataWriter target,boolean doBlockCompleteCheck) throws IOException {
301         this(handler, target);
302         this.languageResolver = new LanguageResolver();
303         this.doBlockCompleteCheck=doBlockCompleteCheck;
304 
305     }
306 
307     /**
308      * delete and initialize the data content
309      */
initData()310     public void initData() {
311         for (int i = BLOCKNR_IDX; i < SOURCE_LANGUAGE_ID_IDX; i++) {
312             data[i] = "";
313         }
314         for (int i = SOURCE_TEXT_IDX; i < TIMESTAMP_IDX; i++) { // skip Time Stamp
315             data[i] = "";
316         }
317         for (int i = RESTYPE_IDX; i < TARGET_LANGUAGE_ID_IDX; i++) { // skip Source language ID
318             data[i] = "";
319         }
320         for (int i = TARGET_TEXT_IDX; i < 26; i++) {// skip Target language ID,
321             data[i] = "";
322         }
323 
324         data[DUMMY_IDX] = "0";//dummy
325         data[FOUND_PARTS_COUNTER_IDX] = "1";//parts found
326 
327     }
328 
329     /** (non-Javadoc)
330      * @see org.xml.sax.ContentHandler#startDocument()
331      */
startDocument()332     public void startDocument() {
333         initData();
334         //System.out.print("Start");
335 
336     }
337 
338     /** (non-Javadoc)
339      * @see org.xml.sax.ContentHandler#endDocument()
340      */
endDocument()341     public void endDocument() {
342 
343         try {
344             showStatistic();
345         } catch (IOException e) {
346 
347             OutputHandler.log(e.getMessage());
348 
349         }
350     }
351 
352     /** (non-Javadoc)
353      * @throws SAXException
354      * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
355      */
startElement(String namespaceURI, String sName, String qName, Attributes attrs)356     public void startElement(String namespaceURI, String sName, String qName,
357             Attributes attrs) throws SAXException {
358         this.name = new String(qName);
359         this.attrs = new AttributesImpl(attrs);
360         String resType;
361 
362         String attributeName = new String("");
363         String attribute = new String("");
364         String tagElement = new String("");
365         int i;
366 
367         if (qName.equals("bpt")||qName.equals("ept")||qName.equals("sub")||qName.equals("ex")) {
368             //ignore bpt, ept, ex  and sub tags
369             // content of the tags will be stored
370 
371             storeIt=true;
372             return;
373 
374         }
375         if (qName.equals("target")) {
376             if ((resType = data[RESTYPE_IDX]) == null) {
377 
378             } else {
379                 if ("res".equals(resType)) {
380                     index = TARGET_TEXT_IDX;
381 
382                     storeIt = true;
383                     return;
384                 }
385                 //                if("res-Help".equals(resType)){
386                 //                    index=TARGET_HELP_TEXT_IDX;
387                 //                    storeIt=true;
388                 //                    return;
389                 //                }
390                 if ("res-QuickHelp".equals(resType)) {
391                     index = TARGET_QUICKHELP_TEXT_IDX;
392 
393                     storeIt = true;
394                     return;
395                 }
396                 if ("res-Title".equals(resType)) {
397                     index = TARGET_TITLE_TEXT_IDX;
398 
399                     storeIt = true;
400                     return;
401                 }
402             }
403 
404         }
405         if (qName.equals("source")) {
406             if ((resType = data[RESTYPE_IDX]) == null) {
407                 //throw new SAXException("Ressource type not found");
408             } else {
409                 if ("res".equals(resType)) {
410                     index = SOURCE_TEXT_IDX;
411 
412                     storeIt = true;
413                     return;
414                 }
415                 //                if("res-Help".equals(resType)){
416                 //                    index=SOURCEHELPTEXT_IDX;
417                 //                    storeIt=true;
418                 //                    return;
419                 //                }
420                 if ("res-QuickHelp".equals(resType)) {
421                     index = SOURCE_QUICK_HELPTEXT_IDX;
422                     storeIt = true;
423                     return;
424                 }
425                 if ("res-Title".equals(resType)) {
426                     index = SOURCE_TITLETEXT_IDX;
427                     storeIt = true;
428                     return;
429                 }
430             }
431         }
432 
433         if (qName.equals("file")) {
434             data[TIMESTAMP_IDX] = attrs.getValue("date");
435             //data[17]=(attrs.getValue("original"));
436             try{
437                 data[SOURCE_LANGUAGE_ID_IDX] = (languageResolver.getISOFromRFC((String)attrs.getValue("source-language")));
438                 if(languageResolver.getISOFromRFC((String)attrs.getValue("target-language"))!=null){
439                     data[TARGET_LANGUAGE_ID_IDX] = (languageResolver.getISOFromRFC((String)attrs.getValue("target-language")));
440                 }
441             }catch(Exception e){
442                 OutputHandler.log(e.getMessage());
443             }
444             return;
445         }
446         if (qName.equals("trans-unit")) {
447             String id = attrs.getValue("id");
448             if ((DataStore.get(id)) != null) {
449                 //TODO arraycopy might not be nessessary
450                 System.arraycopy((String[]) DataStore.get(id), 0, data, 0,
451                         data.length);
452                 int help = (Integer.valueOf(data[FOUND_PARTS_COUNTER_IDX])).intValue(); //found one more part
453                 help++; // refresh the actual found parts
454                 data[FOUND_PARTS_COUNTER_IDX] = Integer.toString(help); // belonging to this information
455 
456                 DataStore.remove(attrs.getValue("id")); // TODO this can be deleted?
457             } else {
458 
459                 data[BLOCKNR_IDX] = (attrs.getValue("id")); // a new part
460             }
461             data[RESTYPE_IDX] = (attrs.getValue("restype"));
462 
463             return;
464         }
465 
466         if (qName.equals("context")) {
467 
468             String value = attrs.getValue("context-type");
469 
470             if ("SourceHelpText".equals(value)) {
471                 index = SOURCE_HELPTEXT_IDX;
472                 storeIt = true;
473                 return;
474             }else if ("TargetHelpText".equals(value)) {
475                 index = TARGET_HELP_TEXT_IDX;
476                 storeIt = true;
477                 return;
478             }else if ("DBType".equals(value)) {
479                 //index=SOURCEFILE_IDX;
480                 //storeIt=true;
481                 return;
482             }else if ("Project".equals(value)) {
483                 index = PROJECT_IDX;
484                 storeIt = true;
485                 return;
486             }else if ("Filename".equals(value)) {
487                 index = SOURCEFILE_IDX;
488                 storeIt = true;
489                 return;
490             }else if ("Type".equals(value)) {
491                 index = RESTYPE_IDX;
492                 storeIt = true;
493                 return;
494             }else if ("GID".equals(value)) {
495                 index = GID_IDX;
496                 storeIt = true;
497                 return;
498             }else if ("LID".equals(value)) {
499                 index = LID_IDX;
500                 storeIt = true;
501                 return;
502             }else if ("HID".equals(value)) {
503                 index = HID_IDX;
504                 storeIt = true;
505                 return;
506             }else if ("Platform".equals(value)) {
507                 index = PLATFORM_IDX;
508                 storeIt = true;
509                 return;
510             }else if ("Width".equals(value)) {
511                 index = WIDTH_IDX;
512                 storeIt = true;
513                 return;
514             }
515 
516         }
517 
518     }
519 
520     /** (non-Javadoc)
521      * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
522      */
endElement(String namespaceURI, String sName, String qName)523     public void endElement(String namespaceURI, String sName, String qName)
524             throws SAXException {
525         //we ignore bpt and ept tags
526         if(!(qName.equals("bpt")||qName.equals("ept")||qName.equals("sub")||qName.equals("ex"))){
527             storeIt = false;
528         }
529         if (qName.equals("trans-unit")) {
530             showData();
531         }
532 
533     }
534 
535     /** (non-Javadoc)
536      * @see org.xml.sax.ContentHandler#characters(char[], int, int)
537      */
characters(char[] ch, int start, int length)538     public void characters(char[] ch, int start, int length) {
539 
540         // checkContent();
541         String str2 = new String(ch, start, length);
542 
543         if (storeIt) {
544 
545             String str = new String(ch, start, length);
546             if (index == oldindex) {
547                 data[index] += str;
548             } else {
549                 data[index] = str;
550             }
551 
552         }
553         oldindex = index;
554 
555     }
556 
557     /** (non-Javadoc)
558      * @see org.xml.sax.ErrorHandler#error(org.xml.sax.SAXParseException)
559      */
error(SAXParseException e)560     public void error(SAXParseException e) throws SAXParseException {
561 
562         OutputHandler.log(e.getMessage());
563     }
564 
565     /** (non-Javadoc)
566      * @see org.xml.sax.ErrorHandler#fatalError(org.xml.sax.SAXParseException)
567      */
fatalError(SAXParseException e)568     public void fatalError(SAXParseException e) throws SAXParseException {
569 
570         OutputHandler.log("PARSE ERROR in line " + e.getLineNumber() + ", "
571                 + e.getMessage() );
572 
573     }
574 
575     /** (non-Javadoc)
576      * @see org.xml.sax.ErrorHandler#warning(org.xml.sax.SAXParseException)
577      */
warning(SAXParseException e)578     public void warning(SAXParseException e) throws SAXParseException {
579         //throw e;
580         OutputHandler.log(e.getMessage());
581     }
582 
583     /**
584      * Put the Data to the DataHandler
585      * tell the Writer to write it
586      *
587      * @throws SAXException
588      */
showData()589     public void showData() throws SAXException {
590         transUnitCounter++;
591         makeDot();
592         if (isComplete()) {
593 
594             try {
595                 moveData();
596                 if (isFirst == true) {
597                     this.sourceLanguage = (String) this.moveData
598                             .get("SourceLanguageID");
599                     this.targetLanguage = (String) this.moveData
600                             .get("TargetLanguageID");
601                     OutputHandler.out(EMPTY);
602                     OutputHandler.out("Source Language is: "
603                             + this.sourceLanguage);
604                     OutputHandler.out("Target Language is: "
605                             + this.targetLanguage);
606                     OutputHandler.out(EMPTY);
607                     OutputHandler.out("Start");
608                     OutputHandler.out(EMPTY);
609                     isFirst = false;
610                 }
611                 target.getDataFrom(handler);
612                 target.writeData();
613 
614             } catch (java.io.IOException e) {
615                 throw new SAXException(e);
616             }
617 
618         } else {
619             DataStore.put(data[BLOCKNR_IDX], data.clone());
620             initData();
621 
622         }
623         initData();
624     }
625 
626 
627     /**
628      * put the data in an Map in the format that
629      * DataHandler can handle it
630      */
moveData()631     final public void moveData() {
632 
633         moveData.put("BlockNr", data[BLOCKNR_IDX]);
634 
635         moveData.put("Project", data[PROJECT_IDX]);
636 
637         moveData.put("SourceFile", data[SOURCEFILE_IDX]);
638 
639         moveData.put("Dummy", "0");
640 
641         moveData.put("ResType", data[RESTYPE_IDX]);
642 
643         moveData.put("GID", data[GID_IDX]);
644 
645         moveData.put("LID", data[LID_IDX]);
646 
647         moveData.put("HID", data[HID_IDX]);
648 
649         moveData.put("Platform", data[PLATFORM_IDX]);
650 
651         if (EMPTY.equals(data[WIDTH_IDX]))
652             data[WIDTH_IDX] = "0";
653         moveData.put("Width", data[WIDTH_IDX]);
654 
655         moveData.put("SourceLanguageID", data[SOURCE_LANGUAGE_ID_IDX]);
656 
657         moveData.put("SourceText", data[SOURCE_TEXT_IDX]);
658 
659         moveData.put("SourceHText", data[SOURCE_HELPTEXT_IDX]);
660 
661         moveData.put("SourceQText", data[SOURCE_QUICK_HELPTEXT_IDX]);
662 
663         moveData.put("SourceTitle", data[SOURCE_TITLETEXT_IDX]);
664 
665         moveData.put("TargetLanguageID", data[TARGET_LANGUAGE_ID_IDX]);
666 
667         moveData.put("TargetText", data[TARGET_TEXT_IDX]);
668 
669         moveData.put("TargetHText", data[TARGET_HELP_TEXT_IDX]);
670 
671         moveData.put("TargetQText", data[TARGET_QUICKHELP_TEXT_IDX]);
672 
673         moveData.put("TargetTitle", data[TARGET_TITLE_TEXT_IDX]);
674 
675         moveData.put("TimeStamp", data[TIMESTAMP_IDX]);
676 
677         //and give it to the data handler
678         this.handler.fillDataWith(moveData);
679     }
680 
681     /**
682      * complete means all depending parts have been found esp. all res types
683      * that belong to the same SDF Line
684      *
685      * @return true if the data is complete
686      *
687      */
isComplete()688     final public boolean isComplete() {
689 
690         if(!doBlockCompleteCheck){
691             return true;
692         }
693 
694         String sParts;
695         if (data[FOUND_PARTS_COUNTER_IDX] == EMPTY)
696             data[FOUND_PARTS_COUNTER_IDX] = "1"; //this is the first part
697 
698         String sFoundParts = data[FOUND_PARTS_COUNTER_IDX];
699         //create the new 'id'
700         sParts = data[BLOCKNR_IDX].substring(data[BLOCKNR_IDX].lastIndexOf(":") + 1);
701 
702         if (sFoundParts.equals(sParts)) {
703             return true;
704         }
705         return false;
706     }
707 
708     // TODO this belongs in OutputHandler
709     /**
710      * show the user that it is going
711      * on by printing dots on the screen
712      *
713      */
makeDot()714     private void makeDot() {
715         int count = 0;
716         if ((count = (int) this.transUnitCounter / 1000) > this.dotCount) {
717             this.dotCount = count;
718             OutputHandler.printDot();
719         }
720     }
721 
722     /**
723      * show the statistic data found while parse this file
724      *
725      * @throws IOException
726      */
showStatistic()727     final void showStatistic() throws IOException {
728         OutputHandler.out(EMPTY);
729         OutputHandler.out("TransUnits found:  " + this.transUnitCounter);
730         // every data in DataStore is
731         // skipped 'cause its not complete
732         // TODO count really every transunit not only the data (might consist of
733         // more than one
734         OutputHandler.dbg("TransUnits skip :  " + this.DataStore.size());
735         //Converter.out(EMPTY);
736     }
737 }
738 
739