/************************************************************** * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. * *************************************************************/ /* * XLIFFReader.java * * */ package com.sun.star.tooling.converter; import java.io.IOException; import java.util.Hashtable; import java.util.Map; import com.sun.star.tooling.languageResolver.LanguageResolver; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; import org.xml.sax.helpers.AttributesImpl; import org.xml.sax.helpers.DefaultHandler; /** * Parse the given file and extract the content needed. *
* This Reader understands the parts of the * xliff spezification used to translate * the strings in Star-Office and Open-Office. *
* The given file is parsed and the content is stored in a HashMap with those keys: *
* "BlockNr" originally coming from reading the sdf file, contains 'block nr in sdf file'+'-'+'hash value of the sdf id fields'.
* "Project" first column in sdf file format.
* "SourceFile" second column in sdf file format.
* "Dummy" third column in sdf file format.
* "ResType" 4. column in sdf file format.
* "GID" 5. column in sdf file format.
* "LID" 6. column in sdf file format.
* "HID" 7. column in sdf file format.
* "Platform" 8. column in sdf file format.
* "Width", 9. column in sdf file format.
* "SourceLanguageID" 10. column in sdf file format(in the line with the source language).
* "SourceText" 11. column in sdf file format(in the line with the source language).
* "SourceHText" 12. column in sdf file format(in the line with the source language).
* "SourceQText" 13. column in sdf file format(in the line with the source language).
* "SourceTitle" 14. column in sdf file format(in the line with the source language).
* "TargetLanguageID" 10. column in sdf file format (in the line with the target language).
* "TargetText" 11. column in sdf file format (in the line with the target language).
* "TargetHText" 12. column in sdf file format (in the line with the target language).
* "TargetQText" 13. column in sdf file format (in the line with the target language).
* "TargetTitle", 14. column in sdf file format (in the line with the target language).
* "TimeStamp" 15. column in sdf file format.
* @ * @author Christian Schmidt 2005 * */ public class XLIFFReader extends DefaultHandler { /** * A String array holding the keys used by the HashMap holding the Data */ private final String[] dataNames = { "BlockNr", "Project", "SourceFile", "Dummy", "ResType", "GID", "LID", "HID", "Platform", "Width", "SourceLanguageID", "SourceText", "SourceHText", "SourceQText", "SourceTitle", "TargetLanguageID", "TargetText", "TargetHText", "TargetQText", "TargetTitle", "TimeStamp" }; /** * Used to index in the data array */ static int index = 0; /** * The Map that holds the data returned by this class */ private Map moveData = new ExtMap(); /** * A Map that holds yet incomplete data * until all depending transunits are found */ private Hashtable DataStore = new Hashtable(); /** * An Elements name */ private String name = new String(""); /** *List of Attributes used by an Element */ private Attributes attrs; // private String tagElement = new String(""); /** * Indicates whether the next found content string should be printed */ private boolean printThis = false; /** * Indicates whether the next found content string should be stored */ private boolean storeIt = false; /** * data holds the information created while parsing * */ private String[] data = new String[26]; /** * The handler used by this class */ private final DataHandler handler; /** * The target used by this class */ private final DataWriter target; // private boolean searchForText = false; /** * counts how many dots are made */ private int dotCount; /** * Counts how many Trans Units are read */ private int transUnitCounter; /** * used source Language */ private String sourceLanguage; /** * used target language */ private String targetLanguage; /** * indicates whether this is the first Transunit */ private boolean isFirst = true; private static final String EMPTY = new String(""); /** * the last index in data where something is written */ private int oldindex; // private boolean isBptEptTag; // private String innerString; // // private String key; /** * Index for the BlockNr in the data array */ private static final int BLOCKNR_IDX = 0; /** * Index for the Project in the data array */ private static final int PROJECT_IDX = 1; /** * Index for the Sourcefile name in the data array */ private static final int SOURCEFILE_IDX = 2; /** * Index for the 'dummy' in the data array */ private static final int DUMMY_IDX = 3; /** * Index for the Group Id in the data array */ private static final int GID_IDX = 4; /** * Index for the Local Id in the data array */ private static final int LID_IDX = 5; /** * Index for the Help Id in the data array */ private static final int HID_IDX = 6; /** * Index for the Platform in the data array */ private static final int PLATFORM_IDX = 7; /** * Index for the 'Width' in the data array */ private static final int WIDTH_IDX = 8; /** * Index for the Sourcelanguage Id in the data array */ private static final int SOURCE_LANGUAGE_ID_IDX = 10; /** * Index for the Source Text in the data array */ private static final int SOURCE_TEXT_IDX = 11; /** * Index for the Source Helptext in the data array */ private static final int SOURCE_HELPTEXT_IDX = 12; /** * Index for the Source Quickhelp Text in the data array */ private static final int SOURCE_QUICK_HELPTEXT_IDX = 13; /** * Index for the Source Titletext in the data array */ private static final int SOURCE_TITLETEXT_IDX = 14; /** * Index for the Timestamp in the data array */ private static final int TIMESTAMP_IDX = 15; /** * Index for the res type in the data array */ private static final int RESTYPE_IDX = 16; /** * Index for the Target Language Id in the data array */ private static final int TARGET_LANGUAGE_ID_IDX = 20; /** * Index for the Target Text in the data array */ private static final int TARGET_TEXT_IDX = 21; /** * Index for the Target Helptext in the data array */ private static final int TARGET_HELP_TEXT_IDX = 22; /** * Index for the Target Quickhelp Text in the data array */ private static final int TARGET_QUICKHELP_TEXT_IDX = 23; /** * Index for the Target Titletext in the data array */ private static final int TARGET_TITLE_TEXT_IDX = 24; /** * Index for the Found Parts Counter in the data array */ private static final int FOUND_PARTS_COUNTER_IDX = 18; /** * used to find the matching ISO or RFC3066 language code */ LanguageResolver languageResolver; private boolean doBlockCompleteCheck=true; /** * Create a new Instance of XLIFFReader * * @param handler the DataHandler to use * @param target the target used * @throws IOException */ public XLIFFReader(DataHandler handler, DataWriter target) throws IOException { this.languageResolver = new LanguageResolver(); this.handler = handler; this.target = target; } /** * Create a new Instance of XLIFFReader * * @param handler the DataHandler to use * @param target the target used * @param doBlockCompleteCheck indicates whether every single transunit should be returned or the whole block data is to be collected * * @throws IOException */ public XLIFFReader(DataHandler handler, DataWriter target,boolean doBlockCompleteCheck) throws IOException { this(handler, target); this.languageResolver = new LanguageResolver(); this.doBlockCompleteCheck=doBlockCompleteCheck; } /** * delete and initialize the data content */ public void initData() { for (int i = BLOCKNR_IDX; i < SOURCE_LANGUAGE_ID_IDX; i++) { data[i] = ""; } for (int i = SOURCE_TEXT_IDX; i < TIMESTAMP_IDX; i++) { // skip Time Stamp data[i] = ""; } for (int i = RESTYPE_IDX; i < TARGET_LANGUAGE_ID_IDX; i++) { // skip Source language ID data[i] = ""; } for (int i = TARGET_TEXT_IDX; i < 26; i++) {// skip Target language ID, data[i] = ""; } data[DUMMY_IDX] = "0";//dummy data[FOUND_PARTS_COUNTER_IDX] = "1";//parts found } /** (non-Javadoc) * @see org.xml.sax.ContentHandler#startDocument() */ public void startDocument() { initData(); //System.out.print("Start"); } /** (non-Javadoc) * @see org.xml.sax.ContentHandler#endDocument() */ public void endDocument() { try { showStatistic(); } catch (IOException e) { OutputHandler.log(e.getMessage()); } } /** (non-Javadoc) * @throws SAXException * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes) */ public void startElement(String namespaceURI, String sName, String qName, Attributes attrs) throws SAXException { this.name = new String(qName); this.attrs = new AttributesImpl(attrs); String resType; String attributeName = new String(""); String attribute = new String(""); String tagElement = new String(""); int i; if (qName.equals("bpt")||qName.equals("ept")||qName.equals("sub")||qName.equals("ex")) { //ignore bpt, ept, ex and sub tags // content of the tags will be stored storeIt=true; return; } if (qName.equals("target")) { if ((resType = data[RESTYPE_IDX]) == null) { } else { if ("res".equals(resType)) { index = TARGET_TEXT_IDX; storeIt = true; return; } // if("res-Help".equals(resType)){ // index=TARGET_HELP_TEXT_IDX; // storeIt=true; // return; // } if ("res-QuickHelp".equals(resType)) { index = TARGET_QUICKHELP_TEXT_IDX; storeIt = true; return; } if ("res-Title".equals(resType)) { index = TARGET_TITLE_TEXT_IDX; storeIt = true; return; } } } if (qName.equals("source")) { if ((resType = data[RESTYPE_IDX]) == null) { //throw new SAXException("Ressource type not found"); } else { if ("res".equals(resType)) { index = SOURCE_TEXT_IDX; storeIt = true; return; } // if("res-Help".equals(resType)){ // index=SOURCEHELPTEXT_IDX; // storeIt=true; // return; // } if ("res-QuickHelp".equals(resType)) { index = SOURCE_QUICK_HELPTEXT_IDX; storeIt = true; return; } if ("res-Title".equals(resType)) { index = SOURCE_TITLETEXT_IDX; storeIt = true; return; } } } if (qName.equals("file")) { data[TIMESTAMP_IDX] = attrs.getValue("date"); //data[17]=(attrs.getValue("original")); try{ data[SOURCE_LANGUAGE_ID_IDX] = (languageResolver.getISOFromRFC((String)attrs.getValue("source-language"))); if(languageResolver.getISOFromRFC((String)attrs.getValue("target-language"))!=null){ data[TARGET_LANGUAGE_ID_IDX] = (languageResolver.getISOFromRFC((String)attrs.getValue("target-language"))); } }catch(Exception e){ OutputHandler.log(e.getMessage()); } return; } if (qName.equals("trans-unit")) { String id = attrs.getValue("id"); if ((DataStore.get(id)) != null) { //TODO arraycopy might not be nessessary System.arraycopy((String[]) DataStore.get(id), 0, data, 0, data.length); int help = (Integer.valueOf(data[FOUND_PARTS_COUNTER_IDX])).intValue(); //found one more part help++; // refresh the actual found parts data[FOUND_PARTS_COUNTER_IDX] = Integer.toString(help); // belonging to this information DataStore.remove(attrs.getValue("id")); // TODO this can be deleted? } else { data[BLOCKNR_IDX] = (attrs.getValue("id")); // a new part } data[RESTYPE_IDX] = (attrs.getValue("restype")); return; } if (qName.equals("context")) { String value = attrs.getValue("context-type"); if ("SourceHelpText".equals(value)) { index = SOURCE_HELPTEXT_IDX; storeIt = true; return; }else if ("TargetHelpText".equals(value)) { index = TARGET_HELP_TEXT_IDX; storeIt = true; return; }else if ("DBType".equals(value)) { //index=SOURCEFILE_IDX; //storeIt=true; return; }else if ("Project".equals(value)) { index = PROJECT_IDX; storeIt = true; return; }else if ("Filename".equals(value)) { index = SOURCEFILE_IDX; storeIt = true; return; }else if ("Type".equals(value)) { index = RESTYPE_IDX; storeIt = true; return; }else if ("GID".equals(value)) { index = GID_IDX; storeIt = true; return; }else if ("LID".equals(value)) { index = LID_IDX; storeIt = true; return; }else if ("HID".equals(value)) { index = HID_IDX; storeIt = true; return; }else if ("Platform".equals(value)) { index = PLATFORM_IDX; storeIt = true; return; }else if ("Width".equals(value)) { index = WIDTH_IDX; storeIt = true; return; } } } /** (non-Javadoc) * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String) */ public void endElement(String namespaceURI, String sName, String qName) throws SAXException { //we ignore bpt and ept tags if(!(qName.equals("bpt")||qName.equals("ept")||qName.equals("sub")||qName.equals("ex"))){ storeIt = false; } if (qName.equals("trans-unit")) { showData(); } } /** (non-Javadoc) * @see org.xml.sax.ContentHandler#characters(char[], int, int) */ public void characters(char[] ch, int start, int length) { // checkContent(); String str2 = new String(ch, start, length); if (storeIt) { String str = new String(ch, start, length); if (index == oldindex) { data[index] += str; } else { data[index] = str; } } oldindex = index; } /** (non-Javadoc) * @see org.xml.sax.ErrorHandler#error(org.xml.sax.SAXParseException) */ public void error(SAXParseException e) throws SAXParseException { OutputHandler.log(e.getMessage()); } /** (non-Javadoc) * @see org.xml.sax.ErrorHandler#fatalError(org.xml.sax.SAXParseException) */ public void fatalError(SAXParseException e) throws SAXParseException { OutputHandler.log("PARSE ERROR in line " + e.getLineNumber() + ", " + e.getMessage() ); } /** (non-Javadoc) * @see org.xml.sax.ErrorHandler#warning(org.xml.sax.SAXParseException) */ public void warning(SAXParseException e) throws SAXParseException { //throw e; OutputHandler.log(e.getMessage()); } /** * Put the Data to the DataHandler * tell the Writer to write it * * @throws SAXException */ public void showData() throws SAXException { transUnitCounter++; makeDot(); if (isComplete()) { try { moveData(); if (isFirst == true) { this.sourceLanguage = (String) this.moveData .get("SourceLanguageID"); this.targetLanguage = (String) this.moveData .get("TargetLanguageID"); OutputHandler.out(EMPTY); OutputHandler.out("Source Language is: " + this.sourceLanguage); OutputHandler.out("Target Language is: " + this.targetLanguage); OutputHandler.out(EMPTY); OutputHandler.out("Start"); OutputHandler.out(EMPTY); isFirst = false; } target.getDataFrom(handler); target.writeData(); } catch (java.io.IOException e) { throw new SAXException(e); } } else { DataStore.put(data[BLOCKNR_IDX], data.clone()); initData(); } initData(); } /** * put the data in an Map in the format that * DataHandler can handle it */ final public void moveData() { moveData.put("BlockNr", data[BLOCKNR_IDX]); moveData.put("Project", data[PROJECT_IDX]); moveData.put("SourceFile", data[SOURCEFILE_IDX]); moveData.put("Dummy", "0"); moveData.put("ResType", data[RESTYPE_IDX]); moveData.put("GID", data[GID_IDX]); moveData.put("LID", data[LID_IDX]); moveData.put("HID", data[HID_IDX]); moveData.put("Platform", data[PLATFORM_IDX]); if (EMPTY.equals(data[WIDTH_IDX])) data[WIDTH_IDX] = "0"; moveData.put("Width", data[WIDTH_IDX]); moveData.put("SourceLanguageID", data[SOURCE_LANGUAGE_ID_IDX]); moveData.put("SourceText", data[SOURCE_TEXT_IDX]); moveData.put("SourceHText", data[SOURCE_HELPTEXT_IDX]); moveData.put("SourceQText", data[SOURCE_QUICK_HELPTEXT_IDX]); moveData.put("SourceTitle", data[SOURCE_TITLETEXT_IDX]); moveData.put("TargetLanguageID", data[TARGET_LANGUAGE_ID_IDX]); moveData.put("TargetText", data[TARGET_TEXT_IDX]); moveData.put("TargetHText", data[TARGET_HELP_TEXT_IDX]); moveData.put("TargetQText", data[TARGET_QUICKHELP_TEXT_IDX]); moveData.put("TargetTitle", data[TARGET_TITLE_TEXT_IDX]); moveData.put("TimeStamp", data[TIMESTAMP_IDX]); //and give it to the data handler this.handler.fillDataWith(moveData); } /** * complete means all depending parts have been found esp. all res types * that belong to the same SDF Line * * @return true if the data is complete * */ final public boolean isComplete() { if(!doBlockCompleteCheck){ return true; } String sParts; if (data[FOUND_PARTS_COUNTER_IDX] == EMPTY) data[FOUND_PARTS_COUNTER_IDX] = "1"; //this is the first part String sFoundParts = data[FOUND_PARTS_COUNTER_IDX]; //create the new 'id' sParts = data[BLOCKNR_IDX].substring(data[BLOCKNR_IDX].lastIndexOf(":") + 1); if (sFoundParts.equals(sParts)) { return true; } return false; } // TODO this belongs in OutputHandler /** * show the user that it is going * on by printing dots on the screen * */ private void makeDot() { int count = 0; if ((count = (int) this.transUnitCounter / 1000) > this.dotCount) { this.dotCount = count; OutputHandler.printDot(); } } /** * show the statistic data found while parse this file * * @throws IOException */ final void showStatistic() throws IOException { OutputHandler.out(EMPTY); OutputHandler.out("TransUnits found: " + this.transUnitCounter); // every data in DataStore is // skipped 'cause its not complete // TODO count really every transunit not only the data (might consist of // more than one OutputHandler.dbg("TransUnits skip : " + this.DataStore.size()); //Converter.out(EMPTY); } }