/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/


/*
 * SDFReader.java
 *
 *
 */

package com.sun.star.tooling.converter;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.DecimalFormat;
import java.util.*;

/**
 *
 * SDFReader is a DataReader that knows about
 * the content of SDF files.
 *
 * The file is read block by block. An SDF block consists of all
 * consecutive SDF lines whose first eight columns are identical,
 * i.e. the line containing the source string and the lines that
 * are translations of that string. Every line is checked: it must
 * have exactly 15 tab separated columns and must not contain
 * characters with a code below 30 other than the tab. Lines that
 * fail a check are logged and skipped.
 * If no line in the source language was seen in a block by the time
 * the next block starts, the block is logged and skipped.
 * If no line in the target language was seen in a block, a target
 * line with empty texts is created for it.
 * If the given target language is "" (that means not given)
 * the language of the first line that is not in the source language
 * is taken as target language.
 * The found values are returned in Maps that
 * use the following keys:
 * <br/>
 * "BlockNr" originally coming from reading the sdf file, contains 'block nr in sdf file'+'-'+'hash value of the sdf id fields' (hash in hexadecimal).<br/>
 * "Project" first column in sdf file format.<br/>
 * "SourceFile" second column in sdf file format.<br/>
 * "Dummy" third column in sdf file format.<br/>
 * "ResType" 4. column in sdf file format.<br/>
 * "GID" 5. column in sdf file format. <br/>
 * "LID" 6. column in sdf file format.<br/>
 * "HID" 7. column in sdf file format.<br/>
 * "Platform" 8. column in sdf file format. <br/>
 * "Width", 9. column in sdf file format.<br/>
 * "SourceLanguageID" 10. column in sdf file format(in the line with the source language).<br/>
 * "SourceText" 11. column in sdf file format(in the line with the source language).<br/>
 * "SourceHText" 12. column in sdf file format(in the line with the source language).<br/>
 * "SourceQText" 13. column in sdf file format(in the line with the source language).<br/>
 * "SourceTitle" 14. column in sdf file format(in the line with the source language).<br/>
 * "TargetLanguageID" 10. column in sdf file format (in the line with the target language).<br/>
 * "TargetText" 11. column in sdf file format (in the line with the target language).<br/>
 * "TargetHText" 12. column in sdf file format (in the line with the target language).<br/>
 * "TargetQText" 13. column in sdf file format (in the line with the target language).<br/>
 * "TargetTitle", 14. column in sdf file format (in the line with the target language).<br/>
 * "TimeStamp" 15. column in sdf file format.<br/>
 *
 * @author Christian Schmidt 2005
 *
 */

public class SDFReader extends DataReader {

    /**
     * an array of the SDF files column names
     */
    final static String[] fieldnames = { "Project", "SourceFile", "Dummy",
            "ResType", "GID", "LID", "HID", "Platform", "Width", "LanguageID",
            "Text", "HText", "QText", "Title", "TimeStamp" };

    /**
     * an array of the SDF files column names if the source language is in
     */
    final static String[] sourceLineNames = { "Project", "SourceFile", "Dummy",
            "ResType", "GID", "LID", "HID", "Platform", "Width",
            "SourceLanguageID", "SourceText", "SourceHText", "SourceQText",
            "SourceTitle", "TimeStamp" };

    /**
     * an array of the SDF files column names if the target language is in
     */
    final static String[] targetLineNames = { "Project", "SourceFile", "Dummy",
            "ResType", "GID", "LID", "HID", "Platform", "Width",
            "TargetLanguageID", "TargetText", "TargetHText", "TargetQText",
            "TargetTitle", "TimeStamp" };

    /**
     * The empty string. Deliberately left as a distinct instance
     * (not the interned literal) because the field is visible to the
     * whole package and other code may compare against it by identity.
     */
    final static String EMPTY = new String("");

    /**
     * Number of tab separated columns a valid SDF line has
     */
    private static final int COLUMN_COUNT = 15;

    /**
     * Number of leading columns that make up the id of an SDF block
     */
    private static final int BLOCK_ID_COLUMNS = 8;

    /**
     * Characters with a code below this value (except the tab) make a
     * line invalid.
     * NOTE(review): 30 lets the control characters 30 and 31 pass;
     * possibly 0x20 was intended - confirm before changing.
     */
    private static final int LOWEST_ALLOWED_CHAR = 30;

    /**
     * Read ahead limit used to peek at the first line in the constructor
     */
    private static final int FIRST_LINE_READ_AHEAD = 16000;

    /**
     * Number of dots already printed by makeDot()
     */
    private int dotCount = 0;

    /**
     * a Map containing the SDF line currently examined,
     * keyed by {@link #fieldnames}
     */
    private Map sdfMap;

    /**
     * a Map Array containing one SDF source language line ([0]) and
     * one SDF target language line ([1])
     */
    private Map[] data = new Map[2];

    /**
     * The Id of the current SDFBlock
     */
    private String currentBlockId;

    /**
     * The SDF file to read from
     */
    private File sourceFile;

    /**
     * The language in the source file that should be handled as source language
     */
    protected String sourceLanguage;

    /**
     * The language in the source file that should be handled as target language
     */
    protected String targetLanguage;

    /**
     * A counter holding the number of blocks just read
     * from this file
     */
    private long blockNr = 0;

    /**
     * A counter holding the number of skipped lines that means
     * lines that can not be worked with because they contain an error
     */
    private int skippedLines = 0;

    /**
     * This switch is set for indicating that all source file lines
     * are read and no lines remain buffered. Finding this like 'true'
     * means the source file is finished
     */
    private boolean endIt = false;

    /**
     * Indicates whether the targetLanguage is found in the current block so far
     */
    private boolean foundTarget = false;

    /**
     * Indicates whether the sourceLanguage is found in the current block so far
     */
    private boolean foundSource = false;

    /**
     * Meant to count the lines skipped because the language is
     * neither sourceLanguage nor targetLanguage; it is not updated
     * anywhere in this class.
     */
    private int langMiss;

    /**
     * Indicates whether there is a line in the read buffer or not
     */
    private boolean useBuffer = false;

    /**
     * A buffer for SDFLines
     */
    private String lineBuffer;

    /**
     * The buffer for the already split SDFLines
     */
    private String[] splittedLineBuffer;

    /**
     * Counts how many Blocks were skipped
     * f.e. because no sourceLanguage is found
     * in it
     */
    private int skippedBlocks;

    /**
     * Counts the blocks without targetLanguage
     */
    private int targetLangMiss;

    /**
     * Counts the blocks without sourceLanguage
     */
    private int sourceLangMiss;

    /**
     * Counts the lines where no targetLanguage line was found
     * and so empty lines were created
     */
    private int targetLangCreate;

    /**
     * Formats the block number part of the "BlockNr" value
     * (at least six digits, zero padded)
     */
    DecimalFormat blockNrFormatter = new DecimalFormat("000000");

    /**
     * The hashcode of the current block id
     */
    private int currentBlockHash;

    /**
     * Reset by getData(); it is only written, never read, in this class.
     */
    private boolean skip;

    /**
     * Create a new Instance of SDFReader.
     *
     * The first line of the file is peeked at (and then un-read again)
     * to determine the id of the first SDF block.
     *
     * @param source the file to read from
     * @param sourceLanguage the sourceLanguage (must not be empty)
     * @param targetLanguage the targetLanguage, "" if the first
     *        non-source language found should be used
     * @param charset the charset used to read source
     * @throws java.io.IOException if the file can not be opened, the
     *         charset is not supported or reading the first line fails
     */
    public SDFReader(File source, String sourceLanguage, String targetLanguage,
            String charset) throws java.io.IOException {
        super(new InputStreamReader(new FileInputStream(source), charset));
        sourceFile = source;
        this.sourceLanguage = sourceLanguage;
        this.targetLanguage = targetLanguage;

        // Read the first line to get the first SDF block id, then rewind
        // so that readBlock() sees that line again.
        // NOTE(review): presumably reset() only works if the first line
        // fits into the read ahead limit - confirm against DataReader.
        mark(FIRST_LINE_READ_AHEAD);
        String line = readLine();
        if (line != null) {
            String[] splitLine = split(line);
            if (splitLine != null) {
                this.currentBlockId = getSDFBlockId(splitLine);
                this.currentBlockHash = this.currentBlockId.hashCode();
                // found the first block
                this.blockNr++;
            }
            // may be null if the first line has a wrong column count
            this.splittedLineBuffer = splitLine;
        }
        reset();
    }

    /**
     * Read the next block and merge its source and target line into
     * one Map.
     *
     * The target values are put first and the source values afterwards,
     * so for keys present in both lines the source line wins.
     *
     * @return the merged Map or null if readBlock() returned null or an
     *         array with a null entry
     * @throws IOException if reading the file fails
     * @see com.sun.star.tooling.converter.DataReader#getData()
     */
    public Map getData() throws IOException {
        this.skip = false;
        Map[] block = readBlock();
        if (block == null || block[1] == null || block[0] == null) {
            return null;
        }
        Map map = new HashMap();
        map.putAll(block[1]);
        map.putAll(block[0]);
        return map;
    }

    /**
     * Read a Block from the sdf file and return it.
     *
     * Lines are consumed until the first line of the following block is
     * seen; that line is buffered and handled first on the next call.
     * Lines with illegal characters or a wrong column count are logged
     * and counted as skipped. A block that ends without a source
     * language line is logged and skipped, a block that ends without a
     * target language line gets a created, empty target line.
     * When the end of the file is reached the last block is returned
     * once; the call after that prints the statistics and returns null.
     *
     * @return a Map[] where [0] holds the source and [1] the target
     *         language data, or null if the file is finished
     *
     * @throws java.io.IOException if reading the file fails
     */
    public Map[] readBlock() throws java.io.IOException {
        String line = EMPTY;
        String[] splittedLine;
        data[0] = new ExtMap();
        data[1] = new ExtMap();

        // read next line or use buffered line
        // (works because '||' is shortcut: nothing is read while a line is buffered)
        while (useBuffer || (line = readLine()) != null) {
            try {
                if (useBuffer) {
                    // first line of the current block, read by the previous call
                    line = this.lineBuffer;
                    splittedLine = this.splittedLineBuffer;
                    this.sdfMap = new ExtMap(SDFReader.fieldnames, splittedLine);
                    try {
                        checkLanguage(splittedLine);
                    } finally {
                        // the buffered line is consumed even if its language is not needed
                        useBuffer = false;
                    }
                } else {
                    // ...are there wrong characters?
                    if (indexOfIllegalChar(line) >= 0) {
                        throw new LineErrorException(getLineNumber()
                                + " : Line contains wrong character "
                                + " \n" + line);
                    }
                    // ...is the number of columns ok?
                    splittedLine = split(line);
                    if (splittedLine == null) {
                        throw new LineErrorException(super.getLineNumber()
                                + " : Line has wrong column number \n" + line);
                    }

                    // TODO makeDot is better in Data Handler
                    makeDot();

                    // ...is this line in a new SDF block ?
                    String blockId = getSDFBlockId(splittedLine);
                    if (blockId.equals(currentBlockId)) {
                        this.sdfMap = new ExtMap(SDFReader.fieldnames,
                                splittedLine);
                        // what language is in it ?
                        checkLanguage(splittedLine);
                    } else {
                        // we found the next block, but do we have the
                        // target text? Must happen before the buffer and
                        // the block id are overwritten below.
                        if (!foundTarget) {
                            createTargetLine();
                        }

                        blockNr++;
                        // read one line too much so buffer it
                        splittedLineBuffer = splittedLine;
                        lineBuffer = line;
                        useBuffer = true;

                        this.currentBlockId = blockId;
                        this.currentBlockHash = this.currentBlockId.hashCode();

                        // ...and what about the source text ?
                        if (!foundSource) {
                            OutputHandler
                                    .log("Error in Line:"
                                            + getLineNumber()
                                            + "Source Language is missing maybe "
                                            + "previous block has an error.\nBlock "
                                            + (blockNr - 1)
                                            + " is skipped. before line: \n"
                                            + line);

                            foundTarget = false; // no target without source
                            skippedBlocks++;
                            skippedLines++;
                            sourceLangMiss++;
                            // skip output of this block if no
                            // source language is found
                            continue;
                        }

                        // the finished block is complete: hand it out
                        break;
                    }
                }
            } catch (LineErrorException e) {
                OutputHandler.log(e.getMessage());
                this.skippedLines++;
            } catch (ConverterException e) {
                OutputHandler.log(e.getMessage());
            }
        }

        // did we read the whole stuff?
        if (null != line) {
            // no, a block ended because the next one started
            foundSource = false;
            foundTarget = false;
            return this.data;
        }

        // ok, it's the end but is everything written now?
        if (!endIt) {
            // there is something to write
            // but next time we can end it
            endIt = true;
            if (!foundTarget) {
                createTargetLine();
            }
            return this.data; // last lines
        }

        showStat();
        return null;
    }

    /**
     * Build the value stored under "BlockNr": the formatted block number,
     * a '-' and the hash of the current block id in hexadecimal.
     *
     * @return the block key of the current block
     */
    private String blockKey() {
        return blockNrFormatter.format(blockNr) + '-'
                + Integer.toHexString(this.currentBlockHash);
    }

    /**
     * Create a target language line for a block that has none.
     *
     * The line is built from the buffered first line of the block; the
     * language id is set to the target language and all text columns
     * are emptied.
     */
    private void createTargetLine() {
        targetLangMiss++;
        data[1] = new ExtMap(SDFReader.targetLineNames,
                splittedLineBuffer);
        data[1].put("TargetLanguageID", this.targetLanguage);
        // the texts of the template line belong to another language
        data[1].put("TargetText", EMPTY);
        data[1].put("TargetHText", EMPTY);
        data[1].put("TargetQText", EMPTY);
        data[1].put("TargetTitle", EMPTY);
        // same format as for the lines read from the file
        data[1].put("BlockNr", blockKey());
        targetLangCreate++;
    }

    /**
     * Show the statistic information got while
     * reading the file
     *
     * @throws IOException
     */
    private void showStat() throws IOException {
        OutputHandler.out(EMPTY);
        OutputHandler.out(EMPTY);
        OutputHandler.out("Blocks found: " + blockNr + " ");
        OutputHandler.out(EMPTY);
        OutputHandler.out("Lines read: " + (getLineNumber()) + " ");
        OutputHandler
                .dbg("Lines created " + (targetLangCreate) + " ");
        OutputHandler.dbg(" -------");
        OutputHandler.dbg("Lines total: "
                + (getLineNumber() + targetLangCreate) + " ");
        OutputHandler.dbg("Lines skipped: " + skippedLines + " ");

        OutputHandler.dbg("Source Language misses: " + sourceLangMiss + " ");
        OutputHandler.dbg("Target Language misses: " + targetLangMiss + " ");
        OutputHandler.dbg("Blocks found: " + blockNr + " ");
        OutputHandler.dbg("Blocks skipped: " + skippedBlocks + " ");
        if ((sourceLangMiss + skippedBlocks + skippedLines) > 0) {
            OutputHandler.out("\n---! Errors found !--- view Logfile.\n\n"
                    + "To enable logfile use -l option at command line.\n"
                    + "For help type 'convert -h {Enter}'.\n");
        }
    }

    /**
     * Check the current line whether the source language
     * or target language is in it and store it accordingly.
     *
     * If no target language was given, the language of the first line
     * that is not in the source language becomes the target language.
     *
     * @param splittedLine the columns of the current line
     * @throws ConverterException if a not needed language or no target language is found
     *             in this line
     * @throws IOException
     *
     */
    private void checkLanguage(String[] splittedLine)
            throws ConverterException, IOException {
        String langID = (String) sdfMap.get("LanguageID");

        // maybe the source language is in this line
        if (!foundSource && this.sourceLanguage.equals(langID)) {
            // found the next source language line
            this.data[0] = new ExtMap(SDFReader.sourceLineNames, splittedLine);
            this.data[0].put("BlockNr", blockKey());
            foundSource = true;
            return;
        }

        // or the target language is in this line
        if (!foundTarget) {
            // no target language is given at command line
            if (this.targetLanguage.equals(EMPTY)) {
                // try if we can use the current lines language for target
                // language
                if (!langID.equals(this.sourceLanguage)) {
                    // yes, we can use this lines language as target
                    this.targetLanguage = langID;

                    // source and target language both are known: show it
                    OutputHandler.out("Source Language is: "
                            + this.sourceLanguage + " ");
                    OutputHandler.out("Target Language is: "
                            + this.targetLanguage + " ");
                    OutputHandler.out(EMPTY);
                    System.out.println("Start");
                } else {
                    throw new ConverterException("(" + getLineNumber()
                            + ") No target language found: "
                            + this.targetLanguage);
                }
            }
            if (this.targetLanguage.equals(langID)) {
                // found the next target language line
                this.data[1] = new ExtMap(SDFReader.targetLineNames,
                        splittedLine);
                this.data[1].put("BlockNr", blockKey());
                foundTarget = true;
                return;
            }
        }

        // if we go here we dont need the found language...
        throw new ConverterException("found not needed language '"
                + this.sdfMap.get("LanguageID") + "' in Line: "
                + getLineNumber());
    }

    /**
     * Make a dot on the screen to show the user that it is going on;
     * one dot is printed whenever another 1000 lines have been read.
     */
    private void makeDot() {
        int count = (int) super.getLineNumber() / 1000;
        if (count > this.dotCount) {
            this.dotCount = count;
            OutputHandler.printDot();
        }
    }

    /**
     * split the SDFLine in its columns
     *
     * Note that String.split drops trailing empty columns, so a line
     * whose last column(s) are empty is reported as having a wrong
     * column count.
     *
     * @param line the current SDFLine
     * @return the split SDFLine as array of String
     *         or null if the line does not have exactly 15 columns
     * @throws IOException
     */
    private String[] split(String line) throws IOException {
        String[] splitLine = line.split("\t");
        return splitLine.length == COLUMN_COUNT ? splitLine : null;
    }

    /**
     * create a block Id from a split SDFLine
     * the blockId consists of the column one to eight of an SDF File
     *
     * @param splitLine the line to create a block id from
     * @return the blockId as String
     */
    private String getSDFBlockId(String[] splitLine) {
        StringBuffer blockId = new StringBuffer("");
        for (int i = 0; i < BLOCK_ID_COLUMNS; i++) {
            blockId.append(splitLine[i]);
        }
        return blockId.toString();
    }

    /**
     * Find the first not allowed character in this line, i.e. the first
     * character with a code below 30 that is not the tab (9).
     *
     * @param line the SDFLine to check
     * @return the index of the first not allowed character
     *         or -1 if everything is ok
     */
    private static int indexOfIllegalChar(String line) {
        for (int i = 0; i < line.length(); i++) {
            char c = line.charAt(i);
            if (c < LOWEST_ALLOWED_CHAR && c != '\t') {
                return i;
            }
        }
        return -1;
    }
}