1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 // MARKER(update_precomp.py): autogen include statement, do not remove 29 #include "precompiled_l10ntools.hxx" 30 #include <stdio.h> 31 #include <tools/fsys.hxx> 32 #include <tools/stream.hxx> 33 34 // local includes 35 #include "utf8conv.hxx" 36 37 #define GSI_FILE_UNKNOWN 0x0000 38 #define GSI_FILE_OLDSTYLE 0x0001 39 #define GSI_FILE_L10NFRAMEWORK 0x0002 40 41 /*****************************************************************************/ 42 sal_uInt16 GetGSIFileType( SvStream &rStream ) 43 /*****************************************************************************/ 44 { 45 sal_uInt16 nFileType = GSI_FILE_UNKNOWN; 46 47 sal_uLong nPos( rStream.Tell()); 48 rStream.Seek( STREAM_SEEK_TO_BEGIN ); 49 50 ByteString sLine; 51 while( !rStream.IsEof() && !sLine.Len()) 52 rStream.ReadLine( sLine ); 53 54 if( sLine.Len()) { 55 if( sLine.Search( "($$)" ) != STRING_NOTFOUND ) 56 nFileType = GSI_FILE_OLDSTYLE; 57 else 58 nFileType = GSI_FILE_L10NFRAMEWORK; 59 } 60 61 rStream.Seek( nPos ); 62 63 return nFileType; 64 } 65 66 /*****************************************************************************/ 67 ByteString GetGSILineId( const ByteString &rLine, sal_uInt16 nFileType ) 68 /*****************************************************************************/ 69 { 70 ByteString sId; 71 switch ( nFileType ) { 72 case GSI_FILE_OLDSTYLE: 73 sId = rLine; 74 sId.SearchAndReplaceAll( "($$)", "\t" ); 75 sId = sId.GetToken( 0, '\t' ); 76 break; 77 78 case GSI_FILE_L10NFRAMEWORK: 79 sId = rLine.GetToken( 0, '\t' ); 80 sId += "\t"; 81 sId += rLine.GetToken( 1, '\t' ); 82 sId += "\t"; 83 sId += rLine.GetToken( 4, '\t' ); 84 sId += "\t"; 85 sId += rLine.GetToken( 5, '\t' ); 86 break; 87 } 88 return sId; 89 } 90 91 /*****************************************************************************/ 92 ByteString GetGSILineLangId( const ByteString &rLine, sal_uInt16 nFileType ) 93 /*****************************************************************************/ 94 { 95 ByteString sLangId; 96 switch ( nFileType ) { 97 case GSI_FILE_OLDSTYLE: 98 sLangId = rLine; 99 sLangId.SearchAndReplaceAll( "($$)", "\t" ); 100 sLangId = sLangId.GetToken( 2, '\t' ); 101 break; 102 103 case GSI_FILE_L10NFRAMEWORK: 104 sLangId = rLine.GetToken( 9, '\t' ); 105 break; 106 } 107 return sLangId; 108 } 109 110 /*****************************************************************************/ 111 void ConvertGSILine( sal_Bool bToUTF8, ByteString &rLine, 112 rtl_TextEncoding nEncoding, sal_uInt16 nFileType ) 113 /*****************************************************************************/ 114 { 115 switch ( nFileType ) { 116 case GSI_FILE_OLDSTYLE: 117 if ( bToUTF8 ) 118 rLine = UTF8Converter::ConvertToUTF8( rLine, nEncoding ); 119 else 120 rLine = UTF8Converter::ConvertFromUTF8( rLine, nEncoding ); 121 break; 122 123 case GSI_FILE_L10NFRAMEWORK: { 124 ByteString sConverted; 125 for ( sal_uInt16 i = 0; i < rLine.GetTokenCount( '\t' ); i++ ) { 126 ByteString sToken = rLine.GetToken( i, '\t' ); 127 if (( i > 9 ) && ( i < 14 )) { 128 if( bToUTF8 ) 129 sToken = UTF8Converter::ConvertToUTF8( sToken, nEncoding ); 130 else 131 sToken = UTF8Converter::ConvertFromUTF8( sToken, nEncoding ); 132 } 133 if ( i ) 134 sConverted += "\t"; 135 sConverted += sToken; 136 } 137 rLine = sConverted; 138 } 139 break; 140 } 141 } 142 143 /*****************************************************************************/ 144 void Help() 145 /*****************************************************************************/ 146 { 147 fprintf( stdout, "\n" ); 148 fprintf( stdout, "gsiconv (c)1999 by StarOffice Entwicklungs GmbH\n" ); 149 fprintf( stdout, "===============================================\n" ); 150 fprintf( stdout, "\n" ); 151 fprintf( stdout, "gsiconv converts strings in GSI-Files (Gutschmitt Interface) from or to UTF-8\n" ); 152 fprintf( stdout, "\n" ); 153 fprintf( stdout, "Syntax: gsiconv (-t|-f langid charset)|(-p n) filename\n" ); 154 fprintf( stdout, "Switches: -t => conversion from charset to UTF-8\n" ); 155 fprintf( stdout, " -f => conversion from UTF-8 to charset\n" ); 156 fprintf( stdout, " -p n => creates several files with ca. n lines\n" ); 157 fprintf( stdout, "\n" ); 158 fprintf( stdout, "Allowed charsets:\n" ); 159 fprintf( stdout, " MS_932 => Japanese\n" ); 160 fprintf( stdout, " MS_936 => Chinese Simplified\n" ); 161 fprintf( stdout, " MS_949 => Korean\n" ); 162 fprintf( stdout, " MS_950 => Chinese Traditional\n" ); 163 fprintf( stdout, " MS_1250 => East Europe\n" ); 164 fprintf( stdout, " MS_1251 => Cyrillic\n" ); 165 fprintf( stdout, " MS_1252 => West Europe\n" ); 166 fprintf( stdout, " MS_1253 => Greek\n" ); 167 fprintf( stdout, " MS_1254 => Turkish\n" ); 168 fprintf( stdout, " MS_1255 => Hebrew\n" ); 169 fprintf( stdout, " MS_1256 => Arabic\n" ); 170 fprintf( stdout, "\n" ); 171 fprintf( stdout, "Allowed langids:\n" ); 172 fprintf( stdout, " 1 => ENGLISH_US\n" ); 173 fprintf( stdout, " 3 => PORTUGUESE \n" ); 174 fprintf( stdout, " 4 => GERMAN_DE (new german style)\n" ); 175 fprintf( stdout, " 7 => RUSSIAN\n" ); 176 fprintf( stdout, " 30 => GREEK\n" ); 177 fprintf( stdout, " 31 => DUTCH\n" ); 178 fprintf( stdout, " 33 => FRENCH\n" ); 179 fprintf( stdout, " 34 => SPANISH\n" ); 180 fprintf( stdout, " 35 => FINNISH\n" ); 181 fprintf( stdout, " 36 => HUNGARIAN\n" ); 182 fprintf( stdout, " 39 => ITALIAN\n" ); 183 fprintf( stdout, " 42 => CZECH\n" ); 184 fprintf( stdout, " 44 => ENGLISH (UK)\n" ); 185 fprintf( stdout, " 45 => DANISH\n" ); 186 fprintf( stdout, " 46 => SWEDISH\n" ); 187 fprintf( stdout, " 47 => NORWEGIAN\n" ); 188 fprintf( stdout, " 49 => GERMAN (old german style)\n" ); 189 fprintf( stdout, " 55 => PORTUGUESE_BRAZILIAN\n" ); 190 fprintf( stdout, " 81 => JAPANESE\n" ); 191 fprintf( stdout, " 82 => KOREAN\n" ); 192 fprintf( stdout, " 86 => CHINESE_SIMPLIFIED\n" ); 193 fprintf( stdout, " 88 => CHINESE_TRADITIONAL\n" ); 194 fprintf( stdout, " 90 => TURKISH\n" ); 195 fprintf( stdout, " 96 => ARABIC\n" ); 196 fprintf( stdout, " 97 => HEBREW\n" ); 197 fprintf( stdout, "\n" ); 198 } 199 200 /*****************************************************************************/ 201 #if defined(UNX) || defined(OS2) 202 int main( int argc, char *argv[] ) 203 #else 204 int _cdecl main( int argc, char *argv[] ) 205 #endif 206 /*****************************************************************************/ 207 { 208 if (( argc != 5 ) && ( argc != 4 )) { 209 Help(); 210 exit ( 0 ); 211 } 212 213 if ( argc == 4 ) { 214 if ( ByteString( argv[ 1 ] ) == "-p" ) { 215 216 DirEntry aSource = DirEntry( String( argv[ 3 ], RTL_TEXTENCODING_ASCII_US )); 217 if ( !aSource.Exists()) { 218 fprintf( stderr, "\nERROR: GSI-File %s not found!\n\n", ByteString( argv[ 3 ] ).GetBuffer()); 219 exit ( 2 ); 220 } 221 222 DirEntry aOutput( aSource ); 223 224 String sBase = aOutput.GetBase(); 225 String sExt = aOutput.GetExtension(); 226 227 String sGSI( argv[ 3 ], RTL_TEXTENCODING_ASCII_US ); 228 SvFileStream aGSI( sGSI, STREAM_STD_READ ); 229 if ( !aGSI.IsOpen()) { 230 fprintf( stderr, "\nERROR: Could not open GSI-File %s!\n\n", ByteString( argv[ 3 ] ).GetBuffer()); 231 exit ( 3 ); 232 } 233 234 sal_uInt16 nFileType( GetGSIFileType( aGSI )); 235 236 sal_uLong nMaxLines = (sal_uLong) ByteString( argv[ 2 ] ).ToInt64(); 237 if ( !nMaxLines ) { 238 fprintf( stderr, "\nERROR: Linecount must be at least 1!\n\n" ); 239 exit ( 3 ); 240 } 241 242 ByteString sGSILine; 243 ByteString sOldId; 244 sal_uLong nLine = 0; 245 sal_uLong nOutputFile = 1; 246 247 String sOutput( sBase ); 248 sOutput += String( "_", RTL_TEXTENCODING_ASCII_US ); 249 sOutput += String::CreateFromInt64( nOutputFile ); 250 if ( sExt.Len()) { 251 sOutput += String( ".", RTL_TEXTENCODING_ASCII_US ); 252 sOutput += sExt; 253 } 254 nOutputFile ++; 255 256 aOutput.SetName( sOutput ); 257 SvFileStream aOutputStream( aOutput.GetFull(), STREAM_STD_WRITE | STREAM_TRUNC ); 258 259 while ( !aGSI.IsEof()) { 260 261 aGSI.ReadLine( sGSILine ); 262 ByteString sId( GetGSILineId( sGSILine, nFileType )); 263 264 nLine++; 265 266 if (( nLine >= nMaxLines ) && ( sId != sOldId )) { 267 aOutputStream.Close(); 268 269 ByteString sText( aOutput.GetFull(), gsl_getSystemTextEncoding()); 270 sText += " with "; 271 sText += ByteString::CreateFromInt64( nLine ); 272 sText += " lines written."; 273 274 fprintf( stdout, "%s\n", sText.GetBuffer()); 275 String sOutput1( sBase ); 276 sOutput1 += String( "_", RTL_TEXTENCODING_ASCII_US ); 277 sOutput1 += String::CreateFromInt64( nOutputFile ); 278 if ( sExt.Len()) { 279 sOutput1 += String( ".", RTL_TEXTENCODING_ASCII_US ); 280 sOutput1 += sExt; 281 } 282 nOutputFile ++; 283 284 aOutput.SetName( sOutput1 ); 285 286 aOutputStream.Open( aOutput.GetFull(), STREAM_STD_WRITE | STREAM_TRUNC ); 287 nLine = 0; 288 } 289 290 aOutputStream.WriteLine( sGSILine ); 291 292 sOldId = sId; 293 } 294 295 aGSI.Close(); 296 aOutputStream.Close(); 297 298 ByteString sText( aOutput.GetFull(), RTL_TEXTENCODING_ASCII_US ); 299 sText += " with "; 300 sText += ByteString::CreateFromInt64( nLine ); 301 sText += " lines written."; 302 } 303 else { 304 Help(); 305 exit( 1 ); 306 } 307 } 308 else { 309 if ( ByteString( argv[ 1 ] ) == "-t" || ByteString( argv[ 1 ] ) == "-f" ) { 310 rtl_TextEncoding nEncoding; 311 312 ByteString sCurLangId( argv[ 2 ] ); 313 314 ByteString sCharset( argv[ 3 ] ); 315 sCharset.ToUpperAscii(); 316 317 if ( sCharset == "MS_932" ) nEncoding = RTL_TEXTENCODING_MS_932; 318 else if ( sCharset == "MS_936" ) nEncoding = RTL_TEXTENCODING_MS_936; 319 else if ( sCharset == "MS_949" ) nEncoding = RTL_TEXTENCODING_MS_949; 320 else if ( sCharset == "MS_950" ) nEncoding = RTL_TEXTENCODING_MS_950; 321 else if ( sCharset == "MS_1250" ) nEncoding = RTL_TEXTENCODING_MS_1250; 322 else if ( sCharset == "MS_1251" ) nEncoding = RTL_TEXTENCODING_MS_1251; 323 else if ( sCharset == "MS_1252" ) nEncoding = RTL_TEXTENCODING_MS_1252; 324 else if ( sCharset == "MS_1253" ) nEncoding = RTL_TEXTENCODING_MS_1253; 325 else if ( sCharset == "MS_1254" ) nEncoding = RTL_TEXTENCODING_MS_1254; 326 else if ( sCharset == "MS_1255" ) nEncoding = RTL_TEXTENCODING_MS_1255; 327 else if ( sCharset == "MS_1256" ) nEncoding = RTL_TEXTENCODING_MS_1256; 328 else if ( sCharset == "MS_1257" ) nEncoding = RTL_TEXTENCODING_MS_1257; 329 else if ( sCharset == "UTF8" ) nEncoding = RTL_TEXTENCODING_UTF8; 330 331 else { 332 Help(); 333 exit ( 1 ); 334 } 335 336 DirEntry aSource = DirEntry( String( argv[ 4 ], RTL_TEXTENCODING_ASCII_US )); 337 if ( !aSource.Exists()) { 338 fprintf( stderr, "\nERROR: GSI-File %s not found!\n\n", ByteString( argv[ 3 ] ).GetBuffer()); 339 exit ( 2 ); 340 } 341 342 String sGSI( argv[ 4 ], RTL_TEXTENCODING_ASCII_US ); 343 SvFileStream aGSI( sGSI, STREAM_STD_READ ); 344 if ( !aGSI.IsOpen()) { 345 fprintf( stderr, "\nERROR: Could not open GSI-File %s!\n\n", ByteString( argv[ 3 ] ).GetBuffer()); 346 exit ( 3 ); 347 } 348 sal_uInt16 nFileType( GetGSIFileType( aGSI )); 349 350 ByteString sGSILine; 351 while ( !aGSI.IsEof()) { 352 353 aGSI.ReadLine( sGSILine ); 354 ByteString sLangId( GetGSILineLangId( sGSILine, nFileType )); 355 if ( sLangId == sCurLangId ) 356 ConvertGSILine(( ByteString( argv[ 1 ] ) == "-t" ), sGSILine, nEncoding, nFileType ); 357 358 fprintf( stdout, "%s\n", sGSILine.GetBuffer()); 359 } 360 361 aGSI.Close(); 362 } 363 else { 364 Help(); 365 exit( 1 ); 366 } 367 } 368 return 0; 369 } 370