1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 #include <stdlib.h> 29 #include <stdio.h> 30 #include <fcntl.h> 31 #include <errno.h> 32 #include <string.h> 33 #include <unistd.h> 34 #include <ctype.h> 35 #include <sal/alloca.h> 36 37 #include <rtl/ustring.hxx> 38 39 #include <map> 40 #include <string> 41 42 /***************************************************************************** 43 * typedefs 44 *****************************************************************************/ 45 46 typedef std::map< const std::string, rtl_TextEncoding > EncodingMap; 47 48 struct _pair { 49 const char *key; 50 rtl_TextEncoding value; 51 }; 52 53 static int _pair_compare (const char *key, const _pair *pair); 54 static const _pair* _pair_search (const char *key, const _pair *base, unsigned int member ); 55 56 57 const _pair _ms_encoding_list[] = { 58 { "0", RTL_TEXTENCODING_UTF8 }, 59 { "1250", RTL_TEXTENCODING_MS_1250 }, 60 { "1251", RTL_TEXTENCODING_MS_1251 }, 61 { "1252", RTL_TEXTENCODING_MS_1252 }, 62 { "1253", RTL_TEXTENCODING_MS_1253 }, 63 { "1254", RTL_TEXTENCODING_MS_1254 }, 64 { "1255", RTL_TEXTENCODING_MS_1255 }, 65 { "1256", RTL_TEXTENCODING_MS_1256 }, 66 { "1257", RTL_TEXTENCODING_MS_1257 }, 67 { "1258", RTL_TEXTENCODING_MS_1258 }, 68 { "874", RTL_TEXTENCODING_MS_874 }, 69 { "932", RTL_TEXTENCODING_MS_932 }, 70 { "936", RTL_TEXTENCODING_MS_936 }, 71 { "949", RTL_TEXTENCODING_MS_949 }, 72 { "950", RTL_TEXTENCODING_MS_950 } 73 }; 74 75 76 /***************************************************************************** 77 * fgets that work with unix line ends on Windows 78 *****************************************************************************/ 79 80 char * my_fgets(char *s, int n, FILE *fp) 81 { 82 int i; 83 for( i=0; i < n-1; i++ ) 84 { 85 int c = getc(fp); 86 87 if( c == EOF ) 88 break; 89 90 s[i] = (char) c; 91 92 if( s[i] == '\n' ) 93 { 94 i++; 95 break; 96 } 97 } 98 99 if( i>0 ) 100 { 101 s[i] = '\0'; 102 return s; 103 } 104 else 105 { 106 return NULL; 107 } 108 } 109 110 /***************************************************************************** 111 * compare function for binary search 112 *****************************************************************************/ 113 114 static int 115 _pair_compare (const char *key, const _pair *pair) 116 { 117 int result = rtl_str_compareIgnoreAsciiCase( key, pair->key ); 118 return result; 119 } 120 121 /***************************************************************************** 122 * binary search on encoding tables 123 *****************************************************************************/ 124 125 static const _pair* 126 _pair_search (const char *key, const _pair *base, unsigned int member ) 127 { 128 unsigned int lower = 0; 129 unsigned int upper = member; 130 unsigned int current; 131 int comparison; 132 133 /* check for validity of input */ 134 if ( (key == NULL) || (base == NULL) || (member == 0) ) 135 return NULL; 136 137 /* binary search */ 138 while ( lower < upper ) 139 { 140 current = (lower + upper) / 2; 141 comparison = _pair_compare( key, base + current ); 142 if (comparison < 0) 143 upper = current; 144 else 145 if (comparison > 0) 146 lower = current + 1; 147 else 148 return base + current; 149 } 150 151 return NULL; 152 } 153 154 155 /************************************************************************ 156 * read_encoding_table 157 ************************************************************************/ 158 159 void read_encoding_table(char * file, EncodingMap& aEncodingMap) 160 { 161 FILE * fp = fopen(file, "r"); 162 if ( ! fp ) { 163 fprintf(stderr, "ulfconv: %s %s\n", file, strerror(errno)); 164 exit(2); 165 } 166 167 char buffer[512]; 168 while ( NULL != my_fgets(buffer, sizeof(buffer), fp) ) { 169 170 // strip comment lines 171 if ( buffer[0] == '#' ) 172 continue; 173 174 // find end of language string 175 char * cp; 176 for ( cp = buffer; ! isspace(*cp); cp++ ) 177 ; 178 *cp = '\0'; 179 180 // find start of codepage string 181 for ( ++cp; isspace(*cp); ++cp ) 182 ; 183 char * codepage = cp; 184 185 // find end of codepage string 186 for ( ++cp; ! isspace(*cp); ++cp ) 187 ; 188 *cp = '\0'; 189 190 // find the correct mapping for codepage 191 const unsigned int members = sizeof( _ms_encoding_list ) / sizeof( _pair ); 192 const _pair *encoding = _pair_search( codepage, _ms_encoding_list, members ); 193 194 if ( encoding != NULL ) { 195 const std::string language(buffer); 196 aEncodingMap.insert( EncodingMap::value_type(language, encoding->value) ); 197 } 198 } 199 200 fclose(fp); 201 } 202 203 /************************************************************************ 204 * print_legacy_mixed 205 ************************************************************************/ 206 207 void print_legacy_mixed( 208 FILE * ostream, 209 const rtl::OUString& aString, 210 const std::string& language, 211 EncodingMap& aEncodingMap) 212 { 213 EncodingMap::iterator iter = aEncodingMap.find(language); 214 215 if ( iter != aEncodingMap.end() ) { 216 fputs(OUStringToOString(aString, iter->second).getStr(), ostream); 217 } else { 218 fprintf(stderr, "ulfconv: WARNING: no legacy encoding found for %s\n", language.c_str()); 219 } 220 } 221 222 /************************************************************************ 223 * print_java_style 224 ************************************************************************/ 225 226 void print_java_style(FILE * ostream, const rtl::OUString& aString) 227 { 228 int imax = aString.getLength(); 229 for (int i = 0; i < imax; i++) { 230 sal_Unicode uc = aString[i]; 231 if ( uc < 128 ) { 232 fprintf(ostream, "%c", (char) uc); 233 } else { 234 fprintf(ostream, "\\u%2.2x%2.2x", uc >> 8, uc & 0xFF ); 235 } 236 } 237 } 238 239 /************************************************************************ 240 * main 241 ************************************************************************/ 242 243 int main( int argc, char * const argv[] ) 244 { 245 EncodingMap aEncodingMap; 246 247 FILE *istream = stdin; 248 FILE *ostream = stdout; 249 250 char *outfile = NULL; 251 252 int errflg = 0; 253 int argi; 254 255 for( argi=1; argi < argc; argi++ ) 256 { 257 if( argv[argi][0] == '-' && argv[argi][2] == '\0' ) 258 { 259 switch(argv[argi][1]) { 260 case 'o': 261 if (argi+1 >= argc || argv[argi+1][0] == '-') 262 { 263 fprintf(stderr, "Option -%c requires an operand\n", argv[argi][1]); 264 errflg++; 265 break; 266 } 267 268 ++argi; 269 outfile = argv[argi]; 270 break; 271 case 't': 272 if (argi+1 >= argc || argv[argi+1][0] == '-') 273 { 274 fprintf(stderr, "Option -%c requires an operand\n", argv[argi][1]); 275 errflg++; 276 break; 277 } 278 279 read_encoding_table(argv[++argi], aEncodingMap); 280 break; 281 default: 282 fprintf(stderr, "Unrecognized option: -%c\n", argv[argi][1]); 283 errflg++; 284 } 285 } 286 else 287 { 288 break; 289 } 290 } 291 292 if (errflg) { 293 fprintf(stderr, "Usage: ulfconv [-o <output file>] [-t <encoding table>] [<ulf file>]\n"); 294 exit(2); 295 } 296 297 /* assign input file to stdin */ 298 if ( argi < argc ) 299 { 300 istream = fopen(argv[argi], "r"); 301 if ( istream == NULL ) { 302 fprintf(stderr, "ulfconv: %s : %s\n", argv[argi], strerror(errno)); 303 exit(2); 304 } 305 } 306 307 /* open output file if any */ 308 if ( outfile ) 309 { 310 ostream = fopen(outfile, "w"); 311 if ( ostream == NULL ) { 312 fprintf(stderr, "ulfconv: %s : %s\n", outfile, strerror(errno)); 313 fclose(istream); 314 exit(2); 315 } 316 } 317 318 /* read line by line from stdin */ 319 char buffer[65536]; 320 while ( NULL != fgets(buffer, sizeof(buffer), istream) ) { 321 322 /* only handle lines containing " = " */ 323 char * cp = strstr(buffer, " = \""); 324 if ( cp ) { 325 rtl::OUString aString; 326 327 /* find end of lang string */ 328 int n; 329 for ( n=0; ! isspace(buffer[n]); n++ ) 330 ; 331 332 std::string line = buffer; 333 std::string lang(line, 0, n); 334 335 cp += 4; 336 rtl_string2UString( &aString.pData, cp, strrchr(cp, '\"') - cp, 337 RTL_TEXTENCODING_UTF8, OSTRING_TO_OUSTRING_CVTFLAGS ); 338 339 fprintf(ostream, "%s = \"", lang.c_str()); 340 341 if ( aEncodingMap.empty() ) { 342 print_java_style(ostream, aString); 343 } else { 344 print_legacy_mixed(ostream, aString, lang, aEncodingMap); 345 } 346 347 fprintf(ostream, "\"\n"); 348 349 350 } else { 351 fputs(buffer, ostream); 352 } 353 } 354 355 fclose(ostream); 356 fclose(istream); 357 } 358