1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 // MARKER(update_precomp.py): autogen include statement, do not remove 25 #include "precompiled_i18npool.hxx" 26 27 #include <stdio.h> 28 #include <string.h> 29 #include <stdlib.h> 30 #include <sal/main.h> 31 #include <sal/types.h> 32 #include <rtl/strbuf.hxx> 33 #include <rtl/ustring.hxx> 34 35 #include <vector> 36 37 using namespace ::rtl; 38 39 void make_hhc_char(FILE *sfp, FILE *cfp); 40 void make_stc_char(FILE *sfp, FILE *cfp); 41 void make_stc_word(FILE *sfp, FILE *cfp); 42 43 /* Main Procedure */ 44 45 SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) 46 { 47 FILE *sfp, *cfp; 48 49 if (argc < 4) exit(-1); 50 51 52 sfp = fopen(argv[2], "rb"); // open the source file for read; 53 if (sfp == NULL) 54 { 55 printf("Open the dictionary source file failed."); 56 return -1; 57 } 58 59 // create the C source file to write 60 cfp = fopen(argv[3], "wb"); 61 if (cfp == NULL) { 62 fclose(sfp); 63 printf("Can't create the C source file."); 64 return -1; 65 } 66 67 fprintf(cfp, "/*\n"); 68 fprintf(cfp, " * Copyright(c) 1999 - 2000, Sun Microsystems, Inc.\n"); 69 fprintf(cfp, " * All Rights Reserved.\n"); 70 fprintf(cfp, " */\n\n"); 71 fprintf(cfp, "/* !!!The file is generated automatically. DONOT edit the file manually!!! */\n\n"); 72 fprintf(cfp, "#include <sal/types.h>\n"); 73 fprintf(cfp, "#include <textconversion.hxx>\n"); 74 fprintf(cfp, "\nextern \"C\" {\n"); 75 76 if (strcmp(argv[1], "hhc_char") == 0) 77 make_hhc_char(sfp, cfp); 78 else if (strcmp(argv[1], "stc_char") == 0) 79 make_stc_char(sfp, cfp); 80 else if (strcmp(argv[1], "stc_word") == 0) 81 make_stc_word(sfp, cfp); 82 83 fprintf (cfp, "}\n"); 84 85 fclose(sfp); 86 fclose(cfp); 87 88 return 0; 89 } // end of main 90 91 // Hangul/Hanja character conversion 92 void make_hhc_char(FILE *sfp, FILE *cfp) 93 { 94 sal_Int32 count, address, i, j, k; 95 sal_Unicode Hanja2HangulData[0x10000]; 96 for (i = 0; i < 0x10000; i++) { 97 Hanja2HangulData[i] = 0; 98 } 99 sal_uInt16 Hangul2HanjaData[10000][3]; 100 101 // generate main dict. data array 102 fprintf(cfp, "\nstatic const sal_Unicode Hangul2HanjaData[] = {"); 103 104 sal_Char Cstr[1024]; 105 count = 0; 106 address = 0; 107 while (fgets(Cstr, 1024, sfp)) { 108 // input file is in UTF-8 encoding (Hangul:Hanja) 109 // don't convert last new line character to Ostr. 110 OUString Ostr((const sal_Char *)Cstr, strlen(Cstr) - 1, RTL_TEXTENCODING_UTF8); 111 const sal_Unicode *Ustr = Ostr.getStr(); 112 sal_Int32 len = Ostr.getLength(); 113 114 Hangul2HanjaData[count][0] = Ustr[0]; 115 Hangul2HanjaData[count][1] = sal::static_int_cast<sal_uInt16>( address ); 116 Hangul2HanjaData[count][2] = sal::static_int_cast<sal_uInt16>( len - 2 ); 117 count++; 118 119 for (i = 2; i < len; i++) { 120 Hanja2HangulData[Ustr[i]] = Ustr[0]; 121 if (address++ % 16 == 0) 122 fprintf(cfp, "\n\t"); 123 fprintf(cfp, "0x%04x, ", Ustr[i]); 124 } 125 } 126 fprintf(cfp, "\n};\n"); 127 128 fprintf(cfp, "\nstatic const com::sun::star::i18n::Hangul_Index Hangul2HanjaIndex[] = {\n"); 129 for (i = 0; i < count; i++) 130 fprintf(cfp, "\t{ 0x%04x, 0x%04x, 0x%02x },\n", 131 Hangul2HanjaData[i][0], 132 Hangul2HanjaData[i][1], 133 Hangul2HanjaData[i][2]); 134 fprintf(cfp, "};\n"); 135 136 fprintf(cfp, "\nstatic const sal_uInt16 Hanja2HangulIndex[] = {"); 137 138 address=0; 139 for (i = 0; i < 0x10; i++) { 140 fprintf(cfp, "\n\t"); 141 for (j = 0; j < 0x10; j++) { 142 for (k = 0; k < 0x100; k++) { 143 if (Hanja2HangulData[((i*0x10)+j)*0x100+k] != 0) 144 break; 145 } 146 fprintf( 147 cfp, "0x%04lx, ", 148 sal::static_int_cast< unsigned long >( 149 k < 0x100 ? (address++)*0x100 : 0xFFFF)); 150 } 151 } 152 fprintf(cfp, "\n};\n"); 153 154 fprintf(cfp, "\nstatic const sal_Unicode Hanja2HangulData[] = {"); 155 156 for (i = 0; i < 0x100; i++) { 157 for (j = 0; j < 0x100; j++) { 158 if (Hanja2HangulData[i*0x100+j] != 0) 159 break; 160 } 161 if (j < 0x100) { 162 for (j = 0; j < 0x10; j++) { 163 fprintf(cfp, "\n\t"); 164 for (k = 0; k < 0x10; k++) { 165 sal_Unicode c = Hanja2HangulData[((i*0x10+j)*0x10)+k]; 166 fprintf(cfp, "0x%04x, ", c ? c : 0xFFFF); 167 } 168 } 169 } 170 } 171 fprintf(cfp, "\n};\n"); 172 173 // create function to return arrays 174 fprintf (cfp, "\tconst sal_Unicode* getHangul2HanjaData() { return Hangul2HanjaData; }\n"); 175 fprintf (cfp, "\tconst com::sun::star::i18n::Hangul_Index* getHangul2HanjaIndex() { return Hangul2HanjaIndex; }\n"); 176 fprintf (cfp, "\tsal_Int16 getHangul2HanjaIndexCount() { return sizeof(Hangul2HanjaIndex) / sizeof(com::sun::star::i18n::Hangul_Index); }\n"); 177 fprintf (cfp, "\tconst sal_uInt16* getHanja2HangulIndex() { return Hanja2HangulIndex; }\n"); 178 fprintf (cfp, "\tconst sal_Unicode* getHanja2HangulData() { return Hanja2HangulData; }\n"); 179 } 180 181 // Simplified/Traditional Chinese character conversion 182 void make_stc_char(FILE *sfp, FILE *cfp) 183 { 184 sal_Int32 address, i, j, k; 185 sal_Unicode SChinese2TChineseData[0x10000]; 186 sal_Unicode SChinese2VChineseData[0x10000]; 187 sal_Unicode TChinese2SChineseData[0x10000]; 188 for (i = 0; i < 0x10000; i++) { 189 SChinese2TChineseData[i] = 0; 190 SChinese2VChineseData[i] = 0; 191 TChinese2SChineseData[i] = 0; 192 } 193 194 sal_Char Cstr[1024]; 195 while (fgets(Cstr, 1024, sfp)) { 196 // input file is in UTF-8 encoding (SChinese:TChinese) 197 // don't convert last new line character to Ostr. 198 OUString Ostr((const sal_Char *)Cstr, strlen(Cstr) - 1, RTL_TEXTENCODING_UTF8); 199 const sal_Unicode *Ustr = Ostr.getStr(); 200 sal_Int32 len = Ostr.getLength(); 201 if (Ustr[1] == sal_Unicode('v')) 202 SChinese2VChineseData[Ustr[0]] = Ustr[2]; 203 else { 204 SChinese2TChineseData[Ustr[0]] = Ustr[2]; 205 if (SChinese2VChineseData[Ustr[0]] == 0) 206 SChinese2VChineseData[Ustr[0]] = Ustr[2]; 207 } 208 for (i = 2; i < len; i++) 209 TChinese2SChineseData[Ustr[i]] = Ustr[0]; 210 } 211 212 fprintf(cfp, "\nstatic const sal_uInt16 STC_CharIndex_S2T[] = {"); 213 214 address=0; 215 for (i = 0; i < 0x10; i++) { 216 fprintf(cfp, "\n\t"); 217 for (j = 0; j < 0x10; j++) { 218 for (k = 0; k < 0x100; k++) { 219 if (SChinese2TChineseData[((i*0x10)+j)*0x100+k] != 0) 220 break; 221 } 222 fprintf( 223 cfp, "0x%04lx, ", 224 sal::static_int_cast< unsigned long >( 225 k < 0x100 ? (address++)*0x100 : 0xFFFF)); 226 } 227 } 228 fprintf(cfp, "\n};\n"); 229 230 fprintf(cfp, "\nstatic const sal_Unicode STC_CharData_S2T[] = {"); 231 232 for (i = 0; i < 0x100; i++) { 233 for (j = 0; j < 0x100; j++) { 234 if (SChinese2TChineseData[i*0x100+j] != 0) 235 break; 236 } 237 if (j < 0x100) { 238 for (j = 0; j < 0x10; j++) { 239 fprintf(cfp, "\n\t"); 240 for (k = 0; k < 0x10; k++) { 241 sal_Unicode c = SChinese2TChineseData[((i*0x10+j)*0x10)+k]; 242 fprintf(cfp, "0x%04x, ", c ? c : 0xFFFF); 243 } 244 } 245 } 246 } 247 fprintf(cfp, "\n};\n"); 248 249 fprintf(cfp, "\nstatic const sal_uInt16 STC_CharIndex_S2V[] = {"); 250 251 address=0; 252 for (i = 0; i < 0x10; i++) { 253 fprintf(cfp, "\n\t"); 254 for (j = 0; j < 0x10; j++) { 255 for (k = 0; k < 0x100; k++) { 256 if (SChinese2VChineseData[((i*0x10)+j)*0x100+k] != 0) 257 break; 258 } 259 fprintf( 260 cfp, "0x%04lx, ", 261 sal::static_int_cast< unsigned long >( 262 k < 0x100 ? (address++)*0x100 : 0xFFFF)); 263 } 264 } 265 fprintf(cfp, "\n};\n"); 266 267 fprintf(cfp, "\nstatic const sal_Unicode STC_CharData_S2V[] = {"); 268 269 for (i = 0; i < 0x100; i++) { 270 for (j = 0; j < 0x100; j++) { 271 if (SChinese2VChineseData[i*0x100+j] != 0) 272 break; 273 } 274 if (j < 0x100) { 275 for (j = 0; j < 0x10; j++) { 276 fprintf(cfp, "\n\t"); 277 for (k = 0; k < 0x10; k++) { 278 sal_Unicode c = SChinese2VChineseData[((i*0x10+j)*0x10)+k]; 279 fprintf(cfp, "0x%04x, ", c ? c : 0xFFFF); 280 } 281 } 282 } 283 } 284 fprintf(cfp, "\n};\n"); 285 286 fprintf(cfp, "\nstatic const sal_uInt16 STC_CharIndex_T2S[] = {"); 287 288 address=0; 289 for (i = 0; i < 0x10; i++) { 290 fprintf(cfp, "\n\t"); 291 for (j = 0; j < 0x10; j++) { 292 for (k = 0; k < 0x100; k++) { 293 if (TChinese2SChineseData[((i*0x10)+j)*0x100+k] != 0) 294 break; 295 } 296 fprintf( 297 cfp, "0x%04lx, ", 298 sal::static_int_cast< unsigned long >( 299 k < 0x100 ? (address++)*0x100 : 0xFFFF)); 300 } 301 } 302 fprintf(cfp, "\n};\n"); 303 304 fprintf(cfp, "\nstatic const sal_Unicode STC_CharData_T2S[] = {"); 305 306 for (i = 0; i < 0x100; i++) { 307 for (j = 0; j < 0x100; j++) { 308 if (TChinese2SChineseData[i*0x100+j] != 0) 309 break; 310 } 311 if (j < 0x100) { 312 for (j = 0; j < 0x10; j++) { 313 fprintf(cfp, "\n\t"); 314 for (k = 0; k < 0x10; k++) { 315 sal_Unicode c = TChinese2SChineseData[((i*0x10+j)*0x10)+k]; 316 fprintf(cfp, "0x%04x, ", c ? c : 0xFFFF); 317 } 318 } 319 } 320 } 321 fprintf(cfp, "\n};\n"); 322 323 // create function to return arrays 324 fprintf (cfp, "\tconst sal_uInt16* getSTC_CharIndex_S2T() { return STC_CharIndex_S2T; }\n"); 325 fprintf (cfp, "\tconst sal_Unicode* getSTC_CharData_S2T() { return STC_CharData_S2T; }\n"); 326 fprintf (cfp, "\tconst sal_uInt16* getSTC_CharIndex_S2V() { return STC_CharIndex_S2V; }\n"); 327 fprintf (cfp, "\tconst sal_Unicode* getSTC_CharData_S2V() { return STC_CharData_S2V; }\n"); 328 fprintf (cfp, "\tconst sal_uInt16* getSTC_CharIndex_T2S() { return STC_CharIndex_T2S; }\n"); 329 fprintf (cfp, "\tconst sal_Unicode* getSTC_CharData_T2S() { return STC_CharData_T2S; }\n"); 330 } 331 332 333 typedef struct { 334 sal_uInt16 address; 335 sal_Int32 len; 336 sal_Unicode *data; 337 } Index; 338 339 extern "C" { 340 int Index_comp(const void* s1, const void* s2) 341 { 342 Index *p1 = (Index*)s1, *p2 = (Index*)s2; 343 int result = p1->len - p2->len; 344 for (int i = 0; result == 0 && i < p1->len; i++) 345 result = *(p1->data+i) - *(p2->data+i); 346 return result; 347 } 348 } 349 350 // Simplified/Traditional Chinese word conversion 351 void make_stc_word(FILE *sfp, FILE *cfp) 352 { 353 sal_Int32 count, i, length; 354 sal_Unicode STC_WordData[0x10000]; 355 std::vector<Index> STC_WordEntry_S2T(0x10000); 356 std::vector<Index> STC_WordEntry_T2S(0x10000); 357 sal_Int32 count_S2T = 0, count_T2S = 0; 358 sal_Int32 line = 0, char_total = 0; 359 sal_Char Cstr[1024]; 360 361 while (fgets(Cstr, 1024, sfp)) { 362 // input file is in UTF-8 encoding (SChinese:TChinese) 363 // don't convert last new line character to Ostr. 364 OUString Ostr((const sal_Char *)Cstr, strlen(Cstr) - 1, RTL_TEXTENCODING_UTF8); 365 sal_Int32 len = Ostr.getLength(); 366 if (char_total + len + 1 > 0xFFFF) { 367 fprintf(stderr, "Word Dictionary stc_word.dic is too big (line %ld)", sal::static_int_cast< long >(line)); 368 return; 369 } 370 sal_Int32 sep=-1, eq=-1, gt=-1, lt=-1; 371 if (((sep = eq = Ostr.indexOf(sal_Unicode('='))) > 0) || 372 ((sep = gt = Ostr.indexOf(sal_Unicode('>'))) > 0) || 373 ((sep = lt = Ostr.indexOf(sal_Unicode('<'))) > 0)) { 374 375 if (eq > 0 || gt > 0) { 376 STC_WordEntry_S2T[count_S2T].address = sal::static_int_cast<sal_uInt16>( char_total ); 377 STC_WordEntry_S2T[count_S2T].len = sep; 378 STC_WordEntry_S2T[count_S2T++].data = &STC_WordData[char_total]; 379 } 380 if (eq > 0 || lt > 0) { 381 STC_WordEntry_T2S[count_T2S].address = sal::static_int_cast<sal_uInt16>( char_total + sep + 1 ); 382 STC_WordEntry_T2S[count_T2S].len = len - sep - 1; 383 STC_WordEntry_T2S[count_T2S++].data = &STC_WordData[char_total + sep + 1]; 384 } 385 for (i = 0; i < len; i++) 386 STC_WordData[char_total++] = (i == sep) ? 0 : Ostr[i]; 387 STC_WordData[char_total++] = 0; 388 } else { 389 fprintf(stderr, "Invalid entry in stc_word.dic (line %ld)", sal::static_int_cast< long >(line)); 390 return; 391 } 392 line++; 393 } 394 395 if (char_total > 0) { 396 fprintf(cfp, "\nstatic const sal_Unicode STC_WordData[] = {"); 397 for (i = 0; i < char_total; i++) { 398 if (i % 32 == 0) fprintf(cfp, "\n\t"); 399 fprintf(cfp, "0x%04x, ", STC_WordData[i]); 400 } 401 fprintf(cfp, "\n};\n"); 402 403 fprintf(cfp, "\nstatic sal_Int32 STC_WordData_Count = %ld;\n", sal::static_int_cast< long >(char_total)); 404 405 // create function to return arrays 406 fprintf (cfp, "\tconst sal_Unicode* getSTC_WordData(sal_Int32& count) { count = STC_WordData_Count; return STC_WordData; }\n"); 407 } else { 408 fprintf (cfp, "\tconst sal_Unicode* getSTC_WordData(sal_Int32& count) { count = 0; return NULL; }\n"); 409 } 410 411 sal_uInt16 STC_WordIndex[0x100]; 412 413 if (count_S2T > 0) { 414 qsort(&STC_WordEntry_S2T[0], count_S2T, sizeof(Index), Index_comp); 415 416 fprintf(cfp, "\nstatic const sal_uInt16 STC_WordEntry_S2T[] = {"); 417 count = 0; 418 length = 0; 419 for (i = 0; i < count_S2T; i++) { 420 if (i % 32 == 0) fprintf(cfp, "\n\t"); 421 fprintf(cfp, "0x%04x, ", STC_WordEntry_S2T[i].address); 422 if (STC_WordEntry_S2T[i].len != length) { 423 length = STC_WordEntry_S2T[i].len; 424 while (count <= length) 425 STC_WordIndex[count++] = sal::static_int_cast<sal_uInt16>(i); 426 } 427 } 428 fprintf(cfp, "\n};\n"); 429 STC_WordIndex[count++] = sal::static_int_cast<sal_uInt16>(i); 430 431 fprintf(cfp, "\nstatic const sal_uInt16 STC_WordIndex_S2T[] = {"); 432 for (i = 0; i < count; i++) { 433 if (i % 16 == 0) fprintf(cfp, "\n\t"); 434 fprintf(cfp, "0x%04x, ", STC_WordIndex[i]); 435 } 436 fprintf(cfp, "\n};\n"); 437 438 fprintf(cfp, "\nstatic sal_Int32 STC_WordIndex_S2T_Count = %ld;\n", sal::static_int_cast< long >(length)); 439 fprintf (cfp, "\tconst sal_uInt16* getSTC_WordEntry_S2T() { return STC_WordEntry_S2T; }\n"); 440 fprintf (cfp, "\tconst sal_uInt16* getSTC_WordIndex_S2T(sal_Int32& count) { count = STC_WordIndex_S2T_Count; return STC_WordIndex_S2T; }\n"); 441 } else { 442 fprintf (cfp, "\tconst sal_uInt16* getSTC_WordEntry_S2T() { return NULL; }\n"); 443 fprintf (cfp, "\tconst sal_uInt16* getSTC_WordIndex_S2T(sal_Int32& count) { count = 0; return NULL; }\n"); 444 } 445 446 if (count_T2S > 0) { 447 qsort(&STC_WordEntry_T2S[0], count_T2S, sizeof(Index), Index_comp); 448 449 fprintf(cfp, "\nstatic const sal_uInt16 STC_WordEntry_T2S[] = {"); 450 count = 0; 451 length = 0; 452 for (i = 0; i < count_T2S; i++) { 453 if (i % 32 == 0) fprintf(cfp, "\n\t"); 454 fprintf(cfp, "0x%04x, ", STC_WordEntry_T2S[i].address); 455 if (STC_WordEntry_T2S[i].len != length) { 456 length = STC_WordEntry_T2S[i].len; 457 while (count <= length) 458 STC_WordIndex[count++] = sal::static_int_cast<sal_uInt16>(i); 459 } 460 } 461 STC_WordIndex[count++] = sal::static_int_cast<sal_uInt16>(i); 462 fprintf(cfp, "\n};\n"); 463 464 fprintf(cfp, "\nstatic const sal_uInt16 STC_WordIndex_T2S[] = {"); 465 for (i = 0; i < count; i++) { 466 if (i % 16 == 0) fprintf(cfp, "\n\t"); 467 fprintf(cfp, "0x%04x, ", STC_WordIndex[i]); 468 } 469 fprintf(cfp, "\n};\n"); 470 471 fprintf(cfp, "\nstatic sal_Int32 STC_WordIndex_T2S_Count = %ld;\n\n", sal::static_int_cast< long >(length)); 472 fprintf (cfp, "\tconst sal_uInt16* getSTC_WordEntry_T2S() { return STC_WordEntry_T2S; }\n"); 473 fprintf (cfp, "\tconst sal_uInt16* getSTC_WordIndex_T2S(sal_Int32& count) { count = STC_WordIndex_T2S_Count; return STC_WordIndex_T2S; }\n"); 474 } else { 475 fprintf (cfp, "\tconst sal_uInt16* getSTC_WordEntry_T2S() { return NULL; }\n"); 476 fprintf (cfp, "\tconst sal_uInt16* getSTC_WordIndex_T2S(sal_Int32& count) { count = 0; return NULL; }\n"); 477 } 478 } 479 480