/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/
21449ab281SAndrew Rist 
22449ab281SAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_i18npool.hxx"
26cdf0e10cSrcweir 
27cdf0e10cSrcweir #include <stdio.h>
28cdf0e10cSrcweir #include <string.h>
29cdf0e10cSrcweir #include <stdlib.h>
30cdf0e10cSrcweir #include <sal/main.h>
31cdf0e10cSrcweir #include <sal/types.h>
32cdf0e10cSrcweir #include <rtl/strbuf.hxx>
33cdf0e10cSrcweir #include <rtl/ustring.hxx>
34cdf0e10cSrcweir 
35cdf0e10cSrcweir #include <vector>
36cdf0e10cSrcweir 
37cdf0e10cSrcweir using namespace ::rtl;
38cdf0e10cSrcweir 
39cdf0e10cSrcweir void make_hhc_char(FILE *sfp, FILE *cfp);
40cdf0e10cSrcweir void make_stc_char(FILE *sfp, FILE *cfp);
41cdf0e10cSrcweir void make_stc_word(FILE *sfp, FILE *cfp);
42cdf0e10cSrcweir 
43cdf0e10cSrcweir /* Main Procedure */
44cdf0e10cSrcweir 
SAL_IMPLEMENT_MAIN_WITH_ARGS(argc,argv)45cdf0e10cSrcweir SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
46cdf0e10cSrcweir {
47cdf0e10cSrcweir 	FILE *sfp, *cfp;
48cdf0e10cSrcweir 
49cdf0e10cSrcweir 	if (argc < 4) exit(-1);
50cdf0e10cSrcweir 
51cdf0e10cSrcweir 
52cdf0e10cSrcweir 	sfp = fopen(argv[2], "rb");	// open the source file for read;
53cdf0e10cSrcweir 	if (sfp == NULL)
54cdf0e10cSrcweir     {
55cdf0e10cSrcweir 	    printf("Open the dictionary source file failed.");
56cdf0e10cSrcweir         return -1;
57cdf0e10cSrcweir     }
58cdf0e10cSrcweir 
59cdf0e10cSrcweir 	// create the C source file to write
60cdf0e10cSrcweir 	cfp = fopen(argv[3], "wb");
61cdf0e10cSrcweir 	if (cfp == NULL) {
62cdf0e10cSrcweir 	    fclose(sfp);
63cdf0e10cSrcweir 	    printf("Can't create the C source file.");
64cdf0e10cSrcweir         return -1;
65cdf0e10cSrcweir 	}
66cdf0e10cSrcweir 
67cdf0e10cSrcweir 	fprintf(cfp, "/*\n");
68cdf0e10cSrcweir 	fprintf(cfp, " * Copyright(c) 1999 - 2000, Sun Microsystems, Inc.\n");
69cdf0e10cSrcweir 	fprintf(cfp, " * All Rights Reserved.\n");
70cdf0e10cSrcweir 	fprintf(cfp, " */\n\n");
71cdf0e10cSrcweir 	fprintf(cfp, "/* !!!The file is generated automatically. DONOT edit the file manually!!! */\n\n");
72cdf0e10cSrcweir 	fprintf(cfp, "#include <sal/types.h>\n");
73cdf0e10cSrcweir 	fprintf(cfp, "#include <textconversion.hxx>\n");
74cdf0e10cSrcweir     fprintf(cfp, "\nextern \"C\" {\n");
75cdf0e10cSrcweir 
76cdf0e10cSrcweir     if (strcmp(argv[1], "hhc_char") == 0)
77cdf0e10cSrcweir         make_hhc_char(sfp, cfp);
78cdf0e10cSrcweir     else if (strcmp(argv[1], "stc_char") == 0)
79cdf0e10cSrcweir         make_stc_char(sfp, cfp);
80cdf0e10cSrcweir     else if (strcmp(argv[1], "stc_word") == 0)
81cdf0e10cSrcweir         make_stc_word(sfp, cfp);
82cdf0e10cSrcweir 
83cdf0e10cSrcweir 	fprintf (cfp, "}\n");
84cdf0e10cSrcweir 
85cdf0e10cSrcweir 	fclose(sfp);
86cdf0e10cSrcweir 	fclose(cfp);
87cdf0e10cSrcweir 
88cdf0e10cSrcweir 	return 0;
89cdf0e10cSrcweir } // end of main
90cdf0e10cSrcweir 
91cdf0e10cSrcweir // Hangul/Hanja character conversion
make_hhc_char(FILE * sfp,FILE * cfp)92cdf0e10cSrcweir void make_hhc_char(FILE *sfp, FILE *cfp)
93cdf0e10cSrcweir {
94cdf0e10cSrcweir 	sal_Int32 count, address, i, j, k;
95cdf0e10cSrcweir 	sal_Unicode Hanja2HangulData[0x10000];
96cdf0e10cSrcweir 	for (i = 0; i < 0x10000; i++) {
97cdf0e10cSrcweir 	    Hanja2HangulData[i] = 0;
98cdf0e10cSrcweir 	}
99cdf0e10cSrcweir     sal_uInt16 Hangul2HanjaData[10000][3];
100cdf0e10cSrcweir 
101cdf0e10cSrcweir 	// generate main dict. data array
102cdf0e10cSrcweir 	fprintf(cfp, "\nstatic const sal_Unicode Hangul2HanjaData[] = {");
103cdf0e10cSrcweir 
104cdf0e10cSrcweir 	sal_Char Cstr[1024];
105cdf0e10cSrcweir 	count = 0;
106cdf0e10cSrcweir     address = 0;
107cdf0e10cSrcweir 	while (fgets(Cstr, 1024, sfp)) {
108cdf0e10cSrcweir 	    // input file is in UTF-8 encoding (Hangul:Hanja)
109cdf0e10cSrcweir 	    // don't convert last new line character to Ostr.
110cdf0e10cSrcweir 	    OUString Ostr((const sal_Char *)Cstr, strlen(Cstr) - 1, RTL_TEXTENCODING_UTF8);
111cdf0e10cSrcweir 	    const sal_Unicode *Ustr = Ostr.getStr();
112cdf0e10cSrcweir         sal_Int32  len = Ostr.getLength();
113cdf0e10cSrcweir 
114cdf0e10cSrcweir         Hangul2HanjaData[count][0] = Ustr[0];
115cdf0e10cSrcweir         Hangul2HanjaData[count][1] = sal::static_int_cast<sal_uInt16>( address );
116cdf0e10cSrcweir         Hangul2HanjaData[count][2] = sal::static_int_cast<sal_uInt16>( len - 2 );
117cdf0e10cSrcweir         count++;
118cdf0e10cSrcweir 
119cdf0e10cSrcweir         for (i = 2; i < len; i++) {
120cdf0e10cSrcweir             Hanja2HangulData[Ustr[i]] = Ustr[0];
121cdf0e10cSrcweir             if (address++ % 16 == 0)
122cdf0e10cSrcweir                 fprintf(cfp, "\n\t");
123cdf0e10cSrcweir             fprintf(cfp, "0x%04x, ", Ustr[i]);
124cdf0e10cSrcweir         }
125cdf0e10cSrcweir 	}
126cdf0e10cSrcweir 	fprintf(cfp, "\n};\n");
127cdf0e10cSrcweir 
128cdf0e10cSrcweir 	fprintf(cfp, "\nstatic const com::sun::star::i18n::Hangul_Index Hangul2HanjaIndex[] = {\n");
129cdf0e10cSrcweir     for (i = 0; i < count; i++)
130cdf0e10cSrcweir         fprintf(cfp, "\t{ 0x%04x, 0x%04x, 0x%02x },\n",
131cdf0e10cSrcweir                         Hangul2HanjaData[i][0],
132cdf0e10cSrcweir                         Hangul2HanjaData[i][1],
133cdf0e10cSrcweir                         Hangul2HanjaData[i][2]);
134cdf0e10cSrcweir 	fprintf(cfp, "};\n");
135cdf0e10cSrcweir 
136cdf0e10cSrcweir 	fprintf(cfp, "\nstatic const sal_uInt16 Hanja2HangulIndex[] = {");
137cdf0e10cSrcweir 
138cdf0e10cSrcweir     address=0;
139cdf0e10cSrcweir 	for (i = 0; i < 0x10; i++) {
140cdf0e10cSrcweir         fprintf(cfp, "\n\t");
141cdf0e10cSrcweir         for (j = 0; j < 0x10; j++) {
142cdf0e10cSrcweir             for (k = 0; k < 0x100; k++) {
143cdf0e10cSrcweir                 if (Hanja2HangulData[((i*0x10)+j)*0x100+k] != 0)
144cdf0e10cSrcweir                     break;
145cdf0e10cSrcweir             }
146cdf0e10cSrcweir             fprintf(
147cdf0e10cSrcweir                 cfp, "0x%04lx, ",
148cdf0e10cSrcweir                 sal::static_int_cast< unsigned long >(
149cdf0e10cSrcweir                     k < 0x100 ? (address++)*0x100 : 0xFFFF));
150cdf0e10cSrcweir         }
151cdf0e10cSrcweir 	}
152cdf0e10cSrcweir 	fprintf(cfp, "\n};\n");
153cdf0e10cSrcweir 
154cdf0e10cSrcweir 	fprintf(cfp, "\nstatic const sal_Unicode Hanja2HangulData[] = {");
155cdf0e10cSrcweir 
156cdf0e10cSrcweir 	for (i = 0; i < 0x100; i++) {
157cdf0e10cSrcweir         for (j = 0; j < 0x100; j++) {
158cdf0e10cSrcweir             if (Hanja2HangulData[i*0x100+j] != 0)
159cdf0e10cSrcweir                 break;
160cdf0e10cSrcweir         }
161cdf0e10cSrcweir         if (j < 0x100) {
162cdf0e10cSrcweir             for (j = 0; j < 0x10; j++) {
163cdf0e10cSrcweir                 fprintf(cfp, "\n\t");
164cdf0e10cSrcweir                 for (k = 0; k < 0x10; k++) {
165cdf0e10cSrcweir 					sal_Unicode c = Hanja2HangulData[((i*0x10+j)*0x10)+k];
166cdf0e10cSrcweir                     fprintf(cfp, "0x%04x, ", c ? c : 0xFFFF);
167cdf0e10cSrcweir                 }
168cdf0e10cSrcweir             }
169cdf0e10cSrcweir         }
170cdf0e10cSrcweir 	}
171cdf0e10cSrcweir 	fprintf(cfp, "\n};\n");
172cdf0e10cSrcweir 
173cdf0e10cSrcweir 	// create function to return arrays
174*52d905a6SJim Jagielski 	fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT const sal_Unicode* getHangul2HanjaData() { return Hangul2HanjaData; }\n");
175*52d905a6SJim Jagielski 	fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT const com::sun::star::i18n::Hangul_Index* getHangul2HanjaIndex() { return Hangul2HanjaIndex; }\n");
176*52d905a6SJim Jagielski 	fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT sal_Int16 getHangul2HanjaIndexCount() { return sizeof(Hangul2HanjaIndex) / sizeof(com::sun::star::i18n::Hangul_Index); }\n");
177*52d905a6SJim Jagielski 	fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT const sal_uInt16* getHanja2HangulIndex() { return Hanja2HangulIndex; }\n");
178*52d905a6SJim Jagielski 	fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT const sal_Unicode* getHanja2HangulData() { return Hanja2HangulData; }\n");
179cdf0e10cSrcweir }
180cdf0e10cSrcweir 
181cdf0e10cSrcweir // Simplified/Traditional Chinese character conversion
make_stc_char(FILE * sfp,FILE * cfp)182cdf0e10cSrcweir void make_stc_char(FILE *sfp, FILE *cfp)
183cdf0e10cSrcweir {
184cdf0e10cSrcweir 	sal_Int32 address, i, j, k;
185cdf0e10cSrcweir 	sal_Unicode SChinese2TChineseData[0x10000];
186cdf0e10cSrcweir 	sal_Unicode SChinese2VChineseData[0x10000];
187cdf0e10cSrcweir 	sal_Unicode TChinese2SChineseData[0x10000];
188cdf0e10cSrcweir 	for (i = 0; i < 0x10000; i++) {
189cdf0e10cSrcweir 	    SChinese2TChineseData[i] = 0;
190cdf0e10cSrcweir 	    SChinese2VChineseData[i] = 0;
191cdf0e10cSrcweir 	    TChinese2SChineseData[i] = 0;
192cdf0e10cSrcweir 	}
193cdf0e10cSrcweir 
194cdf0e10cSrcweir 	sal_Char Cstr[1024];
195cdf0e10cSrcweir 	while (fgets(Cstr, 1024, sfp)) {
196cdf0e10cSrcweir 	    // input file is in UTF-8 encoding (SChinese:TChinese)
197cdf0e10cSrcweir 	    // don't convert last new line character to Ostr.
198cdf0e10cSrcweir 	    OUString Ostr((const sal_Char *)Cstr, strlen(Cstr) - 1, RTL_TEXTENCODING_UTF8);
199cdf0e10cSrcweir 	    const sal_Unicode *Ustr = Ostr.getStr();
200cdf0e10cSrcweir         sal_Int32  len = Ostr.getLength();
201cdf0e10cSrcweir         if (Ustr[1] == sal_Unicode('v'))
202cdf0e10cSrcweir             SChinese2VChineseData[Ustr[0]] = Ustr[2];
203cdf0e10cSrcweir         else {
204cdf0e10cSrcweir             SChinese2TChineseData[Ustr[0]] = Ustr[2];
205cdf0e10cSrcweir             if (SChinese2VChineseData[Ustr[0]] == 0)
206cdf0e10cSrcweir                 SChinese2VChineseData[Ustr[0]] = Ustr[2];
207cdf0e10cSrcweir         }
208cdf0e10cSrcweir         for (i = 2; i < len; i++)
209cdf0e10cSrcweir             TChinese2SChineseData[Ustr[i]] = Ustr[0];
210cdf0e10cSrcweir     }
211cdf0e10cSrcweir 
212cdf0e10cSrcweir 	fprintf(cfp, "\nstatic const sal_uInt16 STC_CharIndex_S2T[] = {");
213cdf0e10cSrcweir 
214cdf0e10cSrcweir     address=0;
215cdf0e10cSrcweir 	for (i = 0; i < 0x10; i++) {
216cdf0e10cSrcweir         fprintf(cfp, "\n\t");
217cdf0e10cSrcweir         for (j = 0; j < 0x10; j++) {
218cdf0e10cSrcweir             for (k = 0; k < 0x100; k++) {
219cdf0e10cSrcweir                 if (SChinese2TChineseData[((i*0x10)+j)*0x100+k] != 0)
220cdf0e10cSrcweir                     break;
221cdf0e10cSrcweir             }
222cdf0e10cSrcweir             fprintf(
223cdf0e10cSrcweir                 cfp, "0x%04lx, ",
224cdf0e10cSrcweir                 sal::static_int_cast< unsigned long >(
225cdf0e10cSrcweir                     k < 0x100 ? (address++)*0x100 : 0xFFFF));
226cdf0e10cSrcweir         }
227cdf0e10cSrcweir 	}
228cdf0e10cSrcweir 	fprintf(cfp, "\n};\n");
229cdf0e10cSrcweir 
230cdf0e10cSrcweir 	fprintf(cfp, "\nstatic const sal_Unicode STC_CharData_S2T[] = {");
231cdf0e10cSrcweir 
232cdf0e10cSrcweir 	for (i = 0; i < 0x100; i++) {
233cdf0e10cSrcweir         for (j = 0; j < 0x100; j++) {
234cdf0e10cSrcweir             if (SChinese2TChineseData[i*0x100+j] != 0)
235cdf0e10cSrcweir                 break;
236cdf0e10cSrcweir         }
237cdf0e10cSrcweir         if (j < 0x100) {
238cdf0e10cSrcweir             for (j = 0; j < 0x10; j++) {
239cdf0e10cSrcweir                 fprintf(cfp, "\n\t");
240cdf0e10cSrcweir                 for (k = 0; k < 0x10; k++) {
241cdf0e10cSrcweir 					sal_Unicode c = SChinese2TChineseData[((i*0x10+j)*0x10)+k];
242cdf0e10cSrcweir                     fprintf(cfp, "0x%04x, ", c ? c : 0xFFFF);
243cdf0e10cSrcweir                 }
244cdf0e10cSrcweir             }
245cdf0e10cSrcweir         }
246cdf0e10cSrcweir 	}
247cdf0e10cSrcweir 	fprintf(cfp, "\n};\n");
248cdf0e10cSrcweir 
249cdf0e10cSrcweir 	fprintf(cfp, "\nstatic const sal_uInt16 STC_CharIndex_S2V[] = {");
250cdf0e10cSrcweir 
251cdf0e10cSrcweir     address=0;
252cdf0e10cSrcweir 	for (i = 0; i < 0x10; i++) {
253cdf0e10cSrcweir         fprintf(cfp, "\n\t");
254cdf0e10cSrcweir         for (j = 0; j < 0x10; j++) {
255cdf0e10cSrcweir             for (k = 0; k < 0x100; k++) {
256cdf0e10cSrcweir                 if (SChinese2VChineseData[((i*0x10)+j)*0x100+k] != 0)
257cdf0e10cSrcweir                     break;
258cdf0e10cSrcweir             }
259cdf0e10cSrcweir             fprintf(
260cdf0e10cSrcweir                 cfp, "0x%04lx, ",
261cdf0e10cSrcweir                 sal::static_int_cast< unsigned long >(
262cdf0e10cSrcweir                     k < 0x100 ? (address++)*0x100 : 0xFFFF));
263cdf0e10cSrcweir         }
264cdf0e10cSrcweir 	}
265cdf0e10cSrcweir 	fprintf(cfp, "\n};\n");
266cdf0e10cSrcweir 
267cdf0e10cSrcweir 	fprintf(cfp, "\nstatic const sal_Unicode STC_CharData_S2V[] = {");
268cdf0e10cSrcweir 
269cdf0e10cSrcweir 	for (i = 0; i < 0x100; i++) {
270cdf0e10cSrcweir         for (j = 0; j < 0x100; j++) {
271cdf0e10cSrcweir             if (SChinese2VChineseData[i*0x100+j] != 0)
272cdf0e10cSrcweir                 break;
273cdf0e10cSrcweir         }
274cdf0e10cSrcweir         if (j < 0x100) {
275cdf0e10cSrcweir             for (j = 0; j < 0x10; j++) {
276cdf0e10cSrcweir                 fprintf(cfp, "\n\t");
277cdf0e10cSrcweir                 for (k = 0; k < 0x10; k++) {
278cdf0e10cSrcweir 					sal_Unicode c = SChinese2VChineseData[((i*0x10+j)*0x10)+k];
279cdf0e10cSrcweir                     fprintf(cfp, "0x%04x, ", c ? c : 0xFFFF);
280cdf0e10cSrcweir                 }
281cdf0e10cSrcweir             }
282cdf0e10cSrcweir         }
283cdf0e10cSrcweir 	}
284cdf0e10cSrcweir 	fprintf(cfp, "\n};\n");
285cdf0e10cSrcweir 
286cdf0e10cSrcweir 	fprintf(cfp, "\nstatic const sal_uInt16 STC_CharIndex_T2S[] = {");
287cdf0e10cSrcweir 
288cdf0e10cSrcweir     address=0;
289cdf0e10cSrcweir 	for (i = 0; i < 0x10; i++) {
290cdf0e10cSrcweir         fprintf(cfp, "\n\t");
291cdf0e10cSrcweir         for (j = 0; j < 0x10; j++) {
292cdf0e10cSrcweir             for (k = 0; k < 0x100; k++) {
293cdf0e10cSrcweir                 if (TChinese2SChineseData[((i*0x10)+j)*0x100+k] != 0)
294cdf0e10cSrcweir                     break;
295cdf0e10cSrcweir             }
296cdf0e10cSrcweir             fprintf(
297cdf0e10cSrcweir                 cfp, "0x%04lx, ",
298cdf0e10cSrcweir                 sal::static_int_cast< unsigned long >(
299cdf0e10cSrcweir                     k < 0x100 ? (address++)*0x100 : 0xFFFF));
300cdf0e10cSrcweir         }
301cdf0e10cSrcweir 	}
302cdf0e10cSrcweir 	fprintf(cfp, "\n};\n");
303cdf0e10cSrcweir 
304cdf0e10cSrcweir 	fprintf(cfp, "\nstatic const sal_Unicode STC_CharData_T2S[] = {");
305cdf0e10cSrcweir 
306cdf0e10cSrcweir 	for (i = 0; i < 0x100; i++) {
307cdf0e10cSrcweir         for (j = 0; j < 0x100; j++) {
308cdf0e10cSrcweir             if (TChinese2SChineseData[i*0x100+j] != 0)
309cdf0e10cSrcweir                 break;
310cdf0e10cSrcweir         }
311cdf0e10cSrcweir         if (j < 0x100) {
312cdf0e10cSrcweir             for (j = 0; j < 0x10; j++) {
313cdf0e10cSrcweir                 fprintf(cfp, "\n\t");
314cdf0e10cSrcweir                 for (k = 0; k < 0x10; k++) {
315cdf0e10cSrcweir 					sal_Unicode c = TChinese2SChineseData[((i*0x10+j)*0x10)+k];
316cdf0e10cSrcweir                     fprintf(cfp, "0x%04x, ", c ? c : 0xFFFF);
317cdf0e10cSrcweir                 }
318cdf0e10cSrcweir             }
319cdf0e10cSrcweir         }
320cdf0e10cSrcweir 	}
321cdf0e10cSrcweir 	fprintf(cfp, "\n};\n");
322cdf0e10cSrcweir 
323cdf0e10cSrcweir 	// create function to return arrays
324*52d905a6SJim Jagielski 	fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT const sal_uInt16* getSTC_CharIndex_S2T() { return STC_CharIndex_S2T; }\n");
325*52d905a6SJim Jagielski 	fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT const sal_Unicode* getSTC_CharData_S2T() { return STC_CharData_S2T; }\n");
326*52d905a6SJim Jagielski 	fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT const sal_uInt16* getSTC_CharIndex_S2V() { return STC_CharIndex_S2V; }\n");
327*52d905a6SJim Jagielski 	fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT const sal_Unicode* getSTC_CharData_S2V() { return STC_CharData_S2V; }\n");
328*52d905a6SJim Jagielski 	fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT const sal_uInt16* getSTC_CharIndex_T2S() { return STC_CharIndex_T2S; }\n");
329*52d905a6SJim Jagielski 	fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT const sal_Unicode* getSTC_CharData_T2S() { return STC_CharData_T2S; }\n");
330cdf0e10cSrcweir }
331cdf0e10cSrcweir 
332cdf0e10cSrcweir 
333cdf0e10cSrcweir typedef struct {
334cdf0e10cSrcweir     sal_uInt16 address;
335cdf0e10cSrcweir     sal_Int32 len;
336cdf0e10cSrcweir     sal_Unicode *data;
337cdf0e10cSrcweir } Index;
338cdf0e10cSrcweir 
339cdf0e10cSrcweir extern "C" {
Index_comp(const void * s1,const void * s2)340cdf0e10cSrcweir int Index_comp(const void* s1, const void* s2)
341cdf0e10cSrcweir {
342cdf0e10cSrcweir     Index *p1 = (Index*)s1, *p2 = (Index*)s2;
343cdf0e10cSrcweir     int result = p1->len - p2->len;
344cdf0e10cSrcweir     for (int i = 0; result == 0 && i < p1->len; i++)
345cdf0e10cSrcweir         result = *(p1->data+i) - *(p2->data+i);
346cdf0e10cSrcweir     return result;
347cdf0e10cSrcweir }
348cdf0e10cSrcweir }
349cdf0e10cSrcweir 
350cdf0e10cSrcweir // Simplified/Traditional Chinese word conversion
make_stc_word(FILE * sfp,FILE * cfp)351cdf0e10cSrcweir void make_stc_word(FILE *sfp, FILE *cfp)
352cdf0e10cSrcweir {
353cdf0e10cSrcweir 	sal_Int32 count, i, length;
354cdf0e10cSrcweir     sal_Unicode STC_WordData[0x10000];
355cdf0e10cSrcweir     std::vector<Index> STC_WordEntry_S2T(0x10000);
356cdf0e10cSrcweir     std::vector<Index> STC_WordEntry_T2S(0x10000);
357cdf0e10cSrcweir     sal_Int32 count_S2T = 0, count_T2S = 0;
358cdf0e10cSrcweir     sal_Int32 line = 0, char_total = 0;
359cdf0e10cSrcweir 	sal_Char Cstr[1024];
360cdf0e10cSrcweir 
361cdf0e10cSrcweir 	while (fgets(Cstr, 1024, sfp)) {
362cdf0e10cSrcweir 	    // input file is in UTF-8 encoding (SChinese:TChinese)
363cdf0e10cSrcweir 	    // don't convert last new line character to Ostr.
364cdf0e10cSrcweir 	    OUString Ostr((const sal_Char *)Cstr, strlen(Cstr) - 1, RTL_TEXTENCODING_UTF8);
365cdf0e10cSrcweir         sal_Int32  len = Ostr.getLength();
366cdf0e10cSrcweir         if (char_total + len + 1 > 0xFFFF) {
367cdf0e10cSrcweir             fprintf(stderr, "Word Dictionary stc_word.dic is too big (line %ld)", sal::static_int_cast< long >(line));
368cdf0e10cSrcweir             return;
369cdf0e10cSrcweir         }
370cdf0e10cSrcweir         sal_Int32 sep=-1, eq=-1, gt=-1, lt=-1;
371cdf0e10cSrcweir         if (((sep = eq = Ostr.indexOf(sal_Unicode('='))) > 0) ||
372cdf0e10cSrcweir             ((sep = gt = Ostr.indexOf(sal_Unicode('>'))) > 0) ||
373cdf0e10cSrcweir             ((sep = lt = Ostr.indexOf(sal_Unicode('<'))) > 0)) {
374cdf0e10cSrcweir 
375cdf0e10cSrcweir             if (eq > 0 || gt > 0) {
376cdf0e10cSrcweir                 STC_WordEntry_S2T[count_S2T].address = sal::static_int_cast<sal_uInt16>( char_total );
377cdf0e10cSrcweir                 STC_WordEntry_S2T[count_S2T].len = sep;
378cdf0e10cSrcweir                 STC_WordEntry_S2T[count_S2T++].data = &STC_WordData[char_total];
379cdf0e10cSrcweir             }
380cdf0e10cSrcweir             if (eq > 0 || lt > 0) {
381cdf0e10cSrcweir                 STC_WordEntry_T2S[count_T2S].address = sal::static_int_cast<sal_uInt16>( char_total + sep + 1 );
382cdf0e10cSrcweir                 STC_WordEntry_T2S[count_T2S].len = len - sep - 1;
383cdf0e10cSrcweir                 STC_WordEntry_T2S[count_T2S++].data = &STC_WordData[char_total + sep + 1];
384cdf0e10cSrcweir             }
385cdf0e10cSrcweir             for (i = 0; i < len; i++)
386cdf0e10cSrcweir                 STC_WordData[char_total++] = (i == sep) ? 0 : Ostr[i];
387cdf0e10cSrcweir             STC_WordData[char_total++] = 0;
388cdf0e10cSrcweir         } else {
389cdf0e10cSrcweir             fprintf(stderr, "Invalid entry in stc_word.dic (line %ld)", sal::static_int_cast< long >(line));
390cdf0e10cSrcweir             return;
391cdf0e10cSrcweir         }
392cdf0e10cSrcweir         line++;
393cdf0e10cSrcweir     }
394cdf0e10cSrcweir 
395cdf0e10cSrcweir     if (char_total > 0) {
396cdf0e10cSrcweir         fprintf(cfp, "\nstatic const sal_Unicode STC_WordData[] = {");
397cdf0e10cSrcweir         for (i = 0; i < char_total; i++) {
398cdf0e10cSrcweir             if (i % 32 == 0) fprintf(cfp, "\n\t");
399cdf0e10cSrcweir             fprintf(cfp, "0x%04x, ", STC_WordData[i]);
400cdf0e10cSrcweir         }
401cdf0e10cSrcweir         fprintf(cfp, "\n};\n");
402cdf0e10cSrcweir 
403cdf0e10cSrcweir         fprintf(cfp, "\nstatic sal_Int32 STC_WordData_Count = %ld;\n", sal::static_int_cast< long >(char_total));
404cdf0e10cSrcweir 
405cdf0e10cSrcweir         // create function to return arrays
406*52d905a6SJim Jagielski         fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT const sal_Unicode* getSTC_WordData(sal_Int32& count) { count = STC_WordData_Count; return STC_WordData; }\n");
407cdf0e10cSrcweir     } else {
408*52d905a6SJim Jagielski         fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT const sal_Unicode* getSTC_WordData(sal_Int32& count) { count = 0; return NULL; }\n");
409cdf0e10cSrcweir     }
410cdf0e10cSrcweir 
411cdf0e10cSrcweir     sal_uInt16 STC_WordIndex[0x100];
412cdf0e10cSrcweir 
413cdf0e10cSrcweir     if (count_S2T > 0) {
414cdf0e10cSrcweir         qsort(&STC_WordEntry_S2T[0], count_S2T, sizeof(Index), Index_comp);
415cdf0e10cSrcweir 
416cdf0e10cSrcweir         fprintf(cfp, "\nstatic const sal_uInt16 STC_WordEntry_S2T[] = {");
417cdf0e10cSrcweir         count = 0;
418cdf0e10cSrcweir         length = 0;
419cdf0e10cSrcweir         for (i = 0; i < count_S2T; i++) {
420cdf0e10cSrcweir             if (i % 32 == 0) fprintf(cfp, "\n\t");
421cdf0e10cSrcweir             fprintf(cfp, "0x%04x, ", STC_WordEntry_S2T[i].address);
422cdf0e10cSrcweir             if (STC_WordEntry_S2T[i].len != length) {
423cdf0e10cSrcweir                 length = STC_WordEntry_S2T[i].len;
424cdf0e10cSrcweir                 while (count <= length)
425cdf0e10cSrcweir                     STC_WordIndex[count++] = sal::static_int_cast<sal_uInt16>(i);
426cdf0e10cSrcweir             }
427cdf0e10cSrcweir         }
428cdf0e10cSrcweir         fprintf(cfp, "\n};\n");
429cdf0e10cSrcweir         STC_WordIndex[count++] = sal::static_int_cast<sal_uInt16>(i);
430cdf0e10cSrcweir 
431cdf0e10cSrcweir         fprintf(cfp, "\nstatic const sal_uInt16 STC_WordIndex_S2T[] = {");
432cdf0e10cSrcweir         for (i = 0; i < count; i++) {
433cdf0e10cSrcweir             if (i % 16 == 0) fprintf(cfp, "\n\t");
434cdf0e10cSrcweir             fprintf(cfp, "0x%04x, ", STC_WordIndex[i]);
435cdf0e10cSrcweir         }
436cdf0e10cSrcweir         fprintf(cfp, "\n};\n");
437cdf0e10cSrcweir 
438cdf0e10cSrcweir         fprintf(cfp, "\nstatic sal_Int32 STC_WordIndex_S2T_Count = %ld;\n", sal::static_int_cast< long >(length));
439*52d905a6SJim Jagielski         fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT const sal_uInt16* getSTC_WordEntry_S2T() { return STC_WordEntry_S2T; }\n");
440*52d905a6SJim Jagielski         fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT const sal_uInt16* getSTC_WordIndex_S2T(sal_Int32& count) { count = STC_WordIndex_S2T_Count; return STC_WordIndex_S2T; }\n");
441cdf0e10cSrcweir     } else {
442*52d905a6SJim Jagielski         fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT const sal_uInt16* getSTC_WordEntry_S2T() { return NULL; }\n");
443*52d905a6SJim Jagielski         fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT const sal_uInt16* getSTC_WordIndex_S2T(sal_Int32& count) { count = 0; return NULL; }\n");
444cdf0e10cSrcweir     }
445cdf0e10cSrcweir 
446cdf0e10cSrcweir     if (count_T2S > 0) {
447cdf0e10cSrcweir         qsort(&STC_WordEntry_T2S[0], count_T2S, sizeof(Index), Index_comp);
448cdf0e10cSrcweir 
449cdf0e10cSrcweir         fprintf(cfp, "\nstatic const sal_uInt16 STC_WordEntry_T2S[] = {");
450cdf0e10cSrcweir         count = 0;
451cdf0e10cSrcweir         length = 0;
452cdf0e10cSrcweir         for (i = 0; i < count_T2S; i++) {
453cdf0e10cSrcweir             if (i % 32 == 0) fprintf(cfp, "\n\t");
454cdf0e10cSrcweir             fprintf(cfp, "0x%04x, ", STC_WordEntry_T2S[i].address);
455cdf0e10cSrcweir             if (STC_WordEntry_T2S[i].len != length) {
456cdf0e10cSrcweir                 length = STC_WordEntry_T2S[i].len;
457cdf0e10cSrcweir                 while (count <= length)
458cdf0e10cSrcweir                     STC_WordIndex[count++] = sal::static_int_cast<sal_uInt16>(i);
459cdf0e10cSrcweir             }
460cdf0e10cSrcweir         }
461cdf0e10cSrcweir         STC_WordIndex[count++] = sal::static_int_cast<sal_uInt16>(i);
462cdf0e10cSrcweir         fprintf(cfp, "\n};\n");
463cdf0e10cSrcweir 
464cdf0e10cSrcweir         fprintf(cfp, "\nstatic const sal_uInt16 STC_WordIndex_T2S[] = {");
465cdf0e10cSrcweir         for (i = 0; i < count; i++) {
466cdf0e10cSrcweir             if (i % 16 == 0) fprintf(cfp, "\n\t");
467cdf0e10cSrcweir             fprintf(cfp, "0x%04x, ",  STC_WordIndex[i]);
468cdf0e10cSrcweir         }
469cdf0e10cSrcweir         fprintf(cfp, "\n};\n");
470cdf0e10cSrcweir 
471cdf0e10cSrcweir         fprintf(cfp, "\nstatic sal_Int32 STC_WordIndex_T2S_Count = %ld;\n\n", sal::static_int_cast< long >(length));
472*52d905a6SJim Jagielski         fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT const sal_uInt16* getSTC_WordEntry_T2S() { return STC_WordEntry_T2S; }\n");
473*52d905a6SJim Jagielski         fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT const sal_uInt16* getSTC_WordIndex_T2S(sal_Int32& count) { count = STC_WordIndex_T2S_Count; return STC_WordIndex_T2S; }\n");
474cdf0e10cSrcweir     } else {
475*52d905a6SJim Jagielski         fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT const sal_uInt16* getSTC_WordEntry_T2S() { return NULL; }\n");
476*52d905a6SJim Jagielski         fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT const sal_uInt16* getSTC_WordIndex_T2S(sal_Int32& count) { count = 0; return NULL; }\n");
477cdf0e10cSrcweir     }
478cdf0e10cSrcweir }
479cdf0e10cSrcweir 
480