1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
29*cdf0e10cSrcweir #include "precompiled_i18npool.hxx"
30*cdf0e10cSrcweir 
31*cdf0e10cSrcweir #include <stdio.h>
32*cdf0e10cSrcweir #include <string.h>
33*cdf0e10cSrcweir #include <stdlib.h>
34*cdf0e10cSrcweir #include <sal/main.h>
35*cdf0e10cSrcweir #include <sal/types.h>
36*cdf0e10cSrcweir #include <rtl/strbuf.hxx>
37*cdf0e10cSrcweir #include <rtl/ustring.hxx>
38*cdf0e10cSrcweir 
39*cdf0e10cSrcweir using namespace ::rtl;
40*cdf0e10cSrcweir 
41*cdf0e10cSrcweir /* Main Procedure */
42*cdf0e10cSrcweir 
43*cdf0e10cSrcweir SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
44*cdf0e10cSrcweir {
45*cdf0e10cSrcweir 	FILE *sfp, *cfp;
46*cdf0e10cSrcweir 
47*cdf0e10cSrcweir 	if (argc < 3) exit(-1);
48*cdf0e10cSrcweir 
49*cdf0e10cSrcweir 	sfp = fopen(argv[1], "rb");	// open the source file for read;
50*cdf0e10cSrcweir 	if (sfp == NULL)
51*cdf0e10cSrcweir 	{
52*cdf0e10cSrcweir 	    printf("Open the dictionary source file failed.");
53*cdf0e10cSrcweir 	    return -1;
54*cdf0e10cSrcweir 	}
55*cdf0e10cSrcweir 
56*cdf0e10cSrcweir 	// create the C source file to write
57*cdf0e10cSrcweir 	cfp = fopen(argv[2], "wb");
58*cdf0e10cSrcweir 	if (cfp == NULL) {
59*cdf0e10cSrcweir 	    fclose(sfp);
60*cdf0e10cSrcweir 	    printf("Can't create the C source file.");
61*cdf0e10cSrcweir 	    return -1;
62*cdf0e10cSrcweir 	}
63*cdf0e10cSrcweir 
64*cdf0e10cSrcweir 	fprintf(cfp, "/*\n");
65*cdf0e10cSrcweir 	fprintf(cfp, " * Copyright(c) 1999 - 2000, Sun Microsystems, Inc.\n");
66*cdf0e10cSrcweir 	fprintf(cfp, " * All Rights Reserved.\n");
67*cdf0e10cSrcweir 	fprintf(cfp, " */\n\n");
68*cdf0e10cSrcweir 	fprintf(cfp, "/* !!!The file is generated automatically. DONOT edit the file manually!!! */\n\n");
69*cdf0e10cSrcweir 	fprintf(cfp, "#include <sal/types.h>\n\n");
70*cdf0e10cSrcweir 	fprintf(cfp, "extern \"C\" {\n");
71*cdf0e10cSrcweir 
72*cdf0e10cSrcweir 	sal_Int32 count, i, j;
73*cdf0e10cSrcweir 	sal_Int32 lenArrayCurr = 0, lenArrayCount = 0, lenArrayLen = 0, *lenArray = NULL, charArray[0x10000];
74*cdf0e10cSrcweir 	sal_Bool exist[0x10000];
75*cdf0e10cSrcweir 	for (i = 0; i < 0x10000; i++) {
76*cdf0e10cSrcweir 	    exist[i] = sal_False;
77*cdf0e10cSrcweir 	    charArray[i] = 0;
78*cdf0e10cSrcweir 	}
79*cdf0e10cSrcweir 
80*cdf0e10cSrcweir 	// generate main dict. data array
81*cdf0e10cSrcweir 	fprintf(cfp, "static const sal_Unicode dataArea[] = {");
82*cdf0e10cSrcweir 	sal_Char str[1024];
83*cdf0e10cSrcweir 	sal_Unicode current = 0;
84*cdf0e10cSrcweir 	count = 0;
85*cdf0e10cSrcweir 	while (fgets(str, 1024, sfp)) {
86*cdf0e10cSrcweir 	    // input file is in UTF-8 encoding
87*cdf0e10cSrcweir 	    // don't convert last new line character to Ostr.
88*cdf0e10cSrcweir 	    OUString Ostr((const sal_Char *)str, strlen(str) - 1, RTL_TEXTENCODING_UTF8);
89*cdf0e10cSrcweir 	    const sal_Unicode *u = Ostr.getStr();
90*cdf0e10cSrcweir 
91*cdf0e10cSrcweir 	    sal_Int32 len = Ostr.getLength();
92*cdf0e10cSrcweir 
93*cdf0e10cSrcweir         i=0;
94*cdf0e10cSrcweir         Ostr.iterateCodePoints(&i, 1);
95*cdf0e10cSrcweir 	    if (len == i) continue;	// skip one character word
96*cdf0e10cSrcweir 
97*cdf0e10cSrcweir 	    if (*u != current) {
98*cdf0e10cSrcweir 		if (*u < current)
99*cdf0e10cSrcweir 		printf("u %x, current %x, count %d, lenArrayCount %d\n", *u, current,
100*cdf0e10cSrcweir 		            sal::static_int_cast<int>(count), sal::static_int_cast<int>(lenArrayCount));
101*cdf0e10cSrcweir 		current = *u;
102*cdf0e10cSrcweir 		charArray[current] = lenArrayCount;
103*cdf0e10cSrcweir 	    }
104*cdf0e10cSrcweir 
105*cdf0e10cSrcweir 	    if (lenArrayLen <= lenArrayCount+1)
106*cdf0e10cSrcweir 		lenArray = (sal_Int32*) realloc(lenArray, (lenArrayLen += 1000) * sizeof(sal_Int32));
107*cdf0e10cSrcweir 	    lenArray[lenArrayCount++] = lenArrayCurr;
108*cdf0e10cSrcweir 
109*cdf0e10cSrcweir 	    exist[u[0]] = sal_True;
110*cdf0e10cSrcweir 	    for (i = 1; i < len; i++) {		// start from second character,
111*cdf0e10cSrcweir 		exist[u[i]] = sal_True; 	// since the first character is captured in charArray.
112*cdf0e10cSrcweir 		lenArrayCurr++;
113*cdf0e10cSrcweir 		if ((count++) % 0x10 == 0)
114*cdf0e10cSrcweir 		    fprintf(cfp, "\n\t");
115*cdf0e10cSrcweir 		fprintf(cfp, "0x%04x, ", u[i]);
116*cdf0e10cSrcweir 	    }
117*cdf0e10cSrcweir 	}
118*cdf0e10cSrcweir 	lenArray[lenArrayCount++] = lenArrayCurr; // store last ending pointer
119*cdf0e10cSrcweir 	charArray[current+1] = lenArrayCount;
120*cdf0e10cSrcweir 	fprintf(cfp, "\n};\n");
121*cdf0e10cSrcweir 
122*cdf0e10cSrcweir 	// generate lenArray
123*cdf0e10cSrcweir 	fprintf(cfp, "static const sal_Int32 lenArray[] = {\n\t");
124*cdf0e10cSrcweir 	count = 1;
125*cdf0e10cSrcweir 	fprintf(cfp, "0x%x, ", 0); // insert one slat for skipping 0 in index2 array.
126*cdf0e10cSrcweir 	for (i = 0; i < lenArrayCount; i++) {
127*cdf0e10cSrcweir 	    fprintf(cfp, "0x%lx, ", static_cast<long unsigned int>(lenArray[i]));
128*cdf0e10cSrcweir 	    if (count == 0xf) {
129*cdf0e10cSrcweir 		count = 0;
130*cdf0e10cSrcweir 		fprintf(cfp, "\n\t");
131*cdf0e10cSrcweir 	    } else count++;
132*cdf0e10cSrcweir 	}
133*cdf0e10cSrcweir 	fprintf(cfp, "\n};\n");
134*cdf0e10cSrcweir 
135*cdf0e10cSrcweir 	free(lenArray);
136*cdf0e10cSrcweir 
137*cdf0e10cSrcweir 	// generate index1 array
138*cdf0e10cSrcweir 	fprintf (cfp, "static const sal_Int16 index1[] = {\n\t");
139*cdf0e10cSrcweir 	sal_Int16 set[0x100];
140*cdf0e10cSrcweir 	count = 0;
141*cdf0e10cSrcweir 	for (i = 0; i < 0x100; i++) {
142*cdf0e10cSrcweir 	    for (j = 0; j < 0x100; j++)
143*cdf0e10cSrcweir 		if (charArray[(i*0x100) + j] != 0)
144*cdf0e10cSrcweir 		    break;
145*cdf0e10cSrcweir 
146*cdf0e10cSrcweir 	    fprintf(cfp, "0x%02x, ", set[i] = (j < 0x100 ? sal::static_int_cast<sal_Int16>(count++) : 0xff));
147*cdf0e10cSrcweir 	    if ((i+1) % 0x10 == 0)
148*cdf0e10cSrcweir 		fprintf (cfp, "\n\t");
149*cdf0e10cSrcweir 	}
150*cdf0e10cSrcweir 	fprintf (cfp, "};\n");
151*cdf0e10cSrcweir 
152*cdf0e10cSrcweir 	// generate index2 array
153*cdf0e10cSrcweir 	fprintf (cfp, "static const sal_Int32 index2[] = {\n\t");
154*cdf0e10cSrcweir 	sal_Int32 prev = 0;
155*cdf0e10cSrcweir 	for (i = 0; i < 0x100; i++) {
156*cdf0e10cSrcweir 	    if (set[i] != 0xff) {
157*cdf0e10cSrcweir 		for (j = 0; j < 0x100; j++) {
158*cdf0e10cSrcweir 		    sal_Int32 k = (i*0x100) + j;
159*cdf0e10cSrcweir 		    if (prev != 0 && charArray[k] == 0) {
160*cdf0e10cSrcweir 			for (k++; k < 0x10000; k++)
161*cdf0e10cSrcweir 			    if (charArray[k] != 0)
162*cdf0e10cSrcweir 				break;
163*cdf0e10cSrcweir 		    }
164*cdf0e10cSrcweir 		    prev = charArray[(i*0x100) + j];
165*cdf0e10cSrcweir 		    fprintf(
166*cdf0e10cSrcweir                 cfp, "0x%lx, ",
167*cdf0e10cSrcweir                 sal::static_int_cast< unsigned long >(
168*cdf0e10cSrcweir                     k < 0x10000 ? charArray[k] + 1 : 0));
169*cdf0e10cSrcweir 		    if ((j+1) % 0x10 == 0)
170*cdf0e10cSrcweir 			fprintf (cfp, "\n\t");
171*cdf0e10cSrcweir 		}
172*cdf0e10cSrcweir 		fprintf (cfp, "\n\t");
173*cdf0e10cSrcweir 	    }
174*cdf0e10cSrcweir 	}
175*cdf0e10cSrcweir 	fprintf (cfp, "\n};\n");
176*cdf0e10cSrcweir 
177*cdf0e10cSrcweir 	// generate existMark array
178*cdf0e10cSrcweir 	count = 0;
179*cdf0e10cSrcweir 	fprintf (cfp, "static const sal_uInt8 existMark[] = {\n\t");
180*cdf0e10cSrcweir 	for (i = 0; i < 0x1FFF; i++) {
181*cdf0e10cSrcweir 	    sal_uInt8 bit = 0;
182*cdf0e10cSrcweir 	    for (j = 0; j < 8; j++)
183*cdf0e10cSrcweir 		if (exist[i * 8 + j])
184*cdf0e10cSrcweir 		    bit |= 1 << j;
185*cdf0e10cSrcweir 	    fprintf(cfp, "0x%02x, ", bit);
186*cdf0e10cSrcweir 	    if (count == 0xf) {
187*cdf0e10cSrcweir 		count = 0;
188*cdf0e10cSrcweir 		fprintf(cfp, "\n\t");
189*cdf0e10cSrcweir 	    } else count++;
190*cdf0e10cSrcweir 	}
191*cdf0e10cSrcweir 	fprintf (cfp, "\n};\n");
192*cdf0e10cSrcweir 
193*cdf0e10cSrcweir 	// create function to return arrays
194*cdf0e10cSrcweir 	fprintf (cfp, "\tconst sal_uInt8* getExistMark() { return existMark; }\n");
195*cdf0e10cSrcweir 	fprintf (cfp, "\tconst sal_Int16* getIndex1() { return index1; }\n");
196*cdf0e10cSrcweir 	fprintf (cfp, "\tconst sal_Int32* getIndex2() { return index2; }\n");
197*cdf0e10cSrcweir 	fprintf (cfp, "\tconst sal_Int32* getLenArray() { return lenArray; }\n");
198*cdf0e10cSrcweir 	fprintf (cfp, "\tconst sal_Unicode* getDataArea() { return dataArea; }\n");
199*cdf0e10cSrcweir 	fprintf (cfp, "}\n");
200*cdf0e10cSrcweir 
201*cdf0e10cSrcweir 	fclose(sfp);
202*cdf0e10cSrcweir 	fclose(cfp);
203*cdf0e10cSrcweir 
204*cdf0e10cSrcweir 	return 0;
205*cdf0e10cSrcweir }	// End of main
206