1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_i18npool.hxx"
30 
31 #include <stdio.h>
32 #include <string.h>
33 #include <stdlib.h>
34 #include <sal/main.h>
35 #include <sal/types.h>
36 #include <rtl/strbuf.hxx>
37 #include <rtl/ustring.hxx>
38 
39 using namespace ::rtl;
40 
41 /* Main Procedure */
42 
43 SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
44 {
45 	FILE *sfp, *cfp;
46 
47 	if (argc < 3) exit(-1);
48 
49 	sfp = fopen(argv[1], "rb");	// open the source file for read;
50 	if (sfp == NULL)
51 	{
52 	    printf("Open the dictionary source file failed.");
53 	    return -1;
54 	}
55 
56 	// create the C source file to write
57 	cfp = fopen(argv[2], "wb");
58 	if (cfp == NULL) {
59 	    fclose(sfp);
60 	    printf("Can't create the C source file.");
61 	    return -1;
62 	}
63 
64 	fprintf(cfp, "/*\n");
65 	fprintf(cfp, " * Copyright(c) 1999 - 2000, Sun Microsystems, Inc.\n");
66 	fprintf(cfp, " * All Rights Reserved.\n");
67 	fprintf(cfp, " */\n\n");
68 	fprintf(cfp, "/* !!!The file is generated automatically. DONOT edit the file manually!!! */\n\n");
69 	fprintf(cfp, "#include <sal/types.h>\n\n");
70 	fprintf(cfp, "extern \"C\" {\n");
71 
72 	sal_Int32 count, i, j;
73 	sal_Int32 lenArrayCurr = 0, lenArrayCount = 0, lenArrayLen = 0, *lenArray = NULL, charArray[0x10000];
74 	sal_Bool exist[0x10000];
75 	for (i = 0; i < 0x10000; i++) {
76 	    exist[i] = sal_False;
77 	    charArray[i] = 0;
78 	}
79 
80 	// generate main dict. data array
81 	fprintf(cfp, "static const sal_Unicode dataArea[] = {");
82 	sal_Char str[1024];
83 	sal_Unicode current = 0;
84 	count = 0;
85 	while (fgets(str, 1024, sfp)) {
86 	    // input file is in UTF-8 encoding
87 	    // don't convert last new line character to Ostr.
88 	    OUString Ostr((const sal_Char *)str, strlen(str) - 1, RTL_TEXTENCODING_UTF8);
89 	    const sal_Unicode *u = Ostr.getStr();
90 
91 	    sal_Int32 len = Ostr.getLength();
92 
93         i=0;
94         Ostr.iterateCodePoints(&i, 1);
95 	    if (len == i) continue;	// skip one character word
96 
97 	    if (*u != current) {
98 		if (*u < current)
99 		printf("u %x, current %x, count %d, lenArrayCount %d\n", *u, current,
100 		            sal::static_int_cast<int>(count), sal::static_int_cast<int>(lenArrayCount));
101 		current = *u;
102 		charArray[current] = lenArrayCount;
103 	    }
104 
105 	    if (lenArrayLen <= lenArrayCount+1)
106 		lenArray = (sal_Int32*) realloc(lenArray, (lenArrayLen += 1000) * sizeof(sal_Int32));
107 	    lenArray[lenArrayCount++] = lenArrayCurr;
108 
109 	    exist[u[0]] = sal_True;
110 	    for (i = 1; i < len; i++) {		// start from second character,
111 		exist[u[i]] = sal_True; 	// since the first character is captured in charArray.
112 		lenArrayCurr++;
113 		if ((count++) % 0x10 == 0)
114 		    fprintf(cfp, "\n\t");
115 		fprintf(cfp, "0x%04x, ", u[i]);
116 	    }
117 	}
118 	lenArray[lenArrayCount++] = lenArrayCurr; // store last ending pointer
119 	charArray[current+1] = lenArrayCount;
120 	fprintf(cfp, "\n};\n");
121 
122 	// generate lenArray
123 	fprintf(cfp, "static const sal_Int32 lenArray[] = {\n\t");
124 	count = 1;
125 	fprintf(cfp, "0x%x, ", 0); // insert one slat for skipping 0 in index2 array.
126 	for (i = 0; i < lenArrayCount; i++) {
127 	    fprintf(cfp, "0x%lx, ", static_cast<long unsigned int>(lenArray[i]));
128 	    if (count == 0xf) {
129 		count = 0;
130 		fprintf(cfp, "\n\t");
131 	    } else count++;
132 	}
133 	fprintf(cfp, "\n};\n");
134 
135 	free(lenArray);
136 
137 	// generate index1 array
138 	fprintf (cfp, "static const sal_Int16 index1[] = {\n\t");
139 	sal_Int16 set[0x100];
140 	count = 0;
141 	for (i = 0; i < 0x100; i++) {
142 	    for (j = 0; j < 0x100; j++)
143 		if (charArray[(i*0x100) + j] != 0)
144 		    break;
145 
146 	    fprintf(cfp, "0x%02x, ", set[i] = (j < 0x100 ? sal::static_int_cast<sal_Int16>(count++) : 0xff));
147 	    if ((i+1) % 0x10 == 0)
148 		fprintf (cfp, "\n\t");
149 	}
150 	fprintf (cfp, "};\n");
151 
152 	// generate index2 array
153 	fprintf (cfp, "static const sal_Int32 index2[] = {\n\t");
154 	sal_Int32 prev = 0;
155 	for (i = 0; i < 0x100; i++) {
156 	    if (set[i] != 0xff) {
157 		for (j = 0; j < 0x100; j++) {
158 		    sal_Int32 k = (i*0x100) + j;
159 		    if (prev != 0 && charArray[k] == 0) {
160 			for (k++; k < 0x10000; k++)
161 			    if (charArray[k] != 0)
162 				break;
163 		    }
164 		    prev = charArray[(i*0x100) + j];
165 		    fprintf(
166                 cfp, "0x%lx, ",
167                 sal::static_int_cast< unsigned long >(
168                     k < 0x10000 ? charArray[k] + 1 : 0));
169 		    if ((j+1) % 0x10 == 0)
170 			fprintf (cfp, "\n\t");
171 		}
172 		fprintf (cfp, "\n\t");
173 	    }
174 	}
175 	fprintf (cfp, "\n};\n");
176 
177 	// generate existMark array
178 	count = 0;
179 	fprintf (cfp, "static const sal_uInt8 existMark[] = {\n\t");
180 	for (i = 0; i < 0x1FFF; i++) {
181 	    sal_uInt8 bit = 0;
182 	    for (j = 0; j < 8; j++)
183 		if (exist[i * 8 + j])
184 		    bit |= 1 << j;
185 	    fprintf(cfp, "0x%02x, ", bit);
186 	    if (count == 0xf) {
187 		count = 0;
188 		fprintf(cfp, "\n\t");
189 	    } else count++;
190 	}
191 	fprintf (cfp, "\n};\n");
192 
193 	// create function to return arrays
194 	fprintf (cfp, "\tconst sal_uInt8* getExistMark() { return existMark; }\n");
195 	fprintf (cfp, "\tconst sal_Int16* getIndex1() { return index1; }\n");
196 	fprintf (cfp, "\tconst sal_Int32* getIndex2() { return index2; }\n");
197 	fprintf (cfp, "\tconst sal_Int32* getLenArray() { return lenArray; }\n");
198 	fprintf (cfp, "\tconst sal_Unicode* getDataArea() { return dataArea; }\n");
199 	fprintf (cfp, "}\n");
200 
201 	fclose(sfp);
202 	fclose(cfp);
203 
204 	return 0;
205 }	// End of main
206