xref: /trunk/main/i18npool/source/breakiterator/gendict.cxx (revision cf6516809c57e1bb0a940545cca99cdad54d4ce2)
/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/
21449ab281SAndrew Rist 
22449ab281SAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_i18npool.hxx"
26cdf0e10cSrcweir 
27cdf0e10cSrcweir #include <stdio.h>
28cdf0e10cSrcweir #include <string.h>
29cdf0e10cSrcweir #include <stdlib.h>
30cdf0e10cSrcweir #include <sal/main.h>
31cdf0e10cSrcweir #include <sal/types.h>
32cdf0e10cSrcweir #include <rtl/strbuf.hxx>
33cdf0e10cSrcweir #include <rtl/ustring.hxx>
34cdf0e10cSrcweir 
35cdf0e10cSrcweir using namespace ::rtl;
36cdf0e10cSrcweir 
37cdf0e10cSrcweir /* Main Procedure */
38cdf0e10cSrcweir 
SAL_IMPLEMENT_MAIN_WITH_ARGS(argc,argv)39cdf0e10cSrcweir SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
40cdf0e10cSrcweir {
41cdf0e10cSrcweir     FILE *sfp, *cfp;
42cdf0e10cSrcweir 
43cdf0e10cSrcweir     if (argc < 3) exit(-1);
44cdf0e10cSrcweir 
45cdf0e10cSrcweir     sfp = fopen(argv[1], "rb"); // open the source file for read;
46cdf0e10cSrcweir     if (sfp == NULL)
47cdf0e10cSrcweir     {
48cdf0e10cSrcweir         printf("Open the dictionary source file failed.");
49cdf0e10cSrcweir         return -1;
50cdf0e10cSrcweir     }
51cdf0e10cSrcweir 
52cdf0e10cSrcweir     // create the C source file to write
53cdf0e10cSrcweir     cfp = fopen(argv[2], "wb");
54cdf0e10cSrcweir     if (cfp == NULL) {
55cdf0e10cSrcweir         fclose(sfp);
56cdf0e10cSrcweir         printf("Can't create the C source file.");
57cdf0e10cSrcweir         return -1;
58cdf0e10cSrcweir     }
59cdf0e10cSrcweir 
60cdf0e10cSrcweir     fprintf(cfp, "/*\n");
61cdf0e10cSrcweir     fprintf(cfp, " * Copyright(c) 1999 - 2000, Sun Microsystems, Inc.\n");
62cdf0e10cSrcweir     fprintf(cfp, " * All Rights Reserved.\n");
63cdf0e10cSrcweir     fprintf(cfp, " */\n\n");
64cdf0e10cSrcweir     fprintf(cfp, "/* !!!The file is generated automatically. DONOT edit the file manually!!! */\n\n");
65cdf0e10cSrcweir     fprintf(cfp, "#include <sal/types.h>\n\n");
66cdf0e10cSrcweir     fprintf(cfp, "extern \"C\" {\n");
67cdf0e10cSrcweir 
68cdf0e10cSrcweir     sal_Int32 count, i, j;
69cdf0e10cSrcweir     sal_Int32 lenArrayCurr = 0, lenArrayCount = 0, lenArrayLen = 0, *lenArray = NULL, charArray[0x10000];
70cdf0e10cSrcweir     sal_Bool exist[0x10000];
71cdf0e10cSrcweir     for (i = 0; i < 0x10000; i++) {
72cdf0e10cSrcweir         exist[i] = sal_False;
73cdf0e10cSrcweir         charArray[i] = 0;
74cdf0e10cSrcweir     }
75cdf0e10cSrcweir 
76cdf0e10cSrcweir     // generate main dict. data array
77cdf0e10cSrcweir     fprintf(cfp, "static const sal_Unicode dataArea[] = {");
78cdf0e10cSrcweir     sal_Char str[1024];
79cdf0e10cSrcweir     sal_Unicode current = 0;
80cdf0e10cSrcweir     count = 0;
81cdf0e10cSrcweir     while (fgets(str, 1024, sfp)) {
82cdf0e10cSrcweir         // input file is in UTF-8 encoding
83cdf0e10cSrcweir         // don't convert last new line character to Ostr.
84cdf0e10cSrcweir         OUString Ostr((const sal_Char *)str, strlen(str) - 1, RTL_TEXTENCODING_UTF8);
85cdf0e10cSrcweir         const sal_Unicode *u = Ostr.getStr();
86cdf0e10cSrcweir 
87cdf0e10cSrcweir         sal_Int32 len = Ostr.getLength();
88cdf0e10cSrcweir 
89cdf0e10cSrcweir         i=0;
90cdf0e10cSrcweir         Ostr.iterateCodePoints(&i, 1);
91cdf0e10cSrcweir         if (len == i) continue; // skip one character word
92cdf0e10cSrcweir 
93cdf0e10cSrcweir         if (*u != current) {
94cdf0e10cSrcweir         if (*u < current)
95cdf0e10cSrcweir         printf("u %x, current %x, count %d, lenArrayCount %d\n", *u, current,
96cdf0e10cSrcweir                     sal::static_int_cast<int>(count), sal::static_int_cast<int>(lenArrayCount));
97cdf0e10cSrcweir         current = *u;
98cdf0e10cSrcweir         charArray[current] = lenArrayCount;
99cdf0e10cSrcweir         }
100cdf0e10cSrcweir 
101cdf0e10cSrcweir         if (lenArrayLen <= lenArrayCount+1)
102cdf0e10cSrcweir         lenArray = (sal_Int32*) realloc(lenArray, (lenArrayLen += 1000) * sizeof(sal_Int32));
103cdf0e10cSrcweir         lenArray[lenArrayCount++] = lenArrayCurr;
104cdf0e10cSrcweir 
105cdf0e10cSrcweir         exist[u[0]] = sal_True;
106cdf0e10cSrcweir         for (i = 1; i < len; i++) {     // start from second character,
107cdf0e10cSrcweir         exist[u[i]] = sal_True;     // since the first character is captured in charArray.
108cdf0e10cSrcweir         lenArrayCurr++;
109cdf0e10cSrcweir         if ((count++) % 0x10 == 0)
110cdf0e10cSrcweir             fprintf(cfp, "\n\t");
111cdf0e10cSrcweir         fprintf(cfp, "0x%04x, ", u[i]);
112cdf0e10cSrcweir         }
113cdf0e10cSrcweir     }
114cdf0e10cSrcweir     lenArray[lenArrayCount++] = lenArrayCurr; // store last ending pointer
115cdf0e10cSrcweir     charArray[current+1] = lenArrayCount;
116cdf0e10cSrcweir     fprintf(cfp, "\n};\n");
117cdf0e10cSrcweir 
118cdf0e10cSrcweir     // generate lenArray
119cdf0e10cSrcweir     fprintf(cfp, "static const sal_Int32 lenArray[] = {\n\t");
120cdf0e10cSrcweir     count = 1;
121cdf0e10cSrcweir     fprintf(cfp, "0x%x, ", 0); // insert one slat for skipping 0 in index2 array.
122cdf0e10cSrcweir     for (i = 0; i < lenArrayCount; i++) {
123cdf0e10cSrcweir         fprintf(cfp, "0x%lx, ", static_cast<long unsigned int>(lenArray[i]));
124cdf0e10cSrcweir         if (count == 0xf) {
125cdf0e10cSrcweir         count = 0;
126cdf0e10cSrcweir         fprintf(cfp, "\n\t");
127cdf0e10cSrcweir         } else count++;
128cdf0e10cSrcweir     }
129cdf0e10cSrcweir     fprintf(cfp, "\n};\n");
130cdf0e10cSrcweir 
131cdf0e10cSrcweir     free(lenArray);
132cdf0e10cSrcweir 
133cdf0e10cSrcweir     // generate index1 array
134cdf0e10cSrcweir     fprintf (cfp, "static const sal_Int16 index1[] = {\n\t");
135cdf0e10cSrcweir     sal_Int16 set[0x100];
136cdf0e10cSrcweir     count = 0;
137cdf0e10cSrcweir     for (i = 0; i < 0x100; i++) {
138cdf0e10cSrcweir         for (j = 0; j < 0x100; j++)
139cdf0e10cSrcweir         if (charArray[(i*0x100) + j] != 0)
140cdf0e10cSrcweir             break;
141cdf0e10cSrcweir 
142cdf0e10cSrcweir         fprintf(cfp, "0x%02x, ", set[i] = (j < 0x100 ? sal::static_int_cast<sal_Int16>(count++) : 0xff));
143cdf0e10cSrcweir         if ((i+1) % 0x10 == 0)
144cdf0e10cSrcweir         fprintf (cfp, "\n\t");
145cdf0e10cSrcweir     }
146cdf0e10cSrcweir     fprintf (cfp, "};\n");
147cdf0e10cSrcweir 
148cdf0e10cSrcweir     // generate index2 array
149cdf0e10cSrcweir     fprintf (cfp, "static const sal_Int32 index2[] = {\n\t");
150cdf0e10cSrcweir     sal_Int32 prev = 0;
151cdf0e10cSrcweir     for (i = 0; i < 0x100; i++) {
152cdf0e10cSrcweir         if (set[i] != 0xff) {
153cdf0e10cSrcweir         for (j = 0; j < 0x100; j++) {
154cdf0e10cSrcweir             sal_Int32 k = (i*0x100) + j;
155cdf0e10cSrcweir             if (prev != 0 && charArray[k] == 0) {
156cdf0e10cSrcweir             for (k++; k < 0x10000; k++)
157cdf0e10cSrcweir                 if (charArray[k] != 0)
158cdf0e10cSrcweir                 break;
159cdf0e10cSrcweir             }
160cdf0e10cSrcweir             prev = charArray[(i*0x100) + j];
161cdf0e10cSrcweir             fprintf(
162cdf0e10cSrcweir                 cfp, "0x%lx, ",
163cdf0e10cSrcweir                 sal::static_int_cast< unsigned long >(
164cdf0e10cSrcweir                     k < 0x10000 ? charArray[k] + 1 : 0));
165cdf0e10cSrcweir             if ((j+1) % 0x10 == 0)
166cdf0e10cSrcweir             fprintf (cfp, "\n\t");
167cdf0e10cSrcweir         }
168cdf0e10cSrcweir         fprintf (cfp, "\n\t");
169cdf0e10cSrcweir         }
170cdf0e10cSrcweir     }
171cdf0e10cSrcweir     fprintf (cfp, "\n};\n");
172cdf0e10cSrcweir 
173cdf0e10cSrcweir     // generate existMark array
174cdf0e10cSrcweir     count = 0;
175cdf0e10cSrcweir     fprintf (cfp, "static const sal_uInt8 existMark[] = {\n\t");
176cdf0e10cSrcweir     for (i = 0; i < 0x1FFF; i++) {
177cdf0e10cSrcweir         sal_uInt8 bit = 0;
178cdf0e10cSrcweir         for (j = 0; j < 8; j++)
179cdf0e10cSrcweir         if (exist[i * 8 + j])
180cdf0e10cSrcweir             bit |= 1 << j;
181cdf0e10cSrcweir         fprintf(cfp, "0x%02x, ", bit);
182cdf0e10cSrcweir         if (count == 0xf) {
183cdf0e10cSrcweir         count = 0;
184cdf0e10cSrcweir         fprintf(cfp, "\n\t");
185cdf0e10cSrcweir         } else count++;
186cdf0e10cSrcweir     }
187cdf0e10cSrcweir     fprintf (cfp, "\n};\n");
188cdf0e10cSrcweir 
189cdf0e10cSrcweir     // create function to return arrays
190*7f6ffbefSDamjan Jovanovic     fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT const sal_uInt8* getExistMark() { return existMark; }\n");
191*7f6ffbefSDamjan Jovanovic     fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT const sal_Int16* getIndex1() { return index1; }\n");
192*7f6ffbefSDamjan Jovanovic     fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT const sal_Int32* getIndex2() { return index2; }\n");
193*7f6ffbefSDamjan Jovanovic     fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT const sal_Int32* getLenArray() { return lenArray; }\n");
194*7f6ffbefSDamjan Jovanovic     fprintf (cfp, "\tSAL_DLLPUBLIC_EXPORT const sal_Unicode* getDataArea() { return dataArea; }\n");
195cdf0e10cSrcweir     fprintf (cfp, "}\n");
196cdf0e10cSrcweir 
197cdf0e10cSrcweir     fclose(sfp);
198cdf0e10cSrcweir     fclose(cfp);
199cdf0e10cSrcweir 
200cdf0e10cSrcweir     return 0;
201cdf0e10cSrcweir }   // End of main
202