1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 // MARKER(update_precomp.py): autogen include statement, do not remove 25 #include "precompiled_i18npool.hxx" 26 27 #include <stdio.h> 28 #include <string.h> 29 #include <stdlib.h> 30 #include <sal/main.h> 31 #include <sal/types.h> 32 #include <rtl/strbuf.hxx> 33 #include <rtl/ustring.hxx> 34 35 using namespace ::rtl; 36 37 /* Main Procedure */ 38 39 SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) 40 { 41 FILE *sfp, *cfp; 42 43 if (argc < 3) exit(-1); 44 45 sfp = fopen(argv[1], "rb"); // open the source file for read; 46 if (sfp == NULL) 47 { 48 printf("Open the dictionary source file failed."); 49 return -1; 50 } 51 52 // create the C source file to write 53 cfp = fopen(argv[2], "wb"); 54 if (cfp == NULL) { 55 fclose(sfp); 56 printf("Can't create the C source file."); 57 return -1; 58 } 59 60 fprintf(cfp, "/*\n"); 61 fprintf(cfp, " * Copyright(c) 1999 - 2000, Sun Microsystems, Inc.\n"); 62 fprintf(cfp, " * All Rights Reserved.\n"); 63 fprintf(cfp, " */\n\n"); 64 fprintf(cfp, "/* !!!The file is generated automatically. DONOT edit the file manually!!! */\n\n"); 65 fprintf(cfp, "#include <sal/types.h>\n\n"); 66 fprintf(cfp, "extern \"C\" {\n"); 67 68 sal_Int32 count, i, j; 69 sal_Int32 lenArrayCurr = 0, lenArrayCount = 0, lenArrayLen = 0, *lenArray = NULL, charArray[0x10000]; 70 sal_Bool exist[0x10000]; 71 for (i = 0; i < 0x10000; i++) { 72 exist[i] = sal_False; 73 charArray[i] = 0; 74 } 75 76 // generate main dict. data array 77 fprintf(cfp, "static const sal_Unicode dataArea[] = {"); 78 sal_Char str[1024]; 79 sal_Unicode current = 0; 80 count = 0; 81 while (fgets(str, 1024, sfp)) { 82 // input file is in UTF-8 encoding 83 // don't convert last new line character to Ostr. 84 OUString Ostr((const sal_Char *)str, strlen(str) - 1, RTL_TEXTENCODING_UTF8); 85 const sal_Unicode *u = Ostr.getStr(); 86 87 sal_Int32 len = Ostr.getLength(); 88 89 i=0; 90 Ostr.iterateCodePoints(&i, 1); 91 if (len == i) continue; // skip one character word 92 93 if (*u != current) { 94 if (*u < current) 95 printf("u %x, current %x, count %d, lenArrayCount %d\n", *u, current, 96 sal::static_int_cast<int>(count), sal::static_int_cast<int>(lenArrayCount)); 97 current = *u; 98 charArray[current] = lenArrayCount; 99 } 100 101 if (lenArrayLen <= lenArrayCount+1) 102 lenArray = (sal_Int32*) realloc(lenArray, (lenArrayLen += 1000) * sizeof(sal_Int32)); 103 lenArray[lenArrayCount++] = lenArrayCurr; 104 105 exist[u[0]] = sal_True; 106 for (i = 1; i < len; i++) { // start from second character, 107 exist[u[i]] = sal_True; // since the first character is captured in charArray. 108 lenArrayCurr++; 109 if ((count++) % 0x10 == 0) 110 fprintf(cfp, "\n\t"); 111 fprintf(cfp, "0x%04x, ", u[i]); 112 } 113 } 114 lenArray[lenArrayCount++] = lenArrayCurr; // store last ending pointer 115 charArray[current+1] = lenArrayCount; 116 fprintf(cfp, "\n};\n"); 117 118 // generate lenArray 119 fprintf(cfp, "static const sal_Int32 lenArray[] = {\n\t"); 120 count = 1; 121 fprintf(cfp, "0x%x, ", 0); // insert one slat for skipping 0 in index2 array. 122 for (i = 0; i < lenArrayCount; i++) { 123 fprintf(cfp, "0x%lx, ", static_cast<long unsigned int>(lenArray[i])); 124 if (count == 0xf) { 125 count = 0; 126 fprintf(cfp, "\n\t"); 127 } else count++; 128 } 129 fprintf(cfp, "\n};\n"); 130 131 free(lenArray); 132 133 // generate index1 array 134 fprintf (cfp, "static const sal_Int16 index1[] = {\n\t"); 135 sal_Int16 set[0x100]; 136 count = 0; 137 for (i = 0; i < 0x100; i++) { 138 for (j = 0; j < 0x100; j++) 139 if (charArray[(i*0x100) + j] != 0) 140 break; 141 142 fprintf(cfp, "0x%02x, ", set[i] = (j < 0x100 ? sal::static_int_cast<sal_Int16>(count++) : 0xff)); 143 if ((i+1) % 0x10 == 0) 144 fprintf (cfp, "\n\t"); 145 } 146 fprintf (cfp, "};\n"); 147 148 // generate index2 array 149 fprintf (cfp, "static const sal_Int32 index2[] = {\n\t"); 150 sal_Int32 prev = 0; 151 for (i = 0; i < 0x100; i++) { 152 if (set[i] != 0xff) { 153 for (j = 0; j < 0x100; j++) { 154 sal_Int32 k = (i*0x100) + j; 155 if (prev != 0 && charArray[k] == 0) { 156 for (k++; k < 0x10000; k++) 157 if (charArray[k] != 0) 158 break; 159 } 160 prev = charArray[(i*0x100) + j]; 161 fprintf( 162 cfp, "0x%lx, ", 163 sal::static_int_cast< unsigned long >( 164 k < 0x10000 ? charArray[k] + 1 : 0)); 165 if ((j+1) % 0x10 == 0) 166 fprintf (cfp, "\n\t"); 167 } 168 fprintf (cfp, "\n\t"); 169 } 170 } 171 fprintf (cfp, "\n};\n"); 172 173 // generate existMark array 174 count = 0; 175 fprintf (cfp, "static const sal_uInt8 existMark[] = {\n\t"); 176 for (i = 0; i < 0x1FFF; i++) { 177 sal_uInt8 bit = 0; 178 for (j = 0; j < 8; j++) 179 if (exist[i * 8 + j]) 180 bit |= 1 << j; 181 fprintf(cfp, "0x%02x, ", bit); 182 if (count == 0xf) { 183 count = 0; 184 fprintf(cfp, "\n\t"); 185 } else count++; 186 } 187 fprintf (cfp, "\n};\n"); 188 189 // create function to return arrays 190 fprintf (cfp, "\tconst sal_uInt8* getExistMark() { return existMark; }\n"); 191 fprintf (cfp, "\tconst sal_Int16* getIndex1() { return index1; }\n"); 192 fprintf (cfp, "\tconst sal_Int32* getIndex2() { return index2; }\n"); 193 fprintf (cfp, "\tconst sal_Int32* getLenArray() { return lenArray; }\n"); 194 fprintf (cfp, "\tconst sal_Unicode* getDataArea() { return dataArea; }\n"); 195 fprintf (cfp, "}\n"); 196 197 fclose(sfp); 198 fclose(cfp); 199 200 return 0; 201 } // End of main 202