1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 // MARKER(update_precomp.py): autogen include statement, do not remove 29 #include "precompiled_i18npool.hxx" 30 31 #include <stdio.h> 32 #include <string.h> 33 #include <stdlib.h> 34 #include <sal/main.h> 35 #include <sal/types.h> 36 #include <rtl/strbuf.hxx> 37 #include <rtl/ustring.hxx> 38 39 using namespace ::rtl; 40 41 /* Main Procedure */ 42 43 SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) 44 { 45 FILE *sfp, *cfp; 46 47 if (argc < 3) exit(-1); 48 49 sfp = fopen(argv[1], "rb"); // open the source file for read; 50 if (sfp == NULL) 51 { 52 printf("Open the dictionary source file failed."); 53 return -1; 54 } 55 56 // create the C source file to write 57 cfp = fopen(argv[2], "wb"); 58 if (cfp == NULL) { 59 fclose(sfp); 60 printf("Can't create the C source file."); 61 return -1; 62 } 63 64 fprintf(cfp, "/*\n"); 65 fprintf(cfp, " * Copyright(c) 1999 - 2000, Sun Microsystems, Inc.\n"); 66 fprintf(cfp, " * All Rights Reserved.\n"); 67 fprintf(cfp, " */\n\n"); 68 fprintf(cfp, "/* !!!The file is generated automatically. DONOT edit the file manually!!! */\n\n"); 69 fprintf(cfp, "#include <sal/types.h>\n\n"); 70 fprintf(cfp, "extern \"C\" {\n"); 71 72 sal_Int32 count, i, j; 73 sal_Int32 lenArrayCurr = 0, lenArrayCount = 0, lenArrayLen = 0, *lenArray = NULL, charArray[0x10000]; 74 sal_Bool exist[0x10000]; 75 for (i = 0; i < 0x10000; i++) { 76 exist[i] = sal_False; 77 charArray[i] = 0; 78 } 79 80 // generate main dict. data array 81 fprintf(cfp, "static const sal_Unicode dataArea[] = {"); 82 sal_Char str[1024]; 83 sal_Unicode current = 0; 84 count = 0; 85 while (fgets(str, 1024, sfp)) { 86 // input file is in UTF-8 encoding 87 // don't convert last new line character to Ostr. 88 OUString Ostr((const sal_Char *)str, strlen(str) - 1, RTL_TEXTENCODING_UTF8); 89 const sal_Unicode *u = Ostr.getStr(); 90 91 sal_Int32 len = Ostr.getLength(); 92 93 i=0; 94 Ostr.iterateCodePoints(&i, 1); 95 if (len == i) continue; // skip one character word 96 97 if (*u != current) { 98 if (*u < current) 99 printf("u %x, current %x, count %d, lenArrayCount %d\n", *u, current, 100 sal::static_int_cast<int>(count), sal::static_int_cast<int>(lenArrayCount)); 101 current = *u; 102 charArray[current] = lenArrayCount; 103 } 104 105 if (lenArrayLen <= lenArrayCount+1) 106 lenArray = (sal_Int32*) realloc(lenArray, (lenArrayLen += 1000) * sizeof(sal_Int32)); 107 lenArray[lenArrayCount++] = lenArrayCurr; 108 109 exist[u[0]] = sal_True; 110 for (i = 1; i < len; i++) { // start from second character, 111 exist[u[i]] = sal_True; // since the first character is captured in charArray. 112 lenArrayCurr++; 113 if ((count++) % 0x10 == 0) 114 fprintf(cfp, "\n\t"); 115 fprintf(cfp, "0x%04x, ", u[i]); 116 } 117 } 118 lenArray[lenArrayCount++] = lenArrayCurr; // store last ending pointer 119 charArray[current+1] = lenArrayCount; 120 fprintf(cfp, "\n};\n"); 121 122 // generate lenArray 123 fprintf(cfp, "static const sal_Int32 lenArray[] = {\n\t"); 124 count = 1; 125 fprintf(cfp, "0x%x, ", 0); // insert one slat for skipping 0 in index2 array. 126 for (i = 0; i < lenArrayCount; i++) { 127 fprintf(cfp, "0x%lx, ", static_cast<long unsigned int>(lenArray[i])); 128 if (count == 0xf) { 129 count = 0; 130 fprintf(cfp, "\n\t"); 131 } else count++; 132 } 133 fprintf(cfp, "\n};\n"); 134 135 free(lenArray); 136 137 // generate index1 array 138 fprintf (cfp, "static const sal_Int16 index1[] = {\n\t"); 139 sal_Int16 set[0x100]; 140 count = 0; 141 for (i = 0; i < 0x100; i++) { 142 for (j = 0; j < 0x100; j++) 143 if (charArray[(i*0x100) + j] != 0) 144 break; 145 146 fprintf(cfp, "0x%02x, ", set[i] = (j < 0x100 ? sal::static_int_cast<sal_Int16>(count++) : 0xff)); 147 if ((i+1) % 0x10 == 0) 148 fprintf (cfp, "\n\t"); 149 } 150 fprintf (cfp, "};\n"); 151 152 // generate index2 array 153 fprintf (cfp, "static const sal_Int32 index2[] = {\n\t"); 154 sal_Int32 prev = 0; 155 for (i = 0; i < 0x100; i++) { 156 if (set[i] != 0xff) { 157 for (j = 0; j < 0x100; j++) { 158 sal_Int32 k = (i*0x100) + j; 159 if (prev != 0 && charArray[k] == 0) { 160 for (k++; k < 0x10000; k++) 161 if (charArray[k] != 0) 162 break; 163 } 164 prev = charArray[(i*0x100) + j]; 165 fprintf( 166 cfp, "0x%lx, ", 167 sal::static_int_cast< unsigned long >( 168 k < 0x10000 ? charArray[k] + 1 : 0)); 169 if ((j+1) % 0x10 == 0) 170 fprintf (cfp, "\n\t"); 171 } 172 fprintf (cfp, "\n\t"); 173 } 174 } 175 fprintf (cfp, "\n};\n"); 176 177 // generate existMark array 178 count = 0; 179 fprintf (cfp, "static const sal_uInt8 existMark[] = {\n\t"); 180 for (i = 0; i < 0x1FFF; i++) { 181 sal_uInt8 bit = 0; 182 for (j = 0; j < 8; j++) 183 if (exist[i * 8 + j]) 184 bit |= 1 << j; 185 fprintf(cfp, "0x%02x, ", bit); 186 if (count == 0xf) { 187 count = 0; 188 fprintf(cfp, "\n\t"); 189 } else count++; 190 } 191 fprintf (cfp, "\n};\n"); 192 193 // create function to return arrays 194 fprintf (cfp, "\tconst sal_uInt8* getExistMark() { return existMark; }\n"); 195 fprintf (cfp, "\tconst sal_Int16* getIndex1() { return index1; }\n"); 196 fprintf (cfp, "\tconst sal_Int32* getIndex2() { return index2; }\n"); 197 fprintf (cfp, "\tconst sal_Int32* getLenArray() { return lenArray; }\n"); 198 fprintf (cfp, "\tconst sal_Unicode* getDataArea() { return dataArea; }\n"); 199 fprintf (cfp, "}\n"); 200 201 fclose(sfp); 202 fclose(cfp); 203 204 return 0; 205 } // End of main 206