1*cdf0e10cSrcweir /************************************************************************* 2*cdf0e10cSrcweir * 3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4*cdf0e10cSrcweir * 5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates. 6*cdf0e10cSrcweir * 7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite 8*cdf0e10cSrcweir * 9*cdf0e10cSrcweir * This file is part of OpenOffice.org. 10*cdf0e10cSrcweir * 11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify 12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3 13*cdf0e10cSrcweir * only, as published by the Free Software Foundation. 14*cdf0e10cSrcweir * 15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful, 16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of 17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details 19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code). 20*cdf0e10cSrcweir * 21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License 22*cdf0e10cSrcweir * version 3 along with OpenOffice.org. If not, see 23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html> 24*cdf0e10cSrcweir * for a copy of the LGPLv3 License. 25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir 28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 29*cdf0e10cSrcweir #include "precompiled_i18npool.hxx" 30*cdf0e10cSrcweir 31*cdf0e10cSrcweir #include <stdio.h> 32*cdf0e10cSrcweir #include <string.h> 33*cdf0e10cSrcweir #include <stdlib.h> 34*cdf0e10cSrcweir #include <sal/main.h> 35*cdf0e10cSrcweir #include <sal/types.h> 36*cdf0e10cSrcweir #include <rtl/strbuf.hxx> 37*cdf0e10cSrcweir #include <rtl/ustring.hxx> 38*cdf0e10cSrcweir 39*cdf0e10cSrcweir using namespace ::rtl; 40*cdf0e10cSrcweir 41*cdf0e10cSrcweir /* Main Procedure */ 42*cdf0e10cSrcweir 43*cdf0e10cSrcweir SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) 44*cdf0e10cSrcweir { 45*cdf0e10cSrcweir FILE *sfp, *cfp; 46*cdf0e10cSrcweir 47*cdf0e10cSrcweir if (argc < 3) exit(-1); 48*cdf0e10cSrcweir 49*cdf0e10cSrcweir sfp = fopen(argv[1], "rb"); // open the source file for read; 50*cdf0e10cSrcweir if (sfp == NULL) 51*cdf0e10cSrcweir { 52*cdf0e10cSrcweir printf("Open the dictionary source file failed."); 53*cdf0e10cSrcweir return -1; 54*cdf0e10cSrcweir } 55*cdf0e10cSrcweir 56*cdf0e10cSrcweir // create the C source file to write 57*cdf0e10cSrcweir cfp = fopen(argv[2], "wb"); 58*cdf0e10cSrcweir if (cfp == NULL) { 59*cdf0e10cSrcweir fclose(sfp); 60*cdf0e10cSrcweir printf("Can't create the C source file."); 61*cdf0e10cSrcweir return -1; 62*cdf0e10cSrcweir } 63*cdf0e10cSrcweir 64*cdf0e10cSrcweir fprintf(cfp, "/*\n"); 65*cdf0e10cSrcweir fprintf(cfp, " * Copyright(c) 1999 - 2000, Sun Microsystems, Inc.\n"); 66*cdf0e10cSrcweir fprintf(cfp, " * All Rights Reserved.\n"); 67*cdf0e10cSrcweir fprintf(cfp, " */\n\n"); 68*cdf0e10cSrcweir fprintf(cfp, "/* !!!The file is generated automatically. DONOT edit the file manually!!! */\n\n"); 69*cdf0e10cSrcweir fprintf(cfp, "#include <sal/types.h>\n\n"); 70*cdf0e10cSrcweir fprintf(cfp, "extern \"C\" {\n"); 71*cdf0e10cSrcweir 72*cdf0e10cSrcweir sal_Int32 count, i, j; 73*cdf0e10cSrcweir sal_Int32 lenArrayCurr = 0, lenArrayCount = 0, lenArrayLen = 0, *lenArray = NULL, charArray[0x10000]; 74*cdf0e10cSrcweir sal_Bool exist[0x10000]; 75*cdf0e10cSrcweir for (i = 0; i < 0x10000; i++) { 76*cdf0e10cSrcweir exist[i] = sal_False; 77*cdf0e10cSrcweir charArray[i] = 0; 78*cdf0e10cSrcweir } 79*cdf0e10cSrcweir 80*cdf0e10cSrcweir // generate main dict. data array 81*cdf0e10cSrcweir fprintf(cfp, "static const sal_Unicode dataArea[] = {"); 82*cdf0e10cSrcweir sal_Char str[1024]; 83*cdf0e10cSrcweir sal_Unicode current = 0; 84*cdf0e10cSrcweir count = 0; 85*cdf0e10cSrcweir while (fgets(str, 1024, sfp)) { 86*cdf0e10cSrcweir // input file is in UTF-8 encoding 87*cdf0e10cSrcweir // don't convert last new line character to Ostr. 88*cdf0e10cSrcweir OUString Ostr((const sal_Char *)str, strlen(str) - 1, RTL_TEXTENCODING_UTF8); 89*cdf0e10cSrcweir const sal_Unicode *u = Ostr.getStr(); 90*cdf0e10cSrcweir 91*cdf0e10cSrcweir sal_Int32 len = Ostr.getLength(); 92*cdf0e10cSrcweir 93*cdf0e10cSrcweir i=0; 94*cdf0e10cSrcweir Ostr.iterateCodePoints(&i, 1); 95*cdf0e10cSrcweir if (len == i) continue; // skip one character word 96*cdf0e10cSrcweir 97*cdf0e10cSrcweir if (*u != current) { 98*cdf0e10cSrcweir if (*u < current) 99*cdf0e10cSrcweir printf("u %x, current %x, count %d, lenArrayCount %d\n", *u, current, 100*cdf0e10cSrcweir sal::static_int_cast<int>(count), sal::static_int_cast<int>(lenArrayCount)); 101*cdf0e10cSrcweir current = *u; 102*cdf0e10cSrcweir charArray[current] = lenArrayCount; 103*cdf0e10cSrcweir } 104*cdf0e10cSrcweir 105*cdf0e10cSrcweir if (lenArrayLen <= lenArrayCount+1) 106*cdf0e10cSrcweir lenArray = (sal_Int32*) realloc(lenArray, (lenArrayLen += 1000) * sizeof(sal_Int32)); 107*cdf0e10cSrcweir lenArray[lenArrayCount++] = lenArrayCurr; 108*cdf0e10cSrcweir 109*cdf0e10cSrcweir exist[u[0]] = sal_True; 110*cdf0e10cSrcweir for (i = 1; i < len; i++) { // start from second character, 111*cdf0e10cSrcweir exist[u[i]] = sal_True; // since the first character is captured in charArray. 112*cdf0e10cSrcweir lenArrayCurr++; 113*cdf0e10cSrcweir if ((count++) % 0x10 == 0) 114*cdf0e10cSrcweir fprintf(cfp, "\n\t"); 115*cdf0e10cSrcweir fprintf(cfp, "0x%04x, ", u[i]); 116*cdf0e10cSrcweir } 117*cdf0e10cSrcweir } 118*cdf0e10cSrcweir lenArray[lenArrayCount++] = lenArrayCurr; // store last ending pointer 119*cdf0e10cSrcweir charArray[current+1] = lenArrayCount; 120*cdf0e10cSrcweir fprintf(cfp, "\n};\n"); 121*cdf0e10cSrcweir 122*cdf0e10cSrcweir // generate lenArray 123*cdf0e10cSrcweir fprintf(cfp, "static const sal_Int32 lenArray[] = {\n\t"); 124*cdf0e10cSrcweir count = 1; 125*cdf0e10cSrcweir fprintf(cfp, "0x%x, ", 0); // insert one slat for skipping 0 in index2 array. 126*cdf0e10cSrcweir for (i = 0; i < lenArrayCount; i++) { 127*cdf0e10cSrcweir fprintf(cfp, "0x%lx, ", static_cast<long unsigned int>(lenArray[i])); 128*cdf0e10cSrcweir if (count == 0xf) { 129*cdf0e10cSrcweir count = 0; 130*cdf0e10cSrcweir fprintf(cfp, "\n\t"); 131*cdf0e10cSrcweir } else count++; 132*cdf0e10cSrcweir } 133*cdf0e10cSrcweir fprintf(cfp, "\n};\n"); 134*cdf0e10cSrcweir 135*cdf0e10cSrcweir free(lenArray); 136*cdf0e10cSrcweir 137*cdf0e10cSrcweir // generate index1 array 138*cdf0e10cSrcweir fprintf (cfp, "static const sal_Int16 index1[] = {\n\t"); 139*cdf0e10cSrcweir sal_Int16 set[0x100]; 140*cdf0e10cSrcweir count = 0; 141*cdf0e10cSrcweir for (i = 0; i < 0x100; i++) { 142*cdf0e10cSrcweir for (j = 0; j < 0x100; j++) 143*cdf0e10cSrcweir if (charArray[(i*0x100) + j] != 0) 144*cdf0e10cSrcweir break; 145*cdf0e10cSrcweir 146*cdf0e10cSrcweir fprintf(cfp, "0x%02x, ", set[i] = (j < 0x100 ? sal::static_int_cast<sal_Int16>(count++) : 0xff)); 147*cdf0e10cSrcweir if ((i+1) % 0x10 == 0) 148*cdf0e10cSrcweir fprintf (cfp, "\n\t"); 149*cdf0e10cSrcweir } 150*cdf0e10cSrcweir fprintf (cfp, "};\n"); 151*cdf0e10cSrcweir 152*cdf0e10cSrcweir // generate index2 array 153*cdf0e10cSrcweir fprintf (cfp, "static const sal_Int32 index2[] = {\n\t"); 154*cdf0e10cSrcweir sal_Int32 prev = 0; 155*cdf0e10cSrcweir for (i = 0; i < 0x100; i++) { 156*cdf0e10cSrcweir if (set[i] != 0xff) { 157*cdf0e10cSrcweir for (j = 0; j < 0x100; j++) { 158*cdf0e10cSrcweir sal_Int32 k = (i*0x100) + j; 159*cdf0e10cSrcweir if (prev != 0 && charArray[k] == 0) { 160*cdf0e10cSrcweir for (k++; k < 0x10000; k++) 161*cdf0e10cSrcweir if (charArray[k] != 0) 162*cdf0e10cSrcweir break; 163*cdf0e10cSrcweir } 164*cdf0e10cSrcweir prev = charArray[(i*0x100) + j]; 165*cdf0e10cSrcweir fprintf( 166*cdf0e10cSrcweir cfp, "0x%lx, ", 167*cdf0e10cSrcweir sal::static_int_cast< unsigned long >( 168*cdf0e10cSrcweir k < 0x10000 ? charArray[k] + 1 : 0)); 169*cdf0e10cSrcweir if ((j+1) % 0x10 == 0) 170*cdf0e10cSrcweir fprintf (cfp, "\n\t"); 171*cdf0e10cSrcweir } 172*cdf0e10cSrcweir fprintf (cfp, "\n\t"); 173*cdf0e10cSrcweir } 174*cdf0e10cSrcweir } 175*cdf0e10cSrcweir fprintf (cfp, "\n};\n"); 176*cdf0e10cSrcweir 177*cdf0e10cSrcweir // generate existMark array 178*cdf0e10cSrcweir count = 0; 179*cdf0e10cSrcweir fprintf (cfp, "static const sal_uInt8 existMark[] = {\n\t"); 180*cdf0e10cSrcweir for (i = 0; i < 0x1FFF; i++) { 181*cdf0e10cSrcweir sal_uInt8 bit = 0; 182*cdf0e10cSrcweir for (j = 0; j < 8; j++) 183*cdf0e10cSrcweir if (exist[i * 8 + j]) 184*cdf0e10cSrcweir bit |= 1 << j; 185*cdf0e10cSrcweir fprintf(cfp, "0x%02x, ", bit); 186*cdf0e10cSrcweir if (count == 0xf) { 187*cdf0e10cSrcweir count = 0; 188*cdf0e10cSrcweir fprintf(cfp, "\n\t"); 189*cdf0e10cSrcweir } else count++; 190*cdf0e10cSrcweir } 191*cdf0e10cSrcweir fprintf (cfp, "\n};\n"); 192*cdf0e10cSrcweir 193*cdf0e10cSrcweir // create function to return arrays 194*cdf0e10cSrcweir fprintf (cfp, "\tconst sal_uInt8* getExistMark() { return existMark; }\n"); 195*cdf0e10cSrcweir fprintf (cfp, "\tconst sal_Int16* getIndex1() { return index1; }\n"); 196*cdf0e10cSrcweir fprintf (cfp, "\tconst sal_Int32* getIndex2() { return index2; }\n"); 197*cdf0e10cSrcweir fprintf (cfp, "\tconst sal_Int32* getLenArray() { return lenArray; }\n"); 198*cdf0e10cSrcweir fprintf (cfp, "\tconst sal_Unicode* getDataArea() { return dataArea; }\n"); 199*cdf0e10cSrcweir fprintf (cfp, "}\n"); 200*cdf0e10cSrcweir 201*cdf0e10cSrcweir fclose(sfp); 202*cdf0e10cSrcweir fclose(cfp); 203*cdf0e10cSrcweir 204*cdf0e10cSrcweir return 0; 205*cdf0e10cSrcweir } // End of main 206