1*449ab281SAndrew Rist /************************************************************** 2cdf0e10cSrcweir * 3*449ab281SAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one 4*449ab281SAndrew Rist * or more contributor license agreements. See the NOTICE file 5*449ab281SAndrew Rist * distributed with this work for additional information 6*449ab281SAndrew Rist * regarding copyright ownership. The ASF licenses this file 7*449ab281SAndrew Rist * to you under the Apache License, Version 2.0 (the 8*449ab281SAndrew Rist * "License"); you may not use this file except in compliance 9*449ab281SAndrew Rist * with the License. You may obtain a copy of the License at 10*449ab281SAndrew Rist * 11*449ab281SAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0 12*449ab281SAndrew Rist * 13*449ab281SAndrew Rist * Unless required by applicable law or agreed to in writing, 14*449ab281SAndrew Rist * software distributed under the License is distributed on an 15*449ab281SAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16*449ab281SAndrew Rist * KIND, either express or implied. See the License for the 17*449ab281SAndrew Rist * specific language governing permissions and limitations 18*449ab281SAndrew Rist * under the License. 19*449ab281SAndrew Rist * 20*449ab281SAndrew Rist *************************************************************/ 21*449ab281SAndrew Rist 22*449ab281SAndrew Rist 23cdf0e10cSrcweir 24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 25cdf0e10cSrcweir #include "precompiled_i18npool.hxx" 26cdf0e10cSrcweir 27cdf0e10cSrcweir #include <stdio.h> 28cdf0e10cSrcweir #include <string.h> 29cdf0e10cSrcweir #include <stdlib.h> 30cdf0e10cSrcweir #include <sal/main.h> 31cdf0e10cSrcweir #include <sal/types.h> 32cdf0e10cSrcweir #include <rtl/strbuf.hxx> 33cdf0e10cSrcweir #include <rtl/ustring.hxx> 34cdf0e10cSrcweir 35cdf0e10cSrcweir using namespace ::rtl; 36cdf0e10cSrcweir 37cdf0e10cSrcweir /* Main Procedure */ 38cdf0e10cSrcweir 39cdf0e10cSrcweir SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) 40cdf0e10cSrcweir { 41cdf0e10cSrcweir FILE *sfp, *cfp; 42cdf0e10cSrcweir 43cdf0e10cSrcweir if (argc < 3) exit(-1); 44cdf0e10cSrcweir 45cdf0e10cSrcweir sfp = fopen(argv[1], "rb"); // open the source file for read; 46cdf0e10cSrcweir if (sfp == NULL) 47cdf0e10cSrcweir { 48cdf0e10cSrcweir printf("Open the dictionary source file failed."); 49cdf0e10cSrcweir return -1; 50cdf0e10cSrcweir } 51cdf0e10cSrcweir 52cdf0e10cSrcweir // create the C source file to write 53cdf0e10cSrcweir cfp = fopen(argv[2], "wb"); 54cdf0e10cSrcweir if (cfp == NULL) { 55cdf0e10cSrcweir fclose(sfp); 56cdf0e10cSrcweir printf("Can't create the C source file."); 57cdf0e10cSrcweir return -1; 58cdf0e10cSrcweir } 59cdf0e10cSrcweir 60cdf0e10cSrcweir fprintf(cfp, "/*\n"); 61cdf0e10cSrcweir fprintf(cfp, " * Copyright(c) 1999 - 2000, Sun Microsystems, Inc.\n"); 62cdf0e10cSrcweir fprintf(cfp, " * All Rights Reserved.\n"); 63cdf0e10cSrcweir fprintf(cfp, " */\n\n"); 64cdf0e10cSrcweir fprintf(cfp, "/* !!!The file is generated automatically. DONOT edit the file manually!!! */\n\n"); 65cdf0e10cSrcweir fprintf(cfp, "#include <sal/types.h>\n\n"); 66cdf0e10cSrcweir fprintf(cfp, "extern \"C\" {\n"); 67cdf0e10cSrcweir 68cdf0e10cSrcweir sal_Int32 count, i, j; 69cdf0e10cSrcweir sal_Int32 lenArrayCurr = 0, lenArrayCount = 0, lenArrayLen = 0, *lenArray = NULL, charArray[0x10000]; 70cdf0e10cSrcweir sal_Bool exist[0x10000]; 71cdf0e10cSrcweir for (i = 0; i < 0x10000; i++) { 72cdf0e10cSrcweir exist[i] = sal_False; 73cdf0e10cSrcweir charArray[i] = 0; 74cdf0e10cSrcweir } 75cdf0e10cSrcweir 76cdf0e10cSrcweir // generate main dict. data array 77cdf0e10cSrcweir fprintf(cfp, "static const sal_Unicode dataArea[] = {"); 78cdf0e10cSrcweir sal_Char str[1024]; 79cdf0e10cSrcweir sal_Unicode current = 0; 80cdf0e10cSrcweir count = 0; 81cdf0e10cSrcweir while (fgets(str, 1024, sfp)) { 82cdf0e10cSrcweir // input file is in UTF-8 encoding 83cdf0e10cSrcweir // don't convert last new line character to Ostr. 84cdf0e10cSrcweir OUString Ostr((const sal_Char *)str, strlen(str) - 1, RTL_TEXTENCODING_UTF8); 85cdf0e10cSrcweir const sal_Unicode *u = Ostr.getStr(); 86cdf0e10cSrcweir 87cdf0e10cSrcweir sal_Int32 len = Ostr.getLength(); 88cdf0e10cSrcweir 89cdf0e10cSrcweir i=0; 90cdf0e10cSrcweir Ostr.iterateCodePoints(&i, 1); 91cdf0e10cSrcweir if (len == i) continue; // skip one character word 92cdf0e10cSrcweir 93cdf0e10cSrcweir if (*u != current) { 94cdf0e10cSrcweir if (*u < current) 95cdf0e10cSrcweir printf("u %x, current %x, count %d, lenArrayCount %d\n", *u, current, 96cdf0e10cSrcweir sal::static_int_cast<int>(count), sal::static_int_cast<int>(lenArrayCount)); 97cdf0e10cSrcweir current = *u; 98cdf0e10cSrcweir charArray[current] = lenArrayCount; 99cdf0e10cSrcweir } 100cdf0e10cSrcweir 101cdf0e10cSrcweir if (lenArrayLen <= lenArrayCount+1) 102cdf0e10cSrcweir lenArray = (sal_Int32*) realloc(lenArray, (lenArrayLen += 1000) * sizeof(sal_Int32)); 103cdf0e10cSrcweir lenArray[lenArrayCount++] = lenArrayCurr; 104cdf0e10cSrcweir 105cdf0e10cSrcweir exist[u[0]] = sal_True; 106cdf0e10cSrcweir for (i = 1; i < len; i++) { // start from second character, 107cdf0e10cSrcweir exist[u[i]] = sal_True; // since the first character is captured in charArray. 108cdf0e10cSrcweir lenArrayCurr++; 109cdf0e10cSrcweir if ((count++) % 0x10 == 0) 110cdf0e10cSrcweir fprintf(cfp, "\n\t"); 111cdf0e10cSrcweir fprintf(cfp, "0x%04x, ", u[i]); 112cdf0e10cSrcweir } 113cdf0e10cSrcweir } 114cdf0e10cSrcweir lenArray[lenArrayCount++] = lenArrayCurr; // store last ending pointer 115cdf0e10cSrcweir charArray[current+1] = lenArrayCount; 116cdf0e10cSrcweir fprintf(cfp, "\n};\n"); 117cdf0e10cSrcweir 118cdf0e10cSrcweir // generate lenArray 119cdf0e10cSrcweir fprintf(cfp, "static const sal_Int32 lenArray[] = {\n\t"); 120cdf0e10cSrcweir count = 1; 121cdf0e10cSrcweir fprintf(cfp, "0x%x, ", 0); // insert one slat for skipping 0 in index2 array. 122cdf0e10cSrcweir for (i = 0; i < lenArrayCount; i++) { 123cdf0e10cSrcweir fprintf(cfp, "0x%lx, ", static_cast<long unsigned int>(lenArray[i])); 124cdf0e10cSrcweir if (count == 0xf) { 125cdf0e10cSrcweir count = 0; 126cdf0e10cSrcweir fprintf(cfp, "\n\t"); 127cdf0e10cSrcweir } else count++; 128cdf0e10cSrcweir } 129cdf0e10cSrcweir fprintf(cfp, "\n};\n"); 130cdf0e10cSrcweir 131cdf0e10cSrcweir free(lenArray); 132cdf0e10cSrcweir 133cdf0e10cSrcweir // generate index1 array 134cdf0e10cSrcweir fprintf (cfp, "static const sal_Int16 index1[] = {\n\t"); 135cdf0e10cSrcweir sal_Int16 set[0x100]; 136cdf0e10cSrcweir count = 0; 137cdf0e10cSrcweir for (i = 0; i < 0x100; i++) { 138cdf0e10cSrcweir for (j = 0; j < 0x100; j++) 139cdf0e10cSrcweir if (charArray[(i*0x100) + j] != 0) 140cdf0e10cSrcweir break; 141cdf0e10cSrcweir 142cdf0e10cSrcweir fprintf(cfp, "0x%02x, ", set[i] = (j < 0x100 ? sal::static_int_cast<sal_Int16>(count++) : 0xff)); 143cdf0e10cSrcweir if ((i+1) % 0x10 == 0) 144cdf0e10cSrcweir fprintf (cfp, "\n\t"); 145cdf0e10cSrcweir } 146cdf0e10cSrcweir fprintf (cfp, "};\n"); 147cdf0e10cSrcweir 148cdf0e10cSrcweir // generate index2 array 149cdf0e10cSrcweir fprintf (cfp, "static const sal_Int32 index2[] = {\n\t"); 150cdf0e10cSrcweir sal_Int32 prev = 0; 151cdf0e10cSrcweir for (i = 0; i < 0x100; i++) { 152cdf0e10cSrcweir if (set[i] != 0xff) { 153cdf0e10cSrcweir for (j = 0; j < 0x100; j++) { 154cdf0e10cSrcweir sal_Int32 k = (i*0x100) + j; 155cdf0e10cSrcweir if (prev != 0 && charArray[k] == 0) { 156cdf0e10cSrcweir for (k++; k < 0x10000; k++) 157cdf0e10cSrcweir if (charArray[k] != 0) 158cdf0e10cSrcweir break; 159cdf0e10cSrcweir } 160cdf0e10cSrcweir prev = charArray[(i*0x100) + j]; 161cdf0e10cSrcweir fprintf( 162cdf0e10cSrcweir cfp, "0x%lx, ", 163cdf0e10cSrcweir sal::static_int_cast< unsigned long >( 164cdf0e10cSrcweir k < 0x10000 ? charArray[k] + 1 : 0)); 165cdf0e10cSrcweir if ((j+1) % 0x10 == 0) 166cdf0e10cSrcweir fprintf (cfp, "\n\t"); 167cdf0e10cSrcweir } 168cdf0e10cSrcweir fprintf (cfp, "\n\t"); 169cdf0e10cSrcweir } 170cdf0e10cSrcweir } 171cdf0e10cSrcweir fprintf (cfp, "\n};\n"); 172cdf0e10cSrcweir 173cdf0e10cSrcweir // generate existMark array 174cdf0e10cSrcweir count = 0; 175cdf0e10cSrcweir fprintf (cfp, "static const sal_uInt8 existMark[] = {\n\t"); 176cdf0e10cSrcweir for (i = 0; i < 0x1FFF; i++) { 177cdf0e10cSrcweir sal_uInt8 bit = 0; 178cdf0e10cSrcweir for (j = 0; j < 8; j++) 179cdf0e10cSrcweir if (exist[i * 8 + j]) 180cdf0e10cSrcweir bit |= 1 << j; 181cdf0e10cSrcweir fprintf(cfp, "0x%02x, ", bit); 182cdf0e10cSrcweir if (count == 0xf) { 183cdf0e10cSrcweir count = 0; 184cdf0e10cSrcweir fprintf(cfp, "\n\t"); 185cdf0e10cSrcweir } else count++; 186cdf0e10cSrcweir } 187cdf0e10cSrcweir fprintf (cfp, "\n};\n"); 188cdf0e10cSrcweir 189cdf0e10cSrcweir // create function to return arrays 190cdf0e10cSrcweir fprintf (cfp, "\tconst sal_uInt8* getExistMark() { return existMark; }\n"); 191cdf0e10cSrcweir fprintf (cfp, "\tconst sal_Int16* getIndex1() { return index1; }\n"); 192cdf0e10cSrcweir fprintf (cfp, "\tconst sal_Int32* getIndex2() { return index2; }\n"); 193cdf0e10cSrcweir fprintf (cfp, "\tconst sal_Int32* getLenArray() { return lenArray; }\n"); 194cdf0e10cSrcweir fprintf (cfp, "\tconst sal_Unicode* getDataArea() { return dataArea; }\n"); 195cdf0e10cSrcweir fprintf (cfp, "}\n"); 196cdf0e10cSrcweir 197cdf0e10cSrcweir fclose(sfp); 198cdf0e10cSrcweir fclose(cfp); 199cdf0e10cSrcweir 200cdf0e10cSrcweir return 0; 201cdf0e10cSrcweir } // End of main 202