1*cdf0e10cSrcweir /************************************************************************* 2*cdf0e10cSrcweir * 3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4*cdf0e10cSrcweir * 5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates. 6*cdf0e10cSrcweir * 7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite 8*cdf0e10cSrcweir * 9*cdf0e10cSrcweir * This file is part of OpenOffice.org. 10*cdf0e10cSrcweir * 11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify 12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3 13*cdf0e10cSrcweir * only, as published by the Free Software Foundation. 14*cdf0e10cSrcweir * 15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful, 16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of 17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details 19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code). 20*cdf0e10cSrcweir * 21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License 22*cdf0e10cSrcweir * version 3 along with OpenOffice.org. If not, see 23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html> 24*cdf0e10cSrcweir * for a copy of the LGPLv3 License. 25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir 28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 29*cdf0e10cSrcweir #include "precompiled_i18npool.hxx" 30*cdf0e10cSrcweir 31*cdf0e10cSrcweir #include <cclass_unicode.hxx> 32*cdf0e10cSrcweir #include <unicode/uchar.h> 33*cdf0e10cSrcweir #include <rtl/math.hxx> 34*cdf0e10cSrcweir #include <rtl/ustring.hxx> 35*cdf0e10cSrcweir #include <com/sun/star/i18n/KParseTokens.hpp> 36*cdf0e10cSrcweir #include <com/sun/star/i18n/KParseType.hpp> 37*cdf0e10cSrcweir #include <com/sun/star/i18n/UnicodeType.hpp> 38*cdf0e10cSrcweir #include <com/sun/star/i18n/XLocaleData.hpp> 39*cdf0e10cSrcweir #include <com/sun/star/i18n/NativeNumberMode.hpp> 40*cdf0e10cSrcweir 41*cdf0e10cSrcweir #include <string.h> // memcpy() 42*cdf0e10cSrcweir 43*cdf0e10cSrcweir using namespace ::com::sun::star::uno; 44*cdf0e10cSrcweir using namespace ::com::sun::star::lang; 45*cdf0e10cSrcweir using namespace ::rtl; 46*cdf0e10cSrcweir 47*cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n { 48*cdf0e10cSrcweir 49*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_ILLEGAL = 0x00000000; 50*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR = 0x00000001; 51*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_BOOL = 0x00000002; 52*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_WORD = 0x00000004; 53*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_VALUE = 0x00000008; 54*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_STRING = 0x00000010; 55*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_DONTCARE= 0x00000020; 56*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_BOOL = 0x00000040; 57*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD = 0x00000080; 58*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD_SEP = 0x00000100; 59*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE = 0x00000200; 60*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SEP = 0x00000400; 61*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP = 0x00000800; 62*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SIGN = 0x00001000; 63*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP_VALUE = 0x00002000; 64*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_DIGIT = 0x00004000; 65*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_NAME_SEP = 0x20000000; 66*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_STRING_SEP = 0x40000000; 67*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_EXCLUDED = 0x80000000; 68*cdf0e10cSrcweir 69*cdf0e10cSrcweir #define TOKEN_DIGIT_FLAGS (TOKEN_CHAR_VALUE | TOKEN_VALUE | TOKEN_VALUE_EXP | TOKEN_VALUE_EXP_VALUE | TOKEN_VALUE_DIGIT) 70*cdf0e10cSrcweir 71*cdf0e10cSrcweir // Default identifier/name specification is [A-Za-z_][A-Za-z0-9_]* 72*cdf0e10cSrcweir 73*cdf0e10cSrcweir const sal_uInt8 cclass_Unicode::nDefCnt = 128; 74*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::pDefaultParserTable[ nDefCnt ] = 75*cdf0e10cSrcweir { 76*cdf0e10cSrcweir // (...) == Calc formula compiler specific, commented out and modified 77*cdf0e10cSrcweir 78*cdf0e10cSrcweir /* \0 */ TOKEN_EXCLUDED, 79*cdf0e10cSrcweir TOKEN_ILLEGAL, 80*cdf0e10cSrcweir TOKEN_ILLEGAL, 81*cdf0e10cSrcweir TOKEN_ILLEGAL, 82*cdf0e10cSrcweir TOKEN_ILLEGAL, 83*cdf0e10cSrcweir TOKEN_ILLEGAL, 84*cdf0e10cSrcweir TOKEN_ILLEGAL, 85*cdf0e10cSrcweir TOKEN_ILLEGAL, 86*cdf0e10cSrcweir TOKEN_ILLEGAL, 87*cdf0e10cSrcweir /* 9 \t */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL) 88*cdf0e10cSrcweir TOKEN_ILLEGAL, 89*cdf0e10cSrcweir /* 11 \v */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL) 90*cdf0e10cSrcweir TOKEN_ILLEGAL, 91*cdf0e10cSrcweir TOKEN_ILLEGAL, 92*cdf0e10cSrcweir TOKEN_ILLEGAL, 93*cdf0e10cSrcweir TOKEN_ILLEGAL, 94*cdf0e10cSrcweir TOKEN_ILLEGAL, 95*cdf0e10cSrcweir TOKEN_ILLEGAL, 96*cdf0e10cSrcweir TOKEN_ILLEGAL, 97*cdf0e10cSrcweir TOKEN_ILLEGAL, 98*cdf0e10cSrcweir TOKEN_ILLEGAL, 99*cdf0e10cSrcweir TOKEN_ILLEGAL, 100*cdf0e10cSrcweir TOKEN_ILLEGAL, 101*cdf0e10cSrcweir TOKEN_ILLEGAL, 102*cdf0e10cSrcweir TOKEN_ILLEGAL, 103*cdf0e10cSrcweir TOKEN_ILLEGAL, 104*cdf0e10cSrcweir TOKEN_ILLEGAL, 105*cdf0e10cSrcweir TOKEN_ILLEGAL, 106*cdf0e10cSrcweir TOKEN_ILLEGAL, 107*cdf0e10cSrcweir TOKEN_ILLEGAL, 108*cdf0e10cSrcweir TOKEN_ILLEGAL, 109*cdf0e10cSrcweir TOKEN_ILLEGAL, 110*cdf0e10cSrcweir /* 32 */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 111*cdf0e10cSrcweir /* 33 ! */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 112*cdf0e10cSrcweir /* 34 " */ TOKEN_CHAR_STRING | TOKEN_STRING_SEP, 113*cdf0e10cSrcweir /* 35 # */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD_SEP) 114*cdf0e10cSrcweir /* 36 $ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_WORD | TOKEN_WORD) 115*cdf0e10cSrcweir /* 37 % */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_VALUE) 116*cdf0e10cSrcweir /* 38 & */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 117*cdf0e10cSrcweir /* 39 ' */ TOKEN_NAME_SEP, 118*cdf0e10cSrcweir /* 40 ( */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 119*cdf0e10cSrcweir /* 41 ) */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 120*cdf0e10cSrcweir /* 42 * */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 121*cdf0e10cSrcweir /* 43 + */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN, 122*cdf0e10cSrcweir /* 44 , */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_VALUE | TOKEN_VALUE) 123*cdf0e10cSrcweir /* 45 - */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN, 124*cdf0e10cSrcweir /* 46 . */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD | TOKEN_CHAR_VALUE | TOKEN_VALUE) 125*cdf0e10cSrcweir /* 47 / */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 126*cdf0e10cSrcweir //for ( i = 48; i < 58; i++ ) 127*cdf0e10cSrcweir /* 48 0 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 128*cdf0e10cSrcweir /* 49 1 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 129*cdf0e10cSrcweir /* 50 2 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 130*cdf0e10cSrcweir /* 51 3 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 131*cdf0e10cSrcweir /* 52 4 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 132*cdf0e10cSrcweir /* 53 5 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 133*cdf0e10cSrcweir /* 54 6 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 134*cdf0e10cSrcweir /* 55 7 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 135*cdf0e10cSrcweir /* 56 8 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 136*cdf0e10cSrcweir /* 57 9 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 137*cdf0e10cSrcweir /* 58 : */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD) 138*cdf0e10cSrcweir /* 59 ; */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 139*cdf0e10cSrcweir /* 60 < */ TOKEN_CHAR_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 140*cdf0e10cSrcweir /* 61 = */ TOKEN_CHAR | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 141*cdf0e10cSrcweir /* 62 > */ TOKEN_CHAR_BOOL | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 142*cdf0e10cSrcweir /* 63 ? */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_WORD | TOKEN_WORD) 143*cdf0e10cSrcweir /* 64 @ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 144*cdf0e10cSrcweir //for ( i = 65; i < 91; i++ ) 145*cdf0e10cSrcweir /* 65 A */ TOKEN_CHAR_WORD | TOKEN_WORD, 146*cdf0e10cSrcweir /* 66 B */ TOKEN_CHAR_WORD | TOKEN_WORD, 147*cdf0e10cSrcweir /* 67 C */ TOKEN_CHAR_WORD | TOKEN_WORD, 148*cdf0e10cSrcweir /* 68 D */ TOKEN_CHAR_WORD | TOKEN_WORD, 149*cdf0e10cSrcweir /* 69 E */ TOKEN_CHAR_WORD | TOKEN_WORD, 150*cdf0e10cSrcweir /* 70 F */ TOKEN_CHAR_WORD | TOKEN_WORD, 151*cdf0e10cSrcweir /* 71 G */ TOKEN_CHAR_WORD | TOKEN_WORD, 152*cdf0e10cSrcweir /* 72 H */ TOKEN_CHAR_WORD | TOKEN_WORD, 153*cdf0e10cSrcweir /* 73 I */ TOKEN_CHAR_WORD | TOKEN_WORD, 154*cdf0e10cSrcweir /* 74 J */ TOKEN_CHAR_WORD | TOKEN_WORD, 155*cdf0e10cSrcweir /* 75 K */ TOKEN_CHAR_WORD | TOKEN_WORD, 156*cdf0e10cSrcweir /* 76 L */ TOKEN_CHAR_WORD | TOKEN_WORD, 157*cdf0e10cSrcweir /* 77 M */ TOKEN_CHAR_WORD | TOKEN_WORD, 158*cdf0e10cSrcweir /* 78 N */ TOKEN_CHAR_WORD | TOKEN_WORD, 159*cdf0e10cSrcweir /* 79 O */ TOKEN_CHAR_WORD | TOKEN_WORD, 160*cdf0e10cSrcweir /* 80 P */ TOKEN_CHAR_WORD | TOKEN_WORD, 161*cdf0e10cSrcweir /* 81 Q */ TOKEN_CHAR_WORD | TOKEN_WORD, 162*cdf0e10cSrcweir /* 82 R */ TOKEN_CHAR_WORD | TOKEN_WORD, 163*cdf0e10cSrcweir /* 83 S */ TOKEN_CHAR_WORD | TOKEN_WORD, 164*cdf0e10cSrcweir /* 84 T */ TOKEN_CHAR_WORD | TOKEN_WORD, 165*cdf0e10cSrcweir /* 85 U */ TOKEN_CHAR_WORD | TOKEN_WORD, 166*cdf0e10cSrcweir /* 86 V */ TOKEN_CHAR_WORD | TOKEN_WORD, 167*cdf0e10cSrcweir /* 87 W */ TOKEN_CHAR_WORD | TOKEN_WORD, 168*cdf0e10cSrcweir /* 88 X */ TOKEN_CHAR_WORD | TOKEN_WORD, 169*cdf0e10cSrcweir /* 89 Y */ TOKEN_CHAR_WORD | TOKEN_WORD, 170*cdf0e10cSrcweir /* 90 Z */ TOKEN_CHAR_WORD | TOKEN_WORD, 171*cdf0e10cSrcweir /* 91 [ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 172*cdf0e10cSrcweir /* 92 \ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 173*cdf0e10cSrcweir /* 93 ] */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 174*cdf0e10cSrcweir /* 94 ^ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 175*cdf0e10cSrcweir /* 95 _ */ TOKEN_CHAR_WORD | TOKEN_WORD, 176*cdf0e10cSrcweir /* 96 ` */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 177*cdf0e10cSrcweir //for ( i = 97; i < 123; i++ ) 178*cdf0e10cSrcweir /* 97 a */ TOKEN_CHAR_WORD | TOKEN_WORD, 179*cdf0e10cSrcweir /* 98 b */ TOKEN_CHAR_WORD | TOKEN_WORD, 180*cdf0e10cSrcweir /* 99 c */ TOKEN_CHAR_WORD | TOKEN_WORD, 181*cdf0e10cSrcweir /* 100 d */ TOKEN_CHAR_WORD | TOKEN_WORD, 182*cdf0e10cSrcweir /* 101 e */ TOKEN_CHAR_WORD | TOKEN_WORD, 183*cdf0e10cSrcweir /* 102 f */ TOKEN_CHAR_WORD | TOKEN_WORD, 184*cdf0e10cSrcweir /* 103 g */ TOKEN_CHAR_WORD | TOKEN_WORD, 185*cdf0e10cSrcweir /* 104 h */ TOKEN_CHAR_WORD | TOKEN_WORD, 186*cdf0e10cSrcweir /* 105 i */ TOKEN_CHAR_WORD | TOKEN_WORD, 187*cdf0e10cSrcweir /* 106 j */ TOKEN_CHAR_WORD | TOKEN_WORD, 188*cdf0e10cSrcweir /* 107 k */ TOKEN_CHAR_WORD | TOKEN_WORD, 189*cdf0e10cSrcweir /* 108 l */ TOKEN_CHAR_WORD | TOKEN_WORD, 190*cdf0e10cSrcweir /* 109 m */ TOKEN_CHAR_WORD | TOKEN_WORD, 191*cdf0e10cSrcweir /* 110 n */ TOKEN_CHAR_WORD | TOKEN_WORD, 192*cdf0e10cSrcweir /* 111 o */ TOKEN_CHAR_WORD | TOKEN_WORD, 193*cdf0e10cSrcweir /* 112 p */ TOKEN_CHAR_WORD | TOKEN_WORD, 194*cdf0e10cSrcweir /* 113 q */ TOKEN_CHAR_WORD | TOKEN_WORD, 195*cdf0e10cSrcweir /* 114 r */ TOKEN_CHAR_WORD | TOKEN_WORD, 196*cdf0e10cSrcweir /* 115 s */ TOKEN_CHAR_WORD | TOKEN_WORD, 197*cdf0e10cSrcweir /* 116 t */ TOKEN_CHAR_WORD | TOKEN_WORD, 198*cdf0e10cSrcweir /* 117 u */ TOKEN_CHAR_WORD | TOKEN_WORD, 199*cdf0e10cSrcweir /* 118 v */ TOKEN_CHAR_WORD | TOKEN_WORD, 200*cdf0e10cSrcweir /* 119 w */ TOKEN_CHAR_WORD | TOKEN_WORD, 201*cdf0e10cSrcweir /* 120 x */ TOKEN_CHAR_WORD | TOKEN_WORD, 202*cdf0e10cSrcweir /* 121 y */ TOKEN_CHAR_WORD | TOKEN_WORD, 203*cdf0e10cSrcweir /* 122 z */ TOKEN_CHAR_WORD | TOKEN_WORD, 204*cdf0e10cSrcweir /* 123 { */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 205*cdf0e10cSrcweir /* 124 | */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 206*cdf0e10cSrcweir /* 125 } */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 207*cdf0e10cSrcweir /* 126 ~ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 208*cdf0e10cSrcweir /* 127 */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP // (TOKEN_ILLEGAL // UNUSED) 209*cdf0e10cSrcweir }; 210*cdf0e10cSrcweir 211*cdf0e10cSrcweir 212*cdf0e10cSrcweir const sal_Int32 cclass_Unicode::pParseTokensType[ nDefCnt ] = 213*cdf0e10cSrcweir { 214*cdf0e10cSrcweir /* \0 */ KParseTokens::ASC_OTHER, 215*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 216*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 217*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 218*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 219*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 220*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 221*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 222*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 223*cdf0e10cSrcweir /* 9 \t */ KParseTokens::ASC_CONTROL, 224*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 225*cdf0e10cSrcweir /* 11 \v */ KParseTokens::ASC_CONTROL, 226*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 227*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 228*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 229*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 230*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 231*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 232*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 233*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 234*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 235*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 236*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 237*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 238*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 239*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 240*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 241*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 242*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 243*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 244*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 245*cdf0e10cSrcweir KParseTokens::ASC_CONTROL, 246*cdf0e10cSrcweir /* 32 */ KParseTokens::ASC_OTHER, 247*cdf0e10cSrcweir /* 33 ! */ KParseTokens::ASC_OTHER, 248*cdf0e10cSrcweir /* 34 " */ KParseTokens::ASC_OTHER, 249*cdf0e10cSrcweir /* 35 # */ KParseTokens::ASC_OTHER, 250*cdf0e10cSrcweir /* 36 $ */ KParseTokens::ASC_DOLLAR, 251*cdf0e10cSrcweir /* 37 % */ KParseTokens::ASC_OTHER, 252*cdf0e10cSrcweir /* 38 & */ KParseTokens::ASC_OTHER, 253*cdf0e10cSrcweir /* 39 ' */ KParseTokens::ASC_OTHER, 254*cdf0e10cSrcweir /* 40 ( */ KParseTokens::ASC_OTHER, 255*cdf0e10cSrcweir /* 41 ) */ KParseTokens::ASC_OTHER, 256*cdf0e10cSrcweir /* 42 * */ KParseTokens::ASC_OTHER, 257*cdf0e10cSrcweir /* 43 + */ KParseTokens::ASC_OTHER, 258*cdf0e10cSrcweir /* 44 , */ KParseTokens::ASC_OTHER, 259*cdf0e10cSrcweir /* 45 - */ KParseTokens::ASC_OTHER, 260*cdf0e10cSrcweir /* 46 . */ KParseTokens::ASC_DOT, 261*cdf0e10cSrcweir /* 47 / */ KParseTokens::ASC_OTHER, 262*cdf0e10cSrcweir //for ( i = 48; i < 58; i++ ) 263*cdf0e10cSrcweir /* 48 0 */ KParseTokens::ASC_DIGIT, 264*cdf0e10cSrcweir /* 49 1 */ KParseTokens::ASC_DIGIT, 265*cdf0e10cSrcweir /* 50 2 */ KParseTokens::ASC_DIGIT, 266*cdf0e10cSrcweir /* 51 3 */ KParseTokens::ASC_DIGIT, 267*cdf0e10cSrcweir /* 52 4 */ KParseTokens::ASC_DIGIT, 268*cdf0e10cSrcweir /* 53 5 */ KParseTokens::ASC_DIGIT, 269*cdf0e10cSrcweir /* 54 6 */ KParseTokens::ASC_DIGIT, 270*cdf0e10cSrcweir /* 55 7 */ KParseTokens::ASC_DIGIT, 271*cdf0e10cSrcweir /* 56 8 */ KParseTokens::ASC_DIGIT, 272*cdf0e10cSrcweir /* 57 9 */ KParseTokens::ASC_DIGIT, 273*cdf0e10cSrcweir /* 58 : */ KParseTokens::ASC_COLON, 274*cdf0e10cSrcweir /* 59 ; */ KParseTokens::ASC_OTHER, 275*cdf0e10cSrcweir /* 60 < */ KParseTokens::ASC_OTHER, 276*cdf0e10cSrcweir /* 61 = */ KParseTokens::ASC_OTHER, 277*cdf0e10cSrcweir /* 62 > */ KParseTokens::ASC_OTHER, 278*cdf0e10cSrcweir /* 63 ? */ KParseTokens::ASC_OTHER, 279*cdf0e10cSrcweir /* 64 @ */ KParseTokens::ASC_OTHER, 280*cdf0e10cSrcweir //for ( i = 65; i < 91; i++ ) 281*cdf0e10cSrcweir /* 65 A */ KParseTokens::ASC_UPALPHA, 282*cdf0e10cSrcweir /* 66 B */ KParseTokens::ASC_UPALPHA, 283*cdf0e10cSrcweir /* 67 C */ KParseTokens::ASC_UPALPHA, 284*cdf0e10cSrcweir /* 68 D */ KParseTokens::ASC_UPALPHA, 285*cdf0e10cSrcweir /* 69 E */ KParseTokens::ASC_UPALPHA, 286*cdf0e10cSrcweir /* 70 F */ KParseTokens::ASC_UPALPHA, 287*cdf0e10cSrcweir /* 71 G */ KParseTokens::ASC_UPALPHA, 288*cdf0e10cSrcweir /* 72 H */ KParseTokens::ASC_UPALPHA, 289*cdf0e10cSrcweir /* 73 I */ KParseTokens::ASC_UPALPHA, 290*cdf0e10cSrcweir /* 74 J */ KParseTokens::ASC_UPALPHA, 291*cdf0e10cSrcweir /* 75 K */ KParseTokens::ASC_UPALPHA, 292*cdf0e10cSrcweir /* 76 L */ KParseTokens::ASC_UPALPHA, 293*cdf0e10cSrcweir /* 77 M */ KParseTokens::ASC_UPALPHA, 294*cdf0e10cSrcweir /* 78 N */ KParseTokens::ASC_UPALPHA, 295*cdf0e10cSrcweir /* 79 O */ KParseTokens::ASC_UPALPHA, 296*cdf0e10cSrcweir /* 80 P */ KParseTokens::ASC_UPALPHA, 297*cdf0e10cSrcweir /* 81 Q */ KParseTokens::ASC_UPALPHA, 298*cdf0e10cSrcweir /* 82 R */ KParseTokens::ASC_UPALPHA, 299*cdf0e10cSrcweir /* 83 S */ KParseTokens::ASC_UPALPHA, 300*cdf0e10cSrcweir /* 84 T */ KParseTokens::ASC_UPALPHA, 301*cdf0e10cSrcweir /* 85 U */ KParseTokens::ASC_UPALPHA, 302*cdf0e10cSrcweir /* 86 V */ KParseTokens::ASC_UPALPHA, 303*cdf0e10cSrcweir /* 87 W */ KParseTokens::ASC_UPALPHA, 304*cdf0e10cSrcweir /* 88 X */ KParseTokens::ASC_UPALPHA, 305*cdf0e10cSrcweir /* 89 Y */ KParseTokens::ASC_UPALPHA, 306*cdf0e10cSrcweir /* 90 Z */ KParseTokens::ASC_UPALPHA, 307*cdf0e10cSrcweir /* 91 [ */ KParseTokens::ASC_OTHER, 308*cdf0e10cSrcweir /* 92 \ */ KParseTokens::ASC_OTHER, 309*cdf0e10cSrcweir /* 93 ] */ KParseTokens::ASC_OTHER, 310*cdf0e10cSrcweir /* 94 ^ */ KParseTokens::ASC_OTHER, 311*cdf0e10cSrcweir /* 95 _ */ KParseTokens::ASC_UNDERSCORE, 312*cdf0e10cSrcweir /* 96 ` */ KParseTokens::ASC_OTHER, 313*cdf0e10cSrcweir //for ( i = 97; i < 123; i++ ) 314*cdf0e10cSrcweir /* 97 a */ KParseTokens::ASC_LOALPHA, 315*cdf0e10cSrcweir /* 98 b */ KParseTokens::ASC_LOALPHA, 316*cdf0e10cSrcweir /* 99 c */ KParseTokens::ASC_LOALPHA, 317*cdf0e10cSrcweir /* 100 d */ KParseTokens::ASC_LOALPHA, 318*cdf0e10cSrcweir /* 101 e */ KParseTokens::ASC_LOALPHA, 319*cdf0e10cSrcweir /* 102 f */ KParseTokens::ASC_LOALPHA, 320*cdf0e10cSrcweir /* 103 g */ KParseTokens::ASC_LOALPHA, 321*cdf0e10cSrcweir /* 104 h */ KParseTokens::ASC_LOALPHA, 322*cdf0e10cSrcweir /* 105 i */ KParseTokens::ASC_LOALPHA, 323*cdf0e10cSrcweir /* 106 j */ KParseTokens::ASC_LOALPHA, 324*cdf0e10cSrcweir /* 107 k */ KParseTokens::ASC_LOALPHA, 325*cdf0e10cSrcweir /* 108 l */ KParseTokens::ASC_LOALPHA, 326*cdf0e10cSrcweir /* 109 m */ KParseTokens::ASC_LOALPHA, 327*cdf0e10cSrcweir /* 110 n */ KParseTokens::ASC_LOALPHA, 328*cdf0e10cSrcweir /* 111 o */ KParseTokens::ASC_LOALPHA, 329*cdf0e10cSrcweir /* 112 p */ KParseTokens::ASC_LOALPHA, 330*cdf0e10cSrcweir /* 113 q */ KParseTokens::ASC_LOALPHA, 331*cdf0e10cSrcweir /* 114 r */ KParseTokens::ASC_LOALPHA, 332*cdf0e10cSrcweir /* 115 s */ KParseTokens::ASC_LOALPHA, 333*cdf0e10cSrcweir /* 116 t */ KParseTokens::ASC_LOALPHA, 334*cdf0e10cSrcweir /* 117 u */ KParseTokens::ASC_LOALPHA, 335*cdf0e10cSrcweir /* 118 v */ KParseTokens::ASC_LOALPHA, 336*cdf0e10cSrcweir /* 119 w */ KParseTokens::ASC_LOALPHA, 337*cdf0e10cSrcweir /* 120 x */ KParseTokens::ASC_LOALPHA, 338*cdf0e10cSrcweir /* 121 y */ KParseTokens::ASC_LOALPHA, 339*cdf0e10cSrcweir /* 122 z */ KParseTokens::ASC_LOALPHA, 340*cdf0e10cSrcweir /* 123 { */ KParseTokens::ASC_OTHER, 341*cdf0e10cSrcweir /* 124 | */ KParseTokens::ASC_OTHER, 342*cdf0e10cSrcweir /* 125 } */ KParseTokens::ASC_OTHER, 343*cdf0e10cSrcweir /* 126 ~ */ KParseTokens::ASC_OTHER, 344*cdf0e10cSrcweir /* 127 */ KParseTokens::ASC_OTHER 345*cdf0e10cSrcweir }; 346*cdf0e10cSrcweir 347*cdf0e10cSrcweir 348*cdf0e10cSrcweir // static 349*cdf0e10cSrcweir const sal_Unicode* cclass_Unicode::StrChr( const sal_Unicode* pStr, sal_Unicode c ) 350*cdf0e10cSrcweir { 351*cdf0e10cSrcweir if ( !pStr ) 352*cdf0e10cSrcweir return NULL; 353*cdf0e10cSrcweir while ( *pStr ) 354*cdf0e10cSrcweir { 355*cdf0e10cSrcweir if ( *pStr == c ) 356*cdf0e10cSrcweir return pStr; 357*cdf0e10cSrcweir pStr++; 358*cdf0e10cSrcweir } 359*cdf0e10cSrcweir return NULL; 360*cdf0e10cSrcweir } 361*cdf0e10cSrcweir 362*cdf0e10cSrcweir 363*cdf0e10cSrcweir sal_Int32 cclass_Unicode::getParseTokensType( const sal_Unicode* aStr, sal_Int32 nPos ) 364*cdf0e10cSrcweir { 365*cdf0e10cSrcweir sal_Unicode c = aStr[nPos]; 366*cdf0e10cSrcweir if ( c < nDefCnt ) 367*cdf0e10cSrcweir return pParseTokensType[ sal_uInt8(c) ]; 368*cdf0e10cSrcweir else 369*cdf0e10cSrcweir { 370*cdf0e10cSrcweir 371*cdf0e10cSrcweir //! all KParseTokens::UNI_... must be matched 372*cdf0e10cSrcweir switch ( u_charType( (sal_uInt32) c ) ) 373*cdf0e10cSrcweir { 374*cdf0e10cSrcweir case U_UPPERCASE_LETTER : 375*cdf0e10cSrcweir return KParseTokens::UNI_UPALPHA; 376*cdf0e10cSrcweir case U_LOWERCASE_LETTER : 377*cdf0e10cSrcweir return KParseTokens::UNI_LOALPHA; 378*cdf0e10cSrcweir case U_TITLECASE_LETTER : 379*cdf0e10cSrcweir return KParseTokens::UNI_TITLE_ALPHA; 380*cdf0e10cSrcweir case U_MODIFIER_LETTER : 381*cdf0e10cSrcweir return KParseTokens::UNI_MODIFIER_LETTER; 382*cdf0e10cSrcweir case U_OTHER_LETTER : 383*cdf0e10cSrcweir // Non_Spacing_Mark could not be as leading character 384*cdf0e10cSrcweir if (nPos == 0) break; 385*cdf0e10cSrcweir // fall through, treat it as Other_Letter. 386*cdf0e10cSrcweir case U_NON_SPACING_MARK : 387*cdf0e10cSrcweir return KParseTokens::UNI_OTHER_LETTER; 388*cdf0e10cSrcweir case U_DECIMAL_DIGIT_NUMBER : 389*cdf0e10cSrcweir return KParseTokens::UNI_DIGIT; 390*cdf0e10cSrcweir case U_LETTER_NUMBER : 391*cdf0e10cSrcweir return KParseTokens::UNI_LETTER_NUMBER; 392*cdf0e10cSrcweir case U_OTHER_NUMBER : 393*cdf0e10cSrcweir return KParseTokens::UNI_OTHER_NUMBER; 394*cdf0e10cSrcweir } 395*cdf0e10cSrcweir 396*cdf0e10cSrcweir return KParseTokens::UNI_OTHER; 397*cdf0e10cSrcweir } 398*cdf0e10cSrcweir } 399*cdf0e10cSrcweir 400*cdf0e10cSrcweir sal_Bool cclass_Unicode::setupInternational( const Locale& rLocale ) 401*cdf0e10cSrcweir { 402*cdf0e10cSrcweir sal_Bool bChanged = (aParserLocale.Language != rLocale.Language 403*cdf0e10cSrcweir || aParserLocale.Country != rLocale.Country 404*cdf0e10cSrcweir || aParserLocale.Variant != rLocale.Variant); 405*cdf0e10cSrcweir if ( bChanged ) 406*cdf0e10cSrcweir { 407*cdf0e10cSrcweir aParserLocale.Language = rLocale.Language; 408*cdf0e10cSrcweir aParserLocale.Country = rLocale.Country; 409*cdf0e10cSrcweir aParserLocale.Variant = rLocale.Variant; 410*cdf0e10cSrcweir } 411*cdf0e10cSrcweir if ( !xLocaleData.is() && xMSF.is() ) 412*cdf0e10cSrcweir { 413*cdf0e10cSrcweir Reference < 414*cdf0e10cSrcweir XInterface > xI = 415*cdf0e10cSrcweir xMSF->createInstance( OUString( 416*cdf0e10cSrcweir RTL_CONSTASCII_USTRINGPARAM( "com.sun.star.i18n.LocaleData" ) ) ); 417*cdf0e10cSrcweir if ( xI.is() ) 418*cdf0e10cSrcweir { 419*cdf0e10cSrcweir Any x = xI->queryInterface( getCppuType((const Reference< XLocaleData>*)0) ); 420*cdf0e10cSrcweir x >>= xLocaleData; 421*cdf0e10cSrcweir } 422*cdf0e10cSrcweir } 423*cdf0e10cSrcweir return bChanged; 424*cdf0e10cSrcweir } 425*cdf0e10cSrcweir 426*cdf0e10cSrcweir 427*cdf0e10cSrcweir void cclass_Unicode::setupParserTable( const Locale& rLocale, sal_Int32 startCharTokenType, 428*cdf0e10cSrcweir const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType, 429*cdf0e10cSrcweir const OUString& userDefinedCharactersCont ) 430*cdf0e10cSrcweir { 431*cdf0e10cSrcweir bool bIntlEqual = (rLocale.Language == aParserLocale.Language && 432*cdf0e10cSrcweir rLocale.Country == aParserLocale.Country && 433*cdf0e10cSrcweir rLocale.Variant == aParserLocale.Variant); 434*cdf0e10cSrcweir if ( !pTable || !bIntlEqual || 435*cdf0e10cSrcweir startCharTokenType != nStartTypes || 436*cdf0e10cSrcweir contCharTokenType != nContTypes || 437*cdf0e10cSrcweir userDefinedCharactersStart != aStartChars || 438*cdf0e10cSrcweir userDefinedCharactersCont != aContChars ) 439*cdf0e10cSrcweir initParserTable( rLocale, startCharTokenType, userDefinedCharactersStart, 440*cdf0e10cSrcweir contCharTokenType, userDefinedCharactersCont ); 441*cdf0e10cSrcweir } 442*cdf0e10cSrcweir 443*cdf0e10cSrcweir 444*cdf0e10cSrcweir void cclass_Unicode::initParserTable( const Locale& rLocale, sal_Int32 startCharTokenType, 445*cdf0e10cSrcweir const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType, 446*cdf0e10cSrcweir const OUString& userDefinedCharactersCont ) 447*cdf0e10cSrcweir { 448*cdf0e10cSrcweir // (Re)Init 449*cdf0e10cSrcweir setupInternational( rLocale ); 450*cdf0e10cSrcweir // Memory of pTable is reused. 451*cdf0e10cSrcweir if ( !pTable ) 452*cdf0e10cSrcweir pTable = new UPT_FLAG_TYPE[nDefCnt]; 453*cdf0e10cSrcweir memcpy( pTable, pDefaultParserTable, sizeof(UPT_FLAG_TYPE) * nDefCnt ); 454*cdf0e10cSrcweir // Start and cont tables only need reallocation if different length. 455*cdf0e10cSrcweir if ( pStart && userDefinedCharactersStart.getLength() != aStartChars.getLength() ) 456*cdf0e10cSrcweir { 457*cdf0e10cSrcweir delete [] pStart; 458*cdf0e10cSrcweir pStart = NULL; 459*cdf0e10cSrcweir } 460*cdf0e10cSrcweir if ( pCont && userDefinedCharactersCont.getLength() != aContChars.getLength() ) 461*cdf0e10cSrcweir { 462*cdf0e10cSrcweir delete [] pCont; 463*cdf0e10cSrcweir pCont = NULL; 464*cdf0e10cSrcweir } 465*cdf0e10cSrcweir nStartTypes = startCharTokenType; 466*cdf0e10cSrcweir nContTypes = contCharTokenType; 467*cdf0e10cSrcweir aStartChars = userDefinedCharactersStart; 468*cdf0e10cSrcweir aContChars = userDefinedCharactersCont; 469*cdf0e10cSrcweir 470*cdf0e10cSrcweir // specials 471*cdf0e10cSrcweir if( xLocaleData.is() ) 472*cdf0e10cSrcweir { 473*cdf0e10cSrcweir LocaleDataItem aItem = 474*cdf0e10cSrcweir xLocaleData->getLocaleItem( aParserLocale ); 475*cdf0e10cSrcweir //!TODO: theoretically separators may be a string, adjustment would have to be 476*cdf0e10cSrcweir //! done here and in parsing and in ::rtl::math::stringToDouble() 477*cdf0e10cSrcweir cGroupSep = aItem.thousandSeparator.getStr()[0]; 478*cdf0e10cSrcweir cDecimalSep = aItem.decimalSeparator.getStr()[0]; 479*cdf0e10cSrcweir } 480*cdf0e10cSrcweir 481*cdf0e10cSrcweir if ( cGroupSep < nDefCnt ) 482*cdf0e10cSrcweir pTable[cGroupSep] |= TOKEN_VALUE; 483*cdf0e10cSrcweir if ( cDecimalSep < nDefCnt ) 484*cdf0e10cSrcweir pTable[cDecimalSep] |= TOKEN_CHAR_VALUE | TOKEN_VALUE; 485*cdf0e10cSrcweir 486*cdf0e10cSrcweir // Modify characters according to KParseTokens definitions. 487*cdf0e10cSrcweir { 488*cdf0e10cSrcweir using namespace KParseTokens; 489*cdf0e10cSrcweir sal_uInt8 i; 490*cdf0e10cSrcweir 491*cdf0e10cSrcweir if ( !(nStartTypes & ASC_UPALPHA) ) 492*cdf0e10cSrcweir for ( i = 65; i < 91; i++ ) 493*cdf0e10cSrcweir pTable[i] &= ~TOKEN_CHAR_WORD; // not allowed as start character 494*cdf0e10cSrcweir if ( !(nContTypes & ASC_UPALPHA) ) 495*cdf0e10cSrcweir for ( i = 65; i < 91; i++ ) 496*cdf0e10cSrcweir pTable[i] &= ~TOKEN_WORD; // not allowed as cont character 497*cdf0e10cSrcweir 498*cdf0e10cSrcweir if ( !(nStartTypes & ASC_LOALPHA) ) 499*cdf0e10cSrcweir for ( i = 97; i < 123; i++ ) 500*cdf0e10cSrcweir pTable[i] &= ~TOKEN_CHAR_WORD; // not allowed as start character 501*cdf0e10cSrcweir if ( !(nContTypes & ASC_LOALPHA) ) 502*cdf0e10cSrcweir for ( i = 97; i < 123; i++ ) 503*cdf0e10cSrcweir pTable[i] &= ~TOKEN_WORD; // not allowed as cont character 504*cdf0e10cSrcweir 505*cdf0e10cSrcweir if ( nStartTypes & ASC_DIGIT ) 506*cdf0e10cSrcweir for ( i = 48; i < 58; i++ ) 507*cdf0e10cSrcweir pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character 508*cdf0e10cSrcweir if ( !(nContTypes & ASC_DIGIT) ) 509*cdf0e10cSrcweir for ( i = 48; i < 58; i++ ) 510*cdf0e10cSrcweir pTable[i] &= ~TOKEN_WORD; // not allowed as cont character 511*cdf0e10cSrcweir 512*cdf0e10cSrcweir if ( !(nStartTypes & ASC_UNDERSCORE) ) 513*cdf0e10cSrcweir pTable[95] &= ~TOKEN_CHAR_WORD; // not allowed as start character 514*cdf0e10cSrcweir if ( !(nContTypes & ASC_UNDERSCORE) ) 515*cdf0e10cSrcweir pTable[95] &= ~TOKEN_WORD; // not allowed as cont character 516*cdf0e10cSrcweir 517*cdf0e10cSrcweir if ( nStartTypes & ASC_DOLLAR ) 518*cdf0e10cSrcweir pTable[36] |= TOKEN_CHAR_WORD; // allowed as start character 519*cdf0e10cSrcweir if ( nContTypes & ASC_DOLLAR ) 520*cdf0e10cSrcweir pTable[36] |= TOKEN_WORD; // allowed as cont character 521*cdf0e10cSrcweir 522*cdf0e10cSrcweir if ( nStartTypes & ASC_DOT ) 523*cdf0e10cSrcweir pTable[46] |= TOKEN_CHAR_WORD; // allowed as start character 524*cdf0e10cSrcweir if ( nContTypes & ASC_DOT ) 525*cdf0e10cSrcweir pTable[46] |= TOKEN_WORD; // allowed as cont character 526*cdf0e10cSrcweir 527*cdf0e10cSrcweir if ( nStartTypes & ASC_COLON ) 528*cdf0e10cSrcweir pTable[58] |= TOKEN_CHAR_WORD; // allowed as start character 529*cdf0e10cSrcweir if ( nContTypes & ASC_COLON ) 530*cdf0e10cSrcweir pTable[58] |= TOKEN_WORD; // allowed as cont character 531*cdf0e10cSrcweir 532*cdf0e10cSrcweir if ( nStartTypes & ASC_CONTROL ) 533*cdf0e10cSrcweir for ( i = 1; i < 32; i++ ) 534*cdf0e10cSrcweir pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character 535*cdf0e10cSrcweir if ( nContTypes & ASC_CONTROL ) 536*cdf0e10cSrcweir for ( i = 1; i < 32; i++ ) 537*cdf0e10cSrcweir pTable[i] |= TOKEN_WORD; // allowed as cont character 538*cdf0e10cSrcweir 539*cdf0e10cSrcweir if ( nStartTypes & ASC_ANY_BUT_CONTROL ) 540*cdf0e10cSrcweir for ( i = 32; i < nDefCnt; i++ ) 541*cdf0e10cSrcweir pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character 542*cdf0e10cSrcweir if ( nContTypes & ASC_ANY_BUT_CONTROL ) 543*cdf0e10cSrcweir for ( i = 32; i < nDefCnt; i++ ) 544*cdf0e10cSrcweir pTable[i] |= TOKEN_WORD; // allowed as cont character 545*cdf0e10cSrcweir 546*cdf0e10cSrcweir } 547*cdf0e10cSrcweir 548*cdf0e10cSrcweir // Merge in (positively override with) user defined characters. 549*cdf0e10cSrcweir // StartChars 550*cdf0e10cSrcweir sal_Int32 nLen = aStartChars.getLength(); 551*cdf0e10cSrcweir if ( nLen ) 552*cdf0e10cSrcweir { 553*cdf0e10cSrcweir if ( !pStart ) 554*cdf0e10cSrcweir pStart = new UPT_FLAG_TYPE[ nLen ]; 555*cdf0e10cSrcweir const sal_Unicode* p = aStartChars.getStr(); 556*cdf0e10cSrcweir for ( sal_Int32 j=0; j<nLen; j++, p++ ) 557*cdf0e10cSrcweir { 558*cdf0e10cSrcweir pStart[j] = TOKEN_CHAR_WORD; 559*cdf0e10cSrcweir if ( *p < nDefCnt ) 560*cdf0e10cSrcweir pTable[*p] |= TOKEN_CHAR_WORD; 561*cdf0e10cSrcweir } 562*cdf0e10cSrcweir } 563*cdf0e10cSrcweir // ContChars 564*cdf0e10cSrcweir nLen = aContChars.getLength(); 565*cdf0e10cSrcweir if ( nLen ) 566*cdf0e10cSrcweir { 567*cdf0e10cSrcweir if ( !pCont ) 568*cdf0e10cSrcweir pCont = new UPT_FLAG_TYPE[ nLen ]; 569*cdf0e10cSrcweir const sal_Unicode* p = aContChars.getStr(); 570*cdf0e10cSrcweir for ( sal_Int32 j=0; j<nLen; j++ ) 571*cdf0e10cSrcweir { 572*cdf0e10cSrcweir pCont[j] = TOKEN_WORD; 573*cdf0e10cSrcweir if ( *p < nDefCnt ) 574*cdf0e10cSrcweir pTable[*p] |= TOKEN_WORD; 575*cdf0e10cSrcweir } 576*cdf0e10cSrcweir } 577*cdf0e10cSrcweir } 578*cdf0e10cSrcweir 579*cdf0e10cSrcweir 580*cdf0e10cSrcweir void cclass_Unicode::destroyParserTable() 581*cdf0e10cSrcweir { 582*cdf0e10cSrcweir if ( pCont ) 583*cdf0e10cSrcweir delete [] pCont; 584*cdf0e10cSrcweir if ( pStart ) 585*cdf0e10cSrcweir delete [] pStart; 586*cdf0e10cSrcweir if ( pTable ) 587*cdf0e10cSrcweir delete [] pTable; 588*cdf0e10cSrcweir } 589*cdf0e10cSrcweir 590*cdf0e10cSrcweir 591*cdf0e10cSrcweir UPT_FLAG_TYPE cclass_Unicode::getFlags( const sal_Unicode* aStr, sal_Int32 nPos ) 592*cdf0e10cSrcweir { 593*cdf0e10cSrcweir UPT_FLAG_TYPE nMask; 594*cdf0e10cSrcweir sal_Unicode c = aStr[nPos]; 595*cdf0e10cSrcweir if ( c < nDefCnt ) 596*cdf0e10cSrcweir nMask = pTable[ sal_uInt8(c) ]; 597*cdf0e10cSrcweir else 598*cdf0e10cSrcweir nMask = getFlagsExtended( aStr, nPos ); 599*cdf0e10cSrcweir switch ( eState ) 600*cdf0e10cSrcweir { 601*cdf0e10cSrcweir case ssGetChar : 602*cdf0e10cSrcweir case ssRewindFromValue : 603*cdf0e10cSrcweir case ssIgnoreLeadingInRewind : 604*cdf0e10cSrcweir case ssGetWordFirstChar : 605*cdf0e10cSrcweir if ( !(nMask & TOKEN_CHAR_WORD) ) 606*cdf0e10cSrcweir { 607*cdf0e10cSrcweir nMask |= getStartCharsFlags( c ); 608*cdf0e10cSrcweir if ( nMask & TOKEN_CHAR_WORD ) 609*cdf0e10cSrcweir nMask &= ~TOKEN_EXCLUDED; 610*cdf0e10cSrcweir } 611*cdf0e10cSrcweir break; 612*cdf0e10cSrcweir case ssGetValue : 613*cdf0e10cSrcweir case ssGetWord : 614*cdf0e10cSrcweir if ( !(nMask & TOKEN_WORD) ) 615*cdf0e10cSrcweir { 616*cdf0e10cSrcweir nMask |= getContCharsFlags( c ); 617*cdf0e10cSrcweir if ( nMask & TOKEN_WORD ) 618*cdf0e10cSrcweir nMask &= ~TOKEN_EXCLUDED; 619*cdf0e10cSrcweir } 620*cdf0e10cSrcweir break; 621*cdf0e10cSrcweir default: 622*cdf0e10cSrcweir ; // other cases aren't needed, no compiler warning 623*cdf0e10cSrcweir } 624*cdf0e10cSrcweir return nMask; 625*cdf0e10cSrcweir } 626*cdf0e10cSrcweir 627*cdf0e10cSrcweir 628*cdf0e10cSrcweir UPT_FLAG_TYPE cclass_Unicode::getFlagsExtended( const sal_Unicode* aStr, sal_Int32 nPos ) 629*cdf0e10cSrcweir { 630*cdf0e10cSrcweir sal_Unicode c = aStr[nPos]; 631*cdf0e10cSrcweir if ( c == cGroupSep ) 632*cdf0e10cSrcweir return TOKEN_VALUE; 633*cdf0e10cSrcweir else if ( c == cDecimalSep ) 634*cdf0e10cSrcweir return TOKEN_CHAR_VALUE | TOKEN_VALUE; 635*cdf0e10cSrcweir using namespace i18n; 636*cdf0e10cSrcweir bool bStart = (eState == ssGetChar || eState == ssGetWordFirstChar || 637*cdf0e10cSrcweir eState == ssRewindFromValue || eState == ssIgnoreLeadingInRewind); 638*cdf0e10cSrcweir sal_Int32 nTypes = (bStart ? nStartTypes : nContTypes); 639*cdf0e10cSrcweir 640*cdf0e10cSrcweir //! all KParseTokens::UNI_... must be matched 641*cdf0e10cSrcweir switch ( u_charType( (sal_uInt32) c ) ) 642*cdf0e10cSrcweir { 643*cdf0e10cSrcweir case U_UPPERCASE_LETTER : 644*cdf0e10cSrcweir return (nTypes & KParseTokens::UNI_UPALPHA) ? 645*cdf0e10cSrcweir (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : 646*cdf0e10cSrcweir TOKEN_ILLEGAL; 647*cdf0e10cSrcweir case U_LOWERCASE_LETTER : 648*cdf0e10cSrcweir return (nTypes & KParseTokens::UNI_LOALPHA) ? 649*cdf0e10cSrcweir (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : 650*cdf0e10cSrcweir TOKEN_ILLEGAL; 651*cdf0e10cSrcweir case U_TITLECASE_LETTER : 652*cdf0e10cSrcweir return (nTypes & KParseTokens::UNI_TITLE_ALPHA) ? 653*cdf0e10cSrcweir (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : 654*cdf0e10cSrcweir TOKEN_ILLEGAL; 655*cdf0e10cSrcweir case U_MODIFIER_LETTER : 656*cdf0e10cSrcweir return (nTypes & KParseTokens::UNI_MODIFIER_LETTER) ? 657*cdf0e10cSrcweir (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : 658*cdf0e10cSrcweir TOKEN_ILLEGAL; 659*cdf0e10cSrcweir case U_NON_SPACING_MARK : 660*cdf0e10cSrcweir case U_COMBINING_SPACING_MARK : 661*cdf0e10cSrcweir // Non_Spacing_Mark can't be a leading character, 662*cdf0e10cSrcweir // nor can a spacing combining mark. 663*cdf0e10cSrcweir if (bStart) 664*cdf0e10cSrcweir return TOKEN_ILLEGAL; 665*cdf0e10cSrcweir // fall through, treat it as Other_Letter. 666*cdf0e10cSrcweir case U_OTHER_LETTER : 667*cdf0e10cSrcweir return (nTypes & KParseTokens::UNI_OTHER_LETTER) ? 668*cdf0e10cSrcweir (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : 669*cdf0e10cSrcweir TOKEN_ILLEGAL; 670*cdf0e10cSrcweir case U_DECIMAL_DIGIT_NUMBER : 671*cdf0e10cSrcweir return ((nTypes & KParseTokens::UNI_DIGIT) ? 672*cdf0e10cSrcweir (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : 673*cdf0e10cSrcweir TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS; 674*cdf0e10cSrcweir case U_LETTER_NUMBER : 675*cdf0e10cSrcweir return ((nTypes & KParseTokens::UNI_LETTER_NUMBER) ? 676*cdf0e10cSrcweir (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : 677*cdf0e10cSrcweir TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS; 678*cdf0e10cSrcweir case U_OTHER_NUMBER : 679*cdf0e10cSrcweir return ((nTypes & KParseTokens::UNI_OTHER_NUMBER) ? 680*cdf0e10cSrcweir (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : 681*cdf0e10cSrcweir TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS; 682*cdf0e10cSrcweir case U_SPACE_SEPARATOR : 683*cdf0e10cSrcweir return ((nTypes & KParseTokens::IGNORE_LEADING_WS) ? 684*cdf0e10cSrcweir TOKEN_CHAR_DONTCARE : (bStart ? TOKEN_CHAR_WORD : (TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP) )); 685*cdf0e10cSrcweir } 686*cdf0e10cSrcweir 687*cdf0e10cSrcweir return TOKEN_ILLEGAL; 688*cdf0e10cSrcweir } 689*cdf0e10cSrcweir 690*cdf0e10cSrcweir 691*cdf0e10cSrcweir UPT_FLAG_TYPE cclass_Unicode::getStartCharsFlags( sal_Unicode c ) 692*cdf0e10cSrcweir { 693*cdf0e10cSrcweir if ( pStart ) 694*cdf0e10cSrcweir { 695*cdf0e10cSrcweir const sal_Unicode* pStr = aStartChars.getStr(); 696*cdf0e10cSrcweir const sal_Unicode* p = StrChr( pStr, c ); 697*cdf0e10cSrcweir if ( p ) 698*cdf0e10cSrcweir return pStart[ p - pStr ]; 699*cdf0e10cSrcweir } 700*cdf0e10cSrcweir return TOKEN_ILLEGAL; 701*cdf0e10cSrcweir } 702*cdf0e10cSrcweir 703*cdf0e10cSrcweir 704*cdf0e10cSrcweir UPT_FLAG_TYPE cclass_Unicode::getContCharsFlags( sal_Unicode c ) 705*cdf0e10cSrcweir { 706*cdf0e10cSrcweir if ( pCont ) 707*cdf0e10cSrcweir { 708*cdf0e10cSrcweir const sal_Unicode* pStr = aContChars.getStr(); 709*cdf0e10cSrcweir const sal_Unicode* p = StrChr( pStr, c ); 710*cdf0e10cSrcweir if ( p ) 711*cdf0e10cSrcweir return pCont[ p - pStr ]; 712*cdf0e10cSrcweir } 713*cdf0e10cSrcweir return TOKEN_ILLEGAL; 714*cdf0e10cSrcweir } 715*cdf0e10cSrcweir 716*cdf0e10cSrcweir 717*cdf0e10cSrcweir void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32 nPos, sal_Int32 nTokenType ) 718*cdf0e10cSrcweir { 719*cdf0e10cSrcweir using namespace i18n; 720*cdf0e10cSrcweir const sal_Unicode* const pTextStart = rText.getStr() + nPos; 721*cdf0e10cSrcweir eState = ssGetChar; 722*cdf0e10cSrcweir 723*cdf0e10cSrcweir //! All the variables below (plus ParseResult) have to be resetted on ssRewindFromValue! 724*cdf0e10cSrcweir const sal_Unicode* pSym = pTextStart; 725*cdf0e10cSrcweir const sal_Unicode* pSrc = pSym; 726*cdf0e10cSrcweir OUString aSymbol; 727*cdf0e10cSrcweir sal_Unicode c = *pSrc; 728*cdf0e10cSrcweir sal_Unicode cLast = 0; 729*cdf0e10cSrcweir int nDecSeps = 0; 730*cdf0e10cSrcweir bool bQuote = false; 731*cdf0e10cSrcweir bool bMightBeWord = true; 732*cdf0e10cSrcweir bool bMightBeWordLast = true; 733*cdf0e10cSrcweir //! All the variables above (plus ParseResult) have to be resetted on ssRewindFromValue! 734*cdf0e10cSrcweir 735*cdf0e10cSrcweir while ( (c != 0) && (eState != ssStop) ) 736*cdf0e10cSrcweir { 737*cdf0e10cSrcweir UPT_FLAG_TYPE nMask = getFlags( pTextStart, pSrc - pTextStart ); 738*cdf0e10cSrcweir if ( nMask & TOKEN_EXCLUDED ) 739*cdf0e10cSrcweir eState = ssBounce; 740*cdf0e10cSrcweir if ( bMightBeWord ) 741*cdf0e10cSrcweir { // only relevant for ssGetValue fall back 742*cdf0e10cSrcweir if ( eState == ssGetChar || eState == ssRewindFromValue || 743*cdf0e10cSrcweir eState == ssIgnoreLeadingInRewind ) 744*cdf0e10cSrcweir bMightBeWord = ((nMask & TOKEN_CHAR_WORD) != 0); 745*cdf0e10cSrcweir else 746*cdf0e10cSrcweir bMightBeWord = ((nMask & TOKEN_WORD) != 0); 747*cdf0e10cSrcweir } 748*cdf0e10cSrcweir sal_Int32 nParseTokensType = getParseTokensType( pTextStart, pSrc - pTextStart ); 749*cdf0e10cSrcweir pSrc++; 750*cdf0e10cSrcweir switch (eState) 751*cdf0e10cSrcweir { 752*cdf0e10cSrcweir case ssGetChar : 753*cdf0e10cSrcweir case ssRewindFromValue : 754*cdf0e10cSrcweir case ssIgnoreLeadingInRewind : 755*cdf0e10cSrcweir { 756*cdf0e10cSrcweir if ( (nMask & TOKEN_CHAR_VALUE) && eState != ssRewindFromValue 757*cdf0e10cSrcweir && eState != ssIgnoreLeadingInRewind ) 758*cdf0e10cSrcweir { //! must be first, may fall back to ssGetWord via bMightBeWord 759*cdf0e10cSrcweir eState = ssGetValue; 760*cdf0e10cSrcweir if ( nMask & TOKEN_VALUE_DIGIT ) 761*cdf0e10cSrcweir { 762*cdf0e10cSrcweir if ( 128 <= c ) 763*cdf0e10cSrcweir r.TokenType = KParseType::UNI_NUMBER; 764*cdf0e10cSrcweir else 765*cdf0e10cSrcweir r.TokenType = KParseType::ASC_NUMBER; 766*cdf0e10cSrcweir } 767*cdf0e10cSrcweir else if ( c == cDecimalSep ) 768*cdf0e10cSrcweir { 769*cdf0e10cSrcweir if ( *pSrc ) 770*cdf0e10cSrcweir ++nDecSeps; 771*cdf0e10cSrcweir else 772*cdf0e10cSrcweir eState = ssRewindFromValue; 773*cdf0e10cSrcweir // retry for ONE_SINGLE_CHAR or others 774*cdf0e10cSrcweir } 775*cdf0e10cSrcweir } 776*cdf0e10cSrcweir else if ( nMask & TOKEN_CHAR_WORD ) 777*cdf0e10cSrcweir { 778*cdf0e10cSrcweir eState = ssGetWord; 779*cdf0e10cSrcweir r.TokenType = KParseType::IDENTNAME; 780*cdf0e10cSrcweir } 781*cdf0e10cSrcweir else if ( nMask & TOKEN_NAME_SEP ) 782*cdf0e10cSrcweir { 783*cdf0e10cSrcweir eState = ssGetWordFirstChar; 784*cdf0e10cSrcweir bQuote = true; 785*cdf0e10cSrcweir pSym++; 786*cdf0e10cSrcweir nParseTokensType = 0; // will be taken of first real character 787*cdf0e10cSrcweir r.TokenType = KParseType::SINGLE_QUOTE_NAME; 788*cdf0e10cSrcweir } 789*cdf0e10cSrcweir else if ( nMask & TOKEN_CHAR_STRING ) 790*cdf0e10cSrcweir { 791*cdf0e10cSrcweir eState = ssGetString; 792*cdf0e10cSrcweir pSym++; 793*cdf0e10cSrcweir nParseTokensType = 0; // will be taken of first real character 794*cdf0e10cSrcweir r.TokenType = KParseType::DOUBLE_QUOTE_STRING; 795*cdf0e10cSrcweir } 796*cdf0e10cSrcweir else if ( nMask & TOKEN_CHAR_DONTCARE ) 797*cdf0e10cSrcweir { 798*cdf0e10cSrcweir if ( nStartTypes & KParseTokens::IGNORE_LEADING_WS ) 799*cdf0e10cSrcweir { 800*cdf0e10cSrcweir if (eState == ssRewindFromValue) 801*cdf0e10cSrcweir eState = ssIgnoreLeadingInRewind; 802*cdf0e10cSrcweir r.LeadingWhiteSpace++; 803*cdf0e10cSrcweir pSym++; 804*cdf0e10cSrcweir nParseTokensType = 0; // wait until real character 805*cdf0e10cSrcweir bMightBeWord = true; 806*cdf0e10cSrcweir } 807*cdf0e10cSrcweir else 808*cdf0e10cSrcweir eState = ssBounce; 809*cdf0e10cSrcweir } 810*cdf0e10cSrcweir else if ( nMask & TOKEN_CHAR_BOOL ) 811*cdf0e10cSrcweir { 812*cdf0e10cSrcweir eState = ssGetBool; 813*cdf0e10cSrcweir r.TokenType = KParseType::BOOLEAN; 814*cdf0e10cSrcweir } 815*cdf0e10cSrcweir else if ( nMask & TOKEN_CHAR ) 816*cdf0e10cSrcweir { //! must be last 817*cdf0e10cSrcweir eState = ssStop; 818*cdf0e10cSrcweir r.TokenType = KParseType::ONE_SINGLE_CHAR; 819*cdf0e10cSrcweir } 820*cdf0e10cSrcweir else 821*cdf0e10cSrcweir eState = ssBounce; // not known 822*cdf0e10cSrcweir } 823*cdf0e10cSrcweir break; 824*cdf0e10cSrcweir case ssGetValue : 825*cdf0e10cSrcweir { 826*cdf0e10cSrcweir if ( nMask & TOKEN_VALUE_DIGIT ) 827*cdf0e10cSrcweir { 828*cdf0e10cSrcweir if ( 128 <= c ) 829*cdf0e10cSrcweir r.TokenType = KParseType::UNI_NUMBER; 830*cdf0e10cSrcweir else if ( r.TokenType != KParseType::UNI_NUMBER ) 831*cdf0e10cSrcweir r.TokenType = KParseType::ASC_NUMBER; 832*cdf0e10cSrcweir } 833*cdf0e10cSrcweir if ( nMask & TOKEN_VALUE ) 834*cdf0e10cSrcweir { 835*cdf0e10cSrcweir if ( c == cDecimalSep && ++nDecSeps > 1 ) 836*cdf0e10cSrcweir { 837*cdf0e10cSrcweir if ( pSrc - pTextStart == 2 ) 838*cdf0e10cSrcweir eState = ssRewindFromValue; 839*cdf0e10cSrcweir // consecutive separators 840*cdf0e10cSrcweir else 841*cdf0e10cSrcweir eState = ssStopBack; 842*cdf0e10cSrcweir } 843*cdf0e10cSrcweir // else keep it going 844*cdf0e10cSrcweir } 845*cdf0e10cSrcweir else if ( c == 'E' || c == 'e' ) 846*cdf0e10cSrcweir { 847*cdf0e10cSrcweir UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart ); 848*cdf0e10cSrcweir if ( nNext & TOKEN_VALUE_EXP ) 849*cdf0e10cSrcweir ; // keep it going 850*cdf0e10cSrcweir else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) ) 851*cdf0e10cSrcweir { // might be a numerical name (1.2efg) 852*cdf0e10cSrcweir eState = ssGetWord; 853*cdf0e10cSrcweir r.TokenType = KParseType::IDENTNAME; 854*cdf0e10cSrcweir } 855*cdf0e10cSrcweir else 856*cdf0e10cSrcweir eState = ssStopBack; 857*cdf0e10cSrcweir } 858*cdf0e10cSrcweir else if ( nMask & TOKEN_VALUE_SIGN ) 859*cdf0e10cSrcweir { 860*cdf0e10cSrcweir if ( (cLast == 'E') || (cLast == 'e') ) 861*cdf0e10cSrcweir { 862*cdf0e10cSrcweir UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart ); 863*cdf0e10cSrcweir if ( nNext & TOKEN_VALUE_EXP_VALUE ) 864*cdf0e10cSrcweir ; // keep it going 865*cdf0e10cSrcweir else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) ) 866*cdf0e10cSrcweir { // might be a numerical name (1.2e+fg) 867*cdf0e10cSrcweir eState = ssGetWord; 868*cdf0e10cSrcweir r.TokenType = KParseType::IDENTNAME; 869*cdf0e10cSrcweir } 870*cdf0e10cSrcweir else 871*cdf0e10cSrcweir eState = ssStopBack; 872*cdf0e10cSrcweir } 873*cdf0e10cSrcweir else if ( bMightBeWord ) 874*cdf0e10cSrcweir { // might be a numerical name (1.2+fg) 875*cdf0e10cSrcweir eState = ssGetWord; 876*cdf0e10cSrcweir r.TokenType = KParseType::IDENTNAME; 877*cdf0e10cSrcweir } 878*cdf0e10cSrcweir else 879*cdf0e10cSrcweir eState = ssStopBack; 880*cdf0e10cSrcweir } 881*cdf0e10cSrcweir else if ( bMightBeWord && (nMask & TOKEN_WORD) ) 882*cdf0e10cSrcweir { // might be a numerical name (1995.A1) 883*cdf0e10cSrcweir eState = ssGetWord; 884*cdf0e10cSrcweir r.TokenType = KParseType::IDENTNAME; 885*cdf0e10cSrcweir } 886*cdf0e10cSrcweir else 887*cdf0e10cSrcweir eState = ssStopBack; 888*cdf0e10cSrcweir } 889*cdf0e10cSrcweir break; 890*cdf0e10cSrcweir case ssGetWordFirstChar : 891*cdf0e10cSrcweir eState = ssGetWord; 892*cdf0e10cSrcweir // fall thru 893*cdf0e10cSrcweir case ssGetWord : 894*cdf0e10cSrcweir { 895*cdf0e10cSrcweir if ( nMask & TOKEN_WORD ) 896*cdf0e10cSrcweir ; // keep it going 897*cdf0e10cSrcweir else if ( nMask & TOKEN_NAME_SEP ) 898*cdf0e10cSrcweir { 899*cdf0e10cSrcweir if ( bQuote ) 900*cdf0e10cSrcweir { 901*cdf0e10cSrcweir if ( cLast == '\\' ) 902*cdf0e10cSrcweir { // escaped 903*cdf0e10cSrcweir aSymbol += OUString( pSym, pSrc - pSym - 2 ); 904*cdf0e10cSrcweir aSymbol += OUString( &c, 1); 905*cdf0e10cSrcweir } 906*cdf0e10cSrcweir else 907*cdf0e10cSrcweir { 908*cdf0e10cSrcweir eState = ssStop; 909*cdf0e10cSrcweir aSymbol += OUString( pSym, pSrc - pSym - 1 ); 910*cdf0e10cSrcweir } 911*cdf0e10cSrcweir pSym = pSrc; 912*cdf0e10cSrcweir } 913*cdf0e10cSrcweir else 914*cdf0e10cSrcweir eState = ssStopBack; 915*cdf0e10cSrcweir } 916*cdf0e10cSrcweir else if ( bQuote ) 917*cdf0e10cSrcweir ; // keep it going 918*cdf0e10cSrcweir else 919*cdf0e10cSrcweir eState = ssStopBack; 920*cdf0e10cSrcweir } 921*cdf0e10cSrcweir break; 922*cdf0e10cSrcweir case ssGetString : 923*cdf0e10cSrcweir { 924*cdf0e10cSrcweir if ( nMask & TOKEN_STRING_SEP ) 925*cdf0e10cSrcweir { 926*cdf0e10cSrcweir if ( cLast == '\\' ) 927*cdf0e10cSrcweir { // escaped 928*cdf0e10cSrcweir aSymbol += OUString( pSym, pSrc - pSym - 2 ); 929*cdf0e10cSrcweir aSymbol += OUString( &c, 1); 930*cdf0e10cSrcweir } 931*cdf0e10cSrcweir else if ( c == *pSrc && 932*cdf0e10cSrcweir !(nContTypes & KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING) ) 933*cdf0e10cSrcweir { // "" => literal " escaped 934*cdf0e10cSrcweir aSymbol += OUString( pSym, pSrc - pSym ); 935*cdf0e10cSrcweir pSrc++; 936*cdf0e10cSrcweir } 937*cdf0e10cSrcweir else 938*cdf0e10cSrcweir { 939*cdf0e10cSrcweir eState = ssStop; 940*cdf0e10cSrcweir aSymbol += OUString( pSym, pSrc - pSym - 1 ); 941*cdf0e10cSrcweir } 942*cdf0e10cSrcweir pSym = pSrc; 943*cdf0e10cSrcweir } 944*cdf0e10cSrcweir } 945*cdf0e10cSrcweir break; 946*cdf0e10cSrcweir case ssGetBool : 947*cdf0e10cSrcweir { 948*cdf0e10cSrcweir if ( (nMask & TOKEN_BOOL) ) 949*cdf0e10cSrcweir eState = ssStop; // maximum 2: <, >, <>, <=, >= 950*cdf0e10cSrcweir else 951*cdf0e10cSrcweir eState = ssStopBack; 952*cdf0e10cSrcweir } 953*cdf0e10cSrcweir break; 954*cdf0e10cSrcweir case ssStopBack : 955*cdf0e10cSrcweir case ssBounce : 956*cdf0e10cSrcweir case ssStop : 957*cdf0e10cSrcweir ; // nothing, no compiler warning 958*cdf0e10cSrcweir break; 959*cdf0e10cSrcweir } 960*cdf0e10cSrcweir if ( eState == ssRewindFromValue ) 961*cdf0e10cSrcweir { 962*cdf0e10cSrcweir r = ParseResult(); 963*cdf0e10cSrcweir pSym = pTextStart; 964*cdf0e10cSrcweir pSrc = pSym; 965*cdf0e10cSrcweir aSymbol = OUString(); 966*cdf0e10cSrcweir c = *pSrc; 967*cdf0e10cSrcweir cLast = 0; 968*cdf0e10cSrcweir nDecSeps = 0; 969*cdf0e10cSrcweir bQuote = false; 970*cdf0e10cSrcweir bMightBeWord = true; 971*cdf0e10cSrcweir bMightBeWordLast = true; 972*cdf0e10cSrcweir } 973*cdf0e10cSrcweir else 974*cdf0e10cSrcweir { 975*cdf0e10cSrcweir if ( !(r.TokenType & nTokenType) ) 976*cdf0e10cSrcweir { 977*cdf0e10cSrcweir if ( (r.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER)) 978*cdf0e10cSrcweir && (nTokenType & KParseType::IDENTNAME) && bMightBeWord ) 979*cdf0e10cSrcweir ; // keep a number that might be a word 980*cdf0e10cSrcweir else if ( r.LeadingWhiteSpace == (pSrc - pTextStart) ) 981*cdf0e10cSrcweir ; // keep ignored white space 982*cdf0e10cSrcweir else if ( !r.TokenType && eState == ssGetValue && (nMask & TOKEN_VALUE_SEP) ) 983*cdf0e10cSrcweir ; // keep uncertain value 984*cdf0e10cSrcweir else 985*cdf0e10cSrcweir eState = ssBounce; 986*cdf0e10cSrcweir } 987*cdf0e10cSrcweir if ( eState == ssBounce ) 988*cdf0e10cSrcweir { 989*cdf0e10cSrcweir r.TokenType = 0; 990*cdf0e10cSrcweir eState = ssStopBack; 991*cdf0e10cSrcweir } 992*cdf0e10cSrcweir if ( eState == ssStopBack ) 993*cdf0e10cSrcweir { // put back 994*cdf0e10cSrcweir pSrc--; 995*cdf0e10cSrcweir bMightBeWord = bMightBeWordLast; 996*cdf0e10cSrcweir eState = ssStop; 997*cdf0e10cSrcweir } 998*cdf0e10cSrcweir if ( eState != ssStop ) 999*cdf0e10cSrcweir { 1000*cdf0e10cSrcweir if ( !r.StartFlags ) 1001*cdf0e10cSrcweir r.StartFlags |= nParseTokensType; 1002*cdf0e10cSrcweir else 1003*cdf0e10cSrcweir r.ContFlags |= nParseTokensType; 1004*cdf0e10cSrcweir } 1005*cdf0e10cSrcweir bMightBeWordLast = bMightBeWord; 1006*cdf0e10cSrcweir cLast = c; 1007*cdf0e10cSrcweir c = *pSrc; 1008*cdf0e10cSrcweir } 1009*cdf0e10cSrcweir } 1010*cdf0e10cSrcweir // r.CharLen is the length in characters (not code points) of the parsed 1011*cdf0e10cSrcweir // token not including any leading white space, change this calculation if 1012*cdf0e10cSrcweir // multi-code-point Unicode characters are to be supported. 1013*cdf0e10cSrcweir r.CharLen = pSrc - pTextStart - r.LeadingWhiteSpace; 1014*cdf0e10cSrcweir r.EndPos = nPos + (pSrc - pTextStart); 1015*cdf0e10cSrcweir if ( r.TokenType & KParseType::ASC_NUMBER ) 1016*cdf0e10cSrcweir { 1017*cdf0e10cSrcweir r.Value = rtl_math_uStringToDouble( pTextStart + r.LeadingWhiteSpace, 1018*cdf0e10cSrcweir pTextStart + r.EndPos, cDecimalSep, cGroupSep, NULL, NULL ); 1019*cdf0e10cSrcweir if ( bMightBeWord ) 1020*cdf0e10cSrcweir r.TokenType |= KParseType::IDENTNAME; 1021*cdf0e10cSrcweir } 1022*cdf0e10cSrcweir else if ( r.TokenType & KParseType::UNI_NUMBER ) 1023*cdf0e10cSrcweir { 1024*cdf0e10cSrcweir if ( !xNatNumSup.is() ) 1025*cdf0e10cSrcweir { 1026*cdf0e10cSrcweir #define NATIVENUMBERSUPPLIER_SERVICENAME "com.sun.star.i18n.NativeNumberSupplier" 1027*cdf0e10cSrcweir if ( xMSF.is() ) 1028*cdf0e10cSrcweir { 1029*cdf0e10cSrcweir xNatNumSup = Reference< XNativeNumberSupplier > ( 1030*cdf0e10cSrcweir xMSF->createInstance( OUString( 1031*cdf0e10cSrcweir RTL_CONSTASCII_USTRINGPARAM( 1032*cdf0e10cSrcweir NATIVENUMBERSUPPLIER_SERVICENAME ) ) ), 1033*cdf0e10cSrcweir UNO_QUERY ); 1034*cdf0e10cSrcweir } 1035*cdf0e10cSrcweir if ( !xNatNumSup.is() ) 1036*cdf0e10cSrcweir { 1037*cdf0e10cSrcweir throw RuntimeException( OUString( 1038*cdf0e10cSrcweir #ifdef DBG_UTIL 1039*cdf0e10cSrcweir RTL_CONSTASCII_USTRINGPARAM( 1040*cdf0e10cSrcweir "cclass_Unicode::parseText: can't instanciate " 1041*cdf0e10cSrcweir NATIVENUMBERSUPPLIER_SERVICENAME ) 1042*cdf0e10cSrcweir #endif 1043*cdf0e10cSrcweir ), *this ); 1044*cdf0e10cSrcweir } 1045*cdf0e10cSrcweir #undef NATIVENUMBERSUPPLIER_SERVICENAME 1046*cdf0e10cSrcweir } 1047*cdf0e10cSrcweir OUString aTmp( pTextStart + r.LeadingWhiteSpace, r.EndPos - nPos + 1048*cdf0e10cSrcweir r.LeadingWhiteSpace ); 1049*cdf0e10cSrcweir // transliterate to ASCII 1050*cdf0e10cSrcweir aTmp = xNatNumSup->getNativeNumberString( aTmp, aParserLocale, 1051*cdf0e10cSrcweir NativeNumberMode::NATNUM0 ); 1052*cdf0e10cSrcweir r.Value = ::rtl::math::stringToDouble( aTmp, cDecimalSep, cGroupSep, NULL, NULL ); 1053*cdf0e10cSrcweir if ( bMightBeWord ) 1054*cdf0e10cSrcweir r.TokenType |= KParseType::IDENTNAME; 1055*cdf0e10cSrcweir } 1056*cdf0e10cSrcweir else if ( r.TokenType & (KParseType::SINGLE_QUOTE_NAME | KParseType::DOUBLE_QUOTE_STRING) ) 1057*cdf0e10cSrcweir { 1058*cdf0e10cSrcweir if ( pSym < pSrc ) 1059*cdf0e10cSrcweir { //! open quote 1060*cdf0e10cSrcweir aSymbol += OUString( pSym, pSrc - pSym ); 1061*cdf0e10cSrcweir r.TokenType |= KParseType::MISSING_QUOTE; 1062*cdf0e10cSrcweir } 1063*cdf0e10cSrcweir r.DequotedNameOrString = aSymbol; 1064*cdf0e10cSrcweir } 1065*cdf0e10cSrcweir } 1066*cdf0e10cSrcweir 1067*cdf0e10cSrcweir } } } } 1068