1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 // MARKER(update_precomp.py): autogen include statement, do not remove 29 #include "precompiled_i18npool.hxx" 30 31 #include <cclass_unicode.hxx> 32 #include <unicode/uchar.h> 33 #include <rtl/math.hxx> 34 #include <rtl/ustring.hxx> 35 #include <com/sun/star/i18n/KParseTokens.hpp> 36 #include <com/sun/star/i18n/KParseType.hpp> 37 #include <com/sun/star/i18n/UnicodeType.hpp> 38 #include <com/sun/star/i18n/XLocaleData.hpp> 39 #include <com/sun/star/i18n/NativeNumberMode.hpp> 40 41 #include <string.h> // memcpy() 42 43 using namespace ::com::sun::star::uno; 44 using namespace ::com::sun::star::lang; 45 using namespace ::rtl; 46 47 namespace com { namespace sun { namespace star { namespace i18n { 48 49 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_ILLEGAL = 0x00000000; 50 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR = 0x00000001; 51 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_BOOL = 0x00000002; 52 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_WORD = 0x00000004; 53 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_VALUE = 0x00000008; 54 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_STRING = 0x00000010; 55 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_DONTCARE= 0x00000020; 56 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_BOOL = 0x00000040; 57 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD = 0x00000080; 58 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD_SEP = 0x00000100; 59 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE = 0x00000200; 60 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SEP = 0x00000400; 61 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP = 0x00000800; 62 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SIGN = 0x00001000; 63 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP_VALUE = 0x00002000; 64 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_DIGIT = 0x00004000; 65 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_NAME_SEP = 0x20000000; 66 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_STRING_SEP = 0x40000000; 67 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_EXCLUDED = 0x80000000; 68 69 #define TOKEN_DIGIT_FLAGS (TOKEN_CHAR_VALUE | TOKEN_VALUE | TOKEN_VALUE_EXP | TOKEN_VALUE_EXP_VALUE | TOKEN_VALUE_DIGIT) 70 71 // Default identifier/name specification is [A-Za-z_][A-Za-z0-9_]* 72 73 const sal_uInt8 cclass_Unicode::nDefCnt = 128; 74 const UPT_FLAG_TYPE cclass_Unicode::pDefaultParserTable[ nDefCnt ] = 75 { 76 // (...) == Calc formula compiler specific, commented out and modified 77 78 /* \0 */ TOKEN_EXCLUDED, 79 TOKEN_ILLEGAL, 80 TOKEN_ILLEGAL, 81 TOKEN_ILLEGAL, 82 TOKEN_ILLEGAL, 83 TOKEN_ILLEGAL, 84 TOKEN_ILLEGAL, 85 TOKEN_ILLEGAL, 86 TOKEN_ILLEGAL, 87 /* 9 \t */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL) 88 TOKEN_ILLEGAL, 89 /* 11 \v */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL) 90 TOKEN_ILLEGAL, 91 TOKEN_ILLEGAL, 92 TOKEN_ILLEGAL, 93 TOKEN_ILLEGAL, 94 TOKEN_ILLEGAL, 95 TOKEN_ILLEGAL, 96 TOKEN_ILLEGAL, 97 TOKEN_ILLEGAL, 98 TOKEN_ILLEGAL, 99 TOKEN_ILLEGAL, 100 TOKEN_ILLEGAL, 101 TOKEN_ILLEGAL, 102 TOKEN_ILLEGAL, 103 TOKEN_ILLEGAL, 104 TOKEN_ILLEGAL, 105 TOKEN_ILLEGAL, 106 TOKEN_ILLEGAL, 107 TOKEN_ILLEGAL, 108 TOKEN_ILLEGAL, 109 TOKEN_ILLEGAL, 110 /* 32 */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 111 /* 33 ! */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 112 /* 34 " */ TOKEN_CHAR_STRING | TOKEN_STRING_SEP, 113 /* 35 # */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD_SEP) 114 /* 36 $ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_WORD | TOKEN_WORD) 115 /* 37 % */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_VALUE) 116 /* 38 & */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 117 /* 39 ' */ TOKEN_NAME_SEP, 118 /* 40 ( */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 119 /* 41 ) */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 120 /* 42 * */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 121 /* 43 + */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN, 122 /* 44 , */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_VALUE | TOKEN_VALUE) 123 /* 45 - */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN, 124 /* 46 . */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD | TOKEN_CHAR_VALUE | TOKEN_VALUE) 125 /* 47 / */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 126 //for ( i = 48; i < 58; i++ ) 127 /* 48 0 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 128 /* 49 1 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 129 /* 50 2 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 130 /* 51 3 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 131 /* 52 4 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 132 /* 53 5 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 133 /* 54 6 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 134 /* 55 7 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 135 /* 56 8 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 136 /* 57 9 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 137 /* 58 : */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD) 138 /* 59 ; */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 139 /* 60 < */ TOKEN_CHAR_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 140 /* 61 = */ TOKEN_CHAR | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 141 /* 62 > */ TOKEN_CHAR_BOOL | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 142 /* 63 ? */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_WORD | TOKEN_WORD) 143 /* 64 @ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 144 //for ( i = 65; i < 91; i++ ) 145 /* 65 A */ TOKEN_CHAR_WORD | TOKEN_WORD, 146 /* 66 B */ TOKEN_CHAR_WORD | TOKEN_WORD, 147 /* 67 C */ TOKEN_CHAR_WORD | TOKEN_WORD, 148 /* 68 D */ TOKEN_CHAR_WORD | TOKEN_WORD, 149 /* 69 E */ TOKEN_CHAR_WORD | TOKEN_WORD, 150 /* 70 F */ TOKEN_CHAR_WORD | TOKEN_WORD, 151 /* 71 G */ TOKEN_CHAR_WORD | TOKEN_WORD, 152 /* 72 H */ TOKEN_CHAR_WORD | TOKEN_WORD, 153 /* 73 I */ TOKEN_CHAR_WORD | TOKEN_WORD, 154 /* 74 J */ TOKEN_CHAR_WORD | TOKEN_WORD, 155 /* 75 K */ TOKEN_CHAR_WORD | TOKEN_WORD, 156 /* 76 L */ TOKEN_CHAR_WORD | TOKEN_WORD, 157 /* 77 M */ TOKEN_CHAR_WORD | TOKEN_WORD, 158 /* 78 N */ TOKEN_CHAR_WORD | TOKEN_WORD, 159 /* 79 O */ TOKEN_CHAR_WORD | TOKEN_WORD, 160 /* 80 P */ TOKEN_CHAR_WORD | TOKEN_WORD, 161 /* 81 Q */ TOKEN_CHAR_WORD | TOKEN_WORD, 162 /* 82 R */ TOKEN_CHAR_WORD | TOKEN_WORD, 163 /* 83 S */ TOKEN_CHAR_WORD | TOKEN_WORD, 164 /* 84 T */ TOKEN_CHAR_WORD | TOKEN_WORD, 165 /* 85 U */ TOKEN_CHAR_WORD | TOKEN_WORD, 166 /* 86 V */ TOKEN_CHAR_WORD | TOKEN_WORD, 167 /* 87 W */ TOKEN_CHAR_WORD | TOKEN_WORD, 168 /* 88 X */ TOKEN_CHAR_WORD | TOKEN_WORD, 169 /* 89 Y */ TOKEN_CHAR_WORD | TOKEN_WORD, 170 /* 90 Z */ TOKEN_CHAR_WORD | TOKEN_WORD, 171 /* 91 [ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 172 /* 92 \ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 173 /* 93 ] */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 174 /* 94 ^ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 175 /* 95 _ */ TOKEN_CHAR_WORD | TOKEN_WORD, 176 /* 96 ` */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 177 //for ( i = 97; i < 123; i++ ) 178 /* 97 a */ TOKEN_CHAR_WORD | TOKEN_WORD, 179 /* 98 b */ TOKEN_CHAR_WORD | TOKEN_WORD, 180 /* 99 c */ TOKEN_CHAR_WORD | TOKEN_WORD, 181 /* 100 d */ TOKEN_CHAR_WORD | TOKEN_WORD, 182 /* 101 e */ TOKEN_CHAR_WORD | TOKEN_WORD, 183 /* 102 f */ TOKEN_CHAR_WORD | TOKEN_WORD, 184 /* 103 g */ TOKEN_CHAR_WORD | TOKEN_WORD, 185 /* 104 h */ TOKEN_CHAR_WORD | TOKEN_WORD, 186 /* 105 i */ TOKEN_CHAR_WORD | TOKEN_WORD, 187 /* 106 j */ TOKEN_CHAR_WORD | TOKEN_WORD, 188 /* 107 k */ TOKEN_CHAR_WORD | TOKEN_WORD, 189 /* 108 l */ TOKEN_CHAR_WORD | TOKEN_WORD, 190 /* 109 m */ TOKEN_CHAR_WORD | TOKEN_WORD, 191 /* 110 n */ TOKEN_CHAR_WORD | TOKEN_WORD, 192 /* 111 o */ TOKEN_CHAR_WORD | TOKEN_WORD, 193 /* 112 p */ TOKEN_CHAR_WORD | TOKEN_WORD, 194 /* 113 q */ TOKEN_CHAR_WORD | TOKEN_WORD, 195 /* 114 r */ TOKEN_CHAR_WORD | TOKEN_WORD, 196 /* 115 s */ TOKEN_CHAR_WORD | TOKEN_WORD, 197 /* 116 t */ TOKEN_CHAR_WORD | TOKEN_WORD, 198 /* 117 u */ TOKEN_CHAR_WORD | TOKEN_WORD, 199 /* 118 v */ TOKEN_CHAR_WORD | TOKEN_WORD, 200 /* 119 w */ TOKEN_CHAR_WORD | TOKEN_WORD, 201 /* 120 x */ TOKEN_CHAR_WORD | TOKEN_WORD, 202 /* 121 y */ TOKEN_CHAR_WORD | TOKEN_WORD, 203 /* 122 z */ TOKEN_CHAR_WORD | TOKEN_WORD, 204 /* 123 { */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 205 /* 124 | */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 206 /* 125 } */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 207 /* 126 ~ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 208 /* 127 */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP // (TOKEN_ILLEGAL // UNUSED) 209 }; 210 211 212 const sal_Int32 cclass_Unicode::pParseTokensType[ nDefCnt ] = 213 { 214 /* \0 */ KParseTokens::ASC_OTHER, 215 KParseTokens::ASC_CONTROL, 216 KParseTokens::ASC_CONTROL, 217 KParseTokens::ASC_CONTROL, 218 KParseTokens::ASC_CONTROL, 219 KParseTokens::ASC_CONTROL, 220 KParseTokens::ASC_CONTROL, 221 KParseTokens::ASC_CONTROL, 222 KParseTokens::ASC_CONTROL, 223 /* 9 \t */ KParseTokens::ASC_CONTROL, 224 KParseTokens::ASC_CONTROL, 225 /* 11 \v */ KParseTokens::ASC_CONTROL, 226 KParseTokens::ASC_CONTROL, 227 KParseTokens::ASC_CONTROL, 228 KParseTokens::ASC_CONTROL, 229 KParseTokens::ASC_CONTROL, 230 KParseTokens::ASC_CONTROL, 231 KParseTokens::ASC_CONTROL, 232 KParseTokens::ASC_CONTROL, 233 KParseTokens::ASC_CONTROL, 234 KParseTokens::ASC_CONTROL, 235 KParseTokens::ASC_CONTROL, 236 KParseTokens::ASC_CONTROL, 237 KParseTokens::ASC_CONTROL, 238 KParseTokens::ASC_CONTROL, 239 KParseTokens::ASC_CONTROL, 240 KParseTokens::ASC_CONTROL, 241 KParseTokens::ASC_CONTROL, 242 KParseTokens::ASC_CONTROL, 243 KParseTokens::ASC_CONTROL, 244 KParseTokens::ASC_CONTROL, 245 KParseTokens::ASC_CONTROL, 246 /* 32 */ KParseTokens::ASC_OTHER, 247 /* 33 ! */ KParseTokens::ASC_OTHER, 248 /* 34 " */ KParseTokens::ASC_OTHER, 249 /* 35 # */ KParseTokens::ASC_OTHER, 250 /* 36 $ */ KParseTokens::ASC_DOLLAR, 251 /* 37 % */ KParseTokens::ASC_OTHER, 252 /* 38 & */ KParseTokens::ASC_OTHER, 253 /* 39 ' */ KParseTokens::ASC_OTHER, 254 /* 40 ( */ KParseTokens::ASC_OTHER, 255 /* 41 ) */ KParseTokens::ASC_OTHER, 256 /* 42 * */ KParseTokens::ASC_OTHER, 257 /* 43 + */ KParseTokens::ASC_OTHER, 258 /* 44 , */ KParseTokens::ASC_OTHER, 259 /* 45 - */ KParseTokens::ASC_OTHER, 260 /* 46 . */ KParseTokens::ASC_DOT, 261 /* 47 / */ KParseTokens::ASC_OTHER, 262 //for ( i = 48; i < 58; i++ ) 263 /* 48 0 */ KParseTokens::ASC_DIGIT, 264 /* 49 1 */ KParseTokens::ASC_DIGIT, 265 /* 50 2 */ KParseTokens::ASC_DIGIT, 266 /* 51 3 */ KParseTokens::ASC_DIGIT, 267 /* 52 4 */ KParseTokens::ASC_DIGIT, 268 /* 53 5 */ KParseTokens::ASC_DIGIT, 269 /* 54 6 */ KParseTokens::ASC_DIGIT, 270 /* 55 7 */ KParseTokens::ASC_DIGIT, 271 /* 56 8 */ KParseTokens::ASC_DIGIT, 272 /* 57 9 */ KParseTokens::ASC_DIGIT, 273 /* 58 : */ KParseTokens::ASC_COLON, 274 /* 59 ; */ KParseTokens::ASC_OTHER, 275 /* 60 < */ KParseTokens::ASC_OTHER, 276 /* 61 = */ KParseTokens::ASC_OTHER, 277 /* 62 > */ KParseTokens::ASC_OTHER, 278 /* 63 ? */ KParseTokens::ASC_OTHER, 279 /* 64 @ */ KParseTokens::ASC_OTHER, 280 //for ( i = 65; i < 91; i++ ) 281 /* 65 A */ KParseTokens::ASC_UPALPHA, 282 /* 66 B */ KParseTokens::ASC_UPALPHA, 283 /* 67 C */ KParseTokens::ASC_UPALPHA, 284 /* 68 D */ KParseTokens::ASC_UPALPHA, 285 /* 69 E */ KParseTokens::ASC_UPALPHA, 286 /* 70 F */ KParseTokens::ASC_UPALPHA, 287 /* 71 G */ KParseTokens::ASC_UPALPHA, 288 /* 72 H */ KParseTokens::ASC_UPALPHA, 289 /* 73 I */ KParseTokens::ASC_UPALPHA, 290 /* 74 J */ KParseTokens::ASC_UPALPHA, 291 /* 75 K */ KParseTokens::ASC_UPALPHA, 292 /* 76 L */ KParseTokens::ASC_UPALPHA, 293 /* 77 M */ KParseTokens::ASC_UPALPHA, 294 /* 78 N */ KParseTokens::ASC_UPALPHA, 295 /* 79 O */ KParseTokens::ASC_UPALPHA, 296 /* 80 P */ KParseTokens::ASC_UPALPHA, 297 /* 81 Q */ KParseTokens::ASC_UPALPHA, 298 /* 82 R */ KParseTokens::ASC_UPALPHA, 299 /* 83 S */ KParseTokens::ASC_UPALPHA, 300 /* 84 T */ KParseTokens::ASC_UPALPHA, 301 /* 85 U */ KParseTokens::ASC_UPALPHA, 302 /* 86 V */ KParseTokens::ASC_UPALPHA, 303 /* 87 W */ KParseTokens::ASC_UPALPHA, 304 /* 88 X */ KParseTokens::ASC_UPALPHA, 305 /* 89 Y */ KParseTokens::ASC_UPALPHA, 306 /* 90 Z */ KParseTokens::ASC_UPALPHA, 307 /* 91 [ */ KParseTokens::ASC_OTHER, 308 /* 92 \ */ KParseTokens::ASC_OTHER, 309 /* 93 ] */ KParseTokens::ASC_OTHER, 310 /* 94 ^ */ KParseTokens::ASC_OTHER, 311 /* 95 _ */ KParseTokens::ASC_UNDERSCORE, 312 /* 96 ` */ KParseTokens::ASC_OTHER, 313 //for ( i = 97; i < 123; i++ ) 314 /* 97 a */ KParseTokens::ASC_LOALPHA, 315 /* 98 b */ KParseTokens::ASC_LOALPHA, 316 /* 99 c */ KParseTokens::ASC_LOALPHA, 317 /* 100 d */ KParseTokens::ASC_LOALPHA, 318 /* 101 e */ KParseTokens::ASC_LOALPHA, 319 /* 102 f */ KParseTokens::ASC_LOALPHA, 320 /* 103 g */ KParseTokens::ASC_LOALPHA, 321 /* 104 h */ KParseTokens::ASC_LOALPHA, 322 /* 105 i */ KParseTokens::ASC_LOALPHA, 323 /* 106 j */ KParseTokens::ASC_LOALPHA, 324 /* 107 k */ KParseTokens::ASC_LOALPHA, 325 /* 108 l */ KParseTokens::ASC_LOALPHA, 326 /* 109 m */ KParseTokens::ASC_LOALPHA, 327 /* 110 n */ KParseTokens::ASC_LOALPHA, 328 /* 111 o */ KParseTokens::ASC_LOALPHA, 329 /* 112 p */ KParseTokens::ASC_LOALPHA, 330 /* 113 q */ KParseTokens::ASC_LOALPHA, 331 /* 114 r */ KParseTokens::ASC_LOALPHA, 332 /* 115 s */ KParseTokens::ASC_LOALPHA, 333 /* 116 t */ KParseTokens::ASC_LOALPHA, 334 /* 117 u */ KParseTokens::ASC_LOALPHA, 335 /* 118 v */ KParseTokens::ASC_LOALPHA, 336 /* 119 w */ KParseTokens::ASC_LOALPHA, 337 /* 120 x */ KParseTokens::ASC_LOALPHA, 338 /* 121 y */ KParseTokens::ASC_LOALPHA, 339 /* 122 z */ KParseTokens::ASC_LOALPHA, 340 /* 123 { */ KParseTokens::ASC_OTHER, 341 /* 124 | */ KParseTokens::ASC_OTHER, 342 /* 125 } */ KParseTokens::ASC_OTHER, 343 /* 126 ~ */ KParseTokens::ASC_OTHER, 344 /* 127 */ KParseTokens::ASC_OTHER 345 }; 346 347 348 // static 349 const sal_Unicode* cclass_Unicode::StrChr( const sal_Unicode* pStr, sal_Unicode c ) 350 { 351 if ( !pStr ) 352 return NULL; 353 while ( *pStr ) 354 { 355 if ( *pStr == c ) 356 return pStr; 357 pStr++; 358 } 359 return NULL; 360 } 361 362 363 sal_Int32 cclass_Unicode::getParseTokensType( const sal_Unicode* aStr, sal_Int32 nPos ) 364 { 365 sal_Unicode c = aStr[nPos]; 366 if ( c < nDefCnt ) 367 return pParseTokensType[ sal_uInt8(c) ]; 368 else 369 { 370 371 //! all KParseTokens::UNI_... must be matched 372 switch ( u_charType( (sal_uInt32) c ) ) 373 { 374 case U_UPPERCASE_LETTER : 375 return KParseTokens::UNI_UPALPHA; 376 case U_LOWERCASE_LETTER : 377 return KParseTokens::UNI_LOALPHA; 378 case U_TITLECASE_LETTER : 379 return KParseTokens::UNI_TITLE_ALPHA; 380 case U_MODIFIER_LETTER : 381 return KParseTokens::UNI_MODIFIER_LETTER; 382 case U_OTHER_LETTER : 383 // Non_Spacing_Mark could not be as leading character 384 if (nPos == 0) break; 385 // fall through, treat it as Other_Letter. 386 case U_NON_SPACING_MARK : 387 return KParseTokens::UNI_OTHER_LETTER; 388 case U_DECIMAL_DIGIT_NUMBER : 389 return KParseTokens::UNI_DIGIT; 390 case U_LETTER_NUMBER : 391 return KParseTokens::UNI_LETTER_NUMBER; 392 case U_OTHER_NUMBER : 393 return KParseTokens::UNI_OTHER_NUMBER; 394 } 395 396 return KParseTokens::UNI_OTHER; 397 } 398 } 399 400 sal_Bool cclass_Unicode::setupInternational( const Locale& rLocale ) 401 { 402 sal_Bool bChanged = (aParserLocale.Language != rLocale.Language 403 || aParserLocale.Country != rLocale.Country 404 || aParserLocale.Variant != rLocale.Variant); 405 if ( bChanged ) 406 { 407 aParserLocale.Language = rLocale.Language; 408 aParserLocale.Country = rLocale.Country; 409 aParserLocale.Variant = rLocale.Variant; 410 } 411 if ( !xLocaleData.is() && xMSF.is() ) 412 { 413 Reference < 414 XInterface > xI = 415 xMSF->createInstance( OUString( 416 RTL_CONSTASCII_USTRINGPARAM( "com.sun.star.i18n.LocaleData" ) ) ); 417 if ( xI.is() ) 418 { 419 Any x = xI->queryInterface( getCppuType((const Reference< XLocaleData>*)0) ); 420 x >>= xLocaleData; 421 } 422 } 423 return bChanged; 424 } 425 426 427 void cclass_Unicode::setupParserTable( const Locale& rLocale, sal_Int32 startCharTokenType, 428 const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType, 429 const OUString& userDefinedCharactersCont ) 430 { 431 bool bIntlEqual = (rLocale.Language == aParserLocale.Language && 432 rLocale.Country == aParserLocale.Country && 433 rLocale.Variant == aParserLocale.Variant); 434 if ( !pTable || !bIntlEqual || 435 startCharTokenType != nStartTypes || 436 contCharTokenType != nContTypes || 437 userDefinedCharactersStart != aStartChars || 438 userDefinedCharactersCont != aContChars ) 439 initParserTable( rLocale, startCharTokenType, userDefinedCharactersStart, 440 contCharTokenType, userDefinedCharactersCont ); 441 } 442 443 444 void cclass_Unicode::initParserTable( const Locale& rLocale, sal_Int32 startCharTokenType, 445 const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType, 446 const OUString& userDefinedCharactersCont ) 447 { 448 // (Re)Init 449 setupInternational( rLocale ); 450 // Memory of pTable is reused. 451 if ( !pTable ) 452 pTable = new UPT_FLAG_TYPE[nDefCnt]; 453 memcpy( pTable, pDefaultParserTable, sizeof(UPT_FLAG_TYPE) * nDefCnt ); 454 // Start and cont tables only need reallocation if different length. 455 if ( pStart && userDefinedCharactersStart.getLength() != aStartChars.getLength() ) 456 { 457 delete [] pStart; 458 pStart = NULL; 459 } 460 if ( pCont && userDefinedCharactersCont.getLength() != aContChars.getLength() ) 461 { 462 delete [] pCont; 463 pCont = NULL; 464 } 465 nStartTypes = startCharTokenType; 466 nContTypes = contCharTokenType; 467 aStartChars = userDefinedCharactersStart; 468 aContChars = userDefinedCharactersCont; 469 470 // specials 471 if( xLocaleData.is() ) 472 { 473 LocaleDataItem aItem = 474 xLocaleData->getLocaleItem( aParserLocale ); 475 //!TODO: theoretically separators may be a string, adjustment would have to be 476 //! done here and in parsing and in ::rtl::math::stringToDouble() 477 cGroupSep = aItem.thousandSeparator.getStr()[0]; 478 cDecimalSep = aItem.decimalSeparator.getStr()[0]; 479 } 480 481 if ( cGroupSep < nDefCnt ) 482 pTable[cGroupSep] |= TOKEN_VALUE; 483 if ( cDecimalSep < nDefCnt ) 484 pTable[cDecimalSep] |= TOKEN_CHAR_VALUE | TOKEN_VALUE; 485 486 // Modify characters according to KParseTokens definitions. 487 { 488 using namespace KParseTokens; 489 sal_uInt8 i; 490 491 if ( !(nStartTypes & ASC_UPALPHA) ) 492 for ( i = 65; i < 91; i++ ) 493 pTable[i] &= ~TOKEN_CHAR_WORD; // not allowed as start character 494 if ( !(nContTypes & ASC_UPALPHA) ) 495 for ( i = 65; i < 91; i++ ) 496 pTable[i] &= ~TOKEN_WORD; // not allowed as cont character 497 498 if ( !(nStartTypes & ASC_LOALPHA) ) 499 for ( i = 97; i < 123; i++ ) 500 pTable[i] &= ~TOKEN_CHAR_WORD; // not allowed as start character 501 if ( !(nContTypes & ASC_LOALPHA) ) 502 for ( i = 97; i < 123; i++ ) 503 pTable[i] &= ~TOKEN_WORD; // not allowed as cont character 504 505 if ( nStartTypes & ASC_DIGIT ) 506 for ( i = 48; i < 58; i++ ) 507 pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character 508 if ( !(nContTypes & ASC_DIGIT) ) 509 for ( i = 48; i < 58; i++ ) 510 pTable[i] &= ~TOKEN_WORD; // not allowed as cont character 511 512 if ( !(nStartTypes & ASC_UNDERSCORE) ) 513 pTable[95] &= ~TOKEN_CHAR_WORD; // not allowed as start character 514 if ( !(nContTypes & ASC_UNDERSCORE) ) 515 pTable[95] &= ~TOKEN_WORD; // not allowed as cont character 516 517 if ( nStartTypes & ASC_DOLLAR ) 518 pTable[36] |= TOKEN_CHAR_WORD; // allowed as start character 519 if ( nContTypes & ASC_DOLLAR ) 520 pTable[36] |= TOKEN_WORD; // allowed as cont character 521 522 if ( nStartTypes & ASC_DOT ) 523 pTable[46] |= TOKEN_CHAR_WORD; // allowed as start character 524 if ( nContTypes & ASC_DOT ) 525 pTable[46] |= TOKEN_WORD; // allowed as cont character 526 527 if ( nStartTypes & ASC_COLON ) 528 pTable[58] |= TOKEN_CHAR_WORD; // allowed as start character 529 if ( nContTypes & ASC_COLON ) 530 pTable[58] |= TOKEN_WORD; // allowed as cont character 531 532 if ( nStartTypes & ASC_CONTROL ) 533 for ( i = 1; i < 32; i++ ) 534 pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character 535 if ( nContTypes & ASC_CONTROL ) 536 for ( i = 1; i < 32; i++ ) 537 pTable[i] |= TOKEN_WORD; // allowed as cont character 538 539 if ( nStartTypes & ASC_ANY_BUT_CONTROL ) 540 for ( i = 32; i < nDefCnt; i++ ) 541 pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character 542 if ( nContTypes & ASC_ANY_BUT_CONTROL ) 543 for ( i = 32; i < nDefCnt; i++ ) 544 pTable[i] |= TOKEN_WORD; // allowed as cont character 545 546 } 547 548 // Merge in (positively override with) user defined characters. 549 // StartChars 550 sal_Int32 nLen = aStartChars.getLength(); 551 if ( nLen ) 552 { 553 if ( !pStart ) 554 pStart = new UPT_FLAG_TYPE[ nLen ]; 555 const sal_Unicode* p = aStartChars.getStr(); 556 for ( sal_Int32 j=0; j<nLen; j++, p++ ) 557 { 558 pStart[j] = TOKEN_CHAR_WORD; 559 if ( *p < nDefCnt ) 560 pTable[*p] |= TOKEN_CHAR_WORD; 561 } 562 } 563 // ContChars 564 nLen = aContChars.getLength(); 565 if ( nLen ) 566 { 567 if ( !pCont ) 568 pCont = new UPT_FLAG_TYPE[ nLen ]; 569 const sal_Unicode* p = aContChars.getStr(); 570 for ( sal_Int32 j=0; j<nLen; j++ ) 571 { 572 pCont[j] = TOKEN_WORD; 573 if ( *p < nDefCnt ) 574 pTable[*p] |= TOKEN_WORD; 575 } 576 } 577 } 578 579 580 void cclass_Unicode::destroyParserTable() 581 { 582 if ( pCont ) 583 delete [] pCont; 584 if ( pStart ) 585 delete [] pStart; 586 if ( pTable ) 587 delete [] pTable; 588 } 589 590 591 UPT_FLAG_TYPE cclass_Unicode::getFlags( const sal_Unicode* aStr, sal_Int32 nPos ) 592 { 593 UPT_FLAG_TYPE nMask; 594 sal_Unicode c = aStr[nPos]; 595 if ( c < nDefCnt ) 596 nMask = pTable[ sal_uInt8(c) ]; 597 else 598 nMask = getFlagsExtended( aStr, nPos ); 599 switch ( eState ) 600 { 601 case ssGetChar : 602 case ssRewindFromValue : 603 case ssIgnoreLeadingInRewind : 604 case ssGetWordFirstChar : 605 if ( !(nMask & TOKEN_CHAR_WORD) ) 606 { 607 nMask |= getStartCharsFlags( c ); 608 if ( nMask & TOKEN_CHAR_WORD ) 609 nMask &= ~TOKEN_EXCLUDED; 610 } 611 break; 612 case ssGetValue : 613 case ssGetWord : 614 if ( !(nMask & TOKEN_WORD) ) 615 { 616 nMask |= getContCharsFlags( c ); 617 if ( nMask & TOKEN_WORD ) 618 nMask &= ~TOKEN_EXCLUDED; 619 } 620 break; 621 default: 622 ; // other cases aren't needed, no compiler warning 623 } 624 return nMask; 625 } 626 627 628 UPT_FLAG_TYPE cclass_Unicode::getFlagsExtended( const sal_Unicode* aStr, sal_Int32 nPos ) 629 { 630 sal_Unicode c = aStr[nPos]; 631 if ( c == cGroupSep ) 632 return TOKEN_VALUE; 633 else if ( c == cDecimalSep ) 634 return TOKEN_CHAR_VALUE | TOKEN_VALUE; 635 using namespace i18n; 636 bool bStart = (eState == ssGetChar || eState == ssGetWordFirstChar || 637 eState == ssRewindFromValue || eState == ssIgnoreLeadingInRewind); 638 sal_Int32 nTypes = (bStart ? nStartTypes : nContTypes); 639 640 //! all KParseTokens::UNI_... must be matched 641 switch ( u_charType( (sal_uInt32) c ) ) 642 { 643 case U_UPPERCASE_LETTER : 644 return (nTypes & KParseTokens::UNI_UPALPHA) ? 645 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : 646 TOKEN_ILLEGAL; 647 case U_LOWERCASE_LETTER : 648 return (nTypes & KParseTokens::UNI_LOALPHA) ? 649 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : 650 TOKEN_ILLEGAL; 651 case U_TITLECASE_LETTER : 652 return (nTypes & KParseTokens::UNI_TITLE_ALPHA) ? 653 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : 654 TOKEN_ILLEGAL; 655 case U_MODIFIER_LETTER : 656 return (nTypes & KParseTokens::UNI_MODIFIER_LETTER) ? 657 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : 658 TOKEN_ILLEGAL; 659 case U_NON_SPACING_MARK : 660 case U_COMBINING_SPACING_MARK : 661 // Non_Spacing_Mark can't be a leading character, 662 // nor can a spacing combining mark. 663 if (bStart) 664 return TOKEN_ILLEGAL; 665 // fall through, treat it as Other_Letter. 666 case U_OTHER_LETTER : 667 return (nTypes & KParseTokens::UNI_OTHER_LETTER) ? 668 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : 669 TOKEN_ILLEGAL; 670 case U_DECIMAL_DIGIT_NUMBER : 671 return ((nTypes & KParseTokens::UNI_DIGIT) ? 672 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : 673 TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS; 674 case U_LETTER_NUMBER : 675 return ((nTypes & KParseTokens::UNI_LETTER_NUMBER) ? 676 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : 677 TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS; 678 case U_OTHER_NUMBER : 679 return ((nTypes & KParseTokens::UNI_OTHER_NUMBER) ? 680 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : 681 TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS; 682 case U_SPACE_SEPARATOR : 683 return ((nTypes & KParseTokens::IGNORE_LEADING_WS) ? 684 TOKEN_CHAR_DONTCARE : (bStart ? TOKEN_CHAR_WORD : (TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP) )); 685 } 686 687 return TOKEN_ILLEGAL; 688 } 689 690 691 UPT_FLAG_TYPE cclass_Unicode::getStartCharsFlags( sal_Unicode c ) 692 { 693 if ( pStart ) 694 { 695 const sal_Unicode* pStr = aStartChars.getStr(); 696 const sal_Unicode* p = StrChr( pStr, c ); 697 if ( p ) 698 return pStart[ p - pStr ]; 699 } 700 return TOKEN_ILLEGAL; 701 } 702 703 704 UPT_FLAG_TYPE cclass_Unicode::getContCharsFlags( sal_Unicode c ) 705 { 706 if ( pCont ) 707 { 708 const sal_Unicode* pStr = aContChars.getStr(); 709 const sal_Unicode* p = StrChr( pStr, c ); 710 if ( p ) 711 return pCont[ p - pStr ]; 712 } 713 return TOKEN_ILLEGAL; 714 } 715 716 717 void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32 nPos, sal_Int32 nTokenType ) 718 { 719 using namespace i18n; 720 const sal_Unicode* const pTextStart = rText.getStr() + nPos; 721 eState = ssGetChar; 722 723 //! All the variables below (plus ParseResult) have to be resetted on ssRewindFromValue! 724 const sal_Unicode* pSym = pTextStart; 725 const sal_Unicode* pSrc = pSym; 726 OUString aSymbol; 727 sal_Unicode c = *pSrc; 728 sal_Unicode cLast = 0; 729 int nDecSeps = 0; 730 bool bQuote = false; 731 bool bMightBeWord = true; 732 bool bMightBeWordLast = true; 733 //! All the variables above (plus ParseResult) have to be resetted on ssRewindFromValue! 734 735 while ( (c != 0) && (eState != ssStop) ) 736 { 737 UPT_FLAG_TYPE nMask = getFlags( pTextStart, pSrc - pTextStart ); 738 if ( nMask & TOKEN_EXCLUDED ) 739 eState = ssBounce; 740 if ( bMightBeWord ) 741 { // only relevant for ssGetValue fall back 742 if ( eState == ssGetChar || eState == ssRewindFromValue || 743 eState == ssIgnoreLeadingInRewind ) 744 bMightBeWord = ((nMask & TOKEN_CHAR_WORD) != 0); 745 else 746 bMightBeWord = ((nMask & TOKEN_WORD) != 0); 747 } 748 sal_Int32 nParseTokensType = getParseTokensType( pTextStart, pSrc - pTextStart ); 749 pSrc++; 750 switch (eState) 751 { 752 case ssGetChar : 753 case ssRewindFromValue : 754 case ssIgnoreLeadingInRewind : 755 { 756 if ( (nMask & TOKEN_CHAR_VALUE) && eState != ssRewindFromValue 757 && eState != ssIgnoreLeadingInRewind ) 758 { //! must be first, may fall back to ssGetWord via bMightBeWord 759 eState = ssGetValue; 760 if ( nMask & TOKEN_VALUE_DIGIT ) 761 { 762 if ( 128 <= c ) 763 r.TokenType = KParseType::UNI_NUMBER; 764 else 765 r.TokenType = KParseType::ASC_NUMBER; 766 } 767 else if ( c == cDecimalSep ) 768 { 769 if ( *pSrc ) 770 ++nDecSeps; 771 else 772 eState = ssRewindFromValue; 773 // retry for ONE_SINGLE_CHAR or others 774 } 775 } 776 else if ( nMask & TOKEN_CHAR_WORD ) 777 { 778 eState = ssGetWord; 779 r.TokenType = KParseType::IDENTNAME; 780 } 781 else if ( nMask & TOKEN_NAME_SEP ) 782 { 783 eState = ssGetWordFirstChar; 784 bQuote = true; 785 pSym++; 786 nParseTokensType = 0; // will be taken of first real character 787 r.TokenType = KParseType::SINGLE_QUOTE_NAME; 788 } 789 else if ( nMask & TOKEN_CHAR_STRING ) 790 { 791 eState = ssGetString; 792 pSym++; 793 nParseTokensType = 0; // will be taken of first real character 794 r.TokenType = KParseType::DOUBLE_QUOTE_STRING; 795 } 796 else if ( nMask & TOKEN_CHAR_DONTCARE ) 797 { 798 if ( nStartTypes & KParseTokens::IGNORE_LEADING_WS ) 799 { 800 if (eState == ssRewindFromValue) 801 eState = ssIgnoreLeadingInRewind; 802 r.LeadingWhiteSpace++; 803 pSym++; 804 nParseTokensType = 0; // wait until real character 805 bMightBeWord = true; 806 } 807 else 808 eState = ssBounce; 809 } 810 else if ( nMask & TOKEN_CHAR_BOOL ) 811 { 812 eState = ssGetBool; 813 r.TokenType = KParseType::BOOLEAN; 814 } 815 else if ( nMask & TOKEN_CHAR ) 816 { //! must be last 817 eState = ssStop; 818 r.TokenType = KParseType::ONE_SINGLE_CHAR; 819 } 820 else 821 eState = ssBounce; // not known 822 } 823 break; 824 case ssGetValue : 825 { 826 if ( nMask & TOKEN_VALUE_DIGIT ) 827 { 828 if ( 128 <= c ) 829 r.TokenType = KParseType::UNI_NUMBER; 830 else if ( r.TokenType != KParseType::UNI_NUMBER ) 831 r.TokenType = KParseType::ASC_NUMBER; 832 } 833 if ( nMask & TOKEN_VALUE ) 834 { 835 if ( c == cDecimalSep && ++nDecSeps > 1 ) 836 { 837 if ( pSrc - pTextStart == 2 ) 838 eState = ssRewindFromValue; 839 // consecutive separators 840 else 841 eState = ssStopBack; 842 } 843 // else keep it going 844 } 845 else if ( c == 'E' || c == 'e' ) 846 { 847 UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart ); 848 if ( nNext & TOKEN_VALUE_EXP ) 849 ; // keep it going 850 else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) ) 851 { // might be a numerical name (1.2efg) 852 eState = ssGetWord; 853 r.TokenType = KParseType::IDENTNAME; 854 } 855 else 856 eState = ssStopBack; 857 } 858 else if ( nMask & TOKEN_VALUE_SIGN ) 859 { 860 if ( (cLast == 'E') || (cLast == 'e') ) 861 { 862 UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart ); 863 if ( nNext & TOKEN_VALUE_EXP_VALUE ) 864 ; // keep it going 865 else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) ) 866 { // might be a numerical name (1.2e+fg) 867 eState = ssGetWord; 868 r.TokenType = KParseType::IDENTNAME; 869 } 870 else 871 eState = ssStopBack; 872 } 873 else if ( bMightBeWord ) 874 { // might be a numerical name (1.2+fg) 875 eState = ssGetWord; 876 r.TokenType = KParseType::IDENTNAME; 877 } 878 else 879 eState = ssStopBack; 880 } 881 else if ( bMightBeWord && (nMask & TOKEN_WORD) ) 882 { // might be a numerical name (1995.A1) 883 eState = ssGetWord; 884 r.TokenType = KParseType::IDENTNAME; 885 } 886 else 887 eState = ssStopBack; 888 } 889 break; 890 case ssGetWordFirstChar : 891 eState = ssGetWord; 892 // fall thru 893 case ssGetWord : 894 { 895 if ( nMask & TOKEN_WORD ) 896 ; // keep it going 897 else if ( nMask & TOKEN_NAME_SEP ) 898 { 899 if ( bQuote ) 900 { 901 if ( cLast == '\\' ) 902 { // escaped 903 aSymbol += OUString( pSym, pSrc - pSym - 2 ); 904 aSymbol += OUString( &c, 1); 905 } 906 else 907 { 908 eState = ssStop; 909 aSymbol += OUString( pSym, pSrc - pSym - 1 ); 910 } 911 pSym = pSrc; 912 } 913 else 914 eState = ssStopBack; 915 } 916 else if ( bQuote ) 917 ; // keep it going 918 else 919 eState = ssStopBack; 920 } 921 break; 922 case ssGetString : 923 { 924 if ( nMask & TOKEN_STRING_SEP ) 925 { 926 if ( cLast == '\\' ) 927 { // escaped 928 aSymbol += OUString( pSym, pSrc - pSym - 2 ); 929 aSymbol += OUString( &c, 1); 930 } 931 else if ( c == *pSrc && 932 !(nContTypes & KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING) ) 933 { // "" => literal " escaped 934 aSymbol += OUString( pSym, pSrc - pSym ); 935 pSrc++; 936 } 937 else 938 { 939 eState = ssStop; 940 aSymbol += OUString( pSym, pSrc - pSym - 1 ); 941 } 942 pSym = pSrc; 943 } 944 } 945 break; 946 case ssGetBool : 947 { 948 if ( (nMask & TOKEN_BOOL) ) 949 eState = ssStop; // maximum 2: <, >, <>, <=, >= 950 else 951 eState = ssStopBack; 952 } 953 break; 954 case ssStopBack : 955 case ssBounce : 956 case ssStop : 957 ; // nothing, no compiler warning 958 break; 959 } 960 if ( eState == ssRewindFromValue ) 961 { 962 r = ParseResult(); 963 pSym = pTextStart; 964 pSrc = pSym; 965 aSymbol = OUString(); 966 c = *pSrc; 967 cLast = 0; 968 nDecSeps = 0; 969 bQuote = false; 970 bMightBeWord = true; 971 bMightBeWordLast = true; 972 } 973 else 974 { 975 if ( !(r.TokenType & nTokenType) ) 976 { 977 if ( (r.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER)) 978 && (nTokenType & KParseType::IDENTNAME) && bMightBeWord ) 979 ; // keep a number that might be a word 980 else if ( r.LeadingWhiteSpace == (pSrc - pTextStart) ) 981 ; // keep ignored white space 982 else if ( !r.TokenType && eState == ssGetValue && (nMask & TOKEN_VALUE_SEP) ) 983 ; // keep uncertain value 984 else 985 eState = ssBounce; 986 } 987 if ( eState == ssBounce ) 988 { 989 r.TokenType = 0; 990 eState = ssStopBack; 991 } 992 if ( eState == ssStopBack ) 993 { // put back 994 pSrc--; 995 bMightBeWord = bMightBeWordLast; 996 eState = ssStop; 997 } 998 if ( eState != ssStop ) 999 { 1000 if ( !r.StartFlags ) 1001 r.StartFlags |= nParseTokensType; 1002 else 1003 r.ContFlags |= nParseTokensType; 1004 } 1005 bMightBeWordLast = bMightBeWord; 1006 cLast = c; 1007 c = *pSrc; 1008 } 1009 } 1010 // r.CharLen is the length in characters (not code points) of the parsed 1011 // token not including any leading white space, change this calculation if 1012 // multi-code-point Unicode characters are to be supported. 1013 r.CharLen = pSrc - pTextStart - r.LeadingWhiteSpace; 1014 r.EndPos = nPos + (pSrc - pTextStart); 1015 if ( r.TokenType & KParseType::ASC_NUMBER ) 1016 { 1017 r.Value = rtl_math_uStringToDouble( pTextStart + r.LeadingWhiteSpace, 1018 pTextStart + r.EndPos, cDecimalSep, cGroupSep, NULL, NULL ); 1019 if ( bMightBeWord ) 1020 r.TokenType |= KParseType::IDENTNAME; 1021 } 1022 else if ( r.TokenType & KParseType::UNI_NUMBER ) 1023 { 1024 if ( !xNatNumSup.is() ) 1025 { 1026 #define NATIVENUMBERSUPPLIER_SERVICENAME "com.sun.star.i18n.NativeNumberSupplier" 1027 if ( xMSF.is() ) 1028 { 1029 xNatNumSup = Reference< XNativeNumberSupplier > ( 1030 xMSF->createInstance( OUString( 1031 RTL_CONSTASCII_USTRINGPARAM( 1032 NATIVENUMBERSUPPLIER_SERVICENAME ) ) ), 1033 UNO_QUERY ); 1034 } 1035 if ( !xNatNumSup.is() ) 1036 { 1037 throw RuntimeException( OUString( 1038 #ifdef DBG_UTIL 1039 RTL_CONSTASCII_USTRINGPARAM( 1040 "cclass_Unicode::parseText: can't instanciate " 1041 NATIVENUMBERSUPPLIER_SERVICENAME ) 1042 #endif 1043 ), *this ); 1044 } 1045 #undef NATIVENUMBERSUPPLIER_SERVICENAME 1046 } 1047 OUString aTmp( pTextStart + r.LeadingWhiteSpace, r.EndPos - nPos + 1048 r.LeadingWhiteSpace ); 1049 // transliterate to ASCII 1050 aTmp = xNatNumSup->getNativeNumberString( aTmp, aParserLocale, 1051 NativeNumberMode::NATNUM0 ); 1052 r.Value = ::rtl::math::stringToDouble( aTmp, cDecimalSep, cGroupSep, NULL, NULL ); 1053 if ( bMightBeWord ) 1054 r.TokenType |= KParseType::IDENTNAME; 1055 } 1056 else if ( r.TokenType & (KParseType::SINGLE_QUOTE_NAME | KParseType::DOUBLE_QUOTE_STRING) ) 1057 { 1058 if ( pSym < pSrc ) 1059 { //! open quote 1060 aSymbol += OUString( pSym, pSrc - pSym ); 1061 r.TokenType |= KParseType::MISSING_QUOTE; 1062 } 1063 r.DequotedNameOrString = aSymbol; 1064 } 1065 } 1066 1067 } } } } 1068