1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_i18npool.hxx"
26
27 #include <cclass_unicode.hxx>
28 #include <unicode/uchar.h>
29 #include <rtl/math.hxx>
30 #include <rtl/ustring.hxx>
31 #include <com/sun/star/i18n/KParseTokens.hpp>
32 #include <com/sun/star/i18n/KParseType.hpp>
33 #include <com/sun/star/i18n/UnicodeType.hpp>
34 #include <com/sun/star/i18n/XLocaleData.hpp>
35 #include <com/sun/star/i18n/NativeNumberMode.hpp>
36
37 #include <string.h> // memcpy()
38
39 using namespace ::com::sun::star::uno;
40 using namespace ::com::sun::star::lang;
41 using namespace ::rtl;
42
43 namespace com { namespace sun { namespace star { namespace i18n {
44
45 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_ILLEGAL = 0x00000000;
46 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR = 0x00000001;
47 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_BOOL = 0x00000002;
48 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_WORD = 0x00000004;
49 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_VALUE = 0x00000008;
50 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_STRING = 0x00000010;
51 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_DONTCARE= 0x00000020;
52 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_BOOL = 0x00000040;
53 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD = 0x00000080;
54 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD_SEP = 0x00000100;
55 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE = 0x00000200;
56 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SEP = 0x00000400;
57 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP = 0x00000800;
58 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SIGN = 0x00001000;
59 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP_VALUE = 0x00002000;
60 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_DIGIT = 0x00004000;
61 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_NAME_SEP = 0x20000000;
62 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_STRING_SEP = 0x40000000;
63 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_EXCLUDED = 0x80000000;
64
65 #define TOKEN_DIGIT_FLAGS (TOKEN_CHAR_VALUE | TOKEN_VALUE | TOKEN_VALUE_EXP | TOKEN_VALUE_EXP_VALUE | TOKEN_VALUE_DIGIT)
66
67 // Default identifier/name specification is [A-Za-z_][A-Za-z0-9_]*
68
69 const sal_uInt8 cclass_Unicode::nDefCnt = 128;
70 const UPT_FLAG_TYPE cclass_Unicode::pDefaultParserTable[ nDefCnt ] =
71 {
72 // (...) == Calc formula compiler specific, commented out and modified
73
74 /* \0 */ TOKEN_EXCLUDED,
75 TOKEN_ILLEGAL,
76 TOKEN_ILLEGAL,
77 TOKEN_ILLEGAL,
78 TOKEN_ILLEGAL,
79 TOKEN_ILLEGAL,
80 TOKEN_ILLEGAL,
81 TOKEN_ILLEGAL,
82 TOKEN_ILLEGAL,
83 /* 9 \t */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL)
84 TOKEN_ILLEGAL,
85 /* 11 \v */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL)
86 TOKEN_ILLEGAL,
87 TOKEN_ILLEGAL,
88 TOKEN_ILLEGAL,
89 TOKEN_ILLEGAL,
90 TOKEN_ILLEGAL,
91 TOKEN_ILLEGAL,
92 TOKEN_ILLEGAL,
93 TOKEN_ILLEGAL,
94 TOKEN_ILLEGAL,
95 TOKEN_ILLEGAL,
96 TOKEN_ILLEGAL,
97 TOKEN_ILLEGAL,
98 TOKEN_ILLEGAL,
99 TOKEN_ILLEGAL,
100 TOKEN_ILLEGAL,
101 TOKEN_ILLEGAL,
102 TOKEN_ILLEGAL,
103 TOKEN_ILLEGAL,
104 TOKEN_ILLEGAL,
105 TOKEN_ILLEGAL,
106 /* 32 */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
107 /* 33 ! */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
108 /* 34 " */ TOKEN_CHAR_STRING | TOKEN_STRING_SEP,
109 /* 35 # */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD_SEP)
110 /* 36 $ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_WORD | TOKEN_WORD)
111 /* 37 % */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_VALUE)
112 /* 38 & */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
113 /* 39 ' */ TOKEN_NAME_SEP,
114 /* 40 ( */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
115 /* 41 ) */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
116 /* 42 * */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
117 /* 43 + */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN,
118 /* 44 , */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_VALUE | TOKEN_VALUE)
119 /* 45 - */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN,
120 /* 46 . */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD | TOKEN_CHAR_VALUE | TOKEN_VALUE)
121 /* 47 / */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
122 //for ( i = 48; i < 58; i++ )
123 /* 48 0 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
124 /* 49 1 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
125 /* 50 2 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
126 /* 51 3 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
127 /* 52 4 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
128 /* 53 5 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
129 /* 54 6 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
130 /* 55 7 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
131 /* 56 8 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
132 /* 57 9 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
133 /* 58 : */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD)
134 /* 59 ; */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
135 /* 60 < */ TOKEN_CHAR_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
136 /* 61 = */ TOKEN_CHAR | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
137 /* 62 > */ TOKEN_CHAR_BOOL | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
138 /* 63 ? */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_WORD | TOKEN_WORD)
139 /* 64 @ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
140 //for ( i = 65; i < 91; i++ )
141 /* 65 A */ TOKEN_CHAR_WORD | TOKEN_WORD,
142 /* 66 B */ TOKEN_CHAR_WORD | TOKEN_WORD,
143 /* 67 C */ TOKEN_CHAR_WORD | TOKEN_WORD,
144 /* 68 D */ TOKEN_CHAR_WORD | TOKEN_WORD,
145 /* 69 E */ TOKEN_CHAR_WORD | TOKEN_WORD,
146 /* 70 F */ TOKEN_CHAR_WORD | TOKEN_WORD,
147 /* 71 G */ TOKEN_CHAR_WORD | TOKEN_WORD,
148 /* 72 H */ TOKEN_CHAR_WORD | TOKEN_WORD,
149 /* 73 I */ TOKEN_CHAR_WORD | TOKEN_WORD,
150 /* 74 J */ TOKEN_CHAR_WORD | TOKEN_WORD,
151 /* 75 K */ TOKEN_CHAR_WORD | TOKEN_WORD,
152 /* 76 L */ TOKEN_CHAR_WORD | TOKEN_WORD,
153 /* 77 M */ TOKEN_CHAR_WORD | TOKEN_WORD,
154 /* 78 N */ TOKEN_CHAR_WORD | TOKEN_WORD,
155 /* 79 O */ TOKEN_CHAR_WORD | TOKEN_WORD,
156 /* 80 P */ TOKEN_CHAR_WORD | TOKEN_WORD,
157 /* 81 Q */ TOKEN_CHAR_WORD | TOKEN_WORD,
158 /* 82 R */ TOKEN_CHAR_WORD | TOKEN_WORD,
159 /* 83 S */ TOKEN_CHAR_WORD | TOKEN_WORD,
160 /* 84 T */ TOKEN_CHAR_WORD | TOKEN_WORD,
161 /* 85 U */ TOKEN_CHAR_WORD | TOKEN_WORD,
162 /* 86 V */ TOKEN_CHAR_WORD | TOKEN_WORD,
163 /* 87 W */ TOKEN_CHAR_WORD | TOKEN_WORD,
164 /* 88 X */ TOKEN_CHAR_WORD | TOKEN_WORD,
165 /* 89 Y */ TOKEN_CHAR_WORD | TOKEN_WORD,
166 /* 90 Z */ TOKEN_CHAR_WORD | TOKEN_WORD,
167 /* 91 [ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
168 /* 92 \ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
169 /* 93 ] */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
170 /* 94 ^ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
171 /* 95 _ */ TOKEN_CHAR_WORD | TOKEN_WORD,
172 /* 96 ` */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
173 //for ( i = 97; i < 123; i++ )
174 /* 97 a */ TOKEN_CHAR_WORD | TOKEN_WORD,
175 /* 98 b */ TOKEN_CHAR_WORD | TOKEN_WORD,
176 /* 99 c */ TOKEN_CHAR_WORD | TOKEN_WORD,
177 /* 100 d */ TOKEN_CHAR_WORD | TOKEN_WORD,
178 /* 101 e */ TOKEN_CHAR_WORD | TOKEN_WORD,
179 /* 102 f */ TOKEN_CHAR_WORD | TOKEN_WORD,
180 /* 103 g */ TOKEN_CHAR_WORD | TOKEN_WORD,
181 /* 104 h */ TOKEN_CHAR_WORD | TOKEN_WORD,
182 /* 105 i */ TOKEN_CHAR_WORD | TOKEN_WORD,
183 /* 106 j */ TOKEN_CHAR_WORD | TOKEN_WORD,
184 /* 107 k */ TOKEN_CHAR_WORD | TOKEN_WORD,
185 /* 108 l */ TOKEN_CHAR_WORD | TOKEN_WORD,
186 /* 109 m */ TOKEN_CHAR_WORD | TOKEN_WORD,
187 /* 110 n */ TOKEN_CHAR_WORD | TOKEN_WORD,
188 /* 111 o */ TOKEN_CHAR_WORD | TOKEN_WORD,
189 /* 112 p */ TOKEN_CHAR_WORD | TOKEN_WORD,
190 /* 113 q */ TOKEN_CHAR_WORD | TOKEN_WORD,
191 /* 114 r */ TOKEN_CHAR_WORD | TOKEN_WORD,
192 /* 115 s */ TOKEN_CHAR_WORD | TOKEN_WORD,
193 /* 116 t */ TOKEN_CHAR_WORD | TOKEN_WORD,
194 /* 117 u */ TOKEN_CHAR_WORD | TOKEN_WORD,
195 /* 118 v */ TOKEN_CHAR_WORD | TOKEN_WORD,
196 /* 119 w */ TOKEN_CHAR_WORD | TOKEN_WORD,
197 /* 120 x */ TOKEN_CHAR_WORD | TOKEN_WORD,
198 /* 121 y */ TOKEN_CHAR_WORD | TOKEN_WORD,
199 /* 122 z */ TOKEN_CHAR_WORD | TOKEN_WORD,
200 /* 123 { */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
201 /* 124 | */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
202 /* 125 } */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
203 /* 126 ~ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
204 /* 127 */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP // (TOKEN_ILLEGAL // UNUSED)
205 };
206
207
208 const sal_Int32 cclass_Unicode::pParseTokensType[ nDefCnt ] =
209 {
210 /* \0 */ KParseTokens::ASC_OTHER,
211 KParseTokens::ASC_CONTROL,
212 KParseTokens::ASC_CONTROL,
213 KParseTokens::ASC_CONTROL,
214 KParseTokens::ASC_CONTROL,
215 KParseTokens::ASC_CONTROL,
216 KParseTokens::ASC_CONTROL,
217 KParseTokens::ASC_CONTROL,
218 KParseTokens::ASC_CONTROL,
219 /* 9 \t */ KParseTokens::ASC_CONTROL,
220 KParseTokens::ASC_CONTROL,
221 /* 11 \v */ KParseTokens::ASC_CONTROL,
222 KParseTokens::ASC_CONTROL,
223 KParseTokens::ASC_CONTROL,
224 KParseTokens::ASC_CONTROL,
225 KParseTokens::ASC_CONTROL,
226 KParseTokens::ASC_CONTROL,
227 KParseTokens::ASC_CONTROL,
228 KParseTokens::ASC_CONTROL,
229 KParseTokens::ASC_CONTROL,
230 KParseTokens::ASC_CONTROL,
231 KParseTokens::ASC_CONTROL,
232 KParseTokens::ASC_CONTROL,
233 KParseTokens::ASC_CONTROL,
234 KParseTokens::ASC_CONTROL,
235 KParseTokens::ASC_CONTROL,
236 KParseTokens::ASC_CONTROL,
237 KParseTokens::ASC_CONTROL,
238 KParseTokens::ASC_CONTROL,
239 KParseTokens::ASC_CONTROL,
240 KParseTokens::ASC_CONTROL,
241 KParseTokens::ASC_CONTROL,
242 /* 32 */ KParseTokens::ASC_OTHER,
243 /* 33 ! */ KParseTokens::ASC_OTHER,
244 /* 34 " */ KParseTokens::ASC_OTHER,
245 /* 35 # */ KParseTokens::ASC_OTHER,
246 /* 36 $ */ KParseTokens::ASC_DOLLAR,
247 /* 37 % */ KParseTokens::ASC_OTHER,
248 /* 38 & */ KParseTokens::ASC_OTHER,
249 /* 39 ' */ KParseTokens::ASC_OTHER,
250 /* 40 ( */ KParseTokens::ASC_OTHER,
251 /* 41 ) */ KParseTokens::ASC_OTHER,
252 /* 42 * */ KParseTokens::ASC_OTHER,
253 /* 43 + */ KParseTokens::ASC_OTHER,
254 /* 44 , */ KParseTokens::ASC_OTHER,
255 /* 45 - */ KParseTokens::ASC_OTHER,
256 /* 46 . */ KParseTokens::ASC_DOT,
257 /* 47 / */ KParseTokens::ASC_OTHER,
258 //for ( i = 48; i < 58; i++ )
259 /* 48 0 */ KParseTokens::ASC_DIGIT,
260 /* 49 1 */ KParseTokens::ASC_DIGIT,
261 /* 50 2 */ KParseTokens::ASC_DIGIT,
262 /* 51 3 */ KParseTokens::ASC_DIGIT,
263 /* 52 4 */ KParseTokens::ASC_DIGIT,
264 /* 53 5 */ KParseTokens::ASC_DIGIT,
265 /* 54 6 */ KParseTokens::ASC_DIGIT,
266 /* 55 7 */ KParseTokens::ASC_DIGIT,
267 /* 56 8 */ KParseTokens::ASC_DIGIT,
268 /* 57 9 */ KParseTokens::ASC_DIGIT,
269 /* 58 : */ KParseTokens::ASC_COLON,
270 /* 59 ; */ KParseTokens::ASC_OTHER,
271 /* 60 < */ KParseTokens::ASC_OTHER,
272 /* 61 = */ KParseTokens::ASC_OTHER,
273 /* 62 > */ KParseTokens::ASC_OTHER,
274 /* 63 ? */ KParseTokens::ASC_OTHER,
275 /* 64 @ */ KParseTokens::ASC_OTHER,
276 //for ( i = 65; i < 91; i++ )
277 /* 65 A */ KParseTokens::ASC_UPALPHA,
278 /* 66 B */ KParseTokens::ASC_UPALPHA,
279 /* 67 C */ KParseTokens::ASC_UPALPHA,
280 /* 68 D */ KParseTokens::ASC_UPALPHA,
281 /* 69 E */ KParseTokens::ASC_UPALPHA,
282 /* 70 F */ KParseTokens::ASC_UPALPHA,
283 /* 71 G */ KParseTokens::ASC_UPALPHA,
284 /* 72 H */ KParseTokens::ASC_UPALPHA,
285 /* 73 I */ KParseTokens::ASC_UPALPHA,
286 /* 74 J */ KParseTokens::ASC_UPALPHA,
287 /* 75 K */ KParseTokens::ASC_UPALPHA,
288 /* 76 L */ KParseTokens::ASC_UPALPHA,
289 /* 77 M */ KParseTokens::ASC_UPALPHA,
290 /* 78 N */ KParseTokens::ASC_UPALPHA,
291 /* 79 O */ KParseTokens::ASC_UPALPHA,
292 /* 80 P */ KParseTokens::ASC_UPALPHA,
293 /* 81 Q */ KParseTokens::ASC_UPALPHA,
294 /* 82 R */ KParseTokens::ASC_UPALPHA,
295 /* 83 S */ KParseTokens::ASC_UPALPHA,
296 /* 84 T */ KParseTokens::ASC_UPALPHA,
297 /* 85 U */ KParseTokens::ASC_UPALPHA,
298 /* 86 V */ KParseTokens::ASC_UPALPHA,
299 /* 87 W */ KParseTokens::ASC_UPALPHA,
300 /* 88 X */ KParseTokens::ASC_UPALPHA,
301 /* 89 Y */ KParseTokens::ASC_UPALPHA,
302 /* 90 Z */ KParseTokens::ASC_UPALPHA,
303 /* 91 [ */ KParseTokens::ASC_OTHER,
304 /* 92 \ */ KParseTokens::ASC_OTHER,
305 /* 93 ] */ KParseTokens::ASC_OTHER,
306 /* 94 ^ */ KParseTokens::ASC_OTHER,
307 /* 95 _ */ KParseTokens::ASC_UNDERSCORE,
308 /* 96 ` */ KParseTokens::ASC_OTHER,
309 //for ( i = 97; i < 123; i++ )
310 /* 97 a */ KParseTokens::ASC_LOALPHA,
311 /* 98 b */ KParseTokens::ASC_LOALPHA,
312 /* 99 c */ KParseTokens::ASC_LOALPHA,
313 /* 100 d */ KParseTokens::ASC_LOALPHA,
314 /* 101 e */ KParseTokens::ASC_LOALPHA,
315 /* 102 f */ KParseTokens::ASC_LOALPHA,
316 /* 103 g */ KParseTokens::ASC_LOALPHA,
317 /* 104 h */ KParseTokens::ASC_LOALPHA,
318 /* 105 i */ KParseTokens::ASC_LOALPHA,
319 /* 106 j */ KParseTokens::ASC_LOALPHA,
320 /* 107 k */ KParseTokens::ASC_LOALPHA,
321 /* 108 l */ KParseTokens::ASC_LOALPHA,
322 /* 109 m */ KParseTokens::ASC_LOALPHA,
323 /* 110 n */ KParseTokens::ASC_LOALPHA,
324 /* 111 o */ KParseTokens::ASC_LOALPHA,
325 /* 112 p */ KParseTokens::ASC_LOALPHA,
326 /* 113 q */ KParseTokens::ASC_LOALPHA,
327 /* 114 r */ KParseTokens::ASC_LOALPHA,
328 /* 115 s */ KParseTokens::ASC_LOALPHA,
329 /* 116 t */ KParseTokens::ASC_LOALPHA,
330 /* 117 u */ KParseTokens::ASC_LOALPHA,
331 /* 118 v */ KParseTokens::ASC_LOALPHA,
332 /* 119 w */ KParseTokens::ASC_LOALPHA,
333 /* 120 x */ KParseTokens::ASC_LOALPHA,
334 /* 121 y */ KParseTokens::ASC_LOALPHA,
335 /* 122 z */ KParseTokens::ASC_LOALPHA,
336 /* 123 { */ KParseTokens::ASC_OTHER,
337 /* 124 | */ KParseTokens::ASC_OTHER,
338 /* 125 } */ KParseTokens::ASC_OTHER,
339 /* 126 ~ */ KParseTokens::ASC_OTHER,
340 /* 127 */ KParseTokens::ASC_OTHER
341 };
342
343
344 // static
/** Find the first occurrence of a code unit in a NUL-terminated string.

    @param pStr  NUL-terminated string to search; may be NULL.
    @param c     Code unit to look for.
    @return Pointer to the first matching code unit, or NULL if pStr is NULL
            or c does not occur before the terminator. The terminator itself
            is never matched, so searching for 0 yields NULL.
 */
const sal_Unicode* cclass_Unicode::StrChr( const sal_Unicode* pStr, sal_Unicode c )
{
    if ( pStr )
    {
        for ( const sal_Unicode* pCur = pStr; *pCur; ++pCur )
        {
            if ( *pCur == c )
                return pCur;
        }
    }
    return NULL;
}
357
358
getParseTokensType(const sal_Unicode * aStr,sal_Int32 nPos)359 sal_Int32 cclass_Unicode::getParseTokensType( const sal_Unicode* aStr, sal_Int32 nPos )
360 {
361 sal_Unicode c = aStr[nPos];
362 if ( c < nDefCnt )
363 return pParseTokensType[ sal_uInt8(c) ];
364 else
365 {
366
367 //! all KParseTokens::UNI_... must be matched
368 switch ( u_charType( (sal_uInt32) c ) )
369 {
370 case U_UPPERCASE_LETTER :
371 return KParseTokens::UNI_UPALPHA;
372 case U_LOWERCASE_LETTER :
373 return KParseTokens::UNI_LOALPHA;
374 case U_TITLECASE_LETTER :
375 return KParseTokens::UNI_TITLE_ALPHA;
376 case U_MODIFIER_LETTER :
377 return KParseTokens::UNI_MODIFIER_LETTER;
378 case U_OTHER_LETTER :
379 // Non_Spacing_Mark could not be as leading character
380 if (nPos == 0) break;
381 // fall through, treat it as Other_Letter.
382 case U_NON_SPACING_MARK :
383 return KParseTokens::UNI_OTHER_LETTER;
384 case U_DECIMAL_DIGIT_NUMBER :
385 return KParseTokens::UNI_DIGIT;
386 case U_LETTER_NUMBER :
387 return KParseTokens::UNI_LETTER_NUMBER;
388 case U_OTHER_NUMBER :
389 return KParseTokens::UNI_OTHER_NUMBER;
390 }
391
392 return KParseTokens::UNI_OTHER;
393 }
394 }
395
setupInternational(const Locale & rLocale)396 sal_Bool cclass_Unicode::setupInternational( const Locale& rLocale )
397 {
398 sal_Bool bChanged = (aParserLocale.Language != rLocale.Language
399 || aParserLocale.Country != rLocale.Country
400 || aParserLocale.Variant != rLocale.Variant);
401 if ( bChanged )
402 {
403 aParserLocale.Language = rLocale.Language;
404 aParserLocale.Country = rLocale.Country;
405 aParserLocale.Variant = rLocale.Variant;
406 }
407 if ( !xLocaleData.is() && xMSF.is() )
408 {
409 Reference <
410 XInterface > xI =
411 xMSF->createInstance( OUString(
412 RTL_CONSTASCII_USTRINGPARAM( "com.sun.star.i18n.LocaleData" ) ) );
413 if ( xI.is() )
414 {
415 Any x = xI->queryInterface( getCppuType((const Reference< XLocaleData>*)0) );
416 x >>= xLocaleData;
417 }
418 }
419 return bChanged;
420 }
421
422
setupParserTable(const Locale & rLocale,sal_Int32 startCharTokenType,const OUString & userDefinedCharactersStart,sal_Int32 contCharTokenType,const OUString & userDefinedCharactersCont)423 void cclass_Unicode::setupParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
424 const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
425 const OUString& userDefinedCharactersCont )
426 {
427 bool bIntlEqual = (rLocale.Language == aParserLocale.Language &&
428 rLocale.Country == aParserLocale.Country &&
429 rLocale.Variant == aParserLocale.Variant);
430 if ( !pTable || !bIntlEqual ||
431 startCharTokenType != nStartTypes ||
432 contCharTokenType != nContTypes ||
433 userDefinedCharactersStart != aStartChars ||
434 userDefinedCharactersCont != aContChars )
435 initParserTable( rLocale, startCharTokenType, userDefinedCharactersStart,
436 contCharTokenType, userDefinedCharactersCont );
437 }
438
439
initParserTable(const Locale & rLocale,sal_Int32 startCharTokenType,const OUString & userDefinedCharactersStart,sal_Int32 contCharTokenType,const OUString & userDefinedCharactersCont)440 void cclass_Unicode::initParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
441 const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
442 const OUString& userDefinedCharactersCont )
443 {
444 // (Re)Init
445 setupInternational( rLocale );
446 // Memory of pTable is reused.
447 if ( !pTable )
448 pTable = new UPT_FLAG_TYPE[nDefCnt];
449 memcpy( pTable, pDefaultParserTable, sizeof(UPT_FLAG_TYPE) * nDefCnt );
450 // Start and cont tables only need reallocation if different length.
451 if ( pStart && userDefinedCharactersStart.getLength() != aStartChars.getLength() )
452 {
453 delete [] pStart;
454 pStart = NULL;
455 }
456 if ( pCont && userDefinedCharactersCont.getLength() != aContChars.getLength() )
457 {
458 delete [] pCont;
459 pCont = NULL;
460 }
461 nStartTypes = startCharTokenType;
462 nContTypes = contCharTokenType;
463 aStartChars = userDefinedCharactersStart;
464 aContChars = userDefinedCharactersCont;
465
466 // specials
467 if( xLocaleData.is() )
468 {
469 LocaleDataItem aItem =
470 xLocaleData->getLocaleItem( aParserLocale );
471 //!TODO: theoretically separators may be a string, adjustment would have to be
472 //! done here and in parsing and in ::rtl::math::stringToDouble()
473 cGroupSep = aItem.thousandSeparator.getStr()[0];
474 cDecimalSep = aItem.decimalSeparator.getStr()[0];
475 }
476
477 if ( cGroupSep < nDefCnt )
478 pTable[cGroupSep] |= TOKEN_VALUE;
479 if ( cDecimalSep < nDefCnt )
480 pTable[cDecimalSep] |= TOKEN_CHAR_VALUE | TOKEN_VALUE;
481
482 // Modify characters according to KParseTokens definitions.
483 {
484 using namespace KParseTokens;
485 sal_uInt8 i;
486
487 if ( !(nStartTypes & ASC_UPALPHA) )
488 for ( i = 65; i < 91; i++ )
489 pTable[i] &= ~TOKEN_CHAR_WORD; // not allowed as start character
490 if ( !(nContTypes & ASC_UPALPHA) )
491 for ( i = 65; i < 91; i++ )
492 pTable[i] &= ~TOKEN_WORD; // not allowed as cont character
493
494 if ( !(nStartTypes & ASC_LOALPHA) )
495 for ( i = 97; i < 123; i++ )
496 pTable[i] &= ~TOKEN_CHAR_WORD; // not allowed as start character
497 if ( !(nContTypes & ASC_LOALPHA) )
498 for ( i = 97; i < 123; i++ )
499 pTable[i] &= ~TOKEN_WORD; // not allowed as cont character
500
501 if ( nStartTypes & ASC_DIGIT )
502 for ( i = 48; i < 58; i++ )
503 pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character
504 if ( !(nContTypes & ASC_DIGIT) )
505 for ( i = 48; i < 58; i++ )
506 pTable[i] &= ~TOKEN_WORD; // not allowed as cont character
507
508 if ( !(nStartTypes & ASC_UNDERSCORE) )
509 pTable[95] &= ~TOKEN_CHAR_WORD; // not allowed as start character
510 if ( !(nContTypes & ASC_UNDERSCORE) )
511 pTable[95] &= ~TOKEN_WORD; // not allowed as cont character
512
513 if ( nStartTypes & ASC_DOLLAR )
514 pTable[36] |= TOKEN_CHAR_WORD; // allowed as start character
515 if ( nContTypes & ASC_DOLLAR )
516 pTable[36] |= TOKEN_WORD; // allowed as cont character
517
518 if ( nStartTypes & ASC_DOT )
519 pTable[46] |= TOKEN_CHAR_WORD; // allowed as start character
520 if ( nContTypes & ASC_DOT )
521 pTable[46] |= TOKEN_WORD; // allowed as cont character
522
523 if ( nStartTypes & ASC_COLON )
524 pTable[58] |= TOKEN_CHAR_WORD; // allowed as start character
525 if ( nContTypes & ASC_COLON )
526 pTable[58] |= TOKEN_WORD; // allowed as cont character
527
528 if ( nStartTypes & ASC_CONTROL )
529 for ( i = 1; i < 32; i++ )
530 pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character
531 if ( nContTypes & ASC_CONTROL )
532 for ( i = 1; i < 32; i++ )
533 pTable[i] |= TOKEN_WORD; // allowed as cont character
534
535 if ( nStartTypes & ASC_ANY_BUT_CONTROL )
536 for ( i = 32; i < nDefCnt; i++ )
537 pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character
538 if ( nContTypes & ASC_ANY_BUT_CONTROL )
539 for ( i = 32; i < nDefCnt; i++ )
540 pTable[i] |= TOKEN_WORD; // allowed as cont character
541
542 }
543
544 // Merge in (positively override with) user defined characters.
545 // StartChars
546 sal_Int32 nLen = aStartChars.getLength();
547 if ( nLen )
548 {
549 if ( !pStart )
550 pStart = new UPT_FLAG_TYPE[ nLen ];
551 const sal_Unicode* p = aStartChars.getStr();
552 for ( sal_Int32 j=0; j<nLen; j++, p++ )
553 {
554 pStart[j] = TOKEN_CHAR_WORD;
555 if ( *p < nDefCnt )
556 pTable[*p] |= TOKEN_CHAR_WORD;
557 }
558 }
559 // ContChars
560 nLen = aContChars.getLength();
561 if ( nLen )
562 {
563 if ( !pCont )
564 pCont = new UPT_FLAG_TYPE[ nLen ];
565 const sal_Unicode* p = aContChars.getStr();
566 for ( sal_Int32 j=0; j<nLen; j++ )
567 {
568 pCont[j] = TOKEN_WORD;
569 if ( *p < nDefCnt )
570 pTable[*p] |= TOKEN_WORD;
571 }
572 }
573 }
574
575
destroyParserTable()576 void cclass_Unicode::destroyParserTable()
577 {
578 if ( pCont )
579 delete [] pCont;
580 if ( pStart )
581 delete [] pStart;
582 if ( pTable )
583 delete [] pTable;
584 }
585
586
getFlags(const sal_Unicode * aStr,sal_Int32 nPos)587 UPT_FLAG_TYPE cclass_Unicode::getFlags( const sal_Unicode* aStr, sal_Int32 nPos )
588 {
589 UPT_FLAG_TYPE nMask;
590 sal_Unicode c = aStr[nPos];
591 if ( c < nDefCnt )
592 nMask = pTable[ sal_uInt8(c) ];
593 else
594 nMask = getFlagsExtended( aStr, nPos );
595 switch ( eState )
596 {
597 case ssGetChar :
598 case ssRewindFromValue :
599 case ssIgnoreLeadingInRewind :
600 case ssGetWordFirstChar :
601 if ( !(nMask & TOKEN_CHAR_WORD) )
602 {
603 nMask |= getStartCharsFlags( c );
604 if ( nMask & TOKEN_CHAR_WORD )
605 nMask &= ~TOKEN_EXCLUDED;
606 }
607 break;
608 case ssGetValue :
609 case ssGetWord :
610 if ( !(nMask & TOKEN_WORD) )
611 {
612 nMask |= getContCharsFlags( c );
613 if ( nMask & TOKEN_WORD )
614 nMask &= ~TOKEN_EXCLUDED;
615 }
616 break;
617 default:
618 ; // other cases aren't needed, no compiler warning
619 }
620 return nMask;
621 }
622
623
getFlagsExtended(const sal_Unicode * aStr,sal_Int32 nPos)624 UPT_FLAG_TYPE cclass_Unicode::getFlagsExtended( const sal_Unicode* aStr, sal_Int32 nPos )
625 {
626 sal_Unicode c = aStr[nPos];
627 if ( c == cGroupSep )
628 return TOKEN_VALUE;
629 else if ( c == cDecimalSep )
630 return TOKEN_CHAR_VALUE | TOKEN_VALUE;
631 using namespace i18n;
632 bool bStart = (eState == ssGetChar || eState == ssGetWordFirstChar ||
633 eState == ssRewindFromValue || eState == ssIgnoreLeadingInRewind);
634 sal_Int32 nTypes = (bStart ? nStartTypes : nContTypes);
635
636 //! all KParseTokens::UNI_... must be matched
637 switch ( u_charType( (sal_uInt32) c ) )
638 {
639 case U_UPPERCASE_LETTER :
640 return (nTypes & KParseTokens::UNI_UPALPHA) ?
641 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
642 TOKEN_ILLEGAL;
643 case U_LOWERCASE_LETTER :
644 return (nTypes & KParseTokens::UNI_LOALPHA) ?
645 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
646 TOKEN_ILLEGAL;
647 case U_TITLECASE_LETTER :
648 return (nTypes & KParseTokens::UNI_TITLE_ALPHA) ?
649 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
650 TOKEN_ILLEGAL;
651 case U_MODIFIER_LETTER :
652 return (nTypes & KParseTokens::UNI_MODIFIER_LETTER) ?
653 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
654 TOKEN_ILLEGAL;
655 case U_NON_SPACING_MARK :
656 case U_COMBINING_SPACING_MARK :
657 // Non_Spacing_Mark can't be a leading character,
658 // nor can a spacing combining mark.
659 if (bStart)
660 return TOKEN_ILLEGAL;
661 // fall through, treat it as Other_Letter.
662 case U_OTHER_LETTER :
663 return (nTypes & KParseTokens::UNI_OTHER_LETTER) ?
664 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
665 TOKEN_ILLEGAL;
666 case U_DECIMAL_DIGIT_NUMBER :
667 return ((nTypes & KParseTokens::UNI_DIGIT) ?
668 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
669 TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
670 case U_LETTER_NUMBER :
671 return ((nTypes & KParseTokens::UNI_LETTER_NUMBER) ?
672 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
673 TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
674 case U_OTHER_NUMBER :
675 return ((nTypes & KParseTokens::UNI_OTHER_NUMBER) ?
676 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
677 TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
678 case U_SPACE_SEPARATOR :
679 return ((nTypes & KParseTokens::IGNORE_LEADING_WS) ?
680 TOKEN_CHAR_DONTCARE : (bStart ? TOKEN_CHAR_WORD : (TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP) ));
681 }
682
683 return TOKEN_ILLEGAL;
684 }
685
686
getStartCharsFlags(sal_Unicode c)687 UPT_FLAG_TYPE cclass_Unicode::getStartCharsFlags( sal_Unicode c )
688 {
689 if ( pStart )
690 {
691 const sal_Unicode* pStr = aStartChars.getStr();
692 const sal_Unicode* p = StrChr( pStr, c );
693 if ( p )
694 return pStart[ p - pStr ];
695 }
696 return TOKEN_ILLEGAL;
697 }
698
699
getContCharsFlags(sal_Unicode c)700 UPT_FLAG_TYPE cclass_Unicode::getContCharsFlags( sal_Unicode c )
701 {
702 if ( pCont )
703 {
704 const sal_Unicode* pStr = aContChars.getStr();
705 const sal_Unicode* p = StrChr( pStr, c );
706 if ( p )
707 return pCont[ p - pStr ];
708 }
709 return TOKEN_ILLEGAL;
710 }
711
712
parseText(ParseResult & r,const OUString & rText,sal_Int32 nPos,sal_Int32 nTokenType)713 void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32 nPos, sal_Int32 nTokenType )
714 {
715 using namespace i18n;
716 const sal_Unicode* const pTextStart = rText.getStr() + nPos;
717 eState = ssGetChar;
718
719 //! All the variables below (plus ParseResult) have to be resetted on ssRewindFromValue!
720 const sal_Unicode* pSym = pTextStart;
721 const sal_Unicode* pSrc = pSym;
722 OUString aSymbol;
723 sal_Unicode c = *pSrc;
724 sal_Unicode cLast = 0;
725 int nDecSeps = 0;
726 bool bQuote = false;
727 bool bMightBeWord = true;
728 bool bMightBeWordLast = true;
729 //! All the variables above (plus ParseResult) have to be resetted on ssRewindFromValue!
730
731 while ( (c != 0) && (eState != ssStop) )
732 {
733 UPT_FLAG_TYPE nMask = getFlags( pTextStart, pSrc - pTextStart );
734 if ( nMask & TOKEN_EXCLUDED )
735 eState = ssBounce;
736 if ( bMightBeWord )
737 { // only relevant for ssGetValue fall back
738 if ( eState == ssGetChar || eState == ssRewindFromValue ||
739 eState == ssIgnoreLeadingInRewind )
740 bMightBeWord = ((nMask & TOKEN_CHAR_WORD) != 0);
741 else
742 bMightBeWord = ((nMask & TOKEN_WORD) != 0);
743 }
744 sal_Int32 nParseTokensType = getParseTokensType( pTextStart, pSrc - pTextStart );
745 pSrc++;
746 switch (eState)
747 {
748 case ssGetChar :
749 case ssRewindFromValue :
750 case ssIgnoreLeadingInRewind :
751 {
752 if ( (nMask & TOKEN_CHAR_VALUE) && eState != ssRewindFromValue
753 && eState != ssIgnoreLeadingInRewind )
754 { //! must be first, may fall back to ssGetWord via bMightBeWord
755 eState = ssGetValue;
756 if ( nMask & TOKEN_VALUE_DIGIT )
757 {
758 if ( 128 <= c )
759 r.TokenType = KParseType::UNI_NUMBER;
760 else
761 r.TokenType = KParseType::ASC_NUMBER;
762 }
763 else if ( c == cDecimalSep )
764 {
765 if ( *pSrc )
766 ++nDecSeps;
767 else
768 eState = ssRewindFromValue;
769 // retry for ONE_SINGLE_CHAR or others
770 }
771 }
772 else if ( nMask & TOKEN_CHAR_WORD )
773 {
774 eState = ssGetWord;
775 r.TokenType = KParseType::IDENTNAME;
776 }
777 else if ( nMask & TOKEN_NAME_SEP )
778 {
779 eState = ssGetWordFirstChar;
780 bQuote = true;
781 pSym++;
782 nParseTokensType = 0; // will be taken of first real character
783 r.TokenType = KParseType::SINGLE_QUOTE_NAME;
784 }
785 else if ( nMask & TOKEN_CHAR_STRING )
786 {
787 eState = ssGetString;
788 pSym++;
789 nParseTokensType = 0; // will be taken of first real character
790 r.TokenType = KParseType::DOUBLE_QUOTE_STRING;
791 }
792 else if ( nMask & TOKEN_CHAR_DONTCARE )
793 {
794 if ( nStartTypes & KParseTokens::IGNORE_LEADING_WS )
795 {
796 if (eState == ssRewindFromValue)
797 eState = ssIgnoreLeadingInRewind;
798 r.LeadingWhiteSpace++;
799 pSym++;
800 nParseTokensType = 0; // wait until real character
801 bMightBeWord = true;
802 }
803 else
804 eState = ssBounce;
805 }
806 else if ( nMask & TOKEN_CHAR_BOOL )
807 {
808 eState = ssGetBool;
809 r.TokenType = KParseType::BOOLEAN;
810 }
811 else if ( nMask & TOKEN_CHAR )
812 { //! must be last
813 eState = ssStop;
814 r.TokenType = KParseType::ONE_SINGLE_CHAR;
815 }
816 else
817 eState = ssBounce; // not known
818 }
819 break;
820 case ssGetValue :
821 {
822 if ( nMask & TOKEN_VALUE_DIGIT )
823 {
824 if ( 128 <= c )
825 r.TokenType = KParseType::UNI_NUMBER;
826 else if ( r.TokenType != KParseType::UNI_NUMBER )
827 r.TokenType = KParseType::ASC_NUMBER;
828 }
829 if ( nMask & TOKEN_VALUE )
830 {
831 if ( c == cDecimalSep && ++nDecSeps > 1 )
832 {
833 if ( pSrc - pTextStart == 2 )
834 eState = ssRewindFromValue;
835 // consecutive separators
836 else
837 eState = ssStopBack;
838 }
839 // else keep it going
840 }
841 else if ( c == 'E' || c == 'e' )
842 {
843 UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart );
844 if ( nNext & TOKEN_VALUE_EXP )
845 ; // keep it going
846 else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
847 { // might be a numerical name (1.2efg)
848 eState = ssGetWord;
849 r.TokenType = KParseType::IDENTNAME;
850 }
851 else
852 eState = ssStopBack;
853 }
854 else if ( nMask & TOKEN_VALUE_SIGN )
855 {
856 if ( (cLast == 'E') || (cLast == 'e') )
857 {
858 UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart );
859 if ( nNext & TOKEN_VALUE_EXP_VALUE )
860 ; // keep it going
861 else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
862 { // might be a numerical name (1.2e+fg)
863 eState = ssGetWord;
864 r.TokenType = KParseType::IDENTNAME;
865 }
866 else
867 eState = ssStopBack;
868 }
869 else if ( bMightBeWord )
870 { // might be a numerical name (1.2+fg)
871 eState = ssGetWord;
872 r.TokenType = KParseType::IDENTNAME;
873 }
874 else
875 eState = ssStopBack;
876 }
877 else if ( bMightBeWord && (nMask & TOKEN_WORD) )
878 { // might be a numerical name (1995.A1)
879 eState = ssGetWord;
880 r.TokenType = KParseType::IDENTNAME;
881 }
882 else
883 eState = ssStopBack;
884 }
885 break;
886 case ssGetWordFirstChar :
887 eState = ssGetWord;
888 // fall thru
889 case ssGetWord :
890 {
891 if ( nMask & TOKEN_WORD )
892 ; // keep it going
893 else if ( nMask & TOKEN_NAME_SEP )
894 {
895 if ( bQuote )
896 {
897 if ( cLast == '\\' )
898 { // escaped
899 aSymbol += OUString( pSym, pSrc - pSym - 2 );
900 aSymbol += OUString( &c, 1);
901 }
902 else
903 {
904 eState = ssStop;
905 aSymbol += OUString( pSym, pSrc - pSym - 1 );
906 }
907 pSym = pSrc;
908 }
909 else
910 eState = ssStopBack;
911 }
912 else if ( bQuote )
913 ; // keep it going
914 else
915 eState = ssStopBack;
916 }
917 break;
918 case ssGetString :
919 {
920 if ( nMask & TOKEN_STRING_SEP )
921 {
922 if ( cLast == '\\' )
923 { // escaped
924 aSymbol += OUString( pSym, pSrc - pSym - 2 );
925 aSymbol += OUString( &c, 1);
926 }
927 else if ( c == *pSrc &&
928 !(nContTypes & KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING) )
929 { // "" => literal " escaped
930 aSymbol += OUString( pSym, pSrc - pSym );
931 pSrc++;
932 }
933 else
934 {
935 eState = ssStop;
936 aSymbol += OUString( pSym, pSrc - pSym - 1 );
937 }
938 pSym = pSrc;
939 }
940 }
941 break;
942 case ssGetBool :
943 {
944 if ( (nMask & TOKEN_BOOL) )
945 eState = ssStop; // maximum 2: <, >, <>, <=, >=
946 else
947 eState = ssStopBack;
948 }
949 break;
950 case ssStopBack :
951 case ssBounce :
952 case ssStop :
953 ; // nothing, no compiler warning
954 break;
955 }
956 if ( eState == ssRewindFromValue )
957 {
958 r = ParseResult();
959 pSym = pTextStart;
960 pSrc = pSym;
961 aSymbol = OUString();
962 c = *pSrc;
963 cLast = 0;
964 nDecSeps = 0;
965 bQuote = false;
966 bMightBeWord = true;
967 bMightBeWordLast = true;
968 }
969 else
970 {
971 if ( !(r.TokenType & nTokenType) )
972 {
973 if ( (r.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER))
974 && (nTokenType & KParseType::IDENTNAME) && bMightBeWord )
975 ; // keep a number that might be a word
976 else if ( r.LeadingWhiteSpace == (pSrc - pTextStart) )
977 ; // keep ignored white space
978 else if ( !r.TokenType && eState == ssGetValue && (nMask & TOKEN_VALUE_SEP) )
979 ; // keep uncertain value
980 else
981 eState = ssBounce;
982 }
983 if ( eState == ssBounce )
984 {
985 r.TokenType = 0;
986 eState = ssStopBack;
987 }
988 if ( eState == ssStopBack )
989 { // put back
990 pSrc--;
991 bMightBeWord = bMightBeWordLast;
992 eState = ssStop;
993 }
994 if ( eState != ssStop )
995 {
996 if ( !r.StartFlags )
997 r.StartFlags |= nParseTokensType;
998 else
999 r.ContFlags |= nParseTokensType;
1000 }
1001 bMightBeWordLast = bMightBeWord;
1002 cLast = c;
1003 c = *pSrc;
1004 }
1005 }
1006 // r.CharLen is the length in characters (not code points) of the parsed
1007 // token not including any leading white space, change this calculation if
1008 // multi-code-point Unicode characters are to be supported.
1009 r.CharLen = pSrc - pTextStart - r.LeadingWhiteSpace;
1010 r.EndPos = nPos + (pSrc - pTextStart);
1011 if ( r.TokenType & KParseType::ASC_NUMBER )
1012 {
1013 r.Value = rtl_math_uStringToDouble( pTextStart + r.LeadingWhiteSpace,
1014 pTextStart + r.EndPos, cDecimalSep, cGroupSep, NULL, NULL );
1015 if ( bMightBeWord )
1016 r.TokenType |= KParseType::IDENTNAME;
1017 }
1018 else if ( r.TokenType & KParseType::UNI_NUMBER )
1019 {
1020 if ( !xNatNumSup.is() )
1021 {
1022 #define NATIVENUMBERSUPPLIER_SERVICENAME "com.sun.star.i18n.NativeNumberSupplier"
1023 if ( xMSF.is() )
1024 {
1025 xNatNumSup = Reference< XNativeNumberSupplier > (
1026 xMSF->createInstance( OUString(
1027 RTL_CONSTASCII_USTRINGPARAM(
1028 NATIVENUMBERSUPPLIER_SERVICENAME ) ) ),
1029 UNO_QUERY );
1030 }
1031 if ( !xNatNumSup.is() )
1032 {
1033 throw RuntimeException( OUString(
1034 #ifdef DBG_UTIL
1035 RTL_CONSTASCII_USTRINGPARAM(
1036 "cclass_Unicode::parseText: can't instanciate "
1037 NATIVENUMBERSUPPLIER_SERVICENAME )
1038 #endif
1039 ), *this );
1040 }
1041 #undef NATIVENUMBERSUPPLIER_SERVICENAME
1042 }
1043 OUString aTmp( pTextStart + r.LeadingWhiteSpace, r.EndPos - nPos +
1044 r.LeadingWhiteSpace );
1045 // transliterate to ASCII
1046 aTmp = xNatNumSup->getNativeNumberString( aTmp, aParserLocale,
1047 NativeNumberMode::NATNUM0 );
1048 r.Value = ::rtl::math::stringToDouble( aTmp, cDecimalSep, cGroupSep, NULL, NULL );
1049 if ( bMightBeWord )
1050 r.TokenType |= KParseType::IDENTNAME;
1051 }
1052 else if ( r.TokenType & (KParseType::SINGLE_QUOTE_NAME | KParseType::DOUBLE_QUOTE_STRING) )
1053 {
1054 if ( pSym < pSrc )
1055 { //! open quote
1056 aSymbol += OUString( pSym, pSrc - pSym );
1057 r.TokenType |= KParseType::MISSING_QUOTE;
1058 }
1059 r.DequotedNameOrString = aSymbol;
1060 }
1061 }
1062
1063 } } } }
1064