1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_i18npool.hxx"
26 
27 #include <cclass_unicode.hxx>
28 #include <unicode/uchar.h>
29 #include <rtl/math.hxx>
30 #include <rtl/ustring.hxx>
31 #include <com/sun/star/i18n/KParseTokens.hpp>
32 #include <com/sun/star/i18n/KParseType.hpp>
33 #include <com/sun/star/i18n/UnicodeType.hpp>
34 #include <com/sun/star/i18n/XLocaleData.hpp>
35 #include <com/sun/star/i18n/NativeNumberMode.hpp>
36 
37 #include <string.h>		// memcpy()
38 
39 using namespace ::com::sun::star::uno;
40 using namespace ::com::sun::star::lang;
41 using namespace ::rtl;
42 
43 namespace com { namespace sun { namespace star { namespace i18n {
44 
// Per-character parser flags (bit mask values, combined with '|').
//
// How the flags are used by the code in this file:
//   TOKEN_ILLEGAL        no flag set
//   TOKEN_CHAR           in the initial state: a token of its own (ONE_SINGLE_CHAR)
//   TOKEN_CHAR_BOOL      in the initial state: starts a BOOLEAN token
//   TOKEN_CHAR_WORD      allowed as start character of a word/identifier
//   TOKEN_CHAR_VALUE     in the initial state: starts a numeric value
//   TOKEN_CHAR_STRING    in the initial state: starts a double quoted string
//   TOKEN_CHAR_DONTCARE  skipped as leading white space if IGNORE_LEADING_WS is set
//   TOKEN_WORD           allowed as continuation character of a word/identifier
//   TOKEN_VALUE          allowed as continuation character of a value
//   TOKEN_VALUE_EXP      may follow the 'E'/'e' of an exponent
//   TOKEN_VALUE_DIGIT    is a digit
//   TOKEN_NAME_SEP       in the initial state: starts a single quoted name
//   TOKEN_EXCLUDED       makes parseText() bounce
// The remaining flags (TOKEN_BOOL, TOKEN_WORD_SEP, TOKEN_VALUE_SEP,
// TOKEN_VALUE_SIGN, TOKEN_VALUE_EXP_VALUE, TOKEN_STRING_SEP) are evaluated
// by parser states not shown near these definitions.
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_ILLEGAL		= 0x00000000;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR			= 0x00000001;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_BOOL	= 0x00000002;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_WORD	= 0x00000004;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_VALUE	= 0x00000008;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_STRING	= 0x00000010;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_DONTCARE= 0x00000020;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_BOOL			= 0x00000040;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD			= 0x00000080;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD_SEP		= 0x00000100;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE		= 0x00000200;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SEP	= 0x00000400;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP	= 0x00000800;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SIGN	= 0x00001000;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP_VALUE	= 0x00002000;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_DIGIT	= 0x00004000;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_NAME_SEP		= 0x20000000;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_STRING_SEP	= 0x40000000;
const UPT_FLAG_TYPE cclass_Unicode::TOKEN_EXCLUDED		= 0x80000000;

// Value related flags shared by all digits: used for the ASCII digits in the
// default table and OR'ed in for Unicode number categories by getFlagsExtended().
#define TOKEN_DIGIT_FLAGS (TOKEN_CHAR_VALUE | TOKEN_VALUE | TOKEN_VALUE_EXP | TOKEN_VALUE_EXP_VALUE | TOKEN_VALUE_DIGIT)
66 
67 // Default identifier/name specification is [A-Za-z_][A-Za-z0-9_]*
68 
69 const sal_uInt8 cclass_Unicode::nDefCnt = 128;
70 const UPT_FLAG_TYPE cclass_Unicode::pDefaultParserTable[ nDefCnt ] =
71 {
72 // (...) == Calc formula compiler specific, commented out and modified
73 
74 	/* \0 */	TOKEN_EXCLUDED,
75 				TOKEN_ILLEGAL,
76 				TOKEN_ILLEGAL,
77 				TOKEN_ILLEGAL,
78 				TOKEN_ILLEGAL,
79 				TOKEN_ILLEGAL,
80 				TOKEN_ILLEGAL,
81 				TOKEN_ILLEGAL,
82 				TOKEN_ILLEGAL,
83 	/*  9 \t */	TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,		// (TOKEN_ILLEGAL)
84 				TOKEN_ILLEGAL,
85 	/* 11 \v */	TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,		// (TOKEN_ILLEGAL)
86 				TOKEN_ILLEGAL,
87 				TOKEN_ILLEGAL,
88 				TOKEN_ILLEGAL,
89 				TOKEN_ILLEGAL,
90 				TOKEN_ILLEGAL,
91 				TOKEN_ILLEGAL,
92 				TOKEN_ILLEGAL,
93 				TOKEN_ILLEGAL,
94 				TOKEN_ILLEGAL,
95 				TOKEN_ILLEGAL,
96 				TOKEN_ILLEGAL,
97 				TOKEN_ILLEGAL,
98 				TOKEN_ILLEGAL,
99 				TOKEN_ILLEGAL,
100 				TOKEN_ILLEGAL,
101 				TOKEN_ILLEGAL,
102 				TOKEN_ILLEGAL,
103 				TOKEN_ILLEGAL,
104 				TOKEN_ILLEGAL,
105 				TOKEN_ILLEGAL,
106 	/*  32   */	TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
107 	/*  33 ! */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
108 	/*  34 " */	TOKEN_CHAR_STRING | TOKEN_STRING_SEP,
109 	/*  35 # */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_WORD_SEP)
110 	/*  36 $ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_CHAR_WORD | TOKEN_WORD)
111 	/*  37 % */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_VALUE)
112 	/*  38 & */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
113 	/*  39 ' */	TOKEN_NAME_SEP,
114 	/*  40 ( */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
115 	/*  41 ) */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
116 	/*  42 * */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
117 	/*  43 + */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN,
118 	/*  44 , */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_CHAR_VALUE | TOKEN_VALUE)
119 	/*  45 - */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN,
120 	/*  46 . */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_WORD | TOKEN_CHAR_VALUE | TOKEN_VALUE)
121 	/*  47 / */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
122 	//for ( i = 48; i < 58; i++ )
123 	/*  48 0 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
124 	/*  49 1 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
125 	/*  50 2 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
126 	/*  51 3 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
127 	/*  52 4 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
128 	/*  53 5 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
129 	/*  54 6 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
130 	/*  55 7 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
131 	/*  56 8 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
132 	/*  57 9 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
133 	/*  58 : */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_WORD)
134 	/*  59 ; */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
135 	/*  60 < */	TOKEN_CHAR_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
136 	/*  61 = */	TOKEN_CHAR | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
137 	/*  62 > */	TOKEN_CHAR_BOOL | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
138 	/*  63 ? */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_CHAR_WORD | TOKEN_WORD)
139 	/*  64 @ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
140 	//for ( i = 65; i < 91; i++ )
141 	/*  65 A */	TOKEN_CHAR_WORD | TOKEN_WORD,
142 	/*  66 B */	TOKEN_CHAR_WORD | TOKEN_WORD,
143 	/*  67 C */	TOKEN_CHAR_WORD | TOKEN_WORD,
144 	/*  68 D */	TOKEN_CHAR_WORD | TOKEN_WORD,
145 	/*  69 E */	TOKEN_CHAR_WORD | TOKEN_WORD,
146 	/*  70 F */	TOKEN_CHAR_WORD | TOKEN_WORD,
147 	/*  71 G */	TOKEN_CHAR_WORD | TOKEN_WORD,
148 	/*  72 H */	TOKEN_CHAR_WORD | TOKEN_WORD,
149 	/*  73 I */	TOKEN_CHAR_WORD | TOKEN_WORD,
150 	/*  74 J */	TOKEN_CHAR_WORD | TOKEN_WORD,
151 	/*  75 K */	TOKEN_CHAR_WORD | TOKEN_WORD,
152 	/*  76 L */	TOKEN_CHAR_WORD | TOKEN_WORD,
153 	/*  77 M */	TOKEN_CHAR_WORD | TOKEN_WORD,
154 	/*  78 N */	TOKEN_CHAR_WORD | TOKEN_WORD,
155 	/*  79 O */	TOKEN_CHAR_WORD | TOKEN_WORD,
156 	/*  80 P */	TOKEN_CHAR_WORD | TOKEN_WORD,
157 	/*  81 Q */	TOKEN_CHAR_WORD | TOKEN_WORD,
158 	/*  82 R */	TOKEN_CHAR_WORD | TOKEN_WORD,
159 	/*  83 S */	TOKEN_CHAR_WORD | TOKEN_WORD,
160 	/*  84 T */	TOKEN_CHAR_WORD | TOKEN_WORD,
161 	/*  85 U */	TOKEN_CHAR_WORD | TOKEN_WORD,
162 	/*  86 V */	TOKEN_CHAR_WORD | TOKEN_WORD,
163 	/*  87 W */	TOKEN_CHAR_WORD | TOKEN_WORD,
164 	/*  88 X */	TOKEN_CHAR_WORD | TOKEN_WORD,
165 	/*  89 Y */	TOKEN_CHAR_WORD | TOKEN_WORD,
166 	/*  90 Z */	TOKEN_CHAR_WORD | TOKEN_WORD,
167 	/*  91 [ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
168 	/*  92 \ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
169 	/*  93 ] */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
170 	/*  94 ^ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
171 	/*  95 _ */	TOKEN_CHAR_WORD | TOKEN_WORD,
172 	/*  96 ` */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
173 	//for ( i = 97; i < 123; i++ )
174 	/*  97 a */	TOKEN_CHAR_WORD | TOKEN_WORD,
175 	/*  98 b */	TOKEN_CHAR_WORD | TOKEN_WORD,
176 	/*  99 c */	TOKEN_CHAR_WORD | TOKEN_WORD,
177 	/* 100 d */	TOKEN_CHAR_WORD | TOKEN_WORD,
178 	/* 101 e */	TOKEN_CHAR_WORD | TOKEN_WORD,
179 	/* 102 f */	TOKEN_CHAR_WORD | TOKEN_WORD,
180 	/* 103 g */	TOKEN_CHAR_WORD | TOKEN_WORD,
181 	/* 104 h */	TOKEN_CHAR_WORD | TOKEN_WORD,
182 	/* 105 i */	TOKEN_CHAR_WORD | TOKEN_WORD,
183 	/* 106 j */	TOKEN_CHAR_WORD | TOKEN_WORD,
184 	/* 107 k */	TOKEN_CHAR_WORD | TOKEN_WORD,
185 	/* 108 l */	TOKEN_CHAR_WORD | TOKEN_WORD,
186 	/* 109 m */	TOKEN_CHAR_WORD | TOKEN_WORD,
187 	/* 110 n */	TOKEN_CHAR_WORD | TOKEN_WORD,
188 	/* 111 o */	TOKEN_CHAR_WORD | TOKEN_WORD,
189 	/* 112 p */	TOKEN_CHAR_WORD | TOKEN_WORD,
190 	/* 113 q */	TOKEN_CHAR_WORD | TOKEN_WORD,
191 	/* 114 r */	TOKEN_CHAR_WORD | TOKEN_WORD,
192 	/* 115 s */	TOKEN_CHAR_WORD | TOKEN_WORD,
193 	/* 116 t */	TOKEN_CHAR_WORD | TOKEN_WORD,
194 	/* 117 u */	TOKEN_CHAR_WORD | TOKEN_WORD,
195 	/* 118 v */	TOKEN_CHAR_WORD | TOKEN_WORD,
196 	/* 119 w */	TOKEN_CHAR_WORD | TOKEN_WORD,
197 	/* 120 x */	TOKEN_CHAR_WORD | TOKEN_WORD,
198 	/* 121 y */	TOKEN_CHAR_WORD | TOKEN_WORD,
199 	/* 122 z */	TOKEN_CHAR_WORD | TOKEN_WORD,
200 	/* 123 { */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
201 	/* 124 | */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
202 	/* 125 } */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
203 	/* 126 ~ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
204 	/* 127   */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP	// (TOKEN_ILLEGAL // UNUSED)
205 };
206 
207 
// KParseTokens classification of each ASCII code point, indexed by character
// value. getParseTokensType() returns the entry directly for characters below
// nDefCnt; characters at or above nDefCnt are classified via u_charType() instead.
const sal_Int32 cclass_Unicode::pParseTokensType[ nDefCnt ] =
{
	/* \0 */	KParseTokens::ASC_OTHER,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
	/*  9 \t */	KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
	/* 11 \v */	KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
				KParseTokens::ASC_CONTROL,
	/*  32   */	KParseTokens::ASC_OTHER,
	/*  33 ! */	KParseTokens::ASC_OTHER,
	/*  34 " */	KParseTokens::ASC_OTHER,
	/*  35 # */	KParseTokens::ASC_OTHER,
	/*  36 $ */	KParseTokens::ASC_DOLLAR,
	/*  37 % */	KParseTokens::ASC_OTHER,
	/*  38 & */	KParseTokens::ASC_OTHER,
	/*  39 ' */	KParseTokens::ASC_OTHER,
	/*  40 ( */	KParseTokens::ASC_OTHER,
	/*  41 ) */	KParseTokens::ASC_OTHER,
	/*  42 * */	KParseTokens::ASC_OTHER,
	/*  43 + */	KParseTokens::ASC_OTHER,
	/*  44 , */	KParseTokens::ASC_OTHER,
	/*  45 - */	KParseTokens::ASC_OTHER,
	/*  46 . */	KParseTokens::ASC_DOT,
	/*  47 / */	KParseTokens::ASC_OTHER,
	//for ( i = 48; i < 58; i++ )
	/*  48 0 */	KParseTokens::ASC_DIGIT,
	/*  49 1 */	KParseTokens::ASC_DIGIT,
	/*  50 2 */	KParseTokens::ASC_DIGIT,
	/*  51 3 */	KParseTokens::ASC_DIGIT,
	/*  52 4 */	KParseTokens::ASC_DIGIT,
	/*  53 5 */	KParseTokens::ASC_DIGIT,
	/*  54 6 */	KParseTokens::ASC_DIGIT,
	/*  55 7 */	KParseTokens::ASC_DIGIT,
	/*  56 8 */	KParseTokens::ASC_DIGIT,
	/*  57 9 */	KParseTokens::ASC_DIGIT,
	/*  58 : */	KParseTokens::ASC_COLON,
	/*  59 ; */	KParseTokens::ASC_OTHER,
	/*  60 < */	KParseTokens::ASC_OTHER,
	/*  61 = */	KParseTokens::ASC_OTHER,
	/*  62 > */	KParseTokens::ASC_OTHER,
	/*  63 ? */	KParseTokens::ASC_OTHER,
	/*  64 @ */	KParseTokens::ASC_OTHER,
	//for ( i = 65; i < 91; i++ )
	/*  65 A */	KParseTokens::ASC_UPALPHA,
	/*  66 B */	KParseTokens::ASC_UPALPHA,
	/*  67 C */	KParseTokens::ASC_UPALPHA,
	/*  68 D */	KParseTokens::ASC_UPALPHA,
	/*  69 E */	KParseTokens::ASC_UPALPHA,
	/*  70 F */	KParseTokens::ASC_UPALPHA,
	/*  71 G */	KParseTokens::ASC_UPALPHA,
	/*  72 H */	KParseTokens::ASC_UPALPHA,
	/*  73 I */	KParseTokens::ASC_UPALPHA,
	/*  74 J */	KParseTokens::ASC_UPALPHA,
	/*  75 K */	KParseTokens::ASC_UPALPHA,
	/*  76 L */	KParseTokens::ASC_UPALPHA,
	/*  77 M */	KParseTokens::ASC_UPALPHA,
	/*  78 N */	KParseTokens::ASC_UPALPHA,
	/*  79 O */	KParseTokens::ASC_UPALPHA,
	/*  80 P */	KParseTokens::ASC_UPALPHA,
	/*  81 Q */	KParseTokens::ASC_UPALPHA,
	/*  82 R */	KParseTokens::ASC_UPALPHA,
	/*  83 S */	KParseTokens::ASC_UPALPHA,
	/*  84 T */	KParseTokens::ASC_UPALPHA,
	/*  85 U */	KParseTokens::ASC_UPALPHA,
	/*  86 V */	KParseTokens::ASC_UPALPHA,
	/*  87 W */	KParseTokens::ASC_UPALPHA,
	/*  88 X */	KParseTokens::ASC_UPALPHA,
	/*  89 Y */	KParseTokens::ASC_UPALPHA,
	/*  90 Z */	KParseTokens::ASC_UPALPHA,
	/*  91 [ */	KParseTokens::ASC_OTHER,
	/*  92 \ */	KParseTokens::ASC_OTHER,
	/*  93 ] */	KParseTokens::ASC_OTHER,
	/*  94 ^ */	KParseTokens::ASC_OTHER,
	/*  95 _ */	KParseTokens::ASC_UNDERSCORE,
	/*  96 ` */	KParseTokens::ASC_OTHER,
	//for ( i = 97; i < 123; i++ )
	/*  97 a */	KParseTokens::ASC_LOALPHA,
	/*  98 b */	KParseTokens::ASC_LOALPHA,
	/*  99 c */	KParseTokens::ASC_LOALPHA,
	/* 100 d */	KParseTokens::ASC_LOALPHA,
	/* 101 e */	KParseTokens::ASC_LOALPHA,
	/* 102 f */	KParseTokens::ASC_LOALPHA,
	/* 103 g */	KParseTokens::ASC_LOALPHA,
	/* 104 h */	KParseTokens::ASC_LOALPHA,
	/* 105 i */	KParseTokens::ASC_LOALPHA,
	/* 106 j */	KParseTokens::ASC_LOALPHA,
	/* 107 k */	KParseTokens::ASC_LOALPHA,
	/* 108 l */	KParseTokens::ASC_LOALPHA,
	/* 109 m */	KParseTokens::ASC_LOALPHA,
	/* 110 n */	KParseTokens::ASC_LOALPHA,
	/* 111 o */	KParseTokens::ASC_LOALPHA,
	/* 112 p */	KParseTokens::ASC_LOALPHA,
	/* 113 q */	KParseTokens::ASC_LOALPHA,
	/* 114 r */	KParseTokens::ASC_LOALPHA,
	/* 115 s */	KParseTokens::ASC_LOALPHA,
	/* 116 t */	KParseTokens::ASC_LOALPHA,
	/* 117 u */	KParseTokens::ASC_LOALPHA,
	/* 118 v */	KParseTokens::ASC_LOALPHA,
	/* 119 w */	KParseTokens::ASC_LOALPHA,
	/* 120 x */	KParseTokens::ASC_LOALPHA,
	/* 121 y */	KParseTokens::ASC_LOALPHA,
	/* 122 z */	KParseTokens::ASC_LOALPHA,
	/* 123 { */	KParseTokens::ASC_OTHER,
	/* 124 | */	KParseTokens::ASC_OTHER,
	/* 125 } */	KParseTokens::ASC_OTHER,
	/* 126 ~ */	KParseTokens::ASC_OTHER,
	/* 127   */	KParseTokens::ASC_OTHER
};
342 
343 
344 // static
// Find the first occurrence of a code unit in a NUL terminated buffer.
//
// pStr  NUL terminated string to scan, may be NULL
// c     code unit to search for
//
// Returns a pointer to the first element of pStr that equals c, or NULL if
// pStr is NULL or c does not occur before the terminating NUL. The
// terminator itself is never matched, so searching for 0 yields NULL.
const sal_Unicode* cclass_Unicode::StrChr( const sal_Unicode* pStr, sal_Unicode c )
{
	if ( pStr )
	{
		for ( const sal_Unicode* pCur = pStr; *pCur; ++pCur )
		{
			if ( *pCur == c )
				return pCur;
		}
	}
	return NULL;
}
357 
358 
getParseTokensType(const sal_Unicode * aStr,sal_Int32 nPos)359 sal_Int32 cclass_Unicode::getParseTokensType( const sal_Unicode* aStr, sal_Int32 nPos )
360 {
361 	sal_Unicode c = aStr[nPos];
362 	if ( c < nDefCnt )
363 		return pParseTokensType[ sal_uInt8(c) ];
364 	else
365 	{
366 
367 		//! all KParseTokens::UNI_... must be matched
368         switch ( u_charType( (sal_uInt32) c ) )
369 		{
370 			case U_UPPERCASE_LETTER :
371 				return KParseTokens::UNI_UPALPHA;
372 			case U_LOWERCASE_LETTER :
373 				return KParseTokens::UNI_LOALPHA;
374 			case U_TITLECASE_LETTER :
375 				return KParseTokens::UNI_TITLE_ALPHA;
376 			case U_MODIFIER_LETTER :
377 				return KParseTokens::UNI_MODIFIER_LETTER;
378 			case U_OTHER_LETTER :
379 				// Non_Spacing_Mark could not be as leading character
380 				if (nPos == 0) break;
381 				// fall through, treat it as Other_Letter.
382 			case U_NON_SPACING_MARK :
383 				return KParseTokens::UNI_OTHER_LETTER;
384 			case U_DECIMAL_DIGIT_NUMBER :
385 				return KParseTokens::UNI_DIGIT;
386 			case U_LETTER_NUMBER :
387 				return KParseTokens::UNI_LETTER_NUMBER;
388 			case U_OTHER_NUMBER :
389 				return KParseTokens::UNI_OTHER_NUMBER;
390 		}
391 
392 		return KParseTokens::UNI_OTHER;
393 	}
394 }
395 
setupInternational(const Locale & rLocale)396 sal_Bool cclass_Unicode::setupInternational( const Locale& rLocale )
397 {
398 	sal_Bool bChanged = (aParserLocale.Language != rLocale.Language
399 		|| aParserLocale.Country != rLocale.Country
400 		|| aParserLocale.Variant != rLocale.Variant);
401 	if ( bChanged )
402 	{
403 		aParserLocale.Language = rLocale.Language;
404 		aParserLocale.Country = rLocale.Country;
405 		aParserLocale.Variant = rLocale.Variant;
406 	}
407 	if ( !xLocaleData.is() && xMSF.is() )
408 	{
409 		Reference <
410 			XInterface > xI =
411 			xMSF->createInstance( OUString(
412 			RTL_CONSTASCII_USTRINGPARAM( "com.sun.star.i18n.LocaleData" ) ) );
413 		if ( xI.is() )
414 		{
415 			Any x = xI->queryInterface( getCppuType((const Reference< XLocaleData>*)0) );
416 			x >>= xLocaleData;
417 		}
418 	}
419 	return bChanged;
420 }
421 
422 
setupParserTable(const Locale & rLocale,sal_Int32 startCharTokenType,const OUString & userDefinedCharactersStart,sal_Int32 contCharTokenType,const OUString & userDefinedCharactersCont)423 void cclass_Unicode::setupParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
424             const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
425             const OUString& userDefinedCharactersCont )
426 {
427 	bool bIntlEqual = (rLocale.Language == aParserLocale.Language &&
428 		rLocale.Country == aParserLocale.Country &&
429 		rLocale.Variant == aParserLocale.Variant);
430 	if ( !pTable || !bIntlEqual ||
431 			startCharTokenType != nStartTypes ||
432 			contCharTokenType != nContTypes ||
433 			userDefinedCharactersStart != aStartChars ||
434 			userDefinedCharactersCont != aContChars )
435 		initParserTable( rLocale, startCharTokenType, userDefinedCharactersStart,
436 			contCharTokenType, userDefinedCharactersCont );
437 }
438 
439 
initParserTable(const Locale & rLocale,sal_Int32 startCharTokenType,const OUString & userDefinedCharactersStart,sal_Int32 contCharTokenType,const OUString & userDefinedCharactersCont)440 void cclass_Unicode::initParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
441             const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
442             const OUString& userDefinedCharactersCont )
443 {
444 	// (Re)Init
445 	setupInternational( rLocale );
446 	// Memory of pTable is reused.
447 	if ( !pTable )
448 		pTable = new UPT_FLAG_TYPE[nDefCnt];
449 	memcpy( pTable, pDefaultParserTable, sizeof(UPT_FLAG_TYPE) * nDefCnt );
450 	// Start and cont tables only need reallocation if different length.
451     if ( pStart && userDefinedCharactersStart.getLength() != aStartChars.getLength() )
452 	{
453 		delete [] pStart;
454 		pStart = NULL;
455 	}
456     if ( pCont && userDefinedCharactersCont.getLength() != aContChars.getLength() )
457 	{
458 		delete [] pCont;
459 		pCont = NULL;
460 	}
461 	nStartTypes = startCharTokenType;
462 	nContTypes = contCharTokenType;
463 	aStartChars = userDefinedCharactersStart;
464 	aContChars = userDefinedCharactersCont;
465 
466 	// specials
467 	if( xLocaleData.is() )
468 	{
469 		LocaleDataItem aItem =
470 			xLocaleData->getLocaleItem( aParserLocale );
471 //!TODO: theoretically separators may be a string, adjustment would have to be
472 //! done here and in parsing and in ::rtl::math::stringToDouble()
473 		cGroupSep = aItem.thousandSeparator.getStr()[0];
474         cDecimalSep = aItem.decimalSeparator.getStr()[0];
475 	}
476 
477 	if ( cGroupSep < nDefCnt )
478 		pTable[cGroupSep] |= TOKEN_VALUE;
479 	if ( cDecimalSep < nDefCnt )
480 		pTable[cDecimalSep] |= TOKEN_CHAR_VALUE | TOKEN_VALUE;
481 
482 	// Modify characters according to KParseTokens definitions.
483 	{
484 		using namespace KParseTokens;
485 		sal_uInt8 i;
486 
487 		if ( !(nStartTypes & ASC_UPALPHA) )
488 			for ( i = 65; i < 91; i++ )
489 				pTable[i] &= ~TOKEN_CHAR_WORD;	// not allowed as start character
490 		if ( !(nContTypes & ASC_UPALPHA) )
491 			for ( i = 65; i < 91; i++ )
492 				pTable[i] &= ~TOKEN_WORD;		// not allowed as cont character
493 
494 		if ( !(nStartTypes & ASC_LOALPHA) )
495 			for ( i = 97; i < 123; i++ )
496 				pTable[i] &= ~TOKEN_CHAR_WORD;	// not allowed as start character
497 		if ( !(nContTypes & ASC_LOALPHA) )
498 			for ( i = 97; i < 123; i++ )
499 				pTable[i] &= ~TOKEN_WORD;		// not allowed as cont character
500 
501 		if ( nStartTypes & ASC_DIGIT )
502 			for ( i = 48; i < 58; i++ )
503 				pTable[i] |= TOKEN_CHAR_WORD;	// allowed as start character
504 		if ( !(nContTypes & ASC_DIGIT) )
505 			for ( i = 48; i < 58; i++ )
506 				pTable[i] &= ~TOKEN_WORD;		// not allowed as cont character
507 
508 		if ( !(nStartTypes & ASC_UNDERSCORE) )
509 			pTable[95] &= ~TOKEN_CHAR_WORD;		// not allowed as start character
510 		if ( !(nContTypes & ASC_UNDERSCORE) )
511 			pTable[95] &= ~TOKEN_WORD;			// not allowed as cont character
512 
513 		if ( nStartTypes & ASC_DOLLAR )
514 			pTable[36] |= TOKEN_CHAR_WORD;		// allowed as start character
515 		if ( nContTypes & ASC_DOLLAR )
516 			pTable[36] |= TOKEN_WORD;			// allowed as cont character
517 
518 		if ( nStartTypes & ASC_DOT )
519 			pTable[46] |= TOKEN_CHAR_WORD;		// allowed as start character
520 		if ( nContTypes & ASC_DOT )
521 			pTable[46] |= TOKEN_WORD;			// allowed as cont character
522 
523 		if ( nStartTypes & ASC_COLON )
524 			pTable[58] |= TOKEN_CHAR_WORD;		// allowed as start character
525 		if ( nContTypes & ASC_COLON )
526 			pTable[58] |= TOKEN_WORD;			// allowed as cont character
527 
528 		if ( nStartTypes & ASC_CONTROL )
529 			for ( i = 1; i < 32; i++ )
530 				pTable[i] |= TOKEN_CHAR_WORD;	// allowed as start character
531 		if ( nContTypes & ASC_CONTROL )
532 			for ( i = 1; i < 32; i++ )
533 				pTable[i] |= TOKEN_WORD;		// allowed as cont character
534 
535 		if ( nStartTypes & ASC_ANY_BUT_CONTROL )
536 			for ( i = 32; i < nDefCnt; i++ )
537 				pTable[i] |= TOKEN_CHAR_WORD;	// allowed as start character
538 		if ( nContTypes & ASC_ANY_BUT_CONTROL )
539 			for ( i = 32; i < nDefCnt; i++ )
540 				pTable[i] |= TOKEN_WORD;		// allowed as cont character
541 
542 	}
543 
544 	// Merge in (positively override with) user defined characters.
545 	// StartChars
546     sal_Int32 nLen = aStartChars.getLength();
547 	if ( nLen )
548 	{
549 		if ( !pStart )
550 			pStart = new UPT_FLAG_TYPE[ nLen ];
551         const sal_Unicode* p = aStartChars.getStr();
552         for ( sal_Int32 j=0; j<nLen; j++, p++ )
553 		{
554 			pStart[j] = TOKEN_CHAR_WORD;
555             if ( *p < nDefCnt )
556                 pTable[*p] |= TOKEN_CHAR_WORD;
557 		}
558 	}
559 	// ContChars
560     nLen = aContChars.getLength();
561 	if ( nLen )
562 	{
563 		if ( !pCont )
564 			pCont = new UPT_FLAG_TYPE[ nLen ];
565         const sal_Unicode* p = aContChars.getStr();
566         for ( sal_Int32 j=0; j<nLen; j++ )
567 		{
568 			pCont[j] = TOKEN_WORD;
569             if ( *p < nDefCnt )
570                 pTable[*p] |= TOKEN_WORD;
571 		}
572 	}
573 }
574 
575 
destroyParserTable()576 void cclass_Unicode::destroyParserTable()
577 {
578 	if ( pCont )
579 		delete [] pCont;
580 	if ( pStart )
581 		delete [] pStart;
582 	if ( pTable )
583 		delete [] pTable;
584 }
585 
586 
getFlags(const sal_Unicode * aStr,sal_Int32 nPos)587 UPT_FLAG_TYPE cclass_Unicode::getFlags( const sal_Unicode* aStr, sal_Int32 nPos )
588 {
589 	UPT_FLAG_TYPE nMask;
590 	sal_Unicode c = aStr[nPos];
591 	if ( c < nDefCnt )
592 		nMask = pTable[ sal_uInt8(c) ];
593 	else
594 		nMask = getFlagsExtended( aStr, nPos );
595 	switch ( eState )
596 	{
597 		case ssGetChar :
598         case ssRewindFromValue :
599         case ssIgnoreLeadingInRewind :
600 		case ssGetWordFirstChar :
601 			if ( !(nMask & TOKEN_CHAR_WORD) )
602 			{
603 				nMask |= getStartCharsFlags( c );
604 				if ( nMask & TOKEN_CHAR_WORD )
605 					nMask &= ~TOKEN_EXCLUDED;
606 			}
607 		break;
608 		case ssGetValue :
609 		case ssGetWord :
610 			if ( !(nMask & TOKEN_WORD) )
611 			{
612 				nMask |= getContCharsFlags( c );
613 				if ( nMask & TOKEN_WORD )
614 					nMask &= ~TOKEN_EXCLUDED;
615 			}
616 		break;
617         default:
618             ;   // other cases aren't needed, no compiler warning
619 	}
620 	return nMask;
621 }
622 
623 
getFlagsExtended(const sal_Unicode * aStr,sal_Int32 nPos)624 UPT_FLAG_TYPE cclass_Unicode::getFlagsExtended( const sal_Unicode* aStr, sal_Int32 nPos )
625 {
626 	sal_Unicode c = aStr[nPos];
627 	if ( c == cGroupSep )
628 		return TOKEN_VALUE;
629 	else if ( c == cDecimalSep )
630 		return TOKEN_CHAR_VALUE | TOKEN_VALUE;
631 	using namespace i18n;
632     bool bStart = (eState == ssGetChar || eState == ssGetWordFirstChar ||
633             eState == ssRewindFromValue || eState == ssIgnoreLeadingInRewind);
634 	sal_Int32 nTypes = (bStart ? nStartTypes : nContTypes);
635 
636 	//! all KParseTokens::UNI_... must be matched
637     switch ( u_charType( (sal_uInt32) c ) )
638 	{
639 		case U_UPPERCASE_LETTER :
640 			return (nTypes & KParseTokens::UNI_UPALPHA) ?
641 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
642 				TOKEN_ILLEGAL;
643 		case U_LOWERCASE_LETTER :
644 			return (nTypes & KParseTokens::UNI_LOALPHA) ?
645 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
646 				TOKEN_ILLEGAL;
647 		case U_TITLECASE_LETTER :
648 			return (nTypes & KParseTokens::UNI_TITLE_ALPHA) ?
649 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
650 				TOKEN_ILLEGAL;
651 		case U_MODIFIER_LETTER :
652 			return (nTypes & KParseTokens::UNI_MODIFIER_LETTER) ?
653 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
654 				TOKEN_ILLEGAL;
655 		case U_NON_SPACING_MARK :
656         case U_COMBINING_SPACING_MARK :
657             // Non_Spacing_Mark can't be a leading character,
658             // nor can a spacing combining mark.
659             if (bStart)
660                 return TOKEN_ILLEGAL;
661 			// fall through, treat it as Other_Letter.
662 		case U_OTHER_LETTER :
663 			return (nTypes & KParseTokens::UNI_OTHER_LETTER) ?
664 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
665 				TOKEN_ILLEGAL;
666 		case U_DECIMAL_DIGIT_NUMBER :
667 			return ((nTypes & KParseTokens::UNI_DIGIT) ?
668 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
669 				TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
670 		case U_LETTER_NUMBER :
671 			return ((nTypes & KParseTokens::UNI_LETTER_NUMBER) ?
672 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
673 				TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
674 		case U_OTHER_NUMBER :
675 			return ((nTypes & KParseTokens::UNI_OTHER_NUMBER) ?
676 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
677 				TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
678 		case U_SPACE_SEPARATOR :
679 			return ((nTypes & KParseTokens::IGNORE_LEADING_WS) ?
680 				TOKEN_CHAR_DONTCARE : (bStart ? TOKEN_CHAR_WORD : (TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP) ));
681 	}
682 
683 	return TOKEN_ILLEGAL;
684 }
685 
686 
getStartCharsFlags(sal_Unicode c)687 UPT_FLAG_TYPE cclass_Unicode::getStartCharsFlags( sal_Unicode c )
688 {
689 	if ( pStart )
690 	{
691         const sal_Unicode* pStr = aStartChars.getStr();
692 		const sal_Unicode* p = StrChr( pStr, c );
693 		if ( p )
694 			return pStart[ p - pStr ];
695 	}
696 	return TOKEN_ILLEGAL;
697 }
698 
699 
getContCharsFlags(sal_Unicode c)700 UPT_FLAG_TYPE cclass_Unicode::getContCharsFlags( sal_Unicode c )
701 {
702 	if ( pCont )
703 	{
704         const sal_Unicode* pStr = aContChars.getStr();
705 		const sal_Unicode* p = StrChr( pStr, c );
706 		if ( p )
707 			return pCont[ p - pStr ];
708 	}
709 	return TOKEN_ILLEGAL;
710 }
711 
712 
parseText(ParseResult & r,const OUString & rText,sal_Int32 nPos,sal_Int32 nTokenType)713 void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32 nPos, sal_Int32 nTokenType )
714 {
715 	using namespace i18n;
716 	const sal_Unicode* const pTextStart = rText.getStr() + nPos;
717 	eState = ssGetChar;
718 
719     //! All the variables below (plus ParseResult) have to be resetted on ssRewindFromValue!
720 	const sal_Unicode* pSym = pTextStart;
721 	const sal_Unicode* pSrc = pSym;
722 	OUString aSymbol;
723 	sal_Unicode c = *pSrc;
724 	sal_Unicode cLast = 0;
725     int nDecSeps = 0;
726 	bool bQuote = false;
727 	bool bMightBeWord = true;
728 	bool bMightBeWordLast = true;
729     //! All the variables above (plus ParseResult) have to be resetted on ssRewindFromValue!
730 
731 	while ( (c != 0) && (eState != ssStop) )
732 	{
733 		UPT_FLAG_TYPE nMask = getFlags( pTextStart, pSrc - pTextStart );
734 		if ( nMask & TOKEN_EXCLUDED )
735 			eState = ssBounce;
736 		if ( bMightBeWord )
737 		{	// only relevant for ssGetValue fall back
738 			if ( eState == ssGetChar || eState == ssRewindFromValue ||
739                     eState == ssIgnoreLeadingInRewind )
740 				bMightBeWord = ((nMask & TOKEN_CHAR_WORD) != 0);
741 			else
742 				bMightBeWord = ((nMask & TOKEN_WORD) != 0);
743 		}
744 		sal_Int32 nParseTokensType = getParseTokensType( pTextStart, pSrc - pTextStart );
745 		pSrc++;
746 		switch (eState)
747 		{
748 			case ssGetChar :
749             case ssRewindFromValue :
750             case ssIgnoreLeadingInRewind :
751 			{
752                 if ( (nMask & TOKEN_CHAR_VALUE) && eState != ssRewindFromValue
753                         && eState != ssIgnoreLeadingInRewind )
754 				{	//! must be first, may fall back to ssGetWord via bMightBeWord
755 					eState = ssGetValue;
756 					if ( nMask & TOKEN_VALUE_DIGIT )
757                     {
758                         if ( 128 <= c )
759                             r.TokenType = KParseType::UNI_NUMBER;
760                         else
761                             r.TokenType = KParseType::ASC_NUMBER;
762                     }
763                     else if ( c == cDecimalSep )
764                     {
765                         if ( *pSrc )
766                             ++nDecSeps;
767                         else
768                             eState = ssRewindFromValue;
769                             // retry for ONE_SINGLE_CHAR or others
770                     }
771 				}
772 				else if ( nMask & TOKEN_CHAR_WORD )
773 				{
774 					eState = ssGetWord;
775 					r.TokenType = KParseType::IDENTNAME;
776 				}
777 				else if ( nMask & TOKEN_NAME_SEP )
778 				{
779 					eState = ssGetWordFirstChar;
780 					bQuote = true;
781 					pSym++;
782 					nParseTokensType = 0;	// will be taken of first real character
783 					r.TokenType = KParseType::SINGLE_QUOTE_NAME;
784 				}
785 				else if ( nMask & TOKEN_CHAR_STRING )
786 				{
787 					eState = ssGetString;
788 					pSym++;
789 					nParseTokensType = 0;	// will be taken of first real character
790 					r.TokenType = KParseType::DOUBLE_QUOTE_STRING;
791 				}
792 				else if ( nMask & TOKEN_CHAR_DONTCARE )
793 				{
794 					if ( nStartTypes & KParseTokens::IGNORE_LEADING_WS )
795 					{
796                         if (eState == ssRewindFromValue)
797                             eState = ssIgnoreLeadingInRewind;
798 						r.LeadingWhiteSpace++;
799 						pSym++;
800 						nParseTokensType = 0;	// wait until real character
801 						bMightBeWord = true;
802 					}
803 					else
804 						eState = ssBounce;
805 				}
806 				else if ( nMask & TOKEN_CHAR_BOOL )
807 				{
808 					eState = ssGetBool;
809 					r.TokenType = KParseType::BOOLEAN;
810 				}
811 				else if ( nMask & TOKEN_CHAR )
812 				{	//! must be last
813 					eState = ssStop;
814 					r.TokenType = KParseType::ONE_SINGLE_CHAR;
815 				}
816 				else
817 					eState = ssBounce;		// not known
818 			}
819 			break;
820 			case ssGetValue :
821 			{
822                 if ( nMask & TOKEN_VALUE_DIGIT )
823                 {
824                     if ( 128 <= c )
825                         r.TokenType = KParseType::UNI_NUMBER;
826                     else if ( r.TokenType != KParseType::UNI_NUMBER )
827                         r.TokenType = KParseType::ASC_NUMBER;
828                 }
829                 if ( nMask & TOKEN_VALUE )
830                 {
831                     if ( c == cDecimalSep && ++nDecSeps > 1 )
832                     {
833                         if ( pSrc - pTextStart == 2 )
834                             eState = ssRewindFromValue;
835                             // consecutive separators
836                         else
837                             eState = ssStopBack;
838                     }
839                     // else keep it going
840                 }
841 				else if ( c == 'E' || c == 'e' )
842 				{
843 					UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart );
844 					if ( nNext & TOKEN_VALUE_EXP )
845 						;	// keep it going
846 					else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
847 					{	// might be a numerical name (1.2efg)
848 						eState = ssGetWord;
849 						r.TokenType = KParseType::IDENTNAME;
850 					}
851 					else
852 						eState = ssStopBack;
853 				}
854 				else if ( nMask & TOKEN_VALUE_SIGN )
855 				{
856 					if ( (cLast == 'E') || (cLast == 'e') )
857 					{
858 						UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart );
859 						if ( nNext & TOKEN_VALUE_EXP_VALUE )
860 							;	// keep it going
861 						else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
862 						{	// might be a numerical name (1.2e+fg)
863 							eState = ssGetWord;
864 							r.TokenType = KParseType::IDENTNAME;
865 						}
866 						else
867 							eState = ssStopBack;
868 					}
869 					else if ( bMightBeWord )
870 					{	// might be a numerical name (1.2+fg)
871 						eState = ssGetWord;
872 						r.TokenType = KParseType::IDENTNAME;
873 					}
874 					else
875 						eState = ssStopBack;
876 				}
877 				else if ( bMightBeWord && (nMask & TOKEN_WORD) )
878 				{	// might be a numerical name (1995.A1)
879 					eState = ssGetWord;
880 					r.TokenType = KParseType::IDENTNAME;
881 				}
882 				else
883 					eState = ssStopBack;
884 			}
885 			break;
886 			case ssGetWordFirstChar :
887 				eState = ssGetWord;
888 				// fall thru
889 			case ssGetWord :
890 			{
891 				if ( nMask & TOKEN_WORD )
892 					;	// keep it going
893 				else if ( nMask & TOKEN_NAME_SEP )
894 				{
895 					if ( bQuote )
896 					{
897 						if ( cLast == '\\' )
898 						{	// escaped
899 							aSymbol += OUString( pSym, pSrc - pSym - 2 );
900 							aSymbol += OUString( &c, 1);
901 						}
902 						else
903 						{
904 							eState = ssStop;
905 							aSymbol += OUString( pSym, pSrc - pSym - 1 );
906 						}
907 						pSym = pSrc;
908 					}
909 					else
910 						eState = ssStopBack;
911 				}
912 				else if ( bQuote )
913 					;	// keep it going
914 				else
915 					eState = ssStopBack;
916 			}
917 			break;
918 			case ssGetString :
919 			{
920 				if ( nMask & TOKEN_STRING_SEP )
921 				{
922 					if ( cLast == '\\' )
923 					{	// escaped
924 						aSymbol += OUString( pSym, pSrc - pSym - 2 );
925 						aSymbol += OUString( &c, 1);
926 					}
927                     else if ( c == *pSrc &&
928                             !(nContTypes & KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING) )
929 					{	// "" => literal " escaped
930 						aSymbol += OUString( pSym, pSrc - pSym );
931 						pSrc++;
932 					}
933 					else
934 					{
935 						eState = ssStop;
936 						aSymbol += OUString( pSym, pSrc - pSym - 1 );
937 					}
938 					pSym = pSrc;
939 				}
940 			}
941 			break;
942 			case ssGetBool :
943 			{
944 				if ( (nMask & TOKEN_BOOL) )
945 					eState = ssStop;	// maximum 2: <, >, <>, <=, >=
946 				else
947 					eState = ssStopBack;
948 			}
949 			break;
950             case ssStopBack :
951             case ssBounce :
952             case ssStop :
953                 ;   // nothing, no compiler warning
954             break;
955 		}
956         if ( eState == ssRewindFromValue )
957         {
958             r = ParseResult();
959             pSym = pTextStart;
960             pSrc = pSym;
961             aSymbol = OUString();
962             c = *pSrc;
963             cLast = 0;
964             nDecSeps = 0;
965             bQuote = false;
966             bMightBeWord = true;
967             bMightBeWordLast = true;
968         }
969         else
970         {
971             if ( !(r.TokenType & nTokenType) )
972             {
973                 if ( (r.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER))
974                         && (nTokenType & KParseType::IDENTNAME) && bMightBeWord )
975                     ;	// keep a number that might be a word
976                 else if ( r.LeadingWhiteSpace == (pSrc - pTextStart) )
977                     ;	// keep ignored white space
978                 else if ( !r.TokenType && eState == ssGetValue && (nMask & TOKEN_VALUE_SEP) )
979                     ;   // keep uncertain value
980                 else
981                     eState = ssBounce;
982             }
983             if ( eState == ssBounce )
984             {
985                 r.TokenType = 0;
986                 eState = ssStopBack;
987             }
988             if ( eState == ssStopBack )
989             {	// put back
990                 pSrc--;
991                 bMightBeWord = bMightBeWordLast;
992                 eState = ssStop;
993             }
994             if ( eState != ssStop )
995             {
996                 if ( !r.StartFlags )
997                     r.StartFlags |= nParseTokensType;
998                 else
999                     r.ContFlags |= nParseTokensType;
1000             }
1001             bMightBeWordLast = bMightBeWord;
1002             cLast = c;
1003             c = *pSrc;
1004         }
1005 	}
1006 	// r.CharLen is the length in characters (not code points) of the parsed
1007 	// token not including any leading white space, change this calculation if
1008 	// multi-code-point Unicode characters are to be supported.
1009 	r.CharLen = pSrc - pTextStart - r.LeadingWhiteSpace;
1010 	r.EndPos = nPos + (pSrc - pTextStart);
1011 	if ( r.TokenType & KParseType::ASC_NUMBER )
1012 	{
1013         r.Value = rtl_math_uStringToDouble( pTextStart + r.LeadingWhiteSpace,
1014                 pTextStart + r.EndPos, cDecimalSep, cGroupSep, NULL, NULL );
1015 		if ( bMightBeWord )
1016 			r.TokenType |= KParseType::IDENTNAME;
1017 	}
1018 	else if ( r.TokenType & KParseType::UNI_NUMBER )
1019 	{
1020         if ( !xNatNumSup.is() )
1021         {
1022 #define NATIVENUMBERSUPPLIER_SERVICENAME "com.sun.star.i18n.NativeNumberSupplier"
1023             if ( xMSF.is() )
1024             {
1025                 xNatNumSup = Reference< XNativeNumberSupplier > (
1026                         xMSF->createInstance( OUString(
1027                                 RTL_CONSTASCII_USTRINGPARAM(
1028                                     NATIVENUMBERSUPPLIER_SERVICENAME ) ) ),
1029                         UNO_QUERY );
1030             }
1031             if ( !xNatNumSup.is() )
1032             {
1033                 throw RuntimeException( OUString(
1034 #ifdef DBG_UTIL
1035                     RTL_CONSTASCII_USTRINGPARAM(
1036                         "cclass_Unicode::parseText: can't instanciate "
1037                         NATIVENUMBERSUPPLIER_SERVICENAME )
1038 #endif
1039                     ), *this );
1040             }
1041 #undef NATIVENUMBERSUPPLIER_SERVICENAME
1042         }
1043         OUString aTmp( pTextStart + r.LeadingWhiteSpace, r.EndPos - nPos +
1044                 r.LeadingWhiteSpace );
1045         // transliterate to ASCII
1046         aTmp = xNatNumSup->getNativeNumberString( aTmp, aParserLocale,
1047                 NativeNumberMode::NATNUM0 );
1048         r.Value = ::rtl::math::stringToDouble( aTmp, cDecimalSep, cGroupSep, NULL, NULL );
1049 		if ( bMightBeWord )
1050 			r.TokenType |= KParseType::IDENTNAME;
1051 	}
1052 	else if ( r.TokenType & (KParseType::SINGLE_QUOTE_NAME | KParseType::DOUBLE_QUOTE_STRING) )
1053 	{
1054 		if ( pSym < pSrc )
1055 		{	//! open quote
1056 			aSymbol += OUString( pSym, pSrc - pSym );
1057 			r.TokenType |= KParseType::MISSING_QUOTE;
1058 		}
1059 		r.DequotedNameOrString = aSymbol;
1060 	}
1061 }
1062 
1063 } } } }
1064