1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_i18npool.hxx"
30 
31 #include <cclass_unicode.hxx>
32 #include <unicode/uchar.h>
33 #include <rtl/math.hxx>
34 #include <rtl/ustring.hxx>
35 #include <com/sun/star/i18n/KParseTokens.hpp>
36 #include <com/sun/star/i18n/KParseType.hpp>
37 #include <com/sun/star/i18n/UnicodeType.hpp>
38 #include <com/sun/star/i18n/XLocaleData.hpp>
39 #include <com/sun/star/i18n/NativeNumberMode.hpp>
40 
41 #include <string.h>		// memcpy()
42 
43 using namespace ::com::sun::star::uno;
44 using namespace ::com::sun::star::lang;
45 using namespace ::rtl;
46 
47 namespace com { namespace sun { namespace star { namespace i18n {
48 
49 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_ILLEGAL		= 0x00000000;
50 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR			= 0x00000001;
51 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_BOOL	= 0x00000002;
52 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_WORD	= 0x00000004;
53 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_VALUE	= 0x00000008;
54 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_STRING	= 0x00000010;
55 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_DONTCARE= 0x00000020;
56 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_BOOL			= 0x00000040;
57 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD			= 0x00000080;
58 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD_SEP		= 0x00000100;
59 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE		= 0x00000200;
60 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SEP	= 0x00000400;
61 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP	= 0x00000800;
62 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SIGN	= 0x00001000;
63 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP_VALUE	= 0x00002000;
64 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_DIGIT	= 0x00004000;
65 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_NAME_SEP		= 0x20000000;
66 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_STRING_SEP	= 0x40000000;
67 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_EXCLUDED		= 0x80000000;
68 
69 #define TOKEN_DIGIT_FLAGS (TOKEN_CHAR_VALUE | TOKEN_VALUE | TOKEN_VALUE_EXP | TOKEN_VALUE_EXP_VALUE | TOKEN_VALUE_DIGIT)
70 
71 // Default identifier/name specification is [A-Za-z_][A-Za-z0-9_]*
72 
73 const sal_uInt8 cclass_Unicode::nDefCnt = 128;
74 const UPT_FLAG_TYPE cclass_Unicode::pDefaultParserTable[ nDefCnt ] =
75 {
76 // (...) == Calc formula compiler specific, commented out and modified
77 
78 	/* \0 */	TOKEN_EXCLUDED,
79 				TOKEN_ILLEGAL,
80 				TOKEN_ILLEGAL,
81 				TOKEN_ILLEGAL,
82 				TOKEN_ILLEGAL,
83 				TOKEN_ILLEGAL,
84 				TOKEN_ILLEGAL,
85 				TOKEN_ILLEGAL,
86 				TOKEN_ILLEGAL,
87 	/*  9 \t */	TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,		// (TOKEN_ILLEGAL)
88 				TOKEN_ILLEGAL,
89 	/* 11 \v */	TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,		// (TOKEN_ILLEGAL)
90 				TOKEN_ILLEGAL,
91 				TOKEN_ILLEGAL,
92 				TOKEN_ILLEGAL,
93 				TOKEN_ILLEGAL,
94 				TOKEN_ILLEGAL,
95 				TOKEN_ILLEGAL,
96 				TOKEN_ILLEGAL,
97 				TOKEN_ILLEGAL,
98 				TOKEN_ILLEGAL,
99 				TOKEN_ILLEGAL,
100 				TOKEN_ILLEGAL,
101 				TOKEN_ILLEGAL,
102 				TOKEN_ILLEGAL,
103 				TOKEN_ILLEGAL,
104 				TOKEN_ILLEGAL,
105 				TOKEN_ILLEGAL,
106 				TOKEN_ILLEGAL,
107 				TOKEN_ILLEGAL,
108 				TOKEN_ILLEGAL,
109 				TOKEN_ILLEGAL,
110 	/*  32   */	TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
111 	/*  33 ! */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
112 	/*  34 " */	TOKEN_CHAR_STRING | TOKEN_STRING_SEP,
113 	/*  35 # */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_WORD_SEP)
114 	/*  36 $ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_CHAR_WORD | TOKEN_WORD)
115 	/*  37 % */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_VALUE)
116 	/*  38 & */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
117 	/*  39 ' */	TOKEN_NAME_SEP,
118 	/*  40 ( */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
119 	/*  41 ) */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
120 	/*  42 * */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
121 	/*  43 + */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN,
122 	/*  44 , */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_CHAR_VALUE | TOKEN_VALUE)
123 	/*  45 - */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN,
124 	/*  46 . */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_WORD | TOKEN_CHAR_VALUE | TOKEN_VALUE)
125 	/*  47 / */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
126 	//for ( i = 48; i < 58; i++ )
127 	/*  48 0 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
128 	/*  49 1 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
129 	/*  50 2 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
130 	/*  51 3 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
131 	/*  52 4 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
132 	/*  53 5 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
133 	/*  54 6 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
134 	/*  55 7 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
135 	/*  56 8 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
136 	/*  57 9 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
137 	/*  58 : */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_WORD)
138 	/*  59 ; */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
139 	/*  60 < */	TOKEN_CHAR_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
140 	/*  61 = */	TOKEN_CHAR | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
141 	/*  62 > */	TOKEN_CHAR_BOOL | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
142 	/*  63 ? */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_CHAR_WORD | TOKEN_WORD)
143 	/*  64 @ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
144 	//for ( i = 65; i < 91; i++ )
145 	/*  65 A */	TOKEN_CHAR_WORD | TOKEN_WORD,
146 	/*  66 B */	TOKEN_CHAR_WORD | TOKEN_WORD,
147 	/*  67 C */	TOKEN_CHAR_WORD | TOKEN_WORD,
148 	/*  68 D */	TOKEN_CHAR_WORD | TOKEN_WORD,
149 	/*  69 E */	TOKEN_CHAR_WORD | TOKEN_WORD,
150 	/*  70 F */	TOKEN_CHAR_WORD | TOKEN_WORD,
151 	/*  71 G */	TOKEN_CHAR_WORD | TOKEN_WORD,
152 	/*  72 H */	TOKEN_CHAR_WORD | TOKEN_WORD,
153 	/*  73 I */	TOKEN_CHAR_WORD | TOKEN_WORD,
154 	/*  74 J */	TOKEN_CHAR_WORD | TOKEN_WORD,
155 	/*  75 K */	TOKEN_CHAR_WORD | TOKEN_WORD,
156 	/*  76 L */	TOKEN_CHAR_WORD | TOKEN_WORD,
157 	/*  77 M */	TOKEN_CHAR_WORD | TOKEN_WORD,
158 	/*  78 N */	TOKEN_CHAR_WORD | TOKEN_WORD,
159 	/*  79 O */	TOKEN_CHAR_WORD | TOKEN_WORD,
160 	/*  80 P */	TOKEN_CHAR_WORD | TOKEN_WORD,
161 	/*  81 Q */	TOKEN_CHAR_WORD | TOKEN_WORD,
162 	/*  82 R */	TOKEN_CHAR_WORD | TOKEN_WORD,
163 	/*  83 S */	TOKEN_CHAR_WORD | TOKEN_WORD,
164 	/*  84 T */	TOKEN_CHAR_WORD | TOKEN_WORD,
165 	/*  85 U */	TOKEN_CHAR_WORD | TOKEN_WORD,
166 	/*  86 V */	TOKEN_CHAR_WORD | TOKEN_WORD,
167 	/*  87 W */	TOKEN_CHAR_WORD | TOKEN_WORD,
168 	/*  88 X */	TOKEN_CHAR_WORD | TOKEN_WORD,
169 	/*  89 Y */	TOKEN_CHAR_WORD | TOKEN_WORD,
170 	/*  90 Z */	TOKEN_CHAR_WORD | TOKEN_WORD,
171 	/*  91 [ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
172 	/*  92 \ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
173 	/*  93 ] */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
174 	/*  94 ^ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
175 	/*  95 _ */	TOKEN_CHAR_WORD | TOKEN_WORD,
176 	/*  96 ` */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
177 	//for ( i = 97; i < 123; i++ )
178 	/*  97 a */	TOKEN_CHAR_WORD | TOKEN_WORD,
179 	/*  98 b */	TOKEN_CHAR_WORD | TOKEN_WORD,
180 	/*  99 c */	TOKEN_CHAR_WORD | TOKEN_WORD,
181 	/* 100 d */	TOKEN_CHAR_WORD | TOKEN_WORD,
182 	/* 101 e */	TOKEN_CHAR_WORD | TOKEN_WORD,
183 	/* 102 f */	TOKEN_CHAR_WORD | TOKEN_WORD,
184 	/* 103 g */	TOKEN_CHAR_WORD | TOKEN_WORD,
185 	/* 104 h */	TOKEN_CHAR_WORD | TOKEN_WORD,
186 	/* 105 i */	TOKEN_CHAR_WORD | TOKEN_WORD,
187 	/* 106 j */	TOKEN_CHAR_WORD | TOKEN_WORD,
188 	/* 107 k */	TOKEN_CHAR_WORD | TOKEN_WORD,
189 	/* 108 l */	TOKEN_CHAR_WORD | TOKEN_WORD,
190 	/* 109 m */	TOKEN_CHAR_WORD | TOKEN_WORD,
191 	/* 110 n */	TOKEN_CHAR_WORD | TOKEN_WORD,
192 	/* 111 o */	TOKEN_CHAR_WORD | TOKEN_WORD,
193 	/* 112 p */	TOKEN_CHAR_WORD | TOKEN_WORD,
194 	/* 113 q */	TOKEN_CHAR_WORD | TOKEN_WORD,
195 	/* 114 r */	TOKEN_CHAR_WORD | TOKEN_WORD,
196 	/* 115 s */	TOKEN_CHAR_WORD | TOKEN_WORD,
197 	/* 116 t */	TOKEN_CHAR_WORD | TOKEN_WORD,
198 	/* 117 u */	TOKEN_CHAR_WORD | TOKEN_WORD,
199 	/* 118 v */	TOKEN_CHAR_WORD | TOKEN_WORD,
200 	/* 119 w */	TOKEN_CHAR_WORD | TOKEN_WORD,
201 	/* 120 x */	TOKEN_CHAR_WORD | TOKEN_WORD,
202 	/* 121 y */	TOKEN_CHAR_WORD | TOKEN_WORD,
203 	/* 122 z */	TOKEN_CHAR_WORD | TOKEN_WORD,
204 	/* 123 { */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
205 	/* 124 | */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
206 	/* 125 } */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
207 	/* 126 ~ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
208 	/* 127   */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP	// (TOKEN_ILLEGAL // UNUSED)
209 };
210 
211 
212 const sal_Int32 cclass_Unicode::pParseTokensType[ nDefCnt ] =
213 {
214 	/* \0 */	KParseTokens::ASC_OTHER,
215 				KParseTokens::ASC_CONTROL,
216 				KParseTokens::ASC_CONTROL,
217 				KParseTokens::ASC_CONTROL,
218 				KParseTokens::ASC_CONTROL,
219 				KParseTokens::ASC_CONTROL,
220 				KParseTokens::ASC_CONTROL,
221 				KParseTokens::ASC_CONTROL,
222 				KParseTokens::ASC_CONTROL,
223 	/*  9 \t */	KParseTokens::ASC_CONTROL,
224 				KParseTokens::ASC_CONTROL,
225 	/* 11 \v */	KParseTokens::ASC_CONTROL,
226 				KParseTokens::ASC_CONTROL,
227 				KParseTokens::ASC_CONTROL,
228 				KParseTokens::ASC_CONTROL,
229 				KParseTokens::ASC_CONTROL,
230 				KParseTokens::ASC_CONTROL,
231 				KParseTokens::ASC_CONTROL,
232 				KParseTokens::ASC_CONTROL,
233 				KParseTokens::ASC_CONTROL,
234 				KParseTokens::ASC_CONTROL,
235 				KParseTokens::ASC_CONTROL,
236 				KParseTokens::ASC_CONTROL,
237 				KParseTokens::ASC_CONTROL,
238 				KParseTokens::ASC_CONTROL,
239 				KParseTokens::ASC_CONTROL,
240 				KParseTokens::ASC_CONTROL,
241 				KParseTokens::ASC_CONTROL,
242 				KParseTokens::ASC_CONTROL,
243 				KParseTokens::ASC_CONTROL,
244 				KParseTokens::ASC_CONTROL,
245 				KParseTokens::ASC_CONTROL,
246 	/*  32   */	KParseTokens::ASC_OTHER,
247 	/*  33 ! */	KParseTokens::ASC_OTHER,
248 	/*  34 " */	KParseTokens::ASC_OTHER,
249 	/*  35 # */	KParseTokens::ASC_OTHER,
250 	/*  36 $ */	KParseTokens::ASC_DOLLAR,
251 	/*  37 % */	KParseTokens::ASC_OTHER,
252 	/*  38 & */	KParseTokens::ASC_OTHER,
253 	/*  39 ' */	KParseTokens::ASC_OTHER,
254 	/*  40 ( */	KParseTokens::ASC_OTHER,
255 	/*  41 ) */	KParseTokens::ASC_OTHER,
256 	/*  42 * */	KParseTokens::ASC_OTHER,
257 	/*  43 + */	KParseTokens::ASC_OTHER,
258 	/*  44 , */	KParseTokens::ASC_OTHER,
259 	/*  45 - */	KParseTokens::ASC_OTHER,
260 	/*  46 . */	KParseTokens::ASC_DOT,
261 	/*  47 / */	KParseTokens::ASC_OTHER,
262 	//for ( i = 48; i < 58; i++ )
263 	/*  48 0 */	KParseTokens::ASC_DIGIT,
264 	/*  49 1 */	KParseTokens::ASC_DIGIT,
265 	/*  50 2 */	KParseTokens::ASC_DIGIT,
266 	/*  51 3 */	KParseTokens::ASC_DIGIT,
267 	/*  52 4 */	KParseTokens::ASC_DIGIT,
268 	/*  53 5 */	KParseTokens::ASC_DIGIT,
269 	/*  54 6 */	KParseTokens::ASC_DIGIT,
270 	/*  55 7 */	KParseTokens::ASC_DIGIT,
271 	/*  56 8 */	KParseTokens::ASC_DIGIT,
272 	/*  57 9 */	KParseTokens::ASC_DIGIT,
273 	/*  58 : */	KParseTokens::ASC_COLON,
274 	/*  59 ; */	KParseTokens::ASC_OTHER,
275 	/*  60 < */	KParseTokens::ASC_OTHER,
276 	/*  61 = */	KParseTokens::ASC_OTHER,
277 	/*  62 > */	KParseTokens::ASC_OTHER,
278 	/*  63 ? */	KParseTokens::ASC_OTHER,
279 	/*  64 @ */	KParseTokens::ASC_OTHER,
280 	//for ( i = 65; i < 91; i++ )
281 	/*  65 A */	KParseTokens::ASC_UPALPHA,
282 	/*  66 B */	KParseTokens::ASC_UPALPHA,
283 	/*  67 C */	KParseTokens::ASC_UPALPHA,
284 	/*  68 D */	KParseTokens::ASC_UPALPHA,
285 	/*  69 E */	KParseTokens::ASC_UPALPHA,
286 	/*  70 F */	KParseTokens::ASC_UPALPHA,
287 	/*  71 G */	KParseTokens::ASC_UPALPHA,
288 	/*  72 H */	KParseTokens::ASC_UPALPHA,
289 	/*  73 I */	KParseTokens::ASC_UPALPHA,
290 	/*  74 J */	KParseTokens::ASC_UPALPHA,
291 	/*  75 K */	KParseTokens::ASC_UPALPHA,
292 	/*  76 L */	KParseTokens::ASC_UPALPHA,
293 	/*  77 M */	KParseTokens::ASC_UPALPHA,
294 	/*  78 N */	KParseTokens::ASC_UPALPHA,
295 	/*  79 O */	KParseTokens::ASC_UPALPHA,
296 	/*  80 P */	KParseTokens::ASC_UPALPHA,
297 	/*  81 Q */	KParseTokens::ASC_UPALPHA,
298 	/*  82 R */	KParseTokens::ASC_UPALPHA,
299 	/*  83 S */	KParseTokens::ASC_UPALPHA,
300 	/*  84 T */	KParseTokens::ASC_UPALPHA,
301 	/*  85 U */	KParseTokens::ASC_UPALPHA,
302 	/*  86 V */	KParseTokens::ASC_UPALPHA,
303 	/*  87 W */	KParseTokens::ASC_UPALPHA,
304 	/*  88 X */	KParseTokens::ASC_UPALPHA,
305 	/*  89 Y */	KParseTokens::ASC_UPALPHA,
306 	/*  90 Z */	KParseTokens::ASC_UPALPHA,
307 	/*  91 [ */	KParseTokens::ASC_OTHER,
308 	/*  92 \ */	KParseTokens::ASC_OTHER,
309 	/*  93 ] */	KParseTokens::ASC_OTHER,
310 	/*  94 ^ */	KParseTokens::ASC_OTHER,
311 	/*  95 _ */	KParseTokens::ASC_UNDERSCORE,
312 	/*  96 ` */	KParseTokens::ASC_OTHER,
313 	//for ( i = 97; i < 123; i++ )
314 	/*  97 a */	KParseTokens::ASC_LOALPHA,
315 	/*  98 b */	KParseTokens::ASC_LOALPHA,
316 	/*  99 c */	KParseTokens::ASC_LOALPHA,
317 	/* 100 d */	KParseTokens::ASC_LOALPHA,
318 	/* 101 e */	KParseTokens::ASC_LOALPHA,
319 	/* 102 f */	KParseTokens::ASC_LOALPHA,
320 	/* 103 g */	KParseTokens::ASC_LOALPHA,
321 	/* 104 h */	KParseTokens::ASC_LOALPHA,
322 	/* 105 i */	KParseTokens::ASC_LOALPHA,
323 	/* 106 j */	KParseTokens::ASC_LOALPHA,
324 	/* 107 k */	KParseTokens::ASC_LOALPHA,
325 	/* 108 l */	KParseTokens::ASC_LOALPHA,
326 	/* 109 m */	KParseTokens::ASC_LOALPHA,
327 	/* 110 n */	KParseTokens::ASC_LOALPHA,
328 	/* 111 o */	KParseTokens::ASC_LOALPHA,
329 	/* 112 p */	KParseTokens::ASC_LOALPHA,
330 	/* 113 q */	KParseTokens::ASC_LOALPHA,
331 	/* 114 r */	KParseTokens::ASC_LOALPHA,
332 	/* 115 s */	KParseTokens::ASC_LOALPHA,
333 	/* 116 t */	KParseTokens::ASC_LOALPHA,
334 	/* 117 u */	KParseTokens::ASC_LOALPHA,
335 	/* 118 v */	KParseTokens::ASC_LOALPHA,
336 	/* 119 w */	KParseTokens::ASC_LOALPHA,
337 	/* 120 x */	KParseTokens::ASC_LOALPHA,
338 	/* 121 y */	KParseTokens::ASC_LOALPHA,
339 	/* 122 z */	KParseTokens::ASC_LOALPHA,
340 	/* 123 { */	KParseTokens::ASC_OTHER,
341 	/* 124 | */	KParseTokens::ASC_OTHER,
342 	/* 125 } */	KParseTokens::ASC_OTHER,
343 	/* 126 ~ */	KParseTokens::ASC_OTHER,
344 	/* 127   */	KParseTokens::ASC_OTHER
345 };
346 
347 
348 // static
349 const sal_Unicode* cclass_Unicode::StrChr( const sal_Unicode* pStr, sal_Unicode c )
350 {
351 	if ( !pStr )
352 		return NULL;
353 	while ( *pStr )
354 	{
355 		if ( *pStr == c )
356 			return pStr;
357 		pStr++;
358 	}
359 	return NULL;
360 }
361 
362 
363 sal_Int32 cclass_Unicode::getParseTokensType( const sal_Unicode* aStr, sal_Int32 nPos )
364 {
365 	sal_Unicode c = aStr[nPos];
366 	if ( c < nDefCnt )
367 		return pParseTokensType[ sal_uInt8(c) ];
368 	else
369 	{
370 
371 		//! all KParseTokens::UNI_... must be matched
372         switch ( u_charType( (sal_uInt32) c ) )
373 		{
374 			case U_UPPERCASE_LETTER :
375 				return KParseTokens::UNI_UPALPHA;
376 			case U_LOWERCASE_LETTER :
377 				return KParseTokens::UNI_LOALPHA;
378 			case U_TITLECASE_LETTER :
379 				return KParseTokens::UNI_TITLE_ALPHA;
380 			case U_MODIFIER_LETTER :
381 				return KParseTokens::UNI_MODIFIER_LETTER;
382 			case U_OTHER_LETTER :
383 				// Non_Spacing_Mark could not be as leading character
384 				if (nPos == 0) break;
385 				// fall through, treat it as Other_Letter.
386 			case U_NON_SPACING_MARK :
387 				return KParseTokens::UNI_OTHER_LETTER;
388 			case U_DECIMAL_DIGIT_NUMBER :
389 				return KParseTokens::UNI_DIGIT;
390 			case U_LETTER_NUMBER :
391 				return KParseTokens::UNI_LETTER_NUMBER;
392 			case U_OTHER_NUMBER :
393 				return KParseTokens::UNI_OTHER_NUMBER;
394 		}
395 
396 		return KParseTokens::UNI_OTHER;
397 	}
398 }
399 
400 sal_Bool cclass_Unicode::setupInternational( const Locale& rLocale )
401 {
402 	sal_Bool bChanged = (aParserLocale.Language != rLocale.Language
403 		|| aParserLocale.Country != rLocale.Country
404 		|| aParserLocale.Variant != rLocale.Variant);
405 	if ( bChanged )
406 	{
407 		aParserLocale.Language = rLocale.Language;
408 		aParserLocale.Country = rLocale.Country;
409 		aParserLocale.Variant = rLocale.Variant;
410 	}
411 	if ( !xLocaleData.is() && xMSF.is() )
412 	{
413 		Reference <
414 			XInterface > xI =
415 			xMSF->createInstance( OUString(
416 			RTL_CONSTASCII_USTRINGPARAM( "com.sun.star.i18n.LocaleData" ) ) );
417 		if ( xI.is() )
418 		{
419 			Any x = xI->queryInterface( getCppuType((const Reference< XLocaleData>*)0) );
420 			x >>= xLocaleData;
421 		}
422 	}
423 	return bChanged;
424 }
425 
426 
427 void cclass_Unicode::setupParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
428             const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
429             const OUString& userDefinedCharactersCont )
430 {
431 	bool bIntlEqual = (rLocale.Language == aParserLocale.Language &&
432 		rLocale.Country == aParserLocale.Country &&
433 		rLocale.Variant == aParserLocale.Variant);
434 	if ( !pTable || !bIntlEqual ||
435 			startCharTokenType != nStartTypes ||
436 			contCharTokenType != nContTypes ||
437 			userDefinedCharactersStart != aStartChars ||
438 			userDefinedCharactersCont != aContChars )
439 		initParserTable( rLocale, startCharTokenType, userDefinedCharactersStart,
440 			contCharTokenType, userDefinedCharactersCont );
441 }
442 
443 
444 void cclass_Unicode::initParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
445             const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
446             const OUString& userDefinedCharactersCont )
447 {
448 	// (Re)Init
449 	setupInternational( rLocale );
450 	// Memory of pTable is reused.
451 	if ( !pTable )
452 		pTable = new UPT_FLAG_TYPE[nDefCnt];
453 	memcpy( pTable, pDefaultParserTable, sizeof(UPT_FLAG_TYPE) * nDefCnt );
454 	// Start and cont tables only need reallocation if different length.
455     if ( pStart && userDefinedCharactersStart.getLength() != aStartChars.getLength() )
456 	{
457 		delete [] pStart;
458 		pStart = NULL;
459 	}
460     if ( pCont && userDefinedCharactersCont.getLength() != aContChars.getLength() )
461 	{
462 		delete [] pCont;
463 		pCont = NULL;
464 	}
465 	nStartTypes = startCharTokenType;
466 	nContTypes = contCharTokenType;
467 	aStartChars = userDefinedCharactersStart;
468 	aContChars = userDefinedCharactersCont;
469 
470 	// specials
471 	if( xLocaleData.is() )
472 	{
473 		LocaleDataItem aItem =
474 			xLocaleData->getLocaleItem( aParserLocale );
475 //!TODO: theoretically separators may be a string, adjustment would have to be
476 //! done here and in parsing and in ::rtl::math::stringToDouble()
477 		cGroupSep = aItem.thousandSeparator.getStr()[0];
478         cDecimalSep = aItem.decimalSeparator.getStr()[0];
479 	}
480 
481 	if ( cGroupSep < nDefCnt )
482 		pTable[cGroupSep] |= TOKEN_VALUE;
483 	if ( cDecimalSep < nDefCnt )
484 		pTable[cDecimalSep] |= TOKEN_CHAR_VALUE | TOKEN_VALUE;
485 
486 	// Modify characters according to KParseTokens definitions.
487 	{
488 		using namespace KParseTokens;
489 		sal_uInt8 i;
490 
491 		if ( !(nStartTypes & ASC_UPALPHA) )
492 			for ( i = 65; i < 91; i++ )
493 				pTable[i] &= ~TOKEN_CHAR_WORD;	// not allowed as start character
494 		if ( !(nContTypes & ASC_UPALPHA) )
495 			for ( i = 65; i < 91; i++ )
496 				pTable[i] &= ~TOKEN_WORD;		// not allowed as cont character
497 
498 		if ( !(nStartTypes & ASC_LOALPHA) )
499 			for ( i = 97; i < 123; i++ )
500 				pTable[i] &= ~TOKEN_CHAR_WORD;	// not allowed as start character
501 		if ( !(nContTypes & ASC_LOALPHA) )
502 			for ( i = 97; i < 123; i++ )
503 				pTable[i] &= ~TOKEN_WORD;		// not allowed as cont character
504 
505 		if ( nStartTypes & ASC_DIGIT )
506 			for ( i = 48; i < 58; i++ )
507 				pTable[i] |= TOKEN_CHAR_WORD;	// allowed as start character
508 		if ( !(nContTypes & ASC_DIGIT) )
509 			for ( i = 48; i < 58; i++ )
510 				pTable[i] &= ~TOKEN_WORD;		// not allowed as cont character
511 
512 		if ( !(nStartTypes & ASC_UNDERSCORE) )
513 			pTable[95] &= ~TOKEN_CHAR_WORD;		// not allowed as start character
514 		if ( !(nContTypes & ASC_UNDERSCORE) )
515 			pTable[95] &= ~TOKEN_WORD;			// not allowed as cont character
516 
517 		if ( nStartTypes & ASC_DOLLAR )
518 			pTable[36] |= TOKEN_CHAR_WORD;		// allowed as start character
519 		if ( nContTypes & ASC_DOLLAR )
520 			pTable[36] |= TOKEN_WORD;			// allowed as cont character
521 
522 		if ( nStartTypes & ASC_DOT )
523 			pTable[46] |= TOKEN_CHAR_WORD;		// allowed as start character
524 		if ( nContTypes & ASC_DOT )
525 			pTable[46] |= TOKEN_WORD;			// allowed as cont character
526 
527 		if ( nStartTypes & ASC_COLON )
528 			pTable[58] |= TOKEN_CHAR_WORD;		// allowed as start character
529 		if ( nContTypes & ASC_COLON )
530 			pTable[58] |= TOKEN_WORD;			// allowed as cont character
531 
532 		if ( nStartTypes & ASC_CONTROL )
533 			for ( i = 1; i < 32; i++ )
534 				pTable[i] |= TOKEN_CHAR_WORD;	// allowed as start character
535 		if ( nContTypes & ASC_CONTROL )
536 			for ( i = 1; i < 32; i++ )
537 				pTable[i] |= TOKEN_WORD;		// allowed as cont character
538 
539 		if ( nStartTypes & ASC_ANY_BUT_CONTROL )
540 			for ( i = 32; i < nDefCnt; i++ )
541 				pTable[i] |= TOKEN_CHAR_WORD;	// allowed as start character
542 		if ( nContTypes & ASC_ANY_BUT_CONTROL )
543 			for ( i = 32; i < nDefCnt; i++ )
544 				pTable[i] |= TOKEN_WORD;		// allowed as cont character
545 
546 	}
547 
548 	// Merge in (positively override with) user defined characters.
549 	// StartChars
550     sal_Int32 nLen = aStartChars.getLength();
551 	if ( nLen )
552 	{
553 		if ( !pStart )
554 			pStart = new UPT_FLAG_TYPE[ nLen ];
555         const sal_Unicode* p = aStartChars.getStr();
556         for ( sal_Int32 j=0; j<nLen; j++, p++ )
557 		{
558 			pStart[j] = TOKEN_CHAR_WORD;
559             if ( *p < nDefCnt )
560                 pTable[*p] |= TOKEN_CHAR_WORD;
561 		}
562 	}
563 	// ContChars
564     nLen = aContChars.getLength();
565 	if ( nLen )
566 	{
567 		if ( !pCont )
568 			pCont = new UPT_FLAG_TYPE[ nLen ];
569         const sal_Unicode* p = aContChars.getStr();
570         for ( sal_Int32 j=0; j<nLen; j++ )
571 		{
572 			pCont[j] = TOKEN_WORD;
573             if ( *p < nDefCnt )
574                 pTable[*p] |= TOKEN_WORD;
575 		}
576 	}
577 }
578 
579 
580 void cclass_Unicode::destroyParserTable()
581 {
582 	if ( pCont )
583 		delete [] pCont;
584 	if ( pStart )
585 		delete [] pStart;
586 	if ( pTable )
587 		delete [] pTable;
588 }
589 
590 
591 UPT_FLAG_TYPE cclass_Unicode::getFlags( const sal_Unicode* aStr, sal_Int32 nPos )
592 {
593 	UPT_FLAG_TYPE nMask;
594 	sal_Unicode c = aStr[nPos];
595 	if ( c < nDefCnt )
596 		nMask = pTable[ sal_uInt8(c) ];
597 	else
598 		nMask = getFlagsExtended( aStr, nPos );
599 	switch ( eState )
600 	{
601 		case ssGetChar :
602         case ssRewindFromValue :
603         case ssIgnoreLeadingInRewind :
604 		case ssGetWordFirstChar :
605 			if ( !(nMask & TOKEN_CHAR_WORD) )
606 			{
607 				nMask |= getStartCharsFlags( c );
608 				if ( nMask & TOKEN_CHAR_WORD )
609 					nMask &= ~TOKEN_EXCLUDED;
610 			}
611 		break;
612 		case ssGetValue :
613 		case ssGetWord :
614 			if ( !(nMask & TOKEN_WORD) )
615 			{
616 				nMask |= getContCharsFlags( c );
617 				if ( nMask & TOKEN_WORD )
618 					nMask &= ~TOKEN_EXCLUDED;
619 			}
620 		break;
621         default:
622             ;   // other cases aren't needed, no compiler warning
623 	}
624 	return nMask;
625 }
626 
627 
628 UPT_FLAG_TYPE cclass_Unicode::getFlagsExtended( const sal_Unicode* aStr, sal_Int32 nPos )
629 {
630 	sal_Unicode c = aStr[nPos];
631 	if ( c == cGroupSep )
632 		return TOKEN_VALUE;
633 	else if ( c == cDecimalSep )
634 		return TOKEN_CHAR_VALUE | TOKEN_VALUE;
635 	using namespace i18n;
636     bool bStart = (eState == ssGetChar || eState == ssGetWordFirstChar ||
637             eState == ssRewindFromValue || eState == ssIgnoreLeadingInRewind);
638 	sal_Int32 nTypes = (bStart ? nStartTypes : nContTypes);
639 
640 	//! all KParseTokens::UNI_... must be matched
641     switch ( u_charType( (sal_uInt32) c ) )
642 	{
643 		case U_UPPERCASE_LETTER :
644 			return (nTypes & KParseTokens::UNI_UPALPHA) ?
645 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
646 				TOKEN_ILLEGAL;
647 		case U_LOWERCASE_LETTER :
648 			return (nTypes & KParseTokens::UNI_LOALPHA) ?
649 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
650 				TOKEN_ILLEGAL;
651 		case U_TITLECASE_LETTER :
652 			return (nTypes & KParseTokens::UNI_TITLE_ALPHA) ?
653 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
654 				TOKEN_ILLEGAL;
655 		case U_MODIFIER_LETTER :
656 			return (nTypes & KParseTokens::UNI_MODIFIER_LETTER) ?
657 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
658 				TOKEN_ILLEGAL;
659 		case U_NON_SPACING_MARK :
660         case U_COMBINING_SPACING_MARK :
661             // Non_Spacing_Mark can't be a leading character,
662             // nor can a spacing combining mark.
663             if (bStart)
664                 return TOKEN_ILLEGAL;
665 			// fall through, treat it as Other_Letter.
666 		case U_OTHER_LETTER :
667 			return (nTypes & KParseTokens::UNI_OTHER_LETTER) ?
668 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
669 				TOKEN_ILLEGAL;
670 		case U_DECIMAL_DIGIT_NUMBER :
671 			return ((nTypes & KParseTokens::UNI_DIGIT) ?
672 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
673 				TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
674 		case U_LETTER_NUMBER :
675 			return ((nTypes & KParseTokens::UNI_LETTER_NUMBER) ?
676 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
677 				TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
678 		case U_OTHER_NUMBER :
679 			return ((nTypes & KParseTokens::UNI_OTHER_NUMBER) ?
680 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
681 				TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
682 		case U_SPACE_SEPARATOR :
683 			return ((nTypes & KParseTokens::IGNORE_LEADING_WS) ?
684 				TOKEN_CHAR_DONTCARE : (bStart ? TOKEN_CHAR_WORD : (TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP) ));
685 	}
686 
687 	return TOKEN_ILLEGAL;
688 }
689 
690 
691 UPT_FLAG_TYPE cclass_Unicode::getStartCharsFlags( sal_Unicode c )
692 {
693 	if ( pStart )
694 	{
695         const sal_Unicode* pStr = aStartChars.getStr();
696 		const sal_Unicode* p = StrChr( pStr, c );
697 		if ( p )
698 			return pStart[ p - pStr ];
699 	}
700 	return TOKEN_ILLEGAL;
701 }
702 
703 
704 UPT_FLAG_TYPE cclass_Unicode::getContCharsFlags( sal_Unicode c )
705 {
706 	if ( pCont )
707 	{
708         const sal_Unicode* pStr = aContChars.getStr();
709 		const sal_Unicode* p = StrChr( pStr, c );
710 		if ( p )
711 			return pCont[ p - pStr ];
712 	}
713 	return TOKEN_ILLEGAL;
714 }
715 
716 
717 void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32 nPos, sal_Int32 nTokenType )
718 {
719 	using namespace i18n;
720 	const sal_Unicode* const pTextStart = rText.getStr() + nPos;
721 	eState = ssGetChar;
722 
723     //! All the variables below (plus ParseResult) have to be resetted on ssRewindFromValue!
724 	const sal_Unicode* pSym = pTextStart;
725 	const sal_Unicode* pSrc = pSym;
726 	OUString aSymbol;
727 	sal_Unicode c = *pSrc;
728 	sal_Unicode cLast = 0;
729     int nDecSeps = 0;
730 	bool bQuote = false;
731 	bool bMightBeWord = true;
732 	bool bMightBeWordLast = true;
733     //! All the variables above (plus ParseResult) have to be resetted on ssRewindFromValue!
734 
735 	while ( (c != 0) && (eState != ssStop) )
736 	{
737 		UPT_FLAG_TYPE nMask = getFlags( pTextStart, pSrc - pTextStart );
738 		if ( nMask & TOKEN_EXCLUDED )
739 			eState = ssBounce;
740 		if ( bMightBeWord )
741 		{	// only relevant for ssGetValue fall back
742 			if ( eState == ssGetChar || eState == ssRewindFromValue ||
743                     eState == ssIgnoreLeadingInRewind )
744 				bMightBeWord = ((nMask & TOKEN_CHAR_WORD) != 0);
745 			else
746 				bMightBeWord = ((nMask & TOKEN_WORD) != 0);
747 		}
748 		sal_Int32 nParseTokensType = getParseTokensType( pTextStart, pSrc - pTextStart );
749 		pSrc++;
750 		switch (eState)
751 		{
752 			case ssGetChar :
753             case ssRewindFromValue :
754             case ssIgnoreLeadingInRewind :
755 			{
756                 if ( (nMask & TOKEN_CHAR_VALUE) && eState != ssRewindFromValue
757                         && eState != ssIgnoreLeadingInRewind )
758 				{	//! must be first, may fall back to ssGetWord via bMightBeWord
759 					eState = ssGetValue;
760 					if ( nMask & TOKEN_VALUE_DIGIT )
761                     {
762                         if ( 128 <= c )
763                             r.TokenType = KParseType::UNI_NUMBER;
764                         else
765                             r.TokenType = KParseType::ASC_NUMBER;
766                     }
767                     else if ( c == cDecimalSep )
768                     {
769                         if ( *pSrc )
770                             ++nDecSeps;
771                         else
772                             eState = ssRewindFromValue;
773                             // retry for ONE_SINGLE_CHAR or others
774                     }
775 				}
776 				else if ( nMask & TOKEN_CHAR_WORD )
777 				{
778 					eState = ssGetWord;
779 					r.TokenType = KParseType::IDENTNAME;
780 				}
781 				else if ( nMask & TOKEN_NAME_SEP )
782 				{
783 					eState = ssGetWordFirstChar;
784 					bQuote = true;
785 					pSym++;
786 					nParseTokensType = 0;	// will be taken of first real character
787 					r.TokenType = KParseType::SINGLE_QUOTE_NAME;
788 				}
789 				else if ( nMask & TOKEN_CHAR_STRING )
790 				{
791 					eState = ssGetString;
792 					pSym++;
793 					nParseTokensType = 0;	// will be taken of first real character
794 					r.TokenType = KParseType::DOUBLE_QUOTE_STRING;
795 				}
796 				else if ( nMask & TOKEN_CHAR_DONTCARE )
797 				{
798 					if ( nStartTypes & KParseTokens::IGNORE_LEADING_WS )
799 					{
800                         if (eState == ssRewindFromValue)
801                             eState = ssIgnoreLeadingInRewind;
802 						r.LeadingWhiteSpace++;
803 						pSym++;
804 						nParseTokensType = 0;	// wait until real character
805 						bMightBeWord = true;
806 					}
807 					else
808 						eState = ssBounce;
809 				}
810 				else if ( nMask & TOKEN_CHAR_BOOL )
811 				{
812 					eState = ssGetBool;
813 					r.TokenType = KParseType::BOOLEAN;
814 				}
815 				else if ( nMask & TOKEN_CHAR )
816 				{	//! must be last
817 					eState = ssStop;
818 					r.TokenType = KParseType::ONE_SINGLE_CHAR;
819 				}
820 				else
821 					eState = ssBounce;		// not known
822 			}
823 			break;
824 			case ssGetValue :
825 			{
826                 if ( nMask & TOKEN_VALUE_DIGIT )
827                 {
828                     if ( 128 <= c )
829                         r.TokenType = KParseType::UNI_NUMBER;
830                     else if ( r.TokenType != KParseType::UNI_NUMBER )
831                         r.TokenType = KParseType::ASC_NUMBER;
832                 }
833                 if ( nMask & TOKEN_VALUE )
834                 {
835                     if ( c == cDecimalSep && ++nDecSeps > 1 )
836                     {
837                         if ( pSrc - pTextStart == 2 )
838                             eState = ssRewindFromValue;
839                             // consecutive separators
840                         else
841                             eState = ssStopBack;
842                     }
843                     // else keep it going
844                 }
845 				else if ( c == 'E' || c == 'e' )
846 				{
847 					UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart );
848 					if ( nNext & TOKEN_VALUE_EXP )
849 						;	// keep it going
850 					else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
851 					{	// might be a numerical name (1.2efg)
852 						eState = ssGetWord;
853 						r.TokenType = KParseType::IDENTNAME;
854 					}
855 					else
856 						eState = ssStopBack;
857 				}
858 				else if ( nMask & TOKEN_VALUE_SIGN )
859 				{
860 					if ( (cLast == 'E') || (cLast == 'e') )
861 					{
862 						UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart );
863 						if ( nNext & TOKEN_VALUE_EXP_VALUE )
864 							;	// keep it going
865 						else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
866 						{	// might be a numerical name (1.2e+fg)
867 							eState = ssGetWord;
868 							r.TokenType = KParseType::IDENTNAME;
869 						}
870 						else
871 							eState = ssStopBack;
872 					}
873 					else if ( bMightBeWord )
874 					{	// might be a numerical name (1.2+fg)
875 						eState = ssGetWord;
876 						r.TokenType = KParseType::IDENTNAME;
877 					}
878 					else
879 						eState = ssStopBack;
880 				}
881 				else if ( bMightBeWord && (nMask & TOKEN_WORD) )
882 				{	// might be a numerical name (1995.A1)
883 					eState = ssGetWord;
884 					r.TokenType = KParseType::IDENTNAME;
885 				}
886 				else
887 					eState = ssStopBack;
888 			}
889 			break;
890 			case ssGetWordFirstChar :
891 				eState = ssGetWord;
892 				// fall thru
893 			case ssGetWord :
894 			{
895 				if ( nMask & TOKEN_WORD )
896 					;	// keep it going
897 				else if ( nMask & TOKEN_NAME_SEP )
898 				{
899 					if ( bQuote )
900 					{
901 						if ( cLast == '\\' )
902 						{	// escaped
903 							aSymbol += OUString( pSym, pSrc - pSym - 2 );
904 							aSymbol += OUString( &c, 1);
905 						}
906 						else
907 						{
908 							eState = ssStop;
909 							aSymbol += OUString( pSym, pSrc - pSym - 1 );
910 						}
911 						pSym = pSrc;
912 					}
913 					else
914 						eState = ssStopBack;
915 				}
916 				else if ( bQuote )
917 					;	// keep it going
918 				else
919 					eState = ssStopBack;
920 			}
921 			break;
922 			case ssGetString :
923 			{
924 				if ( nMask & TOKEN_STRING_SEP )
925 				{
926 					if ( cLast == '\\' )
927 					{	// escaped
928 						aSymbol += OUString( pSym, pSrc - pSym - 2 );
929 						aSymbol += OUString( &c, 1);
930 					}
931                     else if ( c == *pSrc &&
932                             !(nContTypes & KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING) )
933 					{	// "" => literal " escaped
934 						aSymbol += OUString( pSym, pSrc - pSym );
935 						pSrc++;
936 					}
937 					else
938 					{
939 						eState = ssStop;
940 						aSymbol += OUString( pSym, pSrc - pSym - 1 );
941 					}
942 					pSym = pSrc;
943 				}
944 			}
945 			break;
946 			case ssGetBool :
947 			{
948 				if ( (nMask & TOKEN_BOOL) )
949 					eState = ssStop;	// maximum 2: <, >, <>, <=, >=
950 				else
951 					eState = ssStopBack;
952 			}
953 			break;
954             case ssStopBack :
955             case ssBounce :
956             case ssStop :
957                 ;   // nothing, no compiler warning
958             break;
959 		}
960         if ( eState == ssRewindFromValue )
961         {
962             r = ParseResult();
963             pSym = pTextStart;
964             pSrc = pSym;
965             aSymbol = OUString();
966             c = *pSrc;
967             cLast = 0;
968             nDecSeps = 0;
969             bQuote = false;
970             bMightBeWord = true;
971             bMightBeWordLast = true;
972         }
973         else
974         {
975             if ( !(r.TokenType & nTokenType) )
976             {
977                 if ( (r.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER))
978                         && (nTokenType & KParseType::IDENTNAME) && bMightBeWord )
979                     ;	// keep a number that might be a word
980                 else if ( r.LeadingWhiteSpace == (pSrc - pTextStart) )
981                     ;	// keep ignored white space
982                 else if ( !r.TokenType && eState == ssGetValue && (nMask & TOKEN_VALUE_SEP) )
983                     ;   // keep uncertain value
984                 else
985                     eState = ssBounce;
986             }
987             if ( eState == ssBounce )
988             {
989                 r.TokenType = 0;
990                 eState = ssStopBack;
991             }
992             if ( eState == ssStopBack )
993             {	// put back
994                 pSrc--;
995                 bMightBeWord = bMightBeWordLast;
996                 eState = ssStop;
997             }
998             if ( eState != ssStop )
999             {
1000                 if ( !r.StartFlags )
1001                     r.StartFlags |= nParseTokensType;
1002                 else
1003                     r.ContFlags |= nParseTokensType;
1004             }
1005             bMightBeWordLast = bMightBeWord;
1006             cLast = c;
1007             c = *pSrc;
1008         }
1009 	}
1010 	// r.CharLen is the length in characters (not code points) of the parsed
1011 	// token not including any leading white space, change this calculation if
1012 	// multi-code-point Unicode characters are to be supported.
1013 	r.CharLen = pSrc - pTextStart - r.LeadingWhiteSpace;
1014 	r.EndPos = nPos + (pSrc - pTextStart);
1015 	if ( r.TokenType & KParseType::ASC_NUMBER )
1016 	{
1017         r.Value = rtl_math_uStringToDouble( pTextStart + r.LeadingWhiteSpace,
1018                 pTextStart + r.EndPos, cDecimalSep, cGroupSep, NULL, NULL );
1019 		if ( bMightBeWord )
1020 			r.TokenType |= KParseType::IDENTNAME;
1021 	}
1022 	else if ( r.TokenType & KParseType::UNI_NUMBER )
1023 	{
1024         if ( !xNatNumSup.is() )
1025         {
1026 #define NATIVENUMBERSUPPLIER_SERVICENAME "com.sun.star.i18n.NativeNumberSupplier"
1027             if ( xMSF.is() )
1028             {
1029                 xNatNumSup = Reference< XNativeNumberSupplier > (
1030                         xMSF->createInstance( OUString(
1031                                 RTL_CONSTASCII_USTRINGPARAM(
1032                                     NATIVENUMBERSUPPLIER_SERVICENAME ) ) ),
1033                         UNO_QUERY );
1034             }
1035             if ( !xNatNumSup.is() )
1036             {
1037                 throw RuntimeException( OUString(
1038 #ifdef DBG_UTIL
1039                     RTL_CONSTASCII_USTRINGPARAM(
1040                         "cclass_Unicode::parseText: can't instanciate "
1041                         NATIVENUMBERSUPPLIER_SERVICENAME )
1042 #endif
1043                     ), *this );
1044             }
1045 #undef NATIVENUMBERSUPPLIER_SERVICENAME
1046         }
1047         OUString aTmp( pTextStart + r.LeadingWhiteSpace, r.EndPos - nPos +
1048                 r.LeadingWhiteSpace );
1049         // transliterate to ASCII
1050         aTmp = xNatNumSup->getNativeNumberString( aTmp, aParserLocale,
1051                 NativeNumberMode::NATNUM0 );
1052         r.Value = ::rtl::math::stringToDouble( aTmp, cDecimalSep, cGroupSep, NULL, NULL );
1053 		if ( bMightBeWord )
1054 			r.TokenType |= KParseType::IDENTNAME;
1055 	}
1056 	else if ( r.TokenType & (KParseType::SINGLE_QUOTE_NAME | KParseType::DOUBLE_QUOTE_STRING) )
1057 	{
1058 		if ( pSym < pSrc )
1059 		{	//! open quote
1060 			aSymbol += OUString( pSym, pSrc - pSym );
1061 			r.TokenType |= KParseType::MISSING_QUOTE;
1062 		}
1063 		r.DequotedNameOrString = aSymbol;
1064 	}
1065 }
1066 
1067 } } } }
1068