1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
29*cdf0e10cSrcweir #include "precompiled_i18npool.hxx"
30*cdf0e10cSrcweir 
31*cdf0e10cSrcweir #include <cclass_unicode.hxx>
32*cdf0e10cSrcweir #include <unicode/uchar.h>
33*cdf0e10cSrcweir #include <rtl/math.hxx>
34*cdf0e10cSrcweir #include <rtl/ustring.hxx>
35*cdf0e10cSrcweir #include <com/sun/star/i18n/KParseTokens.hpp>
36*cdf0e10cSrcweir #include <com/sun/star/i18n/KParseType.hpp>
37*cdf0e10cSrcweir #include <com/sun/star/i18n/UnicodeType.hpp>
38*cdf0e10cSrcweir #include <com/sun/star/i18n/XLocaleData.hpp>
39*cdf0e10cSrcweir #include <com/sun/star/i18n/NativeNumberMode.hpp>
40*cdf0e10cSrcweir 
41*cdf0e10cSrcweir #include <string.h>		// memcpy()
42*cdf0e10cSrcweir 
43*cdf0e10cSrcweir using namespace ::com::sun::star::uno;
44*cdf0e10cSrcweir using namespace ::com::sun::star::lang;
45*cdf0e10cSrcweir using namespace ::rtl;
46*cdf0e10cSrcweir 
47*cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n {
48*cdf0e10cSrcweir 
49*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_ILLEGAL		= 0x00000000;
50*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR			= 0x00000001;
51*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_BOOL	= 0x00000002;
52*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_WORD	= 0x00000004;
53*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_VALUE	= 0x00000008;
54*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_STRING	= 0x00000010;
55*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_DONTCARE= 0x00000020;
56*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_BOOL			= 0x00000040;
57*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD			= 0x00000080;
58*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD_SEP		= 0x00000100;
59*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE		= 0x00000200;
60*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SEP	= 0x00000400;
61*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP	= 0x00000800;
62*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SIGN	= 0x00001000;
63*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP_VALUE	= 0x00002000;
64*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_DIGIT	= 0x00004000;
65*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_NAME_SEP		= 0x20000000;
66*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_STRING_SEP	= 0x40000000;
67*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_EXCLUDED		= 0x80000000;
68*cdf0e10cSrcweir 
69*cdf0e10cSrcweir #define TOKEN_DIGIT_FLAGS (TOKEN_CHAR_VALUE | TOKEN_VALUE | TOKEN_VALUE_EXP | TOKEN_VALUE_EXP_VALUE | TOKEN_VALUE_DIGIT)
70*cdf0e10cSrcweir 
71*cdf0e10cSrcweir // Default identifier/name specification is [A-Za-z_][A-Za-z0-9_]*
72*cdf0e10cSrcweir 
73*cdf0e10cSrcweir const sal_uInt8 cclass_Unicode::nDefCnt = 128;
74*cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::pDefaultParserTable[ nDefCnt ] =
75*cdf0e10cSrcweir {
76*cdf0e10cSrcweir // (...) == Calc formula compiler specific, commented out and modified
77*cdf0e10cSrcweir 
78*cdf0e10cSrcweir 	/* \0 */	TOKEN_EXCLUDED,
79*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
80*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
81*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
82*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
83*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
84*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
85*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
86*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
87*cdf0e10cSrcweir 	/*  9 \t */	TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,		// (TOKEN_ILLEGAL)
88*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
89*cdf0e10cSrcweir 	/* 11 \v */	TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,		// (TOKEN_ILLEGAL)
90*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
91*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
92*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
93*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
94*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
95*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
96*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
97*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
98*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
99*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
100*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
101*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
102*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
103*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
104*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
105*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
106*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
107*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
108*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
109*cdf0e10cSrcweir 				TOKEN_ILLEGAL,
110*cdf0e10cSrcweir 	/*  32   */	TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
111*cdf0e10cSrcweir 	/*  33 ! */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
112*cdf0e10cSrcweir 	/*  34 " */	TOKEN_CHAR_STRING | TOKEN_STRING_SEP,
113*cdf0e10cSrcweir 	/*  35 # */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_WORD_SEP)
114*cdf0e10cSrcweir 	/*  36 $ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_CHAR_WORD | TOKEN_WORD)
115*cdf0e10cSrcweir 	/*  37 % */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_VALUE)
116*cdf0e10cSrcweir 	/*  38 & */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
117*cdf0e10cSrcweir 	/*  39 ' */	TOKEN_NAME_SEP,
118*cdf0e10cSrcweir 	/*  40 ( */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
119*cdf0e10cSrcweir 	/*  41 ) */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
120*cdf0e10cSrcweir 	/*  42 * */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
121*cdf0e10cSrcweir 	/*  43 + */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN,
122*cdf0e10cSrcweir 	/*  44 , */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_CHAR_VALUE | TOKEN_VALUE)
123*cdf0e10cSrcweir 	/*  45 - */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN,
124*cdf0e10cSrcweir 	/*  46 . */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_WORD | TOKEN_CHAR_VALUE | TOKEN_VALUE)
125*cdf0e10cSrcweir 	/*  47 / */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
126*cdf0e10cSrcweir 	//for ( i = 48; i < 58; i++ )
127*cdf0e10cSrcweir 	/*  48 0 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
128*cdf0e10cSrcweir 	/*  49 1 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
129*cdf0e10cSrcweir 	/*  50 2 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
130*cdf0e10cSrcweir 	/*  51 3 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
131*cdf0e10cSrcweir 	/*  52 4 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
132*cdf0e10cSrcweir 	/*  53 5 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
133*cdf0e10cSrcweir 	/*  54 6 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
134*cdf0e10cSrcweir 	/*  55 7 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
135*cdf0e10cSrcweir 	/*  56 8 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
136*cdf0e10cSrcweir 	/*  57 9 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
137*cdf0e10cSrcweir 	/*  58 : */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_WORD)
138*cdf0e10cSrcweir 	/*  59 ; */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
139*cdf0e10cSrcweir 	/*  60 < */	TOKEN_CHAR_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
140*cdf0e10cSrcweir 	/*  61 = */	TOKEN_CHAR | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
141*cdf0e10cSrcweir 	/*  62 > */	TOKEN_CHAR_BOOL | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
142*cdf0e10cSrcweir 	/*  63 ? */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_CHAR_WORD | TOKEN_WORD)
143*cdf0e10cSrcweir 	/*  64 @ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
144*cdf0e10cSrcweir 	//for ( i = 65; i < 91; i++ )
145*cdf0e10cSrcweir 	/*  65 A */	TOKEN_CHAR_WORD | TOKEN_WORD,
146*cdf0e10cSrcweir 	/*  66 B */	TOKEN_CHAR_WORD | TOKEN_WORD,
147*cdf0e10cSrcweir 	/*  67 C */	TOKEN_CHAR_WORD | TOKEN_WORD,
148*cdf0e10cSrcweir 	/*  68 D */	TOKEN_CHAR_WORD | TOKEN_WORD,
149*cdf0e10cSrcweir 	/*  69 E */	TOKEN_CHAR_WORD | TOKEN_WORD,
150*cdf0e10cSrcweir 	/*  70 F */	TOKEN_CHAR_WORD | TOKEN_WORD,
151*cdf0e10cSrcweir 	/*  71 G */	TOKEN_CHAR_WORD | TOKEN_WORD,
152*cdf0e10cSrcweir 	/*  72 H */	TOKEN_CHAR_WORD | TOKEN_WORD,
153*cdf0e10cSrcweir 	/*  73 I */	TOKEN_CHAR_WORD | TOKEN_WORD,
154*cdf0e10cSrcweir 	/*  74 J */	TOKEN_CHAR_WORD | TOKEN_WORD,
155*cdf0e10cSrcweir 	/*  75 K */	TOKEN_CHAR_WORD | TOKEN_WORD,
156*cdf0e10cSrcweir 	/*  76 L */	TOKEN_CHAR_WORD | TOKEN_WORD,
157*cdf0e10cSrcweir 	/*  77 M */	TOKEN_CHAR_WORD | TOKEN_WORD,
158*cdf0e10cSrcweir 	/*  78 N */	TOKEN_CHAR_WORD | TOKEN_WORD,
159*cdf0e10cSrcweir 	/*  79 O */	TOKEN_CHAR_WORD | TOKEN_WORD,
160*cdf0e10cSrcweir 	/*  80 P */	TOKEN_CHAR_WORD | TOKEN_WORD,
161*cdf0e10cSrcweir 	/*  81 Q */	TOKEN_CHAR_WORD | TOKEN_WORD,
162*cdf0e10cSrcweir 	/*  82 R */	TOKEN_CHAR_WORD | TOKEN_WORD,
163*cdf0e10cSrcweir 	/*  83 S */	TOKEN_CHAR_WORD | TOKEN_WORD,
164*cdf0e10cSrcweir 	/*  84 T */	TOKEN_CHAR_WORD | TOKEN_WORD,
165*cdf0e10cSrcweir 	/*  85 U */	TOKEN_CHAR_WORD | TOKEN_WORD,
166*cdf0e10cSrcweir 	/*  86 V */	TOKEN_CHAR_WORD | TOKEN_WORD,
167*cdf0e10cSrcweir 	/*  87 W */	TOKEN_CHAR_WORD | TOKEN_WORD,
168*cdf0e10cSrcweir 	/*  88 X */	TOKEN_CHAR_WORD | TOKEN_WORD,
169*cdf0e10cSrcweir 	/*  89 Y */	TOKEN_CHAR_WORD | TOKEN_WORD,
170*cdf0e10cSrcweir 	/*  90 Z */	TOKEN_CHAR_WORD | TOKEN_WORD,
171*cdf0e10cSrcweir 	/*  91 [ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
172*cdf0e10cSrcweir 	/*  92 \ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
173*cdf0e10cSrcweir 	/*  93 ] */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
174*cdf0e10cSrcweir 	/*  94 ^ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
175*cdf0e10cSrcweir 	/*  95 _ */	TOKEN_CHAR_WORD | TOKEN_WORD,
176*cdf0e10cSrcweir 	/*  96 ` */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
177*cdf0e10cSrcweir 	//for ( i = 97; i < 123; i++ )
178*cdf0e10cSrcweir 	/*  97 a */	TOKEN_CHAR_WORD | TOKEN_WORD,
179*cdf0e10cSrcweir 	/*  98 b */	TOKEN_CHAR_WORD | TOKEN_WORD,
180*cdf0e10cSrcweir 	/*  99 c */	TOKEN_CHAR_WORD | TOKEN_WORD,
181*cdf0e10cSrcweir 	/* 100 d */	TOKEN_CHAR_WORD | TOKEN_WORD,
182*cdf0e10cSrcweir 	/* 101 e */	TOKEN_CHAR_WORD | TOKEN_WORD,
183*cdf0e10cSrcweir 	/* 102 f */	TOKEN_CHAR_WORD | TOKEN_WORD,
184*cdf0e10cSrcweir 	/* 103 g */	TOKEN_CHAR_WORD | TOKEN_WORD,
185*cdf0e10cSrcweir 	/* 104 h */	TOKEN_CHAR_WORD | TOKEN_WORD,
186*cdf0e10cSrcweir 	/* 105 i */	TOKEN_CHAR_WORD | TOKEN_WORD,
187*cdf0e10cSrcweir 	/* 106 j */	TOKEN_CHAR_WORD | TOKEN_WORD,
188*cdf0e10cSrcweir 	/* 107 k */	TOKEN_CHAR_WORD | TOKEN_WORD,
189*cdf0e10cSrcweir 	/* 108 l */	TOKEN_CHAR_WORD | TOKEN_WORD,
190*cdf0e10cSrcweir 	/* 109 m */	TOKEN_CHAR_WORD | TOKEN_WORD,
191*cdf0e10cSrcweir 	/* 110 n */	TOKEN_CHAR_WORD | TOKEN_WORD,
192*cdf0e10cSrcweir 	/* 111 o */	TOKEN_CHAR_WORD | TOKEN_WORD,
193*cdf0e10cSrcweir 	/* 112 p */	TOKEN_CHAR_WORD | TOKEN_WORD,
194*cdf0e10cSrcweir 	/* 113 q */	TOKEN_CHAR_WORD | TOKEN_WORD,
195*cdf0e10cSrcweir 	/* 114 r */	TOKEN_CHAR_WORD | TOKEN_WORD,
196*cdf0e10cSrcweir 	/* 115 s */	TOKEN_CHAR_WORD | TOKEN_WORD,
197*cdf0e10cSrcweir 	/* 116 t */	TOKEN_CHAR_WORD | TOKEN_WORD,
198*cdf0e10cSrcweir 	/* 117 u */	TOKEN_CHAR_WORD | TOKEN_WORD,
199*cdf0e10cSrcweir 	/* 118 v */	TOKEN_CHAR_WORD | TOKEN_WORD,
200*cdf0e10cSrcweir 	/* 119 w */	TOKEN_CHAR_WORD | TOKEN_WORD,
201*cdf0e10cSrcweir 	/* 120 x */	TOKEN_CHAR_WORD | TOKEN_WORD,
202*cdf0e10cSrcweir 	/* 121 y */	TOKEN_CHAR_WORD | TOKEN_WORD,
203*cdf0e10cSrcweir 	/* 122 z */	TOKEN_CHAR_WORD | TOKEN_WORD,
204*cdf0e10cSrcweir 	/* 123 { */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
205*cdf0e10cSrcweir 	/* 124 | */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
206*cdf0e10cSrcweir 	/* 125 } */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
207*cdf0e10cSrcweir 	/* 126 ~ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
208*cdf0e10cSrcweir 	/* 127   */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP	// (TOKEN_ILLEGAL // UNUSED)
209*cdf0e10cSrcweir };
210*cdf0e10cSrcweir 
211*cdf0e10cSrcweir 
212*cdf0e10cSrcweir const sal_Int32 cclass_Unicode::pParseTokensType[ nDefCnt ] =
213*cdf0e10cSrcweir {
214*cdf0e10cSrcweir 	/* \0 */	KParseTokens::ASC_OTHER,
215*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
216*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
217*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
218*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
219*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
220*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
221*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
222*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
223*cdf0e10cSrcweir 	/*  9 \t */	KParseTokens::ASC_CONTROL,
224*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
225*cdf0e10cSrcweir 	/* 11 \v */	KParseTokens::ASC_CONTROL,
226*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
227*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
228*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
229*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
230*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
231*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
232*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
233*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
234*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
235*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
236*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
237*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
238*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
239*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
240*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
241*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
242*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
243*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
244*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
245*cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
246*cdf0e10cSrcweir 	/*  32   */	KParseTokens::ASC_OTHER,
247*cdf0e10cSrcweir 	/*  33 ! */	KParseTokens::ASC_OTHER,
248*cdf0e10cSrcweir 	/*  34 " */	KParseTokens::ASC_OTHER,
249*cdf0e10cSrcweir 	/*  35 # */	KParseTokens::ASC_OTHER,
250*cdf0e10cSrcweir 	/*  36 $ */	KParseTokens::ASC_DOLLAR,
251*cdf0e10cSrcweir 	/*  37 % */	KParseTokens::ASC_OTHER,
252*cdf0e10cSrcweir 	/*  38 & */	KParseTokens::ASC_OTHER,
253*cdf0e10cSrcweir 	/*  39 ' */	KParseTokens::ASC_OTHER,
254*cdf0e10cSrcweir 	/*  40 ( */	KParseTokens::ASC_OTHER,
255*cdf0e10cSrcweir 	/*  41 ) */	KParseTokens::ASC_OTHER,
256*cdf0e10cSrcweir 	/*  42 * */	KParseTokens::ASC_OTHER,
257*cdf0e10cSrcweir 	/*  43 + */	KParseTokens::ASC_OTHER,
258*cdf0e10cSrcweir 	/*  44 , */	KParseTokens::ASC_OTHER,
259*cdf0e10cSrcweir 	/*  45 - */	KParseTokens::ASC_OTHER,
260*cdf0e10cSrcweir 	/*  46 . */	KParseTokens::ASC_DOT,
261*cdf0e10cSrcweir 	/*  47 / */	KParseTokens::ASC_OTHER,
262*cdf0e10cSrcweir 	//for ( i = 48; i < 58; i++ )
263*cdf0e10cSrcweir 	/*  48 0 */	KParseTokens::ASC_DIGIT,
264*cdf0e10cSrcweir 	/*  49 1 */	KParseTokens::ASC_DIGIT,
265*cdf0e10cSrcweir 	/*  50 2 */	KParseTokens::ASC_DIGIT,
266*cdf0e10cSrcweir 	/*  51 3 */	KParseTokens::ASC_DIGIT,
267*cdf0e10cSrcweir 	/*  52 4 */	KParseTokens::ASC_DIGIT,
268*cdf0e10cSrcweir 	/*  53 5 */	KParseTokens::ASC_DIGIT,
269*cdf0e10cSrcweir 	/*  54 6 */	KParseTokens::ASC_DIGIT,
270*cdf0e10cSrcweir 	/*  55 7 */	KParseTokens::ASC_DIGIT,
271*cdf0e10cSrcweir 	/*  56 8 */	KParseTokens::ASC_DIGIT,
272*cdf0e10cSrcweir 	/*  57 9 */	KParseTokens::ASC_DIGIT,
273*cdf0e10cSrcweir 	/*  58 : */	KParseTokens::ASC_COLON,
274*cdf0e10cSrcweir 	/*  59 ; */	KParseTokens::ASC_OTHER,
275*cdf0e10cSrcweir 	/*  60 < */	KParseTokens::ASC_OTHER,
276*cdf0e10cSrcweir 	/*  61 = */	KParseTokens::ASC_OTHER,
277*cdf0e10cSrcweir 	/*  62 > */	KParseTokens::ASC_OTHER,
278*cdf0e10cSrcweir 	/*  63 ? */	KParseTokens::ASC_OTHER,
279*cdf0e10cSrcweir 	/*  64 @ */	KParseTokens::ASC_OTHER,
280*cdf0e10cSrcweir 	//for ( i = 65; i < 91; i++ )
281*cdf0e10cSrcweir 	/*  65 A */	KParseTokens::ASC_UPALPHA,
282*cdf0e10cSrcweir 	/*  66 B */	KParseTokens::ASC_UPALPHA,
283*cdf0e10cSrcweir 	/*  67 C */	KParseTokens::ASC_UPALPHA,
284*cdf0e10cSrcweir 	/*  68 D */	KParseTokens::ASC_UPALPHA,
285*cdf0e10cSrcweir 	/*  69 E */	KParseTokens::ASC_UPALPHA,
286*cdf0e10cSrcweir 	/*  70 F */	KParseTokens::ASC_UPALPHA,
287*cdf0e10cSrcweir 	/*  71 G */	KParseTokens::ASC_UPALPHA,
288*cdf0e10cSrcweir 	/*  72 H */	KParseTokens::ASC_UPALPHA,
289*cdf0e10cSrcweir 	/*  73 I */	KParseTokens::ASC_UPALPHA,
290*cdf0e10cSrcweir 	/*  74 J */	KParseTokens::ASC_UPALPHA,
291*cdf0e10cSrcweir 	/*  75 K */	KParseTokens::ASC_UPALPHA,
292*cdf0e10cSrcweir 	/*  76 L */	KParseTokens::ASC_UPALPHA,
293*cdf0e10cSrcweir 	/*  77 M */	KParseTokens::ASC_UPALPHA,
294*cdf0e10cSrcweir 	/*  78 N */	KParseTokens::ASC_UPALPHA,
295*cdf0e10cSrcweir 	/*  79 O */	KParseTokens::ASC_UPALPHA,
296*cdf0e10cSrcweir 	/*  80 P */	KParseTokens::ASC_UPALPHA,
297*cdf0e10cSrcweir 	/*  81 Q */	KParseTokens::ASC_UPALPHA,
298*cdf0e10cSrcweir 	/*  82 R */	KParseTokens::ASC_UPALPHA,
299*cdf0e10cSrcweir 	/*  83 S */	KParseTokens::ASC_UPALPHA,
300*cdf0e10cSrcweir 	/*  84 T */	KParseTokens::ASC_UPALPHA,
301*cdf0e10cSrcweir 	/*  85 U */	KParseTokens::ASC_UPALPHA,
302*cdf0e10cSrcweir 	/*  86 V */	KParseTokens::ASC_UPALPHA,
303*cdf0e10cSrcweir 	/*  87 W */	KParseTokens::ASC_UPALPHA,
304*cdf0e10cSrcweir 	/*  88 X */	KParseTokens::ASC_UPALPHA,
305*cdf0e10cSrcweir 	/*  89 Y */	KParseTokens::ASC_UPALPHA,
306*cdf0e10cSrcweir 	/*  90 Z */	KParseTokens::ASC_UPALPHA,
307*cdf0e10cSrcweir 	/*  91 [ */	KParseTokens::ASC_OTHER,
308*cdf0e10cSrcweir 	/*  92 \ */	KParseTokens::ASC_OTHER,
309*cdf0e10cSrcweir 	/*  93 ] */	KParseTokens::ASC_OTHER,
310*cdf0e10cSrcweir 	/*  94 ^ */	KParseTokens::ASC_OTHER,
311*cdf0e10cSrcweir 	/*  95 _ */	KParseTokens::ASC_UNDERSCORE,
312*cdf0e10cSrcweir 	/*  96 ` */	KParseTokens::ASC_OTHER,
313*cdf0e10cSrcweir 	//for ( i = 97; i < 123; i++ )
314*cdf0e10cSrcweir 	/*  97 a */	KParseTokens::ASC_LOALPHA,
315*cdf0e10cSrcweir 	/*  98 b */	KParseTokens::ASC_LOALPHA,
316*cdf0e10cSrcweir 	/*  99 c */	KParseTokens::ASC_LOALPHA,
317*cdf0e10cSrcweir 	/* 100 d */	KParseTokens::ASC_LOALPHA,
318*cdf0e10cSrcweir 	/* 101 e */	KParseTokens::ASC_LOALPHA,
319*cdf0e10cSrcweir 	/* 102 f */	KParseTokens::ASC_LOALPHA,
320*cdf0e10cSrcweir 	/* 103 g */	KParseTokens::ASC_LOALPHA,
321*cdf0e10cSrcweir 	/* 104 h */	KParseTokens::ASC_LOALPHA,
322*cdf0e10cSrcweir 	/* 105 i */	KParseTokens::ASC_LOALPHA,
323*cdf0e10cSrcweir 	/* 106 j */	KParseTokens::ASC_LOALPHA,
324*cdf0e10cSrcweir 	/* 107 k */	KParseTokens::ASC_LOALPHA,
325*cdf0e10cSrcweir 	/* 108 l */	KParseTokens::ASC_LOALPHA,
326*cdf0e10cSrcweir 	/* 109 m */	KParseTokens::ASC_LOALPHA,
327*cdf0e10cSrcweir 	/* 110 n */	KParseTokens::ASC_LOALPHA,
328*cdf0e10cSrcweir 	/* 111 o */	KParseTokens::ASC_LOALPHA,
329*cdf0e10cSrcweir 	/* 112 p */	KParseTokens::ASC_LOALPHA,
330*cdf0e10cSrcweir 	/* 113 q */	KParseTokens::ASC_LOALPHA,
331*cdf0e10cSrcweir 	/* 114 r */	KParseTokens::ASC_LOALPHA,
332*cdf0e10cSrcweir 	/* 115 s */	KParseTokens::ASC_LOALPHA,
333*cdf0e10cSrcweir 	/* 116 t */	KParseTokens::ASC_LOALPHA,
334*cdf0e10cSrcweir 	/* 117 u */	KParseTokens::ASC_LOALPHA,
335*cdf0e10cSrcweir 	/* 118 v */	KParseTokens::ASC_LOALPHA,
336*cdf0e10cSrcweir 	/* 119 w */	KParseTokens::ASC_LOALPHA,
337*cdf0e10cSrcweir 	/* 120 x */	KParseTokens::ASC_LOALPHA,
338*cdf0e10cSrcweir 	/* 121 y */	KParseTokens::ASC_LOALPHA,
339*cdf0e10cSrcweir 	/* 122 z */	KParseTokens::ASC_LOALPHA,
340*cdf0e10cSrcweir 	/* 123 { */	KParseTokens::ASC_OTHER,
341*cdf0e10cSrcweir 	/* 124 | */	KParseTokens::ASC_OTHER,
342*cdf0e10cSrcweir 	/* 125 } */	KParseTokens::ASC_OTHER,
343*cdf0e10cSrcweir 	/* 126 ~ */	KParseTokens::ASC_OTHER,
344*cdf0e10cSrcweir 	/* 127   */	KParseTokens::ASC_OTHER
345*cdf0e10cSrcweir };
346*cdf0e10cSrcweir 
347*cdf0e10cSrcweir 
348*cdf0e10cSrcweir // static
349*cdf0e10cSrcweir const sal_Unicode* cclass_Unicode::StrChr( const sal_Unicode* pStr, sal_Unicode c )
350*cdf0e10cSrcweir {
351*cdf0e10cSrcweir 	if ( !pStr )
352*cdf0e10cSrcweir 		return NULL;
353*cdf0e10cSrcweir 	while ( *pStr )
354*cdf0e10cSrcweir 	{
355*cdf0e10cSrcweir 		if ( *pStr == c )
356*cdf0e10cSrcweir 			return pStr;
357*cdf0e10cSrcweir 		pStr++;
358*cdf0e10cSrcweir 	}
359*cdf0e10cSrcweir 	return NULL;
360*cdf0e10cSrcweir }
361*cdf0e10cSrcweir 
362*cdf0e10cSrcweir 
363*cdf0e10cSrcweir sal_Int32 cclass_Unicode::getParseTokensType( const sal_Unicode* aStr, sal_Int32 nPos )
364*cdf0e10cSrcweir {
365*cdf0e10cSrcweir 	sal_Unicode c = aStr[nPos];
366*cdf0e10cSrcweir 	if ( c < nDefCnt )
367*cdf0e10cSrcweir 		return pParseTokensType[ sal_uInt8(c) ];
368*cdf0e10cSrcweir 	else
369*cdf0e10cSrcweir 	{
370*cdf0e10cSrcweir 
371*cdf0e10cSrcweir 		//! all KParseTokens::UNI_... must be matched
372*cdf0e10cSrcweir         switch ( u_charType( (sal_uInt32) c ) )
373*cdf0e10cSrcweir 		{
374*cdf0e10cSrcweir 			case U_UPPERCASE_LETTER :
375*cdf0e10cSrcweir 				return KParseTokens::UNI_UPALPHA;
376*cdf0e10cSrcweir 			case U_LOWERCASE_LETTER :
377*cdf0e10cSrcweir 				return KParseTokens::UNI_LOALPHA;
378*cdf0e10cSrcweir 			case U_TITLECASE_LETTER :
379*cdf0e10cSrcweir 				return KParseTokens::UNI_TITLE_ALPHA;
380*cdf0e10cSrcweir 			case U_MODIFIER_LETTER :
381*cdf0e10cSrcweir 				return KParseTokens::UNI_MODIFIER_LETTER;
382*cdf0e10cSrcweir 			case U_OTHER_LETTER :
383*cdf0e10cSrcweir 				// Non_Spacing_Mark could not be as leading character
384*cdf0e10cSrcweir 				if (nPos == 0) break;
385*cdf0e10cSrcweir 				// fall through, treat it as Other_Letter.
386*cdf0e10cSrcweir 			case U_NON_SPACING_MARK :
387*cdf0e10cSrcweir 				return KParseTokens::UNI_OTHER_LETTER;
388*cdf0e10cSrcweir 			case U_DECIMAL_DIGIT_NUMBER :
389*cdf0e10cSrcweir 				return KParseTokens::UNI_DIGIT;
390*cdf0e10cSrcweir 			case U_LETTER_NUMBER :
391*cdf0e10cSrcweir 				return KParseTokens::UNI_LETTER_NUMBER;
392*cdf0e10cSrcweir 			case U_OTHER_NUMBER :
393*cdf0e10cSrcweir 				return KParseTokens::UNI_OTHER_NUMBER;
394*cdf0e10cSrcweir 		}
395*cdf0e10cSrcweir 
396*cdf0e10cSrcweir 		return KParseTokens::UNI_OTHER;
397*cdf0e10cSrcweir 	}
398*cdf0e10cSrcweir }
399*cdf0e10cSrcweir 
400*cdf0e10cSrcweir sal_Bool cclass_Unicode::setupInternational( const Locale& rLocale )
401*cdf0e10cSrcweir {
402*cdf0e10cSrcweir 	sal_Bool bChanged = (aParserLocale.Language != rLocale.Language
403*cdf0e10cSrcweir 		|| aParserLocale.Country != rLocale.Country
404*cdf0e10cSrcweir 		|| aParserLocale.Variant != rLocale.Variant);
405*cdf0e10cSrcweir 	if ( bChanged )
406*cdf0e10cSrcweir 	{
407*cdf0e10cSrcweir 		aParserLocale.Language = rLocale.Language;
408*cdf0e10cSrcweir 		aParserLocale.Country = rLocale.Country;
409*cdf0e10cSrcweir 		aParserLocale.Variant = rLocale.Variant;
410*cdf0e10cSrcweir 	}
411*cdf0e10cSrcweir 	if ( !xLocaleData.is() && xMSF.is() )
412*cdf0e10cSrcweir 	{
413*cdf0e10cSrcweir 		Reference <
414*cdf0e10cSrcweir 			XInterface > xI =
415*cdf0e10cSrcweir 			xMSF->createInstance( OUString(
416*cdf0e10cSrcweir 			RTL_CONSTASCII_USTRINGPARAM( "com.sun.star.i18n.LocaleData" ) ) );
417*cdf0e10cSrcweir 		if ( xI.is() )
418*cdf0e10cSrcweir 		{
419*cdf0e10cSrcweir 			Any x = xI->queryInterface( getCppuType((const Reference< XLocaleData>*)0) );
420*cdf0e10cSrcweir 			x >>= xLocaleData;
421*cdf0e10cSrcweir 		}
422*cdf0e10cSrcweir 	}
423*cdf0e10cSrcweir 	return bChanged;
424*cdf0e10cSrcweir }
425*cdf0e10cSrcweir 
426*cdf0e10cSrcweir 
427*cdf0e10cSrcweir void cclass_Unicode::setupParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
428*cdf0e10cSrcweir             const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
429*cdf0e10cSrcweir             const OUString& userDefinedCharactersCont )
430*cdf0e10cSrcweir {
431*cdf0e10cSrcweir 	bool bIntlEqual = (rLocale.Language == aParserLocale.Language &&
432*cdf0e10cSrcweir 		rLocale.Country == aParserLocale.Country &&
433*cdf0e10cSrcweir 		rLocale.Variant == aParserLocale.Variant);
434*cdf0e10cSrcweir 	if ( !pTable || !bIntlEqual ||
435*cdf0e10cSrcweir 			startCharTokenType != nStartTypes ||
436*cdf0e10cSrcweir 			contCharTokenType != nContTypes ||
437*cdf0e10cSrcweir 			userDefinedCharactersStart != aStartChars ||
438*cdf0e10cSrcweir 			userDefinedCharactersCont != aContChars )
439*cdf0e10cSrcweir 		initParserTable( rLocale, startCharTokenType, userDefinedCharactersStart,
440*cdf0e10cSrcweir 			contCharTokenType, userDefinedCharactersCont );
441*cdf0e10cSrcweir }
442*cdf0e10cSrcweir 
443*cdf0e10cSrcweir 
444*cdf0e10cSrcweir void cclass_Unicode::initParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
445*cdf0e10cSrcweir             const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
446*cdf0e10cSrcweir             const OUString& userDefinedCharactersCont )
447*cdf0e10cSrcweir {
448*cdf0e10cSrcweir 	// (Re)Init
449*cdf0e10cSrcweir 	setupInternational( rLocale );
450*cdf0e10cSrcweir 	// Memory of pTable is reused.
451*cdf0e10cSrcweir 	if ( !pTable )
452*cdf0e10cSrcweir 		pTable = new UPT_FLAG_TYPE[nDefCnt];
453*cdf0e10cSrcweir 	memcpy( pTable, pDefaultParserTable, sizeof(UPT_FLAG_TYPE) * nDefCnt );
454*cdf0e10cSrcweir 	// Start and cont tables only need reallocation if different length.
455*cdf0e10cSrcweir     if ( pStart && userDefinedCharactersStart.getLength() != aStartChars.getLength() )
456*cdf0e10cSrcweir 	{
457*cdf0e10cSrcweir 		delete [] pStart;
458*cdf0e10cSrcweir 		pStart = NULL;
459*cdf0e10cSrcweir 	}
460*cdf0e10cSrcweir     if ( pCont && userDefinedCharactersCont.getLength() != aContChars.getLength() )
461*cdf0e10cSrcweir 	{
462*cdf0e10cSrcweir 		delete [] pCont;
463*cdf0e10cSrcweir 		pCont = NULL;
464*cdf0e10cSrcweir 	}
465*cdf0e10cSrcweir 	nStartTypes = startCharTokenType;
466*cdf0e10cSrcweir 	nContTypes = contCharTokenType;
467*cdf0e10cSrcweir 	aStartChars = userDefinedCharactersStart;
468*cdf0e10cSrcweir 	aContChars = userDefinedCharactersCont;
469*cdf0e10cSrcweir 
470*cdf0e10cSrcweir 	// specials
471*cdf0e10cSrcweir 	if( xLocaleData.is() )
472*cdf0e10cSrcweir 	{
473*cdf0e10cSrcweir 		LocaleDataItem aItem =
474*cdf0e10cSrcweir 			xLocaleData->getLocaleItem( aParserLocale );
475*cdf0e10cSrcweir //!TODO: theoretically separators may be a string, adjustment would have to be
476*cdf0e10cSrcweir //! done here and in parsing and in ::rtl::math::stringToDouble()
477*cdf0e10cSrcweir 		cGroupSep = aItem.thousandSeparator.getStr()[0];
478*cdf0e10cSrcweir         cDecimalSep = aItem.decimalSeparator.getStr()[0];
479*cdf0e10cSrcweir 	}
480*cdf0e10cSrcweir 
481*cdf0e10cSrcweir 	if ( cGroupSep < nDefCnt )
482*cdf0e10cSrcweir 		pTable[cGroupSep] |= TOKEN_VALUE;
483*cdf0e10cSrcweir 	if ( cDecimalSep < nDefCnt )
484*cdf0e10cSrcweir 		pTable[cDecimalSep] |= TOKEN_CHAR_VALUE | TOKEN_VALUE;
485*cdf0e10cSrcweir 
486*cdf0e10cSrcweir 	// Modify characters according to KParseTokens definitions.
487*cdf0e10cSrcweir 	{
488*cdf0e10cSrcweir 		using namespace KParseTokens;
489*cdf0e10cSrcweir 		sal_uInt8 i;
490*cdf0e10cSrcweir 
491*cdf0e10cSrcweir 		if ( !(nStartTypes & ASC_UPALPHA) )
492*cdf0e10cSrcweir 			for ( i = 65; i < 91; i++ )
493*cdf0e10cSrcweir 				pTable[i] &= ~TOKEN_CHAR_WORD;	// not allowed as start character
494*cdf0e10cSrcweir 		if ( !(nContTypes & ASC_UPALPHA) )
495*cdf0e10cSrcweir 			for ( i = 65; i < 91; i++ )
496*cdf0e10cSrcweir 				pTable[i] &= ~TOKEN_WORD;		// not allowed as cont character
497*cdf0e10cSrcweir 
498*cdf0e10cSrcweir 		if ( !(nStartTypes & ASC_LOALPHA) )
499*cdf0e10cSrcweir 			for ( i = 97; i < 123; i++ )
500*cdf0e10cSrcweir 				pTable[i] &= ~TOKEN_CHAR_WORD;	// not allowed as start character
501*cdf0e10cSrcweir 		if ( !(nContTypes & ASC_LOALPHA) )
502*cdf0e10cSrcweir 			for ( i = 97; i < 123; i++ )
503*cdf0e10cSrcweir 				pTable[i] &= ~TOKEN_WORD;		// not allowed as cont character
504*cdf0e10cSrcweir 
505*cdf0e10cSrcweir 		if ( nStartTypes & ASC_DIGIT )
506*cdf0e10cSrcweir 			for ( i = 48; i < 58; i++ )
507*cdf0e10cSrcweir 				pTable[i] |= TOKEN_CHAR_WORD;	// allowed as start character
508*cdf0e10cSrcweir 		if ( !(nContTypes & ASC_DIGIT) )
509*cdf0e10cSrcweir 			for ( i = 48; i < 58; i++ )
510*cdf0e10cSrcweir 				pTable[i] &= ~TOKEN_WORD;		// not allowed as cont character
511*cdf0e10cSrcweir 
512*cdf0e10cSrcweir 		if ( !(nStartTypes & ASC_UNDERSCORE) )
513*cdf0e10cSrcweir 			pTable[95] &= ~TOKEN_CHAR_WORD;		// not allowed as start character
514*cdf0e10cSrcweir 		if ( !(nContTypes & ASC_UNDERSCORE) )
515*cdf0e10cSrcweir 			pTable[95] &= ~TOKEN_WORD;			// not allowed as cont character
516*cdf0e10cSrcweir 
517*cdf0e10cSrcweir 		if ( nStartTypes & ASC_DOLLAR )
518*cdf0e10cSrcweir 			pTable[36] |= TOKEN_CHAR_WORD;		// allowed as start character
519*cdf0e10cSrcweir 		if ( nContTypes & ASC_DOLLAR )
520*cdf0e10cSrcweir 			pTable[36] |= TOKEN_WORD;			// allowed as cont character
521*cdf0e10cSrcweir 
522*cdf0e10cSrcweir 		if ( nStartTypes & ASC_DOT )
523*cdf0e10cSrcweir 			pTable[46] |= TOKEN_CHAR_WORD;		// allowed as start character
524*cdf0e10cSrcweir 		if ( nContTypes & ASC_DOT )
525*cdf0e10cSrcweir 			pTable[46] |= TOKEN_WORD;			// allowed as cont character
526*cdf0e10cSrcweir 
527*cdf0e10cSrcweir 		if ( nStartTypes & ASC_COLON )
528*cdf0e10cSrcweir 			pTable[58] |= TOKEN_CHAR_WORD;		// allowed as start character
529*cdf0e10cSrcweir 		if ( nContTypes & ASC_COLON )
530*cdf0e10cSrcweir 			pTable[58] |= TOKEN_WORD;			// allowed as cont character
531*cdf0e10cSrcweir 
532*cdf0e10cSrcweir 		if ( nStartTypes & ASC_CONTROL )
533*cdf0e10cSrcweir 			for ( i = 1; i < 32; i++ )
534*cdf0e10cSrcweir 				pTable[i] |= TOKEN_CHAR_WORD;	// allowed as start character
535*cdf0e10cSrcweir 		if ( nContTypes & ASC_CONTROL )
536*cdf0e10cSrcweir 			for ( i = 1; i < 32; i++ )
537*cdf0e10cSrcweir 				pTable[i] |= TOKEN_WORD;		// allowed as cont character
538*cdf0e10cSrcweir 
539*cdf0e10cSrcweir 		if ( nStartTypes & ASC_ANY_BUT_CONTROL )
540*cdf0e10cSrcweir 			for ( i = 32; i < nDefCnt; i++ )
541*cdf0e10cSrcweir 				pTable[i] |= TOKEN_CHAR_WORD;	// allowed as start character
542*cdf0e10cSrcweir 		if ( nContTypes & ASC_ANY_BUT_CONTROL )
543*cdf0e10cSrcweir 			for ( i = 32; i < nDefCnt; i++ )
544*cdf0e10cSrcweir 				pTable[i] |= TOKEN_WORD;		// allowed as cont character
545*cdf0e10cSrcweir 
546*cdf0e10cSrcweir 	}
547*cdf0e10cSrcweir 
548*cdf0e10cSrcweir 	// Merge in (positively override with) user defined characters.
549*cdf0e10cSrcweir 	// StartChars
550*cdf0e10cSrcweir     sal_Int32 nLen = aStartChars.getLength();
551*cdf0e10cSrcweir 	if ( nLen )
552*cdf0e10cSrcweir 	{
553*cdf0e10cSrcweir 		if ( !pStart )
554*cdf0e10cSrcweir 			pStart = new UPT_FLAG_TYPE[ nLen ];
555*cdf0e10cSrcweir         const sal_Unicode* p = aStartChars.getStr();
556*cdf0e10cSrcweir         for ( sal_Int32 j=0; j<nLen; j++, p++ )
557*cdf0e10cSrcweir 		{
558*cdf0e10cSrcweir 			pStart[j] = TOKEN_CHAR_WORD;
559*cdf0e10cSrcweir             if ( *p < nDefCnt )
560*cdf0e10cSrcweir                 pTable[*p] |= TOKEN_CHAR_WORD;
561*cdf0e10cSrcweir 		}
562*cdf0e10cSrcweir 	}
563*cdf0e10cSrcweir 	// ContChars
564*cdf0e10cSrcweir     nLen = aContChars.getLength();
565*cdf0e10cSrcweir 	if ( nLen )
566*cdf0e10cSrcweir 	{
567*cdf0e10cSrcweir 		if ( !pCont )
568*cdf0e10cSrcweir 			pCont = new UPT_FLAG_TYPE[ nLen ];
569*cdf0e10cSrcweir         const sal_Unicode* p = aContChars.getStr();
570*cdf0e10cSrcweir         for ( sal_Int32 j=0; j<nLen; j++ )
571*cdf0e10cSrcweir 		{
572*cdf0e10cSrcweir 			pCont[j] = TOKEN_WORD;
573*cdf0e10cSrcweir             if ( *p < nDefCnt )
574*cdf0e10cSrcweir                 pTable[*p] |= TOKEN_WORD;
575*cdf0e10cSrcweir 		}
576*cdf0e10cSrcweir 	}
577*cdf0e10cSrcweir }
578*cdf0e10cSrcweir 
579*cdf0e10cSrcweir 
580*cdf0e10cSrcweir void cclass_Unicode::destroyParserTable()
581*cdf0e10cSrcweir {
582*cdf0e10cSrcweir 	if ( pCont )
583*cdf0e10cSrcweir 		delete [] pCont;
584*cdf0e10cSrcweir 	if ( pStart )
585*cdf0e10cSrcweir 		delete [] pStart;
586*cdf0e10cSrcweir 	if ( pTable )
587*cdf0e10cSrcweir 		delete [] pTable;
588*cdf0e10cSrcweir }
589*cdf0e10cSrcweir 
590*cdf0e10cSrcweir 
591*cdf0e10cSrcweir UPT_FLAG_TYPE cclass_Unicode::getFlags( const sal_Unicode* aStr, sal_Int32 nPos )
592*cdf0e10cSrcweir {
593*cdf0e10cSrcweir 	UPT_FLAG_TYPE nMask;
594*cdf0e10cSrcweir 	sal_Unicode c = aStr[nPos];
595*cdf0e10cSrcweir 	if ( c < nDefCnt )
596*cdf0e10cSrcweir 		nMask = pTable[ sal_uInt8(c) ];
597*cdf0e10cSrcweir 	else
598*cdf0e10cSrcweir 		nMask = getFlagsExtended( aStr, nPos );
599*cdf0e10cSrcweir 	switch ( eState )
600*cdf0e10cSrcweir 	{
601*cdf0e10cSrcweir 		case ssGetChar :
602*cdf0e10cSrcweir         case ssRewindFromValue :
603*cdf0e10cSrcweir         case ssIgnoreLeadingInRewind :
604*cdf0e10cSrcweir 		case ssGetWordFirstChar :
605*cdf0e10cSrcweir 			if ( !(nMask & TOKEN_CHAR_WORD) )
606*cdf0e10cSrcweir 			{
607*cdf0e10cSrcweir 				nMask |= getStartCharsFlags( c );
608*cdf0e10cSrcweir 				if ( nMask & TOKEN_CHAR_WORD )
609*cdf0e10cSrcweir 					nMask &= ~TOKEN_EXCLUDED;
610*cdf0e10cSrcweir 			}
611*cdf0e10cSrcweir 		break;
612*cdf0e10cSrcweir 		case ssGetValue :
613*cdf0e10cSrcweir 		case ssGetWord :
614*cdf0e10cSrcweir 			if ( !(nMask & TOKEN_WORD) )
615*cdf0e10cSrcweir 			{
616*cdf0e10cSrcweir 				nMask |= getContCharsFlags( c );
617*cdf0e10cSrcweir 				if ( nMask & TOKEN_WORD )
618*cdf0e10cSrcweir 					nMask &= ~TOKEN_EXCLUDED;
619*cdf0e10cSrcweir 			}
620*cdf0e10cSrcweir 		break;
621*cdf0e10cSrcweir         default:
622*cdf0e10cSrcweir             ;   // other cases aren't needed, no compiler warning
623*cdf0e10cSrcweir 	}
624*cdf0e10cSrcweir 	return nMask;
625*cdf0e10cSrcweir }
626*cdf0e10cSrcweir 
627*cdf0e10cSrcweir 
628*cdf0e10cSrcweir UPT_FLAG_TYPE cclass_Unicode::getFlagsExtended( const sal_Unicode* aStr, sal_Int32 nPos )
629*cdf0e10cSrcweir {
630*cdf0e10cSrcweir 	sal_Unicode c = aStr[nPos];
631*cdf0e10cSrcweir 	if ( c == cGroupSep )
632*cdf0e10cSrcweir 		return TOKEN_VALUE;
633*cdf0e10cSrcweir 	else if ( c == cDecimalSep )
634*cdf0e10cSrcweir 		return TOKEN_CHAR_VALUE | TOKEN_VALUE;
635*cdf0e10cSrcweir 	using namespace i18n;
636*cdf0e10cSrcweir     bool bStart = (eState == ssGetChar || eState == ssGetWordFirstChar ||
637*cdf0e10cSrcweir             eState == ssRewindFromValue || eState == ssIgnoreLeadingInRewind);
638*cdf0e10cSrcweir 	sal_Int32 nTypes = (bStart ? nStartTypes : nContTypes);
639*cdf0e10cSrcweir 
640*cdf0e10cSrcweir 	//! all KParseTokens::UNI_... must be matched
641*cdf0e10cSrcweir     switch ( u_charType( (sal_uInt32) c ) )
642*cdf0e10cSrcweir 	{
643*cdf0e10cSrcweir 		case U_UPPERCASE_LETTER :
644*cdf0e10cSrcweir 			return (nTypes & KParseTokens::UNI_UPALPHA) ?
645*cdf0e10cSrcweir 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
646*cdf0e10cSrcweir 				TOKEN_ILLEGAL;
647*cdf0e10cSrcweir 		case U_LOWERCASE_LETTER :
648*cdf0e10cSrcweir 			return (nTypes & KParseTokens::UNI_LOALPHA) ?
649*cdf0e10cSrcweir 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
650*cdf0e10cSrcweir 				TOKEN_ILLEGAL;
651*cdf0e10cSrcweir 		case U_TITLECASE_LETTER :
652*cdf0e10cSrcweir 			return (nTypes & KParseTokens::UNI_TITLE_ALPHA) ?
653*cdf0e10cSrcweir 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
654*cdf0e10cSrcweir 				TOKEN_ILLEGAL;
655*cdf0e10cSrcweir 		case U_MODIFIER_LETTER :
656*cdf0e10cSrcweir 			return (nTypes & KParseTokens::UNI_MODIFIER_LETTER) ?
657*cdf0e10cSrcweir 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
658*cdf0e10cSrcweir 				TOKEN_ILLEGAL;
659*cdf0e10cSrcweir 		case U_NON_SPACING_MARK :
660*cdf0e10cSrcweir         case U_COMBINING_SPACING_MARK :
661*cdf0e10cSrcweir             // Non_Spacing_Mark can't be a leading character,
662*cdf0e10cSrcweir             // nor can a spacing combining mark.
663*cdf0e10cSrcweir             if (bStart)
664*cdf0e10cSrcweir                 return TOKEN_ILLEGAL;
665*cdf0e10cSrcweir 			// fall through, treat it as Other_Letter.
666*cdf0e10cSrcweir 		case U_OTHER_LETTER :
667*cdf0e10cSrcweir 			return (nTypes & KParseTokens::UNI_OTHER_LETTER) ?
668*cdf0e10cSrcweir 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
669*cdf0e10cSrcweir 				TOKEN_ILLEGAL;
670*cdf0e10cSrcweir 		case U_DECIMAL_DIGIT_NUMBER :
671*cdf0e10cSrcweir 			return ((nTypes & KParseTokens::UNI_DIGIT) ?
672*cdf0e10cSrcweir 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
673*cdf0e10cSrcweir 				TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
674*cdf0e10cSrcweir 		case U_LETTER_NUMBER :
675*cdf0e10cSrcweir 			return ((nTypes & KParseTokens::UNI_LETTER_NUMBER) ?
676*cdf0e10cSrcweir 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
677*cdf0e10cSrcweir 				TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
678*cdf0e10cSrcweir 		case U_OTHER_NUMBER :
679*cdf0e10cSrcweir 			return ((nTypes & KParseTokens::UNI_OTHER_NUMBER) ?
680*cdf0e10cSrcweir 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
681*cdf0e10cSrcweir 				TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
682*cdf0e10cSrcweir 		case U_SPACE_SEPARATOR :
683*cdf0e10cSrcweir 			return ((nTypes & KParseTokens::IGNORE_LEADING_WS) ?
684*cdf0e10cSrcweir 				TOKEN_CHAR_DONTCARE : (bStart ? TOKEN_CHAR_WORD : (TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP) ));
685*cdf0e10cSrcweir 	}
686*cdf0e10cSrcweir 
687*cdf0e10cSrcweir 	return TOKEN_ILLEGAL;
688*cdf0e10cSrcweir }
689*cdf0e10cSrcweir 
690*cdf0e10cSrcweir 
691*cdf0e10cSrcweir UPT_FLAG_TYPE cclass_Unicode::getStartCharsFlags( sal_Unicode c )
692*cdf0e10cSrcweir {
693*cdf0e10cSrcweir 	if ( pStart )
694*cdf0e10cSrcweir 	{
695*cdf0e10cSrcweir         const sal_Unicode* pStr = aStartChars.getStr();
696*cdf0e10cSrcweir 		const sal_Unicode* p = StrChr( pStr, c );
697*cdf0e10cSrcweir 		if ( p )
698*cdf0e10cSrcweir 			return pStart[ p - pStr ];
699*cdf0e10cSrcweir 	}
700*cdf0e10cSrcweir 	return TOKEN_ILLEGAL;
701*cdf0e10cSrcweir }
702*cdf0e10cSrcweir 
703*cdf0e10cSrcweir 
704*cdf0e10cSrcweir UPT_FLAG_TYPE cclass_Unicode::getContCharsFlags( sal_Unicode c )
705*cdf0e10cSrcweir {
706*cdf0e10cSrcweir 	if ( pCont )
707*cdf0e10cSrcweir 	{
708*cdf0e10cSrcweir         const sal_Unicode* pStr = aContChars.getStr();
709*cdf0e10cSrcweir 		const sal_Unicode* p = StrChr( pStr, c );
710*cdf0e10cSrcweir 		if ( p )
711*cdf0e10cSrcweir 			return pCont[ p - pStr ];
712*cdf0e10cSrcweir 	}
713*cdf0e10cSrcweir 	return TOKEN_ILLEGAL;
714*cdf0e10cSrcweir }
715*cdf0e10cSrcweir 
716*cdf0e10cSrcweir 
717*cdf0e10cSrcweir void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32 nPos, sal_Int32 nTokenType )
718*cdf0e10cSrcweir {
719*cdf0e10cSrcweir 	using namespace i18n;
720*cdf0e10cSrcweir 	const sal_Unicode* const pTextStart = rText.getStr() + nPos;
721*cdf0e10cSrcweir 	eState = ssGetChar;
722*cdf0e10cSrcweir 
723*cdf0e10cSrcweir     //! All the variables below (plus ParseResult) have to be resetted on ssRewindFromValue!
724*cdf0e10cSrcweir 	const sal_Unicode* pSym = pTextStart;
725*cdf0e10cSrcweir 	const sal_Unicode* pSrc = pSym;
726*cdf0e10cSrcweir 	OUString aSymbol;
727*cdf0e10cSrcweir 	sal_Unicode c = *pSrc;
728*cdf0e10cSrcweir 	sal_Unicode cLast = 0;
729*cdf0e10cSrcweir     int nDecSeps = 0;
730*cdf0e10cSrcweir 	bool bQuote = false;
731*cdf0e10cSrcweir 	bool bMightBeWord = true;
732*cdf0e10cSrcweir 	bool bMightBeWordLast = true;
733*cdf0e10cSrcweir     //! All the variables above (plus ParseResult) have to be resetted on ssRewindFromValue!
734*cdf0e10cSrcweir 
735*cdf0e10cSrcweir 	while ( (c != 0) && (eState != ssStop) )
736*cdf0e10cSrcweir 	{
737*cdf0e10cSrcweir 		UPT_FLAG_TYPE nMask = getFlags( pTextStart, pSrc - pTextStart );
738*cdf0e10cSrcweir 		if ( nMask & TOKEN_EXCLUDED )
739*cdf0e10cSrcweir 			eState = ssBounce;
740*cdf0e10cSrcweir 		if ( bMightBeWord )
741*cdf0e10cSrcweir 		{	// only relevant for ssGetValue fall back
742*cdf0e10cSrcweir 			if ( eState == ssGetChar || eState == ssRewindFromValue ||
743*cdf0e10cSrcweir                     eState == ssIgnoreLeadingInRewind )
744*cdf0e10cSrcweir 				bMightBeWord = ((nMask & TOKEN_CHAR_WORD) != 0);
745*cdf0e10cSrcweir 			else
746*cdf0e10cSrcweir 				bMightBeWord = ((nMask & TOKEN_WORD) != 0);
747*cdf0e10cSrcweir 		}
748*cdf0e10cSrcweir 		sal_Int32 nParseTokensType = getParseTokensType( pTextStart, pSrc - pTextStart );
749*cdf0e10cSrcweir 		pSrc++;
750*cdf0e10cSrcweir 		switch (eState)
751*cdf0e10cSrcweir 		{
752*cdf0e10cSrcweir 			case ssGetChar :
753*cdf0e10cSrcweir             case ssRewindFromValue :
754*cdf0e10cSrcweir             case ssIgnoreLeadingInRewind :
755*cdf0e10cSrcweir 			{
756*cdf0e10cSrcweir                 if ( (nMask & TOKEN_CHAR_VALUE) && eState != ssRewindFromValue
757*cdf0e10cSrcweir                         && eState != ssIgnoreLeadingInRewind )
758*cdf0e10cSrcweir 				{	//! must be first, may fall back to ssGetWord via bMightBeWord
759*cdf0e10cSrcweir 					eState = ssGetValue;
760*cdf0e10cSrcweir 					if ( nMask & TOKEN_VALUE_DIGIT )
761*cdf0e10cSrcweir                     {
762*cdf0e10cSrcweir                         if ( 128 <= c )
763*cdf0e10cSrcweir                             r.TokenType = KParseType::UNI_NUMBER;
764*cdf0e10cSrcweir                         else
765*cdf0e10cSrcweir                             r.TokenType = KParseType::ASC_NUMBER;
766*cdf0e10cSrcweir                     }
767*cdf0e10cSrcweir                     else if ( c == cDecimalSep )
768*cdf0e10cSrcweir                     {
769*cdf0e10cSrcweir                         if ( *pSrc )
770*cdf0e10cSrcweir                             ++nDecSeps;
771*cdf0e10cSrcweir                         else
772*cdf0e10cSrcweir                             eState = ssRewindFromValue;
773*cdf0e10cSrcweir                             // retry for ONE_SINGLE_CHAR or others
774*cdf0e10cSrcweir                     }
775*cdf0e10cSrcweir 				}
776*cdf0e10cSrcweir 				else if ( nMask & TOKEN_CHAR_WORD )
777*cdf0e10cSrcweir 				{
778*cdf0e10cSrcweir 					eState = ssGetWord;
779*cdf0e10cSrcweir 					r.TokenType = KParseType::IDENTNAME;
780*cdf0e10cSrcweir 				}
781*cdf0e10cSrcweir 				else if ( nMask & TOKEN_NAME_SEP )
782*cdf0e10cSrcweir 				{
783*cdf0e10cSrcweir 					eState = ssGetWordFirstChar;
784*cdf0e10cSrcweir 					bQuote = true;
785*cdf0e10cSrcweir 					pSym++;
786*cdf0e10cSrcweir 					nParseTokensType = 0;	// will be taken of first real character
787*cdf0e10cSrcweir 					r.TokenType = KParseType::SINGLE_QUOTE_NAME;
788*cdf0e10cSrcweir 				}
789*cdf0e10cSrcweir 				else if ( nMask & TOKEN_CHAR_STRING )
790*cdf0e10cSrcweir 				{
791*cdf0e10cSrcweir 					eState = ssGetString;
792*cdf0e10cSrcweir 					pSym++;
793*cdf0e10cSrcweir 					nParseTokensType = 0;	// will be taken of first real character
794*cdf0e10cSrcweir 					r.TokenType = KParseType::DOUBLE_QUOTE_STRING;
795*cdf0e10cSrcweir 				}
796*cdf0e10cSrcweir 				else if ( nMask & TOKEN_CHAR_DONTCARE )
797*cdf0e10cSrcweir 				{
798*cdf0e10cSrcweir 					if ( nStartTypes & KParseTokens::IGNORE_LEADING_WS )
799*cdf0e10cSrcweir 					{
800*cdf0e10cSrcweir                         if (eState == ssRewindFromValue)
801*cdf0e10cSrcweir                             eState = ssIgnoreLeadingInRewind;
802*cdf0e10cSrcweir 						r.LeadingWhiteSpace++;
803*cdf0e10cSrcweir 						pSym++;
804*cdf0e10cSrcweir 						nParseTokensType = 0;	// wait until real character
805*cdf0e10cSrcweir 						bMightBeWord = true;
806*cdf0e10cSrcweir 					}
807*cdf0e10cSrcweir 					else
808*cdf0e10cSrcweir 						eState = ssBounce;
809*cdf0e10cSrcweir 				}
810*cdf0e10cSrcweir 				else if ( nMask & TOKEN_CHAR_BOOL )
811*cdf0e10cSrcweir 				{
812*cdf0e10cSrcweir 					eState = ssGetBool;
813*cdf0e10cSrcweir 					r.TokenType = KParseType::BOOLEAN;
814*cdf0e10cSrcweir 				}
815*cdf0e10cSrcweir 				else if ( nMask & TOKEN_CHAR )
816*cdf0e10cSrcweir 				{	//! must be last
817*cdf0e10cSrcweir 					eState = ssStop;
818*cdf0e10cSrcweir 					r.TokenType = KParseType::ONE_SINGLE_CHAR;
819*cdf0e10cSrcweir 				}
820*cdf0e10cSrcweir 				else
821*cdf0e10cSrcweir 					eState = ssBounce;		// not known
822*cdf0e10cSrcweir 			}
823*cdf0e10cSrcweir 			break;
824*cdf0e10cSrcweir 			case ssGetValue :
825*cdf0e10cSrcweir 			{
826*cdf0e10cSrcweir                 if ( nMask & TOKEN_VALUE_DIGIT )
827*cdf0e10cSrcweir                 {
828*cdf0e10cSrcweir                     if ( 128 <= c )
829*cdf0e10cSrcweir                         r.TokenType = KParseType::UNI_NUMBER;
830*cdf0e10cSrcweir                     else if ( r.TokenType != KParseType::UNI_NUMBER )
831*cdf0e10cSrcweir                         r.TokenType = KParseType::ASC_NUMBER;
832*cdf0e10cSrcweir                 }
833*cdf0e10cSrcweir                 if ( nMask & TOKEN_VALUE )
834*cdf0e10cSrcweir                 {
835*cdf0e10cSrcweir                     if ( c == cDecimalSep && ++nDecSeps > 1 )
836*cdf0e10cSrcweir                     {
837*cdf0e10cSrcweir                         if ( pSrc - pTextStart == 2 )
838*cdf0e10cSrcweir                             eState = ssRewindFromValue;
839*cdf0e10cSrcweir                             // consecutive separators
840*cdf0e10cSrcweir                         else
841*cdf0e10cSrcweir                             eState = ssStopBack;
842*cdf0e10cSrcweir                     }
843*cdf0e10cSrcweir                     // else keep it going
844*cdf0e10cSrcweir                 }
845*cdf0e10cSrcweir 				else if ( c == 'E' || c == 'e' )
846*cdf0e10cSrcweir 				{
847*cdf0e10cSrcweir 					UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart );
848*cdf0e10cSrcweir 					if ( nNext & TOKEN_VALUE_EXP )
849*cdf0e10cSrcweir 						;	// keep it going
850*cdf0e10cSrcweir 					else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
851*cdf0e10cSrcweir 					{	// might be a numerical name (1.2efg)
852*cdf0e10cSrcweir 						eState = ssGetWord;
853*cdf0e10cSrcweir 						r.TokenType = KParseType::IDENTNAME;
854*cdf0e10cSrcweir 					}
855*cdf0e10cSrcweir 					else
856*cdf0e10cSrcweir 						eState = ssStopBack;
857*cdf0e10cSrcweir 				}
858*cdf0e10cSrcweir 				else if ( nMask & TOKEN_VALUE_SIGN )
859*cdf0e10cSrcweir 				{
860*cdf0e10cSrcweir 					if ( (cLast == 'E') || (cLast == 'e') )
861*cdf0e10cSrcweir 					{
862*cdf0e10cSrcweir 						UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart );
863*cdf0e10cSrcweir 						if ( nNext & TOKEN_VALUE_EXP_VALUE )
864*cdf0e10cSrcweir 							;	// keep it going
865*cdf0e10cSrcweir 						else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
866*cdf0e10cSrcweir 						{	// might be a numerical name (1.2e+fg)
867*cdf0e10cSrcweir 							eState = ssGetWord;
868*cdf0e10cSrcweir 							r.TokenType = KParseType::IDENTNAME;
869*cdf0e10cSrcweir 						}
870*cdf0e10cSrcweir 						else
871*cdf0e10cSrcweir 							eState = ssStopBack;
872*cdf0e10cSrcweir 					}
873*cdf0e10cSrcweir 					else if ( bMightBeWord )
874*cdf0e10cSrcweir 					{	// might be a numerical name (1.2+fg)
875*cdf0e10cSrcweir 						eState = ssGetWord;
876*cdf0e10cSrcweir 						r.TokenType = KParseType::IDENTNAME;
877*cdf0e10cSrcweir 					}
878*cdf0e10cSrcweir 					else
879*cdf0e10cSrcweir 						eState = ssStopBack;
880*cdf0e10cSrcweir 				}
881*cdf0e10cSrcweir 				else if ( bMightBeWord && (nMask & TOKEN_WORD) )
882*cdf0e10cSrcweir 				{	// might be a numerical name (1995.A1)
883*cdf0e10cSrcweir 					eState = ssGetWord;
884*cdf0e10cSrcweir 					r.TokenType = KParseType::IDENTNAME;
885*cdf0e10cSrcweir 				}
886*cdf0e10cSrcweir 				else
887*cdf0e10cSrcweir 					eState = ssStopBack;
888*cdf0e10cSrcweir 			}
889*cdf0e10cSrcweir 			break;
890*cdf0e10cSrcweir 			case ssGetWordFirstChar :
891*cdf0e10cSrcweir 				eState = ssGetWord;
892*cdf0e10cSrcweir 				// fall thru
893*cdf0e10cSrcweir 			case ssGetWord :
894*cdf0e10cSrcweir 			{
895*cdf0e10cSrcweir 				if ( nMask & TOKEN_WORD )
896*cdf0e10cSrcweir 					;	// keep it going
897*cdf0e10cSrcweir 				else if ( nMask & TOKEN_NAME_SEP )
898*cdf0e10cSrcweir 				{
899*cdf0e10cSrcweir 					if ( bQuote )
900*cdf0e10cSrcweir 					{
901*cdf0e10cSrcweir 						if ( cLast == '\\' )
902*cdf0e10cSrcweir 						{	// escaped
903*cdf0e10cSrcweir 							aSymbol += OUString( pSym, pSrc - pSym - 2 );
904*cdf0e10cSrcweir 							aSymbol += OUString( &c, 1);
905*cdf0e10cSrcweir 						}
906*cdf0e10cSrcweir 						else
907*cdf0e10cSrcweir 						{
908*cdf0e10cSrcweir 							eState = ssStop;
909*cdf0e10cSrcweir 							aSymbol += OUString( pSym, pSrc - pSym - 1 );
910*cdf0e10cSrcweir 						}
911*cdf0e10cSrcweir 						pSym = pSrc;
912*cdf0e10cSrcweir 					}
913*cdf0e10cSrcweir 					else
914*cdf0e10cSrcweir 						eState = ssStopBack;
915*cdf0e10cSrcweir 				}
916*cdf0e10cSrcweir 				else if ( bQuote )
917*cdf0e10cSrcweir 					;	// keep it going
918*cdf0e10cSrcweir 				else
919*cdf0e10cSrcweir 					eState = ssStopBack;
920*cdf0e10cSrcweir 			}
921*cdf0e10cSrcweir 			break;
922*cdf0e10cSrcweir 			case ssGetString :
923*cdf0e10cSrcweir 			{
924*cdf0e10cSrcweir 				if ( nMask & TOKEN_STRING_SEP )
925*cdf0e10cSrcweir 				{
926*cdf0e10cSrcweir 					if ( cLast == '\\' )
927*cdf0e10cSrcweir 					{	// escaped
928*cdf0e10cSrcweir 						aSymbol += OUString( pSym, pSrc - pSym - 2 );
929*cdf0e10cSrcweir 						aSymbol += OUString( &c, 1);
930*cdf0e10cSrcweir 					}
931*cdf0e10cSrcweir                     else if ( c == *pSrc &&
932*cdf0e10cSrcweir                             !(nContTypes & KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING) )
933*cdf0e10cSrcweir 					{	// "" => literal " escaped
934*cdf0e10cSrcweir 						aSymbol += OUString( pSym, pSrc - pSym );
935*cdf0e10cSrcweir 						pSrc++;
936*cdf0e10cSrcweir 					}
937*cdf0e10cSrcweir 					else
938*cdf0e10cSrcweir 					{
939*cdf0e10cSrcweir 						eState = ssStop;
940*cdf0e10cSrcweir 						aSymbol += OUString( pSym, pSrc - pSym - 1 );
941*cdf0e10cSrcweir 					}
942*cdf0e10cSrcweir 					pSym = pSrc;
943*cdf0e10cSrcweir 				}
944*cdf0e10cSrcweir 			}
945*cdf0e10cSrcweir 			break;
946*cdf0e10cSrcweir 			case ssGetBool :
947*cdf0e10cSrcweir 			{
948*cdf0e10cSrcweir 				if ( (nMask & TOKEN_BOOL) )
949*cdf0e10cSrcweir 					eState = ssStop;	// maximum 2: <, >, <>, <=, >=
950*cdf0e10cSrcweir 				else
951*cdf0e10cSrcweir 					eState = ssStopBack;
952*cdf0e10cSrcweir 			}
953*cdf0e10cSrcweir 			break;
954*cdf0e10cSrcweir             case ssStopBack :
955*cdf0e10cSrcweir             case ssBounce :
956*cdf0e10cSrcweir             case ssStop :
957*cdf0e10cSrcweir                 ;   // nothing, no compiler warning
958*cdf0e10cSrcweir             break;
959*cdf0e10cSrcweir 		}
960*cdf0e10cSrcweir         if ( eState == ssRewindFromValue )
961*cdf0e10cSrcweir         {
962*cdf0e10cSrcweir             r = ParseResult();
963*cdf0e10cSrcweir             pSym = pTextStart;
964*cdf0e10cSrcweir             pSrc = pSym;
965*cdf0e10cSrcweir             aSymbol = OUString();
966*cdf0e10cSrcweir             c = *pSrc;
967*cdf0e10cSrcweir             cLast = 0;
968*cdf0e10cSrcweir             nDecSeps = 0;
969*cdf0e10cSrcweir             bQuote = false;
970*cdf0e10cSrcweir             bMightBeWord = true;
971*cdf0e10cSrcweir             bMightBeWordLast = true;
972*cdf0e10cSrcweir         }
973*cdf0e10cSrcweir         else
974*cdf0e10cSrcweir         {
975*cdf0e10cSrcweir             if ( !(r.TokenType & nTokenType) )
976*cdf0e10cSrcweir             {
977*cdf0e10cSrcweir                 if ( (r.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER))
978*cdf0e10cSrcweir                         && (nTokenType & KParseType::IDENTNAME) && bMightBeWord )
979*cdf0e10cSrcweir                     ;	// keep a number that might be a word
980*cdf0e10cSrcweir                 else if ( r.LeadingWhiteSpace == (pSrc - pTextStart) )
981*cdf0e10cSrcweir                     ;	// keep ignored white space
982*cdf0e10cSrcweir                 else if ( !r.TokenType && eState == ssGetValue && (nMask & TOKEN_VALUE_SEP) )
983*cdf0e10cSrcweir                     ;   // keep uncertain value
984*cdf0e10cSrcweir                 else
985*cdf0e10cSrcweir                     eState = ssBounce;
986*cdf0e10cSrcweir             }
987*cdf0e10cSrcweir             if ( eState == ssBounce )
988*cdf0e10cSrcweir             {
989*cdf0e10cSrcweir                 r.TokenType = 0;
990*cdf0e10cSrcweir                 eState = ssStopBack;
991*cdf0e10cSrcweir             }
992*cdf0e10cSrcweir             if ( eState == ssStopBack )
993*cdf0e10cSrcweir             {	// put back
994*cdf0e10cSrcweir                 pSrc--;
995*cdf0e10cSrcweir                 bMightBeWord = bMightBeWordLast;
996*cdf0e10cSrcweir                 eState = ssStop;
997*cdf0e10cSrcweir             }
998*cdf0e10cSrcweir             if ( eState != ssStop )
999*cdf0e10cSrcweir             {
1000*cdf0e10cSrcweir                 if ( !r.StartFlags )
1001*cdf0e10cSrcweir                     r.StartFlags |= nParseTokensType;
1002*cdf0e10cSrcweir                 else
1003*cdf0e10cSrcweir                     r.ContFlags |= nParseTokensType;
1004*cdf0e10cSrcweir             }
1005*cdf0e10cSrcweir             bMightBeWordLast = bMightBeWord;
1006*cdf0e10cSrcweir             cLast = c;
1007*cdf0e10cSrcweir             c = *pSrc;
1008*cdf0e10cSrcweir         }
1009*cdf0e10cSrcweir 	}
1010*cdf0e10cSrcweir 	// r.CharLen is the length in characters (not code points) of the parsed
1011*cdf0e10cSrcweir 	// token not including any leading white space, change this calculation if
1012*cdf0e10cSrcweir 	// multi-code-point Unicode characters are to be supported.
1013*cdf0e10cSrcweir 	r.CharLen = pSrc - pTextStart - r.LeadingWhiteSpace;
1014*cdf0e10cSrcweir 	r.EndPos = nPos + (pSrc - pTextStart);
1015*cdf0e10cSrcweir 	if ( r.TokenType & KParseType::ASC_NUMBER )
1016*cdf0e10cSrcweir 	{
1017*cdf0e10cSrcweir         r.Value = rtl_math_uStringToDouble( pTextStart + r.LeadingWhiteSpace,
1018*cdf0e10cSrcweir                 pTextStart + r.EndPos, cDecimalSep, cGroupSep, NULL, NULL );
1019*cdf0e10cSrcweir 		if ( bMightBeWord )
1020*cdf0e10cSrcweir 			r.TokenType |= KParseType::IDENTNAME;
1021*cdf0e10cSrcweir 	}
1022*cdf0e10cSrcweir 	else if ( r.TokenType & KParseType::UNI_NUMBER )
1023*cdf0e10cSrcweir 	{
1024*cdf0e10cSrcweir         if ( !xNatNumSup.is() )
1025*cdf0e10cSrcweir         {
1026*cdf0e10cSrcweir #define NATIVENUMBERSUPPLIER_SERVICENAME "com.sun.star.i18n.NativeNumberSupplier"
1027*cdf0e10cSrcweir             if ( xMSF.is() )
1028*cdf0e10cSrcweir             {
1029*cdf0e10cSrcweir                 xNatNumSup = Reference< XNativeNumberSupplier > (
1030*cdf0e10cSrcweir                         xMSF->createInstance( OUString(
1031*cdf0e10cSrcweir                                 RTL_CONSTASCII_USTRINGPARAM(
1032*cdf0e10cSrcweir                                     NATIVENUMBERSUPPLIER_SERVICENAME ) ) ),
1033*cdf0e10cSrcweir                         UNO_QUERY );
1034*cdf0e10cSrcweir             }
1035*cdf0e10cSrcweir             if ( !xNatNumSup.is() )
1036*cdf0e10cSrcweir             {
1037*cdf0e10cSrcweir                 throw RuntimeException( OUString(
1038*cdf0e10cSrcweir #ifdef DBG_UTIL
1039*cdf0e10cSrcweir                     RTL_CONSTASCII_USTRINGPARAM(
1040*cdf0e10cSrcweir                         "cclass_Unicode::parseText: can't instanciate "
1041*cdf0e10cSrcweir                         NATIVENUMBERSUPPLIER_SERVICENAME )
1042*cdf0e10cSrcweir #endif
1043*cdf0e10cSrcweir                     ), *this );
1044*cdf0e10cSrcweir             }
1045*cdf0e10cSrcweir #undef NATIVENUMBERSUPPLIER_SERVICENAME
1046*cdf0e10cSrcweir         }
1047*cdf0e10cSrcweir         OUString aTmp( pTextStart + r.LeadingWhiteSpace, r.EndPos - nPos +
1048*cdf0e10cSrcweir                 r.LeadingWhiteSpace );
1049*cdf0e10cSrcweir         // transliterate to ASCII
1050*cdf0e10cSrcweir         aTmp = xNatNumSup->getNativeNumberString( aTmp, aParserLocale,
1051*cdf0e10cSrcweir                 NativeNumberMode::NATNUM0 );
1052*cdf0e10cSrcweir         r.Value = ::rtl::math::stringToDouble( aTmp, cDecimalSep, cGroupSep, NULL, NULL );
1053*cdf0e10cSrcweir 		if ( bMightBeWord )
1054*cdf0e10cSrcweir 			r.TokenType |= KParseType::IDENTNAME;
1055*cdf0e10cSrcweir 	}
1056*cdf0e10cSrcweir 	else if ( r.TokenType & (KParseType::SINGLE_QUOTE_NAME | KParseType::DOUBLE_QUOTE_STRING) )
1057*cdf0e10cSrcweir 	{
1058*cdf0e10cSrcweir 		if ( pSym < pSrc )
1059*cdf0e10cSrcweir 		{	//! open quote
1060*cdf0e10cSrcweir 			aSymbol += OUString( pSym, pSrc - pSym );
1061*cdf0e10cSrcweir 			r.TokenType |= KParseType::MISSING_QUOTE;
1062*cdf0e10cSrcweir 		}
1063*cdf0e10cSrcweir 		r.DequotedNameOrString = aSymbol;
1064*cdf0e10cSrcweir 	}
1065*cdf0e10cSrcweir }
1066*cdf0e10cSrcweir 
1067*cdf0e10cSrcweir } } } }
1068