1*449ab281SAndrew Rist /**************************************************************
2cdf0e10cSrcweir  *
3*449ab281SAndrew Rist  * Licensed to the Apache Software Foundation (ASF) under one
4*449ab281SAndrew Rist  * or more contributor license agreements.  See the NOTICE file
5*449ab281SAndrew Rist  * distributed with this work for additional information
6*449ab281SAndrew Rist  * regarding copyright ownership.  The ASF licenses this file
7*449ab281SAndrew Rist  * to you under the Apache License, Version 2.0 (the
8*449ab281SAndrew Rist  * "License"); you may not use this file except in compliance
9*449ab281SAndrew Rist  * with the License.  You may obtain a copy of the License at
10*449ab281SAndrew Rist  *
11*449ab281SAndrew Rist  *   http://www.apache.org/licenses/LICENSE-2.0
12*449ab281SAndrew Rist  *
13*449ab281SAndrew Rist  * Unless required by applicable law or agreed to in writing,
14*449ab281SAndrew Rist  * software distributed under the License is distributed on an
15*449ab281SAndrew Rist  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*449ab281SAndrew Rist  * KIND, either express or implied.  See the License for the
17*449ab281SAndrew Rist  * specific language governing permissions and limitations
18*449ab281SAndrew Rist  * under the License.
19*449ab281SAndrew Rist  *
20*449ab281SAndrew Rist  *************************************************************/
21*449ab281SAndrew Rist 
22*449ab281SAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_i18npool.hxx"
26cdf0e10cSrcweir 
27cdf0e10cSrcweir #include <cclass_unicode.hxx>
28cdf0e10cSrcweir #include <unicode/uchar.h>
29cdf0e10cSrcweir #include <rtl/math.hxx>
30cdf0e10cSrcweir #include <rtl/ustring.hxx>
31cdf0e10cSrcweir #include <com/sun/star/i18n/KParseTokens.hpp>
32cdf0e10cSrcweir #include <com/sun/star/i18n/KParseType.hpp>
33cdf0e10cSrcweir #include <com/sun/star/i18n/UnicodeType.hpp>
34cdf0e10cSrcweir #include <com/sun/star/i18n/XLocaleData.hpp>
35cdf0e10cSrcweir #include <com/sun/star/i18n/NativeNumberMode.hpp>
36cdf0e10cSrcweir 
37cdf0e10cSrcweir #include <string.h>		// memcpy()
38cdf0e10cSrcweir 
39cdf0e10cSrcweir using namespace ::com::sun::star::uno;
40cdf0e10cSrcweir using namespace ::com::sun::star::lang;
41cdf0e10cSrcweir using namespace ::rtl;
42cdf0e10cSrcweir 
43cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n {
44cdf0e10cSrcweir 
// Parser flag bits. Each entry of pDefaultParserTable (and of the working
// copy pTable) is an OR-combination of these values.
// As used by initParserTable(): TOKEN_CHAR_WORD marks a character that may
// start a word, TOKEN_WORD one that may continue a word.
45cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_ILLEGAL		= 0x00000000;
46cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR			= 0x00000001;
47cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_BOOL	= 0x00000002;
48cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_WORD	= 0x00000004;
49cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_VALUE	= 0x00000008;
50cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_STRING	= 0x00000010;
51cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_DONTCARE= 0x00000020;
52cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_BOOL			= 0x00000040;
53cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD			= 0x00000080;
54cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD_SEP		= 0x00000100;
55cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE		= 0x00000200;
56cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SEP	= 0x00000400;
57cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP	= 0x00000800;
58cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SIGN	= 0x00001000;
59cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP_VALUE	= 0x00002000;
60cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_DIGIT	= 0x00004000;
// The three highest bits are kept apart from the run of low bits above.
61cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_NAME_SEP		= 0x20000000;
62cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_STRING_SEP	= 0x40000000;
63cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_EXCLUDED		= 0x80000000;
64cdf0e10cSrcweir 
// Flag combination shared by the ASCII digits '0'..'9' in pDefaultParserTable.
65cdf0e10cSrcweir #define TOKEN_DIGIT_FLAGS (TOKEN_CHAR_VALUE | TOKEN_VALUE | TOKEN_VALUE_EXP | TOKEN_VALUE_EXP_VALUE | TOKEN_VALUE_DIGIT)
66cdf0e10cSrcweir 
67cdf0e10cSrcweir // Default identifier/name specification is [A-Za-z_][A-Za-z0-9_]*
68cdf0e10cSrcweir 
// Number of entries in the ASCII lookup tables of this file (code points 0..127).
69cdf0e10cSrcweir const sal_uInt8 cclass_Unicode::nDefCnt = 128;
// Default parser flags for every ASCII character, indexed by code point.
// initParserTable() copies this table into pTable and then adjusts the copy
// for the locale separators, the requested KParseTokens types and the
// user-defined characters; this table itself is never modified.
70cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::pDefaultParserTable[ nDefCnt ] =
71cdf0e10cSrcweir {
72cdf0e10cSrcweir // (...) == Calc formula compiler specific, commented out and modified
73cdf0e10cSrcweir 
74cdf0e10cSrcweir 	/* \0 */	TOKEN_EXCLUDED,
75cdf0e10cSrcweir 				TOKEN_ILLEGAL,
76cdf0e10cSrcweir 				TOKEN_ILLEGAL,
77cdf0e10cSrcweir 				TOKEN_ILLEGAL,
78cdf0e10cSrcweir 				TOKEN_ILLEGAL,
79cdf0e10cSrcweir 				TOKEN_ILLEGAL,
80cdf0e10cSrcweir 				TOKEN_ILLEGAL,
81cdf0e10cSrcweir 				TOKEN_ILLEGAL,
82cdf0e10cSrcweir 				TOKEN_ILLEGAL,
83cdf0e10cSrcweir 	/*  9 \t */	TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,		// (TOKEN_ILLEGAL)
84cdf0e10cSrcweir 				TOKEN_ILLEGAL,
85cdf0e10cSrcweir 	/* 11 \v */	TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,		// (TOKEN_ILLEGAL)
86cdf0e10cSrcweir 				TOKEN_ILLEGAL,
87cdf0e10cSrcweir 				TOKEN_ILLEGAL,
88cdf0e10cSrcweir 				TOKEN_ILLEGAL,
89cdf0e10cSrcweir 				TOKEN_ILLEGAL,
90cdf0e10cSrcweir 				TOKEN_ILLEGAL,
91cdf0e10cSrcweir 				TOKEN_ILLEGAL,
92cdf0e10cSrcweir 				TOKEN_ILLEGAL,
93cdf0e10cSrcweir 				TOKEN_ILLEGAL,
94cdf0e10cSrcweir 				TOKEN_ILLEGAL,
95cdf0e10cSrcweir 				TOKEN_ILLEGAL,
96cdf0e10cSrcweir 				TOKEN_ILLEGAL,
97cdf0e10cSrcweir 				TOKEN_ILLEGAL,
98cdf0e10cSrcweir 				TOKEN_ILLEGAL,
99cdf0e10cSrcweir 				TOKEN_ILLEGAL,
100cdf0e10cSrcweir 				TOKEN_ILLEGAL,
101cdf0e10cSrcweir 				TOKEN_ILLEGAL,
102cdf0e10cSrcweir 				TOKEN_ILLEGAL,
103cdf0e10cSrcweir 				TOKEN_ILLEGAL,
104cdf0e10cSrcweir 				TOKEN_ILLEGAL,
105cdf0e10cSrcweir 				TOKEN_ILLEGAL,
106cdf0e10cSrcweir 	/*  32   */	TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
107cdf0e10cSrcweir 	/*  33 ! */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
108cdf0e10cSrcweir 	/*  34 " */	TOKEN_CHAR_STRING | TOKEN_STRING_SEP,
109cdf0e10cSrcweir 	/*  35 # */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_WORD_SEP)
110cdf0e10cSrcweir 	/*  36 $ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_CHAR_WORD | TOKEN_WORD)
111cdf0e10cSrcweir 	/*  37 % */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_VALUE)
112cdf0e10cSrcweir 	/*  38 & */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
113cdf0e10cSrcweir 	/*  39 ' */	TOKEN_NAME_SEP,
114cdf0e10cSrcweir 	/*  40 ( */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
115cdf0e10cSrcweir 	/*  41 ) */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
116cdf0e10cSrcweir 	/*  42 * */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
117cdf0e10cSrcweir 	/*  43 + */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN,
118cdf0e10cSrcweir 	/*  44 , */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_CHAR_VALUE | TOKEN_VALUE)
119cdf0e10cSrcweir 	/*  45 - */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN,
120cdf0e10cSrcweir 	/*  46 . */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_WORD | TOKEN_CHAR_VALUE | TOKEN_VALUE)
121cdf0e10cSrcweir 	/*  47 / */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
122cdf0e10cSrcweir 	//for ( i = 48; i < 58; i++ )
123cdf0e10cSrcweir 	/*  48 0 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
124cdf0e10cSrcweir 	/*  49 1 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
125cdf0e10cSrcweir 	/*  50 2 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
126cdf0e10cSrcweir 	/*  51 3 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
127cdf0e10cSrcweir 	/*  52 4 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
128cdf0e10cSrcweir 	/*  53 5 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
129cdf0e10cSrcweir 	/*  54 6 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
130cdf0e10cSrcweir 	/*  55 7 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
131cdf0e10cSrcweir 	/*  56 8 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
132cdf0e10cSrcweir 	/*  57 9 */	TOKEN_DIGIT_FLAGS | TOKEN_WORD,
133cdf0e10cSrcweir 	/*  58 : */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_WORD)
134cdf0e10cSrcweir 	/*  59 ; */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
135cdf0e10cSrcweir 	/*  60 < */	TOKEN_CHAR_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
136cdf0e10cSrcweir 	/*  61 = */	TOKEN_CHAR | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
137cdf0e10cSrcweir 	/*  62 > */	TOKEN_CHAR_BOOL | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
138cdf0e10cSrcweir 	/*  63 ? */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_CHAR_WORD | TOKEN_WORD)
139cdf0e10cSrcweir 	/*  64 @ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
140cdf0e10cSrcweir 	//for ( i = 65; i < 91; i++ )
141cdf0e10cSrcweir 	/*  65 A */	TOKEN_CHAR_WORD | TOKEN_WORD,
142cdf0e10cSrcweir 	/*  66 B */	TOKEN_CHAR_WORD | TOKEN_WORD,
143cdf0e10cSrcweir 	/*  67 C */	TOKEN_CHAR_WORD | TOKEN_WORD,
144cdf0e10cSrcweir 	/*  68 D */	TOKEN_CHAR_WORD | TOKEN_WORD,
145cdf0e10cSrcweir 	/*  69 E */	TOKEN_CHAR_WORD | TOKEN_WORD,
146cdf0e10cSrcweir 	/*  70 F */	TOKEN_CHAR_WORD | TOKEN_WORD,
147cdf0e10cSrcweir 	/*  71 G */	TOKEN_CHAR_WORD | TOKEN_WORD,
148cdf0e10cSrcweir 	/*  72 H */	TOKEN_CHAR_WORD | TOKEN_WORD,
149cdf0e10cSrcweir 	/*  73 I */	TOKEN_CHAR_WORD | TOKEN_WORD,
150cdf0e10cSrcweir 	/*  74 J */	TOKEN_CHAR_WORD | TOKEN_WORD,
151cdf0e10cSrcweir 	/*  75 K */	TOKEN_CHAR_WORD | TOKEN_WORD,
152cdf0e10cSrcweir 	/*  76 L */	TOKEN_CHAR_WORD | TOKEN_WORD,
153cdf0e10cSrcweir 	/*  77 M */	TOKEN_CHAR_WORD | TOKEN_WORD,
154cdf0e10cSrcweir 	/*  78 N */	TOKEN_CHAR_WORD | TOKEN_WORD,
155cdf0e10cSrcweir 	/*  79 O */	TOKEN_CHAR_WORD | TOKEN_WORD,
156cdf0e10cSrcweir 	/*  80 P */	TOKEN_CHAR_WORD | TOKEN_WORD,
157cdf0e10cSrcweir 	/*  81 Q */	TOKEN_CHAR_WORD | TOKEN_WORD,
158cdf0e10cSrcweir 	/*  82 R */	TOKEN_CHAR_WORD | TOKEN_WORD,
159cdf0e10cSrcweir 	/*  83 S */	TOKEN_CHAR_WORD | TOKEN_WORD,
160cdf0e10cSrcweir 	/*  84 T */	TOKEN_CHAR_WORD | TOKEN_WORD,
161cdf0e10cSrcweir 	/*  85 U */	TOKEN_CHAR_WORD | TOKEN_WORD,
162cdf0e10cSrcweir 	/*  86 V */	TOKEN_CHAR_WORD | TOKEN_WORD,
163cdf0e10cSrcweir 	/*  87 W */	TOKEN_CHAR_WORD | TOKEN_WORD,
164cdf0e10cSrcweir 	/*  88 X */	TOKEN_CHAR_WORD | TOKEN_WORD,
165cdf0e10cSrcweir 	/*  89 Y */	TOKEN_CHAR_WORD | TOKEN_WORD,
166cdf0e10cSrcweir 	/*  90 Z */	TOKEN_CHAR_WORD | TOKEN_WORD,
167cdf0e10cSrcweir 	/*  91 [ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
168cdf0e10cSrcweir 	/*  92 \ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
169cdf0e10cSrcweir 	/*  93 ] */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
170cdf0e10cSrcweir 	/*  94 ^ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
171cdf0e10cSrcweir 	/*  95 _ */	TOKEN_CHAR_WORD | TOKEN_WORD,
172cdf0e10cSrcweir 	/*  96 ` */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
173cdf0e10cSrcweir 	//for ( i = 97; i < 123; i++ )
174cdf0e10cSrcweir 	/*  97 a */	TOKEN_CHAR_WORD | TOKEN_WORD,
175cdf0e10cSrcweir 	/*  98 b */	TOKEN_CHAR_WORD | TOKEN_WORD,
176cdf0e10cSrcweir 	/*  99 c */	TOKEN_CHAR_WORD | TOKEN_WORD,
177cdf0e10cSrcweir 	/* 100 d */	TOKEN_CHAR_WORD | TOKEN_WORD,
178cdf0e10cSrcweir 	/* 101 e */	TOKEN_CHAR_WORD | TOKEN_WORD,
179cdf0e10cSrcweir 	/* 102 f */	TOKEN_CHAR_WORD | TOKEN_WORD,
180cdf0e10cSrcweir 	/* 103 g */	TOKEN_CHAR_WORD | TOKEN_WORD,
181cdf0e10cSrcweir 	/* 104 h */	TOKEN_CHAR_WORD | TOKEN_WORD,
182cdf0e10cSrcweir 	/* 105 i */	TOKEN_CHAR_WORD | TOKEN_WORD,
183cdf0e10cSrcweir 	/* 106 j */	TOKEN_CHAR_WORD | TOKEN_WORD,
184cdf0e10cSrcweir 	/* 107 k */	TOKEN_CHAR_WORD | TOKEN_WORD,
185cdf0e10cSrcweir 	/* 108 l */	TOKEN_CHAR_WORD | TOKEN_WORD,
186cdf0e10cSrcweir 	/* 109 m */	TOKEN_CHAR_WORD | TOKEN_WORD,
187cdf0e10cSrcweir 	/* 110 n */	TOKEN_CHAR_WORD | TOKEN_WORD,
188cdf0e10cSrcweir 	/* 111 o */	TOKEN_CHAR_WORD | TOKEN_WORD,
189cdf0e10cSrcweir 	/* 112 p */	TOKEN_CHAR_WORD | TOKEN_WORD,
190cdf0e10cSrcweir 	/* 113 q */	TOKEN_CHAR_WORD | TOKEN_WORD,
191cdf0e10cSrcweir 	/* 114 r */	TOKEN_CHAR_WORD | TOKEN_WORD,
192cdf0e10cSrcweir 	/* 115 s */	TOKEN_CHAR_WORD | TOKEN_WORD,
193cdf0e10cSrcweir 	/* 116 t */	TOKEN_CHAR_WORD | TOKEN_WORD,
194cdf0e10cSrcweir 	/* 117 u */	TOKEN_CHAR_WORD | TOKEN_WORD,
195cdf0e10cSrcweir 	/* 118 v */	TOKEN_CHAR_WORD | TOKEN_WORD,
196cdf0e10cSrcweir 	/* 119 w */	TOKEN_CHAR_WORD | TOKEN_WORD,
197cdf0e10cSrcweir 	/* 120 x */	TOKEN_CHAR_WORD | TOKEN_WORD,
198cdf0e10cSrcweir 	/* 121 y */	TOKEN_CHAR_WORD | TOKEN_WORD,
199cdf0e10cSrcweir 	/* 122 z */	TOKEN_CHAR_WORD | TOKEN_WORD,
200cdf0e10cSrcweir 	/* 123 { */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
201cdf0e10cSrcweir 	/* 124 | */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
202cdf0e10cSrcweir 	/* 125 } */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
203cdf0e10cSrcweir 	/* 126 ~ */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,	// (TOKEN_ILLEGAL // UNUSED)
204cdf0e10cSrcweir 	/* 127   */	TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP	// (TOKEN_ILLEGAL // UNUSED)
205cdf0e10cSrcweir };
206cdf0e10cSrcweir 
207cdf0e10cSrcweir 
// KParseTokens::ASC_* classification of every ASCII character, indexed by
// code point. getParseTokensType() returns the entry directly for any
// character below nDefCnt; larger characters are classified via ICU instead.
208cdf0e10cSrcweir const sal_Int32 cclass_Unicode::pParseTokensType[ nDefCnt ] =
209cdf0e10cSrcweir {
210cdf0e10cSrcweir 	/* \0 */	KParseTokens::ASC_OTHER,
211cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
212cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
213cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
214cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
215cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
216cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
217cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
218cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
219cdf0e10cSrcweir 	/*  9 \t */	KParseTokens::ASC_CONTROL,
220cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
221cdf0e10cSrcweir 	/* 11 \v */	KParseTokens::ASC_CONTROL,
222cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
223cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
224cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
225cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
226cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
227cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
228cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
229cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
230cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
231cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
232cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
233cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
234cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
235cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
236cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
237cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
238cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
239cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
240cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
241cdf0e10cSrcweir 				KParseTokens::ASC_CONTROL,
242cdf0e10cSrcweir 	/*  32   */	KParseTokens::ASC_OTHER,
243cdf0e10cSrcweir 	/*  33 ! */	KParseTokens::ASC_OTHER,
244cdf0e10cSrcweir 	/*  34 " */	KParseTokens::ASC_OTHER,
245cdf0e10cSrcweir 	/*  35 # */	KParseTokens::ASC_OTHER,
246cdf0e10cSrcweir 	/*  36 $ */	KParseTokens::ASC_DOLLAR,
247cdf0e10cSrcweir 	/*  37 % */	KParseTokens::ASC_OTHER,
248cdf0e10cSrcweir 	/*  38 & */	KParseTokens::ASC_OTHER,
249cdf0e10cSrcweir 	/*  39 ' */	KParseTokens::ASC_OTHER,
250cdf0e10cSrcweir 	/*  40 ( */	KParseTokens::ASC_OTHER,
251cdf0e10cSrcweir 	/*  41 ) */	KParseTokens::ASC_OTHER,
252cdf0e10cSrcweir 	/*  42 * */	KParseTokens::ASC_OTHER,
253cdf0e10cSrcweir 	/*  43 + */	KParseTokens::ASC_OTHER,
254cdf0e10cSrcweir 	/*  44 , */	KParseTokens::ASC_OTHER,
255cdf0e10cSrcweir 	/*  45 - */	KParseTokens::ASC_OTHER,
256cdf0e10cSrcweir 	/*  46 . */	KParseTokens::ASC_DOT,
257cdf0e10cSrcweir 	/*  47 / */	KParseTokens::ASC_OTHER,
258cdf0e10cSrcweir 	//for ( i = 48; i < 58; i++ )
259cdf0e10cSrcweir 	/*  48 0 */	KParseTokens::ASC_DIGIT,
260cdf0e10cSrcweir 	/*  49 1 */	KParseTokens::ASC_DIGIT,
261cdf0e10cSrcweir 	/*  50 2 */	KParseTokens::ASC_DIGIT,
262cdf0e10cSrcweir 	/*  51 3 */	KParseTokens::ASC_DIGIT,
263cdf0e10cSrcweir 	/*  52 4 */	KParseTokens::ASC_DIGIT,
264cdf0e10cSrcweir 	/*  53 5 */	KParseTokens::ASC_DIGIT,
265cdf0e10cSrcweir 	/*  54 6 */	KParseTokens::ASC_DIGIT,
266cdf0e10cSrcweir 	/*  55 7 */	KParseTokens::ASC_DIGIT,
267cdf0e10cSrcweir 	/*  56 8 */	KParseTokens::ASC_DIGIT,
268cdf0e10cSrcweir 	/*  57 9 */	KParseTokens::ASC_DIGIT,
269cdf0e10cSrcweir 	/*  58 : */	KParseTokens::ASC_COLON,
270cdf0e10cSrcweir 	/*  59 ; */	KParseTokens::ASC_OTHER,
271cdf0e10cSrcweir 	/*  60 < */	KParseTokens::ASC_OTHER,
272cdf0e10cSrcweir 	/*  61 = */	KParseTokens::ASC_OTHER,
273cdf0e10cSrcweir 	/*  62 > */	KParseTokens::ASC_OTHER,
274cdf0e10cSrcweir 	/*  63 ? */	KParseTokens::ASC_OTHER,
275cdf0e10cSrcweir 	/*  64 @ */	KParseTokens::ASC_OTHER,
276cdf0e10cSrcweir 	//for ( i = 65; i < 91; i++ )
277cdf0e10cSrcweir 	/*  65 A */	KParseTokens::ASC_UPALPHA,
278cdf0e10cSrcweir 	/*  66 B */	KParseTokens::ASC_UPALPHA,
279cdf0e10cSrcweir 	/*  67 C */	KParseTokens::ASC_UPALPHA,
280cdf0e10cSrcweir 	/*  68 D */	KParseTokens::ASC_UPALPHA,
281cdf0e10cSrcweir 	/*  69 E */	KParseTokens::ASC_UPALPHA,
282cdf0e10cSrcweir 	/*  70 F */	KParseTokens::ASC_UPALPHA,
283cdf0e10cSrcweir 	/*  71 G */	KParseTokens::ASC_UPALPHA,
284cdf0e10cSrcweir 	/*  72 H */	KParseTokens::ASC_UPALPHA,
285cdf0e10cSrcweir 	/*  73 I */	KParseTokens::ASC_UPALPHA,
286cdf0e10cSrcweir 	/*  74 J */	KParseTokens::ASC_UPALPHA,
287cdf0e10cSrcweir 	/*  75 K */	KParseTokens::ASC_UPALPHA,
288cdf0e10cSrcweir 	/*  76 L */	KParseTokens::ASC_UPALPHA,
289cdf0e10cSrcweir 	/*  77 M */	KParseTokens::ASC_UPALPHA,
290cdf0e10cSrcweir 	/*  78 N */	KParseTokens::ASC_UPALPHA,
291cdf0e10cSrcweir 	/*  79 O */	KParseTokens::ASC_UPALPHA,
292cdf0e10cSrcweir 	/*  80 P */	KParseTokens::ASC_UPALPHA,
293cdf0e10cSrcweir 	/*  81 Q */	KParseTokens::ASC_UPALPHA,
294cdf0e10cSrcweir 	/*  82 R */	KParseTokens::ASC_UPALPHA,
295cdf0e10cSrcweir 	/*  83 S */	KParseTokens::ASC_UPALPHA,
296cdf0e10cSrcweir 	/*  84 T */	KParseTokens::ASC_UPALPHA,
297cdf0e10cSrcweir 	/*  85 U */	KParseTokens::ASC_UPALPHA,
298cdf0e10cSrcweir 	/*  86 V */	KParseTokens::ASC_UPALPHA,
299cdf0e10cSrcweir 	/*  87 W */	KParseTokens::ASC_UPALPHA,
300cdf0e10cSrcweir 	/*  88 X */	KParseTokens::ASC_UPALPHA,
301cdf0e10cSrcweir 	/*  89 Y */	KParseTokens::ASC_UPALPHA,
302cdf0e10cSrcweir 	/*  90 Z */	KParseTokens::ASC_UPALPHA,
303cdf0e10cSrcweir 	/*  91 [ */	KParseTokens::ASC_OTHER,
304cdf0e10cSrcweir 	/*  92 \ */	KParseTokens::ASC_OTHER,
305cdf0e10cSrcweir 	/*  93 ] */	KParseTokens::ASC_OTHER,
306cdf0e10cSrcweir 	/*  94 ^ */	KParseTokens::ASC_OTHER,
307cdf0e10cSrcweir 	/*  95 _ */	KParseTokens::ASC_UNDERSCORE,
308cdf0e10cSrcweir 	/*  96 ` */	KParseTokens::ASC_OTHER,
309cdf0e10cSrcweir 	//for ( i = 97; i < 123; i++ )
310cdf0e10cSrcweir 	/*  97 a */	KParseTokens::ASC_LOALPHA,
311cdf0e10cSrcweir 	/*  98 b */	KParseTokens::ASC_LOALPHA,
312cdf0e10cSrcweir 	/*  99 c */	KParseTokens::ASC_LOALPHA,
313cdf0e10cSrcweir 	/* 100 d */	KParseTokens::ASC_LOALPHA,
314cdf0e10cSrcweir 	/* 101 e */	KParseTokens::ASC_LOALPHA,
315cdf0e10cSrcweir 	/* 102 f */	KParseTokens::ASC_LOALPHA,
316cdf0e10cSrcweir 	/* 103 g */	KParseTokens::ASC_LOALPHA,
317cdf0e10cSrcweir 	/* 104 h */	KParseTokens::ASC_LOALPHA,
318cdf0e10cSrcweir 	/* 105 i */	KParseTokens::ASC_LOALPHA,
319cdf0e10cSrcweir 	/* 106 j */	KParseTokens::ASC_LOALPHA,
320cdf0e10cSrcweir 	/* 107 k */	KParseTokens::ASC_LOALPHA,
321cdf0e10cSrcweir 	/* 108 l */	KParseTokens::ASC_LOALPHA,
322cdf0e10cSrcweir 	/* 109 m */	KParseTokens::ASC_LOALPHA,
323cdf0e10cSrcweir 	/* 110 n */	KParseTokens::ASC_LOALPHA,
324cdf0e10cSrcweir 	/* 111 o */	KParseTokens::ASC_LOALPHA,
325cdf0e10cSrcweir 	/* 112 p */	KParseTokens::ASC_LOALPHA,
326cdf0e10cSrcweir 	/* 113 q */	KParseTokens::ASC_LOALPHA,
327cdf0e10cSrcweir 	/* 114 r */	KParseTokens::ASC_LOALPHA,
328cdf0e10cSrcweir 	/* 115 s */	KParseTokens::ASC_LOALPHA,
329cdf0e10cSrcweir 	/* 116 t */	KParseTokens::ASC_LOALPHA,
330cdf0e10cSrcweir 	/* 117 u */	KParseTokens::ASC_LOALPHA,
331cdf0e10cSrcweir 	/* 118 v */	KParseTokens::ASC_LOALPHA,
332cdf0e10cSrcweir 	/* 119 w */	KParseTokens::ASC_LOALPHA,
333cdf0e10cSrcweir 	/* 120 x */	KParseTokens::ASC_LOALPHA,
334cdf0e10cSrcweir 	/* 121 y */	KParseTokens::ASC_LOALPHA,
335cdf0e10cSrcweir 	/* 122 z */	KParseTokens::ASC_LOALPHA,
336cdf0e10cSrcweir 	/* 123 { */	KParseTokens::ASC_OTHER,
337cdf0e10cSrcweir 	/* 124 | */	KParseTokens::ASC_OTHER,
338cdf0e10cSrcweir 	/* 125 } */	KParseTokens::ASC_OTHER,
339cdf0e10cSrcweir 	/* 126 ~ */	KParseTokens::ASC_OTHER,
340cdf0e10cSrcweir 	/* 127   */	KParseTokens::ASC_OTHER
341cdf0e10cSrcweir };
342cdf0e10cSrcweir 
343cdf0e10cSrcweir 
344cdf0e10cSrcweir // static
/** Locate the first occurrence of a code unit in a NUL-terminated string.

	@param pStr
		NUL-terminated buffer to scan; may be NULL.
	@param c
		Code unit to search for.
	@return
		Pointer to the first element equal to c, or NULL if pStr is NULL or
		the terminator is reached first. The terminator itself is never
		compared against c.
 */
const sal_Unicode* cclass_Unicode::StrChr( const sal_Unicode* pStr, sal_Unicode c )
{
	if ( pStr )
	{
		for ( const sal_Unicode* pCur = pStr; *pCur; ++pCur )
		{
			if ( *pCur == c )
				return pCur;
		}
	}
	return NULL;
}
357cdf0e10cSrcweir 
358cdf0e10cSrcweir 
getParseTokensType(const sal_Unicode * aStr,sal_Int32 nPos)359cdf0e10cSrcweir sal_Int32 cclass_Unicode::getParseTokensType( const sal_Unicode* aStr, sal_Int32 nPos )
360cdf0e10cSrcweir {
361cdf0e10cSrcweir 	sal_Unicode c = aStr[nPos];
362cdf0e10cSrcweir 	if ( c < nDefCnt )
363cdf0e10cSrcweir 		return pParseTokensType[ sal_uInt8(c) ];
364cdf0e10cSrcweir 	else
365cdf0e10cSrcweir 	{
366cdf0e10cSrcweir 
367cdf0e10cSrcweir 		//! all KParseTokens::UNI_... must be matched
368cdf0e10cSrcweir         switch ( u_charType( (sal_uInt32) c ) )
369cdf0e10cSrcweir 		{
370cdf0e10cSrcweir 			case U_UPPERCASE_LETTER :
371cdf0e10cSrcweir 				return KParseTokens::UNI_UPALPHA;
372cdf0e10cSrcweir 			case U_LOWERCASE_LETTER :
373cdf0e10cSrcweir 				return KParseTokens::UNI_LOALPHA;
374cdf0e10cSrcweir 			case U_TITLECASE_LETTER :
375cdf0e10cSrcweir 				return KParseTokens::UNI_TITLE_ALPHA;
376cdf0e10cSrcweir 			case U_MODIFIER_LETTER :
377cdf0e10cSrcweir 				return KParseTokens::UNI_MODIFIER_LETTER;
378cdf0e10cSrcweir 			case U_OTHER_LETTER :
379cdf0e10cSrcweir 				// Non_Spacing_Mark could not be as leading character
380cdf0e10cSrcweir 				if (nPos == 0) break;
381cdf0e10cSrcweir 				// fall through, treat it as Other_Letter.
382cdf0e10cSrcweir 			case U_NON_SPACING_MARK :
383cdf0e10cSrcweir 				return KParseTokens::UNI_OTHER_LETTER;
384cdf0e10cSrcweir 			case U_DECIMAL_DIGIT_NUMBER :
385cdf0e10cSrcweir 				return KParseTokens::UNI_DIGIT;
386cdf0e10cSrcweir 			case U_LETTER_NUMBER :
387cdf0e10cSrcweir 				return KParseTokens::UNI_LETTER_NUMBER;
388cdf0e10cSrcweir 			case U_OTHER_NUMBER :
389cdf0e10cSrcweir 				return KParseTokens::UNI_OTHER_NUMBER;
390cdf0e10cSrcweir 		}
391cdf0e10cSrcweir 
392cdf0e10cSrcweir 		return KParseTokens::UNI_OTHER;
393cdf0e10cSrcweir 	}
394cdf0e10cSrcweir }
395cdf0e10cSrcweir 
setupInternational(const Locale & rLocale)396cdf0e10cSrcweir sal_Bool cclass_Unicode::setupInternational( const Locale& rLocale )
397cdf0e10cSrcweir {
398cdf0e10cSrcweir 	sal_Bool bChanged = (aParserLocale.Language != rLocale.Language
399cdf0e10cSrcweir 		|| aParserLocale.Country != rLocale.Country
400cdf0e10cSrcweir 		|| aParserLocale.Variant != rLocale.Variant);
401cdf0e10cSrcweir 	if ( bChanged )
402cdf0e10cSrcweir 	{
403cdf0e10cSrcweir 		aParserLocale.Language = rLocale.Language;
404cdf0e10cSrcweir 		aParserLocale.Country = rLocale.Country;
405cdf0e10cSrcweir 		aParserLocale.Variant = rLocale.Variant;
406cdf0e10cSrcweir 	}
407cdf0e10cSrcweir 	if ( !xLocaleData.is() && xMSF.is() )
408cdf0e10cSrcweir 	{
409cdf0e10cSrcweir 		Reference <
410cdf0e10cSrcweir 			XInterface > xI =
411cdf0e10cSrcweir 			xMSF->createInstance( OUString(
412cdf0e10cSrcweir 			RTL_CONSTASCII_USTRINGPARAM( "com.sun.star.i18n.LocaleData" ) ) );
413cdf0e10cSrcweir 		if ( xI.is() )
414cdf0e10cSrcweir 		{
415cdf0e10cSrcweir 			Any x = xI->queryInterface( getCppuType((const Reference< XLocaleData>*)0) );
416cdf0e10cSrcweir 			x >>= xLocaleData;
417cdf0e10cSrcweir 		}
418cdf0e10cSrcweir 	}
419cdf0e10cSrcweir 	return bChanged;
420cdf0e10cSrcweir }
421cdf0e10cSrcweir 
422cdf0e10cSrcweir 
setupParserTable(const Locale & rLocale,sal_Int32 startCharTokenType,const OUString & userDefinedCharactersStart,sal_Int32 contCharTokenType,const OUString & userDefinedCharactersCont)423cdf0e10cSrcweir void cclass_Unicode::setupParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
424cdf0e10cSrcweir             const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
425cdf0e10cSrcweir             const OUString& userDefinedCharactersCont )
426cdf0e10cSrcweir {
427cdf0e10cSrcweir 	bool bIntlEqual = (rLocale.Language == aParserLocale.Language &&
428cdf0e10cSrcweir 		rLocale.Country == aParserLocale.Country &&
429cdf0e10cSrcweir 		rLocale.Variant == aParserLocale.Variant);
430cdf0e10cSrcweir 	if ( !pTable || !bIntlEqual ||
431cdf0e10cSrcweir 			startCharTokenType != nStartTypes ||
432cdf0e10cSrcweir 			contCharTokenType != nContTypes ||
433cdf0e10cSrcweir 			userDefinedCharactersStart != aStartChars ||
434cdf0e10cSrcweir 			userDefinedCharactersCont != aContChars )
435cdf0e10cSrcweir 		initParserTable( rLocale, startCharTokenType, userDefinedCharactersStart,
436cdf0e10cSrcweir 			contCharTokenType, userDefinedCharactersCont );
437cdf0e10cSrcweir }
438cdf0e10cSrcweir 
439cdf0e10cSrcweir 
initParserTable(const Locale & rLocale,sal_Int32 startCharTokenType,const OUString & userDefinedCharactersStart,sal_Int32 contCharTokenType,const OUString & userDefinedCharactersCont)440cdf0e10cSrcweir void cclass_Unicode::initParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
441cdf0e10cSrcweir             const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
442cdf0e10cSrcweir             const OUString& userDefinedCharactersCont )
443cdf0e10cSrcweir {
444cdf0e10cSrcweir 	// (Re)Init
445cdf0e10cSrcweir 	setupInternational( rLocale );
446cdf0e10cSrcweir 	// Memory of pTable is reused.
447cdf0e10cSrcweir 	if ( !pTable )
448cdf0e10cSrcweir 		pTable = new UPT_FLAG_TYPE[nDefCnt];
449cdf0e10cSrcweir 	memcpy( pTable, pDefaultParserTable, sizeof(UPT_FLAG_TYPE) * nDefCnt );
450cdf0e10cSrcweir 	// Start and cont tables only need reallocation if different length.
451cdf0e10cSrcweir     if ( pStart && userDefinedCharactersStart.getLength() != aStartChars.getLength() )
452cdf0e10cSrcweir 	{
453cdf0e10cSrcweir 		delete [] pStart;
454cdf0e10cSrcweir 		pStart = NULL;
455cdf0e10cSrcweir 	}
456cdf0e10cSrcweir     if ( pCont && userDefinedCharactersCont.getLength() != aContChars.getLength() )
457cdf0e10cSrcweir 	{
458cdf0e10cSrcweir 		delete [] pCont;
459cdf0e10cSrcweir 		pCont = NULL;
460cdf0e10cSrcweir 	}
461cdf0e10cSrcweir 	nStartTypes = startCharTokenType;
462cdf0e10cSrcweir 	nContTypes = contCharTokenType;
463cdf0e10cSrcweir 	aStartChars = userDefinedCharactersStart;
464cdf0e10cSrcweir 	aContChars = userDefinedCharactersCont;
465cdf0e10cSrcweir 
466cdf0e10cSrcweir 	// specials
467cdf0e10cSrcweir 	if( xLocaleData.is() )
468cdf0e10cSrcweir 	{
469cdf0e10cSrcweir 		LocaleDataItem aItem =
470cdf0e10cSrcweir 			xLocaleData->getLocaleItem( aParserLocale );
471cdf0e10cSrcweir //!TODO: theoretically separators may be a string, adjustment would have to be
472cdf0e10cSrcweir //! done here and in parsing and in ::rtl::math::stringToDouble()
473cdf0e10cSrcweir 		cGroupSep = aItem.thousandSeparator.getStr()[0];
474cdf0e10cSrcweir         cDecimalSep = aItem.decimalSeparator.getStr()[0];
475cdf0e10cSrcweir 	}
476cdf0e10cSrcweir 
477cdf0e10cSrcweir 	if ( cGroupSep < nDefCnt )
478cdf0e10cSrcweir 		pTable[cGroupSep] |= TOKEN_VALUE;
479cdf0e10cSrcweir 	if ( cDecimalSep < nDefCnt )
480cdf0e10cSrcweir 		pTable[cDecimalSep] |= TOKEN_CHAR_VALUE | TOKEN_VALUE;
481cdf0e10cSrcweir 
482cdf0e10cSrcweir 	// Modify characters according to KParseTokens definitions.
483cdf0e10cSrcweir 	{
484cdf0e10cSrcweir 		using namespace KParseTokens;
485cdf0e10cSrcweir 		sal_uInt8 i;
486cdf0e10cSrcweir 
487cdf0e10cSrcweir 		if ( !(nStartTypes & ASC_UPALPHA) )
488cdf0e10cSrcweir 			for ( i = 65; i < 91; i++ )
489cdf0e10cSrcweir 				pTable[i] &= ~TOKEN_CHAR_WORD;	// not allowed as start character
490cdf0e10cSrcweir 		if ( !(nContTypes & ASC_UPALPHA) )
491cdf0e10cSrcweir 			for ( i = 65; i < 91; i++ )
492cdf0e10cSrcweir 				pTable[i] &= ~TOKEN_WORD;		// not allowed as cont character
493cdf0e10cSrcweir 
494cdf0e10cSrcweir 		if ( !(nStartTypes & ASC_LOALPHA) )
495cdf0e10cSrcweir 			for ( i = 97; i < 123; i++ )
496cdf0e10cSrcweir 				pTable[i] &= ~TOKEN_CHAR_WORD;	// not allowed as start character
497cdf0e10cSrcweir 		if ( !(nContTypes & ASC_LOALPHA) )
498cdf0e10cSrcweir 			for ( i = 97; i < 123; i++ )
499cdf0e10cSrcweir 				pTable[i] &= ~TOKEN_WORD;		// not allowed as cont character
500cdf0e10cSrcweir 
501cdf0e10cSrcweir 		if ( nStartTypes & ASC_DIGIT )
502cdf0e10cSrcweir 			for ( i = 48; i < 58; i++ )
503cdf0e10cSrcweir 				pTable[i] |= TOKEN_CHAR_WORD;	// allowed as start character
504cdf0e10cSrcweir 		if ( !(nContTypes & ASC_DIGIT) )
505cdf0e10cSrcweir 			for ( i = 48; i < 58; i++ )
506cdf0e10cSrcweir 				pTable[i] &= ~TOKEN_WORD;		// not allowed as cont character
507cdf0e10cSrcweir 
508cdf0e10cSrcweir 		if ( !(nStartTypes & ASC_UNDERSCORE) )
509cdf0e10cSrcweir 			pTable[95] &= ~TOKEN_CHAR_WORD;		// not allowed as start character
510cdf0e10cSrcweir 		if ( !(nContTypes & ASC_UNDERSCORE) )
511cdf0e10cSrcweir 			pTable[95] &= ~TOKEN_WORD;			// not allowed as cont character
512cdf0e10cSrcweir 
513cdf0e10cSrcweir 		if ( nStartTypes & ASC_DOLLAR )
514cdf0e10cSrcweir 			pTable[36] |= TOKEN_CHAR_WORD;		// allowed as start character
515cdf0e10cSrcweir 		if ( nContTypes & ASC_DOLLAR )
516cdf0e10cSrcweir 			pTable[36] |= TOKEN_WORD;			// allowed as cont character
517cdf0e10cSrcweir 
518cdf0e10cSrcweir 		if ( nStartTypes & ASC_DOT )
519cdf0e10cSrcweir 			pTable[46] |= TOKEN_CHAR_WORD;		// allowed as start character
520cdf0e10cSrcweir 		if ( nContTypes & ASC_DOT )
521cdf0e10cSrcweir 			pTable[46] |= TOKEN_WORD;			// allowed as cont character
522cdf0e10cSrcweir 
523cdf0e10cSrcweir 		if ( nStartTypes & ASC_COLON )
524cdf0e10cSrcweir 			pTable[58] |= TOKEN_CHAR_WORD;		// allowed as start character
525cdf0e10cSrcweir 		if ( nContTypes & ASC_COLON )
526cdf0e10cSrcweir 			pTable[58] |= TOKEN_WORD;			// allowed as cont character
527cdf0e10cSrcweir 
528cdf0e10cSrcweir 		if ( nStartTypes & ASC_CONTROL )
529cdf0e10cSrcweir 			for ( i = 1; i < 32; i++ )
530cdf0e10cSrcweir 				pTable[i] |= TOKEN_CHAR_WORD;	// allowed as start character
531cdf0e10cSrcweir 		if ( nContTypes & ASC_CONTROL )
532cdf0e10cSrcweir 			for ( i = 1; i < 32; i++ )
533cdf0e10cSrcweir 				pTable[i] |= TOKEN_WORD;		// allowed as cont character
534cdf0e10cSrcweir 
535cdf0e10cSrcweir 		if ( nStartTypes & ASC_ANY_BUT_CONTROL )
536cdf0e10cSrcweir 			for ( i = 32; i < nDefCnt; i++ )
537cdf0e10cSrcweir 				pTable[i] |= TOKEN_CHAR_WORD;	// allowed as start character
538cdf0e10cSrcweir 		if ( nContTypes & ASC_ANY_BUT_CONTROL )
539cdf0e10cSrcweir 			for ( i = 32; i < nDefCnt; i++ )
540cdf0e10cSrcweir 				pTable[i] |= TOKEN_WORD;		// allowed as cont character
541cdf0e10cSrcweir 
542cdf0e10cSrcweir 	}
543cdf0e10cSrcweir 
544cdf0e10cSrcweir 	// Merge in (positively override with) user defined characters.
545cdf0e10cSrcweir 	// StartChars
546cdf0e10cSrcweir     sal_Int32 nLen = aStartChars.getLength();
547cdf0e10cSrcweir 	if ( nLen )
548cdf0e10cSrcweir 	{
549cdf0e10cSrcweir 		if ( !pStart )
550cdf0e10cSrcweir 			pStart = new UPT_FLAG_TYPE[ nLen ];
551cdf0e10cSrcweir         const sal_Unicode* p = aStartChars.getStr();
552cdf0e10cSrcweir         for ( sal_Int32 j=0; j<nLen; j++, p++ )
553cdf0e10cSrcweir 		{
554cdf0e10cSrcweir 			pStart[j] = TOKEN_CHAR_WORD;
555cdf0e10cSrcweir             if ( *p < nDefCnt )
556cdf0e10cSrcweir                 pTable[*p] |= TOKEN_CHAR_WORD;
557cdf0e10cSrcweir 		}
558cdf0e10cSrcweir 	}
559cdf0e10cSrcweir 	// ContChars
560cdf0e10cSrcweir     nLen = aContChars.getLength();
561cdf0e10cSrcweir 	if ( nLen )
562cdf0e10cSrcweir 	{
563cdf0e10cSrcweir 		if ( !pCont )
564cdf0e10cSrcweir 			pCont = new UPT_FLAG_TYPE[ nLen ];
565cdf0e10cSrcweir         const sal_Unicode* p = aContChars.getStr();
566cdf0e10cSrcweir         for ( sal_Int32 j=0; j<nLen; j++ )
567cdf0e10cSrcweir 		{
568cdf0e10cSrcweir 			pCont[j] = TOKEN_WORD;
569cdf0e10cSrcweir             if ( *p < nDefCnt )
570cdf0e10cSrcweir                 pTable[*p] |= TOKEN_WORD;
571cdf0e10cSrcweir 		}
572cdf0e10cSrcweir 	}
573cdf0e10cSrcweir }
574cdf0e10cSrcweir 
575cdf0e10cSrcweir 
destroyParserTable()576cdf0e10cSrcweir void cclass_Unicode::destroyParserTable()
577cdf0e10cSrcweir {
578cdf0e10cSrcweir 	if ( pCont )
579cdf0e10cSrcweir 		delete [] pCont;
580cdf0e10cSrcweir 	if ( pStart )
581cdf0e10cSrcweir 		delete [] pStart;
582cdf0e10cSrcweir 	if ( pTable )
583cdf0e10cSrcweir 		delete [] pTable;
584cdf0e10cSrcweir }
585cdf0e10cSrcweir 
586cdf0e10cSrcweir 
getFlags(const sal_Unicode * aStr,sal_Int32 nPos)587cdf0e10cSrcweir UPT_FLAG_TYPE cclass_Unicode::getFlags( const sal_Unicode* aStr, sal_Int32 nPos )
588cdf0e10cSrcweir {
589cdf0e10cSrcweir 	UPT_FLAG_TYPE nMask;
590cdf0e10cSrcweir 	sal_Unicode c = aStr[nPos];
591cdf0e10cSrcweir 	if ( c < nDefCnt )
592cdf0e10cSrcweir 		nMask = pTable[ sal_uInt8(c) ];
593cdf0e10cSrcweir 	else
594cdf0e10cSrcweir 		nMask = getFlagsExtended( aStr, nPos );
595cdf0e10cSrcweir 	switch ( eState )
596cdf0e10cSrcweir 	{
597cdf0e10cSrcweir 		case ssGetChar :
598cdf0e10cSrcweir         case ssRewindFromValue :
599cdf0e10cSrcweir         case ssIgnoreLeadingInRewind :
600cdf0e10cSrcweir 		case ssGetWordFirstChar :
601cdf0e10cSrcweir 			if ( !(nMask & TOKEN_CHAR_WORD) )
602cdf0e10cSrcweir 			{
603cdf0e10cSrcweir 				nMask |= getStartCharsFlags( c );
604cdf0e10cSrcweir 				if ( nMask & TOKEN_CHAR_WORD )
605cdf0e10cSrcweir 					nMask &= ~TOKEN_EXCLUDED;
606cdf0e10cSrcweir 			}
607cdf0e10cSrcweir 		break;
608cdf0e10cSrcweir 		case ssGetValue :
609cdf0e10cSrcweir 		case ssGetWord :
610cdf0e10cSrcweir 			if ( !(nMask & TOKEN_WORD) )
611cdf0e10cSrcweir 			{
612cdf0e10cSrcweir 				nMask |= getContCharsFlags( c );
613cdf0e10cSrcweir 				if ( nMask & TOKEN_WORD )
614cdf0e10cSrcweir 					nMask &= ~TOKEN_EXCLUDED;
615cdf0e10cSrcweir 			}
616cdf0e10cSrcweir 		break;
617cdf0e10cSrcweir         default:
618cdf0e10cSrcweir             ;   // other cases aren't needed, no compiler warning
619cdf0e10cSrcweir 	}
620cdf0e10cSrcweir 	return nMask;
621cdf0e10cSrcweir }
622cdf0e10cSrcweir 
623cdf0e10cSrcweir 
getFlagsExtended(const sal_Unicode * aStr,sal_Int32 nPos)624cdf0e10cSrcweir UPT_FLAG_TYPE cclass_Unicode::getFlagsExtended( const sal_Unicode* aStr, sal_Int32 nPos )
625cdf0e10cSrcweir {
626cdf0e10cSrcweir 	sal_Unicode c = aStr[nPos];
627cdf0e10cSrcweir 	if ( c == cGroupSep )
628cdf0e10cSrcweir 		return TOKEN_VALUE;
629cdf0e10cSrcweir 	else if ( c == cDecimalSep )
630cdf0e10cSrcweir 		return TOKEN_CHAR_VALUE | TOKEN_VALUE;
631cdf0e10cSrcweir 	using namespace i18n;
632cdf0e10cSrcweir     bool bStart = (eState == ssGetChar || eState == ssGetWordFirstChar ||
633cdf0e10cSrcweir             eState == ssRewindFromValue || eState == ssIgnoreLeadingInRewind);
634cdf0e10cSrcweir 	sal_Int32 nTypes = (bStart ? nStartTypes : nContTypes);
635cdf0e10cSrcweir 
636cdf0e10cSrcweir 	//! all KParseTokens::UNI_... must be matched
637cdf0e10cSrcweir     switch ( u_charType( (sal_uInt32) c ) )
638cdf0e10cSrcweir 	{
639cdf0e10cSrcweir 		case U_UPPERCASE_LETTER :
640cdf0e10cSrcweir 			return (nTypes & KParseTokens::UNI_UPALPHA) ?
641cdf0e10cSrcweir 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
642cdf0e10cSrcweir 				TOKEN_ILLEGAL;
643cdf0e10cSrcweir 		case U_LOWERCASE_LETTER :
644cdf0e10cSrcweir 			return (nTypes & KParseTokens::UNI_LOALPHA) ?
645cdf0e10cSrcweir 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
646cdf0e10cSrcweir 				TOKEN_ILLEGAL;
647cdf0e10cSrcweir 		case U_TITLECASE_LETTER :
648cdf0e10cSrcweir 			return (nTypes & KParseTokens::UNI_TITLE_ALPHA) ?
649cdf0e10cSrcweir 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
650cdf0e10cSrcweir 				TOKEN_ILLEGAL;
651cdf0e10cSrcweir 		case U_MODIFIER_LETTER :
652cdf0e10cSrcweir 			return (nTypes & KParseTokens::UNI_MODIFIER_LETTER) ?
653cdf0e10cSrcweir 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
654cdf0e10cSrcweir 				TOKEN_ILLEGAL;
655cdf0e10cSrcweir 		case U_NON_SPACING_MARK :
656cdf0e10cSrcweir         case U_COMBINING_SPACING_MARK :
657cdf0e10cSrcweir             // Non_Spacing_Mark can't be a leading character,
658cdf0e10cSrcweir             // nor can a spacing combining mark.
659cdf0e10cSrcweir             if (bStart)
660cdf0e10cSrcweir                 return TOKEN_ILLEGAL;
661cdf0e10cSrcweir 			// fall through, treat it as Other_Letter.
662cdf0e10cSrcweir 		case U_OTHER_LETTER :
663cdf0e10cSrcweir 			return (nTypes & KParseTokens::UNI_OTHER_LETTER) ?
664cdf0e10cSrcweir 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
665cdf0e10cSrcweir 				TOKEN_ILLEGAL;
666cdf0e10cSrcweir 		case U_DECIMAL_DIGIT_NUMBER :
667cdf0e10cSrcweir 			return ((nTypes & KParseTokens::UNI_DIGIT) ?
668cdf0e10cSrcweir 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
669cdf0e10cSrcweir 				TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
670cdf0e10cSrcweir 		case U_LETTER_NUMBER :
671cdf0e10cSrcweir 			return ((nTypes & KParseTokens::UNI_LETTER_NUMBER) ?
672cdf0e10cSrcweir 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
673cdf0e10cSrcweir 				TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
674cdf0e10cSrcweir 		case U_OTHER_NUMBER :
675cdf0e10cSrcweir 			return ((nTypes & KParseTokens::UNI_OTHER_NUMBER) ?
676cdf0e10cSrcweir 				(bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
677cdf0e10cSrcweir 				TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
678cdf0e10cSrcweir 		case U_SPACE_SEPARATOR :
679cdf0e10cSrcweir 			return ((nTypes & KParseTokens::IGNORE_LEADING_WS) ?
680cdf0e10cSrcweir 				TOKEN_CHAR_DONTCARE : (bStart ? TOKEN_CHAR_WORD : (TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP) ));
681cdf0e10cSrcweir 	}
682cdf0e10cSrcweir 
683cdf0e10cSrcweir 	return TOKEN_ILLEGAL;
684cdf0e10cSrcweir }
685cdf0e10cSrcweir 
686cdf0e10cSrcweir 
getStartCharsFlags(sal_Unicode c)687cdf0e10cSrcweir UPT_FLAG_TYPE cclass_Unicode::getStartCharsFlags( sal_Unicode c )
688cdf0e10cSrcweir {
689cdf0e10cSrcweir 	if ( pStart )
690cdf0e10cSrcweir 	{
691cdf0e10cSrcweir         const sal_Unicode* pStr = aStartChars.getStr();
692cdf0e10cSrcweir 		const sal_Unicode* p = StrChr( pStr, c );
693cdf0e10cSrcweir 		if ( p )
694cdf0e10cSrcweir 			return pStart[ p - pStr ];
695cdf0e10cSrcweir 	}
696cdf0e10cSrcweir 	return TOKEN_ILLEGAL;
697cdf0e10cSrcweir }
698cdf0e10cSrcweir 
699cdf0e10cSrcweir 
getContCharsFlags(sal_Unicode c)700cdf0e10cSrcweir UPT_FLAG_TYPE cclass_Unicode::getContCharsFlags( sal_Unicode c )
701cdf0e10cSrcweir {
702cdf0e10cSrcweir 	if ( pCont )
703cdf0e10cSrcweir 	{
704cdf0e10cSrcweir         const sal_Unicode* pStr = aContChars.getStr();
705cdf0e10cSrcweir 		const sal_Unicode* p = StrChr( pStr, c );
706cdf0e10cSrcweir 		if ( p )
707cdf0e10cSrcweir 			return pCont[ p - pStr ];
708cdf0e10cSrcweir 	}
709cdf0e10cSrcweir 	return TOKEN_ILLEGAL;
710cdf0e10cSrcweir }
711cdf0e10cSrcweir 
712cdf0e10cSrcweir 
parseText(ParseResult & r,const OUString & rText,sal_Int32 nPos,sal_Int32 nTokenType)713cdf0e10cSrcweir void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32 nPos, sal_Int32 nTokenType )
714cdf0e10cSrcweir {
715cdf0e10cSrcweir 	using namespace i18n;
716cdf0e10cSrcweir 	const sal_Unicode* const pTextStart = rText.getStr() + nPos;
717cdf0e10cSrcweir 	eState = ssGetChar;
718cdf0e10cSrcweir 
719cdf0e10cSrcweir     //! All the variables below (plus ParseResult) have to be resetted on ssRewindFromValue!
720cdf0e10cSrcweir 	const sal_Unicode* pSym = pTextStart;
721cdf0e10cSrcweir 	const sal_Unicode* pSrc = pSym;
722cdf0e10cSrcweir 	OUString aSymbol;
723cdf0e10cSrcweir 	sal_Unicode c = *pSrc;
724cdf0e10cSrcweir 	sal_Unicode cLast = 0;
725cdf0e10cSrcweir     int nDecSeps = 0;
726cdf0e10cSrcweir 	bool bQuote = false;
727cdf0e10cSrcweir 	bool bMightBeWord = true;
728cdf0e10cSrcweir 	bool bMightBeWordLast = true;
729cdf0e10cSrcweir     //! All the variables above (plus ParseResult) have to be resetted on ssRewindFromValue!
730cdf0e10cSrcweir 
731cdf0e10cSrcweir 	while ( (c != 0) && (eState != ssStop) )
732cdf0e10cSrcweir 	{
733cdf0e10cSrcweir 		UPT_FLAG_TYPE nMask = getFlags( pTextStart, pSrc - pTextStart );
734cdf0e10cSrcweir 		if ( nMask & TOKEN_EXCLUDED )
735cdf0e10cSrcweir 			eState = ssBounce;
736cdf0e10cSrcweir 		if ( bMightBeWord )
737cdf0e10cSrcweir 		{	// only relevant for ssGetValue fall back
738cdf0e10cSrcweir 			if ( eState == ssGetChar || eState == ssRewindFromValue ||
739cdf0e10cSrcweir                     eState == ssIgnoreLeadingInRewind )
740cdf0e10cSrcweir 				bMightBeWord = ((nMask & TOKEN_CHAR_WORD) != 0);
741cdf0e10cSrcweir 			else
742cdf0e10cSrcweir 				bMightBeWord = ((nMask & TOKEN_WORD) != 0);
743cdf0e10cSrcweir 		}
744cdf0e10cSrcweir 		sal_Int32 nParseTokensType = getParseTokensType( pTextStart, pSrc - pTextStart );
745cdf0e10cSrcweir 		pSrc++;
746cdf0e10cSrcweir 		switch (eState)
747cdf0e10cSrcweir 		{
748cdf0e10cSrcweir 			case ssGetChar :
749cdf0e10cSrcweir             case ssRewindFromValue :
750cdf0e10cSrcweir             case ssIgnoreLeadingInRewind :
751cdf0e10cSrcweir 			{
752cdf0e10cSrcweir                 if ( (nMask & TOKEN_CHAR_VALUE) && eState != ssRewindFromValue
753cdf0e10cSrcweir                         && eState != ssIgnoreLeadingInRewind )
754cdf0e10cSrcweir 				{	//! must be first, may fall back to ssGetWord via bMightBeWord
755cdf0e10cSrcweir 					eState = ssGetValue;
756cdf0e10cSrcweir 					if ( nMask & TOKEN_VALUE_DIGIT )
757cdf0e10cSrcweir                     {
758cdf0e10cSrcweir                         if ( 128 <= c )
759cdf0e10cSrcweir                             r.TokenType = KParseType::UNI_NUMBER;
760cdf0e10cSrcweir                         else
761cdf0e10cSrcweir                             r.TokenType = KParseType::ASC_NUMBER;
762cdf0e10cSrcweir                     }
763cdf0e10cSrcweir                     else if ( c == cDecimalSep )
764cdf0e10cSrcweir                     {
765cdf0e10cSrcweir                         if ( *pSrc )
766cdf0e10cSrcweir                             ++nDecSeps;
767cdf0e10cSrcweir                         else
768cdf0e10cSrcweir                             eState = ssRewindFromValue;
769cdf0e10cSrcweir                             // retry for ONE_SINGLE_CHAR or others
770cdf0e10cSrcweir                     }
771cdf0e10cSrcweir 				}
772cdf0e10cSrcweir 				else if ( nMask & TOKEN_CHAR_WORD )
773cdf0e10cSrcweir 				{
774cdf0e10cSrcweir 					eState = ssGetWord;
775cdf0e10cSrcweir 					r.TokenType = KParseType::IDENTNAME;
776cdf0e10cSrcweir 				}
777cdf0e10cSrcweir 				else if ( nMask & TOKEN_NAME_SEP )
778cdf0e10cSrcweir 				{
779cdf0e10cSrcweir 					eState = ssGetWordFirstChar;
780cdf0e10cSrcweir 					bQuote = true;
781cdf0e10cSrcweir 					pSym++;
782cdf0e10cSrcweir 					nParseTokensType = 0;	// will be taken of first real character
783cdf0e10cSrcweir 					r.TokenType = KParseType::SINGLE_QUOTE_NAME;
784cdf0e10cSrcweir 				}
785cdf0e10cSrcweir 				else if ( nMask & TOKEN_CHAR_STRING )
786cdf0e10cSrcweir 				{
787cdf0e10cSrcweir 					eState = ssGetString;
788cdf0e10cSrcweir 					pSym++;
789cdf0e10cSrcweir 					nParseTokensType = 0;	// will be taken of first real character
790cdf0e10cSrcweir 					r.TokenType = KParseType::DOUBLE_QUOTE_STRING;
791cdf0e10cSrcweir 				}
792cdf0e10cSrcweir 				else if ( nMask & TOKEN_CHAR_DONTCARE )
793cdf0e10cSrcweir 				{
794cdf0e10cSrcweir 					if ( nStartTypes & KParseTokens::IGNORE_LEADING_WS )
795cdf0e10cSrcweir 					{
796cdf0e10cSrcweir                         if (eState == ssRewindFromValue)
797cdf0e10cSrcweir                             eState = ssIgnoreLeadingInRewind;
798cdf0e10cSrcweir 						r.LeadingWhiteSpace++;
799cdf0e10cSrcweir 						pSym++;
800cdf0e10cSrcweir 						nParseTokensType = 0;	// wait until real character
801cdf0e10cSrcweir 						bMightBeWord = true;
802cdf0e10cSrcweir 					}
803cdf0e10cSrcweir 					else
804cdf0e10cSrcweir 						eState = ssBounce;
805cdf0e10cSrcweir 				}
806cdf0e10cSrcweir 				else if ( nMask & TOKEN_CHAR_BOOL )
807cdf0e10cSrcweir 				{
808cdf0e10cSrcweir 					eState = ssGetBool;
809cdf0e10cSrcweir 					r.TokenType = KParseType::BOOLEAN;
810cdf0e10cSrcweir 				}
811cdf0e10cSrcweir 				else if ( nMask & TOKEN_CHAR )
812cdf0e10cSrcweir 				{	//! must be last
813cdf0e10cSrcweir 					eState = ssStop;
814cdf0e10cSrcweir 					r.TokenType = KParseType::ONE_SINGLE_CHAR;
815cdf0e10cSrcweir 				}
816cdf0e10cSrcweir 				else
817cdf0e10cSrcweir 					eState = ssBounce;		// not known
818cdf0e10cSrcweir 			}
819cdf0e10cSrcweir 			break;
820cdf0e10cSrcweir 			case ssGetValue :
821cdf0e10cSrcweir 			{
822cdf0e10cSrcweir                 if ( nMask & TOKEN_VALUE_DIGIT )
823cdf0e10cSrcweir                 {
824cdf0e10cSrcweir                     if ( 128 <= c )
825cdf0e10cSrcweir                         r.TokenType = KParseType::UNI_NUMBER;
826cdf0e10cSrcweir                     else if ( r.TokenType != KParseType::UNI_NUMBER )
827cdf0e10cSrcweir                         r.TokenType = KParseType::ASC_NUMBER;
828cdf0e10cSrcweir                 }
829cdf0e10cSrcweir                 if ( nMask & TOKEN_VALUE )
830cdf0e10cSrcweir                 {
831cdf0e10cSrcweir                     if ( c == cDecimalSep && ++nDecSeps > 1 )
832cdf0e10cSrcweir                     {
833cdf0e10cSrcweir                         if ( pSrc - pTextStart == 2 )
834cdf0e10cSrcweir                             eState = ssRewindFromValue;
835cdf0e10cSrcweir                             // consecutive separators
836cdf0e10cSrcweir                         else
837cdf0e10cSrcweir                             eState = ssStopBack;
838cdf0e10cSrcweir                     }
839cdf0e10cSrcweir                     // else keep it going
840cdf0e10cSrcweir                 }
841cdf0e10cSrcweir 				else if ( c == 'E' || c == 'e' )
842cdf0e10cSrcweir 				{
843cdf0e10cSrcweir 					UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart );
844cdf0e10cSrcweir 					if ( nNext & TOKEN_VALUE_EXP )
845cdf0e10cSrcweir 						;	// keep it going
846cdf0e10cSrcweir 					else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
847cdf0e10cSrcweir 					{	// might be a numerical name (1.2efg)
848cdf0e10cSrcweir 						eState = ssGetWord;
849cdf0e10cSrcweir 						r.TokenType = KParseType::IDENTNAME;
850cdf0e10cSrcweir 					}
851cdf0e10cSrcweir 					else
852cdf0e10cSrcweir 						eState = ssStopBack;
853cdf0e10cSrcweir 				}
854cdf0e10cSrcweir 				else if ( nMask & TOKEN_VALUE_SIGN )
855cdf0e10cSrcweir 				{
856cdf0e10cSrcweir 					if ( (cLast == 'E') || (cLast == 'e') )
857cdf0e10cSrcweir 					{
858cdf0e10cSrcweir 						UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart );
859cdf0e10cSrcweir 						if ( nNext & TOKEN_VALUE_EXP_VALUE )
860cdf0e10cSrcweir 							;	// keep it going
861cdf0e10cSrcweir 						else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
862cdf0e10cSrcweir 						{	// might be a numerical name (1.2e+fg)
863cdf0e10cSrcweir 							eState = ssGetWord;
864cdf0e10cSrcweir 							r.TokenType = KParseType::IDENTNAME;
865cdf0e10cSrcweir 						}
866cdf0e10cSrcweir 						else
867cdf0e10cSrcweir 							eState = ssStopBack;
868cdf0e10cSrcweir 					}
869cdf0e10cSrcweir 					else if ( bMightBeWord )
870cdf0e10cSrcweir 					{	// might be a numerical name (1.2+fg)
871cdf0e10cSrcweir 						eState = ssGetWord;
872cdf0e10cSrcweir 						r.TokenType = KParseType::IDENTNAME;
873cdf0e10cSrcweir 					}
874cdf0e10cSrcweir 					else
875cdf0e10cSrcweir 						eState = ssStopBack;
876cdf0e10cSrcweir 				}
877cdf0e10cSrcweir 				else if ( bMightBeWord && (nMask & TOKEN_WORD) )
878cdf0e10cSrcweir 				{	// might be a numerical name (1995.A1)
879cdf0e10cSrcweir 					eState = ssGetWord;
880cdf0e10cSrcweir 					r.TokenType = KParseType::IDENTNAME;
881cdf0e10cSrcweir 				}
882cdf0e10cSrcweir 				else
883cdf0e10cSrcweir 					eState = ssStopBack;
884cdf0e10cSrcweir 			}
885cdf0e10cSrcweir 			break;
886cdf0e10cSrcweir 			case ssGetWordFirstChar :
887cdf0e10cSrcweir 				eState = ssGetWord;
888cdf0e10cSrcweir 				// fall thru
889cdf0e10cSrcweir 			case ssGetWord :
890cdf0e10cSrcweir 			{
891cdf0e10cSrcweir 				if ( nMask & TOKEN_WORD )
892cdf0e10cSrcweir 					;	// keep it going
893cdf0e10cSrcweir 				else if ( nMask & TOKEN_NAME_SEP )
894cdf0e10cSrcweir 				{
895cdf0e10cSrcweir 					if ( bQuote )
896cdf0e10cSrcweir 					{
897cdf0e10cSrcweir 						if ( cLast == '\\' )
898cdf0e10cSrcweir 						{	// escaped
899cdf0e10cSrcweir 							aSymbol += OUString( pSym, pSrc - pSym - 2 );
900cdf0e10cSrcweir 							aSymbol += OUString( &c, 1);
901cdf0e10cSrcweir 						}
902cdf0e10cSrcweir 						else
903cdf0e10cSrcweir 						{
904cdf0e10cSrcweir 							eState = ssStop;
905cdf0e10cSrcweir 							aSymbol += OUString( pSym, pSrc - pSym - 1 );
906cdf0e10cSrcweir 						}
907cdf0e10cSrcweir 						pSym = pSrc;
908cdf0e10cSrcweir 					}
909cdf0e10cSrcweir 					else
910cdf0e10cSrcweir 						eState = ssStopBack;
911cdf0e10cSrcweir 				}
912cdf0e10cSrcweir 				else if ( bQuote )
913cdf0e10cSrcweir 					;	// keep it going
914cdf0e10cSrcweir 				else
915cdf0e10cSrcweir 					eState = ssStopBack;
916cdf0e10cSrcweir 			}
917cdf0e10cSrcweir 			break;
918cdf0e10cSrcweir 			case ssGetString :
919cdf0e10cSrcweir 			{
920cdf0e10cSrcweir 				if ( nMask & TOKEN_STRING_SEP )
921cdf0e10cSrcweir 				{
922cdf0e10cSrcweir 					if ( cLast == '\\' )
923cdf0e10cSrcweir 					{	// escaped
924cdf0e10cSrcweir 						aSymbol += OUString( pSym, pSrc - pSym - 2 );
925cdf0e10cSrcweir 						aSymbol += OUString( &c, 1);
926cdf0e10cSrcweir 					}
927cdf0e10cSrcweir                     else if ( c == *pSrc &&
928cdf0e10cSrcweir                             !(nContTypes & KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING) )
929cdf0e10cSrcweir 					{	// "" => literal " escaped
930cdf0e10cSrcweir 						aSymbol += OUString( pSym, pSrc - pSym );
931cdf0e10cSrcweir 						pSrc++;
932cdf0e10cSrcweir 					}
933cdf0e10cSrcweir 					else
934cdf0e10cSrcweir 					{
935cdf0e10cSrcweir 						eState = ssStop;
936cdf0e10cSrcweir 						aSymbol += OUString( pSym, pSrc - pSym - 1 );
937cdf0e10cSrcweir 					}
938cdf0e10cSrcweir 					pSym = pSrc;
939cdf0e10cSrcweir 				}
940cdf0e10cSrcweir 			}
941cdf0e10cSrcweir 			break;
942cdf0e10cSrcweir 			case ssGetBool :
943cdf0e10cSrcweir 			{
944cdf0e10cSrcweir 				if ( (nMask & TOKEN_BOOL) )
945cdf0e10cSrcweir 					eState = ssStop;	// maximum 2: <, >, <>, <=, >=
946cdf0e10cSrcweir 				else
947cdf0e10cSrcweir 					eState = ssStopBack;
948cdf0e10cSrcweir 			}
949cdf0e10cSrcweir 			break;
950cdf0e10cSrcweir             case ssStopBack :
951cdf0e10cSrcweir             case ssBounce :
952cdf0e10cSrcweir             case ssStop :
953cdf0e10cSrcweir                 ;   // nothing, no compiler warning
954cdf0e10cSrcweir             break;
955cdf0e10cSrcweir 		}
956cdf0e10cSrcweir         if ( eState == ssRewindFromValue )
957cdf0e10cSrcweir         {
958cdf0e10cSrcweir             r = ParseResult();
959cdf0e10cSrcweir             pSym = pTextStart;
960cdf0e10cSrcweir             pSrc = pSym;
961cdf0e10cSrcweir             aSymbol = OUString();
962cdf0e10cSrcweir             c = *pSrc;
963cdf0e10cSrcweir             cLast = 0;
964cdf0e10cSrcweir             nDecSeps = 0;
965cdf0e10cSrcweir             bQuote = false;
966cdf0e10cSrcweir             bMightBeWord = true;
967cdf0e10cSrcweir             bMightBeWordLast = true;
968cdf0e10cSrcweir         }
969cdf0e10cSrcweir         else
970cdf0e10cSrcweir         {
971cdf0e10cSrcweir             if ( !(r.TokenType & nTokenType) )
972cdf0e10cSrcweir             {
973cdf0e10cSrcweir                 if ( (r.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER))
974cdf0e10cSrcweir                         && (nTokenType & KParseType::IDENTNAME) && bMightBeWord )
975cdf0e10cSrcweir                     ;	// keep a number that might be a word
976cdf0e10cSrcweir                 else if ( r.LeadingWhiteSpace == (pSrc - pTextStart) )
977cdf0e10cSrcweir                     ;	// keep ignored white space
978cdf0e10cSrcweir                 else if ( !r.TokenType && eState == ssGetValue && (nMask & TOKEN_VALUE_SEP) )
979cdf0e10cSrcweir                     ;   // keep uncertain value
980cdf0e10cSrcweir                 else
981cdf0e10cSrcweir                     eState = ssBounce;
982cdf0e10cSrcweir             }
983cdf0e10cSrcweir             if ( eState == ssBounce )
984cdf0e10cSrcweir             {
985cdf0e10cSrcweir                 r.TokenType = 0;
986cdf0e10cSrcweir                 eState = ssStopBack;
987cdf0e10cSrcweir             }
988cdf0e10cSrcweir             if ( eState == ssStopBack )
989cdf0e10cSrcweir             {	// put back
990cdf0e10cSrcweir                 pSrc--;
991cdf0e10cSrcweir                 bMightBeWord = bMightBeWordLast;
992cdf0e10cSrcweir                 eState = ssStop;
993cdf0e10cSrcweir             }
994cdf0e10cSrcweir             if ( eState != ssStop )
995cdf0e10cSrcweir             {
996cdf0e10cSrcweir                 if ( !r.StartFlags )
997cdf0e10cSrcweir                     r.StartFlags |= nParseTokensType;
998cdf0e10cSrcweir                 else
999cdf0e10cSrcweir                     r.ContFlags |= nParseTokensType;
1000cdf0e10cSrcweir             }
1001cdf0e10cSrcweir             bMightBeWordLast = bMightBeWord;
1002cdf0e10cSrcweir             cLast = c;
1003cdf0e10cSrcweir             c = *pSrc;
1004cdf0e10cSrcweir         }
1005cdf0e10cSrcweir 	}
1006cdf0e10cSrcweir 	// r.CharLen is the length in characters (not code points) of the parsed
1007cdf0e10cSrcweir 	// token not including any leading white space, change this calculation if
1008cdf0e10cSrcweir 	// multi-code-point Unicode characters are to be supported.
1009cdf0e10cSrcweir 	r.CharLen = pSrc - pTextStart - r.LeadingWhiteSpace;
1010cdf0e10cSrcweir 	r.EndPos = nPos + (pSrc - pTextStart);
1011cdf0e10cSrcweir 	if ( r.TokenType & KParseType::ASC_NUMBER )
1012cdf0e10cSrcweir 	{
1013cdf0e10cSrcweir         r.Value = rtl_math_uStringToDouble( pTextStart + r.LeadingWhiteSpace,
1014cdf0e10cSrcweir                 pTextStart + r.EndPos, cDecimalSep, cGroupSep, NULL, NULL );
1015cdf0e10cSrcweir 		if ( bMightBeWord )
1016cdf0e10cSrcweir 			r.TokenType |= KParseType::IDENTNAME;
1017cdf0e10cSrcweir 	}
1018cdf0e10cSrcweir 	else if ( r.TokenType & KParseType::UNI_NUMBER )
1019cdf0e10cSrcweir 	{
1020cdf0e10cSrcweir         if ( !xNatNumSup.is() )
1021cdf0e10cSrcweir         {
1022cdf0e10cSrcweir #define NATIVENUMBERSUPPLIER_SERVICENAME "com.sun.star.i18n.NativeNumberSupplier"
1023cdf0e10cSrcweir             if ( xMSF.is() )
1024cdf0e10cSrcweir             {
1025cdf0e10cSrcweir                 xNatNumSup = Reference< XNativeNumberSupplier > (
1026cdf0e10cSrcweir                         xMSF->createInstance( OUString(
1027cdf0e10cSrcweir                                 RTL_CONSTASCII_USTRINGPARAM(
1028cdf0e10cSrcweir                                     NATIVENUMBERSUPPLIER_SERVICENAME ) ) ),
1029cdf0e10cSrcweir                         UNO_QUERY );
1030cdf0e10cSrcweir             }
1031cdf0e10cSrcweir             if ( !xNatNumSup.is() )
1032cdf0e10cSrcweir             {
1033cdf0e10cSrcweir                 throw RuntimeException( OUString(
1034cdf0e10cSrcweir #ifdef DBG_UTIL
1035cdf0e10cSrcweir                     RTL_CONSTASCII_USTRINGPARAM(
1036cdf0e10cSrcweir                         "cclass_Unicode::parseText: can't instanciate "
1037cdf0e10cSrcweir                         NATIVENUMBERSUPPLIER_SERVICENAME )
1038cdf0e10cSrcweir #endif
1039cdf0e10cSrcweir                     ), *this );
1040cdf0e10cSrcweir             }
1041cdf0e10cSrcweir #undef NATIVENUMBERSUPPLIER_SERVICENAME
1042cdf0e10cSrcweir         }
1043cdf0e10cSrcweir         OUString aTmp( pTextStart + r.LeadingWhiteSpace, r.EndPos - nPos +
1044cdf0e10cSrcweir                 r.LeadingWhiteSpace );
1045cdf0e10cSrcweir         // transliterate to ASCII
1046cdf0e10cSrcweir         aTmp = xNatNumSup->getNativeNumberString( aTmp, aParserLocale,
1047cdf0e10cSrcweir                 NativeNumberMode::NATNUM0 );
1048cdf0e10cSrcweir         r.Value = ::rtl::math::stringToDouble( aTmp, cDecimalSep, cGroupSep, NULL, NULL );
1049cdf0e10cSrcweir 		if ( bMightBeWord )
1050cdf0e10cSrcweir 			r.TokenType |= KParseType::IDENTNAME;
1051cdf0e10cSrcweir 	}
1052cdf0e10cSrcweir 	else if ( r.TokenType & (KParseType::SINGLE_QUOTE_NAME | KParseType::DOUBLE_QUOTE_STRING) )
1053cdf0e10cSrcweir 	{
1054cdf0e10cSrcweir 		if ( pSym < pSrc )
1055cdf0e10cSrcweir 		{	//! open quote
1056cdf0e10cSrcweir 			aSymbol += OUString( pSym, pSrc - pSym );
1057cdf0e10cSrcweir 			r.TokenType |= KParseType::MISSING_QUOTE;
1058cdf0e10cSrcweir 		}
1059cdf0e10cSrcweir 		r.DequotedNameOrString = aSymbol;
1060cdf0e10cSrcweir 	}
1061cdf0e10cSrcweir }
1062cdf0e10cSrcweir 
1063cdf0e10cSrcweir } } } }
1064