1*449ab281SAndrew Rist /**************************************************************
2cdf0e10cSrcweir *
3*449ab281SAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one
4*449ab281SAndrew Rist * or more contributor license agreements. See the NOTICE file
5*449ab281SAndrew Rist * distributed with this work for additional information
6*449ab281SAndrew Rist * regarding copyright ownership. The ASF licenses this file
7*449ab281SAndrew Rist * to you under the Apache License, Version 2.0 (the
8*449ab281SAndrew Rist * "License"); you may not use this file except in compliance
9*449ab281SAndrew Rist * with the License. You may obtain a copy of the License at
10*449ab281SAndrew Rist *
11*449ab281SAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0
12*449ab281SAndrew Rist *
13*449ab281SAndrew Rist * Unless required by applicable law or agreed to in writing,
14*449ab281SAndrew Rist * software distributed under the License is distributed on an
15*449ab281SAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*449ab281SAndrew Rist * KIND, either express or implied. See the License for the
17*449ab281SAndrew Rist * specific language governing permissions and limitations
18*449ab281SAndrew Rist * under the License.
19*449ab281SAndrew Rist *
20*449ab281SAndrew Rist *************************************************************/
21*449ab281SAndrew Rist
22*449ab281SAndrew Rist
23cdf0e10cSrcweir
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_i18npool.hxx"
26cdf0e10cSrcweir
27cdf0e10cSrcweir #include <cclass_unicode.hxx>
28cdf0e10cSrcweir #include <unicode/uchar.h>
29cdf0e10cSrcweir #include <rtl/math.hxx>
30cdf0e10cSrcweir #include <rtl/ustring.hxx>
31cdf0e10cSrcweir #include <com/sun/star/i18n/KParseTokens.hpp>
32cdf0e10cSrcweir #include <com/sun/star/i18n/KParseType.hpp>
33cdf0e10cSrcweir #include <com/sun/star/i18n/UnicodeType.hpp>
34cdf0e10cSrcweir #include <com/sun/star/i18n/XLocaleData.hpp>
35cdf0e10cSrcweir #include <com/sun/star/i18n/NativeNumberMode.hpp>
36cdf0e10cSrcweir
37cdf0e10cSrcweir #include <string.h> // memcpy()
38cdf0e10cSrcweir
39cdf0e10cSrcweir using namespace ::com::sun::star::uno;
40cdf0e10cSrcweir using namespace ::com::sun::star::lang;
41cdf0e10cSrcweir using namespace ::rtl;
42cdf0e10cSrcweir
43cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n {
44cdf0e10cSrcweir
45cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_ILLEGAL = 0x00000000;
46cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR = 0x00000001;
47cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_BOOL = 0x00000002;
48cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_WORD = 0x00000004;
49cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_VALUE = 0x00000008;
50cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_STRING = 0x00000010;
51cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_DONTCARE= 0x00000020;
52cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_BOOL = 0x00000040;
53cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD = 0x00000080;
54cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD_SEP = 0x00000100;
55cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE = 0x00000200;
56cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SEP = 0x00000400;
57cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP = 0x00000800;
58cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SIGN = 0x00001000;
59cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP_VALUE = 0x00002000;
60cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_DIGIT = 0x00004000;
61cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_NAME_SEP = 0x20000000;
62cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_STRING_SEP = 0x40000000;
63cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::TOKEN_EXCLUDED = 0x80000000;
64cdf0e10cSrcweir
65cdf0e10cSrcweir #define TOKEN_DIGIT_FLAGS (TOKEN_CHAR_VALUE | TOKEN_VALUE | TOKEN_VALUE_EXP | TOKEN_VALUE_EXP_VALUE | TOKEN_VALUE_DIGIT)
66cdf0e10cSrcweir
67cdf0e10cSrcweir // Default identifier/name specification is [A-Za-z_][A-Za-z0-9_]*
68cdf0e10cSrcweir
69cdf0e10cSrcweir const sal_uInt8 cclass_Unicode::nDefCnt = 128;
70cdf0e10cSrcweir const UPT_FLAG_TYPE cclass_Unicode::pDefaultParserTable[ nDefCnt ] =
71cdf0e10cSrcweir {
72cdf0e10cSrcweir // (...) == Calc formula compiler specific, commented out and modified
73cdf0e10cSrcweir
74cdf0e10cSrcweir /* \0 */ TOKEN_EXCLUDED,
75cdf0e10cSrcweir TOKEN_ILLEGAL,
76cdf0e10cSrcweir TOKEN_ILLEGAL,
77cdf0e10cSrcweir TOKEN_ILLEGAL,
78cdf0e10cSrcweir TOKEN_ILLEGAL,
79cdf0e10cSrcweir TOKEN_ILLEGAL,
80cdf0e10cSrcweir TOKEN_ILLEGAL,
81cdf0e10cSrcweir TOKEN_ILLEGAL,
82cdf0e10cSrcweir TOKEN_ILLEGAL,
83cdf0e10cSrcweir /* 9 \t */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL)
84cdf0e10cSrcweir TOKEN_ILLEGAL,
85cdf0e10cSrcweir /* 11 \v */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL)
86cdf0e10cSrcweir TOKEN_ILLEGAL,
87cdf0e10cSrcweir TOKEN_ILLEGAL,
88cdf0e10cSrcweir TOKEN_ILLEGAL,
89cdf0e10cSrcweir TOKEN_ILLEGAL,
90cdf0e10cSrcweir TOKEN_ILLEGAL,
91cdf0e10cSrcweir TOKEN_ILLEGAL,
92cdf0e10cSrcweir TOKEN_ILLEGAL,
93cdf0e10cSrcweir TOKEN_ILLEGAL,
94cdf0e10cSrcweir TOKEN_ILLEGAL,
95cdf0e10cSrcweir TOKEN_ILLEGAL,
96cdf0e10cSrcweir TOKEN_ILLEGAL,
97cdf0e10cSrcweir TOKEN_ILLEGAL,
98cdf0e10cSrcweir TOKEN_ILLEGAL,
99cdf0e10cSrcweir TOKEN_ILLEGAL,
100cdf0e10cSrcweir TOKEN_ILLEGAL,
101cdf0e10cSrcweir TOKEN_ILLEGAL,
102cdf0e10cSrcweir TOKEN_ILLEGAL,
103cdf0e10cSrcweir TOKEN_ILLEGAL,
104cdf0e10cSrcweir TOKEN_ILLEGAL,
105cdf0e10cSrcweir TOKEN_ILLEGAL,
106cdf0e10cSrcweir /* 32 */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
107cdf0e10cSrcweir /* 33 ! */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
108cdf0e10cSrcweir /* 34 " */ TOKEN_CHAR_STRING | TOKEN_STRING_SEP,
109cdf0e10cSrcweir /* 35 # */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD_SEP)
110cdf0e10cSrcweir /* 36 $ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_WORD | TOKEN_WORD)
111cdf0e10cSrcweir /* 37 % */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_VALUE)
112cdf0e10cSrcweir /* 38 & */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
113cdf0e10cSrcweir /* 39 ' */ TOKEN_NAME_SEP,
114cdf0e10cSrcweir /* 40 ( */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
115cdf0e10cSrcweir /* 41 ) */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
116cdf0e10cSrcweir /* 42 * */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
117cdf0e10cSrcweir /* 43 + */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN,
118cdf0e10cSrcweir /* 44 , */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_VALUE | TOKEN_VALUE)
119cdf0e10cSrcweir /* 45 - */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN,
120cdf0e10cSrcweir /* 46 . */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD | TOKEN_CHAR_VALUE | TOKEN_VALUE)
121cdf0e10cSrcweir /* 47 / */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
122cdf0e10cSrcweir //for ( i = 48; i < 58; i++ )
123cdf0e10cSrcweir /* 48 0 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
124cdf0e10cSrcweir /* 49 1 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
125cdf0e10cSrcweir /* 50 2 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
126cdf0e10cSrcweir /* 51 3 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
127cdf0e10cSrcweir /* 52 4 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
128cdf0e10cSrcweir /* 53 5 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
129cdf0e10cSrcweir /* 54 6 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
130cdf0e10cSrcweir /* 55 7 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
131cdf0e10cSrcweir /* 56 8 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
132cdf0e10cSrcweir /* 57 9 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
133cdf0e10cSrcweir /* 58 : */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD)
134cdf0e10cSrcweir /* 59 ; */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
135cdf0e10cSrcweir /* 60 < */ TOKEN_CHAR_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
136cdf0e10cSrcweir /* 61 = */ TOKEN_CHAR | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
137cdf0e10cSrcweir /* 62 > */ TOKEN_CHAR_BOOL | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
138cdf0e10cSrcweir /* 63 ? */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_WORD | TOKEN_WORD)
139cdf0e10cSrcweir /* 64 @ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
140cdf0e10cSrcweir //for ( i = 65; i < 91; i++ )
141cdf0e10cSrcweir /* 65 A */ TOKEN_CHAR_WORD | TOKEN_WORD,
142cdf0e10cSrcweir /* 66 B */ TOKEN_CHAR_WORD | TOKEN_WORD,
143cdf0e10cSrcweir /* 67 C */ TOKEN_CHAR_WORD | TOKEN_WORD,
144cdf0e10cSrcweir /* 68 D */ TOKEN_CHAR_WORD | TOKEN_WORD,
145cdf0e10cSrcweir /* 69 E */ TOKEN_CHAR_WORD | TOKEN_WORD,
146cdf0e10cSrcweir /* 70 F */ TOKEN_CHAR_WORD | TOKEN_WORD,
147cdf0e10cSrcweir /* 71 G */ TOKEN_CHAR_WORD | TOKEN_WORD,
148cdf0e10cSrcweir /* 72 H */ TOKEN_CHAR_WORD | TOKEN_WORD,
149cdf0e10cSrcweir /* 73 I */ TOKEN_CHAR_WORD | TOKEN_WORD,
150cdf0e10cSrcweir /* 74 J */ TOKEN_CHAR_WORD | TOKEN_WORD,
151cdf0e10cSrcweir /* 75 K */ TOKEN_CHAR_WORD | TOKEN_WORD,
152cdf0e10cSrcweir /* 76 L */ TOKEN_CHAR_WORD | TOKEN_WORD,
153cdf0e10cSrcweir /* 77 M */ TOKEN_CHAR_WORD | TOKEN_WORD,
154cdf0e10cSrcweir /* 78 N */ TOKEN_CHAR_WORD | TOKEN_WORD,
155cdf0e10cSrcweir /* 79 O */ TOKEN_CHAR_WORD | TOKEN_WORD,
156cdf0e10cSrcweir /* 80 P */ TOKEN_CHAR_WORD | TOKEN_WORD,
157cdf0e10cSrcweir /* 81 Q */ TOKEN_CHAR_WORD | TOKEN_WORD,
158cdf0e10cSrcweir /* 82 R */ TOKEN_CHAR_WORD | TOKEN_WORD,
159cdf0e10cSrcweir /* 83 S */ TOKEN_CHAR_WORD | TOKEN_WORD,
160cdf0e10cSrcweir /* 84 T */ TOKEN_CHAR_WORD | TOKEN_WORD,
161cdf0e10cSrcweir /* 85 U */ TOKEN_CHAR_WORD | TOKEN_WORD,
162cdf0e10cSrcweir /* 86 V */ TOKEN_CHAR_WORD | TOKEN_WORD,
163cdf0e10cSrcweir /* 87 W */ TOKEN_CHAR_WORD | TOKEN_WORD,
164cdf0e10cSrcweir /* 88 X */ TOKEN_CHAR_WORD | TOKEN_WORD,
165cdf0e10cSrcweir /* 89 Y */ TOKEN_CHAR_WORD | TOKEN_WORD,
166cdf0e10cSrcweir /* 90 Z */ TOKEN_CHAR_WORD | TOKEN_WORD,
167cdf0e10cSrcweir /* 91 [ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
168cdf0e10cSrcweir /* 92 \ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
169cdf0e10cSrcweir /* 93 ] */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
170cdf0e10cSrcweir /* 94 ^ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
171cdf0e10cSrcweir /* 95 _ */ TOKEN_CHAR_WORD | TOKEN_WORD,
172cdf0e10cSrcweir /* 96 ` */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
173cdf0e10cSrcweir //for ( i = 97; i < 123; i++ )
174cdf0e10cSrcweir /* 97 a */ TOKEN_CHAR_WORD | TOKEN_WORD,
175cdf0e10cSrcweir /* 98 b */ TOKEN_CHAR_WORD | TOKEN_WORD,
176cdf0e10cSrcweir /* 99 c */ TOKEN_CHAR_WORD | TOKEN_WORD,
177cdf0e10cSrcweir /* 100 d */ TOKEN_CHAR_WORD | TOKEN_WORD,
178cdf0e10cSrcweir /* 101 e */ TOKEN_CHAR_WORD | TOKEN_WORD,
179cdf0e10cSrcweir /* 102 f */ TOKEN_CHAR_WORD | TOKEN_WORD,
180cdf0e10cSrcweir /* 103 g */ TOKEN_CHAR_WORD | TOKEN_WORD,
181cdf0e10cSrcweir /* 104 h */ TOKEN_CHAR_WORD | TOKEN_WORD,
182cdf0e10cSrcweir /* 105 i */ TOKEN_CHAR_WORD | TOKEN_WORD,
183cdf0e10cSrcweir /* 106 j */ TOKEN_CHAR_WORD | TOKEN_WORD,
184cdf0e10cSrcweir /* 107 k */ TOKEN_CHAR_WORD | TOKEN_WORD,
185cdf0e10cSrcweir /* 108 l */ TOKEN_CHAR_WORD | TOKEN_WORD,
186cdf0e10cSrcweir /* 109 m */ TOKEN_CHAR_WORD | TOKEN_WORD,
187cdf0e10cSrcweir /* 110 n */ TOKEN_CHAR_WORD | TOKEN_WORD,
188cdf0e10cSrcweir /* 111 o */ TOKEN_CHAR_WORD | TOKEN_WORD,
189cdf0e10cSrcweir /* 112 p */ TOKEN_CHAR_WORD | TOKEN_WORD,
190cdf0e10cSrcweir /* 113 q */ TOKEN_CHAR_WORD | TOKEN_WORD,
191cdf0e10cSrcweir /* 114 r */ TOKEN_CHAR_WORD | TOKEN_WORD,
192cdf0e10cSrcweir /* 115 s */ TOKEN_CHAR_WORD | TOKEN_WORD,
193cdf0e10cSrcweir /* 116 t */ TOKEN_CHAR_WORD | TOKEN_WORD,
194cdf0e10cSrcweir /* 117 u */ TOKEN_CHAR_WORD | TOKEN_WORD,
195cdf0e10cSrcweir /* 118 v */ TOKEN_CHAR_WORD | TOKEN_WORD,
196cdf0e10cSrcweir /* 119 w */ TOKEN_CHAR_WORD | TOKEN_WORD,
197cdf0e10cSrcweir /* 120 x */ TOKEN_CHAR_WORD | TOKEN_WORD,
198cdf0e10cSrcweir /* 121 y */ TOKEN_CHAR_WORD | TOKEN_WORD,
199cdf0e10cSrcweir /* 122 z */ TOKEN_CHAR_WORD | TOKEN_WORD,
200cdf0e10cSrcweir /* 123 { */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
201cdf0e10cSrcweir /* 124 | */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
202cdf0e10cSrcweir /* 125 } */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
203cdf0e10cSrcweir /* 126 ~ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
204cdf0e10cSrcweir /* 127 */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP // (TOKEN_ILLEGAL // UNUSED)
205cdf0e10cSrcweir };
206cdf0e10cSrcweir
207cdf0e10cSrcweir
208cdf0e10cSrcweir const sal_Int32 cclass_Unicode::pParseTokensType[ nDefCnt ] =
209cdf0e10cSrcweir {
210cdf0e10cSrcweir /* \0 */ KParseTokens::ASC_OTHER,
211cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
212cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
213cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
214cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
215cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
216cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
217cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
218cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
219cdf0e10cSrcweir /* 9 \t */ KParseTokens::ASC_CONTROL,
220cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
221cdf0e10cSrcweir /* 11 \v */ KParseTokens::ASC_CONTROL,
222cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
223cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
224cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
225cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
226cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
227cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
228cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
229cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
230cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
231cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
232cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
233cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
234cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
235cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
236cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
237cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
238cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
239cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
240cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
241cdf0e10cSrcweir KParseTokens::ASC_CONTROL,
242cdf0e10cSrcweir /* 32 */ KParseTokens::ASC_OTHER,
243cdf0e10cSrcweir /* 33 ! */ KParseTokens::ASC_OTHER,
244cdf0e10cSrcweir /* 34 " */ KParseTokens::ASC_OTHER,
245cdf0e10cSrcweir /* 35 # */ KParseTokens::ASC_OTHER,
246cdf0e10cSrcweir /* 36 $ */ KParseTokens::ASC_DOLLAR,
247cdf0e10cSrcweir /* 37 % */ KParseTokens::ASC_OTHER,
248cdf0e10cSrcweir /* 38 & */ KParseTokens::ASC_OTHER,
249cdf0e10cSrcweir /* 39 ' */ KParseTokens::ASC_OTHER,
250cdf0e10cSrcweir /* 40 ( */ KParseTokens::ASC_OTHER,
251cdf0e10cSrcweir /* 41 ) */ KParseTokens::ASC_OTHER,
252cdf0e10cSrcweir /* 42 * */ KParseTokens::ASC_OTHER,
253cdf0e10cSrcweir /* 43 + */ KParseTokens::ASC_OTHER,
254cdf0e10cSrcweir /* 44 , */ KParseTokens::ASC_OTHER,
255cdf0e10cSrcweir /* 45 - */ KParseTokens::ASC_OTHER,
256cdf0e10cSrcweir /* 46 . */ KParseTokens::ASC_DOT,
257cdf0e10cSrcweir /* 47 / */ KParseTokens::ASC_OTHER,
258cdf0e10cSrcweir //for ( i = 48; i < 58; i++ )
259cdf0e10cSrcweir /* 48 0 */ KParseTokens::ASC_DIGIT,
260cdf0e10cSrcweir /* 49 1 */ KParseTokens::ASC_DIGIT,
261cdf0e10cSrcweir /* 50 2 */ KParseTokens::ASC_DIGIT,
262cdf0e10cSrcweir /* 51 3 */ KParseTokens::ASC_DIGIT,
263cdf0e10cSrcweir /* 52 4 */ KParseTokens::ASC_DIGIT,
264cdf0e10cSrcweir /* 53 5 */ KParseTokens::ASC_DIGIT,
265cdf0e10cSrcweir /* 54 6 */ KParseTokens::ASC_DIGIT,
266cdf0e10cSrcweir /* 55 7 */ KParseTokens::ASC_DIGIT,
267cdf0e10cSrcweir /* 56 8 */ KParseTokens::ASC_DIGIT,
268cdf0e10cSrcweir /* 57 9 */ KParseTokens::ASC_DIGIT,
269cdf0e10cSrcweir /* 58 : */ KParseTokens::ASC_COLON,
270cdf0e10cSrcweir /* 59 ; */ KParseTokens::ASC_OTHER,
271cdf0e10cSrcweir /* 60 < */ KParseTokens::ASC_OTHER,
272cdf0e10cSrcweir /* 61 = */ KParseTokens::ASC_OTHER,
273cdf0e10cSrcweir /* 62 > */ KParseTokens::ASC_OTHER,
274cdf0e10cSrcweir /* 63 ? */ KParseTokens::ASC_OTHER,
275cdf0e10cSrcweir /* 64 @ */ KParseTokens::ASC_OTHER,
276cdf0e10cSrcweir //for ( i = 65; i < 91; i++ )
277cdf0e10cSrcweir /* 65 A */ KParseTokens::ASC_UPALPHA,
278cdf0e10cSrcweir /* 66 B */ KParseTokens::ASC_UPALPHA,
279cdf0e10cSrcweir /* 67 C */ KParseTokens::ASC_UPALPHA,
280cdf0e10cSrcweir /* 68 D */ KParseTokens::ASC_UPALPHA,
281cdf0e10cSrcweir /* 69 E */ KParseTokens::ASC_UPALPHA,
282cdf0e10cSrcweir /* 70 F */ KParseTokens::ASC_UPALPHA,
283cdf0e10cSrcweir /* 71 G */ KParseTokens::ASC_UPALPHA,
284cdf0e10cSrcweir /* 72 H */ KParseTokens::ASC_UPALPHA,
285cdf0e10cSrcweir /* 73 I */ KParseTokens::ASC_UPALPHA,
286cdf0e10cSrcweir /* 74 J */ KParseTokens::ASC_UPALPHA,
287cdf0e10cSrcweir /* 75 K */ KParseTokens::ASC_UPALPHA,
288cdf0e10cSrcweir /* 76 L */ KParseTokens::ASC_UPALPHA,
289cdf0e10cSrcweir /* 77 M */ KParseTokens::ASC_UPALPHA,
290cdf0e10cSrcweir /* 78 N */ KParseTokens::ASC_UPALPHA,
291cdf0e10cSrcweir /* 79 O */ KParseTokens::ASC_UPALPHA,
292cdf0e10cSrcweir /* 80 P */ KParseTokens::ASC_UPALPHA,
293cdf0e10cSrcweir /* 81 Q */ KParseTokens::ASC_UPALPHA,
294cdf0e10cSrcweir /* 82 R */ KParseTokens::ASC_UPALPHA,
295cdf0e10cSrcweir /* 83 S */ KParseTokens::ASC_UPALPHA,
296cdf0e10cSrcweir /* 84 T */ KParseTokens::ASC_UPALPHA,
297cdf0e10cSrcweir /* 85 U */ KParseTokens::ASC_UPALPHA,
298cdf0e10cSrcweir /* 86 V */ KParseTokens::ASC_UPALPHA,
299cdf0e10cSrcweir /* 87 W */ KParseTokens::ASC_UPALPHA,
300cdf0e10cSrcweir /* 88 X */ KParseTokens::ASC_UPALPHA,
301cdf0e10cSrcweir /* 89 Y */ KParseTokens::ASC_UPALPHA,
302cdf0e10cSrcweir /* 90 Z */ KParseTokens::ASC_UPALPHA,
303cdf0e10cSrcweir /* 91 [ */ KParseTokens::ASC_OTHER,
304cdf0e10cSrcweir /* 92 \ */ KParseTokens::ASC_OTHER,
305cdf0e10cSrcweir /* 93 ] */ KParseTokens::ASC_OTHER,
306cdf0e10cSrcweir /* 94 ^ */ KParseTokens::ASC_OTHER,
307cdf0e10cSrcweir /* 95 _ */ KParseTokens::ASC_UNDERSCORE,
308cdf0e10cSrcweir /* 96 ` */ KParseTokens::ASC_OTHER,
309cdf0e10cSrcweir //for ( i = 97; i < 123; i++ )
310cdf0e10cSrcweir /* 97 a */ KParseTokens::ASC_LOALPHA,
311cdf0e10cSrcweir /* 98 b */ KParseTokens::ASC_LOALPHA,
312cdf0e10cSrcweir /* 99 c */ KParseTokens::ASC_LOALPHA,
313cdf0e10cSrcweir /* 100 d */ KParseTokens::ASC_LOALPHA,
314cdf0e10cSrcweir /* 101 e */ KParseTokens::ASC_LOALPHA,
315cdf0e10cSrcweir /* 102 f */ KParseTokens::ASC_LOALPHA,
316cdf0e10cSrcweir /* 103 g */ KParseTokens::ASC_LOALPHA,
317cdf0e10cSrcweir /* 104 h */ KParseTokens::ASC_LOALPHA,
318cdf0e10cSrcweir /* 105 i */ KParseTokens::ASC_LOALPHA,
319cdf0e10cSrcweir /* 106 j */ KParseTokens::ASC_LOALPHA,
320cdf0e10cSrcweir /* 107 k */ KParseTokens::ASC_LOALPHA,
321cdf0e10cSrcweir /* 108 l */ KParseTokens::ASC_LOALPHA,
322cdf0e10cSrcweir /* 109 m */ KParseTokens::ASC_LOALPHA,
323cdf0e10cSrcweir /* 110 n */ KParseTokens::ASC_LOALPHA,
324cdf0e10cSrcweir /* 111 o */ KParseTokens::ASC_LOALPHA,
325cdf0e10cSrcweir /* 112 p */ KParseTokens::ASC_LOALPHA,
326cdf0e10cSrcweir /* 113 q */ KParseTokens::ASC_LOALPHA,
327cdf0e10cSrcweir /* 114 r */ KParseTokens::ASC_LOALPHA,
328cdf0e10cSrcweir /* 115 s */ KParseTokens::ASC_LOALPHA,
329cdf0e10cSrcweir /* 116 t */ KParseTokens::ASC_LOALPHA,
330cdf0e10cSrcweir /* 117 u */ KParseTokens::ASC_LOALPHA,
331cdf0e10cSrcweir /* 118 v */ KParseTokens::ASC_LOALPHA,
332cdf0e10cSrcweir /* 119 w */ KParseTokens::ASC_LOALPHA,
333cdf0e10cSrcweir /* 120 x */ KParseTokens::ASC_LOALPHA,
334cdf0e10cSrcweir /* 121 y */ KParseTokens::ASC_LOALPHA,
335cdf0e10cSrcweir /* 122 z */ KParseTokens::ASC_LOALPHA,
336cdf0e10cSrcweir /* 123 { */ KParseTokens::ASC_OTHER,
337cdf0e10cSrcweir /* 124 | */ KParseTokens::ASC_OTHER,
338cdf0e10cSrcweir /* 125 } */ KParseTokens::ASC_OTHER,
339cdf0e10cSrcweir /* 126 ~ */ KParseTokens::ASC_OTHER,
340cdf0e10cSrcweir /* 127 */ KParseTokens::ASC_OTHER
341cdf0e10cSrcweir };
342cdf0e10cSrcweir
343cdf0e10cSrcweir
344cdf0e10cSrcweir // static
StrChr(const sal_Unicode * pStr,sal_Unicode c)345cdf0e10cSrcweir const sal_Unicode* cclass_Unicode::StrChr( const sal_Unicode* pStr, sal_Unicode c )
346cdf0e10cSrcweir {
347cdf0e10cSrcweir if ( !pStr )
348cdf0e10cSrcweir return NULL;
349cdf0e10cSrcweir while ( *pStr )
350cdf0e10cSrcweir {
351cdf0e10cSrcweir if ( *pStr == c )
352cdf0e10cSrcweir return pStr;
353cdf0e10cSrcweir pStr++;
354cdf0e10cSrcweir }
355cdf0e10cSrcweir return NULL;
356cdf0e10cSrcweir }
357cdf0e10cSrcweir
358cdf0e10cSrcweir
getParseTokensType(const sal_Unicode * aStr,sal_Int32 nPos)359cdf0e10cSrcweir sal_Int32 cclass_Unicode::getParseTokensType( const sal_Unicode* aStr, sal_Int32 nPos )
360cdf0e10cSrcweir {
361cdf0e10cSrcweir sal_Unicode c = aStr[nPos];
362cdf0e10cSrcweir if ( c < nDefCnt )
363cdf0e10cSrcweir return pParseTokensType[ sal_uInt8(c) ];
364cdf0e10cSrcweir else
365cdf0e10cSrcweir {
366cdf0e10cSrcweir
367cdf0e10cSrcweir //! all KParseTokens::UNI_... must be matched
368cdf0e10cSrcweir switch ( u_charType( (sal_uInt32) c ) )
369cdf0e10cSrcweir {
370cdf0e10cSrcweir case U_UPPERCASE_LETTER :
371cdf0e10cSrcweir return KParseTokens::UNI_UPALPHA;
372cdf0e10cSrcweir case U_LOWERCASE_LETTER :
373cdf0e10cSrcweir return KParseTokens::UNI_LOALPHA;
374cdf0e10cSrcweir case U_TITLECASE_LETTER :
375cdf0e10cSrcweir return KParseTokens::UNI_TITLE_ALPHA;
376cdf0e10cSrcweir case U_MODIFIER_LETTER :
377cdf0e10cSrcweir return KParseTokens::UNI_MODIFIER_LETTER;
378cdf0e10cSrcweir case U_OTHER_LETTER :
379cdf0e10cSrcweir // Non_Spacing_Mark could not be as leading character
380cdf0e10cSrcweir if (nPos == 0) break;
381cdf0e10cSrcweir // fall through, treat it as Other_Letter.
382cdf0e10cSrcweir case U_NON_SPACING_MARK :
383cdf0e10cSrcweir return KParseTokens::UNI_OTHER_LETTER;
384cdf0e10cSrcweir case U_DECIMAL_DIGIT_NUMBER :
385cdf0e10cSrcweir return KParseTokens::UNI_DIGIT;
386cdf0e10cSrcweir case U_LETTER_NUMBER :
387cdf0e10cSrcweir return KParseTokens::UNI_LETTER_NUMBER;
388cdf0e10cSrcweir case U_OTHER_NUMBER :
389cdf0e10cSrcweir return KParseTokens::UNI_OTHER_NUMBER;
390cdf0e10cSrcweir }
391cdf0e10cSrcweir
392cdf0e10cSrcweir return KParseTokens::UNI_OTHER;
393cdf0e10cSrcweir }
394cdf0e10cSrcweir }
395cdf0e10cSrcweir
setupInternational(const Locale & rLocale)396cdf0e10cSrcweir sal_Bool cclass_Unicode::setupInternational( const Locale& rLocale )
397cdf0e10cSrcweir {
398cdf0e10cSrcweir sal_Bool bChanged = (aParserLocale.Language != rLocale.Language
399cdf0e10cSrcweir || aParserLocale.Country != rLocale.Country
400cdf0e10cSrcweir || aParserLocale.Variant != rLocale.Variant);
401cdf0e10cSrcweir if ( bChanged )
402cdf0e10cSrcweir {
403cdf0e10cSrcweir aParserLocale.Language = rLocale.Language;
404cdf0e10cSrcweir aParserLocale.Country = rLocale.Country;
405cdf0e10cSrcweir aParserLocale.Variant = rLocale.Variant;
406cdf0e10cSrcweir }
407cdf0e10cSrcweir if ( !xLocaleData.is() && xMSF.is() )
408cdf0e10cSrcweir {
409cdf0e10cSrcweir Reference <
410cdf0e10cSrcweir XInterface > xI =
411cdf0e10cSrcweir xMSF->createInstance( OUString(
412cdf0e10cSrcweir RTL_CONSTASCII_USTRINGPARAM( "com.sun.star.i18n.LocaleData" ) ) );
413cdf0e10cSrcweir if ( xI.is() )
414cdf0e10cSrcweir {
415cdf0e10cSrcweir Any x = xI->queryInterface( getCppuType((const Reference< XLocaleData>*)0) );
416cdf0e10cSrcweir x >>= xLocaleData;
417cdf0e10cSrcweir }
418cdf0e10cSrcweir }
419cdf0e10cSrcweir return bChanged;
420cdf0e10cSrcweir }
421cdf0e10cSrcweir
422cdf0e10cSrcweir
setupParserTable(const Locale & rLocale,sal_Int32 startCharTokenType,const OUString & userDefinedCharactersStart,sal_Int32 contCharTokenType,const OUString & userDefinedCharactersCont)423cdf0e10cSrcweir void cclass_Unicode::setupParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
424cdf0e10cSrcweir const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
425cdf0e10cSrcweir const OUString& userDefinedCharactersCont )
426cdf0e10cSrcweir {
427cdf0e10cSrcweir bool bIntlEqual = (rLocale.Language == aParserLocale.Language &&
428cdf0e10cSrcweir rLocale.Country == aParserLocale.Country &&
429cdf0e10cSrcweir rLocale.Variant == aParserLocale.Variant);
430cdf0e10cSrcweir if ( !pTable || !bIntlEqual ||
431cdf0e10cSrcweir startCharTokenType != nStartTypes ||
432cdf0e10cSrcweir contCharTokenType != nContTypes ||
433cdf0e10cSrcweir userDefinedCharactersStart != aStartChars ||
434cdf0e10cSrcweir userDefinedCharactersCont != aContChars )
435cdf0e10cSrcweir initParserTable( rLocale, startCharTokenType, userDefinedCharactersStart,
436cdf0e10cSrcweir contCharTokenType, userDefinedCharactersCont );
437cdf0e10cSrcweir }
438cdf0e10cSrcweir
439cdf0e10cSrcweir
initParserTable(const Locale & rLocale,sal_Int32 startCharTokenType,const OUString & userDefinedCharactersStart,sal_Int32 contCharTokenType,const OUString & userDefinedCharactersCont)440cdf0e10cSrcweir void cclass_Unicode::initParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
441cdf0e10cSrcweir const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
442cdf0e10cSrcweir const OUString& userDefinedCharactersCont )
443cdf0e10cSrcweir {
444cdf0e10cSrcweir // (Re)Init
445cdf0e10cSrcweir setupInternational( rLocale );
446cdf0e10cSrcweir // Memory of pTable is reused.
447cdf0e10cSrcweir if ( !pTable )
448cdf0e10cSrcweir pTable = new UPT_FLAG_TYPE[nDefCnt];
449cdf0e10cSrcweir memcpy( pTable, pDefaultParserTable, sizeof(UPT_FLAG_TYPE) * nDefCnt );
450cdf0e10cSrcweir // Start and cont tables only need reallocation if different length.
451cdf0e10cSrcweir if ( pStart && userDefinedCharactersStart.getLength() != aStartChars.getLength() )
452cdf0e10cSrcweir {
453cdf0e10cSrcweir delete [] pStart;
454cdf0e10cSrcweir pStart = NULL;
455cdf0e10cSrcweir }
456cdf0e10cSrcweir if ( pCont && userDefinedCharactersCont.getLength() != aContChars.getLength() )
457cdf0e10cSrcweir {
458cdf0e10cSrcweir delete [] pCont;
459cdf0e10cSrcweir pCont = NULL;
460cdf0e10cSrcweir }
461cdf0e10cSrcweir nStartTypes = startCharTokenType;
462cdf0e10cSrcweir nContTypes = contCharTokenType;
463cdf0e10cSrcweir aStartChars = userDefinedCharactersStart;
464cdf0e10cSrcweir aContChars = userDefinedCharactersCont;
465cdf0e10cSrcweir
466cdf0e10cSrcweir // specials
467cdf0e10cSrcweir if( xLocaleData.is() )
468cdf0e10cSrcweir {
469cdf0e10cSrcweir LocaleDataItem aItem =
470cdf0e10cSrcweir xLocaleData->getLocaleItem( aParserLocale );
471cdf0e10cSrcweir //!TODO: theoretically separators may be a string, adjustment would have to be
472cdf0e10cSrcweir //! done here and in parsing and in ::rtl::math::stringToDouble()
473cdf0e10cSrcweir cGroupSep = aItem.thousandSeparator.getStr()[0];
474cdf0e10cSrcweir cDecimalSep = aItem.decimalSeparator.getStr()[0];
475cdf0e10cSrcweir }
476cdf0e10cSrcweir
477cdf0e10cSrcweir if ( cGroupSep < nDefCnt )
478cdf0e10cSrcweir pTable[cGroupSep] |= TOKEN_VALUE;
479cdf0e10cSrcweir if ( cDecimalSep < nDefCnt )
480cdf0e10cSrcweir pTable[cDecimalSep] |= TOKEN_CHAR_VALUE | TOKEN_VALUE;
481cdf0e10cSrcweir
482cdf0e10cSrcweir // Modify characters according to KParseTokens definitions.
483cdf0e10cSrcweir {
484cdf0e10cSrcweir using namespace KParseTokens;
485cdf0e10cSrcweir sal_uInt8 i;
486cdf0e10cSrcweir
487cdf0e10cSrcweir if ( !(nStartTypes & ASC_UPALPHA) )
488cdf0e10cSrcweir for ( i = 65; i < 91; i++ )
489cdf0e10cSrcweir pTable[i] &= ~TOKEN_CHAR_WORD; // not allowed as start character
490cdf0e10cSrcweir if ( !(nContTypes & ASC_UPALPHA) )
491cdf0e10cSrcweir for ( i = 65; i < 91; i++ )
492cdf0e10cSrcweir pTable[i] &= ~TOKEN_WORD; // not allowed as cont character
493cdf0e10cSrcweir
494cdf0e10cSrcweir if ( !(nStartTypes & ASC_LOALPHA) )
495cdf0e10cSrcweir for ( i = 97; i < 123; i++ )
496cdf0e10cSrcweir pTable[i] &= ~TOKEN_CHAR_WORD; // not allowed as start character
497cdf0e10cSrcweir if ( !(nContTypes & ASC_LOALPHA) )
498cdf0e10cSrcweir for ( i = 97; i < 123; i++ )
499cdf0e10cSrcweir pTable[i] &= ~TOKEN_WORD; // not allowed as cont character
500cdf0e10cSrcweir
501cdf0e10cSrcweir if ( nStartTypes & ASC_DIGIT )
502cdf0e10cSrcweir for ( i = 48; i < 58; i++ )
503cdf0e10cSrcweir pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character
504cdf0e10cSrcweir if ( !(nContTypes & ASC_DIGIT) )
505cdf0e10cSrcweir for ( i = 48; i < 58; i++ )
506cdf0e10cSrcweir pTable[i] &= ~TOKEN_WORD; // not allowed as cont character
507cdf0e10cSrcweir
508cdf0e10cSrcweir if ( !(nStartTypes & ASC_UNDERSCORE) )
509cdf0e10cSrcweir pTable[95] &= ~TOKEN_CHAR_WORD; // not allowed as start character
510cdf0e10cSrcweir if ( !(nContTypes & ASC_UNDERSCORE) )
511cdf0e10cSrcweir pTable[95] &= ~TOKEN_WORD; // not allowed as cont character
512cdf0e10cSrcweir
513cdf0e10cSrcweir if ( nStartTypes & ASC_DOLLAR )
514cdf0e10cSrcweir pTable[36] |= TOKEN_CHAR_WORD; // allowed as start character
515cdf0e10cSrcweir if ( nContTypes & ASC_DOLLAR )
516cdf0e10cSrcweir pTable[36] |= TOKEN_WORD; // allowed as cont character
517cdf0e10cSrcweir
518cdf0e10cSrcweir if ( nStartTypes & ASC_DOT )
519cdf0e10cSrcweir pTable[46] |= TOKEN_CHAR_WORD; // allowed as start character
520cdf0e10cSrcweir if ( nContTypes & ASC_DOT )
521cdf0e10cSrcweir pTable[46] |= TOKEN_WORD; // allowed as cont character
522cdf0e10cSrcweir
523cdf0e10cSrcweir if ( nStartTypes & ASC_COLON )
524cdf0e10cSrcweir pTable[58] |= TOKEN_CHAR_WORD; // allowed as start character
525cdf0e10cSrcweir if ( nContTypes & ASC_COLON )
526cdf0e10cSrcweir pTable[58] |= TOKEN_WORD; // allowed as cont character
527cdf0e10cSrcweir
528cdf0e10cSrcweir if ( nStartTypes & ASC_CONTROL )
529cdf0e10cSrcweir for ( i = 1; i < 32; i++ )
530cdf0e10cSrcweir pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character
531cdf0e10cSrcweir if ( nContTypes & ASC_CONTROL )
532cdf0e10cSrcweir for ( i = 1; i < 32; i++ )
533cdf0e10cSrcweir pTable[i] |= TOKEN_WORD; // allowed as cont character
534cdf0e10cSrcweir
535cdf0e10cSrcweir if ( nStartTypes & ASC_ANY_BUT_CONTROL )
536cdf0e10cSrcweir for ( i = 32; i < nDefCnt; i++ )
537cdf0e10cSrcweir pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character
538cdf0e10cSrcweir if ( nContTypes & ASC_ANY_BUT_CONTROL )
539cdf0e10cSrcweir for ( i = 32; i < nDefCnt; i++ )
540cdf0e10cSrcweir pTable[i] |= TOKEN_WORD; // allowed as cont character
541cdf0e10cSrcweir
542cdf0e10cSrcweir }
543cdf0e10cSrcweir
544cdf0e10cSrcweir // Merge in (positively override with) user defined characters.
545cdf0e10cSrcweir // StartChars
546cdf0e10cSrcweir sal_Int32 nLen = aStartChars.getLength();
547cdf0e10cSrcweir if ( nLen )
548cdf0e10cSrcweir {
549cdf0e10cSrcweir if ( !pStart )
550cdf0e10cSrcweir pStart = new UPT_FLAG_TYPE[ nLen ];
551cdf0e10cSrcweir const sal_Unicode* p = aStartChars.getStr();
552cdf0e10cSrcweir for ( sal_Int32 j=0; j<nLen; j++, p++ )
553cdf0e10cSrcweir {
554cdf0e10cSrcweir pStart[j] = TOKEN_CHAR_WORD;
555cdf0e10cSrcweir if ( *p < nDefCnt )
556cdf0e10cSrcweir pTable[*p] |= TOKEN_CHAR_WORD;
557cdf0e10cSrcweir }
558cdf0e10cSrcweir }
559cdf0e10cSrcweir // ContChars
560cdf0e10cSrcweir nLen = aContChars.getLength();
561cdf0e10cSrcweir if ( nLen )
562cdf0e10cSrcweir {
563cdf0e10cSrcweir if ( !pCont )
564cdf0e10cSrcweir pCont = new UPT_FLAG_TYPE[ nLen ];
565cdf0e10cSrcweir const sal_Unicode* p = aContChars.getStr();
566cdf0e10cSrcweir for ( sal_Int32 j=0; j<nLen; j++ )
567cdf0e10cSrcweir {
568cdf0e10cSrcweir pCont[j] = TOKEN_WORD;
569cdf0e10cSrcweir if ( *p < nDefCnt )
570cdf0e10cSrcweir pTable[*p] |= TOKEN_WORD;
571cdf0e10cSrcweir }
572cdf0e10cSrcweir }
573cdf0e10cSrcweir }
574cdf0e10cSrcweir
575cdf0e10cSrcweir
destroyParserTable()576cdf0e10cSrcweir void cclass_Unicode::destroyParserTable()
577cdf0e10cSrcweir {
578cdf0e10cSrcweir if ( pCont )
579cdf0e10cSrcweir delete [] pCont;
580cdf0e10cSrcweir if ( pStart )
581cdf0e10cSrcweir delete [] pStart;
582cdf0e10cSrcweir if ( pTable )
583cdf0e10cSrcweir delete [] pTable;
584cdf0e10cSrcweir }
585cdf0e10cSrcweir
586cdf0e10cSrcweir
getFlags(const sal_Unicode * aStr,sal_Int32 nPos)587cdf0e10cSrcweir UPT_FLAG_TYPE cclass_Unicode::getFlags( const sal_Unicode* aStr, sal_Int32 nPos )
588cdf0e10cSrcweir {
589cdf0e10cSrcweir UPT_FLAG_TYPE nMask;
590cdf0e10cSrcweir sal_Unicode c = aStr[nPos];
591cdf0e10cSrcweir if ( c < nDefCnt )
592cdf0e10cSrcweir nMask = pTable[ sal_uInt8(c) ];
593cdf0e10cSrcweir else
594cdf0e10cSrcweir nMask = getFlagsExtended( aStr, nPos );
595cdf0e10cSrcweir switch ( eState )
596cdf0e10cSrcweir {
597cdf0e10cSrcweir case ssGetChar :
598cdf0e10cSrcweir case ssRewindFromValue :
599cdf0e10cSrcweir case ssIgnoreLeadingInRewind :
600cdf0e10cSrcweir case ssGetWordFirstChar :
601cdf0e10cSrcweir if ( !(nMask & TOKEN_CHAR_WORD) )
602cdf0e10cSrcweir {
603cdf0e10cSrcweir nMask |= getStartCharsFlags( c );
604cdf0e10cSrcweir if ( nMask & TOKEN_CHAR_WORD )
605cdf0e10cSrcweir nMask &= ~TOKEN_EXCLUDED;
606cdf0e10cSrcweir }
607cdf0e10cSrcweir break;
608cdf0e10cSrcweir case ssGetValue :
609cdf0e10cSrcweir case ssGetWord :
610cdf0e10cSrcweir if ( !(nMask & TOKEN_WORD) )
611cdf0e10cSrcweir {
612cdf0e10cSrcweir nMask |= getContCharsFlags( c );
613cdf0e10cSrcweir if ( nMask & TOKEN_WORD )
614cdf0e10cSrcweir nMask &= ~TOKEN_EXCLUDED;
615cdf0e10cSrcweir }
616cdf0e10cSrcweir break;
617cdf0e10cSrcweir default:
618cdf0e10cSrcweir ; // other cases aren't needed, no compiler warning
619cdf0e10cSrcweir }
620cdf0e10cSrcweir return nMask;
621cdf0e10cSrcweir }
622cdf0e10cSrcweir
623cdf0e10cSrcweir
getFlagsExtended(const sal_Unicode * aStr,sal_Int32 nPos)624cdf0e10cSrcweir UPT_FLAG_TYPE cclass_Unicode::getFlagsExtended( const sal_Unicode* aStr, sal_Int32 nPos )
625cdf0e10cSrcweir {
626cdf0e10cSrcweir sal_Unicode c = aStr[nPos];
627cdf0e10cSrcweir if ( c == cGroupSep )
628cdf0e10cSrcweir return TOKEN_VALUE;
629cdf0e10cSrcweir else if ( c == cDecimalSep )
630cdf0e10cSrcweir return TOKEN_CHAR_VALUE | TOKEN_VALUE;
631cdf0e10cSrcweir using namespace i18n;
632cdf0e10cSrcweir bool bStart = (eState == ssGetChar || eState == ssGetWordFirstChar ||
633cdf0e10cSrcweir eState == ssRewindFromValue || eState == ssIgnoreLeadingInRewind);
634cdf0e10cSrcweir sal_Int32 nTypes = (bStart ? nStartTypes : nContTypes);
635cdf0e10cSrcweir
636cdf0e10cSrcweir //! all KParseTokens::UNI_... must be matched
637cdf0e10cSrcweir switch ( u_charType( (sal_uInt32) c ) )
638cdf0e10cSrcweir {
639cdf0e10cSrcweir case U_UPPERCASE_LETTER :
640cdf0e10cSrcweir return (nTypes & KParseTokens::UNI_UPALPHA) ?
641cdf0e10cSrcweir (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
642cdf0e10cSrcweir TOKEN_ILLEGAL;
643cdf0e10cSrcweir case U_LOWERCASE_LETTER :
644cdf0e10cSrcweir return (nTypes & KParseTokens::UNI_LOALPHA) ?
645cdf0e10cSrcweir (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
646cdf0e10cSrcweir TOKEN_ILLEGAL;
647cdf0e10cSrcweir case U_TITLECASE_LETTER :
648cdf0e10cSrcweir return (nTypes & KParseTokens::UNI_TITLE_ALPHA) ?
649cdf0e10cSrcweir (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
650cdf0e10cSrcweir TOKEN_ILLEGAL;
651cdf0e10cSrcweir case U_MODIFIER_LETTER :
652cdf0e10cSrcweir return (nTypes & KParseTokens::UNI_MODIFIER_LETTER) ?
653cdf0e10cSrcweir (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
654cdf0e10cSrcweir TOKEN_ILLEGAL;
655cdf0e10cSrcweir case U_NON_SPACING_MARK :
656cdf0e10cSrcweir case U_COMBINING_SPACING_MARK :
657cdf0e10cSrcweir // Non_Spacing_Mark can't be a leading character,
658cdf0e10cSrcweir // nor can a spacing combining mark.
659cdf0e10cSrcweir if (bStart)
660cdf0e10cSrcweir return TOKEN_ILLEGAL;
661cdf0e10cSrcweir // fall through, treat it as Other_Letter.
662cdf0e10cSrcweir case U_OTHER_LETTER :
663cdf0e10cSrcweir return (nTypes & KParseTokens::UNI_OTHER_LETTER) ?
664cdf0e10cSrcweir (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
665cdf0e10cSrcweir TOKEN_ILLEGAL;
666cdf0e10cSrcweir case U_DECIMAL_DIGIT_NUMBER :
667cdf0e10cSrcweir return ((nTypes & KParseTokens::UNI_DIGIT) ?
668cdf0e10cSrcweir (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
669cdf0e10cSrcweir TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
670cdf0e10cSrcweir case U_LETTER_NUMBER :
671cdf0e10cSrcweir return ((nTypes & KParseTokens::UNI_LETTER_NUMBER) ?
672cdf0e10cSrcweir (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
673cdf0e10cSrcweir TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
674cdf0e10cSrcweir case U_OTHER_NUMBER :
675cdf0e10cSrcweir return ((nTypes & KParseTokens::UNI_OTHER_NUMBER) ?
676cdf0e10cSrcweir (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
677cdf0e10cSrcweir TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
678cdf0e10cSrcweir case U_SPACE_SEPARATOR :
679cdf0e10cSrcweir return ((nTypes & KParseTokens::IGNORE_LEADING_WS) ?
680cdf0e10cSrcweir TOKEN_CHAR_DONTCARE : (bStart ? TOKEN_CHAR_WORD : (TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP) ));
681cdf0e10cSrcweir }
682cdf0e10cSrcweir
683cdf0e10cSrcweir return TOKEN_ILLEGAL;
684cdf0e10cSrcweir }
685cdf0e10cSrcweir
686cdf0e10cSrcweir
getStartCharsFlags(sal_Unicode c)687cdf0e10cSrcweir UPT_FLAG_TYPE cclass_Unicode::getStartCharsFlags( sal_Unicode c )
688cdf0e10cSrcweir {
689cdf0e10cSrcweir if ( pStart )
690cdf0e10cSrcweir {
691cdf0e10cSrcweir const sal_Unicode* pStr = aStartChars.getStr();
692cdf0e10cSrcweir const sal_Unicode* p = StrChr( pStr, c );
693cdf0e10cSrcweir if ( p )
694cdf0e10cSrcweir return pStart[ p - pStr ];
695cdf0e10cSrcweir }
696cdf0e10cSrcweir return TOKEN_ILLEGAL;
697cdf0e10cSrcweir }
698cdf0e10cSrcweir
699cdf0e10cSrcweir
getContCharsFlags(sal_Unicode c)700cdf0e10cSrcweir UPT_FLAG_TYPE cclass_Unicode::getContCharsFlags( sal_Unicode c )
701cdf0e10cSrcweir {
702cdf0e10cSrcweir if ( pCont )
703cdf0e10cSrcweir {
704cdf0e10cSrcweir const sal_Unicode* pStr = aContChars.getStr();
705cdf0e10cSrcweir const sal_Unicode* p = StrChr( pStr, c );
706cdf0e10cSrcweir if ( p )
707cdf0e10cSrcweir return pCont[ p - pStr ];
708cdf0e10cSrcweir }
709cdf0e10cSrcweir return TOKEN_ILLEGAL;
710cdf0e10cSrcweir }
711cdf0e10cSrcweir
712cdf0e10cSrcweir
parseText(ParseResult & r,const OUString & rText,sal_Int32 nPos,sal_Int32 nTokenType)713cdf0e10cSrcweir void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32 nPos, sal_Int32 nTokenType )
714cdf0e10cSrcweir {
715cdf0e10cSrcweir using namespace i18n;
716cdf0e10cSrcweir const sal_Unicode* const pTextStart = rText.getStr() + nPos;
717cdf0e10cSrcweir eState = ssGetChar;
718cdf0e10cSrcweir
719cdf0e10cSrcweir //! All the variables below (plus ParseResult) have to be resetted on ssRewindFromValue!
720cdf0e10cSrcweir const sal_Unicode* pSym = pTextStart;
721cdf0e10cSrcweir const sal_Unicode* pSrc = pSym;
722cdf0e10cSrcweir OUString aSymbol;
723cdf0e10cSrcweir sal_Unicode c = *pSrc;
724cdf0e10cSrcweir sal_Unicode cLast = 0;
725cdf0e10cSrcweir int nDecSeps = 0;
726cdf0e10cSrcweir bool bQuote = false;
727cdf0e10cSrcweir bool bMightBeWord = true;
728cdf0e10cSrcweir bool bMightBeWordLast = true;
729cdf0e10cSrcweir //! All the variables above (plus ParseResult) have to be resetted on ssRewindFromValue!
730cdf0e10cSrcweir
731cdf0e10cSrcweir while ( (c != 0) && (eState != ssStop) )
732cdf0e10cSrcweir {
733cdf0e10cSrcweir UPT_FLAG_TYPE nMask = getFlags( pTextStart, pSrc - pTextStart );
734cdf0e10cSrcweir if ( nMask & TOKEN_EXCLUDED )
735cdf0e10cSrcweir eState = ssBounce;
736cdf0e10cSrcweir if ( bMightBeWord )
737cdf0e10cSrcweir { // only relevant for ssGetValue fall back
738cdf0e10cSrcweir if ( eState == ssGetChar || eState == ssRewindFromValue ||
739cdf0e10cSrcweir eState == ssIgnoreLeadingInRewind )
740cdf0e10cSrcweir bMightBeWord = ((nMask & TOKEN_CHAR_WORD) != 0);
741cdf0e10cSrcweir else
742cdf0e10cSrcweir bMightBeWord = ((nMask & TOKEN_WORD) != 0);
743cdf0e10cSrcweir }
744cdf0e10cSrcweir sal_Int32 nParseTokensType = getParseTokensType( pTextStart, pSrc - pTextStart );
745cdf0e10cSrcweir pSrc++;
746cdf0e10cSrcweir switch (eState)
747cdf0e10cSrcweir {
748cdf0e10cSrcweir case ssGetChar :
749cdf0e10cSrcweir case ssRewindFromValue :
750cdf0e10cSrcweir case ssIgnoreLeadingInRewind :
751cdf0e10cSrcweir {
752cdf0e10cSrcweir if ( (nMask & TOKEN_CHAR_VALUE) && eState != ssRewindFromValue
753cdf0e10cSrcweir && eState != ssIgnoreLeadingInRewind )
754cdf0e10cSrcweir { //! must be first, may fall back to ssGetWord via bMightBeWord
755cdf0e10cSrcweir eState = ssGetValue;
756cdf0e10cSrcweir if ( nMask & TOKEN_VALUE_DIGIT )
757cdf0e10cSrcweir {
758cdf0e10cSrcweir if ( 128 <= c )
759cdf0e10cSrcweir r.TokenType = KParseType::UNI_NUMBER;
760cdf0e10cSrcweir else
761cdf0e10cSrcweir r.TokenType = KParseType::ASC_NUMBER;
762cdf0e10cSrcweir }
763cdf0e10cSrcweir else if ( c == cDecimalSep )
764cdf0e10cSrcweir {
765cdf0e10cSrcweir if ( *pSrc )
766cdf0e10cSrcweir ++nDecSeps;
767cdf0e10cSrcweir else
768cdf0e10cSrcweir eState = ssRewindFromValue;
769cdf0e10cSrcweir // retry for ONE_SINGLE_CHAR or others
770cdf0e10cSrcweir }
771cdf0e10cSrcweir }
772cdf0e10cSrcweir else if ( nMask & TOKEN_CHAR_WORD )
773cdf0e10cSrcweir {
774cdf0e10cSrcweir eState = ssGetWord;
775cdf0e10cSrcweir r.TokenType = KParseType::IDENTNAME;
776cdf0e10cSrcweir }
777cdf0e10cSrcweir else if ( nMask & TOKEN_NAME_SEP )
778cdf0e10cSrcweir {
779cdf0e10cSrcweir eState = ssGetWordFirstChar;
780cdf0e10cSrcweir bQuote = true;
781cdf0e10cSrcweir pSym++;
782cdf0e10cSrcweir nParseTokensType = 0; // will be taken of first real character
783cdf0e10cSrcweir r.TokenType = KParseType::SINGLE_QUOTE_NAME;
784cdf0e10cSrcweir }
785cdf0e10cSrcweir else if ( nMask & TOKEN_CHAR_STRING )
786cdf0e10cSrcweir {
787cdf0e10cSrcweir eState = ssGetString;
788cdf0e10cSrcweir pSym++;
789cdf0e10cSrcweir nParseTokensType = 0; // will be taken of first real character
790cdf0e10cSrcweir r.TokenType = KParseType::DOUBLE_QUOTE_STRING;
791cdf0e10cSrcweir }
792cdf0e10cSrcweir else if ( nMask & TOKEN_CHAR_DONTCARE )
793cdf0e10cSrcweir {
794cdf0e10cSrcweir if ( nStartTypes & KParseTokens::IGNORE_LEADING_WS )
795cdf0e10cSrcweir {
796cdf0e10cSrcweir if (eState == ssRewindFromValue)
797cdf0e10cSrcweir eState = ssIgnoreLeadingInRewind;
798cdf0e10cSrcweir r.LeadingWhiteSpace++;
799cdf0e10cSrcweir pSym++;
800cdf0e10cSrcweir nParseTokensType = 0; // wait until real character
801cdf0e10cSrcweir bMightBeWord = true;
802cdf0e10cSrcweir }
803cdf0e10cSrcweir else
804cdf0e10cSrcweir eState = ssBounce;
805cdf0e10cSrcweir }
806cdf0e10cSrcweir else if ( nMask & TOKEN_CHAR_BOOL )
807cdf0e10cSrcweir {
808cdf0e10cSrcweir eState = ssGetBool;
809cdf0e10cSrcweir r.TokenType = KParseType::BOOLEAN;
810cdf0e10cSrcweir }
811cdf0e10cSrcweir else if ( nMask & TOKEN_CHAR )
812cdf0e10cSrcweir { //! must be last
813cdf0e10cSrcweir eState = ssStop;
814cdf0e10cSrcweir r.TokenType = KParseType::ONE_SINGLE_CHAR;
815cdf0e10cSrcweir }
816cdf0e10cSrcweir else
817cdf0e10cSrcweir eState = ssBounce; // not known
818cdf0e10cSrcweir }
819cdf0e10cSrcweir break;
820cdf0e10cSrcweir case ssGetValue :
821cdf0e10cSrcweir {
822cdf0e10cSrcweir if ( nMask & TOKEN_VALUE_DIGIT )
823cdf0e10cSrcweir {
824cdf0e10cSrcweir if ( 128 <= c )
825cdf0e10cSrcweir r.TokenType = KParseType::UNI_NUMBER;
826cdf0e10cSrcweir else if ( r.TokenType != KParseType::UNI_NUMBER )
827cdf0e10cSrcweir r.TokenType = KParseType::ASC_NUMBER;
828cdf0e10cSrcweir }
829cdf0e10cSrcweir if ( nMask & TOKEN_VALUE )
830cdf0e10cSrcweir {
831cdf0e10cSrcweir if ( c == cDecimalSep && ++nDecSeps > 1 )
832cdf0e10cSrcweir {
833cdf0e10cSrcweir if ( pSrc - pTextStart == 2 )
834cdf0e10cSrcweir eState = ssRewindFromValue;
835cdf0e10cSrcweir // consecutive separators
836cdf0e10cSrcweir else
837cdf0e10cSrcweir eState = ssStopBack;
838cdf0e10cSrcweir }
839cdf0e10cSrcweir // else keep it going
840cdf0e10cSrcweir }
841cdf0e10cSrcweir else if ( c == 'E' || c == 'e' )
842cdf0e10cSrcweir {
843cdf0e10cSrcweir UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart );
844cdf0e10cSrcweir if ( nNext & TOKEN_VALUE_EXP )
845cdf0e10cSrcweir ; // keep it going
846cdf0e10cSrcweir else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
847cdf0e10cSrcweir { // might be a numerical name (1.2efg)
848cdf0e10cSrcweir eState = ssGetWord;
849cdf0e10cSrcweir r.TokenType = KParseType::IDENTNAME;
850cdf0e10cSrcweir }
851cdf0e10cSrcweir else
852cdf0e10cSrcweir eState = ssStopBack;
853cdf0e10cSrcweir }
854cdf0e10cSrcweir else if ( nMask & TOKEN_VALUE_SIGN )
855cdf0e10cSrcweir {
856cdf0e10cSrcweir if ( (cLast == 'E') || (cLast == 'e') )
857cdf0e10cSrcweir {
858cdf0e10cSrcweir UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart );
859cdf0e10cSrcweir if ( nNext & TOKEN_VALUE_EXP_VALUE )
860cdf0e10cSrcweir ; // keep it going
861cdf0e10cSrcweir else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
862cdf0e10cSrcweir { // might be a numerical name (1.2e+fg)
863cdf0e10cSrcweir eState = ssGetWord;
864cdf0e10cSrcweir r.TokenType = KParseType::IDENTNAME;
865cdf0e10cSrcweir }
866cdf0e10cSrcweir else
867cdf0e10cSrcweir eState = ssStopBack;
868cdf0e10cSrcweir }
869cdf0e10cSrcweir else if ( bMightBeWord )
870cdf0e10cSrcweir { // might be a numerical name (1.2+fg)
871cdf0e10cSrcweir eState = ssGetWord;
872cdf0e10cSrcweir r.TokenType = KParseType::IDENTNAME;
873cdf0e10cSrcweir }
874cdf0e10cSrcweir else
875cdf0e10cSrcweir eState = ssStopBack;
876cdf0e10cSrcweir }
877cdf0e10cSrcweir else if ( bMightBeWord && (nMask & TOKEN_WORD) )
878cdf0e10cSrcweir { // might be a numerical name (1995.A1)
879cdf0e10cSrcweir eState = ssGetWord;
880cdf0e10cSrcweir r.TokenType = KParseType::IDENTNAME;
881cdf0e10cSrcweir }
882cdf0e10cSrcweir else
883cdf0e10cSrcweir eState = ssStopBack;
884cdf0e10cSrcweir }
885cdf0e10cSrcweir break;
886cdf0e10cSrcweir case ssGetWordFirstChar :
887cdf0e10cSrcweir eState = ssGetWord;
888cdf0e10cSrcweir // fall thru
889cdf0e10cSrcweir case ssGetWord :
890cdf0e10cSrcweir {
891cdf0e10cSrcweir if ( nMask & TOKEN_WORD )
892cdf0e10cSrcweir ; // keep it going
893cdf0e10cSrcweir else if ( nMask & TOKEN_NAME_SEP )
894cdf0e10cSrcweir {
895cdf0e10cSrcweir if ( bQuote )
896cdf0e10cSrcweir {
897cdf0e10cSrcweir if ( cLast == '\\' )
898cdf0e10cSrcweir { // escaped
899cdf0e10cSrcweir aSymbol += OUString( pSym, pSrc - pSym - 2 );
900cdf0e10cSrcweir aSymbol += OUString( &c, 1);
901cdf0e10cSrcweir }
902cdf0e10cSrcweir else
903cdf0e10cSrcweir {
904cdf0e10cSrcweir eState = ssStop;
905cdf0e10cSrcweir aSymbol += OUString( pSym, pSrc - pSym - 1 );
906cdf0e10cSrcweir }
907cdf0e10cSrcweir pSym = pSrc;
908cdf0e10cSrcweir }
909cdf0e10cSrcweir else
910cdf0e10cSrcweir eState = ssStopBack;
911cdf0e10cSrcweir }
912cdf0e10cSrcweir else if ( bQuote )
913cdf0e10cSrcweir ; // keep it going
914cdf0e10cSrcweir else
915cdf0e10cSrcweir eState = ssStopBack;
916cdf0e10cSrcweir }
917cdf0e10cSrcweir break;
918cdf0e10cSrcweir case ssGetString :
919cdf0e10cSrcweir {
920cdf0e10cSrcweir if ( nMask & TOKEN_STRING_SEP )
921cdf0e10cSrcweir {
922cdf0e10cSrcweir if ( cLast == '\\' )
923cdf0e10cSrcweir { // escaped
924cdf0e10cSrcweir aSymbol += OUString( pSym, pSrc - pSym - 2 );
925cdf0e10cSrcweir aSymbol += OUString( &c, 1);
926cdf0e10cSrcweir }
927cdf0e10cSrcweir else if ( c == *pSrc &&
928cdf0e10cSrcweir !(nContTypes & KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING) )
929cdf0e10cSrcweir { // "" => literal " escaped
930cdf0e10cSrcweir aSymbol += OUString( pSym, pSrc - pSym );
931cdf0e10cSrcweir pSrc++;
932cdf0e10cSrcweir }
933cdf0e10cSrcweir else
934cdf0e10cSrcweir {
935cdf0e10cSrcweir eState = ssStop;
936cdf0e10cSrcweir aSymbol += OUString( pSym, pSrc - pSym - 1 );
937cdf0e10cSrcweir }
938cdf0e10cSrcweir pSym = pSrc;
939cdf0e10cSrcweir }
940cdf0e10cSrcweir }
941cdf0e10cSrcweir break;
942cdf0e10cSrcweir case ssGetBool :
943cdf0e10cSrcweir {
944cdf0e10cSrcweir if ( (nMask & TOKEN_BOOL) )
945cdf0e10cSrcweir eState = ssStop; // maximum 2: <, >, <>, <=, >=
946cdf0e10cSrcweir else
947cdf0e10cSrcweir eState = ssStopBack;
948cdf0e10cSrcweir }
949cdf0e10cSrcweir break;
950cdf0e10cSrcweir case ssStopBack :
951cdf0e10cSrcweir case ssBounce :
952cdf0e10cSrcweir case ssStop :
953cdf0e10cSrcweir ; // nothing, no compiler warning
954cdf0e10cSrcweir break;
955cdf0e10cSrcweir }
956cdf0e10cSrcweir if ( eState == ssRewindFromValue )
957cdf0e10cSrcweir {
958cdf0e10cSrcweir r = ParseResult();
959cdf0e10cSrcweir pSym = pTextStart;
960cdf0e10cSrcweir pSrc = pSym;
961cdf0e10cSrcweir aSymbol = OUString();
962cdf0e10cSrcweir c = *pSrc;
963cdf0e10cSrcweir cLast = 0;
964cdf0e10cSrcweir nDecSeps = 0;
965cdf0e10cSrcweir bQuote = false;
966cdf0e10cSrcweir bMightBeWord = true;
967cdf0e10cSrcweir bMightBeWordLast = true;
968cdf0e10cSrcweir }
969cdf0e10cSrcweir else
970cdf0e10cSrcweir {
971cdf0e10cSrcweir if ( !(r.TokenType & nTokenType) )
972cdf0e10cSrcweir {
973cdf0e10cSrcweir if ( (r.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER))
974cdf0e10cSrcweir && (nTokenType & KParseType::IDENTNAME) && bMightBeWord )
975cdf0e10cSrcweir ; // keep a number that might be a word
976cdf0e10cSrcweir else if ( r.LeadingWhiteSpace == (pSrc - pTextStart) )
977cdf0e10cSrcweir ; // keep ignored white space
978cdf0e10cSrcweir else if ( !r.TokenType && eState == ssGetValue && (nMask & TOKEN_VALUE_SEP) )
979cdf0e10cSrcweir ; // keep uncertain value
980cdf0e10cSrcweir else
981cdf0e10cSrcweir eState = ssBounce;
982cdf0e10cSrcweir }
983cdf0e10cSrcweir if ( eState == ssBounce )
984cdf0e10cSrcweir {
985cdf0e10cSrcweir r.TokenType = 0;
986cdf0e10cSrcweir eState = ssStopBack;
987cdf0e10cSrcweir }
988cdf0e10cSrcweir if ( eState == ssStopBack )
989cdf0e10cSrcweir { // put back
990cdf0e10cSrcweir pSrc--;
991cdf0e10cSrcweir bMightBeWord = bMightBeWordLast;
992cdf0e10cSrcweir eState = ssStop;
993cdf0e10cSrcweir }
994cdf0e10cSrcweir if ( eState != ssStop )
995cdf0e10cSrcweir {
996cdf0e10cSrcweir if ( !r.StartFlags )
997cdf0e10cSrcweir r.StartFlags |= nParseTokensType;
998cdf0e10cSrcweir else
999cdf0e10cSrcweir r.ContFlags |= nParseTokensType;
1000cdf0e10cSrcweir }
1001cdf0e10cSrcweir bMightBeWordLast = bMightBeWord;
1002cdf0e10cSrcweir cLast = c;
1003cdf0e10cSrcweir c = *pSrc;
1004cdf0e10cSrcweir }
1005cdf0e10cSrcweir }
1006cdf0e10cSrcweir // r.CharLen is the length in characters (not code points) of the parsed
1007cdf0e10cSrcweir // token not including any leading white space, change this calculation if
1008cdf0e10cSrcweir // multi-code-point Unicode characters are to be supported.
1009cdf0e10cSrcweir r.CharLen = pSrc - pTextStart - r.LeadingWhiteSpace;
1010cdf0e10cSrcweir r.EndPos = nPos + (pSrc - pTextStart);
1011cdf0e10cSrcweir if ( r.TokenType & KParseType::ASC_NUMBER )
1012cdf0e10cSrcweir {
1013cdf0e10cSrcweir r.Value = rtl_math_uStringToDouble( pTextStart + r.LeadingWhiteSpace,
1014cdf0e10cSrcweir pTextStart + r.EndPos, cDecimalSep, cGroupSep, NULL, NULL );
1015cdf0e10cSrcweir if ( bMightBeWord )
1016cdf0e10cSrcweir r.TokenType |= KParseType::IDENTNAME;
1017cdf0e10cSrcweir }
1018cdf0e10cSrcweir else if ( r.TokenType & KParseType::UNI_NUMBER )
1019cdf0e10cSrcweir {
1020cdf0e10cSrcweir if ( !xNatNumSup.is() )
1021cdf0e10cSrcweir {
1022cdf0e10cSrcweir #define NATIVENUMBERSUPPLIER_SERVICENAME "com.sun.star.i18n.NativeNumberSupplier"
1023cdf0e10cSrcweir if ( xMSF.is() )
1024cdf0e10cSrcweir {
1025cdf0e10cSrcweir xNatNumSup = Reference< XNativeNumberSupplier > (
1026cdf0e10cSrcweir xMSF->createInstance( OUString(
1027cdf0e10cSrcweir RTL_CONSTASCII_USTRINGPARAM(
1028cdf0e10cSrcweir NATIVENUMBERSUPPLIER_SERVICENAME ) ) ),
1029cdf0e10cSrcweir UNO_QUERY );
1030cdf0e10cSrcweir }
1031cdf0e10cSrcweir if ( !xNatNumSup.is() )
1032cdf0e10cSrcweir {
1033cdf0e10cSrcweir throw RuntimeException( OUString(
1034cdf0e10cSrcweir #ifdef DBG_UTIL
1035cdf0e10cSrcweir RTL_CONSTASCII_USTRINGPARAM(
1036cdf0e10cSrcweir "cclass_Unicode::parseText: can't instanciate "
1037cdf0e10cSrcweir NATIVENUMBERSUPPLIER_SERVICENAME )
1038cdf0e10cSrcweir #endif
1039cdf0e10cSrcweir ), *this );
1040cdf0e10cSrcweir }
1041cdf0e10cSrcweir #undef NATIVENUMBERSUPPLIER_SERVICENAME
1042cdf0e10cSrcweir }
1043cdf0e10cSrcweir OUString aTmp( pTextStart + r.LeadingWhiteSpace, r.EndPos - nPos +
1044cdf0e10cSrcweir r.LeadingWhiteSpace );
1045cdf0e10cSrcweir // transliterate to ASCII
1046cdf0e10cSrcweir aTmp = xNatNumSup->getNativeNumberString( aTmp, aParserLocale,
1047cdf0e10cSrcweir NativeNumberMode::NATNUM0 );
1048cdf0e10cSrcweir r.Value = ::rtl::math::stringToDouble( aTmp, cDecimalSep, cGroupSep, NULL, NULL );
1049cdf0e10cSrcweir if ( bMightBeWord )
1050cdf0e10cSrcweir r.TokenType |= KParseType::IDENTNAME;
1051cdf0e10cSrcweir }
1052cdf0e10cSrcweir else if ( r.TokenType & (KParseType::SINGLE_QUOTE_NAME | KParseType::DOUBLE_QUOTE_STRING) )
1053cdf0e10cSrcweir {
1054cdf0e10cSrcweir if ( pSym < pSrc )
1055cdf0e10cSrcweir { //! open quote
1056cdf0e10cSrcweir aSymbol += OUString( pSym, pSrc - pSym );
1057cdf0e10cSrcweir r.TokenType |= KParseType::MISSING_QUOTE;
1058cdf0e10cSrcweir }
1059cdf0e10cSrcweir r.DequotedNameOrString = aSymbol;
1060cdf0e10cSrcweir }
1061cdf0e10cSrcweir }
1062cdf0e10cSrcweir
1063cdf0e10cSrcweir } } } }
1064