xref: /trunk/main/svtools/source/edit/syntaxhighlight.cxx (revision cdf0e10c4e3984b49a9502b011690b615761d4a3)
1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
29*cdf0e10cSrcweir #include "precompiled_svtools.hxx"
30*cdf0e10cSrcweir 
31*cdf0e10cSrcweir #include <svtools/syntaxhighlight.hxx>
32*cdf0e10cSrcweir 
33*cdf0e10cSrcweir #include <unotools/charclass.hxx>
34*cdf0e10cSrcweir #include <tools/debug.hxx>
35*cdf0e10cSrcweir 
36*cdf0e10cSrcweir 
// ##########################################################################
// ATTENTION: all of these words need to be in lower case, and each list must
// stay sorted in strcmp() order, because getNextToken() lower-cases the
// candidate word and looks it up with bsearch()
// ##########################################################################
// BASIC keyword table: lower case, ascending strcmp() order, one row group
// per initial letter. Entries containing a blank ("end if", "line input")
// are part of the table as well.
static const char* strListBasicKeyWords[] = {
    "access", "alias", "and", "any", "append", "as",
    "base", "binary", "boolean", "byref", "byte", "byval",
    "call", "case", "cdecl", "classmodule", "close", "compare", "compatible",
    "const", "currency",
    "date", "declare", "defbool", "defcur", "defdate", "defdbl", "deferr",
    "defint", "deflng", "defobj", "defsng", "defstr", "defvar", "dim", "do",
    "double",
    "each", "else", "elseif", "end", "end enum", "end function", "end if",
    "end select", "end sub", "end type", "endif", "enum", "eqv", "erase",
    "error", "exit", "explicit",
    "for", "function",
    "get", "global", "gosub", "goto",
    "if", "imp", "implements", "in", "input", "integer", "is",
    "let", "lib", "like", "line", "line input", "local", "lock", "long",
    "loop", "lprint", "lset",
    "mod",
    "name", "new", "next", "not",
    "object", "on", "open", "option", "optional", "or", "output",
    "preserve", "print", "private", "property", "public",
    "random", "read", "redim", "rem", "resume", "return", "rset",
    "select", "set", "shared", "single", "static", "step", "stop", "string",
    "sub", "system",
    "text", "then", "to", "type", "typeof",
    "until",
    "variant",
    "wend", "while", "with", "write",
    "xor"
};
166*cdf0e10cSrcweir 
167*cdf0e10cSrcweir 
// SQL keyword table: lower case, ascending strcmp() order, one row per
// initial letter.
static const char* strListSqlKeyWords[] = {
    "all", "and", "any", "as", "asc", "avg",
    "between", "by",
    "cast", "corresponding", "count", "create", "cross",
    "delete", "desc", "distinct", "drop",
    "escape", "except", "exists",
    "false", "from", "full",
    "global", "group",
    "having",
    "in", "inner", "insert", "intersect", "into", "is",
    "join",
    "left", "like", "local",
    "match", "max", "min",
    "natural", "not", "null",
    "on", "or", "order", "outer",
    "right",
    "select", "set", "some", "sum",
    "table", "temporary", "true",
    "union", "unique", "unknown", "update", "using",
    "values",
    "where"
};
231*cdf0e10cSrcweir 
232*cdf0e10cSrcweir 
// Comparison callback for bsearch() over an array of C-string pointers.
//
// Note the asymmetry bsearch() imposes: the first argument is the search key
// itself, the second is a pointer to the array element being probed.
//
// arg1    the key, a NUL-terminated char string
// arg2    address of a table entry, i.e. a pointer to a `const char*`
// return  <0, 0 or >0, exactly as strcmp( key, entry ) reports
extern "C" int CDECL compare_strings( const void *arg1, const void *arg2 )
{
    // Named casts keep the const qualification the C-style casts dropped
    const char* pKey = static_cast< const char* >( arg1 );
    const char* pEntry = *static_cast< const char* const* >( arg2 );
    return strcmp( pKey, pEntry );
}
237*cdf0e10cSrcweir 
238*cdf0e10cSrcweir 
239*cdf0e10cSrcweir class LetterTable
240*cdf0e10cSrcweir {
241*cdf0e10cSrcweir     bool        IsLetterTab[256];
242*cdf0e10cSrcweir 
243*cdf0e10cSrcweir public:
244*cdf0e10cSrcweir     LetterTable( void );
245*cdf0e10cSrcweir 
246*cdf0e10cSrcweir     inline bool isLetter( sal_Unicode c )
247*cdf0e10cSrcweir     {
248*cdf0e10cSrcweir         bool bRet = (c < 256) ? IsLetterTab[c] : isLetterUnicode( c );
249*cdf0e10cSrcweir         return bRet;
250*cdf0e10cSrcweir     }
251*cdf0e10cSrcweir     bool isLetterUnicode( sal_Unicode c );
252*cdf0e10cSrcweir };
253*cdf0e10cSrcweir 
254*cdf0e10cSrcweir class BasicSimpleCharClass
255*cdf0e10cSrcweir {
256*cdf0e10cSrcweir     static LetterTable aLetterTable;
257*cdf0e10cSrcweir 
258*cdf0e10cSrcweir public:
259*cdf0e10cSrcweir     static sal_Bool isAlpha( sal_Unicode c, bool bCompatible )
260*cdf0e10cSrcweir     {
261*cdf0e10cSrcweir         sal_Bool bRet = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
262*cdf0e10cSrcweir                     || (bCompatible && aLetterTable.isLetter( c ));
263*cdf0e10cSrcweir         return bRet;
264*cdf0e10cSrcweir     }
265*cdf0e10cSrcweir 
266*cdf0e10cSrcweir     static sal_Bool isDigit( sal_Unicode c )
267*cdf0e10cSrcweir     {
268*cdf0e10cSrcweir         sal_Bool bRet = (c >= '0' && c <= '9');
269*cdf0e10cSrcweir         return bRet;
270*cdf0e10cSrcweir     }
271*cdf0e10cSrcweir 
272*cdf0e10cSrcweir     static sal_Bool isAlphaNumeric( sal_Unicode c, bool bCompatible )
273*cdf0e10cSrcweir     {
274*cdf0e10cSrcweir         sal_Bool bRet = isDigit( c ) || isAlpha( c, bCompatible );
275*cdf0e10cSrcweir         return bRet;
276*cdf0e10cSrcweir     }
277*cdf0e10cSrcweir };
278*cdf0e10cSrcweir 
279*cdf0e10cSrcweir LetterTable BasicSimpleCharClass::aLetterTable;
280*cdf0e10cSrcweir 
281*cdf0e10cSrcweir LetterTable::LetterTable( void )
282*cdf0e10cSrcweir {
283*cdf0e10cSrcweir     for( int i = 0 ; i < 256 ; ++i )
284*cdf0e10cSrcweir         IsLetterTab[i] = false;
285*cdf0e10cSrcweir 
286*cdf0e10cSrcweir     IsLetterTab[0xC0] = true;   // ?, CAPITAL LETTER A WITH GRAVE ACCENT
287*cdf0e10cSrcweir     IsLetterTab[0xC1] = true;   // ?, CAPITAL LETTER A WITH ACUTE ACCENT
288*cdf0e10cSrcweir     IsLetterTab[0xC2] = true;   // ?, CAPITAL LETTER A WITH CIRCUMFLEX ACCENT
289*cdf0e10cSrcweir     IsLetterTab[0xC3] = true;   // ?, CAPITAL LETTER A WITH TILDE
290*cdf0e10cSrcweir     IsLetterTab[0xC4] = true;   // ?, CAPITAL LETTER A WITH DIAERESIS
291*cdf0e10cSrcweir     IsLetterTab[0xC5] = true;   // ?, CAPITAL LETTER A WITH RING ABOVE
292*cdf0e10cSrcweir     IsLetterTab[0xC6] = true;   // ?, CAPITAL LIGATURE AE
293*cdf0e10cSrcweir     IsLetterTab[0xC7] = true;   // ?, CAPITAL LETTER C WITH CEDILLA
294*cdf0e10cSrcweir     IsLetterTab[0xC8] = true;   // ?, CAPITAL LETTER E WITH GRAVE ACCENT
295*cdf0e10cSrcweir     IsLetterTab[0xC9] = true;   // ?, CAPITAL LETTER E WITH ACUTE ACCENT
296*cdf0e10cSrcweir     IsLetterTab[0xCA] = true;   // ?, CAPITAL LETTER E WITH CIRCUMFLEX ACCENT
297*cdf0e10cSrcweir     IsLetterTab[0xCB] = true;   // ?, CAPITAL LETTER E WITH DIAERESIS
298*cdf0e10cSrcweir     IsLetterTab[0xCC] = true;   // ?, CAPITAL LETTER I WITH GRAVE ACCENT
299*cdf0e10cSrcweir     IsLetterTab[0xCD] = true;   // ?, CAPITAL LETTER I WITH ACUTE ACCENT
300*cdf0e10cSrcweir     IsLetterTab[0xCE] = true;   // ?, CAPITAL LETTER I WITH CIRCUMFLEX ACCENT
301*cdf0e10cSrcweir     IsLetterTab[0xCF] = true;   // ?, CAPITAL LETTER I WITH DIAERESIS
302*cdf0e10cSrcweir     IsLetterTab[0xD0] = true;   // ?, CAPITAL LETTER ETH
303*cdf0e10cSrcweir     IsLetterTab[0xD1] = true;   // ?, CAPITAL LETTER N WITH TILDE
304*cdf0e10cSrcweir     IsLetterTab[0xD2] = true;   // ?, CAPITAL LETTER O WITH GRAVE ACCENT
305*cdf0e10cSrcweir     IsLetterTab[0xD3] = true;   // ?, CAPITAL LETTER O WITH ACUTE ACCENT
306*cdf0e10cSrcweir     IsLetterTab[0xD4] = true;   // ?, CAPITAL LETTER O WITH CIRCUMFLEX ACCENT
307*cdf0e10cSrcweir     IsLetterTab[0xD5] = true;   // ?, CAPITAL LETTER O WITH TILDE
308*cdf0e10cSrcweir     IsLetterTab[0xD6] = true;   // ?, CAPITAL LETTER O WITH DIAERESIS
309*cdf0e10cSrcweir     IsLetterTab[0xD8] = true;   // ?, CAPITAL LETTER O WITH STROKE
310*cdf0e10cSrcweir     IsLetterTab[0xD9] = true;   // ?, CAPITAL LETTER U WITH GRAVE ACCENT
311*cdf0e10cSrcweir     IsLetterTab[0xDA] = true;   // ?, CAPITAL LETTER U WITH ACUTE ACCENT
312*cdf0e10cSrcweir     IsLetterTab[0xDB] = true;   // ?, CAPITAL LETTER U WITH CIRCUMFLEX ACCENT
313*cdf0e10cSrcweir     IsLetterTab[0xDC] = true;   // ?, CAPITAL LETTER U WITH DIAERESIS
314*cdf0e10cSrcweir     IsLetterTab[0xDD] = true;   // ?, CAPITAL LETTER Y WITH ACUTE ACCENT
315*cdf0e10cSrcweir     IsLetterTab[0xDE] = true;   // ?, CAPITAL LETTER THORN
316*cdf0e10cSrcweir     IsLetterTab[0xDF] = true;   // ?, SMALL LETTER SHARP S
317*cdf0e10cSrcweir     IsLetterTab[0xE0] = true;   // ?, SMALL LETTER A WITH GRAVE ACCENT
318*cdf0e10cSrcweir     IsLetterTab[0xE1] = true;   // ?, SMALL LETTER A WITH ACUTE ACCENT
319*cdf0e10cSrcweir     IsLetterTab[0xE2] = true;   // ?, SMALL LETTER A WITH CIRCUMFLEX ACCENT
320*cdf0e10cSrcweir     IsLetterTab[0xE3] = true;   // ?, SMALL LETTER A WITH TILDE
321*cdf0e10cSrcweir     IsLetterTab[0xE4] = true;   // ?, SMALL LETTER A WITH DIAERESIS
322*cdf0e10cSrcweir     IsLetterTab[0xE5] = true;   // ?, SMALL LETTER A WITH RING ABOVE
323*cdf0e10cSrcweir     IsLetterTab[0xE6] = true;   // ?, SMALL LIGATURE AE
324*cdf0e10cSrcweir     IsLetterTab[0xE7] = true;   // ?, SMALL LETTER C WITH CEDILLA
325*cdf0e10cSrcweir     IsLetterTab[0xE8] = true;   // ?, SMALL LETTER E WITH GRAVE ACCENT
326*cdf0e10cSrcweir     IsLetterTab[0xE9] = true;   // ?, SMALL LETTER E WITH ACUTE ACCENT
327*cdf0e10cSrcweir     IsLetterTab[0xEA] = true;   // ?, SMALL LETTER E WITH CIRCUMFLEX ACCENT
328*cdf0e10cSrcweir     IsLetterTab[0xEB] = true;   // ?, SMALL LETTER E WITH DIAERESIS
329*cdf0e10cSrcweir     IsLetterTab[0xEC] = true;   // ?, SMALL LETTER I WITH GRAVE ACCENT
330*cdf0e10cSrcweir     IsLetterTab[0xED] = true;   // ?, SMALL LETTER I WITH ACUTE ACCENT
331*cdf0e10cSrcweir     IsLetterTab[0xEE] = true;   // ?, SMALL LETTER I WITH CIRCUMFLEX ACCENT
332*cdf0e10cSrcweir     IsLetterTab[0xEF] = true;   // ?, SMALL LETTER I WITH DIAERESIS
333*cdf0e10cSrcweir     IsLetterTab[0xF0] = true;   // ?, SMALL LETTER ETH
334*cdf0e10cSrcweir     IsLetterTab[0xF1] = true;   // ?, SMALL LETTER N WITH TILDE
335*cdf0e10cSrcweir     IsLetterTab[0xF2] = true;   // ?, SMALL LETTER O WITH GRAVE ACCENT
336*cdf0e10cSrcweir     IsLetterTab[0xF3] = true;   // ?, SMALL LETTER O WITH ACUTE ACCENT
337*cdf0e10cSrcweir     IsLetterTab[0xF4] = true;   // ?, SMALL LETTER O WITH CIRCUMFLEX ACCENT
338*cdf0e10cSrcweir     IsLetterTab[0xF5] = true;   // ?, SMALL LETTER O WITH TILDE
339*cdf0e10cSrcweir     IsLetterTab[0xF6] = true;   // ?, SMALL LETTER O WITH DIAERESIS
340*cdf0e10cSrcweir     IsLetterTab[0xF8] = true;   // ?, SMALL LETTER O WITH OBLIQUE BAR
341*cdf0e10cSrcweir     IsLetterTab[0xF9] = true;   // ?, SMALL LETTER U WITH GRAVE ACCENT
342*cdf0e10cSrcweir     IsLetterTab[0xFA] = true;   // ?, SMALL LETTER U WITH ACUTE ACCENT
343*cdf0e10cSrcweir     IsLetterTab[0xFB] = true;   // ?, SMALL LETTER U WITH CIRCUMFLEX ACCENT
344*cdf0e10cSrcweir     IsLetterTab[0xFC] = true;   // ?, SMALL LETTER U WITH DIAERESIS
345*cdf0e10cSrcweir     IsLetterTab[0xFD] = true;   // ?, SMALL LETTER Y WITH ACUTE ACCENT
346*cdf0e10cSrcweir     IsLetterTab[0xFE] = true;   // ?, SMALL LETTER THORN
347*cdf0e10cSrcweir     IsLetterTab[0xFF] = true;   // � , SMALL LETTER Y WITH DIAERESIS
348*cdf0e10cSrcweir }
349*cdf0e10cSrcweir 
350*cdf0e10cSrcweir bool LetterTable::isLetterUnicode( sal_Unicode c )
351*cdf0e10cSrcweir {
352*cdf0e10cSrcweir     static CharClass* pCharClass = NULL;
353*cdf0e10cSrcweir     if( pCharClass == NULL )
354*cdf0e10cSrcweir         pCharClass = new CharClass( Application::GetSettings().GetLocale() );
355*cdf0e10cSrcweir     String aStr( c );
356*cdf0e10cSrcweir     bool bRet = pCharClass->isLetter( aStr, 0 );
357*cdf0e10cSrcweir     return bRet;
358*cdf0e10cSrcweir }
359*cdf0e10cSrcweir 
360*cdf0e10cSrcweir // Hilfsfunktion: Zeichen-Flag Testen
361*cdf0e10cSrcweir sal_Bool SimpleTokenizer_Impl::testCharFlags( sal_Unicode c, sal_uInt16 nTestFlags )
362*cdf0e10cSrcweir {
363*cdf0e10cSrcweir     bool bRet = false;
364*cdf0e10cSrcweir     if( c != 0 && c <= 255 )
365*cdf0e10cSrcweir     {
366*cdf0e10cSrcweir         bRet = ( (aCharTypeTab[c] & nTestFlags) != 0 );
367*cdf0e10cSrcweir     }
368*cdf0e10cSrcweir     else if( c > 255 )
369*cdf0e10cSrcweir     {
370*cdf0e10cSrcweir         bRet = (( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER ) & nTestFlags) != 0
371*cdf0e10cSrcweir             ? BasicSimpleCharClass::isAlpha( c, true ) : false;
372*cdf0e10cSrcweir     }
373*cdf0e10cSrcweir     return bRet;
374*cdf0e10cSrcweir }
375*cdf0e10cSrcweir 
376*cdf0e10cSrcweir void SimpleTokenizer_Impl::setKeyWords( const char** ppKeyWords, sal_uInt16 nCount )
377*cdf0e10cSrcweir {
378*cdf0e10cSrcweir     ppListKeyWords = ppKeyWords;
379*cdf0e10cSrcweir     nKeyWordCount = nCount;
380*cdf0e10cSrcweir }
381*cdf0e10cSrcweir 
382*cdf0e10cSrcweir // Neues Token holen
383*cdf0e10cSrcweir sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType,
384*cdf0e10cSrcweir     /*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos )
385*cdf0e10cSrcweir {
386*cdf0e10cSrcweir     reType = TT_UNKNOWN;
387*cdf0e10cSrcweir 
388*cdf0e10cSrcweir     // Position merken
389*cdf0e10cSrcweir     rpStartPos = mpActualPos;
390*cdf0e10cSrcweir 
391*cdf0e10cSrcweir     // Zeichen untersuchen
392*cdf0e10cSrcweir     sal_Unicode c = peekChar();
393*cdf0e10cSrcweir     if( c == CHAR_EOF )
394*cdf0e10cSrcweir         return sal_False;
395*cdf0e10cSrcweir 
396*cdf0e10cSrcweir     // Zeichen lesen
397*cdf0e10cSrcweir     getChar();
398*cdf0e10cSrcweir 
399*cdf0e10cSrcweir     //*** Alle Moeglichkeiten durchgehen ***
400*cdf0e10cSrcweir     // Space?
401*cdf0e10cSrcweir     if ( (testCharFlags( c, CHAR_SPACE ) == sal_True) )
402*cdf0e10cSrcweir     {
403*cdf0e10cSrcweir         while( testCharFlags( peekChar(), CHAR_SPACE ) == sal_True )
404*cdf0e10cSrcweir             getChar();
405*cdf0e10cSrcweir 
406*cdf0e10cSrcweir         reType = TT_WHITESPACE;
407*cdf0e10cSrcweir     }
408*cdf0e10cSrcweir 
409*cdf0e10cSrcweir     // Identifier?
410*cdf0e10cSrcweir     else if ( (testCharFlags( c, CHAR_START_IDENTIFIER ) == sal_True) )
411*cdf0e10cSrcweir     {
412*cdf0e10cSrcweir         sal_Bool bIdentifierChar;
413*cdf0e10cSrcweir         do
414*cdf0e10cSrcweir         {
415*cdf0e10cSrcweir             // Naechstes Zeichen holen
416*cdf0e10cSrcweir             c = peekChar();
417*cdf0e10cSrcweir             bIdentifierChar = testCharFlags( c, CHAR_IN_IDENTIFIER );
418*cdf0e10cSrcweir             if( bIdentifierChar )
419*cdf0e10cSrcweir                 getChar();
420*cdf0e10cSrcweir         }
421*cdf0e10cSrcweir         while( bIdentifierChar );
422*cdf0e10cSrcweir 
423*cdf0e10cSrcweir         reType = TT_IDENTIFIER;
424*cdf0e10cSrcweir 
425*cdf0e10cSrcweir         // Schluesselwort-Tabelle
426*cdf0e10cSrcweir         if (ppListKeyWords != NULL)
427*cdf0e10cSrcweir         {
428*cdf0e10cSrcweir             int nCount = mpActualPos - rpStartPos;
429*cdf0e10cSrcweir 
430*cdf0e10cSrcweir             // No keyword if string contains char > 255
431*cdf0e10cSrcweir             bool bCanBeKeyword = true;
432*cdf0e10cSrcweir             for( int i = 0 ; i < nCount ; i++ )
433*cdf0e10cSrcweir             {
434*cdf0e10cSrcweir                 if( rpStartPos[i] > 255 )
435*cdf0e10cSrcweir                 {
436*cdf0e10cSrcweir                     bCanBeKeyword = false;
437*cdf0e10cSrcweir                     break;
438*cdf0e10cSrcweir                 }
439*cdf0e10cSrcweir             }
440*cdf0e10cSrcweir 
441*cdf0e10cSrcweir             if( bCanBeKeyword )
442*cdf0e10cSrcweir             {
443*cdf0e10cSrcweir                 String aKWString(rpStartPos, sal::static_int_cast< xub_StrLen >(nCount) );
444*cdf0e10cSrcweir                 ByteString aByteStr( aKWString, RTL_TEXTENCODING_ASCII_US );
445*cdf0e10cSrcweir                 aByteStr.ToLowerAscii();
446*cdf0e10cSrcweir                 if ( bsearch( aByteStr.GetBuffer(), ppListKeyWords, nKeyWordCount, sizeof( char* ),
447*cdf0e10cSrcweir                                                                         compare_strings ) )
448*cdf0e10cSrcweir                 {
449*cdf0e10cSrcweir                     reType = TT_KEYWORDS;
450*cdf0e10cSrcweir 
451*cdf0e10cSrcweir                     if ( aByteStr.Equals( "rem" ) )
452*cdf0e10cSrcweir                     {
453*cdf0e10cSrcweir                         // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
454*cdf0e10cSrcweir                         sal_Unicode cPeek = peekChar();
455*cdf0e10cSrcweir                         while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False )
456*cdf0e10cSrcweir                         {
457*cdf0e10cSrcweir                             c = getChar();
458*cdf0e10cSrcweir                             cPeek = peekChar();
459*cdf0e10cSrcweir                         }
460*cdf0e10cSrcweir 
461*cdf0e10cSrcweir                         reType = TT_COMMENT;
462*cdf0e10cSrcweir                     }
463*cdf0e10cSrcweir                 }
464*cdf0e10cSrcweir             }
465*cdf0e10cSrcweir         }
466*cdf0e10cSrcweir     }
467*cdf0e10cSrcweir 
468*cdf0e10cSrcweir     // Operator?
469*cdf0e10cSrcweir     // only for BASIC '\'' should be a comment, otherwise it is a normal string and handled there
470*cdf0e10cSrcweir     else if ( ( testCharFlags( c, CHAR_OPERATOR ) == sal_True ) || ( (c == '\'') && (aLanguage==HIGHLIGHT_BASIC)) )
471*cdf0e10cSrcweir     {
472*cdf0e10cSrcweir         // paramters for SQL view
473*cdf0e10cSrcweir         if ( (c==':') || (c=='?'))
474*cdf0e10cSrcweir         {
475*cdf0e10cSrcweir             if (c!='?')
476*cdf0e10cSrcweir             {
477*cdf0e10cSrcweir                 sal_Bool bIdentifierChar;
478*cdf0e10cSrcweir                 do
479*cdf0e10cSrcweir                 {
480*cdf0e10cSrcweir                     // Naechstes Zeichen holen
481*cdf0e10cSrcweir                     c = peekChar();
482*cdf0e10cSrcweir                     bIdentifierChar =  BasicSimpleCharClass::isAlpha( c, true );
483*cdf0e10cSrcweir                     if( bIdentifierChar )
484*cdf0e10cSrcweir                         getChar();
485*cdf0e10cSrcweir                 }
486*cdf0e10cSrcweir                 while( bIdentifierChar );
487*cdf0e10cSrcweir             }
488*cdf0e10cSrcweir             reType = TT_PARAMETER;
489*cdf0e10cSrcweir         }
490*cdf0e10cSrcweir         else if ((c=='-'))
491*cdf0e10cSrcweir         {
492*cdf0e10cSrcweir             sal_Unicode cPeekNext = peekChar();
493*cdf0e10cSrcweir             if (cPeekNext=='-')
494*cdf0e10cSrcweir             {
495*cdf0e10cSrcweir                 // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
496*cdf0e10cSrcweir                 while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False )
497*cdf0e10cSrcweir                 {
498*cdf0e10cSrcweir                     getChar();
499*cdf0e10cSrcweir                     cPeekNext = peekChar();
500*cdf0e10cSrcweir                 }
501*cdf0e10cSrcweir                 reType = TT_COMMENT;
502*cdf0e10cSrcweir             }
503*cdf0e10cSrcweir         }
504*cdf0e10cSrcweir        else if (c=='/')
505*cdf0e10cSrcweir        {
506*cdf0e10cSrcweir            sal_Unicode cPeekNext = peekChar();
507*cdf0e10cSrcweir            if (cPeekNext=='/')
508*cdf0e10cSrcweir            {
509*cdf0e10cSrcweir                // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
510*cdf0e10cSrcweir                while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False )
511*cdf0e10cSrcweir                {
512*cdf0e10cSrcweir                    getChar();
513*cdf0e10cSrcweir                    cPeekNext = peekChar();
514*cdf0e10cSrcweir                }
515*cdf0e10cSrcweir                reType = TT_COMMENT;
516*cdf0e10cSrcweir            }
517*cdf0e10cSrcweir        }
518*cdf0e10cSrcweir         else
519*cdf0e10cSrcweir         {
520*cdf0e10cSrcweir             // Kommentar ?
521*cdf0e10cSrcweir             if ( c == '\'' )
522*cdf0e10cSrcweir             {
523*cdf0e10cSrcweir                 c = getChar();  // '/' entfernen
524*cdf0e10cSrcweir 
525*cdf0e10cSrcweir                 // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
526*cdf0e10cSrcweir                 sal_Unicode cPeek = c;
527*cdf0e10cSrcweir                 while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False )
528*cdf0e10cSrcweir                 {
529*cdf0e10cSrcweir                     getChar();
530*cdf0e10cSrcweir                     cPeek = peekChar();
531*cdf0e10cSrcweir                 }
532*cdf0e10cSrcweir 
533*cdf0e10cSrcweir                 reType = TT_COMMENT;
534*cdf0e10cSrcweir             }
535*cdf0e10cSrcweir 
536*cdf0e10cSrcweir             // Echter Operator, kann hier einfach behandelt werden,
537*cdf0e10cSrcweir             // da nicht der wirkliche Operator, wie z.B. += interessiert,
538*cdf0e10cSrcweir             // sondern nur die Tatsache, dass es sich um einen handelt.
539*cdf0e10cSrcweir             if( reType != TT_COMMENT )
540*cdf0e10cSrcweir             {
541*cdf0e10cSrcweir                 reType = TT_OPERATOR;
542*cdf0e10cSrcweir             }
543*cdf0e10cSrcweir 
544*cdf0e10cSrcweir         }
545*cdf0e10cSrcweir     }
546*cdf0e10cSrcweir 
547*cdf0e10cSrcweir     // Objekt-Trenner? Muss vor Number abgehandelt werden
548*cdf0e10cSrcweir     else if( c == '.' && ( peekChar() < '0' || peekChar() > '9' ) )
549*cdf0e10cSrcweir     {
550*cdf0e10cSrcweir         reType = TT_OPERATOR;
551*cdf0e10cSrcweir     }
552*cdf0e10cSrcweir 
553*cdf0e10cSrcweir     // Zahl?
554*cdf0e10cSrcweir     else if( testCharFlags( c, CHAR_START_NUMBER ) == sal_True )
555*cdf0e10cSrcweir     {
556*cdf0e10cSrcweir         reType = TT_NUMBER;
557*cdf0e10cSrcweir 
558*cdf0e10cSrcweir         // Zahlensystem, 10 = normal, wird bei Oct/Hex geaendert
559*cdf0e10cSrcweir         int nRadix = 10;
560*cdf0e10cSrcweir 
561*cdf0e10cSrcweir         // Ist es eine Hex- oder Oct-Zahl?
562*cdf0e10cSrcweir         if( c == '&' )
563*cdf0e10cSrcweir         {
564*cdf0e10cSrcweir             // Octal?
565*cdf0e10cSrcweir             if( peekChar() == 'o' || peekChar() == 'O' )
566*cdf0e10cSrcweir             {
567*cdf0e10cSrcweir                 // o entfernen
568*cdf0e10cSrcweir                 getChar();
569*cdf0e10cSrcweir                 nRadix = 8;     // Octal-Basis
570*cdf0e10cSrcweir 
571*cdf0e10cSrcweir                 // Alle Ziffern einlesen
572*cdf0e10cSrcweir                 while( testCharFlags( peekChar(), CHAR_IN_OCT_NUMBER ) )
573*cdf0e10cSrcweir                     c = getChar();
574*cdf0e10cSrcweir             }
575*cdf0e10cSrcweir             // Hex?
576*cdf0e10cSrcweir             else if( peekChar() == 'h' || peekChar() == 'H' )
577*cdf0e10cSrcweir             {
578*cdf0e10cSrcweir                 // x entfernen
579*cdf0e10cSrcweir                 getChar();
580*cdf0e10cSrcweir                 nRadix = 16;     // Hex-Basis
581*cdf0e10cSrcweir 
582*cdf0e10cSrcweir                 // Alle Ziffern einlesen und puffern
583*cdf0e10cSrcweir                 while( testCharFlags( peekChar(), CHAR_IN_HEX_NUMBER ) )
584*cdf0e10cSrcweir                     c = getChar();
585*cdf0e10cSrcweir             }
586*cdf0e10cSrcweir             else
587*cdf0e10cSrcweir             {
588*cdf0e10cSrcweir                 reType = TT_OPERATOR;
589*cdf0e10cSrcweir             }
590*cdf0e10cSrcweir         }
591*cdf0e10cSrcweir 
592*cdf0e10cSrcweir         // Wenn nicht Oct oder Hex als double ansehen
593*cdf0e10cSrcweir         if( reType == TT_NUMBER && nRadix == 10 )
594*cdf0e10cSrcweir         {
595*cdf0e10cSrcweir             // Flag, ob das letzte Zeichen ein Exponent war
596*cdf0e10cSrcweir             sal_Bool bAfterExpChar = sal_False;
597*cdf0e10cSrcweir 
598*cdf0e10cSrcweir             // Alle Ziffern einlesen
599*cdf0e10cSrcweir             while( testCharFlags( peekChar(), CHAR_IN_NUMBER ) ||
600*cdf0e10cSrcweir                     (bAfterExpChar && peekChar() == '+' ) ||
601*cdf0e10cSrcweir                     (bAfterExpChar && peekChar() == '-' ) )
602*cdf0e10cSrcweir                     // Nach Exponent auch +/- OK
603*cdf0e10cSrcweir             {
604*cdf0e10cSrcweir                 c = getChar();                  // Zeichen lesen
605*cdf0e10cSrcweir                 bAfterExpChar = ( c == 'e' || c == 'E' );
606*cdf0e10cSrcweir             }
607*cdf0e10cSrcweir         }
608*cdf0e10cSrcweir 
609*cdf0e10cSrcweir         // reType = TT_NUMBER;
610*cdf0e10cSrcweir     }
611*cdf0e10cSrcweir 
612*cdf0e10cSrcweir     // String?
613*cdf0e10cSrcweir     else if( testCharFlags( c, CHAR_START_STRING ) == sal_True )
614*cdf0e10cSrcweir     {
615*cdf0e10cSrcweir         // Merken, welches Zeichen den String eroeffnet hat
616*cdf0e10cSrcweir         sal_Unicode cEndString = c;
617*cdf0e10cSrcweir         if( c == '[' )
618*cdf0e10cSrcweir             cEndString = ']';
619*cdf0e10cSrcweir 
620*cdf0e10cSrcweir         // Alle Ziffern einlesen und puffern
621*cdf0e10cSrcweir         while( peekChar() != cEndString )
622*cdf0e10cSrcweir         {
623*cdf0e10cSrcweir             // #58846 EOF vor getChar() abfangen, damit EOF micht verloren geht
624*cdf0e10cSrcweir             if( peekChar() == CHAR_EOF )
625*cdf0e10cSrcweir             {
626*cdf0e10cSrcweir                 // ERROR: unterminated string literal
627*cdf0e10cSrcweir                 reType = TT_ERROR;
628*cdf0e10cSrcweir                 break;
629*cdf0e10cSrcweir             }
630*cdf0e10cSrcweir             c = getChar();
631*cdf0e10cSrcweir             if( testCharFlags( c, CHAR_EOL ) == sal_True )
632*cdf0e10cSrcweir             {
633*cdf0e10cSrcweir                 // ERROR: unterminated string literal
634*cdf0e10cSrcweir                 reType = TT_ERROR;
635*cdf0e10cSrcweir                 break;
636*cdf0e10cSrcweir             }
637*cdf0e10cSrcweir         }
638*cdf0e10cSrcweir 
639*cdf0e10cSrcweir         //  Zeichen lesen
640*cdf0e10cSrcweir         if( reType != TT_ERROR )
641*cdf0e10cSrcweir         {
642*cdf0e10cSrcweir             getChar();
643*cdf0e10cSrcweir             if( cEndString == ']' )
644*cdf0e10cSrcweir                 reType = TT_IDENTIFIER;
645*cdf0e10cSrcweir             else
646*cdf0e10cSrcweir                 reType = TT_STRING;
647*cdf0e10cSrcweir         }
648*cdf0e10cSrcweir     }
649*cdf0e10cSrcweir 
650*cdf0e10cSrcweir     // Zeilenende?
651*cdf0e10cSrcweir     else if( testCharFlags( c, CHAR_EOL ) == sal_True )
652*cdf0e10cSrcweir     {
653*cdf0e10cSrcweir         // Falls ein weiteres anderes EOL-Char folgt, weg damit
654*cdf0e10cSrcweir         sal_Unicode cNext = peekChar();
655*cdf0e10cSrcweir         if( cNext != c && testCharFlags( cNext, CHAR_EOL ) == sal_True )
656*cdf0e10cSrcweir             getChar();
657*cdf0e10cSrcweir 
658*cdf0e10cSrcweir         // Positions-Daten auf Zeilen-Beginn setzen
659*cdf0e10cSrcweir         nCol = 0;
660*cdf0e10cSrcweir         nLine++;
661*cdf0e10cSrcweir 
662*cdf0e10cSrcweir         reType = TT_EOL;
663*cdf0e10cSrcweir     }
664*cdf0e10cSrcweir 
665*cdf0e10cSrcweir     // Alles andere bleibt TT_UNKNOWN
666*cdf0e10cSrcweir 
667*cdf0e10cSrcweir 
668*cdf0e10cSrcweir     // End-Position eintragen
669*cdf0e10cSrcweir     rpEndPos = mpActualPos;
670*cdf0e10cSrcweir     return sal_True;
671*cdf0e10cSrcweir }
672*cdf0e10cSrcweir 
673*cdf0e10cSrcweir String SimpleTokenizer_Impl::getTokStr
674*cdf0e10cSrcweir     ( /*out*/const sal_Unicode* pStartPos, /*out*/const sal_Unicode* pEndPos )
675*cdf0e10cSrcweir {
676*cdf0e10cSrcweir     return String( pStartPos, (sal_uInt16)( pEndPos - pStartPos ) );
677*cdf0e10cSrcweir }
678*cdf0e10cSrcweir 
#ifdef DBG_UTIL
// Debug helper (only compiled with DBG_UTIL): renders one token as a line of
// text of the form "<type label><token text>\n".
//
// eType      token type; selects the label that is written first
// pStartPos  first character of the token (input)
// pEndPos    position one past the last character of the token (input)
//
// For TT_EOL only the label and the trailing newline are emitted; the token
// text itself is left out.
String SimpleTokenizer_Impl::getFullTokenStr( TokenTypes eType,
    const sal_Unicode* pStartPos, const sal_Unicode* pEndPos )
{
    String aText;

    // Label for the token type; a type without a case leaves the label empty
    switch( eType )
    {
        case TT_UNKNOWN:
            aText = String( RTL_CONSTASCII_USTRINGPARAM("TT_UNKNOWN:") );
            break;
        case TT_IDENTIFIER:
            aText = String( RTL_CONSTASCII_USTRINGPARAM("TT_IDENTIFIER:") );
            break;
        case TT_WHITESPACE:
            aText = String( RTL_CONSTASCII_USTRINGPARAM("TT_WHITESPACE:") );
            break;
        case TT_NUMBER:
            aText = String( RTL_CONSTASCII_USTRINGPARAM("TT_NUMBER:") );
            break;
        case TT_STRING:
            aText = String( RTL_CONSTASCII_USTRINGPARAM("TT_STRING:") );
            break;
        case TT_EOL:
            aText = String( RTL_CONSTASCII_USTRINGPARAM("TT_EOL:") );
            break;
        case TT_COMMENT:
            aText = String( RTL_CONSTASCII_USTRINGPARAM("TT_COMMENT:") );
            break;
        case TT_ERROR:
            aText = String( RTL_CONSTASCII_USTRINGPARAM("TT_ERROR:") );
            break;
        case TT_OPERATOR:
            aText = String( RTL_CONSTASCII_USTRINGPARAM("TT_OPERATOR:") );
            break;
        case TT_KEYWORDS:
            aText = String( RTL_CONSTASCII_USTRINGPARAM("TT_KEYWORD:") );
            break;
        case TT_PARAMETER:
            aText = String( RTL_CONSTASCII_USTRINGPARAM("TT_PARAMETER:") );
            break;
    }

    // Append the token characters, except for end-of-line tokens
    const bool bAppendToken = ( eType != TT_EOL );
    if( bAppendToken )
    {
        const sal_uInt16 nTokenLen = static_cast< sal_uInt16 >( pEndPos - pStartPos );
        aText += String( pStartPos, nTokenLen );
    }

    aText += String( RTL_CONSTASCII_USTRINGPARAM("\n") );
    return aText;
}
#endif
707*cdf0e10cSrcweir 
708*cdf0e10cSrcweir SimpleTokenizer_Impl::SimpleTokenizer_Impl( HighlighterLanguage aLang ): aLanguage(aLang)
709*cdf0e10cSrcweir {
710*cdf0e10cSrcweir     memset( aCharTypeTab, 0, sizeof( aCharTypeTab ) );
711*cdf0e10cSrcweir 
712*cdf0e10cSrcweir     // Zeichen-Tabelle fuellen
713*cdf0e10cSrcweir     sal_uInt16 i;
714*cdf0e10cSrcweir 
715*cdf0e10cSrcweir     // Zulaessige Zeichen fuer Identifier
716*cdf0e10cSrcweir     sal_uInt16 nHelpMask = (sal_uInt16)( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER );
717*cdf0e10cSrcweir     for( i = 'a' ; i <= 'z' ; i++ )
718*cdf0e10cSrcweir         aCharTypeTab[i] |= nHelpMask;
719*cdf0e10cSrcweir     for( i = 'A' ; i <= 'Z' ; i++ )
720*cdf0e10cSrcweir         aCharTypeTab[i] |= nHelpMask;
721*cdf0e10cSrcweir     // '_' extra eintragen
722*cdf0e10cSrcweir     aCharTypeTab[(int)'_'] |= nHelpMask;
723*cdf0e10cSrcweir     // AB 23.6.97: '$' ist auch erlaubt
724*cdf0e10cSrcweir     aCharTypeTab[(int)'$'] |= nHelpMask;
725*cdf0e10cSrcweir 
726*cdf0e10cSrcweir     // Ziffern (Identifier und Number ist moeglich)
727*cdf0e10cSrcweir     nHelpMask = (sal_uInt16)( CHAR_IN_IDENTIFIER | CHAR_START_NUMBER |
728*cdf0e10cSrcweir                          CHAR_IN_NUMBER | CHAR_IN_HEX_NUMBER );
729*cdf0e10cSrcweir     for( i = '0' ; i <= '9' ; i++ )
730*cdf0e10cSrcweir         aCharTypeTab[i] |= nHelpMask;
731*cdf0e10cSrcweir 
732*cdf0e10cSrcweir     // e und E sowie . von Hand ergaenzen
733*cdf0e10cSrcweir     aCharTypeTab[(int)'e'] |= CHAR_IN_NUMBER;
734*cdf0e10cSrcweir     aCharTypeTab[(int)'E'] |= CHAR_IN_NUMBER;
735*cdf0e10cSrcweir     aCharTypeTab[(int)'.'] |= (sal_uInt16)( CHAR_IN_NUMBER | CHAR_START_NUMBER );
736*cdf0e10cSrcweir     aCharTypeTab[(int)'&'] |= CHAR_START_NUMBER;
737*cdf0e10cSrcweir 
738*cdf0e10cSrcweir     // Hex-Ziffern
739*cdf0e10cSrcweir     for( i = 'a' ; i <= 'f' ; i++ )
740*cdf0e10cSrcweir         aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;
741*cdf0e10cSrcweir     for( i = 'A' ; i <= 'F' ; i++ )
742*cdf0e10cSrcweir         aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;
743*cdf0e10cSrcweir 
744*cdf0e10cSrcweir     // Oct-Ziffern
745*cdf0e10cSrcweir     for( i = '0' ; i <= '7' ; i++ )
746*cdf0e10cSrcweir         aCharTypeTab[i] |= CHAR_IN_OCT_NUMBER;
747*cdf0e10cSrcweir 
748*cdf0e10cSrcweir     // String-Beginn/End-Zeichen
749*cdf0e10cSrcweir     aCharTypeTab[(int)'\''] |= CHAR_START_STRING;
750*cdf0e10cSrcweir     aCharTypeTab[(int)'\"'] |= CHAR_START_STRING;
751*cdf0e10cSrcweir     aCharTypeTab[(int)'[']  |= CHAR_START_STRING;
752*cdf0e10cSrcweir     aCharTypeTab[(int)'`']  |= CHAR_START_STRING;
753*cdf0e10cSrcweir 
754*cdf0e10cSrcweir     // Operator-Zeichen
755*cdf0e10cSrcweir     aCharTypeTab[(int)'!'] |= CHAR_OPERATOR;
756*cdf0e10cSrcweir     aCharTypeTab[(int)'%'] |= CHAR_OPERATOR;
757*cdf0e10cSrcweir     // aCharTypeTab[(int)'&'] |= CHAR_OPERATOR;     Removed because of #i14140
758*cdf0e10cSrcweir     aCharTypeTab[(int)'('] |= CHAR_OPERATOR;
759*cdf0e10cSrcweir     aCharTypeTab[(int)')'] |= CHAR_OPERATOR;
760*cdf0e10cSrcweir     aCharTypeTab[(int)'*'] |= CHAR_OPERATOR;
761*cdf0e10cSrcweir     aCharTypeTab[(int)'+'] |= CHAR_OPERATOR;
762*cdf0e10cSrcweir     aCharTypeTab[(int)','] |= CHAR_OPERATOR;
763*cdf0e10cSrcweir     aCharTypeTab[(int)'-'] |= CHAR_OPERATOR;
764*cdf0e10cSrcweir     aCharTypeTab[(int)'/'] |= CHAR_OPERATOR;
765*cdf0e10cSrcweir     aCharTypeTab[(int)':'] |= CHAR_OPERATOR;
766*cdf0e10cSrcweir     aCharTypeTab[(int)'<'] |= CHAR_OPERATOR;
767*cdf0e10cSrcweir     aCharTypeTab[(int)'='] |= CHAR_OPERATOR;
768*cdf0e10cSrcweir     aCharTypeTab[(int)'>'] |= CHAR_OPERATOR;
769*cdf0e10cSrcweir     aCharTypeTab[(int)'?'] |= CHAR_OPERATOR;
770*cdf0e10cSrcweir     aCharTypeTab[(int)'^'] |= CHAR_OPERATOR;
771*cdf0e10cSrcweir     aCharTypeTab[(int)'|'] |= CHAR_OPERATOR;
772*cdf0e10cSrcweir     aCharTypeTab[(int)'~'] |= CHAR_OPERATOR;
773*cdf0e10cSrcweir     aCharTypeTab[(int)'{'] |= CHAR_OPERATOR;
774*cdf0e10cSrcweir     aCharTypeTab[(int)'}'] |= CHAR_OPERATOR;
775*cdf0e10cSrcweir     // aCharTypeTab[(int)'['] |= CHAR_OPERATOR;     Removed because of #i17826
776*cdf0e10cSrcweir     aCharTypeTab[(int)']'] |= CHAR_OPERATOR;
777*cdf0e10cSrcweir     aCharTypeTab[(int)';'] |= CHAR_OPERATOR;
778*cdf0e10cSrcweir 
779*cdf0e10cSrcweir     // Space
780*cdf0e10cSrcweir     aCharTypeTab[(int)' ' ] |= CHAR_SPACE;
781*cdf0e10cSrcweir     aCharTypeTab[(int)'\t'] |= CHAR_SPACE;
782*cdf0e10cSrcweir 
783*cdf0e10cSrcweir     // Zeilen-Ende-Zeichen
784*cdf0e10cSrcweir     aCharTypeTab[(int)'\r'] |= CHAR_EOL;
785*cdf0e10cSrcweir     aCharTypeTab[(int)'\n'] |= CHAR_EOL;
786*cdf0e10cSrcweir 
787*cdf0e10cSrcweir     ppListKeyWords = NULL;
788*cdf0e10cSrcweir }
789*cdf0e10cSrcweir 
790*cdf0e10cSrcweir SimpleTokenizer_Impl::~SimpleTokenizer_Impl( void )
791*cdf0e10cSrcweir {
792*cdf0e10cSrcweir }
793*cdf0e10cSrcweir 
794*cdf0e10cSrcweir SimpleTokenizer_Impl* getSimpleTokenizer( void )
795*cdf0e10cSrcweir {
796*cdf0e10cSrcweir     static SimpleTokenizer_Impl* pSimpleTokenizer = NULL;
797*cdf0e10cSrcweir     if( !pSimpleTokenizer )
798*cdf0e10cSrcweir         pSimpleTokenizer = new SimpleTokenizer_Impl();
799*cdf0e10cSrcweir     return pSimpleTokenizer;
800*cdf0e10cSrcweir }
801*cdf0e10cSrcweir 
802*cdf0e10cSrcweir // Heraussuchen der jeweils naechsten Funktion aus einem JavaScript-Modul
803*cdf0e10cSrcweir sal_uInt16 SimpleTokenizer_Impl::parseLine( sal_uInt32 nParseLine, const String* aSource )
804*cdf0e10cSrcweir {
805*cdf0e10cSrcweir     // Position auf den Anfang des Source-Strings setzen
806*cdf0e10cSrcweir     mpStringBegin = mpActualPos = aSource->GetBuffer();
807*cdf0e10cSrcweir 
808*cdf0e10cSrcweir     // Zeile und Spalte initialisieren
809*cdf0e10cSrcweir     nLine = nParseLine;
810*cdf0e10cSrcweir     nCol = 0L;
811*cdf0e10cSrcweir 
812*cdf0e10cSrcweir     // Variablen fuer die Out-Parameter
813*cdf0e10cSrcweir     TokenTypes eType;
814*cdf0e10cSrcweir     const sal_Unicode* pStartPos;
815*cdf0e10cSrcweir     const sal_Unicode* pEndPos;
816*cdf0e10cSrcweir 
817*cdf0e10cSrcweir     // Schleife ueber alle Tokens
818*cdf0e10cSrcweir     sal_uInt16 nTokenCount = 0;
819*cdf0e10cSrcweir     while( getNextToken( eType, pStartPos, pEndPos ) )
820*cdf0e10cSrcweir         nTokenCount++;
821*cdf0e10cSrcweir 
822*cdf0e10cSrcweir     return nTokenCount;
823*cdf0e10cSrcweir }
824*cdf0e10cSrcweir 
825*cdf0e10cSrcweir void SimpleTokenizer_Impl::getHighlightPortions( sal_uInt32 nParseLine, const String& rLine,
826*cdf0e10cSrcweir                                                     /*out*/HighlightPortions& portions  )
827*cdf0e10cSrcweir {
828*cdf0e10cSrcweir     // Position auf den Anfang des Source-Strings setzen
829*cdf0e10cSrcweir     mpStringBegin = mpActualPos = rLine.GetBuffer();
830*cdf0e10cSrcweir 
831*cdf0e10cSrcweir     // Zeile und Spalte initialisieren
832*cdf0e10cSrcweir     nLine = nParseLine;
833*cdf0e10cSrcweir     nCol = 0L;
834*cdf0e10cSrcweir 
835*cdf0e10cSrcweir     // Variablen fuer die Out-Parameter
836*cdf0e10cSrcweir     TokenTypes eType;
837*cdf0e10cSrcweir     const sal_Unicode* pStartPos;
838*cdf0e10cSrcweir     const sal_Unicode* pEndPos;
839*cdf0e10cSrcweir 
840*cdf0e10cSrcweir     // Schleife ueber alle Tokens
841*cdf0e10cSrcweir     while( getNextToken( eType, pStartPos, pEndPos ) )
842*cdf0e10cSrcweir     {
843*cdf0e10cSrcweir         HighlightPortion portion;
844*cdf0e10cSrcweir 
845*cdf0e10cSrcweir         portion.nBegin = (sal_uInt16)(pStartPos - mpStringBegin);
846*cdf0e10cSrcweir         portion.nEnd = (sal_uInt16)(pEndPos - mpStringBegin);
847*cdf0e10cSrcweir         portion.tokenType = eType;
848*cdf0e10cSrcweir 
849*cdf0e10cSrcweir         portions.push_back(portion);
850*cdf0e10cSrcweir     }
851*cdf0e10cSrcweir }
852*cdf0e10cSrcweir 
853*cdf0e10cSrcweir 
854*cdf0e10cSrcweir //////////////////////////////////////////////////////////////////////////
// Implementation of the SyntaxHighlighter
856*cdf0e10cSrcweir 
857*cdf0e10cSrcweir SyntaxHighlighter::SyntaxHighlighter()
858*cdf0e10cSrcweir {
859*cdf0e10cSrcweir     m_pSimpleTokenizer = 0;
860*cdf0e10cSrcweir     m_pKeyWords = NULL;
861*cdf0e10cSrcweir     m_nKeyWordCount = 0;
862*cdf0e10cSrcweir }
863*cdf0e10cSrcweir 
864*cdf0e10cSrcweir SyntaxHighlighter::~SyntaxHighlighter()
865*cdf0e10cSrcweir {
866*cdf0e10cSrcweir     delete m_pSimpleTokenizer;
867*cdf0e10cSrcweir     delete m_pKeyWords;
868*cdf0e10cSrcweir }
869*cdf0e10cSrcweir 
870*cdf0e10cSrcweir void SyntaxHighlighter::initialize( HighlighterLanguage eLanguage_ )
871*cdf0e10cSrcweir {
872*cdf0e10cSrcweir     eLanguage = eLanguage_;
873*cdf0e10cSrcweir     delete m_pSimpleTokenizer;
874*cdf0e10cSrcweir     m_pSimpleTokenizer = new SimpleTokenizer_Impl(eLanguage);
875*cdf0e10cSrcweir 
876*cdf0e10cSrcweir     switch (eLanguage)
877*cdf0e10cSrcweir     {
878*cdf0e10cSrcweir         case HIGHLIGHT_BASIC:
879*cdf0e10cSrcweir             m_pSimpleTokenizer->setKeyWords( strListBasicKeyWords,
880*cdf0e10cSrcweir                                             sizeof( strListBasicKeyWords ) / sizeof( char* ));
881*cdf0e10cSrcweir             break;
882*cdf0e10cSrcweir         case HIGHLIGHT_SQL:
883*cdf0e10cSrcweir             m_pSimpleTokenizer->setKeyWords( strListSqlKeyWords,
884*cdf0e10cSrcweir                                             sizeof( strListSqlKeyWords ) / sizeof( char* ));
885*cdf0e10cSrcweir             break;
886*cdf0e10cSrcweir         default:
887*cdf0e10cSrcweir             m_pSimpleTokenizer->setKeyWords( NULL, 0 );
888*cdf0e10cSrcweir     }
889*cdf0e10cSrcweir }
890*cdf0e10cSrcweir 
891*cdf0e10cSrcweir const Range SyntaxHighlighter::notifyChange( sal_uInt32 nLine, sal_Int32 nLineCountDifference,
892*cdf0e10cSrcweir                                 const String* pChangedLines, sal_uInt32 nArrayLength)
893*cdf0e10cSrcweir {
894*cdf0e10cSrcweir     (void)nLineCountDifference;
895*cdf0e10cSrcweir 
896*cdf0e10cSrcweir     for( sal_uInt32 i=0 ; i < nArrayLength ; i++ )
897*cdf0e10cSrcweir         m_pSimpleTokenizer->parseLine(nLine+i, &pChangedLines[i]);
898*cdf0e10cSrcweir 
899*cdf0e10cSrcweir     return Range( nLine, nLine + nArrayLength-1 );
900*cdf0e10cSrcweir }
901*cdf0e10cSrcweir 
902*cdf0e10cSrcweir void SyntaxHighlighter::getHighlightPortions( sal_uInt32 nLine, const String& rLine,
903*cdf0e10cSrcweir                                             /*out*/HighlightPortions& portions )
904*cdf0e10cSrcweir {
905*cdf0e10cSrcweir     m_pSimpleTokenizer->getHighlightPortions( nLine, rLine, portions );
906*cdf0e10cSrcweir }
907