1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_svtools.hxx"
26
27 #include <svtools/syntaxhighlight.hxx>
28
29 #include <unotools/charclass.hxx>
30 #include <tools/debug.hxx>
31
32
// ##########################################################################
// ATTENTION: every entry in the keyword tables of this file must be written
// in lower case, because an identifier is lower-cased before it is looked up.
// The entries must also stay sorted in strcmp() order, because the lookup is
// a binary search (bsearch) over the table.
// ##########################################################################
static const char* strListBasicKeyWords[] = {
    "access", "alias", "and", "any", "append", "as",
    "base", "binary", "boolean", "byref", "byte", "byval",
    "call", "case", "cdecl", "classmodule", "close", "compare", "compatible",
    "const", "currency",
    "date", "declare", "defbool", "defcur", "defdate", "defdbl", "deferr",
    "defint", "deflng", "defobj", "defsng", "defstr", "defvar", "dim", "do",
    "double",
    "each", "else", "elseif", "end", "end enum", "end function", "end if",
    "end select", "end sub", "end type", "endif", "enum", "eqv", "erase",
    "error", "exit", "explicit",
    "for", "function",
    "get", "global", "gosub", "goto",
    "if", "imp", "implements", "in", "input", "integer", "is",
    "let", "lib", "like", "line", "line input", "local", "lock", "long",
    "loop", "lprint", "lset",
    "mod",
    "name", "new", "next", "not",
    "object", "on", "open", "option", "optional", "or", "output",
    "preserve", "print", "private", "property", "public",
    "random", "read", "redim", "rem", "resume", "return", "rset",
    "select", "set", "shared", "single", "static", "step", "stop", "string",
    "sub", "system",
    "text", "then", "to", "type", "typeof",
    "until",
    "variant",
    "wend", "while", "with", "write",
    "xor"
};
162
163
// SQL keyword table. Entries are lower case and kept in strcmp() order,
// since the keyword lookup is a binary search over this array.
static const char* strListSqlKeyWords[] = {
    "all", "and", "any", "as", "asc", "avg",
    "between", "by",
    "cast", "corresponding", "count", "create", "cross",
    "delete", "desc", "distinct", "drop",
    "escape", "except", "exists",
    "false", "from", "full",
    "global", "group",
    "having",
    "in", "inner", "insert", "intersect", "into", "is",
    "join",
    "left", "like", "local",
    "match", "max", "min",
    "natural", "not", "null",
    "on", "or", "order", "outer",
    "right",
    "select", "set", "some", "sum",
    "table", "temporary", "true",
    "union", "unique", "unknown", "update", "using",
    "values",
    "where"
};
227
228
compare_strings(const void * arg1,const void * arg2)229 extern "C" int CDECL compare_strings( const void *arg1, const void *arg2 )
230 {
231 return strcmp( (char *)arg1, *(char **)arg2 );
232 }
233
234
235 class LetterTable
236 {
237 bool IsLetterTab[256];
238
239 public:
240 LetterTable( void );
241
isLetter(sal_Unicode c)242 inline bool isLetter( sal_Unicode c )
243 {
244 bool bRet = (c < 256) ? IsLetterTab[c] : isLetterUnicode( c );
245 return bRet;
246 }
247 bool isLetterUnicode( sal_Unicode c );
248 };
249
250 class BasicSimpleCharClass
251 {
252 static LetterTable aLetterTable;
253
254 public:
isAlpha(sal_Unicode c,bool bCompatible)255 static sal_Bool isAlpha( sal_Unicode c, bool bCompatible )
256 {
257 sal_Bool bRet = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
258 || (bCompatible && aLetterTable.isLetter( c ));
259 return bRet;
260 }
261
isDigit(sal_Unicode c)262 static sal_Bool isDigit( sal_Unicode c )
263 {
264 sal_Bool bRet = (c >= '0' && c <= '9');
265 return bRet;
266 }
267
isAlphaNumeric(sal_Unicode c,bool bCompatible)268 static sal_Bool isAlphaNumeric( sal_Unicode c, bool bCompatible )
269 {
270 sal_Bool bRet = isDigit( c ) || isAlpha( c, bCompatible );
271 return bRet;
272 }
273 };
274
275 LetterTable BasicSimpleCharClass::aLetterTable;
276
LetterTable(void)277 LetterTable::LetterTable( void )
278 {
279 for( int i = 0 ; i < 256 ; ++i )
280 IsLetterTab[i] = false;
281
282 IsLetterTab[0xC0] = true; // ?, CAPITAL LETTER A WITH GRAVE ACCENT
283 IsLetterTab[0xC1] = true; // ?, CAPITAL LETTER A WITH ACUTE ACCENT
284 IsLetterTab[0xC2] = true; // ?, CAPITAL LETTER A WITH CIRCUMFLEX ACCENT
285 IsLetterTab[0xC3] = true; // ?, CAPITAL LETTER A WITH TILDE
286 IsLetterTab[0xC4] = true; // ?, CAPITAL LETTER A WITH DIAERESIS
287 IsLetterTab[0xC5] = true; // ?, CAPITAL LETTER A WITH RING ABOVE
288 IsLetterTab[0xC6] = true; // ?, CAPITAL LIGATURE AE
289 IsLetterTab[0xC7] = true; // ?, CAPITAL LETTER C WITH CEDILLA
290 IsLetterTab[0xC8] = true; // ?, CAPITAL LETTER E WITH GRAVE ACCENT
291 IsLetterTab[0xC9] = true; // ?, CAPITAL LETTER E WITH ACUTE ACCENT
292 IsLetterTab[0xCA] = true; // ?, CAPITAL LETTER E WITH CIRCUMFLEX ACCENT
293 IsLetterTab[0xCB] = true; // ?, CAPITAL LETTER E WITH DIAERESIS
294 IsLetterTab[0xCC] = true; // ?, CAPITAL LETTER I WITH GRAVE ACCENT
295 IsLetterTab[0xCD] = true; // ?, CAPITAL LETTER I WITH ACUTE ACCENT
296 IsLetterTab[0xCE] = true; // ?, CAPITAL LETTER I WITH CIRCUMFLEX ACCENT
297 IsLetterTab[0xCF] = true; // ?, CAPITAL LETTER I WITH DIAERESIS
298 IsLetterTab[0xD0] = true; // ?, CAPITAL LETTER ETH
299 IsLetterTab[0xD1] = true; // ?, CAPITAL LETTER N WITH TILDE
300 IsLetterTab[0xD2] = true; // ?, CAPITAL LETTER O WITH GRAVE ACCENT
301 IsLetterTab[0xD3] = true; // ?, CAPITAL LETTER O WITH ACUTE ACCENT
302 IsLetterTab[0xD4] = true; // ?, CAPITAL LETTER O WITH CIRCUMFLEX ACCENT
303 IsLetterTab[0xD5] = true; // ?, CAPITAL LETTER O WITH TILDE
304 IsLetterTab[0xD6] = true; // ?, CAPITAL LETTER O WITH DIAERESIS
305 IsLetterTab[0xD8] = true; // ?, CAPITAL LETTER O WITH STROKE
306 IsLetterTab[0xD9] = true; // ?, CAPITAL LETTER U WITH GRAVE ACCENT
307 IsLetterTab[0xDA] = true; // ?, CAPITAL LETTER U WITH ACUTE ACCENT
308 IsLetterTab[0xDB] = true; // ?, CAPITAL LETTER U WITH CIRCUMFLEX ACCENT
309 IsLetterTab[0xDC] = true; // ?, CAPITAL LETTER U WITH DIAERESIS
310 IsLetterTab[0xDD] = true; // ?, CAPITAL LETTER Y WITH ACUTE ACCENT
311 IsLetterTab[0xDE] = true; // ?, CAPITAL LETTER THORN
312 IsLetterTab[0xDF] = true; // ?, SMALL LETTER SHARP S
313 IsLetterTab[0xE0] = true; // ?, SMALL LETTER A WITH GRAVE ACCENT
314 IsLetterTab[0xE1] = true; // ?, SMALL LETTER A WITH ACUTE ACCENT
315 IsLetterTab[0xE2] = true; // ?, SMALL LETTER A WITH CIRCUMFLEX ACCENT
316 IsLetterTab[0xE3] = true; // ?, SMALL LETTER A WITH TILDE
317 IsLetterTab[0xE4] = true; // ?, SMALL LETTER A WITH DIAERESIS
318 IsLetterTab[0xE5] = true; // ?, SMALL LETTER A WITH RING ABOVE
319 IsLetterTab[0xE6] = true; // ?, SMALL LIGATURE AE
320 IsLetterTab[0xE7] = true; // ?, SMALL LETTER C WITH CEDILLA
321 IsLetterTab[0xE8] = true; // ?, SMALL LETTER E WITH GRAVE ACCENT
322 IsLetterTab[0xE9] = true; // ?, SMALL LETTER E WITH ACUTE ACCENT
323 IsLetterTab[0xEA] = true; // ?, SMALL LETTER E WITH CIRCUMFLEX ACCENT
324 IsLetterTab[0xEB] = true; // ?, SMALL LETTER E WITH DIAERESIS
325 IsLetterTab[0xEC] = true; // ?, SMALL LETTER I WITH GRAVE ACCENT
326 IsLetterTab[0xED] = true; // ?, SMALL LETTER I WITH ACUTE ACCENT
327 IsLetterTab[0xEE] = true; // ?, SMALL LETTER I WITH CIRCUMFLEX ACCENT
328 IsLetterTab[0xEF] = true; // ?, SMALL LETTER I WITH DIAERESIS
329 IsLetterTab[0xF0] = true; // ?, SMALL LETTER ETH
330 IsLetterTab[0xF1] = true; // ?, SMALL LETTER N WITH TILDE
331 IsLetterTab[0xF2] = true; // ?, SMALL LETTER O WITH GRAVE ACCENT
332 IsLetterTab[0xF3] = true; // ?, SMALL LETTER O WITH ACUTE ACCENT
333 IsLetterTab[0xF4] = true; // ?, SMALL LETTER O WITH CIRCUMFLEX ACCENT
334 IsLetterTab[0xF5] = true; // ?, SMALL LETTER O WITH TILDE
335 IsLetterTab[0xF6] = true; // ?, SMALL LETTER O WITH DIAERESIS
336 IsLetterTab[0xF8] = true; // ?, SMALL LETTER O WITH OBLIQUE BAR
337 IsLetterTab[0xF9] = true; // ?, SMALL LETTER U WITH GRAVE ACCENT
338 IsLetterTab[0xFA] = true; // ?, SMALL LETTER U WITH ACUTE ACCENT
339 IsLetterTab[0xFB] = true; // ?, SMALL LETTER U WITH CIRCUMFLEX ACCENT
340 IsLetterTab[0xFC] = true; // ?, SMALL LETTER U WITH DIAERESIS
341 IsLetterTab[0xFD] = true; // ?, SMALL LETTER Y WITH ACUTE ACCENT
342 IsLetterTab[0xFE] = true; // ?, SMALL LETTER THORN
343 IsLetterTab[0xFF] = true; // � , SMALL LETTER Y WITH DIAERESIS
344 }
345
isLetterUnicode(sal_Unicode c)346 bool LetterTable::isLetterUnicode( sal_Unicode c )
347 {
348 static CharClass* pCharClass = NULL;
349 if( pCharClass == NULL )
350 pCharClass = new CharClass( Application::GetSettings().GetLocale() );
351 String aStr( c );
352 bool bRet = pCharClass->isLetter( aStr, 0 );
353 return bRet;
354 }
355
356 // Hilfsfunktion: Zeichen-Flag Testen
testCharFlags(sal_Unicode c,sal_uInt16 nTestFlags)357 sal_Bool SimpleTokenizer_Impl::testCharFlags( sal_Unicode c, sal_uInt16 nTestFlags )
358 {
359 bool bRet = false;
360 if( c != 0 && c <= 255 )
361 {
362 bRet = ( (aCharTypeTab[c] & nTestFlags) != 0 );
363 }
364 else if( c > 255 )
365 {
366 bRet = (( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER ) & nTestFlags) != 0
367 ? BasicSimpleCharClass::isAlpha( c, true ) : false;
368 }
369 return bRet;
370 }
371
setKeyWords(const char ** ppKeyWords,sal_uInt16 nCount)372 void SimpleTokenizer_Impl::setKeyWords( const char** ppKeyWords, sal_uInt16 nCount )
373 {
374 ppListKeyWords = ppKeyWords;
375 nKeyWordCount = nCount;
376 }
377
378 // Neues Token holen
getNextToken(TokenTypes & reType,const sal_Unicode * & rpStartPos,const sal_Unicode * & rpEndPos)379 sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType,
380 /*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos )
381 {
382 reType = TT_UNKNOWN;
383
384 // Position merken
385 rpStartPos = mpActualPos;
386
387 // Zeichen untersuchen
388 sal_Unicode c = peekChar();
389 if( c == CHAR_EOF )
390 return sal_False;
391
392 // Zeichen lesen
393 getChar();
394
395 //*** Alle Moeglichkeiten durchgehen ***
396 // Space?
397 if ( (testCharFlags( c, CHAR_SPACE ) == sal_True) )
398 {
399 while( testCharFlags( peekChar(), CHAR_SPACE ) == sal_True )
400 getChar();
401
402 reType = TT_WHITESPACE;
403 }
404
405 // Identifier?
406 else if ( (testCharFlags( c, CHAR_START_IDENTIFIER ) == sal_True) )
407 {
408 sal_Bool bIdentifierChar;
409 do
410 {
411 // Naechstes Zeichen holen
412 c = peekChar();
413 bIdentifierChar = testCharFlags( c, CHAR_IN_IDENTIFIER );
414 if( bIdentifierChar )
415 getChar();
416 }
417 while( bIdentifierChar );
418
419 reType = TT_IDENTIFIER;
420
421 // Schluesselwort-Tabelle
422 if (ppListKeyWords != NULL)
423 {
424 int nCount = mpActualPos - rpStartPos;
425
426 // No keyword if string contains char > 255
427 bool bCanBeKeyword = true;
428 for( int i = 0 ; i < nCount ; i++ )
429 {
430 if( rpStartPos[i] > 255 )
431 {
432 bCanBeKeyword = false;
433 break;
434 }
435 }
436
437 if( bCanBeKeyword )
438 {
439 String aKWString(rpStartPos, sal::static_int_cast< xub_StrLen >(nCount) );
440 ByteString aByteStr( aKWString, RTL_TEXTENCODING_ASCII_US );
441 aByteStr.ToLowerAscii();
442 if ( bsearch( aByteStr.GetBuffer(), ppListKeyWords, nKeyWordCount, sizeof( char* ),
443 compare_strings ) )
444 {
445 reType = TT_KEYWORDS;
446
447 if ( aByteStr.Equals( "rem" ) )
448 {
449 // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
450 sal_Unicode cPeek = peekChar();
451 while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False )
452 {
453 c = getChar();
454 cPeek = peekChar();
455 }
456
457 reType = TT_COMMENT;
458 }
459 }
460 }
461 }
462 }
463
464 // Operator?
465 // only for BASIC '\'' should be a comment, otherwise it is a normal string and handled there
466 else if ( ( testCharFlags( c, CHAR_OPERATOR ) == sal_True ) || ( (c == '\'') && (aLanguage==HIGHLIGHT_BASIC)) )
467 {
468 // paramters for SQL view
469 if ( (c==':') || (c=='?'))
470 {
471 if (c!='?')
472 {
473 sal_Bool bIdentifierChar;
474 do
475 {
476 // Naechstes Zeichen holen
477 c = peekChar();
478 bIdentifierChar = BasicSimpleCharClass::isAlpha( c, true );
479 if( bIdentifierChar )
480 getChar();
481 }
482 while( bIdentifierChar );
483 }
484 reType = TT_PARAMETER;
485 }
486 else if( c=='-' )
487 {
488 sal_Unicode cPeekNext = peekChar();
489 if (cPeekNext=='-')
490 {
491 // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
492 while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False )
493 {
494 getChar();
495 cPeekNext = peekChar();
496 }
497 reType = TT_COMMENT;
498 }
499 }
500 else if (c=='/')
501 {
502 sal_Unicode cPeekNext = peekChar();
503 if (cPeekNext=='/')
504 {
505 // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
506 while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False )
507 {
508 getChar();
509 cPeekNext = peekChar();
510 }
511 reType = TT_COMMENT;
512 }
513 }
514 else
515 {
516 // Kommentar ?
517 if ( c == '\'' )
518 {
519 c = getChar(); // '/' entfernen
520
521 // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
522 sal_Unicode cPeek = c;
523 while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False )
524 {
525 getChar();
526 cPeek = peekChar();
527 }
528
529 reType = TT_COMMENT;
530 }
531
532 // Echter Operator, kann hier einfach behandelt werden,
533 // da nicht der wirkliche Operator, wie z.B. += interessiert,
534 // sondern nur die Tatsache, dass es sich um einen handelt.
535 if( reType != TT_COMMENT )
536 {
537 reType = TT_OPERATOR;
538 }
539
540 }
541 }
542
543 // Objekt-Trenner? Muss vor Number abgehandelt werden
544 else if( c == '.' && ( peekChar() < '0' || peekChar() > '9' ) )
545 {
546 reType = TT_OPERATOR;
547 }
548
549 // Zahl?
550 else if( testCharFlags( c, CHAR_START_NUMBER ) == sal_True )
551 {
552 reType = TT_NUMBER;
553
554 // Zahlensystem, 10 = normal, wird bei Oct/Hex geaendert
555 int nRadix = 10;
556
557 // Ist es eine Hex- oder Oct-Zahl?
558 if( c == '&' )
559 {
560 // Octal?
561 if( peekChar() == 'o' || peekChar() == 'O' )
562 {
563 // o entfernen
564 getChar();
565 nRadix = 8; // Octal-Basis
566
567 // Alle Ziffern einlesen
568 while( testCharFlags( peekChar(), CHAR_IN_OCT_NUMBER ) )
569 c = getChar();
570 }
571 // Hex?
572 else if( peekChar() == 'h' || peekChar() == 'H' )
573 {
574 // x entfernen
575 getChar();
576 nRadix = 16; // Hex-Basis
577
578 // Alle Ziffern einlesen und puffern
579 while( testCharFlags( peekChar(), CHAR_IN_HEX_NUMBER ) )
580 c = getChar();
581 }
582 else
583 {
584 reType = TT_OPERATOR;
585 }
586 }
587
588 // Wenn nicht Oct oder Hex als double ansehen
589 if( reType == TT_NUMBER && nRadix == 10 )
590 {
591 // Flag, ob das letzte Zeichen ein Exponent war
592 sal_Bool bAfterExpChar = sal_False;
593
594 // Alle Ziffern einlesen
595 while( testCharFlags( peekChar(), CHAR_IN_NUMBER ) ||
596 (bAfterExpChar && peekChar() == '+' ) ||
597 (bAfterExpChar && peekChar() == '-' ) )
598 // Nach Exponent auch +/- OK
599 {
600 c = getChar(); // Zeichen lesen
601 bAfterExpChar = ( c == 'e' || c == 'E' );
602 }
603 }
604
605 // reType = TT_NUMBER;
606 }
607
608 // String?
609 else if( testCharFlags( c, CHAR_START_STRING ) == sal_True )
610 {
611 // Merken, welches Zeichen den String eroeffnet hat
612 sal_Unicode cEndString = c;
613 if( c == '[' )
614 cEndString = ']';
615
616 // Alle Ziffern einlesen und puffern
617 while( peekChar() != cEndString )
618 {
619 // #58846 EOF vor getChar() abfangen, damit EOF micht verloren geht
620 if( peekChar() == CHAR_EOF )
621 {
622 // ERROR: unterminated string literal
623 reType = TT_ERROR;
624 break;
625 }
626 c = getChar();
627 if( testCharFlags( c, CHAR_EOL ) == sal_True )
628 {
629 // ERROR: unterminated string literal
630 reType = TT_ERROR;
631 break;
632 }
633 }
634
635 // Zeichen lesen
636 if( reType != TT_ERROR )
637 {
638 getChar();
639 if( cEndString == ']' )
640 reType = TT_IDENTIFIER;
641 else
642 reType = TT_STRING;
643 }
644 }
645
646 // Zeilenende?
647 else if( testCharFlags( c, CHAR_EOL ) == sal_True )
648 {
649 // Falls ein weiteres anderes EOL-Char folgt, weg damit
650 sal_Unicode cNext = peekChar();
651 if( cNext != c && testCharFlags( cNext, CHAR_EOL ) == sal_True )
652 getChar();
653
654 // Positions-Daten auf Zeilen-Beginn setzen
655 nCol = 0;
656 nLine++;
657
658 reType = TT_EOL;
659 }
660
661 // Alles andere bleibt TT_UNKNOWN
662
663
664 // End-Position eintragen
665 rpEndPos = mpActualPos;
666 return sal_True;
667 }
668
getTokStr(const sal_Unicode * pStartPos,const sal_Unicode * pEndPos)669 String SimpleTokenizer_Impl::getTokStr
670 ( /*out*/const sal_Unicode* pStartPos, /*out*/const sal_Unicode* pEndPos )
671 {
672 return String( pStartPos, (sal_uInt16)( pEndPos - pStartPos ) );
673 }
674
675 #ifdef DBG_UTIL
676 // TEST: Token ausgeben
getFullTokenStr(TokenTypes eType,const sal_Unicode * pStartPos,const sal_Unicode * pEndPos)677 String SimpleTokenizer_Impl::getFullTokenStr( /*out*/TokenTypes eType,
678 /*out*/const sal_Unicode* pStartPos, /*out*/const sal_Unicode* pEndPos )
679 {
680 String aOut;
681 switch( eType )
682 {
683 case TT_UNKNOWN: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_UNKNOWN:") ); break;
684 case TT_IDENTIFIER: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_IDENTIFIER:") ); break;
685 case TT_WHITESPACE: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_WHITESPACE:") ); break;
686 case TT_NUMBER: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_NUMBER:") ); break;
687 case TT_STRING: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_STRING:") ); break;
688 case TT_EOL: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_EOL:") ); break;
689 case TT_COMMENT: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_COMMENT:") ); break;
690 case TT_ERROR: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_ERROR:") ); break;
691 case TT_OPERATOR: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_OPERATOR:") ); break;
692 case TT_KEYWORDS: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_KEYWORD:") ); break;
693 case TT_PARAMETER: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_PARAMETER:") ); break;
694 }
695 if( eType != TT_EOL )
696 {
697 aOut += String( pStartPos, (sal_uInt16)( pEndPos - pStartPos ) );
698 }
699 aOut += String( RTL_CONSTASCII_USTRINGPARAM("\n") );
700 return aOut;
701 }
702 #endif
703
SimpleTokenizer_Impl(HighlighterLanguage aLang)704 SimpleTokenizer_Impl::SimpleTokenizer_Impl( HighlighterLanguage aLang ): aLanguage(aLang)
705 {
706 memset( aCharTypeTab, 0, sizeof( aCharTypeTab ) );
707
708 // Zeichen-Tabelle fuellen
709 sal_uInt16 i;
710
711 // Zulaessige Zeichen fuer Identifier
712 sal_uInt16 nHelpMask = (sal_uInt16)( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER );
713 for( i = 'a' ; i <= 'z' ; i++ )
714 aCharTypeTab[i] |= nHelpMask;
715 for( i = 'A' ; i <= 'Z' ; i++ )
716 aCharTypeTab[i] |= nHelpMask;
717 // '_' extra eintragen
718 aCharTypeTab[(int)'_'] |= nHelpMask;
719 // AB 23.6.97: '$' ist auch erlaubt
720 aCharTypeTab[(int)'$'] |= nHelpMask;
721
722 // Ziffern (Identifier und Number ist moeglich)
723 nHelpMask = (sal_uInt16)( CHAR_IN_IDENTIFIER | CHAR_START_NUMBER |
724 CHAR_IN_NUMBER | CHAR_IN_HEX_NUMBER );
725 for( i = '0' ; i <= '9' ; i++ )
726 aCharTypeTab[i] |= nHelpMask;
727
728 // e und E sowie . von Hand ergaenzen
729 aCharTypeTab[(int)'e'] |= CHAR_IN_NUMBER;
730 aCharTypeTab[(int)'E'] |= CHAR_IN_NUMBER;
731 aCharTypeTab[(int)'.'] |= (sal_uInt16)( CHAR_IN_NUMBER | CHAR_START_NUMBER );
732 aCharTypeTab[(int)'&'] |= CHAR_START_NUMBER;
733
734 // Hex-Ziffern
735 for( i = 'a' ; i <= 'f' ; i++ )
736 aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;
737 for( i = 'A' ; i <= 'F' ; i++ )
738 aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;
739
740 // Oct-Ziffern
741 for( i = '0' ; i <= '7' ; i++ )
742 aCharTypeTab[i] |= CHAR_IN_OCT_NUMBER;
743
744 // String-Beginn/End-Zeichen
745 aCharTypeTab[(int)'\''] |= CHAR_START_STRING;
746 aCharTypeTab[(int)'\"'] |= CHAR_START_STRING;
747 aCharTypeTab[(int)'['] |= CHAR_START_STRING;
748 aCharTypeTab[(int)'`'] |= CHAR_START_STRING;
749
750 // Operator-Zeichen
751 aCharTypeTab[(int)'!'] |= CHAR_OPERATOR;
752 aCharTypeTab[(int)'%'] |= CHAR_OPERATOR;
753 // aCharTypeTab[(int)'&'] |= CHAR_OPERATOR; Removed because of #i14140
754 aCharTypeTab[(int)'('] |= CHAR_OPERATOR;
755 aCharTypeTab[(int)')'] |= CHAR_OPERATOR;
756 aCharTypeTab[(int)'*'] |= CHAR_OPERATOR;
757 aCharTypeTab[(int)'+'] |= CHAR_OPERATOR;
758 aCharTypeTab[(int)','] |= CHAR_OPERATOR;
759 aCharTypeTab[(int)'-'] |= CHAR_OPERATOR;
760 aCharTypeTab[(int)'/'] |= CHAR_OPERATOR;
761 aCharTypeTab[(int)':'] |= CHAR_OPERATOR;
762 aCharTypeTab[(int)'<'] |= CHAR_OPERATOR;
763 aCharTypeTab[(int)'='] |= CHAR_OPERATOR;
764 aCharTypeTab[(int)'>'] |= CHAR_OPERATOR;
765 aCharTypeTab[(int)'?'] |= CHAR_OPERATOR;
766 aCharTypeTab[(int)'^'] |= CHAR_OPERATOR;
767 aCharTypeTab[(int)'|'] |= CHAR_OPERATOR;
768 aCharTypeTab[(int)'~'] |= CHAR_OPERATOR;
769 aCharTypeTab[(int)'{'] |= CHAR_OPERATOR;
770 aCharTypeTab[(int)'}'] |= CHAR_OPERATOR;
771 // aCharTypeTab[(int)'['] |= CHAR_OPERATOR; Removed because of #i17826
772 aCharTypeTab[(int)']'] |= CHAR_OPERATOR;
773 aCharTypeTab[(int)';'] |= CHAR_OPERATOR;
774
775 // Space
776 aCharTypeTab[(int)' ' ] |= CHAR_SPACE;
777 aCharTypeTab[(int)'\t'] |= CHAR_SPACE;
778
779 // Zeilen-Ende-Zeichen
780 aCharTypeTab[(int)'\r'] |= CHAR_EOL;
781 aCharTypeTab[(int)'\n'] |= CHAR_EOL;
782
783 ppListKeyWords = NULL;
784 }
785
~SimpleTokenizer_Impl(void)786 SimpleTokenizer_Impl::~SimpleTokenizer_Impl( void )
787 {
788 }
789
getSimpleTokenizer(void)790 SimpleTokenizer_Impl* getSimpleTokenizer( void )
791 {
792 static SimpleTokenizer_Impl* pSimpleTokenizer = NULL;
793 if( !pSimpleTokenizer )
794 pSimpleTokenizer = new SimpleTokenizer_Impl();
795 return pSimpleTokenizer;
796 }
797
798 // Heraussuchen der jeweils naechsten Funktion aus einem JavaScript-Modul
parseLine(sal_uInt32 nParseLine,const String * aSource)799 sal_uInt16 SimpleTokenizer_Impl::parseLine( sal_uInt32 nParseLine, const String* aSource )
800 {
801 // Position auf den Anfang des Source-Strings setzen
802 mpStringBegin = mpActualPos = aSource->GetBuffer();
803
804 // Zeile und Spalte initialisieren
805 nLine = nParseLine;
806 nCol = 0L;
807
808 // Variablen fuer die Out-Parameter
809 TokenTypes eType;
810 const sal_Unicode* pStartPos;
811 const sal_Unicode* pEndPos;
812
813 // Schleife ueber alle Tokens
814 sal_uInt16 nTokenCount = 0;
815 while( getNextToken( eType, pStartPos, pEndPos ) )
816 nTokenCount++;
817
818 return nTokenCount;
819 }
820
getHighlightPortions(sal_uInt32 nParseLine,const String & rLine,HighlightPortions & portions)821 void SimpleTokenizer_Impl::getHighlightPortions( sal_uInt32 nParseLine, const String& rLine,
822 /*out*/HighlightPortions& portions )
823 {
824 // Position auf den Anfang des Source-Strings setzen
825 mpStringBegin = mpActualPos = rLine.GetBuffer();
826
827 // Zeile und Spalte initialisieren
828 nLine = nParseLine;
829 nCol = 0L;
830
831 // Variablen fuer die Out-Parameter
832 TokenTypes eType;
833 const sal_Unicode* pStartPos;
834 const sal_Unicode* pEndPos;
835
836 // Schleife ueber alle Tokens
837 while( getNextToken( eType, pStartPos, pEndPos ) )
838 {
839 HighlightPortion portion;
840
841 portion.nBegin = (sal_uInt16)(pStartPos - mpStringBegin);
842 portion.nEnd = (sal_uInt16)(pEndPos - mpStringBegin);
843 portion.tokenType = eType;
844
845 portions.push_back(portion);
846 }
847 }
848
849
//////////////////////////////////////////////////////////////////////////
// Implementation of SyntaxHighlighter
852
SyntaxHighlighter()853 SyntaxHighlighter::SyntaxHighlighter()
854 {
855 m_pSimpleTokenizer = 0;
856 m_pKeyWords = NULL;
857 m_nKeyWordCount = 0;
858 }
859
// Release the tokenizer created by initialize() and the keyword buffer.
// Both pointers are set to null in the constructor, so destroying a
// highlighter that was never initialised is harmless.
SyntaxHighlighter::~SyntaxHighlighter()
{
    delete m_pSimpleTokenizer;
    // NOTE(review): m_pKeyWords is never assigned anything but NULL in this
    // file; if it is ever allocated with new[] elsewhere, this must become
    // delete[] - confirm against the class declaration.
    delete m_pKeyWords;
}
865
initialize(HighlighterLanguage eLanguage_)866 void SyntaxHighlighter::initialize( HighlighterLanguage eLanguage_ )
867 {
868 eLanguage = eLanguage_;
869 delete m_pSimpleTokenizer;
870 m_pSimpleTokenizer = new SimpleTokenizer_Impl(eLanguage);
871
872 switch (eLanguage)
873 {
874 case HIGHLIGHT_BASIC:
875 m_pSimpleTokenizer->setKeyWords( strListBasicKeyWords,
876 sizeof( strListBasicKeyWords ) / sizeof( char* ));
877 break;
878 case HIGHLIGHT_SQL:
879 m_pSimpleTokenizer->setKeyWords( strListSqlKeyWords,
880 sizeof( strListSqlKeyWords ) / sizeof( char* ));
881 break;
882 default:
883 m_pSimpleTokenizer->setKeyWords( NULL, 0 );
884 }
885 }
886
notifyChange(sal_uInt32 nLine,sal_Int32 nLineCountDifference,const String * pChangedLines,sal_uInt32 nArrayLength)887 const Range SyntaxHighlighter::notifyChange( sal_uInt32 nLine, sal_Int32 nLineCountDifference,
888 const String* pChangedLines, sal_uInt32 nArrayLength)
889 {
890 (void)nLineCountDifference;
891
892 for( sal_uInt32 i=0 ; i < nArrayLength ; i++ )
893 m_pSimpleTokenizer->parseLine(nLine+i, &pChangedLines[i]);
894
895 return Range( nLine, nLine + nArrayLength-1 );
896 }
897
// Forward to the tokenizer: split rLine into highlight portions and append
// them to portions.
// m_pSimpleTokenizer is dereferenced without a check; it is only created in
// initialize(), so initialize() has to be called first.
void SyntaxHighlighter::getHighlightPortions( sal_uInt32 nLine, const String& rLine,
    /*out*/HighlightPortions& portions )
{
    m_pSimpleTokenizer->getHighlightPortions( nLine, rLine, portions );
}
903