1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 // MARKER(update_precomp.py): autogen include statement, do not remove 29 #include "precompiled_svtools.hxx" 30 31 #include <svtools/syntaxhighlight.hxx> 32 33 #include <unotools/charclass.hxx> 34 #include <tools/debug.hxx> 35 36 37 // ########################################################################## 38 // ATTENTION: all these words needs to be in small caps 39 // ########################################################################## 40 static const char* strListBasicKeyWords[] = { 41 "access", 42 "alias", 43 "and", 44 "any", 45 "append", 46 "as", 47 "base", 48 "binary", 49 "boolean", 50 "byref", 51 "byte", 52 "byval", 53 "call", 54 "case", 55 "cdecl", 56 "classmodule", 57 "close", 58 "compare", 59 "compatible", 60 "const", 61 "currency", 62 "date", 63 "declare", 64 "defbool", 65 "defcur", 66 "defdate", 67 "defdbl", 68 "deferr", 69 "defint", 70 "deflng", 71 "defobj", 72 "defsng", 73 "defstr", 74 "defvar", 75 "dim", 76 "do", 77 "double", 78 "each", 79 "else", 80 "elseif", 81 "end", 82 "end enum", 83 "end function", 84 "end if", 85 "end select", 86 "end sub", 87 "end type", 88 "endif", 89 "enum", 90 "eqv", 91 "erase", 92 "error", 93 "exit", 94 "explicit", 95 "for", 96 "function", 97 "get", 98 "global", 99 "gosub", 100 "goto", 101 "if", 102 "imp", 103 "implements", 104 "in", 105 "input", 106 "integer", 107 "is", 108 "let", 109 "lib", 110 "like", 111 "line", 112 "line input", 113 "local", 114 "lock", 115 "long", 116 "loop", 117 "lprint", 118 "lset", 119 "mod", 120 "name", 121 "new", 122 "next", 123 "not", 124 "object", 125 "on", 126 "open", 127 "option", 128 "optional", 129 "or", 130 "output", 131 "preserve", 132 "print", 133 "private", 134 "property", 135 "public", 136 "random", 137 "read", 138 "redim", 139 "rem", 140 "resume", 141 "return", 142 "rset", 143 "select", 144 "set", 145 "shared", 146 "single", 147 "static", 148 "step", 149 "stop", 150 "string", 151 "sub", 152 "system", 153 "text", 154 "then", 155 "to", 156 "type", 157 "typeof", 158 "until", 159 "variant", 160 "wend", 161 "while", 162 "with", 163 "write", 164 "xor" 165 }; 166 167 168 static const char* strListSqlKeyWords[] = { 169 "all", 170 "and", 171 "any", 172 "as", 173 "asc", 174 "avg", 175 "between", 176 "by", 177 "cast", 178 "corresponding", 179 "count", 180 "create", 181 "cross", 182 "delete", 183 "desc", 184 "distinct", 185 "drop", 186 "escape", 187 "except", 188 "exists", 189 "false", 190 "from", 191 "full", 192 "global", 193 "group", 194 "having", 195 "in", 196 "inner", 197 "insert", 198 "intersect", 199 "into", 200 "is", 201 "join", 202 "left", 203 "like", 204 "local", 205 "match", 206 "max", 207 "min", 208 "natural", 209 "not", 210 "null", 211 "on", 212 "or", 213 "order", 214 "outer", 215 "right", 216 "select", 217 "set", 218 "some", 219 "sum", 220 "table", 221 "temporary", 222 "true", 223 "union", 224 "unique", 225 "unknown", 226 "update", 227 "using", 228 "values", 229 "where" 230 }; 231 232 233 extern "C" int CDECL compare_strings( const void *arg1, const void *arg2 ) 234 { 235 return strcmp( (char *)arg1, *(char **)arg2 ); 236 } 237 238 239 class LetterTable 240 { 241 bool IsLetterTab[256]; 242 243 public: 244 LetterTable( void ); 245 246 inline bool isLetter( sal_Unicode c ) 247 { 248 bool bRet = (c < 256) ? IsLetterTab[c] : isLetterUnicode( c ); 249 return bRet; 250 } 251 bool isLetterUnicode( sal_Unicode c ); 252 }; 253 254 class BasicSimpleCharClass 255 { 256 static LetterTable aLetterTable; 257 258 public: 259 static sal_Bool isAlpha( sal_Unicode c, bool bCompatible ) 260 { 261 sal_Bool bRet = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') 262 || (bCompatible && aLetterTable.isLetter( c )); 263 return bRet; 264 } 265 266 static sal_Bool isDigit( sal_Unicode c ) 267 { 268 sal_Bool bRet = (c >= '0' && c <= '9'); 269 return bRet; 270 } 271 272 static sal_Bool isAlphaNumeric( sal_Unicode c, bool bCompatible ) 273 { 274 sal_Bool bRet = isDigit( c ) || isAlpha( c, bCompatible ); 275 return bRet; 276 } 277 }; 278 279 LetterTable BasicSimpleCharClass::aLetterTable; 280 281 LetterTable::LetterTable( void ) 282 { 283 for( int i = 0 ; i < 256 ; ++i ) 284 IsLetterTab[i] = false; 285 286 IsLetterTab[0xC0] = true; // ?, CAPITAL LETTER A WITH GRAVE ACCENT 287 IsLetterTab[0xC1] = true; // ?, CAPITAL LETTER A WITH ACUTE ACCENT 288 IsLetterTab[0xC2] = true; // ?, CAPITAL LETTER A WITH CIRCUMFLEX ACCENT 289 IsLetterTab[0xC3] = true; // ?, CAPITAL LETTER A WITH TILDE 290 IsLetterTab[0xC4] = true; // ?, CAPITAL LETTER A WITH DIAERESIS 291 IsLetterTab[0xC5] = true; // ?, CAPITAL LETTER A WITH RING ABOVE 292 IsLetterTab[0xC6] = true; // ?, CAPITAL LIGATURE AE 293 IsLetterTab[0xC7] = true; // ?, CAPITAL LETTER C WITH CEDILLA 294 IsLetterTab[0xC8] = true; // ?, CAPITAL LETTER E WITH GRAVE ACCENT 295 IsLetterTab[0xC9] = true; // ?, CAPITAL LETTER E WITH ACUTE ACCENT 296 IsLetterTab[0xCA] = true; // ?, CAPITAL LETTER E WITH CIRCUMFLEX ACCENT 297 IsLetterTab[0xCB] = true; // ?, CAPITAL LETTER E WITH DIAERESIS 298 IsLetterTab[0xCC] = true; // ?, CAPITAL LETTER I WITH GRAVE ACCENT 299 IsLetterTab[0xCD] = true; // ?, CAPITAL LETTER I WITH ACUTE ACCENT 300 IsLetterTab[0xCE] = true; // ?, CAPITAL LETTER I WITH CIRCUMFLEX ACCENT 301 IsLetterTab[0xCF] = true; // ?, CAPITAL LETTER I WITH DIAERESIS 302 IsLetterTab[0xD0] = true; // ?, CAPITAL LETTER ETH 303 IsLetterTab[0xD1] = true; // ?, CAPITAL LETTER N WITH TILDE 304 IsLetterTab[0xD2] = true; // ?, CAPITAL LETTER O WITH GRAVE ACCENT 305 IsLetterTab[0xD3] = true; // ?, CAPITAL LETTER O WITH ACUTE ACCENT 306 IsLetterTab[0xD4] = true; // ?, CAPITAL LETTER O WITH CIRCUMFLEX ACCENT 307 IsLetterTab[0xD5] = true; // ?, CAPITAL LETTER O WITH TILDE 308 IsLetterTab[0xD6] = true; // ?, CAPITAL LETTER O WITH DIAERESIS 309 IsLetterTab[0xD8] = true; // ?, CAPITAL LETTER O WITH STROKE 310 IsLetterTab[0xD9] = true; // ?, CAPITAL LETTER U WITH GRAVE ACCENT 311 IsLetterTab[0xDA] = true; // ?, CAPITAL LETTER U WITH ACUTE ACCENT 312 IsLetterTab[0xDB] = true; // ?, CAPITAL LETTER U WITH CIRCUMFLEX ACCENT 313 IsLetterTab[0xDC] = true; // ?, CAPITAL LETTER U WITH DIAERESIS 314 IsLetterTab[0xDD] = true; // ?, CAPITAL LETTER Y WITH ACUTE ACCENT 315 IsLetterTab[0xDE] = true; // ?, CAPITAL LETTER THORN 316 IsLetterTab[0xDF] = true; // ?, SMALL LETTER SHARP S 317 IsLetterTab[0xE0] = true; // ?, SMALL LETTER A WITH GRAVE ACCENT 318 IsLetterTab[0xE1] = true; // ?, SMALL LETTER A WITH ACUTE ACCENT 319 IsLetterTab[0xE2] = true; // ?, SMALL LETTER A WITH CIRCUMFLEX ACCENT 320 IsLetterTab[0xE3] = true; // ?, SMALL LETTER A WITH TILDE 321 IsLetterTab[0xE4] = true; // ?, SMALL LETTER A WITH DIAERESIS 322 IsLetterTab[0xE5] = true; // ?, SMALL LETTER A WITH RING ABOVE 323 IsLetterTab[0xE6] = true; // ?, SMALL LIGATURE AE 324 IsLetterTab[0xE7] = true; // ?, SMALL LETTER C WITH CEDILLA 325 IsLetterTab[0xE8] = true; // ?, SMALL LETTER E WITH GRAVE ACCENT 326 IsLetterTab[0xE9] = true; // ?, SMALL LETTER E WITH ACUTE ACCENT 327 IsLetterTab[0xEA] = true; // ?, SMALL LETTER E WITH CIRCUMFLEX ACCENT 328 IsLetterTab[0xEB] = true; // ?, SMALL LETTER E WITH DIAERESIS 329 IsLetterTab[0xEC] = true; // ?, SMALL LETTER I WITH GRAVE ACCENT 330 IsLetterTab[0xED] = true; // ?, SMALL LETTER I WITH ACUTE ACCENT 331 IsLetterTab[0xEE] = true; // ?, SMALL LETTER I WITH CIRCUMFLEX ACCENT 332 IsLetterTab[0xEF] = true; // ?, SMALL LETTER I WITH DIAERESIS 333 IsLetterTab[0xF0] = true; // ?, SMALL LETTER ETH 334 IsLetterTab[0xF1] = true; // ?, SMALL LETTER N WITH TILDE 335 IsLetterTab[0xF2] = true; // ?, SMALL LETTER O WITH GRAVE ACCENT 336 IsLetterTab[0xF3] = true; // ?, SMALL LETTER O WITH ACUTE ACCENT 337 IsLetterTab[0xF4] = true; // ?, SMALL LETTER O WITH CIRCUMFLEX ACCENT 338 IsLetterTab[0xF5] = true; // ?, SMALL LETTER O WITH TILDE 339 IsLetterTab[0xF6] = true; // ?, SMALL LETTER O WITH DIAERESIS 340 IsLetterTab[0xF8] = true; // ?, SMALL LETTER O WITH OBLIQUE BAR 341 IsLetterTab[0xF9] = true; // ?, SMALL LETTER U WITH GRAVE ACCENT 342 IsLetterTab[0xFA] = true; // ?, SMALL LETTER U WITH ACUTE ACCENT 343 IsLetterTab[0xFB] = true; // ?, SMALL LETTER U WITH CIRCUMFLEX ACCENT 344 IsLetterTab[0xFC] = true; // ?, SMALL LETTER U WITH DIAERESIS 345 IsLetterTab[0xFD] = true; // ?, SMALL LETTER Y WITH ACUTE ACCENT 346 IsLetterTab[0xFE] = true; // ?, SMALL LETTER THORN 347 IsLetterTab[0xFF] = true; // � , SMALL LETTER Y WITH DIAERESIS 348 } 349 350 bool LetterTable::isLetterUnicode( sal_Unicode c ) 351 { 352 static CharClass* pCharClass = NULL; 353 if( pCharClass == NULL ) 354 pCharClass = new CharClass( Application::GetSettings().GetLocale() ); 355 String aStr( c ); 356 bool bRet = pCharClass->isLetter( aStr, 0 ); 357 return bRet; 358 } 359 360 // Hilfsfunktion: Zeichen-Flag Testen 361 sal_Bool SimpleTokenizer_Impl::testCharFlags( sal_Unicode c, sal_uInt16 nTestFlags ) 362 { 363 bool bRet = false; 364 if( c != 0 && c <= 255 ) 365 { 366 bRet = ( (aCharTypeTab[c] & nTestFlags) != 0 ); 367 } 368 else if( c > 255 ) 369 { 370 bRet = (( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER ) & nTestFlags) != 0 371 ? BasicSimpleCharClass::isAlpha( c, true ) : false; 372 } 373 return bRet; 374 } 375 376 void SimpleTokenizer_Impl::setKeyWords( const char** ppKeyWords, sal_uInt16 nCount ) 377 { 378 ppListKeyWords = ppKeyWords; 379 nKeyWordCount = nCount; 380 } 381 382 // Neues Token holen 383 sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType, 384 /*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos ) 385 { 386 reType = TT_UNKNOWN; 387 388 // Position merken 389 rpStartPos = mpActualPos; 390 391 // Zeichen untersuchen 392 sal_Unicode c = peekChar(); 393 if( c == CHAR_EOF ) 394 return sal_False; 395 396 // Zeichen lesen 397 getChar(); 398 399 //*** Alle Moeglichkeiten durchgehen *** 400 // Space? 401 if ( (testCharFlags( c, CHAR_SPACE ) == sal_True) ) 402 { 403 while( testCharFlags( peekChar(), CHAR_SPACE ) == sal_True ) 404 getChar(); 405 406 reType = TT_WHITESPACE; 407 } 408 409 // Identifier? 410 else if ( (testCharFlags( c, CHAR_START_IDENTIFIER ) == sal_True) ) 411 { 412 sal_Bool bIdentifierChar; 413 do 414 { 415 // Naechstes Zeichen holen 416 c = peekChar(); 417 bIdentifierChar = testCharFlags( c, CHAR_IN_IDENTIFIER ); 418 if( bIdentifierChar ) 419 getChar(); 420 } 421 while( bIdentifierChar ); 422 423 reType = TT_IDENTIFIER; 424 425 // Schluesselwort-Tabelle 426 if (ppListKeyWords != NULL) 427 { 428 int nCount = mpActualPos - rpStartPos; 429 430 // No keyword if string contains char > 255 431 bool bCanBeKeyword = true; 432 for( int i = 0 ; i < nCount ; i++ ) 433 { 434 if( rpStartPos[i] > 255 ) 435 { 436 bCanBeKeyword = false; 437 break; 438 } 439 } 440 441 if( bCanBeKeyword ) 442 { 443 String aKWString(rpStartPos, sal::static_int_cast< xub_StrLen >(nCount) ); 444 ByteString aByteStr( aKWString, RTL_TEXTENCODING_ASCII_US ); 445 aByteStr.ToLowerAscii(); 446 if ( bsearch( aByteStr.GetBuffer(), ppListKeyWords, nKeyWordCount, sizeof( char* ), 447 compare_strings ) ) 448 { 449 reType = TT_KEYWORDS; 450 451 if ( aByteStr.Equals( "rem" ) ) 452 { 453 // Alle Zeichen bis Zeilen-Ende oder EOF entfernen 454 sal_Unicode cPeek = peekChar(); 455 while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False ) 456 { 457 c = getChar(); 458 cPeek = peekChar(); 459 } 460 461 reType = TT_COMMENT; 462 } 463 } 464 } 465 } 466 } 467 468 // Operator? 469 // only for BASIC '\'' should be a comment, otherwise it is a normal string and handled there 470 else if ( ( testCharFlags( c, CHAR_OPERATOR ) == sal_True ) || ( (c == '\'') && (aLanguage==HIGHLIGHT_BASIC)) ) 471 { 472 // paramters for SQL view 473 if ( (c==':') || (c=='?')) 474 { 475 if (c!='?') 476 { 477 sal_Bool bIdentifierChar; 478 do 479 { 480 // Naechstes Zeichen holen 481 c = peekChar(); 482 bIdentifierChar = BasicSimpleCharClass::isAlpha( c, true ); 483 if( bIdentifierChar ) 484 getChar(); 485 } 486 while( bIdentifierChar ); 487 } 488 reType = TT_PARAMETER; 489 } 490 else if ((c=='-')) 491 { 492 sal_Unicode cPeekNext = peekChar(); 493 if (cPeekNext=='-') 494 { 495 // Alle Zeichen bis Zeilen-Ende oder EOF entfernen 496 while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False ) 497 { 498 getChar(); 499 cPeekNext = peekChar(); 500 } 501 reType = TT_COMMENT; 502 } 503 } 504 else if (c=='/') 505 { 506 sal_Unicode cPeekNext = peekChar(); 507 if (cPeekNext=='/') 508 { 509 // Alle Zeichen bis Zeilen-Ende oder EOF entfernen 510 while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False ) 511 { 512 getChar(); 513 cPeekNext = peekChar(); 514 } 515 reType = TT_COMMENT; 516 } 517 } 518 else 519 { 520 // Kommentar ? 521 if ( c == '\'' ) 522 { 523 c = getChar(); // '/' entfernen 524 525 // Alle Zeichen bis Zeilen-Ende oder EOF entfernen 526 sal_Unicode cPeek = c; 527 while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False ) 528 { 529 getChar(); 530 cPeek = peekChar(); 531 } 532 533 reType = TT_COMMENT; 534 } 535 536 // Echter Operator, kann hier einfach behandelt werden, 537 // da nicht der wirkliche Operator, wie z.B. += interessiert, 538 // sondern nur die Tatsache, dass es sich um einen handelt. 539 if( reType != TT_COMMENT ) 540 { 541 reType = TT_OPERATOR; 542 } 543 544 } 545 } 546 547 // Objekt-Trenner? Muss vor Number abgehandelt werden 548 else if( c == '.' && ( peekChar() < '0' || peekChar() > '9' ) ) 549 { 550 reType = TT_OPERATOR; 551 } 552 553 // Zahl? 554 else if( testCharFlags( c, CHAR_START_NUMBER ) == sal_True ) 555 { 556 reType = TT_NUMBER; 557 558 // Zahlensystem, 10 = normal, wird bei Oct/Hex geaendert 559 int nRadix = 10; 560 561 // Ist es eine Hex- oder Oct-Zahl? 562 if( c == '&' ) 563 { 564 // Octal? 565 if( peekChar() == 'o' || peekChar() == 'O' ) 566 { 567 // o entfernen 568 getChar(); 569 nRadix = 8; // Octal-Basis 570 571 // Alle Ziffern einlesen 572 while( testCharFlags( peekChar(), CHAR_IN_OCT_NUMBER ) ) 573 c = getChar(); 574 } 575 // Hex? 576 else if( peekChar() == 'h' || peekChar() == 'H' ) 577 { 578 // x entfernen 579 getChar(); 580 nRadix = 16; // Hex-Basis 581 582 // Alle Ziffern einlesen und puffern 583 while( testCharFlags( peekChar(), CHAR_IN_HEX_NUMBER ) ) 584 c = getChar(); 585 } 586 else 587 { 588 reType = TT_OPERATOR; 589 } 590 } 591 592 // Wenn nicht Oct oder Hex als double ansehen 593 if( reType == TT_NUMBER && nRadix == 10 ) 594 { 595 // Flag, ob das letzte Zeichen ein Exponent war 596 sal_Bool bAfterExpChar = sal_False; 597 598 // Alle Ziffern einlesen 599 while( testCharFlags( peekChar(), CHAR_IN_NUMBER ) || 600 (bAfterExpChar && peekChar() == '+' ) || 601 (bAfterExpChar && peekChar() == '-' ) ) 602 // Nach Exponent auch +/- OK 603 { 604 c = getChar(); // Zeichen lesen 605 bAfterExpChar = ( c == 'e' || c == 'E' ); 606 } 607 } 608 609 // reType = TT_NUMBER; 610 } 611 612 // String? 613 else if( testCharFlags( c, CHAR_START_STRING ) == sal_True ) 614 { 615 // Merken, welches Zeichen den String eroeffnet hat 616 sal_Unicode cEndString = c; 617 if( c == '[' ) 618 cEndString = ']'; 619 620 // Alle Ziffern einlesen und puffern 621 while( peekChar() != cEndString ) 622 { 623 // #58846 EOF vor getChar() abfangen, damit EOF micht verloren geht 624 if( peekChar() == CHAR_EOF ) 625 { 626 // ERROR: unterminated string literal 627 reType = TT_ERROR; 628 break; 629 } 630 c = getChar(); 631 if( testCharFlags( c, CHAR_EOL ) == sal_True ) 632 { 633 // ERROR: unterminated string literal 634 reType = TT_ERROR; 635 break; 636 } 637 } 638 639 // Zeichen lesen 640 if( reType != TT_ERROR ) 641 { 642 getChar(); 643 if( cEndString == ']' ) 644 reType = TT_IDENTIFIER; 645 else 646 reType = TT_STRING; 647 } 648 } 649 650 // Zeilenende? 651 else if( testCharFlags( c, CHAR_EOL ) == sal_True ) 652 { 653 // Falls ein weiteres anderes EOL-Char folgt, weg damit 654 sal_Unicode cNext = peekChar(); 655 if( cNext != c && testCharFlags( cNext, CHAR_EOL ) == sal_True ) 656 getChar(); 657 658 // Positions-Daten auf Zeilen-Beginn setzen 659 nCol = 0; 660 nLine++; 661 662 reType = TT_EOL; 663 } 664 665 // Alles andere bleibt TT_UNKNOWN 666 667 668 // End-Position eintragen 669 rpEndPos = mpActualPos; 670 return sal_True; 671 } 672 673 String SimpleTokenizer_Impl::getTokStr 674 ( /*out*/const sal_Unicode* pStartPos, /*out*/const sal_Unicode* pEndPos ) 675 { 676 return String( pStartPos, (sal_uInt16)( pEndPos - pStartPos ) ); 677 } 678 679 #ifdef DBG_UTIL 680 // TEST: Token ausgeben 681 String SimpleTokenizer_Impl::getFullTokenStr( /*out*/TokenTypes eType, 682 /*out*/const sal_Unicode* pStartPos, /*out*/const sal_Unicode* pEndPos ) 683 { 684 String aOut; 685 switch( eType ) 686 { 687 case TT_UNKNOWN: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_UNKNOWN:") ); break; 688 case TT_IDENTIFIER: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_IDENTIFIER:") ); break; 689 case TT_WHITESPACE: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_WHITESPACE:") ); break; 690 case TT_NUMBER: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_NUMBER:") ); break; 691 case TT_STRING: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_STRING:") ); break; 692 case TT_EOL: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_EOL:") ); break; 693 case TT_COMMENT: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_COMMENT:") ); break; 694 case TT_ERROR: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_ERROR:") ); break; 695 case TT_OPERATOR: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_OPERATOR:") ); break; 696 case TT_KEYWORDS: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_KEYWORD:") ); break; 697 case TT_PARAMETER: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_PARAMETER:") ); break; 698 } 699 if( eType != TT_EOL ) 700 { 701 aOut += String( pStartPos, (sal_uInt16)( pEndPos - pStartPos ) ); 702 } 703 aOut += String( RTL_CONSTASCII_USTRINGPARAM("\n") ); 704 return aOut; 705 } 706 #endif 707 708 SimpleTokenizer_Impl::SimpleTokenizer_Impl( HighlighterLanguage aLang ): aLanguage(aLang) 709 { 710 memset( aCharTypeTab, 0, sizeof( aCharTypeTab ) ); 711 712 // Zeichen-Tabelle fuellen 713 sal_uInt16 i; 714 715 // Zulaessige Zeichen fuer Identifier 716 sal_uInt16 nHelpMask = (sal_uInt16)( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER ); 717 for( i = 'a' ; i <= 'z' ; i++ ) 718 aCharTypeTab[i] |= nHelpMask; 719 for( i = 'A' ; i <= 'Z' ; i++ ) 720 aCharTypeTab[i] |= nHelpMask; 721 // '_' extra eintragen 722 aCharTypeTab[(int)'_'] |= nHelpMask; 723 // AB 23.6.97: '$' ist auch erlaubt 724 aCharTypeTab[(int)'$'] |= nHelpMask; 725 726 // Ziffern (Identifier und Number ist moeglich) 727 nHelpMask = (sal_uInt16)( CHAR_IN_IDENTIFIER | CHAR_START_NUMBER | 728 CHAR_IN_NUMBER | CHAR_IN_HEX_NUMBER ); 729 for( i = '0' ; i <= '9' ; i++ ) 730 aCharTypeTab[i] |= nHelpMask; 731 732 // e und E sowie . von Hand ergaenzen 733 aCharTypeTab[(int)'e'] |= CHAR_IN_NUMBER; 734 aCharTypeTab[(int)'E'] |= CHAR_IN_NUMBER; 735 aCharTypeTab[(int)'.'] |= (sal_uInt16)( CHAR_IN_NUMBER | CHAR_START_NUMBER ); 736 aCharTypeTab[(int)'&'] |= CHAR_START_NUMBER; 737 738 // Hex-Ziffern 739 for( i = 'a' ; i <= 'f' ; i++ ) 740 aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER; 741 for( i = 'A' ; i <= 'F' ; i++ ) 742 aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER; 743 744 // Oct-Ziffern 745 for( i = '0' ; i <= '7' ; i++ ) 746 aCharTypeTab[i] |= CHAR_IN_OCT_NUMBER; 747 748 // String-Beginn/End-Zeichen 749 aCharTypeTab[(int)'\''] |= CHAR_START_STRING; 750 aCharTypeTab[(int)'\"'] |= CHAR_START_STRING; 751 aCharTypeTab[(int)'['] |= CHAR_START_STRING; 752 aCharTypeTab[(int)'`'] |= CHAR_START_STRING; 753 754 // Operator-Zeichen 755 aCharTypeTab[(int)'!'] |= CHAR_OPERATOR; 756 aCharTypeTab[(int)'%'] |= CHAR_OPERATOR; 757 // aCharTypeTab[(int)'&'] |= CHAR_OPERATOR; Removed because of #i14140 758 aCharTypeTab[(int)'('] |= CHAR_OPERATOR; 759 aCharTypeTab[(int)')'] |= CHAR_OPERATOR; 760 aCharTypeTab[(int)'*'] |= CHAR_OPERATOR; 761 aCharTypeTab[(int)'+'] |= CHAR_OPERATOR; 762 aCharTypeTab[(int)','] |= CHAR_OPERATOR; 763 aCharTypeTab[(int)'-'] |= CHAR_OPERATOR; 764 aCharTypeTab[(int)'/'] |= CHAR_OPERATOR; 765 aCharTypeTab[(int)':'] |= CHAR_OPERATOR; 766 aCharTypeTab[(int)'<'] |= CHAR_OPERATOR; 767 aCharTypeTab[(int)'='] |= CHAR_OPERATOR; 768 aCharTypeTab[(int)'>'] |= CHAR_OPERATOR; 769 aCharTypeTab[(int)'?'] |= CHAR_OPERATOR; 770 aCharTypeTab[(int)'^'] |= CHAR_OPERATOR; 771 aCharTypeTab[(int)'|'] |= CHAR_OPERATOR; 772 aCharTypeTab[(int)'~'] |= CHAR_OPERATOR; 773 aCharTypeTab[(int)'{'] |= CHAR_OPERATOR; 774 aCharTypeTab[(int)'}'] |= CHAR_OPERATOR; 775 // aCharTypeTab[(int)'['] |= CHAR_OPERATOR; Removed because of #i17826 776 aCharTypeTab[(int)']'] |= CHAR_OPERATOR; 777 aCharTypeTab[(int)';'] |= CHAR_OPERATOR; 778 779 // Space 780 aCharTypeTab[(int)' ' ] |= CHAR_SPACE; 781 aCharTypeTab[(int)'\t'] |= CHAR_SPACE; 782 783 // Zeilen-Ende-Zeichen 784 aCharTypeTab[(int)'\r'] |= CHAR_EOL; 785 aCharTypeTab[(int)'\n'] |= CHAR_EOL; 786 787 ppListKeyWords = NULL; 788 } 789 790 SimpleTokenizer_Impl::~SimpleTokenizer_Impl( void ) 791 { 792 } 793 794 SimpleTokenizer_Impl* getSimpleTokenizer( void ) 795 { 796 static SimpleTokenizer_Impl* pSimpleTokenizer = NULL; 797 if( !pSimpleTokenizer ) 798 pSimpleTokenizer = new SimpleTokenizer_Impl(); 799 return pSimpleTokenizer; 800 } 801 802 // Heraussuchen der jeweils naechsten Funktion aus einem JavaScript-Modul 803 sal_uInt16 SimpleTokenizer_Impl::parseLine( sal_uInt32 nParseLine, const String* aSource ) 804 { 805 // Position auf den Anfang des Source-Strings setzen 806 mpStringBegin = mpActualPos = aSource->GetBuffer(); 807 808 // Zeile und Spalte initialisieren 809 nLine = nParseLine; 810 nCol = 0L; 811 812 // Variablen fuer die Out-Parameter 813 TokenTypes eType; 814 const sal_Unicode* pStartPos; 815 const sal_Unicode* pEndPos; 816 817 // Schleife ueber alle Tokens 818 sal_uInt16 nTokenCount = 0; 819 while( getNextToken( eType, pStartPos, pEndPos ) ) 820 nTokenCount++; 821 822 return nTokenCount; 823 } 824 825 void SimpleTokenizer_Impl::getHighlightPortions( sal_uInt32 nParseLine, const String& rLine, 826 /*out*/HighlightPortions& portions ) 827 { 828 // Position auf den Anfang des Source-Strings setzen 829 mpStringBegin = mpActualPos = rLine.GetBuffer(); 830 831 // Zeile und Spalte initialisieren 832 nLine = nParseLine; 833 nCol = 0L; 834 835 // Variablen fuer die Out-Parameter 836 TokenTypes eType; 837 const sal_Unicode* pStartPos; 838 const sal_Unicode* pEndPos; 839 840 // Schleife ueber alle Tokens 841 while( getNextToken( eType, pStartPos, pEndPos ) ) 842 { 843 HighlightPortion portion; 844 845 portion.nBegin = (sal_uInt16)(pStartPos - mpStringBegin); 846 portion.nEnd = (sal_uInt16)(pEndPos - mpStringBegin); 847 portion.tokenType = eType; 848 849 portions.push_back(portion); 850 } 851 } 852 853 854 ////////////////////////////////////////////////////////////////////////// 855 // Implementierung des SyntaxHighlighter 856 857 SyntaxHighlighter::SyntaxHighlighter() 858 { 859 m_pSimpleTokenizer = 0; 860 m_pKeyWords = NULL; 861 m_nKeyWordCount = 0; 862 } 863 864 SyntaxHighlighter::~SyntaxHighlighter() 865 { 866 delete m_pSimpleTokenizer; 867 delete m_pKeyWords; 868 } 869 870 void SyntaxHighlighter::initialize( HighlighterLanguage eLanguage_ ) 871 { 872 eLanguage = eLanguage_; 873 delete m_pSimpleTokenizer; 874 m_pSimpleTokenizer = new SimpleTokenizer_Impl(eLanguage); 875 876 switch (eLanguage) 877 { 878 case HIGHLIGHT_BASIC: 879 m_pSimpleTokenizer->setKeyWords( strListBasicKeyWords, 880 sizeof( strListBasicKeyWords ) / sizeof( char* )); 881 break; 882 case HIGHLIGHT_SQL: 883 m_pSimpleTokenizer->setKeyWords( strListSqlKeyWords, 884 sizeof( strListSqlKeyWords ) / sizeof( char* )); 885 break; 886 default: 887 m_pSimpleTokenizer->setKeyWords( NULL, 0 ); 888 } 889 } 890 891 const Range SyntaxHighlighter::notifyChange( sal_uInt32 nLine, sal_Int32 nLineCountDifference, 892 const String* pChangedLines, sal_uInt32 nArrayLength) 893 { 894 (void)nLineCountDifference; 895 896 for( sal_uInt32 i=0 ; i < nArrayLength ; i++ ) 897 m_pSimpleTokenizer->parseLine(nLine+i, &pChangedLines[i]); 898 899 return Range( nLine, nLine + nArrayLength-1 ); 900 } 901 902 void SyntaxHighlighter::getHighlightPortions( sal_uInt32 nLine, const String& rLine, 903 /*out*/HighlightPortions& portions ) 904 { 905 m_pSimpleTokenizer->getHighlightPortions( nLine, rLine, portions ); 906 } 907