1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 // MARKER(update_precomp.py): autogen include statement, do not remove 29 #include "precompiled_i18npool.hxx" 30 31 #include "textsearch.hxx" 32 #include "levdis.hxx" 33 #include <regexp/reclass.hxx> 34 #include <com/sun/star/lang/Locale.hpp> 35 #include <com/sun/star/lang/XMultiServiceFactory.hpp> 36 #include <comphelper/processfactory.hxx> 37 #include <com/sun/star/i18n/UnicodeType.hpp> 38 #include <com/sun/star/util/SearchFlags.hpp> 39 #include <com/sun/star/i18n/WordType.hpp> 40 #include <com/sun/star/i18n/ScriptType.hpp> 41 #include <com/sun/star/i18n/CharacterIteratorMode.hpp> 42 #include <com/sun/star/i18n/KCharacterType.hpp> 43 #include <com/sun/star/registry/XRegistryKey.hpp> 44 #include <cppuhelper/factory.hxx> 45 #include <cppuhelper/weak.hxx> 46 47 #ifdef _MSC_VER 48 // get rid of that dumb compiler warning 49 // identifier was truncated to '255' characters in the debug information 50 // for STL template usage, if .pdb files are to be created 51 #pragma warning( disable: 4786 ) 52 #endif 53 54 #include <string.h> 55 56 using namespace ::com::sun::star::util; 57 using namespace ::com::sun::star::uno; 58 using namespace ::com::sun::star::lang; 59 using namespace ::com::sun::star::i18n; 60 using namespace ::rtl; 61 62 static sal_Int32 COMPLEX_TRANS_MASK_TMP = 63 TransliterationModules_ignoreBaFa_ja_JP | 64 TransliterationModules_ignoreIterationMark_ja_JP | 65 TransliterationModules_ignoreTiJi_ja_JP | 66 TransliterationModules_ignoreHyuByu_ja_JP | 67 TransliterationModules_ignoreSeZe_ja_JP | 68 TransliterationModules_ignoreIandEfollowedByYa_ja_JP | 69 TransliterationModules_ignoreKiKuFollowedBySa_ja_JP | 70 TransliterationModules_ignoreProlongedSoundMark_ja_JP; 71 static const sal_Int32 SIMPLE_TRANS_MASK = 0xffffffff ^ COMPLEX_TRANS_MASK_TMP; 72 static const sal_Int32 COMPLEX_TRANS_MASK = 73 COMPLEX_TRANS_MASK_TMP | 74 TransliterationModules_IGNORE_KANA | 75 TransliterationModules_IGNORE_WIDTH; 76 // Above 2 transliteration is simple but need to take effect in 77 // complex transliteration 78 79 TextSearch::TextSearch(const Reference < XMultiServiceFactory > & rxMSF) 80 : xMSF( rxMSF ) 81 , pJumpTable( 0 ) 82 , pJumpTable2( 0 ) 83 , pRegExp( 0 ) 84 , pWLD( 0 ) 85 { 86 SearchOptions aOpt; 87 aOpt.algorithmType = SearchAlgorithms_ABSOLUTE; 88 aOpt.searchFlag = SearchFlags::ALL_IGNORE_CASE; 89 //aOpt.Locale = ???; 90 setOptions( aOpt ); 91 } 92 93 TextSearch::~TextSearch() 94 { 95 delete pRegExp; 96 delete pWLD; 97 delete pJumpTable; 98 delete pJumpTable2; 99 } 100 101 void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeException ) 102 { 103 aSrchPara = rOptions; 104 105 delete pRegExp, pRegExp = 0; 106 delete pWLD, pWLD = 0; 107 delete pJumpTable, pJumpTable = 0; 108 delete pJumpTable2, pJumpTable2 = 0; 109 110 // Create Transliteration class 111 if( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ) 112 { 113 if( !xTranslit.is() ) 114 { 115 Reference < XInterface > xI = xMSF->createInstance( 116 OUString::createFromAscii( 117 "com.sun.star.i18n.Transliteration")); 118 if ( xI.is() ) 119 xI->queryInterface( ::getCppuType( 120 (const Reference< XExtendedTransliteration >*)0)) 121 >>= xTranslit; 122 } 123 // Load transliteration module 124 if( xTranslit.is() ) 125 xTranslit->loadModule( 126 (TransliterationModules)( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ), 127 aSrchPara.Locale); 128 } 129 else if( xTranslit.is() ) 130 xTranslit = 0; 131 132 // Create Transliteration for 2<->1, 2<->2 transliteration 133 if ( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ) 134 { 135 if( !xTranslit2.is() ) 136 { 137 Reference < XInterface > xI = xMSF->createInstance( 138 OUString::createFromAscii( 139 "com.sun.star.i18n.Transliteration")); 140 if ( xI.is() ) 141 xI->queryInterface( ::getCppuType( 142 (const Reference< XExtendedTransliteration >*)0)) 143 >>= xTranslit2; 144 } 145 // Load transliteration module 146 if( xTranslit2.is() ) 147 xTranslit2->loadModule( 148 (TransliterationModules)( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ), 149 aSrchPara.Locale); 150 } 151 152 if ( !xBreak.is() ) 153 { 154 Reference < XInterface > xI = xMSF->createInstance( 155 OUString::createFromAscii( "com.sun.star.i18n.BreakIterator")); 156 if( xI.is() ) 157 xI->queryInterface( ::getCppuType( 158 (const Reference< XBreakIterator >*)0)) 159 >>= xBreak; 160 } 161 162 sSrchStr = aSrchPara.searchString; 163 164 // use transliteration here, but only if not RegEx, which does it different 165 if ( aSrchPara.algorithmType != SearchAlgorithms_REGEXP && xTranslit.is() && 166 aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ) 167 sSrchStr = xTranslit->transliterateString2String( 168 aSrchPara.searchString, 0, aSrchPara.searchString.getLength()); 169 170 if ( aSrchPara.algorithmType != SearchAlgorithms_REGEXP && xTranslit2.is() && 171 aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ) 172 sSrchStr2 = xTranslit2->transliterateString2String( 173 aSrchPara.searchString, 0, aSrchPara.searchString.getLength()); 174 175 // When start or end of search string is a complex script type, we need to 176 // make sure the result boundary is not located in the middle of cell. 177 checkCTLStart = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 0) == 178 ScriptType::COMPLEX)); 179 checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 180 sSrchStr.getLength()-1) == ScriptType::COMPLEX)); 181 182 if ( aSrchPara.algorithmType == SearchAlgorithms_REGEXP ) 183 { 184 fnForward = &TextSearch::RESrchFrwrd; 185 fnBackward = &TextSearch::RESrchBkwrd; 186 187 pRegExp = new Regexpr( aSrchPara, xTranslit ); 188 } 189 else 190 { 191 if ( aSrchPara.algorithmType == SearchAlgorithms_APPROXIMATE ) 192 { 193 fnForward = &TextSearch::ApproxSrchFrwrd; 194 fnBackward = &TextSearch::ApproxSrchBkwrd; 195 196 pWLD = new WLevDistance( sSrchStr.getStr(), aSrchPara.changedChars, 197 aSrchPara.insertedChars, aSrchPara.deletedChars, 198 0 != (SearchFlags::LEV_RELAXED & aSrchPara.searchFlag ) ); 199 200 nLimit = pWLD->GetLimit(); 201 } 202 else 203 { 204 fnForward = &TextSearch::NSrchFrwrd; 205 fnBackward = &TextSearch::NSrchBkwrd; 206 } 207 } 208 } 209 210 sal_Int32 FindPosInSeq_Impl( const Sequence <sal_Int32>& rOff, sal_Int32 nPos ) 211 { 212 sal_Int32 nRet = 0, nEnd = rOff.getLength(); 213 while( nRet < nEnd && nPos > rOff[ nRet ] ) ++nRet; 214 return nRet; 215 } 216 217 sal_Bool TextSearch::isCellStart(const OUString& searchStr, sal_Int32 nPos) 218 throw( RuntimeException ) 219 { 220 sal_Int32 nDone; 221 return nPos == xBreak->previousCharacters(searchStr, nPos+1, 222 aSrchPara.Locale, CharacterIteratorMode::SKIPCELL, 1, nDone); 223 } 224 225 SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 226 throw( RuntimeException ) 227 { 228 SearchResult sres; 229 230 OUString in_str(searchStr); 231 sal_Int32 newStartPos = startPos; 232 sal_Int32 newEndPos = endPos; 233 234 bUsePrimarySrchStr = true; 235 236 if ( xTranslit.is() ) 237 { 238 // apply normal transliteration (1<->1, 1<->0) 239 com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 240 in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset ); 241 242 // JP 20.6.2001: also the start and end positions must be corrected! 243 if( startPos ) 244 newStartPos = FindPosInSeq_Impl( offset, startPos ); 245 246 if( endPos < searchStr.getLength() ) 247 newEndPos = FindPosInSeq_Impl( offset, endPos ); 248 else 249 newEndPos = in_str.getLength(); 250 251 sres = (this->*fnForward)( in_str, newStartPos, newEndPos ); 252 253 for ( int k = 0; k < sres.startOffset.getLength(); k++ ) 254 { 255 if (sres.startOffset[k]) 256 sres.startOffset[k] = offset[sres.startOffset[k]]; 257 // JP 20.6.2001: end is ever exclusive and then don't return 258 // the position of the next character - return the 259 // next position behind the last found character! 260 // "a b c" find "b" must return 2,3 and not 2,4!!! 261 if (sres.endOffset[k]) 262 sres.endOffset[k] = offset[sres.endOffset[k]-1] + 1; 263 } 264 } 265 else 266 { 267 sres = (this->*fnForward)( in_str, startPos, endPos ); 268 } 269 270 if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP) 271 { 272 SearchResult sres2; 273 274 in_str = OUString(searchStr); 275 com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 276 277 in_str = xTranslit2->transliterate( searchStr, 0, in_str.getLength(), offset ); 278 279 if( startPos ) 280 startPos = FindPosInSeq_Impl( offset, startPos ); 281 282 if( endPos < searchStr.getLength() ) 283 endPos = FindPosInSeq_Impl( offset, endPos ); 284 else 285 endPos = in_str.getLength(); 286 287 bUsePrimarySrchStr = false; 288 sres2 = (this->*fnForward)( in_str, startPos, endPos ); 289 290 for ( int k = 0; k < sres2.startOffset.getLength(); k++ ) 291 { 292 if (sres2.startOffset[k]) 293 sres2.startOffset[k] = offset[sres2.startOffset[k]-1] + 1; 294 if (sres2.endOffset[k]) 295 sres2.endOffset[k] = offset[sres2.endOffset[k]-1] + 1; 296 } 297 298 // pick first and long one 299 if ( sres.subRegExpressions == 0) 300 return sres2; 301 if ( sres2.subRegExpressions == 1) 302 { 303 if ( sres.startOffset[0] > sres2.startOffset[0]) 304 return sres2; 305 else if ( sres.startOffset[0] == sres2.startOffset[0] && 306 sres.endOffset[0] < sres2.endOffset[0]) 307 return sres2; 308 } 309 } 310 311 return sres; 312 } 313 314 SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 315 throw(RuntimeException) 316 { 317 SearchResult sres; 318 319 OUString in_str(searchStr); 320 sal_Int32 newStartPos = startPos; 321 sal_Int32 newEndPos = endPos; 322 323 bUsePrimarySrchStr = true; 324 325 if ( xTranslit.is() ) 326 { 327 // apply only simple 1<->1 transliteration here 328 com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 329 in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset ); 330 331 // JP 20.6.2001: also the start and end positions must be corrected! 332 if( startPos < searchStr.getLength() ) 333 newStartPos = FindPosInSeq_Impl( offset, startPos ); 334 else 335 newStartPos = in_str.getLength(); 336 337 if( endPos ) 338 newEndPos = FindPosInSeq_Impl( offset, endPos ); 339 340 sres = (this->*fnBackward)( in_str, newStartPos, newEndPos ); 341 342 for ( int k = 0; k < sres.startOffset.getLength(); k++ ) 343 { 344 if (sres.startOffset[k]) 345 sres.startOffset[k] = offset[sres.startOffset[k] - 1] + 1; 346 // JP 20.6.2001: end is ever exclusive and then don't return 347 // the position of the next character - return the 348 // next position behind the last found character! 349 // "a b c" find "b" must return 2,3 and not 2,4!!! 350 if (sres.endOffset[k]) 351 sres.endOffset[k] = offset[sres.endOffset[k]]; 352 } 353 } 354 else 355 { 356 sres = (this->*fnBackward)( in_str, startPos, endPos ); 357 } 358 359 if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP ) 360 { 361 SearchResult sres2; 362 363 in_str = OUString(searchStr); 364 com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 365 366 in_str = xTranslit2->transliterate(searchStr, 0, in_str.getLength(), offset); 367 368 if( startPos < searchStr.getLength() ) 369 startPos = FindPosInSeq_Impl( offset, startPos ); 370 else 371 startPos = in_str.getLength(); 372 373 if( endPos ) 374 endPos = FindPosInSeq_Impl( offset, endPos ); 375 376 bUsePrimarySrchStr = false; 377 sres2 = (this->*fnBackward)( in_str, startPos, endPos ); 378 379 for( int k = 0; k < sres2.startOffset.getLength(); k++ ) 380 { 381 if (sres2.startOffset[k]) 382 sres2.startOffset[k] = offset[sres2.startOffset[k]-1]+1; 383 if (sres2.endOffset[k]) 384 sres2.endOffset[k] = offset[sres2.endOffset[k]-1]+1; 385 } 386 387 // pick last and long one 388 if ( sres.subRegExpressions == 0 ) 389 return sres2; 390 if ( sres2.subRegExpressions == 1 ) 391 { 392 if ( sres.startOffset[0] < sres2.startOffset[0] ) 393 return sres2; 394 if ( sres.startOffset[0] == sres2.startOffset[0] && 395 sres.endOffset[0] > sres2.endOffset[0] ) 396 return sres2; 397 } 398 } 399 400 return sres; 401 } 402 403 404 405 //--------------- die Wort-Trennner ---------------------------------- 406 407 bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const 408 { 409 bool bRet = 1; 410 if( '\x7f' != rStr[nPos]) 411 { 412 if ( !xCharClass.is() ) 413 { 414 Reference < XInterface > xI = xMSF->createInstance( 415 OUString::createFromAscii( "com.sun.star.i18n.CharacterClassification")); 416 if( xI.is() ) 417 xI->queryInterface( ::getCppuType( 418 (const Reference< XCharacterClassification >*)0)) 419 >>= xCharClass; 420 } 421 if ( xCharClass.is() ) 422 { 423 sal_Int32 nCType = xCharClass->getCharacterType( rStr, nPos, 424 aSrchPara.Locale ); 425 if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA | 426 KCharacterType::LETTER ) & nCType ) ) 427 bRet = 0; 428 } 429 } 430 return bRet; 431 } 432 433 434 435 // --------- methods for the kind of boyer-morre search ------------------ 436 437 438 void TextSearch::MakeForwardTab() 439 { 440 // create the jumptable for the search text 441 if( pJumpTable ) 442 { 443 if( bIsForwardTab ) 444 return ; // the jumpTable is ok 445 delete pJumpTable; 446 } 447 bIsForwardTab = true; 448 449 sal_Int32 n, nLen = sSrchStr.getLength(); 450 pJumpTable = new TextSearchJumpTable; 451 452 for( n = 0; n < nLen - 1; ++n ) 453 { 454 sal_Unicode cCh = sSrchStr[n]; 455 sal_Int32 nDiff = nLen - n - 1; 456 TextSearchJumpTable::value_type aEntry( cCh, nDiff ); 457 458 ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 459 pJumpTable->insert( aEntry ); 460 if ( !aPair.second ) 461 (*(aPair.first)).second = nDiff; 462 } 463 } 464 465 void TextSearch::MakeForwardTab2() 466 { 467 // create the jumptable for the search text 468 if( pJumpTable2 ) 469 { 470 if( bIsForwardTab ) 471 return ; // the jumpTable is ok 472 delete pJumpTable2; 473 } 474 bIsForwardTab = true; 475 476 sal_Int32 n, nLen = sSrchStr2.getLength(); 477 pJumpTable2 = new TextSearchJumpTable; 478 479 for( n = 0; n < nLen - 1; ++n ) 480 { 481 sal_Unicode cCh = sSrchStr2[n]; 482 sal_Int32 nDiff = nLen - n - 1; 483 484 TextSearchJumpTable::value_type aEntry( cCh, nDiff ); 485 ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 486 pJumpTable2->insert( aEntry ); 487 if ( !aPair.second ) 488 (*(aPair.first)).second = nDiff; 489 } 490 } 491 492 void TextSearch::MakeBackwardTab() 493 { 494 // create the jumptable for the search text 495 if( pJumpTable ) 496 { 497 if( !bIsForwardTab ) 498 return ; // the jumpTable is ok 499 delete pJumpTable; 500 } 501 bIsForwardTab = false; 502 503 sal_Int32 n, nLen = sSrchStr.getLength(); 504 pJumpTable = new TextSearchJumpTable; 505 506 for( n = nLen-1; n > 0; --n ) 507 { 508 sal_Unicode cCh = sSrchStr[n]; 509 TextSearchJumpTable::value_type aEntry( cCh, n ); 510 ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 511 pJumpTable->insert( aEntry ); 512 if ( !aPair.second ) 513 (*(aPair.first)).second = n; 514 } 515 } 516 517 void TextSearch::MakeBackwardTab2() 518 { 519 // create the jumptable for the search text 520 if( pJumpTable2 ) 521 { 522 if( !bIsForwardTab ) 523 return ; // the jumpTable is ok 524 delete pJumpTable2; 525 } 526 bIsForwardTab = false; 527 528 sal_Int32 n, nLen = sSrchStr2.getLength(); 529 pJumpTable2 = new TextSearchJumpTable; 530 531 for( n = nLen-1; n > 0; --n ) 532 { 533 sal_Unicode cCh = sSrchStr2[n]; 534 TextSearchJumpTable::value_type aEntry( cCh, n ); 535 ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 536 pJumpTable2->insert( aEntry ); 537 if ( !aPair.second ) 538 (*(aPair.first)).second = n; 539 } 540 } 541 542 sal_Int32 TextSearch::GetDiff( const sal_Unicode cChr ) const 543 { 544 TextSearchJumpTable *pJump; 545 OUString sSearchKey; 546 547 if ( bUsePrimarySrchStr ) { 548 pJump = pJumpTable; 549 sSearchKey = sSrchStr; 550 } else { 551 pJump = pJumpTable2; 552 sSearchKey = sSrchStr2; 553 } 554 555 TextSearchJumpTable::const_iterator iLook = pJump->find( cChr ); 556 if ( iLook == pJump->end() ) 557 return sSearchKey.getLength(); 558 return (*iLook).second; 559 } 560 561 562 // TextSearch::NSrchFrwrd is mis-optimized on unxsoli (#i105945#) 563 SearchResult TextSearch::NSrchFrwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 564 throw(RuntimeException) 565 { 566 SearchResult aRet; 567 aRet.subRegExpressions = 0; 568 569 OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2; 570 571 OUString aStr( searchStr ); 572 sal_Int32 nSuchIdx = aStr.getLength(); 573 sal_Int32 nEnde = endPos; 574 if( !nSuchIdx || !sSearchKey.getLength() || sSearchKey.getLength() > nSuchIdx ) 575 return aRet; 576 577 578 if( nEnde < sSearchKey.getLength() ) // position inside the search region ? 579 return aRet; 580 581 nEnde -= sSearchKey.getLength(); 582 583 if (bUsePrimarySrchStr) 584 MakeForwardTab(); // create the jumptable 585 else 586 MakeForwardTab2(); 587 588 for (sal_Int32 nCmpIdx = startPos; // start position for the search 589 nCmpIdx <= nEnde; 590 nCmpIdx += GetDiff( aStr[nCmpIdx + sSearchKey.getLength()-1])) 591 { 592 // if the match would be the completed cells, skip it. 593 if ( (checkCTLStart && !isCellStart( aStr, nCmpIdx )) || (checkCTLEnd 594 && !isCellStart( aStr, nCmpIdx + sSearchKey.getLength())) ) 595 continue; 596 597 nSuchIdx = sSearchKey.getLength() - 1; 598 while( nSuchIdx >= 0 && sSearchKey[nSuchIdx] == aStr[nCmpIdx + nSuchIdx]) 599 { 600 if( nSuchIdx == 0 ) 601 { 602 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag ) 603 { 604 sal_Int32 nFndEnd = nCmpIdx + sSearchKey.getLength(); 605 bool bAtStart = !nCmpIdx; 606 bool bAtEnd = nFndEnd == endPos; 607 bool bDelimBefore = bAtStart || IsDelimiter( aStr, nCmpIdx-1 ); 608 bool bDelimBehind = IsDelimiter( aStr, nFndEnd ); 609 // * 1 -> only one word in the paragraph 610 // * 2 -> at begin of paragraph 611 // * 3 -> at end of paragraph 612 // * 4 -> inside the paragraph 613 if( !( ( bAtStart && bAtEnd ) || // 1 614 ( bAtStart && bDelimBehind ) || // 2 615 ( bAtEnd && bDelimBefore ) || // 3 616 ( bDelimBefore && bDelimBehind ))) // 4 617 break; 618 } 619 620 aRet.subRegExpressions = 1; 621 aRet.startOffset.realloc( 1 ); 622 aRet.startOffset[ 0 ] = nCmpIdx; 623 aRet.endOffset.realloc( 1 ); 624 aRet.endOffset[ 0 ] = nCmpIdx + sSearchKey.getLength(); 625 626 return aRet; 627 } 628 else 629 nSuchIdx--; 630 } 631 } 632 return aRet; 633 } 634 635 SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 636 throw(RuntimeException) 637 { 638 SearchResult aRet; 639 aRet.subRegExpressions = 0; 640 641 OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2; 642 643 OUString aStr( searchStr ); 644 sal_Int32 nSuchIdx = aStr.getLength(); 645 sal_Int32 nEnde = endPos; 646 if( nSuchIdx == 0 || sSearchKey.getLength() == 0 || sSearchKey.getLength() > nSuchIdx) 647 return aRet; 648 649 if (bUsePrimarySrchStr) 650 MakeBackwardTab(); // create the jumptable 651 else 652 MakeBackwardTab2(); 653 654 if( nEnde == nSuchIdx ) // end position for the search 655 nEnde = sSearchKey.getLength(); 656 else 657 nEnde += sSearchKey.getLength(); 658 659 sal_Int32 nCmpIdx = startPos; // start position for the search 660 661 while (nCmpIdx >= nEnde) 662 { 663 // if the match would be the completed cells, skip it. 664 if ( (!checkCTLStart || isCellStart( aStr, nCmpIdx - 665 sSearchKey.getLength() )) && (!checkCTLEnd || 666 isCellStart( aStr, nCmpIdx))) 667 { 668 nSuchIdx = 0; 669 while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] == 670 aStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] ) 671 nSuchIdx++; 672 if( nSuchIdx >= sSearchKey.getLength() ) 673 { 674 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag ) 675 { 676 sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength(); 677 bool bAtStart = !nFndStt; 678 bool bAtEnd = nCmpIdx == startPos; 679 bool bDelimBehind = IsDelimiter( aStr, nCmpIdx ); 680 bool bDelimBefore = bAtStart || // begin of paragraph 681 IsDelimiter( aStr, nFndStt-1 ); 682 // * 1 -> only one word in the paragraph 683 // * 2 -> at begin of paragraph 684 // * 3 -> at end of paragraph 685 // * 4 -> inside the paragraph 686 if( ( bAtStart && bAtEnd ) || // 1 687 ( bAtStart && bDelimBehind ) || // 2 688 ( bAtEnd && bDelimBefore ) || // 3 689 ( bDelimBefore && bDelimBehind )) // 4 690 { 691 aRet.subRegExpressions = 1; 692 aRet.startOffset.realloc( 1 ); 693 aRet.startOffset[ 0 ] = nCmpIdx; 694 aRet.endOffset.realloc( 1 ); 695 aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength(); 696 return aRet; 697 } 698 } 699 else 700 { 701 aRet.subRegExpressions = 1; 702 aRet.startOffset.realloc( 1 ); 703 aRet.startOffset[ 0 ] = nCmpIdx; 704 aRet.endOffset.realloc( 1 ); 705 aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength(); 706 return aRet; 707 } 708 } 709 } 710 nSuchIdx = GetDiff( aStr[nCmpIdx - sSearchKey.getLength()] ); 711 if( nCmpIdx < nSuchIdx ) 712 return aRet; 713 nCmpIdx -= nSuchIdx; 714 } 715 return aRet; 716 } 717 718 719 720 //--------------------------------------------------------------------------- 721 // ------- Methoden fuer die Suche ueber Regular-Expressions -------------- 722 723 SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr, 724 sal_Int32 startPos, sal_Int32 endPos ) 725 throw(RuntimeException) 726 { 727 SearchResult aRet; 728 aRet.subRegExpressions = 0; 729 OUString aStr( searchStr ); 730 731 bool bSearchInSel = (0 != (( SearchFlags::REG_NOT_BEGINOFLINE | 732 SearchFlags::REG_NOT_ENDOFLINE ) & aSrchPara.searchFlag )); 733 734 pRegExp->set_line(aStr.getStr(), bSearchInSel ? endPos : aStr.getLength()); 735 736 struct re_registers regs; 737 738 // Clear structure 739 memset((void *)®s, 0, sizeof(struct re_registers)); 740 if ( ! pRegExp->re_search(®s, startPos) ) 741 { 742 if( regs.num_of_match > 0 && 743 (regs.start[0] != -1 && regs.end[0] != -1) ) 744 { 745 aRet.startOffset.realloc(regs.num_of_match); 746 aRet.endOffset.realloc(regs.num_of_match); 747 748 sal_Int32 i = 0, j = 0; 749 while( j < regs.num_of_match ) 750 { 751 if( regs.start[j] != -1 && regs.end[j] != -1 ) 752 { 753 aRet.startOffset[i] = regs.start[j]; 754 aRet.endOffset[i] = regs.end[j]; 755 ++i; 756 } 757 ++j; 758 } 759 aRet.subRegExpressions = i; 760 } 761 if ( regs.num_regs > 0 ) 762 { 763 if ( regs.start ) 764 free(regs.start); 765 if ( regs.end ) 766 free(regs.end); 767 } 768 } 769 770 return aRet; 771 } 772 773 /* 774 * Sucht das Muster aSrchPara.sSrchStr rueckwaerts im String rStr 775 */ 776 SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr, 777 sal_Int32 startPos, sal_Int32 endPos ) 778 throw(RuntimeException) 779 { 780 SearchResult aRet; 781 aRet.subRegExpressions = 0; 782 OUString aStr( searchStr ); 783 784 sal_Int32 nOffset = 0; 785 sal_Int32 nStrEnde = aStr.getLength() == endPos ? 0 : endPos; 786 787 bool bSearchInSel = (0 != (( SearchFlags::REG_NOT_BEGINOFLINE | 788 SearchFlags::REG_NOT_ENDOFLINE ) & aSrchPara.searchFlag )); 789 790 if( startPos ) 791 nOffset = startPos - 1; 792 793 // search only in the subString 794 if( bSearchInSel && nStrEnde ) 795 { 796 aStr = aStr.copy( nStrEnde, aStr.getLength() - nStrEnde ); 797 if( nOffset > nStrEnde ) 798 nOffset = nOffset - nStrEnde; 799 else 800 nOffset = 0; 801 } 802 803 // set the length to negative for reverse search 804 pRegExp->set_line( aStr.getStr(), -(aStr.getLength()) ); 805 struct re_registers regs; 806 807 // Clear structure 808 memset((void *)®s, 0, sizeof(struct re_registers)); 809 if ( ! pRegExp->re_search(®s, nOffset) ) 810 { 811 if( regs.num_of_match > 0 && 812 (regs.start[0] != -1 && regs.end[0] != -1) ) 813 { 814 nOffset = bSearchInSel ? nStrEnde : 0; 815 aRet.startOffset.realloc(regs.num_of_match); 816 aRet.endOffset.realloc(regs.num_of_match); 817 818 sal_Int32 i = 0, j = 0; 819 while( j < regs.num_of_match ) 820 { 821 if( regs.start[j] != -1 && regs.end[j] != -1 ) 822 { 823 aRet.startOffset[i] = regs.end[j] + nOffset; 824 aRet.endOffset[i] = regs.start[j] + nOffset; 825 ++i; 826 } 827 ++j; 828 } 829 aRet.subRegExpressions = i; 830 } 831 if ( regs.num_regs > 0 ) 832 { 833 if ( regs.start ) 834 free(regs.start); 835 if ( regs.end ) 836 free(regs.end); 837 } 838 } 839 840 return aRet; 841 } 842 843 // Phonetische Suche von Worten 844 SearchResult TextSearch::ApproxSrchFrwrd( const OUString& searchStr, 845 sal_Int32 startPos, sal_Int32 endPos ) 846 throw(RuntimeException) 847 { 848 SearchResult aRet; 849 aRet.subRegExpressions = 0; 850 851 if( !xBreak.is() ) 852 return aRet; 853 854 OUString aWTemp( searchStr ); 855 856 register sal_Int32 nStt, nEnd; 857 858 Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos, 859 aSrchPara.Locale, 860 WordType::ANYWORD_IGNOREWHITESPACES, sal_True ); 861 862 do 863 { 864 if( aWBnd.startPos >= endPos ) 865 break; 866 nStt = aWBnd.startPos < startPos ? startPos : aWBnd.startPos; 867 nEnd = aWBnd.endPos > endPos ? endPos : aWBnd.endPos; 868 869 if( nStt < nEnd && 870 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit ) 871 { 872 aRet.subRegExpressions = 1; 873 aRet.startOffset.realloc( 1 ); 874 aRet.startOffset[ 0 ] = nStt; 875 aRet.endOffset.realloc( 1 ); 876 aRet.endOffset[ 0 ] = nEnd; 877 break; 878 } 879 880 nStt = nEnd - 1; 881 aWBnd = xBreak->nextWord( aWTemp, nStt, aSrchPara.Locale, 882 WordType::ANYWORD_IGNOREWHITESPACES); 883 } while( aWBnd.startPos != aWBnd.endPos || 884 (aWBnd.endPos != aWTemp.getLength() && aWBnd.endPos != nEnd) ); 885 // #i50244# aWBnd.endPos != nEnd : in case there is _no_ word (only 886 // whitespace) in searchStr, getWordBoundary() returned startPos,startPos 887 // and nextWord() does also => don't loop forever. 888 return aRet; 889 } 890 891 SearchResult TextSearch::ApproxSrchBkwrd( const OUString& searchStr, 892 sal_Int32 startPos, sal_Int32 endPos ) 893 throw(RuntimeException) 894 { 895 SearchResult aRet; 896 aRet.subRegExpressions = 0; 897 898 if( !xBreak.is() ) 899 return aRet; 900 901 OUString aWTemp( searchStr ); 902 903 register sal_Int32 nStt, nEnd; 904 905 Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos, 906 aSrchPara.Locale, 907 WordType::ANYWORD_IGNOREWHITESPACES, sal_True ); 908 909 do 910 { 911 if( aWBnd.endPos <= endPos ) 912 break; 913 nStt = aWBnd.startPos < endPos ? endPos : aWBnd.startPos; 914 nEnd = aWBnd.endPos > startPos ? startPos : aWBnd.endPos; 915 916 if( nStt < nEnd && 917 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit ) 918 { 919 aRet.subRegExpressions = 1; 920 aRet.startOffset.realloc( 1 ); 921 aRet.startOffset[ 0 ] = nEnd; 922 aRet.endOffset.realloc( 1 ); 923 aRet.endOffset[ 0 ] = nStt; 924 break; 925 } 926 if( !nStt ) 927 break; 928 929 aWBnd = xBreak->previousWord( aWTemp, nStt, aSrchPara.Locale, 930 WordType::ANYWORD_IGNOREWHITESPACES); 931 } while( aWBnd.startPos != aWBnd.endPos || aWBnd.endPos != aWTemp.getLength() ); 932 return aRet; 933 } 934 935 936 static const sal_Char cSearchName[] = "com.sun.star.util.TextSearch"; 937 static const sal_Char cSearchImpl[] = "com.sun.star.util.TextSearch_i18n"; 938 939 static OUString getServiceName_Static() 940 { 941 return OUString::createFromAscii( cSearchName ); 942 } 943 944 static OUString getImplementationName_Static() 945 { 946 return OUString::createFromAscii( cSearchImpl ); 947 } 948 949 OUString SAL_CALL 950 TextSearch::getImplementationName() 951 throw( RuntimeException ) 952 { 953 return getImplementationName_Static(); 954 } 955 956 sal_Bool SAL_CALL 957 TextSearch::supportsService(const OUString& rServiceName) 958 throw( RuntimeException ) 959 { 960 return !rServiceName.compareToAscii( cSearchName ); 961 } 962 963 Sequence< OUString > SAL_CALL 964 TextSearch::getSupportedServiceNames(void) throw( RuntimeException ) 965 { 966 Sequence< OUString > aRet(1); 967 aRet[0] = getServiceName_Static(); 968 return aRet; 969 } 970 971 ::com::sun::star::uno::Reference< ::com::sun::star::uno::XInterface > 972 SAL_CALL TextSearch_CreateInstance( 973 const ::com::sun::star::uno::Reference< 974 ::com::sun::star::lang::XMultiServiceFactory >& rxMSF ) 975 { 976 return ::com::sun::star::uno::Reference< 977 ::com::sun::star::uno::XInterface >( 978 (::cppu::OWeakObject*) new TextSearch( rxMSF ) ); 979 } 980 981 extern "C" 982 { 983 984 void SAL_CALL component_getImplementationEnvironment( 985 const sal_Char** ppEnvTypeName, uno_Environment** /*ppEnv*/ ) 986 { 987 *ppEnvTypeName = CPPU_CURRENT_LANGUAGE_BINDING_NAME; 988 } 989 990 void* SAL_CALL component_getFactory( const sal_Char* sImplementationName, 991 void* _pServiceManager, void* /*_pRegistryKey*/ ) 992 { 993 void* pRet = NULL; 994 995 ::com::sun::star::lang::XMultiServiceFactory* pServiceManager = 996 reinterpret_cast< ::com::sun::star::lang::XMultiServiceFactory* > 997 ( _pServiceManager ); 998 ::com::sun::star::uno::Reference< 999 ::com::sun::star::lang::XSingleServiceFactory > xFactory; 1000 1001 if ( 0 == rtl_str_compare( sImplementationName, cSearchImpl) ) 1002 { 1003 ::com::sun::star::uno::Sequence< ::rtl::OUString > aServiceNames(1); 1004 aServiceNames[0] = getServiceName_Static(); 1005 xFactory = ::cppu::createSingleFactory( 1006 pServiceManager, getImplementationName_Static(), 1007 &TextSearch_CreateInstance, aServiceNames ); 1008 } 1009 1010 if ( xFactory.is() ) 1011 { 1012 xFactory->acquire(); 1013 pRet = xFactory.get(); 1014 } 1015 1016 return pRet; 1017 } 1018 1019 } // extern "C" 1020