1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 // MARKER(update_precomp.py): autogen include statement, do not remove 29 #include "precompiled_i18npool.hxx" 30 31 #include "textsearch.hxx" 32 #include "levdis.hxx" 33 #include <com/sun/star/lang/Locale.hpp> 34 #include <com/sun/star/lang/XMultiServiceFactory.hpp> 35 #include <comphelper/processfactory.hxx> 36 #include <com/sun/star/i18n/UnicodeType.hpp> 37 #include <com/sun/star/util/SearchFlags.hpp> 38 #include <com/sun/star/i18n/WordType.hpp> 39 #include <com/sun/star/i18n/ScriptType.hpp> 40 #include <com/sun/star/i18n/CharacterIteratorMode.hpp> 41 #include <com/sun/star/i18n/KCharacterType.hpp> 42 #include <com/sun/star/registry/XRegistryKey.hpp> 43 #include <cppuhelper/factory.hxx> 44 #include <cppuhelper/weak.hxx> 45 46 #ifdef _MSC_VER 47 // get rid of that dumb compiler warning 48 // identifier was truncated to '255' characters in the debug information 49 // for STL template usage, if .pdb files are to be created 50 #pragma warning( disable: 4786 ) 51 #endif 52 53 #include <string.h> 54 55 using namespace ::com::sun::star::util; 56 using namespace ::com::sun::star::uno; 57 using namespace ::com::sun::star::lang; 58 using namespace ::com::sun::star::i18n; 59 using namespace ::rtl; 60 61 static sal_Int32 COMPLEX_TRANS_MASK_TMP = 62 TransliterationModules_ignoreBaFa_ja_JP | 63 TransliterationModules_ignoreIterationMark_ja_JP | 64 TransliterationModules_ignoreTiJi_ja_JP | 65 TransliterationModules_ignoreHyuByu_ja_JP | 66 TransliterationModules_ignoreSeZe_ja_JP | 67 TransliterationModules_ignoreIandEfollowedByYa_ja_JP | 68 TransliterationModules_ignoreKiKuFollowedBySa_ja_JP | 69 TransliterationModules_ignoreProlongedSoundMark_ja_JP; 70 static const sal_Int32 SIMPLE_TRANS_MASK = ~(COMPLEX_TRANS_MASK_TMP | TransliterationModules_IGNORE_WIDTH) | TransliterationModules_FULLWIDTH_HALFWIDTH; 71 static const sal_Int32 COMPLEX_TRANS_MASK = COMPLEX_TRANS_MASK_TMP | TransliterationModules_IGNORE_KANA | TransliterationModules_FULLWIDTH_HALFWIDTH; 72 // Above 2 transliteration is simple but need to take effect in 73 // complex transliteration 74 75 TextSearch::TextSearch(const Reference < XMultiServiceFactory > & rxMSF) 76 : xMSF( rxMSF ) 77 , pJumpTable( 0 ) 78 , pJumpTable2( 0 ) 79 , pRegexMatcher( NULL ) 80 , pWLD( 0 ) 81 { 82 SearchOptions aOpt; 83 aOpt.algorithmType = SearchAlgorithms_ABSOLUTE; 84 aOpt.searchFlag = SearchFlags::ALL_IGNORE_CASE; 85 //aOpt.Locale = ???; 86 setOptions( aOpt ); 87 } 88 89 TextSearch::~TextSearch() 90 { 91 delete pRegexMatcher; 92 delete pWLD; 93 delete pJumpTable; 94 delete pJumpTable2; 95 } 96 97 void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeException ) 98 { 99 aSrchPara = rOptions; 100 101 delete pRegexMatcher, pRegexMatcher = NULL; 102 delete pWLD, pWLD = 0; 103 delete pJumpTable, pJumpTable = 0; 104 delete pJumpTable2, pJumpTable2 = 0; 105 106 // Create Transliteration class 107 if( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ) 108 { 109 if( !xTranslit.is() ) 110 { 111 Reference < XInterface > xI = xMSF->createInstance( 112 OUString::createFromAscii( 113 "com.sun.star.i18n.Transliteration")); 114 if ( xI.is() ) 115 xI->queryInterface( ::getCppuType( 116 (const Reference< XExtendedTransliteration >*)0)) 117 >>= xTranslit; 118 } 119 // Load transliteration module 120 if( xTranslit.is() ) 121 xTranslit->loadModule( 122 (TransliterationModules)( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ), 123 aSrchPara.Locale); 124 } 125 else if( xTranslit.is() ) 126 xTranslit = 0; 127 128 // Create Transliteration for 2<->1, 2<->2 transliteration 129 if ( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ) 130 { 131 if( !xTranslit2.is() ) 132 { 133 Reference < XInterface > xI = xMSF->createInstance( 134 OUString::createFromAscii( 135 "com.sun.star.i18n.Transliteration")); 136 if ( xI.is() ) 137 xI->queryInterface( ::getCppuType( 138 (const Reference< XExtendedTransliteration >*)0)) 139 >>= xTranslit2; 140 } 141 // Load transliteration module 142 if( xTranslit2.is() ) 143 xTranslit2->loadModule( 144 (TransliterationModules)( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ), 145 aSrchPara.Locale); 146 } 147 148 if ( !xBreak.is() ) 149 { 150 Reference < XInterface > xI = xMSF->createInstance( 151 OUString::createFromAscii( "com.sun.star.i18n.BreakIterator")); 152 if( xI.is() ) 153 xI->queryInterface( ::getCppuType( 154 (const Reference< XBreakIterator >*)0)) 155 >>= xBreak; 156 } 157 158 sSrchStr = aSrchPara.searchString; 159 160 // use transliteration here 161 if ( xTranslit.is() && 162 aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ) 163 sSrchStr = xTranslit->transliterateString2String( 164 aSrchPara.searchString, 0, aSrchPara.searchString.getLength()); 165 166 if ( xTranslit2.is() && 167 aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ) 168 sSrchStr2 = xTranslit2->transliterateString2String( 169 aSrchPara.searchString, 0, aSrchPara.searchString.getLength()); 170 171 // When start or end of search string is a complex script type, we need to 172 // make sure the result boundary is not located in the middle of cell. 173 checkCTLStart = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 0) == 174 ScriptType::COMPLEX)); 175 checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 176 sSrchStr.getLength()-1) == ScriptType::COMPLEX)); 177 178 switch( aSrchPara.algorithmType) 179 { 180 case SearchAlgorithms_REGEXP: 181 fnForward = &TextSearch::RESrchFrwrd; 182 fnBackward = &TextSearch::RESrchBkwrd; 183 184 { 185 sal_uInt32 nIcuSearchFlags = 0; 186 // map com::sun::star::util::SearchFlags to ICU uregex.h flags 187 // TODO: REG_EXTENDED, REG_NOT_BEGINOFLINE, REG_NOT_ENDOFLINE 188 // REG_NEWLINE is neither defined properly nor used anywhere => not implemented 189 // REG_NOSUB is not used anywhere => not implemented 190 // NORM_WORD_ONLY is only used for SearchAlgorithm==Absolute 191 // LEV_RELAXED is only used for SearchAlgorithm==Approximate 192 // why is even ALL_IGNORE_CASE deprecated in UNO? because of transliteration taking care of it??? 193 if( (aSrchPara.searchFlag & com::sun::star::util::SearchFlags::ALL_IGNORE_CASE) != 0) 194 nIcuSearchFlags |= UREGEX_CASE_INSENSITIVE; 195 UErrorCode nIcuErr = U_ZERO_ERROR; 196 // assumption: transliteration doesn't mangle regexp control chars 197 OUString& rPatternStr = (aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK) ? sSrchStr 198 : ((aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK) ? sSrchStr2 : aSrchPara.searchString); 199 const IcuUniString aIcuSearchPatStr( rPatternStr.getStr(), rPatternStr.getLength()); 200 pRegexMatcher = new RegexMatcher( aIcuSearchPatStr, nIcuSearchFlags, nIcuErr); 201 if( nIcuErr) 202 { delete pRegexMatcher; pRegexMatcher = NULL;} 203 } break; 204 205 case SearchAlgorithms_APPROXIMATE: 206 fnForward = &TextSearch::ApproxSrchFrwrd; 207 fnBackward = &TextSearch::ApproxSrchBkwrd; 208 209 pWLD = new WLevDistance( sSrchStr.getStr(), aSrchPara.changedChars, 210 aSrchPara.insertedChars, aSrchPara.deletedChars, 211 0 != (SearchFlags::LEV_RELAXED & aSrchPara.searchFlag ) ); 212 213 nLimit = pWLD->GetLimit(); 214 break; 215 216 default: 217 fnForward = &TextSearch::NSrchFrwrd; 218 fnBackward = &TextSearch::NSrchBkwrd; 219 break; 220 } 221 } 222 223 sal_Int32 FindPosInSeq_Impl( const Sequence <sal_Int32>& rOff, sal_Int32 nPos ) 224 { 225 sal_Int32 nRet = 0, nEnd = rOff.getLength(); 226 while( nRet < nEnd && nPos > rOff[ nRet ] ) ++nRet; 227 return nRet; 228 } 229 230 sal_Bool TextSearch::isCellStart(const OUString& searchStr, sal_Int32 nPos) 231 throw( RuntimeException ) 232 { 233 sal_Int32 nDone; 234 return nPos == xBreak->previousCharacters(searchStr, nPos+1, 235 aSrchPara.Locale, CharacterIteratorMode::SKIPCELL, 1, nDone); 236 } 237 238 SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 239 throw( RuntimeException ) 240 { 241 SearchResult sres; 242 243 OUString in_str(searchStr); 244 sal_Int32 newStartPos = startPos; 245 sal_Int32 newEndPos = endPos; 246 247 bUsePrimarySrchStr = true; 248 249 if ( xTranslit.is() ) 250 { 251 // apply normal transliteration (1<->1, 1<->0) 252 com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 253 in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset ); 254 255 // JP 20.6.2001: also the start and end positions must be corrected! 256 if( startPos ) 257 newStartPos = FindPosInSeq_Impl( offset, startPos ); 258 259 if( endPos < searchStr.getLength() ) 260 newEndPos = FindPosInSeq_Impl( offset, endPos ); 261 else 262 newEndPos = in_str.getLength(); 263 264 sres = (this->*fnForward)( in_str, newStartPos, newEndPos ); 265 266 for ( int k = 0; k < sres.startOffset.getLength(); k++ ) 267 { 268 if (sres.startOffset[k]) 269 sres.startOffset[k] = offset[sres.startOffset[k]]; 270 // JP 20.6.2001: end is ever exclusive and then don't return 271 // the position of the next character - return the 272 // next position behind the last found character! 273 // "a b c" find "b" must return 2,3 and not 2,4!!! 274 if (sres.endOffset[k]) 275 sres.endOffset[k] = offset[sres.endOffset[k]-1] + 1; 276 } 277 } 278 else 279 { 280 sres = (this->*fnForward)( in_str, startPos, endPos ); 281 } 282 283 if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP) 284 { 285 SearchResult sres2; 286 287 in_str = OUString(searchStr); 288 com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 289 290 in_str = xTranslit2->transliterate( searchStr, 0, in_str.getLength(), offset ); 291 292 if( startPos ) 293 startPos = FindPosInSeq_Impl( offset, startPos ); 294 295 if( endPos < searchStr.getLength() ) 296 endPos = FindPosInSeq_Impl( offset, endPos ); 297 else 298 endPos = in_str.getLength(); 299 300 bUsePrimarySrchStr = false; 301 sres2 = (this->*fnForward)( in_str, startPos, endPos ); 302 303 for ( int k = 0; k < sres2.startOffset.getLength(); k++ ) 304 { 305 if (sres2.startOffset[k]) 306 sres2.startOffset[k] = offset[sres2.startOffset[k]-1] + 1; 307 if (sres2.endOffset[k]) 308 sres2.endOffset[k] = offset[sres2.endOffset[k]-1] + 1; 309 } 310 311 // pick first and long one 312 if ( sres.subRegExpressions == 0) 313 return sres2; 314 if ( sres2.subRegExpressions == 1) 315 { 316 if ( sres.startOffset[0] > sres2.startOffset[0]) 317 return sres2; 318 else if ( sres.startOffset[0] == sres2.startOffset[0] && 319 sres.endOffset[0] < sres2.endOffset[0]) 320 return sres2; 321 } 322 } 323 324 return sres; 325 } 326 327 SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 328 throw(RuntimeException) 329 { 330 SearchResult sres; 331 332 OUString in_str(searchStr); 333 sal_Int32 newStartPos = startPos; 334 sal_Int32 newEndPos = endPos; 335 336 bUsePrimarySrchStr = true; 337 338 if ( xTranslit.is() ) 339 { 340 // apply only simple 1<->1 transliteration here 341 com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 342 in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset ); 343 344 // JP 20.6.2001: also the start and end positions must be corrected! 345 if( startPos < searchStr.getLength() ) 346 newStartPos = FindPosInSeq_Impl( offset, startPos ); 347 else 348 newStartPos = in_str.getLength(); 349 350 if( endPos ) 351 newEndPos = FindPosInSeq_Impl( offset, endPos ); 352 353 sres = (this->*fnBackward)( in_str, newStartPos, newEndPos ); 354 355 for ( int k = 0; k < sres.startOffset.getLength(); k++ ) 356 { 357 if (sres.startOffset[k]) 358 sres.startOffset[k] = offset[sres.startOffset[k] - 1] + 1; 359 // JP 20.6.2001: end is ever exclusive and then don't return 360 // the position of the next character - return the 361 // next position behind the last found character! 362 // "a b c" find "b" must return 2,3 and not 2,4!!! 363 if (sres.endOffset[k]) 364 sres.endOffset[k] = offset[sres.endOffset[k]]; 365 } 366 } 367 else 368 { 369 sres = (this->*fnBackward)( in_str, startPos, endPos ); 370 } 371 372 if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP ) 373 { 374 SearchResult sres2; 375 376 in_str = OUString(searchStr); 377 com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 378 379 in_str = xTranslit2->transliterate(searchStr, 0, in_str.getLength(), offset); 380 381 if( startPos < searchStr.getLength() ) 382 startPos = FindPosInSeq_Impl( offset, startPos ); 383 else 384 startPos = in_str.getLength(); 385 386 if( endPos ) 387 endPos = FindPosInSeq_Impl( offset, endPos ); 388 389 bUsePrimarySrchStr = false; 390 sres2 = (this->*fnBackward)( in_str, startPos, endPos ); 391 392 for( int k = 0; k < sres2.startOffset.getLength(); k++ ) 393 { 394 if (sres2.startOffset[k]) 395 sres2.startOffset[k] = offset[sres2.startOffset[k]-1]+1; 396 if (sres2.endOffset[k]) 397 sres2.endOffset[k] = offset[sres2.endOffset[k]-1]+1; 398 } 399 400 // pick last and long one 401 if ( sres.subRegExpressions == 0 ) 402 return sres2; 403 if ( sres2.subRegExpressions == 1 ) 404 { 405 if ( sres.startOffset[0] < sres2.startOffset[0] ) 406 return sres2; 407 if ( sres.startOffset[0] == sres2.startOffset[0] && 408 sres.endOffset[0] > sres2.endOffset[0] ) 409 return sres2; 410 } 411 } 412 413 return sres; 414 } 415 416 //--------------------------------------------------------------------- 417 418 bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const 419 { 420 bool bRet = 1; 421 if( '\x7f' != rStr[nPos]) 422 { 423 if ( !xCharClass.is() ) 424 { 425 Reference < XInterface > xI = xMSF->createInstance( 426 OUString::createFromAscii( "com.sun.star.i18n.CharacterClassification")); 427 if( xI.is() ) 428 xI->queryInterface( ::getCppuType( 429 (const Reference< XCharacterClassification >*)0)) 430 >>= xCharClass; 431 } 432 if ( xCharClass.is() ) 433 { 434 sal_Int32 nCType = xCharClass->getCharacterType( rStr, nPos, 435 aSrchPara.Locale ); 436 if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA | 437 KCharacterType::LETTER ) & nCType ) ) 438 bRet = 0; 439 } 440 } 441 return bRet; 442 } 443 444 // --------- helper methods for Boyer-Moore like text searching ---------- 445 // TODO: use ICU's regex UREGEX_LITERAL mode instead when it becomes available 446 447 void TextSearch::MakeForwardTab() 448 { 449 // create the jumptable for the search text 450 if( pJumpTable ) 451 { 452 if( bIsForwardTab ) 453 return ; // the jumpTable is ok 454 delete pJumpTable; 455 } 456 bIsForwardTab = true; 457 458 sal_Int32 n, nLen = sSrchStr.getLength(); 459 pJumpTable = new TextSearchJumpTable; 460 461 for( n = 0; n < nLen - 1; ++n ) 462 { 463 sal_Unicode cCh = sSrchStr[n]; 464 sal_Int32 nDiff = nLen - n - 1; 465 TextSearchJumpTable::value_type aEntry( cCh, nDiff ); 466 467 ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 468 pJumpTable->insert( aEntry ); 469 if ( !aPair.second ) 470 (*(aPair.first)).second = nDiff; 471 } 472 } 473 474 void TextSearch::MakeForwardTab2() 475 { 476 // create the jumptable for the search text 477 if( pJumpTable2 ) 478 { 479 if( bIsForwardTab ) 480 return ; // the jumpTable is ok 481 delete pJumpTable2; 482 } 483 bIsForwardTab = true; 484 485 sal_Int32 n, nLen = sSrchStr2.getLength(); 486 pJumpTable2 = new TextSearchJumpTable; 487 488 for( n = 0; n < nLen - 1; ++n ) 489 { 490 sal_Unicode cCh = sSrchStr2[n]; 491 sal_Int32 nDiff = nLen - n - 1; 492 493 TextSearchJumpTable::value_type aEntry( cCh, nDiff ); 494 ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 495 pJumpTable2->insert( aEntry ); 496 if ( !aPair.second ) 497 (*(aPair.first)).second = nDiff; 498 } 499 } 500 501 void TextSearch::MakeBackwardTab() 502 { 503 // create the jumptable for the search text 504 if( pJumpTable ) 505 { 506 if( !bIsForwardTab ) 507 return ; // the jumpTable is ok 508 delete pJumpTable; 509 } 510 bIsForwardTab = false; 511 512 sal_Int32 n, nLen = sSrchStr.getLength(); 513 pJumpTable = new TextSearchJumpTable; 514 515 for( n = nLen-1; n > 0; --n ) 516 { 517 sal_Unicode cCh = sSrchStr[n]; 518 TextSearchJumpTable::value_type aEntry( cCh, n ); 519 ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 520 pJumpTable->insert( aEntry ); 521 if ( !aPair.second ) 522 (*(aPair.first)).second = n; 523 } 524 } 525 526 void TextSearch::MakeBackwardTab2() 527 { 528 // create the jumptable for the search text 529 if( pJumpTable2 ) 530 { 531 if( !bIsForwardTab ) 532 return ; // the jumpTable is ok 533 delete pJumpTable2; 534 } 535 bIsForwardTab = false; 536 537 sal_Int32 n, nLen = sSrchStr2.getLength(); 538 pJumpTable2 = new TextSearchJumpTable; 539 540 for( n = nLen-1; n > 0; --n ) 541 { 542 sal_Unicode cCh = sSrchStr2[n]; 543 TextSearchJumpTable::value_type aEntry( cCh, n ); 544 ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 545 pJumpTable2->insert( aEntry ); 546 if ( !aPair.second ) 547 (*(aPair.first)).second = n; 548 } 549 } 550 551 sal_Int32 TextSearch::GetDiff( const sal_Unicode cChr ) const 552 { 553 TextSearchJumpTable *pJump; 554 OUString sSearchKey; 555 556 if ( bUsePrimarySrchStr ) { 557 pJump = pJumpTable; 558 sSearchKey = sSrchStr; 559 } else { 560 pJump = pJumpTable2; 561 sSearchKey = sSrchStr2; 562 } 563 564 TextSearchJumpTable::const_iterator iLook = pJump->find( cChr ); 565 if ( iLook == pJump->end() ) 566 return sSearchKey.getLength(); 567 return (*iLook).second; 568 } 569 570 571 // TextSearch::NSrchFrwrd is mis-optimized on unxsoli (#i105945#) 572 SearchResult TextSearch::NSrchFrwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 573 throw(RuntimeException) 574 { 575 SearchResult aRet; 576 aRet.subRegExpressions = 0; 577 578 OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2; 579 580 OUString aStr( searchStr ); 581 sal_Int32 nSuchIdx = aStr.getLength(); 582 sal_Int32 nEnde = endPos; 583 if( !nSuchIdx || !sSearchKey.getLength() || sSearchKey.getLength() > nSuchIdx ) 584 return aRet; 585 586 587 if( nEnde < sSearchKey.getLength() ) // position inside the search region ? 588 return aRet; 589 590 nEnde -= sSearchKey.getLength(); 591 592 if (bUsePrimarySrchStr) 593 MakeForwardTab(); // create the jumptable 594 else 595 MakeForwardTab2(); 596 597 for (sal_Int32 nCmpIdx = startPos; // start position for the search 598 nCmpIdx <= nEnde; 599 nCmpIdx += GetDiff( aStr[nCmpIdx + sSearchKey.getLength()-1])) 600 { 601 // if the match would be the completed cells, skip it. 602 if ( (checkCTLStart && !isCellStart( aStr, nCmpIdx )) || (checkCTLEnd 603 && !isCellStart( aStr, nCmpIdx + sSearchKey.getLength())) ) 604 continue; 605 606 nSuchIdx = sSearchKey.getLength() - 1; 607 while( nSuchIdx >= 0 && sSearchKey[nSuchIdx] == aStr[nCmpIdx + nSuchIdx]) 608 { 609 if( nSuchIdx == 0 ) 610 { 611 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag ) 612 { 613 sal_Int32 nFndEnd = nCmpIdx + sSearchKey.getLength(); 614 bool bAtStart = !nCmpIdx; 615 bool bAtEnd = nFndEnd == endPos; 616 bool bDelimBefore = bAtStart || IsDelimiter( aStr, nCmpIdx-1 ); 617 bool bDelimBehind = IsDelimiter( aStr, nFndEnd ); 618 // * 1 -> only one word in the paragraph 619 // * 2 -> at begin of paragraph 620 // * 3 -> at end of paragraph 621 // * 4 -> inside the paragraph 622 if( !( ( bAtStart && bAtEnd ) || // 1 623 ( bAtStart && bDelimBehind ) || // 2 624 ( bAtEnd && bDelimBefore ) || // 3 625 ( bDelimBefore && bDelimBehind ))) // 4 626 break; 627 } 628 629 aRet.subRegExpressions = 1; 630 aRet.startOffset.realloc( 1 ); 631 aRet.startOffset[ 0 ] = nCmpIdx; 632 aRet.endOffset.realloc( 1 ); 633 aRet.endOffset[ 0 ] = nCmpIdx + sSearchKey.getLength(); 634 635 return aRet; 636 } 637 else 638 nSuchIdx--; 639 } 640 } 641 return aRet; 642 } 643 644 SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 645 throw(RuntimeException) 646 { 647 SearchResult aRet; 648 aRet.subRegExpressions = 0; 649 650 OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2; 651 652 OUString aStr( searchStr ); 653 sal_Int32 nSuchIdx = aStr.getLength(); 654 sal_Int32 nEnde = endPos; 655 if( nSuchIdx == 0 || sSearchKey.getLength() == 0 || sSearchKey.getLength() > nSuchIdx) 656 return aRet; 657 658 if (bUsePrimarySrchStr) 659 MakeBackwardTab(); // create the jumptable 660 else 661 MakeBackwardTab2(); 662 663 if( nEnde == nSuchIdx ) // end position for the search 664 nEnde = sSearchKey.getLength(); 665 else 666 nEnde += sSearchKey.getLength(); 667 668 sal_Int32 nCmpIdx = startPos; // start position for the search 669 670 while (nCmpIdx >= nEnde) 671 { 672 // if the match would be the completed cells, skip it. 673 if ( (!checkCTLStart || isCellStart( aStr, nCmpIdx - 674 sSearchKey.getLength() )) && (!checkCTLEnd || 675 isCellStart( aStr, nCmpIdx))) 676 { 677 nSuchIdx = 0; 678 while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] == 679 aStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] ) 680 nSuchIdx++; 681 if( nSuchIdx >= sSearchKey.getLength() ) 682 { 683 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag ) 684 { 685 sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength(); 686 bool bAtStart = !nFndStt; 687 bool bAtEnd = nCmpIdx == startPos; 688 bool bDelimBehind = IsDelimiter( aStr, nCmpIdx ); 689 bool bDelimBefore = bAtStart || // begin of paragraph 690 IsDelimiter( aStr, nFndStt-1 ); 691 // * 1 -> only one word in the paragraph 692 // * 2 -> at begin of paragraph 693 // * 3 -> at end of paragraph 694 // * 4 -> inside the paragraph 695 if( ( bAtStart && bAtEnd ) || // 1 696 ( bAtStart && bDelimBehind ) || // 2 697 ( bAtEnd && bDelimBefore ) || // 3 698 ( bDelimBefore && bDelimBehind )) // 4 699 { 700 aRet.subRegExpressions = 1; 701 aRet.startOffset.realloc( 1 ); 702 aRet.startOffset[ 0 ] = nCmpIdx; 703 aRet.endOffset.realloc( 1 ); 704 aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength(); 705 return aRet; 706 } 707 } 708 else 709 { 710 aRet.subRegExpressions = 1; 711 aRet.startOffset.realloc( 1 ); 712 aRet.startOffset[ 0 ] = nCmpIdx; 713 aRet.endOffset.realloc( 1 ); 714 aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength(); 715 return aRet; 716 } 717 } 718 } 719 nSuchIdx = GetDiff( aStr[nCmpIdx - sSearchKey.getLength()] ); 720 if( nCmpIdx < nSuchIdx ) 721 return aRet; 722 nCmpIdx -= nSuchIdx; 723 } 724 return aRet; 725 } 726 727 //--------------------------------------------------------------------------- 728 729 SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr, 730 sal_Int32 startPos, sal_Int32 endPos ) 731 throw(RuntimeException) 732 { 733 SearchResult aRet; 734 aRet.subRegExpressions = 0; 735 if( !pRegexMatcher) 736 return aRet; 737 738 if( endPos > searchStr.getLength()) 739 endPos = searchStr.getLength(); 740 741 // use the ICU RegexMatcher to find the matches 742 UErrorCode nIcuErr = U_ZERO_ERROR; 743 const IcuUniString aSearchTargetStr( searchStr.getStr(), endPos); 744 pRegexMatcher->reset( aSearchTargetStr); 745 if( !pRegexMatcher->find( startPos, nIcuErr)) 746 return aRet; 747 748 aRet.subRegExpressions = 1; 749 aRet.startOffset.realloc( aRet.subRegExpressions); 750 aRet.endOffset.realloc( aRet.subRegExpressions); 751 aRet.startOffset[0] = pRegexMatcher->start( nIcuErr); 752 aRet.endOffset[0] = pRegexMatcher->end( nIcuErr); 753 754 return aRet; 755 } 756 757 SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr, 758 sal_Int32 startPos, sal_Int32 endPos ) 759 throw(RuntimeException) 760 { 761 // NOTE: for backwards search callers provide startPos/endPos inverted! 762 SearchResult aRet; 763 aRet.subRegExpressions = 0; 764 if( !pRegexMatcher) 765 return aRet; 766 767 if( startPos > searchStr.getLength()) 768 startPos = searchStr.getLength(); 769 770 // use the ICU RegexMatcher to find the matches 771 // TODO: use ICU's backward searching once it becomes available 772 UErrorCode nIcuErr = U_ZERO_ERROR; 773 const IcuUniString aSearchTargetStr( searchStr.getStr(), startPos); 774 pRegexMatcher->reset( aSearchTargetStr); 775 if( !pRegexMatcher->find( endPos, nIcuErr)) 776 return aRet; 777 778 aRet.subRegExpressions = 1; 779 aRet.startOffset.realloc( aRet.subRegExpressions); 780 aRet.endOffset.realloc( aRet.subRegExpressions); 781 782 do { 783 // NOTE: backward search seems to be expected to have startOfs/endOfs inverted! 784 aRet.startOffset[0] = pRegexMatcher->end( nIcuErr); 785 aRet.endOffset[0] = pRegexMatcher->start( nIcuErr); 786 } while( pRegexMatcher->find( aRet.endOffset[0]+1, nIcuErr)); 787 788 return aRet; 789 } 790 791 //--------------------------------------------------------------------------- 792 793 // search for words phonetically 794 SearchResult TextSearch::ApproxSrchFrwrd( const OUString& searchStr, 795 sal_Int32 startPos, sal_Int32 endPos ) 796 throw(RuntimeException) 797 { 798 SearchResult aRet; 799 aRet.subRegExpressions = 0; 800 801 if( !xBreak.is() ) 802 return aRet; 803 804 OUString aWTemp( searchStr ); 805 806 register sal_Int32 nStt, nEnd; 807 808 Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos, 809 aSrchPara.Locale, 810 WordType::ANYWORD_IGNOREWHITESPACES, sal_True ); 811 812 do 813 { 814 if( aWBnd.startPos >= endPos ) 815 break; 816 nStt = aWBnd.startPos < startPos ? startPos : aWBnd.startPos; 817 nEnd = aWBnd.endPos > endPos ? endPos : aWBnd.endPos; 818 819 if( nStt < nEnd && 820 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit ) 821 { 822 aRet.subRegExpressions = 1; 823 aRet.startOffset.realloc( 1 ); 824 aRet.startOffset[ 0 ] = nStt; 825 aRet.endOffset.realloc( 1 ); 826 aRet.endOffset[ 0 ] = nEnd; 827 break; 828 } 829 830 nStt = nEnd - 1; 831 aWBnd = xBreak->nextWord( aWTemp, nStt, aSrchPara.Locale, 832 WordType::ANYWORD_IGNOREWHITESPACES); 833 } while( aWBnd.startPos != aWBnd.endPos || 834 (aWBnd.endPos != aWTemp.getLength() && aWBnd.endPos != nEnd) ); 835 // #i50244# aWBnd.endPos != nEnd : in case there is _no_ word (only 836 // whitespace) in searchStr, getWordBoundary() returned startPos,startPos 837 // and nextWord() does also => don't loop forever. 838 return aRet; 839 } 840 841 SearchResult TextSearch::ApproxSrchBkwrd( const OUString& searchStr, 842 sal_Int32 startPos, sal_Int32 endPos ) 843 throw(RuntimeException) 844 { 845 SearchResult aRet; 846 aRet.subRegExpressions = 0; 847 848 if( !xBreak.is() ) 849 return aRet; 850 851 OUString aWTemp( searchStr ); 852 853 register sal_Int32 nStt, nEnd; 854 855 Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos, 856 aSrchPara.Locale, 857 WordType::ANYWORD_IGNOREWHITESPACES, sal_True ); 858 859 do 860 { 861 if( aWBnd.endPos <= endPos ) 862 break; 863 nStt = aWBnd.startPos < endPos ? endPos : aWBnd.startPos; 864 nEnd = aWBnd.endPos > startPos ? startPos : aWBnd.endPos; 865 866 if( nStt < nEnd && 867 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit ) 868 { 869 aRet.subRegExpressions = 1; 870 aRet.startOffset.realloc( 1 ); 871 aRet.startOffset[ 0 ] = nEnd; 872 aRet.endOffset.realloc( 1 ); 873 aRet.endOffset[ 0 ] = nStt; 874 break; 875 } 876 if( !nStt ) 877 break; 878 879 aWBnd = xBreak->previousWord( aWTemp, nStt, aSrchPara.Locale, 880 WordType::ANYWORD_IGNOREWHITESPACES); 881 } while( aWBnd.startPos != aWBnd.endPos || aWBnd.endPos != aWTemp.getLength() ); 882 return aRet; 883 } 884 885 886 static const sal_Char cSearchName[] = "com.sun.star.util.TextSearch"; 887 static const sal_Char cSearchImpl[] = "com.sun.star.util.TextSearch_i18n"; 888 889 static OUString getServiceName_Static() 890 { 891 return OUString::createFromAscii( cSearchName ); 892 } 893 894 static OUString getImplementationName_Static() 895 { 896 return OUString::createFromAscii( cSearchImpl ); 897 } 898 899 OUString SAL_CALL 900 TextSearch::getImplementationName() 901 throw( RuntimeException ) 902 { 903 return getImplementationName_Static(); 904 } 905 906 sal_Bool SAL_CALL 907 TextSearch::supportsService(const OUString& rServiceName) 908 throw( RuntimeException ) 909 { 910 return !rServiceName.compareToAscii( cSearchName ); 911 } 912 913 Sequence< OUString > SAL_CALL 914 TextSearch::getSupportedServiceNames(void) throw( RuntimeException ) 915 { 916 Sequence< OUString > aRet(1); 917 aRet[0] = getServiceName_Static(); 918 return aRet; 919 } 920 921 ::com::sun::star::uno::Reference< ::com::sun::star::uno::XInterface > 922 SAL_CALL TextSearch_CreateInstance( 923 const ::com::sun::star::uno::Reference< 924 ::com::sun::star::lang::XMultiServiceFactory >& rxMSF ) 925 { 926 return ::com::sun::star::uno::Reference< 927 ::com::sun::star::uno::XInterface >( 928 (::cppu::OWeakObject*) new TextSearch( rxMSF ) ); 929 } 930 931 extern "C" 932 { 933 934 void SAL_CALL component_getImplementationEnvironment( 935 const sal_Char** ppEnvTypeName, uno_Environment** /*ppEnv*/ ) 936 { 937 *ppEnvTypeName = CPPU_CURRENT_LANGUAGE_BINDING_NAME; 938 } 939 940 void* SAL_CALL component_getFactory( const sal_Char* sImplementationName, 941 void* _pServiceManager, void* /*_pRegistryKey*/ ) 942 { 943 void* pRet = NULL; 944 945 ::com::sun::star::lang::XMultiServiceFactory* pServiceManager = 946 reinterpret_cast< ::com::sun::star::lang::XMultiServiceFactory* > 947 ( _pServiceManager ); 948 ::com::sun::star::uno::Reference< 949 ::com::sun::star::lang::XSingleServiceFactory > xFactory; 950 951 if ( 0 == rtl_str_compare( sImplementationName, cSearchImpl) ) 952 { 953 ::com::sun::star::uno::Sequence< ::rtl::OUString > aServiceNames(1); 954 aServiceNames[0] = getServiceName_Static(); 955 xFactory = ::cppu::createSingleFactory( 956 pServiceManager, getImplementationName_Static(), 957 &TextSearch_CreateInstance, aServiceNames ); 958 } 959 960 if ( xFactory.is() ) 961 { 962 xFactory->acquire(); 963 pRet = xFactory.get(); 964 } 965 966 return pRet; 967 } 968 969 } // extern "C" 970