1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 // MARKER(update_precomp.py): autogen include statement, do not remove 25 #include "precompiled_i18npool.hxx" 26 27 #include "textsearch.hxx" 28 #include "levdis.hxx" 29 #include <com/sun/star/lang/Locale.hpp> 30 #include <com/sun/star/lang/XMultiServiceFactory.hpp> 31 #include <comphelper/processfactory.hxx> 32 #include <com/sun/star/i18n/UnicodeType.hpp> 33 #include <com/sun/star/util/SearchFlags.hpp> 34 #include <com/sun/star/i18n/WordType.hpp> 35 #include <com/sun/star/i18n/ScriptType.hpp> 36 #include <com/sun/star/i18n/CharacterIteratorMode.hpp> 37 #include <com/sun/star/i18n/KCharacterType.hpp> 38 #include <com/sun/star/registry/XRegistryKey.hpp> 39 #include <cppuhelper/factory.hxx> 40 #include <cppuhelper/weak.hxx> 41 42 #ifdef _MSC_VER 43 // get rid of that dumb compiler warning 44 // identifier was truncated to '255' characters in the debug information 45 // for STL template usage, if .pdb files are to be created 46 #pragma warning( disable: 4786 ) 47 #endif 48 49 #include <string.h> 50 51 using namespace ::com::sun::star::util; 52 using namespace ::com::sun::star::uno; 53 using namespace ::com::sun::star::lang; 54 using namespace ::com::sun::star::i18n; 55 using namespace ::rtl; 56 57 static sal_Int32 COMPLEX_TRANS_MASK_TMP = 58 TransliterationModules_ignoreBaFa_ja_JP | 59 TransliterationModules_ignoreIterationMark_ja_JP | 60 TransliterationModules_ignoreTiJi_ja_JP | 61 TransliterationModules_ignoreHyuByu_ja_JP | 62 TransliterationModules_ignoreSeZe_ja_JP | 63 TransliterationModules_ignoreIandEfollowedByYa_ja_JP | 64 TransliterationModules_ignoreKiKuFollowedBySa_ja_JP | 65 TransliterationModules_ignoreProlongedSoundMark_ja_JP; 66 static const sal_Int32 SIMPLE_TRANS_MASK = ~(COMPLEX_TRANS_MASK_TMP | TransliterationModules_IGNORE_WIDTH) | TransliterationModules_FULLWIDTH_HALFWIDTH; 67 static const sal_Int32 COMPLEX_TRANS_MASK = COMPLEX_TRANS_MASK_TMP | TransliterationModules_IGNORE_KANA | TransliterationModules_FULLWIDTH_HALFWIDTH; 68 // Above 2 transliteration is simple but need to take effect in 69 // complex transliteration 70 71 TextSearch::TextSearch(const Reference < XMultiServiceFactory > & rxMSF) 72 : xMSF( rxMSF ) 73 , pJumpTable( 0 ) 74 , pJumpTable2( 0 ) 75 , pRegexMatcher( NULL ) 76 , pWLD( 0 ) 77 { 78 SearchOptions aOpt; 79 aOpt.algorithmType = SearchAlgorithms_ABSOLUTE; 80 aOpt.searchFlag = SearchFlags::ALL_IGNORE_CASE; 81 //aOpt.Locale = ???; 82 setOptions( aOpt ); 83 } 84 85 TextSearch::~TextSearch() 86 { 87 delete pRegexMatcher; 88 delete pWLD; 89 delete pJumpTable; 90 delete pJumpTable2; 91 } 92 93 void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeException ) 94 { 95 aSrchPara = rOptions; 96 97 delete pRegexMatcher, pRegexMatcher = NULL; 98 delete pWLD, pWLD = 0; 99 delete pJumpTable, pJumpTable = 0; 100 delete pJumpTable2, pJumpTable2 = 0; 101 102 // Create Transliteration class 103 if( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ) 104 { 105 if( !xTranslit.is() ) 106 { 107 Reference < XInterface > xI = xMSF->createInstance( 108 OUString::createFromAscii( 109 "com.sun.star.i18n.Transliteration")); 110 if ( xI.is() ) 111 xI->queryInterface( ::getCppuType( 112 (const Reference< XExtendedTransliteration >*)0)) 113 >>= xTranslit; 114 } 115 // Load transliteration module 116 if( xTranslit.is() ) 117 xTranslit->loadModule( 118 (TransliterationModules)( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ), 119 aSrchPara.Locale); 120 } 121 else if( xTranslit.is() ) 122 xTranslit = 0; 123 124 // Create Transliteration for 2<->1, 2<->2 transliteration 125 if ( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ) 126 { 127 if( !xTranslit2.is() ) 128 { 129 Reference < XInterface > xI = xMSF->createInstance( 130 OUString::createFromAscii( 131 "com.sun.star.i18n.Transliteration")); 132 if ( xI.is() ) 133 xI->queryInterface( ::getCppuType( 134 (const Reference< XExtendedTransliteration >*)0)) 135 >>= xTranslit2; 136 } 137 // Load transliteration module 138 if( xTranslit2.is() ) 139 xTranslit2->loadModule( 140 (TransliterationModules)( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ), 141 aSrchPara.Locale); 142 } 143 144 if ( !xBreak.is() ) 145 { 146 Reference < XInterface > xI = xMSF->createInstance( 147 OUString::createFromAscii( "com.sun.star.i18n.BreakIterator")); 148 if( xI.is() ) 149 xI->queryInterface( ::getCppuType( 150 (const Reference< XBreakIterator >*)0)) 151 >>= xBreak; 152 } 153 154 sSrchStr = aSrchPara.searchString; 155 156 // use transliteration here 157 if ( xTranslit.is() && 158 aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ) 159 sSrchStr = xTranslit->transliterateString2String( 160 aSrchPara.searchString, 0, aSrchPara.searchString.getLength()); 161 162 if ( xTranslit2.is() && 163 aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ) 164 sSrchStr2 = xTranslit2->transliterateString2String( 165 aSrchPara.searchString, 0, aSrchPara.searchString.getLength()); 166 167 // When start or end of search string is a complex script type, we need to 168 // make sure the result boundary is not located in the middle of cell. 169 checkCTLStart = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 0) == 170 ScriptType::COMPLEX)); 171 checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 172 sSrchStr.getLength()-1) == ScriptType::COMPLEX)); 173 174 switch( aSrchPara.algorithmType) 175 { 176 case SearchAlgorithms_REGEXP: 177 fnForward = &TextSearch::RESrchFrwrd; 178 fnBackward = &TextSearch::RESrchBkwrd; 179 180 { 181 sal_uInt32 nIcuSearchFlags = 0; 182 // map com::sun::star::util::SearchFlags to ICU uregex.h flags 183 // TODO: REG_EXTENDED, REG_NOT_BEGINOFLINE, REG_NOT_ENDOFLINE 184 // REG_NEWLINE is neither defined properly nor used anywhere => not implemented 185 // REG_NOSUB is not used anywhere => not implemented 186 // NORM_WORD_ONLY is only used for SearchAlgorithm==Absolute 187 // LEV_RELAXED is only used for SearchAlgorithm==Approximate 188 // why is even ALL_IGNORE_CASE deprecated in UNO? because of transliteration taking care of it??? 189 if( (aSrchPara.searchFlag & com::sun::star::util::SearchFlags::ALL_IGNORE_CASE) != 0) 190 nIcuSearchFlags |= UREGEX_CASE_INSENSITIVE; 191 UErrorCode nIcuErr = U_ZERO_ERROR; 192 // assumption: transliteration doesn't mangle regexp control chars 193 OUString& rPatternStr = (aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK) ? sSrchStr 194 : ((aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK) ? sSrchStr2 : aSrchPara.searchString); 195 const IcuUniString aIcuSearchPatStr( rPatternStr.getStr(), rPatternStr.getLength()); 196 pRegexMatcher = new RegexMatcher( aIcuSearchPatStr, nIcuSearchFlags, nIcuErr); 197 if( nIcuErr) 198 { delete pRegexMatcher; pRegexMatcher = NULL;} 199 } break; 200 201 case SearchAlgorithms_APPROXIMATE: 202 fnForward = &TextSearch::ApproxSrchFrwrd; 203 fnBackward = &TextSearch::ApproxSrchBkwrd; 204 205 pWLD = new WLevDistance( sSrchStr.getStr(), aSrchPara.changedChars, 206 aSrchPara.insertedChars, aSrchPara.deletedChars, 207 0 != (SearchFlags::LEV_RELAXED & aSrchPara.searchFlag ) ); 208 209 nLimit = pWLD->GetLimit(); 210 break; 211 212 default: 213 fnForward = &TextSearch::NSrchFrwrd; 214 fnBackward = &TextSearch::NSrchBkwrd; 215 break; 216 } 217 } 218 219 sal_Int32 FindPosInSeq_Impl( const Sequence <sal_Int32>& rOff, sal_Int32 nPos ) 220 { 221 sal_Int32 nRet = 0, nEnd = rOff.getLength(); 222 while( nRet < nEnd && nPos > rOff[ nRet ] ) ++nRet; 223 return nRet; 224 } 225 226 sal_Bool TextSearch::isCellStart(const OUString& searchStr, sal_Int32 nPos) 227 throw( RuntimeException ) 228 { 229 sal_Int32 nDone; 230 return nPos == xBreak->previousCharacters(searchStr, nPos+1, 231 aSrchPara.Locale, CharacterIteratorMode::SKIPCELL, 1, nDone); 232 } 233 234 SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 235 throw( RuntimeException ) 236 { 237 SearchResult sres; 238 239 OUString in_str(searchStr); 240 sal_Int32 newStartPos = startPos; 241 sal_Int32 newEndPos = endPos; 242 243 bUsePrimarySrchStr = true; 244 245 if ( xTranslit.is() ) 246 { 247 // apply normal transliteration (1<->1, 1<->0) 248 com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 249 in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset ); 250 251 // JP 20.6.2001: also the start and end positions must be corrected! 252 if( startPos ) 253 newStartPos = FindPosInSeq_Impl( offset, startPos ); 254 255 if( endPos < searchStr.getLength() ) 256 newEndPos = FindPosInSeq_Impl( offset, endPos ); 257 else 258 newEndPos = in_str.getLength(); 259 260 sres = (this->*fnForward)( in_str, newStartPos, newEndPos ); 261 262 for ( int k = 0; k < sres.startOffset.getLength(); k++ ) 263 { 264 if (sres.startOffset[k]) 265 sres.startOffset[k] = offset[sres.startOffset[k]]; 266 // JP 20.6.2001: end is ever exclusive and then don't return 267 // the position of the next character - return the 268 // next position behind the last found character! 269 // "a b c" find "b" must return 2,3 and not 2,4!!! 270 if (sres.endOffset[k]) 271 sres.endOffset[k] = offset[sres.endOffset[k]-1] + 1; 272 } 273 } 274 else 275 { 276 sres = (this->*fnForward)( in_str, startPos, endPos ); 277 } 278 279 if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP) 280 { 281 SearchResult sres2; 282 283 in_str = OUString(searchStr); 284 com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 285 286 in_str = xTranslit2->transliterate( searchStr, 0, in_str.getLength(), offset ); 287 288 if( startPos ) 289 startPos = FindPosInSeq_Impl( offset, startPos ); 290 291 if( endPos < searchStr.getLength() ) 292 endPos = FindPosInSeq_Impl( offset, endPos ); 293 else 294 endPos = in_str.getLength(); 295 296 bUsePrimarySrchStr = false; 297 sres2 = (this->*fnForward)( in_str, startPos, endPos ); 298 299 for ( int k = 0; k < sres2.startOffset.getLength(); k++ ) 300 { 301 if (sres2.startOffset[k]) 302 sres2.startOffset[k] = offset[sres2.startOffset[k]-1] + 1; 303 if (sres2.endOffset[k]) 304 sres2.endOffset[k] = offset[sres2.endOffset[k]-1] + 1; 305 } 306 307 // pick first and long one 308 if ( sres.subRegExpressions == 0) 309 return sres2; 310 if ( sres2.subRegExpressions == 1) 311 { 312 if ( sres.startOffset[0] > sres2.startOffset[0]) 313 return sres2; 314 else if ( sres.startOffset[0] == sres2.startOffset[0] && 315 sres.endOffset[0] < sres2.endOffset[0]) 316 return sres2; 317 } 318 } 319 320 return sres; 321 } 322 323 SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 324 throw(RuntimeException) 325 { 326 SearchResult sres; 327 328 OUString in_str(searchStr); 329 sal_Int32 newStartPos = startPos; 330 sal_Int32 newEndPos = endPos; 331 332 bUsePrimarySrchStr = true; 333 334 if ( xTranslit.is() ) 335 { 336 // apply only simple 1<->1 transliteration here 337 com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 338 in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset ); 339 340 // JP 20.6.2001: also the start and end positions must be corrected! 341 if( startPos < searchStr.getLength() ) 342 newStartPos = FindPosInSeq_Impl( offset, startPos ); 343 else 344 newStartPos = in_str.getLength(); 345 346 if( endPos ) 347 newEndPos = FindPosInSeq_Impl( offset, endPos ); 348 349 sres = (this->*fnBackward)( in_str, newStartPos, newEndPos ); 350 351 for ( int k = 0; k < sres.startOffset.getLength(); k++ ) 352 { 353 if (sres.startOffset[k]) 354 sres.startOffset[k] = offset[sres.startOffset[k] - 1] + 1; 355 // JP 20.6.2001: end is ever exclusive and then don't return 356 // the position of the next character - return the 357 // next position behind the last found character! 358 // "a b c" find "b" must return 2,3 and not 2,4!!! 359 if (sres.endOffset[k]) 360 sres.endOffset[k] = offset[sres.endOffset[k]]; 361 } 362 } 363 else 364 { 365 sres = (this->*fnBackward)( in_str, startPos, endPos ); 366 } 367 368 if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP ) 369 { 370 SearchResult sres2; 371 372 in_str = OUString(searchStr); 373 com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 374 375 in_str = xTranslit2->transliterate(searchStr, 0, in_str.getLength(), offset); 376 377 if( startPos < searchStr.getLength() ) 378 startPos = FindPosInSeq_Impl( offset, startPos ); 379 else 380 startPos = in_str.getLength(); 381 382 if( endPos ) 383 endPos = FindPosInSeq_Impl( offset, endPos ); 384 385 bUsePrimarySrchStr = false; 386 sres2 = (this->*fnBackward)( in_str, startPos, endPos ); 387 388 for( int k = 0; k < sres2.startOffset.getLength(); k++ ) 389 { 390 if (sres2.startOffset[k]) 391 sres2.startOffset[k] = offset[sres2.startOffset[k]-1]+1; 392 if (sres2.endOffset[k]) 393 sres2.endOffset[k] = offset[sres2.endOffset[k]-1]+1; 394 } 395 396 // pick last and long one 397 if ( sres.subRegExpressions == 0 ) 398 return sres2; 399 if ( sres2.subRegExpressions == 1 ) 400 { 401 if ( sres.startOffset[0] < sres2.startOffset[0] ) 402 return sres2; 403 if ( sres.startOffset[0] == sres2.startOffset[0] && 404 sres.endOffset[0] > sres2.endOffset[0] ) 405 return sres2; 406 } 407 } 408 409 return sres; 410 } 411 412 //--------------------------------------------------------------------- 413 414 bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const 415 { 416 bool bRet = 1; 417 if( '\x7f' != rStr[nPos]) 418 { 419 if ( !xCharClass.is() ) 420 { 421 Reference < XInterface > xI = xMSF->createInstance( 422 OUString::createFromAscii( "com.sun.star.i18n.CharacterClassification")); 423 if( xI.is() ) 424 xI->queryInterface( ::getCppuType( 425 (const Reference< XCharacterClassification >*)0)) 426 >>= xCharClass; 427 } 428 if ( xCharClass.is() ) 429 { 430 sal_Int32 nCType = xCharClass->getCharacterType( rStr, nPos, 431 aSrchPara.Locale ); 432 if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA | 433 KCharacterType::LETTER ) & nCType ) ) 434 bRet = 0; 435 } 436 } 437 return bRet; 438 } 439 440 // --------- helper methods for Boyer-Moore like text searching ---------- 441 // TODO: use ICU's regex UREGEX_LITERAL mode instead when it becomes available 442 443 void TextSearch::MakeForwardTab() 444 { 445 // create the jumptable for the search text 446 if( pJumpTable ) 447 { 448 if( bIsForwardTab ) 449 return ; // the jumpTable is ok 450 delete pJumpTable; 451 } 452 bIsForwardTab = true; 453 454 sal_Int32 n, nLen = sSrchStr.getLength(); 455 pJumpTable = new TextSearchJumpTable; 456 457 for( n = 0; n < nLen - 1; ++n ) 458 { 459 sal_Unicode cCh = sSrchStr[n]; 460 sal_Int32 nDiff = nLen - n - 1; 461 TextSearchJumpTable::value_type aEntry( cCh, nDiff ); 462 463 ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 464 pJumpTable->insert( aEntry ); 465 if ( !aPair.second ) 466 (*(aPair.first)).second = nDiff; 467 } 468 } 469 470 void TextSearch::MakeForwardTab2() 471 { 472 // create the jumptable for the search text 473 if( pJumpTable2 ) 474 { 475 if( bIsForwardTab ) 476 return ; // the jumpTable is ok 477 delete pJumpTable2; 478 } 479 bIsForwardTab = true; 480 481 sal_Int32 n, nLen = sSrchStr2.getLength(); 482 pJumpTable2 = new TextSearchJumpTable; 483 484 for( n = 0; n < nLen - 1; ++n ) 485 { 486 sal_Unicode cCh = sSrchStr2[n]; 487 sal_Int32 nDiff = nLen - n - 1; 488 489 TextSearchJumpTable::value_type aEntry( cCh, nDiff ); 490 ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 491 pJumpTable2->insert( aEntry ); 492 if ( !aPair.second ) 493 (*(aPair.first)).second = nDiff; 494 } 495 } 496 497 void TextSearch::MakeBackwardTab() 498 { 499 // create the jumptable for the search text 500 if( pJumpTable ) 501 { 502 if( !bIsForwardTab ) 503 return ; // the jumpTable is ok 504 delete pJumpTable; 505 } 506 bIsForwardTab = false; 507 508 sal_Int32 n, nLen = sSrchStr.getLength(); 509 pJumpTable = new TextSearchJumpTable; 510 511 for( n = nLen-1; n > 0; --n ) 512 { 513 sal_Unicode cCh = sSrchStr[n]; 514 TextSearchJumpTable::value_type aEntry( cCh, n ); 515 ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 516 pJumpTable->insert( aEntry ); 517 if ( !aPair.second ) 518 (*(aPair.first)).second = n; 519 } 520 } 521 522 void TextSearch::MakeBackwardTab2() 523 { 524 // create the jumptable for the search text 525 if( pJumpTable2 ) 526 { 527 if( !bIsForwardTab ) 528 return ; // the jumpTable is ok 529 delete pJumpTable2; 530 } 531 bIsForwardTab = false; 532 533 sal_Int32 n, nLen = sSrchStr2.getLength(); 534 pJumpTable2 = new TextSearchJumpTable; 535 536 for( n = nLen-1; n > 0; --n ) 537 { 538 sal_Unicode cCh = sSrchStr2[n]; 539 TextSearchJumpTable::value_type aEntry( cCh, n ); 540 ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 541 pJumpTable2->insert( aEntry ); 542 if ( !aPair.second ) 543 (*(aPair.first)).second = n; 544 } 545 } 546 547 sal_Int32 TextSearch::GetDiff( const sal_Unicode cChr ) const 548 { 549 TextSearchJumpTable *pJump; 550 OUString sSearchKey; 551 552 if ( bUsePrimarySrchStr ) { 553 pJump = pJumpTable; 554 sSearchKey = sSrchStr; 555 } else { 556 pJump = pJumpTable2; 557 sSearchKey = sSrchStr2; 558 } 559 560 TextSearchJumpTable::const_iterator iLook = pJump->find( cChr ); 561 if ( iLook == pJump->end() ) 562 return sSearchKey.getLength(); 563 return (*iLook).second; 564 } 565 566 567 // TextSearch::NSrchFrwrd is mis-optimized on unxsoli (#i105945#) 568 SearchResult TextSearch::NSrchFrwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 569 throw(RuntimeException) 570 { 571 SearchResult aRet; 572 aRet.subRegExpressions = 0; 573 574 OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2; 575 576 OUString aStr( searchStr ); 577 sal_Int32 nSuchIdx = aStr.getLength(); 578 sal_Int32 nEnde = endPos; 579 if( !nSuchIdx || !sSearchKey.getLength() || sSearchKey.getLength() > nSuchIdx ) 580 return aRet; 581 582 583 if( nEnde < sSearchKey.getLength() ) // position inside the search region ? 584 return aRet; 585 586 nEnde -= sSearchKey.getLength(); 587 588 if (bUsePrimarySrchStr) 589 MakeForwardTab(); // create the jumptable 590 else 591 MakeForwardTab2(); 592 593 for (sal_Int32 nCmpIdx = startPos; // start position for the search 594 nCmpIdx <= nEnde; 595 nCmpIdx += GetDiff( aStr[nCmpIdx + sSearchKey.getLength()-1])) 596 { 597 // if the match would be the completed cells, skip it. 598 if ( (checkCTLStart && !isCellStart( aStr, nCmpIdx )) || (checkCTLEnd 599 && !isCellStart( aStr, nCmpIdx + sSearchKey.getLength())) ) 600 continue; 601 602 nSuchIdx = sSearchKey.getLength() - 1; 603 while( nSuchIdx >= 0 && sSearchKey[nSuchIdx] == aStr[nCmpIdx + nSuchIdx]) 604 { 605 if( nSuchIdx == 0 ) 606 { 607 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag ) 608 { 609 sal_Int32 nFndEnd = nCmpIdx + sSearchKey.getLength(); 610 bool bAtStart = !nCmpIdx; 611 bool bAtEnd = nFndEnd == endPos; 612 bool bDelimBefore = bAtStart || IsDelimiter( aStr, nCmpIdx-1 ); 613 bool bDelimBehind = IsDelimiter( aStr, nFndEnd ); 614 // * 1 -> only one word in the paragraph 615 // * 2 -> at begin of paragraph 616 // * 3 -> at end of paragraph 617 // * 4 -> inside the paragraph 618 if( !( ( bAtStart && bAtEnd ) || // 1 619 ( bAtStart && bDelimBehind ) || // 2 620 ( bAtEnd && bDelimBefore ) || // 3 621 ( bDelimBefore && bDelimBehind ))) // 4 622 break; 623 } 624 625 aRet.subRegExpressions = 1; 626 aRet.startOffset.realloc( 1 ); 627 aRet.startOffset[ 0 ] = nCmpIdx; 628 aRet.endOffset.realloc( 1 ); 629 aRet.endOffset[ 0 ] = nCmpIdx + sSearchKey.getLength(); 630 631 return aRet; 632 } 633 else 634 nSuchIdx--; 635 } 636 } 637 return aRet; 638 } 639 640 SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 641 throw(RuntimeException) 642 { 643 SearchResult aRet; 644 aRet.subRegExpressions = 0; 645 646 OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2; 647 648 OUString aStr( searchStr ); 649 sal_Int32 nSuchIdx = aStr.getLength(); 650 sal_Int32 nEnde = endPos; 651 if( nSuchIdx == 0 || sSearchKey.getLength() == 0 || sSearchKey.getLength() > nSuchIdx) 652 return aRet; 653 654 if (bUsePrimarySrchStr) 655 MakeBackwardTab(); // create the jumptable 656 else 657 MakeBackwardTab2(); 658 659 if( nEnde == nSuchIdx ) // end position for the search 660 nEnde = sSearchKey.getLength(); 661 else 662 nEnde += sSearchKey.getLength(); 663 664 sal_Int32 nCmpIdx = startPos; // start position for the search 665 666 while (nCmpIdx >= nEnde) 667 { 668 // if the match would be the completed cells, skip it. 669 if ( (!checkCTLStart || isCellStart( aStr, nCmpIdx - 670 sSearchKey.getLength() )) && (!checkCTLEnd || 671 isCellStart( aStr, nCmpIdx))) 672 { 673 nSuchIdx = 0; 674 while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] == 675 aStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] ) 676 nSuchIdx++; 677 if( nSuchIdx >= sSearchKey.getLength() ) 678 { 679 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag ) 680 { 681 sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength(); 682 bool bAtStart = !nFndStt; 683 bool bAtEnd = nCmpIdx == startPos; 684 bool bDelimBehind = IsDelimiter( aStr, nCmpIdx ); 685 bool bDelimBefore = bAtStart || // begin of paragraph 686 IsDelimiter( aStr, nFndStt-1 ); 687 // * 1 -> only one word in the paragraph 688 // * 2 -> at begin of paragraph 689 // * 3 -> at end of paragraph 690 // * 4 -> inside the paragraph 691 if( ( bAtStart && bAtEnd ) || // 1 692 ( bAtStart && bDelimBehind ) || // 2 693 ( bAtEnd && bDelimBefore ) || // 3 694 ( bDelimBefore && bDelimBehind )) // 4 695 { 696 aRet.subRegExpressions = 1; 697 aRet.startOffset.realloc( 1 ); 698 aRet.startOffset[ 0 ] = nCmpIdx; 699 aRet.endOffset.realloc( 1 ); 700 aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength(); 701 return aRet; 702 } 703 } 704 else 705 { 706 aRet.subRegExpressions = 1; 707 aRet.startOffset.realloc( 1 ); 708 aRet.startOffset[ 0 ] = nCmpIdx; 709 aRet.endOffset.realloc( 1 ); 710 aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength(); 711 return aRet; 712 } 713 } 714 } 715 nSuchIdx = GetDiff( aStr[nCmpIdx - sSearchKey.getLength()] ); 716 if( nCmpIdx < nSuchIdx ) 717 return aRet; 718 nCmpIdx -= nSuchIdx; 719 } 720 return aRet; 721 } 722 723 //--------------------------------------------------------------------------- 724 725 SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr, 726 sal_Int32 startPos, sal_Int32 endPos ) 727 throw(RuntimeException) 728 { 729 SearchResult aRet; 730 aRet.subRegExpressions = 0; 731 if( !pRegexMatcher) 732 return aRet; 733 734 if( endPos > searchStr.getLength()) 735 endPos = searchStr.getLength(); 736 737 // use the ICU RegexMatcher to find the matches 738 UErrorCode nIcuErr = U_ZERO_ERROR; 739 const IcuUniString aSearchTargetStr( searchStr.getStr(), endPos); 740 pRegexMatcher->reset( aSearchTargetStr); 741 if( !pRegexMatcher->find( startPos, nIcuErr)) 742 return aRet; 743 744 const int nGroupCount = pRegexMatcher->groupCount(); 745 aRet.subRegExpressions = nGroupCount + 1; 746 aRet.startOffset.realloc( aRet.subRegExpressions); 747 aRet.endOffset.realloc( aRet.subRegExpressions); 748 aRet.startOffset[0] = pRegexMatcher->start( nIcuErr); 749 aRet.endOffset[0] = pRegexMatcher->end( nIcuErr); 750 for( int i = 1; i <= nGroupCount; ++i) { 751 aRet.startOffset[i] = pRegexMatcher->start( i, nIcuErr); 752 aRet.endOffset[i] = pRegexMatcher->end( i, nIcuErr); 753 } 754 755 return aRet; 756 } 757 758 SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr, 759 sal_Int32 startPos, sal_Int32 endPos ) 760 throw(RuntimeException) 761 { 762 // NOTE: for backwards search callers provide startPos/endPos inverted! 763 SearchResult aRet; 764 aRet.subRegExpressions = 0; 765 if( !pRegexMatcher) 766 return aRet; 767 768 if( startPos > searchStr.getLength()) 769 startPos = searchStr.getLength(); 770 771 // use the ICU RegexMatcher to find the matches 772 // TODO: use ICU's backward searching once it becomes available 773 // as its replacement using forward search is not as good as the real thing 774 UErrorCode nIcuErr = U_ZERO_ERROR; 775 const IcuUniString aSearchTargetStr( searchStr.getStr(), startPos); 776 pRegexMatcher->reset( aSearchTargetStr); 777 if( !pRegexMatcher->find( endPos, nIcuErr)) 778 return aRet; 779 780 // find the last match 781 int nLastPos = 0; 782 do { 783 nLastPos = pRegexMatcher->start( nIcuErr); 784 } while( pRegexMatcher->find( nLastPos + 1, nIcuErr)); 785 786 // find last match again to get its details 787 pRegexMatcher->find( nLastPos, nIcuErr); 788 789 // fill in the details of the last match 790 const int nGroupCount = pRegexMatcher->groupCount(); 791 aRet.subRegExpressions = nGroupCount + 1; 792 aRet.startOffset.realloc( aRet.subRegExpressions); 793 aRet.endOffset.realloc( aRet.subRegExpressions); 794 // NOTE: existing users of backward search seem to expect startOfs/endOfs being inverted! 795 aRet.startOffset[0] = pRegexMatcher->end( nIcuErr); 796 aRet.endOffset[0] = pRegexMatcher->start( nIcuErr); 797 for( int i = 1; i <= nGroupCount; ++i) { 798 aRet.startOffset[i] = pRegexMatcher->end( i, nIcuErr); 799 aRet.endOffset[i] = pRegexMatcher->start( i, nIcuErr); 800 } 801 802 return aRet; 803 } 804 805 //--------------------------------------------------------------------------- 806 807 // search for words phonetically 808 SearchResult TextSearch::ApproxSrchFrwrd( const OUString& searchStr, 809 sal_Int32 startPos, sal_Int32 endPos ) 810 throw(RuntimeException) 811 { 812 SearchResult aRet; 813 aRet.subRegExpressions = 0; 814 815 if( !xBreak.is() ) 816 return aRet; 817 818 OUString aWTemp( searchStr ); 819 820 register sal_Int32 nStt, nEnd; 821 822 Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos, 823 aSrchPara.Locale, 824 WordType::ANYWORD_IGNOREWHITESPACES, sal_True ); 825 826 do 827 { 828 if( aWBnd.startPos >= endPos ) 829 break; 830 nStt = aWBnd.startPos < startPos ? startPos : aWBnd.startPos; 831 nEnd = aWBnd.endPos > endPos ? endPos : aWBnd.endPos; 832 833 if( nStt < nEnd && 834 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit ) 835 { 836 aRet.subRegExpressions = 1; 837 aRet.startOffset.realloc( 1 ); 838 aRet.startOffset[ 0 ] = nStt; 839 aRet.endOffset.realloc( 1 ); 840 aRet.endOffset[ 0 ] = nEnd; 841 break; 842 } 843 844 nStt = nEnd - 1; 845 aWBnd = xBreak->nextWord( aWTemp, nStt, aSrchPara.Locale, 846 WordType::ANYWORD_IGNOREWHITESPACES); 847 } while( aWBnd.startPos != aWBnd.endPos || 848 (aWBnd.endPos != aWTemp.getLength() && aWBnd.endPos != nEnd) ); 849 // #i50244# aWBnd.endPos != nEnd : in case there is _no_ word (only 850 // whitespace) in searchStr, getWordBoundary() returned startPos,startPos 851 // and nextWord() does also => don't loop forever. 852 return aRet; 853 } 854 855 SearchResult TextSearch::ApproxSrchBkwrd( const OUString& searchStr, 856 sal_Int32 startPos, sal_Int32 endPos ) 857 throw(RuntimeException) 858 { 859 SearchResult aRet; 860 aRet.subRegExpressions = 0; 861 862 if( !xBreak.is() ) 863 return aRet; 864 865 OUString aWTemp( searchStr ); 866 867 register sal_Int32 nStt, nEnd; 868 869 Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos, 870 aSrchPara.Locale, 871 WordType::ANYWORD_IGNOREWHITESPACES, sal_True ); 872 873 do 874 { 875 if( aWBnd.endPos <= endPos ) 876 break; 877 nStt = aWBnd.startPos < endPos ? endPos : aWBnd.startPos; 878 nEnd = aWBnd.endPos > startPos ? startPos : aWBnd.endPos; 879 880 if( nStt < nEnd && 881 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit ) 882 { 883 aRet.subRegExpressions = 1; 884 aRet.startOffset.realloc( 1 ); 885 aRet.startOffset[ 0 ] = nEnd; 886 aRet.endOffset.realloc( 1 ); 887 aRet.endOffset[ 0 ] = nStt; 888 break; 889 } 890 if( !nStt ) 891 break; 892 893 aWBnd = xBreak->previousWord( aWTemp, nStt, aSrchPara.Locale, 894 WordType::ANYWORD_IGNOREWHITESPACES); 895 } while( aWBnd.startPos != aWBnd.endPos || aWBnd.endPos != aWTemp.getLength() ); 896 return aRet; 897 } 898 899 900 static const sal_Char cSearchName[] = "com.sun.star.util.TextSearch"; 901 static const sal_Char cSearchImpl[] = "com.sun.star.util.TextSearch_i18n"; 902 903 static OUString getServiceName_Static() 904 { 905 return OUString::createFromAscii( cSearchName ); 906 } 907 908 static OUString getImplementationName_Static() 909 { 910 return OUString::createFromAscii( cSearchImpl ); 911 } 912 913 OUString SAL_CALL 914 TextSearch::getImplementationName() 915 throw( RuntimeException ) 916 { 917 return getImplementationName_Static(); 918 } 919 920 sal_Bool SAL_CALL 921 TextSearch::supportsService(const OUString& rServiceName) 922 throw( RuntimeException ) 923 { 924 return !rServiceName.compareToAscii( cSearchName ); 925 } 926 927 Sequence< OUString > SAL_CALL 928 TextSearch::getSupportedServiceNames(void) throw( RuntimeException ) 929 { 930 Sequence< OUString > aRet(1); 931 aRet[0] = getServiceName_Static(); 932 return aRet; 933 } 934 935 ::com::sun::star::uno::Reference< ::com::sun::star::uno::XInterface > 936 SAL_CALL TextSearch_CreateInstance( 937 const ::com::sun::star::uno::Reference< 938 ::com::sun::star::lang::XMultiServiceFactory >& rxMSF ) 939 { 940 return ::com::sun::star::uno::Reference< 941 ::com::sun::star::uno::XInterface >( 942 (::cppu::OWeakObject*) new TextSearch( rxMSF ) ); 943 } 944 945 extern "C" 946 { 947 948 void SAL_CALL component_getImplementationEnvironment( 949 const sal_Char** ppEnvTypeName, uno_Environment** /*ppEnv*/ ) 950 { 951 *ppEnvTypeName = CPPU_CURRENT_LANGUAGE_BINDING_NAME; 952 } 953 954 void* SAL_CALL component_getFactory( const sal_Char* sImplementationName, 955 void* _pServiceManager, void* /*_pRegistryKey*/ ) 956 { 957 void* pRet = NULL; 958 959 ::com::sun::star::lang::XMultiServiceFactory* pServiceManager = 960 reinterpret_cast< ::com::sun::star::lang::XMultiServiceFactory* > 961 ( _pServiceManager ); 962 ::com::sun::star::uno::Reference< 963 ::com::sun::star::lang::XSingleServiceFactory > xFactory; 964 965 if ( 0 == rtl_str_compare( sImplementationName, cSearchImpl) ) 966 { 967 ::com::sun::star::uno::Sequence< ::rtl::OUString > aServiceNames(1); 968 aServiceNames[0] = getServiceName_Static(); 969 xFactory = ::cppu::createSingleFactory( 970 pServiceManager, getImplementationName_Static(), 971 &TextSearch_CreateInstance, aServiceNames ); 972 } 973 974 if ( xFactory.is() ) 975 { 976 xFactory->acquire(); 977 pRet = xFactory.get(); 978 } 979 980 return pRet; 981 } 982 983 } // extern "C" 984