1*cdf0e10cSrcweir /************************************************************************* 2*cdf0e10cSrcweir * 3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4*cdf0e10cSrcweir * 5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates. 6*cdf0e10cSrcweir * 7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite 8*cdf0e10cSrcweir * 9*cdf0e10cSrcweir * This file is part of OpenOffice.org. 10*cdf0e10cSrcweir * 11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify 12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3 13*cdf0e10cSrcweir * only, as published by the Free Software Foundation. 14*cdf0e10cSrcweir * 15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful, 16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of 17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details 19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code). 20*cdf0e10cSrcweir * 21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License 22*cdf0e10cSrcweir * version 3 along with OpenOffice.org. If not, see 23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html> 24*cdf0e10cSrcweir * for a copy of the LGPLv3 License. 25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir 28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 29*cdf0e10cSrcweir #include "precompiled_i18npool.hxx" 30*cdf0e10cSrcweir 31*cdf0e10cSrcweir #include "textsearch.hxx" 32*cdf0e10cSrcweir #include "levdis.hxx" 33*cdf0e10cSrcweir #include <regexp/reclass.hxx> 34*cdf0e10cSrcweir #include <com/sun/star/lang/Locale.hpp> 35*cdf0e10cSrcweir #include <com/sun/star/lang/XMultiServiceFactory.hpp> 36*cdf0e10cSrcweir #include <comphelper/processfactory.hxx> 37*cdf0e10cSrcweir #include <com/sun/star/i18n/UnicodeType.hpp> 38*cdf0e10cSrcweir #include <com/sun/star/util/SearchFlags.hpp> 39*cdf0e10cSrcweir #include <com/sun/star/i18n/WordType.hpp> 40*cdf0e10cSrcweir #include <com/sun/star/i18n/ScriptType.hpp> 41*cdf0e10cSrcweir #include <com/sun/star/i18n/CharacterIteratorMode.hpp> 42*cdf0e10cSrcweir #include <com/sun/star/i18n/KCharacterType.hpp> 43*cdf0e10cSrcweir #include <com/sun/star/registry/XRegistryKey.hpp> 44*cdf0e10cSrcweir #include <cppuhelper/factory.hxx> 45*cdf0e10cSrcweir #include <cppuhelper/weak.hxx> 46*cdf0e10cSrcweir 47*cdf0e10cSrcweir #ifdef _MSC_VER 48*cdf0e10cSrcweir // get rid of that dumb compiler warning 49*cdf0e10cSrcweir // identifier was truncated to '255' characters in the debug information 50*cdf0e10cSrcweir // for STL template usage, if .pdb files are to be created 51*cdf0e10cSrcweir #pragma warning( disable: 4786 ) 52*cdf0e10cSrcweir #endif 53*cdf0e10cSrcweir 54*cdf0e10cSrcweir #include <string.h> 55*cdf0e10cSrcweir 56*cdf0e10cSrcweir using namespace ::com::sun::star::util; 57*cdf0e10cSrcweir using namespace ::com::sun::star::uno; 58*cdf0e10cSrcweir using namespace ::com::sun::star::lang; 59*cdf0e10cSrcweir using namespace ::com::sun::star::i18n; 60*cdf0e10cSrcweir using namespace ::rtl; 61*cdf0e10cSrcweir 62*cdf0e10cSrcweir static sal_Int32 COMPLEX_TRANS_MASK_TMP = 63*cdf0e10cSrcweir TransliterationModules_ignoreBaFa_ja_JP | 64*cdf0e10cSrcweir TransliterationModules_ignoreIterationMark_ja_JP | 65*cdf0e10cSrcweir TransliterationModules_ignoreTiJi_ja_JP | 66*cdf0e10cSrcweir TransliterationModules_ignoreHyuByu_ja_JP | 67*cdf0e10cSrcweir TransliterationModules_ignoreSeZe_ja_JP | 68*cdf0e10cSrcweir TransliterationModules_ignoreIandEfollowedByYa_ja_JP | 69*cdf0e10cSrcweir TransliterationModules_ignoreKiKuFollowedBySa_ja_JP | 70*cdf0e10cSrcweir TransliterationModules_ignoreProlongedSoundMark_ja_JP; 71*cdf0e10cSrcweir static const sal_Int32 SIMPLE_TRANS_MASK = 0xffffffff ^ COMPLEX_TRANS_MASK_TMP; 72*cdf0e10cSrcweir static const sal_Int32 COMPLEX_TRANS_MASK = 73*cdf0e10cSrcweir COMPLEX_TRANS_MASK_TMP | 74*cdf0e10cSrcweir TransliterationModules_IGNORE_KANA | 75*cdf0e10cSrcweir TransliterationModules_IGNORE_WIDTH; 76*cdf0e10cSrcweir // Above 2 transliteration is simple but need to take effect in 77*cdf0e10cSrcweir // complex transliteration 78*cdf0e10cSrcweir 79*cdf0e10cSrcweir TextSearch::TextSearch(const Reference < XMultiServiceFactory > & rxMSF) 80*cdf0e10cSrcweir : xMSF( rxMSF ) 81*cdf0e10cSrcweir , pJumpTable( 0 ) 82*cdf0e10cSrcweir , pJumpTable2( 0 ) 83*cdf0e10cSrcweir , pRegExp( 0 ) 84*cdf0e10cSrcweir , pWLD( 0 ) 85*cdf0e10cSrcweir { 86*cdf0e10cSrcweir SearchOptions aOpt; 87*cdf0e10cSrcweir aOpt.algorithmType = SearchAlgorithms_ABSOLUTE; 88*cdf0e10cSrcweir aOpt.searchFlag = SearchFlags::ALL_IGNORE_CASE; 89*cdf0e10cSrcweir //aOpt.Locale = ???; 90*cdf0e10cSrcweir setOptions( aOpt ); 91*cdf0e10cSrcweir } 92*cdf0e10cSrcweir 93*cdf0e10cSrcweir TextSearch::~TextSearch() 94*cdf0e10cSrcweir { 95*cdf0e10cSrcweir delete pRegExp; 96*cdf0e10cSrcweir delete pWLD; 97*cdf0e10cSrcweir delete pJumpTable; 98*cdf0e10cSrcweir delete pJumpTable2; 99*cdf0e10cSrcweir } 100*cdf0e10cSrcweir 101*cdf0e10cSrcweir void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeException ) 102*cdf0e10cSrcweir { 103*cdf0e10cSrcweir aSrchPara = rOptions; 104*cdf0e10cSrcweir 105*cdf0e10cSrcweir delete pRegExp, pRegExp = 0; 106*cdf0e10cSrcweir delete pWLD, pWLD = 0; 107*cdf0e10cSrcweir delete pJumpTable, pJumpTable = 0; 108*cdf0e10cSrcweir delete pJumpTable2, pJumpTable2 = 0; 109*cdf0e10cSrcweir 110*cdf0e10cSrcweir // Create Transliteration class 111*cdf0e10cSrcweir if( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ) 112*cdf0e10cSrcweir { 113*cdf0e10cSrcweir if( !xTranslit.is() ) 114*cdf0e10cSrcweir { 115*cdf0e10cSrcweir Reference < XInterface > xI = xMSF->createInstance( 116*cdf0e10cSrcweir OUString::createFromAscii( 117*cdf0e10cSrcweir "com.sun.star.i18n.Transliteration")); 118*cdf0e10cSrcweir if ( xI.is() ) 119*cdf0e10cSrcweir xI->queryInterface( ::getCppuType( 120*cdf0e10cSrcweir (const Reference< XExtendedTransliteration >*)0)) 121*cdf0e10cSrcweir >>= xTranslit; 122*cdf0e10cSrcweir } 123*cdf0e10cSrcweir // Load transliteration module 124*cdf0e10cSrcweir if( xTranslit.is() ) 125*cdf0e10cSrcweir xTranslit->loadModule( 126*cdf0e10cSrcweir (TransliterationModules)( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ), 127*cdf0e10cSrcweir aSrchPara.Locale); 128*cdf0e10cSrcweir } 129*cdf0e10cSrcweir else if( xTranslit.is() ) 130*cdf0e10cSrcweir xTranslit = 0; 131*cdf0e10cSrcweir 132*cdf0e10cSrcweir // Create Transliteration for 2<->1, 2<->2 transliteration 133*cdf0e10cSrcweir if ( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ) 134*cdf0e10cSrcweir { 135*cdf0e10cSrcweir if( !xTranslit2.is() ) 136*cdf0e10cSrcweir { 137*cdf0e10cSrcweir Reference < XInterface > xI = xMSF->createInstance( 138*cdf0e10cSrcweir OUString::createFromAscii( 139*cdf0e10cSrcweir "com.sun.star.i18n.Transliteration")); 140*cdf0e10cSrcweir if ( xI.is() ) 141*cdf0e10cSrcweir xI->queryInterface( ::getCppuType( 142*cdf0e10cSrcweir (const Reference< XExtendedTransliteration >*)0)) 143*cdf0e10cSrcweir >>= xTranslit2; 144*cdf0e10cSrcweir } 145*cdf0e10cSrcweir // Load transliteration module 146*cdf0e10cSrcweir if( xTranslit2.is() ) 147*cdf0e10cSrcweir xTranslit2->loadModule( 148*cdf0e10cSrcweir (TransliterationModules)( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ), 149*cdf0e10cSrcweir aSrchPara.Locale); 150*cdf0e10cSrcweir } 151*cdf0e10cSrcweir 152*cdf0e10cSrcweir if ( !xBreak.is() ) 153*cdf0e10cSrcweir { 154*cdf0e10cSrcweir Reference < XInterface > xI = xMSF->createInstance( 155*cdf0e10cSrcweir OUString::createFromAscii( "com.sun.star.i18n.BreakIterator")); 156*cdf0e10cSrcweir if( xI.is() ) 157*cdf0e10cSrcweir xI->queryInterface( ::getCppuType( 158*cdf0e10cSrcweir (const Reference< XBreakIterator >*)0)) 159*cdf0e10cSrcweir >>= xBreak; 160*cdf0e10cSrcweir } 161*cdf0e10cSrcweir 162*cdf0e10cSrcweir sSrchStr = aSrchPara.searchString; 163*cdf0e10cSrcweir 164*cdf0e10cSrcweir // use transliteration here, but only if not RegEx, which does it different 165*cdf0e10cSrcweir if ( aSrchPara.algorithmType != SearchAlgorithms_REGEXP && xTranslit.is() && 166*cdf0e10cSrcweir aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ) 167*cdf0e10cSrcweir sSrchStr = xTranslit->transliterateString2String( 168*cdf0e10cSrcweir aSrchPara.searchString, 0, aSrchPara.searchString.getLength()); 169*cdf0e10cSrcweir 170*cdf0e10cSrcweir if ( aSrchPara.algorithmType != SearchAlgorithms_REGEXP && xTranslit2.is() && 171*cdf0e10cSrcweir aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ) 172*cdf0e10cSrcweir sSrchStr2 = xTranslit2->transliterateString2String( 173*cdf0e10cSrcweir aSrchPara.searchString, 0, aSrchPara.searchString.getLength()); 174*cdf0e10cSrcweir 175*cdf0e10cSrcweir // When start or end of search string is a complex script type, we need to 176*cdf0e10cSrcweir // make sure the result boundary is not located in the middle of cell. 177*cdf0e10cSrcweir checkCTLStart = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 0) == 178*cdf0e10cSrcweir ScriptType::COMPLEX)); 179*cdf0e10cSrcweir checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 180*cdf0e10cSrcweir sSrchStr.getLength()-1) == ScriptType::COMPLEX)); 181*cdf0e10cSrcweir 182*cdf0e10cSrcweir if ( aSrchPara.algorithmType == SearchAlgorithms_REGEXP ) 183*cdf0e10cSrcweir { 184*cdf0e10cSrcweir fnForward = &TextSearch::RESrchFrwrd; 185*cdf0e10cSrcweir fnBackward = &TextSearch::RESrchBkwrd; 186*cdf0e10cSrcweir 187*cdf0e10cSrcweir pRegExp = new Regexpr( aSrchPara, xTranslit ); 188*cdf0e10cSrcweir } 189*cdf0e10cSrcweir else 190*cdf0e10cSrcweir { 191*cdf0e10cSrcweir if ( aSrchPara.algorithmType == SearchAlgorithms_APPROXIMATE ) 192*cdf0e10cSrcweir { 193*cdf0e10cSrcweir fnForward = &TextSearch::ApproxSrchFrwrd; 194*cdf0e10cSrcweir fnBackward = &TextSearch::ApproxSrchBkwrd; 195*cdf0e10cSrcweir 196*cdf0e10cSrcweir pWLD = new WLevDistance( sSrchStr.getStr(), aSrchPara.changedChars, 197*cdf0e10cSrcweir aSrchPara.insertedChars, aSrchPara.deletedChars, 198*cdf0e10cSrcweir 0 != (SearchFlags::LEV_RELAXED & aSrchPara.searchFlag ) ); 199*cdf0e10cSrcweir 200*cdf0e10cSrcweir nLimit = pWLD->GetLimit(); 201*cdf0e10cSrcweir } 202*cdf0e10cSrcweir else 203*cdf0e10cSrcweir { 204*cdf0e10cSrcweir fnForward = &TextSearch::NSrchFrwrd; 205*cdf0e10cSrcweir fnBackward = &TextSearch::NSrchBkwrd; 206*cdf0e10cSrcweir } 207*cdf0e10cSrcweir } 208*cdf0e10cSrcweir } 209*cdf0e10cSrcweir 210*cdf0e10cSrcweir sal_Int32 FindPosInSeq_Impl( const Sequence <sal_Int32>& rOff, sal_Int32 nPos ) 211*cdf0e10cSrcweir { 212*cdf0e10cSrcweir sal_Int32 nRet = 0, nEnd = rOff.getLength(); 213*cdf0e10cSrcweir while( nRet < nEnd && nPos > rOff[ nRet ] ) ++nRet; 214*cdf0e10cSrcweir return nRet; 215*cdf0e10cSrcweir } 216*cdf0e10cSrcweir 217*cdf0e10cSrcweir sal_Bool TextSearch::isCellStart(const OUString& searchStr, sal_Int32 nPos) 218*cdf0e10cSrcweir throw( RuntimeException ) 219*cdf0e10cSrcweir { 220*cdf0e10cSrcweir sal_Int32 nDone; 221*cdf0e10cSrcweir return nPos == xBreak->previousCharacters(searchStr, nPos+1, 222*cdf0e10cSrcweir aSrchPara.Locale, CharacterIteratorMode::SKIPCELL, 1, nDone); 223*cdf0e10cSrcweir } 224*cdf0e10cSrcweir 225*cdf0e10cSrcweir SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 226*cdf0e10cSrcweir throw( RuntimeException ) 227*cdf0e10cSrcweir { 228*cdf0e10cSrcweir SearchResult sres; 229*cdf0e10cSrcweir 230*cdf0e10cSrcweir OUString in_str(searchStr); 231*cdf0e10cSrcweir sal_Int32 newStartPos = startPos; 232*cdf0e10cSrcweir sal_Int32 newEndPos = endPos; 233*cdf0e10cSrcweir 234*cdf0e10cSrcweir bUsePrimarySrchStr = true; 235*cdf0e10cSrcweir 236*cdf0e10cSrcweir if ( xTranslit.is() ) 237*cdf0e10cSrcweir { 238*cdf0e10cSrcweir // apply normal transliteration (1<->1, 1<->0) 239*cdf0e10cSrcweir com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 240*cdf0e10cSrcweir in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset ); 241*cdf0e10cSrcweir 242*cdf0e10cSrcweir // JP 20.6.2001: also the start and end positions must be corrected! 243*cdf0e10cSrcweir if( startPos ) 244*cdf0e10cSrcweir newStartPos = FindPosInSeq_Impl( offset, startPos ); 245*cdf0e10cSrcweir 246*cdf0e10cSrcweir if( endPos < searchStr.getLength() ) 247*cdf0e10cSrcweir newEndPos = FindPosInSeq_Impl( offset, endPos ); 248*cdf0e10cSrcweir else 249*cdf0e10cSrcweir newEndPos = in_str.getLength(); 250*cdf0e10cSrcweir 251*cdf0e10cSrcweir sres = (this->*fnForward)( in_str, newStartPos, newEndPos ); 252*cdf0e10cSrcweir 253*cdf0e10cSrcweir for ( int k = 0; k < sres.startOffset.getLength(); k++ ) 254*cdf0e10cSrcweir { 255*cdf0e10cSrcweir if (sres.startOffset[k]) 256*cdf0e10cSrcweir sres.startOffset[k] = offset[sres.startOffset[k]]; 257*cdf0e10cSrcweir // JP 20.6.2001: end is ever exclusive and then don't return 258*cdf0e10cSrcweir // the position of the next character - return the 259*cdf0e10cSrcweir // next position behind the last found character! 260*cdf0e10cSrcweir // "a b c" find "b" must return 2,3 and not 2,4!!! 261*cdf0e10cSrcweir if (sres.endOffset[k]) 262*cdf0e10cSrcweir sres.endOffset[k] = offset[sres.endOffset[k]-1] + 1; 263*cdf0e10cSrcweir } 264*cdf0e10cSrcweir } 265*cdf0e10cSrcweir else 266*cdf0e10cSrcweir { 267*cdf0e10cSrcweir sres = (this->*fnForward)( in_str, startPos, endPos ); 268*cdf0e10cSrcweir } 269*cdf0e10cSrcweir 270*cdf0e10cSrcweir if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP) 271*cdf0e10cSrcweir { 272*cdf0e10cSrcweir SearchResult sres2; 273*cdf0e10cSrcweir 274*cdf0e10cSrcweir in_str = OUString(searchStr); 275*cdf0e10cSrcweir com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 276*cdf0e10cSrcweir 277*cdf0e10cSrcweir in_str = xTranslit2->transliterate( searchStr, 0, in_str.getLength(), offset ); 278*cdf0e10cSrcweir 279*cdf0e10cSrcweir if( startPos ) 280*cdf0e10cSrcweir startPos = FindPosInSeq_Impl( offset, startPos ); 281*cdf0e10cSrcweir 282*cdf0e10cSrcweir if( endPos < searchStr.getLength() ) 283*cdf0e10cSrcweir endPos = FindPosInSeq_Impl( offset, endPos ); 284*cdf0e10cSrcweir else 285*cdf0e10cSrcweir endPos = in_str.getLength(); 286*cdf0e10cSrcweir 287*cdf0e10cSrcweir bUsePrimarySrchStr = false; 288*cdf0e10cSrcweir sres2 = (this->*fnForward)( in_str, startPos, endPos ); 289*cdf0e10cSrcweir 290*cdf0e10cSrcweir for ( int k = 0; k < sres2.startOffset.getLength(); k++ ) 291*cdf0e10cSrcweir { 292*cdf0e10cSrcweir if (sres2.startOffset[k]) 293*cdf0e10cSrcweir sres2.startOffset[k] = offset[sres2.startOffset[k]-1] + 1; 294*cdf0e10cSrcweir if (sres2.endOffset[k]) 295*cdf0e10cSrcweir sres2.endOffset[k] = offset[sres2.endOffset[k]-1] + 1; 296*cdf0e10cSrcweir } 297*cdf0e10cSrcweir 298*cdf0e10cSrcweir // pick first and long one 299*cdf0e10cSrcweir if ( sres.subRegExpressions == 0) 300*cdf0e10cSrcweir return sres2; 301*cdf0e10cSrcweir if ( sres2.subRegExpressions == 1) 302*cdf0e10cSrcweir { 303*cdf0e10cSrcweir if ( sres.startOffset[0] > sres2.startOffset[0]) 304*cdf0e10cSrcweir return sres2; 305*cdf0e10cSrcweir else if ( sres.startOffset[0] == sres2.startOffset[0] && 306*cdf0e10cSrcweir sres.endOffset[0] < sres2.endOffset[0]) 307*cdf0e10cSrcweir return sres2; 308*cdf0e10cSrcweir } 309*cdf0e10cSrcweir } 310*cdf0e10cSrcweir 311*cdf0e10cSrcweir return sres; 312*cdf0e10cSrcweir } 313*cdf0e10cSrcweir 314*cdf0e10cSrcweir SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 315*cdf0e10cSrcweir throw(RuntimeException) 316*cdf0e10cSrcweir { 317*cdf0e10cSrcweir SearchResult sres; 318*cdf0e10cSrcweir 319*cdf0e10cSrcweir OUString in_str(searchStr); 320*cdf0e10cSrcweir sal_Int32 newStartPos = startPos; 321*cdf0e10cSrcweir sal_Int32 newEndPos = endPos; 322*cdf0e10cSrcweir 323*cdf0e10cSrcweir bUsePrimarySrchStr = true; 324*cdf0e10cSrcweir 325*cdf0e10cSrcweir if ( xTranslit.is() ) 326*cdf0e10cSrcweir { 327*cdf0e10cSrcweir // apply only simple 1<->1 transliteration here 328*cdf0e10cSrcweir com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 329*cdf0e10cSrcweir in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset ); 330*cdf0e10cSrcweir 331*cdf0e10cSrcweir // JP 20.6.2001: also the start and end positions must be corrected! 332*cdf0e10cSrcweir if( startPos < searchStr.getLength() ) 333*cdf0e10cSrcweir newStartPos = FindPosInSeq_Impl( offset, startPos ); 334*cdf0e10cSrcweir else 335*cdf0e10cSrcweir newStartPos = in_str.getLength(); 336*cdf0e10cSrcweir 337*cdf0e10cSrcweir if( endPos ) 338*cdf0e10cSrcweir newEndPos = FindPosInSeq_Impl( offset, endPos ); 339*cdf0e10cSrcweir 340*cdf0e10cSrcweir sres = (this->*fnBackward)( in_str, newStartPos, newEndPos ); 341*cdf0e10cSrcweir 342*cdf0e10cSrcweir for ( int k = 0; k < sres.startOffset.getLength(); k++ ) 343*cdf0e10cSrcweir { 344*cdf0e10cSrcweir if (sres.startOffset[k]) 345*cdf0e10cSrcweir sres.startOffset[k] = offset[sres.startOffset[k] - 1] + 1; 346*cdf0e10cSrcweir // JP 20.6.2001: end is ever exclusive and then don't return 347*cdf0e10cSrcweir // the position of the next character - return the 348*cdf0e10cSrcweir // next position behind the last found character! 349*cdf0e10cSrcweir // "a b c" find "b" must return 2,3 and not 2,4!!! 350*cdf0e10cSrcweir if (sres.endOffset[k]) 351*cdf0e10cSrcweir sres.endOffset[k] = offset[sres.endOffset[k]]; 352*cdf0e10cSrcweir } 353*cdf0e10cSrcweir } 354*cdf0e10cSrcweir else 355*cdf0e10cSrcweir { 356*cdf0e10cSrcweir sres = (this->*fnBackward)( in_str, startPos, endPos ); 357*cdf0e10cSrcweir } 358*cdf0e10cSrcweir 359*cdf0e10cSrcweir if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP ) 360*cdf0e10cSrcweir { 361*cdf0e10cSrcweir SearchResult sres2; 362*cdf0e10cSrcweir 363*cdf0e10cSrcweir in_str = OUString(searchStr); 364*cdf0e10cSrcweir com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 365*cdf0e10cSrcweir 366*cdf0e10cSrcweir in_str = xTranslit2->transliterate(searchStr, 0, in_str.getLength(), offset); 367*cdf0e10cSrcweir 368*cdf0e10cSrcweir if( startPos < searchStr.getLength() ) 369*cdf0e10cSrcweir startPos = FindPosInSeq_Impl( offset, startPos ); 370*cdf0e10cSrcweir else 371*cdf0e10cSrcweir startPos = in_str.getLength(); 372*cdf0e10cSrcweir 373*cdf0e10cSrcweir if( endPos ) 374*cdf0e10cSrcweir endPos = FindPosInSeq_Impl( offset, endPos ); 375*cdf0e10cSrcweir 376*cdf0e10cSrcweir bUsePrimarySrchStr = false; 377*cdf0e10cSrcweir sres2 = (this->*fnBackward)( in_str, startPos, endPos ); 378*cdf0e10cSrcweir 379*cdf0e10cSrcweir for( int k = 0; k < sres2.startOffset.getLength(); k++ ) 380*cdf0e10cSrcweir { 381*cdf0e10cSrcweir if (sres2.startOffset[k]) 382*cdf0e10cSrcweir sres2.startOffset[k] = offset[sres2.startOffset[k]-1]+1; 383*cdf0e10cSrcweir if (sres2.endOffset[k]) 384*cdf0e10cSrcweir sres2.endOffset[k] = offset[sres2.endOffset[k]-1]+1; 385*cdf0e10cSrcweir } 386*cdf0e10cSrcweir 387*cdf0e10cSrcweir // pick last and long one 388*cdf0e10cSrcweir if ( sres.subRegExpressions == 0 ) 389*cdf0e10cSrcweir return sres2; 390*cdf0e10cSrcweir if ( sres2.subRegExpressions == 1 ) 391*cdf0e10cSrcweir { 392*cdf0e10cSrcweir if ( sres.startOffset[0] < sres2.startOffset[0] ) 393*cdf0e10cSrcweir return sres2; 394*cdf0e10cSrcweir if ( sres.startOffset[0] == sres2.startOffset[0] && 395*cdf0e10cSrcweir sres.endOffset[0] > sres2.endOffset[0] ) 396*cdf0e10cSrcweir return sres2; 397*cdf0e10cSrcweir } 398*cdf0e10cSrcweir } 399*cdf0e10cSrcweir 400*cdf0e10cSrcweir return sres; 401*cdf0e10cSrcweir } 402*cdf0e10cSrcweir 403*cdf0e10cSrcweir 404*cdf0e10cSrcweir 405*cdf0e10cSrcweir //--------------- die Wort-Trennner ---------------------------------- 406*cdf0e10cSrcweir 407*cdf0e10cSrcweir bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const 408*cdf0e10cSrcweir { 409*cdf0e10cSrcweir bool bRet = 1; 410*cdf0e10cSrcweir if( '\x7f' != rStr[nPos]) 411*cdf0e10cSrcweir { 412*cdf0e10cSrcweir if ( !xCharClass.is() ) 413*cdf0e10cSrcweir { 414*cdf0e10cSrcweir Reference < XInterface > xI = xMSF->createInstance( 415*cdf0e10cSrcweir OUString::createFromAscii( "com.sun.star.i18n.CharacterClassification")); 416*cdf0e10cSrcweir if( xI.is() ) 417*cdf0e10cSrcweir xI->queryInterface( ::getCppuType( 418*cdf0e10cSrcweir (const Reference< XCharacterClassification >*)0)) 419*cdf0e10cSrcweir >>= xCharClass; 420*cdf0e10cSrcweir } 421*cdf0e10cSrcweir if ( xCharClass.is() ) 422*cdf0e10cSrcweir { 423*cdf0e10cSrcweir sal_Int32 nCType = xCharClass->getCharacterType( rStr, nPos, 424*cdf0e10cSrcweir aSrchPara.Locale ); 425*cdf0e10cSrcweir if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA | 426*cdf0e10cSrcweir KCharacterType::LETTER ) & nCType ) ) 427*cdf0e10cSrcweir bRet = 0; 428*cdf0e10cSrcweir } 429*cdf0e10cSrcweir } 430*cdf0e10cSrcweir return bRet; 431*cdf0e10cSrcweir } 432*cdf0e10cSrcweir 433*cdf0e10cSrcweir 434*cdf0e10cSrcweir 435*cdf0e10cSrcweir // --------- methods for the kind of boyer-morre search ------------------ 436*cdf0e10cSrcweir 437*cdf0e10cSrcweir 438*cdf0e10cSrcweir void TextSearch::MakeForwardTab() 439*cdf0e10cSrcweir { 440*cdf0e10cSrcweir // create the jumptable for the search text 441*cdf0e10cSrcweir if( pJumpTable ) 442*cdf0e10cSrcweir { 443*cdf0e10cSrcweir if( bIsForwardTab ) 444*cdf0e10cSrcweir return ; // the jumpTable is ok 445*cdf0e10cSrcweir delete pJumpTable; 446*cdf0e10cSrcweir } 447*cdf0e10cSrcweir bIsForwardTab = true; 448*cdf0e10cSrcweir 449*cdf0e10cSrcweir sal_Int32 n, nLen = sSrchStr.getLength(); 450*cdf0e10cSrcweir pJumpTable = new TextSearchJumpTable; 451*cdf0e10cSrcweir 452*cdf0e10cSrcweir for( n = 0; n < nLen - 1; ++n ) 453*cdf0e10cSrcweir { 454*cdf0e10cSrcweir sal_Unicode cCh = sSrchStr[n]; 455*cdf0e10cSrcweir sal_Int32 nDiff = nLen - n - 1; 456*cdf0e10cSrcweir TextSearchJumpTable::value_type aEntry( cCh, nDiff ); 457*cdf0e10cSrcweir 458*cdf0e10cSrcweir ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 459*cdf0e10cSrcweir pJumpTable->insert( aEntry ); 460*cdf0e10cSrcweir if ( !aPair.second ) 461*cdf0e10cSrcweir (*(aPair.first)).second = nDiff; 462*cdf0e10cSrcweir } 463*cdf0e10cSrcweir } 464*cdf0e10cSrcweir 465*cdf0e10cSrcweir void TextSearch::MakeForwardTab2() 466*cdf0e10cSrcweir { 467*cdf0e10cSrcweir // create the jumptable for the search text 468*cdf0e10cSrcweir if( pJumpTable2 ) 469*cdf0e10cSrcweir { 470*cdf0e10cSrcweir if( bIsForwardTab ) 471*cdf0e10cSrcweir return ; // the jumpTable is ok 472*cdf0e10cSrcweir delete pJumpTable2; 473*cdf0e10cSrcweir } 474*cdf0e10cSrcweir bIsForwardTab = true; 475*cdf0e10cSrcweir 476*cdf0e10cSrcweir sal_Int32 n, nLen = sSrchStr2.getLength(); 477*cdf0e10cSrcweir pJumpTable2 = new TextSearchJumpTable; 478*cdf0e10cSrcweir 479*cdf0e10cSrcweir for( n = 0; n < nLen - 1; ++n ) 480*cdf0e10cSrcweir { 481*cdf0e10cSrcweir sal_Unicode cCh = sSrchStr2[n]; 482*cdf0e10cSrcweir sal_Int32 nDiff = nLen - n - 1; 483*cdf0e10cSrcweir 484*cdf0e10cSrcweir TextSearchJumpTable::value_type aEntry( cCh, nDiff ); 485*cdf0e10cSrcweir ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 486*cdf0e10cSrcweir pJumpTable2->insert( aEntry ); 487*cdf0e10cSrcweir if ( !aPair.second ) 488*cdf0e10cSrcweir (*(aPair.first)).second = nDiff; 489*cdf0e10cSrcweir } 490*cdf0e10cSrcweir } 491*cdf0e10cSrcweir 492*cdf0e10cSrcweir void TextSearch::MakeBackwardTab() 493*cdf0e10cSrcweir { 494*cdf0e10cSrcweir // create the jumptable for the search text 495*cdf0e10cSrcweir if( pJumpTable ) 496*cdf0e10cSrcweir { 497*cdf0e10cSrcweir if( !bIsForwardTab ) 498*cdf0e10cSrcweir return ; // the jumpTable is ok 499*cdf0e10cSrcweir delete pJumpTable; 500*cdf0e10cSrcweir } 501*cdf0e10cSrcweir bIsForwardTab = false; 502*cdf0e10cSrcweir 503*cdf0e10cSrcweir sal_Int32 n, nLen = sSrchStr.getLength(); 504*cdf0e10cSrcweir pJumpTable = new TextSearchJumpTable; 505*cdf0e10cSrcweir 506*cdf0e10cSrcweir for( n = nLen-1; n > 0; --n ) 507*cdf0e10cSrcweir { 508*cdf0e10cSrcweir sal_Unicode cCh = sSrchStr[n]; 509*cdf0e10cSrcweir TextSearchJumpTable::value_type aEntry( cCh, n ); 510*cdf0e10cSrcweir ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 511*cdf0e10cSrcweir pJumpTable->insert( aEntry ); 512*cdf0e10cSrcweir if ( !aPair.second ) 513*cdf0e10cSrcweir (*(aPair.first)).second = n; 514*cdf0e10cSrcweir } 515*cdf0e10cSrcweir } 516*cdf0e10cSrcweir 517*cdf0e10cSrcweir void TextSearch::MakeBackwardTab2() 518*cdf0e10cSrcweir { 519*cdf0e10cSrcweir // create the jumptable for the search text 520*cdf0e10cSrcweir if( pJumpTable2 ) 521*cdf0e10cSrcweir { 522*cdf0e10cSrcweir if( !bIsForwardTab ) 523*cdf0e10cSrcweir return ; // the jumpTable is ok 524*cdf0e10cSrcweir delete pJumpTable2; 525*cdf0e10cSrcweir } 526*cdf0e10cSrcweir bIsForwardTab = false; 527*cdf0e10cSrcweir 528*cdf0e10cSrcweir sal_Int32 n, nLen = sSrchStr2.getLength(); 529*cdf0e10cSrcweir pJumpTable2 = new TextSearchJumpTable; 530*cdf0e10cSrcweir 531*cdf0e10cSrcweir for( n = nLen-1; n > 0; --n ) 532*cdf0e10cSrcweir { 533*cdf0e10cSrcweir sal_Unicode cCh = sSrchStr2[n]; 534*cdf0e10cSrcweir TextSearchJumpTable::value_type aEntry( cCh, n ); 535*cdf0e10cSrcweir ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 536*cdf0e10cSrcweir pJumpTable2->insert( aEntry ); 537*cdf0e10cSrcweir if ( !aPair.second ) 538*cdf0e10cSrcweir (*(aPair.first)).second = n; 539*cdf0e10cSrcweir } 540*cdf0e10cSrcweir } 541*cdf0e10cSrcweir 542*cdf0e10cSrcweir sal_Int32 TextSearch::GetDiff( const sal_Unicode cChr ) const 543*cdf0e10cSrcweir { 544*cdf0e10cSrcweir TextSearchJumpTable *pJump; 545*cdf0e10cSrcweir OUString sSearchKey; 546*cdf0e10cSrcweir 547*cdf0e10cSrcweir if ( bUsePrimarySrchStr ) { 548*cdf0e10cSrcweir pJump = pJumpTable; 549*cdf0e10cSrcweir sSearchKey = sSrchStr; 550*cdf0e10cSrcweir } else { 551*cdf0e10cSrcweir pJump = pJumpTable2; 552*cdf0e10cSrcweir sSearchKey = sSrchStr2; 553*cdf0e10cSrcweir } 554*cdf0e10cSrcweir 555*cdf0e10cSrcweir TextSearchJumpTable::const_iterator iLook = pJump->find( cChr ); 556*cdf0e10cSrcweir if ( iLook == pJump->end() ) 557*cdf0e10cSrcweir return sSearchKey.getLength(); 558*cdf0e10cSrcweir return (*iLook).second; 559*cdf0e10cSrcweir } 560*cdf0e10cSrcweir 561*cdf0e10cSrcweir 562*cdf0e10cSrcweir // TextSearch::NSrchFrwrd is mis-optimized on unxsoli (#i105945#) 563*cdf0e10cSrcweir SearchResult TextSearch::NSrchFrwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 564*cdf0e10cSrcweir throw(RuntimeException) 565*cdf0e10cSrcweir { 566*cdf0e10cSrcweir SearchResult aRet; 567*cdf0e10cSrcweir aRet.subRegExpressions = 0; 568*cdf0e10cSrcweir 569*cdf0e10cSrcweir OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2; 570*cdf0e10cSrcweir 571*cdf0e10cSrcweir OUString aStr( searchStr ); 572*cdf0e10cSrcweir sal_Int32 nSuchIdx = aStr.getLength(); 573*cdf0e10cSrcweir sal_Int32 nEnde = endPos; 574*cdf0e10cSrcweir if( !nSuchIdx || !sSearchKey.getLength() || sSearchKey.getLength() > nSuchIdx ) 575*cdf0e10cSrcweir return aRet; 576*cdf0e10cSrcweir 577*cdf0e10cSrcweir 578*cdf0e10cSrcweir if( nEnde < sSearchKey.getLength() ) // position inside the search region ? 579*cdf0e10cSrcweir return aRet; 580*cdf0e10cSrcweir 581*cdf0e10cSrcweir nEnde -= sSearchKey.getLength(); 582*cdf0e10cSrcweir 583*cdf0e10cSrcweir if (bUsePrimarySrchStr) 584*cdf0e10cSrcweir MakeForwardTab(); // create the jumptable 585*cdf0e10cSrcweir else 586*cdf0e10cSrcweir MakeForwardTab2(); 587*cdf0e10cSrcweir 588*cdf0e10cSrcweir for (sal_Int32 nCmpIdx = startPos; // start position for the search 589*cdf0e10cSrcweir nCmpIdx <= nEnde; 590*cdf0e10cSrcweir nCmpIdx += GetDiff( aStr[nCmpIdx + sSearchKey.getLength()-1])) 591*cdf0e10cSrcweir { 592*cdf0e10cSrcweir // if the match would be the completed cells, skip it. 593*cdf0e10cSrcweir if ( (checkCTLStart && !isCellStart( aStr, nCmpIdx )) || (checkCTLEnd 594*cdf0e10cSrcweir && !isCellStart( aStr, nCmpIdx + sSearchKey.getLength())) ) 595*cdf0e10cSrcweir continue; 596*cdf0e10cSrcweir 597*cdf0e10cSrcweir nSuchIdx = sSearchKey.getLength() - 1; 598*cdf0e10cSrcweir while( nSuchIdx >= 0 && sSearchKey[nSuchIdx] == aStr[nCmpIdx + nSuchIdx]) 599*cdf0e10cSrcweir { 600*cdf0e10cSrcweir if( nSuchIdx == 0 ) 601*cdf0e10cSrcweir { 602*cdf0e10cSrcweir if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag ) 603*cdf0e10cSrcweir { 604*cdf0e10cSrcweir sal_Int32 nFndEnd = nCmpIdx + sSearchKey.getLength(); 605*cdf0e10cSrcweir bool bAtStart = !nCmpIdx; 606*cdf0e10cSrcweir bool bAtEnd = nFndEnd == endPos; 607*cdf0e10cSrcweir bool bDelimBefore = bAtStart || IsDelimiter( aStr, nCmpIdx-1 ); 608*cdf0e10cSrcweir bool bDelimBehind = IsDelimiter( aStr, nFndEnd ); 609*cdf0e10cSrcweir // * 1 -> only one word in the paragraph 610*cdf0e10cSrcweir // * 2 -> at begin of paragraph 611*cdf0e10cSrcweir // * 3 -> at end of paragraph 612*cdf0e10cSrcweir // * 4 -> inside the paragraph 613*cdf0e10cSrcweir if( !( ( bAtStart && bAtEnd ) || // 1 614*cdf0e10cSrcweir ( bAtStart && bDelimBehind ) || // 2 615*cdf0e10cSrcweir ( bAtEnd && bDelimBefore ) || // 3 616*cdf0e10cSrcweir ( bDelimBefore && bDelimBehind ))) // 4 617*cdf0e10cSrcweir break; 618*cdf0e10cSrcweir } 619*cdf0e10cSrcweir 620*cdf0e10cSrcweir aRet.subRegExpressions = 1; 621*cdf0e10cSrcweir aRet.startOffset.realloc( 1 ); 622*cdf0e10cSrcweir aRet.startOffset[ 0 ] = nCmpIdx; 623*cdf0e10cSrcweir aRet.endOffset.realloc( 1 ); 624*cdf0e10cSrcweir aRet.endOffset[ 0 ] = nCmpIdx + sSearchKey.getLength(); 625*cdf0e10cSrcweir 626*cdf0e10cSrcweir return aRet; 627*cdf0e10cSrcweir } 628*cdf0e10cSrcweir else 629*cdf0e10cSrcweir nSuchIdx--; 630*cdf0e10cSrcweir } 631*cdf0e10cSrcweir } 632*cdf0e10cSrcweir return aRet; 633*cdf0e10cSrcweir } 634*cdf0e10cSrcweir 635*cdf0e10cSrcweir SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 636*cdf0e10cSrcweir throw(RuntimeException) 637*cdf0e10cSrcweir { 638*cdf0e10cSrcweir SearchResult aRet; 639*cdf0e10cSrcweir aRet.subRegExpressions = 0; 640*cdf0e10cSrcweir 641*cdf0e10cSrcweir OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2; 642*cdf0e10cSrcweir 643*cdf0e10cSrcweir OUString aStr( searchStr ); 644*cdf0e10cSrcweir sal_Int32 nSuchIdx = aStr.getLength(); 645*cdf0e10cSrcweir sal_Int32 nEnde = endPos; 646*cdf0e10cSrcweir if( nSuchIdx == 0 || sSearchKey.getLength() == 0 || sSearchKey.getLength() > nSuchIdx) 647*cdf0e10cSrcweir return aRet; 648*cdf0e10cSrcweir 649*cdf0e10cSrcweir if (bUsePrimarySrchStr) 650*cdf0e10cSrcweir MakeBackwardTab(); // create the jumptable 651*cdf0e10cSrcweir else 652*cdf0e10cSrcweir MakeBackwardTab2(); 653*cdf0e10cSrcweir 654*cdf0e10cSrcweir if( nEnde == nSuchIdx ) // end position for the search 655*cdf0e10cSrcweir nEnde = sSearchKey.getLength(); 656*cdf0e10cSrcweir else 657*cdf0e10cSrcweir nEnde += sSearchKey.getLength(); 658*cdf0e10cSrcweir 659*cdf0e10cSrcweir sal_Int32 nCmpIdx = startPos; // start position for the search 660*cdf0e10cSrcweir 661*cdf0e10cSrcweir while (nCmpIdx >= nEnde) 662*cdf0e10cSrcweir { 663*cdf0e10cSrcweir // if the match would be the completed cells, skip it. 664*cdf0e10cSrcweir if ( (!checkCTLStart || isCellStart( aStr, nCmpIdx - 665*cdf0e10cSrcweir sSearchKey.getLength() )) && (!checkCTLEnd || 666*cdf0e10cSrcweir isCellStart( aStr, nCmpIdx))) 667*cdf0e10cSrcweir { 668*cdf0e10cSrcweir nSuchIdx = 0; 669*cdf0e10cSrcweir while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] == 670*cdf0e10cSrcweir aStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] ) 671*cdf0e10cSrcweir nSuchIdx++; 672*cdf0e10cSrcweir if( nSuchIdx >= sSearchKey.getLength() ) 673*cdf0e10cSrcweir { 674*cdf0e10cSrcweir if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag ) 675*cdf0e10cSrcweir { 676*cdf0e10cSrcweir sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength(); 677*cdf0e10cSrcweir bool bAtStart = !nFndStt; 678*cdf0e10cSrcweir bool bAtEnd = nCmpIdx == startPos; 679*cdf0e10cSrcweir bool bDelimBehind = IsDelimiter( aStr, nCmpIdx ); 680*cdf0e10cSrcweir bool bDelimBefore = bAtStart || // begin of paragraph 681*cdf0e10cSrcweir IsDelimiter( aStr, nFndStt-1 ); 682*cdf0e10cSrcweir // * 1 -> only one word in the paragraph 683*cdf0e10cSrcweir // * 2 -> at begin of paragraph 684*cdf0e10cSrcweir // * 3 -> at end of paragraph 685*cdf0e10cSrcweir // * 4 -> inside the paragraph 686*cdf0e10cSrcweir if( ( bAtStart && bAtEnd ) || // 1 687*cdf0e10cSrcweir ( bAtStart && bDelimBehind ) || // 2 688*cdf0e10cSrcweir ( bAtEnd && bDelimBefore ) || // 3 689*cdf0e10cSrcweir ( bDelimBefore && bDelimBehind )) // 4 690*cdf0e10cSrcweir { 691*cdf0e10cSrcweir aRet.subRegExpressions = 1; 692*cdf0e10cSrcweir aRet.startOffset.realloc( 1 ); 693*cdf0e10cSrcweir aRet.startOffset[ 0 ] = nCmpIdx; 694*cdf0e10cSrcweir aRet.endOffset.realloc( 1 ); 695*cdf0e10cSrcweir aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength(); 696*cdf0e10cSrcweir return aRet; 697*cdf0e10cSrcweir } 698*cdf0e10cSrcweir } 699*cdf0e10cSrcweir else 700*cdf0e10cSrcweir { 701*cdf0e10cSrcweir aRet.subRegExpressions = 1; 702*cdf0e10cSrcweir aRet.startOffset.realloc( 1 ); 703*cdf0e10cSrcweir aRet.startOffset[ 0 ] = nCmpIdx; 704*cdf0e10cSrcweir aRet.endOffset.realloc( 1 ); 705*cdf0e10cSrcweir aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength(); 706*cdf0e10cSrcweir return aRet; 707*cdf0e10cSrcweir } 708*cdf0e10cSrcweir } 709*cdf0e10cSrcweir } 710*cdf0e10cSrcweir nSuchIdx = GetDiff( aStr[nCmpIdx - sSearchKey.getLength()] ); 711*cdf0e10cSrcweir if( nCmpIdx < nSuchIdx ) 712*cdf0e10cSrcweir return aRet; 713*cdf0e10cSrcweir nCmpIdx -= nSuchIdx; 714*cdf0e10cSrcweir } 715*cdf0e10cSrcweir return aRet; 716*cdf0e10cSrcweir } 717*cdf0e10cSrcweir 718*cdf0e10cSrcweir 719*cdf0e10cSrcweir 720*cdf0e10cSrcweir //--------------------------------------------------------------------------- 721*cdf0e10cSrcweir // ------- Methoden fuer die Suche ueber Regular-Expressions -------------- 722*cdf0e10cSrcweir 723*cdf0e10cSrcweir SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr, 724*cdf0e10cSrcweir sal_Int32 startPos, sal_Int32 endPos ) 725*cdf0e10cSrcweir throw(RuntimeException) 726*cdf0e10cSrcweir { 727*cdf0e10cSrcweir SearchResult aRet; 728*cdf0e10cSrcweir aRet.subRegExpressions = 0; 729*cdf0e10cSrcweir OUString aStr( searchStr ); 730*cdf0e10cSrcweir 731*cdf0e10cSrcweir bool bSearchInSel = (0 != (( SearchFlags::REG_NOT_BEGINOFLINE | 732*cdf0e10cSrcweir SearchFlags::REG_NOT_ENDOFLINE ) & aSrchPara.searchFlag )); 733*cdf0e10cSrcweir 734*cdf0e10cSrcweir pRegExp->set_line(aStr.getStr(), bSearchInSel ? endPos : aStr.getLength()); 735*cdf0e10cSrcweir 736*cdf0e10cSrcweir struct re_registers regs; 737*cdf0e10cSrcweir 738*cdf0e10cSrcweir // Clear structure 739*cdf0e10cSrcweir memset((void *)®s, 0, sizeof(struct re_registers)); 740*cdf0e10cSrcweir if ( ! pRegExp->re_search(®s, startPos) ) 741*cdf0e10cSrcweir { 742*cdf0e10cSrcweir if( regs.num_of_match > 0 && 743*cdf0e10cSrcweir (regs.start[0] != -1 && regs.end[0] != -1) ) 744*cdf0e10cSrcweir { 745*cdf0e10cSrcweir aRet.startOffset.realloc(regs.num_of_match); 746*cdf0e10cSrcweir aRet.endOffset.realloc(regs.num_of_match); 747*cdf0e10cSrcweir 748*cdf0e10cSrcweir sal_Int32 i = 0, j = 0; 749*cdf0e10cSrcweir while( j < regs.num_of_match ) 750*cdf0e10cSrcweir { 751*cdf0e10cSrcweir if( regs.start[j] != -1 && regs.end[j] != -1 ) 752*cdf0e10cSrcweir { 753*cdf0e10cSrcweir aRet.startOffset[i] = regs.start[j]; 754*cdf0e10cSrcweir aRet.endOffset[i] = regs.end[j]; 755*cdf0e10cSrcweir ++i; 756*cdf0e10cSrcweir } 757*cdf0e10cSrcweir ++j; 758*cdf0e10cSrcweir } 759*cdf0e10cSrcweir aRet.subRegExpressions = i; 760*cdf0e10cSrcweir } 761*cdf0e10cSrcweir if ( regs.num_regs > 0 ) 762*cdf0e10cSrcweir { 763*cdf0e10cSrcweir if ( regs.start ) 764*cdf0e10cSrcweir free(regs.start); 765*cdf0e10cSrcweir if ( regs.end ) 766*cdf0e10cSrcweir free(regs.end); 767*cdf0e10cSrcweir } 768*cdf0e10cSrcweir } 769*cdf0e10cSrcweir 770*cdf0e10cSrcweir return aRet; 771*cdf0e10cSrcweir } 772*cdf0e10cSrcweir 773*cdf0e10cSrcweir /* 774*cdf0e10cSrcweir * Sucht das Muster aSrchPara.sSrchStr rueckwaerts im String rStr 775*cdf0e10cSrcweir */ 776*cdf0e10cSrcweir SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr, 777*cdf0e10cSrcweir sal_Int32 startPos, sal_Int32 endPos ) 778*cdf0e10cSrcweir throw(RuntimeException) 779*cdf0e10cSrcweir { 780*cdf0e10cSrcweir SearchResult aRet; 781*cdf0e10cSrcweir aRet.subRegExpressions = 0; 782*cdf0e10cSrcweir OUString aStr( searchStr ); 783*cdf0e10cSrcweir 784*cdf0e10cSrcweir sal_Int32 nOffset = 0; 785*cdf0e10cSrcweir sal_Int32 nStrEnde = aStr.getLength() == endPos ? 0 : endPos; 786*cdf0e10cSrcweir 787*cdf0e10cSrcweir bool bSearchInSel = (0 != (( SearchFlags::REG_NOT_BEGINOFLINE | 788*cdf0e10cSrcweir SearchFlags::REG_NOT_ENDOFLINE ) & aSrchPara.searchFlag )); 789*cdf0e10cSrcweir 790*cdf0e10cSrcweir if( startPos ) 791*cdf0e10cSrcweir nOffset = startPos - 1; 792*cdf0e10cSrcweir 793*cdf0e10cSrcweir // search only in the subString 794*cdf0e10cSrcweir if( bSearchInSel && nStrEnde ) 795*cdf0e10cSrcweir { 796*cdf0e10cSrcweir aStr = aStr.copy( nStrEnde, aStr.getLength() - nStrEnde ); 797*cdf0e10cSrcweir if( nOffset > nStrEnde ) 798*cdf0e10cSrcweir nOffset = nOffset - nStrEnde; 799*cdf0e10cSrcweir else 800*cdf0e10cSrcweir nOffset = 0; 801*cdf0e10cSrcweir } 802*cdf0e10cSrcweir 803*cdf0e10cSrcweir // set the length to negative for reverse search 804*cdf0e10cSrcweir pRegExp->set_line( aStr.getStr(), -(aStr.getLength()) ); 805*cdf0e10cSrcweir struct re_registers regs; 806*cdf0e10cSrcweir 807*cdf0e10cSrcweir // Clear structure 808*cdf0e10cSrcweir memset((void *)®s, 0, sizeof(struct re_registers)); 809*cdf0e10cSrcweir if ( ! pRegExp->re_search(®s, nOffset) ) 810*cdf0e10cSrcweir { 811*cdf0e10cSrcweir if( regs.num_of_match > 0 && 812*cdf0e10cSrcweir (regs.start[0] != -1 && regs.end[0] != -1) ) 813*cdf0e10cSrcweir { 814*cdf0e10cSrcweir nOffset = bSearchInSel ? nStrEnde : 0; 815*cdf0e10cSrcweir aRet.startOffset.realloc(regs.num_of_match); 816*cdf0e10cSrcweir aRet.endOffset.realloc(regs.num_of_match); 817*cdf0e10cSrcweir 818*cdf0e10cSrcweir sal_Int32 i = 0, j = 0; 819*cdf0e10cSrcweir while( j < regs.num_of_match ) 820*cdf0e10cSrcweir { 821*cdf0e10cSrcweir if( regs.start[j] != -1 && regs.end[j] != -1 ) 822*cdf0e10cSrcweir { 823*cdf0e10cSrcweir aRet.startOffset[i] = regs.end[j] + nOffset; 824*cdf0e10cSrcweir aRet.endOffset[i] = regs.start[j] + nOffset; 825*cdf0e10cSrcweir ++i; 826*cdf0e10cSrcweir } 827*cdf0e10cSrcweir ++j; 828*cdf0e10cSrcweir } 829*cdf0e10cSrcweir aRet.subRegExpressions = i; 830*cdf0e10cSrcweir } 831*cdf0e10cSrcweir if ( regs.num_regs > 0 ) 832*cdf0e10cSrcweir { 833*cdf0e10cSrcweir if ( regs.start ) 834*cdf0e10cSrcweir free(regs.start); 835*cdf0e10cSrcweir if ( regs.end ) 836*cdf0e10cSrcweir free(regs.end); 837*cdf0e10cSrcweir } 838*cdf0e10cSrcweir } 839*cdf0e10cSrcweir 840*cdf0e10cSrcweir return aRet; 841*cdf0e10cSrcweir } 842*cdf0e10cSrcweir 843*cdf0e10cSrcweir // Phonetische Suche von Worten 844*cdf0e10cSrcweir SearchResult TextSearch::ApproxSrchFrwrd( const OUString& searchStr, 845*cdf0e10cSrcweir sal_Int32 startPos, sal_Int32 endPos ) 846*cdf0e10cSrcweir throw(RuntimeException) 847*cdf0e10cSrcweir { 848*cdf0e10cSrcweir SearchResult aRet; 849*cdf0e10cSrcweir aRet.subRegExpressions = 0; 850*cdf0e10cSrcweir 851*cdf0e10cSrcweir if( !xBreak.is() ) 852*cdf0e10cSrcweir return aRet; 853*cdf0e10cSrcweir 854*cdf0e10cSrcweir OUString aWTemp( searchStr ); 855*cdf0e10cSrcweir 856*cdf0e10cSrcweir register sal_Int32 nStt, nEnd; 857*cdf0e10cSrcweir 858*cdf0e10cSrcweir Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos, 859*cdf0e10cSrcweir aSrchPara.Locale, 860*cdf0e10cSrcweir WordType::ANYWORD_IGNOREWHITESPACES, sal_True ); 861*cdf0e10cSrcweir 862*cdf0e10cSrcweir do 863*cdf0e10cSrcweir { 864*cdf0e10cSrcweir if( aWBnd.startPos >= endPos ) 865*cdf0e10cSrcweir break; 866*cdf0e10cSrcweir nStt = aWBnd.startPos < startPos ? startPos : aWBnd.startPos; 867*cdf0e10cSrcweir nEnd = aWBnd.endPos > endPos ? endPos : aWBnd.endPos; 868*cdf0e10cSrcweir 869*cdf0e10cSrcweir if( nStt < nEnd && 870*cdf0e10cSrcweir pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit ) 871*cdf0e10cSrcweir { 872*cdf0e10cSrcweir aRet.subRegExpressions = 1; 873*cdf0e10cSrcweir aRet.startOffset.realloc( 1 ); 874*cdf0e10cSrcweir aRet.startOffset[ 0 ] = nStt; 875*cdf0e10cSrcweir aRet.endOffset.realloc( 1 ); 876*cdf0e10cSrcweir aRet.endOffset[ 0 ] = nEnd; 877*cdf0e10cSrcweir break; 878*cdf0e10cSrcweir } 879*cdf0e10cSrcweir 880*cdf0e10cSrcweir nStt = nEnd - 1; 881*cdf0e10cSrcweir aWBnd = xBreak->nextWord( aWTemp, nStt, aSrchPara.Locale, 882*cdf0e10cSrcweir WordType::ANYWORD_IGNOREWHITESPACES); 883*cdf0e10cSrcweir } while( aWBnd.startPos != aWBnd.endPos || 884*cdf0e10cSrcweir (aWBnd.endPos != aWTemp.getLength() && aWBnd.endPos != nEnd) ); 885*cdf0e10cSrcweir // #i50244# aWBnd.endPos != nEnd : in case there is _no_ word (only 886*cdf0e10cSrcweir // whitespace) in searchStr, getWordBoundary() returned startPos,startPos 887*cdf0e10cSrcweir // and nextWord() does also => don't loop forever. 888*cdf0e10cSrcweir return aRet; 889*cdf0e10cSrcweir } 890*cdf0e10cSrcweir 891*cdf0e10cSrcweir SearchResult TextSearch::ApproxSrchBkwrd( const OUString& searchStr, 892*cdf0e10cSrcweir sal_Int32 startPos, sal_Int32 endPos ) 893*cdf0e10cSrcweir throw(RuntimeException) 894*cdf0e10cSrcweir { 895*cdf0e10cSrcweir SearchResult aRet; 896*cdf0e10cSrcweir aRet.subRegExpressions = 0; 897*cdf0e10cSrcweir 898*cdf0e10cSrcweir if( !xBreak.is() ) 899*cdf0e10cSrcweir return aRet; 900*cdf0e10cSrcweir 901*cdf0e10cSrcweir OUString aWTemp( searchStr ); 902*cdf0e10cSrcweir 903*cdf0e10cSrcweir register sal_Int32 nStt, nEnd; 904*cdf0e10cSrcweir 905*cdf0e10cSrcweir Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos, 906*cdf0e10cSrcweir aSrchPara.Locale, 907*cdf0e10cSrcweir WordType::ANYWORD_IGNOREWHITESPACES, sal_True ); 908*cdf0e10cSrcweir 909*cdf0e10cSrcweir do 910*cdf0e10cSrcweir { 911*cdf0e10cSrcweir if( aWBnd.endPos <= endPos ) 912*cdf0e10cSrcweir break; 913*cdf0e10cSrcweir nStt = aWBnd.startPos < endPos ? endPos : aWBnd.startPos; 914*cdf0e10cSrcweir nEnd = aWBnd.endPos > startPos ? startPos : aWBnd.endPos; 915*cdf0e10cSrcweir 916*cdf0e10cSrcweir if( nStt < nEnd && 917*cdf0e10cSrcweir pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit ) 918*cdf0e10cSrcweir { 919*cdf0e10cSrcweir aRet.subRegExpressions = 1; 920*cdf0e10cSrcweir aRet.startOffset.realloc( 1 ); 921*cdf0e10cSrcweir aRet.startOffset[ 0 ] = nEnd; 922*cdf0e10cSrcweir aRet.endOffset.realloc( 1 ); 923*cdf0e10cSrcweir aRet.endOffset[ 0 ] = nStt; 924*cdf0e10cSrcweir break; 925*cdf0e10cSrcweir } 926*cdf0e10cSrcweir if( !nStt ) 927*cdf0e10cSrcweir break; 928*cdf0e10cSrcweir 929*cdf0e10cSrcweir aWBnd = xBreak->previousWord( aWTemp, nStt, aSrchPara.Locale, 930*cdf0e10cSrcweir WordType::ANYWORD_IGNOREWHITESPACES); 931*cdf0e10cSrcweir } while( aWBnd.startPos != aWBnd.endPos || aWBnd.endPos != aWTemp.getLength() ); 932*cdf0e10cSrcweir return aRet; 933*cdf0e10cSrcweir } 934*cdf0e10cSrcweir 935*cdf0e10cSrcweir 936*cdf0e10cSrcweir static const sal_Char cSearchName[] = "com.sun.star.util.TextSearch"; 937*cdf0e10cSrcweir static const sal_Char cSearchImpl[] = "com.sun.star.util.TextSearch_i18n"; 938*cdf0e10cSrcweir 939*cdf0e10cSrcweir static OUString getServiceName_Static() 940*cdf0e10cSrcweir { 941*cdf0e10cSrcweir return OUString::createFromAscii( cSearchName ); 942*cdf0e10cSrcweir } 943*cdf0e10cSrcweir 944*cdf0e10cSrcweir static OUString getImplementationName_Static() 945*cdf0e10cSrcweir { 946*cdf0e10cSrcweir return OUString::createFromAscii( cSearchImpl ); 947*cdf0e10cSrcweir } 948*cdf0e10cSrcweir 949*cdf0e10cSrcweir OUString SAL_CALL 950*cdf0e10cSrcweir TextSearch::getImplementationName() 951*cdf0e10cSrcweir throw( RuntimeException ) 952*cdf0e10cSrcweir { 953*cdf0e10cSrcweir return getImplementationName_Static(); 954*cdf0e10cSrcweir } 955*cdf0e10cSrcweir 956*cdf0e10cSrcweir sal_Bool SAL_CALL 957*cdf0e10cSrcweir TextSearch::supportsService(const OUString& rServiceName) 958*cdf0e10cSrcweir throw( RuntimeException ) 959*cdf0e10cSrcweir { 960*cdf0e10cSrcweir return !rServiceName.compareToAscii( cSearchName ); 961*cdf0e10cSrcweir } 962*cdf0e10cSrcweir 963*cdf0e10cSrcweir Sequence< OUString > SAL_CALL 964*cdf0e10cSrcweir TextSearch::getSupportedServiceNames(void) throw( RuntimeException ) 965*cdf0e10cSrcweir { 966*cdf0e10cSrcweir Sequence< OUString > aRet(1); 967*cdf0e10cSrcweir aRet[0] = getServiceName_Static(); 968*cdf0e10cSrcweir return aRet; 969*cdf0e10cSrcweir } 970*cdf0e10cSrcweir 971*cdf0e10cSrcweir ::com::sun::star::uno::Reference< ::com::sun::star::uno::XInterface > 972*cdf0e10cSrcweir SAL_CALL TextSearch_CreateInstance( 973*cdf0e10cSrcweir const ::com::sun::star::uno::Reference< 974*cdf0e10cSrcweir ::com::sun::star::lang::XMultiServiceFactory >& rxMSF ) 975*cdf0e10cSrcweir { 976*cdf0e10cSrcweir return ::com::sun::star::uno::Reference< 977*cdf0e10cSrcweir ::com::sun::star::uno::XInterface >( 978*cdf0e10cSrcweir (::cppu::OWeakObject*) new TextSearch( rxMSF ) ); 979*cdf0e10cSrcweir } 980*cdf0e10cSrcweir 981*cdf0e10cSrcweir extern "C" 982*cdf0e10cSrcweir { 983*cdf0e10cSrcweir 984*cdf0e10cSrcweir void SAL_CALL component_getImplementationEnvironment( 985*cdf0e10cSrcweir const sal_Char** ppEnvTypeName, uno_Environment** /*ppEnv*/ ) 986*cdf0e10cSrcweir { 987*cdf0e10cSrcweir *ppEnvTypeName = CPPU_CURRENT_LANGUAGE_BINDING_NAME; 988*cdf0e10cSrcweir } 989*cdf0e10cSrcweir 990*cdf0e10cSrcweir void* SAL_CALL component_getFactory( const sal_Char* sImplementationName, 991*cdf0e10cSrcweir void* _pServiceManager, void* /*_pRegistryKey*/ ) 992*cdf0e10cSrcweir { 993*cdf0e10cSrcweir void* pRet = NULL; 994*cdf0e10cSrcweir 995*cdf0e10cSrcweir ::com::sun::star::lang::XMultiServiceFactory* pServiceManager = 996*cdf0e10cSrcweir reinterpret_cast< ::com::sun::star::lang::XMultiServiceFactory* > 997*cdf0e10cSrcweir ( _pServiceManager ); 998*cdf0e10cSrcweir ::com::sun::star::uno::Reference< 999*cdf0e10cSrcweir ::com::sun::star::lang::XSingleServiceFactory > xFactory; 1000*cdf0e10cSrcweir 1001*cdf0e10cSrcweir if ( 0 == rtl_str_compare( sImplementationName, cSearchImpl) ) 1002*cdf0e10cSrcweir { 1003*cdf0e10cSrcweir ::com::sun::star::uno::Sequence< ::rtl::OUString > aServiceNames(1); 1004*cdf0e10cSrcweir aServiceNames[0] = getServiceName_Static(); 1005*cdf0e10cSrcweir xFactory = ::cppu::createSingleFactory( 1006*cdf0e10cSrcweir pServiceManager, getImplementationName_Static(), 1007*cdf0e10cSrcweir &TextSearch_CreateInstance, aServiceNames ); 1008*cdf0e10cSrcweir } 1009*cdf0e10cSrcweir 1010*cdf0e10cSrcweir if ( xFactory.is() ) 1011*cdf0e10cSrcweir { 1012*cdf0e10cSrcweir xFactory->acquire(); 1013*cdf0e10cSrcweir pRet = xFactory.get(); 1014*cdf0e10cSrcweir } 1015*cdf0e10cSrcweir 1016*cdf0e10cSrcweir return pRet; 1017*cdf0e10cSrcweir } 1018*cdf0e10cSrcweir 1019*cdf0e10cSrcweir } // extern "C" 1020