1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_i18npool.hxx"
30 
31 #include "textsearch.hxx"
32 #include "levdis.hxx"
33 #include <com/sun/star/lang/Locale.hpp>
34 #include <com/sun/star/lang/XMultiServiceFactory.hpp>
35 #include <comphelper/processfactory.hxx>
36 #include <com/sun/star/i18n/UnicodeType.hpp>
37 #include <com/sun/star/util/SearchFlags.hpp>
38 #include <com/sun/star/i18n/WordType.hpp>
39 #include <com/sun/star/i18n/ScriptType.hpp>
40 #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
41 #include <com/sun/star/i18n/KCharacterType.hpp>
42 #include <com/sun/star/registry/XRegistryKey.hpp>
43 #include <cppuhelper/factory.hxx>
44 #include <cppuhelper/weak.hxx>
45 
46 #ifdef _MSC_VER
47 // get rid of that dumb compiler warning
48 // identifier was truncated to '255' characters in the debug information
49 // for STL template usage, if .pdb files are to be created
50 #pragma warning( disable: 4786 )
51 #endif
52 
53 #include <string.h>
54 
55 using namespace ::com::sun::star::util;
56 using namespace ::com::sun::star::uno;
57 using namespace ::com::sun::star::lang;
58 using namespace ::com::sun::star::i18n;
59 using namespace ::rtl;
60 
61 static sal_Int32 COMPLEX_TRANS_MASK_TMP =
62     TransliterationModules_ignoreBaFa_ja_JP |
63     TransliterationModules_ignoreIterationMark_ja_JP |
64     TransliterationModules_ignoreTiJi_ja_JP |
65     TransliterationModules_ignoreHyuByu_ja_JP |
66     TransliterationModules_ignoreSeZe_ja_JP |
67     TransliterationModules_ignoreIandEfollowedByYa_ja_JP |
68     TransliterationModules_ignoreKiKuFollowedBySa_ja_JP |
69     TransliterationModules_ignoreProlongedSoundMark_ja_JP;
70 static const sal_Int32 SIMPLE_TRANS_MASK = ~(COMPLEX_TRANS_MASK_TMP | TransliterationModules_IGNORE_WIDTH) | TransliterationModules_FULLWIDTH_HALFWIDTH;
71 static const sal_Int32 COMPLEX_TRANS_MASK = COMPLEX_TRANS_MASK_TMP | TransliterationModules_IGNORE_KANA | TransliterationModules_FULLWIDTH_HALFWIDTH;
72     // Above 2 transliteration is simple but need to take effect in
73     // complex transliteration
74 
75 TextSearch::TextSearch(const Reference < XMultiServiceFactory > & rxMSF)
76         : xMSF( rxMSF )
77         , pJumpTable( 0 )
78         , pJumpTable2( 0 )
79         , pRegexMatcher( NULL )
80         , pWLD( 0 )
81 {
82     SearchOptions aOpt;
83     aOpt.algorithmType = SearchAlgorithms_ABSOLUTE;
84     aOpt.searchFlag = SearchFlags::ALL_IGNORE_CASE;
85     //aOpt.Locale = ???;
86     setOptions( aOpt );
87 }
88 
89 TextSearch::~TextSearch()
90 {
91     delete pRegexMatcher;
92     delete pWLD;
93     delete pJumpTable;
94     delete pJumpTable2;
95 }
96 
97 void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeException )
98 {
99     aSrchPara = rOptions;
100 
101     delete pRegexMatcher, pRegexMatcher = NULL;
102     delete pWLD, pWLD = 0;
103     delete pJumpTable, pJumpTable = 0;
104     delete pJumpTable2, pJumpTable2 = 0;
105 
106     // Create Transliteration class
107     if( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
108     {
109         if( !xTranslit.is() )
110         {
111             Reference < XInterface > xI = xMSF->createInstance(
112                     OUString::createFromAscii(
113                         "com.sun.star.i18n.Transliteration"));
114             if ( xI.is() )
115                 xI->queryInterface( ::getCppuType(
116                             (const Reference< XExtendedTransliteration >*)0))
117                     >>= xTranslit;
118         }
119         // Load transliteration module
120         if( xTranslit.is() )
121             xTranslit->loadModule(
122                     (TransliterationModules)( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ),
123                     aSrchPara.Locale);
124     }
125     else if( xTranslit.is() )
126         xTranslit = 0;
127 
128     // Create Transliteration for 2<->1, 2<->2 transliteration
129     if ( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
130     {
131         if( !xTranslit2.is() )
132         {
133             Reference < XInterface > xI = xMSF->createInstance(
134                     OUString::createFromAscii(
135                         "com.sun.star.i18n.Transliteration"));
136             if ( xI.is() )
137                 xI->queryInterface( ::getCppuType(
138                             (const Reference< XExtendedTransliteration >*)0))
139                     >>= xTranslit2;
140         }
141         // Load transliteration module
142         if( xTranslit2.is() )
143             xTranslit2->loadModule(
144                     (TransliterationModules)( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ),
145                     aSrchPara.Locale);
146     }
147 
148     if ( !xBreak.is() )
149     {
150         Reference < XInterface > xI = xMSF->createInstance(
151                 OUString::createFromAscii( "com.sun.star.i18n.BreakIterator"));
152         if( xI.is() )
153             xI->queryInterface( ::getCppuType(
154                         (const Reference< XBreakIterator >*)0))
155                 >>= xBreak;
156     }
157 
158     sSrchStr = aSrchPara.searchString;
159 
160     // use transliteration here
161     if ( xTranslit.is() &&
162 	 aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
163         sSrchStr = xTranslit->transliterateString2String(
164                 aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
165 
166     if ( xTranslit2.is() &&
167 	 aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
168 	sSrchStr2 = xTranslit2->transliterateString2String(
169 	        aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
170 
171     // When start or end of search string is a complex script type, we need to
172     // make sure the result boundary is not located in the middle of cell.
173     checkCTLStart = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 0) ==
174                 ScriptType::COMPLEX));
175     checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr,
176                     sSrchStr.getLength()-1) == ScriptType::COMPLEX));
177 
178     switch( aSrchPara.algorithmType)
179     {
180 		case SearchAlgorithms_REGEXP:
181 			fnForward = &TextSearch::RESrchFrwrd;
182 			fnBackward = &TextSearch::RESrchBkwrd;
183 
184 			{
185 			sal_uInt32 nIcuSearchFlags = 0;
186 			// map com::sun::star::util::SearchFlags to ICU uregex.h flags
187 			// TODO: REG_EXTENDED, REG_NOT_BEGINOFLINE, REG_NOT_ENDOFLINE
188 			// REG_NEWLINE is neither defined properly nor used anywhere => not implemented
189 			// REG_NOSUB is not used anywhere => not implemented
190 			// NORM_WORD_ONLY is only used for SearchAlgorithm==Absolute
191 			// LEV_RELAXED is only used for SearchAlgorithm==Approximate
192 			// why is even ALL_IGNORE_CASE deprecated in UNO? because of transliteration taking care of it???
193 			if( (aSrchPara.searchFlag & com::sun::star::util::SearchFlags::ALL_IGNORE_CASE) != 0)
194 				nIcuSearchFlags |= UREGEX_CASE_INSENSITIVE;
195 			UErrorCode nIcuErr = U_ZERO_ERROR;
196 			// assumption: transliteration doesn't mangle regexp control chars
197 			OUString& rPatternStr = (aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK) ? sSrchStr
198 					: ((aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK) ? sSrchStr2 : aSrchPara.searchString);
199 			const IcuUniString aIcuSearchPatStr( rPatternStr.getStr(), rPatternStr.getLength());
200 			pRegexMatcher = new RegexMatcher( aIcuSearchPatStr, nIcuSearchFlags, nIcuErr);
201 			if( nIcuErr)
202 				{ delete pRegexMatcher; pRegexMatcher = NULL;}
203 			} break;
204 
205 		case SearchAlgorithms_APPROXIMATE:
206             fnForward = &TextSearch::ApproxSrchFrwrd;
207             fnBackward = &TextSearch::ApproxSrchBkwrd;
208 
209             pWLD = new WLevDistance( sSrchStr.getStr(), aSrchPara.changedChars,
210                     aSrchPara.insertedChars, aSrchPara.deletedChars,
211                     0 != (SearchFlags::LEV_RELAXED & aSrchPara.searchFlag ) );
212 
213             nLimit = pWLD->GetLimit();
214 			break;
215 
216 		default:
217             fnForward = &TextSearch::NSrchFrwrd;
218             fnBackward = &TextSearch::NSrchBkwrd;
219 			break;
220     }
221 }
222 
223 sal_Int32 FindPosInSeq_Impl( const Sequence <sal_Int32>& rOff, sal_Int32 nPos )
224 {
225     sal_Int32 nRet = 0, nEnd = rOff.getLength();
226     while( nRet < nEnd && nPos > rOff[ nRet ] ) ++nRet;
227     return nRet;
228 }
229 
230 sal_Bool TextSearch::isCellStart(const OUString& searchStr, sal_Int32 nPos)
231         throw( RuntimeException )
232 {
233     sal_Int32 nDone;
234     return nPos == xBreak->previousCharacters(searchStr, nPos+1,
235             aSrchPara.Locale, CharacterIteratorMode::SKIPCELL, 1, nDone);
236 }
237 
238 SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
239         throw( RuntimeException )
240 {
241     SearchResult sres;
242 
243     OUString in_str(searchStr);
244     sal_Int32 newStartPos = startPos;
245     sal_Int32 newEndPos = endPos;
246 
247     bUsePrimarySrchStr = true;
248 
249     if ( xTranslit.is() )
250     {
251         // apply normal transliteration (1<->1, 1<->0)
252         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
253         in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset );
254 
255         // JP 20.6.2001: also the start and end positions must be corrected!
256         if( startPos )
257             newStartPos = FindPosInSeq_Impl( offset, startPos );
258 
259         if( endPos < searchStr.getLength() )
260 	    newEndPos = FindPosInSeq_Impl( offset, endPos );
261         else
262             newEndPos = in_str.getLength();
263 
264         sres = (this->*fnForward)( in_str, newStartPos, newEndPos );
265 
266         for ( int k = 0; k < sres.startOffset.getLength(); k++ )
267         {
268             if (sres.startOffset[k])
269 	      sres.startOffset[k] = offset[sres.startOffset[k]];
270             // JP 20.6.2001: end is ever exclusive and then don't return
271             //               the position of the next character - return the
272             //               next position behind the last found character!
273             //               "a b c" find "b" must return 2,3 and not 2,4!!!
274             if (sres.endOffset[k])
275 	      sres.endOffset[k] = offset[sres.endOffset[k]-1] + 1;
276         }
277     }
278     else
279     {
280         sres = (this->*fnForward)( in_str, startPos, endPos );
281     }
282 
283     if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP)
284     {
285         SearchResult sres2;
286 
287 	in_str = OUString(searchStr);
288         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
289 
290         in_str = xTranslit2->transliterate( searchStr, 0, in_str.getLength(), offset );
291 
292         if( startPos )
293             startPos = FindPosInSeq_Impl( offset, startPos );
294 
295         if( endPos < searchStr.getLength() )
296             endPos = FindPosInSeq_Impl( offset, endPos );
297         else
298             endPos = in_str.getLength();
299 
300 	bUsePrimarySrchStr = false;
301         sres2 = (this->*fnForward)( in_str, startPos, endPos );
302 
303         for ( int k = 0; k < sres2.startOffset.getLength(); k++ )
304         {
305             if (sres2.startOffset[k])
306 	      sres2.startOffset[k] = offset[sres2.startOffset[k]-1] + 1;
307             if (sres2.endOffset[k])
308 	      sres2.endOffset[k] = offset[sres2.endOffset[k]-1] + 1;
309         }
310 
311 	// pick first and long one
312 	if ( sres.subRegExpressions == 0)
313 	    return sres2;
314 	if ( sres2.subRegExpressions == 1)
315 	{
316 	    if ( sres.startOffset[0] > sres2.startOffset[0])
317 	        return sres2;
318 	    else if ( sres.startOffset[0] == sres2.startOffset[0] &&
319 	        sres.endOffset[0] < sres2.endOffset[0])
320 	        return sres2;
321 	}
322     }
323 
324     return sres;
325 }
326 
327 SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
328         throw(RuntimeException)
329 {
330     SearchResult sres;
331 
332     OUString in_str(searchStr);
333     sal_Int32 newStartPos = startPos;
334     sal_Int32 newEndPos = endPos;
335 
336     bUsePrimarySrchStr = true;
337 
338     if ( xTranslit.is() )
339     {
340         // apply only simple 1<->1 transliteration here
341         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
342 	in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset );
343 
344         // JP 20.6.2001: also the start and end positions must be corrected!
345         if( startPos < searchStr.getLength() )
346             newStartPos = FindPosInSeq_Impl( offset, startPos );
347 	else
348 	    newStartPos = in_str.getLength();
349 
350         if( endPos )
351 	    newEndPos = FindPosInSeq_Impl( offset, endPos );
352 
353         sres = (this->*fnBackward)( in_str, newStartPos, newEndPos );
354 
355         for ( int k = 0; k < sres.startOffset.getLength(); k++ )
356         {
357             if (sres.startOffset[k])
358 	      sres.startOffset[k] = offset[sres.startOffset[k] - 1] + 1;
359             // JP 20.6.2001: end is ever exclusive and then don't return
360             //               the position of the next character - return the
361             //               next position behind the last found character!
362             //               "a b c" find "b" must return 2,3 and not 2,4!!!
363             if (sres.endOffset[k])
364 	      sres.endOffset[k] = offset[sres.endOffset[k]];
365         }
366     }
367     else
368     {
369         sres = (this->*fnBackward)( in_str, startPos, endPos );
370     }
371 
372     if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP )
373     {
374 	SearchResult sres2;
375 
376 	in_str = OUString(searchStr);
377         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
378 
379         in_str = xTranslit2->transliterate(searchStr, 0, in_str.getLength(), offset);
380 
381         if( startPos < searchStr.getLength() )
382             startPos = FindPosInSeq_Impl( offset, startPos );
383         else
384             startPos = in_str.getLength();
385 
386         if( endPos )
387             endPos = FindPosInSeq_Impl( offset, endPos );
388 
389 	bUsePrimarySrchStr = false;
390 	sres2 = (this->*fnBackward)( in_str, startPos, endPos );
391 
392         for( int k = 0; k < sres2.startOffset.getLength(); k++ )
393         {
394             if (sres2.startOffset[k])
395                 sres2.startOffset[k] = offset[sres2.startOffset[k]-1]+1;
396             if (sres2.endOffset[k])
397                 sres2.endOffset[k] = offset[sres2.endOffset[k]-1]+1;
398         }
399 
400 	// pick last and long one
401 	if ( sres.subRegExpressions == 0 )
402 	    return sres2;
403 	if ( sres2.subRegExpressions == 1 )
404 	{
405 	    if ( sres.startOffset[0] < sres2.startOffset[0] )
406 	        return sres2;
407 	    if ( sres.startOffset[0] == sres2.startOffset[0] &&
408 		sres.endOffset[0] > sres2.endOffset[0] )
409 	        return sres2;
410 	}
411     }
412 
413     return sres;
414 }
415 
416 //---------------------------------------------------------------------
417 
418 bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const
419 {
420     bool bRet = 1;
421     if( '\x7f' != rStr[nPos])
422     {
423         if ( !xCharClass.is() )
424         {
425             Reference < XInterface > xI = xMSF->createInstance(
426                     OUString::createFromAscii( "com.sun.star.i18n.CharacterClassification"));
427             if( xI.is() )
428                 xI->queryInterface( ::getCppuType(
429                             (const Reference< XCharacterClassification >*)0))
430                     >>= xCharClass;
431         }
432         if ( xCharClass.is() )
433         {
434             sal_Int32 nCType = xCharClass->getCharacterType( rStr, nPos,
435                     aSrchPara.Locale );
436             if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA |
437                             KCharacterType::LETTER ) & nCType ) )
438                 bRet = 0;
439         }
440     }
441     return bRet;
442 }
443 
444 // --------- helper methods for Boyer-Moore like text searching ----------
445 // TODO: use ICU's regex UREGEX_LITERAL mode instead when it becomes available
446 
447 void TextSearch::MakeForwardTab()
448 {
449     // create the jumptable for the search text
450     if( pJumpTable )
451     {
452         if( bIsForwardTab )
453             return ;                                        // the jumpTable is ok
454         delete pJumpTable;
455     }
456     bIsForwardTab = true;
457 
458     sal_Int32 n, nLen = sSrchStr.getLength();
459     pJumpTable = new TextSearchJumpTable;
460 
461     for( n = 0; n < nLen - 1; ++n )
462     {
463         sal_Unicode cCh = sSrchStr[n];
464         sal_Int32 nDiff = nLen - n - 1;
465 	TextSearchJumpTable::value_type aEntry( cCh, nDiff );
466 
467         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
468             pJumpTable->insert( aEntry );
469         if ( !aPair.second )
470             (*(aPair.first)).second = nDiff;
471     }
472 }
473 
474 void TextSearch::MakeForwardTab2()
475 {
476     // create the jumptable for the search text
477     if( pJumpTable2 )
478     {
479         if( bIsForwardTab )
480             return ;                                        // the jumpTable is ok
481         delete pJumpTable2;
482     }
483     bIsForwardTab = true;
484 
485     sal_Int32 n, nLen = sSrchStr2.getLength();
486     pJumpTable2 = new TextSearchJumpTable;
487 
488     for( n = 0; n < nLen - 1; ++n )
489     {
490         sal_Unicode cCh = sSrchStr2[n];
491         sal_Int32 nDiff = nLen - n - 1;
492 
493 	TextSearchJumpTable::value_type aEntry( cCh, nDiff );
494         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
495             pJumpTable2->insert( aEntry );
496         if ( !aPair.second )
497             (*(aPair.first)).second = nDiff;
498     }
499 }
500 
501 void TextSearch::MakeBackwardTab()
502 {
503     // create the jumptable for the search text
504     if( pJumpTable )
505     {
506         if( !bIsForwardTab )
507             return ;                                        // the jumpTable is ok
508         delete pJumpTable;
509     }
510     bIsForwardTab = false;
511 
512     sal_Int32 n, nLen = sSrchStr.getLength();
513     pJumpTable = new TextSearchJumpTable;
514 
515     for( n = nLen-1; n > 0; --n )
516     {
517         sal_Unicode cCh = sSrchStr[n];
518         TextSearchJumpTable::value_type aEntry( cCh, n );
519         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
520             pJumpTable->insert( aEntry );
521         if ( !aPair.second )
522             (*(aPair.first)).second = n;
523     }
524 }
525 
526 void TextSearch::MakeBackwardTab2()
527 {
528     // create the jumptable for the search text
529     if( pJumpTable2 )
530     {
531         if( !bIsForwardTab )
532             return ;                                        // the jumpTable is ok
533         delete pJumpTable2;
534     }
535     bIsForwardTab = false;
536 
537     sal_Int32 n, nLen = sSrchStr2.getLength();
538     pJumpTable2 = new TextSearchJumpTable;
539 
540     for( n = nLen-1; n > 0; --n )
541     {
542         sal_Unicode cCh = sSrchStr2[n];
543         TextSearchJumpTable::value_type aEntry( cCh, n );
544         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
545             pJumpTable2->insert( aEntry );
546         if ( !aPair.second )
547             (*(aPair.first)).second = n;
548     }
549 }
550 
551 sal_Int32 TextSearch::GetDiff( const sal_Unicode cChr ) const
552 {
553     TextSearchJumpTable *pJump;
554     OUString sSearchKey;
555 
556     if ( bUsePrimarySrchStr ) {
557       pJump = pJumpTable;
558       sSearchKey = sSrchStr;
559     } else {
560       pJump = pJumpTable2;
561       sSearchKey = sSrchStr2;
562     }
563 
564     TextSearchJumpTable::const_iterator iLook = pJump->find( cChr );
565     if ( iLook == pJump->end() )
566         return sSearchKey.getLength();
567     return (*iLook).second;
568 }
569 
570 
571 // TextSearch::NSrchFrwrd is mis-optimized on unxsoli (#i105945#)
572 SearchResult TextSearch::NSrchFrwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
573         throw(RuntimeException)
574 {
575     SearchResult aRet;
576     aRet.subRegExpressions = 0;
577 
578     OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
579 
580     OUString aStr( searchStr );
581     sal_Int32 nSuchIdx = aStr.getLength();
582     sal_Int32 nEnde = endPos;
583     if( !nSuchIdx || !sSearchKey.getLength() || sSearchKey.getLength() > nSuchIdx )
584         return aRet;
585 
586 
587     if( nEnde < sSearchKey.getLength() )  // position inside the search region ?
588         return aRet;
589 
590     nEnde -= sSearchKey.getLength();
591 
592     if (bUsePrimarySrchStr)
593       MakeForwardTab();                   // create the jumptable
594     else
595       MakeForwardTab2();
596 
597     for (sal_Int32 nCmpIdx = startPos; // start position for the search
598             nCmpIdx <= nEnde;
599             nCmpIdx += GetDiff( aStr[nCmpIdx + sSearchKey.getLength()-1]))
600     {
601         // if the match would be the completed cells, skip it.
602         if ( (checkCTLStart && !isCellStart( aStr, nCmpIdx )) || (checkCTLEnd
603                     && !isCellStart( aStr, nCmpIdx + sSearchKey.getLength())) )
604             continue;
605 
606         nSuchIdx = sSearchKey.getLength() - 1;
607         while( nSuchIdx >= 0 && sSearchKey[nSuchIdx] == aStr[nCmpIdx + nSuchIdx])
608         {
609             if( nSuchIdx == 0 )
610             {
611                 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
612                 {
613                     sal_Int32 nFndEnd = nCmpIdx + sSearchKey.getLength();
614                     bool bAtStart = !nCmpIdx;
615                     bool bAtEnd = nFndEnd == endPos;
616                     bool bDelimBefore = bAtStart || IsDelimiter( aStr, nCmpIdx-1 );
617                     bool bDelimBehind = IsDelimiter(  aStr, nFndEnd );
618                     //  *       1 -> only one word in the paragraph
619                     //  *       2 -> at begin of paragraph
620                     //  *       3 -> at end of paragraph
621                     //  *       4 -> inside the paragraph
622                     if( !(  ( bAtStart && bAtEnd ) ||           // 1
623                                 ( bAtStart && bDelimBehind ) ||     // 2
624                                 ( bAtEnd && bDelimBefore ) ||       // 3
625                                 ( bDelimBefore && bDelimBehind )))  // 4
626                         break;
627                 }
628 
629                 aRet.subRegExpressions = 1;
630                 aRet.startOffset.realloc( 1 );
631                 aRet.startOffset[ 0 ] = nCmpIdx;
632                 aRet.endOffset.realloc( 1 );
633                 aRet.endOffset[ 0 ] = nCmpIdx + sSearchKey.getLength();
634 
635                 return aRet;
636             }
637             else
638                 nSuchIdx--;
639         }
640     }
641     return aRet;
642 }
643 
644 SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
645         throw(RuntimeException)
646 {
647     SearchResult aRet;
648     aRet.subRegExpressions = 0;
649 
650     OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
651 
652     OUString aStr( searchStr );
653     sal_Int32 nSuchIdx = aStr.getLength();
654     sal_Int32 nEnde = endPos;
655     if( nSuchIdx == 0 || sSearchKey.getLength() == 0 || sSearchKey.getLength() > nSuchIdx)
656         return aRet;
657 
658     if (bUsePrimarySrchStr)
659       MakeBackwardTab();                      // create the jumptable
660     else
661       MakeBackwardTab2();
662 
663     if( nEnde == nSuchIdx )                 // end position for the search
664         nEnde = sSearchKey.getLength();
665     else
666         nEnde += sSearchKey.getLength();
667 
668     sal_Int32 nCmpIdx = startPos;          // start position for the search
669 
670     while (nCmpIdx >= nEnde)
671     {
672         // if the match would be the completed cells, skip it.
673         if ( (!checkCTLStart || isCellStart( aStr, nCmpIdx -
674                         sSearchKey.getLength() )) && (!checkCTLEnd ||
675                     isCellStart( aStr, nCmpIdx)))
676         {
677             nSuchIdx = 0;
678             while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] ==
679                     aStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] )
680                 nSuchIdx++;
681             if( nSuchIdx >= sSearchKey.getLength() )
682             {
683                 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
684                 {
685                     sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength();
686                     bool bAtStart = !nFndStt;
687                     bool bAtEnd = nCmpIdx == startPos;
688                     bool bDelimBehind = IsDelimiter( aStr, nCmpIdx );
689                     bool bDelimBefore = bAtStart || // begin of paragraph
690                         IsDelimiter( aStr, nFndStt-1 );
691                     //  *       1 -> only one word in the paragraph
692                     //  *       2 -> at begin of paragraph
693                     //  *       3 -> at end of paragraph
694                     //  *       4 -> inside the paragraph
695                     if( ( bAtStart && bAtEnd ) ||           // 1
696                             ( bAtStart && bDelimBehind ) ||     // 2
697                             ( bAtEnd && bDelimBefore ) ||       // 3
698                             ( bDelimBefore && bDelimBehind ))   // 4
699                     {
700                         aRet.subRegExpressions = 1;
701                         aRet.startOffset.realloc( 1 );
702                         aRet.startOffset[ 0 ] = nCmpIdx;
703                         aRet.endOffset.realloc( 1 );
704                         aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
705                         return aRet;
706                     }
707                 }
708                 else
709                 {
710                     aRet.subRegExpressions = 1;
711                     aRet.startOffset.realloc( 1 );
712                     aRet.startOffset[ 0 ] = nCmpIdx;
713                     aRet.endOffset.realloc( 1 );
714                     aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
715                     return aRet;
716                 }
717             }
718         }
719         nSuchIdx = GetDiff( aStr[nCmpIdx - sSearchKey.getLength()] );
720         if( nCmpIdx < nSuchIdx )
721             return aRet;
722         nCmpIdx -= nSuchIdx;
723     }
724     return aRet;
725 }
726 
727 //---------------------------------------------------------------------------
728 
729 SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr,
730                                       sal_Int32 startPos, sal_Int32 endPos )
731             throw(RuntimeException)
732 {
733 	SearchResult aRet;
734 	aRet.subRegExpressions = 0;
735 	if( !pRegexMatcher)
736 		return aRet;
737 
738 	if( endPos > searchStr.getLength())
739 		endPos = searchStr.getLength();
740 
741 	// use the ICU RegexMatcher to find the matches
742 	UErrorCode nIcuErr = U_ZERO_ERROR;
743 	const IcuUniString aSearchTargetStr( searchStr.getStr(), endPos);
744 	pRegexMatcher->reset( aSearchTargetStr);
745 	if( !pRegexMatcher->find( startPos, nIcuErr))
746 		return aRet;
747 
748 	aRet.subRegExpressions = 1;
749 	aRet.startOffset.realloc( aRet.subRegExpressions);
750 	aRet.endOffset.realloc( aRet.subRegExpressions);
751 	aRet.startOffset[0] = pRegexMatcher->start( nIcuErr);
752 	aRet.endOffset[0]   = pRegexMatcher->end( nIcuErr);
753 
754 	return aRet;
755 }
756 
757 SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr,
758                                       sal_Int32 startPos, sal_Int32 endPos )
759             throw(RuntimeException)
760 {
761 	// NOTE: for backwards search callers provide startPos/endPos inverted!
762 	SearchResult aRet;
763 	aRet.subRegExpressions = 0;
764 	if( !pRegexMatcher)
765 		return aRet;
766 
767 	if( startPos > searchStr.getLength())
768 		startPos = searchStr.getLength();
769 
770 	// use the ICU RegexMatcher to find the matches
771 	// TODO: use ICU's backward searching once it becomes available
772 	UErrorCode nIcuErr = U_ZERO_ERROR;
773 	const IcuUniString aSearchTargetStr( searchStr.getStr(), startPos);
774 	pRegexMatcher->reset( aSearchTargetStr);
775 	if( !pRegexMatcher->find( endPos, nIcuErr))
776 		return aRet;
777 
778 	aRet.subRegExpressions = 1;
779 	aRet.startOffset.realloc( aRet.subRegExpressions);
780 	aRet.endOffset.realloc( aRet.subRegExpressions);
781 
782 	do {
783 		// NOTE: backward search seems to be expected to have startOfs/endOfs inverted!
784 		aRet.startOffset[0] = pRegexMatcher->end( nIcuErr);
785 		aRet.endOffset[0]   = pRegexMatcher->start( nIcuErr);
786 	} while( pRegexMatcher->find( aRet.endOffset[0]+1, nIcuErr));
787 
788 	return aRet;
789 }
790 
791 //---------------------------------------------------------------------------
792 
793 // search for words phonetically
794 SearchResult TextSearch::ApproxSrchFrwrd( const OUString& searchStr,
795                                           sal_Int32 startPos, sal_Int32 endPos )
796             throw(RuntimeException)
797 {
798     SearchResult aRet;
799     aRet.subRegExpressions = 0;
800 
801     if( !xBreak.is() )
802         return aRet;
803 
804     OUString aWTemp( searchStr );
805 
806     register sal_Int32 nStt, nEnd;
807 
808     Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
809             aSrchPara.Locale,
810             WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
811 
812     do
813     {
814         if( aWBnd.startPos >= endPos )
815             break;
816         nStt = aWBnd.startPos < startPos ? startPos : aWBnd.startPos;
817         nEnd = aWBnd.endPos > endPos ? endPos : aWBnd.endPos;
818 
819         if( nStt < nEnd &&
820                 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
821         {
822             aRet.subRegExpressions = 1;
823             aRet.startOffset.realloc( 1 );
824             aRet.startOffset[ 0 ] = nStt;
825             aRet.endOffset.realloc( 1 );
826             aRet.endOffset[ 0 ] = nEnd;
827             break;
828         }
829 
830         nStt = nEnd - 1;
831         aWBnd = xBreak->nextWord( aWTemp, nStt, aSrchPara.Locale,
832                 WordType::ANYWORD_IGNOREWHITESPACES);
833     } while( aWBnd.startPos != aWBnd.endPos ||
834             (aWBnd.endPos != aWTemp.getLength() && aWBnd.endPos != nEnd) );
835     // #i50244# aWBnd.endPos != nEnd : in case there is _no_ word (only
836     // whitespace) in searchStr, getWordBoundary() returned startPos,startPos
837     // and nextWord() does also => don't loop forever.
838     return aRet;
839 }
840 
841 SearchResult TextSearch::ApproxSrchBkwrd( const OUString& searchStr,
842                                           sal_Int32 startPos, sal_Int32 endPos )
843             throw(RuntimeException)
844 {
845     SearchResult aRet;
846     aRet.subRegExpressions = 0;
847 
848     if( !xBreak.is() )
849         return aRet;
850 
851     OUString aWTemp( searchStr );
852 
853     register sal_Int32 nStt, nEnd;
854 
855     Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
856             aSrchPara.Locale,
857             WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
858 
859     do
860     {
861         if( aWBnd.endPos <= endPos )
862             break;
863         nStt = aWBnd.startPos < endPos ? endPos : aWBnd.startPos;
864         nEnd = aWBnd.endPos > startPos ? startPos : aWBnd.endPos;
865 
866         if( nStt < nEnd &&
867                 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
868         {
869             aRet.subRegExpressions = 1;
870             aRet.startOffset.realloc( 1 );
871             aRet.startOffset[ 0 ] = nEnd;
872             aRet.endOffset.realloc( 1 );
873             aRet.endOffset[ 0 ] = nStt;
874             break;
875         }
876         if( !nStt )
877             break;
878 
879         aWBnd = xBreak->previousWord( aWTemp, nStt, aSrchPara.Locale,
880                 WordType::ANYWORD_IGNOREWHITESPACES);
881     } while( aWBnd.startPos != aWBnd.endPos || aWBnd.endPos != aWTemp.getLength() );
882     return aRet;
883 }
884 
885 
886 static const sal_Char cSearchName[] = "com.sun.star.util.TextSearch";
887 static const sal_Char cSearchImpl[] = "com.sun.star.util.TextSearch_i18n";
888 
889 static OUString getServiceName_Static()
890 {
891     return OUString::createFromAscii( cSearchName );
892 }
893 
894 static OUString getImplementationName_Static()
895 {
896     return OUString::createFromAscii( cSearchImpl );
897 }
898 
899 OUString SAL_CALL
900 TextSearch::getImplementationName()
901                 throw( RuntimeException )
902 {
903     return getImplementationName_Static();
904 }
905 
906 sal_Bool SAL_CALL
907 TextSearch::supportsService(const OUString& rServiceName)
908                 throw( RuntimeException )
909 {
910     return !rServiceName.compareToAscii( cSearchName );
911 }
912 
913 Sequence< OUString > SAL_CALL
914 TextSearch::getSupportedServiceNames(void) throw( RuntimeException )
915 {
916     Sequence< OUString > aRet(1);
917     aRet[0] = getServiceName_Static();
918     return aRet;
919 }
920 
921 ::com::sun::star::uno::Reference< ::com::sun::star::uno::XInterface >
922 SAL_CALL TextSearch_CreateInstance(
923         const ::com::sun::star::uno::Reference<
924         ::com::sun::star::lang::XMultiServiceFactory >& rxMSF )
925 {
926     return ::com::sun::star::uno::Reference<
927         ::com::sun::star::uno::XInterface >(
928                 (::cppu::OWeakObject*) new TextSearch( rxMSF ) );
929 }
930 
931 extern "C"
932 {
933 
934 void SAL_CALL component_getImplementationEnvironment(
935         const sal_Char** ppEnvTypeName, uno_Environment** /*ppEnv*/ )
936 {
937     *ppEnvTypeName = CPPU_CURRENT_LANGUAGE_BINDING_NAME;
938 }
939 
940 void* SAL_CALL component_getFactory( const sal_Char* sImplementationName,
941         void* _pServiceManager, void* /*_pRegistryKey*/ )
942 {
943     void* pRet = NULL;
944 
945     ::com::sun::star::lang::XMultiServiceFactory* pServiceManager =
946         reinterpret_cast< ::com::sun::star::lang::XMultiServiceFactory* >
947             ( _pServiceManager );
948     ::com::sun::star::uno::Reference<
949             ::com::sun::star::lang::XSingleServiceFactory > xFactory;
950 
951     if ( 0 == rtl_str_compare( sImplementationName, cSearchImpl) )
952     {
953         ::com::sun::star::uno::Sequence< ::rtl::OUString > aServiceNames(1);
954         aServiceNames[0] = getServiceName_Static();
955         xFactory = ::cppu::createSingleFactory(
956                 pServiceManager, getImplementationName_Static(),
957                 &TextSearch_CreateInstance, aServiceNames );
958     }
959 
960     if ( xFactory.is() )
961     {
962         xFactory->acquire();
963         pRet = xFactory.get();
964     }
965 
966     return pRet;
967 }
968 
969 } // extern "C"
970