1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_i18npool.hxx"
26 
27 #include "textsearch.hxx"
28 #include "levdis.hxx"
29 #include <com/sun/star/lang/Locale.hpp>
30 #include <com/sun/star/lang/XMultiServiceFactory.hpp>
31 #include <comphelper/processfactory.hxx>
32 #include <com/sun/star/i18n/UnicodeType.hpp>
33 #include <com/sun/star/util/SearchFlags.hpp>
34 #include <com/sun/star/i18n/WordType.hpp>
35 #include <com/sun/star/i18n/ScriptType.hpp>
36 #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
37 #include <com/sun/star/i18n/KCharacterType.hpp>
38 #include <com/sun/star/registry/XRegistryKey.hpp>
39 #include <cppuhelper/factory.hxx>
40 #include <cppuhelper/weak.hxx>
41 
42 #ifdef _MSC_VER
43 // get rid of that dumb compiler warning
44 // identifier was truncated to '255' characters in the debug information
45 // for STL template usage, if .pdb files are to be created
46 #pragma warning( disable: 4786 )
47 #endif
48 
49 #include <string.h>
50 
51 using namespace ::com::sun::star::util;
52 using namespace ::com::sun::star::uno;
53 using namespace ::com::sun::star::lang;
54 using namespace ::com::sun::star::i18n;
55 using namespace ::rtl;
56 
57 static sal_Int32 COMPLEX_TRANS_MASK_TMP =
58     TransliterationModules_ignoreBaFa_ja_JP |
59     TransliterationModules_ignoreIterationMark_ja_JP |
60     TransliterationModules_ignoreTiJi_ja_JP |
61     TransliterationModules_ignoreHyuByu_ja_JP |
62     TransliterationModules_ignoreSeZe_ja_JP |
63     TransliterationModules_ignoreIandEfollowedByYa_ja_JP |
64     TransliterationModules_ignoreKiKuFollowedBySa_ja_JP |
65     TransliterationModules_ignoreProlongedSoundMark_ja_JP;
66 static const sal_Int32 SIMPLE_TRANS_MASK = ~(COMPLEX_TRANS_MASK_TMP | TransliterationModules_IGNORE_WIDTH) | TransliterationModules_FULLWIDTH_HALFWIDTH;
67 static const sal_Int32 COMPLEX_TRANS_MASK = COMPLEX_TRANS_MASK_TMP | TransliterationModules_IGNORE_KANA | TransliterationModules_FULLWIDTH_HALFWIDTH;
68     // Above 2 transliteration is simple but need to take effect in
69     // complex transliteration
70 
71 TextSearch::TextSearch(const Reference < XMultiServiceFactory > & rxMSF)
72         : xMSF( rxMSF )
73         , pJumpTable( 0 )
74         , pJumpTable2( 0 )
75         , pRegexMatcher( NULL )
76         , pWLD( 0 )
77 {
78     SearchOptions aOpt;
79     aOpt.algorithmType = SearchAlgorithms_ABSOLUTE;
80     aOpt.searchFlag = SearchFlags::ALL_IGNORE_CASE;
81     //aOpt.Locale = ???;
82     setOptions( aOpt );
83 }
84 
85 TextSearch::~TextSearch()
86 {
87     delete pRegexMatcher;
88     delete pWLD;
89     delete pJumpTable;
90     delete pJumpTable2;
91 }
92 
93 void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeException )
94 {
95     aSrchPara = rOptions;
96 
97     delete pRegexMatcher, pRegexMatcher = NULL;
98     delete pWLD, pWLD = 0;
99     delete pJumpTable, pJumpTable = 0;
100     delete pJumpTable2, pJumpTable2 = 0;
101 
102     // Create Transliteration class
103     if( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
104     {
105         if( !xTranslit.is() )
106         {
107             Reference < XInterface > xI = xMSF->createInstance(
108                     OUString::createFromAscii(
109                         "com.sun.star.i18n.Transliteration"));
110             if ( xI.is() )
111                 xI->queryInterface( ::getCppuType(
112                             (const Reference< XExtendedTransliteration >*)0))
113                     >>= xTranslit;
114         }
115         // Load transliteration module
116         if( xTranslit.is() )
117             xTranslit->loadModule(
118                     (TransliterationModules)( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ),
119                     aSrchPara.Locale);
120     }
121     else if( xTranslit.is() )
122         xTranslit = 0;
123 
124     // Create Transliteration for 2<->1, 2<->2 transliteration
125     if ( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
126     {
127         if( !xTranslit2.is() )
128         {
129             Reference < XInterface > xI = xMSF->createInstance(
130                     OUString::createFromAscii(
131                         "com.sun.star.i18n.Transliteration"));
132             if ( xI.is() )
133                 xI->queryInterface( ::getCppuType(
134                             (const Reference< XExtendedTransliteration >*)0))
135                     >>= xTranslit2;
136         }
137         // Load transliteration module
138         if( xTranslit2.is() )
139             xTranslit2->loadModule(
140                     (TransliterationModules)( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ),
141                     aSrchPara.Locale);
142     }
143 
144     if ( !xBreak.is() )
145     {
146         Reference < XInterface > xI = xMSF->createInstance(
147                 OUString::createFromAscii( "com.sun.star.i18n.BreakIterator"));
148         if( xI.is() )
149             xI->queryInterface( ::getCppuType(
150                         (const Reference< XBreakIterator >*)0))
151                 >>= xBreak;
152     }
153 
154     sSrchStr = aSrchPara.searchString;
155 
156     // use transliteration here
157     if ( xTranslit.is() &&
158 	 aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
159         sSrchStr = xTranslit->transliterateString2String(
160                 aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
161 
162     if ( xTranslit2.is() &&
163 	 aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
164 	sSrchStr2 = xTranslit2->transliterateString2String(
165 	        aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
166 
167     // When start or end of search string is a complex script type, we need to
168     // make sure the result boundary is not located in the middle of cell.
169     checkCTLStart = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 0) ==
170                 ScriptType::COMPLEX));
171     checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr,
172                     sSrchStr.getLength()-1) == ScriptType::COMPLEX));
173 
174     switch( aSrchPara.algorithmType)
175     {
176 		case SearchAlgorithms_REGEXP:
177 			fnForward = &TextSearch::RESrchFrwrd;
178 			fnBackward = &TextSearch::RESrchBkwrd;
179 
180 			{
181 			sal_uInt32 nIcuSearchFlags = 0;
182 			// map com::sun::star::util::SearchFlags to ICU uregex.h flags
183 			// TODO: REG_EXTENDED, REG_NOT_BEGINOFLINE, REG_NOT_ENDOFLINE
184 			// REG_NEWLINE is neither defined properly nor used anywhere => not implemented
185 			// REG_NOSUB is not used anywhere => not implemented
186 			// NORM_WORD_ONLY is only used for SearchAlgorithm==Absolute
187 			// LEV_RELAXED is only used for SearchAlgorithm==Approximate
188 			// why is even ALL_IGNORE_CASE deprecated in UNO? because of transliteration taking care of it???
189 			if( (aSrchPara.searchFlag & com::sun::star::util::SearchFlags::ALL_IGNORE_CASE) != 0)
190 				nIcuSearchFlags |= UREGEX_CASE_INSENSITIVE;
191 			UErrorCode nIcuErr = U_ZERO_ERROR;
192 			// assumption: transliteration doesn't mangle regexp control chars
193 			OUString& rPatternStr = (aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK) ? sSrchStr
194 					: ((aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK) ? sSrchStr2 : aSrchPara.searchString);
195 			const IcuUniString aIcuSearchPatStr( rPatternStr.getStr(), rPatternStr.getLength());
196 			pRegexMatcher = new RegexMatcher( aIcuSearchPatStr, nIcuSearchFlags, nIcuErr);
197 			if( nIcuErr)
198 				{ delete pRegexMatcher; pRegexMatcher = NULL;}
199 			} break;
200 
201 		case SearchAlgorithms_APPROXIMATE:
202             fnForward = &TextSearch::ApproxSrchFrwrd;
203             fnBackward = &TextSearch::ApproxSrchBkwrd;
204 
205             pWLD = new WLevDistance( sSrchStr.getStr(), aSrchPara.changedChars,
206                     aSrchPara.insertedChars, aSrchPara.deletedChars,
207                     0 != (SearchFlags::LEV_RELAXED & aSrchPara.searchFlag ) );
208 
209             nLimit = pWLD->GetLimit();
210 			break;
211 
212 		default:
213             fnForward = &TextSearch::NSrchFrwrd;
214             fnBackward = &TextSearch::NSrchBkwrd;
215 			break;
216     }
217 }
218 
219 sal_Int32 FindPosInSeq_Impl( const Sequence <sal_Int32>& rOff, sal_Int32 nPos )
220 {
221     sal_Int32 nRet = 0, nEnd = rOff.getLength();
222     while( nRet < nEnd && nPos > rOff[ nRet ] ) ++nRet;
223     return nRet;
224 }
225 
226 sal_Bool TextSearch::isCellStart(const OUString& searchStr, sal_Int32 nPos)
227         throw( RuntimeException )
228 {
229     sal_Int32 nDone;
230     return nPos == xBreak->previousCharacters(searchStr, nPos+1,
231             aSrchPara.Locale, CharacterIteratorMode::SKIPCELL, 1, nDone);
232 }
233 
234 SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
235         throw( RuntimeException )
236 {
237     SearchResult sres;
238 
239     OUString in_str(searchStr);
240     sal_Int32 newStartPos = startPos;
241     sal_Int32 newEndPos = endPos;
242 
243     bUsePrimarySrchStr = true;
244 
245     if ( xTranslit.is() )
246     {
247         // apply normal transliteration (1<->1, 1<->0)
248         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
249         in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset );
250 
251         // JP 20.6.2001: also the start and end positions must be corrected!
252         if( startPos )
253             newStartPos = FindPosInSeq_Impl( offset, startPos );
254 
255         if( endPos < searchStr.getLength() )
256 	    newEndPos = FindPosInSeq_Impl( offset, endPos );
257         else
258             newEndPos = in_str.getLength();
259 
260         sres = (this->*fnForward)( in_str, newStartPos, newEndPos );
261 
262         for ( int k = 0; k < sres.startOffset.getLength(); k++ )
263         {
264             if (sres.startOffset[k])
265 	      sres.startOffset[k] = offset[sres.startOffset[k]];
266             // JP 20.6.2001: end is ever exclusive and then don't return
267             //               the position of the next character - return the
268             //               next position behind the last found character!
269             //               "a b c" find "b" must return 2,3 and not 2,4!!!
270             if (sres.endOffset[k])
271 	      sres.endOffset[k] = offset[sres.endOffset[k]-1] + 1;
272         }
273     }
274     else
275     {
276         sres = (this->*fnForward)( in_str, startPos, endPos );
277     }
278 
279     if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP)
280     {
281         SearchResult sres2;
282 
283 	in_str = OUString(searchStr);
284         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
285 
286         in_str = xTranslit2->transliterate( searchStr, 0, in_str.getLength(), offset );
287 
288         if( startPos )
289             startPos = FindPosInSeq_Impl( offset, startPos );
290 
291         if( endPos < searchStr.getLength() )
292             endPos = FindPosInSeq_Impl( offset, endPos );
293         else
294             endPos = in_str.getLength();
295 
296 	bUsePrimarySrchStr = false;
297         sres2 = (this->*fnForward)( in_str, startPos, endPos );
298 
299         for ( int k = 0; k < sres2.startOffset.getLength(); k++ )
300         {
301             if (sres2.startOffset[k])
302 	      sres2.startOffset[k] = offset[sres2.startOffset[k]-1] + 1;
303             if (sres2.endOffset[k])
304 	      sres2.endOffset[k] = offset[sres2.endOffset[k]-1] + 1;
305         }
306 
307 	// pick first and long one
308 	if ( sres.subRegExpressions == 0)
309 	    return sres2;
310 	if ( sres2.subRegExpressions == 1)
311 	{
312 	    if ( sres.startOffset[0] > sres2.startOffset[0])
313 	        return sres2;
314 	    else if ( sres.startOffset[0] == sres2.startOffset[0] &&
315 	        sres.endOffset[0] < sres2.endOffset[0])
316 	        return sres2;
317 	}
318     }
319 
320     return sres;
321 }
322 
323 SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
324         throw(RuntimeException)
325 {
326     SearchResult sres;
327 
328     OUString in_str(searchStr);
329     sal_Int32 newStartPos = startPos;
330     sal_Int32 newEndPos = endPos;
331 
332     bUsePrimarySrchStr = true;
333 
334     if ( xTranslit.is() )
335     {
336         // apply only simple 1<->1 transliteration here
337         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
338 	in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset );
339 
340         // JP 20.6.2001: also the start and end positions must be corrected!
341         if( startPos < searchStr.getLength() )
342             newStartPos = FindPosInSeq_Impl( offset, startPos );
343 	else
344 	    newStartPos = in_str.getLength();
345 
346         if( endPos )
347 	    newEndPos = FindPosInSeq_Impl( offset, endPos );
348 
349         sres = (this->*fnBackward)( in_str, newStartPos, newEndPos );
350 
351         for ( int k = 0; k < sres.startOffset.getLength(); k++ )
352         {
353             if (sres.startOffset[k])
354 	      sres.startOffset[k] = offset[sres.startOffset[k] - 1] + 1;
355             // JP 20.6.2001: end is ever exclusive and then don't return
356             //               the position of the next character - return the
357             //               next position behind the last found character!
358             //               "a b c" find "b" must return 2,3 and not 2,4!!!
359             if (sres.endOffset[k])
360 	      sres.endOffset[k] = offset[sres.endOffset[k]];
361         }
362     }
363     else
364     {
365         sres = (this->*fnBackward)( in_str, startPos, endPos );
366     }
367 
368     if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP )
369     {
370 	SearchResult sres2;
371 
372 	in_str = OUString(searchStr);
373         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
374 
375         in_str = xTranslit2->transliterate(searchStr, 0, in_str.getLength(), offset);
376 
377         if( startPos < searchStr.getLength() )
378             startPos = FindPosInSeq_Impl( offset, startPos );
379         else
380             startPos = in_str.getLength();
381 
382         if( endPos )
383             endPos = FindPosInSeq_Impl( offset, endPos );
384 
385 	bUsePrimarySrchStr = false;
386 	sres2 = (this->*fnBackward)( in_str, startPos, endPos );
387 
388         for( int k = 0; k < sres2.startOffset.getLength(); k++ )
389         {
390             if (sres2.startOffset[k])
391                 sres2.startOffset[k] = offset[sres2.startOffset[k]-1]+1;
392             if (sres2.endOffset[k])
393                 sres2.endOffset[k] = offset[sres2.endOffset[k]-1]+1;
394         }
395 
396 	// pick last and long one
397 	if ( sres.subRegExpressions == 0 )
398 	    return sres2;
399 	if ( sres2.subRegExpressions == 1 )
400 	{
401 	    if ( sres.startOffset[0] < sres2.startOffset[0] )
402 	        return sres2;
403 	    if ( sres.startOffset[0] == sres2.startOffset[0] &&
404 		sres.endOffset[0] > sres2.endOffset[0] )
405 	        return sres2;
406 	}
407     }
408 
409     return sres;
410 }
411 
412 //---------------------------------------------------------------------
413 
414 bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const
415 {
416     bool bRet = 1;
417     if( '\x7f' != rStr[nPos])
418     {
419         if ( !xCharClass.is() )
420         {
421             Reference < XInterface > xI = xMSF->createInstance(
422                     OUString::createFromAscii( "com.sun.star.i18n.CharacterClassification"));
423             if( xI.is() )
424                 xI->queryInterface( ::getCppuType(
425                             (const Reference< XCharacterClassification >*)0))
426                     >>= xCharClass;
427         }
428         if ( xCharClass.is() )
429         {
430             sal_Int32 nCType = xCharClass->getCharacterType( rStr, nPos,
431                     aSrchPara.Locale );
432             if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA |
433                             KCharacterType::LETTER ) & nCType ) )
434                 bRet = 0;
435         }
436     }
437     return bRet;
438 }
439 
440 // --------- helper methods for Boyer-Moore like text searching ----------
441 // TODO: use ICU's regex UREGEX_LITERAL mode instead when it becomes available
442 
443 void TextSearch::MakeForwardTab()
444 {
445     // create the jumptable for the search text
446     if( pJumpTable )
447     {
448         if( bIsForwardTab )
449             return ;                                        // the jumpTable is ok
450         delete pJumpTable;
451     }
452     bIsForwardTab = true;
453 
454     sal_Int32 n, nLen = sSrchStr.getLength();
455     pJumpTable = new TextSearchJumpTable;
456 
457     for( n = 0; n < nLen - 1; ++n )
458     {
459         sal_Unicode cCh = sSrchStr[n];
460         sal_Int32 nDiff = nLen - n - 1;
461 	TextSearchJumpTable::value_type aEntry( cCh, nDiff );
462 
463         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
464             pJumpTable->insert( aEntry );
465         if ( !aPair.second )
466             (*(aPair.first)).second = nDiff;
467     }
468 }
469 
470 void TextSearch::MakeForwardTab2()
471 {
472     // create the jumptable for the search text
473     if( pJumpTable2 )
474     {
475         if( bIsForwardTab )
476             return ;                                        // the jumpTable is ok
477         delete pJumpTable2;
478     }
479     bIsForwardTab = true;
480 
481     sal_Int32 n, nLen = sSrchStr2.getLength();
482     pJumpTable2 = new TextSearchJumpTable;
483 
484     for( n = 0; n < nLen - 1; ++n )
485     {
486         sal_Unicode cCh = sSrchStr2[n];
487         sal_Int32 nDiff = nLen - n - 1;
488 
489 	TextSearchJumpTable::value_type aEntry( cCh, nDiff );
490         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
491             pJumpTable2->insert( aEntry );
492         if ( !aPair.second )
493             (*(aPair.first)).second = nDiff;
494     }
495 }
496 
497 void TextSearch::MakeBackwardTab()
498 {
499     // create the jumptable for the search text
500     if( pJumpTable )
501     {
502         if( !bIsForwardTab )
503             return ;                                        // the jumpTable is ok
504         delete pJumpTable;
505     }
506     bIsForwardTab = false;
507 
508     sal_Int32 n, nLen = sSrchStr.getLength();
509     pJumpTable = new TextSearchJumpTable;
510 
511     for( n = nLen-1; n > 0; --n )
512     {
513         sal_Unicode cCh = sSrchStr[n];
514         TextSearchJumpTable::value_type aEntry( cCh, n );
515         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
516             pJumpTable->insert( aEntry );
517         if ( !aPair.second )
518             (*(aPair.first)).second = n;
519     }
520 }
521 
522 void TextSearch::MakeBackwardTab2()
523 {
524     // create the jumptable for the search text
525     if( pJumpTable2 )
526     {
527         if( !bIsForwardTab )
528             return ;                                        // the jumpTable is ok
529         delete pJumpTable2;
530     }
531     bIsForwardTab = false;
532 
533     sal_Int32 n, nLen = sSrchStr2.getLength();
534     pJumpTable2 = new TextSearchJumpTable;
535 
536     for( n = nLen-1; n > 0; --n )
537     {
538         sal_Unicode cCh = sSrchStr2[n];
539         TextSearchJumpTable::value_type aEntry( cCh, n );
540         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
541             pJumpTable2->insert( aEntry );
542         if ( !aPair.second )
543             (*(aPair.first)).second = n;
544     }
545 }
546 
547 sal_Int32 TextSearch::GetDiff( const sal_Unicode cChr ) const
548 {
549     TextSearchJumpTable *pJump;
550     OUString sSearchKey;
551 
552     if ( bUsePrimarySrchStr ) {
553       pJump = pJumpTable;
554       sSearchKey = sSrchStr;
555     } else {
556       pJump = pJumpTable2;
557       sSearchKey = sSrchStr2;
558     }
559 
560     TextSearchJumpTable::const_iterator iLook = pJump->find( cChr );
561     if ( iLook == pJump->end() )
562         return sSearchKey.getLength();
563     return (*iLook).second;
564 }
565 
566 
567 // TextSearch::NSrchFrwrd is mis-optimized on unxsoli (#i105945#)
568 SearchResult TextSearch::NSrchFrwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
569         throw(RuntimeException)
570 {
571     SearchResult aRet;
572     aRet.subRegExpressions = 0;
573 
574     OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
575 
576     OUString aStr( searchStr );
577     sal_Int32 nSuchIdx = aStr.getLength();
578     sal_Int32 nEnde = endPos;
579     if( !nSuchIdx || !sSearchKey.getLength() || sSearchKey.getLength() > nSuchIdx )
580         return aRet;
581 
582 
583     if( nEnde < sSearchKey.getLength() )  // position inside the search region ?
584         return aRet;
585 
586     nEnde -= sSearchKey.getLength();
587 
588     if (bUsePrimarySrchStr)
589       MakeForwardTab();                   // create the jumptable
590     else
591       MakeForwardTab2();
592 
593     for (sal_Int32 nCmpIdx = startPos; // start position for the search
594             nCmpIdx <= nEnde;
595             nCmpIdx += GetDiff( aStr[nCmpIdx + sSearchKey.getLength()-1]))
596     {
597         // if the match would be the completed cells, skip it.
598         if ( (checkCTLStart && !isCellStart( aStr, nCmpIdx )) || (checkCTLEnd
599                     && !isCellStart( aStr, nCmpIdx + sSearchKey.getLength())) )
600             continue;
601 
602         nSuchIdx = sSearchKey.getLength() - 1;
603         while( nSuchIdx >= 0 && sSearchKey[nSuchIdx] == aStr[nCmpIdx + nSuchIdx])
604         {
605             if( nSuchIdx == 0 )
606             {
607                 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
608                 {
609                     sal_Int32 nFndEnd = nCmpIdx + sSearchKey.getLength();
610                     bool bAtStart = !nCmpIdx;
611                     bool bAtEnd = nFndEnd == endPos;
612                     bool bDelimBefore = bAtStart || IsDelimiter( aStr, nCmpIdx-1 );
613                     bool bDelimBehind = IsDelimiter(  aStr, nFndEnd );
614                     //  *       1 -> only one word in the paragraph
615                     //  *       2 -> at begin of paragraph
616                     //  *       3 -> at end of paragraph
617                     //  *       4 -> inside the paragraph
618                     if( !(  ( bAtStart && bAtEnd ) ||           // 1
619                                 ( bAtStart && bDelimBehind ) ||     // 2
620                                 ( bAtEnd && bDelimBefore ) ||       // 3
621                                 ( bDelimBefore && bDelimBehind )))  // 4
622                         break;
623                 }
624 
625                 aRet.subRegExpressions = 1;
626                 aRet.startOffset.realloc( 1 );
627                 aRet.startOffset[ 0 ] = nCmpIdx;
628                 aRet.endOffset.realloc( 1 );
629                 aRet.endOffset[ 0 ] = nCmpIdx + sSearchKey.getLength();
630 
631                 return aRet;
632             }
633             else
634                 nSuchIdx--;
635         }
636     }
637     return aRet;
638 }
639 
640 SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
641         throw(RuntimeException)
642 {
643     SearchResult aRet;
644     aRet.subRegExpressions = 0;
645 
646     OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
647 
648     OUString aStr( searchStr );
649     sal_Int32 nSuchIdx = aStr.getLength();
650     sal_Int32 nEnde = endPos;
651     if( nSuchIdx == 0 || sSearchKey.getLength() == 0 || sSearchKey.getLength() > nSuchIdx)
652         return aRet;
653 
654     if (bUsePrimarySrchStr)
655       MakeBackwardTab();                      // create the jumptable
656     else
657       MakeBackwardTab2();
658 
659     if( nEnde == nSuchIdx )                 // end position for the search
660         nEnde = sSearchKey.getLength();
661     else
662         nEnde += sSearchKey.getLength();
663 
664     sal_Int32 nCmpIdx = startPos;          // start position for the search
665 
666     while (nCmpIdx >= nEnde)
667     {
668         // if the match would be the completed cells, skip it.
669         if ( (!checkCTLStart || isCellStart( aStr, nCmpIdx -
670                         sSearchKey.getLength() )) && (!checkCTLEnd ||
671                     isCellStart( aStr, nCmpIdx)))
672         {
673             nSuchIdx = 0;
674             while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] ==
675                     aStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] )
676                 nSuchIdx++;
677             if( nSuchIdx >= sSearchKey.getLength() )
678             {
679                 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
680                 {
681                     sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength();
682                     bool bAtStart = !nFndStt;
683                     bool bAtEnd = nCmpIdx == startPos;
684                     bool bDelimBehind = IsDelimiter( aStr, nCmpIdx );
685                     bool bDelimBefore = bAtStart || // begin of paragraph
686                         IsDelimiter( aStr, nFndStt-1 );
687                     //  *       1 -> only one word in the paragraph
688                     //  *       2 -> at begin of paragraph
689                     //  *       3 -> at end of paragraph
690                     //  *       4 -> inside the paragraph
691                     if( ( bAtStart && bAtEnd ) ||           // 1
692                             ( bAtStart && bDelimBehind ) ||     // 2
693                             ( bAtEnd && bDelimBefore ) ||       // 3
694                             ( bDelimBefore && bDelimBehind ))   // 4
695                     {
696                         aRet.subRegExpressions = 1;
697                         aRet.startOffset.realloc( 1 );
698                         aRet.startOffset[ 0 ] = nCmpIdx;
699                         aRet.endOffset.realloc( 1 );
700                         aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
701                         return aRet;
702                     }
703                 }
704                 else
705                 {
706                     aRet.subRegExpressions = 1;
707                     aRet.startOffset.realloc( 1 );
708                     aRet.startOffset[ 0 ] = nCmpIdx;
709                     aRet.endOffset.realloc( 1 );
710                     aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
711                     return aRet;
712                 }
713             }
714         }
715         nSuchIdx = GetDiff( aStr[nCmpIdx - sSearchKey.getLength()] );
716         if( nCmpIdx < nSuchIdx )
717             return aRet;
718         nCmpIdx -= nSuchIdx;
719     }
720     return aRet;
721 }
722 
723 //---------------------------------------------------------------------------
724 
725 SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr,
726                                       sal_Int32 startPos, sal_Int32 endPos )
727             throw(RuntimeException)
728 {
729 	SearchResult aRet;
730 	aRet.subRegExpressions = 0;
731 	if( !pRegexMatcher)
732 		return aRet;
733 
734 	if( endPos > searchStr.getLength())
735 		endPos = searchStr.getLength();
736 
737 	// use the ICU RegexMatcher to find the matches
738 	UErrorCode nIcuErr = U_ZERO_ERROR;
739 	const IcuUniString aSearchTargetStr( searchStr.getStr(), endPos);
740 	pRegexMatcher->reset( aSearchTargetStr);
741 	if( !pRegexMatcher->find( startPos, nIcuErr))
742 		return aRet;
743 
744 	const int nGroupCount = pRegexMatcher->groupCount();
745 	aRet.subRegExpressions = nGroupCount + 1;
746 	aRet.startOffset.realloc( aRet.subRegExpressions);
747 	aRet.endOffset.realloc( aRet.subRegExpressions);
748 	aRet.startOffset[0] = pRegexMatcher->start( nIcuErr);
749 	aRet.endOffset[0]   = pRegexMatcher->end( nIcuErr);
750 	for( int i = 1; i <= nGroupCount; ++i) {
751 		aRet.startOffset[i] = pRegexMatcher->start( i, nIcuErr);
752 		aRet.endOffset[i]   = pRegexMatcher->end( i, nIcuErr);
753 	}
754 
755 	return aRet;
756 }
757 
758 SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr,
759                                       sal_Int32 startPos, sal_Int32 endPos )
760             throw(RuntimeException)
761 {
762 	// NOTE: for backwards search callers provide startPos/endPos inverted!
763 	SearchResult aRet;
764 	aRet.subRegExpressions = 0;
765 	if( !pRegexMatcher)
766 		return aRet;
767 
768 	if( startPos > searchStr.getLength())
769 		startPos = searchStr.getLength();
770 
771 	// use the ICU RegexMatcher to find the matches
772 	// TODO: use ICU's backward searching once it becomes available
773 	//       as its replacement using forward search is not as good as the real thing
774 	UErrorCode nIcuErr = U_ZERO_ERROR;
775 	const IcuUniString aSearchTargetStr( searchStr.getStr(), startPos);
776 	pRegexMatcher->reset( aSearchTargetStr);
777 	if( !pRegexMatcher->find( endPos, nIcuErr))
778 		return aRet;
779 
780 	// find the last match
781 	int nLastPos = 0;
782 	do {
783 		nLastPos = pRegexMatcher->start( nIcuErr);
784 	} while( pRegexMatcher->find( nLastPos + 1, nIcuErr));
785 
786 	// find last match again to get its details
787 	pRegexMatcher->find( nLastPos, nIcuErr);
788 
789 	// fill in the details of the last match
790 	const int nGroupCount = pRegexMatcher->groupCount();
791 	aRet.subRegExpressions = nGroupCount + 1;
792 	aRet.startOffset.realloc( aRet.subRegExpressions);
793 	aRet.endOffset.realloc( aRet.subRegExpressions);
794 	// NOTE: existing users of backward search seem to expect startOfs/endOfs being inverted!
795 	aRet.startOffset[0] = pRegexMatcher->end( nIcuErr);
796 	aRet.endOffset[0]   = pRegexMatcher->start( nIcuErr);
797 	for( int i = 1; i <= nGroupCount; ++i) {
798 		aRet.startOffset[i] = pRegexMatcher->end( i, nIcuErr);
799 		aRet.endOffset[i]   = pRegexMatcher->start( i, nIcuErr);
800 	}
801 
802 	return aRet;
803 }
804 
805 //---------------------------------------------------------------------------
806 
807 // search for words phonetically
808 SearchResult TextSearch::ApproxSrchFrwrd( const OUString& searchStr,
809                                           sal_Int32 startPos, sal_Int32 endPos )
810             throw(RuntimeException)
811 {
812     SearchResult aRet;
813     aRet.subRegExpressions = 0;
814 
815     if( !xBreak.is() )
816         return aRet;
817 
818     OUString aWTemp( searchStr );
819 
820     register sal_Int32 nStt, nEnd;
821 
822     Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
823             aSrchPara.Locale,
824             WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
825 
826     do
827     {
828         if( aWBnd.startPos >= endPos )
829             break;
830         nStt = aWBnd.startPos < startPos ? startPos : aWBnd.startPos;
831         nEnd = aWBnd.endPos > endPos ? endPos : aWBnd.endPos;
832 
833         if( nStt < nEnd &&
834                 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
835         {
836             aRet.subRegExpressions = 1;
837             aRet.startOffset.realloc( 1 );
838             aRet.startOffset[ 0 ] = nStt;
839             aRet.endOffset.realloc( 1 );
840             aRet.endOffset[ 0 ] = nEnd;
841             break;
842         }
843 
844         nStt = nEnd - 1;
845         aWBnd = xBreak->nextWord( aWTemp, nStt, aSrchPara.Locale,
846                 WordType::ANYWORD_IGNOREWHITESPACES);
847     } while( aWBnd.startPos != aWBnd.endPos ||
848             (aWBnd.endPos != aWTemp.getLength() && aWBnd.endPos != nEnd) );
849     // #i50244# aWBnd.endPos != nEnd : in case there is _no_ word (only
850     // whitespace) in searchStr, getWordBoundary() returned startPos,startPos
851     // and nextWord() does also => don't loop forever.
852     return aRet;
853 }
854 
855 SearchResult TextSearch::ApproxSrchBkwrd( const OUString& searchStr,
856                                           sal_Int32 startPos, sal_Int32 endPos )
857             throw(RuntimeException)
858 {
859     SearchResult aRet;
860     aRet.subRegExpressions = 0;
861 
862     if( !xBreak.is() )
863         return aRet;
864 
865     OUString aWTemp( searchStr );
866 
867     register sal_Int32 nStt, nEnd;
868 
869     Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
870             aSrchPara.Locale,
871             WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
872 
873     do
874     {
875         if( aWBnd.endPos <= endPos )
876             break;
877         nStt = aWBnd.startPos < endPos ? endPos : aWBnd.startPos;
878         nEnd = aWBnd.endPos > startPos ? startPos : aWBnd.endPos;
879 
880         if( nStt < nEnd &&
881                 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
882         {
883             aRet.subRegExpressions = 1;
884             aRet.startOffset.realloc( 1 );
885             aRet.startOffset[ 0 ] = nEnd;
886             aRet.endOffset.realloc( 1 );
887             aRet.endOffset[ 0 ] = nStt;
888             break;
889         }
890         if( !nStt )
891             break;
892 
893         aWBnd = xBreak->previousWord( aWTemp, nStt, aSrchPara.Locale,
894                 WordType::ANYWORD_IGNOREWHITESPACES);
895     } while( aWBnd.startPos != aWBnd.endPos || aWBnd.endPos != aWTemp.getLength() );
896     return aRet;
897 }
898 
899 
900 static const sal_Char cSearchName[] = "com.sun.star.util.TextSearch";
901 static const sal_Char cSearchImpl[] = "com.sun.star.util.TextSearch_i18n";
902 
903 static OUString getServiceName_Static()
904 {
905     return OUString::createFromAscii( cSearchName );
906 }
907 
908 static OUString getImplementationName_Static()
909 {
910     return OUString::createFromAscii( cSearchImpl );
911 }
912 
913 OUString SAL_CALL
914 TextSearch::getImplementationName()
915                 throw( RuntimeException )
916 {
917     return getImplementationName_Static();
918 }
919 
920 sal_Bool SAL_CALL
921 TextSearch::supportsService(const OUString& rServiceName)
922                 throw( RuntimeException )
923 {
924     return !rServiceName.compareToAscii( cSearchName );
925 }
926 
927 Sequence< OUString > SAL_CALL
928 TextSearch::getSupportedServiceNames(void) throw( RuntimeException )
929 {
930     Sequence< OUString > aRet(1);
931     aRet[0] = getServiceName_Static();
932     return aRet;
933 }
934 
935 ::com::sun::star::uno::Reference< ::com::sun::star::uno::XInterface >
936 SAL_CALL TextSearch_CreateInstance(
937         const ::com::sun::star::uno::Reference<
938         ::com::sun::star::lang::XMultiServiceFactory >& rxMSF )
939 {
940     return ::com::sun::star::uno::Reference<
941         ::com::sun::star::uno::XInterface >(
942                 (::cppu::OWeakObject*) new TextSearch( rxMSF ) );
943 }
944 
945 extern "C"
946 {
947 
948 void SAL_CALL component_getImplementationEnvironment(
949         const sal_Char** ppEnvTypeName, uno_Environment** /*ppEnv*/ )
950 {
951     *ppEnvTypeName = CPPU_CURRENT_LANGUAGE_BINDING_NAME;
952 }
953 
954 void* SAL_CALL component_getFactory( const sal_Char* sImplementationName,
955         void* _pServiceManager, void* /*_pRegistryKey*/ )
956 {
957     void* pRet = NULL;
958 
959     ::com::sun::star::lang::XMultiServiceFactory* pServiceManager =
960         reinterpret_cast< ::com::sun::star::lang::XMultiServiceFactory* >
961             ( _pServiceManager );
962     ::com::sun::star::uno::Reference<
963             ::com::sun::star::lang::XSingleServiceFactory > xFactory;
964 
965     if ( 0 == rtl_str_compare( sImplementationName, cSearchImpl) )
966     {
967         ::com::sun::star::uno::Sequence< ::rtl::OUString > aServiceNames(1);
968         aServiceNames[0] = getServiceName_Static();
969         xFactory = ::cppu::createSingleFactory(
970                 pServiceManager, getImplementationName_Static(),
971                 &TextSearch_CreateInstance, aServiceNames );
972     }
973 
974     if ( xFactory.is() )
975     {
976         xFactory->acquire();
977         pRet = xFactory.get();
978     }
979 
980     return pRet;
981 }
982 
983 } // extern "C"
984