1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_i18npool.hxx"
30 
31 #include "textsearch.hxx"
32 #include "levdis.hxx"
33 #include <regexp/reclass.hxx>
34 #include <com/sun/star/lang/Locale.hpp>
35 #include <com/sun/star/lang/XMultiServiceFactory.hpp>
36 #include <comphelper/processfactory.hxx>
37 #include <com/sun/star/i18n/UnicodeType.hpp>
38 #include <com/sun/star/util/SearchFlags.hpp>
39 #include <com/sun/star/i18n/WordType.hpp>
40 #include <com/sun/star/i18n/ScriptType.hpp>
41 #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
42 #include <com/sun/star/i18n/KCharacterType.hpp>
43 #include <com/sun/star/registry/XRegistryKey.hpp>
44 #include <cppuhelper/factory.hxx>
45 #include <cppuhelper/weak.hxx>
46 
47 #ifdef _MSC_VER
48 // get rid of that dumb compiler warning
49 // identifier was truncated to '255' characters in the debug information
50 // for STL template usage, if .pdb files are to be created
51 #pragma warning( disable: 4786 )
52 #endif
53 
54 #include <string.h>
55 
56 using namespace ::com::sun::star::util;
57 using namespace ::com::sun::star::uno;
58 using namespace ::com::sun::star::lang;
59 using namespace ::com::sun::star::i18n;
60 using namespace ::rtl;
61 
62 static sal_Int32 COMPLEX_TRANS_MASK_TMP =
63     TransliterationModules_ignoreBaFa_ja_JP |
64     TransliterationModules_ignoreIterationMark_ja_JP |
65     TransliterationModules_ignoreTiJi_ja_JP |
66     TransliterationModules_ignoreHyuByu_ja_JP |
67     TransliterationModules_ignoreSeZe_ja_JP |
68     TransliterationModules_ignoreIandEfollowedByYa_ja_JP |
69     TransliterationModules_ignoreKiKuFollowedBySa_ja_JP |
70     TransliterationModules_ignoreProlongedSoundMark_ja_JP;
71 static const sal_Int32 SIMPLE_TRANS_MASK = 0xffffffff ^ COMPLEX_TRANS_MASK_TMP;
72 static const sal_Int32 COMPLEX_TRANS_MASK =
73     COMPLEX_TRANS_MASK_TMP |
74     TransliterationModules_IGNORE_KANA |
75     TransliterationModules_IGNORE_WIDTH;
76     // Above 2 transliteration is simple but need to take effect in
77     // complex transliteration
78 
79 TextSearch::TextSearch(const Reference < XMultiServiceFactory > & rxMSF)
80         : xMSF( rxMSF )
81         , pJumpTable( 0 )
82         , pJumpTable2( 0 )
83         , pRegExp( 0 )
84         , pWLD( 0 )
85 {
86     SearchOptions aOpt;
87     aOpt.algorithmType = SearchAlgorithms_ABSOLUTE;
88     aOpt.searchFlag = SearchFlags::ALL_IGNORE_CASE;
89     //aOpt.Locale = ???;
90     setOptions( aOpt );
91 }
92 
93 TextSearch::~TextSearch()
94 {
95     delete pRegExp;
96     delete pWLD;
97     delete pJumpTable;
98     delete pJumpTable2;
99 }
100 
101 void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeException )
102 {
103     aSrchPara = rOptions;
104 
105     delete pRegExp, pRegExp = 0;
106     delete pWLD, pWLD = 0;
107     delete pJumpTable, pJumpTable = 0;
108     delete pJumpTable2, pJumpTable2 = 0;
109 
110     // Create Transliteration class
111     if( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
112     {
113         if( !xTranslit.is() )
114         {
115             Reference < XInterface > xI = xMSF->createInstance(
116                     OUString::createFromAscii(
117                         "com.sun.star.i18n.Transliteration"));
118             if ( xI.is() )
119                 xI->queryInterface( ::getCppuType(
120                             (const Reference< XExtendedTransliteration >*)0))
121                     >>= xTranslit;
122         }
123         // Load transliteration module
124         if( xTranslit.is() )
125             xTranslit->loadModule(
126                     (TransliterationModules)( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ),
127                     aSrchPara.Locale);
128     }
129     else if( xTranslit.is() )
130         xTranslit = 0;
131 
132     // Create Transliteration for 2<->1, 2<->2 transliteration
133     if ( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
134     {
135         if( !xTranslit2.is() )
136         {
137             Reference < XInterface > xI = xMSF->createInstance(
138                     OUString::createFromAscii(
139                         "com.sun.star.i18n.Transliteration"));
140             if ( xI.is() )
141                 xI->queryInterface( ::getCppuType(
142                             (const Reference< XExtendedTransliteration >*)0))
143                     >>= xTranslit2;
144         }
145         // Load transliteration module
146         if( xTranslit2.is() )
147             xTranslit2->loadModule(
148                     (TransliterationModules)( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ),
149                     aSrchPara.Locale);
150     }
151 
152     if ( !xBreak.is() )
153     {
154         Reference < XInterface > xI = xMSF->createInstance(
155                 OUString::createFromAscii( "com.sun.star.i18n.BreakIterator"));
156         if( xI.is() )
157             xI->queryInterface( ::getCppuType(
158                         (const Reference< XBreakIterator >*)0))
159                 >>= xBreak;
160     }
161 
162     sSrchStr = aSrchPara.searchString;
163 
164     // use transliteration here, but only if not RegEx, which does it different
165     if ( aSrchPara.algorithmType != SearchAlgorithms_REGEXP && xTranslit.is() &&
166 	 aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
167         sSrchStr = xTranslit->transliterateString2String(
168                 aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
169 
170     if ( aSrchPara.algorithmType != SearchAlgorithms_REGEXP && xTranslit2.is() &&
171 	 aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
172 	sSrchStr2 = xTranslit2->transliterateString2String(
173 	        aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
174 
175     // When start or end of search string is a complex script type, we need to
176     // make sure the result boundary is not located in the middle of cell.
177     checkCTLStart = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 0) ==
178                 ScriptType::COMPLEX));
179     checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr,
180                     sSrchStr.getLength()-1) == ScriptType::COMPLEX));
181 
182     if ( aSrchPara.algorithmType == SearchAlgorithms_REGEXP )
183     {
184         fnForward = &TextSearch::RESrchFrwrd;
185         fnBackward = &TextSearch::RESrchBkwrd;
186 
187         pRegExp = new Regexpr( aSrchPara, xTranslit );
188     }
189     else
190     {
191         if ( aSrchPara.algorithmType == SearchAlgorithms_APPROXIMATE )
192         {
193             fnForward = &TextSearch::ApproxSrchFrwrd;
194             fnBackward = &TextSearch::ApproxSrchBkwrd;
195 
196             pWLD = new WLevDistance( sSrchStr.getStr(), aSrchPara.changedChars,
197                     aSrchPara.insertedChars, aSrchPara.deletedChars,
198                     0 != (SearchFlags::LEV_RELAXED & aSrchPara.searchFlag ) );
199 
200             nLimit = pWLD->GetLimit();
201         }
202         else
203         {
204             fnForward = &TextSearch::NSrchFrwrd;
205             fnBackward = &TextSearch::NSrchBkwrd;
206         }
207     }
208 }
209 
210 sal_Int32 FindPosInSeq_Impl( const Sequence <sal_Int32>& rOff, sal_Int32 nPos )
211 {
212     sal_Int32 nRet = 0, nEnd = rOff.getLength();
213     while( nRet < nEnd && nPos > rOff[ nRet ] ) ++nRet;
214     return nRet;
215 }
216 
217 sal_Bool TextSearch::isCellStart(const OUString& searchStr, sal_Int32 nPos)
218         throw( RuntimeException )
219 {
220     sal_Int32 nDone;
221     return nPos == xBreak->previousCharacters(searchStr, nPos+1,
222             aSrchPara.Locale, CharacterIteratorMode::SKIPCELL, 1, nDone);
223 }
224 
225 SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
226         throw( RuntimeException )
227 {
228     SearchResult sres;
229 
230     OUString in_str(searchStr);
231     sal_Int32 newStartPos = startPos;
232     sal_Int32 newEndPos = endPos;
233 
234     bUsePrimarySrchStr = true;
235 
236     if ( xTranslit.is() )
237     {
238         // apply normal transliteration (1<->1, 1<->0)
239         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
240         in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset );
241 
242         // JP 20.6.2001: also the start and end positions must be corrected!
243         if( startPos )
244             newStartPos = FindPosInSeq_Impl( offset, startPos );
245 
246         if( endPos < searchStr.getLength() )
247 	    newEndPos = FindPosInSeq_Impl( offset, endPos );
248         else
249             newEndPos = in_str.getLength();
250 
251         sres = (this->*fnForward)( in_str, newStartPos, newEndPos );
252 
253         for ( int k = 0; k < sres.startOffset.getLength(); k++ )
254         {
255             if (sres.startOffset[k])
256 	      sres.startOffset[k] = offset[sres.startOffset[k]];
257             // JP 20.6.2001: end is ever exclusive and then don't return
258             //               the position of the next character - return the
259             //               next position behind the last found character!
260             //               "a b c" find "b" must return 2,3 and not 2,4!!!
261             if (sres.endOffset[k])
262 	      sres.endOffset[k] = offset[sres.endOffset[k]-1] + 1;
263         }
264     }
265     else
266     {
267         sres = (this->*fnForward)( in_str, startPos, endPos );
268     }
269 
270     if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP)
271     {
272         SearchResult sres2;
273 
274 	in_str = OUString(searchStr);
275         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
276 
277         in_str = xTranslit2->transliterate( searchStr, 0, in_str.getLength(), offset );
278 
279         if( startPos )
280             startPos = FindPosInSeq_Impl( offset, startPos );
281 
282         if( endPos < searchStr.getLength() )
283             endPos = FindPosInSeq_Impl( offset, endPos );
284         else
285             endPos = in_str.getLength();
286 
287 	bUsePrimarySrchStr = false;
288         sres2 = (this->*fnForward)( in_str, startPos, endPos );
289 
290         for ( int k = 0; k < sres2.startOffset.getLength(); k++ )
291         {
292             if (sres2.startOffset[k])
293 	      sres2.startOffset[k] = offset[sres2.startOffset[k]-1] + 1;
294             if (sres2.endOffset[k])
295 	      sres2.endOffset[k] = offset[sres2.endOffset[k]-1] + 1;
296         }
297 
298 	// pick first and long one
299 	if ( sres.subRegExpressions == 0)
300 	    return sres2;
301 	if ( sres2.subRegExpressions == 1)
302 	{
303 	    if ( sres.startOffset[0] > sres2.startOffset[0])
304 	        return sres2;
305 	    else if ( sres.startOffset[0] == sres2.startOffset[0] &&
306 	        sres.endOffset[0] < sres2.endOffset[0])
307 	        return sres2;
308 	}
309     }
310 
311     return sres;
312 }
313 
314 SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
315         throw(RuntimeException)
316 {
317     SearchResult sres;
318 
319     OUString in_str(searchStr);
320     sal_Int32 newStartPos = startPos;
321     sal_Int32 newEndPos = endPos;
322 
323     bUsePrimarySrchStr = true;
324 
325     if ( xTranslit.is() )
326     {
327         // apply only simple 1<->1 transliteration here
328         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
329 	in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset );
330 
331         // JP 20.6.2001: also the start and end positions must be corrected!
332         if( startPos < searchStr.getLength() )
333             newStartPos = FindPosInSeq_Impl( offset, startPos );
334 	else
335 	    newStartPos = in_str.getLength();
336 
337         if( endPos )
338 	    newEndPos = FindPosInSeq_Impl( offset, endPos );
339 
340         sres = (this->*fnBackward)( in_str, newStartPos, newEndPos );
341 
342         for ( int k = 0; k < sres.startOffset.getLength(); k++ )
343         {
344             if (sres.startOffset[k])
345 	      sres.startOffset[k] = offset[sres.startOffset[k] - 1] + 1;
346             // JP 20.6.2001: end is ever exclusive and then don't return
347             //               the position of the next character - return the
348             //               next position behind the last found character!
349             //               "a b c" find "b" must return 2,3 and not 2,4!!!
350             if (sres.endOffset[k])
351 	      sres.endOffset[k] = offset[sres.endOffset[k]];
352         }
353     }
354     else
355     {
356         sres = (this->*fnBackward)( in_str, startPos, endPos );
357     }
358 
359     if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP )
360     {
361 	SearchResult sres2;
362 
363 	in_str = OUString(searchStr);
364         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
365 
366         in_str = xTranslit2->transliterate(searchStr, 0, in_str.getLength(), offset);
367 
368         if( startPos < searchStr.getLength() )
369             startPos = FindPosInSeq_Impl( offset, startPos );
370         else
371             startPos = in_str.getLength();
372 
373         if( endPos )
374             endPos = FindPosInSeq_Impl( offset, endPos );
375 
376 	bUsePrimarySrchStr = false;
377 	sres2 = (this->*fnBackward)( in_str, startPos, endPos );
378 
379         for( int k = 0; k < sres2.startOffset.getLength(); k++ )
380         {
381             if (sres2.startOffset[k])
382                 sres2.startOffset[k] = offset[sres2.startOffset[k]-1]+1;
383             if (sres2.endOffset[k])
384                 sres2.endOffset[k] = offset[sres2.endOffset[k]-1]+1;
385         }
386 
387 	// pick last and long one
388 	if ( sres.subRegExpressions == 0 )
389 	    return sres2;
390 	if ( sres2.subRegExpressions == 1 )
391 	{
392 	    if ( sres.startOffset[0] < sres2.startOffset[0] )
393 	        return sres2;
394 	    if ( sres.startOffset[0] == sres2.startOffset[0] &&
395 		sres.endOffset[0] > sres2.endOffset[0] )
396 	        return sres2;
397 	}
398     }
399 
400     return sres;
401 }
402 
403 
404 
405 //--------------- die Wort-Trennner ----------------------------------
406 
407 bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const
408 {
409     bool bRet = 1;
410     if( '\x7f' != rStr[nPos])
411     {
412         if ( !xCharClass.is() )
413         {
414             Reference < XInterface > xI = xMSF->createInstance(
415                     OUString::createFromAscii( "com.sun.star.i18n.CharacterClassification"));
416             if( xI.is() )
417                 xI->queryInterface( ::getCppuType(
418                             (const Reference< XCharacterClassification >*)0))
419                     >>= xCharClass;
420         }
421         if ( xCharClass.is() )
422         {
423             sal_Int32 nCType = xCharClass->getCharacterType( rStr, nPos,
424                     aSrchPara.Locale );
425             if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA |
426                             KCharacterType::LETTER ) & nCType ) )
427                 bRet = 0;
428         }
429     }
430     return bRet;
431 }
432 
433 
434 
435 // --------- methods for the kind of boyer-morre search ------------------
436 
437 
438 void TextSearch::MakeForwardTab()
439 {
440     // create the jumptable for the search text
441     if( pJumpTable )
442     {
443         if( bIsForwardTab )
444             return ;                                        // the jumpTable is ok
445         delete pJumpTable;
446     }
447     bIsForwardTab = true;
448 
449     sal_Int32 n, nLen = sSrchStr.getLength();
450     pJumpTable = new TextSearchJumpTable;
451 
452     for( n = 0; n < nLen - 1; ++n )
453     {
454         sal_Unicode cCh = sSrchStr[n];
455         sal_Int32 nDiff = nLen - n - 1;
456 	TextSearchJumpTable::value_type aEntry( cCh, nDiff );
457 
458         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
459             pJumpTable->insert( aEntry );
460         if ( !aPair.second )
461             (*(aPair.first)).second = nDiff;
462     }
463 }
464 
465 void TextSearch::MakeForwardTab2()
466 {
467     // create the jumptable for the search text
468     if( pJumpTable2 )
469     {
470         if( bIsForwardTab )
471             return ;                                        // the jumpTable is ok
472         delete pJumpTable2;
473     }
474     bIsForwardTab = true;
475 
476     sal_Int32 n, nLen = sSrchStr2.getLength();
477     pJumpTable2 = new TextSearchJumpTable;
478 
479     for( n = 0; n < nLen - 1; ++n )
480     {
481         sal_Unicode cCh = sSrchStr2[n];
482         sal_Int32 nDiff = nLen - n - 1;
483 
484 	TextSearchJumpTable::value_type aEntry( cCh, nDiff );
485         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
486             pJumpTable2->insert( aEntry );
487         if ( !aPair.second )
488             (*(aPair.first)).second = nDiff;
489     }
490 }
491 
492 void TextSearch::MakeBackwardTab()
493 {
494     // create the jumptable for the search text
495     if( pJumpTable )
496     {
497         if( !bIsForwardTab )
498             return ;                                        // the jumpTable is ok
499         delete pJumpTable;
500     }
501     bIsForwardTab = false;
502 
503     sal_Int32 n, nLen = sSrchStr.getLength();
504     pJumpTable = new TextSearchJumpTable;
505 
506     for( n = nLen-1; n > 0; --n )
507     {
508         sal_Unicode cCh = sSrchStr[n];
509         TextSearchJumpTable::value_type aEntry( cCh, n );
510         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
511             pJumpTable->insert( aEntry );
512         if ( !aPair.second )
513             (*(aPair.first)).second = n;
514     }
515 }
516 
517 void TextSearch::MakeBackwardTab2()
518 {
519     // create the jumptable for the search text
520     if( pJumpTable2 )
521     {
522         if( !bIsForwardTab )
523             return ;                                        // the jumpTable is ok
524         delete pJumpTable2;
525     }
526     bIsForwardTab = false;
527 
528     sal_Int32 n, nLen = sSrchStr2.getLength();
529     pJumpTable2 = new TextSearchJumpTable;
530 
531     for( n = nLen-1; n > 0; --n )
532     {
533         sal_Unicode cCh = sSrchStr2[n];
534         TextSearchJumpTable::value_type aEntry( cCh, n );
535         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
536             pJumpTable2->insert( aEntry );
537         if ( !aPair.second )
538             (*(aPair.first)).second = n;
539     }
540 }
541 
542 sal_Int32 TextSearch::GetDiff( const sal_Unicode cChr ) const
543 {
544     TextSearchJumpTable *pJump;
545     OUString sSearchKey;
546 
547     if ( bUsePrimarySrchStr ) {
548       pJump = pJumpTable;
549       sSearchKey = sSrchStr;
550     } else {
551       pJump = pJumpTable2;
552       sSearchKey = sSrchStr2;
553     }
554 
555     TextSearchJumpTable::const_iterator iLook = pJump->find( cChr );
556     if ( iLook == pJump->end() )
557         return sSearchKey.getLength();
558     return (*iLook).second;
559 }
560 
561 
562 // TextSearch::NSrchFrwrd is mis-optimized on unxsoli (#i105945#)
563 SearchResult TextSearch::NSrchFrwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
564         throw(RuntimeException)
565 {
566     SearchResult aRet;
567     aRet.subRegExpressions = 0;
568 
569     OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
570 
571     OUString aStr( searchStr );
572     sal_Int32 nSuchIdx = aStr.getLength();
573     sal_Int32 nEnde = endPos;
574     if( !nSuchIdx || !sSearchKey.getLength() || sSearchKey.getLength() > nSuchIdx )
575         return aRet;
576 
577 
578     if( nEnde < sSearchKey.getLength() )  // position inside the search region ?
579         return aRet;
580 
581     nEnde -= sSearchKey.getLength();
582 
583     if (bUsePrimarySrchStr)
584       MakeForwardTab();                   // create the jumptable
585     else
586       MakeForwardTab2();
587 
588     for (sal_Int32 nCmpIdx = startPos; // start position for the search
589             nCmpIdx <= nEnde;
590             nCmpIdx += GetDiff( aStr[nCmpIdx + sSearchKey.getLength()-1]))
591     {
592         // if the match would be the completed cells, skip it.
593         if ( (checkCTLStart && !isCellStart( aStr, nCmpIdx )) || (checkCTLEnd
594                     && !isCellStart( aStr, nCmpIdx + sSearchKey.getLength())) )
595             continue;
596 
597         nSuchIdx = sSearchKey.getLength() - 1;
598         while( nSuchIdx >= 0 && sSearchKey[nSuchIdx] == aStr[nCmpIdx + nSuchIdx])
599         {
600             if( nSuchIdx == 0 )
601             {
602                 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
603                 {
604                     sal_Int32 nFndEnd = nCmpIdx + sSearchKey.getLength();
605                     bool bAtStart = !nCmpIdx;
606                     bool bAtEnd = nFndEnd == endPos;
607                     bool bDelimBefore = bAtStart || IsDelimiter( aStr, nCmpIdx-1 );
608                     bool bDelimBehind = IsDelimiter(  aStr, nFndEnd );
609                     //  *       1 -> only one word in the paragraph
610                     //  *       2 -> at begin of paragraph
611                     //  *       3 -> at end of paragraph
612                     //  *       4 -> inside the paragraph
613                     if( !(  ( bAtStart && bAtEnd ) ||           // 1
614                                 ( bAtStart && bDelimBehind ) ||     // 2
615                                 ( bAtEnd && bDelimBefore ) ||       // 3
616                                 ( bDelimBefore && bDelimBehind )))  // 4
617                         break;
618                 }
619 
620                 aRet.subRegExpressions = 1;
621                 aRet.startOffset.realloc( 1 );
622                 aRet.startOffset[ 0 ] = nCmpIdx;
623                 aRet.endOffset.realloc( 1 );
624                 aRet.endOffset[ 0 ] = nCmpIdx + sSearchKey.getLength();
625 
626                 return aRet;
627             }
628             else
629                 nSuchIdx--;
630         }
631     }
632     return aRet;
633 }
634 
635 SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
636         throw(RuntimeException)
637 {
638     SearchResult aRet;
639     aRet.subRegExpressions = 0;
640 
641     OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
642 
643     OUString aStr( searchStr );
644     sal_Int32 nSuchIdx = aStr.getLength();
645     sal_Int32 nEnde = endPos;
646     if( nSuchIdx == 0 || sSearchKey.getLength() == 0 || sSearchKey.getLength() > nSuchIdx)
647         return aRet;
648 
649     if (bUsePrimarySrchStr)
650       MakeBackwardTab();                      // create the jumptable
651     else
652       MakeBackwardTab2();
653 
654     if( nEnde == nSuchIdx )                 // end position for the search
655         nEnde = sSearchKey.getLength();
656     else
657         nEnde += sSearchKey.getLength();
658 
659     sal_Int32 nCmpIdx = startPos;          // start position for the search
660 
661     while (nCmpIdx >= nEnde)
662     {
663         // if the match would be the completed cells, skip it.
664         if ( (!checkCTLStart || isCellStart( aStr, nCmpIdx -
665                         sSearchKey.getLength() )) && (!checkCTLEnd ||
666                     isCellStart( aStr, nCmpIdx)))
667         {
668             nSuchIdx = 0;
669             while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] ==
670                     aStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] )
671                 nSuchIdx++;
672             if( nSuchIdx >= sSearchKey.getLength() )
673             {
674                 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
675                 {
676                     sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength();
677                     bool bAtStart = !nFndStt;
678                     bool bAtEnd = nCmpIdx == startPos;
679                     bool bDelimBehind = IsDelimiter( aStr, nCmpIdx );
680                     bool bDelimBefore = bAtStart || // begin of paragraph
681                         IsDelimiter( aStr, nFndStt-1 );
682                     //  *       1 -> only one word in the paragraph
683                     //  *       2 -> at begin of paragraph
684                     //  *       3 -> at end of paragraph
685                     //  *       4 -> inside the paragraph
686                     if( ( bAtStart && bAtEnd ) ||           // 1
687                             ( bAtStart && bDelimBehind ) ||     // 2
688                             ( bAtEnd && bDelimBefore ) ||       // 3
689                             ( bDelimBefore && bDelimBehind ))   // 4
690                     {
691                         aRet.subRegExpressions = 1;
692                         aRet.startOffset.realloc( 1 );
693                         aRet.startOffset[ 0 ] = nCmpIdx;
694                         aRet.endOffset.realloc( 1 );
695                         aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
696                         return aRet;
697                     }
698                 }
699                 else
700                 {
701                     aRet.subRegExpressions = 1;
702                     aRet.startOffset.realloc( 1 );
703                     aRet.startOffset[ 0 ] = nCmpIdx;
704                     aRet.endOffset.realloc( 1 );
705                     aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
706                     return aRet;
707                 }
708             }
709         }
710         nSuchIdx = GetDiff( aStr[nCmpIdx - sSearchKey.getLength()] );
711         if( nCmpIdx < nSuchIdx )
712             return aRet;
713         nCmpIdx -= nSuchIdx;
714     }
715     return aRet;
716 }
717 
718 
719 
720 //---------------------------------------------------------------------------
721 // ------- Methoden fuer die Suche ueber Regular-Expressions --------------
722 
723 SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr,
724                                       sal_Int32 startPos, sal_Int32 endPos )
725             throw(RuntimeException)
726 {
727     SearchResult aRet;
728     aRet.subRegExpressions = 0;
729     OUString aStr( searchStr );
730 
731     bool bSearchInSel = (0 != (( SearchFlags::REG_NOT_BEGINOFLINE |
732                     SearchFlags::REG_NOT_ENDOFLINE ) & aSrchPara.searchFlag ));
733 
734     pRegExp->set_line(aStr.getStr(), bSearchInSel ? endPos : aStr.getLength());
735 
736     struct re_registers regs;
737 
738     // Clear structure
739     memset((void *)&regs, 0, sizeof(struct re_registers));
740     if ( ! pRegExp->re_search(&regs, startPos) )
741     {
742         if( regs.num_of_match > 0 &&
743                 (regs.start[0] != -1 && regs.end[0] != -1) )
744         {
745             aRet.startOffset.realloc(regs.num_of_match);
746             aRet.endOffset.realloc(regs.num_of_match);
747 
748             sal_Int32 i = 0, j = 0;
749             while( j < regs.num_of_match )
750             {
751                 if( regs.start[j] != -1 && regs.end[j] != -1 )
752                 {
753                     aRet.startOffset[i] = regs.start[j];
754                     aRet.endOffset[i] = regs.end[j];
755                     ++i;
756                 }
757                 ++j;
758             }
759             aRet.subRegExpressions = i;
760         }
761         if ( regs.num_regs > 0 )
762         {
763             if ( regs.start )
764                 free(regs.start);
765             if ( regs.end )
766                 free(regs.end);
767         }
768     }
769 
770     return aRet;
771 }
772 
773 /*
774  * Sucht das Muster aSrchPara.sSrchStr rueckwaerts im String rStr
775  */
776 SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr,
777                                       sal_Int32 startPos, sal_Int32 endPos )
778             throw(RuntimeException)
779 {
780     SearchResult aRet;
781     aRet.subRegExpressions = 0;
782     OUString aStr( searchStr );
783 
784     sal_Int32 nOffset = 0;
785     sal_Int32 nStrEnde = aStr.getLength() == endPos ? 0 : endPos;
786 
787     bool bSearchInSel = (0 != (( SearchFlags::REG_NOT_BEGINOFLINE |
788                     SearchFlags::REG_NOT_ENDOFLINE ) & aSrchPara.searchFlag ));
789 
790     if( startPos )
791         nOffset = startPos - 1;
792 
793     // search only in the subString
794     if( bSearchInSel && nStrEnde )
795     {
796         aStr = aStr.copy( nStrEnde, aStr.getLength() - nStrEnde );
797         if( nOffset > nStrEnde )
798             nOffset = nOffset - nStrEnde;
799         else
800             nOffset = 0;
801     }
802 
803     // set the length to negative for reverse search
804     pRegExp->set_line( aStr.getStr(), -(aStr.getLength()) );
805     struct re_registers regs;
806 
807     // Clear structure
808     memset((void *)&regs, 0, sizeof(struct re_registers));
809     if ( ! pRegExp->re_search(&regs, nOffset) )
810     {
811         if( regs.num_of_match > 0 &&
812                 (regs.start[0] != -1 && regs.end[0] != -1) )
813         {
814             nOffset = bSearchInSel ? nStrEnde : 0;
815             aRet.startOffset.realloc(regs.num_of_match);
816             aRet.endOffset.realloc(regs.num_of_match);
817 
818             sal_Int32 i = 0, j = 0;
819             while( j < regs.num_of_match )
820             {
821                 if( regs.start[j] != -1 && regs.end[j] != -1 )
822                 {
823                     aRet.startOffset[i] = regs.end[j] + nOffset;
824                     aRet.endOffset[i] = regs.start[j] + nOffset;
825                     ++i;
826                 }
827                 ++j;
828             }
829             aRet.subRegExpressions = i;
830         }
831         if ( regs.num_regs > 0 )
832         {
833             if ( regs.start )
834                 free(regs.start);
835             if ( regs.end )
836                 free(regs.end);
837         }
838     }
839 
840     return aRet;
841 }
842 
843 // Phonetische Suche von Worten
844 SearchResult TextSearch::ApproxSrchFrwrd( const OUString& searchStr,
845                                           sal_Int32 startPos, sal_Int32 endPos )
846             throw(RuntimeException)
847 {
848     SearchResult aRet;
849     aRet.subRegExpressions = 0;
850 
851     if( !xBreak.is() )
852         return aRet;
853 
854     OUString aWTemp( searchStr );
855 
856     register sal_Int32 nStt, nEnd;
857 
858     Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
859             aSrchPara.Locale,
860             WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
861 
862     do
863     {
864         if( aWBnd.startPos >= endPos )
865             break;
866         nStt = aWBnd.startPos < startPos ? startPos : aWBnd.startPos;
867         nEnd = aWBnd.endPos > endPos ? endPos : aWBnd.endPos;
868 
869         if( nStt < nEnd &&
870                 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
871         {
872             aRet.subRegExpressions = 1;
873             aRet.startOffset.realloc( 1 );
874             aRet.startOffset[ 0 ] = nStt;
875             aRet.endOffset.realloc( 1 );
876             aRet.endOffset[ 0 ] = nEnd;
877             break;
878         }
879 
880         nStt = nEnd - 1;
881         aWBnd = xBreak->nextWord( aWTemp, nStt, aSrchPara.Locale,
882                 WordType::ANYWORD_IGNOREWHITESPACES);
883     } while( aWBnd.startPos != aWBnd.endPos ||
884             (aWBnd.endPos != aWTemp.getLength() && aWBnd.endPos != nEnd) );
885     // #i50244# aWBnd.endPos != nEnd : in case there is _no_ word (only
886     // whitespace) in searchStr, getWordBoundary() returned startPos,startPos
887     // and nextWord() does also => don't loop forever.
888     return aRet;
889 }
890 
891 SearchResult TextSearch::ApproxSrchBkwrd( const OUString& searchStr,
892                                           sal_Int32 startPos, sal_Int32 endPos )
893             throw(RuntimeException)
894 {
895     SearchResult aRet;
896     aRet.subRegExpressions = 0;
897 
898     if( !xBreak.is() )
899         return aRet;
900 
901     OUString aWTemp( searchStr );
902 
903     register sal_Int32 nStt, nEnd;
904 
905     Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
906             aSrchPara.Locale,
907             WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
908 
909     do
910     {
911         if( aWBnd.endPos <= endPos )
912             break;
913         nStt = aWBnd.startPos < endPos ? endPos : aWBnd.startPos;
914         nEnd = aWBnd.endPos > startPos ? startPos : aWBnd.endPos;
915 
916         if( nStt < nEnd &&
917                 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
918         {
919             aRet.subRegExpressions = 1;
920             aRet.startOffset.realloc( 1 );
921             aRet.startOffset[ 0 ] = nEnd;
922             aRet.endOffset.realloc( 1 );
923             aRet.endOffset[ 0 ] = nStt;
924             break;
925         }
926         if( !nStt )
927             break;
928 
929         aWBnd = xBreak->previousWord( aWTemp, nStt, aSrchPara.Locale,
930                 WordType::ANYWORD_IGNOREWHITESPACES);
931     } while( aWBnd.startPos != aWBnd.endPos || aWBnd.endPos != aWTemp.getLength() );
932     return aRet;
933 }
934 
935 
936 static const sal_Char cSearchName[] = "com.sun.star.util.TextSearch";
937 static const sal_Char cSearchImpl[] = "com.sun.star.util.TextSearch_i18n";
938 
939 static OUString getServiceName_Static()
940 {
941     return OUString::createFromAscii( cSearchName );
942 }
943 
944 static OUString getImplementationName_Static()
945 {
946     return OUString::createFromAscii( cSearchImpl );
947 }
948 
949 OUString SAL_CALL
950 TextSearch::getImplementationName()
951                 throw( RuntimeException )
952 {
953     return getImplementationName_Static();
954 }
955 
956 sal_Bool SAL_CALL
957 TextSearch::supportsService(const OUString& rServiceName)
958                 throw( RuntimeException )
959 {
960     return !rServiceName.compareToAscii( cSearchName );
961 }
962 
963 Sequence< OUString > SAL_CALL
964 TextSearch::getSupportedServiceNames(void) throw( RuntimeException )
965 {
966     Sequence< OUString > aRet(1);
967     aRet[0] = getServiceName_Static();
968     return aRet;
969 }
970 
971 ::com::sun::star::uno::Reference< ::com::sun::star::uno::XInterface >
972 SAL_CALL TextSearch_CreateInstance(
973         const ::com::sun::star::uno::Reference<
974         ::com::sun::star::lang::XMultiServiceFactory >& rxMSF )
975 {
976     return ::com::sun::star::uno::Reference<
977         ::com::sun::star::uno::XInterface >(
978                 (::cppu::OWeakObject*) new TextSearch( rxMSF ) );
979 }
980 
981 extern "C"
982 {
983 
984 void SAL_CALL component_getImplementationEnvironment(
985         const sal_Char** ppEnvTypeName, uno_Environment** /*ppEnv*/ )
986 {
987     *ppEnvTypeName = CPPU_CURRENT_LANGUAGE_BINDING_NAME;
988 }
989 
990 void* SAL_CALL component_getFactory( const sal_Char* sImplementationName,
991         void* _pServiceManager, void* /*_pRegistryKey*/ )
992 {
993     void* pRet = NULL;
994 
995     ::com::sun::star::lang::XMultiServiceFactory* pServiceManager =
996         reinterpret_cast< ::com::sun::star::lang::XMultiServiceFactory* >
997             ( _pServiceManager );
998     ::com::sun::star::uno::Reference<
999             ::com::sun::star::lang::XSingleServiceFactory > xFactory;
1000 
1001     if ( 0 == rtl_str_compare( sImplementationName, cSearchImpl) )
1002     {
1003         ::com::sun::star::uno::Sequence< ::rtl::OUString > aServiceNames(1);
1004         aServiceNames[0] = getServiceName_Static();
1005         xFactory = ::cppu::createSingleFactory(
1006                 pServiceManager, getImplementationName_Static(),
1007                 &TextSearch_CreateInstance, aServiceNames );
1008     }
1009 
1010     if ( xFactory.is() )
1011     {
1012         xFactory->acquire();
1013         pRet = xFactory.get();
1014     }
1015 
1016     return pRet;
1017 }
1018 
1019 } // extern "C"
1020