1*449ab281SAndrew Rist /**************************************************************
2cdf0e10cSrcweir  *
3*449ab281SAndrew Rist  * Licensed to the Apache Software Foundation (ASF) under one
4*449ab281SAndrew Rist  * or more contributor license agreements.  See the NOTICE file
5*449ab281SAndrew Rist  * distributed with this work for additional information
6*449ab281SAndrew Rist  * regarding copyright ownership.  The ASF licenses this file
7*449ab281SAndrew Rist  * to you under the Apache License, Version 2.0 (the
8*449ab281SAndrew Rist  * "License"); you may not use this file except in compliance
9*449ab281SAndrew Rist  * with the License.  You may obtain a copy of the License at
10*449ab281SAndrew Rist  *
11*449ab281SAndrew Rist  *   http://www.apache.org/licenses/LICENSE-2.0
12*449ab281SAndrew Rist  *
13*449ab281SAndrew Rist  * Unless required by applicable law or agreed to in writing,
14*449ab281SAndrew Rist  * software distributed under the License is distributed on an
15*449ab281SAndrew Rist  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*449ab281SAndrew Rist  * KIND, either express or implied.  See the License for the
17*449ab281SAndrew Rist  * specific language governing permissions and limitations
18*449ab281SAndrew Rist  * under the License.
19*449ab281SAndrew Rist  *
20*449ab281SAndrew Rist  *************************************************************/
21*449ab281SAndrew Rist 
22*449ab281SAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_i18npool.hxx"
26cdf0e10cSrcweir 
27cdf0e10cSrcweir #include "textsearch.hxx"
28cdf0e10cSrcweir #include "levdis.hxx"
29cdf0e10cSrcweir #include <com/sun/star/lang/Locale.hpp>
30cdf0e10cSrcweir #include <com/sun/star/lang/XMultiServiceFactory.hpp>
31cdf0e10cSrcweir #include <comphelper/processfactory.hxx>
32cdf0e10cSrcweir #include <com/sun/star/i18n/UnicodeType.hpp>
33cdf0e10cSrcweir #include <com/sun/star/util/SearchFlags.hpp>
34cdf0e10cSrcweir #include <com/sun/star/i18n/WordType.hpp>
35cdf0e10cSrcweir #include <com/sun/star/i18n/ScriptType.hpp>
36cdf0e10cSrcweir #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
37cdf0e10cSrcweir #include <com/sun/star/i18n/KCharacterType.hpp>
38cdf0e10cSrcweir #include <com/sun/star/registry/XRegistryKey.hpp>
39cdf0e10cSrcweir #include <cppuhelper/factory.hxx>
40cdf0e10cSrcweir #include <cppuhelper/weak.hxx>
41cdf0e10cSrcweir 
42cdf0e10cSrcweir #ifdef _MSC_VER
43cdf0e10cSrcweir // get rid of that dumb compiler warning
44cdf0e10cSrcweir // identifier was truncated to '255' characters in the debug information
45cdf0e10cSrcweir // for STL template usage, if .pdb files are to be created
46cdf0e10cSrcweir #pragma warning( disable: 4786 )
47cdf0e10cSrcweir #endif
48cdf0e10cSrcweir 
49cdf0e10cSrcweir #include <string.h>
50cdf0e10cSrcweir 
51cdf0e10cSrcweir using namespace ::com::sun::star::util;
52cdf0e10cSrcweir using namespace ::com::sun::star::uno;
53cdf0e10cSrcweir using namespace ::com::sun::star::lang;
54cdf0e10cSrcweir using namespace ::com::sun::star::i18n;
55cdf0e10cSrcweir using namespace ::rtl;
56cdf0e10cSrcweir 
57cdf0e10cSrcweir static sal_Int32 COMPLEX_TRANS_MASK_TMP =
58cdf0e10cSrcweir     TransliterationModules_ignoreBaFa_ja_JP |
59cdf0e10cSrcweir     TransliterationModules_ignoreIterationMark_ja_JP |
60cdf0e10cSrcweir     TransliterationModules_ignoreTiJi_ja_JP |
61cdf0e10cSrcweir     TransliterationModules_ignoreHyuByu_ja_JP |
62cdf0e10cSrcweir     TransliterationModules_ignoreSeZe_ja_JP |
63cdf0e10cSrcweir     TransliterationModules_ignoreIandEfollowedByYa_ja_JP |
64cdf0e10cSrcweir     TransliterationModules_ignoreKiKuFollowedBySa_ja_JP |
65cdf0e10cSrcweir     TransliterationModules_ignoreProlongedSoundMark_ja_JP;
66cc450e3aSHerbert Dürr static const sal_Int32 SIMPLE_TRANS_MASK = ~(COMPLEX_TRANS_MASK_TMP | TransliterationModules_IGNORE_WIDTH) | TransliterationModules_FULLWIDTH_HALFWIDTH;
67cc450e3aSHerbert Dürr static const sal_Int32 COMPLEX_TRANS_MASK = COMPLEX_TRANS_MASK_TMP | TransliterationModules_IGNORE_KANA | TransliterationModules_FULLWIDTH_HALFWIDTH;
68cdf0e10cSrcweir     // Above 2 transliteration is simple but need to take effect in
69cdf0e10cSrcweir     // complex transliteration
70cdf0e10cSrcweir 
71cdf0e10cSrcweir TextSearch::TextSearch(const Reference < XMultiServiceFactory > & rxMSF)
72cdf0e10cSrcweir         : xMSF( rxMSF )
73cdf0e10cSrcweir         , pJumpTable( 0 )
74cdf0e10cSrcweir         , pJumpTable2( 0 )
75cc450e3aSHerbert Dürr         , pRegexMatcher( NULL )
76cdf0e10cSrcweir         , pWLD( 0 )
77cdf0e10cSrcweir {
78cdf0e10cSrcweir     SearchOptions aOpt;
79cdf0e10cSrcweir     aOpt.algorithmType = SearchAlgorithms_ABSOLUTE;
80cdf0e10cSrcweir     aOpt.searchFlag = SearchFlags::ALL_IGNORE_CASE;
81cdf0e10cSrcweir     //aOpt.Locale = ???;
82cdf0e10cSrcweir     setOptions( aOpt );
83cdf0e10cSrcweir }
84cdf0e10cSrcweir 
85cdf0e10cSrcweir TextSearch::~TextSearch()
86cdf0e10cSrcweir {
87cc450e3aSHerbert Dürr     delete pRegexMatcher;
88cdf0e10cSrcweir     delete pWLD;
89cdf0e10cSrcweir     delete pJumpTable;
90cdf0e10cSrcweir     delete pJumpTable2;
91cdf0e10cSrcweir }
92cdf0e10cSrcweir 
93cdf0e10cSrcweir void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeException )
94cdf0e10cSrcweir {
95cdf0e10cSrcweir     aSrchPara = rOptions;
96cdf0e10cSrcweir 
97cc450e3aSHerbert Dürr     delete pRegexMatcher, pRegexMatcher = NULL;
98cdf0e10cSrcweir     delete pWLD, pWLD = 0;
99cdf0e10cSrcweir     delete pJumpTable, pJumpTable = 0;
100cdf0e10cSrcweir     delete pJumpTable2, pJumpTable2 = 0;
101cdf0e10cSrcweir 
102cdf0e10cSrcweir     // Create Transliteration class
103cdf0e10cSrcweir     if( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
104cdf0e10cSrcweir     {
105cdf0e10cSrcweir         if( !xTranslit.is() )
106cdf0e10cSrcweir         {
107cdf0e10cSrcweir             Reference < XInterface > xI = xMSF->createInstance(
108cdf0e10cSrcweir                     OUString::createFromAscii(
109cdf0e10cSrcweir                         "com.sun.star.i18n.Transliteration"));
110cdf0e10cSrcweir             if ( xI.is() )
111cdf0e10cSrcweir                 xI->queryInterface( ::getCppuType(
112cdf0e10cSrcweir                             (const Reference< XExtendedTransliteration >*)0))
113cdf0e10cSrcweir                     >>= xTranslit;
114cdf0e10cSrcweir         }
115cdf0e10cSrcweir         // Load transliteration module
116cdf0e10cSrcweir         if( xTranslit.is() )
117cdf0e10cSrcweir             xTranslit->loadModule(
118cdf0e10cSrcweir                     (TransliterationModules)( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ),
119cdf0e10cSrcweir                     aSrchPara.Locale);
120cdf0e10cSrcweir     }
121cdf0e10cSrcweir     else if( xTranslit.is() )
122cdf0e10cSrcweir         xTranslit = 0;
123cdf0e10cSrcweir 
124cdf0e10cSrcweir     // Create Transliteration for 2<->1, 2<->2 transliteration
125cdf0e10cSrcweir     if ( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
126cdf0e10cSrcweir     {
127cdf0e10cSrcweir         if( !xTranslit2.is() )
128cdf0e10cSrcweir         {
129cdf0e10cSrcweir             Reference < XInterface > xI = xMSF->createInstance(
130cdf0e10cSrcweir                     OUString::createFromAscii(
131cdf0e10cSrcweir                         "com.sun.star.i18n.Transliteration"));
132cdf0e10cSrcweir             if ( xI.is() )
133cdf0e10cSrcweir                 xI->queryInterface( ::getCppuType(
134cdf0e10cSrcweir                             (const Reference< XExtendedTransliteration >*)0))
135cdf0e10cSrcweir                     >>= xTranslit2;
136cdf0e10cSrcweir         }
137cdf0e10cSrcweir         // Load transliteration module
138cdf0e10cSrcweir         if( xTranslit2.is() )
139cdf0e10cSrcweir             xTranslit2->loadModule(
140cdf0e10cSrcweir                     (TransliterationModules)( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ),
141cdf0e10cSrcweir                     aSrchPara.Locale);
142cdf0e10cSrcweir     }
143cdf0e10cSrcweir 
144cdf0e10cSrcweir     if ( !xBreak.is() )
145cdf0e10cSrcweir     {
146cdf0e10cSrcweir         Reference < XInterface > xI = xMSF->createInstance(
147cdf0e10cSrcweir                 OUString::createFromAscii( "com.sun.star.i18n.BreakIterator"));
148cdf0e10cSrcweir         if( xI.is() )
149cdf0e10cSrcweir             xI->queryInterface( ::getCppuType(
150cdf0e10cSrcweir                         (const Reference< XBreakIterator >*)0))
151cdf0e10cSrcweir                 >>= xBreak;
152cdf0e10cSrcweir     }
153cdf0e10cSrcweir 
154cdf0e10cSrcweir     sSrchStr = aSrchPara.searchString;
155cdf0e10cSrcweir 
156cc450e3aSHerbert Dürr     // use transliteration here
157cc450e3aSHerbert Dürr     if ( xTranslit.is() &&
158cdf0e10cSrcweir 	 aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
159cdf0e10cSrcweir         sSrchStr = xTranslit->transliterateString2String(
160cdf0e10cSrcweir                 aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
161cdf0e10cSrcweir 
162cc450e3aSHerbert Dürr     if ( xTranslit2.is() &&
163cdf0e10cSrcweir 	 aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
164cdf0e10cSrcweir 	sSrchStr2 = xTranslit2->transliterateString2String(
165cdf0e10cSrcweir 	        aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
166cdf0e10cSrcweir 
167cdf0e10cSrcweir     // When start or end of search string is a complex script type, we need to
168cdf0e10cSrcweir     // make sure the result boundary is not located in the middle of cell.
169cdf0e10cSrcweir     checkCTLStart = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 0) ==
170cdf0e10cSrcweir                 ScriptType::COMPLEX));
171cdf0e10cSrcweir     checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr,
172cdf0e10cSrcweir                     sSrchStr.getLength()-1) == ScriptType::COMPLEX));
173cdf0e10cSrcweir 
174cc450e3aSHerbert Dürr     switch( aSrchPara.algorithmType)
175cdf0e10cSrcweir     {
176cc450e3aSHerbert Dürr 		case SearchAlgorithms_REGEXP:
177cc450e3aSHerbert Dürr 			fnForward = &TextSearch::RESrchFrwrd;
178cc450e3aSHerbert Dürr 			fnBackward = &TextSearch::RESrchBkwrd;
179cc450e3aSHerbert Dürr 
180cc450e3aSHerbert Dürr 			{
181cc450e3aSHerbert Dürr 			sal_uInt32 nIcuSearchFlags = 0;
182cc450e3aSHerbert Dürr 			// map com::sun::star::util::SearchFlags to ICU uregex.h flags
183cc450e3aSHerbert Dürr 			// TODO: REG_EXTENDED, REG_NOT_BEGINOFLINE, REG_NOT_ENDOFLINE
184cc450e3aSHerbert Dürr 			// REG_NEWLINE is neither defined properly nor used anywhere => not implemented
185cc450e3aSHerbert Dürr 			// REG_NOSUB is not used anywhere => not implemented
186cc450e3aSHerbert Dürr 			// NORM_WORD_ONLY is only used for SearchAlgorithm==Absolute
187cc450e3aSHerbert Dürr 			// LEV_RELAXED is only used for SearchAlgorithm==Approximate
188cc450e3aSHerbert Dürr 			// why is even ALL_IGNORE_CASE deprecated in UNO? because of transliteration taking care of it???
189cc450e3aSHerbert Dürr 			if( (aSrchPara.searchFlag & com::sun::star::util::SearchFlags::ALL_IGNORE_CASE) != 0)
190cc450e3aSHerbert Dürr 				nIcuSearchFlags |= UREGEX_CASE_INSENSITIVE;
191cc450e3aSHerbert Dürr 			UErrorCode nIcuErr = U_ZERO_ERROR;
192cc450e3aSHerbert Dürr 			// assumption: transliteration doesn't mangle regexp control chars
193cc450e3aSHerbert Dürr 			OUString& rPatternStr = (aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK) ? sSrchStr
194cc450e3aSHerbert Dürr 					: ((aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK) ? sSrchStr2 : aSrchPara.searchString);
195cc450e3aSHerbert Dürr 			const IcuUniString aIcuSearchPatStr( rPatternStr.getStr(), rPatternStr.getLength());
196cc450e3aSHerbert Dürr 			pRegexMatcher = new RegexMatcher( aIcuSearchPatStr, nIcuSearchFlags, nIcuErr);
197cc450e3aSHerbert Dürr 			if( nIcuErr)
198cc450e3aSHerbert Dürr 				{ delete pRegexMatcher; pRegexMatcher = NULL;}
199cc450e3aSHerbert Dürr 			} break;
200cc450e3aSHerbert Dürr 
201cc450e3aSHerbert Dürr 		case SearchAlgorithms_APPROXIMATE:
202cdf0e10cSrcweir             fnForward = &TextSearch::ApproxSrchFrwrd;
203cdf0e10cSrcweir             fnBackward = &TextSearch::ApproxSrchBkwrd;
204cdf0e10cSrcweir 
205cdf0e10cSrcweir             pWLD = new WLevDistance( sSrchStr.getStr(), aSrchPara.changedChars,
206cdf0e10cSrcweir                     aSrchPara.insertedChars, aSrchPara.deletedChars,
207cdf0e10cSrcweir                     0 != (SearchFlags::LEV_RELAXED & aSrchPara.searchFlag ) );
208cdf0e10cSrcweir 
209cdf0e10cSrcweir             nLimit = pWLD->GetLimit();
210cc450e3aSHerbert Dürr 			break;
211cc450e3aSHerbert Dürr 
212cc450e3aSHerbert Dürr 		default:
213cdf0e10cSrcweir             fnForward = &TextSearch::NSrchFrwrd;
214cdf0e10cSrcweir             fnBackward = &TextSearch::NSrchBkwrd;
215cc450e3aSHerbert Dürr 			break;
216cdf0e10cSrcweir     }
217cdf0e10cSrcweir }
218cdf0e10cSrcweir 
219cdf0e10cSrcweir sal_Int32 FindPosInSeq_Impl( const Sequence <sal_Int32>& rOff, sal_Int32 nPos )
220cdf0e10cSrcweir {
221cdf0e10cSrcweir     sal_Int32 nRet = 0, nEnd = rOff.getLength();
222cdf0e10cSrcweir     while( nRet < nEnd && nPos > rOff[ nRet ] ) ++nRet;
223cdf0e10cSrcweir     return nRet;
224cdf0e10cSrcweir }
225cdf0e10cSrcweir 
226cdf0e10cSrcweir sal_Bool TextSearch::isCellStart(const OUString& searchStr, sal_Int32 nPos)
227cdf0e10cSrcweir         throw( RuntimeException )
228cdf0e10cSrcweir {
229cdf0e10cSrcweir     sal_Int32 nDone;
230cdf0e10cSrcweir     return nPos == xBreak->previousCharacters(searchStr, nPos+1,
231cdf0e10cSrcweir             aSrchPara.Locale, CharacterIteratorMode::SKIPCELL, 1, nDone);
232cdf0e10cSrcweir }
233cdf0e10cSrcweir 
234cdf0e10cSrcweir SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
235cdf0e10cSrcweir         throw( RuntimeException )
236cdf0e10cSrcweir {
237cdf0e10cSrcweir     SearchResult sres;
238cdf0e10cSrcweir 
239cdf0e10cSrcweir     OUString in_str(searchStr);
240cdf0e10cSrcweir     sal_Int32 newStartPos = startPos;
241cdf0e10cSrcweir     sal_Int32 newEndPos = endPos;
242cdf0e10cSrcweir 
243cdf0e10cSrcweir     bUsePrimarySrchStr = true;
244cdf0e10cSrcweir 
245cdf0e10cSrcweir     if ( xTranslit.is() )
246cdf0e10cSrcweir     {
247cdf0e10cSrcweir         // apply normal transliteration (1<->1, 1<->0)
248cdf0e10cSrcweir         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
249cdf0e10cSrcweir         in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset );
250cdf0e10cSrcweir 
251cdf0e10cSrcweir         // JP 20.6.2001: also the start and end positions must be corrected!
252cdf0e10cSrcweir         if( startPos )
253cdf0e10cSrcweir             newStartPos = FindPosInSeq_Impl( offset, startPos );
254cdf0e10cSrcweir 
255cdf0e10cSrcweir         if( endPos < searchStr.getLength() )
256cdf0e10cSrcweir 	    newEndPos = FindPosInSeq_Impl( offset, endPos );
257cdf0e10cSrcweir         else
258cdf0e10cSrcweir             newEndPos = in_str.getLength();
259cdf0e10cSrcweir 
260cdf0e10cSrcweir         sres = (this->*fnForward)( in_str, newStartPos, newEndPos );
261cdf0e10cSrcweir 
262cdf0e10cSrcweir         for ( int k = 0; k < sres.startOffset.getLength(); k++ )
263cdf0e10cSrcweir         {
264cdf0e10cSrcweir             if (sres.startOffset[k])
265cdf0e10cSrcweir 	      sres.startOffset[k] = offset[sres.startOffset[k]];
266cdf0e10cSrcweir             // JP 20.6.2001: end is ever exclusive and then don't return
267cdf0e10cSrcweir             //               the position of the next character - return the
268cdf0e10cSrcweir             //               next position behind the last found character!
269cdf0e10cSrcweir             //               "a b c" find "b" must return 2,3 and not 2,4!!!
270cdf0e10cSrcweir             if (sres.endOffset[k])
271cdf0e10cSrcweir 	      sres.endOffset[k] = offset[sres.endOffset[k]-1] + 1;
272cdf0e10cSrcweir         }
273cdf0e10cSrcweir     }
274cdf0e10cSrcweir     else
275cdf0e10cSrcweir     {
276cdf0e10cSrcweir         sres = (this->*fnForward)( in_str, startPos, endPos );
277cdf0e10cSrcweir     }
278cdf0e10cSrcweir 
279cdf0e10cSrcweir     if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP)
280cdf0e10cSrcweir     {
281cdf0e10cSrcweir         SearchResult sres2;
282cdf0e10cSrcweir 
283cdf0e10cSrcweir 	in_str = OUString(searchStr);
284cdf0e10cSrcweir         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
285cdf0e10cSrcweir 
286cdf0e10cSrcweir         in_str = xTranslit2->transliterate( searchStr, 0, in_str.getLength(), offset );
287cdf0e10cSrcweir 
288cdf0e10cSrcweir         if( startPos )
289cdf0e10cSrcweir             startPos = FindPosInSeq_Impl( offset, startPos );
290cdf0e10cSrcweir 
291cdf0e10cSrcweir         if( endPos < searchStr.getLength() )
292cdf0e10cSrcweir             endPos = FindPosInSeq_Impl( offset, endPos );
293cdf0e10cSrcweir         else
294cdf0e10cSrcweir             endPos = in_str.getLength();
295cdf0e10cSrcweir 
296cdf0e10cSrcweir 	bUsePrimarySrchStr = false;
297cdf0e10cSrcweir         sres2 = (this->*fnForward)( in_str, startPos, endPos );
298cdf0e10cSrcweir 
299cdf0e10cSrcweir         for ( int k = 0; k < sres2.startOffset.getLength(); k++ )
300cdf0e10cSrcweir         {
301cdf0e10cSrcweir             if (sres2.startOffset[k])
302cdf0e10cSrcweir 	      sres2.startOffset[k] = offset[sres2.startOffset[k]-1] + 1;
303cdf0e10cSrcweir             if (sres2.endOffset[k])
304cdf0e10cSrcweir 	      sres2.endOffset[k] = offset[sres2.endOffset[k]-1] + 1;
305cdf0e10cSrcweir         }
306cdf0e10cSrcweir 
307cdf0e10cSrcweir 	// pick first and long one
308cdf0e10cSrcweir 	if ( sres.subRegExpressions == 0)
309cdf0e10cSrcweir 	    return sres2;
310cdf0e10cSrcweir 	if ( sres2.subRegExpressions == 1)
311cdf0e10cSrcweir 	{
312cdf0e10cSrcweir 	    if ( sres.startOffset[0] > sres2.startOffset[0])
313cdf0e10cSrcweir 	        return sres2;
314cdf0e10cSrcweir 	    else if ( sres.startOffset[0] == sres2.startOffset[0] &&
315cdf0e10cSrcweir 	        sres.endOffset[0] < sres2.endOffset[0])
316cdf0e10cSrcweir 	        return sres2;
317cdf0e10cSrcweir 	}
318cdf0e10cSrcweir     }
319cdf0e10cSrcweir 
320cdf0e10cSrcweir     return sres;
321cdf0e10cSrcweir }
322cdf0e10cSrcweir 
323cdf0e10cSrcweir SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
324cdf0e10cSrcweir         throw(RuntimeException)
325cdf0e10cSrcweir {
326cdf0e10cSrcweir     SearchResult sres;
327cdf0e10cSrcweir 
328cdf0e10cSrcweir     OUString in_str(searchStr);
329cdf0e10cSrcweir     sal_Int32 newStartPos = startPos;
330cdf0e10cSrcweir     sal_Int32 newEndPos = endPos;
331cdf0e10cSrcweir 
332cdf0e10cSrcweir     bUsePrimarySrchStr = true;
333cdf0e10cSrcweir 
334cdf0e10cSrcweir     if ( xTranslit.is() )
335cdf0e10cSrcweir     {
336cdf0e10cSrcweir         // apply only simple 1<->1 transliteration here
337cdf0e10cSrcweir         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
338cdf0e10cSrcweir 	in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset );
339cdf0e10cSrcweir 
340cdf0e10cSrcweir         // JP 20.6.2001: also the start and end positions must be corrected!
341cdf0e10cSrcweir         if( startPos < searchStr.getLength() )
342cdf0e10cSrcweir             newStartPos = FindPosInSeq_Impl( offset, startPos );
343cdf0e10cSrcweir 	else
344cdf0e10cSrcweir 	    newStartPos = in_str.getLength();
345cdf0e10cSrcweir 
346cdf0e10cSrcweir         if( endPos )
347cdf0e10cSrcweir 	    newEndPos = FindPosInSeq_Impl( offset, endPos );
348cdf0e10cSrcweir 
349cdf0e10cSrcweir         sres = (this->*fnBackward)( in_str, newStartPos, newEndPos );
350cdf0e10cSrcweir 
351cdf0e10cSrcweir         for ( int k = 0; k < sres.startOffset.getLength(); k++ )
352cdf0e10cSrcweir         {
353cdf0e10cSrcweir             if (sres.startOffset[k])
354cdf0e10cSrcweir 	      sres.startOffset[k] = offset[sres.startOffset[k] - 1] + 1;
355cdf0e10cSrcweir             // JP 20.6.2001: end is ever exclusive and then don't return
356cdf0e10cSrcweir             //               the position of the next character - return the
357cdf0e10cSrcweir             //               next position behind the last found character!
358cdf0e10cSrcweir             //               "a b c" find "b" must return 2,3 and not 2,4!!!
359cdf0e10cSrcweir             if (sres.endOffset[k])
360cdf0e10cSrcweir 	      sres.endOffset[k] = offset[sres.endOffset[k]];
361cdf0e10cSrcweir         }
362cdf0e10cSrcweir     }
363cdf0e10cSrcweir     else
364cdf0e10cSrcweir     {
365cdf0e10cSrcweir         sres = (this->*fnBackward)( in_str, startPos, endPos );
366cdf0e10cSrcweir     }
367cdf0e10cSrcweir 
368cdf0e10cSrcweir     if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP )
369cdf0e10cSrcweir     {
370cdf0e10cSrcweir 	SearchResult sres2;
371cdf0e10cSrcweir 
372cdf0e10cSrcweir 	in_str = OUString(searchStr);
373cdf0e10cSrcweir         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
374cdf0e10cSrcweir 
375cdf0e10cSrcweir         in_str = xTranslit2->transliterate(searchStr, 0, in_str.getLength(), offset);
376cdf0e10cSrcweir 
377cdf0e10cSrcweir         if( startPos < searchStr.getLength() )
378cdf0e10cSrcweir             startPos = FindPosInSeq_Impl( offset, startPos );
379cdf0e10cSrcweir         else
380cdf0e10cSrcweir             startPos = in_str.getLength();
381cdf0e10cSrcweir 
382cdf0e10cSrcweir         if( endPos )
383cdf0e10cSrcweir             endPos = FindPosInSeq_Impl( offset, endPos );
384cdf0e10cSrcweir 
385cdf0e10cSrcweir 	bUsePrimarySrchStr = false;
386cdf0e10cSrcweir 	sres2 = (this->*fnBackward)( in_str, startPos, endPos );
387cdf0e10cSrcweir 
388cdf0e10cSrcweir         for( int k = 0; k < sres2.startOffset.getLength(); k++ )
389cdf0e10cSrcweir         {
390cdf0e10cSrcweir             if (sres2.startOffset[k])
391cdf0e10cSrcweir                 sres2.startOffset[k] = offset[sres2.startOffset[k]-1]+1;
392cdf0e10cSrcweir             if (sres2.endOffset[k])
393cdf0e10cSrcweir                 sres2.endOffset[k] = offset[sres2.endOffset[k]-1]+1;
394cdf0e10cSrcweir         }
395cdf0e10cSrcweir 
396cdf0e10cSrcweir 	// pick last and long one
397cdf0e10cSrcweir 	if ( sres.subRegExpressions == 0 )
398cdf0e10cSrcweir 	    return sres2;
399cdf0e10cSrcweir 	if ( sres2.subRegExpressions == 1 )
400cdf0e10cSrcweir 	{
401cdf0e10cSrcweir 	    if ( sres.startOffset[0] < sres2.startOffset[0] )
402cdf0e10cSrcweir 	        return sres2;
403cdf0e10cSrcweir 	    if ( sres.startOffset[0] == sres2.startOffset[0] &&
404cdf0e10cSrcweir 		sres.endOffset[0] > sres2.endOffset[0] )
405cdf0e10cSrcweir 	        return sres2;
406cdf0e10cSrcweir 	}
407cdf0e10cSrcweir     }
408cdf0e10cSrcweir 
409cdf0e10cSrcweir     return sres;
410cdf0e10cSrcweir }
411cdf0e10cSrcweir 
412cc450e3aSHerbert Dürr //---------------------------------------------------------------------
413cdf0e10cSrcweir 
414cdf0e10cSrcweir bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const
415cdf0e10cSrcweir {
416cdf0e10cSrcweir     bool bRet = 1;
417cdf0e10cSrcweir     if( '\x7f' != rStr[nPos])
418cdf0e10cSrcweir     {
419cdf0e10cSrcweir         if ( !xCharClass.is() )
420cdf0e10cSrcweir         {
421cdf0e10cSrcweir             Reference < XInterface > xI = xMSF->createInstance(
422cdf0e10cSrcweir                     OUString::createFromAscii( "com.sun.star.i18n.CharacterClassification"));
423cdf0e10cSrcweir             if( xI.is() )
424cdf0e10cSrcweir                 xI->queryInterface( ::getCppuType(
425cdf0e10cSrcweir                             (const Reference< XCharacterClassification >*)0))
426cdf0e10cSrcweir                     >>= xCharClass;
427cdf0e10cSrcweir         }
428cdf0e10cSrcweir         if ( xCharClass.is() )
429cdf0e10cSrcweir         {
430cdf0e10cSrcweir             sal_Int32 nCType = xCharClass->getCharacterType( rStr, nPos,
431cdf0e10cSrcweir                     aSrchPara.Locale );
432cdf0e10cSrcweir             if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA |
433cdf0e10cSrcweir                             KCharacterType::LETTER ) & nCType ) )
434cdf0e10cSrcweir                 bRet = 0;
435cdf0e10cSrcweir         }
436cdf0e10cSrcweir     }
437cdf0e10cSrcweir     return bRet;
438cdf0e10cSrcweir }
439cdf0e10cSrcweir 
440cc450e3aSHerbert Dürr // --------- helper methods for Boyer-Moore like text searching ----------
441cc450e3aSHerbert Dürr // TODO: use ICU's regex UREGEX_LITERAL mode instead when it becomes available
442cdf0e10cSrcweir 
443cdf0e10cSrcweir void TextSearch::MakeForwardTab()
444cdf0e10cSrcweir {
445cdf0e10cSrcweir     // create the jumptable for the search text
446cdf0e10cSrcweir     if( pJumpTable )
447cdf0e10cSrcweir     {
448cdf0e10cSrcweir         if( bIsForwardTab )
449cdf0e10cSrcweir             return ;                                        // the jumpTable is ok
450cdf0e10cSrcweir         delete pJumpTable;
451cdf0e10cSrcweir     }
452cdf0e10cSrcweir     bIsForwardTab = true;
453cdf0e10cSrcweir 
454cdf0e10cSrcweir     sal_Int32 n, nLen = sSrchStr.getLength();
455cdf0e10cSrcweir     pJumpTable = new TextSearchJumpTable;
456cdf0e10cSrcweir 
457cdf0e10cSrcweir     for( n = 0; n < nLen - 1; ++n )
458cdf0e10cSrcweir     {
459cdf0e10cSrcweir         sal_Unicode cCh = sSrchStr[n];
460cdf0e10cSrcweir         sal_Int32 nDiff = nLen - n - 1;
461cdf0e10cSrcweir 	TextSearchJumpTable::value_type aEntry( cCh, nDiff );
462cdf0e10cSrcweir 
463cdf0e10cSrcweir         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
464cdf0e10cSrcweir             pJumpTable->insert( aEntry );
465cdf0e10cSrcweir         if ( !aPair.second )
466cdf0e10cSrcweir             (*(aPair.first)).second = nDiff;
467cdf0e10cSrcweir     }
468cdf0e10cSrcweir }
469cdf0e10cSrcweir 
470cdf0e10cSrcweir void TextSearch::MakeForwardTab2()
471cdf0e10cSrcweir {
472cdf0e10cSrcweir     // create the jumptable for the search text
473cdf0e10cSrcweir     if( pJumpTable2 )
474cdf0e10cSrcweir     {
475cdf0e10cSrcweir         if( bIsForwardTab )
476cdf0e10cSrcweir             return ;                                        // the jumpTable is ok
477cdf0e10cSrcweir         delete pJumpTable2;
478cdf0e10cSrcweir     }
479cdf0e10cSrcweir     bIsForwardTab = true;
480cdf0e10cSrcweir 
481cdf0e10cSrcweir     sal_Int32 n, nLen = sSrchStr2.getLength();
482cdf0e10cSrcweir     pJumpTable2 = new TextSearchJumpTable;
483cdf0e10cSrcweir 
484cdf0e10cSrcweir     for( n = 0; n < nLen - 1; ++n )
485cdf0e10cSrcweir     {
486cdf0e10cSrcweir         sal_Unicode cCh = sSrchStr2[n];
487cdf0e10cSrcweir         sal_Int32 nDiff = nLen - n - 1;
488cdf0e10cSrcweir 
489cdf0e10cSrcweir 	TextSearchJumpTable::value_type aEntry( cCh, nDiff );
490cdf0e10cSrcweir         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
491cdf0e10cSrcweir             pJumpTable2->insert( aEntry );
492cdf0e10cSrcweir         if ( !aPair.second )
493cdf0e10cSrcweir             (*(aPair.first)).second = nDiff;
494cdf0e10cSrcweir     }
495cdf0e10cSrcweir }
496cdf0e10cSrcweir 
497cdf0e10cSrcweir void TextSearch::MakeBackwardTab()
498cdf0e10cSrcweir {
499cdf0e10cSrcweir     // create the jumptable for the search text
500cdf0e10cSrcweir     if( pJumpTable )
501cdf0e10cSrcweir     {
502cdf0e10cSrcweir         if( !bIsForwardTab )
503cdf0e10cSrcweir             return ;                                        // the jumpTable is ok
504cdf0e10cSrcweir         delete pJumpTable;
505cdf0e10cSrcweir     }
506cdf0e10cSrcweir     bIsForwardTab = false;
507cdf0e10cSrcweir 
508cdf0e10cSrcweir     sal_Int32 n, nLen = sSrchStr.getLength();
509cdf0e10cSrcweir     pJumpTable = new TextSearchJumpTable;
510cdf0e10cSrcweir 
511cdf0e10cSrcweir     for( n = nLen-1; n > 0; --n )
512cdf0e10cSrcweir     {
513cdf0e10cSrcweir         sal_Unicode cCh = sSrchStr[n];
514cdf0e10cSrcweir         TextSearchJumpTable::value_type aEntry( cCh, n );
515cdf0e10cSrcweir         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
516cdf0e10cSrcweir             pJumpTable->insert( aEntry );
517cdf0e10cSrcweir         if ( !aPair.second )
518cdf0e10cSrcweir             (*(aPair.first)).second = n;
519cdf0e10cSrcweir     }
520cdf0e10cSrcweir }
521cdf0e10cSrcweir 
522cdf0e10cSrcweir void TextSearch::MakeBackwardTab2()
523cdf0e10cSrcweir {
524cdf0e10cSrcweir     // create the jumptable for the search text
525cdf0e10cSrcweir     if( pJumpTable2 )
526cdf0e10cSrcweir     {
527cdf0e10cSrcweir         if( !bIsForwardTab )
528cdf0e10cSrcweir             return ;                                        // the jumpTable is ok
529cdf0e10cSrcweir         delete pJumpTable2;
530cdf0e10cSrcweir     }
531cdf0e10cSrcweir     bIsForwardTab = false;
532cdf0e10cSrcweir 
533cdf0e10cSrcweir     sal_Int32 n, nLen = sSrchStr2.getLength();
534cdf0e10cSrcweir     pJumpTable2 = new TextSearchJumpTable;
535cdf0e10cSrcweir 
536cdf0e10cSrcweir     for( n = nLen-1; n > 0; --n )
537cdf0e10cSrcweir     {
538cdf0e10cSrcweir         sal_Unicode cCh = sSrchStr2[n];
539cdf0e10cSrcweir         TextSearchJumpTable::value_type aEntry( cCh, n );
540cdf0e10cSrcweir         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
541cdf0e10cSrcweir             pJumpTable2->insert( aEntry );
542cdf0e10cSrcweir         if ( !aPair.second )
543cdf0e10cSrcweir             (*(aPair.first)).second = n;
544cdf0e10cSrcweir     }
545cdf0e10cSrcweir }
546cdf0e10cSrcweir 
547cdf0e10cSrcweir sal_Int32 TextSearch::GetDiff( const sal_Unicode cChr ) const
548cdf0e10cSrcweir {
549cdf0e10cSrcweir     TextSearchJumpTable *pJump;
550cdf0e10cSrcweir     OUString sSearchKey;
551cdf0e10cSrcweir 
552cdf0e10cSrcweir     if ( bUsePrimarySrchStr ) {
553cdf0e10cSrcweir       pJump = pJumpTable;
554cdf0e10cSrcweir       sSearchKey = sSrchStr;
555cdf0e10cSrcweir     } else {
556cdf0e10cSrcweir       pJump = pJumpTable2;
557cdf0e10cSrcweir       sSearchKey = sSrchStr2;
558cdf0e10cSrcweir     }
559cdf0e10cSrcweir 
560cdf0e10cSrcweir     TextSearchJumpTable::const_iterator iLook = pJump->find( cChr );
561cdf0e10cSrcweir     if ( iLook == pJump->end() )
562cdf0e10cSrcweir         return sSearchKey.getLength();
563cdf0e10cSrcweir     return (*iLook).second;
564cdf0e10cSrcweir }
565cdf0e10cSrcweir 
566cdf0e10cSrcweir 
567cdf0e10cSrcweir // TextSearch::NSrchFrwrd is mis-optimized on unxsoli (#i105945#)
568cdf0e10cSrcweir SearchResult TextSearch::NSrchFrwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
569cdf0e10cSrcweir         throw(RuntimeException)
570cdf0e10cSrcweir {
571cdf0e10cSrcweir     SearchResult aRet;
572cdf0e10cSrcweir     aRet.subRegExpressions = 0;
573cdf0e10cSrcweir 
574cdf0e10cSrcweir     OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
575cdf0e10cSrcweir 
576cdf0e10cSrcweir     OUString aStr( searchStr );
577cdf0e10cSrcweir     sal_Int32 nSuchIdx = aStr.getLength();
578cdf0e10cSrcweir     sal_Int32 nEnde = endPos;
579cdf0e10cSrcweir     if( !nSuchIdx || !sSearchKey.getLength() || sSearchKey.getLength() > nSuchIdx )
580cdf0e10cSrcweir         return aRet;
581cdf0e10cSrcweir 
582cdf0e10cSrcweir 
583cdf0e10cSrcweir     if( nEnde < sSearchKey.getLength() )  // position inside the search region ?
584cdf0e10cSrcweir         return aRet;
585cdf0e10cSrcweir 
586cdf0e10cSrcweir     nEnde -= sSearchKey.getLength();
587cdf0e10cSrcweir 
588cdf0e10cSrcweir     if (bUsePrimarySrchStr)
589cdf0e10cSrcweir       MakeForwardTab();                   // create the jumptable
590cdf0e10cSrcweir     else
591cdf0e10cSrcweir       MakeForwardTab2();
592cdf0e10cSrcweir 
593cdf0e10cSrcweir     for (sal_Int32 nCmpIdx = startPos; // start position for the search
594cdf0e10cSrcweir             nCmpIdx <= nEnde;
595cdf0e10cSrcweir             nCmpIdx += GetDiff( aStr[nCmpIdx + sSearchKey.getLength()-1]))
596cdf0e10cSrcweir     {
597cdf0e10cSrcweir         // if the match would be the completed cells, skip it.
598cdf0e10cSrcweir         if ( (checkCTLStart && !isCellStart( aStr, nCmpIdx )) || (checkCTLEnd
599cdf0e10cSrcweir                     && !isCellStart( aStr, nCmpIdx + sSearchKey.getLength())) )
600cdf0e10cSrcweir             continue;
601cdf0e10cSrcweir 
602cdf0e10cSrcweir         nSuchIdx = sSearchKey.getLength() - 1;
603cdf0e10cSrcweir         while( nSuchIdx >= 0 && sSearchKey[nSuchIdx] == aStr[nCmpIdx + nSuchIdx])
604cdf0e10cSrcweir         {
605cdf0e10cSrcweir             if( nSuchIdx == 0 )
606cdf0e10cSrcweir             {
607cdf0e10cSrcweir                 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
608cdf0e10cSrcweir                 {
609cdf0e10cSrcweir                     sal_Int32 nFndEnd = nCmpIdx + sSearchKey.getLength();
610cdf0e10cSrcweir                     bool bAtStart = !nCmpIdx;
611cdf0e10cSrcweir                     bool bAtEnd = nFndEnd == endPos;
612cdf0e10cSrcweir                     bool bDelimBefore = bAtStart || IsDelimiter( aStr, nCmpIdx-1 );
613cdf0e10cSrcweir                     bool bDelimBehind = IsDelimiter(  aStr, nFndEnd );
614cdf0e10cSrcweir                     //  *       1 -> only one word in the paragraph
615cdf0e10cSrcweir                     //  *       2 -> at begin of paragraph
616cdf0e10cSrcweir                     //  *       3 -> at end of paragraph
617cdf0e10cSrcweir                     //  *       4 -> inside the paragraph
618cdf0e10cSrcweir                     if( !(  ( bAtStart && bAtEnd ) ||           // 1
619cdf0e10cSrcweir                                 ( bAtStart && bDelimBehind ) ||     // 2
620cdf0e10cSrcweir                                 ( bAtEnd && bDelimBefore ) ||       // 3
621cdf0e10cSrcweir                                 ( bDelimBefore && bDelimBehind )))  // 4
622cdf0e10cSrcweir                         break;
623cdf0e10cSrcweir                 }
624cdf0e10cSrcweir 
625cdf0e10cSrcweir                 aRet.subRegExpressions = 1;
626cdf0e10cSrcweir                 aRet.startOffset.realloc( 1 );
627cdf0e10cSrcweir                 aRet.startOffset[ 0 ] = nCmpIdx;
628cdf0e10cSrcweir                 aRet.endOffset.realloc( 1 );
629cdf0e10cSrcweir                 aRet.endOffset[ 0 ] = nCmpIdx + sSearchKey.getLength();
630cdf0e10cSrcweir 
631cdf0e10cSrcweir                 return aRet;
632cdf0e10cSrcweir             }
633cdf0e10cSrcweir             else
634cdf0e10cSrcweir                 nSuchIdx--;
635cdf0e10cSrcweir         }
636cdf0e10cSrcweir     }
637cdf0e10cSrcweir     return aRet;
638cdf0e10cSrcweir }
639cdf0e10cSrcweir 
640cdf0e10cSrcweir SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
641cdf0e10cSrcweir         throw(RuntimeException)
642cdf0e10cSrcweir {
643cdf0e10cSrcweir     SearchResult aRet;
644cdf0e10cSrcweir     aRet.subRegExpressions = 0;
645cdf0e10cSrcweir 
646cdf0e10cSrcweir     OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
647cdf0e10cSrcweir 
648cdf0e10cSrcweir     OUString aStr( searchStr );
649cdf0e10cSrcweir     sal_Int32 nSuchIdx = aStr.getLength();
650cdf0e10cSrcweir     sal_Int32 nEnde = endPos;
651cdf0e10cSrcweir     if( nSuchIdx == 0 || sSearchKey.getLength() == 0 || sSearchKey.getLength() > nSuchIdx)
652cdf0e10cSrcweir         return aRet;
653cdf0e10cSrcweir 
654cdf0e10cSrcweir     if (bUsePrimarySrchStr)
655cdf0e10cSrcweir       MakeBackwardTab();                      // create the jumptable
656cdf0e10cSrcweir     else
657cdf0e10cSrcweir       MakeBackwardTab2();
658cdf0e10cSrcweir 
659cdf0e10cSrcweir     if( nEnde == nSuchIdx )                 // end position for the search
660cdf0e10cSrcweir         nEnde = sSearchKey.getLength();
661cdf0e10cSrcweir     else
662cdf0e10cSrcweir         nEnde += sSearchKey.getLength();
663cdf0e10cSrcweir 
664cdf0e10cSrcweir     sal_Int32 nCmpIdx = startPos;          // start position for the search
665cdf0e10cSrcweir 
666cdf0e10cSrcweir     while (nCmpIdx >= nEnde)
667cdf0e10cSrcweir     {
668cdf0e10cSrcweir         // if the match would be the completed cells, skip it.
669cdf0e10cSrcweir         if ( (!checkCTLStart || isCellStart( aStr, nCmpIdx -
670cdf0e10cSrcweir                         sSearchKey.getLength() )) && (!checkCTLEnd ||
671cdf0e10cSrcweir                     isCellStart( aStr, nCmpIdx)))
672cdf0e10cSrcweir         {
673cdf0e10cSrcweir             nSuchIdx = 0;
674cdf0e10cSrcweir             while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] ==
675cdf0e10cSrcweir                     aStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] )
676cdf0e10cSrcweir                 nSuchIdx++;
677cdf0e10cSrcweir             if( nSuchIdx >= sSearchKey.getLength() )
678cdf0e10cSrcweir             {
679cdf0e10cSrcweir                 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
680cdf0e10cSrcweir                 {
681cdf0e10cSrcweir                     sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength();
682cdf0e10cSrcweir                     bool bAtStart = !nFndStt;
683cdf0e10cSrcweir                     bool bAtEnd = nCmpIdx == startPos;
684cdf0e10cSrcweir                     bool bDelimBehind = IsDelimiter( aStr, nCmpIdx );
685cdf0e10cSrcweir                     bool bDelimBefore = bAtStart || // begin of paragraph
686cdf0e10cSrcweir                         IsDelimiter( aStr, nFndStt-1 );
687cdf0e10cSrcweir                     //  *       1 -> only one word in the paragraph
688cdf0e10cSrcweir                     //  *       2 -> at begin of paragraph
689cdf0e10cSrcweir                     //  *       3 -> at end of paragraph
690cdf0e10cSrcweir                     //  *       4 -> inside the paragraph
691cdf0e10cSrcweir                     if( ( bAtStart && bAtEnd ) ||           // 1
692cdf0e10cSrcweir                             ( bAtStart && bDelimBehind ) ||     // 2
693cdf0e10cSrcweir                             ( bAtEnd && bDelimBefore ) ||       // 3
694cdf0e10cSrcweir                             ( bDelimBefore && bDelimBehind ))   // 4
695cdf0e10cSrcweir                     {
696cdf0e10cSrcweir                         aRet.subRegExpressions = 1;
697cdf0e10cSrcweir                         aRet.startOffset.realloc( 1 );
698cdf0e10cSrcweir                         aRet.startOffset[ 0 ] = nCmpIdx;
699cdf0e10cSrcweir                         aRet.endOffset.realloc( 1 );
700cdf0e10cSrcweir                         aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
701cdf0e10cSrcweir                         return aRet;
702cdf0e10cSrcweir                     }
703cdf0e10cSrcweir                 }
704cdf0e10cSrcweir                 else
705cdf0e10cSrcweir                 {
706cdf0e10cSrcweir                     aRet.subRegExpressions = 1;
707cdf0e10cSrcweir                     aRet.startOffset.realloc( 1 );
708cdf0e10cSrcweir                     aRet.startOffset[ 0 ] = nCmpIdx;
709cdf0e10cSrcweir                     aRet.endOffset.realloc( 1 );
710cdf0e10cSrcweir                     aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
711cdf0e10cSrcweir                     return aRet;
712cdf0e10cSrcweir                 }
713cdf0e10cSrcweir             }
714cdf0e10cSrcweir         }
715cdf0e10cSrcweir         nSuchIdx = GetDiff( aStr[nCmpIdx - sSearchKey.getLength()] );
716cdf0e10cSrcweir         if( nCmpIdx < nSuchIdx )
717cdf0e10cSrcweir             return aRet;
718cdf0e10cSrcweir         nCmpIdx -= nSuchIdx;
719cdf0e10cSrcweir     }
720cdf0e10cSrcweir     return aRet;
721cdf0e10cSrcweir }
722cdf0e10cSrcweir 
723cdf0e10cSrcweir //---------------------------------------------------------------------------
724cdf0e10cSrcweir 
725cdf0e10cSrcweir SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr,
726cdf0e10cSrcweir                                       sal_Int32 startPos, sal_Int32 endPos )
727cdf0e10cSrcweir             throw(RuntimeException)
728cdf0e10cSrcweir {
729cc450e3aSHerbert Dürr 	SearchResult aRet;
730cc450e3aSHerbert Dürr 	aRet.subRegExpressions = 0;
731cc450e3aSHerbert Dürr 	if( !pRegexMatcher)
732cc450e3aSHerbert Dürr 		return aRet;
733cc450e3aSHerbert Dürr 
734cc450e3aSHerbert Dürr 	if( endPos > searchStr.getLength())
735cc450e3aSHerbert Dürr 		endPos = searchStr.getLength();
736cc450e3aSHerbert Dürr 
737cc450e3aSHerbert Dürr 	// use the ICU RegexMatcher to find the matches
738cc450e3aSHerbert Dürr 	UErrorCode nIcuErr = U_ZERO_ERROR;
739cc450e3aSHerbert Dürr 	const IcuUniString aSearchTargetStr( searchStr.getStr(), endPos);
740cc450e3aSHerbert Dürr 	pRegexMatcher->reset( aSearchTargetStr);
741cc450e3aSHerbert Dürr 	if( !pRegexMatcher->find( startPos, nIcuErr))
742cc450e3aSHerbert Dürr 		return aRet;
743cc450e3aSHerbert Dürr 
744cc450e3aSHerbert Dürr 	aRet.subRegExpressions = 1;
745cc450e3aSHerbert Dürr 	aRet.startOffset.realloc( aRet.subRegExpressions);
746cc450e3aSHerbert Dürr 	aRet.endOffset.realloc( aRet.subRegExpressions);
747cc450e3aSHerbert Dürr 	aRet.startOffset[0] = pRegexMatcher->start( nIcuErr);
748cc450e3aSHerbert Dürr 	aRet.endOffset[0]   = pRegexMatcher->end( nIcuErr);
749cc450e3aSHerbert Dürr 
750cc450e3aSHerbert Dürr 	return aRet;
751cdf0e10cSrcweir }
752cdf0e10cSrcweir 
753cdf0e10cSrcweir SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr,
754cdf0e10cSrcweir                                       sal_Int32 startPos, sal_Int32 endPos )
755cdf0e10cSrcweir             throw(RuntimeException)
756cdf0e10cSrcweir {
757cc450e3aSHerbert Dürr 	// NOTE: for backwards search callers provide startPos/endPos inverted!
758cc450e3aSHerbert Dürr 	SearchResult aRet;
759cc450e3aSHerbert Dürr 	aRet.subRegExpressions = 0;
760cc450e3aSHerbert Dürr 	if( !pRegexMatcher)
761cc450e3aSHerbert Dürr 		return aRet;
762cc450e3aSHerbert Dürr 
763cc450e3aSHerbert Dürr 	if( startPos > searchStr.getLength())
764cc450e3aSHerbert Dürr 		startPos = searchStr.getLength();
765cc450e3aSHerbert Dürr 
766cc450e3aSHerbert Dürr 	// use the ICU RegexMatcher to find the matches
767cc450e3aSHerbert Dürr 	// TODO: use ICU's backward searching once it becomes available
768cc450e3aSHerbert Dürr 	UErrorCode nIcuErr = U_ZERO_ERROR;
769cc450e3aSHerbert Dürr 	const IcuUniString aSearchTargetStr( searchStr.getStr(), startPos);
770cc450e3aSHerbert Dürr 	pRegexMatcher->reset( aSearchTargetStr);
771cc450e3aSHerbert Dürr 	if( !pRegexMatcher->find( endPos, nIcuErr))
772cc450e3aSHerbert Dürr 		return aRet;
773cc450e3aSHerbert Dürr 
774cc450e3aSHerbert Dürr 	aRet.subRegExpressions = 1;
775cc450e3aSHerbert Dürr 	aRet.startOffset.realloc( aRet.subRegExpressions);
776cc450e3aSHerbert Dürr 	aRet.endOffset.realloc( aRet.subRegExpressions);
777cc450e3aSHerbert Dürr 
778cc450e3aSHerbert Dürr 	do {
779cc450e3aSHerbert Dürr 		// NOTE: backward search seems to be expected to have startOfs/endOfs inverted!
780cc450e3aSHerbert Dürr 		aRet.startOffset[0] = pRegexMatcher->end( nIcuErr);
781cc450e3aSHerbert Dürr 		aRet.endOffset[0]   = pRegexMatcher->start( nIcuErr);
782cc450e3aSHerbert Dürr 	} while( pRegexMatcher->find( aRet.endOffset[0]+1, nIcuErr));
783cc450e3aSHerbert Dürr 
784cc450e3aSHerbert Dürr 	return aRet;
785cdf0e10cSrcweir }
786cdf0e10cSrcweir 
787cc450e3aSHerbert Dürr //---------------------------------------------------------------------------
788cc450e3aSHerbert Dürr 
789cc450e3aSHerbert Dürr // search for words phonetically
790cdf0e10cSrcweir SearchResult TextSearch::ApproxSrchFrwrd( const OUString& searchStr,
791cdf0e10cSrcweir                                           sal_Int32 startPos, sal_Int32 endPos )
792cdf0e10cSrcweir             throw(RuntimeException)
793cdf0e10cSrcweir {
794cdf0e10cSrcweir     SearchResult aRet;
795cdf0e10cSrcweir     aRet.subRegExpressions = 0;
796cdf0e10cSrcweir 
797cdf0e10cSrcweir     if( !xBreak.is() )
798cdf0e10cSrcweir         return aRet;
799cdf0e10cSrcweir 
800cdf0e10cSrcweir     OUString aWTemp( searchStr );
801cdf0e10cSrcweir 
802cdf0e10cSrcweir     register sal_Int32 nStt, nEnd;
803cdf0e10cSrcweir 
804cdf0e10cSrcweir     Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
805cdf0e10cSrcweir             aSrchPara.Locale,
806cdf0e10cSrcweir             WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
807cdf0e10cSrcweir 
808cdf0e10cSrcweir     do
809cdf0e10cSrcweir     {
810cdf0e10cSrcweir         if( aWBnd.startPos >= endPos )
811cdf0e10cSrcweir             break;
812cdf0e10cSrcweir         nStt = aWBnd.startPos < startPos ? startPos : aWBnd.startPos;
813cdf0e10cSrcweir         nEnd = aWBnd.endPos > endPos ? endPos : aWBnd.endPos;
814cdf0e10cSrcweir 
815cdf0e10cSrcweir         if( nStt < nEnd &&
816cdf0e10cSrcweir                 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
817cdf0e10cSrcweir         {
818cdf0e10cSrcweir             aRet.subRegExpressions = 1;
819cdf0e10cSrcweir             aRet.startOffset.realloc( 1 );
820cdf0e10cSrcweir             aRet.startOffset[ 0 ] = nStt;
821cdf0e10cSrcweir             aRet.endOffset.realloc( 1 );
822cdf0e10cSrcweir             aRet.endOffset[ 0 ] = nEnd;
823cdf0e10cSrcweir             break;
824cdf0e10cSrcweir         }
825cdf0e10cSrcweir 
826cdf0e10cSrcweir         nStt = nEnd - 1;
827cdf0e10cSrcweir         aWBnd = xBreak->nextWord( aWTemp, nStt, aSrchPara.Locale,
828cdf0e10cSrcweir                 WordType::ANYWORD_IGNOREWHITESPACES);
829cdf0e10cSrcweir     } while( aWBnd.startPos != aWBnd.endPos ||
830cdf0e10cSrcweir             (aWBnd.endPos != aWTemp.getLength() && aWBnd.endPos != nEnd) );
831cdf0e10cSrcweir     // #i50244# aWBnd.endPos != nEnd : in case there is _no_ word (only
832cdf0e10cSrcweir     // whitespace) in searchStr, getWordBoundary() returned startPos,startPos
833cdf0e10cSrcweir     // and nextWord() does also => don't loop forever.
834cdf0e10cSrcweir     return aRet;
835cdf0e10cSrcweir }
836cdf0e10cSrcweir 
837cdf0e10cSrcweir SearchResult TextSearch::ApproxSrchBkwrd( const OUString& searchStr,
838cdf0e10cSrcweir                                           sal_Int32 startPos, sal_Int32 endPos )
839cdf0e10cSrcweir             throw(RuntimeException)
840cdf0e10cSrcweir {
841cdf0e10cSrcweir     SearchResult aRet;
842cdf0e10cSrcweir     aRet.subRegExpressions = 0;
843cdf0e10cSrcweir 
844cdf0e10cSrcweir     if( !xBreak.is() )
845cdf0e10cSrcweir         return aRet;
846cdf0e10cSrcweir 
847cdf0e10cSrcweir     OUString aWTemp( searchStr );
848cdf0e10cSrcweir 
849cdf0e10cSrcweir     register sal_Int32 nStt, nEnd;
850cdf0e10cSrcweir 
851cdf0e10cSrcweir     Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
852cdf0e10cSrcweir             aSrchPara.Locale,
853cdf0e10cSrcweir             WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
854cdf0e10cSrcweir 
855cdf0e10cSrcweir     do
856cdf0e10cSrcweir     {
857cdf0e10cSrcweir         if( aWBnd.endPos <= endPos )
858cdf0e10cSrcweir             break;
859cdf0e10cSrcweir         nStt = aWBnd.startPos < endPos ? endPos : aWBnd.startPos;
860cdf0e10cSrcweir         nEnd = aWBnd.endPos > startPos ? startPos : aWBnd.endPos;
861cdf0e10cSrcweir 
862cdf0e10cSrcweir         if( nStt < nEnd &&
863cdf0e10cSrcweir                 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
864cdf0e10cSrcweir         {
865cdf0e10cSrcweir             aRet.subRegExpressions = 1;
866cdf0e10cSrcweir             aRet.startOffset.realloc( 1 );
867cdf0e10cSrcweir             aRet.startOffset[ 0 ] = nEnd;
868cdf0e10cSrcweir             aRet.endOffset.realloc( 1 );
869cdf0e10cSrcweir             aRet.endOffset[ 0 ] = nStt;
870cdf0e10cSrcweir             break;
871cdf0e10cSrcweir         }
872cdf0e10cSrcweir         if( !nStt )
873cdf0e10cSrcweir             break;
874cdf0e10cSrcweir 
875cdf0e10cSrcweir         aWBnd = xBreak->previousWord( aWTemp, nStt, aSrchPara.Locale,
876cdf0e10cSrcweir                 WordType::ANYWORD_IGNOREWHITESPACES);
877cdf0e10cSrcweir     } while( aWBnd.startPos != aWBnd.endPos || aWBnd.endPos != aWTemp.getLength() );
878cdf0e10cSrcweir     return aRet;
879cdf0e10cSrcweir }
880cdf0e10cSrcweir 
881cdf0e10cSrcweir 
882cdf0e10cSrcweir static const sal_Char cSearchName[] = "com.sun.star.util.TextSearch";
883cdf0e10cSrcweir static const sal_Char cSearchImpl[] = "com.sun.star.util.TextSearch_i18n";
884cdf0e10cSrcweir 
885cdf0e10cSrcweir static OUString getServiceName_Static()
886cdf0e10cSrcweir {
887cdf0e10cSrcweir     return OUString::createFromAscii( cSearchName );
888cdf0e10cSrcweir }
889cdf0e10cSrcweir 
890cdf0e10cSrcweir static OUString getImplementationName_Static()
891cdf0e10cSrcweir {
892cdf0e10cSrcweir     return OUString::createFromAscii( cSearchImpl );
893cdf0e10cSrcweir }
894cdf0e10cSrcweir 
895cdf0e10cSrcweir OUString SAL_CALL
896cdf0e10cSrcweir TextSearch::getImplementationName()
897cdf0e10cSrcweir                 throw( RuntimeException )
898cdf0e10cSrcweir {
899cdf0e10cSrcweir     return getImplementationName_Static();
900cdf0e10cSrcweir }
901cdf0e10cSrcweir 
902cdf0e10cSrcweir sal_Bool SAL_CALL
903cdf0e10cSrcweir TextSearch::supportsService(const OUString& rServiceName)
904cdf0e10cSrcweir                 throw( RuntimeException )
905cdf0e10cSrcweir {
906cdf0e10cSrcweir     return !rServiceName.compareToAscii( cSearchName );
907cdf0e10cSrcweir }
908cdf0e10cSrcweir 
909cdf0e10cSrcweir Sequence< OUString > SAL_CALL
910cdf0e10cSrcweir TextSearch::getSupportedServiceNames(void) throw( RuntimeException )
911cdf0e10cSrcweir {
912cdf0e10cSrcweir     Sequence< OUString > aRet(1);
913cdf0e10cSrcweir     aRet[0] = getServiceName_Static();
914cdf0e10cSrcweir     return aRet;
915cdf0e10cSrcweir }
916cdf0e10cSrcweir 
917cdf0e10cSrcweir ::com::sun::star::uno::Reference< ::com::sun::star::uno::XInterface >
918cdf0e10cSrcweir SAL_CALL TextSearch_CreateInstance(
919cdf0e10cSrcweir         const ::com::sun::star::uno::Reference<
920cdf0e10cSrcweir         ::com::sun::star::lang::XMultiServiceFactory >& rxMSF )
921cdf0e10cSrcweir {
922cdf0e10cSrcweir     return ::com::sun::star::uno::Reference<
923cdf0e10cSrcweir         ::com::sun::star::uno::XInterface >(
924cdf0e10cSrcweir                 (::cppu::OWeakObject*) new TextSearch( rxMSF ) );
925cdf0e10cSrcweir }
926cdf0e10cSrcweir 
927cdf0e10cSrcweir extern "C"
928cdf0e10cSrcweir {
929cdf0e10cSrcweir 
930cdf0e10cSrcweir void SAL_CALL component_getImplementationEnvironment(
931cdf0e10cSrcweir         const sal_Char** ppEnvTypeName, uno_Environment** /*ppEnv*/ )
932cdf0e10cSrcweir {
933cdf0e10cSrcweir     *ppEnvTypeName = CPPU_CURRENT_LANGUAGE_BINDING_NAME;
934cdf0e10cSrcweir }
935cdf0e10cSrcweir 
936cdf0e10cSrcweir void* SAL_CALL component_getFactory( const sal_Char* sImplementationName,
937cdf0e10cSrcweir         void* _pServiceManager, void* /*_pRegistryKey*/ )
938cdf0e10cSrcweir {
939cdf0e10cSrcweir     void* pRet = NULL;
940cdf0e10cSrcweir 
941cdf0e10cSrcweir     ::com::sun::star::lang::XMultiServiceFactory* pServiceManager =
942cdf0e10cSrcweir         reinterpret_cast< ::com::sun::star::lang::XMultiServiceFactory* >
943cdf0e10cSrcweir             ( _pServiceManager );
944cdf0e10cSrcweir     ::com::sun::star::uno::Reference<
945cdf0e10cSrcweir             ::com::sun::star::lang::XSingleServiceFactory > xFactory;
946cdf0e10cSrcweir 
947cdf0e10cSrcweir     if ( 0 == rtl_str_compare( sImplementationName, cSearchImpl) )
948cdf0e10cSrcweir     {
949cdf0e10cSrcweir         ::com::sun::star::uno::Sequence< ::rtl::OUString > aServiceNames(1);
950cdf0e10cSrcweir         aServiceNames[0] = getServiceName_Static();
951cdf0e10cSrcweir         xFactory = ::cppu::createSingleFactory(
952cdf0e10cSrcweir                 pServiceManager, getImplementationName_Static(),
953cdf0e10cSrcweir                 &TextSearch_CreateInstance, aServiceNames );
954cdf0e10cSrcweir     }
955cdf0e10cSrcweir 
956cdf0e10cSrcweir     if ( xFactory.is() )
957cdf0e10cSrcweir     {
958cdf0e10cSrcweir         xFactory->acquire();
959cdf0e10cSrcweir         pRet = xFactory.get();
960cdf0e10cSrcweir     }
961cdf0e10cSrcweir 
962cdf0e10cSrcweir     return pRet;
963cdf0e10cSrcweir }
964cdf0e10cSrcweir 
965cdf0e10cSrcweir } // extern "C"
966