xref: /trunk/main/unotools/source/i18n/textsearch.cxx (revision b5088357)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_unotools.hxx"
26 #include <i18npool/mslangid.hxx>
27 #include <tools/debug.hxx>
28 #ifndef _INTN_HXX //autogen
29 //#include <tools/intn.hxx>
30 #endif
31 #include <com/sun/star/lang/XMultiServiceFactory.hpp>
32 #ifndef _COM_SUN_STAR_UTIL_SEARCHFLAGS_HDL_
33 #include <com/sun/star/util/SearchFlags.hdl>
34 #endif
35 #include <com/sun/star/i18n/TransliterationModules.hpp>
36 #include <unotools/charclass.hxx>
37 #include <comphelper/processfactory.hxx>
38 #include <unotools/textsearch.hxx>
39 #include <rtl/instance.hxx>
40 
41 using namespace ::com::sun::star::util;
42 using namespace ::com::sun::star::uno;
43 using namespace ::com::sun::star::lang;
44 
45 // ............................................................................
46 namespace utl
47 {
48 // ............................................................................
49 
SearchParam(const String & rText,SearchType eType,sal_Bool bCaseSensitive,sal_Bool bWrdOnly,sal_Bool bSearchInSel)50 SearchParam::SearchParam( const String &rText,
51 								SearchType eType,
52 								sal_Bool bCaseSensitive,
53 								sal_Bool bWrdOnly,
54 								sal_Bool bSearchInSel )
55 {
56 	sSrchStr        = rText;
57 	eSrchType       = eType;
58 
59 	bWordOnly       = bWrdOnly;
60 	bSrchInSel      = bSearchInSel;
61 	bCaseSense      = bCaseSensitive;
62 
63 	nTransliterationFlags = 0;
64 
65 	// Werte fuer "Gewichtete Levenshtein-Distanz"
66 	bLEV_Relaxed    = sal_True;
67 	nLEV_OtherX     = 2;
68 	nLEV_ShorterY   = 1;
69 	nLEV_LongerZ    = 3;
70 }
71 
/** Copy constructor: takes over every setting of rParam member by member. */
SearchParam::SearchParam( const SearchParam& rParam )
{
    // search / replace text and search kind
    eSrchType       = rParam.eSrchType;
    sSrchStr        = rParam.sSrchStr;
    sReplaceStr     = rParam.sReplaceStr;

    // matching switches
    bCaseSense      = rParam.bCaseSense;
    bWordOnly       = rParam.bWordOnly;
    bSrchInSel      = rParam.bSrchInSel;

    // weighted Levenshtein distance settings
    nLEV_OtherX     = rParam.nLEV_OtherX;
    nLEV_ShorterY   = rParam.nLEV_ShorterY;
    nLEV_LongerZ    = rParam.nLEV_LongerZ;
    bLEV_Relaxed    = rParam.bLEV_Relaxed;

    nTransliterationFlags = rParam.nTransliterationFlags;
}
89 
lcl_Equals(const SearchOptions & rSO1,const SearchOptions & rSO2)90 static bool lcl_Equals( const SearchOptions& rSO1, const SearchOptions& rSO2 )
91 {
92     return rSO1.algorithmType == rSO2.algorithmType &&
93         rSO1.searchFlag == rSO2.searchFlag &&
94         rSO1.searchString.equals(rSO2.searchString) &&
95         rSO1.replaceString.equals(rSO2.replaceString) &&
96         rSO1.changedChars == rSO2.changedChars &&
97         rSO1.deletedChars == rSO2.deletedChars &&
98         rSO1.insertedChars == rSO2.insertedChars &&
99         rSO1.Locale.Language == rSO2.Locale.Language &&
100         rSO1.Locale.Country == rSO2.Locale.Country &&
101         rSO1.Locale.Variant == rSO2.Locale.Variant &&
102         rSO1.transliterateFlags == rSO2.transliterateFlags;
103 }
104 
105 namespace
106 {
107     struct CachedTextSearch
108     {
109         ::osl::Mutex mutex;
110         ::com::sun::star::util::SearchOptions Options;
111         ::com::sun::star::uno::Reference< ::com::sun::star::util::XTextSearch > xTextSearch;
112     };
113 
114     struct theCachedTextSearch
115         : public rtl::Static< CachedTextSearch, theCachedTextSearch > {};
116 }
117 
getXTextSearch(const SearchOptions & rPara)118 Reference<XTextSearch> TextSearch::getXTextSearch( const SearchOptions& rPara )
119 {
120     CachedTextSearch &rCache = theCachedTextSearch::get();
121 
122     osl::MutexGuard aGuard(rCache.mutex);
123 
124     if ( lcl_Equals(rCache.Options, rPara) )
125         return rCache.xTextSearch;
126 
127     try
128     {
129         Reference< XMultiServiceFactory > xMSF = ::comphelper::getProcessServiceFactory();
130         rCache.xTextSearch.set( xMSF->createInstance(
131             ::rtl::OUString( RTL_CONSTASCII_USTRINGPARAM(
132                         "com.sun.star.util.TextSearch" ) ) ), UNO_QUERY_THROW );
133         rCache.xTextSearch->setOptions( rPara );
134         rCache.Options = rPara;
135     }
136     catch ( Exception& )
137     {
138         DBG_ERRORFILE( "TextSearch ctor: Exception caught!" );
139     }
140     return rCache.xTextSearch;
141 }
142 
TextSearch(const SearchParam & rParam,LanguageType eLang)143 TextSearch::TextSearch(const SearchParam & rParam, LanguageType eLang )
144 {
145 	if( LANGUAGE_NONE == eLang )
146 		eLang = LANGUAGE_SYSTEM;
147     ::com::sun::star::lang::Locale aLocale(
148             MsLangId::convertLanguageToLocale( LanguageType(eLang)));
149 
150 	Init( rParam, aLocale);
151 }
152 
TextSearch(const SearchParam & rParam,const CharClass & rCClass)153 TextSearch::TextSearch(const SearchParam & rParam, const CharClass& rCClass )
154 {
155 	Init( rParam, rCClass.getLocale() );
156 }
157 
TextSearch(const SearchOptions & rPara)158 TextSearch::TextSearch( const SearchOptions& rPara )
159 {
160     xTextSearch = getXTextSearch( rPara );
161 }
162 
Init(const SearchParam & rParam,const::com::sun::star::lang::Locale & rLocale)163 void TextSearch::Init( const SearchParam & rParam,
164 						const ::com::sun::star::lang::Locale& rLocale )
165 {
166 	// convert SearchParam to the UNO SearchOptions
167 	SearchOptions aSOpt;
168 
169 	switch( rParam.GetSrchType() )
170 	{
171 	case SearchParam::SRCH_REGEXP:
172 		aSOpt.algorithmType = SearchAlgorithms_REGEXP;
173 		if( rParam.IsSrchInSelection() )
174 			aSOpt.searchFlag |= SearchFlags::REG_NOT_BEGINOFLINE |
175 								SearchFlags::REG_NOT_ENDOFLINE;
176 		break;
177 
178 	case SearchParam::SRCH_LEVDIST:
179 		aSOpt.algorithmType = SearchAlgorithms_APPROXIMATE;
180 		aSOpt.changedChars = rParam.GetLEVOther();
181 		aSOpt.deletedChars = rParam.GetLEVLonger();
182 		aSOpt.insertedChars = rParam.GetLEVShorter();
183 		if( rParam.IsSrchRelaxed() )
184 			aSOpt.searchFlag |= SearchFlags::LEV_RELAXED;
185 		break;
186 
187 //	case SearchParam::SRCH_NORMAL:
188 	default:
189 		aSOpt.algorithmType = SearchAlgorithms_ABSOLUTE;
190 		if( rParam.IsSrchWordOnly() )
191 			aSOpt.searchFlag |= SearchFlags::NORM_WORD_ONLY;
192 		break;
193 	}
194 	aSOpt.searchString = rParam.GetSrchStr();
195 	aSOpt.replaceString = rParam.GetReplaceStr();
196 	aSOpt.Locale = rLocale;
197 	aSOpt.transliterateFlags = rParam.GetTransliterationFlags();
198 	if( !rParam.IsCaseSensitive() )
199     {
200 		aSOpt.searchFlag |= SearchFlags::ALL_IGNORE_CASE;
201         aSOpt.transliterateFlags |= ::com::sun::star::i18n::TransliterationModules_IGNORE_CASE;
202     }
203 
204     xTextSearch = getXTextSearch( aSOpt );
205 }
206 
SetLocale(const::com::sun::star::util::SearchOptions & rOptions,const::com::sun::star::lang::Locale & rLocale)207 void TextSearch::SetLocale( const ::com::sun::star::util::SearchOptions& rOptions,
208                             const ::com::sun::star::lang::Locale& rLocale )
209 {
210 	// convert SearchParam to the UNO SearchOptions
211     SearchOptions aSOpt( rOptions );
212     aSOpt.Locale = rLocale;
213 
214     xTextSearch = getXTextSearch( aSOpt );
215 }
216 
217 
~TextSearch()218 TextSearch::~TextSearch()
219 {
220 }
221 
/*
 * The general search methods. They then call the corresponding
 * methods for the normal search or for the search for regular
 * expressions through the method pointers.
 */
227 #if defined _MSC_VER
228 #pragma optimize("", off)
229 #pragma warning(push)
230 #pragma warning(disable: 4748)
231 #endif
SearchFrwrd(const String & rStr,xub_StrLen * pStart,xub_StrLen * pEnde,SearchResult * pRes)232 int TextSearch::SearchFrwrd( const String & rStr, xub_StrLen* pStart,
233 							xub_StrLen* pEnde, SearchResult* pRes )
234 {
235 	int nRet = 0;
236 	try
237 	{
238 		if( xTextSearch.is() )
239 		{
240 			SearchResult aRet( xTextSearch->searchForward(
241 													rStr, *pStart, *pEnde ));
242 			if( aRet.subRegExpressions > 0 )
243 			{
244 				nRet = 1;
245 				// the XTextsearch returns in startOffset the higher position
246 				// and the endposition is allways exclusive.
247 				// The caller of this function will have in startPos the
248 				// lower pos. and end
249 				*pStart = (xub_StrLen)aRet.startOffset[ 0 ];
250 				*pEnde = (xub_StrLen)aRet.endOffset[ 0 ];
251 				if( pRes )
252 					*pRes = aRet;
253 			}
254 		}
255 	}
256 	catch ( Exception& )
257 	{
258 		DBG_ERRORFILE( "SearchForward: Exception caught!" );
259 	}
260 	return nRet;
261 }
262 
SearchBkwrd(const String & rStr,xub_StrLen * pStart,xub_StrLen * pEnde,SearchResult * pRes)263 int TextSearch::SearchBkwrd( const String & rStr, xub_StrLen* pStart,
264 							xub_StrLen* pEnde, SearchResult* pRes )
265 {
266 	int nRet = 0;
267 	try
268 	{
269 		if( xTextSearch.is() )
270 		{
271 			SearchResult aRet( xTextSearch->searchBackward(
272 													rStr, *pStart, *pEnde ));
273 			if( aRet.subRegExpressions )
274 			{
275 				nRet = 1;
276 				// the XTextsearch returns in startOffset the higher position
277 				// and the endposition is allways exclusive.
278 				// The caller of this function will have in startPos the
279 				// lower pos. and end
280 				*pEnde = (xub_StrLen)aRet.startOffset[ 0 ];
281 				*pStart = (xub_StrLen)aRet.endOffset[ 0 ];
282 				if( pRes )
283 					*pRes = aRet;
284 			}
285 		}
286 	}
287 	catch ( Exception& )
288 	{
289 		DBG_ERRORFILE( "SearchBackward: Exception caught!" );
290 	}
291 	return nRet;
292 }
293 
ReplaceBackReferences(String & rReplaceStr,const String & rStr,const SearchResult & rResult)294 void TextSearch::ReplaceBackReferences( String& rReplaceStr, const String &rStr, const SearchResult& rResult )
295 {
296     if( rResult.subRegExpressions > 0 )
297     {
298         String sTab( '\t' );
299         sal_Unicode sSrchChrs[] = {'\\', '&', '$', 0};
300         String sTmp;
301         xub_StrLen nPos = 0;
302         sal_Unicode sFndChar;
303         while( STRING_NOTFOUND != ( nPos = rReplaceStr.SearchChar( sSrchChrs, nPos )) )
304         {
305             if( rReplaceStr.GetChar( nPos ) == '&')
306             {
307                 sal_uInt16 nStart = (sal_uInt16)(rResult.startOffset[0]);
308                 sal_uInt16 nLength = (sal_uInt16)(rResult.endOffset[0] - rResult.startOffset[0]);
309                 rReplaceStr.Erase( nPos, 1 );	// delete ampersand
310                 // replace by found string
311                 rReplaceStr.Insert( rStr, nStart, nLength, nPos );
312                 // jump over
313                 nPos = nPos + nLength;
314             }
315             else if( rReplaceStr.GetChar( nPos ) == '$')
316             {
317                 if( nPos + 1 < rReplaceStr.Len())
318                 {
319                     sFndChar = rReplaceStr.GetChar( nPos + 1 );
320                     switch(sFndChar)
321                     {   // placeholder for a backward reference?
322                         case '0':
323                         case '1':
324                         case '2':
325                         case '3':
326                         case '4':
327                         case '5':
328                         case '6':
329                         case '7':
330                         case '8':
331                         case '9':
332                         {
333                             rReplaceStr.Erase( nPos, 2 );	// delete both
334                             int i = sFndChar - '0';	// index
335                             if(i < rResult.subRegExpressions)
336                             {
337                                 sal_uInt16 nSttReg = (sal_uInt16)(rResult.startOffset[i]);
338                                 sal_uInt16 nRegLen = (sal_uInt16)(rResult.endOffset[i]);
339                                 if( nRegLen > nSttReg )
340                                     nRegLen = nRegLen - nSttReg;
341                                 else
342                                 {
343                                     nRegLen = nSttReg - nRegLen;
344                                     nSttReg = (sal_uInt16)(rResult.endOffset[i]);
345                                 }
346                                 // Copy reference from found string
347                                 sTmp = rStr.Copy((sal_uInt16)nSttReg, (sal_uInt16)nRegLen);
348                                 // insert
349                                 rReplaceStr.Insert( sTmp, nPos );
350                                 // and step over
351                                 nPos = nPos + sTmp.Len();
352                             }
353                         }
354                         break;
355                         default:
356                             nPos += 2; // leave both chars unchanged
357                             break;
358                     }
359                 }
360                 else
361                     ++nPos;
362             }
363             else
364             {
365                 // at least another character?
366                 if( nPos + 1 < rReplaceStr.Len())
367                 {
368                     sFndChar = rReplaceStr.GetChar( nPos + 1 );
369                     switch(sFndChar)
370                     {
371                         case '\\':
372                         case '&':
373                         case '$':
374                             rReplaceStr.Erase( nPos, 1 );
375                             nPos++;
376                         break;
377                         case 't':
378                             rReplaceStr.Erase( nPos, 2 ); // delete both
379                             rReplaceStr.Insert( sTab, nPos ); // insert tabulator
380                             nPos++;	// step over
381                         break;
382                         default:
383                             nPos += 2; // ignore both characters
384                         break;
385                     }
386                 }
387                 else
388                     ++nPos;
389             }
390         }
391     }
392 }
393 
394 
395 #if defined _MSC_VER
396 #pragma optimize("", on)
397 #pragma warning(pop)
398 #endif
399 
400 // ............................................................................
401 }	// namespace utl
402 // ............................................................................
403 
404