1*cdf0e10cSrcweir/*************************************************************************
2*cdf0e10cSrcweir *
3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir *
5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir *
7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir *
9*cdf0e10cSrcweir * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir *
11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir *
15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir *
21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir *
26*cdf0e10cSrcweir ************************************************************************/
27*cdf0e10cSrcweir
28*cdf0e10cSrcweir#ifndef __com_sun_star_lang_XTextSearch_idl__
29*cdf0e10cSrcweir#define __com_sun_star_lang_XTextSearch_idl__
30*cdf0e10cSrcweir
31*cdf0e10cSrcweir
32*cdf0e10cSrcweir#include <com/sun/star/lang/Locale.idl>
33*cdf0e10cSrcweir#include <com/sun/star/uno/XInterface.idl>
34*cdf0e10cSrcweir//#include <com/sun/star/lang/CascadeTransliterator.idl>
35*cdf0e10cSrcweir
36*cdf0e10cSrcweir//=============================================================================
37*cdf0e10cSrcweir
38*cdf0e10cSrcweirmodule com { module sun { module star { module util {
39*cdf0e10cSrcweir
40*cdf0e10cSrcweir//=============================================================================
41*cdf0e10cSrcweir
42*cdf0e10cSrcweir
/** The search algorithms that can be selected for a text search. */
published enum SearchAlgorithms
{
    /** Literal search (implemented as a kind of Boyer-Moore). */
    ABSOLUTE,

    /** Regular expression search. */
    REGEXP,

    /** Weighted Levenshtein Distance search. */
    APPROXIMATE
};
52*cdf0e10cSrcweir
/** Flags for the search methods. */
published constants SearchFlags
{
    /**
        @deprecated ALL_IGNORE_CASE is never supported. Use
                    <const scope="com::sun::star::i18n">TransliterationModules::IGNORE_CASE</const>
                    together with
                    <member>SearchOptions::transliterateFlags</member>
                    instead.

        @see <type scope="com::sun::star::i18n">TransliterationModules</type>
    */
    const long ALL_IGNORE_CASE      = 0x00000001;

    /** Normal (Boyer-Moore) search: search for whole words only. */
    const long NORM_WORD_ONLY       = 0x00000010;

    /** "Regular expression" search: interpret the pattern as an
        extended regular expression.

        @deprecated The flag is currently not supported by OOo.
    */
    const long REG_EXTENDED         = 0x00000100;

    /** "Regular expression" search: provide no register information
        or backreferences, i.e. avoid sub expressions. Only report
        true/false depending on whether the pattern matched.

        @deprecated The flag is currently not supported by OOo.
    */
    const long REG_NOSUB            = 0x00000200;

    /** "Regular expression" search: special treatment of new lines.

        @deprecated The flag is currently not supported by OOo.

        <p> A NEWLINE character in the string is not matched by a
        period outside a bracket expression, nor by any form of a
        non-matching list. </p>

        <p> A circumflex (^) in the pattern, when used for expression
        anchoring, matches the zero-length string immediately after a
        newline in the string, regardless of the setting of
        REG_NOT_BEGINOFLINE. </p>

        <p> A dollar sign ($) in the pattern, when used for expression
        anchoring, matches the zero-length string immediately before a
        newline in the string, regardless of the setting of
        REG_NOT_ENDOFLINE. </p>
    */
    const long REG_NEWLINE          = 0x00000400;

    /** The first character of the string is not the beginning of the
        line, therefore ^ does not match at the first character of the
        string.
    */
    const long REG_NOT_BEGINOFLINE  = 0x00000800;

    /** The last character of the string is not the end of the line,
        therefore $ does not match at the last character of the string.
    */
    const long REG_NOT_ENDOFLINE    = 0x00001000;

    /** "Weighted Levenshtein Distance" (WLD) search: relaxed checking
        of the limit, with split weight pools.

        <p> If not specified (<b>strict</b>), the search is successful
        if the WLD is within a calculated limit, where every insertion,
        deletion and replacement adds a weight to one common pool of
        weights. This is the mathematically correct WLD. </p>

        <p> From a user's point of view the strict WLD is an
        exclusive-OR of the given arguments. For example, with allowed
        insertions=2 and allowed replacements=2, the search fails if 2
        characters had been inserted and one additional operation would
        be needed to match. Depending on the weights it may also fail
        if 1 character was inserted and 1 character replaced and one
        additional operation would be needed to match. The strict
        algorithm may match less than expected from a first glance at
        the specified arguments, but it does not return false
        positives. </p>

        <p> If specified (<b>relaxed</b>), the search is also successful
        if the combined pool for insertions and deletions is below a
        doubled calculated limit, and replacements are treated
        differently. Additionally, swapped characters are counted as
        one replacement. </p>

        <p> From a user's point of view the relaxed WLD is an
        inclusive-OR of the given arguments. For example, with allowed
        insertions=2 and allowed replacements=2, the search succeeds if
        2 characters had been inserted and an additional replacement is
        needed to match. The relaxed algorithm may return false
        positives, but meets user expectation better. </p>
    */
    const long LEV_RELAXED          = 0x00010000;
};
150*cdf0e10cSrcweir
151*cdf0e10cSrcweir
/** Data container holding the settings of a text search. */
published struct SearchOptions
{
    //-------------------------------------------------------------------------
    /** The type of search to perform. */
    SearchAlgorithms algorithmType;

    /** Search flags; several flags can be mixed.

        @see <type>SearchFlags</type>
    */
    long searchFlag;

    /** The text or pattern to search for. */
    string searchString;

    /** The replacement text.

        <p> Replacing is optional; SearchOptions only acts as the data
        container for this text. </p>
    */
    string replaceString;

    /** The locale used for case insensitive search. */
    ::com::sun::star::lang::Locale Locale;

    /** In a "Weighted Levenshtein Distance" search, the number of
        characters that may differ (as a replacement) between the
        found word and the search pattern.
    */
    long changedChars;

    /** In a "Weighted Levenshtein Distance" search, the number of
        characters that may be missing in the found word.
    */
    long deletedChars;

    /** In a "Weighted Levenshtein Distance" search, the number of
        characters that may be additional in the found word.
    */
    long insertedChars;

    /** Flags for the transliteration. They have the same meaning as
        the enum values of
        <type scope="com::sun::star::i18n">TransliterationModules</type>
    */
    long transliterateFlags;
};
191*cdf0e10cSrcweir
192*cdf0e10cSrcweir
/** Result of a search, as returned by the search methods.

    <p> The start and end offsets always depend on the search
    direction. For example, searching "X" in the text "-X-" yields: </p>
    <ul>
        <li> forward:  start = 1, end = 2 </li>
        <li> backward: start = 2, end = 1 </li>
    </ul>
    <p> Forward, the startOffset is inclusive and the endOffset is
    exclusive. Backward, the startOffset is exclusive and the endOffset
    is inclusive. </p>
*/
published struct SearchResult
{
    //-------------------------------------------------------------------------
    /** Number of subexpressions.

        <p> A value of 0 means that no match was found. The value is 1
        for an ABSOLUTE or APPROXIMATE match; for regular expressions
        it can be greater than 1. </p>

        <p> If the value is 1, startOffset[0] and endOffset[0] point to
        the matching substring. If the value is greater than 1,
        startOffset[0] and endOffset[0] still point to the substring
        matching the whole regular expression, while startOffset[i] and
        endOffset[i] point to the i-th matching substring. </p>
    */
    long subRegExpressions;

    /** Start offsets of the matches: inclusive for a forward search,
        exclusive for a backward search.
    */
    sequence<long> startOffset;

    /** End offsets of the matches: exclusive for a forward search,
        inclusive for a backward search.
    */
    sequence<long> endOffset;
};
214*cdf0e10cSrcweir
215*cdf0e10cSrcweir
216*cdf0e10cSrcweir
/** Enables an object to search in its content. */
published interface XTextSearch : com::sun::star::uno::XInterface
{
    //-------------------------------------------------------------------------
    /** Sets the options used for the forward or backward search.

        @param options
            the search options to use.
    */
    void setOptions( [in] SearchOptions options );

    //-------------------------------------------------------------------------
    /** Searches forward in searchStr, starting at startPos and ending
        at endPos.

        @param searchStr
            the string to search in.
        @param startPos
            the position at which the search starts.
        @param endPos
            the position at which the search ends.

        @returns
            the result of the search as a SearchResult.
    */
    SearchResult searchForward( [in] string searchStr,
                                [in] long startPos,
                                [in] long endPos );

    //-------------------------------------------------------------------------
    /** Searches backward in searchStr, starting at startPos and ending
        at endPos.

        <p> Because the function searches backward, endPos must be
        lower than startPos. </p>

        @param searchStr
            the string to search in.
        @param startPos
            the position at which the search starts.
        @param endPos
            the position at which the search ends; must be lower than
            startPos.

        @returns
            the result of the search as a SearchResult.
    */
    SearchResult searchBackward( [in] string searchStr,
                                 [in] long startPos,
                                 [in] long endPos );
};
240*cdf0e10cSrcweir
241*cdf0e10cSrcweir//=============================================================================
242*cdf0e10cSrcweir}; }; }; };
243*cdf0e10cSrcweir
244*cdf0e10cSrcweir#endif
245