1*d1766043SAndrew Rist/**************************************************************
2cdf0e10cSrcweir *
3*d1766043SAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one
4*d1766043SAndrew Rist * or more contributor license agreements.  See the NOTICE file
5*d1766043SAndrew Rist * distributed with this work for additional information
6*d1766043SAndrew Rist * regarding copyright ownership.  The ASF licenses this file
7*d1766043SAndrew Rist * to you under the Apache License, Version 2.0 (the
8*d1766043SAndrew Rist * "License"); you may not use this file except in compliance
9*d1766043SAndrew Rist * with the License.  You may obtain a copy of the License at
10*d1766043SAndrew Rist *
11*d1766043SAndrew Rist *   http://www.apache.org/licenses/LICENSE-2.0
12*d1766043SAndrew Rist *
13*d1766043SAndrew Rist * Unless required by applicable law or agreed to in writing,
14*d1766043SAndrew Rist * software distributed under the License is distributed on an
15*d1766043SAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*d1766043SAndrew Rist * KIND, either express or implied.  See the License for the
17*d1766043SAndrew Rist * specific language governing permissions and limitations
18*d1766043SAndrew Rist * under the License.
19*d1766043SAndrew Rist *
20*d1766043SAndrew Rist *************************************************************/
21*d1766043SAndrew Rist
22*d1766043SAndrew Rist
23cdf0e10cSrcweir
24cdf0e10cSrcweir#ifndef __com_sun_star_lang_XTextSearch_idl__
25cdf0e10cSrcweir#define __com_sun_star_lang_XTextSearch_idl__
26cdf0e10cSrcweir
27cdf0e10cSrcweir
28cdf0e10cSrcweir#include <com/sun/star/lang/Locale.idl>
29cdf0e10cSrcweir#include <com/sun/star/uno/XInterface.idl>
30cdf0e10cSrcweir//#include <com/sun/star/lang/CascadeTransliterator.idl>
31cdf0e10cSrcweir
32cdf0e10cSrcweir//=============================================================================
33cdf0e10cSrcweir
34cdf0e10cSrcweirmodule com { module sun { module star { module util {
35cdf0e10cSrcweir
36cdf0e10cSrcweir//=============================================================================
37cdf0e10cSrcweir
38cdf0e10cSrcweir
/// Search algorithms that can be selected through SearchOptions::algorithmType.
39cdf0e10cSrcweirpublished enum SearchAlgorithms
40cdf0e10cSrcweir{
41cdf0e10cSrcweir    /// Literal search.
42cdf0e10cSrcweir    ABSOLUTE,   // implemented as a kind of Boyer-Moore
43cdf0e10cSrcweir    /// Regular expression search.
44cdf0e10cSrcweir    REGEXP,
45cdf0e10cSrcweir    /// Approximate search using the Weighted Levenshtein Distance (WLD).
46cdf0e10cSrcweir    APPROXIMATE
47cdf0e10cSrcweir};
48cdf0e10cSrcweir
49cdf0e10cSrcweir/// Flags for search methods. Each flag is a distinct bit; flags can be mixed in SearchOptions::searchFlag.
50cdf0e10cSrcweirpublished constants SearchFlags
51cdf0e10cSrcweir{
52cdf0e10cSrcweir    /**
53cdf0e10cSrcweir        @deprecated The constant ALL_IGNORE_CASE is never supported - use
54cdf0e10cSrcweir                    <const scope="com::sun::star::i18n">TransliterationModules::IGNORE_CASE</const>
55cdf0e10cSrcweir                    with
56cdf0e10cSrcweir                    <member>SearchOptions::transliterateFlags</member>
57cdf0e10cSrcweir                    instead.
58cdf0e10cSrcweir
59cdf0e10cSrcweir        @see <type scope="com::sun::star::i18n">TransliterationModules</type>
60cdf0e10cSrcweir    */
61cdf0e10cSrcweir    const long  ALL_IGNORE_CASE     = 0x00000001;
62cdf0e10cSrcweir
63cdf0e10cSrcweir    /** Flag for normal (Boyer-Moore) search / Search for word only. */
64cdf0e10cSrcweir    const long  NORM_WORD_ONLY      = 0x00000010;
65cdf0e10cSrcweir
66cdf0e10cSrcweir    /** Flag for "regular expression" search / Interpret as extended
67cdf0e10cSrcweir        regular expression.
68cdf0e10cSrcweir
69cdf0e10cSrcweir        @deprecated The flag is currently not supported by OOo.
70cdf0e10cSrcweir    */
71cdf0e10cSrcweir    const long  REG_EXTENDED        = 0x00000100;
72cdf0e10cSrcweir
73cdf0e10cSrcweir    /** Flag for "regular expression" search / No register information
74cdf0e10cSrcweir        or backreferences, i.e., avoid sub expressions. Return only
75cdf0e10cSrcweir        true/false if matched or not.
76cdf0e10cSrcweir
77cdf0e10cSrcweir        @deprecated The flag is currently not supported by OOo.
78cdf0e10cSrcweir    */
79cdf0e10cSrcweir    const long  REG_NOSUB           = 0x00000200;
80cdf0e10cSrcweir
81cdf0e10cSrcweir    /** Flag for "regular expression" search / Special new line
82cdf0e10cSrcweir        treatment.
83cdf0e10cSrcweir
84cdf0e10cSrcweir        @deprecated The flag is currently not supported by OOo.
85cdf0e10cSrcweir
86cdf0e10cSrcweir        <p> A NEWLINE character in the string will not be matched by a
87cdf0e10cSrcweir        period outside a bracket expression or by any form of a
88cdf0e10cSrcweir        non-matching list. </p>
89cdf0e10cSrcweir
90cdf0e10cSrcweir        <p> A circumflex (^) in the pattern, when used to specify expression
91cdf0e10cSrcweir        anchoring, will match the zero-length string immediately after a
92cdf0e10cSrcweir        newline in the string, regardless of the setting of
93cdf0e10cSrcweir        REG_NOT_BEGINOFLINE. </p>
94cdf0e10cSrcweir
95cdf0e10cSrcweir        <p> A dollar-sign ($) in the pattern, when used to specify expression
96cdf0e10cSrcweir        anchoring, will match the zero-length string immediately before a
97cdf0e10cSrcweir        newline in the string, regardless of the setting of
98cdf0e10cSrcweir        REG_NOT_ENDOFLINE. </p>
99cdf0e10cSrcweir    */
100cdf0e10cSrcweir    const long  REG_NEWLINE         = 0x00000400;
101cdf0e10cSrcweir
102cdf0e10cSrcweir    /** Flag for "regular expression" search / The first character in
103cdf0e10cSrcweir        the string is not the beginning of the line, therefore ^ will
104cdf0e10cSrcweir        not match at the first character of the string.
105cdf0e10cSrcweir    */
106cdf0e10cSrcweir    const long  REG_NOT_BEGINOFLINE = 0x00000800;
107cdf0e10cSrcweir
108cdf0e10cSrcweir    /** Flag for "regular expression" search / The last character in
109cdf0e10cSrcweir        the string is not the end of the line, therefore $ will not match at the last character of the string.
110cdf0e10cSrcweir    */
111cdf0e10cSrcweir    const long  REG_NOT_ENDOFLINE   = 0x00001000;
112cdf0e10cSrcweir
113cdf0e10cSrcweir    /** Flag for "Weighted Levenshtein Distance" search / Relaxed
114cdf0e10cSrcweir        checking of limit, split weight pools.
115cdf0e10cSrcweir
116cdf0e10cSrcweir        <p> If not specified (<b>strict</b>), the search is successful if
117cdf0e10cSrcweir        the WLD is within a calculated limit where each insertion,
118cdf0e10cSrcweir        deletion and replacement adds a weight to a common pool of
119cdf0e10cSrcweir        weights. This is the mathematically correct WLD. </p>
120cdf0e10cSrcweir
121cdf0e10cSrcweir        <p> From a user's point of view the strict WLD is an
122cdf0e10cSrcweir        exclusive-OR of the arguments given, for example if allowed
123cdf0e10cSrcweir        insertions=2 and allowed replacements=2, the search fails if 2
124cdf0e10cSrcweir        characters had been inserted and an additional operation would
125cdf0e10cSrcweir        be needed to match. Depending on the weights it may also fail if
126cdf0e10cSrcweir        1 character was inserted and 1 character replaced and an
127cdf0e10cSrcweir        additional operation would be needed to match. The strict
128cdf0e10cSrcweir        algorithm may match less than expected at first glance from
129cdf0e10cSrcweir        the specified arguments, but does not return false positives. </p>
130cdf0e10cSrcweir
131cdf0e10cSrcweir        <p> If specified (<b>relaxed</b>), the search is also successful
132cdf0e10cSrcweir        if the combined pool for insertions and deletions is below a
133cdf0e10cSrcweir        doubled calculated limit and replacements are treated
134cdf0e10cSrcweir        differently. Additionally, swapped characters are counted as one
135cdf0e10cSrcweir        replacement. </p>
136cdf0e10cSrcweir
137cdf0e10cSrcweir        <p> From a user's point of view the relaxed WLD is an
138cdf0e10cSrcweir        inclusive-OR of the arguments given, for example if allowed
139cdf0e10cSrcweir        insertions=2 and allowed replacements=2, the search succeeds if
140cdf0e10cSrcweir        2 characters had been inserted and an additional replacement is
141cdf0e10cSrcweir        needed to match. The relaxed algorithm may return false
142cdf0e10cSrcweir        positives, but meets user expectation better. </p>
143cdf0e10cSrcweir    */
144cdf0e10cSrcweir    const long  LEV_RELAXED     = 0x00010000;
145cdf0e10cSrcweir};
146cdf0e10cSrcweir
147cdf0e10cSrcweir
/** Data container holding the settings of a search; passed to XTextSearch::setOptions(). */
148cdf0e10cSrcweirpublished  struct SearchOptions  {
149cdf0e10cSrcweir	//-------------------------------------------------------------------------
150cdf0e10cSrcweir    /** The search type (algorithm) to use. */
151cdf0e10cSrcweir	SearchAlgorithms	algorithmType;
152cdf0e10cSrcweir
153cdf0e10cSrcweir	/** Search flags - several flags can be mixed.
154cdf0e10cSrcweir
155cdf0e10cSrcweir		@see <type>SearchFlags</type>
156cdf0e10cSrcweir	*/
157cdf0e10cSrcweir	long 			searchFlag;
158cdf0e10cSrcweir
159cdf0e10cSrcweir    /** The text or pattern to search for. */
160cdf0e10cSrcweir	string			searchString;
161cdf0e10cSrcweir
162cdf0e10cSrcweir    /** The replacement text
163cdf0e10cSrcweir        (for optional replacing - SearchOptions is only the data container for it). */
164cdf0e10cSrcweir	string			replaceString;
165cdf0e10cSrcweir
166cdf0e10cSrcweir    /** The locale for case insensitive search. */
167cdf0e10cSrcweir	::com::sun::star::lang::Locale  Locale;
168cdf0e10cSrcweir
169cdf0e10cSrcweir    /** This many characters can be different (as a replacement) between
170cdf0e10cSrcweir        the found word and the search pattern in a "Weighted Levenshtein
171cdf0e10cSrcweir        Distance" search. */
172cdf0e10cSrcweir	long			changedChars;
173cdf0e10cSrcweir
174cdf0e10cSrcweir    /** This many characters can be missing in the found word in a
175cdf0e10cSrcweir        "Weighted Levenshtein Distance" search. */
176cdf0e10cSrcweir	long			deletedChars;
177cdf0e10cSrcweir
178cdf0e10cSrcweir    /** This many characters can be additional in the found word in a
179cdf0e10cSrcweir        "Weighted Levenshtein Distance" search. */
180cdf0e10cSrcweir	long			insertedChars;
181cdf0e10cSrcweir
182cdf0e10cSrcweir    /** Flags for the transliteration. Same meaning as the values of the enum
183cdf0e10cSrcweir        <type scope="com::sun::star::i18n">TransliterationModules</type>.
184cdf0e10cSrcweir	*/
185cdf0e10cSrcweir	long			transliterateFlags;
186cdf0e10cSrcweir};
187cdf0e10cSrcweir
188cdf0e10cSrcweir
/** Result returned by XTextSearch::searchForward() and XTextSearch::searchBackward(). */
189cdf0e10cSrcweirpublished  struct SearchResult  {
190cdf0e10cSrcweir	//-------------------------------------------------------------------------
191cdf0e10cSrcweir	/** Number of subexpressions.
192cdf0e10cSrcweir	If it is 0, no match was found; the value is 1 for an ABSOLUTE or APPROXIMATE match.
193cdf0e10cSrcweir	The start and end offsets always depend on the search direction.
194cdf0e10cSrcweir	For example:
195cdf0e10cSrcweir	if you search "X" in the text "-X-" the offsets are:
196cdf0e10cSrcweir		for forward: 	start = 1, end = 2
197cdf0e10cSrcweir        for backward:   start = 2, end = 1
198cdf0e10cSrcweir    Forward, the startOffset is inclusive, the endOffset exclusive.
199cdf0e10cSrcweir    Backward, the startOffset is exclusive, the endOffset inclusive.
200cdf0e10cSrcweir
201cdf0e10cSrcweir	For regular expressions the value can be greater than 1.
202cdf0e10cSrcweir	If the value is 1, startOffset[0] and endOffset[0] delimit the matching substring.
203cdf0e10cSrcweir	If the value is > 1, startOffset[0] and endOffset[0] still delimit the substring matched by the whole regular expression,
204cdf0e10cSrcweir	and startOffset[i] and endOffset[i] delimit the substring matched by the i-th subexpression.
205cdf0e10cSrcweir	*/
206cdf0e10cSrcweir	long subRegExpressions;
207cdf0e10cSrcweir	sequence<long> startOffset;		// inclusive for a forward search, exclusive for a backward search
208cdf0e10cSrcweir	sequence<long> endOffset;  		// exclusive for a forward search, inclusive for a backward search
209cdf0e10cSrcweir};
210cdf0e10cSrcweir
211cdf0e10cSrcweir
212cdf0e10cSrcweir
213cdf0e10cSrcweir/** enables an object to search in its content.
214cdf0e10cSrcweir */
215cdf0e10cSrcweirpublished interface XTextSearch : com::sun::star::uno::XInterface
216cdf0e10cSrcweir{
217cdf0e10cSrcweir	//-------------------------------------------------------------------------
218cdf0e10cSrcweir	/** sets the options for the forward or backward search.
219cdf0e10cSrcweir
		@param options
			the search settings (algorithm, flags, search string, ...) to use.
220cdf0e10cSrcweir	*/
221cdf0e10cSrcweir	void setOptions ([in] SearchOptions options);
222cdf0e10cSrcweir	//-------------------------------------------------------------------------
223cdf0e10cSrcweir	/** searches forward in searchStr, starting at startPos and ending at endPos.
224cdf0e10cSrcweir		The result is returned in the SearchResult.
225cdf0e10cSrcweir
		@param searchStr
			the text in which to search (the text or pattern to look for
			is SearchOptions::searchString).
		@param startPos
			the position in searchStr at which the search starts.
		@param endPos
			the position in searchStr at which the search ends.
		@returns
			the SearchResult; its subRegExpressions member is 0 if no match was found.
226cdf0e10cSrcweir	*/
227cdf0e10cSrcweir	SearchResult  searchForward  ([in] string searchStr, [in] long startPos, [in] long endPos );
228cdf0e10cSrcweir	//-------------------------------------------------------------------------
229cdf0e10cSrcweir	/** searches backward in searchStr, starting at startPos and ending at endPos.
230cdf0e10cSrcweir		The endPos must be lower than the startPos, because the function searches backward!
231cdf0e10cSrcweir		The result is returned in the SearchResult.
232cdf0e10cSrcweir
		@param searchStr
			the text in which to search (the text or pattern to look for
			is SearchOptions::searchString).
		@param startPos
			the position in searchStr at which the backward search starts.
		@param endPos
			the position in searchStr at which the backward search ends;
			must be lower than startPos.
		@returns
			the SearchResult; its subRegExpressions member is 0 if no match was found.
233cdf0e10cSrcweir	*/
234cdf0e10cSrcweir	SearchResult  searchBackward ([in] string searchStr, [in] long startPos, [in] long endPos );
235cdf0e10cSrcweir};
236cdf0e10cSrcweir
237cdf0e10cSrcweir//=============================================================================
238cdf0e10cSrcweir}; }; }; };
239cdf0e10cSrcweir
240cdf0e10cSrcweir#endif
241