1/**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements.  See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership.  The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License.  You may obtain a copy of the License at
10 *
11 *   http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied.  See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24#ifndef __com_sun_star_lang_XTextSearch_idl__
25#define __com_sun_star_lang_XTextSearch_idl__
26
27
28#include <com/sun/star/lang/Locale.idl>
29#include <com/sun/star/uno/XInterface.idl>
30//#include <com/sun/star/lang/CascadeTransliterator.idl>
31
32//=============================================================================
33
34module com { module sun { module star { module util {
35
36//=============================================================================
37
38
/** Selects the kind of search that is performed.

    <p> A value of this enum is stored in
    <member>SearchOptions::algorithmType</member>. </p>
 */
39published enum SearchAlgorithms
40{
41    /// Literal text search.
42    ABSOLUTE,   // implemented as a kind of Boyer-Moore
43    /// Regular expression search.
44    REGEXP,
45    /// Approximate search using the Weighted Levenshtein Distance (WLD).
46    APPROXIMATE
47};
48
49/// Flags for search methods; each constant is a distinct bit and can be mixed in SearchOptions::searchFlag.
50published constants SearchFlags
51{
52    /**
53        @deprecated The constant ALL_IGNORE_CASE is never supported - use
54                    <const scope="com::sun::star::i18n">TransliterationModules::IGNORE_CASE</const>
55                    with
56                    <member>SearchOptions::transliterateFlags</member>
57                    instead.
58
59        @see <type scope="com::sun::star::i18n">TransliterationModules</type>
60    */
61    const long  ALL_IGNORE_CASE     = 0x00000001;
62
63    /** Flag for normal (Boyer-Moore) search / Search for word only. */
64    const long  NORM_WORD_ONLY      = 0x00000010;
65
66    /** Flag for "regular expression" search / Interpret as extended
67        regular expression.
68
69        @deprecated The flag is currently not supported by OOo.
70    */
71    const long  REG_EXTENDED        = 0x00000100;
72
73    /** Flag for "regular expression" search / No register information
74        or backreferences, i.e., avoid sub expressions. Return only
75        true/false if matched or not.
76
77        @deprecated The flag is currently not supported by OOo.
78    */
79    const long  REG_NOSUB           = 0x00000200;
80
81    /** Flag for "regular expression" search / Special new line
82        treatment.
83
84        @deprecated The flag is currently not supported by OOo.
85
86        <p> A NEWLINE character in string will not be matched by a
87        period outside bracket expression or by any form of a non
88        matching list. </p>
89
90        <p> A circumflex (^) in pattern when used to specify expression
91        anchoring will match the zero length string immediately after a
92        newline in string, regardless of the setting of
93        REG_NOT_BEGINOFLINE. </p>
94
95        <p> A dollar-sign ($) in pattern when used to specify expression
96        anchoring, will match zero-length string immediately before a
97        new line in string, regardless of the setting of
98        REG_NOT_ENDOFLINE. </p>
99    */
100    const long  REG_NEWLINE         = 0x00000400;
101
102    /** The first character in the string is not the beginning of the
103        line therefore ^ will not match with first character of the
104        string.
105    */
106    const long  REG_NOT_BEGINOFLINE = 0x00000800;
107
108    /** The last character in the string is not the end of the line
109        therefore $ will not match with last character of the string.
110    */
111    const long  REG_NOT_ENDOFLINE   = 0x00001000;
112
113    /** Flag for "Weighted Levenshtein Distance" search / Relaxed
114        checking of limit, split weight pools.
115
116        <p> If not specified (<b>strict</b>), the search is successful if
117        the WLD is within a calculated limit where each insertion,
118        deletion and replacement adds a weight to a common pool of
119        weights. This is the mathematically correct WLD. </p>
120
121        <p> From a user's point of view the strict WLD is an
122        exclusive-OR of the arguments given, for example if allowed
123        insertions=2 and allowed replacements=2, the search fails if 2
124        characters had been inserted and an additional operation would
125        be needed to match. Depending on the weights it may also fail if
126        1 character was inserted and 1 character replaced and an
127        additional operation would be needed to match. The strict
128        algorithm may match less than expected at first glance from
129        the specified arguments, but does not return false positives. </p>
130
131        <p> If specified (<b>relaxed</b>), the search is also successful
132        if the combined pool for insertions and deletions is below a
133        doubled calculated limit and replacements are treated
134        differently. Additionally, swapped characters are counted as one
135        replacement. </p>
136
137        <p> From a user's point of view the relaxed WLD is an
138        inclusive-OR of the arguments given, for example if allowed
139        insertions=2 and allowed replacements=2, the search succeeds if
140        2 characters had been inserted and an additional replacement is
141        needed to match. The relaxed algorithm may return false
142        positives, but meets user expectation better. </p>
143    */
144    const long  LEV_RELAXED     = 0x00010000;
145};
146
147
/** Bundles the settings of a search; handed to
    <member>XTextSearch::setOptions</member>.
 */
148published  struct SearchOptions  {
149	//-------------------------------------------------------------------------
150    /** The search type, i.e. which of the SearchAlgorithms is used. */
151	SearchAlgorithms	algorithmType;

153	/** Some flags - can be mixed.
154
155		@see <type>SearchFlags</type>
156	*/
157	long 			searchFlag;
158
159    /** The text or pattern to be searched. */
160	string			searchString;
161
162    /** The replacement text
163        (is for optional replacing - SearchOptions is only the data container for it). */
164	string			replaceString;
165
166    /** The locale for case insensitive search. */
167	::com::sun::star::lang::Locale  Locale;
168
169    /** This many characters can be different (as a replacement) between
170        the found word and the search pattern in a "Weighted Levenshtein
171        Distance" search. */
172	long			changedChars;
173
174    /** This many characters can be missing in the found word in a
175        "Weighted Levenshtein Distance" search. */
176	long			deletedChars;
177
178    /** This many characters can be additional in the found word in a
179        "Weighted Levenshtein Distance" search. */
180	long			insertedChars;
181
182    /** Flags for the transliteration. Same meaning as the enum of
183        <type scope="com::sun::star::i18n">TransliterationModules</type>.
184	*/
185	long			transliterateFlags;
186};
187
188
/** Result of a search, as returned by
    <member>XTextSearch::searchForward</member> and
    <member>XTextSearch::searchBackward</member>.
 */
189published  struct SearchResult  {
190	//-------------------------------------------------------------------------
191	/** Number of subexpressions;
192	if it is 0, then no match was found; this value is 1 for an ABSOLUTE and APPROXIMATE match.
193	The startOffset and endOffset are always dependent on the search direction.
194	For example:
195	if you search "X" in the text "-X-" the offsets are:
196		for forward: 	start = 1, end = 2
197        for backward:   start = 2, end = 1
198    Forward, the startOffset is inclusive, the endOffset exclusive.
199    Backward, the startOffset is exclusive, the endOffset inclusive.
200
201	For regular expressions it can be greater than 1.
202	If the value is 1, startOffset[0] and endOffset[0] point to the matching substring.
203	If the value is > 1, startOffset[0] and endOffset[0] still point to the matching substring for the whole regular expression, and
204	startOffset[i] and endOffset[i] point to the i-th matching substring.
205	*/
206	long subRegExpressions;
207	sequence<long> startOffset;		// inclusive for a forward search, exclusive for a backward search
208	sequence<long> endOffset;  		// exclusive for a forward search, inclusive for a backward search
209};
210
211
212
213/** Enables an object to search in its content.
214 */
215published interface XTextSearch : com::sun::star::uno::XInterface
216{
217	//-------------------------------------------------------------------------
218	/** Set the options for the forward or backward search.
219
		@param options
			the search settings, see SearchOptions.
220	*/
221	void setOptions ([in] SearchOptions options);
222	//-------------------------------------------------------------------------
223	/** Search forward in searchStr, starting at startPos and ending at endPos.
224		The result is returned in the SearchResult.
225
		@param searchStr
			the string in which the search is performed.
		@param startPos
			the position in searchStr where the search starts.
		@param endPos
			the position in searchStr where the search ends.
			NOTE(review): presumably not lower than startPos for a
			forward search - confirm against the implementation.
		@returns
			the SearchResult; its subRegExpressions member is 0 if
			no match was found.
226	*/
227	SearchResult  searchForward  ([in] string searchStr, [in] long startPos, [in] long endPos );
228	//-------------------------------------------------------------------------
229	/** Search backward in searchStr, starting at startPos and ending at endPos.
230		The endPos must be lower than the startPos, because the function searches backward!
231		The result is returned in the SearchResult.
232
		@param searchStr
			the string in which the search is performed.
		@param startPos
			the position in searchStr where the backward search starts.
		@param endPos
			the position in searchStr where the backward search ends;
			must be lower than startPos.
		@returns
			the SearchResult; its subRegExpressions member is 0 if
			no match was found. For a backward search the
			startOffset is exclusive and the endOffset inclusive.
233	*/
234	SearchResult  searchBackward ([in] string searchStr, [in] long startPos, [in] long endPos );
235};
236
237//=============================================================================
238}; }; }; };
239
240#endif
241