1*b1cdbd2cSJim Jagielski/**************************************************************
2*b1cdbd2cSJim Jagielski *
3*b1cdbd2cSJim Jagielski * Licensed to the Apache Software Foundation (ASF) under one
4*b1cdbd2cSJim Jagielski * or more contributor license agreements.  See the NOTICE file
5*b1cdbd2cSJim Jagielski * distributed with this work for additional information
6*b1cdbd2cSJim Jagielski * regarding copyright ownership.  The ASF licenses this file
7*b1cdbd2cSJim Jagielski * to you under the Apache License, Version 2.0 (the
8*b1cdbd2cSJim Jagielski * "License"); you may not use this file except in compliance
9*b1cdbd2cSJim Jagielski * with the License.  You may obtain a copy of the License at
10*b1cdbd2cSJim Jagielski *
11*b1cdbd2cSJim Jagielski *   http://www.apache.org/licenses/LICENSE-2.0
12*b1cdbd2cSJim Jagielski *
13*b1cdbd2cSJim Jagielski * Unless required by applicable law or agreed to in writing,
14*b1cdbd2cSJim Jagielski * software distributed under the License is distributed on an
15*b1cdbd2cSJim Jagielski * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*b1cdbd2cSJim Jagielski * KIND, either express or implied.  See the License for the
17*b1cdbd2cSJim Jagielski * specific language governing permissions and limitations
18*b1cdbd2cSJim Jagielski * under the License.
19*b1cdbd2cSJim Jagielski *
20*b1cdbd2cSJim Jagielski *************************************************************/
21*b1cdbd2cSJim Jagielski
22*b1cdbd2cSJim Jagielski
23*b1cdbd2cSJim Jagielski#ifndef __com_sun_star_i18n_XTransliteration_idl__
24*b1cdbd2cSJim Jagielski#define __com_sun_star_i18n_XTransliteration_idl__
25*b1cdbd2cSJim Jagielski
26*b1cdbd2cSJim Jagielski#include <com/sun/star/lang/Locale.idl>
27*b1cdbd2cSJim Jagielski#include <com/sun/star/uno/XInterface.idl>
28*b1cdbd2cSJim Jagielski#include <com/sun/star/i18n/TransliterationModules.idl>
29*b1cdbd2cSJim Jagielski#include <com/sun/star/i18n/TransliterationModulesNew.idl>
30*b1cdbd2cSJim Jagielski
31*b1cdbd2cSJim Jagielski//=============================================================================
32*b1cdbd2cSJim Jagielski
33*b1cdbd2cSJim Jagielskimodule com { module sun { module star { module i18n {
34*b1cdbd2cSJim Jagielski
35*b1cdbd2cSJim Jagielski//=============================================================================
36*b1cdbd2cSJim Jagielski
37*b1cdbd2cSJim Jagielski/**
38*b1cdbd2cSJim Jagielski    Character conversions like case folding or Hiragana to Katakana.
39*b1cdbd2cSJim Jagielski
40*b1cdbd2cSJim Jagielski    <p> Transliteration is a character to character conversion but it is
41*b1cdbd2cSJim Jagielski    not always a one to one mapping between characters. Transliteration
42*b1cdbd2cSJim Jagielski    modules are primarily used by collation, and search and replace
43*b1cdbd2cSJim Jagielski    modules to perform approximate search. It can also be used to format
44*b1cdbd2cSJim Jagielski    the numbers in different numbering systems. </p>
45*b1cdbd2cSJim Jagielski
46*b1cdbd2cSJim Jagielski    <p> In order to select transliteration modules for different
47*b1cdbd2cSJim Jagielski    purposes, they are classified with attributes of
48*b1cdbd2cSJim Jagielski    <type>TransliterationType</type>. </p>
49*b1cdbd2cSJim Jagielski
50*b1cdbd2cSJim Jagielski    <p> For Western languages there would be three transliteration
51*b1cdbd2cSJim Jagielski    modules available to compare two mixed case strings: upper to lower,
52*b1cdbd2cSJim Jagielski    lower to upper, and ignore case. </p>
53*b1cdbd2cSJim Jagielski
54*b1cdbd2cSJim Jagielski    <p> A typical calling sequence of transliteration is
55*b1cdbd2cSJim Jagielski        <ol>
56*b1cdbd2cSJim Jagielski            <li> getAvailableModules() </li>
57*b1cdbd2cSJim Jagielski            <li> loadModulesByImplNames() </li>
58*b1cdbd2cSJim Jagielski            <li> equals() </li>
59*b1cdbd2cSJim Jagielski        </ol>
60*b1cdbd2cSJim Jagielski    or another one is
61*b1cdbd2cSJim Jagielski        <ol>
62*b1cdbd2cSJim Jagielski            <li> loadModule() </li>
63*b1cdbd2cSJim Jagielski            <li> transliterate() </li>
64*b1cdbd2cSJim Jagielski        </ol>
65*b1cdbd2cSJim Jagielski    </p>
66*b1cdbd2cSJim Jagielski
67*b1cdbd2cSJim Jagielski*/
68*b1cdbd2cSJim Jagielski
69*b1cdbd2cSJim Jagielski/* comment:
70*b1cdbd2cSJim Jagielski * 0.
71*b1cdbd2cSJim Jagielski * All the IGNORE-type functionalities (Range, equals) are based on mapping,
72*b1cdbd2cSJim Jagielski * except the equals() method in IGNORE_CASE, which is based on locale-independent
73*b1cdbd2cSJim Jagielski * case folding.
74*b1cdbd2cSJim Jagielski * (This second assumption is very complicated and may cause confusion in use.)
75*b1cdbd2cSJim Jagielski *
76*b1cdbd2cSJim Jagielski * 1.
77*b1cdbd2cSJim Jagielski * We are assuming Upper to Lower mapping as one of transliteration.
78*b1cdbd2cSJim Jagielski * The mapping depends on Locale.
79*b1cdbd2cSJim Jagielski * Upper <-> Lower methods are just wrappers to provide equals() and Range()
80*b1cdbd2cSJim Jagielski *
81*b1cdbd2cSJim Jagielski * 2.
82*b1cdbd2cSJim Jagielski * equals() in IGNORE_CASE module is locale-independent and
83*b1cdbd2cSJim Jagielski * we don't provide locale-sensitive ones.
84*b1cdbd2cSJim Jagielski * The reason we provided locale-independent ones is that IGNORE_CASE is mainly
85*b1cdbd2cSJim Jagielski * dedicated to StarOffice internal code.
86*b1cdbd2cSJim Jagielski *
87*b1cdbd2cSJim Jagielski * 3.
88*b1cdbd2cSJim Jagielski * TransliterationModules is used just for convenience without calling
89*b1cdbd2cSJim Jagielski * getAvailableModules().
90*b1cdbd2cSJim Jagielski *
91*b1cdbd2cSJim Jagielski * 4.
92*b1cdbd2cSJim Jagielski * Implementation name in the methods below is not the same as
93*b1cdbd2cSJim Jagielski * the true implementation name registered.
94*b1cdbd2cSJim Jagielski * In particular, for generic modules: "UPPERCASE_LOWERCASE",
95*b1cdbd2cSJim Jagielski * "LOWERCASE_UPPERCASE", "IGNORE_CASE", there is no registered name.
96*b1cdbd2cSJim Jagielski */
97*b1cdbd2cSJim Jagielski
98*b1cdbd2cSJim Jagielski
99*b1cdbd2cSJim Jagielskipublished interface XTransliteration: com::sun::star::uno::XInterface
100*b1cdbd2cSJim Jagielski{
101*b1cdbd2cSJim Jagielski
102*b1cdbd2cSJim Jagielski    //------------------------------------------------------------------------
103*b1cdbd2cSJim Jagielski    /** Unique ASCII name to identify a module. This name is used
104*b1cdbd2cSJim Jagielski        to get its localized name for menus, dialogs etc. The behavior
105*b1cdbd2cSJim Jagielski        is undefined for <const>TransliterationType::CASCADE</const>
106*b1cdbd2cSJim Jagielski        modules.
107*b1cdbd2cSJim Jagielski     */
108*b1cdbd2cSJim Jagielski    string  getName();
109*b1cdbd2cSJim Jagielski
110*b1cdbd2cSJim Jagielski    //------------------------------------------------------------------------
111*b1cdbd2cSJim Jagielski    /** Return the attribute(s) associated with this transliterator
112*b1cdbd2cSJim Jagielski        object, as defined in <type>TransliterationType</type>. The
113*b1cdbd2cSJim Jagielski        value is determined by the transliteration modules. For example,
114*b1cdbd2cSJim Jagielski        for UPPERCASE_LOWERCASE, a ONE_TO_ONE is returned, for
115*b1cdbd2cSJim Jagielski        IGNORE_CASE, IGNORE is returned.
116*b1cdbd2cSJim Jagielski     */
117*b1cdbd2cSJim Jagielski    short   getType();
118*b1cdbd2cSJim Jagielski
119*b1cdbd2cSJim Jagielski    //------------------------------------------------------------------------
120*b1cdbd2cSJim Jagielski    /** Load instance of predefined module - old style method.
121*b1cdbd2cSJim Jagielski     */
122*b1cdbd2cSJim Jagielski    void    loadModule( [in] TransliterationModules eModType,
123*b1cdbd2cSJim Jagielski                        [in] ::com::sun::star::lang::Locale aLocale );
124*b1cdbd2cSJim Jagielski
125*b1cdbd2cSJim Jagielski    //------------------------------------------------------------------------
126*b1cdbd2cSJim Jagielski    /** Load a sequence of instances of predefined modules - supersedes
127*b1cdbd2cSJim Jagielski        method <member>XTransliteration::loadModule()</member>.
128*b1cdbd2cSJim Jagielski     */
129*b1cdbd2cSJim Jagielski    void    loadModuleNew(  [in] sequence <TransliterationModulesNew> aModType,
130*b1cdbd2cSJim Jagielski                            [in] ::com::sun::star::lang::Locale aLocale );
131*b1cdbd2cSJim Jagielski
132*b1cdbd2cSJim Jagielski    //------------------------------------------------------------------------
133*b1cdbd2cSJim Jagielski    /** Load instance of UNO registered module.
134*b1cdbd2cSJim Jagielski
135*b1cdbd2cSJim Jagielski        <p> Each transliteration module is registered under a different
136*b1cdbd2cSJim Jagielski        service name. The convention for the service name is
137*b1cdbd2cSJim Jagielski        com.sun.star.i18n.Transliteration.l10n.{implName}. The
138*b1cdbd2cSJim Jagielski        {implName} is a unique name used to identify a module. The
139*b1cdbd2cSJim Jagielski        implName is used to get a localized name for the transliteration
140*b1cdbd2cSJim Jagielski        module. The implName is used in locale data to list the
141*b1cdbd2cSJim Jagielski        available transliteration modules for the locale. There are some
142*b1cdbd2cSJim Jagielski        transliteration modules that are always available. The names of
143*b1cdbd2cSJim Jagielski        those modules are listed as enum
144*b1cdbd2cSJim Jagielski        <type>TransliterationModules</type> names. For modules not
145*b1cdbd2cSJim Jagielski        listed there it is possible to load them directly by their
146*b1cdbd2cSJim Jagielski        implName.
147*b1cdbd2cSJim Jagielski
148*b1cdbd2cSJim Jagielski        @param aImplName
149*b1cdbd2cSJim Jagielski            The module's {implName} under which it is registered with
150*b1cdbd2cSJim Jagielski            com.sun.star.i18n.Transliteration.l10n.{implName}.
151*b1cdbd2cSJim Jagielski     */
152*b1cdbd2cSJim Jagielski    void    loadModuleByImplName(   [in] string aImplName,
153*b1cdbd2cSJim Jagielski                                    [in] ::com::sun::star::lang::Locale aLocale );
154*b1cdbd2cSJim Jagielski
155*b1cdbd2cSJim Jagielski    //------------------------------------------------------------------------
156*b1cdbd2cSJim Jagielski    /** Load a sequence of instances of transliteration modules.
157*b1cdbd2cSJim Jagielski        Output of one module is fed as input to the next module in
158*b1cdbd2cSJim Jagielski        the sequence. The object created by this call has
159*b1cdbd2cSJim Jagielski        <type>TransliterationType</type> CASCADE and IGNORE types.
160*b1cdbd2cSJim Jagielski
161*b1cdbd2cSJim Jagielski        @param aImplNameList
162*b1cdbd2cSJim Jagielski            Only IGNORE type modules can be specified.
163*b1cdbd2cSJim Jagielski    */
164*b1cdbd2cSJim Jagielski    void    loadModulesByImplNames( [in] sequence <string> aImplNameList,
165*b1cdbd2cSJim Jagielski                                    [in] ::com::sun::star::lang::Locale aLocale );
166*b1cdbd2cSJim Jagielski
167*b1cdbd2cSJim Jagielski    //------------------------------------------------------------------------
168*b1cdbd2cSJim Jagielski    /** List the available transliteration modules for a given locale.
169*b1cdbd2cSJim Jagielski        It can be filtered based on its type.
170*b1cdbd2cSJim Jagielski
171*b1cdbd2cSJim Jagielski        @param nType
172*b1cdbd2cSJim Jagielski            A bitmask field of values defined in
173*b1cdbd2cSJim Jagielski            <type>TransliterationType</type>
174*b1cdbd2cSJim Jagielski    */
175*b1cdbd2cSJim Jagielski    sequence<string>    getAvailableModules(
176*b1cdbd2cSJim Jagielski                            [in] ::com::sun::star::lang::Locale aLocale,
177*b1cdbd2cSJim Jagielski                            [in] short nType );
178*b1cdbd2cSJim Jagielski
179*b1cdbd2cSJim Jagielski
180*b1cdbd2cSJim Jagielski    //------------------------------------------------------------------------
181*b1cdbd2cSJim Jagielski    /** Transliterate a substring. This method can be called if the
182*b1cdbd2cSJim Jagielski        object doesn't have <type>TransliterationType</type> IGNORE
183*b1cdbd2cSJim Jagielski        attribute.
184*b1cdbd2cSJim Jagielski
185*b1cdbd2cSJim Jagielski        @param aInStr
186*b1cdbd2cSJim Jagielski            The input string.
187*b1cdbd2cSJim Jagielski
188*b1cdbd2cSJim Jagielski        @param nStartPos
189*b1cdbd2cSJim Jagielski            Start position within aInStr from where transliteration starts.
190*b1cdbd2cSJim Jagielski
191*b1cdbd2cSJim Jagielski        @param nCount
192*b1cdbd2cSJim Jagielski            Number of code points to be transliterated.
193*b1cdbd2cSJim Jagielski
194*b1cdbd2cSJim Jagielski        @param rOffset
195*b1cdbd2cSJim Jagielski            To find the grapheme of input string corresponding to the
196*b1cdbd2cSJim Jagielski            grapheme of output string, rOffset provides the offset array
197*b1cdbd2cSJim Jagielski            whose index is the offset of output string, the element
198*b1cdbd2cSJim Jagielski            containing the position within the input string before
199*b1cdbd2cSJim Jagielski            transliteration.
200*b1cdbd2cSJim Jagielski     */
201*b1cdbd2cSJim Jagielski    string  transliterate(  [in] string aInStr, [in] long nStartPos,
202*b1cdbd2cSJim Jagielski                            [in] long nCount, [out] sequence <long> rOffset );
203*b1cdbd2cSJim Jagielski
204*b1cdbd2cSJim Jagielski    //------------------------------------------------------------------------
205*b1cdbd2cSJim Jagielski    /** @deprecated
206*b1cdbd2cSJim Jagielski        For internal use, this method is supported to get the
207*b1cdbd2cSJim Jagielski        "transliteration", which equals() is based on.
208*b1cdbd2cSJim Jagielski     */
209*b1cdbd2cSJim Jagielski    string  folding(    [in] string aInStr, [in] long nStartPos,
210*b1cdbd2cSJim Jagielski                        [in] long nCount, [out] sequence <long> rOffset );
211*b1cdbd2cSJim Jagielski
212*b1cdbd2cSJim Jagielski    //------------------------------------------------------------------------
213*b1cdbd2cSJim Jagielski    /** Match two substrings and find if they are equivalent as per this
214*b1cdbd2cSJim Jagielski        transliteration.
215*b1cdbd2cSJim Jagielski
216*b1cdbd2cSJim Jagielski        <p> This method can be called if the object has
217*b1cdbd2cSJim Jagielski        <type>TransliterationType</type> IGNORE attribute. </p>
218*b1cdbd2cSJim Jagielski
219*b1cdbd2cSJim Jagielski        <p> Returns the number of matched code points in any case, even if
220*b1cdbd2cSJim Jagielski        strings are not equal, for example: <br/>
221*b1cdbd2cSJim Jagielski        equals( "a", 0, 1, nMatch1, "aaa", 0, 3, nMatch2 ) <br/>
222*b1cdbd2cSJim Jagielski        returns <FALSE/> and nMatch1:=1 and nMatch2:=1 <br/>
223*b1cdbd2cSJim Jagielski        equals( "aab", 0, 3, nMatch1, "aaa", 0, 3, nMatch2 ) <br/>
224*b1cdbd2cSJim Jagielski        returns <FALSE/> and nMatch1:=2 and nMatch2:=2 <br/> </p>
225*b1cdbd2cSJim Jagielski
226*b1cdbd2cSJim Jagielski        @param aStr1
227*b1cdbd2cSJim Jagielski            First string to match.
228*b1cdbd2cSJim Jagielski
229*b1cdbd2cSJim Jagielski        @param nPos1
230*b1cdbd2cSJim Jagielski            Start position within aStr1.
231*b1cdbd2cSJim Jagielski
232*b1cdbd2cSJim Jagielski        @param nCount1
233*b1cdbd2cSJim Jagielski            Number of code points to use of aStr1.
234*b1cdbd2cSJim Jagielski
235*b1cdbd2cSJim Jagielski        @param rMatch1
236*b1cdbd2cSJim Jagielski            Returns number of matched code points in aStr1.
237*b1cdbd2cSJim Jagielski
238*b1cdbd2cSJim Jagielski        @param aStr2
239*b1cdbd2cSJim Jagielski            Second string to match.
240*b1cdbd2cSJim Jagielski
241*b1cdbd2cSJim Jagielski        @param nPos2
242*b1cdbd2cSJim Jagielski            Start position within aStr2.
243*b1cdbd2cSJim Jagielski
244*b1cdbd2cSJim Jagielski        @param nCount2
245*b1cdbd2cSJim Jagielski            Number of code points to use of aStr2.
246*b1cdbd2cSJim Jagielski
247*b1cdbd2cSJim Jagielski        @param rMatch2
248*b1cdbd2cSJim Jagielski            Returns number of matched code points in aStr2.
249*b1cdbd2cSJim Jagielski
250*b1cdbd2cSJim Jagielski        @returns
251*b1cdbd2cSJim Jagielski            <TRUE/> if the substrings are equal per this
252*b1cdbd2cSJim Jagielski            transliteration <br/>
253*b1cdbd2cSJim Jagielski            <FALSE/> else.
254*b1cdbd2cSJim Jagielski     */
255*b1cdbd2cSJim Jagielski
256*b1cdbd2cSJim Jagielski    boolean equals( [in] string aStr1, [in] long nPos1, [in] long nCount1,
257*b1cdbd2cSJim Jagielski                    [out] long rMatch1,
258*b1cdbd2cSJim Jagielski                    [in] string aStr2, [in] long nPos2, [in] long nCount2,
259*b1cdbd2cSJim Jagielski                    [out] long rMatch2 );
260*b1cdbd2cSJim Jagielski
261*b1cdbd2cSJim Jagielski    //------------------------------------------------------------------------
262*b1cdbd2cSJim Jagielski    /** Transliterate one set of characters to another.
263*b1cdbd2cSJim Jagielski
264*b1cdbd2cSJim Jagielski        <p> This method is intended for getting corresponding ranges and
265*b1cdbd2cSJim Jagielski        can be called if the object has <type>TransliterationType</type>
266*b1cdbd2cSJim Jagielski        IGNORE attribute. </p>
267*b1cdbd2cSJim Jagielski
268*b1cdbd2cSJim Jagielski        <p> For example: generic IGNORE_CASE transliterateRange( "a", "i" )
269*b1cdbd2cSJim Jagielski        returns {"A","I","a","i"}, transliterateRange( "a", "a" )
270*b1cdbd2cSJim Jagielski        returns {"A","A","a","a"}. </p>
271*b1cdbd2cSJim Jagielski
272*b1cdbd2cSJim Jagielski        <p> Use this transliteration to create regular expressions like
273*b1cdbd2cSJim Jagielski        [a-i] --> [A-Ia-i]. </p>
274*b1cdbd2cSJim Jagielski
275*b1cdbd2cSJim Jagielski        @returns
276*b1cdbd2cSJim Jagielski            String sequence containing corresponding transliterated
277*b1cdbd2cSJim Jagielski            pairs of characters to represent a range.
278*b1cdbd2cSJim Jagielski     */
279*b1cdbd2cSJim Jagielski    sequence <string>   transliterateRange( [in] string aStr1, [in] string aStr2 );
280*b1cdbd2cSJim Jagielski
281*b1cdbd2cSJim Jagielski    //------------------------------------------------------------------------
282*b1cdbd2cSJim Jagielski    /** Compare 2 substrings as per this transliteration. It transliterates both
283*b1cdbd2cSJim Jagielski        substrings before comparing them.
284*b1cdbd2cSJim Jagielski
285*b1cdbd2cSJim Jagielski        @param aStr1
286*b1cdbd2cSJim Jagielski            First string.
287*b1cdbd2cSJim Jagielski
288*b1cdbd2cSJim Jagielski        @param nOff1
289*b1cdbd2cSJim Jagielski            Offset (from 0) of the first substring.
290*b1cdbd2cSJim Jagielski
291*b1cdbd2cSJim Jagielski        @param nLen1
292*b1cdbd2cSJim Jagielski            Length (from offset) of the first substring.
293*b1cdbd2cSJim Jagielski
294*b1cdbd2cSJim Jagielski        @param aStr2
295*b1cdbd2cSJim Jagielski            Second string.
296*b1cdbd2cSJim Jagielski
297*b1cdbd2cSJim Jagielski        @param nOff2
298*b1cdbd2cSJim Jagielski            Offset (from 0) of the second substring.
299*b1cdbd2cSJim Jagielski
300*b1cdbd2cSJim Jagielski        @param nLen2
301*b1cdbd2cSJim Jagielski            Length (from offset) of the second substring.
302*b1cdbd2cSJim Jagielski
303*b1cdbd2cSJim Jagielski        @returns
304*b1cdbd2cSJim Jagielski            1 if the first substring is greater than the second substring <br/>
305*b1cdbd2cSJim Jagielski            0 if the first substring is equal to the second substring <br/>
306*b1cdbd2cSJim Jagielski            -1 if the first substring is less than the second substring
307*b1cdbd2cSJim Jagielski    */
308*b1cdbd2cSJim Jagielski    long    compareSubstring(   [in] string aStr1, [in] long nOff1, [in] long nLen1,
309*b1cdbd2cSJim Jagielski                                [in] string aStr2, [in] long nOff2, [in] long nLen2 );
310*b1cdbd2cSJim Jagielski
311*b1cdbd2cSJim Jagielski    //------------------------------------------------------------------------
312*b1cdbd2cSJim Jagielski    /** Compare 2 strings as per this transliteration. It transliterates both
313*b1cdbd2cSJim Jagielski        strings before comparing them.
314*b1cdbd2cSJim Jagielski
315*b1cdbd2cSJim Jagielski        @returns
316*b1cdbd2cSJim Jagielski            1 if the first string is greater than the second string <br/>
317*b1cdbd2cSJim Jagielski            0 if the first string is equal to the second string <br/>
318*b1cdbd2cSJim Jagielski            -1 if the first string is less than the second string
319*b1cdbd2cSJim Jagielski    */
320*b1cdbd2cSJim Jagielski    long    compareString( [in] string aStr1, [in] string aStr2 );
321*b1cdbd2cSJim Jagielski
322*b1cdbd2cSJim Jagielski};
323*b1cdbd2cSJim Jagielski
324*b1cdbd2cSJim Jagielski//=============================================================================
325*b1cdbd2cSJim Jagielski}; }; }; };
326*b1cdbd2cSJim Jagielski
327*b1cdbd2cSJim Jagielski#endif
328