/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/


#ifndef __com_sun_star_i18n_XTransliteration_idl__
#define __com_sun_star_i18n_XTransliteration_idl__

#include <com/sun/star/lang/Locale.idl>
#include <com/sun/star/uno/XInterface.idl>
#include <com/sun/star/i18n/TransliterationModules.idl>
#include <com/sun/star/i18n/TransliterationModulesNew.idl>

//=============================================================================

module com { module sun { module star { module i18n {

//=============================================================================

/**
    Character conversions like case folding or Hiragana to Katakana.

    <p> Transliteration is a character to character conversion but it is
    not always a one to one mapping between characters. Transliteration
    modules are primarily used by collation, and search and replace
    modules to perform approximate search. It can also be used to format
    the numbers in different numbering systems. </p>

    <p> In order to select transliteration modules for different
    purposes, they are classified with attributes of
    <type>TransliterationType</type>. </p>

    <p> For Western languages there would be three transliteration
    modules available to compare two mixed case strings: upper to lower,
    lower to upper, and ignore case. </p>

    <p> A typical calling sequence of transliteration is
    <ol>
        <li> getAvailableModules() </li>
        <li> loadModulesByImplNames() </li>
        <li> equals() </li>
    </ol>
    or another one is
    <ol>
        <li> loadModule() </li>
        <li> transliterate() </li>
    </ol>
    </p>

*/

/* comment:
 * 0.
 * All the IGNORE-type functionalities (Range, equals) are based on mapping,
 * except the equals() method in IGNORE_CASE, which is based on
 * locale-independent case folding.
 * ( This second assumption is very complicated and may cause confusion of use)
 *
 * 1.
 * We are assuming Upper to Lower mapping as one of transliteration.
 * The mapping depends on Locale.
 * Upper <-> Lower methods are just wrappers to provide equals() and Range()
 *
 * 2.
 * equals() in IGNORE_CASE module is locale-independent and
 * we don't provide locale-sensitive ones.
 * The reason we provided locale-independent ones is that IGNORE_CASE is mainly
 * dedicated to StarOffice internal code.
 *
 * 3.
 * TransliterationModules is used just for convenience without calling
 * getAvailableModules().
 *
 * 4.
 * Implementation name in the methods below is not the same as
 * the true implementation name registered.
 * In particular, for generic modules: "UPPERCASE_LOWERCASE",
 * "LOWERCASE_UPPERCASE", "IGNORE_CASE", there is no registered name.
 */


published interface XTransliteration: com::sun::star::uno::XInterface
{

    //------------------------------------------------------------------------
    /** Unique ASCII name to identify a module. This name is used
        to get its localized name for menus, dialogs etc. The behavior
        is undefined for <const>TransliterationType::CASCADE</const>
        modules.
     */
    string getName();

    //------------------------------------------------------------------------
    /** Return the attribute(s) associated with this transliterator
        object, as defined in <type>TransliterationType</type>. The
        value is determined by the transliteration modules. For example,
        for UPPERCASE_LOWERCASE, a ONE_TO_ONE is returned, for
        IGNORE_CASE, IGNORE is returned.
     */
    short getType();

    //------------------------------------------------------------------------
    /** Load instance of predefined module - old style method.

        @param eModType
            The predefined module to load.

        @param aLocale
            The locale the module is loaded for.
     */
    void loadModule( [in] TransliterationModules eModType,
                     [in] ::com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Load a sequence of instances of predefined modules - supersedes
        method <member>XTransliteration::loadModule()</member>.

        @param aModType
            The predefined modules to load.

        @param aLocale
            The locale the modules are loaded for.
     */
    void loadModuleNew( [in] sequence <TransliterationModulesNew> aModType,
                        [in] ::com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Load instance of UNO registered module.

        <p> Each transliteration module is registered under a different
        service name. The convention for the service name is
        com.sun.star.i18n.Transliteration.l10n.{implName}. The
        {implName} is a unique name used to identify a module. The
        implName is used to get a localized name for the transliteration
        module. The implName is used in locale data to list the
        available transliteration modules for the locale. There are some
        transliteration modules that are always available. The names of
        those modules are listed as enum
        <type>TransliterationModules</type> names. For modules not
        listed there it is possible to load them directly by their
        implName. </p>

        @param aImplName
            The module's {implName} under which it is registered with
            com.sun.star.i18n.Transliteration.l10n.{implName}.

        @param aLocale
            The locale the module is loaded for.
     */
    void loadModuleByImplName( [in] string aImplName,
                               [in] ::com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Load a sequence of instances of transliteration modules.
        Output of one module is fed as input to the next module in
        the sequence. The object created by this call has
        <type>TransliterationType</type> CASCADE and IGNORE types.

        @param aImplNameList
            Only IGNORE type modules can be specified.

        @param aLocale
            The locale the modules are loaded for.
     */
    void loadModulesByImplNames( [in] sequence <string> aImplNameList,
                                 [in] ::com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** List the available transliteration modules for a given locale.
        It can be filtered based on its type.

        @param aLocale
            The locale for which the available modules are listed.

        @param nType
            A bitmask field of values defined in
            <type>TransliterationType</type>
     */
    sequence<string> getAvailableModules(
                                 [in] ::com::sun::star::lang::Locale aLocale,
                                 [in] short nType );


    //------------------------------------------------------------------------
    /** Transliterate a substring. This method can be called if the
        object doesn't have <type>TransliterationType</type> IGNORE
        attribute.

        @param aInStr
            The input string.

        @param nStartPos
            Start position within aInStr from where transliteration starts.

        @param nCount
            Number of codepoints to be transliterated.

        @param rOffset
            To find the grapheme of input string corresponding to the
            grapheme of output string, rOffset provides the offset array
            whose index is the offset of output string, the element
            containing the position within the input string before
            transliteration.
     */
    string transliterate( [in] string aInStr, [in] long nStartPos,
                          [in] long nCount, [out] sequence <long> rOffset );

    //------------------------------------------------------------------------
    /** @deprecated
        For internal use, this method is supported to get the
        "transliteration", which equals() is based on.
     */
    string folding( [in] string aInStr, [in] long nStartPos,
                    [in] long nCount, [out] sequence <long> rOffset );

    //------------------------------------------------------------------------
    /** Match two substrings and find if they are equivalent as per this
        transliteration.

        <p> This method can be called if the object has
        <type>TransliterationType</type> IGNORE attribute. </p>

        <p> Returns the number of matched code points in any case, even if
        strings are not equal, for example: <br/>
        equals( "a", 0, 1, nMatch1, "aaa", 0, 3, nMatch2 ) <br/>
        returns <FALSE/> and nMatch1:=1 and nMatch2:=1 <br/>
        equals( "aab", 0, 3, nMatch1, "aaa", 0, 3, nMatch2 ) <br/>
        returns <FALSE/> and nMatch1:=2 and nMatch2:=2 <br/> </p>

        @param aStr1
            First string to match.

        @param nPos1
            Start position within aStr1.

        @param nCount1
            Number of code points to use of aStr1.

        @param rMatch1
            Returns number of matched code points in aStr1.

        @param aStr2
            Second string to match.

        @param nPos2
            Start position within aStr2.

        @param nCount2
            Number of code points to use of aStr2.

        @param rMatch2
            Returns number of matched code points in aStr2.

        @returns
            <TRUE/> if the substrings are equal per this
            transliteration <br/>
            <FALSE/> else.
     */

    boolean equals( [in] string aStr1, [in] long nPos1, [in] long nCount1,
                    [out] long rMatch1,
                    [in] string aStr2, [in] long nPos2, [in] long nCount2,
                    [out] long rMatch2 );

    //------------------------------------------------------------------------
    /** Transliterate one set of characters to another.

        <p> This method is intended for getting corresponding ranges and
        can be called if the object has <type>TransliterationType</type>
        IGNORE attribute. </p>

        <p> For example: generic IGNORE_CASE transliterateRange( "a", "i" )
        returns {"A","I","a","i"}, transliterateRange( "a", "a" )
        returns {"A","A","a","a"}. </p>

        <p> Use this transliteration to create regular expressions like
        [a-i] --> [A-Ia-i]. </p>

        @returns
            String sequence containing corresponding transliterated
            pairs of characters to represent a range.
     */
    sequence <string> transliterateRange( [in] string aStr1, [in] string aStr2 );

    //------------------------------------------------------------------------
    /** Compare 2 substrings as per this transliteration. It translates both
        substrings before comparing them.

        @param aStr1
            First string.

        @param nOff1
            Offset (from 0) of the first substring.

        @param nLen1
            Length (from offset) of the first substring.

        @param aStr2
            Second string.

        @param nOff2
            Offset (from 0) of the second substring.

        @param nLen2
            Length (from offset) of the second substring.

        @returns
            1 if the first substring is greater than the second substring <br/>
            0 if the first substring is equal to the second substring <br/>
            -1 if the first substring is less than the second substring
    */
    long compareSubstring( [in] string aStr1, [in] long nOff1, [in] long nLen1,
                           [in] string aStr2, [in] long nOff2, [in] long nLen2 );

    //------------------------------------------------------------------------
    /** Compare 2 strings as per this transliteration. It translates both
        strings before comparing them.

        @param aStr1
            First string.

        @param aStr2
            Second string.

        @returns
            1 if the first string is greater than the second string <br/>
            0 if the first string is equal to the second string <br/>
            -1 if the first string is less than the second string
     */
    long compareString( [in] string aStr1, [in] string aStr2 );

};

//=============================================================================
}; }; }; };

#endif