/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/


#ifndef __com_sun_star_i18n_XTransliteration_idl__
#define __com_sun_star_i18n_XTransliteration_idl__

#include <com/sun/star/lang/Locale.idl>
#include <com/sun/star/uno/XInterface.idl>
#include <com/sun/star/i18n/TransliterationModules.idl>
#include <com/sun/star/i18n/TransliterationModulesNew.idl>

//=============================================================================

module com { module sun { module star { module i18n {

//=============================================================================

/**
    Character conversions like case folding or Hiragana to Katakana.

    <p> Transliteration is a character to character conversion but it is
    not always a one to one mapping between characters. Transliteration
    modules are primarily used by collation, and search and replace
    modules to perform approximate search. It can also be used to format
    the numbers in different numbering systems. </p>

    <p> In order to select transliteration modules for different
    purposes, they are classified with attributes of
    <type>TransliterationType</type>. </p>

    <p> For Western languages there would be three transliteration
    modules available to compare two mixed case strings: upper to lower,
    lower to upper, and ignore case. </p>

    <p> A typical calling sequence of transliteration is
    <ol>
        <li> getAvailableModules() </li>
        <li> loadModulesByImplNames() </li>
        <li> equals() </li>
    </ol>
    or another one is
    <ol>
        <li> loadModule() </li>
        <li> transliterate() </li>
    </ol>
    </p>

*/

/* comment:
 * 0.
 *    All the IGNORE-type functionalities (Range, equals) are based on mapping,
 *    except equals() method in IGNORE_CASE, which is based on Locale-independent
 *    casefolding
 *    ( This second assumption is very complicated and may cause confusion of use)
 *
 * 1.
 *    We are assuming Upper to Lower mapping as one of transliteration.
 *    The mapping depends on Locale.
 *    Upper <-> Lower methods are just wrappers to provide equals() and Range()
 *
 * 2.
 *    equals() in IGNORE_CASE module is locale-independent and
 *    we don't provide locale-sensitive ones.
 *    The reason we provided locale-independent ones is that IGNORE_CASE is mainly
 *    dedicated to StarOffice internal code.
 *
 * 3.
 *    TransliterationModules is used just for convenience without calling
 *    getAvailableModules().
 *
 * 4.
 *    Implementation name in the methods below is not the same as
 *    the true implementation name registered.
 *    In particular, for generic modules:"UPPERCASE_LOWERCASE",
 *    "LOWERCASE_UPPERCASE", "IGNORE_CASE", there is no registered name.
 */


published interface XTransliteration: com::sun::star::uno::XInterface
{

    //------------------------------------------------------------------------
    /** Unique ASCII name to identify a module. This name is used
        to get its localized name for menus, dialogs etc. The behavior
        is undefined for <const>TransliterationType::CASCADE</const>
        modules.
     */
    string  getName();

    //------------------------------------------------------------------------
    /** Return the attribute(s) associated with this transliterator
        object, as defined in <type>TransliterationType</type>. The
        value is determined by the transliteration modules. For example,
        for UPPERCASE_LOWERCASE, a ONE_TO_ONE is returned, for
        IGNORE_CASE, IGNORE is returned.
     */
    short   getType();

    //------------------------------------------------------------------------
    /** Load instance of predefined module - old style method.
     */
    void    loadModule( [in] TransliterationModules eModType,
                        [in] ::com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Load a sequence of instances of predefined modules - supersedes
        method <member>XTransliteration::loadModule()</member>.
     */
    void    loadModuleNew( [in] sequence <TransliterationModulesNew> aModType,
                           [in] ::com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Load instance of UNO registered module.

        <p> Each transliteration module is registered under a different
        service name. The convention for the service name is
        com.sun.star.i18n.Transliteration.l10n.{implName}. The
        {implName} is a unique name used to identify a module. The
        implName is used to get a localized name for the transliteration
        module. The implName is used in locale data to list the
        available transliteration modules for the locale. There are some
        transliteration modules that are always available. The names of
        those modules are listed as enum
        <type>TransliterationModules</type> names. For modules not
        listed there it is possible to load them directly by their
        implName.

        @param aImplName
            The module's {implName} under which it is registered with
            com.sun.star.i18n.Transliteration.l10n.{implName}.
     */
    void    loadModuleByImplName( [in] string aImplName,
                                  [in] ::com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Load a sequence of instances of transliteration modules.
        Output of one module is fed as input to the next module in
        the sequence. The object created by this call has
        <type>TransliterationType</type> CASCADE and IGNORE types.

        @param aImplNameList
            Only IGNORE type modules can be specified.
     */
    void    loadModulesByImplNames( [in] sequence <string> aImplNameList,
                                    [in] ::com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** List the available transliteration modules for a given locale.
        It can be filtered based on its type.

        @param nType
            A bitmask field of values defined in
            <type>TransliterationType</type>
     */
    sequence<string>    getAvailableModules(
                            [in] ::com::sun::star::lang::Locale aLocale,
                            [in] short nType );


    //------------------------------------------------------------------------
    /** Transliterate a substring. This method can be called if the
        object doesn't have <type>TransliterationType</type> IGNORE
        attribute.

        @param aInStr
            The input string.

        @param nStartPos
            Start position within aInStr from where transliteration starts.

        @param nCount
            Number of codepoints to be transliterated.

        @param rOffset
            To find the grapheme of input string corresponding to the
            grapheme of output string, rOffset provides the offset array
            whose index is the offset of output string, the element
            containing the position within the input string before
            transliteration.
     */
    string  transliterate( [in] string aInStr, [in] long nStartPos,
                           [in] long nCount, [out] sequence <long> rOffset );

    //------------------------------------------------------------------------
    /** @deprecated
        For internal use, this method is supported to get the
        "transliteration", which equals() is based on.
     */
    string  folding( [in] string aInStr, [in] long nStartPos,
                     [in] long nCount, [out] sequence <long> rOffset );

    //------------------------------------------------------------------------
    /** Match two substrings and find if they are equivalent as per this
        transliteration.

        <p> This method can be called if the object has
        <type>TransliterationType</type> IGNORE attribute. </p>

        <p> Returns the number of matched code points in any case, even if
        strings are not equal, for example: <br/>
        equals( "a", 0, 1, nMatch1, "aaa", 0, 3, nMatch2 ) <br/>
        returns <FALSE/> and nMatch1:=1 and nMatch2:=1 <br/>
        equals( "aab", 0, 3, nMatch1, "aaa", 0, 3, nMatch2 ) <br/>
        returns <FALSE/> and nMatch1:=2 and nMatch2:=2 <br/> </p>

        @param aStr1
            First string to match.

        @param nPos1
            Start position within aStr1.

        @param nCount1
            Number of code points to use of aStr1.

        @param rMatch1
            Returns number of matched code points in aStr1.

        @param aStr2
            Second string to match.

        @param nPos2
            Start position within aStr2.

        @param nCount2
            Number of code points to use of aStr2.

        @param rMatch2
            Returns number of matched code points in aStr2.

        @returns
            <TRUE/> if the substrings are equal per this
            transliteration <br/>
            <FALSE/> else.
     */

    boolean equals( [in] string aStr1, [in] long nPos1, [in] long nCount1,
                    [out] long rMatch1,
                    [in] string aStr2, [in] long nPos2, [in] long nCount2,
                    [out] long rMatch2 );

    //------------------------------------------------------------------------
    /** Transliterate one set of characters to another.

        <p> This method is intended for getting corresponding ranges and
        can be called if the object has <type>TransliterationType</type>
        IGNORE attribute. </p>

        <p> For example: generic CASE_IGNORE transliterateRange( "a", "i" )
        returns {"A","I","a","i"}, transliterateRange( "a", "a" )
        returns {"A","A","a","a"}. </p>

        <p> Use this transliteration to create regular expressions like
        [a-i] --> [A-Ia-i]. </p>

        @returns
            String sequence containing corresponding transliterated
            pairs of characters to represent a range.
     */
    sequence <string>   transliterateRange( [in] string aStr1, [in] string aStr2 );

    //------------------------------------------------------------------------
    /** Compare 2 substrings as per this transliteration. It translates both
        substrings before comparing them.

        @param aStr1
            First string.

        @param nOff1
            Offset (from 0) of the first substring.

        @param nLen1
            Length (from offset) of the first substring.

        @param aStr2
            Second string.

        @param nOff2
            Offset (from 0) of the second substring.

        @param nLen2
            Length (from offset) of the second substring.

        @returns
            1 if the first substring is greater than the second substring <br/>
            0 if the first substring is equal to the second substring <br/>
            -1 if the first substring is less than the second substring
    */
    long    compareSubstring( [in] string aStr1, [in] long nOff1, [in] long nLen1,
                              [in] string aStr2, [in] long nOff2, [in] long nLen2 );

    //------------------------------------------------------------------------
    /** Compare 2 strings as per this transliteration. It translates both
        strings before comparing them.

        @returns
            1 if the first string is greater than the second string <br/>
            0 if the first string is equal to the second string <br/>
            -1 if the first string is less than the second string
     */
    long    compareString( [in] string aStr1, [in] string aStr2 );

};

//=============================================================================
}; }; }; };

#endif