xref: /AOO42X/main/i18npool/source/collator/collator_unicode.cxx (revision cdf0e10c4e3984b49a9502b011690b615761d4a3)
1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
29*cdf0e10cSrcweir #include "precompiled_i18npool.hxx"
30*cdf0e10cSrcweir 
31*cdf0e10cSrcweir // generated list of languages
32*cdf0e10cSrcweir #include "lrl_include.hxx"
33*cdf0e10cSrcweir 
34*cdf0e10cSrcweir #include <rtl/ustrbuf.hxx>
35*cdf0e10cSrcweir #include <collator_unicode.hxx>
36*cdf0e10cSrcweir #include <localedata.hxx>
37*cdf0e10cSrcweir #include <com/sun/star/i18n/CollatorOptions.hpp>
38*cdf0e10cSrcweir 
39*cdf0e10cSrcweir using namespace ::com::sun::star;
40*cdf0e10cSrcweir using namespace ::com::sun::star::lang;
41*cdf0e10cSrcweir using namespace ::com::sun::star::uno;
42*cdf0e10cSrcweir using namespace ::rtl;
43*cdf0e10cSrcweir 
44*cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n {
45*cdf0e10cSrcweir 
46*cdf0e10cSrcweir Collator_Unicode::Collator_Unicode()
47*cdf0e10cSrcweir {
48*cdf0e10cSrcweir     implementationName = "com.sun.star.i18n.Collator_Unicode";
49*cdf0e10cSrcweir     collator = NULL;
50*cdf0e10cSrcweir     uca_base = NULL;
51*cdf0e10cSrcweir     hModule = NULL;
52*cdf0e10cSrcweir }
53*cdf0e10cSrcweir 
54*cdf0e10cSrcweir Collator_Unicode::~Collator_Unicode()
55*cdf0e10cSrcweir {
56*cdf0e10cSrcweir     if (collator) delete collator;
57*cdf0e10cSrcweir     if (uca_base) delete uca_base;
58*cdf0e10cSrcweir     if (hModule) osl_unloadModule(hModule);
59*cdf0e10cSrcweir }
60*cdf0e10cSrcweir 
61*cdf0e10cSrcweir sal_Int32 SAL_CALL
62*cdf0e10cSrcweir Collator_Unicode::compareSubstring( const OUString& str1, sal_Int32 off1, sal_Int32 len1,
63*cdf0e10cSrcweir     const OUString& str2, sal_Int32 off2, sal_Int32 len2) throw(RuntimeException)
64*cdf0e10cSrcweir {
65*cdf0e10cSrcweir     return collator->compare(reinterpret_cast<const UChar *>(str1.getStr()) + off1, len1, reinterpret_cast<const UChar *>(str2.getStr()) + off2, len2); // UChar != sal_Unicode in MinGW
66*cdf0e10cSrcweir }
67*cdf0e10cSrcweir 
68*cdf0e10cSrcweir sal_Int32 SAL_CALL
69*cdf0e10cSrcweir Collator_Unicode::compareString( const OUString& str1, const OUString& str2) throw(RuntimeException)
70*cdf0e10cSrcweir {
71*cdf0e10cSrcweir     return collator->compare(reinterpret_cast<const UChar *>(str1.getStr()), reinterpret_cast<const UChar *>(str2.getStr()));   // UChar != sal_Unicode in MinGW
72*cdf0e10cSrcweir }
73*cdf0e10cSrcweir 
74*cdf0e10cSrcweir extern "C" { static void SAL_CALL thisModule() {} }
75*cdf0e10cSrcweir 
76*cdf0e10cSrcweir sal_Int32 SAL_CALL
77*cdf0e10cSrcweir Collator_Unicode::loadCollatorAlgorithm(const OUString& rAlgorithm, const lang::Locale& rLocale, sal_Int32 options)
78*cdf0e10cSrcweir     throw(RuntimeException)
79*cdf0e10cSrcweir {
80*cdf0e10cSrcweir     if (!collator) {
81*cdf0e10cSrcweir         UErrorCode status = U_ZERO_ERROR;
82*cdf0e10cSrcweir         OUString rule = LocaleData().getCollatorRuleByAlgorithm(rLocale, rAlgorithm);
83*cdf0e10cSrcweir         if (rule.getLength() > 0) {
84*cdf0e10cSrcweir             collator = new RuleBasedCollator(reinterpret_cast<const UChar *>(rule.getStr()), status);   // UChar != sal_Unicode in MinGW
85*cdf0e10cSrcweir             if (! U_SUCCESS(status)) throw RuntimeException();
86*cdf0e10cSrcweir         }
87*cdf0e10cSrcweir         if (!collator && OUString::createFromAscii(LOCAL_RULE_LANGS).indexOf(rLocale.Language) >= 0) {
88*cdf0e10cSrcweir             OUStringBuffer aBuf;
89*cdf0e10cSrcweir #ifdef SAL_DLLPREFIX
90*cdf0e10cSrcweir             aBuf.appendAscii(SAL_DLLPREFIX);
91*cdf0e10cSrcweir #endif
92*cdf0e10cSrcweir             aBuf.appendAscii( "collator_data" ).appendAscii( SAL_DLLEXTENSION );
93*cdf0e10cSrcweir             hModule = osl_loadModuleRelative( &thisModule, aBuf.makeStringAndClear().pData, SAL_LOADMODULE_DEFAULT );
94*cdf0e10cSrcweir             if (hModule) {
95*cdf0e10cSrcweir                 const sal_uInt8* (*func)() = NULL;
96*cdf0e10cSrcweir                 aBuf.appendAscii("get_").append(rLocale.Language).appendAscii("_");
97*cdf0e10cSrcweir                 if (rLocale.Language.equalsAscii("zh")) {
98*cdf0e10cSrcweir                     OUString func_base = aBuf.makeStringAndClear();
99*cdf0e10cSrcweir                     if (OUString::createFromAscii("TW HK MO").indexOf(rLocale.Country) >= 0)
100*cdf0e10cSrcweir                         func=(const sal_uInt8* (*)()) osl_getFunctionSymbol(hModule,
101*cdf0e10cSrcweir                                     (func_base + OUString::createFromAscii("TW_") + rAlgorithm).pData);
102*cdf0e10cSrcweir                     if (!func)
103*cdf0e10cSrcweir                         func=(const sal_uInt8* (*)()) osl_getFunctionSymbol(hModule, (func_base + rAlgorithm).pData);
104*cdf0e10cSrcweir                 } else {
105*cdf0e10cSrcweir                     if (rLocale.Language.equalsAscii("ja")) {
106*cdf0e10cSrcweir                         // replace algrithm name to implementation name.
107*cdf0e10cSrcweir                         if (rAlgorithm.equalsAscii("phonetic (alphanumeric first)") )
108*cdf0e10cSrcweir                             aBuf.appendAscii("phonetic_alphanumeric_first");
109*cdf0e10cSrcweir                         else if (rAlgorithm.equalsAscii("phonetic (alphanumeric last)"))
110*cdf0e10cSrcweir                             aBuf.appendAscii("phonetic_alphanumeric_last");
111*cdf0e10cSrcweir                         else
112*cdf0e10cSrcweir                             aBuf.append(rAlgorithm);
113*cdf0e10cSrcweir                     } else {
114*cdf0e10cSrcweir                         aBuf.append(rAlgorithm);
115*cdf0e10cSrcweir                     }
116*cdf0e10cSrcweir                     func=(const sal_uInt8* (*)()) osl_getFunctionSymbol(hModule, aBuf.makeStringAndClear().pData);
117*cdf0e10cSrcweir                 }
118*cdf0e10cSrcweir                 if (func) {
119*cdf0e10cSrcweir                     const sal_uInt8* ruleImage=func();
120*cdf0e10cSrcweir                     uca_base = new RuleBasedCollator(static_cast<UChar*>(NULL), status);
121*cdf0e10cSrcweir                     if (! U_SUCCESS(status)) throw RuntimeException();
122*cdf0e10cSrcweir                     collator = new RuleBasedCollator(reinterpret_cast<const uint8_t*>(ruleImage), -1, uca_base, status);
123*cdf0e10cSrcweir                     if (! U_SUCCESS(status)) throw RuntimeException();
124*cdf0e10cSrcweir                 }
125*cdf0e10cSrcweir             }
126*cdf0e10cSrcweir         }
127*cdf0e10cSrcweir         if (!collator) {
128*cdf0e10cSrcweir             /** ICU collators are loaded using a locale only.
129*cdf0e10cSrcweir                 ICU uses Variant as collation algorithm name (like de__PHONEBOOK
130*cdf0e10cSrcweir                 locale), note the empty territory (Country) designator in this special
131*cdf0e10cSrcweir                 case here. The icu::Locale contructor changes the algorithm name to
132*cdf0e10cSrcweir                 uppercase itself, so we don't have to bother with that.
133*cdf0e10cSrcweir             */
134*cdf0e10cSrcweir             icu::Locale icuLocale(
135*cdf0e10cSrcweir                    OUStringToOString(rLocale.Language, RTL_TEXTENCODING_ASCII_US).getStr(),
136*cdf0e10cSrcweir                    OUStringToOString(rLocale.Country, RTL_TEXTENCODING_ASCII_US).getStr(),
137*cdf0e10cSrcweir                    OUStringToOString(rAlgorithm, RTL_TEXTENCODING_ASCII_US).getStr());
138*cdf0e10cSrcweir             // load ICU collator
139*cdf0e10cSrcweir             collator = (RuleBasedCollator*) icu::Collator::createInstance(icuLocale, status);
140*cdf0e10cSrcweir             if (! U_SUCCESS(status)) throw RuntimeException();
141*cdf0e10cSrcweir         }
142*cdf0e10cSrcweir     }
143*cdf0e10cSrcweir 
144*cdf0e10cSrcweir     if (options & CollatorOptions::CollatorOptions_IGNORE_CASE_ACCENT)
145*cdf0e10cSrcweir         collator->setStrength(Collator::PRIMARY);
146*cdf0e10cSrcweir     else if (options & CollatorOptions::CollatorOptions_IGNORE_CASE)
147*cdf0e10cSrcweir         collator->setStrength(Collator::SECONDARY);
148*cdf0e10cSrcweir     else
149*cdf0e10cSrcweir         collator->setStrength(Collator::TERTIARY);
150*cdf0e10cSrcweir 
151*cdf0e10cSrcweir     return(0);
152*cdf0e10cSrcweir }
153*cdf0e10cSrcweir 
154*cdf0e10cSrcweir 
155*cdf0e10cSrcweir OUString SAL_CALL
156*cdf0e10cSrcweir Collator_Unicode::getImplementationName() throw( RuntimeException )
157*cdf0e10cSrcweir {
158*cdf0e10cSrcweir     return OUString::createFromAscii(implementationName);
159*cdf0e10cSrcweir }
160*cdf0e10cSrcweir 
161*cdf0e10cSrcweir sal_Bool SAL_CALL
162*cdf0e10cSrcweir Collator_Unicode::supportsService(const rtl::OUString& rServiceName) throw( RuntimeException )
163*cdf0e10cSrcweir {
164*cdf0e10cSrcweir     return !rServiceName.compareToAscii(implementationName);
165*cdf0e10cSrcweir }
166*cdf0e10cSrcweir 
167*cdf0e10cSrcweir Sequence< OUString > SAL_CALL
168*cdf0e10cSrcweir Collator_Unicode::getSupportedServiceNames() throw( RuntimeException )
169*cdf0e10cSrcweir {
170*cdf0e10cSrcweir     Sequence< OUString > aRet(1);
171*cdf0e10cSrcweir     aRet[0] = OUString::createFromAscii(implementationName);
172*cdf0e10cSrcweir     return aRet;
173*cdf0e10cSrcweir }
174*cdf0e10cSrcweir 
175*cdf0e10cSrcweir } } } }
176*cdf0e10cSrcweir 
177