1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
29*cdf0e10cSrcweir #include "precompiled_i18npool.hxx"
30*cdf0e10cSrcweir #include <indexentrysupplier_default.hxx>
31*cdf0e10cSrcweir #include <localedata.hxx>
32*cdf0e10cSrcweir #include <i18nutil/unicode.hxx>
33*cdf0e10cSrcweir #include <com/sun/star/i18n/CollatorOptions.hpp>
34*cdf0e10cSrcweir 
35*cdf0e10cSrcweir using namespace ::com::sun::star::uno;
36*cdf0e10cSrcweir using namespace ::com::sun::star::lang;
37*cdf0e10cSrcweir using namespace ::rtl;
38*cdf0e10cSrcweir 
39*cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n {
40*cdf0e10cSrcweir 
41*cdf0e10cSrcweir IndexEntrySupplier_Unicode::IndexEntrySupplier_Unicode(
42*cdf0e10cSrcweir     const com::sun::star::uno::Reference < com::sun::star::lang::XMultiServiceFactory >& rxMSF ) :
43*cdf0e10cSrcweir     IndexEntrySupplier_Common(rxMSF)
44*cdf0e10cSrcweir {
45*cdf0e10cSrcweir 	implementationName = "com.sun.star.i18n.IndexEntrySupplier_Unicode";
46*cdf0e10cSrcweir     index = new Index(rxMSF);
47*cdf0e10cSrcweir }
48*cdf0e10cSrcweir 
49*cdf0e10cSrcweir IndexEntrySupplier_Unicode::~IndexEntrySupplier_Unicode()
50*cdf0e10cSrcweir {
51*cdf0e10cSrcweir     delete index;
52*cdf0e10cSrcweir }
53*cdf0e10cSrcweir 
54*cdf0e10cSrcweir sal_Bool SAL_CALL IndexEntrySupplier_Unicode::loadAlgorithm( const lang::Locale& rLocale,
55*cdf0e10cSrcweir 	const OUString& rAlgorithm, sal_Int32 collatorOptions ) throw (RuntimeException)
56*cdf0e10cSrcweir {
57*cdf0e10cSrcweir     index->init(rLocale, rAlgorithm);
58*cdf0e10cSrcweir     return IndexEntrySupplier_Common::loadAlgorithm(rLocale, rAlgorithm, collatorOptions);
59*cdf0e10cSrcweir }
60*cdf0e10cSrcweir 
61*cdf0e10cSrcweir OUString SAL_CALL IndexEntrySupplier_Unicode::getIndexKey( const OUString& rIndexEntry,
62*cdf0e10cSrcweir 	const OUString& rPhoneticEntry, const lang::Locale& rLocale ) throw (RuntimeException)
63*cdf0e10cSrcweir {
64*cdf0e10cSrcweir     return index->getIndexDescription(getEntry(rIndexEntry, rPhoneticEntry, rLocale));
65*cdf0e10cSrcweir }
66*cdf0e10cSrcweir 
67*cdf0e10cSrcweir sal_Int16 SAL_CALL IndexEntrySupplier_Unicode::compareIndexEntry(
68*cdf0e10cSrcweir 	const OUString& rIndexEntry1, const OUString& rPhoneticEntry1, const lang::Locale& rLocale1,
69*cdf0e10cSrcweir 	const OUString& rIndexEntry2, const OUString& rPhoneticEntry2, const lang::Locale& rLocale2 )
70*cdf0e10cSrcweir 	throw (RuntimeException)
71*cdf0e10cSrcweir {
72*cdf0e10cSrcweir     sal_Int16 result =
73*cdf0e10cSrcweir             index->getIndexWeight(getEntry(rIndexEntry1, rPhoneticEntry1, rLocale1)) -
74*cdf0e10cSrcweir             index->getIndexWeight(getEntry(rIndexEntry2, rPhoneticEntry2, rLocale2));
75*cdf0e10cSrcweir     if (result == 0)
76*cdf0e10cSrcweir         return IndexEntrySupplier_Common::compareIndexEntry(
77*cdf0e10cSrcweir                     rIndexEntry1, rPhoneticEntry1, rLocale1,
78*cdf0e10cSrcweir                     rIndexEntry2, rPhoneticEntry2, rLocale2);
79*cdf0e10cSrcweir     return result > 0 ? 1 : -1;
80*cdf0e10cSrcweir }
81*cdf0e10cSrcweir 
82*cdf0e10cSrcweir OUString SAL_CALL IndexEntrySupplier_Unicode::getIndexCharacter( const OUString& rIndexEntry,
83*cdf0e10cSrcweir 	const lang::Locale& rLocale, const OUString& rAlgorithm ) throw (RuntimeException) {
84*cdf0e10cSrcweir 
85*cdf0e10cSrcweir     if (loadAlgorithm( rLocale, rAlgorithm, CollatorOptions::CollatorOptions_IGNORE_CASE_ACCENT))
86*cdf0e10cSrcweir         return index->getIndexDescription(rIndexEntry);
87*cdf0e10cSrcweir     else
88*cdf0e10cSrcweir         return IndexEntrySupplier_Common::getIndexCharacter(rIndexEntry, rLocale, rAlgorithm);
89*cdf0e10cSrcweir }
90*cdf0e10cSrcweir 
91*cdf0e10cSrcweir IndexTable::IndexTable()
92*cdf0e10cSrcweir {
93*cdf0e10cSrcweir     table = NULL;
94*cdf0e10cSrcweir }
95*cdf0e10cSrcweir 
96*cdf0e10cSrcweir IndexTable::~IndexTable()
97*cdf0e10cSrcweir {
98*cdf0e10cSrcweir     if (table) free(table);
99*cdf0e10cSrcweir }
100*cdf0e10cSrcweir 
101*cdf0e10cSrcweir void IndexTable::init(sal_Unicode start_, sal_Unicode end_, IndexKey *keys, sal_Int16 key_count, Index *index)
102*cdf0e10cSrcweir {
103*cdf0e10cSrcweir     start=start_;
104*cdf0e10cSrcweir     end=end_;
105*cdf0e10cSrcweir     table = (sal_uInt8*) malloc((end-start+1)*sizeof(sal_uInt8));
106*cdf0e10cSrcweir     for (sal_Unicode i = start; i <= end; i++) {
107*cdf0e10cSrcweir         sal_Int16 j;
108*cdf0e10cSrcweir         for (j = 0; j < key_count; j++) {
109*cdf0e10cSrcweir             if (keys[j].key > 0 && (i == keys[j].key || index->compare(i, keys[j].key) == 0)) {
110*cdf0e10cSrcweir                 table[i-start] = sal::static_int_cast<sal_uInt8>(j);
111*cdf0e10cSrcweir                 break;
112*cdf0e10cSrcweir             }
113*cdf0e10cSrcweir         }
114*cdf0e10cSrcweir         if (j == key_count)
115*cdf0e10cSrcweir             table[i-start] = 0xFF;
116*cdf0e10cSrcweir     }
117*cdf0e10cSrcweir }
118*cdf0e10cSrcweir 
119*cdf0e10cSrcweir Index::Index(const com::sun::star::uno::Reference < com::sun::star::lang::XMultiServiceFactory >& rxMSF)
120*cdf0e10cSrcweir {
121*cdf0e10cSrcweir 	collator = new CollatorImpl(rxMSF);
122*cdf0e10cSrcweir }
123*cdf0e10cSrcweir 
124*cdf0e10cSrcweir Index::~Index()
125*cdf0e10cSrcweir {
126*cdf0e10cSrcweir     delete collator;
127*cdf0e10cSrcweir }
128*cdf0e10cSrcweir 
129*cdf0e10cSrcweir sal_Int16 Index::compare(sal_Unicode c1, sal_Unicode c2)
130*cdf0e10cSrcweir {
131*cdf0e10cSrcweir     return sal::static_int_cast<sal_Int16>( collator->compareString(OUString(&c1, 1), OUString(&c2, 1)) );
132*cdf0e10cSrcweir }
133*cdf0e10cSrcweir 
134*cdf0e10cSrcweir sal_Int16 Index::getIndexWeight(const OUString& rIndexEntry)
135*cdf0e10cSrcweir {
136*cdf0e10cSrcweir     sal_Int32 startPos=0;
137*cdf0e10cSrcweir     if (skipping_chars.getLength() > 0)
138*cdf0e10cSrcweir         while (skipping_chars.indexOf(rIndexEntry[startPos]) >= 0)
139*cdf0e10cSrcweir             startPos++;
140*cdf0e10cSrcweir     if (mkey_count > 0) {
141*cdf0e10cSrcweir         for (sal_Int16 i = 0; i < mkey_count; i++) {
142*cdf0e10cSrcweir             sal_Int32 len = keys[mkeys[i]].mkey.getLength();
143*cdf0e10cSrcweir             if (collator->compareSubstring(rIndexEntry, startPos, len,
144*cdf0e10cSrcweir                                     keys[mkeys[i]].mkey, 0, len) == 0)
145*cdf0e10cSrcweir                 return mkeys[i];
146*cdf0e10cSrcweir         }
147*cdf0e10cSrcweir     }
148*cdf0e10cSrcweir     sal_Unicode code = rIndexEntry[startPos];
149*cdf0e10cSrcweir     for (sal_Int16 i = 0; i < table_count; i++) {
150*cdf0e10cSrcweir         if (tables[i].start <= code && code <= tables[i].end)
151*cdf0e10cSrcweir             return tables[i].table[code-tables[i].start];
152*cdf0e10cSrcweir     }
153*cdf0e10cSrcweir     return 0xFF;
154*cdf0e10cSrcweir }
155*cdf0e10cSrcweir 
156*cdf0e10cSrcweir OUString Index::getIndexDescription(const OUString& rIndexEntry)
157*cdf0e10cSrcweir {
158*cdf0e10cSrcweir     sal_Int16 wgt = getIndexWeight(rIndexEntry);
159*cdf0e10cSrcweir     if (wgt < MAX_KEYS) {
160*cdf0e10cSrcweir         if (keys[wgt].desc.getLength())
161*cdf0e10cSrcweir             return keys[wgt].desc;
162*cdf0e10cSrcweir         else if (keys[wgt].key > 0)
163*cdf0e10cSrcweir             return OUString(&keys[wgt].key, 1);
164*cdf0e10cSrcweir         else
165*cdf0e10cSrcweir             return keys[wgt].mkey;
166*cdf0e10cSrcweir     }
167*cdf0e10cSrcweir     sal_Int32 nPos=0;
168*cdf0e10cSrcweir     sal_uInt32 indexChar=rIndexEntry.iterateCodePoints(&nPos, 0);
169*cdf0e10cSrcweir     return OUString(&indexChar, 1);
170*cdf0e10cSrcweir }
171*cdf0e10cSrcweir 
172*cdf0e10cSrcweir #define LOCALE_EN lang::Locale(OUString::createFromAscii("en"), OUString(), OUString())
173*cdf0e10cSrcweir 
174*cdf0e10cSrcweir void Index::makeIndexKeys(const lang::Locale &rLocale, const OUString &algorithm) throw (RuntimeException)
175*cdf0e10cSrcweir {
176*cdf0e10cSrcweir     OUString keyStr = LocaleData().getIndexKeysByAlgorithm(rLocale, algorithm);
177*cdf0e10cSrcweir 
178*cdf0e10cSrcweir     if (!keyStr.getLength()) {
179*cdf0e10cSrcweir         keyStr = LocaleData().getIndexKeysByAlgorithm(LOCALE_EN,
180*cdf0e10cSrcweir                     LocaleData().getDefaultIndexAlgorithm(LOCALE_EN));
181*cdf0e10cSrcweir         if (!keyStr)
182*cdf0e10cSrcweir             throw RuntimeException();
183*cdf0e10cSrcweir     }
184*cdf0e10cSrcweir 
185*cdf0e10cSrcweir     sal_Int16 len = sal::static_int_cast<sal_Int16>( keyStr.getLength() );
186*cdf0e10cSrcweir     mkey_count=key_count=0;
187*cdf0e10cSrcweir     skipping_chars=OUString();
188*cdf0e10cSrcweir     sal_Int16 i, j;
189*cdf0e10cSrcweir 
190*cdf0e10cSrcweir     for (i = 0; i < len && key_count < MAX_KEYS; i++)
191*cdf0e10cSrcweir     {
192*cdf0e10cSrcweir         sal_Unicode curr = keyStr[i];
193*cdf0e10cSrcweir         sal_Unicode close = sal_Unicode(')');
194*cdf0e10cSrcweir 
195*cdf0e10cSrcweir         if (unicode::isWhiteSpace(curr))
196*cdf0e10cSrcweir             continue;
197*cdf0e10cSrcweir 
198*cdf0e10cSrcweir         switch(curr) {
199*cdf0e10cSrcweir             case sal_Unicode('-'):
200*cdf0e10cSrcweir                 if (key_count > 0 && i + 1 < len ) {
201*cdf0e10cSrcweir                     for (curr = keyStr[++i]; key_count < MAX_KEYS && keys[key_count-1].key < curr; key_count++) {
202*cdf0e10cSrcweir                         keys[key_count].key = keys[key_count-1].key+1;
203*cdf0e10cSrcweir                         keys[key_count].desc = OUString();
204*cdf0e10cSrcweir                     }
205*cdf0e10cSrcweir                 } else
206*cdf0e10cSrcweir                     throw RuntimeException();
207*cdf0e10cSrcweir                 break;
208*cdf0e10cSrcweir             case sal_Unicode('['):
209*cdf0e10cSrcweir                 for (i++; i < len && keyStr[i] != sal_Unicode(']'); i++) {
210*cdf0e10cSrcweir                     if (unicode::isWhiteSpace(keyStr[i])) {
211*cdf0e10cSrcweir                         continue;
212*cdf0e10cSrcweir                     } else if (keyStr[i] == sal_Unicode('_')) {
213*cdf0e10cSrcweir                         for (curr=keyStr[i-1]+1;  curr <= keyStr[i+1]; curr++)
214*cdf0e10cSrcweir                             skipping_chars+=OUString(curr);
215*cdf0e10cSrcweir                         i+=2;
216*cdf0e10cSrcweir                     } else {
217*cdf0e10cSrcweir                         skipping_chars+=OUString(keyStr[i]);
218*cdf0e10cSrcweir                     }
219*cdf0e10cSrcweir                 }
220*cdf0e10cSrcweir                 break;
221*cdf0e10cSrcweir             case sal_Unicode('{'):
222*cdf0e10cSrcweir                 close = sal_Unicode('}');
223*cdf0e10cSrcweir             case sal_Unicode('('):
224*cdf0e10cSrcweir                 if (key_count > 0) {
225*cdf0e10cSrcweir                     sal_Int16 end = i+1;
226*cdf0e10cSrcweir                     for (end=i+1; end < len && keyStr[end] != close; end++) ;
227*cdf0e10cSrcweir 
228*cdf0e10cSrcweir                     if (end >= len) // no found
229*cdf0e10cSrcweir                         throw RuntimeException();
230*cdf0e10cSrcweir                     if (close == sal_Unicode(')'))
231*cdf0e10cSrcweir                         keys[key_count-1].desc = keyStr.copy(i+1, end-i-1);
232*cdf0e10cSrcweir                     else {
233*cdf0e10cSrcweir                         mkeys[mkey_count++]=key_count;
234*cdf0e10cSrcweir                         keys[key_count].key = 0;
235*cdf0e10cSrcweir                         keys[key_count].mkey = keyStr.copy(i+1, end-i-1);
236*cdf0e10cSrcweir                         keys[key_count++].desc=OUString();
237*cdf0e10cSrcweir                     }
238*cdf0e10cSrcweir                     i=end+1;
239*cdf0e10cSrcweir                 } else
240*cdf0e10cSrcweir                     throw RuntimeException();
241*cdf0e10cSrcweir                 break;
242*cdf0e10cSrcweir             default:
243*cdf0e10cSrcweir                 keys[key_count].key = curr;
244*cdf0e10cSrcweir                 keys[key_count++].desc = OUString();
245*cdf0e10cSrcweir                 break;
246*cdf0e10cSrcweir         }
247*cdf0e10cSrcweir     }
248*cdf0e10cSrcweir     for (i = 0; i < mkey_count; i++) {
249*cdf0e10cSrcweir         for (j=i+1; j < mkey_count; j++) {
250*cdf0e10cSrcweir             if (keys[mkeys[i]].mkey.getLength() < keys[mkeys[j]].mkey.getLength()) {
251*cdf0e10cSrcweir                 sal_Int16 k = mkeys[i];
252*cdf0e10cSrcweir                 mkeys[i] = mkeys[j];
253*cdf0e10cSrcweir                 mkeys[j] = k;
254*cdf0e10cSrcweir             }
255*cdf0e10cSrcweir         }
256*cdf0e10cSrcweir     }
257*cdf0e10cSrcweir }
258*cdf0e10cSrcweir 
259*cdf0e10cSrcweir void Index::init(const lang::Locale &rLocale, const OUString& algorithm) throw (RuntimeException)
260*cdf0e10cSrcweir {
261*cdf0e10cSrcweir     makeIndexKeys(rLocale, algorithm);
262*cdf0e10cSrcweir 
263*cdf0e10cSrcweir     Sequence< UnicodeScript > scriptList = LocaleData().getUnicodeScripts( rLocale );
264*cdf0e10cSrcweir 
265*cdf0e10cSrcweir     if (scriptList.getLength() == 0) {
266*cdf0e10cSrcweir         scriptList = LocaleData().getUnicodeScripts(LOCALE_EN);
267*cdf0e10cSrcweir         if (scriptList.getLength() == 0)
268*cdf0e10cSrcweir             throw RuntimeException();
269*cdf0e10cSrcweir     }
270*cdf0e10cSrcweir 
271*cdf0e10cSrcweir     table_count = sal::static_int_cast<sal_Int16>( scriptList.getLength() );
272*cdf0e10cSrcweir     if (table_count > MAX_TABLES)
273*cdf0e10cSrcweir         throw RuntimeException();
274*cdf0e10cSrcweir 
275*cdf0e10cSrcweir     collator->loadCollatorAlgorithm(algorithm, rLocale, CollatorOptions::CollatorOptions_IGNORE_CASE_ACCENT);
276*cdf0e10cSrcweir     sal_Int16 j=0;
277*cdf0e10cSrcweir     sal_Unicode start = unicode::getUnicodeScriptStart((UnicodeScript)0);
278*cdf0e10cSrcweir     sal_Unicode end = unicode::getUnicodeScriptEnd((UnicodeScript)0);
279*cdf0e10cSrcweir     for (sal_Int16 i= (scriptList[0] == (UnicodeScript)0) ? 1 : 0; i< scriptList.getLength(); i++) {
280*cdf0e10cSrcweir         if (unicode::getUnicodeScriptStart(scriptList[i]) != end+1) {
281*cdf0e10cSrcweir             tables[j++].init(start, end, keys, key_count, this);
282*cdf0e10cSrcweir             start = unicode::getUnicodeScriptStart(scriptList[i]);
283*cdf0e10cSrcweir         }
284*cdf0e10cSrcweir         end = unicode::getUnicodeScriptEnd(scriptList[i]);
285*cdf0e10cSrcweir     }
286*cdf0e10cSrcweir     tables[j++].init(start, end, keys, key_count, this);
287*cdf0e10cSrcweir     table_count = j;
288*cdf0e10cSrcweir }
289*cdf0e10cSrcweir 
290*cdf0e10cSrcweir } } } }
291