1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_i18npool.hxx"
30 #include <indexentrysupplier_default.hxx>
31 #include <localedata.hxx>
32 #include <i18nutil/unicode.hxx>
33 #include <com/sun/star/i18n/CollatorOptions.hpp>
34 
35 using namespace ::com::sun::star::uno;
36 using namespace ::com::sun::star::lang;
37 using namespace ::rtl;
38 
39 namespace com { namespace sun { namespace star { namespace i18n {
40 
41 IndexEntrySupplier_Unicode::IndexEntrySupplier_Unicode(
42     const com::sun::star::uno::Reference < com::sun::star::lang::XMultiServiceFactory >& rxMSF ) :
43     IndexEntrySupplier_Common(rxMSF)
44 {
45 	implementationName = "com.sun.star.i18n.IndexEntrySupplier_Unicode";
46     index = new Index(rxMSF);
47 }
48 
49 IndexEntrySupplier_Unicode::~IndexEntrySupplier_Unicode()
50 {
51     delete index;
52 }
53 
54 sal_Bool SAL_CALL IndexEntrySupplier_Unicode::loadAlgorithm( const lang::Locale& rLocale,
55 	const OUString& rAlgorithm, sal_Int32 collatorOptions ) throw (RuntimeException)
56 {
57     index->init(rLocale, rAlgorithm);
58     return IndexEntrySupplier_Common::loadAlgorithm(rLocale, rAlgorithm, collatorOptions);
59 }
60 
61 OUString SAL_CALL IndexEntrySupplier_Unicode::getIndexKey( const OUString& rIndexEntry,
62 	const OUString& rPhoneticEntry, const lang::Locale& rLocale ) throw (RuntimeException)
63 {
64     return index->getIndexDescription(getEntry(rIndexEntry, rPhoneticEntry, rLocale));
65 }
66 
67 sal_Int16 SAL_CALL IndexEntrySupplier_Unicode::compareIndexEntry(
68 	const OUString& rIndexEntry1, const OUString& rPhoneticEntry1, const lang::Locale& rLocale1,
69 	const OUString& rIndexEntry2, const OUString& rPhoneticEntry2, const lang::Locale& rLocale2 )
70 	throw (RuntimeException)
71 {
72     sal_Int16 result =
73             index->getIndexWeight(getEntry(rIndexEntry1, rPhoneticEntry1, rLocale1)) -
74             index->getIndexWeight(getEntry(rIndexEntry2, rPhoneticEntry2, rLocale2));
75     if (result == 0)
76         return IndexEntrySupplier_Common::compareIndexEntry(
77                     rIndexEntry1, rPhoneticEntry1, rLocale1,
78                     rIndexEntry2, rPhoneticEntry2, rLocale2);
79     return result > 0 ? 1 : -1;
80 }
81 
82 OUString SAL_CALL IndexEntrySupplier_Unicode::getIndexCharacter( const OUString& rIndexEntry,
83 	const lang::Locale& rLocale, const OUString& rAlgorithm ) throw (RuntimeException) {
84 
85     if (loadAlgorithm( rLocale, rAlgorithm, CollatorOptions::CollatorOptions_IGNORE_CASE_ACCENT))
86         return index->getIndexDescription(rIndexEntry);
87     else
88         return IndexEntrySupplier_Common::getIndexCharacter(rIndexEntry, rLocale, rAlgorithm);
89 }
90 
91 IndexTable::IndexTable()
92 {
93     table = NULL;
94 }
95 
96 IndexTable::~IndexTable()
97 {
98     if (table) free(table);
99 }
100 
101 void IndexTable::init(sal_Unicode start_, sal_Unicode end_, IndexKey *keys, sal_Int16 key_count, Index *index)
102 {
103     start=start_;
104     end=end_;
105     table = (sal_uInt8*) malloc((end-start+1)*sizeof(sal_uInt8));
106     for (sal_Unicode i = start; i <= end; i++) {
107         sal_Int16 j;
108         for (j = 0; j < key_count; j++) {
109             if (keys[j].key > 0 && (i == keys[j].key || index->compare(i, keys[j].key) == 0)) {
110                 table[i-start] = sal::static_int_cast<sal_uInt8>(j);
111                 break;
112             }
113         }
114         if (j == key_count)
115             table[i-start] = 0xFF;
116     }
117 }
118 
119 Index::Index(const com::sun::star::uno::Reference < com::sun::star::lang::XMultiServiceFactory >& rxMSF)
120 {
121 	collator = new CollatorImpl(rxMSF);
122 }
123 
124 Index::~Index()
125 {
126     delete collator;
127 }
128 
129 sal_Int16 Index::compare(sal_Unicode c1, sal_Unicode c2)
130 {
131     return sal::static_int_cast<sal_Int16>( collator->compareString(OUString(&c1, 1), OUString(&c2, 1)) );
132 }
133 
134 sal_Int16 Index::getIndexWeight(const OUString& rIndexEntry)
135 {
136     sal_Int32 startPos=0;
137     if (skipping_chars.getLength() > 0)
138         while (skipping_chars.indexOf(rIndexEntry[startPos]) >= 0)
139             startPos++;
140     if (mkey_count > 0) {
141         for (sal_Int16 i = 0; i < mkey_count; i++) {
142             sal_Int32 len = keys[mkeys[i]].mkey.getLength();
143             if (collator->compareSubstring(rIndexEntry, startPos, len,
144                                     keys[mkeys[i]].mkey, 0, len) == 0)
145                 return mkeys[i];
146         }
147     }
148     sal_Unicode code = rIndexEntry[startPos];
149     for (sal_Int16 i = 0; i < table_count; i++) {
150         if (tables[i].start <= code && code <= tables[i].end)
151             return tables[i].table[code-tables[i].start];
152     }
153     return 0xFF;
154 }
155 
156 OUString Index::getIndexDescription(const OUString& rIndexEntry)
157 {
158     sal_Int16 wgt = getIndexWeight(rIndexEntry);
159     if (wgt < MAX_KEYS) {
160         if (keys[wgt].desc.getLength())
161             return keys[wgt].desc;
162         else if (keys[wgt].key > 0)
163             return OUString(&keys[wgt].key, 1);
164         else
165             return keys[wgt].mkey;
166     }
167     sal_Int32 nPos=0;
168     sal_uInt32 indexChar=rIndexEntry.iterateCodePoints(&nPos, 0);
169     return OUString(&indexChar, 1);
170 }
171 
// Locale with language "en" and empty country and variant; used below as
// fallback when the requested locale supplies no data.
#define LOCALE_EN \
    lang::Locale(OUString::createFromAscii("en"), OUString(), OUString())
173 
174 void Index::makeIndexKeys(const lang::Locale &rLocale, const OUString &algorithm) throw (RuntimeException)
175 {
176     OUString keyStr = LocaleData().getIndexKeysByAlgorithm(rLocale, algorithm);
177 
178     if (!keyStr.getLength()) {
179         keyStr = LocaleData().getIndexKeysByAlgorithm(LOCALE_EN,
180                     LocaleData().getDefaultIndexAlgorithm(LOCALE_EN));
181         if (!keyStr)
182             throw RuntimeException();
183     }
184 
185     sal_Int16 len = sal::static_int_cast<sal_Int16>( keyStr.getLength() );
186     mkey_count=key_count=0;
187     skipping_chars=OUString();
188     sal_Int16 i, j;
189 
190     for (i = 0; i < len && key_count < MAX_KEYS; i++)
191     {
192         sal_Unicode curr = keyStr[i];
193         sal_Unicode close = sal_Unicode(')');
194 
195         if (unicode::isWhiteSpace(curr))
196             continue;
197 
198         switch(curr) {
199             case sal_Unicode('-'):
200                 if (key_count > 0 && i + 1 < len ) {
201                     for (curr = keyStr[++i]; key_count < MAX_KEYS && keys[key_count-1].key < curr; key_count++) {
202                         keys[key_count].key = keys[key_count-1].key+1;
203                         keys[key_count].desc = OUString();
204                     }
205                 } else
206                     throw RuntimeException();
207                 break;
208             case sal_Unicode('['):
209                 for (i++; i < len && keyStr[i] != sal_Unicode(']'); i++) {
210                     if (unicode::isWhiteSpace(keyStr[i])) {
211                         continue;
212                     } else if (keyStr[i] == sal_Unicode('_')) {
213                         for (curr=keyStr[i-1]+1;  curr <= keyStr[i+1]; curr++)
214                             skipping_chars+=OUString(curr);
215                         i+=2;
216                     } else {
217                         skipping_chars+=OUString(keyStr[i]);
218                     }
219                 }
220                 break;
221             case sal_Unicode('{'):
222                 close = sal_Unicode('}');
223             case sal_Unicode('('):
224                 if (key_count > 0) {
225                     sal_Int16 end = i+1;
226                     for (end=i+1; end < len && keyStr[end] != close; end++) ;
227 
228                     if (end >= len) // no found
229                         throw RuntimeException();
230                     if (close == sal_Unicode(')'))
231                         keys[key_count-1].desc = keyStr.copy(i+1, end-i-1);
232                     else {
233                         mkeys[mkey_count++]=key_count;
234                         keys[key_count].key = 0;
235                         keys[key_count].mkey = keyStr.copy(i+1, end-i-1);
236                         keys[key_count++].desc=OUString();
237                     }
238                     i=end+1;
239                 } else
240                     throw RuntimeException();
241                 break;
242             default:
243                 keys[key_count].key = curr;
244                 keys[key_count++].desc = OUString();
245                 break;
246         }
247     }
248     for (i = 0; i < mkey_count; i++) {
249         for (j=i+1; j < mkey_count; j++) {
250             if (keys[mkeys[i]].mkey.getLength() < keys[mkeys[j]].mkey.getLength()) {
251                 sal_Int16 k = mkeys[i];
252                 mkeys[i] = mkeys[j];
253                 mkeys[j] = k;
254             }
255         }
256     }
257 }
258 
259 void Index::init(const lang::Locale &rLocale, const OUString& algorithm) throw (RuntimeException)
260 {
261     makeIndexKeys(rLocale, algorithm);
262 
263     Sequence< UnicodeScript > scriptList = LocaleData().getUnicodeScripts( rLocale );
264 
265     if (scriptList.getLength() == 0) {
266         scriptList = LocaleData().getUnicodeScripts(LOCALE_EN);
267         if (scriptList.getLength() == 0)
268             throw RuntimeException();
269     }
270 
271     table_count = sal::static_int_cast<sal_Int16>( scriptList.getLength() );
272     if (table_count > MAX_TABLES)
273         throw RuntimeException();
274 
275     collator->loadCollatorAlgorithm(algorithm, rLocale, CollatorOptions::CollatorOptions_IGNORE_CASE_ACCENT);
276     sal_Int16 j=0;
277     sal_Unicode start = unicode::getUnicodeScriptStart((UnicodeScript)0);
278     sal_Unicode end = unicode::getUnicodeScriptEnd((UnicodeScript)0);
279     for (sal_Int16 i= (scriptList[0] == (UnicodeScript)0) ? 1 : 0; i< scriptList.getLength(); i++) {
280         if (unicode::getUnicodeScriptStart(scriptList[i]) != end+1) {
281             tables[j++].init(start, end, keys, key_count, this);
282             start = unicode::getUnicodeScriptStart(scriptList[i]);
283         }
284         end = unicode::getUnicodeScriptEnd(scriptList[i]);
285     }
286     tables[j++].init(start, end, keys, key_count, this);
287     table_count = j;
288 }
289 
290 } } } }
291