1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_i18npool.hxx"
26 #include <indexentrysupplier_default.hxx>
27 #include <localedata.hxx>
28 #include <i18nutil/unicode.hxx>
29 #include <com/sun/star/i18n/CollatorOptions.hpp>
30 
31 using namespace ::com::sun::star::uno;
32 using namespace ::com::sun::star::lang;
33 using namespace ::rtl;
34 
35 namespace com { namespace sun { namespace star { namespace i18n {
36 
37 IndexEntrySupplier_Unicode::IndexEntrySupplier_Unicode(
38     const com::sun::star::uno::Reference < com::sun::star::lang::XMultiServiceFactory >& rxMSF ) :
39     IndexEntrySupplier_Common(rxMSF)
40 {
41 	implementationName = "com.sun.star.i18n.IndexEntrySupplier_Unicode";
42     index = new Index(rxMSF);
43 }
44 
45 IndexEntrySupplier_Unicode::~IndexEntrySupplier_Unicode()
46 {
47     delete index;
48 }
49 
50 sal_Bool SAL_CALL IndexEntrySupplier_Unicode::loadAlgorithm( const lang::Locale& rLocale,
51 	const OUString& rAlgorithm, sal_Int32 collatorOptions ) throw (RuntimeException)
52 {
53     index->init(rLocale, rAlgorithm);
54     return IndexEntrySupplier_Common::loadAlgorithm(rLocale, rAlgorithm, collatorOptions);
55 }
56 
57 OUString SAL_CALL IndexEntrySupplier_Unicode::getIndexKey( const OUString& rIndexEntry,
58 	const OUString& rPhoneticEntry, const lang::Locale& rLocale ) throw (RuntimeException)
59 {
60     return index->getIndexDescription(getEntry(rIndexEntry, rPhoneticEntry, rLocale));
61 }
62 
63 sal_Int16 SAL_CALL IndexEntrySupplier_Unicode::compareIndexEntry(
64 	const OUString& rIndexEntry1, const OUString& rPhoneticEntry1, const lang::Locale& rLocale1,
65 	const OUString& rIndexEntry2, const OUString& rPhoneticEntry2, const lang::Locale& rLocale2 )
66 	throw (RuntimeException)
67 {
68     sal_Int16 result =
69             index->getIndexWeight(getEntry(rIndexEntry1, rPhoneticEntry1, rLocale1)) -
70             index->getIndexWeight(getEntry(rIndexEntry2, rPhoneticEntry2, rLocale2));
71     if (result == 0)
72         return IndexEntrySupplier_Common::compareIndexEntry(
73                     rIndexEntry1, rPhoneticEntry1, rLocale1,
74                     rIndexEntry2, rPhoneticEntry2, rLocale2);
75     return result > 0 ? 1 : -1;
76 }
77 
78 OUString SAL_CALL IndexEntrySupplier_Unicode::getIndexCharacter( const OUString& rIndexEntry,
79 	const lang::Locale& rLocale, const OUString& rAlgorithm ) throw (RuntimeException) {
80 
81     if (loadAlgorithm( rLocale, rAlgorithm, CollatorOptions::CollatorOptions_IGNORE_CASE_ACCENT))
82         return index->getIndexDescription(rIndexEntry);
83     else
84         return IndexEntrySupplier_Common::getIndexCharacter(rIndexEntry, rLocale, rAlgorithm);
85 }
86 
87 IndexTable::IndexTable()
88 {
89     table = NULL;
90 }
91 
92 IndexTable::~IndexTable()
93 {
94     if (table) free(table);
95 }
96 
97 void IndexTable::init(sal_Unicode start_, sal_Unicode end_, IndexKey *keys, sal_Int16 key_count, Index *index)
98 {
99     start=start_;
100     end=end_;
101     table = (sal_uInt8*) malloc((end-start+1)*sizeof(sal_uInt8));
102     for (sal_Unicode i = start; i <= end; i++) {
103         sal_Int16 j;
104         for (j = 0; j < key_count; j++) {
105             if (keys[j].key > 0 && (i == keys[j].key || index->compare(i, keys[j].key) == 0)) {
106                 table[i-start] = sal::static_int_cast<sal_uInt8>(j);
107                 break;
108             }
109         }
110         if (j == key_count)
111             table[i-start] = 0xFF;
112     }
113 }
114 
115 Index::Index(const com::sun::star::uno::Reference < com::sun::star::lang::XMultiServiceFactory >& rxMSF)
116 {
117 	collator = new CollatorImpl(rxMSF);
118 }
119 
120 Index::~Index()
121 {
122     delete collator;
123 }
124 
125 sal_Int16 Index::compare(sal_Unicode c1, sal_Unicode c2)
126 {
127     return sal::static_int_cast<sal_Int16>( collator->compareString(OUString(&c1, 1), OUString(&c2, 1)) );
128 }
129 
130 sal_Int16 Index::getIndexWeight(const OUString& rIndexEntry)
131 {
132     sal_Int32 startPos=0;
133     if (skipping_chars.getLength() > 0)
134         while (skipping_chars.indexOf(rIndexEntry[startPos]) >= 0)
135             startPos++;
136     if (mkey_count > 0) {
137         for (sal_Int16 i = 0; i < mkey_count; i++) {
138             sal_Int32 len = keys[mkeys[i]].mkey.getLength();
139             if (collator->compareSubstring(rIndexEntry, startPos, len,
140                                     keys[mkeys[i]].mkey, 0, len) == 0)
141                 return mkeys[i];
142         }
143     }
144     sal_Unicode code = rIndexEntry[startPos];
145     for (sal_Int16 i = 0; i < table_count; i++) {
146         if (tables[i].start <= code && code <= tables[i].end)
147             return tables[i].table[code-tables[i].start];
148     }
149     return 0xFF;
150 }
151 
152 OUString Index::getIndexDescription(const OUString& rIndexEntry)
153 {
154     sal_Int16 wgt = getIndexWeight(rIndexEntry);
155     if (wgt < MAX_KEYS) {
156         if (keys[wgt].desc.getLength())
157             return keys[wgt].desc;
158         else if (keys[wgt].key > 0)
159             return OUString(&keys[wgt].key, 1);
160         else
161             return keys[wgt].mkey;
162     }
163     sal_Int32 nPos=0;
164     sal_uInt32 indexChar=rIndexEntry.iterateCodePoints(&nPos, 0);
165     return OUString(&indexChar, 1);
166 }
167 
168 #define LOCALE_EN lang::Locale(OUString::createFromAscii("en"), OUString(), OUString())
169 
170 void Index::makeIndexKeys(const lang::Locale &rLocale, const OUString &algorithm) throw (RuntimeException)
171 {
172     OUString keyStr = LocaleData().getIndexKeysByAlgorithm(rLocale, algorithm);
173 
174     if (!keyStr.getLength()) {
175         keyStr = LocaleData().getIndexKeysByAlgorithm(LOCALE_EN,
176                     LocaleData().getDefaultIndexAlgorithm(LOCALE_EN));
177         if (!keyStr)
178             throw RuntimeException();
179     }
180 
181     sal_Int16 len = sal::static_int_cast<sal_Int16>( keyStr.getLength() );
182     mkey_count=key_count=0;
183     skipping_chars=OUString();
184     sal_Int16 i, j;
185 
186     for (i = 0; i < len && key_count < MAX_KEYS; i++)
187     {
188         sal_Unicode curr = keyStr[i];
189         sal_Unicode close = sal_Unicode(')');
190 
191         if (unicode::isWhiteSpace(curr))
192             continue;
193 
194         switch(curr) {
195             case sal_Unicode('-'):
196                 if (key_count > 0 && i + 1 < len ) {
197                     for (curr = keyStr[++i]; key_count < MAX_KEYS && keys[key_count-1].key < curr; key_count++) {
198                         keys[key_count].key = keys[key_count-1].key+1;
199                         keys[key_count].desc = OUString();
200                     }
201                 } else
202                     throw RuntimeException();
203                 break;
204             case sal_Unicode('['):
205                 for (i++; i < len && keyStr[i] != sal_Unicode(']'); i++) {
206                     if (unicode::isWhiteSpace(keyStr[i])) {
207                         continue;
208                     } else if (keyStr[i] == sal_Unicode('_')) {
209                         for (curr=keyStr[i-1]+1;  curr <= keyStr[i+1]; curr++)
210                             skipping_chars+=OUString(curr);
211                         i+=2;
212                     } else {
213                         skipping_chars+=OUString(keyStr[i]);
214                     }
215                 }
216                 break;
217             case sal_Unicode('{'):
218                 close = sal_Unicode('}');
219             case sal_Unicode('('):
220                 if (key_count > 0) {
221                     sal_Int16 end = i+1;
222                     for (end=i+1; end < len && keyStr[end] != close; end++) ;
223 
224                     if (end >= len) // no found
225                         throw RuntimeException();
226                     if (close == sal_Unicode(')'))
227                         keys[key_count-1].desc = keyStr.copy(i+1, end-i-1);
228                     else {
229                         mkeys[mkey_count++]=key_count;
230                         keys[key_count].key = 0;
231                         keys[key_count].mkey = keyStr.copy(i+1, end-i-1);
232                         keys[key_count++].desc=OUString();
233                     }
234                     i=end+1;
235                 } else
236                     throw RuntimeException();
237                 break;
238             default:
239                 keys[key_count].key = curr;
240                 keys[key_count++].desc = OUString();
241                 break;
242         }
243     }
244     for (i = 0; i < mkey_count; i++) {
245         for (j=i+1; j < mkey_count; j++) {
246             if (keys[mkeys[i]].mkey.getLength() < keys[mkeys[j]].mkey.getLength()) {
247                 sal_Int16 k = mkeys[i];
248                 mkeys[i] = mkeys[j];
249                 mkeys[j] = k;
250             }
251         }
252     }
253 }
254 
255 void Index::init(const lang::Locale &rLocale, const OUString& algorithm) throw (RuntimeException)
256 {
257     makeIndexKeys(rLocale, algorithm);
258 
259     Sequence< UnicodeScript > scriptList = LocaleData().getUnicodeScripts( rLocale );
260 
261     if (scriptList.getLength() == 0) {
262         scriptList = LocaleData().getUnicodeScripts(LOCALE_EN);
263         if (scriptList.getLength() == 0)
264             throw RuntimeException();
265     }
266 
267     table_count = sal::static_int_cast<sal_Int16>( scriptList.getLength() );
268     if (table_count > MAX_TABLES)
269         throw RuntimeException();
270 
271     collator->loadCollatorAlgorithm(algorithm, rLocale, CollatorOptions::CollatorOptions_IGNORE_CASE_ACCENT);
272     sal_Int16 j=0;
273     sal_Unicode start = unicode::getUnicodeScriptStart((UnicodeScript)0);
274     sal_Unicode end = unicode::getUnicodeScriptEnd((UnicodeScript)0);
275     for (sal_Int16 i= (scriptList[0] == (UnicodeScript)0) ? 1 : 0; i< scriptList.getLength(); i++) {
276         if (unicode::getUnicodeScriptStart(scriptList[i]) != end+1) {
277             tables[j++].init(start, end, keys, key_count, this);
278             start = unicode::getUnicodeScriptStart(scriptList[i]);
279         }
280         end = unicode::getUnicodeScriptEnd(scriptList[i]);
281     }
282     tables[j++].init(start, end, keys, key_count, this);
283     table_count = j;
284 }
285 
286 } } } }
287