1*449ab281SAndrew Rist /************************************************************** 2cdf0e10cSrcweir * 3*449ab281SAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one 4*449ab281SAndrew Rist * or more contributor license agreements. See the NOTICE file 5*449ab281SAndrew Rist * distributed with this work for additional information 6*449ab281SAndrew Rist * regarding copyright ownership. The ASF licenses this file 7*449ab281SAndrew Rist * to you under the Apache License, Version 2.0 (the 8*449ab281SAndrew Rist * "License"); you may not use this file except in compliance 9*449ab281SAndrew Rist * with the License. You may obtain a copy of the License at 10*449ab281SAndrew Rist * 11*449ab281SAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0 12*449ab281SAndrew Rist * 13*449ab281SAndrew Rist * Unless required by applicable law or agreed to in writing, 14*449ab281SAndrew Rist * software distributed under the License is distributed on an 15*449ab281SAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16*449ab281SAndrew Rist * KIND, either express or implied. See the License for the 17*449ab281SAndrew Rist * specific language governing permissions and limitations 18*449ab281SAndrew Rist * under the License. 19*449ab281SAndrew Rist * 20*449ab281SAndrew Rist *************************************************************/ 21*449ab281SAndrew Rist 22*449ab281SAndrew Rist 23cdf0e10cSrcweir 24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 25cdf0e10cSrcweir #include "precompiled_i18npool.hxx" 26cdf0e10cSrcweir #include <indexentrysupplier_default.hxx> 27cdf0e10cSrcweir #include <localedata.hxx> 28cdf0e10cSrcweir #include <i18nutil/unicode.hxx> 29cdf0e10cSrcweir #include <com/sun/star/i18n/CollatorOptions.hpp> 30cdf0e10cSrcweir 31cdf0e10cSrcweir using namespace ::com::sun::star::uno; 32cdf0e10cSrcweir using namespace ::com::sun::star::lang; 33cdf0e10cSrcweir using namespace ::rtl; 34cdf0e10cSrcweir 35cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n { 36cdf0e10cSrcweir 37cdf0e10cSrcweir IndexEntrySupplier_Unicode::IndexEntrySupplier_Unicode( 38cdf0e10cSrcweir const com::sun::star::uno::Reference < com::sun::star::lang::XMultiServiceFactory >& rxMSF ) : 39cdf0e10cSrcweir IndexEntrySupplier_Common(rxMSF) 40cdf0e10cSrcweir { 41cdf0e10cSrcweir implementationName = "com.sun.star.i18n.IndexEntrySupplier_Unicode"; 42cdf0e10cSrcweir index = new Index(rxMSF); 43cdf0e10cSrcweir } 44cdf0e10cSrcweir 45cdf0e10cSrcweir IndexEntrySupplier_Unicode::~IndexEntrySupplier_Unicode() 46cdf0e10cSrcweir { 47cdf0e10cSrcweir delete index; 48cdf0e10cSrcweir } 49cdf0e10cSrcweir 50cdf0e10cSrcweir sal_Bool SAL_CALL IndexEntrySupplier_Unicode::loadAlgorithm( const lang::Locale& rLocale, 51cdf0e10cSrcweir const OUString& rAlgorithm, sal_Int32 collatorOptions ) throw (RuntimeException) 52cdf0e10cSrcweir { 53cdf0e10cSrcweir index->init(rLocale, rAlgorithm); 54cdf0e10cSrcweir return IndexEntrySupplier_Common::loadAlgorithm(rLocale, rAlgorithm, collatorOptions); 55cdf0e10cSrcweir } 56cdf0e10cSrcweir 57cdf0e10cSrcweir OUString SAL_CALL IndexEntrySupplier_Unicode::getIndexKey( const OUString& rIndexEntry, 58cdf0e10cSrcweir const OUString& rPhoneticEntry, const lang::Locale& rLocale ) throw (RuntimeException) 59cdf0e10cSrcweir { 60cdf0e10cSrcweir return index->getIndexDescription(getEntry(rIndexEntry, rPhoneticEntry, rLocale)); 61cdf0e10cSrcweir } 62cdf0e10cSrcweir 63cdf0e10cSrcweir sal_Int16 SAL_CALL IndexEntrySupplier_Unicode::compareIndexEntry( 64cdf0e10cSrcweir const OUString& rIndexEntry1, const OUString& rPhoneticEntry1, const lang::Locale& rLocale1, 65cdf0e10cSrcweir const OUString& rIndexEntry2, const OUString& rPhoneticEntry2, const lang::Locale& rLocale2 ) 66cdf0e10cSrcweir throw (RuntimeException) 67cdf0e10cSrcweir { 68cdf0e10cSrcweir sal_Int16 result = 69cdf0e10cSrcweir index->getIndexWeight(getEntry(rIndexEntry1, rPhoneticEntry1, rLocale1)) - 70cdf0e10cSrcweir index->getIndexWeight(getEntry(rIndexEntry2, rPhoneticEntry2, rLocale2)); 71cdf0e10cSrcweir if (result == 0) 72cdf0e10cSrcweir return IndexEntrySupplier_Common::compareIndexEntry( 73cdf0e10cSrcweir rIndexEntry1, rPhoneticEntry1, rLocale1, 74cdf0e10cSrcweir rIndexEntry2, rPhoneticEntry2, rLocale2); 75cdf0e10cSrcweir return result > 0 ? 1 : -1; 76cdf0e10cSrcweir } 77cdf0e10cSrcweir 78cdf0e10cSrcweir OUString SAL_CALL IndexEntrySupplier_Unicode::getIndexCharacter( const OUString& rIndexEntry, 79cdf0e10cSrcweir const lang::Locale& rLocale, const OUString& rAlgorithm ) throw (RuntimeException) { 80cdf0e10cSrcweir 81cdf0e10cSrcweir if (loadAlgorithm( rLocale, rAlgorithm, CollatorOptions::CollatorOptions_IGNORE_CASE_ACCENT)) 82cdf0e10cSrcweir return index->getIndexDescription(rIndexEntry); 83cdf0e10cSrcweir else 84cdf0e10cSrcweir return IndexEntrySupplier_Common::getIndexCharacter(rIndexEntry, rLocale, rAlgorithm); 85cdf0e10cSrcweir } 86cdf0e10cSrcweir 87cdf0e10cSrcweir IndexTable::IndexTable() 88cdf0e10cSrcweir { 89cdf0e10cSrcweir table = NULL; 90cdf0e10cSrcweir } 91cdf0e10cSrcweir 92cdf0e10cSrcweir IndexTable::~IndexTable() 93cdf0e10cSrcweir { 94cdf0e10cSrcweir if (table) free(table); 95cdf0e10cSrcweir } 96cdf0e10cSrcweir 97cdf0e10cSrcweir void IndexTable::init(sal_Unicode start_, sal_Unicode end_, IndexKey *keys, sal_Int16 key_count, Index *index) 98cdf0e10cSrcweir { 99cdf0e10cSrcweir start=start_; 100cdf0e10cSrcweir end=end_; 101cdf0e10cSrcweir table = (sal_uInt8*) malloc((end-start+1)*sizeof(sal_uInt8)); 102cdf0e10cSrcweir for (sal_Unicode i = start; i <= end; i++) { 103cdf0e10cSrcweir sal_Int16 j; 104cdf0e10cSrcweir for (j = 0; j < key_count; j++) { 105cdf0e10cSrcweir if (keys[j].key > 0 && (i == keys[j].key || index->compare(i, keys[j].key) == 0)) { 106cdf0e10cSrcweir table[i-start] = sal::static_int_cast<sal_uInt8>(j); 107cdf0e10cSrcweir break; 108cdf0e10cSrcweir } 109cdf0e10cSrcweir } 110cdf0e10cSrcweir if (j == key_count) 111cdf0e10cSrcweir table[i-start] = 0xFF; 112cdf0e10cSrcweir } 113cdf0e10cSrcweir } 114cdf0e10cSrcweir 115cdf0e10cSrcweir Index::Index(const com::sun::star::uno::Reference < com::sun::star::lang::XMultiServiceFactory >& rxMSF) 116cdf0e10cSrcweir { 117cdf0e10cSrcweir collator = new CollatorImpl(rxMSF); 118cdf0e10cSrcweir } 119cdf0e10cSrcweir 120cdf0e10cSrcweir Index::~Index() 121cdf0e10cSrcweir { 122cdf0e10cSrcweir delete collator; 123cdf0e10cSrcweir } 124cdf0e10cSrcweir 125cdf0e10cSrcweir sal_Int16 Index::compare(sal_Unicode c1, sal_Unicode c2) 126cdf0e10cSrcweir { 127cdf0e10cSrcweir return sal::static_int_cast<sal_Int16>( collator->compareString(OUString(&c1, 1), OUString(&c2, 1)) ); 128cdf0e10cSrcweir } 129cdf0e10cSrcweir 130cdf0e10cSrcweir sal_Int16 Index::getIndexWeight(const OUString& rIndexEntry) 131cdf0e10cSrcweir { 132cdf0e10cSrcweir sal_Int32 startPos=0; 133cdf0e10cSrcweir if (skipping_chars.getLength() > 0) 134cdf0e10cSrcweir while (skipping_chars.indexOf(rIndexEntry[startPos]) >= 0) 135cdf0e10cSrcweir startPos++; 136cdf0e10cSrcweir if (mkey_count > 0) { 137cdf0e10cSrcweir for (sal_Int16 i = 0; i < mkey_count; i++) { 138cdf0e10cSrcweir sal_Int32 len = keys[mkeys[i]].mkey.getLength(); 139cdf0e10cSrcweir if (collator->compareSubstring(rIndexEntry, startPos, len, 140cdf0e10cSrcweir keys[mkeys[i]].mkey, 0, len) == 0) 141cdf0e10cSrcweir return mkeys[i]; 142cdf0e10cSrcweir } 143cdf0e10cSrcweir } 144cdf0e10cSrcweir sal_Unicode code = rIndexEntry[startPos]; 145cdf0e10cSrcweir for (sal_Int16 i = 0; i < table_count; i++) { 146cdf0e10cSrcweir if (tables[i].start <= code && code <= tables[i].end) 147cdf0e10cSrcweir return tables[i].table[code-tables[i].start]; 148cdf0e10cSrcweir } 149cdf0e10cSrcweir return 0xFF; 150cdf0e10cSrcweir } 151cdf0e10cSrcweir 152cdf0e10cSrcweir OUString Index::getIndexDescription(const OUString& rIndexEntry) 153cdf0e10cSrcweir { 154cdf0e10cSrcweir sal_Int16 wgt = getIndexWeight(rIndexEntry); 155cdf0e10cSrcweir if (wgt < MAX_KEYS) { 156cdf0e10cSrcweir if (keys[wgt].desc.getLength()) 157cdf0e10cSrcweir return keys[wgt].desc; 158cdf0e10cSrcweir else if (keys[wgt].key > 0) 159cdf0e10cSrcweir return OUString(&keys[wgt].key, 1); 160cdf0e10cSrcweir else 161cdf0e10cSrcweir return keys[wgt].mkey; 162cdf0e10cSrcweir } 163cdf0e10cSrcweir sal_Int32 nPos=0; 164cdf0e10cSrcweir sal_uInt32 indexChar=rIndexEntry.iterateCodePoints(&nPos, 0); 165cdf0e10cSrcweir return OUString(&indexChar, 1); 166cdf0e10cSrcweir } 167cdf0e10cSrcweir 168cdf0e10cSrcweir #define LOCALE_EN lang::Locale(OUString::createFromAscii("en"), OUString(), OUString()) 169cdf0e10cSrcweir 170cdf0e10cSrcweir void Index::makeIndexKeys(const lang::Locale &rLocale, const OUString &algorithm) throw (RuntimeException) 171cdf0e10cSrcweir { 172cdf0e10cSrcweir OUString keyStr = LocaleData().getIndexKeysByAlgorithm(rLocale, algorithm); 173cdf0e10cSrcweir 174cdf0e10cSrcweir if (!keyStr.getLength()) { 175cdf0e10cSrcweir keyStr = LocaleData().getIndexKeysByAlgorithm(LOCALE_EN, 176cdf0e10cSrcweir LocaleData().getDefaultIndexAlgorithm(LOCALE_EN)); 177cdf0e10cSrcweir if (!keyStr) 178cdf0e10cSrcweir throw RuntimeException(); 179cdf0e10cSrcweir } 180cdf0e10cSrcweir 181cdf0e10cSrcweir sal_Int16 len = sal::static_int_cast<sal_Int16>( keyStr.getLength() ); 182cdf0e10cSrcweir mkey_count=key_count=0; 183cdf0e10cSrcweir skipping_chars=OUString(); 184cdf0e10cSrcweir sal_Int16 i, j; 185cdf0e10cSrcweir 186cdf0e10cSrcweir for (i = 0; i < len && key_count < MAX_KEYS; i++) 187cdf0e10cSrcweir { 188cdf0e10cSrcweir sal_Unicode curr = keyStr[i]; 189cdf0e10cSrcweir sal_Unicode close = sal_Unicode(')'); 190cdf0e10cSrcweir 191cdf0e10cSrcweir if (unicode::isWhiteSpace(curr)) 192cdf0e10cSrcweir continue; 193cdf0e10cSrcweir 194cdf0e10cSrcweir switch(curr) { 195cdf0e10cSrcweir case sal_Unicode('-'): 196cdf0e10cSrcweir if (key_count > 0 && i + 1 < len ) { 197cdf0e10cSrcweir for (curr = keyStr[++i]; key_count < MAX_KEYS && keys[key_count-1].key < curr; key_count++) { 198cdf0e10cSrcweir keys[key_count].key = keys[key_count-1].key+1; 199cdf0e10cSrcweir keys[key_count].desc = OUString(); 200cdf0e10cSrcweir } 201cdf0e10cSrcweir } else 202cdf0e10cSrcweir throw RuntimeException(); 203cdf0e10cSrcweir break; 204cdf0e10cSrcweir case sal_Unicode('['): 205cdf0e10cSrcweir for (i++; i < len && keyStr[i] != sal_Unicode(']'); i++) { 206cdf0e10cSrcweir if (unicode::isWhiteSpace(keyStr[i])) { 207cdf0e10cSrcweir continue; 208cdf0e10cSrcweir } else if (keyStr[i] == sal_Unicode('_')) { 209cdf0e10cSrcweir for (curr=keyStr[i-1]+1; curr <= keyStr[i+1]; curr++) 210cdf0e10cSrcweir skipping_chars+=OUString(curr); 211cdf0e10cSrcweir i+=2; 212cdf0e10cSrcweir } else { 213cdf0e10cSrcweir skipping_chars+=OUString(keyStr[i]); 214cdf0e10cSrcweir } 215cdf0e10cSrcweir } 216cdf0e10cSrcweir break; 217cdf0e10cSrcweir case sal_Unicode('{'): 218cdf0e10cSrcweir close = sal_Unicode('}'); 219cdf0e10cSrcweir case sal_Unicode('('): 220cdf0e10cSrcweir if (key_count > 0) { 221cdf0e10cSrcweir sal_Int16 end = i+1; 222cdf0e10cSrcweir for (end=i+1; end < len && keyStr[end] != close; end++) ; 223cdf0e10cSrcweir 224cdf0e10cSrcweir if (end >= len) // no found 225cdf0e10cSrcweir throw RuntimeException(); 226cdf0e10cSrcweir if (close == sal_Unicode(')')) 227cdf0e10cSrcweir keys[key_count-1].desc = keyStr.copy(i+1, end-i-1); 228cdf0e10cSrcweir else { 229cdf0e10cSrcweir mkeys[mkey_count++]=key_count; 230cdf0e10cSrcweir keys[key_count].key = 0; 231cdf0e10cSrcweir keys[key_count].mkey = keyStr.copy(i+1, end-i-1); 232cdf0e10cSrcweir keys[key_count++].desc=OUString(); 233cdf0e10cSrcweir } 234cdf0e10cSrcweir i=end+1; 235cdf0e10cSrcweir } else 236cdf0e10cSrcweir throw RuntimeException(); 237cdf0e10cSrcweir break; 238cdf0e10cSrcweir default: 239cdf0e10cSrcweir keys[key_count].key = curr; 240cdf0e10cSrcweir keys[key_count++].desc = OUString(); 241cdf0e10cSrcweir break; 242cdf0e10cSrcweir } 243cdf0e10cSrcweir } 244cdf0e10cSrcweir for (i = 0; i < mkey_count; i++) { 245cdf0e10cSrcweir for (j=i+1; j < mkey_count; j++) { 246cdf0e10cSrcweir if (keys[mkeys[i]].mkey.getLength() < keys[mkeys[j]].mkey.getLength()) { 247cdf0e10cSrcweir sal_Int16 k = mkeys[i]; 248cdf0e10cSrcweir mkeys[i] = mkeys[j]; 249cdf0e10cSrcweir mkeys[j] = k; 250cdf0e10cSrcweir } 251cdf0e10cSrcweir } 252cdf0e10cSrcweir } 253cdf0e10cSrcweir } 254cdf0e10cSrcweir 255cdf0e10cSrcweir void Index::init(const lang::Locale &rLocale, const OUString& algorithm) throw (RuntimeException) 256cdf0e10cSrcweir { 257cdf0e10cSrcweir makeIndexKeys(rLocale, algorithm); 258cdf0e10cSrcweir 259cdf0e10cSrcweir Sequence< UnicodeScript > scriptList = LocaleData().getUnicodeScripts( rLocale ); 260cdf0e10cSrcweir 261cdf0e10cSrcweir if (scriptList.getLength() == 0) { 262cdf0e10cSrcweir scriptList = LocaleData().getUnicodeScripts(LOCALE_EN); 263cdf0e10cSrcweir if (scriptList.getLength() == 0) 264cdf0e10cSrcweir throw RuntimeException(); 265cdf0e10cSrcweir } 266cdf0e10cSrcweir 267cdf0e10cSrcweir table_count = sal::static_int_cast<sal_Int16>( scriptList.getLength() ); 268cdf0e10cSrcweir if (table_count > MAX_TABLES) 269cdf0e10cSrcweir throw RuntimeException(); 270cdf0e10cSrcweir 271cdf0e10cSrcweir collator->loadCollatorAlgorithm(algorithm, rLocale, CollatorOptions::CollatorOptions_IGNORE_CASE_ACCENT); 272cdf0e10cSrcweir sal_Int16 j=0; 273cdf0e10cSrcweir sal_Unicode start = unicode::getUnicodeScriptStart((UnicodeScript)0); 274cdf0e10cSrcweir sal_Unicode end = unicode::getUnicodeScriptEnd((UnicodeScript)0); 275cdf0e10cSrcweir for (sal_Int16 i= (scriptList[0] == (UnicodeScript)0) ? 1 : 0; i< scriptList.getLength(); i++) { 276cdf0e10cSrcweir if (unicode::getUnicodeScriptStart(scriptList[i]) != end+1) { 277cdf0e10cSrcweir tables[j++].init(start, end, keys, key_count, this); 278cdf0e10cSrcweir start = unicode::getUnicodeScriptStart(scriptList[i]); 279cdf0e10cSrcweir } 280cdf0e10cSrcweir end = unicode::getUnicodeScriptEnd(scriptList[i]); 281cdf0e10cSrcweir } 282cdf0e10cSrcweir tables[j++].init(start, end, keys, key_count, this); 283cdf0e10cSrcweir table_count = j; 284cdf0e10cSrcweir } 285cdf0e10cSrcweir 286cdf0e10cSrcweir } } } } 287