/*************************************************************************
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * Copyright 2000, 2010 Oracle and/or its affiliates.
 *
 * OpenOffice.org - a multi-platform office productivity suite
 *
 * This file is part of OpenOffice.org.
 *
 * OpenOffice.org is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3
 * only, as published by the Free Software Foundation.
 *
 * OpenOffice.org is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License version 3 for more details
 * (a copy is included in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU Lesser General Public License
 * version 3 along with OpenOffice.org. If not, see
 * <http://www.openoffice.org/license.html>
 * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir 28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 29*cdf0e10cSrcweir #include "precompiled_i18npool.hxx" 30*cdf0e10cSrcweir #include <indexentrysupplier_default.hxx> 31*cdf0e10cSrcweir #include <localedata.hxx> 32*cdf0e10cSrcweir #include <i18nutil/unicode.hxx> 33*cdf0e10cSrcweir #include <com/sun/star/i18n/CollatorOptions.hpp> 34*cdf0e10cSrcweir 35*cdf0e10cSrcweir using namespace ::com::sun::star::uno; 36*cdf0e10cSrcweir using namespace ::com::sun::star::lang; 37*cdf0e10cSrcweir using namespace ::rtl; 38*cdf0e10cSrcweir 39*cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n { 40*cdf0e10cSrcweir 41*cdf0e10cSrcweir IndexEntrySupplier_Unicode::IndexEntrySupplier_Unicode( 42*cdf0e10cSrcweir const com::sun::star::uno::Reference < com::sun::star::lang::XMultiServiceFactory >& rxMSF ) : 43*cdf0e10cSrcweir IndexEntrySupplier_Common(rxMSF) 44*cdf0e10cSrcweir { 45*cdf0e10cSrcweir implementationName = "com.sun.star.i18n.IndexEntrySupplier_Unicode"; 46*cdf0e10cSrcweir index = new Index(rxMSF); 47*cdf0e10cSrcweir } 48*cdf0e10cSrcweir 49*cdf0e10cSrcweir IndexEntrySupplier_Unicode::~IndexEntrySupplier_Unicode() 50*cdf0e10cSrcweir { 51*cdf0e10cSrcweir delete index; 52*cdf0e10cSrcweir } 53*cdf0e10cSrcweir 54*cdf0e10cSrcweir sal_Bool SAL_CALL IndexEntrySupplier_Unicode::loadAlgorithm( const lang::Locale& rLocale, 55*cdf0e10cSrcweir const OUString& rAlgorithm, sal_Int32 collatorOptions ) throw (RuntimeException) 56*cdf0e10cSrcweir { 57*cdf0e10cSrcweir index->init(rLocale, rAlgorithm); 58*cdf0e10cSrcweir return IndexEntrySupplier_Common::loadAlgorithm(rLocale, rAlgorithm, collatorOptions); 59*cdf0e10cSrcweir } 60*cdf0e10cSrcweir 61*cdf0e10cSrcweir OUString SAL_CALL IndexEntrySupplier_Unicode::getIndexKey( const OUString& rIndexEntry, 62*cdf0e10cSrcweir const OUString& rPhoneticEntry, 
const lang::Locale& rLocale ) throw (RuntimeException) 63*cdf0e10cSrcweir { 64*cdf0e10cSrcweir return index->getIndexDescription(getEntry(rIndexEntry, rPhoneticEntry, rLocale)); 65*cdf0e10cSrcweir } 66*cdf0e10cSrcweir 67*cdf0e10cSrcweir sal_Int16 SAL_CALL IndexEntrySupplier_Unicode::compareIndexEntry( 68*cdf0e10cSrcweir const OUString& rIndexEntry1, const OUString& rPhoneticEntry1, const lang::Locale& rLocale1, 69*cdf0e10cSrcweir const OUString& rIndexEntry2, const OUString& rPhoneticEntry2, const lang::Locale& rLocale2 ) 70*cdf0e10cSrcweir throw (RuntimeException) 71*cdf0e10cSrcweir { 72*cdf0e10cSrcweir sal_Int16 result = 73*cdf0e10cSrcweir index->getIndexWeight(getEntry(rIndexEntry1, rPhoneticEntry1, rLocale1)) - 74*cdf0e10cSrcweir index->getIndexWeight(getEntry(rIndexEntry2, rPhoneticEntry2, rLocale2)); 75*cdf0e10cSrcweir if (result == 0) 76*cdf0e10cSrcweir return IndexEntrySupplier_Common::compareIndexEntry( 77*cdf0e10cSrcweir rIndexEntry1, rPhoneticEntry1, rLocale1, 78*cdf0e10cSrcweir rIndexEntry2, rPhoneticEntry2, rLocale2); 79*cdf0e10cSrcweir return result > 0 ? 
1 : -1; 80*cdf0e10cSrcweir } 81*cdf0e10cSrcweir 82*cdf0e10cSrcweir OUString SAL_CALL IndexEntrySupplier_Unicode::getIndexCharacter( const OUString& rIndexEntry, 83*cdf0e10cSrcweir const lang::Locale& rLocale, const OUString& rAlgorithm ) throw (RuntimeException) { 84*cdf0e10cSrcweir 85*cdf0e10cSrcweir if (loadAlgorithm( rLocale, rAlgorithm, CollatorOptions::CollatorOptions_IGNORE_CASE_ACCENT)) 86*cdf0e10cSrcweir return index->getIndexDescription(rIndexEntry); 87*cdf0e10cSrcweir else 88*cdf0e10cSrcweir return IndexEntrySupplier_Common::getIndexCharacter(rIndexEntry, rLocale, rAlgorithm); 89*cdf0e10cSrcweir } 90*cdf0e10cSrcweir 91*cdf0e10cSrcweir IndexTable::IndexTable() 92*cdf0e10cSrcweir { 93*cdf0e10cSrcweir table = NULL; 94*cdf0e10cSrcweir } 95*cdf0e10cSrcweir 96*cdf0e10cSrcweir IndexTable::~IndexTable() 97*cdf0e10cSrcweir { 98*cdf0e10cSrcweir if (table) free(table); 99*cdf0e10cSrcweir } 100*cdf0e10cSrcweir 101*cdf0e10cSrcweir void IndexTable::init(sal_Unicode start_, sal_Unicode end_, IndexKey *keys, sal_Int16 key_count, Index *index) 102*cdf0e10cSrcweir { 103*cdf0e10cSrcweir start=start_; 104*cdf0e10cSrcweir end=end_; 105*cdf0e10cSrcweir table = (sal_uInt8*) malloc((end-start+1)*sizeof(sal_uInt8)); 106*cdf0e10cSrcweir for (sal_Unicode i = start; i <= end; i++) { 107*cdf0e10cSrcweir sal_Int16 j; 108*cdf0e10cSrcweir for (j = 0; j < key_count; j++) { 109*cdf0e10cSrcweir if (keys[j].key > 0 && (i == keys[j].key || index->compare(i, keys[j].key) == 0)) { 110*cdf0e10cSrcweir table[i-start] = sal::static_int_cast<sal_uInt8>(j); 111*cdf0e10cSrcweir break; 112*cdf0e10cSrcweir } 113*cdf0e10cSrcweir } 114*cdf0e10cSrcweir if (j == key_count) 115*cdf0e10cSrcweir table[i-start] = 0xFF; 116*cdf0e10cSrcweir } 117*cdf0e10cSrcweir } 118*cdf0e10cSrcweir 119*cdf0e10cSrcweir Index::Index(const com::sun::star::uno::Reference < com::sun::star::lang::XMultiServiceFactory >& rxMSF) 120*cdf0e10cSrcweir { 121*cdf0e10cSrcweir collator = new CollatorImpl(rxMSF); 122*cdf0e10cSrcweir } 
123*cdf0e10cSrcweir 124*cdf0e10cSrcweir Index::~Index() 125*cdf0e10cSrcweir { 126*cdf0e10cSrcweir delete collator; 127*cdf0e10cSrcweir } 128*cdf0e10cSrcweir 129*cdf0e10cSrcweir sal_Int16 Index::compare(sal_Unicode c1, sal_Unicode c2) 130*cdf0e10cSrcweir { 131*cdf0e10cSrcweir return sal::static_int_cast<sal_Int16>( collator->compareString(OUString(&c1, 1), OUString(&c2, 1)) ); 132*cdf0e10cSrcweir } 133*cdf0e10cSrcweir 134*cdf0e10cSrcweir sal_Int16 Index::getIndexWeight(const OUString& rIndexEntry) 135*cdf0e10cSrcweir { 136*cdf0e10cSrcweir sal_Int32 startPos=0; 137*cdf0e10cSrcweir if (skipping_chars.getLength() > 0) 138*cdf0e10cSrcweir while (skipping_chars.indexOf(rIndexEntry[startPos]) >= 0) 139*cdf0e10cSrcweir startPos++; 140*cdf0e10cSrcweir if (mkey_count > 0) { 141*cdf0e10cSrcweir for (sal_Int16 i = 0; i < mkey_count; i++) { 142*cdf0e10cSrcweir sal_Int32 len = keys[mkeys[i]].mkey.getLength(); 143*cdf0e10cSrcweir if (collator->compareSubstring(rIndexEntry, startPos, len, 144*cdf0e10cSrcweir keys[mkeys[i]].mkey, 0, len) == 0) 145*cdf0e10cSrcweir return mkeys[i]; 146*cdf0e10cSrcweir } 147*cdf0e10cSrcweir } 148*cdf0e10cSrcweir sal_Unicode code = rIndexEntry[startPos]; 149*cdf0e10cSrcweir for (sal_Int16 i = 0; i < table_count; i++) { 150*cdf0e10cSrcweir if (tables[i].start <= code && code <= tables[i].end) 151*cdf0e10cSrcweir return tables[i].table[code-tables[i].start]; 152*cdf0e10cSrcweir } 153*cdf0e10cSrcweir return 0xFF; 154*cdf0e10cSrcweir } 155*cdf0e10cSrcweir 156*cdf0e10cSrcweir OUString Index::getIndexDescription(const OUString& rIndexEntry) 157*cdf0e10cSrcweir { 158*cdf0e10cSrcweir sal_Int16 wgt = getIndexWeight(rIndexEntry); 159*cdf0e10cSrcweir if (wgt < MAX_KEYS) { 160*cdf0e10cSrcweir if (keys[wgt].desc.getLength()) 161*cdf0e10cSrcweir return keys[wgt].desc; 162*cdf0e10cSrcweir else if (keys[wgt].key > 0) 163*cdf0e10cSrcweir return OUString(&keys[wgt].key, 1); 164*cdf0e10cSrcweir else 165*cdf0e10cSrcweir return keys[wgt].mkey; 166*cdf0e10cSrcweir } 
167*cdf0e10cSrcweir sal_Int32 nPos=0; 168*cdf0e10cSrcweir sal_uInt32 indexChar=rIndexEntry.iterateCodePoints(&nPos, 0); 169*cdf0e10cSrcweir return OUString(&indexChar, 1); 170*cdf0e10cSrcweir } 171*cdf0e10cSrcweir 172*cdf0e10cSrcweir #define LOCALE_EN lang::Locale(OUString::createFromAscii("en"), OUString(), OUString()) 173*cdf0e10cSrcweir 174*cdf0e10cSrcweir void Index::makeIndexKeys(const lang::Locale &rLocale, const OUString &algorithm) throw (RuntimeException) 175*cdf0e10cSrcweir { 176*cdf0e10cSrcweir OUString keyStr = LocaleData().getIndexKeysByAlgorithm(rLocale, algorithm); 177*cdf0e10cSrcweir 178*cdf0e10cSrcweir if (!keyStr.getLength()) { 179*cdf0e10cSrcweir keyStr = LocaleData().getIndexKeysByAlgorithm(LOCALE_EN, 180*cdf0e10cSrcweir LocaleData().getDefaultIndexAlgorithm(LOCALE_EN)); 181*cdf0e10cSrcweir if (!keyStr) 182*cdf0e10cSrcweir throw RuntimeException(); 183*cdf0e10cSrcweir } 184*cdf0e10cSrcweir 185*cdf0e10cSrcweir sal_Int16 len = sal::static_int_cast<sal_Int16>( keyStr.getLength() ); 186*cdf0e10cSrcweir mkey_count=key_count=0; 187*cdf0e10cSrcweir skipping_chars=OUString(); 188*cdf0e10cSrcweir sal_Int16 i, j; 189*cdf0e10cSrcweir 190*cdf0e10cSrcweir for (i = 0; i < len && key_count < MAX_KEYS; i++) 191*cdf0e10cSrcweir { 192*cdf0e10cSrcweir sal_Unicode curr = keyStr[i]; 193*cdf0e10cSrcweir sal_Unicode close = sal_Unicode(')'); 194*cdf0e10cSrcweir 195*cdf0e10cSrcweir if (unicode::isWhiteSpace(curr)) 196*cdf0e10cSrcweir continue; 197*cdf0e10cSrcweir 198*cdf0e10cSrcweir switch(curr) { 199*cdf0e10cSrcweir case sal_Unicode('-'): 200*cdf0e10cSrcweir if (key_count > 0 && i + 1 < len ) { 201*cdf0e10cSrcweir for (curr = keyStr[++i]; key_count < MAX_KEYS && keys[key_count-1].key < curr; key_count++) { 202*cdf0e10cSrcweir keys[key_count].key = keys[key_count-1].key+1; 203*cdf0e10cSrcweir keys[key_count].desc = OUString(); 204*cdf0e10cSrcweir } 205*cdf0e10cSrcweir } else 206*cdf0e10cSrcweir throw RuntimeException(); 207*cdf0e10cSrcweir break; 208*cdf0e10cSrcweir 
case sal_Unicode('['): 209*cdf0e10cSrcweir for (i++; i < len && keyStr[i] != sal_Unicode(']'); i++) { 210*cdf0e10cSrcweir if (unicode::isWhiteSpace(keyStr[i])) { 211*cdf0e10cSrcweir continue; 212*cdf0e10cSrcweir } else if (keyStr[i] == sal_Unicode('_')) { 213*cdf0e10cSrcweir for (curr=keyStr[i-1]+1; curr <= keyStr[i+1]; curr++) 214*cdf0e10cSrcweir skipping_chars+=OUString(curr); 215*cdf0e10cSrcweir i+=2; 216*cdf0e10cSrcweir } else { 217*cdf0e10cSrcweir skipping_chars+=OUString(keyStr[i]); 218*cdf0e10cSrcweir } 219*cdf0e10cSrcweir } 220*cdf0e10cSrcweir break; 221*cdf0e10cSrcweir case sal_Unicode('{'): 222*cdf0e10cSrcweir close = sal_Unicode('}'); 223*cdf0e10cSrcweir case sal_Unicode('('): 224*cdf0e10cSrcweir if (key_count > 0) { 225*cdf0e10cSrcweir sal_Int16 end = i+1; 226*cdf0e10cSrcweir for (end=i+1; end < len && keyStr[end] != close; end++) ; 227*cdf0e10cSrcweir 228*cdf0e10cSrcweir if (end >= len) // no found 229*cdf0e10cSrcweir throw RuntimeException(); 230*cdf0e10cSrcweir if (close == sal_Unicode(')')) 231*cdf0e10cSrcweir keys[key_count-1].desc = keyStr.copy(i+1, end-i-1); 232*cdf0e10cSrcweir else { 233*cdf0e10cSrcweir mkeys[mkey_count++]=key_count; 234*cdf0e10cSrcweir keys[key_count].key = 0; 235*cdf0e10cSrcweir keys[key_count].mkey = keyStr.copy(i+1, end-i-1); 236*cdf0e10cSrcweir keys[key_count++].desc=OUString(); 237*cdf0e10cSrcweir } 238*cdf0e10cSrcweir i=end+1; 239*cdf0e10cSrcweir } else 240*cdf0e10cSrcweir throw RuntimeException(); 241*cdf0e10cSrcweir break; 242*cdf0e10cSrcweir default: 243*cdf0e10cSrcweir keys[key_count].key = curr; 244*cdf0e10cSrcweir keys[key_count++].desc = OUString(); 245*cdf0e10cSrcweir break; 246*cdf0e10cSrcweir } 247*cdf0e10cSrcweir } 248*cdf0e10cSrcweir for (i = 0; i < mkey_count; i++) { 249*cdf0e10cSrcweir for (j=i+1; j < mkey_count; j++) { 250*cdf0e10cSrcweir if (keys[mkeys[i]].mkey.getLength() < keys[mkeys[j]].mkey.getLength()) { 251*cdf0e10cSrcweir sal_Int16 k = mkeys[i]; 252*cdf0e10cSrcweir mkeys[i] = mkeys[j]; 
253*cdf0e10cSrcweir mkeys[j] = k; 254*cdf0e10cSrcweir } 255*cdf0e10cSrcweir } 256*cdf0e10cSrcweir } 257*cdf0e10cSrcweir } 258*cdf0e10cSrcweir 259*cdf0e10cSrcweir void Index::init(const lang::Locale &rLocale, const OUString& algorithm) throw (RuntimeException) 260*cdf0e10cSrcweir { 261*cdf0e10cSrcweir makeIndexKeys(rLocale, algorithm); 262*cdf0e10cSrcweir 263*cdf0e10cSrcweir Sequence< UnicodeScript > scriptList = LocaleData().getUnicodeScripts( rLocale ); 264*cdf0e10cSrcweir 265*cdf0e10cSrcweir if (scriptList.getLength() == 0) { 266*cdf0e10cSrcweir scriptList = LocaleData().getUnicodeScripts(LOCALE_EN); 267*cdf0e10cSrcweir if (scriptList.getLength() == 0) 268*cdf0e10cSrcweir throw RuntimeException(); 269*cdf0e10cSrcweir } 270*cdf0e10cSrcweir 271*cdf0e10cSrcweir table_count = sal::static_int_cast<sal_Int16>( scriptList.getLength() ); 272*cdf0e10cSrcweir if (table_count > MAX_TABLES) 273*cdf0e10cSrcweir throw RuntimeException(); 274*cdf0e10cSrcweir 275*cdf0e10cSrcweir collator->loadCollatorAlgorithm(algorithm, rLocale, CollatorOptions::CollatorOptions_IGNORE_CASE_ACCENT); 276*cdf0e10cSrcweir sal_Int16 j=0; 277*cdf0e10cSrcweir sal_Unicode start = unicode::getUnicodeScriptStart((UnicodeScript)0); 278*cdf0e10cSrcweir sal_Unicode end = unicode::getUnicodeScriptEnd((UnicodeScript)0); 279*cdf0e10cSrcweir for (sal_Int16 i= (scriptList[0] == (UnicodeScript)0) ? 1 : 0; i< scriptList.getLength(); i++) { 280*cdf0e10cSrcweir if (unicode::getUnicodeScriptStart(scriptList[i]) != end+1) { 281*cdf0e10cSrcweir tables[j++].init(start, end, keys, key_count, this); 282*cdf0e10cSrcweir start = unicode::getUnicodeScriptStart(scriptList[i]); 283*cdf0e10cSrcweir } 284*cdf0e10cSrcweir end = unicode::getUnicodeScriptEnd(scriptList[i]); 285*cdf0e10cSrcweir } 286*cdf0e10cSrcweir tables[j++].init(start, end, keys, key_count, this); 287*cdf0e10cSrcweir table_count = j; 288*cdf0e10cSrcweir } 289*cdf0e10cSrcweir 290*cdf0e10cSrcweir } } } } 291