1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 // MARKER(update_precomp.py): autogen include statement, do not remove 29 #include "precompiled_i18npool.hxx" 30 31 #include <breakiteratorImpl.hxx> 32 #include <unicode/uchar.h> 33 #include <rtl/ustrbuf.hxx> 34 35 using namespace ::com::sun::star::uno; 36 using namespace ::com::sun::star::lang; 37 using namespace ::rtl; 38 39 namespace com { namespace sun { namespace star { namespace i18n { 40 41 BreakIteratorImpl::BreakIteratorImpl( const Reference < XMultiServiceFactory >& rxMSF ) : xMSF( rxMSF ) 42 { 43 } 44 45 BreakIteratorImpl::BreakIteratorImpl() 46 { 47 } 48 49 BreakIteratorImpl::~BreakIteratorImpl() 50 { 51 // Clear lookuptable 52 for (size_t l = 0; l < lookupTable.size(); l++) 53 delete lookupTable[l]; 54 lookupTable.clear(); 55 } 56 57 #define LBI getLocaleSpecificBreakIterator(rLocale) 58 59 sal_Int32 SAL_CALL BreakIteratorImpl::nextCharacters( const OUString& Text, sal_Int32 nStartPos, 60 const Locale &rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone ) 61 throw(RuntimeException) 62 { 63 if (nCount < 0) throw RuntimeException(); 64 65 return LBI->nextCharacters( Text, nStartPos, rLocale, nCharacterIteratorMode, nCount, nDone); 66 } 67 68 sal_Int32 SAL_CALL BreakIteratorImpl::previousCharacters( const OUString& Text, sal_Int32 nStartPos, 69 const Locale& rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone ) 70 throw(RuntimeException) 71 { 72 if (nCount < 0) throw RuntimeException(); 73 74 return LBI->previousCharacters( Text, nStartPos, rLocale, nCharacterIteratorMode, nCount, nDone); 75 } 76 77 #define isZWSP(c) (ch == 0x200B) 78 79 static sal_Int32 skipSpace(const OUString& Text, sal_Int32 nPos, sal_Int32 len, sal_Int16 rWordType, sal_Bool bDirection) 80 { 81 sal_uInt32 ch=0; 82 sal_Int32 pos=nPos; 83 switch (rWordType) { 84 case WordType::ANYWORD_IGNOREWHITESPACES: 85 if (bDirection) 86 while (nPos < len && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, 1)) || isZWSP(ch))) nPos=pos; 87 else 88 while (nPos > 0 && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch))) nPos=pos; 89 break; 90 case WordType::DICTIONARY_WORD: 91 if (bDirection) 92 while (nPos < len && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, 1)) || isZWSP(ch) || 93 ! (ch == 0x002E || u_isalnum(ch)))) nPos=pos; 94 else 95 while (nPos > 0 && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch) || 96 ! (ch == 0x002E || u_isalnum(ch)))) nPos=pos; 97 break; 98 case WordType::WORD_COUNT: 99 if (bDirection) 100 while (nPos < len && (u_isUWhiteSpace(ch = Text.iterateCodePoints(&pos, 1)) || isZWSP(ch))) nPos=pos; 101 else 102 while (nPos > 0 && (u_isUWhiteSpace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch))) nPos=pos; 103 break; 104 } 105 return nPos; 106 } 107 108 Boundary SAL_CALL BreakIteratorImpl::nextWord( const OUString& Text, sal_Int32 nStartPos, 109 const Locale& rLocale, sal_Int16 rWordType ) throw(RuntimeException) 110 { 111 sal_Int32 len = Text.getLength(); 112 if( nStartPos < 0 || len == 0 ) 113 result.endPos = result.startPos = 0; 114 else if (nStartPos >= len) 115 result.endPos = result.startPos = len; 116 else { 117 result = LBI->nextWord(Text, nStartPos, rLocale, rWordType); 118 119 nStartPos = skipSpace(Text, result.startPos, len, rWordType, sal_True); 120 121 if ( nStartPos != result.startPos) { 122 if( nStartPos >= len ) 123 result.startPos = result.endPos = len; 124 else { 125 result = LBI->getWordBoundary(Text, nStartPos, rLocale, rWordType, sal_True); 126 // i88041: avoid startPos goes back to nStartPos when switching between Latin and CJK scripts 127 if (result.startPos < nStartPos) result.startPos = nStartPos; 128 } 129 } 130 } 131 return result; 132 } 133 134 static inline sal_Bool SAL_CALL isCJK( const Locale& rLocale ) { 135 return rLocale.Language.equalsAscii("zh") || rLocale.Language.equalsAscii("ja") || rLocale.Language.equalsAscii("ko"); 136 } 137 138 Boundary SAL_CALL BreakIteratorImpl::previousWord( const OUString& Text, sal_Int32 nStartPos, 139 const Locale& rLocale, sal_Int16 rWordType) throw(RuntimeException) 140 { 141 sal_Int32 len = Text.getLength(); 142 if( nStartPos <= 0 || len == 0 ) { 143 result.endPos = result.startPos = 0; 144 return result; 145 } else if (nStartPos > len) { 146 result.endPos = result.startPos = len; 147 return result; 148 } 149 150 sal_Int32 nPos = skipSpace(Text, nStartPos, len, rWordType, sal_False); 151 152 // if some spaces are skiped, and the script type is Asian with no CJK rLocale, we have to return 153 // (nStartPos, -1) for caller to send correct rLocale for loading correct dictionary. 154 result.startPos = nPos; 155 if (nPos != nStartPos && nPos > 0 && !isCJK(rLocale) && getScriptClass(Text.iterateCodePoints(&nPos, -1)) == ScriptType::ASIAN) { 156 result.endPos = -1; 157 return result; 158 } 159 160 return LBI->previousWord(Text, result.startPos, rLocale, rWordType); 161 } 162 163 164 Boundary SAL_CALL BreakIteratorImpl::getWordBoundary( const OUString& Text, sal_Int32 nPos, const Locale& rLocale, 165 sal_Int16 rWordType, sal_Bool bDirection ) throw(RuntimeException) 166 { 167 sal_Int32 len = Text.getLength(); 168 if( nPos < 0 || len == 0 ) 169 result.endPos = result.startPos = 0; 170 else if (nPos > len) 171 result.endPos = result.startPos = len; 172 else { 173 sal_Int32 next, prev; 174 next = skipSpace(Text, nPos, len, rWordType, sal_True); 175 prev = skipSpace(Text, nPos, len, rWordType, sal_False); 176 if (prev == 0 && next == len) { 177 result.endPos = result.startPos = nPos; 178 } else if (prev == 0 && ! bDirection) { 179 result.endPos = result.startPos = 0; 180 } else if (next == len && bDirection) { 181 result.endPos = result.startPos = len; 182 } else { 183 if (next != prev) { 184 if (next == nPos && next != len) 185 bDirection = sal_True; 186 else if (prev == nPos && prev != 0) 187 bDirection = sal_False; 188 else 189 nPos = bDirection ? next : prev; 190 } 191 result = LBI->getWordBoundary(Text, nPos, rLocale, rWordType, bDirection); 192 } 193 } 194 return result; 195 } 196 197 sal_Bool SAL_CALL BreakIteratorImpl::isBeginWord( const OUString& Text, sal_Int32 nPos, 198 const Locale& rLocale, sal_Int16 rWordType ) throw(RuntimeException) 199 { 200 sal_Int32 len = Text.getLength(); 201 202 if (nPos < 0 || nPos >= len) return sal_False; 203 204 sal_Int32 tmp = skipSpace(Text, nPos, len, rWordType, sal_True); 205 206 if (tmp != nPos) return sal_False; 207 208 result = getWordBoundary(Text, nPos, rLocale, rWordType, sal_True); 209 210 return result.startPos == nPos; 211 } 212 213 sal_Bool SAL_CALL BreakIteratorImpl::isEndWord( const OUString& Text, sal_Int32 nPos, 214 const Locale& rLocale, sal_Int16 rWordType ) throw(RuntimeException) 215 { 216 sal_Int32 len = Text.getLength(); 217 218 if (nPos <= 0 || nPos > len) return sal_False; 219 220 sal_Int32 tmp = skipSpace(Text, nPos, len, rWordType, sal_False); 221 222 if (tmp != nPos) return sal_False; 223 224 result = getWordBoundary(Text, nPos, rLocale, rWordType, sal_False); 225 226 return result.endPos == nPos; 227 } 228 229 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfSentence( const OUString& Text, sal_Int32 nStartPos, 230 const Locale &rLocale ) throw(RuntimeException) 231 { 232 if (nStartPos < 0 || nStartPos > Text.getLength()) 233 return -1; 234 if (Text.getLength() == 0) return 0; 235 return LBI->beginOfSentence(Text, nStartPos, rLocale); 236 } 237 238 sal_Int32 SAL_CALL BreakIteratorImpl::endOfSentence( const OUString& Text, sal_Int32 nStartPos, 239 const Locale &rLocale ) throw(RuntimeException) 240 { 241 if (nStartPos < 0 || nStartPos > Text.getLength()) 242 return -1; 243 if (Text.getLength() == 0) return 0; 244 return LBI->endOfSentence(Text, nStartPos, rLocale); 245 } 246 247 LineBreakResults SAL_CALL BreakIteratorImpl::getLineBreak( const OUString& Text, sal_Int32 nStartPos, 248 const Locale& rLocale, sal_Int32 nMinBreakPos, const LineBreakHyphenationOptions& hOptions, 249 const LineBreakUserOptions& bOptions ) throw(RuntimeException) 250 { 251 return LBI->getLineBreak(Text, nStartPos, rLocale, nMinBreakPos, hOptions, bOptions); 252 } 253 254 sal_Int16 SAL_CALL BreakIteratorImpl::getScriptType( const OUString& Text, sal_Int32 nPos ) 255 throw(RuntimeException) 256 { 257 return (nPos < 0 || nPos >= Text.getLength()) ? ScriptType::WEAK : 258 getScriptClass(Text.iterateCodePoints(&nPos, 0)); 259 } 260 261 262 /** Increments/decrements position first, then obtains character. 263 @return current position, may be -1 or text length if string was consumed. 264 */ 265 static sal_Int32 SAL_CALL iterateCodePoints(const OUString& Text, sal_Int32 &nStartPos, sal_Int32 inc, sal_uInt32& ch) { 266 sal_Int32 nLen = Text.getLength(); 267 if (nStartPos + inc < 0 || nStartPos + inc >= nLen) { 268 ch = 0; 269 nStartPos = nStartPos + inc < 0 ? -1 : nLen; 270 } else { 271 ch = Text.iterateCodePoints(&nStartPos, inc); 272 // Fix for #i80436#. 273 // erAck: 2009-06-30T21:52+0200 This logic looks somewhat 274 // suspicious as if it cures a symptom.. anyway, had to add 275 // nStartPos < Text.getLength() to silence the (correct) assertion 276 // in rtl_uString_iterateCodePoints() if Text was one character 277 // (codepoint) only, made up of a surrogate pair. 278 //if (inc > 0 && nStartPos < Text.getLength()) 279 // ch = Text.iterateCodePoints(&nStartPos, 0); 280 // With surrogates, nStartPos may actually point behind string 281 // now, even if inc is only +1 282 if (inc > 0) 283 ch = (nStartPos < nLen ? Text.iterateCodePoints(&nStartPos, 0) : 0); 284 } 285 return nStartPos; 286 } 287 288 289 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfScript( const OUString& Text, 290 sal_Int32 nStartPos, sal_Int16 ScriptType ) throw(RuntimeException) 291 { 292 if (nStartPos < 0 || nStartPos >= Text.getLength()) 293 return -1; 294 295 if(ScriptType != getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) 296 return -1; 297 298 if (nStartPos == 0) return 0; 299 sal_uInt32 ch=0; 300 while (iterateCodePoints(Text, nStartPos, -1, ch) >= 0 && ScriptType == getScriptClass(ch)) { 301 if (nStartPos == 0) return 0; 302 } 303 304 return iterateCodePoints(Text, nStartPos, 1, ch); 305 } 306 307 sal_Int32 SAL_CALL BreakIteratorImpl::endOfScript( const OUString& Text, 308 sal_Int32 nStartPos, sal_Int16 ScriptType ) throw(RuntimeException) 309 { 310 if (nStartPos < 0 || nStartPos >= Text.getLength()) 311 return -1; 312 313 if(ScriptType != getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) 314 return -1; 315 316 sal_Int32 strLen = Text.getLength(); 317 sal_uInt32 ch=0; 318 while(iterateCodePoints(Text, nStartPos, 1, ch) < strLen ) { 319 sal_Int16 currentCharScriptType = getScriptClass(ch); 320 if(ScriptType != currentCharScriptType && currentCharScriptType != ScriptType::WEAK) 321 break; 322 } 323 return nStartPos; 324 } 325 326 sal_Int32 SAL_CALL BreakIteratorImpl::previousScript( const OUString& Text, 327 sal_Int32 nStartPos, sal_Int16 ScriptType ) throw(RuntimeException) 328 { 329 if (nStartPos < 0) 330 return -1; 331 if (nStartPos > Text.getLength()) 332 nStartPos = Text.getLength(); 333 334 sal_Int16 numberOfChange = (ScriptType == getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) ? 3 : 2; 335 336 sal_uInt32 ch=0; 337 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, -1, ch) >= 0) { 338 if ((((numberOfChange % 2) == 0) ^ (ScriptType != getScriptClass(ch)))) 339 numberOfChange--; 340 else if (nStartPos == 0) { 341 if (numberOfChange > 0) 342 numberOfChange--; 343 if (nStartPos > 0) 344 Text.iterateCodePoints(&nStartPos, -1); 345 else 346 return -1; 347 } 348 } 349 return numberOfChange == 0 ? iterateCodePoints(Text, nStartPos, 1, ch) : -1; 350 } 351 352 sal_Int32 SAL_CALL BreakIteratorImpl::nextScript( const OUString& Text, sal_Int32 nStartPos, 353 sal_Int16 ScriptType ) throw(RuntimeException) 354 355 { 356 if (nStartPos < 0) 357 nStartPos = 0; 358 sal_Int32 strLen = Text.getLength(); 359 if (nStartPos > strLen) 360 return -1; 361 362 sal_Int16 numberOfChange = (ScriptType == getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) ? 2 : 1; 363 364 sal_uInt32 ch=0; 365 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, 1, ch) < strLen) { 366 sal_Int16 currentCharScriptType = getScriptClass(ch); 367 if ((numberOfChange == 1) ? (ScriptType == currentCharScriptType) : 368 (ScriptType != currentCharScriptType && currentCharScriptType != ScriptType::WEAK)) 369 numberOfChange--; 370 } 371 return numberOfChange == 0 ? nStartPos : -1; 372 } 373 374 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfCharBlock( const OUString& Text, sal_Int32 nStartPos, 375 const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException) 376 { 377 if (CharType == CharType::ANY_CHAR) return 0; 378 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1; 379 if (CharType != (sal_Int16)u_charType( Text.iterateCodePoints(&nStartPos, 0))) return -1; 380 381 sal_Int32 nPos=nStartPos; 382 while(nStartPos > 0 && CharType == (sal_Int16)u_charType(Text.iterateCodePoints(&nPos, -1))) { nStartPos=nPos; } 383 return nStartPos; // begin of char block is inclusive 384 } 385 386 sal_Int32 SAL_CALL BreakIteratorImpl::endOfCharBlock( const OUString& Text, sal_Int32 nStartPos, 387 const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException) 388 { 389 sal_Int32 strLen = Text.getLength(); 390 391 if (CharType == CharType::ANY_CHAR) return strLen; // end of char block is exclusive 392 if (nStartPos < 0 || nStartPos >= strLen) return -1; 393 if (CharType != (sal_Int16)u_charType(Text.iterateCodePoints(&nStartPos, 0))) return -1; 394 395 sal_uInt32 ch=0; 396 while(iterateCodePoints(Text, nStartPos, 1, ch) < strLen && CharType == (sal_Int16)u_charType(ch)) {} 397 return nStartPos; // end of char block is exclusive 398 } 399 400 sal_Int32 SAL_CALL BreakIteratorImpl::nextCharBlock( const OUString& Text, sal_Int32 nStartPos, 401 const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException) 402 { 403 if (CharType == CharType::ANY_CHAR) return -1; 404 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1; 405 406 sal_Int16 numberOfChange = (CharType == (sal_Int16)u_charType(Text.iterateCodePoints(&nStartPos, 0))) ? 2 : 1; 407 sal_Int32 strLen = Text.getLength(); 408 409 sal_uInt32 ch=0; 410 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, 1, ch) < strLen) { 411 if ((CharType != (sal_Int16)u_charType(ch)) ^ (numberOfChange == 1)) 412 numberOfChange--; 413 } 414 return numberOfChange == 0 ? nStartPos : -1; 415 } 416 417 sal_Int32 SAL_CALL BreakIteratorImpl::previousCharBlock( const OUString& Text, sal_Int32 nStartPos, 418 const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException) 419 { 420 if(CharType == CharType::ANY_CHAR) return -1; 421 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1; 422 423 sal_Int16 numberOfChange = (CharType == (sal_Int16)u_charType(Text.iterateCodePoints(&nStartPos, 0))) ? 3 : 2; 424 425 sal_uInt32 ch=0; 426 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, -1, ch) >= 0) { 427 if (((numberOfChange % 2) == 0) ^ (CharType != (sal_Int16)u_charType(ch))) 428 numberOfChange--; 429 if (nStartPos == 0 && numberOfChange > 0) { 430 numberOfChange--; 431 if (numberOfChange == 0) return nStartPos; 432 } 433 } 434 return numberOfChange == 0 ? iterateCodePoints(Text, nStartPos, 1, ch) : -1; 435 } 436 437 438 439 sal_Int16 SAL_CALL BreakIteratorImpl::getWordType( const OUString& /*Text*/, 440 sal_Int32 /*nPos*/, const Locale& /*rLocale*/ ) throw(RuntimeException) 441 { 442 return 0; 443 } 444 445 typedef struct { 446 UBlockCode from; 447 UBlockCode to; 448 sal_Int16 script; 449 } UBlock2Script; 450 451 // for a list of the UBLOCK_... values see: 452 // http://icu-project.org/apiref/icu4c/uchar_8h.html 453 // where enum UBlockCode is defined. 454 // See also http://www.unicode.org/charts/ for general reference 455 static UBlock2Script scriptList[] = { 456 {UBLOCK_NO_BLOCK, UBLOCK_NO_BLOCK, ScriptType::WEAK}, 457 {UBLOCK_BASIC_LATIN, UBLOCK_ARMENIAN, ScriptType::LATIN}, 458 {UBLOCK_HEBREW, UBLOCK_MYANMAR, ScriptType::COMPLEX}, 459 {UBLOCK_GEORGIAN, UBLOCK_GEORGIAN, ScriptType::LATIN}, 460 {UBLOCK_HANGUL_JAMO, UBLOCK_HANGUL_JAMO, ScriptType::ASIAN}, 461 {UBLOCK_ETHIOPIC, UBLOCK_ETHIOPIC, ScriptType::COMPLEX}, 462 {UBLOCK_CHEROKEE, UBLOCK_RUNIC, ScriptType::LATIN}, 463 {UBLOCK_KHMER, UBLOCK_MONGOLIAN, ScriptType::COMPLEX}, 464 {UBLOCK_LATIN_EXTENDED_ADDITIONAL, UBLOCK_GREEK_EXTENDED, ScriptType::LATIN}, 465 {UBLOCK_CJK_RADICALS_SUPPLEMENT, UBLOCK_HANGUL_SYLLABLES, ScriptType::ASIAN}, 466 {UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, ScriptType::ASIAN}, 467 {UBLOCK_ARABIC_PRESENTATION_FORMS_A, UBLOCK_ARABIC_PRESENTATION_FORMS_A, ScriptType::COMPLEX}, 468 {UBLOCK_CJK_COMPATIBILITY_FORMS, UBLOCK_CJK_COMPATIBILITY_FORMS, ScriptType::ASIAN}, 469 {UBLOCK_ARABIC_PRESENTATION_FORMS_B, UBLOCK_ARABIC_PRESENTATION_FORMS_B, ScriptType::COMPLEX}, 470 {UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, ScriptType::ASIAN}, 471 {UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, ScriptType::ASIAN}, 472 {UBLOCK_CJK_STROKES, UBLOCK_CJK_STROKES, ScriptType::ASIAN}, 473 {UBLOCK_LATIN_EXTENDED_C, UBLOCK_LATIN_EXTENDED_D, ScriptType::LATIN} 474 }; 475 476 #define scriptListCount sizeof (scriptList) / sizeof (UBlock2Script) 477 478 sal_Int16 BreakIteratorImpl::getScriptClass(sal_uInt32 currentChar) 479 { 480 static sal_uInt32 lastChar = 0; 481 static sal_Int16 nRet = 0; 482 483 if (currentChar != lastChar) { 484 lastChar = currentChar; 485 486 //JP 21.9.2001: handle specific characters - always as weak 487 // definition of 1 - this breaks a word 488 // 2 - this can be inside a word 489 // 0x20 & 0xA0 - Bug 102975, declare western space and non-break space as WEAK char. 490 if( 1 == currentChar || 2 == currentChar || 0x20 == currentChar || 0xA0 == currentChar) 491 nRet = ScriptType::WEAK; 492 // workaround for Coptic 493 else if ( 0x2C80 <= currentChar && 0x2CE3 >= currentChar) 494 nRet = ScriptType::LATIN; 495 // work-around for ligatures (see http://www.unicode.org/charts/PDF/UFB00.pdf) 496 else if ((0xFB00 <= currentChar && currentChar <= 0xFB06) || 497 (0xFB13 <= currentChar && currentChar <= 0xFB17)) 498 nRet = ScriptType::LATIN; 499 else { 500 UBlockCode block=ublock_getCode(currentChar); 501 sal_uInt16 i; 502 for ( i = 0; i < scriptListCount; i++) { 503 if (block <= scriptList[i].to) break; 504 } 505 nRet=(i < scriptListCount && block >= scriptList[i].from) ? scriptList[i].script : ScriptType::WEAK; 506 } 507 } 508 return nRet; 509 } 510 511 static inline sal_Bool operator == (const Locale& l1, const Locale& l2) { 512 return l1.Language == l2.Language && l1.Country == l2.Country && l1.Variant == l2.Variant; 513 } 514 515 sal_Bool SAL_CALL BreakIteratorImpl::createLocaleSpecificBreakIterator(const OUString& aLocaleName) throw( RuntimeException ) 516 { 517 // to share service between same Language but different Country code, like zh_CN and zh_TW 518 for (size_t l = 0; l < lookupTable.size(); l++) { 519 lookupTableItem *listItem = lookupTable[l]; 520 if (aLocaleName == listItem->aLocale.Language) { 521 xBI = listItem->xBI; 522 return sal_True; 523 } 524 } 525 526 Reference < uno::XInterface > xI = xMSF->createInstance( 527 OUString::createFromAscii("com.sun.star.i18n.BreakIterator_") + aLocaleName); 528 529 if ( xI.is() ) { 530 xI->queryInterface( getCppuType((const Reference< XBreakIterator>*)0) ) >>= xBI; 531 if (xBI.is()) { 532 lookupTable.push_back(new lookupTableItem(Locale(aLocaleName, aLocaleName, aLocaleName), xBI)); 533 return sal_True; 534 } 535 } 536 return sal_False; 537 } 538 539 Reference < XBreakIterator > SAL_CALL 540 BreakIteratorImpl::getLocaleSpecificBreakIterator(const Locale& rLocale) throw (RuntimeException) 541 { 542 if (xBI.is() && rLocale == aLocale) 543 return xBI; 544 else if (xMSF.is()) { 545 aLocale = rLocale; 546 547 for (size_t i = 0; i < lookupTable.size(); i++) { 548 lookupTableItem *listItem = lookupTable[i]; 549 if (rLocale == listItem->aLocale) 550 return xBI = listItem->xBI; 551 } 552 553 sal_Unicode under = (sal_Unicode)'_'; 554 555 sal_Int32 l = rLocale.Language.getLength(); 556 sal_Int32 c = rLocale.Country.getLength(); 557 sal_Int32 v = rLocale.Variant.getLength(); 558 OUStringBuffer aBuf(l+c+v+3); 559 560 if ((l > 0 && c > 0 && v > 0 && 561 // load service with name <base>_<lang>_<country>_<varian> 562 createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).append( 563 rLocale.Country).append(under).append(rLocale.Variant).makeStringAndClear())) || 564 (l > 0 && c > 0 && 565 // load service with name <base>_<lang>_<country> 566 createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).append( 567 rLocale.Country).makeStringAndClear())) || 568 (l > 0 && c > 0 && rLocale.Language.compareToAscii("zh") == 0 && 569 (rLocale.Country.compareToAscii("HK") == 0 || 570 rLocale.Country.compareToAscii("MO") == 0) && 571 // if the country code is HK or MO, one more step to try TW. 572 createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).appendAscii( 573 "TW").makeStringAndClear())) || 574 (l > 0 && 575 // load service with name <base>_<lang> 576 createLocaleSpecificBreakIterator(rLocale.Language)) || 577 // load default service with name <base>_Unicode 578 createLocaleSpecificBreakIterator(OUString::createFromAscii("Unicode"))) { 579 lookupTable.push_back( new lookupTableItem(aLocale, xBI) ); 580 return xBI; 581 } 582 } 583 throw RuntimeException(); 584 } 585 586 const sal_Char cBreakIterator[] = "com.sun.star.i18n.BreakIterator"; 587 588 OUString SAL_CALL 589 BreakIteratorImpl::getImplementationName(void) throw( RuntimeException ) 590 { 591 return OUString::createFromAscii(cBreakIterator); 592 } 593 594 sal_Bool SAL_CALL 595 BreakIteratorImpl::supportsService(const OUString& rServiceName) throw( RuntimeException ) 596 { 597 return !rServiceName.compareToAscii(cBreakIterator); 598 } 599 600 Sequence< OUString > SAL_CALL 601 BreakIteratorImpl::getSupportedServiceNames(void) throw( RuntimeException ) 602 { 603 Sequence< OUString > aRet(1); 604 aRet[0] = OUString::createFromAscii(cBreakIterator); 605 return aRet; 606 } 607 608 } } } } 609 610