/*************************************************************************
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * Copyright 2000, 2010 Oracle and/or its affiliates.
 *
 * OpenOffice.org - a multi-platform office productivity suite
 *
 * This file is part of OpenOffice.org.
 *
 * OpenOffice.org is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3
 * only, as published by the Free Software Foundation.
 *
 * OpenOffice.org is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License version 3 for more details
 * (a copy is included in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU Lesser General Public License
 * version 3 along with OpenOffice.org.  If not, see
 * <http://www.openoffice.org/license.html>
 * for a copy of the LGPLv3 License.
 *
 ************************************************************************/

// MARKER(update_precomp.py): autogen include statement, do not remove
#include "precompiled_i18npool.hxx"

// xdictionary.cpp: implementation of the xdictionary class.
32*cdf0e10cSrcweir // 33*cdf0e10cSrcweir ////////////////////////////////////////////////////////////////////// 34*cdf0e10cSrcweir 35*cdf0e10cSrcweir 36*cdf0e10cSrcweir #include <rtl/ustrbuf.hxx> 37*cdf0e10cSrcweir 38*cdf0e10cSrcweir #include <com/sun/star/i18n/WordType.hpp> 39*cdf0e10cSrcweir #include <xdictionary.hxx> 40*cdf0e10cSrcweir #include <unicode/uchar.h> 41*cdf0e10cSrcweir #include <string.h> 42*cdf0e10cSrcweir #include <breakiteratorImpl.hxx> 43*cdf0e10cSrcweir 44*cdf0e10cSrcweir ////////////////////////////////////////////////////////////////////// 45*cdf0e10cSrcweir // Construction/Destruction 46*cdf0e10cSrcweir ////////////////////////////////////////////////////////////////////// 47*cdf0e10cSrcweir 48*cdf0e10cSrcweir using namespace rtl; 49*cdf0e10cSrcweir 50*cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n { 51*cdf0e10cSrcweir 52*cdf0e10cSrcweir extern "C" { static void SAL_CALL thisModule() {} } 53*cdf0e10cSrcweir 54*cdf0e10cSrcweir xdictionary::xdictionary(const sal_Char *lang) : 55*cdf0e10cSrcweir existMark( NULL ), 56*cdf0e10cSrcweir index1( NULL ), 57*cdf0e10cSrcweir index2( NULL ), 58*cdf0e10cSrcweir lenArray( NULL ), 59*cdf0e10cSrcweir dataArea( NULL ), 60*cdf0e10cSrcweir hModule( NULL ), 61*cdf0e10cSrcweir boundary(), 62*cdf0e10cSrcweir japaneseWordBreak( sal_False ) 63*cdf0e10cSrcweir #if USE_CELL_BOUNDARY_CODE 64*cdf0e10cSrcweir // For CTL breakiterator, where the word boundary should not be inside cell. 
65*cdf0e10cSrcweir , 66*cdf0e10cSrcweir useCellBoundary( sal_False ), 67*cdf0e10cSrcweir cellBoundary( NULL ) 68*cdf0e10cSrcweir #endif 69*cdf0e10cSrcweir { 70*cdf0e10cSrcweir index1 = 0; 71*cdf0e10cSrcweir #ifdef SAL_DLLPREFIX 72*cdf0e10cSrcweir OUStringBuffer aBuf( strlen(lang) + 7 + 6 ); // mostly "lib*.so" (with * == dict_zh) 73*cdf0e10cSrcweir aBuf.appendAscii( SAL_DLLPREFIX ); 74*cdf0e10cSrcweir #else 75*cdf0e10cSrcweir OUStringBuffer aBuf( strlen(lang) + 7 + 4 ); // mostly "*.dll" (with * == dict_zh) 76*cdf0e10cSrcweir #endif 77*cdf0e10cSrcweir aBuf.appendAscii( "dict_" ).appendAscii( lang ).appendAscii( SAL_DLLEXTENSION ); 78*cdf0e10cSrcweir hModule = osl_loadModuleRelative( &thisModule, aBuf.makeStringAndClear().pData, SAL_LOADMODULE_DEFAULT ); 79*cdf0e10cSrcweir if( hModule ) { 80*cdf0e10cSrcweir sal_IntPtr (*func)(); 81*cdf0e10cSrcweir func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString::createFromAscii("getExistMark").pData ); 82*cdf0e10cSrcweir existMark = (sal_uInt8*) (*func)(); 83*cdf0e10cSrcweir func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString::createFromAscii("getIndex1").pData ); 84*cdf0e10cSrcweir index1 = (sal_Int16*) (*func)(); 85*cdf0e10cSrcweir func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString::createFromAscii("getIndex2").pData ); 86*cdf0e10cSrcweir index2 = (sal_Int32*) (*func)(); 87*cdf0e10cSrcweir func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString::createFromAscii("getLenArray").pData ); 88*cdf0e10cSrcweir lenArray = (sal_Int32*) (*func)(); 89*cdf0e10cSrcweir func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString::createFromAscii("getDataArea").pData ); 90*cdf0e10cSrcweir dataArea = (sal_Unicode*) (*func)(); 91*cdf0e10cSrcweir } 92*cdf0e10cSrcweir else 93*cdf0e10cSrcweir { 94*cdf0e10cSrcweir existMark = NULL; 95*cdf0e10cSrcweir index1 = NULL; 96*cdf0e10cSrcweir index2 = NULL; 97*cdf0e10cSrcweir lenArray = NULL; 98*cdf0e10cSrcweir dataArea = NULL; 99*cdf0e10cSrcweir } 
100*cdf0e10cSrcweir 101*cdf0e10cSrcweir for (sal_Int32 i = 0; i < CACHE_MAX; i++) 102*cdf0e10cSrcweir cache[i].size = 0; 103*cdf0e10cSrcweir 104*cdf0e10cSrcweir #if USE_CELL_BOUNDARY_CODE 105*cdf0e10cSrcweir useCellBoundary = sal_False; 106*cdf0e10cSrcweir cellBoundary = NULL; 107*cdf0e10cSrcweir #endif 108*cdf0e10cSrcweir japaneseWordBreak = sal_False; 109*cdf0e10cSrcweir } 110*cdf0e10cSrcweir 111*cdf0e10cSrcweir xdictionary::~xdictionary() { 112*cdf0e10cSrcweir osl_unloadModule(hModule); 113*cdf0e10cSrcweir for (sal_Int32 i = 0; i < CACHE_MAX; i++) { 114*cdf0e10cSrcweir if (cache[i].size > 0) { 115*cdf0e10cSrcweir delete cache[i].contents; 116*cdf0e10cSrcweir delete cache[i].wordboundary; 117*cdf0e10cSrcweir } 118*cdf0e10cSrcweir } 119*cdf0e10cSrcweir } 120*cdf0e10cSrcweir 121*cdf0e10cSrcweir void xdictionary::setJapaneseWordBreak() 122*cdf0e10cSrcweir { 123*cdf0e10cSrcweir japaneseWordBreak = sal_True; 124*cdf0e10cSrcweir } 125*cdf0e10cSrcweir 126*cdf0e10cSrcweir sal_Bool xdictionary::exists(const sal_uInt32 c) { 127*cdf0e10cSrcweir // 0x1FFF is the hardcoded limit in gendict for existMarks 128*cdf0e10cSrcweir sal_Bool exist = (existMark && ((c>>3) < 0x1FFF)) ? 
sal::static_int_cast<sal_Bool>((existMark[c>>3] & (1<<(c&0x07))) != 0) : sal_False; 129*cdf0e10cSrcweir if (!exist && japaneseWordBreak) 130*cdf0e10cSrcweir return BreakIteratorImpl::getScriptClass(c) == ScriptType::ASIAN; 131*cdf0e10cSrcweir else 132*cdf0e10cSrcweir return exist; 133*cdf0e10cSrcweir } 134*cdf0e10cSrcweir 135*cdf0e10cSrcweir sal_Int32 xdictionary::getLongestMatch(const sal_Unicode* str, sal_Int32 sLen) { 136*cdf0e10cSrcweir 137*cdf0e10cSrcweir if ( !index1 ) return 0; 138*cdf0e10cSrcweir 139*cdf0e10cSrcweir sal_Int16 idx = index1[str[0] >> 8]; 140*cdf0e10cSrcweir 141*cdf0e10cSrcweir if (idx == 0xFF) return 0; 142*cdf0e10cSrcweir 143*cdf0e10cSrcweir idx = (idx<<8) | (str[0]&0xff); 144*cdf0e10cSrcweir 145*cdf0e10cSrcweir sal_uInt32 begin = index2[idx], end = index2[idx+1]; 146*cdf0e10cSrcweir 147*cdf0e10cSrcweir if (begin == 0) return 0; 148*cdf0e10cSrcweir 149*cdf0e10cSrcweir str++; sLen--; // first character is not stored in the dictionary 150*cdf0e10cSrcweir for (sal_uInt32 i = end; i > begin; i--) { 151*cdf0e10cSrcweir sal_Int32 len = lenArray[i] - lenArray[i - 1]; 152*cdf0e10cSrcweir if (sLen >= len) { 153*cdf0e10cSrcweir const sal_Unicode *dstr = dataArea + lenArray[i-1]; 154*cdf0e10cSrcweir sal_Int32 pos = 0; 155*cdf0e10cSrcweir 156*cdf0e10cSrcweir while (pos < len && dstr[pos] == str[pos]) { pos++; } 157*cdf0e10cSrcweir 158*cdf0e10cSrcweir if (pos == len) 159*cdf0e10cSrcweir return len + 1; 160*cdf0e10cSrcweir } 161*cdf0e10cSrcweir } 162*cdf0e10cSrcweir return 0; 163*cdf0e10cSrcweir } 164*cdf0e10cSrcweir 165*cdf0e10cSrcweir 166*cdf0e10cSrcweir /* 167*cdf0e10cSrcweir * c-tor 168*cdf0e10cSrcweir */ 169*cdf0e10cSrcweir 170*cdf0e10cSrcweir WordBreakCache::WordBreakCache() : 171*cdf0e10cSrcweir length( 0 ), 172*cdf0e10cSrcweir contents( NULL ), 173*cdf0e10cSrcweir wordboundary( NULL ), 174*cdf0e10cSrcweir size( 0 ) 175*cdf0e10cSrcweir { 176*cdf0e10cSrcweir } 177*cdf0e10cSrcweir 178*cdf0e10cSrcweir /* 179*cdf0e10cSrcweir * Compare two unicode 
string, 180*cdf0e10cSrcweir */ 181*cdf0e10cSrcweir 182*cdf0e10cSrcweir sal_Bool WordBreakCache::equals(const sal_Unicode* str, Boundary& boundary) { 183*cdf0e10cSrcweir // Different length, different string. 184*cdf0e10cSrcweir if (length != boundary.endPos - boundary.startPos) return sal_False; 185*cdf0e10cSrcweir 186*cdf0e10cSrcweir for (sal_Int32 i = 0; i < length; i++) 187*cdf0e10cSrcweir if (contents[i] != str[i + boundary.startPos]) return sal_False; 188*cdf0e10cSrcweir 189*cdf0e10cSrcweir return sal_True; 190*cdf0e10cSrcweir } 191*cdf0e10cSrcweir 192*cdf0e10cSrcweir 193*cdf0e10cSrcweir /* 194*cdf0e10cSrcweir * Retrieve the segment containing the character at pos. 195*cdf0e10cSrcweir * @param pos : Position of the given character. 196*cdf0e10cSrcweir * @return true if CJK. 197*cdf0e10cSrcweir */ 198*cdf0e10cSrcweir sal_Bool xdictionary::seekSegment(const rtl::OUString &rText, sal_Int32 pos, 199*cdf0e10cSrcweir Boundary& segBoundary) 200*cdf0e10cSrcweir { 201*cdf0e10cSrcweir sal_Int32 indexUtf16; 202*cdf0e10cSrcweir segBoundary.endPos = segBoundary.startPos = pos; 203*cdf0e10cSrcweir 204*cdf0e10cSrcweir indexUtf16 = pos; 205*cdf0e10cSrcweir while (indexUtf16 > 0) 206*cdf0e10cSrcweir { 207*cdf0e10cSrcweir sal_uInt32 ch = rText.iterateCodePoints(&indexUtf16, -1); 208*cdf0e10cSrcweir if (u_isWhitespace(ch) || exists(ch)) 209*cdf0e10cSrcweir segBoundary.startPos = indexUtf16; 210*cdf0e10cSrcweir else 211*cdf0e10cSrcweir break; 212*cdf0e10cSrcweir } 213*cdf0e10cSrcweir 214*cdf0e10cSrcweir indexUtf16 = pos; 215*cdf0e10cSrcweir while (indexUtf16 < rText.getLength()) 216*cdf0e10cSrcweir { 217*cdf0e10cSrcweir sal_uInt32 ch = rText.iterateCodePoints(&indexUtf16, 1); 218*cdf0e10cSrcweir if (u_isWhitespace(ch) || exists(ch)) 219*cdf0e10cSrcweir segBoundary.endPos = indexUtf16; 220*cdf0e10cSrcweir else 221*cdf0e10cSrcweir break; 222*cdf0e10cSrcweir } 223*cdf0e10cSrcweir 224*cdf0e10cSrcweir indexUtf16 = segBoundary.startPos; 225*cdf0e10cSrcweir 
rText.iterateCodePoints(&indexUtf16, 1); 226*cdf0e10cSrcweir return segBoundary.endPos > indexUtf16; 227*cdf0e10cSrcweir } 228*cdf0e10cSrcweir 229*cdf0e10cSrcweir #define KANJA 1 230*cdf0e10cSrcweir #define KATAKANA 2 231*cdf0e10cSrcweir #define HIRAKANA 3 232*cdf0e10cSrcweir 233*cdf0e10cSrcweir static sal_Int16 JapaneseCharType(sal_Unicode c) 234*cdf0e10cSrcweir { 235*cdf0e10cSrcweir if (0x3041 <= c && c <= 0x309e) 236*cdf0e10cSrcweir return HIRAKANA; 237*cdf0e10cSrcweir if ((0x30a1 <= c && c <= 0x30fe) || (0xff65 <= c && c <= 0xff9f)) 238*cdf0e10cSrcweir return KATAKANA; 239*cdf0e10cSrcweir return KANJA; 240*cdf0e10cSrcweir } 241*cdf0e10cSrcweir 242*cdf0e10cSrcweir WordBreakCache& xdictionary::getCache(const sal_Unicode *text, Boundary& wordBoundary) 243*cdf0e10cSrcweir { 244*cdf0e10cSrcweir 245*cdf0e10cSrcweir WordBreakCache& aCache = cache[text[0] & 0x1f]; 246*cdf0e10cSrcweir 247*cdf0e10cSrcweir if (aCache.size != 0 && aCache.equals(text, wordBoundary)) 248*cdf0e10cSrcweir return aCache; 249*cdf0e10cSrcweir 250*cdf0e10cSrcweir sal_Int32 len = wordBoundary.endPos - wordBoundary.startPos; 251*cdf0e10cSrcweir 252*cdf0e10cSrcweir if (aCache.size == 0 || len > aCache.size) { 253*cdf0e10cSrcweir if (aCache.size != 0) { 254*cdf0e10cSrcweir delete aCache.contents; 255*cdf0e10cSrcweir delete aCache.wordboundary; 256*cdf0e10cSrcweir aCache.size = len; 257*cdf0e10cSrcweir } 258*cdf0e10cSrcweir else 259*cdf0e10cSrcweir aCache.size = len > DEFAULT_SIZE ? 
len : DEFAULT_SIZE; 260*cdf0e10cSrcweir aCache.contents = new sal_Unicode[aCache.size + 1]; 261*cdf0e10cSrcweir aCache.wordboundary = new sal_Int32[aCache.size + 2]; 262*cdf0e10cSrcweir } 263*cdf0e10cSrcweir aCache.length = len; 264*cdf0e10cSrcweir memcpy(aCache.contents, text + wordBoundary.startPos, len * sizeof(sal_Unicode)); 265*cdf0e10cSrcweir *(aCache.contents + len) = 0x0000; 266*cdf0e10cSrcweir // reset the wordboundary in cache 267*cdf0e10cSrcweir memset(aCache.wordboundary, '\0', sizeof(sal_Int32)*(len + 2)); 268*cdf0e10cSrcweir 269*cdf0e10cSrcweir sal_Int32 i = 0; // loop variable 270*cdf0e10cSrcweir while (aCache.wordboundary[i] < aCache.length) { 271*cdf0e10cSrcweir len = 0; 272*cdf0e10cSrcweir // look the continuous white space as one word and cashe it 273*cdf0e10cSrcweir while (u_isWhitespace((sal_uInt32)text[wordBoundary.startPos + aCache.wordboundary[i] + len])) 274*cdf0e10cSrcweir len ++; 275*cdf0e10cSrcweir 276*cdf0e10cSrcweir if (len == 0) { 277*cdf0e10cSrcweir const sal_Unicode *str = text + wordBoundary.startPos + aCache.wordboundary[i]; 278*cdf0e10cSrcweir sal_Int32 slen = aCache.length - aCache.wordboundary[i]; 279*cdf0e10cSrcweir sal_Int16 type = 0, count = 0; 280*cdf0e10cSrcweir for (;len == 0 && slen > 0; str++, slen--) { 281*cdf0e10cSrcweir len = getLongestMatch(str, slen); 282*cdf0e10cSrcweir if (len == 0) { 283*cdf0e10cSrcweir if (!japaneseWordBreak) { 284*cdf0e10cSrcweir len = 1; 285*cdf0e10cSrcweir } else { 286*cdf0e10cSrcweir if (count == 0) 287*cdf0e10cSrcweir type = JapaneseCharType(*str); 288*cdf0e10cSrcweir else if (type != JapaneseCharType(*str)) 289*cdf0e10cSrcweir break; 290*cdf0e10cSrcweir count++; 291*cdf0e10cSrcweir } 292*cdf0e10cSrcweir } 293*cdf0e10cSrcweir } 294*cdf0e10cSrcweir if (count) { 295*cdf0e10cSrcweir aCache.wordboundary[i+1] = aCache.wordboundary[i] + count; 296*cdf0e10cSrcweir i++; 297*cdf0e10cSrcweir 298*cdf0e10cSrcweir #if USE_CELL_BOUNDARY_CODE 299*cdf0e10cSrcweir if (useCellBoundary) { 300*cdf0e10cSrcweir 
sal_Int32 cBoundary = cellBoundary[aCache.wordboundary[i] + wordBoundary.startPos - 1]; 301*cdf0e10cSrcweir if (cBoundary > 0) 302*cdf0e10cSrcweir aCache.wordboundary[i] = cBoundary - wordBoundary.startPos; 303*cdf0e10cSrcweir } 304*cdf0e10cSrcweir #endif 305*cdf0e10cSrcweir } 306*cdf0e10cSrcweir } 307*cdf0e10cSrcweir 308*cdf0e10cSrcweir if (len) { 309*cdf0e10cSrcweir aCache.wordboundary[i+1] = aCache.wordboundary[i] + len; 310*cdf0e10cSrcweir i++; 311*cdf0e10cSrcweir 312*cdf0e10cSrcweir #if USE_CELL_BOUNDARY_CODE 313*cdf0e10cSrcweir if (useCellBoundary) { 314*cdf0e10cSrcweir sal_Int32 cBoundary = cellBoundary[aCache.wordboundary[i] + wordBoundary.startPos - 1]; 315*cdf0e10cSrcweir if (cBoundary > 0) 316*cdf0e10cSrcweir aCache.wordboundary[i] = cBoundary - wordBoundary.startPos; 317*cdf0e10cSrcweir } 318*cdf0e10cSrcweir #endif 319*cdf0e10cSrcweir } 320*cdf0e10cSrcweir } 321*cdf0e10cSrcweir aCache.wordboundary[i + 1] = aCache.length + 1; 322*cdf0e10cSrcweir 323*cdf0e10cSrcweir return aCache; 324*cdf0e10cSrcweir } 325*cdf0e10cSrcweir 326*cdf0e10cSrcweir Boundary xdictionary::previousWord(const OUString& rText, sal_Int32 anyPos, sal_Int16 wordType) 327*cdf0e10cSrcweir { 328*cdf0e10cSrcweir // looking for the first non-whitespace character from anyPos 329*cdf0e10cSrcweir sal_uInt32 ch = rText.iterateCodePoints(&anyPos, -1); 330*cdf0e10cSrcweir 331*cdf0e10cSrcweir while (anyPos > 0 && u_isWhitespace(ch)) ch = rText.iterateCodePoints(&anyPos, -1); 332*cdf0e10cSrcweir 333*cdf0e10cSrcweir return getWordBoundary(rText, anyPos, wordType, true); 334*cdf0e10cSrcweir } 335*cdf0e10cSrcweir 336*cdf0e10cSrcweir Boundary xdictionary::nextWord(const OUString& rText, sal_Int32 anyPos, sal_Int16 wordType) 337*cdf0e10cSrcweir { 338*cdf0e10cSrcweir boundary = getWordBoundary(rText, anyPos, wordType, true); 339*cdf0e10cSrcweir anyPos = boundary.endPos; 340*cdf0e10cSrcweir if (anyPos < rText.getLength()) { 341*cdf0e10cSrcweir // looknig for the first non-whitespace character from anyPos 
342*cdf0e10cSrcweir sal_uInt32 ch = rText.iterateCodePoints(&anyPos, 1); 343*cdf0e10cSrcweir while (u_isWhitespace(ch)) ch=rText.iterateCodePoints(&anyPos, 1); 344*cdf0e10cSrcweir rText.iterateCodePoints(&anyPos, -1); 345*cdf0e10cSrcweir } 346*cdf0e10cSrcweir 347*cdf0e10cSrcweir return getWordBoundary(rText, anyPos, wordType, true); 348*cdf0e10cSrcweir } 349*cdf0e10cSrcweir 350*cdf0e10cSrcweir Boundary xdictionary::getWordBoundary(const OUString& rText, sal_Int32 anyPos, sal_Int16 wordType, sal_Bool bDirection) 351*cdf0e10cSrcweir { 352*cdf0e10cSrcweir const sal_Unicode *text=rText.getStr(); 353*cdf0e10cSrcweir sal_Int32 len=rText.getLength(); 354*cdf0e10cSrcweir if (anyPos >= len || anyPos < 0) { 355*cdf0e10cSrcweir boundary.startPos = boundary.endPos = anyPos < 0 ? 0 : len; 356*cdf0e10cSrcweir } else if (seekSegment(rText, anyPos, boundary)) { // character in dict 357*cdf0e10cSrcweir WordBreakCache& aCache = getCache(text, boundary); 358*cdf0e10cSrcweir sal_Int32 i = 0; 359*cdf0e10cSrcweir 360*cdf0e10cSrcweir while (aCache.wordboundary[i] <= anyPos - boundary.startPos) i++; 361*cdf0e10cSrcweir 362*cdf0e10cSrcweir sal_Int32 startPos = aCache.wordboundary[i - 1]; 363*cdf0e10cSrcweir // if bDirection is false 364*cdf0e10cSrcweir if (!bDirection && startPos > 0 && startPos == (anyPos - boundary.startPos)) 365*cdf0e10cSrcweir { 366*cdf0e10cSrcweir sal_Int32 indexUtf16 = anyPos-1; 367*cdf0e10cSrcweir sal_uInt32 ch = rText.iterateCodePoints(&indexUtf16, 1); 368*cdf0e10cSrcweir if (u_isWhitespace(ch)) 369*cdf0e10cSrcweir i--; 370*cdf0e10cSrcweir } 371*cdf0e10cSrcweir boundary.endPos = boundary.startPos; 372*cdf0e10cSrcweir rText.iterateCodePoints(&boundary.endPos, aCache.wordboundary[i]); 373*cdf0e10cSrcweir rText.iterateCodePoints(&boundary.startPos, aCache.wordboundary[i-1]); 374*cdf0e10cSrcweir } else { 375*cdf0e10cSrcweir boundary.startPos = anyPos; 376*cdf0e10cSrcweir if (anyPos < len) rText.iterateCodePoints(&anyPos, 1); 377*cdf0e10cSrcweir boundary.endPos = anyPos 
< len ? anyPos : len; 378*cdf0e10cSrcweir } 379*cdf0e10cSrcweir if (wordType == WordType::WORD_COUNT) { 380*cdf0e10cSrcweir // skip punctuation for word count. 381*cdf0e10cSrcweir while (boundary.endPos < len) 382*cdf0e10cSrcweir { 383*cdf0e10cSrcweir sal_Int32 indexUtf16 = boundary.endPos; 384*cdf0e10cSrcweir if (u_ispunct(rText.iterateCodePoints(&indexUtf16, 1))) 385*cdf0e10cSrcweir boundary.endPos = indexUtf16; 386*cdf0e10cSrcweir else 387*cdf0e10cSrcweir break; 388*cdf0e10cSrcweir } 389*cdf0e10cSrcweir } 390*cdf0e10cSrcweir 391*cdf0e10cSrcweir return boundary; 392*cdf0e10cSrcweir } 393*cdf0e10cSrcweir 394*cdf0e10cSrcweir #if USE_CELL_BOUNDARY_CODE 395*cdf0e10cSrcweir void xdictionary::setCellBoundary(sal_Int32* cellArray) 396*cdf0e10cSrcweir { 397*cdf0e10cSrcweir useCellBoundary = sal_True; 398*cdf0e10cSrcweir cellBoundary = cellArray; 399*cdf0e10cSrcweir } 400*cdf0e10cSrcweir #endif 401*cdf0e10cSrcweir 402*cdf0e10cSrcweir } } } } 403