1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
29*cdf0e10cSrcweir #include "precompiled_i18npool.hxx"
30*cdf0e10cSrcweir 
31*cdf0e10cSrcweir // xdictionary.cpp: implementation of the xdictionary class.
32*cdf0e10cSrcweir //
33*cdf0e10cSrcweir //////////////////////////////////////////////////////////////////////
34*cdf0e10cSrcweir 
35*cdf0e10cSrcweir 
36*cdf0e10cSrcweir #include <rtl/ustrbuf.hxx>
37*cdf0e10cSrcweir 
38*cdf0e10cSrcweir #include <com/sun/star/i18n/WordType.hpp>
39*cdf0e10cSrcweir #include <xdictionary.hxx>
40*cdf0e10cSrcweir #include <unicode/uchar.h>
41*cdf0e10cSrcweir #include <string.h>
42*cdf0e10cSrcweir #include <breakiteratorImpl.hxx>
43*cdf0e10cSrcweir 
44*cdf0e10cSrcweir //////////////////////////////////////////////////////////////////////
45*cdf0e10cSrcweir // Construction/Destruction
46*cdf0e10cSrcweir //////////////////////////////////////////////////////////////////////
47*cdf0e10cSrcweir 
48*cdf0e10cSrcweir using namespace rtl;
49*cdf0e10cSrcweir 
50*cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n {
51*cdf0e10cSrcweir 
52*cdf0e10cSrcweir extern "C" { static void SAL_CALL thisModule() {} }
53*cdf0e10cSrcweir 
54*cdf0e10cSrcweir xdictionary::xdictionary(const sal_Char *lang) :
55*cdf0e10cSrcweir     existMark( NULL ),
56*cdf0e10cSrcweir     index1( NULL ),
57*cdf0e10cSrcweir     index2( NULL ),
58*cdf0e10cSrcweir     lenArray( NULL ),
59*cdf0e10cSrcweir     dataArea( NULL ),
60*cdf0e10cSrcweir     hModule( NULL ),
61*cdf0e10cSrcweir     boundary(),
62*cdf0e10cSrcweir     japaneseWordBreak( sal_False )
63*cdf0e10cSrcweir #if USE_CELL_BOUNDARY_CODE
64*cdf0e10cSrcweir     // For CTL breakiterator, where the word boundary should not be inside cell.
65*cdf0e10cSrcweir     ,
66*cdf0e10cSrcweir     useCellBoundary( sal_False ),
67*cdf0e10cSrcweir     cellBoundary( NULL )
68*cdf0e10cSrcweir #endif
69*cdf0e10cSrcweir {
70*cdf0e10cSrcweir 	index1 = 0;
71*cdf0e10cSrcweir #ifdef SAL_DLLPREFIX
72*cdf0e10cSrcweir     OUStringBuffer aBuf( strlen(lang) + 7 + 6 );    // mostly "lib*.so" (with * == dict_zh)
73*cdf0e10cSrcweir     aBuf.appendAscii( SAL_DLLPREFIX );
74*cdf0e10cSrcweir #else
75*cdf0e10cSrcweir     OUStringBuffer aBuf( strlen(lang) + 7 + 4 );    // mostly "*.dll" (with * == dict_zh)
76*cdf0e10cSrcweir #endif
77*cdf0e10cSrcweir     aBuf.appendAscii( "dict_" ).appendAscii( lang ).appendAscii( SAL_DLLEXTENSION );
78*cdf0e10cSrcweir         hModule = osl_loadModuleRelative( &thisModule, aBuf.makeStringAndClear().pData, SAL_LOADMODULE_DEFAULT );
79*cdf0e10cSrcweir         if( hModule ) {
80*cdf0e10cSrcweir             sal_IntPtr (*func)();
81*cdf0e10cSrcweir             func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString::createFromAscii("getExistMark").pData );
82*cdf0e10cSrcweir             existMark = (sal_uInt8*) (*func)();
83*cdf0e10cSrcweir             func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString::createFromAscii("getIndex1").pData );
84*cdf0e10cSrcweir             index1 = (sal_Int16*) (*func)();
85*cdf0e10cSrcweir             func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString::createFromAscii("getIndex2").pData );
86*cdf0e10cSrcweir             index2 = (sal_Int32*) (*func)();
87*cdf0e10cSrcweir             func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString::createFromAscii("getLenArray").pData );
88*cdf0e10cSrcweir             lenArray = (sal_Int32*) (*func)();
89*cdf0e10cSrcweir             func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString::createFromAscii("getDataArea").pData );
90*cdf0e10cSrcweir             dataArea = (sal_Unicode*) (*func)();
91*cdf0e10cSrcweir         }
92*cdf0e10cSrcweir         else
93*cdf0e10cSrcweir 		{
94*cdf0e10cSrcweir             existMark = NULL;
95*cdf0e10cSrcweir 			index1 = NULL;
96*cdf0e10cSrcweir 			index2 = NULL;
97*cdf0e10cSrcweir 			lenArray = NULL;
98*cdf0e10cSrcweir 			dataArea = NULL;
99*cdf0e10cSrcweir 		}
100*cdf0e10cSrcweir 
101*cdf0e10cSrcweir 		for (sal_Int32 i = 0; i < CACHE_MAX; i++)
102*cdf0e10cSrcweir             cache[i].size = 0;
103*cdf0e10cSrcweir 
104*cdf0e10cSrcweir #if USE_CELL_BOUNDARY_CODE
105*cdf0e10cSrcweir         useCellBoundary = sal_False;
106*cdf0e10cSrcweir         cellBoundary = NULL;
107*cdf0e10cSrcweir #endif
108*cdf0e10cSrcweir         japaneseWordBreak = sal_False;
109*cdf0e10cSrcweir }
110*cdf0e10cSrcweir 
111*cdf0e10cSrcweir xdictionary::~xdictionary() {
112*cdf0e10cSrcweir         osl_unloadModule(hModule);
113*cdf0e10cSrcweir         for (sal_Int32 i = 0; i < CACHE_MAX; i++) {
114*cdf0e10cSrcweir             if (cache[i].size > 0) {
115*cdf0e10cSrcweir                 delete cache[i].contents;
116*cdf0e10cSrcweir                 delete cache[i].wordboundary;
117*cdf0e10cSrcweir             }
118*cdf0e10cSrcweir         }
119*cdf0e10cSrcweir }
120*cdf0e10cSrcweir 
121*cdf0e10cSrcweir void xdictionary::setJapaneseWordBreak()
122*cdf0e10cSrcweir {
123*cdf0e10cSrcweir         japaneseWordBreak = sal_True;
124*cdf0e10cSrcweir }
125*cdf0e10cSrcweir 
126*cdf0e10cSrcweir sal_Bool xdictionary::exists(const sal_uInt32 c) {
127*cdf0e10cSrcweir         // 0x1FFF is the hardcoded limit in gendict for existMarks
128*cdf0e10cSrcweir         sal_Bool exist = (existMark && ((c>>3) < 0x1FFF)) ? sal::static_int_cast<sal_Bool>((existMark[c>>3] & (1<<(c&0x07))) != 0) : sal_False;
129*cdf0e10cSrcweir         if (!exist && japaneseWordBreak)
130*cdf0e10cSrcweir             return BreakIteratorImpl::getScriptClass(c) == ScriptType::ASIAN;
131*cdf0e10cSrcweir         else
132*cdf0e10cSrcweir             return exist;
133*cdf0e10cSrcweir }
134*cdf0e10cSrcweir 
135*cdf0e10cSrcweir sal_Int32 xdictionary::getLongestMatch(const sal_Unicode* str, sal_Int32 sLen) {
136*cdf0e10cSrcweir 
137*cdf0e10cSrcweir 		if ( !index1 ) return 0;
138*cdf0e10cSrcweir 
139*cdf0e10cSrcweir         sal_Int16 idx = index1[str[0] >> 8];
140*cdf0e10cSrcweir 
141*cdf0e10cSrcweir         if (idx == 0xFF) return 0;
142*cdf0e10cSrcweir 
143*cdf0e10cSrcweir         idx = (idx<<8) | (str[0]&0xff);
144*cdf0e10cSrcweir 
145*cdf0e10cSrcweir         sal_uInt32 begin = index2[idx], end = index2[idx+1];
146*cdf0e10cSrcweir 
147*cdf0e10cSrcweir         if (begin == 0) return 0;
148*cdf0e10cSrcweir 
149*cdf0e10cSrcweir         str++; sLen--; // first character is not stored in the dictionary
150*cdf0e10cSrcweir         for (sal_uInt32 i = end; i > begin; i--) {
151*cdf0e10cSrcweir             sal_Int32 len = lenArray[i] - lenArray[i - 1];
152*cdf0e10cSrcweir             if (sLen >= len) {
153*cdf0e10cSrcweir                 const sal_Unicode *dstr = dataArea + lenArray[i-1];
154*cdf0e10cSrcweir                 sal_Int32 pos = 0;
155*cdf0e10cSrcweir 
156*cdf0e10cSrcweir                 while (pos < len && dstr[pos] == str[pos]) { pos++; }
157*cdf0e10cSrcweir 
158*cdf0e10cSrcweir                 if (pos == len)
159*cdf0e10cSrcweir                     return len + 1;
160*cdf0e10cSrcweir             }
161*cdf0e10cSrcweir         }
162*cdf0e10cSrcweir         return 0;
163*cdf0e10cSrcweir }
164*cdf0e10cSrcweir 
165*cdf0e10cSrcweir 
166*cdf0e10cSrcweir /*
167*cdf0e10cSrcweir  * c-tor
168*cdf0e10cSrcweir  */
169*cdf0e10cSrcweir 
170*cdf0e10cSrcweir WordBreakCache::WordBreakCache() :
171*cdf0e10cSrcweir     length( 0 ),
172*cdf0e10cSrcweir     contents( NULL ),
173*cdf0e10cSrcweir     wordboundary( NULL ),
174*cdf0e10cSrcweir     size( 0 )
175*cdf0e10cSrcweir {
176*cdf0e10cSrcweir }
177*cdf0e10cSrcweir 
178*cdf0e10cSrcweir /*
179*cdf0e10cSrcweir  * Compare two unicode string,
180*cdf0e10cSrcweir  */
181*cdf0e10cSrcweir 
182*cdf0e10cSrcweir sal_Bool WordBreakCache::equals(const sal_Unicode* str, Boundary& boundary) {
183*cdf0e10cSrcweir         // Different length, different string.
184*cdf0e10cSrcweir         if (length != boundary.endPos - boundary.startPos) return sal_False;
185*cdf0e10cSrcweir 
186*cdf0e10cSrcweir         for (sal_Int32 i = 0; i < length; i++)
187*cdf0e10cSrcweir             if (contents[i] != str[i + boundary.startPos]) return sal_False;
188*cdf0e10cSrcweir 
189*cdf0e10cSrcweir         return sal_True;
190*cdf0e10cSrcweir }
191*cdf0e10cSrcweir 
192*cdf0e10cSrcweir 
193*cdf0e10cSrcweir /*
194*cdf0e10cSrcweir  * Retrieve the segment containing the character at pos.
195*cdf0e10cSrcweir  * @param pos : Position of the given character.
196*cdf0e10cSrcweir  * @return true if CJK.
197*cdf0e10cSrcweir  */
198*cdf0e10cSrcweir sal_Bool xdictionary::seekSegment(const rtl::OUString &rText, sal_Int32 pos,
199*cdf0e10cSrcweir 	Boundary& segBoundary)
200*cdf0e10cSrcweir {
201*cdf0e10cSrcweir     sal_Int32 indexUtf16;
202*cdf0e10cSrcweir     segBoundary.endPos = segBoundary.startPos = pos;
203*cdf0e10cSrcweir 
204*cdf0e10cSrcweir     indexUtf16 = pos;
205*cdf0e10cSrcweir     while (indexUtf16 > 0)
206*cdf0e10cSrcweir     {
207*cdf0e10cSrcweir         sal_uInt32 ch = rText.iterateCodePoints(&indexUtf16, -1);
208*cdf0e10cSrcweir         if (u_isWhitespace(ch) || exists(ch))
209*cdf0e10cSrcweir             segBoundary.startPos = indexUtf16;
210*cdf0e10cSrcweir         else
211*cdf0e10cSrcweir             break;
212*cdf0e10cSrcweir     }
213*cdf0e10cSrcweir 
214*cdf0e10cSrcweir     indexUtf16 = pos;
215*cdf0e10cSrcweir     while (indexUtf16 < rText.getLength())
216*cdf0e10cSrcweir     {
217*cdf0e10cSrcweir         sal_uInt32 ch = rText.iterateCodePoints(&indexUtf16, 1);
218*cdf0e10cSrcweir         if (u_isWhitespace(ch) || exists(ch))
219*cdf0e10cSrcweir             segBoundary.endPos = indexUtf16;
220*cdf0e10cSrcweir         else
221*cdf0e10cSrcweir             break;
222*cdf0e10cSrcweir     }
223*cdf0e10cSrcweir 
224*cdf0e10cSrcweir     indexUtf16 = segBoundary.startPos;
225*cdf0e10cSrcweir     rText.iterateCodePoints(&indexUtf16, 1);
226*cdf0e10cSrcweir     return segBoundary.endPos > indexUtf16;
227*cdf0e10cSrcweir }
228*cdf0e10cSrcweir 
229*cdf0e10cSrcweir #define KANJA       1
230*cdf0e10cSrcweir #define KATAKANA    2
231*cdf0e10cSrcweir #define HIRAKANA    3
232*cdf0e10cSrcweir 
233*cdf0e10cSrcweir static sal_Int16 JapaneseCharType(sal_Unicode c)
234*cdf0e10cSrcweir {
235*cdf0e10cSrcweir     if (0x3041 <= c && c <= 0x309e)
236*cdf0e10cSrcweir         return HIRAKANA;
237*cdf0e10cSrcweir     if ((0x30a1 <= c && c <= 0x30fe) || (0xff65 <= c && c <= 0xff9f))
238*cdf0e10cSrcweir         return KATAKANA;
239*cdf0e10cSrcweir     return KANJA;
240*cdf0e10cSrcweir }
241*cdf0e10cSrcweir 
242*cdf0e10cSrcweir WordBreakCache& xdictionary::getCache(const sal_Unicode *text, Boundary& wordBoundary)
243*cdf0e10cSrcweir {
244*cdf0e10cSrcweir 
245*cdf0e10cSrcweir         WordBreakCache& aCache = cache[text[0] & 0x1f];
246*cdf0e10cSrcweir 
247*cdf0e10cSrcweir         if (aCache.size != 0 && aCache.equals(text, wordBoundary))
248*cdf0e10cSrcweir             return aCache;
249*cdf0e10cSrcweir 
250*cdf0e10cSrcweir         sal_Int32 len = wordBoundary.endPos - wordBoundary.startPos;
251*cdf0e10cSrcweir 
252*cdf0e10cSrcweir         if (aCache.size == 0 || len > aCache.size) {
253*cdf0e10cSrcweir             if (aCache.size != 0) {
254*cdf0e10cSrcweir                 delete aCache.contents;
255*cdf0e10cSrcweir                 delete aCache.wordboundary;
256*cdf0e10cSrcweir                 aCache.size = len;
257*cdf0e10cSrcweir             }
258*cdf0e10cSrcweir             else
259*cdf0e10cSrcweir                 aCache.size = len > DEFAULT_SIZE ? len : DEFAULT_SIZE;
260*cdf0e10cSrcweir             aCache.contents = new sal_Unicode[aCache.size + 1];
261*cdf0e10cSrcweir             aCache.wordboundary = new sal_Int32[aCache.size + 2];
262*cdf0e10cSrcweir         }
263*cdf0e10cSrcweir         aCache.length  = len;
264*cdf0e10cSrcweir         memcpy(aCache.contents, text + wordBoundary.startPos, len * sizeof(sal_Unicode));
265*cdf0e10cSrcweir         *(aCache.contents + len) = 0x0000;
266*cdf0e10cSrcweir         // reset the wordboundary in cache
267*cdf0e10cSrcweir         memset(aCache.wordboundary, '\0', sizeof(sal_Int32)*(len + 2));
268*cdf0e10cSrcweir 
269*cdf0e10cSrcweir         sal_Int32 i = 0;        // loop variable
270*cdf0e10cSrcweir         while (aCache.wordboundary[i] < aCache.length) {
271*cdf0e10cSrcweir             len = 0;
272*cdf0e10cSrcweir             // look the continuous white space as one word and cashe it
273*cdf0e10cSrcweir             while (u_isWhitespace((sal_uInt32)text[wordBoundary.startPos + aCache.wordboundary[i] + len]))
274*cdf0e10cSrcweir                 len ++;
275*cdf0e10cSrcweir 
276*cdf0e10cSrcweir             if (len == 0) {
277*cdf0e10cSrcweir                 const sal_Unicode *str = text + wordBoundary.startPos + aCache.wordboundary[i];
278*cdf0e10cSrcweir                 sal_Int32 slen = aCache.length - aCache.wordboundary[i];
279*cdf0e10cSrcweir                 sal_Int16 type = 0, count = 0;
280*cdf0e10cSrcweir                 for (;len == 0 && slen > 0; str++, slen--) {
281*cdf0e10cSrcweir                     len = getLongestMatch(str, slen);
282*cdf0e10cSrcweir                     if (len == 0) {
283*cdf0e10cSrcweir                         if (!japaneseWordBreak) {
284*cdf0e10cSrcweir                             len = 1;
285*cdf0e10cSrcweir                         } else {
286*cdf0e10cSrcweir                             if (count == 0)
287*cdf0e10cSrcweir                                 type = JapaneseCharType(*str);
288*cdf0e10cSrcweir                             else if (type != JapaneseCharType(*str))
289*cdf0e10cSrcweir                                 break;
290*cdf0e10cSrcweir                             count++;
291*cdf0e10cSrcweir                         }
292*cdf0e10cSrcweir                     }
293*cdf0e10cSrcweir                 }
294*cdf0e10cSrcweir                 if (count) {
295*cdf0e10cSrcweir                     aCache.wordboundary[i+1] = aCache.wordboundary[i] + count;
296*cdf0e10cSrcweir                     i++;
297*cdf0e10cSrcweir 
298*cdf0e10cSrcweir #if USE_CELL_BOUNDARY_CODE
299*cdf0e10cSrcweir                     if (useCellBoundary) {
300*cdf0e10cSrcweir                         sal_Int32 cBoundary = cellBoundary[aCache.wordboundary[i] + wordBoundary.startPos - 1];
301*cdf0e10cSrcweir                         if (cBoundary > 0)
302*cdf0e10cSrcweir                             aCache.wordboundary[i] = cBoundary - wordBoundary.startPos;
303*cdf0e10cSrcweir                     }
304*cdf0e10cSrcweir #endif
305*cdf0e10cSrcweir                 }
306*cdf0e10cSrcweir             }
307*cdf0e10cSrcweir 
308*cdf0e10cSrcweir             if (len) {
309*cdf0e10cSrcweir                 aCache.wordboundary[i+1] = aCache.wordboundary[i] + len;
310*cdf0e10cSrcweir                 i++;
311*cdf0e10cSrcweir 
312*cdf0e10cSrcweir #if USE_CELL_BOUNDARY_CODE
313*cdf0e10cSrcweir                 if (useCellBoundary) {
314*cdf0e10cSrcweir                     sal_Int32 cBoundary = cellBoundary[aCache.wordboundary[i] + wordBoundary.startPos - 1];
315*cdf0e10cSrcweir                     if (cBoundary > 0)
316*cdf0e10cSrcweir                         aCache.wordboundary[i] = cBoundary - wordBoundary.startPos;
317*cdf0e10cSrcweir                 }
318*cdf0e10cSrcweir #endif
319*cdf0e10cSrcweir             }
320*cdf0e10cSrcweir         }
321*cdf0e10cSrcweir         aCache.wordboundary[i + 1] = aCache.length + 1;
322*cdf0e10cSrcweir 
323*cdf0e10cSrcweir         return aCache;
324*cdf0e10cSrcweir }
325*cdf0e10cSrcweir 
326*cdf0e10cSrcweir Boundary xdictionary::previousWord(const OUString& rText, sal_Int32 anyPos, sal_Int16 wordType)
327*cdf0e10cSrcweir {
328*cdf0e10cSrcweir         // looking for the first non-whitespace character from anyPos
329*cdf0e10cSrcweir         sal_uInt32 ch = rText.iterateCodePoints(&anyPos, -1);
330*cdf0e10cSrcweir 
331*cdf0e10cSrcweir         while (anyPos > 0 && u_isWhitespace(ch)) ch = rText.iterateCodePoints(&anyPos, -1);
332*cdf0e10cSrcweir 
333*cdf0e10cSrcweir         return getWordBoundary(rText, anyPos, wordType, true);
334*cdf0e10cSrcweir }
335*cdf0e10cSrcweir 
336*cdf0e10cSrcweir Boundary xdictionary::nextWord(const OUString& rText, sal_Int32 anyPos, sal_Int16 wordType)
337*cdf0e10cSrcweir {
338*cdf0e10cSrcweir         boundary = getWordBoundary(rText, anyPos, wordType, true);
339*cdf0e10cSrcweir         anyPos = boundary.endPos;
340*cdf0e10cSrcweir         if (anyPos < rText.getLength()) {
341*cdf0e10cSrcweir             // looknig for the first non-whitespace character from anyPos
342*cdf0e10cSrcweir             sal_uInt32 ch = rText.iterateCodePoints(&anyPos, 1);
343*cdf0e10cSrcweir             while (u_isWhitespace(ch)) ch=rText.iterateCodePoints(&anyPos, 1);
344*cdf0e10cSrcweir             rText.iterateCodePoints(&anyPos, -1);
345*cdf0e10cSrcweir         }
346*cdf0e10cSrcweir 
347*cdf0e10cSrcweir         return getWordBoundary(rText, anyPos, wordType, true);
348*cdf0e10cSrcweir }
349*cdf0e10cSrcweir 
350*cdf0e10cSrcweir Boundary xdictionary::getWordBoundary(const OUString& rText, sal_Int32 anyPos, sal_Int16 wordType, sal_Bool bDirection)
351*cdf0e10cSrcweir {
352*cdf0e10cSrcweir         const sal_Unicode *text=rText.getStr();
353*cdf0e10cSrcweir         sal_Int32 len=rText.getLength();
354*cdf0e10cSrcweir         if (anyPos >= len || anyPos < 0) {
355*cdf0e10cSrcweir             boundary.startPos = boundary.endPos = anyPos < 0 ? 0 : len;
356*cdf0e10cSrcweir         } else if (seekSegment(rText, anyPos, boundary)) {          // character in dict
357*cdf0e10cSrcweir             WordBreakCache& aCache = getCache(text, boundary);
358*cdf0e10cSrcweir             sal_Int32 i = 0;
359*cdf0e10cSrcweir 
360*cdf0e10cSrcweir             while (aCache.wordboundary[i] <= anyPos - boundary.startPos) i++;
361*cdf0e10cSrcweir 
362*cdf0e10cSrcweir             sal_Int32 startPos = aCache.wordboundary[i - 1];
363*cdf0e10cSrcweir             // if bDirection is false
364*cdf0e10cSrcweir             if (!bDirection && startPos > 0 && startPos == (anyPos - boundary.startPos))
365*cdf0e10cSrcweir             {
366*cdf0e10cSrcweir                 sal_Int32 indexUtf16 = anyPos-1;
367*cdf0e10cSrcweir                 sal_uInt32 ch = rText.iterateCodePoints(&indexUtf16, 1);
368*cdf0e10cSrcweir                 if (u_isWhitespace(ch))
369*cdf0e10cSrcweir                     i--;
370*cdf0e10cSrcweir             }
371*cdf0e10cSrcweir             boundary.endPos = boundary.startPos;
372*cdf0e10cSrcweir             rText.iterateCodePoints(&boundary.endPos, aCache.wordboundary[i]);
373*cdf0e10cSrcweir             rText.iterateCodePoints(&boundary.startPos, aCache.wordboundary[i-1]);
374*cdf0e10cSrcweir         } else {
375*cdf0e10cSrcweir             boundary.startPos = anyPos;
376*cdf0e10cSrcweir             if (anyPos < len) rText.iterateCodePoints(&anyPos, 1);
377*cdf0e10cSrcweir             boundary.endPos = anyPos < len ? anyPos : len;
378*cdf0e10cSrcweir         }
379*cdf0e10cSrcweir         if (wordType == WordType::WORD_COUNT) {
380*cdf0e10cSrcweir             // skip punctuation for word count.
381*cdf0e10cSrcweir             while (boundary.endPos < len)
382*cdf0e10cSrcweir             {
383*cdf0e10cSrcweir                 sal_Int32 indexUtf16 = boundary.endPos;
384*cdf0e10cSrcweir                 if (u_ispunct(rText.iterateCodePoints(&indexUtf16, 1)))
385*cdf0e10cSrcweir                     boundary.endPos = indexUtf16;
386*cdf0e10cSrcweir                 else
387*cdf0e10cSrcweir                     break;
388*cdf0e10cSrcweir             }
389*cdf0e10cSrcweir         }
390*cdf0e10cSrcweir 
391*cdf0e10cSrcweir         return boundary;
392*cdf0e10cSrcweir }
393*cdf0e10cSrcweir 
394*cdf0e10cSrcweir #if USE_CELL_BOUNDARY_CODE
395*cdf0e10cSrcweir void xdictionary::setCellBoundary(sal_Int32* cellArray)
396*cdf0e10cSrcweir {
397*cdf0e10cSrcweir         useCellBoundary = sal_True;
398*cdf0e10cSrcweir         cellBoundary = cellArray;
399*cdf0e10cSrcweir }
400*cdf0e10cSrcweir #endif
401*cdf0e10cSrcweir 
402*cdf0e10cSrcweir } } } }
403