1*449ab281SAndrew Rist /**************************************************************
2cdf0e10cSrcweir  *
3*449ab281SAndrew Rist  * Licensed to the Apache Software Foundation (ASF) under one
4*449ab281SAndrew Rist  * or more contributor license agreements.  See the NOTICE file
5*449ab281SAndrew Rist  * distributed with this work for additional information
6*449ab281SAndrew Rist  * regarding copyright ownership.  The ASF licenses this file
7*449ab281SAndrew Rist  * to you under the Apache License, Version 2.0 (the
8*449ab281SAndrew Rist  * "License"); you may not use this file except in compliance
9*449ab281SAndrew Rist  * with the License.  You may obtain a copy of the License at
10*449ab281SAndrew Rist  *
11*449ab281SAndrew Rist  *   http://www.apache.org/licenses/LICENSE-2.0
12*449ab281SAndrew Rist  *
13*449ab281SAndrew Rist  * Unless required by applicable law or agreed to in writing,
14*449ab281SAndrew Rist  * software distributed under the License is distributed on an
15*449ab281SAndrew Rist  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*449ab281SAndrew Rist  * KIND, either express or implied.  See the License for the
17*449ab281SAndrew Rist  * specific language governing permissions and limitations
18*449ab281SAndrew Rist  * under the License.
19*449ab281SAndrew Rist  *
20*449ab281SAndrew Rist  *************************************************************/
21*449ab281SAndrew Rist 
22*449ab281SAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_i18npool.hxx"
26cdf0e10cSrcweir 
27cdf0e10cSrcweir // xdictionary.cpp: implementation of the xdictionary class.
28cdf0e10cSrcweir //
29cdf0e10cSrcweir //////////////////////////////////////////////////////////////////////
30cdf0e10cSrcweir 
31cdf0e10cSrcweir 
32cdf0e10cSrcweir #include <rtl/ustrbuf.hxx>
33cdf0e10cSrcweir 
34cdf0e10cSrcweir #include <com/sun/star/i18n/WordType.hpp>
35cdf0e10cSrcweir #include <xdictionary.hxx>
36cdf0e10cSrcweir #include <unicode/uchar.h>
37cdf0e10cSrcweir #include <string.h>
38cdf0e10cSrcweir #include <breakiteratorImpl.hxx>
39cdf0e10cSrcweir 
40cdf0e10cSrcweir //////////////////////////////////////////////////////////////////////
41cdf0e10cSrcweir // Construction/Destruction
42cdf0e10cSrcweir //////////////////////////////////////////////////////////////////////
43cdf0e10cSrcweir 
44cdf0e10cSrcweir using namespace rtl;
45cdf0e10cSrcweir 
46cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n {
47cdf0e10cSrcweir 
thisModule()48cdf0e10cSrcweir extern "C" { static void SAL_CALL thisModule() {} }
49cdf0e10cSrcweir 
xdictionary(const sal_Char * lang)50cdf0e10cSrcweir xdictionary::xdictionary(const sal_Char *lang) :
51cdf0e10cSrcweir     existMark( NULL ),
52cdf0e10cSrcweir     index1( NULL ),
53cdf0e10cSrcweir     index2( NULL ),
54cdf0e10cSrcweir     lenArray( NULL ),
55cdf0e10cSrcweir     dataArea( NULL ),
56cdf0e10cSrcweir     hModule( NULL ),
57cdf0e10cSrcweir     boundary(),
58cdf0e10cSrcweir     japaneseWordBreak( sal_False )
59cdf0e10cSrcweir #if USE_CELL_BOUNDARY_CODE
60cdf0e10cSrcweir     // For CTL breakiterator, where the word boundary should not be inside cell.
61cdf0e10cSrcweir     ,
62cdf0e10cSrcweir     useCellBoundary( sal_False ),
63cdf0e10cSrcweir     cellBoundary( NULL )
64cdf0e10cSrcweir #endif
65cdf0e10cSrcweir {
66cdf0e10cSrcweir 	index1 = 0;
67cdf0e10cSrcweir #ifdef SAL_DLLPREFIX
68cdf0e10cSrcweir     OUStringBuffer aBuf( strlen(lang) + 7 + 6 );    // mostly "lib*.so" (with * == dict_zh)
69cdf0e10cSrcweir     aBuf.appendAscii( SAL_DLLPREFIX );
70cdf0e10cSrcweir #else
71cdf0e10cSrcweir     OUStringBuffer aBuf( strlen(lang) + 7 + 4 );    // mostly "*.dll" (with * == dict_zh)
72cdf0e10cSrcweir #endif
73cdf0e10cSrcweir     aBuf.appendAscii( "dict_" ).appendAscii( lang ).appendAscii( SAL_DLLEXTENSION );
74cdf0e10cSrcweir         hModule = osl_loadModuleRelative( &thisModule, aBuf.makeStringAndClear().pData, SAL_LOADMODULE_DEFAULT );
75cdf0e10cSrcweir         if( hModule ) {
76cdf0e10cSrcweir             sal_IntPtr (*func)();
77cdf0e10cSrcweir             func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString::createFromAscii("getExistMark").pData );
78cdf0e10cSrcweir             existMark = (sal_uInt8*) (*func)();
79cdf0e10cSrcweir             func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString::createFromAscii("getIndex1").pData );
80cdf0e10cSrcweir             index1 = (sal_Int16*) (*func)();
81cdf0e10cSrcweir             func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString::createFromAscii("getIndex2").pData );
82cdf0e10cSrcweir             index2 = (sal_Int32*) (*func)();
83cdf0e10cSrcweir             func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString::createFromAscii("getLenArray").pData );
84cdf0e10cSrcweir             lenArray = (sal_Int32*) (*func)();
85cdf0e10cSrcweir             func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString::createFromAscii("getDataArea").pData );
86cdf0e10cSrcweir             dataArea = (sal_Unicode*) (*func)();
87cdf0e10cSrcweir         }
88cdf0e10cSrcweir         else
89cdf0e10cSrcweir 		{
90cdf0e10cSrcweir             existMark = NULL;
91cdf0e10cSrcweir 			index1 = NULL;
92cdf0e10cSrcweir 			index2 = NULL;
93cdf0e10cSrcweir 			lenArray = NULL;
94cdf0e10cSrcweir 			dataArea = NULL;
95cdf0e10cSrcweir 		}
96cdf0e10cSrcweir 
97cdf0e10cSrcweir 		for (sal_Int32 i = 0; i < CACHE_MAX; i++)
98cdf0e10cSrcweir             cache[i].size = 0;
99cdf0e10cSrcweir 
100cdf0e10cSrcweir #if USE_CELL_BOUNDARY_CODE
101cdf0e10cSrcweir         useCellBoundary = sal_False;
102cdf0e10cSrcweir         cellBoundary = NULL;
103cdf0e10cSrcweir #endif
104cdf0e10cSrcweir         japaneseWordBreak = sal_False;
105cdf0e10cSrcweir }
106cdf0e10cSrcweir 
~xdictionary()107cdf0e10cSrcweir xdictionary::~xdictionary() {
108cdf0e10cSrcweir         osl_unloadModule(hModule);
109cdf0e10cSrcweir         for (sal_Int32 i = 0; i < CACHE_MAX; i++) {
110cdf0e10cSrcweir             if (cache[i].size > 0) {
111cdf0e10cSrcweir                 delete cache[i].contents;
112cdf0e10cSrcweir                 delete cache[i].wordboundary;
113cdf0e10cSrcweir             }
114cdf0e10cSrcweir         }
115cdf0e10cSrcweir }
116cdf0e10cSrcweir 
setJapaneseWordBreak()117cdf0e10cSrcweir void xdictionary::setJapaneseWordBreak()
118cdf0e10cSrcweir {
119cdf0e10cSrcweir         japaneseWordBreak = sal_True;
120cdf0e10cSrcweir }
121cdf0e10cSrcweir 
exists(const sal_uInt32 c)122cdf0e10cSrcweir sal_Bool xdictionary::exists(const sal_uInt32 c) {
123cdf0e10cSrcweir         // 0x1FFF is the hardcoded limit in gendict for existMarks
124cdf0e10cSrcweir         sal_Bool exist = (existMark && ((c>>3) < 0x1FFF)) ? sal::static_int_cast<sal_Bool>((existMark[c>>3] & (1<<(c&0x07))) != 0) : sal_False;
125cdf0e10cSrcweir         if (!exist && japaneseWordBreak)
126cdf0e10cSrcweir             return BreakIteratorImpl::getScriptClass(c) == ScriptType::ASIAN;
127cdf0e10cSrcweir         else
128cdf0e10cSrcweir             return exist;
129cdf0e10cSrcweir }
130cdf0e10cSrcweir 
getLongestMatch(const sal_Unicode * str,sal_Int32 sLen)131cdf0e10cSrcweir sal_Int32 xdictionary::getLongestMatch(const sal_Unicode* str, sal_Int32 sLen) {
132cdf0e10cSrcweir 
133cdf0e10cSrcweir 		if ( !index1 ) return 0;
134cdf0e10cSrcweir 
135cdf0e10cSrcweir         sal_Int16 idx = index1[str[0] >> 8];
136cdf0e10cSrcweir 
137cdf0e10cSrcweir         if (idx == 0xFF) return 0;
138cdf0e10cSrcweir 
139cdf0e10cSrcweir         idx = (idx<<8) | (str[0]&0xff);
140cdf0e10cSrcweir 
141cdf0e10cSrcweir         sal_uInt32 begin = index2[idx], end = index2[idx+1];
142cdf0e10cSrcweir 
143cdf0e10cSrcweir         if (begin == 0) return 0;
144cdf0e10cSrcweir 
145cdf0e10cSrcweir         str++; sLen--; // first character is not stored in the dictionary
146cdf0e10cSrcweir         for (sal_uInt32 i = end; i > begin; i--) {
147cdf0e10cSrcweir             sal_Int32 len = lenArray[i] - lenArray[i - 1];
148cdf0e10cSrcweir             if (sLen >= len) {
149cdf0e10cSrcweir                 const sal_Unicode *dstr = dataArea + lenArray[i-1];
150cdf0e10cSrcweir                 sal_Int32 pos = 0;
151cdf0e10cSrcweir 
152cdf0e10cSrcweir                 while (pos < len && dstr[pos] == str[pos]) { pos++; }
153cdf0e10cSrcweir 
154cdf0e10cSrcweir                 if (pos == len)
155cdf0e10cSrcweir                     return len + 1;
156cdf0e10cSrcweir             }
157cdf0e10cSrcweir         }
158cdf0e10cSrcweir         return 0;
159cdf0e10cSrcweir }
160cdf0e10cSrcweir 
161cdf0e10cSrcweir 
162cdf0e10cSrcweir /*
163cdf0e10cSrcweir  * c-tor
164cdf0e10cSrcweir  */
165cdf0e10cSrcweir 
WordBreakCache()166cdf0e10cSrcweir WordBreakCache::WordBreakCache() :
167cdf0e10cSrcweir     length( 0 ),
168cdf0e10cSrcweir     contents( NULL ),
169cdf0e10cSrcweir     wordboundary( NULL ),
170cdf0e10cSrcweir     size( 0 )
171cdf0e10cSrcweir {
172cdf0e10cSrcweir }
173cdf0e10cSrcweir 
174cdf0e10cSrcweir /*
175cdf0e10cSrcweir  * Compare two unicode string,
176cdf0e10cSrcweir  */
177cdf0e10cSrcweir 
equals(const sal_Unicode * str,Boundary & boundary)178cdf0e10cSrcweir sal_Bool WordBreakCache::equals(const sal_Unicode* str, Boundary& boundary) {
179cdf0e10cSrcweir         // Different length, different string.
180cdf0e10cSrcweir         if (length != boundary.endPos - boundary.startPos) return sal_False;
181cdf0e10cSrcweir 
182cdf0e10cSrcweir         for (sal_Int32 i = 0; i < length; i++)
183cdf0e10cSrcweir             if (contents[i] != str[i + boundary.startPos]) return sal_False;
184cdf0e10cSrcweir 
185cdf0e10cSrcweir         return sal_True;
186cdf0e10cSrcweir }
187cdf0e10cSrcweir 
188cdf0e10cSrcweir 
189cdf0e10cSrcweir /*
190cdf0e10cSrcweir  * Retrieve the segment containing the character at pos.
191cdf0e10cSrcweir  * @param pos : Position of the given character.
192cdf0e10cSrcweir  * @return true if CJK.
193cdf0e10cSrcweir  */
seekSegment(const rtl::OUString & rText,sal_Int32 pos,Boundary & segBoundary)194cdf0e10cSrcweir sal_Bool xdictionary::seekSegment(const rtl::OUString &rText, sal_Int32 pos,
195cdf0e10cSrcweir 	Boundary& segBoundary)
196cdf0e10cSrcweir {
197cdf0e10cSrcweir     sal_Int32 indexUtf16;
198cdf0e10cSrcweir     segBoundary.endPos = segBoundary.startPos = pos;
199cdf0e10cSrcweir 
200cdf0e10cSrcweir     indexUtf16 = pos;
201cdf0e10cSrcweir     while (indexUtf16 > 0)
202cdf0e10cSrcweir     {
203cdf0e10cSrcweir         sal_uInt32 ch = rText.iterateCodePoints(&indexUtf16, -1);
204cdf0e10cSrcweir         if (u_isWhitespace(ch) || exists(ch))
205cdf0e10cSrcweir             segBoundary.startPos = indexUtf16;
206cdf0e10cSrcweir         else
207cdf0e10cSrcweir             break;
208cdf0e10cSrcweir     }
209cdf0e10cSrcweir 
210cdf0e10cSrcweir     indexUtf16 = pos;
211cdf0e10cSrcweir     while (indexUtf16 < rText.getLength())
212cdf0e10cSrcweir     {
213cdf0e10cSrcweir         sal_uInt32 ch = rText.iterateCodePoints(&indexUtf16, 1);
214cdf0e10cSrcweir         if (u_isWhitespace(ch) || exists(ch))
215cdf0e10cSrcweir             segBoundary.endPos = indexUtf16;
216cdf0e10cSrcweir         else
217cdf0e10cSrcweir             break;
218cdf0e10cSrcweir     }
219cdf0e10cSrcweir 
220cdf0e10cSrcweir     indexUtf16 = segBoundary.startPos;
221cdf0e10cSrcweir     rText.iterateCodePoints(&indexUtf16, 1);
222cdf0e10cSrcweir     return segBoundary.endPos > indexUtf16;
223cdf0e10cSrcweir }
224cdf0e10cSrcweir 
225cdf0e10cSrcweir #define KANJA       1
226cdf0e10cSrcweir #define KATAKANA    2
227cdf0e10cSrcweir #define HIRAKANA    3
228cdf0e10cSrcweir 
JapaneseCharType(sal_Unicode c)229cdf0e10cSrcweir static sal_Int16 JapaneseCharType(sal_Unicode c)
230cdf0e10cSrcweir {
231cdf0e10cSrcweir     if (0x3041 <= c && c <= 0x309e)
232cdf0e10cSrcweir         return HIRAKANA;
233cdf0e10cSrcweir     if ((0x30a1 <= c && c <= 0x30fe) || (0xff65 <= c && c <= 0xff9f))
234cdf0e10cSrcweir         return KATAKANA;
235cdf0e10cSrcweir     return KANJA;
236cdf0e10cSrcweir }
237cdf0e10cSrcweir 
getCache(const sal_Unicode * text,Boundary & wordBoundary)238cdf0e10cSrcweir WordBreakCache& xdictionary::getCache(const sal_Unicode *text, Boundary& wordBoundary)
239cdf0e10cSrcweir {
240cdf0e10cSrcweir 
241cdf0e10cSrcweir         WordBreakCache& aCache = cache[text[0] & 0x1f];
242cdf0e10cSrcweir 
243cdf0e10cSrcweir         if (aCache.size != 0 && aCache.equals(text, wordBoundary))
244cdf0e10cSrcweir             return aCache;
245cdf0e10cSrcweir 
246cdf0e10cSrcweir         sal_Int32 len = wordBoundary.endPos - wordBoundary.startPos;
247cdf0e10cSrcweir 
248cdf0e10cSrcweir         if (aCache.size == 0 || len > aCache.size) {
249cdf0e10cSrcweir             if (aCache.size != 0) {
250cdf0e10cSrcweir                 delete aCache.contents;
251cdf0e10cSrcweir                 delete aCache.wordboundary;
252cdf0e10cSrcweir                 aCache.size = len;
253cdf0e10cSrcweir             }
254cdf0e10cSrcweir             else
255cdf0e10cSrcweir                 aCache.size = len > DEFAULT_SIZE ? len : DEFAULT_SIZE;
256cdf0e10cSrcweir             aCache.contents = new sal_Unicode[aCache.size + 1];
257cdf0e10cSrcweir             aCache.wordboundary = new sal_Int32[aCache.size + 2];
258cdf0e10cSrcweir         }
259cdf0e10cSrcweir         aCache.length  = len;
260cdf0e10cSrcweir         memcpy(aCache.contents, text + wordBoundary.startPos, len * sizeof(sal_Unicode));
261cdf0e10cSrcweir         *(aCache.contents + len) = 0x0000;
262cdf0e10cSrcweir         // reset the wordboundary in cache
263cdf0e10cSrcweir         memset(aCache.wordboundary, '\0', sizeof(sal_Int32)*(len + 2));
264cdf0e10cSrcweir 
265cdf0e10cSrcweir         sal_Int32 i = 0;        // loop variable
266cdf0e10cSrcweir         while (aCache.wordboundary[i] < aCache.length) {
267cdf0e10cSrcweir             len = 0;
268cdf0e10cSrcweir             // look the continuous white space as one word and cashe it
269cdf0e10cSrcweir             while (u_isWhitespace((sal_uInt32)text[wordBoundary.startPos + aCache.wordboundary[i] + len]))
270cdf0e10cSrcweir                 len ++;
271cdf0e10cSrcweir 
272cdf0e10cSrcweir             if (len == 0) {
273cdf0e10cSrcweir                 const sal_Unicode *str = text + wordBoundary.startPos + aCache.wordboundary[i];
274cdf0e10cSrcweir                 sal_Int32 slen = aCache.length - aCache.wordboundary[i];
275cdf0e10cSrcweir                 sal_Int16 type = 0, count = 0;
276cdf0e10cSrcweir                 for (;len == 0 && slen > 0; str++, slen--) {
277cdf0e10cSrcweir                     len = getLongestMatch(str, slen);
278cdf0e10cSrcweir                     if (len == 0) {
279cdf0e10cSrcweir                         if (!japaneseWordBreak) {
280cdf0e10cSrcweir                             len = 1;
281cdf0e10cSrcweir                         } else {
282cdf0e10cSrcweir                             if (count == 0)
283cdf0e10cSrcweir                                 type = JapaneseCharType(*str);
284cdf0e10cSrcweir                             else if (type != JapaneseCharType(*str))
285cdf0e10cSrcweir                                 break;
286cdf0e10cSrcweir                             count++;
287cdf0e10cSrcweir                         }
288cdf0e10cSrcweir                     }
289cdf0e10cSrcweir                 }
290cdf0e10cSrcweir                 if (count) {
291cdf0e10cSrcweir                     aCache.wordboundary[i+1] = aCache.wordboundary[i] + count;
292cdf0e10cSrcweir                     i++;
293cdf0e10cSrcweir 
294cdf0e10cSrcweir #if USE_CELL_BOUNDARY_CODE
295cdf0e10cSrcweir                     if (useCellBoundary) {
296cdf0e10cSrcweir                         sal_Int32 cBoundary = cellBoundary[aCache.wordboundary[i] + wordBoundary.startPos - 1];
297cdf0e10cSrcweir                         if (cBoundary > 0)
298cdf0e10cSrcweir                             aCache.wordboundary[i] = cBoundary - wordBoundary.startPos;
299cdf0e10cSrcweir                     }
300cdf0e10cSrcweir #endif
301cdf0e10cSrcweir                 }
302cdf0e10cSrcweir             }
303cdf0e10cSrcweir 
304cdf0e10cSrcweir             if (len) {
305cdf0e10cSrcweir                 aCache.wordboundary[i+1] = aCache.wordboundary[i] + len;
306cdf0e10cSrcweir                 i++;
307cdf0e10cSrcweir 
308cdf0e10cSrcweir #if USE_CELL_BOUNDARY_CODE
309cdf0e10cSrcweir                 if (useCellBoundary) {
310cdf0e10cSrcweir                     sal_Int32 cBoundary = cellBoundary[aCache.wordboundary[i] + wordBoundary.startPos - 1];
311cdf0e10cSrcweir                     if (cBoundary > 0)
312cdf0e10cSrcweir                         aCache.wordboundary[i] = cBoundary - wordBoundary.startPos;
313cdf0e10cSrcweir                 }
314cdf0e10cSrcweir #endif
315cdf0e10cSrcweir             }
316cdf0e10cSrcweir         }
317cdf0e10cSrcweir         aCache.wordboundary[i + 1] = aCache.length + 1;
318cdf0e10cSrcweir 
319cdf0e10cSrcweir         return aCache;
320cdf0e10cSrcweir }
321cdf0e10cSrcweir 
previousWord(const OUString & rText,sal_Int32 anyPos,sal_Int16 wordType)322cdf0e10cSrcweir Boundary xdictionary::previousWord(const OUString& rText, sal_Int32 anyPos, sal_Int16 wordType)
323cdf0e10cSrcweir {
324cdf0e10cSrcweir         // looking for the first non-whitespace character from anyPos
325cdf0e10cSrcweir         sal_uInt32 ch = rText.iterateCodePoints(&anyPos, -1);
326cdf0e10cSrcweir 
327cdf0e10cSrcweir         while (anyPos > 0 && u_isWhitespace(ch)) ch = rText.iterateCodePoints(&anyPos, -1);
328cdf0e10cSrcweir 
329cdf0e10cSrcweir         return getWordBoundary(rText, anyPos, wordType, true);
330cdf0e10cSrcweir }
331cdf0e10cSrcweir 
nextWord(const OUString & rText,sal_Int32 anyPos,sal_Int16 wordType)332cdf0e10cSrcweir Boundary xdictionary::nextWord(const OUString& rText, sal_Int32 anyPos, sal_Int16 wordType)
333cdf0e10cSrcweir {
334cdf0e10cSrcweir         boundary = getWordBoundary(rText, anyPos, wordType, true);
335cdf0e10cSrcweir         anyPos = boundary.endPos;
336cdf0e10cSrcweir         if (anyPos < rText.getLength()) {
337cdf0e10cSrcweir             // looknig for the first non-whitespace character from anyPos
338cdf0e10cSrcweir             sal_uInt32 ch = rText.iterateCodePoints(&anyPos, 1);
339cdf0e10cSrcweir             while (u_isWhitespace(ch)) ch=rText.iterateCodePoints(&anyPos, 1);
340cdf0e10cSrcweir             rText.iterateCodePoints(&anyPos, -1);
341cdf0e10cSrcweir         }
342cdf0e10cSrcweir 
343cdf0e10cSrcweir         return getWordBoundary(rText, anyPos, wordType, true);
344cdf0e10cSrcweir }
345cdf0e10cSrcweir 
getWordBoundary(const OUString & rText,sal_Int32 anyPos,sal_Int16 wordType,sal_Bool bDirection)346cdf0e10cSrcweir Boundary xdictionary::getWordBoundary(const OUString& rText, sal_Int32 anyPos, sal_Int16 wordType, sal_Bool bDirection)
347cdf0e10cSrcweir {
348cdf0e10cSrcweir         const sal_Unicode *text=rText.getStr();
349cdf0e10cSrcweir         sal_Int32 len=rText.getLength();
350cdf0e10cSrcweir         if (anyPos >= len || anyPos < 0) {
351cdf0e10cSrcweir             boundary.startPos = boundary.endPos = anyPos < 0 ? 0 : len;
352cdf0e10cSrcweir         } else if (seekSegment(rText, anyPos, boundary)) {          // character in dict
353cdf0e10cSrcweir             WordBreakCache& aCache = getCache(text, boundary);
354cdf0e10cSrcweir             sal_Int32 i = 0;
355cdf0e10cSrcweir 
356cdf0e10cSrcweir             while (aCache.wordboundary[i] <= anyPos - boundary.startPos) i++;
357cdf0e10cSrcweir 
358cdf0e10cSrcweir             sal_Int32 startPos = aCache.wordboundary[i - 1];
359cdf0e10cSrcweir             // if bDirection is false
360cdf0e10cSrcweir             if (!bDirection && startPos > 0 && startPos == (anyPos - boundary.startPos))
361cdf0e10cSrcweir             {
362cdf0e10cSrcweir                 sal_Int32 indexUtf16 = anyPos-1;
363cdf0e10cSrcweir                 sal_uInt32 ch = rText.iterateCodePoints(&indexUtf16, 1);
364cdf0e10cSrcweir                 if (u_isWhitespace(ch))
365cdf0e10cSrcweir                     i--;
366cdf0e10cSrcweir             }
367cdf0e10cSrcweir             boundary.endPos = boundary.startPos;
368cdf0e10cSrcweir             rText.iterateCodePoints(&boundary.endPos, aCache.wordboundary[i]);
369cdf0e10cSrcweir             rText.iterateCodePoints(&boundary.startPos, aCache.wordboundary[i-1]);
370cdf0e10cSrcweir         } else {
371cdf0e10cSrcweir             boundary.startPos = anyPos;
372cdf0e10cSrcweir             if (anyPos < len) rText.iterateCodePoints(&anyPos, 1);
373cdf0e10cSrcweir             boundary.endPos = anyPos < len ? anyPos : len;
374cdf0e10cSrcweir         }
375cdf0e10cSrcweir         if (wordType == WordType::WORD_COUNT) {
376cdf0e10cSrcweir             // skip punctuation for word count.
377cdf0e10cSrcweir             while (boundary.endPos < len)
378cdf0e10cSrcweir             {
379cdf0e10cSrcweir                 sal_Int32 indexUtf16 = boundary.endPos;
380cdf0e10cSrcweir                 if (u_ispunct(rText.iterateCodePoints(&indexUtf16, 1)))
381cdf0e10cSrcweir                     boundary.endPos = indexUtf16;
382cdf0e10cSrcweir                 else
383cdf0e10cSrcweir                     break;
384cdf0e10cSrcweir             }
385cdf0e10cSrcweir         }
386cdf0e10cSrcweir 
387cdf0e10cSrcweir         return boundary;
388cdf0e10cSrcweir }
389cdf0e10cSrcweir 
390cdf0e10cSrcweir #if USE_CELL_BOUNDARY_CODE
setCellBoundary(sal_Int32 * cellArray)391cdf0e10cSrcweir void xdictionary::setCellBoundary(sal_Int32* cellArray)
392cdf0e10cSrcweir {
393cdf0e10cSrcweir         useCellBoundary = sal_True;
394cdf0e10cSrcweir         cellBoundary = cellArray;
395cdf0e10cSrcweir }
396cdf0e10cSrcweir #endif
397cdf0e10cSrcweir 
398cdf0e10cSrcweir } } } }
399