1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
29*cdf0e10cSrcweir #include "precompiled_i18npool.hxx"
30*cdf0e10cSrcweir #include <breakiterator_unicode.hxx>
31*cdf0e10cSrcweir #include <localedata.hxx>
32*cdf0e10cSrcweir #include <unicode/uchar.h>
33*cdf0e10cSrcweir #include <unicode/locid.h>
34*cdf0e10cSrcweir #include <unicode/rbbi.h>
35*cdf0e10cSrcweir #include <unicode/udata.h>
36*cdf0e10cSrcweir #include <rtl/strbuf.hxx>
37*cdf0e10cSrcweir #include <rtl/ustring.hxx>
38*cdf0e10cSrcweir 
39*cdf0e10cSrcweir U_CDECL_BEGIN
40*cdf0e10cSrcweir extern const char OpenOffice_dat[];
41*cdf0e10cSrcweir U_CDECL_END
42*cdf0e10cSrcweir 
43*cdf0e10cSrcweir using namespace ::com::sun::star;
44*cdf0e10cSrcweir using namespace ::com::sun::star::lang;
45*cdf0e10cSrcweir using namespace ::rtl;
46*cdf0e10cSrcweir 
47*cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n {
48*cdf0e10cSrcweir 
49*cdf0e10cSrcweir #define ERROR ::com::sun::star::uno::RuntimeException()
50*cdf0e10cSrcweir 
51*cdf0e10cSrcweir //#define ImplementName "com.sun.star.i18n.BreakIterator_Unicode";
52*cdf0e10cSrcweir 
53*cdf0e10cSrcweir 
54*cdf0e10cSrcweir BreakIterator_Unicode::BreakIterator_Unicode() :
55*cdf0e10cSrcweir     cBreakIterator( "com.sun.star.i18n.BreakIterator_Unicode" ),    // implementation name
56*cdf0e10cSrcweir     wordRule( "word" ),
57*cdf0e10cSrcweir     lineRule( "line" ),
58*cdf0e10cSrcweir     result(),
59*cdf0e10cSrcweir     character(),
60*cdf0e10cSrcweir     word(),
61*cdf0e10cSrcweir     sentence(),
62*cdf0e10cSrcweir     line(),
63*cdf0e10cSrcweir     icuBI( NULL ),
64*cdf0e10cSrcweir     aLocale(),
65*cdf0e10cSrcweir     aBreakType(),
66*cdf0e10cSrcweir     aWordType()
67*cdf0e10cSrcweir {
68*cdf0e10cSrcweir }
69*cdf0e10cSrcweir 
70*cdf0e10cSrcweir 
71*cdf0e10cSrcweir BreakIterator_Unicode::~BreakIterator_Unicode()
72*cdf0e10cSrcweir {
73*cdf0e10cSrcweir         if (icuBI && icuBI->aBreakIterator) {
74*cdf0e10cSrcweir             delete icuBI->aBreakIterator;
75*cdf0e10cSrcweir             icuBI->aBreakIterator=NULL;
76*cdf0e10cSrcweir         }
77*cdf0e10cSrcweir         if (character.aBreakIterator) delete character.aBreakIterator;
78*cdf0e10cSrcweir         if (word.aBreakIterator) delete word.aBreakIterator;
79*cdf0e10cSrcweir         if (sentence.aBreakIterator) delete sentence.aBreakIterator;
80*cdf0e10cSrcweir         if (line.aBreakIterator) delete line.aBreakIterator;
81*cdf0e10cSrcweir }
82*cdf0e10cSrcweir 
83*cdf0e10cSrcweir /*
84*cdf0e10cSrcweir 	Wrapper class to provide public access to the RuleBasedBreakIterator's
85*cdf0e10cSrcweir 	setbreakType method.
86*cdf0e10cSrcweir */
87*cdf0e10cSrcweir class OOoRuleBasedBreakIterator : public RuleBasedBreakIterator {
88*cdf0e10cSrcweir 	public:
89*cdf0e10cSrcweir 		inline void publicSetBreakType(int32_t type) {
90*cdf0e10cSrcweir 			setBreakType(type);
91*cdf0e10cSrcweir 		};
92*cdf0e10cSrcweir 		OOoRuleBasedBreakIterator(UDataMemory* image,
93*cdf0e10cSrcweir 				UErrorCode &status) :
94*cdf0e10cSrcweir 			RuleBasedBreakIterator(image, status) { };
95*cdf0e10cSrcweir 
96*cdf0e10cSrcweir };
97*cdf0e10cSrcweir 
98*cdf0e10cSrcweir // loading ICU breakiterator on demand.
99*cdf0e10cSrcweir void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::lang::Locale& rLocale,
100*cdf0e10cSrcweir         sal_Int16 rBreakType, sal_Int16 rWordType, const sal_Char *rule, const OUString& rText) throw(uno::RuntimeException)
101*cdf0e10cSrcweir {
102*cdf0e10cSrcweir     sal_Bool newBreak = sal_False;
103*cdf0e10cSrcweir     UErrorCode status = U_ZERO_ERROR;
104*cdf0e10cSrcweir     sal_Int16 breakType = 0;
105*cdf0e10cSrcweir     switch (rBreakType) {
106*cdf0e10cSrcweir         case LOAD_CHARACTER_BREAKITERATOR: icuBI=&character; breakType = 3; break;
107*cdf0e10cSrcweir         case LOAD_WORD_BREAKITERATOR: icuBI=&word;
108*cdf0e10cSrcweir             switch (rWordType) {
109*cdf0e10cSrcweir                 case WordType::ANYWORD_IGNOREWHITESPACES: breakType = 0; rule=wordRule = "edit_word"; break;
110*cdf0e10cSrcweir                 case WordType::DICTIONARY_WORD: breakType = 1; rule=wordRule = "dict_word"; break;
111*cdf0e10cSrcweir                 case WordType::WORD_COUNT: breakType = 2; rule=wordRule = "count_word"; break;
112*cdf0e10cSrcweir             }
113*cdf0e10cSrcweir             break;
114*cdf0e10cSrcweir         case LOAD_SENTENCE_BREAKITERATOR: icuBI=&sentence; breakType = 5; break;
115*cdf0e10cSrcweir         case LOAD_LINE_BREAKITERATOR: icuBI=&line; breakType = 4; break;
116*cdf0e10cSrcweir     }
117*cdf0e10cSrcweir     if (!icuBI->aBreakIterator || rWordType != aWordType ||
118*cdf0e10cSrcweir             rLocale.Language != aLocale.Language || rLocale.Country != aLocale.Country ||
119*cdf0e10cSrcweir             rLocale.Variant != aLocale.Variant) {
120*cdf0e10cSrcweir         if (icuBI->aBreakIterator) {
121*cdf0e10cSrcweir             delete icuBI->aBreakIterator;
122*cdf0e10cSrcweir             icuBI->aBreakIterator=NULL;
123*cdf0e10cSrcweir         }
124*cdf0e10cSrcweir         if (rule) {
125*cdf0e10cSrcweir             uno::Sequence< OUString > breakRules = LocaleData().getBreakIteratorRules(rLocale);
126*cdf0e10cSrcweir 
127*cdf0e10cSrcweir             status = U_ZERO_ERROR;
128*cdf0e10cSrcweir             udata_setAppData("OpenOffice", OpenOffice_dat, &status);
129*cdf0e10cSrcweir             if ( !U_SUCCESS(status) ) throw ERROR;
130*cdf0e10cSrcweir 
131*cdf0e10cSrcweir             OOoRuleBasedBreakIterator *rbi = NULL;
132*cdf0e10cSrcweir 
133*cdf0e10cSrcweir             if (breakRules.getLength() > breakType && breakRules[breakType].getLength() > 0) {
134*cdf0e10cSrcweir                 rbi = new OOoRuleBasedBreakIterator(udata_open("OpenOffice", "brk",
135*cdf0e10cSrcweir                     OUStringToOString(breakRules[breakType], RTL_TEXTENCODING_ASCII_US).getStr(), &status), status);
136*cdf0e10cSrcweir             } else {
137*cdf0e10cSrcweir                 status = U_ZERO_ERROR;
138*cdf0e10cSrcweir                 OStringBuffer aUDName(64);
139*cdf0e10cSrcweir                 aUDName.append(rule);
140*cdf0e10cSrcweir                 aUDName.append('_');
141*cdf0e10cSrcweir                 aUDName.append( OUStringToOString(rLocale.Language, RTL_TEXTENCODING_ASCII_US));
142*cdf0e10cSrcweir                 UDataMemory* pUData = udata_open("OpenOffice", "brk", aUDName.getStr(), &status);
143*cdf0e10cSrcweir                 if( U_SUCCESS(status) )
144*cdf0e10cSrcweir                     rbi = new OOoRuleBasedBreakIterator( pUData, status);
145*cdf0e10cSrcweir                 if (!U_SUCCESS(status) ) {
146*cdf0e10cSrcweir                     status = U_ZERO_ERROR;
147*cdf0e10cSrcweir                     pUData = udata_open("OpenOffice", "brk", rule, &status);
148*cdf0e10cSrcweir                     if( U_SUCCESS(status) )
149*cdf0e10cSrcweir                         rbi = new OOoRuleBasedBreakIterator( pUData, status);
150*cdf0e10cSrcweir                     if (!U_SUCCESS(status) ) icuBI->aBreakIterator=NULL;
151*cdf0e10cSrcweir                 }
152*cdf0e10cSrcweir             }
153*cdf0e10cSrcweir             if (rbi) {
154*cdf0e10cSrcweir                 switch (rBreakType) {
155*cdf0e10cSrcweir                     case LOAD_CHARACTER_BREAKITERATOR: rbi->publicSetBreakType(UBRK_CHARACTER); break;
156*cdf0e10cSrcweir                     case LOAD_WORD_BREAKITERATOR: rbi->publicSetBreakType(UBRK_WORD); break;
157*cdf0e10cSrcweir                     case LOAD_SENTENCE_BREAKITERATOR: rbi->publicSetBreakType(UBRK_SENTENCE); break;
158*cdf0e10cSrcweir                     case LOAD_LINE_BREAKITERATOR: rbi->publicSetBreakType(UBRK_LINE); break;
159*cdf0e10cSrcweir                 }
160*cdf0e10cSrcweir                 icuBI->aBreakIterator = rbi;
161*cdf0e10cSrcweir             }
162*cdf0e10cSrcweir         }
163*cdf0e10cSrcweir 
164*cdf0e10cSrcweir         if (!icuBI->aBreakIterator) {
165*cdf0e10cSrcweir             icu::Locale icuLocale(
166*cdf0e10cSrcweir                     OUStringToOString(rLocale.Language, RTL_TEXTENCODING_ASCII_US).getStr(),
167*cdf0e10cSrcweir                     OUStringToOString(rLocale.Country, RTL_TEXTENCODING_ASCII_US).getStr(),
168*cdf0e10cSrcweir                     OUStringToOString(rLocale.Variant, RTL_TEXTENCODING_ASCII_US).getStr());
169*cdf0e10cSrcweir 
170*cdf0e10cSrcweir             status = U_ZERO_ERROR;
171*cdf0e10cSrcweir             switch (rBreakType) {
172*cdf0e10cSrcweir                 case LOAD_CHARACTER_BREAKITERATOR:
173*cdf0e10cSrcweir                     icuBI->aBreakIterator =  icu::BreakIterator::createCharacterInstance(icuLocale, status);
174*cdf0e10cSrcweir                     break;
175*cdf0e10cSrcweir                 case LOAD_WORD_BREAKITERATOR:
176*cdf0e10cSrcweir                     icuBI->aBreakIterator =  icu::BreakIterator::createWordInstance(icuLocale, status);
177*cdf0e10cSrcweir                     break;
178*cdf0e10cSrcweir                 case LOAD_SENTENCE_BREAKITERATOR:
179*cdf0e10cSrcweir                     icuBI->aBreakIterator = icu::BreakIterator::createSentenceInstance(icuLocale, status);
180*cdf0e10cSrcweir                     break;
181*cdf0e10cSrcweir                 case LOAD_LINE_BREAKITERATOR:
182*cdf0e10cSrcweir                     icuBI->aBreakIterator = icu::BreakIterator::createLineInstance(icuLocale, status);
183*cdf0e10cSrcweir                     break;
184*cdf0e10cSrcweir             }
185*cdf0e10cSrcweir             if ( !U_SUCCESS(status) ) {
186*cdf0e10cSrcweir                 icuBI->aBreakIterator=NULL;
187*cdf0e10cSrcweir                 throw ERROR;
188*cdf0e10cSrcweir             }
189*cdf0e10cSrcweir         }
190*cdf0e10cSrcweir         if (icuBI->aBreakIterator) {
191*cdf0e10cSrcweir             aLocale=rLocale;
192*cdf0e10cSrcweir             aWordType=rWordType;
193*cdf0e10cSrcweir             aBreakType=rBreakType;
194*cdf0e10cSrcweir             newBreak=sal_True;
195*cdf0e10cSrcweir         } else {
196*cdf0e10cSrcweir             throw ERROR;
197*cdf0e10cSrcweir         }
198*cdf0e10cSrcweir     }
199*cdf0e10cSrcweir 
200*cdf0e10cSrcweir     if (newBreak || icuBI->aICUText.compare(UnicodeString(reinterpret_cast<const UChar *>(rText.getStr()), rText.getLength()))) {	// UChar != sal_Unicode in MinGW
201*cdf0e10cSrcweir         icuBI->aICUText=UnicodeString(reinterpret_cast<const UChar *>(rText.getStr()), rText.getLength());
202*cdf0e10cSrcweir         icuBI->aBreakIterator->setText(icuBI->aICUText);
203*cdf0e10cSrcweir     }
204*cdf0e10cSrcweir }
205*cdf0e10cSrcweir 
206*cdf0e10cSrcweir 
207*cdf0e10cSrcweir sal_Int32 SAL_CALL BreakIterator_Unicode::nextCharacters( const OUString& Text,
208*cdf0e10cSrcweir         sal_Int32 nStartPos, const lang::Locale &rLocale,
209*cdf0e10cSrcweir         sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
210*cdf0e10cSrcweir         throw(uno::RuntimeException)
211*cdf0e10cSrcweir {
212*cdf0e10cSrcweir         if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) { // for CELL mode
213*cdf0e10cSrcweir             loadICUBreakIterator(rLocale, LOAD_CHARACTER_BREAKITERATOR, 0, "char", Text);
214*cdf0e10cSrcweir             for (nDone = 0; nDone < nCount; nDone++) {
215*cdf0e10cSrcweir                 nStartPos = character.aBreakIterator->following(nStartPos);
216*cdf0e10cSrcweir                 if (nStartPos == BreakIterator::DONE)
217*cdf0e10cSrcweir                     return Text.getLength();
218*cdf0e10cSrcweir             }
219*cdf0e10cSrcweir         } else { // for CHARACTER mode
220*cdf0e10cSrcweir             for (nDone = 0; nDone < nCount && nStartPos < Text.getLength(); nDone++)
221*cdf0e10cSrcweir                 Text.iterateCodePoints(&nStartPos, 1);
222*cdf0e10cSrcweir         }
223*cdf0e10cSrcweir         return nStartPos;
224*cdf0e10cSrcweir }
225*cdf0e10cSrcweir 
226*cdf0e10cSrcweir sal_Int32 SAL_CALL BreakIterator_Unicode::previousCharacters( const OUString& Text,
227*cdf0e10cSrcweir         sal_Int32 nStartPos, const lang::Locale& rLocale,
228*cdf0e10cSrcweir         sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
229*cdf0e10cSrcweir         throw(uno::RuntimeException)
230*cdf0e10cSrcweir {
231*cdf0e10cSrcweir         if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) { // for CELL mode
232*cdf0e10cSrcweir             loadICUBreakIterator(rLocale, LOAD_CHARACTER_BREAKITERATOR, 0, "char", Text);
233*cdf0e10cSrcweir             for (nDone = 0; nDone < nCount; nDone++) {
234*cdf0e10cSrcweir                 nStartPos = character.aBreakIterator->preceding(nStartPos);
235*cdf0e10cSrcweir                 if (nStartPos == BreakIterator::DONE)
236*cdf0e10cSrcweir                     return 0;
237*cdf0e10cSrcweir             }
238*cdf0e10cSrcweir         } else { // for BS to delete one char and CHARACTER mode.
239*cdf0e10cSrcweir             for (nDone = 0; nDone < nCount && nStartPos > 0; nDone++)
240*cdf0e10cSrcweir                 Text.iterateCodePoints(&nStartPos, -1);
241*cdf0e10cSrcweir         }
242*cdf0e10cSrcweir         return nStartPos;
243*cdf0e10cSrcweir }
244*cdf0e10cSrcweir 
245*cdf0e10cSrcweir 
246*cdf0e10cSrcweir Boundary SAL_CALL BreakIterator_Unicode::nextWord( const OUString& Text, sal_Int32 nStartPos,
247*cdf0e10cSrcweir     const lang::Locale& rLocale, sal_Int16 rWordType ) throw(uno::RuntimeException)
248*cdf0e10cSrcweir {
249*cdf0e10cSrcweir         loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
250*cdf0e10cSrcweir 
251*cdf0e10cSrcweir         result.startPos = word.aBreakIterator->following(nStartPos);
252*cdf0e10cSrcweir         if( result.startPos >= Text.getLength() || result.startPos == BreakIterator::DONE )
253*cdf0e10cSrcweir             result.endPos = result.startPos;
254*cdf0e10cSrcweir         else {
255*cdf0e10cSrcweir             if ( (rWordType == WordType::ANYWORD_IGNOREWHITESPACES ||
256*cdf0e10cSrcweir                     rWordType == WordType::DICTIONARY_WORD ) &&
257*cdf0e10cSrcweir                         u_isWhitespace(Text.iterateCodePoints(&result.startPos, 0)) )
258*cdf0e10cSrcweir                 result.startPos = word.aBreakIterator->following(result.startPos);
259*cdf0e10cSrcweir 
260*cdf0e10cSrcweir             result.endPos = word.aBreakIterator->following(result.startPos);
261*cdf0e10cSrcweir             if(result.endPos == BreakIterator::DONE)
262*cdf0e10cSrcweir                 result.endPos = result.startPos;
263*cdf0e10cSrcweir         }
264*cdf0e10cSrcweir         return result;
265*cdf0e10cSrcweir }
266*cdf0e10cSrcweir 
267*cdf0e10cSrcweir 
268*cdf0e10cSrcweir Boundary SAL_CALL BreakIterator_Unicode::previousWord(const OUString& Text, sal_Int32 nStartPos,
269*cdf0e10cSrcweir         const lang::Locale& rLocale, sal_Int16 rWordType) throw(uno::RuntimeException)
270*cdf0e10cSrcweir {
271*cdf0e10cSrcweir         loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
272*cdf0e10cSrcweir 
273*cdf0e10cSrcweir         result.startPos = word.aBreakIterator->preceding(nStartPos);
274*cdf0e10cSrcweir         if( result.startPos < 0 || result.startPos == BreakIterator::DONE)
275*cdf0e10cSrcweir             result.endPos = result.startPos;
276*cdf0e10cSrcweir         else {
277*cdf0e10cSrcweir             if ( (rWordType == WordType::ANYWORD_IGNOREWHITESPACES ||
278*cdf0e10cSrcweir                     rWordType == WordType::DICTIONARY_WORD) &&
279*cdf0e10cSrcweir                         u_isWhitespace(Text.iterateCodePoints(&result.startPos, 0)) )
280*cdf0e10cSrcweir                 result.startPos = word.aBreakIterator->preceding(result.startPos);
281*cdf0e10cSrcweir 
282*cdf0e10cSrcweir             result.endPos = word.aBreakIterator->following(result.startPos);
283*cdf0e10cSrcweir             if(result.endPos == BreakIterator::DONE)
284*cdf0e10cSrcweir                 result.endPos = result.startPos;
285*cdf0e10cSrcweir         }
286*cdf0e10cSrcweir         return result;
287*cdf0e10cSrcweir }
288*cdf0e10cSrcweir 
289*cdf0e10cSrcweir 
290*cdf0e10cSrcweir Boundary SAL_CALL BreakIterator_Unicode::getWordBoundary( const OUString& Text, sal_Int32 nPos, const lang::Locale& rLocale,
291*cdf0e10cSrcweir         sal_Int16 rWordType, sal_Bool bDirection ) throw(uno::RuntimeException)
292*cdf0e10cSrcweir {
293*cdf0e10cSrcweir         loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
294*cdf0e10cSrcweir         sal_Int32 len = Text.getLength();
295*cdf0e10cSrcweir 
296*cdf0e10cSrcweir         if(word.aBreakIterator->isBoundary(nPos)) {
297*cdf0e10cSrcweir             result.startPos = result.endPos = nPos;
298*cdf0e10cSrcweir             if((bDirection || nPos == 0) && nPos < len) //forward
299*cdf0e10cSrcweir                 result.endPos = word.aBreakIterator->following(nPos);
300*cdf0e10cSrcweir             else
301*cdf0e10cSrcweir                 result.startPos = word.aBreakIterator->preceding(nPos);
302*cdf0e10cSrcweir         } else {
303*cdf0e10cSrcweir             if(nPos <= 0) {
304*cdf0e10cSrcweir                 result.startPos = 0;
305*cdf0e10cSrcweir                 result.endPos = len ? word.aBreakIterator->following((sal_Int32)0) : 0;
306*cdf0e10cSrcweir             } else if(nPos >= len) {
307*cdf0e10cSrcweir                 result.startPos = word.aBreakIterator->preceding(len);
308*cdf0e10cSrcweir                 result.endPos = len;
309*cdf0e10cSrcweir             } else {
310*cdf0e10cSrcweir                 result.startPos = word.aBreakIterator->preceding(nPos);
311*cdf0e10cSrcweir                 result.endPos = word.aBreakIterator->following(nPos);
312*cdf0e10cSrcweir             }
313*cdf0e10cSrcweir         }
314*cdf0e10cSrcweir         if (result.startPos == BreakIterator::DONE)
315*cdf0e10cSrcweir             result.startPos = result.endPos;
316*cdf0e10cSrcweir         else if (result.endPos == BreakIterator::DONE)
317*cdf0e10cSrcweir             result.endPos = result.startPos;
318*cdf0e10cSrcweir 
319*cdf0e10cSrcweir         return result;
320*cdf0e10cSrcweir }
321*cdf0e10cSrcweir 
322*cdf0e10cSrcweir 
323*cdf0e10cSrcweir sal_Int32 SAL_CALL BreakIterator_Unicode::beginOfSentence( const OUString& Text, sal_Int32 nStartPos,
324*cdf0e10cSrcweir         const lang::Locale &rLocale ) throw(uno::RuntimeException)
325*cdf0e10cSrcweir {
326*cdf0e10cSrcweir         loadICUBreakIterator(rLocale, LOAD_SENTENCE_BREAKITERATOR, 0, "sent", Text);
327*cdf0e10cSrcweir 
328*cdf0e10cSrcweir         sal_Int32 len = Text.getLength();
329*cdf0e10cSrcweir         if (len > 0 && nStartPos == len)
330*cdf0e10cSrcweir             Text.iterateCodePoints(&nStartPos, -1); // issue #i27703# treat end position as part of last sentence
331*cdf0e10cSrcweir         if (!sentence.aBreakIterator->isBoundary(nStartPos))
332*cdf0e10cSrcweir             nStartPos = sentence.aBreakIterator->preceding(nStartPos);
333*cdf0e10cSrcweir 
334*cdf0e10cSrcweir         // skip preceding space.
335*cdf0e10cSrcweir         sal_uInt32 ch = Text.iterateCodePoints(&nStartPos, 1);
336*cdf0e10cSrcweir         while (nStartPos < len && u_isWhitespace(ch)) ch = Text.iterateCodePoints(&nStartPos, 1);
337*cdf0e10cSrcweir 		Text.iterateCodePoints(&nStartPos, -1);
338*cdf0e10cSrcweir 
339*cdf0e10cSrcweir         return nStartPos;
340*cdf0e10cSrcweir }
341*cdf0e10cSrcweir 
342*cdf0e10cSrcweir sal_Int32 SAL_CALL BreakIterator_Unicode::endOfSentence( const OUString& Text, sal_Int32 nStartPos,
343*cdf0e10cSrcweir         const lang::Locale &rLocale ) throw(uno::RuntimeException)
344*cdf0e10cSrcweir {
345*cdf0e10cSrcweir         loadICUBreakIterator(rLocale, LOAD_SENTENCE_BREAKITERATOR, 0, "sent", Text);
346*cdf0e10cSrcweir 
347*cdf0e10cSrcweir         sal_Int32 len = Text.getLength();
348*cdf0e10cSrcweir         if (len > 0 && nStartPos == len)
349*cdf0e10cSrcweir             Text.iterateCodePoints(&nStartPos, -1); // issue #i27703# treat end position as part of last sentence
350*cdf0e10cSrcweir         nStartPos = sentence.aBreakIterator->following(nStartPos);
351*cdf0e10cSrcweir 
352*cdf0e10cSrcweir         sal_Int32 nPos=nStartPos;
353*cdf0e10cSrcweir         while (nPos > 0 && u_isWhitespace(Text.iterateCodePoints(&nPos, -1))) nStartPos=nPos;
354*cdf0e10cSrcweir 
355*cdf0e10cSrcweir         return nStartPos;
356*cdf0e10cSrcweir }
357*cdf0e10cSrcweir 
358*cdf0e10cSrcweir LineBreakResults SAL_CALL BreakIterator_Unicode::getLineBreak(
359*cdf0e10cSrcweir         const OUString& Text, sal_Int32 nStartPos,
360*cdf0e10cSrcweir         const lang::Locale& rLocale, sal_Int32 nMinBreakPos,
361*cdf0e10cSrcweir         const LineBreakHyphenationOptions& hOptions,
362*cdf0e10cSrcweir         const LineBreakUserOptions& /*rOptions*/ ) throw(uno::RuntimeException)
363*cdf0e10cSrcweir {
364*cdf0e10cSrcweir         LineBreakResults lbr;
365*cdf0e10cSrcweir 
366*cdf0e10cSrcweir         if (nStartPos >= Text.getLength()) {
367*cdf0e10cSrcweir             lbr.breakIndex = Text.getLength();
368*cdf0e10cSrcweir             lbr.breakType = BreakType::WORDBOUNDARY;
369*cdf0e10cSrcweir             return lbr;
370*cdf0e10cSrcweir         }
371*cdf0e10cSrcweir 
372*cdf0e10cSrcweir         loadICUBreakIterator(rLocale, LOAD_LINE_BREAKITERATOR, 0, lineRule, Text);
373*cdf0e10cSrcweir 
374*cdf0e10cSrcweir         sal_Bool GlueSpace=sal_True;
375*cdf0e10cSrcweir         while (GlueSpace) {
376*cdf0e10cSrcweir             if (line.aBreakIterator->preceding(nStartPos + 1) == nStartPos) { //Line boundary break
377*cdf0e10cSrcweir                 lbr.breakIndex = nStartPos;
378*cdf0e10cSrcweir                 lbr.breakType = BreakType::WORDBOUNDARY;
379*cdf0e10cSrcweir             } else if (hOptions.rHyphenator.is()) { //Hyphenation break
380*cdf0e10cSrcweir                 Boundary wBoundary = getWordBoundary( Text, nStartPos, rLocale,
381*cdf0e10cSrcweir                                                 WordType::DICTIONARY_WORD, false);
382*cdf0e10cSrcweir                 uno::Reference< linguistic2::XHyphenatedWord > aHyphenatedWord;
383*cdf0e10cSrcweir                 aHyphenatedWord = hOptions.rHyphenator->hyphenate(Text.copy(wBoundary.startPos,
384*cdf0e10cSrcweir                     wBoundary.endPos - wBoundary.startPos), rLocale,
385*cdf0e10cSrcweir                     (sal_Int16) (hOptions.hyphenIndex - wBoundary.startPos), hOptions.aHyphenationOptions);
386*cdf0e10cSrcweir                 if (aHyphenatedWord.is()) {
387*cdf0e10cSrcweir                     lbr.rHyphenatedWord = aHyphenatedWord;
388*cdf0e10cSrcweir                     if(wBoundary.startPos + aHyphenatedWord->getHyphenationPos() + 1 < nMinBreakPos )
389*cdf0e10cSrcweir                         lbr.breakIndex = -1;
390*cdf0e10cSrcweir                     else
391*cdf0e10cSrcweir                         lbr.breakIndex = wBoundary.startPos; //aHyphenatedWord->getHyphenationPos();
392*cdf0e10cSrcweir                     lbr.breakType = BreakType::HYPHENATION;
393*cdf0e10cSrcweir                 } else {
394*cdf0e10cSrcweir                     lbr.breakIndex = line.aBreakIterator->preceding(nStartPos);
395*cdf0e10cSrcweir                     lbr.breakType = BreakType::WORDBOUNDARY;;
396*cdf0e10cSrcweir                 }
397*cdf0e10cSrcweir             } else { //word boundary break
398*cdf0e10cSrcweir                 lbr.breakIndex = line.aBreakIterator->preceding(nStartPos);
399*cdf0e10cSrcweir                 lbr.breakType = BreakType::WORDBOUNDARY;
400*cdf0e10cSrcweir             }
401*cdf0e10cSrcweir 
402*cdf0e10cSrcweir #define WJ 0x2060   // Word Joiner
403*cdf0e10cSrcweir             GlueSpace=sal_False;
404*cdf0e10cSrcweir             if (lbr.breakType == BreakType::WORDBOUNDARY) {
405*cdf0e10cSrcweir                 nStartPos = lbr.breakIndex;
406*cdf0e10cSrcweir                 if (Text[nStartPos--] == WJ)
407*cdf0e10cSrcweir                     GlueSpace=sal_True;
408*cdf0e10cSrcweir                 while (nStartPos >= 0 &&
409*cdf0e10cSrcweir                     (u_isWhitespace(Text.iterateCodePoints(&nStartPos, 0)) || Text[nStartPos] == WJ)) {
410*cdf0e10cSrcweir                     if (Text[nStartPos--] == WJ)
411*cdf0e10cSrcweir                         GlueSpace=sal_True;
412*cdf0e10cSrcweir                 }
413*cdf0e10cSrcweir                 if (GlueSpace && nStartPos < 0)  {
414*cdf0e10cSrcweir                     lbr.breakIndex = 0;
415*cdf0e10cSrcweir                     break;
416*cdf0e10cSrcweir                 }
417*cdf0e10cSrcweir             }
418*cdf0e10cSrcweir         }
419*cdf0e10cSrcweir 
420*cdf0e10cSrcweir         return lbr;
421*cdf0e10cSrcweir }
422*cdf0e10cSrcweir 
423*cdf0e10cSrcweir 
424*cdf0e10cSrcweir 
425*cdf0e10cSrcweir OUString SAL_CALL
426*cdf0e10cSrcweir BreakIterator_Unicode::getImplementationName(void) throw( uno::RuntimeException )
427*cdf0e10cSrcweir {
428*cdf0e10cSrcweir         return OUString::createFromAscii(cBreakIterator);
429*cdf0e10cSrcweir }
430*cdf0e10cSrcweir 
431*cdf0e10cSrcweir sal_Bool SAL_CALL
432*cdf0e10cSrcweir BreakIterator_Unicode::supportsService(const OUString& rServiceName) throw( uno::RuntimeException )
433*cdf0e10cSrcweir {
434*cdf0e10cSrcweir         return !rServiceName.compareToAscii(cBreakIterator);
435*cdf0e10cSrcweir }
436*cdf0e10cSrcweir 
437*cdf0e10cSrcweir uno::Sequence< OUString > SAL_CALL
438*cdf0e10cSrcweir BreakIterator_Unicode::getSupportedServiceNames(void) throw( uno::RuntimeException )
439*cdf0e10cSrcweir {
440*cdf0e10cSrcweir         uno::Sequence< OUString > aRet(1);
441*cdf0e10cSrcweir         aRet[0] = OUString::createFromAscii(cBreakIterator);
442*cdf0e10cSrcweir         return aRet;
443*cdf0e10cSrcweir }
444*cdf0e10cSrcweir 
445*cdf0e10cSrcweir } } } }
446