1*449ab281SAndrew Rist /**************************************************************
2cdf0e10cSrcweir  *
3*449ab281SAndrew Rist  * Licensed to the Apache Software Foundation (ASF) under one
4*449ab281SAndrew Rist  * or more contributor license agreements.  See the NOTICE file
5*449ab281SAndrew Rist  * distributed with this work for additional information
6*449ab281SAndrew Rist  * regarding copyright ownership.  The ASF licenses this file
7*449ab281SAndrew Rist  * to you under the Apache License, Version 2.0 (the
8*449ab281SAndrew Rist  * "License"); you may not use this file except in compliance
9*449ab281SAndrew Rist  * with the License.  You may obtain a copy of the License at
10*449ab281SAndrew Rist  *
11*449ab281SAndrew Rist  *   http://www.apache.org/licenses/LICENSE-2.0
12*449ab281SAndrew Rist  *
13*449ab281SAndrew Rist  * Unless required by applicable law or agreed to in writing,
14*449ab281SAndrew Rist  * software distributed under the License is distributed on an
15*449ab281SAndrew Rist  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*449ab281SAndrew Rist  * KIND, either express or implied.  See the License for the
17*449ab281SAndrew Rist  * specific language governing permissions and limitations
18*449ab281SAndrew Rist  * under the License.
19*449ab281SAndrew Rist  *
20*449ab281SAndrew Rist  *************************************************************/
21*449ab281SAndrew Rist 
22*449ab281SAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_i18npool.hxx"
26cdf0e10cSrcweir #include <breakiterator_unicode.hxx>
27cdf0e10cSrcweir #include <localedata.hxx>
28cdf0e10cSrcweir #include <unicode/uchar.h>
29cdf0e10cSrcweir #include <unicode/locid.h>
30cdf0e10cSrcweir #include <unicode/rbbi.h>
31cdf0e10cSrcweir #include <unicode/udata.h>
32cdf0e10cSrcweir #include <rtl/strbuf.hxx>
33cdf0e10cSrcweir #include <rtl/ustring.hxx>
34cdf0e10cSrcweir 
35cdf0e10cSrcweir U_CDECL_BEGIN
36cdf0e10cSrcweir extern const char OpenOffice_dat[];
37cdf0e10cSrcweir U_CDECL_END
38cdf0e10cSrcweir 
39cdf0e10cSrcweir using namespace ::com::sun::star;
40cdf0e10cSrcweir using namespace ::com::sun::star::lang;
41cdf0e10cSrcweir using namespace ::rtl;
42cdf0e10cSrcweir 
43cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n {
44cdf0e10cSrcweir 
45cdf0e10cSrcweir #define ERROR ::com::sun::star::uno::RuntimeException()
46cdf0e10cSrcweir 
47cdf0e10cSrcweir //#define ImplementName "com.sun.star.i18n.BreakIterator_Unicode";
48cdf0e10cSrcweir 
49cdf0e10cSrcweir 
BreakIterator_Unicode()50cdf0e10cSrcweir BreakIterator_Unicode::BreakIterator_Unicode() :
51cdf0e10cSrcweir     cBreakIterator( "com.sun.star.i18n.BreakIterator_Unicode" ),    // implementation name
52cdf0e10cSrcweir     wordRule( "word" ),
53cdf0e10cSrcweir     lineRule( "line" ),
54cdf0e10cSrcweir     result(),
55cdf0e10cSrcweir     character(),
56cdf0e10cSrcweir     word(),
57cdf0e10cSrcweir     sentence(),
58cdf0e10cSrcweir     line(),
59cdf0e10cSrcweir     icuBI( NULL ),
60cdf0e10cSrcweir     aLocale(),
61cdf0e10cSrcweir     aBreakType(),
62cdf0e10cSrcweir     aWordType()
63cdf0e10cSrcweir {
64cdf0e10cSrcweir }
65cdf0e10cSrcweir 
66cdf0e10cSrcweir 
~BreakIterator_Unicode()67cdf0e10cSrcweir BreakIterator_Unicode::~BreakIterator_Unicode()
68cdf0e10cSrcweir {
69cdf0e10cSrcweir         if (icuBI && icuBI->aBreakIterator) {
70cdf0e10cSrcweir             delete icuBI->aBreakIterator;
71cdf0e10cSrcweir             icuBI->aBreakIterator=NULL;
72cdf0e10cSrcweir         }
73cdf0e10cSrcweir         if (character.aBreakIterator) delete character.aBreakIterator;
74cdf0e10cSrcweir         if (word.aBreakIterator) delete word.aBreakIterator;
75cdf0e10cSrcweir         if (sentence.aBreakIterator) delete sentence.aBreakIterator;
76cdf0e10cSrcweir         if (line.aBreakIterator) delete line.aBreakIterator;
77cdf0e10cSrcweir }
78cdf0e10cSrcweir 
79cdf0e10cSrcweir /*
80cdf0e10cSrcweir 	Wrapper class to provide public access to the RuleBasedBreakIterator's
81cdf0e10cSrcweir 	setbreakType method.
82cdf0e10cSrcweir */
83cdf0e10cSrcweir class OOoRuleBasedBreakIterator : public RuleBasedBreakIterator {
84cdf0e10cSrcweir 	public:
publicSetBreakType(int32_t type)85cdf0e10cSrcweir 		inline void publicSetBreakType(int32_t type) {
86cdf0e10cSrcweir 			setBreakType(type);
87cdf0e10cSrcweir 		};
OOoRuleBasedBreakIterator(UDataMemory * image,UErrorCode & status)88cdf0e10cSrcweir 		OOoRuleBasedBreakIterator(UDataMemory* image,
89cdf0e10cSrcweir 				UErrorCode &status) :
90cdf0e10cSrcweir 			RuleBasedBreakIterator(image, status) { };
91cdf0e10cSrcweir 
92cdf0e10cSrcweir };
93cdf0e10cSrcweir 
94cdf0e10cSrcweir // loading ICU breakiterator on demand.
loadICUBreakIterator(const com::sun::star::lang::Locale & rLocale,sal_Int16 rBreakType,sal_Int16 rWordType,const sal_Char * rule,const OUString & rText)95cdf0e10cSrcweir void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::lang::Locale& rLocale,
96cdf0e10cSrcweir         sal_Int16 rBreakType, sal_Int16 rWordType, const sal_Char *rule, const OUString& rText) throw(uno::RuntimeException)
97cdf0e10cSrcweir {
98cdf0e10cSrcweir     sal_Bool newBreak = sal_False;
99cdf0e10cSrcweir     UErrorCode status = U_ZERO_ERROR;
100cdf0e10cSrcweir     sal_Int16 breakType = 0;
101cdf0e10cSrcweir     switch (rBreakType) {
102cdf0e10cSrcweir         case LOAD_CHARACTER_BREAKITERATOR: icuBI=&character; breakType = 3; break;
103cdf0e10cSrcweir         case LOAD_WORD_BREAKITERATOR: icuBI=&word;
104cdf0e10cSrcweir             switch (rWordType) {
105cdf0e10cSrcweir                 case WordType::ANYWORD_IGNOREWHITESPACES: breakType = 0; rule=wordRule = "edit_word"; break;
106cdf0e10cSrcweir                 case WordType::DICTIONARY_WORD: breakType = 1; rule=wordRule = "dict_word"; break;
107cdf0e10cSrcweir                 case WordType::WORD_COUNT: breakType = 2; rule=wordRule = "count_word"; break;
108cdf0e10cSrcweir             }
109cdf0e10cSrcweir             break;
110cdf0e10cSrcweir         case LOAD_SENTENCE_BREAKITERATOR: icuBI=&sentence; breakType = 5; break;
111cdf0e10cSrcweir         case LOAD_LINE_BREAKITERATOR: icuBI=&line; breakType = 4; break;
112cdf0e10cSrcweir     }
113cdf0e10cSrcweir     if (!icuBI->aBreakIterator || rWordType != aWordType ||
114cdf0e10cSrcweir             rLocale.Language != aLocale.Language || rLocale.Country != aLocale.Country ||
115cdf0e10cSrcweir             rLocale.Variant != aLocale.Variant) {
116cdf0e10cSrcweir         if (icuBI->aBreakIterator) {
117cdf0e10cSrcweir             delete icuBI->aBreakIterator;
118cdf0e10cSrcweir             icuBI->aBreakIterator=NULL;
119cdf0e10cSrcweir         }
120cdf0e10cSrcweir         if (rule) {
121cdf0e10cSrcweir             uno::Sequence< OUString > breakRules = LocaleData().getBreakIteratorRules(rLocale);
122cdf0e10cSrcweir 
123cdf0e10cSrcweir             status = U_ZERO_ERROR;
124cdf0e10cSrcweir             udata_setAppData("OpenOffice", OpenOffice_dat, &status);
125cdf0e10cSrcweir             if ( !U_SUCCESS(status) ) throw ERROR;
126cdf0e10cSrcweir 
127cdf0e10cSrcweir             OOoRuleBasedBreakIterator *rbi = NULL;
128cdf0e10cSrcweir 
129cdf0e10cSrcweir             if (breakRules.getLength() > breakType && breakRules[breakType].getLength() > 0) {
130cdf0e10cSrcweir                 rbi = new OOoRuleBasedBreakIterator(udata_open("OpenOffice", "brk",
131cdf0e10cSrcweir                     OUStringToOString(breakRules[breakType], RTL_TEXTENCODING_ASCII_US).getStr(), &status), status);
132cdf0e10cSrcweir             } else {
133cdf0e10cSrcweir                 status = U_ZERO_ERROR;
134cdf0e10cSrcweir                 OStringBuffer aUDName(64);
135cdf0e10cSrcweir                 aUDName.append(rule);
136cdf0e10cSrcweir                 aUDName.append('_');
137cdf0e10cSrcweir                 aUDName.append( OUStringToOString(rLocale.Language, RTL_TEXTENCODING_ASCII_US));
138cdf0e10cSrcweir                 UDataMemory* pUData = udata_open("OpenOffice", "brk", aUDName.getStr(), &status);
139cdf0e10cSrcweir                 if( U_SUCCESS(status) )
140cdf0e10cSrcweir                     rbi = new OOoRuleBasedBreakIterator( pUData, status);
141cdf0e10cSrcweir                 if (!U_SUCCESS(status) ) {
142cdf0e10cSrcweir                     status = U_ZERO_ERROR;
143cdf0e10cSrcweir                     pUData = udata_open("OpenOffice", "brk", rule, &status);
144cdf0e10cSrcweir                     if( U_SUCCESS(status) )
145cdf0e10cSrcweir                         rbi = new OOoRuleBasedBreakIterator( pUData, status);
146cdf0e10cSrcweir                     if (!U_SUCCESS(status) ) icuBI->aBreakIterator=NULL;
147cdf0e10cSrcweir                 }
148cdf0e10cSrcweir             }
149cdf0e10cSrcweir             if (rbi) {
150cdf0e10cSrcweir                 switch (rBreakType) {
151cdf0e10cSrcweir                     case LOAD_CHARACTER_BREAKITERATOR: rbi->publicSetBreakType(UBRK_CHARACTER); break;
152cdf0e10cSrcweir                     case LOAD_WORD_BREAKITERATOR: rbi->publicSetBreakType(UBRK_WORD); break;
153cdf0e10cSrcweir                     case LOAD_SENTENCE_BREAKITERATOR: rbi->publicSetBreakType(UBRK_SENTENCE); break;
154cdf0e10cSrcweir                     case LOAD_LINE_BREAKITERATOR: rbi->publicSetBreakType(UBRK_LINE); break;
155cdf0e10cSrcweir                 }
156cdf0e10cSrcweir                 icuBI->aBreakIterator = rbi;
157cdf0e10cSrcweir             }
158cdf0e10cSrcweir         }
159cdf0e10cSrcweir 
160cdf0e10cSrcweir         if (!icuBI->aBreakIterator) {
161cdf0e10cSrcweir             icu::Locale icuLocale(
162cdf0e10cSrcweir                     OUStringToOString(rLocale.Language, RTL_TEXTENCODING_ASCII_US).getStr(),
163cdf0e10cSrcweir                     OUStringToOString(rLocale.Country, RTL_TEXTENCODING_ASCII_US).getStr(),
164cdf0e10cSrcweir                     OUStringToOString(rLocale.Variant, RTL_TEXTENCODING_ASCII_US).getStr());
165cdf0e10cSrcweir 
166cdf0e10cSrcweir             status = U_ZERO_ERROR;
167cdf0e10cSrcweir             switch (rBreakType) {
168cdf0e10cSrcweir                 case LOAD_CHARACTER_BREAKITERATOR:
169cdf0e10cSrcweir                     icuBI->aBreakIterator =  icu::BreakIterator::createCharacterInstance(icuLocale, status);
170cdf0e10cSrcweir                     break;
171cdf0e10cSrcweir                 case LOAD_WORD_BREAKITERATOR:
172cdf0e10cSrcweir                     icuBI->aBreakIterator =  icu::BreakIterator::createWordInstance(icuLocale, status);
173cdf0e10cSrcweir                     break;
174cdf0e10cSrcweir                 case LOAD_SENTENCE_BREAKITERATOR:
175cdf0e10cSrcweir                     icuBI->aBreakIterator = icu::BreakIterator::createSentenceInstance(icuLocale, status);
176cdf0e10cSrcweir                     break;
177cdf0e10cSrcweir                 case LOAD_LINE_BREAKITERATOR:
178cdf0e10cSrcweir                     icuBI->aBreakIterator = icu::BreakIterator::createLineInstance(icuLocale, status);
179cdf0e10cSrcweir                     break;
180cdf0e10cSrcweir             }
181cdf0e10cSrcweir             if ( !U_SUCCESS(status) ) {
182cdf0e10cSrcweir                 icuBI->aBreakIterator=NULL;
183cdf0e10cSrcweir                 throw ERROR;
184cdf0e10cSrcweir             }
185cdf0e10cSrcweir         }
186cdf0e10cSrcweir         if (icuBI->aBreakIterator) {
187cdf0e10cSrcweir             aLocale=rLocale;
188cdf0e10cSrcweir             aWordType=rWordType;
189cdf0e10cSrcweir             aBreakType=rBreakType;
190cdf0e10cSrcweir             newBreak=sal_True;
191cdf0e10cSrcweir         } else {
192cdf0e10cSrcweir             throw ERROR;
193cdf0e10cSrcweir         }
194cdf0e10cSrcweir     }
195cdf0e10cSrcweir 
196cdf0e10cSrcweir     if (newBreak || icuBI->aICUText.compare(UnicodeString(reinterpret_cast<const UChar *>(rText.getStr()), rText.getLength()))) {	// UChar != sal_Unicode in MinGW
197cdf0e10cSrcweir         icuBI->aICUText=UnicodeString(reinterpret_cast<const UChar *>(rText.getStr()), rText.getLength());
198cdf0e10cSrcweir         icuBI->aBreakIterator->setText(icuBI->aICUText);
199cdf0e10cSrcweir     }
200cdf0e10cSrcweir }
201cdf0e10cSrcweir 
202cdf0e10cSrcweir 
nextCharacters(const OUString & Text,sal_Int32 nStartPos,const lang::Locale & rLocale,sal_Int16 nCharacterIteratorMode,sal_Int32 nCount,sal_Int32 & nDone)203cdf0e10cSrcweir sal_Int32 SAL_CALL BreakIterator_Unicode::nextCharacters( const OUString& Text,
204cdf0e10cSrcweir         sal_Int32 nStartPos, const lang::Locale &rLocale,
205cdf0e10cSrcweir         sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
206cdf0e10cSrcweir         throw(uno::RuntimeException)
207cdf0e10cSrcweir {
208cdf0e10cSrcweir         if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) { // for CELL mode
209cdf0e10cSrcweir             loadICUBreakIterator(rLocale, LOAD_CHARACTER_BREAKITERATOR, 0, "char", Text);
210cdf0e10cSrcweir             for (nDone = 0; nDone < nCount; nDone++) {
211cdf0e10cSrcweir                 nStartPos = character.aBreakIterator->following(nStartPos);
212cdf0e10cSrcweir                 if (nStartPos == BreakIterator::DONE)
213cdf0e10cSrcweir                     return Text.getLength();
214cdf0e10cSrcweir             }
215cdf0e10cSrcweir         } else { // for CHARACTER mode
216cdf0e10cSrcweir             for (nDone = 0; nDone < nCount && nStartPos < Text.getLength(); nDone++)
217cdf0e10cSrcweir                 Text.iterateCodePoints(&nStartPos, 1);
218cdf0e10cSrcweir         }
219cdf0e10cSrcweir         return nStartPos;
220cdf0e10cSrcweir }
221cdf0e10cSrcweir 
previousCharacters(const OUString & Text,sal_Int32 nStartPos,const lang::Locale & rLocale,sal_Int16 nCharacterIteratorMode,sal_Int32 nCount,sal_Int32 & nDone)222cdf0e10cSrcweir sal_Int32 SAL_CALL BreakIterator_Unicode::previousCharacters( const OUString& Text,
223cdf0e10cSrcweir         sal_Int32 nStartPos, const lang::Locale& rLocale,
224cdf0e10cSrcweir         sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
225cdf0e10cSrcweir         throw(uno::RuntimeException)
226cdf0e10cSrcweir {
227cdf0e10cSrcweir         if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) { // for CELL mode
228cdf0e10cSrcweir             loadICUBreakIterator(rLocale, LOAD_CHARACTER_BREAKITERATOR, 0, "char", Text);
229cdf0e10cSrcweir             for (nDone = 0; nDone < nCount; nDone++) {
230cdf0e10cSrcweir                 nStartPos = character.aBreakIterator->preceding(nStartPos);
231cdf0e10cSrcweir                 if (nStartPos == BreakIterator::DONE)
232cdf0e10cSrcweir                     return 0;
233cdf0e10cSrcweir             }
234cdf0e10cSrcweir         } else { // for BS to delete one char and CHARACTER mode.
235cdf0e10cSrcweir             for (nDone = 0; nDone < nCount && nStartPos > 0; nDone++)
236cdf0e10cSrcweir                 Text.iterateCodePoints(&nStartPos, -1);
237cdf0e10cSrcweir         }
238cdf0e10cSrcweir         return nStartPos;
239cdf0e10cSrcweir }
240cdf0e10cSrcweir 
241cdf0e10cSrcweir 
nextWord(const OUString & Text,sal_Int32 nStartPos,const lang::Locale & rLocale,sal_Int16 rWordType)242cdf0e10cSrcweir Boundary SAL_CALL BreakIterator_Unicode::nextWord( const OUString& Text, sal_Int32 nStartPos,
243cdf0e10cSrcweir     const lang::Locale& rLocale, sal_Int16 rWordType ) throw(uno::RuntimeException)
244cdf0e10cSrcweir {
245cdf0e10cSrcweir         loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
246cdf0e10cSrcweir 
247cdf0e10cSrcweir         result.startPos = word.aBreakIterator->following(nStartPos);
248cdf0e10cSrcweir         if( result.startPos >= Text.getLength() || result.startPos == BreakIterator::DONE )
249cdf0e10cSrcweir             result.endPos = result.startPos;
250cdf0e10cSrcweir         else {
251cdf0e10cSrcweir             if ( (rWordType == WordType::ANYWORD_IGNOREWHITESPACES ||
252cdf0e10cSrcweir                     rWordType == WordType::DICTIONARY_WORD ) &&
253cdf0e10cSrcweir                         u_isWhitespace(Text.iterateCodePoints(&result.startPos, 0)) )
254cdf0e10cSrcweir                 result.startPos = word.aBreakIterator->following(result.startPos);
255cdf0e10cSrcweir 
256cdf0e10cSrcweir             result.endPos = word.aBreakIterator->following(result.startPos);
257cdf0e10cSrcweir             if(result.endPos == BreakIterator::DONE)
258cdf0e10cSrcweir                 result.endPos = result.startPos;
259cdf0e10cSrcweir         }
260cdf0e10cSrcweir         return result;
261cdf0e10cSrcweir }
262cdf0e10cSrcweir 
263cdf0e10cSrcweir 
previousWord(const OUString & Text,sal_Int32 nStartPos,const lang::Locale & rLocale,sal_Int16 rWordType)264cdf0e10cSrcweir Boundary SAL_CALL BreakIterator_Unicode::previousWord(const OUString& Text, sal_Int32 nStartPos,
265cdf0e10cSrcweir         const lang::Locale& rLocale, sal_Int16 rWordType) throw(uno::RuntimeException)
266cdf0e10cSrcweir {
267cdf0e10cSrcweir         loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
268cdf0e10cSrcweir 
269cdf0e10cSrcweir         result.startPos = word.aBreakIterator->preceding(nStartPos);
270cdf0e10cSrcweir         if( result.startPos < 0 || result.startPos == BreakIterator::DONE)
271cdf0e10cSrcweir             result.endPos = result.startPos;
272cdf0e10cSrcweir         else {
273cdf0e10cSrcweir             if ( (rWordType == WordType::ANYWORD_IGNOREWHITESPACES ||
274cdf0e10cSrcweir                     rWordType == WordType::DICTIONARY_WORD) &&
275cdf0e10cSrcweir                         u_isWhitespace(Text.iterateCodePoints(&result.startPos, 0)) )
276cdf0e10cSrcweir                 result.startPos = word.aBreakIterator->preceding(result.startPos);
277cdf0e10cSrcweir 
278cdf0e10cSrcweir             result.endPos = word.aBreakIterator->following(result.startPos);
279cdf0e10cSrcweir             if(result.endPos == BreakIterator::DONE)
280cdf0e10cSrcweir                 result.endPos = result.startPos;
281cdf0e10cSrcweir         }
282cdf0e10cSrcweir         return result;
283cdf0e10cSrcweir }
284cdf0e10cSrcweir 
285cdf0e10cSrcweir 
getWordBoundary(const OUString & Text,sal_Int32 nPos,const lang::Locale & rLocale,sal_Int16 rWordType,sal_Bool bDirection)286cdf0e10cSrcweir Boundary SAL_CALL BreakIterator_Unicode::getWordBoundary( const OUString& Text, sal_Int32 nPos, const lang::Locale& rLocale,
287cdf0e10cSrcweir         sal_Int16 rWordType, sal_Bool bDirection ) throw(uno::RuntimeException)
288cdf0e10cSrcweir {
289cdf0e10cSrcweir         loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
290cdf0e10cSrcweir         sal_Int32 len = Text.getLength();
291cdf0e10cSrcweir 
292cdf0e10cSrcweir         if(word.aBreakIterator->isBoundary(nPos)) {
293cdf0e10cSrcweir             result.startPos = result.endPos = nPos;
294cdf0e10cSrcweir             if((bDirection || nPos == 0) && nPos < len) //forward
295cdf0e10cSrcweir                 result.endPos = word.aBreakIterator->following(nPos);
296cdf0e10cSrcweir             else
297cdf0e10cSrcweir                 result.startPos = word.aBreakIterator->preceding(nPos);
298cdf0e10cSrcweir         } else {
299cdf0e10cSrcweir             if(nPos <= 0) {
300cdf0e10cSrcweir                 result.startPos = 0;
301cdf0e10cSrcweir                 result.endPos = len ? word.aBreakIterator->following((sal_Int32)0) : 0;
302cdf0e10cSrcweir             } else if(nPos >= len) {
303cdf0e10cSrcweir                 result.startPos = word.aBreakIterator->preceding(len);
304cdf0e10cSrcweir                 result.endPos = len;
305cdf0e10cSrcweir             } else {
306cdf0e10cSrcweir                 result.startPos = word.aBreakIterator->preceding(nPos);
307cdf0e10cSrcweir                 result.endPos = word.aBreakIterator->following(nPos);
308cdf0e10cSrcweir             }
309cdf0e10cSrcweir         }
310cdf0e10cSrcweir         if (result.startPos == BreakIterator::DONE)
311cdf0e10cSrcweir             result.startPos = result.endPos;
312cdf0e10cSrcweir         else if (result.endPos == BreakIterator::DONE)
313cdf0e10cSrcweir             result.endPos = result.startPos;
314cdf0e10cSrcweir 
315cdf0e10cSrcweir         return result;
316cdf0e10cSrcweir }
317cdf0e10cSrcweir 
318cdf0e10cSrcweir 
beginOfSentence(const OUString & Text,sal_Int32 nStartPos,const lang::Locale & rLocale)319cdf0e10cSrcweir sal_Int32 SAL_CALL BreakIterator_Unicode::beginOfSentence( const OUString& Text, sal_Int32 nStartPos,
320cdf0e10cSrcweir         const lang::Locale &rLocale ) throw(uno::RuntimeException)
321cdf0e10cSrcweir {
322cdf0e10cSrcweir         loadICUBreakIterator(rLocale, LOAD_SENTENCE_BREAKITERATOR, 0, "sent", Text);
323cdf0e10cSrcweir 
324cdf0e10cSrcweir         sal_Int32 len = Text.getLength();
325cdf0e10cSrcweir         if (len > 0 && nStartPos == len)
326cdf0e10cSrcweir             Text.iterateCodePoints(&nStartPos, -1); // issue #i27703# treat end position as part of last sentence
327cdf0e10cSrcweir         if (!sentence.aBreakIterator->isBoundary(nStartPos))
328cdf0e10cSrcweir             nStartPos = sentence.aBreakIterator->preceding(nStartPos);
329cdf0e10cSrcweir 
330cdf0e10cSrcweir         // skip preceding space.
331cdf0e10cSrcweir         sal_uInt32 ch = Text.iterateCodePoints(&nStartPos, 1);
332cdf0e10cSrcweir         while (nStartPos < len && u_isWhitespace(ch)) ch = Text.iterateCodePoints(&nStartPos, 1);
333cdf0e10cSrcweir 		Text.iterateCodePoints(&nStartPos, -1);
334cdf0e10cSrcweir 
335cdf0e10cSrcweir         return nStartPos;
336cdf0e10cSrcweir }
337cdf0e10cSrcweir 
endOfSentence(const OUString & Text,sal_Int32 nStartPos,const lang::Locale & rLocale)338cdf0e10cSrcweir sal_Int32 SAL_CALL BreakIterator_Unicode::endOfSentence( const OUString& Text, sal_Int32 nStartPos,
339cdf0e10cSrcweir         const lang::Locale &rLocale ) throw(uno::RuntimeException)
340cdf0e10cSrcweir {
341cdf0e10cSrcweir         loadICUBreakIterator(rLocale, LOAD_SENTENCE_BREAKITERATOR, 0, "sent", Text);
342cdf0e10cSrcweir 
343cdf0e10cSrcweir         sal_Int32 len = Text.getLength();
344cdf0e10cSrcweir         if (len > 0 && nStartPos == len)
345cdf0e10cSrcweir             Text.iterateCodePoints(&nStartPos, -1); // issue #i27703# treat end position as part of last sentence
346cdf0e10cSrcweir         nStartPos = sentence.aBreakIterator->following(nStartPos);
347cdf0e10cSrcweir 
348cdf0e10cSrcweir         sal_Int32 nPos=nStartPos;
349cdf0e10cSrcweir         while (nPos > 0 && u_isWhitespace(Text.iterateCodePoints(&nPos, -1))) nStartPos=nPos;
350cdf0e10cSrcweir 
351cdf0e10cSrcweir         return nStartPos;
352cdf0e10cSrcweir }
353cdf0e10cSrcweir 
getLineBreak(const OUString & Text,sal_Int32 nStartPos,const lang::Locale & rLocale,sal_Int32 nMinBreakPos,const LineBreakHyphenationOptions & hOptions,const LineBreakUserOptions &)354cdf0e10cSrcweir LineBreakResults SAL_CALL BreakIterator_Unicode::getLineBreak(
355cdf0e10cSrcweir         const OUString& Text, sal_Int32 nStartPos,
356cdf0e10cSrcweir         const lang::Locale& rLocale, sal_Int32 nMinBreakPos,
357cdf0e10cSrcweir         const LineBreakHyphenationOptions& hOptions,
358cdf0e10cSrcweir         const LineBreakUserOptions& /*rOptions*/ ) throw(uno::RuntimeException)
359cdf0e10cSrcweir {
360cdf0e10cSrcweir         LineBreakResults lbr;
361cdf0e10cSrcweir 
362cdf0e10cSrcweir         if (nStartPos >= Text.getLength()) {
363cdf0e10cSrcweir             lbr.breakIndex = Text.getLength();
364cdf0e10cSrcweir             lbr.breakType = BreakType::WORDBOUNDARY;
365cdf0e10cSrcweir             return lbr;
366cdf0e10cSrcweir         }
367cdf0e10cSrcweir 
368cdf0e10cSrcweir         loadICUBreakIterator(rLocale, LOAD_LINE_BREAKITERATOR, 0, lineRule, Text);
369cdf0e10cSrcweir 
370cdf0e10cSrcweir         sal_Bool GlueSpace=sal_True;
371cdf0e10cSrcweir         while (GlueSpace) {
372cdf0e10cSrcweir             if (line.aBreakIterator->preceding(nStartPos + 1) == nStartPos) { //Line boundary break
373cdf0e10cSrcweir                 lbr.breakIndex = nStartPos;
374cdf0e10cSrcweir                 lbr.breakType = BreakType::WORDBOUNDARY;
375cdf0e10cSrcweir             } else if (hOptions.rHyphenator.is()) { //Hyphenation break
376cdf0e10cSrcweir                 Boundary wBoundary = getWordBoundary( Text, nStartPos, rLocale,
377cdf0e10cSrcweir                                                 WordType::DICTIONARY_WORD, false);
378cdf0e10cSrcweir                 uno::Reference< linguistic2::XHyphenatedWord > aHyphenatedWord;
379cdf0e10cSrcweir                 aHyphenatedWord = hOptions.rHyphenator->hyphenate(Text.copy(wBoundary.startPos,
380cdf0e10cSrcweir                     wBoundary.endPos - wBoundary.startPos), rLocale,
381cdf0e10cSrcweir                     (sal_Int16) (hOptions.hyphenIndex - wBoundary.startPos), hOptions.aHyphenationOptions);
382cdf0e10cSrcweir                 if (aHyphenatedWord.is()) {
383cdf0e10cSrcweir                     lbr.rHyphenatedWord = aHyphenatedWord;
384cdf0e10cSrcweir                     if(wBoundary.startPos + aHyphenatedWord->getHyphenationPos() + 1 < nMinBreakPos )
385cdf0e10cSrcweir                         lbr.breakIndex = -1;
386cdf0e10cSrcweir                     else
387cdf0e10cSrcweir                         lbr.breakIndex = wBoundary.startPos; //aHyphenatedWord->getHyphenationPos();
388cdf0e10cSrcweir                     lbr.breakType = BreakType::HYPHENATION;
389cdf0e10cSrcweir                 } else {
390cdf0e10cSrcweir                     lbr.breakIndex = line.aBreakIterator->preceding(nStartPos);
391cdf0e10cSrcweir                     lbr.breakType = BreakType::WORDBOUNDARY;;
392cdf0e10cSrcweir                 }
393cdf0e10cSrcweir             } else { //word boundary break
394cdf0e10cSrcweir                 lbr.breakIndex = line.aBreakIterator->preceding(nStartPos);
395cdf0e10cSrcweir                 lbr.breakType = BreakType::WORDBOUNDARY;
396cdf0e10cSrcweir             }
397cdf0e10cSrcweir 
398cdf0e10cSrcweir #define WJ 0x2060   // Word Joiner
399cdf0e10cSrcweir             GlueSpace=sal_False;
400cdf0e10cSrcweir             if (lbr.breakType == BreakType::WORDBOUNDARY) {
401cdf0e10cSrcweir                 nStartPos = lbr.breakIndex;
402cdf0e10cSrcweir                 if (Text[nStartPos--] == WJ)
403cdf0e10cSrcweir                     GlueSpace=sal_True;
404cdf0e10cSrcweir                 while (nStartPos >= 0 &&
405cdf0e10cSrcweir                     (u_isWhitespace(Text.iterateCodePoints(&nStartPos, 0)) || Text[nStartPos] == WJ)) {
406cdf0e10cSrcweir                     if (Text[nStartPos--] == WJ)
407cdf0e10cSrcweir                         GlueSpace=sal_True;
408cdf0e10cSrcweir                 }
409cdf0e10cSrcweir                 if (GlueSpace && nStartPos < 0)  {
410cdf0e10cSrcweir                     lbr.breakIndex = 0;
411cdf0e10cSrcweir                     break;
412cdf0e10cSrcweir                 }
413cdf0e10cSrcweir             }
414cdf0e10cSrcweir         }
415cdf0e10cSrcweir 
416cdf0e10cSrcweir         return lbr;
417cdf0e10cSrcweir }
418cdf0e10cSrcweir 
419cdf0e10cSrcweir 
420cdf0e10cSrcweir 
421cdf0e10cSrcweir OUString SAL_CALL
getImplementationName(void)422cdf0e10cSrcweir BreakIterator_Unicode::getImplementationName(void) throw( uno::RuntimeException )
423cdf0e10cSrcweir {
424cdf0e10cSrcweir         return OUString::createFromAscii(cBreakIterator);
425cdf0e10cSrcweir }
426cdf0e10cSrcweir 
427cdf0e10cSrcweir sal_Bool SAL_CALL
supportsService(const OUString & rServiceName)428cdf0e10cSrcweir BreakIterator_Unicode::supportsService(const OUString& rServiceName) throw( uno::RuntimeException )
429cdf0e10cSrcweir {
430cdf0e10cSrcweir         return !rServiceName.compareToAscii(cBreakIterator);
431cdf0e10cSrcweir }
432cdf0e10cSrcweir 
433cdf0e10cSrcweir uno::Sequence< OUString > SAL_CALL
getSupportedServiceNames(void)434cdf0e10cSrcweir BreakIterator_Unicode::getSupportedServiceNames(void) throw( uno::RuntimeException )
435cdf0e10cSrcweir {
436cdf0e10cSrcweir         uno::Sequence< OUString > aRet(1);
437cdf0e10cSrcweir         aRet[0] = OUString::createFromAscii(cBreakIterator);
438cdf0e10cSrcweir         return aRet;
439cdf0e10cSrcweir }
440cdf0e10cSrcweir 
441cdf0e10cSrcweir } } } }
442