1*449ab281SAndrew Rist /**************************************************************
2cdf0e10cSrcweir *
3*449ab281SAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one
4*449ab281SAndrew Rist * or more contributor license agreements. See the NOTICE file
5*449ab281SAndrew Rist * distributed with this work for additional information
6*449ab281SAndrew Rist * regarding copyright ownership. The ASF licenses this file
7*449ab281SAndrew Rist * to you under the Apache License, Version 2.0 (the
8*449ab281SAndrew Rist * "License"); you may not use this file except in compliance
9*449ab281SAndrew Rist * with the License. You may obtain a copy of the License at
10*449ab281SAndrew Rist *
11*449ab281SAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0
12*449ab281SAndrew Rist *
13*449ab281SAndrew Rist * Unless required by applicable law or agreed to in writing,
14*449ab281SAndrew Rist * software distributed under the License is distributed on an
15*449ab281SAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*449ab281SAndrew Rist * KIND, either express or implied. See the License for the
17*449ab281SAndrew Rist * specific language governing permissions and limitations
18*449ab281SAndrew Rist * under the License.
19*449ab281SAndrew Rist *
20*449ab281SAndrew Rist *************************************************************/
21*449ab281SAndrew Rist
22*449ab281SAndrew Rist
23cdf0e10cSrcweir
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_i18npool.hxx"
26cdf0e10cSrcweir #include <breakiterator_unicode.hxx>
27cdf0e10cSrcweir #include <localedata.hxx>
28cdf0e10cSrcweir #include <unicode/uchar.h>
29cdf0e10cSrcweir #include <unicode/locid.h>
30cdf0e10cSrcweir #include <unicode/rbbi.h>
31cdf0e10cSrcweir #include <unicode/udata.h>
32cdf0e10cSrcweir #include <rtl/strbuf.hxx>
33cdf0e10cSrcweir #include <rtl/ustring.hxx>
34cdf0e10cSrcweir
35cdf0e10cSrcweir U_CDECL_BEGIN
36cdf0e10cSrcweir extern const char OpenOffice_dat[];
37cdf0e10cSrcweir U_CDECL_END
38cdf0e10cSrcweir
39cdf0e10cSrcweir using namespace ::com::sun::star;
40cdf0e10cSrcweir using namespace ::com::sun::star::lang;
41cdf0e10cSrcweir using namespace ::rtl;
42cdf0e10cSrcweir
43cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n {
44cdf0e10cSrcweir
45cdf0e10cSrcweir #define ERROR ::com::sun::star::uno::RuntimeException()
46cdf0e10cSrcweir
47cdf0e10cSrcweir //#define ImplementName "com.sun.star.i18n.BreakIterator_Unicode";
48cdf0e10cSrcweir
49cdf0e10cSrcweir
BreakIterator_Unicode()50cdf0e10cSrcweir BreakIterator_Unicode::BreakIterator_Unicode() :
51cdf0e10cSrcweir cBreakIterator( "com.sun.star.i18n.BreakIterator_Unicode" ), // implementation name
52cdf0e10cSrcweir wordRule( "word" ),
53cdf0e10cSrcweir lineRule( "line" ),
54cdf0e10cSrcweir result(),
55cdf0e10cSrcweir character(),
56cdf0e10cSrcweir word(),
57cdf0e10cSrcweir sentence(),
58cdf0e10cSrcweir line(),
59cdf0e10cSrcweir icuBI( NULL ),
60cdf0e10cSrcweir aLocale(),
61cdf0e10cSrcweir aBreakType(),
62cdf0e10cSrcweir aWordType()
63cdf0e10cSrcweir {
64cdf0e10cSrcweir }
65cdf0e10cSrcweir
66cdf0e10cSrcweir
~BreakIterator_Unicode()67cdf0e10cSrcweir BreakIterator_Unicode::~BreakIterator_Unicode()
68cdf0e10cSrcweir {
69cdf0e10cSrcweir if (icuBI && icuBI->aBreakIterator) {
70cdf0e10cSrcweir delete icuBI->aBreakIterator;
71cdf0e10cSrcweir icuBI->aBreakIterator=NULL;
72cdf0e10cSrcweir }
73cdf0e10cSrcweir if (character.aBreakIterator) delete character.aBreakIterator;
74cdf0e10cSrcweir if (word.aBreakIterator) delete word.aBreakIterator;
75cdf0e10cSrcweir if (sentence.aBreakIterator) delete sentence.aBreakIterator;
76cdf0e10cSrcweir if (line.aBreakIterator) delete line.aBreakIterator;
77cdf0e10cSrcweir }
78cdf0e10cSrcweir
79cdf0e10cSrcweir /*
80cdf0e10cSrcweir Wrapper class to provide public access to the RuleBasedBreakIterator's
81cdf0e10cSrcweir setbreakType method.
82cdf0e10cSrcweir */
83cdf0e10cSrcweir class OOoRuleBasedBreakIterator : public RuleBasedBreakIterator {
84cdf0e10cSrcweir public:
publicSetBreakType(int32_t type)85cdf0e10cSrcweir inline void publicSetBreakType(int32_t type) {
86cdf0e10cSrcweir setBreakType(type);
87cdf0e10cSrcweir };
OOoRuleBasedBreakIterator(UDataMemory * image,UErrorCode & status)88cdf0e10cSrcweir OOoRuleBasedBreakIterator(UDataMemory* image,
89cdf0e10cSrcweir UErrorCode &status) :
90cdf0e10cSrcweir RuleBasedBreakIterator(image, status) { };
91cdf0e10cSrcweir
92cdf0e10cSrcweir };
93cdf0e10cSrcweir
94cdf0e10cSrcweir // loading ICU breakiterator on demand.
loadICUBreakIterator(const com::sun::star::lang::Locale & rLocale,sal_Int16 rBreakType,sal_Int16 rWordType,const sal_Char * rule,const OUString & rText)95cdf0e10cSrcweir void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::lang::Locale& rLocale,
96cdf0e10cSrcweir sal_Int16 rBreakType, sal_Int16 rWordType, const sal_Char *rule, const OUString& rText) throw(uno::RuntimeException)
97cdf0e10cSrcweir {
98cdf0e10cSrcweir sal_Bool newBreak = sal_False;
99cdf0e10cSrcweir UErrorCode status = U_ZERO_ERROR;
100cdf0e10cSrcweir sal_Int16 breakType = 0;
101cdf0e10cSrcweir switch (rBreakType) {
102cdf0e10cSrcweir case LOAD_CHARACTER_BREAKITERATOR: icuBI=&character; breakType = 3; break;
103cdf0e10cSrcweir case LOAD_WORD_BREAKITERATOR: icuBI=&word;
104cdf0e10cSrcweir switch (rWordType) {
105cdf0e10cSrcweir case WordType::ANYWORD_IGNOREWHITESPACES: breakType = 0; rule=wordRule = "edit_word"; break;
106cdf0e10cSrcweir case WordType::DICTIONARY_WORD: breakType = 1; rule=wordRule = "dict_word"; break;
107cdf0e10cSrcweir case WordType::WORD_COUNT: breakType = 2; rule=wordRule = "count_word"; break;
108cdf0e10cSrcweir }
109cdf0e10cSrcweir break;
110cdf0e10cSrcweir case LOAD_SENTENCE_BREAKITERATOR: icuBI=&sentence; breakType = 5; break;
111cdf0e10cSrcweir case LOAD_LINE_BREAKITERATOR: icuBI=&line; breakType = 4; break;
112cdf0e10cSrcweir }
113cdf0e10cSrcweir if (!icuBI->aBreakIterator || rWordType != aWordType ||
114cdf0e10cSrcweir rLocale.Language != aLocale.Language || rLocale.Country != aLocale.Country ||
115cdf0e10cSrcweir rLocale.Variant != aLocale.Variant) {
116cdf0e10cSrcweir if (icuBI->aBreakIterator) {
117cdf0e10cSrcweir delete icuBI->aBreakIterator;
118cdf0e10cSrcweir icuBI->aBreakIterator=NULL;
119cdf0e10cSrcweir }
120cdf0e10cSrcweir if (rule) {
121cdf0e10cSrcweir uno::Sequence< OUString > breakRules = LocaleData().getBreakIteratorRules(rLocale);
122cdf0e10cSrcweir
123cdf0e10cSrcweir status = U_ZERO_ERROR;
124cdf0e10cSrcweir udata_setAppData("OpenOffice", OpenOffice_dat, &status);
125cdf0e10cSrcweir if ( !U_SUCCESS(status) ) throw ERROR;
126cdf0e10cSrcweir
127cdf0e10cSrcweir OOoRuleBasedBreakIterator *rbi = NULL;
128cdf0e10cSrcweir
129cdf0e10cSrcweir if (breakRules.getLength() > breakType && breakRules[breakType].getLength() > 0) {
130cdf0e10cSrcweir rbi = new OOoRuleBasedBreakIterator(udata_open("OpenOffice", "brk",
131cdf0e10cSrcweir OUStringToOString(breakRules[breakType], RTL_TEXTENCODING_ASCII_US).getStr(), &status), status);
132cdf0e10cSrcweir } else {
133cdf0e10cSrcweir status = U_ZERO_ERROR;
134cdf0e10cSrcweir OStringBuffer aUDName(64);
135cdf0e10cSrcweir aUDName.append(rule);
136cdf0e10cSrcweir aUDName.append('_');
137cdf0e10cSrcweir aUDName.append( OUStringToOString(rLocale.Language, RTL_TEXTENCODING_ASCII_US));
138cdf0e10cSrcweir UDataMemory* pUData = udata_open("OpenOffice", "brk", aUDName.getStr(), &status);
139cdf0e10cSrcweir if( U_SUCCESS(status) )
140cdf0e10cSrcweir rbi = new OOoRuleBasedBreakIterator( pUData, status);
141cdf0e10cSrcweir if (!U_SUCCESS(status) ) {
142cdf0e10cSrcweir status = U_ZERO_ERROR;
143cdf0e10cSrcweir pUData = udata_open("OpenOffice", "brk", rule, &status);
144cdf0e10cSrcweir if( U_SUCCESS(status) )
145cdf0e10cSrcweir rbi = new OOoRuleBasedBreakIterator( pUData, status);
146cdf0e10cSrcweir if (!U_SUCCESS(status) ) icuBI->aBreakIterator=NULL;
147cdf0e10cSrcweir }
148cdf0e10cSrcweir }
149cdf0e10cSrcweir if (rbi) {
150cdf0e10cSrcweir switch (rBreakType) {
151cdf0e10cSrcweir case LOAD_CHARACTER_BREAKITERATOR: rbi->publicSetBreakType(UBRK_CHARACTER); break;
152cdf0e10cSrcweir case LOAD_WORD_BREAKITERATOR: rbi->publicSetBreakType(UBRK_WORD); break;
153cdf0e10cSrcweir case LOAD_SENTENCE_BREAKITERATOR: rbi->publicSetBreakType(UBRK_SENTENCE); break;
154cdf0e10cSrcweir case LOAD_LINE_BREAKITERATOR: rbi->publicSetBreakType(UBRK_LINE); break;
155cdf0e10cSrcweir }
156cdf0e10cSrcweir icuBI->aBreakIterator = rbi;
157cdf0e10cSrcweir }
158cdf0e10cSrcweir }
159cdf0e10cSrcweir
160cdf0e10cSrcweir if (!icuBI->aBreakIterator) {
161cdf0e10cSrcweir icu::Locale icuLocale(
162cdf0e10cSrcweir OUStringToOString(rLocale.Language, RTL_TEXTENCODING_ASCII_US).getStr(),
163cdf0e10cSrcweir OUStringToOString(rLocale.Country, RTL_TEXTENCODING_ASCII_US).getStr(),
164cdf0e10cSrcweir OUStringToOString(rLocale.Variant, RTL_TEXTENCODING_ASCII_US).getStr());
165cdf0e10cSrcweir
166cdf0e10cSrcweir status = U_ZERO_ERROR;
167cdf0e10cSrcweir switch (rBreakType) {
168cdf0e10cSrcweir case LOAD_CHARACTER_BREAKITERATOR:
169cdf0e10cSrcweir icuBI->aBreakIterator = icu::BreakIterator::createCharacterInstance(icuLocale, status);
170cdf0e10cSrcweir break;
171cdf0e10cSrcweir case LOAD_WORD_BREAKITERATOR:
172cdf0e10cSrcweir icuBI->aBreakIterator = icu::BreakIterator::createWordInstance(icuLocale, status);
173cdf0e10cSrcweir break;
174cdf0e10cSrcweir case LOAD_SENTENCE_BREAKITERATOR:
175cdf0e10cSrcweir icuBI->aBreakIterator = icu::BreakIterator::createSentenceInstance(icuLocale, status);
176cdf0e10cSrcweir break;
177cdf0e10cSrcweir case LOAD_LINE_BREAKITERATOR:
178cdf0e10cSrcweir icuBI->aBreakIterator = icu::BreakIterator::createLineInstance(icuLocale, status);
179cdf0e10cSrcweir break;
180cdf0e10cSrcweir }
181cdf0e10cSrcweir if ( !U_SUCCESS(status) ) {
182cdf0e10cSrcweir icuBI->aBreakIterator=NULL;
183cdf0e10cSrcweir throw ERROR;
184cdf0e10cSrcweir }
185cdf0e10cSrcweir }
186cdf0e10cSrcweir if (icuBI->aBreakIterator) {
187cdf0e10cSrcweir aLocale=rLocale;
188cdf0e10cSrcweir aWordType=rWordType;
189cdf0e10cSrcweir aBreakType=rBreakType;
190cdf0e10cSrcweir newBreak=sal_True;
191cdf0e10cSrcweir } else {
192cdf0e10cSrcweir throw ERROR;
193cdf0e10cSrcweir }
194cdf0e10cSrcweir }
195cdf0e10cSrcweir
196cdf0e10cSrcweir if (newBreak || icuBI->aICUText.compare(UnicodeString(reinterpret_cast<const UChar *>(rText.getStr()), rText.getLength()))) { // UChar != sal_Unicode in MinGW
197cdf0e10cSrcweir icuBI->aICUText=UnicodeString(reinterpret_cast<const UChar *>(rText.getStr()), rText.getLength());
198cdf0e10cSrcweir icuBI->aBreakIterator->setText(icuBI->aICUText);
199cdf0e10cSrcweir }
200cdf0e10cSrcweir }
201cdf0e10cSrcweir
202cdf0e10cSrcweir
nextCharacters(const OUString & Text,sal_Int32 nStartPos,const lang::Locale & rLocale,sal_Int16 nCharacterIteratorMode,sal_Int32 nCount,sal_Int32 & nDone)203cdf0e10cSrcweir sal_Int32 SAL_CALL BreakIterator_Unicode::nextCharacters( const OUString& Text,
204cdf0e10cSrcweir sal_Int32 nStartPos, const lang::Locale &rLocale,
205cdf0e10cSrcweir sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
206cdf0e10cSrcweir throw(uno::RuntimeException)
207cdf0e10cSrcweir {
208cdf0e10cSrcweir if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) { // for CELL mode
209cdf0e10cSrcweir loadICUBreakIterator(rLocale, LOAD_CHARACTER_BREAKITERATOR, 0, "char", Text);
210cdf0e10cSrcweir for (nDone = 0; nDone < nCount; nDone++) {
211cdf0e10cSrcweir nStartPos = character.aBreakIterator->following(nStartPos);
212cdf0e10cSrcweir if (nStartPos == BreakIterator::DONE)
213cdf0e10cSrcweir return Text.getLength();
214cdf0e10cSrcweir }
215cdf0e10cSrcweir } else { // for CHARACTER mode
216cdf0e10cSrcweir for (nDone = 0; nDone < nCount && nStartPos < Text.getLength(); nDone++)
217cdf0e10cSrcweir Text.iterateCodePoints(&nStartPos, 1);
218cdf0e10cSrcweir }
219cdf0e10cSrcweir return nStartPos;
220cdf0e10cSrcweir }
221cdf0e10cSrcweir
previousCharacters(const OUString & Text,sal_Int32 nStartPos,const lang::Locale & rLocale,sal_Int16 nCharacterIteratorMode,sal_Int32 nCount,sal_Int32 & nDone)222cdf0e10cSrcweir sal_Int32 SAL_CALL BreakIterator_Unicode::previousCharacters( const OUString& Text,
223cdf0e10cSrcweir sal_Int32 nStartPos, const lang::Locale& rLocale,
224cdf0e10cSrcweir sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
225cdf0e10cSrcweir throw(uno::RuntimeException)
226cdf0e10cSrcweir {
227cdf0e10cSrcweir if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) { // for CELL mode
228cdf0e10cSrcweir loadICUBreakIterator(rLocale, LOAD_CHARACTER_BREAKITERATOR, 0, "char", Text);
229cdf0e10cSrcweir for (nDone = 0; nDone < nCount; nDone++) {
230cdf0e10cSrcweir nStartPos = character.aBreakIterator->preceding(nStartPos);
231cdf0e10cSrcweir if (nStartPos == BreakIterator::DONE)
232cdf0e10cSrcweir return 0;
233cdf0e10cSrcweir }
234cdf0e10cSrcweir } else { // for BS to delete one char and CHARACTER mode.
235cdf0e10cSrcweir for (nDone = 0; nDone < nCount && nStartPos > 0; nDone++)
236cdf0e10cSrcweir Text.iterateCodePoints(&nStartPos, -1);
237cdf0e10cSrcweir }
238cdf0e10cSrcweir return nStartPos;
239cdf0e10cSrcweir }
240cdf0e10cSrcweir
241cdf0e10cSrcweir
nextWord(const OUString & Text,sal_Int32 nStartPos,const lang::Locale & rLocale,sal_Int16 rWordType)242cdf0e10cSrcweir Boundary SAL_CALL BreakIterator_Unicode::nextWord( const OUString& Text, sal_Int32 nStartPos,
243cdf0e10cSrcweir const lang::Locale& rLocale, sal_Int16 rWordType ) throw(uno::RuntimeException)
244cdf0e10cSrcweir {
245cdf0e10cSrcweir loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
246cdf0e10cSrcweir
247cdf0e10cSrcweir result.startPos = word.aBreakIterator->following(nStartPos);
248cdf0e10cSrcweir if( result.startPos >= Text.getLength() || result.startPos == BreakIterator::DONE )
249cdf0e10cSrcweir result.endPos = result.startPos;
250cdf0e10cSrcweir else {
251cdf0e10cSrcweir if ( (rWordType == WordType::ANYWORD_IGNOREWHITESPACES ||
252cdf0e10cSrcweir rWordType == WordType::DICTIONARY_WORD ) &&
253cdf0e10cSrcweir u_isWhitespace(Text.iterateCodePoints(&result.startPos, 0)) )
254cdf0e10cSrcweir result.startPos = word.aBreakIterator->following(result.startPos);
255cdf0e10cSrcweir
256cdf0e10cSrcweir result.endPos = word.aBreakIterator->following(result.startPos);
257cdf0e10cSrcweir if(result.endPos == BreakIterator::DONE)
258cdf0e10cSrcweir result.endPos = result.startPos;
259cdf0e10cSrcweir }
260cdf0e10cSrcweir return result;
261cdf0e10cSrcweir }
262cdf0e10cSrcweir
263cdf0e10cSrcweir
previousWord(const OUString & Text,sal_Int32 nStartPos,const lang::Locale & rLocale,sal_Int16 rWordType)264cdf0e10cSrcweir Boundary SAL_CALL BreakIterator_Unicode::previousWord(const OUString& Text, sal_Int32 nStartPos,
265cdf0e10cSrcweir const lang::Locale& rLocale, sal_Int16 rWordType) throw(uno::RuntimeException)
266cdf0e10cSrcweir {
267cdf0e10cSrcweir loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
268cdf0e10cSrcweir
269cdf0e10cSrcweir result.startPos = word.aBreakIterator->preceding(nStartPos);
270cdf0e10cSrcweir if( result.startPos < 0 || result.startPos == BreakIterator::DONE)
271cdf0e10cSrcweir result.endPos = result.startPos;
272cdf0e10cSrcweir else {
273cdf0e10cSrcweir if ( (rWordType == WordType::ANYWORD_IGNOREWHITESPACES ||
274cdf0e10cSrcweir rWordType == WordType::DICTIONARY_WORD) &&
275cdf0e10cSrcweir u_isWhitespace(Text.iterateCodePoints(&result.startPos, 0)) )
276cdf0e10cSrcweir result.startPos = word.aBreakIterator->preceding(result.startPos);
277cdf0e10cSrcweir
278cdf0e10cSrcweir result.endPos = word.aBreakIterator->following(result.startPos);
279cdf0e10cSrcweir if(result.endPos == BreakIterator::DONE)
280cdf0e10cSrcweir result.endPos = result.startPos;
281cdf0e10cSrcweir }
282cdf0e10cSrcweir return result;
283cdf0e10cSrcweir }
284cdf0e10cSrcweir
285cdf0e10cSrcweir
getWordBoundary(const OUString & Text,sal_Int32 nPos,const lang::Locale & rLocale,sal_Int16 rWordType,sal_Bool bDirection)286cdf0e10cSrcweir Boundary SAL_CALL BreakIterator_Unicode::getWordBoundary( const OUString& Text, sal_Int32 nPos, const lang::Locale& rLocale,
287cdf0e10cSrcweir sal_Int16 rWordType, sal_Bool bDirection ) throw(uno::RuntimeException)
288cdf0e10cSrcweir {
289cdf0e10cSrcweir loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
290cdf0e10cSrcweir sal_Int32 len = Text.getLength();
291cdf0e10cSrcweir
292cdf0e10cSrcweir if(word.aBreakIterator->isBoundary(nPos)) {
293cdf0e10cSrcweir result.startPos = result.endPos = nPos;
294cdf0e10cSrcweir if((bDirection || nPos == 0) && nPos < len) //forward
295cdf0e10cSrcweir result.endPos = word.aBreakIterator->following(nPos);
296cdf0e10cSrcweir else
297cdf0e10cSrcweir result.startPos = word.aBreakIterator->preceding(nPos);
298cdf0e10cSrcweir } else {
299cdf0e10cSrcweir if(nPos <= 0) {
300cdf0e10cSrcweir result.startPos = 0;
301cdf0e10cSrcweir result.endPos = len ? word.aBreakIterator->following((sal_Int32)0) : 0;
302cdf0e10cSrcweir } else if(nPos >= len) {
303cdf0e10cSrcweir result.startPos = word.aBreakIterator->preceding(len);
304cdf0e10cSrcweir result.endPos = len;
305cdf0e10cSrcweir } else {
306cdf0e10cSrcweir result.startPos = word.aBreakIterator->preceding(nPos);
307cdf0e10cSrcweir result.endPos = word.aBreakIterator->following(nPos);
308cdf0e10cSrcweir }
309cdf0e10cSrcweir }
310cdf0e10cSrcweir if (result.startPos == BreakIterator::DONE)
311cdf0e10cSrcweir result.startPos = result.endPos;
312cdf0e10cSrcweir else if (result.endPos == BreakIterator::DONE)
313cdf0e10cSrcweir result.endPos = result.startPos;
314cdf0e10cSrcweir
315cdf0e10cSrcweir return result;
316cdf0e10cSrcweir }
317cdf0e10cSrcweir
318cdf0e10cSrcweir
beginOfSentence(const OUString & Text,sal_Int32 nStartPos,const lang::Locale & rLocale)319cdf0e10cSrcweir sal_Int32 SAL_CALL BreakIterator_Unicode::beginOfSentence( const OUString& Text, sal_Int32 nStartPos,
320cdf0e10cSrcweir const lang::Locale &rLocale ) throw(uno::RuntimeException)
321cdf0e10cSrcweir {
322cdf0e10cSrcweir loadICUBreakIterator(rLocale, LOAD_SENTENCE_BREAKITERATOR, 0, "sent", Text);
323cdf0e10cSrcweir
324cdf0e10cSrcweir sal_Int32 len = Text.getLength();
325cdf0e10cSrcweir if (len > 0 && nStartPos == len)
326cdf0e10cSrcweir Text.iterateCodePoints(&nStartPos, -1); // issue #i27703# treat end position as part of last sentence
327cdf0e10cSrcweir if (!sentence.aBreakIterator->isBoundary(nStartPos))
328cdf0e10cSrcweir nStartPos = sentence.aBreakIterator->preceding(nStartPos);
329cdf0e10cSrcweir
330cdf0e10cSrcweir // skip preceding space.
331cdf0e10cSrcweir sal_uInt32 ch = Text.iterateCodePoints(&nStartPos, 1);
332cdf0e10cSrcweir while (nStartPos < len && u_isWhitespace(ch)) ch = Text.iterateCodePoints(&nStartPos, 1);
333cdf0e10cSrcweir Text.iterateCodePoints(&nStartPos, -1);
334cdf0e10cSrcweir
335cdf0e10cSrcweir return nStartPos;
336cdf0e10cSrcweir }
337cdf0e10cSrcweir
endOfSentence(const OUString & Text,sal_Int32 nStartPos,const lang::Locale & rLocale)338cdf0e10cSrcweir sal_Int32 SAL_CALL BreakIterator_Unicode::endOfSentence( const OUString& Text, sal_Int32 nStartPos,
339cdf0e10cSrcweir const lang::Locale &rLocale ) throw(uno::RuntimeException)
340cdf0e10cSrcweir {
341cdf0e10cSrcweir loadICUBreakIterator(rLocale, LOAD_SENTENCE_BREAKITERATOR, 0, "sent", Text);
342cdf0e10cSrcweir
343cdf0e10cSrcweir sal_Int32 len = Text.getLength();
344cdf0e10cSrcweir if (len > 0 && nStartPos == len)
345cdf0e10cSrcweir Text.iterateCodePoints(&nStartPos, -1); // issue #i27703# treat end position as part of last sentence
346cdf0e10cSrcweir nStartPos = sentence.aBreakIterator->following(nStartPos);
347cdf0e10cSrcweir
348cdf0e10cSrcweir sal_Int32 nPos=nStartPos;
349cdf0e10cSrcweir while (nPos > 0 && u_isWhitespace(Text.iterateCodePoints(&nPos, -1))) nStartPos=nPos;
350cdf0e10cSrcweir
351cdf0e10cSrcweir return nStartPos;
352cdf0e10cSrcweir }
353cdf0e10cSrcweir
getLineBreak(const OUString & Text,sal_Int32 nStartPos,const lang::Locale & rLocale,sal_Int32 nMinBreakPos,const LineBreakHyphenationOptions & hOptions,const LineBreakUserOptions &)354cdf0e10cSrcweir LineBreakResults SAL_CALL BreakIterator_Unicode::getLineBreak(
355cdf0e10cSrcweir const OUString& Text, sal_Int32 nStartPos,
356cdf0e10cSrcweir const lang::Locale& rLocale, sal_Int32 nMinBreakPos,
357cdf0e10cSrcweir const LineBreakHyphenationOptions& hOptions,
358cdf0e10cSrcweir const LineBreakUserOptions& /*rOptions*/ ) throw(uno::RuntimeException)
359cdf0e10cSrcweir {
360cdf0e10cSrcweir LineBreakResults lbr;
361cdf0e10cSrcweir
362cdf0e10cSrcweir if (nStartPos >= Text.getLength()) {
363cdf0e10cSrcweir lbr.breakIndex = Text.getLength();
364cdf0e10cSrcweir lbr.breakType = BreakType::WORDBOUNDARY;
365cdf0e10cSrcweir return lbr;
366cdf0e10cSrcweir }
367cdf0e10cSrcweir
368cdf0e10cSrcweir loadICUBreakIterator(rLocale, LOAD_LINE_BREAKITERATOR, 0, lineRule, Text);
369cdf0e10cSrcweir
370cdf0e10cSrcweir sal_Bool GlueSpace=sal_True;
371cdf0e10cSrcweir while (GlueSpace) {
372cdf0e10cSrcweir if (line.aBreakIterator->preceding(nStartPos + 1) == nStartPos) { //Line boundary break
373cdf0e10cSrcweir lbr.breakIndex = nStartPos;
374cdf0e10cSrcweir lbr.breakType = BreakType::WORDBOUNDARY;
375cdf0e10cSrcweir } else if (hOptions.rHyphenator.is()) { //Hyphenation break
376cdf0e10cSrcweir Boundary wBoundary = getWordBoundary( Text, nStartPos, rLocale,
377cdf0e10cSrcweir WordType::DICTIONARY_WORD, false);
378cdf0e10cSrcweir uno::Reference< linguistic2::XHyphenatedWord > aHyphenatedWord;
379cdf0e10cSrcweir aHyphenatedWord = hOptions.rHyphenator->hyphenate(Text.copy(wBoundary.startPos,
380cdf0e10cSrcweir wBoundary.endPos - wBoundary.startPos), rLocale,
381cdf0e10cSrcweir (sal_Int16) (hOptions.hyphenIndex - wBoundary.startPos), hOptions.aHyphenationOptions);
382cdf0e10cSrcweir if (aHyphenatedWord.is()) {
383cdf0e10cSrcweir lbr.rHyphenatedWord = aHyphenatedWord;
384cdf0e10cSrcweir if(wBoundary.startPos + aHyphenatedWord->getHyphenationPos() + 1 < nMinBreakPos )
385cdf0e10cSrcweir lbr.breakIndex = -1;
386cdf0e10cSrcweir else
387cdf0e10cSrcweir lbr.breakIndex = wBoundary.startPos; //aHyphenatedWord->getHyphenationPos();
388cdf0e10cSrcweir lbr.breakType = BreakType::HYPHENATION;
389cdf0e10cSrcweir } else {
390cdf0e10cSrcweir lbr.breakIndex = line.aBreakIterator->preceding(nStartPos);
391cdf0e10cSrcweir lbr.breakType = BreakType::WORDBOUNDARY;;
392cdf0e10cSrcweir }
393cdf0e10cSrcweir } else { //word boundary break
394cdf0e10cSrcweir lbr.breakIndex = line.aBreakIterator->preceding(nStartPos);
395cdf0e10cSrcweir lbr.breakType = BreakType::WORDBOUNDARY;
396cdf0e10cSrcweir }
397cdf0e10cSrcweir
398cdf0e10cSrcweir #define WJ 0x2060 // Word Joiner
399cdf0e10cSrcweir GlueSpace=sal_False;
400cdf0e10cSrcweir if (lbr.breakType == BreakType::WORDBOUNDARY) {
401cdf0e10cSrcweir nStartPos = lbr.breakIndex;
402cdf0e10cSrcweir if (Text[nStartPos--] == WJ)
403cdf0e10cSrcweir GlueSpace=sal_True;
404cdf0e10cSrcweir while (nStartPos >= 0 &&
405cdf0e10cSrcweir (u_isWhitespace(Text.iterateCodePoints(&nStartPos, 0)) || Text[nStartPos] == WJ)) {
406cdf0e10cSrcweir if (Text[nStartPos--] == WJ)
407cdf0e10cSrcweir GlueSpace=sal_True;
408cdf0e10cSrcweir }
409cdf0e10cSrcweir if (GlueSpace && nStartPos < 0) {
410cdf0e10cSrcweir lbr.breakIndex = 0;
411cdf0e10cSrcweir break;
412cdf0e10cSrcweir }
413cdf0e10cSrcweir }
414cdf0e10cSrcweir }
415cdf0e10cSrcweir
416cdf0e10cSrcweir return lbr;
417cdf0e10cSrcweir }
418cdf0e10cSrcweir
419cdf0e10cSrcweir
420cdf0e10cSrcweir
421cdf0e10cSrcweir OUString SAL_CALL
getImplementationName(void)422cdf0e10cSrcweir BreakIterator_Unicode::getImplementationName(void) throw( uno::RuntimeException )
423cdf0e10cSrcweir {
424cdf0e10cSrcweir return OUString::createFromAscii(cBreakIterator);
425cdf0e10cSrcweir }
426cdf0e10cSrcweir
427cdf0e10cSrcweir sal_Bool SAL_CALL
supportsService(const OUString & rServiceName)428cdf0e10cSrcweir BreakIterator_Unicode::supportsService(const OUString& rServiceName) throw( uno::RuntimeException )
429cdf0e10cSrcweir {
430cdf0e10cSrcweir return !rServiceName.compareToAscii(cBreakIterator);
431cdf0e10cSrcweir }
432cdf0e10cSrcweir
433cdf0e10cSrcweir uno::Sequence< OUString > SAL_CALL
getSupportedServiceNames(void)434cdf0e10cSrcweir BreakIterator_Unicode::getSupportedServiceNames(void) throw( uno::RuntimeException )
435cdf0e10cSrcweir {
436cdf0e10cSrcweir uno::Sequence< OUString > aRet(1);
437cdf0e10cSrcweir aRet[0] = OUString::createFromAscii(cBreakIterator);
438cdf0e10cSrcweir return aRet;
439cdf0e10cSrcweir }
440cdf0e10cSrcweir
441cdf0e10cSrcweir } } } }
442