1*cdf0e10cSrcweir /************************************************************************* 2*cdf0e10cSrcweir * 3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4*cdf0e10cSrcweir * 5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates. 6*cdf0e10cSrcweir * 7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite 8*cdf0e10cSrcweir * 9*cdf0e10cSrcweir * This file is part of OpenOffice.org. 10*cdf0e10cSrcweir * 11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify 12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3 13*cdf0e10cSrcweir * only, as published by the Free Software Foundation. 14*cdf0e10cSrcweir * 15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful, 16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of 17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details 19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code). 20*cdf0e10cSrcweir * 21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License 22*cdf0e10cSrcweir * version 3 along with OpenOffice.org. If not, see 23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html> 24*cdf0e10cSrcweir * for a copy of the LGPLv3 License. 25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir 28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 29*cdf0e10cSrcweir #include "precompiled_i18npool.hxx" 30*cdf0e10cSrcweir #include <breakiterator_unicode.hxx> 31*cdf0e10cSrcweir #include <localedata.hxx> 32*cdf0e10cSrcweir #include <unicode/uchar.h> 33*cdf0e10cSrcweir #include <unicode/locid.h> 34*cdf0e10cSrcweir #include <unicode/rbbi.h> 35*cdf0e10cSrcweir #include <unicode/udata.h> 36*cdf0e10cSrcweir #include <rtl/strbuf.hxx> 37*cdf0e10cSrcweir #include <rtl/ustring.hxx> 38*cdf0e10cSrcweir 39*cdf0e10cSrcweir U_CDECL_BEGIN 40*cdf0e10cSrcweir extern const char OpenOffice_dat[]; 41*cdf0e10cSrcweir U_CDECL_END 42*cdf0e10cSrcweir 43*cdf0e10cSrcweir using namespace ::com::sun::star; 44*cdf0e10cSrcweir using namespace ::com::sun::star::lang; 45*cdf0e10cSrcweir using namespace ::rtl; 46*cdf0e10cSrcweir 47*cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n { 48*cdf0e10cSrcweir 49*cdf0e10cSrcweir #define ERROR ::com::sun::star::uno::RuntimeException() 50*cdf0e10cSrcweir 51*cdf0e10cSrcweir //#define ImplementName "com.sun.star.i18n.BreakIterator_Unicode"; 52*cdf0e10cSrcweir 53*cdf0e10cSrcweir 54*cdf0e10cSrcweir BreakIterator_Unicode::BreakIterator_Unicode() : 55*cdf0e10cSrcweir cBreakIterator( "com.sun.star.i18n.BreakIterator_Unicode" ), // implementation name 56*cdf0e10cSrcweir wordRule( "word" ), 57*cdf0e10cSrcweir lineRule( "line" ), 58*cdf0e10cSrcweir result(), 59*cdf0e10cSrcweir character(), 60*cdf0e10cSrcweir word(), 61*cdf0e10cSrcweir sentence(), 62*cdf0e10cSrcweir line(), 63*cdf0e10cSrcweir icuBI( NULL ), 64*cdf0e10cSrcweir aLocale(), 65*cdf0e10cSrcweir aBreakType(), 66*cdf0e10cSrcweir aWordType() 67*cdf0e10cSrcweir { 68*cdf0e10cSrcweir } 69*cdf0e10cSrcweir 70*cdf0e10cSrcweir 71*cdf0e10cSrcweir BreakIterator_Unicode::~BreakIterator_Unicode() 72*cdf0e10cSrcweir { 73*cdf0e10cSrcweir if (icuBI && icuBI->aBreakIterator) { 74*cdf0e10cSrcweir delete icuBI->aBreakIterator; 75*cdf0e10cSrcweir icuBI->aBreakIterator=NULL; 76*cdf0e10cSrcweir } 77*cdf0e10cSrcweir if (character.aBreakIterator) delete character.aBreakIterator; 78*cdf0e10cSrcweir if (word.aBreakIterator) delete word.aBreakIterator; 79*cdf0e10cSrcweir if (sentence.aBreakIterator) delete sentence.aBreakIterator; 80*cdf0e10cSrcweir if (line.aBreakIterator) delete line.aBreakIterator; 81*cdf0e10cSrcweir } 82*cdf0e10cSrcweir 83*cdf0e10cSrcweir /* 84*cdf0e10cSrcweir Wrapper class to provide public access to the RuleBasedBreakIterator's 85*cdf0e10cSrcweir setbreakType method. 86*cdf0e10cSrcweir */ 87*cdf0e10cSrcweir class OOoRuleBasedBreakIterator : public RuleBasedBreakIterator { 88*cdf0e10cSrcweir public: 89*cdf0e10cSrcweir inline void publicSetBreakType(int32_t type) { 90*cdf0e10cSrcweir setBreakType(type); 91*cdf0e10cSrcweir }; 92*cdf0e10cSrcweir OOoRuleBasedBreakIterator(UDataMemory* image, 93*cdf0e10cSrcweir UErrorCode &status) : 94*cdf0e10cSrcweir RuleBasedBreakIterator(image, status) { }; 95*cdf0e10cSrcweir 96*cdf0e10cSrcweir }; 97*cdf0e10cSrcweir 98*cdf0e10cSrcweir // loading ICU breakiterator on demand. 99*cdf0e10cSrcweir void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::lang::Locale& rLocale, 100*cdf0e10cSrcweir sal_Int16 rBreakType, sal_Int16 rWordType, const sal_Char *rule, const OUString& rText) throw(uno::RuntimeException) 101*cdf0e10cSrcweir { 102*cdf0e10cSrcweir sal_Bool newBreak = sal_False; 103*cdf0e10cSrcweir UErrorCode status = U_ZERO_ERROR; 104*cdf0e10cSrcweir sal_Int16 breakType = 0; 105*cdf0e10cSrcweir switch (rBreakType) { 106*cdf0e10cSrcweir case LOAD_CHARACTER_BREAKITERATOR: icuBI=&character; breakType = 3; break; 107*cdf0e10cSrcweir case LOAD_WORD_BREAKITERATOR: icuBI=&word; 108*cdf0e10cSrcweir switch (rWordType) { 109*cdf0e10cSrcweir case WordType::ANYWORD_IGNOREWHITESPACES: breakType = 0; rule=wordRule = "edit_word"; break; 110*cdf0e10cSrcweir case WordType::DICTIONARY_WORD: breakType = 1; rule=wordRule = "dict_word"; break; 111*cdf0e10cSrcweir case WordType::WORD_COUNT: breakType = 2; rule=wordRule = "count_word"; break; 112*cdf0e10cSrcweir } 113*cdf0e10cSrcweir break; 114*cdf0e10cSrcweir case LOAD_SENTENCE_BREAKITERATOR: icuBI=&sentence; breakType = 5; break; 115*cdf0e10cSrcweir case LOAD_LINE_BREAKITERATOR: icuBI=&line; breakType = 4; break; 116*cdf0e10cSrcweir } 117*cdf0e10cSrcweir if (!icuBI->aBreakIterator || rWordType != aWordType || 118*cdf0e10cSrcweir rLocale.Language != aLocale.Language || rLocale.Country != aLocale.Country || 119*cdf0e10cSrcweir rLocale.Variant != aLocale.Variant) { 120*cdf0e10cSrcweir if (icuBI->aBreakIterator) { 121*cdf0e10cSrcweir delete icuBI->aBreakIterator; 122*cdf0e10cSrcweir icuBI->aBreakIterator=NULL; 123*cdf0e10cSrcweir } 124*cdf0e10cSrcweir if (rule) { 125*cdf0e10cSrcweir uno::Sequence< OUString > breakRules = LocaleData().getBreakIteratorRules(rLocale); 126*cdf0e10cSrcweir 127*cdf0e10cSrcweir status = U_ZERO_ERROR; 128*cdf0e10cSrcweir udata_setAppData("OpenOffice", OpenOffice_dat, &status); 129*cdf0e10cSrcweir if ( !U_SUCCESS(status) ) throw ERROR; 130*cdf0e10cSrcweir 131*cdf0e10cSrcweir OOoRuleBasedBreakIterator *rbi = NULL; 132*cdf0e10cSrcweir 133*cdf0e10cSrcweir if (breakRules.getLength() > breakType && breakRules[breakType].getLength() > 0) { 134*cdf0e10cSrcweir rbi = new OOoRuleBasedBreakIterator(udata_open("OpenOffice", "brk", 135*cdf0e10cSrcweir OUStringToOString(breakRules[breakType], RTL_TEXTENCODING_ASCII_US).getStr(), &status), status); 136*cdf0e10cSrcweir } else { 137*cdf0e10cSrcweir status = U_ZERO_ERROR; 138*cdf0e10cSrcweir OStringBuffer aUDName(64); 139*cdf0e10cSrcweir aUDName.append(rule); 140*cdf0e10cSrcweir aUDName.append('_'); 141*cdf0e10cSrcweir aUDName.append( OUStringToOString(rLocale.Language, RTL_TEXTENCODING_ASCII_US)); 142*cdf0e10cSrcweir UDataMemory* pUData = udata_open("OpenOffice", "brk", aUDName.getStr(), &status); 143*cdf0e10cSrcweir if( U_SUCCESS(status) ) 144*cdf0e10cSrcweir rbi = new OOoRuleBasedBreakIterator( pUData, status); 145*cdf0e10cSrcweir if (!U_SUCCESS(status) ) { 146*cdf0e10cSrcweir status = U_ZERO_ERROR; 147*cdf0e10cSrcweir pUData = udata_open("OpenOffice", "brk", rule, &status); 148*cdf0e10cSrcweir if( U_SUCCESS(status) ) 149*cdf0e10cSrcweir rbi = new OOoRuleBasedBreakIterator( pUData, status); 150*cdf0e10cSrcweir if (!U_SUCCESS(status) ) icuBI->aBreakIterator=NULL; 151*cdf0e10cSrcweir } 152*cdf0e10cSrcweir } 153*cdf0e10cSrcweir if (rbi) { 154*cdf0e10cSrcweir switch (rBreakType) { 155*cdf0e10cSrcweir case LOAD_CHARACTER_BREAKITERATOR: rbi->publicSetBreakType(UBRK_CHARACTER); break; 156*cdf0e10cSrcweir case LOAD_WORD_BREAKITERATOR: rbi->publicSetBreakType(UBRK_WORD); break; 157*cdf0e10cSrcweir case LOAD_SENTENCE_BREAKITERATOR: rbi->publicSetBreakType(UBRK_SENTENCE); break; 158*cdf0e10cSrcweir case LOAD_LINE_BREAKITERATOR: rbi->publicSetBreakType(UBRK_LINE); break; 159*cdf0e10cSrcweir } 160*cdf0e10cSrcweir icuBI->aBreakIterator = rbi; 161*cdf0e10cSrcweir } 162*cdf0e10cSrcweir } 163*cdf0e10cSrcweir 164*cdf0e10cSrcweir if (!icuBI->aBreakIterator) { 165*cdf0e10cSrcweir icu::Locale icuLocale( 166*cdf0e10cSrcweir OUStringToOString(rLocale.Language, RTL_TEXTENCODING_ASCII_US).getStr(), 167*cdf0e10cSrcweir OUStringToOString(rLocale.Country, RTL_TEXTENCODING_ASCII_US).getStr(), 168*cdf0e10cSrcweir OUStringToOString(rLocale.Variant, RTL_TEXTENCODING_ASCII_US).getStr()); 169*cdf0e10cSrcweir 170*cdf0e10cSrcweir status = U_ZERO_ERROR; 171*cdf0e10cSrcweir switch (rBreakType) { 172*cdf0e10cSrcweir case LOAD_CHARACTER_BREAKITERATOR: 173*cdf0e10cSrcweir icuBI->aBreakIterator = icu::BreakIterator::createCharacterInstance(icuLocale, status); 174*cdf0e10cSrcweir break; 175*cdf0e10cSrcweir case LOAD_WORD_BREAKITERATOR: 176*cdf0e10cSrcweir icuBI->aBreakIterator = icu::BreakIterator::createWordInstance(icuLocale, status); 177*cdf0e10cSrcweir break; 178*cdf0e10cSrcweir case LOAD_SENTENCE_BREAKITERATOR: 179*cdf0e10cSrcweir icuBI->aBreakIterator = icu::BreakIterator::createSentenceInstance(icuLocale, status); 180*cdf0e10cSrcweir break; 181*cdf0e10cSrcweir case LOAD_LINE_BREAKITERATOR: 182*cdf0e10cSrcweir icuBI->aBreakIterator = icu::BreakIterator::createLineInstance(icuLocale, status); 183*cdf0e10cSrcweir break; 184*cdf0e10cSrcweir } 185*cdf0e10cSrcweir if ( !U_SUCCESS(status) ) { 186*cdf0e10cSrcweir icuBI->aBreakIterator=NULL; 187*cdf0e10cSrcweir throw ERROR; 188*cdf0e10cSrcweir } 189*cdf0e10cSrcweir } 190*cdf0e10cSrcweir if (icuBI->aBreakIterator) { 191*cdf0e10cSrcweir aLocale=rLocale; 192*cdf0e10cSrcweir aWordType=rWordType; 193*cdf0e10cSrcweir aBreakType=rBreakType; 194*cdf0e10cSrcweir newBreak=sal_True; 195*cdf0e10cSrcweir } else { 196*cdf0e10cSrcweir throw ERROR; 197*cdf0e10cSrcweir } 198*cdf0e10cSrcweir } 199*cdf0e10cSrcweir 200*cdf0e10cSrcweir if (newBreak || icuBI->aICUText.compare(UnicodeString(reinterpret_cast<const UChar *>(rText.getStr()), rText.getLength()))) { // UChar != sal_Unicode in MinGW 201*cdf0e10cSrcweir icuBI->aICUText=UnicodeString(reinterpret_cast<const UChar *>(rText.getStr()), rText.getLength()); 202*cdf0e10cSrcweir icuBI->aBreakIterator->setText(icuBI->aICUText); 203*cdf0e10cSrcweir } 204*cdf0e10cSrcweir } 205*cdf0e10cSrcweir 206*cdf0e10cSrcweir 207*cdf0e10cSrcweir sal_Int32 SAL_CALL BreakIterator_Unicode::nextCharacters( const OUString& Text, 208*cdf0e10cSrcweir sal_Int32 nStartPos, const lang::Locale &rLocale, 209*cdf0e10cSrcweir sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone ) 210*cdf0e10cSrcweir throw(uno::RuntimeException) 211*cdf0e10cSrcweir { 212*cdf0e10cSrcweir if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) { // for CELL mode 213*cdf0e10cSrcweir loadICUBreakIterator(rLocale, LOAD_CHARACTER_BREAKITERATOR, 0, "char", Text); 214*cdf0e10cSrcweir for (nDone = 0; nDone < nCount; nDone++) { 215*cdf0e10cSrcweir nStartPos = character.aBreakIterator->following(nStartPos); 216*cdf0e10cSrcweir if (nStartPos == BreakIterator::DONE) 217*cdf0e10cSrcweir return Text.getLength(); 218*cdf0e10cSrcweir } 219*cdf0e10cSrcweir } else { // for CHARACTER mode 220*cdf0e10cSrcweir for (nDone = 0; nDone < nCount && nStartPos < Text.getLength(); nDone++) 221*cdf0e10cSrcweir Text.iterateCodePoints(&nStartPos, 1); 222*cdf0e10cSrcweir } 223*cdf0e10cSrcweir return nStartPos; 224*cdf0e10cSrcweir } 225*cdf0e10cSrcweir 226*cdf0e10cSrcweir sal_Int32 SAL_CALL BreakIterator_Unicode::previousCharacters( const OUString& Text, 227*cdf0e10cSrcweir sal_Int32 nStartPos, const lang::Locale& rLocale, 228*cdf0e10cSrcweir sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone ) 229*cdf0e10cSrcweir throw(uno::RuntimeException) 230*cdf0e10cSrcweir { 231*cdf0e10cSrcweir if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) { // for CELL mode 232*cdf0e10cSrcweir loadICUBreakIterator(rLocale, LOAD_CHARACTER_BREAKITERATOR, 0, "char", Text); 233*cdf0e10cSrcweir for (nDone = 0; nDone < nCount; nDone++) { 234*cdf0e10cSrcweir nStartPos = character.aBreakIterator->preceding(nStartPos); 235*cdf0e10cSrcweir if (nStartPos == BreakIterator::DONE) 236*cdf0e10cSrcweir return 0; 237*cdf0e10cSrcweir } 238*cdf0e10cSrcweir } else { // for BS to delete one char and CHARACTER mode. 239*cdf0e10cSrcweir for (nDone = 0; nDone < nCount && nStartPos > 0; nDone++) 240*cdf0e10cSrcweir Text.iterateCodePoints(&nStartPos, -1); 241*cdf0e10cSrcweir } 242*cdf0e10cSrcweir return nStartPos; 243*cdf0e10cSrcweir } 244*cdf0e10cSrcweir 245*cdf0e10cSrcweir 246*cdf0e10cSrcweir Boundary SAL_CALL BreakIterator_Unicode::nextWord( const OUString& Text, sal_Int32 nStartPos, 247*cdf0e10cSrcweir const lang::Locale& rLocale, sal_Int16 rWordType ) throw(uno::RuntimeException) 248*cdf0e10cSrcweir { 249*cdf0e10cSrcweir loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text); 250*cdf0e10cSrcweir 251*cdf0e10cSrcweir result.startPos = word.aBreakIterator->following(nStartPos); 252*cdf0e10cSrcweir if( result.startPos >= Text.getLength() || result.startPos == BreakIterator::DONE ) 253*cdf0e10cSrcweir result.endPos = result.startPos; 254*cdf0e10cSrcweir else { 255*cdf0e10cSrcweir if ( (rWordType == WordType::ANYWORD_IGNOREWHITESPACES || 256*cdf0e10cSrcweir rWordType == WordType::DICTIONARY_WORD ) && 257*cdf0e10cSrcweir u_isWhitespace(Text.iterateCodePoints(&result.startPos, 0)) ) 258*cdf0e10cSrcweir result.startPos = word.aBreakIterator->following(result.startPos); 259*cdf0e10cSrcweir 260*cdf0e10cSrcweir result.endPos = word.aBreakIterator->following(result.startPos); 261*cdf0e10cSrcweir if(result.endPos == BreakIterator::DONE) 262*cdf0e10cSrcweir result.endPos = result.startPos; 263*cdf0e10cSrcweir } 264*cdf0e10cSrcweir return result; 265*cdf0e10cSrcweir } 266*cdf0e10cSrcweir 267*cdf0e10cSrcweir 268*cdf0e10cSrcweir Boundary SAL_CALL BreakIterator_Unicode::previousWord(const OUString& Text, sal_Int32 nStartPos, 269*cdf0e10cSrcweir const lang::Locale& rLocale, sal_Int16 rWordType) throw(uno::RuntimeException) 270*cdf0e10cSrcweir { 271*cdf0e10cSrcweir loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text); 272*cdf0e10cSrcweir 273*cdf0e10cSrcweir result.startPos = word.aBreakIterator->preceding(nStartPos); 274*cdf0e10cSrcweir if( result.startPos < 0 || result.startPos == BreakIterator::DONE) 275*cdf0e10cSrcweir result.endPos = result.startPos; 276*cdf0e10cSrcweir else { 277*cdf0e10cSrcweir if ( (rWordType == WordType::ANYWORD_IGNOREWHITESPACES || 278*cdf0e10cSrcweir rWordType == WordType::DICTIONARY_WORD) && 279*cdf0e10cSrcweir u_isWhitespace(Text.iterateCodePoints(&result.startPos, 0)) ) 280*cdf0e10cSrcweir result.startPos = word.aBreakIterator->preceding(result.startPos); 281*cdf0e10cSrcweir 282*cdf0e10cSrcweir result.endPos = word.aBreakIterator->following(result.startPos); 283*cdf0e10cSrcweir if(result.endPos == BreakIterator::DONE) 284*cdf0e10cSrcweir result.endPos = result.startPos; 285*cdf0e10cSrcweir } 286*cdf0e10cSrcweir return result; 287*cdf0e10cSrcweir } 288*cdf0e10cSrcweir 289*cdf0e10cSrcweir 290*cdf0e10cSrcweir Boundary SAL_CALL BreakIterator_Unicode::getWordBoundary( const OUString& Text, sal_Int32 nPos, const lang::Locale& rLocale, 291*cdf0e10cSrcweir sal_Int16 rWordType, sal_Bool bDirection ) throw(uno::RuntimeException) 292*cdf0e10cSrcweir { 293*cdf0e10cSrcweir loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text); 294*cdf0e10cSrcweir sal_Int32 len = Text.getLength(); 295*cdf0e10cSrcweir 296*cdf0e10cSrcweir if(word.aBreakIterator->isBoundary(nPos)) { 297*cdf0e10cSrcweir result.startPos = result.endPos = nPos; 298*cdf0e10cSrcweir if((bDirection || nPos == 0) && nPos < len) //forward 299*cdf0e10cSrcweir result.endPos = word.aBreakIterator->following(nPos); 300*cdf0e10cSrcweir else 301*cdf0e10cSrcweir result.startPos = word.aBreakIterator->preceding(nPos); 302*cdf0e10cSrcweir } else { 303*cdf0e10cSrcweir if(nPos <= 0) { 304*cdf0e10cSrcweir result.startPos = 0; 305*cdf0e10cSrcweir result.endPos = len ? word.aBreakIterator->following((sal_Int32)0) : 0; 306*cdf0e10cSrcweir } else if(nPos >= len) { 307*cdf0e10cSrcweir result.startPos = word.aBreakIterator->preceding(len); 308*cdf0e10cSrcweir result.endPos = len; 309*cdf0e10cSrcweir } else { 310*cdf0e10cSrcweir result.startPos = word.aBreakIterator->preceding(nPos); 311*cdf0e10cSrcweir result.endPos = word.aBreakIterator->following(nPos); 312*cdf0e10cSrcweir } 313*cdf0e10cSrcweir } 314*cdf0e10cSrcweir if (result.startPos == BreakIterator::DONE) 315*cdf0e10cSrcweir result.startPos = result.endPos; 316*cdf0e10cSrcweir else if (result.endPos == BreakIterator::DONE) 317*cdf0e10cSrcweir result.endPos = result.startPos; 318*cdf0e10cSrcweir 319*cdf0e10cSrcweir return result; 320*cdf0e10cSrcweir } 321*cdf0e10cSrcweir 322*cdf0e10cSrcweir 323*cdf0e10cSrcweir sal_Int32 SAL_CALL BreakIterator_Unicode::beginOfSentence( const OUString& Text, sal_Int32 nStartPos, 324*cdf0e10cSrcweir const lang::Locale &rLocale ) throw(uno::RuntimeException) 325*cdf0e10cSrcweir { 326*cdf0e10cSrcweir loadICUBreakIterator(rLocale, LOAD_SENTENCE_BREAKITERATOR, 0, "sent", Text); 327*cdf0e10cSrcweir 328*cdf0e10cSrcweir sal_Int32 len = Text.getLength(); 329*cdf0e10cSrcweir if (len > 0 && nStartPos == len) 330*cdf0e10cSrcweir Text.iterateCodePoints(&nStartPos, -1); // issue #i27703# treat end position as part of last sentence 331*cdf0e10cSrcweir if (!sentence.aBreakIterator->isBoundary(nStartPos)) 332*cdf0e10cSrcweir nStartPos = sentence.aBreakIterator->preceding(nStartPos); 333*cdf0e10cSrcweir 334*cdf0e10cSrcweir // skip preceding space. 335*cdf0e10cSrcweir sal_uInt32 ch = Text.iterateCodePoints(&nStartPos, 1); 336*cdf0e10cSrcweir while (nStartPos < len && u_isWhitespace(ch)) ch = Text.iterateCodePoints(&nStartPos, 1); 337*cdf0e10cSrcweir Text.iterateCodePoints(&nStartPos, -1); 338*cdf0e10cSrcweir 339*cdf0e10cSrcweir return nStartPos; 340*cdf0e10cSrcweir } 341*cdf0e10cSrcweir 342*cdf0e10cSrcweir sal_Int32 SAL_CALL BreakIterator_Unicode::endOfSentence( const OUString& Text, sal_Int32 nStartPos, 343*cdf0e10cSrcweir const lang::Locale &rLocale ) throw(uno::RuntimeException) 344*cdf0e10cSrcweir { 345*cdf0e10cSrcweir loadICUBreakIterator(rLocale, LOAD_SENTENCE_BREAKITERATOR, 0, "sent", Text); 346*cdf0e10cSrcweir 347*cdf0e10cSrcweir sal_Int32 len = Text.getLength(); 348*cdf0e10cSrcweir if (len > 0 && nStartPos == len) 349*cdf0e10cSrcweir Text.iterateCodePoints(&nStartPos, -1); // issue #i27703# treat end position as part of last sentence 350*cdf0e10cSrcweir nStartPos = sentence.aBreakIterator->following(nStartPos); 351*cdf0e10cSrcweir 352*cdf0e10cSrcweir sal_Int32 nPos=nStartPos; 353*cdf0e10cSrcweir while (nPos > 0 && u_isWhitespace(Text.iterateCodePoints(&nPos, -1))) nStartPos=nPos; 354*cdf0e10cSrcweir 355*cdf0e10cSrcweir return nStartPos; 356*cdf0e10cSrcweir } 357*cdf0e10cSrcweir 358*cdf0e10cSrcweir LineBreakResults SAL_CALL BreakIterator_Unicode::getLineBreak( 359*cdf0e10cSrcweir const OUString& Text, sal_Int32 nStartPos, 360*cdf0e10cSrcweir const lang::Locale& rLocale, sal_Int32 nMinBreakPos, 361*cdf0e10cSrcweir const LineBreakHyphenationOptions& hOptions, 362*cdf0e10cSrcweir const LineBreakUserOptions& /*rOptions*/ ) throw(uno::RuntimeException) 363*cdf0e10cSrcweir { 364*cdf0e10cSrcweir LineBreakResults lbr; 365*cdf0e10cSrcweir 366*cdf0e10cSrcweir if (nStartPos >= Text.getLength()) { 367*cdf0e10cSrcweir lbr.breakIndex = Text.getLength(); 368*cdf0e10cSrcweir lbr.breakType = BreakType::WORDBOUNDARY; 369*cdf0e10cSrcweir return lbr; 370*cdf0e10cSrcweir } 371*cdf0e10cSrcweir 372*cdf0e10cSrcweir loadICUBreakIterator(rLocale, LOAD_LINE_BREAKITERATOR, 0, lineRule, Text); 373*cdf0e10cSrcweir 374*cdf0e10cSrcweir sal_Bool GlueSpace=sal_True; 375*cdf0e10cSrcweir while (GlueSpace) { 376*cdf0e10cSrcweir if (line.aBreakIterator->preceding(nStartPos + 1) == nStartPos) { //Line boundary break 377*cdf0e10cSrcweir lbr.breakIndex = nStartPos; 378*cdf0e10cSrcweir lbr.breakType = BreakType::WORDBOUNDARY; 379*cdf0e10cSrcweir } else if (hOptions.rHyphenator.is()) { //Hyphenation break 380*cdf0e10cSrcweir Boundary wBoundary = getWordBoundary( Text, nStartPos, rLocale, 381*cdf0e10cSrcweir WordType::DICTIONARY_WORD, false); 382*cdf0e10cSrcweir uno::Reference< linguistic2::XHyphenatedWord > aHyphenatedWord; 383*cdf0e10cSrcweir aHyphenatedWord = hOptions.rHyphenator->hyphenate(Text.copy(wBoundary.startPos, 384*cdf0e10cSrcweir wBoundary.endPos - wBoundary.startPos), rLocale, 385*cdf0e10cSrcweir (sal_Int16) (hOptions.hyphenIndex - wBoundary.startPos), hOptions.aHyphenationOptions); 386*cdf0e10cSrcweir if (aHyphenatedWord.is()) { 387*cdf0e10cSrcweir lbr.rHyphenatedWord = aHyphenatedWord; 388*cdf0e10cSrcweir if(wBoundary.startPos + aHyphenatedWord->getHyphenationPos() + 1 < nMinBreakPos ) 389*cdf0e10cSrcweir lbr.breakIndex = -1; 390*cdf0e10cSrcweir else 391*cdf0e10cSrcweir lbr.breakIndex = wBoundary.startPos; //aHyphenatedWord->getHyphenationPos(); 392*cdf0e10cSrcweir lbr.breakType = BreakType::HYPHENATION; 393*cdf0e10cSrcweir } else { 394*cdf0e10cSrcweir lbr.breakIndex = line.aBreakIterator->preceding(nStartPos); 395*cdf0e10cSrcweir lbr.breakType = BreakType::WORDBOUNDARY;; 396*cdf0e10cSrcweir } 397*cdf0e10cSrcweir } else { //word boundary break 398*cdf0e10cSrcweir lbr.breakIndex = line.aBreakIterator->preceding(nStartPos); 399*cdf0e10cSrcweir lbr.breakType = BreakType::WORDBOUNDARY; 400*cdf0e10cSrcweir } 401*cdf0e10cSrcweir 402*cdf0e10cSrcweir #define WJ 0x2060 // Word Joiner 403*cdf0e10cSrcweir GlueSpace=sal_False; 404*cdf0e10cSrcweir if (lbr.breakType == BreakType::WORDBOUNDARY) { 405*cdf0e10cSrcweir nStartPos = lbr.breakIndex; 406*cdf0e10cSrcweir if (Text[nStartPos--] == WJ) 407*cdf0e10cSrcweir GlueSpace=sal_True; 408*cdf0e10cSrcweir while (nStartPos >= 0 && 409*cdf0e10cSrcweir (u_isWhitespace(Text.iterateCodePoints(&nStartPos, 0)) || Text[nStartPos] == WJ)) { 410*cdf0e10cSrcweir if (Text[nStartPos--] == WJ) 411*cdf0e10cSrcweir GlueSpace=sal_True; 412*cdf0e10cSrcweir } 413*cdf0e10cSrcweir if (GlueSpace && nStartPos < 0) { 414*cdf0e10cSrcweir lbr.breakIndex = 0; 415*cdf0e10cSrcweir break; 416*cdf0e10cSrcweir } 417*cdf0e10cSrcweir } 418*cdf0e10cSrcweir } 419*cdf0e10cSrcweir 420*cdf0e10cSrcweir return lbr; 421*cdf0e10cSrcweir } 422*cdf0e10cSrcweir 423*cdf0e10cSrcweir 424*cdf0e10cSrcweir 425*cdf0e10cSrcweir OUString SAL_CALL 426*cdf0e10cSrcweir BreakIterator_Unicode::getImplementationName(void) throw( uno::RuntimeException ) 427*cdf0e10cSrcweir { 428*cdf0e10cSrcweir return OUString::createFromAscii(cBreakIterator); 429*cdf0e10cSrcweir } 430*cdf0e10cSrcweir 431*cdf0e10cSrcweir sal_Bool SAL_CALL 432*cdf0e10cSrcweir BreakIterator_Unicode::supportsService(const OUString& rServiceName) throw( uno::RuntimeException ) 433*cdf0e10cSrcweir { 434*cdf0e10cSrcweir return !rServiceName.compareToAscii(cBreakIterator); 435*cdf0e10cSrcweir } 436*cdf0e10cSrcweir 437*cdf0e10cSrcweir uno::Sequence< OUString > SAL_CALL 438*cdf0e10cSrcweir BreakIterator_Unicode::getSupportedServiceNames(void) throw( uno::RuntimeException ) 439*cdf0e10cSrcweir { 440*cdf0e10cSrcweir uno::Sequence< OUString > aRet(1); 441*cdf0e10cSrcweir aRet[0] = OUString::createFromAscii(cBreakIterator); 442*cdf0e10cSrcweir return aRet; 443*cdf0e10cSrcweir } 444*cdf0e10cSrcweir 445*cdf0e10cSrcweir } } } } 446