1*449ab281SAndrew Rist /**************************************************************
2cdf0e10cSrcweir  *
3*449ab281SAndrew Rist  * Licensed to the Apache Software Foundation (ASF) under one
4*449ab281SAndrew Rist  * or more contributor license agreements.  See the NOTICE file
5*449ab281SAndrew Rist  * distributed with this work for additional information
6*449ab281SAndrew Rist  * regarding copyright ownership.  The ASF licenses this file
7*449ab281SAndrew Rist  * to you under the Apache License, Version 2.0 (the
8*449ab281SAndrew Rist  * "License"); you may not use this file except in compliance
9*449ab281SAndrew Rist  * with the License.  You may obtain a copy of the License at
10*449ab281SAndrew Rist  *
11*449ab281SAndrew Rist  *   http://www.apache.org/licenses/LICENSE-2.0
12*449ab281SAndrew Rist  *
13*449ab281SAndrew Rist  * Unless required by applicable law or agreed to in writing,
14*449ab281SAndrew Rist  * software distributed under the License is distributed on an
15*449ab281SAndrew Rist  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*449ab281SAndrew Rist  * KIND, either express or implied.  See the License for the
17*449ab281SAndrew Rist  * specific language governing permissions and limitations
18*449ab281SAndrew Rist  * under the License.
19*449ab281SAndrew Rist  *
20*449ab281SAndrew Rist  *************************************************************/
21*449ab281SAndrew Rist 
22*449ab281SAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_i18npool.hxx"
26cdf0e10cSrcweir 
27cdf0e10cSrcweir #define BREAKITERATOR_ALL
28cdf0e10cSrcweir #include <breakiterator_cjk.hxx>
29cdf0e10cSrcweir #include <localedata.hxx>
30cdf0e10cSrcweir #include <i18nutil/unicode.hxx>
31cdf0e10cSrcweir 
32cdf0e10cSrcweir using namespace ::com::sun::star::uno;
33cdf0e10cSrcweir using namespace ::com::sun::star::lang;
34cdf0e10cSrcweir using namespace ::rtl;
35cdf0e10cSrcweir 
36cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n {
37cdf0e10cSrcweir 
38cdf0e10cSrcweir //      ----------------------------------------------------
39cdf0e10cSrcweir //      class BreakIterator_CJK
40cdf0e10cSrcweir //      ----------------------------------------------------;
41cdf0e10cSrcweir 
BreakIterator_CJK()42cdf0e10cSrcweir BreakIterator_CJK::BreakIterator_CJK() :
43cdf0e10cSrcweir     dict( NULL ),
44cdf0e10cSrcweir     hangingCharacters()
45cdf0e10cSrcweir {
46cdf0e10cSrcweir         cBreakIterator = "com.sun.star.i18n.BreakIterator_CJK";
47cdf0e10cSrcweir }
48cdf0e10cSrcweir 
49cdf0e10cSrcweir Boundary SAL_CALL
previousWord(const OUString & text,sal_Int32 anyPos,const lang::Locale & nLocale,sal_Int16 wordType)50cdf0e10cSrcweir BreakIterator_CJK::previousWord(const OUString& text, sal_Int32 anyPos,
51cdf0e10cSrcweir         const lang::Locale& nLocale, sal_Int16 wordType) throw(RuntimeException)
52cdf0e10cSrcweir {
53cdf0e10cSrcweir         if (dict) {
54cdf0e10cSrcweir             result = dict->previousWord(text, anyPos, wordType);
55cdf0e10cSrcweir             // #109813# for non-CJK, single character word, fallback to ICU breakiterator.
56cdf0e10cSrcweir             if (result.endPos - result.startPos != 1 ||
57cdf0e10cSrcweir                     getScriptType(text, result.startPos) == ScriptType::ASIAN)
58cdf0e10cSrcweir                 return result;
59cdf0e10cSrcweir             result = BreakIterator_Unicode::getWordBoundary(text, result.startPos, nLocale, wordType, true);
60cdf0e10cSrcweir             if (result.endPos < anyPos)
61cdf0e10cSrcweir                 return result;
62cdf0e10cSrcweir         }
63cdf0e10cSrcweir         return BreakIterator_Unicode::previousWord(text, anyPos, nLocale, wordType);
64cdf0e10cSrcweir }
65cdf0e10cSrcweir 
66cdf0e10cSrcweir Boundary SAL_CALL
nextWord(const OUString & text,sal_Int32 anyPos,const lang::Locale & nLocale,sal_Int16 wordType)67cdf0e10cSrcweir BreakIterator_CJK::nextWord(const OUString& text, sal_Int32 anyPos,
68cdf0e10cSrcweir         const lang::Locale& nLocale, sal_Int16 wordType) throw(RuntimeException)
69cdf0e10cSrcweir {
70cdf0e10cSrcweir         if (dict) {
71cdf0e10cSrcweir             result = dict->nextWord(text, anyPos, wordType);
72cdf0e10cSrcweir             // #109813# for non-CJK, single character word, fallback to ICU breakiterator.
73cdf0e10cSrcweir             if (result.endPos - result.startPos != 1 ||
74cdf0e10cSrcweir                     getScriptType(text, result.startPos) == ScriptType::ASIAN)
75cdf0e10cSrcweir                 return result;
76cdf0e10cSrcweir             result = BreakIterator_Unicode::getWordBoundary(text, result.startPos, nLocale, wordType, true);
77cdf0e10cSrcweir             if (result.startPos > anyPos)
78cdf0e10cSrcweir                 return result;
79cdf0e10cSrcweir         }
80cdf0e10cSrcweir         return BreakIterator_Unicode::nextWord(text, anyPos, nLocale, wordType);
81cdf0e10cSrcweir }
82cdf0e10cSrcweir 
83cdf0e10cSrcweir Boundary SAL_CALL
getWordBoundary(const OUString & text,sal_Int32 anyPos,const lang::Locale & nLocale,sal_Int16 wordType,sal_Bool bDirection)84cdf0e10cSrcweir BreakIterator_CJK::getWordBoundary( const OUString& text, sal_Int32 anyPos,
85cdf0e10cSrcweir         const lang::Locale& nLocale, sal_Int16 wordType, sal_Bool bDirection )
86cdf0e10cSrcweir         throw(RuntimeException)
87cdf0e10cSrcweir {
88cdf0e10cSrcweir         if (dict) {
89cdf0e10cSrcweir             result = dict->getWordBoundary(text, anyPos, wordType, bDirection);
90cdf0e10cSrcweir             // #109813# for non-CJK, single character word, fallback to ICU breakiterator.
91cdf0e10cSrcweir             if (result.endPos - result.startPos != 1 ||
92cdf0e10cSrcweir                     getScriptType(text, result.startPos) == ScriptType::ASIAN)
93cdf0e10cSrcweir                 return result;
94cdf0e10cSrcweir         }
95cdf0e10cSrcweir         return BreakIterator_Unicode::getWordBoundary(text, anyPos, nLocale, wordType, bDirection);
96cdf0e10cSrcweir }
97cdf0e10cSrcweir 
getLineBreak(const OUString & Text,sal_Int32 nStartPos,const lang::Locale &,sal_Int32,const LineBreakHyphenationOptions &,const LineBreakUserOptions & bOptions)98cdf0e10cSrcweir LineBreakResults SAL_CALL BreakIterator_CJK::getLineBreak(
99cdf0e10cSrcweir         const OUString& Text, sal_Int32 nStartPos,
100cdf0e10cSrcweir         const lang::Locale& /*rLocale*/, sal_Int32 /*nMinBreakPos*/,
101cdf0e10cSrcweir         const LineBreakHyphenationOptions& /*hOptions*/,
102cdf0e10cSrcweir         const LineBreakUserOptions& bOptions ) throw(RuntimeException)
103cdf0e10cSrcweir {
104cdf0e10cSrcweir         LineBreakResults lbr;
105cdf0e10cSrcweir 
106cdf0e10cSrcweir         if (bOptions.allowPunctuationOutsideMargin &&
107cdf0e10cSrcweir                 hangingCharacters.indexOf(Text[nStartPos]) != -1 &&
108cdf0e10cSrcweir                 (Text.iterateCodePoints( &nStartPos, 1), nStartPos == Text.getLength())) {
109cdf0e10cSrcweir             ; // do nothing
110cdf0e10cSrcweir         } else if (bOptions.applyForbiddenRules && 0 < nStartPos && nStartPos < Text.getLength()) {
111cdf0e10cSrcweir             while (nStartPos > 0 &&
112cdf0e10cSrcweir                     (bOptions.forbiddenBeginCharacters.indexOf(Text[nStartPos]) != -1 ||
113cdf0e10cSrcweir                     bOptions.forbiddenEndCharacters.indexOf(Text[nStartPos-1]) != -1))
114cdf0e10cSrcweir                 Text.iterateCodePoints( &nStartPos, -1);
115cdf0e10cSrcweir         }
116cdf0e10cSrcweir 
117cdf0e10cSrcweir         lbr.breakIndex = nStartPos;
118cdf0e10cSrcweir         lbr.breakType = BreakType::WORDBOUNDARY;
119cdf0e10cSrcweir         return lbr;
120cdf0e10cSrcweir }
121cdf0e10cSrcweir 
// Builds a lang::Locale from two ASCII strings (language, country) and an
// empty OUString as the third constructor argument.
#define LOCALE(language, country) \
        lang::Locale( \
            OUString::createFromAscii(language), \
            OUString::createFromAscii(country), \
            OUString())
123cdf0e10cSrcweir //      ----------------------------------------------------
124cdf0e10cSrcweir //      class BreakIterator_zh
125cdf0e10cSrcweir //      ----------------------------------------------------;
BreakIterator_zh()126cdf0e10cSrcweir BreakIterator_zh::BreakIterator_zh()
127cdf0e10cSrcweir {
128cdf0e10cSrcweir         dict = new xdictionary("zh");
129cdf0e10cSrcweir         hangingCharacters = LocaleData().getHangingCharacters(LOCALE("zh", "CN"));
130cdf0e10cSrcweir         cBreakIterator = "com.sun.star.i18n.BreakIterator_zh";
131cdf0e10cSrcweir }
132cdf0e10cSrcweir 
~BreakIterator_zh()133cdf0e10cSrcweir BreakIterator_zh::~BreakIterator_zh()
134cdf0e10cSrcweir {
135cdf0e10cSrcweir         delete dict;
136cdf0e10cSrcweir }
137cdf0e10cSrcweir 
138cdf0e10cSrcweir //      ----------------------------------------------------
139cdf0e10cSrcweir //      class BreakIterator_zh_TW
140cdf0e10cSrcweir //      ----------------------------------------------------;
BreakIterator_zh_TW()141cdf0e10cSrcweir BreakIterator_zh_TW::BreakIterator_zh_TW()
142cdf0e10cSrcweir {
143cdf0e10cSrcweir         dict = new xdictionary("zh");
144cdf0e10cSrcweir         hangingCharacters = LocaleData().getHangingCharacters(LOCALE("zh", "TW"));
145cdf0e10cSrcweir         cBreakIterator = "com.sun.star.i18n.BreakIterator_zh_TW";
146cdf0e10cSrcweir }
147cdf0e10cSrcweir 
~BreakIterator_zh_TW()148cdf0e10cSrcweir BreakIterator_zh_TW::~BreakIterator_zh_TW()
149cdf0e10cSrcweir {
150cdf0e10cSrcweir         delete dict;
151cdf0e10cSrcweir }
152cdf0e10cSrcweir 
153cdf0e10cSrcweir //      ----------------------------------------------------
154cdf0e10cSrcweir //      class BreakIterator_ja
155cdf0e10cSrcweir //      ----------------------------------------------------;
BreakIterator_ja()156cdf0e10cSrcweir BreakIterator_ja::BreakIterator_ja()
157cdf0e10cSrcweir {
158cdf0e10cSrcweir         dict = new xdictionary("ja");
159cdf0e10cSrcweir         dict->setJapaneseWordBreak();
160cdf0e10cSrcweir         hangingCharacters = LocaleData().getHangingCharacters(LOCALE("ja", "JP"));
161cdf0e10cSrcweir         cBreakIterator = "com.sun.star.i18n.BreakIterator_ja";
162cdf0e10cSrcweir }
163cdf0e10cSrcweir 
~BreakIterator_ja()164cdf0e10cSrcweir BreakIterator_ja::~BreakIterator_ja()
165cdf0e10cSrcweir {
166cdf0e10cSrcweir         delete dict;
167cdf0e10cSrcweir }
168cdf0e10cSrcweir 
169cdf0e10cSrcweir //      ----------------------------------------------------
170cdf0e10cSrcweir //      class BreakIterator_ko
171cdf0e10cSrcweir //      ----------------------------------------------------;
BreakIterator_ko()172cdf0e10cSrcweir BreakIterator_ko::BreakIterator_ko()
173cdf0e10cSrcweir {
174cdf0e10cSrcweir         hangingCharacters = LocaleData().getHangingCharacters(LOCALE("ko", "KR"));
175cdf0e10cSrcweir         cBreakIterator = "com.sun.star.i18n.BreakIterator_ko";
176cdf0e10cSrcweir }
177cdf0e10cSrcweir 
~BreakIterator_ko()178cdf0e10cSrcweir BreakIterator_ko::~BreakIterator_ko()
179cdf0e10cSrcweir {
180cdf0e10cSrcweir }
181cdf0e10cSrcweir 
182cdf0e10cSrcweir } } } }
183