1*449ab281SAndrew Rist /**************************************************************
2cdf0e10cSrcweir *
3*449ab281SAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one
4*449ab281SAndrew Rist * or more contributor license agreements. See the NOTICE file
5*449ab281SAndrew Rist * distributed with this work for additional information
6*449ab281SAndrew Rist * regarding copyright ownership. The ASF licenses this file
7*449ab281SAndrew Rist * to you under the Apache License, Version 2.0 (the
8*449ab281SAndrew Rist * "License"); you may not use this file except in compliance
9*449ab281SAndrew Rist * with the License. You may obtain a copy of the License at
10*449ab281SAndrew Rist *
11*449ab281SAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0
12*449ab281SAndrew Rist *
13*449ab281SAndrew Rist * Unless required by applicable law or agreed to in writing,
14*449ab281SAndrew Rist * software distributed under the License is distributed on an
15*449ab281SAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*449ab281SAndrew Rist * KIND, either express or implied. See the License for the
17*449ab281SAndrew Rist * specific language governing permissions and limitations
18*449ab281SAndrew Rist * under the License.
19*449ab281SAndrew Rist *
20*449ab281SAndrew Rist *************************************************************/
21*449ab281SAndrew Rist
22*449ab281SAndrew Rist
23cdf0e10cSrcweir
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_i18npool.hxx"
26cdf0e10cSrcweir
27cdf0e10cSrcweir #define BREAKITERATOR_ALL
28cdf0e10cSrcweir #include <breakiterator_cjk.hxx>
29cdf0e10cSrcweir #include <localedata.hxx>
30cdf0e10cSrcweir #include <i18nutil/unicode.hxx>
31cdf0e10cSrcweir
32cdf0e10cSrcweir using namespace ::com::sun::star::uno;
33cdf0e10cSrcweir using namespace ::com::sun::star::lang;
34cdf0e10cSrcweir using namespace ::rtl;
35cdf0e10cSrcweir
36cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n {
37cdf0e10cSrcweir
38cdf0e10cSrcweir // ----------------------------------------------------
39cdf0e10cSrcweir // class BreakIterator_CJK
40cdf0e10cSrcweir // ----------------------------------------------------;
41cdf0e10cSrcweir
BreakIterator_CJK()42cdf0e10cSrcweir BreakIterator_CJK::BreakIterator_CJK() :
43cdf0e10cSrcweir dict( NULL ),
44cdf0e10cSrcweir hangingCharacters()
45cdf0e10cSrcweir {
46cdf0e10cSrcweir cBreakIterator = "com.sun.star.i18n.BreakIterator_CJK";
47cdf0e10cSrcweir }
48cdf0e10cSrcweir
49cdf0e10cSrcweir Boundary SAL_CALL
previousWord(const OUString & text,sal_Int32 anyPos,const lang::Locale & nLocale,sal_Int16 wordType)50cdf0e10cSrcweir BreakIterator_CJK::previousWord(const OUString& text, sal_Int32 anyPos,
51cdf0e10cSrcweir const lang::Locale& nLocale, sal_Int16 wordType) throw(RuntimeException)
52cdf0e10cSrcweir {
53cdf0e10cSrcweir if (dict) {
54cdf0e10cSrcweir result = dict->previousWord(text, anyPos, wordType);
55cdf0e10cSrcweir // #109813# for non-CJK, single character word, fallback to ICU breakiterator.
56cdf0e10cSrcweir if (result.endPos - result.startPos != 1 ||
57cdf0e10cSrcweir getScriptType(text, result.startPos) == ScriptType::ASIAN)
58cdf0e10cSrcweir return result;
59cdf0e10cSrcweir result = BreakIterator_Unicode::getWordBoundary(text, result.startPos, nLocale, wordType, true);
60cdf0e10cSrcweir if (result.endPos < anyPos)
61cdf0e10cSrcweir return result;
62cdf0e10cSrcweir }
63cdf0e10cSrcweir return BreakIterator_Unicode::previousWord(text, anyPos, nLocale, wordType);
64cdf0e10cSrcweir }
65cdf0e10cSrcweir
66cdf0e10cSrcweir Boundary SAL_CALL
nextWord(const OUString & text,sal_Int32 anyPos,const lang::Locale & nLocale,sal_Int16 wordType)67cdf0e10cSrcweir BreakIterator_CJK::nextWord(const OUString& text, sal_Int32 anyPos,
68cdf0e10cSrcweir const lang::Locale& nLocale, sal_Int16 wordType) throw(RuntimeException)
69cdf0e10cSrcweir {
70cdf0e10cSrcweir if (dict) {
71cdf0e10cSrcweir result = dict->nextWord(text, anyPos, wordType);
72cdf0e10cSrcweir // #109813# for non-CJK, single character word, fallback to ICU breakiterator.
73cdf0e10cSrcweir if (result.endPos - result.startPos != 1 ||
74cdf0e10cSrcweir getScriptType(text, result.startPos) == ScriptType::ASIAN)
75cdf0e10cSrcweir return result;
76cdf0e10cSrcweir result = BreakIterator_Unicode::getWordBoundary(text, result.startPos, nLocale, wordType, true);
77cdf0e10cSrcweir if (result.startPos > anyPos)
78cdf0e10cSrcweir return result;
79cdf0e10cSrcweir }
80cdf0e10cSrcweir return BreakIterator_Unicode::nextWord(text, anyPos, nLocale, wordType);
81cdf0e10cSrcweir }
82cdf0e10cSrcweir
83cdf0e10cSrcweir Boundary SAL_CALL
getWordBoundary(const OUString & text,sal_Int32 anyPos,const lang::Locale & nLocale,sal_Int16 wordType,sal_Bool bDirection)84cdf0e10cSrcweir BreakIterator_CJK::getWordBoundary( const OUString& text, sal_Int32 anyPos,
85cdf0e10cSrcweir const lang::Locale& nLocale, sal_Int16 wordType, sal_Bool bDirection )
86cdf0e10cSrcweir throw(RuntimeException)
87cdf0e10cSrcweir {
88cdf0e10cSrcweir if (dict) {
89cdf0e10cSrcweir result = dict->getWordBoundary(text, anyPos, wordType, bDirection);
90cdf0e10cSrcweir // #109813# for non-CJK, single character word, fallback to ICU breakiterator.
91cdf0e10cSrcweir if (result.endPos - result.startPos != 1 ||
92cdf0e10cSrcweir getScriptType(text, result.startPos) == ScriptType::ASIAN)
93cdf0e10cSrcweir return result;
94cdf0e10cSrcweir }
95cdf0e10cSrcweir return BreakIterator_Unicode::getWordBoundary(text, anyPos, nLocale, wordType, bDirection);
96cdf0e10cSrcweir }
97cdf0e10cSrcweir
getLineBreak(const OUString & Text,sal_Int32 nStartPos,const lang::Locale &,sal_Int32,const LineBreakHyphenationOptions &,const LineBreakUserOptions & bOptions)98cdf0e10cSrcweir LineBreakResults SAL_CALL BreakIterator_CJK::getLineBreak(
99cdf0e10cSrcweir const OUString& Text, sal_Int32 nStartPos,
100cdf0e10cSrcweir const lang::Locale& /*rLocale*/, sal_Int32 /*nMinBreakPos*/,
101cdf0e10cSrcweir const LineBreakHyphenationOptions& /*hOptions*/,
102cdf0e10cSrcweir const LineBreakUserOptions& bOptions ) throw(RuntimeException)
103cdf0e10cSrcweir {
104cdf0e10cSrcweir LineBreakResults lbr;
105cdf0e10cSrcweir
106cdf0e10cSrcweir if (bOptions.allowPunctuationOutsideMargin &&
107cdf0e10cSrcweir hangingCharacters.indexOf(Text[nStartPos]) != -1 &&
108cdf0e10cSrcweir (Text.iterateCodePoints( &nStartPos, 1), nStartPos == Text.getLength())) {
109cdf0e10cSrcweir ; // do nothing
110cdf0e10cSrcweir } else if (bOptions.applyForbiddenRules && 0 < nStartPos && nStartPos < Text.getLength()) {
111cdf0e10cSrcweir while (nStartPos > 0 &&
112cdf0e10cSrcweir (bOptions.forbiddenBeginCharacters.indexOf(Text[nStartPos]) != -1 ||
113cdf0e10cSrcweir bOptions.forbiddenEndCharacters.indexOf(Text[nStartPos-1]) != -1))
114cdf0e10cSrcweir Text.iterateCodePoints( &nStartPos, -1);
115cdf0e10cSrcweir }
116cdf0e10cSrcweir
117cdf0e10cSrcweir lbr.breakIndex = nStartPos;
118cdf0e10cSrcweir lbr.breakType = BreakType::WORDBOUNDARY;
119cdf0e10cSrcweir return lbr;
120cdf0e10cSrcweir }
121cdf0e10cSrcweir
// Builds a lang::Locale from ASCII language and country strings; the third
// (variant) field is an empty OUString.
#define LOCALE(language, country) \
    lang::Locale( \
        OUString::createFromAscii(language), \
        OUString::createFromAscii(country), \
        OUString())
123cdf0e10cSrcweir // ----------------------------------------------------
124cdf0e10cSrcweir // class BreakIterator_zh
125cdf0e10cSrcweir // ----------------------------------------------------;
BreakIterator_zh()126cdf0e10cSrcweir BreakIterator_zh::BreakIterator_zh()
127cdf0e10cSrcweir {
128cdf0e10cSrcweir dict = new xdictionary("zh");
129cdf0e10cSrcweir hangingCharacters = LocaleData().getHangingCharacters(LOCALE("zh", "CN"));
130cdf0e10cSrcweir cBreakIterator = "com.sun.star.i18n.BreakIterator_zh";
131cdf0e10cSrcweir }
132cdf0e10cSrcweir
~BreakIterator_zh()133cdf0e10cSrcweir BreakIterator_zh::~BreakIterator_zh()
134cdf0e10cSrcweir {
135cdf0e10cSrcweir delete dict;
136cdf0e10cSrcweir }
137cdf0e10cSrcweir
138cdf0e10cSrcweir // ----------------------------------------------------
139cdf0e10cSrcweir // class BreakIterator_zh_TW
140cdf0e10cSrcweir // ----------------------------------------------------;
BreakIterator_zh_TW()141cdf0e10cSrcweir BreakIterator_zh_TW::BreakIterator_zh_TW()
142cdf0e10cSrcweir {
143cdf0e10cSrcweir dict = new xdictionary("zh");
144cdf0e10cSrcweir hangingCharacters = LocaleData().getHangingCharacters(LOCALE("zh", "TW"));
145cdf0e10cSrcweir cBreakIterator = "com.sun.star.i18n.BreakIterator_zh_TW";
146cdf0e10cSrcweir }
147cdf0e10cSrcweir
~BreakIterator_zh_TW()148cdf0e10cSrcweir BreakIterator_zh_TW::~BreakIterator_zh_TW()
149cdf0e10cSrcweir {
150cdf0e10cSrcweir delete dict;
151cdf0e10cSrcweir }
152cdf0e10cSrcweir
153cdf0e10cSrcweir // ----------------------------------------------------
154cdf0e10cSrcweir // class BreakIterator_ja
155cdf0e10cSrcweir // ----------------------------------------------------;
BreakIterator_ja()156cdf0e10cSrcweir BreakIterator_ja::BreakIterator_ja()
157cdf0e10cSrcweir {
158cdf0e10cSrcweir dict = new xdictionary("ja");
159cdf0e10cSrcweir dict->setJapaneseWordBreak();
160cdf0e10cSrcweir hangingCharacters = LocaleData().getHangingCharacters(LOCALE("ja", "JP"));
161cdf0e10cSrcweir cBreakIterator = "com.sun.star.i18n.BreakIterator_ja";
162cdf0e10cSrcweir }
163cdf0e10cSrcweir
~BreakIterator_ja()164cdf0e10cSrcweir BreakIterator_ja::~BreakIterator_ja()
165cdf0e10cSrcweir {
166cdf0e10cSrcweir delete dict;
167cdf0e10cSrcweir }
168cdf0e10cSrcweir
169cdf0e10cSrcweir // ----------------------------------------------------
170cdf0e10cSrcweir // class BreakIterator_ko
171cdf0e10cSrcweir // ----------------------------------------------------;
BreakIterator_ko()172cdf0e10cSrcweir BreakIterator_ko::BreakIterator_ko()
173cdf0e10cSrcweir {
174cdf0e10cSrcweir hangingCharacters = LocaleData().getHangingCharacters(LOCALE("ko", "KR"));
175cdf0e10cSrcweir cBreakIterator = "com.sun.star.i18n.BreakIterator_ko";
176cdf0e10cSrcweir }
177cdf0e10cSrcweir
~BreakIterator_ko()178cdf0e10cSrcweir BreakIterator_ko::~BreakIterator_ko()
179cdf0e10cSrcweir {
180cdf0e10cSrcweir }
181cdf0e10cSrcweir
182cdf0e10cSrcweir } } } }
183