1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_i18npool.hxx"
26
27 #define BREAKITERATOR_ALL
28 #include <breakiterator_cjk.hxx>
29 #include <localedata.hxx>
30 #include <i18nutil/unicode.hxx>
31
32 using namespace ::com::sun::star::uno;
33 using namespace ::com::sun::star::lang;
34 using namespace ::rtl;
35
36 namespace com { namespace sun { namespace star { namespace i18n {
37
38 // ----------------------------------------------------
39 // class BreakIterator_CJK
40 // ----------------------------------------------------;
41
BreakIterator_CJK()42 BreakIterator_CJK::BreakIterator_CJK() :
43 dict( NULL ),
44 hangingCharacters()
45 {
46 cBreakIterator = "com.sun.star.i18n.BreakIterator_CJK";
47 }
48
49 Boundary SAL_CALL
previousWord(const OUString & text,sal_Int32 anyPos,const lang::Locale & nLocale,sal_Int16 wordType)50 BreakIterator_CJK::previousWord(const OUString& text, sal_Int32 anyPos,
51 const lang::Locale& nLocale, sal_Int16 wordType) throw(RuntimeException)
52 {
53 if (dict) {
54 result = dict->previousWord(text, anyPos, wordType);
55 // #109813# for non-CJK, single character word, fallback to ICU breakiterator.
56 if (result.endPos - result.startPos != 1 ||
57 getScriptType(text, result.startPos) == ScriptType::ASIAN)
58 return result;
59 result = BreakIterator_Unicode::getWordBoundary(text, result.startPos, nLocale, wordType, true);
60 if (result.endPos < anyPos)
61 return result;
62 }
63 return BreakIterator_Unicode::previousWord(text, anyPos, nLocale, wordType);
64 }
65
66 Boundary SAL_CALL
nextWord(const OUString & text,sal_Int32 anyPos,const lang::Locale & nLocale,sal_Int16 wordType)67 BreakIterator_CJK::nextWord(const OUString& text, sal_Int32 anyPos,
68 const lang::Locale& nLocale, sal_Int16 wordType) throw(RuntimeException)
69 {
70 if (dict) {
71 result = dict->nextWord(text, anyPos, wordType);
72 // #109813# for non-CJK, single character word, fallback to ICU breakiterator.
73 if (result.endPos - result.startPos != 1 ||
74 getScriptType(text, result.startPos) == ScriptType::ASIAN)
75 return result;
76 result = BreakIterator_Unicode::getWordBoundary(text, result.startPos, nLocale, wordType, true);
77 if (result.startPos > anyPos)
78 return result;
79 }
80 return BreakIterator_Unicode::nextWord(text, anyPos, nLocale, wordType);
81 }
82
83 Boundary SAL_CALL
getWordBoundary(const OUString & text,sal_Int32 anyPos,const lang::Locale & nLocale,sal_Int16 wordType,sal_Bool bDirection)84 BreakIterator_CJK::getWordBoundary( const OUString& text, sal_Int32 anyPos,
85 const lang::Locale& nLocale, sal_Int16 wordType, sal_Bool bDirection )
86 throw(RuntimeException)
87 {
88 if (dict) {
89 result = dict->getWordBoundary(text, anyPos, wordType, bDirection);
90 // #109813# for non-CJK, single character word, fallback to ICU breakiterator.
91 if (result.endPos - result.startPos != 1 ||
92 getScriptType(text, result.startPos) == ScriptType::ASIAN)
93 return result;
94 }
95 return BreakIterator_Unicode::getWordBoundary(text, anyPos, nLocale, wordType, bDirection);
96 }
97
getLineBreak(const OUString & Text,sal_Int32 nStartPos,const lang::Locale &,sal_Int32,const LineBreakHyphenationOptions &,const LineBreakUserOptions & bOptions)98 LineBreakResults SAL_CALL BreakIterator_CJK::getLineBreak(
99 const OUString& Text, sal_Int32 nStartPos,
100 const lang::Locale& /*rLocale*/, sal_Int32 /*nMinBreakPos*/,
101 const LineBreakHyphenationOptions& /*hOptions*/,
102 const LineBreakUserOptions& bOptions ) throw(RuntimeException)
103 {
104 LineBreakResults lbr;
105
106 if (bOptions.allowPunctuationOutsideMargin &&
107 hangingCharacters.indexOf(Text[nStartPos]) != -1 &&
108 (Text.iterateCodePoints( &nStartPos, 1), nStartPos == Text.getLength())) {
109 ; // do nothing
110 } else if (bOptions.applyForbiddenRules && 0 < nStartPos && nStartPos < Text.getLength()) {
111 while (nStartPos > 0 &&
112 (bOptions.forbiddenBeginCharacters.indexOf(Text[nStartPos]) != -1 ||
113 bOptions.forbiddenEndCharacters.indexOf(Text[nStartPos-1]) != -1))
114 Text.iterateCodePoints( &nStartPos, -1);
115 }
116
117 lbr.breakIndex = nStartPos;
118 lbr.breakType = BreakType::WORDBOUNDARY;
119 return lbr;
120 }
121
// Builds a lang::Locale from two ASCII string arguments (used below as
// language and country codes); the third constructor argument is an empty
// OUString.
#define LOCALE(language, country) \
    lang::Locale(OUString::createFromAscii(language), \
                 OUString::createFromAscii(country), \
                 OUString())
123 // ----------------------------------------------------
124 // class BreakIterator_zh
125 // ----------------------------------------------------;
BreakIterator_zh()126 BreakIterator_zh::BreakIterator_zh()
127 {
128 dict = new xdictionary("zh");
129 hangingCharacters = LocaleData().getHangingCharacters(LOCALE("zh", "CN"));
130 cBreakIterator = "com.sun.star.i18n.BreakIterator_zh";
131 }
132
~BreakIterator_zh()133 BreakIterator_zh::~BreakIterator_zh()
134 {
135 delete dict;
136 }
137
138 // ----------------------------------------------------
139 // class BreakIterator_zh_TW
140 // ----------------------------------------------------;
BreakIterator_zh_TW()141 BreakIterator_zh_TW::BreakIterator_zh_TW()
142 {
143 dict = new xdictionary("zh");
144 hangingCharacters = LocaleData().getHangingCharacters(LOCALE("zh", "TW"));
145 cBreakIterator = "com.sun.star.i18n.BreakIterator_zh_TW";
146 }
147
~BreakIterator_zh_TW()148 BreakIterator_zh_TW::~BreakIterator_zh_TW()
149 {
150 delete dict;
151 }
152
153 // ----------------------------------------------------
154 // class BreakIterator_ja
155 // ----------------------------------------------------;
BreakIterator_ja()156 BreakIterator_ja::BreakIterator_ja()
157 {
158 dict = new xdictionary("ja");
159 dict->setJapaneseWordBreak();
160 hangingCharacters = LocaleData().getHangingCharacters(LOCALE("ja", "JP"));
161 cBreakIterator = "com.sun.star.i18n.BreakIterator_ja";
162 }
163
~BreakIterator_ja()164 BreakIterator_ja::~BreakIterator_ja()
165 {
166 delete dict;
167 }
168
169 // ----------------------------------------------------
170 // class BreakIterator_ko
171 // ----------------------------------------------------;
BreakIterator_ko()172 BreakIterator_ko::BreakIterator_ko()
173 {
174 hangingCharacters = LocaleData().getHangingCharacters(LOCALE("ko", "KR"));
175 cBreakIterator = "com.sun.star.i18n.BreakIterator_ko";
176 }
177
~BreakIterator_ko()178 BreakIterator_ko::~BreakIterator_ko()
179 {
180 }
181
182 } } } }
183