1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_i18npool.hxx"
26
27 #include <assert.h>
28 #include <textconversion.hxx>
29 #include <com/sun/star/i18n/TextConversionType.hpp>
30 #include <com/sun/star/i18n/TextConversionOption.hpp>
31 #include <com/sun/star/linguistic2/ConversionDirection.hpp>
32 #include <com/sun/star/linguistic2/ConversionDictionaryType.hpp>
33 #include <i18nutil/x_rtl_ustring.h>
34
35 using namespace com::sun::star::lang;
36 using namespace com::sun::star::i18n;
37 using namespace com::sun::star::linguistic2;
38 using namespace com::sun::star::uno;
39 using namespace rtl;
40
41 namespace com { namespace sun { namespace star { namespace i18n {
42
TextConversion_zh(const Reference<XMultiServiceFactory> & xMSF)43 TextConversion_zh::TextConversion_zh( const Reference < XMultiServiceFactory >& xMSF )
44 {
45 Reference < XInterface > xI;
46 xI = xMSF->createInstance(
47 OUString::createFromAscii( "com.sun.star.linguistic2.ConversionDictionaryList" ));
48 if ( xI.is() )
49 xI->queryInterface( getCppuType((const Reference< XConversionDictionaryList>*)0) ) >>= xCDL;
50
51 implementationName = "com.sun.star.i18n.TextConversion_zh";
52 }
53
getOneCharConversion(sal_Unicode ch,const sal_Unicode * Data,const sal_uInt16 * Index)54 sal_Unicode SAL_CALL getOneCharConversion(sal_Unicode ch, const sal_Unicode* Data, const sal_uInt16* Index)
55 {
56 if (Data && Index) {
57 sal_Unicode address = Index[ch>>8];
58 if (address != 0xFFFF)
59 address = Data[address + (ch & 0xFF)];
60 return (address != 0xFFFF) ? address : ch;
61 } else {
62 return ch;
63 }
64 }
65
66 OUString SAL_CALL
getCharConversion(const OUString & aText,sal_Int32 nStartPos,sal_Int32 nLength,sal_Bool toSChinese,sal_Int32 nConversionOptions)67 TextConversion_zh::getCharConversion(const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, sal_Bool toSChinese, sal_Int32 nConversionOptions)
68 {
69 const sal_Unicode *Data;
70 const sal_uInt16 *Index;
71
72 if (toSChinese) {
73 Data = ((const sal_Unicode* (*)())getFunctionBySymbol("getSTC_CharData_T2S"))();
74 Index = ((const sal_uInt16* (*)())getFunctionBySymbol("getSTC_CharIndex_T2S"))();
75 } else if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
76 Data = ((const sal_Unicode* (*)())getFunctionBySymbol("getSTC_CharData_S2V"))();
77 Index = ((const sal_uInt16* (*)())getFunctionBySymbol("getSTC_CharIndex_S2V"))();
78 } else {
79 Data = ((const sal_Unicode* (*)())getFunctionBySymbol("getSTC_CharData_S2T"))();
80 Index = ((const sal_uInt16* (*)())getFunctionBySymbol("getSTC_CharIndex_S2T"))();
81 }
82
83 rtl_uString * newStr = x_rtl_uString_new_WithLength( nLength ); // defined in x_rtl_ustring.h
84 for (sal_Int32 i = 0; i < nLength; i++)
85 newStr->buffer[i] =
86 getOneCharConversion(aText[nStartPos+i], Data, Index);
87 return OUString( newStr, SAL_NO_ACQUIRE ); // take over ownership of <newStr>
88 }
89
90 OUString SAL_CALL
getWordConversion(const OUString & aText,sal_Int32 nStartPos,sal_Int32 nLength,sal_Bool toSChinese,sal_Int32 nConversionOptions,Sequence<sal_Int32> & offset)91 TextConversion_zh::getWordConversion(const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, sal_Bool toSChinese, sal_Int32 nConversionOptions, Sequence<sal_Int32>& offset)
92 {
93 sal_Int32 dictLen = 0;
94 sal_Int32 maxLen = 0;
95 const sal_uInt16 *index;
96 const sal_uInt16 *entry;
97 const sal_Unicode *charData;
98 const sal_uInt16 *charIndex;
99 sal_Bool one2one=sal_True;
100
101 const sal_Unicode *wordData = ((const sal_Unicode* (*)(sal_Int32&)) getFunctionBySymbol("getSTC_WordData"))(dictLen);
102 if (toSChinese) {
103 index = ((const sal_uInt16* (*)(sal_Int32&)) getFunctionBySymbol("getSTC_WordIndex_T2S"))(maxLen);
104 entry = ((const sal_uInt16* (*)()) getFunctionBySymbol("getSTC_WordEntry_T2S"))();
105 charData = ((const sal_Unicode* (*)()) getFunctionBySymbol("getSTC_CharData_T2S"))();
106 charIndex = ((const sal_uInt16* (*)()) getFunctionBySymbol("getSTC_CharIndex_T2S"))();
107 } else {
108 index = ((const sal_uInt16* (*)(sal_Int32&)) getFunctionBySymbol("getSTC_WordIndex_S2T"))(maxLen);
109 entry = ((const sal_uInt16* (*)()) getFunctionBySymbol("getSTC_WordEntry_S2T"))();
110 if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
111 charData = ((const sal_Unicode* (*)()) getFunctionBySymbol("getSTC_CharData_S2V"))();
112 charIndex = ((const sal_uInt16* (*)()) getFunctionBySymbol("getSTC_CharIndex_S2V"))();
113 } else {
114 charData = ((const sal_Unicode* (*)()) getFunctionBySymbol("getSTC_CharData_S2T"))();
115 charIndex = ((const sal_uInt16* (*)()) getFunctionBySymbol("getSTC_CharIndex_S2T"))();
116 }
117 }
118
119 if ((!wordData || !index || !entry) && !xCDL.is()) // no word mapping defined, do char2char conversion.
120 return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
121
122 rtl_uString * newStr = x_rtl_uString_new_WithLength( nLength * 2 ); // defined in x_rtl_ustring.h
123 sal_Int32 currPos = 0, count = 0;
124 while (currPos < nLength) {
125 sal_Int32 len = nLength - currPos;
126 sal_Bool found = sal_False;
127 if (len > maxLen)
128 len = maxLen;
129 for (; len > 0 && ! found; len--) {
130 OUString word = aText.copy(nStartPos + currPos, len);
131 sal_Int32 current = 0;
132 // user dictionary
133 if (xCDL.is()) {
134 Sequence < OUString > conversions;
135 try {
136 conversions = xCDL->queryConversions(word, 0, len,
137 aLocale, ConversionDictionaryType::SCHINESE_TCHINESE,
138 /*toSChinese ?*/ ConversionDirection_FROM_LEFT /*: ConversionDirection_FROM_RIGHT*/,
139 nConversionOptions);
140 }
141 catch ( NoSupportException & ) {
142 // clear reference (when there is no user dictionary) in order
143 // to not always have to catch this exception again
144 // in further calls. (save time)
145 xCDL = 0;
146 }
147 catch (...) {
148 // catch all other exceptions to allow
149 // querying the system dictionary in the next line
150 }
151 if (conversions.getLength() > 0) {
152 if (offset.getLength() > 0) {
153 if (word.getLength() != conversions[0].getLength())
154 one2one=sal_False;
155 while (current < conversions[0].getLength()) {
156 offset[count] = nStartPos + currPos + (current *
157 word.getLength() / conversions[0].getLength());
158 newStr->buffer[count++] = conversions[0][current++];
159 }
160 // offset[count-1] = nStartPos + currPos + word.getLength() - 1;
161 } else {
162 while (current < conversions[0].getLength())
163 newStr->buffer[count++] = conversions[0][current++];
164 }
165 currPos += word.getLength();
166 found = sal_True;
167 }
168 }
169
170 if (!found && index[len+1] - index[len] > 0) {
171 sal_Int32 bottom = (sal_Int32) index[len];
172 sal_Int32 top = (sal_Int32) index[len+1] - 1;
173
174 while (bottom <= top && !found) {
175 current = (top + bottom) / 2;
176 const sal_Int32 result = word.compareTo(wordData + entry[current]);
177 if (result < 0)
178 top = current - 1;
179 else if (result > 0)
180 bottom = current + 1;
181 else {
182 if (toSChinese) // Traditionary/Simplified conversion,
183 for (current = entry[current]-1; current > 0 && wordData[current-1]; current--) ;
184 else // Simplified/Traditionary conversion, forwards search for next word
185 current = entry[current] + word.getLength() + 1;
186 sal_Int32 start=current;
187 if (offset.getLength() > 0) {
188 if (word.getLength() != OUString(&wordData[current]).getLength())
189 one2one=sal_False;
190 sal_Int32 convertedLength=OUString(&wordData[current]).getLength();
191 while (wordData[current]) {
192 offset[count]=nStartPos + currPos + ((current-start) *
193 word.getLength() / convertedLength);
194 newStr->buffer[count++] = wordData[current++];
195 }
196 // offset[count-1]=nStartPos + currPos + word.getLength() - 1;
197 } else {
198 while (wordData[current])
199 newStr->buffer[count++] = wordData[current++];
200 }
201 currPos += word.getLength();
202 found = sal_True;
203 }
204 }
205 }
206 }
207 if (!found) {
208 if (offset.getLength() > 0)
209 offset[count]=nStartPos+currPos;
210 newStr->buffer[count++] =
211 getOneCharConversion(aText[nStartPos+currPos], charData, charIndex);
212 currPos++;
213 }
214 }
215 if (offset.getLength() > 0)
216 offset.realloc(one2one ? 0 : count);
217 OUString resultStr( newStr->buffer, count );
218 x_rtl_uString_release( newStr );
219 return resultStr;
220 }
221
222 TextConversionResult SAL_CALL
getConversions(const OUString & aText,sal_Int32 nStartPos,sal_Int32 nLength,const Locale & rLocale,sal_Int16 nConversionType,sal_Int32 nConversionOptions)223 TextConversion_zh::getConversions( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
224 const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions)
225 throw( RuntimeException, IllegalArgumentException, NoSupportException )
226 {
227 TextConversionResult result;
228
229 result.Candidates.realloc(1);
230 result.Candidates[0] = getConversion( aText, nStartPos, nLength, rLocale, nConversionType, nConversionOptions);
231 result.Boundary.startPos = nStartPos;
232 result.Boundary.endPos = nStartPos + nLength;
233
234 return result;
235 }
236
237 OUString SAL_CALL
getConversion(const OUString & aText,sal_Int32 nStartPos,sal_Int32 nLength,const Locale & rLocale,sal_Int16 nConversionType,sal_Int32 nConversionOptions)238 TextConversion_zh::getConversion( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
239 const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions)
240 throw( RuntimeException, IllegalArgumentException, NoSupportException )
241 {
242 if (rLocale.Language.equalsAscii("zh") &&
243 ( nConversionType == TextConversionType::TO_SCHINESE ||
244 nConversionType == TextConversionType::TO_TCHINESE) ) {
245
246 aLocale=rLocale;
247 sal_Bool toSChinese = nConversionType == TextConversionType::TO_SCHINESE;
248
249 if (nConversionOptions & TextConversionOption::CHARACTER_BY_CHARACTER)
250 // char to char dictionary
251 return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
252 else {
253 Sequence <sal_Int32> offset;
254 // word to word dictionary
255 return getWordConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions, offset);
256 }
257 } else
258 throw NoSupportException(); // Conversion type is not supported in this service.
259 }
260
261 OUString SAL_CALL
getConversionWithOffset(const OUString & aText,sal_Int32 nStartPos,sal_Int32 nLength,const Locale & rLocale,sal_Int16 nConversionType,sal_Int32 nConversionOptions,Sequence<sal_Int32> & offset)262 TextConversion_zh::getConversionWithOffset( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
263 const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions, Sequence<sal_Int32>& offset)
264 throw( RuntimeException, IllegalArgumentException, NoSupportException )
265 {
266 if (rLocale.Language.equalsAscii("zh") &&
267 ( nConversionType == TextConversionType::TO_SCHINESE ||
268 nConversionType == TextConversionType::TO_TCHINESE) ) {
269
270 aLocale=rLocale;
271 sal_Bool toSChinese = nConversionType == TextConversionType::TO_SCHINESE;
272
273 if (nConversionOptions & TextConversionOption::CHARACTER_BY_CHARACTER) {
274 offset.realloc(0);
275 // char to char dictionary
276 return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
277 } else {
278 if (offset.getLength() < 2*nLength)
279 offset.realloc(2*nLength);
280 // word to word dictionary
281 return getWordConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions, offset);
282 }
283 } else
284 throw NoSupportException(); // Conversion type is not supported in this service.
285 }
286
287 sal_Bool SAL_CALL
interactiveConversion(const Locale &,sal_Int16,sal_Int32)288 TextConversion_zh::interactiveConversion( const Locale& /*rLocale*/, sal_Int16 /*nTextConversionType*/, sal_Int32 /*nTextConversionOptions*/ )
289 throw( RuntimeException, IllegalArgumentException, NoSupportException )
290 {
291 return sal_False;
292 }
293
294 } } } }
295