1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_i18npool.hxx"
30 
#include <assert.h>
#include <textconversion.hxx>
#include <com/sun/star/i18n/TextConversionType.hpp>
#include <com/sun/star/i18n/TextConversionOption.hpp>
#include <com/sun/star/linguistic2/ConversionDirection.hpp>
#include <com/sun/star/linguistic2/ConversionDictionaryType.hpp>
#include <i18nutil/x_rtl_ustring.h>
#include <rtl/ustrbuf.hxx>
38 
39 using namespace com::sun::star::lang;
40 using namespace com::sun::star::i18n;
41 using namespace com::sun::star::linguistic2;
42 using namespace com::sun::star::uno;
43 using namespace rtl;
44 
45 namespace com { namespace sun { namespace star { namespace i18n {
46 
47 TextConversion_zh::TextConversion_zh( const Reference < XMultiServiceFactory >& xMSF )
48 {
49     Reference < XInterface > xI;
50     xI = xMSF->createInstance(
51         OUString::createFromAscii( "com.sun.star.linguistic2.ConversionDictionaryList" ));
52     if ( xI.is() )
53         xI->queryInterface( getCppuType((const Reference< XConversionDictionaryList>*)0) ) >>= xCDL;
54 
55     implementationName = "com.sun.star.i18n.TextConversion_zh";
56 }
57 
58 sal_Unicode SAL_CALL getOneCharConversion(sal_Unicode ch, const sal_Unicode* Data, const sal_uInt16* Index)
59 {
60     if (Data && Index) {
61         sal_Unicode address = Index[ch>>8];
62         if (address != 0xFFFF)
63             address = Data[address + (ch & 0xFF)];
64         return (address != 0xFFFF) ? address : ch;
65     } else {
66         return ch;
67     }
68 }
69 
70 OUString SAL_CALL
71 TextConversion_zh::getCharConversion(const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, sal_Bool toSChinese, sal_Int32 nConversionOptions)
72 {
73     const sal_Unicode *Data;
74     const sal_uInt16 *Index;
75 
76     if (toSChinese) {
77         Data = ((const sal_Unicode* (*)())getFunctionBySymbol("getSTC_CharData_T2S"))();
78         Index = ((const sal_uInt16* (*)())getFunctionBySymbol("getSTC_CharIndex_T2S"))();
79     } else if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
80         Data = ((const sal_Unicode* (*)())getFunctionBySymbol("getSTC_CharData_S2V"))();
81         Index = ((const sal_uInt16* (*)())getFunctionBySymbol("getSTC_CharIndex_S2V"))();
82     } else {
83         Data = ((const sal_Unicode* (*)())getFunctionBySymbol("getSTC_CharData_S2T"))();
84         Index = ((const sal_uInt16* (*)())getFunctionBySymbol("getSTC_CharIndex_S2T"))();
85     }
86 
87     rtl_uString * newStr = x_rtl_uString_new_WithLength( nLength ); // defined in x_rtl_ustring.h
88     for (sal_Int32 i = 0; i < nLength; i++)
89         newStr->buffer[i] =
90             getOneCharConversion(aText[nStartPos+i], Data, Index);
91     return OUString( newStr->buffer, nLength);
92 }
93 
94 OUString SAL_CALL
95 TextConversion_zh::getWordConversion(const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, sal_Bool toSChinese, sal_Int32 nConversionOptions, Sequence<sal_Int32>& offset)
96 {
97     sal_Int32 dictLen = 0;
98     sal_Int32 maxLen = 0;
99     const sal_uInt16 *index;
100     const sal_uInt16 *entry;
101     const sal_Unicode *charData;
102     const sal_uInt16 *charIndex;
103     sal_Bool one2one=sal_True;
104 
105     const sal_Unicode *wordData = ((const sal_Unicode* (*)(sal_Int32&)) getFunctionBySymbol("getSTC_WordData"))(dictLen);
106     if (toSChinese) {
107         index = ((const sal_uInt16* (*)(sal_Int32&)) getFunctionBySymbol("getSTC_WordIndex_T2S"))(maxLen);
108         entry = ((const sal_uInt16* (*)()) getFunctionBySymbol("getSTC_WordEntry_T2S"))();
109         charData = ((const sal_Unicode* (*)()) getFunctionBySymbol("getSTC_CharData_T2S"))();
110         charIndex = ((const sal_uInt16* (*)()) getFunctionBySymbol("getSTC_CharIndex_T2S"))();
111     } else {
112         index = ((const sal_uInt16* (*)(sal_Int32&)) getFunctionBySymbol("getSTC_WordIndex_S2T"))(maxLen);
113         entry = ((const sal_uInt16* (*)()) getFunctionBySymbol("getSTC_WordEntry_S2T"))();
114         if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
115             charData = ((const sal_Unicode* (*)()) getFunctionBySymbol("getSTC_CharData_S2V"))();
116             charIndex = ((const sal_uInt16* (*)()) getFunctionBySymbol("getSTC_CharIndex_S2V"))();
117         } else {
118             charData = ((const sal_Unicode* (*)()) getFunctionBySymbol("getSTC_CharData_S2T"))();
119             charIndex = ((const sal_uInt16* (*)()) getFunctionBySymbol("getSTC_CharIndex_S2T"))();
120         }
121     }
122 
123     if ((!wordData || !index || !entry) && !xCDL.is()) // no word mapping defined, do char2char conversion.
124         return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
125 
126     rtl_uString * newStr = x_rtl_uString_new_WithLength( nLength * 2 ); // defined in x_rtl_ustring.h
127     sal_Int32 currPos = 0, count = 0;
128     while (currPos < nLength) {
129         sal_Int32 len = nLength - currPos;
130         sal_Bool found = sal_False;
131         if (len > maxLen)
132             len = maxLen;
133         for (; len > 0 && ! found; len--) {
134             OUString word = aText.copy(nStartPos + currPos, len);
135             sal_Int32 current = 0;
136             // user dictionary
137             if (xCDL.is()) {
138                 Sequence < OUString > conversions;
139                 try {
140                     conversions = xCDL->queryConversions(word, 0, len,
141                             aLocale, ConversionDictionaryType::SCHINESE_TCHINESE,
142                             /*toSChinese ?*/ ConversionDirection_FROM_LEFT /*: ConversionDirection_FROM_RIGHT*/,
143                             nConversionOptions);
144                 }
145                 catch ( NoSupportException & ) {
146                     // clear reference (when there is no user dictionary) in order
147                     // to not always have to catch this exception again
148                     // in further calls. (save time)
149                     xCDL = 0;
150                 }
151                 catch (...) {
152                     // catch all other exceptions to allow
153                     // querying the system dictionary in the next line
154                 }
155                 if (conversions.getLength() > 0) {
156                     if (offset.getLength() > 0) {
157                         if (word.getLength() != conversions[0].getLength())
158                             one2one=sal_False;
159                         while (current < conversions[0].getLength()) {
160                             offset[count] = nStartPos + currPos + (current *
161                                     word.getLength() / conversions[0].getLength());
162                             newStr->buffer[count++] = conversions[0][current++];
163                         }
164                         // offset[count-1] = nStartPos + currPos + word.getLength() - 1;
165                     } else {
166                         while (current < conversions[0].getLength())
167                             newStr->buffer[count++] = conversions[0][current++];
168                     }
169                     currPos += word.getLength();
170                     found = sal_True;
171                 }
172             }
173 
174             if (!found && index[len+1] - index[len] > 0) {
175                 sal_Int32 bottom = (sal_Int32) index[len];
176                 sal_Int32 top = (sal_Int32) index[len+1] - 1;
177 
178                 while (bottom <= top && !found) {
179                     current = (top + bottom) / 2;
180                     const sal_Int32 result = word.compareTo(wordData + entry[current]);
181                     if (result < 0)
182                         top = current - 1;
183                     else if (result > 0)
184                         bottom = current + 1;
185                     else {
186                         if (toSChinese)   // Traditionary/Simplified conversion,
187                             for (current = entry[current]-1; current > 0 && wordData[current-1]; current--) ;
188                         else  // Simplified/Traditionary conversion, forwards search for next word
189                             current = entry[current] + word.getLength() + 1;
190                         sal_Int32 start=current;
191                         if (offset.getLength() > 0) {
192                             if (word.getLength() != OUString(&wordData[current]).getLength())
193                                 one2one=sal_False;
194                             sal_Int32 convertedLength=OUString(&wordData[current]).getLength();
195                             while (wordData[current]) {
196                                 offset[count]=nStartPos + currPos + ((current-start) *
197                                     word.getLength() / convertedLength);
198                                 newStr->buffer[count++] = wordData[current++];
199                             }
200                             // offset[count-1]=nStartPos + currPos + word.getLength() - 1;
201                         } else {
202                             while (wordData[current])
203                                 newStr->buffer[count++] = wordData[current++];
204                         }
205                         currPos += word.getLength();
206                         found = sal_True;
207                     }
208                 }
209             }
210         }
211         if (!found) {
212             if (offset.getLength() > 0)
213                 offset[count]=nStartPos+currPos;
214             newStr->buffer[count++] =
215                 getOneCharConversion(aText[nStartPos+currPos], charData, charIndex);
216             currPos++;
217         }
218     }
219     if (offset.getLength() > 0)
220         offset.realloc(one2one ? 0 : count);
221     return OUString( newStr->buffer, count);
222 }
223 
224 TextConversionResult SAL_CALL
225 TextConversion_zh::getConversions( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
226     const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions)
227     throw(  RuntimeException, IllegalArgumentException, NoSupportException )
228 {
229     TextConversionResult result;
230 
231     result.Candidates.realloc(1);
232     result.Candidates[0] = getConversion( aText, nStartPos, nLength, rLocale, nConversionType, nConversionOptions);
233     result.Boundary.startPos = nStartPos;
234     result.Boundary.endPos = nStartPos + nLength;
235 
236     return result;
237 }
238 
239 OUString SAL_CALL
240 TextConversion_zh::getConversion( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
241     const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions)
242     throw(  RuntimeException, IllegalArgumentException, NoSupportException )
243 {
244     if (rLocale.Language.equalsAscii("zh") &&
245             ( nConversionType == TextConversionType::TO_SCHINESE ||
246             nConversionType == TextConversionType::TO_TCHINESE) ) {
247 
248         aLocale=rLocale;
249         sal_Bool toSChinese = nConversionType == TextConversionType::TO_SCHINESE;
250 
251         if (nConversionOptions & TextConversionOption::CHARACTER_BY_CHARACTER)
252             // char to char dictionary
253             return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
254         else {
255             Sequence <sal_Int32> offset;
256             // word to word dictionary
257             return  getWordConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions, offset);
258         }
259     } else
260         throw NoSupportException(); // Conversion type is not supported in this service.
261 }
262 
263 OUString SAL_CALL
264 TextConversion_zh::getConversionWithOffset( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
265     const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions, Sequence<sal_Int32>& offset)
266     throw(  RuntimeException, IllegalArgumentException, NoSupportException )
267 {
268     if (rLocale.Language.equalsAscii("zh") &&
269             ( nConversionType == TextConversionType::TO_SCHINESE ||
270             nConversionType == TextConversionType::TO_TCHINESE) ) {
271 
272         aLocale=rLocale;
273         sal_Bool toSChinese = nConversionType == TextConversionType::TO_SCHINESE;
274 
275         if (nConversionOptions & TextConversionOption::CHARACTER_BY_CHARACTER) {
276             offset.realloc(0);
277             // char to char dictionary
278             return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
279         } else {
280             if (offset.getLength() < 2*nLength)
281                 offset.realloc(2*nLength);
282             // word to word dictionary
283             return  getWordConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions, offset);
284         }
285     } else
286         throw NoSupportException(); // Conversion type is not supported in this service.
287 }
288 
/** Always returns sal_False; all three arguments are ignored. */
sal_Bool SAL_CALL
TextConversion_zh::interactiveConversion( const Locale& /*rLocale*/, sal_Int16 /*nTextConversionType*/, sal_Int32 /*nTextConversionOptions*/ )
    throw(  RuntimeException, IllegalArgumentException, NoSupportException )
{
    return sal_False;
}
295 
296 } } } }
297