1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_i18npool.hxx"
26
27 #include <cclass_unicode.hxx>
28 #include <com/sun/star/i18n/UnicodeScript.hpp>
29 #include <com/sun/star/i18n/UnicodeType.hpp>
30 #include <com/sun/star/i18n/KCharacterType.hpp>
31 #include <unicode/uchar.h>
32 #include <i18nutil/x_rtl_ustring.h>
33 #include <breakiteratorImpl.hxx>
34
35 using namespace ::com::sun::star::uno;
36 using namespace ::com::sun::star::lang;
37 using namespace ::rtl;
38
39 namespace com { namespace sun { namespace star { namespace i18n {
40 // ----------------------------------------------------
41 // class cclass_Unicode
42 // ----------------------------------------------------;
43
// Constructs the Unicode character classification object.
// xSMgr is kept in xMSF; toTitle() hands it to the BreakIteratorImpl it creates.
// The parser table pointers start out NULL, the type counters at 0, the
// state at ssGetChar, and the separators at ',' (group) and '.' (decimal).
cclass_Unicode::cclass_Unicode( uno::Reference < XMultiServiceFactory > xSMgr ) : xMSF( xSMgr ),
        pTable( NULL ),
        pStart( NULL ),
        pCont( NULL ),
        nStartTypes( 0 ),
        nContTypes( 0 ),
        eState( ssGetChar ),
        cGroupSep( ',' ),
        cDecimalSep( '.' )
{
    // Case-mapping transliterator used by toUpper()/toLower()/toTitle();
    // it is deleted again in the destructor.
    trans = new Transliteration_casemapping();
    // Name returned by getImplementationName() and matched by supportsService().
    cClass = "com.sun.star.i18n.CharacterClassification_Unicode";
}
57
// Calls destroyParserTable() (defined outside this section) and then deletes
// the transliterator that the constructor allocated with new.
cclass_Unicode::~cclass_Unicode() {
    destroyParserTable();
    delete trans;
}
62
63
64 OUString SAL_CALL
toUpper(const OUString & Text,sal_Int32 nPos,sal_Int32 nCount,const Locale & rLocale)65 cclass_Unicode::toUpper( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
66 sal_Int32 len = Text.getLength();
67 if (nPos >= len)
68 return OUString();
69 if (nCount + nPos > len)
70 nCount = len - nPos;
71
72 trans->setMappingType(MappingTypeToUpper, rLocale);
73 return trans->transliterateString2String(Text, nPos, nCount);
74 }
75
76 OUString SAL_CALL
toLower(const OUString & Text,sal_Int32 nPos,sal_Int32 nCount,const Locale & rLocale)77 cclass_Unicode::toLower( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
78 sal_Int32 len = Text.getLength();
79 if (nPos >= len)
80 return OUString();
81 if (nCount + nPos > len)
82 nCount = len - nPos;
83
84 trans->setMappingType(MappingTypeToLower, rLocale);
85 return trans->transliterateString2String(Text, nPos, nCount);
86 }
87
88 OUString SAL_CALL
toTitle(const OUString & Text,sal_Int32 nPos,sal_Int32 nCount,const Locale & rLocale)89 cclass_Unicode::toTitle( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
90 sal_Int32 len = Text.getLength();
91 if (nPos >= len)
92 return OUString();
93 if (nCount + nPos > len)
94 nCount = len - nPos;
95
96 trans->setMappingType(MappingTypeToTitle, rLocale);
97 rtl_uString* pStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h
98 sal_Unicode* out = pStr->buffer;
99 BreakIteratorImpl brk(xMSF);
100 Boundary bdy = brk.getWordBoundary(Text, nPos, rLocale,
101 WordType::ANYWORD_IGNOREWHITESPACES, sal_True);
102 for (sal_Int32 i = nPos; i < nCount + nPos; i++, out++) {
103 if (i >= bdy.endPos)
104 bdy = brk.nextWord(Text, bdy.endPos, rLocale,
105 WordType::ANYWORD_IGNOREWHITESPACES);
106 *out = (i == bdy.startPos) ?
107 trans->transliterateChar2Char(Text[i]) : Text[i];
108 }
109 *out = 0;
110 return OUString( pStr, SAL_NO_ACQUIRE );
111 }
112
113 sal_Int16 SAL_CALL
getType(const OUString & Text,sal_Int32 nPos)114 cclass_Unicode::getType( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
115 if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
116 return (sal_Int16) u_charType(Text.iterateCodePoints(&nPos, 0));
117 }
118
119 sal_Int16 SAL_CALL
getCharacterDirection(const OUString & Text,sal_Int32 nPos)120 cclass_Unicode::getCharacterDirection( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
121 if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
122 return (sal_Int16) u_charDirection(Text.iterateCodePoints(&nPos, 0));
123 }
124
125
126 sal_Int16 SAL_CALL
getScript(const OUString & Text,sal_Int32 nPos)127 cclass_Unicode::getScript( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
128 if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
129 // ICU Unicode script type UBlockCode starts from 1 for Basci Latin,
130 // while OO.o enum UnicideScript starts from 0.
131 // To map ICU UBlockCode to OO.o UnicodeScript, it needs to shift 1.
132 return (sal_Int16) ublock_getCode(Text.iterateCodePoints(&nPos, 0))-1;
133 }
134
135
136 sal_Int32 SAL_CALL
getCharType(const OUString & Text,sal_Int32 * nPos,sal_Int32 increment)137 cclass_Unicode::getCharType( const OUString& Text, sal_Int32* nPos, sal_Int32 increment) {
138 using namespace ::com::sun::star::i18n::KCharacterType;
139
140 sal_uInt32 ch = Text.iterateCodePoints(nPos, increment);
141 if (increment > 0) ch = Text.iterateCodePoints(nPos, 0);
142 switch ( u_charType(ch) ) {
143 // Upper
144 case U_UPPERCASE_LETTER :
145 return UPPER|LETTER|PRINTABLE|BASE_FORM;
146
147 // Lower
148 case U_LOWERCASE_LETTER :
149 return LOWER|LETTER|PRINTABLE|BASE_FORM;
150
151 // Title
152 case U_TITLECASE_LETTER :
153 return TITLE_CASE|LETTER|PRINTABLE|BASE_FORM;
154
155 // Letter
156 case U_MODIFIER_LETTER :
157 case U_OTHER_LETTER :
158 return LETTER|PRINTABLE|BASE_FORM;
159
160 // Digit
161 case U_DECIMAL_DIGIT_NUMBER:
162 case U_LETTER_NUMBER:
163 case U_OTHER_NUMBER:
164 return DIGIT|PRINTABLE|BASE_FORM;
165
166 // Base
167 case U_NON_SPACING_MARK:
168 case U_ENCLOSING_MARK:
169 case U_COMBINING_SPACING_MARK:
170 return BASE_FORM|PRINTABLE;
171
172 // Print
173 case U_SPACE_SEPARATOR:
174
175 case U_DASH_PUNCTUATION:
176 case U_INITIAL_PUNCTUATION:
177 case U_FINAL_PUNCTUATION:
178 case U_CONNECTOR_PUNCTUATION:
179 case U_OTHER_PUNCTUATION:
180
181 case U_MATH_SYMBOL:
182 case U_CURRENCY_SYMBOL:
183 case U_MODIFIER_SYMBOL:
184 case U_OTHER_SYMBOL:
185 return PRINTABLE;
186
187 // Control
188 case U_CONTROL_CHAR:
189 case U_FORMAT_CHAR:
190 return CONTROL;
191
192 case U_LINE_SEPARATOR:
193 case U_PARAGRAPH_SEPARATOR:
194 return CONTROL|PRINTABLE;
195
196 // for all others
197 default:
198 return U_GENERAL_OTHER_TYPES;
199 }
200 }
201
202 sal_Int32 SAL_CALL
getCharacterType(const OUString & Text,sal_Int32 nPos,const Locale &)203 cclass_Unicode::getCharacterType( const OUString& Text, sal_Int32 nPos, const Locale& /*rLocale*/ ) throw(RuntimeException) {
204 if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
205 return getCharType(Text, &nPos, 0);
206
207 }
208
209 sal_Int32 SAL_CALL
getStringType(const OUString & Text,sal_Int32 nPos,sal_Int32 nCount,const Locale &)210 cclass_Unicode::getStringType( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& /*rLocale*/ ) throw(RuntimeException) {
211 if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
212
213 sal_Int32 result = getCharType(Text, &nPos, 0);
214 for (sal_Int32 i = 1; i < nCount && nPos < Text.getLength(); i++)
215 result |= getCharType(Text, &nPos, 1);
216 return result;
217 }
218
parseAnyToken(const OUString & Text,sal_Int32 nPos,const Locale & rLocale,sal_Int32 startCharTokenType,const OUString & userDefinedCharactersStart,sal_Int32 contCharTokenType,const OUString & userDefinedCharactersCont)219 ParseResult SAL_CALL cclass_Unicode::parseAnyToken(
220 const OUString& Text,
221 sal_Int32 nPos,
222 const Locale& rLocale,
223 sal_Int32 startCharTokenType,
224 const OUString& userDefinedCharactersStart,
225 sal_Int32 contCharTokenType,
226 const OUString& userDefinedCharactersCont )
227 throw(RuntimeException)
228 {
229 ParseResult r;
230 if ( Text.getLength() <= nPos )
231 return r;
232
233 setupParserTable( rLocale,
234 startCharTokenType, userDefinedCharactersStart,
235 contCharTokenType, userDefinedCharactersCont );
236 parseText( r, Text, nPos );
237
238 return r;
239 }
240
241
parsePredefinedToken(sal_Int32 nTokenType,const OUString & Text,sal_Int32 nPos,const Locale & rLocale,sal_Int32 startCharTokenType,const OUString & userDefinedCharactersStart,sal_Int32 contCharTokenType,const OUString & userDefinedCharactersCont)242 ParseResult SAL_CALL cclass_Unicode::parsePredefinedToken(
243 sal_Int32 nTokenType,
244 const OUString& Text,
245 sal_Int32 nPos,
246 const Locale& rLocale,
247 sal_Int32 startCharTokenType,
248 const OUString& userDefinedCharactersStart,
249 sal_Int32 contCharTokenType,
250 const OUString& userDefinedCharactersCont )
251 throw(RuntimeException)
252 {
253 ParseResult r;
254 if ( Text.getLength() <= nPos )
255 return r;
256
257 setupParserTable( rLocale,
258 startCharTokenType, userDefinedCharactersStart,
259 contCharTokenType, userDefinedCharactersCont );
260 parseText( r, Text, nPos, nTokenType );
261
262 return r;
263 }
264
getImplementationName()265 OUString SAL_CALL cclass_Unicode::getImplementationName() throw( RuntimeException )
266 {
267 return OUString::createFromAscii(cClass);
268 }
269
270
supportsService(const OUString & rServiceName)271 sal_Bool SAL_CALL cclass_Unicode::supportsService(const OUString& rServiceName) throw( RuntimeException )
272 {
273 return !rServiceName.compareToAscii(cClass);
274 }
275
getSupportedServiceNames()276 Sequence< OUString > SAL_CALL cclass_Unicode::getSupportedServiceNames() throw( RuntimeException )
277 {
278 Sequence< OUString > aRet(1);
279 aRet[0] = OUString::createFromAscii(cClass);
280 return aRet;
281 }
282
283 } } } }
284
285