1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_i18npool.hxx"
30 
31 #include <cclass_unicode.hxx>
32 #include <com/sun/star/i18n/UnicodeScript.hpp>
33 #include <com/sun/star/i18n/UnicodeType.hpp>
34 #include <com/sun/star/i18n/KCharacterType.hpp>
35 #include <unicode/uchar.h>
36 #include <i18nutil/x_rtl_ustring.h>
37 #include <breakiteratorImpl.hxx>
38 
39 using namespace ::com::sun::star::uno;
40 using namespace ::com::sun::star::lang;
41 using namespace ::rtl;
42 
43 namespace com { namespace sun { namespace star { namespace i18n {
//	----------------------------------------------------
//	class cclass_Unicode
//	----------------------------------------------------
47 
48 cclass_Unicode::cclass_Unicode( uno::Reference < XMultiServiceFactory > xSMgr ) : xMSF( xSMgr ),
49 		pTable( NULL ),
50 		pStart( NULL ),
51 		pCont( NULL ),
52 		nStartTypes( 0 ),
53 		nContTypes( 0 ),
54 		eState( ssGetChar ),
55 		cGroupSep( ',' ),
56 		cDecimalSep( '.' )
57 {
58 	trans = new Transliteration_casemapping();
59 	cClass = "com.sun.star.i18n.CharacterClassification_Unicode";
60 }
61 
62 cclass_Unicode::~cclass_Unicode() {
63 	destroyParserTable();
64 	delete trans;
65 }
66 
67 
68 OUString SAL_CALL
69 cclass_Unicode::toUpper( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
70     sal_Int32 len = Text.getLength();
71     if (nPos >= len)
72         return OUString();
73     if (nCount + nPos > len)
74         nCount = len - nPos;
75 
76     trans->setMappingType(MappingTypeToUpper, rLocale);
77     return trans->transliterateString2String(Text, nPos, nCount);
78 }
79 
80 OUString SAL_CALL
81 cclass_Unicode::toLower( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
82     sal_Int32 len = Text.getLength();
83     if (nPos >= len)
84         return OUString();
85     if (nCount + nPos > len)
86         nCount = len - nPos;
87 
88     trans->setMappingType(MappingTypeToLower, rLocale);
89     return trans->transliterateString2String(Text, nPos, nCount);
90 }
91 
92 OUString SAL_CALL
93 cclass_Unicode::toTitle( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
94     sal_Int32 len = Text.getLength();
95     if (nPos >= len)
96         return OUString();
97     if (nCount + nPos > len)
98         nCount = len - nPos;
99 
100     trans->setMappingType(MappingTypeToTitle, rLocale);
101     rtl_uString* pStr = x_rtl_uString_new_WithLength( nCount, 1 );
102     sal_Unicode* out = pStr->buffer;
103     BreakIteratorImpl brk(xMSF);
104     Boundary bdy = brk.getWordBoundary(Text, nPos, rLocale,
105                 WordType::ANYWORD_IGNOREWHITESPACES, sal_True);
106     for (sal_Int32 i = nPos; i < nCount + nPos; i++, out++) {
107         if (i >= bdy.endPos)
108             bdy = brk.nextWord(Text, bdy.endPos, rLocale,
109                         WordType::ANYWORD_IGNOREWHITESPACES);
110         *out = (i == bdy.startPos) ?
111             trans->transliterateChar2Char(Text[i]) : Text[i];
112     }
113     *out = 0;
114     return OUString( pStr, SAL_NO_ACQUIRE );
115 }
116 
117 sal_Int16 SAL_CALL
118 cclass_Unicode::getType( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
119     if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
120     return (sal_Int16) u_charType(Text.iterateCodePoints(&nPos, 0));
121 }
122 
123 sal_Int16 SAL_CALL
124 cclass_Unicode::getCharacterDirection( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
125     if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
126     return (sal_Int16) u_charDirection(Text.iterateCodePoints(&nPos, 0));
127 }
128 
129 
130 sal_Int16 SAL_CALL
131 cclass_Unicode::getScript( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
132     if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
133     // ICU Unicode script type UBlockCode starts from 1 for Basci Latin,
134     // while OO.o enum UnicideScript starts from 0.
135     // To map ICU UBlockCode to OO.o UnicodeScript, it needs to shift 1.
136     return (sal_Int16) ublock_getCode(Text.iterateCodePoints(&nPos, 0))-1;
137 }
138 
139 
140 sal_Int32 SAL_CALL
141 cclass_Unicode::getCharType( const OUString& Text, sal_Int32* nPos, sal_Int32 increment) {
142     using namespace ::com::sun::star::i18n::KCharacterType;
143 
144 	sal_uInt32 ch = Text.iterateCodePoints(nPos, increment);
145 	if (increment > 0) ch = Text.iterateCodePoints(nPos, 0);
146     switch ( u_charType(ch) ) {
147     // Upper
148     case U_UPPERCASE_LETTER :
149         return UPPER|LETTER|PRINTABLE|BASE_FORM;
150 
151     // Lower
152     case U_LOWERCASE_LETTER :
153         return LOWER|LETTER|PRINTABLE|BASE_FORM;
154 
155     // Title
156     case U_TITLECASE_LETTER :
157         return TITLE_CASE|LETTER|PRINTABLE|BASE_FORM;
158 
159     // Letter
160     case U_MODIFIER_LETTER :
161     case U_OTHER_LETTER :
162         return LETTER|PRINTABLE|BASE_FORM;
163 
164     // Digit
165     case U_DECIMAL_DIGIT_NUMBER:
166     case U_LETTER_NUMBER:
167     case U_OTHER_NUMBER:
168         return DIGIT|PRINTABLE|BASE_FORM;
169 
170     // Base
171     case U_NON_SPACING_MARK:
172     case U_ENCLOSING_MARK:
173     case U_COMBINING_SPACING_MARK:
174         return BASE_FORM|PRINTABLE;
175 
176     // Print
177     case U_SPACE_SEPARATOR:
178 
179     case U_DASH_PUNCTUATION:
180     case U_INITIAL_PUNCTUATION:
181     case U_FINAL_PUNCTUATION:
182     case U_CONNECTOR_PUNCTUATION:
183     case U_OTHER_PUNCTUATION:
184 
185     case U_MATH_SYMBOL:
186     case U_CURRENCY_SYMBOL:
187     case U_MODIFIER_SYMBOL:
188     case U_OTHER_SYMBOL:
189         return PRINTABLE;
190 
191     // Control
192     case U_CONTROL_CHAR:
193     case U_FORMAT_CHAR:
194         return CONTROL;
195 
196     case U_LINE_SEPARATOR:
197     case U_PARAGRAPH_SEPARATOR:
198         return CONTROL|PRINTABLE;
199 
200     // for all others
201     default:
202         return U_GENERAL_OTHER_TYPES;
203     }
204 }
205 
206 sal_Int32 SAL_CALL
207 cclass_Unicode::getCharacterType( const OUString& Text, sal_Int32 nPos, const Locale& /*rLocale*/ ) throw(RuntimeException) {
208     if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
209     return getCharType(Text, &nPos, 0);
210 
211 }
212 
213 sal_Int32 SAL_CALL
214 cclass_Unicode::getStringType( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& /*rLocale*/ ) throw(RuntimeException) {
215     if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
216 
217     sal_Int32 result = getCharType(Text, &nPos, 0);
218     for (sal_Int32 i = 1; i < nCount && nPos < Text.getLength(); i++)
219         result |= getCharType(Text, &nPos, 1);
220     return result;
221 }
222 
223 ParseResult SAL_CALL cclass_Unicode::parseAnyToken(
224 			const OUString& Text,
225 			sal_Int32 nPos,
226 			const Locale& rLocale,
227 			sal_Int32 startCharTokenType,
228 			const OUString& userDefinedCharactersStart,
229 			sal_Int32 contCharTokenType,
230 			const OUString& userDefinedCharactersCont )
231 				throw(RuntimeException)
232 {
233 	ParseResult r;
234 	if ( Text.getLength() <= nPos )
235 		return r;
236 
237 	setupParserTable( rLocale,
238 		startCharTokenType, userDefinedCharactersStart,
239 		contCharTokenType, userDefinedCharactersCont );
240 	parseText( r, Text, nPos );
241 
242 	return r;
243 }
244 
245 
246 ParseResult SAL_CALL cclass_Unicode::parsePredefinedToken(
247 			sal_Int32 nTokenType,
248 			const OUString& Text,
249 			sal_Int32 nPos,
250 			const Locale& rLocale,
251 			sal_Int32 startCharTokenType,
252 			const OUString& userDefinedCharactersStart,
253 			sal_Int32 contCharTokenType,
254 			const OUString& userDefinedCharactersCont )
255 				throw(RuntimeException)
256 {
257 	ParseResult r;
258 	if ( Text.getLength() <= nPos )
259 		return r;
260 
261 	setupParserTable( rLocale,
262 		startCharTokenType, userDefinedCharactersStart,
263 		contCharTokenType, userDefinedCharactersCont );
264 	parseText( r, Text, nPos, nTokenType );
265 
266 	return r;
267 }
268 
269 OUString SAL_CALL cclass_Unicode::getImplementationName() throw( RuntimeException )
270 {
271     return OUString::createFromAscii(cClass);
272 }
273 
274 
275 sal_Bool SAL_CALL cclass_Unicode::supportsService(const OUString& rServiceName) throw( RuntimeException )
276 {
277     return !rServiceName.compareToAscii(cClass);
278 }
279 
280 Sequence< OUString > SAL_CALL cclass_Unicode::getSupportedServiceNames() throw( RuntimeException )
281 {
282     Sequence< OUString > aRet(1);
283     aRet[0] = OUString::createFromAscii(cClass);
284     return aRet;
285 }
286 
287 } } } }
288 
289