/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/
21449ab281SAndrew Rist 
22449ab281SAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_i18npool.hxx"
26cdf0e10cSrcweir 
27cdf0e10cSrcweir #include <cclass_unicode.hxx>
28cdf0e10cSrcweir #include <com/sun/star/i18n/UnicodeScript.hpp>
29cdf0e10cSrcweir #include <com/sun/star/i18n/UnicodeType.hpp>
30cdf0e10cSrcweir #include <com/sun/star/i18n/KCharacterType.hpp>
31cdf0e10cSrcweir #include <unicode/uchar.h>
32cdf0e10cSrcweir #include <i18nutil/x_rtl_ustring.h>
33cdf0e10cSrcweir #include <breakiteratorImpl.hxx>
34cdf0e10cSrcweir 
35cdf0e10cSrcweir using namespace ::com::sun::star::uno;
36cdf0e10cSrcweir using namespace ::com::sun::star::lang;
37cdf0e10cSrcweir using namespace ::rtl;
38cdf0e10cSrcweir 
39cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n {
//	----------------------------------------------------
//	class cclass_Unicode
//	----------------------------------------------------
43cdf0e10cSrcweir 
cclass_Unicode(uno::Reference<XMultiServiceFactory> xSMgr)44cdf0e10cSrcweir cclass_Unicode::cclass_Unicode( uno::Reference < XMultiServiceFactory > xSMgr ) : xMSF( xSMgr ),
45cdf0e10cSrcweir 		pTable( NULL ),
46cdf0e10cSrcweir 		pStart( NULL ),
47cdf0e10cSrcweir 		pCont( NULL ),
48cdf0e10cSrcweir 		nStartTypes( 0 ),
49cdf0e10cSrcweir 		nContTypes( 0 ),
50cdf0e10cSrcweir 		eState( ssGetChar ),
51cdf0e10cSrcweir 		cGroupSep( ',' ),
52cdf0e10cSrcweir 		cDecimalSep( '.' )
53cdf0e10cSrcweir {
54cdf0e10cSrcweir 	trans = new Transliteration_casemapping();
55cdf0e10cSrcweir 	cClass = "com.sun.star.i18n.CharacterClassification_Unicode";
56cdf0e10cSrcweir }
57cdf0e10cSrcweir 
~cclass_Unicode()58cdf0e10cSrcweir cclass_Unicode::~cclass_Unicode() {
59cdf0e10cSrcweir 	destroyParserTable();
60cdf0e10cSrcweir 	delete trans;
61cdf0e10cSrcweir }
62cdf0e10cSrcweir 
63cdf0e10cSrcweir 
64cdf0e10cSrcweir OUString SAL_CALL
toUpper(const OUString & Text,sal_Int32 nPos,sal_Int32 nCount,const Locale & rLocale)65cdf0e10cSrcweir cclass_Unicode::toUpper( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
66cdf0e10cSrcweir     sal_Int32 len = Text.getLength();
67cdf0e10cSrcweir     if (nPos >= len)
68cdf0e10cSrcweir         return OUString();
69cdf0e10cSrcweir     if (nCount + nPos > len)
70cdf0e10cSrcweir         nCount = len - nPos;
71cdf0e10cSrcweir 
72cdf0e10cSrcweir     trans->setMappingType(MappingTypeToUpper, rLocale);
73cdf0e10cSrcweir     return trans->transliterateString2String(Text, nPos, nCount);
74cdf0e10cSrcweir }
75cdf0e10cSrcweir 
76cdf0e10cSrcweir OUString SAL_CALL
toLower(const OUString & Text,sal_Int32 nPos,sal_Int32 nCount,const Locale & rLocale)77cdf0e10cSrcweir cclass_Unicode::toLower( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
78cdf0e10cSrcweir     sal_Int32 len = Text.getLength();
79cdf0e10cSrcweir     if (nPos >= len)
80cdf0e10cSrcweir         return OUString();
81cdf0e10cSrcweir     if (nCount + nPos > len)
82cdf0e10cSrcweir         nCount = len - nPos;
83cdf0e10cSrcweir 
84cdf0e10cSrcweir     trans->setMappingType(MappingTypeToLower, rLocale);
85cdf0e10cSrcweir     return trans->transliterateString2String(Text, nPos, nCount);
86cdf0e10cSrcweir }
87cdf0e10cSrcweir 
88cdf0e10cSrcweir OUString SAL_CALL
toTitle(const OUString & Text,sal_Int32 nPos,sal_Int32 nCount,const Locale & rLocale)89cdf0e10cSrcweir cclass_Unicode::toTitle( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
90cdf0e10cSrcweir     sal_Int32 len = Text.getLength();
91cdf0e10cSrcweir     if (nPos >= len)
92cdf0e10cSrcweir         return OUString();
93cdf0e10cSrcweir     if (nCount + nPos > len)
94cdf0e10cSrcweir         nCount = len - nPos;
95cdf0e10cSrcweir 
96cdf0e10cSrcweir     trans->setMappingType(MappingTypeToTitle, rLocale);
97*4674bdb9SOliver-Rainer Wittmann     rtl_uString* pStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h
98cdf0e10cSrcweir     sal_Unicode* out = pStr->buffer;
99cdf0e10cSrcweir     BreakIteratorImpl brk(xMSF);
100cdf0e10cSrcweir     Boundary bdy = brk.getWordBoundary(Text, nPos, rLocale,
101cdf0e10cSrcweir                 WordType::ANYWORD_IGNOREWHITESPACES, sal_True);
102cdf0e10cSrcweir     for (sal_Int32 i = nPos; i < nCount + nPos; i++, out++) {
103cdf0e10cSrcweir         if (i >= bdy.endPos)
104cdf0e10cSrcweir             bdy = brk.nextWord(Text, bdy.endPos, rLocale,
105cdf0e10cSrcweir                         WordType::ANYWORD_IGNOREWHITESPACES);
106cdf0e10cSrcweir         *out = (i == bdy.startPos) ?
107cdf0e10cSrcweir             trans->transliterateChar2Char(Text[i]) : Text[i];
108cdf0e10cSrcweir     }
109cdf0e10cSrcweir     *out = 0;
110cdf0e10cSrcweir     return OUString( pStr, SAL_NO_ACQUIRE );
111cdf0e10cSrcweir }
112cdf0e10cSrcweir 
113cdf0e10cSrcweir sal_Int16 SAL_CALL
getType(const OUString & Text,sal_Int32 nPos)114cdf0e10cSrcweir cclass_Unicode::getType( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
115cdf0e10cSrcweir     if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
116cdf0e10cSrcweir     return (sal_Int16) u_charType(Text.iterateCodePoints(&nPos, 0));
117cdf0e10cSrcweir }
118cdf0e10cSrcweir 
119cdf0e10cSrcweir sal_Int16 SAL_CALL
getCharacterDirection(const OUString & Text,sal_Int32 nPos)120cdf0e10cSrcweir cclass_Unicode::getCharacterDirection( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
121cdf0e10cSrcweir     if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
122cdf0e10cSrcweir     return (sal_Int16) u_charDirection(Text.iterateCodePoints(&nPos, 0));
123cdf0e10cSrcweir }
124cdf0e10cSrcweir 
125cdf0e10cSrcweir 
126cdf0e10cSrcweir sal_Int16 SAL_CALL
getScript(const OUString & Text,sal_Int32 nPos)127cdf0e10cSrcweir cclass_Unicode::getScript( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
128cdf0e10cSrcweir     if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
129cdf0e10cSrcweir     // ICU Unicode script type UBlockCode starts from 1 for Basci Latin,
130cdf0e10cSrcweir     // while OO.o enum UnicideScript starts from 0.
131cdf0e10cSrcweir     // To map ICU UBlockCode to OO.o UnicodeScript, it needs to shift 1.
132cdf0e10cSrcweir     return (sal_Int16) ublock_getCode(Text.iterateCodePoints(&nPos, 0))-1;
133cdf0e10cSrcweir }
134cdf0e10cSrcweir 
135cdf0e10cSrcweir 
136cdf0e10cSrcweir sal_Int32 SAL_CALL
getCharType(const OUString & Text,sal_Int32 * nPos,sal_Int32 increment)137cdf0e10cSrcweir cclass_Unicode::getCharType( const OUString& Text, sal_Int32* nPos, sal_Int32 increment) {
138cdf0e10cSrcweir     using namespace ::com::sun::star::i18n::KCharacterType;
139cdf0e10cSrcweir 
140cdf0e10cSrcweir 	sal_uInt32 ch = Text.iterateCodePoints(nPos, increment);
141cdf0e10cSrcweir 	if (increment > 0) ch = Text.iterateCodePoints(nPos, 0);
142cdf0e10cSrcweir     switch ( u_charType(ch) ) {
143cdf0e10cSrcweir     // Upper
144cdf0e10cSrcweir     case U_UPPERCASE_LETTER :
145cdf0e10cSrcweir         return UPPER|LETTER|PRINTABLE|BASE_FORM;
146cdf0e10cSrcweir 
147cdf0e10cSrcweir     // Lower
148cdf0e10cSrcweir     case U_LOWERCASE_LETTER :
149cdf0e10cSrcweir         return LOWER|LETTER|PRINTABLE|BASE_FORM;
150cdf0e10cSrcweir 
151cdf0e10cSrcweir     // Title
152cdf0e10cSrcweir     case U_TITLECASE_LETTER :
153cdf0e10cSrcweir         return TITLE_CASE|LETTER|PRINTABLE|BASE_FORM;
154cdf0e10cSrcweir 
155cdf0e10cSrcweir     // Letter
156cdf0e10cSrcweir     case U_MODIFIER_LETTER :
157cdf0e10cSrcweir     case U_OTHER_LETTER :
158cdf0e10cSrcweir         return LETTER|PRINTABLE|BASE_FORM;
159cdf0e10cSrcweir 
160cdf0e10cSrcweir     // Digit
161cdf0e10cSrcweir     case U_DECIMAL_DIGIT_NUMBER:
162cdf0e10cSrcweir     case U_LETTER_NUMBER:
163cdf0e10cSrcweir     case U_OTHER_NUMBER:
164cdf0e10cSrcweir         return DIGIT|PRINTABLE|BASE_FORM;
165cdf0e10cSrcweir 
166cdf0e10cSrcweir     // Base
167cdf0e10cSrcweir     case U_NON_SPACING_MARK:
168cdf0e10cSrcweir     case U_ENCLOSING_MARK:
169cdf0e10cSrcweir     case U_COMBINING_SPACING_MARK:
170cdf0e10cSrcweir         return BASE_FORM|PRINTABLE;
171cdf0e10cSrcweir 
172cdf0e10cSrcweir     // Print
173cdf0e10cSrcweir     case U_SPACE_SEPARATOR:
174cdf0e10cSrcweir 
175cdf0e10cSrcweir     case U_DASH_PUNCTUATION:
176cdf0e10cSrcweir     case U_INITIAL_PUNCTUATION:
177cdf0e10cSrcweir     case U_FINAL_PUNCTUATION:
178cdf0e10cSrcweir     case U_CONNECTOR_PUNCTUATION:
179cdf0e10cSrcweir     case U_OTHER_PUNCTUATION:
180cdf0e10cSrcweir 
181cdf0e10cSrcweir     case U_MATH_SYMBOL:
182cdf0e10cSrcweir     case U_CURRENCY_SYMBOL:
183cdf0e10cSrcweir     case U_MODIFIER_SYMBOL:
184cdf0e10cSrcweir     case U_OTHER_SYMBOL:
185cdf0e10cSrcweir         return PRINTABLE;
186cdf0e10cSrcweir 
187cdf0e10cSrcweir     // Control
188cdf0e10cSrcweir     case U_CONTROL_CHAR:
189cdf0e10cSrcweir     case U_FORMAT_CHAR:
190cdf0e10cSrcweir         return CONTROL;
191cdf0e10cSrcweir 
192cdf0e10cSrcweir     case U_LINE_SEPARATOR:
193cdf0e10cSrcweir     case U_PARAGRAPH_SEPARATOR:
194cdf0e10cSrcweir         return CONTROL|PRINTABLE;
195cdf0e10cSrcweir 
196cdf0e10cSrcweir     // for all others
197cdf0e10cSrcweir     default:
198cdf0e10cSrcweir         return U_GENERAL_OTHER_TYPES;
199cdf0e10cSrcweir     }
200cdf0e10cSrcweir }
201cdf0e10cSrcweir 
202cdf0e10cSrcweir sal_Int32 SAL_CALL
getCharacterType(const OUString & Text,sal_Int32 nPos,const Locale &)203cdf0e10cSrcweir cclass_Unicode::getCharacterType( const OUString& Text, sal_Int32 nPos, const Locale& /*rLocale*/ ) throw(RuntimeException) {
204cdf0e10cSrcweir     if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
205cdf0e10cSrcweir     return getCharType(Text, &nPos, 0);
206cdf0e10cSrcweir 
207cdf0e10cSrcweir }
208cdf0e10cSrcweir 
209cdf0e10cSrcweir sal_Int32 SAL_CALL
getStringType(const OUString & Text,sal_Int32 nPos,sal_Int32 nCount,const Locale &)210cdf0e10cSrcweir cclass_Unicode::getStringType( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& /*rLocale*/ ) throw(RuntimeException) {
211cdf0e10cSrcweir     if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
212cdf0e10cSrcweir 
213cdf0e10cSrcweir     sal_Int32 result = getCharType(Text, &nPos, 0);
214cdf0e10cSrcweir     for (sal_Int32 i = 1; i < nCount && nPos < Text.getLength(); i++)
215cdf0e10cSrcweir         result |= getCharType(Text, &nPos, 1);
216cdf0e10cSrcweir     return result;
217cdf0e10cSrcweir }
218cdf0e10cSrcweir 
parseAnyToken(const OUString & Text,sal_Int32 nPos,const Locale & rLocale,sal_Int32 startCharTokenType,const OUString & userDefinedCharactersStart,sal_Int32 contCharTokenType,const OUString & userDefinedCharactersCont)219cdf0e10cSrcweir ParseResult SAL_CALL cclass_Unicode::parseAnyToken(
220cdf0e10cSrcweir 			const OUString& Text,
221cdf0e10cSrcweir 			sal_Int32 nPos,
222cdf0e10cSrcweir 			const Locale& rLocale,
223cdf0e10cSrcweir 			sal_Int32 startCharTokenType,
224cdf0e10cSrcweir 			const OUString& userDefinedCharactersStart,
225cdf0e10cSrcweir 			sal_Int32 contCharTokenType,
226cdf0e10cSrcweir 			const OUString& userDefinedCharactersCont )
227cdf0e10cSrcweir 				throw(RuntimeException)
228cdf0e10cSrcweir {
229cdf0e10cSrcweir 	ParseResult r;
230cdf0e10cSrcweir 	if ( Text.getLength() <= nPos )
231cdf0e10cSrcweir 		return r;
232cdf0e10cSrcweir 
233cdf0e10cSrcweir 	setupParserTable( rLocale,
234cdf0e10cSrcweir 		startCharTokenType, userDefinedCharactersStart,
235cdf0e10cSrcweir 		contCharTokenType, userDefinedCharactersCont );
236cdf0e10cSrcweir 	parseText( r, Text, nPos );
237cdf0e10cSrcweir 
238cdf0e10cSrcweir 	return r;
239cdf0e10cSrcweir }
240cdf0e10cSrcweir 
241cdf0e10cSrcweir 
parsePredefinedToken(sal_Int32 nTokenType,const OUString & Text,sal_Int32 nPos,const Locale & rLocale,sal_Int32 startCharTokenType,const OUString & userDefinedCharactersStart,sal_Int32 contCharTokenType,const OUString & userDefinedCharactersCont)242cdf0e10cSrcweir ParseResult SAL_CALL cclass_Unicode::parsePredefinedToken(
243cdf0e10cSrcweir 			sal_Int32 nTokenType,
244cdf0e10cSrcweir 			const OUString& Text,
245cdf0e10cSrcweir 			sal_Int32 nPos,
246cdf0e10cSrcweir 			const Locale& rLocale,
247cdf0e10cSrcweir 			sal_Int32 startCharTokenType,
248cdf0e10cSrcweir 			const OUString& userDefinedCharactersStart,
249cdf0e10cSrcweir 			sal_Int32 contCharTokenType,
250cdf0e10cSrcweir 			const OUString& userDefinedCharactersCont )
251cdf0e10cSrcweir 				throw(RuntimeException)
252cdf0e10cSrcweir {
253cdf0e10cSrcweir 	ParseResult r;
254cdf0e10cSrcweir 	if ( Text.getLength() <= nPos )
255cdf0e10cSrcweir 		return r;
256cdf0e10cSrcweir 
257cdf0e10cSrcweir 	setupParserTable( rLocale,
258cdf0e10cSrcweir 		startCharTokenType, userDefinedCharactersStart,
259cdf0e10cSrcweir 		contCharTokenType, userDefinedCharactersCont );
260cdf0e10cSrcweir 	parseText( r, Text, nPos, nTokenType );
261cdf0e10cSrcweir 
262cdf0e10cSrcweir 	return r;
263cdf0e10cSrcweir }
264cdf0e10cSrcweir 
getImplementationName()265cdf0e10cSrcweir OUString SAL_CALL cclass_Unicode::getImplementationName() throw( RuntimeException )
266cdf0e10cSrcweir {
267cdf0e10cSrcweir     return OUString::createFromAscii(cClass);
268cdf0e10cSrcweir }
269cdf0e10cSrcweir 
270cdf0e10cSrcweir 
supportsService(const OUString & rServiceName)271cdf0e10cSrcweir sal_Bool SAL_CALL cclass_Unicode::supportsService(const OUString& rServiceName) throw( RuntimeException )
272cdf0e10cSrcweir {
273cdf0e10cSrcweir     return !rServiceName.compareToAscii(cClass);
274cdf0e10cSrcweir }
275cdf0e10cSrcweir 
getSupportedServiceNames()276cdf0e10cSrcweir Sequence< OUString > SAL_CALL cclass_Unicode::getSupportedServiceNames() throw( RuntimeException )
277cdf0e10cSrcweir {
278cdf0e10cSrcweir     Sequence< OUString > aRet(1);
279cdf0e10cSrcweir     aRet[0] = OUString::createFromAscii(cClass);
280cdf0e10cSrcweir     return aRet;
281cdf0e10cSrcweir }
282cdf0e10cSrcweir 
283cdf0e10cSrcweir } } } }
284cdf0e10cSrcweir 
285