1*b1cdbd2cSJim Jagielski /**************************************************************
2*b1cdbd2cSJim Jagielski *
3*b1cdbd2cSJim Jagielski * Licensed to the Apache Software Foundation (ASF) under one
4*b1cdbd2cSJim Jagielski * or more contributor license agreements. See the NOTICE file
5*b1cdbd2cSJim Jagielski * distributed with this work for additional information
6*b1cdbd2cSJim Jagielski * regarding copyright ownership. The ASF licenses this file
7*b1cdbd2cSJim Jagielski * to you under the Apache License, Version 2.0 (the
8*b1cdbd2cSJim Jagielski * "License"); you may not use this file except in compliance
9*b1cdbd2cSJim Jagielski * with the License. You may obtain a copy of the License at
10*b1cdbd2cSJim Jagielski *
11*b1cdbd2cSJim Jagielski * http://www.apache.org/licenses/LICENSE-2.0
12*b1cdbd2cSJim Jagielski *
13*b1cdbd2cSJim Jagielski * Unless required by applicable law or agreed to in writing,
14*b1cdbd2cSJim Jagielski * software distributed under the License is distributed on an
15*b1cdbd2cSJim Jagielski * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*b1cdbd2cSJim Jagielski * KIND, either express or implied. See the License for the
17*b1cdbd2cSJim Jagielski * specific language governing permissions and limitations
18*b1cdbd2cSJim Jagielski * under the License.
19*b1cdbd2cSJim Jagielski *
20*b1cdbd2cSJim Jagielski *************************************************************/
21*b1cdbd2cSJim Jagielski
22*b1cdbd2cSJim Jagielski
23*b1cdbd2cSJim Jagielski
24*b1cdbd2cSJim Jagielski // MARKER(update_precomp.py): autogen include statement, do not remove
25*b1cdbd2cSJim Jagielski #include "precompiled_i18npool.hxx"
26*b1cdbd2cSJim Jagielski
27*b1cdbd2cSJim Jagielski #include <cclass_unicode.hxx>
28*b1cdbd2cSJim Jagielski #include <com/sun/star/i18n/UnicodeScript.hpp>
29*b1cdbd2cSJim Jagielski #include <com/sun/star/i18n/UnicodeType.hpp>
30*b1cdbd2cSJim Jagielski #include <com/sun/star/i18n/KCharacterType.hpp>
31*b1cdbd2cSJim Jagielski #include <unicode/uchar.h>
32*b1cdbd2cSJim Jagielski #include <i18nutil/x_rtl_ustring.h>
33*b1cdbd2cSJim Jagielski #include <breakiteratorImpl.hxx>
34*b1cdbd2cSJim Jagielski
35*b1cdbd2cSJim Jagielski using namespace ::com::sun::star::uno;
36*b1cdbd2cSJim Jagielski using namespace ::com::sun::star::lang;
37*b1cdbd2cSJim Jagielski using namespace ::rtl;
38*b1cdbd2cSJim Jagielski
39*b1cdbd2cSJim Jagielski namespace com { namespace sun { namespace star { namespace i18n {
40*b1cdbd2cSJim Jagielski // ----------------------------------------------------
41*b1cdbd2cSJim Jagielski // class cclass_Unicode
42*b1cdbd2cSJim Jagielski // ----------------------------------------------------;
43*b1cdbd2cSJim Jagielski
cclass_Unicode(uno::Reference<XMultiServiceFactory> xSMgr)44*b1cdbd2cSJim Jagielski cclass_Unicode::cclass_Unicode( uno::Reference < XMultiServiceFactory > xSMgr ) : xMSF( xSMgr ),
45*b1cdbd2cSJim Jagielski pTable( NULL ),
46*b1cdbd2cSJim Jagielski pStart( NULL ),
47*b1cdbd2cSJim Jagielski pCont( NULL ),
48*b1cdbd2cSJim Jagielski nStartTypes( 0 ),
49*b1cdbd2cSJim Jagielski nContTypes( 0 ),
50*b1cdbd2cSJim Jagielski eState( ssGetChar ),
51*b1cdbd2cSJim Jagielski cGroupSep( ',' ),
52*b1cdbd2cSJim Jagielski cDecimalSep( '.' )
53*b1cdbd2cSJim Jagielski {
54*b1cdbd2cSJim Jagielski trans = new Transliteration_casemapping();
55*b1cdbd2cSJim Jagielski cClass = "com.sun.star.i18n.CharacterClassification_Unicode";
56*b1cdbd2cSJim Jagielski }
57*b1cdbd2cSJim Jagielski
~cclass_Unicode()58*b1cdbd2cSJim Jagielski cclass_Unicode::~cclass_Unicode() {
59*b1cdbd2cSJim Jagielski destroyParserTable();
60*b1cdbd2cSJim Jagielski delete trans;
61*b1cdbd2cSJim Jagielski }
62*b1cdbd2cSJim Jagielski
63*b1cdbd2cSJim Jagielski
64*b1cdbd2cSJim Jagielski OUString SAL_CALL
toUpper(const OUString & Text,sal_Int32 nPos,sal_Int32 nCount,const Locale & rLocale)65*b1cdbd2cSJim Jagielski cclass_Unicode::toUpper( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
66*b1cdbd2cSJim Jagielski sal_Int32 len = Text.getLength();
67*b1cdbd2cSJim Jagielski if (nPos >= len)
68*b1cdbd2cSJim Jagielski return OUString();
69*b1cdbd2cSJim Jagielski if (nCount + nPos > len)
70*b1cdbd2cSJim Jagielski nCount = len - nPos;
71*b1cdbd2cSJim Jagielski
72*b1cdbd2cSJim Jagielski trans->setMappingType(MappingTypeToUpper, rLocale);
73*b1cdbd2cSJim Jagielski return trans->transliterateString2String(Text, nPos, nCount);
74*b1cdbd2cSJim Jagielski }
75*b1cdbd2cSJim Jagielski
76*b1cdbd2cSJim Jagielski OUString SAL_CALL
toLower(const OUString & Text,sal_Int32 nPos,sal_Int32 nCount,const Locale & rLocale)77*b1cdbd2cSJim Jagielski cclass_Unicode::toLower( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
78*b1cdbd2cSJim Jagielski sal_Int32 len = Text.getLength();
79*b1cdbd2cSJim Jagielski if (nPos >= len)
80*b1cdbd2cSJim Jagielski return OUString();
81*b1cdbd2cSJim Jagielski if (nCount + nPos > len)
82*b1cdbd2cSJim Jagielski nCount = len - nPos;
83*b1cdbd2cSJim Jagielski
84*b1cdbd2cSJim Jagielski trans->setMappingType(MappingTypeToLower, rLocale);
85*b1cdbd2cSJim Jagielski return trans->transliterateString2String(Text, nPos, nCount);
86*b1cdbd2cSJim Jagielski }
87*b1cdbd2cSJim Jagielski
88*b1cdbd2cSJim Jagielski OUString SAL_CALL
toTitle(const OUString & Text,sal_Int32 nPos,sal_Int32 nCount,const Locale & rLocale)89*b1cdbd2cSJim Jagielski cclass_Unicode::toTitle( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
90*b1cdbd2cSJim Jagielski sal_Int32 len = Text.getLength();
91*b1cdbd2cSJim Jagielski if (nPos >= len)
92*b1cdbd2cSJim Jagielski return OUString();
93*b1cdbd2cSJim Jagielski if (nCount + nPos > len)
94*b1cdbd2cSJim Jagielski nCount = len - nPos;
95*b1cdbd2cSJim Jagielski
96*b1cdbd2cSJim Jagielski trans->setMappingType(MappingTypeToTitle, rLocale);
97*b1cdbd2cSJim Jagielski rtl_uString* pStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h
98*b1cdbd2cSJim Jagielski sal_Unicode* out = pStr->buffer;
99*b1cdbd2cSJim Jagielski BreakIteratorImpl brk(xMSF);
100*b1cdbd2cSJim Jagielski Boundary bdy = brk.getWordBoundary(Text, nPos, rLocale,
101*b1cdbd2cSJim Jagielski WordType::ANYWORD_IGNOREWHITESPACES, sal_True);
102*b1cdbd2cSJim Jagielski for (sal_Int32 i = nPos; i < nCount + nPos; i++, out++) {
103*b1cdbd2cSJim Jagielski if (i >= bdy.endPos)
104*b1cdbd2cSJim Jagielski bdy = brk.nextWord(Text, bdy.endPos, rLocale,
105*b1cdbd2cSJim Jagielski WordType::ANYWORD_IGNOREWHITESPACES);
106*b1cdbd2cSJim Jagielski *out = (i == bdy.startPos) ?
107*b1cdbd2cSJim Jagielski trans->transliterateChar2Char(Text[i]) : Text[i];
108*b1cdbd2cSJim Jagielski }
109*b1cdbd2cSJim Jagielski *out = 0;
110*b1cdbd2cSJim Jagielski return OUString( pStr, SAL_NO_ACQUIRE );
111*b1cdbd2cSJim Jagielski }
112*b1cdbd2cSJim Jagielski
113*b1cdbd2cSJim Jagielski sal_Int16 SAL_CALL
getType(const OUString & Text,sal_Int32 nPos)114*b1cdbd2cSJim Jagielski cclass_Unicode::getType( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
115*b1cdbd2cSJim Jagielski if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
116*b1cdbd2cSJim Jagielski return (sal_Int16) u_charType(Text.iterateCodePoints(&nPos, 0));
117*b1cdbd2cSJim Jagielski }
118*b1cdbd2cSJim Jagielski
119*b1cdbd2cSJim Jagielski sal_Int16 SAL_CALL
getCharacterDirection(const OUString & Text,sal_Int32 nPos)120*b1cdbd2cSJim Jagielski cclass_Unicode::getCharacterDirection( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
121*b1cdbd2cSJim Jagielski if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
122*b1cdbd2cSJim Jagielski return (sal_Int16) u_charDirection(Text.iterateCodePoints(&nPos, 0));
123*b1cdbd2cSJim Jagielski }
124*b1cdbd2cSJim Jagielski
125*b1cdbd2cSJim Jagielski
126*b1cdbd2cSJim Jagielski sal_Int16 SAL_CALL
getScript(const OUString & Text,sal_Int32 nPos)127*b1cdbd2cSJim Jagielski cclass_Unicode::getScript( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
128*b1cdbd2cSJim Jagielski if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
129*b1cdbd2cSJim Jagielski // ICU Unicode script type UBlockCode starts from 1 for Basci Latin,
130*b1cdbd2cSJim Jagielski // while OO.o enum UnicideScript starts from 0.
131*b1cdbd2cSJim Jagielski // To map ICU UBlockCode to OO.o UnicodeScript, it needs to shift 1.
132*b1cdbd2cSJim Jagielski return (sal_Int16) ublock_getCode(Text.iterateCodePoints(&nPos, 0))-1;
133*b1cdbd2cSJim Jagielski }
134*b1cdbd2cSJim Jagielski
135*b1cdbd2cSJim Jagielski
136*b1cdbd2cSJim Jagielski sal_Int32 SAL_CALL
getCharType(const OUString & Text,sal_Int32 * nPos,sal_Int32 increment)137*b1cdbd2cSJim Jagielski cclass_Unicode::getCharType( const OUString& Text, sal_Int32* nPos, sal_Int32 increment) {
138*b1cdbd2cSJim Jagielski using namespace ::com::sun::star::i18n::KCharacterType;
139*b1cdbd2cSJim Jagielski
140*b1cdbd2cSJim Jagielski sal_uInt32 ch = Text.iterateCodePoints(nPos, increment);
141*b1cdbd2cSJim Jagielski if (increment > 0) ch = Text.iterateCodePoints(nPos, 0);
142*b1cdbd2cSJim Jagielski switch ( u_charType(ch) ) {
143*b1cdbd2cSJim Jagielski // Upper
144*b1cdbd2cSJim Jagielski case U_UPPERCASE_LETTER :
145*b1cdbd2cSJim Jagielski return UPPER|LETTER|PRINTABLE|BASE_FORM;
146*b1cdbd2cSJim Jagielski
147*b1cdbd2cSJim Jagielski // Lower
148*b1cdbd2cSJim Jagielski case U_LOWERCASE_LETTER :
149*b1cdbd2cSJim Jagielski return LOWER|LETTER|PRINTABLE|BASE_FORM;
150*b1cdbd2cSJim Jagielski
151*b1cdbd2cSJim Jagielski // Title
152*b1cdbd2cSJim Jagielski case U_TITLECASE_LETTER :
153*b1cdbd2cSJim Jagielski return TITLE_CASE|LETTER|PRINTABLE|BASE_FORM;
154*b1cdbd2cSJim Jagielski
155*b1cdbd2cSJim Jagielski // Letter
156*b1cdbd2cSJim Jagielski case U_MODIFIER_LETTER :
157*b1cdbd2cSJim Jagielski case U_OTHER_LETTER :
158*b1cdbd2cSJim Jagielski return LETTER|PRINTABLE|BASE_FORM;
159*b1cdbd2cSJim Jagielski
160*b1cdbd2cSJim Jagielski // Digit
161*b1cdbd2cSJim Jagielski case U_DECIMAL_DIGIT_NUMBER:
162*b1cdbd2cSJim Jagielski case U_LETTER_NUMBER:
163*b1cdbd2cSJim Jagielski case U_OTHER_NUMBER:
164*b1cdbd2cSJim Jagielski return DIGIT|PRINTABLE|BASE_FORM;
165*b1cdbd2cSJim Jagielski
166*b1cdbd2cSJim Jagielski // Base
167*b1cdbd2cSJim Jagielski case U_NON_SPACING_MARK:
168*b1cdbd2cSJim Jagielski case U_ENCLOSING_MARK:
169*b1cdbd2cSJim Jagielski case U_COMBINING_SPACING_MARK:
170*b1cdbd2cSJim Jagielski return BASE_FORM|PRINTABLE;
171*b1cdbd2cSJim Jagielski
172*b1cdbd2cSJim Jagielski // Print
173*b1cdbd2cSJim Jagielski case U_SPACE_SEPARATOR:
174*b1cdbd2cSJim Jagielski
175*b1cdbd2cSJim Jagielski case U_DASH_PUNCTUATION:
176*b1cdbd2cSJim Jagielski case U_INITIAL_PUNCTUATION:
177*b1cdbd2cSJim Jagielski case U_FINAL_PUNCTUATION:
178*b1cdbd2cSJim Jagielski case U_CONNECTOR_PUNCTUATION:
179*b1cdbd2cSJim Jagielski case U_OTHER_PUNCTUATION:
180*b1cdbd2cSJim Jagielski
181*b1cdbd2cSJim Jagielski case U_MATH_SYMBOL:
182*b1cdbd2cSJim Jagielski case U_CURRENCY_SYMBOL:
183*b1cdbd2cSJim Jagielski case U_MODIFIER_SYMBOL:
184*b1cdbd2cSJim Jagielski case U_OTHER_SYMBOL:
185*b1cdbd2cSJim Jagielski return PRINTABLE;
186*b1cdbd2cSJim Jagielski
187*b1cdbd2cSJim Jagielski // Control
188*b1cdbd2cSJim Jagielski case U_CONTROL_CHAR:
189*b1cdbd2cSJim Jagielski case U_FORMAT_CHAR:
190*b1cdbd2cSJim Jagielski return CONTROL;
191*b1cdbd2cSJim Jagielski
192*b1cdbd2cSJim Jagielski case U_LINE_SEPARATOR:
193*b1cdbd2cSJim Jagielski case U_PARAGRAPH_SEPARATOR:
194*b1cdbd2cSJim Jagielski return CONTROL|PRINTABLE;
195*b1cdbd2cSJim Jagielski
196*b1cdbd2cSJim Jagielski // for all others
197*b1cdbd2cSJim Jagielski default:
198*b1cdbd2cSJim Jagielski return U_GENERAL_OTHER_TYPES;
199*b1cdbd2cSJim Jagielski }
200*b1cdbd2cSJim Jagielski }
201*b1cdbd2cSJim Jagielski
202*b1cdbd2cSJim Jagielski sal_Int32 SAL_CALL
getCharacterType(const OUString & Text,sal_Int32 nPos,const Locale &)203*b1cdbd2cSJim Jagielski cclass_Unicode::getCharacterType( const OUString& Text, sal_Int32 nPos, const Locale& /*rLocale*/ ) throw(RuntimeException) {
204*b1cdbd2cSJim Jagielski if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
205*b1cdbd2cSJim Jagielski return getCharType(Text, &nPos, 0);
206*b1cdbd2cSJim Jagielski
207*b1cdbd2cSJim Jagielski }
208*b1cdbd2cSJim Jagielski
209*b1cdbd2cSJim Jagielski sal_Int32 SAL_CALL
getStringType(const OUString & Text,sal_Int32 nPos,sal_Int32 nCount,const Locale &)210*b1cdbd2cSJim Jagielski cclass_Unicode::getStringType( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& /*rLocale*/ ) throw(RuntimeException) {
211*b1cdbd2cSJim Jagielski if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
212*b1cdbd2cSJim Jagielski
213*b1cdbd2cSJim Jagielski sal_Int32 result = getCharType(Text, &nPos, 0);
214*b1cdbd2cSJim Jagielski for (sal_Int32 i = 1; i < nCount && nPos < Text.getLength(); i++)
215*b1cdbd2cSJim Jagielski result |= getCharType(Text, &nPos, 1);
216*b1cdbd2cSJim Jagielski return result;
217*b1cdbd2cSJim Jagielski }
218*b1cdbd2cSJim Jagielski
parseAnyToken(const OUString & Text,sal_Int32 nPos,const Locale & rLocale,sal_Int32 startCharTokenType,const OUString & userDefinedCharactersStart,sal_Int32 contCharTokenType,const OUString & userDefinedCharactersCont)219*b1cdbd2cSJim Jagielski ParseResult SAL_CALL cclass_Unicode::parseAnyToken(
220*b1cdbd2cSJim Jagielski const OUString& Text,
221*b1cdbd2cSJim Jagielski sal_Int32 nPos,
222*b1cdbd2cSJim Jagielski const Locale& rLocale,
223*b1cdbd2cSJim Jagielski sal_Int32 startCharTokenType,
224*b1cdbd2cSJim Jagielski const OUString& userDefinedCharactersStart,
225*b1cdbd2cSJim Jagielski sal_Int32 contCharTokenType,
226*b1cdbd2cSJim Jagielski const OUString& userDefinedCharactersCont )
227*b1cdbd2cSJim Jagielski throw(RuntimeException)
228*b1cdbd2cSJim Jagielski {
229*b1cdbd2cSJim Jagielski ParseResult r;
230*b1cdbd2cSJim Jagielski if ( Text.getLength() <= nPos )
231*b1cdbd2cSJim Jagielski return r;
232*b1cdbd2cSJim Jagielski
233*b1cdbd2cSJim Jagielski setupParserTable( rLocale,
234*b1cdbd2cSJim Jagielski startCharTokenType, userDefinedCharactersStart,
235*b1cdbd2cSJim Jagielski contCharTokenType, userDefinedCharactersCont );
236*b1cdbd2cSJim Jagielski parseText( r, Text, nPos );
237*b1cdbd2cSJim Jagielski
238*b1cdbd2cSJim Jagielski return r;
239*b1cdbd2cSJim Jagielski }
240*b1cdbd2cSJim Jagielski
241*b1cdbd2cSJim Jagielski
parsePredefinedToken(sal_Int32 nTokenType,const OUString & Text,sal_Int32 nPos,const Locale & rLocale,sal_Int32 startCharTokenType,const OUString & userDefinedCharactersStart,sal_Int32 contCharTokenType,const OUString & userDefinedCharactersCont)242*b1cdbd2cSJim Jagielski ParseResult SAL_CALL cclass_Unicode::parsePredefinedToken(
243*b1cdbd2cSJim Jagielski sal_Int32 nTokenType,
244*b1cdbd2cSJim Jagielski const OUString& Text,
245*b1cdbd2cSJim Jagielski sal_Int32 nPos,
246*b1cdbd2cSJim Jagielski const Locale& rLocale,
247*b1cdbd2cSJim Jagielski sal_Int32 startCharTokenType,
248*b1cdbd2cSJim Jagielski const OUString& userDefinedCharactersStart,
249*b1cdbd2cSJim Jagielski sal_Int32 contCharTokenType,
250*b1cdbd2cSJim Jagielski const OUString& userDefinedCharactersCont )
251*b1cdbd2cSJim Jagielski throw(RuntimeException)
252*b1cdbd2cSJim Jagielski {
253*b1cdbd2cSJim Jagielski ParseResult r;
254*b1cdbd2cSJim Jagielski if ( Text.getLength() <= nPos )
255*b1cdbd2cSJim Jagielski return r;
256*b1cdbd2cSJim Jagielski
257*b1cdbd2cSJim Jagielski setupParserTable( rLocale,
258*b1cdbd2cSJim Jagielski startCharTokenType, userDefinedCharactersStart,
259*b1cdbd2cSJim Jagielski contCharTokenType, userDefinedCharactersCont );
260*b1cdbd2cSJim Jagielski parseText( r, Text, nPos, nTokenType );
261*b1cdbd2cSJim Jagielski
262*b1cdbd2cSJim Jagielski return r;
263*b1cdbd2cSJim Jagielski }
264*b1cdbd2cSJim Jagielski
getImplementationName()265*b1cdbd2cSJim Jagielski OUString SAL_CALL cclass_Unicode::getImplementationName() throw( RuntimeException )
266*b1cdbd2cSJim Jagielski {
267*b1cdbd2cSJim Jagielski return OUString::createFromAscii(cClass);
268*b1cdbd2cSJim Jagielski }
269*b1cdbd2cSJim Jagielski
270*b1cdbd2cSJim Jagielski
supportsService(const OUString & rServiceName)271*b1cdbd2cSJim Jagielski sal_Bool SAL_CALL cclass_Unicode::supportsService(const OUString& rServiceName) throw( RuntimeException )
272*b1cdbd2cSJim Jagielski {
273*b1cdbd2cSJim Jagielski return !rServiceName.compareToAscii(cClass);
274*b1cdbd2cSJim Jagielski }
275*b1cdbd2cSJim Jagielski
getSupportedServiceNames()276*b1cdbd2cSJim Jagielski Sequence< OUString > SAL_CALL cclass_Unicode::getSupportedServiceNames() throw( RuntimeException )
277*b1cdbd2cSJim Jagielski {
278*b1cdbd2cSJim Jagielski Sequence< OUString > aRet(1);
279*b1cdbd2cSJim Jagielski aRet[0] = OUString::createFromAscii(cClass);
280*b1cdbd2cSJim Jagielski return aRet;
281*b1cdbd2cSJim Jagielski }
282*b1cdbd2cSJim Jagielski
283*b1cdbd2cSJim Jagielski } } } }
284*b1cdbd2cSJim Jagielski
285