1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 // MARKER(update_precomp.py): autogen include statement, do not remove 25 #include "precompiled_i18npool.hxx" 26 27 #include <cclass_unicode.hxx> 28 #include <com/sun/star/i18n/UnicodeScript.hpp> 29 #include <com/sun/star/i18n/UnicodeType.hpp> 30 #include <com/sun/star/i18n/KCharacterType.hpp> 31 #include <unicode/uchar.h> 32 #include <i18nutil/x_rtl_ustring.h> 33 #include <breakiteratorImpl.hxx> 34 35 using namespace ::com::sun::star::uno; 36 using namespace ::com::sun::star::lang; 37 using namespace ::rtl; 38 39 namespace com { namespace sun { namespace star { namespace i18n { 40 // ---------------------------------------------------- 41 // class cclass_Unicode 42 // ----------------------------------------------------; 43 44 cclass_Unicode::cclass_Unicode( uno::Reference < XMultiServiceFactory > xSMgr ) : xMSF( xSMgr ), 45 pTable( NULL ), 46 pStart( NULL ), 47 pCont( NULL ), 48 nStartTypes( 0 ), 49 nContTypes( 0 ), 50 eState( ssGetChar ), 51 cGroupSep( ',' ), 52 cDecimalSep( '.' ) 53 { 54 trans = new Transliteration_casemapping(); 55 cClass = "com.sun.star.i18n.CharacterClassification_Unicode"; 56 } 57 58 cclass_Unicode::~cclass_Unicode() { 59 destroyParserTable(); 60 delete trans; 61 } 62 63 64 OUString SAL_CALL 65 cclass_Unicode::toUpper( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) { 66 sal_Int32 len = Text.getLength(); 67 if (nPos >= len) 68 return OUString(); 69 if (nCount + nPos > len) 70 nCount = len - nPos; 71 72 trans->setMappingType(MappingTypeToUpper, rLocale); 73 return trans->transliterateString2String(Text, nPos, nCount); 74 } 75 76 OUString SAL_CALL 77 cclass_Unicode::toLower( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) { 78 sal_Int32 len = Text.getLength(); 79 if (nPos >= len) 80 return OUString(); 81 if (nCount + nPos > len) 82 nCount = len - nPos; 83 84 trans->setMappingType(MappingTypeToLower, rLocale); 85 return trans->transliterateString2String(Text, nPos, nCount); 86 } 87 88 OUString SAL_CALL 89 cclass_Unicode::toTitle( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) { 90 sal_Int32 len = Text.getLength(); 91 if (nPos >= len) 92 return OUString(); 93 if (nCount + nPos > len) 94 nCount = len - nPos; 95 96 trans->setMappingType(MappingTypeToTitle, rLocale); 97 rtl_uString* pStr = x_rtl_uString_new_WithLength( nCount, 1 ); 98 sal_Unicode* out = pStr->buffer; 99 BreakIteratorImpl brk(xMSF); 100 Boundary bdy = brk.getWordBoundary(Text, nPos, rLocale, 101 WordType::ANYWORD_IGNOREWHITESPACES, sal_True); 102 for (sal_Int32 i = nPos; i < nCount + nPos; i++, out++) { 103 if (i >= bdy.endPos) 104 bdy = brk.nextWord(Text, bdy.endPos, rLocale, 105 WordType::ANYWORD_IGNOREWHITESPACES); 106 *out = (i == bdy.startPos) ? 107 trans->transliterateChar2Char(Text[i]) : Text[i]; 108 } 109 *out = 0; 110 return OUString( pStr, SAL_NO_ACQUIRE ); 111 } 112 113 sal_Int16 SAL_CALL 114 cclass_Unicode::getType( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) { 115 if ( nPos < 0 || Text.getLength() <= nPos ) return 0; 116 return (sal_Int16) u_charType(Text.iterateCodePoints(&nPos, 0)); 117 } 118 119 sal_Int16 SAL_CALL 120 cclass_Unicode::getCharacterDirection( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) { 121 if ( nPos < 0 || Text.getLength() <= nPos ) return 0; 122 return (sal_Int16) u_charDirection(Text.iterateCodePoints(&nPos, 0)); 123 } 124 125 126 sal_Int16 SAL_CALL 127 cclass_Unicode::getScript( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) { 128 if ( nPos < 0 || Text.getLength() <= nPos ) return 0; 129 // ICU Unicode script type UBlockCode starts from 1 for Basci Latin, 130 // while OO.o enum UnicideScript starts from 0. 131 // To map ICU UBlockCode to OO.o UnicodeScript, it needs to shift 1. 132 return (sal_Int16) ublock_getCode(Text.iterateCodePoints(&nPos, 0))-1; 133 } 134 135 136 sal_Int32 SAL_CALL 137 cclass_Unicode::getCharType( const OUString& Text, sal_Int32* nPos, sal_Int32 increment) { 138 using namespace ::com::sun::star::i18n::KCharacterType; 139 140 sal_uInt32 ch = Text.iterateCodePoints(nPos, increment); 141 if (increment > 0) ch = Text.iterateCodePoints(nPos, 0); 142 switch ( u_charType(ch) ) { 143 // Upper 144 case U_UPPERCASE_LETTER : 145 return UPPER|LETTER|PRINTABLE|BASE_FORM; 146 147 // Lower 148 case U_LOWERCASE_LETTER : 149 return LOWER|LETTER|PRINTABLE|BASE_FORM; 150 151 // Title 152 case U_TITLECASE_LETTER : 153 return TITLE_CASE|LETTER|PRINTABLE|BASE_FORM; 154 155 // Letter 156 case U_MODIFIER_LETTER : 157 case U_OTHER_LETTER : 158 return LETTER|PRINTABLE|BASE_FORM; 159 160 // Digit 161 case U_DECIMAL_DIGIT_NUMBER: 162 case U_LETTER_NUMBER: 163 case U_OTHER_NUMBER: 164 return DIGIT|PRINTABLE|BASE_FORM; 165 166 // Base 167 case U_NON_SPACING_MARK: 168 case U_ENCLOSING_MARK: 169 case U_COMBINING_SPACING_MARK: 170 return BASE_FORM|PRINTABLE; 171 172 // Print 173 case U_SPACE_SEPARATOR: 174 175 case U_DASH_PUNCTUATION: 176 case U_INITIAL_PUNCTUATION: 177 case U_FINAL_PUNCTUATION: 178 case U_CONNECTOR_PUNCTUATION: 179 case U_OTHER_PUNCTUATION: 180 181 case U_MATH_SYMBOL: 182 case U_CURRENCY_SYMBOL: 183 case U_MODIFIER_SYMBOL: 184 case U_OTHER_SYMBOL: 185 return PRINTABLE; 186 187 // Control 188 case U_CONTROL_CHAR: 189 case U_FORMAT_CHAR: 190 return CONTROL; 191 192 case U_LINE_SEPARATOR: 193 case U_PARAGRAPH_SEPARATOR: 194 return CONTROL|PRINTABLE; 195 196 // for all others 197 default: 198 return U_GENERAL_OTHER_TYPES; 199 } 200 } 201 202 sal_Int32 SAL_CALL 203 cclass_Unicode::getCharacterType( const OUString& Text, sal_Int32 nPos, const Locale& /*rLocale*/ ) throw(RuntimeException) { 204 if ( nPos < 0 || Text.getLength() <= nPos ) return 0; 205 return getCharType(Text, &nPos, 0); 206 207 } 208 209 sal_Int32 SAL_CALL 210 cclass_Unicode::getStringType( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& /*rLocale*/ ) throw(RuntimeException) { 211 if ( nPos < 0 || Text.getLength() <= nPos ) return 0; 212 213 sal_Int32 result = getCharType(Text, &nPos, 0); 214 for (sal_Int32 i = 1; i < nCount && nPos < Text.getLength(); i++) 215 result |= getCharType(Text, &nPos, 1); 216 return result; 217 } 218 219 ParseResult SAL_CALL cclass_Unicode::parseAnyToken( 220 const OUString& Text, 221 sal_Int32 nPos, 222 const Locale& rLocale, 223 sal_Int32 startCharTokenType, 224 const OUString& userDefinedCharactersStart, 225 sal_Int32 contCharTokenType, 226 const OUString& userDefinedCharactersCont ) 227 throw(RuntimeException) 228 { 229 ParseResult r; 230 if ( Text.getLength() <= nPos ) 231 return r; 232 233 setupParserTable( rLocale, 234 startCharTokenType, userDefinedCharactersStart, 235 contCharTokenType, userDefinedCharactersCont ); 236 parseText( r, Text, nPos ); 237 238 return r; 239 } 240 241 242 ParseResult SAL_CALL cclass_Unicode::parsePredefinedToken( 243 sal_Int32 nTokenType, 244 const OUString& Text, 245 sal_Int32 nPos, 246 const Locale& rLocale, 247 sal_Int32 startCharTokenType, 248 const OUString& userDefinedCharactersStart, 249 sal_Int32 contCharTokenType, 250 const OUString& userDefinedCharactersCont ) 251 throw(RuntimeException) 252 { 253 ParseResult r; 254 if ( Text.getLength() <= nPos ) 255 return r; 256 257 setupParserTable( rLocale, 258 startCharTokenType, userDefinedCharactersStart, 259 contCharTokenType, userDefinedCharactersCont ); 260 parseText( r, Text, nPos, nTokenType ); 261 262 return r; 263 } 264 265 OUString SAL_CALL cclass_Unicode::getImplementationName() throw( RuntimeException ) 266 { 267 return OUString::createFromAscii(cClass); 268 } 269 270 271 sal_Bool SAL_CALL cclass_Unicode::supportsService(const OUString& rServiceName) throw( RuntimeException ) 272 { 273 return !rServiceName.compareToAscii(cClass); 274 } 275 276 Sequence< OUString > SAL_CALL cclass_Unicode::getSupportedServiceNames() throw( RuntimeException ) 277 { 278 Sequence< OUString > aRet(1); 279 aRet[0] = OUString::createFromAscii(cClass); 280 return aRet; 281 } 282 283 } } } } 284 285