1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 // MARKER(update_precomp.py): autogen include statement, do not remove 29 #include "precompiled_i18npool.hxx" 30 31 #include <cclass_unicode.hxx> 32 #include <com/sun/star/i18n/UnicodeScript.hpp> 33 #include <com/sun/star/i18n/UnicodeType.hpp> 34 #include <com/sun/star/i18n/KCharacterType.hpp> 35 #include <unicode/uchar.h> 36 #include <i18nutil/x_rtl_ustring.h> 37 #include <breakiteratorImpl.hxx> 38 39 using namespace ::com::sun::star::uno; 40 using namespace ::com::sun::star::lang; 41 using namespace ::rtl; 42 43 namespace com { namespace sun { namespace star { namespace i18n { 44 // ---------------------------------------------------- 45 // class cclass_Unicode 46 // ----------------------------------------------------; 47 48 cclass_Unicode::cclass_Unicode( uno::Reference < XMultiServiceFactory > xSMgr ) : xMSF( xSMgr ), 49 pTable( NULL ), 50 pStart( NULL ), 51 pCont( NULL ), 52 nStartTypes( 0 ), 53 nContTypes( 0 ), 54 eState( ssGetChar ), 55 cGroupSep( ',' ), 56 cDecimalSep( '.' ) 57 { 58 trans = new Transliteration_casemapping(); 59 cClass = "com.sun.star.i18n.CharacterClassification_Unicode"; 60 } 61 62 cclass_Unicode::~cclass_Unicode() { 63 destroyParserTable(); 64 delete trans; 65 } 66 67 68 OUString SAL_CALL 69 cclass_Unicode::toUpper( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) { 70 sal_Int32 len = Text.getLength(); 71 if (nPos >= len) 72 return OUString(); 73 if (nCount + nPos > len) 74 nCount = len - nPos; 75 76 trans->setMappingType(MappingTypeToUpper, rLocale); 77 return trans->transliterateString2String(Text, nPos, nCount); 78 } 79 80 OUString SAL_CALL 81 cclass_Unicode::toLower( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) { 82 sal_Int32 len = Text.getLength(); 83 if (nPos >= len) 84 return OUString(); 85 if (nCount + nPos > len) 86 nCount = len - nPos; 87 88 trans->setMappingType(MappingTypeToLower, rLocale); 89 return trans->transliterateString2String(Text, nPos, nCount); 90 } 91 92 OUString SAL_CALL 93 cclass_Unicode::toTitle( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) { 94 sal_Int32 len = Text.getLength(); 95 if (nPos >= len) 96 return OUString(); 97 if (nCount + nPos > len) 98 nCount = len - nPos; 99 100 trans->setMappingType(MappingTypeToTitle, rLocale); 101 rtl_uString* pStr = x_rtl_uString_new_WithLength( nCount, 1 ); 102 sal_Unicode* out = pStr->buffer; 103 BreakIteratorImpl brk(xMSF); 104 Boundary bdy = brk.getWordBoundary(Text, nPos, rLocale, 105 WordType::ANYWORD_IGNOREWHITESPACES, sal_True); 106 for (sal_Int32 i = nPos; i < nCount + nPos; i++, out++) { 107 if (i >= bdy.endPos) 108 bdy = brk.nextWord(Text, bdy.endPos, rLocale, 109 WordType::ANYWORD_IGNOREWHITESPACES); 110 *out = (i == bdy.startPos) ? 111 trans->transliterateChar2Char(Text[i]) : Text[i]; 112 } 113 *out = 0; 114 return OUString( pStr, SAL_NO_ACQUIRE ); 115 } 116 117 sal_Int16 SAL_CALL 118 cclass_Unicode::getType( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) { 119 if ( nPos < 0 || Text.getLength() <= nPos ) return 0; 120 return (sal_Int16) u_charType(Text.iterateCodePoints(&nPos, 0)); 121 } 122 123 sal_Int16 SAL_CALL 124 cclass_Unicode::getCharacterDirection( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) { 125 if ( nPos < 0 || Text.getLength() <= nPos ) return 0; 126 return (sal_Int16) u_charDirection(Text.iterateCodePoints(&nPos, 0)); 127 } 128 129 130 sal_Int16 SAL_CALL 131 cclass_Unicode::getScript( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) { 132 if ( nPos < 0 || Text.getLength() <= nPos ) return 0; 133 // ICU Unicode script type UBlockCode starts from 1 for Basci Latin, 134 // while OO.o enum UnicideScript starts from 0. 135 // To map ICU UBlockCode to OO.o UnicodeScript, it needs to shift 1. 136 return (sal_Int16) ublock_getCode(Text.iterateCodePoints(&nPos, 0))-1; 137 } 138 139 140 sal_Int32 SAL_CALL 141 cclass_Unicode::getCharType( const OUString& Text, sal_Int32* nPos, sal_Int32 increment) { 142 using namespace ::com::sun::star::i18n::KCharacterType; 143 144 sal_uInt32 ch = Text.iterateCodePoints(nPos, increment); 145 if (increment > 0) ch = Text.iterateCodePoints(nPos, 0); 146 switch ( u_charType(ch) ) { 147 // Upper 148 case U_UPPERCASE_LETTER : 149 return UPPER|LETTER|PRINTABLE|BASE_FORM; 150 151 // Lower 152 case U_LOWERCASE_LETTER : 153 return LOWER|LETTER|PRINTABLE|BASE_FORM; 154 155 // Title 156 case U_TITLECASE_LETTER : 157 return TITLE_CASE|LETTER|PRINTABLE|BASE_FORM; 158 159 // Letter 160 case U_MODIFIER_LETTER : 161 case U_OTHER_LETTER : 162 return LETTER|PRINTABLE|BASE_FORM; 163 164 // Digit 165 case U_DECIMAL_DIGIT_NUMBER: 166 case U_LETTER_NUMBER: 167 case U_OTHER_NUMBER: 168 return DIGIT|PRINTABLE|BASE_FORM; 169 170 // Base 171 case U_NON_SPACING_MARK: 172 case U_ENCLOSING_MARK: 173 case U_COMBINING_SPACING_MARK: 174 return BASE_FORM|PRINTABLE; 175 176 // Print 177 case U_SPACE_SEPARATOR: 178 179 case U_DASH_PUNCTUATION: 180 case U_INITIAL_PUNCTUATION: 181 case U_FINAL_PUNCTUATION: 182 case U_CONNECTOR_PUNCTUATION: 183 case U_OTHER_PUNCTUATION: 184 185 case U_MATH_SYMBOL: 186 case U_CURRENCY_SYMBOL: 187 case U_MODIFIER_SYMBOL: 188 case U_OTHER_SYMBOL: 189 return PRINTABLE; 190 191 // Control 192 case U_CONTROL_CHAR: 193 case U_FORMAT_CHAR: 194 return CONTROL; 195 196 case U_LINE_SEPARATOR: 197 case U_PARAGRAPH_SEPARATOR: 198 return CONTROL|PRINTABLE; 199 200 // for all others 201 default: 202 return U_GENERAL_OTHER_TYPES; 203 } 204 } 205 206 sal_Int32 SAL_CALL 207 cclass_Unicode::getCharacterType( const OUString& Text, sal_Int32 nPos, const Locale& /*rLocale*/ ) throw(RuntimeException) { 208 if ( nPos < 0 || Text.getLength() <= nPos ) return 0; 209 return getCharType(Text, &nPos, 0); 210 211 } 212 213 sal_Int32 SAL_CALL 214 cclass_Unicode::getStringType( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& /*rLocale*/ ) throw(RuntimeException) { 215 if ( nPos < 0 || Text.getLength() <= nPos ) return 0; 216 217 sal_Int32 result = getCharType(Text, &nPos, 0); 218 for (sal_Int32 i = 1; i < nCount && nPos < Text.getLength(); i++) 219 result |= getCharType(Text, &nPos, 1); 220 return result; 221 } 222 223 ParseResult SAL_CALL cclass_Unicode::parseAnyToken( 224 const OUString& Text, 225 sal_Int32 nPos, 226 const Locale& rLocale, 227 sal_Int32 startCharTokenType, 228 const OUString& userDefinedCharactersStart, 229 sal_Int32 contCharTokenType, 230 const OUString& userDefinedCharactersCont ) 231 throw(RuntimeException) 232 { 233 ParseResult r; 234 if ( Text.getLength() <= nPos ) 235 return r; 236 237 setupParserTable( rLocale, 238 startCharTokenType, userDefinedCharactersStart, 239 contCharTokenType, userDefinedCharactersCont ); 240 parseText( r, Text, nPos ); 241 242 return r; 243 } 244 245 246 ParseResult SAL_CALL cclass_Unicode::parsePredefinedToken( 247 sal_Int32 nTokenType, 248 const OUString& Text, 249 sal_Int32 nPos, 250 const Locale& rLocale, 251 sal_Int32 startCharTokenType, 252 const OUString& userDefinedCharactersStart, 253 sal_Int32 contCharTokenType, 254 const OUString& userDefinedCharactersCont ) 255 throw(RuntimeException) 256 { 257 ParseResult r; 258 if ( Text.getLength() <= nPos ) 259 return r; 260 261 setupParserTable( rLocale, 262 startCharTokenType, userDefinedCharactersStart, 263 contCharTokenType, userDefinedCharactersCont ); 264 parseText( r, Text, nPos, nTokenType ); 265 266 return r; 267 } 268 269 OUString SAL_CALL cclass_Unicode::getImplementationName() throw( RuntimeException ) 270 { 271 return OUString::createFromAscii(cClass); 272 } 273 274 275 sal_Bool SAL_CALL cclass_Unicode::supportsService(const OUString& rServiceName) throw( RuntimeException ) 276 { 277 return !rServiceName.compareToAscii(cClass); 278 } 279 280 Sequence< OUString > SAL_CALL cclass_Unicode::getSupportedServiceNames() throw( RuntimeException ) 281 { 282 Sequence< OUString > aRet(1); 283 aRet[0] = OUString::createFromAscii(cClass); 284 return aRet; 285 } 286 287 } } } } 288 289