1*449ab281SAndrew Rist /************************************************************** 2cdf0e10cSrcweir * 3*449ab281SAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one 4*449ab281SAndrew Rist * or more contributor license agreements. See the NOTICE file 5*449ab281SAndrew Rist * distributed with this work for additional information 6*449ab281SAndrew Rist * regarding copyright ownership. The ASF licenses this file 7*449ab281SAndrew Rist * to you under the Apache License, Version 2.0 (the 8*449ab281SAndrew Rist * "License"); you may not use this file except in compliance 9*449ab281SAndrew Rist * with the License. You may obtain a copy of the License at 10*449ab281SAndrew Rist * 11*449ab281SAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0 12*449ab281SAndrew Rist * 13*449ab281SAndrew Rist * Unless required by applicable law or agreed to in writing, 14*449ab281SAndrew Rist * software distributed under the License is distributed on an 15*449ab281SAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16*449ab281SAndrew Rist * KIND, either express or implied. See the License for the 17*449ab281SAndrew Rist * specific language governing permissions and limitations 18*449ab281SAndrew Rist * under the License. 19*449ab281SAndrew Rist * 20*449ab281SAndrew Rist *************************************************************/ 21*449ab281SAndrew Rist 22*449ab281SAndrew Rist 23cdf0e10cSrcweir 24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 25cdf0e10cSrcweir #include "precompiled_i18npool.hxx" 26cdf0e10cSrcweir 27cdf0e10cSrcweir #include <cclass_unicode.hxx> 28cdf0e10cSrcweir #include <com/sun/star/i18n/UnicodeScript.hpp> 29cdf0e10cSrcweir #include <com/sun/star/i18n/UnicodeType.hpp> 30cdf0e10cSrcweir #include <com/sun/star/i18n/KCharacterType.hpp> 31cdf0e10cSrcweir #include <unicode/uchar.h> 32cdf0e10cSrcweir #include <i18nutil/x_rtl_ustring.h> 33cdf0e10cSrcweir #include <breakiteratorImpl.hxx> 34cdf0e10cSrcweir 35cdf0e10cSrcweir using namespace ::com::sun::star::uno; 36cdf0e10cSrcweir using namespace ::com::sun::star::lang; 37cdf0e10cSrcweir using namespace ::rtl; 38cdf0e10cSrcweir 39cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n { 40cdf0e10cSrcweir // ---------------------------------------------------- 41cdf0e10cSrcweir // class cclass_Unicode 42cdf0e10cSrcweir // ----------------------------------------------------; 43cdf0e10cSrcweir 44cdf0e10cSrcweir cclass_Unicode::cclass_Unicode( uno::Reference < XMultiServiceFactory > xSMgr ) : xMSF( xSMgr ), 45cdf0e10cSrcweir pTable( NULL ), 46cdf0e10cSrcweir pStart( NULL ), 47cdf0e10cSrcweir pCont( NULL ), 48cdf0e10cSrcweir nStartTypes( 0 ), 49cdf0e10cSrcweir nContTypes( 0 ), 50cdf0e10cSrcweir eState( ssGetChar ), 51cdf0e10cSrcweir cGroupSep( ',' ), 52cdf0e10cSrcweir cDecimalSep( '.' ) 53cdf0e10cSrcweir { 54cdf0e10cSrcweir trans = new Transliteration_casemapping(); 55cdf0e10cSrcweir cClass = "com.sun.star.i18n.CharacterClassification_Unicode"; 56cdf0e10cSrcweir } 57cdf0e10cSrcweir 58cdf0e10cSrcweir cclass_Unicode::~cclass_Unicode() { 59cdf0e10cSrcweir destroyParserTable(); 60cdf0e10cSrcweir delete trans; 61cdf0e10cSrcweir } 62cdf0e10cSrcweir 63cdf0e10cSrcweir 64cdf0e10cSrcweir OUString SAL_CALL 65cdf0e10cSrcweir cclass_Unicode::toUpper( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) { 66cdf0e10cSrcweir sal_Int32 len = Text.getLength(); 67cdf0e10cSrcweir if (nPos >= len) 68cdf0e10cSrcweir return OUString(); 69cdf0e10cSrcweir if (nCount + nPos > len) 70cdf0e10cSrcweir nCount = len - nPos; 71cdf0e10cSrcweir 72cdf0e10cSrcweir trans->setMappingType(MappingTypeToUpper, rLocale); 73cdf0e10cSrcweir return trans->transliterateString2String(Text, nPos, nCount); 74cdf0e10cSrcweir } 75cdf0e10cSrcweir 76cdf0e10cSrcweir OUString SAL_CALL 77cdf0e10cSrcweir cclass_Unicode::toLower( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) { 78cdf0e10cSrcweir sal_Int32 len = Text.getLength(); 79cdf0e10cSrcweir if (nPos >= len) 80cdf0e10cSrcweir return OUString(); 81cdf0e10cSrcweir if (nCount + nPos > len) 82cdf0e10cSrcweir nCount = len - nPos; 83cdf0e10cSrcweir 84cdf0e10cSrcweir trans->setMappingType(MappingTypeToLower, rLocale); 85cdf0e10cSrcweir return trans->transliterateString2String(Text, nPos, nCount); 86cdf0e10cSrcweir } 87cdf0e10cSrcweir 88cdf0e10cSrcweir OUString SAL_CALL 89cdf0e10cSrcweir cclass_Unicode::toTitle( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) { 90cdf0e10cSrcweir sal_Int32 len = Text.getLength(); 91cdf0e10cSrcweir if (nPos >= len) 92cdf0e10cSrcweir return OUString(); 93cdf0e10cSrcweir if (nCount + nPos > len) 94cdf0e10cSrcweir nCount = len - nPos; 95cdf0e10cSrcweir 96cdf0e10cSrcweir trans->setMappingType(MappingTypeToTitle, rLocale); 97cdf0e10cSrcweir rtl_uString* pStr = x_rtl_uString_new_WithLength( nCount, 1 ); 98cdf0e10cSrcweir sal_Unicode* out = pStr->buffer; 99cdf0e10cSrcweir BreakIteratorImpl brk(xMSF); 100cdf0e10cSrcweir Boundary bdy = brk.getWordBoundary(Text, nPos, rLocale, 101cdf0e10cSrcweir WordType::ANYWORD_IGNOREWHITESPACES, sal_True); 102cdf0e10cSrcweir for (sal_Int32 i = nPos; i < nCount + nPos; i++, out++) { 103cdf0e10cSrcweir if (i >= bdy.endPos) 104cdf0e10cSrcweir bdy = brk.nextWord(Text, bdy.endPos, rLocale, 105cdf0e10cSrcweir WordType::ANYWORD_IGNOREWHITESPACES); 106cdf0e10cSrcweir *out = (i == bdy.startPos) ? 107cdf0e10cSrcweir trans->transliterateChar2Char(Text[i]) : Text[i]; 108cdf0e10cSrcweir } 109cdf0e10cSrcweir *out = 0; 110cdf0e10cSrcweir return OUString( pStr, SAL_NO_ACQUIRE ); 111cdf0e10cSrcweir } 112cdf0e10cSrcweir 113cdf0e10cSrcweir sal_Int16 SAL_CALL 114cdf0e10cSrcweir cclass_Unicode::getType( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) { 115cdf0e10cSrcweir if ( nPos < 0 || Text.getLength() <= nPos ) return 0; 116cdf0e10cSrcweir return (sal_Int16) u_charType(Text.iterateCodePoints(&nPos, 0)); 117cdf0e10cSrcweir } 118cdf0e10cSrcweir 119cdf0e10cSrcweir sal_Int16 SAL_CALL 120cdf0e10cSrcweir cclass_Unicode::getCharacterDirection( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) { 121cdf0e10cSrcweir if ( nPos < 0 || Text.getLength() <= nPos ) return 0; 122cdf0e10cSrcweir return (sal_Int16) u_charDirection(Text.iterateCodePoints(&nPos, 0)); 123cdf0e10cSrcweir } 124cdf0e10cSrcweir 125cdf0e10cSrcweir 126cdf0e10cSrcweir sal_Int16 SAL_CALL 127cdf0e10cSrcweir cclass_Unicode::getScript( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) { 128cdf0e10cSrcweir if ( nPos < 0 || Text.getLength() <= nPos ) return 0; 129cdf0e10cSrcweir // ICU Unicode script type UBlockCode starts from 1 for Basci Latin, 130cdf0e10cSrcweir // while OO.o enum UnicideScript starts from 0. 131cdf0e10cSrcweir // To map ICU UBlockCode to OO.o UnicodeScript, it needs to shift 1. 132cdf0e10cSrcweir return (sal_Int16) ublock_getCode(Text.iterateCodePoints(&nPos, 0))-1; 133cdf0e10cSrcweir } 134cdf0e10cSrcweir 135cdf0e10cSrcweir 136cdf0e10cSrcweir sal_Int32 SAL_CALL 137cdf0e10cSrcweir cclass_Unicode::getCharType( const OUString& Text, sal_Int32* nPos, sal_Int32 increment) { 138cdf0e10cSrcweir using namespace ::com::sun::star::i18n::KCharacterType; 139cdf0e10cSrcweir 140cdf0e10cSrcweir sal_uInt32 ch = Text.iterateCodePoints(nPos, increment); 141cdf0e10cSrcweir if (increment > 0) ch = Text.iterateCodePoints(nPos, 0); 142cdf0e10cSrcweir switch ( u_charType(ch) ) { 143cdf0e10cSrcweir // Upper 144cdf0e10cSrcweir case U_UPPERCASE_LETTER : 145cdf0e10cSrcweir return UPPER|LETTER|PRINTABLE|BASE_FORM; 146cdf0e10cSrcweir 147cdf0e10cSrcweir // Lower 148cdf0e10cSrcweir case U_LOWERCASE_LETTER : 149cdf0e10cSrcweir return LOWER|LETTER|PRINTABLE|BASE_FORM; 150cdf0e10cSrcweir 151cdf0e10cSrcweir // Title 152cdf0e10cSrcweir case U_TITLECASE_LETTER : 153cdf0e10cSrcweir return TITLE_CASE|LETTER|PRINTABLE|BASE_FORM; 154cdf0e10cSrcweir 155cdf0e10cSrcweir // Letter 156cdf0e10cSrcweir case U_MODIFIER_LETTER : 157cdf0e10cSrcweir case U_OTHER_LETTER : 158cdf0e10cSrcweir return LETTER|PRINTABLE|BASE_FORM; 159cdf0e10cSrcweir 160cdf0e10cSrcweir // Digit 161cdf0e10cSrcweir case U_DECIMAL_DIGIT_NUMBER: 162cdf0e10cSrcweir case U_LETTER_NUMBER: 163cdf0e10cSrcweir case U_OTHER_NUMBER: 164cdf0e10cSrcweir return DIGIT|PRINTABLE|BASE_FORM; 165cdf0e10cSrcweir 166cdf0e10cSrcweir // Base 167cdf0e10cSrcweir case U_NON_SPACING_MARK: 168cdf0e10cSrcweir case U_ENCLOSING_MARK: 169cdf0e10cSrcweir case U_COMBINING_SPACING_MARK: 170cdf0e10cSrcweir return BASE_FORM|PRINTABLE; 171cdf0e10cSrcweir 172cdf0e10cSrcweir // Print 173cdf0e10cSrcweir case U_SPACE_SEPARATOR: 174cdf0e10cSrcweir 175cdf0e10cSrcweir case U_DASH_PUNCTUATION: 176cdf0e10cSrcweir case U_INITIAL_PUNCTUATION: 177cdf0e10cSrcweir case U_FINAL_PUNCTUATION: 178cdf0e10cSrcweir case U_CONNECTOR_PUNCTUATION: 179cdf0e10cSrcweir case U_OTHER_PUNCTUATION: 180cdf0e10cSrcweir 181cdf0e10cSrcweir case U_MATH_SYMBOL: 182cdf0e10cSrcweir case U_CURRENCY_SYMBOL: 183cdf0e10cSrcweir case U_MODIFIER_SYMBOL: 184cdf0e10cSrcweir case U_OTHER_SYMBOL: 185cdf0e10cSrcweir return PRINTABLE; 186cdf0e10cSrcweir 187cdf0e10cSrcweir // Control 188cdf0e10cSrcweir case U_CONTROL_CHAR: 189cdf0e10cSrcweir case U_FORMAT_CHAR: 190cdf0e10cSrcweir return CONTROL; 191cdf0e10cSrcweir 192cdf0e10cSrcweir case U_LINE_SEPARATOR: 193cdf0e10cSrcweir case U_PARAGRAPH_SEPARATOR: 194cdf0e10cSrcweir return CONTROL|PRINTABLE; 195cdf0e10cSrcweir 196cdf0e10cSrcweir // for all others 197cdf0e10cSrcweir default: 198cdf0e10cSrcweir return U_GENERAL_OTHER_TYPES; 199cdf0e10cSrcweir } 200cdf0e10cSrcweir } 201cdf0e10cSrcweir 202cdf0e10cSrcweir sal_Int32 SAL_CALL 203cdf0e10cSrcweir cclass_Unicode::getCharacterType( const OUString& Text, sal_Int32 nPos, const Locale& /*rLocale*/ ) throw(RuntimeException) { 204cdf0e10cSrcweir if ( nPos < 0 || Text.getLength() <= nPos ) return 0; 205cdf0e10cSrcweir return getCharType(Text, &nPos, 0); 206cdf0e10cSrcweir 207cdf0e10cSrcweir } 208cdf0e10cSrcweir 209cdf0e10cSrcweir sal_Int32 SAL_CALL 210cdf0e10cSrcweir cclass_Unicode::getStringType( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& /*rLocale*/ ) throw(RuntimeException) { 211cdf0e10cSrcweir if ( nPos < 0 || Text.getLength() <= nPos ) return 0; 212cdf0e10cSrcweir 213cdf0e10cSrcweir sal_Int32 result = getCharType(Text, &nPos, 0); 214cdf0e10cSrcweir for (sal_Int32 i = 1; i < nCount && nPos < Text.getLength(); i++) 215cdf0e10cSrcweir result |= getCharType(Text, &nPos, 1); 216cdf0e10cSrcweir return result; 217cdf0e10cSrcweir } 218cdf0e10cSrcweir 219cdf0e10cSrcweir ParseResult SAL_CALL cclass_Unicode::parseAnyToken( 220cdf0e10cSrcweir const OUString& Text, 221cdf0e10cSrcweir sal_Int32 nPos, 222cdf0e10cSrcweir const Locale& rLocale, 223cdf0e10cSrcweir sal_Int32 startCharTokenType, 224cdf0e10cSrcweir const OUString& userDefinedCharactersStart, 225cdf0e10cSrcweir sal_Int32 contCharTokenType, 226cdf0e10cSrcweir const OUString& userDefinedCharactersCont ) 227cdf0e10cSrcweir throw(RuntimeException) 228cdf0e10cSrcweir { 229cdf0e10cSrcweir ParseResult r; 230cdf0e10cSrcweir if ( Text.getLength() <= nPos ) 231cdf0e10cSrcweir return r; 232cdf0e10cSrcweir 233cdf0e10cSrcweir setupParserTable( rLocale, 234cdf0e10cSrcweir startCharTokenType, userDefinedCharactersStart, 235cdf0e10cSrcweir contCharTokenType, userDefinedCharactersCont ); 236cdf0e10cSrcweir parseText( r, Text, nPos ); 237cdf0e10cSrcweir 238cdf0e10cSrcweir return r; 239cdf0e10cSrcweir } 240cdf0e10cSrcweir 241cdf0e10cSrcweir 242cdf0e10cSrcweir ParseResult SAL_CALL cclass_Unicode::parsePredefinedToken( 243cdf0e10cSrcweir sal_Int32 nTokenType, 244cdf0e10cSrcweir const OUString& Text, 245cdf0e10cSrcweir sal_Int32 nPos, 246cdf0e10cSrcweir const Locale& rLocale, 247cdf0e10cSrcweir sal_Int32 startCharTokenType, 248cdf0e10cSrcweir const OUString& userDefinedCharactersStart, 249cdf0e10cSrcweir sal_Int32 contCharTokenType, 250cdf0e10cSrcweir const OUString& userDefinedCharactersCont ) 251cdf0e10cSrcweir throw(RuntimeException) 252cdf0e10cSrcweir { 253cdf0e10cSrcweir ParseResult r; 254cdf0e10cSrcweir if ( Text.getLength() <= nPos ) 255cdf0e10cSrcweir return r; 256cdf0e10cSrcweir 257cdf0e10cSrcweir setupParserTable( rLocale, 258cdf0e10cSrcweir startCharTokenType, userDefinedCharactersStart, 259cdf0e10cSrcweir contCharTokenType, userDefinedCharactersCont ); 260cdf0e10cSrcweir parseText( r, Text, nPos, nTokenType ); 261cdf0e10cSrcweir 262cdf0e10cSrcweir return r; 263cdf0e10cSrcweir } 264cdf0e10cSrcweir 265cdf0e10cSrcweir OUString SAL_CALL cclass_Unicode::getImplementationName() throw( RuntimeException ) 266cdf0e10cSrcweir { 267cdf0e10cSrcweir return OUString::createFromAscii(cClass); 268cdf0e10cSrcweir } 269cdf0e10cSrcweir 270cdf0e10cSrcweir 271cdf0e10cSrcweir sal_Bool SAL_CALL cclass_Unicode::supportsService(const OUString& rServiceName) throw( RuntimeException ) 272cdf0e10cSrcweir { 273cdf0e10cSrcweir return !rServiceName.compareToAscii(cClass); 274cdf0e10cSrcweir } 275cdf0e10cSrcweir 276cdf0e10cSrcweir Sequence< OUString > SAL_CALL cclass_Unicode::getSupportedServiceNames() throw( RuntimeException ) 277cdf0e10cSrcweir { 278cdf0e10cSrcweir Sequence< OUString > aRet(1); 279cdf0e10cSrcweir aRet[0] = OUString::createFromAscii(cClass); 280cdf0e10cSrcweir return aRet; 281cdf0e10cSrcweir } 282cdf0e10cSrcweir 283cdf0e10cSrcweir } } } } 284cdf0e10cSrcweir 285