/*************************************************************************
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * Copyright 2000, 2010 Oracle and/or its affiliates.
 *
 * OpenOffice.org - a multi-platform office productivity suite
 *
 * This file is part of OpenOffice.org.
 *
 * OpenOffice.org is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3
 * only, as published by the Free Software Foundation.
 *
 * OpenOffice.org is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License version 3 for more details
 * (a copy is included in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU Lesser General Public License
 * version 3 along with OpenOffice.org. If not, see
 * <http://www.openoffice.org/license.html>
 * for a copy of the LGPLv3 License.
 *
 ************************************************************************/

#ifndef __com_sun_star_i18n_XCharacterClassification_idl__
#define __com_sun_star_i18n_XCharacterClassification_idl__

#include <com/sun/star/i18n/ParseResult.idl>

#ifndef __com_sun_star_lang_Locale_idl__
#include <com/sun/star/lang/Locale.idl>
#endif
#ifndef __com_sun_star_uno_XInterface_idl__
#include <com/sun/star/uno/XInterface.idl>
#endif

//============================================================================

module com { module sun { module star { module i18n {

//============================================================================

/*

Possible tokens to be parsed with parse...Token():

UPASCALPHA=[A-Z]
LOASCALPHA=[a-z]
ASCALPHA=1*(UPASCALPHA|LOASCALPHA)
ASCDIGIT=[0-9]
ASC_UNDERSCORE='_'
ASC_SPACE=' '
ASC_HT='\0x9'
ASC_VT='\0xb'
ASC_WS=ASC_SPACE|ASC_HT|ASC_VT
ASC_DBL_QUOTE=\";
ASC_QUOTE=\'
UPASC_IDENTIFIER=UPASCALPHA *(UPASCALPHA|ASCDIGIT|ASC_UNDERSCORE)

ALPHA,DIGIT are the tokens which return true for isAlpha and isDigit
ALNUM=ALPHA|DIGIT
CHAR=anycharacter
WS=isWhiteSpace()
SIGN='+'|'-'
DECSEP=<locale dependent decimal separator>
GRPSEP=<locale dependent thousand separator>
EXPONENT=(E|e)[SIGN]1*ASCDIGIT

IDENTIFIER=ALPHA *ALNUM
UIDENTIFIER=(ALPHA | ASC_UNDERSCORE) *(ALNUM|ASC_UNDERSCORE)
ALPHA_NAME=ALPHA *(ALNUM|DEFCHARS)
ANY_NAME=1*(ALNUM|DEFCHARS)
SINGLE_QUOTE_NAME=ASC_QUOTE(1*CHAR)ASC_QUOTE
DOUBLE_QUOTE_NAME=ASC_DBL_QUOTE(*CHAR)ASC_DBL_QUOTE
ASC_NUMBER=[SIGN]*(1*ASCDIGIT *(GRPSEP 1*ASCDIGIT))[DECSEP]1*ASCDIGIT[EXPONENT]
NUMBER=[SIGN]*(1*DIGIT *(GRPSEP 1*DIGIT))[DECSEP]1*DIGIT[EXPONENT]

*/

//============================================================================

/**
    Character classification (upper, lower, digit, letter, number, ...)
    and generic Unicode enabled parser.
 */

published interface XCharacterClassification : com::sun::star::uno::XInterface
{
    //------------------------------------------------------------------------
    /** Convert lower case alpha to upper case alpha, starting at
        position <em>nPos</em> for <em>nCount</em> code points.

        @param aText
            Text containing the characters to convert.

        @param nPos
            Position where the conversion starts.

        @param nCount
            Number of code points to convert.

        @param aLocale
            The locale passed along for the conversion.
     */
    string toUpper( [in] string aText, [in] long nPos, [in] long nCount,
                    [in] com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Convert upper case alpha to lower case alpha, starting at
        position <em>nPos</em> for <em>nCount</em> code points.

        @param aText
            Text containing the characters to convert.

        @param nPos
            Position where the conversion starts.

        @param nCount
            Number of code points to convert.

        @param aLocale
            The locale passed along for the conversion.
     */
    string toLower( [in] string aText, [in] long nPos, [in] long nCount,
                    [in] com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Convert to title case, starting at
        position <em>nPos</em> for <em>nCount</em> code points.

        @param aText
            Text containing the characters to convert.

        @param nPos
            Position where the conversion starts.

        @param nCount
            Number of code points to convert.

        @param aLocale
            The locale passed along for the conversion.
     */
    string toTitle( [in] string aText, [in] long nPos, [in] long nCount,
                    [in] com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /// Get <type>UnicodeType</type> of character at position <em>nPos</em>.
    short getType( [in] string aText, [in] long nPos );

    //------------------------------------------------------------------------
    /** Get <type>DirectionProperty</type> of character at position
        <em>nPos</em>.
     */
    short getCharacterDirection( [in] string aText, [in] long nPos );

    //------------------------------------------------------------------------
    /// Get <type>UnicodeScript</type> of character at position <em>nPos</em>.
    short getScript( [in] string aText, [in] long nPos );

    //------------------------------------------------------------------------
    /// Get <type>KCharacterType</type> of character at position <em>nPos</em>.
    long getCharacterType( [in] string aText, [in] long nPos,
                           [in] com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Get accumulated <type>KCharacterType</type>s of string starting
        at position <em>nPos</em> of length <em>nCount</em> code points.

        @returns
            A number with appropriate flags set to indicate what type of
            characters the string contains, each flag value being one of
            KCharacterType values.
     */
    long getStringType( [in] string aText, [in] long nPos, [in] long nCount,
                        [in] com::sun::star::lang::Locale aLocale );


    //------------------------------------------------------------------------
    /**
        Parse a string for a token starting at position <em>nPos</em>.

        <p> A name or identifier must match the
        <type>KParseTokens</type> criteria passed in
        <em>nStartCharFlags</em> and <em>nContCharFlags</em> and may
        additionally contain characters of
        <em>aUserDefinedCharactersStart</em> and/or
        <em>aUserDefinedCharactersCont</em>. </p>


        @returns
            A filled <type>ParseResult</type> structure. If no
            unambiguous token could be parsed,
            <member>ParseResult::TokenType</member> will be set to
            <b>0</b> (zero), other fields will contain the values parsed
            so far.

            <p> If a token may represent either a numeric value or a
            name according to the passed Start/Cont-Flags/Chars, both
            <const>KParseType::ASC_NUMBER</const> (or
            <const>KParseType::UNI_NUMBER</const>) and
            <const>KParseType::IDENTNAME</const> are set in
            <member>ParseResult::TokenType</member>. </p>

        @param aText
            Text to be parsed.

        @param nPos
            Position where parsing starts.

        @param aLocale
            The locale, for example, for decimal and group separator or
            character type determination.

        @param nStartCharFlags
            A set of <type>KParseTokens</type> constants determining the
            allowed characters a name or identifier may start with.

        @param aUserDefinedCharactersStart
            A set of additionally allowed characters a name or
            identifier may start with.

        @param nContCharFlags
            A set of <type>KParseTokens</type> constants determining the
            allowed characters a name or identifier may continue with.

        @param aUserDefinedCharactersCont
            A set of additionally allowed characters a name or
            identifier may continue with.

        @example:C++
        <listing>
        using namespace ::com::sun::star::i18n;
        // First character of an identifier may be any alphabetic or underscore.
        sal_Int32 nStartFlags = KParseTokens::ANY_ALPHA | KParseTokens::ASC_UNDERSCORE;
        // Continuing characters may be any alphanumeric or underscore or dot.
        sal_Int32 nContFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE | KParseTokens::ASC_DOT;
        // No further characters assumed to be contained in an identifier
        String aEmptyString;
        // Parse any token.
        ParseResult rRes = xCC->parseAnyToken( aText, nPos, aLocale,
            nStartFlags, aEmptyString, nContFlags, aEmptyString );
        // Get parsed token.
        if ( rRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER) )
            fValue = rRes.Value;
        if ( rRes.TokenType & KParseType::IDENTNAME )
            aName = aText.Copy( nPos, rRes.EndPos - nPos );
        else if ( rRes.TokenType & KParseType::SINGLE_QUOTE_NAME )
            aName = rRes.DequotedNameOrString;
        else if ( rRes.TokenType & KParseType::DOUBLE_QUOTE_STRING )
            aString = rRes.DequotedNameOrString;
        else if ( rRes.TokenType & KParseType::BOOLEAN )
            aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
        else if ( rRes.TokenType & KParseType::ONE_SINGLE_CHAR )
            aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
        </listing>
     */

    ParseResult parseAnyToken(
                            [in] string aText,
                            [in] long nPos,
                            [in] com::sun::star::lang::Locale aLocale,
                            [in] long nStartCharFlags,
                            [in] string aUserDefinedCharactersStart,
                            [in] long nContCharFlags,
                            [in] string aUserDefinedCharactersCont
                            );

    //------------------------------------------------------------------------
    /**
        Parse a string for a token of type <em>nTokenType</em> starting
        at position <em>nPos</em>.

        <p> Other parameters are the same as in
        <member>parseAnyToken</member>. If the actual token does not
        match the passed <em>nTokenType</em> a
        <member>ParseResult::TokenType</member> set to <b>0</b> (zero)
        is returned. </p>

        @param nTokenType
            One or more of the <type>KParseType</type> constants.

        @returns
            A <type>ParseResult</type> structure whose
            <member>ParseResult::TokenType</member> is <b>0</b> (zero)
            if the token found does not match <em>nTokenType</em>.

        @example:C++
        <listing>
        // Determine if a given name is a valid name (not quoted) and contains
        // only allowed characters.
        using namespace ::com::sun::star::i18n;
        // First character of an identifier may be any alphanumeric or underscore.
        sal_Int32 nStartFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE;
        // No further characters assumed to be contained in an identifier start.
        String aEmptyString;
        // Continuing characters may be any alphanumeric or underscore.
        sal_Int32 nContFlags = nStartFlags;
        // Additionally, continuing characters may contain a blank.
        String aContChars( RTL_CONSTASCII_USTRINGPARAM(" ") );
        // Parse predefined (must be an IDENTNAME) token.
        ParseResult rRes = xCC->parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale,
            nStartFlags, aEmptyString, nContFlags, aContChars );
        // Test that it is an identifier name and that nothing else
        // follows it.
        bValid = (rRes.TokenType & KParseType::IDENTNAME) && rRes.EndPos == rName.Len();
        </listing>
     */

    ParseResult parsePredefinedToken(
                            [in] long nTokenType,
                            [in] string aText,
                            [in] long nPos,
                            [in] com::sun::star::lang::Locale aLocale,
                            [in] long nStartCharFlags,
                            [in] string aUserDefinedCharactersStart,
                            [in] long nContCharFlags,
                            [in] string aUserDefinedCharactersCont
                            );
};

//=============================================================================
}; }; }; };

#endif