/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/



#ifndef __com_sun_star_i18n_XCharacterClassification_idl__
#define __com_sun_star_i18n_XCharacterClassification_idl__

#include <com/sun/star/i18n/ParseResult.idl>

#ifndef __com_sun_star_lang_Locale_idl__
#include <com/sun/star/lang/Locale.idl>
#endif
#ifndef __com_sun_star_uno_XInterface_idl__
#include <com/sun/star/uno/XInterface.idl>
#endif

//============================================================================

module com { module sun { module star { module i18n {

//============================================================================

/*

Possible tokens to be parsed with parse...Token():

UPASCALPHA=[A-Z]
LOASCALPHA=[a-z]
ASCALPHA=1*(UPASCALPHA|LOASCALPHA)
ASCDIGIT=[0-9]
ASC_UNDERSCORE='_'
ASC_SPACE=' '
ASC_HT='\0x9'
ASC_VT='\0xb'
ASC_WS=ASC_SPACE|ASC_HT|ASC_VT
ASC_DBL_QUOTE=\"
ASC_QUOTE=\'
UPASC_IDENTIFIER=UPASCALPHA *(UPASCALPHA|ASCDIGIT|ASC_UNDERSCORE)

ALPHA,DIGIT are the tokens which return true for isAlpha and isDigit
ALNUM=ALPHA|DIGIT
CHAR=anycharacter
WS=isWhiteSpace()
SIGN='+'|'-'
DECSEP=<locale dependent decimal separator>
GRPSEP=<locale dependent thousand separator>
EXPONENT=(E|e)[SIGN]1*ASCDIGIT

IDENTIFIER=ALPHA *ALNUM
UIDENTIFIER=(ALPHA | ASC_UNDERSCORE) *(ALNUM|ASC_UNDERSCORE)
ALPHA_NAME=ALPHA *(ALNUM|DEFCHARS)
ANY_NAME=1*(ALNUM|DEFCHARS)
SINGLE_QUOTE_NAME=ASC_QUOTE(1*CHAR)ASC_QUOTE
DOUBLE_QUOTE_NAME=ASC_DBL_QUOTE(*CHAR)ASC_DBL_QUOTE
ASC_NUMBER=[SIGN]*(1*ASCDIGIT *(GRPSEP 1*ASCDIGIT))[DECSEP]1*ASCDIGIT[EXPONENT]
NUMBER=[SIGN]*(1*DIGIT *(GRPSEP 1*DIGIT))[DECSEP]1*DIGIT[EXPONENT]

NOTE(review): DEFCHARS is used above but not defined in this list;
presumably it stands for the user-defined characters passed as
aUserDefinedCharactersStart / aUserDefinedCharactersCont -- confirm.

*/

//============================================================================

/**
    Character classification (upper, lower, digit, letter, number, ...)
    and generic Unicode enabled parser.
 */

published interface XCharacterClassification : com::sun::star::uno::XInterface
{
    //------------------------------------------------------------------------
    /** Convert lower case alpha to upper case alpha, starting at
        position <em>nPos</em> for <em>nCount</em> code points.

        @param aText
            Text containing the characters to be converted.

        @param nPos
            Position in <em>aText</em> where the conversion starts.

        @param nCount
            Number of code points to be converted.

        @param aLocale
            The locale to be used for the conversion.

        @returns
            The converted text.
     */
    string  toUpper( [in] string aText, [in] long nPos, [in] long nCount,
                     [in] com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Convert upper case alpha to lower case alpha, starting at
        position <em>nPos</em> for <em>nCount</em> code points.

        @param aText
            Text containing the characters to be converted.

        @param nPos
            Position in <em>aText</em> where the conversion starts.

        @param nCount
            Number of code points to be converted.

        @param aLocale
            The locale to be used for the conversion.

        @returns
            The converted text.
     */
    string  toLower( [in] string aText, [in] long nPos, [in] long nCount,
                     [in] com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Convert to title case, starting at
        position <em>nPos</em> for <em>nCount</em> code points.

        @param aText
            Text containing the characters to be converted.

        @param nPos
            Position in <em>aText</em> where the conversion starts.

        @param nCount
            Number of code points to be converted.

        @param aLocale
            The locale to be used for the conversion.

        @returns
            The converted text.
     */
    string  toTitle( [in] string aText, [in] long nPos, [in] long nCount,
                     [in] com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Get <type>UnicodeType</type> of character at position <em>nPos</em>.

        @param aText
            Text containing the character to be examined.

        @param nPos
            Position of the character within <em>aText</em>.

        @returns
            The <type>UnicodeType</type> of that character.
     */
    short   getType( [in] string aText, [in] long nPos );

    //------------------------------------------------------------------------
    /** Get <type>DirectionProperty</type> of character at position
        <em>nPos</em>.

        @param aText
            Text containing the character to be examined.

        @param nPos
            Position of the character within <em>aText</em>.

        @returns
            The <type>DirectionProperty</type> of that character.
     */
    short   getCharacterDirection( [in] string aText, [in] long nPos );

    //------------------------------------------------------------------------
    /** Get <type>UnicodeScript</type> of character at position <em>nPos</em>.

        @param aText
            Text containing the character to be examined.

        @param nPos
            Position of the character within <em>aText</em>.

        @returns
            The <type>UnicodeScript</type> of that character.
     */
    short   getScript( [in] string aText, [in] long nPos );

    //------------------------------------------------------------------------
    /** Get <type>KCharacterType</type> of character at position <em>nPos</em>.

        @param aText
            Text containing the character to be examined.

        @param nPos
            Position of the character within <em>aText</em>.

        @param aLocale
            The locale to be used for the character type determination.

        @returns
            The <type>KCharacterType</type> of that character.
     */
    long    getCharacterType( [in] string aText, [in] long nPos,
                              [in] com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Get accumulated <type>KCharacterType</type>s of string starting
        at position <em>nPos</em> of length <em>nCount</em> code points.

        @param aText
            Text containing the characters to be examined.

        @param nPos
            Position in <em>aText</em> where the examination starts.

        @param nCount
            Number of code points to be examined.

        @param aLocale
            The locale to be used for the character type determination.

        @returns
            A number with appropriate flags set to indicate what type of
            characters the string contains, each flag value being one of
            KCharacterType values.
     */
    long    getStringType( [in] string aText, [in] long nPos, [in] long nCount,
                           [in] com::sun::star::lang::Locale aLocale );


    //------------------------------------------------------------------------
    /**
        Parse a string for a token starting at position <em>nPos</em>.

        <p> A name or identifier must match the
        <type>KParseTokens</type> criteria passed in
        <em>nStartCharFlags</em> and <em>nContCharFlags</em> and may
        additionally contain characters of
        <em>aUserDefinedCharactersStart</em> and/or
        <em>aUserDefinedCharactersCont</em>. </p>


        @returns
            A filled <type>ParseResult</type> structure. If no
            unambiguous token could be parsed,
            <member>ParseResult::TokenType</member> will be set to
            <b>0</b> (zero), other fields will contain the values parsed
            so far.

            <p> If a token may represent either a numeric value or a
            name according to the passed Start/Cont-Flags/Chars, both
            <const>KParseType::ASC_NUMBER</const> (or
            <const>KParseType::UNI_NUMBER</const>) and
            <const>KParseType::IDENTNAME</const> are set in
            <member>ParseResult::TokenType</member>. </p>

        @param aText
            Text to be parsed.

        @param nPos
            Position where parsing starts.

        @param aLocale
            The locale, for example, for decimal and group separator or
            character type determination.

        @param nStartCharFlags
            A set of <type>KParseTokens</type> constants determining the
            allowed characters a name or identifier may start with.

        @param aUserDefinedCharactersStart
            A set of additionally allowed characters a name or
            identifier may start with.

        @param nContCharFlags
            A set of <type>KParseTokens</type> constants determining the
            allowed characters a name or identifier may continue with.

        @param aUserDefinedCharactersCont
            A set of additionally allowed characters a name or
            identifier may continue with.

        @example:C++
        <listing>
        using namespace ::com::sun::star::i18n;
        // First character of an identifier may be any alphabetic or underscore.
        sal_Int32 nStartFlags = KParseTokens::ANY_ALPHA | KParseTokens::ASC_UNDERSCORE;
        // Continuing characters may be any alphanumeric or underscore or dot.
        sal_Int32 nContFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE | KParseTokens::ASC_DOT;
        // No further characters assumed to be contained in an identifier
        String aEmptyString;
        // Parse any token.
        ParseResult rRes = xCC->parseAnyToken( aText, nPos, aLocale,
            nStartFlags, aEmptyString, nContFlags, aEmptyString );
        // Get parsed token.
        if ( rRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER) )
            fValue = rRes.Value;
        if ( rRes.TokenType & KParseType::IDENTNAME )
            aName = aText.Copy( nPos, rRes.EndPos - nPos );
        else if ( rRes.TokenType & KParseType::SINGLE_QUOTE_NAME )
            aName = rRes.DequotedNameOrString;
        else if ( rRes.TokenType & KParseType::DOUBLE_QUOTE_STRING )
            aString = rRes.DequotedNameOrString;
        else if ( rRes.TokenType & KParseType::BOOLEAN )
            aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
        else if ( rRes.TokenType & KParseType::ONE_SINGLE_CHAR )
            aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
        </listing>
     */

    ParseResult parseAnyToken(
                            [in] string aText,
                            [in] long nPos,
                            [in] com::sun::star::lang::Locale aLocale,
                            [in] long nStartCharFlags,
                            [in] string aUserDefinedCharactersStart,
                            [in] long nContCharFlags,
                            [in] string aUserDefinedCharactersCont
                            );

    //------------------------------------------------------------------------
    /**
        Parse a string for a token of type <em>nTokenType</em> starting
        at position <em>nPos</em>.

        <p> Other parameters are the same as in
        <member>parseAnyToken</member>. If the actual token does not
        match the passed <em>nTokenType</em> a
        <member>ParseResult::TokenType</member> set to <b>0</b> (zero)
        is returned. </p>

        @param nTokenType
            One or more of the <type>KParseType</type> constants.

        @example:C++
        <listing>
        // Determine if a given name is a valid name (not quoted) and contains
        // only allowed characters.
        using namespace ::com::sun::star::i18n;
        // First character of an identifier may be any alphanumeric or underscore.
        sal_Int32 nStartFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE;
        // No further characters assumed to be contained in an identifier start.
        String aEmptyString;
        // Continuing characters may be any alphanumeric or underscore.
        sal_Int32 nContFlags = nStartFlags;
        // Additionally, continuing characters may contain a blank.
        String aContChars( RTL_CONSTASCII_USTRINGPARAM(" ") );
        // Parse predefined (must be an IDENTNAME) token.
        ParseResult rRes = xCC->parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale,
            nStartFlags, aEmptyString, nContFlags, aContChars );
        // Test if it is an identifier name and if it is the only token,
        // i.e. nothing else follows it.
        bValid = (rRes.TokenType & KParseType::IDENTNAME) && rRes.EndPos == rName.Len();
        </listing>
     */

    ParseResult parsePredefinedToken(
                            [in] long nTokenType,
                            [in] string aText,
                            [in] long nPos,
                            [in] com::sun::star::lang::Locale aLocale,
                            [in] long nStartCharFlags,
                            [in] string aUserDefinedCharactersStart,
                            [in] long nContCharFlags,
                            [in] string aUserDefinedCharactersCont
                            );
};

//=============================================================================

}; }; }; };

#endif