/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/



#ifndef __com_sun_star_i18n_XCharacterClassification_idl__
#define __com_sun_star_i18n_XCharacterClassification_idl__

#include <com/sun/star/i18n/ParseResult.idl>

#ifndef __com_sun_star_lang_Locale_idl__
#include <com/sun/star/lang/Locale.idl>
#endif
#ifndef __com_sun_star_uno_XInterface_idl__
#include <com/sun/star/uno/XInterface.idl>
#endif

//============================================================================

module com { module sun { module star { module i18n {

//============================================================================

/*

Possible tokens to be parsed with parse...Token():

UPASCALPHA=[A-Z]
LOASCALPHA=[a-z]
ASCALPHA=1*(UPASCALPHA|LOASCALPHA)
ASCDIGIT=[0-9]
ASC_UNDERSCORE='_'
ASC_SPACE=' '
ASC_HT='\0x9'
ASC_VT='\0xb'
ASC_WS=ASC_SPACE|ASC_HT|ASC_VT
ASC_DBL_QUOTE=\"
ASC_QUOTE=\'
UPASC_IDENTIFIER=UPASCALPHA *(UPASCALPHA|ASCDIGIT|ASC_UNDERSCORE)

ALPHA,DIGIT are the tokens which return true for isAlpha and isDigit
ALNUM=ALPHA|DIGIT
CHAR=any character
WS=isWhiteSpace()
SIGN='+'|'-'
DECSEP=<locale dependent decimal separator>
GRPSEP=<locale dependent thousands separator>
EXPONENT=(E|e)[SIGN]1*ASCDIGIT

IDENTIFIER=ALPHA *ALNUM
UIDENTIFIER=(ALPHA | ASC_UNDERSCORE) *(ALNUM|ASC_UNDERSCORE)
ALPHA_NAME=ALPHA *(ALNUM|DEFCHARS)
ANY_NAME=1*(ALNUM|DEFCHARS)
SINGLE_QUOTE_NAME=ASC_QUOTE(1*CHAR)ASC_QUOTE
DOUBLE_QUOTE_NAME=ASC_DBL_QUOTE(*CHAR)ASC_DBL_QUOTE
ASC_NUMBER=[SIGN]*(1*ASCDIGIT *(GRPSEP 1*ASCDIGIT))[DECSEP]1*ASCDIGIT[EXPONENT]
NUMBER=[SIGN]*(1*DIGIT *(GRPSEP 1*DIGIT))[DECSEP]1*DIGIT[EXPONENT]

NOTE(review): DEFCHARS is not defined in this list; presumably it denotes the
user defined characters passed to the parse functions -- TODO confirm.

*/

//============================================================================

/**
    Character classification (upper, lower, digit, letter, number, ...)
    and generic Unicode enabled parser.
 */

published interface XCharacterClassification : com::sun::star::uno::XInterface
{
    //------------------------------------------------------------------------
    /** Convert lower case alpha to upper case alpha, starting at
        position <em>nPos</em> for <em>nCount</em> code points.

        @param aText
            Text containing the characters to convert.

        @param nPos
            Position in <em>aText</em> where the conversion starts.

        @param nCount
            Number of code points to convert.

        @param aLocale
            Locale passed along for the conversion.
     */
    string  toUpper( [in] string aText, [in] long nPos, [in] long nCount,
                     [in] com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Convert upper case alpha to lower case alpha, starting at
        position <em>nPos</em> for <em>nCount</em> code points.

        @param aText
            Text containing the characters to convert.

        @param nPos
            Position in <em>aText</em> where the conversion starts.

        @param nCount
            Number of code points to convert.

        @param aLocale
            Locale passed along for the conversion.
     */
    string  toLower( [in] string aText, [in] long nPos, [in] long nCount,
                     [in] com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Convert to title case, starting at
        position <em>nPos</em> for <em>nCount</em> code points.

        @param aText
            Text containing the characters to convert.

        @param nPos
            Position in <em>aText</em> where the conversion starts.

        @param nCount
            Number of code points to convert.

        @param aLocale
            Locale passed along for the conversion.
     */
    string  toTitle( [in] string aText, [in] long nPos, [in] long nCount,
                     [in] com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /// Get <type>UnicodeType</type> of character at position <em>nPos</em>.
    short   getType( [in] string aText, [in] long nPos );

    //------------------------------------------------------------------------
    /** Get <type>DirectionProperty</type> of character at position
        <em>nPos</em>.
     */
    short   getCharacterDirection( [in] string aText, [in] long nPos );

    //------------------------------------------------------------------------
    /// Get <type>UnicodeScript</type> of character at position <em>nPos</em>.
    short   getScript( [in] string aText, [in] long nPos );

    //------------------------------------------------------------------------
    /// Get <type>KCharacterType</type> of character at position <em>nPos</em>.
    long    getCharacterType( [in] string aText, [in] long nPos,
                              [in] com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Get accumulated <type>KCharacterType</type>s of string starting
        at position <em>nPos</em> of length <em>nCount</em> code points.

        @param aText
            Text containing the characters to classify.

        @param nPos
            Position in <em>aText</em> where the examined range starts.

        @param nCount
            Length of the examined range in code points.

        @param aLocale
            Locale passed along for the classification.

        @returns
            A number with appropriate flags set to indicate what type of
            characters the string contains, each flag value being one of
            KCharacterType values.
     */
    long    getStringType( [in] string aText, [in] long nPos, [in] long nCount,
                           [in] com::sun::star::lang::Locale aLocale );


    //------------------------------------------------------------------------
    /**
        Parse a string for a token starting at position <em>nPos</em>.

        <p> A name or identifier must match the
        <type>KParseTokens</type> criteria passed in
        <em>nStartCharFlags</em> and <em>nContCharFlags</em> and may
        additionally contain characters of
        <em>aUserDefinedCharactersStart</em> and/or
        <em>aUserDefinedCharactersCont</em>. </p>


        @returns
            A filled <type>ParseResult</type> structure. If no
            unambiguous token could be parsed,
            <member>ParseResult::TokenType</member> will be set to
            <b>0</b> (zero), other fields will contain the values parsed
            so far.

            <p> If a token may represent either a numeric value or a
            name according to the passed Start/Cont-Flags/Chars, both
            <const>KParseType::ASC_NUMBER</const> (or
            <const>KParseType::UNI_NUMBER</const>) and
            <const>KParseType::IDENTNAME</const> are set in
            <member>ParseResult::TokenType</member>. </p>

        @param aText
            Text to be parsed.

        @param nPos
            Position where parsing starts.

        @param aLocale
            The locale, for example, for decimal and group separator or
            character type determination.

        @param nStartCharFlags
            A set of <type>KParseTokens</type> constants determining the
            allowed characters a name or identifier may start with.

        @param aUserDefinedCharactersStart
            A set of additionally allowed characters a name or
            identifier may start with.

        @param nContCharFlags
            A set of <type>KParseTokens</type> constants determining the
            allowed characters a name or identifier may continue with.

        @param aUserDefinedCharactersCont
            A set of additionally allowed characters a name or
            identifier may continue with.

        @example:C++
        <listing>
            using namespace ::com::sun::star::i18n;
            // First character of an identifier may be any alphabetic or underscore.
            sal_Int32 nStartFlags = KParseTokens::ANY_ALPHA | KParseTokens::ASC_UNDERSCORE;
            // Continuing characters may be any alphanumeric or underscore or dot.
            sal_Int32 nContFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE | KParseTokens::ASC_DOT;
            // No further characters assumed to be contained in an identifier
            String aEmptyString;
            // Parse any token.
            ParseResult rRes = xCC->parseAnyToken( aText, nPos, aLocale,
                nStartFlags, aEmptyString, nContFlags, aEmptyString );
            // Get parsed token.
            if ( rRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER) )
                fValue = rRes.Value;
            if ( rRes.TokenType & KParseType::IDENTNAME )
                aName = aText.Copy( nPos, rRes.EndPos - nPos );
            else if ( rRes.TokenType & KParseType::SINGLE_QUOTE_NAME )
                aName = rRes.DequotedNameOrString;
            else if ( rRes.TokenType & KParseType::DOUBLE_QUOTE_STRING )
                aString = rRes.DequotedNameOrString;
            else if ( rRes.TokenType & KParseType::BOOLEAN )
                aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
            else if ( rRes.TokenType & KParseType::ONE_SINGLE_CHAR )
                aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
        </listing>
     */

    ParseResult parseAnyToken(
                        [in] string aText,
                        [in] long nPos,
                        [in] com::sun::star::lang::Locale aLocale,
                        [in] long nStartCharFlags,
                        [in] string aUserDefinedCharactersStart,
                        [in] long nContCharFlags,
                        [in] string aUserDefinedCharactersCont
                        );

    //------------------------------------------------------------------------
    /**
        Parse a string for a token of type <em>nTokenType</em> starting
        at position <em>nPos</em>.

        <p> Other parameters are the same as in
        <member>parseAnyToken</member>. If the actual token does not
        match the passed <em>nTokenType</em> a
        <member>ParseResult::TokenType</member> set to <b>0</b> (zero)
        is returned. </p>

        @param nTokenType
            One or more of the <type>KParseType</type> constants.

        @example:C++
        <listing>
            // Determine if a given name is a valid name (not quoted) and contains
            // only allowed characters.
            using namespace ::com::sun::star::i18n;
            // First character of an identifier may be any alphanumeric or underscore.
            sal_Int32 nStartFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE;
            // No further characters assumed to be contained in an identifier start.
            String aEmptyString;
            // Continuing characters may be any alphanumeric or underscore.
            sal_Int32 nContFlags = nStartFlags;
            // Additionally, continuing characters may contain a blank.
            String aContChars( RTL_CONSTASCII_USTRINGPARAM(" ") );
            // Parse predefined (must be an IDENTNAME) token.
            ParseResult rRes = xCC->parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale,
                nStartFlags, aEmptyString, nContFlags, aContChars );
            // Test if it is an identifier name, if it is the only one,
            // and that nothing else follows it.
            bValid = (rRes.TokenType & KParseType::IDENTNAME) && rRes.EndPos == rName.Len();
        </listing>
     */

    ParseResult parsePredefinedToken(
                        [in] long nTokenType,
                        [in] string aText,
                        [in] long nPos,
                        [in] com::sun::star::lang::Locale aLocale,
                        [in] long nStartCharFlags,
                        [in] string aUserDefinedCharactersStart,
                        [in] long nContCharFlags,
                        [in] string aUserDefinedCharactersCont
                        );
};

//=============================================================================
}; }; }; };

#endif