/*************************************************************************
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * Copyright 2000, 2010 Oracle and/or its affiliates.
 *
 * OpenOffice.org - a multi-platform office productivity suite
 *
 * This file is part of OpenOffice.org.
 *
 * OpenOffice.org is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3
 * only, as published by the Free Software Foundation.
 *
 * OpenOffice.org is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License version 3 for more details
 * (a copy is included in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU Lesser General Public License
 * version 3 along with OpenOffice.org. If not, see
 * <http://www.openoffice.org/license.html>
 * for a copy of the LGPLv3 License.
 *
 ************************************************************************/

#ifndef __com_sun_star_i18n_XCharacterClassification_idl__
#define __com_sun_star_i18n_XCharacterClassification_idl__

#include <com/sun/star/i18n/ParseResult.idl>

#ifndef __com_sun_star_lang_Locale_idl__
#include <com/sun/star/lang/Locale.idl>
#endif
#ifndef __com_sun_star_uno_XInterface_idl__
#include <com/sun/star/uno/XInterface.idl>
#endif

//============================================================================

module com { module sun { module star { module i18n {

//============================================================================

/*

Possible tokens to be parsed with parse...Token():

UPASCALPHA=[A-Z]
LOASCALPHA=[a-z]
ASCALPHA=1*(UPASCALPHA|LOASCALPHA)
ASCDIGIT=[0-9]
ASC_UNDERSCORE='_'
ASC_SPACE=' '
ASC_HT='\0x9'
ASC_VT='\0xb'
ASC_WS=ASC_SPACE|ASC_HT|ASC_VT
ASC_DBL_QUOTE=\"
ASC_QUOTE=\'
UPASC_IDENTIFIER=UPASCALPHA *(UPASCALPHA|ASCDIGIT|ASC_UNDERSCORE)

ALPHA,DIGIT are the tokens which return true for isAlpha and isDigit
ALNUM=ALPHA|DIGIT
CHAR=anycharacter
WS=isWhiteSpace()
SIGN='+'|'-'
DECSEP=<locale dependent decimal separator>
GRPSEP=<locale dependent thousand separator>
EXPONENT=(E|e)[SIGN]1*ASCDIGIT

IDENTIFIER=ALPHA *ALNUM
UIDENTIFIER=(ALPHA | ASC_UNDERSCORE) *(ALNUM|ASC_UNDERSCORE)
ALPHA_NAME=ALPHA *(ALNUM|DEFCHARS)
ANY_NAME=1*(ALNUM|DEFCHARS)
SINGLE_QUOTE_NAME=ASC_QUOTE(1*CHAR)ASC_QUOTE
DOUBLE_QUOTE_NAME=ASC_DBL_QUOTE(*CHAR)ASC_DBL_QUOTE
ASC_NUMBER=[SIGN]*(1*ASCDIGIT *(GRPSEP 1*ASCDIGIT))[DECSEP]1*ASCDIGIT[EXPONENT]
NUMBER=[SIGN]*(1*DIGIT *(GRPSEP 1*DIGIT))[DECSEP]1*DIGIT[EXPONENT]

NOTE: DEFCHARS is not defined in this list. Presumably it stands for the
user defined characters passed to the parse...Token() methods -- to be
confirmed against the implementation.

*/

//============================================================================

/**
    Character classification (upper, lower, digit, letter, number, ...)
    and generic Unicode enabled parser.
 */

published interface XCharacterClassification : com::sun::star::uno::XInterface
{
    //------------------------------------------------------------------------
    /** Convert lower case alpha to upper case alpha, starting at
        position <em>nPos</em> for <em>nCount</em> code points.

        @param aText
            Text containing the characters to be converted.

        @param nPos
            Position in <em>aText</em> where the conversion starts.

        @param nCount
            Number of code points to be converted.

        @param aLocale
            The locale passed to the conversion.

        @returns
            The converted string.
     */
    string toUpper( [in] string aText, [in] long nPos, [in] long nCount,
                    [in] com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Convert upper case alpha to lower case alpha, starting at
        position <em>nPos</em> for <em>nCount</em> code points.

        @param aText
            Text containing the characters to be converted.

        @param nPos
            Position in <em>aText</em> where the conversion starts.

        @param nCount
            Number of code points to be converted.

        @param aLocale
            The locale passed to the conversion.

        @returns
            The converted string.
     */
    string toLower( [in] string aText, [in] long nPos, [in] long nCount,
                    [in] com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Convert to title case, starting at
        position <em>nPos</em> for <em>nCount</em> code points.

        @param aText
            Text containing the characters to be converted.

        @param nPos
            Position in <em>aText</em> where the conversion starts.

        @param nCount
            Number of code points to be converted.

        @param aLocale
            The locale passed to the conversion.

        @returns
            The converted string.
     */
    string toTitle( [in] string aText, [in] long nPos, [in] long nCount,
                    [in] com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Get <type>UnicodeType</type> of character at position <em>nPos</em>.

        @param aText
            Text containing the character to be examined.

        @param nPos
            Position of the character in <em>aText</em>.

        @returns
            A <type>UnicodeType</type> value.
     */
    short getType( [in] string aText, [in] long nPos );

    //------------------------------------------------------------------------
    /** Get <type>DirectionProperty</type> of character at position
        <em>nPos</em>.

        @param aText
            Text containing the character to be examined.

        @param nPos
            Position of the character in <em>aText</em>.

        @returns
            A <type>DirectionProperty</type> value.
     */
    short getCharacterDirection( [in] string aText, [in] long nPos );

    //------------------------------------------------------------------------
    /** Get <type>UnicodeScript</type> of character at position <em>nPos</em>.

        @param aText
            Text containing the character to be examined.

        @param nPos
            Position of the character in <em>aText</em>.

        @returns
            A <type>UnicodeScript</type> value.
     */
    short getScript( [in] string aText, [in] long nPos );

    //------------------------------------------------------------------------
    /** Get <type>KCharacterType</type> of character at position <em>nPos</em>.

        @param aText
            Text containing the character to be examined.

        @param nPos
            Position of the character in <em>aText</em>.

        @param aLocale
            The locale passed to the character type determination.

        @returns
            The <type>KCharacterType</type> of the character.
     */
    long getCharacterType( [in] string aText, [in] long nPos,
                           [in] com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Get accumulated <type>KCharacterType</type>s of string starting
        at position <em>nPos</em> of length <em>nCount</em> code points.

        @param aText
            Text containing the characters to be examined.

        @param nPos
            Position in <em>aText</em> where the examination starts.

        @param nCount
            Number of code points to be examined.

        @param aLocale
            The locale passed to the character type determination.

        @returns
            A number with appropriate flags set to indicate what type of
            characters the string contains, each flag value being one of
            KCharacterType values.
     */
    long getStringType( [in] string aText, [in] long nPos, [in] long nCount,
                        [in] com::sun::star::lang::Locale aLocale );


    //------------------------------------------------------------------------
    /**
        Parse a string for a token starting at position <em>nPos</em>.

        <p> A name or identifier must match the
        <type>KParseTokens</type> criteria passed in
        <em>nStartCharFlags</em> and <em>nContCharFlags</em> and may
        additionally contain characters of
        <em>aUserDefinedCharactersStart</em> and/or
        <em>aUserDefinedCharactersCont</em>. </p>


        @returns
            A filled <type>ParseResult</type> structure. If no
            unambiguous token could be parsed,
            <member>ParseResult::TokenType</member> will be set to
            <b>0</b> (zero), other fields will contain the values parsed
            so far.

            <p> If a token may represent either a numeric value or a
            name according to the passed Start/Cont-Flags/Chars, both
            <const>KParseType::ASC_NUMBER</const> (or
            <const>KParseType::UNI_NUMBER</const>) and
            <const>KParseType::IDENTNAME</const> are set in
            <member>ParseResult::TokenType</member>. </p>

        @param aText
            Text to be parsed.

        @param nPos
            Position where parsing starts.

        @param aLocale
            The locale, for example, for decimal and group separator or
            character type determination.

        @param nStartCharFlags
            A set of <type>KParseTokens</type> constants determining the
            allowed characters a name or identifier may start with.

        @param aUserDefinedCharactersStart
            A set of additionally allowed characters a name or
            identifier may start with.

        @param nContCharFlags
            A set of <type>KParseTokens</type> constants determining the
            allowed characters a name or identifier may continue with.

        @param aUserDefinedCharactersCont
            A set of additionally allowed characters a name or
            identifier may continue with.

        @example:C++
        <listing>
        using namespace ::com::sun::star::i18n;
        // First character of an identifier may be any alphabetic or underscore.
        sal_Int32 nStartFlags = KParseTokens::ANY_ALPHA | KParseTokens::ASC_UNDERSCORE;
        // Continuing characters may be any alphanumeric or underscore or dot.
        sal_Int32 nContFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE | KParseTokens::ASC_DOT;
        // No further characters assumed to be contained in an identifier
        String aEmptyString;
        // Parse any token.
        ParseResult rRes = xCC->parseAnyToken( aText, nPos, aLocale,
            nStartFlags, aEmptyString, nContFlags, aEmptyString );
        // Get parsed token.
        if ( rRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER) )
            fValue = rRes.Value;
        if ( rRes.TokenType & KParseType::IDENTNAME )
            aName = aText.Copy( nPos, rRes.EndPos - nPos );
        else if ( rRes.TokenType & KParseType::SINGLE_QUOTE_NAME )
            aName = rRes.DequotedNameOrString;
        else if ( rRes.TokenType & KParseType::DOUBLE_QUOTE_STRING )
            aString = rRes.DequotedNameOrString;
        else if ( rRes.TokenType & KParseType::BOOLEAN )
            aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
        else if ( rRes.TokenType & KParseType::ONE_SINGLE_CHAR )
            aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
        </listing>
     */

    ParseResult parseAnyToken(
                            [in] string aText,
                            [in] long nPos,
                            [in] com::sun::star::lang::Locale aLocale,
                            [in] long nStartCharFlags,
                            [in] string aUserDefinedCharactersStart,
                            [in] long nContCharFlags,
                            [in] string aUserDefinedCharactersCont
                            );

    //------------------------------------------------------------------------
    /**
        Parse a string for a token of type <em>nTokenType</em> starting
        at position <em>nPos</em>.

        <p> Other parameters are the same as in
        <member>parseAnyToken</member>. If the actual token does not
        match the passed <em>nTokenType</em> a
        <member>ParseResult::TokenType</member> set to <b>0</b> (zero)
        is returned. </p>

        @param nTokenType
            One or more of the <type>KParseType</type> constants.

        @example:C++
        <listing>
        // Determine if a given name is a valid name (not quoted) and contains
        // only allowed characters.
        using namespace ::com::sun::star::i18n;
        // First character of an identifier may be any alphanumeric or underscore.
        sal_Int32 nStartFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE;
        // No further characters assumed to be contained in an identifier start.
        String aEmptyString;
        // Continuing characters may be any alphanumeric or underscore.
        sal_Int32 nContFlags = nStartFlags;
        // Additionally, continuing characters may contain a blank.
        String aContChars( RTL_CONSTASCII_USTRINGPARAM(" ") );
        // Parse predefined (must be an IDENTNAME) token.
        ParseResult rRes = xCC->parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale,
            nStartFlags, aEmptyString, nContFlags, aContChars );
        // Test if it is an identifier name and if it is the only token,
        // i.e. nothing else follows it.
        bValid = (rRes.TokenType & KParseType::IDENTNAME) && rRes.EndPos == rName.Len();
        </listing>
     */

    ParseResult parsePredefinedToken(
                            [in] long nTokenType,
                            [in] string aText,
                            [in] long nPos,
                            [in] com::sun::star::lang::Locale aLocale,
                            [in] long nStartCharFlags,
                            [in] string aUserDefinedCharactersStart,
                            [in] long nContCharFlags,
                            [in] string aUserDefinedCharactersCont
                            );
};

//=============================================================================
}; }; }; };

#endif