1*cdf0e10cSrcweir/*************************************************************************
2*cdf0e10cSrcweir *
3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir *
5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir *
7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir *
9*cdf0e10cSrcweir * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir *
11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir *
15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir *
21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir *
26*cdf0e10cSrcweir ************************************************************************/
27*cdf0e10cSrcweir
28*cdf0e10cSrcweir#ifndef __com_sun_star_i18n_XCharacterClassification_idl__
29*cdf0e10cSrcweir#define __com_sun_star_i18n_XCharacterClassification_idl__
30*cdf0e10cSrcweir
31*cdf0e10cSrcweir#include <com/sun/star/i18n/ParseResult.idl>
32*cdf0e10cSrcweir
33*cdf0e10cSrcweir#ifndef __com_sun_star_lang_Locale_idl__
34*cdf0e10cSrcweir#include <com/sun/star/lang/Locale.idl>
35*cdf0e10cSrcweir#endif
36*cdf0e10cSrcweir#ifndef __com_sun_star_uno_XInterface_idl__
37*cdf0e10cSrcweir#include <com/sun/star/uno/XInterface.idl>
38*cdf0e10cSrcweir#endif
39*cdf0e10cSrcweir
40*cdf0e10cSrcweir//============================================================================
41*cdf0e10cSrcweir
42*cdf0e10cSrcweirmodule com { module sun { module star { module i18n {
43*cdf0e10cSrcweir
44*cdf0e10cSrcweir//============================================================================
45*cdf0e10cSrcweir
46*cdf0e10cSrcweir/*
47*cdf0e10cSrcweir
48*cdf0e10cSrcweirPossible tokens to be parsed with parse...Token():
49*cdf0e10cSrcweir
50*cdf0e10cSrcweirUPASCALPHA=[A-Z]
51*cdf0e10cSrcweirLOASCALPHA=[a-z]
52*cdf0e10cSrcweirASCALPHA=1*(UPASCALPHA|LOASCALPHA)
53*cdf0e10cSrcweirASCDIGIT=[0-9]
54*cdf0e10cSrcweirASC_UNDERSCORE='_'
55*cdf0e10cSrcweirASC_SPACE=' '
56*cdf0e10cSrcweirASC_HT='\x09'
57*cdf0e10cSrcweirASC_VT='\x0b'
58*cdf0e10cSrcweirASC_WS=ASC_SPACE|ASC_HT|ASC_VT
59*cdf0e10cSrcweirASC_DBL_QUOTE=\"
60*cdf0e10cSrcweirASC_QUOTE=\'
61*cdf0e10cSrcweirUPASC_IDENTIFIER=UPASCALPHA *(UPASCALPHA|ASCDIGIT|ASC_UNDERSCORE)
62*cdf0e10cSrcweir
63*cdf0e10cSrcweirALPHA,DIGIT are the tokens which return true for isAlpha and isDigit
64*cdf0e10cSrcweirALNUM=ALPHA|DIGIT
65*cdf0e10cSrcweirCHAR=anycharacter
66*cdf0e10cSrcweirWS=isWhiteSpace()
67*cdf0e10cSrcweirSIGN='+'|'-'
68*cdf0e10cSrcweirDECSEP=<locale dependent decimal separator>
69*cdf0e10cSrcweirGRPSEP=<locale dependent thousand separator>
70*cdf0e10cSrcweirEXPONENT=(E|e)[SIGN]1*ASCDIGIT
71*cdf0e10cSrcweir
72*cdf0e10cSrcweirIDENTIFIER=ALPHA *ALNUM
73*cdf0e10cSrcweirUIDENTIFIER=(ALPHA | ASC_UNDERSCORE) *(ALNUM|ASC_UNDERSCORE)
74*cdf0e10cSrcweirALPHA_NAME=ALPHA *(ALNUM|DEFCHARS)
75*cdf0e10cSrcweirANY_NAME=1*(ALNUM|DEFCHARS)
76*cdf0e10cSrcweirSINGLE_QUOTE_NAME=ASC_QUOTE(1*CHAR)ASC_QUOTE
77*cdf0e10cSrcweirDOUBLE_QUOTE_STRING=ASC_DBL_QUOTE(*CHAR)ASC_DBL_QUOTE
78*cdf0e10cSrcweirASC_NUMBER=[SIGN]*(1*ASCDIGIT  *(GRPSEP 1*ASCDIGIT))[DECSEP]1*ASCDIGIT[EXPONENT]
79*cdf0e10cSrcweirNUMBER=[SIGN]*(1*DIGIT  *(GRPSEP 1*DIGIT))[DECSEP]1*DIGIT[EXPONENT]
80*cdf0e10cSrcweir
81*cdf0e10cSrcweir*/
82*cdf0e10cSrcweir
83*cdf0e10cSrcweir//============================================================================
84*cdf0e10cSrcweir
85*cdf0e10cSrcweir/**
86*cdf0e10cSrcweir    Character classification (upper, lower, digit, letter, number, ...)
87*cdf0e10cSrcweir    and generic Unicode enabled parser.
88*cdf0e10cSrcweir */
89*cdf0e10cSrcweir
90*cdf0e10cSrcweirpublished interface XCharacterClassification : com::sun::star::uno::XInterface
91*cdf0e10cSrcweir{
92*cdf0e10cSrcweir    //------------------------------------------------------------------------
93*cdf0e10cSrcweir    /** Convert lower case alpha to upper case alpha, starting at
94*cdf0e10cSrcweir        position <em>nPos</em> for <em>nCount</em> code points.
95*cdf0e10cSrcweir     */
96*cdf0e10cSrcweir    string   toUpper( [in] string aText, [in] long nPos, [in] long nCount,
97*cdf0e10cSrcweir                      [in] com::sun::star::lang::Locale aLocale );
98*cdf0e10cSrcweir
99*cdf0e10cSrcweir    //------------------------------------------------------------------------
100*cdf0e10cSrcweir    /** Convert upper case alpha to lower case alpha, starting at
101*cdf0e10cSrcweir        position <em>nPos</em> for <em>nCount</em> code points.
102*cdf0e10cSrcweir     */
103*cdf0e10cSrcweir    string   toLower( [in] string aText, [in] long nPos, [in] long nCount,
104*cdf0e10cSrcweir                      [in] com::sun::star::lang::Locale aLocale );
105*cdf0e10cSrcweir
106*cdf0e10cSrcweir    //------------------------------------------------------------------------
107*cdf0e10cSrcweir    /** Convert to title case, starting at
108*cdf0e10cSrcweir        position <em>nPos</em> for <em>nCount</em> code points.
109*cdf0e10cSrcweir     */
110*cdf0e10cSrcweir    string   toTitle( [in] string aText, [in] long nPos, [in] long nCount,
111*cdf0e10cSrcweir                      [in] com::sun::star::lang::Locale aLocale );
112*cdf0e10cSrcweir
113*cdf0e10cSrcweir    //------------------------------------------------------------------------
114*cdf0e10cSrcweir    /// Get <type>UnicodeType</type> of character at position <em>nPos</em>.
115*cdf0e10cSrcweir    short    getType( [in] string aText, [in] long nPos );
116*cdf0e10cSrcweir
117*cdf0e10cSrcweir    //------------------------------------------------------------------------
118*cdf0e10cSrcweir    /** Get <type>DirectionProperty</type> of character at position
119*cdf0e10cSrcweir        <em>nPos</em>.
120*cdf0e10cSrcweir     */
121*cdf0e10cSrcweir    short    getCharacterDirection( [in] string aText, [in] long nPos );
122*cdf0e10cSrcweir
123*cdf0e10cSrcweir    //------------------------------------------------------------------------
124*cdf0e10cSrcweir    /// Get <type>UnicodeScript</type> of character at position <em>nPos</em>.
125*cdf0e10cSrcweir    short    getScript( [in] string aText, [in] long nPos );
126*cdf0e10cSrcweir
127*cdf0e10cSrcweir    //------------------------------------------------------------------------
128*cdf0e10cSrcweir    /// Get <type>KCharacterType</type> of character at position <em>nPos</em>.
129*cdf0e10cSrcweir    long getCharacterType( [in] string aText, [in] long nPos,
130*cdf0e10cSrcweir                           [in] com::sun::star::lang::Locale aLocale );
131*cdf0e10cSrcweir
132*cdf0e10cSrcweir    //------------------------------------------------------------------------
133*cdf0e10cSrcweir    /** Get accumulated <type>KCharacterType</type>s of string starting
134*cdf0e10cSrcweir        at position <em>nPos</em> of length <em>nCount</em> code points.
135*cdf0e10cSrcweir
136*cdf0e10cSrcweir        @returns
137*cdf0e10cSrcweir            A number with appropriate flags set to indicate what type of
138*cdf0e10cSrcweir            characters the string contains, each flag value being one of
139*cdf0e10cSrcweir            <type>KCharacterType</type> values.
140*cdf0e10cSrcweir    */
141*cdf0e10cSrcweir    long getStringType( [in] string aText, [in] long nPos, [in] long nCount,
142*cdf0e10cSrcweir                        [in] com::sun::star::lang::Locale aLocale );
143*cdf0e10cSrcweir
144*cdf0e10cSrcweir
145*cdf0e10cSrcweir    //------------------------------------------------------------------------
146*cdf0e10cSrcweir    /**
147*cdf0e10cSrcweir        Parse a string for a token starting at position <em>nPos</em>.
148*cdf0e10cSrcweir
149*cdf0e10cSrcweir        <p> A name or identifier must match the
150*cdf0e10cSrcweir        <type>KParseTokens</type> criteria passed in
151*cdf0e10cSrcweir        <em>nStartCharFlags</em> and <em>nContCharFlags</em> and may
152*cdf0e10cSrcweir        additionally contain characters of
153*cdf0e10cSrcweir        <em>aUserDefinedCharactersStart</em> and/or
154*cdf0e10cSrcweir        <em>aUserDefinedCharactersCont</em>. </p>
155*cdf0e10cSrcweir
156*cdf0e10cSrcweir
157*cdf0e10cSrcweir        @returns
158*cdf0e10cSrcweir            A filled <type>ParseResult</type> structure. If no
159*cdf0e10cSrcweir            unambiguous token could be parsed,
160*cdf0e10cSrcweir            <member>ParseResult::TokenType</member> will be set to
161*cdf0e10cSrcweir            <b>0</b> (zero), other fields will contain the values parsed
162*cdf0e10cSrcweir            so far.
163*cdf0e10cSrcweir
164*cdf0e10cSrcweir            <p> If a token may represent either a numeric value or a
165*cdf0e10cSrcweir            name according to the passed Start/Cont-Flags/Chars, both
166*cdf0e10cSrcweir            <const>KParseType::ASC_NUMBER</const> (or
167*cdf0e10cSrcweir            <const>KParseType::UNI_NUMBER</const>) and
168*cdf0e10cSrcweir            <const>KParseType::IDENTNAME</const> are set in
169*cdf0e10cSrcweir            <member>ParseResult::TokenType</member>. </p>
170*cdf0e10cSrcweir
171*cdf0e10cSrcweir        @param  aText
172*cdf0e10cSrcweir            Text to be parsed.
173*cdf0e10cSrcweir
174*cdf0e10cSrcweir        @param  nPos
175*cdf0e10cSrcweir            Position where parsing starts.
176*cdf0e10cSrcweir
177*cdf0e10cSrcweir        @param  aLocale
178*cdf0e10cSrcweir            The locale, for example, for decimal and group separator or
179*cdf0e10cSrcweir            character type determination.
180*cdf0e10cSrcweir
181*cdf0e10cSrcweir        @param  nStartCharFlags
182*cdf0e10cSrcweir            A set of <type>KParseTokens</type> constants determining the
183*cdf0e10cSrcweir            allowed characters a name or identifier may start with.
184*cdf0e10cSrcweir
185*cdf0e10cSrcweir        @param  aUserDefinedCharactersStart
186*cdf0e10cSrcweir            A set of additionally allowed characters a name or
187*cdf0e10cSrcweir            identifier may start with.
188*cdf0e10cSrcweir
189*cdf0e10cSrcweir        @param  nContCharFlags
190*cdf0e10cSrcweir            A set of <type>KParseTokens</type> constants determining the
191*cdf0e10cSrcweir            allowed characters a name or identifier may continue with.
192*cdf0e10cSrcweir
193*cdf0e10cSrcweir        @param  aUserDefinedCharactersCont
194*cdf0e10cSrcweir            A set of additionally allowed characters a name or
195*cdf0e10cSrcweir            identifier may continue with.
196*cdf0e10cSrcweir
197*cdf0e10cSrcweir        @example:C++
198*cdf0e10cSrcweir        <listing>
199*cdf0e10cSrcweir            using namespace ::com::sun::star::i18n;
200*cdf0e10cSrcweir            // First character of an identifier may be any alphabetic or underscore.
201*cdf0e10cSrcweir            sal_Int32 nStartFlags = KParseTokens::ANY_ALPHA | KParseTokens::ASC_UNDERSCORE;
202*cdf0e10cSrcweir            // Continuing characters may be any alphanumeric or underscore or dot.
203*cdf0e10cSrcweir            sal_Int32 nContFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE | KParseTokens::ASC_DOT;
204*cdf0e10cSrcweir            // No further characters assumed to be contained in an identifier.
205*cdf0e10cSrcweir            String aEmptyString;
206*cdf0e10cSrcweir            // Parse any token.
207*cdf0e10cSrcweir            ParseResult rRes = xCC->parseAnyToken( aText, nPos, aLocale,
208*cdf0e10cSrcweir                nStartFlags, aEmptyString, nContFlags, aEmptyString );
209*cdf0e10cSrcweir            // Get parsed token.
210*cdf0e10cSrcweir            if ( rRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER) )
211*cdf0e10cSrcweir                fValue = rRes.Value;
212*cdf0e10cSrcweir            if ( rRes.TokenType & KParseType::IDENTNAME )
213*cdf0e10cSrcweir                aName = aText.Copy( nPos, rRes.EndPos - nPos );
214*cdf0e10cSrcweir            else if ( rRes.TokenType & KParseType::SINGLE_QUOTE_NAME )
215*cdf0e10cSrcweir                aName = rRes.DequotedNameOrString;
216*cdf0e10cSrcweir            else if ( rRes.TokenType & KParseType::DOUBLE_QUOTE_STRING )
217*cdf0e10cSrcweir                aString = rRes.DequotedNameOrString;
218*cdf0e10cSrcweir            else if ( rRes.TokenType & KParseType::BOOLEAN )
219*cdf0e10cSrcweir                aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
220*cdf0e10cSrcweir            else if ( rRes.TokenType & KParseType::ONE_SINGLE_CHAR )
221*cdf0e10cSrcweir                aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
222*cdf0e10cSrcweir        </listing>
223*cdf0e10cSrcweir     */
224*cdf0e10cSrcweir
225*cdf0e10cSrcweir    ParseResult parseAnyToken(
226*cdf0e10cSrcweir                            [in] string aText,
227*cdf0e10cSrcweir                            [in] long nPos,
228*cdf0e10cSrcweir                            [in] com::sun::star::lang::Locale aLocale,
229*cdf0e10cSrcweir                            [in] long nStartCharFlags,
230*cdf0e10cSrcweir                            [in] string aUserDefinedCharactersStart,
231*cdf0e10cSrcweir                            [in] long nContCharFlags,
232*cdf0e10cSrcweir                            [in] string aUserDefinedCharactersCont
233*cdf0e10cSrcweir                            );
234*cdf0e10cSrcweir
235*cdf0e10cSrcweir    //------------------------------------------------------------------------
236*cdf0e10cSrcweir    /**
237*cdf0e10cSrcweir        Parse a string for a token of type <em>nTokenType</em> starting
238*cdf0e10cSrcweir        at position <em>nPos</em>.
239*cdf0e10cSrcweir
240*cdf0e10cSrcweir        <p> Other parameters are the same as in
241*cdf0e10cSrcweir        <member>parseAnyToken</member>. If the actual token does not
242*cdf0e10cSrcweir        match the passed <em>nTokenType</em> a
243*cdf0e10cSrcweir        <member>ParseResult::TokenType</member> set to <b>0</b> (zero)
244*cdf0e10cSrcweir        is returned. </p>
245*cdf0e10cSrcweir
246*cdf0e10cSrcweir        @param  nTokenType
247*cdf0e10cSrcweir            One or more of the <type>KParseType</type> constants.
248*cdf0e10cSrcweir
249*cdf0e10cSrcweir        @example:C++
250*cdf0e10cSrcweir        <listing>
251*cdf0e10cSrcweir            // Determine if a given name is a valid name (not quoted) and contains
252*cdf0e10cSrcweir            // only allowed characters.
253*cdf0e10cSrcweir            using namespace ::com::sun::star::i18n;
254*cdf0e10cSrcweir            // First character of an identifier may be any alphanumeric or underscore.
255*cdf0e10cSrcweir            sal_Int32 nStartFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE;
256*cdf0e10cSrcweir            // No further characters assumed to be contained in an identifier start.
257*cdf0e10cSrcweir            String aEmptyString;
258*cdf0e10cSrcweir            // Continuing characters may be any alphanumeric or underscore.
259*cdf0e10cSrcweir            sal_Int32 nContFlags = nStartFlags;
260*cdf0e10cSrcweir            // Additionally, continuing characters may contain a blank.
261*cdf0e10cSrcweir            String aContChars( RTL_CONSTASCII_USTRINGPARAM(" ") );
262*cdf0e10cSrcweir            // Parse predefined (must be an IDENTNAME) token.
263*cdf0e10cSrcweir            ParseResult rRes = xCC->parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale,
264*cdf0e10cSrcweir                nStartFlags, aEmptyString, nContFlags, aContChars );
265*cdf0e10cSrcweir            // Test if it is an identifier name and if it only is one
266*cdf0e10cSrcweir            // and nothing else is following it.
267*cdf0e10cSrcweir            bValid = (rRes.TokenType & KParseType::IDENTNAME) && rRes.EndPos == rName.Len();
268*cdf0e10cSrcweir        </listing>
269*cdf0e10cSrcweir     */
270*cdf0e10cSrcweir
271*cdf0e10cSrcweir    ParseResult parsePredefinedToken(
272*cdf0e10cSrcweir                            [in] long nTokenType,
273*cdf0e10cSrcweir                            [in] string aText,
274*cdf0e10cSrcweir                            [in] long nPos,
275*cdf0e10cSrcweir                            [in] com::sun::star::lang::Locale aLocale,
276*cdf0e10cSrcweir                            [in] long nStartCharFlags,
277*cdf0e10cSrcweir                            [in] string aUserDefinedCharactersStart,
278*cdf0e10cSrcweir                            [in] long nContCharFlags,
279*cdf0e10cSrcweir                            [in] string aUserDefinedCharactersCont
280*cdf0e10cSrcweir                            );
281*cdf0e10cSrcweir};
282*cdf0e10cSrcweir
283*cdf0e10cSrcweir//=============================================================================
284*cdf0e10cSrcweir}; }; }; };
285*cdf0e10cSrcweir
286*cdf0e10cSrcweir#endif
287