1*d1766043SAndrew Rist/**************************************************************
2cdf0e10cSrcweir *
3*d1766043SAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one
4*d1766043SAndrew Rist * or more contributor license agreements.  See the NOTICE file
5*d1766043SAndrew Rist * distributed with this work for additional information
6*d1766043SAndrew Rist * regarding copyright ownership.  The ASF licenses this file
7*d1766043SAndrew Rist * to you under the Apache License, Version 2.0 (the
8*d1766043SAndrew Rist * "License"); you may not use this file except in compliance
9*d1766043SAndrew Rist * with the License.  You may obtain a copy of the License at
10*d1766043SAndrew Rist *
11*d1766043SAndrew Rist *   http://www.apache.org/licenses/LICENSE-2.0
12*d1766043SAndrew Rist *
13*d1766043SAndrew Rist * Unless required by applicable law or agreed to in writing,
14*d1766043SAndrew Rist * software distributed under the License is distributed on an
15*d1766043SAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*d1766043SAndrew Rist * KIND, either express or implied.  See the License for the
17*d1766043SAndrew Rist * specific language governing permissions and limitations
18*d1766043SAndrew Rist * under the License.
19*d1766043SAndrew Rist *
20*d1766043SAndrew Rist *************************************************************/
21*d1766043SAndrew Rist
22*d1766043SAndrew Rist
23cdf0e10cSrcweir
24cdf0e10cSrcweir#ifndef __com_sun_star_i18n_XCharacterClassification_idl__
25cdf0e10cSrcweir#define __com_sun_star_i18n_XCharacterClassification_idl__
26cdf0e10cSrcweir
27cdf0e10cSrcweir#include <com/sun/star/i18n/ParseResult.idl>
28cdf0e10cSrcweir
29cdf0e10cSrcweir#ifndef __com_sun_star_lang_Locale_idl__
30cdf0e10cSrcweir#include <com/sun/star/lang/Locale.idl>
31cdf0e10cSrcweir#endif
32cdf0e10cSrcweir#ifndef __com_sun_star_uno_XInterface_idl__
33cdf0e10cSrcweir#include <com/sun/star/uno/XInterface.idl>
34cdf0e10cSrcweir#endif
35cdf0e10cSrcweir
36cdf0e10cSrcweir//============================================================================
37cdf0e10cSrcweir
38cdf0e10cSrcweirmodule com { module sun { module star { module i18n {
39cdf0e10cSrcweir
40cdf0e10cSrcweir//============================================================================
41cdf0e10cSrcweir
42cdf0e10cSrcweir/*
43cdf0e10cSrcweir
44cdf0e10cSrcweirPossible tokens to be parsed with  parse...Token():
45cdf0e10cSrcweir
46cdf0e10cSrcweirUPASCALPHA=[A-Z]
47cdf0e10cSrcweirLOASCALPHA=[a-z]
48cdf0e10cSrcweirASCALPHA=1*(UPASCALPHA|LOASCALPHA)
49cdf0e10cSrcweirASCDIGIT=[0-9]
50cdf0e10cSrcweirASC_UNDERSCORE='_'
51cdf0e10cSrcweirASC_SPACE=' '
52cdf0e10cSrcweirASC_HT='\x09'
53cdf0e10cSrcweirASC_VT='\x0b'
54cdf0e10cSrcweirASC_WS=ASC_SPACE|ASC_HT|ASC_VT
55cdf0e10cSrcweirASC_DBL_QUOTE=\"
56cdf0e10cSrcweirASC_QUOTE=\'
57cdf0e10cSrcweirUPASC_IDENTIFIER=UPASCALPHA *(UPASCALPHA|ASCDIGIT|ASC_UNDERSCORE)
58cdf0e10cSrcweir
59cdf0e10cSrcweirALPHA,DIGIT are the tokens which return true for isAlpha and isDigit
60cdf0e10cSrcweirALNUM=ALPHA|DIGIT
61cdf0e10cSrcweirCHAR=anycharacter
62cdf0e10cSrcweirWS=isWhiteSpace()
63cdf0e10cSrcweirSIGN='+'|'-'
64cdf0e10cSrcweirDECSEP=<locale dependent decimal separator>
65cdf0e10cSrcweirGRPSEP=<locale dependent thousand separator>
66cdf0e10cSrcweirEXPONENT=(E|e)[SIGN]1*ASCDIGIT
67cdf0e10cSrcweir
68cdf0e10cSrcweirIDENTIFIER=ALPHA *ALNUM
69cdf0e10cSrcweirUIDENTIFIER=(ALPHA | ASC_UNDERSCORE) *(ALNUM|ASC_UNDERSCORE)
70cdf0e10cSrcweirALPHA_NAME=ALPHA *(ALNUM|DEFCHARS)
71cdf0e10cSrcweirANY_NAME=1*(ALNUM|DEFCHARS)
72cdf0e10cSrcweirSINGLE_QUOTE_NAME=ASC_QUOTE(1*CHAR)ASC_QUOTE
73cdf0e10cSrcweirDOUBLE_QUOTE_NAME=ASC_DBL_QUOTE(*CHAR)ASC_DBL_QUOTE
74cdf0e10cSrcweirASC_NUMBER=[SIGN]*(1*ASCDIGIT  *(GRPSEP 1*ASCDIGIT))[DECSEP]1*ASCDIGIT[EXPONENT]
75cdf0e10cSrcweirNUMBER=[SIGN]*(1*DIGIT  *(GRPSEP 1*DIGIT))[DECSEP]1*DIGIT[EXPONENT]
76cdf0e10cSrcweir
77cdf0e10cSrcweir*/
78cdf0e10cSrcweir
79cdf0e10cSrcweir//============================================================================
80cdf0e10cSrcweir
81cdf0e10cSrcweir/**
82cdf0e10cSrcweir    Character classification (upper, lower, digit, letter, number, ...)
83cdf0e10cSrcweir    and a generic, Unicode-enabled parser.
84cdf0e10cSrcweir */
85cdf0e10cSrcweir
86cdf0e10cSrcweirpublished interface XCharacterClassification : com::sun::star::uno::XInterface
87cdf0e10cSrcweir{
88cdf0e10cSrcweir    //------------------------------------------------------------------------
89cdf0e10cSrcweir    /** Convert lower case alpha to upper case alpha, starting at
90cdf0e10cSrcweir        position <em>nPos</em> for <em>nCount</em> code points.
91cdf0e10cSrcweir     */
92cdf0e10cSrcweir    string   toUpper( [in] string aText, [in] long nPos, [in] long nCount,
93cdf0e10cSrcweir                      [in] com::sun::star::lang::Locale aLocale );
94cdf0e10cSrcweir
95cdf0e10cSrcweir    //------------------------------------------------------------------------
96cdf0e10cSrcweir    /** Convert upper case alpha to lower case alpha, starting at
97cdf0e10cSrcweir        position <em>nPos</em> for <em>nCount</em> code points.
98cdf0e10cSrcweir     */
99cdf0e10cSrcweir    string   toLower( [in] string aText, [in] long nPos, [in] long nCount,
100cdf0e10cSrcweir                      [in] com::sun::star::lang::Locale aLocale );
101cdf0e10cSrcweir
102cdf0e10cSrcweir    //------------------------------------------------------------------------
103cdf0e10cSrcweir    /** Convert to title case, starting at
104cdf0e10cSrcweir        position <em>nPos</em> for <em>nCount</em> code points.
105cdf0e10cSrcweir     */
106cdf0e10cSrcweir    string   toTitle( [in] string aText, [in] long nPos, [in] long nCount,
107cdf0e10cSrcweir                      [in] com::sun::star::lang::Locale aLocale );
108cdf0e10cSrcweir
109cdf0e10cSrcweir    //------------------------------------------------------------------------
110cdf0e10cSrcweir    /// Get <type>UnicodeType</type> of character at position <em>nPos</em>.
111cdf0e10cSrcweir    short    getType( [in] string aText, [in] long nPos );
112cdf0e10cSrcweir
113cdf0e10cSrcweir    //------------------------------------------------------------------------
114cdf0e10cSrcweir    /** Get <type>DirectionProperty</type> of character at position
115cdf0e10cSrcweir        <em>nPos</em>.
116cdf0e10cSrcweir     */
117cdf0e10cSrcweir    short    getCharacterDirection( [in] string aText, [in] long nPos );
118cdf0e10cSrcweir
119cdf0e10cSrcweir    //------------------------------------------------------------------------
120cdf0e10cSrcweir    /// Get <type>UnicodeScript</type> of character at position <em>nPos</em>.
121cdf0e10cSrcweir    short    getScript( [in] string aText, [in] long nPos );
122cdf0e10cSrcweir
123cdf0e10cSrcweir    //------------------------------------------------------------------------
124cdf0e10cSrcweir    /// Get <type>KCharacterType</type> of character at position <em>nPos</em>.
125cdf0e10cSrcweir    long getCharacterType( [in] string aText, [in] long nPos,
126cdf0e10cSrcweir                           [in] com::sun::star::lang::Locale aLocale );
127cdf0e10cSrcweir
128cdf0e10cSrcweir    //------------------------------------------------------------------------
129cdf0e10cSrcweir    /** Get accumulated <type>KCharacterType</type>s of string starting
130cdf0e10cSrcweir        at position <em>nPos</em> of length <em>nCount</em> code points.
131cdf0e10cSrcweir
132cdf0e10cSrcweir        @returns
133cdf0e10cSrcweir            A number with appropriate flags set to indicate what type of
134cdf0e10cSrcweir            characters the string contains, each flag value being one of
135cdf0e10cSrcweir            KCharacterType values.
136cdf0e10cSrcweir    */
137cdf0e10cSrcweir    long getStringType( [in] string aText, [in] long nPos, [in] long nCount,
138cdf0e10cSrcweir                        [in] com::sun::star::lang::Locale aLocale );
139cdf0e10cSrcweir
140cdf0e10cSrcweir
141cdf0e10cSrcweir    //------------------------------------------------------------------------
142cdf0e10cSrcweir    /**
143cdf0e10cSrcweir        Parse a string for a token starting at position <em>nPos</em>.
144cdf0e10cSrcweir
145cdf0e10cSrcweir        <p> A name or identifier must match the
146cdf0e10cSrcweir        <type>KParseTokens</type> criteria passed in
147cdf0e10cSrcweir        <em>nStartCharFlags</em> and <em>nContCharFlags</em> and may
148cdf0e10cSrcweir        additionally contain characters of
149cdf0e10cSrcweir        <em>aUserDefinedCharactersStart</em> and/or
150cdf0e10cSrcweir        <em>aUserDefinedCharactersCont</em>. </p>
151cdf0e10cSrcweir
152cdf0e10cSrcweir
153cdf0e10cSrcweir        @returns
154cdf0e10cSrcweir            A filled <type>ParseResult</type> structure. If no
155cdf0e10cSrcweir            unambiguous token could be parsed,
156cdf0e10cSrcweir            <member>ParseResult::TokenType</member> will be set to
157cdf0e10cSrcweir            <b>0</b> (zero), other fields will contain the values parsed
158cdf0e10cSrcweir            so far.
159cdf0e10cSrcweir
160cdf0e10cSrcweir            <p> If a token may represent either a numeric value or a
161cdf0e10cSrcweir            name according to the passed Start/Cont-Flags/Chars, both
162cdf0e10cSrcweir            <const>KParseType::ASC_NUMBER</const> (or
163cdf0e10cSrcweir            <const>KParseType::UNI_NUMBER</const>) and
164cdf0e10cSrcweir            <const>KParseType::IDENTNAME</const> are set in
165cdf0e10cSrcweir            <member>ParseResult::TokenType</member>. </p>
166cdf0e10cSrcweir
167cdf0e10cSrcweir        @param  aText
168cdf0e10cSrcweir            Text to be parsed.
169cdf0e10cSrcweir
170cdf0e10cSrcweir        @param  nPos
171cdf0e10cSrcweir            Position where parsing starts.
172cdf0e10cSrcweir
173cdf0e10cSrcweir        @param  aLocale
174cdf0e10cSrcweir            The locale, for example, for decimal and group separator or
175cdf0e10cSrcweir            character type determination.
176cdf0e10cSrcweir
177cdf0e10cSrcweir        @param  nStartCharFlags
178cdf0e10cSrcweir            A set of <type>KParseTokens</type> constants determining the
179cdf0e10cSrcweir            allowed characters a name or identifier may start with.
180cdf0e10cSrcweir
181cdf0e10cSrcweir        @param  aUserDefinedCharactersStart
182cdf0e10cSrcweir            A set of additionally allowed characters a name or
183cdf0e10cSrcweir            identifier may start with.
184cdf0e10cSrcweir
185cdf0e10cSrcweir        @param  nContCharFlags
186cdf0e10cSrcweir            A set of <type>KParseTokens</type> constants determining the
187cdf0e10cSrcweir            allowed characters a name or identifier may continue with.
188cdf0e10cSrcweir
189cdf0e10cSrcweir        @param  aUserDefinedCharactersCont
190cdf0e10cSrcweir            A set of additionally allowed characters a name or
191cdf0e10cSrcweir            identifier may continue with.
192cdf0e10cSrcweir
193cdf0e10cSrcweir        @example:C++
194cdf0e10cSrcweir        <listing>
195cdf0e10cSrcweir            using namespace ::com::sun::star::i18n;
196cdf0e10cSrcweir            // First character of an identifier may be any alphabetic or underscore.
197cdf0e10cSrcweir            sal_Int32 nStartFlags = KParseTokens::ANY_ALPHA | KParseTokens::ASC_UNDERSCORE;
198cdf0e10cSrcweir            // Continuing characters may be any alphanumeric or underscore or dot.
199cdf0e10cSrcweir            sal_Int32 nContFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE | KParseTokens::ASC_DOT;
200cdf0e10cSrcweir            // No further characters assumed to be contained in an identifier
201cdf0e10cSrcweir            String aEmptyString;
202cdf0e10cSrcweir            // Parse any token.
203cdf0e10cSrcweir            ParseResult rRes = xCC->parseAnyToken( aText, nPos, aLocale,
204cdf0e10cSrcweir                nStartFlags, aEmptyString, nContFlags, aEmptyString );
205cdf0e10cSrcweir            // Get parsed token.
206cdf0e10cSrcweir            if ( rRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER) )
207cdf0e10cSrcweir                fValue = rRes.Value;
208cdf0e10cSrcweir            if ( rRes.TokenType & KParseType::IDENTNAME )
209cdf0e10cSrcweir                aName = aText.Copy( nPos, rRes.EndPos - nPos );
210cdf0e10cSrcweir            else if ( rRes.TokenType & KParseType::SINGLE_QUOTE_NAME )
211cdf0e10cSrcweir                aName = rRes.DequotedNameOrString;
212cdf0e10cSrcweir            else if ( rRes.TokenType & KParseType::DOUBLE_QUOTE_STRING )
213cdf0e10cSrcweir                aString = rRes.DequotedNameOrString;
214cdf0e10cSrcweir            else if ( rRes.TokenType & KParseType::BOOLEAN )
215cdf0e10cSrcweir                aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
216cdf0e10cSrcweir            else if ( rRes.TokenType & KParseType::ONE_SINGLE_CHAR )
217cdf0e10cSrcweir                aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
218cdf0e10cSrcweir        </listing>
219cdf0e10cSrcweir     */
220cdf0e10cSrcweir
221cdf0e10cSrcweir    ParseResult parseAnyToken(
222cdf0e10cSrcweir                            [in] string aText,
223cdf0e10cSrcweir                            [in] long nPos,
224cdf0e10cSrcweir                            [in] com::sun::star::lang::Locale aLocale,
225cdf0e10cSrcweir                            [in] long nStartCharFlags,
226cdf0e10cSrcweir                            [in] string aUserDefinedCharactersStart,
227cdf0e10cSrcweir                            [in] long nContCharFlags,
228cdf0e10cSrcweir                            [in] string aUserDefinedCharactersCont
229cdf0e10cSrcweir                            );
230cdf0e10cSrcweir
231cdf0e10cSrcweir    //------------------------------------------------------------------------
232cdf0e10cSrcweir    /**
233cdf0e10cSrcweir        Parse a string for a token of type <em>nTokenType</em> starting
234cdf0e10cSrcweir        at position <em>nPos</em>.
235cdf0e10cSrcweir
236cdf0e10cSrcweir        <p> Other parameters are the same as in
237cdf0e10cSrcweir        <member>parseAnyToken</member>. If the actual token does not
238cdf0e10cSrcweir        match the passed <em>nTokenType</em> a
239cdf0e10cSrcweir        <member>ParseResult::TokenType</member> set to <b>0</b> (zero)
240cdf0e10cSrcweir        is returned. </p>
241cdf0e10cSrcweir
242cdf0e10cSrcweir        @param  nTokenType
243cdf0e10cSrcweir            One or more of the <type>KParseType</type> constants.
244cdf0e10cSrcweir
245cdf0e10cSrcweir        @example:C++
246cdf0e10cSrcweir        <listing>
247cdf0e10cSrcweir            // Determine if a given name is a valid name (not quoted) and contains
248cdf0e10cSrcweir            // only allowed characters.
249cdf0e10cSrcweir            using namespace ::com::sun::star::i18n;
250cdf0e10cSrcweir            // First character of an identifier may be any alphanumeric or underscore.
251cdf0e10cSrcweir            sal_Int32 nStartFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE;
252cdf0e10cSrcweir            // No further characters assumed to be contained in an identifier start.
253cdf0e10cSrcweir            String aEmptyString;
254cdf0e10cSrcweir            // Continuing characters may be any alphanumeric or underscore.
255cdf0e10cSrcweir            sal_Int32 nContFlags = nStartFlags;
256cdf0e10cSrcweir            // Additionally, continuing characters may contain a blank.
257cdf0e10cSrcweir            String aContChars( RTL_CONSTASCII_USTRINGPARAM(" ") );
258cdf0e10cSrcweir            // Parse predefined (must be an IDENTNAME) token.
259cdf0e10cSrcweir            ParseResult rRes = xCC->parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale,
260cdf0e10cSrcweir                nStartFlags, aEmptyString, nContFlags, aContChars );
261cdf0e10cSrcweir            // Test if it is an identifier name and if it is the only one,
262cdf0e10cSrcweir            // with nothing else following it.
263cdf0e10cSrcweir            bValid = (rRes.TokenType & KParseType::IDENTNAME) && rRes.EndPos == rName.Len();
264cdf0e10cSrcweir        </listing>
265cdf0e10cSrcweir     */
266cdf0e10cSrcweir
267cdf0e10cSrcweir    ParseResult parsePredefinedToken(
268cdf0e10cSrcweir                            [in] long nTokenType,
269cdf0e10cSrcweir                            [in] string aText,
270cdf0e10cSrcweir                            [in] long nPos,
271cdf0e10cSrcweir                            [in] com::sun::star::lang::Locale aLocale,
272cdf0e10cSrcweir                            [in] long nStartCharFlags,
273cdf0e10cSrcweir                            [in] string aUserDefinedCharactersStart,
274cdf0e10cSrcweir                            [in] long nContCharFlags,
275cdf0e10cSrcweir                            [in] string aUserDefinedCharactersCont
276cdf0e10cSrcweir                            );
277cdf0e10cSrcweir};
278cdf0e10cSrcweir
279cdf0e10cSrcweir//=============================================================================
280cdf0e10cSrcweir}; }; }; };
281cdf0e10cSrcweir
282cdf0e10cSrcweir#endif
283