1/**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements.  See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership.  The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License.  You may obtain a copy of the License at
10 *
11 *   http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied.  See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24#ifndef __com_sun_star_i18n_XCharacterClassification_idl__
25#define __com_sun_star_i18n_XCharacterClassification_idl__
26
27#include <com/sun/star/i18n/ParseResult.idl>
28
29#ifndef __com_sun_star_lang_Locale_idl__
30#include <com/sun/star/lang/Locale.idl>
31#endif
32#ifndef __com_sun_star_uno_XInterface_idl__
33#include <com/sun/star/uno/XInterface.idl>
34#endif
35
36//============================================================================
37
38module com { module sun { module star { module i18n {
39
40//============================================================================
41
42/*
43
44Possible tokens to be parsed with parse...Token():
45
46UPASCALPHA=[A-Z]
47LOASCALPHA=[a-z]
48ASCALPHA=1*(UPASCALPHA|LOASCALPHA)
49ASCDIGIT=[0-9]
50ASC_UNDERSCORE='_'
51ASC_SPACE=' '
52ASC_HT='\x09'
53ASC_VT='\x0b'
54ASC_WS=ASC_SPACE|ASC_HT|ASC_VT
55ASC_DBL_QUOTE=\"
56ASC_QUOTE=\'
57UPASC_IDENTIFIER=UPASCALPHA *(UPASCALPHA|ASCDIGIT|ASC_UNDERSCORE)
58
59ALPHA,DIGIT are the tokens which return true for isAlpha and isDigit
60ALNUM=ALPHA|DIGIT
61CHAR=anycharacter
62WS=isWhiteSpace()
63SIGN='+'|'-'
64DECSEP=<locale dependent decimal separator>
65GRPSEP=<locale dependent thousand separator>
66EXPONENT=(E|e)[SIGN]1*ASCDIGIT
67
68IDENTIFIER=ALPHA *ALNUM
69UIDENTIFIER=(ALPHA | ASC_UNDERSCORE) *(ALNUM|ASC_UNDERSCORE)
70ALPHA_NAME=ALPHA *(ALNUM|DEFCHARS)
71ANY_NAME=1*(ALNUM|DEFCHARS)
72SINGLE_QUOTE_NAME=ASC_QUOTE(1*CHAR)ASC_QUOTE
73DOUBLE_QUOTE_NAME=ASC_DBL_QUOTE(*CHAR)ASC_DBL_QUOTE
74ASC_NUMBER=[SIGN]*(1*ASCDIGIT  *(GRPSEP 1*ASCDIGIT))[DECSEP]1*ASCDIGIT[EXPONENT]
75NUMBER=[SIGN]*(1*DIGIT  *(GRPSEP 1*DIGIT))[DECSEP]1*DIGIT[EXPONENT]
76
77*/
78
79//============================================================================
80
81/**
82    Character classification (upper, lower, digit, letter, number, ...)
83    and a generic, Unicode-enabled token parser.
84 */
85
86published interface XCharacterClassification : com::sun::star::uno::XInterface
87{
88    //------------------------------------------------------------------------
89    /** Convert lower case alpha to upper case alpha, starting at
90        position <em>nPos</em> for <em>nCount</em> code points.
91     */
92    string   toUpper( [in] string aText, [in] long nPos, [in] long nCount,
93                      [in] com::sun::star::lang::Locale aLocale );
94
95    //------------------------------------------------------------------------
96    /** Convert upper case alpha to lower case alpha, starting at
97        position <em>nPos</em> for <em>nCount</em> code points.
98     */
99    string   toLower( [in] string aText, [in] long nPos, [in] long nCount,
100                      [in] com::sun::star::lang::Locale aLocale );
101
102    //------------------------------------------------------------------------
103    /** Convert to title case, starting at
104        position <em>nPos</em> for <em>nCount</em> code points.
105     */
106    string   toTitle( [in] string aText, [in] long nPos, [in] long nCount,
107                      [in] com::sun::star::lang::Locale aLocale );
108
109    //------------------------------------------------------------------------
110    /// Get <type>UnicodeType</type> of character at position <em>nPos</em>.
111    short    getType( [in] string aText, [in] long nPos );
112
113    //------------------------------------------------------------------------
114    /** Get <type>DirectionProperty</type> of character at position
115        <em>nPos</em>.
116     */
117    short    getCharacterDirection( [in] string aText, [in] long nPos );
118
119    //------------------------------------------------------------------------
120    /// Get <type>UnicodeScript</type> of character at position <em>nPos</em>.
121    short    getScript( [in] string aText, [in] long nPos );
122
123    //------------------------------------------------------------------------
124    /// Get <type>KCharacterType</type> of character at position <em>nPos</em>.
125    long getCharacterType( [in] string aText, [in] long nPos,
126                           [in] com::sun::star::lang::Locale aLocale );
127
128    //------------------------------------------------------------------------
129    /** Get accumulated <type>KCharacterType</type>s of string starting
130        at position <em>nPos</em> of length <em>nCount</em> code points.
131
132        @returns
133            A number with appropriate flags set to indicate what type of
134            characters the string contains, each flag value being one of
135            KCharacterType values.
136    */
137    long getStringType( [in] string aText, [in] long nPos, [in] long nCount,
138                        [in] com::sun::star::lang::Locale aLocale );
139
140
141    //------------------------------------------------------------------------
142    /**
143        Parse a string for a token starting at position <em>nPos</em>.
144
145        <p> A name or identifier must match the
146        <type>KParseTokens</type> criteria passed in
147        <em>nStartCharFlags</em> and <em>nContCharFlags</em> and may
148        additionally contain characters of
149        <em>aUserDefinedCharactersStart</em> and/or
150        <em>aUserDefinedCharactersCont</em>. </p>
151
152
153        @returns
154            A filled <type>ParseResult</type> structure. If no
155            unambiguous token could be parsed,
156            <member>ParseResult::TokenType</member> will be set to
157            <b>0</b> (zero), other fields will contain the values parsed
158            so far.
159
160            <p> If a token may represent either a numeric value or a
161            name according to the passed Start/Cont-Flags/Chars, both
162            <const>KParseType::ASC_NUMBER</const> (or
163            <const>KParseType::UNI_NUMBER</const>) and
164            <const>KParseType::IDENTNAME</const> are set in
165            <member>ParseResult::TokenType</member>. </p>
166
167        @param  aText
168            Text to be parsed.
169
170        @param  nPos
171            Position where parsing starts.
172
173        @param  aLocale
174            The locale, for example, for decimal and group separator or
175            character type determination.
176
177        @param  nStartCharFlags
178            A set of <type>KParseTokens</type> constants determining the
179            allowed characters a name or identifier may start with.
180
181        @param  aUserDefinedCharactersStart
182            A set of additionally allowed characters a name or
183            identifier may start with.
184
185        @param  nContCharFlags
186            A set of <type>KParseTokens</type> constants determining the
187            allowed characters a name or identifier may continue with.
188
189        @param  aUserDefinedCharactersCont
190            A set of additionally allowed characters a name or
191            identifier may continue with.
192
193        @example:C++
194        <listing>
195            using namespace ::com::sun::star::i18n;
196            // First character of an identifier may be any alphabetic or underscore.
197            sal_Int32 nStartFlags = KParseTokens::ANY_ALPHA | KParseTokens::ASC_UNDERSCORE;
198            // Continuing characters may be any alphanumeric or underscore or dot.
199            sal_Int32 nContFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE | KParseTokens::ASC_DOT;
200            // No further characters assumed to be contained in an identifier.
201            String aEmptyString;
202            // Parse any token.
203            ParseResult rRes = xCC->parseAnyToken( aText, nPos, aLocale,
204                nStartFlags, aEmptyString, nContFlags, aEmptyString );
205            // Get parsed token.
206            if ( rRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER) )
207                fValue = rRes.Value;
208            if ( rRes.TokenType & KParseType::IDENTNAME )
209                aName = aText.Copy( nPos, rRes.EndPos - nPos );
210            else if ( rRes.TokenType & KParseType::SINGLE_QUOTE_NAME )
211                aName = rRes.DequotedNameOrString;
212            else if ( rRes.TokenType & KParseType::DOUBLE_QUOTE_STRING )
213                aString = rRes.DequotedNameOrString;
214            else if ( rRes.TokenType & KParseType::BOOLEAN )
215                aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
216            else if ( rRes.TokenType & KParseType::ONE_SINGLE_CHAR )
217                aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
218        </listing>
219     */
220
221    ParseResult parseAnyToken(
222                            [in] string aText,
223                            [in] long nPos,
224                            [in] com::sun::star::lang::Locale aLocale,
225                            [in] long nStartCharFlags,
226                            [in] string aUserDefinedCharactersStart,
227                            [in] long nContCharFlags,
228                            [in] string aUserDefinedCharactersCont
229                            );
230
231    //------------------------------------------------------------------------
232    /**
233        Parse a string for a token of type <em>nTokenType</em> starting
234        at position <em>nPos</em>.
235
236        <p> Other parameters are the same as in
237        <member>parseAnyToken</member>. If the actual token does not
238        match the passed <em>nTokenType</em> a
239        <member>ParseResult::TokenType</member> set to <b>0</b> (zero)
240        is returned. </p>
241
242        @param  nTokenType
243            One or more of the <type>KParseType</type> constants.
244
245        @example:C++
246        <listing>
247            // Determine if a given name is a valid name (not quoted) and contains
248            // only allowed characters.
249            using namespace ::com::sun::star::i18n;
250            // First character of an identifier may be any alphanumeric or underscore.
251            sal_Int32 nStartFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE;
252            // No further characters assumed to be contained in an identifier start.
253            String aEmptyString;
254            // Continuing characters may be any alphanumeric or underscore.
255            sal_Int32 nContFlags = nStartFlags;
256            // Additionally, continuing characters may contain a blank.
257            String aContChars( RTL_CONSTASCII_USTRINGPARAM(" ") );
258            // Parse predefined (must be an IDENTNAME) token.
259            ParseResult rRes = xCC->parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale,
260                nStartFlags, aEmptyString, nContFlags, aContChars );
261            // Test that the token is an identifier name and that it spans
262            // the whole string, i.e. nothing else follows it.
263            bValid = (rRes.TokenType & KParseType::IDENTNAME) && rRes.EndPos == rName.Len();
264        </listing>
265     */
266
267    ParseResult parsePredefinedToken(
268                            [in] long nTokenType,
269                            [in] string aText,
270                            [in] long nPos,
271                            [in] com::sun::star::lang::Locale aLocale,
272                            [in] long nStartCharFlags,
273                            [in] string aUserDefinedCharactersStart,
274                            [in] long nContCharFlags,
275                            [in] string aUserDefinedCharactersCont
276                            );
277};
278
279//=============================================================================
280}; }; }; };
281
282#endif
283