xref: /AOO41X/main/i18npool/inc/cclass_unicode.hxx (revision f7bd9df41d712080226d57e4f6f528539c130a0c)
1*f7bd9df4SAndrew Rist /**************************************************************
2cdf0e10cSrcweir  *
3*f7bd9df4SAndrew Rist  * Licensed to the Apache Software Foundation (ASF) under one
4*f7bd9df4SAndrew Rist  * or more contributor license agreements.  See the NOTICE file
5*f7bd9df4SAndrew Rist  * distributed with this work for additional information
6*f7bd9df4SAndrew Rist  * regarding copyright ownership.  The ASF licenses this file
7*f7bd9df4SAndrew Rist  * to you under the Apache License, Version 2.0 (the
8*f7bd9df4SAndrew Rist  * "License"); you may not use this file except in compliance
9*f7bd9df4SAndrew Rist  * with the License.  You may obtain a copy of the License at
10cdf0e10cSrcweir  *
11*f7bd9df4SAndrew Rist  *   http://www.apache.org/licenses/LICENSE-2.0
12cdf0e10cSrcweir  *
13*f7bd9df4SAndrew Rist  * Unless required by applicable law or agreed to in writing,
14*f7bd9df4SAndrew Rist  * software distributed under the License is distributed on an
15*f7bd9df4SAndrew Rist  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*f7bd9df4SAndrew Rist  * KIND, either express or implied.  See the License for the
17*f7bd9df4SAndrew Rist  * specific language governing permissions and limitations
18*f7bd9df4SAndrew Rist  * under the License.
19cdf0e10cSrcweir  *
20*f7bd9df4SAndrew Rist  *************************************************************/
21*f7bd9df4SAndrew Rist 
22*f7bd9df4SAndrew Rist 
23cdf0e10cSrcweir #ifndef _I18N_CCLASS_UNICODE_HXX_
24cdf0e10cSrcweir #define _I18N_CCLASS_UNICODE_HXX_
25cdf0e10cSrcweir 
26cdf0e10cSrcweir #include <com/sun/star/i18n/XNativeNumberSupplier.hpp>
27cdf0e10cSrcweir #include <com/sun/star/i18n/XCharacterClassification.hpp>
28cdf0e10cSrcweir #include <com/sun/star/i18n/XLocaleData.hpp>
29cdf0e10cSrcweir #include <com/sun/star/lang/XMultiServiceFactory.hpp>
30cdf0e10cSrcweir #include <cppuhelper/implbase1.hxx> // helper for implementations
31cdf0e10cSrcweir #include <com/sun/star/lang/XServiceInfo.hpp>
32cdf0e10cSrcweir 
33cdf0e10cSrcweir #define TRANSLITERATION_casemapping
34cdf0e10cSrcweir #include <transliteration_body.hxx>
35cdf0e10cSrcweir 
36cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n {
37cdf0e10cSrcweir 
/// Integer type of the flag words used by the token parser: it is the element
/// type of cclass_Unicode::pDefaultParserTable and the type of the
/// cclass_Unicode::TOKEN_* constants, and the return type of the getFlags*()
/// accessors declared below.
38cdf0e10cSrcweir typedef sal_uInt32 UPT_FLAG_TYPE;
39cdf0e10cSrcweir 
/**
 * Character classification component exposing the UNO interface
 * XCharacterClassification through cppu::WeakImplHelper1.
 *
 * The class also carries the state of a token parser (table pointers, scan
 * state, separators, locale references); per the section marker further down,
 * that part is implemented in cclass_unicode_parser.cxx.
 *
 * NOTE(review): the class holds raw pointers (trans, pTable, pStart, pCont)
 * and declares a destructor, but no copy constructor / assignment operator is
 * declared in this header. Ownership of those pointers is not visible here --
 * confirm in the .cxx files whether copying must be prevented.
 */
40cdf0e10cSrcweir class cclass_Unicode : public cppu::WeakImplHelper1 < XCharacterClassification >
41cdf0e10cSrcweir {
42cdf0e10cSrcweir public:
    /// Construct from a service factory reference.
    /// NOTE(review): presumably stored in xMSF for later service lookups -- confirm in the .cxx.
43cdf0e10cSrcweir     cclass_Unicode(com::sun::star::uno::Reference < com::sun::star::lang::XMultiServiceFactory > xSMgr );
44cdf0e10cSrcweir     ~cclass_Unicode();
45cdf0e10cSrcweir 
    // XCharacterClassification
    // The following ten methods carry the names and signatures of the UNO
    // interface com.sun.star.i18n.XCharacterClassification; see its IDL
    // documentation for the contract of each. All of them are declared to
    // throw only com::sun::star::uno::RuntimeException.
46cdf0e10cSrcweir     virtual rtl::OUString SAL_CALL toUpper( const rtl::OUString& Text, sal_Int32 nPos, sal_Int32 nCount,
47cdf0e10cSrcweir         const com::sun::star::lang::Locale& rLocale ) throw(com::sun::star::uno::RuntimeException);
48cdf0e10cSrcweir     virtual rtl::OUString SAL_CALL toLower( const rtl::OUString& Text, sal_Int32 nPos, sal_Int32 nCount,
49cdf0e10cSrcweir         const com::sun::star::lang::Locale& rLocale ) throw(com::sun::star::uno::RuntimeException);
50cdf0e10cSrcweir     virtual rtl::OUString SAL_CALL toTitle( const rtl::OUString& Text, sal_Int32 nPos, sal_Int32 nCount,
51cdf0e10cSrcweir         const com::sun::star::lang::Locale& rLocale ) throw(com::sun::star::uno::RuntimeException);
52cdf0e10cSrcweir     virtual sal_Int16 SAL_CALL getType( const rtl::OUString& Text, sal_Int32 nPos )  throw(com::sun::star::uno::RuntimeException);
53cdf0e10cSrcweir     virtual sal_Int16 SAL_CALL getCharacterDirection( const rtl::OUString& Text, sal_Int32 nPos )
54cdf0e10cSrcweir         throw(com::sun::star::uno::RuntimeException);
55cdf0e10cSrcweir     virtual sal_Int16 SAL_CALL getScript( const rtl::OUString& Text, sal_Int32 nPos ) throw(com::sun::star::uno::RuntimeException);
56cdf0e10cSrcweir     virtual sal_Int32 SAL_CALL getCharacterType( const rtl::OUString& text, sal_Int32 nPos,
57cdf0e10cSrcweir         const com::sun::star::lang::Locale& rLocale ) throw(com::sun::star::uno::RuntimeException);
58cdf0e10cSrcweir     virtual sal_Int32 SAL_CALL getStringType( const rtl::OUString& text, sal_Int32 nPos, sal_Int32 nCount,
59cdf0e10cSrcweir         const com::sun::star::lang::Locale& rLocale ) throw(com::sun::star::uno::RuntimeException);
    // Token parsing entry points. Both take the start/continuation character
    // flag sets plus user-defined start/continuation characters and return a
    // ParseResult by value.
60cdf0e10cSrcweir     virtual ParseResult SAL_CALL parseAnyToken( const rtl::OUString& Text, sal_Int32 nPos,
61cdf0e10cSrcweir         const com::sun::star::lang::Locale& rLocale, sal_Int32 nStartCharFlags, const rtl::OUString& userDefinedCharactersStart,
62cdf0e10cSrcweir         sal_Int32 nContCharFlags, const rtl::OUString& userDefinedCharactersCont ) throw(com::sun::star::uno::RuntimeException);
63cdf0e10cSrcweir     virtual ParseResult SAL_CALL parsePredefinedToken( sal_Int32 nTokenType, const rtl::OUString& Text,
64cdf0e10cSrcweir         sal_Int32 nPos, const com::sun::star::lang::Locale& rLocale, sal_Int32 nStartCharFlags,
65cdf0e10cSrcweir         const rtl::OUString& userDefinedCharactersStart, sal_Int32 nContCharFlags,
66cdf0e10cSrcweir         const rtl::OUString& userDefinedCharactersCont ) throw(com::sun::star::uno::RuntimeException);
67cdf0e10cSrcweir 
68cdf0e10cSrcweir     //XServiceInfo
    // NOTE(review): these three methods match the signatures of
    // com.sun.star.lang.XServiceInfo, but XServiceInfo is not among the
    // interfaces passed to WeakImplHelper1 above. Unless
    // XCharacterClassification itself provides them, they are new virtual
    // functions rather than overrides -- confirm against the IDL.
69cdf0e10cSrcweir     virtual rtl::OUString SAL_CALL getImplementationName() throw( com::sun::star::uno::RuntimeException );
70cdf0e10cSrcweir     virtual sal_Bool SAL_CALL supportsService(const rtl::OUString& ServiceName) throw( com::sun::star::uno::RuntimeException );
71cdf0e10cSrcweir     virtual com::sun::star::uno::Sequence< rtl::OUString > SAL_CALL getSupportedServiceNames() throw( com::sun::star::uno::RuntimeException );
72cdf0e10cSrcweir 
73cdf0e10cSrcweir protected:
    /// C string accessible to derived classes.
    /// NOTE(review): presumably the implementation/service name reported by
    /// the XServiceInfo-style methods above -- confirm in the .cxx.
74cdf0e10cSrcweir     const sal_Char *cClass;
75cdf0e10cSrcweir 
76cdf0e10cSrcweir private:
    /// Case-mapping transliteration helper (type comes from
    /// transliteration_body.hxx, included with TRANSLITERATION_casemapping defined).
    /// NOTE(review): raw pointer; presumably owned by this object and released
    /// in the destructor -- confirm.
77cdf0e10cSrcweir     Transliteration_casemapping *trans;
78cdf0e10cSrcweir 
79cdf0e10cSrcweir // --- parser specific (implemented in cclass_unicode_parser.cxx) ---
80cdf0e10cSrcweir 
    /// Values held by eState while scanning.
    /// NOTE(review): the meaning of the individual states is defined by the
    /// parser implementation, which is not visible in this header.
81cdf0e10cSrcweir     enum ScanState
82cdf0e10cSrcweir     {
83cdf0e10cSrcweir         ssGetChar,
84cdf0e10cSrcweir         ssGetValue,
85cdf0e10cSrcweir         ssGetWord,
86cdf0e10cSrcweir         ssGetWordFirstChar,
87cdf0e10cSrcweir         ssGetString,
88cdf0e10cSrcweir         ssGetBool,
89cdf0e10cSrcweir         ssRewindFromValue,
90cdf0e10cSrcweir         ssIgnoreLeadingInRewind,
91cdf0e10cSrcweir         ssStopBack,
92cdf0e10cSrcweir         ssBounce,
93cdf0e10cSrcweir         ssStop
94cdf0e10cSrcweir     };
95cdf0e10cSrcweir 
    // Static parser data; only declared here, the definitions (and array
    // sizes) live in the implementation file.
    // NOTE(review): nDefCnt is presumably the element count of
    // pDefaultParserTable -- confirm.
96cdf0e10cSrcweir     static const sal_uInt8      nDefCnt;
97cdf0e10cSrcweir     static const UPT_FLAG_TYPE  pDefaultParserTable[];
98cdf0e10cSrcweir     static const sal_Int32      pParseTokensType[];
99cdf0e10cSrcweir 
100cdf0e10cSrcweir     /// Flag values of table.
    // Values are defined out of line; they are UPT_FLAG_TYPE constants, i.e.
    // the same type as the entries of pDefaultParserTable.
101cdf0e10cSrcweir     static const UPT_FLAG_TYPE  TOKEN_ILLEGAL;
102cdf0e10cSrcweir     static const UPT_FLAG_TYPE  TOKEN_CHAR;
103cdf0e10cSrcweir     static const UPT_FLAG_TYPE  TOKEN_CHAR_BOOL;
104cdf0e10cSrcweir     static const UPT_FLAG_TYPE  TOKEN_CHAR_WORD;
105cdf0e10cSrcweir     static const UPT_FLAG_TYPE  TOKEN_CHAR_VALUE;
106cdf0e10cSrcweir     static const UPT_FLAG_TYPE  TOKEN_CHAR_STRING;
107cdf0e10cSrcweir     static const UPT_FLAG_TYPE  TOKEN_CHAR_DONTCARE;
108cdf0e10cSrcweir     static const UPT_FLAG_TYPE  TOKEN_BOOL;
109cdf0e10cSrcweir     static const UPT_FLAG_TYPE  TOKEN_WORD;
110cdf0e10cSrcweir     static const UPT_FLAG_TYPE  TOKEN_WORD_SEP;
111cdf0e10cSrcweir     static const UPT_FLAG_TYPE  TOKEN_VALUE;
112cdf0e10cSrcweir     static const UPT_FLAG_TYPE  TOKEN_VALUE_SEP;
113cdf0e10cSrcweir     static const UPT_FLAG_TYPE  TOKEN_VALUE_EXP;
114cdf0e10cSrcweir     static const UPT_FLAG_TYPE  TOKEN_VALUE_SIGN;
115cdf0e10cSrcweir     static const UPT_FLAG_TYPE  TOKEN_VALUE_EXP_VALUE;
116cdf0e10cSrcweir     static const UPT_FLAG_TYPE  TOKEN_VALUE_DIGIT;
117cdf0e10cSrcweir     static const UPT_FLAG_TYPE  TOKEN_NAME_SEP;
118cdf0e10cSrcweir     static const UPT_FLAG_TYPE  TOKEN_STRING_SEP;
119cdf0e10cSrcweir     static const UPT_FLAG_TYPE  TOKEN_EXCLUDED;
120cdf0e10cSrcweir 
121cdf0e10cSrcweir     /// If and where c occurs in pStr
    /// Returns a pointer into a sal_Unicode string.
    /// NOTE(review): presumably a null pointer when c is absent and pStr must
    /// be NUL-terminated (no length is passed) -- confirm in the implementation.
122cdf0e10cSrcweir     static  const sal_Unicode*  StrChr( const sal_Unicode* pStr, sal_Unicode c );
123cdf0e10cSrcweir 
124cdf0e10cSrcweir 
    /// Service factory reference (same type as the constructor argument).
125cdf0e10cSrcweir     com::sun::star::uno::Reference < com::sun::star::lang::XMultiServiceFactory > xMSF;
126cdf0e10cSrcweir 
127cdf0e10cSrcweir     /// used for parser only
128cdf0e10cSrcweir     com::sun::star::lang::Locale    aParserLocale;
129cdf0e10cSrcweir     com::sun::star::uno::Reference < XLocaleData > xLocaleData;
130cdf0e10cSrcweir     com::sun::star::uno::Reference < com::sun::star::i18n::XNativeNumberSupplier > xNatNumSup;
    // NOTE(review): aStartChars/aContChars and nStartTypes/nContTypes
    // presumably cache the arguments of the last setupParserTable() call so
    // the table is rebuilt "only if needed" -- confirm.
131cdf0e10cSrcweir     rtl::OUString             aStartChars;
132cdf0e10cSrcweir     rtl::OUString             aContChars;
    // Mutable flag tables. NOTE(review): raw pointers; presumably allocated by
    // initParserTable() and released by destroyParserTable() -- confirm.
133cdf0e10cSrcweir     UPT_FLAG_TYPE*              pTable;
134cdf0e10cSrcweir     UPT_FLAG_TYPE*              pStart;
135cdf0e10cSrcweir     UPT_FLAG_TYPE*              pCont;
136cdf0e10cSrcweir     sal_Int32                   nStartTypes;
137cdf0e10cSrcweir     sal_Int32                   nContTypes;
    /// Current scanner state (one of the ScanState values).
138cdf0e10cSrcweir     ScanState                   eState;
    // NOTE(review): presumably the group and decimal separators of
    // aParserLocale -- confirm where they are assigned.
139cdf0e10cSrcweir     sal_Unicode                 cGroupSep;
140cdf0e10cSrcweir     sal_Unicode                 cDecimalSep;
141cdf0e10cSrcweir 
142cdf0e10cSrcweir     /// Get corresponding KParseTokens flag for a character
    /// The character is addressed as position nPos within the buffer aStr.
143cdf0e10cSrcweir     sal_Int32 getParseTokensType( const sal_Unicode* aStr, sal_Int32 nPos );
144cdf0e10cSrcweir 
145cdf0e10cSrcweir     /// Access parser table flags.
146cdf0e10cSrcweir     UPT_FLAG_TYPE getFlags( const sal_Unicode* aStr, sal_Int32 nPos );
147cdf0e10cSrcweir 
148cdf0e10cSrcweir     /// Access parser flags via International and special definitions.
149cdf0e10cSrcweir     UPT_FLAG_TYPE getFlagsExtended( const sal_Unicode* aStr, sal_Int32 nPos );
150cdf0e10cSrcweir 
151cdf0e10cSrcweir     /// Access parser table flags for user defined start characters.
152cdf0e10cSrcweir     UPT_FLAG_TYPE getStartCharsFlags( sal_Unicode c );
153cdf0e10cSrcweir 
154cdf0e10cSrcweir     /// Access parser table flags for user defined continuation characters.
155cdf0e10cSrcweir     UPT_FLAG_TYPE getContCharsFlags( sal_Unicode c );
156cdf0e10cSrcweir 
157cdf0e10cSrcweir     /// Setup parser table. Calls initParserTable() only if needed.
    /// Takes the same locale / token-type / user-defined-character arguments
    /// as initParserTable().
158cdf0e10cSrcweir     void setupParserTable( const com::sun::star::lang::Locale& rLocale, sal_Int32 startCharTokenType,
159cdf0e10cSrcweir         const rtl::OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
160cdf0e10cSrcweir         const rtl::OUString& userDefinedCharactersCont );
161cdf0e10cSrcweir 
162cdf0e10cSrcweir     /// Init parser table.
163cdf0e10cSrcweir     void initParserTable( const com::sun::star::lang::Locale& rLocale, sal_Int32 startCharTokenType,
164cdf0e10cSrcweir         const rtl::OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
165cdf0e10cSrcweir         const rtl::OUString& userDefinedCharactersCont );
166cdf0e10cSrcweir 
167cdf0e10cSrcweir     /// Destroy parser table.
168cdf0e10cSrcweir     void destroyParserTable();
169cdf0e10cSrcweir 
170cdf0e10cSrcweir     /// Parse a text.
    /// The result is written into r; scanning of rText starts at nPos.
    /// NOTE(review): the default for nTokenType is the unsigned literal
    /// 0xffffffff converted to sal_Int32 (all 32 bits set); presumably this
    /// means "accept every token type" -- confirm. The literal is outside the
    /// positive range of a signed 32-bit integer, so the default relies on the
    /// unsigned-to-signed conversion.
171cdf0e10cSrcweir     void parseText( ParseResult& r, const rtl::OUString& rText, sal_Int32 nPos,
172cdf0e10cSrcweir         sal_Int32 nTokenType = 0xffffffff );
173cdf0e10cSrcweir 
174cdf0e10cSrcweir     /// Setup International class, new'ed only if different from existing.
    /// NOTE(review): the meaning of the sal_Bool result is not visible here --
    /// presumably success/failure of obtaining the locale data; confirm.
175cdf0e10cSrcweir     sal_Bool setupInternational( const com::sun::star::lang::Locale& rLocale );
176cdf0e10cSrcweir 
177cdf0e10cSrcweir     /// Implementation of getCharacterType() for one single character
    /// nPos is passed by pointer together with an increment.
    /// NOTE(review): presumably *nPos is advanced by the helper so callers can
    /// iterate over Text -- confirm direction/semantics of increment.
178cdf0e10cSrcweir     sal_Int32 SAL_CALL getCharType( const rtl::OUString& Text, sal_Int32 *nPos, sal_Int32 increment);
179cdf0e10cSrcweir 
180cdf0e10cSrcweir };
181cdf0e10cSrcweir 
182cdf0e10cSrcweir } } } }
183cdf0e10cSrcweir 
184cdf0e10cSrcweir #endif
185