xref: /aoo42x/main/svl/source/numbers/zforfind.hxx (revision 39a19a47)
/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/
21*39a19a47SAndrew Rist 
22*39a19a47SAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir #ifndef _ZFORFIND_HXX
25cdf0e10cSrcweir #define _ZFORFIND_HXX
26cdf0e10cSrcweir 
27cdf0e10cSrcweir #include <tools/string.hxx>
28cdf0e10cSrcweir 
// Forward declarations: these types are only used via pointer in this header.
class Date;
class SvNumberformat;
class SvNumberFormatter;

/// Max count of substrings in input scanner ("Anz" = German "Anzahl", count).
/// Used below as the dimension of the scanner's per-substring arrays.
#define SV_MAX_ANZ_INPUT_STRINGS  20
34cdf0e10cSrcweir 
35cdf0e10cSrcweir class ImpSvNumberInputScan
36cdf0e10cSrcweir {
37cdf0e10cSrcweir public:
38cdf0e10cSrcweir     ImpSvNumberInputScan( SvNumberFormatter* pFormatter );
39cdf0e10cSrcweir     ~ImpSvNumberInputScan();
40cdf0e10cSrcweir 
41cdf0e10cSrcweir /*!*/   void ChangeIntl();                      // MUST be called if language changes
42cdf0e10cSrcweir 
43cdf0e10cSrcweir     /// set reference date for offset calculation
44cdf0e10cSrcweir     void ChangeNullDate(
45cdf0e10cSrcweir             const sal_uInt16 nDay,
46cdf0e10cSrcweir             const sal_uInt16 nMonth,
47cdf0e10cSrcweir             const sal_uInt16 nYear );
48cdf0e10cSrcweir 
49cdf0e10cSrcweir     /// convert input string to number
50cdf0e10cSrcweir     sal_Bool IsNumberFormat(
51cdf0e10cSrcweir             const String& rString,              /// input string
52cdf0e10cSrcweir             short& F_Type,                      /// format type (in + out)
53cdf0e10cSrcweir             double& fOutNumber,                 /// value determined (out)
54cdf0e10cSrcweir             const SvNumberformat* pFormat = NULL    /// optional a number format to which compare against
55cdf0e10cSrcweir             );
56cdf0e10cSrcweir 
57cdf0e10cSrcweir     /// after IsNumberFormat: get decimal position
GetDecPos() const58cdf0e10cSrcweir     short   GetDecPos() const { return nDecPos; }
59cdf0e10cSrcweir     /// after IsNumberFormat: get count of numeric substrings in input string
GetAnzNums() const60cdf0e10cSrcweir     sal_uInt16  GetAnzNums() const { return nAnzNums; }
61cdf0e10cSrcweir 
62cdf0e10cSrcweir     /// set threshold of two-digit year input
SetYear2000(sal_uInt16 nVal)63cdf0e10cSrcweir     void    SetYear2000( sal_uInt16 nVal ) { nYear2000 = nVal; }
64cdf0e10cSrcweir     /// get threshold of two-digit year input
GetYear2000() const65cdf0e10cSrcweir     sal_uInt16  GetYear2000() const { return nYear2000; }
66cdf0e10cSrcweir 
67cdf0e10cSrcweir private:
68cdf0e10cSrcweir     SvNumberFormatter*  pFormatter;
69cdf0e10cSrcweir     String* pUpperMonthText;                    // Array of month names, uppercase
70cdf0e10cSrcweir     String* pUpperAbbrevMonthText;              // Array of month names, abbreviated, uppercase
71cdf0e10cSrcweir     String* pUpperDayText;                      // Array of day of week names, uppercase
72cdf0e10cSrcweir     String* pUpperAbbrevDayText;                // Array of day of week names, abbreviated, uppercase
73cdf0e10cSrcweir     String  aUpperCurrSymbol;                   // Currency symbol, uppercase
74cdf0e10cSrcweir     sal_Bool    bTextInitialized;                   // Whether days and months are initialized
75cdf0e10cSrcweir     Date* pNullDate;                            // 30Dec1899
76cdf0e10cSrcweir                                                 // Variables for provisional results:
77cdf0e10cSrcweir     String sStrArray[SV_MAX_ANZ_INPUT_STRINGS]; // Array of scanned substrings
78cdf0e10cSrcweir     sal_Bool   IsNum[SV_MAX_ANZ_INPUT_STRINGS];     // Whether a substring is numeric
79cdf0e10cSrcweir     sal_uInt16 nNums[SV_MAX_ANZ_INPUT_STRINGS];     // Sequence of offsets to numeric strings
80cdf0e10cSrcweir     sal_uInt16 nAnzStrings;                         // Total count of scanned substrings
81cdf0e10cSrcweir     sal_uInt16 nAnzNums;                            // Count of numeric substrings
82cdf0e10cSrcweir     sal_Bool   bDecSepInDateSeps;                   // True <=> DecSep in {.,-,/,DateSep}
83cdf0e10cSrcweir     sal_uInt8   nMatchedAllStrings;                  // Scan...String() matched all substrings,
84cdf0e10cSrcweir                                                 // bit mask of nMatched... constants
85cdf0e10cSrcweir 
86cdf0e10cSrcweir     static const sal_uInt8 nMatchedEndString;        // 0x01
87cdf0e10cSrcweir     static const sal_uInt8 nMatchedMidString;        // 0x02
88cdf0e10cSrcweir     static const sal_uInt8 nMatchedStartString;      // 0x04
89cdf0e10cSrcweir     static const sal_uInt8 nMatchedVirgin;           // 0x08
90cdf0e10cSrcweir     static const sal_uInt8 nMatchedUsedAsReturn;     // 0x10
91cdf0e10cSrcweir 
92cdf0e10cSrcweir     int    nSign;                               // Sign of number
93cdf0e10cSrcweir     short  nMonth;                              // Month (1..x) if date
94cdf0e10cSrcweir                                                 // negative => short format
95cdf0e10cSrcweir     short  nMonthPos;                           // 1 = front, 2 = middle
96cdf0e10cSrcweir                                                 // 3 = end
97cdf0e10cSrcweir     sal_uInt16 nTimePos;                            // Index of first time separator (+1)
98cdf0e10cSrcweir     short  nDecPos;                             // Index of substring containing "," (+1)
99cdf0e10cSrcweir     short  nNegCheck;                           // '( )' for negative
100cdf0e10cSrcweir     short  nESign;                              // Sign of exponent
101cdf0e10cSrcweir     short  nAmPm;                               // +1 AM, -1 PM, 0 if none
102cdf0e10cSrcweir     short  nLogical;                            // -1 => False, 1 => True
103cdf0e10cSrcweir     sal_uInt16 nThousand;                           // Count of group (AKA thousand) separators
104cdf0e10cSrcweir     sal_uInt16 nPosThousandString;                  // Position of concatenaded 000,000,000 string
105cdf0e10cSrcweir     short  eScannedType;                        // Scanned type
106cdf0e10cSrcweir     short  eSetType;                            // Preset Type
107cdf0e10cSrcweir 
108cdf0e10cSrcweir     sal_uInt16 nStringScanNumFor;                   // Fixed strings recognized in
109cdf0e10cSrcweir                                                 // pFormat->NumFor[nNumForStringScan]
110cdf0e10cSrcweir     short  nStringScanSign;                     // Sign resulting of FixString
111cdf0e10cSrcweir     sal_uInt16 nYear2000;                           // Two-digit threshold
112cdf0e10cSrcweir                                                 // Year as 20xx
113cdf0e10cSrcweir                                                 // default 18
114cdf0e10cSrcweir                                                 // number <= nYear2000 => 20xx
115cdf0e10cSrcweir                                                 // number >  nYear2000 => 19xx
116cdf0e10cSrcweir     sal_uInt16  nTimezonePos;                       // Index of timezone separator (+1)
117cdf0e10cSrcweir     sal_uInt8    nMayBeIso8601;                      // 0:=dontknowyet, 1:=yes, 2:=no
118cdf0e10cSrcweir 
119cdf0e10cSrcweir #ifdef _ZFORFIND_CXX        // methods private to implementation
120cdf0e10cSrcweir     void Reset();                               // Reset all variables before start of analysis
121cdf0e10cSrcweir 
122cdf0e10cSrcweir     void InitText();                            // Init of months and days of week
123cdf0e10cSrcweir 
124cdf0e10cSrcweir     // Convert string to double.
125cdf0e10cSrcweir     // Only simple unsigned floating point values without any error detection,
126cdf0e10cSrcweir     // decimal separator has to be '.'
127cdf0e10cSrcweir     // If bForceFraction==sal_True the string is taken to be the fractional part
128cdf0e10cSrcweir     // of 0.1234 without the leading 0. (thus being just "1234").
129cdf0e10cSrcweir     double StringToDouble(
130cdf0e10cSrcweir             const String& rStr,
131cdf0e10cSrcweir             sal_Bool bForceFraction = sal_False );
132cdf0e10cSrcweir 
133cdf0e10cSrcweir     sal_Bool NextNumberStringSymbol(                // Next number/string symbol
134cdf0e10cSrcweir             const sal_Unicode*& pStr,
135cdf0e10cSrcweir             String& rSymbol );
136cdf0e10cSrcweir 
137cdf0e10cSrcweir     sal_Bool SkipThousands(                         // Concatenate ,000,23 blocks
138cdf0e10cSrcweir             const sal_Unicode*& pStr,           // in input to 000123
139cdf0e10cSrcweir             String& rSymbol );
140cdf0e10cSrcweir 
141cdf0e10cSrcweir     void NumberStringDivision(                  // Divide numbers/strings into
142cdf0e10cSrcweir             const String& rString );            // arrays and variables above.
143cdf0e10cSrcweir                                                 // Leading blanks and blanks
144cdf0e10cSrcweir                                                 // after numbers are thrown away
145cdf0e10cSrcweir 
146cdf0e10cSrcweir 
147cdf0e10cSrcweir                                                 // optimized substring versions
148cdf0e10cSrcweir 
StringContains(const String & rWhat,const String & rString,xub_StrLen nPos)149cdf0e10cSrcweir     static inline sal_Bool StringContains(          // Whether rString contains rWhat at nPos
150cdf0e10cSrcweir             const String& rWhat,
151cdf0e10cSrcweir             const String& rString,
152cdf0e10cSrcweir             xub_StrLen nPos )
153cdf0e10cSrcweir                 {   // mostly used with one character
154cdf0e10cSrcweir                     if ( rWhat.GetChar(0) != rString.GetChar(nPos) )
155cdf0e10cSrcweir                         return sal_False;
156cdf0e10cSrcweir                     return StringContainsImpl( rWhat, rString, nPos );
157cdf0e10cSrcweir                 }
StringPtrContains(const String & rWhat,const sal_Unicode * pString,xub_StrLen nPos)158cdf0e10cSrcweir     static inline sal_Bool StringPtrContains(       // Whether pString contains rWhat at nPos
159cdf0e10cSrcweir             const String& rWhat,
160cdf0e10cSrcweir             const sal_Unicode* pString,
161cdf0e10cSrcweir             xub_StrLen nPos )                   // nPos MUST be a valid offset from pString
162cdf0e10cSrcweir                 {   // mostly used with one character
163cdf0e10cSrcweir                     if ( rWhat.GetChar(0) != *(pString+nPos) )
164cdf0e10cSrcweir                         return sal_False;
165cdf0e10cSrcweir                     return StringPtrContainsImpl( rWhat, pString, nPos );
166cdf0e10cSrcweir                 }
167cdf0e10cSrcweir     static sal_Bool StringContainsImpl(             //! DO NOT use directly
168cdf0e10cSrcweir             const String& rWhat,
169cdf0e10cSrcweir             const String& rString,
170cdf0e10cSrcweir             xub_StrLen nPos );
171cdf0e10cSrcweir     static sal_Bool StringPtrContainsImpl(          //! DO NOT use directly
172cdf0e10cSrcweir             const String& rWhat,
173cdf0e10cSrcweir             const sal_Unicode* pString,
174cdf0e10cSrcweir             xub_StrLen nPos );
175cdf0e10cSrcweir 
176cdf0e10cSrcweir 
177cdf0e10cSrcweir     static inline sal_Bool SkipChar(                // Skip a special character
178cdf0e10cSrcweir             sal_Unicode c,
179cdf0e10cSrcweir             const String& rString,
180cdf0e10cSrcweir             xub_StrLen& nPos );
181cdf0e10cSrcweir     static inline void SkipBlanks(              // Skip blank
182cdf0e10cSrcweir             const String& rString,
183cdf0e10cSrcweir             xub_StrLen& nPos );
184cdf0e10cSrcweir     static inline sal_Bool SkipString(              // Jump over rWhat in rString at nPos
185cdf0e10cSrcweir             const String& rWhat,
186cdf0e10cSrcweir             const String& rString,
187cdf0e10cSrcweir             xub_StrLen& nPos );
188cdf0e10cSrcweir 
189cdf0e10cSrcweir     inline sal_Bool GetThousandSep(                 // Recognizes exactly ,111 as group separator
190cdf0e10cSrcweir             const String& rString,
191cdf0e10cSrcweir             xub_StrLen& nPos,
192cdf0e10cSrcweir             sal_uInt16 nStringPos );
193cdf0e10cSrcweir     short GetLogical(                           // Get boolean value
194cdf0e10cSrcweir             const String& rString );
195cdf0e10cSrcweir     short GetMonth(                             // Get month and advance string position
196cdf0e10cSrcweir             const String& rString,
197cdf0e10cSrcweir             xub_StrLen& nPos );
198cdf0e10cSrcweir     int GetDayOfWeek(                           // Get day of week and advance string position
199cdf0e10cSrcweir             const String& rString,
200cdf0e10cSrcweir             xub_StrLen& nPos );
201cdf0e10cSrcweir     sal_Bool GetCurrency(                           // Get currency symbol and advance string position
202cdf0e10cSrcweir             const String& rString,
203cdf0e10cSrcweir             xub_StrLen& nPos,
204cdf0e10cSrcweir             const SvNumberformat* pFormat = NULL ); // optional number format to match against
205cdf0e10cSrcweir     sal_Bool GetTimeAmPm(                           // Get symbol AM or PM and advance string position
206cdf0e10cSrcweir             const String& rString,
207cdf0e10cSrcweir             xub_StrLen& nPos );
208cdf0e10cSrcweir     inline sal_Bool GetDecSep(                      // Get decimal separator and advance string position
209cdf0e10cSrcweir             const String& rString,
210cdf0e10cSrcweir             xub_StrLen& nPos );
211cdf0e10cSrcweir     inline sal_Bool GetTime100SecSep(               // Get hundredth seconds separator and advance string position
212cdf0e10cSrcweir             const String& rString,
213cdf0e10cSrcweir             xub_StrLen& nPos );
214cdf0e10cSrcweir     int GetSign(                                // Get sign  and advance string position
215cdf0e10cSrcweir             const String& rString,              // Including special case '('
216cdf0e10cSrcweir             xub_StrLen& nPos );
217cdf0e10cSrcweir     short GetESign(                             // Get sign of exponent and advance string position
218cdf0e10cSrcweir             const String& rString,
219cdf0e10cSrcweir             xub_StrLen& nPos );
220cdf0e10cSrcweir 
221cdf0e10cSrcweir     inline sal_Bool GetNextNumber(                  // Get next number as array offset
222cdf0e10cSrcweir             sal_uInt16& i,
223cdf0e10cSrcweir             sal_uInt16& j );
224cdf0e10cSrcweir 
225cdf0e10cSrcweir     void GetTimeRef(                            // Converts time -> double (only decimals)
226cdf0e10cSrcweir             double& fOutNumber,                 // result as double
227cdf0e10cSrcweir             sal_uInt16 nIndex,                      // Index of hour in input
228cdf0e10cSrcweir             sal_uInt16 nAnz );                      // Count of time substrings in input
229cdf0e10cSrcweir     sal_uInt16 ImplGetDay  ( sal_uInt16 nIndex );       // Day input, 0 if no match
230cdf0e10cSrcweir     sal_uInt16 ImplGetMonth( sal_uInt16 nIndex );       // Month input, zero based return, NumberOfMonths if no match
231cdf0e10cSrcweir     sal_uInt16 ImplGetYear ( sal_uInt16 nIndex );       // Year input, 0 if no match
232cdf0e10cSrcweir     sal_Bool GetDateRef(                            // Conversion of date to number
233cdf0e10cSrcweir             double& fDays,                      // OUT: days diff to null date
234cdf0e10cSrcweir             sal_uInt16& nCounter,                   // Count of date substrings
235cdf0e10cSrcweir             const SvNumberformat* pFormat = NULL ); // optional number format to match against
236cdf0e10cSrcweir 
237cdf0e10cSrcweir     sal_Bool ScanStartString(                       // Analyze start of string
238cdf0e10cSrcweir             const String& rString,
239cdf0e10cSrcweir             const SvNumberformat* pFormat = NULL );
240cdf0e10cSrcweir     sal_Bool ScanMidString(                         // Analyze middle substring
241cdf0e10cSrcweir             const String& rString,
242cdf0e10cSrcweir             sal_uInt16 nStringPos,
243cdf0e10cSrcweir             const SvNumberformat* pFormat = NULL );
244cdf0e10cSrcweir     sal_Bool ScanEndString(                         // Analyze end of string
245cdf0e10cSrcweir             const String& rString,
246cdf0e10cSrcweir             const SvNumberformat* pFormat = NULL );
247cdf0e10cSrcweir 
248cdf0e10cSrcweir     // Whether input may be a ISO 8601 date format, yyyy-mm-dd...
249cdf0e10cSrcweir     // checks if at least 3 numbers and first number>31
250cdf0e10cSrcweir     bool MayBeIso8601();
251cdf0e10cSrcweir 
252cdf0e10cSrcweir     // Compare rString to substring of array indexed by nString
253cdf0e10cSrcweir     // nString == 0xFFFF => last substring
254cdf0e10cSrcweir     sal_Bool ScanStringNumFor(
255cdf0e10cSrcweir             const String& rString,
256cdf0e10cSrcweir             xub_StrLen nPos,
257cdf0e10cSrcweir             const SvNumberformat* pFormat,
258cdf0e10cSrcweir             sal_uInt16 nString,
259cdf0e10cSrcweir             sal_Bool bDontDetectNegation = sal_False );
260cdf0e10cSrcweir 
261cdf0e10cSrcweir     // if nMatchedAllStrings set nMatchedUsedAsReturn and return sal_True,
262cdf0e10cSrcweir     // else do nothing and return sal_False
263cdf0e10cSrcweir     sal_Bool MatchedReturn();
264cdf0e10cSrcweir 
265cdf0e10cSrcweir     //! Be sure that the string to be analyzed is already converted to upper
266cdf0e10cSrcweir     //! case and if it contained native humber digits that they are already
267cdf0e10cSrcweir     //! converted to ASCII.
268cdf0e10cSrcweir     sal_Bool IsNumberFormatMain(                    // Main anlyzing function
269cdf0e10cSrcweir             const String& rString,
270cdf0e10cSrcweir             double& fOutNumber,                 // return value if string is numeric
271cdf0e10cSrcweir             const SvNumberformat* pFormat = NULL    // optional number format to match against
272cdf0e10cSrcweir             );
273cdf0e10cSrcweir 
274cdf0e10cSrcweir     static inline sal_Bool MyIsdigit( sal_Unicode c );
275cdf0e10cSrcweir 
276cdf0e10cSrcweir     // native number transliteration if necessary
277cdf0e10cSrcweir     void TransformInput( String& rString );
278cdf0e10cSrcweir 
279cdf0e10cSrcweir #endif  // _ZFORFIND_CXX
280cdf0e10cSrcweir };
281cdf0e10cSrcweir 
282cdf0e10cSrcweir 
283cdf0e10cSrcweir 
284cdf0e10cSrcweir #endif  // _ZFORFIND_HXX
285