xref: /aoo42x/main/svl/source/numbers/zforfind.hxx (revision cdf0e10c)
1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir #ifndef _ZFORFIND_HXX
29*cdf0e10cSrcweir #define _ZFORFIND_HXX
30*cdf0e10cSrcweir 
31*cdf0e10cSrcweir #include <tools/string.hxx>
32*cdf0e10cSrcweir 
33*cdf0e10cSrcweir class Date;                 // forward declarations: these types are used in this header only through pointers
34*cdf0e10cSrcweir class SvNumberformat;
35*cdf0e10cSrcweir class SvNumberFormatter;
36*cdf0e10cSrcweir 
37*cdf0e10cSrcweir #define SV_MAX_ANZ_INPUT_STRINGS  20    // max count of substrings in input scanner (dimension of sStrArray, IsNum and nNums)
38*cdf0e10cSrcweir 
39*cdf0e10cSrcweir class ImpSvNumberInputScan      // Input scanner: divides an input string into substrings and tries to convert it to a number
40*cdf0e10cSrcweir {
41*cdf0e10cSrcweir public:
42*cdf0e10cSrcweir     ImpSvNumberInputScan( SvNumberFormatter* pFormatter );
43*cdf0e10cSrcweir     ~ImpSvNumberInputScan();
44*cdf0e10cSrcweir 
45*cdf0e10cSrcweir /*!*/   void ChangeIntl();                      // MUST be called if language changes
46*cdf0e10cSrcweir 
47*cdf0e10cSrcweir     /// set reference date for offset calculation
48*cdf0e10cSrcweir     void ChangeNullDate(
49*cdf0e10cSrcweir             const sal_uInt16 nDay,
50*cdf0e10cSrcweir             const sal_uInt16 nMonth,
51*cdf0e10cSrcweir             const sal_uInt16 nYear );
52*cdf0e10cSrcweir 
53*cdf0e10cSrcweir     /// convert input string to number
54*cdf0e10cSrcweir     sal_Bool IsNumberFormat(
55*cdf0e10cSrcweir             const String& rString,              /// input string
56*cdf0e10cSrcweir             short& F_Type,                      /// format type (in + out)
57*cdf0e10cSrcweir             double& fOutNumber,                 /// value determined (out)
58*cdf0e10cSrcweir             const SvNumberformat* pFormat = NULL    /// optional number format to compare against
59*cdf0e10cSrcweir             );
60*cdf0e10cSrcweir 
61*cdf0e10cSrcweir     /// after IsNumberFormat: get decimal position (returns nDecPos)
62*cdf0e10cSrcweir     short   GetDecPos() const { return nDecPos; }
63*cdf0e10cSrcweir     /// after IsNumberFormat: get count of numeric substrings in input string (returns nAnzNums)
64*cdf0e10cSrcweir     sal_uInt16  GetAnzNums() const { return nAnzNums; }
65*cdf0e10cSrcweir 
66*cdf0e10cSrcweir     /// set threshold of two-digit year input
67*cdf0e10cSrcweir     void    SetYear2000( sal_uInt16 nVal ) { nYear2000 = nVal; }
68*cdf0e10cSrcweir     /// get threshold of two-digit year input
69*cdf0e10cSrcweir     sal_uInt16  GetYear2000() const { return nYear2000; }
70*cdf0e10cSrcweir 
71*cdf0e10cSrcweir private:
72*cdf0e10cSrcweir     SvNumberFormatter*  pFormatter;
73*cdf0e10cSrcweir     String* pUpperMonthText;                    // Array of month names, uppercase
74*cdf0e10cSrcweir     String* pUpperAbbrevMonthText;              // Array of month names, abbreviated, uppercase
75*cdf0e10cSrcweir     String* pUpperDayText;                      // Array of day of week names, uppercase
76*cdf0e10cSrcweir     String* pUpperAbbrevDayText;                // Array of day of week names, abbreviated, uppercase
77*cdf0e10cSrcweir     String  aUpperCurrSymbol;                   // Currency symbol, uppercase
78*cdf0e10cSrcweir     sal_Bool    bTextInitialized;                   // Whether days and months are initialized
79*cdf0e10cSrcweir     Date* pNullDate;                            // Null date (presumably the reference date set via ChangeNullDate()); 30Dec1899
80*cdf0e10cSrcweir                                                 // Variables for provisional results:
81*cdf0e10cSrcweir     String sStrArray[SV_MAX_ANZ_INPUT_STRINGS]; // Array of scanned substrings
82*cdf0e10cSrcweir     sal_Bool   IsNum[SV_MAX_ANZ_INPUT_STRINGS];     // Whether a substring is numeric
83*cdf0e10cSrcweir     sal_uInt16 nNums[SV_MAX_ANZ_INPUT_STRINGS];     // Sequence of offsets to numeric strings
84*cdf0e10cSrcweir     sal_uInt16 nAnzStrings;                         // Total count of scanned substrings
85*cdf0e10cSrcweir     sal_uInt16 nAnzNums;                            // Count of numeric substrings
86*cdf0e10cSrcweir     sal_Bool   bDecSepInDateSeps;                   // True <=> DecSep in {.,-,/,DateSep}
87*cdf0e10cSrcweir     sal_uInt8   nMatchedAllStrings;                  // Scan...String() matched all substrings,
88*cdf0e10cSrcweir                                                 // bit mask of nMatched... constants
89*cdf0e10cSrcweir 
90*cdf0e10cSrcweir     static const sal_uInt8 nMatchedEndString;        // 0x01
91*cdf0e10cSrcweir     static const sal_uInt8 nMatchedMidString;        // 0x02
92*cdf0e10cSrcweir     static const sal_uInt8 nMatchedStartString;      // 0x04
93*cdf0e10cSrcweir     static const sal_uInt8 nMatchedVirgin;           // 0x08
94*cdf0e10cSrcweir     static const sal_uInt8 nMatchedUsedAsReturn;     // 0x10
95*cdf0e10cSrcweir 
96*cdf0e10cSrcweir     int    nSign;                               // Sign of number
97*cdf0e10cSrcweir     short  nMonth;                              // Month (1..x) if date
98*cdf0e10cSrcweir                                                 // negative => short format
99*cdf0e10cSrcweir     short  nMonthPos;                           // 1 = front, 2 = middle
100*cdf0e10cSrcweir                                                 // 3 = end
101*cdf0e10cSrcweir     sal_uInt16 nTimePos;                            // Index of first time separator (+1)
102*cdf0e10cSrcweir     short  nDecPos;                             // Index of substring containing "," (+1)
103*cdf0e10cSrcweir     short  nNegCheck;                           // '( )' for negative
104*cdf0e10cSrcweir     short  nESign;                              // Sign of exponent
105*cdf0e10cSrcweir     short  nAmPm;                               // +1 AM, -1 PM, 0 if none
106*cdf0e10cSrcweir     short  nLogical;                            // -1 => False, 1 => True
107*cdf0e10cSrcweir     sal_uInt16 nThousand;                           // Count of group (AKA thousand) separators
108*cdf0e10cSrcweir     sal_uInt16 nPosThousandString;                  // Position of concatenated 000,000,000 string
109*cdf0e10cSrcweir     short  eScannedType;                        // Scanned type
110*cdf0e10cSrcweir     short  eSetType;                            // Preset Type
111*cdf0e10cSrcweir 
112*cdf0e10cSrcweir     sal_uInt16 nStringScanNumFor;                   // Fixed strings recognized in
113*cdf0e10cSrcweir                                                 // pFormat->NumFor[nStringScanNumFor]
114*cdf0e10cSrcweir     short  nStringScanSign;                     // Sign resulting of FixString
115*cdf0e10cSrcweir     sal_uInt16 nYear2000;                           // Two-digit threshold
116*cdf0e10cSrcweir                                                 // Year as 20xx
117*cdf0e10cSrcweir                                                 // default 18
118*cdf0e10cSrcweir                                                 // number <= nYear2000 => 20xx
119*cdf0e10cSrcweir                                                 // number >  nYear2000 => 19xx
120*cdf0e10cSrcweir     sal_uInt16  nTimezonePos;                       // Index of timezone separator (+1)
121*cdf0e10cSrcweir     sal_uInt8    nMayBeIso8601;                      // 0:=dontknowyet, 1:=yes, 2:=no
122*cdf0e10cSrcweir 
123*cdf0e10cSrcweir #ifdef _ZFORFIND_CXX        // methods private to implementation, declared only if _ZFORFIND_CXX is defined
124*cdf0e10cSrcweir     void Reset();                               // Reset all variables before start of analysis
125*cdf0e10cSrcweir 
126*cdf0e10cSrcweir     void InitText();                            // Init of months and days of week
127*cdf0e10cSrcweir 
128*cdf0e10cSrcweir     // Convert string to double.
129*cdf0e10cSrcweir     // Only simple unsigned floating point values without any error detection,
130*cdf0e10cSrcweir     // decimal separator has to be '.'
131*cdf0e10cSrcweir     // If bForceFraction==sal_True the string is taken to be the fractional part
132*cdf0e10cSrcweir     // of 0.1234 without the leading 0. (thus being just "1234").
133*cdf0e10cSrcweir     double StringToDouble(
134*cdf0e10cSrcweir             const String& rStr,
135*cdf0e10cSrcweir             sal_Bool bForceFraction = sal_False );
136*cdf0e10cSrcweir 
137*cdf0e10cSrcweir     sal_Bool NextNumberStringSymbol(                // Next number/string symbol
138*cdf0e10cSrcweir             const sal_Unicode*& pStr,
139*cdf0e10cSrcweir             String& rSymbol );
140*cdf0e10cSrcweir 
141*cdf0e10cSrcweir     sal_Bool SkipThousands(                         // Concatenate ,000,23 blocks
142*cdf0e10cSrcweir             const sal_Unicode*& pStr,           // in input to 000123
143*cdf0e10cSrcweir             String& rSymbol );
144*cdf0e10cSrcweir 
145*cdf0e10cSrcweir     void NumberStringDivision(                  // Divide numbers/strings into
146*cdf0e10cSrcweir             const String& rString );            // arrays and variables above.
147*cdf0e10cSrcweir                                                 // Leading blanks and blanks
148*cdf0e10cSrcweir                                                 // after numbers are thrown away
149*cdf0e10cSrcweir 
150*cdf0e10cSrcweir 
151*cdf0e10cSrcweir                                                 // optimized substring versions
152*cdf0e10cSrcweir                                                 // (inline test of the first character, then the ...Impl() function)
153*cdf0e10cSrcweir     static inline sal_Bool StringContains(          // Whether rString contains rWhat at nPos
154*cdf0e10cSrcweir             const String& rWhat,
155*cdf0e10cSrcweir             const String& rString,
156*cdf0e10cSrcweir             xub_StrLen nPos )                   // NOTE(review): nPos is not range-checked here - confirm GetChar() tolerates nPos >= length
157*cdf0e10cSrcweir                 {   // mostly used with one character, so reject a first-character mismatch without a call
158*cdf0e10cSrcweir                     if ( rWhat.GetChar(0) != rString.GetChar(nPos) )
159*cdf0e10cSrcweir                         return sal_False;
160*cdf0e10cSrcweir                     return StringContainsImpl( rWhat, rString, nPos );
161*cdf0e10cSrcweir                 }
162*cdf0e10cSrcweir     static inline sal_Bool StringPtrContains(       // Whether pString contains rWhat at nPos
163*cdf0e10cSrcweir             const String& rWhat,
164*cdf0e10cSrcweir             const sal_Unicode* pString,
165*cdf0e10cSrcweir             xub_StrLen nPos )                   // nPos MUST be a valid offset from pString
166*cdf0e10cSrcweir                 {   // mostly used with one character, so reject a first-character mismatch without a call
167*cdf0e10cSrcweir                     if ( rWhat.GetChar(0) != *(pString+nPos) )
168*cdf0e10cSrcweir                         return sal_False;
169*cdf0e10cSrcweir                     return StringPtrContainsImpl( rWhat, pString, nPos );
170*cdf0e10cSrcweir                 }
171*cdf0e10cSrcweir     static sal_Bool StringContainsImpl(             //! DO NOT use directly, call StringContains()
172*cdf0e10cSrcweir             const String& rWhat,
173*cdf0e10cSrcweir             const String& rString,
174*cdf0e10cSrcweir             xub_StrLen nPos );
175*cdf0e10cSrcweir     static sal_Bool StringPtrContainsImpl(          //! DO NOT use directly, call StringPtrContains()
176*cdf0e10cSrcweir             const String& rWhat,
177*cdf0e10cSrcweir             const sal_Unicode* pString,
178*cdf0e10cSrcweir             xub_StrLen nPos );
179*cdf0e10cSrcweir 
180*cdf0e10cSrcweir 
181*cdf0e10cSrcweir     static inline sal_Bool SkipChar(                // Skip a special character
182*cdf0e10cSrcweir             sal_Unicode c,
183*cdf0e10cSrcweir             const String& rString,
184*cdf0e10cSrcweir             xub_StrLen& nPos );
185*cdf0e10cSrcweir     static inline void SkipBlanks(              // Skip blank
186*cdf0e10cSrcweir             const String& rString,
187*cdf0e10cSrcweir             xub_StrLen& nPos );
188*cdf0e10cSrcweir     static inline sal_Bool SkipString(              // Jump over rWhat in rString at nPos
189*cdf0e10cSrcweir             const String& rWhat,
190*cdf0e10cSrcweir             const String& rString,
191*cdf0e10cSrcweir             xub_StrLen& nPos );
192*cdf0e10cSrcweir 
193*cdf0e10cSrcweir     inline sal_Bool GetThousandSep(                 // Recognizes exactly ,111 as group separator
194*cdf0e10cSrcweir             const String& rString,
195*cdf0e10cSrcweir             xub_StrLen& nPos,
196*cdf0e10cSrcweir             sal_uInt16 nStringPos );
197*cdf0e10cSrcweir     short GetLogical(                           // Get boolean value
198*cdf0e10cSrcweir             const String& rString );
199*cdf0e10cSrcweir     short GetMonth(                             // Get month and advance string position
200*cdf0e10cSrcweir             const String& rString,
201*cdf0e10cSrcweir             xub_StrLen& nPos );
202*cdf0e10cSrcweir     int GetDayOfWeek(                           // Get day of week and advance string position
203*cdf0e10cSrcweir             const String& rString,
204*cdf0e10cSrcweir             xub_StrLen& nPos );
205*cdf0e10cSrcweir     sal_Bool GetCurrency(                           // Get currency symbol and advance string position
206*cdf0e10cSrcweir             const String& rString,
207*cdf0e10cSrcweir             xub_StrLen& nPos,
208*cdf0e10cSrcweir             const SvNumberformat* pFormat = NULL ); // optional number format to match against
209*cdf0e10cSrcweir     sal_Bool GetTimeAmPm(                           // Get symbol AM or PM and advance string position
210*cdf0e10cSrcweir             const String& rString,
211*cdf0e10cSrcweir             xub_StrLen& nPos );
212*cdf0e10cSrcweir     inline sal_Bool GetDecSep(                      // Get decimal separator and advance string position
213*cdf0e10cSrcweir             const String& rString,
214*cdf0e10cSrcweir             xub_StrLen& nPos );
215*cdf0e10cSrcweir     inline sal_Bool GetTime100SecSep(               // Get hundredth seconds separator and advance string position
216*cdf0e10cSrcweir             const String& rString,
217*cdf0e10cSrcweir             xub_StrLen& nPos );
218*cdf0e10cSrcweir     int GetSign(                                // Get sign and advance string position
219*cdf0e10cSrcweir             const String& rString,              // Including special case '('
220*cdf0e10cSrcweir             xub_StrLen& nPos );
221*cdf0e10cSrcweir     short GetESign(                             // Get sign of exponent and advance string position
222*cdf0e10cSrcweir             const String& rString,
223*cdf0e10cSrcweir             xub_StrLen& nPos );
224*cdf0e10cSrcweir 
225*cdf0e10cSrcweir     inline sal_Bool GetNextNumber(                  // Get next number as array offset
226*cdf0e10cSrcweir             sal_uInt16& i,
227*cdf0e10cSrcweir             sal_uInt16& j );
228*cdf0e10cSrcweir 
229*cdf0e10cSrcweir     void GetTimeRef(                            // Converts time -> double (only decimals)
230*cdf0e10cSrcweir             double& fOutNumber,                 // result as double
231*cdf0e10cSrcweir             sal_uInt16 nIndex,                      // Index of hour in input
232*cdf0e10cSrcweir             sal_uInt16 nAnz );                      // Count of time substrings in input
233*cdf0e10cSrcweir     sal_uInt16 ImplGetDay  ( sal_uInt16 nIndex );       // Day input, 0 if no match
234*cdf0e10cSrcweir     sal_uInt16 ImplGetMonth( sal_uInt16 nIndex );       // Month input, zero based return, NumberOfMonths if no match
235*cdf0e10cSrcweir     sal_uInt16 ImplGetYear ( sal_uInt16 nIndex );       // Year input, 0 if no match
236*cdf0e10cSrcweir     sal_Bool GetDateRef(                            // Conversion of date to number
237*cdf0e10cSrcweir             double& fDays,                      // OUT: days diff to null date
238*cdf0e10cSrcweir             sal_uInt16& nCounter,                   // Count of date substrings
239*cdf0e10cSrcweir             const SvNumberformat* pFormat = NULL ); // optional number format to match against
240*cdf0e10cSrcweir 
241*cdf0e10cSrcweir     sal_Bool ScanStartString(                       // Analyze start of string
242*cdf0e10cSrcweir             const String& rString,
243*cdf0e10cSrcweir             const SvNumberformat* pFormat = NULL );
244*cdf0e10cSrcweir     sal_Bool ScanMidString(                         // Analyze middle substring
245*cdf0e10cSrcweir             const String& rString,
246*cdf0e10cSrcweir             sal_uInt16 nStringPos,
247*cdf0e10cSrcweir             const SvNumberformat* pFormat = NULL );
248*cdf0e10cSrcweir     sal_Bool ScanEndString(                         // Analyze end of string
249*cdf0e10cSrcweir             const String& rString,
250*cdf0e10cSrcweir             const SvNumberformat* pFormat = NULL );
251*cdf0e10cSrcweir 
252*cdf0e10cSrcweir     // Whether input may be an ISO 8601 date format, yyyy-mm-dd...
253*cdf0e10cSrcweir     // checks if at least 3 numbers and first number>31
254*cdf0e10cSrcweir     bool MayBeIso8601();
255*cdf0e10cSrcweir 
256*cdf0e10cSrcweir     // Compare rString to substring of array indexed by nString
257*cdf0e10cSrcweir     // nString == 0xFFFF => last substring
258*cdf0e10cSrcweir     sal_Bool ScanStringNumFor(
259*cdf0e10cSrcweir             const String& rString,
260*cdf0e10cSrcweir             xub_StrLen nPos,
261*cdf0e10cSrcweir             const SvNumberformat* pFormat,
262*cdf0e10cSrcweir             sal_uInt16 nString,
263*cdf0e10cSrcweir             sal_Bool bDontDetectNegation = sal_False );
264*cdf0e10cSrcweir 
265*cdf0e10cSrcweir     // if nMatchedAllStrings set nMatchedUsedAsReturn and return sal_True,
266*cdf0e10cSrcweir     // else do nothing and return sal_False
267*cdf0e10cSrcweir     sal_Bool MatchedReturn();
268*cdf0e10cSrcweir 
269*cdf0e10cSrcweir     //! Be sure that the string to be analyzed is already converted to upper
270*cdf0e10cSrcweir     //! case and if it contained native number digits that they are already
271*cdf0e10cSrcweir     //! converted to ASCII.
272*cdf0e10cSrcweir     sal_Bool IsNumberFormatMain(                    // Main analyzing function
273*cdf0e10cSrcweir             const String& rString,
274*cdf0e10cSrcweir             double& fOutNumber,                 // return value if string is numeric
275*cdf0e10cSrcweir             const SvNumberformat* pFormat = NULL    // optional number format to match against
276*cdf0e10cSrcweir             );
277*cdf0e10cSrcweir 
278*cdf0e10cSrcweir     static inline sal_Bool MyIsdigit( sal_Unicode c );
279*cdf0e10cSrcweir 
280*cdf0e10cSrcweir     // native number transliteration if necessary
281*cdf0e10cSrcweir     void TransformInput( String& rString );
282*cdf0e10cSrcweir 
283*cdf0e10cSrcweir #endif  // _ZFORFIND_CXX
284*cdf0e10cSrcweir };
285*cdf0e10cSrcweir 
286*cdf0e10cSrcweir 
287*cdf0e10cSrcweir 
288*cdf0e10cSrcweir #endif  // _ZFORFIND_HXX
289