1*39a19a47SAndrew Rist /************************************************************** 2cdf0e10cSrcweir * 3*39a19a47SAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one 4*39a19a47SAndrew Rist * or more contributor license agreements. See the NOTICE file 5*39a19a47SAndrew Rist * distributed with this work for additional information 6*39a19a47SAndrew Rist * regarding copyright ownership. The ASF licenses this file 7*39a19a47SAndrew Rist * to you under the Apache License, Version 2.0 (the 8*39a19a47SAndrew Rist * "License"); you may not use this file except in compliance 9*39a19a47SAndrew Rist * with the License. You may obtain a copy of the License at 10*39a19a47SAndrew Rist * 11*39a19a47SAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0 12*39a19a47SAndrew Rist * 13*39a19a47SAndrew Rist * Unless required by applicable law or agreed to in writing, 14*39a19a47SAndrew Rist * software distributed under the License is distributed on an 15*39a19a47SAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16*39a19a47SAndrew Rist * KIND, either express or implied. See the License for the 17*39a19a47SAndrew Rist * specific language governing permissions and limitations 18*39a19a47SAndrew Rist * under the License. 19*39a19a47SAndrew Rist * 20*39a19a47SAndrew Rist *************************************************************/ 21*39a19a47SAndrew Rist 22*39a19a47SAndrew Rist 23cdf0e10cSrcweir 24cdf0e10cSrcweir #ifndef _ZFORFIND_HXX 25cdf0e10cSrcweir #define _ZFORFIND_HXX 26cdf0e10cSrcweir 27cdf0e10cSrcweir #include <tools/string.hxx> 28cdf0e10cSrcweir 29cdf0e10cSrcweir class Date; 30cdf0e10cSrcweir class SvNumberformat; 31cdf0e10cSrcweir class SvNumberFormatter; 32cdf0e10cSrcweir 33cdf0e10cSrcweir #define SV_MAX_ANZ_INPUT_STRINGS 20 // max count of substrings in input scanner 34cdf0e10cSrcweir 35cdf0e10cSrcweir class ImpSvNumberInputScan 36cdf0e10cSrcweir { 37cdf0e10cSrcweir public: 38cdf0e10cSrcweir ImpSvNumberInputScan( SvNumberFormatter* pFormatter ); 39cdf0e10cSrcweir ~ImpSvNumberInputScan(); 40cdf0e10cSrcweir 41cdf0e10cSrcweir /*!*/ void ChangeIntl(); // MUST be called if language changes 42cdf0e10cSrcweir 43cdf0e10cSrcweir /// set reference date for offset calculation 44cdf0e10cSrcweir void ChangeNullDate( 45cdf0e10cSrcweir const sal_uInt16 nDay, 46cdf0e10cSrcweir const sal_uInt16 nMonth, 47cdf0e10cSrcweir const sal_uInt16 nYear ); 48cdf0e10cSrcweir 49cdf0e10cSrcweir /// convert input string to number 50cdf0e10cSrcweir sal_Bool IsNumberFormat( 51cdf0e10cSrcweir const String& rString, /// input string 52cdf0e10cSrcweir short& F_Type, /// format type (in + out) 53cdf0e10cSrcweir double& fOutNumber, /// value determined (out) 54cdf0e10cSrcweir const SvNumberformat* pFormat = NULL /// optional a number format to which compare against 55cdf0e10cSrcweir ); 56cdf0e10cSrcweir 57cdf0e10cSrcweir /// after IsNumberFormat: get decimal position GetDecPos() const58cdf0e10cSrcweir short GetDecPos() const { return nDecPos; } 59cdf0e10cSrcweir /// after IsNumberFormat: get count of numeric substrings in input string GetAnzNums() const60cdf0e10cSrcweir sal_uInt16 GetAnzNums() const { return nAnzNums; } 61cdf0e10cSrcweir 62cdf0e10cSrcweir /// set threshold of two-digit year input SetYear2000(sal_uInt16 nVal)63cdf0e10cSrcweir void SetYear2000( sal_uInt16 nVal ) { nYear2000 = nVal; } 64cdf0e10cSrcweir /// get threshold of two-digit year input GetYear2000() const65cdf0e10cSrcweir sal_uInt16 GetYear2000() const { return nYear2000; } 66cdf0e10cSrcweir 67cdf0e10cSrcweir private: 68cdf0e10cSrcweir SvNumberFormatter* pFormatter; 69cdf0e10cSrcweir String* pUpperMonthText; // Array of month names, uppercase 70cdf0e10cSrcweir String* pUpperAbbrevMonthText; // Array of month names, abbreviated, uppercase 71cdf0e10cSrcweir String* pUpperDayText; // Array of day of week names, uppercase 72cdf0e10cSrcweir String* pUpperAbbrevDayText; // Array of day of week names, abbreviated, uppercase 73cdf0e10cSrcweir String aUpperCurrSymbol; // Currency symbol, uppercase 74cdf0e10cSrcweir sal_Bool bTextInitialized; // Whether days and months are initialized 75cdf0e10cSrcweir Date* pNullDate; // 30Dec1899 76cdf0e10cSrcweir // Variables for provisional results: 77cdf0e10cSrcweir String sStrArray[SV_MAX_ANZ_INPUT_STRINGS]; // Array of scanned substrings 78cdf0e10cSrcweir sal_Bool IsNum[SV_MAX_ANZ_INPUT_STRINGS]; // Whether a substring is numeric 79cdf0e10cSrcweir sal_uInt16 nNums[SV_MAX_ANZ_INPUT_STRINGS]; // Sequence of offsets to numeric strings 80cdf0e10cSrcweir sal_uInt16 nAnzStrings; // Total count of scanned substrings 81cdf0e10cSrcweir sal_uInt16 nAnzNums; // Count of numeric substrings 82cdf0e10cSrcweir sal_Bool bDecSepInDateSeps; // True <=> DecSep in {.,-,/,DateSep} 83cdf0e10cSrcweir sal_uInt8 nMatchedAllStrings; // Scan...String() matched all substrings, 84cdf0e10cSrcweir // bit mask of nMatched... constants 85cdf0e10cSrcweir 86cdf0e10cSrcweir static const sal_uInt8 nMatchedEndString; // 0x01 87cdf0e10cSrcweir static const sal_uInt8 nMatchedMidString; // 0x02 88cdf0e10cSrcweir static const sal_uInt8 nMatchedStartString; // 0x04 89cdf0e10cSrcweir static const sal_uInt8 nMatchedVirgin; // 0x08 90cdf0e10cSrcweir static const sal_uInt8 nMatchedUsedAsReturn; // 0x10 91cdf0e10cSrcweir 92cdf0e10cSrcweir int nSign; // Sign of number 93cdf0e10cSrcweir short nMonth; // Month (1..x) if date 94cdf0e10cSrcweir // negative => short format 95cdf0e10cSrcweir short nMonthPos; // 1 = front, 2 = middle 96cdf0e10cSrcweir // 3 = end 97cdf0e10cSrcweir sal_uInt16 nTimePos; // Index of first time separator (+1) 98cdf0e10cSrcweir short nDecPos; // Index of substring containing "," (+1) 99cdf0e10cSrcweir short nNegCheck; // '( )' for negative 100cdf0e10cSrcweir short nESign; // Sign of exponent 101cdf0e10cSrcweir short nAmPm; // +1 AM, -1 PM, 0 if none 102cdf0e10cSrcweir short nLogical; // -1 => False, 1 => True 103cdf0e10cSrcweir sal_uInt16 nThousand; // Count of group (AKA thousand) separators 104cdf0e10cSrcweir sal_uInt16 nPosThousandString; // Position of concatenaded 000,000,000 string 105cdf0e10cSrcweir short eScannedType; // Scanned type 106cdf0e10cSrcweir short eSetType; // Preset Type 107cdf0e10cSrcweir 108cdf0e10cSrcweir sal_uInt16 nStringScanNumFor; // Fixed strings recognized in 109cdf0e10cSrcweir // pFormat->NumFor[nNumForStringScan] 110cdf0e10cSrcweir short nStringScanSign; // Sign resulting of FixString 111cdf0e10cSrcweir sal_uInt16 nYear2000; // Two-digit threshold 112cdf0e10cSrcweir // Year as 20xx 113cdf0e10cSrcweir // default 18 114cdf0e10cSrcweir // number <= nYear2000 => 20xx 115cdf0e10cSrcweir // number > nYear2000 => 19xx 116cdf0e10cSrcweir sal_uInt16 nTimezonePos; // Index of timezone separator (+1) 117cdf0e10cSrcweir sal_uInt8 nMayBeIso8601; // 0:=dontknowyet, 1:=yes, 2:=no 118cdf0e10cSrcweir 119cdf0e10cSrcweir #ifdef _ZFORFIND_CXX // methods private to implementation 120cdf0e10cSrcweir void Reset(); // Reset all variables before start of analysis 121cdf0e10cSrcweir 122cdf0e10cSrcweir void InitText(); // Init of months and days of week 123cdf0e10cSrcweir 124cdf0e10cSrcweir // Convert string to double. 125cdf0e10cSrcweir // Only simple unsigned floating point values without any error detection, 126cdf0e10cSrcweir // decimal separator has to be '.' 127cdf0e10cSrcweir // If bForceFraction==sal_True the string is taken to be the fractional part 128cdf0e10cSrcweir // of 0.1234 without the leading 0. (thus being just "1234"). 129cdf0e10cSrcweir double StringToDouble( 130cdf0e10cSrcweir const String& rStr, 131cdf0e10cSrcweir sal_Bool bForceFraction = sal_False ); 132cdf0e10cSrcweir 133cdf0e10cSrcweir sal_Bool NextNumberStringSymbol( // Next number/string symbol 134cdf0e10cSrcweir const sal_Unicode*& pStr, 135cdf0e10cSrcweir String& rSymbol ); 136cdf0e10cSrcweir 137cdf0e10cSrcweir sal_Bool SkipThousands( // Concatenate ,000,23 blocks 138cdf0e10cSrcweir const sal_Unicode*& pStr, // in input to 000123 139cdf0e10cSrcweir String& rSymbol ); 140cdf0e10cSrcweir 141cdf0e10cSrcweir void NumberStringDivision( // Divide numbers/strings into 142cdf0e10cSrcweir const String& rString ); // arrays and variables above. 143cdf0e10cSrcweir // Leading blanks and blanks 144cdf0e10cSrcweir // after numbers are thrown away 145cdf0e10cSrcweir 146cdf0e10cSrcweir 147cdf0e10cSrcweir // optimized substring versions 148cdf0e10cSrcweir StringContains(const String & rWhat,const String & rString,xub_StrLen nPos)149cdf0e10cSrcweir static inline sal_Bool StringContains( // Whether rString contains rWhat at nPos 150cdf0e10cSrcweir const String& rWhat, 151cdf0e10cSrcweir const String& rString, 152cdf0e10cSrcweir xub_StrLen nPos ) 153cdf0e10cSrcweir { // mostly used with one character 154cdf0e10cSrcweir if ( rWhat.GetChar(0) != rString.GetChar(nPos) ) 155cdf0e10cSrcweir return sal_False; 156cdf0e10cSrcweir return StringContainsImpl( rWhat, rString, nPos ); 157cdf0e10cSrcweir } StringPtrContains(const String & rWhat,const sal_Unicode * pString,xub_StrLen nPos)158cdf0e10cSrcweir static inline sal_Bool StringPtrContains( // Whether pString contains rWhat at nPos 159cdf0e10cSrcweir const String& rWhat, 160cdf0e10cSrcweir const sal_Unicode* pString, 161cdf0e10cSrcweir xub_StrLen nPos ) // nPos MUST be a valid offset from pString 162cdf0e10cSrcweir { // mostly used with one character 163cdf0e10cSrcweir if ( rWhat.GetChar(0) != *(pString+nPos) ) 164cdf0e10cSrcweir return sal_False; 165cdf0e10cSrcweir return StringPtrContainsImpl( rWhat, pString, nPos ); 166cdf0e10cSrcweir } 167cdf0e10cSrcweir static sal_Bool StringContainsImpl( //! DO NOT use directly 168cdf0e10cSrcweir const String& rWhat, 169cdf0e10cSrcweir const String& rString, 170cdf0e10cSrcweir xub_StrLen nPos ); 171cdf0e10cSrcweir static sal_Bool StringPtrContainsImpl( //! DO NOT use directly 172cdf0e10cSrcweir const String& rWhat, 173cdf0e10cSrcweir const sal_Unicode* pString, 174cdf0e10cSrcweir xub_StrLen nPos ); 175cdf0e10cSrcweir 176cdf0e10cSrcweir 177cdf0e10cSrcweir static inline sal_Bool SkipChar( // Skip a special character 178cdf0e10cSrcweir sal_Unicode c, 179cdf0e10cSrcweir const String& rString, 180cdf0e10cSrcweir xub_StrLen& nPos ); 181cdf0e10cSrcweir static inline void SkipBlanks( // Skip blank 182cdf0e10cSrcweir const String& rString, 183cdf0e10cSrcweir xub_StrLen& nPos ); 184cdf0e10cSrcweir static inline sal_Bool SkipString( // Jump over rWhat in rString at nPos 185cdf0e10cSrcweir const String& rWhat, 186cdf0e10cSrcweir const String& rString, 187cdf0e10cSrcweir xub_StrLen& nPos ); 188cdf0e10cSrcweir 189cdf0e10cSrcweir inline sal_Bool GetThousandSep( // Recognizes exactly ,111 as group separator 190cdf0e10cSrcweir const String& rString, 191cdf0e10cSrcweir xub_StrLen& nPos, 192cdf0e10cSrcweir sal_uInt16 nStringPos ); 193cdf0e10cSrcweir short GetLogical( // Get boolean value 194cdf0e10cSrcweir const String& rString ); 195cdf0e10cSrcweir short GetMonth( // Get month and advance string position 196cdf0e10cSrcweir const String& rString, 197cdf0e10cSrcweir xub_StrLen& nPos ); 198cdf0e10cSrcweir int GetDayOfWeek( // Get day of week and advance string position 199cdf0e10cSrcweir const String& rString, 200cdf0e10cSrcweir xub_StrLen& nPos ); 201cdf0e10cSrcweir sal_Bool GetCurrency( // Get currency symbol and advance string position 202cdf0e10cSrcweir const String& rString, 203cdf0e10cSrcweir xub_StrLen& nPos, 204cdf0e10cSrcweir const SvNumberformat* pFormat = NULL ); // optional number format to match against 205cdf0e10cSrcweir sal_Bool GetTimeAmPm( // Get symbol AM or PM and advance string position 206cdf0e10cSrcweir const String& rString, 207cdf0e10cSrcweir xub_StrLen& nPos ); 208cdf0e10cSrcweir inline sal_Bool GetDecSep( // Get decimal separator and advance string position 209cdf0e10cSrcweir const String& rString, 210cdf0e10cSrcweir xub_StrLen& nPos ); 211cdf0e10cSrcweir inline sal_Bool GetTime100SecSep( // Get hundredth seconds separator and advance string position 212cdf0e10cSrcweir const String& rString, 213cdf0e10cSrcweir xub_StrLen& nPos ); 214cdf0e10cSrcweir int GetSign( // Get sign and advance string position 215cdf0e10cSrcweir const String& rString, // Including special case '(' 216cdf0e10cSrcweir xub_StrLen& nPos ); 217cdf0e10cSrcweir short GetESign( // Get sign of exponent and advance string position 218cdf0e10cSrcweir const String& rString, 219cdf0e10cSrcweir xub_StrLen& nPos ); 220cdf0e10cSrcweir 221cdf0e10cSrcweir inline sal_Bool GetNextNumber( // Get next number as array offset 222cdf0e10cSrcweir sal_uInt16& i, 223cdf0e10cSrcweir sal_uInt16& j ); 224cdf0e10cSrcweir 225cdf0e10cSrcweir void GetTimeRef( // Converts time -> double (only decimals) 226cdf0e10cSrcweir double& fOutNumber, // result as double 227cdf0e10cSrcweir sal_uInt16 nIndex, // Index of hour in input 228cdf0e10cSrcweir sal_uInt16 nAnz ); // Count of time substrings in input 229cdf0e10cSrcweir sal_uInt16 ImplGetDay ( sal_uInt16 nIndex ); // Day input, 0 if no match 230cdf0e10cSrcweir sal_uInt16 ImplGetMonth( sal_uInt16 nIndex ); // Month input, zero based return, NumberOfMonths if no match 231cdf0e10cSrcweir sal_uInt16 ImplGetYear ( sal_uInt16 nIndex ); // Year input, 0 if no match 232cdf0e10cSrcweir sal_Bool GetDateRef( // Conversion of date to number 233cdf0e10cSrcweir double& fDays, // OUT: days diff to null date 234cdf0e10cSrcweir sal_uInt16& nCounter, // Count of date substrings 235cdf0e10cSrcweir const SvNumberformat* pFormat = NULL ); // optional number format to match against 236cdf0e10cSrcweir 237cdf0e10cSrcweir sal_Bool ScanStartString( // Analyze start of string 238cdf0e10cSrcweir const String& rString, 239cdf0e10cSrcweir const SvNumberformat* pFormat = NULL ); 240cdf0e10cSrcweir sal_Bool ScanMidString( // Analyze middle substring 241cdf0e10cSrcweir const String& rString, 242cdf0e10cSrcweir sal_uInt16 nStringPos, 243cdf0e10cSrcweir const SvNumberformat* pFormat = NULL ); 244cdf0e10cSrcweir sal_Bool ScanEndString( // Analyze end of string 245cdf0e10cSrcweir const String& rString, 246cdf0e10cSrcweir const SvNumberformat* pFormat = NULL ); 247cdf0e10cSrcweir 248cdf0e10cSrcweir // Whether input may be a ISO 8601 date format, yyyy-mm-dd... 249cdf0e10cSrcweir // checks if at least 3 numbers and first number>31 250cdf0e10cSrcweir bool MayBeIso8601(); 251cdf0e10cSrcweir 252cdf0e10cSrcweir // Compare rString to substring of array indexed by nString 253cdf0e10cSrcweir // nString == 0xFFFF => last substring 254cdf0e10cSrcweir sal_Bool ScanStringNumFor( 255cdf0e10cSrcweir const String& rString, 256cdf0e10cSrcweir xub_StrLen nPos, 257cdf0e10cSrcweir const SvNumberformat* pFormat, 258cdf0e10cSrcweir sal_uInt16 nString, 259cdf0e10cSrcweir sal_Bool bDontDetectNegation = sal_False ); 260cdf0e10cSrcweir 261cdf0e10cSrcweir // if nMatchedAllStrings set nMatchedUsedAsReturn and return sal_True, 262cdf0e10cSrcweir // else do nothing and return sal_False 263cdf0e10cSrcweir sal_Bool MatchedReturn(); 264cdf0e10cSrcweir 265cdf0e10cSrcweir //! Be sure that the string to be analyzed is already converted to upper 266cdf0e10cSrcweir //! case and if it contained native humber digits that they are already 267cdf0e10cSrcweir //! converted to ASCII. 268cdf0e10cSrcweir sal_Bool IsNumberFormatMain( // Main anlyzing function 269cdf0e10cSrcweir const String& rString, 270cdf0e10cSrcweir double& fOutNumber, // return value if string is numeric 271cdf0e10cSrcweir const SvNumberformat* pFormat = NULL // optional number format to match against 272cdf0e10cSrcweir ); 273cdf0e10cSrcweir 274cdf0e10cSrcweir static inline sal_Bool MyIsdigit( sal_Unicode c ); 275cdf0e10cSrcweir 276cdf0e10cSrcweir // native number transliteration if necessary 277cdf0e10cSrcweir void TransformInput( String& rString ); 278cdf0e10cSrcweir 279cdf0e10cSrcweir #endif // _ZFORFIND_CXX 280cdf0e10cSrcweir }; 281cdf0e10cSrcweir 282cdf0e10cSrcweir 283cdf0e10cSrcweir 284cdf0e10cSrcweir #endif // _ZFORFIND_HXX 285