1*cdf0e10cSrcweir /************************************************************************* 2*cdf0e10cSrcweir * 3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4*cdf0e10cSrcweir * 5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates. 6*cdf0e10cSrcweir * 7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite 8*cdf0e10cSrcweir * 9*cdf0e10cSrcweir * This file is part of OpenOffice.org. 10*cdf0e10cSrcweir * 11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify 12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3 13*cdf0e10cSrcweir * only, as published by the Free Software Foundation. 14*cdf0e10cSrcweir * 15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful, 16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of 17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details 19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code). 20*cdf0e10cSrcweir * 21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License 22*cdf0e10cSrcweir * version 3 along with OpenOffice.org. If not, see 23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html> 24*cdf0e10cSrcweir * for a copy of the LGPLv3 License. 25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir 28*cdf0e10cSrcweir #ifndef _ZFORFIND_HXX 29*cdf0e10cSrcweir #define _ZFORFIND_HXX 30*cdf0e10cSrcweir 31*cdf0e10cSrcweir #include <tools/string.hxx> 32*cdf0e10cSrcweir 33*cdf0e10cSrcweir class Date; 34*cdf0e10cSrcweir class SvNumberformat; 35*cdf0e10cSrcweir class SvNumberFormatter; 36*cdf0e10cSrcweir 37*cdf0e10cSrcweir #define SV_MAX_ANZ_INPUT_STRINGS 20 // max count of substrings in input scanner 38*cdf0e10cSrcweir 39*cdf0e10cSrcweir class ImpSvNumberInputScan 40*cdf0e10cSrcweir { 41*cdf0e10cSrcweir public: 42*cdf0e10cSrcweir ImpSvNumberInputScan( SvNumberFormatter* pFormatter ); 43*cdf0e10cSrcweir ~ImpSvNumberInputScan(); 44*cdf0e10cSrcweir 45*cdf0e10cSrcweir /*!*/ void ChangeIntl(); // MUST be called if language changes 46*cdf0e10cSrcweir 47*cdf0e10cSrcweir /// set reference date for offset calculation 48*cdf0e10cSrcweir void ChangeNullDate( 49*cdf0e10cSrcweir const sal_uInt16 nDay, 50*cdf0e10cSrcweir const sal_uInt16 nMonth, 51*cdf0e10cSrcweir const sal_uInt16 nYear ); 52*cdf0e10cSrcweir 53*cdf0e10cSrcweir /// convert input string to number 54*cdf0e10cSrcweir sal_Bool IsNumberFormat( 55*cdf0e10cSrcweir const String& rString, /// input string 56*cdf0e10cSrcweir short& F_Type, /// format type (in + out) 57*cdf0e10cSrcweir double& fOutNumber, /// value determined (out) 58*cdf0e10cSrcweir const SvNumberformat* pFormat = NULL /// optional a number format to which compare against 59*cdf0e10cSrcweir ); 60*cdf0e10cSrcweir 61*cdf0e10cSrcweir /// after IsNumberFormat: get decimal position 62*cdf0e10cSrcweir short GetDecPos() const { return nDecPos; } 63*cdf0e10cSrcweir /// after IsNumberFormat: get count of numeric substrings in input string 64*cdf0e10cSrcweir sal_uInt16 GetAnzNums() const { return nAnzNums; } 65*cdf0e10cSrcweir 66*cdf0e10cSrcweir /// set threshold of two-digit year input 67*cdf0e10cSrcweir void SetYear2000( sal_uInt16 nVal ) { nYear2000 = nVal; } 68*cdf0e10cSrcweir /// get threshold of two-digit year input 69*cdf0e10cSrcweir sal_uInt16 GetYear2000() const { return nYear2000; } 70*cdf0e10cSrcweir 71*cdf0e10cSrcweir private: 72*cdf0e10cSrcweir SvNumberFormatter* pFormatter; 73*cdf0e10cSrcweir String* pUpperMonthText; // Array of month names, uppercase 74*cdf0e10cSrcweir String* pUpperAbbrevMonthText; // Array of month names, abbreviated, uppercase 75*cdf0e10cSrcweir String* pUpperDayText; // Array of day of week names, uppercase 76*cdf0e10cSrcweir String* pUpperAbbrevDayText; // Array of day of week names, abbreviated, uppercase 77*cdf0e10cSrcweir String aUpperCurrSymbol; // Currency symbol, uppercase 78*cdf0e10cSrcweir sal_Bool bTextInitialized; // Whether days and months are initialized 79*cdf0e10cSrcweir Date* pNullDate; // 30Dec1899 80*cdf0e10cSrcweir // Variables for provisional results: 81*cdf0e10cSrcweir String sStrArray[SV_MAX_ANZ_INPUT_STRINGS]; // Array of scanned substrings 82*cdf0e10cSrcweir sal_Bool IsNum[SV_MAX_ANZ_INPUT_STRINGS]; // Whether a substring is numeric 83*cdf0e10cSrcweir sal_uInt16 nNums[SV_MAX_ANZ_INPUT_STRINGS]; // Sequence of offsets to numeric strings 84*cdf0e10cSrcweir sal_uInt16 nAnzStrings; // Total count of scanned substrings 85*cdf0e10cSrcweir sal_uInt16 nAnzNums; // Count of numeric substrings 86*cdf0e10cSrcweir sal_Bool bDecSepInDateSeps; // True <=> DecSep in {.,-,/,DateSep} 87*cdf0e10cSrcweir sal_uInt8 nMatchedAllStrings; // Scan...String() matched all substrings, 88*cdf0e10cSrcweir // bit mask of nMatched... constants 89*cdf0e10cSrcweir 90*cdf0e10cSrcweir static const sal_uInt8 nMatchedEndString; // 0x01 91*cdf0e10cSrcweir static const sal_uInt8 nMatchedMidString; // 0x02 92*cdf0e10cSrcweir static const sal_uInt8 nMatchedStartString; // 0x04 93*cdf0e10cSrcweir static const sal_uInt8 nMatchedVirgin; // 0x08 94*cdf0e10cSrcweir static const sal_uInt8 nMatchedUsedAsReturn; // 0x10 95*cdf0e10cSrcweir 96*cdf0e10cSrcweir int nSign; // Sign of number 97*cdf0e10cSrcweir short nMonth; // Month (1..x) if date 98*cdf0e10cSrcweir // negative => short format 99*cdf0e10cSrcweir short nMonthPos; // 1 = front, 2 = middle 100*cdf0e10cSrcweir // 3 = end 101*cdf0e10cSrcweir sal_uInt16 nTimePos; // Index of first time separator (+1) 102*cdf0e10cSrcweir short nDecPos; // Index of substring containing "," (+1) 103*cdf0e10cSrcweir short nNegCheck; // '( )' for negative 104*cdf0e10cSrcweir short nESign; // Sign of exponent 105*cdf0e10cSrcweir short nAmPm; // +1 AM, -1 PM, 0 if none 106*cdf0e10cSrcweir short nLogical; // -1 => False, 1 => True 107*cdf0e10cSrcweir sal_uInt16 nThousand; // Count of group (AKA thousand) separators 108*cdf0e10cSrcweir sal_uInt16 nPosThousandString; // Position of concatenaded 000,000,000 string 109*cdf0e10cSrcweir short eScannedType; // Scanned type 110*cdf0e10cSrcweir short eSetType; // Preset Type 111*cdf0e10cSrcweir 112*cdf0e10cSrcweir sal_uInt16 nStringScanNumFor; // Fixed strings recognized in 113*cdf0e10cSrcweir // pFormat->NumFor[nNumForStringScan] 114*cdf0e10cSrcweir short nStringScanSign; // Sign resulting of FixString 115*cdf0e10cSrcweir sal_uInt16 nYear2000; // Two-digit threshold 116*cdf0e10cSrcweir // Year as 20xx 117*cdf0e10cSrcweir // default 18 118*cdf0e10cSrcweir // number <= nYear2000 => 20xx 119*cdf0e10cSrcweir // number > nYear2000 => 19xx 120*cdf0e10cSrcweir sal_uInt16 nTimezonePos; // Index of timezone separator (+1) 121*cdf0e10cSrcweir sal_uInt8 nMayBeIso8601; // 0:=dontknowyet, 1:=yes, 2:=no 122*cdf0e10cSrcweir 123*cdf0e10cSrcweir #ifdef _ZFORFIND_CXX // methods private to implementation 124*cdf0e10cSrcweir void Reset(); // Reset all variables before start of analysis 125*cdf0e10cSrcweir 126*cdf0e10cSrcweir void InitText(); // Init of months and days of week 127*cdf0e10cSrcweir 128*cdf0e10cSrcweir // Convert string to double. 129*cdf0e10cSrcweir // Only simple unsigned floating point values without any error detection, 130*cdf0e10cSrcweir // decimal separator has to be '.' 131*cdf0e10cSrcweir // If bForceFraction==sal_True the string is taken to be the fractional part 132*cdf0e10cSrcweir // of 0.1234 without the leading 0. (thus being just "1234"). 133*cdf0e10cSrcweir double StringToDouble( 134*cdf0e10cSrcweir const String& rStr, 135*cdf0e10cSrcweir sal_Bool bForceFraction = sal_False ); 136*cdf0e10cSrcweir 137*cdf0e10cSrcweir sal_Bool NextNumberStringSymbol( // Next number/string symbol 138*cdf0e10cSrcweir const sal_Unicode*& pStr, 139*cdf0e10cSrcweir String& rSymbol ); 140*cdf0e10cSrcweir 141*cdf0e10cSrcweir sal_Bool SkipThousands( // Concatenate ,000,23 blocks 142*cdf0e10cSrcweir const sal_Unicode*& pStr, // in input to 000123 143*cdf0e10cSrcweir String& rSymbol ); 144*cdf0e10cSrcweir 145*cdf0e10cSrcweir void NumberStringDivision( // Divide numbers/strings into 146*cdf0e10cSrcweir const String& rString ); // arrays and variables above. 147*cdf0e10cSrcweir // Leading blanks and blanks 148*cdf0e10cSrcweir // after numbers are thrown away 149*cdf0e10cSrcweir 150*cdf0e10cSrcweir 151*cdf0e10cSrcweir // optimized substring versions 152*cdf0e10cSrcweir 153*cdf0e10cSrcweir static inline sal_Bool StringContains( // Whether rString contains rWhat at nPos 154*cdf0e10cSrcweir const String& rWhat, 155*cdf0e10cSrcweir const String& rString, 156*cdf0e10cSrcweir xub_StrLen nPos ) 157*cdf0e10cSrcweir { // mostly used with one character 158*cdf0e10cSrcweir if ( rWhat.GetChar(0) != rString.GetChar(nPos) ) 159*cdf0e10cSrcweir return sal_False; 160*cdf0e10cSrcweir return StringContainsImpl( rWhat, rString, nPos ); 161*cdf0e10cSrcweir } 162*cdf0e10cSrcweir static inline sal_Bool StringPtrContains( // Whether pString contains rWhat at nPos 163*cdf0e10cSrcweir const String& rWhat, 164*cdf0e10cSrcweir const sal_Unicode* pString, 165*cdf0e10cSrcweir xub_StrLen nPos ) // nPos MUST be a valid offset from pString 166*cdf0e10cSrcweir { // mostly used with one character 167*cdf0e10cSrcweir if ( rWhat.GetChar(0) != *(pString+nPos) ) 168*cdf0e10cSrcweir return sal_False; 169*cdf0e10cSrcweir return StringPtrContainsImpl( rWhat, pString, nPos ); 170*cdf0e10cSrcweir } 171*cdf0e10cSrcweir static sal_Bool StringContainsImpl( //! DO NOT use directly 172*cdf0e10cSrcweir const String& rWhat, 173*cdf0e10cSrcweir const String& rString, 174*cdf0e10cSrcweir xub_StrLen nPos ); 175*cdf0e10cSrcweir static sal_Bool StringPtrContainsImpl( //! DO NOT use directly 176*cdf0e10cSrcweir const String& rWhat, 177*cdf0e10cSrcweir const sal_Unicode* pString, 178*cdf0e10cSrcweir xub_StrLen nPos ); 179*cdf0e10cSrcweir 180*cdf0e10cSrcweir 181*cdf0e10cSrcweir static inline sal_Bool SkipChar( // Skip a special character 182*cdf0e10cSrcweir sal_Unicode c, 183*cdf0e10cSrcweir const String& rString, 184*cdf0e10cSrcweir xub_StrLen& nPos ); 185*cdf0e10cSrcweir static inline void SkipBlanks( // Skip blank 186*cdf0e10cSrcweir const String& rString, 187*cdf0e10cSrcweir xub_StrLen& nPos ); 188*cdf0e10cSrcweir static inline sal_Bool SkipString( // Jump over rWhat in rString at nPos 189*cdf0e10cSrcweir const String& rWhat, 190*cdf0e10cSrcweir const String& rString, 191*cdf0e10cSrcweir xub_StrLen& nPos ); 192*cdf0e10cSrcweir 193*cdf0e10cSrcweir inline sal_Bool GetThousandSep( // Recognizes exactly ,111 as group separator 194*cdf0e10cSrcweir const String& rString, 195*cdf0e10cSrcweir xub_StrLen& nPos, 196*cdf0e10cSrcweir sal_uInt16 nStringPos ); 197*cdf0e10cSrcweir short GetLogical( // Get boolean value 198*cdf0e10cSrcweir const String& rString ); 199*cdf0e10cSrcweir short GetMonth( // Get month and advance string position 200*cdf0e10cSrcweir const String& rString, 201*cdf0e10cSrcweir xub_StrLen& nPos ); 202*cdf0e10cSrcweir int GetDayOfWeek( // Get day of week and advance string position 203*cdf0e10cSrcweir const String& rString, 204*cdf0e10cSrcweir xub_StrLen& nPos ); 205*cdf0e10cSrcweir sal_Bool GetCurrency( // Get currency symbol and advance string position 206*cdf0e10cSrcweir const String& rString, 207*cdf0e10cSrcweir xub_StrLen& nPos, 208*cdf0e10cSrcweir const SvNumberformat* pFormat = NULL ); // optional number format to match against 209*cdf0e10cSrcweir sal_Bool GetTimeAmPm( // Get symbol AM or PM and advance string position 210*cdf0e10cSrcweir const String& rString, 211*cdf0e10cSrcweir xub_StrLen& nPos ); 212*cdf0e10cSrcweir inline sal_Bool GetDecSep( // Get decimal separator and advance string position 213*cdf0e10cSrcweir const String& rString, 214*cdf0e10cSrcweir xub_StrLen& nPos ); 215*cdf0e10cSrcweir inline sal_Bool GetTime100SecSep( // Get hundredth seconds separator and advance string position 216*cdf0e10cSrcweir const String& rString, 217*cdf0e10cSrcweir xub_StrLen& nPos ); 218*cdf0e10cSrcweir int GetSign( // Get sign and advance string position 219*cdf0e10cSrcweir const String& rString, // Including special case '(' 220*cdf0e10cSrcweir xub_StrLen& nPos ); 221*cdf0e10cSrcweir short GetESign( // Get sign of exponent and advance string position 222*cdf0e10cSrcweir const String& rString, 223*cdf0e10cSrcweir xub_StrLen& nPos ); 224*cdf0e10cSrcweir 225*cdf0e10cSrcweir inline sal_Bool GetNextNumber( // Get next number as array offset 226*cdf0e10cSrcweir sal_uInt16& i, 227*cdf0e10cSrcweir sal_uInt16& j ); 228*cdf0e10cSrcweir 229*cdf0e10cSrcweir void GetTimeRef( // Converts time -> double (only decimals) 230*cdf0e10cSrcweir double& fOutNumber, // result as double 231*cdf0e10cSrcweir sal_uInt16 nIndex, // Index of hour in input 232*cdf0e10cSrcweir sal_uInt16 nAnz ); // Count of time substrings in input 233*cdf0e10cSrcweir sal_uInt16 ImplGetDay ( sal_uInt16 nIndex ); // Day input, 0 if no match 234*cdf0e10cSrcweir sal_uInt16 ImplGetMonth( sal_uInt16 nIndex ); // Month input, zero based return, NumberOfMonths if no match 235*cdf0e10cSrcweir sal_uInt16 ImplGetYear ( sal_uInt16 nIndex ); // Year input, 0 if no match 236*cdf0e10cSrcweir sal_Bool GetDateRef( // Conversion of date to number 237*cdf0e10cSrcweir double& fDays, // OUT: days diff to null date 238*cdf0e10cSrcweir sal_uInt16& nCounter, // Count of date substrings 239*cdf0e10cSrcweir const SvNumberformat* pFormat = NULL ); // optional number format to match against 240*cdf0e10cSrcweir 241*cdf0e10cSrcweir sal_Bool ScanStartString( // Analyze start of string 242*cdf0e10cSrcweir const String& rString, 243*cdf0e10cSrcweir const SvNumberformat* pFormat = NULL ); 244*cdf0e10cSrcweir sal_Bool ScanMidString( // Analyze middle substring 245*cdf0e10cSrcweir const String& rString, 246*cdf0e10cSrcweir sal_uInt16 nStringPos, 247*cdf0e10cSrcweir const SvNumberformat* pFormat = NULL ); 248*cdf0e10cSrcweir sal_Bool ScanEndString( // Analyze end of string 249*cdf0e10cSrcweir const String& rString, 250*cdf0e10cSrcweir const SvNumberformat* pFormat = NULL ); 251*cdf0e10cSrcweir 252*cdf0e10cSrcweir // Whether input may be a ISO 8601 date format, yyyy-mm-dd... 253*cdf0e10cSrcweir // checks if at least 3 numbers and first number>31 254*cdf0e10cSrcweir bool MayBeIso8601(); 255*cdf0e10cSrcweir 256*cdf0e10cSrcweir // Compare rString to substring of array indexed by nString 257*cdf0e10cSrcweir // nString == 0xFFFF => last substring 258*cdf0e10cSrcweir sal_Bool ScanStringNumFor( 259*cdf0e10cSrcweir const String& rString, 260*cdf0e10cSrcweir xub_StrLen nPos, 261*cdf0e10cSrcweir const SvNumberformat* pFormat, 262*cdf0e10cSrcweir sal_uInt16 nString, 263*cdf0e10cSrcweir sal_Bool bDontDetectNegation = sal_False ); 264*cdf0e10cSrcweir 265*cdf0e10cSrcweir // if nMatchedAllStrings set nMatchedUsedAsReturn and return sal_True, 266*cdf0e10cSrcweir // else do nothing and return sal_False 267*cdf0e10cSrcweir sal_Bool MatchedReturn(); 268*cdf0e10cSrcweir 269*cdf0e10cSrcweir //! Be sure that the string to be analyzed is already converted to upper 270*cdf0e10cSrcweir //! case and if it contained native humber digits that they are already 271*cdf0e10cSrcweir //! converted to ASCII. 272*cdf0e10cSrcweir sal_Bool IsNumberFormatMain( // Main anlyzing function 273*cdf0e10cSrcweir const String& rString, 274*cdf0e10cSrcweir double& fOutNumber, // return value if string is numeric 275*cdf0e10cSrcweir const SvNumberformat* pFormat = NULL // optional number format to match against 276*cdf0e10cSrcweir ); 277*cdf0e10cSrcweir 278*cdf0e10cSrcweir static inline sal_Bool MyIsdigit( sal_Unicode c ); 279*cdf0e10cSrcweir 280*cdf0e10cSrcweir // native number transliteration if necessary 281*cdf0e10cSrcweir void TransformInput( String& rString ); 282*cdf0e10cSrcweir 283*cdf0e10cSrcweir #endif // _ZFORFIND_CXX 284*cdf0e10cSrcweir }; 285*cdf0e10cSrcweir 286*cdf0e10cSrcweir 287*cdf0e10cSrcweir 288*cdf0e10cSrcweir #endif // _ZFORFIND_HXX 289