1*b1cdbd2cSJim Jagielski /************************************************************** 2*b1cdbd2cSJim Jagielski * 3*b1cdbd2cSJim Jagielski * Licensed to the Apache Software Foundation (ASF) under one 4*b1cdbd2cSJim Jagielski * or more contributor license agreements. See the NOTICE file 5*b1cdbd2cSJim Jagielski * distributed with this work for additional information 6*b1cdbd2cSJim Jagielski * regarding copyright ownership. The ASF licenses this file 7*b1cdbd2cSJim Jagielski * to you under the Apache License, Version 2.0 (the 8*b1cdbd2cSJim Jagielski * "License"); you may not use this file except in compliance 9*b1cdbd2cSJim Jagielski * with the License. You may obtain a copy of the License at 10*b1cdbd2cSJim Jagielski * 11*b1cdbd2cSJim Jagielski * http://www.apache.org/licenses/LICENSE-2.0 12*b1cdbd2cSJim Jagielski * 13*b1cdbd2cSJim Jagielski * Unless required by applicable law or agreed to in writing, 14*b1cdbd2cSJim Jagielski * software distributed under the License is distributed on an 15*b1cdbd2cSJim Jagielski * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16*b1cdbd2cSJim Jagielski * KIND, either express or implied. See the License for the 17*b1cdbd2cSJim Jagielski * specific language governing permissions and limitations 18*b1cdbd2cSJim Jagielski * under the License. 19*b1cdbd2cSJim Jagielski * 20*b1cdbd2cSJim Jagielski *************************************************************/ 21*b1cdbd2cSJim Jagielski 22*b1cdbd2cSJim Jagielski 23*b1cdbd2cSJim Jagielski 24*b1cdbd2cSJim Jagielski #ifndef _ZFORFIND_HXX 25*b1cdbd2cSJim Jagielski #define _ZFORFIND_HXX 26*b1cdbd2cSJim Jagielski 27*b1cdbd2cSJim Jagielski #include <tools/string.hxx> 28*b1cdbd2cSJim Jagielski 29*b1cdbd2cSJim Jagielski class Date; 30*b1cdbd2cSJim Jagielski class SvNumberformat; 31*b1cdbd2cSJim Jagielski class SvNumberFormatter; 32*b1cdbd2cSJim Jagielski 33*b1cdbd2cSJim Jagielski #define SV_MAX_ANZ_INPUT_STRINGS 20 // max count of substrings in input scanner 34*b1cdbd2cSJim Jagielski 35*b1cdbd2cSJim Jagielski class ImpSvNumberInputScan 36*b1cdbd2cSJim Jagielski { 37*b1cdbd2cSJim Jagielski public: 38*b1cdbd2cSJim Jagielski ImpSvNumberInputScan( SvNumberFormatter* pFormatter ); 39*b1cdbd2cSJim Jagielski ~ImpSvNumberInputScan(); 40*b1cdbd2cSJim Jagielski 41*b1cdbd2cSJim Jagielski /*!*/ void ChangeIntl(); // MUST be called if language changes 42*b1cdbd2cSJim Jagielski 43*b1cdbd2cSJim Jagielski /// set reference date for offset calculation 44*b1cdbd2cSJim Jagielski void ChangeNullDate( 45*b1cdbd2cSJim Jagielski const sal_uInt16 nDay, 46*b1cdbd2cSJim Jagielski const sal_uInt16 nMonth, 47*b1cdbd2cSJim Jagielski const sal_uInt16 nYear ); 48*b1cdbd2cSJim Jagielski 49*b1cdbd2cSJim Jagielski /// convert input string to number 50*b1cdbd2cSJim Jagielski sal_Bool IsNumberFormat( 51*b1cdbd2cSJim Jagielski const String& rString, /// input string 52*b1cdbd2cSJim Jagielski short& F_Type, /// format type (in + out) 53*b1cdbd2cSJim Jagielski double& fOutNumber, /// value determined (out) 54*b1cdbd2cSJim Jagielski const SvNumberformat* pFormat = NULL /// optional a number format to which compare against 55*b1cdbd2cSJim Jagielski ); 56*b1cdbd2cSJim Jagielski 57*b1cdbd2cSJim Jagielski /// after IsNumberFormat: get decimal position GetDecPos() const58*b1cdbd2cSJim Jagielski short GetDecPos() const { return nDecPos; } 59*b1cdbd2cSJim Jagielski /// after IsNumberFormat: get count of numeric substrings in input string GetAnzNums() const60*b1cdbd2cSJim Jagielski sal_uInt16 GetAnzNums() const { return nAnzNums; } 61*b1cdbd2cSJim Jagielski 62*b1cdbd2cSJim Jagielski /// set threshold of two-digit year input SetYear2000(sal_uInt16 nVal)63*b1cdbd2cSJim Jagielski void SetYear2000( sal_uInt16 nVal ) { nYear2000 = nVal; } 64*b1cdbd2cSJim Jagielski /// get threshold of two-digit year input GetYear2000() const65*b1cdbd2cSJim Jagielski sal_uInt16 GetYear2000() const { return nYear2000; } 66*b1cdbd2cSJim Jagielski 67*b1cdbd2cSJim Jagielski private: 68*b1cdbd2cSJim Jagielski SvNumberFormatter* pFormatter; 69*b1cdbd2cSJim Jagielski String* pUpperMonthText; // Array of month names, uppercase 70*b1cdbd2cSJim Jagielski String* pUpperAbbrevMonthText; // Array of month names, abbreviated, uppercase 71*b1cdbd2cSJim Jagielski String* pUpperDayText; // Array of day of week names, uppercase 72*b1cdbd2cSJim Jagielski String* pUpperAbbrevDayText; // Array of day of week names, abbreviated, uppercase 73*b1cdbd2cSJim Jagielski String aUpperCurrSymbol; // Currency symbol, uppercase 74*b1cdbd2cSJim Jagielski sal_Bool bTextInitialized; // Whether days and months are initialized 75*b1cdbd2cSJim Jagielski Date* pNullDate; // 30Dec1899 76*b1cdbd2cSJim Jagielski // Variables for provisional results: 77*b1cdbd2cSJim Jagielski String sStrArray[SV_MAX_ANZ_INPUT_STRINGS]; // Array of scanned substrings 78*b1cdbd2cSJim Jagielski sal_Bool IsNum[SV_MAX_ANZ_INPUT_STRINGS]; // Whether a substring is numeric 79*b1cdbd2cSJim Jagielski sal_uInt16 nNums[SV_MAX_ANZ_INPUT_STRINGS]; // Sequence of offsets to numeric strings 80*b1cdbd2cSJim Jagielski sal_uInt16 nAnzStrings; // Total count of scanned substrings 81*b1cdbd2cSJim Jagielski sal_uInt16 nAnzNums; // Count of numeric substrings 82*b1cdbd2cSJim Jagielski sal_Bool bDecSepInDateSeps; // True <=> DecSep in {.,-,/,DateSep} 83*b1cdbd2cSJim Jagielski sal_uInt8 nMatchedAllStrings; // Scan...String() matched all substrings, 84*b1cdbd2cSJim Jagielski // bit mask of nMatched... constants 85*b1cdbd2cSJim Jagielski 86*b1cdbd2cSJim Jagielski static const sal_uInt8 nMatchedEndString; // 0x01 87*b1cdbd2cSJim Jagielski static const sal_uInt8 nMatchedMidString; // 0x02 88*b1cdbd2cSJim Jagielski static const sal_uInt8 nMatchedStartString; // 0x04 89*b1cdbd2cSJim Jagielski static const sal_uInt8 nMatchedVirgin; // 0x08 90*b1cdbd2cSJim Jagielski static const sal_uInt8 nMatchedUsedAsReturn; // 0x10 91*b1cdbd2cSJim Jagielski 92*b1cdbd2cSJim Jagielski int nSign; // Sign of number 93*b1cdbd2cSJim Jagielski short nMonth; // Month (1..x) if date 94*b1cdbd2cSJim Jagielski // negative => short format 95*b1cdbd2cSJim Jagielski short nMonthPos; // 1 = front, 2 = middle 96*b1cdbd2cSJim Jagielski // 3 = end 97*b1cdbd2cSJim Jagielski sal_uInt16 nTimePos; // Index of first time separator (+1) 98*b1cdbd2cSJim Jagielski short nDecPos; // Index of substring containing "," (+1) 99*b1cdbd2cSJim Jagielski short nNegCheck; // '( )' for negative 100*b1cdbd2cSJim Jagielski short nESign; // Sign of exponent 101*b1cdbd2cSJim Jagielski short nAmPm; // +1 AM, -1 PM, 0 if none 102*b1cdbd2cSJim Jagielski short nLogical; // -1 => False, 1 => True 103*b1cdbd2cSJim Jagielski sal_uInt16 nThousand; // Count of group (AKA thousand) separators 104*b1cdbd2cSJim Jagielski sal_uInt16 nPosThousandString; // Position of concatenaded 000,000,000 string 105*b1cdbd2cSJim Jagielski short eScannedType; // Scanned type 106*b1cdbd2cSJim Jagielski short eSetType; // Preset Type 107*b1cdbd2cSJim Jagielski 108*b1cdbd2cSJim Jagielski sal_uInt16 nStringScanNumFor; // Fixed strings recognized in 109*b1cdbd2cSJim Jagielski // pFormat->NumFor[nNumForStringScan] 110*b1cdbd2cSJim Jagielski short nStringScanSign; // Sign resulting of FixString 111*b1cdbd2cSJim Jagielski sal_uInt16 nYear2000; // Two-digit threshold 112*b1cdbd2cSJim Jagielski // Year as 20xx 113*b1cdbd2cSJim Jagielski // default 18 114*b1cdbd2cSJim Jagielski // number <= nYear2000 => 20xx 115*b1cdbd2cSJim Jagielski // number > nYear2000 => 19xx 116*b1cdbd2cSJim Jagielski sal_uInt16 nTimezonePos; // Index of timezone separator (+1) 117*b1cdbd2cSJim Jagielski sal_uInt8 nMayBeIso8601; // 0:=dontknowyet, 1:=yes, 2:=no 118*b1cdbd2cSJim Jagielski 119*b1cdbd2cSJim Jagielski #ifdef _ZFORFIND_CXX // methods private to implementation 120*b1cdbd2cSJim Jagielski void Reset(); // Reset all variables before start of analysis 121*b1cdbd2cSJim Jagielski 122*b1cdbd2cSJim Jagielski void InitText(); // Init of months and days of week 123*b1cdbd2cSJim Jagielski 124*b1cdbd2cSJim Jagielski // Convert string to double. 125*b1cdbd2cSJim Jagielski // Only simple unsigned floating point values without any error detection, 126*b1cdbd2cSJim Jagielski // decimal separator has to be '.' 127*b1cdbd2cSJim Jagielski // If bForceFraction==sal_True the string is taken to be the fractional part 128*b1cdbd2cSJim Jagielski // of 0.1234 without the leading 0. (thus being just "1234"). 129*b1cdbd2cSJim Jagielski double StringToDouble( 130*b1cdbd2cSJim Jagielski const String& rStr, 131*b1cdbd2cSJim Jagielski sal_Bool bForceFraction = sal_False ); 132*b1cdbd2cSJim Jagielski 133*b1cdbd2cSJim Jagielski sal_Bool NextNumberStringSymbol( // Next number/string symbol 134*b1cdbd2cSJim Jagielski const sal_Unicode*& pStr, 135*b1cdbd2cSJim Jagielski String& rSymbol ); 136*b1cdbd2cSJim Jagielski 137*b1cdbd2cSJim Jagielski sal_Bool SkipThousands( // Concatenate ,000,23 blocks 138*b1cdbd2cSJim Jagielski const sal_Unicode*& pStr, // in input to 000123 139*b1cdbd2cSJim Jagielski String& rSymbol ); 140*b1cdbd2cSJim Jagielski 141*b1cdbd2cSJim Jagielski void NumberStringDivision( // Divide numbers/strings into 142*b1cdbd2cSJim Jagielski const String& rString ); // arrays and variables above. 143*b1cdbd2cSJim Jagielski // Leading blanks and blanks 144*b1cdbd2cSJim Jagielski // after numbers are thrown away 145*b1cdbd2cSJim Jagielski 146*b1cdbd2cSJim Jagielski 147*b1cdbd2cSJim Jagielski // optimized substring versions 148*b1cdbd2cSJim Jagielski StringContains(const String & rWhat,const String & rString,xub_StrLen nPos)149*b1cdbd2cSJim Jagielski static inline sal_Bool StringContains( // Whether rString contains rWhat at nPos 150*b1cdbd2cSJim Jagielski const String& rWhat, 151*b1cdbd2cSJim Jagielski const String& rString, 152*b1cdbd2cSJim Jagielski xub_StrLen nPos ) 153*b1cdbd2cSJim Jagielski { // mostly used with one character 154*b1cdbd2cSJim Jagielski if ( rWhat.GetChar(0) != rString.GetChar(nPos) ) 155*b1cdbd2cSJim Jagielski return sal_False; 156*b1cdbd2cSJim Jagielski return StringContainsImpl( rWhat, rString, nPos ); 157*b1cdbd2cSJim Jagielski } StringPtrContains(const String & rWhat,const sal_Unicode * pString,xub_StrLen nPos)158*b1cdbd2cSJim Jagielski static inline sal_Bool StringPtrContains( // Whether pString contains rWhat at nPos 159*b1cdbd2cSJim Jagielski const String& rWhat, 160*b1cdbd2cSJim Jagielski const sal_Unicode* pString, 161*b1cdbd2cSJim Jagielski xub_StrLen nPos ) // nPos MUST be a valid offset from pString 162*b1cdbd2cSJim Jagielski { // mostly used with one character 163*b1cdbd2cSJim Jagielski if ( rWhat.GetChar(0) != *(pString+nPos) ) 164*b1cdbd2cSJim Jagielski return sal_False; 165*b1cdbd2cSJim Jagielski return StringPtrContainsImpl( rWhat, pString, nPos ); 166*b1cdbd2cSJim Jagielski } 167*b1cdbd2cSJim Jagielski static sal_Bool StringContainsImpl( //! DO NOT use directly 168*b1cdbd2cSJim Jagielski const String& rWhat, 169*b1cdbd2cSJim Jagielski const String& rString, 170*b1cdbd2cSJim Jagielski xub_StrLen nPos ); 171*b1cdbd2cSJim Jagielski static sal_Bool StringPtrContainsImpl( //! DO NOT use directly 172*b1cdbd2cSJim Jagielski const String& rWhat, 173*b1cdbd2cSJim Jagielski const sal_Unicode* pString, 174*b1cdbd2cSJim Jagielski xub_StrLen nPos ); 175*b1cdbd2cSJim Jagielski 176*b1cdbd2cSJim Jagielski 177*b1cdbd2cSJim Jagielski static inline sal_Bool SkipChar( // Skip a special character 178*b1cdbd2cSJim Jagielski sal_Unicode c, 179*b1cdbd2cSJim Jagielski const String& rString, 180*b1cdbd2cSJim Jagielski xub_StrLen& nPos ); 181*b1cdbd2cSJim Jagielski static inline void SkipBlanks( // Skip blank 182*b1cdbd2cSJim Jagielski const String& rString, 183*b1cdbd2cSJim Jagielski xub_StrLen& nPos ); 184*b1cdbd2cSJim Jagielski static inline sal_Bool SkipString( // Jump over rWhat in rString at nPos 185*b1cdbd2cSJim Jagielski const String& rWhat, 186*b1cdbd2cSJim Jagielski const String& rString, 187*b1cdbd2cSJim Jagielski xub_StrLen& nPos ); 188*b1cdbd2cSJim Jagielski 189*b1cdbd2cSJim Jagielski inline sal_Bool GetThousandSep( // Recognizes exactly ,111 as group separator 190*b1cdbd2cSJim Jagielski const String& rString, 191*b1cdbd2cSJim Jagielski xub_StrLen& nPos, 192*b1cdbd2cSJim Jagielski sal_uInt16 nStringPos ); 193*b1cdbd2cSJim Jagielski short GetLogical( // Get boolean value 194*b1cdbd2cSJim Jagielski const String& rString ); 195*b1cdbd2cSJim Jagielski short GetMonth( // Get month and advance string position 196*b1cdbd2cSJim Jagielski const String& rString, 197*b1cdbd2cSJim Jagielski xub_StrLen& nPos ); 198*b1cdbd2cSJim Jagielski int GetDayOfWeek( // Get day of week and advance string position 199*b1cdbd2cSJim Jagielski const String& rString, 200*b1cdbd2cSJim Jagielski xub_StrLen& nPos ); 201*b1cdbd2cSJim Jagielski sal_Bool GetCurrency( // Get currency symbol and advance string position 202*b1cdbd2cSJim Jagielski const String& rString, 203*b1cdbd2cSJim Jagielski xub_StrLen& nPos, 204*b1cdbd2cSJim Jagielski const SvNumberformat* pFormat = NULL ); // optional number format to match against 205*b1cdbd2cSJim Jagielski sal_Bool GetTimeAmPm( // Get symbol AM or PM and advance string position 206*b1cdbd2cSJim Jagielski const String& rString, 207*b1cdbd2cSJim Jagielski xub_StrLen& nPos ); 208*b1cdbd2cSJim Jagielski inline sal_Bool GetDecSep( // Get decimal separator and advance string position 209*b1cdbd2cSJim Jagielski const String& rString, 210*b1cdbd2cSJim Jagielski xub_StrLen& nPos ); 211*b1cdbd2cSJim Jagielski inline sal_Bool GetTime100SecSep( // Get hundredth seconds separator and advance string position 212*b1cdbd2cSJim Jagielski const String& rString, 213*b1cdbd2cSJim Jagielski xub_StrLen& nPos ); 214*b1cdbd2cSJim Jagielski int GetSign( // Get sign and advance string position 215*b1cdbd2cSJim Jagielski const String& rString, // Including special case '(' 216*b1cdbd2cSJim Jagielski xub_StrLen& nPos ); 217*b1cdbd2cSJim Jagielski short GetESign( // Get sign of exponent and advance string position 218*b1cdbd2cSJim Jagielski const String& rString, 219*b1cdbd2cSJim Jagielski xub_StrLen& nPos ); 220*b1cdbd2cSJim Jagielski 221*b1cdbd2cSJim Jagielski inline sal_Bool GetNextNumber( // Get next number as array offset 222*b1cdbd2cSJim Jagielski sal_uInt16& i, 223*b1cdbd2cSJim Jagielski sal_uInt16& j ); 224*b1cdbd2cSJim Jagielski 225*b1cdbd2cSJim Jagielski void GetTimeRef( // Converts time -> double (only decimals) 226*b1cdbd2cSJim Jagielski double& fOutNumber, // result as double 227*b1cdbd2cSJim Jagielski sal_uInt16 nIndex, // Index of hour in input 228*b1cdbd2cSJim Jagielski sal_uInt16 nAnz ); // Count of time substrings in input 229*b1cdbd2cSJim Jagielski sal_uInt16 ImplGetDay ( sal_uInt16 nIndex ); // Day input, 0 if no match 230*b1cdbd2cSJim Jagielski sal_uInt16 ImplGetMonth( sal_uInt16 nIndex ); // Month input, zero based return, NumberOfMonths if no match 231*b1cdbd2cSJim Jagielski sal_uInt16 ImplGetYear ( sal_uInt16 nIndex ); // Year input, 0 if no match 232*b1cdbd2cSJim Jagielski sal_Bool GetDateRef( // Conversion of date to number 233*b1cdbd2cSJim Jagielski double& fDays, // OUT: days diff to null date 234*b1cdbd2cSJim Jagielski sal_uInt16& nCounter, // Count of date substrings 235*b1cdbd2cSJim Jagielski const SvNumberformat* pFormat = NULL ); // optional number format to match against 236*b1cdbd2cSJim Jagielski 237*b1cdbd2cSJim Jagielski sal_Bool ScanStartString( // Analyze start of string 238*b1cdbd2cSJim Jagielski const String& rString, 239*b1cdbd2cSJim Jagielski const SvNumberformat* pFormat = NULL ); 240*b1cdbd2cSJim Jagielski sal_Bool ScanMidString( // Analyze middle substring 241*b1cdbd2cSJim Jagielski const String& rString, 242*b1cdbd2cSJim Jagielski sal_uInt16 nStringPos, 243*b1cdbd2cSJim Jagielski const SvNumberformat* pFormat = NULL ); 244*b1cdbd2cSJim Jagielski sal_Bool ScanEndString( // Analyze end of string 245*b1cdbd2cSJim Jagielski const String& rString, 246*b1cdbd2cSJim Jagielski const SvNumberformat* pFormat = NULL ); 247*b1cdbd2cSJim Jagielski 248*b1cdbd2cSJim Jagielski // Whether input may be a ISO 8601 date format, yyyy-mm-dd... 249*b1cdbd2cSJim Jagielski // checks if at least 3 numbers and first number>31 250*b1cdbd2cSJim Jagielski bool MayBeIso8601(); 251*b1cdbd2cSJim Jagielski 252*b1cdbd2cSJim Jagielski // Compare rString to substring of array indexed by nString 253*b1cdbd2cSJim Jagielski // nString == 0xFFFF => last substring 254*b1cdbd2cSJim Jagielski sal_Bool ScanStringNumFor( 255*b1cdbd2cSJim Jagielski const String& rString, 256*b1cdbd2cSJim Jagielski xub_StrLen nPos, 257*b1cdbd2cSJim Jagielski const SvNumberformat* pFormat, 258*b1cdbd2cSJim Jagielski sal_uInt16 nString, 259*b1cdbd2cSJim Jagielski sal_Bool bDontDetectNegation = sal_False ); 260*b1cdbd2cSJim Jagielski 261*b1cdbd2cSJim Jagielski // if nMatchedAllStrings set nMatchedUsedAsReturn and return sal_True, 262*b1cdbd2cSJim Jagielski // else do nothing and return sal_False 263*b1cdbd2cSJim Jagielski sal_Bool MatchedReturn(); 264*b1cdbd2cSJim Jagielski 265*b1cdbd2cSJim Jagielski //! Be sure that the string to be analyzed is already converted to upper 266*b1cdbd2cSJim Jagielski //! case and if it contained native humber digits that they are already 267*b1cdbd2cSJim Jagielski //! converted to ASCII. 268*b1cdbd2cSJim Jagielski sal_Bool IsNumberFormatMain( // Main anlyzing function 269*b1cdbd2cSJim Jagielski const String& rString, 270*b1cdbd2cSJim Jagielski double& fOutNumber, // return value if string is numeric 271*b1cdbd2cSJim Jagielski const SvNumberformat* pFormat = NULL // optional number format to match against 272*b1cdbd2cSJim Jagielski ); 273*b1cdbd2cSJim Jagielski 274*b1cdbd2cSJim Jagielski static inline sal_Bool MyIsdigit( sal_Unicode c ); 275*b1cdbd2cSJim Jagielski 276*b1cdbd2cSJim Jagielski // native number transliteration if necessary 277*b1cdbd2cSJim Jagielski void TransformInput( String& rString ); 278*b1cdbd2cSJim Jagielski 279*b1cdbd2cSJim Jagielski #endif // _ZFORFIND_CXX 280*b1cdbd2cSJim Jagielski }; 281*b1cdbd2cSJim Jagielski 282*b1cdbd2cSJim Jagielski 283*b1cdbd2cSJim Jagielski 284*b1cdbd2cSJim Jagielski #endif // _ZFORFIND_HXX 285