1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 #ifndef _ZFORFIND_HXX 29 #define _ZFORFIND_HXX 30 31 #include <tools/string.hxx> 32 33 class Date; 34 class SvNumberformat; 35 class SvNumberFormatter; 36 37 #define SV_MAX_ANZ_INPUT_STRINGS 20 // max count of substrings in input scanner 38 39 class ImpSvNumberInputScan 40 { 41 public: 42 ImpSvNumberInputScan( SvNumberFormatter* pFormatter ); 43 ~ImpSvNumberInputScan(); 44 45 /*!*/ void ChangeIntl(); // MUST be called if language changes 46 47 /// set reference date for offset calculation 48 void ChangeNullDate( 49 const sal_uInt16 nDay, 50 const sal_uInt16 nMonth, 51 const sal_uInt16 nYear ); 52 53 /// convert input string to number 54 sal_Bool IsNumberFormat( 55 const String& rString, /// input string 56 short& F_Type, /// format type (in + out) 57 double& fOutNumber, /// value determined (out) 58 const SvNumberformat* pFormat = NULL /// optional a number format to which compare against 59 ); 60 61 /// after IsNumberFormat: get decimal position 62 short GetDecPos() const { return nDecPos; } 63 /// after IsNumberFormat: get count of numeric substrings in input string 64 sal_uInt16 GetAnzNums() const { return nAnzNums; } 65 66 /// set threshold of two-digit year input 67 void SetYear2000( sal_uInt16 nVal ) { nYear2000 = nVal; } 68 /// get threshold of two-digit year input 69 sal_uInt16 GetYear2000() const { return nYear2000; } 70 71 private: 72 SvNumberFormatter* pFormatter; 73 String* pUpperMonthText; // Array of month names, uppercase 74 String* pUpperAbbrevMonthText; // Array of month names, abbreviated, uppercase 75 String* pUpperDayText; // Array of day of week names, uppercase 76 String* pUpperAbbrevDayText; // Array of day of week names, abbreviated, uppercase 77 String aUpperCurrSymbol; // Currency symbol, uppercase 78 sal_Bool bTextInitialized; // Whether days and months are initialized 79 Date* pNullDate; // 30Dec1899 80 // Variables for provisional results: 81 String sStrArray[SV_MAX_ANZ_INPUT_STRINGS]; // Array of scanned substrings 82 sal_Bool IsNum[SV_MAX_ANZ_INPUT_STRINGS]; // Whether a substring is numeric 83 sal_uInt16 nNums[SV_MAX_ANZ_INPUT_STRINGS]; // Sequence of offsets to numeric strings 84 sal_uInt16 nAnzStrings; // Total count of scanned substrings 85 sal_uInt16 nAnzNums; // Count of numeric substrings 86 sal_Bool bDecSepInDateSeps; // True <=> DecSep in {.,-,/,DateSep} 87 sal_uInt8 nMatchedAllStrings; // Scan...String() matched all substrings, 88 // bit mask of nMatched... constants 89 90 static const sal_uInt8 nMatchedEndString; // 0x01 91 static const sal_uInt8 nMatchedMidString; // 0x02 92 static const sal_uInt8 nMatchedStartString; // 0x04 93 static const sal_uInt8 nMatchedVirgin; // 0x08 94 static const sal_uInt8 nMatchedUsedAsReturn; // 0x10 95 96 int nSign; // Sign of number 97 short nMonth; // Month (1..x) if date 98 // negative => short format 99 short nMonthPos; // 1 = front, 2 = middle 100 // 3 = end 101 sal_uInt16 nTimePos; // Index of first time separator (+1) 102 short nDecPos; // Index of substring containing "," (+1) 103 short nNegCheck; // '( )' for negative 104 short nESign; // Sign of exponent 105 short nAmPm; // +1 AM, -1 PM, 0 if none 106 short nLogical; // -1 => False, 1 => True 107 sal_uInt16 nThousand; // Count of group (AKA thousand) separators 108 sal_uInt16 nPosThousandString; // Position of concatenaded 000,000,000 string 109 short eScannedType; // Scanned type 110 short eSetType; // Preset Type 111 112 sal_uInt16 nStringScanNumFor; // Fixed strings recognized in 113 // pFormat->NumFor[nNumForStringScan] 114 short nStringScanSign; // Sign resulting of FixString 115 sal_uInt16 nYear2000; // Two-digit threshold 116 // Year as 20xx 117 // default 18 118 // number <= nYear2000 => 20xx 119 // number > nYear2000 => 19xx 120 sal_uInt16 nTimezonePos; // Index of timezone separator (+1) 121 sal_uInt8 nMayBeIso8601; // 0:=dontknowyet, 1:=yes, 2:=no 122 123 #ifdef _ZFORFIND_CXX // methods private to implementation 124 void Reset(); // Reset all variables before start of analysis 125 126 void InitText(); // Init of months and days of week 127 128 // Convert string to double. 129 // Only simple unsigned floating point values without any error detection, 130 // decimal separator has to be '.' 131 // If bForceFraction==sal_True the string is taken to be the fractional part 132 // of 0.1234 without the leading 0. (thus being just "1234"). 133 double StringToDouble( 134 const String& rStr, 135 sal_Bool bForceFraction = sal_False ); 136 137 sal_Bool NextNumberStringSymbol( // Next number/string symbol 138 const sal_Unicode*& pStr, 139 String& rSymbol ); 140 141 sal_Bool SkipThousands( // Concatenate ,000,23 blocks 142 const sal_Unicode*& pStr, // in input to 000123 143 String& rSymbol ); 144 145 void NumberStringDivision( // Divide numbers/strings into 146 const String& rString ); // arrays and variables above. 147 // Leading blanks and blanks 148 // after numbers are thrown away 149 150 151 // optimized substring versions 152 153 static inline sal_Bool StringContains( // Whether rString contains rWhat at nPos 154 const String& rWhat, 155 const String& rString, 156 xub_StrLen nPos ) 157 { // mostly used with one character 158 if ( rWhat.GetChar(0) != rString.GetChar(nPos) ) 159 return sal_False; 160 return StringContainsImpl( rWhat, rString, nPos ); 161 } 162 static inline sal_Bool StringPtrContains( // Whether pString contains rWhat at nPos 163 const String& rWhat, 164 const sal_Unicode* pString, 165 xub_StrLen nPos ) // nPos MUST be a valid offset from pString 166 { // mostly used with one character 167 if ( rWhat.GetChar(0) != *(pString+nPos) ) 168 return sal_False; 169 return StringPtrContainsImpl( rWhat, pString, nPos ); 170 } 171 static sal_Bool StringContainsImpl( //! DO NOT use directly 172 const String& rWhat, 173 const String& rString, 174 xub_StrLen nPos ); 175 static sal_Bool StringPtrContainsImpl( //! DO NOT use directly 176 const String& rWhat, 177 const sal_Unicode* pString, 178 xub_StrLen nPos ); 179 180 181 static inline sal_Bool SkipChar( // Skip a special character 182 sal_Unicode c, 183 const String& rString, 184 xub_StrLen& nPos ); 185 static inline void SkipBlanks( // Skip blank 186 const String& rString, 187 xub_StrLen& nPos ); 188 static inline sal_Bool SkipString( // Jump over rWhat in rString at nPos 189 const String& rWhat, 190 const String& rString, 191 xub_StrLen& nPos ); 192 193 inline sal_Bool GetThousandSep( // Recognizes exactly ,111 as group separator 194 const String& rString, 195 xub_StrLen& nPos, 196 sal_uInt16 nStringPos ); 197 short GetLogical( // Get boolean value 198 const String& rString ); 199 short GetMonth( // Get month and advance string position 200 const String& rString, 201 xub_StrLen& nPos ); 202 int GetDayOfWeek( // Get day of week and advance string position 203 const String& rString, 204 xub_StrLen& nPos ); 205 sal_Bool GetCurrency( // Get currency symbol and advance string position 206 const String& rString, 207 xub_StrLen& nPos, 208 const SvNumberformat* pFormat = NULL ); // optional number format to match against 209 sal_Bool GetTimeAmPm( // Get symbol AM or PM and advance string position 210 const String& rString, 211 xub_StrLen& nPos ); 212 inline sal_Bool GetDecSep( // Get decimal separator and advance string position 213 const String& rString, 214 xub_StrLen& nPos ); 215 inline sal_Bool GetTime100SecSep( // Get hundredth seconds separator and advance string position 216 const String& rString, 217 xub_StrLen& nPos ); 218 int GetSign( // Get sign and advance string position 219 const String& rString, // Including special case '(' 220 xub_StrLen& nPos ); 221 short GetESign( // Get sign of exponent and advance string position 222 const String& rString, 223 xub_StrLen& nPos ); 224 225 inline sal_Bool GetNextNumber( // Get next number as array offset 226 sal_uInt16& i, 227 sal_uInt16& j ); 228 229 void GetTimeRef( // Converts time -> double (only decimals) 230 double& fOutNumber, // result as double 231 sal_uInt16 nIndex, // Index of hour in input 232 sal_uInt16 nAnz ); // Count of time substrings in input 233 sal_uInt16 ImplGetDay ( sal_uInt16 nIndex ); // Day input, 0 if no match 234 sal_uInt16 ImplGetMonth( sal_uInt16 nIndex ); // Month input, zero based return, NumberOfMonths if no match 235 sal_uInt16 ImplGetYear ( sal_uInt16 nIndex ); // Year input, 0 if no match 236 sal_Bool GetDateRef( // Conversion of date to number 237 double& fDays, // OUT: days diff to null date 238 sal_uInt16& nCounter, // Count of date substrings 239 const SvNumberformat* pFormat = NULL ); // optional number format to match against 240 241 sal_Bool ScanStartString( // Analyze start of string 242 const String& rString, 243 const SvNumberformat* pFormat = NULL ); 244 sal_Bool ScanMidString( // Analyze middle substring 245 const String& rString, 246 sal_uInt16 nStringPos, 247 const SvNumberformat* pFormat = NULL ); 248 sal_Bool ScanEndString( // Analyze end of string 249 const String& rString, 250 const SvNumberformat* pFormat = NULL ); 251 252 // Whether input may be a ISO 8601 date format, yyyy-mm-dd... 253 // checks if at least 3 numbers and first number>31 254 bool MayBeIso8601(); 255 256 // Compare rString to substring of array indexed by nString 257 // nString == 0xFFFF => last substring 258 sal_Bool ScanStringNumFor( 259 const String& rString, 260 xub_StrLen nPos, 261 const SvNumberformat* pFormat, 262 sal_uInt16 nString, 263 sal_Bool bDontDetectNegation = sal_False ); 264 265 // if nMatchedAllStrings set nMatchedUsedAsReturn and return sal_True, 266 // else do nothing and return sal_False 267 sal_Bool MatchedReturn(); 268 269 //! Be sure that the string to be analyzed is already converted to upper 270 //! case and if it contained native humber digits that they are already 271 //! converted to ASCII. 272 sal_Bool IsNumberFormatMain( // Main anlyzing function 273 const String& rString, 274 double& fOutNumber, // return value if string is numeric 275 const SvNumberformat* pFormat = NULL // optional number format to match against 276 ); 277 278 static inline sal_Bool MyIsdigit( sal_Unicode c ); 279 280 // native number transliteration if necessary 281 void TransformInput( String& rString ); 282 283 #endif // _ZFORFIND_CXX 284 }; 285 286 287 288 #endif // _ZFORFIND_HXX 289