1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 #ifndef OOX_XLS_BIFFINPUTSTREAM_HXX 25 #define OOX_XLS_BIFFINPUTSTREAM_HXX 26 27 #include <vector> 28 #include "oox/helper/binaryinputstream.hxx" 29 #include "oox/xls/biffhelper.hxx" 30 #include "oox/xls/biffcodec.hxx" 31 32 namespace rtl { class OUStringBuffer; } 33 34 namespace oox { 35 namespace xls { 36 37 // ============================================================================ 38 39 namespace prv { 40 41 /** Buffers the contents of a raw record and encapsulates stream decoding. */ 42 class BiffInputRecordBuffer 43 { 44 public: 45 explicit BiffInputRecordBuffer( BinaryInputStream& rInStrm ); 46 47 /** Returns the wrapped binary base stream. */ getBaseStream() const48 inline const BinaryInputStream& getBaseStream() const { return mrInStrm; } 49 50 /** Sets a decoder object and decrypts buffered record data. */ 51 void setDecoder( const BiffDecoderRef& rxDecoder ); 52 /** Returns the current decoder object. */ getDecoder() const53 inline BiffDecoderRef getDecoder() const { return mxDecoder; } 54 /** Enables/disables usage of current decoder. */ 55 void enableDecoder( bool bEnable ); 56 57 /** Restarts the stream at the passed position. Buffer is invalid until the 58 next call of startRecord() or startNextRecord(). */ 59 void restartAt( sal_Int64 nPos ); 60 61 /** Reads the record header at the passed position. */ 62 bool startRecord( sal_Int64 nHeaderPos ); 63 /** Reads the next record header from the stream. */ 64 bool startNextRecord(); 65 /** Returns the start position of the record header in the core stream. */ 66 sal_uInt16 getNextRecId(); 67 68 /** Returns the start position of the record header in the core stream. */ getRecHeaderPos() const69 inline sal_Int64 getRecHeaderPos() const { return mnHeaderPos; } 70 /** Returns the current record identifier. */ getRecId() const71 inline sal_uInt16 getRecId() const { return mnRecId; } 72 /** Returns the current record size. */ getRecSize() const73 inline sal_uInt16 getRecSize() const { return mnRecSize; } 74 /** Returns the current read position in the current record body. */ getRecPos() const75 inline sal_uInt16 getRecPos() const { return mnRecPos; } 76 /** Returns the number of remaining bytes in the current record body. */ getRecLeft() const77 inline sal_uInt16 getRecLeft() const { return mnRecSize - mnRecPos; } 78 79 /** Reads nBytes bytes to the existing buffer opData. Must NOT overread the source buffer. */ 80 void read( void* opData, sal_uInt16 nBytes ); 81 /** Ignores nBytes bytes. Must NOT overread the buffer. */ 82 void skip( sal_uInt16 nBytes ); 83 84 private: 85 /** Updates data buffer from stream, if needed. */ 86 void updateBuffer(); 87 /** Updates decoded data from original data. */ 88 void updateDecoded(); 89 90 private: 91 typedef ::std::vector< sal_uInt8 > DataBuffer; 92 93 BinaryInputStream& mrInStrm; /// Core input stream. 94 DataBuffer maOriginalData; /// Original data read from stream. 95 DataBuffer maDecodedData; /// Decoded data. 96 DataBuffer* mpCurrentData; /// Points to data buffer currently in use. 97 BiffDecoderRef mxDecoder; /// Decoder object. 98 sal_Int64 mnHeaderPos; /// Stream start position of current record header. 99 sal_Int64 mnBodyPos; /// Stream start position of current record body. 100 sal_Int64 mnBufferBodyPos; /// Stream start position of buffered data. 101 sal_Int64 mnNextHeaderPos; /// Stream start position of next record header. 102 sal_uInt16 mnRecId; /// Current record identifier. 103 sal_uInt16 mnRecSize; /// Current record size. 104 sal_uInt16 mnRecPos; /// Current position in record body. 105 bool mbValidHeader; /// True = valid record header. 106 }; 107 108 } // namespace prv 109 110 // ============================================================================ 111 112 /** This class is used to read BIFF record streams. 113 114 An instance is constructed with a BinaryInputStream object. The passed 115 stream is reset to its start while constructing this stream. 116 117 To start reading a record call startNextRecord(). Now it is possible to 118 read all contents of the record using operator>>() or any of the read***() 119 functions. If some data exceeds the record size limit, the stream looks for 120 a following CONTINUE record and jumps automatically to it. It is NOT 121 allowed that an atomic data type is split into two records (e.g. 4 bytes of 122 a double in one record and the other 4 bytes in a following CONTINUE). 123 124 Trying to read over the record limits results in a stream error. The 125 isValid() function indicates that by returning false. From now on the data 126 returned by the read functions is undefined. The error state will be reset, 127 if the record is reset (with the function resetRecord()), or if the next 128 record is started. 129 130 To switch off the automatic lookup of CONTINUE records, use resetRecord() 131 with false parameter. This is useful e.g. on import of drawing layer data, 132 where sometimes solely CONTINUE records will occur. The automatic lookup 133 keeps switched off until the method resetRecord() is called with parameter 134 true. All other settings done on the stream (e.g. alternative CONTINUE 135 record identifier, enabled decryption, NUL substitution character) will be 136 reset to default values, if a new record is started. 137 138 The import stream supports decrypting the stream data. The contents of a 139 record (not the record header) will be encrypted by Excel if the file has 140 been stored with password protection. The functions setDecoder() and 141 enableDecoder() control the usage of the decryption algorithms. 142 setDecoder() sets a new decryption algorithm and initially enables it. 143 enableDecoder( false ) may be used to stop the usage of the decryption 144 temporarily (sometimes record contents are never encrypted, e.g. all BOF 145 records or the stream position in SHEET records). Decryption will be 146 reenabled automatically, if a new record is started with the function 147 startNextRecord(). 148 */ 149 class BiffInputStream : public BinaryInputStream 150 { 151 public: 152 /** Constructs the BIFF record stream using the passed binary stream. 153 154 @param rInStream 155 The base input stream. Must be seekable. Will be seeked to its 156 start position. 157 158 @param bContLookup Automatic CONTINUE lookup on/off. 159 */ 160 explicit BiffInputStream( 161 BinaryInputStream& rInStream, 162 bool bContLookup = true ); 163 164 // record control --------------------------------------------------------- 165 166 /** Sets stream pointer to the start of the next record content. 167 168 Ignores all CONTINUE records of the current record, if automatic 169 CONTINUE usage is switched on. 170 171 @return False = no record found (end of stream). 172 */ 173 bool startNextRecord(); 174 175 /** Sets stream pointer to the start of the content of the specified record. 176 177 The handle of the current record can be received and stored using the 178 function getRecHandle() for later usage with this function. The record 179 handle is equivalent to the position of the underlying binary stream, 180 thus the function can be used to perform a hard seek to a specific 181 position, if it is sure that a record starts exactly at this position. 182 183 @return False = no record found (invalid handle passed). 184 */ 185 bool startRecordByHandle( sal_Int64 nRecHandle ); 186 187 /** Sets stream pointer to begin of record content. 188 189 @param bContLookup 190 Automatic CONTINUE lookup on/off. In difference to other stream 191 settings, this setting is persistent until next call of this 192 function (because it is wanted to receive the next CONTINUE records 193 separately). 194 @param nAltContId 195 Sets an alternative record identifier for content continuation. 196 This value is reset automatically when a new record is started with 197 startNextRecord(). 198 */ 199 void resetRecord( 200 bool bContLookup, 201 sal_uInt16 nAltContId = BIFF_ID_UNKNOWN ); 202 203 /** Sets stream pointer before current record and invalidates stream. 204 205 The next call to startNextRecord() will start again the current record. 206 This can be used in situations where a loop or a function leaves on a 207 specific record, but the parent context expects to start this record by 208 itself. The stream is invalid as long as the first record has not been 209 started (it is not allowed to call any other stream operation then). 210 */ 211 void rewindRecord(); 212 213 // decoder ---------------------------------------------------------------- 214 215 /** Sets a new decoder object. 216 217 Enables decryption of record contents for the rest of the stream. 218 */ 219 void setDecoder( const BiffDecoderRef& rxDecoder ); 220 221 /** Enables/disables usage of current decoder. 222 223 Decryption is reenabled automatically, if a new record is started using 224 the function startNextRecord(). 225 */ 226 void enableDecoder( bool bEnable = true ); 227 228 // stream/record state and info ------------------------------------------- 229 230 /** Returns the current record identifier. */ getRecId() const231 inline sal_uInt16 getRecId() const { return mnRecId; } 232 /** Returns the record identifier of the following record. */ 233 sal_uInt16 getNextRecId(); 234 235 /** Returns a unique handle for the current record that can be used with 236 the function startRecordByHandle(). */ getRecHandle() const237 inline sal_Int64 getRecHandle() const { return mnRecHandle; } 238 239 // BinaryStreamBase interface (seeking) ----------------------------------- 240 241 /** Returns the data size of the whole record without record headers. */ 242 virtual sal_Int64 size() const; 243 /** Returns the position inside of the whole record content. */ 244 virtual sal_Int64 tell() const; 245 /** Seeks in record content to the specified position. */ 246 virtual void seek( sal_Int64 nRecPos ); 247 /** Closes the input stream but not the wrapped stream. */ 248 virtual void close(); 249 250 /** Returns the absolute position in the wrapped binary stream. */ 251 sal_Int64 tellBase() const; 252 /** Returns the total size of the wrapped binary stream. */ 253 sal_Int64 sizeBase() const; 254 255 // BinaryInputStream interface (stream read access) ----------------------- 256 257 /** Reads nBytes bytes to the passed sequence. 258 @return Number of bytes really read. */ 259 virtual sal_Int32 readData( StreamDataSequence& orData, sal_Int32 nBytes, size_t nAtomSize = 1 ); 260 /** Reads nBytes bytes and copies them to the passed buffer opMem. 261 @return Number of bytes really read. */ 262 virtual sal_Int32 readMemory( void* opMem, sal_Int32 nBytes, size_t nAtomSize = 1 ); 263 /** Seeks forward inside the current record. */ 264 virtual void skip( sal_Int32 nBytes, size_t nAtomSize = 1 ); 265 266 /** Stream operator for integral and floating-point types. */ 267 template< typename Type > operator >>(Type & ornValue)268 inline BiffInputStream& operator>>( Type& ornValue ) { readValue( ornValue ); return *this; } 269 270 // byte strings ----------------------------------------------------------- 271 272 /** Reads 8/16 bit string length and character array, and returns the string. 273 @param b16BitLen 274 True = Read 16-bit string length field before the character array. 275 False = Read 8-bit string length field before the character array. 276 @param bAllowNulChars 277 True = NUL characters are inserted into the imported string. 278 False = NUL characters are replaced by question marks (default). 279 */ 280 ::rtl::OString readByteString( bool b16BitLen, bool bAllowNulChars = false ); 281 282 /** Reads 8/16 bit string length and character array, and returns a Unicode string. 283 @param b16BitLen 284 True = Read 16-bit string length field before the character array. 285 False = Read 8-bit string length field before the character array. 286 @param eTextEnc The text encoding used to create the Unicode string. 287 @param bAllowNulChars 288 True = NUL characters are inserted into the imported string. 289 False = NUL characters are replaced by question marks (default). 290 */ 291 ::rtl::OUString readByteStringUC( bool b16BitLen, rtl_TextEncoding eTextEnc, bool bAllowNulChars = false ); 292 293 /** Ignores 8/16 bit string length and character array. 294 @param b16BitLen 295 True = Read 16-bit string length field before the character array. 296 False = Read 8-bit string length field before the character array. 297 */ 298 void skipByteString( bool b16BitLen ); 299 300 // Unicode strings -------------------------------------------------------- 301 302 /** Reads nChars characters of a BIFF8 string, and returns the string. 303 @param nChars Number of characters to read from the stream. 304 @param b16BitChars 305 True = The character array contains 16-bit characters. 306 False = The character array contains truncated 8-bit characters. 307 @param bAllowNulChars 308 True = NUL characters are inserted into the imported string. 309 False = NUL characters are replaced by question marks (default). 310 */ 311 ::rtl::OUString readUniStringChars( sal_uInt16 nChars, bool b16BitChars, bool bAllowNulChars = false ); 312 313 /** Reads 8-bit flags, extended header, nChar characters, extended data of 314 a BIFF8 string, and returns the string. 315 @param nChars Number of characters to read from the stream. 316 @param bAllowNulChars 317 True = NUL characters are inserted into the imported string. 318 False = NUL characters are replaced by question marks (default). 319 */ 320 ::rtl::OUString readUniStringBody( sal_uInt16 nChars, bool bAllowNulChars = false ); 321 322 /** Reads 16-bit character count, 8-bit flags, extended header, character 323 array, extended data of a BIFF8 string, and returns the string. 324 @param bAllowNulChars 325 True = NUL characters are inserted into the imported string. 326 False = NUL characters are replaced by question marks (default). 327 */ 328 ::rtl::OUString readUniString( bool bAllowNulChars = false ); 329 330 /** Ignores nChars characters of a BIFF8 string. 331 @param nChars Number of characters to skip in the stream. 332 @param b16BitChars 333 True = The character array contains 16-bit characters. 334 False = The character array contains truncated 8-bit characters. 335 */ 336 void skipUniStringChars( sal_uInt16 nChars, bool b16BitChars ); 337 338 /** Ignores 8-bit flags, extended header, nChar characters, extended data 339 of a BIFF8 string. 340 @param nChars Number of characters to skip in the stream. 341 */ 342 void skipUniStringBody( sal_uInt16 nChars ); 343 344 /** Ignores 16-bit character count, 8-bit flags, extended header, character 345 array, extended data of a BIFF8 string. 346 */ 347 void skipUniString(); 348 349 // ------------------------------------------------------------------------ 350 private: 351 /** Initializes all members after base stream has been seeked to new record. */ 352 void setupRecord(); 353 /** Restarts the current record from the beginning. */ 354 void restartRecord( bool bInvalidateRecSize ); 355 /** Sets stream pointer before specified record and invalidates stream. */ 356 void rewindToRecord( sal_Int64 nRecHandle ); 357 /** Returns true, if stream was able to start a valid record. */ isInRecord() const358 inline bool isInRecord() const { return mnRecHandle >= 0; } 359 360 /** Returns true, if the passed ID is real or alternative continuation record ID. */ 361 bool isContinueId( sal_uInt16 nRecId ) const; 362 /** Goes to start of the next CONTINUE record. 363 @descr Stream must be located at the end of a raw record, and handling 364 of CONTINUE records must be enabled. 365 @return True if next CONTINUE record has been found and initialized. */ 366 bool jumpToNextContinue(); 367 /** Goes to start of the next CONTINUE record while reading strings. 368 @descr Stream must be located at the end of a raw record. If reading 369 has been started in a CONTINUE record, jumps to an existing following 370 CONTINUE record, even if handling of CONTINUE records is disabled (this 371 is a special handling for TXO string data). Reads additional Unicode 372 flag byte at start of the new raw record and sets or resets rb16BitChars. 373 @return True if next CONTINUE record has been found and initialized. */ 374 bool jumpToNextStringContinue( bool& rb16BitChars ); 375 /** Calculates the complete length of the current record including CONTINUE 376 records, stores the length in mnComplRecSize. */ 377 void calcRecordLength(); 378 379 /** Returns the maximum size of raw data possible to read in one block. */ 380 sal_uInt16 getMaxRawReadSize( sal_Int32 nBytes, size_t nAtomSize ) const; 381 382 /** Reads the BIFF8 Unicode string header fields. */ 383 void readUniStringHeader( bool& orb16BitChars, sal_Int32& ornAddSize ); 384 385 private: 386 prv::BiffInputRecordBuffer maRecBuffer; /// Raw record data buffer. 387 388 sal_Int64 mnRecHandle; /// Handle of current record. 389 sal_uInt16 mnRecId; /// Identifier of current record (not the CONTINUE ID). 390 sal_uInt16 mnAltContId; /// Alternative identifier for content continuation records. 391 392 sal_Int64 mnCurrRecSize; /// Helper for record size and position. 393 sal_Int64 mnComplRecSize; /// Size of complete record data (with CONTINUEs). 394 bool mbHasComplRec; /// True = mnComplRecSize is valid. 395 396 bool mbCont; /// True = automatic CONTINUE lookup enabled. 397 }; 398 399 // ============================================================================ 400 401 class BiffInputStreamPos 402 { 403 public: 404 explicit BiffInputStreamPos( BiffInputStream& rStrm ); 405 406 bool restorePosition(); 407 getStream()408 inline BiffInputStream& getStream() { return mrStrm; } 409 410 private: 411 BiffInputStream& mrStrm; 412 sal_Int64 mnRecHandle; 413 sal_Int64 mnRecPos; 414 }; 415 416 // ============================================================================ 417 418 /** Stores the current position of the passed stream on construction and 419 restores it automatically on destruction. */ 420 class BiffInputStreamPosGuard : private BiffInputStreamPos 421 { 422 public: 423 explicit BiffInputStreamPosGuard( BiffInputStream& rStrm ); 424 ~BiffInputStreamPosGuard(); 425 }; 426 427 // ============================================================================ 428 429 } // namespace xls 430 } // namespace oox 431 432 #endif 433