xref: /aoo4110/main/oox/inc/oox/xls/biffinputstream.hxx (revision b1cdbd2c)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 #ifndef OOX_XLS_BIFFINPUTSTREAM_HXX
25 #define OOX_XLS_BIFFINPUTSTREAM_HXX
26 
27 #include <vector>
28 #include "oox/helper/binaryinputstream.hxx"
29 #include "oox/xls/biffhelper.hxx"
30 #include "oox/xls/biffcodec.hxx"
31 
32 namespace rtl { class OUStringBuffer; }
33 
34 namespace oox {
35 namespace xls {
36 
37 // ============================================================================
38 
39 namespace prv {
40 
41 /** Buffers the contents of a raw record and encapsulates stream decoding. */
42 class BiffInputRecordBuffer
43 {
44 public:
45     explicit            BiffInputRecordBuffer( BinaryInputStream& rInStrm );
46 
47     /** Returns the wrapped binary base stream. */
getBaseStream() const48     inline const BinaryInputStream& getBaseStream() const { return mrInStrm; }
49 
50     /** Sets a decoder object and decrypts buffered record data. */
51     void                setDecoder( const BiffDecoderRef& rxDecoder );
52     /** Returns the current decoder object. */
getDecoder() const53     inline BiffDecoderRef getDecoder() const { return mxDecoder; }
54     /** Enables/disables usage of current decoder. */
55     void                enableDecoder( bool bEnable );
56 
57     /** Restarts the stream at the passed position. Buffer is invalid until the
58         next call of startRecord() or startNextRecord(). */
59     void                restartAt( sal_Int64 nPos );
60 
61     /** Reads the record header at the passed position. */
62     bool                startRecord( sal_Int64 nHeaderPos );
63     /** Reads the next record header from the stream. */
64     bool                startNextRecord();
65     /** Returns the start position of the record header in the core stream. */
66     sal_uInt16          getNextRecId();
67 
68     /** Returns the start position of the record header in the core stream. */
getRecHeaderPos() const69     inline sal_Int64    getRecHeaderPos() const { return mnHeaderPos; }
70     /** Returns the current record identifier. */
getRecId() const71     inline sal_uInt16   getRecId() const { return mnRecId; }
72     /** Returns the current record size. */
getRecSize() const73     inline sal_uInt16   getRecSize() const { return mnRecSize; }
74     /** Returns the current read position in the current record body. */
getRecPos() const75     inline sal_uInt16   getRecPos() const { return mnRecPos; }
76     /** Returns the number of remaining bytes in the current record body. */
getRecLeft() const77     inline sal_uInt16   getRecLeft() const { return mnRecSize - mnRecPos; }
78 
79     /** Reads nBytes bytes to the existing buffer opData. Must NOT overread the source buffer. */
80     void                read( void* opData, sal_uInt16 nBytes );
81     /** Ignores nBytes bytes. Must NOT overread the buffer. */
82     void                skip( sal_uInt16 nBytes );
83 
84 private:
85     /** Updates data buffer from stream, if needed. */
86     void                updateBuffer();
87     /** Updates decoded data from original data. */
88     void                updateDecoded();
89 
90 private:
91     typedef ::std::vector< sal_uInt8 > DataBuffer;
92 
93     BinaryInputStream&  mrInStrm;               /// Core input stream.
94     DataBuffer          maOriginalData;         /// Original data read from stream.
95     DataBuffer          maDecodedData;          /// Decoded data.
96     DataBuffer*         mpCurrentData;          /// Points to data buffer currently in use.
97     BiffDecoderRef      mxDecoder;              /// Decoder object.
98     sal_Int64           mnHeaderPos;            /// Stream start position of current record header.
99     sal_Int64           mnBodyPos;              /// Stream start position of current record body.
100     sal_Int64           mnBufferBodyPos;        /// Stream start position of buffered data.
101     sal_Int64           mnNextHeaderPos;        /// Stream start position of next record header.
102     sal_uInt16          mnRecId;                /// Current record identifier.
103     sal_uInt16          mnRecSize;              /// Current record size.
104     sal_uInt16          mnRecPos;               /// Current position in record body.
105     bool                mbValidHeader;          /// True = valid record header.
106 };
107 
108 } // namespace prv
109 
110 // ============================================================================
111 
112 /** This class is used to read BIFF record streams.
113 
114     An instance is constructed with a BinaryInputStream object. The passed
115     stream is reset to its start while constructing this stream.
116 
117     To start reading a record call startNextRecord(). Now it is possible to
118     read all contents of the record using operator>>() or any of the read***()
119     functions. If some data exceeds the record size limit, the stream looks for
120     a following CONTINUE record and jumps automatically to it. It is NOT
121     allowed that an atomic data type is split into two records (e.g. 4 bytes of
122     a double in one record and the other 4 bytes in a following CONTINUE).
123 
124     Trying to read over the record limits results in a stream error. The
125     isValid() function indicates that by returning false. From now on the data
126     returned by the read functions is undefined. The error state will be reset,
127     if the record is reset (with the function resetRecord()), or if the next
128     record is started.
129 
130     To switch off the automatic lookup of CONTINUE records, use resetRecord()
131     with false parameter. This is useful e.g. on import of drawing layer data,
132     where sometimes solely CONTINUE records will occur. The automatic lookup
133     keeps switched off until the method resetRecord() is called with parameter
134     true. All other settings done on the stream (e.g. alternative CONTINUE
135     record identifier, enabled decryption, NUL substitution character) will be
136     reset to default values, if a new record is started.
137 
138     The import stream supports decrypting the stream data. The contents of a
139     record (not the record header) will be encrypted by Excel if the file has
140     been stored with password protection. The functions setDecoder() and
141     enableDecoder() control the usage of the decryption algorithms.
142     setDecoder() sets a new decryption algorithm and initially enables it.
143     enableDecoder( false ) may be used to stop the usage of the decryption
144     temporarily (sometimes record contents are never encrypted, e.g. all BOF
145     records or the stream position in SHEET records). Decryption will be
146     reenabled automatically, if a new record is started with the function
147     startNextRecord().
148 */
149 class BiffInputStream : public BinaryInputStream
150 {
151 public:
152     /** Constructs the BIFF record stream using the passed binary stream.
153 
154         @param rInStream
155             The base input stream. Must be seekable. Will be seeked to its
156             start position.
157 
158         @param bContLookup  Automatic CONTINUE lookup on/off.
159      */
160     explicit            BiffInputStream(
161                             BinaryInputStream& rInStream,
162                             bool bContLookup = true );
163 
164     // record control ---------------------------------------------------------
165 
166     /** Sets stream pointer to the start of the next record content.
167 
168         Ignores all CONTINUE records of the current record, if automatic
169         CONTINUE usage is switched on.
170 
171         @return  False = no record found (end of stream).
172      */
173     bool                startNextRecord();
174 
175     /** Sets stream pointer to the start of the content of the specified record.
176 
177         The handle of the current record can be received and stored using the
178         function getRecHandle() for later usage with this function. The record
179         handle is equivalent to the position of the underlying binary stream,
180         thus the function can be used to perform a hard seek to a specific
181         position, if it is sure that a record starts exactly at this position.
182 
183         @return  False = no record found (invalid handle passed).
184      */
185     bool                startRecordByHandle( sal_Int64 nRecHandle );
186 
187     /** Sets stream pointer to begin of record content.
188 
189         @param bContLookup
190             Automatic CONTINUE lookup on/off. In difference to other stream
191             settings, this setting is persistent until next call of this
192             function (because it is wanted to receive the next CONTINUE records
193             separately).
194         @param nAltContId
195             Sets an alternative record identifier for content continuation.
196             This value is reset automatically when a new record is started with
197             startNextRecord().
198      */
199     void                resetRecord(
200                             bool bContLookup,
201                             sal_uInt16 nAltContId = BIFF_ID_UNKNOWN );
202 
203     /** Sets stream pointer before current record and invalidates stream.
204 
205         The next call to startNextRecord() will start again the current record.
206         This can be used in situations where a loop or a function leaves on a
207         specific record, but the parent context expects to start this record by
208         itself. The stream is invalid as long as the first record has not been
209         started (it is not allowed to call any other stream operation then).
210      */
211     void                rewindRecord();
212 
213     // decoder ----------------------------------------------------------------
214 
215     /** Sets a new decoder object.
216 
217         Enables decryption of record contents for the rest of the stream.
218      */
219     void                setDecoder( const BiffDecoderRef& rxDecoder );
220 
221     /** Enables/disables usage of current decoder.
222 
223         Decryption is reenabled automatically, if a new record is started using
224         the function startNextRecord().
225      */
226     void                enableDecoder( bool bEnable = true );
227 
228     // stream/record state and info -------------------------------------------
229 
230     /** Returns the current record identifier. */
getRecId() const231     inline sal_uInt16   getRecId() const { return mnRecId; }
232     /** Returns the record identifier of the following record. */
233     sal_uInt16          getNextRecId();
234 
235     /** Returns a unique handle for the current record that can be used with
236         the function startRecordByHandle(). */
getRecHandle() const237     inline sal_Int64    getRecHandle() const { return mnRecHandle; }
238 
239     // BinaryStreamBase interface (seeking) -----------------------------------
240 
241     /** Returns the data size of the whole record without record headers. */
242     virtual sal_Int64   size() const;
243     /** Returns the position inside of the whole record content. */
244     virtual sal_Int64   tell() const;
245     /** Seeks in record content to the specified position. */
246     virtual void        seek( sal_Int64 nRecPos );
247     /** Closes the input stream but not the wrapped stream. */
248     virtual void        close();
249 
250     /** Returns the absolute position in the wrapped binary stream. */
251     sal_Int64           tellBase() const;
252     /** Returns the total size of the wrapped binary stream. */
253     sal_Int64           sizeBase() const;
254 
255     // BinaryInputStream interface (stream read access) -----------------------
256 
257     /** Reads nBytes bytes to the passed sequence.
258         @return  Number of bytes really read. */
259     virtual sal_Int32   readData( StreamDataSequence& orData, sal_Int32 nBytes, size_t nAtomSize = 1 );
260     /** Reads nBytes bytes and copies them to the passed buffer opMem.
261         @return  Number of bytes really read. */
262     virtual sal_Int32   readMemory( void* opMem, sal_Int32 nBytes, size_t nAtomSize = 1 );
263     /** Seeks forward inside the current record. */
264     virtual void        skip( sal_Int32 nBytes, size_t nAtomSize = 1 );
265 
266     /** Stream operator for integral and floating-point types. */
267     template< typename Type >
operator >>(Type & ornValue)268     inline BiffInputStream& operator>>( Type& ornValue ) { readValue( ornValue ); return *this; }
269 
270     // byte strings -----------------------------------------------------------
271 
272     /** Reads 8/16 bit string length and character array, and returns the string.
273         @param b16BitLen
274             True = Read 16-bit string length field before the character array.
275             False = Read 8-bit string length field before the character array.
276         @param bAllowNulChars
277             True = NUL characters are inserted into the imported string.
278             False = NUL characters are replaced by question marks (default).
279      */
280     ::rtl::OString      readByteString( bool b16BitLen, bool bAllowNulChars = false );
281 
282     /** Reads 8/16 bit string length and character array, and returns a Unicode string.
283         @param b16BitLen
284             True = Read 16-bit string length field before the character array.
285             False = Read 8-bit string length field before the character array.
286         @param eTextEnc  The text encoding used to create the Unicode string.
287         @param bAllowNulChars
288             True = NUL characters are inserted into the imported string.
289             False = NUL characters are replaced by question marks (default).
290      */
291     ::rtl::OUString     readByteStringUC( bool b16BitLen, rtl_TextEncoding eTextEnc, bool bAllowNulChars = false );
292 
293     /** Ignores 8/16 bit string length and character array.
294         @param b16BitLen
295             True = Read 16-bit string length field before the character array.
296             False = Read 8-bit string length field before the character array.
297      */
298     void                skipByteString( bool b16BitLen );
299 
300     // Unicode strings --------------------------------------------------------
301 
302     /** Reads nChars characters of a BIFF8 string, and returns the string.
303         @param nChars  Number of characters to read from the stream.
304         @param b16BitChars
305             True = The character array contains 16-bit characters.
306             False = The character array contains truncated 8-bit characters.
307         @param bAllowNulChars
308             True = NUL characters are inserted into the imported string.
309             False = NUL characters are replaced by question marks (default).
310      */
311     ::rtl::OUString     readUniStringChars( sal_uInt16 nChars, bool b16BitChars, bool bAllowNulChars = false );
312 
313     /** Reads 8-bit flags, extended header, nChar characters, extended data of
314         a BIFF8 string, and returns the string.
315         @param nChars  Number of characters to read from the stream.
316         @param bAllowNulChars
317             True = NUL characters are inserted into the imported string.
318             False = NUL characters are replaced by question marks (default).
319      */
320     ::rtl::OUString     readUniStringBody( sal_uInt16 nChars, bool bAllowNulChars = false );
321 
322     /** Reads 16-bit character count, 8-bit flags, extended header, character
323         array, extended data of a BIFF8 string, and returns the string.
324         @param bAllowNulChars
325             True = NUL characters are inserted into the imported string.
326             False = NUL characters are replaced by question marks (default).
327      */
328     ::rtl::OUString     readUniString( bool bAllowNulChars = false );
329 
330     /** Ignores nChars characters of a BIFF8 string.
331         @param nChars  Number of characters to skip in the stream.
332         @param b16BitChars
333             True = The character array contains 16-bit characters.
334             False = The character array contains truncated 8-bit characters.
335      */
336     void                skipUniStringChars( sal_uInt16 nChars, bool b16BitChars );
337 
338     /** Ignores 8-bit flags, extended header, nChar characters, extended data
339         of a BIFF8 string.
340         @param nChars  Number of characters to skip in the stream.
341      */
342     void                skipUniStringBody( sal_uInt16 nChars );
343 
344     /** Ignores 16-bit character count, 8-bit flags, extended header, character
345         array, extended data of a BIFF8 string.
346      */
347     void                skipUniString();
348 
349     // ------------------------------------------------------------------------
350 private:
351     /** Initializes all members after base stream has been seeked to new record. */
352     void                setupRecord();
353     /** Restarts the current record from the beginning. */
354     void                restartRecord( bool bInvalidateRecSize );
355     /** Sets stream pointer before specified record and invalidates stream. */
356     void                rewindToRecord( sal_Int64 nRecHandle );
357     /** Returns true, if stream was able to start a valid record. */
isInRecord() const358     inline bool         isInRecord() const { return mnRecHandle >= 0; }
359 
360     /** Returns true, if the passed ID is real or alternative continuation record ID. */
361     bool                isContinueId( sal_uInt16 nRecId ) const;
362     /** Goes to start of the next CONTINUE record.
363         @descr  Stream must be located at the end of a raw record, and handling
364         of CONTINUE records must be enabled.
365         @return  True if next CONTINUE record has been found and initialized. */
366     bool                jumpToNextContinue();
367     /** Goes to start of the next CONTINUE record while reading strings.
368         @descr  Stream must be located at the end of a raw record. If reading
369         has been started in a CONTINUE record, jumps to an existing following
370         CONTINUE record, even if handling of CONTINUE records is disabled (this
371         is a special handling for TXO string data). Reads additional Unicode
372         flag byte at start of the new raw record and sets or resets rb16BitChars.
373         @return  True if next CONTINUE record has been found and initialized. */
374     bool                jumpToNextStringContinue( bool& rb16BitChars );
375     /** Calculates the complete length of the current record including CONTINUE
376         records, stores the length in mnComplRecSize. */
377     void                calcRecordLength();
378 
379     /** Returns the maximum size of raw data possible to read in one block. */
380     sal_uInt16          getMaxRawReadSize( sal_Int32 nBytes, size_t nAtomSize ) const;
381 
382     /** Reads the BIFF8 Unicode string header fields. */
383     void                readUniStringHeader( bool& orb16BitChars, sal_Int32& ornAddSize );
384 
385 private:
386     prv::BiffInputRecordBuffer maRecBuffer; /// Raw record data buffer.
387 
388     sal_Int64           mnRecHandle;        /// Handle of current record.
389     sal_uInt16          mnRecId;            /// Identifier of current record (not the CONTINUE ID).
390     sal_uInt16          mnAltContId;        /// Alternative identifier for content continuation records.
391 
392     sal_Int64           mnCurrRecSize;      /// Helper for record size and position.
393     sal_Int64           mnComplRecSize;     /// Size of complete record data (with CONTINUEs).
394     bool                mbHasComplRec;      /// True = mnComplRecSize is valid.
395 
396     bool                mbCont;             /// True = automatic CONTINUE lookup enabled.
397 };
398 
399 // ============================================================================
400 
401 class BiffInputStreamPos
402 {
403 public:
404     explicit            BiffInputStreamPos( BiffInputStream& rStrm );
405 
406     bool                restorePosition();
407 
getStream()408     inline BiffInputStream& getStream() { return mrStrm; }
409 
410 private:
411     BiffInputStream&    mrStrm;
412     sal_Int64           mnRecHandle;
413     sal_Int64           mnRecPos;
414 };
415 
416 // ============================================================================
417 
418 /** Stores the current position of the passed stream on construction and
419     restores it automatically on destruction. */
420 class BiffInputStreamPosGuard : private BiffInputStreamPos
421 {
422 public:
423     explicit            BiffInputStreamPosGuard( BiffInputStream& rStrm );
424                         ~BiffInputStreamPosGuard();
425 };
426 
427 // ============================================================================
428 
429 } // namespace xls
430 } // namespace oox
431 
432 #endif
433