xref: /aoo41x/main/oox/source/xls/biffinputstream.cxx (revision cdf0e10c)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 #include "oox/xls/biffinputstream.hxx"
29 
30 #include <algorithm>
31 #include <rtl/ustrbuf.hxx>
32 
33 namespace oox {
34 namespace xls {
35 
36 // ============================================================================
37 
38 using ::rtl::OString;
39 using ::rtl::OStringToOUString;
40 using ::rtl::OUString;
41 using ::rtl::OUStringBuffer;
42 
43 // ============================================================================
44 
45 namespace prv {
46 
47 BiffInputRecordBuffer::BiffInputRecordBuffer( BinaryInputStream& rInStrm ) :
48     mrInStrm( rInStrm ),
49     mpCurrentData( 0 ),
50     mnHeaderPos( -1 ),
51     mnBodyPos( 0 ),
52     mnBufferBodyPos( 0 ),
53     mnNextHeaderPos( 0 ),
54     mnRecId( BIFF_ID_UNKNOWN ),
55     mnRecSize( 0 ),
56     mnRecPos( 0 ),
57     mbValidHeader( false )
58 {
59     OSL_ENSURE( mrInStrm.isSeekable(), "BiffInputRecordBuffer::BiffInputRecordBuffer - stream must be seekable" );
60     mrInStrm.seekToStart();
61     maOriginalData.reserve( SAL_MAX_UINT16 );
62     maDecodedData.reserve( SAL_MAX_UINT16 );
63     enableDecoder( false );     // updates mpCurrentData
64 }
65 
66 void BiffInputRecordBuffer::restartAt( sal_Int64 nPos )
67 {
68     mnHeaderPos = -1;
69     mnBodyPos = mnBufferBodyPos = 0;
70     mnNextHeaderPos = nPos;
71     mnRecId = BIFF_ID_UNKNOWN;
72     mnRecSize = mnRecPos = 0;
73     mbValidHeader = false;
74 }
75 
76 void BiffInputRecordBuffer::setDecoder( const BiffDecoderRef& rxDecoder )
77 {
78     mxDecoder = rxDecoder;
79     enableDecoder( true );
80     updateDecoded();
81 }
82 
83 void BiffInputRecordBuffer::enableDecoder( bool bEnable )
84 {
85     mpCurrentData = (bEnable && mxDecoder.get() && mxDecoder->isValid()) ? &maDecodedData : &maOriginalData;
86 }
87 
88 bool BiffInputRecordBuffer::startRecord( sal_Int64 nHeaderPos )
89 {
90     mbValidHeader = (0 <= nHeaderPos) && (nHeaderPos + 4 <= mrInStrm.size());
91     if( mbValidHeader )
92     {
93         mnHeaderPos = nHeaderPos;
94         mrInStrm.seek( nHeaderPos );
95         mrInStrm >> mnRecId >> mnRecSize;
96         mnBodyPos = mrInStrm.tell();
97         mnNextHeaderPos = mnBodyPos + mnRecSize;
98         mbValidHeader = !mrInStrm.isEof() && (mnNextHeaderPos <= mrInStrm.size());
99     }
100     if( !mbValidHeader )
101     {
102         mnHeaderPos = mnBodyPos = -1;
103         mnNextHeaderPos = 0;
104         mnRecId = BIFF_ID_UNKNOWN;
105         mnRecSize = 0;
106     }
107     mnRecPos = 0;
108     return mbValidHeader;
109 }
110 
111 bool BiffInputRecordBuffer::startNextRecord()
112 {
113     return startRecord( mnNextHeaderPos );
114 }
115 
116 sal_uInt16 BiffInputRecordBuffer::getNextRecId()
117 {
118     sal_uInt16 nRecId = BIFF_ID_UNKNOWN;
119     if( mbValidHeader && (mnNextHeaderPos + 4 <= mrInStrm.size()) )
120     {
121         mrInStrm.seek( mnNextHeaderPos );
122         mrInStrm >> nRecId;
123     }
124     return nRecId;
125 }
126 
127 void BiffInputRecordBuffer::read( void* opData, sal_uInt16 nBytes )
128 {
129     updateBuffer();
130     OSL_ENSURE( nBytes > 0, "BiffInputRecordBuffer::read - nothing to read" );
131     OSL_ENSURE( nBytes <= getRecLeft(), "BiffInputRecordBuffer::read - buffer overflow" );
132     memcpy( opData, &(*mpCurrentData)[ mnRecPos ], nBytes );
133     mnRecPos = mnRecPos + nBytes;
134 }
135 
136 void BiffInputRecordBuffer::skip( sal_uInt16 nBytes )
137 {
138     OSL_ENSURE( nBytes > 0, "BiffInputRecordBuffer::skip - nothing to skip" );
139     OSL_ENSURE( nBytes <= getRecLeft(), "BiffInputRecordBuffer::skip - buffer overflow" );
140     mnRecPos = mnRecPos + nBytes;
141 }
142 
143 void BiffInputRecordBuffer::updateBuffer()
144 {
145     OSL_ENSURE( mbValidHeader, "BiffInputRecordBuffer::updateBuffer - invalid access" );
146     if( mnBodyPos != mnBufferBodyPos )
147     {
148         mrInStrm.seek( mnBodyPos );
149         maOriginalData.resize( mnRecSize );
150         if( mnRecSize > 0 )
151             mrInStrm.readMemory( &maOriginalData.front(), static_cast< sal_Int32 >( mnRecSize ) );
152         mnBufferBodyPos = mnBodyPos;
153         updateDecoded();
154     }
155 }
156 
157 void BiffInputRecordBuffer::updateDecoded()
158 {
159     if( mxDecoder.get() && mxDecoder->isValid() )
160     {
161         maDecodedData.resize( mnRecSize );
162         if( mnRecSize > 0 )
163             mxDecoder->decode( &maDecodedData.front(), &maOriginalData.front(), mnBodyPos, mnRecSize );
164     }
165 }
166 
167 } // namespace prv
168 
169 // ============================================================================
170 
171 BiffInputStream::BiffInputStream( BinaryInputStream& rInStream, bool bContLookup ) :
172     BinaryStreamBase( true ),
173     maRecBuffer( rInStream ),
174     mnRecHandle( -1 ),
175     mnRecId( BIFF_ID_UNKNOWN ),
176     mnAltContId( BIFF_ID_UNKNOWN ),
177     mnCurrRecSize( 0 ),
178     mnComplRecSize( 0 ),
179     mbHasComplRec( false ),
180     mbCont( bContLookup )
181 {
182     mbEof = true;   // EOF will be true if stream is not inside a record
183 }
184 
185 // record control -------------------------------------------------------------
186 
187 bool BiffInputStream::startNextRecord()
188 {
189     bool bValidRec = false;
190     /*  #i4266# ignore zero records (id==len==0) (e.g. the application
191         "Crystal Report" writes zero records between other records) */
192     bool bIsZeroRec = false;
193     do
194     {
195         // record header is never encrypted
196         maRecBuffer.enableDecoder( false );
197         // read header of next raw record, returns false at end of stream
198         bValidRec = maRecBuffer.startNextRecord();
199         // ignore record, if identifier and size are zero
200         bIsZeroRec = (maRecBuffer.getRecId() == 0) && (maRecBuffer.getRecSize() == 0);
201     }
202     while( bValidRec && ((mbCont && isContinueId( maRecBuffer.getRecId() )) || bIsZeroRec) );
203 
204     // setup other class members
205     setupRecord();
206     return isInRecord();
207 }
208 
209 bool BiffInputStream::startRecordByHandle( sal_Int64 nRecHandle )
210 {
211     rewindToRecord( nRecHandle );
212     return startNextRecord();
213 }
214 
215 void BiffInputStream::resetRecord( bool bContLookup, sal_uInt16 nAltContId )
216 {
217     if( isInRecord() )
218     {
219         mbCont = bContLookup;
220         mnAltContId = nAltContId;
221         restartRecord( true );
222         maRecBuffer.enableDecoder( true );
223     }
224 }
225 
226 void BiffInputStream::rewindRecord()
227 {
228     rewindToRecord( mnRecHandle );
229 }
230 
231 // decoder --------------------------------------------------------------------
232 
233 void BiffInputStream::setDecoder( const BiffDecoderRef& rxDecoder )
234 {
235     maRecBuffer.setDecoder( rxDecoder );
236 }
237 
238 void BiffInputStream::enableDecoder( bool bEnable )
239 {
240     maRecBuffer.enableDecoder( bEnable );
241 }
242 
243 // stream/record state and info -----------------------------------------------
244 
245 sal_uInt16 BiffInputStream::getNextRecId()
246 {
247     sal_uInt16 nRecId = BIFF_ID_UNKNOWN;
248     if( isInRecord() )
249     {
250         sal_Int64 nCurrPos = tell();            // save current position in record
251         while( jumpToNextContinue() ) {}        // skip following CONTINUE records
252         if( maRecBuffer.startNextRecord() )     // read header of next record
253             nRecId = maRecBuffer.getRecId();
254         seek( nCurrPos );                       // restore position, seek() resets old mbValid state
255     }
256     return nRecId;
257 }
258 
259 // BinaryStreamBase interface (seeking) ---------------------------------------
260 
261 sal_Int64 BiffInputStream::size() const
262 {
263     if( !mbHasComplRec )
264         const_cast< BiffInputStream* >( this )->calcRecordLength();
265     return mnComplRecSize;
266 }
267 
268 sal_Int64 BiffInputStream::tell() const
269 {
270     return mbEof ? -1 : (mnCurrRecSize - maRecBuffer.getRecLeft());
271 }
272 
273 void BiffInputStream::seek( sal_Int64 nRecPos )
274 {
275     if( isInRecord() )
276     {
277         if( mbEof || (nRecPos < tell()) )
278             restartRecord( false );
279         if( !mbEof && (nRecPos > tell()) )
280             skip( static_cast< sal_Int32 >( nRecPos - tell() ) );
281     }
282 }
283 
284 void BiffInputStream::close()
285 {
286 }
287 
288 sal_Int64 BiffInputStream::tellBase() const
289 {
290     return maRecBuffer.getBaseStream().tell();
291 }
292 
293 sal_Int64 BiffInputStream::sizeBase() const
294 {
295     return maRecBuffer.getBaseStream().size();
296 }
297 
298 // BinaryInputStream interface (stream read access) ---------------------------
299 
300 sal_Int32 BiffInputStream::readData( StreamDataSequence& orData, sal_Int32 nBytes, size_t nAtomSize )
301 {
302     sal_Int32 nRet = 0;
303     if( !mbEof )
304     {
305         orData.realloc( ::std::max< sal_Int32 >( nBytes, 0 ) );
306         if( nBytes > 0 )
307             nRet = readMemory( orData.getArray(), nBytes, nAtomSize );
308     }
309     return nRet;
310 }
311 
312 sal_Int32 BiffInputStream::readMemory( void* opMem, sal_Int32 nBytes, size_t nAtomSize )
313 {
314     sal_Int32 nRet = 0;
315     if( !mbEof && opMem && (nBytes > 0) )
316     {
317         sal_uInt8* pnBuffer = reinterpret_cast< sal_uInt8* >( opMem );
318         sal_Int32 nBytesLeft = nBytes;
319 
320         while( !mbEof && (nBytesLeft > 0) )
321         {
322             sal_uInt16 nReadSize = getMaxRawReadSize( nBytesLeft, nAtomSize );
323             // check nReadSize, stream may already be located at end of a raw record
324             if( nReadSize > 0 )
325             {
326                 maRecBuffer.read( pnBuffer, nReadSize );
327                 nRet += nReadSize;
328                 pnBuffer += nReadSize;
329                 nBytesLeft -= nReadSize;
330             }
331             if( nBytesLeft > 0 )
332                 jumpToNextContinue();
333             OSL_ENSURE( !mbEof, "BiffInputStream::readMemory - record overread" );
334         }
335     }
336     return nRet;
337 }
338 
339 void BiffInputStream::skip( sal_Int32 nBytes, size_t nAtomSize )
340 {
341     sal_Int32 nBytesLeft = nBytes;
342     while( !mbEof && (nBytesLeft > 0) )
343     {
344         sal_uInt16 nSkipSize = getMaxRawReadSize( nBytesLeft, nAtomSize );
345         // check nSkipSize, stream may already be located at end of a raw record
346         if( nSkipSize > 0 )
347         {
348             maRecBuffer.skip( nSkipSize );
349             nBytesLeft -= nSkipSize;
350         }
351         if( nBytesLeft > 0 )
352             jumpToNextContinue();
353         OSL_ENSURE( !mbEof, "BiffInputStream::skip - record overread" );
354     }
355 }
356 
357 // byte strings ---------------------------------------------------------------
358 
359 OString BiffInputStream::readByteString( bool b16BitLen, bool bAllowNulChars )
360 {
361     sal_Int32 nStrLen = b16BitLen ? readuInt16() : readuInt8();
362     return readCharArray( nStrLen, bAllowNulChars );
363 }
364 
365 OUString BiffInputStream::readByteStringUC( bool b16BitLen, rtl_TextEncoding eTextEnc, bool bAllowNulChars )
366 {
367     return OStringToOUString( readByteString( b16BitLen, bAllowNulChars ), eTextEnc );
368 }
369 
370 void BiffInputStream::skipByteString( bool b16BitLen )
371 {
372     skip( b16BitLen ? readuInt16() : readuInt8() );
373 }
374 
375 // Unicode strings ------------------------------------------------------------
376 
377 OUString BiffInputStream::readUniStringChars( sal_uInt16 nChars, bool b16BitChars, bool bAllowNulChars )
378 {
379     OUStringBuffer aBuffer;
380     aBuffer.ensureCapacity( nChars );
381 
382     /*  This function has to react on CONTINUE records which repeat the flags
383         field in their first byte and may change the 8bit/16bit character mode,
384         thus a plain call to readCompressedUnicodeArray() cannot be used here. */
385     sal_Int32 nCharsLeft = nChars;
386     while( !mbEof && (nCharsLeft > 0) )
387     {
388         /*  Read the character array from the remaining part of the current raw
389             record. First, calculate the maximum number of characters that can
390             be read without triggering to start a following CONTINUE record. */
391         sal_Int32 nRawChars = b16BitChars ? (getMaxRawReadSize( nCharsLeft * 2, 2 ) / 2) : getMaxRawReadSize( nCharsLeft, 1 );
392         aBuffer.append( readCompressedUnicodeArray( nRawChars, !b16BitChars, bAllowNulChars ) );
393 
394         /*  Prepare for next CONTINUE record. Calling jumpToNextStringContinue()
395             reads the leading byte in the following CONTINUE record and updates
396             the b16BitChars flag. */
397         nCharsLeft -= nRawChars;
398         if( nCharsLeft > 0 )
399             jumpToNextStringContinue( b16BitChars );
400     }
401 
402     return aBuffer.makeStringAndClear();
403 }
404 
405 OUString BiffInputStream::readUniStringBody( sal_uInt16 nChars, bool bAllowNulChars )
406 {
407     bool b16BitChars;
408     sal_Int32 nAddSize;
409     readUniStringHeader( b16BitChars, nAddSize );
410     OUString aString = readUniStringChars( nChars, b16BitChars, bAllowNulChars );
411     skip( nAddSize );
412     return aString;
413 }
414 
415 OUString BiffInputStream::readUniString( bool bAllowNulChars )
416 {
417     return readUniStringBody( readuInt16(), bAllowNulChars );
418 }
419 
420 void BiffInputStream::skipUniStringChars( sal_uInt16 nChars, bool b16BitChars )
421 {
422     sal_Int32 nCharsLeft = nChars;
423     while( !mbEof && (nCharsLeft > 0) )
424     {
425         // skip the character array
426         sal_Int32 nSkipSize = b16BitChars ? getMaxRawReadSize( 2 * nCharsLeft, 2 ) : getMaxRawReadSize( nCharsLeft, 1 );
427         skip( nSkipSize );
428 
429         // prepare for next CONTINUE record
430         nCharsLeft -= (b16BitChars ? (nSkipSize / 2) : nSkipSize);
431         if( nCharsLeft > 0 )
432             jumpToNextStringContinue( b16BitChars );
433     }
434 }
435 
436 void BiffInputStream::skipUniStringBody( sal_uInt16 nChars )
437 {
438     bool b16BitChars;
439     sal_Int32 nAddSize;
440     readUniStringHeader( b16BitChars, nAddSize );
441     skipUniStringChars( nChars, b16BitChars );
442     skip( nAddSize );
443 }
444 
445 void BiffInputStream::skipUniString()
446 {
447     skipUniStringBody( readuInt16() );
448 }
449 
450 // private --------------------------------------------------------------------
451 
452 void BiffInputStream::setupRecord()
453 {
454     // initialize class members
455     mnRecHandle = maRecBuffer.getRecHeaderPos();
456     mnRecId = maRecBuffer.getRecId();
457     mnAltContId = BIFF_ID_UNKNOWN;
458     mnCurrRecSize = mnComplRecSize = maRecBuffer.getRecSize();
459     mbHasComplRec = !mbCont;
460     mbEof = !isInRecord();
461     // enable decoder in new record
462     enableDecoder( true );
463 }
464 
465 void BiffInputStream::restartRecord( bool bInvalidateRecSize )
466 {
467     if( isInRecord() )
468     {
469         maRecBuffer.startRecord( getRecHandle() );
470         mnCurrRecSize = maRecBuffer.getRecSize();
471         if( bInvalidateRecSize )
472         {
473             mnComplRecSize = mnCurrRecSize;
474             mbHasComplRec = !mbCont;
475         }
476         mbEof = false;
477     }
478 }
479 
480 void BiffInputStream::rewindToRecord( sal_Int64 nRecHandle )
481 {
482     if( nRecHandle >= 0 )
483     {
484         maRecBuffer.restartAt( nRecHandle );
485         mnRecHandle = -1;
486         mbEof = true;   // as long as the record is not started
487     }
488 }
489 
490 bool BiffInputStream::isContinueId( sal_uInt16 nRecId ) const
491 {
492     return (nRecId == BIFF_ID_CONT) || (nRecId == mnAltContId);
493 }
494 
495 bool BiffInputStream::jumpToNextContinue()
496 {
497     mbEof = mbEof || !mbCont || !isContinueId( maRecBuffer.getNextRecId() ) || !maRecBuffer.startNextRecord();
498     if( !mbEof )
499         mnCurrRecSize += maRecBuffer.getRecSize();
500     return !mbEof;
501 }
502 
503 bool BiffInputStream::jumpToNextStringContinue( bool& rb16BitChars )
504 {
505     OSL_ENSURE( maRecBuffer.getRecLeft() == 0, "BiffInputStream::jumpToNextStringContinue - alignment error" );
506 
507     if( mbCont && (getRemaining() > 0) )
508     {
509         jumpToNextContinue();
510     }
511     else if( mnRecId == BIFF_ID_CONT )
512     {
513         /*  CONTINUE handling is off, but we have started reading in a CONTINUE
514             record -> start next CONTINUE for TXO import. We really start a new
515             record here - no chance to return to string origin. */
516         mbEof = mbEof || (maRecBuffer.getNextRecId() != BIFF_ID_CONT) || !maRecBuffer.startNextRecord();
517         if( !mbEof )
518             setupRecord();
519     }
520 
521     // trying to read the flags invalidates stream, if no CONTINUE record has been found
522     sal_uInt8 nFlags;
523     readValue( nFlags );
524     rb16BitChars = getFlag( nFlags, BIFF_STRF_16BIT );
525     return !mbEof;
526 }
527 
528 void BiffInputStream::calcRecordLength()
529 {
530     sal_Int64 nCurrPos = tell();            // save current position in record
531     while( jumpToNextContinue() ) {}        // jumpToNextContinue() adds up mnCurrRecSize
532     mnComplRecSize = mnCurrRecSize;
533     mbHasComplRec = true;
534     seek( nCurrPos );                       // restore position, seek() resets old mbValid state
535 }
536 
537 sal_uInt16 BiffInputStream::getMaxRawReadSize( sal_Int32 nBytes, size_t nAtomSize ) const
538 {
539     sal_uInt16 nMaxSize = getLimitedValue< sal_uInt16, sal_Int32 >( nBytes, 0, maRecBuffer.getRecLeft() );
540     if( (0 < nMaxSize) && (nMaxSize < nBytes) && (nAtomSize > 1) )
541     {
542         // check that remaining data in record buffer is a multiple of the passed atom size
543         sal_uInt16 nPadding = static_cast< sal_uInt16 >( nMaxSize % nAtomSize );
544         OSL_ENSURE( nPadding == 0, "BiffInputStream::getMaxRawReadSize - alignment error" );
545         nMaxSize = nMaxSize - nPadding;
546     }
547     return nMaxSize;
548 }
549 
550 void BiffInputStream::readUniStringHeader( bool& orb16BitChars, sal_Int32& ornAddSize )
551 {
552     sal_uInt8 nFlags = readuInt8();
553     OSL_ENSURE( !getFlag( nFlags, BIFF_STRF_UNKNOWN ), "BiffInputStream::readUniStringHeader - unknown flags" );
554     orb16BitChars = getFlag( nFlags, BIFF_STRF_16BIT );
555     sal_uInt16 nFontCount = getFlag( nFlags, BIFF_STRF_RICH ) ? readuInt16() : 0;
556     sal_Int32 nPhoneticSize = getFlag( nFlags, BIFF_STRF_PHONETIC ) ? readInt32() : 0;
557     ornAddSize = 4 * nFontCount + ::std::max< sal_Int32 >( 0, nPhoneticSize );
558 }
559 
560 // ============================================================================
561 
562 BiffInputStreamPos::BiffInputStreamPos( BiffInputStream& rStrm ) :
563     mrStrm( rStrm ),
564     mnRecHandle( rStrm.getRecHandle() ),
565     mnRecPos( rStrm.tell() )
566 {
567 }
568 
569 bool BiffInputStreamPos::restorePosition()
570 {
571     bool bValidRec = mrStrm.startRecordByHandle( mnRecHandle );
572     if( bValidRec )
573         mrStrm.seek( mnRecPos );
574     return bValidRec && !mrStrm.isEof();
575 }
576 
577 // ============================================================================
578 
579 BiffInputStreamPosGuard::BiffInputStreamPosGuard( BiffInputStream& rStrm ) :
580     BiffInputStreamPos( rStrm )
581 {
582 }
583 
584 BiffInputStreamPosGuard::~BiffInputStreamPosGuard()
585 {
586     restorePosition();
587 }
588 
589 // ============================================================================
590 
591 } // namespace xls
592 } // namespace oox
593