1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_sdext.hxx"
30 
31 #if defined __SUNPRO_CC
32 #pragma disable_warn
33 #elif defined _MSC_VER
34 #pragma warning(push, 1)
35 #endif
36 
37 #include "pdfparse.hxx"
38 
39 // workaround windows compiler: do not include multi_pass.hpp
40 //#include <boost/spirit.hpp>
41 #include <boost/spirit/include/classic_core.hpp>
42 #include <boost/spirit/include/classic_utility.hpp>
43 #include <boost/spirit/include/classic_error_handling.hpp>
44 #include <boost/spirit/include/classic_file_iterator.hpp>
45 #include <boost/bind.hpp>
46 #include <string>
47 
48 #include <rtl/strbuf.hxx>
49 #include <rtl/memory.h>
50 #include <rtl/alloc.h>
51 
52 // disable warnings again because someone along the line has enabled them
53 #if defined __SUNPRO_CC
54 #pragma disable_warn
55 #elif defined _MSC_VER
56 #pragma warning(push, 1)
57 #endif
58 
59 using namespace boost::spirit;
60 using namespace rtl;
61 using namespace pdfparse;
62 
63 class StringEmitContext : public EmitContext
64 {
65     OStringBuffer m_aBuf;
66     public:
67     StringEmitContext() : EmitContext(), m_aBuf(256) {}
68     virtual ~StringEmitContext() {}
69     virtual bool write( const void* pBuf, unsigned int nLen ) throw()
70     {
71         m_aBuf.append( (const sal_Char*)pBuf, nLen );
72         return true;
73     }
74     virtual unsigned int getCurPos() throw() { return m_aBuf.getLength(); }
75     virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw()
76     { return (nOrigOffset+nLen < static_cast<unsigned int>(m_aBuf.getLength()) ) ?
77              write( m_aBuf.getStr() + nOrigOffset, nLen ) : false; }
78     virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw()
79     {
80         if( nOrigOffset+nLen < static_cast<unsigned int>(m_aBuf.getLength()) )
81         {
82             rtl_copyMemory( pBuf, m_aBuf.getStr()+nOrigOffset, nLen );
83             return nLen;
84         }
85         return 0;
86     }
87 
88     OString getString() { return m_aBuf.makeStringAndClear(); }
89 };
90 
91 template< class iteratorT >
92 class PDFGrammar :  public grammar< PDFGrammar<iteratorT> >
93 {
94 public:
95 
96     PDFGrammar( const iteratorT& first )
97     : m_fDouble( 0.0 ), m_aGlobalBegin( first ) {}
98     ~PDFGrammar()
99     {
100         if( !m_aObjectStack.empty() )
101             delete m_aObjectStack.front();
102     }
103 
104     double m_fDouble;
105     std::vector< unsigned int > m_aUIntStack;
106     std::vector< PDFEntry* >    m_aObjectStack;
107     rtl::OString                m_aErrorString;
108     iteratorT                   m_aGlobalBegin;
109 
110 public:
111     struct pdf_string_parser
112     {
113         typedef nil_t result_t;
114         template <typename ScannerT>
115         std::ptrdiff_t
116         operator()(ScannerT const& scan, result_t& result) const
117         {
118             std::ptrdiff_t len = 0;
119 
120             int nBraceLevel = 0;
121             while( ! scan.at_end() )
122             {
123                 char c = *scan;
124                 if( c == ')' )
125                 {
126                     nBraceLevel--;
127                     if( nBraceLevel < 0 )
128                         break;
129                 }
130                 else if( c == '(' )
131                     nBraceLevel++;
132                 else if( c == '\\' ) // ignore escaped braces
133                 {
134                     ++len;
135                     ++scan;
136                     if( scan.at_end() )
137                         break;
138                 }
139                 ++len;
140                 ++scan;
141             }
142             return scan.at_end() ? -1 : len;
143         }
144     };
145 
146     template< typename ScannerT >
147     struct definition
148     {
149         definition( const PDFGrammar<iteratorT>& rSelf )
150         {
151             PDFGrammar<iteratorT>* pSelf = const_cast< PDFGrammar<iteratorT>* >( &rSelf );
152 
153             // workaround workshop compiler: comment_p doesn't work
154             // comment     = comment_p("%")[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )];
155             comment     = lexeme_d[ (ch_p('%') >> *(~ch_p('\r') & ~ch_p('\n')) >> eol_p)[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )] ];
156 
157             boolean     = (str_p("true") | str_p("false"))[boost::bind(&PDFGrammar::pushBool, pSelf, _1, _2)];
158 
159             // workaround workshop compiler: confix_p doesn't work
160             //stream      = confix_p( "stream", *anychar_p, "endstream" )[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
161             stream      = (str_p("stream") >> *(anychar_p - str_p("endstream")) >> str_p("endstream"))[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
162 
163             name        = lexeme_d[
164                             ch_p('/')
165                             >> (*(anychar_p-chset_p("\t\n\f\r ()<>[]{}/%")-ch_p('\0')))
166                                [boost::bind(&PDFGrammar::pushName, pSelf, _1, _2)] ];
167 
168             // workaround workshop compiler: confix_p doesn't work
169             //stringtype  = ( confix_p("(",*anychar_p, ")") |
170             //                confix_p("<",*xdigit_p,  ">") )
171             //              [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
172 
173             stringtype  = ( ( ch_p('(') >> functor_parser<pdf_string_parser>() >> ch_p(')') ) |
174                             ( ch_p('<') >> *xdigit_p >> ch_p('>') ) )
175                           [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
176 
177             null_object = str_p( "null" )[boost::bind(&PDFGrammar::pushNull, pSelf, _1, _2)];
178 
179             #ifdef USE_ASSIGN_ACTOR
180             objectref   = ( uint_p[push_back_a(pSelf->m_aUIntStack)]
181                             >> uint_p[push_back_a(pSelf->m_aUIntStack)]
182                             >> ch_p('R')
183                             >> eps_p
184                           )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
185             #else
186             objectref   = ( uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
187                             >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
188                             >> ch_p('R')
189                             >> eps_p
190                           )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
191             #endif
192 
193             #ifdef USE_ASSIGN_ACTOR
194             simple_type = objectref | name |
195                           ( real_p[assign_a(pSelf->m_fDouble)] >> eps_p )
196                           [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
197                           | stringtype | boolean | null_object;
198             #else
199             simple_type = objectref | name |
200                           ( real_p[boost::bind(&PDFGrammar::assign_action_double, pSelf, _1)] >> eps_p )
201                           [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
202                           | stringtype | boolean | null_object;
203             #endif
204 
205             dict_begin  = str_p( "<<" )[boost::bind(&PDFGrammar::beginDict, pSelf, _1, _2)];
206             dict_end    = str_p( ">>" )[boost::bind(&PDFGrammar::endDict, pSelf, _1, _2)];
207 
208             array_begin = str_p("[")[boost::bind(&PDFGrammar::beginArray,pSelf, _1, _2)];
209             array_end   = str_p("]")[boost::bind(&PDFGrammar::endArray,pSelf, _1, _2)];
210 
211             #ifdef USE_ASSIGN_ACTOR
212             object_begin= uint_p[push_back_a(pSelf->m_aUIntStack)]
213                           >> uint_p[push_back_a(pSelf->m_aUIntStack)]
214                           >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
215             #else
216             object_begin= uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
217                           >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
218                           >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
219             #endif
220             object_end  = str_p( "endobj" )[boost::bind(&PDFGrammar::endObject, pSelf, _1, _2)];
221 
222             xref        = str_p( "xref" ) >> uint_p >> uint_p
223                           >> lexeme_d[
224                                 +( repeat_p(10)[digit_p]
225                                    >> blank_p
226                                    >> repeat_p(5)[digit_p]
227                                    >> blank_p
228                                    >> ( ch_p('n') | ch_p('f') )
229                                    >> repeat_p(2)[space_p]
230                                  ) ];
231 
232             dict_element= dict_begin | comment | simple_type
233                           | array_begin | array_end | dict_end;
234 
235             object      = object_begin
236                           >> *dict_element
237                           >> !stream
238                           >> object_end;
239 
240             trailer     = str_p( "trailer" )[boost::bind(&PDFGrammar::beginTrailer,pSelf,_1,_2)]
241                           >> *dict_element
242                           >> str_p("startxref")
243                           >> uint_p
244                           >> str_p("%%EOF")[boost::bind(&PDFGrammar::endTrailer,pSelf,_1,_2)];
245 
246             #ifdef USE_ASSIGN_ACTOR
247             pdfrule     = ! (lexeme_d[
248                                 str_p( "%PDF-" )
249                                 >> uint_p[push_back_a(pSelf->m_aUIntStack)]
250                                 >> ch_p('.')
251                                 >> uint_p[push_back_a(pSelf->m_aUIntStack)]
252                                 >> *((~ch_p('\r') & ~ch_p('\n')))
253                                 >> eol_p
254                              ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
255                           >> *( comment | object | ( xref >> trailer ) );
256             #else
257             pdfrule     = ! (lexeme_d[
258                                 str_p( "%PDF-" )
259                                 >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
260                                 >> ch_p('.')
261                                 >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
262                                 >> *((~ch_p('\r') & ~ch_p('\n')))
263                                 >> eol_p
264                              ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
265                           >> *( comment | object | ( xref >> trailer ) );
266             #endif
267         }
268         rule< ScannerT > comment, stream, boolean, name, stringtype, null_object, simple_type,
269                          objectref, array, value, dict_element, dict_begin, dict_end,
270                          array_begin, array_end, object, object_begin, object_end,
271                          xref, trailer, pdfrule;
272 
273         const rule< ScannerT >& start() const { return pdfrule; }
274     };
275 
276     #ifndef USE_ASSIGN_ACTOR
277     void push_back_action_uint( unsigned int i )
278     {
279         m_aUIntStack.push_back( i );
280     }
281     void assign_action_double( double d )
282     {
283         m_fDouble = d;
284     }
285     #endif
286 
287     void parseError( const char* pMessage, iteratorT pLocation )
288     {
289         throw_( pLocation, pMessage );
290     }
291 
292     rtl::OString iteratorToString( iteratorT first, iteratorT last ) const
293     {
294         rtl::OStringBuffer aStr( 32 );
295         while( first != last )
296         {
297             aStr.append( *first );
298             ++first;
299         }
300         return aStr.makeStringAndClear();
301     }
302 
303     void haveFile( iteratorT pBegin, iteratorT /*pEnd*/ )
304     {
305         if( m_aObjectStack.empty() )
306         {
307             PDFFile* pFile = new PDFFile();
308             pFile->m_nMinor = m_aUIntStack.back();
309             m_aUIntStack.pop_back();
310             pFile->m_nMajor = m_aUIntStack.back();
311             m_aUIntStack.pop_back();
312             m_aObjectStack.push_back( pFile );
313         }
314         else
315             parseError( "found file header in unusual place", pBegin );
316     }
317 
318     void pushComment( iteratorT first, iteratorT last )
319     {
320         // add a comment to the current stack element
321         PDFComment* pComment =
322             new PDFComment(iteratorToString(first,last));
323         if( m_aObjectStack.empty() )
324             m_aObjectStack.push_back( new PDFPart() );
325         PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
326         if( pContainer == NULL )
327             parseError( "comment without container", first );
328         pContainer->m_aSubElements.push_back( pComment );
329     }
330 
331     void insertNewValue( PDFEntry* pNewValue, iteratorT pPos )
332     {
333         PDFContainer* pContainer = NULL;
334         const char* pMsg = NULL;
335         if( ! m_aObjectStack.empty() &&
336             (pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back())) != NULL )
337         {
338             if( dynamic_cast<PDFDict*>(pContainer) == NULL      &&
339                 dynamic_cast<PDFArray*>(pContainer) == NULL )
340             {
341                 PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer);
342                 if( pObj )
343                 {
344                     if( pObj->m_pObject == NULL )
345                         pObj->m_pObject = pNewValue;
346                     else
347                     {
348                         pMsg = "second value for object";
349                         pContainer = NULL;
350                     }
351                 }
352                 else if( dynamic_cast<PDFDict*>(pNewValue) )
353                 {
354                     PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pContainer);
355                     if( pTrailer )
356                     {
357                         if( pTrailer->m_pDict == NULL )
358                             pTrailer->m_pDict = dynamic_cast<PDFDict*>(pNewValue);
359                         else
360                             pContainer = NULL;
361                     }
362                     else
363                         pContainer = NULL;
364                 }
365                 else
366                     pContainer = NULL;
367             }
368         }
369         if( pContainer )
370             pContainer->m_aSubElements.push_back( pNewValue );
371         else
372         {
373             if( ! pMsg )
374             {
375                 if( dynamic_cast<PDFContainer*>(pNewValue) )
376                     pMsg = "array without container";
377                 else
378                     pMsg = "value without container";
379             }
380             delete pNewValue;
381             parseError( pMsg, pPos );
382         }
383     }
384 
385     void pushName( iteratorT first, iteratorT last )
386     {
387         insertNewValue( new PDFName(iteratorToString(first,last)), first );
388     }
389 
390     void pushDouble( iteratorT first, iteratorT /*last*/ )
391     {
392         insertNewValue( new PDFNumber(m_fDouble), first );
393     }
394 
395     void pushString( iteratorT first, iteratorT last )
396     {
397         insertNewValue( new PDFString(iteratorToString(first,last)), first );
398     }
399 
400     void pushBool( iteratorT first, iteratorT last )
401     {
402         insertNewValue( new PDFBool( (last-first == 4) ), first );
403     }
404 
405     void pushNull( iteratorT first, iteratorT )
406     {
407         insertNewValue( new PDFNull(), first );
408     }
409 
410 
411     void beginObject( iteratorT first, iteratorT /*last*/ )
412     {
413         if( m_aObjectStack.empty() )
414             m_aObjectStack.push_back( new PDFPart() );
415 
416         unsigned int nGeneration = m_aUIntStack.back();
417         m_aUIntStack.pop_back();
418         unsigned int nObject = m_aUIntStack.back();
419         m_aUIntStack.pop_back();
420 
421         PDFObject* pObj = new PDFObject( nObject, nGeneration );
422         pObj->m_nOffset = first - m_aGlobalBegin;
423 
424         PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
425         if( pContainer &&
426             ( dynamic_cast<PDFFile*>(pContainer) ||
427               dynamic_cast<PDFPart*>(pContainer) ) )
428         {
429             pContainer->m_aSubElements.push_back( pObj );
430             m_aObjectStack.push_back( pObj );
431         }
432         else
433             parseError( "object in wrong place", first );
434     }
435 
436     void endObject( iteratorT first, iteratorT )
437     {
438         if( m_aObjectStack.empty() )
439             parseError( "endobj without obj", first );
440         else if( dynamic_cast<PDFObject*>(m_aObjectStack.back()) == NULL )
441             parseError( "spurious endobj", first );
442         else
443             m_aObjectStack.pop_back();
444     }
445 
446     void pushObjectRef( iteratorT first, iteratorT )
447     {
448         unsigned int nGeneration = m_aUIntStack.back();
449         m_aUIntStack.pop_back();
450         unsigned int nObject = m_aUIntStack.back();
451         m_aUIntStack.pop_back();
452         insertNewValue( new PDFObjectRef(nObject,nGeneration), first );
453     }
454 
455     void beginDict( iteratorT first, iteratorT )
456     {
457         PDFDict* pDict = new PDFDict();
458         pDict->m_nOffset = first - m_aGlobalBegin;
459 
460         insertNewValue( pDict, first );
461         // will not come here if insertion fails (exception)
462         m_aObjectStack.push_back( pDict );
463     }
464     void endDict( iteratorT first, iteratorT )
465     {
466         PDFDict* pDict = NULL;
467         if( m_aObjectStack.empty() )
468             parseError( "dictionary end without begin", first );
469         else if( (pDict = dynamic_cast<PDFDict*>(m_aObjectStack.back())) == NULL )
470             parseError( "spurious dictionary end", first );
471         else
472             m_aObjectStack.pop_back();
473 
474         PDFEntry* pOffender = pDict->buildMap();
475         if( pOffender )
476         {
477             StringEmitContext aCtx;
478             aCtx.write( "offending dictionary element: ", 30 );
479             pOffender->emit( aCtx );
480             m_aErrorString = aCtx.getString();
481             parseError( m_aErrorString.getStr(), first );
482         }
483     }
484 
485     void beginArray( iteratorT first, iteratorT )
486     {
487         PDFArray* pArray = new PDFArray();
488         pArray->m_nOffset = first - m_aGlobalBegin;
489 
490         insertNewValue( pArray, first );
491         // will not come here if insertion fails (exception)
492         m_aObjectStack.push_back( pArray );
493     }
494 
495     void endArray( iteratorT first, iteratorT )
496     {
497         if( m_aObjectStack.empty() )
498             parseError( "array end without begin", first );
499         else if( dynamic_cast<PDFArray*>(m_aObjectStack.back()) == NULL )
500             parseError( "spurious array end", first );
501         else
502             m_aObjectStack.pop_back();
503     }
504 
505     void emitStream( iteratorT first, iteratorT last )
506     {
507         if( m_aObjectStack.empty() )
508             parseError( "stream without object", first );
509         PDFObject* pObj = dynamic_cast<PDFObject*>(m_aObjectStack.back());
510         if( pObj && pObj->m_pObject )
511         {
512             if( pObj->m_pStream )
513                 parseError( "multiple streams in object", first );
514 
515             PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
516             if( pDict )
517             {
518                 PDFStream* pStream = new PDFStream( first - m_aGlobalBegin, last - m_aGlobalBegin, pDict );
519 
520                 pObj->m_pStream = pStream;
521                 pObj->m_aSubElements.push_back( pStream );
522             }
523         }
524         else
525             parseError( "stream without object", first );
526     }
527 
528     void beginTrailer( iteratorT first, iteratorT )
529     {
530         if( m_aObjectStack.empty() )
531             m_aObjectStack.push_back( new PDFPart() );
532 
533         PDFTrailer* pTrailer = new PDFTrailer();
534         pTrailer->m_nOffset = first - m_aGlobalBegin;
535 
536         PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
537         if( pContainer &&
538             ( dynamic_cast<PDFFile*>(pContainer) ||
539               dynamic_cast<PDFPart*>(pContainer) ) )
540         {
541             pContainer->m_aSubElements.push_back( pTrailer );
542             m_aObjectStack.push_back( pTrailer );
543         }
544         else
545             parseError( "trailer in wrong place", first );
546     }
547 
548     void endTrailer( iteratorT first, iteratorT )
549     {
550         if( m_aObjectStack.empty() )
551             parseError( "%%EOF without trailer", first );
552         else if( dynamic_cast<PDFTrailer*>(m_aObjectStack.back()) == NULL )
553             parseError( "spurious %%EOF", first );
554         else
555             m_aObjectStack.pop_back();
556     }
557 };
558 
559 PDFEntry* PDFReader::read( const char* pBuffer, unsigned int nLen )
560 {
561     PDFGrammar<const char*> aGrammar( pBuffer );
562 
563     try
564     {
565         boost::spirit::parse_info<const char*> aInfo =
566             boost::spirit::parse( pBuffer,
567                                   pBuffer+nLen,
568                                   aGrammar,
569                                   boost::spirit::space_p );
570         #if OSL_DEBUG_LEVEL > 1
571         fprintf( stderr, "parseinfo: stop = %p (buff=%p, offset = %d), hit = %s, full = %s, length = %d\n",
572                  aInfo.stop, pBuffer, aInfo.stop - pBuffer,
573                  aInfo.hit ? "true" : "false",
574                  aInfo.full ? "true" : "false",
575                  (int)aInfo.length );
576         #endif
577     }
578     catch( parser_error<const char*, const char*>& rError )
579     {
580         #if OSL_DEBUG_LEVEL > 1
581         fprintf( stderr, "parse error: %s at buffer pos %u\nobject stack:\n",
582                  rError.descriptor, rError.where - pBuffer );
583         unsigned int nElem = aGrammar.m_aObjectStack.size();
584         for( unsigned int i = 0; i < nElem; i++ )
585         {
586             fprintf( stderr, "   %s\n", typeid( *(aGrammar.m_aObjectStack[i]) ).name() );
587         }
588         #endif
589     }
590 
591     PDFEntry* pRet = NULL;
592     unsigned int nEntries = aGrammar.m_aObjectStack.size();
593     if( nEntries == 1 )
594     {
595         pRet = aGrammar.m_aObjectStack.back();
596         aGrammar.m_aObjectStack.pop_back();
597     }
598     #if OSL_DEBUG_LEVEL > 1
599     else if( nEntries > 1 )
600         fprintf( stderr, "error got %u stack objects in parse\n", nEntries );
601     #endif
602 
603     return pRet;
604 }
605 
606 PDFEntry* PDFReader::read( const char* pFileName )
607 {
608     #ifdef WIN32
609     /* #i106583#
610        since converting to boost 1.39 file_iterator does not work anymore on all Windows systems
611        C++ stdlib istream_iterator does not allow "-" apparently
612        using spirit 2.0 doesn't work in our environment with the MSC
613 
614        So for the time being bite the bullet and read the whole file.
615        FIXME: give Spirit 2.x another try when we upgrade boost again.
616     */
617     PDFEntry* pRet = NULL;
618     FILE* fp = fopen( pFileName, "rb" );
619     if( fp )
620     {
621         fseek( fp, 0, SEEK_END );
622         unsigned int nLen = (unsigned int)ftell( fp );
623         fseek( fp, 0, SEEK_SET );
624         char* pBuf = (char*)rtl_allocateMemory( nLen );
625         if( pBuf )
626         {
627             fread( pBuf, 1, nLen, fp );
628             pRet = read( pBuf, nLen );
629             rtl_freeMemory( pBuf );
630         }
631         fclose( fp );
632     }
633     return pRet;
634     #else
635     file_iterator<> file_start( pFileName );
636     if( ! file_start )
637         return NULL;
638     file_iterator<> file_end = file_start.make_end();
639     PDFGrammar< file_iterator<> > aGrammar( file_start );
640 
641     try
642     {
643         boost::spirit::parse_info< file_iterator<> > aInfo =
644             boost::spirit::parse( file_start,
645                                   file_end,
646                                   aGrammar,
647                                   boost::spirit::space_p );
648         #if OSL_DEBUG_LEVEL > 1
649         fprintf( stderr, "parseinfo: stop at offset = %d, hit = %s, full = %s, length = %d\n",
650                  aInfo.stop - file_start,
651                  aInfo.hit ? "true" : "false",
652                  aInfo.full ? "true" : "false",
653                  (int)aInfo.length );
654         #endif
655     }
656     catch( parser_error< const char*, file_iterator<> >& rError )
657     {
658         #if OSL_DEBUG_LEVEL > 1
659         fprintf( stderr, "parse error: %s at buffer pos %u\nobject stack:\n",
660                  rError.descriptor, rError.where - file_start );
661         unsigned int nElem = aGrammar.m_aObjectStack.size();
662         for( unsigned int i = 0; i < nElem; i++ )
663         {
664             fprintf( stderr, "   %s\n", typeid( *(aGrammar.m_aObjectStack[i]) ).name() );
665         }
666         #endif
667     }
668 
669     PDFEntry* pRet = NULL;
670     unsigned int nEntries = aGrammar.m_aObjectStack.size();
671     if( nEntries == 1 )
672     {
673         pRet = aGrammar.m_aObjectStack.back();
674         aGrammar.m_aObjectStack.pop_back();
675     }
676     #if OSL_DEBUG_LEVEL > 1
677     else if( nEntries > 1 )
678     {
679         fprintf( stderr, "error got %u stack objects in parse\n", nEntries );
680         for( unsigned int i = 0; i < nEntries; i++ )
681         {
682             fprintf( stderr, "%s\n", typeid(*aGrammar.m_aObjectStack[i]).name() );
683             PDFObject* pObj = dynamic_cast<PDFObject*>(aGrammar.m_aObjectStack[i]);
684             if( pObj )
685                 fprintf( stderr, "   -> object %d generation %d\n", pObj->m_nNumber, pObj->m_nGeneration );
686             else
687                 fprintf( stderr, "(type %s)\n", typeid(*aGrammar.m_aObjectStack[i]).name() );
688         }
689     }
690     #endif
691     return pRet;
692     #endif // WIN32
693 }
694 
695 #if defined __SUNPRO_CC
696 #pragma enable_warn
697 #elif defined _MSC_VER
698 #pragma warning(pop)
699 #endif
700 
701 
702