1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 #ifndef INCLUDED_PDFI_PDFPARSE_HXX
29 #define INCLUDED_PDFI_PDFPARSE_HXX
30 
31 #include <sal/types.h>
32 #include <rtl/ustring.hxx>
33 #include <rtl/string.hxx>
34 
35 #include <vector>
36 #include <hash_map>
37 
38 namespace pdfparse
39 {
40 
41 struct EmitImplData;
42 struct PDFContainer;
43 class EmitContext
44 {
45     public:
46     virtual bool write( const void* pBuf, unsigned int nLen ) = 0;
47     virtual unsigned int getCurPos() = 0;
48     virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) = 0;
49     virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) = 0;
50 
51     EmitContext( const PDFContainer* pTop = NULL );
52     virtual ~EmitContext();
53 
54     // set this to deflate contained streams
55     bool m_bDeflate;
56     // set this to decrypt the PDF file
57     bool m_bDecrypt;
58 
59     private:
60     friend struct PDFEntry;
61     EmitImplData* m_pImplData;
62 };
63 
64 struct PDFEntry
65 {
66     PDFEntry() {}
67     virtual ~PDFEntry();
68 
69     virtual bool emit( EmitContext& rWriteContext ) const = 0;
70     virtual PDFEntry* clone() const = 0;
71 
72     protected:
73     EmitImplData* getEmitData( EmitContext& rContext ) const;
74     void setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData ) const;
75 };
76 
77 struct PDFComment : public PDFEntry
78 {
79     rtl::OString  m_aComment;
80 
81     PDFComment( const rtl::OString& rComment )
82     : PDFEntry(), m_aComment( rComment ) {}
83     virtual ~PDFComment();
84     virtual bool emit( EmitContext& rWriteContext ) const;
85     virtual PDFEntry* clone() const;
86 };
87 
88 struct PDFValue : public PDFEntry
89 {
90     // abstract base class for simple values
91     PDFValue() : PDFEntry() {}
92     virtual ~PDFValue();
93 };
94 
95 struct PDFName : public PDFValue
96 {
97     rtl::OString  m_aName;
98 
99     PDFName( const rtl::OString& rName )
100     : PDFValue(), m_aName( rName ) {}
101     virtual ~PDFName();
102     virtual bool emit( EmitContext& rWriteContext ) const;
103     virtual PDFEntry* clone() const;
104 
105     rtl::OUString getFilteredName() const;
106 };
107 
108 struct PDFString : public PDFValue
109 {
110     rtl::OString  m_aString;
111 
112     PDFString( const rtl::OString& rString )
113     : PDFValue(), m_aString( rString ) {}
114     virtual ~PDFString();
115     virtual bool emit( EmitContext& rWriteContext ) const;
116     virtual PDFEntry* clone() const;
117 
118     rtl::OString getFilteredString() const;
119 };
120 
121 struct PDFNumber : public PDFValue
122 {
123     double m_fValue;
124 
125     PDFNumber( double fVal )
126     : PDFValue(), m_fValue( fVal ) {}
127     virtual ~PDFNumber();
128     virtual bool emit( EmitContext& rWriteContext ) const;
129     virtual PDFEntry* clone() const;
130 };
131 
132 struct PDFBool : public PDFValue
133 {
134     bool m_bValue;
135 
136     PDFBool( bool bVal )
137     : PDFValue(), m_bValue( bVal ) {}
138     virtual ~PDFBool();
139     virtual bool emit( EmitContext& rWriteContext ) const;
140     virtual PDFEntry* clone() const;
141 };
142 
143 struct PDFObjectRef : public PDFValue
144 {
145     unsigned int    m_nNumber;
146     unsigned int    m_nGeneration;
147 
148     PDFObjectRef( unsigned int nNr, unsigned int nGen )
149     : PDFValue(), m_nNumber( nNr ), m_nGeneration( nGen ) {}
150     virtual ~PDFObjectRef();
151     virtual bool emit( EmitContext& rWriteContext ) const;
152     virtual PDFEntry* clone() const;
153 };
154 
155 struct PDFNull : public PDFValue
156 {
157     PDFNull() {}
158     virtual ~PDFNull();
159     virtual bool emit( EmitContext& rWriteContext ) const;
160     virtual PDFEntry* clone() const;
161 };
162 
163 struct PDFObject;
164 struct PDFContainer : public PDFEntry
165 {
166     sal_Int32              m_nOffset;
167     std::vector<PDFEntry*> m_aSubElements;
168 
169     // this is an abstract base class for identifying
170     // entries that can contain sub elements besides comments
171     PDFContainer() : PDFEntry(), m_nOffset( 0 ) {}
172     virtual ~PDFContainer();
173     virtual bool emitSubElements( EmitContext& rWriteContext ) const;
174     virtual void cloneSubElements( std::vector<PDFEntry*>& rNewSubElements ) const;
175 
176     PDFObject* findObject( unsigned int nNumber, unsigned int nGeneration ) const;
177     PDFObject* findObject( PDFObjectRef* pRef ) const
178     { return findObject( pRef->m_nNumber, pRef->m_nGeneration ); }
179 };
180 
181 struct PDFArray : public PDFContainer
182 {
183     PDFArray() {}
184     virtual ~PDFArray();
185     virtual bool emit( EmitContext& rWriteContext ) const;
186     virtual PDFEntry* clone() const;
187 };
188 
189 struct PDFDict : public PDFContainer
190 {
191     typedef std::hash_map<rtl::OString,PDFEntry*,rtl::OStringHash> Map;
192     Map m_aMap;
193 
194     PDFDict() {}
195     virtual ~PDFDict();
196     virtual bool emit( EmitContext& rWriteContext ) const;
197     virtual PDFEntry* clone() const;
198 
199     // inserting a value of NULL will remove rName and the previous value
200     // from the dictionary
201     void insertValue( const rtl::OString& rName, PDFEntry* pValue );
202     // removes a name/value pair from the dict
203     void eraseValue( const rtl::OString& rName );
204     // builds new map as of sub elements
205     // returns NULL if successfull, else the first offending element
206     PDFEntry* buildMap();
207 };
208 
209 struct PDFStream : public PDFEntry
210 {
211     unsigned int    m_nBeginOffset;
212     unsigned int    m_nEndOffset; // offset of the byte after the stream
213     PDFDict*        m_pDict;
214 
215     PDFStream( unsigned int nBegin, unsigned int nEnd, PDFDict* pStreamDict )
216     : PDFEntry(), m_nBeginOffset( nBegin ), m_nEndOffset( nEnd ), m_pDict( pStreamDict ) {}
217     virtual ~PDFStream();
218     virtual bool emit( EmitContext& rWriteContext ) const;
219     virtual PDFEntry* clone() const;
220 
221     unsigned int getDictLength( const PDFContainer* pObjectContainer = NULL ) const; // get contents of the "Length" entry of the dict
222 };
223 
224 struct PDFTrailer : public PDFContainer
225 {
226     PDFDict*        m_pDict;
227 
228     PDFTrailer() : PDFContainer(), m_pDict( NULL ) {}
229     virtual ~PDFTrailer();
230     virtual bool emit( EmitContext& rWriteContext ) const;
231     virtual PDFEntry* clone() const;
232 };
233 
234 struct PDFFileImplData;
235 struct PDFFile : public PDFContainer
236 {
237     private:
238     mutable PDFFileImplData*    m_pData;
239     PDFFileImplData*            impl_getData() const;
240     public:
241     unsigned int        m_nMajor;           // PDF major
242     unsigned int        m_nMinor;           // PDF minor
243 
244     PDFFile()
245     : PDFContainer(),
246       m_pData( NULL ),
247       m_nMajor( 0 ), m_nMinor( 0 )
248     {}
249     virtual ~PDFFile();
250 
251     virtual bool emit( EmitContext& rWriteContext ) const;
252     virtual PDFEntry* clone() const;
253 
254     bool isEncrypted() const;
255     // this method checks whether rPwd is compatible with
256     // either user or owner password and sets up decrypt data in that case
257     // returns true if decryption can be done
258     bool setupDecryptionData( const rtl::OString& rPwd ) const;
259 
260     bool decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen,
261                   sal_uInt8* pOutBuffer,
262                   unsigned int nObject, unsigned int nGeneration ) const;
263 
264     rtl::OUString getDecryptionKey() const;
265 };
266 
267 struct PDFObject : public PDFContainer
268 {
269     PDFEntry*       m_pObject;
270     PDFStream*      m_pStream;
271     unsigned int    m_nNumber;
272     unsigned int    m_nGeneration;
273 
274     PDFObject( unsigned int nNr, unsigned int nGen )
275     : m_pObject( NULL ), m_pStream( NULL ), m_nNumber( nNr ), m_nGeneration( nGen ) {}
276     virtual ~PDFObject();
277     virtual bool emit( EmitContext& rWriteContext ) const;
278     virtual PDFEntry* clone() const;
279 
280     // writes only the contained stream, deflated if necessary
281     bool writeStream( EmitContext& rContext, const PDFFile* pPDFFile ) const;
282 
283     private:
284     // returns true if stream is deflated
285     // fills *ppStream and *pBytes with start of stream and count of bytes
286     // memory returned in *ppStream must be freed with rtl_freeMemory afterwards
287     // fills in NULL and 0 in case of error
288     bool getDeflatedStream( char** ppStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const;
289 };
290 
291 struct PDFPart : public PDFContainer
292 {
293     PDFPart() : PDFContainer() {}
294     virtual ~PDFPart();
295     virtual bool emit( EmitContext& rWriteContext ) const;
296     virtual PDFEntry* clone() const;
297 };
298 
299 class PDFReader
300 {
301     public:
302     PDFReader() {}
303     ~PDFReader() {}
304 
305     PDFEntry* read( const char* pFileName );
306     PDFEntry* read( const char* pBuffer, unsigned int nLen );
307 };
308 
309 } // namespace
310 
311 #endif
312