1*c142477cSAndrew Rist /**************************************************************
2cdf0e10cSrcweir  *
3*c142477cSAndrew Rist  * Licensed to the Apache Software Foundation (ASF) under one
4*c142477cSAndrew Rist  * or more contributor license agreements.  See the NOTICE file
5*c142477cSAndrew Rist  * distributed with this work for additional information
6*c142477cSAndrew Rist  * regarding copyright ownership.  The ASF licenses this file
7*c142477cSAndrew Rist  * to you under the Apache License, Version 2.0 (the
8*c142477cSAndrew Rist  * "License"); you may not use this file except in compliance
9*c142477cSAndrew Rist  * with the License.  You may obtain a copy of the License at
10*c142477cSAndrew Rist  *
11*c142477cSAndrew Rist  *   http://www.apache.org/licenses/LICENSE-2.0
12*c142477cSAndrew Rist  *
13*c142477cSAndrew Rist  * Unless required by applicable law or agreed to in writing,
14*c142477cSAndrew Rist  * software distributed under the License is distributed on an
15*c142477cSAndrew Rist  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*c142477cSAndrew Rist  * KIND, either express or implied.  See the License for the
17*c142477cSAndrew Rist  * specific language governing permissions and limitations
18*c142477cSAndrew Rist  * under the License.
19*c142477cSAndrew Rist  *
20*c142477cSAndrew Rist  *************************************************************/
21*c142477cSAndrew Rist 
22*c142477cSAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_sdext.hxx"
26cdf0e10cSrcweir 
27cdf0e10cSrcweir #include <stdio.h>
28cdf0e10cSrcweir #include <sal/main.h>
29cdf0e10cSrcweir #include <osl/file.h>
30cdf0e10cSrcweir #include <osl/thread.h>
31cdf0e10cSrcweir #include <rtl/alloc.h>
32cdf0e10cSrcweir #include <rtl/ustring.hxx>
33cdf0e10cSrcweir #include <rtl/strbuf.hxx>
34cdf0e10cSrcweir 
35cdf0e10cSrcweir #include "pdfparse.hxx"
36cdf0e10cSrcweir 
37cdf0e10cSrcweir using namespace rtl;
38cdf0e10cSrcweir using namespace pdfparse;
39cdf0e10cSrcweir 
/** Print the command line synopsis and the option descriptions to stdout.

    @param pExe
    name of the executable (the caller passes argv[0]); it is substituted
    into each of the five usage lines
*/
void printHelp( const char* pExe )
{
    fprintf( stdout,
    "USAGE: %s [-h,--help]\n"
    "       %s [-pw, --password <password>] <inputfile> [<outputfile>]\n"
    "       %s <-a, --extract-add-streams> [-pw, --password <password>] <inputfile> [<outputfile>]\n"
    "       %s <-f, --extract-fonts> [-pw, --password <password>] <inputfile> [<outputfile>]\n"
    "       %s <-o, --extract-objects> <o0>[:<g0>][,<o1>[:g1][,...]] [-pw, --password <password>] <inputfile> [<outputfile>]\n"
    "  -h, --help: show help\n"
    "  -a, --extract-add-streams: extracts additional streams to outputfile_object\n"
    "      and prints the mimetype found to stdout\n"
    // the closing parenthesis was missing in this line of the help text
    "  -f, --extract-fonts: extracts fonts (currently only type1 and truetype are supported)\n"
    "  -o, --extract-objects: extracts object streams, the syntax of the argument is comma separated\n"
    "      object numbers, where object number and generation number are separated by \':\'\n"
    "      an omitted generation number defaults to 0\n"
    "  -pw, --password: use password for decryption\n"
    "\n"
    "note: -f, -a, -o and normal unzip operation are mutually exclusive\n"
    , pExe, pExe, pExe, pExe, pExe );
}
60cdf0e10cSrcweir 
61cdf0e10cSrcweir class FileEmitContext : public EmitContext
62cdf0e10cSrcweir {
63cdf0e10cSrcweir     oslFileHandle m_aHandle;
64cdf0e10cSrcweir     oslFileHandle m_aReadHandle;
65cdf0e10cSrcweir     unsigned int  m_nReadLen;
66cdf0e10cSrcweir 
67cdf0e10cSrcweir     void openReadFile( const char* pOrigName );
68cdf0e10cSrcweir 
69cdf0e10cSrcweir     public:
70cdf0e10cSrcweir     FileEmitContext( const char* pFileName, const char* pOrigName, const PDFContainer* pTop );
71cdf0e10cSrcweir     virtual ~FileEmitContext();
72cdf0e10cSrcweir 
73cdf0e10cSrcweir     virtual bool write( const void* pBuf, unsigned int nLen ) throw();
74cdf0e10cSrcweir     virtual unsigned int getCurPos() throw();
75cdf0e10cSrcweir     virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw();
76cdf0e10cSrcweir     virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw();
77cdf0e10cSrcweir };
78cdf0e10cSrcweir 
FileEmitContext(const char * pFileName,const char * pOrigName,const PDFContainer * pTop)79cdf0e10cSrcweir FileEmitContext::FileEmitContext( const char* pFileName, const char* pOrigName, const PDFContainer* pTop )
80cdf0e10cSrcweir     : EmitContext( pTop ),
81cdf0e10cSrcweir       m_aHandle( NULL ),
82cdf0e10cSrcweir       m_aReadHandle( NULL ),
83cdf0e10cSrcweir       m_nReadLen( 0 )
84cdf0e10cSrcweir {
85cdf0e10cSrcweir     OUString aSysFile( OStringToOUString( OString( pFileName ), osl_getThreadTextEncoding() ) );
86cdf0e10cSrcweir     OUString aURL;
87cdf0e10cSrcweir     if( osl_getFileURLFromSystemPath( aSysFile.pData, &aURL.pData ) != osl_File_E_None )
88cdf0e10cSrcweir     {
89cdf0e10cSrcweir         fprintf( stderr, "filename conversion \"%s\" failed\n", pFileName );
90cdf0e10cSrcweir         return;
91cdf0e10cSrcweir     }
92cdf0e10cSrcweir 
93cdf0e10cSrcweir     if( osl_openFile( aURL.pData, &m_aHandle, osl_File_OpenFlag_Write ) == osl_File_E_None )
94cdf0e10cSrcweir     {
95cdf0e10cSrcweir         if( osl_setFileSize( m_aHandle, 0 ) != osl_File_E_None )
96cdf0e10cSrcweir         {
97cdf0e10cSrcweir             fprintf( stderr, "could not truncate %s\n", pFileName );
98cdf0e10cSrcweir             osl_closeFile( m_aHandle );
99cdf0e10cSrcweir             m_aHandle = NULL;
100cdf0e10cSrcweir         }
101cdf0e10cSrcweir     }
102cdf0e10cSrcweir     else if( osl_openFile( aURL.pData, &m_aHandle,
103cdf0e10cSrcweir             osl_File_OpenFlag_Write |osl_File_OpenFlag_Create ) != osl_File_E_None )
104cdf0e10cSrcweir     {
105cdf0e10cSrcweir         fprintf( stderr, "could not open %s\n", pFileName );
106cdf0e10cSrcweir         return;
107cdf0e10cSrcweir     }
108cdf0e10cSrcweir     m_bDeflate = true;
109cdf0e10cSrcweir 
110cdf0e10cSrcweir     openReadFile( pOrigName );
111cdf0e10cSrcweir }
112cdf0e10cSrcweir 
~FileEmitContext()113cdf0e10cSrcweir FileEmitContext::~FileEmitContext()
114cdf0e10cSrcweir {
115cdf0e10cSrcweir     if( m_aHandle )
116cdf0e10cSrcweir         osl_closeFile( m_aHandle );
117cdf0e10cSrcweir     if( m_aReadHandle )
118cdf0e10cSrcweir         osl_closeFile( m_aReadHandle );
119cdf0e10cSrcweir }
120cdf0e10cSrcweir 
openReadFile(const char * pInFile)121cdf0e10cSrcweir void FileEmitContext::openReadFile( const char* pInFile )
122cdf0e10cSrcweir {
123cdf0e10cSrcweir     OUString aSysFile( OStringToOUString( OString( pInFile ), osl_getThreadTextEncoding() ) );
124cdf0e10cSrcweir     OUString aURL;
125cdf0e10cSrcweir     if( osl_getFileURLFromSystemPath( aSysFile.pData, &aURL.pData ) != osl_File_E_None )
126cdf0e10cSrcweir     {
127cdf0e10cSrcweir         fprintf( stderr, "filename conversion \"%s\" failed\n", pInFile );
128cdf0e10cSrcweir         return;
129cdf0e10cSrcweir     }
130cdf0e10cSrcweir 
131cdf0e10cSrcweir     if( osl_openFile( aURL.pData, &m_aReadHandle, osl_File_OpenFlag_Read ) != osl_File_E_None )
132cdf0e10cSrcweir     {
133cdf0e10cSrcweir         fprintf( stderr, "could not open %s\n", pInFile );
134cdf0e10cSrcweir         return;
135cdf0e10cSrcweir     }
136cdf0e10cSrcweir 
137cdf0e10cSrcweir     if( osl_setFilePos( m_aReadHandle, osl_Pos_End, 0 ) != osl_File_E_None )
138cdf0e10cSrcweir     {
139cdf0e10cSrcweir         fprintf( stderr, "could not seek to end of %s\n", pInFile );
140cdf0e10cSrcweir         osl_closeFile( m_aReadHandle );
141cdf0e10cSrcweir         return;
142cdf0e10cSrcweir     }
143cdf0e10cSrcweir 
144cdf0e10cSrcweir     sal_uInt64 nFileSize = 0;
145cdf0e10cSrcweir     if( osl_getFilePos( m_aReadHandle, &nFileSize ) != osl_File_E_None )
146cdf0e10cSrcweir     {
147cdf0e10cSrcweir         fprintf( stderr, "could not get end pos of %s\n", pInFile );
148cdf0e10cSrcweir         osl_closeFile( m_aReadHandle );
149cdf0e10cSrcweir         return;
150cdf0e10cSrcweir     }
151cdf0e10cSrcweir 
152cdf0e10cSrcweir     m_nReadLen = static_cast<unsigned int>(nFileSize);
153cdf0e10cSrcweir }
154cdf0e10cSrcweir 
write(const void * pBuf,unsigned int nLen)155cdf0e10cSrcweir bool FileEmitContext::write( const void* pBuf, unsigned int nLen ) throw()
156cdf0e10cSrcweir {
157cdf0e10cSrcweir     if( ! m_aHandle )
158cdf0e10cSrcweir         return false;
159cdf0e10cSrcweir 
160cdf0e10cSrcweir     sal_uInt64 nWrite = static_cast<sal_uInt64>(nLen);
161cdf0e10cSrcweir     sal_uInt64 nWritten = 0;
162cdf0e10cSrcweir     return (osl_writeFile( m_aHandle, pBuf, nWrite, &nWritten ) == osl_File_E_None)
163cdf0e10cSrcweir            && nWrite == nWritten;
164cdf0e10cSrcweir }
165cdf0e10cSrcweir 
getCurPos()166cdf0e10cSrcweir unsigned int FileEmitContext::getCurPos() throw()
167cdf0e10cSrcweir {
168cdf0e10cSrcweir     sal_uInt64 nFileSize = 0;
169cdf0e10cSrcweir     if( m_aHandle )
170cdf0e10cSrcweir     {
171cdf0e10cSrcweir         if( osl_getFilePos( m_aHandle, &nFileSize ) != osl_File_E_None )
172cdf0e10cSrcweir             nFileSize = 0;
173cdf0e10cSrcweir     }
174cdf0e10cSrcweir     return static_cast<unsigned int>(nFileSize);
175cdf0e10cSrcweir }
176cdf0e10cSrcweir 
copyOrigBytes(unsigned int nOrigOffset,unsigned int nLen)177cdf0e10cSrcweir bool FileEmitContext::copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw()
178cdf0e10cSrcweir {
179cdf0e10cSrcweir     if( nOrigOffset + nLen > m_nReadLen )
180cdf0e10cSrcweir         return false;
181cdf0e10cSrcweir 
182cdf0e10cSrcweir     if( osl_setFilePos( m_aReadHandle, osl_Pos_Absolut, nOrigOffset ) != osl_File_E_None )
183cdf0e10cSrcweir     {
184cdf0e10cSrcweir         fprintf( stderr, "could not seek to offset %u\n", nOrigOffset );
185cdf0e10cSrcweir         return false;
186cdf0e10cSrcweir     }
187cdf0e10cSrcweir     void* pBuf = rtl_allocateMemory( nLen );
188cdf0e10cSrcweir     if( ! pBuf )
189cdf0e10cSrcweir         return false;
190cdf0e10cSrcweir     sal_uInt64 nBytesRead = 0;
191cdf0e10cSrcweir     if( osl_readFile( m_aReadHandle, pBuf, nLen, &nBytesRead ) != osl_File_E_None
192cdf0e10cSrcweir         || nBytesRead != static_cast<sal_uInt64>(nLen) )
193cdf0e10cSrcweir     {
194cdf0e10cSrcweir         fprintf( stderr, "could not read %u bytes\n", nLen );
195cdf0e10cSrcweir         rtl_freeMemory( pBuf );
196cdf0e10cSrcweir         return false;
197cdf0e10cSrcweir     }
198cdf0e10cSrcweir     bool bRet = write( pBuf, nLen );
199cdf0e10cSrcweir     rtl_freeMemory( pBuf );
200cdf0e10cSrcweir     return bRet;
201cdf0e10cSrcweir }
202cdf0e10cSrcweir 
readOrigBytes(unsigned int nOrigOffset,unsigned int nLen,void * pBuf)203cdf0e10cSrcweir unsigned int FileEmitContext::readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw()
204cdf0e10cSrcweir {
205cdf0e10cSrcweir     if( nOrigOffset + nLen > m_nReadLen )
206cdf0e10cSrcweir         return 0;
207cdf0e10cSrcweir 
208cdf0e10cSrcweir     if( osl_setFilePos( m_aReadHandle, osl_Pos_Absolut, nOrigOffset ) != osl_File_E_None )
209cdf0e10cSrcweir     {
210cdf0e10cSrcweir         fprintf( stderr, "could not seek to offset %u\n", nOrigOffset );
211cdf0e10cSrcweir         return 0;
212cdf0e10cSrcweir     }
213cdf0e10cSrcweir     sal_uInt64 nBytesRead = 0;
214cdf0e10cSrcweir     if( osl_readFile( m_aReadHandle, pBuf, nLen, &nBytesRead ) != osl_File_E_None )
215cdf0e10cSrcweir         return 0;
216cdf0e10cSrcweir     return static_cast<unsigned int>(nBytesRead);
217cdf0e10cSrcweir }
218cdf0e10cSrcweir 
219cdf0e10cSrcweir typedef int(*PDFFileHdl)(const char*, const char*, PDFFile*);
220cdf0e10cSrcweir 
handleFile(const char * pInFile,const char * pOutFile,const char * pPassword,PDFFileHdl pHdl)221cdf0e10cSrcweir int handleFile( const char* pInFile, const char* pOutFile, const char* pPassword, PDFFileHdl pHdl )
222cdf0e10cSrcweir {
223cdf0e10cSrcweir 
224cdf0e10cSrcweir     PDFReader aParser;
225cdf0e10cSrcweir     int nRet = 0;
226cdf0e10cSrcweir     PDFEntry* pEntry = aParser.read( pInFile );
227cdf0e10cSrcweir     if( pEntry )
228cdf0e10cSrcweir     {
229cdf0e10cSrcweir         PDFFile* pPDFFile = dynamic_cast<PDFFile*>(pEntry);
230cdf0e10cSrcweir         if( pPDFFile )
231cdf0e10cSrcweir         {
232cdf0e10cSrcweir             fprintf( stdout, "have a %s PDF file\n", pPDFFile->isEncrypted() ? "encrypted" : "unencrypted" );
233cdf0e10cSrcweir             if( pPassword )
234cdf0e10cSrcweir                 fprintf( stdout, "password %s\n",
235cdf0e10cSrcweir                          pPDFFile->setupDecryptionData( pPassword ) ? "matches" : "does not match" );
236cdf0e10cSrcweir             nRet = pHdl( pInFile, pOutFile, pPDFFile );
237cdf0e10cSrcweir         }
238cdf0e10cSrcweir         else
239cdf0e10cSrcweir             nRet = 20;
240cdf0e10cSrcweir         delete pEntry;
241cdf0e10cSrcweir     }
242cdf0e10cSrcweir     return nRet;
243cdf0e10cSrcweir }
244cdf0e10cSrcweir 
write_unzipFile(const char * pInFile,const char * pOutFile,PDFFile * pPDFFile)245cdf0e10cSrcweir int write_unzipFile( const char* pInFile, const char* pOutFile, PDFFile* pPDFFile )
246cdf0e10cSrcweir {
247cdf0e10cSrcweir     FileEmitContext aContext( pOutFile, pInFile, pPDFFile );
248cdf0e10cSrcweir     aContext.m_bDecrypt = pPDFFile->isEncrypted();
249cdf0e10cSrcweir     pPDFFile->emit(aContext);
250cdf0e10cSrcweir     return 0;
251cdf0e10cSrcweir }
252cdf0e10cSrcweir 
write_addStreamArray(const char * pOutFile,PDFArray * pStreams,PDFFile * pPDFFile,const char * pInFile)253cdf0e10cSrcweir int write_addStreamArray( const char* pOutFile, PDFArray* pStreams, PDFFile* pPDFFile, const char* pInFile )
254cdf0e10cSrcweir {
255cdf0e10cSrcweir     int nRet = 0;
256cdf0e10cSrcweir     unsigned int nArrayElements = pStreams->m_aSubElements.size();
257cdf0e10cSrcweir     for( unsigned int i = 0; i < nArrayElements-1 && nRet == 0; i++ )
258cdf0e10cSrcweir     {
259cdf0e10cSrcweir         PDFName* pMimeType = dynamic_cast<PDFName*>(pStreams->m_aSubElements[i]);
260cdf0e10cSrcweir         PDFObjectRef* pStreamRef = dynamic_cast<PDFObjectRef*>(pStreams->m_aSubElements[i+1]);
261cdf0e10cSrcweir         if( ! pMimeType )
262cdf0e10cSrcweir             fprintf( stderr, "error: no mimetype element\n" );
263cdf0e10cSrcweir         if( ! pStreamRef )
264cdf0e10cSrcweir             fprintf( stderr, "error: no stream ref element\n" );
265cdf0e10cSrcweir         if( pMimeType && pStreamRef )
266cdf0e10cSrcweir         {
267cdf0e10cSrcweir             fprintf( stdout, "found stream %d %d with mimetype %s\n",
268cdf0e10cSrcweir                      pStreamRef->m_nNumber, pStreamRef->m_nGeneration,
269cdf0e10cSrcweir                      pMimeType->m_aName.getStr() );
270cdf0e10cSrcweir             PDFObject* pObject = pPDFFile->findObject( pStreamRef->m_nNumber, pStreamRef->m_nGeneration );
271cdf0e10cSrcweir             if( pObject )
272cdf0e10cSrcweir             {
273cdf0e10cSrcweir                 rtl::OStringBuffer aOutStream( pOutFile );
274cdf0e10cSrcweir                 aOutStream.append( "_stream_" );
275cdf0e10cSrcweir                 aOutStream.append( sal_Int32(pStreamRef->m_nNumber) );
276cdf0e10cSrcweir                 aOutStream.append( "_" );
277cdf0e10cSrcweir                 aOutStream.append( sal_Int32(pStreamRef->m_nGeneration) );
278cdf0e10cSrcweir                 FileEmitContext aContext( aOutStream.getStr(), pInFile, pPDFFile );
279cdf0e10cSrcweir                 aContext.m_bDecrypt = pPDFFile->isEncrypted();
280cdf0e10cSrcweir                 pObject->writeStream( aContext, pPDFFile );
281cdf0e10cSrcweir             }
282cdf0e10cSrcweir             else
283cdf0e10cSrcweir             {
284cdf0e10cSrcweir                 fprintf( stderr, "object not found\n" );
285cdf0e10cSrcweir                 nRet = 121;
286cdf0e10cSrcweir             }
287cdf0e10cSrcweir         }
288cdf0e10cSrcweir         else
289cdf0e10cSrcweir             nRet = 120;
290cdf0e10cSrcweir     }
291cdf0e10cSrcweir     return nRet;
292cdf0e10cSrcweir }
293cdf0e10cSrcweir 
write_addStreams(const char * pInFile,const char * pOutFile,PDFFile * pPDFFile)294cdf0e10cSrcweir int write_addStreams( const char* pInFile, const char* pOutFile, PDFFile* pPDFFile )
295cdf0e10cSrcweir {
296cdf0e10cSrcweir     // find all trailers
297cdf0e10cSrcweir     int nRet = 0;
298cdf0e10cSrcweir     unsigned int nElements = pPDFFile->m_aSubElements.size();
299cdf0e10cSrcweir     for( unsigned i = 0; i < nElements && nRet == 0; i++ )
300cdf0e10cSrcweir     {
301cdf0e10cSrcweir         PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pPDFFile->m_aSubElements[i]);
302cdf0e10cSrcweir         if( pTrailer && pTrailer->m_pDict )
303cdf0e10cSrcweir         {
304cdf0e10cSrcweir             // search for AdditionalStreams entry
305cdf0e10cSrcweir             std::hash_map<rtl::OString,PDFEntry*,rtl::OStringHash>::iterator add_stream;
306cdf0e10cSrcweir             add_stream = pTrailer->m_pDict->m_aMap.find( "AdditionalStreams" );
307cdf0e10cSrcweir             if( add_stream != pTrailer->m_pDict->m_aMap.end() )
308cdf0e10cSrcweir             {
309cdf0e10cSrcweir                 PDFArray* pStreams = dynamic_cast<PDFArray*>(add_stream->second);
310cdf0e10cSrcweir                 if( pStreams )
311cdf0e10cSrcweir                     nRet = write_addStreamArray( pOutFile, pStreams, pPDFFile, pInFile );
312cdf0e10cSrcweir             }
313cdf0e10cSrcweir         }
314cdf0e10cSrcweir     }
315cdf0e10cSrcweir     return nRet;
316cdf0e10cSrcweir }
317cdf0e10cSrcweir 
write_fonts(const char * i_pInFile,const char * i_pOutFile,PDFFile * i_pPDFFile)318cdf0e10cSrcweir int write_fonts( const char* i_pInFile, const char* i_pOutFile, PDFFile* i_pPDFFile )
319cdf0e10cSrcweir {
320cdf0e10cSrcweir     int nRet = 0;
321cdf0e10cSrcweir     unsigned int nElements = i_pPDFFile->m_aSubElements.size();
322cdf0e10cSrcweir     for( unsigned i = 0; i < nElements && nRet == 0; i++ )
323cdf0e10cSrcweir     {
324cdf0e10cSrcweir         // search FontDescriptors
325cdf0e10cSrcweir         PDFObject* pObj = dynamic_cast<PDFObject*>(i_pPDFFile->m_aSubElements[i]);
326cdf0e10cSrcweir         if( ! pObj )
327cdf0e10cSrcweir             continue;
328cdf0e10cSrcweir         PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
329cdf0e10cSrcweir         if( ! pDict )
330cdf0e10cSrcweir             continue;
331cdf0e10cSrcweir 
332cdf0e10cSrcweir         std::hash_map<rtl::OString,PDFEntry*,rtl::OStringHash>::iterator map_it =
333cdf0e10cSrcweir                 pDict->m_aMap.find( "Type" );
334cdf0e10cSrcweir         if( map_it == pDict->m_aMap.end() )
335cdf0e10cSrcweir             continue;
336cdf0e10cSrcweir 
337cdf0e10cSrcweir         PDFName* pName = dynamic_cast<PDFName*>(map_it->second);
338cdf0e10cSrcweir         if( ! pName )
339cdf0e10cSrcweir             continue;
340cdf0e10cSrcweir         if( ! pName->m_aName.equals( "FontDescriptor" ) )
341cdf0e10cSrcweir             continue;
342cdf0e10cSrcweir 
343cdf0e10cSrcweir         // the font name will be helpful, also there must be one in
344cdf0e10cSrcweir         // a font descriptor
345cdf0e10cSrcweir         map_it = pDict->m_aMap.find( "FontName" );
346cdf0e10cSrcweir         if( map_it == pDict->m_aMap.end() )
347cdf0e10cSrcweir             continue;
348cdf0e10cSrcweir         pName = dynamic_cast<PDFName*>(map_it->second);
349cdf0e10cSrcweir         if( ! pName )
350cdf0e10cSrcweir             continue;
351cdf0e10cSrcweir         rtl::OString aFontName( pName->m_aName );
352cdf0e10cSrcweir 
353cdf0e10cSrcweir         PDFObjectRef* pStreamRef = 0;
354cdf0e10cSrcweir         const char* pFileType = NULL;
355cdf0e10cSrcweir         // we have a font descriptor, try for a type 1 font
356cdf0e10cSrcweir         map_it = pDict->m_aMap.find( "FontFile" );
357cdf0e10cSrcweir         if( map_it != pDict->m_aMap.end() )
358cdf0e10cSrcweir         {
359cdf0e10cSrcweir             pStreamRef = dynamic_cast<PDFObjectRef*>(map_it->second);
360cdf0e10cSrcweir             if( pStreamRef )
361cdf0e10cSrcweir                 pFileType = "pfa";
362cdf0e10cSrcweir         }
363cdf0e10cSrcweir 
364cdf0e10cSrcweir         // perhaps it's a truetype file ?
365cdf0e10cSrcweir         if( ! pStreamRef )
366cdf0e10cSrcweir         {
367cdf0e10cSrcweir             map_it  = pDict->m_aMap.find( "FontFile2" );
368cdf0e10cSrcweir             if( map_it != pDict->m_aMap.end() )
369cdf0e10cSrcweir             {
370cdf0e10cSrcweir                 pStreamRef = dynamic_cast<PDFObjectRef*>(map_it->second);
371cdf0e10cSrcweir                 if( pStreamRef )
372cdf0e10cSrcweir                     pFileType = "ttf";
373cdf0e10cSrcweir             }
374cdf0e10cSrcweir         }
375cdf0e10cSrcweir 
376cdf0e10cSrcweir         if( ! pStreamRef )
377cdf0e10cSrcweir             continue;
378cdf0e10cSrcweir 
379cdf0e10cSrcweir         PDFObject* pStream = i_pPDFFile->findObject( pStreamRef );
380cdf0e10cSrcweir         if( ! pStream )
381cdf0e10cSrcweir             continue;
382cdf0e10cSrcweir 
383cdf0e10cSrcweir         rtl::OStringBuffer aOutStream( i_pOutFile );
384cdf0e10cSrcweir         aOutStream.append( "_font_" );
385cdf0e10cSrcweir         aOutStream.append( sal_Int32(pStreamRef->m_nNumber) );
386cdf0e10cSrcweir         aOutStream.append( "_" );
387cdf0e10cSrcweir         aOutStream.append( sal_Int32(pStreamRef->m_nGeneration) );
388cdf0e10cSrcweir         aOutStream.append( "_" );
389cdf0e10cSrcweir         aOutStream.append( aFontName );
390cdf0e10cSrcweir         if( pFileType )
391cdf0e10cSrcweir         {
392cdf0e10cSrcweir             aOutStream.append( "." );
393cdf0e10cSrcweir             aOutStream.append( pFileType );
394cdf0e10cSrcweir         }
395cdf0e10cSrcweir         FileEmitContext aContext( aOutStream.getStr(), i_pInFile, i_pPDFFile );
396cdf0e10cSrcweir         aContext.m_bDecrypt = i_pPDFFile->isEncrypted();
397cdf0e10cSrcweir         pStream->writeStream( aContext, i_pPDFFile );
398cdf0e10cSrcweir     }
399cdf0e10cSrcweir     return nRet;
400cdf0e10cSrcweir }
401cdf0e10cSrcweir 
402cdf0e10cSrcweir std::vector< std::pair< sal_Int32, sal_Int32 > > s_aEmitObjects;
403cdf0e10cSrcweir 
write_objects(const char * i_pInFile,const char * i_pOutFile,PDFFile * i_pPDFFile)404cdf0e10cSrcweir int write_objects( const char* i_pInFile, const char* i_pOutFile, PDFFile* i_pPDFFile )
405cdf0e10cSrcweir {
406cdf0e10cSrcweir     int nRet = 0;
407cdf0e10cSrcweir     unsigned int nElements = s_aEmitObjects.size();
408cdf0e10cSrcweir     for( unsigned i = 0; i < nElements && nRet == 0; i++ )
409cdf0e10cSrcweir     {
410cdf0e10cSrcweir         sal_Int32 nObject     = s_aEmitObjects[i].first;
411cdf0e10cSrcweir         sal_Int32 nGeneration = s_aEmitObjects[i].second;
412cdf0e10cSrcweir         PDFObject* pStream = i_pPDFFile->findObject( nObject, nGeneration );
413cdf0e10cSrcweir         if( ! pStream )
414cdf0e10cSrcweir         {
415cdf0e10cSrcweir             fprintf( stderr, "object %d %d not found !\n", (int)nObject, (int)nGeneration );
416cdf0e10cSrcweir             continue;
417cdf0e10cSrcweir         }
418cdf0e10cSrcweir 
419cdf0e10cSrcweir         rtl::OStringBuffer aOutStream( i_pOutFile );
420cdf0e10cSrcweir         aOutStream.append( "_stream_" );
421cdf0e10cSrcweir         aOutStream.append( nObject );
422cdf0e10cSrcweir         aOutStream.append( "_" );
423cdf0e10cSrcweir         aOutStream.append( nGeneration );
424cdf0e10cSrcweir         FileEmitContext aContext( aOutStream.getStr(), i_pInFile, i_pPDFFile );
425cdf0e10cSrcweir         aContext.m_bDecrypt = i_pPDFFile->isEncrypted();
426cdf0e10cSrcweir         pStream->writeStream( aContext, i_pPDFFile );
427cdf0e10cSrcweir     }
428cdf0e10cSrcweir     return nRet;
429cdf0e10cSrcweir }
430cdf0e10cSrcweir 
SAL_IMPLEMENT_MAIN_WITH_ARGS(argc,argv)431cdf0e10cSrcweir SAL_IMPLEMENT_MAIN_WITH_ARGS( argc, argv )
432cdf0e10cSrcweir {
433cdf0e10cSrcweir     const char* pInFile = NULL;
434cdf0e10cSrcweir     const char* pOutFile = NULL;
435cdf0e10cSrcweir     const char* pPassword = NULL;
436cdf0e10cSrcweir     OStringBuffer aOutFile( 256 );
437cdf0e10cSrcweir     PDFFileHdl aHdl = write_unzipFile;
438cdf0e10cSrcweir 
439cdf0e10cSrcweir     for( int nArg = 1; nArg < argc; nArg++ )
440cdf0e10cSrcweir     {
441cdf0e10cSrcweir         if( argv[nArg][0] == '-' )
442cdf0e10cSrcweir         {
443cdf0e10cSrcweir             if( ! rtl_str_compare( "-pw", argv[nArg] ) ||
444cdf0e10cSrcweir                 ! rtl_str_compare( "--password" , argv[nArg] ) )
445cdf0e10cSrcweir             {
446cdf0e10cSrcweir                 if( nArg == argc-1 )
447cdf0e10cSrcweir                 {
448cdf0e10cSrcweir                     fprintf( stderr, "no password given\n" );
449cdf0e10cSrcweir                     return 1;
450cdf0e10cSrcweir                 }
451cdf0e10cSrcweir                 nArg++;
452cdf0e10cSrcweir                 pPassword = argv[nArg];
453cdf0e10cSrcweir             }
454cdf0e10cSrcweir             else if( ! rtl_str_compare( "-h", argv[nArg] ) ||
455cdf0e10cSrcweir                 ! rtl_str_compare( "--help", argv[nArg] ) )
456cdf0e10cSrcweir             {
457cdf0e10cSrcweir                 printHelp( argv[0] );
458cdf0e10cSrcweir                 return 0;
459cdf0e10cSrcweir             }
460cdf0e10cSrcweir             else if( ! rtl_str_compare( "-a", argv[nArg] ) ||
461cdf0e10cSrcweir                 ! rtl_str_compare( "--extract-add-streams", argv[nArg] ) )
462cdf0e10cSrcweir             {
463cdf0e10cSrcweir                 aHdl = write_addStreams;
464cdf0e10cSrcweir             }
465cdf0e10cSrcweir             else if( ! rtl_str_compare( "-f", argv[nArg] ) ||
466cdf0e10cSrcweir                 ! rtl_str_compare( "--extract-fonts", argv[nArg] ) )
467cdf0e10cSrcweir             {
468cdf0e10cSrcweir                 aHdl = write_fonts;
469cdf0e10cSrcweir             }
470cdf0e10cSrcweir             else if( ! rtl_str_compare( "-o", argv[nArg] ) ||
471cdf0e10cSrcweir                 ! rtl_str_compare( "--extract-objects", argv[nArg] ) )
472cdf0e10cSrcweir             {
473cdf0e10cSrcweir                 aHdl = write_objects;
474cdf0e10cSrcweir                 nArg++;
475cdf0e10cSrcweir                 if( nArg < argc )
476cdf0e10cSrcweir                 {
477cdf0e10cSrcweir                     rtl::OString aObjs( argv[nArg] );
478cdf0e10cSrcweir                     sal_Int32 nIndex = 0;
479cdf0e10cSrcweir                     while( nIndex != -1 )
480cdf0e10cSrcweir                     {
481cdf0e10cSrcweir                         rtl::OString aToken( aObjs.getToken( 0, ',', nIndex ) );
482cdf0e10cSrcweir                         sal_Int32 nObject = 0;
483cdf0e10cSrcweir                         sal_Int32 nGeneration = 0;
484cdf0e10cSrcweir                         sal_Int32 nGenIndex = 0;
485cdf0e10cSrcweir                         nObject = aToken.getToken( 0, ':', nGenIndex ).toInt32();
486cdf0e10cSrcweir                         if( nGenIndex != -1 )
487cdf0e10cSrcweir                             nGeneration = aToken.getToken( 0, ':', nGenIndex ).toInt32();
488cdf0e10cSrcweir                         s_aEmitObjects.push_back( std::pair<sal_Int32,sal_Int32>(nObject,nGeneration) );
489cdf0e10cSrcweir                     }
490cdf0e10cSrcweir                 }
491cdf0e10cSrcweir             }
492cdf0e10cSrcweir             else
493cdf0e10cSrcweir             {
494cdf0e10cSrcweir                 fprintf( stderr, "unrecognized option \"%s\"\n",
495cdf0e10cSrcweir                          argv[nArg] );
496cdf0e10cSrcweir                 printHelp( argv[0] );
497cdf0e10cSrcweir                 return 1;
498cdf0e10cSrcweir             }
499cdf0e10cSrcweir         }
500cdf0e10cSrcweir         else if( pInFile == NULL )
501cdf0e10cSrcweir             pInFile = argv[nArg];
502cdf0e10cSrcweir         else if( pOutFile == NULL )
503cdf0e10cSrcweir             pOutFile = argv[nArg];
504cdf0e10cSrcweir     }
505cdf0e10cSrcweir     if( ! pInFile )
506cdf0e10cSrcweir     {
507cdf0e10cSrcweir         fprintf( stderr, "no input file given\n" );
508cdf0e10cSrcweir         return 10;
509cdf0e10cSrcweir     }
510cdf0e10cSrcweir     if( ! pOutFile )
511cdf0e10cSrcweir     {
512cdf0e10cSrcweir         OString aFile( pInFile );
513cdf0e10cSrcweir         if( aFile.getLength() > 0 )
514cdf0e10cSrcweir         {
515cdf0e10cSrcweir             if( aFile.getLength() > 4 )
516cdf0e10cSrcweir             {
517cdf0e10cSrcweir                 if( aFile.matchIgnoreAsciiCase( OString( ".pdf" ), aFile.getLength()-4 ) )
518cdf0e10cSrcweir                     aOutFile.append( pInFile, aFile.getLength() - 4 );
519cdf0e10cSrcweir                 else
520cdf0e10cSrcweir                     aOutFile.append( aFile );
521cdf0e10cSrcweir             }
522cdf0e10cSrcweir             aOutFile.append( "_unzip.pdf" );
523cdf0e10cSrcweir             pOutFile = aOutFile.getStr();
524cdf0e10cSrcweir         }
525cdf0e10cSrcweir         else
526cdf0e10cSrcweir         {
527cdf0e10cSrcweir             fprintf( stderr, "no output file given\n" );
528cdf0e10cSrcweir             return 11;
529cdf0e10cSrcweir         }
530cdf0e10cSrcweir     }
531cdf0e10cSrcweir 
532cdf0e10cSrcweir     return handleFile( pInFile, pOutFile, pPassword, aHdl );
533cdf0e10cSrcweir }
534cdf0e10cSrcweir 
535