1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
29*cdf0e10cSrcweir #include "precompiled_sdext.hxx"
30*cdf0e10cSrcweir 
31*cdf0e10cSrcweir #include "pdfiadaptor.hxx"
32*cdf0e10cSrcweir #include "filterdet.hxx"
33*cdf0e10cSrcweir #include "saxemitter.hxx"
34*cdf0e10cSrcweir #include "odfemitter.hxx"
35*cdf0e10cSrcweir #include "inc/wrapper.hxx"
36*cdf0e10cSrcweir #include "inc/contentsink.hxx"
37*cdf0e10cSrcweir #include "tree/pdfiprocessor.hxx"
38*cdf0e10cSrcweir 
39*cdf0e10cSrcweir #include <osl/file.h>
40*cdf0e10cSrcweir #include <osl/thread.h>
41*cdf0e10cSrcweir #include <osl/diagnose.h>
42*cdf0e10cSrcweir #include <cppuhelper/factory.hxx>
43*cdf0e10cSrcweir #include <cppuhelper/implementationentry.hxx>
44*cdf0e10cSrcweir #include <com/sun/star/lang/XMultiComponentFactory.hpp>
45*cdf0e10cSrcweir #include <com/sun/star/uno/RuntimeException.hpp>
46*cdf0e10cSrcweir #include <com/sun/star/io/XInputStream.hpp>
47*cdf0e10cSrcweir #include <com/sun/star/frame/XLoadable.hpp>
48*cdf0e10cSrcweir #include <com/sun/star/xml/sax/XDocumentHandler.hpp>
49*cdf0e10cSrcweir #include <com/sun/star/io/XSeekable.hpp>
50*cdf0e10cSrcweir 
51*cdf0e10cSrcweir 
52*cdf0e10cSrcweir #include <boost/shared_ptr.hpp>
53*cdf0e10cSrcweir 
54*cdf0e10cSrcweir using namespace com::sun::star;
55*cdf0e10cSrcweir 
56*cdf0e10cSrcweir 
57*cdf0e10cSrcweir namespace pdfi
58*cdf0e10cSrcweir {
59*cdf0e10cSrcweir 
60*cdf0e10cSrcweir PDFIHybridAdaptor::PDFIHybridAdaptor( const uno::Reference< uno::XComponentContext >& xContext ) :
61*cdf0e10cSrcweir 	PDFIHybridAdaptorBase( m_aMutex ),
62*cdf0e10cSrcweir 	m_xContext( xContext ),
63*cdf0e10cSrcweir 	m_xModel()
64*cdf0e10cSrcweir {
65*cdf0e10cSrcweir }
66*cdf0e10cSrcweir 
67*cdf0e10cSrcweir // XFilter
68*cdf0e10cSrcweir sal_Bool SAL_CALL PDFIHybridAdaptor::filter( const uno::Sequence< beans::PropertyValue >& rFilterData ) throw( uno::RuntimeException )
69*cdf0e10cSrcweir {
70*cdf0e10cSrcweir     sal_Bool bRet = sal_False;
71*cdf0e10cSrcweir     if( m_xModel.is() )
72*cdf0e10cSrcweir     {
73*cdf0e10cSrcweir         uno::Reference< io::XStream > xSubStream;
74*cdf0e10cSrcweir         rtl::OUString aPwd;
75*cdf0e10cSrcweir         const beans::PropertyValue* pAttribs = rFilterData.getConstArray();
76*cdf0e10cSrcweir         sal_Int32 nAttribs = rFilterData.getLength();
77*cdf0e10cSrcweir         sal_Int32 nPwPos = -1;
78*cdf0e10cSrcweir         for( sal_Int32 i = 0; i < nAttribs; i++ )
79*cdf0e10cSrcweir         {
80*cdf0e10cSrcweir             #if OSL_DEBUG_LEVEL > 1
81*cdf0e10cSrcweir             rtl::OUString aVal( RTL_CONSTASCII_USTRINGPARAM( "<no string>" ) );
82*cdf0e10cSrcweir             pAttribs[i].Value >>= aVal;
83*cdf0e10cSrcweir             OSL_TRACE( "filter: Attrib: %s = %s\n",
84*cdf0e10cSrcweir                        rtl::OUStringToOString( pAttribs[i].Name, RTL_TEXTENCODING_UTF8 ).getStr(),
85*cdf0e10cSrcweir                        rtl::OUStringToOString( aVal, RTL_TEXTENCODING_UTF8 ).getStr() );
86*cdf0e10cSrcweir             #endif
87*cdf0e10cSrcweir             if( pAttribs[i].Name.equalsAscii( "EmbeddedSubstream" ) )
88*cdf0e10cSrcweir                 pAttribs[i].Value >>= xSubStream;
89*cdf0e10cSrcweir             else if( pAttribs[i].Name.equalsAscii( "Password" ) )
90*cdf0e10cSrcweir             {
91*cdf0e10cSrcweir                 nPwPos = i;
92*cdf0e10cSrcweir                 pAttribs[i].Value >>= aPwd;
93*cdf0e10cSrcweir             }
94*cdf0e10cSrcweir         }
95*cdf0e10cSrcweir         bool bAddPwdProp = false;
96*cdf0e10cSrcweir         if( ! xSubStream.is() )
97*cdf0e10cSrcweir         {
98*cdf0e10cSrcweir             uno::Reference< io::XInputStream > xInput;
99*cdf0e10cSrcweir             for( sal_Int32 i = 0; i < nAttribs; i++ )
100*cdf0e10cSrcweir             {
101*cdf0e10cSrcweir                 if( pAttribs[i].Name.equalsAscii( "InputStream" ) )
102*cdf0e10cSrcweir                 {
103*cdf0e10cSrcweir                     pAttribs[i].Value >>= xInput;
104*cdf0e10cSrcweir                     break;
105*cdf0e10cSrcweir                 }
106*cdf0e10cSrcweir             }
107*cdf0e10cSrcweir             if( xInput.is() )
108*cdf0e10cSrcweir             {
109*cdf0e10cSrcweir 				// TODO(P2): extracting hybrid substream twice - once during detection, second time here
110*cdf0e10cSrcweir                 uno::Reference< io::XSeekable > xSeek( xInput, uno::UNO_QUERY );
111*cdf0e10cSrcweir                 if( xSeek.is() )
112*cdf0e10cSrcweir                     xSeek->seek( 0 );
113*cdf0e10cSrcweir                 oslFileHandle aFile = NULL;
114*cdf0e10cSrcweir                 sal_uInt64 nWritten = 0;
115*cdf0e10cSrcweir                 rtl::OUString aURL;
116*cdf0e10cSrcweir                 if( osl_createTempFile( NULL, &aFile, &aURL.pData ) == osl_File_E_None )
117*cdf0e10cSrcweir                 {
118*cdf0e10cSrcweir                     OSL_TRACE( "created temp file %s\n", rtl::OUStringToOString( aURL, RTL_TEXTENCODING_UTF8 ).getStr() );
119*cdf0e10cSrcweir                     const sal_Int32 nBufSize = 4096;
120*cdf0e10cSrcweir                     uno::Sequence<sal_Int8> aBuf(nBufSize);
121*cdf0e10cSrcweir                     // copy the bytes
122*cdf0e10cSrcweir                     sal_Int32 nBytes;
123*cdf0e10cSrcweir                     do
124*cdf0e10cSrcweir                     {
125*cdf0e10cSrcweir                         nBytes = xInput->readBytes( aBuf, nBufSize );
126*cdf0e10cSrcweir                         if( nBytes > 0 )
127*cdf0e10cSrcweir                         {
128*cdf0e10cSrcweir                             osl_writeFile( aFile, aBuf.getConstArray(), nBytes, &nWritten );
129*cdf0e10cSrcweir                             if( static_cast<sal_Int32>(nWritten) != nBytes )
130*cdf0e10cSrcweir                             {
131*cdf0e10cSrcweir                                 xInput.clear();
132*cdf0e10cSrcweir                                 break;
133*cdf0e10cSrcweir                             }
134*cdf0e10cSrcweir                         }
135*cdf0e10cSrcweir                     } while( nBytes == nBufSize );
136*cdf0e10cSrcweir                     osl_closeFile( aFile );
137*cdf0e10cSrcweir                     if( xInput.is() )
138*cdf0e10cSrcweir                     {
139*cdf0e10cSrcweir                         rtl::OUString aEmbedMimetype;
140*cdf0e10cSrcweir                         rtl::OUString aOrgPwd( aPwd );
141*cdf0e10cSrcweir                         xSubStream = getAdditionalStream( aURL, aEmbedMimetype, aPwd, m_xContext, rFilterData, true );
142*cdf0e10cSrcweir                         if( aOrgPwd != aPwd )
143*cdf0e10cSrcweir                             bAddPwdProp = true;
144*cdf0e10cSrcweir                     }
145*cdf0e10cSrcweir                     osl_removeFile( aURL.pData );
146*cdf0e10cSrcweir                 }
147*cdf0e10cSrcweir                 else
148*cdf0e10cSrcweir                     xSubStream.clear();
149*cdf0e10cSrcweir             }
150*cdf0e10cSrcweir         }
151*cdf0e10cSrcweir         if( xSubStream.is() )
152*cdf0e10cSrcweir         {
153*cdf0e10cSrcweir             uno::Sequence< uno::Any > aArgs( 2 );
154*cdf0e10cSrcweir             aArgs[0] <<= m_xModel;
155*cdf0e10cSrcweir             aArgs[1] <<= xSubStream;
156*cdf0e10cSrcweir 
157*cdf0e10cSrcweir             OSL_TRACE( "try to instantiate subfilter\n" );
158*cdf0e10cSrcweir             uno::Reference< document::XFilter > xSubFilter;
159*cdf0e10cSrcweir             try {
160*cdf0e10cSrcweir                 xSubFilter = uno::Reference<document::XFilter>(
161*cdf0e10cSrcweir                     m_xContext->getServiceManager()->createInstanceWithArgumentsAndContext(
162*cdf0e10cSrcweir                         rtl::OUString( RTL_CONSTASCII_USTRINGPARAM( "com.sun.star.document.OwnSubFilter" ) ),
163*cdf0e10cSrcweir                         aArgs,
164*cdf0e10cSrcweir                         m_xContext ),
165*cdf0e10cSrcweir                     uno::UNO_QUERY );
166*cdf0e10cSrcweir             }
167*cdf0e10cSrcweir             catch(uno::Exception& e)
168*cdf0e10cSrcweir             {
169*cdf0e10cSrcweir                 (void)e;
170*cdf0e10cSrcweir                 OSL_TRACE( "subfilter exception: %s\n",
171*cdf0e10cSrcweir                            OUStringToOString( e.Message, RTL_TEXTENCODING_UTF8 ).getStr() );
172*cdf0e10cSrcweir             }
173*cdf0e10cSrcweir 
174*cdf0e10cSrcweir             OSL_TRACE( "subfilter: %p\n", xSubFilter.get() );
175*cdf0e10cSrcweir             if( xSubFilter.is() )
176*cdf0e10cSrcweir             {
177*cdf0e10cSrcweir                 if( bAddPwdProp )
178*cdf0e10cSrcweir                 {
179*cdf0e10cSrcweir                     uno::Sequence<beans::PropertyValue> aFilterData( rFilterData );
180*cdf0e10cSrcweir                     if( nPwPos == -1 )
181*cdf0e10cSrcweir                     {
182*cdf0e10cSrcweir                         nPwPos = aFilterData.getLength();
183*cdf0e10cSrcweir                         aFilterData.realloc( nPwPos+1 );
184*cdf0e10cSrcweir                         aFilterData[nPwPos].Name = rtl::OUString(
185*cdf0e10cSrcweir                             RTL_CONSTASCII_USTRINGPARAM( "Password" ) );
186*cdf0e10cSrcweir                     }
187*cdf0e10cSrcweir                     aFilterData[nPwPos].Value <<= aPwd;
188*cdf0e10cSrcweir                     bRet = xSubFilter->filter( aFilterData );
189*cdf0e10cSrcweir                 }
190*cdf0e10cSrcweir                 else
191*cdf0e10cSrcweir                     bRet = xSubFilter->filter( rFilterData );
192*cdf0e10cSrcweir             }
193*cdf0e10cSrcweir         }
194*cdf0e10cSrcweir         #if OSL_DEBUG_LEVEL > 1
195*cdf0e10cSrcweir         else
196*cdf0e10cSrcweir             OSL_TRACE( "PDFIAdaptor::filter: no embedded substream set\n" );
197*cdf0e10cSrcweir         #endif
198*cdf0e10cSrcweir     }
199*cdf0e10cSrcweir     #if OSL_DEBUG_LEVEL > 1
200*cdf0e10cSrcweir     else
201*cdf0e10cSrcweir         OSL_TRACE( "PDFIAdaptor::filter: no model set\n" );
202*cdf0e10cSrcweir     #endif
203*cdf0e10cSrcweir 
204*cdf0e10cSrcweir     return bRet;
205*cdf0e10cSrcweir }
206*cdf0e10cSrcweir 
207*cdf0e10cSrcweir void SAL_CALL PDFIHybridAdaptor::cancel() throw()
208*cdf0e10cSrcweir {
209*cdf0e10cSrcweir }
210*cdf0e10cSrcweir 
211*cdf0e10cSrcweir //XImporter
212*cdf0e10cSrcweir void SAL_CALL PDFIHybridAdaptor::setTargetDocument( const uno::Reference< lang::XComponent >& xDocument ) throw( lang::IllegalArgumentException )
213*cdf0e10cSrcweir {
214*cdf0e10cSrcweir     OSL_TRACE( "PDFIAdaptor::setTargetDocument\n" );
215*cdf0e10cSrcweir     m_xModel = uno::Reference< frame::XModel >( xDocument, uno::UNO_QUERY );
216*cdf0e10cSrcweir     if( xDocument.is() && ! m_xModel.is() )
217*cdf0e10cSrcweir         throw lang::IllegalArgumentException();
218*cdf0e10cSrcweir }
219*cdf0e10cSrcweir 
220*cdf0e10cSrcweir //---------------------------------------------------------------------------------------
221*cdf0e10cSrcweir 
222*cdf0e10cSrcweir PDFIRawAdaptor::PDFIRawAdaptor( const uno::Reference< uno::XComponentContext >& xContext ) :
223*cdf0e10cSrcweir     PDFIAdaptorBase( m_aMutex ),
224*cdf0e10cSrcweir     m_xContext( xContext ),
225*cdf0e10cSrcweir     m_xModel(),
226*cdf0e10cSrcweir     m_pVisitorFactory(),
227*cdf0e10cSrcweir     m_bEnableToplevelText(false)
228*cdf0e10cSrcweir {
229*cdf0e10cSrcweir }
230*cdf0e10cSrcweir 
231*cdf0e10cSrcweir void PDFIRawAdaptor::setTreeVisitorFactory(const TreeVisitorFactorySharedPtr& rVisitorFactory)
232*cdf0e10cSrcweir {
233*cdf0e10cSrcweir     m_pVisitorFactory = rVisitorFactory;
234*cdf0e10cSrcweir }
235*cdf0e10cSrcweir 
236*cdf0e10cSrcweir bool PDFIRawAdaptor::parse( const uno::Reference<io::XInputStream>&       xInput,
237*cdf0e10cSrcweir                             const uno::Reference<task::XInteractionHandler>& xIHdl,
238*cdf0e10cSrcweir                             const rtl::OUString&                          rPwd,
239*cdf0e10cSrcweir                             const uno::Reference<task::XStatusIndicator>& xStatus,
240*cdf0e10cSrcweir                             const XmlEmitterSharedPtr&                    rEmitter,
241*cdf0e10cSrcweir                             const rtl::OUString&                          rURL )
242*cdf0e10cSrcweir {
243*cdf0e10cSrcweir     // container for metaformat
244*cdf0e10cSrcweir     boost::shared_ptr<PDFIProcessor> pSink(
245*cdf0e10cSrcweir         new PDFIProcessor(xStatus, m_xContext));
246*cdf0e10cSrcweir 
247*cdf0e10cSrcweir     // TEMP! TEMP!
248*cdf0e10cSrcweir     if( m_bEnableToplevelText )
249*cdf0e10cSrcweir         pSink->enableToplevelText();
250*cdf0e10cSrcweir 
251*cdf0e10cSrcweir     bool bSuccess=false;
252*cdf0e10cSrcweir 
253*cdf0e10cSrcweir     if( xInput.is() && (!rURL.getLength() || rURL.compareToAscii( "file:", 5 ) != 0) )
254*cdf0e10cSrcweir         bSuccess = xpdf_ImportFromStream( xInput, pSink, xIHdl, rPwd, m_xContext );
255*cdf0e10cSrcweir     else
256*cdf0e10cSrcweir         bSuccess = xpdf_ImportFromFile( rURL, pSink, xIHdl, rPwd, m_xContext );
257*cdf0e10cSrcweir 
258*cdf0e10cSrcweir     if( bSuccess )
259*cdf0e10cSrcweir         pSink->emit(*rEmitter,*m_pVisitorFactory);
260*cdf0e10cSrcweir 
261*cdf0e10cSrcweir     return bSuccess;
262*cdf0e10cSrcweir }
263*cdf0e10cSrcweir 
264*cdf0e10cSrcweir bool PDFIRawAdaptor::odfConvert( const rtl::OUString&                          rURL,
265*cdf0e10cSrcweir                                  const uno::Reference<io::XOutputStream>&      xOutput,
266*cdf0e10cSrcweir                                  const uno::Reference<task::XStatusIndicator>& xStatus )
267*cdf0e10cSrcweir {
268*cdf0e10cSrcweir     XmlEmitterSharedPtr pEmitter = createOdfEmitter(xOutput);
269*cdf0e10cSrcweir     const bool bSuccess = parse(uno::Reference<io::XInputStream>(),
270*cdf0e10cSrcweir                                 uno::Reference<task::XInteractionHandler>(),
271*cdf0e10cSrcweir                                 rtl::OUString(),
272*cdf0e10cSrcweir                                 xStatus,pEmitter,rURL);
273*cdf0e10cSrcweir 
274*cdf0e10cSrcweir     // tell input stream that it is no longer needed
275*cdf0e10cSrcweir 	xOutput->closeOutput();
276*cdf0e10cSrcweir 
277*cdf0e10cSrcweir     return bSuccess;
278*cdf0e10cSrcweir }
279*cdf0e10cSrcweir 
280*cdf0e10cSrcweir // XImportFilter
281*cdf0e10cSrcweir sal_Bool SAL_CALL PDFIRawAdaptor::importer( const uno::Sequence< beans::PropertyValue >&        rSourceData,
282*cdf0e10cSrcweir                                             const uno::Reference< xml::sax::XDocumentHandler >& rHdl,
283*cdf0e10cSrcweir                                             const uno::Sequence< rtl::OUString >&               /*rUserData*/ ) throw( uno::RuntimeException )
284*cdf0e10cSrcweir {
285*cdf0e10cSrcweir     // get the InputStream carrying the PDF content
286*cdf0e10cSrcweir     uno::Reference< io::XInputStream > xInput;
287*cdf0e10cSrcweir     uno::Reference< task::XStatusIndicator > xStatus;
288*cdf0e10cSrcweir     uno::Reference< task::XInteractionHandler > xInteractionHandler;
289*cdf0e10cSrcweir     rtl::OUString aURL;
290*cdf0e10cSrcweir     rtl::OUString aPwd;
291*cdf0e10cSrcweir     const beans::PropertyValue* pAttribs = rSourceData.getConstArray();
292*cdf0e10cSrcweir     sal_Int32 nAttribs = rSourceData.getLength();
293*cdf0e10cSrcweir     for( sal_Int32 i = 0; i < nAttribs; i++, pAttribs++ )
294*cdf0e10cSrcweir     {
295*cdf0e10cSrcweir         OSL_TRACE("importer Attrib: %s\n", OUStringToOString( pAttribs->Name, RTL_TEXTENCODING_UTF8 ).getStr() );
296*cdf0e10cSrcweir         if( pAttribs->Name.equalsAscii( "InputStream" ) )
297*cdf0e10cSrcweir             pAttribs->Value >>= xInput;
298*cdf0e10cSrcweir         else if( pAttribs->Name.equalsAscii( "URL" ) )
299*cdf0e10cSrcweir             pAttribs->Value >>= aURL;
300*cdf0e10cSrcweir         else if( pAttribs->Name.equalsAscii( "StatusIndicator" ) )
301*cdf0e10cSrcweir             pAttribs->Value >>= xStatus;
302*cdf0e10cSrcweir         else if( pAttribs->Name.equalsAscii( "InteractionHandler" ) )
303*cdf0e10cSrcweir             pAttribs->Value >>= xInteractionHandler;
304*cdf0e10cSrcweir         else if( pAttribs->Name.equalsAscii( "Password" ) )
305*cdf0e10cSrcweir             pAttribs->Value >>= aPwd;
306*cdf0e10cSrcweir     }
307*cdf0e10cSrcweir     if( !xInput.is() )
308*cdf0e10cSrcweir         return sal_False;
309*cdf0e10cSrcweir 
310*cdf0e10cSrcweir     XmlEmitterSharedPtr pEmitter = createSaxEmitter(rHdl);
311*cdf0e10cSrcweir     const bool bSuccess = parse(xInput,xInteractionHandler, aPwd, xStatus,pEmitter,aURL);
312*cdf0e10cSrcweir 
313*cdf0e10cSrcweir     // tell input stream that it is no longer needed
314*cdf0e10cSrcweir     xInput->closeInput();
315*cdf0e10cSrcweir     xInput.clear();
316*cdf0e10cSrcweir 
317*cdf0e10cSrcweir     return bSuccess;
318*cdf0e10cSrcweir }
319*cdf0e10cSrcweir 
320*cdf0e10cSrcweir //XImporter
321*cdf0e10cSrcweir void SAL_CALL PDFIRawAdaptor::setTargetDocument( const uno::Reference< lang::XComponent >& xDocument ) throw( lang::IllegalArgumentException )
322*cdf0e10cSrcweir {
323*cdf0e10cSrcweir     OSL_TRACE( "PDFIAdaptor::setTargetDocument\n" );
324*cdf0e10cSrcweir     m_xModel = uno::Reference< frame::XModel >( xDocument, uno::UNO_QUERY );
325*cdf0e10cSrcweir     if( xDocument.is() && ! m_xModel.is() )
326*cdf0e10cSrcweir         throw lang::IllegalArgumentException();
327*cdf0e10cSrcweir }
328*cdf0e10cSrcweir 
329*cdf0e10cSrcweir }
330