1*cdf0e10cSrcweir /************************************************************************* 2*cdf0e10cSrcweir * 3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4*cdf0e10cSrcweir * 5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates. 6*cdf0e10cSrcweir * 7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite 8*cdf0e10cSrcweir * 9*cdf0e10cSrcweir * This file is part of OpenOffice.org. 10*cdf0e10cSrcweir * 11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify 12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3 13*cdf0e10cSrcweir * only, as published by the Free Software Foundation. 14*cdf0e10cSrcweir * 15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful, 16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of 17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details 19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code). 20*cdf0e10cSrcweir * 21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License 22*cdf0e10cSrcweir * version 3 along with OpenOffice.org. If not, see 23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html> 24*cdf0e10cSrcweir * for a copy of the LGPLv3 License. 25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir 28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 29*cdf0e10cSrcweir #include "precompiled_sdext.hxx" 30*cdf0e10cSrcweir 31*cdf0e10cSrcweir #include "pdfiadaptor.hxx" 32*cdf0e10cSrcweir #include "filterdet.hxx" 33*cdf0e10cSrcweir #include "saxemitter.hxx" 34*cdf0e10cSrcweir #include "odfemitter.hxx" 35*cdf0e10cSrcweir #include "inc/wrapper.hxx" 36*cdf0e10cSrcweir #include "inc/contentsink.hxx" 37*cdf0e10cSrcweir #include "tree/pdfiprocessor.hxx" 38*cdf0e10cSrcweir 39*cdf0e10cSrcweir #include <osl/file.h> 40*cdf0e10cSrcweir #include <osl/thread.h> 41*cdf0e10cSrcweir #include <osl/diagnose.h> 42*cdf0e10cSrcweir #include <cppuhelper/factory.hxx> 43*cdf0e10cSrcweir #include <cppuhelper/implementationentry.hxx> 44*cdf0e10cSrcweir #include <com/sun/star/lang/XMultiComponentFactory.hpp> 45*cdf0e10cSrcweir #include <com/sun/star/uno/RuntimeException.hpp> 46*cdf0e10cSrcweir #include <com/sun/star/io/XInputStream.hpp> 47*cdf0e10cSrcweir #include <com/sun/star/frame/XLoadable.hpp> 48*cdf0e10cSrcweir #include <com/sun/star/xml/sax/XDocumentHandler.hpp> 49*cdf0e10cSrcweir #include <com/sun/star/io/XSeekable.hpp> 50*cdf0e10cSrcweir 51*cdf0e10cSrcweir 52*cdf0e10cSrcweir #include <boost/shared_ptr.hpp> 53*cdf0e10cSrcweir 54*cdf0e10cSrcweir using namespace com::sun::star; 55*cdf0e10cSrcweir 56*cdf0e10cSrcweir 57*cdf0e10cSrcweir namespace pdfi 58*cdf0e10cSrcweir { 59*cdf0e10cSrcweir 60*cdf0e10cSrcweir PDFIHybridAdaptor::PDFIHybridAdaptor( const uno::Reference< uno::XComponentContext >& xContext ) : 61*cdf0e10cSrcweir PDFIHybridAdaptorBase( m_aMutex ), 62*cdf0e10cSrcweir m_xContext( xContext ), 63*cdf0e10cSrcweir m_xModel() 64*cdf0e10cSrcweir { 65*cdf0e10cSrcweir } 66*cdf0e10cSrcweir 67*cdf0e10cSrcweir // XFilter 68*cdf0e10cSrcweir sal_Bool SAL_CALL PDFIHybridAdaptor::filter( const uno::Sequence< beans::PropertyValue >& rFilterData ) throw( uno::RuntimeException ) 69*cdf0e10cSrcweir { 70*cdf0e10cSrcweir sal_Bool bRet = sal_False; 71*cdf0e10cSrcweir if( m_xModel.is() ) 72*cdf0e10cSrcweir { 73*cdf0e10cSrcweir uno::Reference< io::XStream > xSubStream; 74*cdf0e10cSrcweir rtl::OUString aPwd; 75*cdf0e10cSrcweir const beans::PropertyValue* pAttribs = rFilterData.getConstArray(); 76*cdf0e10cSrcweir sal_Int32 nAttribs = rFilterData.getLength(); 77*cdf0e10cSrcweir sal_Int32 nPwPos = -1; 78*cdf0e10cSrcweir for( sal_Int32 i = 0; i < nAttribs; i++ ) 79*cdf0e10cSrcweir { 80*cdf0e10cSrcweir #if OSL_DEBUG_LEVEL > 1 81*cdf0e10cSrcweir rtl::OUString aVal( RTL_CONSTASCII_USTRINGPARAM( "<no string>" ) ); 82*cdf0e10cSrcweir pAttribs[i].Value >>= aVal; 83*cdf0e10cSrcweir OSL_TRACE( "filter: Attrib: %s = %s\n", 84*cdf0e10cSrcweir rtl::OUStringToOString( pAttribs[i].Name, RTL_TEXTENCODING_UTF8 ).getStr(), 85*cdf0e10cSrcweir rtl::OUStringToOString( aVal, RTL_TEXTENCODING_UTF8 ).getStr() ); 86*cdf0e10cSrcweir #endif 87*cdf0e10cSrcweir if( pAttribs[i].Name.equalsAscii( "EmbeddedSubstream" ) ) 88*cdf0e10cSrcweir pAttribs[i].Value >>= xSubStream; 89*cdf0e10cSrcweir else if( pAttribs[i].Name.equalsAscii( "Password" ) ) 90*cdf0e10cSrcweir { 91*cdf0e10cSrcweir nPwPos = i; 92*cdf0e10cSrcweir pAttribs[i].Value >>= aPwd; 93*cdf0e10cSrcweir } 94*cdf0e10cSrcweir } 95*cdf0e10cSrcweir bool bAddPwdProp = false; 96*cdf0e10cSrcweir if( ! xSubStream.is() ) 97*cdf0e10cSrcweir { 98*cdf0e10cSrcweir uno::Reference< io::XInputStream > xInput; 99*cdf0e10cSrcweir for( sal_Int32 i = 0; i < nAttribs; i++ ) 100*cdf0e10cSrcweir { 101*cdf0e10cSrcweir if( pAttribs[i].Name.equalsAscii( "InputStream" ) ) 102*cdf0e10cSrcweir { 103*cdf0e10cSrcweir pAttribs[i].Value >>= xInput; 104*cdf0e10cSrcweir break; 105*cdf0e10cSrcweir } 106*cdf0e10cSrcweir } 107*cdf0e10cSrcweir if( xInput.is() ) 108*cdf0e10cSrcweir { 109*cdf0e10cSrcweir // TODO(P2): extracting hybrid substream twice - once during detection, second time here 110*cdf0e10cSrcweir uno::Reference< io::XSeekable > xSeek( xInput, uno::UNO_QUERY ); 111*cdf0e10cSrcweir if( xSeek.is() ) 112*cdf0e10cSrcweir xSeek->seek( 0 ); 113*cdf0e10cSrcweir oslFileHandle aFile = NULL; 114*cdf0e10cSrcweir sal_uInt64 nWritten = 0; 115*cdf0e10cSrcweir rtl::OUString aURL; 116*cdf0e10cSrcweir if( osl_createTempFile( NULL, &aFile, &aURL.pData ) == osl_File_E_None ) 117*cdf0e10cSrcweir { 118*cdf0e10cSrcweir OSL_TRACE( "created temp file %s\n", rtl::OUStringToOString( aURL, RTL_TEXTENCODING_UTF8 ).getStr() ); 119*cdf0e10cSrcweir const sal_Int32 nBufSize = 4096; 120*cdf0e10cSrcweir uno::Sequence<sal_Int8> aBuf(nBufSize); 121*cdf0e10cSrcweir // copy the bytes 122*cdf0e10cSrcweir sal_Int32 nBytes; 123*cdf0e10cSrcweir do 124*cdf0e10cSrcweir { 125*cdf0e10cSrcweir nBytes = xInput->readBytes( aBuf, nBufSize ); 126*cdf0e10cSrcweir if( nBytes > 0 ) 127*cdf0e10cSrcweir { 128*cdf0e10cSrcweir osl_writeFile( aFile, aBuf.getConstArray(), nBytes, &nWritten ); 129*cdf0e10cSrcweir if( static_cast<sal_Int32>(nWritten) != nBytes ) 130*cdf0e10cSrcweir { 131*cdf0e10cSrcweir xInput.clear(); 132*cdf0e10cSrcweir break; 133*cdf0e10cSrcweir } 134*cdf0e10cSrcweir } 135*cdf0e10cSrcweir } while( nBytes == nBufSize ); 136*cdf0e10cSrcweir osl_closeFile( aFile ); 137*cdf0e10cSrcweir if( xInput.is() ) 138*cdf0e10cSrcweir { 139*cdf0e10cSrcweir rtl::OUString aEmbedMimetype; 140*cdf0e10cSrcweir rtl::OUString aOrgPwd( aPwd ); 141*cdf0e10cSrcweir xSubStream = getAdditionalStream( aURL, aEmbedMimetype, aPwd, m_xContext, rFilterData, true ); 142*cdf0e10cSrcweir if( aOrgPwd != aPwd ) 143*cdf0e10cSrcweir bAddPwdProp = true; 144*cdf0e10cSrcweir } 145*cdf0e10cSrcweir osl_removeFile( aURL.pData ); 146*cdf0e10cSrcweir } 147*cdf0e10cSrcweir else 148*cdf0e10cSrcweir xSubStream.clear(); 149*cdf0e10cSrcweir } 150*cdf0e10cSrcweir } 151*cdf0e10cSrcweir if( xSubStream.is() ) 152*cdf0e10cSrcweir { 153*cdf0e10cSrcweir uno::Sequence< uno::Any > aArgs( 2 ); 154*cdf0e10cSrcweir aArgs[0] <<= m_xModel; 155*cdf0e10cSrcweir aArgs[1] <<= xSubStream; 156*cdf0e10cSrcweir 157*cdf0e10cSrcweir OSL_TRACE( "try to instantiate subfilter\n" ); 158*cdf0e10cSrcweir uno::Reference< document::XFilter > xSubFilter; 159*cdf0e10cSrcweir try { 160*cdf0e10cSrcweir xSubFilter = uno::Reference<document::XFilter>( 161*cdf0e10cSrcweir m_xContext->getServiceManager()->createInstanceWithArgumentsAndContext( 162*cdf0e10cSrcweir rtl::OUString( RTL_CONSTASCII_USTRINGPARAM( "com.sun.star.document.OwnSubFilter" ) ), 163*cdf0e10cSrcweir aArgs, 164*cdf0e10cSrcweir m_xContext ), 165*cdf0e10cSrcweir uno::UNO_QUERY ); 166*cdf0e10cSrcweir } 167*cdf0e10cSrcweir catch(uno::Exception& e) 168*cdf0e10cSrcweir { 169*cdf0e10cSrcweir (void)e; 170*cdf0e10cSrcweir OSL_TRACE( "subfilter exception: %s\n", 171*cdf0e10cSrcweir OUStringToOString( e.Message, RTL_TEXTENCODING_UTF8 ).getStr() ); 172*cdf0e10cSrcweir } 173*cdf0e10cSrcweir 174*cdf0e10cSrcweir OSL_TRACE( "subfilter: %p\n", xSubFilter.get() ); 175*cdf0e10cSrcweir if( xSubFilter.is() ) 176*cdf0e10cSrcweir { 177*cdf0e10cSrcweir if( bAddPwdProp ) 178*cdf0e10cSrcweir { 179*cdf0e10cSrcweir uno::Sequence<beans::PropertyValue> aFilterData( rFilterData ); 180*cdf0e10cSrcweir if( nPwPos == -1 ) 181*cdf0e10cSrcweir { 182*cdf0e10cSrcweir nPwPos = aFilterData.getLength(); 183*cdf0e10cSrcweir aFilterData.realloc( nPwPos+1 ); 184*cdf0e10cSrcweir aFilterData[nPwPos].Name = rtl::OUString( 185*cdf0e10cSrcweir RTL_CONSTASCII_USTRINGPARAM( "Password" ) ); 186*cdf0e10cSrcweir } 187*cdf0e10cSrcweir aFilterData[nPwPos].Value <<= aPwd; 188*cdf0e10cSrcweir bRet = xSubFilter->filter( aFilterData ); 189*cdf0e10cSrcweir } 190*cdf0e10cSrcweir else 191*cdf0e10cSrcweir bRet = xSubFilter->filter( rFilterData ); 192*cdf0e10cSrcweir } 193*cdf0e10cSrcweir } 194*cdf0e10cSrcweir #if OSL_DEBUG_LEVEL > 1 195*cdf0e10cSrcweir else 196*cdf0e10cSrcweir OSL_TRACE( "PDFIAdaptor::filter: no embedded substream set\n" ); 197*cdf0e10cSrcweir #endif 198*cdf0e10cSrcweir } 199*cdf0e10cSrcweir #if OSL_DEBUG_LEVEL > 1 200*cdf0e10cSrcweir else 201*cdf0e10cSrcweir OSL_TRACE( "PDFIAdaptor::filter: no model set\n" ); 202*cdf0e10cSrcweir #endif 203*cdf0e10cSrcweir 204*cdf0e10cSrcweir return bRet; 205*cdf0e10cSrcweir } 206*cdf0e10cSrcweir 207*cdf0e10cSrcweir void SAL_CALL PDFIHybridAdaptor::cancel() throw() 208*cdf0e10cSrcweir { 209*cdf0e10cSrcweir } 210*cdf0e10cSrcweir 211*cdf0e10cSrcweir //XImporter 212*cdf0e10cSrcweir void SAL_CALL PDFIHybridAdaptor::setTargetDocument( const uno::Reference< lang::XComponent >& xDocument ) throw( lang::IllegalArgumentException ) 213*cdf0e10cSrcweir { 214*cdf0e10cSrcweir OSL_TRACE( "PDFIAdaptor::setTargetDocument\n" ); 215*cdf0e10cSrcweir m_xModel = uno::Reference< frame::XModel >( xDocument, uno::UNO_QUERY ); 216*cdf0e10cSrcweir if( xDocument.is() && ! m_xModel.is() ) 217*cdf0e10cSrcweir throw lang::IllegalArgumentException(); 218*cdf0e10cSrcweir } 219*cdf0e10cSrcweir 220*cdf0e10cSrcweir //--------------------------------------------------------------------------------------- 221*cdf0e10cSrcweir 222*cdf0e10cSrcweir PDFIRawAdaptor::PDFIRawAdaptor( const uno::Reference< uno::XComponentContext >& xContext ) : 223*cdf0e10cSrcweir PDFIAdaptorBase( m_aMutex ), 224*cdf0e10cSrcweir m_xContext( xContext ), 225*cdf0e10cSrcweir m_xModel(), 226*cdf0e10cSrcweir m_pVisitorFactory(), 227*cdf0e10cSrcweir m_bEnableToplevelText(false) 228*cdf0e10cSrcweir { 229*cdf0e10cSrcweir } 230*cdf0e10cSrcweir 231*cdf0e10cSrcweir void PDFIRawAdaptor::setTreeVisitorFactory(const TreeVisitorFactorySharedPtr& rVisitorFactory) 232*cdf0e10cSrcweir { 233*cdf0e10cSrcweir m_pVisitorFactory = rVisitorFactory; 234*cdf0e10cSrcweir } 235*cdf0e10cSrcweir 236*cdf0e10cSrcweir bool PDFIRawAdaptor::parse( const uno::Reference<io::XInputStream>& xInput, 237*cdf0e10cSrcweir const uno::Reference<task::XInteractionHandler>& xIHdl, 238*cdf0e10cSrcweir const rtl::OUString& rPwd, 239*cdf0e10cSrcweir const uno::Reference<task::XStatusIndicator>& xStatus, 240*cdf0e10cSrcweir const XmlEmitterSharedPtr& rEmitter, 241*cdf0e10cSrcweir const rtl::OUString& rURL ) 242*cdf0e10cSrcweir { 243*cdf0e10cSrcweir // container for metaformat 244*cdf0e10cSrcweir boost::shared_ptr<PDFIProcessor> pSink( 245*cdf0e10cSrcweir new PDFIProcessor(xStatus, m_xContext)); 246*cdf0e10cSrcweir 247*cdf0e10cSrcweir // TEMP! TEMP! 248*cdf0e10cSrcweir if( m_bEnableToplevelText ) 249*cdf0e10cSrcweir pSink->enableToplevelText(); 250*cdf0e10cSrcweir 251*cdf0e10cSrcweir bool bSuccess=false; 252*cdf0e10cSrcweir 253*cdf0e10cSrcweir if( xInput.is() && (!rURL.getLength() || rURL.compareToAscii( "file:", 5 ) != 0) ) 254*cdf0e10cSrcweir bSuccess = xpdf_ImportFromStream( xInput, pSink, xIHdl, rPwd, m_xContext ); 255*cdf0e10cSrcweir else 256*cdf0e10cSrcweir bSuccess = xpdf_ImportFromFile( rURL, pSink, xIHdl, rPwd, m_xContext ); 257*cdf0e10cSrcweir 258*cdf0e10cSrcweir if( bSuccess ) 259*cdf0e10cSrcweir pSink->emit(*rEmitter,*m_pVisitorFactory); 260*cdf0e10cSrcweir 261*cdf0e10cSrcweir return bSuccess; 262*cdf0e10cSrcweir } 263*cdf0e10cSrcweir 264*cdf0e10cSrcweir bool PDFIRawAdaptor::odfConvert( const rtl::OUString& rURL, 265*cdf0e10cSrcweir const uno::Reference<io::XOutputStream>& xOutput, 266*cdf0e10cSrcweir const uno::Reference<task::XStatusIndicator>& xStatus ) 267*cdf0e10cSrcweir { 268*cdf0e10cSrcweir XmlEmitterSharedPtr pEmitter = createOdfEmitter(xOutput); 269*cdf0e10cSrcweir const bool bSuccess = parse(uno::Reference<io::XInputStream>(), 270*cdf0e10cSrcweir uno::Reference<task::XInteractionHandler>(), 271*cdf0e10cSrcweir rtl::OUString(), 272*cdf0e10cSrcweir xStatus,pEmitter,rURL); 273*cdf0e10cSrcweir 274*cdf0e10cSrcweir // tell input stream that it is no longer needed 275*cdf0e10cSrcweir xOutput->closeOutput(); 276*cdf0e10cSrcweir 277*cdf0e10cSrcweir return bSuccess; 278*cdf0e10cSrcweir } 279*cdf0e10cSrcweir 280*cdf0e10cSrcweir // XImportFilter 281*cdf0e10cSrcweir sal_Bool SAL_CALL PDFIRawAdaptor::importer( const uno::Sequence< beans::PropertyValue >& rSourceData, 282*cdf0e10cSrcweir const uno::Reference< xml::sax::XDocumentHandler >& rHdl, 283*cdf0e10cSrcweir const uno::Sequence< rtl::OUString >& /*rUserData*/ ) throw( uno::RuntimeException ) 284*cdf0e10cSrcweir { 285*cdf0e10cSrcweir // get the InputStream carrying the PDF content 286*cdf0e10cSrcweir uno::Reference< io::XInputStream > xInput; 287*cdf0e10cSrcweir uno::Reference< task::XStatusIndicator > xStatus; 288*cdf0e10cSrcweir uno::Reference< task::XInteractionHandler > xInteractionHandler; 289*cdf0e10cSrcweir rtl::OUString aURL; 290*cdf0e10cSrcweir rtl::OUString aPwd; 291*cdf0e10cSrcweir const beans::PropertyValue* pAttribs = rSourceData.getConstArray(); 292*cdf0e10cSrcweir sal_Int32 nAttribs = rSourceData.getLength(); 293*cdf0e10cSrcweir for( sal_Int32 i = 0; i < nAttribs; i++, pAttribs++ ) 294*cdf0e10cSrcweir { 295*cdf0e10cSrcweir OSL_TRACE("importer Attrib: %s\n", OUStringToOString( pAttribs->Name, RTL_TEXTENCODING_UTF8 ).getStr() ); 296*cdf0e10cSrcweir if( pAttribs->Name.equalsAscii( "InputStream" ) ) 297*cdf0e10cSrcweir pAttribs->Value >>= xInput; 298*cdf0e10cSrcweir else if( pAttribs->Name.equalsAscii( "URL" ) ) 299*cdf0e10cSrcweir pAttribs->Value >>= aURL; 300*cdf0e10cSrcweir else if( pAttribs->Name.equalsAscii( "StatusIndicator" ) ) 301*cdf0e10cSrcweir pAttribs->Value >>= xStatus; 302*cdf0e10cSrcweir else if( pAttribs->Name.equalsAscii( "InteractionHandler" ) ) 303*cdf0e10cSrcweir pAttribs->Value >>= xInteractionHandler; 304*cdf0e10cSrcweir else if( pAttribs->Name.equalsAscii( "Password" ) ) 305*cdf0e10cSrcweir pAttribs->Value >>= aPwd; 306*cdf0e10cSrcweir } 307*cdf0e10cSrcweir if( !xInput.is() ) 308*cdf0e10cSrcweir return sal_False; 309*cdf0e10cSrcweir 310*cdf0e10cSrcweir XmlEmitterSharedPtr pEmitter = createSaxEmitter(rHdl); 311*cdf0e10cSrcweir const bool bSuccess = parse(xInput,xInteractionHandler, aPwd, xStatus,pEmitter,aURL); 312*cdf0e10cSrcweir 313*cdf0e10cSrcweir // tell input stream that it is no longer needed 314*cdf0e10cSrcweir xInput->closeInput(); 315*cdf0e10cSrcweir xInput.clear(); 316*cdf0e10cSrcweir 317*cdf0e10cSrcweir return bSuccess; 318*cdf0e10cSrcweir } 319*cdf0e10cSrcweir 320*cdf0e10cSrcweir //XImporter 321*cdf0e10cSrcweir void SAL_CALL PDFIRawAdaptor::setTargetDocument( const uno::Reference< lang::XComponent >& xDocument ) throw( lang::IllegalArgumentException ) 322*cdf0e10cSrcweir { 323*cdf0e10cSrcweir OSL_TRACE( "PDFIAdaptor::setTargetDocument\n" ); 324*cdf0e10cSrcweir m_xModel = uno::Reference< frame::XModel >( xDocument, uno::UNO_QUERY ); 325*cdf0e10cSrcweir if( xDocument.is() && ! m_xModel.is() ) 326*cdf0e10cSrcweir throw lang::IllegalArgumentException(); 327*cdf0e10cSrcweir } 328*cdf0e10cSrcweir 329*cdf0e10cSrcweir } 330