"""Print the text (or HTML) rendering of office documents to stdout.

The script connects to an already running office process through a UNO URL,
loads every file named on the command line as a hidden document and stores
it through an export filter into an output stream that forwards the bytes
to stdout.
"""
import getopt
import sys
import uno
from unohelper import Base, systemPathToFileUrl, absolutize
from os import getcwd

from com.sun.star.beans import PropertyValue
from com.sun.star.beans.PropertyState import DIRECT_VALUE
from com.sun.star.uno import Exception as UnoException
from com.sun.star.io import IOException, XInputStream, XOutputStream


class OutputStream(Base, XOutputStream):
    """XOutputStream implementation that forwards written bytes to stdout."""

    def __init__(self):
        # Set to 1 once closeOutput() has been called.
        self.closed = 0

    def closeOutput(self):
        """Record that the stream has been closed; stdout itself stays open."""
        self.closed = 1

    def writeBytes(self, seq):
        """Write the payload carried in ``seq.value`` to stdout.

        On Python 3 ``sys.stdout`` only accepts ``str``, so the bytes go to
        the underlying binary buffer when one exists; on Python 2 there is
        no ``buffer`` attribute and ``sys.stdout`` is used directly.
        """
        target = getattr(sys.stdout, "buffer", sys.stdout)
        target.write(seq.value)

    def flush(self):
        """Flush whatever stdout has buffered so far."""
        getattr(sys.stdout, "buffer", sys.stdout).flush()


def main():
    """Parse the command line, convert every given file, then exit.

    Always leaves through ``sys.exit()``: with the default status when only
    the help text was requested or no file was given, with 0 when every
    file was converted, and with 1 when option parsing, the connection or
    any single conversion failed.
    """
    retVal = 0

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "hc:", ["help", "connection-string=", "html"])
        url = "uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext"
        filterName = "Text (Encoded)"
        for o, a in opts:
            if o in ("-h", "--help"):
                usage()
                sys.exit()
            if o in ("-c", "--connection-string"):
                url = "uno:" + a + ";urp;StarOffice.ComponentContext"
            if o == "--html":
                filterName = "HTML (StarWriter)"

        # Existing behaviour: the chosen filter name is echoed on stdout
        # before any document output.  Flush right away so it cannot be
        # reordered after bytes written straight to the binary buffer.
        sys.stdout.write(filterName + "\n")
        sys.stdout.flush()
        if not args:
            usage()
            sys.exit()

        # Bootstrap a local context only to obtain the resolver, then work
        # with the context of the process the URL points to.
        ctxLocal = uno.getComponentContext()
        smgrLocal = ctxLocal.ServiceManager

        resolver = smgrLocal.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", ctxLocal)
        ctx = resolver.resolve(url)
        smgr = ctx.ServiceManager

        desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)

        cwd = systemPathToFileUrl(getcwd())
        outProps = (
            PropertyValue("FilterName", 0, filterName, 0),
            PropertyValue("OutputStream", 0, OutputStream(), 0),
        )
        inProps = (PropertyValue("Hidden", 0, True, 0),)
        for path in args:
            # Reset per file: otherwise a failed load would leave `doc`
            # pointing at the previous, already disposed document and it
            # would be disposed a second time.
            doc = None
            try:
                fileUrl = uno.absolutize(cwd, systemPathToFileUrl(path))
                doc = desktop.loadComponentFromURL(fileUrl, "_blank", 0, inProps)

                if doc is None:
                    raise UnoException("Couldn't open stream for unknown reason", None)

                doc.storeToURL("private:stream", outProps)
            except IOException as e:
                sys.stderr.write("Error during conversion: " + e.Message + "\n")
                retVal = 1
            except UnoException as e:
                sys.stderr.write(
                    "Error (" + repr(e.__class__) + ") during conversion:" + e.Message + "\n")
                retVal = 1
            finally:
                # Release the document even when an unexpected error
                # escapes the handlers above.
                if doc is not None:
                    doc.dispose()

    except UnoException as e:
        sys.stderr.write("Error (" + repr(e.__class__) + ") :" + e.Message + "\n")
        retVal = 1
    except getopt.GetoptError as e:
        sys.stderr.write(str(e) + "\n")
        usage()
        retVal = 1

    sys.exit(retVal)


def usage():
    """Write the command-line help text to stderr."""
    sys.stderr.write(
        "usage: ooextract.py --help |\n"
        " [-c <connection-string> | --connection-string=<connection-string>]\n"
        " [--html]\n"
        " file1 file2 ...\n"
        "\n"
        "Extracts plain text from documents and prints it to stdout.\n"
        "Requires an OpenOffice.org instance to be running. The script and the\n"
        "running OpenOffice.org instance must be able to access the file with\n"
        "by the same system path.\n"
        "\n"
        "-c <connection-string> | --connection-string=<connection-string>\n"
        " The connection-string part of a uno url to where the\n"
        " the script should connect to in order to do the conversion.\n"
        " The strings defaults to socket,host=localhost,port=2002\n"
        "--html \n"
        " Instead of the text filter, the writer html filter is used\n"
    )


if __name__ == "__main__":
    main()