# *************************************************************
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# *************************************************************

"""Convert documents through a running office instance and dump them to stdout.

Each file named on the command line is loaded hidden via the UNO bridge and
stored to a ``private:stream`` URL whose output stream forwards every chunk
to this process's standard output.  See ``usage()`` for the command line.
"""

import getopt
import sys
import uno
from unohelper import Base,systemPathToFileUrl, absolutize
from os import getcwd

from com.sun.star.beans import PropertyValue
from com.sun.star.beans.PropertyState import DIRECT_VALUE
from com.sun.star.uno import Exception as UnoException
from com.sun.star.io import IOException,XInputStream, XOutputStream


class OutputStream( Base, XOutputStream ):
    """XOutputStream implementation that forwards written bytes to stdout."""

    def __init__( self ):
        # Set to 1 once closeOutput() has been called.
        self.closed = 0

    def closeOutput(self):
        """Record that the stream was closed; stdout itself is left open."""
        self.closed = 1

    def writeBytes( self, seq ):
        """Write the payload of ``seq`` (read from ``seq.value``) to stdout."""
        # Python 3 exposes the byte-oriented stream as sys.stdout.buffer;
        # Python 2's sys.stdout has no such attribute and accepts byte
        # strings directly, so fall back to sys.stdout itself.
        # NOTE(review): assumes seq.value is a byte string -- confirm
        # against the pyuno ByteSequence type.
        target = getattr( sys.stdout, "buffer", sys.stdout )
        target.write( seq.value )

    def flush( self ):
        """Push any buffered output through to the underlying stdout."""
        sys.stdout.flush()


def main():
    """Parse the command line, convert every given file and exit.

    Always terminates the process via ``sys.exit``: status 1 if option
    parsing failed or any UNO error was reported, status 0 otherwise
    (including the ``--help`` and "no files given" paths).
    """
    retVal = 0

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "hc:", ["help", "connection-string=", "html"] )
        url = "uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext"
        filterName = "Text (Encoded)"
        for o, a in opts:
            if o in ("-h", "--help"):
                usage()
                sys.exit()
            if o in ("-c", "--connection-string"):
                url = "uno:" + a + ";urp;StarOffice.ComponentContext"
            if o == "--html":
                filterName = "HTML (StarWriter)"

        # The chosen filter name is deliberately not echoed: stdout carries
        # only the converted document data.
        if not len( args ):
            usage()
            sys.exit()

        # Bootstrap a local context only to obtain the URL resolver, then
        # switch to the context of the office instance behind `url`.
        ctxLocal = uno.getComponentContext()
        smgrLocal = ctxLocal.ServiceManager

        resolver = smgrLocal.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", ctxLocal )
        ctx = resolver.resolve( url )
        smgr = ctx.ServiceManager

        desktop = smgr.createInstanceWithContext( "com.sun.star.frame.Desktop", ctx )

        cwd = systemPathToFileUrl( getcwd() )
        outProps = (
            PropertyValue( "FilterName", 0, filterName, 0 ),
            PropertyValue( "OutputStream", 0, OutputStream(), 0 ) )
        # One-element tuple: loadComponentFromURL takes a sequence of properties.
        inProps = ( PropertyValue( "Hidden", 0, True, 0 ), )

        for path in args:
            # Reset per file so a failed load never leaves a reference to the
            # previous (already disposed) document behind.
            doc = None
            try:
                fileUrl = uno.absolutize( cwd, systemPathToFileUrl( path ) )
                doc = desktop.loadComponentFromURL( fileUrl, "_blank", 0, inProps )

                if not doc:
                    raise UnoException( "Couldn't open stream for unknown reason", None )

                doc.storeToURL( "private:stream", outProps )
            except IOException as e:
                sys.stderr.write( "Error during conversion: " + e.Message + "\n" )
                retVal = 1
            except UnoException as e:
                sys.stderr.write( "Error ("+repr(e.__class__)+") during conversion:" + e.Message + "\n" )
                retVal = 1
            finally:
                # Release the document whether or not the conversion worked.
                if doc:
                    doc.dispose()

    except UnoException as e:
        sys.stderr.write( "Error ("+repr(e.__class__)+") :" + e.Message + "\n" )
        retVal = 1
    except getopt.GetoptError as e:
        sys.stderr.write( str(e) + "\n" )
        usage()
        retVal = 1

    sys.exit(retVal)


def usage():
    """Write the command-line help text to stderr."""
    sys.stderr.write(
        "usage: ooextract.py --help |\n"
        " [-c <connection-string> | --connection-string=<connection-string>]\n"
        " [--html]\n"
        " file1 file2 ...\n"
        "\n"
        "Extracts plain text from documents and prints it to stdout.\n"
        "Requires an OpenOffice.org instance to be running. The script and the\n"
        "running OpenOffice.org instance must be able to access the file with\n"
        "by the same system path.\n"
        "\n"
        "-c <connection-string> | --connection-string=<connection-string>\n"
        " The connection-string part of a uno url to where the\n"
        " the script should connect to in order to do the conversion.\n"
        " The strings defaults to socket,host=localhost,port=2002\n"
        "--html \n"
        " Instead of the text filter, the writer html filter is used\n"
    )


# Runs unconditionally, as in the original script.
main()