xref: /aoo41x/main/pyuno/demo/ooextract.py (revision cdf0e10c)
1import getopt,sys
2import uno
3from unohelper import Base,systemPathToFileUrl, absolutize
4from os import getcwd
5
6from com.sun.star.beans import PropertyValue
7from com.sun.star.beans.PropertyState import DIRECT_VALUE
8from com.sun.star.uno import Exception as UnoException
9from com.sun.star.io import IOException,XInputStream, XOutputStream
10
class OutputStream(Base, XOutputStream):
    """Output stream implementation that forwards written data to stdout.

    main() passes an instance as the "OutputStream" property of the
    storeToURL() call, so whatever is written to it ends up on the
    script's standard output.
    """

    def __init__(self):
        # 0 until closeOutput() has been called, 1 afterwards.
        self.closed = 0

    def closeOutput(self):
        """Record that the stream has been closed."""
        self.closed = 1

    def writeBytes(self, seq):
        """Write the payload of *seq* (its ``value`` attribute) to stdout.

        When sys.stdout exposes a binary ``buffer`` layer the data is written
        there, because a text-mode stdout rejects byte strings.  Otherwise the
        data is handed to sys.stdout.write() directly.
        """
        binary_out = getattr(sys.stdout, "buffer", None)
        if binary_out is None:
            sys.stdout.write(seq.value)
        else:
            # Flush pending text first so it cannot end up behind data that
            # is written straight to the underlying binary stream.
            sys.stdout.flush()
            binary_out.write(seq.value)

    def flush(self):
        """Do nothing: this class keeps no buffer of its own."""
        pass
23
24
def main():
    """Export every document named on the command line to stdout.

    Command-line handling (sys.argv):
      -h / --help                 print the usage text and exit
      -c / --connection-string=   connection part of the uno url to resolve
      --html                      use the "HTML (StarWriter)" filter instead
                                  of "Text (Encoded)"
    Each remaining argument is a document path.  It is made absolute
    relative to the current working directory, loaded hidden through the
    resolved desktop, stored to "private:stream" with an OutputStream
    instance as target, and disposed again.

    The function never returns normally: it always ends in sys.exit().
    The exit status is 1 if an error was reported for any file, for the
    connection, or for the option parsing; otherwise it is 0.
    """
    retVal = 0

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "hc:", ["help", "connection-string=", "html"])
        url = "uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext"
        filterName = "Text (Encoded)"
        for o, a in opts:
            if o in ("-h", "--help"):
                usage()
                sys.exit()
            if o in ("-c", "--connection-string"):
                url = "uno:" + a + ";urp;StarOffice.ComponentContext"
            if o == "--html":
                filterName = "HTML (StarWriter)"

        # Single-argument print() behaves the same with and without the
        # print statement, so this line is valid on Python 2 and 3.
        print(filterName)
        if not args:
            usage()
            sys.exit()

        # The local context is only used to obtain the url resolver; all real
        # work is done through the context resolved from the given url.
        ctxLocal = uno.getComponentContext()
        smgrLocal = ctxLocal.ServiceManager

        resolver = smgrLocal.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", ctxLocal)
        ctx = resolver.resolve(url)
        smgr = ctx.ServiceManager

        desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)

        cwd = systemPathToFileUrl(getcwd())
        outProps = (
            PropertyValue("FilterName", 0, filterName, 0),
            PropertyValue("OutputStream", 0, OutputStream(), 0))
        inProps = (PropertyValue("Hidden", 0, True, 0),)
        for path in args:
            # Start every file without a document handle.  Otherwise a failed
            # load would leave the previous, already disposed document in
            # `doc` and it would be disposed a second time.
            doc = None
            try:
                fileUrl = uno.absolutize(cwd, systemPathToFileUrl(path))
                doc = desktop.loadComponentFromURL(fileUrl, "_blank", 0, inProps)

                if not doc:
                    raise UnoException("Couldn't open stream for unknown reason", None)

                doc.storeToURL("private:stream", outProps)
            except IOException as e:
                sys.stderr.write("Error during conversion: " + e.Message + "\n")
                retVal = 1
            except UnoException as e:
                sys.stderr.write("Error (" + repr(e.__class__) + ") during conversion:" + e.Message + "\n")
                retVal = 1
            finally:
                # Release the document even when an exception that is not
                # handled above is on its way out.
                if doc:
                    doc.dispose()

    except UnoException as e:
        sys.stderr.write("Error (" + repr(e.__class__) + ") :" + e.Message + "\n")
        retVal = 1
    except getopt.GetoptError as e:
        sys.stderr.write(str(e) + "\n")
        usage()
        retVal = 1

    sys.exit(retVal)
90
def usage():
    """Write the command-line help text to stderr.

    The synopsis lists every option accepted by the script: --help,
    -c / --connection-string and --html.
    """
    sys.stderr.write(
        "usage: ooextract.py --help |\n"
        "       [-c <connection-string> | --connection-string=<connection-string>]\n"
        "       [--html]\n"
        "       file1 file2 ...\n"
        "\n"
        "Extracts plain text from documents and prints it to stdout.\n"
        "Requires an OpenOffice.org instance to be running. The script and the\n"
        "running OpenOffice.org instance must be able to access the file by\n"
        "the same system path.\n"
        "\n"
        "-c <connection-string> | --connection-string=<connection-string>\n"
        "        The connection-string part of the uno url to which the\n"
        "        script should connect in order to do the conversion.\n"
        "        The string defaults to socket,host=localhost,port=2002\n"
        "--html\n"
        "        Instead of the text filter, the writer html filter is used\n"
    )
108
# Script entry point.  The call is unconditional (there is no __main__ guard),
# so importing this file runs the extraction as well; main() always finishes
# by calling sys.exit().
main()
110