xref: /aoo4110/main/pyuno/demo/ooextract.py (revision b1cdbd2c)
1*b1cdbd2cSJim Jagielski# *************************************************************
2*b1cdbd2cSJim Jagielski#
3*b1cdbd2cSJim Jagielski#  Licensed to the Apache Software Foundation (ASF) under one
4*b1cdbd2cSJim Jagielski#  or more contributor license agreements.  See the NOTICE file
5*b1cdbd2cSJim Jagielski#  distributed with this work for additional information
6*b1cdbd2cSJim Jagielski#  regarding copyright ownership.  The ASF licenses this file
7*b1cdbd2cSJim Jagielski#  to you under the Apache License, Version 2.0 (the
8*b1cdbd2cSJim Jagielski#  "License"); you may not use this file except in compliance
9*b1cdbd2cSJim Jagielski#  with the License.  You may obtain a copy of the License at
10*b1cdbd2cSJim Jagielski#
11*b1cdbd2cSJim Jagielski#    http://www.apache.org/licenses/LICENSE-2.0
12*b1cdbd2cSJim Jagielski#
13*b1cdbd2cSJim Jagielski#  Unless required by applicable law or agreed to in writing,
14*b1cdbd2cSJim Jagielski#  software distributed under the License is distributed on an
15*b1cdbd2cSJim Jagielski#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*b1cdbd2cSJim Jagielski#  KIND, either express or implied.  See the License for the
17*b1cdbd2cSJim Jagielski#  specific language governing permissions and limitations
18*b1cdbd2cSJim Jagielski#  under the License.
19*b1cdbd2cSJim Jagielski#
20*b1cdbd2cSJim Jagielski# *************************************************************
21*b1cdbd2cSJim Jagielski
22*b1cdbd2cSJim Jagielskiimport getopt,sys
23*b1cdbd2cSJim Jagielskiimport uno
24*b1cdbd2cSJim Jagielskifrom unohelper import Base,systemPathToFileUrl, absolutize
25*b1cdbd2cSJim Jagielskifrom os import getcwd
26*b1cdbd2cSJim Jagielski
27*b1cdbd2cSJim Jagielskifrom com.sun.star.beans import PropertyValue
28*b1cdbd2cSJim Jagielskifrom com.sun.star.beans.PropertyState import DIRECT_VALUE
29*b1cdbd2cSJim Jagielskifrom com.sun.star.uno import Exception as UnoException
30*b1cdbd2cSJim Jagielskifrom com.sun.star.io import IOException,XInputStream, XOutputStream
31*b1cdbd2cSJim Jagielski
class OutputStream( Base, XOutputStream ):
    """XOutputStream implementation that copies everything it receives to stdout.

    main() passes an instance as the "OutputStream" property of
    storeToURL(), so the exported document ends up on standard output.
    """

    def __init__( self ):
        # Becomes 1 once closeOutput() has been called.
        self.closed = 0

    def closeOutput(self):
        """Record that the stream has been closed."""
        self.closed = 1

    def writeBytes( self, seq ):
        """Write the raw bytes held in seq.value to standard output."""
        data = seq.value
        binaryOut = getattr( sys.stdout, "buffer", None )
        if binaryOut is None:
            # Python 2 (or a replaced stdout without a binary layer):
            # the stream takes the byte string as it is.
            sys.stdout.write( data )
        else:
            # Python 3: the text-mode stdout raises TypeError for bytes, so
            # write to the underlying binary stream.  Flush the text layer
            # first so output from earlier print() calls keeps its position.
            sys.stdout.flush()
            binaryOut.write( data )

    def flush( self ):
        """Push any buffered output through to the underlying stdout."""
        sys.stdout.flush()
44*b1cdbd2cSJim Jagielski
45*b1cdbd2cSJim Jagielski
def main():
    """Export every document named on the command line to stdout.

    Options are read from sys.argv:
      -h, --help                   print the usage text and exit (status 0)
      -c X, --connection-string=X  connection part of the uno url to connect to
      --html                       use the "HTML (StarWriter)" filter instead
                                   of "Text (Encoded)"

    Calling without any file argument prints the usage text and exits with
    status 0.  Otherwise the function always ends in sys.exit(): status 1 if
    option parsing failed or any UNO exception was reported, else status 0.
    """
    retVal = 0

    try:
        opts, args = getopt.getopt(sys.argv[1:], "hc:",["help", "connection-string=" , "html"])
        url = "uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext"
        filterName = "Text (Encoded)"
        for o, a in opts:
            if o in ("-h", "--help"):
                usage()
                sys.exit()
            elif o in ("-c", "--connection-string" ):
                url = "uno:" + a + ";urp;StarOffice.ComponentContext"
            elif o == "--html":
                filterName = "HTML (StarWriter)"

        # NOTE(review): this line goes to stdout, i.e. into the same stream
        # as the exported document content - confirm that this is intended.
        print(filterName)
        if not args:
            usage()
            sys.exit()

        # Bootstrap a local component context and use it to resolve the
        # context addressed by the uno url.
        ctxLocal = uno.getComponentContext()
        smgrLocal = ctxLocal.ServiceManager

        resolver = smgrLocal.createInstanceWithContext(
                 "com.sun.star.bridge.UnoUrlResolver", ctxLocal )
        ctx = resolver.resolve( url )
        smgr = ctx.ServiceManager

        desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx )

        cwd = systemPathToFileUrl( getcwd() )
        outProps = (
            PropertyValue( "FilterName" , 0, filterName , 0 ),
            PropertyValue( "OutputStream",0, OutputStream(),0))
        # The trailing comma is deliberate: it makes a one-element tuple.
        inProps = PropertyValue( "Hidden" , 0 , True, 0 ),
        for path in args:
            # Reset for every file, otherwise a failed load would leave the
            # previous (already disposed) document in doc and dispose it again.
            doc = None
            try:
                fileUrl = uno.absolutize( cwd, systemPathToFileUrl(path) )
                doc = desktop.loadComponentFromURL( fileUrl , "_blank", 0,inProps)

                if not doc:
                    raise UnoException( "Couldn't open stream for unknown reason", None )

                doc.storeToURL("private:stream",outProps)
            except IOException as e:
                sys.stderr.write( "Error during conversion: " + e.Message + "\n" )
                retVal = 1
            except UnoException as e:
                sys.stderr.write( "Error ("+repr(e.__class__)+") during conversion:" + e.Message + "\n" )
                retVal = 1
            finally:
                # Dispose the document however the conversion ended.
                if doc:
                    doc.dispose()

    except UnoException as e:
        sys.stderr.write( "Error ("+repr(e.__class__)+") :" + e.Message + "\n" )
        retVal = 1
    except getopt.GetoptError as e:
        sys.stderr.write( str(e) + "\n" )
        usage()
        retVal = 1

    sys.exit(retVal)
111*b1cdbd2cSJim Jagielski
def usage():
    """Write the command-line help text for ooextract.py to stderr."""
    # Adjacent string literals are joined by the parser, so the whole help
    # text is handed to a single write() call.
    sys.stderr.write(
        "usage: ooextract.py --help |\n"
        "       [-c <connection-string> | --connection-string=<connection-string>]\n"
        "       [--html]\n"
        "       file1 file2 ...\n"
        "\n"
        "Extracts plain text from documents and prints it to stdout.\n"
        "Requires an OpenOffice.org instance to be running. The script and the\n"
        "running OpenOffice.org instance must be able to access the file\n"
        "by the same system path.\n"
        "\n"
        "-c <connection-string> | --connection-string=<connection-string>\n"
        "        The connection-string part of a uno url to where\n"
        "        the script should connect to in order to do the conversion.\n"
        "        The string defaults to socket,host=localhost,port=2002\n"
        "--html\n"
        "        Instead of the text filter, the writer html filter is used\n"
        )
129*b1cdbd2cSJim Jagielski
# Run the extraction only when executed as a script; main() ends the process
# through sys.exit(), so it must not run as a side effect of an import.
if __name__ == "__main__":
    main()
131