xref: /aoo42x/main/sc/workben/celltrans/parse.py (revision a0428e9e)
1cdf0e10cSrcweir#!/usr/bin/env python
2*a0428e9eSAndrew Rist#**************************************************************
3*a0428e9eSAndrew Rist#
4*a0428e9eSAndrew Rist#  Licensed to the Apache Software Foundation (ASF) under one
5*a0428e9eSAndrew Rist#  or more contributor license agreements.  See the NOTICE file
6*a0428e9eSAndrew Rist#  distributed with this work for additional information
7*a0428e9eSAndrew Rist#  regarding copyright ownership.  The ASF licenses this file
8*a0428e9eSAndrew Rist#  to you under the Apache License, Version 2.0 (the
9*a0428e9eSAndrew Rist#  "License"); you may not use this file except in compliance
10*a0428e9eSAndrew Rist#  with the License.  You may obtain a copy of the License at
11*a0428e9eSAndrew Rist#
12*a0428e9eSAndrew Rist#    http://www.apache.org/licenses/LICENSE-2.0
13*a0428e9eSAndrew Rist#
14*a0428e9eSAndrew Rist#  Unless required by applicable law or agreed to in writing,
15*a0428e9eSAndrew Rist#  software distributed under the License is distributed on an
16*a0428e9eSAndrew Rist#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17*a0428e9eSAndrew Rist#  KIND, either express or implied.  See the License for the
18*a0428e9eSAndrew Rist#  specific language governing permissions and limitations
19*a0428e9eSAndrew Rist#  under the License.
20*a0428e9eSAndrew Rist#
21*a0428e9eSAndrew Rist#**************************************************************
22cdf0e10cSrcweir
23cdf0e10cSrcweirimport sys
24cdf0e10cSrcweir
# Display names for the locale codes this generator knows about.
localeNames = {'fr': 'French', 'hu': 'Hungarian', 'de': 'German'}


def getLocaleName (code):
    """Return the display name of the language for a locale code.

    code -- locale code used as a key of localeNames (e.g. 'fr').

    Returns the matching name, or the placeholder "(unknown locale)" when
    the code is not listed in localeNames.
    """
    # dict.get() replaces dict.has_key(), which was removed in Python 3; it
    # also does the lookup once instead of twice.
    return localeNames.get(code, "(unknown locale)")
32cdf0e10cSrcweir
def getAscii (ords):
    """Convert a sequence of character ordinals into a string.

    ords -- iterable of integer character codes.

    Returns the string made of chr(c) for every c in ords, in order; an
    empty sequence yields ''.
    """
    # A single join avoids both the repeated string concatenation and the
    # local variable that used to shadow the ascii() builtin.
    return ''.join(chr(c) for c in ords)
38cdf0e10cSrcweir
class LocaleData(object):
    """Keyword translations collected for one locale, plus the C++ emitter.

    Attributes:
      locale   -- locale code string for this data set.
      funcList -- maps a function name to a list of
                  [localized name, English name] pairs.  The localized name
                  is iterated as integers when dumped; the English name is
                  passed to getAscii().
    """

    def __init__ (self, locale):
        self.locale = locale
        self.funcList = {}

    def addKeywordMap (self, funcName, localeName, engName):
        """Record one localized/English keyword pair under funcName."""
        # setdefault() replaces the has_key() test (removed in Python 3).
        self.funcList.setdefault(funcName, []).append([localeName, engName])

    def getLocaleFuncVarName (self, func, pair):
        """Return the C++ variable name for one keyword pair.

        The name is "<func>_<english keyword>_<locale>", with the first two
        parts lower-cased.
        """
        return func.lower() + "_" + getAscii(pair[1]).lower() + "_" + self.locale

    def dumpCode (self):
        """Return the generated C++ source for this locale as one string.

        The text consists of a banner comment, the Locale object, one
        null-terminated sal_Unicode array per keyword pair, the TransItem
        table referencing those arrays, and the addToMap() call.
        """
        localeVar = "a" + self.locale.capitalize()
        tableVar = "p" + self.locale.capitalize()
        rule = "// " + "-"*75 + "\n"

        # Collect the pieces and join once at the end instead of growing a
        # string with += for every fragment.
        out = []

        # locale output
        out.append(rule)
        out.append("// %s language locale (automatically generated)\n"
                   % getLocaleName(self.locale))
        out.append(rule)
        out.append("static const Locale %s(OUString::createFromAscii(\"%s\"), OUString(), OUString());\n\n"
                   % (localeVar, self.locale))

        # sorted() works on Python 2 lists and Python 3 dict views alike;
        # keys().sort() only worked on Python 2.
        funcs = sorted(self.funcList)

        # pre instantiations of localized function names.
        out.append("// pre instantiations of localized function names\n")
        for func in funcs:
            for item in self.funcList[func]:
                # Dump the UTF-16 code units ...
                codeUnits = ["0x%.4X" % uval for uval in item[0]]
                # ... and don't forget to null-terminate the string.
                codeUnits.append("0x0000")
                out.append("static const sal_Unicode %s[] = {\n"
                           % self.getLocaleFuncVarName(func, item))
                out.append("    " + ", ".join(codeUnits) + "};\n")

        # map item instantiations
        out.append("\n")
        out.append("static const TransItem %s[] = {\n" % tableVar)
        for func in funcs:
            for item in self.funcList[func]:
                out.append("    {%s, \"%s\", %s},\n"
                           % (self.getLocaleFuncVarName(func, item),
                              getAscii(item[1]),
                              "oc" + func.capitalize()))

        # Sentinel entry that terminates the table.
        out.append("    {NULL, NULL, ocNone}\n")
        out.append("};\n\n")

        # addToMap call
        out.append("addToMap(%s, %s);\n" % (tableVar, localeVar))

        return "".join(out)
106cdf0e10cSrcweir
class Parser(object):
    """Reads a little-endian UTF-16 keyword list and writes the .inl file.

    Input format as handled by parse():
      $<locale>              starts a new locale section
      @<function>            sets the function for the following lines
      <localized>,<english>  one keyword mapping per line
    Tabs and spaces outside of $ and @ lines are ignored.
    """

    def __init__ (self, args):
        """Pick input/output paths from an argv-style list.

        args[1], when present, overrides the input file and args[2] the
        output file; args[0] is not used.
        """
        # default input & output files.
        self.infile = "./keywords_utf16.txt"
        self.outfile = "../../source/core/tool/cellkeywords.inl"

        if len(args) >= 2:
            self.infile = args[1]
        if len(args) >= 3:
            self.outfile = args[2]

    def getDByte (self):
        """Return the 16-bit code unit at the cursor and advance two bytes.

        self.bytes is expected to yield integers when indexed (parse()
        stores a bytearray).
        """
        # Assume little endian: the first byte is the low-order one.
        low = self.bytes[self.i]
        high = self.bytes[self.i+1]
        self.i += 2
        return high*256 + low

    def parseLine (self):
        """Read code units up to the next linefeed (or the end of data).

        Returns the list of code units read.  The terminating linefeed is
        consumed but not returned.
        """
        buf = []
        while self.i < self.size:
            dbyte = self.getDByte()
            if dbyte == 0x000A:
                break
            if dbyte == 0x000D:
                # Carriage return of a CRLF line ending; it must not end up
                # in a locale or function name.
                continue
            buf.append(dbyte)
        return buf

    def dumpBuf (self, buf, linefeed=True):
        """Write the code units in buf to stdout as characters.

        A trailing newline is written unless linefeed is false.
        """
        sys.stdout.write("".join(chr(item) for item in buf))
        if linefeed:
            # sys.stdout.write instead of the Python 2 only "print ''".
            sys.stdout.write("\n")

    def parse (self):
        """Parse self.infile and write the generated code to self.outfile.

        Raises ValueError when a keyword line appears before any $locale or
        @function line.
        """
        # The input is binary UTF-16 data: read it in binary mode so no
        # newline translation or text decoding is applied, and keep it in a
        # bytearray so indexing yields integers on Python 2 and 3.
        with open(self.infile, 'rb') as source:
            self.bytes = bytearray(source.read())

        self.size = len(self.bytes)
        if self.size % 2:
            # A dangling byte cannot form a code unit; drop it instead of
            # failing with an IndexError in getDByte().
            sys.stderr.write("warning: %s has an odd number of bytes; "
                             "ignoring the last one\n" % self.infile)
            self.size -= 1
        self.i = 0

        localeList = []  # stores an array of locale data objects.
        funcName = None
        word = []
        wordPair = []

        while self.i < self.size:
            dbyte = self.getDByte()
            if dbyte == 0xFEFF and self.i == 2:
                # unicode signature - ignore it.
                pass
            elif dbyte == 0x0024:
                # $ - locale name
                localeList.append(LocaleData(getAscii(self.parseLine())))

            elif dbyte == 0x0040:
                # @ - function name
                funcName = getAscii(self.parseLine())

            elif dbyte == 0x002C:
                # , - comma separator
                if word:
                    wordPair.append(word)
                    word = []
            elif dbyte == 0x000A:
                # linefeed - finish the current word and the current pair.
                if word:
                    wordPair.append(word)
                    word = []
                if len(wordPair) >= 2:
                    if not localeList or funcName is None:
                        raise ValueError(
                            "%s: keyword line found before any $locale / "
                            "@function line" % self.infile)
                    localeList[-1].addKeywordMap(funcName, wordPair[0], wordPair[1])
                wordPair = []
            elif dbyte in (0x0009, 0x0020, 0x000D):
                # whitespace (tab, space, carriage return) - ignore it.
                pass
            else:
                word.append(dbyte)

        pieces = ["// This file has been automatically generated.  Do not hand-edit this!\n"]
        for obj in localeList:
            pieces.append("\n" + obj.dumpCode())

        # Write to output file.
        with open(self.outfile, 'w') as target:
            target.write("".join(pieces))
199cdf0e10cSrcweir
if __name__ == '__main__':
    # Script entry point: the command-line arguments optionally override the
    # default input and output paths.
    Parser(sys.argv).parse()
202cdf0e10cSrcweir    parser.parse()
203cdf0e10cSrcweir
204