xref: /aoo42x/main/sc/workben/celltrans/parse.py (revision 7d9fa7c3)
1cdf0e10cSrcweir#!/usr/bin/env python
2a0428e9eSAndrew Rist#**************************************************************
3a0428e9eSAndrew Rist#
4a0428e9eSAndrew Rist#  Licensed to the Apache Software Foundation (ASF) under one
5a0428e9eSAndrew Rist#  or more contributor license agreements.  See the NOTICE file
6a0428e9eSAndrew Rist#  distributed with this work for additional information
7a0428e9eSAndrew Rist#  regarding copyright ownership.  The ASF licenses this file
8a0428e9eSAndrew Rist#  to you under the Apache License, Version 2.0 (the
9a0428e9eSAndrew Rist#  "License"); you may not use this file except in compliance
10a0428e9eSAndrew Rist#  with the License.  You may obtain a copy of the License at
11a0428e9eSAndrew Rist#
12a0428e9eSAndrew Rist#    http://www.apache.org/licenses/LICENSE-2.0
13a0428e9eSAndrew Rist#
14a0428e9eSAndrew Rist#  Unless required by applicable law or agreed to in writing,
15a0428e9eSAndrew Rist#  software distributed under the License is distributed on an
16a0428e9eSAndrew Rist#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17a0428e9eSAndrew Rist#  KIND, either express or implied.  See the License for the
18a0428e9eSAndrew Rist#  specific language governing permissions and limitations
19a0428e9eSAndrew Rist#  under the License.
20a0428e9eSAndrew Rist#
21a0428e9eSAndrew Rist#**************************************************************
22cdf0e10cSrcweir
23cdf0e10cSrcweirimport sys
24cdf0e10cSrcweir
# Display names for the locale codes this script knows about.
localeNames = {
    'fr': 'French',
    'hu': 'Hungarian',
    'de': 'German',
}

def getLocaleName (code):
    """Return the display name for a locale code.

    Codes missing from localeNames yield the placeholder
    "(unknown locale)".
    """
    return localeNames.get(code, "(unknown locale)")
32cdf0e10cSrcweir
def getAscii (ords):
    """Return the string made of the characters whose code points are in ords.

    ords -- iterable of integer character codes.

    An empty iterable yields the empty string.
    """
    # Build the result in a single pass instead of repeated concatenation,
    # and avoid a local named after the 'ascii' builtin.
    return "".join(chr(c) for c in ords)
38cdf0e10cSrcweir
class LocaleData(object):
    """Keyword mappings collected for one locale, plus the code generator.

    funcList maps a function name to a list of [localeName, engName]
    pairs, where both names are sequences of integer character codes.
    """

    def __init__ (self, locale):
        self.funcList = {}
        self.locale = locale

    def addKeywordMap (self, funcName, localeName, engName):
        """Record one [localeName, engName] pair under funcName."""
        self.funcList.setdefault(funcName, []).append([localeName, engName])

    def getLocaleFuncVarName (self, func, pair):
        """Return the generated variable name for one keyword pair.

        The name is "<func>_<english name>_<locale>", with the function
        and English name lower-cased.
        """
        pieces = (func.lower(), getAscii(pair[1]).lower(), self.locale)
        return "_".join(pieces)

    def dumpCode (self):
        """Return the generated C++ source text for this locale."""
        tag = self.locale.capitalize()
        rule = "// " + "-"*75 + "\n"

        # Every (function, pair) entry, ordered by function name.
        entries = [(func, item)
                   for func in sorted(self.funcList)
                   for item in self.funcList[func]]

        # locale output
        out = [
            rule,
            "// %s language locale (automatically generated)\n"%getLocaleName(self.locale),
            rule,
            "static const Locale a" + tag + "(OUString::createFromAscii(\"",
            self.locale,
            "\"), OUString(), OUString());\n\n",
        ]

        # pre instantiations of localized function names.
        out.append("// pre instantiations of localized function names\n")
        for func, item in entries:
            # One hex literal per code unit, followed by the null terminator.
            codes = ["0x%.4X"%uval for uval in item[0]]
            codes.append("0x0000")
            out.append("static const sal_Unicode " + self.getLocaleFuncVarName(func, item) + "[] = {\n")
            out.append("    " + ", ".join(codes) + "};\n")

        # map item instantiations
        out.append("\n")
        out.append("static const TransItem p" + tag + "[] = {\n")
        for func, item in entries:
            row = "{%s, \"%s\", %s},\n"%(self.getLocaleFuncVarName(func, item),
                                         getAscii(item[1]),
                                         "oc"+func.capitalize())
            out.append("    " + row)
        out.append("    {NULL, NULL, ocNone}\n")
        out.append("};\n\n")

        # addToMap call
        out.append("addToMap(%s, %s);\n"%("p"+tag, "a"+tag))

        return "".join(out)
105cdf0e10cSrcweir
class Parser(object):
    """Convert a little-endian UTF-16 keyword list into generated source.

    Input recognized by parse():
      - a line starting with '$' names the locale for the lines after it,
      - a line starting with '@' names the function for the lines after it,
      - any other line holds comma-separated words; when a line has at
        least two words, the first two are recorded as the localized and
        English names for the current function.
    Spaces and tabs outside '$'/'@' lines are ignored.
    """

    def __init__ (self, args):
        """Choose the input and output paths.

        args -- argv-style sequence; args[1], when present, overrides the
                input path and args[2], when present, the output path.
        """
        # default input & output files.
        self.infile = "./keywords_utf16.txt"
        self.outfile = "../../source/core/tool/cellkeywords.inl"

        if len(args) >= 2:
            self.infile = args[1]
        if len(args) >= 3:
            self.outfile = args[2]

    def getDByte (self):
        """Return the next 16-bit unit from self.bytes and advance self.i by 2.

        Raises IndexError when fewer than two bytes remain.
        """
        # Assume little endian: the low byte comes first.
        lo = self.bytes[self.i]
        hi = self.bytes[self.i+1]
        if not isinstance(lo, int):
            # Buffer elements are one-character strings rather than ints.
            lo, hi = ord(lo), ord(hi)
        self.i += 2
        return hi*256 + lo

    def parseLine (self):
        """Return the units up to (not including) the next linefeed.

        The linefeed itself is consumed; reading also stops at end of input.
        """
        buf = []
        while self.i < self.size:
            dbyte = self.getDByte()
            if dbyte == 0x000A:
                break
            buf.append(dbyte)
        return buf

    def dumpBuf (self, buf, linefeed=True):
        """Write the characters in buf to stdout, optionally ending the line."""
        for item in buf:
            sys.stdout.write(chr(item))
        if linefeed:
            print('')

    def parse (self):
        """Read self.infile, parse it, and write the generated code to self.outfile."""

        # Read raw bytes: the input is UTF-16, so text mode would either fail
        # to decode it or alter it.  A bytearray yields integers on indexing.
        with open(self.infile, 'rb') as infile:
            self.bytes = bytearray(infile.read())

        self.size = len(self.bytes)
        self.i = 0

        localeList = []  # stores an array of locale data objects.
        funcName = None
        word = []
        wordPair = []

        while self.i < self.size:
            dbyte = self.getDByte()
            if dbyte == 0xFEFF and self.i == 2:
                # unicode signature - ignore it.
                pass
            elif dbyte == 0x0024:
                # $ - locale name
                buf = self.parseLine()
                locale = getAscii(buf)
                localeList.append(LocaleData(locale))

            elif dbyte == 0x0040:
                # @ - function name
                buf = self.parseLine()
                funcName = getAscii(buf)

            elif dbyte == 0x002C:
                # , - comma separator
                if word:
                    wordPair.append(word)
                    word = []
            elif dbyte == 0x000A:
                # linefeed
                if word:
                    wordPair.append(word)
                    word = []
                if len(wordPair) >= 2:
                    localeList[-1].addKeywordMap(funcName, wordPair[0], wordPair[1])
                wordPair = []
            elif dbyte in [0x0009, 0x0020]:
                # whitespace - ignore it.
                pass
            else:
                word.append(dbyte)

        # The last line may lack a trailing linefeed; don't drop its keywords.
        if word:
            wordPair.append(word)
        if len(wordPair) >= 2:
            localeList[-1].addKeywordMap(funcName, wordPair[0], wordPair[1])

        pieces = ["// This file has been automatically generated.  Do not hand-edit this!\n"]
        for obj in localeList:
            pieces.append("\n" + obj.dumpCode())
        chars = "".join(pieces)

        # Write to output file.
        with open(self.outfile, 'w') as outfile:
            outfile.write(chars)
198cdf0e10cSrcweir
if __name__ == '__main__':
    # Script entry point: run the parser with the command-line arguments.
    Parser(sys.argv).parse()
202