xref: /aoo42x/main/toolkit/src2xml/source/srclexer.py (revision cdf0e10c)
1import sys, os.path
2from globals import *
3import macroparser
4
class EOF(Exception):
    """Raised when the lexer scans past the end of the input buffer."""

    def __init__ (self):
        pass

    # Define the standard __str__ so str(exc) and printing the exception
    # yield the message (the original only had a non-standard .str()).
    def __str__ (self):
        return "end of file"

    # Backward-compatible alias for legacy callers that invoke exc.str().
    str = __str__
11
class BOF(Exception):
    """Raised when the lexer scans past the beginning of the input buffer."""

    def __init__ (self):
        pass

    # Define the standard __str__ so str(exc) and printing the exception
    # yield the message (the original only had a non-standard .str()).
    def __str__ (self):
        return "beginning of file"

    # Backward-compatible alias for legacy callers that invoke exc.str().
    str = __str__
18
19
def removeHeaderQuotes (orig):
    """Strip one pair of surrounding quotes ("..." or <...>) from a header
    name.  Strings of two characters or fewer, or strings without a
    matching quote pair, are returned unchanged."""
    if len(orig) > 2:
        first, last = orig[0], orig[-1]
        if (first == last == '"') or (first == '<' and last == '>'):
            return orig[1:-1]
    return orig
29
30
def dumpTokens (tokens, toError=False):
    """Pretty-print a token stream with brace-driven indentation.

    Tokens '{' and '<' open a scope, '}' and '>' close one, and ';'
    terminates the pending line.  Output goes to stdout, or to stderr
    when toError is true.
    """
    indent = "    "
    depth = 0
    pending = ''   # tokens accumulated for the line being built
    output = ''

    for tok in tokens:
        if tok in '{<':
            # Flush the pending line, then open a new scope.
            if pending:
                output += indent*depth + pending + "\n"
                pending = ''
            output += indent*depth + tok + "\n"
            depth += 1
        elif tok in '}>':
            # Flush the pending line, then close the scope.
            if pending:
                output += indent*depth + pending + "\n"
                pending = ''
            depth -= 1
            output += indent*depth + tok
        elif tok == ';':
            if pending:
                output += indent*depth + pending + ";\n"
                pending = ''
            else:
                output += ";\n"
        elif tok:
            pending += tok + ' '

    # Emit whatever is left on the final line.
    if pending:
        output += pending
    output += "\n"

    stream = sys.stderr if toError else sys.stdout
    stream.write(output)
69
70
class HeaderData(object):
    """Cached result of lexing one header file: the token stream it
    produced and the macro definitions it contributed."""

    def __init__ (self):
        # Token stream produced by lexing the header.
        self.tokens = []
        # Macro name -> macro object mapping collected from the header.
        self.defines = {}
75
76
class SrcLexer(object):
    """Lexicographical analyzer for .src format.

The role of a lexer is to parse the source file and break it into
appropriate tokens.  Such tokens are later passed to a parser to
build the syntax tree.
"""
    # Process-wide cache of already-lexed headers, keyed by header path
    # and shared by every lexer instance.
    headerCache = {}

    # Visibility states driven by conditional-compilation directives.
    VISIBLE = 0          # region is active; tokens are collected
    INVISIBLE_PRE = 1    # inactive, but a later #elif/#else may activate it
    INVISIBLE_POST = 2   # inactive for good; a true branch already ran

    def __init__ (self, chars, filepath = None):
        # chars: the full text to tokenize.
        # filepath: origin of the text; used for diagnostics and to avoid
        # a header including itself.
        self.filepath = filepath
        self.parentLexer = None
        self.chars = chars
        self.bufsize = len(self.chars)

        # TODO: use parameters for this
        # Properties that can be copied.
        self.headerDict = dict ()
        self.debug = False
        self.debugMacro = False
        self.includeDirs = list ()
        self.expandHeaders = True
        self.inMacroDefine = False
        self.stopOnHeader = False

    def copyProperties (self, other):
        """Copy properties from another instance of SrcLexer."""

        # TODO: use parameters for this
        self.headerDict = other.headerDict
        self.debug = other.debug
        self.debugMacro = other.debugMacro
        self.includeDirs = other.includeDirs[:]
        self.expandHeaders = other.expandHeaders
        self.inMacroDefine = other.inMacroDefine
        self.stopOnHeader = other.stopOnHeader

    def init (self):
        """Reset all per-run state prior to tokenizing."""
        self.firstNonBlank = ''     # first non-blank char of the current line
        self.token = ''             # token currently being accumulated
        self.tokens = []            # finished tokens, in source order
        self.defines = {}           # macro name -> macro object
        self.visibilityStack = []   # nested #if/#ifdef visibility states

    def getTokens (self):
        """Return the token list produced by tokenize()."""
        return self.tokens

    def getDefines (self):
        """Return the macro definitions collected by tokenize()."""
        return self.defines

    def nextPos (self, i):
        """Return the position after i, skipping carriage returns;
        raise EOF when the end of the buffer is passed."""
        while True:
            i += 1
            try:
                c = self.chars[i]
            except IndexError:
                raise EOF

            if ord(c) in [0x0D]:
                continue
            break
        return i

    def prevPos (self, i):
        """Return the position before i, skipping carriage returns;
        raise BOF when the beginning of the buffer is passed."""
        while True:
            i -= 1
            try:
                c = self.chars[i]
            except IndexError:
                raise BOF

            if ord(c) in [0x0D]:
                continue
            break
        return i

    def isCodeVisible (self):
        """True when every enclosing conditional region is visible."""
        for item in self.visibilityStack:
            if item != SrcLexer.VISIBLE:
                return False
        return True

    def tokenize (self):
        """Scan the entire buffer, populating self.tokens and self.defines."""
        self.init()

        i = 0
        # Bounds check up front: the original crashed with IndexError on
        # an empty input buffer.
        while i < self.bufsize:
            c = self.chars[i]

            if self.firstNonBlank == '' and not c in [' ', "\n", "\t"]:
                # Store the first non-blank in a line.
                self.firstNonBlank = c
            elif c == "\n":
                self.firstNonBlank = ''

            if c == '#':
                i = self.pound(i)
            elif c == '/':
                i = self.slash(i)
            elif c == "\n":
                i = self.lineBreak(i)
            elif c == '"':
                i = self.doubleQuote(i)
            elif c in [' ', "\t"]:
                i = self.blank(i)
            elif c in ";()[]{}<>,=+-*":
                # Any outstanding single-character token.
                i = self.anyToken(i, c)
            elif self.isCodeVisible():
                self.token += c

            try:
                i = self.nextPos(i)
            except EOF:
                break

        if len(self.token):
            self.tokens.append(self.token)

        if not self.parentLexer and self.debug:
            progress ("-"*68 + "\n")
            progress ("All defines found in this translation unit:\n")
            # sorted() replaces the Python-2-only keys(); keys.sort() idiom.
            for key in sorted(self.defines.keys()):
                progress ("@ %s\n"%key)

    def dumpTokens (self, toError=False):
        """Pretty-print this lexer's tokens via the module-level dumpTokens."""
        dumpTokens(self.tokens, toError)


    def maybeAddToken (self):
        """Flush the token being accumulated, if any, onto the token list."""
        if len(self.token) > 0:
            self.tokens.append(self.token)
            self.token = ''


    #--------------------------------------------------------------------
    # character handlers

    def blank (self, i):
        """Whitespace terminates the current token."""
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()
        return i


    def pound (self, i):
        """Handle a preprocessor directive introduced by '#'.

        Returns the buffer position at which the main loop should resume.
        Raises ParseError on an unbalanced #elif/#else/#endif."""

        if self.inMacroDefine:
            return i

        # Only honor '#' when it is the first non-blank on the line.
        if not self.firstNonBlank == '#':
            return i

        self.maybeAddToken()
        # We are in preprocessing mode.

        # Get the macro command name '#<command> .....'

        command, buf = '', ''
        firstNonBlank = False
        while True:
            try:
                i = self.nextPos(i)
                c = self.chars[i]
                # A backslash-newline continues the directive on the next line.
                if c == '\\' and self.chars[self.nextPos(i)] == "\n":
                    i = self.nextPos(i)
                    continue
            except EOF:
                break

            if c == "\n":
                if len(buf) > 0 and len(command) == 0:
                    command = buf
                i = self.prevPos(i)
                break
            elif c in [' ', "\t"]:
                if not firstNonBlank:
                    # Ignore any leading blanks after the '#'.
                    continue

                if len(command) == 0:
                    command = buf
                    buf = ''
                else:
                    buf += ' '
            elif c == '(':
                # Function-like macro: '(' ends the name but belongs to the body.
                if len(buf) > 0 and len(command) == 0:
                    command = buf
                buf += c
            else:
                if not firstNonBlank:
                    firstNonBlank = True
                buf += c

        if command == 'define':
            self.handleMacroDefine(buf)
        elif command == 'include':
            self.handleMacroInclude(buf)
        elif command == 'ifdef':
            defineName = buf.strip()
            # 'in' replaces the Python-2-only dict.has_key().
            if defineName in self.defines:
                self.visibilityStack.append(SrcLexer.VISIBLE)
            else:
                self.visibilityStack.append(SrcLexer.INVISIBLE_PRE)

        elif command == 'ifndef':
            defineName = buf.strip()
            if defineName in self.defines:
                self.visibilityStack.append(SrcLexer.INVISIBLE_PRE)
            else:
                self.visibilityStack.append(SrcLexer.VISIBLE)

        elif command == 'if':
            if self.evalCodeVisibility(buf):
                self.visibilityStack.append(SrcLexer.VISIBLE)
            else:
                self.visibilityStack.append(SrcLexer.INVISIBLE_PRE)

        elif command == 'elif':
            if len(self.visibilityStack) == 0:
                raise ParseError ('')

            if self.visibilityStack[-1] == SrcLexer.VISIBLE:
                self.visibilityStack[-1] = SrcLexer.INVISIBLE_POST
            elif self.visibilityStack[-1] == SrcLexer.INVISIBLE_PRE:
                # Evaluate only if the current visibility is false.
                if self.evalCodeVisibility(buf):
                    self.visibilityStack[-1] = SrcLexer.VISIBLE

        elif command == 'else':
            if len(self.visibilityStack) == 0:
                raise ParseError ('')

            if self.visibilityStack[-1] == SrcLexer.VISIBLE:
                self.visibilityStack[-1] = SrcLexer.INVISIBLE_POST
            # elif (not a second if): a region just demoted to
            # INVISIBLE_POST above must never be re-enabled here.
            elif self.visibilityStack[-1] == SrcLexer.INVISIBLE_PRE:
                self.visibilityStack[-1] = SrcLexer.VISIBLE

        elif command == 'endif':
            if len(self.visibilityStack) == 0:
                raise ParseError ('')
            self.visibilityStack.pop()

        elif command == 'undef':
            pass
        elif command in ['error', 'pragma']:
            pass
        else:
            # Unknown directive: report it on stderr and exit with a
            # failure status.  (The original printed to stdout and called
            # sys.exit(0), signalling success on an error path.)
            sys.stderr.write("'%s' '%s'\n"%(command, buf))
            sys.stderr.write("%s\n"%self.filepath)
            sys.exit(1)

        return i


    def evalCodeVisibility (self, buf):
        """Evaluate a #if/#elif expression; unparsable expressions count
        as true.

        NOTE(review): this eval()s preprocessor text straight from the
        input file - acceptable only for trusted sources."""
        try:
            return eval(buf)
        except Exception:
            # Narrowed from a bare except: so SystemExit/KeyboardInterrupt
            # still propagate.
            return True

    def handleMacroDefine (self, buf):
        """Parse a '#define ...' body and record the resulting macro."""

        mparser = macroparser.MacroParser(buf)
        mparser.debug = self.debugMacro
        mparser.parse()
        macro = mparser.getMacro()
        if macro:
            self.defines[macro.name] = macro

    def handleMacroInclude (self, buf):
        """Process '#include ...': locate the header on includeDirs, lex it
        (with per-instance and process-wide caching), and merge the macro
        definitions it produces into this lexer."""

        # Strip excess string if any.
        pos = buf.find(' ')
        if pos >= 0:
            buf = buf[:pos]
        headerSub = removeHeaderQuotes(buf)

        if not self.expandHeaders:
            # We don't want to expand headers.  Bail out.
            if self.debug:
                progress ("%s ignored\n"%headerSub)
            return

        defines = {}
        headerPath = None
        for includeDir in self.includeDirs:
            hpath = includeDir + '/' + headerSub
            # Never include the file currently being lexed.
            if os.path.isfile(hpath) and hpath != self.filepath:
                headerPath = hpath
                break

        if not headerPath:
            error("included header file " + headerSub + " not found\n", self.stopOnHeader)
            return

        if self.debug:
            progress ("%s found\n"%headerPath)

        if headerPath in self.headerDict:
            if self.debug:
                progress ("%s already included\n"%headerPath)
            return

        if headerPath in SrcLexer.headerCache:
            if self.debug:
                progress ("%s in cache\n"%headerPath)
            self.defines.update(SrcLexer.headerCache[headerPath].defines)
            return

        # 'with' closes the handle deterministically; the original leaked it.
        with open(headerPath, 'r') as f:
            chars = f.read()
        mclexer = SrcLexer(chars, headerPath)
        mclexer.copyProperties(self)
        mclexer.parentLexer = self
        mclexer.tokenize()
        hdrData = HeaderData()
        hdrData.tokens = mclexer.getTokens()
        headerDefines = mclexer.getDefines()
        defines.update(headerDefines)
        hdrData.defines.update(headerDefines)

        self.headerDict[headerPath] = True
        SrcLexer.headerCache[headerPath] = hdrData

        # Update the list of headers that have already been expanded.
        for key in mclexer.headerDict.keys():
            self.headerDict[key] = True

        if self.debug:
            progress ("defines found in header %s:\n"%headerSub)
            for key in defines.keys():
                progress ("  '%s'\n"%key)

        self.defines.update(defines)


    def slash (self, i):
        """Handle '/': a '//' line comment, a '/* */' block comment, or a
        plain '/' token."""
        if not self.isCodeVisible():
            return i

        if i < self.bufsize - 1 and self.chars[i+1] == '/':
            # Parse line comment.
            line = ''
            i += 2
            while i < self.bufsize:
                c = self.chars[i]
                if ord(c) in [0x0A, 0x0D]:
                    # Resume just before the line break.
                    return i - 1
                line += c
                i += 1
            # Comment ran to end of buffer; drop any pending token.
            self.token = ''
        elif i < self.bufsize - 1 and self.chars[i+1] == '*':
            comment = ''
            i += 2
            while i < self.bufsize:
                c = self.chars[i]
                if c == '/' and self.chars[i-1] == '*':
                    return i
                comment += c
                i += 1
        else:
            return self.anyToken(i, '/')

        return i


    def lineBreak (self, i):
        """A newline terminates the current token."""
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()

        return i


    def doubleQuote (self, i):
        """Consume a double-quoted string literal and store it, quotes
        included, as a single token.

        NOTE(review): escape sequences are not interpreted and an
        unterminated literal is silently dropped - matches the original."""
        if not self.isCodeVisible():
            return i

        literal = ''
        i += 1
        while i < self.bufsize:
            c = self.chars[i]
            if c == '"':
                self.tokens.append('"'+literal+'"')
                break
            literal += c
            i += 1

        return i


    def anyToken (self, i, token):
        """Flush any pending token, then emit the given token on its own."""
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()
        self.token = token
        self.maybeAddToken()
        return i
489