xref: /aoo41x/main/toolkit/src2xml/source/srclexer.py (revision 77de67d5)
# *************************************************************
#
#  Licensed to the Apache Software Foundation (ASF) under one
#  or more contributor license agreements.  See the NOTICE file
#  distributed with this work for additional information
#  regarding copyright ownership.  The ASF licenses this file
#  to you under the Apache License, Version 2.0 (the
#  "License"); you may not use this file except in compliance
#  with the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing,
#  software distributed under the License is distributed on an
#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
#  KIND, either express or implied.  See the License for the
#  specific language governing permissions and limitations
#  under the License.
#
# *************************************************************

import sys, os.path
from globals import *
import macroparser

class EOF(Exception):
    def __init__ (self):
        pass

    def __str__ (self):
        return "end of file"

class BOF(Exception):
    def __init__ (self):
        pass

    def __str__ (self):
        return "beginning of file"


def removeHeaderQuotes (orig):
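    """Strip surrounding '"' quotes or '<...>' brackets from an #include
    target, if present; anything else is returned unchanged."""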
    if len(orig) <= 2:
        return orig
    elif orig[0] == orig[-1] == '"':
        return orig[1:-1]
    elif orig[0] == '<' and orig[-1] == '>':
        return orig[1:-1]

    return orig


def dumpTokens (tokens, toError=False):
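    """Pretty-print a token stream, indenting by scope ('{' and '<' open a
    scope, '}' and '>' close one).  Output goes to stderr when toError is
    True, otherwise to stdout."""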

    scope = 0
    indent = "    "
    line = ''
    chars = ''

    for token in tokens:
        if token in '{<':
            if len(line) > 0:
                chars += indent*scope + line + "\n"
                line = ''
            chars += indent*scope + token + "\n"
            scope += 1

        elif token in '}>':
            if len(line) > 0:
                chars += indent*scope + line + "\n"
                line = ''
            scope -= 1
            chars += indent*scope + token

        elif token == ';':
            if len(line) > 0:
                chars += indent*scope + line + ";\n"
                line = ''
            else:
                chars += ";\n"
        elif len(token) > 0:
            line += token + ' '

    if len(line) > 0:
        chars += line
    chars += "\n"
    if toError:
        sys.stderr.write(chars)
    else:
        sys.stdout.write(chars)


class HeaderData(object):
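    """Cached result of tokenizing a header file: the macros it defines
    and its token stream."""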
    def __init__ (self):
        self.defines = {}
        self.tokens = []


class SrcLexer(object):
    """Lexical analyzer for the .src format.

The role of a lexer is to parse the source file and break it into
appropriate tokens.  Such tokens are later passed to a parser to
build the syntax tree.  A minimal usage sketch is included at the
end of this module.
"""
    headerCache = {}

    VISIBLE = 0
    INVISIBLE_PRE = 1
    INVISIBLE_POST = 2

    def __init__ (self, chars, filepath = None):
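        """chars is the entire text to tokenize; filepath, when given, is
        used for diagnostics and to avoid including a file from itself."""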
        self.filepath = filepath
        self.parentLexer = None
        self.chars = chars
        self.bufsize = len(self.chars)

        # TODO: use parameters for this
        # Properties that can be copied.
        self.headerDict = dict ()
        self.debug = False
        self.debugMacro = False
        self.includeDirs = list ()
        self.expandHeaders = True
        self.inMacroDefine = False
        self.stopOnHeader = False

    def copyProperties (self, other):
        """Copy properties from another instance of SrcLexer."""

        # TODO: use parameters for this
        self.headerDict = other.headerDict
        self.debug = other.debug
        self.debugMacro = other.debugMacro
        self.includeDirs = other.includeDirs[:]
        self.expandHeaders = other.expandHeaders
        self.inMacroDefine = other.inMacroDefine
        self.stopOnHeader = other.stopOnHeader

    def init (self):
        self.firstNonBlank = ''
        self.token = ''
        self.tokens = []
        self.defines = {}
        self.visibilityStack = []

    def getTokens (self):
        return self.tokens

    def getDefines (self):
        return self.defines

    def nextPos (self, i):
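        """Return the position of the next character after i, skipping
        carriage returns (0x0D).  Raises EOF past the end of the buffer."""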
        while True:
            i += 1
            try:
                c = self.chars[i]
            except IndexError:
                raise EOF

            if ord(c) in [0x0D]:
                continue
            break
        return i

    def prevPos (self, i):
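        """Return the position of the previous character before i, skipping
        carriage returns (0x0D).  Raises BOF before the start of the buffer."""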
        while True:
            i -= 1
            if i < 0:
                # A negative index would silently wrap around to the end of
                # the buffer, so treat it as the beginning of the file.
                raise BOF
            try:
                c = self.chars[i]
            except IndexError:
                raise BOF

            if ord(c) in [0x0D]:
                continue
            break
        return i

    def isCodeVisible (self):
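        """Return True only when every conditional block on the visibility
        stack is visible, i.e. the current code is not #if'ed out."""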
        if len(self.visibilityStack) == 0:
            return True
        for item in self.visibilityStack:
            if item != SrcLexer.VISIBLE:
                return False
        return True

    def tokenize (self):
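        """Main loop: walk the character buffer, dispatch on significant
        characters ('#', '/', '"', blanks, single-character tokens) and
        collect the resulting tokens into self.tokens."""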
        self.init()

        i = 0
        while True:
            c = self.chars[i]

            if self.firstNonBlank == '' and c not in [' ', "\n", "\t"]:
                # Store the first non-blank in a line.
                self.firstNonBlank = c
            elif c == "\n":
                self.firstNonBlank = ''

            if c == '#':
                i = self.pound(i)
            elif c == '/':
                i = self.slash(i)
            elif c == "\n":
                i = self.lineBreak(i)
            elif c == '"':
                i = self.doubleQuote(i)
            elif c in [' ', "\t"]:
                i = self.blank(i)
            elif c in ";()[]{}<>,=+-*":
                # Any outstanding single-character token.
                i = self.anyToken(i, c)
            elif self.isCodeVisible():
                self.token += c

            try:
                i = self.nextPos(i)
            except EOF:
                break

        if len(self.token):
            self.tokens.append(self.token)

        if not self.parentLexer and self.debug:
            progress ("-"*68 + "\n")
            progress ("All defines found in this translation unit:\n")
            keys = sorted(self.defines.keys())
            for key in keys:
                progress ("@ %s\n"%key)

    def dumpTokens (self, toError=False):
        dumpTokens(self.tokens, toError)


    def maybeAddToken (self):
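        """Append the token accumulated so far, if any, and reset it."""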
        if len(self.token) > 0:
            self.tokens.append(self.token)
            self.token = ''


    #--------------------------------------------------------------------
    # character handlers

    def blank (self, i):
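        """Blanks and tabs terminate the current token."""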
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()
        return i


    def pound (self, i):
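        """Handle a preprocessor directive.  The directive name and its
        argument are collected up to the end of the line (honoring
        backslash-newline continuations) and dispatched: #define and
        #include are expanded, #if/#ifdef/#ifndef/#elif/#else/#endif update
        the visibility stack, #undef/#error/#pragma are ignored, and any
        other directive aborts the run."""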

        if self.inMacroDefine:
            return i

        if self.firstNonBlank != '#':
            return i

        self.maybeAddToken()
        # We are in preprocessing mode.

        # Get the macro command name '#<command> .....'

        command, define, buf = '', '', ''
        firstNonBlank = False
        while True:
            try:
                i = self.nextPos(i)
                c = self.chars[i]
                if c == '\\' and self.chars[self.nextPos(i)] == "\n":
                    i = self.nextPos(i)
                    continue
            except EOF:
                break

            if c == "\n":
                if len(buf) > 0 and len(command) == 0:
                    command = buf
                i = self.prevPos(i)
                break
            elif c in [' ', "\t"]:
                if not firstNonBlank:
                    # Ignore any leading blanks after the '#'.
                    continue

                if len(command) == 0:
                    command = buf
                    buf = ''
                else:
                    buf += ' '
            elif c == '(':
                if len(buf) > 0 and len(command) == 0:
                    command = buf
                buf += c
            else:
                if not firstNonBlank:
                    firstNonBlank = True
                buf += c

        if command == 'define':
            self.handleMacroDefine(buf)
        elif command == 'include':
            self.handleMacroInclude(buf)
        elif command == 'ifdef':
            defineName = buf.strip()
            if defineName in self.defines:
                self.visibilityStack.append(SrcLexer.VISIBLE)
            else:
                self.visibilityStack.append(SrcLexer.INVISIBLE_PRE)

        elif command == 'ifndef':
            defineName = buf.strip()
            if defineName in self.defines:
                self.visibilityStack.append(SrcLexer.INVISIBLE_PRE)
            else:
                self.visibilityStack.append(SrcLexer.VISIBLE)

        elif command == 'if':
            if self.evalCodeVisibility(buf):
                self.visibilityStack.append(SrcLexer.VISIBLE)
            else:
                self.visibilityStack.append(SrcLexer.INVISIBLE_PRE)

        elif command == 'elif':
            if len(self.visibilityStack) == 0:
                raise ParseError ('')

            if self.visibilityStack[-1] == SrcLexer.VISIBLE:
                self.visibilityStack[-1] = SrcLexer.INVISIBLE_POST
            elif self.visibilityStack[-1] == SrcLexer.INVISIBLE_PRE:
                # Evaluate only if the current visibility is false.
                if self.evalCodeVisibility(buf):
                    self.visibilityStack[-1] = SrcLexer.VISIBLE

        elif command == 'else':
            if len(self.visibilityStack) == 0:
                raise ParseError ('')

            if self.visibilityStack[-1] == SrcLexer.VISIBLE:
                self.visibilityStack[-1] = SrcLexer.INVISIBLE_POST
            elif self.visibilityStack[-1] == SrcLexer.INVISIBLE_PRE:
                self.visibilityStack[-1] = SrcLexer.VISIBLE

        elif command == 'endif':
            if len(self.visibilityStack) == 0:
                raise ParseError ('')
            self.visibilityStack.pop()

        elif command == 'undef':
            pass
        elif command in ['error', 'pragma']:
            pass
        else:
            print("'%s' '%s'" % (command, buf))
            print(self.filepath)
            sys.exit(0)

        return i


    def evalCodeVisibility (self, buf):
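        """Evaluate an #if/#elif expression using Python's eval(); any
        expression that cannot be evaluated is treated as visible."""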
        try:
            return eval(buf)
        except Exception:
            return True

    def handleMacroDefine (self, buf):
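        """Parse a '#define' body with macroparser and, if it yields a
        valid macro, record it in self.defines under the macro's name."""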

        mparser = macroparser.MacroParser(buf)
        mparser.debug = self.debugMacro
        mparser.parse()
        macro = mparser.getMacro()
        if macro:
            self.defines[macro.name] = macro

    def handleMacroInclude (self, buf):
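        """Locate the included header in self.includeDirs, tokenize it with
        a child SrcLexer (reusing the class-level header cache when
        possible), and merge the macros it defines into this lexer."""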

        # Strip excess string if any.
        pos = buf.find(' ')
        if pos >= 0:
            buf = buf[:pos]
        headerSub = removeHeaderQuotes(buf)

        if not self.expandHeaders:
            # We don't want to expand headers.  Bail out.
            if self.debug:
                progress ("%s ignored\n"%headerSub)
            return

        defines = {}
        headerPath = None
        for includeDir in self.includeDirs:
            hpath = includeDir + '/' + headerSub
            if os.path.isfile(hpath) and hpath != self.filepath:
                headerPath = hpath
                break

        if not headerPath:
            error("included header file " + headerSub + " not found\n", self.stopOnHeader)
            return

        if self.debug:
            progress ("%s found\n"%headerPath)

        if headerPath in self.headerDict:
            if self.debug:
                progress ("%s already included\n"%headerPath)
            return

        if headerPath in SrcLexer.headerCache:
            if self.debug:
                progress ("%s in cache\n"%headerPath)
            for key in SrcLexer.headerCache[headerPath].defines.keys():
                self.defines[key] = SrcLexer.headerCache[headerPath].defines[key]
            return

        with open(headerPath, 'r') as f:
            chars = f.read()
        mclexer = SrcLexer(chars, headerPath)
        mclexer.copyProperties(self)
        mclexer.parentLexer = self
        mclexer.tokenize()
        hdrData = HeaderData()
        hdrData.tokens = mclexer.getTokens()
        headerDefines = mclexer.getDefines()
        for key in headerDefines.keys():
            defines[key] = headerDefines[key]
            hdrData.defines[key] = headerDefines[key]

        self.headerDict[headerPath] = True
        SrcLexer.headerCache[headerPath] = hdrData

        # Update the list of headers that have already been expanded.
        for key in mclexer.headerDict.keys():
            self.headerDict[key] = True

        if self.debug:
            progress ("defines found in header %s:\n"%headerSub)
            for key in defines.keys():
                progress ("  '%s'\n"%key)

        for key in defines.keys():
            self.defines[key] = defines[key]


    def slash (self, i):
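        """Handle '//' line comments and '/* ... */' block comments; a lone
        '/' is emitted as an ordinary single-character token."""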
        if not self.isCodeVisible():
            return i

        if i < self.bufsize - 1 and self.chars[i+1] == '/':
            # Parse line comment.
            line = ''
            i += 2
            while i < self.bufsize:
                c = self.chars[i]
                if ord(c) in [0x0A, 0x0D]:
                    return i - 1
                line += c
                i += 1
            self.token = ''
        elif i < self.bufsize - 1 and self.chars[i+1] == '*':
            comment = ''
            i += 2
            while i < self.bufsize:
                c = self.chars[i]
                if c == '/' and self.chars[i-1] == '*':
                    return i
                comment += c
                i += 1
        else:
            return self.anyToken(i, '/')

        return i


    def lineBreak (self, i):
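        """A line break terminates the current token."""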
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()

        return i


    def doubleQuote (self, i):
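        """Consume a double-quoted string literal and store it as a single
        token, surrounding quotes included."""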
        if not self.isCodeVisible():
            return i

        literal = ''
        i += 1
        while i < self.bufsize:
            c = self.chars[i]
            if c == '"':
                self.tokens.append('"'+literal+'"')
                break
            literal += c
            i += 1

        return i


    def anyToken (self, i, token):
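        """Flush any pending token and emit the given single-character
        token on its own."""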
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()
        self.token = token
        self.maybeAddToken()
        return i

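
# The block below is a minimal usage sketch, not part of the original
# module: 'example.src' and the '.' include directory are placeholders
# used purely to illustrate how SrcLexer is typically driven.
if __name__ == '__main__':
    path = sys.argv[1] if len(sys.argv) > 1 else 'example.src'
    with open(path, 'r') as f:
        chars = f.read()
    lexer = SrcLexer(chars, path)
    lexer.includeDirs.append('.')    # directories searched for #include targets
    lexer.expandHeaders = False      # skip header expansion for a quick dump
    lexer.tokenize()
    lexer.dumpTokens()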