xref: /trunk/main/toolkit/src2xml/source/srclexer.py (revision ae54856b)
# *************************************************************
#
#  Licensed to the Apache Software Foundation (ASF) under one
#  or more contributor license agreements.  See the NOTICE file
#  distributed with this work for additional information
#  regarding copyright ownership.  The ASF licenses this file
#  to you under the Apache License, Version 2.0 (the
#  "License"); you may not use this file except in compliance
#  with the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing,
#  software distributed under the License is distributed on an
#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
#  KIND, either express or implied.  See the License for the
#  specific language governing permissions and limitations
#  under the License.
#
# *************************************************************

import sys, os.path
from globals import *
import macroparser

class EOF(Exception):
    def __init__ (self):
        pass

    def __str__ (self):
        return "end of file"

class BOF(Exception):
    def __init__ (self):
        pass

    def __str__ (self):
        return "beginning of file"


def removeHeaderQuotes (orig):
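    """Strip surrounding double quotes or angle brackets from an #include target."""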
    if len(orig) <= 2:
        return orig
    elif orig[0] == orig[-1] == '"':
        return orig[1:-1]
    elif orig[0] == '<' and orig[-1] == '>':
        return orig[1:-1]

    return orig


def dumpTokens (tokens, toError=False):
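    """Pretty-print a token list, indenting one level per '{'/'<' ... '}'/'>' scope."""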

    scope = 0
    indent = "    "
    line = ''
    chars = ''

    for token in tokens:
        if token in '{<':
            if len(line) > 0:
                chars += indent*scope + line + "\n"
                line = ''
            chars += indent*scope + token + "\n"
            scope += 1

        elif token in '}>':
            if len(line) > 0:
                chars += indent*scope + line + "\n"
                line = ''
            scope -= 1
            chars += indent*scope + token

        elif token == ';':
            if len(line) > 0:
                chars += indent*scope + line + ";\n"
                line = ''
            else:
                chars += ";\n"
        elif len(token) > 0:
            line += token + ' '

    if len(line) > 0:
        chars += line
    chars += "\n"
    if toError:
        sys.stderr.write(chars)
    else:
        sys.stdout.write(chars)


class HeaderData(object):
    """Tokens and #defines collected from a single header file."""

    def __init__ (self):
        self.defines = {}
        self.tokens = []


class SrcLexer(object):
    """Lexical analyzer for the .src format.

    The role of a lexer is to parse the source file and break it into
    appropriate tokens.  Such tokens are later passed to a parser to
    build the syntax tree.
    """
    headerCache = {}

    VISIBLE = 0
    INVISIBLE_PRE = 1
    INVISIBLE_POST = 2
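    # Visibility states pushed onto visibilityStack, one per open conditional
    # preprocessor block:
    #   VISIBLE        - the current branch is taken
    #   INVISIBLE_PRE  - the branch is not taken (yet); a later #elif/#else
    #                    may still switch it to VISIBLE
    #   INVISIBLE_POST - an earlier branch was already taken, so the rest of
    #                    the block stays invisible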

    def __init__ (self, chars, filepath = None):
        self.filepath = filepath
        self.parentLexer = None
        self.chars = chars
        self.bufsize = len(self.chars)

        # TODO: use parameters for this
        # Properties that can be copied.
        self.headerDict = dict ()
        self.debug = False
        self.debugMacro = False
        self.includeDirs = list ()
        self.expandHeaders = True
        self.inMacroDefine = False
        self.stopOnHeader = False

    def copyProperties (self, other):
        """Copy properties from another instance of SrcLexer."""

        # TODO: use parameters for this
        self.headerDict = other.headerDict
        self.debug = other.debug
        self.debugMacro = other.debugMacro
        self.includeDirs = other.includeDirs[:]
        self.expandHeaders = other.expandHeaders
        self.inMacroDefine = other.inMacroDefine
        self.stopOnHeader = other.stopOnHeader

    def init (self):
        self.firstNonBlank = ''
        self.token = ''
        self.tokens = []
        self.defines = {}
        self.visibilityStack = []

    def getTokens (self):
        return self.tokens

    def getDefines (self):
        return self.defines

    def nextPos (self, i):
        while True:
            i += 1
            try:
                c = self.chars[i]
            except IndexError:
                raise EOF

            # Skip carriage returns.
            if ord(c) in [0x0D]:
                continue
            break
        return i

    def prevPos (self, i):
        while True:
            i -= 1
            try:
                c = self.chars[i]
            except IndexError:
                raise BOF

            # Skip carriage returns.
            if ord(c) in [0x0D]:
                continue
            break
        return i

    def isCodeVisible (self):
        if len(self.visibilityStack) == 0:
            return True
        for item in self.visibilityStack:
            if item != SrcLexer.VISIBLE:
                return False
        return True

    def tokenize (self):
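        """Tokenize the whole buffer, dispatching each character to the handlers below."""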
        self.init()

        i = 0
        while True:
            c = self.chars[i]

            if self.firstNonBlank == '' and not c in [' ', "\n", "\t"]:
                # Store the first non-blank in a line.
                self.firstNonBlank = c
            elif c == "\n":
                self.firstNonBlank = ''

            if c == '#':
                i = self.pound(i)
            elif c == '/':
                i = self.slash(i)
            elif c == "\n":
                i = self.lineBreak(i)
            elif c == '"':
                i = self.doubleQuote(i)
            elif c in [' ', "\t"]:
                i = self.blank(i)
            elif c in ";()[]{}<>,=+-*":
                # Any outstanding single-character token.
                i = self.anyToken(i, c)
            elif self.isCodeVisible():
                self.token += c

            try:
                i = self.nextPos(i)
            except EOF:
                break

        if len(self.token):
            self.tokens.append(self.token)

        if not self.parentLexer and self.debug:
            progress ("-"*68 + "\n")
            progress ("All defines found in this translation unit:\n")
            keys = self.defines.keys()
            keys.sort()
            for key in keys:
                progress ("@ %s\n"%key)

    def dumpTokens (self, toError=False):
        dumpTokens(self.tokens, toError)


    def maybeAddToken (self):
        """Flush the pending token, if any, into the token list."""
        if len(self.token) > 0:
            self.tokens.append(self.token)
            self.token = ''


    #--------------------------------------------------------------------
    # character handlers

    def blank (self, i):
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()
        return i


    def pound (self, i):
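        """Handle a '#' preprocessor directive (define, include, conditionals, ...)."""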

        if self.inMacroDefine:
            return i

        if not self.firstNonBlank == '#':
            return i

        self.maybeAddToken()
        # We are in preprocessing mode.

        # Get the macro command name '#<command> .....'
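        # For example, '#include "foo.hrc"' yields command = 'include' and
        # buf = '"foo.hrc"' (the file name here is just an illustration).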

        command, define, buf = '', '', ''
        firstNonBlank = False
        while True:
            try:
                i = self.nextPos(i)
                c = self.chars[i]
                if c == '\\' and self.chars[self.nextPos(i)] == "\n":
                    i = self.nextPos(i)
                    continue
            except EOF:
                break

            if c == "\n":
                if len(buf) > 0 and len(command) == 0:
                    command = buf
                i = self.prevPos(i)
                break
            elif c in [' ', "\t"]:
                if not firstNonBlank:
                    # Ignore any leading blanks after the '#'.
                    continue

                if len(command) == 0:
                    command = buf
                    buf = ''
                else:
                    buf += ' '
            elif c == '(':
                if len(buf) > 0 and len(command) == 0:
                    command = buf
                buf += c
            else:
                if not firstNonBlank:
                    firstNonBlank = True
                buf += c

        if command == 'define':
            self.handleMacroDefine(buf)
        elif command == 'include':
            self.handleMacroInclude(buf)
        elif command == 'ifdef':
            defineName = buf.strip()
            if self.defines.has_key(defineName):
                self.visibilityStack.append(SrcLexer.VISIBLE)
            else:
                self.visibilityStack.append(SrcLexer.INVISIBLE_PRE)

        elif command == 'ifndef':
            defineName = buf.strip()
            if self.defines.has_key(defineName):
                self.visibilityStack.append(SrcLexer.INVISIBLE_PRE)
            else:
                self.visibilityStack.append(SrcLexer.VISIBLE)

        elif command == 'if':
            if self.evalCodeVisibility(buf):
                self.visibilityStack.append(SrcLexer.VISIBLE)
            else:
                self.visibilityStack.append(SrcLexer.INVISIBLE_PRE)

        elif command == 'elif':
            if len(self.visibilityStack) == 0:
                raise ParseError ('#elif without a matching #if')

            if self.visibilityStack[-1] == SrcLexer.VISIBLE:
                self.visibilityStack[-1] = SrcLexer.INVISIBLE_POST
            elif self.visibilityStack[-1] == SrcLexer.INVISIBLE_PRE:
                # Evaluate only if the current visibility is false.
                if self.evalCodeVisibility(buf):
                    self.visibilityStack[-1] = SrcLexer.VISIBLE

        elif command == 'else':
            if len(self.visibilityStack) == 0:
                raise ParseError ('#else without a matching #if')

            if self.visibilityStack[-1] == SrcLexer.VISIBLE:
                self.visibilityStack[-1] = SrcLexer.INVISIBLE_POST
            elif self.visibilityStack[-1] == SrcLexer.INVISIBLE_PRE:
                self.visibilityStack[-1] = SrcLexer.VISIBLE

        elif command == 'endif':
            if len(self.visibilityStack) == 0:
                raise ParseError ('#endif without a matching #if')
            self.visibilityStack.pop()

        elif command == 'undef':
            pass
        elif command in ['error', 'pragma']:
            pass
        else:
            # Unknown preprocessor directive; report it and stop.
            print "'%s' '%s'"%(command, buf)
            print self.filepath
            sys.exit(0)

        return i


    def evalCodeVisibility (self, buf):
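        """Evaluate an #if/#elif expression via eval(); assume visible if it cannot be evaluated."""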
        try:
            return eval(buf)
        except:
            return True

    def handleMacroDefine (self, buf):

        mparser = macroparser.MacroParser(buf)
        mparser.debug = self.debugMacro
        mparser.parse()
        macro = mparser.getMacro()
        if macro:
            self.defines[macro.name] = macro

    def handleMacroInclude (self, buf):
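        """Find the included header in includeDirs, tokenize it (once, with caching), and absorb its #defines."""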

        # Strip excess string if any.
        pos = buf.find(' ')
        if pos >= 0:
            buf = buf[:pos]
        headerSub = removeHeaderQuotes(buf)

        if not self.expandHeaders:
            # We don't want to expand headers.  Bail out.
            if self.debug:
                progress ("%s ignored\n"%headerSub)
            return

        defines = {}
        headerPath = None
        for includeDir in self.includeDirs:
            hpath = includeDir + '/' + headerSub
            if os.path.isfile(hpath) and hpath != self.filepath:
                headerPath = hpath
                break

        if not headerPath:
            error("included header file " + headerSub + " not found\n", self.stopOnHeader)
            return

        if self.debug:
            progress ("%s found\n"%headerPath)

        if headerPath in self.headerDict:
            if self.debug:
                progress ("%s already included\n"%headerPath)
            return

        if SrcLexer.headerCache.has_key(headerPath):
            if self.debug:
                progress ("%s in cache\n"%headerPath)
            for key in SrcLexer.headerCache[headerPath].defines.keys():
                self.defines[key] = SrcLexer.headerCache[headerPath].defines[key]
            return

        chars = open(headerPath, 'r').read()
        mclexer = SrcLexer(chars, headerPath)
        mclexer.copyProperties(self)
        mclexer.parentLexer = self
        mclexer.tokenize()
        hdrData = HeaderData()
        hdrData.tokens = mclexer.getTokens()
        headerDefines = mclexer.getDefines()
        for key in headerDefines.keys():
            defines[key] = headerDefines[key]
            hdrData.defines[key] = headerDefines[key]

        self.headerDict[headerPath] = True
        SrcLexer.headerCache[headerPath] = hdrData

        # Update the list of headers that have already been expanded.
        for key in mclexer.headerDict.keys():
            self.headerDict[key] = True

        if self.debug:
            progress ("defines found in header %s:\n"%headerSub)
            for key in defines.keys():
                progress ("  '%s'\n"%key)

        for key in defines.keys():
            self.defines[key] = defines[key]


    def slash (self, i):
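        """Handle '/': a '//' line comment, a '/*...*/' block comment, or a plain '/' token."""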
        if not self.isCodeVisible():
            return i

        if i < self.bufsize - 1 and self.chars[i+1] == '/':
            # Parse line comment.
            line = ''
            i += 2
            while i < self.bufsize:
                c = self.chars[i]
                if ord(c) in [0x0A, 0x0D]:
                    return i - 1
                line += c
                i += 1
            self.token = ''
        elif i < self.bufsize - 1 and self.chars[i+1] == '*':
            # Parse block comment.
            comment = ''
            i += 2
            while i < self.bufsize:
                c = self.chars[i]
                if c == '/' and self.chars[i-1] == '*':
                    return i
                comment += c
                i += 1
        else:
            return self.anyToken(i, '/')

        return i


    def lineBreak (self, i):
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()

        return i


    def doubleQuote (self, i):
        if not self.isCodeVisible():
            return i

        literal = ''
        i += 1
        while i < self.bufsize:
            c = self.chars[i]
            if c == '"':
                self.tokens.append('"'+literal+'"')
                break
            literal += c
            i += 1

        return i


    def anyToken (self, i, token):
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()
        self.token = token
        self.maybeAddToken()
        return i
510