# *************************************************************
#
#  Licensed to the Apache Software Foundation (ASF) under one
#  or more contributor license agreements.  See the NOTICE file
#  distributed with this work for additional information
#  regarding copyright ownership.  The ASF licenses this file
#  to you under the Apache License, Version 2.0 (the
#  "License"); you may not use this file except in compliance
#  with the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing,
#  software distributed under the License is distributed on an
#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
#  KIND, either express or implied.  See the License for the
#  specific language governing permissions and limitations
#  under the License.
#
# *************************************************************

import sys, os.path
from globals import *
import macroparser


class EOF(Exception):
    """Raised when the lexer moves past the last character of the buffer."""

    # FIX: was a plain method named 'str', which str(exc) never calls;
    # renamed to the __str__ special method so the message is actually used.
    def __str__ (self):
        return "end of file"


class BOF(Exception):
    """Raised when the lexer moves before the first character of the buffer."""

    def __str__ (self):
        return "beginning of file"


def removeHeaderQuotes (orig):
    """Strip one matching pair of include quotes ("..." or <...>) from orig.

    Strings that are too short or not quoted are returned unchanged."""
    if len(orig) <= 2:
        return orig
    elif orig[0] == orig[-1] == '"':
        return orig[1:-1]
    elif orig[0] == '<' and orig[-1] == '>':
        return orig[1:-1]

    return orig


def dumpTokens (tokens, toError=False):
    """Pretty-print a token stream with scope-based indentation.

    '{'/'<' open a scope, '}'/'>' close one, and ';' terminates a line.
    Output goes to stderr when toError is True, otherwise to stdout."""

    scope = 0
    indent = "    "
    line = ''
    chars = ''

    for token in tokens:
        if token in '{<':
            # Flush the pending line, then emit the opener and indent deeper.
            if len(line) > 0:
                chars += indent*scope + line + "\n"
                line = ''
            chars += indent*scope + token + "\n"
            scope += 1

        elif token in '}>':
            if len(line) > 0:
                chars += indent*scope + line + "\n"
                line = ''
            scope -= 1
            chars += indent*scope + token

        elif token == ';':
            if len(line) > 0:
                chars += indent*scope + line + ";\n"
                line = ''
            else:
                chars += ";\n"
        elif len(token) > 0:
            line += token + ' '

    if len(line) > 0:
        chars += line
    chars += "\n"
    if toError:
        sys.stderr.write(chars)
    else:
        sys.stdout.write(chars)


class HeaderData(object):
    """Cached result of tokenizing a header: its defines and token stream."""

    def __init__ (self):
        self.defines = {}   # macro name -> macro object
        self.tokens = []    # token list produced by the lexer


class SrcLexer(object):
    """Lexicographical analyzer for .src format.

The role of a lexer is to parse the source file and break it into
appropriate tokens.  Such tokens are later passed to a parser to
build the syntax tree.
"""
    # Class-wide cache of HeaderData keyed by header path, so each header
    # is tokenized at most once per process.
    headerCache = {}

    # Conditional-compilation states kept on visibilityStack:
    VISIBLE = 0          # current branch is being compiled
    INVISIBLE_PRE = 1    # skipped, but a later #elif/#else may enable it
    INVISIBLE_POST = 2   # skipped because a true branch was already taken

    def __init__ (self, chars, filepath = None):
        self.filepath = filepath
        self.parentLexer = None     # set when this lexer expands a header
        self.chars = chars
        self.bufsize = len(self.chars)

        # TODO: use parameters for this
        # Properties that can be copied.
        self.headerDict = dict ()
        self.debug = False
        self.debugMacro = False
        self.includeDirs = list ()
        self.expandHeaders = True
        self.inMacroDefine = False
        self.stopOnHeader = False

    def copyProperties (self, other):
        """Copy properties from another instance of SrcLexer."""

        # TODO: use parameters for this
        # NOTE: headerDict is shared (not copied) so nested lexers record
        # expanded headers into the same dict; includeDirs is a real copy.
        self.headerDict = other.headerDict
        self.debug = other.debug
        self.debugMacro = other.debugMacro
        self.includeDirs = other.includeDirs[:]
        self.expandHeaders = other.expandHeaders
        self.inMacroDefine = other.inMacroDefine
        self.stopOnHeader = other.stopOnHeader

    def init (self):
        """Reset all per-run state before tokenizing."""
        self.firstNonBlank = ''     # first non-blank char of the current line
        self.token = ''             # token currently being accumulated
        self.tokens = []
        self.defines = {}
        self.visibilityStack = []

    def getTokens (self):
        """Return the token list produced by tokenize()."""
        return self.tokens

    def getDefines (self):
        """Return the macro definitions collected by tokenize()."""
        return self.defines

    def nextPos (self, i):
        """Advance i to the next position, skipping carriage returns.

        Raises EOF past the end of the buffer."""
        while True:
            i += 1
            try:
                c = self.chars[i]
            except IndexError:
                raise EOF

            if ord(c) in [0x0D]:
                continue
            break
        return i

    def prevPos (self, i):
        """Move i to the previous position, skipping carriage returns.

        Raises BOF before the start of the buffer."""
        while True:
            i -= 1
            try:
                c = self.chars[i]
            except IndexError:
                raise BOF

            if ord(c) in [0x0D]:
                continue
            break
        return i

    def isCodeVisible (self):
        """Return True when every enclosing conditional block is VISIBLE."""
        if len(self.visibilityStack) == 0:
            return True
        for item in self.visibilityStack:
            if item != SrcLexer.VISIBLE:
                return False
        return True

    def tokenize (self):
        """Scan the whole buffer, dispatching each character to its handler."""
        self.init()

        i = 0
        while True:
            c = self.chars[i]

            if self.firstNonBlank == '' and not c in [' ', "\n", "\t"]:
                # Store the first non-blank in a line.
                self.firstNonBlank = c
            elif c == "\n":
                self.firstNonBlank = ''

            if c == '#':
                i = self.pound(i)
            elif c == '/':
                i = self.slash(i)
            elif c == "\n":
                i = self.lineBreak(i)
            elif c == '"':
                i = self.doubleQuote(i)
            elif c in [' ', "\t"]:
                i = self.blank(i)
            elif c in ";()[]{}<>,=+-*":
                # Any outstanding single-character token.
                i = self.anyToken(i, c)
            elif self.isCodeVisible():
                self.token += c

            try:
                i = self.nextPos(i)
            except EOF:
                break

        # Flush any token still pending at end of buffer.
        if len(self.token):
            self.tokens.append(self.token)

        if not self.parentLexer and self.debug:
            progress ("-"*68 + "\n")
            progress ("All defines found in this translation unit:\n")
            keys = sorted(self.defines.keys())
            for key in keys:
                progress ("@ %s\n"%key)

    def dumpTokens (self, toError=False):
        """Dump this lexer's tokens via the module-level dumpTokens()."""
        dumpTokens(self.tokens, toError)


    def maybeAddToken (self):
        """Flush the accumulated token, if any, into the token list."""
        if len(self.token) > 0:
            self.tokens.append(self.token)
            self.token = ''


    #--------------------------------------------------------------------
    # character handlers
    #
    # Each handler receives the position of the trigger character and
    # returns the position of the last character it consumed; tokenize()
    # then advances past it.

    def blank (self, i):
        """Whitespace ends the current token."""
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()
        return i


    def pound (self, i):
        """Handle '#': parse an entire preprocessor directive."""

        if self.inMacroDefine:
            return i

        # Only a '#' that starts the line (ignoring blanks) is a directive.
        if not self.firstNonBlank == '#':
            return i

        self.maybeAddToken()
        # We are in preprocessing mode.

        # Get the macro command name '#<command> .....'

        command, buf = '', ''
        seenNonBlank = False
        while True:
            try:
                i = self.nextPos(i)
                c = self.chars[i]
                # A backslash-newline continues the directive on the next line.
                if c == '\\' and self.chars[self.nextPos(i)] == "\n":
                    i = self.nextPos(i)
                    continue
            except EOF:
                break

            if c == "\n":
                if len(buf) > 0 and len(command) == 0:
                    command = buf
                # Step back so the main loop still sees the newline.
                i = self.prevPos(i)
                break
            elif c in [' ', "\t"]:
                if not seenNonBlank:
                    # Ignore any leading blanks after the '#'.
                    continue

                # First blank separates the command from its arguments.
                if len(command) == 0:
                    command = buf
                    buf = ''
                else:
                    buf += ' '
            elif c == '(':
                # '(' directly after the name starts a function-like macro's
                # parameter list; the command name ends here.
                if len(buf) > 0 and len(command) == 0:
                    command = buf
                buf += c
            else:
                if not seenNonBlank:
                    seenNonBlank = True
                buf += c

        if command == 'define':
            self.handleMacroDefine(buf)
        elif command == 'include':
            self.handleMacroInclude(buf)
        elif command == 'ifdef':
            defineName = buf.strip()
            # was defines.has_key(...): removed in Python 3, 'in' is equivalent
            if defineName in self.defines:
                self.visibilityStack.append(SrcLexer.VISIBLE)
            else:
                self.visibilityStack.append(SrcLexer.INVISIBLE_PRE)

        elif command == 'ifndef':
            defineName = buf.strip()
            if defineName in self.defines:
                self.visibilityStack.append(SrcLexer.INVISIBLE_PRE)
            else:
                self.visibilityStack.append(SrcLexer.VISIBLE)

        elif command == 'if':
            if self.evalCodeVisibility(buf):
                self.visibilityStack.append(SrcLexer.VISIBLE)
            else:
                self.visibilityStack.append(SrcLexer.INVISIBLE_PRE)

        elif command == 'elif':
            if len(self.visibilityStack) == 0:
                raise ParseError ('')

            if self.visibilityStack[-1] == SrcLexer.VISIBLE:
                # A true branch was already taken; all later branches are dead.
                self.visibilityStack[-1] = SrcLexer.INVISIBLE_POST
            elif self.visibilityStack[-1] == SrcLexer.INVISIBLE_PRE:
                # Evaluate only if the current visibility is false.
                if self.evalCodeVisibility(buf):
                    self.visibilityStack[-1] = SrcLexer.VISIBLE

        elif command == 'else':
            if len(self.visibilityStack) == 0:
                raise ParseError ('')

            if self.visibilityStack[-1] == SrcLexer.VISIBLE:
                self.visibilityStack[-1] = SrcLexer.INVISIBLE_POST
            elif self.visibilityStack[-1] == SrcLexer.INVISIBLE_PRE:
                self.visibilityStack[-1] = SrcLexer.VISIBLE

        elif command == 'endif':
            if len(self.visibilityStack) == 0:
                raise ParseError ('')
            self.visibilityStack.pop()

        elif command == 'undef':
            pass
        elif command in ['error', 'pragma']:
            pass
        else:
            # Unknown directive: report it and abort the run.
            print("'%s' '%s'"%(command, buf))
            print(self.filepath)
            sys.exit(0)

        return i


    def evalCodeVisibility (self, buf):
        """Evaluate a #if/#elif expression; unparsable text counts as True.

        NOTE(review): this uses eval() on directive text, which is unsafe on
        untrusted input; tolerable only because .src files are build inputs."""
        try:
            return eval(buf)
        except Exception:
            # Expression uses syntax or names Python cannot evaluate;
            # original policy is to treat such blocks as visible.
            return True

    def handleMacroDefine (self, buf):
        """Parse a '#define' body and record the resulting macro, if valid."""

        mparser = macroparser.MacroParser(buf)
        mparser.debug = self.debugMacro
        mparser.parse()
        macro = mparser.getMacro()
        if macro:
            self.defines[macro.name] = macro

    def handleMacroInclude (self, buf):
        """Locate, tokenize and cache a '#include'd header, merging its defines."""

        # Strip excess string if any.
        pos = buf.find(' ')
        if pos >= 0:
            buf = buf[:pos]
        headerSub = removeHeaderQuotes(buf)

        if not self.expandHeaders:
            # We don't want to expand headers.  Bail out.
            if self.debug:
                progress ("%s ignored\n"%headerSub)
            return

        defines = {}
        headerPath = None
        for includeDir in self.includeDirs:
            hpath = includeDir + '/' + headerSub
            # Never re-include the file currently being lexed.
            if os.path.isfile(hpath) and hpath != self.filepath:
                headerPath = hpath
                break

        if not headerPath:
            error("included header file " + headerSub + " not found\n", self.stopOnHeader)
            return

        if self.debug:
            progress ("%s found\n"%headerPath)

        if headerPath in self.headerDict:
            if self.debug:
                progress ("%s already included\n"%headerPath)
            return

        if headerPath in SrcLexer.headerCache:
            if self.debug:
                progress ("%s in cache\n"%headerPath)
            for key in SrcLexer.headerCache[headerPath].defines.keys():
                self.defines[key] = SrcLexer.headerCache[headerPath].defines[key]
            return

        # FIX: close the header file deterministically (was left open).
        with open(headerPath, 'r') as headerFile:
            chars = headerFile.read()
        mclexer = SrcLexer(chars, headerPath)
        mclexer.copyProperties(self)
        mclexer.parentLexer = self
        mclexer.tokenize()
        hdrData = HeaderData()
        hdrData.tokens = mclexer.getTokens()
        headerDefines = mclexer.getDefines()
        for key in headerDefines.keys():
            defines[key] = headerDefines[key]
            hdrData.defines[key] = headerDefines[key]

        self.headerDict[headerPath] = True
        SrcLexer.headerCache[headerPath] = hdrData

        # Update the list of headers that have already been expanded.
        for key in mclexer.headerDict.keys():
            self.headerDict[key] = True

        if self.debug:
            progress ("defines found in header %s:\n"%headerSub)
            for key in defines.keys():
                progress ("  '%s'\n"%key)

        for key in defines.keys():
            self.defines[key] = defines[key]


    def slash (self, i):
        """Handle '/': skip comments, or emit '/' as an ordinary token."""
        if not self.isCodeVisible():
            return i

        if i < self.bufsize - 1 and self.chars[i+1] == '/':
            # Line comment: stop just before the line break so the main
            # loop still processes the newline itself.
            i += 2
            while i < self.bufsize:
                if ord(self.chars[i]) in [0x0A, 0x0D]:
                    return i - 1
                i += 1
            # Comment ran to the end of the buffer; drop any pending token.
            self.token = ''
        elif i < self.bufsize - 1 and self.chars[i+1] == '*':
            # Block comment: skip to the terminating '*/'.
            # NOTE(review): '/*/' satisfies this test, unlike a C compiler,
            # because the opener's '*' matches chars[i-1] — confirm intended.
            i += 2
            while i < self.bufsize:
                if self.chars[i] == '/' and self.chars[i-1] == '*':
                    return i
                i += 1
        else:
            return self.anyToken(i, '/')

        return i


    def lineBreak (self, i):
        """A newline ends the current token."""
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()

        return i


    def doubleQuote (self, i):
        """Collect a double-quoted string literal (no escape handling)."""
        if not self.isCodeVisible():
            return i

        literal = ''
        i += 1
        while i < self.bufsize:
            c = self.chars[i]
            if c == '"':
                # Store the literal with its surrounding quotes.
                self.tokens.append('"'+literal+'"')
                break
            literal += c
            i += 1

        return i


    def anyToken (self, i, token):
        """Emit a single-character token, flushing any pending token first."""
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()
        self.token = token
        self.maybeAddToken()
        return i