import sys, os.path
from globals import *
import macroparser

class EOF(Exception):
    """Raised when the lexer scans past the end of the buffer."""

    def __init__ (self):
        pass

    def __str__ (self):
        # BUGFIX: this was named 'str', which str() and "%s" never invoke.
        return "end of file"

class BOF(Exception):
    """Raised when the lexer scans past the beginning of the buffer."""

    def __init__ (self):
        pass

    def __str__ (self):
        # BUGFIX: this was named 'str', which str() and "%s" never invoke.
        return "beginning of file"


def removeHeaderQuotes (orig):
    """Strip one pair of surrounding '"..."' or '<...>' quotes from a header name.

    Returns the argument unchanged when it is too short or not quoted.
    """
    if len(orig) <= 2:
        return orig
    elif orig[0] == orig[-1] == '"':
        return orig[1:-1]
    elif orig[0] == '<' and orig[-1] == '>':
        return orig[1:-1]

    return orig


def dumpTokens (tokens, toError=False):
    """Pretty-print a token stream, indenting on brace/bracket scopes.

    Writes to stderr when toError is True, otherwise to stdout.
    """
    scope = 0
    indent = "    "
    line = ''
    chars = ''

    for token in tokens:
        if token in '{<':
            # Flush the pending line, emit the opener, then indent deeper.
            if len(line) > 0:
                chars += indent*scope + line + "\n"
                line = ''
            chars += indent*scope + token + "\n"
            scope += 1

        elif token in '}>':
            if len(line) > 0:
                chars += indent*scope + line + "\n"
                line = ''
            scope -= 1
            chars += indent*scope + token

        elif token == ';':
            if len(line) > 0:
                chars += indent*scope + line + ";\n"
                line = ''
            else:
                chars += ";\n"
        elif len(token) > 0:
            line += token + ' '

    if len(line) > 0:
        chars += line
    chars += "\n"
    if toError:
        sys.stderr.write(chars)
    else:
        sys.stdout.write(chars)


class HeaderData(object):
    """Cached result of tokenizing one header file."""

    def __init__ (self):
        self.defines = {}  # macro name -> macro object parsed from the header
        self.tokens = []   # token stream produced for the header


class SrcLexer(object):
    """Lexicographical analyzer for .src format.

The role of a lexer is to parse the source file and break it into
appropriate tokens. Such tokens are later passed to a parser to
build the syntax tree.
"""
    # Headers already tokenized, shared across all lexer instances.
    headerCache = {}

    # Conditional-compilation visibility states kept on visibilityStack:
    VISIBLE = 0         # code in this region is emitted
    INVISIBLE_PRE = 1   # hidden, but a later #elif/#else may still enable it
    INVISIBLE_POST = 2  # hidden, a true branch has already been taken
    
    def __init__ (self, chars, filepath = None):
        self.filepath = filepath
        self.parentLexer = None   # set when this lexer expands an #include
        self.chars = chars
        self.bufsize = len(self.chars)

        # TODO: use parameters for this
        # Properties that can be copied (see copyProperties).
        self.headerDict = dict ()
        self.debug = False
        self.debugMacro = False
        self.includeDirs = list ()
        self.expandHeaders = True
        self.inMacroDefine = False
        self.stopOnHeader = False

    def copyProperties (self, other):
        """Copy properties from another instance of SrcLexer."""

        # TODO: use parameters for this
        self.headerDict = other.headerDict
        self.debug = other.debug
        self.debugMacro = other.debugMacro
        self.includeDirs = other.includeDirs[:]
        self.expandHeaders = other.expandHeaders
        self.inMacroDefine = other.inMacroDefine
        self.stopOnHeader = other.stopOnHeader

    def init (self):
        """Reset the per-run tokenizer state."""
        self.firstNonBlank = ''    # first non-blank character of the current line
        self.token = ''            # token currently being accumulated
        self.tokens = []
        self.defines = {}
        self.visibilityStack = []  # nested #if/#ifdef visibility states

    def getTokens (self):
        return self.tokens

    def getDefines (self):
        return self.defines

    def nextPos (self, i):
        """Return the next buffer index, skipping carriage returns.

        Raises EOF when the end of the buffer is passed.
        """
        while True:
            i += 1
            try:
                c = self.chars[i]
            except IndexError:
                raise EOF

            if ord(c) in [0x0D]:
                continue
            break
        return i

    def prevPos (self, i):
        """Return the previous buffer index, skipping carriage returns.

        Raises BOF when the beginning of the buffer is passed.
        """
        while True:
            i -= 1
            if i < 0:
                # BUGFIX: a negative index silently wraps to the end of the
                # buffer in Python instead of raising IndexError, so the old
                # IndexError-based check never fired.  Raise BOF explicitly.
                raise BOF
            try:
                c = self.chars[i]
            except IndexError:
                raise BOF

            if ord(c) in [0x0D]:
                continue
            break
        return i

    def isCodeVisible (self):
        """Return True when no enclosing conditional hides the current code."""
        if len(self.visibilityStack) == 0:
            return True
        for item in self.visibilityStack:
            if item != SrcLexer.VISIBLE:
                return False
        return True

    def tokenize (self):
        """Scan self.chars and populate self.tokens and self.defines."""
        self.init()

        # BUGFIX: an empty buffer used to raise IndexError on the first read.
        if self.bufsize == 0:
            return

        i = 0
        while True:
            c = self.chars[i]

            if self.firstNonBlank == '' and not c in [' ', "\n", "\t"]:
                # Store the first non-blank in a line.
                self.firstNonBlank = c
            elif c == "\n":
                self.firstNonBlank = ''

            if c == '#':
                i = self.pound(i)
            elif c == '/':
                i = self.slash(i)
            elif c == "\n":
                i = self.lineBreak(i)
            elif c == '"':
                i = self.doubleQuote(i)
            elif c in [' ', "\t"]:
                i = self.blank(i)
            elif c in ";()[]{}<>,=+-*":
                # Any outstanding single-character token.
                i = self.anyToken(i, c)
            elif self.isCodeVisible():
                self.token += c

            try:
                i = self.nextPos(i)
            except EOF:
                break

        if len(self.token):
            self.tokens.append(self.token)

        if not self.parentLexer and self.debug:
            progress ("-"*68 + "\n")
            progress ("All defines found in this translation unit:\n")
            # sorted() is portable; list.sort() on dict.keys() is Python-2-only.
            for key in sorted(self.defines.keys()):
                progress ("@ %s\n"%key)

    def dumpTokens (self, toError=False):
        dumpTokens(self.tokens, toError)


    def maybeAddToken (self):
        """Flush the currently accumulated token, if any, into self.tokens."""
        if len(self.token) > 0:
            self.tokens.append(self.token)
            self.token = ''


    #--------------------------------------------------------------------
    # character handlers

    def blank (self, i):
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()
        return i


    def pound (self, i):
        """Handle a '#' that starts a preprocessor directive.

        Parses '#<command> ...' up to the end of the line (honoring
        backslash continuations) and dispatches on the command.
        """
        if self.inMacroDefine:
            return i

        # Only a '#' that is the first non-blank of its line starts a directive.
        if not self.firstNonBlank == '#':
            return i

        self.maybeAddToken()
        # We are in preprocessing mode.

        # Get the macro command name '#<command> .....'

        command, define, buf = '', '', ''
        seenNonBlank = False
        while True:
            try:
                i = self.nextPos(i)
                c = self.chars[i]
                # A backslash immediately before a newline continues the line.
                if c == '\\' and self.chars[self.nextPos(i)] == "\n":
                    i = self.nextPos(i)
                    continue
            except EOF:
                break

            if c == "\n":
                if len(buf) > 0 and len(command) == 0:
                    command = buf
                # Step back so the main loop sees the newline itself.
                i = self.prevPos(i)
                break
            elif c in [' ', "\t"]:
                if not seenNonBlank:
                    # Ignore any leading blanks after the '#'.
                    continue

                if len(command) == 0:
                    # First whitespace ends the command word.
                    command = buf
                    buf = ''
                else:
                    buf += ' '
            elif c == '(':
                # '(' ends the command word too (function-like macros).
                if len(buf) > 0 and len(command) == 0:
                    command = buf
                buf += c
            else:
                if not seenNonBlank:
                    seenNonBlank = True
                buf += c

        if command == 'define':
            self.handleMacroDefine(buf)
        elif command == 'include':
            self.handleMacroInclude(buf)
        elif command == 'ifdef':
            defineName = buf.strip()
            if defineName in self.defines:
                self.visibilityStack.append(SrcLexer.VISIBLE)
            else:
                self.visibilityStack.append(SrcLexer.INVISIBLE_PRE)

        elif command == 'ifndef':
            defineName = buf.strip()
            if defineName in self.defines:
                self.visibilityStack.append(SrcLexer.INVISIBLE_PRE)
            else:
                self.visibilityStack.append(SrcLexer.VISIBLE)

        elif command == 'if':
            if self.evalCodeVisibility(buf):
                self.visibilityStack.append(SrcLexer.VISIBLE)
            else:
                self.visibilityStack.append(SrcLexer.INVISIBLE_PRE)

        elif command == 'elif':
            if len(self.visibilityStack) == 0:
                raise ParseError ('')

            if self.visibilityStack[-1] == SrcLexer.VISIBLE:
                # A true branch was already taken; everything further is dead.
                self.visibilityStack[-1] = SrcLexer.INVISIBLE_POST
            elif self.visibilityStack[-1] == SrcLexer.INVISIBLE_PRE:
                # Evaluate only if the current visibility is false.
                if self.evalCodeVisibility(buf):
                    self.visibilityStack[-1] = SrcLexer.VISIBLE

        elif command == 'else':
            if len(self.visibilityStack) == 0:
                raise ParseError ('')

            if self.visibilityStack[-1] == SrcLexer.VISIBLE:
                self.visibilityStack[-1] = SrcLexer.INVISIBLE_POST
            elif self.visibilityStack[-1] == SrcLexer.INVISIBLE_PRE:
                # (The original used a second plain 'if' here; it could never
                # fire after the branch above rewrote the state, so 'elif' is
                # behaviorally identical and clearer.)
                self.visibilityStack[-1] = SrcLexer.VISIBLE

        elif command == 'endif':
            if len(self.visibilityStack) == 0:
                raise ParseError ('')
            self.visibilityStack.pop()

        elif command == 'undef':
            pass
        elif command in ['error', 'pragma']:
            pass
        else:
            print ("'%s' '%s'"%(command, buf))
            print (self.filepath)
            sys.exit(0)

        return i


    def evalCodeVisibility (self, buf):
        """Evaluate a #if/#elif expression; unparseable input counts as True.

        SECURITY NOTE: this eval()s text taken straight from the source file,
        which is only acceptable because the input is trusted build source.
        """
        try:
            return eval(buf)
        except Exception:
            # Expressions using C-preprocessor syntax don't eval cleanly;
            # historically such regions are treated as visible.
            return True

    def handleMacroDefine (self, buf):
        """Parse '#define ...' text and record the resulting macro."""
        mparser = macroparser.MacroParser(buf)
        mparser.debug = self.debugMacro
        mparser.parse()
        macro = mparser.getMacro()
        if macro:
            self.defines[macro.name] = macro

    def handleMacroInclude (self, buf):
        """Locate an '#include'd header, tokenize it (once), absorb its defines."""

        # Strip excess string if any.
        pos = buf.find(' ')
        if pos >= 0:
            buf = buf[:pos]
        headerSub = removeHeaderQuotes(buf)

        if not self.expandHeaders:
            # We don't want to expand headers.  Bail out.
            if self.debug:
                progress ("%s ignored\n"%headerSub)
            return

        defines = {}
        headerPath = None
        for includeDir in self.includeDirs:
            hpath = includeDir + '/' + headerSub
            # Guard against a header including itself.
            if os.path.isfile(hpath) and hpath != self.filepath:
                headerPath = hpath
                break

        if not headerPath:
            error("included header file " + headerSub + " not found\n", self.stopOnHeader)
            return

        if self.debug:
            progress ("%s found\n"%headerPath)

        if headerPath in self.headerDict:
            if self.debug:
                progress ("%s already included\n"%headerPath)
            return

        if headerPath in SrcLexer.headerCache:
            if self.debug:
                progress ("%s in cache\n"%headerPath)
            for key in SrcLexer.headerCache[headerPath].defines.keys():
                self.defines[key] = SrcLexer.headerCache[headerPath].defines[key]
            return

        # BUGFIX: the file handle was never closed.
        with open(headerPath, 'r') as headerFile:
            chars = headerFile.read()
        mclexer = SrcLexer(chars, headerPath)
        mclexer.copyProperties(self)
        mclexer.parentLexer = self
        mclexer.tokenize()
        hdrData = HeaderData()
        hdrData.tokens = mclexer.getTokens()
        headerDefines = mclexer.getDefines()
        for key in headerDefines.keys():
            defines[key] = headerDefines[key]
            hdrData.defines[key] = headerDefines[key]

        self.headerDict[headerPath] = True
        SrcLexer.headerCache[headerPath] = hdrData

        # Update the list of headers that have already been expanded.
        for key in mclexer.headerDict.keys():
            self.headerDict[key] = True

        if self.debug:
            progress ("defines found in header %s:\n"%headerSub)
            for key in defines.keys():
                progress ("    '%s'\n"%key)

        for key in defines.keys():
            self.defines[key] = defines[key]


    def slash (self, i):
        """Handle '/': line comment, block comment, or a plain '/' token."""
        if not self.isCodeVisible():
            return i

        if i < self.bufsize - 1 and self.chars[i+1] == '/':
            # Parse line comment.
            i += 2
            while i < self.bufsize:
                c = self.chars[i]
                if ord(c) in [0x0A, 0x0D]:
                    # Leave the newline for the main loop to process.
                    return i - 1
                i += 1
            # Comment ran to end of buffer; drop any accumulated token.
            self.token = ''
        elif i < self.bufsize - 1 and self.chars[i+1] == '*':
            # Parse block comment; return at the closing '*/'.
            i += 2
            while i < self.bufsize:
                c = self.chars[i]
                if c == '/' and self.chars[i-1] == '*':
                    return i
                i += 1
        else:
            return self.anyToken(i, '/')

        return i


    def lineBreak (self, i):
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()

        return i


    def doubleQuote (self, i):
        """Consume a double-quoted literal and emit it as one token."""
        if not self.isCodeVisible():
            return i

        literal = ''
        i += 1
        while i < self.bufsize:
            c = self.chars[i]
            if c == '"':
                self.tokens.append('"'+literal+'"')
                break
            literal += c
            i += 1

        return i


    def anyToken (self, i, token):
        """Flush any pending token, then emit the given single-char token."""
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()
        self.token = token
        self.maybeAddToken()
        return i