1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 #include <stdio.h> 29 #include <ctype.h> 30 #include <string.h> 31 #include "cppdef.h" 32 #include "cpp.h" 33 34 /*ER evaluate macros to pDefOut */ 35 36 /* 37 * skipnl() skips over input text to the end of the line. 38 * skipws() skips over "whitespace" (spaces or tabs), but 39 * not skip over the end of the line. It skips over 40 * TOK_SEP, however (though that shouldn't happen). 41 * scanid() reads the next token (C identifier) into token[]. 42 * The caller has already read the first character of 43 * the identifier. Unlike macroid(), the token is 44 * never expanded. 45 * macroid() reads the next token (C identifier) into token[]. 46 * If it is a #defined macro, it is expanded, and 47 * macroid() returns TRUE, otherwise, FALSE. 48 * catenate() Does the dirty work of token concatenation, TRUE if it did. 49 * scanstring() Reads a string from the input stream, calling 50 * a user-supplied function for each character. 51 * This function may be output() to write the 52 * string to the output file, or save() to save 53 * the string in the work buffer. 54 * scannumber() Reads a C numeric constant from the input stream, 55 * calling the user-supplied function for each 56 * character. (output() or save() as noted above.) 57 * save() Save one character in the work[] buffer. 58 * savestring() Saves a string in malloc() memory. 59 * getfile() Initialize a new FILEINFO structure, called when 60 * #include opens a new file, or a macro is to be 61 * expanded. 62 * getmem() Get a specified number of bytes from malloc memory. 63 * output() Write one character to stdout (calling PUTCHAR) -- 64 * implemented as a function so its address may be 65 * passed to scanstring() and scannumber(). 66 * lookid() Scans the next token (identifier) from the input 67 * stream. Looks for it in the #defined symbol table. 68 * Returns a pointer to the definition, if found, or NULL 69 * if not present. The identifier is stored in token[]. 70 * defnedel() Define enter/delete subroutine. Updates the 71 * symbol table. 72 * get() Read the next byte from the current input stream, 73 * handling end of (macro/file) input and embedded 74 * comments appropriately. Note that the global 75 * instring is -- essentially -- a parameter to get(). 76 * cget() Like get(), but skip over TOK_SEP. 77 * unget() Push last gotten character back on the input stream. 78 * cerror(), cwarn(), cfatal(), cierror(), ciwarn() 79 * These routines format an print messages to the user. 80 * cerror & cwarn take a format and a single string argument. 81 * cierror & ciwarn take a format and a single int (char) argument. 82 * cfatal takes a format and a single string argument. 83 */ 84 85 /* 86 * This table must be rewritten for a non-Ascii machine. 87 * 88 * Note that several "non-visible" characters have special meaning: 89 * Hex 1D DEF_MAGIC -- a flag to prevent #define recursion. 90 * Hex 1E TOK_SEP -- a delimiter for token concatenation 91 * Hex 1F COM_SEP -- a zero-width whitespace for comment concatenation 92 */ 93 #if TOK_SEP != 0x1E || COM_SEP != 0x1F || DEF_MAGIC != 0x1D 94 << error type table is not correct >> 95 #endif 96 97 #if OK_DOLLAR 98 #define DOL LET 99 #else 100 #define DOL 000 101 #endif 102 103 #ifdef EBCDIC 104 105 char type[256] = { /* Character type codes Hex */ 106 END, 000, 000, 000, 000, SPA, 000, 000, /* 00 */ 107 000, 000, 000, 000, 000, 000, 000, 000, /* 08 */ 108 000, 000, 000, 000, 000, 000, 000, 000, /* 10 */ 109 000, 000, 000, 000, 000, LET, 000, SPA, /* 18 */ 110 000, 000, 000, 000, 000, 000, 000, 000, /* 20 */ 111 000, 000, 000, 000, 000, 000, 000, 000, /* 28 */ 112 000, 000, 000, 000, 000, 000, 000, 000, /* 30 */ 113 000, 000, 000, 000, 000, 000, 000, 000, /* 38 */ 114 SPA, 000, 000, 000, 000, 000, 000, 000, /* 40 */ 115 000, 000, 000, DOT, OP_LT,OP_LPA,OP_ADD, OP_OR, /* 48 .<(+| */ 116 OP_AND, 000, 000, 000, 000, 000, 000, 000, /* 50 & */ 117 000, 000,OP_NOT, DOL,OP_MUL,OP_RPA, 000,OP_XOR, /* 58 !$*);^ */ 118 OP_SUB,OP_DIV, 000, 000, 000, 000, 000, 000, /* 60 -/ */ 119 000, 000, 000, 000,OP_MOD, LET, OP_GT,OP_QUE, /* 68 ,%_>? */ 120 000, 000, 000, 000, 000, 000, 000, 000, /* 70 */ 121 000, 000,OP_COL, 000, 000, QUO, OP_EQ, QUO, /* 78 `:#@'=" */ 122 000, LET, LET, LET, LET, LET, LET, LET, /* 80 abcdefg */ 123 LET, LET, 000, 000, 000, 000, 000, 000, /* 88 hi */ 124 000, LET, LET, LET, LET, LET, LET, LET, /* 90 jklmnop */ 125 LET, LET, 000, 000, 000, 000, 000, 000, /* 98 qr */ 126 000,OP_NOT, LET, LET, LET, LET, LET, LET, /* A0 ~stuvwx */ 127 LET, LET, 000, 000, 000, 000, 000, 000, /* A8 yz [ */ 128 000, 000, 000, 000, 000, 000, 000, 000, /* B0 */ 129 000, 000, 000, 000, 000, 000, 000, 000, /* B8 ] */ 130 000, LET, LET, LET, LET, LET, LET, LET, /* C0 {ABCDEFG */ 131 LET, LET, 000, 000, 000, 000, 000, 000, /* C8 HI */ 132 000, LET, LET, LET, LET, LET, LET, LET, /* D0 }JKLMNOP */ 133 LET, LET, 000, 000, 000, 000, 000, 000, /* D8 QR */ 134 BSH, 000, LET, LET, LET, LET, LET, LET, /* E0 \ STUVWX */ 135 LET, LET, 000, 000, 000, 000, 000, 000, /* E8 YZ */ 136 DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, /* F0 01234567 */ 137 DIG, DIG, 000, 000, 000, 000, 000, 000, /* F8 89 */ 138 }; 139 140 #else 141 142 char type[256] = { /* Character type codes Hex */ 143 END, 000, 000, 000, 000, 000, 000, 000, /* 00 */ 144 000, SPA, 000, 000, 000, 000, 000, 000, /* 08 */ 145 000, 000, 000, 000, 000, 000, 000, 000, /* 10 */ 146 000, 000, 000, 000, 000, LET, 000, SPA, /* 18 */ 147 SPA,OP_NOT, QUO, 000, DOL,OP_MOD,OP_AND, QUO, /* 20 !"#$%&' */ 148 OP_LPA,OP_RPA,OP_MUL,OP_ADD, 000,OP_SUB, DOT,OP_DIV, /* 28 ()*+,-./ */ 149 DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, /* 30 01234567 */ 150 DIG, DIG,OP_COL, 000, OP_LT, OP_EQ, OP_GT,OP_QUE, /* 38 89:;<=>? */ 151 000, LET, LET, LET, LET, LET, LET, LET, /* 40 @ABCDEFG */ 152 LET, LET, LET, LET, LET, LET, LET, LET, /* 48 HIJKLMNO */ 153 LET, LET, LET, LET, LET, LET, LET, LET, /* 50 PQRSTUVW */ 154 LET, LET, LET, 000, BSH, 000,OP_XOR, LET, /* 58 XYZ[\]^_ */ 155 000, LET, LET, LET, LET, LET, LET, LET, /* 60 `abcdefg */ 156 LET, LET, LET, LET, LET, LET, LET, LET, /* 68 hijklmno */ 157 LET, LET, LET, LET, LET, LET, LET, LET, /* 70 pqrstuvw */ 158 LET, LET, LET, 000, OP_OR, 000,OP_NOT, 000, /* 78 xyz{|}~ */ 159 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 160 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 161 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 162 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 163 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 164 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 165 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 166 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 167 }; 168 169 #endif 170 171 172 /* 173 * C P P S y m b o l T a b l e s 174 */ 175 176 /* 177 * SBSIZE defines the number of hash-table slots for the symbol table. 178 * It must be a power of 2. 179 */ 180 #ifndef SBSIZE 181 #define SBSIZE 64 182 #endif 183 #define SBMASK (SBSIZE - 1) 184 #if (SBSIZE ^ SBMASK) != ((SBSIZE * 2) - 1) 185 << error, SBSIZE must be a power of 2 >> 186 #endif 187 188 189 static DEFBUF *symtab[SBSIZE]; /* Symbol table queue headers */ 190 191 void InitCpp6() 192 { 193 int i; 194 for( i = 0; i < SBSIZE; i++ ) 195 symtab[ i ] = NULL; 196 } 197 198 199 200 void skipnl() 201 /* 202 * Skip to the end of the current input line. 203 */ 204 { 205 register int c; 206 207 do { /* Skip to newline */ 208 c = get(); 209 } while (c != '\n' && c != EOF_CHAR); 210 } 211 212 int 213 skipws() 214 /* 215 * Skip over whitespace 216 */ 217 { 218 register int c; 219 220 do { /* Skip whitespace */ 221 c = get(); 222 #if COMMENT_INVISIBLE 223 } while (type[c] == SPA || c == COM_SEP); 224 #else 225 } while (type[c] == SPA); 226 #endif 227 return (c); 228 } 229 230 void scanid(int c) 231 /* 232 * Get the next token (an id) into the token buffer. 233 * Note: this code is duplicated in lookid(). 234 * Change one, change both. 235 */ 236 { 237 register char *bp; 238 239 if (c == DEF_MAGIC) /* Eat the magic token */ 240 c = get(); /* undefiner. */ 241 bp = token; 242 do { 243 if (bp < &token[IDMAX]) /* token dim is IDMAX+1 */ 244 *bp++ = (char)c; 245 c = get(); 246 } while (type[c] == LET || type[c] == DIG); 247 unget(); 248 *bp = EOS; 249 } 250 251 int 252 macroid(int c) 253 /* 254 * If c is a letter, scan the id. if it's #defined, expand it and scan 255 * the next character and try again. 256 * 257 * Else, return the character. If type[c] is a LET, the token is in token. 258 */ 259 { 260 register DEFBUF *dp; 261 262 if (infile != NULL && infile->fp != NULL) 263 recursion = 0; 264 while (type[c] == LET && (dp = lookid(c)) != NULL) { 265 expand(dp); 266 c = get(); 267 } 268 return (c); 269 } 270 271 int 272 catenate() 273 /* 274 * A token was just read (via macroid). 275 * If the next character is TOK_SEP, concatenate the next token 276 * return TRUE -- which should recall macroid after refreshing 277 * macroid's argument. If it is not TOK_SEP, unget() the character 278 * and return FALSE. 279 */ 280 { 281 register int c; 282 register char *token1; 283 284 #if OK_CONCAT 285 if (get() != TOK_SEP) { /* Token concatenation */ 286 unget(); 287 return (FALSE); 288 } 289 else { 290 token1 = savestring(token); /* Save first token */ 291 c = macroid(get()); /* Scan next token */ 292 switch(type[c]) { /* What was it? */ 293 case LET: /* An identifier, ... */ 294 if (strlen(token1) + strlen(token) >= NWORK) 295 cfatal("work buffer overflow doing %s #", token1); 296 sprintf(work, "%s%s", token1, token); 297 break; 298 299 case DIG: /* A digit string */ 300 strcpy(work, token1); 301 workp = work + strlen(work); 302 do { 303 save(c); 304 } while ((c = get()) != TOK_SEP); 305 /* 306 * The trailing TOK_SEP is no longer needed. 307 */ 308 save(EOS); 309 break; 310 311 default: /* An error, ... */ 312 #if ! COMMENT_INVISIBLE 313 if (isprint(c)) 314 cierror("Strange character '%c' after #", c); 315 else 316 cierror("Strange character (%d.) after #", c); 317 #endif 318 strcpy(work, token1); 319 unget(); 320 break; 321 } 322 /* 323 * work has the concatenated token and token1 has 324 * the first token (no longer needed). Unget the 325 * new (concatenated) token after freeing token1. 326 * Finally, setup to read the new token. 327 */ 328 free(token1); /* Free up memory */ 329 ungetstring(work); /* Unget the new thing, */ 330 return (TRUE); 331 } 332 #else 333 return (FALSE); /* Not supported */ 334 #endif 335 } 336 337 int 338 scanstring(int delim, 339 #ifndef _NO_PROTO 340 void (*outfun)( int ) /* BP */ /* Output function */ 341 #else 342 void (*outfun)() /* BP */ 343 #endif 344 ) 345 /* 346 * Scan off a string. Warning if terminated by newline or EOF. 347 * outfun() outputs the character -- to a buffer if in a macro. 348 * TRUE if ok, FALSE if error. 349 */ 350 { 351 register int c; 352 353 instring = TRUE; /* Don't strip comments */ 354 (*outfun)(delim); 355 while ((c = get()) != delim 356 && c != '\n' 357 && c != EOF_CHAR) { 358 359 if (c != DEF_MAGIC) 360 (*outfun)(c); 361 if (c == '\\') 362 (*outfun)(get()); 363 } 364 instring = FALSE; 365 if (c == delim) { 366 (*outfun)(c); 367 return (TRUE); 368 } 369 else { 370 cerror("Unterminated string", NULLST); 371 unget(); 372 return (FALSE); 373 } 374 } 375 376 void scannumber(int c, 377 #ifndef _NO_PROTO 378 register void (*outfun)( int ) /* BP */ /* Output/store func */ 379 #else 380 register void (*outfun)() /* BP */ 381 #endif 382 ) 383 /* 384 * Process a number. We know that c is from 0 to 9 or dot. 385 * Algorithm from Dave Conroy's Decus C. 386 */ 387 { 388 register int radix; /* 8, 10, or 16 */ 389 int expseen; /* 'e' seen in floater */ 390 int signseen; /* '+' or '-' seen */ 391 int octal89; /* For bad octal test */ 392 int dotflag; /* TRUE if '.' was seen */ 393 394 expseen = FALSE; /* No exponent seen yet */ 395 signseen = TRUE; /* No +/- allowed yet */ 396 octal89 = FALSE; /* No bad octal yet */ 397 radix = 10; /* Assume decimal */ 398 if ((dotflag = (c == '.')) != FALSE) { /* . something? */ 399 (*outfun)('.'); /* Always out the dot */ 400 if (type[(c = get())] != DIG) { /* If not a float numb, */ 401 unget(); /* Rescan strange char */ 402 return; /* All done for now */ 403 } 404 } /* End of float test */ 405 else if (c == '0') { /* Octal or hex? */ 406 (*outfun)(c); /* Stuff initial zero */ 407 radix = 8; /* Assume it's octal */ 408 c = get(); /* Look for an 'x' */ 409 if (c == 'x' || c == 'X') { /* Did we get one? */ 410 radix = 16; /* Remember new radix */ 411 (*outfun)(c); /* Stuff the 'x' */ 412 c = get(); /* Get next character */ 413 } 414 } 415 for (;;) { /* Process curr. char. */ 416 /* 417 * Note that this algorithm accepts "012e4" and "03.4" 418 * as legitimate floating-point numbers. 419 */ 420 if (radix != 16 && (c == 'e' || c == 'E')) { 421 if (expseen) /* Already saw 'E'? */ 422 break; /* Exit loop, bad nbr. */ 423 expseen = TRUE; /* Set exponent seen */ 424 signseen = FALSE; /* We can read '+' now */ 425 radix = 10; /* Decimal exponent */ 426 } 427 else if (radix != 16 && c == '.') { 428 if (dotflag) /* Saw dot already? */ 429 break; /* Exit loop, two dots */ 430 dotflag = TRUE; /* Remember the dot */ 431 radix = 10; /* Decimal fraction */ 432 } 433 else if (c == '+' || c == '-') { /* 1.0e+10 */ 434 if (signseen) /* Sign in wrong place? */ 435 break; /* Exit loop, not nbr. */ 436 /* signseen = TRUE; */ /* Remember we saw it */ 437 } 438 else { /* Check the digit */ 439 switch (c) { 440 case '8': case '9': /* Sometimes wrong */ 441 octal89 = TRUE; /* Do check later */ 442 case '0': case '1': case '2': case '3': 443 case '4': case '5': case '6': case '7': 444 break; /* Always ok */ 445 446 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 447 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 448 if (radix == 16) /* Alpha's are ok only */ 449 break; /* if reading hex. */ 450 default: /* At number end */ 451 goto done; /* Break from for loop */ 452 } /* End of switch */ 453 } /* End general case */ 454 (*outfun)(c); /* Accept the character */ 455 signseen = TRUE; /* Don't read sign now */ 456 c = get(); /* Read another char */ 457 } /* End of scan loop */ 458 /* 459 * When we break out of the scan loop, c contains the first 460 * character (maybe) not in the number. If the number is an 461 * integer, allow a trailing 'L' for long and/or a trailing 'U' 462 * for unsigned. If not those, push the trailing character back 463 * on the input stream. Floating point numbers accept a trailing 464 * 'L' for "long double". 465 */ 466 done: if (dotflag || expseen) { /* Floating point? */ 467 if (c == 'l' || c == 'L') { 468 (*outfun)(c); 469 c = get(); /* Ungotten later */ 470 } 471 } 472 else { /* Else it's an integer */ 473 /* 474 * We know that dotflag and expseen are both zero, now: 475 * dotflag signals "saw 'L'", and 476 * expseen signals "saw 'U'". 477 */ 478 for (;;) { 479 switch (c) { 480 case 'l': 481 case 'L': 482 if (dotflag) 483 goto nomore; 484 dotflag = TRUE; 485 break; 486 487 case 'u': 488 case 'U': 489 if (expseen) 490 goto nomore; 491 expseen = TRUE; 492 break; 493 494 default: 495 goto nomore; 496 } 497 (*outfun)(c); /* Got 'L' or 'U'. */ 498 c = get(); /* Look at next, too. */ 499 } 500 } 501 nomore: unget(); /* Not part of a number */ 502 if (octal89 && radix == 8) 503 cwarn("Illegal digit in octal number", NULLST); 504 } 505 506 void save(int c) 507 { 508 if (workp >= &work[NWORK]) { 509 work[NWORK-1] = '\0'; 510 cfatal("Work buffer overflow: %s", work); 511 } 512 else *workp++ = (char)c; 513 } 514 515 char * 516 savestring(char* text) 517 /* 518 * Store a string into free memory. 519 */ 520 { 521 register char *result; 522 523 result = getmem(strlen(text) + 1); 524 strcpy(result, text); 525 return (result); 526 } 527 528 FILEINFO * 529 getfile(int bufsize, char* name) 530 /* 531 * Common FILEINFO buffer initialization for a new file or macro. 532 */ 533 { 534 register FILEINFO *file; 535 register int size; 536 537 size = strlen(name); /* File/macro name */ 538 file = (FILEINFO *) getmem(sizeof (FILEINFO) + bufsize + size); 539 file->parent = infile; /* Chain files together */ 540 file->fp = NULL; /* No file yet */ 541 file->filename = savestring(name); /* Save file/macro name */ 542 file->progname = NULL; /* No #line seen yet */ 543 file->unrecur = 0; /* No macro fixup */ 544 file->bptr = file->buffer; /* Initialize line ptr */ 545 file->buffer[0] = EOS; /* Force first read */ 546 file->line = 0; /* (Not used just yet) */ 547 if (infile != NULL) /* If #include file */ 548 infile->line = line; /* Save current line */ 549 infile = file; /* New current file */ 550 line = 1; /* Note first line */ 551 return (file); /* All done. */ 552 } 553 554 char * 555 getmem(int size) 556 /* 557 * Get a block of free memory. 558 */ 559 { 560 register char *result; 561 562 if ((result = malloc((unsigned) size)) == NULL) 563 cfatal("Out of memory", NULLST); 564 return (result); 565 } 566 567 568 DEFBUF * 569 lookid(int c) 570 /* 571 * Look for the next token in the symbol table. Returns token in "token". 572 * If found, returns the table pointer; Else returns NULL. 573 */ 574 { 575 register int nhash; 576 register DEFBUF *dp; 577 register char *np; 578 int temp = 0; 579 int isrecurse; /* For #define foo foo */ 580 581 np = token; 582 nhash = 0; 583 if (0 != (isrecurse = (c == DEF_MAGIC))) /* If recursive macro */ 584 c = get(); /* hack, skip DEF_MAGIC */ 585 do { 586 if (np < &token[IDMAX]) { /* token dim is IDMAX+1 */ 587 *np++ = (char)c; /* Store token byte */ 588 nhash += c; /* Update hash value */ 589 } 590 c = get(); /* And get another byte */ 591 } while (type[c] == LET || type[c] == DIG); 592 unget(); /* Rescan terminator */ 593 *np = EOS; /* Terminate token */ 594 if (isrecurse) /* Recursive definition */ 595 return (NULL); /* undefined just now */ 596 nhash += (np - token); /* Fix hash value */ 597 dp = symtab[nhash & SBMASK]; /* Starting bucket */ 598 while (dp != (DEFBUF *) NULL) { /* Search symbol table */ 599 if (dp->hash == nhash /* Fast precheck */ 600 && (temp = strcmp(dp->name, token)) >= 0) 601 break; 602 dp = dp->link; /* Nope, try next one */ 603 } 604 return ((temp == 0) ? dp : NULL); 605 } 606 607 DEFBUF * 608 defendel(char* name, int delete) 609 /* 610 * Enter this name in the lookup table (delete = FALSE) 611 * or delete this name (delete = TRUE). 612 * Returns a pointer to the define block (delete = FALSE) 613 * Returns NULL if the symbol wasn't defined (delete = TRUE). 614 */ 615 { 616 register DEFBUF *dp; 617 register DEFBUF **prevp; 618 register char *np; 619 int nhash; 620 int temp; 621 int size; 622 623 for (nhash = 0, np = name; *np != EOS;) 624 nhash += *np++; 625 size = (np - name); 626 nhash += size; 627 prevp = &symtab[nhash & SBMASK]; 628 while ((dp = *prevp) != (DEFBUF *) NULL) { 629 if (dp->hash == nhash 630 && (temp = strcmp(dp->name, name)) >= 0) { 631 if (temp > 0) 632 dp = NULL; /* Not found */ 633 else { 634 *prevp = dp->link; /* Found, unlink and */ 635 if (dp->repl != NULL) /* Free the replacement */ 636 free(dp->repl); /* if any, and then */ 637 free((char *) dp); /* Free the symbol */ 638 } 639 break; 640 } 641 prevp = &dp->link; 642 } 643 if (!delete) { 644 dp = (DEFBUF *) getmem(sizeof (DEFBUF) + size); 645 dp->link = *prevp; 646 *prevp = dp; 647 dp->hash = nhash; 648 dp->repl = NULL; 649 dp->nargs = 0; 650 strcpy(dp->name, name); 651 } 652 return (dp); 653 } 654 655 #if OSL_DEBUG_LEVEL > 1 656 657 void dumpdef(char *why) 658 { 659 register DEFBUF *dp; 660 register DEFBUF **syp; 661 FILE *pRememberOut = NULL; 662 663 if ( bDumpDefs ) /*ER */ 664 { 665 pRememberOut = pCppOut; 666 pCppOut = pDefOut; 667 } 668 fprintf( pCppOut, "CPP symbol table dump %s\n", why); 669 for (syp = symtab; syp < &symtab[SBSIZE]; syp++) { 670 if ((dp = *syp) != (DEFBUF *) NULL) { 671 fprintf( pCppOut, "symtab[%d]\n", (syp - symtab)); 672 do { 673 dumpadef((char *) NULL, dp); 674 } while ((dp = dp->link) != (DEFBUF *) NULL); 675 } 676 } 677 if ( bDumpDefs ) 678 { 679 fprintf( pCppOut, "\n"); 680 pCppOut = pRememberOut; 681 } 682 } 683 684 void dumpadef(char *why, register DEFBUF *dp) 685 { 686 register char *cp; 687 register int c; 688 FILE *pRememberOut = NULL; 689 690 /*ER dump #define's to pDefOut */ 691 if ( bDumpDefs ) 692 { 693 pRememberOut = pCppOut; 694 pCppOut = pDefOut; 695 } 696 fprintf( pCppOut, " \"%s\" [%d]", dp->name, dp->nargs); 697 if (why != NULL) 698 fprintf( pCppOut, " (%s)", why); 699 if (dp->repl != NULL) { 700 fprintf( pCppOut, " => "); 701 for (cp = dp->repl; (c = *cp++ & 0xFF) != EOS;) { 702 #ifdef SOLAR 703 if (c == DEL) { 704 c = *cp++ & 0xFF; 705 if( c == EOS ) break; 706 fprintf( pCppOut, "<%%%d>", c - MAC_PARM); 707 } 708 #else 709 if (c >= MAC_PARM && c <= (MAC_PARM + PAR_MAC)) 710 fprintf( pCppOut, "<%%%d>", c - MAC_PARM); 711 #endif 712 else if (isprint(c) || c == '\n' || c == '\t') 713 PUTCHAR(c); 714 else if (c < ' ') 715 fprintf( pCppOut, "<^%c>", c + '@'); 716 else 717 fprintf( pCppOut, "<\\0%o>", c); 718 } 719 /*ER evaluate macros to pDefOut */ 720 #ifdef EVALDEFS 721 if ( bDumpDefs && !bIsInEval && dp->nargs <= 0 ) 722 { 723 FILEINFO *infileSave = infile; 724 char *tokenSave = savestring( token ); 725 char *workSave = savestring( work ); 726 int lineSave = line; 727 int wronglineSave = wrongline; 728 int recursionSave = recursion; 729 FILEINFO *file; 730 EVALTYPE valEval; 731 732 bIsInEval = 1; 733 infile = NULL; /* start from scrap */ 734 line = 0; 735 wrongline = 0; 736 *token = EOS; 737 *work = EOS; 738 recursion = 0; 739 file = getfile( strlen( dp->repl ), dp->name ); 740 strcpy( file->buffer, dp->repl ); 741 fprintf( pCppOut, " ===> "); 742 nEvalOff = 0; 743 cppmain(); /* get() frees also *file */ 744 valEval = 0; 745 if ( 0 == evaluate( EvalBuf, &valEval ) ) 746 { 747 #ifdef EVALFLOATS 748 if ( valEval != (EVALTYPE)((long)valEval ) ) 749 fprintf( pCppOut, " ==eval=> %f", valEval ); 750 else 751 #endif 752 fprintf( pCppOut, " ==eval=> %ld", (long)valEval ); 753 } 754 recursion = recursionSave; 755 wrongline = wronglineSave; 756 line = lineSave; 757 strcpy( work, workSave ); 758 free( workSave ); 759 strcpy( token, tokenSave ); 760 free( tokenSave ); 761 infile = infileSave; 762 bIsInEval = 0; 763 } 764 #endif 765 } 766 else { 767 fprintf( pCppOut, ", no replacement."); 768 } 769 PUTCHAR('\n'); 770 if ( bDumpDefs ) 771 pCppOut = pRememberOut; 772 } 773 #endif 774 775 /* 776 * G E T 777 */ 778 779 int 780 get() 781 /* 782 * Return the next character from a macro or the current file. 783 * Handle end of file from #include files. 784 */ 785 { 786 register int c; 787 register FILEINFO *file; 788 register int popped; /* Recursion fixup */ 789 790 popped = 0; 791 get_from_file: 792 if ((file = infile) == NULL) 793 return (EOF_CHAR); 794 newline: 795 #if 0 796 fprintf( pCppOut, "get(%s), recursion %d, line %d, bptr = %d, buffer \"%s\"\n", 797 file->filename, recursion, line, 798 file->bptr - file->buffer, file->buffer); 799 #endif 800 /* 801 * Read a character from the current input line or macro. 802 * At EOS, either finish the current macro (freeing temp. 803 * storage) or read another line from the current input file. 804 * At EOF, exit the current file (#include) or, at EOF from 805 * the cpp input file, return EOF_CHAR to finish processing. 806 */ 807 if ((c = *file->bptr++ & 0xFF) == EOS) { 808 /* 809 * Nothing in current line or macro. Get next line (if 810 * input from a file), or do end of file/macro processing. 811 * In the latter case, jump back to restart from the top. 812 */ 813 if (file->fp == NULL) { /* NULL if macro */ 814 popped++; 815 recursion -= file->unrecur; 816 if (recursion < 0) 817 recursion = 0; 818 infile = file->parent; /* Unwind file chain */ 819 } 820 else { /* Else get from a file */ 821 if ((file->bptr = fgets(file->buffer, NBUFF, file->fp)) 822 != NULL) { 823 #if OSL_DEBUG_LEVEL > 1 824 if (debug > 1) { /* Dump it to stdout */ 825 fprintf( pCppOut, "\n#line %d (%s), %s", 826 line, file->filename, file->buffer); 827 } 828 #endif 829 goto newline; /* process the line */ 830 } 831 else { 832 if( file->fp != stdin ) 833 fclose(file->fp); /* Close finished file */ 834 if ((infile = file->parent) != NULL) { 835 /* 836 * There is an "ungotten" newline in the current 837 * infile buffer (set there by doinclude() in 838 * cpp1.c). Thus, we know that the mainline code 839 * is skipping over blank lines and will do a 840 * #line at its convenience. 841 */ 842 wrongline = TRUE; /* Need a #line now */ 843 } 844 } 845 } 846 /* 847 * Free up space used by the (finished) file or macro and 848 * restart input from the parent file/macro, if any. 849 */ 850 free(file->filename); /* Free name and */ 851 if (file->progname != NULL) /* if a #line was seen, */ 852 free(file->progname); /* free it, too. */ 853 free((char *) file); /* Free file space */ 854 if (infile == NULL) /* If at end of file */ 855 return (EOF_CHAR); /* Return end of file */ 856 line = infile->line; /* Reset line number */ 857 goto get_from_file; /* Get from the top. */ 858 } 859 /* 860 * Common processing for the new character. 861 */ 862 if (c == DEF_MAGIC && file->fp != NULL) /* Don't allow delete */ 863 goto newline; /* from a file */ 864 if (file->parent != NULL) { /* Macro or #include */ 865 if (popped != 0) 866 file->parent->unrecur += popped; 867 else { 868 recursion -= file->parent->unrecur; 869 if (recursion < 0) 870 recursion = 0; 871 file->parent->unrecur = 0; 872 } 873 } 874 #if (HOST == SYS_UNIX) 875 /*ER*/ if (c == '\r') 876 /*ER*/ return get(); /* DOS fuck */ 877 #endif 878 if (c == '\n') /* Maintain current */ 879 ++line; /* line counter */ 880 if (instring) /* Strings just return */ 881 return (c); /* the character. */ 882 else if (c == '/') { /* Comment? */ 883 instring = TRUE; /* So get() won't loop */ 884 /*MM c++ comments */ 885 /*MM*/ c = get(); 886 /*MM*/ if ((c != '*') && (c != '/')) { /* Next byte '*'? */ 887 instring = FALSE; /* Nope, no comment */ 888 unget(); /* Push the char. back */ 889 return ('/'); /* Return the slash */ 890 } 891 if (keepcomments) { /* If writing comments */ 892 PUTCHAR('/'); /* Write out the */ 893 /* initializer */ 894 /*MM*/ if( '*' == c ) 895 PUTCHAR('*'); 896 /*MM*/ else 897 /*MM*/ PUTCHAR('/'); 898 899 } 900 /*MM*/ if( '*' == c ){ 901 for (;;) { /* Eat a comment */ 902 c = get(); 903 test: if (keepcomments && c != EOF_CHAR) 904 cput(c); 905 switch (c) { 906 case EOF_CHAR: 907 cerror("EOF in comment", NULLST); 908 return (EOF_CHAR); 909 910 case '/': 911 if ((c = get()) != '*') /* Don't let comments */ 912 goto test; /* Nest. */ 913 #ifdef STRICT_COMMENTS 914 cwarn("Nested comments", NULLST); 915 #endif 916 /* Fall into * stuff */ 917 case '*': 918 if ((c = get()) != '/') /* If comment doesn't */ 919 goto test; /* end, look at next */ 920 instring = FALSE; /* End of comment, */ 921 if (keepcomments) { /* Put out the comment */ 922 cput(c); /* terminator, too */ 923 } 924 /* 925 * A comment is syntactically "whitespace" -- 926 * however, there are certain strange sequences 927 * such as 928 * #define foo(x) (something) 929 * foo|* comment *|(123) 930 * these are '/' ^ ^ 931 * where just returning space (or COM_SEP) will cause 932 * problems. This can be "fixed" by overwriting the 933 * '/' in the input line buffer with ' ' (or COM_SEP) 934 * but that may mess up an error message. 935 * So, we peek ahead -- if the next character is 936 * "whitespace" we just get another character, if not, 937 * we modify the buffer. All in the name of purity. 938 */ 939 if (*file->bptr == '\n' 940 || type[*file->bptr & 0xFF] == SPA) 941 goto newline; 942 #if COMMENT_INVISIBLE 943 /* 944 * Return magic (old-fashioned) syntactic space. 945 */ 946 return ((file->bptr[-1] = COM_SEP)); 947 #else 948 return ((file->bptr[-1] = ' ')); 949 #endif 950 951 case '\n': /* we'll need a #line */ 952 if (!keepcomments) 953 wrongline = TRUE; /* later... */ 954 default: /* Anything else is */ 955 break; /* Just a character */ 956 } /* End switch */ 957 } /* End comment loop */ 958 } 959 else{ /* c++ comment */ 960 /*MM c++ comment*/ 961 for (;;) { /* Eat a comment */ 962 c = get(); 963 if (keepcomments && c != EOF_CHAR) 964 cput(c); 965 if( EOF_CHAR == c ) 966 return (EOF_CHAR); 967 else if( '\n' == c ){ 968 instring = FALSE; /* End of comment, */ 969 return( c ); 970 } 971 } 972 } 973 } /* End if in comment */ 974 else if (!inmacro && c == '\\') { /* If backslash, peek */ 975 if ((c = get()) == '\n') { /* for a <nl>. If so, */ 976 wrongline = TRUE; 977 goto newline; 978 } 979 else { /* Backslash anything */ 980 unget(); /* Get it later */ 981 return ('\\'); /* Return the backslash */ 982 } 983 } 984 else if (c == '\f' || c == VT) /* Form Feed, Vertical */ 985 c = ' '; /* Tab are whitespace */ 986 else if (c == 0xef) /* eat up UTF-8 BOM */ 987 { 988 if((c = get()) == 0xbb) 989 { 990 if((c = get()) == 0xbf) 991 { 992 c = get(); 993 return c; 994 } 995 else 996 { 997 unget(); 998 unget(); 999 return 0xef; 1000 } 1001 } 1002 else 1003 { 1004 unget(); 1005 return 0xef; 1006 } 1007 } 1008 return (c); /* Just return the char */ 1009 } 1010 1011 void unget() 1012 /* 1013 * Backup the pointer to reread the last character. Fatal error 1014 * (code bug) if we backup too far. unget() may be called, 1015 * without problems, at end of file. Only one character may 1016 * be ungotten. If you need to unget more, call ungetstring(). 1017 */ 1018 { 1019 register FILEINFO *file; 1020 1021 if ((file = infile) == NULL) 1022 return; /* Unget after EOF */ 1023 if (--file->bptr < file->buffer) 1024 cfatal("Too much pushback", NULLST); 1025 if (*file->bptr == '\n') /* Ungetting a newline? */ 1026 --line; /* Unget the line number, too */ 1027 } 1028 1029 void ungetstring(char* text) 1030 /* 1031 * Push a string back on the input stream. This is done by treating 1032 * the text as if it were a macro. 1033 */ 1034 { 1035 register FILEINFO *file; 1036 #ifndef ZTC /* BP */ 1037 extern FILEINFO *getfile(); 1038 #endif 1039 file = getfile(strlen(text) + 1, ""); 1040 strcpy(file->buffer, text); 1041 } 1042 1043 int 1044 cget() 1045 /* 1046 * Get one character, absorb "funny space" after comments or 1047 * token concatenation 1048 */ 1049 { 1050 register int c; 1051 1052 do { 1053 c = get(); 1054 #if COMMENT_INVISIBLE 1055 } while (c == TOK_SEP || c == COM_SEP); 1056 #else 1057 } while (c == TOK_SEP); 1058 #endif 1059 return (c); 1060 } 1061 1062 /* 1063 * Error messages and other hacks. The first byte of severity 1064 * is 'S' for string arguments and 'I' for int arguments. This 1065 * is needed for portability with machines that have int's that 1066 * are shorter than char *'s. 1067 */ 1068 1069 static void domsg(char* severity, char* format, void* arg) 1070 /* 1071 * Print filenames, macro names, and line numbers for error messages. 1072 */ 1073 { 1074 register char *tp; 1075 register FILEINFO *file; 1076 1077 fprintf(stderr, "%sline %d, %s: ", MSG_PREFIX, line, &severity[1]); 1078 if (*severity == 'S') 1079 fprintf(stderr, format, (char *)arg); 1080 else 1081 fprintf(stderr, format, *((int *)arg) ); 1082 putc('\n', stderr); 1083 if ((file = infile) == NULL) 1084 return; /* At end of file */ 1085 if (file->fp != NULL) { 1086 tp = file->buffer; /* Print current file */ 1087 fprintf(stderr, "%s", tp); /* name, making sure */ 1088 if (tp[strlen(tp) - 1] != '\n') /* there's a newline */ 1089 putc('\n', stderr); 1090 } 1091 while ((file = file->parent) != NULL) { /* Print #includes, too */ 1092 if (file->fp == NULL) 1093 fprintf(stderr, "from macro %s\n", file->filename); 1094 else { 1095 tp = file->buffer; 1096 fprintf(stderr, "from file %s, line %d:\n%s", 1097 (file->progname != NULL) 1098 ? file->progname : file->filename, 1099 file->line, tp); 1100 if (tp[strlen(tp) - 1] != '\n') 1101 putc('\n', stderr); 1102 } 1103 } 1104 } 1105 1106 void cerror(char* format, char* sarg) 1107 /* 1108 * Print a normal error message, string argument. 1109 */ 1110 { 1111 domsg("SError", format, sarg); 1112 errors++; 1113 } 1114 1115 void cierror(char* format, int narg) 1116 /* 1117 * Print a normal error message, numeric argument. 1118 */ 1119 { 1120 domsg("IError", format, &narg); 1121 errors++; 1122 } 1123 1124 void cfatal(char* format, char* sarg) 1125 /* 1126 * A real disaster 1127 */ 1128 { 1129 domsg("SFatal error", format, sarg); 1130 exit(IO_ERROR); 1131 } 1132 1133 void cwarn(char* format, char* sarg) 1134 /* 1135 * A non-fatal error, string argument. 1136 */ 1137 { 1138 domsg("SWarning", format, sarg); 1139 } 1140 1141 void ciwarn(char* format, int narg) 1142 /* 1143 * A non-fatal error, numeric argument. 1144 */ 1145 { 1146 domsg("IWarning", format, &narg); 1147 } 1148 1149