1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 #include <stdio.h>
25 #include <ctype.h>
26 #include <string.h>
27 #include "cppdef.h"
28 #include "cpp.h"
29
30 /*ER evaluate macros to pDefOut */
31
/*
 * skipnl()     skips over input text to the end of the line.
 * skipws()     skips over "whitespace" (spaces or tabs), but
 *              does not skip over the end of the line.  It skips over
 *              TOK_SEP, however (though that shouldn't happen).
 * scanid()     reads the next token (C identifier) into token[].
 *              The caller has already read the first character of
 *              the identifier.  Unlike macroid(), the token is
 *              never expanded.
 * macroid()    reads the next token (C identifier) into token[].
 *              If it is a #defined macro, it is expanded, and
 *              macroid() returns TRUE, otherwise, FALSE.
 * catenate()   Does the dirty work of token concatenation, TRUE if it did.
 * scanstring() Reads a string from the input stream, calling
 *              a user-supplied function for each character.
 *              This function may be output() to write the
 *              string to the output file, or save() to save
 *              the string in the work buffer.
 * scannumber() Reads a C numeric constant from the input stream,
 *              calling the user-supplied function for each
 *              character.  (output() or save() as noted above.)
 * save()       Save one character in the work[] buffer.
 * savestring() Saves a string in malloc() memory.
 * getfile()    Initialize a new FILEINFO structure, called when
 *              #include opens a new file, or a macro is to be
 *              expanded.
 * getmem()     Get a specified number of bytes from malloc memory.
 * output()     Write one character to stdout (calling PUTCHAR) --
 *              implemented as a function so its address may be
 *              passed to scanstring() and scannumber().
 * lookid()     Scans the next token (identifier) from the input
 *              stream.  Looks for it in the #defined symbol table.
 *              Returns a pointer to the definition, if found, or NULL
 *              if not present.  The identifier is stored in token[].
 * defendel()   Define enter/delete subroutine.  Updates the
 *              symbol table.
 * get()        Read the next byte from the current input stream,
 *              handling end of (macro/file) input and embedded
 *              comments appropriately.  Note that the global
 *              instring is -- essentially -- a parameter to get().
 * cget()       Like get(), but skip over TOK_SEP.
 * unget()      Push last gotten character back on the input stream.
 * cerror(), cwarn(), cfatal(), cierror(), ciwarn()
 *              These routines format and print messages to the user.
 *              cerror & cwarn take a format and a single string argument.
 *              cierror & ciwarn take a format and a single int (char) argument.
 *              cfatal takes a format and a single string argument.
 */
80
81 /*
82 * This table must be rewritten for a non-Ascii machine.
83 *
84 * Note that several "non-visible" characters have special meaning:
85 * Hex 1D DEF_MAGIC -- a flag to prevent #define recursion.
86 * Hex 1E TOK_SEP -- a delimiter for token concatenation
87 * Hex 1F COM_SEP -- a zero-width whitespace for comment concatenation
88 */
89 #if TOK_SEP != 0x1E || COM_SEP != 0x1F || DEF_MAGIC != 0x1D
90 << error type table is not correct >>
91 #endif
92
93 #if OK_DOLLAR
94 #define DOL LET
95 #else
96 #define DOL 000
97 #endif
98
99 #ifdef EBCDIC
100
101 char type[256] = { /* Character type codes Hex */
102 END, 000, 000, 000, 000, SPA, 000, 000, /* 00 */
103 000, 000, 000, 000, 000, 000, 000, 000, /* 08 */
104 000, 000, 000, 000, 000, 000, 000, 000, /* 10 */
105 000, 000, 000, 000, 000, LET, 000, SPA, /* 18 */
106 000, 000, 000, 000, 000, 000, 000, 000, /* 20 */
107 000, 000, 000, 000, 000, 000, 000, 000, /* 28 */
108 000, 000, 000, 000, 000, 000, 000, 000, /* 30 */
109 000, 000, 000, 000, 000, 000, 000, 000, /* 38 */
110 SPA, 000, 000, 000, 000, 000, 000, 000, /* 40 */
111 000, 000, 000, DOT, OP_LT,OP_LPA,OP_ADD, OP_OR, /* 48 .<(+| */
112 OP_AND, 000, 000, 000, 000, 000, 000, 000, /* 50 & */
113 000, 000,OP_NOT, DOL,OP_MUL,OP_RPA, 000,OP_XOR, /* 58 !$*);^ */
114 OP_SUB,OP_DIV, 000, 000, 000, 000, 000, 000, /* 60 -/ */
115 000, 000, 000, 000,OP_MOD, LET, OP_GT,OP_QUE, /* 68 ,%_>? */
116 000, 000, 000, 000, 000, 000, 000, 000, /* 70 */
117 000, 000,OP_COL, 000, 000, QUO, OP_EQ, QUO, /* 78 `:#@'=" */
118 000, LET, LET, LET, LET, LET, LET, LET, /* 80 abcdefg */
119 LET, LET, 000, 000, 000, 000, 000, 000, /* 88 hi */
120 000, LET, LET, LET, LET, LET, LET, LET, /* 90 jklmnop */
121 LET, LET, 000, 000, 000, 000, 000, 000, /* 98 qr */
122 000,OP_NOT, LET, LET, LET, LET, LET, LET, /* A0 ~stuvwx */
123 LET, LET, 000, 000, 000, 000, 000, 000, /* A8 yz [ */
124 000, 000, 000, 000, 000, 000, 000, 000, /* B0 */
125 000, 000, 000, 000, 000, 000, 000, 000, /* B8 ] */
126 000, LET, LET, LET, LET, LET, LET, LET, /* C0 {ABCDEFG */
127 LET, LET, 000, 000, 000, 000, 000, 000, /* C8 HI */
128 000, LET, LET, LET, LET, LET, LET, LET, /* D0 }JKLMNOP */
129 LET, LET, 000, 000, 000, 000, 000, 000, /* D8 QR */
130 BSH, 000, LET, LET, LET, LET, LET, LET, /* E0 \ STUVWX */
131 LET, LET, 000, 000, 000, 000, 000, 000, /* E8 YZ */
132 DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, /* F0 01234567 */
133 DIG, DIG, 000, 000, 000, 000, 000, 000, /* F8 89 */
134 };
135
136 #else
137
138 char type[256] = { /* Character type codes Hex */
139 END, 000, 000, 000, 000, 000, 000, 000, /* 00 */
140 000, SPA, 000, 000, 000, 000, 000, 000, /* 08 */
141 000, 000, 000, 000, 000, 000, 000, 000, /* 10 */
142 000, 000, 000, 000, 000, LET, 000, SPA, /* 18 */
143 SPA,OP_NOT, QUO, 000, DOL,OP_MOD,OP_AND, QUO, /* 20 !"#$%&' */
144 OP_LPA,OP_RPA,OP_MUL,OP_ADD, 000,OP_SUB, DOT,OP_DIV, /* 28 ()*+,-./ */
145 DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, /* 30 01234567 */
146 DIG, DIG,OP_COL, 000, OP_LT, OP_EQ, OP_GT,OP_QUE, /* 38 89:;<=>? */
147 000, LET, LET, LET, LET, LET, LET, LET, /* 40 @ABCDEFG */
148 LET, LET, LET, LET, LET, LET, LET, LET, /* 48 HIJKLMNO */
149 LET, LET, LET, LET, LET, LET, LET, LET, /* 50 PQRSTUVW */
150 LET, LET, LET, 000, BSH, 000,OP_XOR, LET, /* 58 XYZ[\]^_ */
151 000, LET, LET, LET, LET, LET, LET, LET, /* 60 `abcdefg */
152 LET, LET, LET, LET, LET, LET, LET, LET, /* 68 hijklmno */
153 LET, LET, LET, LET, LET, LET, LET, LET, /* 70 pqrstuvw */
154 LET, LET, LET, 000, OP_OR, 000,OP_NOT, 000, /* 78 xyz{|}~ */
155 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */
156 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */
157 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */
158 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */
159 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */
160 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */
161 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */
162 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */
163 };
164
165 #endif
166
167
168 /*
169 * C P P S y m b o l T a b l e s
170 */
171
172 /*
173 * SBSIZE defines the number of hash-table slots for the symbol table.
174 * It must be a power of 2.
175 */
176 #ifndef SBSIZE
177 #define SBSIZE 64
178 #endif
179 #define SBMASK (SBSIZE - 1)
180 #if (SBSIZE ^ SBMASK) != ((SBSIZE * 2) - 1)
181 << error, SBSIZE must be a power of 2 >>
182 #endif
183
184
185 static DEFBUF *symtab[SBSIZE]; /* Symbol table queue headers */
186
InitCpp6()187 void InitCpp6()
188 {
189 int i;
190 for( i = 0; i < SBSIZE; i++ )
191 symtab[ i ] = NULL;
192 }
193
194
195
skipnl()196 void skipnl()
197 /*
198 * Skip to the end of the current input line.
199 */
200 {
201 register int c;
202
203 do { /* Skip to newline */
204 c = get();
205 } while (c != '\n' && c != EOF_CHAR);
206 }
207
208 int
skipws()209 skipws()
210 /*
211 * Skip over whitespace
212 */
213 {
214 register int c;
215
216 do { /* Skip whitespace */
217 c = get();
218 #if COMMENT_INVISIBLE
219 } while (type[c] == SPA || c == COM_SEP);
220 #else
221 } while (type[c] == SPA);
222 #endif
223 return (c);
224 }
225
scanid(int c)226 void scanid(int c)
227 /*
228 * Get the next token (an id) into the token buffer.
229 * Note: this code is duplicated in lookid().
230 * Change one, change both.
231 */
232 {
233 register char *bp;
234
235 if (c == DEF_MAGIC) /* Eat the magic token */
236 c = get(); /* undefiner. */
237 bp = token;
238 do {
239 if (bp < &token[IDMAX]) /* token dim is IDMAX+1 */
240 *bp++ = (char)c;
241 c = get();
242 } while (type[c] == LET || type[c] == DIG);
243 unget();
244 *bp = EOS;
245 }
246
247 int
macroid(int c)248 macroid(int c)
249 /*
250 * If c is a letter, scan the id. if it's #defined, expand it and scan
251 * the next character and try again.
252 *
253 * Else, return the character. If type[c] is a LET, the token is in token.
254 */
255 {
256 register DEFBUF *dp;
257
258 if (infile != NULL && infile->fp != NULL)
259 recursion = 0;
260 while (type[c] == LET && (dp = lookid(c)) != NULL) {
261 expand(dp);
262 c = get();
263 }
264 return (c);
265 }
266
267 int
catenate()268 catenate()
269 /*
270 * A token was just read (via macroid).
271 * If the next character is TOK_SEP, concatenate the next token
272 * return TRUE -- which should recall macroid after refreshing
273 * macroid's argument. If it is not TOK_SEP, unget() the character
274 * and return FALSE.
275 */
276 {
277 register int c;
278 register char *token1;
279
280 #if OK_CONCAT
281 if (get() != TOK_SEP) { /* Token concatenation */
282 unget();
283 return (FALSE);
284 }
285 else {
286 token1 = savestring(token); /* Save first token */
287 c = macroid(get()); /* Scan next token */
288 switch(type[c]) { /* What was it? */
289 case LET: /* An identifier, ... */
290 if (strlen(token1) + strlen(token) >= NWORK)
291 cfatal("work buffer overflow doing %s #", token1);
292 sprintf(work, "%s%s", token1, token);
293 break;
294
295 case DIG: /* A digit string */
296 strcpy(work, token1);
297 workp = work + strlen(work);
298 do {
299 save(c);
300 } while ((c = get()) != TOK_SEP);
301 /*
302 * The trailing TOK_SEP is no longer needed.
303 */
304 save(EOS);
305 break;
306
307 default: /* An error, ... */
308 #if ! COMMENT_INVISIBLE
309 if (isprint(c))
310 cierror("Strange character '%c' after #", c);
311 else
312 cierror("Strange character (%d.) after #", c);
313 #endif
314 strcpy(work, token1);
315 unget();
316 break;
317 }
318 /*
319 * work has the concatenated token and token1 has
320 * the first token (no longer needed). Unget the
321 * new (concatenated) token after freeing token1.
322 * Finally, setup to read the new token.
323 */
324 free(token1); /* Free up memory */
325 ungetstring(work); /* Unget the new thing, */
326 return (TRUE);
327 }
328 #else
329 return (FALSE); /* Not supported */
330 #endif
331 }
332
333 int
scanstring(int delim,void (* outfun)(int))334 scanstring(int delim,
335 #ifndef _NO_PROTO
336 void (*outfun)( int ) /* BP */ /* Output function */
337 #else
338 void (*outfun)() /* BP */
339 #endif
340 )
341 /*
342 * Scan off a string. Warning if terminated by newline or EOF.
343 * outfun() outputs the character -- to a buffer if in a macro.
344 * TRUE if ok, FALSE if error.
345 */
346 {
347 register int c;
348
349 instring = TRUE; /* Don't strip comments */
350 (*outfun)(delim);
351 while ((c = get()) != delim
352 && c != '\n'
353 && c != EOF_CHAR) {
354
355 if (c != DEF_MAGIC)
356 (*outfun)(c);
357 if (c == '\\')
358 (*outfun)(get());
359 }
360 instring = FALSE;
361 if (c == delim) {
362 (*outfun)(c);
363 return (TRUE);
364 }
365 else {
366 cerror("Unterminated string", NULLST);
367 unget();
368 return (FALSE);
369 }
370 }
371
scannumber(int c,register void (* outfun)(int))372 void scannumber(int c,
373 #ifndef _NO_PROTO
374 register void (*outfun)( int ) /* BP */ /* Output/store func */
375 #else
376 register void (*outfun)() /* BP */
377 #endif
378 )
379 /*
380 * Process a number. We know that c is from 0 to 9 or dot.
381 * Algorithm from Dave Conroy's Decus C.
382 */
383 {
384 register int radix; /* 8, 10, or 16 */
385 int expseen; /* 'e' seen in floater */
386 int signseen; /* '+' or '-' seen */
387 int octal89; /* For bad octal test */
388 int dotflag; /* TRUE if '.' was seen */
389
390 expseen = FALSE; /* No exponent seen yet */
391 signseen = TRUE; /* No +/- allowed yet */
392 octal89 = FALSE; /* No bad octal yet */
393 radix = 10; /* Assume decimal */
394 if ((dotflag = (c == '.')) != FALSE) { /* . something? */
395 (*outfun)('.'); /* Always out the dot */
396 if (type[(c = get())] != DIG) { /* If not a float numb, */
397 unget(); /* Rescan strange char */
398 return; /* All done for now */
399 }
400 } /* End of float test */
401 else if (c == '0') { /* Octal or hex? */
402 (*outfun)(c); /* Stuff initial zero */
403 radix = 8; /* Assume it's octal */
404 c = get(); /* Look for an 'x' */
405 if (c == 'x' || c == 'X') { /* Did we get one? */
406 radix = 16; /* Remember new radix */
407 (*outfun)(c); /* Stuff the 'x' */
408 c = get(); /* Get next character */
409 }
410 }
411 for (;;) { /* Process curr. char. */
412 /*
413 * Note that this algorithm accepts "012e4" and "03.4"
414 * as legitimate floating-point numbers.
415 */
416 if (radix != 16 && (c == 'e' || c == 'E')) {
417 if (expseen) /* Already saw 'E'? */
418 break; /* Exit loop, bad nbr. */
419 expseen = TRUE; /* Set exponent seen */
420 signseen = FALSE; /* We can read '+' now */
421 radix = 10; /* Decimal exponent */
422 }
423 else if (radix != 16 && c == '.') {
424 if (dotflag) /* Saw dot already? */
425 break; /* Exit loop, two dots */
426 dotflag = TRUE; /* Remember the dot */
427 radix = 10; /* Decimal fraction */
428 }
429 else if (c == '+' || c == '-') { /* 1.0e+10 */
430 if (signseen) /* Sign in wrong place? */
431 break; /* Exit loop, not nbr. */
432 /* signseen = TRUE; */ /* Remember we saw it */
433 }
434 else { /* Check the digit */
435 switch (c) {
436 case '8': case '9': /* Sometimes wrong */
437 octal89 = TRUE; /* Do check later */
438 case '0': case '1': case '2': case '3':
439 case '4': case '5': case '6': case '7':
440 break; /* Always ok */
441
442 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
443 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
444 if (radix == 16) /* Alpha's are ok only */
445 break; /* if reading hex. */
446 default: /* At number end */
447 goto done; /* Break from for loop */
448 } /* End of switch */
449 } /* End general case */
450 (*outfun)(c); /* Accept the character */
451 signseen = TRUE; /* Don't read sign now */
452 c = get(); /* Read another char */
453 } /* End of scan loop */
454 /*
455 * When we break out of the scan loop, c contains the first
456 * character (maybe) not in the number. If the number is an
457 * integer, allow a trailing 'L' for long and/or a trailing 'U'
458 * for unsigned. If not those, push the trailing character back
459 * on the input stream. Floating point numbers accept a trailing
460 * 'L' for "long double".
461 */
462 done: if (dotflag || expseen) { /* Floating point? */
463 if (c == 'l' || c == 'L') {
464 (*outfun)(c);
465 c = get(); /* Ungotten later */
466 }
467 }
468 else { /* Else it's an integer */
469 /*
470 * We know that dotflag and expseen are both zero, now:
471 * dotflag signals "saw 'L'", and
472 * expseen signals "saw 'U'".
473 */
474 for (;;) {
475 switch (c) {
476 case 'l':
477 case 'L':
478 if (dotflag)
479 goto nomore;
480 dotflag = TRUE;
481 break;
482
483 case 'u':
484 case 'U':
485 if (expseen)
486 goto nomore;
487 expseen = TRUE;
488 break;
489
490 default:
491 goto nomore;
492 }
493 (*outfun)(c); /* Got 'L' or 'U'. */
494 c = get(); /* Look at next, too. */
495 }
496 }
497 nomore: unget(); /* Not part of a number */
498 if (octal89 && radix == 8)
499 cwarn("Illegal digit in octal number", NULLST);
500 }
501
save(int c)502 void save(int c)
503 {
504 if (workp >= &work[NWORK]) {
505 work[NWORK-1] = '\0';
506 cfatal("Work buffer overflow: %s", work);
507 }
508 else *workp++ = (char)c;
509 }
510
char *
savestring(char* text)
/*
 * Return a copy of text held in memory obtained from getmem().
 */
{
    char *copy = getmem(strlen(text) + 1);

    return (strcpy(copy, text));
}
523
524 FILEINFO *
getfile(int bufsize,char * name)525 getfile(int bufsize, char* name)
526 /*
527 * Common FILEINFO buffer initialization for a new file or macro.
528 */
529 {
530 register FILEINFO *file;
531 register int size;
532
533 size = strlen(name); /* File/macro name */
534 file = (FILEINFO *) getmem(sizeof (FILEINFO) + bufsize + size);
535 file->parent = infile; /* Chain files together */
536 file->fp = NULL; /* No file yet */
537 file->filename = savestring(name); /* Save file/macro name */
538 file->progname = NULL; /* No #line seen yet */
539 file->unrecur = 0; /* No macro fixup */
540 file->bptr = file->buffer; /* Initialize line ptr */
541 file->buffer[0] = EOS; /* Force first read */
542 file->line = 0; /* (Not used just yet) */
543 if (infile != NULL) /* If #include file */
544 infile->line = line; /* Save current line */
545 infile = file; /* New current file */
546 line = 1; /* Note first line */
547 return (file); /* All done. */
548 }
549
550 char *
getmem(int size)551 getmem(int size)
552 /*
553 * Get a block of free memory.
554 */
555 {
556 register char *result;
557
558 if ((result = malloc((unsigned) size)) == NULL)
559 cfatal("Out of memory", NULLST);
560 return (result);
561 }
562
563
564 DEFBUF *
lookid(int c)565 lookid(int c)
566 /*
567 * Look for the next token in the symbol table. Returns token in "token".
568 * If found, returns the table pointer; Else returns NULL.
569 */
570 {
571 register int nhash;
572 register DEFBUF *dp;
573 register char *np;
574 int temp = 0;
575 int isrecurse; /* For #define foo foo */
576
577 np = token;
578 nhash = 0;
579 if (0 != (isrecurse = (c == DEF_MAGIC))) /* If recursive macro */
580 c = get(); /* hack, skip DEF_MAGIC */
581 do {
582 if (np < &token[IDMAX]) { /* token dim is IDMAX+1 */
583 *np++ = (char)c; /* Store token byte */
584 nhash += c; /* Update hash value */
585 }
586 c = get(); /* And get another byte */
587 } while (type[c] == LET || type[c] == DIG);
588 unget(); /* Rescan terminator */
589 *np = EOS; /* Terminate token */
590 if (isrecurse) /* Recursive definition */
591 return (NULL); /* undefined just now */
592 nhash += (np - token); /* Fix hash value */
593 dp = symtab[nhash & SBMASK]; /* Starting bucket */
594 while (dp != (DEFBUF *) NULL) { /* Search symbol table */
595 if (dp->hash == nhash /* Fast precheck */
596 && (temp = strcmp(dp->name, token)) >= 0)
597 break;
598 dp = dp->link; /* Nope, try next one */
599 }
600 return ((temp == 0) ? dp : NULL);
601 }
602
603 DEFBUF *
defendel(char * name,int delete)604 defendel(char* name, int delete)
605 /*
606 * Enter this name in the lookup table (delete = FALSE)
607 * or delete this name (delete = TRUE).
608 * Returns a pointer to the define block (delete = FALSE)
609 * Returns NULL if the symbol wasn't defined (delete = TRUE).
610 */
611 {
612 register DEFBUF *dp;
613 register DEFBUF **prevp;
614 register char *np;
615 int nhash;
616 int temp;
617 int size;
618
619 for (nhash = 0, np = name; *np != EOS;)
620 nhash += *np++;
621 size = (np - name);
622 nhash += size;
623 prevp = &symtab[nhash & SBMASK];
624 while ((dp = *prevp) != (DEFBUF *) NULL) {
625 if (dp->hash == nhash
626 && (temp = strcmp(dp->name, name)) >= 0) {
627 if (temp > 0)
628 dp = NULL; /* Not found */
629 else {
630 *prevp = dp->link; /* Found, unlink and */
631 if (dp->repl != NULL) /* Free the replacement */
632 free(dp->repl); /* if any, and then */
633 free((char *) dp); /* Free the symbol */
634 }
635 break;
636 }
637 prevp = &dp->link;
638 }
639 if (!delete) {
640 dp = (DEFBUF *) getmem(sizeof (DEFBUF) + size);
641 dp->link = *prevp;
642 *prevp = dp;
643 dp->hash = nhash;
644 dp->repl = NULL;
645 dp->nargs = 0;
646 strcpy(dp->name, name);
647 }
648 return (dp);
649 }
650
651 #if OSL_DEBUG_LEVEL > 1
652
dumpdef(char * why)653 void dumpdef(char *why)
654 {
655 register DEFBUF *dp;
656 register DEFBUF **syp;
657 FILE *pRememberOut = NULL;
658
659 if ( bDumpDefs ) /*ER */
660 {
661 pRememberOut = pCppOut;
662 pCppOut = pDefOut;
663 }
664 fprintf( pCppOut, "CPP symbol table dump %s\n", why);
665 for (syp = symtab; syp < &symtab[SBSIZE]; syp++) {
666 if ((dp = *syp) != (DEFBUF *) NULL) {
667 fprintf( pCppOut, "symtab[%d]\n", (syp - symtab));
668 do {
669 dumpadef((char *) NULL, dp);
670 } while ((dp = dp->link) != (DEFBUF *) NULL);
671 }
672 }
673 if ( bDumpDefs )
674 {
675 fprintf( pCppOut, "\n");
676 pCppOut = pRememberOut;
677 }
678 }
679
dumpadef(char * why,register DEFBUF * dp)680 void dumpadef(char *why, register DEFBUF *dp)
681 {
682 register char *cp;
683 register int c;
684 FILE *pRememberOut = NULL;
685
686 /*ER dump #define's to pDefOut */
687 if ( bDumpDefs )
688 {
689 pRememberOut = pCppOut;
690 pCppOut = pDefOut;
691 }
692 fprintf( pCppOut, " \"%s\" [%d]", dp->name, dp->nargs);
693 if (why != NULL)
694 fprintf( pCppOut, " (%s)", why);
695 if (dp->repl != NULL) {
696 fprintf( pCppOut, " => ");
697 for (cp = dp->repl; (c = *cp++ & 0xFF) != EOS;) {
698 #ifdef SOLAR
699 if (c == DEL) {
700 c = *cp++ & 0xFF;
701 if( c == EOS ) break;
702 fprintf( pCppOut, "<%%%d>", c - MAC_PARM);
703 }
704 #else
705 if (c >= MAC_PARM && c <= (MAC_PARM + PAR_MAC))
706 fprintf( pCppOut, "<%%%d>", c - MAC_PARM);
707 #endif
708 else if (isprint(c) || c == '\n' || c == '\t')
709 PUTCHAR(c);
710 else if (c < ' ')
711 fprintf( pCppOut, "<^%c>", c + '@');
712 else
713 fprintf( pCppOut, "<\\0%o>", c);
714 }
715 /*ER evaluate macros to pDefOut */
716 #ifdef EVALDEFS
717 if ( bDumpDefs && !bIsInEval && dp->nargs <= 0 )
718 {
719 FILEINFO *infileSave = infile;
720 char *tokenSave = savestring( token );
721 char *workSave = savestring( work );
722 int lineSave = line;
723 int wronglineSave = wrongline;
724 int recursionSave = recursion;
725 FILEINFO *file;
726 EVALTYPE valEval;
727
728 bIsInEval = 1;
729 infile = NULL; /* start from scrap */
730 line = 0;
731 wrongline = 0;
732 *token = EOS;
733 *work = EOS;
734 recursion = 0;
735 file = getfile( strlen( dp->repl ), dp->name );
736 strcpy( file->buffer, dp->repl );
737 fprintf( pCppOut, " ===> ");
738 nEvalOff = 0;
739 cppmain(); /* get() frees also *file */
740 valEval = 0;
741 if ( 0 == evaluate( EvalBuf, &valEval ) )
742 {
743 #ifdef EVALFLOATS
744 if ( valEval != (EVALTYPE)((long)valEval ) )
745 fprintf( pCppOut, " ==eval=> %f", valEval );
746 else
747 #endif
748 fprintf( pCppOut, " ==eval=> %ld", (long)valEval );
749 }
750 recursion = recursionSave;
751 wrongline = wronglineSave;
752 line = lineSave;
753 strcpy( work, workSave );
754 free( workSave );
755 strcpy( token, tokenSave );
756 free( tokenSave );
757 infile = infileSave;
758 bIsInEval = 0;
759 }
760 #endif
761 }
762 else {
763 fprintf( pCppOut, ", no replacement.");
764 }
765 PUTCHAR('\n');
766 if ( bDumpDefs )
767 pCppOut = pRememberOut;
768 }
769 #endif
770
771 /*
772 * G E T
773 */
774
775 int
get()776 get()
777 /*
778 * Return the next character from a macro or the current file.
779 * Handle end of file from #include files.
780 */
781 {
782 register int c;
783 register FILEINFO *file;
784 register int popped; /* Recursion fixup */
785
786 popped = 0;
787 get_from_file:
788 if ((file = infile) == NULL)
789 return (EOF_CHAR);
790 newline:
791 #if 0
792 fprintf( pCppOut, "get(%s), recursion %d, line %d, bptr = %d, buffer \"%s\"\n",
793 file->filename, recursion, line,
794 file->bptr - file->buffer, file->buffer);
795 #endif
796 /*
797 * Read a character from the current input line or macro.
798 * At EOS, either finish the current macro (freeing temp.
799 * storage) or read another line from the current input file.
800 * At EOF, exit the current file (#include) or, at EOF from
801 * the cpp input file, return EOF_CHAR to finish processing.
802 */
803 if ((c = *file->bptr++ & 0xFF) == EOS) {
804 /*
805 * Nothing in current line or macro. Get next line (if
806 * input from a file), or do end of file/macro processing.
807 * In the latter case, jump back to restart from the top.
808 */
809 if (file->fp == NULL) { /* NULL if macro */
810 popped++;
811 recursion -= file->unrecur;
812 if (recursion < 0)
813 recursion = 0;
814 infile = file->parent; /* Unwind file chain */
815 }
816 else { /* Else get from a file */
817 if ((file->bptr = fgets(file->buffer, NBUFF, file->fp))
818 != NULL) {
819 #if OSL_DEBUG_LEVEL > 1
820 if (debug > 1) { /* Dump it to stdout */
821 fprintf( pCppOut, "\n#line %d (%s), %s",
822 line, file->filename, file->buffer);
823 }
824 #endif
825 goto newline; /* process the line */
826 }
827 else {
828 if( file->fp != stdin )
829 fclose(file->fp); /* Close finished file */
830 if ((infile = file->parent) != NULL) {
831 /*
832 * There is an "ungotten" newline in the current
833 * infile buffer (set there by doinclude() in
834 * cpp1.c). Thus, we know that the mainline code
835 * is skipping over blank lines and will do a
836 * #line at its convenience.
837 */
838 wrongline = TRUE; /* Need a #line now */
839 }
840 }
841 }
842 /*
843 * Free up space used by the (finished) file or macro and
844 * restart input from the parent file/macro, if any.
845 */
846 free(file->filename); /* Free name and */
847 if (file->progname != NULL) /* if a #line was seen, */
848 free(file->progname); /* free it, too. */
849 free((char *) file); /* Free file space */
850 if (infile == NULL) /* If at end of file */
851 return (EOF_CHAR); /* Return end of file */
852 line = infile->line; /* Reset line number */
853 goto get_from_file; /* Get from the top. */
854 }
855 /*
856 * Common processing for the new character.
857 */
858 if (c == DEF_MAGIC && file->fp != NULL) /* Don't allow delete */
859 goto newline; /* from a file */
860 if (file->parent != NULL) { /* Macro or #include */
861 if (popped != 0)
862 file->parent->unrecur += popped;
863 else {
864 recursion -= file->parent->unrecur;
865 if (recursion < 0)
866 recursion = 0;
867 file->parent->unrecur = 0;
868 }
869 }
870 #if (HOST == SYS_UNIX)
871 /*ER*/ if (c == '\r')
872 /*ER*/ return get(); /* DOS fuck */
873 #endif
874 if (c == '\n') /* Maintain current */
875 ++line; /* line counter */
876 if (instring) /* Strings just return */
877 return (c); /* the character. */
878 else if (c == '/') { /* Comment? */
879 instring = TRUE; /* So get() won't loop */
880 /*MM c++ comments */
881 /*MM*/ c = get();
882 /*MM*/ if ((c != '*') && (c != '/')) { /* Next byte '*'? */
883 instring = FALSE; /* Nope, no comment */
884 unget(); /* Push the char. back */
885 return ('/'); /* Return the slash */
886 }
887 if (keepcomments) { /* If writing comments */
888 PUTCHAR('/'); /* Write out the */
889 /* initializer */
890 /*MM*/ if( '*' == c )
891 PUTCHAR('*');
892 /*MM*/ else
893 /*MM*/ PUTCHAR('/');
894
895 }
896 /*MM*/ if( '*' == c ){
897 for (;;) { /* Eat a comment */
898 c = get();
899 test: if (keepcomments && c != EOF_CHAR)
900 cput(c);
901 switch (c) {
902 case EOF_CHAR:
903 cerror("EOF in comment", NULLST);
904 return (EOF_CHAR);
905
906 case '/':
907 if ((c = get()) != '*') /* Don't let comments */
908 goto test; /* Nest. */
909 #ifdef STRICT_COMMENTS
910 cwarn("Nested comments", NULLST);
911 #endif
912 /* Fall into * stuff */
913 case '*':
914 if ((c = get()) != '/') /* If comment doesn't */
915 goto test; /* end, look at next */
916 instring = FALSE; /* End of comment, */
917 if (keepcomments) { /* Put out the comment */
918 cput(c); /* terminator, too */
919 }
920 /*
921 * A comment is syntactically "whitespace" --
922 * however, there are certain strange sequences
923 * such as
924 * #define foo(x) (something)
925 * foo|* comment *|(123)
926 * these are '/' ^ ^
927 * where just returning space (or COM_SEP) will cause
928 * problems. This can be "fixed" by overwriting the
929 * '/' in the input line buffer with ' ' (or COM_SEP)
930 * but that may mess up an error message.
931 * So, we peek ahead -- if the next character is
932 * "whitespace" we just get another character, if not,
933 * we modify the buffer. All in the name of purity.
934 */
935 if (*file->bptr == '\n'
936 || type[*file->bptr & 0xFF] == SPA)
937 goto newline;
938 #if COMMENT_INVISIBLE
939 /*
940 * Return magic (old-fashioned) syntactic space.
941 */
942 return ((file->bptr[-1] = COM_SEP));
943 #else
944 return ((file->bptr[-1] = ' '));
945 #endif
946
947 case '\n': /* we'll need a #line */
948 if (!keepcomments)
949 wrongline = TRUE; /* later... */
950 default: /* Anything else is */
951 break; /* Just a character */
952 } /* End switch */
953 } /* End comment loop */
954 }
955 else{ /* c++ comment */
956 /*MM c++ comment*/
957 for (;;) { /* Eat a comment */
958 c = get();
959 if (keepcomments && c != EOF_CHAR)
960 cput(c);
961 if( EOF_CHAR == c )
962 return (EOF_CHAR);
963 else if( '\n' == c ){
964 instring = FALSE; /* End of comment, */
965 return( c );
966 }
967 }
968 }
969 } /* End if in comment */
970 else if (!inmacro && c == '\\') { /* If backslash, peek */
971 if ((c = get()) == '\n') { /* for a <nl>. If so, */
972 wrongline = TRUE;
973 goto newline;
974 }
975 else { /* Backslash anything */
976 unget(); /* Get it later */
977 return ('\\'); /* Return the backslash */
978 }
979 }
980 else if (c == '\f' || c == VT) /* Form Feed, Vertical */
981 c = ' '; /* Tab are whitespace */
982 else if (c == 0xef) /* eat up UTF-8 BOM */
983 {
984 if((c = get()) == 0xbb)
985 {
986 if((c = get()) == 0xbf)
987 {
988 c = get();
989 return c;
990 }
991 else
992 {
993 unget();
994 unget();
995 return 0xef;
996 }
997 }
998 else
999 {
1000 unget();
1001 return 0xef;
1002 }
1003 }
1004 return (c); /* Just return the char */
1005 }
1006
unget()1007 void unget()
1008 /*
1009 * Backup the pointer to reread the last character. Fatal error
1010 * (code bug) if we backup too far. unget() may be called,
1011 * without problems, at end of file. Only one character may
1012 * be ungotten. If you need to unget more, call ungetstring().
1013 */
1014 {
1015 register FILEINFO *file;
1016
1017 if ((file = infile) == NULL)
1018 return; /* Unget after EOF */
1019 if (--file->bptr < file->buffer)
1020 cfatal("Too much pushback", NULLST);
1021 if (*file->bptr == '\n') /* Ungetting a newline? */
1022 --line; /* Unget the line number, too */
1023 }
1024
ungetstring(char * text)1025 void ungetstring(char* text)
1026 /*
1027 * Push a string back on the input stream. This is done by treating
1028 * the text as if it were a macro.
1029 */
1030 {
1031 register FILEINFO *file;
1032 #ifndef ZTC /* BP */
1033 extern FILEINFO *getfile();
1034 #endif
1035 file = getfile(strlen(text) + 1, "");
1036 strcpy(file->buffer, text);
1037 }
1038
1039 int
cget()1040 cget()
1041 /*
1042 * Get one character, absorb "funny space" after comments or
1043 * token concatenation
1044 */
1045 {
1046 register int c;
1047
1048 do {
1049 c = get();
1050 #if COMMENT_INVISIBLE
1051 } while (c == TOK_SEP || c == COM_SEP);
1052 #else
1053 } while (c == TOK_SEP);
1054 #endif
1055 return (c);
1056 }
1057
1058 /*
1059 * Error messages and other hacks. The first byte of severity
1060 * is 'S' for string arguments and 'I' for int arguments. This
1061 * is needed for portability with machines that have int's that
1062 * are shorter than char *'s.
1063 */
1064
domsg(char * severity,char * format,void * arg)1065 static void domsg(char* severity, char* format, void* arg)
1066 /*
1067 * Print filenames, macro names, and line numbers for error messages.
1068 */
1069 {
1070 register char *tp;
1071 register FILEINFO *file;
1072
1073 fprintf(stderr, "%sline %d, %s: ", MSG_PREFIX, line, &severity[1]);
1074 if (*severity == 'S')
1075 fprintf(stderr, format, (char *)arg);
1076 else
1077 fprintf(stderr, format, *((int *)arg) );
1078 putc('\n', stderr);
1079 if ((file = infile) == NULL)
1080 return; /* At end of file */
1081 if (file->fp != NULL) {
1082 tp = file->buffer; /* Print current file */
1083 fprintf(stderr, "%s", tp); /* name, making sure */
1084 if (tp[strlen(tp) - 1] != '\n') /* there's a newline */
1085 putc('\n', stderr);
1086 }
1087 while ((file = file->parent) != NULL) { /* Print #includes, too */
1088 if (file->fp == NULL)
1089 fprintf(stderr, "from macro %s\n", file->filename);
1090 else {
1091 tp = file->buffer;
1092 fprintf(stderr, "from file %s, line %d:\n%s",
1093 (file->progname != NULL)
1094 ? file->progname : file->filename,
1095 file->line, tp);
1096 if (tp[strlen(tp) - 1] != '\n')
1097 putc('\n', stderr);
1098 }
1099 }
1100 }
1101
cerror(char * format,char * sarg)1102 void cerror(char* format, char* sarg)
1103 /*
1104 * Print a normal error message, string argument.
1105 */
1106 {
1107 domsg("SError", format, sarg);
1108 errors++;
1109 }
1110
cierror(char * format,int narg)1111 void cierror(char* format, int narg)
1112 /*
1113 * Print a normal error message, numeric argument.
1114 */
1115 {
1116 domsg("IError", format, &narg);
1117 errors++;
1118 }
1119
cfatal(char * format,char * sarg)1120 void cfatal(char* format, char* sarg)
1121 /*
1122 * A real disaster
1123 */
1124 {
1125 domsg("SFatal error", format, sarg);
1126 exit(IO_ERROR);
1127 }
1128
void cwarn(char* format, char* sarg)
/*
 * Print a warning whose format takes one string argument.
 * The error count is not changed.
 */
{
    domsg("SWarning", format, sarg);
}
1136
void ciwarn(char* format, int narg)
/*
 * Print a warning whose format takes one int argument.
 * The error count is not changed.
 */
{
    domsg("IWarning", format, &narg);
}
1144
1145