xref: /trunk/main/rsc/source/rscpp/cpp6.c (revision 8e2a856b)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 #include        <stdio.h>
25 #include        <ctype.h>
26 #include        <string.h>
27 #include        "cppdef.h"
28 #include        "cpp.h"
29 
30 /*ER evaluate macros to pDefOut */
31 
32 /*
33  * skipnl()     skips over input text to the end of the line.
34  * skipws()     skips over "whitespace" (spaces or tabs), but
35  *              does not skip over the end of the line.  It skips over
36  *              TOK_SEP, however (though that shouldn't happen).
37  * scanid()     reads the next token (C identifier) into token[].
38  *              The caller has already read the first character of
39  *              the identifier.  Unlike macroid(), the token is
40  *              never expanded.
41  * macroid()    reads the next token (C identifier) into token[].
42  *              If it is a #defined macro, it is expanded, and
43  *              macroid() returns TRUE, otherwise, FALSE.
44  * catenate()   Does the dirty work of token concatenation, TRUE if it did.
45  * scanstring() Reads a string from the input stream, calling
46  *              a user-supplied function for each character.
47  *              This function may be output() to write the
48  *              string to the output file, or save() to save
49  *              the string in the work buffer.
50  * scannumber() Reads a C numeric constant from the input stream,
51  *              calling the user-supplied function for each
52  *              character.  (output() or save() as noted above.)
53  * save()       Save one character in the work[] buffer.
54  * savestring() Saves a string in malloc() memory.
55  * getfile()    Initialize a new FILEINFO structure, called when
56  *              #include opens a new file, or a macro is to be
57  *              expanded.
58  * getmem()     Get a specified number of bytes from malloc memory.
59  * output()     Write one character to stdout (calling PUTCHAR) --
60  *              implemented as a function so its address may be
61  *              passed to scanstring() and scannumber().
62  * lookid()     Scans the next token (identifier) from the input
63  *              stream.  Looks for it in the #defined symbol table.
64  *              Returns a pointer to the definition, if found, or NULL
65  *              if not present.  The identifier is stored in token[].
66  * defendel()   Define enter/delete subroutine.  Updates the
67  *              symbol table.
68  * get()        Read the next byte from the current input stream,
69  *              handling end of (macro/file) input and embedded
70  *              comments appropriately.  Note that the global
71  *              instring is -- essentially -- a parameter to get().
72  * cget()       Like get(), but skip over TOK_SEP.
73  * unget()      Push last gotten character back on the input stream.
74  * cerror(), cwarn(), cfatal(), cierror(), ciwarn()
75  *              These routines format and print messages to the user.
76  *              cerror & cwarn take a format and a single string argument.
77  *              cierror & ciwarn take a format and a single int (char) argument.
78  *              cfatal takes a format and a single string argument.
79  */
80 
81 /*
82  * This table must be rewritten for a non-Ascii machine.
83  *
84  * Note that several "non-visible" characters have special meaning:
85  * Hex 1D DEF_MAGIC -- a flag to prevent #define recursion.
86  * Hex 1E TOK_SEP   -- a delimiter for token concatenation
87  * Hex 1F COM_SEP   -- a zero-width whitespace for comment concatenation
88  */
89 #if TOK_SEP != 0x1E || COM_SEP != 0x1F || DEF_MAGIC != 0x1D
90         << error type table is not correct >>
91 #endif
92 
93 #if OK_DOLLAR
94 #define DOL     LET
95 #else
96 #define DOL     000
97 #endif
98 
99 #ifdef EBCDIC
100 
/*
 * EBCDIC variant of the character classification table.  It is indexed
 * by character code; each entry is either a class code (END, SPA, LET,
 * DIG, DOT, QUO, BSH), an operator code (OP_xxx), or 000 for characters
 * with no special class.  All 256 entries are listed explicitly, eight
 * per row; the trailing comment on each row gives the hex code of the
 * row's first entry and the printable characters in that row.
 * The entries at 0x1D and 0x1F are the DEF_MAGIC and COM_SEP markers.
 */
101 char type[256] = {              /* Character type codes    Hex          */
102    END,   000,   000,   000,   000,   SPA,   000,   000, /* 00          */
103    000,   000,   000,   000,   000,   000,   000,   000, /* 08          */
104    000,   000,   000,   000,   000,   000,   000,   000, /* 10          */
105    000,   000,   000,   000,   000,   LET,   000,   SPA, /* 18          */
106    000,   000,   000,   000,   000,   000,   000,   000, /* 20          */
107    000,   000,   000,   000,   000,   000,   000,   000, /* 28          */
108    000,   000,   000,   000,   000,   000,   000,   000, /* 30          */
109    000,   000,   000,   000,   000,   000,   000,   000, /* 38          */
110    SPA,   000,   000,   000,   000,   000,   000,   000, /* 40          */
111    000,   000,   000,   DOT, OP_LT,OP_LPA,OP_ADD, OP_OR, /* 48    .<(+| */
112 OP_AND,   000,   000,   000,   000,   000,   000,   000, /* 50 &        */
113    000,   000,OP_NOT,   DOL,OP_MUL,OP_RPA,   000,OP_XOR, /* 58   !$*);^ */
114 OP_SUB,OP_DIV,   000,   000,   000,   000,   000,   000, /* 60 -/       */
115    000,   000,   000,   000,OP_MOD,   LET, OP_GT,OP_QUE, /* 68    ,%_>? */
116    000,   000,   000,   000,   000,   000,   000,   000, /* 70          */
117    000,   000,OP_COL,   000,   000,   QUO, OP_EQ,   QUO, /* 78  `:#@'=" */
118    000,   LET,   LET,   LET,   LET,   LET,   LET,   LET, /* 80  abcdefg */
119    LET,   LET,   000,   000,   000,   000,   000,   000, /* 88 hi       */
120    000,   LET,   LET,   LET,   LET,   LET,   LET,   LET, /* 90  jklmnop */
121    LET,   LET,   000,   000,   000,   000,   000,   000, /* 98 qr       */
122    000,OP_NOT,   LET,   LET,   LET,   LET,   LET,   LET, /* A0  ~stuvwx */
123    LET,   LET,   000,   000,   000,   000,   000,   000, /* A8 yz   [   */
124    000,   000,   000,   000,   000,   000,   000,   000, /* B0          */
125    000,   000,   000,   000,   000,   000,   000,   000, /* B8      ]   */
126    000,   LET,   LET,   LET,   LET,   LET,   LET,   LET, /* C0 {ABCDEFG */
127    LET,   LET,   000,   000,   000,   000,   000,   000, /* C8 HI       */
128    000,   LET,   LET,   LET,   LET,   LET,   LET,   LET, /* D0 }JKLMNOP */
129    LET,   LET,   000,   000,   000,   000,   000,   000, /* D8 QR       */
130    BSH,   000,   LET,   LET,   LET,   LET,   LET,   LET, /* E0 \ STUVWX */
131    LET,   LET,   000,   000,   000,   000,   000,   000, /* E8 YZ       */
132    DIG,   DIG,   DIG,   DIG,   DIG,   DIG,   DIG,   DIG, /* F0 01234567 */
133    DIG,   DIG,   000,   000,   000,   000,   000,   000, /* F8 89       */
134 };
135 
136 #else
137 
/*
 * ASCII variant of the character classification table.  It is indexed
 * by character code; each entry is either a class code (END, SPA, LET,
 * DIG, DOT, QUO, BSH), an operator code (OP_xxx), or 000 for characters
 * with no special class.  The first 16 rows cover 0x00..0x7F; the
 * trailing comment on each of those rows gives the hex code of the
 * row's first entry and the characters in that row.
 * The entries at 0x1D and 0x1F are the DEF_MAGIC and COM_SEP markers.
 * Only 64 of the 128 entries for 0x80..0xFF are written out (all 000);
 * the remaining ones are zero as well because a partially initialized
 * array is zero-filled.
 */
138 char type[256] = {              /* Character type codes    Hex          */
139    END,   000,   000,   000,   000,   000,   000,   000, /* 00          */
140    000,   SPA,   000,   000,   000,   000,   000,   000, /* 08          */
141    000,   000,   000,   000,   000,   000,   000,   000, /* 10          */
142    000,   000,   000,   000,   000,   LET,   000,   SPA, /* 18          */
143    SPA,OP_NOT,   QUO,   000,   DOL,OP_MOD,OP_AND,   QUO, /* 20  !"#$%&' */
144 OP_LPA,OP_RPA,OP_MUL,OP_ADD,   000,OP_SUB,   DOT,OP_DIV, /* 28 ()*+,-./ */
145    DIG,   DIG,   DIG,   DIG,   DIG,   DIG,   DIG,   DIG, /* 30 01234567 */
146    DIG,   DIG,OP_COL,   000, OP_LT, OP_EQ, OP_GT,OP_QUE, /* 38 89:;<=>? */
147    000,   LET,   LET,   LET,   LET,   LET,   LET,   LET, /* 40 @ABCDEFG */
148    LET,   LET,   LET,   LET,   LET,   LET,   LET,   LET, /* 48 HIJKLMNO */
149    LET,   LET,   LET,   LET,   LET,   LET,   LET,   LET, /* 50 PQRSTUVW */
150    LET,   LET,   LET,   000,   BSH,   000,OP_XOR,   LET, /* 58 XYZ[\]^_ */
151    000,   LET,   LET,   LET,   LET,   LET,   LET,   LET, /* 60 `abcdefg */
152    LET,   LET,   LET,   LET,   LET,   LET,   LET,   LET, /* 68 hijklmno */
153    LET,   LET,   LET,   LET,   LET,   LET,   LET,   LET, /* 70 pqrstuvw */
154    LET,   LET,   LET,   000, OP_OR,   000,OP_NOT,   000, /* 78 xyz{|}~  */
155    000,   000,   000,   000,   000,   000,   000,   000, /*   80 .. FF  */
156    000,   000,   000,   000,   000,   000,   000,   000, /*   80 .. FF  */
157    000,   000,   000,   000,   000,   000,   000,   000, /*   80 .. FF  */
158    000,   000,   000,   000,   000,   000,   000,   000, /*   80 .. FF  */
159    000,   000,   000,   000,   000,   000,   000,   000, /*   80 .. FF  */
160    000,   000,   000,   000,   000,   000,   000,   000, /*   80 .. FF  */
161    000,   000,   000,   000,   000,   000,   000,   000, /*   80 .. FF  */
162    000,   000,   000,   000,   000,   000,   000,   000, /*   80 .. FF  */
163 };
164 
165 #endif
166 
167 
168 /*
169  *                      C P P   S y m b o l   T a b l e s
170  */
171 
172 /*
173  * SBSIZE defines the number of hash-table slots for the symbol table.
174  * It must be a power of 2.
175  */
176 #ifndef SBSIZE
177 #define SBSIZE  64
178 #endif
179 #define SBMASK  (SBSIZE - 1)
180 #if (SBSIZE ^ SBMASK) != ((SBSIZE * 2) - 1)
181         << error, SBSIZE must be a power of 2 >>
182 #endif
183 
184 
185 static DEFBUF   *symtab[SBSIZE];        /* Symbol table queue headers   */
186 
InitCpp6()187 void InitCpp6()
188 {
189     int i;
190     for( i = 0; i < SBSIZE; i++ )
191         symtab[ i ] = NULL;
192 }
193 
194 
195 
skipnl()196 void skipnl()
197 /*
198  * Skip to the end of the current input line.
199  */
200 {
201         register int            c;
202 
203         do {                            /* Skip to newline      */
204             c = get();
205         } while (c != '\n' && c != EOF_CHAR);
206 }
207 
208 int
skipws()209 skipws()
210 /*
211  * Skip over whitespace
212  */
213 {
214         register int            c;
215 
216         do {                            /* Skip whitespace      */
217             c = get();
218 #if COMMENT_INVISIBLE
219         } while (type[c] == SPA || c == COM_SEP);
220 #else
221         } while (type[c] == SPA);
222 #endif
223         return (c);
224 }
225 
scanid(int c)226 void scanid(int c)
227 /*
228  * Get the next token (an id) into the token buffer.
229  * Note: this code is duplicated in lookid().
230  * Change one, change both.
231  */
232 {
233         register char   *bp;
234 
235         if (c == DEF_MAGIC)                     /* Eat the magic token  */
236             c = get();                          /* undefiner.           */
237         bp = token;
238         do {
239             if (bp < &token[IDMAX])             /* token dim is IDMAX+1 */
240                 *bp++ = (char)c;
241             c = get();
242         } while (type[c] == LET || type[c] == DIG);
243         unget();
244         *bp = EOS;
245 }
246 
247 int
macroid(int c)248 macroid(int c)
249 /*
250  * If c is a letter, scan the id.  if it's #defined, expand it and scan
251  * the next character and try again.
252  *
253  * Else, return the character.  If type[c] is a LET, the token is in token.
254  */
255 {
256         register DEFBUF *dp;
257 
258         if (infile != NULL && infile->fp != NULL)
259             recursion = 0;
260         while (type[c] == LET && (dp = lookid(c)) != NULL) {
261             expand(dp);
262             c = get();
263         }
264         return (c);
265 }
266 
267 int
catenate()268 catenate()
269 /*
270  * A token was just read (via macroid).
271  * If the next character is TOK_SEP, concatenate the next token
272  * return TRUE -- which should recall macroid after refreshing
273  * macroid's argument.  If it is not TOK_SEP, unget() the character
274  * and return FALSE.
275  */
276 {
277         register int            c;
278         register char           *token1;
279 
280 #if OK_CONCAT
281         if (get() != TOK_SEP) {                 /* Token concatenation  */
282             unget();
283             return (FALSE);
284         }
285         else {
286             token1 = savestring(token);         /* Save first token     */
287             c = macroid(get());                 /* Scan next token      */
288             switch(type[c]) {                   /* What was it?         */
289             case LET:                           /* An identifier, ...   */
290                 if (strlen(token1) + strlen(token) >= NWORK)
291                     cfatal("work buffer overflow doing %s #", token1);
292                 sprintf(work, "%s%s", token1, token);
293                 break;
294 
295             case DIG:                           /* A digit string       */
296                 strcpy(work, token1);
297                 workp = work + strlen(work);
298                 do {
299                     save(c);
300                 } while ((c = get()) != TOK_SEP);
301                 /*
302                  * The trailing TOK_SEP is no longer needed.
303                  */
304                 save(EOS);
305                 break;
306 
307             default:                            /* An error, ...        */
308 #if ! COMMENT_INVISIBLE
309                 if (isprint(c))
310                     cierror("Strange character '%c' after #", c);
311                 else
312                     cierror("Strange character (%d.) after #", c);
313 #endif
314                 strcpy(work, token1);
315                 unget();
316                 break;
317             }
318             /*
319              * work has the concatenated token and token1 has
320              * the first token (no longer needed).  Unget the
321              * new (concatenated) token after freeing token1.
322              * Finally, setup to read the new token.
323              */
324             free(token1);                       /* Free up memory       */
325             ungetstring(work);                  /* Unget the new thing, */
326             return (TRUE);
327         }
328 #else
329         return (FALSE);                         /* Not supported        */
330 #endif
331 }
332 
333 int
scanstring(int delim,void (* outfun)(int))334 scanstring(int delim,
335 #ifndef _NO_PROTO
336 void             (*outfun)( int ) /* BP */    /* Output function              */
337 #else
338 void         (*outfun)() /* BP */
339 #endif
340 )
341 /*
342  * Scan off a string.  Warning if terminated by newline or EOF.
343  * outfun() outputs the character -- to a buffer if in a macro.
344  * TRUE if ok, FALSE if error.
345  */
346 {
347         register int            c;
348 
349         instring = TRUE;                /* Don't strip comments         */
350         (*outfun)(delim);
351         while ((c = get()) != delim
352              && c != '\n'
353              && c != EOF_CHAR) {
354 
355             if (c != DEF_MAGIC)
356             (*outfun)(c);
357             if (c == '\\')
358                 (*outfun)(get());
359         }
360         instring = FALSE;
361         if (c == delim) {
362             (*outfun)(c);
363             return (TRUE);
364         }
365         else {
366             cerror("Unterminated string", NULLST);
367             unget();
368             return (FALSE);
369         }
370 }
371 
scannumber(int c,register void (* outfun)(int))372 void scannumber(int c,
373 #ifndef _NO_PROTO
374 register void    (*outfun)( int )  /* BP */    /* Output/store func    */
375 #else
376 register void    (*outfun)() /* BP */
377 #endif
378 )
379 /*
380  * Process a number.  We know that c is from 0 to 9 or dot.
381  * Algorithm from Dave Conroy's Decus C.
382  */
383 {
384         register int    radix;                  /* 8, 10, or 16         */
385         int             expseen;                /* 'e' seen in floater  */
386         int             signseen;               /* '+' or '-' seen      */
387         int             octal89;                /* For bad octal test   */
388         int             dotflag;                /* TRUE if '.' was seen */
389 
390         expseen = FALSE;                        /* No exponent seen yet */
391         signseen = TRUE;                        /* No +/- allowed yet   */
392         octal89 = FALSE;                        /* No bad octal yet     */
393         radix = 10;                             /* Assume decimal       */
394         if ((dotflag = (c == '.')) != FALSE) {  /* . something?         */
395             (*outfun)('.');                     /* Always out the dot   */
396             if (type[(c = get())] != DIG) {     /* If not a float numb, */
397                 unget();                        /* Rescan strange char  */
398                 return;                         /* All done for now     */
399             }
400         }                                       /* End of float test    */
401         else if (c == '0') {                    /* Octal or hex?        */
402             (*outfun)(c);                       /* Stuff initial zero   */
403             radix = 8;                          /* Assume it's octal    */
404             c = get();                          /* Look for an 'x'      */
405             if (c == 'x' || c == 'X') {         /* Did we get one?      */
406                 radix = 16;                     /* Remember new radix   */
407                 (*outfun)(c);                   /* Stuff the 'x'        */
408                 c = get();                      /* Get next character   */
409             }
410         }
411         for (;;) {                              /* Process curr. char.  */
412             /*
413              * Note that this algorithm accepts "012e4" and "03.4"
414              * as legitimate floating-point numbers.
415              */
416             if (radix != 16 && (c == 'e' || c == 'E')) {
417                 if (expseen)                    /* Already saw 'E'?     */
418                     break;                      /* Exit loop, bad nbr.  */
419                 expseen = TRUE;                 /* Set exponent seen    */
420                 signseen = FALSE;               /* We can read '+' now  */
421                 radix = 10;                     /* Decimal exponent     */
422             }
423             else if (radix != 16 && c == '.') {
424                 if (dotflag)                    /* Saw dot already?     */
425                     break;                      /* Exit loop, two dots  */
426                 dotflag = TRUE;                 /* Remember the dot     */
427                 radix = 10;                     /* Decimal fraction     */
428             }
429             else if (c == '+' || c == '-') {    /* 1.0e+10              */
430                 if (signseen)                   /* Sign in wrong place? */
431                     break;                      /* Exit loop, not nbr.  */
432                 /* signseen = TRUE; */          /* Remember we saw it   */
433             }
434             else {                              /* Check the digit      */
435                 switch (c) {
436                 case '8': case '9':             /* Sometimes wrong      */
437                     octal89 = TRUE;             /* Do check later       */
438                 case '0': case '1': case '2': case '3':
439                 case '4': case '5': case '6': case '7':
440                     break;                      /* Always ok            */
441 
442                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
443                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
444                     if (radix == 16)            /* Alpha's are ok only  */
445                         break;                  /* if reading hex.      */
446                 default:                        /* At number end        */
447                     goto done;                  /* Break from for loop  */
448                 }                               /* End of switch        */
449             }                                   /* End general case     */
450             (*outfun)(c);                       /* Accept the character */
451             signseen = TRUE;                    /* Don't read sign now  */
452             c = get();                          /* Read another char    */
453         }                                       /* End of scan loop     */
454         /*
455          * When we break out of the scan loop, c contains the first
456          * character (maybe) not in the number.  If the number is an
457          * integer, allow a trailing 'L' for long and/or a trailing 'U'
458          * for unsigned.  If not those, push the trailing character back
459          * on the input stream.  Floating point numbers accept a trailing
460          * 'L' for "long double".
461          */
462 done:   if (dotflag || expseen) {               /* Floating point?      */
463             if (c == 'l' || c == 'L') {
464                 (*outfun)(c);
465                 c = get();                      /* Ungotten later       */
466             }
467         }
468         else {                                  /* Else it's an integer */
469             /*
470              * We know that dotflag and expseen are both zero, now:
471              * dotflag signals "saw 'L'", and
472              * expseen signals "saw 'U'".
473              */
474             for (;;) {
475                 switch (c) {
476                 case 'l':
477                 case 'L':
478                     if (dotflag)
479                         goto nomore;
480                     dotflag = TRUE;
481                     break;
482 
483                 case 'u':
484                 case 'U':
485                     if (expseen)
486                         goto nomore;
487                     expseen = TRUE;
488                     break;
489 
490                 default:
491                     goto nomore;
492                 }
493                 (*outfun)(c);                   /* Got 'L' or 'U'.      */
494                 c = get();                      /* Look at next, too.   */
495             }
496         }
497 nomore: unget();                                /* Not part of a number */
498         if (octal89 && radix == 8)
499             cwarn("Illegal digit in octal number", NULLST);
500 }
501 
save(int c)502 void save(int c)
503 {
504         if (workp >= &work[NWORK]) {
505             work[NWORK-1] = '\0';
506             cfatal("Work buffer overflow:  %s", work);
507         }
508         else *workp++ = (char)c;
509 }
510 
/*
 * savestring() -- return a copy of text (including its terminator) in
 * memory obtained from getmem().
 */
char *
savestring(char* text)
{
        char *copy = getmem(strlen(text) + 1);

        return (strcpy(copy, text));
}
523 
524 FILEINFO        *
getfile(int bufsize,char * name)525 getfile(int bufsize, char* name)
526 /*
527  * Common FILEINFO buffer initialization for a new file or macro.
528  */
529 {
530         register FILEINFO       *file;
531         register int            size;
532 
533         size = strlen(name);                    /* File/macro name      */
534         file = (FILEINFO *) getmem(sizeof (FILEINFO) + bufsize + size);
535         file->parent = infile;                  /* Chain files together */
536         file->fp = NULL;                        /* No file yet          */
537         file->filename = savestring(name);      /* Save file/macro name */
538         file->progname = NULL;                  /* No #line seen yet    */
539         file->unrecur = 0;                      /* No macro fixup       */
540         file->bptr = file->buffer;              /* Initialize line ptr  */
541         file->buffer[0] = EOS;                  /* Force first read     */
542         file->line = 0;                         /* (Not used just yet)  */
543         if (infile != NULL)                     /* If #include file     */
544             infile->line = line;                /* Save current line    */
545         infile = file;                          /* New current file     */
546         line = 1;                               /* Note first line      */
547         return (file);                          /* All done.            */
548 }
549 
550 char *
getmem(int size)551 getmem(int size)
552 /*
553  * Get a block of free memory.
554  */
555 {
556         register char   *result;
557 
558         if ((result = malloc((unsigned) size)) == NULL)
559             cfatal("Out of memory", NULLST);
560         return (result);
561 }
562 
563 
564 DEFBUF *
lookid(int c)565 lookid(int c)
566 /*
567  * Look for the next token in the symbol table.  Returns token in "token".
568  * If found, returns the table pointer;  Else returns NULL.
569  */
570 {
571         register int            nhash;
572         register DEFBUF         *dp;
573         register char           *np;
574         int                     temp = 0;
575         int                     isrecurse;      /* For #define foo foo  */
576 
577         np = token;
578         nhash = 0;
579         if (0 != (isrecurse = (c == DEF_MAGIC)))     /* If recursive macro   */
580             c = get();                          /* hack, skip DEF_MAGIC */
581         do {
582             if (np < &token[IDMAX]) {           /* token dim is IDMAX+1 */
583                 *np++ = (char)c;                /* Store token byte     */
584                 nhash += c;                     /* Update hash value    */
585             }
586             c = get();                          /* And get another byte */
587         } while (type[c] == LET || type[c] == DIG);
588         unget();                                /* Rescan terminator    */
589         *np = EOS;                              /* Terminate token      */
590         if (isrecurse)                          /* Recursive definition */
591             return (NULL);                      /* undefined just now   */
592         nhash += (np - token);                  /* Fix hash value       */
593         dp = symtab[nhash & SBMASK];            /* Starting bucket      */
594         while (dp != (DEFBUF *) NULL) {         /* Search symbol table  */
595             if (dp->hash == nhash               /* Fast precheck        */
596              && (temp = strcmp(dp->name, token)) >= 0)
597                 break;
598             dp = dp->link;                      /* Nope, try next one   */
599         }
600         return ((temp == 0) ? dp : NULL);
601 }
602 
603 DEFBUF *
defendel(char * name,int delete)604 defendel(char* name, int delete)
605 /*
606  * Enter this name in the lookup table (delete = FALSE)
607  * or delete this name (delete = TRUE).
608  * Returns a pointer to the define block (delete = FALSE)
609  * Returns NULL if the symbol wasn't defined (delete = TRUE).
610  */
611 {
612         register DEFBUF         *dp;
613         register DEFBUF         **prevp;
614         register char           *np;
615         int                     nhash;
616         int                     temp;
617         int                     size;
618 
619         for (nhash = 0, np = name; *np != EOS;)
620             nhash += *np++;
621         size = (np - name);
622         nhash += size;
623         prevp = &symtab[nhash & SBMASK];
624         while ((dp = *prevp) != (DEFBUF *) NULL) {
625             if (dp->hash == nhash
626              && (temp = strcmp(dp->name, name)) >= 0) {
627                 if (temp > 0)
628                     dp = NULL;                  /* Not found            */
629                 else {
630                     *prevp = dp->link;          /* Found, unlink and    */
631                     if (dp->repl != NULL)       /* Free the replacement */
632                         free(dp->repl);         /* if any, and then     */
633                     free((char *) dp);          /* Free the symbol      */
634                 }
635                 break;
636             }
637             prevp = &dp->link;
638         }
639         if (!delete) {
640             dp = (DEFBUF *) getmem(sizeof (DEFBUF) + size);
641             dp->link = *prevp;
642             *prevp = dp;
643             dp->hash = nhash;
644             dp->repl = NULL;
645             dp->nargs = 0;
646             strcpy(dp->name, name);
647         }
648         return (dp);
649 }
650 
651 #if OSL_DEBUG_LEVEL > 1
652 
dumpdef(char * why)653 void dumpdef(char *why)
654 {
655         register DEFBUF         *dp;
656         register DEFBUF         **syp;
657 		FILE *pRememberOut = NULL;
658 
659 		if ( bDumpDefs )	/*ER */
660 		{
661 			pRememberOut = pCppOut;
662 			pCppOut = pDefOut;
663 		}
664         fprintf( pCppOut, "CPP symbol table dump %s\n", why);
665         for (syp = symtab; syp < &symtab[SBSIZE]; syp++) {
666             if ((dp = *syp) != (DEFBUF *) NULL) {
667                 fprintf( pCppOut, "symtab[%d]\n", (syp - symtab));
668                 do {
669                     dumpadef((char *) NULL, dp);
670                 } while ((dp = dp->link) != (DEFBUF *) NULL);
671             }
672         }
673 		if ( bDumpDefs )
674 		{
675             fprintf( pCppOut, "\n");
676 			pCppOut = pRememberOut;
677 		}
678 }
679 
dumpadef(char * why,register DEFBUF * dp)680 void dumpadef(char *why, register DEFBUF *dp)
681 {
682         register char           *cp;
683         register int            c;
684 		FILE *pRememberOut = NULL;
685 
686 /*ER dump #define's to pDefOut */
687 		if ( bDumpDefs )
688 		{
689 			pRememberOut = pCppOut;
690 			pCppOut = pDefOut;
691 		}
692         fprintf( pCppOut, " \"%s\" [%d]", dp->name, dp->nargs);
693         if (why != NULL)
694             fprintf( pCppOut, " (%s)", why);
695         if (dp->repl != NULL) {
696             fprintf( pCppOut, " => ");
697             for (cp = dp->repl; (c = *cp++ & 0xFF) != EOS;) {
698 #ifdef SOLAR
699                 if (c == DEL) {
700                     c = *cp++ & 0xFF;
701                     if( c == EOS ) break;
702                     fprintf( pCppOut, "<%%%d>", c - MAC_PARM);
703                 }
704 #else
705                 if (c >= MAC_PARM && c <= (MAC_PARM + PAR_MAC))
706                     fprintf( pCppOut, "<%%%d>", c - MAC_PARM);
707 #endif
708                 else if (isprint(c) || c == '\n' || c == '\t')
709                     PUTCHAR(c);
710                 else if (c < ' ')
711                     fprintf( pCppOut, "<^%c>", c + '@');
712                 else
713                     fprintf( pCppOut, "<\\0%o>", c);
714             }
715 /*ER evaluate macros to pDefOut */
716 #ifdef EVALDEFS
717 			if ( bDumpDefs && !bIsInEval && dp->nargs <= 0 )
718 			{
719 				FILEINFO *infileSave = infile;
720 				char *tokenSave = savestring( token );
721 				char *workSave = savestring( work );
722 				int lineSave = line;
723 				int wronglineSave = wrongline;
724 				int recursionSave = recursion;
725 				FILEINFO *file;
726 				EVALTYPE valEval;
727 
728 				bIsInEval = 1;
729 				infile = NULL;			/* start from scrap */
730 				line = 0;
731 				wrongline = 0;
732 				*token = EOS;
733 				*work = EOS;
734 				recursion = 0;
735 				file = getfile( strlen( dp->repl ), dp->name );
736 				strcpy( file->buffer, dp->repl );
737 	            fprintf( pCppOut, " ===> ");
738 				nEvalOff = 0;
739 				cppmain();				/* get() frees also *file */
740 				valEval = 0;
741 				if ( 0 == evaluate( EvalBuf, &valEval ) )
742 				{
743 #ifdef EVALFLOATS
744 					if ( valEval != (EVALTYPE)((long)valEval ) )
745 		            	fprintf( pCppOut, " ==eval=> %f", valEval );
746 					else
747 #endif
748 		            	fprintf( pCppOut, " ==eval=> %ld", (long)valEval );
749 				}
750 				recursion = recursionSave;
751 				wrongline = wronglineSave;
752 				line = lineSave;
753 				strcpy( work, workSave );
754 				free( workSave );
755 				strcpy( token, tokenSave );
756 				free( tokenSave );
757 				infile = infileSave;
758 				bIsInEval = 0;
759 			}
760 #endif
761         }
762         else {
763             fprintf( pCppOut, ", no replacement.");
764         }
765         PUTCHAR('\n');
766 		if ( bDumpDefs )
767 			pCppOut = pRememberOut;
768 }
769 #endif
770 
771 /*
772  *                      G E T
773  */
774 
775 int
get()776 get()
777 /*
778  * Return the next character from a macro or the current file.
779  * Handle end of file from #include files.
 *
 * The character is taken from the buffer of the current input
 * source (infile) and masked with 0xFF.  EOF_CHAR is returned when
 * infile is NULL, i.e. there is nothing left to read.
 *
 * When the current buffer is exhausted:
 *   - a source without a FILE pointer (fp == NULL, a macro-style
 *     source) is unlinked from the chain and the global recursion
 *     count is reduced by its unrecur value (never below zero);
 *   - a real file is refilled with fgets(); when fgets() fails the
 *     file is closed (unless it is stdin) and, if a parent exists,
 *     wrongline is set so that a #line is issued later.
 * In both end-of-source cases the finished source is freed and
 * reading resumes from its parent.
 *
 * Processing applied to the character before it is returned:
 *   - DEF_MAGIC read from a real file is skipped;
 *   - on HOST == SYS_UNIX builds a '\r' is skipped;
 *   - '\n' increments the global line counter;
 *   - if instring is set the character is returned as is;
 *   - a slash followed by a star or a second slash starts a comment,
 *     which is consumed (and echoed when keepcomments is set);
 *   - backslash-newline is swallowed when inmacro is not set;
 *   - '\f' and VT are returned as a space;
 *   - the byte sequence 0xef 0xbb 0xbf (UTF-8 BOM) is skipped.
780  */
781 {
782         register int            c;
783         register FILEINFO       *file;
784         register int            popped;         /* Recursion fixup      */
785 
786         popped = 0;
787 get_from_file:
788         if ((file = infile) == NULL)
789             return (EOF_CHAR);
790 newline:
791 #if 0
792         fprintf( pCppOut, "get(%s), recursion %d, line %d, bptr = %d, buffer \"%s\"\n",
793             file->filename, recursion, line,
794             file->bptr - file->buffer, file->buffer);
795 #endif
796         /*
797          * Read a character from the current input line or macro.
798          * At EOS, either finish the current macro (freeing temp.
799          * storage) or read another line from the current input file.
800          * At EOF, exit the current file (#include) or, at EOF from
801          * the cpp input file, return EOF_CHAR to finish processing.
802          */
803         if ((c = *file->bptr++ & 0xFF) == EOS) {
804             /*
805              * Nothing in current line or macro.  Get next line (if
806              * input from a file), or do end of file/macro processing.
807              * In the latter case, jump back to restart from the top.
808              */
809             if (file->fp == NULL) {             /* NULL if macro        */
810                 popped++;                       /* One more level done  */
811                 recursion -= file->unrecur;
812                 if (recursion < 0)
813                     recursion = 0;
814                 infile = file->parent;          /* Unwind file chain    */
815             }
816             else {                              /* Else get from a file */
817                 if ((file->bptr = fgets(file->buffer, NBUFF, file->fp))
818                         != NULL) {
819 #if OSL_DEBUG_LEVEL > 1
820                     if (debug > 1) {            /* Dump it to stdout    */
821                         fprintf( pCppOut, "\n#line %d (%s), %s",
822                             line, file->filename, file->buffer);
823                     }
824 #endif
825                     goto newline;               /* process the line     */
826                 }
827                 else {
828 		    if( file->fp != stdin )
829                         fclose(file->fp);           /* Close finished file  */
830                     if ((infile = file->parent) != NULL) {
831                         /*
832                          * There is an "ungotten" newline in the current
833                          * infile buffer (set there by doinclude() in
834                          * cpp1.c).  Thus, we know that the mainline code
835                          * is skipping over blank lines and will do a
836                          * #line at its convenience.
837                          */
838                         wrongline = TRUE;       /* Need a #line now     */
839                     }
840                 }
841             }
842             /*
843              * Free up space used by the (finished) file or macro and
844              * restart input from the parent file/macro, if any.
845              */
846             free(file->filename);               /* Free name and        */
847             if (file->progname != NULL)         /* if a #line was seen, */
848                 free(file->progname);           /* free it, too.        */
849             free((char *) file);                /* Free file space      */
850             if (infile == NULL)                 /* If at end of file    */
851                 return (EOF_CHAR);              /* Return end of file   */
852             line = infile->line;                /* Reset line number    */
853             goto get_from_file;                 /* Get from the top.    */
854         }
855         /*
856          * Common processing for the new character.
857          */
858         if (c == DEF_MAGIC && file->fp != NULL) /* Don't allow delete   */
859             goto newline;                       /* from a file          */
        /*
         * Recursion bookkeeping: if macro levels were popped during
         * this call, add them to the parent's unrecur count; otherwise
         * subtract the parent's pending unrecur from the recursion
         * counter (clamped at zero) and clear it.
         */
860         if (file->parent != NULL) {             /* Macro or #include    */
861             if (popped != 0)
862                 file->parent->unrecur += popped;
863             else {
864                 recursion -= file->parent->unrecur;
865                 if (recursion < 0)
866                     recursion = 0;
867                 file->parent->unrecur = 0;
868             }
869         }
870 #if (HOST == SYS_UNIX)
871 /*ER*/	if (c == '\r')
872 /*ER*/		return get();						/* Skip CR of DOS line ends */
873 #endif
874         if (c == '\n')                          /* Maintain current     */
875             ++line;                             /* line counter         */
876         if (instring)                           /* Strings just return  */
877             return (c);                         /* the character.       */
878         else if (c == '/') {                    /* Comment?             */
879             instring = TRUE;                    /* So get() won't loop  */
880 /*MM c++ comments  */
881 /*MM*/      c = get();
882 /*MM*/      if ((c != '*') && (c != '/')) {     /* Next byte '*'?       */
883                 instring = FALSE;               /* Nope, no comment     */
884                 unget();                        /* Push the char. back  */
885                 return ('/');                   /* Return the slash     */
886             }
887             if (keepcomments) {                 /* If writing comments  */
888                 PUTCHAR('/');                   /* Write out the        */
889                                                 /*   initializer        */
890 /*MM*/          if( '*' == c )
891                     PUTCHAR('*');
892 /*MM*/          else
893 /*MM*/              PUTCHAR('/');
894 
895             }
896 /*MM*/      if( '*' == c ){
897                 for (;;) {                          /* Eat a comment        */
898                     c = get();
899     test:           if (keepcomments && c != EOF_CHAR)
900                         cput(c);
901                     switch (c) {
902                     case EOF_CHAR:
                        /* NOTE(review): instring is still TRUE on this return path. */
903                         cerror("EOF in comment", NULLST);
904                         return (EOF_CHAR);
905 
906                     case '/':
907                         if ((c = get()) != '*')     /* Don't let comments   */
908                             goto test;              /* Nest.                */
909 #ifdef STRICT_COMMENTS
910                         cwarn("Nested comments", NULLST);
911 #endif
912                                                     /* Fall into * stuff    */
913                     case '*':
914                         if ((c = get()) != '/')     /* If comment doesn't   */
915                             goto test;              /* end, look at next    */
916                         instring = FALSE;           /* End of comment,      */
917                         if (keepcomments) {         /* Put out the comment  */
918                             cput(c);                /* terminator, too      */
919                         }
920                         /*
921                          * A comment is syntactically "whitespace" --
922                          * however, there are certain strange sequences
923                          * such as
924                          *          #define foo(x)  (something)
925                          *                  foo|* comment *|(123)
926                          *       these are '/' ^           ^
927                          * where just returning space (or COM_SEP) will cause
928                          * problems.  This can be "fixed" by overwriting the
929                          * '/' in the input line buffer with ' ' (or COM_SEP)
930                          * but that may mess up an error message.
931                          * So, we peek ahead -- if the next character is
932                          * "whitespace" we just get another character, if not,
933                          * we modify the buffer.  All in the name of purity.
934                          */
                        /*
                         * NOTE(review): file was captured before the
                         * recursive get() calls above; presumably it is
                         * still the current source here -- confirm.
                         */
935                         if (*file->bptr == '\n'
936                          || type[*file->bptr & 0xFF] == SPA)
937                             goto newline;
938 #if COMMENT_INVISIBLE
939                         /*
940                          * Return magic (old-fashioned) syntactic space.
941                          */
942                         return ((file->bptr[-1] = COM_SEP));
943 #else
944                         return ((file->bptr[-1] = ' '));
945 #endif
946 
947                     case '\n':                      /* we'll need a #line   */
948                         if (!keepcomments)
949                             wrongline = TRUE;       /* later...             */
950                     default:                        /* Anything else is     */
951                         break;                      /* Just a character     */
952                     }                               /* End switch           */
953                 }                                   /* End comment loop     */
954             }
955             else{                                   /* c++ comment          */
956 /*MM c++ comment*/
                /*
                 * Consume everything up to the end of the line.  The
                 * terminating newline is the character returned.
                 * NOTE(review): on EOF_CHAR instring is still TRUE.
                 */
957                 for (;;) {                          /* Eat a comment        */
958                     c = get();
959                     if (keepcomments && c != EOF_CHAR)
960                         cput(c);
961                     if( EOF_CHAR == c )
962                         return (EOF_CHAR);
963                     else if( '\n' == c ){
964                         instring = FALSE;           /* End of comment,      */
965                         return( c );
966                     }
967                 }
968             }
969         }                                       /* End if in comment    */
970         else if (!inmacro && c == '\\') {       /* If backslash, peek   */
971             if ((c = get()) == '\n') {          /* for a <nl>.  If so,  */
972                 wrongline = TRUE;
973                 goto newline;
974             }
975             else {                              /* Backslash anything   */
976                 unget();                        /* Get it later         */
977                 return ('\\');                  /* Return the backslash */
978             }
979         }
980         else if (c == '\f' || c == VT)          /* Form Feed, Vertical  */
981             c = ' ';                            /* Tab are whitespace   */
982         else if (c == 0xef)						/* eat up UTF-8 BOM */
983         {
            /*
             * 0xef 0xbb 0xbf is skipped and the character after it is
             * returned.  On a partial match the bytes read ahead are
             * pushed back with unget() and 0xef itself is returned.
             */
984             if((c = get()) == 0xbb)
985             {
986                 if((c = get()) == 0xbf)
987                 {
988                     c = get();
989                     return c;
990                 }
991                 else
992                 {
993                     unget();
994                     unget();
995                     return 0xef;
996                 }
997             }
998             else
999             {
1000                 unget();
1001                 return 0xef;
1002             }
1003         }
1004         return (c);                             /* Just return the char */
1005 }
1006 
unget()1007 void unget()
1008 /*
1009  * Backup the pointer to reread the last character.  Fatal error
1010  * (code bug) if we backup too far.  unget() may be called,
1011  * without problems, at end of file.  Only one character may
1012  * be ungotten.  If you need to unget more, call ungetstring().
1013  */
1014 {
1015         register FILEINFO       *file;
1016 
1017         if ((file = infile) == NULL)
1018             return;                     /* Unget after EOF              */
1019         if (--file->bptr < file->buffer)
1020             cfatal("Too much pushback", NULLST);
1021         if (*file->bptr == '\n')        /* Ungetting a newline?         */
1022             --line;                     /* Unget the line number, too   */
1023 }
1024 
ungetstring(char * text)1025 void ungetstring(char* text)
1026 /*
1027  * Push a string back on the input stream.  This is done by treating
1028  * the text as if it were a macro.
1029  */
1030 {
1031         register FILEINFO       *file;
1032 #ifndef ZTC /* BP */
1033         extern FILEINFO         *getfile();
1034 #endif
1035         file = getfile(strlen(text) + 1, "");
1036         strcpy(file->buffer, text);
1037 }
1038 
1039 int
cget()1040 cget()
1041 /*
1042  * Get one character, absorb "funny space" after comments or
1043  * token concatenation
1044  */
1045 {
1046         register int    c;
1047 
1048         do {
1049             c = get();
1050 #if COMMENT_INVISIBLE
1051         } while (c == TOK_SEP || c == COM_SEP);
1052 #else
1053         } while (c == TOK_SEP);
1054 #endif
1055         return (c);
1056 }
1057 
1058 /*
1059  * Error messages and other hacks.  The first byte of severity
1060  * is 'S' for string arguments and 'I' for int arguments.  This
1061  * is needed for portability with machines that have int's that
1062  * are shorter than  char *'s.
1063  */
1064 
domsg(char * severity,char * format,void * arg)1065 static void domsg(char* severity, char* format, void* arg)
1066 /*
1067  * Print filenames, macro names, and line numbers for error messages.
1068  */
1069 {
1070         register char           *tp;
1071         register FILEINFO       *file;
1072 
1073         fprintf(stderr, "%sline %d, %s: ", MSG_PREFIX, line, &severity[1]);
1074         if (*severity == 'S')
1075             fprintf(stderr, format, (char *)arg);
1076         else
1077             fprintf(stderr, format, *((int *)arg) );
1078         putc('\n', stderr);
1079         if ((file = infile) == NULL)
1080             return;                             /* At end of file       */
1081         if (file->fp != NULL) {
1082             tp = file->buffer;                  /* Print current file   */
1083             fprintf(stderr, "%s", tp);          /* name, making sure    */
1084             if (tp[strlen(tp) - 1] != '\n')     /* there's a newline    */
1085                 putc('\n', stderr);
1086         }
1087         while ((file = file->parent) != NULL) { /* Print #includes, too */
1088             if (file->fp == NULL)
1089                 fprintf(stderr, "from macro %s\n", file->filename);
1090             else {
1091                 tp = file->buffer;
1092                 fprintf(stderr, "from file %s, line %d:\n%s",
1093                     (file->progname != NULL)
1094                         ? file->progname : file->filename,
1095                     file->line, tp);
1096                 if (tp[strlen(tp) - 1] != '\n')
1097                     putc('\n', stderr);
1098             }
1099         }
1100 }
1101 
cerror(char * format,char * sarg)1102 void cerror(char* format, char* sarg)
1103 /*
1104  * Print a normal error message, string argument.
1105  */
1106 {
1107         domsg("SError", format, sarg);
1108         errors++;
1109 }
1110 
cierror(char * format,int narg)1111 void cierror(char* format, int narg)
1112 /*
1113  * Print a normal error message, numeric argument.
1114  */
1115 {
1116         domsg("IError", format, &narg);
1117         errors++;
1118 }
1119 
cfatal(char * format,char * sarg)1120 void cfatal(char* format, char* sarg)
1121 /*
1122  * A real disaster
1123  */
1124 {
1125         domsg("SFatal error", format, sarg);
1126         exit(IO_ERROR);
1127 }
1128 
void cwarn(char* format, char* sarg)
/*
 * Print a warning whose message takes a string argument.  Unlike
 * cerror(), the global error counter is left untouched.
 */
{
        domsg("SWarning", format, sarg);
}
1136 
void ciwarn(char* format, int narg)
/*
 * Print a warning whose message takes an int argument.  Unlike
 * cierror(), the global error counter is left untouched.
 */
{
        int     value = narg;

        domsg("IWarning", format, &value);
}
1144 
1145