xref: /aoo42x/main/rsc/source/rscpp/cpp6.c (revision cdf0e10c)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 #include        <stdio.h>
29 #include        <ctype.h>
30 #include        <string.h>
31 #include        "cppdef.h"
32 #include        "cpp.h"
33 
34 /*ER evaluate macros to pDefOut */
35 
36 /*
37  * skipnl()     skips over input text to the end of the line.
38  * skipws()     skips over "whitespace" (spaces or tabs), but
39  *              not skip over the end of the line.  It skips over
40  *              TOK_SEP, however (though that shouldn't happen).
41  * scanid()     reads the next token (C identifier) into token[].
42  *              The caller has already read the first character of
43  *              the identifier.  Unlike macroid(), the token is
44  *              never expanded.
45  * macroid()    reads the next token (C identifier) into token[].
46  *              If it is a #defined macro, it is expanded, and
47  *              macroid() returns TRUE, otherwise, FALSE.
48  * catenate()   Does the dirty work of token concatenation, TRUE if it did.
49  * scanstring() Reads a string from the input stream, calling
50  *              a user-supplied function for each character.
51  *              This function may be output() to write the
52  *              string to the output file, or save() to save
53  *              the string in the work buffer.
54  * scannumber() Reads a C numeric constant from the input stream,
55  *              calling the user-supplied function for each
56  *              character.  (output() or save() as noted above.)
57  * save()       Save one character in the work[] buffer.
58  * savestring() Saves a string in malloc() memory.
59  * getfile()    Initialize a new FILEINFO structure, called when
60  *              #include opens a new file, or a macro is to be
61  *              expanded.
62  * getmem()     Get a specified number of bytes from malloc memory.
63  * output()     Write one character to stdout (calling PUTCHAR) --
64  *              implemented as a function so its address may be
65  *              passed to scanstring() and scannumber().
66  * lookid()     Scans the next token (identifier) from the input
67  *              stream.  Looks for it in the #defined symbol table.
68  *              Returns a pointer to the definition, if found, or NULL
69  *              if not present.  The identifier is stored in token[].
 * defendel()   Define enter/delete subroutine.  Updates the
71  *              symbol table.
72  * get()        Read the next byte from the current input stream,
73  *              handling end of (macro/file) input and embedded
74  *              comments appropriately.  Note that the global
75  *              instring is -- essentially -- a parameter to get().
76  * cget()       Like get(), but skip over TOK_SEP.
77  * unget()      Push last gotten character back on the input stream.
78  * cerror(), cwarn(), cfatal(), cierror(), ciwarn()
 *              These routines format and print messages to the user.
80  *              cerror & cwarn take a format and a single string argument.
81  *              cierror & ciwarn take a format and a single int (char) argument.
82  *              cfatal takes a format and a single string argument.
83  */
84 
85 /*
86  * This table must be rewritten for a non-Ascii machine.
87  *
88  * Note that several "non-visible" characters have special meaning:
89  * Hex 1D DEF_MAGIC -- a flag to prevent #define recursion.
90  * Hex 1E TOK_SEP   -- a delimiter for token concatenation
91  * Hex 1F COM_SEP   -- a zero-width whitespace for comment concatenation
92  */
93 #if TOK_SEP != 0x1E || COM_SEP != 0x1F || DEF_MAGIC != 0x1D
94         << error type table is not correct >>
95 #endif
96 
/*
 * DOL is the type[] class used for the '$' character: it counts as a
 * letter (LET) only when OK_DOLLAR is enabled, otherwise it has no class.
 */
#if !OK_DOLLAR
#define DOL     000
#else
#define DOL     LET
#endif
102 
103 #ifdef EBCDIC
104 
/*
 * type[] for EBCDIC hosts: maps each of the 256 character codes to its
 * lexical class (END, SPA, LET, DIG, DOT, QUO, BSH or one of the OP_xxx
 * operator codes); 000 marks a character with no special class.
 * The trailing comment on each row gives the hex code of the row's first
 * entry and the printable characters found in that row.
 * Code 0x1D (DEF_MAGIC) is classed as LET and code 0x1F (COM_SEP) as SPA;
 * code 0x1E (TOK_SEP) has no class.
 */
char type[256] = {              /* Character type codes    Hex          */
   END,   000,   000,   000,   000,   SPA,   000,   000, /* 00          */
   000,   000,   000,   000,   000,   000,   000,   000, /* 08          */
   000,   000,   000,   000,   000,   000,   000,   000, /* 10          */
   000,   000,   000,   000,   000,   LET,   000,   SPA, /* 18          */
   000,   000,   000,   000,   000,   000,   000,   000, /* 20          */
   000,   000,   000,   000,   000,   000,   000,   000, /* 28          */
   000,   000,   000,   000,   000,   000,   000,   000, /* 30          */
   000,   000,   000,   000,   000,   000,   000,   000, /* 38          */
   SPA,   000,   000,   000,   000,   000,   000,   000, /* 40          */
   000,   000,   000,   DOT, OP_LT,OP_LPA,OP_ADD, OP_OR, /* 48    .<(+| */
OP_AND,   000,   000,   000,   000,   000,   000,   000, /* 50 &        */
   000,   000,OP_NOT,   DOL,OP_MUL,OP_RPA,   000,OP_XOR, /* 58   !$*);^ */
OP_SUB,OP_DIV,   000,   000,   000,   000,   000,   000, /* 60 -/       */
   000,   000,   000,   000,OP_MOD,   LET, OP_GT,OP_QUE, /* 68    ,%_>? */
   000,   000,   000,   000,   000,   000,   000,   000, /* 70          */
   000,   000,OP_COL,   000,   000,   QUO, OP_EQ,   QUO, /* 78  `:#@'=" */
   000,   LET,   LET,   LET,   LET,   LET,   LET,   LET, /* 80  abcdefg */
   LET,   LET,   000,   000,   000,   000,   000,   000, /* 88 hi       */
   000,   LET,   LET,   LET,   LET,   LET,   LET,   LET, /* 90  jklmnop */
   LET,   LET,   000,   000,   000,   000,   000,   000, /* 98 qr       */
   000,OP_NOT,   LET,   LET,   LET,   LET,   LET,   LET, /* A0  ~stuvwx */
   LET,   LET,   000,   000,   000,   000,   000,   000, /* A8 yz   [   */
   000,   000,   000,   000,   000,   000,   000,   000, /* B0          */
   000,   000,   000,   000,   000,   000,   000,   000, /* B8      ]   */
   000,   LET,   LET,   LET,   LET,   LET,   LET,   LET, /* C0 {ABCDEFG */
   LET,   LET,   000,   000,   000,   000,   000,   000, /* C8 HI       */
   000,   LET,   LET,   LET,   LET,   LET,   LET,   LET, /* D0 }JKLMNOP */
   LET,   LET,   000,   000,   000,   000,   000,   000, /* D8 QR       */
   BSH,   000,   LET,   LET,   LET,   LET,   LET,   LET, /* E0 \ STUVWX */
   LET,   LET,   000,   000,   000,   000,   000,   000, /* E8 YZ       */
   DIG,   DIG,   DIG,   DIG,   DIG,   DIG,   DIG,   DIG, /* F0 01234567 */
   DIG,   DIG,   000,   000,   000,   000,   000,   000, /* F8 89       */
};
139 
140 #else
141 
/*
 * type[] for ASCII hosts: maps each of the 256 character codes to its
 * lexical class (END, SPA, LET, DIG, DOT, QUO, BSH or one of the OP_xxx
 * operator codes); 000 marks a character with no special class.
 * The trailing comment on each row gives the hex code of the row's first
 * entry and the characters found in that row.
 * Code 0x1D (DEF_MAGIC) is classed as LET and code 0x1F (COM_SEP) as SPA;
 * code 0x1E (TOK_SEP) has no class.  Tab (0x09) and blank (0x20) are SPA.
 * Only the entries for 0x00..0xBF are written out; the remaining elements
 * up to 0xFF are left to the implicit zero initialization, which is the
 * same value (000) as the explicit rows for 0x80 and above.
 */
char type[256] = {              /* Character type codes    Hex          */
   END,   000,   000,   000,   000,   000,   000,   000, /* 00          */
   000,   SPA,   000,   000,   000,   000,   000,   000, /* 08          */
   000,   000,   000,   000,   000,   000,   000,   000, /* 10          */
   000,   000,   000,   000,   000,   LET,   000,   SPA, /* 18          */
   SPA,OP_NOT,   QUO,   000,   DOL,OP_MOD,OP_AND,   QUO, /* 20  !"#$%&' */
OP_LPA,OP_RPA,OP_MUL,OP_ADD,   000,OP_SUB,   DOT,OP_DIV, /* 28 ()*+,-./ */
   DIG,   DIG,   DIG,   DIG,   DIG,   DIG,   DIG,   DIG, /* 30 01234567 */
   DIG,   DIG,OP_COL,   000, OP_LT, OP_EQ, OP_GT,OP_QUE, /* 38 89:;<=>? */
   000,   LET,   LET,   LET,   LET,   LET,   LET,   LET, /* 40 @ABCDEFG */
   LET,   LET,   LET,   LET,   LET,   LET,   LET,   LET, /* 48 HIJKLMNO */
   LET,   LET,   LET,   LET,   LET,   LET,   LET,   LET, /* 50 PQRSTUVW */
   LET,   LET,   LET,   000,   BSH,   000,OP_XOR,   LET, /* 58 XYZ[\]^_ */
   000,   LET,   LET,   LET,   LET,   LET,   LET,   LET, /* 60 `abcdefg */
   LET,   LET,   LET,   LET,   LET,   LET,   LET,   LET, /* 68 hijklmno */
   LET,   LET,   LET,   LET,   LET,   LET,   LET,   LET, /* 70 pqrstuvw */
   LET,   LET,   LET,   000, OP_OR,   000,OP_NOT,   000, /* 78 xyz{|}~  */
   000,   000,   000,   000,   000,   000,   000,   000, /*   80 .. FF  */
   000,   000,   000,   000,   000,   000,   000,   000, /*   80 .. FF  */
   000,   000,   000,   000,   000,   000,   000,   000, /*   80 .. FF  */
   000,   000,   000,   000,   000,   000,   000,   000, /*   80 .. FF  */
   000,   000,   000,   000,   000,   000,   000,   000, /*   80 .. FF  */
   000,   000,   000,   000,   000,   000,   000,   000, /*   80 .. FF  */
   000,   000,   000,   000,   000,   000,   000,   000, /*   80 .. FF  */
   000,   000,   000,   000,   000,   000,   000,   000, /*   80 .. FF  */
};
168 
169 #endif
170 
171 
172 /*
173  *                      C P P   S y m b o l   T a b l e s
174  */
175 
176 /*
177  * SBSIZE defines the number of hash-table slots for the symbol table.
178  * It must be a power of 2.
179  */
#ifndef SBSIZE
#define SBSIZE  64
#endif
/* SBMASK reduces a hash value to a slot index; valid only for 2^n sizes. */
#define SBMASK  (SBSIZE - 1)
/*
 * A power of two shares no bits with its predecessor.  Zero and negative
 * sizes are rejected explicitly because they would otherwise slip through.
 */
#if SBSIZE <= 0 || (SBSIZE & SBMASK) != 0
#error "SBSIZE must be a positive power of 2"
#endif
187 
188 
189 static DEFBUF   *symtab[SBSIZE];        /* Symbol table queue headers   */
190 
191 void InitCpp6()
192 {
193     int i;
194     for( i = 0; i < SBSIZE; i++ )
195         symtab[ i ] = NULL;
196 }
197 
198 
199 
200 void skipnl()
201 /*
202  * Skip to the end of the current input line.
203  */
204 {
205         register int            c;
206 
207         do {                            /* Skip to newline      */
208             c = get();
209         } while (c != '\n' && c != EOF_CHAR);
210 }
211 
212 int
213 skipws()
214 /*
215  * Skip over whitespace
216  */
217 {
218         register int            c;
219 
220         do {                            /* Skip whitespace      */
221             c = get();
222 #if COMMENT_INVISIBLE
223         } while (type[c] == SPA || c == COM_SEP);
224 #else
225         } while (type[c] == SPA);
226 #endif
227         return (c);
228 }
229 
230 void scanid(int c)
231 /*
232  * Get the next token (an id) into the token buffer.
233  * Note: this code is duplicated in lookid().
234  * Change one, change both.
235  */
236 {
237         register char   *bp;
238 
239         if (c == DEF_MAGIC)                     /* Eat the magic token  */
240             c = get();                          /* undefiner.           */
241         bp = token;
242         do {
243             if (bp < &token[IDMAX])             /* token dim is IDMAX+1 */
244                 *bp++ = (char)c;
245             c = get();
246         } while (type[c] == LET || type[c] == DIG);
247         unget();
248         *bp = EOS;
249 }
250 
251 int
252 macroid(int c)
253 /*
254  * If c is a letter, scan the id.  if it's #defined, expand it and scan
255  * the next character and try again.
256  *
257  * Else, return the character.  If type[c] is a LET, the token is in token.
258  */
259 {
260         register DEFBUF *dp;
261 
262         if (infile != NULL && infile->fp != NULL)
263             recursion = 0;
264         while (type[c] == LET && (dp = lookid(c)) != NULL) {
265             expand(dp);
266             c = get();
267         }
268         return (c);
269 }
270 
271 int
272 catenate()
273 /*
274  * A token was just read (via macroid).
275  * If the next character is TOK_SEP, concatenate the next token
276  * return TRUE -- which should recall macroid after refreshing
277  * macroid's argument.  If it is not TOK_SEP, unget() the character
278  * and return FALSE.
279  */
280 {
281         register int            c;
282         register char           *token1;
283 
284 #if OK_CONCAT
285         if (get() != TOK_SEP) {                 /* Token concatenation  */
286             unget();
287             return (FALSE);
288         }
289         else {
290             token1 = savestring(token);         /* Save first token     */
291             c = macroid(get());                 /* Scan next token      */
292             switch(type[c]) {                   /* What was it?         */
293             case LET:                           /* An identifier, ...   */
294                 if (strlen(token1) + strlen(token) >= NWORK)
295                     cfatal("work buffer overflow doing %s #", token1);
296                 sprintf(work, "%s%s", token1, token);
297                 break;
298 
299             case DIG:                           /* A digit string       */
300                 strcpy(work, token1);
301                 workp = work + strlen(work);
302                 do {
303                     save(c);
304                 } while ((c = get()) != TOK_SEP);
305                 /*
306                  * The trailing TOK_SEP is no longer needed.
307                  */
308                 save(EOS);
309                 break;
310 
311             default:                            /* An error, ...        */
312 #if ! COMMENT_INVISIBLE
313                 if (isprint(c))
314                     cierror("Strange character '%c' after #", c);
315                 else
316                     cierror("Strange character (%d.) after #", c);
317 #endif
318                 strcpy(work, token1);
319                 unget();
320                 break;
321             }
322             /*
323              * work has the concatenated token and token1 has
324              * the first token (no longer needed).  Unget the
325              * new (concatenated) token after freeing token1.
326              * Finally, setup to read the new token.
327              */
328             free(token1);                       /* Free up memory       */
329             ungetstring(work);                  /* Unget the new thing, */
330             return (TRUE);
331         }
332 #else
333         return (FALSE);                         /* Not supported        */
334 #endif
335 }
336 
337 int
338 scanstring(int delim,
339 #ifndef _NO_PROTO
340 void             (*outfun)( int ) /* BP */    /* Output function              */
341 #else
342 void         (*outfun)() /* BP */
343 #endif
344 )
345 /*
346  * Scan off a string.  Warning if terminated by newline or EOF.
347  * outfun() outputs the character -- to a buffer if in a macro.
348  * TRUE if ok, FALSE if error.
349  */
350 {
351         register int            c;
352 
353         instring = TRUE;                /* Don't strip comments         */
354         (*outfun)(delim);
355         while ((c = get()) != delim
356              && c != '\n'
357              && c != EOF_CHAR) {
358 
359             if (c != DEF_MAGIC)
360             (*outfun)(c);
361             if (c == '\\')
362                 (*outfun)(get());
363         }
364         instring = FALSE;
365         if (c == delim) {
366             (*outfun)(c);
367             return (TRUE);
368         }
369         else {
370             cerror("Unterminated string", NULLST);
371             unget();
372             return (FALSE);
373         }
374 }
375 
void scannumber(int c,
#ifndef _NO_PROTO
register void    (*outfun)( int )  /* BP */    /* Output/store func    */
#else
register void    (*outfun)() /* BP */
#endif
)
/*
 * Process a number.  We know that c is from 0 to 9 or dot.
 * Algorithm from Dave Conroy's Decus C.
 *
 * Every character accepted as part of the number is passed to outfun().
 * The first character that does not belong to the number is pushed back
 * with unget().  A lone '.' that is not followed by a digit is emitted
 * on its own.  A warning is issued when an 8 or 9 was seen in a number
 * that is still considered octal at the end of the scan.
 */
{
        register int    radix;                  /* 8, 10, or 16         */
        int             expseen;                /* 'e' seen in floater  */
        int             signseen;               /* '+' or '-' seen      */
        int             octal89;                /* For bad octal test   */
        int             dotflag;                /* TRUE if '.' was seen */

        expseen = FALSE;                        /* No exponent seen yet */
        signseen = TRUE;                        /* No +/- allowed yet   */
        octal89 = FALSE;                        /* No bad octal yet     */
        radix = 10;                             /* Assume decimal       */
        if ((dotflag = (c == '.')) != FALSE) {  /* . something?         */
            (*outfun)('.');                     /* Always out the dot   */
            if (type[(c = get())] != DIG) {     /* If not a float numb, */
                unget();                        /* Rescan strange char  */
                return;                         /* All done for now     */
            }
        }                                       /* End of float test    */
        else if (c == '0') {                    /* Octal or hex?        */
            (*outfun)(c);                       /* Stuff initial zero   */
            radix = 8;                          /* Assume it's octal    */
            c = get();                          /* Look for an 'x'      */
            if (c == 'x' || c == 'X') {         /* Did we get one?      */
                radix = 16;                     /* Remember new radix   */
                (*outfun)(c);                   /* Stuff the 'x'        */
                c = get();                      /* Get next character   */
            }
        }
        for (;;) {                              /* Process curr. char.  */
            /*
             * Note that this algorithm accepts "012e4" and "03.4"
             * as legitimate floating-point numbers.
             */
            if (radix != 16 && (c == 'e' || c == 'E')) {
                if (expseen)                    /* Already saw 'E'?     */
                    break;                      /* Exit loop, bad nbr.  */
                expseen = TRUE;                 /* Set exponent seen    */
                signseen = FALSE;               /* We can read '+' now  */
                radix = 10;                     /* Decimal exponent     */
            }
            else if (radix != 16 && c == '.') {
                if (dotflag)                    /* Saw dot already?     */
                    break;                      /* Exit loop, two dots  */
                dotflag = TRUE;                 /* Remember the dot     */
                radix = 10;                     /* Decimal fraction     */
            }
            else if (c == '+' || c == '-') {    /* 1.0e+10              */
                if (signseen)                   /* Sign in wrong place? */
                    break;                      /* Exit loop, not nbr.  */
                /* signseen = TRUE; */          /* Remember we saw it   */
            }
            else {                              /* Check the digit      */
                switch (c) {
                case '8': case '9':             /* Sometimes wrong      */
                    octal89 = TRUE;             /* Do check later       */
                    /* Deliberate fall through to the plain digits.    */
                case '0': case '1': case '2': case '3':
                case '4': case '5': case '6': case '7':
                    break;                      /* Always ok            */

                case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
                case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
                    if (radix == 16)            /* Alpha's are ok only  */
                        break;                  /* if reading hex.      */
                    /* Not hex: deliberate fall through, number ends.  */
                default:                        /* At number end        */
                    goto done;                  /* Break from for loop  */
                }                               /* End of switch        */
            }                                   /* End general case     */
            (*outfun)(c);                       /* Accept the character */
            signseen = TRUE;                    /* Don't read sign now  */
            c = get();                          /* Read another char    */
        }                                       /* End of scan loop     */
        /*
         * When we break out of the scan loop, c contains the first
         * character (maybe) not in the number.  If the number is an
         * integer, allow a trailing 'L' for long and/or a trailing 'U'
         * for unsigned.  If not those, push the trailing character back
         * on the input stream.  Floating point numbers accept a trailing
         * 'L' for "long double".
         *
         * NOTE(review): an 'f'/'F' suffix on a floating point number and
         * a second 'L' on an integer are not consumed here; they are
         * pushed back for the caller to rescan.
         */
done:   if (dotflag || expseen) {               /* Floating point?      */
            if (c == 'l' || c == 'L') {
                (*outfun)(c);
                c = get();                      /* Ungotten later       */
            }
        }
        else {                                  /* Else it's an integer */
            /*
             * We know that dotflag and expseen are both zero, now:
             * dotflag signals "saw 'L'", and
             * expseen signals "saw 'U'".
             */
            for (;;) {
                switch (c) {
                case 'l':
                case 'L':
                    if (dotflag)
                        goto nomore;
                    dotflag = TRUE;
                    break;

                case 'u':
                case 'U':
                    if (expseen)
                        goto nomore;
                    expseen = TRUE;
                    break;

                default:
                    goto nomore;
                }
                (*outfun)(c);                   /* Got 'L' or 'U'.      */
                c = get();                      /* Look at next, too.   */
            }
        }
nomore: unget();                                /* Not part of a number */
        if (octal89 && radix == 8)
            cwarn("Illegal digit in octal number", NULLST);
}
505 
506 void save(int c)
507 {
508         if (workp >= &work[NWORK]) {
509             work[NWORK-1] = '\0';
510             cfatal("Work buffer overflow:  %s", work);
511         }
512         else *workp++ = (char)c;
513 }
514 
char *
savestring(char* text)
/*
 * Return a copy of text (including its terminator) in memory obtained
 * from getmem().
 */
{
        size_t  bytes = strlen(text) + 1;       /* Text plus terminator */
        char    *copy = getmem(bytes);

        memcpy(copy, text, bytes);
        return (copy);
}
527 
528 FILEINFO        *
529 getfile(int bufsize, char* name)
530 /*
531  * Common FILEINFO buffer initialization for a new file or macro.
532  */
533 {
534         register FILEINFO       *file;
535         register int            size;
536 
537         size = strlen(name);                    /* File/macro name      */
538         file = (FILEINFO *) getmem(sizeof (FILEINFO) + bufsize + size);
539         file->parent = infile;                  /* Chain files together */
540         file->fp = NULL;                        /* No file yet          */
541         file->filename = savestring(name);      /* Save file/macro name */
542         file->progname = NULL;                  /* No #line seen yet    */
543         file->unrecur = 0;                      /* No macro fixup       */
544         file->bptr = file->buffer;              /* Initialize line ptr  */
545         file->buffer[0] = EOS;                  /* Force first read     */
546         file->line = 0;                         /* (Not used just yet)  */
547         if (infile != NULL)                     /* If #include file     */
548             infile->line = line;                /* Save current line    */
549         infile = file;                          /* New current file     */
550         line = 1;                               /* Note first line      */
551         return (file);                          /* All done.            */
552 }
553 
554 char *
555 getmem(int size)
556 /*
557  * Get a block of free memory.
558  */
559 {
560         register char   *result;
561 
562         if ((result = malloc((unsigned) size)) == NULL)
563             cfatal("Out of memory", NULLST);
564         return (result);
565 }
566 
567 
568 DEFBUF *
569 lookid(int c)
570 /*
571  * Look for the next token in the symbol table.  Returns token in "token".
572  * If found, returns the table pointer;  Else returns NULL.
573  */
574 {
575         register int            nhash;
576         register DEFBUF         *dp;
577         register char           *np;
578         int                     temp = 0;
579         int                     isrecurse;      /* For #define foo foo  */
580 
581         np = token;
582         nhash = 0;
583         if (0 != (isrecurse = (c == DEF_MAGIC)))     /* If recursive macro   */
584             c = get();                          /* hack, skip DEF_MAGIC */
585         do {
586             if (np < &token[IDMAX]) {           /* token dim is IDMAX+1 */
587                 *np++ = (char)c;                /* Store token byte     */
588                 nhash += c;                     /* Update hash value    */
589             }
590             c = get();                          /* And get another byte */
591         } while (type[c] == LET || type[c] == DIG);
592         unget();                                /* Rescan terminator    */
593         *np = EOS;                              /* Terminate token      */
594         if (isrecurse)                          /* Recursive definition */
595             return (NULL);                      /* undefined just now   */
596         nhash += (np - token);                  /* Fix hash value       */
597         dp = symtab[nhash & SBMASK];            /* Starting bucket      */
598         while (dp != (DEFBUF *) NULL) {         /* Search symbol table  */
599             if (dp->hash == nhash               /* Fast precheck        */
600              && (temp = strcmp(dp->name, token)) >= 0)
601                 break;
602             dp = dp->link;                      /* Nope, try next one   */
603         }
604         return ((temp == 0) ? dp : NULL);
605 }
606 
607 DEFBUF *
608 defendel(char* name, int delete)
609 /*
610  * Enter this name in the lookup table (delete = FALSE)
611  * or delete this name (delete = TRUE).
612  * Returns a pointer to the define block (delete = FALSE)
613  * Returns NULL if the symbol wasn't defined (delete = TRUE).
614  */
615 {
616         register DEFBUF         *dp;
617         register DEFBUF         **prevp;
618         register char           *np;
619         int                     nhash;
620         int                     temp;
621         int                     size;
622 
623         for (nhash = 0, np = name; *np != EOS;)
624             nhash += *np++;
625         size = (np - name);
626         nhash += size;
627         prevp = &symtab[nhash & SBMASK];
628         while ((dp = *prevp) != (DEFBUF *) NULL) {
629             if (dp->hash == nhash
630              && (temp = strcmp(dp->name, name)) >= 0) {
631                 if (temp > 0)
632                     dp = NULL;                  /* Not found            */
633                 else {
634                     *prevp = dp->link;          /* Found, unlink and    */
635                     if (dp->repl != NULL)       /* Free the replacement */
636                         free(dp->repl);         /* if any, and then     */
637                     free((char *) dp);          /* Free the symbol      */
638                 }
639                 break;
640             }
641             prevp = &dp->link;
642         }
643         if (!delete) {
644             dp = (DEFBUF *) getmem(sizeof (DEFBUF) + size);
645             dp->link = *prevp;
646             *prevp = dp;
647             dp->hash = nhash;
648             dp->repl = NULL;
649             dp->nargs = 0;
650             strcpy(dp->name, name);
651         }
652         return (dp);
653 }
654 
655 #if OSL_DEBUG_LEVEL > 1
656 
657 void dumpdef(char *why)
658 {
659         register DEFBUF         *dp;
660         register DEFBUF         **syp;
661 		FILE *pRememberOut = NULL;
662 
663 		if ( bDumpDefs )	/*ER */
664 		{
665 			pRememberOut = pCppOut;
666 			pCppOut = pDefOut;
667 		}
668         fprintf( pCppOut, "CPP symbol table dump %s\n", why);
669         for (syp = symtab; syp < &symtab[SBSIZE]; syp++) {
670             if ((dp = *syp) != (DEFBUF *) NULL) {
671                 fprintf( pCppOut, "symtab[%d]\n", (syp - symtab));
672                 do {
673                     dumpadef((char *) NULL, dp);
674                 } while ((dp = dp->link) != (DEFBUF *) NULL);
675             }
676         }
677 		if ( bDumpDefs )
678 		{
679             fprintf( pCppOut, "\n");
680 			pCppOut = pRememberOut;
681 		}
682 }
683 
/*
 * dumpadef()   Debug aid: print one symbol table entry.
 *              Writes the name and argument count of dp, the optional
 *              note why, and the replacement text.  In the replacement
 *              text, macro parameter markers are shown as <%n>, control
 *              characters as <^X> and other non-printable bytes in octal.
 *              While bDumpDefs is set, the output goes to pDefOut instead
 *              of pCppOut; pCppOut is restored on exit.
 *              With EVALDEFS, a definition whose nargs is not positive is
 *              additionally run through cppmain() and evaluate(), and the
 *              resulting value is printed when evaluate() returns 0.
 */
void dumpadef(char *why, register DEFBUF *dp)
{
        register char           *cp;
        register int            c;
		FILE *pRememberOut = NULL;

/*ER dump #define's to pDefOut */
		if ( bDumpDefs )
		{
			pRememberOut = pCppOut;
			pCppOut = pDefOut;
		}
        fprintf( pCppOut, " \"%s\" [%d]", dp->name, dp->nargs);
        if (why != NULL)
            fprintf( pCppOut, " (%s)", why);
        if (dp->repl != NULL) {
            fprintf( pCppOut, " => ");
            /* Bytes are masked to 0..255 before they are classified.  */
            for (cp = dp->repl; (c = *cp++ & 0xFF) != EOS;) {
#ifdef SOLAR
                /* Here a parameter is a DEL byte followed by its code. */
                if (c == DEL) {
                    c = *cp++ & 0xFF;
                    if( c == EOS ) break;
                    fprintf( pCppOut, "<%%%d>", c - MAC_PARM);
                }
#else
                if (c >= MAC_PARM && c <= (MAC_PARM + PAR_MAC))
                    fprintf( pCppOut, "<%%%d>", c - MAC_PARM);
#endif
                else if (isprint(c) || c == '\n' || c == '\t')
                    PUTCHAR(c);
                else if (c < ' ')
                    fprintf( pCppOut, "<^%c>", c + '@');
                else
                    fprintf( pCppOut, "<\\0%o>", c);
            }
/*ER evaluate macros to pDefOut */
#ifdef EVALDEFS
			/* bIsInEval guards against re-entering this section.      */
			if ( bDumpDefs && !bIsInEval && dp->nargs <= 0 )
			{
				/* Save the scanner globals that are overwritten below. */
				FILEINFO *infileSave = infile;
				char *tokenSave = savestring( token );
				char *workSave = savestring( work );
				int lineSave = line;
				int wronglineSave = wrongline;
				int recursionSave = recursion;
				FILEINFO *file;
				EVALTYPE valEval;

				bIsInEval = 1;
				infile = NULL;			/* start from scrap */
				line = 0;
				wrongline = 0;
				*token = EOS;
				*work = EOS;
				recursion = 0;
				/* Use the replacement text as the only input.          */
				file = getfile( strlen( dp->repl ), dp->name );
				strcpy( file->buffer, dp->repl );
	            fprintf( pCppOut, " ===> ");
				nEvalOff = 0;
				cppmain();				/* get() frees also *file */
				valEval = 0;
				if ( 0 == evaluate( EvalBuf, &valEval ) )
				{
#ifdef EVALFLOATS
					/* Print as a float only if it has a fraction part. */
					if ( valEval != (EVALTYPE)((long)valEval ) )
		            	fprintf( pCppOut, " ==eval=> %f", valEval );
					else
#endif
		            	fprintf( pCppOut, " ==eval=> %ld", (long)valEval );
				}
				/* Put the saved scanner globals back.                  */
				recursion = recursionSave;
				wrongline = wronglineSave;
				line = lineSave;
				strcpy( work, workSave );
				free( workSave );
				strcpy( token, tokenSave );
				free( tokenSave );
				infile = infileSave;
				bIsInEval = 0;
			}
#endif
        }
        else {
            fprintf( pCppOut, ", no replacement.");
        }
        PUTCHAR('\n');
		if ( bDumpDefs )
			pCppOut = pRememberOut;
}
773 #endif
774 
775 /*
776  *                      G E T
777  */
778 
779 int
780 get()
781 /*
782  * Return the next character from a macro or the current file.
783  * Handle end of file from #include files.
     *
     * Input is taken from the chain of sources headed by infile; a source
     * whose fp is NULL is a macro (or pushed-back string) buffer, any
     * other source is a file that is read a line at a time with fgets().
     * When a source is exhausted it is freed and reading continues with
     * its parent.  EOF_CHAR is returned once infile is NULL.
     *
     * While instring is set the character is returned untouched (apart
     * from the line counting and, on SYS_UNIX hosts, the '\r' skipping
     * done earlier).  Otherwise this routine also
     *   - consumes C and C++ comments, passing their text to PUTCHAR/cput
     *     when keepcomments is set; a C comment yields either the next
     *     character or a separator (' ' or COM_SEP), a C++ comment yields
     *     its terminating newline;
     *   - swallows backslash-newline when inmacro is not set;
     *   - returns form feed and vertical tab as a space;
     *   - skips the byte sequence EF BB BF (UTF-8 byte order mark).
     * A DEF_MAGIC byte read from a file (fp != NULL) is dropped.
784  */
785 {
786         register int            c;
787         register FILEINFO       *file;
788         register int            popped;         /* Recursion fixup      */
789 
790         popped = 0;
791 get_from_file:
792         if ((file = infile) == NULL)
793             return (EOF_CHAR);
794 newline:
795 #if 0
796         fprintf( pCppOut, "get(%s), recursion %d, line %d, bptr = %d, buffer \"%s\"\n",
797             file->filename, recursion, line,
798             file->bptr - file->buffer, file->buffer);
799 #endif
800         /*
801          * Read a character from the current input line or macro.
802          * At EOS, either finish the current macro (freeing temp.
803          * storage) or read another line from the current input file.
804          * At EOF, exit the current file (#include) or, at EOF from
805          * the cpp input file, return EOF_CHAR to finish processing.
806          */
807         if ((c = *file->bptr++ & 0xFF) == EOS) {
808             /*
809              * Nothing in current line or macro.  Get next line (if
810              * input from a file), or do end of file/macro processing.
811              * In the latter case, jump back to restart from the top.
812              */
813             if (file->fp == NULL) {             /* NULL if macro        */
814                 popped++;
815                 recursion -= file->unrecur;
816                 if (recursion < 0)
817                     recursion = 0;
818                 infile = file->parent;          /* Unwind file chain    */
819             }
820             else {                              /* Else get from a file */
821                 if ((file->bptr = fgets(file->buffer, NBUFF, file->fp))
822                         != NULL) {
823 #if OSL_DEBUG_LEVEL > 1
824                     if (debug > 1) {            /* Dump it to stdout    */
825                         fprintf( pCppOut, "\n#line %d (%s), %s",
826                             line, file->filename, file->buffer);
827                     }
828 #endif
829                     goto newline;               /* process the line     */
830                 }
831                 else {
832 		    if( file->fp != stdin )
833                         fclose(file->fp);           /* Close finished file  */
834                     if ((infile = file->parent) != NULL) {
835                         /*
836                          * There is an "ungotten" newline in the current
837                          * infile buffer (set there by doinclude() in
838                          * cpp1.c).  Thus, we know that the mainline code
839                          * is skipping over blank lines and will do a
840                          * #line at its convenience.
841                          */
842                         wrongline = TRUE;       /* Need a #line now     */
843                     }
844                 }
845             }
846             /*
847              * Free up space used by the (finished) file or macro and
848              * restart input from the parent file/macro, if any.
849              */
850             free(file->filename);               /* Free name and        */
851             if (file->progname != NULL)         /* if a #line was seen, */
852                 free(file->progname);           /* free it, too.        */
853             free((char *) file);                /* Free file space      */
854             if (infile == NULL)                 /* If at end of file    */
855                 return (EOF_CHAR);              /* Return end of file   */
856             line = infile->line;                /* Reset line number    */
857             goto get_from_file;                 /* Get from the top.    */
858         }
859         /*
860          * Common processing for the new character.
861          */
862         if (c == DEF_MAGIC && file->fp != NULL) /* Don't allow delete   */
863             goto newline;                       /* from a file          */
        /*
         * Recursion bookkeeping.  If this call finished one or more
         * macro sources (popped != 0), that count is accumulated in the
         * unrecur field of the current source's parent.  Otherwise the
         * parent's pending unrecur is subtracted from recursion (clamped
         * at zero) and then cleared.
         */
864         if (file->parent != NULL) {             /* Macro or #include    */
865             if (popped != 0)
866                 file->parent->unrecur += popped;
867             else {
868                 recursion -= file->parent->unrecur;
869                 if (recursion < 0)
870                     recursion = 0;
871                 file->parent->unrecur = 0;
872             }
873         }
874 #if (HOST == SYS_UNIX)
875 /*ER*/	if (c == '\r')
876 /*ER*/		return get();						/* skip carriage return (DOS line ends) */
877 #endif
878         if (c == '\n')                          /* Maintain current     */
879             ++line;                             /* line counter         */
880         if (instring)                           /* Strings just return  */
881             return (c);                         /* the character.       */
882         else if (c == '/') {                    /* Comment?             */
883             instring = TRUE;                    /* So get() won't loop  */
884 /*MM c++ comments  */
885 /*MM*/      c = get();
886 /*MM*/      if ((c != '*') && (c != '/')) {     /* Next byte '*'?       */
887                 instring = FALSE;               /* Nope, no comment     */
888                 unget();                        /* Push the char. back  */
889                 return ('/');                   /* Return the slash     */
890             }
891             if (keepcomments) {                 /* If writing comments  */
892                 PUTCHAR('/');                   /* Write out the        */
893                                                 /*   initializer        */
894 /*MM*/          if( '*' == c )
895                     PUTCHAR('*');
896 /*MM*/          else
897 /*MM*/              PUTCHAR('/');
898 
899             }
900 /*MM*/      if( '*' == c ){
901                 for (;;) {                          /* Eat a comment        */
902                     c = get();
                    /*
                     * "test" is re-entered by the '/' and '*' cases below
                     * with a character they have already read.
                     */
903     test:           if (keepcomments && c != EOF_CHAR)
904                         cput(c);
905                     switch (c) {
906                     case EOF_CHAR:
                        /*
                         * NOTE(review): instring is still TRUE on this
                         * return path -- confirm that no caller keeps
                         * reading after EOF_CHAR.
                         */
907                         cerror("EOF in comment", NULLST);
908                         return (EOF_CHAR);
909 
910                     case '/':
911                         if ((c = get()) != '*')     /* Don't let comments   */
912                             goto test;              /* Nest.                */
913 #ifdef STRICT_COMMENTS
914                         cwarn("Nested comments", NULLST);
915 #endif
916                                                     /* Fall into * stuff    */
917                     case '*':
918                         if ((c = get()) != '/')     /* If comment doesn't   */
919                             goto test;              /* end, look at next    */
920                         instring = FALSE;           /* End of comment,      */
921                         if (keepcomments) {         /* Put out the comment  */
922                             cput(c);                /* terminator, too      */
923                         }
924                         /*
925                          * A comment is syntactically "whitespace" --
926                          * however, there are certain strange sequences
927                          * such as
928                          *          #define foo(x)  (something)
929                          *                  foo|* comment *|(123)
930                          *       these are '/' ^           ^
931                          * where just returning space (or COM_SEP) will cause
932                          * problems.  This can be "fixed" by overwriting the
933                          * '/' in the input line buffer with ' ' (or COM_SEP)
934                          * but that may mess up an error message.
935                          * So, we peek ahead -- if the next character is
936                          * "whitespace" we just get another character, if not,
937                          * we modify the buffer.  All in the name of purity.
938                          */
                        /*
                         * NOTE(review): "file" was captured near the top of
                         * this invocation; the nested get() calls above can
                         * switch or free input sources.  Confirm it cannot
                         * be stale when it is dereferenced here.
                         */
939                         if (*file->bptr == '\n'
940                          || type[*file->bptr & 0xFF] == SPA)
941                             goto newline;
942 #if COMMENT_INVISIBLE
943                         /*
944                          * Return magic (old-fashioned) syntactic space.
945                          */
946                         return ((file->bptr[-1] = COM_SEP));
947 #else
948                         return ((file->bptr[-1] = ' '));
949 #endif
950 
951                     case '\n':                      /* we'll need a #line   */
952                         if (!keepcomments)
953                             wrongline = TRUE;       /* later...             */
954                     default:                        /* Anything else is     */
955                         break;                      /* Just a character     */
956                     }                               /* End switch           */
957                 }                                   /* End comment loop     */
958             }
959             else{                                   /* c++ comment          */
960 /*MM c++ comment*/
961                 for (;;) {                          /* Eat a comment        */
962                     c = get();
963                     if (keepcomments && c != EOF_CHAR)
964                         cput(c);
                    /*
                     * NOTE(review): as in the C comment case, the EOF_CHAR
                     * return below leaves instring set to TRUE.
                     */
965                     if( EOF_CHAR == c )
966                         return (EOF_CHAR);
967                     else if( '\n' == c ){
968                         instring = FALSE;           /* End of comment,      */
969                         return( c );
970                     }
971                 }
972             }
973         }                                       /* End if in comment    */
974         else if (!inmacro && c == '\\') {       /* If backslash, peek   */
975             if ((c = get()) == '\n') {          /* for a <nl>.  If so,  */
976                 wrongline = TRUE;
977                 goto newline;
978             }
979             else {                              /* Backslash anything   */
980                 unget();                        /* Get it later         */
981                 return ('\\');                  /* Return the backslash */
982             }
983         }
984         else if (c == '\f' || c == VT)          /* Form Feed, Vertical  */
985             c = ' ';                            /* Tab are whitespace   */
986         else if (c == 0xef)						/* eat up UTF-8 BOM */
987         {
            /*
             * 0xef may start the three byte sequence EF BB BF.  Read
             * ahead; on a full match return the character after it,
             * otherwise back up over what was read and return the 0xef.
             */
988             if((c = get()) == 0xbb)
989             {
990                 if((c = get()) == 0xbf)
991                 {
992                     c = get();
993                     return c;
994                 }
995                 else
996                 {
                    /*
                     * NOTE(review): two consecutive unget() calls, while
                     * unget()'s own comment says only one character may be
                     * ungotten -- confirm both bytes are in one buffer.
                     */
997                     unget();
998                     unget();
999                     return 0xef;
1000                 }
1001             }
1002             else
1003             {
1004                 unget();
1005                 return 0xef;
1006             }
1007         }
1008         return (c);                             /* Just return the char */
1009 }
1010 
1011 void unget()
1012 /*
1013  * Backup the pointer to reread the last character.  Fatal error
1014  * (code bug) if we backup too far.  unget() may be called,
1015  * without problems, at end of file.  Only one character may
1016  * be ungotten.  If you need to unget more, call ungetstring().
1017  */
1018 {
1019         register FILEINFO       *file;
1020 
1021         if ((file = infile) == NULL)
1022             return;                     /* Unget after EOF              */
1023         if (--file->bptr < file->buffer)
1024             cfatal("Too much pushback", NULLST);
1025         if (*file->bptr == '\n')        /* Ungetting a newline?         */
1026             --line;                     /* Unget the line number, too   */
1027 }
1028 
1029 void ungetstring(char* text)
1030 /*
1031  * Push a string back on the input stream.  This is done by treating
1032  * the text as if it were a macro.
1033  */
1034 {
1035         register FILEINFO       *file;
1036 #ifndef ZTC /* BP */
1037         extern FILEINFO         *getfile();
1038 #endif
1039         file = getfile(strlen(text) + 1, "");
1040         strcpy(file->buffer, text);
1041 }
1042 
1043 int
1044 cget()
1045 /*
1046  * Get one character, absorb "funny space" after comments or
1047  * token concatenation
1048  */
1049 {
1050         register int    c;
1051 
1052         do {
1053             c = get();
1054 #if COMMENT_INVISIBLE
1055         } while (c == TOK_SEP || c == COM_SEP);
1056 #else
1057         } while (c == TOK_SEP);
1058 #endif
1059         return (c);
1060 }
1061 
1062 /*
1063  * Error messages and other hacks.  The first byte of severity
1064  * is 'S' for string arguments and 'I' for int arguments.  This
1065  * is needed for portability with machines that have int's that
1066  * are shorter than  char *'s.
1067  */
1068 
1069 static void domsg(char* severity, char* format, void* arg)
1070 /*
1071  * Print filenames, macro names, and line numbers for error messages.
1072  */
1073 {
1074         register char           *tp;
1075         register FILEINFO       *file;
1076 
1077         fprintf(stderr, "%sline %d, %s: ", MSG_PREFIX, line, &severity[1]);
1078         if (*severity == 'S')
1079             fprintf(stderr, format, (char *)arg);
1080         else
1081             fprintf(stderr, format, *((int *)arg) );
1082         putc('\n', stderr);
1083         if ((file = infile) == NULL)
1084             return;                             /* At end of file       */
1085         if (file->fp != NULL) {
1086             tp = file->buffer;                  /* Print current file   */
1087             fprintf(stderr, "%s", tp);          /* name, making sure    */
1088             if (tp[strlen(tp) - 1] != '\n')     /* there's a newline    */
1089                 putc('\n', stderr);
1090         }
1091         while ((file = file->parent) != NULL) { /* Print #includes, too */
1092             if (file->fp == NULL)
1093                 fprintf(stderr, "from macro %s\n", file->filename);
1094             else {
1095                 tp = file->buffer;
1096                 fprintf(stderr, "from file %s, line %d:\n%s",
1097                     (file->progname != NULL)
1098                         ? file->progname : file->filename,
1099                     file->line, tp);
1100                 if (tp[strlen(tp) - 1] != '\n')
1101                     putc('\n', stderr);
1102             }
1103         }
1104 }
1105 
1106 void cerror(char* format, char* sarg)
1107 /*
1108  * Print a normal error message, string argument.
1109  */
1110 {
1111         domsg("SError", format, sarg);
1112         errors++;
1113 }
1114 
1115 void cierror(char* format, int narg)
1116 /*
1117  * Print a normal error message, numeric argument.
1118  */
1119 {
1120         domsg("IError", format, &narg);
1121         errors++;
1122 }
1123 
1124 void cfatal(char* format, char* sarg)
1125 /*
1126  * A real disaster
1127  */
1128 {
1129         domsg("SFatal error", format, sarg);
1130         exit(IO_ERROR);
1131 }
1132 
/*
 * cwarn() -- print a warning whose format takes one string argument.
 * Unlike cerror(), the errors counter is left untouched.
 */
void cwarn(char* format, char* sarg)
{
        domsg("SWarning", format, sarg);
}
1140 
/*
 * ciwarn() -- print a warning whose format takes one int argument.
 * The int is handed to domsg() by address, as the 'I' severity tag
 * requires; the errors counter is left untouched.
 */
void ciwarn(char* format, int narg)
{
        int     value = narg;

        domsg("IWarning", format, &value);
}
1148 
1149