xref: /aoo41x/main/soltools/cpp/_tokens.c (revision cdf0e10c)
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include <ctype.h>
5 #if (defined(_WIN32) || defined(_MSDOS) || defined(__IBMC__))
6 #include <io.h>
7 #else
8 #include <unistd.h>
9 #endif
10 #include "cpp.h"
11 
12 
13 static char wbuf[4 * OBS];
14 static char *wbp = wbuf;
15 static int EBCDIC_ExternTokenDetected = 0;
16 static int EBCDIC_StartTokenDetected = 0;
17 
/*
 * Byte translation table indexed by a source character code; each entry is
 * the replacement code emitted by memcpy_EBCDIC().  For example 0x40 maps
 * to 0x20 (space), 0xc1 to 0x41 ('A') and 0xf0 to 0x30 ('0').
 * NOTE(review): this looks like an EBCDIC code page to ISO 8859-1 mapping;
 * confirm which EBCDIC code page is intended.
 *
 * Laid out as 16 rows of 16; the comment on each row is the index of its
 * first entry.
 */
unsigned char toLatin1[256] =
{
    /* 0x00 */ 0x00, 0x01, 0x02, 0x03, 0x9c, 0x09, 0x86, 0x7f,
               0x97, 0x8d, 0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    /* 0x10 */ 0x10, 0x11, 0x12, 0x13, 0x9d, 0x0a, 0x08, 0x87,
               0x18, 0x19, 0x92, 0x8f, 0x1c, 0x1d, 0x1e, 0x1f,
    /* 0x20 */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1b,
               0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07,
    /* 0x30 */ 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04,
               0x98, 0x99, 0x9a, 0x9b, 0x14, 0x15, 0x9e, 0x1a,
    /* 0x40 */ 0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1, 0xe3, 0xe5,
               0xe7, 0xf1, 0xa2, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
    /* 0x50 */ 0x26, 0xe9, 0xea, 0xeb, 0xe8, 0xed, 0xee, 0xef,
               0xec, 0xdf, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
    /* 0x60 */ 0x2d, 0x2f, 0xc2, 0xc4, 0xc0, 0xc1, 0xc3, 0xc5,
               0xc7, 0xd1, 0xa6, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
    /* 0x70 */ 0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf,
               0xcc, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
    /* 0x80 */ 0xd8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
               0x68, 0x69, 0xab, 0xbb, 0xf0, 0xfd, 0xfe, 0xb1,
    /* 0x90 */ 0xb0, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70,
               0x71, 0x72, 0xaa, 0xba, 0xe6, 0xb8, 0xc6, 0xa4,
    /* 0xa0 */ 0xb5, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
               0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0x5b, 0xde, 0xae,
    /* 0xb0 */ 0xac, 0xa3, 0xa5, 0xb7, 0xa9, 0xa7, 0xb6, 0xbc,
               0xbd, 0xbe, 0xdd, 0xa8, 0xaf, 0x5d, 0xb4, 0xd7,
    /* 0xc0 */ 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
               0x48, 0x49, 0xad, 0xf4, 0xf6, 0xf2, 0xf3, 0xf5,
    /* 0xd0 */ 0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
               0x51, 0x52, 0xb9, 0xfb, 0xfc, 0xf9, 0xfa, 0xff,
    /* 0xe0 */ 0x5c, 0xf7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
               0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2, 0xd3, 0xd5,
    /* 0xf0 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
               0x38, 0x39, 0xb3, 0xdb, 0xdc, 0xd9, 0xda, 0x9f
};
47 
48 #define MASK    "\\x%x"
49 
50 int
51     memcpy_EBCDIC( char * pwbuf, uchar *p, int len )
52 {
53     int currpos = 0;
54     int processedchars = 0;
55 
56     if( len == 0 )
57         return 0;
58 
59     if( len == 1 )
60     {
61         *pwbuf = *p;
62         return 1;
63     }
64 
65     /* copy spaces until " or ' */
66     while( (p[ processedchars ] != '\"') && (p[ processedchars ] != '\'') )
67         pwbuf[ currpos++ ] = p[ processedchars++ ];
68 
69     /* copy first " or ' */
70     pwbuf[ currpos++ ] = p[ processedchars++ ];
71 
72     /* convert all characters until " or ' */
73     while( processedchars < (len - 1) )
74     {
75         if( p[ processedchars ] == '\\' )
76         {
77             switch( p[ ++processedchars ] )
78             {
79                 case 'n':
80                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\n'] );
81                     processedchars++;
82                     break;
83 
84                 case 't':
85                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\t'] );
86                     processedchars++;
87                     break;
88 
89                 case 'v':
90                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\v'] );
91                     processedchars++;
92                     break;
93 
94                 case 'b':
95                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\b'] );
96                     processedchars++;
97                     break;
98 
99                 case 'r':
100                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\r'] );
101                     processedchars++;
102                     break;
103 
104                 case 'f':
105                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\f'] );
106                     processedchars++;
107                     break;
108 
109                 case 'a':
110                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\a'] );
111                     processedchars++;
112                     break;
113 
114                 case '\\':
115                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\\'] );
116                     processedchars++;
117                     break;
118 
119                 case '?':
120                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\?'] );
121                     processedchars++;
122                     break;
123 
124                 case '\'':
125                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\''] );
126                     processedchars++;
127                     break;
128 
129                 case '"':
130                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\"'] );
131                     processedchars++;
132                     break;
133 
134                 /* octal coded character? -> copy */
135                 case '0':
136                 case '1':
137                 case '2':
138                 case '3':
139                 case '4':
140                 case '5':
141                 case '6':
142                 case '7':
143                     {
144                     int startpos = currpos;
145 
146                     pwbuf[ currpos++ ] = '\\';
147 
148                     while( p[ processedchars ] >= '0' && p[ processedchars ] <= '7' && (currpos < startpos + 4) )
149                           pwbuf[ currpos++ ] = (unsigned char)p[ processedchars++ ];
150                     break;
151                     }
152 
153                 /* hex coded character? -> copy */
154                 case 'x':
155                 case 'X':
156                     {
157                     int startpos = currpos;
158 
159                     pwbuf[ currpos++ ] = '\\';
160                     pwbuf[ currpos++ ] = 'x';
161                     processedchars++;
162 
163                     while( isxdigit( p[ processedchars ] ) && (currpos < startpos + 4) )
164                           pwbuf[ currpos++ ] = (unsigned char)p[ processedchars++ ];
165                     break;
166                     }
167 
168             }
169         }
170         else
171             currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1[p[ processedchars++ ]] );
172 
173     }
174 
175     /* copy last " or ' */
176     pwbuf[ currpos++ ] = p[ processedchars ];
177 
178     return currpos;
179 }
180 
181 void
182     maketokenrow(int size, Tokenrow * trp)
183 {
184     trp->max = size;
185     if (size > 0)
186         trp->bp = (Token *) domalloc(size * sizeof(Token));
187     else
188         trp->bp = NULL;
189     trp->tp = trp->bp;
190     trp->lp = trp->bp;
191 }
192 
193 Token *
194     growtokenrow(Tokenrow * trp)
195 {
196     int ncur = trp->tp - trp->bp;
197     int nlast = trp->lp - trp->bp;
198 
199     trp->max = 3 * trp->max / 2 + 1;
200     trp->bp = (Token *) realloc(trp->bp, trp->max * sizeof(Token));
201     trp->lp = &trp->bp[nlast];
202     trp->tp = &trp->bp[ncur];
203     return trp->lp;
204 }
205 
206 /*
207  * Compare a row of tokens, ignoring the content of WS; return !=0 if different
208  */
209 int
210     comparetokens(Tokenrow * tr1, Tokenrow * tr2)
211 {
212     Token *tp1, *tp2;
213 
214     tp1 = tr1->tp;
215     tp2 = tr2->tp;
216     if (tr1->lp - tp1 != tr2->lp - tp2)
217         return 1;
218     for (; tp1 < tr1->lp; tp1++, tp2++)
219     {
220         if (tp1->type != tp2->type
221             || (tp1->wslen == 0) != (tp2->wslen == 0)
222             || tp1->len != tp2->len
223             || strncmp((char *) tp1->t, (char *) tp2->t, tp1->len) != 0)
224             return 1;
225     }
226     return 0;
227 }
228 
229 /*
230  * replace ntok tokens starting at dtr->tp with the contents of str.
231  * tp ends up pointing just beyond the replacement.
232  * Canonical whitespace is assured on each side.
233  */
234 void
235     insertrow(Tokenrow * dtr, int ntok, Tokenrow * str)
236 {
237     int nrtok = rowlen(str);
238 
239     dtr->tp += ntok;
240     adjustrow(dtr, nrtok - ntok);
241     dtr->tp -= ntok;
242     movetokenrow(dtr, str);
243     dtr->tp += nrtok;
244 }
245 
246 /*
247  * make sure there is WS before trp->tp, if tokens might merge in the output
248  */
249 void
250     makespace(Tokenrow * trp, Token * ntp)
251 {
252     uchar *tt;
253     Token *tp = trp->tp;
254 
255     if (tp >= trp->lp)
256         return;
257 
258     if (ntp->wslen)
259     {
260         tt = newstring(tp->t, tp->len, ntp->wslen);
261         strncpy((char *)tt, (char *)ntp->t - ntp->wslen, ntp->wslen);
262         tp->t = tt + ntp->wslen;
263         tp->wslen = ntp->wslen;
264         tp->flag |= XPWS;
265     }
266 }
267 
268 /*
269  * Copy an entire tokenrow into another, at tp.
270  * It is assumed that there is enough space.
271  *  Not strictly conforming.
272  */
273 void
274     movetokenrow(Tokenrow * dtr, Tokenrow * str)
275 {
276     int nby;
277 
278     /* nby = sizeof(Token) * (str->lp - str->bp); */
279     nby = (char *) str->lp - (char *) str->bp;
280     memmove(dtr->tp, str->bp, nby);
281 }
282 
283 /*
284  * Move the tokens in a row, starting at tr->tp, rightward by nt tokens;
285  * nt may be negative (left move).
286  * The row may need to be grown.
287  * Non-strictly conforming because of the (char *), but easily fixed
288  */
289 void
290     adjustrow(Tokenrow * trp, int nt)
291 {
292     int nby, size;
293 
294     if (nt == 0)
295         return;
296     size = (trp->lp - trp->bp) + nt;
297     while (size > trp->max)
298         growtokenrow(trp);
299     /* nby = sizeof(Token) * (trp->lp - trp->tp); */
300     nby = (char *) trp->lp - (char *) trp->tp;
301     if (nby)
302         memmove(trp->tp + nt, trp->tp, nby);
303     trp->lp += nt;
304 }
305 
306 /*
307  * Copy a row of tokens into the destination holder, allocating
308  * the space for the contents.  Return the destination.
309  */
310 Tokenrow *
311     copytokenrow(Tokenrow * dtr, Tokenrow * str)
312 {
313     int len = rowlen(str);
314 
315     maketokenrow(len, dtr);
316     movetokenrow(dtr, str);
317     dtr->lp += len;
318     return dtr;
319 }
320 
321 /*
322  * Produce a copy of a row of tokens.  Start at trp->tp.
323  * The value strings are copied as well.  The first token
324  * has WS available.
325  */
326 Tokenrow *
327     normtokenrow(Tokenrow * trp)
328 {
329     Token *tp;
330     Tokenrow *ntrp = new(Tokenrow);
331     int len;
332 
333     len = trp->lp - trp->tp;
334     if (len <= 0)
335         len = 1;
336     maketokenrow(len, ntrp);
337     for (tp = trp->tp; tp < trp->lp; tp++)
338     {
339         *ntrp->lp = *tp;
340         if (tp->len)
341         {
342             ntrp->lp->t = newstring(tp->t, tp->len, 1);
343             *ntrp->lp->t++ = ' ';
344             if (tp->wslen)
345                 ntrp->lp->wslen = 1;
346         }
347         ntrp->lp++;
348     }
349     if (ntrp->lp > ntrp->bp)
350         ntrp->bp->wslen = 0;
351     return ntrp;
352 }
353 
354 /*
355  * Debugging
356  */
357 void
358     peektokens(Tokenrow * trp, char *str)
359 {
360     Token *tp;
361 
362     tp = trp->tp;
363     flushout();
364     if (str)
365         fprintf(stderr, "%s ", str);
366     if (tp < trp->bp || tp > trp->lp)
367         fprintf(stderr, "(tp offset %ld) ", (long int) (tp - trp->bp));
368     for (tp = trp->bp; tp < trp->lp && tp < trp->bp + 32; tp++)
369     {
370         if (tp->type != NL)
371         {
372             int c = tp->t[tp->len];
373 
374             tp->t[tp->len] = 0;
375             fprintf(stderr, "%s", tp->t);
376             tp->t[tp->len] = (uchar) c;
377         }
378         fprintf(stderr, tp == trp->tp ? "{%x*} " : "{%x} ", tp->type);
379     }
380     fprintf(stderr, "\n");
381     fflush(stderr);
382 }
383 
384 void
385     puttokens(Tokenrow * trp)
386 {
387     Token *tp;
388     int len;
389     uchar *p;
390 
391     if (Vflag)
392         peektokens(trp, "");
393     tp = trp->bp;
394     for (; tp < trp->lp; tp++)
395     {
396         if (tp->type != NL)
397         {
398             len = tp->len + tp->wslen;
399             p = tp->t - tp->wslen;
400 
401 			/* add parameter check to delete operator? */
402 			if( Dflag )
403 			{
404 				if( (tp->type == NAME) && (strncmp( (char*)p, "delete", len ) == 0) )
405 				{
406 					Token* ntp = tp;
407 					ntp++;
408 
409 					if( ntp->type == NAME )
410 					{
411 						uchar* np = ntp->t - ntp->wslen;
412 			            int nlen = ntp->len + ntp->wslen;
413 
414 						memcpy(wbp, "if(", 3 );
415  			            wbp += 4;
416 						memcpy(wbp, np, nlen );
417  			            wbp += nlen;
418 						memcpy(wbp, ")", 1 );
419  			            wbp++;
420 
421                         memcpy(wbp, p, len);
422 					}
423 				}
424 			}
425 
426             /* EBCDIC to ANSI conversion requested? */
427             if( Aflag )
428             {
429                 /* keyword __ToLatin1__ found? -> do conversion! */
430                 if( EBCDIC_StartTokenDetected )
431                 {
432                     /* previous token was 'extern'? -> don't convert current token! */
433                     if( EBCDIC_ExternTokenDetected )
434                     {
435                         EBCDIC_ExternTokenDetected = 0;
436                         memcpy(wbp, p, len);
437                     }
438                     else
439                     {
440                         /* current token is keyword 'extern'? -> don't convert following token! */
441                         if( (tp->wslen == 0) && (strncmp( (char*)p, "extern", len ) == 0) )
442                         {
443                             EBCDIC_ExternTokenDetected = 1;
444                             memcpy(wbp, p, len);
445                         }
446                         else
447                         {
448                             /* token is string or char? -> process EBCDIC to ANSI conversion */
449                             if ((tp->type == STRING) || (tp->type == CCON))
450                                 len = memcpy_EBCDIC(wbp,  p, len);
451                             else
452                                 memcpy(wbp, p, len);
453                         }
454                     }
455                 }
456                 else
457                     /* keyword __ToLatin1__ found? -> don't copy keyword and start conversion */
458                     if( (tp->type == NAME) && (strncmp( (char*)p, "__ToLatin1__", len) == 0) )
459                     {
460                         EBCDIC_StartTokenDetected = 1;
461                         len = 0;
462                     }
463                     else
464                         memcpy(wbp, p, len);
465             }
466             else
467                 memcpy(wbp, p, len);
468 
469             wbp += len;
470         }
471         else
472             *wbp++ = '\n';
473 
474         if (wbp >= &wbuf[OBS])
475         {
476             if ( write(1, wbuf, OBS) != -1 ) {
477             if (wbp > &wbuf[OBS])
478                 memcpy(wbuf, wbuf + OBS, wbp - &wbuf[OBS]);
479             wbp -= OBS;
480 	    }
481 		else exit(1);
482         }
483     }
484     trp->tp = tp;
485     if (cursource->fd == 0)
486         flushout();
487 }
488 
489 void
490     flushout(void)
491 {
492     if (wbp > wbuf)
493     {
494         if ( write(1, wbuf, wbp - wbuf) != -1)
495         	wbp = wbuf;
496 	else
497 		exit(1);
498     }
499 }
500 
501 /*
502  * turn a row into just a newline
503  */
504 void
505     setempty(Tokenrow * trp)
506 {
507     trp->tp = trp->bp;
508     trp->lp = trp->bp + 1;
509     *trp->bp = nltoken;
510 }
511 
/*
 * Store the decimal digits of n at p, most significant first, and return
 * the position just past the last digit.  No terminating NUL is written.
 * Intended for n >= 0; a negative n produces a single byte computed as
 * n % 10 + '0'.
 */
char *
    outnum(char *p, int n)
{
    char digits[3 * sizeof(int) + 1];   /* upper bound on decimal digits */
    int count = 0;

    /* peel off digits least-significant first ... */
    while (n >= 10)
    {
        digits[count++] = (char) (n % 10 + '0');
        n /= 10;
    }
    digits[count++] = (char) (n % 10 + '0');

    /* ... and emit them in reverse */
    while (count > 0)
        *p++ = digits[--count];
    return p;
}
523 
524 /*
525  * allocate and initialize a new string from s, of length l, at offset o
526  * Null terminated.
527  */
528 uchar *
529     newstring(uchar * s, int l, int o)
530 {
531     uchar *ns = (uchar *) domalloc(l + o + 1);
532 
533     ns[l + o] = '\0';
534     return (uchar *) strncpy((char *) ns + o, (char *) s, l) - o;
535 }
536