1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 #include <stdio.h> 23 #include <stdlib.h> 24 #include <string.h> 25 #include <ctype.h> 26 #if (defined(_WIN32) || defined(_MSDOS) || defined(__IBMC__)) 27 #include <io.h> 28 #else 29 #include <unistd.h> 30 #endif 31 #include "cpp.h" 32 33 34 static char wbuf[4 * OBS]; 35 static char *wbp = wbuf; 36 static int EBCDIC_ExternTokenDetected = 0; 37 static int EBCDIC_StartTokenDetected = 0; 38 39 unsigned char toLatin1[256] = 40 { 41 0x00, 0x01, 0x02, 0x03, 0x9c, 0x09, 0x86, 0x7f, 0x97, 0x8d, 42 0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 43 0x9d, 0x0a, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8f, 0x1c, 0x1d, 44 0x1e, 0x1f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1b, 45 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07, 0x90, 0x91, 46 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9a, 0x9b, 47 0x14, 0x15, 0x9e, 0x1a, 0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1, 48 0xe3, 0xe5, 0xe7, 0xf1, 0xa2, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, 49 0x26, 0xe9, 0xea, 0xeb, 0xe8, 0xed, 0xee, 0xef, 0xec, 0xdf, 50 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e, 0x2d, 0x2f, 0xc2, 0xc4, 51 0xc0, 0xc1, 0xc3, 0xc5, 0xc7, 0xd1, 0xa6, 0x2c, 0x25, 0x5f, 52 0x3e, 0x3f, 0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf, 53 0xcc, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, 54 0xd8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 55 0xab, 0xbb, 0xf0, 0xfd, 0xfe, 0xb1, 0xb0, 0x6a, 0x6b, 0x6c, 56 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0xaa, 0xba, 0xe6, 0xb8, 57 0xc6, 0xa4, 0xb5, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 58 0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0x5b, 0xde, 0xae, 0xac, 0xa3, 59 0xa5, 0xb7, 0xa9, 0xa7, 0xb6, 0xbc, 0xbd, 0xbe, 0xdd, 0xa8, 60 0xaf, 0x5d, 0xb4, 0xd7, 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 61 0x46, 0x47, 0x48, 0x49, 0xad, 0xf4, 0xf6, 0xf2, 0xf3, 0xf5, 62 0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 63 0xb9, 0xfb, 0xfc, 0xf9, 0xfa, 0xff, 0x5c, 0xf7, 0x53, 0x54, 64 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2, 65 0xd3, 0xd5, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 66 0x38, 0x39, 0xb3, 0xdb, 0xdc, 0xd9, 0xda, 0x9f 67 }; 68 69 #define MASK "\\x%x" 70 71 int memcpy_EBCDIC(char * pwbuf,uchar * p,int len)72 memcpy_EBCDIC( char * pwbuf, uchar *p, int len ) 73 { 74 int currpos = 0; 75 int processedchars = 0; 76 77 if( len == 0 ) 78 return 0; 79 80 if( len == 1 ) 81 { 82 *pwbuf = *p; 83 return 1; 84 } 85 86 /* copy spaces until " or ' */ 87 while( (p[ processedchars ] != '\"') && (p[ processedchars ] != '\'') ) 88 pwbuf[ currpos++ ] = p[ processedchars++ ]; 89 90 /* copy first " or ' */ 91 pwbuf[ currpos++ ] = p[ processedchars++ ]; 92 93 /* convert all characters until " or ' */ 94 while( processedchars < (len - 1) ) 95 { 96 if( p[ processedchars ] == '\\' ) 97 { 98 switch( p[ ++processedchars ] ) 99 { 100 case 'n': 101 currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\n'] ); 102 processedchars++; 103 break; 104 105 case 't': 106 currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\t'] ); 107 processedchars++; 108 break; 109 110 case 'v': 111 currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\v'] ); 112 processedchars++; 113 break; 114 115 case 'b': 116 currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\b'] ); 117 processedchars++; 118 break; 119 120 case 'r': 121 currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\r'] ); 122 processedchars++; 123 break; 124 125 case 'f': 126 currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\f'] ); 127 processedchars++; 128 break; 129 130 case 'a': 131 currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\a'] ); 132 processedchars++; 133 break; 134 135 case '\\': 136 currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\\'] ); 137 processedchars++; 138 break; 139 140 case '?': 141 currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\?'] ); 142 processedchars++; 143 break; 144 145 case '\'': 146 currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\''] ); 147 processedchars++; 148 break; 149 150 case '"': 151 currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\"'] ); 152 processedchars++; 153 break; 154 155 /* octal coded character? -> copy */ 156 case '0': 157 case '1': 158 case '2': 159 case '3': 160 case '4': 161 case '5': 162 case '6': 163 case '7': 164 { 165 int startpos = currpos; 166 167 pwbuf[ currpos++ ] = '\\'; 168 169 while( p[ processedchars ] >= '0' && p[ processedchars ] <= '7' && (currpos < startpos + 4) ) 170 pwbuf[ currpos++ ] = (unsigned char)p[ processedchars++ ]; 171 break; 172 } 173 174 /* hex coded character? -> copy */ 175 case 'x': 176 case 'X': 177 { 178 int startpos = currpos; 179 180 pwbuf[ currpos++ ] = '\\'; 181 pwbuf[ currpos++ ] = 'x'; 182 processedchars++; 183 184 while( isxdigit( p[ processedchars ] ) && (currpos < startpos + 4) ) 185 pwbuf[ currpos++ ] = (unsigned char)p[ processedchars++ ]; 186 break; 187 } 188 189 } 190 } 191 else 192 currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1[p[ processedchars++ ]] ); 193 194 } 195 196 /* copy last " or ' */ 197 pwbuf[ currpos++ ] = p[ processedchars ]; 198 199 return currpos; 200 } 201 202 void maketokenrow(int size,Tokenrow * trp)203 maketokenrow(int size, Tokenrow * trp) 204 { 205 trp->max = size; 206 if (size > 0) 207 trp->bp = (Token *) domalloc(size * sizeof(Token)); 208 else 209 trp->bp = NULL; 210 trp->tp = trp->bp; 211 trp->lp = trp->bp; 212 } 213 214 Token * growtokenrow(Tokenrow * trp)215 growtokenrow(Tokenrow * trp) 216 { 217 int ncur = trp->tp - trp->bp; 218 int nlast = trp->lp - trp->bp; 219 220 trp->max = 3 * trp->max / 2 + 1; 221 trp->bp = (Token *) realloc(trp->bp, trp->max * sizeof(Token)); 222 trp->lp = &trp->bp[nlast]; 223 trp->tp = &trp->bp[ncur]; 224 return trp->lp; 225 } 226 227 /* 228 * Compare a row of tokens, ignoring the content of WS; return !=0 if different 229 */ 230 int comparetokens(Tokenrow * tr1,Tokenrow * tr2)231 comparetokens(Tokenrow * tr1, Tokenrow * tr2) 232 { 233 Token *tp1, *tp2; 234 235 tp1 = tr1->tp; 236 tp2 = tr2->tp; 237 if (tr1->lp - tp1 != tr2->lp - tp2) 238 return 1; 239 for (; tp1 < tr1->lp; tp1++, tp2++) 240 { 241 if (tp1->type != tp2->type 242 || (tp1->wslen == 0) != (tp2->wslen == 0) 243 || tp1->len != tp2->len 244 || strncmp((char *) tp1->t, (char *) tp2->t, tp1->len) != 0) 245 return 1; 246 } 247 return 0; 248 } 249 250 /* 251 * replace ntok tokens starting at dtr->tp with the contents of str. 252 * tp ends up pointing just beyond the replacement. 253 * Canonical whitespace is assured on each side. 254 */ 255 void insertrow(Tokenrow * dtr,int ntok,Tokenrow * str)256 insertrow(Tokenrow * dtr, int ntok, Tokenrow * str) 257 { 258 int nrtok = rowlen(str); 259 260 dtr->tp += ntok; 261 adjustrow(dtr, nrtok - ntok); 262 dtr->tp -= ntok; 263 movetokenrow(dtr, str); 264 dtr->tp += nrtok; 265 } 266 267 /* 268 * make sure there is WS before trp->tp, if tokens might merge in the output 269 */ 270 void makespace(Tokenrow * trp,Token * ntp)271 makespace(Tokenrow * trp, Token * ntp) 272 { 273 uchar *tt; 274 Token *tp = trp->tp; 275 276 if (tp >= trp->lp) 277 return; 278 279 if (ntp->wslen) 280 { 281 tt = newstring(tp->t, tp->len, ntp->wslen); 282 strncpy((char *)tt, (char *)ntp->t - ntp->wslen, ntp->wslen); 283 tp->t = tt + ntp->wslen; 284 tp->wslen = ntp->wslen; 285 tp->flag |= XPWS; 286 } 287 } 288 289 /* 290 * Copy an entire tokenrow into another, at tp. 291 * It is assumed that there is enough space. 292 * Not strictly conforming. 293 */ 294 void movetokenrow(Tokenrow * dtr,Tokenrow * str)295 movetokenrow(Tokenrow * dtr, Tokenrow * str) 296 { 297 int nby; 298 299 /* nby = sizeof(Token) * (str->lp - str->bp); */ 300 nby = (char *) str->lp - (char *) str->bp; 301 memmove(dtr->tp, str->bp, nby); 302 } 303 304 /* 305 * Move the tokens in a row, starting at tr->tp, rightward by nt tokens; 306 * nt may be negative (left move). 307 * The row may need to be grown. 308 * Non-strictly conforming because of the (char *), but easily fixed 309 */ 310 void adjustrow(Tokenrow * trp,int nt)311 adjustrow(Tokenrow * trp, int nt) 312 { 313 int nby, size; 314 315 if (nt == 0) 316 return; 317 size = (trp->lp - trp->bp) + nt; 318 while (size > trp->max) 319 growtokenrow(trp); 320 /* nby = sizeof(Token) * (trp->lp - trp->tp); */ 321 nby = (char *) trp->lp - (char *) trp->tp; 322 if (nby) 323 memmove(trp->tp + nt, trp->tp, nby); 324 trp->lp += nt; 325 } 326 327 /* 328 * Copy a row of tokens into the destination holder, allocating 329 * the space for the contents. Return the destination. 330 */ 331 Tokenrow * copytokenrow(Tokenrow * dtr,Tokenrow * str)332 copytokenrow(Tokenrow * dtr, Tokenrow * str) 333 { 334 int len = rowlen(str); 335 336 maketokenrow(len, dtr); 337 movetokenrow(dtr, str); 338 dtr->lp += len; 339 return dtr; 340 } 341 342 /* 343 * Produce a copy of a row of tokens. Start at trp->tp. 344 * The value strings are copied as well. The first token 345 * has WS available. 346 */ 347 Tokenrow * normtokenrow(Tokenrow * trp)348 normtokenrow(Tokenrow * trp) 349 { 350 Token *tp; 351 Tokenrow *ntrp = new(Tokenrow); 352 int len; 353 354 len = trp->lp - trp->tp; 355 if (len <= 0) 356 len = 1; 357 maketokenrow(len, ntrp); 358 for (tp = trp->tp; tp < trp->lp; tp++) 359 { 360 *ntrp->lp = *tp; 361 if (tp->len) 362 { 363 ntrp->lp->t = newstring(tp->t, tp->len, 1); 364 *ntrp->lp->t++ = ' '; 365 if (tp->wslen) 366 ntrp->lp->wslen = 1; 367 } 368 ntrp->lp++; 369 } 370 if (ntrp->lp > ntrp->bp) 371 ntrp->bp->wslen = 0; 372 return ntrp; 373 } 374 375 /* 376 * Debugging 377 */ 378 void peektokens(Tokenrow * trp,char * str)379 peektokens(Tokenrow * trp, char *str) 380 { 381 Token *tp; 382 383 tp = trp->tp; 384 flushout(); 385 if (str) 386 fprintf(stderr, "%s ", str); 387 if (tp < trp->bp || tp > trp->lp) 388 fprintf(stderr, "(tp offset %ld) ", (long int) (tp - trp->bp)); 389 for (tp = trp->bp; tp < trp->lp && tp < trp->bp + 32; tp++) 390 { 391 if (tp->type != NL) 392 { 393 int c = tp->t[tp->len]; 394 395 tp->t[tp->len] = 0; 396 fprintf(stderr, "%s", tp->t); 397 tp->t[tp->len] = (uchar) c; 398 } 399 fprintf(stderr, tp == trp->tp ? "{%x*} " : "{%x} ", tp->type); 400 } 401 fprintf(stderr, "\n"); 402 fflush(stderr); 403 } 404 405 void puttokens(Tokenrow * trp)406 puttokens(Tokenrow * trp) 407 { 408 Token *tp; 409 int len; 410 uchar *p; 411 412 if (Vflag) 413 peektokens(trp, ""); 414 tp = trp->bp; 415 for (; tp < trp->lp; tp++) 416 { 417 if (tp->type != NL) 418 { 419 len = tp->len + tp->wslen; 420 p = tp->t - tp->wslen; 421 422 /* add parameter check to delete operator? */ 423 if( Dflag ) 424 { 425 if( (tp->type == NAME) && (strncmp( (char*)p, "delete", len ) == 0) ) 426 { 427 Token* ntp = tp; 428 ntp++; 429 430 if( ntp->type == NAME ) 431 { 432 uchar* np = ntp->t - ntp->wslen; 433 int nlen = ntp->len + ntp->wslen; 434 435 memcpy(wbp, "if(", 3 ); 436 wbp += 4; 437 memcpy(wbp, np, nlen ); 438 wbp += nlen; 439 memcpy(wbp, ")", 1 ); 440 wbp++; 441 442 memcpy(wbp, p, len); 443 } 444 } 445 } 446 447 /* EBCDIC to ANSI conversion requested? */ 448 if( Aflag ) 449 { 450 /* keyword __ToLatin1__ found? -> do conversion! */ 451 if( EBCDIC_StartTokenDetected ) 452 { 453 /* previous token was 'extern'? -> don't convert current token! */ 454 if( EBCDIC_ExternTokenDetected ) 455 { 456 EBCDIC_ExternTokenDetected = 0; 457 memcpy(wbp, p, len); 458 } 459 else 460 { 461 /* current token is keyword 'extern'? -> don't convert following token! */ 462 if( (tp->wslen == 0) && (strncmp( (char*)p, "extern", len ) == 0) ) 463 { 464 EBCDIC_ExternTokenDetected = 1; 465 memcpy(wbp, p, len); 466 } 467 else 468 { 469 /* token is string or char? -> process EBCDIC to ANSI conversion */ 470 if ((tp->type == STRING) || (tp->type == CCON)) 471 len = memcpy_EBCDIC(wbp, p, len); 472 else 473 memcpy(wbp, p, len); 474 } 475 } 476 } 477 else 478 /* keyword __ToLatin1__ found? -> don't copy keyword and start conversion */ 479 if( (tp->type == NAME) && (strncmp( (char*)p, "__ToLatin1__", len) == 0) ) 480 { 481 EBCDIC_StartTokenDetected = 1; 482 len = 0; 483 } 484 else 485 memcpy(wbp, p, len); 486 } 487 else 488 memcpy(wbp, p, len); 489 490 wbp += len; 491 } 492 else 493 *wbp++ = '\n'; 494 495 if (wbp >= &wbuf[OBS]) 496 { 497 if ( write(1, wbuf, OBS) != -1 ) { 498 if (wbp > &wbuf[OBS]) 499 memcpy(wbuf, wbuf + OBS, wbp - &wbuf[OBS]); 500 wbp -= OBS; 501 } 502 else exit(1); 503 } 504 } 505 trp->tp = tp; 506 if (cursource->fd == 0) 507 flushout(); 508 } 509 510 void flushout(void)511 flushout(void) 512 { 513 if (wbp > wbuf) 514 { 515 if ( write(1, wbuf, wbp - wbuf) != -1) 516 wbp = wbuf; 517 else 518 exit(1); 519 } 520 } 521 522 /* 523 * turn a row into just a newline 524 */ 525 void setempty(Tokenrow * trp)526 setempty(Tokenrow * trp) 527 { 528 trp->tp = trp->bp; 529 trp->lp = trp->bp + 1; 530 *trp->bp = nltoken; 531 } 532 533 /* 534 * generate a number 535 */ 536 char * outnum(char * p,int n)537 outnum(char *p, int n) 538 { 539 if (n >= 10) 540 p = outnum(p, n / 10); 541 *p++ = (char) (n % 10 + '0'); 542 return p; 543 } 544 545 /* 546 * allocate and initialize a new string from s, of length l, at offset o 547 * Null terminated. 548 */ 549 uchar * newstring(uchar * s,int l,int o)550 newstring(uchar * s, int l, int o) 551 { 552 uchar *ns = (uchar *) domalloc(l + o + 1); 553 554 ns[l + o] = '\0'; 555 return (uchar *) strncpy((char *) ns + o, (char *) s, l) - o; 556 } 557