This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
perl 2.0 patch 1: removed redundant debugging code in regexp.c
[perl5.git] / toke.c
CommitLineData
13281fa4 1/* $Header: toke.c,v 2.0.1.1 88/06/28 16:39:50 root Exp $
378cc40b
LW
2 *
3 * $Log: toke.c,v $
13281fa4
LW
4 * Revision 2.0.1.1 88/06/28 16:39:50 root
5 * patch1: tr/x/y/ can dump core if y is shorter than x
6 *
378cc40b
LW
7 * Revision 2.0 88/06/05 00:11:16 root
8 * Baseline version 2.0.
9 *
10 */
11
12#include "EXTERN.h"
13#include "perl.h"
14#include "perly.h"
15
16#define CLINE (cmdline = (line < cmdline ? line : cmdline))
17
18#define RETURN(retval) return (bufptr = s,(int)retval)
19#define OPERATOR(retval) return (expectterm = TRUE,bufptr = s,(int)retval)
20#define TERM(retval) return (CLINE, expectterm = FALSE,bufptr = s,(int)retval)
21#define LOOPX(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)LOOPEX)
22#define UNI(f) return(yylval.ival = f,expectterm = TRUE,bufptr = s,(int)UNIOP)
23#define FTST(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)FILETEST)
24#define FUN0(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC0)
25#define FUN1(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC1)
26#define FUN2(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2)
27#define FUN3(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC3)
28#define SFUN(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)STABFUN)
29#define LFUN(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)LVALFUN)
30
31yylex()
32{
33 register char *s = bufptr;
34 register char *d;
35 register int tmp;
36 static bool in_format = FALSE;
37 static bool firstline = TRUE;
38
39 retry:
40#ifdef YYDEBUG
41 if (yydebug)
42 if (index(s,'\n'))
43 fprintf(stderr,"Tokener at %s",s);
44 else
45 fprintf(stderr,"Tokener at %s\n",s);
46#endif
47 switch (*s) {
48 default:
49 fprintf(stderr,
50 "Unrecognized character %c in file %s line %ld--ignoring.\n",
51 *s++,filename,(long)line);
52 goto retry;
53 case 0:
54 s = str_get(linestr);
55 *s = '\0';
56 if (firstline && (minus_n || minus_p)) {
57 firstline = FALSE;
58 str_set(linestr,"line: while (<>) {");
59 if (minus_a)
60 str_cat(linestr,"@F=split(' ');");
61 s = str_get(linestr);
62 goto retry;
63 }
64 if (!rsfp)
65 RETURN(0);
66 if (in_format) {
67 yylval.formval = load_format(); /* leaves . in buffer */
68 in_format = FALSE;
69 s = str_get(linestr);
70 TERM(FORMLIST);
71 }
72 line++;
73 if ((s = str_gets(linestr, rsfp)) == Nullch) {
74 if (preprocess)
75 pclose(rsfp);
76 else if (rsfp != stdin)
77 fclose(rsfp);
78 rsfp = Nullfp;
79 if (minus_n || minus_p) {
80 str_set(linestr,minus_p ? "}continue{print;" : "");
81 str_cat(linestr,"}");
82 s = str_get(linestr);
83 goto retry;
84 }
85 s = str_get(linestr);
86 RETURN(0);
87 }
88#ifdef DEBUG
89 else if (firstline) {
90 char *showinput();
91 s = showinput();
92 }
93#endif
94 firstline = FALSE;
95 goto retry;
96 case ' ': case '\t': case '\f':
97 s++;
98 goto retry;
99 case '\n':
100 case '#':
101 if (preprocess && s == str_get(linestr) &&
102 s[1] == ' ' && isdigit(s[2])) {
103 line = atoi(s+2)-1;
104 for (s += 2; isdigit(*s); s++) ;
105 while (*s && isspace(*s)) s++;
106 if (filename)
107 safefree(filename);
108 s[strlen(s)-1] = '\0'; /* wipe out newline */
109 if (*s == '"') {
110 s++;
111 s[strlen(s)-1] = '\0'; /* wipe out trailing quote */
112 }
113 if (*s)
114 filename = savestr(s);
115 else
116 filename = savestr(origfilename);
117 s = str_get(linestr);
118 }
119 if (in_eval) {
120 while (*s && *s != '\n')
121 s++;
122 if (*s)
123 s++;
124 line++;
125 }
126 else
127 *s = '\0';
128 if (lex_newlines)
129 RETURN('\n');
130 goto retry;
131 case '-':
132 if (s[1] && isalpha(s[1]) && !isalpha(s[2])) {
133 s++;
134 switch (*s++) {
135 case 'r': FTST(O_FTEREAD);
136 case 'w': FTST(O_FTEWRITE);
137 case 'x': FTST(O_FTEEXEC);
138 case 'o': FTST(O_FTEOWNED);
139 case 'R': FTST(O_FTRREAD);
140 case 'W': FTST(O_FTRWRITE);
141 case 'X': FTST(O_FTREXEC);
142 case 'O': FTST(O_FTROWNED);
143 case 'e': FTST(O_FTIS);
144 case 'z': FTST(O_FTZERO);
145 case 's': FTST(O_FTSIZE);
146 case 'f': FTST(O_FTFILE);
147 case 'd': FTST(O_FTDIR);
148 case 'l': FTST(O_FTLINK);
149 case 'p': FTST(O_FTPIPE);
150 case 'S': FTST(O_FTSOCK);
151 case 'u': FTST(O_FTSUID);
152 case 'g': FTST(O_FTSGID);
153 case 'k': FTST(O_FTSVTX);
154 case 'b': FTST(O_FTBLK);
155 case 'c': FTST(O_FTCHR);
156 case 't': FTST(O_FTTTY);
157 case 'T': FTST(O_FTTEXT);
158 case 'B': FTST(O_FTBINARY);
159 default:
160 s -= 2;
161 break;
162 }
163 }
164 /*FALL THROUGH*/
165 case '+':
166 if (s[1] == *s) {
167 s++;
168 if (*s++ == '+')
169 RETURN(INC);
170 else
171 RETURN(DEC);
172 }
173 /* FALL THROUGH */
174 case '*':
175 case '%':
176 case '^':
177 case '~':
178 case '(':
179 case ',':
180 case ':':
181 case '[':
182 tmp = *s++;
183 OPERATOR(tmp);
184 case '{':
185 tmp = *s++;
186 if (isspace(*s) || *s == '#')
187 cmdline = NOLINE; /* invalidate current command line number */
188 OPERATOR(tmp);
189 case ';':
190 if (line < cmdline)
191 cmdline = line;
192 tmp = *s++;
193 OPERATOR(tmp);
194 case ')':
195 case ']':
196 tmp = *s++;
197 TERM(tmp);
198 case '}':
199 tmp = *s++;
200 for (d = s; *d == ' ' || *d == '\t'; d++) ;
201 if (*d == '\n' || *d == '#')
202 OPERATOR(tmp); /* block end */
203 else
204 TERM(tmp); /* associative array end */
205 case '&':
206 s++;
207 tmp = *s++;
208 if (tmp == '&')
209 OPERATOR(ANDAND);
210 s--;
211 OPERATOR('&');
212 case '|':
213 s++;
214 tmp = *s++;
215 if (tmp == '|')
216 OPERATOR(OROR);
217 s--;
218 OPERATOR('|');
219 case '=':
220 s++;
221 tmp = *s++;
222 if (tmp == '=')
223 OPERATOR(EQ);
224 if (tmp == '~')
225 OPERATOR(MATCH);
226 s--;
227 OPERATOR('=');
228 case '!':
229 s++;
230 tmp = *s++;
231 if (tmp == '=')
232 OPERATOR(NE);
233 if (tmp == '~')
234 OPERATOR(NMATCH);
235 s--;
236 OPERATOR('!');
237 case '<':
238 if (expectterm) {
239 s = scanstr(s);
240 TERM(RSTRING);
241 }
242 s++;
243 tmp = *s++;
244 if (tmp == '<')
245 OPERATOR(LS);
246 if (tmp == '=')
247 OPERATOR(LE);
248 s--;
249 OPERATOR('<');
250 case '>':
251 s++;
252 tmp = *s++;
253 if (tmp == '>')
254 OPERATOR(RS);
255 if (tmp == '=')
256 OPERATOR(GE);
257 s--;
258 OPERATOR('>');
259
260#define SNARFWORD \
261 d = tokenbuf; \
262 while (isalpha(*s) || isdigit(*s) || *s == '_') \
263 *d++ = *s++; \
264 *d = '\0'; \
265 d = tokenbuf;
266
267 case '$':
268 if (s[1] == '#' && (isalpha(s[2]) || s[2] == '_')) {
269 s++;
270 s = scanreg(s,tokenbuf);
271 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
272 TERM(ARYLEN);
273 }
274 s = scanreg(s,tokenbuf);
275 yylval.stabval = stabent(tokenbuf,TRUE);
276 TERM(REG);
277
278 case '@':
279 s = scanreg(s,tokenbuf);
280 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
281 TERM(ARY);
282
283 case '/': /* may either be division or pattern */
284 case '?': /* may either be conditional or pattern */
285 if (expectterm) {
286 s = scanpat(s);
287 TERM(PATTERN);
288 }
289 tmp = *s++;
290 OPERATOR(tmp);
291
292 case '.':
293 if (!expectterm || !isdigit(s[1])) {
294 s++;
295 tmp = *s++;
296 if (tmp == '.')
297 OPERATOR(DOTDOT);
298 s--;
299 OPERATOR('.');
300 }
301 /* FALL THROUGH */
302 case '0': case '1': case '2': case '3': case '4':
303 case '5': case '6': case '7': case '8': case '9':
304 case '\'': case '"': case '`':
305 s = scanstr(s);
306 TERM(RSTRING);
307
308 case '_':
309 SNARFWORD;
310 yylval.cval = savestr(d);
311 OPERATOR(WORD);
312 case 'a': case 'A':
313 SNARFWORD;
314 yylval.cval = savestr(d);
315 OPERATOR(WORD);
316 case 'b': case 'B':
317 SNARFWORD;
318 yylval.cval = savestr(d);
319 OPERATOR(WORD);
320 case 'c': case 'C':
321 SNARFWORD;
322 if (strEQ(d,"continue"))
323 OPERATOR(CONTINUE);
324 if (strEQ(d,"chdir"))
325 UNI(O_CHDIR);
326 if (strEQ(d,"close"))
327 OPERATOR(CLOSE);
328 if (strEQ(d,"crypt"))
329 FUN2(O_CRYPT);
330 if (strEQ(d,"chop"))
331 LFUN(O_CHOP);
332 if (strEQ(d,"chmod")) {
333 yylval.ival = O_CHMOD;
334 OPERATOR(LISTOP);
335 }
336 if (strEQ(d,"chown")) {
337 yylval.ival = O_CHOWN;
338 OPERATOR(LISTOP);
339 }
340 yylval.cval = savestr(d);
341 OPERATOR(WORD);
342 case 'd': case 'D':
343 SNARFWORD;
344 if (strEQ(d,"do"))
345 OPERATOR(DO);
346 if (strEQ(d,"die"))
347 UNI(O_DIE);
348 if (strEQ(d,"delete"))
349 OPERATOR(DELETE);
350 yylval.cval = savestr(d);
351 OPERATOR(WORD);
352 case 'e': case 'E':
353 SNARFWORD;
354 if (strEQ(d,"else"))
355 OPERATOR(ELSE);
356 if (strEQ(d,"elsif")) {
357 yylval.ival = line;
358 OPERATOR(ELSIF);
359 }
360 if (strEQ(d,"eq") || strEQ(d,"EQ"))
361 OPERATOR(SEQ);
362 if (strEQ(d,"exit"))
363 UNI(O_EXIT);
364 if (strEQ(d,"eval")) {
365 allstabs = TRUE; /* must initialize everything since */
366 UNI(O_EVAL); /* we don't know what will be used */
367 }
368 if (strEQ(d,"eof"))
369 TERM(FEOF);
370 if (strEQ(d,"exp"))
371 FUN1(O_EXP);
372 if (strEQ(d,"each"))
373 SFUN(O_EACH);
374 if (strEQ(d,"exec")) {
375 yylval.ival = O_EXEC;
376 OPERATOR(LISTOP);
377 }
378 yylval.cval = savestr(d);
379 OPERATOR(WORD);
380 case 'f': case 'F':
381 SNARFWORD;
382 if (strEQ(d,"for"))
383 OPERATOR(FOR);
384 if (strEQ(d,"foreach"))
385 OPERATOR(FOR);
386 if (strEQ(d,"format")) {
387 in_format = TRUE;
388 OPERATOR(FORMAT);
389 }
390 if (strEQ(d,"fork"))
391 FUN0(O_FORK);
392 yylval.cval = savestr(d);
393 OPERATOR(WORD);
394 case 'g': case 'G':
395 SNARFWORD;
396 if (strEQ(d,"gt") || strEQ(d,"GT"))
397 OPERATOR(SGT);
398 if (strEQ(d,"ge") || strEQ(d,"GE"))
399 OPERATOR(SGE);
400 if (strEQ(d,"goto"))
401 LOOPX(O_GOTO);
402 if (strEQ(d,"gmtime"))
403 FUN1(O_GMTIME);
404 yylval.cval = savestr(d);
405 OPERATOR(WORD);
406 case 'h': case 'H':
407 SNARFWORD;
408 if (strEQ(d,"hex"))
409 FUN1(O_HEX);
410 yylval.cval = savestr(d);
411 OPERATOR(WORD);
412 case 'i': case 'I':
413 SNARFWORD;
414 if (strEQ(d,"if")) {
415 yylval.ival = line;
416 OPERATOR(IF);
417 }
418 if (strEQ(d,"index"))
419 FUN2(O_INDEX);
420 if (strEQ(d,"int"))
421 FUN1(O_INT);
422 yylval.cval = savestr(d);
423 OPERATOR(WORD);
424 case 'j': case 'J':
425 SNARFWORD;
426 if (strEQ(d,"join"))
427 OPERATOR(JOIN);
428 yylval.cval = savestr(d);
429 OPERATOR(WORD);
430 case 'k': case 'K':
431 SNARFWORD;
432 if (strEQ(d,"keys"))
433 SFUN(O_KEYS);
434 if (strEQ(d,"kill")) {
435 yylval.ival = O_KILL;
436 OPERATOR(LISTOP);
437 }
438 yylval.cval = savestr(d);
439 OPERATOR(WORD);
440 case 'l': case 'L':
441 SNARFWORD;
442 if (strEQ(d,"last"))
443 LOOPX(O_LAST);
444 if (strEQ(d,"local"))
445 OPERATOR(LOCAL);
446 if (strEQ(d,"length"))
447 FUN1(O_LENGTH);
448 if (strEQ(d,"lt") || strEQ(d,"LT"))
449 OPERATOR(SLT);
450 if (strEQ(d,"le") || strEQ(d,"LE"))
451 OPERATOR(SLE);
452 if (strEQ(d,"localtime"))
453 FUN1(O_LOCALTIME);
454 if (strEQ(d,"log"))
455 FUN1(O_LOG);
456 if (strEQ(d,"link"))
457 FUN2(O_LINK);
458 yylval.cval = savestr(d);
459 OPERATOR(WORD);
460 case 'm': case 'M':
461 SNARFWORD;
462 if (strEQ(d,"m")) {
463 s = scanpat(s-1);
464 TERM(PATTERN);
465 }
466 yylval.cval = savestr(d);
467 OPERATOR(WORD);
468 case 'n': case 'N':
469 SNARFWORD;
470 if (strEQ(d,"next"))
471 LOOPX(O_NEXT);
472 if (strEQ(d,"ne") || strEQ(d,"NE"))
473 OPERATOR(SNE);
474 yylval.cval = savestr(d);
475 OPERATOR(WORD);
476 case 'o': case 'O':
477 SNARFWORD;
478 if (strEQ(d,"open"))
479 OPERATOR(OPEN);
480 if (strEQ(d,"ord"))
481 FUN1(O_ORD);
482 if (strEQ(d,"oct"))
483 FUN1(O_OCT);
484 yylval.cval = savestr(d);
485 OPERATOR(WORD);
486 case 'p': case 'P':
487 SNARFWORD;
488 if (strEQ(d,"print")) {
489 yylval.ival = O_PRINT;
490 OPERATOR(LISTOP);
491 }
492 if (strEQ(d,"printf")) {
493 yylval.ival = O_PRTF;
494 OPERATOR(LISTOP);
495 }
496 if (strEQ(d,"push")) {
497 yylval.ival = O_PUSH;
498 OPERATOR(PUSH);
499 }
500 if (strEQ(d,"pop"))
501 OPERATOR(POP);
502 yylval.cval = savestr(d);
503 OPERATOR(WORD);
504 case 'q': case 'Q':
505 SNARFWORD;
506 yylval.cval = savestr(d);
507 OPERATOR(WORD);
508 case 'r': case 'R':
509 SNARFWORD;
510 if (strEQ(d,"reset"))
511 UNI(O_RESET);
512 if (strEQ(d,"redo"))
513 LOOPX(O_REDO);
514 if (strEQ(d,"rename"))
515 FUN2(O_RENAME);
516 yylval.cval = savestr(d);
517 OPERATOR(WORD);
518 case 's': case 'S':
519 SNARFWORD;
520 if (strEQ(d,"s")) {
521 s = scansubst(s);
522 TERM(SUBST);
523 }
524 if (strEQ(d,"shift"))
525 TERM(SHIFT);
526 if (strEQ(d,"split"))
527 TERM(SPLIT);
528 if (strEQ(d,"substr"))
529 FUN3(O_SUBSTR);
530 if (strEQ(d,"sprintf"))
531 OPERATOR(SPRINTF);
532 if (strEQ(d,"sub"))
533 OPERATOR(SUB);
534 if (strEQ(d,"select"))
535 OPERATOR(SELECT);
536 if (strEQ(d,"seek"))
537 OPERATOR(SEEK);
538 if (strEQ(d,"stat"))
539 OPERATOR(STAT);
540 if (strEQ(d,"study")) {
541 sawstudy++;
542 LFUN(O_STUDY);
543 }
544 if (strEQ(d,"sqrt"))
545 FUN1(O_SQRT);
546 if (strEQ(d,"sleep"))
547 UNI(O_SLEEP);
548 if (strEQ(d,"system")) {
549 yylval.ival = O_SYSTEM;
550 OPERATOR(LISTOP);
551 }
552 if (strEQ(d,"symlink"))
553 FUN2(O_SYMLINK);
554 if (strEQ(d,"sort")) {
555 yylval.ival = O_SORT;
556 OPERATOR(LISTOP);
557 }
558 yylval.cval = savestr(d);
559 OPERATOR(WORD);
560 case 't': case 'T':
561 SNARFWORD;
562 if (strEQ(d,"tr")) {
563 s = scantrans(s);
564 TERM(TRANS);
565 }
566 if (strEQ(d,"tell"))
567 TERM(TELL);
568 if (strEQ(d,"time"))
569 FUN0(O_TIME);
570 if (strEQ(d,"times"))
571 FUN0(O_TMS);
572 yylval.cval = savestr(d);
573 OPERATOR(WORD);
574 case 'u': case 'U':
575 SNARFWORD;
576 if (strEQ(d,"using"))
577 OPERATOR(USING);
578 if (strEQ(d,"until")) {
579 yylval.ival = line;
580 OPERATOR(UNTIL);
581 }
582 if (strEQ(d,"unless")) {
583 yylval.ival = line;
584 OPERATOR(UNLESS);
585 }
586 if (strEQ(d,"umask"))
587 FUN1(O_UMASK);
588 if (strEQ(d,"unshift")) {
589 yylval.ival = O_UNSHIFT;
590 OPERATOR(PUSH);
591 }
592 if (strEQ(d,"unlink")) {
593 yylval.ival = O_UNLINK;
594 OPERATOR(LISTOP);
595 }
596 if (strEQ(d,"utime")) {
597 yylval.ival = O_UTIME;
598 OPERATOR(LISTOP);
599 }
600 yylval.cval = savestr(d);
601 OPERATOR(WORD);
602 case 'v': case 'V':
603 SNARFWORD;
604 if (strEQ(d,"values"))
605 SFUN(O_VALUES);
606 yylval.cval = savestr(d);
607 OPERATOR(WORD);
608 case 'w': case 'W':
609 SNARFWORD;
610 if (strEQ(d,"write"))
611 TERM(WRITE);
612 if (strEQ(d,"while")) {
613 yylval.ival = line;
614 OPERATOR(WHILE);
615 }
616 if (strEQ(d,"wait"))
617 FUN0(O_WAIT);
618 yylval.cval = savestr(d);
619 OPERATOR(WORD);
620 case 'x': case 'X':
621 SNARFWORD;
622 if (!expectterm && strEQ(d,"x"))
623 OPERATOR('x');
624 yylval.cval = savestr(d);
625 OPERATOR(WORD);
626 case 'y': case 'Y':
627 SNARFWORD;
628 if (strEQ(d,"y")) {
629 s = scantrans(s);
630 TERM(TRANS);
631 }
632 yylval.cval = savestr(d);
633 OPERATOR(WORD);
634 case 'z': case 'Z':
635 SNARFWORD;
636 yylval.cval = savestr(d);
637 OPERATOR(WORD);
638 }
639}
640
641char *
642scanreg(s,dest)
643register char *s;
644char *dest;
645{
646 register char *d;
647
648 s++;
649 d = dest;
650 if (isdigit(*s)) {
651 while (isdigit(*s) || *s == '_')
652 *d++ = *s++;
653 }
654 else {
655 while (isalpha(*s) || isdigit(*s) || *s == '_')
656 *d++ = *s++;
657 }
658 *d = '\0';
659 d = dest;
660 if (!*d) {
661 *d = *s++;
662 if (*d == '{') {
663 d = dest;
664 while (*s && *s != '}')
665 *d++ = *s++;
666 *d = '\0';
667 d = dest;
668 if (*s)
669 s++;
670 }
671 else
672 d[1] = '\0';
673 }
674 if (*d == '^' && !isspace(*s))
675 *d = *s++ & 31;
676 return s;
677}
678
679STR *
680scanconst(string)
681char *string;
682{
683 register STR *retstr;
684 register char *t;
685 register char *d;
686
687 if (index(string,'|')) {
688 return Nullstr;
689 }
690 retstr = str_make(string);
691 t = str_get(retstr);
692 *(long*)&retstr->str_nval = 100;
693 for (d=t; *d; ) {
694 switch (*d) {
695 case '.': case '[': case '$': case '(': case ')': case '|':
696 *d = '\0';
697 break;
698 case '\\':
699 if (index("wWbB0123456789sSdD",d[1])) {
700 *d = '\0';
701 break;
702 }
703 strcpy(d,d+1);
704 switch(*d) {
705 case 'n':
706 *d = '\n';
707 break;
708 case 't':
709 *d = '\t';
710 break;
711 case 'f':
712 *d = '\f';
713 break;
714 case 'r':
715 *d = '\r';
716 break;
717 }
718 /* FALL THROUGH */
719 default:
720 if (d[1] == '*' || d[1] == '+' || d[1] == '?') {
721 *d = '\0';
722 break;
723 }
724 d++;
725 }
726 }
727 if (!*t) {
728 str_free(retstr);
729 return Nullstr;
730 }
731 retstr->str_cur = strlen(retstr->str_ptr);
732 return retstr;
733}
734
735char *
736scanpat(s)
737register char *s;
738{
739 register SPAT *spat = (SPAT *) safemalloc(sizeof (SPAT));
740 register char *d;
741
742 bzero((char *)spat, sizeof(SPAT));
743 spat->spat_next = spat_root; /* link into spat list */
744 spat_root = spat;
745
746 switch (*s++) {
747 case 'm':
748 s++;
749 break;
750 case '/':
751 break;
752 case '?':
753 spat->spat_flags |= SPAT_ONCE;
754 break;
755 default:
756 fatal("panic: scanpat");
757 }
758 s = cpytill(tokenbuf,s,s[-1]);
759 if (!*s)
760 fatal("Search pattern not terminated");
761 s++;
762 if (*s == 'i') {
763 s++;
764 spat->spat_flags |= SPAT_FOLD;
765 }
766 for (d=tokenbuf; *d; d++) {
767 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
768 register ARG *arg;
769
770 spat->spat_runtime = arg = op_new(1);
771 arg->arg_type = O_ITEM;
772 arg[1].arg_type = A_DOUBLE;
773 arg[1].arg_ptr.arg_str = str_make(tokenbuf);
774 goto got_pat; /* skip compiling for now */
775 }
776 }
777 if (!(spat->spat_flags & SPAT_FOLD)) {
778 if (*tokenbuf == '^') {
779 spat->spat_short = scanconst(tokenbuf+1);
780 if (spat->spat_short) {
781 spat->spat_slen = strlen(spat->spat_short->str_ptr);
782 if (spat->spat_slen == strlen(tokenbuf+1))
783 spat->spat_flags |= SPAT_ALL;
784 }
785 }
786 else {
787 spat->spat_flags |= SPAT_SCANFIRST;
788 spat->spat_short = scanconst(tokenbuf);
789 if (spat->spat_short) {
790 spat->spat_slen = strlen(spat->spat_short->str_ptr);
791 if (spat->spat_slen == strlen(tokenbuf))
792 spat->spat_flags |= SPAT_ALL;
793 }
794 }
795 }
796 spat->spat_regexp = regcomp(tokenbuf,spat->spat_flags & SPAT_FOLD,1);
797 hoistmust(spat);
798 got_pat:
799 yylval.arg = make_match(O_MATCH,stab2arg(A_STAB,defstab),spat);
800 return s;
801}
802
803char *
804scansubst(s)
805register char *s;
806{
807 register SPAT *spat = (SPAT *) safemalloc(sizeof (SPAT));
808 register char *d;
809
810 bzero((char *)spat, sizeof(SPAT));
811 spat->spat_next = spat_root; /* link into spat list */
812 spat_root = spat;
813
814 s = cpytill(tokenbuf,s+1,*s);
815 if (!*s)
816 fatal("Substitution pattern not terminated");
817 for (d=tokenbuf; *d; d++) {
818 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
819 register ARG *arg;
820
821 spat->spat_runtime = arg = op_new(1);
822 arg->arg_type = O_ITEM;
823 arg[1].arg_type = A_DOUBLE;
824 arg[1].arg_ptr.arg_str = str_make(tokenbuf);
825 goto get_repl; /* skip compiling for now */
826 }
827 }
828 if (*tokenbuf == '^') {
829 spat->spat_short = scanconst(tokenbuf+1);
830 if (spat->spat_short)
831 spat->spat_slen = strlen(spat->spat_short->str_ptr);
832 }
833 else {
834 spat->spat_flags |= SPAT_SCANFIRST;
835 spat->spat_short = scanconst(tokenbuf);
836 if (spat->spat_short)
837 spat->spat_slen = strlen(spat->spat_short->str_ptr);
838 }
839 d = savestr(tokenbuf);
840get_repl:
841 s = scanstr(s);
842 if (!*s)
843 fatal("Substitution replacement not terminated");
844 spat->spat_repl = yylval.arg;
845 spat->spat_flags |= SPAT_ONCE;
846 while (*s == 'g' || *s == 'i') {
847 if (*s == 'g') {
848 s++;
849 spat->spat_flags &= ~SPAT_ONCE;
850 }
851 if (*s == 'i') {
852 s++;
853 spat->spat_flags |= SPAT_FOLD;
854 }
855 }
856 if (!spat->spat_runtime) {
857 spat->spat_regexp = regcomp(d, spat->spat_flags & SPAT_FOLD,1);
858 hoistmust(spat);
859 safefree(d);
860 }
861 if (spat->spat_flags & SPAT_FOLD) { /* Oops, disable optimization */
862 str_free(spat->spat_short);
863 spat->spat_short = Nullstr;
864 spat->spat_slen = 0;
865 }
866 yylval.arg = make_match(O_SUBST,stab2arg(A_STAB,defstab),spat);
867 return s;
868}
869
870hoistmust(spat)
871register SPAT *spat;
872{
873 if (spat->spat_regexp->regmust) { /* is there a better short-circuit? */
874 if (spat->spat_short &&
875 strEQ(spat->spat_short->str_ptr,spat->spat_regexp->regmust->str_ptr)){
876 if (spat->spat_flags & SPAT_SCANFIRST) {
877 str_free(spat->spat_short);
878 spat->spat_short = Nullstr;
879 }
880 else {
881 str_free(spat->spat_regexp->regmust);
882 spat->spat_regexp->regmust = Nullstr;
883 return;
884 }
885 }
886 if (!spat->spat_short || /* promote the better string */
887 ((spat->spat_flags & SPAT_SCANFIRST) &&
888 (spat->spat_short->str_cur < spat->spat_regexp->regmust->str_cur) )){
889 str_free(spat->spat_short); /* ok if null */
890 spat->spat_short = spat->spat_regexp->regmust;
891 spat->spat_regexp->regmust = Nullstr;
892 spat->spat_flags |= SPAT_SCANFIRST;
893 }
894 }
895}
896
897char *
898expand_charset(s)
899register char *s;
900{
901 char t[512];
902 register char *d = t;
903 register int i;
904
905 while (*s) {
906 if (s[1] == '-' && s[2]) {
907 for (i = s[0]; i <= s[2]; i++)
908 *d++ = i;
909 s += 3;
910 }
911 else
912 *d++ = *s++;
913 }
914 *d = '\0';
915 return savestr(t);
916}
917
918char *
919scantrans(s)
920register char *s;
921{
922 ARG *arg =
923 l(make_op(O_TRANS,2,stab2arg(A_STAB,defstab),Nullarg,Nullarg,0));
924 register char *t;
925 register char *r;
926 register char *tbl = safemalloc(256);
927 register int i;
13281fa4 928 register int j;
378cc40b
LW
929
930 arg[2].arg_type = A_NULL;
931 arg[2].arg_ptr.arg_cval = tbl;
932 for (i=0; i<256; i++)
933 tbl[i] = 0;
934 s = scanstr(s);
935 if (!*s)
936 fatal("Translation pattern not terminated");
937 t = expand_charset(str_get(yylval.arg[1].arg_ptr.arg_str));
938 free_arg(yylval.arg);
939 s = scanstr(s-1);
940 if (!*s)
941 fatal("Translation replacement not terminated");
942 r = expand_charset(str_get(yylval.arg[1].arg_ptr.arg_str));
943 free_arg(yylval.arg);
944 yylval.arg = arg;
945 if (!*r) {
946 safefree(r);
947 r = t;
948 }
13281fa4
LW
949 for (i = 0, j = 0; t[i]; i++,j++) {
950 if (!r[j])
951 --j;
952 tbl[t[i] & 0377] = r[j];
378cc40b
LW
953 }
954 if (r != t)
955 safefree(r);
956 safefree(t);
957 return s;
958}
959
960char *
961scanstr(s)
962register char *s;
963{
964 register char term;
965 register char *d;
966 register ARG *arg;
967 register bool makesingle = FALSE;
968 register STAB *stab;
969 char *leave = "\\$nrtfb0123456789"; /* which backslash sequences to keep */
970
971 arg = op_new(1);
972 yylval.arg = arg;
973 arg->arg_type = O_ITEM;
974
975 switch (*s) {
976 default: /* a substitution replacement */
977 arg[1].arg_type = A_DOUBLE;
978 makesingle = TRUE; /* maybe disable runtime scanning */
979 term = *s;
980 if (term == '\'')
981 leave = Nullch;
982 goto snarf_it;
983 case '0':
984 {
985 long i;
986 int shift;
987
988 arg[1].arg_type = A_SINGLE;
989 if (s[1] == 'x') {
990 shift = 4;
991 s += 2;
992 }
993 else if (s[1] == '.')
994 goto decimal;
995 else
996 shift = 3;
997 i = 0;
998 for (;;) {
999 switch (*s) {
1000 default:
1001 goto out;
1002 case '8': case '9':
1003 if (shift != 4)
1004 fatal("Illegal octal digit");
1005 /* FALL THROUGH */
1006 case '0': case '1': case '2': case '3': case '4':
1007 case '5': case '6': case '7':
1008 i <<= shift;
1009 i += *s++ & 15;
1010 break;
1011 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1012 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1013 if (shift != 4)
1014 goto out;
1015 i <<= 4;
1016 i += (*s++ & 7) + 9;
1017 break;
1018 }
1019 }
1020 out:
1021 sprintf(tokenbuf,"%ld",i);
1022 arg[1].arg_ptr.arg_str = str_make(tokenbuf);
1023 }
1024 break;
1025 case '1': case '2': case '3': case '4': case '5':
1026 case '6': case '7': case '8': case '9': case '.':
1027 decimal:
1028 arg[1].arg_type = A_SINGLE;
1029 d = tokenbuf;
1030 while (isdigit(*s) || *s == '_') {
1031 if (*s == '_')
1032 s++;
1033 else
1034 *d++ = *s++;
1035 }
1036 if (*s == '.' && index("0123456789eE",s[1])) {
1037 *d++ = *s++;
1038 while (isdigit(*s) || *s == '_') {
1039 if (*s == '_')
1040 s++;
1041 else
1042 *d++ = *s++;
1043 }
1044 }
1045 if (index("eE",*s) && index("+-0123456789",s[1])) {
1046 *d++ = *s++;
1047 if (*s == '+' || *s == '-')
1048 *d++ = *s++;
1049 while (isdigit(*s))
1050 *d++ = *s++;
1051 }
1052 *d = '\0';
1053 arg[1].arg_ptr.arg_str = str_make(tokenbuf);
1054 break;
1055 case '\'':
1056 arg[1].arg_type = A_SINGLE;
1057 term = *s;
1058 leave = Nullch;
1059 goto snarf_it;
1060
1061 case '<':
1062 d = tokenbuf;
1063 s = cpytill(d,s+1,'>');
1064 if (*s)
1065 s++;
1066 if (*d == '$') d++;
1067 while (*d && (isalpha(*d) || isdigit(*d) || *d == '_')) d++;
1068 if (*d) {
1069 d = tokenbuf;
1070 arg[1].arg_type = A_GLOB;
1071 d = savestr(d);
1072 arg[1].arg_ptr.arg_stab = stab = genstab();
1073 stab->stab_io = stio_new();
1074 stab->stab_val = str_make(d);
1075 }
1076 else {
1077 d = tokenbuf;
1078 if (!*d)
1079 strcpy(d,"ARGV");
1080 if (*d == '$') {
1081 arg[1].arg_type = A_INDREAD;
1082 arg[1].arg_ptr.arg_stab = stabent(d+1,TRUE);
1083 }
1084 else {
1085 arg[1].arg_type = A_READ;
1086 if (rsfp == stdin && strEQ(d,"stdin"))
1087 fatal("Can't get both program and data from <stdin>");
1088 arg[1].arg_ptr.arg_stab = stabent(d,TRUE);
1089 arg[1].arg_ptr.arg_stab->stab_io = stio_new();
1090 if (strEQ(d,"ARGV")) {
1091 aadd(arg[1].arg_ptr.arg_stab);
1092 arg[1].arg_ptr.arg_stab->stab_io->flags |=
1093 IOF_ARGV|IOF_START;
1094 }
1095 }
1096 }
1097 break;
1098 case '"':
1099 arg[1].arg_type = A_DOUBLE;
1100 makesingle = TRUE; /* maybe disable runtime scanning */
1101 term = *s;
1102 goto snarf_it;
1103 case '`':
1104 arg[1].arg_type = A_BACKTICK;
1105 term = *s;
1106 snarf_it:
1107 {
1108 STR *tmpstr;
1109 int sqstart = line;
1110 char *tmps;
1111
1112 tmpstr = str_new(strlen(s));
1113 s = str_append_till(tmpstr,s+1,term,leave);
1114 while (!*s) { /* multiple line string? */
1115 s = str_gets(linestr, rsfp);
1116 if (!s) {
1117 line = sqstart;
1118 fatal("EOF in string");
1119 }
1120 line++;
1121 s = str_append_till(tmpstr,s,term,leave);
1122 }
1123 s++;
1124 if (term == '\'') {
1125 arg[1].arg_ptr.arg_str = tmpstr;
1126 break;
1127 }
1128 tmps = s;
1129 s = tmpstr->str_ptr;
1130 while (*s) { /* see if we can make SINGLE */
1131 if (*s == '\\' && s[1] && isdigit(s[1]) && !isdigit(s[2]) &&
1132 !index("`\"",term) )
1133 *s = '$'; /* grandfather \digit in subst */
1134 if (*s == '$' && s[1] && s[1] != ')' && s[1] != '|') {
1135 makesingle = FALSE; /* force interpretation */
1136 }
1137 else if (*s == '\\' && s[1]) {
1138 s++;
1139 }
1140 s++;
1141 }
1142 s = d = tmpstr->str_ptr; /* assuming shrinkage only */
1143 while (*s) {
1144 if (*s == '$' && s[1] && s[1] != ')' && s[1] != '|') {
1145 int len;
1146
1147 len = scanreg(s,tokenbuf) - s;
1148 stabent(tokenbuf,TRUE); /* make sure it's created */
1149 while (len--)
1150 *d++ = *s++;
1151 continue;
1152 }
1153 else if (*s == '\\' && s[1]) {
1154 s++;
1155 switch (*s) {
1156 default:
1157 if (!makesingle && (!leave || index(leave,*s)))
1158 *d++ = '\\';
1159 *d++ = *s++;
1160 continue;
1161 case '0': case '1': case '2': case '3':
1162 case '4': case '5': case '6': case '7':
1163 *d = *s++ - '0';
1164 if (index("01234567",*s)) {
1165 *d <<= 3;
1166 *d += *s++ - '0';
1167 }
1168 if (index("01234567",*s)) {
1169 *d <<= 3;
1170 *d += *s++ - '0';
1171 }
1172 d++;
1173 continue;
1174 case 'b':
1175 *d++ = '\b';
1176 break;
1177 case 'n':
1178 *d++ = '\n';
1179 break;
1180 case 'r':
1181 *d++ = '\r';
1182 break;
1183 case 'f':
1184 *d++ = '\f';
1185 break;
1186 case 't':
1187 *d++ = '\t';
1188 break;
1189 }
1190 s++;
1191 continue;
1192 }
1193 *d++ = *s++;
1194 }
1195 *d = '\0';
1196
1197 if (arg[1].arg_type == A_DOUBLE && makesingle)
1198 arg[1].arg_type = A_SINGLE; /* now we can optimize on it */
1199
1200 tmpstr->str_cur = d - tmpstr->str_ptr; /* XXX cheat */
1201 arg[1].arg_ptr.arg_str = tmpstr;
1202 s = tmps;
1203 break;
1204 }
1205 }
1206 return s;
1207}
1208
1209FCMD *
1210load_format()
1211{
1212 FCMD froot;
1213 FCMD *flinebeg;
1214 register FCMD *fprev = &froot;
1215 register FCMD *fcmd;
1216 register char *s;
1217 register char *t;
1218 register char tmpchar;
1219 bool noblank;
1220
1221 while ((s = str_gets(linestr,rsfp)) != Nullch) {
1222 line++;
1223 if (strEQ(s,".\n")) {
1224 bufptr = s;
1225 return froot.f_next;
1226 }
1227 if (*s == '#')
1228 continue;
1229 flinebeg = Nullfcmd;
1230 noblank = FALSE;
1231 while (*s) {
1232 fcmd = (FCMD *)safemalloc(sizeof (FCMD));
1233 bzero((char*)fcmd, sizeof (FCMD));
1234 fprev->f_next = fcmd;
1235 fprev = fcmd;
1236 for (t=s; *t && *t != '@' && *t != '^'; t++) {
1237 if (*t == '~') {
1238 noblank = TRUE;
1239 *t = ' ';
1240 }
1241 }
1242 tmpchar = *t;
1243 *t = '\0';
1244 fcmd->f_pre = savestr(s);
1245 fcmd->f_presize = strlen(s);
1246 *t = tmpchar;
1247 s = t;
1248 if (!*s) {
1249 if (noblank)
1250 fcmd->f_flags |= FC_NOBLANK;
1251 break;
1252 }
1253 if (!flinebeg)
1254 flinebeg = fcmd; /* start values here */
1255 if (*s++ == '^')
1256 fcmd->f_flags |= FC_CHOP; /* for doing text filling */
1257 switch (*s) {
1258 case '*':
1259 fcmd->f_type = F_LINES;
1260 *s = '\0';
1261 break;
1262 case '<':
1263 fcmd->f_type = F_LEFT;
1264 while (*s == '<')
1265 s++;
1266 break;
1267 case '>':
1268 fcmd->f_type = F_RIGHT;
1269 while (*s == '>')
1270 s++;
1271 break;
1272 case '|':
1273 fcmd->f_type = F_CENTER;
1274 while (*s == '|')
1275 s++;
1276 break;
1277 default:
1278 fcmd->f_type = F_LEFT;
1279 break;
1280 }
1281 if (fcmd->f_flags & FC_CHOP && *s == '.') {
1282 fcmd->f_flags |= FC_MORE;
1283 while (*s == '.')
1284 s++;
1285 }
1286 fcmd->f_size = s-t;
1287 }
1288 if (flinebeg) {
1289 again:
1290 if ((bufptr = str_gets(linestr ,rsfp)) == Nullch)
1291 goto badform;
1292 line++;
1293 if (strEQ(bufptr,".\n")) {
1294 yyerror("Missing values line");
1295 return froot.f_next;
1296 }
1297 if (*bufptr == '#')
1298 goto again;
1299 lex_newlines = TRUE;
1300 while (flinebeg || *bufptr) {
1301 switch(yylex()) {
1302 default:
1303 yyerror("Bad value in format");
1304 *bufptr = '\0';
1305 break;
1306 case '\n':
1307 if (flinebeg)
1308 yyerror("Missing value in format");
1309 *bufptr = '\0';
1310 break;
1311 case REG:
1312 yylval.arg = stab2arg(A_LVAL,yylval.stabval);
1313 /* FALL THROUGH */
1314 case RSTRING:
1315 if (!flinebeg)
1316 yyerror("Extra value in format");
1317 else {
1318 flinebeg->f_expr = yylval.arg;
1319 do {
1320 flinebeg = flinebeg->f_next;
1321 } while (flinebeg && flinebeg->f_size == 0);
1322 }
1323 break;
1324 case ',': case ';':
1325 continue;
1326 }
1327 }
1328 lex_newlines = FALSE;
1329 }
1330 }
1331 badform:
1332 bufptr = str_get(linestr);
1333 yyerror("Format not terminated");
1334 return froot.f_next;
1335}