This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
perl 2.0 (no announcement message available)
[perl5.git] / toke.c
CommitLineData
378cc40b
LW
1/* $Header: toke.c,v 2.0 88/06/05 00:11:16 root Exp $
2 *
3 * $Log: toke.c,v $
4 * Revision 2.0 88/06/05 00:11:16 root
5 * Baseline version 2.0.
6 *
7 */
8
9#include "EXTERN.h"
10#include "perl.h"
11#include "perly.h"
12
13#define CLINE (cmdline = (line < cmdline ? line : cmdline))
14
15#define RETURN(retval) return (bufptr = s,(int)retval)
16#define OPERATOR(retval) return (expectterm = TRUE,bufptr = s,(int)retval)
17#define TERM(retval) return (CLINE, expectterm = FALSE,bufptr = s,(int)retval)
18#define LOOPX(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)LOOPEX)
19#define UNI(f) return(yylval.ival = f,expectterm = TRUE,bufptr = s,(int)UNIOP)
20#define FTST(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)FILETEST)
21#define FUN0(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC0)
22#define FUN1(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC1)
23#define FUN2(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2)
24#define FUN3(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC3)
25#define SFUN(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)STABFUN)
26#define LFUN(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)LVALFUN)
27
28yylex()
29{
30 register char *s = bufptr;
31 register char *d;
32 register int tmp;
33 static bool in_format = FALSE;
34 static bool firstline = TRUE;
35
36 retry:
37#ifdef YYDEBUG
38 if (yydebug)
39 if (index(s,'\n'))
40 fprintf(stderr,"Tokener at %s",s);
41 else
42 fprintf(stderr,"Tokener at %s\n",s);
43#endif
44 switch (*s) {
45 default:
46 fprintf(stderr,
47 "Unrecognized character %c in file %s line %ld--ignoring.\n",
48 *s++,filename,(long)line);
49 goto retry;
50 case 0:
51 s = str_get(linestr);
52 *s = '\0';
53 if (firstline && (minus_n || minus_p)) {
54 firstline = FALSE;
55 str_set(linestr,"line: while (<>) {");
56 if (minus_a)
57 str_cat(linestr,"@F=split(' ');");
58 s = str_get(linestr);
59 goto retry;
60 }
61 if (!rsfp)
62 RETURN(0);
63 if (in_format) {
64 yylval.formval = load_format(); /* leaves . in buffer */
65 in_format = FALSE;
66 s = str_get(linestr);
67 TERM(FORMLIST);
68 }
69 line++;
70 if ((s = str_gets(linestr, rsfp)) == Nullch) {
71 if (preprocess)
72 pclose(rsfp);
73 else if (rsfp != stdin)
74 fclose(rsfp);
75 rsfp = Nullfp;
76 if (minus_n || minus_p) {
77 str_set(linestr,minus_p ? "}continue{print;" : "");
78 str_cat(linestr,"}");
79 s = str_get(linestr);
80 goto retry;
81 }
82 s = str_get(linestr);
83 RETURN(0);
84 }
85#ifdef DEBUG
86 else if (firstline) {
87 char *showinput();
88 s = showinput();
89 }
90#endif
91 firstline = FALSE;
92 goto retry;
93 case ' ': case '\t': case '\f':
94 s++;
95 goto retry;
96 case '\n':
97 case '#':
98 if (preprocess && s == str_get(linestr) &&
99 s[1] == ' ' && isdigit(s[2])) {
100 line = atoi(s+2)-1;
101 for (s += 2; isdigit(*s); s++) ;
102 while (*s && isspace(*s)) s++;
103 if (filename)
104 safefree(filename);
105 s[strlen(s)-1] = '\0'; /* wipe out newline */
106 if (*s == '"') {
107 s++;
108 s[strlen(s)-1] = '\0'; /* wipe out trailing quote */
109 }
110 if (*s)
111 filename = savestr(s);
112 else
113 filename = savestr(origfilename);
114 s = str_get(linestr);
115 }
116 if (in_eval) {
117 while (*s && *s != '\n')
118 s++;
119 if (*s)
120 s++;
121 line++;
122 }
123 else
124 *s = '\0';
125 if (lex_newlines)
126 RETURN('\n');
127 goto retry;
128 case '-':
129 if (s[1] && isalpha(s[1]) && !isalpha(s[2])) {
130 s++;
131 switch (*s++) {
132 case 'r': FTST(O_FTEREAD);
133 case 'w': FTST(O_FTEWRITE);
134 case 'x': FTST(O_FTEEXEC);
135 case 'o': FTST(O_FTEOWNED);
136 case 'R': FTST(O_FTRREAD);
137 case 'W': FTST(O_FTRWRITE);
138 case 'X': FTST(O_FTREXEC);
139 case 'O': FTST(O_FTROWNED);
140 case 'e': FTST(O_FTIS);
141 case 'z': FTST(O_FTZERO);
142 case 's': FTST(O_FTSIZE);
143 case 'f': FTST(O_FTFILE);
144 case 'd': FTST(O_FTDIR);
145 case 'l': FTST(O_FTLINK);
146 case 'p': FTST(O_FTPIPE);
147 case 'S': FTST(O_FTSOCK);
148 case 'u': FTST(O_FTSUID);
149 case 'g': FTST(O_FTSGID);
150 case 'k': FTST(O_FTSVTX);
151 case 'b': FTST(O_FTBLK);
152 case 'c': FTST(O_FTCHR);
153 case 't': FTST(O_FTTTY);
154 case 'T': FTST(O_FTTEXT);
155 case 'B': FTST(O_FTBINARY);
156 default:
157 s -= 2;
158 break;
159 }
160 }
161 /*FALL THROUGH*/
162 case '+':
163 if (s[1] == *s) {
164 s++;
165 if (*s++ == '+')
166 RETURN(INC);
167 else
168 RETURN(DEC);
169 }
170 /* FALL THROUGH */
171 case '*':
172 case '%':
173 case '^':
174 case '~':
175 case '(':
176 case ',':
177 case ':':
178 case '[':
179 tmp = *s++;
180 OPERATOR(tmp);
181 case '{':
182 tmp = *s++;
183 if (isspace(*s) || *s == '#')
184 cmdline = NOLINE; /* invalidate current command line number */
185 OPERATOR(tmp);
186 case ';':
187 if (line < cmdline)
188 cmdline = line;
189 tmp = *s++;
190 OPERATOR(tmp);
191 case ')':
192 case ']':
193 tmp = *s++;
194 TERM(tmp);
195 case '}':
196 tmp = *s++;
197 for (d = s; *d == ' ' || *d == '\t'; d++) ;
198 if (*d == '\n' || *d == '#')
199 OPERATOR(tmp); /* block end */
200 else
201 TERM(tmp); /* associative array end */
202 case '&':
203 s++;
204 tmp = *s++;
205 if (tmp == '&')
206 OPERATOR(ANDAND);
207 s--;
208 OPERATOR('&');
209 case '|':
210 s++;
211 tmp = *s++;
212 if (tmp == '|')
213 OPERATOR(OROR);
214 s--;
215 OPERATOR('|');
216 case '=':
217 s++;
218 tmp = *s++;
219 if (tmp == '=')
220 OPERATOR(EQ);
221 if (tmp == '~')
222 OPERATOR(MATCH);
223 s--;
224 OPERATOR('=');
225 case '!':
226 s++;
227 tmp = *s++;
228 if (tmp == '=')
229 OPERATOR(NE);
230 if (tmp == '~')
231 OPERATOR(NMATCH);
232 s--;
233 OPERATOR('!');
234 case '<':
235 if (expectterm) {
236 s = scanstr(s);
237 TERM(RSTRING);
238 }
239 s++;
240 tmp = *s++;
241 if (tmp == '<')
242 OPERATOR(LS);
243 if (tmp == '=')
244 OPERATOR(LE);
245 s--;
246 OPERATOR('<');
247 case '>':
248 s++;
249 tmp = *s++;
250 if (tmp == '>')
251 OPERATOR(RS);
252 if (tmp == '=')
253 OPERATOR(GE);
254 s--;
255 OPERATOR('>');
256
257#define SNARFWORD \
258 d = tokenbuf; \
259 while (isalpha(*s) || isdigit(*s) || *s == '_') \
260 *d++ = *s++; \
261 *d = '\0'; \
262 d = tokenbuf;
263
264 case '$':
265 if (s[1] == '#' && (isalpha(s[2]) || s[2] == '_')) {
266 s++;
267 s = scanreg(s,tokenbuf);
268 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
269 TERM(ARYLEN);
270 }
271 s = scanreg(s,tokenbuf);
272 yylval.stabval = stabent(tokenbuf,TRUE);
273 TERM(REG);
274
275 case '@':
276 s = scanreg(s,tokenbuf);
277 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
278 TERM(ARY);
279
280 case '/': /* may either be division or pattern */
281 case '?': /* may either be conditional or pattern */
282 if (expectterm) {
283 s = scanpat(s);
284 TERM(PATTERN);
285 }
286 tmp = *s++;
287 OPERATOR(tmp);
288
289 case '.':
290 if (!expectterm || !isdigit(s[1])) {
291 s++;
292 tmp = *s++;
293 if (tmp == '.')
294 OPERATOR(DOTDOT);
295 s--;
296 OPERATOR('.');
297 }
298 /* FALL THROUGH */
299 case '0': case '1': case '2': case '3': case '4':
300 case '5': case '6': case '7': case '8': case '9':
301 case '\'': case '"': case '`':
302 s = scanstr(s);
303 TERM(RSTRING);
304
305 case '_':
306 SNARFWORD;
307 yylval.cval = savestr(d);
308 OPERATOR(WORD);
309 case 'a': case 'A':
310 SNARFWORD;
311 yylval.cval = savestr(d);
312 OPERATOR(WORD);
313 case 'b': case 'B':
314 SNARFWORD;
315 yylval.cval = savestr(d);
316 OPERATOR(WORD);
317 case 'c': case 'C':
318 SNARFWORD;
319 if (strEQ(d,"continue"))
320 OPERATOR(CONTINUE);
321 if (strEQ(d,"chdir"))
322 UNI(O_CHDIR);
323 if (strEQ(d,"close"))
324 OPERATOR(CLOSE);
325 if (strEQ(d,"crypt"))
326 FUN2(O_CRYPT);
327 if (strEQ(d,"chop"))
328 LFUN(O_CHOP);
329 if (strEQ(d,"chmod")) {
330 yylval.ival = O_CHMOD;
331 OPERATOR(LISTOP);
332 }
333 if (strEQ(d,"chown")) {
334 yylval.ival = O_CHOWN;
335 OPERATOR(LISTOP);
336 }
337 yylval.cval = savestr(d);
338 OPERATOR(WORD);
339 case 'd': case 'D':
340 SNARFWORD;
341 if (strEQ(d,"do"))
342 OPERATOR(DO);
343 if (strEQ(d,"die"))
344 UNI(O_DIE);
345 if (strEQ(d,"delete"))
346 OPERATOR(DELETE);
347 yylval.cval = savestr(d);
348 OPERATOR(WORD);
349 case 'e': case 'E':
350 SNARFWORD;
351 if (strEQ(d,"else"))
352 OPERATOR(ELSE);
353 if (strEQ(d,"elsif")) {
354 yylval.ival = line;
355 OPERATOR(ELSIF);
356 }
357 if (strEQ(d,"eq") || strEQ(d,"EQ"))
358 OPERATOR(SEQ);
359 if (strEQ(d,"exit"))
360 UNI(O_EXIT);
361 if (strEQ(d,"eval")) {
362 allstabs = TRUE; /* must initialize everything since */
363 UNI(O_EVAL); /* we don't know what will be used */
364 }
365 if (strEQ(d,"eof"))
366 TERM(FEOF);
367 if (strEQ(d,"exp"))
368 FUN1(O_EXP);
369 if (strEQ(d,"each"))
370 SFUN(O_EACH);
371 if (strEQ(d,"exec")) {
372 yylval.ival = O_EXEC;
373 OPERATOR(LISTOP);
374 }
375 yylval.cval = savestr(d);
376 OPERATOR(WORD);
377 case 'f': case 'F':
378 SNARFWORD;
379 if (strEQ(d,"for"))
380 OPERATOR(FOR);
381 if (strEQ(d,"foreach"))
382 OPERATOR(FOR);
383 if (strEQ(d,"format")) {
384 in_format = TRUE;
385 OPERATOR(FORMAT);
386 }
387 if (strEQ(d,"fork"))
388 FUN0(O_FORK);
389 yylval.cval = savestr(d);
390 OPERATOR(WORD);
391 case 'g': case 'G':
392 SNARFWORD;
393 if (strEQ(d,"gt") || strEQ(d,"GT"))
394 OPERATOR(SGT);
395 if (strEQ(d,"ge") || strEQ(d,"GE"))
396 OPERATOR(SGE);
397 if (strEQ(d,"goto"))
398 LOOPX(O_GOTO);
399 if (strEQ(d,"gmtime"))
400 FUN1(O_GMTIME);
401 yylval.cval = savestr(d);
402 OPERATOR(WORD);
403 case 'h': case 'H':
404 SNARFWORD;
405 if (strEQ(d,"hex"))
406 FUN1(O_HEX);
407 yylval.cval = savestr(d);
408 OPERATOR(WORD);
409 case 'i': case 'I':
410 SNARFWORD;
411 if (strEQ(d,"if")) {
412 yylval.ival = line;
413 OPERATOR(IF);
414 }
415 if (strEQ(d,"index"))
416 FUN2(O_INDEX);
417 if (strEQ(d,"int"))
418 FUN1(O_INT);
419 yylval.cval = savestr(d);
420 OPERATOR(WORD);
421 case 'j': case 'J':
422 SNARFWORD;
423 if (strEQ(d,"join"))
424 OPERATOR(JOIN);
425 yylval.cval = savestr(d);
426 OPERATOR(WORD);
427 case 'k': case 'K':
428 SNARFWORD;
429 if (strEQ(d,"keys"))
430 SFUN(O_KEYS);
431 if (strEQ(d,"kill")) {
432 yylval.ival = O_KILL;
433 OPERATOR(LISTOP);
434 }
435 yylval.cval = savestr(d);
436 OPERATOR(WORD);
437 case 'l': case 'L':
438 SNARFWORD;
439 if (strEQ(d,"last"))
440 LOOPX(O_LAST);
441 if (strEQ(d,"local"))
442 OPERATOR(LOCAL);
443 if (strEQ(d,"length"))
444 FUN1(O_LENGTH);
445 if (strEQ(d,"lt") || strEQ(d,"LT"))
446 OPERATOR(SLT);
447 if (strEQ(d,"le") || strEQ(d,"LE"))
448 OPERATOR(SLE);
449 if (strEQ(d,"localtime"))
450 FUN1(O_LOCALTIME);
451 if (strEQ(d,"log"))
452 FUN1(O_LOG);
453 if (strEQ(d,"link"))
454 FUN2(O_LINK);
455 yylval.cval = savestr(d);
456 OPERATOR(WORD);
457 case 'm': case 'M':
458 SNARFWORD;
459 if (strEQ(d,"m")) {
460 s = scanpat(s-1);
461 TERM(PATTERN);
462 }
463 yylval.cval = savestr(d);
464 OPERATOR(WORD);
465 case 'n': case 'N':
466 SNARFWORD;
467 if (strEQ(d,"next"))
468 LOOPX(O_NEXT);
469 if (strEQ(d,"ne") || strEQ(d,"NE"))
470 OPERATOR(SNE);
471 yylval.cval = savestr(d);
472 OPERATOR(WORD);
473 case 'o': case 'O':
474 SNARFWORD;
475 if (strEQ(d,"open"))
476 OPERATOR(OPEN);
477 if (strEQ(d,"ord"))
478 FUN1(O_ORD);
479 if (strEQ(d,"oct"))
480 FUN1(O_OCT);
481 yylval.cval = savestr(d);
482 OPERATOR(WORD);
483 case 'p': case 'P':
484 SNARFWORD;
485 if (strEQ(d,"print")) {
486 yylval.ival = O_PRINT;
487 OPERATOR(LISTOP);
488 }
489 if (strEQ(d,"printf")) {
490 yylval.ival = O_PRTF;
491 OPERATOR(LISTOP);
492 }
493 if (strEQ(d,"push")) {
494 yylval.ival = O_PUSH;
495 OPERATOR(PUSH);
496 }
497 if (strEQ(d,"pop"))
498 OPERATOR(POP);
499 yylval.cval = savestr(d);
500 OPERATOR(WORD);
501 case 'q': case 'Q':
502 SNARFWORD;
503 yylval.cval = savestr(d);
504 OPERATOR(WORD);
505 case 'r': case 'R':
506 SNARFWORD;
507 if (strEQ(d,"reset"))
508 UNI(O_RESET);
509 if (strEQ(d,"redo"))
510 LOOPX(O_REDO);
511 if (strEQ(d,"rename"))
512 FUN2(O_RENAME);
513 yylval.cval = savestr(d);
514 OPERATOR(WORD);
515 case 's': case 'S':
516 SNARFWORD;
517 if (strEQ(d,"s")) {
518 s = scansubst(s);
519 TERM(SUBST);
520 }
521 if (strEQ(d,"shift"))
522 TERM(SHIFT);
523 if (strEQ(d,"split"))
524 TERM(SPLIT);
525 if (strEQ(d,"substr"))
526 FUN3(O_SUBSTR);
527 if (strEQ(d,"sprintf"))
528 OPERATOR(SPRINTF);
529 if (strEQ(d,"sub"))
530 OPERATOR(SUB);
531 if (strEQ(d,"select"))
532 OPERATOR(SELECT);
533 if (strEQ(d,"seek"))
534 OPERATOR(SEEK);
535 if (strEQ(d,"stat"))
536 OPERATOR(STAT);
537 if (strEQ(d,"study")) {
538 sawstudy++;
539 LFUN(O_STUDY);
540 }
541 if (strEQ(d,"sqrt"))
542 FUN1(O_SQRT);
543 if (strEQ(d,"sleep"))
544 UNI(O_SLEEP);
545 if (strEQ(d,"system")) {
546 yylval.ival = O_SYSTEM;
547 OPERATOR(LISTOP);
548 }
549 if (strEQ(d,"symlink"))
550 FUN2(O_SYMLINK);
551 if (strEQ(d,"sort")) {
552 yylval.ival = O_SORT;
553 OPERATOR(LISTOP);
554 }
555 yylval.cval = savestr(d);
556 OPERATOR(WORD);
557 case 't': case 'T':
558 SNARFWORD;
559 if (strEQ(d,"tr")) {
560 s = scantrans(s);
561 TERM(TRANS);
562 }
563 if (strEQ(d,"tell"))
564 TERM(TELL);
565 if (strEQ(d,"time"))
566 FUN0(O_TIME);
567 if (strEQ(d,"times"))
568 FUN0(O_TMS);
569 yylval.cval = savestr(d);
570 OPERATOR(WORD);
571 case 'u': case 'U':
572 SNARFWORD;
573 if (strEQ(d,"using"))
574 OPERATOR(USING);
575 if (strEQ(d,"until")) {
576 yylval.ival = line;
577 OPERATOR(UNTIL);
578 }
579 if (strEQ(d,"unless")) {
580 yylval.ival = line;
581 OPERATOR(UNLESS);
582 }
583 if (strEQ(d,"umask"))
584 FUN1(O_UMASK);
585 if (strEQ(d,"unshift")) {
586 yylval.ival = O_UNSHIFT;
587 OPERATOR(PUSH);
588 }
589 if (strEQ(d,"unlink")) {
590 yylval.ival = O_UNLINK;
591 OPERATOR(LISTOP);
592 }
593 if (strEQ(d,"utime")) {
594 yylval.ival = O_UTIME;
595 OPERATOR(LISTOP);
596 }
597 yylval.cval = savestr(d);
598 OPERATOR(WORD);
599 case 'v': case 'V':
600 SNARFWORD;
601 if (strEQ(d,"values"))
602 SFUN(O_VALUES);
603 yylval.cval = savestr(d);
604 OPERATOR(WORD);
605 case 'w': case 'W':
606 SNARFWORD;
607 if (strEQ(d,"write"))
608 TERM(WRITE);
609 if (strEQ(d,"while")) {
610 yylval.ival = line;
611 OPERATOR(WHILE);
612 }
613 if (strEQ(d,"wait"))
614 FUN0(O_WAIT);
615 yylval.cval = savestr(d);
616 OPERATOR(WORD);
617 case 'x': case 'X':
618 SNARFWORD;
619 if (!expectterm && strEQ(d,"x"))
620 OPERATOR('x');
621 yylval.cval = savestr(d);
622 OPERATOR(WORD);
623 case 'y': case 'Y':
624 SNARFWORD;
625 if (strEQ(d,"y")) {
626 s = scantrans(s);
627 TERM(TRANS);
628 }
629 yylval.cval = savestr(d);
630 OPERATOR(WORD);
631 case 'z': case 'Z':
632 SNARFWORD;
633 yylval.cval = savestr(d);
634 OPERATOR(WORD);
635 }
636}
637
638char *
639scanreg(s,dest)
640register char *s;
641char *dest;
642{
643 register char *d;
644
645 s++;
646 d = dest;
647 if (isdigit(*s)) {
648 while (isdigit(*s) || *s == '_')
649 *d++ = *s++;
650 }
651 else {
652 while (isalpha(*s) || isdigit(*s) || *s == '_')
653 *d++ = *s++;
654 }
655 *d = '\0';
656 d = dest;
657 if (!*d) {
658 *d = *s++;
659 if (*d == '{') {
660 d = dest;
661 while (*s && *s != '}')
662 *d++ = *s++;
663 *d = '\0';
664 d = dest;
665 if (*s)
666 s++;
667 }
668 else
669 d[1] = '\0';
670 }
671 if (*d == '^' && !isspace(*s))
672 *d = *s++ & 31;
673 return s;
674}
675
676STR *
677scanconst(string)
678char *string;
679{
680 register STR *retstr;
681 register char *t;
682 register char *d;
683
684 if (index(string,'|')) {
685 return Nullstr;
686 }
687 retstr = str_make(string);
688 t = str_get(retstr);
689 *(long*)&retstr->str_nval = 100;
690 for (d=t; *d; ) {
691 switch (*d) {
692 case '.': case '[': case '$': case '(': case ')': case '|':
693 *d = '\0';
694 break;
695 case '\\':
696 if (index("wWbB0123456789sSdD",d[1])) {
697 *d = '\0';
698 break;
699 }
700 strcpy(d,d+1);
701 switch(*d) {
702 case 'n':
703 *d = '\n';
704 break;
705 case 't':
706 *d = '\t';
707 break;
708 case 'f':
709 *d = '\f';
710 break;
711 case 'r':
712 *d = '\r';
713 break;
714 }
715 /* FALL THROUGH */
716 default:
717 if (d[1] == '*' || d[1] == '+' || d[1] == '?') {
718 *d = '\0';
719 break;
720 }
721 d++;
722 }
723 }
724 if (!*t) {
725 str_free(retstr);
726 return Nullstr;
727 }
728 retstr->str_cur = strlen(retstr->str_ptr);
729 return retstr;
730}
731
732char *
733scanpat(s)
734register char *s;
735{
736 register SPAT *spat = (SPAT *) safemalloc(sizeof (SPAT));
737 register char *d;
738
739 bzero((char *)spat, sizeof(SPAT));
740 spat->spat_next = spat_root; /* link into spat list */
741 spat_root = spat;
742
743 switch (*s++) {
744 case 'm':
745 s++;
746 break;
747 case '/':
748 break;
749 case '?':
750 spat->spat_flags |= SPAT_ONCE;
751 break;
752 default:
753 fatal("panic: scanpat");
754 }
755 s = cpytill(tokenbuf,s,s[-1]);
756 if (!*s)
757 fatal("Search pattern not terminated");
758 s++;
759 if (*s == 'i') {
760 s++;
761 spat->spat_flags |= SPAT_FOLD;
762 }
763 for (d=tokenbuf; *d; d++) {
764 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
765 register ARG *arg;
766
767 spat->spat_runtime = arg = op_new(1);
768 arg->arg_type = O_ITEM;
769 arg[1].arg_type = A_DOUBLE;
770 arg[1].arg_ptr.arg_str = str_make(tokenbuf);
771 goto got_pat; /* skip compiling for now */
772 }
773 }
774 if (!(spat->spat_flags & SPAT_FOLD)) {
775 if (*tokenbuf == '^') {
776 spat->spat_short = scanconst(tokenbuf+1);
777 if (spat->spat_short) {
778 spat->spat_slen = strlen(spat->spat_short->str_ptr);
779 if (spat->spat_slen == strlen(tokenbuf+1))
780 spat->spat_flags |= SPAT_ALL;
781 }
782 }
783 else {
784 spat->spat_flags |= SPAT_SCANFIRST;
785 spat->spat_short = scanconst(tokenbuf);
786 if (spat->spat_short) {
787 spat->spat_slen = strlen(spat->spat_short->str_ptr);
788 if (spat->spat_slen == strlen(tokenbuf))
789 spat->spat_flags |= SPAT_ALL;
790 }
791 }
792 }
793 spat->spat_regexp = regcomp(tokenbuf,spat->spat_flags & SPAT_FOLD,1);
794 hoistmust(spat);
795 got_pat:
796 yylval.arg = make_match(O_MATCH,stab2arg(A_STAB,defstab),spat);
797 return s;
798}
799
800char *
801scansubst(s)
802register char *s;
803{
804 register SPAT *spat = (SPAT *) safemalloc(sizeof (SPAT));
805 register char *d;
806
807 bzero((char *)spat, sizeof(SPAT));
808 spat->spat_next = spat_root; /* link into spat list */
809 spat_root = spat;
810
811 s = cpytill(tokenbuf,s+1,*s);
812 if (!*s)
813 fatal("Substitution pattern not terminated");
814 for (d=tokenbuf; *d; d++) {
815 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
816 register ARG *arg;
817
818 spat->spat_runtime = arg = op_new(1);
819 arg->arg_type = O_ITEM;
820 arg[1].arg_type = A_DOUBLE;
821 arg[1].arg_ptr.arg_str = str_make(tokenbuf);
822 goto get_repl; /* skip compiling for now */
823 }
824 }
825 if (*tokenbuf == '^') {
826 spat->spat_short = scanconst(tokenbuf+1);
827 if (spat->spat_short)
828 spat->spat_slen = strlen(spat->spat_short->str_ptr);
829 }
830 else {
831 spat->spat_flags |= SPAT_SCANFIRST;
832 spat->spat_short = scanconst(tokenbuf);
833 if (spat->spat_short)
834 spat->spat_slen = strlen(spat->spat_short->str_ptr);
835 }
836 d = savestr(tokenbuf);
837get_repl:
838 s = scanstr(s);
839 if (!*s)
840 fatal("Substitution replacement not terminated");
841 spat->spat_repl = yylval.arg;
842 spat->spat_flags |= SPAT_ONCE;
843 while (*s == 'g' || *s == 'i') {
844 if (*s == 'g') {
845 s++;
846 spat->spat_flags &= ~SPAT_ONCE;
847 }
848 if (*s == 'i') {
849 s++;
850 spat->spat_flags |= SPAT_FOLD;
851 }
852 }
853 if (!spat->spat_runtime) {
854 spat->spat_regexp = regcomp(d, spat->spat_flags & SPAT_FOLD,1);
855 hoistmust(spat);
856 safefree(d);
857 }
858 if (spat->spat_flags & SPAT_FOLD) { /* Oops, disable optimization */
859 str_free(spat->spat_short);
860 spat->spat_short = Nullstr;
861 spat->spat_slen = 0;
862 }
863 yylval.arg = make_match(O_SUBST,stab2arg(A_STAB,defstab),spat);
864 return s;
865}
866
867hoistmust(spat)
868register SPAT *spat;
869{
870 if (spat->spat_regexp->regmust) { /* is there a better short-circuit? */
871 if (spat->spat_short &&
872 strEQ(spat->spat_short->str_ptr,spat->spat_regexp->regmust->str_ptr)){
873 if (spat->spat_flags & SPAT_SCANFIRST) {
874 str_free(spat->spat_short);
875 spat->spat_short = Nullstr;
876 }
877 else {
878 str_free(spat->spat_regexp->regmust);
879 spat->spat_regexp->regmust = Nullstr;
880 return;
881 }
882 }
883 if (!spat->spat_short || /* promote the better string */
884 ((spat->spat_flags & SPAT_SCANFIRST) &&
885 (spat->spat_short->str_cur < spat->spat_regexp->regmust->str_cur) )){
886 str_free(spat->spat_short); /* ok if null */
887 spat->spat_short = spat->spat_regexp->regmust;
888 spat->spat_regexp->regmust = Nullstr;
889 spat->spat_flags |= SPAT_SCANFIRST;
890 }
891 }
892}
893
894char *
895expand_charset(s)
896register char *s;
897{
898 char t[512];
899 register char *d = t;
900 register int i;
901
902 while (*s) {
903 if (s[1] == '-' && s[2]) {
904 for (i = s[0]; i <= s[2]; i++)
905 *d++ = i;
906 s += 3;
907 }
908 else
909 *d++ = *s++;
910 }
911 *d = '\0';
912 return savestr(t);
913}
914
915char *
916scantrans(s)
917register char *s;
918{
919 ARG *arg =
920 l(make_op(O_TRANS,2,stab2arg(A_STAB,defstab),Nullarg,Nullarg,0));
921 register char *t;
922 register char *r;
923 register char *tbl = safemalloc(256);
924 register int i;
925
926 arg[2].arg_type = A_NULL;
927 arg[2].arg_ptr.arg_cval = tbl;
928 for (i=0; i<256; i++)
929 tbl[i] = 0;
930 s = scanstr(s);
931 if (!*s)
932 fatal("Translation pattern not terminated");
933 t = expand_charset(str_get(yylval.arg[1].arg_ptr.arg_str));
934 free_arg(yylval.arg);
935 s = scanstr(s-1);
936 if (!*s)
937 fatal("Translation replacement not terminated");
938 r = expand_charset(str_get(yylval.arg[1].arg_ptr.arg_str));
939 free_arg(yylval.arg);
940 yylval.arg = arg;
941 if (!*r) {
942 safefree(r);
943 r = t;
944 }
945 for (i = 0; t[i]; i++) {
946 if (!r[i])
947 r[i] = r[i-1];
948 tbl[t[i] & 0377] = r[i];
949 }
950 if (r != t)
951 safefree(r);
952 safefree(t);
953 return s;
954}
955
956char *
957scanstr(s)
958register char *s;
959{
960 register char term;
961 register char *d;
962 register ARG *arg;
963 register bool makesingle = FALSE;
964 register STAB *stab;
965 char *leave = "\\$nrtfb0123456789"; /* which backslash sequences to keep */
966
967 arg = op_new(1);
968 yylval.arg = arg;
969 arg->arg_type = O_ITEM;
970
971 switch (*s) {
972 default: /* a substitution replacement */
973 arg[1].arg_type = A_DOUBLE;
974 makesingle = TRUE; /* maybe disable runtime scanning */
975 term = *s;
976 if (term == '\'')
977 leave = Nullch;
978 goto snarf_it;
979 case '0':
980 {
981 long i;
982 int shift;
983
984 arg[1].arg_type = A_SINGLE;
985 if (s[1] == 'x') {
986 shift = 4;
987 s += 2;
988 }
989 else if (s[1] == '.')
990 goto decimal;
991 else
992 shift = 3;
993 i = 0;
994 for (;;) {
995 switch (*s) {
996 default:
997 goto out;
998 case '8': case '9':
999 if (shift != 4)
1000 fatal("Illegal octal digit");
1001 /* FALL THROUGH */
1002 case '0': case '1': case '2': case '3': case '4':
1003 case '5': case '6': case '7':
1004 i <<= shift;
1005 i += *s++ & 15;
1006 break;
1007 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1008 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1009 if (shift != 4)
1010 goto out;
1011 i <<= 4;
1012 i += (*s++ & 7) + 9;
1013 break;
1014 }
1015 }
1016 out:
1017 sprintf(tokenbuf,"%ld",i);
1018 arg[1].arg_ptr.arg_str = str_make(tokenbuf);
1019 }
1020 break;
1021 case '1': case '2': case '3': case '4': case '5':
1022 case '6': case '7': case '8': case '9': case '.':
1023 decimal:
1024 arg[1].arg_type = A_SINGLE;
1025 d = tokenbuf;
1026 while (isdigit(*s) || *s == '_') {
1027 if (*s == '_')
1028 s++;
1029 else
1030 *d++ = *s++;
1031 }
1032 if (*s == '.' && index("0123456789eE",s[1])) {
1033 *d++ = *s++;
1034 while (isdigit(*s) || *s == '_') {
1035 if (*s == '_')
1036 s++;
1037 else
1038 *d++ = *s++;
1039 }
1040 }
1041 if (index("eE",*s) && index("+-0123456789",s[1])) {
1042 *d++ = *s++;
1043 if (*s == '+' || *s == '-')
1044 *d++ = *s++;
1045 while (isdigit(*s))
1046 *d++ = *s++;
1047 }
1048 *d = '\0';
1049 arg[1].arg_ptr.arg_str = str_make(tokenbuf);
1050 break;
1051 case '\'':
1052 arg[1].arg_type = A_SINGLE;
1053 term = *s;
1054 leave = Nullch;
1055 goto snarf_it;
1056
1057 case '<':
1058 d = tokenbuf;
1059 s = cpytill(d,s+1,'>');
1060 if (*s)
1061 s++;
1062 if (*d == '$') d++;
1063 while (*d && (isalpha(*d) || isdigit(*d) || *d == '_')) d++;
1064 if (*d) {
1065 d = tokenbuf;
1066 arg[1].arg_type = A_GLOB;
1067 d = savestr(d);
1068 arg[1].arg_ptr.arg_stab = stab = genstab();
1069 stab->stab_io = stio_new();
1070 stab->stab_val = str_make(d);
1071 }
1072 else {
1073 d = tokenbuf;
1074 if (!*d)
1075 strcpy(d,"ARGV");
1076 if (*d == '$') {
1077 arg[1].arg_type = A_INDREAD;
1078 arg[1].arg_ptr.arg_stab = stabent(d+1,TRUE);
1079 }
1080 else {
1081 arg[1].arg_type = A_READ;
1082 if (rsfp == stdin && strEQ(d,"stdin"))
1083 fatal("Can't get both program and data from <stdin>");
1084 arg[1].arg_ptr.arg_stab = stabent(d,TRUE);
1085 arg[1].arg_ptr.arg_stab->stab_io = stio_new();
1086 if (strEQ(d,"ARGV")) {
1087 aadd(arg[1].arg_ptr.arg_stab);
1088 arg[1].arg_ptr.arg_stab->stab_io->flags |=
1089 IOF_ARGV|IOF_START;
1090 }
1091 }
1092 }
1093 break;
1094 case '"':
1095 arg[1].arg_type = A_DOUBLE;
1096 makesingle = TRUE; /* maybe disable runtime scanning */
1097 term = *s;
1098 goto snarf_it;
1099 case '`':
1100 arg[1].arg_type = A_BACKTICK;
1101 term = *s;
1102 snarf_it:
1103 {
1104 STR *tmpstr;
1105 int sqstart = line;
1106 char *tmps;
1107
1108 tmpstr = str_new(strlen(s));
1109 s = str_append_till(tmpstr,s+1,term,leave);
1110 while (!*s) { /* multiple line string? */
1111 s = str_gets(linestr, rsfp);
1112 if (!s) {
1113 line = sqstart;
1114 fatal("EOF in string");
1115 }
1116 line++;
1117 s = str_append_till(tmpstr,s,term,leave);
1118 }
1119 s++;
1120 if (term == '\'') {
1121 arg[1].arg_ptr.arg_str = tmpstr;
1122 break;
1123 }
1124 tmps = s;
1125 s = tmpstr->str_ptr;
1126 while (*s) { /* see if we can make SINGLE */
1127 if (*s == '\\' && s[1] && isdigit(s[1]) && !isdigit(s[2]) &&
1128 !index("`\"",term) )
1129 *s = '$'; /* grandfather \digit in subst */
1130 if (*s == '$' && s[1] && s[1] != ')' && s[1] != '|') {
1131 makesingle = FALSE; /* force interpretation */
1132 }
1133 else if (*s == '\\' && s[1]) {
1134 s++;
1135 }
1136 s++;
1137 }
1138 s = d = tmpstr->str_ptr; /* assuming shrinkage only */
1139 while (*s) {
1140 if (*s == '$' && s[1] && s[1] != ')' && s[1] != '|') {
1141 int len;
1142
1143 len = scanreg(s,tokenbuf) - s;
1144 stabent(tokenbuf,TRUE); /* make sure it's created */
1145 while (len--)
1146 *d++ = *s++;
1147 continue;
1148 }
1149 else if (*s == '\\' && s[1]) {
1150 s++;
1151 switch (*s) {
1152 default:
1153 if (!makesingle && (!leave || index(leave,*s)))
1154 *d++ = '\\';
1155 *d++ = *s++;
1156 continue;
1157 case '0': case '1': case '2': case '3':
1158 case '4': case '5': case '6': case '7':
1159 *d = *s++ - '0';
1160 if (index("01234567",*s)) {
1161 *d <<= 3;
1162 *d += *s++ - '0';
1163 }
1164 if (index("01234567",*s)) {
1165 *d <<= 3;
1166 *d += *s++ - '0';
1167 }
1168 d++;
1169 continue;
1170 case 'b':
1171 *d++ = '\b';
1172 break;
1173 case 'n':
1174 *d++ = '\n';
1175 break;
1176 case 'r':
1177 *d++ = '\r';
1178 break;
1179 case 'f':
1180 *d++ = '\f';
1181 break;
1182 case 't':
1183 *d++ = '\t';
1184 break;
1185 }
1186 s++;
1187 continue;
1188 }
1189 *d++ = *s++;
1190 }
1191 *d = '\0';
1192
1193 if (arg[1].arg_type == A_DOUBLE && makesingle)
1194 arg[1].arg_type = A_SINGLE; /* now we can optimize on it */
1195
1196 tmpstr->str_cur = d - tmpstr->str_ptr; /* XXX cheat */
1197 arg[1].arg_ptr.arg_str = tmpstr;
1198 s = tmps;
1199 break;
1200 }
1201 }
1202 return s;
1203}
1204
1205FCMD *
1206load_format()
1207{
1208 FCMD froot;
1209 FCMD *flinebeg;
1210 register FCMD *fprev = &froot;
1211 register FCMD *fcmd;
1212 register char *s;
1213 register char *t;
1214 register char tmpchar;
1215 bool noblank;
1216
1217 while ((s = str_gets(linestr,rsfp)) != Nullch) {
1218 line++;
1219 if (strEQ(s,".\n")) {
1220 bufptr = s;
1221 return froot.f_next;
1222 }
1223 if (*s == '#')
1224 continue;
1225 flinebeg = Nullfcmd;
1226 noblank = FALSE;
1227 while (*s) {
1228 fcmd = (FCMD *)safemalloc(sizeof (FCMD));
1229 bzero((char*)fcmd, sizeof (FCMD));
1230 fprev->f_next = fcmd;
1231 fprev = fcmd;
1232 for (t=s; *t && *t != '@' && *t != '^'; t++) {
1233 if (*t == '~') {
1234 noblank = TRUE;
1235 *t = ' ';
1236 }
1237 }
1238 tmpchar = *t;
1239 *t = '\0';
1240 fcmd->f_pre = savestr(s);
1241 fcmd->f_presize = strlen(s);
1242 *t = tmpchar;
1243 s = t;
1244 if (!*s) {
1245 if (noblank)
1246 fcmd->f_flags |= FC_NOBLANK;
1247 break;
1248 }
1249 if (!flinebeg)
1250 flinebeg = fcmd; /* start values here */
1251 if (*s++ == '^')
1252 fcmd->f_flags |= FC_CHOP; /* for doing text filling */
1253 switch (*s) {
1254 case '*':
1255 fcmd->f_type = F_LINES;
1256 *s = '\0';
1257 break;
1258 case '<':
1259 fcmd->f_type = F_LEFT;
1260 while (*s == '<')
1261 s++;
1262 break;
1263 case '>':
1264 fcmd->f_type = F_RIGHT;
1265 while (*s == '>')
1266 s++;
1267 break;
1268 case '|':
1269 fcmd->f_type = F_CENTER;
1270 while (*s == '|')
1271 s++;
1272 break;
1273 default:
1274 fcmd->f_type = F_LEFT;
1275 break;
1276 }
1277 if (fcmd->f_flags & FC_CHOP && *s == '.') {
1278 fcmd->f_flags |= FC_MORE;
1279 while (*s == '.')
1280 s++;
1281 }
1282 fcmd->f_size = s-t;
1283 }
1284 if (flinebeg) {
1285 again:
1286 if ((bufptr = str_gets(linestr ,rsfp)) == Nullch)
1287 goto badform;
1288 line++;
1289 if (strEQ(bufptr,".\n")) {
1290 yyerror("Missing values line");
1291 return froot.f_next;
1292 }
1293 if (*bufptr == '#')
1294 goto again;
1295 lex_newlines = TRUE;
1296 while (flinebeg || *bufptr) {
1297 switch(yylex()) {
1298 default:
1299 yyerror("Bad value in format");
1300 *bufptr = '\0';
1301 break;
1302 case '\n':
1303 if (flinebeg)
1304 yyerror("Missing value in format");
1305 *bufptr = '\0';
1306 break;
1307 case REG:
1308 yylval.arg = stab2arg(A_LVAL,yylval.stabval);
1309 /* FALL THROUGH */
1310 case RSTRING:
1311 if (!flinebeg)
1312 yyerror("Extra value in format");
1313 else {
1314 flinebeg->f_expr = yylval.arg;
1315 do {
1316 flinebeg = flinebeg->f_next;
1317 } while (flinebeg && flinebeg->f_size == 0);
1318 }
1319 break;
1320 case ',': case ';':
1321 continue;
1322 }
1323 }
1324 lex_newlines = FALSE;
1325 }
1326 }
1327 badform:
1328 bufptr = str_get(linestr);
1329 yyerror("Format not terminated");
1330 return froot.f_next;
1331}