1 /* $Header: toke.c,v 3.0.1.1 89/10/26 23:26:21 lwall Locked $
3 * Copyright (c) 1989, Larry Wall
5 * You may distribute under the terms of the GNU General Public License
6 * as specified in the README file that comes with the perl 3.0 kit.
9 * Revision 3.0.1.1 89/10/26 23:26:21 lwall
10 * patch1: disambiguated word after "sort" better
12 * Revision 3.0 89/10/18 15:32:33 lwall
21 char *reparse; /* if non-null, scanreg found ${foo[$bar]} */
23 #define CLINE (cmdline = (line < cmdline ? line : cmdline))
25 #define META(c) ((c) | 128)
27 #define RETURN(retval) return (bufptr = s,(int)retval)
28 #define OPERATOR(retval) return (expectterm = TRUE,bufptr = s,(int)retval)
29 #define TERM(retval) return (CLINE, expectterm = FALSE,bufptr = s,(int)retval)
30 #define LOOPX(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)LOOPEX)
31 #define FTST(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)FILETEST)
32 #define FUN0(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC0)
33 #define FUN1(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC1)
34 #define FUN2(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2)
35 #define FUN3(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC3)
36 #define FL(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST)
37 #define FL2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST2)
38 #define HFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)HSHFUN)
39 #define HFUN3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)HSHFUN3)
40 #define LFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LVALFUN)
41 #define LFUN4(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)LFUNC4)
42 #define AOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)ADDOP)
43 #define MOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)MULOP)
44 #define EOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)EQOP)
45 #define ROP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)RELOP)
46 #define FOP(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP)
47 #define FOP2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP2)
48 #define FOP3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP3)
49 #define FOP4(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP4)
50 #define FOP22(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP22)
51 #define FOP25(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP25)
53 /* This bit of chicanery makes a unary function followed by
54 * a parenthesis into a function with one argument, highest precedence.
56 #define UNI(f) return(yylval.ival = f,expectterm = TRUE,bufptr = s, \
57 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
59 /* This does similarly for list operators, merely by pretending that the
60 * paren came before the listop rather than after.
62 #define LOP(f) return(*s == '(' || (s = skipspace(s), *s == '(') ? \
63 (*s = META('('), bufptr = oldbufptr, '(') : \
64 (yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP))
70 while (s < bufend && isascii(*s) && isspace(*s))
77 register char *s = bufptr;
80 static bool in_format = FALSE;
81 static bool firstline = TRUE;
82 extern int yychar; /* last token */
84 oldoldbufptr = oldbufptr;
91 fprintf(stderr,"Tokener at %s",s);
93 fprintf(stderr,"Tokener at %s\n",s);
97 if ((*s & 127) == '(')
100 warn("Unrecognized character \\%03o ignored", *s++);
106 goto retry; /* ignore stray nulls */
109 if (minus_n || minus_p || perldb) {
112 str_cat(linestr,"do 'perldb.pl'; print $@;");
113 if (minus_n || minus_p) {
114 str_cat(linestr,"line: while (<>) {");
116 str_cat(linestr,"@F=split(' ');");
118 oldoldbufptr = oldbufptr = s = str_get(linestr);
119 bufend = linestr->str_ptr + linestr->str_cur;
124 yylval.formval = load_format();
126 oldoldbufptr = oldbufptr = s = str_get(linestr) + 1;
127 bufend = linestr->str_ptr + linestr->str_cur;
131 if ((s = str_gets(linestr, rsfp, 0)) == Nullch) {
133 (void)mypclose(rsfp);
134 else if (rsfp != stdin)
137 if (minus_n || minus_p) {
138 str_set(linestr,minus_p ? "}continue{print;" : "");
139 str_cat(linestr,"}");
140 oldoldbufptr = oldbufptr = s = str_get(linestr);
141 bufend = linestr->str_ptr + linestr->str_cur;
144 oldoldbufptr = oldbufptr = s = str_get(linestr);
148 oldoldbufptr = oldbufptr = bufptr = s;
150 STR *str = Str_new(85,0);
152 str_sset(str,linestr);
153 astore(lineary,(int)line,str);
161 bufend = linestr->str_ptr + linestr->str_cur;
164 case ' ': case '\t': case '\f':
169 if (preprocess && s == str_get(linestr) &&
170 s[1] == ' ' && isdigit(s[2])) {
172 for (s += 2; isdigit(*s); s++) ;
174 while (s < d && isspace(*s)) s++;
177 s[strlen(s)-1] = '\0'; /* wipe out newline */
180 s[strlen(s)-1] = '\0'; /* wipe out trailing quote */
183 filename = savestr(s);
185 filename = savestr(origfilename);
186 oldoldbufptr = oldbufptr = s = str_get(linestr);
188 if (in_eval && !rsfp) {
190 while (s < d && *s != '\n')
203 if (s[1] && isalpha(s[1]) && !isalpha(s[2])) {
206 case 'r': FTST(O_FTEREAD);
207 case 'w': FTST(O_FTEWRITE);
208 case 'x': FTST(O_FTEEXEC);
209 case 'o': FTST(O_FTEOWNED);
210 case 'R': FTST(O_FTRREAD);
211 case 'W': FTST(O_FTRWRITE);
212 case 'X': FTST(O_FTREXEC);
213 case 'O': FTST(O_FTROWNED);
214 case 'e': FTST(O_FTIS);
215 case 'z': FTST(O_FTZERO);
216 case 's': FTST(O_FTSIZE);
217 case 'f': FTST(O_FTFILE);
218 case 'd': FTST(O_FTDIR);
219 case 'l': FTST(O_FTLINK);
220 case 'p': FTST(O_FTPIPE);
221 case 'S': FTST(O_FTSOCK);
222 case 'u': FTST(O_FTSUID);
223 case 'g': FTST(O_FTSGID);
224 case 'k': FTST(O_FTSVTX);
225 case 'b': FTST(O_FTBLK);
226 case 'c': FTST(O_FTCHR);
227 case 't': FTST(O_FTTTY);
228 case 'T': FTST(O_FTTEXT);
229 case 'B': FTST(O_FTBINARY);
257 s = scanreg(s,bufend,tokenbuf);
258 yylval.stabval = stabent(tokenbuf,TRUE);
269 s = scanreg(s,bufend,tokenbuf);
270 yylval.stabval = stabent(tokenbuf,TRUE);
286 if (isspace(*s) || *s == '#')
287 cmdline = NOLINE; /* invalidate current command line number */
300 for (d = s; *d == ' ' || *d == '\t'; d++) ;
301 if (*d == '\n' || *d == '#')
302 OPERATOR(tmp); /* block end */
304 TERM(tmp); /* associative array end */
313 while (s < d && isspace(*s))
315 if (isalpha(*s) || *s == '_' || *s == '\'')
316 *(--s) = '\\'; /* force next ident to WORD */
370 while (isascii(*s) && \
371 (isalpha(*s) || isdigit(*s) || *s == '_' || *s == '\'')) \
379 if (s[1] == '#' && (isalpha(s[2]) || s[2] == '_')) {
381 s = scanreg(s,bufend,tokenbuf);
382 yylval.stabval = aadd(stabent(tokenbuf,TRUE));
386 s = scanreg(s,bufend,tokenbuf);
387 if (reparse) { /* turn ${foo[bar]} into ($foo[bar]) */
395 yylval.stabval = stabent(tokenbuf,TRUE);
400 s = scanreg(s,bufend,tokenbuf);
403 yylval.stabval = stabent(tokenbuf,TRUE);
406 case '/': /* may either be division or pattern */
407 case '?': /* may either be conditional or pattern */
418 if (!expectterm || !isdigit(s[1])) {
427 case '0': case '1': case '2': case '3': case '4':
428 case '5': case '6': case '7': case '8': case '9':
429 case '\'': case '"': case '`':
433 case '\\': /* some magic to force next word to be a WORD */
434 s++; /* used by do and sub to force a separate namespace */
441 if (strEQ(d,"accept"))
443 if (strEQ(d,"atan2"))
455 if (strEQ(d,"continue"))
457 if (strEQ(d,"chdir"))
459 if (strEQ(d,"close"))
461 if (strEQ(d,"closedir"))
463 if (strEQ(d,"crypt")) {
469 if (strEQ(d,"chmod"))
471 if (strEQ(d,"chown"))
473 if (strEQ(d,"connect"))
477 if (strEQ(d,"chroot"))
484 while (s < d && isspace(*s))
486 if (isalpha(*s) || *s == '_')
487 *(--s) = '\\'; /* force next ident to WORD */
492 if (strEQ(d,"defined"))
494 if (strEQ(d,"delete"))
496 if (strEQ(d,"dbmopen"))
498 if (strEQ(d,"dbmclose"))
507 if (strEQ(d,"elsif")) {
511 if (strEQ(d,"eq") || strEQ(d,"EQ"))
515 if (strEQ(d,"eval")) {
516 allstabs = TRUE; /* must initialize everything since */
517 UNI(O_EVAL); /* we don't know what will be used */
525 if (strEQ(d,"exec")) {
529 if (strEQ(d,"endhostent"))
531 if (strEQ(d,"endnetent"))
533 if (strEQ(d,"endservent"))
535 if (strEQ(d,"endprotoent"))
537 if (strEQ(d,"endpwent"))
539 if (strEQ(d,"endgrent"))
546 if (strEQ(d,"foreach"))
548 if (strEQ(d,"format")) {
550 while (s < d && isspace(*s))
552 if (isalpha(*s) || *s == '_')
553 *(--s) = '\\'; /* force next ident to WORD */
555 allstabs = TRUE; /* must initialize everything since */
556 OPERATOR(FORMAT); /* we don't know what will be used */
560 if (strEQ(d,"fcntl"))
562 if (strEQ(d,"fileno"))
564 if (strEQ(d,"flock"))
569 if (strEQ(d,"gt") || strEQ(d,"GT"))
571 if (strEQ(d,"ge") || strEQ(d,"GE"))
577 if (strEQ(d,"gmtime"))
581 if (strnEQ(d,"get",3)) {
588 if (strEQ(d,"priority"))
590 if (strEQ(d,"protobyname"))
592 if (strEQ(d,"protobynumber"))
594 if (strEQ(d,"protoent"))
596 if (strEQ(d,"pwent"))
598 if (strEQ(d,"pwnam"))
600 if (strEQ(d,"pwuid"))
602 if (strEQ(d,"peername"))
605 else if (*d == 'h') {
606 if (strEQ(d,"hostbyname"))
608 if (strEQ(d,"hostbyaddr"))
610 if (strEQ(d,"hostent"))
613 else if (*d == 'n') {
614 if (strEQ(d,"netbyname"))
616 if (strEQ(d,"netbyaddr"))
618 if (strEQ(d,"netent"))
621 else if (*d == 's') {
622 if (strEQ(d,"servbyname"))
624 if (strEQ(d,"servbyport"))
626 if (strEQ(d,"servent"))
628 if (strEQ(d,"sockname"))
630 if (strEQ(d,"sockopt"))
633 else if (*d == 'g') {
634 if (strEQ(d,"grent"))
636 if (strEQ(d,"grnam"))
638 if (strEQ(d,"grgid"))
641 else if (*d == 'l') {
642 if (strEQ(d,"login"))
659 if (strEQ(d,"index"))
663 if (strEQ(d,"ioctl"))
682 if (strEQ(d,"local"))
684 if (strEQ(d,"length"))
686 if (strEQ(d,"lt") || strEQ(d,"LT"))
688 if (strEQ(d,"le") || strEQ(d,"LE"))
690 if (strEQ(d,"localtime"))
696 if (strEQ(d,"listen"))
698 if (strEQ(d,"lstat"))
708 RETURN(1); /* force error */
710 if (strEQ(d,"mkdir"))
717 if (strEQ(d,"ne") || strEQ(d,"NE"))
728 if (strEQ(d,"opendir"))
733 if (strEQ(d,"print")) {
734 checkcomma(s,"filehandle");
737 if (strEQ(d,"printf")) {
738 checkcomma(s,"filehandle");
741 if (strEQ(d,"push")) {
742 yylval.ival = O_PUSH;
749 if (strEQ(d,"package"))
765 if (strEQ(d,"return"))
767 if (strEQ(d,"reset"))
771 if (strEQ(d,"rename"))
775 if (strEQ(d,"rmdir"))
777 if (strEQ(d,"rindex"))
781 if (strEQ(d,"readdir"))
783 if (strEQ(d,"rewinddir"))
787 if (strEQ(d,"reverse"))
789 if (strEQ(d,"readlink"))
799 RETURN(1); /* force error */
808 if (strEQ(d,"select"))
814 if (strEQ(d,"setpgrp"))
816 if (strEQ(d,"setpriority"))
818 if (strEQ(d,"sethostent"))
820 if (strEQ(d,"setnetent"))
822 if (strEQ(d,"setservent"))
824 if (strEQ(d,"setprotoent"))
826 if (strEQ(d,"setpwent"))
828 if (strEQ(d,"setgrent"))
830 if (strEQ(d,"seekdir"))
832 if (strEQ(d,"setsockopt"))
839 if (strEQ(d,"shift"))
841 if (strEQ(d,"shutdown"))
852 if (strEQ(d,"sleep"))
859 if (strEQ(d,"socket"))
861 if (strEQ(d,"socketpair"))
863 if (strEQ(d,"sort")) {
864 checkcomma(s,"subroutine name");
866 while (s < d && isascii(*s) && isspace(*s)) s++;
867 if (*s == ';' || *s == ')') /* probably a close */
868 fatal("sort is now a reserved word");
869 if (isascii(*s) && (isalpha(*s) || *s == '_')) {
870 for (d = s; isalpha(*d) || isdigit(*d) || *d == '_'; d++) ;
871 strncpy(tokenbuf,s,d-s);
872 if (strNE(tokenbuf,"keys") &&
873 strNE(tokenbuf,"values") &&
874 strNE(tokenbuf,"split") &&
875 strNE(tokenbuf,"grep") &&
876 strNE(tokenbuf,"readdir") &&
877 strNE(tokenbuf,"unpack") &&
878 strNE(tokenbuf,"do") &&
879 (d >= bufend || isspace(*d)) )
880 *(--s) = '\\'; /* force next ident to WORD */
886 if (strEQ(d,"split"))
888 if (strEQ(d,"sprintf"))
896 if (strEQ(d,"srand"))
904 if (strEQ(d,"study")) {
910 if (strEQ(d,"substr"))
912 if (strEQ(d,"sub")) {
915 while (s < d && isspace(*s))
917 if (isalpha(*s) || *s == '_' || *s == '\'') {
919 str_sset(subname,curstname);
920 str_ncat(subname,"'",1);
922 isalpha(*d) || isdigit(*d) || *d == '_' || *d == '\'';
926 str_ncat(subname,s,d-s);
928 *(--s) = '\\'; /* force next ident to WORD */
931 str_set(subname,"?");
940 if (strEQ(d,"system")) {
944 if (strEQ(d,"symlink"))
946 if (strEQ(d,"syscall"))
960 RETURN(1); /* force error */
964 if (strEQ(d,"telldir"))
968 if (strEQ(d,"times"))
973 if (strEQ(d,"using"))
975 if (strEQ(d,"until")) {
979 if (strEQ(d,"unless")) {
983 if (strEQ(d,"unlink"))
985 if (strEQ(d,"undef"))
987 if (strEQ(d,"unpack"))
989 if (strEQ(d,"utime"))
991 if (strEQ(d,"umask"))
993 if (strEQ(d,"unshift")) {
994 yylval.ival = O_UNSHIFT;
1000 if (strEQ(d,"values"))
1002 if (strEQ(d,"vec")) {
1009 if (strEQ(d,"while")) {
1013 if (strEQ(d,"warn"))
1015 if (strEQ(d,"wait"))
1017 if (strEQ(d,"wantarray")) {
1018 yylval.arg = op_new(1);
1019 yylval.arg->arg_type = O_ITEM;
1020 yylval.arg[1].arg_type = A_WANTARRAY;
1023 if (strEQ(d,"write"))
1028 if (!expectterm && strEQ(d,"x"))
1042 yylval.cval = savestr(d);
1044 if (oldoldbufptr && oldoldbufptr < bufptr) {
1045 while (isspace(*oldoldbufptr))
1047 if (*oldoldbufptr == 'p' && strnEQ(oldoldbufptr,"print",5))
1049 else if (*oldoldbufptr == 's' && strnEQ(oldoldbufptr,"sort",4))
1052 return (CLINE, bufptr = s, (int)WORD);
1062 while (s < bufend && isascii(*s) && isspace(*s))
1064 if (isascii(*s) && (isalpha(*s) || *s == '_')) {
1066 while (isalpha(*s) || isdigit(*s) || *s == '_')
1068 while (s < bufend && isspace(*s))
1071 fatal("No comma allowed after %s", what);
1076 scanreg(s,send,dest)
1078 register char *send;
1092 while (isalpha(*s) || isdigit(*s) || *s == '_' || *s == '\'')
1095 if (d > dest+1 && d[-1] == '\'')
1101 if (*d == '{' /* } */ ) {
1104 while (s < send && brackets) {
1105 if (!reparse && (d == dest || (*s && isascii(*s) &&
1106 (isalpha(*s) || isdigit(*s) || *s == '_') ))) {
1116 if (reparse && reparse == s - 1)
1130 if (*d == '^' && !isspace(*s))
1136 scanconst(string,len)
1140 register STR *retstr;
1145 if (index(string,'|')) {
1148 retstr = Str_new(86,len);
1149 str_nset(retstr,string,len);
1150 t = str_get(retstr);
1152 retstr->str_u.str_useful = 100;
1153 for (d=t; d < e; ) {
1161 case '.': case '[': case '$': case '(': case ')': case '|': case '+':
1165 if (d[1] && index("wWbB0123456789sSdD",d[1])) {
1169 (void)bcopy(d+1,d,e-d);
1188 if (d[1] == '*' || (d[1] == '{' && d[2] == '0') || d[1] == '?') {
1200 retstr->str_cur = d - t;
1208 register SPAT *spat;
1214 Newz(801,spat,1,SPAT);
1215 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1216 curstash->tbl_spatroot = spat;
1225 spat->spat_flags |= SPAT_ONCE;
1228 fatal("panic: scanpat");
1230 s = cpytill(tokenbuf,s,bufend,s[-1],&len);
1232 yyerror("Search pattern not terminated");
1233 yylval.arg = Nullarg;
1237 while (*s == 'i' || *s == 'o') {
1241 spat->spat_flags |= SPAT_FOLD;
1245 spat->spat_flags |= SPAT_KEEP;
1249 for (d=tokenbuf; d < e; d++) {
1250 if ((*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') ||
1251 (*d == '@' && d[-1] != '\\')) {
1254 spat->spat_runtime = arg = op_new(1);
1255 arg->arg_type = O_ITEM;
1256 arg[1].arg_type = A_DOUBLE;
1257 arg[1].arg_ptr.arg_str = str_make(tokenbuf,len);
1258 arg[1].arg_ptr.arg_str->str_u.str_hash = curstash;
1259 d = scanreg(d,bufend,buf);
1260 (void)stabent(buf,TRUE); /* make sure it's created */
1261 for (; d < e; d++) {
1262 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
1263 d = scanreg(d,bufend,buf);
1264 (void)stabent(buf,TRUE);
1266 else if (*d == '@' && d[-1] != '\\') {
1267 d = scanreg(d,bufend,buf);
1268 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1269 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1270 (void)stabent(buf,TRUE);
1273 goto got_pat; /* skip compiling for now */
1276 if (spat->spat_flags & SPAT_FOLD)
1280 (void)bcopy((char *)spat, (char *)&savespat, sizeof(SPAT));
1282 if (*tokenbuf == '^') {
1283 spat->spat_short = scanconst(tokenbuf+1,len-1);
1284 if (spat->spat_short) {
1285 spat->spat_slen = spat->spat_short->str_cur;
1286 if (spat->spat_slen == len - 1)
1287 spat->spat_flags |= SPAT_ALL;
1291 spat->spat_flags |= SPAT_SCANFIRST;
1292 spat->spat_short = scanconst(tokenbuf,len);
1293 if (spat->spat_short) {
1294 spat->spat_slen = spat->spat_short->str_cur;
1295 if (spat->spat_slen == len)
1296 spat->spat_flags |= SPAT_ALL;
1299 if ((spat->spat_flags & SPAT_ALL) && (spat->spat_flags & SPAT_SCANFIRST)) {
1300 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1301 spat->spat_regexp = regcomp(tokenbuf,tokenbuf+len,
1302 spat->spat_flags & SPAT_FOLD,1);
1303 /* Note that this regexp can still be used if someone says
1304 * something like /a/ && s//b/; so we can't delete it.
1308 if (spat->spat_flags & SPAT_FOLD)
1312 (void)bcopy((char *)&savespat, (char *)spat, sizeof(SPAT));
1314 if (spat->spat_short)
1315 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1316 spat->spat_regexp = regcomp(tokenbuf,tokenbuf+len,
1317 spat->spat_flags & SPAT_FOLD,1);
1321 yylval.arg = make_match(O_MATCH,stab2arg(A_STAB,defstab),spat);
1329 register SPAT *spat;
1334 Newz(802,spat,1,SPAT);
1335 spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
1336 curstash->tbl_spatroot = spat;
1338 s = cpytill(tokenbuf,s+1,bufend,*s,&len);
1340 yyerror("Substitution pattern not terminated");
1341 yylval.arg = Nullarg;
1345 for (d=tokenbuf; d < e; d++) {
1346 if ((*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') ||
1347 (*d == '@' && d[-1] != '\\')) {
1350 spat->spat_runtime = arg = op_new(1);
1351 arg->arg_type = O_ITEM;
1352 arg[1].arg_type = A_DOUBLE;
1353 arg[1].arg_ptr.arg_str = str_make(tokenbuf,len);
1354 arg[1].arg_ptr.arg_str->str_u.str_hash = curstash;
1355 d = scanreg(d,bufend,buf);
1356 (void)stabent(buf,TRUE); /* make sure it's created */
1358 if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
1359 d = scanreg(d,bufend,buf);
1360 (void)stabent(buf,TRUE);
1362 else if (*d == '@' && d[-1] != '\\') {
1363 d = scanreg(d,bufend,buf);
1364 if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
1365 strEQ(buf,"SIG") || strEQ(buf,"INC"))
1366 (void)stabent(buf,TRUE);
1369 goto get_repl; /* skip compiling for now */
1372 if (*tokenbuf == '^') {
1373 spat->spat_short = scanconst(tokenbuf+1,len-1);
1374 if (spat->spat_short)
1375 spat->spat_slen = spat->spat_short->str_cur;
1378 spat->spat_flags |= SPAT_SCANFIRST;
1379 spat->spat_short = scanconst(tokenbuf,len);
1380 if (spat->spat_short)
1381 spat->spat_slen = spat->spat_short->str_cur;
1383 d = nsavestr(tokenbuf,len);
1387 yyerror("Substitution replacement not terminated");
1388 yylval.arg = Nullarg;
1391 spat->spat_repl = yylval.arg;
1392 spat->spat_flags |= SPAT_ONCE;
1393 if ((spat->spat_repl[1].arg_type & A_MASK) == A_SINGLE)
1394 spat->spat_flags |= SPAT_CONST;
1395 else if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE) {
1399 spat->spat_flags |= SPAT_CONST;
1400 tmpstr = spat->spat_repl[1].arg_ptr.arg_str;
1401 e = tmpstr->str_ptr + tmpstr->str_cur;
1402 for (t = tmpstr->str_ptr; t < e; t++) {
1403 if (*t == '$' && t[1] && index("`'&+0123456789",t[1]))
1404 spat->spat_flags &= ~SPAT_CONST;
1407 while (*s == 'g' || *s == 'i' || *s == 'e' || *s == 'o') {
1410 if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE)
1411 spat->spat_repl[1].arg_type = A_SINGLE;
1412 spat->spat_repl = fixeval(make_op(O_EVAL,2,
1416 spat->spat_flags &= ~SPAT_CONST;
1420 spat->spat_flags &= ~SPAT_ONCE;
1425 spat->spat_flags |= SPAT_FOLD;
1426 if (!(spat->spat_flags & SPAT_SCANFIRST)) {
1427 str_free(spat->spat_short); /* anchored opt doesn't do */
1428 spat->spat_short = Nullstr; /* case insensitive match */
1429 spat->spat_slen = 0;
1434 spat->spat_flags |= SPAT_KEEP;
1437 if (spat->spat_short && (spat->spat_flags & SPAT_SCANFIRST))
1438 fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
1439 if (!spat->spat_runtime) {
1440 spat->spat_regexp = regcomp(d,d+len,spat->spat_flags & SPAT_FOLD,1);
1444 yylval.arg = make_match(O_SUBST,stab2arg(A_STAB,defstab),spat);
1449 register SPAT *spat;
1451 if (spat->spat_regexp->regmust) { /* is there a better short-circuit? */
1452 if (spat->spat_short &&
1453 str_eq(spat->spat_short,spat->spat_regexp->regmust))
1455 if (spat->spat_flags & SPAT_SCANFIRST) {
1456 str_free(spat->spat_short);
1457 spat->spat_short = Nullstr;
1460 str_free(spat->spat_regexp->regmust);
1461 spat->spat_regexp->regmust = Nullstr;
1465 if (!spat->spat_short || /* promote the better string */
1466 ((spat->spat_flags & SPAT_SCANFIRST) &&
1467 (spat->spat_short->str_cur < spat->spat_regexp->regmust->str_cur) )){
1468 str_free(spat->spat_short); /* ok if null */
1469 spat->spat_short = spat->spat_regexp->regmust;
1470 spat->spat_regexp->regmust = Nullstr;
1471 spat->spat_flags |= SPAT_SCANFIRST;
1477 expand_charset(s,len,retlen)
1483 register char *d = t;
1485 register char *send = s + len;
1488 if (s[1] == '-' && s+2 < send) {
1489 for (i = s[0]; i <= s[2]; i++)
1498 return nsavestr(t,d-t);
1506 l(make_op(O_TRANS,2,stab2arg(A_STAB,defstab),Nullarg,Nullarg));
1514 Newz(803,tbl,256,char);
1515 arg[2].arg_type = A_NULL;
1516 arg[2].arg_ptr.arg_cval = tbl;
1519 yyerror("Translation pattern not terminated");
1520 yylval.arg = Nullarg;
1523 t = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1524 yylval.arg[1].arg_ptr.arg_str->str_cur,&tlen);
1525 free_arg(yylval.arg);
1528 yyerror("Translation replacement not terminated");
1529 yylval.arg = Nullarg;
1532 r = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
1533 yylval.arg[1].arg_ptr.arg_str->str_cur,&rlen);
1534 free_arg(yylval.arg);
1540 for (i = 0, j = 0; i < tlen; i++,j++) {
1543 tbl[t[i] & 0377] = r[j];
1558 register char *send;
1559 register bool makesingle = FALSE;
1560 register STAB *stab;
1561 bool alwaysdollar = FALSE;
1562 bool hereis = FALSE;
1564 char *leave = "\\$@nrtfb0123456789[{]}"; /* which backslash sequences to keep */
1569 arg->arg_type = O_ITEM;
1572 default: /* a substitution replacement */
1573 arg[1].arg_type = A_DOUBLE;
1574 makesingle = TRUE; /* maybe disable runtime scanning */
1584 arg[1].arg_type = A_SINGLE;
1589 else if (s[1] == '.')
1600 yyerror("Illegal octal digit");
1602 case '0': case '1': case '2': case '3': case '4':
1603 case '5': case '6': case '7':
1607 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1608 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1612 i += (*s++ & 7) + 9;
1617 (void)sprintf(tokenbuf,"%ld",i);
1618 arg[1].arg_ptr.arg_str = str_make(tokenbuf,strlen(tokenbuf));
1619 (void)str_2num(arg[1].arg_ptr.arg_str);
1622 case '1': case '2': case '3': case '4': case '5':
1623 case '6': case '7': case '8': case '9': case '.':
1625 arg[1].arg_type = A_SINGLE;
1627 while (isdigit(*s) || *s == '_') {
1633 if (*s == '.' && s[1] && index("0123456789eE ;",s[1])) {
1635 while (isdigit(*s) || *s == '_') {
1642 if (*s && index("eE",*s) && index("+-0123456789",s[1])) {
1644 if (*s == '+' || *s == '-')
1650 arg[1].arg_ptr.arg_str = str_make(tokenbuf, d - tokenbuf);
1651 (void)str_2num(arg[1].arg_ptr.arg_str);
1659 if (*++s && index("`'\"",*s)) {
1661 s = cpytill(d,s,bufend,term,&len);
1671 while (isascii(*s) && (isalpha(*s) || isdigit(*s) || *s == '_'))
1673 } /* assuming tokenbuf won't clobber */
1678 if (rsfp || !(d=ninstr(s,bufend,d,d+1)))
1679 herewas = str_make(s,bufend-s);
1681 s--, herewas = str_make(s,d-s);
1682 s += herewas->str_cur;
1690 s = cpytill(d,s,bufend,'>',&len);
1695 (isalpha(*d) || isdigit(*d) || *d == '_' || *d == '\''))
1697 if (d - tokenbuf != len) {
1699 arg[1].arg_type = A_GLOB;
1700 d = nsavestr(d,len);
1701 arg[1].arg_ptr.arg_stab = stab = genstab();
1702 stab_io(stab) = stio_new();
1703 stab_val(stab) = str_make(d,len);
1704 stab_val(stab)->str_u.str_hash = curstash;
1711 (void)strcpy(d,"ARGV");
1713 arg[1].arg_type = A_INDREAD;
1714 arg[1].arg_ptr.arg_stab = stabent(d+1,TRUE);
1717 arg[1].arg_type = A_READ;
1718 if (rsfp == stdin && (strEQ(d,"stdin") || strEQ(d,"STDIN")))
1719 yyerror("Can't get both program and data from <STDIN>");
1720 arg[1].arg_ptr.arg_stab = stabent(d,TRUE);
1721 if (!stab_io(arg[1].arg_ptr.arg_stab))
1722 stab_io(arg[1].arg_ptr.arg_stab) = stio_new();
1723 if (strEQ(d,"ARGV")) {
1724 (void)aadd(arg[1].arg_ptr.arg_stab);
1725 stab_io(arg[1].arg_ptr.arg_stab)->flags |=
1742 arg[1].arg_type = A_SINGLE;
1749 arg[1].arg_type = A_DOUBLE;
1750 makesingle = TRUE; /* maybe disable runtime scanning */
1751 alwaysdollar = TRUE; /* treat $) and $| as variables */
1756 arg[1].arg_type = A_BACKTICK;
1758 alwaysdollar = TRUE; /* treat $) and $| as variables */
1766 multi_open = multi_close = '<';
1769 if (tmps = index("([{< )]}> )]}>",term))
1773 tmpstr = Str_new(87,0);
1778 while (s < bufend &&
1779 (*s != term || bcmp(s,tokenbuf,len) != 0) ) {
1785 fatal("EOF in string");
1787 str_nset(tmpstr,d+1,s-d);
1789 str_ncat(herewas,s,bufend-s);
1790 str_replace(linestr,herewas);
1791 oldoldbufptr = oldbufptr = bufptr = s = str_get(linestr);
1792 bufend = linestr->str_ptr + linestr->str_cur;
1797 s = str_append_till(tmpstr,s+1,bufend,term,leave);
1798 while (s >= bufend) { /* multiple line string? */
1800 !(oldoldbufptr = oldbufptr = s = str_gets(linestr, rsfp, 0))) {
1802 fatal("EOF in string");
1806 STR *str = Str_new(88,0);
1808 str_sset(str,linestr);
1809 astore(lineary,(int)line,str);
1811 bufend = linestr->str_ptr + linestr->str_cur;
1813 if (*s == term && bcmp(s,tokenbuf,len) == 0) {
1816 str_scat(linestr,herewas);
1817 bufend = linestr->str_ptr + linestr->str_cur;
1821 str_scat(tmpstr,linestr);
1825 s = str_append_till(tmpstr,s,bufend,term,leave);
1829 if (tmpstr->str_cur + 5 < tmpstr->str_len) {
1830 tmpstr->str_len = tmpstr->str_cur + 1;
1831 Renew(tmpstr->str_ptr, tmpstr->str_len, char);
1833 if ((arg[1].arg_type & A_MASK) == A_SINGLE) {
1834 arg[1].arg_ptr.arg_str = tmpstr;
1838 s = tmpstr->str_ptr;
1839 send = s + tmpstr->str_cur;
1840 while (s < send) { /* see if we can make SINGLE */
1841 if (*s == '\\' && s[1] && isdigit(s[1]) && !isdigit(s[2]) &&
1843 *s = '$'; /* grandfather \digit in subst */
1844 if ((*s == '$' || *s == '@') && s+1 < send &&
1845 (alwaysdollar || (s[1] != ')' && s[1] != '|'))) {
1846 makesingle = FALSE; /* force interpretation */
1848 else if (*s == '\\' && s+1 < send) {
1853 s = d = tmpstr->str_ptr; /* assuming shrinkage only */
1855 if ((*s == '$' && s+1 < send &&
1856 (alwaysdollar || /*(*/ (s[1] != ')' && s[1] != '|')) ) ||
1857 (*s == '@' && s+1 < send) ) {
1858 len = scanreg(s,bufend,tokenbuf) - s;
1859 if (*s == '$' || strEQ(tokenbuf,"ARGV")
1860 || strEQ(tokenbuf,"ENV")
1861 || strEQ(tokenbuf,"SIG")
1862 || strEQ(tokenbuf,"INC") )
1863 (void)stabent(tokenbuf,TRUE); /* make sure it exists */
1868 else if (*s == '\\' && s+1 < send) {
1872 if (!makesingle && (!leave || (*s && index(leave,*s))))
1876 case '0': case '1': case '2': case '3':
1877 case '4': case '5': case '6': case '7':
1879 if (s < send && *s && index("01234567",*s)) {
1883 if (s < send && *s && index("01234567",*s)) {
1912 if ((arg[1].arg_type & A_MASK) == A_DOUBLE && makesingle)
1913 arg[1].arg_type = A_SINGLE; /* now we can optimize on it */
1915 tmpstr->str_u.str_hash = curstash; /* so interp knows package */
1917 tmpstr->str_cur = d - tmpstr->str_ptr;
1918 arg[1].arg_ptr.arg_str = tmpstr;
1933 register FCMD *fprev = &froot;
1934 register FCMD *fcmd;
1941 Zero(&froot, 1, FCMD);
1942 while ((s = str_gets(linestr,rsfp, 0)) != Nullch) {
1945 STR *tmpstr = Str_new(89,0);
1947 str_sset(tmpstr,linestr);
1948 astore(lineary,(int)line,tmpstr);
1950 bufend = linestr->str_ptr + linestr->str_cur;
1951 if (strEQ(s,".\n")) {
1953 return froot.f_next;
1957 flinebeg = Nullfcmd;
1960 while (s < bufend) {
1961 Newz(804,fcmd,1,FCMD);
1962 fprev->f_next = fcmd;
1964 for (t=s; t < bufend && *t != '@' && *t != '^'; t++) {
1974 fcmd->f_pre = nsavestr(s, t-s);
1975 fcmd->f_presize = t-s;
1979 fcmd->f_flags |= FC_NOBLANK;
1981 fcmd->f_flags |= FC_REPEAT;
1985 flinebeg = fcmd; /* start values here */
1987 fcmd->f_flags |= FC_CHOP; /* for doing text filling */
1990 fcmd->f_type = F_LINES;
1994 fcmd->f_type = F_LEFT;
1999 fcmd->f_type = F_RIGHT;
2004 fcmd->f_type = F_CENTER;
2009 fcmd->f_type = F_LEFT;
2012 if (fcmd->f_flags & FC_CHOP && *s == '.') {
2013 fcmd->f_flags |= FC_MORE;
2021 if ((s = str_gets(linestr, rsfp, 0)) == Nullch)
2025 STR *tmpstr = Str_new(90,0);
2027 str_sset(tmpstr,linestr);
2028 astore(lineary,(int)line,tmpstr);
2030 if (strEQ(s,".\n")) {
2032 yyerror("Missing values line");
2033 return froot.f_next;
2037 bufend = linestr->str_ptr + linestr->str_cur;
2038 str = flinebeg->f_unparsed = Str_new(91,bufend - bufptr);
2039 str->str_u.str_hash = curstash;
2040 str_nset(str,"(",1);
2041 flinebeg->f_line = line;
2042 if (!flinebeg->f_next->f_type || index(linestr->str_ptr, ',')) {
2043 str_scat(str,linestr);
2044 str_ncat(str,",$$);",5);
2047 while (s < bufend && isspace(*s))
2050 while (s < bufend) {
2052 case ' ': case '\t': case '\n': case ';':
2053 str_ncat(str, t, s - t);
2054 str_ncat(str, "," ,1);
2055 while (s < bufend && (isspace(*s) || *s == ';'))
2060 str_ncat(str, t, s - t);
2062 s = scanreg(s,bufend,tokenbuf);
2063 str_ncat(str, t, s - t);
2065 if (s < bufend && *s && index("$'\"",*s))
2066 str_ncat(str, ",", 1);
2068 case '"': case '\'':
2069 str_ncat(str, t, s - t);
2072 while (s < bufend && (*s != *t || s[-1] == '\\'))
2076 str_ncat(str, t, s - t);
2078 if (s < bufend && *s && index("$'\"",*s))
2079 str_ncat(str, ",", 1);
2082 yyerror("Please use commas to separate fields");
2085 str_ncat(str,"$$);",4);
2090 bufptr = str_get(linestr);
2091 yyerror("Format not terminated");
2092 return froot.f_next;
2098 if (stat("/bin/csh",&statbuf) < 0)