X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/ff68c7194e176ca1907544a3a65684b76834d0fe..324aa91a3e6fd44523a86df7c5575563c5adf45c:/toke.c diff --git a/toke.c b/toke.c index 7dd35cb..9c4f487 100644 --- a/toke.c +++ b/toke.c @@ -1,6 +1,6 @@ /* toke.c * - * Copyright (c) 1991-1994, Larry Wall + * Copyright (c) 1991-1997, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. @@ -22,13 +22,15 @@ static SV *q _((SV *sv)); static char *scan_const _((char *start)); static char *scan_formline _((char *s)); static char *scan_heredoc _((char *s)); -static char *scan_ident _((char *s, char *send, char *dest, I32 ck_uni)); +static char *scan_ident _((char *s, char *send, char *dest, STRLEN destlen, + I32 ck_uni)); static char *scan_inputsymbol _((char *start)); static char *scan_pat _((char *start)); static char *scan_str _((char *start)); static char *scan_subst _((char *start)); static char *scan_trans _((char *start)); -static char *scan_word _((char *s, char *dest, int allow_package, STRLEN *slp)); +static char *scan_word _((char *s, char *dest, STRLEN destlen, + int allow_package, STRLEN *slp)); static char *skipspace _((char *s)); static void checkcomma _((char *s, char *name, char *what)); static void force_ident _((char *s, int kind)); @@ -48,6 +50,8 @@ static int uni _((I32 f, char *s)); static char * filter_gets _((SV *sv, PerlIO *fp, STRLEN append)); static void restore_rsfp _((void *f)); +static char ident_too_long[] = "Identifier too long"; + static char *linestart; /* beg. of most recently read line */ static char pending_ident; /* pending identifier lookup */ @@ -157,12 +161,11 @@ no_op(what, s) char *what; char *s; { - char tmpbuf[128]; char *oldbp = bufptr; bool is_first = (oldbufptr == linestart); + bufptr = s; - sprintf(tmpbuf, "%s found where operator expected", what); - yywarn(tmpbuf); + yywarn(form("%s found where operator expected", what)); if (is_first) warn("\t(Missing semicolon on previous line?)\n"); else if (oldoldbufptr && isIDFIRST(*oldoldbufptr)) { @@ -286,6 +289,7 @@ SV *line; void lex_end() { + doextract = FALSE; } static void @@ -444,10 +448,15 @@ char *s; #define LOP(f,x) return lop(f,x,s) static I32 -lop(f,x,s) +lop +#ifdef CAN_PROTOTYPE + (I32 f, expectation x, char *s) +#else + (f,x,s) I32 f; expectation x; char *s; +#endif /* CAN_PROTOTYPE */ { yylval.ival = f; CLINE; @@ -496,7 +505,7 @@ int allow_tick; (allow_pack && *s == ':') || (allow_tick && *s == '\'') ) { - s = scan_word(s, tokenbuf, allow_pack, &len); + s = scan_word(s, tokenbuf, sizeof tokenbuf, allow_pack, &len); if (check_keyword && keyword(tokenbuf, len)) return start; if (token == METHOD) { @@ -613,7 +622,11 @@ sublex_start() return THING; } if (op_type == OP_CONST || op_type == OP_READLINE) { - yylval.opval = (OP*)newSVOP(op_type, 0, q(lex_stuff)); + SV *sv = q(lex_stuff); + STRLEN len; + char *p = SvPV(sv, len); + yylval.opval = (OP*)newSVOP(op_type, 0, newSVpv(p, len)); + SvREFCNT_dec(sv); lex_stuff = Nullsv; return THING; } @@ -910,7 +923,7 @@ register char *s; char seen[256]; unsigned char un_char = 0, last_un_char; char *send = strchr(s,']'); - char tmpbuf[512]; + char tmpbuf[sizeof tokenbuf * 4]; if (!send) /* has to be an expression */ return TRUE; @@ -935,7 +948,7 @@ register char *s; case '$': weight -= seen[un_char] * 10; if (isALNUM(s[1])) { - scan_ident(s,send,tmpbuf,FALSE); + scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE); if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV)) weight -= 100; else @@ -1005,17 +1018,17 @@ char *start; GV *gv; { char *s = start + (*start == '$'); - char tmpbuf[1024]; + char tmpbuf[sizeof tokenbuf]; STRLEN len; GV* indirgv; if (gv) { if (GvIO(gv)) return 0; - if (!GvCV(gv)) + if (!GvCVu(gv)) gv = 0; } - s = scan_word(s, tmpbuf, TRUE, &len); + s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); if (*start == '$') { if (gv || last_lop_op == OP_PRINT || isUPPER(*tokenbuf)) return 0; @@ -1026,7 +1039,7 @@ GV *gv; } if (!keyword(tmpbuf, len)) { indirgv = gv_fetchpv(tmpbuf,FALSE, SVt_PVCV); - if (indirgv && GvCV(indirgv)) + if (indirgv && GvCVu(indirgv)) return 0; /* filehandle or package name makes it a method */ if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) { @@ -1095,7 +1108,7 @@ filter_add(funcp, datasv) die("Can't upgrade filter_add data to SVt_PVIO"); IoDIRP(datasv) = (DIR*)funcp; /* stash funcp into spare field */ if (filter_debug) - warn("filter_add func %lx (%s)", funcp, SvPV(datasv,na)); + warn("filter_add func %p (%s)", funcp, SvPV(datasv,na)); av_unshift(rsfp_filters, 1); av_store(rsfp_filters, 0, datasv) ; return(datasv); @@ -1108,7 +1121,7 @@ filter_del(funcp) filter_t funcp; { if (filter_debug) - warn("filter_del func %lx", funcp); + warn("filter_del func %p", funcp); if (!rsfp_filters || AvFILL(rsfp_filters)<0) return; /* if filter is on top of stack (usual case) just pop it off */ @@ -1174,7 +1187,7 @@ filter_read(idx, buf_sv, maxlen) /* Get function pointer hidden within datasv */ funcp = (filter_t)IoDIRP(datasv); if (filter_debug) - warn("filter_read %d: via function %lx (%s)\n", + warn("filter_read %d: via function %p (%s)\n", idx, funcp, SvPV(datasv,na)); /* Call function. The function is expected to */ /* call "FILTER_READ(idx+1, buf_sv)" first. */ @@ -1208,7 +1221,7 @@ STRLEN append; { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "TERMBLOCK" }; #endif -extern int yychar; /* last token */ +EXT int yychar; /* last token */ int yylex() @@ -1255,11 +1268,9 @@ yylex() /* Force them to make up their mind on "@foo". */ if (pit == '@' && lex_state != LEX_NORMAL && !lex_brackets) { GV *gv = gv_fetchpv(tokenbuf+1, FALSE, SVt_PVAV); - if (!gv || (tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)) { - char tmpbuf[1024]; - sprintf(tmpbuf, "Literal %s now requires backslash", tokenbuf); - yyerror(tmpbuf); - } + if (!gv || ((tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv))) + yyerror(form("In string, %s now must be written as \\%s", + tokenbuf, tokenbuf)); } yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(tokenbuf+1, 0)); @@ -1378,9 +1389,7 @@ yylex() s = bufptr; Aop(OP_CONCAT); } - else - return yylex(); - break; + return yylex(); case LEX_INTERPENDMAYBE: if (intuit_more(bufptr)) { @@ -1450,8 +1459,7 @@ yylex() retry: switch (*s) { default: - warn("Unrecognized character \\%03o ignored", *s++ & 255); - goto retry; + croak("Unrecognized character \\%03o", *s & 255); case 4: case 26: goto fake_eof; /* emulate EOF on ^D or ^Z */ @@ -1486,16 +1494,28 @@ yylex() sv_catpv(linestr, "LINE: while (<>) {"); if (minus_l) sv_catpv(linestr,"chomp;"); - if (minus_a){ - if (minus_F){ - char tmpbuf1[50]; - if ( splitstr[0] == '/' || - splitstr[0] == '\'' || - splitstr[0] == '"' ) - sprintf( tmpbuf1, "@F=split(%s);", splitstr ); - else - sprintf( tmpbuf1, "@F=split('%s');", splitstr ); - sv_catpv(linestr,tmpbuf1); + if (minus_a) { + GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV); + if (gv) + GvIMPORTED_AV_on(gv); + if (minus_F) { + if (strchr("/'\"", *splitstr) + && strchr(splitstr + 1, *splitstr)) + sv_catpvf(linestr, "@F=split(%s);", splitstr); + else { + char delim; + s = "'~#\200\1'"; /* surely one char is unused...*/ + while (s[1] && strchr(splitstr, *s)) s++; + delim = *s; + sv_catpvf(linestr, "@F=split(%s%c", + "q" + (delim == '\''), delim); + for (s = splitstr; *s; s++) { + if (*s == '\\') + sv_catpvn(linestr, "\\", 1); + sv_catpvn(linestr, s, 1); + } + sv_catpvf(linestr, "%c);", delim); + } } else sv_catpv(linestr,"@F=split(' ');"); @@ -1565,25 +1585,84 @@ yylex() s++; if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */ s++; - if (!in_eval && *s == '#' && s[1] == '!') { + d = Nullch; + if (!in_eval) { + if (*s == '#' && *(s+1) == '!') + d = s + 2; +#ifdef ALTERNATE_SHEBANG + else { + static char as[] = ALTERNATE_SHEBANG; + if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1)) + d = s + (sizeof(as) - 1); + } +#endif /* ALTERNATE_SHEBANG */ + } + if (d) { + char *ipath; + char *ipathend; + + while (isSPACE(*d)) + d++; + ipath = d; + while (*d && !isSPACE(*d)) + d++; + ipathend = d; + +#ifdef ARG_ZERO_IS_SCRIPT + if (ipathend > ipath) { + /* + * HP-UX (at least) sets argv[0] to the script name, + * which makes $^X incorrect. And Digital UNIX and Linux, + * at least, set argv[0] to the basename of the Perl + * interpreter. So, having found "#!", we'll set it right. + */ + SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); + assert(SvPOK(x) || SvGMAGICAL(x)); + if (sv_eq(x, GvSV(curcop->cop_filegv))) { + sv_setpvn(x, ipath, ipathend - ipath); + SvSETMAGIC(x); + } + TAINT_NOT; /* $^X is always tainted, but that's OK */ + } +#endif /* ARG_ZERO_IS_SCRIPT */ + + /* + * Look for options. + */ d = instr(s,"perl -"); if (!d) d = instr(s,"perl"); +#ifdef ALTERNATE_SHEBANG + /* + * If the ALTERNATE_SHEBANG on this system starts with a + * character that can be part of a Perl expression, then if + * we see it but not "perl", we're probably looking at the + * start of Perl code, not a request to hand off to some + * other interpreter. Similarly, if "perl" is there, but + * not in the first 'word' of the line, we assume the line + * contains the start of the Perl program. + */ + if (d && *s != '#') { + char *c = ipath; + while (*c && !strchr("; \t\r\n\f\v#", *c)) + c++; + if (c < d) + d = Nullch; /* "perl" not in first word; ignore */ + else + *s = '#'; /* Don't try to parse shebang line */ + } +#endif /* ALTERNATE_SHEBANG */ if (!d && + *s == '#' && + ipathend > ipath && !minus_c && !instr(s,"indir") && instr(origargv[0],"perl")) { char **newargv; - char *cmd; - s += 2; - if (*s == ' ') - s++; - cmd = s; - while (s < bufend && !isSPACE(*s)) - s++; - *s++ = '\0'; + *ipathend = '\0'; + s = ipathend + 1; while (s < bufend && isSPACE(*s)) s++; if (s < bufend) { @@ -1596,20 +1675,28 @@ yylex() } else newargv = origargv; - newargv[0] = cmd; - execv(cmd,newargv); - croak("Can't exec %s", cmd); + newargv[0] = ipath; + execv(ipath, newargv); + croak("Can't exec %s", ipath); } if (d) { - int oldpdb = perldb; - int oldn = minus_n; - int oldp = minus_p; + U32 oldpdb = perldb; + bool oldn = minus_n; + bool oldp = minus_p; while (*d && !isSPACE(*d)) d++; while (*d == ' ' || *d == '\t') d++; if (*d++ == '-') { - while (d = moreswitches(d)) ; + do { + if (*d == 'M' || *d == 'm') { + char *m = d; + while (*d && !isSPACE(*d)) d++; + croak("Too late for \"-%.*s\" option", + (int)(d - m), m); + } + d = moreswitches(d); + } while (d); if (perldb && !oldpdb || ( minus_n || minus_p ) && !(oldn || oldp) ) /* if we have already added "LINE: while (<>) {", @@ -1633,7 +1720,11 @@ yylex() return yylex(); } goto retry; - case ' ': case '\t': case '\f': case '\r': case 013: + case '\r': + warn("Illegal character \\%03o (carriage return)", '\r'); + croak( + "(Maybe you didn't strip carriage returns after a network transfer?)\n"); + case ' ': case '\t': case '\f': case 013: s++; goto retry; case '#': @@ -1668,7 +1759,7 @@ yylex() if (strnEQ(s,"=>",2)) { if (dowarn) warn("Ambiguous use of -%c => resolved to \"-%c\" =>", - tmp, tmp); + (int)tmp, (int)tmp); s = force_word(bufptr,WORD,FALSE,FALSE,FALSE); OPERATOR('-'); /* unary minus */ } @@ -1703,7 +1794,7 @@ yylex() case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME); case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME); default: - croak("Unrecognized file test: -%c", tmp); + croak("Unrecognized file test: -%c", (int)tmp); break; } } @@ -1754,7 +1845,7 @@ yylex() case '*': if (expect != XOPERATOR) { - s = scan_ident(s, bufend, tokenbuf, TRUE); + s = scan_ident(s, bufend, tokenbuf, sizeof tokenbuf, TRUE); expect = XOPERATOR; force_ident(tokenbuf, '*'); if (!*tokenbuf) @@ -1774,7 +1865,7 @@ yylex() Mop(OP_MODULO); } tokenbuf[0] = '%'; - s = scan_ident(s, bufend, tokenbuf+1, TRUE); + s = scan_ident(s, bufend, tokenbuf + 1, sizeof tokenbuf - 1, TRUE); if (!tokenbuf[1]) { if (s == bufend) yyerror("Final % should be \\% or %name"); @@ -1852,21 +1943,33 @@ yylex() else lex_brackstack[lex_brackets++] = XOPERATOR; OPERATOR(HASHBRACK); - break; case XOPERATOR: while (s < bufend && (*s == ' ' || *s == '\t')) s++; - if (s < bufend && isIDFIRST(*s)) { - d = scan_word(s, tokenbuf, FALSE, &len); + d = s; + tokenbuf[0] = '\0'; + if (d < bufend && *d == '-') { + tokenbuf[0] = '-'; + d++; + while (d < bufend && (*d == ' ' || *d == '\t')) + d++; + } + if (d < bufend && isIDFIRST(*d)) { + d = scan_word(d, tokenbuf + 1, sizeof tokenbuf - 1, + FALSE, &len); while (d < bufend && (*d == ' ' || *d == '\t')) d++; if (*d == '}') { + char minus = (tokenbuf[0] == '-'); if (dowarn && - (keyword(tokenbuf, len) || - perl_get_cv(tokenbuf, FALSE) )) + (keyword(tokenbuf + 1, len) || + (minus && len == 1 && isALPHA(tokenbuf[1])) || + perl_get_cv(tokenbuf + 1, FALSE) )) warn("Ambiguous use of {%s} resolved to {\"%s\"}", - tokenbuf, tokenbuf); - s = force_word(s,WORD,FALSE,TRUE,FALSE); + tokenbuf + !minus, tokenbuf + !minus); + s = force_word(s + minus, WORD, FALSE, TRUE, FALSE); + if (minus) + force_next('-'); } } /* FALL THROUGH */ @@ -1930,7 +2033,9 @@ yylex() bufptr = s; return yylex(); /* ignore fake brackets */ } - if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>')) + if (*s == '-' && s[1] == '>') + lex_state = LEX_INTERPENDMAYBE; + else if (*s != '[' && *s != '{') lex_state = LEX_INTERPEND; } } @@ -1956,7 +2061,7 @@ yylex() BAop(OP_BIT_AND); } - s = scan_ident(s-1, bufend, tokenbuf, TRUE); + s = scan_ident(s - 1, bufend, tokenbuf, sizeof tokenbuf, TRUE); if (*tokenbuf) { expect = XOPERATOR; force_ident(tokenbuf, '&'); @@ -1983,7 +2088,7 @@ yylex() if (tmp == '~') PMop(OP_MATCH); if (dowarn && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp)) - warn("Reversed %c= operator",tmp); + warn("Reversed %c= operator",(int)tmp); s--; if (expect == XSTATE && isALPHA(tmp) && (s == linestart+1 || s[-2] == '\n') ) @@ -2078,7 +2183,8 @@ yylex() if (expect == XOPERATOR) no_op("Array length", bufptr); tokenbuf[0] = '@'; - s = scan_ident(s+1, bufend, tokenbuf+1, FALSE); + s = scan_ident(s + 1, bufend, tokenbuf + 1, sizeof tokenbuf - 1, + FALSE); if (!tokenbuf[1]) PREREF(DOLSHARP); expect = XOPERATOR; @@ -2089,7 +2195,7 @@ yylex() if (expect == XOPERATOR) no_op("Scalar", bufptr); tokenbuf[0] = '$'; - s = scan_ident(s, bufend, tokenbuf+1, FALSE); + s = scan_ident(s, bufend, tokenbuf + 1, sizeof tokenbuf - 1, FALSE); if (!tokenbuf[1]) { if (s == bufend) yyerror("Final $ should be \\$ or $name"); @@ -2130,11 +2236,11 @@ yylex() if (dowarn && strEQ(tokenbuf+1, "SIG") && (t = strchr(s, '}')) && (t = strchr(t, '='))) { - char tmpbuf[1024]; + char tmpbuf[sizeof tokenbuf]; STRLEN len; for (t++; isSPACE(*t); t++) ; if (isIDFIRST(*t)) { - t = scan_word(t, tmpbuf, TRUE, &len); + t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len); if (*t != '(' && perl_get_cv(tmpbuf, FALSE)) warn("You need to quote \"%s\"", tmpbuf); } @@ -2151,6 +2257,17 @@ yylex() expect = XTERM; /* e.g. print $fh "foo" */ else if (strchr("&*<%", *s) && isIDFIRST(s[1])) expect = XTERM; /* e.g. print $fh &sub */ + else if (isIDFIRST(*s)) { + char tmpbuf[sizeof tokenbuf]; + scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); + if (keyword(tmpbuf, len)) + expect = XTERM; /* e.g. print $fh length() */ + else { + GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV); + if (gv && GvCVu(gv)) + expect = XTERM; /* e.g. print $fh subr() */ + } + } else if (isDIGIT(*s)) expect = XTERM; /* e.g. print $fh 3 */ else if (*s == '.' && isDIGIT(s[1])) @@ -2167,7 +2284,7 @@ yylex() if (expect == XOPERATOR) no_op("Array", s); tokenbuf[0] = '@'; - s = scan_ident(s, bufend, tokenbuf+1, FALSE); + s = scan_ident(s, bufend, tokenbuf + 1, sizeof tokenbuf - 1, FALSE); if (!tokenbuf[1]) { if (s == bufend) yyerror("Final @ should be \\@ or @name"); @@ -2333,17 +2450,35 @@ yylex() keylookup: bufptr = s; - s = scan_word(s, tokenbuf, FALSE, &len); - - if (*s == ':' && s[1] == ':' && strNE(tokenbuf, "CORE")) + s = scan_word(s, tokenbuf, sizeof tokenbuf, FALSE, &len); + + /* Some keywords can be followed by any delimiter, including ':' */ + tmp = (len == 1 && strchr("msyq", tokenbuf[0]) || + len == 2 && ((tokenbuf[0] == 't' && tokenbuf[1] == 'r') || + (tokenbuf[0] == 'q' && + strchr("qwx", tokenbuf[1])))); + + /* x::* is just a word, unless x is "CORE" */ + if (!tmp && *s == ':' && s[1] == ':' && strNE(tokenbuf, "CORE")) goto just_a_word; + d = s; + while (d < bufend && isSPACE(*d)) + d++; /* no comments skipped here, or s### is misparsed */ + + /* Is this a label? */ + if (!tmp && expect == XSTATE + && d < bufend && *d == ':' && *(d + 1) != ':') { + s = d + 1; + yylval.pval = savepv(tokenbuf); + CLINE; + TOKEN(LABEL); + } + + /* Check for keywords */ tmp = keyword(tokenbuf, len); /* Is this a word before a => operator? */ - d = s; - while (d < bufend && (*d == ' ' || *d == '\t')) - d++; /* no comments skipped here, or s### is misparsed */ if (strnEQ(d,"=>",2)) { CLINE; if (dowarn && (tmp || perl_get_cv(tokenbuf, FALSE))) @@ -2373,35 +2508,26 @@ yylex() default: /* not a keyword */ just_a_word: { GV *gv; + SV *sv; char lastchar = (bufptr == oldoldbufptr ? 0 : bufptr[-1]); /* Get the rest if it looks like a package qualifier */ if (*s == '\'' || *s == ':' && s[1] == ':') { - s = scan_word(s, tokenbuf + len, TRUE, &len); + s = scan_word(s, tokenbuf + len, sizeof tokenbuf - len, + TRUE, &len); if (!len) croak("Bad name after %s::", tokenbuf); } - /* Do special processing at start of statement. */ - - if (expect == XSTATE) { - while (isSPACE(*s)) s++; - if (*s == ':') { /* It's a label. */ - yylval.pval = savepv(tokenbuf); - s++; - CLINE; - TOKEN(LABEL); - } - } - else if (expect == XOPERATOR) { + if (expect == XOPERATOR) { if (bufptr == linestart) { curcop->cop_line--; warn(warn_nosemi); curcop->cop_line++; } else - no_op("Bare word",s); + no_op("Bareword",s); } /* Look for a subroutine with this name in current package. */ @@ -2437,7 +2563,7 @@ yylex() /* (But it's an indir obj regardless for sort.) */ if ((last_lop_op == OP_SORT || - (!immediate_paren && (!gv || !GvCV(gv))) ) && + (!immediate_paren && (!gv || !GvCVu(gv))) ) && (last_lop_op != OP_MAPSTART && last_lop_op != OP_GREPSTART)){ expect = (last_lop == oldoldbufptr) ? XTERM : XOPERATOR; goto bareword; @@ -2450,6 +2576,13 @@ yylex() s = skipspace(s); if (*s == '(') { CLINE; + if (gv && GvCVu(gv)) { + for (d = s + 1; *d == ' ' || *d == '\t'; d++) ; + if (*d == ')' && (sv = cv_const_sv(GvCV(gv)))) { + s = d + 1; + goto its_constant; + } + } nextval[nexttoke].opval = yylval.opval; expect = XOPERATOR; force_next(WORD); @@ -2459,7 +2592,7 @@ yylex() /* If followed by var or block, call it a method (unless sub) */ - if ((*s == '$' || *s == '{') && (!gv || !GvCV(gv))) { + if ((*s == '$' || *s == '{') && (!gv || !GvCVu(gv))) { last_lop = oldbufptr; last_lop_op = OP_METHOD; PREBLOCK(METHOD); @@ -2472,29 +2605,21 @@ yylex() /* Not a method, so call it a subroutine (if defined) */ - if (gv && GvCV(gv)) { - CV* cv = GvCV(gv); - if (*s == '(') { - nextval[nexttoke].opval = yylval.opval; - expect = XTERM; - force_next(WORD); - yylval.ival = 0; - TOKEN('&'); - } + if (gv && GvCVu(gv)) { + CV* cv; if (lastchar == '-') warn("Ambiguous use of -%s resolved as -&%s()", tokenbuf, tokenbuf); last_lop = oldbufptr; last_lop_op = OP_ENTERSUB; /* Check for a constant sub */ - { - SV *sv = cv_const_sv(cv); - if (sv) { - SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv); - ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv); - yylval.opval->op_private = 0; - TOKEN(WORD); - } + cv = GvCV(gv); + if ((sv = cv_const_sv(cv))) { + its_constant: + SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv); + ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv); + yylval.opval->op_private = 0; + TOKEN(WORD); } /* Resolve to GV now. */ @@ -2552,15 +2677,22 @@ yylex() TOKEN(WORD); } + case KEY___FILE__: + yylval.opval = (OP*)newSVOP(OP_CONST, 0, + newSVsv(GvSV(curcop->cop_filegv))); + TERM(THING); + case KEY___LINE__: - case KEY___FILE__: { - if (tokenbuf[2] == 'L') - (void)sprintf(tokenbuf,"%ld",(long)curcop->cop_line); - else - strcpy(tokenbuf, SvPVX(GvSV(curcop->cop_filegv))); - yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(tokenbuf,0)); + yylval.opval = (OP*)newSVOP(OP_CONST, 0, + newSVpvf("%ld", (long)curcop->cop_line)); + TERM(THING); + + case KEY___PACKAGE__: + yylval.opval = (OP*)newSVOP(OP_CONST, 0, + (curstash + ? newSVsv(curstname) + : &sv_undef)); TERM(THING); - } case KEY___DATA__: case KEY___END__: { @@ -2568,12 +2700,10 @@ yylex() /*SUPPRESS 560*/ if (rsfp && (!in_eval || tokenbuf[2] == 'D')) { - char dname[256]; char *pname = "main"; if (tokenbuf[2] == 'D') pname = HvNAME(curstash ? curstash : defstash); - sprintf(dname,"%s::DATA", pname); - gv = gv_fetchpv(dname,TRUE, SVt_PVIO); + gv = gv_fetchpv(form("%s::DATA", pname), TRUE, SVt_PVIO); GvMULTI_on(gv); if (!GvIO(gv)) GvIOp(gv) = newIO(); @@ -2611,7 +2741,7 @@ yylex() if (*s == ':' && s[1] == ':') { s += 2; d = s; - s = scan_word(s, tokenbuf, FALSE, &len); + s = scan_word(s, tokenbuf, sizeof tokenbuf, FALSE, &len); tmp = keyword(tokenbuf, len); if (tmp < 0) tmp = -tmp; @@ -3316,9 +3446,9 @@ yylex() s = skipspace(s); if (isIDFIRST(*s) || *s == '\'' || *s == ':') { - char tmpbuf[128]; + char tmpbuf[sizeof tokenbuf]; expect = XBLOCK; - d = scan_word(s, tmpbuf, TRUE, &len); + d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); if (strchr(tmpbuf, ':')) sv_setpv(subname, tmpbuf); else { @@ -3343,6 +3473,8 @@ yylex() /* Look for a prototype */ if (*s == '(') { + char *p; + s = scan_str(s); if (!s) { if (lex_stuff) @@ -3350,6 +3482,16 @@ yylex() lex_stuff = Nullsv; croak("Prototype not terminated"); } + /* strip spaces */ + d = SvPVX(lex_stuff); + tmp = 0; + for (p = d; *p; ++p) { + if (!isSPACE(*p)) + d[tmp++] = *p; + } + d[tmp] = '\0'; + SvCUR(lex_stuff) = tmp; + nexttoke++; nextval[1] = nextval[0]; nexttype[1] = nexttype[0]; @@ -3382,6 +3524,9 @@ yylex() case KEY_sysopen: LOP(OP_SYSOPEN,XTERM); + case KEY_sysseek: + LOP(OP_SYSSEEK,XTERM); + case KEY_sysread: LOP(OP_SYSREAD,XTERM); @@ -3524,8 +3669,9 @@ I32 len; switch (*d) { case '_': if (d[1] == '_') { - if (strEQ(d,"__LINE__")) return -KEY___LINE__; if (strEQ(d,"__FILE__")) return -KEY___FILE__; + if (strEQ(d,"__LINE__")) return -KEY___LINE__; + if (strEQ(d,"__PACKAGE__")) return -KEY___PACKAGE__; if (strEQ(d,"__DATA__")) return KEY___DATA__; if (strEQ(d,"__END__")) return KEY___END__; } @@ -4031,10 +4177,11 @@ I32 len; if (strEQ(d,"system")) return -KEY_system; break; case 7: - if (strEQ(d,"sysopen")) return -KEY_sysopen; - if (strEQ(d,"sysread")) return -KEY_sysread; if (strEQ(d,"symlink")) return -KEY_symlink; if (strEQ(d,"syscall")) return -KEY_syscall; + if (strEQ(d,"sysopen")) return -KEY_sysopen; + if (strEQ(d,"sysread")) return -KEY_sysread; + if (strEQ(d,"sysseek")) return -KEY_sysseek; break; case 8: if (strEQ(d,"syswrite")) return -KEY_syswrite; @@ -4174,14 +4321,18 @@ char *what; } static char * -scan_word(s, dest, allow_package, slp) +scan_word(s, dest, destlen, allow_package, slp) register char *s; char *dest; +STRLEN destlen; int allow_package; STRLEN *slp; { register char *d = dest; + register char *e = d + destlen - 3; /* two-character token, ending NUL */ for (;;) { + if (d >= e) + croak(ident_too_long); if (isALNUM(*s)) *d++ = *s++; else if (*s == '\'' && allow_package && isIDFIRST(s[1])) { @@ -4202,13 +4353,15 @@ STRLEN *slp; } static char * -scan_ident(s,send,dest,ck_uni) +scan_ident(s, send, dest, destlen, ck_uni) register char *s; register char *send; char *dest; +STRLEN destlen; I32 ck_uni; { register char *d; + register char *e; char *bracket = 0; char funny = *s++; @@ -4217,12 +4370,18 @@ I32 ck_uni; if (isSPACE(*s)) s = skipspace(s); d = dest; + e = d + destlen - 3; /* two-character token, ending NUL */ if (isDIGIT(*s)) { - while (isDIGIT(*s)) + while (isDIGIT(*s)) { + if (d >= e) + croak(ident_too_long); *d++ = *s++; + } } else { for (;;) { + if (d >= e) + croak(ident_too_long); if (isALNUM(*s)) *d++ = *s++; else if (*s == '\'' && isIDFIRST(s[1])) { @@ -4246,8 +4405,13 @@ I32 ck_uni; return s; } if (*s == '$' && s[1] && - (isALPHA(s[1]) || strchr("$_{", s[1]) || strnEQ(s+1,"::",2)) ) - return s; + (isALNUM(s[1]) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) ) + { + if (isDIGIT(s[1]) && lex_state == LEX_INTERPNORMAL) + deprecate("\"$$\" to mean \"${$}\""); + else + return s; + } if (*s == '{') { bracket = s; s++; @@ -4263,8 +4427,13 @@ I32 ck_uni; } if (bracket) { if (isSPACE(s[-1])) { - while (s < send && (*s == ' ' || *s == '\t')) s++; - *d = *s; + while (s < send) { + char ch = *s++; + if (ch != ' ' && ch != '\t') { + *d = ch; + break; + } + } } if (isIDFIRST(*d)) { d++; @@ -4341,8 +4510,6 @@ char *start; pm = (PMOP*)newPMOP(OP_MATCH, 0); if (multi_open == '?') pm->op_pmflags |= PMf_ONCE; - if (hints & HINT_LOCALE) - pm->op_pmflags |= PMf_LOCALE; while (*s && strchr("iogmsx", *s)) pmflag(&pm->op_pmflags,*s++); pm->op_pmpermflags = pm->op_pmflags; @@ -4358,6 +4525,7 @@ char *start; { register char *s; register PMOP *pm; + I32 first_start; I32 es = 0; yylval.ival = OP_NULL; @@ -4374,6 +4542,7 @@ char *start; if (s[-1] == multi_open) s--; + first_start = multi_start; s = scan_str(s); if (!s) { if (lex_stuff) @@ -4384,6 +4553,7 @@ char *start; lex_repl = Nullsv; croak("Substitution replacement not terminated"); } + multi_start = first_start; /* so whole substitution is taken together */ pm = (PMOP*)newPMOP(OP_SUBST, 0); while (*s && strchr("iogmsex", *s)) { @@ -4441,9 +4611,11 @@ register PMOP *pm; return; } } - if (!pm->op_pmshort || /* promote the better string */ - ((pm->op_pmflags & PMf_SCANFIRST) && - (SvCUR(pm->op_pmshort) < SvCUR(pm->op_pmregexp->regmust)) )){ + /* promote the better string */ + if ((!pm->op_pmshort && + !(pm->op_pmregexp->reganch & ROPT_ANCH_GPOS)) || + ((pm->op_pmflags & PMf_SCANFIRST) && + (SvCUR(pm->op_pmshort) < SvCUR(pm->op_pmregexp->regmust)))) { SvREFCNT_dec(pm->op_pmshort); /* ok if null */ pm->op_pmshort = pm->op_pmregexp->regmust; pm->op_pmslen = SvCUR(pm->op_pmshort); @@ -4517,20 +4689,23 @@ register char *s; SV *tmpstr; char term; register char *d; + register char *e; char *peek; + int outer = (rsfp && !lex_inwhat); s += 2; d = tokenbuf; - if (!rsfp) + e = tokenbuf + sizeof tokenbuf - 1; + if (!outer) *d++ = '\n'; for (peek = s; *peek == ' ' || *peek == '\t'; peek++) ; if (*peek && strchr("`'\"",*peek)) { s = peek; term = *s++; - s = cpytill(d,s,bufend,term,&len); + s = delimcpy(d, e, s, bufend, term, &len); + d += len; if (s < bufend) s++; - d += len; } else { if (*s == '\\') @@ -4539,14 +4714,18 @@ register char *s; term = '"'; if (!isALNUM(*s)) deprecate("bare << to mean <<\"\""); - while (isALNUM(*s)) - *d++ = *s++; - } /* assuming tokenbuf won't clobber */ + for (; isALNUM(*s); s++) { + if (d < e) + *d++ = *s; + } + } + if (d >= tokenbuf + sizeof tokenbuf - 1) + croak("Delimiter for here document is too long"); *d++ = '\n'; *d = '\0'; len = d - tokenbuf; d = "\n"; - if (rsfp || !(d=ninstr(s,bufend,d,d+1))) + if (outer || !(d=ninstr(s,bufend,d,d+1))) herewas = newSVpv(s,bufend-s); else s--, herewas = newSVpv(s,d-s); @@ -4567,10 +4746,10 @@ register char *s; multi_start = curcop->cop_line; multi_open = multi_close = '<'; term = *tokenbuf; - if (!rsfp) { + if (!outer) { d = s; while (s < bufend && - (*s != term || memcmp(s,tokenbuf,len) != 0) ) { + (*s != term || memNE(s,tokenbuf,len)) ) { if (*s++ == '\n') curcop->cop_line++; } @@ -4588,7 +4767,7 @@ register char *s; else sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */ while (s >= bufend) { /* multiple line string? */ - if (!rsfp || + if (!outer || !(oldoldbufptr = oldbufptr = s = linestart = filter_gets(linestr, rsfp, 0))) { curcop->cop_line = multi_start; missingterm(tokenbuf); @@ -4603,7 +4782,7 @@ register char *s; (I32)curcop->cop_line,sv); } bufend = SvPVX(linestr) + SvCUR(linestr); - if (*s == term && memcmp(s,tokenbuf,len) == 0) { + if (*s == term && memEQ(s,tokenbuf,len)) { s = bufend - 1; *s = ' '; sv_catsv(linestr,herewas); @@ -4632,15 +4811,17 @@ char *start; { register char *s = start; register char *d; + register char *e; I32 len; d = tokenbuf; - s = cpytill(d, s+1, bufend, '>', &len); - if (s < bufend) - s++; - else + e = tokenbuf + sizeof tokenbuf; + s = delimcpy(d, e, s + 1, bufend, '>', &len); + if (len >= sizeof tokenbuf) + croak("Excessively long <> operator"); + if (s >= bufend) croak("Unterminated <> operator"); - + s++; if (*d == '$' && d[1]) d++; while (*d && (isALNUM(*d) || *d == '\'' || *d == ':')) d++; @@ -4783,11 +4964,13 @@ char *start; { register char *s = start; register char *d; - I32 tryi32; + register char *e; + I32 tryiv; double value; SV *sv; I32 floatit; char *lastub = 0; + static char number_too_long[] = "Number too long"; switch (*s) { default: @@ -4849,6 +5032,7 @@ char *start; case '6': case '7': case '8': case '9': case '.': decimal: d = tokenbuf; + e = tokenbuf + sizeof tokenbuf - 6; /* room for various punctuation */ floatit = FALSE; while (isDIGIT(*s) || *s == '_') { if (*s == '_') { @@ -4856,19 +5040,22 @@ char *start; warn("Misplaced _ in number"); lastub = ++s; } - else + else { + if (d >= e) + croak(number_too_long); *d++ = *s++; + } } if (dowarn && lastub && s - lastub != 3) warn("Misplaced _ in number"); if (*s == '.' && s[1] != '.') { floatit = TRUE; *d++ = *s++; - while (isDIGIT(*s) || *s == '_') { - if (*s == '_') - s++; - else - *d++ = *s++; + for (; isDIGIT(*s) || *s == '_'; s++) { + if (d >= e) + croak(number_too_long); + if (*s != '_') + *d++ = *s; } } if (*s && strchr("eE",*s) && strchr("+-0123456789",s[1])) { @@ -4877,18 +5064,21 @@ char *start; *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */ if (*s == '+' || *s == '-') *d++ = *s++; - while (isDIGIT(*s)) + while (isDIGIT(*s)) { + if (d >= e) + croak(number_too_long); *d++ = *s++; + } } *d = '\0'; sv = NEWSV(92,0); - NUMERIC_STANDARD(); + SET_NUMERIC_STANDARD(); value = atof(tokenbuf); - tryi32 = I_32(value); - if (!floatit && (double)tryi32 == value) - sv_setiv(sv,tryi32); + tryiv = I_V(value); + if (!floatit && (double)tryiv == value) + sv_setiv(sv, tryiv); else - sv_setnv(sv,value); + sv_setnv(sv, value); break; } @@ -4976,10 +5166,12 @@ set_csh() #endif } -int -start_subparse() +I32 +start_subparse(is_format, flags) +I32 is_format; +U32 flags; { - int oldsavestack_ix = savestack_ix; + I32 oldsavestack_ix = savestack_ix; CV* outsidecv = compcv; AV* comppadlist; @@ -4999,7 +5191,8 @@ start_subparse() SAVEI32(pad_reset_pending); compcv = (CV*)NEWSV(1104,0); - sv_upgrade((SV *)compcv, SVt_PVCV); + sv_upgrade((SV *)compcv, is_format ? SVt_PVFM : SVt_PVCV); + CvFLAGS(compcv) |= flags; comppad = newAV(); comppad_name = newAV(); @@ -5036,55 +5229,69 @@ int yyerror(s) char *s; { - char tmpbuf[258]; - char *tname = tmpbuf; - - if (bufptr > oldoldbufptr && bufptr - oldoldbufptr < 200 && + char *where = NULL; + char *context = NULL; + int contlen = -1; + SV *msg; + + if (!yychar || (yychar == ';' && !rsfp)) + where = "at EOF"; + else if (bufptr > oldoldbufptr && bufptr - oldoldbufptr < 200 && oldoldbufptr != oldbufptr && oldbufptr != bufptr) { while (isSPACE(*oldoldbufptr)) oldoldbufptr++; - sprintf(tname,"near \"%.*s\"",bufptr - oldoldbufptr, oldoldbufptr); + context = oldoldbufptr; + contlen = bufptr - oldoldbufptr; } else if (bufptr > oldbufptr && bufptr - oldbufptr < 200 && oldbufptr != bufptr) { while (isSPACE(*oldbufptr)) oldbufptr++; - sprintf(tname,"near \"%.*s\"",bufptr - oldbufptr, oldbufptr); + context = oldbufptr; + contlen = bufptr - oldbufptr; } else if (yychar > 255) - tname = "next token ???"; - else if (!yychar || (yychar == ';' && !rsfp)) - (void)strcpy(tname,"at EOF"); + where = "next token ???"; else if ((yychar & 127) == 127) { if (lex_state == LEX_NORMAL || (lex_state == LEX_KNOWNEXT && lex_defer == LEX_NORMAL)) - (void)strcpy(tname,"at end of line"); + where = "at end of line"; else if (lex_inpat) - (void)strcpy(tname,"within pattern"); + where = "within pattern"; + else + where = "within string"; + } + else { + SV *where_sv = sv_2mortal(newSVpv("next char ", 0)); + if (yychar < 32) + sv_catpvf(where_sv, "^%c", toCTRL(yychar)); + else if (isPRINT_LC(yychar)) + sv_catpvf(where_sv, "%c", yychar); else - (void)strcpy(tname,"within string"); + sv_catpvf(where_sv, "\\%03o", yychar & 255); + where = SvPVX(where_sv); } - else if (yychar < 32) - (void)sprintf(tname,"next char ^%c",toCTRL(yychar)); + msg = sv_2mortal(newSVpv(s, 0)); + sv_catpvf(msg, " at %_ line %ld, ", + GvSV(curcop->cop_filegv), (long)curcop->cop_line); + if (context) + sv_catpvf(msg, "near \"%.*s\"\n", contlen, context); else - (void)sprintf(tname,"next char %c",yychar); - (void)sprintf(buf, "%s at %s line %d, %s\n", - s,SvPVX(GvSV(curcop->cop_filegv)),curcop->cop_line,tname); - if (curcop->cop_line == multi_end && multi_start < multi_end) { - sprintf(buf+strlen(buf), - " (Might be a runaway multi-line %c%c string starting on line %ld)\n", - multi_open,multi_close,(long)multi_start); + sv_catpvf(msg, "%s\n", where); + if (multi_start < multi_end && (U32)(curcop->cop_line - multi_end) <= 1) { + sv_catpvf(msg, + " (Might be a runaway multi-line %c%c string starting on line %ld)\n", + (int)multi_open,(int)multi_close,(long)multi_start); multi_end = 0; } if (in_eval & 2) - warn("%s",buf); + warn("%_", msg); else if (in_eval) - sv_catpv(GvSV(errgv),buf); + sv_catsv(GvSV(errgv), msg); else - PerlIO_printf(PerlIO_stderr(), "%s",buf); + PerlIO_write(PerlIO_stderr(), SvPVX(msg), SvCUR(msg)); if (++error_count >= 10) - croak("%s has too many errors.\n", - SvPVX(GvSV(curcop->cop_filegv))); + croak("%_ has too many errors.\n", GvSV(curcop->cop_filegv)); in_my = 0; return 0; }