X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/c1789b9f89e17b99d728910cb490561f334c2033..c30fc27b:/toke.c diff --git a/toke.c b/toke.c index 4581bfd..578fe14 100644 --- a/toke.c +++ b/toke.c @@ -137,7 +137,7 @@ static const char* const ident_too_long = "Identifier too long"; * 1999-02-27 mjd-perl-patch@plover.com */ #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x))) -#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t') +#define SPACE_OR_TAB(c) isBLANK_A(c) /* LEX_* are values for PL_lex_state, the state of the lexer. * They are arranged oddly so that the guard on the switch statement @@ -427,7 +427,11 @@ S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp) if (name) Perl_sv_catpv(aTHX_ report, name); else if ((char)rv > ' ' && (char)rv <= '~') + { Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv); + if ((char)rv == 'p') + sv_catpvs(report, " (pending identifier)"); + } else if (!rv) sv_catpvs(report, "EOF"); else @@ -549,16 +553,14 @@ S_no_op(pTHX_ const char *const what, char *s) NOOP; if (t < PL_bufptr && isSPACE(*t)) Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "\t(Do you need to predeclare %"SVf"?)\n", - SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr), - SVs_TEMP | (UTF ? SVf_UTF8 : 0)))); + "\t(Do you need to predeclare %"UTF8f"?)\n", + UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr)); } else { assert(s >= oldbp); Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "\t(Missing operator before %"SVf"?)\n", - SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp), - SVs_TEMP | (UTF ? SVf_UTF8 : 0)))); + "\t(Missing operator before %"UTF8f"?)\n", + UTF8fARG(UTF, s - oldbp, oldbp)); } } PL_bufptr = oldbp; @@ -1510,14 +1512,16 @@ chunk will not be discarded. =cut */ +#define LEX_NO_INCLINE 0x40000000 #define LEX_NO_NEXT_CHUNK 0x80000000 void Perl_lex_read_space(pTHX_ U32 flags) { char *s, *bufend; + const bool can_incline = !(flags & LEX_NO_INCLINE); bool need_incline = 0; - if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK)) + if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE)) Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space"); #ifdef PERL_MAD if (PL_skipwhite) { @@ -1537,11 +1541,13 @@ Perl_lex_read_space(pTHX_ U32 flags) } while (!(c == '\n' || (c == 0 && s == bufend))); } else if (c == '\n') { s++; - PL_parser->linestart = s; - if (s == bufend) - need_incline = 1; - else - incline(s); + if (can_incline) { + PL_parser->linestart = s; + if (s == bufend) + need_incline = 1; + else + incline(s); + } } else if (isSPACE(c)) { s++; } else if (c == 0 && s == bufend) { @@ -1553,14 +1559,14 @@ Perl_lex_read_space(pTHX_ U32 flags) if (flags & LEX_NO_NEXT_CHUNK) break; PL_parser->bufptr = s; - COPLINE_INC_WITH_HERELINES; + if (can_incline) COPLINE_INC_WITH_HERELINES; got_more = lex_next_chunk(flags); - CopLINE_dec(PL_curcop); + if (can_incline) CopLINE_dec(PL_curcop); s = PL_parser->bufptr; bufend = PL_parser->bufend; if (!got_more) break; - if (need_incline && PL_parser->rsfp) { + if (can_incline && need_incline && PL_parser->rsfp) { incline(s); need_incline = 0; } @@ -1576,6 +1582,107 @@ Perl_lex_read_space(pTHX_ U32 flags) } /* + +=for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn + +This function performs syntax checking on a prototype, C. +If C is true, any illegal characters or mismatched brackets +will trigger illegalproto warnings, declaring that they were +detected in the prototype for C. + +The return value is C if this is a valid prototype, and +C if it is not, regardless of whether C was C or +C. + +Note that C is a valid C and will always return C. + +=cut + + */ + +bool +Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn) +{ + STRLEN len, origlen; + char *p = proto ? SvPV(proto, len) : NULL; + bool bad_proto = FALSE; + bool in_brackets = FALSE; + bool after_slash = FALSE; + char greedy_proto = ' '; + bool proto_after_greedy_proto = FALSE; + bool must_be_last = FALSE; + bool underscore = FALSE; + bool bad_proto_after_underscore = FALSE; + + PERL_ARGS_ASSERT_VALIDATE_PROTO; + + if (!proto) + return TRUE; + + origlen = len; + for (; len--; p++) { + if (!isSPACE(*p)) { + if (must_be_last) + proto_after_greedy_proto = TRUE; + if (underscore) { + if (!strchr(";@%", *p)) + bad_proto_after_underscore = TRUE; + underscore = FALSE; + } + if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') { + bad_proto = TRUE; + } + else { + if (*p == '[') + in_brackets = TRUE; + else if (*p == ']') + in_brackets = FALSE; + else if ((*p == '@' || *p == '%') && + !after_slash && + !in_brackets ) { + must_be_last = TRUE; + greedy_proto = *p; + } + else if (*p == '_') + underscore = TRUE; + } + if (*p == '\\') + after_slash = TRUE; + else + after_slash = FALSE; + } + } + + if (warn) { + SV *tmpsv = newSVpvs_flags("", SVs_TEMP); + p -= origlen; + p = SvUTF8(proto) + ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8), + origlen, UNI_DISPLAY_ISPRINT) + : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII); + + if (proto_after_greedy_proto) + Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), + "Prototype after '%c' for %"SVf" : %s", + greedy_proto, SVfARG(name), p); + if (in_brackets) + Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), + "Missing ']' in prototype for %"SVf" : %s", + SVfARG(name), p); + if (bad_proto) + Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), + "Illegal character in prototype for %"SVf" : %s", + SVfARG(name), p); + if (bad_proto_after_underscore) + Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), + "Illegal character after '_' in prototype for %"SVf" : %s", + SVfARG(name), p); + } + + return (! (proto_after_greedy_proto || bad_proto) ); +} + +/* * S_incline * This subroutine has nothing to do with tilting, whether at windmills * or pinball tables. Its name is short for "increment line". It @@ -1731,6 +1838,8 @@ S_incline(pTHX_ const char *s) CopLINE_set(PL_curcop, line_num); } +#define skipspace(s) skipspace_flags(s, 0) + #ifdef PERL_MAD /* skip space before PL_thistoken */ @@ -1820,7 +1929,7 @@ S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len) if (av) { SV * const sv = newSV_type(SVt_PVMG); if (orig_sv) - sv_setsv(sv, orig_sv); + sv_setsv_flags(sv, orig_sv, 0); /* no cow */ else sv_setpvn(sv, buf, len); (void)SvIOK_on(sv); @@ -1836,12 +1945,12 @@ S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len) */ STATIC char * -S_skipspace(pTHX_ char *s) +S_skipspace_flags(pTHX_ char *s, U32 flags) { #ifdef PERL_MAD char *start = s; #endif /* PERL_MAD */ - PERL_ARGS_ASSERT_SKIPSPACE; + PERL_ARGS_ASSERT_SKIPSPACE_FLAGS; #ifdef PERL_MAD if (PL_skipwhite) { sv_free(PL_skipwhite); @@ -1854,7 +1963,7 @@ S_skipspace(pTHX_ char *s) } else { STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr); PL_bufptr = s; - lex_read_space(LEX_KEEP_PREVIOUS | + lex_read_space(flags | LEX_KEEP_PREVIOUS | (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ? LEX_NO_NEXT_CHUNK : 0)); s = PL_bufptr; @@ -2110,7 +2219,7 @@ S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) */ STATIC char * -S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick) +S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack) { dVAR; char *s; @@ -2121,12 +2230,16 @@ S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack, in start = SKIPSPACE1(start); s = start; if (isIDFIRST_lazy_if(s,UTF) || - (allow_pack && *s == ':') || - (allow_initial_tick && *s == '\'') ) + (allow_pack && *s == ':') ) { s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len); - if (check_keyword && keyword(PL_tokenbuf, len, 0)) + if (check_keyword) { + char *s2 = PL_tokenbuf; + if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6)) + s2 += 6, len -= 6; + if (keyword(s2, len, 0)) return start; + } start_force(PL_curforce); if (PL_madskills) curmad('X', newSVpvn(start,s-start)); @@ -2525,6 +2638,7 @@ S_sublex_push(pTHX) SAVEGENERICPV(PL_lex_brackstack); SAVEGENERICPV(PL_lex_casestack); SAVEGENERICPV(PL_parser->lex_shared); + SAVEBOOL(PL_parser->lex_re_reparsing); /* The here-doc parser needs to be able to peek into outer lexing scopes to find the body of the here-doc. So we put PL_linestr and @@ -2568,6 +2682,9 @@ S_sublex_push(pTHX) else PL_lex_inpat = NULL; + PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING); + PL_in_eval &= ~EVAL_RE_REPARSING; + return '('; } @@ -2724,13 +2841,17 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) if (! isCHARNAME_CONT(*s)) { goto bad_charname; } - if (*s == ' ' && *(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) { - Perl_warn(aTHX_ "A sequence of multiple spaces in a charnames alias definition is deprecated"); + if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) { + Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), + "A sequence of multiple spaces in a charnames " + "alias definition is deprecated"); } s++; } - if (*(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) { - Perl_warn(aTHX_ "Trailing white-space in a charnames alias definition is deprecated"); + if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) { + Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), + "Trailing white-space in a charnames alias " + "definition is deprecated"); } } else { @@ -2767,8 +2888,11 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) if (! isCHARNAME_CONT(*s)) { goto bad_charname; } - if (*s == ' ' && *(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) { - Perl_warn(aTHX_ "A sequence of multiple spaces in a charnames alias definition is deprecated"); + if (*s == ' ' && *(s-1) == ' ' + && ckWARN_d(WARN_DEPRECATED)) { + Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), + "A sequence of multiple spaces in a charnam" + "es alias definition is deprecated"); } s++; } @@ -2794,8 +2918,10 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) s += UTF8SKIP(s); } } - if (*(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) { - Perl_warn(aTHX_ "Trailing white-space in a charnames alias definition is deprecated"); + if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) { + Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), + "Trailing white-space in a charnames alias " + "definition is deprecated"); } } @@ -3159,12 +3285,12 @@ S_scan_const(pTHX_ char *start) * char, which will be done separately. * Stop on (?{..}) and friends */ - else if (*s == '(' && PL_lex_inpat && s[1] == '?') { + else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) { if (s[2] == '#') { while (s+1 < send && *s != ')') *d++ = NATIVE_TO_NEED(has_utf8,*s++); } - else if (!PL_lex_casemods && !in_charclass && + else if (!PL_lex_casemods && ( s[2] == '{' /* This should match regcomp.c */ || (s[2] == '?' && s[3] == '{'))) { @@ -3173,7 +3299,7 @@ S_scan_const(pTHX_ char *start) } /* likewise skip #-initiated comments in //x patterns */ - else if (*s == '#' && PL_lex_inpat && + else if (*s == '#' && PL_lex_inpat && !in_charclass && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) { while (s+1 < send && *s != '\n') *d++ = NATIVE_TO_NEED(has_utf8,*s++); @@ -3751,7 +3877,9 @@ S_scan_const(pTHX_ char *start) /* return the substring (via pl_yylval) only if we parsed anything */ if (s > PL_bufptr) { SvREFCNT_inc_simple_void_NN(sv); - if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) { + if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING )) + && ! PL_parser->lex_re_reparsing) + { const char *const key = PL_lex_inpat ? "qr" : "q"; const STRLEN keylen = PL_lex_inpat ? 2 : 1; const char *type; @@ -3972,19 +4100,14 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv)) return 0; if (cv && SvPOK(cv)) { - const char *proto = CvPROTO(cv); - if (proto) { - if (*proto == ';') - proto++; - if (*proto == '*') - return 0; - } + const char *proto = CvPROTO(cv); + if (proto) { + while (*proto && (isSPACE(*proto) || *proto == ';')) + proto++; + if (*proto == '*') + return 0; + } } - s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); - /* start is the beginning of the possible filehandle/object, - * and s is the end of it - * tmpbuf is a copy of it - */ if (*start == '$') { if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY || @@ -4001,6 +4124,13 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) PL_expect = XREF; return *s == '(' ? FUNCMETH : METHOD; } + + s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); + /* start is the beginning of the possible filehandle/object, + * and s is the end of it + * tmpbuf is a copy of it (but with single quotes as double colons) + */ + if (!keyword(tmpbuf, len, 0)) { if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') { len -= 2; @@ -4531,12 +4661,12 @@ S_tokenize_use(pTHX_ int is_use, char *s) { force_next(WORD); } else if (*s == 'v') { - s = force_word(s,WORD,FALSE,TRUE,FALSE); + s = force_word(s,WORD,FALSE,TRUE); s = force_version(s, FALSE); } } else { - s = force_word(s,WORD,FALSE,TRUE,FALSE); + s = force_word(s,WORD,FALSE,TRUE); s = force_version(s, FALSE); } pl_yylval.ival = is_use; @@ -4615,6 +4745,7 @@ Perl_yylex(pTHX) char *d; STRLEN len; bool bof = FALSE; + const bool saw_infix_sigil = PL_parser->saw_infix_sigil; U8 formbrack = 0; U32 fake_eof = 0; @@ -4839,7 +4970,10 @@ Perl_yylex(pTHX) DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log, "### Interpolated variable\n"); }); PL_expect = XTERM; - PL_lex_dojoin = (*PL_bufptr == '@'); + /* for /@a/, we leave the joining for the regex engine to do + * (unless we're within \Q etc) */ + PL_lex_dojoin = (*PL_bufptr == '@' + && (!PL_lex_inpat || PL_lex_casemods)); PL_lex_state = LEX_INTERPNORMAL; if (PL_lex_dojoin) { start_force(PL_curforce); @@ -5011,9 +5145,12 @@ Perl_yylex(pTHX) return yylex(); } + /* We really do *not* want PL_linestr ever becoming a COW. */ + assert (!SvIsCOW(PL_linestr)); s = PL_bufptr; PL_oldoldbufptr = PL_oldbufptr; PL_oldbufptr = s; + PL_parser->saw_infix_sigil = 0; retry: #ifdef PERL_MAD @@ -5507,8 +5644,12 @@ Perl_yylex(pTHX) PL_bufend = s; */ } #else - *s = '\0'; - PL_bufend = s; + while (s < PL_bufend && *s != '\n') + s++; + if (s < PL_bufend) + s++; + else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */ + Perl_croak(aTHX_ "panic: input overflow"); #endif } goto retry; @@ -5525,7 +5666,7 @@ Perl_yylex(pTHX) s++; if (strnEQ(s,"=>",2)) { - s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE); + s = force_word(PL_bufptr,WORD,FALSE,FALSE); DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } ); OPERATOR('-'); /* unary minus */ } @@ -5597,7 +5738,7 @@ Perl_yylex(pTHX) s++; s = SKIPSPACE1(s); if (isIDFIRST_lazy_if(s,UTF)) { - s = force_word(s,METHOD,FALSE,TRUE,FALSE); + s = force_word(s,METHOD,FALSE,TRUE); TOKEN(ARROW); } else if (*s == '$') @@ -5669,6 +5810,7 @@ Perl_yylex(pTHX) s--; TOKEN(0); } + PL_parser->saw_infix_sigil = 1; Mop(OP_MULTIPLY); case '%': @@ -5677,6 +5819,7 @@ Perl_yylex(pTHX) PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) TOKEN(0); ++s; + PL_parser->saw_infix_sigil = 1; Mop(OP_MODULO); } PL_tokenbuf[0] = '%'; @@ -5960,7 +6103,7 @@ Perl_yylex(pTHX) d++; if (*d == '}') { const char minus = (PL_tokenbuf[0] == '-'); - s = force_word(s + minus, WORD, FALSE, TRUE, FALSE); + s = force_word(s + minus, WORD, FALSE, TRUE); if (minus) force_next('-'); } @@ -6170,6 +6313,7 @@ Perl_yylex(pTHX) s--; TOKEN(0); } + PL_parser->saw_infix_sigil = 1; BAop(OP_BIT_AND); } @@ -6473,9 +6617,8 @@ Perl_yylex(pTHX) if (*t == ';' && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0)) Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "You need to quote \"%"SVf"\"", - SVfARG(newSVpvn_flags(tmpbuf, len, - SVs_TEMP | (UTF ? SVf_UTF8 : 0)))); + "You need to quote \"%"UTF8f"\"", + UTF8fARG(UTF, len, tmpbuf)); } } } @@ -6560,11 +6703,9 @@ Perl_yylex(pTHX) PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */ /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */ Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Scalar value %"SVf" better written as $%"SVf, - SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr), - SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))), - SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1), - SVs_TEMP | (UTF ? SVf_UTF8 : 0 )))); + "Scalar value %"UTF8f" better written as $%"UTF8f, + UTF8fARG(UTF, t-PL_bufptr, PL_bufptr), + UTF8fARG(UTF, t-PL_bufptr-1, PL_bufptr+1)); } } } @@ -6836,6 +6977,7 @@ Perl_yylex(pTHX) /* Is this a word before a => operator? */ if (*d == '=' && d[1] == '>') { + fat_arrow: CLINE; pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, @@ -6912,8 +7054,7 @@ Perl_yylex(pTHX) else { rv2cv_op = newOP(OP_PADANY, 0); rv2cv_op->op_targ = off; - rv2cv_op = (OP*)newCVREF(0, rv2cv_op); - cv = (CV *)PAD_SV(off); + cv = find_lexical_cv(off); } lex = TRUE; goto just_a_word; @@ -6970,6 +7111,18 @@ Perl_yylex(pTHX) } } + if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__ + && (!anydelim || *s != '#')) { + /* no override, and not s### either; skipspace is safe here + * check for => on following line */ + STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr); + STRLEN soff = s - SvPVX(PL_linestr); + s = skipspace_flags(s, LEX_NO_INCLINE); + if (*s == '=' && s[1] == '>') goto fat_arrow; + PL_bufptr = SvPVX(PL_linestr) + bufoff; + s = SvPVX(PL_linestr) + soff; + } + reserved_word: switch (tmp) { @@ -7008,9 +7161,8 @@ Perl_yylex(pTHX) s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len, TRUE, &morelen); if (!morelen) - Perl_croak(aTHX_ "Bad name after %"SVf"%s", - SVfARG(newSVpvn_flags(PL_tokenbuf, len, - (UTF ? SVf_UTF8 : 0) | SVs_TEMP )), + Perl_croak(aTHX_ "Bad name after %"UTF8f"%s", + UTF8fARG(UTF, len, PL_tokenbuf), *s == '\'' ? "'" : "::"); len += morelen; pkgname = 1; @@ -7037,9 +7189,8 @@ Perl_yylex(pTHX) if (ckWARN(WARN_BAREWORD) && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV)) Perl_warner(aTHX_ packWARN(WARN_BAREWORD), - "Bareword \"%"SVf"\" refers to nonexistent package", - SVfARG(newSVpvn_flags(PL_tokenbuf, len, - (UTF ? SVf_UTF8 : 0) | SVs_TEMP))); + "Bareword \"%"UTF8f"\" refers to nonexistent package", + UTF8fARG(UTF, len, PL_tokenbuf)); len -= 2; PL_tokenbuf[len] = '\0'; gv = NULL; @@ -7155,9 +7306,13 @@ Perl_yylex(pTHX) if (*s == '=' && s[1] == '>' && !pkgname) { op_free(rv2cv_op); CLINE; + /* This is our own scalar, created a few lines above, + so this is safe. */ + SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv); sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf); if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len)) SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv); + SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv); TERM(WORD); } @@ -7168,7 +7323,7 @@ Perl_yylex(pTHX) d = s + 1; while (SPACE_OR_TAB(*d)) d++; - if (*d == ')' && (sv = cv_const_sv(cv))) { + if (*d == ')' && (sv = cv_const_sv_or_av(cv))) { s = d + 1; goto its_constant; } @@ -7225,24 +7380,32 @@ Perl_yylex(pTHX) if (cv) { if (lastchar == '-' && penultchar != '-') { - const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP ); + const STRLEN l = len ? len : strlen(PL_tokenbuf); Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), - "Ambiguous use of -%"SVf" resolved as -&%"SVf"()", - SVfARG(tmpsv), SVfARG(tmpsv)); + "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()", + UTF8fARG(UTF, l, PL_tokenbuf), + UTF8fARG(UTF, l, PL_tokenbuf)); } /* Check for a constant sub */ - if ((sv = cv_const_sv(cv))) { + if ((sv = cv_const_sv_or_av(cv))) { its_constant: op_free(rv2cv_op); SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv); ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv); - pl_yylval.opval->op_private = OPpCONST_FOLDED; - pl_yylval.opval->op_flags |= OPf_SPECIAL; + if (SvTYPE(sv) == SVt_PVAV) + pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS, + pl_yylval.opval); + else { + pl_yylval.opval->op_private = OPpCONST_FOLDED; + pl_yylval.opval->op_folded = 1; + pl_yylval.opval->op_flags |= OPf_SPECIAL; + } TOKEN(WORD); } op_free(pl_yylval.opval); - pl_yylval.opval = rv2cv_op; + pl_yylval.opval = + off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op; pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; PL_last_lop = PL_oldbufptr; PL_last_lop_op = OP_ENTERSUB; @@ -7256,6 +7419,7 @@ Perl_yylex(pTHX) STRLEN protolen = CvPROTOLEN(cv); const char *proto = CvPROTO(cv); bool optional; + proto = S_strip_spaces(aTHX_ proto, &protolen); if (!protolen) TERM(FUNC0SUB); if ((optional = *proto == ';')) @@ -7338,7 +7502,8 @@ Perl_yylex(pTHX) gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV); op_free(pl_yylval.opval); - pl_yylval.opval = rv2cv_op; + pl_yylval.opval = + off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op; pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; PL_last_lop = PL_oldbufptr; PL_last_lop_op = OP_ENTERSUB; @@ -7399,12 +7564,13 @@ Perl_yylex(pTHX) op_free(rv2cv_op); safe_bareword: - if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) { + if ((lastchar == '*' || lastchar == '%' || lastchar == '&') + && saw_infix_sigil) { Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), - "Operator or semicolon missing before %c%"SVf, - lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf, - strlen(PL_tokenbuf), - SVs_TEMP | (UTF ? SVf_UTF8 : 0)))); + "Operator or semicolon missing before %c%"UTF8f, + lastchar, + UTF8fARG(UTF, strlen(PL_tokenbuf), + PL_tokenbuf)); Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), "Ambiguous use of %c resolved as operator %c", lastchar, lastchar); @@ -7564,9 +7730,8 @@ Perl_yylex(pTHX) goto just_a_word; } if (!tmp) - Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword", - SVfARG(newSVpvn_flags(PL_tokenbuf, len, - (UTF ? SVf_UTF8 : 0) | SVs_TEMP))); + Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword", + UTF8fARG(UTF, len, PL_tokenbuf)); if (tmp < 0) tmp = -tmp; else if (tmp == KEY_require || tmp == KEY_do @@ -7719,7 +7884,7 @@ Perl_yylex(pTHX) case KEY_dump: PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE,FALSE); + s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_DUMP); case KEY_else: @@ -7852,7 +8017,7 @@ Perl_yylex(pTHX) case KEY_goto: PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE,FALSE); + s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_GOTO); case KEY_gmtime: @@ -7978,7 +8143,7 @@ Perl_yylex(pTHX) case KEY_last: PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE,FALSE); + s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_LAST); case KEY_lc: @@ -8086,7 +8251,7 @@ Perl_yylex(pTHX) case KEY_next: PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE,FALSE); + s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_NEXT); case KEY_ne: @@ -8122,11 +8287,9 @@ Perl_yylex(pTHX) && !(t[0] == ':' && t[1] == ':') && !keyword(s, d-s, 0) ) { - SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s), - SVs_TEMP | (UTF ? SVf_UTF8 : 0)); Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE), - "Precedence problem: open %"SVf" should be open(%"SVf")", - SVfARG(tmpsv), SVfARG(tmpsv)); + "Precedence problem: open %"UTF8f" should be open(%"UTF8f")", + UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s)); } } LOP(OP_OPEN,XTERM); @@ -8170,7 +8333,7 @@ Perl_yylex(pTHX) LOP(OP_PACK,XTERM); case KEY_package: - s = force_word(s,WORD,FALSE,TRUE,FALSE); + s = force_word(s,WORD,FALSE,TRUE); s = SKIPSPACE1(s); s = force_strict_version(s); PL_lex_expect = XBLOCK; @@ -8273,7 +8436,7 @@ Perl_yylex(pTHX) || (s = force_version(s, TRUE), *s == 'v')) { *PL_tokenbuf = '\0'; - s = force_word(s,WORD,TRUE,TRUE,FALSE); + s = force_word(s,WORD,TRUE,TRUE); if (isIDFIRST_lazy_if(PL_tokenbuf,UTF)) gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD | (UTF ? SVf_UTF8 : 0)); @@ -8298,7 +8461,7 @@ Perl_yylex(pTHX) case KEY_redo: PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE,FALSE); + s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_REDO); case KEY_rename: @@ -8439,7 +8602,7 @@ Perl_yylex(pTHX) checkcomma(s,PL_tokenbuf,"subroutine name"); s = SKIPSPACE1(s); PL_expect = XTERM; - s = force_word(s,WORD,TRUE,TRUE,FALSE); + s = force_word(s,WORD,TRUE,TRUE); LOP(OP_SORT,XREF); case KEY_split: @@ -8471,10 +8634,12 @@ Perl_yylex(pTHX) really_sub: { char * const tmpbuf = PL_tokenbuf + 1; - SSize_t tboffset = 0; expectation attrful; bool have_name, have_proto; const int key = tmp; +#ifndef PERL_MAD + SV *format_name = NULL; +#endif #ifdef PERL_MAD SV *tmpwhite = 0; @@ -8501,13 +8666,14 @@ Perl_yylex(pTHX) PL_expect = XBLOCK; attrful = XATTRBLOCK; - /* remember buffer pos'n for later force_word */ - tboffset = s - PL_oldbufptr; d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE, &len); #ifdef PERL_MAD if (PL_madskills) nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr)); +#else + if (key == KEY_format) + format_name = S_newSV_maybe_utf8(aTHX_ s, d - s); #endif *PL_tokenbuf = '&'; if (memchr(tmpbuf, ':', len) || key != KEY_sub @@ -8555,87 +8721,23 @@ Perl_yylex(pTHX) PL_thistoken = subtoken; s = d; #else - if (have_name) - (void) force_word(PL_oldbufptr + tboffset, WORD, - FALSE, TRUE, TRUE); + if (format_name) { + start_force(PL_curforce); + NEXTVAL_NEXTTOKE.opval + = (OP*)newSVOP(OP_CONST,0, format_name); + NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE; + force_next(WORD); + } #endif PREBLOCK(FORMAT); } /* Look for a prototype */ if (*s == '(') { - char *p; - bool bad_proto = FALSE; - bool in_brackets = FALSE; - char greedy_proto = ' '; - bool proto_after_greedy_proto = FALSE; - bool must_be_last = FALSE; - bool underscore = FALSE; - bool seen_underscore = FALSE; - const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO); - STRLEN tmplen; - s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE); if (!s) Perl_croak(aTHX_ "Prototype not terminated"); - /* strip spaces and check for bad characters */ - d = SvPV(PL_lex_stuff, tmplen); - tmp = 0; - for (p = d; tmplen; tmplen--, ++p) { - if (!isSPACE(*p)) { - d[tmp++] = *p; - - if (warnillegalproto) { - if (must_be_last) - proto_after_greedy_proto = TRUE; - if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') { - bad_proto = TRUE; - } - else { - if ( underscore ) { - if ( !strchr(";@%", *p) ) - bad_proto = TRUE; - underscore = FALSE; - } - if ( *p == '[' ) { - in_brackets = TRUE; - } - else if ( *p == ']' ) { - in_brackets = FALSE; - } - else if ( (*p == '@' || *p == '%') && - ( tmp < 2 || d[tmp-2] != '\\' ) && - !in_brackets ) { - must_be_last = TRUE; - greedy_proto = *p; - } - else if ( *p == '_' ) { - underscore = seen_underscore = TRUE; - } - } - } - } - } - d[tmp] = '\0'; - if (proto_after_greedy_proto) - Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), - "Prototype after '%c' for %"SVf" : %s", - greedy_proto, SVfARG(PL_subname), d); - if (bad_proto) { - SV *dsv = newSVpvs_flags("", SVs_TEMP); - Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), - "Illegal character %sin prototype for %"SVf" : %s", - seen_underscore ? "after '_' " : "", - SVfARG(PL_subname), - SvUTF8(PL_lex_stuff) - ? sv_uni_display(dsv, - newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8), - tmp, - UNI_DISPLAY_ISPRINT) - : pv_pretty(dsv, d, tmp, 60, NULL, NULL, - PERL_PV_ESCAPE_NONASCII)); - } - SvCUR_set(PL_lex_stuff, tmp); + (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO)); have_proto = TRUE; #ifdef PERL_MAD @@ -8676,6 +8778,7 @@ Perl_yylex(pTHX) force_next(0); PL_thistoken = subtoken; + PERL_UNUSED_VAR(have_proto); #else if (have_proto) { NEXTVAL_NEXTTOKE.opval = @@ -8971,9 +9074,9 @@ S_pending_ident(pTHX) { /* Downgraded from fatal to warning 20000522 mjd */ Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Possible unintended interpolation of %"SVf" in string", - SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len, - SVs_TEMP | ( UTF ? SVf_UTF8 : 0 )))); + "Possible unintended interpolation of %"UTF8f + " in string", + UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf)); } } @@ -9287,6 +9390,8 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck *d = '\0'; d = dest; if (*d) { + /* Either a digit variable, or parse_ident() found an identifier + (anything valid as a bareword), so job done and return. */ if (PL_lex_state != LEX_NORMAL) PL_lex_state = LEX_INTERPENDMAYBE; return s; @@ -9298,8 +9403,12 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck || s[1] == '{' || strnEQ(s+1,"::",2)) ) { + /* Dereferencing a value in a scalar variable. + The alternatives are different syntaxes for a scalar variable. + Using ' as a leading package separator isn't allowed. :: is. */ return s; } + /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */ if (*s == '{') { bracket = s; s++; @@ -9307,12 +9416,12 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck s++; } -#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)*(d)) \ - || isCNTRL_A((U8)*(d)) \ - || isDIGIT_A((U8)*(d)) \ - || (!(u) && !UTF8_IS_INVARIANT((U8)*(d)))) +#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \ + || isCNTRL_A((U8)(d)) \ + || isDIGIT_A((U8)(d)) \ + || (!(u) && !UTF8_IS_INVARIANT((U8)(d)))) if (s < send - && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(s, is_utf8))) + && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8))) { if (is_utf8) { const STRLEN skip = UTF8SKIP(s); @@ -9326,20 +9435,29 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck d[1] = '\0'; } } + /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */ if (*d == '^' && *s && isCONTROLVAR(*s)) { *d = toCTRL(*s); s++; } + /* Warn about ambiguous code after unary operators if {...} notation isn't + used. There's no difference in ambiguity; it's merely a heuristic + about when not to warn. */ else if (ck_uni && !bracket) check_uni(); if (bracket) { + /* If we were processing {...} notation then... */ if (isIDFIRST_lazy_if(d,is_utf8)) { + /* if it starts as a valid identifier, assume that it is one. + (the later check for } being at the expected point will trap + cases where this doesn't pan out.) */ d += is_utf8 ? UTF8SKIP(d) : 1; parse_ident(&s, &d, e, 1, is_utf8); *d = '\0'; while (s < send && SPACE_OR_TAB(*s)) s++; if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) { + /* ${foo[0]} and ${foo{bar}} notation. */ if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) { const char * const brack = (const char *) @@ -9357,7 +9475,7 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck } /* Handle extended ${^Foo} variables * 1999-02-27 mjd-perl-patch@plover.com */ - else if (!isWORDCHAR(*d) && !isPRINT(*d) /* isCTRL(d) */ + else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */ && isWORDCHAR(*s)) { d++; @@ -9372,6 +9490,8 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck while (s < send && SPACE_OR_TAB(*s)) s++; + /* Expect to find a closing } after consuming any trailing whitespace. + */ if (*s == '}') { s++; if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { @@ -9394,6 +9514,8 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck } } else { + /* Didn't find the closing } at the point we expected, so restore + state such that the next thing to process is the opening { and */ s = bracket; /* let the parser handle it */ *dest = '\0'; } @@ -9517,9 +9639,6 @@ S_scan_pat(pTHX_ char *start, I32 type) s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING), TRUE /* look for escaped bracketed metas */ ); - /* this was only needed for the initial scan_str; set it to false - * so that any (?{}) code blocks etc are parsed normally */ - PL_in_eval &= ~EVAL_RE_REPARSING; if (!s) { const char * const delimiter = skipspace(start); Perl_croak(aTHX_ @@ -10058,8 +10177,11 @@ S_scan_heredoc(pTHX_ char *s) } CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1); if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') { - lex_grow_linestr(SvCUR(PL_linestr) + 2); + s = lex_grow_linestr(SvLEN(PL_linestr) + 3); + /* ^That should be enough to avoid this needing to grow: */ sv_catpvs(PL_linestr, "\n\0"); + assert(s == SvPVX(PL_linestr)); + PL_bufend = SvEND(PL_linestr); } s = PL_bufptr; #ifdef PERL_MAD @@ -10425,8 +10547,49 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse, int offset = s - SvPVX_const(PL_linestr); const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr, &offset, (char*)termstr, termlen); - const char * const ns = SvPVX_const(PL_linestr) + offset; - char * const svlast = SvEND(sv) - 1; + const char *ns; + char *svlast; + + if (SvIsCOW(PL_linestr)) { + STRLEN bufend_pos, bufptr_pos, oldbufptr_pos; + STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos; + STRLEN last_lop_pos, re_eval_start_pos, s_pos; + char *buf = SvPVX(PL_linestr); + bufend_pos = PL_parser->bufend - buf; + bufptr_pos = PL_parser->bufptr - buf; + oldbufptr_pos = PL_parser->oldbufptr - buf; + oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; + linestart_pos = PL_parser->linestart - buf; + last_uni_pos = PL_parser->last_uni + ? PL_parser->last_uni - buf + : 0; + last_lop_pos = PL_parser->last_lop + ? PL_parser->last_lop - buf + : 0; + re_eval_start_pos = + PL_parser->lex_shared->re_eval_start ? + PL_parser->lex_shared->re_eval_start - buf : 0; + s_pos = s - buf; + + sv_force_normal(PL_linestr); + + buf = SvPVX(PL_linestr); + PL_parser->bufend = buf + bufend_pos; + PL_parser->bufptr = buf + bufptr_pos; + PL_parser->oldbufptr = buf + oldbufptr_pos; + PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; + PL_parser->linestart = buf + linestart_pos; + if (PL_parser->last_uni) + PL_parser->last_uni = buf + last_uni_pos; + if (PL_parser->last_lop) + PL_parser->last_lop = buf + last_lop_pos; + if (PL_parser->lex_shared->re_eval_start) + PL_parser->lex_shared->re_eval_start = + buf + re_eval_start_pos; + s = buf + s_pos; + } + ns = SvPVX_const(PL_linestr) + offset; + svlast = SvEND(sv) - 1; for (; s < ns; s++) { if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) @@ -11348,9 +11511,8 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ", OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop)); if (context) - Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n", - SVfARG(newSVpvn_flags(context, contlen, - SVs_TEMP | (UTF ? SVf_UTF8 : 0)))); + Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n", + UTF8fARG(UTF, contlen, context)); else Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv)); if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {