X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/a8d9c7ae5b2de9d5434563530be821c884d9a6a7..89720c60b1df65ae0ab0e5aab3d9ea23c8890c39:/toke.c diff --git a/toke.c b/toke.c index 24e794d..d891372 100644 --- a/toke.c +++ b/toke.c @@ -137,7 +137,7 @@ static const char* const ident_too_long = "Identifier too long"; * 1999-02-27 mjd-perl-patch@plover.com */ #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x))) -#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t') +#define SPACE_OR_TAB(c) isBLANK_A(c) /* LEX_* are values for PL_lex_state, the state of the lexer. * They are arranged oddly so that the guard on the switch statement @@ -427,7 +427,11 @@ S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp) if (name) Perl_sv_catpv(aTHX_ report, name); else if ((char)rv > ' ' && (char)rv <= '~') + { Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv); + if ((char)rv == 'p') + sv_catpvs(report, " (pending identifier)"); + } else if (!rv) sv_catpvs(report, "EOF"); else @@ -549,16 +553,14 @@ S_no_op(pTHX_ const char *const what, char *s) NOOP; if (t < PL_bufptr && isSPACE(*t)) Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "\t(Do you need to predeclare %"SVf"?)\n", - SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr), - SVs_TEMP | (UTF ? SVf_UTF8 : 0)))); + "\t(Do you need to predeclare %"UTF8f"?)\n", + UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr)); } else { assert(s >= oldbp); Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "\t(Missing operator before %"SVf"?)\n", - SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp), - SVs_TEMP | (UTF ? SVf_UTF8 : 0)))); + "\t(Missing operator before %"UTF8f"?)\n", + UTF8fARG(UTF, s - oldbp, oldbp)); } } PL_bufptr = oldbp; @@ -1510,14 +1512,16 @@ chunk will not be discarded. =cut */ +#define LEX_NO_INCLINE 0x40000000 #define LEX_NO_NEXT_CHUNK 0x80000000 void Perl_lex_read_space(pTHX_ U32 flags) { char *s, *bufend; + const bool can_incline = !(flags & LEX_NO_INCLINE); bool need_incline = 0; - if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK)) + if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE)) Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space"); #ifdef PERL_MAD if (PL_skipwhite) { @@ -1537,11 +1541,13 @@ Perl_lex_read_space(pTHX_ U32 flags) } while (!(c == '\n' || (c == 0 && s == bufend))); } else if (c == '\n') { s++; - PL_parser->linestart = s; - if (s == bufend) - need_incline = 1; - else - incline(s); + if (can_incline) { + PL_parser->linestart = s; + if (s == bufend) + need_incline = 1; + else + incline(s); + } } else if (isSPACE(c)) { s++; } else if (c == 0 && s == bufend) { @@ -1553,14 +1559,14 @@ Perl_lex_read_space(pTHX_ U32 flags) if (flags & LEX_NO_NEXT_CHUNK) break; PL_parser->bufptr = s; - COPLINE_INC_WITH_HERELINES; + if (can_incline) COPLINE_INC_WITH_HERELINES; got_more = lex_next_chunk(flags); - CopLINE_dec(PL_curcop); + if (can_incline) CopLINE_dec(PL_curcop); s = PL_parser->bufptr; bufend = PL_parser->bufend; if (!got_more) break; - if (need_incline && PL_parser->rsfp) { + if (can_incline && need_incline && PL_parser->rsfp) { incline(s); need_incline = 0; } @@ -1576,6 +1582,107 @@ Perl_lex_read_space(pTHX_ U32 flags) } /* + +=for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn + +This function performs syntax checking on a prototype, C. +If C is true, any illegal characters or mismatched brackets +will trigger illegalproto warnings, declaring that they were +detected in the prototype for C. + +The return value is C if this is a valid prototype, and +C if it is not, regardless of whether C was C or +C. + +Note that C is a valid C and will always return C. + +=cut + + */ + +bool +Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn) +{ + STRLEN len, origlen; + char *p = proto ? SvPV(proto, len) : NULL; + bool bad_proto = FALSE; + bool in_brackets = FALSE; + bool after_slash = FALSE; + char greedy_proto = ' '; + bool proto_after_greedy_proto = FALSE; + bool must_be_last = FALSE; + bool underscore = FALSE; + bool bad_proto_after_underscore = FALSE; + + PERL_ARGS_ASSERT_VALIDATE_PROTO; + + if (!proto) + return TRUE; + + origlen = len; + for (; len--; p++) { + if (!isSPACE(*p)) { + if (must_be_last) + proto_after_greedy_proto = TRUE; + if (underscore) { + if (!strchr(";@%", *p)) + bad_proto_after_underscore = TRUE; + underscore = FALSE; + } + if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') { + bad_proto = TRUE; + } + else { + if (*p == '[') + in_brackets = TRUE; + else if (*p == ']') + in_brackets = FALSE; + else if ((*p == '@' || *p == '%') && + !after_slash && + !in_brackets ) { + must_be_last = TRUE; + greedy_proto = *p; + } + else if (*p == '_') + underscore = TRUE; + } + if (*p == '\\') + after_slash = TRUE; + else + after_slash = FALSE; + } + } + + if (warn) { + SV *tmpsv = newSVpvs_flags("", SVs_TEMP); + p -= origlen; + p = SvUTF8(proto) + ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8), + origlen, UNI_DISPLAY_ISPRINT) + : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII); + + if (proto_after_greedy_proto) + Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), + "Prototype after '%c' for %"SVf" : %s", + greedy_proto, SVfARG(name), p); + if (in_brackets) + Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), + "Missing ']' in prototype for %"SVf" : %s", + SVfARG(name), p); + if (bad_proto) + Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), + "Illegal character in prototype for %"SVf" : %s", + SVfARG(name), p); + if (bad_proto_after_underscore) + Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), + "Illegal character after '_' in prototype for %"SVf" : %s", + SVfARG(name), p); + } + + return (! (proto_after_greedy_proto || bad_proto) ); +} + +/* * S_incline * This subroutine has nothing to do with tilting, whether at windmills * or pinball tables. Its name is short for "increment line". It @@ -1646,27 +1753,27 @@ S_incline(pTHX_ const char *s) if (t - s > 0) { const STRLEN len = t - s; - SV *const temp_sv = CopFILESV(PL_curcop); - const char *cf; - STRLEN tmplen; - - if (temp_sv) { - cf = SvPVX(temp_sv); - tmplen = SvCUR(temp_sv); - } else { - cf = NULL; - tmplen = 0; - } - if (!PL_rsfp && !PL_parser->filtered) { /* must copy *{"::_<(eval N)[oldfilename:L]"} * to *{"::_ 6 && strnEQ(s2, "CORE::", 6)) + s2 += 6, len -= 6; + if (keyword(s2, len, 0)) return start; + } start_force(PL_curforce); if (PL_madskills) curmad('X', newSVpvn(start,s-start)); @@ -2525,6 +2638,7 @@ S_sublex_push(pTHX) SAVEGENERICPV(PL_lex_brackstack); SAVEGENERICPV(PL_lex_casestack); SAVEGENERICPV(PL_parser->lex_shared); + SAVEBOOL(PL_parser->lex_re_reparsing); /* The here-doc parser needs to be able to peek into outer lexing scopes to find the body of the here-doc. So we put PL_linestr and @@ -2568,6 +2682,9 @@ S_sublex_push(pTHX) else PL_lex_inpat = NULL; + PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING); + PL_in_eval &= ~EVAL_RE_REPARSING; + return '('; } @@ -2724,13 +2841,17 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) if (! isCHARNAME_CONT(*s)) { goto bad_charname; } - if (*s == ' ' && *(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) { - Perl_warn(aTHX_ "A sequence of multiple spaces in a charnames alias definition is deprecated"); + if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) { + Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), + "A sequence of multiple spaces in a charnames " + "alias definition is deprecated"); } s++; } - if (*(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) { - Perl_warn(aTHX_ "Trailing white-space in a charnames alias definition is deprecated"); + if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) { + Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), + "Trailing white-space in a charnames alias " + "definition is deprecated"); } } else { @@ -2767,8 +2888,11 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) if (! isCHARNAME_CONT(*s)) { goto bad_charname; } - if (*s == ' ' && *(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) { - Perl_warn(aTHX_ "A sequence of multiple spaces in a charnames alias definition is deprecated"); + if (*s == ' ' && *(s-1) == ' ' + && ckWARN_d(WARN_DEPRECATED)) { + Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), + "A sequence of multiple spaces in a charnam" + "es alias definition is deprecated"); } s++; } @@ -2794,8 +2918,10 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) s += UTF8SKIP(s); } } - if (*(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) { - Perl_warn(aTHX_ "Trailing white-space in a charnames alias definition is deprecated"); + if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) { + Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), + "Trailing white-space in a charnames alias " + "definition is deprecated"); } } @@ -2863,7 +2989,8 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) In patterns: expand: - \N{ABC} => \N{U+41.42.43} + \N{FOO} => \N{U+hex_for_character_FOO} + (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...}) pass through: all other \-char, including \N and \N{ apart from \N{ABC} @@ -3158,12 +3285,12 @@ S_scan_const(pTHX_ char *start) * char, which will be done separately. * Stop on (?{..}) and friends */ - else if (*s == '(' && PL_lex_inpat && s[1] == '?') { + else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) { if (s[2] == '#') { while (s+1 < send && *s != ')') *d++ = NATIVE_TO_NEED(has_utf8,*s++); } - else if (!PL_lex_casemods && !in_charclass && + else if (!PL_lex_casemods && ( s[2] == '{' /* This should match regcomp.c */ || (s[2] == '?' && s[3] == '{'))) { @@ -3172,7 +3299,7 @@ S_scan_const(pTHX_ char *start) } /* likewise skip #-initiated comments in //x patterns */ - else if (*s == '#' && PL_lex_inpat && + else if (*s == '#' && PL_lex_inpat && !in_charclass && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) { while (s+1 < send && *s != '\n') *d++ = NATIVE_TO_NEED(has_utf8,*s++); @@ -3248,7 +3375,7 @@ S_scan_const(pTHX_ char *start) else if (PL_lex_inpat && (*s != 'N' || s[1] != '{' - || regcurly(s + 1))) + || regcurly(s + 1, FALSE))) { *d++ = NATIVE_TO_NEED(has_utf8,'\\'); goto default_action; @@ -3750,7 +3877,9 @@ S_scan_const(pTHX_ char *start) /* return the substring (via pl_yylval) only if we parsed anything */ if (s > PL_bufptr) { SvREFCNT_inc_simple_void_NN(sv); - if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) { + if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING )) + && ! PL_parser->lex_re_reparsing) + { const char *const key = PL_lex_inpat ? "qr" : "q"; const STRLEN keylen = PL_lex_inpat ? 2 : 1; const char *type; @@ -3818,7 +3947,7 @@ S_intuit_more(pTHX_ char *s) /* In a pattern, so maybe we have {n,m}. */ if (*s == '{') { - if (regcurly(s)) { + if (regcurly(s, FALSE)) { return FALSE; } return TRUE; @@ -3971,19 +4100,14 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv)) return 0; if (cv && SvPOK(cv)) { - const char *proto = CvPROTO(cv); - if (proto) { - if (*proto == ';') - proto++; - if (*proto == '*') - return 0; - } + const char *proto = CvPROTO(cv); + if (proto) { + while (*proto && (isSPACE(*proto) || *proto == ';')) + proto++; + if (*proto == '*') + return 0; + } } - s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); - /* start is the beginning of the possible filehandle/object, - * and s is the end of it - * tmpbuf is a copy of it - */ if (*start == '$') { if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY || @@ -4000,6 +4124,13 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) PL_expect = XREF; return *s == '(' ? FUNCMETH : METHOD; } + + s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); + /* start is the beginning of the possible filehandle/object, + * and s is the end of it + * tmpbuf is a copy of it (but with single quotes as double colons) + */ + if (!keyword(tmpbuf, len, 0)) { if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') { len -= 2; @@ -4530,12 +4661,12 @@ S_tokenize_use(pTHX_ int is_use, char *s) { force_next(WORD); } else if (*s == 'v') { - s = force_word(s,WORD,FALSE,TRUE,FALSE); + s = force_word(s,WORD,FALSE,TRUE); s = force_version(s, FALSE); } } else { - s = force_word(s,WORD,FALSE,TRUE,FALSE); + s = force_word(s,WORD,FALSE,TRUE); s = force_version(s, FALSE); } pl_yylval.ival = is_use; @@ -4614,6 +4745,7 @@ Perl_yylex(pTHX) char *d; STRLEN len; bool bof = FALSE; + const bool saw_infix_sigil = PL_parser->saw_infix_sigil; U8 formbrack = 0; U32 fake_eof = 0; @@ -4838,7 +4970,10 @@ Perl_yylex(pTHX) DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log, "### Interpolated variable\n"); }); PL_expect = XTERM; - PL_lex_dojoin = (*PL_bufptr == '@'); + /* for /@a/, we leave the joining for the regex engine to do + * (unless we're within \Q etc) */ + PL_lex_dojoin = (*PL_bufptr == '@' + && (!PL_lex_inpat || PL_lex_casemods)); PL_lex_state = LEX_INTERPNORMAL; if (PL_lex_dojoin) { start_force(PL_curforce); @@ -5010,9 +5145,12 @@ Perl_yylex(pTHX) return yylex(); } + /* We really do *not* want PL_linestr ever becoming a COW. */ + assert (!SvIsCOW(PL_linestr)); s = PL_bufptr; PL_oldoldbufptr = PL_oldbufptr; PL_oldbufptr = s; + PL_parser->saw_infix_sigil = 0; retry: #ifdef PERL_MAD @@ -5024,7 +5162,7 @@ Perl_yylex(pTHX) #endif switch (*s) { default: - if (isIDFIRST_lazy_if(s,UTF)) + if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s)) goto keylookup; { SV *dsv = newSVpvs_flags("", SVs_TEMP); @@ -5506,8 +5644,12 @@ Perl_yylex(pTHX) PL_bufend = s; */ } #else - *s = '\0'; - PL_bufend = s; + while (s < PL_bufend && *s != '\n') + s++; + if (s < PL_bufend) + s++; + else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */ + Perl_croak(aTHX_ "panic: input overflow"); #endif } goto retry; @@ -5524,7 +5666,7 @@ Perl_yylex(pTHX) s++; if (strnEQ(s,"=>",2)) { - s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE); + s = force_word(PL_bufptr,WORD,FALSE,FALSE); DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } ); OPERATOR('-'); /* unary minus */ } @@ -5596,7 +5738,7 @@ Perl_yylex(pTHX) s++; s = SKIPSPACE1(s); if (isIDFIRST_lazy_if(s,UTF)) { - s = force_word(s,METHOD,FALSE,TRUE,FALSE); + s = force_word(s,METHOD,FALSE,TRUE); TOKEN(ARROW); } else if (*s == '$') @@ -5668,6 +5810,7 @@ Perl_yylex(pTHX) s--; TOKEN(0); } + PL_parser->saw_infix_sigil = 1; Mop(OP_MULTIPLY); case '%': @@ -5676,6 +5819,7 @@ Perl_yylex(pTHX) PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) TOKEN(0); ++s; + PL_parser->saw_infix_sigil = 1; Mop(OP_MODULO); } PL_tokenbuf[0] = '%'; @@ -5710,6 +5854,9 @@ Perl_yylex(pTHX) if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) TOKEN(0); s += 2; + Perl_ck_warner_d(aTHX_ + packWARN(WARN_EXPERIMENTAL__SMARTMATCH), + "Smartmatch is experimental"); Eop(OP_SMARTMATCH); } s++; @@ -5772,7 +5919,7 @@ Perl_yylex(pTHX) } sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0); if (*d == '(') { - d = scan_str(d,TRUE,TRUE,FALSE); + d = scan_str(d,TRUE,TRUE,FALSE, FALSE); if (!d) { /* MUST advance bufptr here to avoid bogus "at end of line" context messages from yyerror(). @@ -5956,7 +6103,7 @@ Perl_yylex(pTHX) d++; if (*d == '}') { const char minus = (PL_tokenbuf[0] == '-'); - s = force_word(s + minus, WORD, FALSE, TRUE, FALSE); + s = force_word(s + minus, WORD, FALSE, TRUE); if (minus) force_next('-'); } @@ -6166,6 +6313,7 @@ Perl_yylex(pTHX) s--; TOKEN(0); } + PL_parser->saw_infix_sigil = 1; BAop(OP_BIT_AND); } @@ -6469,9 +6617,8 @@ Perl_yylex(pTHX) if (*t == ';' && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0)) Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "You need to quote \"%"SVf"\"", - SVfARG(newSVpvn_flags(tmpbuf, len, - SVs_TEMP | (UTF ? SVf_UTF8 : 0)))); + "You need to quote \"%"UTF8f"\"", + UTF8fARG(UTF, len, tmpbuf)); } } } @@ -6556,11 +6703,9 @@ Perl_yylex(pTHX) PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */ /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */ Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Scalar value %"SVf" better written as $%"SVf, - SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr), - SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))), - SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1), - SVs_TEMP | (UTF ? SVf_UTF8 : 0 )))); + "Scalar value %"UTF8f" better written as $%"UTF8f, + UTF8fARG(UTF, t-PL_bufptr, PL_bufptr), + UTF8fARG(UTF, t-PL_bufptr-1, PL_bufptr+1)); } } } @@ -6677,7 +6822,7 @@ Perl_yylex(pTHX) TERM(THING); case '\'': - s = scan_str(s,!!PL_madskills,FALSE,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE); DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); if (PL_expect == XOPERATOR) { if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { @@ -6692,7 +6837,7 @@ Perl_yylex(pTHX) TERM(sublex_start()); case '"': - s = scan_str(s,!!PL_madskills,FALSE,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE); DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); if (PL_expect == XOPERATOR) { if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { @@ -6715,7 +6860,7 @@ Perl_yylex(pTHX) TERM(sublex_start()); case '`': - s = scan_str(s,!!PL_madskills,FALSE,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE); DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } ); if (PL_expect == XOPERATOR) no_op("Backticks",s); @@ -6832,6 +6977,7 @@ Perl_yylex(pTHX) /* Is this a word before a => operator? */ if (*d == '=' && d[1] == '>') { + fat_arrow: CLINE; pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, @@ -6899,12 +7045,16 @@ Perl_yylex(pTHX) gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv), SVt_PVCV); off = 0; + if (!gv) { + sv_free(sv); + sv = NULL; + goto just_a_word; + } } else { rv2cv_op = newOP(OP_PADANY, 0); rv2cv_op->op_targ = off; - rv2cv_op = (OP*)newCVREF(0, rv2cv_op); - cv = (CV *)PAD_SV(off); + cv = find_lexical_cv(off); } lex = TRUE; goto just_a_word; @@ -6961,6 +7111,18 @@ Perl_yylex(pTHX) } } + if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__ + && (!anydelim || *s != '#')) { + /* no override, and not s### either; skipspace is safe here + * check for => on following line */ + STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr); + STRLEN soff = s - SvPVX(PL_linestr); + s = skipspace_flags(s, LEX_NO_INCLINE); + if (*s == '=' && s[1] == '>') goto fat_arrow; + PL_bufptr = SvPVX(PL_linestr) + bufoff; + s = SvPVX(PL_linestr) + soff; + } + reserved_word: switch (tmp) { @@ -6999,9 +7161,8 @@ Perl_yylex(pTHX) s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len, TRUE, &morelen); if (!morelen) - Perl_croak(aTHX_ "Bad name after %"SVf"%s", - SVfARG(newSVpvn_flags(PL_tokenbuf, len, - (UTF ? SVf_UTF8 : 0) | SVs_TEMP )), + Perl_croak(aTHX_ "Bad name after %"UTF8f"%s", + UTF8fARG(UTF, len, PL_tokenbuf), *s == '\'' ? "'" : "::"); len += morelen; pkgname = 1; @@ -7028,9 +7189,8 @@ Perl_yylex(pTHX) if (ckWARN(WARN_BAREWORD) && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV)) Perl_warner(aTHX_ packWARN(WARN_BAREWORD), - "Bareword \"%"SVf"\" refers to nonexistent package", - SVfARG(newSVpvn_flags(PL_tokenbuf, len, - (UTF ? SVf_UTF8 : 0) | SVs_TEMP))); + "Bareword \"%"UTF8f"\" refers to nonexistent package", + UTF8fARG(UTF, len, PL_tokenbuf)); len -= 2; PL_tokenbuf[len] = '\0'; gv = NULL; @@ -7146,9 +7306,13 @@ Perl_yylex(pTHX) if (*s == '=' && s[1] == '>' && !pkgname) { op_free(rv2cv_op); CLINE; + /* This is our own scalar, created a few lines above, + so this is safe. */ + SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv); sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf); if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len)) SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv); + SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv); TERM(WORD); } @@ -7159,7 +7323,7 @@ Perl_yylex(pTHX) d = s + 1; while (SPACE_OR_TAB(*d)) d++; - if (*d == ')' && (sv = cv_const_sv(cv))) { + if (*d == ')' && (sv = cv_const_sv_or_av(cv))) { s = d + 1; goto its_constant; } @@ -7216,24 +7380,32 @@ Perl_yylex(pTHX) if (cv) { if (lastchar == '-' && penultchar != '-') { - const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP ); + const STRLEN l = len ? len : strlen(PL_tokenbuf); Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), - "Ambiguous use of -%"SVf" resolved as -&%"SVf"()", - SVfARG(tmpsv), SVfARG(tmpsv)); + "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()", + UTF8fARG(UTF, l, PL_tokenbuf), + UTF8fARG(UTF, l, PL_tokenbuf)); } /* Check for a constant sub */ - if ((sv = cv_const_sv(cv))) { + if ((sv = cv_const_sv_or_av(cv))) { its_constant: op_free(rv2cv_op); SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv); ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv); - pl_yylval.opval->op_private = OPpCONST_FOLDED; - pl_yylval.opval->op_flags |= OPf_SPECIAL; + if (SvTYPE(sv) == SVt_PVAV) + pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS, + pl_yylval.opval); + else { + pl_yylval.opval->op_private = OPpCONST_FOLDED; + pl_yylval.opval->op_folded = 1; + pl_yylval.opval->op_flags |= OPf_SPECIAL; + } TOKEN(WORD); } op_free(pl_yylval.opval); - pl_yylval.opval = rv2cv_op; + pl_yylval.opval = + off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op; pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; PL_last_lop = PL_oldbufptr; PL_last_lop_op = OP_ENTERSUB; @@ -7247,6 +7419,7 @@ Perl_yylex(pTHX) STRLEN protolen = CvPROTOLEN(cv); const char *proto = CvPROTO(cv); bool optional; + proto = S_strip_spaces(aTHX_ proto, &protolen); if (!protolen) TERM(FUNC0SUB); if ((optional = *proto == ';')) @@ -7329,7 +7502,8 @@ Perl_yylex(pTHX) gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV); op_free(pl_yylval.opval); - pl_yylval.opval = rv2cv_op; + pl_yylval.opval = + off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op; pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; PL_last_lop = PL_oldbufptr; PL_last_lop_op = OP_ENTERSUB; @@ -7390,12 +7564,13 @@ Perl_yylex(pTHX) op_free(rv2cv_op); safe_bareword: - if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) { + if ((lastchar == '*' || lastchar == '%' || lastchar == '&') + && saw_infix_sigil) { Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), - "Operator or semicolon missing before %c%"SVf, - lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf, - strlen(PL_tokenbuf), - SVs_TEMP | (UTF ? SVf_UTF8 : 0)))); + "Operator or semicolon missing before %c%"UTF8f, + lastchar, + UTF8fARG(UTF, strlen(PL_tokenbuf), + PL_tokenbuf)); Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), "Ambiguous use of %c resolved as operator %c", lastchar, lastchar); @@ -7555,9 +7730,8 @@ Perl_yylex(pTHX) goto just_a_word; } if (!tmp) - Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword", - SVfARG(newSVpvn_flags(PL_tokenbuf, len, - (UTF ? SVf_UTF8 : 0) | SVs_TEMP))); + Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword", + UTF8fARG(UTF, len, PL_tokenbuf)); if (tmp < 0) tmp = -tmp; else if (tmp == KEY_require || tmp == KEY_do @@ -7710,7 +7884,7 @@ Perl_yylex(pTHX) case KEY_dump: PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE,FALSE); + s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_DUMP); case KEY_else: @@ -7843,7 +8017,7 @@ Perl_yylex(pTHX) case KEY_goto: PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE,FALSE); + s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_GOTO); case KEY_gmtime: @@ -7929,6 +8103,9 @@ Perl_yylex(pTHX) case KEY_given: pl_yylval.ival = CopLINE(PL_curcop); + Perl_ck_warner_d(aTHX_ + packWARN(WARN_EXPERIMENTAL__SMARTMATCH), + "given is experimental"); OPERATOR(GIVEN); case KEY_glob: @@ -7966,7 +8143,7 @@ Perl_yylex(pTHX) case KEY_last: PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE,FALSE); + s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_LAST); case KEY_lc: @@ -8074,7 +8251,7 @@ Perl_yylex(pTHX) case KEY_next: PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE,FALSE); + s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_NEXT); case KEY_ne: @@ -8099,15 +8276,9 @@ Perl_yylex(pTHX) case KEY_open: s = SKIPSPACE1(s); if (isIDFIRST_lazy_if(s,UTF)) { - const char *t; - for (d = s; isWORDCHAR_lazy_if(d,UTF);) { - d += UTF ? UTF8SKIP(d) : 1; - if (UTF) { - while (UTF8_IS_CONTINUED(*d) && _is_utf8_mark((U8*)d)) { - d += UTF ? UTF8SKIP(d) : 1; - } - } - } + const char *t; + d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, + &len); for (t=d; isSPACE(*t);) t++; if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE) @@ -8116,11 +8287,9 @@ Perl_yylex(pTHX) && !(t[0] == ':' && t[1] == ':') && !keyword(s, d-s, 0) ) { - SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s), - SVs_TEMP | (UTF ? SVf_UTF8 : 0)); Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE), - "Precedence problem: open %"SVf" should be open(%"SVf")", - SVfARG(tmpsv), SVfARG(tmpsv)); + "Precedence problem: open %"UTF8f" should be open(%"UTF8f")", + UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s)); } } LOP(OP_OPEN,XTERM); @@ -8164,7 +8333,7 @@ Perl_yylex(pTHX) LOP(OP_PACK,XTERM); case KEY_package: - s = force_word(s,WORD,FALSE,TRUE,FALSE); + s = force_word(s,WORD,FALSE,TRUE); s = SKIPSPACE1(s); s = force_strict_version(s); PL_lex_expect = XBLOCK; @@ -8174,7 +8343,7 @@ Perl_yylex(pTHX) LOP(OP_PIPE_OP,XTERM); case KEY_q: - s = scan_str(s,!!PL_madskills,FALSE,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE); if (!s) missingterm(NULL); pl_yylval.ival = OP_CONST; @@ -8185,7 +8354,7 @@ Perl_yylex(pTHX) case KEY_qw: { OP *words = NULL; - s = scan_str(s,!!PL_madskills,FALSE,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE); if (!s) missingterm(NULL); PL_expect = XOPERATOR; @@ -8235,7 +8404,7 @@ Perl_yylex(pTHX) } case KEY_qq: - s = scan_str(s,!!PL_madskills,FALSE,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE); if (!s) missingterm(NULL); pl_yylval.ival = OP_STRINGIFY; @@ -8248,7 +8417,7 @@ Perl_yylex(pTHX) TERM(sublex_start()); case KEY_qx: - s = scan_str(s,!!PL_madskills,FALSE,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE); if (!s) missingterm(NULL); readpipe_override(); @@ -8267,7 +8436,7 @@ Perl_yylex(pTHX) || (s = force_version(s, TRUE), *s == 'v')) { *PL_tokenbuf = '\0'; - s = force_word(s,WORD,TRUE,TRUE,FALSE); + s = force_word(s,WORD,TRUE,TRUE); if (isIDFIRST_lazy_if(PL_tokenbuf,UTF)) gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD | (UTF ? SVf_UTF8 : 0)); @@ -8292,7 +8461,7 @@ Perl_yylex(pTHX) case KEY_redo: PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE,FALSE); + s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_REDO); case KEY_rename: @@ -8433,7 +8602,7 @@ Perl_yylex(pTHX) checkcomma(s,PL_tokenbuf,"subroutine name"); s = SKIPSPACE1(s); PL_expect = XTERM; - s = force_word(s,WORD,TRUE,TRUE,FALSE); + s = force_word(s,WORD,TRUE,TRUE); LOP(OP_SORT,XREF); case KEY_split: @@ -8465,10 +8634,12 @@ Perl_yylex(pTHX) really_sub: { char * const tmpbuf = PL_tokenbuf + 1; - SSize_t tboffset = 0; expectation attrful; bool have_name, have_proto; const int key = tmp; +#ifndef PERL_MAD + SV *format_name = NULL; +#endif #ifdef PERL_MAD SV *tmpwhite = 0; @@ -8495,13 +8666,14 @@ Perl_yylex(pTHX) PL_expect = XBLOCK; attrful = XATTRBLOCK; - /* remember buffer pos'n for later force_word */ - tboffset = s - PL_oldbufptr; d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE, &len); #ifdef PERL_MAD if (PL_madskills) nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr)); +#else + if (key == KEY_format) + format_name = S_newSV_maybe_utf8(aTHX_ s, d - s); #endif *PL_tokenbuf = '&'; if (memchr(tmpbuf, ':', len) || key != KEY_sub @@ -8549,87 +8721,23 @@ Perl_yylex(pTHX) PL_thistoken = subtoken; s = d; #else - if (have_name) - (void) force_word(PL_oldbufptr + tboffset, WORD, - FALSE, TRUE, TRUE); + if (format_name) { + start_force(PL_curforce); + NEXTVAL_NEXTTOKE.opval + = (OP*)newSVOP(OP_CONST,0, format_name); + NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE; + force_next(WORD); + } #endif PREBLOCK(FORMAT); } /* Look for a prototype */ if (*s == '(') { - char *p; - bool bad_proto = FALSE; - bool in_brackets = FALSE; - char greedy_proto = ' '; - bool proto_after_greedy_proto = FALSE; - bool must_be_last = FALSE; - bool underscore = FALSE; - bool seen_underscore = FALSE; - const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO); - STRLEN tmplen; - - s = scan_str(s,!!PL_madskills,FALSE,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE); if (!s) Perl_croak(aTHX_ "Prototype not terminated"); - /* strip spaces and check for bad characters */ - d = SvPV(PL_lex_stuff, tmplen); - tmp = 0; - for (p = d; tmplen; tmplen--, ++p) { - if (!isSPACE(*p)) { - d[tmp++] = *p; - - if (warnillegalproto) { - if (must_be_last) - proto_after_greedy_proto = TRUE; - if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') { - bad_proto = TRUE; - } - else { - if ( underscore ) { - if ( !strchr(";@%", *p) ) - bad_proto = TRUE; - underscore = FALSE; - } - if ( *p == '[' ) { - in_brackets = TRUE; - } - else if ( *p == ']' ) { - in_brackets = FALSE; - } - else if ( (*p == '@' || *p == '%') && - ( tmp < 2 || d[tmp-2] != '\\' ) && - !in_brackets ) { - must_be_last = TRUE; - greedy_proto = *p; - } - else if ( *p == '_' ) { - underscore = seen_underscore = TRUE; - } - } - } - } - } - d[tmp] = '\0'; - if (proto_after_greedy_proto) - Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), - "Prototype after '%c' for %"SVf" : %s", - greedy_proto, SVfARG(PL_subname), d); - if (bad_proto) { - SV *dsv = newSVpvs_flags("", SVs_TEMP); - Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), - "Illegal character %sin prototype for %"SVf" : %s", - seen_underscore ? "after '_' " : "", - SVfARG(PL_subname), - SvUTF8(PL_lex_stuff) - ? sv_uni_display(dsv, - newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8), - tmp, - UNI_DISPLAY_ISPRINT) - : pv_pretty(dsv, d, tmp, 60, NULL, NULL, - PERL_PV_ESCAPE_NONASCII)); - } - SvCUR_set(PL_lex_stuff, tmp); + (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO)); have_proto = TRUE; #ifdef PERL_MAD @@ -8670,6 +8778,7 @@ Perl_yylex(pTHX) force_next(0); PL_thistoken = subtoken; + PERL_UNUSED_VAR(have_proto); #else if (have_proto) { NEXTVAL_NEXTTOKE.opval = @@ -8791,6 +8900,9 @@ Perl_yylex(pTHX) if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) return REPORT(0); pl_yylval.ival = CopLINE(PL_curcop); + Perl_ck_warner_d(aTHX_ + packWARN(WARN_EXPERIMENTAL__SMARTMATCH), + "when is experimental"); OPERATOR(WHEN); case KEY_while: @@ -8962,9 +9074,9 @@ S_pending_ident(pTHX) { /* Downgraded from fatal to warning 20000522 mjd */ Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Possible unintended interpolation of %"SVf" in string", - SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len, - SVs_TEMP | ( UTF ? SVf_UTF8 : 0 )))); + "Possible unintended interpolation of %"UTF8f + " in string", + UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf)); } } @@ -9038,7 +9150,9 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what) } } -/* Either returns sv, or mortalizes/frees sv and returns a new SV*. +/* S_new_constant(): do any overload::constant lookup. + + Either returns sv, or mortalizes/frees sv and returns a new SV*. Best used as sv=new_constant(..., sv, ...). If s, pv are NULL, calls subroutine with one argument, and is used with error messages only. @@ -9182,6 +9296,54 @@ now_ok: return res; } +PERL_STATIC_INLINE void +S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) { + dVAR; + PERL_ARGS_ASSERT_PARSE_IDENT; + + for (;;) { + if (*d >= e) + Perl_croak(aTHX_ "%s", ident_too_long); + if (is_utf8 && isIDFIRST_utf8((U8*)*s)) { + /* The UTF-8 case must come first, otherwise things + * like c\N{COMBINING TILDE} would start failing, as the + * isWORDCHAR_A case below would gobble the 'c' up. + */ + + char *t = *s + UTF8SKIP(*s); + while (isIDCONT_utf8((U8*)t)) + t += UTF8SKIP(t); + if (*d + (t - *s) > e) + Perl_croak(aTHX_ "%s", ident_too_long); + Copy(*s, *d, t - *s, char); + *d += t - *s; + *s = t; + } + else if ( isWORDCHAR_A(**s) ) { + do { + *(*d)++ = *(*s)++; + } while isWORDCHAR_A(**s); + } + else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) { + *(*d)++ = ':'; + *(*d)++ = ':'; + (*s)++; + } + else if (allow_package && **s == ':' && (*s)[1] == ':' + /* Disallow things like Foo::$bar. For the curious, this is + * the code path that triggers the "Bad name after" warning + * when looking for barewords. + */ + && (*s)[2] != '$') { + *(*d)++ = *(*s)++; + *(*d)++ = *(*s)++; + } + else + break; + } + return; +} + /* Returns a NUL terminated string, with the length of the string written to *slp */ @@ -9191,44 +9353,14 @@ S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN dVAR; char *d = dest; char * const e = d + destlen - 3; /* two-character token, ending NUL */ + bool is_utf8 = cBOOL(UTF); PERL_ARGS_ASSERT_SCAN_WORD; - for (;;) { - if (d >= e) - Perl_croak(aTHX_ ident_too_long); - if (isWORDCHAR(*s) - || (!UTF && isALPHANUMERIC_L1(*s))) /* UTF handled below */ - { - *d++ = *s++; - } - else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) { - *d++ = ':'; - *d++ = ':'; - s++; - } - else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) { - *d++ = *s++; - *d++ = *s++; - } - else if (UTF && UTF8_IS_START(*s) && isWORDCHAR_utf8((U8*)s)) { - char *t = s + UTF8SKIP(s); - size_t len; - while (UTF8_IS_CONTINUED(*t) && _is_utf8_mark((U8*)t)) - t += UTF8SKIP(t); - len = t - s; - if (d + len > e) - Perl_croak(aTHX_ ident_too_long); - Copy(s, d, len, char); - d += len; - s = t; - } - else { - *d = '\0'; - *slp = d - dest; - return s; - } - } + parse_ident(&s, &d, e, allow_package, is_utf8); + *d = '\0'; + *slp = d - dest; + return s; } STATIC char * @@ -9239,6 +9371,7 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck char funny = *s++; char *d = dest; char * const e = d + destlen - 3; /* two-character token, ending NUL */ + bool is_utf8 = cBOOL(UTF); PERL_ARGS_ASSERT_SCAN_IDENT; @@ -9247,57 +9380,50 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck if (isDIGIT(*s)) { while (isDIGIT(*s)) { if (d >= e) - Perl_croak(aTHX_ ident_too_long); + Perl_croak(aTHX_ "%s", ident_too_long); *d++ = *s++; } } else { - for (;;) { - if (d >= e) - Perl_croak(aTHX_ ident_too_long); - if (isWORDCHAR(*s)) /* UTF handled below */ - *d++ = *s++; - else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) { - *d++ = ':'; - *d++ = ':'; - s++; - } - else if (*s == ':' && s[1] == ':') { - *d++ = *s++; - *d++ = *s++; - } - else if (UTF && UTF8_IS_START(*s) && isWORDCHAR_utf8((U8*)s)) { - char *t = s + UTF8SKIP(s); - while (UTF8_IS_CONTINUED(*t) && _is_utf8_mark((U8*)t)) - t += UTF8SKIP(t); - if (d + (t - s) > e) - Perl_croak(aTHX_ ident_too_long); - Copy(s, d, t - s, char); - d += t - s; - s = t; - } - else - break; - } + parse_ident(&s, &d, e, 1, is_utf8); } *d = '\0'; d = dest; if (*d) { + /* Either a digit variable, or parse_ident() found an identifier + (anything valid as a bareword), so job done and return. */ if (PL_lex_state != LEX_NORMAL) PL_lex_state = LEX_INTERPENDMAYBE; return s; } if (*s == '$' && s[1] && - (isWORDCHAR_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) ) + (isIDFIRST_lazy_if(s+1,is_utf8) + || isDIGIT_A((U8)s[1]) + || s[1] == '$' + || s[1] == '{' + || strnEQ(s+1,"::",2)) ) { + /* Dereferencing a value in a scalar variable. + The alternatives are different syntaxes for a scalar variable. + Using ' as a leading package separator isn't allowed. :: is. */ return s; } + /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */ if (*s == '{') { bracket = s; s++; + while (s < send && SPACE_OR_TAB(*s)) + s++; } - if (s < send) { - if (UTF) { + +#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \ + || isCNTRL_A((U8)(d)) \ + || isDIGIT_A((U8)(d)) \ + || (!(u) && !UTF8_IS_INVARIANT((U8)(d)))) + if (s < send + && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8))) + { + if (is_utf8) { const STRLEN skip = UTF8SKIP(s); STRLEN i; d[skip] = '\0'; @@ -9309,45 +9435,29 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck d[1] = '\0'; } } + /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */ if (*d == '^' && *s && isCONTROLVAR(*s)) { *d = toCTRL(*s); s++; } + /* Warn about ambiguous code after unary operators if {...} notation isn't + used. There's no difference in ambiguity; it's merely a heuristic + about when not to warn. */ else if (ck_uni && !bracket) check_uni(); if (bracket) { - if (isSPACE(s[-1])) { - while (s < send) { - const char ch = *s++; - if (!SPACE_OR_TAB(ch)) { - *d = ch; - break; - } - } - } - if (isIDFIRST_lazy_if(d,UTF)) { - d += UTF8SKIP(d); - if (UTF) { - char *end = s; - while ((end < send && isWORDCHAR_lazy_if(end,UTF)) || *end == ':') { - end += UTF8SKIP(end); - while (end < send && UTF8_IS_CONTINUED(*end) && _is_utf8_mark((U8*)end)) - end += UTF8SKIP(end); - } - Copy(s, d, end - s, char); - d += end - s; - s = end; - } - else { - while ((isWORDCHAR(*s) || *s == ':') && d < e) - *d++ = *s++; - if (d >= e) - Perl_croak(aTHX_ ident_too_long); - } + /* If we were processing {...} notation then... */ + if (isIDFIRST_lazy_if(d,is_utf8)) { + /* if it starts as a valid identifier, assume that it is one. + (the later check for } being at the expected point will trap + cases where this doesn't pan out.) */ + d += is_utf8 ? UTF8SKIP(d) : 1; + parse_ident(&s, &d, e, 1, is_utf8); *d = '\0'; while (s < send && SPACE_OR_TAB(*s)) s++; if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) { + /* ${foo[0]} and ${foo{bar}} notation. */ if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) { const char * const brack = (const char *) @@ -9365,7 +9475,7 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck } /* Handle extended ${^Foo} variables * 1999-02-27 mjd-perl-patch@plover.com */ - else if (!isWORDCHAR(*d) && !isPRINT(*d) /* isCTRL(d) */ + else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */ && isWORDCHAR(*s)) { d++; @@ -9373,9 +9483,15 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck *d++ = *s++; } if (d >= e) - Perl_croak(aTHX_ ident_too_long); + Perl_croak(aTHX_ "%s", ident_too_long); *d = '\0'; } + + while (s < send && SPACE_OR_TAB(*s)) + s++; + + /* Expect to find a closing } after consuming any trailing whitespace. + */ if (*s == '}') { s++; if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { @@ -9385,10 +9501,10 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck if (PL_lex_state == LEX_NORMAL) { if (ckWARN(WARN_AMBIGUOUS) && (keyword(dest, d - dest, 0) - || get_cvn_flags(dest, d - dest, UTF ? SVf_UTF8 : 0))) + || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0))) { SV *tmp = newSVpvn_flags( dest, d - dest, - SVs_TEMP | (UTF ? SVf_UTF8 : 0) ); + SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) ); if (funny == '#') funny = '@'; Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), @@ -9398,6 +9514,8 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck } } else { + /* Didn't find the closing } at the point we expected, so restore + state such that the next thing to process is the opening { and */ s = bracket; /* let the parser handle it */ *dest = '\0'; } @@ -9508,7 +9626,7 @@ S_scan_pat(pTHX_ char *start, I32 type) { dVAR; PMOP *pm; - char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing); + char *s; const char * const valid_flags = (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS); char charset = '\0'; /* character set modifier */ @@ -9518,9 +9636,9 @@ S_scan_pat(pTHX_ char *start, I32 type) PERL_ARGS_ASSERT_SCAN_PAT; - /* this was only needed for the initial scan_str; set it to false - * so that any (?{}) code blocks etc are parsed normally */ - PL_reg_state.re_reparsing = FALSE; + s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING), + TRUE /* look for escaped bracketed metas */ ); + if (!s) { const char * const delimiter = skipspace(start); Perl_croak(aTHX_ @@ -9611,7 +9729,8 @@ S_scan_subst(pTHX_ char *start) pl_yylval.ival = OP_NULL; - s = scan_str(start,!!PL_madskills,FALSE,FALSE); + s = scan_str(start,!!PL_madskills,FALSE,FALSE, + TRUE /* look for escaped bracketed metas */ ); if (!s) Perl_croak(aTHX_ "Substitution pattern not terminated"); @@ -9629,7 +9748,7 @@ S_scan_subst(pTHX_ char *start) #endif first_start = PL_multi_start; - s = scan_str(s,!!PL_madskills,FALSE,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE); if (!s) { if (PL_lex_stuff) { SvREFCNT_dec(PL_lex_stuff); @@ -9715,7 +9834,7 @@ S_scan_trans(pTHX_ char *start) pl_yylval.ival = OP_NULL; - s = scan_str(start,!!PL_madskills,FALSE,FALSE); + s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE); if (!s) Perl_croak(aTHX_ "Transliteration pattern not terminated"); @@ -9731,7 +9850,7 @@ S_scan_trans(pTHX_ char *start) } #endif - s = scan_str(s,!!PL_madskills,FALSE,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE); if (!s) { if (PL_lex_stuff) { SvREFCNT_dec(PL_lex_stuff); @@ -9972,12 +10091,12 @@ S_scan_heredoc(pTHX_ char *s) linestr = shared->ls_linestr; bufend = SvEND(linestr); d = s; - while (s < bufend && - (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) { + while (s < bufend - len + 1 && + memNE(s,PL_tokenbuf,len) ) { if (*s++ == '\n') ++shared->herelines; } - if (s >= bufend) { + if (s >= bufend - len + 1) { goto interminable; } sv_setpvn(tmpstr,d+1,s-d); @@ -10058,8 +10177,11 @@ S_scan_heredoc(pTHX_ char *s) } CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1); if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') { - lex_grow_linestr(SvCUR(PL_linestr) + 2); + s = lex_grow_linestr(SvLEN(PL_linestr) + 3); + /* ^That should be enough to avoid this needing to grow: */ sv_catpvs(PL_linestr, "\n\0"); + assert(s == SvPVX(PL_linestr)); + PL_bufend = SvEND(PL_linestr); } s = PL_bufptr; #ifdef PERL_MAD @@ -10180,7 +10302,7 @@ S_scan_inputsymbol(pTHX_ char *start) if (d - PL_tokenbuf != len) { pl_yylval.ival = OP_GLOB; - s = scan_str(start,!!PL_madskills,FALSE,FALSE); + s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE); if (!s) Perl_croak(aTHX_ "Glob not terminated"); return s; @@ -10322,7 +10444,11 @@ intro_sym: */ STATIC char * -S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse) +S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse, + bool deprecate_escaped_meta /* Should we issue a deprecation warning + for certain paired metacharacters that + appear escaped within it */ + ) { dVAR; SV *sv; /* scalar value: string */ @@ -10336,6 +10462,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse) U8 termstr[UTF8_MAXBYTES]; /* terminating string */ STRLEN termlen; /* length of terminating string */ int last_off = 0; /* last position for nesting bracket */ + char *escaped_open = NULL; #ifdef PERL_MAD int stuffstart; char *tstart; @@ -10382,6 +10509,18 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse) PL_multi_close = term; + /* A warning is raised if the input parameter requires it for escaped (by a + * backslash) paired metacharacters {} [] and () when the delimiters are + * those same characters, and the backslash is ineffective. This doesn't + * happen for <>, as they aren't metas. */ + if (deprecate_escaped_meta + && (PL_multi_open == PL_multi_close + || ! ckWARN_d(WARN_DEPRECATED) + || PL_multi_open == '<')) + { + deprecate_escaped_meta = FALSE; + } + /* create a new SV to hold the contents. 79 is the SV's initial length. What a random number. */ sv = newSV_type(SVt_PVIV); @@ -10408,8 +10547,49 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse) int offset = s - SvPVX_const(PL_linestr); const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr, &offset, (char*)termstr, termlen); - const char * const ns = SvPVX_const(PL_linestr) + offset; - char * const svlast = SvEND(sv) - 1; + const char *ns; + char *svlast; + + if (SvIsCOW(PL_linestr)) { + STRLEN bufend_pos, bufptr_pos, oldbufptr_pos; + STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos; + STRLEN last_lop_pos, re_eval_start_pos, s_pos; + char *buf = SvPVX(PL_linestr); + bufend_pos = PL_parser->bufend - buf; + bufptr_pos = PL_parser->bufptr - buf; + oldbufptr_pos = PL_parser->oldbufptr - buf; + oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; + linestart_pos = PL_parser->linestart - buf; + last_uni_pos = PL_parser->last_uni + ? PL_parser->last_uni - buf + : 0; + last_lop_pos = PL_parser->last_lop + ? PL_parser->last_lop - buf + : 0; + re_eval_start_pos = + PL_parser->lex_shared->re_eval_start ? + PL_parser->lex_shared->re_eval_start - buf : 0; + s_pos = s - buf; + + sv_force_normal(PL_linestr); + + buf = SvPVX(PL_linestr); + PL_parser->bufend = buf + bufend_pos; + PL_parser->bufptr = buf + bufptr_pos; + PL_parser->oldbufptr = buf + oldbufptr_pos; + PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; + PL_parser->linestart = buf + linestart_pos; + if (PL_parser->last_uni) + PL_parser->last_uni = buf + last_uni_pos; + if (PL_parser->last_lop) + PL_parser->last_lop = buf + last_lop_pos; + if (PL_parser->lex_shared->re_eval_start) + PL_parser->lex_shared->re_eval_start = + buf + re_eval_start_pos; + s = buf + s_pos; + } + ns = SvPVX_const(PL_linestr) + offset; + svlast = SvEND(sv) - 1; for (; s < ns; s++) { if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) @@ -10520,7 +10700,57 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse) if (*s == '\\' && s+1 < PL_bufend) { if (!keep_quoted && ((s[1] == PL_multi_open) || (s[1] == PL_multi_close))) + { s++; + + /* Here, 'deprecate_escaped_meta' is true iff the + * delimiters are paired metacharacters, and 's' points + * to an occurrence of one of them within the string, + * which was preceded by a backslash. If this is a + * context where the delimiter is also a metacharacter, + * the backslash is useless, and deprecated. () and [] + * are meta in any context. {} are meta only when + * appearing in a quantifier or in things like '\p{' + * (but '\\p{' isn't meta). They also aren't meta + * unless there is a matching closed, escaped char + * later on within the string. If 's' points to an + * open, set a flag; if to a close, test that flag, and + * raise a warning if it was set */ + + if (deprecate_escaped_meta) { + if (*s == PL_multi_open) { + if (*s != '{') { + escaped_open = s; + } + /* Look for a closing '\}' */ + else if (regcurly(s, TRUE)) { + escaped_open = s; + } + /* Look for e.g. '\x{' */ + else if (s - start > 2 + && _generic_isCC(*(s-2), + _CC_BACKSLASH_FOO_LBRACE_IS_META)) + { /* Exclude '\\x', '\\\\x', etc. */ + char *lookbehind = s - 4; + bool is_meta = TRUE; + while (lookbehind >= start + && *lookbehind == '\\') + { + is_meta = ! is_meta; + lookbehind--; + } + if (is_meta) { + escaped_open = s; + } + } + } + else if (escaped_open) { + Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), + "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open); + escaped_open = NULL; + } + } + } else *to++ = *s++; } @@ -10886,7 +11116,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) else { /* check for end of fixed-length buffer */ if (d >= e) - Perl_croak(aTHX_ number_too_long); + Perl_croak(aTHX_ "%s", number_too_long); /* if we're ok, copy the character */ *d++ = *s++; } @@ -10916,7 +11146,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) for (; isDIGIT(*s) || *s == '_'; s++) { /* fixed length buffer check */ if (d >= e) - Perl_croak(aTHX_ number_too_long); + Perl_croak(aTHX_ "%s", number_too_long); if (*s == '_') { if (lastub && s == lastub + 1) Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), @@ -10968,7 +11198,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) while (isDIGIT(*s) || *s == '_') { if (isDIGIT(*s)) { if (d >= e) - Perl_croak(aTHX_ number_too_long); + Perl_croak(aTHX_ "%s", number_too_long); *d++ = *s++; } else { @@ -11294,9 +11524,8 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ", OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop)); if (context) - Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n", - SVfARG(newSVpvn_flags(context, contlen, - SVs_TEMP | (UTF ? SVf_UTF8 : 0)))); + Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n", + UTF8fARG(UTF, contlen, context)); else Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv)); if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {