X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/acc19697c67fa63c10e07491b670a26c48f4175f..bf1b738b6190342ba8d6c94bea457aa9c7c17d40:/toke.c diff --git a/toke.c b/toke.c index 3493c5b..362aa71 100644 --- a/toke.c +++ b/toke.c @@ -554,13 +554,13 @@ S_no_op(pTHX_ const char *const what, char *s) if (t < PL_bufptr && isSPACE(*t)) Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\t(Do you need to predeclare %"UTF8f"?)\n", - UTF, (STRLEN)(t - PL_oldoldbufptr), PL_oldoldbufptr); + UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr)); } else { assert(s >= oldbp); Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\t(Missing operator before %"UTF8f"?)\n", - UTF, (STRLEN)(s - oldbp), oldbp); + UTF8fARG(UTF, s - oldbp, oldbp)); } } PL_bufptr = oldbp; @@ -753,9 +753,9 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) parser->linestr = flags & LEX_START_COPIED ? SvREFCNT_inc_simple_NN(line) : newSVpvn_flags(s, len, SvUTF8(line)); - sv_catpvs(parser->linestr, "\n;"); + sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2); } else { - parser->linestr = newSVpvs("\n;"); + parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2); } parser->oldoldbufptr = parser->oldbufptr = @@ -1512,14 +1512,16 @@ chunk will not be discarded. =cut */ +#define LEX_NO_INCLINE 0x40000000 #define LEX_NO_NEXT_CHUNK 0x80000000 void Perl_lex_read_space(pTHX_ U32 flags) { char *s, *bufend; + const bool can_incline = !(flags & LEX_NO_INCLINE); bool need_incline = 0; - if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK)) + if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE)) Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space"); #ifdef PERL_MAD if (PL_skipwhite) { @@ -1539,11 +1541,13 @@ Perl_lex_read_space(pTHX_ U32 flags) } while (!(c == '\n' || (c == 0 && s == bufend))); } else if (c == '\n') { s++; - PL_parser->linestart = s; - if (s == bufend) - need_incline = 1; - else - incline(s); + if (can_incline) { + PL_parser->linestart = s; + if (s == bufend) + need_incline = 1; + else + incline(s); + } } else if (isSPACE(c)) { s++; } else if (c == 0 && s == bufend) { @@ -1555,14 +1559,14 @@ Perl_lex_read_space(pTHX_ U32 flags) if (flags & LEX_NO_NEXT_CHUNK) break; PL_parser->bufptr = s; - COPLINE_INC_WITH_HERELINES; + if (can_incline) COPLINE_INC_WITH_HERELINES; got_more = lex_next_chunk(flags); - CopLINE_dec(PL_curcop); + if (can_incline) CopLINE_dec(PL_curcop); s = PL_parser->bufptr; bufend = PL_parser->bufend; if (!got_more) break; - if (need_incline && PL_parser->rsfp) { + if (can_incline && need_incline && PL_parser->rsfp) { incline(s); need_incline = 0; } @@ -1578,6 +1582,107 @@ Perl_lex_read_space(pTHX_ U32 flags) } /* + +=for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn + +This function performs syntax checking on a prototype, C. +If C is true, any illegal characters or mismatched brackets +will trigger illegalproto warnings, declaring that they were +detected in the prototype for C. + +The return value is C if this is a valid prototype, and +C if it is not, regardless of whether C was C or +C. + +Note that C is a valid C and will always return C. + +=cut + + */ + +bool +Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn) +{ + STRLEN len, origlen; + char *p = proto ? SvPV(proto, len) : NULL; + bool bad_proto = FALSE; + bool in_brackets = FALSE; + bool after_slash = FALSE; + char greedy_proto = ' '; + bool proto_after_greedy_proto = FALSE; + bool must_be_last = FALSE; + bool underscore = FALSE; + bool bad_proto_after_underscore = FALSE; + + PERL_ARGS_ASSERT_VALIDATE_PROTO; + + if (!proto) + return TRUE; + + origlen = len; + for (; len--; p++) { + if (!isSPACE(*p)) { + if (must_be_last) + proto_after_greedy_proto = TRUE; + if (underscore) { + if (!strchr(";@%", *p)) + bad_proto_after_underscore = TRUE; + underscore = FALSE; + } + if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') { + bad_proto = TRUE; + } + else { + if (*p == '[') + in_brackets = TRUE; + else if (*p == ']') + in_brackets = FALSE; + else if ((*p == '@' || *p == '%') && + !after_slash && + !in_brackets ) { + must_be_last = TRUE; + greedy_proto = *p; + } + else if (*p == '_') + underscore = TRUE; + } + if (*p == '\\') + after_slash = TRUE; + else + after_slash = FALSE; + } + } + + if (warn) { + SV *tmpsv = newSVpvs_flags("", SVs_TEMP); + p -= origlen; + p = SvUTF8(proto) + ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8), + origlen, UNI_DISPLAY_ISPRINT) + : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII); + + if (proto_after_greedy_proto) + Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), + "Prototype after '%c' for %"SVf" : %s", + greedy_proto, SVfARG(name), p); + if (in_brackets) + Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), + "Missing ']' in prototype for %"SVf" : %s", + SVfARG(name), p); + if (bad_proto) + Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), + "Illegal character in prototype for %"SVf" : %s", + SVfARG(name), p); + if (bad_proto_after_underscore) + Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), + "Illegal character after '_' in prototype for %"SVf" : %s", + SVfARG(name), p); + } + + return (! (proto_after_greedy_proto || bad_proto) ); +} + +/* * S_incline * This subroutine has nothing to do with tilting, whether at windmills * or pinball tables. Its name is short for "increment line". It @@ -1648,37 +1753,16 @@ S_incline(pTHX_ const char *s) if (t - s > 0) { const STRLEN len = t - s; - SV *const temp_sv = CopFILESV(PL_curcop); - const char *cf; - STRLEN tmplen; - - if (temp_sv) { - cf = SvPVX(temp_sv); - tmplen = SvCUR(temp_sv); - } else { - cf = NULL; - tmplen = 0; - } if (!PL_rsfp && !PL_parser->filtered) { /* must copy *{"::_<(eval N)[oldfilename:L]"} * to *{"::_ 0) { @@ -1725,7 +1805,6 @@ S_incline(pTHX_ const char *s) if (tmpbuf2 != smallbuf) Safefree(tmpbuf2); } - if (tmpbuf != smallbuf) Safefree(tmpbuf); } CopFILE_free(PL_curcop); CopFILE_setn(PL_curcop, s, len); @@ -1733,6 +1812,8 @@ S_incline(pTHX_ const char *s) CopLINE_set(PL_curcop, line_num); } +#define skipspace(s) skipspace_flags(s, 0) + #ifdef PERL_MAD /* skip space before PL_thistoken */ @@ -1788,13 +1869,11 @@ STATIC char * S_skipspace2(pTHX_ char *s, SV **svp) { char *start; - const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr); const I32 startoff = s - SvPVX(PL_linestr); PERL_ARGS_ASSERT_SKIPSPACE2; s = skipspace(s); - PL_bufptr = SvPVX(PL_linestr) + bufptroff; if (!PL_madskills || !svp) return s; start = SvPVX(PL_linestr) + startoff; @@ -1822,7 +1901,7 @@ S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len) if (av) { SV * const sv = newSV_type(SVt_PVMG); if (orig_sv) - sv_setsv(sv, orig_sv); + sv_setsv_flags(sv, orig_sv, 0); /* no cow */ else sv_setpvn(sv, buf, len); (void)SvIOK_on(sv); @@ -1838,12 +1917,12 @@ S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len) */ STATIC char * -S_skipspace(pTHX_ char *s) +S_skipspace_flags(pTHX_ char *s, U32 flags) { #ifdef PERL_MAD char *start = s; #endif /* PERL_MAD */ - PERL_ARGS_ASSERT_SKIPSPACE; + PERL_ARGS_ASSERT_SKIPSPACE_FLAGS; #ifdef PERL_MAD if (PL_skipwhite) { sv_free(PL_skipwhite); @@ -1856,7 +1935,7 @@ S_skipspace(pTHX_ char *s) } else { STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr); PL_bufptr = s; - lex_read_space(LEX_KEEP_PREVIOUS | + lex_read_space(flags | LEX_KEEP_PREVIOUS | (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ? LEX_NO_NEXT_CHUNK : 0)); s = PL_bufptr; @@ -2469,7 +2548,7 @@ S_sublex_start(pTHX) return THING; } else if (op_type == OP_BACKTICK && PL_lex_op) { - /* readpipe() vas overriden */ + /* readpipe() was overridden */ cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff); pl_yylval.opval = PL_lex_op; PL_lex_op = NULL; @@ -3178,12 +3257,12 @@ S_scan_const(pTHX_ char *start) * char, which will be done separately. * Stop on (?{..}) and friends */ - else if (*s == '(' && PL_lex_inpat && s[1] == '?') { + else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) { if (s[2] == '#') { while (s+1 < send && *s != ')') *d++ = NATIVE_TO_NEED(has_utf8,*s++); } - else if (!PL_lex_casemods && !in_charclass && + else if (!PL_lex_casemods && ( s[2] == '{' /* This should match regcomp.c */ || (s[2] == '?' && s[3] == '{'))) { @@ -3192,7 +3271,7 @@ S_scan_const(pTHX_ char *start) } /* likewise skip #-initiated comments in //x patterns */ - else if (*s == '#' && PL_lex_inpat && + else if (*s == '#' && PL_lex_inpat && !in_charclass && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) { while (s+1 < send && *s != '\n') *d++ = NATIVE_TO_NEED(has_utf8,*s++); @@ -4638,6 +4717,7 @@ Perl_yylex(pTHX) char *d; STRLEN len; bool bof = FALSE; + const bool saw_infix_sigil = PL_parser->saw_infix_sigil; U8 formbrack = 0; U32 fake_eof = 0; @@ -5037,9 +5117,12 @@ Perl_yylex(pTHX) return yylex(); } + /* We really do *not* want PL_linestr ever becoming a COW. */ + assert (!SvIsCOW(PL_linestr)); s = PL_bufptr; PL_oldoldbufptr = PL_oldbufptr; PL_oldbufptr = s; + PL_parser->saw_infix_sigil = 0; retry: #ifdef PERL_MAD @@ -5515,7 +5598,11 @@ Perl_yylex(pTHX) while (d < PL_bufend && *d != '\n') d++; if (d < PL_bufend) + { d++; + if (d < PL_bufend) + incline(s); + } else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */ Perl_croak(aTHX_ "panic: input overflow"); if (PL_madskills && CopLINE(PL_curcop) >= 1) { @@ -5533,8 +5620,17 @@ Perl_yylex(pTHX) PL_bufend = s; */ } #else - *s = '\0'; - PL_bufend = s; + while (s < PL_bufend && *s != '\n') + s++; + if (s < PL_bufend) + { + s++; + if (s < PL_bufend) + incline(s); + } + + else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */ + Perl_croak(aTHX_ "panic: input overflow"); #endif } goto retry; @@ -5695,6 +5791,7 @@ Perl_yylex(pTHX) s--; TOKEN(0); } + PL_parser->saw_infix_sigil = 1; Mop(OP_MULTIPLY); case '%': @@ -5703,6 +5800,7 @@ Perl_yylex(pTHX) PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) TOKEN(0); ++s; + PL_parser->saw_infix_sigil = 1; Mop(OP_MODULO); } PL_tokenbuf[0] = '%'; @@ -6196,6 +6294,7 @@ Perl_yylex(pTHX) s--; TOKEN(0); } + PL_parser->saw_infix_sigil = 1; BAop(OP_BIT_AND); } @@ -6500,7 +6599,7 @@ Perl_yylex(pTHX) && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0)) Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "You need to quote \"%"UTF8f"\"", - UTF, len, tmpbuf); + UTF8fARG(UTF, len, tmpbuf)); } } } @@ -6586,8 +6685,8 @@ Perl_yylex(pTHX) /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */ Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Scalar value %"UTF8f" better written as $%"UTF8f, - UTF, (STRLEN)(t-PL_bufptr), PL_bufptr, - UTF, (STRLEN)(t-PL_bufptr-1), PL_bufptr+1); + UTF8fARG(UTF, t-PL_bufptr, PL_bufptr), + UTF8fARG(UTF, t-PL_bufptr-1, PL_bufptr+1)); } } } @@ -6859,6 +6958,7 @@ Perl_yylex(pTHX) /* Is this a word before a => operator? */ if (*d == '=' && d[1] == '>') { + fat_arrow: CLINE; pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, @@ -6992,6 +7092,18 @@ Perl_yylex(pTHX) } } + if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__ + && (!anydelim || *s != '#')) { + /* no override, and not s### either; skipspace is safe here + * check for => on following line */ + STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr); + STRLEN soff = s - SvPVX(PL_linestr); + s = skipspace_flags(s, LEX_NO_INCLINE); + if (*s == '=' && s[1] == '>') goto fat_arrow; + PL_bufptr = SvPVX(PL_linestr) + bufoff; + s = SvPVX(PL_linestr) + soff; + } + reserved_word: switch (tmp) { @@ -7031,7 +7143,7 @@ Perl_yylex(pTHX) TRUE, &morelen); if (!morelen) Perl_croak(aTHX_ "Bad name after %"UTF8f"%s", - UTF, len, PL_tokenbuf, + UTF8fARG(UTF, len, PL_tokenbuf), *s == '\'' ? "'" : "::"); len += morelen; pkgname = 1; @@ -7059,7 +7171,7 @@ Perl_yylex(pTHX) && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV)) Perl_warner(aTHX_ packWARN(WARN_BAREWORD), "Bareword \"%"UTF8f"\" refers to nonexistent package", - UTF, len, PL_tokenbuf); + UTF8fARG(UTF, len, PL_tokenbuf)); len -= 2; PL_tokenbuf[len] = '\0'; gv = NULL; @@ -7192,7 +7304,7 @@ Perl_yylex(pTHX) d = s + 1; while (SPACE_OR_TAB(*d)) d++; - if (*d == ')' && (sv = cv_const_sv(cv))) { + if (*d == ')' && (sv = cv_const_sv_or_av(cv))) { s = d + 1; goto its_constant; } @@ -7252,16 +7364,23 @@ Perl_yylex(pTHX) const STRLEN l = len ? len : strlen(PL_tokenbuf); Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()", - UTF, l, PL_tokenbuf, UTF, l, PL_tokenbuf); + UTF8fARG(UTF, l, PL_tokenbuf), + UTF8fARG(UTF, l, PL_tokenbuf)); } /* Check for a constant sub */ - if ((sv = cv_const_sv(cv))) { + if ((sv = cv_const_sv_or_av(cv))) { its_constant: op_free(rv2cv_op); SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv); ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv); - pl_yylval.opval->op_private = OPpCONST_FOLDED; - pl_yylval.opval->op_flags |= OPf_SPECIAL; + if (SvTYPE(sv) == SVt_PVAV) + pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS, + pl_yylval.opval); + else { + pl_yylval.opval->op_private = OPpCONST_FOLDED; + pl_yylval.opval->op_folded = 1; + pl_yylval.opval->op_flags |= OPf_SPECIAL; + } TOKEN(WORD); } @@ -7281,6 +7400,7 @@ Perl_yylex(pTHX) STRLEN protolen = CvPROTOLEN(cv); const char *proto = CvPROTO(cv); bool optional; + proto = S_strip_spaces(aTHX_ proto, &protolen); if (!protolen) TERM(FUNC0SUB); if ((optional = *proto == ';')) @@ -7425,11 +7545,13 @@ Perl_yylex(pTHX) op_free(rv2cv_op); safe_bareword: - if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) { + if ((lastchar == '*' || lastchar == '%' || lastchar == '&') + && saw_infix_sigil) { Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), "Operator or semicolon missing before %c%"UTF8f, - lastchar, UTF, strlen(PL_tokenbuf), - PL_tokenbuf); + lastchar, + UTF8fARG(UTF, strlen(PL_tokenbuf), + PL_tokenbuf)); Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), "Ambiguous use of %c resolved as operator %c", lastchar, lastchar); @@ -7590,7 +7712,7 @@ Perl_yylex(pTHX) } if (!tmp) Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword", - UTF, len, PL_tokenbuf); + UTF8fARG(UTF, len, PL_tokenbuf)); if (tmp < 0) tmp = -tmp; else if (tmp == KEY_require || tmp == KEY_do @@ -8146,11 +8268,9 @@ Perl_yylex(pTHX) && !(t[0] == ':' && t[1] == ':') && !keyword(s, d-s, 0) ) { - SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s), - SVs_TEMP | (UTF ? SVf_UTF8 : 0)); Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE), "Precedence problem: open %"UTF8f" should be open(%"UTF8f")", - UTF, (STRLEN)(d-s), s, UTF, (STRLEN)(d-s), s); + UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s)); } } LOP(OP_OPEN,XTERM); @@ -8498,7 +8618,9 @@ Perl_yylex(pTHX) expectation attrful; bool have_name, have_proto; const int key = tmp; +#ifndef PERL_MAD SV *format_name = NULL; +#endif #ifdef PERL_MAD SV *tmpwhite = 0; @@ -8530,9 +8652,10 @@ Perl_yylex(pTHX) #ifdef PERL_MAD if (PL_madskills) nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr)); -#endif +#else if (key == KEY_format) format_name = S_newSV_maybe_utf8(aTHX_ s, d - s); +#endif *PL_tokenbuf = '&'; if (memchr(tmpbuf, ':', len) || key != KEY_sub || pad_findmy_pvn( @@ -8581,8 +8704,6 @@ Perl_yylex(pTHX) #else if (format_name) { start_force(PL_curforce); - if (PL_madskills) - curmad('X', newSVpvn(start,s-start)); NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST,0, format_name); NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE; @@ -8594,78 +8715,10 @@ Perl_yylex(pTHX) /* Look for a prototype */ if (*s == '(') { - char *p; - bool bad_proto = FALSE; - bool in_brackets = FALSE; - char greedy_proto = ' '; - bool proto_after_greedy_proto = FALSE; - bool must_be_last = FALSE; - bool underscore = FALSE; - bool seen_underscore = FALSE; - const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO); - STRLEN tmplen; - s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE); if (!s) Perl_croak(aTHX_ "Prototype not terminated"); - /* strip spaces and check for bad characters */ - d = SvPV(PL_lex_stuff, tmplen); - tmp = 0; - for (p = d; tmplen; tmplen--, ++p) { - if (!isSPACE(*p)) { - d[tmp++] = *p; - - if (warnillegalproto) { - if (must_be_last) - proto_after_greedy_proto = TRUE; - if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') { - bad_proto = TRUE; - } - else { - if ( underscore ) { - if ( !strchr(";@%", *p) ) - bad_proto = TRUE; - underscore = FALSE; - } - if ( *p == '[' ) { - in_brackets = TRUE; - } - else if ( *p == ']' ) { - in_brackets = FALSE; - } - else if ( (*p == '@' || *p == '%') && - ( tmp < 2 || d[tmp-2] != '\\' ) && - !in_brackets ) { - must_be_last = TRUE; - greedy_proto = *p; - } - else if ( *p == '_' ) { - underscore = seen_underscore = TRUE; - } - } - } - } - } - d[tmp] = '\0'; - if (proto_after_greedy_proto) - Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), - "Prototype after '%c' for %"SVf" : %s", - greedy_proto, SVfARG(PL_subname), d); - if (bad_proto) { - SV *dsv = newSVpvs_flags("", SVs_TEMP); - Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), - "Illegal character %sin prototype for %"SVf" : %s", - seen_underscore ? "after '_' " : "", - SVfARG(PL_subname), - SvUTF8(PL_lex_stuff) - ? sv_uni_display(dsv, - newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8), - tmp, - UNI_DISPLAY_ISPRINT) - : pv_pretty(dsv, d, tmp, 60, NULL, NULL, - PERL_PV_ESCAPE_NONASCII)); - } - SvCUR_set(PL_lex_stuff, tmp); + (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO)); have_proto = TRUE; #ifdef PERL_MAD @@ -9004,7 +9057,7 @@ S_pending_ident(pTHX) Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), "Possible unintended interpolation of %"UTF8f " in string", - UTF, tokenbuf_len, PL_tokenbuf); + UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf)); } } @@ -10001,7 +10054,7 @@ S_scan_heredoc(pTHX_ char *s) /* shared is only null if we have gone beyond the outermost lexing scope. In a file, we will have broken out of the loop in the previous iteration. In an eval, the string buf- - fer ends with "\n;", so the while condition below will have + fer ends with "\n;", so the while condition above will have evaluated to false. So shared can never be null. */ assert(shared); /* A LEXSHARED struct with a null ls_prev pointer is the outer- @@ -10105,8 +10158,11 @@ S_scan_heredoc(pTHX_ char *s) } CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1); if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') { - lex_grow_linestr(SvCUR(PL_linestr) + 2); + s = lex_grow_linestr(SvLEN(PL_linestr) + 3); + /* ^That should be enough to avoid this needing to grow: */ sv_catpvs(PL_linestr, "\n\0"); + assert(s == SvPVX(PL_linestr)); + PL_bufend = SvEND(PL_linestr); } s = PL_bufptr; #ifdef PERL_MAD @@ -10472,8 +10528,49 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse, int offset = s - SvPVX_const(PL_linestr); const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr, &offset, (char*)termstr, termlen); - const char * const ns = SvPVX_const(PL_linestr) + offset; - char * const svlast = SvEND(sv) - 1; + const char *ns; + char *svlast; + + if (SvIsCOW(PL_linestr)) { + STRLEN bufend_pos, bufptr_pos, oldbufptr_pos; + STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos; + STRLEN last_lop_pos, re_eval_start_pos, s_pos; + char *buf = SvPVX(PL_linestr); + bufend_pos = PL_parser->bufend - buf; + bufptr_pos = PL_parser->bufptr - buf; + oldbufptr_pos = PL_parser->oldbufptr - buf; + oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; + linestart_pos = PL_parser->linestart - buf; + last_uni_pos = PL_parser->last_uni + ? PL_parser->last_uni - buf + : 0; + last_lop_pos = PL_parser->last_lop + ? PL_parser->last_lop - buf + : 0; + re_eval_start_pos = + PL_parser->lex_shared->re_eval_start ? + PL_parser->lex_shared->re_eval_start - buf : 0; + s_pos = s - buf; + + sv_force_normal(PL_linestr); + + buf = SvPVX(PL_linestr); + PL_parser->bufend = buf + bufend_pos; + PL_parser->bufptr = buf + bufptr_pos; + PL_parser->oldbufptr = buf + oldbufptr_pos; + PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; + PL_parser->linestart = buf + linestart_pos; + if (PL_parser->last_uni) + PL_parser->last_uni = buf + last_uni_pos; + if (PL_parser->last_lop) + PL_parser->last_lop = buf + last_lop_pos; + if (PL_parser->lex_shared->re_eval_start) + PL_parser->lex_shared->re_eval_start = + buf + re_eval_start_pos; + s = buf + s_pos; + } + ns = SvPVX_const(PL_linestr) + offset; + svlast = SvEND(sv) - 1; for (; s < ns; s++) { if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) @@ -10594,26 +10691,39 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse, * context where the delimiter is also a metacharacter, * the backslash is useless, and deprecated. () and [] * are meta in any context. {} are meta only when - * appearing in a quantifier or in things like '\p{'. - * They also aren't meta unless there is a matching - * closed, escaped char later on within the string. - * If 's' points to an open, set a flag; if to a close, - * test that flag, and raise a warning if it was set */ + * appearing in a quantifier or in things like '\p{' + * (but '\\p{' isn't meta). They also aren't meta + * unless there is a matching closed, escaped char + * later on within the string. If 's' points to an + * open, set a flag; if to a close, test that flag, and + * raise a warning if it was set */ if (deprecate_escaped_meta) { if (*s == PL_multi_open) { if (*s != '{') { escaped_open = s; } - else if (regcurly(s, - TRUE /* Look for a closing - '\}' */) - || (s - start > 2 /* Look for e.g. - '\x{' */ - && _generic_isCC(*(s-2), _CC_BACKSLASH_FOO_LBRACE_IS_META))) - { + /* Look for a closing '\}' */ + else if (regcurly(s, TRUE)) { escaped_open = s; } + /* Look for e.g. '\x{' */ + else if (s - start > 2 + && _generic_isCC(*(s-2), + _CC_BACKSLASH_FOO_LBRACE_IS_META)) + { /* Exclude '\\x', '\\\\x', etc. */ + char *lookbehind = s - 4; + bool is_meta = TRUE; + while (lookbehind >= start + && *lookbehind == '\\') + { + is_meta = ! is_meta; + lookbehind--; + } + if (is_meta) { + escaped_open = s; + } + } } else if (escaped_open) { Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), @@ -11396,7 +11506,7 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop)); if (context) Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n", - UTF, contlen, context); + UTF8fARG(UTF, contlen, context)); else Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv)); if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {