X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/82a7b38ebca5ead4666341f73512665b295afd51..fedc1b0e2d9cec34b7e3b1fa65dd0f7eb4f539fd:/toke.c diff --git a/toke.c b/toke.c index f48fa28..0eeafd4 100644 --- a/toke.c +++ b/toke.c @@ -99,9 +99,9 @@ static const char* const ident_too_long = "Identifier too long"; #define XFAKEBRACK 0x80 #ifdef USE_UTF8_SCRIPTS -# define UTF (!IN_BYTES) +# define UTF cBOOL(!IN_BYTES) #else -# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8))) +# define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8))) #endif /* The maximum number of characters preceding the unrecognized one to display */ @@ -186,6 +186,7 @@ static const char* const lex_state_names[] = { * FUN1 : not used, except for not, which isn't a UNIOP * BOop : bitwise or or xor * BAop : bitwise and + * BCop : bitwise complement * SHop : shift operator * PWop : power operator * PMop : pattern-matching operator @@ -206,7 +207,7 @@ static const char* const lex_state_names[] = { #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval)) #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval)) -#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval))) +#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval)) #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval)) #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval)) #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval)) @@ -220,14 +221,16 @@ static const char* const lex_state_names[] = { #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0)) #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP)) #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1)) -#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP))) -#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP))) -#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP))) -#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP))) +#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP)) +#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP)) +#define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \ + REPORT('~') +#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP)) +#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP)) #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP)) -#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP))) +#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP)) #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP)) -#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP))) +#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP)) #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP)) #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP)) @@ -399,7 +402,7 @@ S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp) } if (name) Perl_sv_catpv(aTHX_ report, name); - else if ((char)rv > ' ' && (char)rv <= '~') + else if (isGRAPH(rv)) { Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv); if ((char)rv == 'p') @@ -486,7 +489,7 @@ S_ao(pTHX_ int toketype) pl_yylval.ival = OP_DORASSIGN; toketype = ASSIGNOP; } - return toketype; + return REPORT(toketype); } /* @@ -500,6 +503,9 @@ S_ao(pTHX_ int toketype) * It prints "Missing operator before end of line" if there's nothing * after the missing operator, or "... before <...>" if there is something * after the missing operator. + * + * PL_bufptr is expected to point to the start of the thing that was found, + * and s after the next token or partial token. */ STATIC void @@ -730,7 +736,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) parser->bufend = parser->bufptr + SvCUR(parser->linestr); parser->last_lop = parser->last_uni = NULL; - assert(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES + STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES |LEX_DONT_CLOSE_RSFP)); parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES |LEX_DONT_CLOSE_RSFP)); @@ -1246,7 +1252,7 @@ buffer has reached the end of the input text. */ #define LEX_FAKE_EOF 0x80000000 -#define LEX_NO_TERM 0x40000000 +#define LEX_NO_TERM 0x40000000 /* here-doc */ bool Perl_lex_next_chunk(pTHX_ U32 flags) @@ -1260,6 +1266,8 @@ Perl_lex_next_chunk(pTHX_ U32 flags) bool got_some; if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM)) Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk"); + if (!(flags & LEX_NO_TERM) && PL_lex_inwhat) + return FALSE; linestr = PL_parser->linestr; buf = SvPVX(linestr); if (!(flags & LEX_KEEP_PREVIOUS) && @@ -1514,6 +1522,8 @@ Perl_lex_read_space(pTHX_ U32 flags) incline(s); need_incline = 0; } + } else if (!c) { + s++; } else { break; } @@ -1790,13 +1800,13 @@ S_skipspace_flags(pTHX_ char *s, U32 flags) { PERL_ARGS_ASSERT_SKIPSPACE_FLAGS; if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { - while (s < PL_bufend && SPACE_OR_TAB(*s)) + while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s)) s++; } else { STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr); PL_bufptr = s; lex_read_space(flags | LEX_KEEP_PREVIOUS | - (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ? + (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ? LEX_NO_NEXT_CHUNK : 0)); s = PL_bufptr; PL_bufptr = SvPVX(PL_linestr) + bufptr_pos; @@ -1899,6 +1909,7 @@ S_force_next(pTHX_ I32 type) tokereport(type, &NEXTVAL_NEXTTOKE); } #endif + assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype)); PL_nexttype[PL_nexttoke] = type; PL_nexttoke++; if (PL_lex_state != LEX_KNOWNEXT) { @@ -1968,7 +1979,7 @@ S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) SV * const sv = newSVpvn_utf8(start, len, !IN_BYTES && UTF - && !is_ascii_string((const U8*)start, len) + && !is_invariant_string((const U8*)start, len) && is_utf8_string((const U8*)start, len)); return sv; } @@ -2005,9 +2016,10 @@ S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack) s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len); if (check_keyword) { char *s2 = PL_tokenbuf; + STRLEN len2 = len; if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6)) - s2 += 6, len -= 6; - if (keyword(s2, len, 0)) + s2 += 6, len2 -= 6; + if (keyword(s2, len2, 0)) return start; } if (token == METHOD) { @@ -2272,7 +2284,9 @@ S_sublex_start(pTHX) return THING; } if (op_type == OP_CONST) { - SV *sv = tokeq(PL_lex_stuff); + SV *sv = PL_lex_stuff; + PL_lex_stuff = NULL; + sv = tokeq(sv); if (SvTYPE(sv) == SVt_PVIV) { /* Overloaded constants, nothing fancy: Convert to SVt_PV: */ @@ -2283,7 +2297,6 @@ S_sublex_start(pTHX) sv = nsv; } pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv); - PL_lex_stuff = NULL; return THING; } @@ -2363,6 +2376,13 @@ S_sublex_push(pTHX) PL_lex_stuff = NULL; PL_sublex_info.repl = NULL; + /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets + set for an inner quote-like operator and then an error causes scope- + popping. We must not have a PL_lex_stuff value left dangling, as + that breaks assumptions elsewhere. See bug #123617. */ + SAVEGENERICSV(PL_lex_stuff); + SAVEGENERICSV(PL_sublex_info.repl); + PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr); PL_bufend += SvCUR(PL_linestr); @@ -2457,7 +2477,7 @@ S_sublex_done(pTHX) + PL_parser->herelines; PL_parser->herelines = 0; } - return ','; + return '/'; } else { const line_t l = CopLINE(PL_curcop); @@ -2467,7 +2487,6 @@ S_sublex_done(pTHX) PL_bufend = SvPVX(PL_linestr); PL_bufend += SvCUR(PL_linestr); PL_expect = XOPERATOR; - PL_sublex_info.sub_inwhat = 0; return ')'; } } @@ -2491,6 +2510,9 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME; + if (!SvCUR(res)) + return res; + if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr, e - backslash_ptr, &first_bad_char_loc)) @@ -3084,6 +3106,7 @@ S_scan_const(pTHX_ char *start) * symbol meaning, e.g. \x{2E} would be confused with a dot. But * in spite of this, we do have to process \N here while the proper * charnames handler is in scope. See bugs #56444 and #62056. + * * There is a complication because \N in a pattern may also stand * for 'match a non-nl', and not mean a charname, in which case its * processing should be deferred to the regex compiler. To be a @@ -3188,9 +3211,13 @@ S_scan_const(pTHX_ char *start) SvPOK_on(sv); *d = '\0'; /* See Note on sizing above. */ - sv_utf8_upgrade_flags_grow(sv, - SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, - UNISKIP(uv) + (STRLEN)(send - s) + 1); + sv_utf8_upgrade_flags_grow( + sv, + SV_GMAGIC|SV_FORCE_UTF8_UPGRADE + /* Above-latin1 in string + * implies no encoding */ + |SV_UTF8_NO_ENCODING, + UNISKIP(uv) + (STRLEN)(send - s) + 1); d = SvPVX(sv) + SvCUR(sv); has_utf8 = TRUE; } @@ -3218,31 +3245,44 @@ S_scan_const(pTHX_ char *start) continue; case 'N': - /* In a non-pattern \N must be a named character, like \N{LATIN - * SMALL LETTER A} or \N{U+0041}. For patterns, it also can - * mean to match a non-newline. For non-patterns, named - * characters are converted to their string equivalents. In - * patterns, named characters are not converted to their - * ultimate forms for the same reasons that other escapes - * aren't. Instead, they are converted to the \N{U+...} form - * to get the value from the charnames that is in effect right - * now, while preserving the fact that it was a named character - * so that the regex compiler knows this */ - - /* The structure of this section of code (besides checking for + /* In a non-pattern \N must be like \N{U+0041}, or it can be a + * named character, like \N{LATIN SMALL LETTER A}, or a named + * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND + * GRAVE}. For convenience all three forms are referred to as + * "named characters" below. + * + * For patterns, \N also can mean to match a non-newline. Code + * before this 'switch' statement should already have handled + * this situation, and hence this code only has to deal with + * the named character cases. + * + * For non-patterns, the named characters are converted to + * their string equivalents. In patterns, named characters are + * not converted to their ultimate forms for the same reasons + * that other escapes aren't. Instead, they are converted to + * the \N{U+...} form to get the value from the charnames that + * is in effect right now, while preserving the fact that it + * was a named character, so that the regex compiler knows + * this. + * + * The structure of this section of code (besides checking for * errors and upgrading to utf8) is: - * Further disambiguate between the two meanings of \N, and if - * not a charname, go process it elsewhere - * If of form \N{U+...}, pass it through if a pattern; - * otherwise convert to utf8 - * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a - * pattern; otherwise convert to utf8 */ - - /* Here, s points to the 'N'; the test below is guaranteed to - * succeed if we are being called on a pattern as we already - * know from a test above that the next character is a '{'. - * On a non-pattern \N must mean 'named sequence, which - * requires braces */ + * If the named character is of the form \N{U+...}, pass it + * through if a pattern; otherwise convert the code point + * to utf8 + * Otherwise must be some \N{NAME}: convert to \N{U+c1.c2...} + * if a pattern; otherwise convert to utf8 + * + * If the regex compiler should ever need to differentiate + * between the \N{U+...} and \N{name} forms, that could easily + * be done here by stripping any leading zeros from the + * \N{U+...} case, and adding them to the other one. */ + + /* Here, 's' points to the 'N'; the test below is guaranteed to + * succeed if we are being called on a pattern, as we already + * know from a test above that the next character is a '{'. A + * non-pattern \N must mean 'named character', which requires + * braces */ s++; if (*s != '{') { yyerror("Missing braces on \\N{}"); @@ -3263,43 +3303,45 @@ S_scan_const(pTHX_ char *start) /* Here it looks like a named character */ if (*s == 'U' && s[1] == '+') { /* \N{U+...} */ - I32 flags = PERL_SCAN_ALLOW_UNDERSCORES - | PERL_SCAN_DISALLOW_PREFIX; - STRLEN len; - - /* For \N{U+...}, the '...' is a unicode value even on - * EBCDIC machines */ s += 2; /* Skip to next char after the 'U+' */ - len = e - s; - uv = grok_hex(s, &len, &flags, NULL); - if (len == 0 || len != (STRLEN)(e - s)) { - yyerror("Invalid hexadecimal number in \\N{U+...}"); - s = e + 1; - continue; - } - if (PL_lex_inpat) { - /* On non-EBCDIC platforms, pass through to the regex - * compiler unchanged. The reason we evaluated the - * number above is to make sure there wasn't a syntax - * error. But on EBCDIC we convert to native so - * downstream code can continue to assume it's native - */ - s -= 5; /* Include the '\N{U+' */ -#ifdef EBCDIC - d += my_snprintf(d, e - s + 1 + 1, /* includes the } - and the \0 */ - "\\N{U+%X}", - (unsigned int) UNI_TO_NATIVE(uv)); -#else - Copy(s, d, e - s + 1, char); /* 1 = include the } */ - d += e - s + 1; -#endif + /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */ + /* Check the syntax. */ + const char *orig_s; + orig_s = s - 5; + if (!isXDIGIT(*s)) { + bad_NU: + yyerror( + "Invalid hexadecimal number in \\N{U+...}" + ); + s = e + 1; + continue; + } + while (++s < e) { + if (isXDIGIT(*s)) + continue; + else if ((*s == '.' || *s == '_') + && isXDIGIT(s[1])) + continue; + goto bad_NU; + } + + /* Pass everything through unchanged. + * +1 is for the '}' */ + Copy(orig_s, d, e - orig_s + 1, char); + d += e - orig_s + 1; } else { /* Not a pattern: convert the hex to string */ + I32 flags = PERL_SCAN_ALLOW_UNDERSCORES + | PERL_SCAN_SILENT_ILLDIGIT + | PERL_SCAN_DISALLOW_PREFIX; + STRLEN len = e - s; + uv = grok_hex(s, &len, &flags, NULL); + if (len == 0 || (len != (STRLEN)(e - s))) + goto bad_NU; - /* If destination is not in utf8, unconditionally + /* If the destination is not in utf8, unconditionally * recode it to be so. This is because \N{} implies * Unicode semantics, and scalars have to be in utf8 * to guarantee those semantics */ @@ -3352,13 +3394,18 @@ S_scan_const(pTHX_ char *start) * through the string. Each character takes up * 2 hex digits plus either a trailing dot or * the "}" */ + const char initial_text[] = "\\N{U+"; + const STRLEN initial_len = sizeof(initial_text) + - 1; d = off + SvGROW(sv, off + 3 * len - + 6 /* For the "\N{U+", and - trailing NUL */ + + /* +1 for trailing NUL */ + + initial_len + 1 + + (STRLEN)(send - e)); - Copy("\\N{U+", d, 5, char); - d += 5; + Copy(initial_text, d, initial_len, char); + d += initial_len; while (str < str_end) { char hex_string[4]; int len = @@ -3370,7 +3417,7 @@ S_scan_const(pTHX_ char *start) d += 3; str++; } - d--; /* We will overwrite below the final + d--; /* Below, we will overwrite the final dot with a right brace */ } else { @@ -3455,8 +3502,8 @@ S_scan_const(pTHX_ char *start) const STRLEN off = d - SvPVX_const(sv); d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1); } - if (! SvUTF8(res)) { /* Make sure is \N{} return is UTF-8 */ - sv_utf8_upgrade(res); + if (! SvUTF8(res)) { /* Make sure \N{} return is UTF-8 */ + sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING); str = SvPV_const(res, len); } Copy(str, d, len, char); @@ -3572,8 +3619,8 @@ S_scan_const(pTHX_ char *start) " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv)); SvPOK_on(sv); - if (PL_encoding && !has_utf8) { - sv_recode_to_utf8(sv, PL_encoding); + if (IN_ENCODING && !has_utf8) { + sv_recode_to_utf8(sv, _get_encoding()); if (SvUTF8(sv)) has_utf8 = TRUE; } @@ -3769,11 +3816,10 @@ S_intuit_more(pTHX_ char *s) && !(last_un_char == '$' || last_un_char == '@' || last_un_char == '&') && isALPHA(*s) && s[1] && isALPHA(s[1])) { - char *d = tmpbuf; + char *d = s; while (isALPHA(*s)) - *d++ = *s++; - *d = '\0'; - if (keyword(tmpbuf, d - tmpbuf, 0)) + s++; + if (keyword(d, s - d, 0)) weight -= 150; } if (un_char == last_un_char + 1) @@ -4268,13 +4314,8 @@ Perl_yylex(pTHX) SvREFCNT_dec(tmp); } ); - switch (PL_lex_state) { - case LEX_NORMAL: - case LEX_INTERPNORMAL: - break; - /* when we've already built the next token, just pull it out of the queue */ - case LEX_KNOWNEXT: + if (PL_nexttoke) { PL_nexttoke--; pl_yylval = PL_nextval[PL_nexttoke]; if (!PL_nexttoke) { @@ -4299,6 +4340,12 @@ Perl_yylex(pTHX) } return REPORT(next_type == 'p' ? pending_ident() : next_type); } + } + + switch (PL_lex_state) { + case LEX_NORMAL: + case LEX_INTERPNORMAL: + break; /* interpolated case modifiers like \L \U, including \Q and \E. when we get here, PL_bufptr is at the \ @@ -4447,6 +4494,14 @@ Perl_yylex(pTHX) /* FALLTHROUGH */ case LEX_INTERPEND: + /* Treat state as LEX_NORMAL if we have no inner lexing scope. + XXX This hack can be removed if we stop setting PL_lex_state to + LEX_KNOWNEXT, as can the hack under LEX_INTREPCONCAT below. */ + if (UNLIKELY(!PL_lex_inwhat)) { + PL_lex_state = LEX_NORMAL; + break; + } + if (PL_lex_dojoin) { const U8 dojoin_was = PL_lex_dojoin; PL_lex_dojoin = FALSE; @@ -4498,6 +4553,14 @@ Perl_yylex(pTHX) Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld", (long) PL_lex_brackets); #endif + /* Treat state as LEX_NORMAL when not in an inner lexing scope. + XXX This hack can be removed if we stop setting PL_lex_state to + LEX_KNOWNEXT. */ + if (UNLIKELY(!PL_lex_inwhat)) { + PL_lex_state = LEX_NORMAL; + break; + } + if (PL_bufptr == PL_bufend) return REPORT(sublex_done()); @@ -4578,7 +4641,8 @@ Perl_yylex(pTHX) case 26: goto fake_eof; /* emulate EOF on ^D or ^Z */ case 0: - if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) { + if ((!PL_rsfp || PL_lex_inwhat) + && (!PL_parser->filtered || s+1 < PL_bufend)) { PL_last_uni = 0; PL_last_lop = 0; if (PL_lex_brackets && @@ -4917,7 +4981,6 @@ Perl_yylex(pTHX) } if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { PL_lex_state = LEX_FORMLINE; - NEXTVAL_NEXTTOKE.ival = 0; force_next(FORMRBRACK); TOKEN(';'); } @@ -4928,7 +4991,7 @@ Perl_yylex(pTHX) Perl_croak(aTHX_ "\t(Maybe you didn't strip carriage returns after a network transfer?)\n"); #endif - case ' ': case '\t': case '\f': case 013: + case ' ': case '\t': case '\f': case '\v': s++; goto retry; case '#': @@ -4960,7 +5023,6 @@ Perl_yylex(pTHX) incline(s); if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { PL_lex_state = LEX_FORMLINE; - NEXTVAL_NEXTTOKE.ival = 0; force_next(FORMRBRACK); TOKEN(';'); } @@ -5181,11 +5243,18 @@ Perl_yylex(pTHX) TERM('%'); } case '^': + d = s; + bof = FEATURE_BITWISE_IS_ENABLED; + if (bof && s[1] == '.') + s++; if (!PL_lex_allbrackets && PL_lex_fakeeof >= (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) + { + s = d; TOKEN(0); + } s++; - BOop(OP_BIT_XOR); + BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR); case '[': if (PL_lex_brackets > 100) Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); @@ -5208,7 +5277,11 @@ Perl_yylex(pTHX) Eop(OP_SMARTMATCH); } s++; - OPERATOR('~'); + if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') { + s++; + BCop(OP_SCOMPLEMENT); + } + BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT); case ',': if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) TOKEN(0); @@ -5281,7 +5354,7 @@ Perl_yylex(pTHX) sv_catsv(sv, PL_lex_stuff); attrs = op_append_elem(OP_LIST, attrs, newSVOP(OP_CONST, 0, sv)); - SvREFCNT_dec(PL_lex_stuff); + SvREFCNT_dec_NN(PL_lex_stuff); PL_lex_stuff = NULL; } else { @@ -5308,6 +5381,19 @@ Perl_yylex(pTHX) sv_free(sv); CvMETHOD_on(PL_compcv); } + else if (!PL_in_my && len == 5 + && strnEQ(SvPVX(sv), "const", len)) + { + sv_free(sv); + Perl_ck_warner_d(aTHX_ + packWARN(WARN_EXPERIMENTAL__CONST_ATTR), + ":const is experimental" + ); + CvANONCONST_on(PL_compcv); + if (!CvANON(PL_compcv)) + yyerror(":const is not permitted on named " + "subroutines"); + } /* After we've set the flags, it could be argued that we don't need to do the attributes.pm-based setting process, and shouldn't bother appending recognized @@ -5422,6 +5508,7 @@ Perl_yylex(pTHX) } switch (PL_expect) { case XTERM: + case XTERMORDORDOR: PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; PL_lex_allbrackets++; OPERATOR(HASHBRACK); @@ -5485,9 +5572,10 @@ Perl_yylex(pTHX) OPERATOR(HASHBRACK); } if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) { - /* ${...} or @{...} etc., but not print {...} */ - PL_expect = XTERM; - break; + /* ${...} or @{...} etc., but not print {...} + * Skip the disambiguation and treat this as a block. + */ + goto block_expectation; } /* This hack serves to disambiguate a pair of curlies * as being a block or an anon hash. Normally, expectation @@ -5571,7 +5659,28 @@ Perl_yylex(pTHX) || (*t == '=' && t[1] == '>'))) OPERATOR(HASHBRACK); if (PL_expect == XREF) - PL_expect = XTERM; + { + block_expectation: + /* If there is an opening brace or 'sub:', treat it + as a term to make ${{...}}{k} and &{sub:attr...} + dwim. Otherwise, treat it as a statement, so + map {no strict; ...} works. + */ + s = skipspace(s); + if (*s == '{') { + PL_expect = XTERM; + break; + } + if (strnEQ(s, "sub", 3)) { + d = s + 3; + d = skipspace(d); + if (*d == ':') { + PL_expect = XTERM; + break; + } + } + PL_expect = XSTATE; + } else { PL_lex_brackstack[PL_lex_brackets-1] = XSTATE; PL_expect = XSTATE; @@ -5580,8 +5689,7 @@ Perl_yylex(pTHX) break; } pl_yylval.ival = CopLINE(PL_curcop); - if (isSPACE(*s) || *s == '#') - PL_copline = NOLINE; /* invalidate current command line number */ + PL_copline = NOLINE; /* invalidate current command line number */ TOKEN(formbrack ? '=' : '{'); case '}': if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) @@ -5643,25 +5751,32 @@ Perl_yylex(pTHX) Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); CopLINE_inc(PL_curcop); } + d = s; + if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') + s++; if (!PL_lex_allbrackets && PL_lex_fakeeof >= (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) { + s = d; s--; TOKEN(0); } - PL_parser->saw_infix_sigil = 1; - BAop(OP_BIT_AND); + if (d == s) { + PL_parser->saw_infix_sigil = 1; + BAop(bof ? OP_NBIT_AND : OP_BIT_AND); + } + else + BAop(OP_SBIT_AND); } PL_tokenbuf[0] = '&'; s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE); + pl_yylval.ival = (OPpENTERSUB_AMPER<<8); if (PL_tokenbuf[1]) { - PL_expect = XOPERATOR; force_ident_maybe_lex('&'); } else PREREF('&'); - pl_yylval.ival = (OPpENTERSUB_AMPER<<8); TERM('&'); case '|': @@ -5675,12 +5790,15 @@ Perl_yylex(pTHX) AOPERATOR(OROR); } s--; + d = s; + if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') + s++; if (!PL_lex_allbrackets && PL_lex_fakeeof >= (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) { - s--; + s = d - 1; TOKEN(0); } - BOop(OP_BIT_OR); + BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR); case '=': s++; { @@ -5710,30 +5828,30 @@ Perl_yylex(pTHX) s--; if (PL_expect == XSTATE && isALPHA(tmp) && (s == PL_linestart+1 || s[-2] == '\n') ) - { - if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered) - || PL_lex_state != LEX_NORMAL) { - d = PL_bufend; - while (s < d) { - if (*s++ == '\n') { - incline(s); - if (strnEQ(s,"=cut",4)) { - s = strchr(s,'\n'); - if (s) - s++; - else - s = d; - incline(s); - goto retry; - } - } - } - goto retry; - } - s = PL_bufend; - PL_parser->in_pod = 1; - goto retry; - } + { + if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered) + || PL_lex_state != LEX_NORMAL) { + d = PL_bufend; + while (s < d) { + if (*s++ == '\n') { + incline(s); + if (strnEQ(s,"=cut",4)) { + s = strchr(s,'\n'); + if (s) + s++; + else + s = d; + incline(s); + goto retry; + } + } + } + goto retry; + } + s = PL_bufend; + PL_parser->in_pod = 1; + goto retry; + } } if (PL_expect == XBLOCK) { const char *t = s; @@ -5899,8 +6017,14 @@ Perl_yylex(pTHX) PL_tokenbuf[0] = '$'; s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); - if (PL_expect == XOPERATOR) - no_op("Scalar", s); + if (PL_expect == XOPERATOR) { + d = s; + if (PL_bufptr > s) { + d = PL_bufptr-1; + PL_bufptr = PL_oldbufptr; + } + no_op("Scalar", d); + } if (!PL_tokenbuf[1]) { if (s == PL_bufend) yyerror("Final $ should be \\$ or $name"); @@ -6214,7 +6338,7 @@ Perl_yylex(pTHX) } /* avoid v123abc() or $h{v1}, allow C */ if (!isALPHA(*start) && (PL_expect == XTERM - || PL_expect == XSTATE + || PL_expect == XREF || PL_expect == XSTATE || PL_expect == XTERMORDORDOR)) { GV *const gv = gv_fetchpvn_flags(s, start - s, UTF ? SVf_UTF8 : 0, SVt_PVCV); @@ -6351,7 +6475,7 @@ Perl_yylex(pTHX) char tmpbuf[sizeof PL_tokenbuf + 1]; *tmpbuf = '&'; Copy(PL_tokenbuf, tmpbuf+1, len, char); - off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0); + off = pad_findmy_pvn(tmpbuf, len+1, 0); if (off != NOT_IN_PAD) { assert(off); /* we assume this is boolean-true below */ if (PAD_COMPNAME_FLAGS_isOUR(off)) { @@ -6883,13 +7007,13 @@ Perl_yylex(pTHX) if (!IN_BYTES) { if (UTF) PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8"); - else if (PL_encoding) { + else if (IN_ENCODING) { SV *name; dSP; ENTER; SAVETMPS; PUSHMARK(sp); - XPUSHs(PL_encoding); + XPUSHs(_get_encoding()); PUTBACK; call_method("name", G_SCALAR); SPAGAIN; @@ -6909,7 +7033,9 @@ Perl_yylex(pTHX) } case KEY___SUB__: - FUN0OP(newPVOP(OP_RUNCV,0,NULL)); + FUN0OP(CvCLONE(PL_compcv) + ? newOP(OP_RUNCV, 0) + : newPVOP(OP_RUNCV,0,NULL)); case KEY_AUTOLOAD: case KEY_DESTROY: @@ -7580,10 +7706,8 @@ Perl_yylex(pTHX) } if (!words) words = newNULLLIST(); - if (PL_lex_stuff) { - SvREFCNT_dec(PL_lex_stuff); - PL_lex_stuff = NULL; - } + SvREFCNT_dec_NN(PL_lex_stuff); + PL_lex_stuff = NULL; PL_expect = XOPERATOR; pl_yylval.opval = sawparens(words); TOKEN(QWLIST); @@ -7838,7 +7962,7 @@ Perl_yylex(pTHX) *PL_tokenbuf = '&'; if (memchr(tmpbuf, ':', len) || key != KEY_sub || pad_findmy_pvn( - PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0 + PL_tokenbuf, len + 1, 0 ) != NOT_IN_PAD) sv_setpvn(PL_subname, tmpbuf, len); else { @@ -8118,7 +8242,9 @@ S_pending_ident(pTHX) /* PL_no_myglob is constant */ GCC_DIAG_IGNORE(-Wformat-nonliteral); yyerror_pv(Perl_form(aTHX_ PL_no_myglob, - PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf), + PL_in_my == KEY_my ? "my" : "state", + *PL_tokenbuf == '&' ? "subroutin" : "variabl", + PL_tokenbuf), UTF ? SVf_UTF8 : 0); GCC_DIAG_RESTORE; } @@ -8137,7 +8263,7 @@ S_pending_ident(pTHX) if (!has_colon) { if (!PL_in_my) tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len, - UTF ? SVf_UTF8 : 0); + 0); if (tmp != NOT_IN_PAD) { /* might be an "our" variable" */ if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { @@ -8255,7 +8381,7 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what) char tmpbuf[256]; Copy(w, tmpbuf+1, s - w, char); *tmpbuf = '&'; - off = pad_findmy_pvn(tmpbuf, s-w+1, UTF ? SVf_UTF8 : 0); + off = pad_findmy_pvn(tmpbuf, s-w+1, 0); if (off != NOT_IN_PAD) return; } Perl_croak(aTHX_ "No comma allowed after %s", what); @@ -8352,7 +8478,7 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, yyerror_pv(msg, UTF ? SVf_UTF8 : 0); return SvREFCNT_inc_simple_NN(sv); } -now_ok: + now_ok: cv = *cvp; if (!pv && s) pv = newSVpvn_flags(s, len, SVs_TEMP); @@ -8487,7 +8613,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) PERL_ARGS_ASSERT_SCAN_IDENT; - if (isSPACE(*s)) + if (isSPACE(*s) || !*s) s = skipspace(s); if (isDIGIT(*s)) { while (isDIGIT(*s)) { @@ -8532,25 +8658,54 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) /* Is the byte 'd' a legal single character identifier name? 'u' is true * iff Unicode semantics are to be used. The legal ones are any of: - * a) ASCII digits - * b) ASCII punctuation + * a) all ASCII characters except: + * 1) space-type ones, like \t and SPACE; + 2) NUL; + * 3) '{' + * The final case currently doesn't get this far in the program, so we + * don't test for it. If that were to change, it would be ok to allow it. * c) When not under Unicode rules, any upper Latin1 character - * d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally - * been matched by \s on ASCII platforms. That is: \c?, plus 1-32, minus - * the \s ones. */ -#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \ - || isDIGIT_A((U8)(d)) \ - || (!(u) && !isASCII((U8)(d))) \ - || ((((U8)(d)) < 32) \ - && (((((U8)(d)) >= 14) \ - || (((U8)(d)) <= 8 && (d) != 0) \ - || (((U8)(d)) == 13)))) \ - || (((U8)(d)) == toCTRL('?'))) - if (s < PL_bufend - && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8))) + * d) Otherwise, when unicode rules are used, all XIDS characters. + * + * Because all ASCII characters have the same representation whether + * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and + * '{' without knowing if is UTF-8 or not. + * EBCDIC already uses the rules that ASCII platforms will use after the + * deprecation cycle; see comment below about the deprecation. */ +#ifdef EBCDIC +# define VALID_LEN_ONE_IDENT(s, is_utf8) \ + (isGRAPH_A(*(s)) || ((is_utf8) \ + ? isIDFIRST_utf8((U8*) (s)) \ + : (isGRAPH_L1(*s) \ + && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD))))) +#else +# define VALID_LEN_ONE_IDENT(s, is_utf8) (! isSPACE_A(*(s)) \ + && LIKELY(*(s) != '\0') \ + && (! is_utf8 \ + || isASCII_utf8((U8*) (s)) \ + || isIDFIRST_utf8((U8*) (s)))) +#endif + if ((s <= PL_bufend - (is_utf8) + ? UTF8SKIP(s) + : 1) + && VALID_LEN_ONE_IDENT(s, is_utf8)) { - if ( isCNTRL_A((U8)*s) ) { - deprecate("literal control characters in variable names"); + /* Deprecate all non-graphic characters. Include SHY as a non-graphic, + * because often it has no graphic representation. (We can't get to + * here with SHY when 'is_utf8' is true, so no need to include a UTF-8 + * test for it.) */ + if ((is_utf8) + ? ! isGRAPH_utf8( (U8*) s) + : (! isGRAPH_L1( (U8) *s) + || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD)))) + { + /* Split messages for back compat */ + if (isCNTRL_A( (U8) *s)) { + deprecate("literal control characters in variable names"); + } + else { + deprecate("literal non-graphic characters in variable names"); + } } if (is_utf8) { @@ -8867,10 +9022,8 @@ S_scan_subst(pTHX_ char *start) first_line = CopLINE(PL_curcop); s = scan_str(s,FALSE,FALSE,FALSE,NULL); if (!s) { - if (PL_lex_stuff) { - SvREFCNT_dec(PL_lex_stuff); - PL_lex_stuff = NULL; - } + SvREFCNT_dec_NN(PL_lex_stuff); + PL_lex_stuff = NULL; Perl_croak(aTHX_ "Substitution replacement not terminated"); } PL_multi_start = first_start; /* so whole substitution is taken together */ @@ -8949,10 +9102,8 @@ S_scan_trans(pTHX_ char *start) s = scan_str(s,FALSE,FALSE,FALSE,NULL); if (!s) { - if (PL_lex_stuff) { - SvREFCNT_dec(PL_lex_stuff); - PL_lex_stuff = NULL; - } + SvREFCNT_dec_NN(PL_lex_stuff); + PL_lex_stuff = NULL; Perl_croak(aTHX_ "Transliteration replacement not terminated"); } @@ -9209,7 +9360,14 @@ S_scan_heredoc(pTHX_ char *s) origline + 1 + PL_parser->herelines); if (!lex_next_chunk(LEX_NO_TERM) && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) { - SvREFCNT_dec(linestr_save); + /* Simply freeing linestr_save might seem simpler here, as it + does not matter what PL_linestr points to, since we are + about to croak; but in a quote-like op, linestr_save + will have been prospectively freed already, via + SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to + restore PL_linestr. */ + SvREFCNT_dec_NN(PL_linestr); + PL_linestr = linestr_save; goto interminable; } CopLINE_set(PL_curcop, origline); @@ -9259,8 +9417,8 @@ S_scan_heredoc(pTHX_ char *s) if (!IN_BYTES) { if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr))) SvUTF8_on(tmpstr); - else if (PL_encoding) - sv_recode_to_utf8(tmpstr, PL_encoding); + else if (IN_ENCODING) + sv_recode_to_utf8(tmpstr, _get_encoding()); } PL_lex_stuff = tmpstr; pl_yylval.ival = op_type; @@ -9371,7 +9529,7 @@ S_scan_inputsymbol(pTHX_ char *start) /* try to find it in the pad for this block, otherwise find add symbol table ops */ - const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0); + const PADOFFSET tmp = pad_findmy_pvn(d, len, 0); if (tmp != NOT_IN_PAD) { if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { HV * const stash = PAD_COMPNAME_OURSTASH(tmp); @@ -9395,7 +9553,7 @@ S_scan_inputsymbol(pTHX_ char *start) else { GV *gv; ++d; -intro_sym: + intro_sym: gv = gv_fetchpv(d, GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ), SVt_PV); @@ -9550,12 +9708,12 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re sv_catpvn(sv, s, termlen); s += termlen; for (;;) { - if (PL_encoding && !UTF && !re_reparse) { + if (IN_ENCODING && !UTF && !re_reparse) { bool cont = TRUE; while (cont) { int offset = s - SvPVX_const(PL_linestr); - const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr, + const bool found = sv_cat_decode(sv, _get_encoding(), PL_linestr, &offset, (char*)termstr, termlen); const char *ns; char *svlast; @@ -9768,13 +9926,13 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re /* at this point, we have successfully read the delimited string */ - if (!PL_encoding || UTF || re_reparse) { + if (!IN_ENCODING || UTF || re_reparse) { if (keep_delims) sv_catpvn(sv, s, termlen); s += termlen; } - if (has_utf8 || (PL_encoding && !re_reparse)) + if (has_utf8 || (IN_ENCODING && !re_reparse)) SvUTF8_on(sv); PL_multi_end = CopLINE(PL_curcop); @@ -10337,7 +10495,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) /* if it starts with a v, it could be a v-string */ case 'v': -vstring: + vstring: sv = newSV(5); /* preallocate storage space */ ENTER_with_name("scan_vstring"); SAVEFREESV(sv); @@ -10431,7 +10589,7 @@ S_scan_formline(pTHX_ char *s) if (needargs) { const char *s2 = s; while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f' - || *s2 == 013) + || *s2 == '\v') s2++; if (*s2 == '{') { PL_expect = XTERMBLOCK; @@ -10444,8 +10602,8 @@ S_scan_formline(pTHX_ char *s) if (!IN_BYTES) { if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff))) SvUTF8_on(stuff); - else if (PL_encoding) - sv_recode_to_utf8(stuff, PL_encoding); + else if (IN_ENCODING) + sv_recode_to_utf8(stuff, _get_encoding()); } NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff); force_next(THING); @@ -10476,8 +10634,7 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags) CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv)); CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax; if (outsidecv && CvPADLIST(outsidecv)) - CvPADLIST(PL_compcv)->xpadl_outid = - PadlistNAMES(CvPADLIST(outsidecv)); + CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id; return oldsavestack_ix; } @@ -10489,7 +10646,6 @@ S_yywarn(pTHX_ const char *const s, U32 flags) PL_in_eval |= EVAL_WARNONLY; yyerror_pv(s, flags); - PL_in_eval &= ~EVAL_WARNONLY; return 0; } @@ -10555,7 +10711,7 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) } else if (yychar > 255) sv_catpvs(where_sv, "next token ???"); - else if (yychar == -2) { /* YYEMPTY */ + else if (yychar == YYEMPTY) { if (PL_lex_state == LEX_NORMAL || (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL)) sv_catpvs(where_sv, "at end of line"); @@ -10593,6 +10749,7 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) PL_multi_end = 0; } if (PL_in_eval & EVAL_WARNONLY) { + PL_in_eval &= ~EVAL_WARNONLY; Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg)); } else @@ -11383,7 +11540,6 @@ S_parse_opt_lexvar(pTHX) PL_bufptr = s; if (d == PL_tokenbuf+1) return NULL; - *d = 0; var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV, OPf_MOD | (OPpLVAL_INTRO<<8)); var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0); @@ -11464,10 +11620,16 @@ Perl_parse_subsignature(pTHX) scalar(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv))), newSVOP(OP_CONST, 0, newSViv(1))), - newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0), - newSVOP(OP_CONST, 0, - newSVpvs("Odd name/value argument " - "for subroutine")))); + op_convert_list(OP_DIE, 0, + op_convert_list(OP_SPRINTF, 0, + op_append_list(OP_LIST, + newSVOP(OP_CONST, 0, + newSVpvs("Odd name/value argument for subroutine at %s line %d.\n")), + newSLICEOP(0, + op_append_list(OP_LIST, + newSVOP(OP_CONST, 0, newSViv(1)), + newSVOP(OP_CONST, 0, newSViv(2))), + newOP(OP_CALLER, 0)))))); if (pos != min_arity) chkop = newLOGOP(OP_AND, 0, newBINOP(OP_GT, 0, @@ -11530,9 +11692,16 @@ Perl_parse_subsignature(pTHX) scalar(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv))), newSVOP(OP_CONST, 0, newSViv(min_arity))), - newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0), - newSVOP(OP_CONST, 0, - newSVpvs("Too few arguments for subroutine"))))), + op_convert_list(OP_DIE, 0, + op_convert_list(OP_SPRINTF, 0, + op_append_list(OP_LIST, + newSVOP(OP_CONST, 0, + newSVpvs("Too few arguments for subroutine at %s line %d.\n")), + newSLICEOP(0, + op_append_list(OP_LIST, + newSVOP(OP_CONST, 0, newSViv(1)), + newSVOP(OP_CONST, 0, newSViv(2))), + newOP(OP_CALLER, 0))))))), initops); } if (max_arity != -1) { @@ -11543,9 +11712,16 @@ Perl_parse_subsignature(pTHX) scalar(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv))), newSVOP(OP_CONST, 0, newSViv(max_arity))), - newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0), - newSVOP(OP_CONST, 0, - newSVpvs("Too many arguments for subroutine"))))), + op_convert_list(OP_DIE, 0, + op_convert_list(OP_SPRINTF, 0, + op_append_list(OP_LIST, + newSVOP(OP_CONST, 0, + newSVpvs("Too many arguments for subroutine at %s line %d.\n")), + newSLICEOP(0, + op_append_list(OP_LIST, + newSVOP(OP_CONST, 0, newSViv(1)), + newSVOP(OP_CONST, 0, newSViv(2))), + newOP(OP_CALLER, 0))))))), initops); } return initops;