X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/4bff32c56f5c1d1515069d4ce0d05531758f6561..43275f00a97a14a80f9493c38895a5c77f0fc88a:/toke.c diff --git a/toke.c b/toke.c index 028c685..b6da013 100644 --- a/toke.c +++ b/toke.c @@ -99,9 +99,9 @@ static const char* const ident_too_long = "Identifier too long"; #define XFAKEBRACK 0x80 #ifdef USE_UTF8_SCRIPTS -# define UTF (!IN_BYTES) +# define UTF cBOOL(!IN_BYTES) #else -# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8))) +# define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8))) #endif /* The maximum number of characters preceding the unrecognized one to display */ @@ -206,7 +206,7 @@ static const char* const lex_state_names[] = { #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval)) #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval)) -#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval))) +#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval)) #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval)) #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval)) #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval)) @@ -220,14 +220,14 @@ static const char* const lex_state_names[] = { #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0)) #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP)) #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1)) -#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP))) -#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP))) -#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP))) -#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP))) +#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP)) +#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP)) +#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP)) +#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP)) #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP)) -#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP))) +#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP)) #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP)) -#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP))) +#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP)) #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP)) #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP)) @@ -486,7 +486,7 @@ S_ao(pTHX_ int toketype) pl_yylval.ival = OP_DORASSIGN; toketype = ASSIGNOP; } - return toketype; + return REPORT(toketype); } /* @@ -1987,7 +1987,6 @@ S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) * a keyword (do this if the word is a label, e.g. goto FOO) * int allow_pack : if true, : characters will also be allowed (require, * use, etc. do this) - * int allow_initial_tick : used by the "sub" lexer only. */ STATIC char * @@ -3085,6 +3084,7 @@ S_scan_const(pTHX_ char *start) * symbol meaning, e.g. \x{2E} would be confused with a dot. But * in spite of this, we do have to process \N here while the proper * charnames handler is in scope. See bugs #56444 and #62056. + * * There is a complication because \N in a pattern may also stand * for 'match a non-nl', and not mean a charname, in which case its * processing should be deferred to the regex compiler. To be a @@ -3219,31 +3219,44 @@ S_scan_const(pTHX_ char *start) continue; case 'N': - /* In a non-pattern \N must be a named character, like \N{LATIN - * SMALL LETTER A} or \N{U+0041}. For patterns, it also can - * mean to match a non-newline. For non-patterns, named - * characters are converted to their string equivalents. In - * patterns, named characters are not converted to their - * ultimate forms for the same reasons that other escapes - * aren't. Instead, they are converted to the \N{U+...} form - * to get the value from the charnames that is in effect right - * now, while preserving the fact that it was a named character - * so that the regex compiler knows this */ - - /* The structure of this section of code (besides checking for + /* In a non-pattern \N must be like \N{U+0041}, or it can be a + * named character, like \N{LATIN SMALL LETTER A}, or a named + * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND + * GRAVE}. For convenience all three forms are referred to as + * "named characters" below. + * + * For patterns, \N also can mean to match a non-newline. Code + * before this 'switch' statement should already have handled + * this situation, and hence this code only has to deal with + * the named character cases. + * + * For non-patterns, the named characters are converted to + * their string equivalents. In patterns, named characters are + * not converted to their ultimate forms for the same reasons + * that other escapes aren't. Instead, they are converted to + * the \N{U+...} form to get the value from the charnames that + * is in effect right now, while preserving the fact that it + * was a named character, so that the regex compiler knows + * this. + * + * The structure of this section of code (besides checking for * errors and upgrading to utf8) is: - * Further disambiguate between the two meanings of \N, and if - * not a charname, go process it elsewhere - * If of form \N{U+...}, pass it through if a pattern; - * otherwise convert to utf8 - * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a - * pattern; otherwise convert to utf8 */ - - /* Here, s points to the 'N'; the test below is guaranteed to - * succeed if we are being called on a pattern as we already - * know from a test above that the next character is a '{'. - * On a non-pattern \N must mean 'named sequence, which - * requires braces */ + * If the named character is of the form \N{U+...}, pass it + * through if a pattern; otherwise convert the code point + * to utf8 + * Otherwise must be some \N{NAME}: convert to \N{U+c1.c2...} + * if a pattern; otherwise convert to utf8 + * + * If the regex compiler should ever need to differentiate + * between the \N{U+...} and \N{name} forms, that could easily + * be done here by stripping any leading zeros from the + * \N{U+...} case, and adding them to the other one. */ + + /* Here, 's' points to the 'N'; the test below is guaranteed to + * succeed if we are being called on a pattern, as we already + * know from a test above that the next character is a '{'. A + * non-pattern \N must mean 'named character', which requires + * braces */ s++; if (*s != '{') { yyerror("Missing braces on \\N{}"); @@ -3268,8 +3281,6 @@ S_scan_const(pTHX_ char *start) | PERL_SCAN_DISALLOW_PREFIX; STRLEN len; - /* For \N{U+...}, the '...' is a unicode value even on - * EBCDIC machines */ s += 2; /* Skip to next char after the 'U+' */ len = e - s; uv = grok_hex(s, &len, &flags, NULL); @@ -3280,27 +3291,26 @@ S_scan_const(pTHX_ char *start) } if (PL_lex_inpat) { - - /* On non-EBCDIC platforms, pass through to the regex - * compiler unchanged. The reason we evaluated the - * number above is to make sure there wasn't a syntax - * error. But on EBCDIC we convert to native so - * downstream code can continue to assume it's native - */ s -= 5; /* Include the '\N{U+' */ #ifdef EBCDIC - d += my_snprintf(d, e - s + 1 + 1, /* includes the } + /* On EBCDIC platforms, in \N{U+...}, the '...' is a + * Unicode value, so convert to native so downstream + * code can continue to assume it's native */ + d += my_snprintf(d, e - s + 1 + 1, /* includes the '}' and the \0 */ - "\\N{U+%X}", - (unsigned int) UNI_TO_NATIVE(uv)); + "\\N{U+%X}", + (unsigned int) UNI_TO_NATIVE(uv)); #else - Copy(s, d, e - s + 1, char); /* 1 = include the } */ + /* On non-EBCDIC platforms, pass it through unchanged. + * The reason we evaluated the number above is to make + * sure there wasn't a syntax error. */ + Copy(s, d, e - s + 1, char); /* +1 is for the '}' */ d += e - s + 1; #endif } else { /* Not a pattern: convert the hex to string */ - /* If destination is not in utf8, unconditionally + /* If the destination is not in utf8, unconditionally * recode it to be so. This is because \N{} implies * Unicode semantics, and scalars have to be in utf8 * to guarantee those semantics */ @@ -3353,13 +3363,18 @@ S_scan_const(pTHX_ char *start) * through the string. Each character takes up * 2 hex digits plus either a trailing dot or * the "}" */ + const char initial_text[] = "\\N{U+"; + const STRLEN initial_len = sizeof(initial_text) + - 1; d = off + SvGROW(sv, off + 3 * len - + 6 /* For the "\N{U+", and - trailing NUL */ + + /* +1 for trailing NUL */ + + initial_len + 1 + + (STRLEN)(send - e)); - Copy("\\N{U+", d, 5, char); - d += 5; + Copy(initial_text, d, initial_len, char); + d += initial_len; while (str < str_end) { char hex_string[4]; int len = @@ -3371,7 +3386,7 @@ S_scan_const(pTHX_ char *start) d += 3; str++; } - d--; /* We will overwrite below the final + d--; /* Below, we will overwrite the final dot with a right brace */ } else { @@ -5486,9 +5501,10 @@ Perl_yylex(pTHX) OPERATOR(HASHBRACK); } if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) { - /* ${...} or @{...} etc., but not print {...} */ - PL_expect = XTERM; - break; + /* ${...} or @{...} etc., but not print {...} + * Skip the disambiguation and treat this as a block. + */ + goto block_expectation; } /* This hack serves to disambiguate a pair of curlies * as being a block or an anon hash. Normally, expectation @@ -5572,7 +5588,28 @@ Perl_yylex(pTHX) || (*t == '=' && t[1] == '>'))) OPERATOR(HASHBRACK); if (PL_expect == XREF) - PL_expect = XTERM; + { + block_expectation: + /* If there is an opening brace or 'sub:', treat it + as a term to make ${{...}}{k} and &{sub:attr...} + dwim. Otherwise, treat it as a statement, so + map {no strict; ...} works. + */ + s = skipspace(s); + if (*s == '{') { + PL_expect = XTERM; + break; + } + if (strnEQ(s, "sub", 3)) { + d = s + 3; + d = skipspace(d); + if (*d == ':') { + PL_expect = XTERM; + break; + } + } + PL_expect = XSTATE; + } else { PL_lex_brackstack[PL_lex_brackets-1] = XSTATE; PL_expect = XSTATE; @@ -5581,8 +5618,7 @@ Perl_yylex(pTHX) break; } pl_yylval.ival = CopLINE(PL_curcop); - if (isSPACE(*s) || *s == '#') - PL_copline = NOLINE; /* invalidate current command line number */ + PL_copline = NOLINE; /* invalidate current command line number */ TOKEN(formbrack ? '=' : '{'); case '}': if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) @@ -5797,7 +5833,7 @@ Perl_yylex(pTHX) if (PL_expect != XOPERATOR) { if (s[1] != '<' && !strchr(s,'>')) check_uni(); - if (s[1] == '<') + if (s[1] == '<' && s[2] != '>') s = scan_heredoc(s); else s = scan_inputsymbol(s); @@ -6215,7 +6251,7 @@ Perl_yylex(pTHX) } /* avoid v123abc() or $h{v1}, allow C */ if (!isALPHA(*start) && (PL_expect == XTERM - || PL_expect == XREF || PL_expect == XSTATE + || PL_expect == XSTATE || PL_expect == XTERMORDORDOR)) { GV *const gv = gv_fetchpvn_flags(s, start - s, UTF ? SVf_UTF8 : 0, SVt_PVCV); @@ -6552,7 +6588,11 @@ Perl_yylex(pTHX) rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op); cv = lex - ? isGV(gv) ? GvCV(gv) : (CV *)gv + ? isGV(gv) + ? GvCV(gv) + : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV + ? (CV *)SvRV(gv) + : ((CV *)gv) : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB); } @@ -6681,7 +6721,6 @@ Perl_yylex(pTHX) /* Not a method, so call it a subroutine (if defined) */ if (cv) { - OP *gvop; /* Check for a constant sub */ if ((sv = cv_const_sv_or_av(cv))) { its_constant: @@ -6699,20 +6738,6 @@ Perl_yylex(pTHX) TOKEN(WORD); } - /* Resolve to GV now if this is a placeholder. */ - if (!off && (gvop = cUNOPx(rv2cv_op)->op_first) - && gvop->op_type == OP_GV) { - GV *gv2 = cGVOPx_gv(gvop); - if (gv2 && !isGV(gv2)) { - gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV); - assert (SvTYPE(gv) == SVt_PVGV); - /* cv must have been some sort of placeholder, - so now needs replacing with a real code - reference. */ - cv = GvCV(gv); - } - } - op_free(pl_yylval.opval); pl_yylval.opval = off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op; @@ -6921,7 +6946,9 @@ Perl_yylex(pTHX) } case KEY___SUB__: - FUN0OP(newPVOP(OP_RUNCV,0,NULL)); + FUN0OP(CvCLONE(PL_compcv) + ? newOP(OP_RUNCV, 0) + : newPVOP(OP_RUNCV,0,NULL)); case KEY_AUTOLOAD: case KEY_DESTROY: @@ -8126,10 +8153,13 @@ S_pending_ident(pTHX) } else { if (has_colon) { + /* "my" variable %s can't be in a package */ /* PL_no_myglob is constant */ GCC_DIAG_IGNORE(-Wformat-nonliteral); yyerror_pv(Perl_form(aTHX_ PL_no_myglob, - PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf), + PL_in_my == KEY_my ? "my" : "state", + *PL_tokenbuf == '&' ? "subroutin" : "variabl", + PL_tokenbuf), UTF ? SVf_UTF8 : 0); GCC_DIAG_RESTORE; } @@ -8255,12 +8285,20 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what) s++; if (*s == ',') { GV* gv; + PADOFFSET off; if (keyword(w, s - w, 0)) return; gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV); if (gv && GvCVu(gv)) return; + if (s - w <= 254) { + char tmpbuf[256]; + Copy(w, tmpbuf+1, s - w, char); + *tmpbuf = '&'; + off = pad_findmy_pvn(tmpbuf, s-w+1, UTF ? SVf_UTF8 : 0); + if (off != NOT_IN_PAD) return; + } Perl_croak(aTHX_ "No comma allowed after %s", what); } } @@ -8535,25 +8573,52 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) /* Is the byte 'd' a legal single character identifier name? 'u' is true * iff Unicode semantics are to be used. The legal ones are any of: - * a) ASCII digits - * b) ASCII punctuation + * a) all ASCII characters except: + * 1) space-type ones, like \t and SPACE; + 2) NUL; + * 3) '{' + * The final case currently doesn't get this far in the program, so we + * don't test for it. If that were to change, it would be ok to allow it. * c) When not under Unicode rules, any upper Latin1 character - * d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally - * been matched by \s on ASCII platforms. That is: \c?, plus 1-32, minus - * the \s ones. */ -#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \ - || isDIGIT_A((U8)(d)) \ - || (!(u) && !isASCII((U8)(d))) \ - || ((((U8)(d)) < 32) \ - && (((((U8)(d)) >= 14) \ - || (((U8)(d)) <= 8 && (d) != 0) \ - || (((U8)(d)) == 13)))) \ - || (((U8)(d)) == toCTRL('?'))) - if (s < PL_bufend - && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8))) + * d) Otherwise, when unicode rules are used, all XIDS characters. + * + * Because all ASCII characters have the same representation whether + * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and + * '{' without knowing if is UTF-8 or not */ +#ifdef EBCDIC +# define VALID_LEN_ONE_IDENT(s, is_utf8) \ + (isGRAPH_A(*(s)) || ((is_utf8) \ + ? isIDFIRST_utf8((U8*) (s)) \ + : (isGRAPH_L1(*s) \ + && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD))))) +#else +# define VALID_LEN_ONE_IDENT(s, is_utf8) (! isSPACE_A(*(s)) \ + && LIKELY(*(s) != '\0') \ + && (! is_utf8 \ + || isASCII_utf8((U8*) (s)) \ + || isIDFIRST_utf8((U8*) (s)))) +#endif + if ((s <= PL_bufend - (is_utf8) + ? UTF8SKIP(s) + : 1) + && VALID_LEN_ONE_IDENT(s, is_utf8)) { - if ( isCNTRL_A((U8)*s) ) { - deprecate("literal control characters in variable names"); + /* Deprecate all non-graphic characters. Include SHY as a non-graphic, + * because often it has no graphic representation. (We can't get to + * here with SHY when 'is_utf8' is true, so no need to include a UTF-8 + * test for it.) */ + if ((is_utf8) + ? ! isGRAPH_utf8( (U8*) s) + : (! isGRAPH_L1( (U8) *s) + || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD)))) + { + /* Split messages for back compat */ + if (isCNTRL_A( (U8) *s)) { + deprecate("literal control characters in variable names"); + } + else { + deprecate("literal non-graphic characters in variable names"); + } } if (is_utf8) { @@ -8672,14 +8737,14 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) } static bool -S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) { - - /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in - * the parse starting at 's', based on the subset that are valid in this - * context input to this routine in 'valid_flags'. Advances s. Returns - * TRUE if the input should be treated as a valid flag, so the next char - * may be as well; otherwise FALSE. 'charset' should point to a NUL upon - * first call on the current regex. This routine will set it to any +S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) { + + /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag + * found in the parse starting at 's', based on the subset that are valid + * in this context input to this routine in 'valid_flags'. Advances s. + * Returns TRUE if the input should be treated as a valid flag, so the next + * char may be as well; otherwise FALSE. 'charset' should point to a NUL + * upon first call on the current regex. This routine will set it to any * charset modifier found. The caller shouldn't change it. This way, * another charset modifier encountered in the parse can be detected as an * error, as we have decided to allow only one */ @@ -8701,7 +8766,7 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse switch (c) { - CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl); + CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count); case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break; case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break; case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break; @@ -8776,6 +8841,7 @@ S_scan_pat(pTHX_ char *start, I32 type) const char * const valid_flags = (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS); char charset = '\0'; /* character set modifier */ + unsigned int x_mod_count = 0; PERL_ARGS_ASSERT_SCAN_PAT; @@ -8825,7 +8891,9 @@ S_scan_pat(pTHX_ char *start, I32 type) pm->op_pmflags |= PMf_IS_QR; } - while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {}; + while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), + &s, &charset, &x_mod_count)) + {}; /* issue a warning if /c is specified,but /g is not */ if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)) { @@ -8833,6 +8901,8 @@ S_scan_pat(pTHX_ char *start, I32 type) "Use of /c modifier is meaningless without /g" ); } + STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count); + PL_lex_op = (OP*)pm; pl_yylval.ival = OP_MATCH; return s; @@ -8847,6 +8917,7 @@ S_scan_subst(pTHX_ char *start) line_t first_line; I32 es = 0; char charset = '\0'; /* character set modifier */ + unsigned int x_mod_count = 0; char *t; PERL_ARGS_ASSERT_SCAN_SUBST; @@ -8880,12 +8951,15 @@ S_scan_subst(pTHX_ char *start) s++; es++; } - else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset)) + else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), + &s, &charset, &x_mod_count)) { break; } } + STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count); + if ((pm->op_pmflags & PMf_CONTINUE)) { Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" ); } @@ -9203,7 +9277,14 @@ S_scan_heredoc(pTHX_ char *s) origline + 1 + PL_parser->herelines); if (!lex_next_chunk(LEX_NO_TERM) && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) { - SvREFCNT_dec(linestr_save); + /* Simply freeing linestr_save might seem simpler here, as it + does not matter what PL_linestr points to, since we are + about to croak; but in a quote-like op, linestr_save + will have been prospectively freed already, via + SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to + restore PL_linestr. */ + SvREFCNT_dec_NN(PL_linestr); + PL_linestr = linestr_save; goto interminable; } CopLINE_set(PL_curcop, origline); @@ -9232,7 +9313,8 @@ S_scan_heredoc(pTHX_ char *s) else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r') PL_bufend[-1] = '\n'; #endif - if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) { + if (*s == term && PL_bufend-s >= len + && memEQ(s,PL_tokenbuf + 1,len)) { SvREFCNT_dec(PL_linestr); PL_linestr = linestr_save; PL_linestart = SvPVX(linestr_save); @@ -9273,6 +9355,7 @@ S_scan_heredoc(pTHX_ char *s) This code handles: <> read from ARGV + <<>> read from ARGV without magic open read from filehandle read from package qualified filehandle read from package qualified filehandle @@ -9287,6 +9370,7 @@ S_scan_inputsymbol(pTHX_ char *start) char *s = start; /* current position in buffer */ char *end; I32 len; + bool nomagicopen = FALSE; char *d = PL_tokenbuf; /* start of temp holding space */ const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */ @@ -9295,7 +9379,14 @@ S_scan_inputsymbol(pTHX_ char *start) end = strchr(s, '\n'); if (!end) end = PL_bufend; - s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */ + if (s[1] == '<' && s[2] == '>' && s[3] == '>') { + nomagicopen = TRUE; + *d = '\0'; + len = 0; + s += 3; + } + else + s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */ /* die if we didn't have space for the contents of the <>, or if it didn't end, or if we see a newline @@ -9405,7 +9496,7 @@ intro_sym: op_append_elem(OP_LIST, newGVOP(OP_GV, 0, gv), newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) - : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv)); + : (OP*)newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv)); pl_yylval.ival = OP_NULL; } } @@ -10456,7 +10547,7 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags) CvFLAGS(PL_compcv) |= flags; PL_subline = CopLINE(PL_curcop); - CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB); + CvPADLIST_set(PL_compcv, pad_new(padnew_SAVE|padnew_SAVESUB)); CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv)); CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax; if (outsidecv && CvPADLIST(outsidecv)) @@ -10473,7 +10564,6 @@ S_yywarn(pTHX_ const char *const s, U32 flags) PL_in_eval |= EVAL_WARNONLY; yyerror_pv(s, flags); - PL_in_eval &= ~EVAL_WARNONLY; return 0; } @@ -10577,6 +10667,7 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) PL_multi_end = 0; } if (PL_in_eval & EVAL_WARNONLY) { + PL_in_eval &= ~EVAL_WARNONLY; Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg)); } else @@ -11367,7 +11458,6 @@ S_parse_opt_lexvar(pTHX) PL_bufptr = s; if (d == PL_tokenbuf+1) return NULL; - *d = 0; var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV, OPf_MOD | (OPpLVAL_INTRO<<8)); var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0); @@ -11448,10 +11538,16 @@ Perl_parse_subsignature(pTHX) scalar(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv))), newSVOP(OP_CONST, 0, newSViv(1))), - newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0), - newSVOP(OP_CONST, 0, - newSVpvs("Odd name/value argument " - "for subroutine")))); + op_convert_list(OP_DIE, 0, + op_convert_list(OP_SPRINTF, 0, + op_append_list(OP_LIST, + newSVOP(OP_CONST, 0, + newSVpvs("Odd name/value argument for subroutine at %s line %d.\n")), + newSLICEOP(0, + op_append_list(OP_LIST, + newSVOP(OP_CONST, 0, newSViv(1)), + newSVOP(OP_CONST, 0, newSViv(2))), + newOP(OP_CALLER, 0)))))); if (pos != min_arity) chkop = newLOGOP(OP_AND, 0, newBINOP(OP_GT, 0, @@ -11514,9 +11610,16 @@ Perl_parse_subsignature(pTHX) scalar(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv))), newSVOP(OP_CONST, 0, newSViv(min_arity))), - newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0), - newSVOP(OP_CONST, 0, - newSVpvs("Too few arguments for subroutine"))))), + op_convert_list(OP_DIE, 0, + op_convert_list(OP_SPRINTF, 0, + op_append_list(OP_LIST, + newSVOP(OP_CONST, 0, + newSVpvs("Too few arguments for subroutine at %s line %d.\n")), + newSLICEOP(0, + op_append_list(OP_LIST, + newSVOP(OP_CONST, 0, newSViv(1)), + newSVOP(OP_CONST, 0, newSViv(2))), + newOP(OP_CALLER, 0))))))), initops); } if (max_arity != -1) { @@ -11527,9 +11630,16 @@ Perl_parse_subsignature(pTHX) scalar(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv))), newSVOP(OP_CONST, 0, newSViv(max_arity))), - newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0), - newSVOP(OP_CONST, 0, - newSVpvs("Too many arguments for subroutine"))))), + op_convert_list(OP_DIE, 0, + op_convert_list(OP_SPRINTF, 0, + op_append_list(OP_LIST, + newSVOP(OP_CONST, 0, + newSVpvs("Too many arguments for subroutine at %s line %d.\n")), + newSLICEOP(0, + op_append_list(OP_LIST, + newSVOP(OP_CONST, 0, newSViv(1)), + newSVOP(OP_CONST, 0, newSViv(2))), + newOP(OP_CALLER, 0))))))), initops); } return initops;