X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/82a7b38ebca5ead4666341f73512665b295afd51..fedc1b0e2d9cec34b7e3b1fa65dd0f7eb4f539fd:/toke.c

diff --git a/toke.c b/toke.c
index f48fa28..0eeafd4 100644
--- a/toke.c
+++ b/toke.c
@@ -99,9 +99,9 @@ static const char* const ident_too_long = "Identifier too long";
 #define XFAKEBRACK 0x80
 
 #ifdef USE_UTF8_SCRIPTS
-#   define UTF (!IN_BYTES)
+#   define UTF cBOOL(!IN_BYTES)
 #else
-#   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
+#   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 #endif
 
 /* The maximum number of characters preceding the unrecognized one to display */
@@ -186,6 +186,7 @@ static const char* const lex_state_names[] = {
  * FUN1         : not used, except for not, which isn't a UNIOP
  * BOop         : bitwise or or xor
  * BAop         : bitwise and
+ * BCop         : bitwise complement
  * SHop         : shift operator
  * PWop         : power operator
  * PMop         : pattern-matching operator
@@ -206,7 +207,7 @@ static const char* const lex_state_names[] = {
 
 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
-#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
+#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
@@ -220,14 +221,16 @@ static const char* const lex_state_names[] = {
 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
-#define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
-#define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
-#define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
-#define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
+#define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
+#define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
+#define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
+		       REPORT('~')
+#define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
+#define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
-#define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
+#define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
-#define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
+#define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 
@@ -399,7 +402,7 @@ S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 	}
 	if (name)
 	    Perl_sv_catpv(aTHX_ report, name);
-	else if ((char)rv > ' ' && (char)rv <= '~')
+	else if (isGRAPH(rv))
 	{
 	    Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 	    if ((char)rv == 'p')
@@ -486,7 +489,7 @@ S_ao(pTHX_ int toketype)
 	    pl_yylval.ival = OP_DORASSIGN;
 	toketype = ASSIGNOP;
     }
-    return toketype;
+    return REPORT(toketype);
 }
 
 /*
@@ -500,6 +503,9 @@ S_ao(pTHX_ int toketype)
  * It prints "Missing operator before end of line" if there's nothing
  * after the missing operator, or "... before <...>" if there is something
  * after the missing operator.
+ *
+ * PL_bufptr is expected to point to the start of the thing that was found,
+ * and s after the next token or partial token.
  */
 
 STATIC void
@@ -730,7 +736,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
     parser->last_lop = parser->last_uni = NULL;
 
-    assert(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
+    STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
                                                         |LEX_DONT_CLOSE_RSFP));
     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
                                                         |LEX_DONT_CLOSE_RSFP));
@@ -1246,7 +1252,7 @@ buffer has reached the end of the input text.
 */
 
 #define LEX_FAKE_EOF 0x80000000
-#define LEX_NO_TERM  0x40000000
+#define LEX_NO_TERM  0x40000000 /* here-doc */
 
 bool
 Perl_lex_next_chunk(pTHX_ U32 flags)
@@ -1260,6 +1266,8 @@ Perl_lex_next_chunk(pTHX_ U32 flags)
     bool got_some;
     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
+    if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
+	return FALSE;
     linestr = PL_parser->linestr;
     buf = SvPVX(linestr);
     if (!(flags & LEX_KEEP_PREVIOUS) &&
@@ -1514,6 +1522,8 @@ Perl_lex_read_space(pTHX_ U32 flags)
 		incline(s);
 		need_incline = 0;
 	    }
+	} else if (!c) {
+	    s++;
 	} else {
 	    break;
 	}
@@ -1790,13 +1800,13 @@ S_skipspace_flags(pTHX_ char *s, U32 flags)
 {
     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
-	while (s < PL_bufend && SPACE_OR_TAB(*s))
+	while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
 	    s++;
     } else {
 	STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
 	PL_bufptr = s;
 	lex_read_space(flags | LEX_KEEP_PREVIOUS |
-		(PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
+		(PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
 		    LEX_NO_NEXT_CHUNK : 0));
 	s = PL_bufptr;
 	PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
@@ -1899,6 +1909,7 @@ S_force_next(pTHX_ I32 type)
 	tokereport(type, &NEXTVAL_NEXTTOKE);
     }
 #endif
+    assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
     PL_nexttype[PL_nexttoke] = type;
     PL_nexttoke++;
     if (PL_lex_state != LEX_KNOWNEXT) {
@@ -1968,7 +1979,7 @@ S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
     SV * const sv = newSVpvn_utf8(start, len,
 				  !IN_BYTES
 				  && UTF
-				  && !is_ascii_string((const U8*)start, len)
+				  && !is_invariant_string((const U8*)start, len)
 				  && is_utf8_string((const U8*)start, len));
     return sv;
 }
@@ -2005,9 +2016,10 @@ S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
 	s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
 	if (check_keyword) {
 	  char *s2 = PL_tokenbuf;
+	  STRLEN len2 = len;
 	  if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
-	    s2 += 6, len -= 6;
-	  if (keyword(s2, len, 0))
+	    s2 += 6, len2 -= 6;
+	  if (keyword(s2, len2, 0))
 	    return start;
 	}
 	if (token == METHOD) {
@@ -2272,7 +2284,9 @@ S_sublex_start(pTHX)
 	return THING;
     }
     if (op_type == OP_CONST) {
-	SV *sv = tokeq(PL_lex_stuff);
+	SV *sv = PL_lex_stuff;
+	PL_lex_stuff = NULL;
+	sv = tokeq(sv);
 
 	if (SvTYPE(sv) == SVt_PVIV) {
 	    /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
@@ -2283,7 +2297,6 @@ S_sublex_start(pTHX)
 	    sv = nsv;
 	}
 	pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
-	PL_lex_stuff = NULL;
 	return THING;
     }
 
@@ -2363,6 +2376,13 @@ S_sublex_push(pTHX)
     PL_lex_stuff = NULL;
     PL_sublex_info.repl = NULL;
 
+    /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
+       set for an inner quote-like operator and then an error causes scope-
+       popping.  We must not have a PL_lex_stuff value left dangling, as
+       that breaks assumptions elsewhere.  See bug #123617.  */
+    SAVEGENERICSV(PL_lex_stuff);
+    SAVEGENERICSV(PL_sublex_info.repl);
+
     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
 	= SvPVX(PL_linestr);
     PL_bufend += SvCUR(PL_linestr);
@@ -2457,7 +2477,7 @@ S_sublex_done(pTHX)
 		 + PL_parser->herelines;
 	    PL_parser->herelines = 0;
 	}
-	return ',';
+	return '/';
     }
     else {
 	const line_t l = CopLINE(PL_curcop);
@@ -2467,7 +2487,6 @@ S_sublex_done(pTHX)
 	PL_bufend = SvPVX(PL_linestr);
 	PL_bufend += SvCUR(PL_linestr);
 	PL_expect = XOPERATOR;
-	PL_sublex_info.sub_inwhat = 0;
 	return ')';
     }
 }
@@ -2491,6 +2510,9 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
 
     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
 
+    if (!SvCUR(res))
+        return res;
+
     if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
                                      e - backslash_ptr,
                                      &first_bad_char_loc))
@@ -3084,6 +3106,7 @@ S_scan_const(pTHX_ char *start)
 	     * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
 	     * in spite of this, we do have to process \N here while the proper
 	     * charnames handler is in scope.  See bugs #56444 and #62056.
+             *
 	     * There is a complication because \N in a pattern may also stand
 	     * for 'match a non-nl', and not mean a charname, in which case its
 	     * processing should be deferred to the regex compiler.  To be a
@@ -3188,9 +3211,13 @@ S_scan_const(pTHX_ char *start)
 			SvPOK_on(sv);
 			*d = '\0';
 			/* See Note on sizing above.  */
-			sv_utf8_upgrade_flags_grow(sv,
-					SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
-					UNISKIP(uv) + (STRLEN)(send - s) + 1);
+			sv_utf8_upgrade_flags_grow(
+                                         sv,
+                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE
+                                                  /* Above-latin1 in string
+                                                   * implies no encoding */
+                                                  |SV_UTF8_NO_ENCODING,
+                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
 			d = SvPVX(sv) + SvCUR(sv);
 			has_utf8 = TRUE;
                     }
@@ -3218,31 +3245,44 @@ S_scan_const(pTHX_ char *start)
 		continue;
 
  	    case 'N':
-		/* In a non-pattern \N must be a named character, like \N{LATIN
-		 * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
-		 * mean to match a non-newline.  For non-patterns, named
-		 * characters are converted to their string equivalents. In
-		 * patterns, named characters are not converted to their
-		 * ultimate forms for the same reasons that other escapes
-		 * aren't.  Instead, they are converted to the \N{U+...} form
-		 * to get the value from the charnames that is in effect right
-		 * now, while preserving the fact that it was a named character
-		 * so that the regex compiler knows this */
-
-		/* The structure of this section of code (besides checking for
+                /* In a non-pattern \N must be like \N{U+0041}, or it can be a
+                 * named character, like \N{LATIN SMALL LETTER A}, or a named
+                 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
+                 * GRAVE}.  For convenience all three forms are referred to as
+                 * "named characters" below.
+                 *
+                 * For patterns, \N also can mean to match a non-newline.  Code
+                 * before this 'switch' statement should already have handled
+                 * this situation, and hence this code only has to deal with
+                 * the named character cases.
+                 *
+                 * For non-patterns, the named characters are converted to
+                 * their string equivalents.  In patterns, named characters are
+                 * not converted to their ultimate forms for the same reasons
+                 * that other escapes aren't.  Instead, they are converted to
+                 * the \N{U+...} form to get the value from the charnames that
+                 * is in effect right now, while preserving the fact that it
+                 * was a named character, so that the regex compiler knows
+                 * this.
+                 *
+		 * The structure of this section of code (besides checking for
 		 * errors and upgrading to utf8) is:
-		 *  Further disambiguate between the two meanings of \N, and if
-		 *	not a charname, go process it elsewhere
-		 *  If of form \N{U+...}, pass it through if a pattern;
-		 *	otherwise convert to utf8
-		 *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
-		 *  pattern; otherwise convert to utf8 */
-
-		/* Here, s points to the 'N'; the test below is guaranteed to
-		 * succeed if we are being called on a pattern as we already
-		 * know from a test above that the next character is a '{'.
-		 * On a non-pattern \N must mean 'named sequence, which
-		 * requires braces */
+                 *  If the named character is of the form \N{U+...}, pass it
+                 *      through if a pattern; otherwise convert the code point
+                 *      to utf8
+                 *  Otherwise must be some \N{NAME}: convert to \N{U+c1.c2...}
+                 *      if a pattern; otherwise convert to utf8
+                 *
+                 * If the regex compiler should ever need to differentiate
+                 * between the \N{U+...} and \N{name} forms, that could easily
+                 * be done here by stripping any leading zeros from the
+                 * \N{U+...} case, and adding them to the other one. */
+
+                /* Here, 's' points to the 'N'; the test below is guaranteed to
+		 * succeed if we are being called on a pattern, as we already
+                 * know from a test above that the next character is a '{'.  A
+                 * non-pattern \N must mean 'named character', which requires
+                 * braces */
 		s++;
 		if (*s != '{') {
 		    yyerror("Missing braces on \\N{}"); 
@@ -3263,43 +3303,45 @@ S_scan_const(pTHX_ char *start)
 		/* Here it looks like a named character */
 
 		if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
-		    I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
-				| PERL_SCAN_DISALLOW_PREFIX;
-		    STRLEN len;
-
-		    /* For \N{U+...}, the '...' is a unicode value even on
-		     * EBCDIC machines */
 		    s += 2;	    /* Skip to next char after the 'U+' */
-		    len = e - s;
-		    uv = grok_hex(s, &len, &flags, NULL);
-		    if (len == 0 || len != (STRLEN)(e - s)) {
-			yyerror("Invalid hexadecimal number in \\N{U+...}");
-			s = e + 1;
-			continue;
-		    }
-
 		    if (PL_lex_inpat) {
 
-			/* On non-EBCDIC platforms, pass through to the regex
-			 * compiler unchanged.  The reason we evaluated the
-			 * number above is to make sure there wasn't a syntax
-			 * error.  But on EBCDIC we convert to native so
-			 * downstream code can continue to assume it's native
-			 */
-			s -= 5;	    /* Include the '\N{U+' */
-#ifdef EBCDIC
-			d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
-							       and the \0 */
-				    "\\N{U+%X}",
-				    (unsigned int) UNI_TO_NATIVE(uv));
-#else
-			Copy(s, d, e - s + 1, char);	/* 1 = include the } */
-			d += e - s + 1;
-#endif
+                        /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
+                        /* Check the syntax.  */
+                        const char *orig_s;
+                        orig_s = s - 5;
+                        if (!isXDIGIT(*s)) {
+                          bad_NU:
+                            yyerror(
+                                "Invalid hexadecimal number in \\N{U+...}"
+                            );
+                            s = e + 1;
+                            continue;
+                        }
+                        while (++s < e) {
+                            if (isXDIGIT(*s))
+                                continue;
+                            else if ((*s == '.' || *s == '_')
+                                  && isXDIGIT(s[1]))
+                                continue;
+                            goto bad_NU;
+                        }
+
+                        /* Pass everything through unchanged.
+                         * +1 is for the '}' */
+                        Copy(orig_s, d, e - orig_s + 1, char);
+                        d += e - orig_s + 1;
 		    }
 		    else {  /* Not a pattern: convert the hex to string */
+                        I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
+				| PERL_SCAN_SILENT_ILLDIGIT
+				| PERL_SCAN_DISALLOW_PREFIX;
+                        STRLEN len = e - s;
+                        uv = grok_hex(s, &len, &flags, NULL);
+                        if (len == 0 || (len != (STRLEN)(e - s)))
+                            goto bad_NU;
 
-			 /* If destination is not in utf8, unconditionally
+                         /* If the destination is not in utf8, unconditionally
 			  * recode it to be so.  This is because \N{} implies
 			  * Unicode semantics, and scalars have to be in utf8
 			  * to guarantee those semantics */
@@ -3352,13 +3394,18 @@ S_scan_const(pTHX_ char *start)
                                  * through the string.  Each character takes up
                                  * 2 hex digits plus either a trailing dot or
                                  * the "}" */
+                                const char initial_text[] = "\\N{U+";
+                                const STRLEN initial_len = sizeof(initial_text)
+                                                           - 1;
                                 d = off + SvGROW(sv, off
                                                     + 3 * len
-                                                    + 6 /* For the "\N{U+", and
-                                                           trailing NUL */
+
+                                                    /* +1 for trailing NUL */
+                                                    + initial_len + 1
+
                                                     + (STRLEN)(send - e));
-                                Copy("\\N{U+", d, 5, char);
-                                d += 5;
+                                Copy(initial_text, d, initial_len, char);
+                                d += initial_len;
                                 while (str < str_end) {
                                     char hex_string[4];
                                     int len =
@@ -3370,7 +3417,7 @@ S_scan_const(pTHX_ char *start)
                                     d += 3;
                                     str++;
                                 }
-                                d--;    /* We will overwrite below the final
+                                d--;    /* Below, we will overwrite the final
                                            dot with a right brace */
                             }
                             else {
@@ -3455,8 +3502,8 @@ S_scan_const(pTHX_ char *start)
 			    const STRLEN off = d - SvPVX_const(sv);
 			    d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
 			}
-                        if (! SvUTF8(res)) {    /* Make sure is \N{} return is UTF-8 */
-                            sv_utf8_upgrade(res);
+                        if (! SvUTF8(res)) {    /* Make sure \N{} return is UTF-8 */
+                            sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
                             str = SvPV_const(res, len);
                         }
 			Copy(str, d, len, char);
@@ -3572,8 +3619,8 @@ S_scan_const(pTHX_ char *start)
 		   " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
 
     SvPOK_on(sv);
-    if (PL_encoding && !has_utf8) {
-	sv_recode_to_utf8(sv, PL_encoding);
+    if (IN_ENCODING && !has_utf8) {
+	sv_recode_to_utf8(sv, _get_encoding());
 	if (SvUTF8(sv))
 	    has_utf8 = TRUE;
     }
@@ -3769,11 +3816,10 @@ S_intuit_more(pTHX_ char *s)
 		    && !(last_un_char == '$' || last_un_char == '@'
 			 || last_un_char == '&')
 		    && isALPHA(*s) && s[1] && isALPHA(s[1])) {
-		    char *d = tmpbuf;
+		    char *d = s;
 		    while (isALPHA(*s))
-			*d++ = *s++;
-		    *d = '\0';
-		    if (keyword(tmpbuf, d - tmpbuf, 0))
+			s++;
+		    if (keyword(d, s - d, 0))
 			weight -= 150;
 		}
 		if (un_char == last_un_char + 1)
@@ -4268,13 +4314,8 @@ Perl_yylex(pTHX)
 	SvREFCNT_dec(tmp);
     } );
 
-    switch (PL_lex_state) {
-    case LEX_NORMAL:
-    case LEX_INTERPNORMAL:
-	break;
-
     /* when we've already built the next token, just pull it out of the queue */
-    case LEX_KNOWNEXT:
+    if (PL_nexttoke) {
 	PL_nexttoke--;
 	pl_yylval = PL_nextval[PL_nexttoke];
 	if (!PL_nexttoke) {
@@ -4299,6 +4340,12 @@ Perl_yylex(pTHX)
 	    }
 	    return REPORT(next_type == 'p' ? pending_ident() : next_type);
 	}
+    }
+
+    switch (PL_lex_state) {
+    case LEX_NORMAL:
+    case LEX_INTERPNORMAL:
+	break;
 
     /* interpolated case modifiers like \L \U, including \Q and \E.
        when we get here, PL_bufptr is at the \
@@ -4447,6 +4494,14 @@ Perl_yylex(pTHX)
 	/* FALLTHROUGH */
 
     case LEX_INTERPEND:
+	/* Treat state as LEX_NORMAL if we have no inner lexing scope.
+	   XXX This hack can be removed if we stop setting PL_lex_state to
+	   LEX_KNOWNEXT, as can the hack under LEX_INTREPCONCAT below.  */
+	if (UNLIKELY(!PL_lex_inwhat)) {
+	    PL_lex_state = LEX_NORMAL;
+	    break;
+	}
+
 	if (PL_lex_dojoin) {
 	    const U8 dojoin_was = PL_lex_dojoin;
 	    PL_lex_dojoin = FALSE;
@@ -4498,6 +4553,14 @@ Perl_yylex(pTHX)
 	    Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
 		       (long) PL_lex_brackets);
 #endif
+	/* Treat state as LEX_NORMAL when not in an inner lexing scope.
+	   XXX This hack can be removed if we stop setting PL_lex_state to
+	   LEX_KNOWNEXT.  */
+	if (UNLIKELY(!PL_lex_inwhat)) {
+	    PL_lex_state = LEX_NORMAL;
+	    break;
+	}
+
 	if (PL_bufptr == PL_bufend)
 	    return REPORT(sublex_done());
 
@@ -4578,7 +4641,8 @@ Perl_yylex(pTHX)
     case 26:
 	goto fake_eof;			/* emulate EOF on ^D or ^Z */
     case 0:
-	if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
+	if ((!PL_rsfp || PL_lex_inwhat)
+	 && (!PL_parser->filtered || s+1 < PL_bufend)) {
 	    PL_last_uni = 0;
 	    PL_last_lop = 0;
 	    if (PL_lex_brackets &&
@@ -4917,7 +4981,6 @@ Perl_yylex(pTHX)
 	}
 	if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
 	    PL_lex_state = LEX_FORMLINE;
-	    NEXTVAL_NEXTTOKE.ival = 0;
 	    force_next(FORMRBRACK);
 	    TOKEN(';');
 	}
@@ -4928,7 +4991,7 @@ Perl_yylex(pTHX)
 	Perl_croak(aTHX_
       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
 #endif
-    case ' ': case '\t': case '\f': case 013:
+    case ' ': case '\t': case '\f': case '\v':
 	s++;
 	goto retry;
     case '#':
@@ -4960,7 +5023,6 @@ Perl_yylex(pTHX)
                 incline(s);
 	    if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
 		PL_lex_state = LEX_FORMLINE;
-		NEXTVAL_NEXTTOKE.ival = 0;
 		force_next(FORMRBRACK);
 		TOKEN(';');
 	    }
@@ -5181,11 +5243,18 @@ Perl_yylex(pTHX)
 	TERM('%');
     }
     case '^':
+	d = s;
+	bof = FEATURE_BITWISE_IS_ENABLED;
+	if (bof && s[1] == '.')
+	    s++;
 	if (!PL_lex_allbrackets && PL_lex_fakeeof >=
 		(s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
+	{
+	    s = d;
 	    TOKEN(0);
+	}
 	s++;
-	BOop(OP_BIT_XOR);
+	BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
     case '[':
 	if (PL_lex_brackets > 100)
 	    Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
@@ -5208,7 +5277,11 @@ Perl_yylex(pTHX)
 	    Eop(OP_SMARTMATCH);
 	}
 	s++;
-	OPERATOR('~');
+	if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
+	    s++;
+	    BCop(OP_SCOMPLEMENT);
+	}
+	BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
     case ',':
 	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
 	    TOKEN(0);
@@ -5281,7 +5354,7 @@ Perl_yylex(pTHX)
 		    sv_catsv(sv, PL_lex_stuff);
 		    attrs = op_append_elem(OP_LIST, attrs,
 					newSVOP(OP_CONST, 0, sv));
-		    SvREFCNT_dec(PL_lex_stuff);
+		    SvREFCNT_dec_NN(PL_lex_stuff);
 		    PL_lex_stuff = NULL;
 		}
 		else {
@@ -5308,6 +5381,19 @@ Perl_yylex(pTHX)
 			sv_free(sv);
 			CvMETHOD_on(PL_compcv);
 		    }
+		    else if (!PL_in_my && len == 5
+			  && strnEQ(SvPVX(sv), "const", len))
+		    {
+			sv_free(sv);
+			Perl_ck_warner_d(aTHX_
+			    packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
+			   ":const is experimental"
+			);
+			CvANONCONST_on(PL_compcv);
+			if (!CvANON(PL_compcv))
+			    yyerror(":const is not permitted on named "
+				    "subroutines");
+		    }
 		    /* After we've set the flags, it could be argued that
 		       we don't need to do the attributes.pm-based setting
 		       process, and shouldn't bother appending recognized
@@ -5422,6 +5508,7 @@ Perl_yylex(pTHX)
 	}
 	switch (PL_expect) {
 	case XTERM:
+	case XTERMORDORDOR:
 	    PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
 	    PL_lex_allbrackets++;
 	    OPERATOR(HASHBRACK);
@@ -5485,9 +5572,10 @@ Perl_yylex(pTHX)
 		    OPERATOR(HASHBRACK);
 		}
 		if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
-		    /* ${...} or @{...} etc., but not print {...} */
-		    PL_expect = XTERM;
-		    break;
+		    /* ${...} or @{...} etc., but not print {...}
+		     * Skip the disambiguation and treat this as a block.
+		     */
+		    goto block_expectation;
 		}
 		/* This hack serves to disambiguate a pair of curlies
 		 * as being a block or an anon hash.  Normally, expectation
@@ -5571,7 +5659,28 @@ Perl_yylex(pTHX)
 				   || (*t == '=' && t[1] == '>')))
 		    OPERATOR(HASHBRACK);
 		if (PL_expect == XREF)
-		    PL_expect = XTERM;
+		{
+		  block_expectation:
+		    /* If there is an opening brace or 'sub:', treat it
+		       as a term to make ${{...}}{k} and &{sub:attr...}
+		       dwim.  Otherwise, treat it as a statement, so
+		       map {no strict; ...} works.
+		     */
+		    s = skipspace(s);
+		    if (*s == '{') {
+			PL_expect = XTERM;
+			break;
+		    }
+		    if (strnEQ(s, "sub", 3)) {
+			d = s + 3;
+			d = skipspace(d);
+			if (*d == ':') {
+			    PL_expect = XTERM;
+			    break;
+			}
+		    }
+		    PL_expect = XSTATE;
+		}
 		else {
 		    PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
 		    PL_expect = XSTATE;
@@ -5580,8 +5689,7 @@ Perl_yylex(pTHX)
 	    break;
 	}
 	pl_yylval.ival = CopLINE(PL_curcop);
-	if (isSPACE(*s) || *s == '#')
-	    PL_copline = NOLINE;   /* invalidate current command line number */
+	PL_copline = NOLINE;   /* invalidate current command line number */
 	TOKEN(formbrack ? '=' : '{');
     case '}':
 	if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
@@ -5643,25 +5751,32 @@ Perl_yylex(pTHX)
 		Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
 		CopLINE_inc(PL_curcop);
 	    }
+	    d = s;
+	    if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
+		s++;
 	    if (!PL_lex_allbrackets && PL_lex_fakeeof >=
 		    (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
+		s = d;
 		s--;
 		TOKEN(0);
 	    }
-	    PL_parser->saw_infix_sigil = 1;
-	    BAop(OP_BIT_AND);
+	    if (d == s) {
+		PL_parser->saw_infix_sigil = 1;
+		BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
+	    }
+	    else
+		BAop(OP_SBIT_AND);
 	}
 
 	PL_tokenbuf[0] = '&';
 	s = scan_ident(s - 1, PL_tokenbuf + 1,
 		       sizeof PL_tokenbuf - 1, TRUE);
+	pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
 	if (PL_tokenbuf[1]) {
-	    PL_expect = XOPERATOR;
 	    force_ident_maybe_lex('&');
 	}
 	else
 	    PREREF('&');
-	pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
 	TERM('&');
 
     case '|':
@@ -5675,12 +5790,15 @@ Perl_yylex(pTHX)
 	    AOPERATOR(OROR);
 	}
 	s--;
+	d = s;
+	if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
+	    s++;
 	if (!PL_lex_allbrackets && PL_lex_fakeeof >=
 		(*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
-	    s--;
+	    s = d - 1;
 	    TOKEN(0);
 	}
-	BOop(OP_BIT_OR);
+	BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
     case '=':
 	s++;
 	{
@@ -5710,30 +5828,30 @@ Perl_yylex(pTHX)
 	    s--;
 	    if (PL_expect == XSTATE && isALPHA(tmp) &&
 		(s == PL_linestart+1 || s[-2] == '\n') )
-		{
-		    if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
-			|| PL_lex_state != LEX_NORMAL) {
-			d = PL_bufend;
-			while (s < d) {
-			    if (*s++ == '\n') {
-				incline(s);
-				if (strnEQ(s,"=cut",4)) {
-				    s = strchr(s,'\n');
-				    if (s)
-					s++;
-				    else
-					s = d;
-				    incline(s);
-				    goto retry;
-				}
-			    }
-			}
-			goto retry;
-		    }
-		    s = PL_bufend;
-		    PL_parser->in_pod = 1;
-		    goto retry;
-		}
+            {
+                if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
+                    || PL_lex_state != LEX_NORMAL) {
+                    d = PL_bufend;
+                    while (s < d) {
+                        if (*s++ == '\n') {
+                            incline(s);
+                            if (strnEQ(s,"=cut",4)) {
+                                s = strchr(s,'\n');
+                                if (s)
+                                    s++;
+                                else
+                                    s = d;
+                                incline(s);
+                                goto retry;
+                            }
+                        }
+                    }
+                    goto retry;
+                }
+                s = PL_bufend;
+                PL_parser->in_pod = 1;
+                goto retry;
+            }
 	}
 	if (PL_expect == XBLOCK) {
 	    const char *t = s;
@@ -5899,8 +6017,14 @@ Perl_yylex(pTHX)
 	PL_tokenbuf[0] = '$';
 	s = scan_ident(s, PL_tokenbuf + 1,
 		       sizeof PL_tokenbuf - 1, FALSE);
-	if (PL_expect == XOPERATOR)
-	    no_op("Scalar", s);
+	if (PL_expect == XOPERATOR) {
+	    d = s;
+	    if (PL_bufptr > s) {
+		d = PL_bufptr-1;
+		PL_bufptr = PL_oldbufptr;
+	    }
+	    no_op("Scalar", d);
+	}
 	if (!PL_tokenbuf[1]) {
 	    if (s == PL_bufend)
 		yyerror("Final $ should be \\$ or $name");
@@ -6214,7 +6338,7 @@ Perl_yylex(pTHX)
 	    }
 	    /* avoid v123abc() or $h{v1}, allow C<print v10;> */
 	    if (!isALPHA(*start) && (PL_expect == XTERM
-			|| PL_expect == XSTATE
+			|| PL_expect == XREF || PL_expect == XSTATE
 			|| PL_expect == XTERMORDORDOR)) {
 		GV *const gv = gv_fetchpvn_flags(s, start - s,
                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
@@ -6351,7 +6475,7 @@ Perl_yylex(pTHX)
 	    char tmpbuf[sizeof PL_tokenbuf + 1];
 	    *tmpbuf = '&';
 	    Copy(PL_tokenbuf, tmpbuf+1, len, char);
-	    off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
+	    off = pad_findmy_pvn(tmpbuf, len+1, 0);
 	    if (off != NOT_IN_PAD) {
 		assert(off); /* we assume this is boolean-true below */
 		if (PAD_COMPNAME_FLAGS_isOUR(off)) {
@@ -6883,13 +7007,13 @@ Perl_yylex(pTHX)
 		if (!IN_BYTES) {
 		    if (UTF)
 			PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
-		    else if (PL_encoding) {
+		    else if (IN_ENCODING) {
 			SV *name;
 			dSP;
 			ENTER;
 			SAVETMPS;
 			PUSHMARK(sp);
-			XPUSHs(PL_encoding);
+			XPUSHs(_get_encoding());
 			PUTBACK;
 			call_method("name", G_SCALAR);
 			SPAGAIN;
@@ -6909,7 +7033,9 @@ Perl_yylex(pTHX)
 	}
 
 	case KEY___SUB__:
-	    FUN0OP(newPVOP(OP_RUNCV,0,NULL));
+	    FUN0OP(CvCLONE(PL_compcv)
+			? newOP(OP_RUNCV, 0)
+			: newPVOP(OP_RUNCV,0,NULL));
 
 	case KEY_AUTOLOAD:
 	case KEY_DESTROY:
@@ -7580,10 +7706,8 @@ Perl_yylex(pTHX)
 	    }
 	    if (!words)
 		words = newNULLLIST();
-	    if (PL_lex_stuff) {
-		SvREFCNT_dec(PL_lex_stuff);
-		PL_lex_stuff = NULL;
-	    }
+	    SvREFCNT_dec_NN(PL_lex_stuff);
+	    PL_lex_stuff = NULL;
 	    PL_expect = XOPERATOR;
 	    pl_yylval.opval = sawparens(words);
 	    TOKEN(QWLIST);
@@ -7838,7 +7962,7 @@ Perl_yylex(pTHX)
 		    *PL_tokenbuf = '&';
 		    if (memchr(tmpbuf, ':', len) || key != KEY_sub
 		     || pad_findmy_pvn(
-			    PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
+			    PL_tokenbuf, len + 1, 0
 			) != NOT_IN_PAD)
 			sv_setpvn(PL_subname, tmpbuf, len);
 		    else {
@@ -8118,7 +8242,9 @@ S_pending_ident(pTHX)
                 /* PL_no_myglob is constant */
                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
-			    PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
+                            PL_in_my == KEY_my ? "my" : "state",
+                            *PL_tokenbuf == '&' ? "subroutin" : "variabl",
+                            PL_tokenbuf),
                             UTF ? SVf_UTF8 : 0);
                 GCC_DIAG_RESTORE;
             }
@@ -8137,7 +8263,7 @@ S_pending_ident(pTHX)
     if (!has_colon) {
 	if (!PL_in_my)
 	    tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
-                                    UTF ? SVf_UTF8 : 0);
+                                 0);
         if (tmp != NOT_IN_PAD) {
             /* might be an "our" variable" */
             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
@@ -8255,7 +8381,7 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
 		char tmpbuf[256];
 		Copy(w, tmpbuf+1, s - w, char);
 		*tmpbuf = '&';
-		off = pad_findmy_pvn(tmpbuf, s-w+1, UTF ? SVf_UTF8 : 0);
+		off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
 		if (off != NOT_IN_PAD) return;
 	    }
 	    Perl_croak(aTHX_ "No comma allowed after %s", what);
@@ -8352,7 +8478,7 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
 	yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
   	return SvREFCNT_inc_simple_NN(sv);
     }
-now_ok:
+  now_ok:
     cv = *cvp;
     if (!pv && s)
   	pv = newSVpvn_flags(s, len, SVs_TEMP);
@@ -8487,7 +8613,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
 
     PERL_ARGS_ASSERT_SCAN_IDENT;
 
-    if (isSPACE(*s))
+    if (isSPACE(*s) || !*s)
 	s = skipspace(s);
     if (isDIGIT(*s)) {
 	while (isDIGIT(*s)) {
@@ -8532,25 +8658,54 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
 
 /* Is the byte 'd' a legal single character identifier name?  'u' is true
  * iff Unicode semantics are to be used.  The legal ones are any of:
- *  a) ASCII digits
- *  b) ASCII punctuation
+ *  a) all ASCII characters except:
+ *          1) space-type ones, like \t and SPACE;
+            2) NUL;
+ *          3) '{'
+ *     The final case currently doesn't get this far in the program, so we
+ *     don't test for it.  If that were to change, it would be ok to allow it.
  *  c) When not under Unicode rules, any upper Latin1 character
- *  d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally
- *     been matched by \s on ASCII platforms.  That is: \c?, plus 1-32, minus
- *     the \s ones. */
-#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d))                       \
-                                   || isDIGIT_A((U8)(d))                    \
-                                   || (!(u) && !isASCII((U8)(d)))           \
-                                   || ((((U8)(d)) < 32)                     \
-                                       && (((((U8)(d)) >= 14)               \
-                                           || (((U8)(d)) <= 8 && (d) != 0) \
-                                           || (((U8)(d)) == 13))))          \
-                                   || (((U8)(d)) == toCTRL('?')))
-    if (s < PL_bufend
-        && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
+ *  d) Otherwise, when unicode rules are used, all XIDS characters.
+ *
+ *      Because all ASCII characters have the same representation whether
+ *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
+ *      '{' without knowing if is UTF-8 or not.
+ * EBCDIC already uses the rules that ASCII platforms will use after the
+ * deprecation cycle; see comment below about the deprecation. */
+#ifdef EBCDIC
+#   define VALID_LEN_ONE_IDENT(s, is_utf8)                                    \
+    (isGRAPH_A(*(s)) || ((is_utf8)                                            \
+                         ? isIDFIRST_utf8((U8*) (s))                          \
+                         : (isGRAPH_L1(*s)                                    \
+                            && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
+#else
+#   define VALID_LEN_ONE_IDENT(s, is_utf8) (! isSPACE_A(*(s))                 \
+                                            && LIKELY(*(s) != '\0')           \
+                                            && (! is_utf8                     \
+                                                || isASCII_utf8((U8*) (s))    \
+                                                || isIDFIRST_utf8((U8*) (s))))
+#endif
+    if ((s <= PL_bufend - (is_utf8)
+                          ? UTF8SKIP(s)
+                          : 1)
+        && VALID_LEN_ONE_IDENT(s, is_utf8))
     {
-        if ( isCNTRL_A((U8)*s) ) {
-            deprecate("literal control characters in variable names");
+        /* Deprecate all non-graphic characters.  Include SHY as a non-graphic,
+         * because often it has no graphic representation.  (We can't get to
+         * here with SHY when 'is_utf8' is true, so no need to include a UTF-8
+         * test for it.) */
+        if ((is_utf8)
+            ? ! isGRAPH_utf8( (U8*) s)
+            : (! isGRAPH_L1( (U8) *s)
+               || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD))))
+        {
+            /* Split messages for back compat */
+            if (isCNTRL_A( (U8) *s)) {
+                deprecate("literal control characters in variable names");
+            }
+            else {
+                deprecate("literal non-graphic characters in variable names");
+            }
         }
         
         if (is_utf8) {
@@ -8867,10 +9022,8 @@ S_scan_subst(pTHX_ char *start)
     first_line = CopLINE(PL_curcop);
     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
     if (!s) {
-	if (PL_lex_stuff) {
-	    SvREFCNT_dec(PL_lex_stuff);
-	    PL_lex_stuff = NULL;
-	}
+	SvREFCNT_dec_NN(PL_lex_stuff);
+	PL_lex_stuff = NULL;
 	Perl_croak(aTHX_ "Substitution replacement not terminated");
     }
     PL_multi_start = first_start;	/* so whole substitution is taken together */
@@ -8949,10 +9102,8 @@ S_scan_trans(pTHX_ char *start)
 
     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
     if (!s) {
-	if (PL_lex_stuff) {
-	    SvREFCNT_dec(PL_lex_stuff);
-	    PL_lex_stuff = NULL;
-	}
+	SvREFCNT_dec_NN(PL_lex_stuff);
+	PL_lex_stuff = NULL;
 	Perl_croak(aTHX_ "Transliteration replacement not terminated");
     }
 
@@ -9209,7 +9360,14 @@ S_scan_heredoc(pTHX_ char *s)
 		    origline + 1 + PL_parser->herelines);
 	if (!lex_next_chunk(LEX_NO_TERM)
 	 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
-	    SvREFCNT_dec(linestr_save);
+	    /* Simply freeing linestr_save might seem simpler here, as it
+	       does not matter what PL_linestr points to, since we are
+	       about to croak; but in a quote-like op, linestr_save
+	       will have been prospectively freed already, via
+	       SAVEFREESV(PL_linestr) in sublex_push, so itâs easier to
+	       restore PL_linestr. */
+	    SvREFCNT_dec_NN(PL_linestr);
+	    PL_linestr = linestr_save;
 	    goto interminable;
 	}
 	CopLINE_set(PL_curcop, origline);
@@ -9259,8 +9417,8 @@ S_scan_heredoc(pTHX_ char *s)
     if (!IN_BYTES) {
 	if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
 	    SvUTF8_on(tmpstr);
-	else if (PL_encoding)
-	    sv_recode_to_utf8(tmpstr, PL_encoding);
+	else if (IN_ENCODING)
+	    sv_recode_to_utf8(tmpstr, _get_encoding());
     }
     PL_lex_stuff = tmpstr;
     pl_yylval.ival = op_type;
@@ -9371,7 +9529,7 @@ S_scan_inputsymbol(pTHX_ char *start)
 	    /* try to find it in the pad for this block, otherwise find
 	       add symbol table ops
 	    */
-	    const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
+	    const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
 	    if (tmp != NOT_IN_PAD) {
 		if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
 		    HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
@@ -9395,7 +9553,7 @@ S_scan_inputsymbol(pTHX_ char *start)
 	    else {
 		GV *gv;
 		++d;
-intro_sym:
+              intro_sym:
 		gv = gv_fetchpv(d,
 				GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
 				SVt_PV);
@@ -9550,12 +9708,12 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
 	sv_catpvn(sv, s, termlen);
     s += termlen;
     for (;;) {
-	if (PL_encoding && !UTF && !re_reparse) {
+	if (IN_ENCODING && !UTF && !re_reparse) {
 	    bool cont = TRUE;
 
 	    while (cont) {
 		int offset = s - SvPVX_const(PL_linestr);
-		const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
+		const bool found = sv_cat_decode(sv, _get_encoding(), PL_linestr,
 					   &offset, (char*)termstr, termlen);
 		const char *ns;
 		char *svlast;
@@ -9768,13 +9926,13 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
 
     /* at this point, we have successfully read the delimited string */
 
-    if (!PL_encoding || UTF || re_reparse) {
+    if (!IN_ENCODING || UTF || re_reparse) {
 
 	if (keep_delims)
 	    sv_catpvn(sv, s, termlen);
 	s += termlen;
     }
-    if (has_utf8 || (PL_encoding && !re_reparse))
+    if (has_utf8 || (IN_ENCODING && !re_reparse))
 	SvUTF8_on(sv);
 
     PL_multi_end = CopLINE(PL_curcop);
@@ -10337,7 +10495,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 
     /* if it starts with a v, it could be a v-string */
     case 'v':
-vstring:
+    vstring:
 		sv = newSV(5); /* preallocate storage space */
 		ENTER_with_name("scan_vstring");
 		SAVEFREESV(sv);
@@ -10431,7 +10589,7 @@ S_scan_formline(pTHX_ char *s)
 	if (needargs) {
 	    const char *s2 = s;
 	    while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
-		|| *s2 == 013)
+		|| *s2 == '\v')
 		s2++;
 	    if (*s2 == '{') {
 		PL_expect = XTERMBLOCK;
@@ -10444,8 +10602,8 @@ S_scan_formline(pTHX_ char *s)
 	if (!IN_BYTES) {
 	    if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
 		SvUTF8_on(stuff);
-	    else if (PL_encoding)
-		sv_recode_to_utf8(stuff, PL_encoding);
+	    else if (IN_ENCODING)
+		sv_recode_to_utf8(stuff, _get_encoding());
 	}
 	NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
 	force_next(THING);
@@ -10476,8 +10634,7 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
     if (outsidecv && CvPADLIST(outsidecv))
-	CvPADLIST(PL_compcv)->xpadl_outid =
-	    PadlistNAMES(CvPADLIST(outsidecv));
+	CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
 
     return oldsavestack_ix;
 }
@@ -10489,7 +10646,6 @@ S_yywarn(pTHX_ const char *const s, U32 flags)
 
     PL_in_eval |= EVAL_WARNONLY;
     yyerror_pv(s, flags);
-    PL_in_eval &= ~EVAL_WARNONLY;
     return 0;
 }
 
@@ -10555,7 +10711,7 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
     }
     else if (yychar > 255)
 	sv_catpvs(where_sv, "next token ???");
-    else if (yychar == -2) { /* YYEMPTY */
+    else if (yychar == YYEMPTY) {
 	if (PL_lex_state == LEX_NORMAL ||
 	   (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
 	    sv_catpvs(where_sv, "at end of line");
@@ -10593,6 +10749,7 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
         PL_multi_end = 0;
     }
     if (PL_in_eval & EVAL_WARNONLY) {
+	PL_in_eval &= ~EVAL_WARNONLY;
 	Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
     }
     else
@@ -11383,7 +11540,6 @@ S_parse_opt_lexvar(pTHX)
     PL_bufptr = s;
     if (d == PL_tokenbuf+1)
 	return NULL;
-    *d = 0;
     var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV,
 		OPf_MOD | (OPpLVAL_INTRO<<8));
     var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0);
@@ -11464,10 +11620,16 @@ Perl_parse_subsignature(pTHX)
 				scalar(newUNOP(OP_RV2AV, 0,
 				    newGVOP(OP_GV, 0, PL_defgv))),
 				newSVOP(OP_CONST, 0, newSViv(1))),
-			    newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
-				newSVOP(OP_CONST, 0,
-				    newSVpvs("Odd name/value argument "
-					"for subroutine"))));
+		            op_convert_list(OP_DIE, 0,
+		                op_convert_list(OP_SPRINTF, 0,
+		                    op_append_list(OP_LIST,
+		                        newSVOP(OP_CONST, 0,
+		                            newSVpvs("Odd name/value argument for subroutine at %s line %d.\n")),
+		                        newSLICEOP(0,
+		                            op_append_list(OP_LIST,
+		                                newSVOP(OP_CONST, 0, newSViv(1)),
+		                                newSVOP(OP_CONST, 0, newSViv(2))),
+		                            newOP(OP_CALLER, 0))))));
 		    if (pos != min_arity)
 			chkop = newLOGOP(OP_AND, 0,
 				    newBINOP(OP_GT, 0,
@@ -11530,9 +11692,16 @@ Perl_parse_subsignature(pTHX)
 			scalar(newUNOP(OP_RV2AV, 0,
 			    newGVOP(OP_GV, 0, PL_defgv))),
 			newSVOP(OP_CONST, 0, newSViv(min_arity))),
-		    newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
-			newSVOP(OP_CONST, 0,
-			    newSVpvs("Too few arguments for subroutine"))))),
+		    op_convert_list(OP_DIE, 0,
+		        op_convert_list(OP_SPRINTF, 0,
+		            op_append_list(OP_LIST,
+		                newSVOP(OP_CONST, 0,
+		                    newSVpvs("Too few arguments for subroutine at %s line %d.\n")),
+		                newSLICEOP(0,
+		                    op_append_list(OP_LIST,
+		                        newSVOP(OP_CONST, 0, newSViv(1)),
+		                        newSVOP(OP_CONST, 0, newSViv(2))),
+		                    newOP(OP_CALLER, 0))))))),
 	    initops);
     }
     if (max_arity != -1) {
@@ -11543,9 +11712,16 @@ Perl_parse_subsignature(pTHX)
 			scalar(newUNOP(OP_RV2AV, 0,
 			    newGVOP(OP_GV, 0, PL_defgv))),
 			newSVOP(OP_CONST, 0, newSViv(max_arity))),
-		    newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
-			newSVOP(OP_CONST, 0,
-			    newSVpvs("Too many arguments for subroutine"))))),
+		    op_convert_list(OP_DIE, 0,
+		        op_convert_list(OP_SPRINTF, 0,
+		            op_append_list(OP_LIST,
+		                newSVOP(OP_CONST, 0,
+		                    newSVpvs("Too many arguments for subroutine at %s line %d.\n")),
+		                newSLICEOP(0,
+		                    op_append_list(OP_LIST,
+		                        newSVOP(OP_CONST, 0, newSViv(1)),
+		                        newSVOP(OP_CONST, 0, newSViv(2))),
+		                    newOP(OP_CALLER, 0))))))),
 	    initops);
     }
     return initops;