This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
toke.c: Comments, white-space only
[perl5.git] / toke.c
diff --git a/toke.c b/toke.c
index f4e8258..48b853d 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -186,6 +186,7 @@ static const char* const lex_state_names[] = {
  * FUN1         : not used, except for not, which isn't a UNIOP
  * BOop         : bitwise or or xor
  * BAop         : bitwise and
+ * BCop         : bitwise complement
  * SHop         : shift operator
  * PWop         : power operator
  * PMop         : pattern-matching operator
@@ -222,6 +223,8 @@ static const char* const lex_state_names[] = {
 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
+#define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
+                      REPORT('~')
 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
@@ -399,7 +402,7 @@ S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
        }
        if (name)
            Perl_sv_catpv(aTHX_ report, name);
-       else if ((char)rv > ' ' && (char)rv <= '~')
+       else if (isGRAPH(rv))
        {
            Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
            if ((char)rv == 'p')
@@ -500,6 +503,9 @@ S_ao(pTHX_ int toketype)
  * It prints "Missing operator before end of line" if there's nothing
  * after the missing operator, or "... before <...>" if there is something
  * after the missing operator.
+ *
+ * PL_bufptr is expected to point to the start of the thing that was found,
+ * and s after the next token or partial token.
  */
 
 STATIC void
@@ -730,7 +736,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
     parser->last_lop = parser->last_uni = NULL;
 
-    assert(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
+    STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
                                                         |LEX_DONT_CLOSE_RSFP));
     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
                                                         |LEX_DONT_CLOSE_RSFP));
@@ -1246,7 +1252,7 @@ buffer has reached the end of the input text.
 */
 
 #define LEX_FAKE_EOF 0x80000000
-#define LEX_NO_TERM  0x40000000
+#define LEX_NO_TERM  0x40000000 /* here-doc */
 
 bool
 Perl_lex_next_chunk(pTHX_ U32 flags)
@@ -1260,6 +1266,8 @@ Perl_lex_next_chunk(pTHX_ U32 flags)
     bool got_some;
     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
+    if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
+       return FALSE;
     linestr = PL_parser->linestr;
     buf = SvPVX(linestr);
     if (!(flags & LEX_KEEP_PREVIOUS) &&
@@ -1514,6 +1522,8 @@ Perl_lex_read_space(pTHX_ U32 flags)
                incline(s);
                need_incline = 0;
            }
+       } else if (!c) {
+           s++;
        } else {
            break;
        }
@@ -1639,6 +1649,7 @@ S_incline(pTHX_ const char *s)
     const char *n;
     const char *e;
     line_t line_num;
+    UV uv;
 
     PERL_ARGS_ASSERT_INCLINE;
 
@@ -1688,7 +1699,9 @@ S_incline(pTHX_ const char *s)
     if (*e != '\n' && *e != '\0')
        return;         /* false alarm */
 
-    line_num = grok_atou(n, &e) - 1;
+    if (!grok_atoUV(n, &uv, &e))
+        return;
+    line_num = ((line_t)uv) - 1;
 
     if (t - s > 0) {
        const STRLEN len = t - s;
@@ -1790,13 +1803,13 @@ S_skipspace_flags(pTHX_ char *s, U32 flags)
 {
     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
-       while (s < PL_bufend && SPACE_OR_TAB(*s))
+       while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
            s++;
     } else {
        STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
        PL_bufptr = s;
        lex_read_space(flags | LEX_KEEP_PREVIOUS |
-               (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
+               (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
                    LEX_NO_NEXT_CHUNK : 0));
        s = PL_bufptr;
        PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
@@ -1828,13 +1841,13 @@ S_check_uni(pTHX)
        PL_last_uni++;
     s = PL_last_uni;
     while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
-       s++;
+       s += UTF ? UTF8SKIP(s) : 1;
     if ((t = strchr(s, '(')) && t < PL_bufptr)
        return;
 
     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
-                    "Warning: Use of \"%.*s\" without parentheses is ambiguous",
-                    (int)(s - PL_last_uni), PL_last_uni);
+                    "Warning: Use of \"%"UTF8f"\" without parentheses is ambiguous",
+                    UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
 }
 
 /*
@@ -1899,6 +1912,7 @@ S_force_next(pTHX_ I32 type)
        tokereport(type, &NEXTVAL_NEXTTOKE);
     }
 #endif
+    assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
     PL_nexttype[PL_nexttoke] = type;
     PL_nexttoke++;
     if (PL_lex_state != LEX_KNOWNEXT) {
@@ -1968,7 +1982,7 @@ S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
     SV * const sv = newSVpvn_utf8(start, len,
                                  !IN_BYTES
                                  && UTF
-                                 && !is_ascii_string((const U8*)start, len)
+                                 && !is_invariant_string((const U8*)start, len)
                                  && is_utf8_string((const U8*)start, len));
     return sv;
 }
@@ -2005,9 +2019,10 @@ S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
        s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
        if (check_keyword) {
          char *s2 = PL_tokenbuf;
+         STRLEN len2 = len;
          if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
-           s2 += 6, len -= 6;
-         if (keyword(s2, len, 0))
+           s2 += 6, len2 -= 6;
+         if (keyword(s2, len2, 0))
            return start;
        }
        if (token == METHOD) {
@@ -2272,7 +2287,9 @@ S_sublex_start(pTHX)
        return THING;
     }
     if (op_type == OP_CONST) {
-       SV *sv = tokeq(PL_lex_stuff);
+       SV *sv = PL_lex_stuff;
+       PL_lex_stuff = NULL;
+       sv = tokeq(sv);
 
        if (SvTYPE(sv) == SVt_PVIV) {
            /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
@@ -2283,7 +2300,6 @@ S_sublex_start(pTHX)
            sv = nsv;
        }
        pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
-       PL_lex_stuff = NULL;
        return THING;
     }
 
@@ -2326,6 +2342,7 @@ S_sublex_push(pTHX)
     SAVEI32(PL_lex_casemods);
     SAVEI32(PL_lex_starts);
     SAVEI8(PL_lex_state);
+    SAVEI8(PL_lex_defer);
     SAVESPTR(PL_lex_repl);
     SAVEVPTR(PL_lex_inpat);
     SAVEI16(PL_lex_inwhat);
@@ -2363,6 +2380,13 @@ S_sublex_push(pTHX)
     PL_lex_stuff = NULL;
     PL_sublex_info.repl = NULL;
 
+    /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
+       set for an inner quote-like operator and then an error causes scope-
+       popping.  We must not have a PL_lex_stuff value left dangling, as
+       that breaks assumptions elsewhere.  See bug #123617.  */
+    SAVEGENERICSV(PL_lex_stuff);
+    SAVEGENERICSV(PL_sublex_info.repl);
+
     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
        = SvPVX(PL_linestr);
     PL_bufend += SvCUR(PL_linestr);
@@ -2457,7 +2481,7 @@ S_sublex_done(pTHX)
                 + PL_parser->herelines;
            PL_parser->herelines = 0;
        }
-       return ',';
+       return '/';
     }
     else {
        const line_t l = CopLINE(PL_curcop);
@@ -2467,7 +2491,6 @@ S_sublex_done(pTHX)
        PL_bufend = SvPVX(PL_linestr);
        PL_bufend += SvCUR(PL_linestr);
        PL_expect = XOPERATOR;
-       PL_sublex_info.sub_inwhat = 0;
        return ')';
     }
 }
@@ -2491,6 +2514,9 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
 
     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
 
+    if (!SvCUR(res))
+        return res;
+
     if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
                                      e - backslash_ptr,
                                      &first_bad_char_loc))
@@ -2504,9 +2530,10 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
         /* We deliberately don't try to print the malformed character, which
          * might not print very well; it also may be just the first of many
          * malformations, so don't print what comes after it */
-        yyerror(Perl_form(aTHX_
+        yyerror_pv(Perl_form(aTHX_
             "Malformed UTF-8 character immediately after '%.*s'",
-            (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
+            (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr),
+                   SVf_UTF8);
        return NULL;
     }
 
@@ -3031,7 +3058,7 @@ S_scan_const(pTHX_ char *start)
           (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
           */
        else if (*s == '@' && s[1]) {
-           if (isWORDCHAR_lazy_if(s+1,UTF))
+           if (UTF ? isIDFIRST_utf8((U8*)s+1) : isWORDCHAR_A(s[1]))
                break;
            if (strchr(":'{$", s[1]))
                break;
@@ -3189,9 +3216,13 @@ S_scan_const(pTHX_ char *start)
                        SvPOK_on(sv);
                        *d = '\0';
                        /* See Note on sizing above.  */
-                       sv_utf8_upgrade_flags_grow(sv,
-                                       SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
-                                       UNISKIP(uv) + (STRLEN)(send - s) + 1);
+                       sv_utf8_upgrade_flags_grow(
+                                         sv,
+                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE
+                                                  /* Above-latin1 in string
+                                                   * implies no encoding */
+                                                  |SV_UTF8_NO_ENCODING,
+                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
                        d = SvPVX(sv) + SvCUR(sv);
                        has_utf8 = TRUE;
                     }
@@ -3247,12 +3278,7 @@ S_scan_const(pTHX_ char *start)
                  *  Otherwise must be some \N{NAME}: convert to \N{U+c1.c2...}
                  *      if a pattern; otherwise convert to utf8
                  *
-                 * If the regex compiler should ever need to differentiate
-                 * between the \N{U+...} and \N{name} forms, that could easily
-                 * be done here by stripping any leading zeros from the
-                 * \N{U+...} case, and adding them to the other one. */
-
-                /* Here, 's' points to the 'N'; the test below is guaranteed to
+                 * Here, 's' points to the 'N'; the test below is guaranteed to
                 * succeed if we are being called on a pattern, as we already
                  * know from a test above that the next character is a '{'.  A
                  * non-pattern \N must mean 'named character', which requires
@@ -3277,38 +3303,43 @@ S_scan_const(pTHX_ char *start)
                /* Here it looks like a named character */
 
                if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
-                   I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
-                               | PERL_SCAN_DISALLOW_PREFIX;
-                   STRLEN len;
-
                    s += 2;         /* Skip to next char after the 'U+' */
-                   len = e - s;
-                   uv = grok_hex(s, &len, &flags, NULL);
-                   if (len == 0 || len != (STRLEN)(e - s)) {
-                       yyerror("Invalid hexadecimal number in \\N{U+...}");
-                       s = e + 1;
-                       continue;
-                   }
-
                    if (PL_lex_inpat) {
-                       s -= 5;     /* Include the '\N{U+' */
-#ifdef EBCDIC
-                        /* On EBCDIC platforms, in \N{U+...}, the '...' is a
-                         * Unicode value, so convert to native so downstream
-                         * code can continue to assume it's native */
-                       d += my_snprintf(d, e - s + 1 + 1,  /* includes the '}'
-                                                              and the \0 */
-                                         "\\N{U+%X}",
-                                         (unsigned int) UNI_TO_NATIVE(uv));
-#else
-                        /* On non-EBCDIC platforms, pass it through unchanged.
-                         * The reason we evaluated the number above is to make
-                         * sure there wasn't a syntax error. */
-                       Copy(s, d, e - s + 1, char);    /* +1 is for the '}' */
-                       d += e - s + 1;
-#endif
+
+                        /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
+                        /* Check the syntax.  */
+                        const char *orig_s;
+                        orig_s = s - 5;
+                        if (!isXDIGIT(*s)) {
+                          bad_NU:
+                            yyerror(
+                                "Invalid hexadecimal number in \\N{U+...}"
+                            );
+                            s = e + 1;
+                            continue;
+                        }
+                        while (++s < e) {
+                            if (isXDIGIT(*s))
+                                continue;
+                            else if ((*s == '.' || *s == '_')
+                                  && isXDIGIT(s[1]))
+                                continue;
+                            goto bad_NU;
+                        }
+
+                        /* Pass everything through unchanged.
+                         * +1 is for the '}' */
+                        Copy(orig_s, d, e - orig_s + 1, char);
+                        d += e - orig_s + 1;
                    }
                    else {  /* Not a pattern: convert the hex to string */
+                        I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
+                               | PERL_SCAN_SILENT_ILLDIGIT
+                               | PERL_SCAN_DISALLOW_PREFIX;
+                        STRLEN len = e - s;
+                        uv = grok_hex(s, &len, &flags, NULL);
+                        if (len == 0 || (len != (STRLEN)(e - s)))
+                            goto bad_NU;
 
                          /* If the destination is not in utf8, unconditionally
                          * recode it to be so.  This is because \N{} implies
@@ -3379,9 +3410,15 @@ S_scan_const(pTHX_ char *start)
                                     char hex_string[4];
                                     int len =
                                         my_snprintf(hex_string,
-                                                    sizeof(hex_string),
-                                                    "%02X.", (U8) *str);
-                                    PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(hex_string));
+                                                  sizeof(hex_string),
+                                                  "%02X.",
+
+                                                  /* The regex compiler is
+                                                   * expecting Unicode, not
+                                                   * native */
+                                                  (U8) NATIVE_TO_LATIN1(*str));
+                                    PERL_MY_SNPRINTF_POST_GUARD(len,
+                                                           sizeof(hex_string));
                                     Copy(hex_string, d, 3, char);
                                     d += 3;
                                     str++;
@@ -3405,12 +3442,12 @@ S_scan_const(pTHX_ char *start)
                                                         len,
                                                         &char_length,
                                                         UTF8_ALLOW_ANYUV);
-                                /* Convert first code point to hex, including
-                                 * the boiler plate before it. */
+                                /* Convert first code point to Unicode hex,
+                                 * including the boiler plate before it. */
                                 output_length =
                                     my_snprintf(hex_string, sizeof(hex_string),
-                                                "\\N{U+%X",
-                                                (unsigned int) uv);
+                                             "\\N{U+%X",
+                                             (unsigned int) NATIVE_TO_UNI(uv));
 
                                 /* Make sure there is enough space to hold it */
                                 d = off + SvGROW(sv, off
@@ -3422,7 +3459,7 @@ S_scan_const(pTHX_ char *start)
                                 d += output_length;
 
                                 /* For each subsequent character, append dot and
-                                * its ordinal in hex */
+                                * its Unicode code point in hex */
                                 while ((str += char_length) < str_end) {
                                     const STRLEN off = d - SvPVX_const(sv);
                                     U32 uv = utf8n_to_uvchr((U8 *) str,
@@ -3431,9 +3468,9 @@ S_scan_const(pTHX_ char *start)
                                                             UTF8_ALLOW_ANYUV);
                                     output_length =
                                         my_snprintf(hex_string,
-                                                    sizeof(hex_string),
-                                                    ".%X",
-                                                    (unsigned int) uv);
+                                             sizeof(hex_string),
+                                             ".%X",
+                                             (unsigned int) NATIVE_TO_UNI(uv));
 
                                     d = off + SvGROW(sv, off
                                                         + output_length
@@ -3471,8 +3508,8 @@ S_scan_const(pTHX_ char *start)
                            const STRLEN off = d - SvPVX_const(sv);
                            d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
                        }
-                        if (! SvUTF8(res)) {    /* Make sure is \N{} return is UTF-8 */
-                            sv_utf8_upgrade(res);
+                        if (! SvUTF8(res)) {    /* Make sure \N{} return is UTF-8 */
+                            sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
                             str = SvPV_const(res, len);
                         }
                        Copy(str, d, len, char);
@@ -3588,8 +3625,8 @@ S_scan_const(pTHX_ char *start)
                   " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
 
     SvPOK_on(sv);
-    if (PL_encoding && !has_utf8) {
-       sv_recode_to_utf8(sv, PL_encoding);
+    if (IN_ENCODING && !has_utf8) {
+       sv_recode_to_utf8(sv, _get_encoding());
        if (SvUTF8(sv))
            has_utf8 = TRUE;
     }
@@ -3785,11 +3822,10 @@ S_intuit_more(pTHX_ char *s)
                    && !(last_un_char == '$' || last_un_char == '@'
                         || last_un_char == '&')
                    && isALPHA(*s) && s[1] && isALPHA(s[1])) {
-                   char *d = tmpbuf;
+                   char *d = s;
                    while (isALPHA(*s))
-                       *d++ = *s++;
-                   *d = '\0';
-                   if (keyword(tmpbuf, d - tmpbuf, 0))
+                       s++;
+                   if (keyword(d, s - d, 0))
                        weight -= 150;
                }
                if (un_char == last_un_char + 1)
@@ -4284,13 +4320,8 @@ Perl_yylex(pTHX)
        SvREFCNT_dec(tmp);
     } );
 
-    switch (PL_lex_state) {
-    case LEX_NORMAL:
-    case LEX_INTERPNORMAL:
-       break;
-
     /* when we've already built the next token, just pull it out of the queue */
-    case LEX_KNOWNEXT:
+    if (PL_nexttoke) {
        PL_nexttoke--;
        pl_yylval = PL_nextval[PL_nexttoke];
        if (!PL_nexttoke) {
@@ -4315,6 +4346,12 @@ Perl_yylex(pTHX)
            }
            return REPORT(next_type == 'p' ? pending_ident() : next_type);
        }
+    }
+
+    switch (PL_lex_state) {
+    case LEX_NORMAL:
+    case LEX_INTERPNORMAL:
+       break;
 
     /* interpolated case modifiers like \L \U, including \Q and \E.
        when we get here, PL_bufptr is at the \
@@ -4463,6 +4500,14 @@ Perl_yylex(pTHX)
        /* FALLTHROUGH */
 
     case LEX_INTERPEND:
+       /* Treat state as LEX_NORMAL if we have no inner lexing scope.
+          XXX This hack can be removed if we stop setting PL_lex_state to
+          LEX_KNOWNEXT, as can the hack under LEX_INTREPCONCAT below.  */
+       if (UNLIKELY(!PL_lex_inwhat)) {
+           PL_lex_state = LEX_NORMAL;
+           break;
+       }
+
        if (PL_lex_dojoin) {
            const U8 dojoin_was = PL_lex_dojoin;
            PL_lex_dojoin = FALSE;
@@ -4514,6 +4559,14 @@ Perl_yylex(pTHX)
            Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
                       (long) PL_lex_brackets);
 #endif
+       /* Treat state as LEX_NORMAL when not in an inner lexing scope.
+          XXX This hack can be removed if we stop setting PL_lex_state to
+          LEX_KNOWNEXT.  */
+       if (UNLIKELY(!PL_lex_inwhat)) {
+           PL_lex_state = LEX_NORMAL;
+           break;
+       }
+
        if (PL_bufptr == PL_bufend)
            return REPORT(sublex_done());
 
@@ -4594,7 +4647,8 @@ Perl_yylex(pTHX)
     case 26:
        goto fake_eof;                  /* emulate EOF on ^D or ^Z */
     case 0:
-       if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
+       if ((!PL_rsfp || PL_lex_inwhat)
+        && (!PL_parser->filtered || s+1 < PL_bufend)) {
            PL_last_uni = 0;
            PL_last_lop = 0;
            if (PL_lex_brackets &&
@@ -4809,6 +4863,8 @@ Perl_yylex(pTHX)
                d = instr(s,"perl -");
                if (!d) {
                    d = instr(s,"perl");
+                    if (d && d[4] == '6')
+                        d = NULL;
 #if defined(DOSISH)
                    /* avoid getting into infinite loops when shebang
                     * line contains "Perl" rather than "perl" */
@@ -4933,7 +4989,6 @@ Perl_yylex(pTHX)
        }
        if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
            PL_lex_state = LEX_FORMLINE;
-           NEXTVAL_NEXTTOKE.ival = 0;
            force_next(FORMRBRACK);
            TOKEN(';');
        }
@@ -4944,7 +4999,7 @@ Perl_yylex(pTHX)
        Perl_croak(aTHX_
       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
 #endif
-    case ' ': case '\t': case '\f': case 013:
+    case ' ': case '\t': case '\f': case '\v':
        s++;
        goto retry;
     case '#':
@@ -4976,7 +5031,6 @@ Perl_yylex(pTHX)
                 incline(s);
            if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
                PL_lex_state = LEX_FORMLINE;
-               NEXTVAL_NEXTTOKE.ival = 0;
                force_next(FORMRBRACK);
                TOKEN(';');
            }
@@ -5079,17 +5133,12 @@ Perl_yylex(pTHX)
            else if (*s == '>') {
                s++;
                s = skipspace(s);
-               if (FEATURE_POSTDEREF_IS_ENABLED && (
-                   ((*s == '$' || *s == '&') && s[1] == '*')
+               if (((*s == '$' || *s == '&') && s[1] == '*')
                  ||(*s == '$' && s[1] == '#' && s[2] == '*')
                  ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
                  ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
-                ))
+                )
                {
-                   Perl_ck_warner_d(aTHX_
-                       packWARN(WARN_EXPERIMENTAL__POSTDEREF),
-                       "Postfix dereference is experimental"
-                   );
                    PL_expect = XPOSTDEREF;
                    TOKEN(ARROW);
                }
@@ -5197,11 +5246,18 @@ Perl_yylex(pTHX)
        TERM('%');
     }
     case '^':
+       d = s;
+       bof = FEATURE_BITWISE_IS_ENABLED;
+       if (bof && s[1] == '.')
+           s++;
        if (!PL_lex_allbrackets && PL_lex_fakeeof >=
                (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
+       {
+           s = d;
            TOKEN(0);
+       }
        s++;
-       BOop(OP_BIT_XOR);
+       BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
     case '[':
        if (PL_lex_brackets > 100)
            Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
@@ -5224,7 +5280,11 @@ Perl_yylex(pTHX)
            Eop(OP_SMARTMATCH);
        }
        s++;
-       OPERATOR('~');
+       if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
+           s++;
+           BCop(OP_SCOMPLEMENT);
+       }
+       BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
     case ',':
        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
            TOKEN(0);
@@ -5297,7 +5357,7 @@ Perl_yylex(pTHX)
                    sv_catsv(sv, PL_lex_stuff);
                    attrs = op_append_elem(OP_LIST, attrs,
                                        newSVOP(OP_CONST, 0, sv));
-                   SvREFCNT_dec(PL_lex_stuff);
+                   SvREFCNT_dec_NN(PL_lex_stuff);
                    PL_lex_stuff = NULL;
                }
                else {
@@ -5324,6 +5384,19 @@ Perl_yylex(pTHX)
                        sv_free(sv);
                        CvMETHOD_on(PL_compcv);
                    }
+                   else if (!PL_in_my && len == 5
+                         && strnEQ(SvPVX(sv), "const", len))
+                   {
+                       sv_free(sv);
+                       Perl_ck_warner_d(aTHX_
+                           packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
+                          ":const is experimental"
+                       );
+                       CvANONCONST_on(PL_compcv);
+                       if (!CvANON(PL_compcv))
+                           yyerror(":const is not permitted on named "
+                                   "subroutines");
+                   }
                    /* After we've set the flags, it could be argued that
                       we don't need to do the attributes.pm-based setting
                       process, and shouldn't bother appending recognized
@@ -5438,6 +5511,7 @@ Perl_yylex(pTHX)
        }
        switch (PL_expect) {
        case XTERM:
+       case XTERMORDORDOR:
            PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
            PL_lex_allbrackets++;
            OPERATOR(HASHBRACK);
@@ -5501,9 +5575,10 @@ Perl_yylex(pTHX)
                    OPERATOR(HASHBRACK);
                }
                if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
-                   /* ${...} or @{...} etc., but not print {...} */
-                   PL_expect = XTERM;
-                   break;
+                   /* ${...} or @{...} etc., but not print {...}
+                    * Skip the disambiguation and treat this as a block.
+                    */
+                   goto block_expectation;
                }
                /* This hack serves to disambiguate a pair of curlies
                 * as being a block or an anon hash.  Normally, expectation
@@ -5587,7 +5662,28 @@ Perl_yylex(pTHX)
                                   || (*t == '=' && t[1] == '>')))
                    OPERATOR(HASHBRACK);
                if (PL_expect == XREF)
-                   PL_expect = XTERM;
+               {
+                 block_expectation:
+                   /* If there is an opening brace or 'sub:', treat it
+                      as a term to make ${{...}}{k} and &{sub:attr...}
+                      dwim.  Otherwise, treat it as a statement, so
+                      map {no strict; ...} works.
+                    */
+                   s = skipspace(s);
+                   if (*s == '{') {
+                       PL_expect = XTERM;
+                       break;
+                   }
+                   if (strnEQ(s, "sub", 3)) {
+                       d = s + 3;
+                       d = skipspace(d);
+                       if (*d == ':') {
+                           PL_expect = XTERM;
+                           break;
+                       }
+                   }
+                   PL_expect = XSTATE;
+               }
                else {
                    PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
                    PL_expect = XSTATE;
@@ -5658,25 +5754,32 @@ Perl_yylex(pTHX)
                Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
                CopLINE_inc(PL_curcop);
            }
+           d = s;
+           if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
+               s++;
            if (!PL_lex_allbrackets && PL_lex_fakeeof >=
                    (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
+               s = d;
                s--;
                TOKEN(0);
            }
-           PL_parser->saw_infix_sigil = 1;
-           BAop(OP_BIT_AND);
+           if (d == s) {
+               PL_parser->saw_infix_sigil = 1;
+               BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
+           }
+           else
+               BAop(OP_SBIT_AND);
        }
 
        PL_tokenbuf[0] = '&';
        s = scan_ident(s - 1, PL_tokenbuf + 1,
                       sizeof PL_tokenbuf - 1, TRUE);
+       pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
        if (PL_tokenbuf[1]) {
-           PL_expect = XOPERATOR;
            force_ident_maybe_lex('&');
        }
        else
            PREREF('&');
-       pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
        TERM('&');
 
     case '|':
@@ -5690,12 +5793,15 @@ Perl_yylex(pTHX)
            AOPERATOR(OROR);
        }
        s--;
+       d = s;
+       if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
+           s++;
        if (!PL_lex_allbrackets && PL_lex_fakeeof >=
                (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
-           s--;
+           s = d - 1;
            TOKEN(0);
        }
-       BOop(OP_BIT_OR);
+       BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
     case '=':
        s++;
        {
@@ -5725,30 +5831,30 @@ Perl_yylex(pTHX)
            s--;
            if (PL_expect == XSTATE && isALPHA(tmp) &&
                (s == PL_linestart+1 || s[-2] == '\n') )
-               {
-                   if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
-                       || PL_lex_state != LEX_NORMAL) {
-                       d = PL_bufend;
-                       while (s < d) {
-                           if (*s++ == '\n') {
-                               incline(s);
-                               if (strnEQ(s,"=cut",4)) {
-                                   s = strchr(s,'\n');
-                                   if (s)
-                                       s++;
-                                   else
-                                       s = d;
-                                   incline(s);
-                                   goto retry;
-                               }
-                           }
-                       }
-                       goto retry;
-                   }
-                   s = PL_bufend;
-                   PL_parser->in_pod = 1;
-                   goto retry;
-               }
+            {
+                if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
+                    || PL_lex_state != LEX_NORMAL) {
+                    d = PL_bufend;
+                    while (s < d) {
+                        if (*s++ == '\n') {
+                            incline(s);
+                            if (strnEQ(s,"=cut",4)) {
+                                s = strchr(s,'\n');
+                                if (s)
+                                    s++;
+                                else
+                                    s = d;
+                                incline(s);
+                                goto retry;
+                            }
+                        }
+                    }
+                    goto retry;
+                }
+                s = PL_bufend;
+                PL_parser->in_pod = 1;
+                goto retry;
+            }
        }
        if (PL_expect == XBLOCK) {
            const char *t = s;
@@ -5902,8 +6008,14 @@ Perl_yylex(pTHX)
            PL_tokenbuf[0] = '@';
            s = scan_ident(s + 1, PL_tokenbuf + 1,
                           sizeof PL_tokenbuf - 1, FALSE);
-           if (PL_expect == XOPERATOR)
-               no_op("Array length", s);
+            if (PL_expect == XOPERATOR) {
+                d = s;
+                if (PL_bufptr > s) {
+                    d = PL_bufptr-1;
+                    PL_bufptr = PL_oldbufptr;
+                }
+               no_op("Array length", d);
+            }
            if (!PL_tokenbuf[1])
                PREREF(DOLSHARP);
            PL_expect = XOPERATOR;
@@ -5914,8 +6026,14 @@ Perl_yylex(pTHX)
        PL_tokenbuf[0] = '$';
        s = scan_ident(s, PL_tokenbuf + 1,
                       sizeof PL_tokenbuf - 1, FALSE);
-       if (PL_expect == XOPERATOR)
-           no_op("Scalar", s);
+       if (PL_expect == XOPERATOR) {
+           d = s;
+           if (PL_bufptr > s) {
+               d = PL_bufptr-1;
+               PL_bufptr = PL_oldbufptr;
+           }
+           no_op("Scalar", d);
+       }
        if (!PL_tokenbuf[1]) {
            if (s == PL_bufend)
                yyerror("Final $ should be \\$ or $name");
@@ -5936,14 +6054,14 @@ Perl_yylex(pTHX)
                        char *t = s+1;
 
                        while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
-                           t++;
+                           t += UTF ? UTF8SKIP(t) : 1;
                        if (*t++ == ',') {
                            PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
                            while (t < PL_bufend && *t != ']')
                                t++;
                            Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                                       "Multidimensional syntax %.*s not supported",
-                                   (int)((t - PL_bufptr) + 1), PL_bufptr);
+                                       "Multidimensional syntax %"UTF8f" not supported",
+                                        UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
                        }
                    }
                }
@@ -6229,7 +6347,7 @@ Perl_yylex(pTHX)
            }
            /* avoid v123abc() or $h{v1}, allow C<print v10;> */
            if (!isALPHA(*start) && (PL_expect == XTERM
-                       || PL_expect == XSTATE
+                       || PL_expect == XREF || PL_expect == XSTATE
                        || PL_expect == XTERMORDORDOR)) {
                GV *const gv = gv_fetchpvn_flags(s, start - s,
                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
@@ -6366,7 +6484,7 @@ Perl_yylex(pTHX)
            char tmpbuf[sizeof PL_tokenbuf + 1];
            *tmpbuf = '&';
            Copy(PL_tokenbuf, tmpbuf+1, len, char);
-           off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
+           off = pad_findmy_pvn(tmpbuf, len+1, 0);
            if (off != NOT_IN_PAD) {
                assert(off); /* we assume this is boolean-true below */
                if (PAD_COMPNAME_FLAGS_isOUR(off)) {
@@ -6898,13 +7016,13 @@ Perl_yylex(pTHX)
                if (!IN_BYTES) {
                    if (UTF)
                        PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
-                   else if (PL_encoding) {
+                   else if (IN_ENCODING) {
                        SV *name;
                        dSP;
                        ENTER;
                        SAVETMPS;
                        PUSHMARK(sp);
-                       XPUSHs(PL_encoding);
+                       XPUSHs(_get_encoding());
                        PUTBACK;
                        call_method("name", G_SCALAR);
                        SPAGAIN;
@@ -6924,7 +7042,9 @@ Perl_yylex(pTHX)
        }
 
        case KEY___SUB__:
-           FUN0OP(newPVOP(OP_RUNCV,0,NULL));
+           FUN0OP(CvCLONE(PL_compcv)
+                       ? newOP(OP_RUNCV, 0)
+                       : newPVOP(OP_RUNCV,0,NULL));
 
        case KEY_AUTOLOAD:
        case KEY_DESTROY:
@@ -7595,10 +7715,8 @@ Perl_yylex(pTHX)
            }
            if (!words)
                words = newNULLLIST();
-           if (PL_lex_stuff) {
-               SvREFCNT_dec(PL_lex_stuff);
-               PL_lex_stuff = NULL;
-           }
+           SvREFCNT_dec_NN(PL_lex_stuff);
+           PL_lex_stuff = NULL;
            PL_expect = XOPERATOR;
            pl_yylval.opval = sawparens(words);
            TOKEN(QWLIST);
@@ -7853,7 +7971,7 @@ Perl_yylex(pTHX)
                    *PL_tokenbuf = '&';
                    if (memchr(tmpbuf, ':', len) || key != KEY_sub
                     || pad_findmy_pvn(
-                           PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
+                           PL_tokenbuf, len + 1, 0
                        ) != NOT_IN_PAD)
                        sv_setpvn(PL_subname, tmpbuf, len);
                    else {
@@ -8154,7 +8272,7 @@ S_pending_ident(pTHX)
     if (!has_colon) {
        if (!PL_in_my)
            tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
-                                    UTF ? SVf_UTF8 : 0);
+                                 0);
         if (tmp != NOT_IN_PAD) {
             /* might be an "our" variable" */
             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
@@ -8272,7 +8390,7 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
                char tmpbuf[256];
                Copy(w, tmpbuf+1, s - w, char);
                *tmpbuf = '&';
-               off = pad_findmy_pvn(tmpbuf, s-w+1, UTF ? SVf_UTF8 : 0);
+               off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
                if (off != NOT_IN_PAD) return;
            }
            Perl_croak(aTHX_ "No comma allowed after %s", what);
@@ -8369,7 +8487,7 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
        yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
        return SvREFCNT_inc_simple_NN(sv);
     }
-now_ok:
+  now_ok:
     cv = *cvp;
     if (!pv && s)
        pv = newSVpvn_flags(s, len, SVs_TEMP);
@@ -8504,7 +8622,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
 
     PERL_ARGS_ASSERT_SCAN_IDENT;
 
-    if (isSPACE(*s))
+    if (isSPACE(*s) || !*s)
        s = skipspace(s);
     if (isDIGIT(*s)) {
        while (isDIGIT(*s)) {
@@ -8513,7 +8631,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
            *d++ = *s++;
        }
     }
-    else {
+    else {  /* See if it is a "normal" identifier */
         parse_ident(&s, &d, e, 1, is_utf8);
     }
     *d = '\0';
@@ -8525,6 +8643,9 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
            PL_lex_state = LEX_INTERPENDMAYBE;
        return s;
     }
+
+    /* Here, it is not a run-of-the-mill identifier name */
+
     if (*s == '$' && s[1] &&
       (isIDFIRST_lazy_if(s+1,is_utf8)
          || isDIGIT_A((U8)s[1])
@@ -8560,7 +8681,9 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
  *
  *      Because all ASCII characters have the same representation whether
  *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
- *      '{' without knowing if is UTF-8 or not */
+ *      '{' without knowing if is UTF-8 or not.
+ * EBCDIC already uses the rules that ASCII platforms will use after the
+ * deprecation cycle; see comment below about the deprecation. */
 #ifdef EBCDIC
 #   define VALID_LEN_ONE_IDENT(s, is_utf8)                                    \
     (isGRAPH_A(*(s)) || ((is_utf8)                                            \
@@ -8625,8 +8748,8 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
             /* if it starts as a valid identifier, assume that it is one.
                (the later check for } being at the expected point will trap
                cases where this doesn't pan out.)  */
-        d += is_utf8 ? UTF8SKIP(d) : 1;
-        parse_ident(&s, &d, e, 1, is_utf8);
+            d += is_utf8 ? UTF8SKIP(d) : 1;
+            parse_ident(&s, &d, e, 1, is_utf8);
            *d = '\0';
             tmp_copline = CopLINE(PL_curcop);
             if (s < PL_bufend && isSPACE(*s)) {
@@ -8911,10 +9034,8 @@ S_scan_subst(pTHX_ char *start)
     first_line = CopLINE(PL_curcop);
     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
     if (!s) {
-       if (PL_lex_stuff) {
-           SvREFCNT_dec(PL_lex_stuff);
-           PL_lex_stuff = NULL;
-       }
+       SvREFCNT_dec_NN(PL_lex_stuff);
+       PL_lex_stuff = NULL;
        Perl_croak(aTHX_ "Substitution replacement not terminated");
     }
     PL_multi_start = first_start;      /* so whole substitution is taken together */
@@ -8993,10 +9114,8 @@ S_scan_trans(pTHX_ char *start)
 
     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
     if (!s) {
-       if (PL_lex_stuff) {
-           SvREFCNT_dec(PL_lex_stuff);
-           PL_lex_stuff = NULL;
-       }
+       SvREFCNT_dec_NN(PL_lex_stuff);
+       PL_lex_stuff = NULL;
        Perl_croak(aTHX_ "Transliteration replacement not terminated");
     }
 
@@ -9099,10 +9218,14 @@ S_scan_heredoc(pTHX_ char *s)
            term = '"';
        if (!isWORDCHAR_lazy_if(s,UTF))
            deprecate("bare << to mean <<\"\"");
-       for (; isWORDCHAR_lazy_if(s,UTF); s++) {
-           if (d < e)
-               *d++ = *s;
+       peek = s;
+       while (isWORDCHAR_lazy_if(peek,UTF)) {
+           peek += UTF ? UTF8SKIP(peek) : 1;
        }
+       len = (peek - s >= e - d) ? (e - d) : (peek - s);
+       Copy(s, d, len, char);
+       s += len;
+       d += len;
     }
     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
        Perl_croak(aTHX_ "Delimiter for here document is too long");
@@ -9173,8 +9296,13 @@ S_scan_heredoc(pTHX_ char *s)
               lexing scope.  In a file, we will have broken out of the
               loop in the previous iteration.  In an eval, the string buf-
               fer ends with "\n;", so the while condition above will have
-              evaluated to false.  So shared can never be null. */
-           assert(shared);
+              evaluated to false.  So shared can never be null.  Or so you
+              might think.  Odd syntax errors like s;@{<<; can gobble up
+              the implicit semicolon at the end of a flie, causing the
+              file handle to be closed even when we are not in a string
+              eval.  So shared may be null in that case.  */
+           if (UNLIKELY(!shared))
+               goto interminable;
            /* A LEXSHARED struct with a null ls_prev pointer is the outer-
               most lexing scope.  In a file, shared->ls_linestr at that
               level is just one line, so there is no body to steal. */
@@ -9253,7 +9381,14 @@ S_scan_heredoc(pTHX_ char *s)
                    origline + 1 + PL_parser->herelines);
        if (!lex_next_chunk(LEX_NO_TERM)
         && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
-           SvREFCNT_dec(linestr_save);
+           /* Simply freeing linestr_save might seem simpler here, as it
+              does not matter what PL_linestr points to, since we are
+              about to croak; but in a quote-like op, linestr_save
+              will have been prospectively freed already, via
+              SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
+              restore PL_linestr. */
+           SvREFCNT_dec_NN(PL_linestr);
+           PL_linestr = linestr_save;
            goto interminable;
        }
        CopLINE_set(PL_curcop, origline);
@@ -9303,8 +9438,8 @@ S_scan_heredoc(pTHX_ char *s)
     if (!IN_BYTES) {
        if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
            SvUTF8_on(tmpstr);
-       else if (PL_encoding)
-           sv_recode_to_utf8(tmpstr, PL_encoding);
+       else if (IN_ENCODING)
+           sv_recode_to_utf8(tmpstr, _get_encoding());
     }
     PL_lex_stuff = tmpstr;
     pl_yylval.ival = op_type;
@@ -9415,7 +9550,7 @@ S_scan_inputsymbol(pTHX_ char *start)
            /* try to find it in the pad for this block, otherwise find
               add symbol table ops
            */
-           const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
+           const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
            if (tmp != NOT_IN_PAD) {
                if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
                    HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
@@ -9439,7 +9574,7 @@ S_scan_inputsymbol(pTHX_ char *start)
            else {
                GV *gv;
                ++d;
-intro_sym:
+              intro_sym:
                gv = gv_fetchpv(d,
                                GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
                                SVt_PV);
@@ -9594,12 +9729,12 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
        sv_catpvn(sv, s, termlen);
     s += termlen;
     for (;;) {
-       if (PL_encoding && !UTF && !re_reparse) {
+       if (IN_ENCODING && !UTF && !re_reparse) {
            bool cont = TRUE;
 
            while (cont) {
                int offset = s - SvPVX_const(PL_linestr);
-               const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
+               const bool found = sv_cat_decode(sv, _get_encoding(), PL_linestr,
                                           &offset, (char*)termstr, termlen);
                const char *ns;
                char *svlast;
@@ -9812,13 +9947,13 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
 
     /* at this point, we have successfully read the delimited string */
 
-    if (!PL_encoding || UTF || re_reparse) {
+    if (!IN_ENCODING || UTF || re_reparse) {
 
        if (keep_delims)
            sv_catpvn(sv, s, termlen);
        s += termlen;
     }
-    if (has_utf8 || (PL_encoding && !re_reparse))
+    if (has_utf8 || (IN_ENCODING && !re_reparse))
        SvUTF8_on(sv);
 
     PL_multi_end = CopLINE(PL_curcop);
@@ -10349,7 +10484,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
               floatit = TRUE;
         }
        if (floatit) {
-            STORE_NUMERIC_LOCAL_SET_STANDARD();
+            STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD();
            /* terminate the string */
            *d = '\0';
             if (UNLIKELY(hexfp)) {
@@ -10366,7 +10501,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
             } else {
                 nv = Atof(PL_tokenbuf);
             }
-            RESTORE_NUMERIC_LOCAL();
+            RESTORE_LC_NUMERIC_UNDERLYING();
             sv = newSVnv(nv);
        }
 
@@ -10381,7 +10516,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 
     /* if it starts with a v, it could be a v-string */
     case 'v':
-vstring:
+    vstring:
                sv = newSV(5); /* preallocate storage space */
                ENTER_with_name("scan_vstring");
                SAVEFREESV(sv);
@@ -10475,7 +10610,7 @@ S_scan_formline(pTHX_ char *s)
        if (needargs) {
            const char *s2 = s;
            while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
-               || *s2 == 013)
+               || *s2 == '\v')
                s2++;
            if (*s2 == '{') {
                PL_expect = XTERMBLOCK;
@@ -10488,8 +10623,8 @@ S_scan_formline(pTHX_ char *s)
        if (!IN_BYTES) {
            if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
                SvUTF8_on(stuff);
-           else if (PL_encoding)
-               sv_recode_to_utf8(stuff, PL_encoding);
+           else if (IN_ENCODING)
+               sv_recode_to_utf8(stuff, _get_encoding());
        }
        NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
        force_next(THING);
@@ -10520,8 +10655,7 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
     if (outsidecv && CvPADLIST(outsidecv))
-       CvPADLIST(PL_compcv)->xpadl_outid =
-           PadlistNAMES(CvPADLIST(outsidecv));
+       CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
 
     return oldsavestack_ix;
 }
@@ -10533,7 +10667,6 @@ S_yywarn(pTHX_ const char *const s, U32 flags)
 
     PL_in_eval |= EVAL_WARNONLY;
     yyerror_pv(s, flags);
-    PL_in_eval &= ~EVAL_WARNONLY;
     return 0;
 }
 
@@ -10599,7 +10732,7 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
     }
     else if (yychar > 255)
        sv_catpvs(where_sv, "next token ???");
-    else if (yychar == -2) { /* YYEMPTY */
+    else if (yychar == YYEMPTY) {
        if (PL_lex_state == LEX_NORMAL ||
           (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
            sv_catpvs(where_sv, "at end of line");
@@ -10637,6 +10770,7 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
         PL_multi_end = 0;
     }
     if (PL_in_eval & EVAL_WARNONLY) {
+       PL_in_eval &= ~EVAL_WARNONLY;
        Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
     }
     else
@@ -11507,10 +11641,16 @@ Perl_parse_subsignature(pTHX)
                                scalar(newUNOP(OP_RV2AV, 0,
                                    newGVOP(OP_GV, 0, PL_defgv))),
                                newSVOP(OP_CONST, 0, newSViv(1))),
-                           newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
-                               newSVOP(OP_CONST, 0,
-                                   newSVpvs("Odd name/value argument "
-                                       "for subroutine"))));
+                           op_convert_list(OP_DIE, 0,
+                               op_convert_list(OP_SPRINTF, 0,
+                                   op_append_list(OP_LIST,
+                                       newSVOP(OP_CONST, 0,
+                                           newSVpvs("Odd name/value argument for subroutine at %s line %d.\n")),
+                                       newSLICEOP(0,
+                                           op_append_list(OP_LIST,
+                                               newSVOP(OP_CONST, 0, newSViv(1)),
+                                               newSVOP(OP_CONST, 0, newSViv(2))),
+                                           newOP(OP_CALLER, 0))))));
                    if (pos != min_arity)
                        chkop = newLOGOP(OP_AND, 0,
                                    newBINOP(OP_GT, 0,
@@ -11573,9 +11713,16 @@ Perl_parse_subsignature(pTHX)
                        scalar(newUNOP(OP_RV2AV, 0,
                            newGVOP(OP_GV, 0, PL_defgv))),
                        newSVOP(OP_CONST, 0, newSViv(min_arity))),
-                   newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
-                       newSVOP(OP_CONST, 0,
-                           newSVpvs("Too few arguments for subroutine"))))),
+                   op_convert_list(OP_DIE, 0,
+                       op_convert_list(OP_SPRINTF, 0,
+                           op_append_list(OP_LIST,
+                               newSVOP(OP_CONST, 0,
+                                   newSVpvs("Too few arguments for subroutine at %s line %d.\n")),
+                               newSLICEOP(0,
+                                   op_append_list(OP_LIST,
+                                       newSVOP(OP_CONST, 0, newSViv(1)),
+                                       newSVOP(OP_CONST, 0, newSViv(2))),
+                                   newOP(OP_CALLER, 0))))))),
            initops);
     }
     if (max_arity != -1) {
@@ -11586,20 +11733,21 @@ Perl_parse_subsignature(pTHX)
                        scalar(newUNOP(OP_RV2AV, 0,
                            newGVOP(OP_GV, 0, PL_defgv))),
                        newSVOP(OP_CONST, 0, newSViv(max_arity))),
-                   newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
-                       newSVOP(OP_CONST, 0,
-                           newSVpvs("Too many arguments for subroutine"))))),
+                   op_convert_list(OP_DIE, 0,
+                       op_convert_list(OP_SPRINTF, 0,
+                           op_append_list(OP_LIST,
+                               newSVOP(OP_CONST, 0,
+                                   newSVpvs("Too many arguments for subroutine at %s line %d.\n")),
+                               newSLICEOP(0,
+                                   op_append_list(OP_LIST,
+                                       newSVOP(OP_CONST, 0, newSViv(1)),
+                                       newSVOP(OP_CONST, 0, newSViv(2))),
+                                   newOP(OP_CALLER, 0))))))),
            initops);
     }
     return initops;
 }
 
 /*
- * Local variables:
- * c-indentation-style: bsd
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- *
  * ex: set ts=8 sts=4 sw=4 et:
  */