This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
(perl #132245) don't try to process a char range with no preceding char
[perl5.git] / toke.c
diff --git a/toke.c b/toke.c
index 864fda7..68ec96b 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -464,13 +464,6 @@ S_printbuf(pTHX_ const char *const fmt, const char *const s)
 
 #endif
 
-static int
-S_deprecate_commaless_var_list(pTHX) {
-    PL_expect = XTERM;
-    deprecate_fatal_in("5.28", "Use of comma-less variable list is deprecated");
-    return REPORT(','); /* grandfather non-comma-format format */
-}
-
 /*
  * S_ao
  *
@@ -727,6 +720,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
     parser->lex_state = LEX_NORMAL;
     parser->expect = XSTATE;
     parser->rsfp = rsfp;
+    parser->recheck_utf8_validity = FALSE;
     parser->rsfp_filters =
       !(flags & LEX_START_SAME_FILTER) || !oparser
         ? NULL
@@ -747,9 +741,10 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 
        s = SvPV_const(line, len);
 
-        if (SvUTF8(line) && ! is_utf8_string_loc((U8 *) s,
-                                                 SvCUR(line),
-                                                 &first_bad_char_loc))
+        if (   SvUTF8(line)
+            && UNLIKELY(! is_utf8_string_loc((U8 *) s,
+                                             SvCUR(line),
+                                             &first_bad_char_loc)))
         {
             _force_out_malformed_utf8_message(first_bad_char_loc,
                                               (U8 *) s + SvCUR(line),
@@ -1055,12 +1050,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
                } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
                    p++;
                    highhalf++;
-                } else if (! UTF8_IS_INVARIANT(c)) {
-                    _force_out_malformed_utf8_message((U8 *) p, (U8 *) e,
-                                                      0,
-                                                      1 /* 1 means die */ );
-                    NOT_REACHED; /* NOTREACHED */
-               }
+                } else assert(UTF8_IS_INVARIANT(c));
            }
            if (!highhalf)
                goto plain_copy;
@@ -1274,6 +1264,24 @@ Perl_lex_discard_to(pTHX_ char *ptr)
        PL_parser->last_lop -= discard_len;
 }
 
+void
+Perl_notify_parser_that_changed_to_utf8(pTHX)
+{
+    /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
+     * off to on.  At compile time, this has the effect of entering a 'use
+     * utf8' section.  This means that any input was not previously checked for
+     * UTF-8 (because it was off), but now we do need to check it, or our
+     * assumptions about the input being sane could be wrong, and we could
+     * segfault.  This routine just sets a flag so that the next time we look
+     * at the input we do the well-formed UTF-8 check.  If we aren't in the
+     * proper phase, there may not be a parser object, but if there is, setting
+     * the flag is harmless */
+
+    if (PL_parser) {
+        PL_parser->recheck_utf8_validity = TRUE;
+    }
+}
+
 /*
 =for apidoc Amx|bool|lex_next_chunk|U32 flags
 
@@ -1615,7 +1623,7 @@ Note that C<NULL> is a valid C<proto> and will always return C<true>.
  */
 
 bool
-Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
+Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
 {
     STRLEN len, origlen;
     char *p;
@@ -1677,6 +1685,13 @@ Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
                             origlen, UNI_DISPLAY_ISPRINT)
            : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
 
+       if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
+           SV *name2 = sv_2mortal(newSVsv(PL_curstname));
+           sv_catpvs(name2, "::");
+           sv_catsv(name2, (SV *)name);
+           name = name2;
+       }
+
        if (proto_after_greedy_proto)
            Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
                        "Prototype after '%c' for %" SVf " : %s",
@@ -2575,29 +2590,14 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
     SV *cv;
     SV *rv;
     HV *stash;
-    const U8* first_bad_char_loc;
     const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
 
     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
 
     if (!SvCUR(res)) {
-        deprecate_fatal_in("5.28", "Unknown charname '' is deprecated");
-        return res;
-    }
-
-    if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
-                                     e - backslash_ptr,
-                                     &first_bad_char_loc))
-    {
-        _force_out_malformed_utf8_message(first_bad_char_loc,
-                                          (U8 *) PL_parser->bufend,
-                                          0,
-                                          0 /* 0 means don't die */ );
-        yyerror_pv(Perl_form(aTHX_
-            "Malformed UTF-8 character immediately after '%.*s'",
-            (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr),
-                   SVf_UTF8);
-       return NULL;
+        /* diag_listed_as: Unknown charname '%s' */
+        yyerror("Unknown charname ''");
+        return NULL;
     }
 
     res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
@@ -2709,6 +2709,9 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
         }
     }
     if (*(s-1) == ' ') {
+        /* diag_listed_as: charnames alias definitions may not contain
+                           trailing white-space; marked by <-- HERE in %s
+         */
         yyerror_pv(
             Perl_form(aTHX_
             "charnames alias definitions may not contain trailing "
@@ -2724,11 +2727,15 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
         const U8* first_bad_char_loc;
         STRLEN len;
         const char* const str = SvPV_const(res, len);
-        if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
+        if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
+                                          &first_bad_char_loc)))
+        {
             _force_out_malformed_utf8_message(first_bad_char_loc,
                                               (U8 *) PL_parser->bufend,
                                               0,
                                               0 /* 0 means don't die */ );
+            /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
+                               immediately after '%s' */
             yyerror_pv(
               Perl_form(aTHX_
                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
@@ -2746,6 +2753,8 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
 
         /* The final %.*s makes sure that should the trailing NUL be missing
          * that this print won't run off the end of the string */
+        /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
+                           in \N{%s} */
         yyerror_pv(
           Perl_form(aTHX_
             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
@@ -2757,6 +2766,9 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
     }
 
   multi_spaces:
+        /* diag_listed_as: charnames alias definitions may not contain a
+                           sequence of multiple spaces; marked by <-- HERE
+                           in %s */
         yyerror_pv(
           Perl_form(aTHX_
             "charnames alias definitions may not contain a sequence of "
@@ -2957,9 +2969,9 @@ S_scan_const(pTHX_ char *start)
 
                 /* Here, we don't think we're in a range.  If the new character
                  * is not a hyphen; or if it is a hyphen, but it's too close to
-                 * either edge to indicate a range, then it's a regular
-                 * character. */
-                if (*s != '-' || s >= send - 1 || s == start) {
+                 * either edge to indicate a range, or if we haven't output any
+                 * characters yet then it's a regular character. */
+                if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv)) {
 
                     /* A regular character.  Process like any other, but first
                      * clear any flags */
@@ -3274,18 +3286,18 @@ S_scan_const(pTHX_ char *start)
 #endif
                 /* Always gets run for ASCII, and sometimes for EBCDIC. */
                 {
-                    SSize_t i;
-
                     /* Here, no conversions are necessary, which means that the
                      * first character in the range is already in 'd' and
                      * valid, so we can skip overwriting it */
                     if (has_utf8) {
+                        SSize_t i;
                         d += UTF8SKIP(d);
                         for (i = range_min + 1; i <= range_max; i++) {
                             append_utf8_from_native_byte((U8) i, (U8 **) &d);
                         }
                     }
                     else {
+                        SSize_t i;
                         d++;
                         assert(range_min + 1 <= range_max);
                         for (i = range_min + 1; i < range_max; i++) {
@@ -3615,11 +3627,12 @@ S_scan_const(pTHX_ char *start)
                  * For non-patterns, the named characters are converted to
                  * their string equivalents.  In patterns, named characters are
                  * not converted to their ultimate forms for the same reasons
-                 * that other escapes aren't.  Instead, they are converted to
-                 * the \N{U+...} form to get the value from the charnames that
-                 * is in effect right now, while preserving the fact that it
-                 * was a named character, so that the regex compiler knows
-                 * this.
+                 * that other escapes aren't (mainly that the ultimate
+                 * character could be considered a meta-symbol by the regex
+                 * compiler).  Instead, they are converted to the \N{U+...}
+                 * form to get the value from the charnames that is in effect
+                 * right now, while preserving the fact that it was a named
+                 * character, so that the regex compiler knows this.
                  *
                 * The structure of this section of code (besides checking for
                 * errors and upgrading to utf8) is:
@@ -3640,6 +3653,7 @@ S_scan_const(pTHX_ char *start)
                s++;
                if (*s != '{') {
                    yyerror("Missing braces on \\N{}");
+                    *d++ = '\0';
                    continue;
                }
                s++;
@@ -3651,7 +3665,7 @@ S_scan_const(pTHX_ char *start)
                    } else {
                        yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
                    }
-                   continue;
+                    yyquit(); /* Have exhausted the input. */
                }
 
                /* Here it looks like a named character */
@@ -3670,6 +3684,7 @@ S_scan_const(pTHX_ char *start)
                                 "Invalid hexadecimal number in \\N{U+...}"
                             );
                             s = e + 1;
+                            *d++ = '\0';
                             continue;
                         }
                         while (++s < e) {
@@ -3868,6 +3883,7 @@ S_scan_const(pTHX_ char *start)
                                     " in transliteration operator",
                                         /*  +1 to include the "}" */
                                     (int) (e + 1 - start), start));
+                                *d++ = '\0';
                                 goto end_backslash_N;
                             }
 
@@ -3933,15 +3949,16 @@ S_scan_const(pTHX_ char *start)
            case 'c':
                s++;
                if (s < send) {
-                   *d++ = grok_bslash_c(*s++, 1);
+                   *d++ = grok_bslash_c(*s, 1);
                }
                else {
                    yyerror("Missing control char name in \\c");
+                   yyquit();   /* Are at end of input, no sense continuing */
                }
 #ifdef EBCDIC
                 non_portable_endpoint++;
 #endif
-               continue;
+                break;
 
            /* printf-style backslashes, formfeeds, newlines, etc */
            case 'b':
@@ -4427,8 +4444,8 @@ Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
                    PL_parser->last_uni = buf + last_uni_pos;
                if (PL_parser->last_lop)
                    PL_parser->last_lop = buf + last_lop_pos;
-               SvLEN(linestr) = SvCUR(linestr);
-               SvCUR(linestr) = s-SvPVX(linestr);
+               SvLEN_set(linestr, SvCUR(linestr));
+               SvCUR_set(linestr, s - SvPVX(linestr));
                PL_parser->filtered = 1;
                break;
            }
@@ -4614,6 +4631,7 @@ S_tokenize_use(pTHX_ int is_use, char *s) {
     PERL_ARGS_ASSERT_TOKENIZE_USE;
 
     if (PL_expect != XSTATE)
+       /* diag_listed_as: "use" not allowed in expression */
        yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
                    is_use ? "use" : "no"));
     PL_expect = XTERM;
@@ -4759,6 +4777,20 @@ Perl_yylex(pTHX)
     GV *gv = NULL;
     GV **gvp = NULL;
 
+    if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
+        const U8* first_bad_char_loc;
+        if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
+                                                        PL_bufend - PL_bufptr,
+                                                        &first_bad_char_loc)))
+        {
+            _force_out_malformed_utf8_message(first_bad_char_loc,
+                                              (U8 *) PL_bufend,
+                                              0,
+                                              1 /* 1 means die */ );
+            NOT_REACHED; /* NOTREACHED */
+        }
+        PL_parser->recheck_utf8_validity = FALSE;
+    }
     DEBUG_T( {
        SV* tmp = newSVpvs("");
        PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
@@ -5006,7 +5038,16 @@ Perl_yylex(pTHX)
            s = PL_bufend;
        }
        else {
+            int save_error_count = PL_error_count;
+
            s = scan_const(PL_bufptr);
+
+            /* Set flag if this was a pattern and there were errors.  op.c will
+             * refuse to compile a pattern with this flag set.  Otherwise, we
+             * could get segfaults, etc. */
+            if (PL_lex_inpat && PL_error_count > save_error_count) {
+                ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
+            }
            if (*s == '\\')
                PL_lex_state = LEX_INTERPCASEMOD;
            else
@@ -5085,12 +5126,43 @@ Perl_yylex(pTHX)
                     0, cBOOL(UTF), FALSE);
                 *dest = '\0';
                 assert(PL_tokenbuf[1]); /* we have a variable name */
+            }
+            else {
+                *PL_tokenbuf = 0;
+                PL_in_my = 0;
+            }
+
+            s = skipspace(s);
+            /* parse the = for the default ourselves to avoid '+=' etc being accepted here
+             * as the ASSIGNOP, and exclude other tokens that start with =
+             */
+            if (*s == '=' && (!s[1] || strchr("=~>", s[1]) == 0)) {
+                /* save now to report with the same context as we did when
+                 * all ASSIGNOPS were accepted */
+                PL_oldbufptr = s;
+
+                ++s;
+                NEXTVAL_NEXTTOKE.ival = 0;
+                force_next(ASSIGNOP);
+                PL_expect = XTERM;
+            }
+            else if (*s == ',' || *s == ')') {
+                PL_expect = XOPERATOR;
+            }
+            else {
+                /* make sure the context shows the unexpected character and
+                 * hopefully a bit more */
+                if (*s) ++s;
+                while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
+                    s++;
+                PL_bufptr = s; /* for error reporting */
+                yyerror("Illegal operator following parameter in a subroutine signature");
+                PL_in_my = 0;
+            }
+            if (*PL_tokenbuf) {
                 NEXTVAL_NEXTTOKE.ival = sigil;
                 force_next('p'); /* force a signature pending identifier */
             }
-            else
-                PL_in_my = 0;
-            PL_expect = XOPERATOR;
             break;
 
         case ')':
@@ -5115,12 +5187,6 @@ Perl_yylex(pTHX)
     switch (*s) {
     default:
        if (UTF) {
-            if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) {
-                _force_out_malformed_utf8_message((U8 *) s, (U8 *) PL_bufend,
-                                                  0,
-                                                  1 /* 1 means die */ );
-                NOT_REACHED; /* NOTREACHED */
-            }
             if (isIDFIRST_utf8_safe(s, PL_bufend)) {
                 goto keylookup;
             }
@@ -5142,12 +5208,23 @@ Perl_yylex(pTHX)
         else {
             c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
         }
-        len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
-        if (len > UNRECOGNIZED_PRECEDE_COUNT) {
-            d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)PL_linestart) : s - UNRECOGNIZED_PRECEDE_COUNT;
-        } else {
+
+        if (s >= PL_linestart) {
             d = PL_linestart;
         }
+        else {
+            /* somehow (probably due to a parse failure), PL_linestart has advanced
+             * pass PL_bufptr, get a reasonable beginning of line
+             */
+            d = s;
+            while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
+                --d;
+        }
+        len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
+        if (len > UNRECOGNIZED_PRECEDE_COUNT) {
+            d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
+        }
+
         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
                           UTF8fARG(UTF, (s - d), d),
                          (int) len + 1);
@@ -5374,8 +5451,6 @@ Perl_yylex(pTHX)
                d = instr(s,"perl -");
                if (!d) {
                    d = instr(s,"perl");
-                    if (d && d[4] == '6')
-                        d = NULL;
 #if defined(DOSISH)
                    /* avoid getting into infinite loops when shebang
                     * line contains "Perl" rather than "perl" */
@@ -5877,27 +5952,12 @@ Perl_yylex(pTHX)
                    PL_lex_stuff = NULL;
                }
                else {
-                   if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
-                       sv_free(sv);
-                       if (PL_in_my == KEY_our) {
-                            deprecate_disappears_in("5.28",
-                                "Attribute \"unique\" is deprecated");
-                       }
-                       else
-                           Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
-                   }
-
                    /* NOTE: any CV attrs applied here need to be part of
                       the CVf_BUILTIN_ATTRS define in cv.h! */
-                   else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
+                   if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
                        sv_free(sv);
                        CvLVALUE_on(PL_compcv);
                    }
-                   else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
-                       sv_free(sv);
-                        deprecate_disappears_in("5.28",
-                            "Attribute \"locked\" is deprecated");
-                   }
                    else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
                        sv_free(sv);
                        CvMETHOD_on(PL_compcv);
@@ -6201,8 +6261,10 @@ Perl_yylex(pTHX)
                        break;
                    }
                    if (strEQs(s, "sub")) {
+                        PL_bufptr = s;
                        d = s + 3;
                        d = skipspace(d);
+                        s = PL_bufptr;
                        if (*d == ':') {
                            PL_expect = XTERM;
                            break;
@@ -6543,12 +6605,7 @@ Perl_yylex(pTHX)
     case '$':
        CLINE;
 
-       if (PL_expect == XOPERATOR) {
-           if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
-               return deprecate_commaless_var_list();
-           }
-       }
-       else if (PL_expect == XPOSTDEREF) {
+        if (PL_expect == XPOSTDEREF) {
            if (s[1] == '#') {
                s++;
                POSTDEREF(DOLSHARP);
@@ -6838,10 +6895,6 @@ Perl_yylex(pTHX)
        TERM(THING);
 
     case '\'':
-       if (   PL_expect == XOPERATOR
-           && (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack))
-               return deprecate_commaless_var_list();
-
        s = scan_str(s,FALSE,FALSE,FALSE,NULL);
        if (!s)
            missingterm(NULL);
@@ -6854,10 +6907,6 @@ Perl_yylex(pTHX)
        TERM(sublex_start());
 
     case '"':
-       if (   PL_expect == XOPERATOR
-           && (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack))
-               return deprecate_commaless_var_list();
-
        s = scan_str(s,FALSE,FALSE,FALSE,NULL);
        DEBUG_T( {
            if (s)
@@ -7183,6 +7232,7 @@ Perl_yylex(pTHX)
                orig_keyword = 0;
                lex = 0;
                off = 0;
+            /* FALLTHROUGH */
        default:                        /* not a keyword */
          just_a_word: {
                int pkgname = 0;
@@ -7891,6 +7941,7 @@ Perl_yylex(pTHX)
                 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
             {
                char *p = s;
+                SSize_t s_off = s - SvPVX(PL_linestr);
 
                if ((PL_bufend - p) >= 3
                     && strEQs(p, "my") && isSPACE(*(p + 2)))
@@ -7908,6 +7959,9 @@ Perl_yylex(pTHX)
                }
                if (*p != '$' && *p != '\\')
                    Perl_croak(aTHX_ "Missing $ on loop variable");
+
+                /* The buffer may have been reallocated, update s */
+                s = SvPVX(PL_linestr) + s_off;
            }
            OPERATOR(FOR);
 
@@ -8622,7 +8676,8 @@ Perl_yylex(pTHX)
                    COPLINE_SET_FROM_MULTI_END;
                    if (!s)
                        Perl_croak(aTHX_ "Prototype not terminated");
-                   (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
+                   (void)validate_proto(PL_subname, PL_lex_stuff,
+                                        ckWARN(WARN_ILLEGALPROTO), 0);
                    have_proto = TRUE;
 
                    s = skipspace(s);
@@ -8857,8 +8912,11 @@ S_pending_ident(pTHX)
     if (PL_in_my) {
         if (PL_in_my == KEY_our) {     /* "our" is merely analogous to "my" */
             if (has_colon)
+                /* diag_listed_as: No package name allowed for variable %s
+                                   in "our" */
                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
-                                  "variable %s in \"our\"",
+                                  "%se %s in \"our\"",
+                                  *PL_tokenbuf=='&' ?"subroutin":"variabl",
                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
         }
@@ -9018,7 +9076,6 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
            s++;
        if (*s == ',') {
            GV* gv;
-           PADOFFSET off;
            if (keyword(w, s - w, 0))
                return;
 
@@ -9026,6 +9083,7 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
            if (gv && GvCVu(gv))
                return;
            if (s - w <= 254) {
+                PADOFFSET off;
                char tmpbuf[256];
                Copy(w, tmpbuf+1, s - w, char);
                *tmpbuf = '&';
@@ -9362,19 +9420,36 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
         bool skip;
         char *s2;
         /* If we were processing {...} notation then...  */
-        if (isIDFIRST_lazy_if_safe(d, e, is_utf8)) {
-            /* if it starts as a valid identifier, assume that it is one.
-               (the later check for } being at the expected point will trap
-               cases where this doesn't pan out.)  */
-            d += is_utf8 ? UTF8SKIP(d) : 1;
-            parse_ident(&s, &d, e, 1, is_utf8, TRUE);
-           *d = '\0';
+        if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
+            || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
+                 && isWORDCHAR(*s))
+        ) {
+            /* note we have to check for a normal identifier first,
+             * as it handles utf8 symbols, and only after that has
+             * been ruled out can we look at the caret words */
+            if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
+                /* if it starts as a valid identifier, assume that it is one.
+                   (the later check for } being at the expected point will trap
+                   cases where this doesn't pan out.)  */
+                d += is_utf8 ? UTF8SKIP(d) : 1;
+                parse_ident(&s, &d, e, 1, is_utf8, TRUE);
+                *d = '\0';
+            }
+            else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
+                d++;
+                while (isWORDCHAR(*s) && d < e) {
+                    *d++ = *s++;
+                }
+                if (d >= e)
+                    Perl_croak(aTHX_ "%s", ident_too_long);
+                *d = '\0';
+            }
             tmp_copline = CopLINE(PL_curcop);
             if (s < PL_bufend && isSPACE(*s)) {
                 s = skipspace(s);
             }
            if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
-                /* ${foo[0]} and ${foo{bar}} notation.  */
+                /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation.  */
                if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
                    const char * const brack =
                        (const char *)
@@ -9393,26 +9468,16 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
                return s;
            }
        }
-       /* Handle extended ${^Foo} variables
-        * 1999-02-27 mjd-perl-patch@plover.com */
-       else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
-                && isWORDCHAR(*s))
-       {
-           d++;
-           while (isWORDCHAR(*s) && d < e) {
-               *d++ = *s++;
-           }
-           if (d >= e)
-               Perl_croak(aTHX_ "%s", ident_too_long);
-           *d = '\0';
-       }
 
         if ( !tmp_copline )
             tmp_copline = CopLINE(PL_curcop);
-        if ((skip = s < PL_bufend && isSPACE(*s)))
+        if ((skip = s < PL_bufend && isSPACE(*s))) {
             /* Avoid incrementing line numbers or resetting PL_linestart,
                in case we have to back up.  */
+            STRLEN s_off = s - SvPVX(PL_linestr);
             s2 = peekspace(s);
+            s = SvPVX(PL_linestr) + s_off;
+        }
         else
             s2 = s;
 
@@ -9689,18 +9754,14 @@ S_scan_subst(pTHX_ char *start)
 
        PL_multi_end = 0;
        pm->op_pmflags |= PMf_EVAL;
-       while (es-- > 0) {
-           if (es)
-               sv_catpvs(repl, "eval ");
-           else
-               sv_catpvs(repl, "do ");
-       }
-       sv_catpvs(repl, "{");
+        for (; es > 1; es--) {
+            sv_catpvs(repl, "eval ");
+        }
+        sv_catpvs(repl, "do {");
        sv_catsv(repl, PL_parser->lex_sub_repl);
        sv_catpvs(repl, "}");
        SvREFCNT_dec(PL_parser->lex_sub_repl);
        PL_parser->lex_sub_repl = repl;
-        es = 1;
     }
 
 
@@ -9856,7 +9917,7 @@ S_scan_heredoc(pTHX_ char *s)
        else
            term = '"';
        if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
-           deprecate_fatal_in("5.28", "Use of bare << to mean <<\"\" is deprecated");
+           Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
        peek = s;
         while (
                isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF))
@@ -11187,9 +11248,11 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
               || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
             && strchr("+-0123456789_", s[1]))
         {
-            floatit = TRUE;
+            int exp_digits = 0;
+            const char *save_s = s;
+            char * save_d = d;
 
-           /* regardless of whether user said 3E5 or 3e5, use lower 'e',
+            /* regardless of whether user said 3E5 or 3e5, use lower 'e',
                ditto for p (hexfloats) */
             if ((isALPHA_FOLD_EQ(*s, 'e'))) {
                /* At least some Mach atof()s don't grok 'E' */
@@ -11221,6 +11284,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
            /* read digits of exponent */
            while (isDIGIT(*s) || *s == '_') {
                if (isDIGIT(*s)) {
+                    ++exp_digits;
                    if (d >= e)
                        Perl_croak(aTHX_ "%s", number_too_long);
                    *d++ = *s++;
@@ -11232,6 +11296,20 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                   lastub = s++;
                }
            }
+
+            if (!exp_digits) {
+                /* no exponent digits, the [eEpP] could be for something else,
+                 * though in practice we don't get here for p since that's preparsed
+                 * earlier, and results in only the 0xX being consumed, so behave similarly
+                 * for decimal floats and consume only the D.DD, leaving the [eE] to the
+                 * next token.
+                 */
+                s = save_s;
+                d = save_d;
+            }
+            else {
+                floatit = TRUE;
+            }
        }
 
 
@@ -11313,8 +11391,6 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 STATIC char *
 S_scan_formline(pTHX_ char *s)
 {
-    char *eol;
-    char *t;
     SV * const stuff = newSVpvs("");
     bool needargs = FALSE;
     bool eofmt = FALSE;
@@ -11322,8 +11398,9 @@ S_scan_formline(pTHX_ char *s)
     PERL_ARGS_ASSERT_SCAN_FORMLINE;
 
     while (!needargs) {
+        char *eol;
        if (*s == '.') {
-           t = s+1;
+            char *t = s+1;
 #ifdef PERL_STRICT_CR
            while (SPACE_OR_TAB(*t))
                t++;
@@ -11340,6 +11417,7 @@ S_scan_formline(pTHX_ char *s)
        if (!eol++)
                eol = PL_bufend;
        if (*s != '#') {
+            char *t;
            for (t = s; t < eol; t++) {
                if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
                    needargs = FALSE;
@@ -11441,6 +11519,29 @@ S_yywarn(pTHX_ const char *const s, U32 flags)
     return 0;
 }
 
+void
+Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
+{
+    PERL_ARGS_ASSERT_ABORT_EXECUTION;
+
+    if (PL_minus_c)
+        Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
+    else {
+        Perl_croak(aTHX_
+                "%sExecution of %s aborted due to compilation errors.\n", msg, name);
+    }
+    NOT_REACHED; /* NOTREACHED */
+}
+
+void
+Perl_yyquit(pTHX)
+{
+    /* Called, after at least one error has been found, to abort the parse now,
+     * instead of trying to forge ahead */
+
+    yyerror_pvn(NULL, 0, 0);
+}
+
 int
 Perl_yyerror(pTHX_ const char *const s)
 {
@@ -11464,101 +11565,120 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
     int yychar  = PL_parser->yychar;
 
-    PERL_ARGS_ASSERT_YYERROR_PVN;
-
-    if (!yychar || (yychar == ';' && !PL_rsfp))
-       sv_catpvs(where_sv, "at EOF");
-    else if (   PL_oldoldbufptr
-             && PL_bufptr > PL_oldoldbufptr
-             && PL_bufptr - PL_oldoldbufptr < 200
-             && PL_oldoldbufptr != PL_oldbufptr
-             && PL_oldbufptr != PL_bufptr)
-    {
-       /*
-               Only for NetWare:
-               The code below is removed for NetWare because it abends/crashes on NetWare
-               when the script has error such as not having the closing quotes like:
-                   if ($var eq "value)
-               Checking of white spaces is anyway done in NetWare code.
-       */
+    /* Output error message 's' with length 'len'.  'flags' are SV flags that
+     * apply.  If the number of errors found is large enough, it abandons
+     * parsing.  If 's' is NULL, there is no message, and it abandons
+     * processing unconditionally */
+
+    if (s != NULL) {
+        if (!yychar || (yychar == ';' && !PL_rsfp))
+            sv_catpvs(where_sv, "at EOF");
+        else if (   PL_oldoldbufptr
+                 && PL_bufptr > PL_oldoldbufptr
+                 && PL_bufptr - PL_oldoldbufptr < 200
+                 && PL_oldoldbufptr != PL_oldbufptr
+                 && PL_oldbufptr != PL_bufptr)
+        {
+            /*
+                    Only for NetWare:
+                    The code below is removed for NetWare because it
+                    abends/crashes on NetWare when the script has error such as
+                    not having the closing quotes like:
+                        if ($var eq "value)
+                    Checking of white spaces is anyway done in NetWare code.
+            */
 #ifndef NETWARE
-       while (isSPACE(*PL_oldoldbufptr))
-           PL_oldoldbufptr++;
+            while (isSPACE(*PL_oldoldbufptr))
+                PL_oldoldbufptr++;
 #endif
-       context = PL_oldoldbufptr;
-       contlen = PL_bufptr - PL_oldoldbufptr;
-    }
-    else if (  PL_oldbufptr
-            && PL_bufptr > PL_oldbufptr
-            && PL_bufptr - PL_oldbufptr < 200
-            && PL_oldbufptr != PL_bufptr) {
-       /*
-               Only for NetWare:
-               The code below is removed for NetWare because it abends/crashes on NetWare
-               when the script has error such as not having the closing quotes like:
-                   if ($var eq "value)
-               Checking of white spaces is anyway done in NetWare code.
-       */
+            context = PL_oldoldbufptr;
+            contlen = PL_bufptr - PL_oldoldbufptr;
+        }
+        else if (  PL_oldbufptr
+                && PL_bufptr > PL_oldbufptr
+                && PL_bufptr - PL_oldbufptr < 200
+                && PL_oldbufptr != PL_bufptr) {
+            /*
+                    Only for NetWare:
+                    The code below is removed for NetWare because it
+                    abends/crashes on NetWare when the script has error such as
+                    not having the closing quotes like:
+                        if ($var eq "value)
+                    Checking of white spaces is anyway done in NetWare code.
+            */
 #ifndef NETWARE
-       while (isSPACE(*PL_oldbufptr))
-           PL_oldbufptr++;
+            while (isSPACE(*PL_oldbufptr))
+                PL_oldbufptr++;
 #endif
-       context = PL_oldbufptr;
-       contlen = PL_bufptr - PL_oldbufptr;
-    }
-    else if (yychar > 255)
-       sv_catpvs(where_sv, "next token ???");
-    else if (yychar == YYEMPTY) {
-       if (PL_lex_state == LEX_NORMAL)
-           sv_catpvs(where_sv, "at end of line");
-       else if (PL_lex_inpat)
-           sv_catpvs(where_sv, "within pattern");
-       else
-           sv_catpvs(where_sv, "within string");
-    }
-    else {
-       sv_catpvs(where_sv, "next char ");
-       if (yychar < 32)
-           Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
-       else if (isPRINT_LC(yychar)) {
-           const char string = yychar;
-           sv_catpvn(where_sv, &string, 1);
-       }
-       else
-           Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
-    }
-    msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
-    Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
-        OutCopFILE(PL_curcop),
-        (IV)(PL_parser->preambling == NOLINE
-               ? CopLINE(PL_curcop)
-               : PL_parser->preambling));
-    if (context)
-       Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
-                            UTF8fARG(UTF, contlen, context));
-    else
-       Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
-    if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
-        Perl_sv_catpvf(aTHX_ msg,
-        "  (Might be a runaway multi-line %c%c string starting on line %" IVdf ")\n",
-                (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
-        PL_multi_end = 0;
-    }
-    if (PL_in_eval & EVAL_WARNONLY) {
-       PL_in_eval &= ~EVAL_WARNONLY;
-       Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
-    }
-    else {
-       qerror(msg);
+            context = PL_oldbufptr;
+            contlen = PL_bufptr - PL_oldbufptr;
+        }
+        else if (yychar > 255)
+            sv_catpvs(where_sv, "next token ???");
+        else if (yychar == YYEMPTY) {
+            if (PL_lex_state == LEX_NORMAL)
+                sv_catpvs(where_sv, "at end of line");
+            else if (PL_lex_inpat)
+                sv_catpvs(where_sv, "within pattern");
+            else
+                sv_catpvs(where_sv, "within string");
+        }
+        else {
+            sv_catpvs(where_sv, "next char ");
+            if (yychar < 32)
+                Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
+            else if (isPRINT_LC(yychar)) {
+                const char string = yychar;
+                sv_catpvn(where_sv, &string, 1);
+            }
+            else
+                Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
+        }
+        msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
+        Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
+            OutCopFILE(PL_curcop),
+            (IV)(PL_parser->preambling == NOLINE
+                   ? CopLINE(PL_curcop)
+                   : PL_parser->preambling));
+        if (context)
+            Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
+                                 UTF8fARG(UTF, contlen, context));
+        else
+            Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
+        if (   PL_multi_start < PL_multi_end
+            && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
+        {
+            Perl_sv_catpvf(aTHX_ msg,
+            "  (Might be a runaway multi-line %c%c string starting on"
+            " line %" IVdf ")\n",
+                    (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
+            PL_multi_end = 0;
+        }
+        if (PL_in_eval & EVAL_WARNONLY) {
+            PL_in_eval &= ~EVAL_WARNONLY;
+            Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
+        }
+        else {
+            qerror(msg);
+        }
     }
-    if (PL_error_count >= 10) {
-       SV * errsv;
-       if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
-           Perl_croak(aTHX_ "%" SVf "%s has too many errors.\n",
-                      SVfARG(errsv), OutCopFILE(PL_curcop));
-       else
-           Perl_croak(aTHX_ "%s has too many errors.\n",
-            OutCopFILE(PL_curcop));
+    if (s == NULL || PL_error_count >= 10) {
+        const char * msg = "";
+        const char * const name = OutCopFILE(PL_curcop);
+
+       if (PL_in_eval) {
+            SV * errsv = ERRSV;
+            if (SvCUR(errsv)) {
+                msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
+            }
+        }
+
+        if (s == NULL) {
+            abort_execution(msg, name);
+        }
+        else {
+            Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
+        }
     }
     PL_in_my = 0;
     PL_in_my_stash = NULL;
@@ -11580,7 +11700,9 @@ S_swallow_bom(pTHX_ U8 *s)
                /* diag_listed_as: Unsupported script encoding %s */
                Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
 #ifndef PERL_NO_UTF16_FILTER
+#ifdef DEBUGGING
            if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
+#endif
            s += 2;
            if (PL_bufend > (char*)s) {
                s = add_utf16_textfilter(s, TRUE);
@@ -11594,7 +11716,9 @@ S_swallow_bom(pTHX_ U8 *s)
     case 0xFE:
        if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
 #ifndef PERL_NO_UTF16_FILTER
+#ifdef DEBUGGING
            if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
+#endif
            s += 2;
            if (PL_bufend > (char *)s) {
                s = add_utf16_textfilter(s, FALSE);
@@ -11608,7 +11732,9 @@ S_swallow_bom(pTHX_ U8 *s)
     case BOM_UTF8_FIRST_BYTE: {
         const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
         if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
+#ifdef DEBUGGING
             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
+#endif
             s += len + 1;                      /* UTF-8 */
         }
         break;
@@ -11627,7 +11753,9 @@ S_swallow_bom(pTHX_ U8 *s)
                   * 00 xx 00 xx
                   * are a good indicator of UTF-16BE. */
 #ifndef PERL_NO_UTF16_FILTER
+#ifdef DEBUGGING
                  if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
+#endif
                  s = add_utf16_textfilter(s, FALSE);
 #else
                  /* diag_listed_as: Unsupported script encoding %s */
@@ -11643,7 +11771,9 @@ S_swallow_bom(pTHX_ U8 *s)
                   * xx 00 xx 00
                   * are a good indicator of UTF-16LE. */
 #ifndef PERL_NO_UTF16_FILTER
+#ifdef DEBUGGING
              if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
+#endif
              s = add_utf16_textfilter(s, TRUE);
 #else
              /* diag_listed_as: Unsupported script encoding %s */