yada-yada is a term, not an operator

[perl5.git] / toke.c
diff --git a/toke.c b/toke.c

index 9bd145d..afcd719 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -464,13 +464,6 @@ S_printbuf(pTHX_ const char *const fmt, const char *const s)
  
  #endif
  
-static int
-S_deprecate_commaless_var_list(pTHX) {
-    PL_expect = XTERM;
-    deprecate("comma-less variable list");
-    return REPORT(','); /* grandfather non-comma-format format */
-}
-
  /*
   * S_ao
   *
@@ -669,7 +662,7 @@ S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
  Creates and initialises a new lexer/parser state object, supplying
  a context in which to lex and parse from a new source of Perl code.
  A pointer to the new state object is placed in L</PL_parser>.  An entry
-is made on the save stack so that upon unwinding the new state object
+is made on the save stack so that upon unwinding, the new state object
  will be destroyed and the former value of L</PL_parser> will be restored.
  Nothing else need be done to clean up the parsing context.
  
@@ -701,6 +694,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
  {
      const char *s = NULL;
      yy_parser *parser, *oparser;
+
      if (flags && flags & ~LEX_START_FLAGS)
         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
  
@@ -726,6 +720,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
      parser->lex_state = LEX_NORMAL;
      parser->expect = XSTATE;
      parser->rsfp = rsfp;
+    parser->recheck_utf8_validity = FALSE;
      parser->rsfp_filters =
        !(flags & LEX_START_SAME_FILTER) || !oparser
          ? NULL
@@ -742,7 +737,22 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
  
      if (line) {
         STRLEN len;
+        const U8* first_bad_char_loc;
+
         s = SvPV_const(line, len);
+
+        if (   SvUTF8(line)
+            && UNLIKELY(! is_utf8_string_loc((U8 *) s,
+                                             SvCUR(line),
+                                             &first_bad_char_loc)))
+        {
+            _force_out_malformed_utf8_message(first_bad_char_loc,
+                                              (U8 *) s + SvCUR(line),
+                                              0,
+                                              1 /* 1 means die */ );
+            NOT_REACHED; /* NOTREACHED */
+        }
+
         parser->linestr = flags & LEX_START_COPIED
                             ? SvREFCNT_inc_simple_NN(line)
                             : newSVpvn_flags(s, len, SvUTF8(line));
@@ -751,6 +761,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
      } else {
         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
      }
+
      parser->oldoldbufptr =
         parser->oldbufptr =
         parser->bufptr =
@@ -1039,12 +1050,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
                     p++;
                     highhalf++;
-                } else if (! UTF8_IS_INVARIANT(c)) {
-                    _force_out_malformed_utf8_message((U8 *) p, (U8 *) e,
-                                                      0,
-                                                      1 /* 1 means die */ );
-                    NOT_REACHED; /* NOTREACHED */
-               }
+                } else assert(UTF8_IS_INVARIANT(c));
             }
             if (!highhalf)
                 goto plain_copy;
@@ -1258,6 +1264,24 @@ Perl_lex_discard_to(pTHX_ char *ptr)
         PL_parser->last_lop -= discard_len;
  }
  
+void
+Perl_notify_parser_that_changed_to_utf8(pTHX)
+{
+    /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
+     * off to on.  At compile time, this has the effect of entering a 'use
+     * utf8' section.  This means that any input was not previously checked for
+     * UTF-8 (because it was off), but now we do need to check it, or our
+     * assumptions about the input being sane could be wrong, and we could
+     * segfault.  This routine just sets a flag so that the next time we look
+     * at the input we do the well-formed UTF-8 check.  If we aren't in the
+     * proper phase, there may not be a parser object, but if there is, setting
+     * the flag is harmless */
+
+    if (PL_parser) {
+        PL_parser->recheck_utf8_validity = TRUE;
+    }
+}
+
  /*
  =for apidoc Amx|bool|lex_next_chunk|U32 flags
  
@@ -1293,7 +1317,6 @@ Perl_lex_next_chunk(pTHX_ U32 flags)
      STRLEN linestart_pos, last_uni_pos, last_lop_pos;
      bool got_some_for_debugger = 0;
      bool got_some;
-    const U8* first_bad_char_loc;
  
      if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
@@ -1360,15 +1383,19 @@ Perl_lex_next_chunk(pTHX_ U32 flags)
      PL_parser->bufend = buf + new_bufend_pos;
      PL_parser->bufptr = buf + bufptr_pos;
  
-    if (UTF && ! is_utf8_string_loc((U8 *) PL_parser->bufptr,
-                                    PL_parser->bufend - PL_parser->bufptr,
-                                    &first_bad_char_loc))
-    {
-        _force_out_malformed_utf8_message(first_bad_char_loc,
-                                          (U8 *) PL_parser->bufend,
-                                          0,
-                                          1 /* 1 means die */ );
-        NOT_REACHED; /* NOTREACHED */
+    if (UTF) {
+        const U8* first_bad_char_loc;
+        if (UNLIKELY(! is_utf8_string_loc(
+                            (U8 *) PL_parser->bufptr,
+                                   PL_parser->bufend - PL_parser->bufptr,
+                                   &first_bad_char_loc)))
+        {
+            _force_out_malformed_utf8_message(first_bad_char_loc,
+                                              (U8 *) PL_parser->bufend,
+                                              0,
+                                              1 /* 1 means die */ );
+            NOT_REACHED; /* NOTREACHED */
+        }
      }
  
      PL_parser->oldbufptr = buf + oldbufptr_pos;
@@ -1596,7 +1623,7 @@ Note that C<NULL> is a valid C<proto> and will always return C<true>.
   */
  
  bool
-Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
+Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
  {
      STRLEN len, origlen;
      char *p;
@@ -1658,6 +1685,13 @@ Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
                              origlen, UNI_DISPLAY_ISPRINT)
             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
  
+       if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
+           SV *name2 = sv_2mortal(newSVsv(PL_curstname));
+           sv_catpvs(name2, "::");
+           sv_catsv(name2, (SV *)name);
+           name = name2;
+       }
+
         if (proto_after_greedy_proto)
             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
                         "Prototype after '%c' for %" SVf " : %s",
@@ -2143,7 +2177,7 @@ Perl_str_to_version(pTHX_ SV *sv)
      STRLEN len;
      const char *start = SvPV_const(sv,len);
      const char * const end = start + len;
-    const bool utf = SvUTF8(sv) ? TRUE : FALSE;
+    const bool utf = cBOOL(SvUTF8(sv));
  
      PERL_ARGS_ASSERT_STR_TO_VERSION;
  
@@ -2249,10 +2283,9 @@ S_force_strict_version(pTHX_ char *s)
  
  /*
   * S_tokeq
- * Tokenize a quoted string passed in as an SV.  It finds the next
- * chunk, up to end of string or a backslash.  It may make a new
- * SV containing that chunk (if HINT_NEW_STRING is on).  It also
- * turns \\ into \.
+ * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
+ * modified as necessary.  However, if HINT_NEW_STRING is on, 'sv' is
+ * unchanged, and a new SV containing the modified input is returned.
   */
  
  STATIC SV *
@@ -2557,29 +2590,15 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
      SV *cv;
      SV *rv;
      HV *stash;
-    const U8* first_bad_char_loc;
      const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
  
      PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
  
      if (!SvCUR(res)) {
-        deprecate_fatal_in("5.28", "Unknown charname '' is deprecated");
-        return res;
-    }
-
-    if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
-                                     e - backslash_ptr,
-                                     &first_bad_char_loc))
-    {
-        _force_out_malformed_utf8_message(first_bad_char_loc,
-                                          (U8 *) PL_parser->bufend,
-                                          0,
-                                          0 /* 0 means don't die */ );
-        yyerror_pv(Perl_form(aTHX_
-            "Malformed UTF-8 character immediately after '%.*s'",
-            (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr),
-                   SVf_UTF8);
-       return NULL;
+        SvREFCNT_dec_NN(res);
+        /* diag_listed_as: Unknown charname '%s' */
+        yyerror("Unknown charname ''");
+        return NULL;
      }
  
      res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
@@ -2691,6 +2710,9 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
          }
      }
      if (*(s-1) == ' ') {
+        /* diag_listed_as: charnames alias definitions may not contain
+                           trailing white-space; marked by <-- HERE in %s
+         */
          yyerror_pv(
              Perl_form(aTHX_
              "charnames alias definitions may not contain trailing "
@@ -2706,11 +2728,15 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
          const U8* first_bad_char_loc;
          STRLEN len;
          const char* const str = SvPV_const(res, len);
-        if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
+        if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
+                                          &first_bad_char_loc)))
+        {
              _force_out_malformed_utf8_message(first_bad_char_loc,
                                                (U8 *) PL_parser->bufend,
                                                0,
                                                0 /* 0 means don't die */ );
+            /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
+                               immediately after '%s' */
              yyerror_pv(
                Perl_form(aTHX_
                  "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
@@ -2728,6 +2754,8 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
  
          /* The final %.*s makes sure that should the trailing NUL be missing
           * that this print won't run off the end of the string */
+        /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
+                           in \N{%s} */
          yyerror_pv(
            Perl_form(aTHX_
              "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
@@ -2739,6 +2767,9 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
      }
  
    multi_spaces:
+        /* diag_listed_as: charnames alias definitions may not contain a
+                           sequence of multiple spaces; marked by <-- HERE
+                           in %s */
          yyerror_pv(
            Perl_form(aTHX_
              "charnames alias definitions may not contain a sequence of "
@@ -2837,7 +2868,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
           } (end if backslash)
            handle regular character
      } (end while character to read)
-               
+
  */
  
  STATIC char *
@@ -2852,8 +2883,6 @@ S_scan_const(pTHX_ char *start)
      bool didrange = FALSE;              /* did we just finish a range? */
      bool in_charclass = FALSE;          /* within /[...]/ */
      bool has_utf8 = FALSE;              /* Output constant is UTF8 */
-    bool has_above_latin1 = FALSE;      /* does something require special
-                                           handling in tr/// ? */
      bool  this_utf8 = cBOOL(UTF);       /* Is the source string assumed to be
                                             UTF8?  But, this can show as true
                                             when the source isn't utf8, as for
@@ -2868,6 +2897,14 @@ S_scan_const(pTHX_ char *start)
      STRLEN offset_to_max;   /* The offset in the output to where the range
                                 high-end character is temporarily placed */
  
+    /* Does something require special handling in tr/// ?  This avoids extra
+     * work in a less likely case.  As such, khw didn't feel it was worth
+     * adding any branches to the more mainline code to handle this, which
+     * means that this doesn't get set in some circumstances when things like
+     * \x{100} get expanded out.  As a result there needs to be extra testing
+     * done in the tr code */
+    bool has_above_latin1 = FALSE;
+
      /* Note on sizing:  The scanned constant is placed into sv, which is
       * initialized by newSV() assuming one byte of output for every byte of
       * input.  This routine expects newSV() to allocate an extra byte for a
@@ -2912,8 +2949,8 @@ S_scan_const(pTHX_ char *start)
               * range, so for most cases we just drop down and handle the value
               * as any other.  There are two exceptions.
               *
-             * 1.  A minus sign indicates that we are actually going to have
-             *     a range.  In this case, skip the '-', set a flag, then drop
+             * 1.  A hyphen indicates that we are actually going to have a
+             *     range.  In this case, skip the '-', set a flag, then drop
               *     down to handle what should be the end range value.
               * 2.  After we've handled that value, the next time through, that
               *     flag is set and we fix up the range.
@@ -2931,12 +2968,11 @@ S_scan_const(pTHX_ char *start)
  
             if (! dorange) {
  
-                /* Here, we don't think we're in a range.  If we've processed
-                 * at least one character, then see if this next one is a '-',
-                 * indicating the previous one was the start of a range.  But
-                 * don't bother if we're too close to the end for the minus to
-                 * mean that. */
-                if (*s != '-' || s >= send - 1 || s == start) {
+                /* Here, we don't think we're in a range.  If the new character
+                 * is not a hyphen; or if it is a hyphen, but it's too close to
+                 * either edge to indicate a range, or if we haven't output any
+                 * characters yet then it's a regular character. */
+                if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv)) {
  
                      /* A regular character.  Process like any other, but first
                       * clear any flags */
@@ -2946,25 +2982,26 @@ S_scan_const(pTHX_ char *start)
                      non_portable_endpoint = 0;
                      backslash_N = 0;
  #endif
-                    /* The tests here and the following 'else' for being above
-                     * Latin1 suffice to find all such occurences in the
-                     * constant, except those added by a backslash escape
-                     * sequence, like \x{100}.  And all those set
-                     * 'has_above_latin1' as appropriate */
+                    /* The tests here for being above Latin1 and similar ones
+                     * in the following 'else' suffice to find all such
+                     * occurences in the constant, except those added by a
+                     * backslash escape sequence, like \x{100}.  Mostly, those
+                     * set 'has_above_latin1' as appropriate */
                      if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
                          has_above_latin1 = TRUE;
                      }
  
                      /* Drops down to generic code to process current byte */
                  }
-                else {
+                else {  /* Is a '-' in the context where it means a range */
                      if (didrange) { /* Something like y/A-C-Z// */
-                        Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
+                        Perl_croak(aTHX_ "Ambiguous range in transliteration"
+                                         " operator");
                      }
  
                      dorange = TRUE;
  
-                    s++;    /* Skip past the minus */
+                    s++;    /* Skip past the hyphen */
  
                      /* d now points to where the end-range character will be
                       * placed.  Save it so won't have to go finding it later,
@@ -2978,6 +3015,8 @@ S_scan_const(pTHX_ char *start)
                      if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
                          has_above_latin1 = TRUE;
                      }
+
+                    /* Drops down to generic code to process current byte */
                  }
              }  /* End of not a range */
              else {
@@ -2989,26 +3028,33 @@ S_scan_const(pTHX_ char *start)
                   * 'd'  points to just beyond the range end in the 'sv' string,
                   *      where we would next place something
                   * 'offset_to_max' is the offset in 'sv' at which the character
-                 *      before 'd' begins.
+                 *      (the range's maximum end point) before 'd'  begins.
                   */
-                const char * max_ptr = SvPVX_const(sv) + offset_to_max;
-                const char * min_ptr;
+                char * max_ptr = SvPVX(sv) + offset_to_max;
+                char * min_ptr;
                  IV range_min;
                 IV range_max;   /* last character in range */
-                STRLEN save_offset;
                  STRLEN grow;
+                Size_t offset_to_min = 0;
+                Size_t extras = 0;
  #ifdef EBCDIC
                  bool convert_unicode;
                  IV real_range_max = 0;
  #endif
-
-                /* Get the range-ends code point values. */
+                /* Get the code point values of the range ends. */
                  if (has_utf8) {
                      /* We know the utf8 is valid, because we just constructed
                       * it ourselves in previous loop iterations */
                      min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
                      range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
                      range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
+
+                    /* This compensates for not all code setting
+                     * 'has_above_latin1', so that we don't skip stuff that
+                     * should be executed */
+                    if (range_max > 255) {
+                        has_above_latin1 = TRUE;
+                    }
                  }
                  else {
                      min_ptr = max_ptr - 1;
@@ -3016,6 +3062,23 @@ S_scan_const(pTHX_ char *start)
                      range_max = * (U8*) max_ptr;
                  }
  
+                /* If the range is just a single code point, like tr/a-a/.../,
+                 * that code point is already in the output, twice.  We can
+                 * just back up over the second instance and avoid all the rest
+                 * of the work.  But if it is a variant character, it's been
+                 * counted twice, so decrement.  (This unlikely scenario is
+                 * special cased, like the one for a range of 2 code points
+                 * below, only because the main-line code below needs a range
+                 * of 3 or more to work without special casing.  Might as well
+                 * get it out of the way now.) */
+                if (UNLIKELY(range_max == range_min)) {
+                    d = max_ptr;
+                    if (! has_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
+                        utf8_variant_count--;
+                    }
+                    goto range_done;
+                }
+
  #ifdef EBCDIC
                  /* On EBCDIC platforms, we may have to deal with portable
                   * ranges.  These happen if at least one range endpoint is a
@@ -3023,16 +3086,16 @@ S_scan_const(pTHX_ char *start)
                   * [A-Z] or [a-z], and both ends are literal characters,
                   * like 'A', and not like \x{C1} */
                  convert_unicode =
-                       cBOOL(backslash_N)   /* \N{} forces Unicode, hence
-                                               portable range */
-                      || (   ! non_portable_endpoint
-                          && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
-                             || (isUPPER_A(range_min) && isUPPER_A(range_max))));
+                               cBOOL(backslash_N)   /* \N{} forces Unicode,
+                                                       hence portable range */
+                    || (     ! non_portable_endpoint
+                        && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
+                           || (isUPPER_A(range_min) && isUPPER_A(range_max))));
                  if (convert_unicode) {
  
                      /* Special handling is needed for these portable ranges.
-                     * They are defined to all be in Unicode terms, which
-                     * include all Unicode code points between the end points.
+                     * They are defined to be in Unicode terms, which includes
+                     * all the Unicode code points between the end points.
                       * Convert to Unicode to get the Unicode range.  Later we
                       * will convert each code point in the range back to
                       * native.  */
@@ -3050,7 +3113,6 @@ S_scan_const(pTHX_ char *start)
                          range_max = UNI_TO_NATIVE(range_max);
                      }
  #endif
-
                      /* Use the characters themselves for the error message if
                       * ASCII printables; otherwise some visible representation
                       * of them */
@@ -3061,32 +3123,40 @@ S_scan_const(pTHX_ char *start)
                      }
  #ifdef EBCDIC
                      else if (convert_unicode) {
-                        /* diag_listed_as: Invalid range "%s" in transliteration operator */
+        /* diag_listed_as: Invalid range "%s" in transliteration operator */
                          Perl_croak(aTHX_
-                              "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04" UVXf "}\""
-                               " in transliteration operator",
-                              range_min, range_max);
+                           "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
+                           UVXf "}\" in transliteration operator",
+                           range_min, range_max);
                      }
  #endif
                      else {
-                        /* diag_listed_as: Invalid range "%s" in transliteration operator */
+        /* diag_listed_as: Invalid range "%s" in transliteration operator */
                          Perl_croak(aTHX_
-                              "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
-                               " in transliteration operator",
-                              range_min, range_max);
+                           "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
+                           " in transliteration operator",
+                           range_min, range_max);
                      }
                  }
  
+                /* If the range is exactly two code points long, they are
+                 * already both in the output */
+                if (UNLIKELY(range_min + 1 == range_max)) {
+                    goto range_done;
+                }
+
+                /* Here the range contains at least 3 code points */
+
                 if (has_utf8) {
  
                      /* If everything in the transliteration is below 256, we
                       * can avoid special handling later.  A translation table
-                     * of each of those bytes is created.  And so we expand out
-                     * all ranges to their constituent code points.  But if
-                     * we've encountered something above 255, the expanding
-                     * won't help, so skip doing that.  But if it's EBCDIC, we
-                     * may have to look at each character below 256 if we have
-                     * to convert to/from Unicode values */
+                     * for each of those bytes is created by op.c.  So we
+                     * expand out all ranges to their constituent code points.
+                     * But if we've encountered something above 255, the
+                     * expanding won't help, so skip doing that.  But if it's
+                     * EBCDIC, we may have to look at each character below 256
+                     * if we have to convert to/from Unicode values */
                      if (   has_above_latin1
  #ifdef EBCDIC
                         && (range_min > 255 || ! convert_unicode)
@@ -3095,7 +3165,7 @@ S_scan_const(pTHX_ char *start)
                          /* Move the high character one byte to the right; then
                           * insert between it and the range begin, an illegal
                           * byte which serves to indicate this is a range (using
-                         * a '-' could be ambiguous). */
+                         * a '-' would be ambiguous). */
                          char *e = d++;
                          while (e-- > max_ptr) {
                              *(e + 1) = *e;
@@ -3117,51 +3187,94 @@ S_scan_const(pTHX_ char *start)
                 }
  
                  /* Here we need to expand out the string to contain each
-                 * character in the range.  Grow the output to handle this */
+                 * character in the range.  Grow the output to handle this.
+                 * For non-UTF8, we need a byte for each code point in the
+                 * range, minus the three that we've already allocated for: the
+                 * hyphen, the min, and the max.  For UTF-8, we need this
+                 * plus an extra byte for each code point that occupies two
+                 * bytes (is variant) when in UTF-8 (except we've already
+                 * allocated for the end points, including if they are
+                 * variants).  For ASCII platforms and Unicode ranges on EBCDIC
+                 * platforms, it's easy to calculate a precise number.  To
+                 * start, we count the variants in the range, which we need
+                 * elsewhere in this function anyway.  (For the case where it
+                 * isn't easy to calculate, 'extras' has been initialized to 0,
+                 * and the calculation is done in a loop further down.) */
+#ifdef EBCDIC
+                if (convert_unicode)
+#endif
+                {
+                    /* This is executed unconditionally on ASCII, and for
+                     * Unicode ranges on EBCDIC.  Under these conditions, all
+                     * code points above a certain value are variant; and none
+                     * under that value are.  We just need to find out how much
+                     * of the range is above that value.  We don't count the
+                     * end points here, as they will already have been counted
+                     * as they were parsed. */
+                    if (range_min >= UTF_CONTINUATION_MARK) {
+
+                        /* The whole range is made up of variants */
+                        extras = (range_max - 1) - (range_min + 1) + 1;
+                    }
+                    else if (range_max >= UTF_CONTINUATION_MARK) {
  
-                save_offset  = min_ptr - SvPVX_const(sv);
+                        /* Only the higher portion of the range is variants */
+                        extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
+                    }
  
-                /* The base growth is the number of code points in the range */
-                grow = range_max - range_min + 1;
-                if (has_utf8) {
+                    utf8_variant_count += extras;
+                }
+
+                /* The base growth is the number of code points in the range,
+                 * not including the endpoints, which have already been sized
+                 * for (and output).  We don't subtract for the hyphen, as it
+                 * has been parsed but not output, and the SvGROW below is
+                 * based only on what's been output plus what's left to parse.
+                 * */
+                grow = (range_max - 1) - (range_min + 1) + 1;
  
-                    /* But if the output is UTF-8, some of those characters may
-                     * need two bytes (since the maximum range value here is
-                     * 255, the max bytes per character is two).  On ASCII
-                     * platforms, it's not much trouble to get an accurate
-                     * count of what's needed.  But on EBCDIC, the ones that
-                     * need 2 bytes are scattered around, so just use a worst
-                     * case value instead of calculating for that platform.  */
+                if (has_utf8) {
  #ifdef EBCDIC
-                    grow *= 2;
-#else
-                    /* Only those above 127 require 2 bytes.  This may be
-                     * everything in the range, or not */
-                    if (range_min > 127) {
+                    /* In some cases in EBCDIC, we haven't yet calculated a
+                     * precise amount needed for the UTF-8 variants.  Just
+                     * assume the worst case, that everything will expand by a
+                     * byte */
+                    if (! convert_unicode) {
                          grow *= 2;
                      }
-                    else if (range_max > 127) {
-                        grow += range_max - 127;
-                    }
+                    else
  #endif
+                    {
+                        /* Otherwise we know exactly how many variants there
+                         * are in the range. */
+                        grow += extras;
+                    }
                  }
  
-                /* Subtract 3 for the bytes that were already accounted for
-                 * (min, max, and the hyphen) */
-                d = save_offset + SvGROW(sv, SvLEN(sv) + grow - 3);
+                /* Grow, but position the output to overwrite the range min end
+                 * point, because in some cases we overwrite that */
+                SvCUR_set(sv, d - SvPVX_const(sv));
+                offset_to_min = min_ptr - SvPVX_const(sv);
  
+                /* See Note on sizing above. */
+                d = offset_to_min + SvGROW(sv, SvCUR(sv)
+                                             + (send - s)
+                                             + grow
+                                             + 1 /* Trailing NUL */ );
+
+                /* Now, we can expand out the range. */
  #ifdef EBCDIC
-                /* Here, we expand out the range. */
                  if (convert_unicode) {
-                    IV i;
+                    SSize_t i;
  
                      /* Recall that the min and max are now in Unicode terms, so
                       * we have to convert each character to its native
                       * equivalent */
                      if (has_utf8) {
                          for (i = range_min; i <= range_max; i++) {
-                            append_utf8_from_native_byte(LATIN1_TO_NATIVE((U8) i),
-                                                         (U8 **) &d);
+                            append_utf8_from_native_byte(
+                                                    LATIN1_TO_NATIVE((U8) i),
+                                                    (U8 **) &d);
                          }
                      }
                      else {
@@ -3174,34 +3287,51 @@ S_scan_const(pTHX_ char *start)
  #endif
                  /* Always gets run for ASCII, and sometimes for EBCDIC. */
                  {
-                    IV i;
-
                      /* Here, no conversions are necessary, which means that the
                       * first character in the range is already in 'd' and
                       * valid, so we can skip overwriting it */
                      if (has_utf8) {
+                        SSize_t i;
                          d += UTF8SKIP(d);
                          for (i = range_min + 1; i <= range_max; i++) {
                              append_utf8_from_native_byte((U8) i, (U8 **) &d);
                          }
                      }
                      else {
+                        SSize_t i;
                          d++;
-                        for (i = range_min + 1; i <= range_max; i++) {
+                        assert(range_min + 1 <= range_max);
+                        for (i = range_min + 1; i < range_max; i++) {
+#ifdef EBCDIC
+                            /* In this case on EBCDIC, we haven't calculated
+                             * the variants.  Do it here, as we go along */
+                            if (! UVCHR_IS_INVARIANT(i)) {
+                                utf8_variant_count++;
+                            }
+#endif
                              *d++ = (char)i;
                          }
+
+                        /* The range_max is done outside the loop so as to
+                         * avoid having to special case not incrementing
+                         * 'utf8_variant_count' on EBCDIC (it's already been
+                         * counted when originally parsed) */
+                        *d++ = (char) range_max;
                     }
                 }
  
  #ifdef EBCDIC
-                /* If the original range extended above 255, add in that portion. */
+                /* If the original range extended above 255, add in that
+                 * portion. */
                  if (real_range_max) {
                      *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
                      *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
-                    if (real_range_max > 0x101)
-                        *d++ = (char) ILLEGAL_UTF8_BYTE;
-                    if (real_range_max > 0x100)
+                    if (real_range_max > 0x100) {
+                        if (real_range_max > 0x101) {
+                            *d++ = (char) ILLEGAL_UTF8_BYTE;
+                        }
                          d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
+                    }
                  }
  #endif
  
@@ -3232,11 +3362,9 @@ S_scan_const(pTHX_ char *start)
             if (!esc)
                 in_charclass = FALSE;
         }
-
-       /* skip for regexp comments /(?#comment)/, except for the last
-        * char, which will be done separately.
-        * Stop on (?{..}) and friends */
-
+            /* skip for regexp comments /(?#comment)/, except for the last
+             * char, which will be done separately.  Stop on (?{..}) and
+             * friends */
         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
             if (s[2] == '#') {
                 while (s+1 < send && *s != ')')
@@ -3249,8 +3377,7 @@ S_scan_const(pTHX_ char *start)
                 break;
             }
         }
-
-       /* likewise skip #-initiated comments in //x patterns */
+            /* likewise skip #-initiated comments in //x patterns */
         else if (*s == '#'
                   && PL_lex_inpat
                   && !in_charclass
@@ -3259,14 +3386,13 @@ S_scan_const(pTHX_ char *start)
             while (s < send && *s != '\n')
                 *d++ = *s++;
         }
-
-       /* no further processing of single-quoted regex */
+            /* no further processing of single-quoted regex */
         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
             goto default_action;
  
-       /* check for embedded arrays
-          (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
-          */
+            /* check for embedded arrays
+             * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
+             */
         else if (*s == '@' && s[1]) {
             if (UTF
                 ? isIDFIRST_utf8_safe(s+1, send)
@@ -3279,10 +3405,8 @@ S_scan_const(pTHX_ char *start)
             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
                 break; /* in regexp, neither @+ nor @- are interpolated */
         }
-
-       /* check for embedded scalars.  only stop if we're sure it's a
-          variable.
-        */
+            /* check for embedded scalars.  only stop if we're sure it's a
+             * variable.  */
         else if (*s == '$') {
             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
                 break;
@@ -3391,7 +3515,7 @@ S_scan_const(pTHX_ char *start)
                                                 UTF);
                     if (! valid) {
                         yyerror(error);
-                       continue;
+                       uv = 0; /* drop through to ensure range ends are set */
                     }
                     goto NUM_ESCAPE_INSERT;
                 }
@@ -3409,13 +3533,13 @@ S_scan_const(pTHX_ char *start)
                                                 UTF);
                     if (! valid) {
                         yyerror(error);
-                       continue;
+                       uv = 0; /* drop through to ensure range ends are set */
                     }
                 }
  
               NUM_ESCAPE_INSERT:
                 /* Insert oct or hex escaped character. */
-               
+
                 /* Here uv is the ordinal of the next character being added */
                 if (UVCHR_IS_INVARIANT(uv)) {
                     *d++ = (char) uv;
@@ -3504,11 +3628,12 @@ S_scan_const(pTHX_ char *start)
                   * For non-patterns, the named characters are converted to
                   * their string equivalents.  In patterns, named characters are
                   * not converted to their ultimate forms for the same reasons
-                 * that other escapes aren't.  Instead, they are converted to
-                 * the \N{U+...} form to get the value from the charnames that
-                 * is in effect right now, while preserving the fact that it
-                 * was a named character, so that the regex compiler knows
-                 * this.
+                 * that other escapes aren't (mainly that the ultimate
+                 * character could be considered a meta-symbol by the regex
+                 * compiler).  Instead, they are converted to the \N{U+...}
+                 * form to get the value from the charnames that is in effect
+                 * right now, while preserving the fact that it was a named
+                 * character, so that the regex compiler knows this.
                   *
                  * The structure of this section of code (besides checking for
                  * errors and upgrading to utf8) is:
@@ -3529,6 +3654,7 @@ S_scan_const(pTHX_ char *start)
                 s++;
                 if (*s != '{') {
                     yyerror("Missing braces on \\N{}");
+                    *d++ = '\0';
                     continue;
                 }
                 s++;
@@ -3540,7 +3666,7 @@ S_scan_const(pTHX_ char *start)
                     } else {
                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
                     }
-                   continue;
+                    yyquit(); /* Have exhausted the input. */
                 }
  
                 /* Here it looks like a named character */
@@ -3559,6 +3685,7 @@ S_scan_const(pTHX_ char *start)
                                  "Invalid hexadecimal number in \\N{U+...}"
                              );
                              s = e + 1;
+                            *d++ = '\0';
                              continue;
                          }
                          while (++s < e) {
@@ -3757,6 +3884,7 @@ S_scan_const(pTHX_ char *start)
                                      " in transliteration operator",
                                          /*  +1 to include the "}" */
                                      (int) (e + 1 - start), start));
+                                *d++ = '\0';
                                  goto end_backslash_N;
                              }
  
@@ -3822,15 +3950,16 @@ S_scan_const(pTHX_ char *start)
             case 'c':
                 s++;
                 if (s < send) {
-                   *d++ = grok_bslash_c(*s++, 1);
+                   *d++ = grok_bslash_c(*s, 1);
                 }
                 else {
                     yyerror("Missing control char name in \\c");
+                   yyquit();   /* Are at end of input, no sense continuing */
                 }
  #ifdef EBCDIC
                  non_portable_endpoint++;
  #endif
-               continue;
+                break;
  
             /* printf-style backslashes, formfeeds, newlines, etc */
             case 'b':
@@ -4075,10 +4204,7 @@ S_intuit_more(pTHX_ char *s)
                 weight -= seen[un_char] * 10;
                 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
                     int len;
-                    char *tmp = PL_bufend;
-                    PL_bufend = (char*)send;
-                    scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
-                    PL_bufend = tmp;
+                   scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
                     len = (int)strlen(tmpbuf);
                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
                                                      UTF ? SVf_UTF8 : 0, SVt_PV))
@@ -4199,11 +4325,14 @@ S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
      }
  
      if (*start == '$') {
+        SSize_t start_off = start - SvPVX(PL_linestr);
         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
              || isUPPER(*PL_tokenbuf))
             return 0;
-       s = skipspace(s);
-       PL_bufptr = start;
+        /* this could be $# */
+        if (isSPACE(*s))
+            s = skipspace(s);
+       PL_bufptr = SvPVX(PL_linestr) + start_off;
         PL_expect = XREF;
         return *s == '(' ? FUNCMETH : METHOD;
      }
@@ -4316,8 +4445,8 @@ Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
                     PL_parser->last_uni = buf + last_uni_pos;
                 if (PL_parser->last_lop)
                     PL_parser->last_lop = buf + last_lop_pos;
-               SvLEN(linestr) = SvCUR(linestr);
-               SvCUR(linestr) = s-SvPVX(linestr);
+               SvLEN_set(linestr, SvCUR(linestr));
+               SvCUR_set(linestr, s - SvPVX(linestr));
                 PL_parser->filtered = 1;
                 break;
             }
@@ -4503,6 +4632,7 @@ S_tokenize_use(pTHX_ int is_use, char *s) {
      PERL_ARGS_ASSERT_TOKENIZE_USE;
  
      if (PL_expect != XSTATE)
+       /* diag_listed_as: "use" not allowed in expression */
         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
                     is_use ? "use" : "no"));
      PL_expect = XTERM;
@@ -4648,6 +4778,20 @@ Perl_yylex(pTHX)
      GV *gv = NULL;
      GV **gvp = NULL;
  
+    if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
+        const U8* first_bad_char_loc;
+        if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
+                                                        PL_bufend - PL_bufptr,
+                                                        &first_bad_char_loc)))
+        {
+            _force_out_malformed_utf8_message(first_bad_char_loc,
+                                              (U8 *) PL_bufend,
+                                              0,
+                                              1 /* 1 means die */ );
+            NOT_REACHED; /* NOTREACHED */
+        }
+        PL_parser->recheck_utf8_validity = FALSE;
+    }
      DEBUG_T( {
         SV* tmp = newSVpvs("");
         PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
@@ -4895,7 +5039,16 @@ Perl_yylex(pTHX)
             s = PL_bufend;
         }
         else {
+            int save_error_count = PL_error_count;
+
             s = scan_const(PL_bufptr);
+
+            /* Set flag if this was a pattern and there were errors.  op.c will
+             * refuse to compile a pattern with this flag set.  Otherwise, we
+             * could get segfaults, etc. */
+            if (PL_lex_inpat && PL_error_count > save_error_count) {
+                ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
+            }
             if (*s == '\\')
                 PL_lex_state = LEX_INTERPCASEMOD;
             else
@@ -4946,7 +5099,7 @@ Perl_yylex(pTHX)
           * as a var; e.g. ($, ...) would be seen as the var '$,'
           */
  
-        char sigil;
+        U8 sigil;
  
          s = skipspace(s);
          sigil = *s++;
@@ -4974,12 +5127,43 @@ Perl_yylex(pTHX)
                      0, cBOOL(UTF), FALSE);
                  *dest = '\0';
                  assert(PL_tokenbuf[1]); /* we have a variable name */
+            }
+            else {
+                *PL_tokenbuf = 0;
+                PL_in_my = 0;
+            }
+
+            s = skipspace(s);
+            /* parse the = for the default ourselves to avoid '+=' etc being accepted here
+             * as the ASSIGNOP, and exclude other tokens that start with =
+             */
+            if (*s == '=' && (!s[1] || strchr("=~>", s[1]) == 0)) {
+                /* save now to report with the same context as we did when
+                 * all ASSIGNOPS were accepted */
+                PL_oldbufptr = s;
+
+                ++s;
+                NEXTVAL_NEXTTOKE.ival = 0;
+                force_next(ASSIGNOP);
+                PL_expect = XTERM;
+            }
+            else if (*s == ',' || *s == ')') {
+                PL_expect = XOPERATOR;
+            }
+            else {
+                /* make sure the context shows the unexpected character and
+                 * hopefully a bit more */
+                if (*s) ++s;
+                while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
+                    s++;
+                PL_bufptr = s; /* for error reporting */
+                yyerror("Illegal operator following parameter in a subroutine signature");
+                PL_in_my = 0;
+            }
+            if (*PL_tokenbuf) {
                  NEXTVAL_NEXTTOKE.ival = sigil;
                  force_next('p'); /* force a signature pending identifier */
              }
-            else
-                PL_in_my = 0;
-            PL_expect = XOPERATOR;
              break;
  
          case ')':
@@ -5004,12 +5188,6 @@ Perl_yylex(pTHX)
      switch (*s) {
      default:
         if (UTF) {
-            if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) {
-                _force_out_malformed_utf8_message((U8 *) s, (U8 *) PL_bufend,
-                                                  0,
-                                                  1 /* 1 means die */ );
-                NOT_REACHED; /* NOTREACHED */
-            }
              if (isIDFIRST_utf8_safe(s, PL_bufend)) {
                  goto keylookup;
              }
@@ -5031,12 +5209,23 @@ Perl_yylex(pTHX)
          else {
              c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
          }
-        len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
-        if (len > UNRECOGNIZED_PRECEDE_COUNT) {
-            d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)PL_linestart) : s - UNRECOGNIZED_PRECEDE_COUNT;
-        } else {
+
+        if (s >= PL_linestart) {
              d = PL_linestart;
          }
+        else {
+            /* somehow (probably due to a parse failure), PL_linestart has advanced
+             * pass PL_bufptr, get a reasonable beginning of line
+             */
+            d = s;
+            while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
+                --d;
+        }
+        len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
+        if (len > UNRECOGNIZED_PRECEDE_COUNT) {
+            d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
+        }
+
          Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
                            UTF8fARG(UTF, (s - d), d),
                           (int) len + 1);
@@ -5105,9 +5294,10 @@ Perl_yylex(pTHX)
                     sv_catpvs(PL_linestr,"chomp;");
                 if (PL_minus_a) {
                     if (PL_minus_F) {
-                       if ((*PL_splitstr == '/' || *PL_splitstr == '\''
-                            || *PL_splitstr == '"')
-                             && strchr(PL_splitstr + 1, *PL_splitstr))
+                        if (   (   *PL_splitstr == '/'
+                                || *PL_splitstr == '\''
+                                || *PL_splitstr == '"')
+                            && strchr(PL_splitstr + 1, *PL_splitstr))
                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
                         else {
                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
@@ -5140,7 +5330,7 @@ Perl_yylex(pTHX)
         }
         do {
             fake_eof = 0;
-           bof = PL_rsfp ? TRUE : FALSE;
+           bof = cBOOL(PL_rsfp);
             if (0) {
               fake_eof:
                 fake_eof = LEX_FAKE_EOF;
@@ -5157,10 +5347,10 @@ Perl_yylex(pTHX)
             /* If it looks like the start of a BOM or raw UTF-16,
              * check if it in fact is. */
             if (bof && PL_rsfp
-                && (*s == 0
+                && (   *s == 0
                      || *(U8*)s == BOM_UTF8_FIRST_BYTE
-                        || *(U8*)s >= 0xFE
-                        || s[1] == 0))
+                    || *(U8*)s >= 0xFE
+                    || s[1] == 0))
              {
                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
                 bof = (offset == (Off_t)SvCUR(PL_linestr));
@@ -5263,8 +5453,6 @@ Perl_yylex(pTHX)
                 d = instr(s,"perl -");
                 if (!d) {
                     d = instr(s,"perl");
-                    if (d && d[4] == '6')
-                        d = NULL;
  #if defined(DOSISH)
                     /* avoid getting into infinite loops when shebang
                      * line contains "Perl" rather than "perl" */
@@ -5419,10 +5607,6 @@ Perl_yylex(pTHX)
                  d++;
              if (d < PL_bufend)
                  d++;
-            else if (d > PL_bufend)
-                /* Found by Ilya: feed random input to Perl. */
-                Perl_croak(aTHX_ "panic: input overflow, %p > %p",
-                           d, PL_bufend);
              s = d;
              if (in_comment && d == PL_bufend
                  && PL_lex_state == LEX_INTERPNORMAL
@@ -5445,9 +5629,6 @@ Perl_yylex(pTHX)
                      if (s < PL_bufend)
                          incline(s);
                  }
-            else if (s > PL_bufend)
-                /* Found by Ilya: feed random input to Perl. */
-                Perl_croak(aTHX_ "panic: input overflow");
         }
         goto retry;
      case '-':
@@ -5642,8 +5823,7 @@ Perl_yylex(pTHX)
         }
         else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
         PL_tokenbuf[0] = '%';
-       s = scan_ident(s, PL_tokenbuf + 1,
-               sizeof PL_tokenbuf - 1, FALSE);
+       s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
         pl_yylval.ival = 0;
         if (!PL_tokenbuf[1]) {
             PREREF('%');
@@ -5767,27 +5947,12 @@ Perl_yylex(pTHX)
                     PL_lex_stuff = NULL;
                 }
                 else {
-                   if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
-                       sv_free(sv);
-                       if (PL_in_my == KEY_our) {
-                            deprecate_disappears_in("5.28",
-                                "Attribute \"unique\" is deprecated");
-                       }
-                       else
-                           Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
-                   }
-
                     /* NOTE: any CV attrs applied here need to be part of
                        the CVf_BUILTIN_ATTRS define in cv.h! */
-                   else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
+                   if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
                         sv_free(sv);
                         CvLVALUE_on(PL_compcv);
                     }
-                   else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
-                       sv_free(sv);
-                        deprecate_disappears_in("5.28",
-                            "Attribute \"locked\" is deprecated");
-                   }
                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
                         sv_free(sv);
                         CvMETHOD_on(PL_compcv);
@@ -6091,8 +6256,10 @@ Perl_yylex(pTHX)
                         break;
                     }
                     if (strEQs(s, "sub")) {
+                        PL_bufptr = s;
                         d = s + 3;
                         d = skipspace(d);
+                        s = PL_bufptr;
                         if (*d == ':') {
                             PL_expect = XTERM;
                             break;
@@ -6189,8 +6356,7 @@ Perl_yylex(pTHX)
         }
  
         PL_tokenbuf[0] = '&';
-       s = scan_ident(s - 1, PL_tokenbuf + 1,
-                      sizeof PL_tokenbuf - 1, TRUE);
+       s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
         if (PL_tokenbuf[1]) {
             force_ident_maybe_lex('&');
@@ -6256,8 +6422,9 @@ Perl_yylex(pTHX)
                  && isALPHA(tmp)
                  && (s == PL_linestart+1 || s[-2] == '\n') )
              {
-                if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
-                    || PL_lex_state != LEX_NORMAL) {
+                if (   (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
+                    || PL_lex_state != LEX_NORMAL)
+                {
                      d = PL_bufend;
                      while (s < d) {
                          if (*s++ == '\n') {
@@ -6434,12 +6601,7 @@ Perl_yylex(pTHX)
      case '$':
         CLINE;
  
-       if (PL_expect == XOPERATOR) {
-           if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
-               return deprecate_commaless_var_list();
-           }
-       }
-       else if (PL_expect == XPOSTDEREF) {
+        if (PL_expect == XPOSTDEREF) {
             if (s[1] == '#') {
                 s++;
                 POSTDEREF(DOLSHARP);
@@ -6470,8 +6632,7 @@ Perl_yylex(pTHX)
         }
  
         PL_tokenbuf[0] = '$';
-       s = scan_ident(s, PL_tokenbuf + 1,
-                      sizeof PL_tokenbuf - 1, FALSE);
+       s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
         if (PL_expect == XOPERATOR) {
             d = s;
             if (PL_bufptr > s) {
@@ -6520,28 +6681,28 @@ Perl_yylex(pTHX)
                     PL_tokenbuf[0] = '%';
                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
-                       {
-                           char tmpbuf[sizeof PL_tokenbuf];
-                           do {
-                               t++;
-                           } while (isSPACE(*t));
-                           if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
-                               STRLEN len;
-                               t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
-                                             &len);
-                               while (isSPACE(*t))
-                                   t++;
-                                if (  *t == ';'
-                                    && get_cvn_flags(tmpbuf, len, UTF
-                                                                  ? SVf_UTF8
-                                                                  : 0))
-                                {
-                                   Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                                       "You need to quote \"%" UTF8f "\"",
-                                        UTF8fARG(UTF, len, tmpbuf));
-                                }
-                           }
-                       }
+                    {
+                        char tmpbuf[sizeof PL_tokenbuf];
+                        do {
+                            t++;
+                        } while (isSPACE(*t));
+                        if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
+                            STRLEN len;
+                            t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
+                                            &len);
+                            while (isSPACE(*t))
+                                t++;
+                            if (  *t == ';'
+                                && get_cvn_flags(tmpbuf, len, UTF
+                                                                ? SVf_UTF8
+                                                                : 0))
+                            {
+                                Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+                                    "You need to quote \"%" UTF8f "\"",
+                                        UTF8fARG(UTF, len, tmpbuf));
+                            }
+                        }
+                    }
                 }
             }
  
@@ -6692,7 +6853,7 @@ Perl_yylex(pTHX)
         }
         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
             s += 3;
-           OPERATOR(YADAYADA);
+           TERM(YADAYADA);
         }
         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
             char tmp = *s++;
@@ -6730,10 +6891,6 @@ Perl_yylex(pTHX)
         TERM(THING);
  
      case '\'':
-       if (   PL_expect == XOPERATOR
-           && (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack))
-               return deprecate_commaless_var_list();
-
         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
         if (!s)
             missingterm(NULL);
@@ -6746,10 +6903,6 @@ Perl_yylex(pTHX)
         TERM(sublex_start());
  
      case '"':
-       if (   PL_expect == XOPERATOR
-           && (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack))
-               return deprecate_commaless_var_list();
-
         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
         DEBUG_T( {
             if (s)
@@ -7029,8 +7182,10 @@ Perl_yylex(pTHX)
             else {                      /* no override */
                 tmp = -tmp;
                 if (tmp == KEY_dump) {
-                   Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
-                                  "dump() better written as CORE::dump()");
+                   Perl_ck_warner_d(aTHX_ packWARN2(WARN_MISC,WARN_DEPRECATED),
+                                    "dump() better written as CORE::dump(). "
+                                     "dump() will no longer be available "
+                                     "in Perl 5.30");
                 }
                 gv = NULL;
                 gvp = 0;
@@ -7073,6 +7228,7 @@ Perl_yylex(pTHX)
                 orig_keyword = 0;
                 lex = 0;
                 off = 0;
+            /* FALLTHROUGH */
         default:                        /* not a keyword */
           just_a_word: {
                 int pkgname = 0;
@@ -7181,17 +7337,24 @@ Perl_yylex(pTHX)
                                                                 == OA_FILEREF))
                 {
                     bool immediate_paren = *s == '(';
+                    SSize_t s_off;
  
                     /* (Now we can afford to cross potential line boundary.) */
                     s = skipspace(s);
  
+                    /* intuit_method() can indirectly call lex_next_chunk(),
+                     * invalidating s
+                     */
+                    s_off = s - SvPVX(PL_linestr);
                     /* Two barewords in a row may indicate method call. */
                     if (   (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
                              || *s == '$')
                          && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
                      {
+                        /* the code at method: doesn't use s */
                         goto method;
                     }
+                    s = SvPVX(PL_linestr) + s_off;
  
                     /* If not a declared subroutine, it's an indirect object. */
                     /* (But it's an indir obj regardless for sort.) */
@@ -7715,7 +7878,7 @@ Perl_yylex(pTHX)
  
         case KEY_exists:
             UNI(OP_EXISTS);
-       
+
         case KEY_exit:
             UNI(OP_EXIT);
  
@@ -7774,6 +7937,7 @@ Perl_yylex(pTHX)
                  && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
              {
                 char *p = s;
+                SSize_t s_off = s - SvPVX(PL_linestr);
  
                 if ((PL_bufend - p) >= 3
                      && strEQs(p, "my") && isSPACE(*(p + 2)))
@@ -7791,6 +7955,9 @@ Perl_yylex(pTHX)
                 }
                 if (*p != '$' && *p != '\\')
                     Perl_croak(aTHX_ "Missing $ on loop variable");
+
+                /* The buffer may have been reallocated, update s */
+                s = SvPVX(PL_linestr) + s_off;
             }
             OPERATOR(FOR);
  
@@ -7951,7 +8118,7 @@ Perl_yylex(pTHX)
  
         case KEY_last:
             LOOPX(OP_LAST);
-       
+
         case KEY_lc:
             UNI(OP_LC);
  
@@ -8131,7 +8298,7 @@ Perl_yylex(pTHX)
  
         case KEY_pos:
             UNIDOR(OP_POS);
-       
+
         case KEY_pack:
             LOP(OP_PACK,XTERM);
  
@@ -8319,7 +8486,7 @@ Perl_yylex(pTHX)
  
         case KEY_chomp:
             UNI(OP_CHOMP);
-       
+
         case KEY_scalar:
             UNI(OP_SCALAR);
  
@@ -8505,7 +8672,8 @@ Perl_yylex(pTHX)
                     COPLINE_SET_FROM_MULTI_END;
                     if (!s)
                         Perl_croak(aTHX_ "Prototype not terminated");
-                   (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
+                   (void)validate_proto(PL_subname, PL_lex_stuff,
+                                        ckWARN(WARN_ILLEGALPROTO), 0);
                     have_proto = TRUE;
  
                     s = skipspace(s);
@@ -8740,8 +8908,11 @@ S_pending_ident(pTHX)
      if (PL_in_my) {
          if (PL_in_my == KEY_our) {     /* "our" is merely analogous to "my" */
              if (has_colon)
+                /* diag_listed_as: No package name allowed for variable %s
+                                   in "our" */
                  yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
-                                  "variable %s in \"our\"",
+                                  "%se %s in \"our\"",
+                                  *PL_tokenbuf=='&' ?"subroutin":"variabl",
                                    PL_tokenbuf), UTF ? SVf_UTF8 : 0);
              tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
          }
@@ -8901,7 +9072,6 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
             s++;
         if (*s == ',') {
             GV* gv;
-           PADOFFSET off;
             if (keyword(w, s - w, 0))
                 return;
  
@@ -8909,6 +9079,7 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
             if (gv && GvCVu(gv))
                 return;
             if (s - w <= 254) {
+                PADOFFSET off;
                 char tmpbuf[256];
                 Copy(w, tmpbuf+1, s - w, char);
                 *tmpbuf = '&';
@@ -8959,7 +9130,7 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
         || ! SvOK(*cvp))
      {
         char *msg;
-       
+
         /* Here haven't found what we're looking for.  If it is charnames,
          * perhaps it needs to be loaded.  Try doing that before giving up */
         if (*key == 'c') {
@@ -9245,19 +9416,36 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
          bool skip;
          char *s2;
          /* If we were processing {...} notation then...  */
-        if (isIDFIRST_lazy_if_safe(d, e, is_utf8)) {
-            /* if it starts as a valid identifier, assume that it is one.
-               (the later check for } being at the expected point will trap
-               cases where this doesn't pan out.)  */
-            d += is_utf8 ? UTF8SKIP(d) : 1;
-            parse_ident(&s, &d, e, 1, is_utf8, TRUE);
-           *d = '\0';
+        if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
+            || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
+                 && isWORDCHAR(*s))
+        ) {
+            /* note we have to check for a normal identifier first,
+             * as it handles utf8 symbols, and only after that has
+             * been ruled out can we look at the caret words */
+            if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
+                /* if it starts as a valid identifier, assume that it is one.
+                   (the later check for } being at the expected point will trap
+                   cases where this doesn't pan out.)  */
+                d += is_utf8 ? UTF8SKIP(d) : 1;
+                parse_ident(&s, &d, e, 1, is_utf8, TRUE);
+                *d = '\0';
+            }
+            else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
+                d++;
+                while (isWORDCHAR(*s) && d < e) {
+                    *d++ = *s++;
+                }
+                if (d >= e)
+                    Perl_croak(aTHX_ "%s", ident_too_long);
+                *d = '\0';
+            }
              tmp_copline = CopLINE(PL_curcop);
              if (s < PL_bufend && isSPACE(*s)) {
                  s = skipspace(s);
              }
             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
-                /* ${foo[0]} and ${foo{bar}} notation.  */
+                /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation.  */
                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
                     const char * const brack =
                         (const char *)
@@ -9276,26 +9464,16 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
                 return s;
             }
         }
-       /* Handle extended ${^Foo} variables
-        * 1999-02-27 mjd-perl-patch@plover.com */
-       else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
-                && isWORDCHAR(*s))
-       {
-           d++;
-           while (isWORDCHAR(*s) && d < e) {
-               *d++ = *s++;
-           }
-           if (d >= e)
-               Perl_croak(aTHX_ "%s", ident_too_long);
-           *d = '\0';
-       }
  
          if ( !tmp_copline )
              tmp_copline = CopLINE(PL_curcop);
-        if ((skip = s < PL_bufend && isSPACE(*s)))
+        if ((skip = s < PL_bufend && isSPACE(*s))) {
              /* Avoid incrementing line numbers or resetting PL_linestart,
                 in case we have to back up.  */
+            STRLEN s_off = s - SvPVX(PL_linestr);
              s2 = peekspace(s);
+            s = SvPVX(PL_linestr) + s_off;
+        }
          else
              s2 = s;
  
@@ -9572,18 +9750,14 @@ S_scan_subst(pTHX_ char *start)
  
         PL_multi_end = 0;
         pm->op_pmflags |= PMf_EVAL;
-       while (es-- > 0) {
-           if (es)
-               sv_catpvs(repl, "eval ");
-           else
-               sv_catpvs(repl, "do ");
-       }
-       sv_catpvs(repl, "{");
+        for (; es > 1; es--) {
+            sv_catpvs(repl, "eval ");
+        }
+        sv_catpvs(repl, "do {");
         sv_catsv(repl, PL_parser->lex_sub_repl);
         sv_catpvs(repl, "}");
         SvREFCNT_dec(PL_parser->lex_sub_repl);
         PL_parser->lex_sub_repl = repl;
-        es = 1;
      }
  
  
@@ -9739,11 +9913,9 @@ S_scan_heredoc(pTHX_ char *s)
         else
             term = '"';
         if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
-           deprecate_fatal_in("5.28", "Use of bare << to mean <<\"\" is deprecated");
+           Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
         peek = s;
-        while (
-               isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF))
-        {
+        while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
             peek += UTF ? UTF8SKIP(peek) : 1;
         }
         len = (peek - s >= e - d) ? (e - d) : (peek - s);
@@ -10289,7 +10461,7 @@ S_scan_inputsymbol(pTHX_ char *start)
         ($*@)           sub prototypes          sub foo ($)
         (stuff)         sub attr parameters     sub foo : attr(stuff)
         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
-       
+
     In most of these cases (all but <>, patterns and transliterate)
     yylex() calls scan_str().  m// makes yylex() call scan_pat() which
     calls scan_str().  s/// makes yylex() call scan_subst() which calls
@@ -10333,7 +10505,7 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
      const char * non_grapheme_msg = "Use of unassigned code point or"
                                      " non-standalone grapheme for a delimiter"
                                      " will be a fatal error starting in Perl"
-                                    " v5.30";
+                                    " 5.30";
      /* The only non-UTF character that isn't a stand alone grapheme is
       * white-space, hence can't be a delimiter.  So can skip for non-UTF-8 */
      bool check_grapheme = UTF && ckWARN_d(WARN_DEPRECATED);
@@ -10432,10 +10604,15 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
                 }
                 /* terminate when run out of buffer (the for() condition), or
                    have found the terminator */
-               else if (*s == term) {
-                   if (termlen == 1)
+               else if (*s == term) {  /* First byte of terminator matches */
+                   if (termlen == 1)   /* If is the only byte, are done */
                         break;
-                   if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
+
+                    /* If the remainder of the terminator matches, also are
+                     * done, after checking that is a separate grapheme */
+                    if (   s + termlen <= PL_bufend
+                        && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
+                    {
                          if (   check_grapheme
                              && UNLIKELY(! _is_grapheme((U8 *) start,
                                                                (U8 *) s,
@@ -10446,13 +10623,16 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
                                          "%s", non_grapheme_msg);
                          }
                         break;
+                    }
                 }
-               else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
+               else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
                     has_utf8 = TRUE;
+                }
+
                 *to = *s;
             }
         }
-       
+
         /* if the terminator isn't the same as the start character (e.g.,
            matched brackets), we have to allow more in the quoting, and
            be prepared for nested brackets.
@@ -10510,7 +10690,7 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
             to[-1] = '\n';
  #endif
-       
+
         /* if we're out of file, or a read fails, bail and reset the current
            line marker so we can report where the unterminated string began
         */
@@ -10589,6 +10769,15 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
      bool floatit;                      /* boolean: int or float? */
      const char *lastub = NULL;         /* position of last underbar */
      static const char* const number_too_long = "Number too long";
+    bool warned_about_underscore = 0;
+#define WARN_ABOUT_UNDERSCORE() \
+       do { \
+           if (!warned_about_underscore) { \
+               warned_about_underscore = 1; \
+               Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
+                              "Misplaced _ in number"); \
+           } \
+       } while(0)
      /* Hexadecimal floating point.
       *
       * In many places (where we have quads and NV is IEEE 754 double)
@@ -10673,8 +10862,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
             }
  
             if (*s == '_') {
-               Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
-                              "Misplaced _ in number");
+               WARN_ABOUT_UNDERSCORE();
                lastub = s++;
             }
  
@@ -10697,8 +10885,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                 /* _ are ignored -- but warned about if consecutive */
                 case '_':
                     if (lastub && s == lastub + 1)
-                       Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
-                                      "Misplaced _ in number");
+                       WARN_ABOUT_UNDERSCORE();
                     lastub = s++;
                     break;
  
@@ -10783,9 +10970,8 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
           out:
  
             /* final misplaced underbar check */
-           if (s[-1] == '_') {
-               Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
-           }
+           if (s[-1] == '_')
+               WARN_ABOUT_UNDERSCORE();
  
              if (UNLIKELY(HEXFP_PEEK(s))) {
                  /* Do sloppy (on the underbars) but quick detection
@@ -10994,8 +11180,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
             */
             if (*s == '_') {
                 if (lastub && s == lastub + 1)
-                   Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
-                                  "Misplaced _ in number");
+                   WARN_ABOUT_UNDERSCORE();
                 lastub = s++;
             }
             else {
@@ -11008,9 +11193,8 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
         }
  
         /* final misplaced underbar check */
-       if (lastub && s == lastub + 1) {
-           Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
-       }
+       if (lastub && s == lastub + 1)
+           WARN_ABOUT_UNDERSCORE();
  
         /* read a decimal portion if there is one.  avoid
            3..5 being interpreted as the number 3. followed
@@ -11021,8 +11205,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
             *d++ = *s++;
  
             if (*s == '_') {
-               Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
-                              "Misplaced _ in number");
+               WARN_ABOUT_UNDERSCORE();
                 lastub = s;
             }
  
@@ -11038,18 +11221,15 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                     Perl_croak(aTHX_ "%s", number_too_long);
                 if (*s == '_') {
                    if (lastub && s == lastub + 1)
-                      Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
-                                     "Misplaced _ in number");
+                       WARN_ABOUT_UNDERSCORE();
                    lastub = s;
                 }
                 else
                     *d++ = *s;
             }
             /* fractional part ending in underbar? */
-           if (s[-1] == '_') {
-               Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
-                              "Misplaced _ in number");
-           }
+           if (s[-1] == '_')
+               WARN_ABOUT_UNDERSCORE();
             if (*s == '.' && isDIGIT(s[1])) {
                 /* oops, it's really a v-string, but without the "v" */
                 s = start;
@@ -11062,9 +11242,11 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
              && strchr("+-0123456789_", s[1]))
          {
-            floatit = TRUE;
+            int exp_digits = 0;
+            const char *save_s = s;
+            char * save_d = d;
  
-           /* regardless of whether user said 3E5 or 3e5, use lower 'e',
+            /* regardless of whether user said 3E5 or 3e5, use lower 'e',
                 ditto for p (hexfloats) */
              if ((isALPHA_FOLD_EQ(*s, 'e'))) {
                 /* At least some Mach atof()s don't grok 'E' */
@@ -11079,8 +11261,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
  
             /* stray preinitial _ */
             if (*s == '_') {
-               Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
-                              "Misplaced _ in number");
+               WARN_ABOUT_UNDERSCORE();
                 lastub = s++;
             }
  
@@ -11090,14 +11271,14 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
  
             /* stray initial _ */
             if (*s == '_') {
-               Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
-                              "Misplaced _ in number");
+               WARN_ABOUT_UNDERSCORE();
                 lastub = s++;
             }
  
             /* read digits of exponent */
             while (isDIGIT(*s) || *s == '_') {
                 if (isDIGIT(*s)) {
+                    ++exp_digits;
                     if (d >= e)
                         Perl_croak(aTHX_ "%s", number_too_long);
                     *d++ = *s++;
@@ -11105,11 +11286,24 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                 else {
                    if (((lastub && s == lastub + 1)
                          || (!isDIGIT(s[1]) && s[1] != '_')))
-                      Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
-                                     "Misplaced _ in number");
+                       WARN_ABOUT_UNDERSCORE();
                    lastub = s++;
                 }
             }
+
+            if (!exp_digits) {
+                /* no exponent digits, the [eEpP] could be for something else,
+                 * though in practice we don't get here for p since that's preparsed
+                 * earlier, and results in only the 0xX being consumed, so behave similarly
+                 * for decimal floats and consume only the D.DD, leaving the [eE] to the
+                 * next token.
+                 */
+                s = save_s;
+                d = save_d;
+            }
+            else {
+                floatit = TRUE;
+            }
         }
  
  
@@ -11191,8 +11385,6 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
  STATIC char *
  S_scan_formline(pTHX_ char *s)
  {
-    char *eol;
-    char *t;
      SV * const stuff = newSVpvs("");
      bool needargs = FALSE;
      bool eofmt = FALSE;
@@ -11200,8 +11392,9 @@ S_scan_formline(pTHX_ char *s)
      PERL_ARGS_ASSERT_SCAN_FORMLINE;
  
      while (!needargs) {
+        char *eol;
         if (*s == '.') {
-           t = s+1;
+            char *t = s+1;
  #ifdef PERL_STRICT_CR
             while (SPACE_OR_TAB(*t))
                 t++;
@@ -11218,6 +11411,7 @@ S_scan_formline(pTHX_ char *s)
         if (!eol++)
                 eol = PL_bufend;
         if (*s != '#') {
+            char *t;
             for (t = s; t < eol; t++) {
                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
                     needargs = FALSE;
@@ -11319,6 +11513,29 @@ S_yywarn(pTHX_ const char *const s, U32 flags)
      return 0;
  }
  
+void
+Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
+{
+    PERL_ARGS_ASSERT_ABORT_EXECUTION;
+
+    if (PL_minus_c)
+        Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
+    else {
+        Perl_croak(aTHX_
+                "%sExecution of %s aborted due to compilation errors.\n", msg, name);
+    }
+    NOT_REACHED; /* NOTREACHED */
+}
+
+void
+Perl_yyquit(pTHX)
+{
+    /* Called, after at least one error has been found, to abort the parse now,
+     * instead of trying to forge ahead */
+
+    yyerror_pvn(NULL, 0, 0);
+}
+
  int
  Perl_yyerror(pTHX_ const char *const s)
  {
@@ -11342,100 +11559,120 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
      SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
      int yychar  = PL_parser->yychar;
  
-    PERL_ARGS_ASSERT_YYERROR_PVN;
-
-    if (!yychar || (yychar == ';' && !PL_rsfp))
-       sv_catpvs(where_sv, "at EOF");
-    else if (   PL_oldoldbufptr
-             && PL_bufptr > PL_oldoldbufptr
-             && PL_bufptr - PL_oldoldbufptr < 200
-             && PL_oldoldbufptr != PL_oldbufptr
-             && PL_oldbufptr != PL_bufptr)
-    {
-       /*
-               Only for NetWare:
-               The code below is removed for NetWare because it abends/crashes on NetWare
-               when the script has error such as not having the closing quotes like:
-                   if ($var eq "value)
-               Checking of white spaces is anyway done in NetWare code.
-       */
+    /* Output error message 's' with length 'len'.  'flags' are SV flags that
+     * apply.  If the number of errors found is large enough, it abandons
+     * parsing.  If 's' is NULL, there is no message, and it abandons
+     * processing unconditionally */
+
+    if (s != NULL) {
+        if (!yychar || (yychar == ';' && !PL_rsfp))
+            sv_catpvs(where_sv, "at EOF");
+        else if (   PL_oldoldbufptr
+                 && PL_bufptr > PL_oldoldbufptr
+                 && PL_bufptr - PL_oldoldbufptr < 200
+                 && PL_oldoldbufptr != PL_oldbufptr
+                 && PL_oldbufptr != PL_bufptr)
+        {
+            /*
+                    Only for NetWare:
+                    The code below is removed for NetWare because it
+                    abends/crashes on NetWare when the script has error such as
+                    not having the closing quotes like:
+                        if ($var eq "value)
+                    Checking of white spaces is anyway done in NetWare code.
+            */
  #ifndef NETWARE
-       while (isSPACE(*PL_oldoldbufptr))
-           PL_oldoldbufptr++;
+            while (isSPACE(*PL_oldoldbufptr))
+                PL_oldoldbufptr++;
  #endif
-       context = PL_oldoldbufptr;
-       contlen = PL_bufptr - PL_oldoldbufptr;
-    }
-    else if (  PL_oldbufptr
-            && PL_bufptr > PL_oldbufptr
-            && PL_bufptr - PL_oldbufptr < 200
-            && PL_oldbufptr != PL_bufptr) {
-       /*
-               Only for NetWare:
-               The code below is removed for NetWare because it abends/crashes on NetWare
-               when the script has error such as not having the closing quotes like:
-                   if ($var eq "value)
-               Checking of white spaces is anyway done in NetWare code.
-       */
+            context = PL_oldoldbufptr;
+            contlen = PL_bufptr - PL_oldoldbufptr;
+        }
+        else if (  PL_oldbufptr
+                && PL_bufptr > PL_oldbufptr
+                && PL_bufptr - PL_oldbufptr < 200
+                && PL_oldbufptr != PL_bufptr) {
+            /*
+                    Only for NetWare:
+                    The code below is removed for NetWare because it
+                    abends/crashes on NetWare when the script has error such as
+                    not having the closing quotes like:
+                        if ($var eq "value)
+                    Checking of white spaces is anyway done in NetWare code.
+            */
  #ifndef NETWARE
-       while (isSPACE(*PL_oldbufptr))
-           PL_oldbufptr++;
+            while (isSPACE(*PL_oldbufptr))
+                PL_oldbufptr++;
  #endif
-       context = PL_oldbufptr;
-       contlen = PL_bufptr - PL_oldbufptr;
-    }
-    else if (yychar > 255)
-       sv_catpvs(where_sv, "next token ???");
-    else if (yychar == YYEMPTY) {
-       if (PL_lex_state == LEX_NORMAL)
-           sv_catpvs(where_sv, "at end of line");
-       else if (PL_lex_inpat)
-           sv_catpvs(where_sv, "within pattern");
-       else
-           sv_catpvs(where_sv, "within string");
-    }
-    else {
-       sv_catpvs(where_sv, "next char ");
-       if (yychar < 32)
-           Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
-       else if (isPRINT_LC(yychar)) {
-           const char string = yychar;
-           sv_catpvn(where_sv, &string, 1);
-       }
-       else
-           Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
-    }
-    msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
-    Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
-        OutCopFILE(PL_curcop),
-        (IV)(PL_parser->preambling == NOLINE
-               ? CopLINE(PL_curcop)
-               : PL_parser->preambling));
-    if (context)
-       Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
-                            UTF8fARG(UTF, contlen, context));
-    else
-       Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
-    if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
-        Perl_sv_catpvf(aTHX_ msg,
-        "  (Might be a runaway multi-line %c%c string starting on line %" IVdf ")\n",
-                (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
-        PL_multi_end = 0;
-    }
-    if (PL_in_eval & EVAL_WARNONLY) {
-       PL_in_eval &= ~EVAL_WARNONLY;
-       Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
+            context = PL_oldbufptr;
+            contlen = PL_bufptr - PL_oldbufptr;
+        }
+        else if (yychar > 255)
+            sv_catpvs(where_sv, "next token ???");
+        else if (yychar == YYEMPTY) {
+            if (PL_lex_state == LEX_NORMAL)
+                sv_catpvs(where_sv, "at end of line");
+            else if (PL_lex_inpat)
+                sv_catpvs(where_sv, "within pattern");
+            else
+                sv_catpvs(where_sv, "within string");
+        }
+        else {
+            sv_catpvs(where_sv, "next char ");
+            if (yychar < 32)
+                Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
+            else if (isPRINT_LC(yychar)) {
+                const char string = yychar;
+                sv_catpvn(where_sv, &string, 1);
+            }
+            else
+                Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
+        }
+        msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
+        Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
+            OutCopFILE(PL_curcop),
+            (IV)(PL_parser->preambling == NOLINE
+                   ? CopLINE(PL_curcop)
+                   : PL_parser->preambling));
+        if (context)
+            Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
+                                 UTF8fARG(UTF, contlen, context));
+        else
+            Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
+        if (   PL_multi_start < PL_multi_end
+            && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
+        {
+            Perl_sv_catpvf(aTHX_ msg,
+            "  (Might be a runaway multi-line %c%c string starting on"
+            " line %" IVdf ")\n",
+                    (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
+            PL_multi_end = 0;
+        }
+        if (PL_in_eval & EVAL_WARNONLY) {
+            PL_in_eval &= ~EVAL_WARNONLY;
+            Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
+        }
+        else {
+            qerror(msg);
+        }
      }
-    else
-       qerror(msg);
-    if (PL_error_count >= 10) {
-       SV * errsv;
-       if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
-           Perl_croak(aTHX_ "%" SVf "%s has too many errors.\n",
-                      SVfARG(errsv), OutCopFILE(PL_curcop));
-       else
-           Perl_croak(aTHX_ "%s has too many errors.\n",
-            OutCopFILE(PL_curcop));
+    if (s == NULL || PL_error_count >= 10) {
+        const char * msg = "";
+        const char * const name = OutCopFILE(PL_curcop);
+
+       if (PL_in_eval) {
+            SV * errsv = ERRSV;
+            if (SvCUR(errsv)) {
+                msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
+            }
+        }
+
+        if (s == NULL) {
+            abort_execution(msg, name);
+        }
+        else {
+            Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
+        }
      }
      PL_in_my = 0;
      PL_in_my_stash = NULL;
@@ -11457,7 +11694,9 @@ S_swallow_bom(pTHX_ U8 *s)
                 /* diag_listed_as: Unsupported script encoding %s */
                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
  #ifndef PERL_NO_UTF16_FILTER
+#ifdef DEBUGGING
             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
+#endif
             s += 2;
             if (PL_bufend > (char*)s) {
                 s = add_utf16_textfilter(s, TRUE);
@@ -11471,7 +11710,9 @@ S_swallow_bom(pTHX_ U8 *s)
      case 0xFE:
         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
  #ifndef PERL_NO_UTF16_FILTER
+#ifdef DEBUGGING
             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
+#endif
             s += 2;
             if (PL_bufend > (char *)s) {
                 s = add_utf16_textfilter(s, FALSE);
@@ -11485,7 +11726,9 @@ S_swallow_bom(pTHX_ U8 *s)
      case BOM_UTF8_FIRST_BYTE: {
          const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
          if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
+#ifdef DEBUGGING
              if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
+#endif
              s += len + 1;                      /* UTF-8 */
          }
          break;
@@ -11504,7 +11747,9 @@ S_swallow_bom(pTHX_ U8 *s)
                    * 00 xx 00 xx
                    * are a good indicator of UTF-16BE. */
  #ifndef PERL_NO_UTF16_FILTER
+#ifdef DEBUGGING
                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
+#endif
                   s = add_utf16_textfilter(s, FALSE);
  #else
                   /* diag_listed_as: Unsupported script encoding %s */
@@ -11520,7 +11765,9 @@ S_swallow_bom(pTHX_ U8 *s)
                    * xx 00 xx 00
                    * are a good indicator of UTF-16LE. */
  #ifndef PERL_NO_UTF16_FILTER
+#ifdef DEBUGGING
               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
+#endif
               s = add_utf16_textfilter(s, TRUE);
  #else
               /* diag_listed_as: Unsupported script encoding %s */