Note that various items in perlguts are documented

[perl5.git] / toke.c
diff --git a/toke.c b/toke.c

index da3ecdb..26de580 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -25,7 +25,7 @@
  =head1 Lexer interface
  This is the lower layer of the Perl parser, managing characters and tokens.
  
-=for apidoc AmU|yy_parser *|PL_parser
+=for apidoc AmnU|yy_parser *|PL_parser
  
  Pointer to a structure encapsulating the state of the parsing operation
  currently in progress.  The pointer can be locally changed to perform
@@ -39,9 +39,10 @@ Individual members of C<PL_parser> have their own documentation.
  #define PERL_IN_TOKE_C
  #include "perl.h"
  #include "dquote_inline.h"
+#include "invlist_inline.h"
  
-#define new_constant(a,b,c,d,e,f,g)    \
-       S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
+#define new_constant(a,b,c,d,e,f,g, h) \
+       S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
  
  #define pl_yylval      (PL_parser->yylval)
  
@@ -310,6 +311,7 @@ static struct debug_tokens {
      { ANDAND,          TOKENTYPE_NONE,         "ANDAND" },
      { ANDOP,           TOKENTYPE_NONE,         "ANDOP" },
      { ANONSUB,         TOKENTYPE_IVAL,         "ANONSUB" },
+    { ANON_SIGSUB,     TOKENTYPE_IVAL,         "ANON_SIGSUB" },
      { ARROW,           TOKENTYPE_NONE,         "ARROW" },
      { ASSIGNOP,                TOKENTYPE_OPNUM,        "ASSIGNOP" },
      { BITANDOP,                TOKENTYPE_OPNUM,        "BITANDOP" },
@@ -338,7 +340,7 @@ static struct debug_tokens {
      { GIVEN,           TOKENTYPE_IVAL,         "GIVEN" },
      { HASHBRACK,       TOKENTYPE_NONE,         "HASHBRACK" },
      { IF,              TOKENTYPE_IVAL,         "IF" },
-    { LABEL,           TOKENTYPE_PVAL,         "LABEL" },
+    { LABEL,           TOKENTYPE_OPVAL,        "LABEL" },
      { LOCAL,           TOKENTYPE_IVAL,         "LOCAL" },
      { LOOPEX,          TOKENTYPE_OPNUM,        "LOOPEX" },
      { LSTOP,           TOKENTYPE_OPNUM,        "LSTOP" },
@@ -367,7 +369,10 @@ static struct debug_tokens {
      { RELOP,           TOKENTYPE_OPNUM,        "RELOP" },
      { REQUIRE,         TOKENTYPE_NONE,         "REQUIRE" },
      { SHIFTOP,         TOKENTYPE_OPNUM,        "SHIFTOP" },
+    { SIGSUB,          TOKENTYPE_NONE,         "SIGSUB" },
      { SUB,             TOKENTYPE_NONE,         "SUB" },
+    { SUBLEXEND,       TOKENTYPE_NONE,         "SUBLEXEND" },
+    { SUBLEXSTART,     TOKENTYPE_NONE,         "SUBLEXSTART" },
      { THING,           TOKENTYPE_OPVAL,        "THING" },
      { UMINUS,          TOKENTYPE_NONE,         "UMINUS" },
      { UNIOP,           TOKENTYPE_OPNUM,        "UNIOP" },
@@ -456,9 +461,9 @@ S_printbuf(pTHX_ const char *const fmt, const char *const s)
  
      PERL_ARGS_ASSERT_PRINTBUF;
  
-    GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
+    GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
      PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
-    GCC_DIAG_RESTORE;
+    GCC_DIAG_RESTORE_STMT;
      SvREFCNT_dec(tmp);
  }
  
@@ -663,7 +668,7 @@ S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
  #endif
  
  /*
-=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
+=for apidoc lex_start
  
  Creates and initialises a new lexer/parser state object, supplying
  a context in which to lex and parse from a new source of Perl code.
@@ -726,7 +731,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
      parser->lex_state = LEX_NORMAL;
      parser->expect = XSTATE;
      parser->rsfp = rsfp;
-    parser->recheck_utf8_validity = FALSE;
+    parser->recheck_utf8_validity = TRUE;
      parser->rsfp_filters =
        !(flags & LEX_START_SAME_FILTER) || !oparser
          ? NULL
@@ -828,7 +833,7 @@ Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
  
  
  /*
-=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
+=for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr
  
  Buffer scalar containing the chunk currently under consideration of the
  text currently being lexed.  This is always a plain string scalar (for
@@ -855,7 +860,7 @@ lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
  of these pointers is usually preferable to examination of the scalar
  through normal scalar means.
  
-=for apidoc AmxU|char *|PL_parser-E<gt>bufend
+=for apidoc AmnxUN|char *|PL_parser-E<gt>bufend
  
  Direct pointer to the end of the chunk of text currently being lexed, the
  end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
@@ -863,7 +868,7 @@ end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
  always located at the end of the buffer, and does not count as part of
  the buffer's contents.
  
-=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
+=for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr
  
  Points to the current position of lexing inside the lexer buffer.
  Characters around this point may be freely examined, within
@@ -881,7 +886,7 @@ Interpretation of the buffer's octets can be abstracted out by
  using the slightly higher-level functions L</lex_peek_unichar> and
  L</lex_read_unichar>.
  
-=for apidoc AmxU|char *|PL_parser-E<gt>linestart
+=for apidoc AmnxUN|char *|PL_parser-E<gt>linestart
  
  Points to the start of the current line inside the lexer buffer.
  This is useful for indicating at which column an error occurred, and
@@ -892,7 +897,7 @@ a newline; the function L</lex_read_to> handles this detail.
  */
  
  /*
-=for apidoc Amx|bool|lex_bufutf8
+=for apidoc lex_bufutf8
  
  Indicates whether the octets in the lexer buffer
  (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
@@ -923,7 +928,7 @@ Perl_lex_bufutf8(pTHX)
  }
  
  /*
-=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
+=for apidoc lex_grow_linestr
  
  Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
  at least C<len> octets (including terminating C<NUL>).  Returns a
@@ -986,7 +991,7 @@ Perl_lex_grow_linestr(pTHX_ STRLEN len)
  }
  
  /*
-=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
+=for apidoc lex_stuff_pvn
  
  Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
  immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
@@ -1019,13 +1024,9 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
         if (flags & LEX_STUFF_UTF8) {
             goto plain_copy;
         } else {
-           STRLEN highhalf = 0;    /* Count of variants */
-           const char *p, *e = pv+len;
-           for (p = pv; p != e; p++) {
-               if (! UTF8_IS_INVARIANT(*p)) {
-                    highhalf++;
-                }
-            }
+           STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
+                                                       (U8 *) pv + len);
+            const char *p, *e = pv+len;;
             if (!highhalf)
                 goto plain_copy;
             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
@@ -1035,13 +1036,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
                 SvCUR(PL_parser->linestr) + len+highhalf);
             PL_parser->bufend += len+highhalf;
             for (p = pv; p != e; p++) {
-               U8 c = (U8)*p;
-               if (! UTF8_IS_INVARIANT(c)) {
-                   *bufptr++ = UTF8_TWO_BYTE_HI(c);
-                   *bufptr++ = UTF8_TWO_BYTE_LO(c);
-               } else {
-                   *bufptr++ = (char)c;
-               }
+                append_utf8_from_native_byte(*p, (U8 **) &bufptr);
             }
         }
      } else {
@@ -1091,7 +1086,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
  }
  
  /*
-=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
+=for apidoc lex_stuff_pv
  
  Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
  immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
@@ -1120,7 +1115,7 @@ Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
  }
  
  /*
-=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
+=for apidoc lex_stuff_sv
  
  Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
  immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
@@ -1152,7 +1147,7 @@ Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
  }
  
  /*
-=for apidoc Amx|void|lex_unstuff|char *ptr
+=for apidoc lex_unstuff
  
  Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
  C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
@@ -1186,7 +1181,7 @@ Perl_lex_unstuff(pTHX_ char *ptr)
  }
  
  /*
-=for apidoc Amx|void|lex_read_to|char *ptr
+=for apidoc lex_read_to
  
  Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
  to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
@@ -1217,7 +1212,7 @@ Perl_lex_read_to(pTHX_ char *ptr)
  }
  
  /*
-=for apidoc Amx|void|lex_discard_to|char *ptr
+=for apidoc lex_discard_to
  
  Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
  up to C<ptr>.  The remaining content of the buffer will be moved, and
@@ -1289,7 +1284,7 @@ Perl_notify_parser_that_changed_to_utf8(pTHX)
  }
  
  /*
-=for apidoc Amx|bool|lex_next_chunk|U32 flags
+=for apidoc lex_next_chunk
  
  Reads in the next chunk of text to be lexed, appending it to
  L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
@@ -1341,7 +1336,7 @@ Perl_lex_next_chunk(pTHX_ U32 flags)
             PL_parser->last_lop = NULL;
         last_uni_pos = last_lop_pos = 0;
         *buf = 0;
-       SvCUR(linestr) = 0;
+       SvCUR_set(linestr, 0);
      } else {
         old_bufend_pos = PL_parser->bufend - buf;
         bufptr_pos = PL_parser->bufptr - buf;
@@ -1429,7 +1424,7 @@ Perl_lex_next_chunk(pTHX_ U32 flags)
  }
  
  /*
-=for apidoc Amx|I32|lex_peek_unichar|U32 flags
+=for apidoc lex_peek_unichar
  
  Looks ahead one (Unicode) character in the text currently being lexed.
  Returns the codepoint (unsigned integer value) of the next character,
@@ -1498,7 +1493,7 @@ Perl_lex_peek_unichar(pTHX_ U32 flags)
  }
  
  /*
-=for apidoc Amx|I32|lex_read_unichar|U32 flags
+=for apidoc lex_read_unichar
  
  Reads the next (Unicode) character in the text currently being lexed.
  Returns the codepoint (unsigned integer value) of the character read,
@@ -1536,7 +1531,7 @@ Perl_lex_read_unichar(pTHX_ U32 flags)
  }
  
  /*
-=for apidoc Amx|void|lex_read_space|U32 flags
+=for apidoc lex_read_space
  
  Reads optional spaces, in Perl style, in the text currently being
  lexed.  The spaces may include ordinary whitespace characters and
@@ -1611,7 +1606,7 @@ Perl_lex_read_space(pTHX_ U32 flags)
  
  /*
  
-=for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
+=for apidoc validate_proto
  
  This function performs syntax checking on a prototype, C<proto>.
  If C<warn> is true, any illegal characters or mismatched brackets
@@ -1832,14 +1827,14 @@ S_incline(pTHX_ const char *s, const char *end)
                     }
                     else if (GvAV(cfgv)) {
                         AV * const av = GvAV(cfgv);
-                       const I32 start = CopLINE(PL_curcop)+1;
-                       I32 items = AvFILLp(av) - start;
+                       const line_t start = CopLINE(PL_curcop)+1;
+                       SSize_t items = AvFILLp(av) - start;
                         if (items > 0) {
                             AV * const av2 = GvAVn(gv2);
                             SV **svp = AvARRAY(av) + start;
-                           I32 l = (I32)line_num+1;
-                           while (items--)
-                               av_store(av2, l++, SvREFCNT_inc(*svp++));
+                           Size_t l = line_num+1;
+                           while (items-- && l < SSize_t_MAX && l == (line_t)l)
+                               av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
                         }
                     }
                 }
@@ -1892,8 +1887,8 @@ S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
  #define skipspace(s) skipspace_flags(s, 0)
  #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
  
-STATIC char *
-S_skipspace_flags(pTHX_ char *s, U32 flags)
+char *
+Perl_skipspace_flags(pTHX_ char *s, U32 flags)
  {
      PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
      if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
@@ -1935,7 +1930,7 @@ S_check_uni(pTHX)
      s = PL_last_uni;
      while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
         s += UTF ? UTF8SKIP(s) : 1;
-    if (memchr(s, '(', PL_bufptr - s))
+    if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
         return;
  
      Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
@@ -2014,7 +2009,7 @@ S_force_next(pTHX_ I32 type)
   * S_postderef
   *
   * This subroutine handles postfix deref syntax after the arrow has already
- * been emitted.  @* $* etc. are emitted as two separate token right here.
+ * been emitted.  @* $* etc. are emitted as two separate tokens right here.
   * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
   * only the first, leaving yylex to find the next.
   */
@@ -2069,10 +2064,10 @@ STATIC SV *
  S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
  {
      SV * const sv = newSVpvn_utf8(start, len,
-                          !IN_BYTES
-                          && UTF
-                          && !is_utf8_invariant_string((const U8*)start, len)
-                          && is_utf8_string((const U8*)start, len));
+                    ! IN_BYTES
+                  &&  UTF
+                  &&  len != 0
+                  &&  is_utf8_non_invariant_string((const U8*)start, len));
      return sv;
  }
  
@@ -2335,7 +2330,7 @@ S_tokeq(pTHX_ SV *sv)
      SvCUR_set(sv, d - SvPVX_const(sv));
    finish:
      if ( PL_hints & HINT_NEW_STRING )
-       return new_constant(NULL, 0, "q", sv, pv, "q", 1);
+       return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
      return sv;
  }
  
@@ -2397,6 +2392,8 @@ S_sublex_start(pTHX)
      PL_parser->lex_super_state = PL_lex_state;
      PL_parser->lex_sub_inwhat = (U16)op_type;
      PL_parser->lex_sub_op = PL_lex_op;
+    PL_parser->sub_no_recover = FALSE;
+    PL_parser->sub_error_count = PL_error_count;
      PL_lex_state = LEX_INTERPPUSH;
  
      PL_expect = XTERM;
@@ -2512,7 +2509,7 @@ S_sublex_push(pTHX)
      PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
      PL_in_eval &= ~EVAL_RE_REPARSING;
  
-    return '(';
+    return SUBLEXSTART;
  }
  
  /*
@@ -2576,43 +2573,85 @@ S_sublex_done(pTHX)
      else {
         const line_t l = CopLINE(PL_curcop);
         LEAVE;
+        if (PL_parser->sub_error_count != PL_error_count) {
+            if (PL_parser->sub_no_recover) {
+                yyquit();
+                NOT_REACHED;
+            }
+        }
         if (PL_multi_close == '<')
             PL_parser->herelines += l - PL_multi_end;
         PL_bufend = SvPVX(PL_linestr);
         PL_bufend += SvCUR(PL_linestr);
         PL_expect = XOPERATOR;
-       return ')';
+       return SUBLEXEND;
      }
  }
  
  STATIC SV*
-S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
+S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
+{
+    /* This justs wraps get_and_check_backslash_N_name() to output any error
+     * message it returns. */
+
+    const char * error_msg = NULL;
+    SV * result;
+
+    PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
+
+    /* charnames doesn't work well if there have been errors found */
+    if (PL_error_count > 0) {
+       return NULL;
+    }
+
+    result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
+
+    if (error_msg) {
+        yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
+    }
+
+    return result;
+}
+
+SV*
+Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
+                                          const char* const e,
+                                          const bool is_utf8,
+                                          const char ** error_msg)
  {
      /* <s> points to first character of interior of \N{}, <e> to one beyond the
       * interior, hence to the "}".  Finds what the name resolves to, returning
-     * an SV* containing it; NULL if no valid one found */
-
-    SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
+     * an SV* containing it; NULL if no valid one found.
+     *
+     * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
+     * doesn't have to be. */
  
+    SV* res;
      HV * table;
      SV **cvp;
      SV *cv;
      SV *rv;
      HV *stash;
      const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
+    dVAR;
  
      PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
  
+    assert(e >= s);
+    assert(s > (char *) 3);
+
+    res = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
+
      if (!SvCUR(res)) {
          SvREFCNT_dec_NN(res);
          /* diag_listed_as: Unknown charname '%s' */
-        yyerror("Unknown charname ''");
+        *error_msg = Perl_form(aTHX_ "Unknown charname ''");
          return NULL;
      }
  
      res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
                          /* include the <}> */
-                        e - backslash_ptr + 1);
+                        e - backslash_ptr + 1, error_msg);
      if (! SvPOK(res)) {
          SvREFCNT_dec_NN(res);
          return NULL;
@@ -2641,7 +2680,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
       * characters that begin a character name alias are alphabetic, otherwise
       * would have to create a isCHARNAME_BEGIN macro */
  
-    if (! UTF) {
+    if (! is_utf8) {
          if (! isALPHAU(*s)) {
              goto bad_charname;
          }
@@ -2672,14 +2711,11 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
              s += 2;
          }
          else {
-            if (! PL_utf8_charname_begin) {
-                U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
-                PL_utf8_charname_begin = _core_swash_init("utf8",
-                                                        "_Perl_Charname_Begin",
-                                                        &PL_sv_undef,
-                                                        1, 0, NULL, &flags);
-            }
-            if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
+            if (! _invlist_contains_cp(PL_utf8_charname_begin,
+                                       utf8_to_uvchr_buf((U8 *) s,
+                                                         (U8 *) e,
+                                                         NULL)))
+            {
                  goto bad_charname;
              }
              s += UTF8SKIP(s);
@@ -2703,14 +2739,11 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
                  s += 2;
              }
              else {
-                if (! PL_utf8_charname_continue) {
-                    U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
-                    PL_utf8_charname_continue = _core_swash_init("utf8",
-                                                "_Perl_Charname_Continue",
-                                                &PL_sv_undef,
-                                                1, 0, NULL, &flags);
-                }
-                if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
+                if (! _invlist_contains_cp(PL_utf8_charname_continue,
+                                           utf8_to_uvchr_buf((U8 *) s,
+                                                             (U8 *) e,
+                                                             NULL)))
+                {
                      goto bad_charname;
                  }
                  s += UTF8SKIP(s);
@@ -2721,18 +2754,15 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
          /* diag_listed_as: charnames alias definitions may not contain
                             trailing white-space; marked by <-- HERE in %s
           */
-        yyerror_pv(
-            Perl_form(aTHX_
+        *error_msg = Perl_form(aTHX_
              "charnames alias definitions may not contain trailing "
              "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
              (int)(s - backslash_ptr + 1), backslash_ptr,
-            (int)(e - s + 1), s + 1
-            ),
-        UTF ? SVf_UTF8 : 0);
+            (int)(e - s + 1), s + 1);
          return NULL;
      }
  
-    if (SvUTF8(res)) { /* Don't accept malformed input */
+    if (SvUTF8(res)) { /* Don't accept malformed charname value */
          const U8* first_bad_char_loc;
          STRLEN len;
          const char* const str = SvPV_const(res, len);
@@ -2745,13 +2775,10 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
                                                0 /* 0 means don't die */ );
              /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
                                 immediately after '%s' */
-            yyerror_pv(
-              Perl_form(aTHX_
+            *error_msg = Perl_form(aTHX_
                  "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
                   (int) (e - backslash_ptr + 1), backslash_ptr,
-                 (int) ((char *) first_bad_char_loc - str), str
-              ),
-              SVf_UTF8);
+                 (int) ((char *) first_bad_char_loc - str), str);
              return NULL;
          }
      }
@@ -2764,13 +2791,10 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
           * that this print won't run off the end of the string */
          /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
                             in \N{%s} */
-        yyerror_pv(
-          Perl_form(aTHX_
+        *error_msg = Perl_form(aTHX_
              "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
              (int)(s - backslash_ptr + 1), backslash_ptr,
-            (int)(e - s + 1), s + 1
-          ),
-          UTF ? SVf_UTF8 : 0);
+            (int)(e - s + 1), s + 1);
          return NULL;
      }
  
@@ -2778,14 +2802,11 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
          /* diag_listed_as: charnames alias definitions may not contain a
                             sequence of multiple spaces; marked by <-- HERE
                             in %s */
-        yyerror_pv(
-          Perl_form(aTHX_
+        *error_msg = Perl_form(aTHX_
              "charnames alias definitions may not contain a sequence of "
              "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
              (int)(s - backslash_ptr + 1), backslash_ptr,
-            (int)(e - s + 1), s + 1
-          ),
-          UTF ? SVf_UTF8 : 0);
+            (int)(e - s + 1), s + 1);
          return NULL;
  }
  
@@ -2890,8 +2911,8 @@ S_scan_const(pTHX_ char *start)
      bool dorange = FALSE;               /* are we in a translit range? */
      bool didrange = FALSE;              /* did we just finish a range? */
      bool in_charclass = FALSE;          /* within /[...]/ */
-    bool has_utf8 = FALSE;              /* Output constant is UTF8 */
-    bool  this_utf8 = cBOOL(UTF);       /* Is the source string assumed to be
+    bool d_is_utf8 = FALSE;             /* Output constant is UTF8 */
+    bool s_is_utf8 = cBOOL(UTF);        /* Is the source string assumed to be
                                             UTF8?  But, this can show as true
                                             when the source isn't utf8, as for
                                             example when it is entirely composed
@@ -2902,8 +2923,8 @@ S_scan_const(pTHX_ char *start)
                                             should we have to convert to
                                             UTF-8) */
      SV *res;                           /* result from charnames */
-    STRLEN offset_to_max;   /* The offset in the output to where the range
-                               high-end character is temporarily placed */
+    STRLEN offset_to_max = 0;   /* The offset in the output to where the range
+                                   high-end character is temporarily placed */
  
      /* Does something require special handling in tr/// ?  This avoids extra
       * work in a less likely case.  As such, khw didn't feel it was worth
@@ -2938,14 +2959,19 @@ S_scan_const(pTHX_ char *start)
      assert(PL_lex_inwhat != OP_TRANSR);
      if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
         /* If we are doing a trans and we know we want UTF8 set expectation */
-       has_utf8   = PL_parser->lex_sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
-       this_utf8  = PL_parser->lex_sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
+       d_is_utf8  = PL_parser->lex_sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
+       s_is_utf8  = PL_parser->lex_sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
      }
  
      /* Protect sv from errors and fatal warnings. */
      ENTER_with_name("scan_const");
      SAVEFREESV(sv);
  
+    /* A bunch of code in the loop below assumes that if s[n] exists and is not
+     * NUL, then s[n+1] exists.  This assertion makes sure that assumption is
+     * valid */
+    assert(*send == '\0');
+
      while (s < send
             || dorange   /* Handle tr/// range at right edge of input */
      ) {
@@ -2995,7 +3021,7 @@ S_scan_const(pTHX_ char *start)
                       * occurences in the constant, except those added by a
                       * backslash escape sequence, like \x{100}.  Mostly, those
                       * set 'has_above_latin1' as appropriate */
-                    if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
+                    if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
                          has_above_latin1 = TRUE;
                      }
  
@@ -3020,7 +3046,7 @@ S_scan_const(pTHX_ char *start)
                       * time through the loop */
                      offset_to_max = d - SvPVX_const(sv);
  
-                    if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
+                    if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
                          has_above_latin1 = TRUE;
                      }
  
@@ -3050,7 +3076,7 @@ S_scan_const(pTHX_ char *start)
                  IV real_range_max = 0;
  #endif
                  /* Get the code point values of the range ends. */
-                if (has_utf8) {
+                if (d_is_utf8) {
                      /* We know the utf8 is valid, because we just constructed
                       * it ourselves in previous loop iterations */
                      min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
@@ -3081,7 +3107,7 @@ S_scan_const(pTHX_ char *start)
                   * get it out of the way now.) */
                  if (UNLIKELY(range_max == range_min)) {
                      d = max_ptr;
-                    if (! has_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
+                    if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
                          utf8_variant_count--;
                      }
                      goto range_done;
@@ -3155,7 +3181,7 @@ S_scan_const(pTHX_ char *start)
  
                  /* Here the range contains at least 3 code points */
  
-               if (has_utf8) {
+               if (d_is_utf8) {
  
                      /* If everything in the transliteration is below 256, we
                       * can avoid special handling later.  A translation table
@@ -3170,11 +3196,21 @@ S_scan_const(pTHX_ char *start)
                         && (range_min > 255 || ! convert_unicode)
  #endif
                      ) {
+                        const STRLEN off = d - SvPVX(sv);
+                        const STRLEN extra = 1 + (send - s) + 1;
+                        char *e;
+
                          /* Move the high character one byte to the right; then
                           * insert between it and the range begin, an illegal
                           * byte which serves to indicate this is a range (using
                           * a '-' would be ambiguous). */
-                        char *e = d++;
+
+                        if (off + extra > SvLEN(sv)) {
+                            d = off + SvGROW(sv, off + extra);
+                            max_ptr = d - off + offset_to_max;
+                        }
+
+                        e = d++;
                          while (e-- > max_ptr) {
                              *(e + 1) = *e;
                          }
@@ -3241,7 +3277,7 @@ S_scan_const(pTHX_ char *start)
                   * */
                  grow = (range_max - 1) - (range_min + 1) + 1;
  
-                if (has_utf8) {
+                if (d_is_utf8) {
  #ifdef EBCDIC
                      /* In some cases in EBCDIC, we haven't yet calculated a
                       * precise amount needed for the UTF-8 variants.  Just
@@ -3278,7 +3314,7 @@ S_scan_const(pTHX_ char *start)
                      /* Recall that the min and max are now in Unicode terms, so
                       * we have to convert each character to its native
                       * equivalent */
-                    if (has_utf8) {
+                    if (d_is_utf8) {
                          for (i = range_min; i <= range_max; i++) {
                              append_utf8_from_native_byte(
                                                      LATIN1_TO_NATIVE((U8) i),
@@ -3298,7 +3334,7 @@ S_scan_const(pTHX_ char *start)
                      /* Here, no conversions are necessary, which means that the
                       * first character in the range is already in 'd' and
                       * valid, so we can skip overwriting it */
-                    if (has_utf8) {
+                    if (d_is_utf8) {
                          SSize_t i;
                          d += UTF8SKIP(d);
                          for (i = range_min + 1; i <= range_max; i++) {
@@ -3375,8 +3411,19 @@ S_scan_const(pTHX_ char *start)
               * friends */
         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
             if (s[2] == '#') {
-               while (s+1 < send && *s != ')')
-                   *d++ = *s++;
+                if (s_is_utf8) {
+                    PERL_UINT_FAST8_T  len = UTF8SKIP(s);
+
+                    while (s + len < send && *s != ')') {
+                        Copy(s, d, len, U8);
+                        d += len;
+                        s += len;
+                        len = UTF8_SAFE_SKIP(s, send);
+                    }
+                }
+                else while (s+1 < send && *s != ')') {
+                    *d++ = *s++;
+                }
             }
             else if (!PL_lex_casemods
                       && (    s[2] == '{' /* This should match regcomp.c */
@@ -3515,7 +3562,7 @@ S_scan_const(pTHX_ char *start)
                 {
                     const char* error;
  
-                   bool valid = grok_bslash_o(&s, PL_bufend,
+                   bool valid = grok_bslash_o(&s, send,
                                                 &uv, &error,
                                                 TRUE, /* Output warning */
                                                 FALSE, /* Not strict */
@@ -3534,7 +3581,7 @@ S_scan_const(pTHX_ char *start)
                 {
                     const char* error;
  
-                   bool valid = grok_bslash_x(&s, PL_bufend,
+                   bool valid = grok_bslash_x(&s, send,
                                                 &uv, &error,
                                                 TRUE, /* Output warning */
                                                 FALSE, /* Not strict */
@@ -3555,7 +3602,7 @@ S_scan_const(pTHX_ char *start)
                     *d++ = (char) uv;
                 }
                 else {
-                   if (!has_utf8 && uv > 255) {
+                   if (!d_is_utf8 && uv > 255) {
  
                          /* Here, 'uv' won't fit unless we convert to UTF-8.
                           * If we've only seen invariants so far, all we have to
@@ -3583,10 +3630,10 @@ S_scan_const(pTHX_ char *start)
                          }
  
                          has_above_latin1 = TRUE;
-                        has_utf8 = TRUE;
+                        d_is_utf8 = TRUE;
                      }
  
-                    if (! has_utf8) {
+                    if (! d_is_utf8) {
                         *d++ = (char)uv;
                          utf8_variant_count++;
                      }
@@ -3728,7 +3775,7 @@ S_scan_const(pTHX_ char *start)
                            * tr/// doesn't care about Unicode rules, so no need
                            * there to upgrade to UTF-8 for small enough code
                            * points */
-                       if (! has_utf8 && (   uv > 0xFF
+                       if (! d_is_utf8 && (   uv > 0xFF
                                             || PL_lex_inwhat != OP_TRANS))
                          {
                             /* See Note on sizing above.  */
@@ -3750,12 +3797,12 @@ S_scan_const(pTHX_ char *start)
                                  d = SvPVX(sv) + SvCUR(sv);
                              }
  
-                           has_utf8 = TRUE;
+                           d_is_utf8 = TRUE;
                              has_above_latin1 = TRUE;
                         }
  
                          /* Add the (Unicode) code point to the output. */
-                       if (! has_utf8 || OFFUNI_IS_INVARIANT(uv)) {
+                       if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
                             *d++ = (char) LATIN1_TO_NATIVE(uv);
                         }
                         else {
@@ -3764,15 +3811,20 @@ S_scan_const(pTHX_ char *start)
                     }
                 }
                 else /* Here is \N{NAME} but not \N{U+...}. */
-                     if ((res = get_and_check_backslash_N_name(s, e)))
-                {
+                     if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
+                {   /* Failed.  We should die eventually, but for now use a NUL
+                       to keep parsing */
+                    *d++ = '\0';
+                }
+                else {  /* Successfully evaluated the name */
                      STRLEN len;
                      const char *str = SvPV_const(res, len);
                      if (PL_lex_inpat) {
  
                         if (! len) { /* The name resolved to an empty string */
-                           Copy("\\N{}", d, 4, char);
-                           d += 4;
+                            const char empty_N[] = "\\N{_}";
+                            Copy(empty_N, d, sizeof(empty_N) - 1, char);
+                            d += sizeof(empty_N) - 1;
                         }
                         else {
                             /* In order to not lose information for the regex
@@ -3914,7 +3966,7 @@ S_scan_const(pTHX_ char *start)
  
                           /* Upgrade destination to be utf8 if this new
                            * component is */
-                       if (! has_utf8 && SvUTF8(res)) {
+                       if (! d_is_utf8 && SvUTF8(res)) {
                             /* See Note on sizing above.  */
                              const STRLEN extra = len + (send - s) + 1;
  
@@ -3932,7 +3984,7 @@ S_scan_const(pTHX_ char *start)
                                                 extra);
                                  d = SvPVX(sv) + SvCUR(sv);
                              }
-                           has_utf8 = TRUE;
+                           d_is_utf8 = TRUE;
                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
  
                             /* See Note on sizing above.  (NOTE: SvCUR() is not
@@ -4008,14 +4060,14 @@ S_scan_const(pTHX_ char *start)
          if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
             *d++ = *s++;
          }
-        else if (! this_utf8 && ! has_utf8) {
+        else if (! s_is_utf8 && ! d_is_utf8) {
              /* If neither source nor output is UTF-8, is also a single byte,
               * just copy it; but this byte counts should we later have to
               * convert to UTF-8 */
             *d++ = *s++;
              utf8_variant_count++;
          }
-        else if (this_utf8 && has_utf8) {   /* Both UTF-8, can just copy */
+        else if (s_is_utf8 && d_is_utf8) {   /* Both UTF-8, can just copy */
             const STRLEN len = UTF8SKIP(s);
  
              /* We expect the source to have already been checked for
@@ -4026,55 +4078,69 @@ S_scan_const(pTHX_ char *start)
              d += len;
              s += len;
          }
-        else { /* UTF8ness matters and doesn't match, need to convert */
-           STRLEN len = 1;
-           const UV nextuv   = (this_utf8)
-                                ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
-                                : (UV) ((U8) *s);
-           STRLEN need = UVCHR_SKIP(nextuv);
-
-           if (!has_utf8) {
-               SvCUR_set(sv, d - SvPVX_const(sv));
-               SvPOK_on(sv);
-               *d = '\0';
+        else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
+            STRLEN need = send - s + 1; /* See Note on sizing above. */
  
-                /* See Note on sizing above. */
-                need += (STRLEN)(send - s) + 1;
+            SvCUR_set(sv, d - SvPVX_const(sv));
+            SvPOK_on(sv);
+            *d = '\0';
  
-                if (utf8_variant_count == 0) {
-                    SvUTF8_on(sv);
-                    d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
-                }
-                else {
-                    sv_utf8_upgrade_flags_grow(sv,
-                                               SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
-                                               need);
-                    d = SvPVX(sv) + SvCUR(sv);
-                }
-               has_utf8 = TRUE;
-           } else if (need > len) {
-               /* encoded value larger than old, may need extra space (NOTE:
-                * SvCUR() is not set correctly here).   See Note on sizing
-                * above.  */
-                const STRLEN extra = need + (send - s) + 1;
-               const STRLEN off = d - SvPVX_const(sv);
+            if (utf8_variant_count == 0) {
+                SvUTF8_on(sv);
+                d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
+            }
+            else {
+                sv_utf8_upgrade_flags_grow(sv,
+                                           SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+                                           need);
+                d = SvPVX(sv) + SvCUR(sv);
+            }
+            d_is_utf8 = TRUE;
+            goto default_action; /* Redo, having upgraded so both are UTF-8 */
+        }
+        else {  /* UTF8ness matters: convert this non-UTF8 source char to
+                   UTF-8 for output.  It will occupy 2 bytes, but don't include
+                   the input byte since we haven't incremented 's' yet. See
+                   Note on sizing above. */
+            const STRLEN off = d - SvPVX(sv);
+            const STRLEN extra = 2 + (send - s - 1) + 1;
+            if (off + extra > SvLEN(sv)) {
                 d = off + SvGROW(sv, off + extra);
             }
-           s += len;
-
-           d = (char*)uvchr_to_utf8((U8*)d, nextuv);
+            *d++ = UTF8_EIGHT_BIT_HI(*s);
+            *d++ = UTF8_EIGHT_BIT_LO(*s);
+            s++;
         }
      } /* while loop to process each character */
  
+    {
+        const STRLEN off = d - SvPVX(sv);
+
+        /* See if room for the terminating NUL */
+        if (UNLIKELY(off >= SvLEN(sv))) {
+
+#ifndef DEBUGGING
+
+            if (off > SvLEN(sv))
+#endif
+                Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
+                        " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
+
+            /* Whew!  Here we don't have room for the terminating NUL, but
+             * everything else so far has fit.  It's not too late to grow
+             * to fit the NUL and continue on.  But it is a bug, as the code
+             * above was supposed to have made room for this, so under
+             * DEBUGGING builds, we panic anyway.  */
+            d = off + SvGROW(sv, off + 1);
+        }
+    }
+
      /* terminate the string and set up the sv */
      *d = '\0';
      SvCUR_set(sv, d - SvPVX_const(sv));
-    if (SvCUR(sv) >= SvLEN(sv))
-       Perl_croak(aTHX_ "panic: constant overflowed allocated space, %" UVuf
-                  " >= %" UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
  
      SvPOK_on(sv);
-    if (has_utf8) {
+    if (d_is_utf8) {
         SvUTF8_on(sv);
         if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
             PL_parser->lex_sub_op->op_private |=
@@ -4118,7 +4184,7 @@ S_scan_const(pTHX_ char *start)
             }
  
             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
-                               type, typelen);
+                               type, typelen, NULL);
         }
          pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
      }
@@ -4164,6 +4230,7 @@ S_intuit_more(pTHX_ char *s, char *e)
         return TRUE;
      if (*s != '{' && *s != '[')
         return FALSE;
+    PL_parser->sub_no_recover = TRUE;
      if (!PL_lex_inpat)
         return TRUE;
  
@@ -4499,6 +4566,7 @@ I32
  Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
  {
      filter_t funcp;
+    I32 ret;
      SV *datasv = NULL;
      /* This API is bad. It should have been using unsigned int for maxlen.
         Not sure if we want to change the API, but if not we should sanity
@@ -4581,7 +4649,11 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
      /* Call function. The function is expected to      */
      /* call "FILTER_READ(idx+1, buf_sv)" first.                */
      /* Return: <0:error, =0:eof, >0:not eof            */
-    return (*funcp)(aTHX_ idx, buf_sv, correct_length);
+    ENTER;
+    save_scalar(PL_errgv);
+    ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
+    LEAVE;
+    return ret;
  }
  
  STATIC char *
@@ -5087,6 +5159,14 @@ Perl_yylex(pTHX)
  
         return yylex();
      case LEX_FORMLINE:
+        if (PL_parser->sub_error_count != PL_error_count) {
+            /* There was an error parsing a formline, which tends to
+               mess up the parser.
+               Unlike interpolated sub-parsing, we can't treat any of
+               these as recoverable, so no need to check sub_no_recover.
+            */
+            yyquit();
+        }
         assert(PL_lex_formbrack);
         s = scan_formline(PL_bufptr);
         if (!PL_lex_formbrack)
@@ -5138,7 +5218,7 @@ Perl_yylex(pTHX)
                  /* read var name, including sigil, into PL_tokenbuf */
                  PL_tokenbuf[0] = sigil;
                  parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
-                    0, cBOOL(UTF), FALSE);
+                    0, cBOOL(UTF), FALSE, FALSE);
                  *dest = '\0';
                  assert(PL_tokenbuf[1]); /* we have a variable name */
              }
@@ -5913,7 +5993,7 @@ Perl_yylex(pTHX)
  
         switch (PL_expect) {
         case XOPERATOR:
-           if (!PL_in_my || PL_lex_state != LEX_NORMAL)
+           if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
                 break;
             PL_bufptr = s;      /* update in case we back off */
             if (*s == '=') {
@@ -5927,9 +6007,17 @@ Perl_yylex(pTHX)
         case XATTRTERM:
             PL_expect = XTERMBLOCK;
          grabattrs:
+            /* NB: as well as parsing normal attributes, we also end up
+             * here if there is something looking like attributes
+             * following a signature (which is illegal, but used to be
+             * legal in 5.20..5.26). If the latter, we still parse the
+             * attributes so that error messages(s) are less confusing,
+             * but ignore them (parser->sig_seen).
+             */
             s = skipspace(s);
             attrs = NULL;
              while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
+                bool sig = PL_parser->sig_seen;
                 I32 tmp;
                 SV *sv;
                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
@@ -5972,23 +6060,27 @@ Perl_yylex(pTHX)
                        the CVf_BUILTIN_ATTRS define in cv.h! */
                     if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
                         sv_free(sv);
-                       CvLVALUE_on(PL_compcv);
+                       if (!sig)
+                            CvLVALUE_on(PL_compcv);
                     }
                     else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
                         sv_free(sv);
-                       CvMETHOD_on(PL_compcv);
+                       if (!sig)
+                            CvMETHOD_on(PL_compcv);
                     }
                     else if (!PL_in_my && memEQs(SvPVX(sv), len, "const"))
                     {
                         sv_free(sv);
-                       Perl_ck_warner_d(aTHX_
-                           packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
-                          ":const is experimental"
-                       );
-                       CvANONCONST_on(PL_compcv);
-                       if (!CvANON(PL_compcv))
-                           yyerror(":const is not permitted on named "
-                                   "subroutines");
+                        if (!sig) {
+                            Perl_ck_warner_d(aTHX_
+                                packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
+                               ":const is experimental"
+                            );
+                            CvANONCONST_on(PL_compcv);
+                            if (!CvANON(PL_compcv))
+                                yyerror(":const is not permitted on named "
+                                        "subroutines");
+                        }
                     }
                     /* After we've set the flags, it could be argued that
                        we don't need to do the attributes.pm-based setting
@@ -6041,6 +6133,14 @@ Perl_yylex(pTHX)
                 }
             }
         got_attrs:
+            if (PL_parser->sig_seen) {
+                /* see comment about about sig_seen and parser error
+                 * handling */
+                if (attrs)
+                    op_free(attrs);
+                Perl_croak(aTHX_ "Subroutine attributes must come "
+                                 "before the signature");
+                }
             if (attrs) {
                 NEXTVAL_NEXTTOKE.opval = attrs;
                 force_next(THING);
@@ -6486,6 +6586,7 @@ Perl_yylex(pTHX)
                 SAVEI32(PL_lex_formbrack);
                 PL_parser->form_lex_state = PL_lex_state;
                 PL_lex_formbrack = PL_lex_brackets + 1;
+                PL_parser->sub_error_count = PL_error_count;
                 goto leftbracket;
             }
         }
@@ -6739,7 +6840,7 @@ Perl_yylex(pTHX)
             }
  
             PL_expect = XOPERATOR;
-           if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
+           if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
                 if (!islop || PL_last_lop_op == OP_GREPSTART)
                     PL_expect = XOPERATOR;
@@ -6812,7 +6913,7 @@ Perl_yylex(pTHX)
         if (!PL_tokenbuf[1]) {
             PREREF('@');
         }
-       if (PL_lex_state == LEX_NORMAL)
+       if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
             s = skipspace(s);
         if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
              && intuit_more(s, PL_bufend))
@@ -6887,7 +6988,7 @@ Perl_yylex(pTHX)
         }
         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
             s += 3;
-           TERM(YADAYADA);
+           OPERATOR(YADAYADA);
         }
         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
             char tmp = *s++;
@@ -7133,9 +7234,9 @@ Perl_yylex(pTHX)
         if (!anydelim && PL_expect == XSTATE
               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
             s = d + 1;
-           pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
-           pl_yylval.pval[len] = '\0';
-           pl_yylval.pval[len+1] = UTF ? 1 : 0;
+            pl_yylval.opval =
+                newSVOP(OP_CONST, 0,
+                    newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
             CLINE;
             TOKEN(LABEL);
         }
@@ -7216,10 +7317,7 @@ Perl_yylex(pTHX)
             else {                      /* no override */
                 tmp = -tmp;
                 if (tmp == KEY_dump) {
-                   Perl_ck_warner_d(aTHX_ packWARN2(WARN_MISC,WARN_DEPRECATED),
-                                    "dump() better written as CORE::dump(). "
-                                     "dump() will no longer be available "
-                                     "in Perl 5.30");
+                   Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
                 }
                 gv = NULL;
                 gvp = 0;
@@ -7268,7 +7366,20 @@ Perl_yylex(pTHX)
                 int pkgname = 0;
                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
                 bool safebw;
+               bool no_op_error = FALSE;
  
+               if (PL_expect == XOPERATOR) {
+                   if (PL_bufptr == PL_linestart) {
+                       CopLINE_dec(PL_curcop);
+                       Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
+                       CopLINE_inc(PL_curcop);
+                   }
+                   else
+                       /* We want to call no_op with s pointing after the
+                          bareword, so defer it.  But we want it to come
+                          before the Bad name croak.  */
+                       no_op_error = TRUE;
+               }
  
                 /* Get the rest if it looks like a package qualifier */
  
@@ -7276,6 +7387,10 @@ Perl_yylex(pTHX)
                     STRLEN morelen;
                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
                                   TRUE, &morelen);
+                   if (no_op_error) {
+                       no_op("Bareword",s);
+                       no_op_error = FALSE;
+                   }
                     if (!morelen)
                         Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
                                 UTF8fARG(UTF, len, PL_tokenbuf),
@@ -7284,15 +7399,8 @@ Perl_yylex(pTHX)
                     pkgname = 1;
                 }
  
-               if (PL_expect == XOPERATOR) {
-                   if (PL_bufptr == PL_linestart) {
-                       CopLINE_dec(PL_curcop);
-                       Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
-                       CopLINE_inc(PL_curcop);
-                   }
-                   else
+               if (no_op_error)
                         no_op("Bareword",s);
-               }
  
                 /* See if the name is "Foo::",
                    in which case Foo is a bareword
@@ -7603,10 +7711,10 @@ Perl_yylex(pTHX)
                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
                              {
                                  /* PL_warn_reserved is constant */
-                                GCC_DIAG_IGNORE(-Wformat-nonliteral);
+                                GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
                                        PL_tokenbuf);
-                                GCC_DIAG_RESTORE;
+                                GCC_DIAG_RESTORE_STMT;
                              }
                         }
                     }
@@ -7661,14 +7769,6 @@ Perl_yylex(pTHX)
                 if (!GvIO(gv))
                     GvIOp(gv) = newIO();
                 IoIFP(GvIOp(gv)) = PL_rsfp;
-#if defined(HAS_FCNTL) && defined(F_SETFD) && defined(FD_CLOEXEC)
-               {
-                   const int fd = PerlIO_fileno(PL_rsfp);
-                    if (fd >= 3) {
-                        fcntl(fd,F_SETFD, FD_CLOEXEC);
-                    }
-               }
-#endif
                 /* Mark this internal pseudo-handle as clean */
                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
@@ -8641,22 +8741,24 @@ Perl_yylex(pTHX)
           really_sub:
             {
                 char * const tmpbuf = PL_tokenbuf + 1;
-               expectation attrful;
                 bool have_name, have_proto;
                 const int key = tmp;
                  SV *format_name = NULL;
+                bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED;
  
                  SSize_t off = s-SvPVX(PL_linestr);
                 s = skipspace(s);
                  d = SvPVX(PL_linestr)+off;
  
+                SAVEBOOL(PL_parser->sig_seen);
+                PL_parser->sig_seen = FALSE;
+
                  if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
                      || *s == '\''
                      || (*s == ':' && s[1] == ':'))
                 {
  
-                   PL_expect = XBLOCK;
-                   attrful = XATTRBLOCK;
+                   PL_expect = XATTRBLOCK;
                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
                                   &len);
                      if (key == KEY_format)
@@ -8687,8 +8789,7 @@ Perl_yylex(pTHX)
                         Perl_croak(aTHX_
                                   "Missing name in \"%s\"", PL_bufptr);
                     }
-                   PL_expect = XTERMBLOCK;
-                   attrful = XATTRTERM;
+                   PL_expect = XATTRTERM;
                     sv_setpvs(PL_subname,"?");
                     have_name = FALSE;
                 }
@@ -8704,11 +8805,11 @@ Perl_yylex(pTHX)
                 }
  
                 /* Look for a prototype */
-               if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
+               if (*s == '(' && !is_sigsub) {
                     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
-                   COPLINE_SET_FROM_MULTI_END;
                     if (!s)
                         Perl_croak(aTHX_ "Prototype not terminated");
+                   COPLINE_SET_FROM_MULTI_END;
                     (void)validate_proto(PL_subname, PL_lex_stuff,
                                          ckWARN(WARN_ILLEGALPROTO), 0);
                     have_proto = TRUE;
@@ -8718,9 +8819,9 @@ Perl_yylex(pTHX)
                 else
                     have_proto = FALSE;
  
-               if (*s == ':' && s[1] != ':')
-                   PL_expect = attrful;
-               else if ((*s != '{' && *s != '(') && key != KEY_format) {
+               if (  !(*s == ':' && s[1] != ':')
+                    && (*s != '{' && *s != '(') && key != KEY_format)
+                {
                      assert(key == KEY_sub || key == KEY_AUTOLOAD ||
                             key == KEY_DESTROY || key == KEY_BEGIN ||
                             key == KEY_UNITCHECK || key == KEY_CHECK ||
@@ -8744,10 +8845,16 @@ Perl_yylex(pTHX)
                         sv_setpvs(PL_subname, "__ANON__");
                     else
                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
-                   TOKEN(ANONSUB);
+                    if (is_sigsub)
+                        TOKEN(ANON_SIGSUB);
+                    else
+                        TOKEN(ANONSUB);
                 }
                 force_ident_maybe_lex('&');
-               TOKEN(SUB);
+                if (is_sigsub)
+                    TOKEN(SIGSUB);
+                else
+                    TOKEN(SUB);
             }
  
         case KEY_system:
@@ -8906,7 +9013,7 @@ Perl_yylex(pTHX)
  
    Looks up an identifier in the pad or in a package
  
-  is_sig indicates that this is a subroutine signature variable
+  PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable
    rather than a plain pad var.
  
    Returns:
@@ -8935,6 +9042,7 @@ S_pending_ident(pTHX)
  
      DEBUG_T({ PerlIO_printf(Perl_debug_log,
            "### Pending identifier '%s'\n", PL_tokenbuf); });
+    assert(tokenbuf_len >= 2);
  
      /* if we're in a my(), we can't allow dynamics here.
         $foo'bar has already been turned into $foo::bar, so
@@ -8958,13 +9066,13 @@ S_pending_ident(pTHX)
              if (has_colon) {
                  /* "my" variable %s can't be in a package */
                  /* PL_no_myglob is constant */
-                GCC_DIAG_IGNORE(-Wformat-nonliteral);
+                GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
                  yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
                              PL_in_my == KEY_my ? "my" : "state",
                              *PL_tokenbuf == '&' ? "subroutin" : "variabl",
                              PL_tokenbuf),
                              UTF ? SVf_UTF8 : 0);
-                GCC_DIAG_RESTORE;
+                GCC_DIAG_RESTORE_STMT;
              }
  
              if (PL_in_my == KEY_sigvar) {
@@ -9010,7 +9118,7 @@ S_pending_ident(pTHX)
                 HEK * const stashname = HvNAME_HEK(stash);
                 SV *  const sym = newSVhek(stashname);
                  sv_catpvs(sym, "::");
-                sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
+                sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
                  pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
                  pl_yylval.opval->op_private = OPpCONST_ENTERED;
                  if (pit != '&')
@@ -9038,7 +9146,7 @@ S_pending_ident(pTHX)
          && PL_lex_state != LEX_NORMAL
          && !PL_lex_brackets)
      {
-        GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
+        GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
                                           ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
                                           SVt_PVAV);
          if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
@@ -9055,11 +9163,11 @@ S_pending_ident(pTHX)
      /* build ops for a bareword */
      pl_yylval.opval = newSVOP(OP_CONST, 0,
                                    newSVpvn_flags(PL_tokenbuf + 1,
-                                                     tokenbuf_len - 1,
+                                                      tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
                                                        UTF ? SVf_UTF8 : 0 ));
      pl_yylval.opval->op_private = OPpCONST_ENTERED;
      if (pit != '&')
-       gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
+        gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
                      (PL_in_eval ? GV_ADDMULTI : GV_ADD)
                       | ( UTF ? SVf_UTF8 : 0 ),
                      ((PL_tokenbuf[0] == '$') ? SVt_PV
@@ -9134,11 +9242,15 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
     Best used as sv=new_constant(..., sv, ...).
     If s, pv are NULL, calls subroutine with one argument,
     and <type> is used with error messages only.
-   <type> is assumed to be well formed UTF-8 */
+   <type> is assumed to be well formed UTF-8.
+
+   If error_msg is not NULL, *error_msg will be set to any error encountered.
+   Otherwise yyerror() will be used to output it */
  
  STATIC SV *
  S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
-              SV *sv, SV *pv, const char *type, STRLEN typelen)
+              SV *sv, SV *pv, const char *type, STRLEN typelen,
+               const char ** error_msg)
  {
      dSP;
      HV * table = GvHV(PL_hintgv);               /* ^H */
@@ -9153,13 +9265,6 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
      if (*key == 'c') { assert (strEQ(key, "charnames")); }
      assert(type || s);
  
-    /* charnames doesn't work well if there have been errors found */
-    if (PL_error_count > 0 && *key == 'c')
-    {
-       SvREFCNT_dec_NN(sv);
-       return &PL_sv_undef;
-    }
-
      sv_2mortal(sv);                    /* Parent created it permanently */
      if (!table
         || ! (PL_hints & HINT_LOCALIZE_HH)
@@ -9214,7 +9319,12 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
                                      (type ? type: s), why1, why2, why3);
              }
          }
-       yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
+        if (error_msg) {
+            *error_msg = msg;
+        }
+        else {
+            yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
+        }
         return SvREFCNT_inc_simple_NN(sv);
      }
    now_ok:
@@ -9276,8 +9386,10 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
  
  PERL_STATIC_INLINE void
  S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
-                    bool is_utf8, bool check_dollar)
+                    bool is_utf8, bool check_dollar, bool tick_warn)
  {
+    int saw_tick = 0;
+    const char *olds = *s;
      PERL_ARGS_ASSERT_PARSE_IDENT;
  
      while (*s < PL_bufend) {
@@ -9311,6 +9423,7 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
              *(*d)++ = ':';
              *(*d)++ = ':';
              (*s)++;
+            saw_tick++;
          }
          else if (allow_package && **s == ':' && (*s)[1] == ':'
             /* Disallow things like Foo::$bar. For the curious, this is
@@ -9324,14 +9437,38 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
          else
              break;
      }
+    if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
+              && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
+        char *d;
+       char *d2;
+        Newx(d, *s - olds + saw_tick + 2, char); /* +2 for $# */
+        d2 = d;
+        SAVEFREEPV(d);
+        Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+                         "Old package separator used in string");
+        if (olds[-1] == '#')
+            *d2++ = olds[-2];
+        *d2++ = olds[-1];
+        while (olds < *s) {
+            if (*olds == '\'') {
+                *d2++ = '\\';
+                *d2++ = *olds++;
+            }
+           else
+                *d2++ = *olds++;
+        }
+        Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+                         "\t(Did you mean \"%" UTF8f "\" instead?)\n",
+                          UTF8fARG(is_utf8, d2-d, d));
+    }
      return;
  }
  
  /* Returns a NUL terminated string, with the length of the string written to
     *slp
     */
-STATIC char *
-S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
+char *
+Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
  {
      char *d = dest;
      char * const e = d + destlen - 3;  /* two-character token, ending NUL */
@@ -9339,7 +9476,7 @@ S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN
  
      PERL_ARGS_ASSERT_SCAN_WORD;
  
-    parse_ident(&s, &d, e, allow_package, is_utf8, TRUE);
+    parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
      *d = '\0';
      *slp = d - dest;
      return s;
@@ -9387,7 +9524,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
         }
      }
      else {  /* See if it is a "normal" identifier */
-        parse_ident(&s, &d, e, 1, is_utf8, FALSE);
+        parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
      }
      *d = '\0';
      d = dest;
@@ -9422,9 +9559,9 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
              s = skipspace(s);
          }
      }
-    if ((s <= PL_bufend - (is_utf8)
+    if ((s <= PL_bufend - ((is_utf8)
                            ? UTF8SKIP(s)
-                          : 1)
+                          : 1))
          && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
      {
          if (is_utf8) {
@@ -9465,7 +9602,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
                     (the later check for } being at the expected point will trap
                     cases where this doesn't pan out.)  */
                  d += is_utf8 ? UTF8SKIP(d) : 1;
-                parse_ident(&s, &d, e, 1, is_utf8, TRUE);
+                parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
                  *d = '\0';
              }
              else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
@@ -9525,7 +9662,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
                 PL_lex_state = LEX_INTERPEND;
                 PL_expect = XREF;
             }
-           if (PL_lex_state == LEX_NORMAL) {
+           if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
                 if (ckWARN(WARN_AMBIGUOUS)
                      && (keyword(dest, d - dest, 0)
                         || get_cvn_flags(dest, d - dest, is_utf8
@@ -9552,6 +9689,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
              CopLINE_set(PL_curcop, orig_copline);
              PL_parser->herelines = herelines;
             *dest = '\0';
+            PL_parser->sub_no_recover = TRUE;
         }
      }
      else if (   PL_lex_state == LEX_INTERPNORMAL
@@ -9809,7 +9947,7 @@ S_scan_subst(pTHX_ char *start)
           * the NVX field indicates how many src code lines the replacement
           * spreads over */
          sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
-        ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = 0;
+        ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
          ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
                                                                      cBOOL(es);
      }
@@ -9930,12 +10068,15 @@ S_scan_heredoc(pTHX_ char *s)
      e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
      *PL_tokenbuf = '\n';
      peek = s;
+
      if (*peek == '~') {
         indented = TRUE;
         peek++; s++;
      }
+
      while (SPACE_OR_TAB(*peek))
         peek++;
+
      if (*peek == '`' || *peek == '\'' || *peek =='"') {
         s = peek;
         term = *s++;
@@ -9951,19 +10092,25 @@ S_scan_heredoc(pTHX_ char *s)
             s++, term = '\'';
         else
             term = '"';
+
         if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
             Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
+
         peek = s;
+
          while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
             peek += UTF ? UTF8SKIP(peek) : 1;
         }
+
         len = (peek - s >= e - d) ? (e - d) : (peek - s);
         Copy(s, d, len, char);
         s += len;
         d += len;
      }
+
      if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
         Perl_croak(aTHX_ "Delimiter for here document is too long");
+
      *d++ = '\n';
      *d = '\0';
      len = d - PL_tokenbuf;
@@ -10006,6 +10153,7 @@ S_scan_heredoc(pTHX_ char *s)
  
      PL_multi_start = origline + 1 + PL_parser->herelines;
      PL_multi_open = PL_multi_close = '<';
+
      /* inside a string eval or quote-like operator */
      if (!infile || PL_lex_inwhat) {
         SV *linestr;
@@ -10016,43 +10164,47 @@ S_scan_heredoc(pTHX_ char *s)
            entered.  But we need them set here. */
         shared->ls_bufptr  = s;
         shared->ls_linestr = PL_linestr;
-       if (PL_lex_inwhat)
-         /* Look for a newline.  If the current buffer does not have one,
-            peek into the line buffer of the parent lexing scope, going
-            up as many levels as necessary to find one with a newline
-            after bufptr.
-          */
-         while (!(s = (char *)memchr(
-                   (void *)shared->ls_bufptr, '\n',
-                   SvEND(shared->ls_linestr)-shared->ls_bufptr
-               ))) {
-           shared = shared->ls_prev;
-           /* shared is only null if we have gone beyond the outermost
-              lexing scope.  In a file, we will have broken out of the
-              loop in the previous iteration.  In an eval, the string buf-
-              fer ends with "\n;", so the while condition above will have
-              evaluated to false.  So shared can never be null.  Or so you
-              might think.  Odd syntax errors like s;@{<<; can gobble up
-              the implicit semicolon at the end of a flie, causing the
-              file handle to be closed even when we are not in a string
-              eval.  So shared may be null in that case.
-               (Closing '}' here to balance the earlier open brace for
-               editors that look for matched pairs.) */
-           if (UNLIKELY(!shared))
-               goto interminable;
-           /* A LEXSHARED struct with a null ls_prev pointer is the outer-
-              most lexing scope.  In a file, shared->ls_linestr at that
-              level is just one line, so there is no body to steal. */
-           if (infile && !shared->ls_prev) {
-               s = olds;
-               goto streaming;
-           }
-         }
+
+        if (PL_lex_inwhat) {
+            /* Look for a newline.  If the current buffer does not have one,
+             peek into the line buffer of the parent lexing scope, going
+             up as many levels as necessary to find one with a newline
+             after bufptr.
+            */
+           while (!(s = (char *)memchr(
+                                (void *)shared->ls_bufptr, '\n',
+                                SvEND(shared->ls_linestr)-shared->ls_bufptr
+               )))
+            {
+                shared = shared->ls_prev;
+                /* shared is only null if we have gone beyond the outermost
+                   lexing scope.  In a file, we will have broken out of the
+                   loop in the previous iteration.  In an eval, the string buf-
+                   fer ends with "\n;", so the while condition above will have
+                   evaluated to false.  So shared can never be null.  Or so you
+                   might think.  Odd syntax errors like s;@{<<; can gobble up
+                   the implicit semicolon at the end of a flie, causing the
+                   file handle to be closed even when we are not in a string
+                   eval.  So shared may be null in that case.
+                   (Closing '>>}' here to balance the earlier open brace for
+                   editors that look for matched pairs.) */
+                if (UNLIKELY(!shared))
+                    goto interminable;
+                /* A LEXSHARED struct with a null ls_prev pointer is the outer-
+                   most lexing scope.  In a file, shared->ls_linestr at that
+                   level is just one line, so there is no body to steal. */
+                if (infile && !shared->ls_prev) {
+                    s = olds;
+                    goto streaming;
+                }
+            }
+        }
         else {  /* eval or we've already hit EOF */
             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
             if (!s)
                  goto interminable;
         }
+
         linestr = shared->ls_linestr;
         bufend = SvEND(linestr);
         d = s;
@@ -10072,21 +10224,22 @@ S_scan_heredoc(pTHX_ char *s)
                         if (! SPACE_OR_TAB(*backup)) {
                             break;
                         }
-
                         indent_len++;
                     }
  
                     /* No whitespace or all! */
                     if (backup == s || *backup == '\n') {
-                       Newxz(indent, indent_len + 1, char);
+                       Newx(indent, indent_len + 1, char);
                         memcpy(indent, backup + 1, indent_len);
+                       indent[indent_len] = 0;
                         s--; /* before our delimiter */
                         PL_parser->herelines--; /* this line doesn't count */
                         break;
                     }
                 }
             }
-       } else {
+       }
+        else {
             while (s < bufend - len + 1
                    && memNE(s,PL_tokenbuf,len) )
             {
@@ -10098,6 +10251,7 @@ S_scan_heredoc(pTHX_ char *s)
         if (s >= bufend - len + 1) {
             goto interminable;
         }
+
         sv_setpvn(tmpstr,d+1,s-d);
         s += len - 1;
         /* the preceding stmt passes a newline */
@@ -10120,6 +10274,7 @@ S_scan_heredoc(pTHX_ char *s)
                                 bufend - shared->re_eval_start);
             shared->re_eval_start -= s-d;
         }
+
         if (cxstack_ix >= 0
              && CxTYPE(cx) == CXt_EVAL
              && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
@@ -10128,125 +10283,139 @@ S_scan_heredoc(pTHX_ char *s)
             cx->blk_eval.cur_text = newSVsv(linestr);
             cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
         }
+
         /* Copy everything from s onwards back to d. */
         Move(s,d,bufend-s + 1,char);
         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
         /* Setting PL_bufend only applies when we have not dug deeper
            into other scopes, because sublex_done sets PL_bufend to
            SvEND(PL_linestr). */
-       if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
+       if (shared == PL_parser->lex_shared)
+            PL_bufend = SvEND(linestr);
         s = olds;
      }
-    else
-    {
-      SV *linestr_save;
-      char *oldbufptr_save;
-      char *oldoldbufptr_save;
-     streaming:
-      SvPVCLEAR(tmpstr);   /* avoid "uninitialized" warning */
-      term = PL_tokenbuf[1];
-      len--;
-      linestr_save = PL_linestr; /* must restore this afterwards */
-      d = s;                    /* and this */
-      oldbufptr_save = PL_oldbufptr;
-      oldoldbufptr_save = PL_oldoldbufptr;
-      PL_linestr = newSVpvs("");
-      PL_bufend = SvPVX(PL_linestr);
-      while (1) {
-       PL_bufptr = PL_bufend;
-       CopLINE_set(PL_curcop,
-                   origline + 1 + PL_parser->herelines);
-       if (!lex_next_chunk(LEX_NO_TERM)
-        && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
-           /* Simply freeing linestr_save might seem simpler here, as it
-              does not matter what PL_linestr points to, since we are
-              about to croak; but in a quote-like op, linestr_save
-              will have been prospectively freed already, via
-              SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
-              restore PL_linestr. */
-           SvREFCNT_dec_NN(PL_linestr);
-           PL_linestr = linestr_save;
-            PL_oldbufptr = oldbufptr_save;
-            PL_oldoldbufptr = oldoldbufptr_save;
-           goto interminable;
-       }
-       CopLINE_set(PL_curcop, origline);
-       if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
-            s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
-            /* ^That should be enough to avoid this needing to grow:  */
-           sv_catpvs(PL_linestr, "\n\0");
-            assert(s == SvPVX(PL_linestr));
-            PL_bufend = SvEND(PL_linestr);
-       }
-       s = PL_bufptr;
-       PL_parser->herelines++;
-       PL_last_lop = PL_last_uni = NULL;
+    else {
+        SV *linestr_save;
+        char *oldbufptr_save;
+        char *oldoldbufptr_save;
+      streaming:
+        SvPVCLEAR(tmpstr);   /* avoid "uninitialized" warning */
+        term = PL_tokenbuf[1];
+        len--;
+        linestr_save = PL_linestr; /* must restore this afterwards */
+        d = s;                  /* and this */
+        oldbufptr_save = PL_oldbufptr;
+        oldoldbufptr_save = PL_oldoldbufptr;
+        PL_linestr = newSVpvs("");
+        PL_bufend = SvPVX(PL_linestr);
+
+        while (1) {
+            PL_bufptr = PL_bufend;
+            CopLINE_set(PL_curcop,
+                        origline + 1 + PL_parser->herelines);
+
+            if (   !lex_next_chunk(LEX_NO_TERM)
+                && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
+            {
+                /* Simply freeing linestr_save might seem simpler here, as it
+                   does not matter what PL_linestr points to, since we are
+                   about to croak; but in a quote-like op, linestr_save
+                   will have been prospectively freed already, via
+                   SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
+                   restore PL_linestr. */
+                SvREFCNT_dec_NN(PL_linestr);
+                PL_linestr = linestr_save;
+                PL_oldbufptr = oldbufptr_save;
+                PL_oldoldbufptr = oldoldbufptr_save;
+                goto interminable;
+            }
+
+            CopLINE_set(PL_curcop, origline);
+
+            if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
+                s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
+                /* ^That should be enough to avoid this needing to grow:  */
+                sv_catpvs(PL_linestr, "\n\0");
+                assert(s == SvPVX(PL_linestr));
+                PL_bufend = SvEND(PL_linestr);
+            }
+
+            s = PL_bufptr;
+            PL_parser->herelines++;
+            PL_last_lop = PL_last_uni = NULL;
+
  #ifndef PERL_STRICT_CR
-       if (PL_bufend - PL_linestart >= 2) {
-           if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
-                || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
-           {
-               PL_bufend[-2] = '\n';
-               PL_bufend--;
-               SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
-           }
-           else if (PL_bufend[-1] == '\r')
-               PL_bufend[-1] = '\n';
-       }
-       else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
-           PL_bufend[-1] = '\n';
+            if (PL_bufend - PL_linestart >= 2) {
+                if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
+                    || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
+                {
+                    PL_bufend[-2] = '\n';
+                    PL_bufend--;
+                    SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
+                }
+                else if (PL_bufend[-1] == '\r')
+                    PL_bufend[-1] = '\n';
+            }
+            else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
+                PL_bufend[-1] = '\n';
  #endif
-       if (indented && (PL_bufend-s) >= len) {
-           char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
  
-           if (found) {
-               char *backup = found;
-               indent_len = 0;
+            if (indented && (PL_bufend-s) >= len) {
+                char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
  
-               /* Only valid if it's preceded by whitespace only */
-               while (backup != s && --backup >= s) {
-                   if (! SPACE_OR_TAB(*backup)) {
-                       break;
-                   }
-                   indent_len++;
-               }
+                if (found) {
+                    char *backup = found;
+                    indent_len = 0;
  
-               /* All whitespace or none! */
-               if (backup == found || SPACE_OR_TAB(*backup)) {
-                   Newxz(indent, indent_len + 1, char);
-                   memcpy(indent, backup, indent_len);
-                   SvREFCNT_dec(PL_linestr);
-                   PL_linestr = linestr_save;
-                   PL_linestart = SvPVX(linestr_save);
-                   PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
-                   PL_oldbufptr = oldbufptr_save;
-                   PL_oldoldbufptr = oldoldbufptr_save;
-                   s = d;
-                   break;
-               }
-           }
+                    /* Only valid if it's preceded by whitespace only */
+                    while (backup != s && --backup >= s) {
+                        if (! SPACE_OR_TAB(*backup)) {
+                            break;
+                        }
+                        indent_len++;
+                    }
  
-           /* Didn't find it */
-           sv_catsv(tmpstr,PL_linestr);
-       } else {
-           if (*s == term && PL_bufend-s >= len
-               && memEQ(s,PL_tokenbuf + 1,len))
-           {
-               SvREFCNT_dec(PL_linestr);
-               PL_linestr = linestr_save;
-               PL_linestart = SvPVX(linestr_save);
-               PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
-               PL_oldbufptr = oldbufptr_save;
-               PL_oldoldbufptr = oldoldbufptr_save;
-               s = d;
-               break;
-           } else {
-               sv_catsv(tmpstr,PL_linestr);
-           }
-       }
-      }
+                    /* All whitespace or none! */
+                    if (backup == found || SPACE_OR_TAB(*backup)) {
+                        Newx(indent, indent_len + 1, char);
+                        memcpy(indent, backup, indent_len);
+                        indent[indent_len] = 0;
+                        SvREFCNT_dec(PL_linestr);
+                        PL_linestr = linestr_save;
+                        PL_linestart = SvPVX(linestr_save);
+                        PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+                        PL_oldbufptr = oldbufptr_save;
+                        PL_oldoldbufptr = oldoldbufptr_save;
+                        s = d;
+                        break;
+                    }
+                }
+
+                /* Didn't find it */
+                sv_catsv(tmpstr,PL_linestr);
+            }
+            else {
+                if (*s == term && PL_bufend-s >= len
+                    && memEQ(s,PL_tokenbuf + 1,len))
+                {
+                    SvREFCNT_dec(PL_linestr);
+                    PL_linestr = linestr_save;
+                    PL_linestart = SvPVX(linestr_save);
+                    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+                    PL_oldbufptr = oldbufptr_save;
+                    PL_oldoldbufptr = oldoldbufptr_save;
+                    s = d;
+                    break;
+                }
+                else {
+                    sv_catsv(tmpstr,PL_linestr);
+                }
+            }
+        } /* while (1) */
      }
+
      PL_multi_end = origline + PL_parser->herelines;
+
      if (indented && indent) {
         STRLEN linecount = 1;
         STRLEN herelen = SvCUR(tmpstr);
@@ -10259,55 +10428,63 @@ S_scan_heredoc(pTHX_ char *s)
         while (ss < se) {
             /* newline only? Copy and move on */
             if (*ss == '\n') {
-               sv_catpv(newstr,"\n");
+               sv_catpvs(newstr,"\n");
                 ss++;
                 linecount++;
  
             /* Found our indentation? Strip it */
-           } else if (se - ss >= indent_len
+           }
+            else if (se - ss >= indent_len
                        && memEQ(ss, indent, indent_len))
             {
                 STRLEN le = 0;
-
                 ss += indent_len;
  
                 while ((ss + le) < se && *(ss + le) != '\n')
                     le++;
  
                 sv_catpvn(newstr, ss, le);
-
                 ss += le;
  
             /* Line doesn't begin with our indentation? Croak */
-           } else {
+           }
+            else {
+                Safefree(indent);
                 Perl_croak(aTHX_
                     "Indentation on line %d of here-doc doesn't match delimiter",
                     (int)linecount
                 );
             }
-       }
+       } /* while */
+
          /* avoid sv_setsv() as we dont wan't to COW here */
          sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
         Safefree(indent);
         SvREFCNT_dec_NN(newstr);
      }
+
      if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
         SvPV_shrink_to_cur(tmpstr);
      }
+
      if (!IN_BYTES) {
         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
             SvUTF8_on(tmpstr);
      }
+
      PL_lex_stuff = tmpstr;
      pl_yylval.ival = op_type;
      return s;
  
    interminable:
+    if (indent)
+       Safefree(indent);
      SvREFCNT_dec(tmpstr);
      CopLINE_set(PL_curcop, origline);
      missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
  }
  
+
  /* scan_inputsymbol
     takes: position of first '<' in input buffer
     returns: position of first char following the matching '>' in
@@ -10520,8 +10697,8 @@ S_scan_inputsymbol(pTHX_ char *start)
     SvIVX of the SV.
  */
  
-STATIC char *
-S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
+char *
+Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
                  char **delimp
      )
  {
@@ -10531,9 +10708,9 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
      char term;                 /* terminating character */
      char *to;                  /* current position in the sv's data */
      I32 brackets = 1;          /* bracket nesting level */
-    bool has_utf8 = FALSE;     /* is there any utf8 content? */
+    bool d_is_utf8 = FALSE;    /* is there any utf8 content? */
      IV termcode;               /* terminating char. code */
-    U8 termstr[UTF8_MAXBYTES]; /* terminating string */
+    U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */
      STRLEN termlen;            /* length of terminating string */
      line_t herelines;
  
@@ -10541,14 +10718,11 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
      const char * opening_delims = "([{<";
      const char * closing_delims = ")]}>";
  
+    /* The only non-UTF character that isn't a stand alone grapheme is
+     * white-space, hence can't be a delimiter. */
      const char * non_grapheme_msg = "Use of unassigned code point or"
                                      " non-standalone grapheme for a delimiter"
-                                    " will be a fatal error starting in Perl"
-                                    " 5.30";
-    /* The only non-UTF character that isn't a stand alone grapheme is
-     * white-space, hence can't be a delimiter.  So can skip for non-UTF-8 */
-    bool check_grapheme = UTF && ckWARN_d(WARN_DEPRECATED);
-
+                                    " is not allowed";
      PERL_ARGS_ASSERT_SCAN_STR;
  
      /* skip space before the delimiter */
@@ -10567,26 +10741,12 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
      }
      else {
         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
-        if (check_grapheme) {
-            if (   UNLIKELY(UNICODE_IS_SUPER(termcode))
-                || UNLIKELY(UNICODE_IS_NONCHAR(termcode)))
-            {
-                /* These are considered graphemes, and since the ending
-                 * delimiter will be the same, we don't have to check the other
-                 * end */
-                check_grapheme = FALSE;
-            }
-            else if (UNLIKELY(! _is_grapheme((U8 *) start,
-                                             (U8 *) s,
-                                             (U8 *) PL_bufend,
-                                             termcode)))
-            {
-                Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "%s", non_grapheme_msg);
-
-                /* Don't have to check the other end, as have already warned at
-                 * this one */
-                check_grapheme = FALSE;
-            }
+        if (UTF && UNLIKELY(! _is_grapheme((U8 *) start,
+                                           (U8 *) s,
+                                           (U8 *) PL_bufend,
+                                                  termcode)))
+        {
+            yyerror(non_grapheme_msg);
          }
  
         Copy(s, termstr, termlen, U8);
@@ -10652,20 +10812,19 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
                      if (   s + termlen <= PL_bufend
                          && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
                      {
-                        if (   check_grapheme
+                        if (   UTF
                              && UNLIKELY(! _is_grapheme((U8 *) start,
-                                                              (U8 *) s,
-                                                              (U8 *) PL_bufend,
+                                                       (U8 *) s,
+                                                       (U8 *) PL_bufend,
                                                                termcode)))
                          {
-                            Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
-                                        "%s", non_grapheme_msg);
+                            yyerror(non_grapheme_msg);
                          }
                         break;
                      }
                 }
-               else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
-                   has_utf8 = TRUE;
+               else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
+                   d_is_utf8 = TRUE;
                  }
  
                 *to = *s;
@@ -10698,8 +10857,8 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
                     break;
                 else if ((UV)*s == PL_multi_open)
                     brackets++;
-               else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
-                   has_utf8 = TRUE;
+               else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
+                   d_is_utf8 = TRUE;
                 *to = *s;
             }
         }
@@ -10749,7 +10908,7 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
             sv_catpvn(sv, s, termlen);
      s += termlen;
  
-    if (has_utf8)
+    if (d_is_utf8)
         SvUTF8_on(sv);
  
      PL_multi_end = CopLINE(PL_curcop);
@@ -10868,6 +11027,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
             I32 shift;
             bool overflowed = FALSE;
             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
+            bool has_digs = FALSE;
             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
             static const char* const bases[5] =
               { "", "binary", "", "octal", "hexadecimal" };
@@ -10959,7 +11119,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
  
                   digit:
                     just_zero = FALSE;
+                    has_digs = TRUE;
                     if (!overflowed) {
+                       assert(shift >= 0);
                         x = u << shift; /* make room for the digit */
  
                          total_bits += shift;
@@ -11040,19 +11202,22 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                      NV nv_mult = 1.0;
  #endif
                      bool accumulate = TRUE;
-                    for (h++; (isXDIGIT(*h) || *h == '_'); h++) {
+                    U8 b;
+                    int lim = 1 << shift;
+                    for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
+                               *h == '_'); h++) {
                          if (isXDIGIT(*h)) {
-                            U8 b = XDIGIT_VALUE(*h);
                              significant_bits += shift;
  #ifdef HEXFP_UQUAD
                              if (accumulate) {
                                  if (significant_bits < NV_MANT_DIG) {
                                      /* We are in the long "run" of xdigits,
                                       * accumulate the full four bits. */
+                                   assert(shift >= 0);
                                      hexfp_uquad <<= shift;
                                      hexfp_uquad |= b;
                                      hexfp_frac_bits += shift;
-                                } else {
+                                } else if (significant_bits - shift < NV_MANT_DIG) {
                                      /* We are at a hexdigit either at,
                                       * or straddling, the edge of mantissa.
                                       * We will try grabbing as many as
@@ -11061,7 +11226,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                                        significant_bits - NV_MANT_DIG;
                                      if (tail <= 0)
                                         tail += shift;
+                                   assert(tail >= 0);
                                      hexfp_uquad <<= tail;
+                                   assert((shift - tail) >= 0);
                                      hexfp_uquad |= b >> (shift - tail);
                                      hexfp_frac_bits += tail;
  
@@ -11100,7 +11267,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                              }
  #else /* HEXFP_NV */
                              if (accumulate) {
-                                nv_mult /= 16.0;
+                                nv_mult /= nvshift[shift];
                                  if (nv_mult > 0.0)
                                      hexfp_nv += b * nv_mult;
                                  else
@@ -11168,6 +11335,21 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                  }
              }
  
+            if (shift != 3 && !has_digs) {
+                /* 0x or 0b with no digits, treat it as an error.
+                   Originally this backed up the parse before the b or
+                   x, but that has the potential for silent changes in
+                   behaviour, like for: "0x.3" and "0x+$foo".
+                */
+                const char *d = s;
+                char *oldbp = PL_bufptr;
+                if (*d) ++d; /* so the user sees the bad non-digit */
+                PL_bufptr = (char *)d; /* so yyerror reports the context */
+                yyerror(Perl_form(aTHX_ "No digits found for %s literal",
+                                  shift == 4 ? "hexadecimal" : "binary"));
+                PL_bufptr = oldbp;
+            }
+
             if (overflowed) {
                 if (n > 4294967295.0)
                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
@@ -11186,9 +11368,10 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
             }
             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
                 sv = new_constant(start, s - start, "integer",
-                                 sv, NULL, NULL, 0);
+                                 sv, NULL, NULL, 0, NULL);
             else if (PL_hints & HINT_NEW_BINARY)
-               sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
+               sv = new_constant(start, s - start, "binary",
+                                  sv, NULL, NULL, 0, NULL);
         }
         break;
  
@@ -11369,7 +11552,6 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                floatit = TRUE;
          }
         if (floatit) {
-            STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD();
             /* terminate the string */
             *d = '\0';
              if (UNLIKELY(hexfp)) {
@@ -11386,7 +11568,6 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
              } else {
                  nv = Atof(PL_tokenbuf);
              }
-            RESTORE_LC_NUMERIC_UNDERLYING();
              sv = newSVnv(nv);
         }
  
@@ -11395,7 +11576,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
             const char *const key = floatit ? "float" : "integer";
             const STRLEN keylen = floatit ? 5 : 7;
             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
-                               key, keylen, sv, NULL, NULL, 0);
+                               key, keylen, sv, NULL, NULL, 0, NULL);
         }
         break;
  
@@ -11542,6 +11723,39 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
      return oldsavestack_ix;
  }
  
+
+/* Do extra initialisation of a CV (typically one just created by
+ * start_subparse()) if that CV is for a named sub
+ */
+
+void
+Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
+{
+    PERL_ARGS_ASSERT_INIT_NAMED_CV;
+
+    if (nameop->op_type == OP_CONST) {
+        const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
+        if (   strEQ(name, "BEGIN")
+            || strEQ(name, "END")
+            || strEQ(name, "INIT")
+            || strEQ(name, "CHECK")
+            || strEQ(name, "UNITCHECK")
+        )
+          CvSPECIAL_on(cv);
+    }
+    else
+    /* State subs inside anonymous subs need to be
+     clonable themselves. */
+    if (   CvANON(CvOUTSIDE(cv))
+        || CvCLONE(CvOUTSIDE(cv))
+        || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
+                        CvOUTSIDE(cv)
+                     ))[nameop->op_targ])
+    )
+      CvCLONE_on(cv);
+}
+
+
  static int
  S_yywarn(pTHX_ const char *const s, U32 flags)
  {
@@ -11906,9 +12120,14 @@ S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
             }
         }
  
+        /* 'chars' isn't quite the right name, as code points above 0xFFFF
+         * require 4 bytes per char */
         chars = SvCUR(utf16_buffer) >> 1;
         have = SvCUR(utf8_buffer);
-       SvGROW(utf8_buffer, have + chars * 3 + 1);
+
+        /* Assume the worst case size as noted by the functions: twice the
+         * number of input bytes */
+       SvGROW(utf8_buffer, have + chars * 4 + 1);
  
         if (reverse) {
             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
@@ -12067,6 +12286,79 @@ Perl_keyword_plugin_standard(pTHX_
      return KEYWORD_PLUGIN_DECLINE;
  }
  
+/*
+=for apidoc wrap_keyword_plugin
+
+Puts a C function into the chain of keyword plugins.  This is the
+preferred way to manipulate the L</PL_keyword_plugin> variable.
+C<new_plugin> is a pointer to the C function that is to be added to the
+keyword plugin chain, and C<old_plugin_p> points to the storage location
+where a pointer to the next function in the chain will be stored.  The
+value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
+while the value previously stored there is written to C<*old_plugin_p>.
+
+L</PL_keyword_plugin> is global to an entire process, and a module wishing
+to hook keyword parsing may find itself invoked more than once per
+process, typically in different threads.  To handle that situation, this
+function is idempotent.  The location C<*old_plugin_p> must initially
+(once per process) contain a null pointer.  A C variable of static
+duration (declared at file scope, typically also marked C<static> to give
+it internal linkage) will be implicitly initialised appropriately, if it
+does not have an explicit initialiser.  This function will only actually
+modify the plugin chain if it finds C<*old_plugin_p> to be null.  This
+function is also thread safe on the small scale.  It uses appropriate
+locking to avoid race conditions in accessing L</PL_keyword_plugin>.
+
+When this function is called, the function referenced by C<new_plugin>
+must be ready to be called, except for C<*old_plugin_p> being unfilled.
+In a threading situation, C<new_plugin> may be called immediately, even
+before this function has returned.  C<*old_plugin_p> will always be
+appropriately set before C<new_plugin> is called.  If C<new_plugin>
+decides not to do anything special with the identifier that it is given
+(which is the usual case for most calls to a keyword plugin), it must
+chain the plugin function referenced by C<*old_plugin_p>.
+
+Taken all together, XS code to install a keyword plugin should typically
+look something like this:
+
+    static Perl_keyword_plugin_t next_keyword_plugin;
+    static OP *my_keyword_plugin(pTHX_
+        char *keyword_plugin, STRLEN keyword_len, OP **op_ptr)
+    {
+        if (memEQs(keyword_ptr, keyword_len,
+                   "my_new_keyword")) {
+            ...
+        } else {
+            return next_keyword_plugin(aTHX_
+                keyword_ptr, keyword_len, op_ptr);
+        }
+    }
+    BOOT:
+        wrap_keyword_plugin(my_keyword_plugin,
+                            &next_keyword_plugin);
+
+Direct access to L</PL_keyword_plugin> should be avoided.
+
+=cut
+*/
+
+void
+Perl_wrap_keyword_plugin(pTHX_
+    Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
+{
+    dVAR;
+
+    PERL_UNUSED_CONTEXT;
+    PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
+    if (*old_plugin_p) return;
+    KEYWORD_PLUGIN_MUTEX_LOCK;
+    if (!*old_plugin_p) {
+        *old_plugin_p = PL_keyword_plugin;
+        PL_keyword_plugin = new_plugin;
+    }
+    KEYWORD_PLUGIN_MUTEX_UNLOCK;
+}
+
  #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
  static void
  S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
@@ -12114,7 +12406,7 @@ S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
  }
  
  /*
-=for apidoc Amx|OP *|parse_arithexpr|U32 flags
+=for apidoc parse_arithexpr
  
  Parse a Perl arithmetic expression.  This may contain operators of precedence
  down to the bit shift operators.  The expression must be followed (and thus
@@ -12146,7 +12438,7 @@ Perl_parse_arithexpr(pTHX_ U32 flags)
  }
  
  /*
-=for apidoc Amx|OP *|parse_termexpr|U32 flags
+=for apidoc parse_termexpr
  
  Parse a Perl term expression.  This may contain operators of precedence
  down to the assignment operators.  The expression must be followed (and thus
@@ -12178,7 +12470,7 @@ Perl_parse_termexpr(pTHX_ U32 flags)
  }
  
  /*
-=for apidoc Amx|OP *|parse_listexpr|U32 flags
+=for apidoc parse_listexpr
  
  Parse a Perl list expression.  This may contain operators of precedence
  down to the comma operator.  The expression must be followed (and thus
@@ -12210,7 +12502,7 @@ Perl_parse_listexpr(pTHX_ U32 flags)
  }
  
  /*
-=for apidoc Amx|OP *|parse_fullexpr|U32 flags
+=for apidoc parse_fullexpr
  
  Parse a single complete Perl expression.  This allows the full
  expression grammar, including the lowest-precedence operators such
@@ -12243,7 +12535,7 @@ Perl_parse_fullexpr(pTHX_ U32 flags)
  }
  
  /*
-=for apidoc Amx|OP *|parse_block|U32 flags
+=for apidoc parse_block
  
  Parse a single complete Perl code block.  This consists of an opening
  brace, a sequence of statements, and a closing brace.  The block
@@ -12279,7 +12571,7 @@ Perl_parse_block(pTHX_ U32 flags)
  }
  
  /*
-=for apidoc Amx|OP *|parse_barestmt|U32 flags
+=for apidoc parse_barestmt
  
  Parse a single unadorned Perl statement.  This may be a normal imperative
  statement or a declaration that has compile-time effect.  It does not
@@ -12317,7 +12609,7 @@ Perl_parse_barestmt(pTHX_ U32 flags)
  }
  
  /*
-=for apidoc Amx|SV *|parse_label|U32 flags
+=for apidoc parse_label
  
  Parse a single label, possibly optional, of the type that may prefix a
  Perl statement.  It is up to the caller to ensure that the dynamic parser
@@ -12344,10 +12636,11 @@ Perl_parse_label(pTHX_ U32 flags)
      if (PL_nexttoke) {
         PL_parser->yychar = yylex();
         if (PL_parser->yychar == LABEL) {
-           char * const lpv = pl_yylval.pval;
-           STRLEN llen = strlen(lpv);
+           SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
             PL_parser->yychar = YYEMPTY;
-           return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
+           cSVOPx(pl_yylval.opval)->op_sv = NULL;
+           op_free(pl_yylval.opval);
+           return labelsv;
         } else {
             yyunlex();
             goto no_label;
@@ -12386,7 +12679,7 @@ Perl_parse_label(pTHX_ U32 flags)
  }
  
  /*
-=for apidoc Amx|OP *|parse_fullstmt|U32 flags
+=for apidoc parse_fullstmt
  
  Parse a single complete Perl statement.  This may be a normal imperative
  statement or a declaration that has compile-time effect, and may include
@@ -12421,7 +12714,7 @@ Perl_parse_fullstmt(pTHX_ U32 flags)
  }
  
  /*
-=for apidoc Amx|OP *|parse_stmtseq|U32 flags
+=for apidoc parse_stmtseq
  
  Parse a sequence of zero or more Perl statements.  These may be normal
  imperative statements, including optional labels, or declarations
@@ -12465,5 +12758,37 @@ Perl_parse_stmtseq(pTHX_ U32 flags)
  }
  
  /*
+=for apidoc parse_subsignature
+
+Parse a subroutine signature declaration. This is the contents of the
+parentheses following a named or anonymous subroutine declaration when the
+C<signatures> feature is enabled. Note that this function neither expects
+nor consumes the opening and closing parentheses around the signature; it
+is the caller's job to handle these.
+
+This function must only be called during parsing of a subroutine; after
+L</start_subparse> has been called. It might allocate lexical variables on
+the pad for the current subroutine.
+
+The op tree to unpack the arguments from the stack at runtime is returned.
+This op tree should appear at the beginning of the compiled function. The
+caller may wish to use L</op_append_list> to build their function body
+after it, or splice it together with the body before calling L</newATTRSUB>.
+
+The C<flags> parameter is reserved for future use, and must always
+be zero.
+
+=cut
+*/
+
+OP *
+Perl_parse_subsignature(pTHX_ U32 flags)
+{
+    if (flags)
+        Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature");
+    return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
+}
+
+/*
   * ex: set ts=8 sts=4 sw=4 et:
   */