toke.c: Don't remap \N{} for EBCDIC

[perl5.git] / toke.c
diff --git a/toke.c b/toke.c

index 2748546..388d3f0 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -137,7 +137,7 @@ static const char* const ident_too_long = "Identifier too long";
   * 1999-02-27 mjd-perl-patch@plover.com */
  #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
  
-#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
+#define SPACE_OR_TAB(c) isBLANK_A(c)
  
  /* LEX_* are values for PL_lex_state, the state of the lexer.
   * They are arranged oddly so that the guard on the switch statement
@@ -427,7 +427,11 @@ S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
         if (name)
             Perl_sv_catpv(aTHX_ report, name);
         else if ((char)rv > ' ' && (char)rv <= '~')
+       {
             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
+           if ((char)rv == 'p')
+               sv_catpvs(report, " (pending identifier)");
+       }
         else if (!rv)
             sv_catpvs(report, "EOF");
         else
@@ -549,16 +553,14 @@ S_no_op(pTHX_ const char *const what, char *s)
                 NOOP;
             if (t < PL_bufptr && isSPACE(*t))
                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                       "\t(Do you need to predeclare %"SVf"?)\n",
-                   SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
-                                   SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+                       "\t(Do you need to predeclare %"UTF8f"?)\n",
+                     UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
         }
         else {
             assert(s >= oldbp);
             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                   "\t(Missing operator before %"SVf"?)\n",
-                    SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
-                                    SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+                   "\t(Missing operator before %"UTF8f"?)\n",
+                    UTF8fARG(UTF, s - oldbp, oldbp));
         }
      }
      PL_bufptr = oldbp;
@@ -751,9 +753,9 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
         parser->linestr = flags & LEX_START_COPIED
                             ? SvREFCNT_inc_simple_NN(line)
                             : newSVpvn_flags(s, len, SvUTF8(line));
-       sv_catpvs(parser->linestr, "\n;");
+       sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
      } else {
-       parser->linestr = newSVpvs("\n;");
+       parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
      }
      parser->oldoldbufptr =
         parser->oldbufptr =
@@ -1051,7 +1053,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
                     ENTER;
                     SAVESPTR(PL_warnhook);
                     PL_warnhook = PERL_WARNHOOK_FATAL;
-                   utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
+                   utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
                     LEAVE;
                 }
             }
@@ -1071,7 +1073,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
                 }
                 else {
                      assert(p < e -1 );
-                   *bufptr++ = TWO_BYTE_UTF8_TO_UNI(*p, *(p+1));
+                   *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
                     p += 2;
                  }
             }
@@ -1435,13 +1437,13 @@ Perl_lex_peek_unichar(pTHX_ U32 flags)
                 bufend = PL_parser->bufend;
             }
         }
-       unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
+       unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
         if (retlen == (STRLEN)-1) {
             /* malformed UTF-8 */
             ENTER;
             SAVESPTR(PL_warnhook);
             PL_warnhook = PERL_WARNHOOK_FATAL;
-           utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
+           utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
             LEAVE;
         }
         return unichar;
@@ -1510,14 +1512,16 @@ chunk will not be discarded.
  =cut
  */
  
+#define LEX_NO_INCLINE    0x40000000
  #define LEX_NO_NEXT_CHUNK 0x80000000
  
  void
  Perl_lex_read_space(pTHX_ U32 flags)
  {
      char *s, *bufend;
+    const bool can_incline = !(flags & LEX_NO_INCLINE);
      bool need_incline = 0;
-    if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
+    if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
  #ifdef PERL_MAD
      if (PL_skipwhite) {
@@ -1537,11 +1541,13 @@ Perl_lex_read_space(pTHX_ U32 flags)
             } while (!(c == '\n' || (c == 0 && s == bufend)));
         } else if (c == '\n') {
             s++;
-           PL_parser->linestart = s;
-           if (s == bufend)
-               need_incline = 1;
-           else
-               incline(s);
+           if (can_incline) {
+               PL_parser->linestart = s;
+               if (s == bufend)
+                   need_incline = 1;
+               else
+                   incline(s);
+           }
         } else if (isSPACE(c)) {
             s++;
         } else if (c == 0 && s == bufend) {
@@ -1553,14 +1559,14 @@ Perl_lex_read_space(pTHX_ U32 flags)
             if (flags & LEX_NO_NEXT_CHUNK)
                 break;
             PL_parser->bufptr = s;
-           COPLINE_INC_WITH_HERELINES;
+           if (can_incline) COPLINE_INC_WITH_HERELINES;
             got_more = lex_next_chunk(flags);
-           CopLINE_dec(PL_curcop);
+           if (can_incline) CopLINE_dec(PL_curcop);
             s = PL_parser->bufptr;
             bufend = PL_parser->bufend;
             if (!got_more)
                 break;
-           if (need_incline && PL_parser->rsfp) {
+           if (can_incline && need_incline && PL_parser->rsfp) {
                 incline(s);
                 need_incline = 0;
             }
@@ -1576,6 +1582,107 @@ Perl_lex_read_space(pTHX_ U32 flags)
  }
  
  /*
+
+=for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
+
+This function performs syntax checking on a prototype, C<proto>.
+If C<warn> is true, any illegal characters or mismatched brackets
+will trigger illegalproto warnings, declaring that they were
+detected in the prototype for C<name>.
+
+The return value is C<true> if this is a valid prototype, and
+C<false> if it is not, regardless of whether C<warn> was C<true> or
+C<false>.
+
+Note that C<NULL> is a valid C<proto> and will always return C<true>.
+
+=cut
+
+ */
+
+bool
+Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
+{ /* One pass over proto sets the flags below; warnings, if wanted, are issued afterwards. */
+    STRLEN len, origlen;
+    char *p = proto ? SvPV(proto, len) : NULL; /* len is only set when proto is non-NULL */
+    bool bad_proto = FALSE;                 /* saw a character outside the allowed set */
+    bool in_brackets = FALSE;               /* a '[' has been seen with no ']' after it yet */
+    bool after_slash = FALSE;               /* previous non-space character was a backslash */
+    char greedy_proto = ' ';                /* the '@' or '%' that set must_be_last */
+    bool proto_after_greedy_proto = FALSE;  /* a non-space character followed a greedy '@' or '%' */
+    bool must_be_last = FALSE;              /* an unescaped, unbracketed '@' or '%' was seen */
+    bool underscore = FALSE;                /* previous non-space character was '_' */
+    bool bad_proto_after_underscore = FALSE; /* '_' was followed by something other than ';', '@', '%' */
+
+    PERL_ARGS_ASSERT_VALIDATE_PROTO;
+
+    if (!proto)
+       return TRUE; /* a NULL prototype is treated as valid */
+
+    origlen = len; /* the loop consumes len; keep the full length for the warning text */
+    for (; len--; p++) {
+       if (!isSPACE(*p)) { /* whitespace is skipped and leaves all state flags untouched */
+           if (must_be_last)
+               proto_after_greedy_proto = TRUE;
+           if (underscore) {
+               if (!strchr(";@%", *p)) /* NB: strchr also matches the terminator, so NUL is not flagged here */
+                   bad_proto_after_underscore = TRUE;
+               underscore = FALSE;
+           }
+           if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') { /* NUL needs its own test: strchr would match the terminator */
+               bad_proto = TRUE;
+           }
+           else {
+               if (*p == '[')
+                   in_brackets = TRUE;
+               else if (*p == ']')
+                   in_brackets = FALSE;
+               else if ((*p == '@' || *p == '%') &&
+                   !after_slash &&
+                   !in_brackets ) {
+                   must_be_last = TRUE; /* any later non-space character is now an error */
+                   greedy_proto = *p;
+               }
+               else if (*p == '_')
+                   underscore = TRUE;
+           }
+           if (*p == '\\') /* tracked for every non-space character, legal or not */
+               after_slash = TRUE;
+           else
+               after_slash = FALSE;
+       }
+    }
+
+    if (warn) {
+       SV *tmpsv = newSVpvs_flags("", SVs_TEMP); /* scratch SV handed to the display helpers below */
+       p -= origlen; /* the loop advanced p by origlen; rewind to the start of the prototype */
+       p = SvUTF8(proto) /* from here on p is the text printed via %s in the messages */
+           ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
+                            origlen, UNI_DISPLAY_ISPRINT)
+           : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
+
+       if (proto_after_greedy_proto)
+           Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
+                       "Prototype after '%c' for %"SVf" : %s",
+                       greedy_proto, SVfARG(name), p);
+       if (in_brackets) /* still set at end of scan: a '[' was never closed */
+           Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
+                       "Missing ']' in prototype for %"SVf" : %s",
+                       SVfARG(name), p);
+       if (bad_proto)
+           Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
+                       "Illegal character in prototype for %"SVf" : %s",
+                       SVfARG(name), p);
+       if (bad_proto_after_underscore)
+           Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
+                       "Illegal character after '_' in prototype for %"SVf" : %s",
+                       SVfARG(name), p);
+    }
+
+    return (! (proto_after_greedy_proto || bad_proto) ); /* in_brackets and bad_proto_after_underscore only warn; they do not affect the result */
+}
+
+/*
   * S_incline
   * This subroutine has nothing to do with tilting, whether at windmills
   * or pinball tables.  Its name is short for "increment line".  It
@@ -1646,37 +1753,16 @@ S_incline(pTHX_ const char *s)
  
      if (t - s > 0) {
         const STRLEN len = t - s;
-       SV *const temp_sv = CopFILESV(PL_curcop);
-       const char *cf;
-       STRLEN tmplen;
-
-       if (temp_sv) {
-           cf = SvPVX(temp_sv);
-           tmplen = SvCUR(temp_sv);
-       } else {
-           cf = NULL;
-           tmplen = 0;
-       }
  
         if (!PL_rsfp && !PL_parser->filtered) {
             /* must copy *{"::_<(eval N)[oldfilename:L]"}
              * to *{"::_<newfilename"} */
             /* However, the long form of evals is only turned on by the
                debugger - usually they're "(eval %lu)" */
-           char smallbuf[128];
-           char *tmpbuf;
-           GV **gvp;
-           STRLEN tmplen2 = len;
-           if (tmplen + 2 <= sizeof smallbuf)
-               tmpbuf = smallbuf;
-           else
-               Newx(tmpbuf, tmplen + 2, char);
-           tmpbuf[0] = '_';
-           tmpbuf[1] = '<';
-           memcpy(tmpbuf + 2, cf, tmplen);
-           tmplen += 2;
-           gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
-           if (gvp) {
+           GV * const cfgv = CopFILEGV(PL_curcop);
+           if (cfgv) {
+               char smallbuf[128];
+               STRLEN tmplen2 = len;
                 char *tmpbuf2;
                 GV *gv2;
  
@@ -1685,12 +1771,8 @@ S_incline(pTHX_ const char *s)
                 else
                     Newx(tmpbuf2, tmplen2 + 2, char);
  
-               if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
-                   /* Either they malloc'd it, or we malloc'd it,
-                      so no prefix is present in ours.  */
-                   tmpbuf2[0] = '_';
-                   tmpbuf2[1] = '<';
-               }
+               tmpbuf2[0] = '_';
+               tmpbuf2[1] = '<';
  
                 memcpy(tmpbuf2 + 2, s, tmplen2);
                 tmplen2 += 2;
@@ -1704,11 +1786,11 @@ S_incline(pTHX_ const char *s)
                        alias the saved lines that are in the array.
                        Otherwise alias the whole array. */
                     if (CopLINE(PL_curcop) == line_num) {
-                       GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
-                       GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
+                       GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
+                       GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
                     }
-                   else if (GvAV(*gvp)) {
-                       AV * const av = GvAV(*gvp);
+                   else if (GvAV(cfgv)) {
+                       AV * const av = GvAV(cfgv);
                         const I32 start = CopLINE(PL_curcop)+1;
                         I32 items = AvFILLp(av) - start;
                         if (items > 0) {
@@ -1723,7 +1805,6 @@ S_incline(pTHX_ const char *s)
  
                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
             }
-           if (tmpbuf != smallbuf) Safefree(tmpbuf);
         }
         CopFILE_free(PL_curcop);
         CopFILE_setn(PL_curcop, s, len);
@@ -1731,6 +1812,8 @@ S_incline(pTHX_ const char *s)
      CopLINE_set(PL_curcop, line_num);
  }
  
+#define skipspace(s) skipspace_flags(s, 0)
+
  #ifdef PERL_MAD
  /* skip space before PL_thistoken */
  
@@ -1786,13 +1869,11 @@ STATIC char *
  S_skipspace2(pTHX_ char *s, SV **svp)
  {
      char *start;
-    const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
      const I32 startoff = s - SvPVX(PL_linestr);
  
      PERL_ARGS_ASSERT_SKIPSPACE2;
  
      s = skipspace(s);
-    PL_bufptr = SvPVX(PL_linestr) + bufptroff;
      if (!PL_madskills || !svp)
         return s;
      start = SvPVX(PL_linestr) + startoff;
@@ -1820,12 +1901,12 @@ S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
      if (av) {
         SV * const sv = newSV_type(SVt_PVMG);
         if (orig_sv)
-           sv_setsv(sv, orig_sv);
+           sv_setsv_flags(sv, orig_sv, 0); /* no cow */
         else
             sv_setpvn(sv, buf, len);
         (void)SvIOK_on(sv);
         SvIV_set(sv, 0);
-       av_store(av, (I32)CopLINE(PL_curcop), sv);
+       av_store(av, CopLINE(PL_curcop), sv);
      }
  }
  
@@ -1836,12 +1917,12 @@ S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
   */
  
  STATIC char *
-S_skipspace(pTHX_ char *s)
+S_skipspace_flags(pTHX_ char *s, U32 flags)
  {
  #ifdef PERL_MAD
      char *start = s;
  #endif /* PERL_MAD */
-    PERL_ARGS_ASSERT_SKIPSPACE;
+    PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
  #ifdef PERL_MAD
      if (PL_skipwhite) {
         sv_free(PL_skipwhite);
@@ -1854,7 +1935,7 @@ S_skipspace(pTHX_ char *s)
      } else {
         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
         PL_bufptr = s;
-       lex_read_space(LEX_KEEP_PREVIOUS |
+       lex_read_space(flags | LEX_KEEP_PREVIOUS |
                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
                     LEX_NO_NEXT_CHUNK : 0));
         s = PL_bufptr;
@@ -2110,7 +2191,7 @@ S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
   */
  
  STATIC char *
-S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
+S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
  {
      dVAR;
      char *s;
@@ -2121,12 +2202,16 @@ S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack, in
      start = SKIPSPACE1(start);
      s = start;
      if (isIDFIRST_lazy_if(s,UTF) ||
-       (allow_pack && *s == ':') ||
-       (allow_initial_tick && *s == '\'') )
+       (allow_pack && *s == ':') )
      {
         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
-       if (check_keyword && keyword(PL_tokenbuf, len, 0))
+       if (check_keyword) {
+         char *s2 = PL_tokenbuf;
+         if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
+           s2 += 6, len -= 6;
+         if (keyword(s2, len, 0))
             return start;
+       }
         start_force(PL_curforce);
         if (PL_madskills)
             curmad('X', newSVpvn(start,s-start));
@@ -2463,7 +2548,7 @@ S_sublex_start(pTHX)
         return THING;
      }
      else if (op_type == OP_BACKTICK && PL_lex_op) {
-       /* readpipe() vas overriden */
+       /* readpipe() was overridden */
         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
         pl_yylval.opval = PL_lex_op;
         PL_lex_op = NULL;
@@ -2525,6 +2610,7 @@ S_sublex_push(pTHX)
      SAVEGENERICPV(PL_lex_brackstack);
      SAVEGENERICPV(PL_lex_casestack);
      SAVEGENERICPV(PL_parser->lex_shared);
+    SAVEBOOL(PL_parser->lex_re_reparsing);
  
      /* The here-doc parser needs to be able to peek into outer lexing
         scopes to find the body of the here-doc.  So we put PL_linestr and
@@ -2568,6 +2654,9 @@ S_sublex_push(pTHX)
      else
         PL_lex_inpat = NULL;
  
+    PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
+    PL_in_eval &= ~EVAL_RE_REPARSING;
+
      return '(';
  }
  
@@ -2672,7 +2761,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
      {
          /* If warnings are on, this will print a more detailed analysis of what
           * is wrong than the error message below */
-        utf8n_to_uvuni(first_bad_char_loc,
+        utf8n_to_uvchr(first_bad_char_loc,
                         e - ((char *) first_bad_char_loc),
                         NULL, 0);
  
@@ -2724,13 +2813,17 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
              if (! isCHARNAME_CONT(*s)) {
                  goto bad_charname;
              }
-           if (*s == ' ' && *(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) {
-                Perl_warn(aTHX_ "A sequence of multiple spaces in a charnames alias definition is deprecated");
+           if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
+                Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+                           "A sequence of multiple spaces in a charnames "
+                           "alias definition is deprecated");
              }
              s++;
          }
-        if (*(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) {
-            Perl_warn(aTHX_ "Trailing white-space in a charnames alias definition is deprecated");
+        if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
+            Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+                        "Trailing white-space in a charnames alias "
+                        "definition is deprecated");
          }
      }
      else {
@@ -2743,7 +2836,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
              }
              s++;
          } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
-            if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))))) {
+            if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
                  goto bad_charname;
              }
              s += 2;
@@ -2767,14 +2860,16 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
                  if (! isCHARNAME_CONT(*s)) {
                      goto bad_charname;
                  }
-                if (*s == ' ' && *(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) {
-                    Perl_warn(aTHX_ "A sequence of multiple spaces in a charnames alias definition is deprecated");
+                if (*s == ' ' && *(s-1) == ' '
+                 && ckWARN_d(WARN_DEPRECATED)) {
+                    Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+                               "A sequence of multiple spaces in a charnam"
+                               "es alias definition is deprecated");
                  }
                  s++;
              }
              else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
-                if (! isCHARNAME_CONT(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s,
-                                                                    *(s+1)))))
+                if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
                  {
                      goto bad_charname;
                  }
@@ -2794,8 +2889,10 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
                  s += UTF8SKIP(s);
              }
          }
-        if (*(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) {
-            Perl_warn(aTHX_ "Trailing white-space in a charnames alias definition is deprecated");
+        if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
+            Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+                       "Trailing white-space in a charnames alias "
+                       "definition is deprecated");
          }
      }
  
@@ -2806,7 +2903,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
          if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
              /* If warnings are on, this will print a more detailed analysis of
               * what is wrong than the error message below */
-            utf8n_to_uvuni(first_bad_char_loc,
+            utf8n_to_uvchr(first_bad_char_loc,
                             (char *) first_bad_char_loc - str,
                             NULL, 0);
  
@@ -3007,7 +3104,7 @@ S_scan_const(pTHX_ char *start)
                     char *e = d++;
                     while (e-- > c)
                         *(e + 1) = *e;
-                   *c = (char)UTF_TO_NATIVE(0xff);
+                   *c = (char) ILLEGAL_UTF8_BYTE;
                     /* mark the range as done, and continue */
                     dorange = FALSE;
                     didrange = TRUE;
@@ -3061,16 +3158,16 @@ S_scan_const(pTHX_ char *start)
  
  #ifdef EBCDIC
                 if (literal_endpoint == 2 &&
-                   ((isLOWER(min) && isLOWER(max)) ||
-                    (isUPPER(min) && isUPPER(max)))) {
-                   if (isLOWER(min)) {
+                   ((isLOWER_A(min) && isLOWER_A(max)) ||
+                    (isUPPER_A(min) && isUPPER_A(max)))) {
+                   if (isLOWER_A(min)) {
                         for (i = min; i <= max; i++)
-                           if (isLOWER(i))
-                               *d++ = NATIVE_TO_NEED(has_utf8,i);
+                           if (isLOWER_A(i))
+                               *d++ = i;
                     } else {
                         for (i = min; i <= max; i++)
-                           if (isUPPER(i))
-                               *d++ = NATIVE_TO_NEED(has_utf8,i);
+                           if (isUPPER_A(i))
+                               *d++ = i;
                     }
                 }
                 else
@@ -3078,13 +3175,7 @@ S_scan_const(pTHX_ char *start)
                     for (i = min; i <= max; i++)
  #ifdef EBCDIC
                          if (has_utf8) {
-                            const U8 ch = (U8)NATIVE_TO_UTF(i);
-                            if (UNI_IS_INVARIANT(ch))
-                                *d++ = (U8)i;
-                            else {
-                                *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
-                                *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
-                            }
+                            append_utf8_from_native_byte(i, &d);
                          }
                          else
  #endif
@@ -3094,7 +3185,7 @@ S_scan_const(pTHX_ char *start)
                  if (uvmax) {
                      d = (char*)uvchr_to_utf8((U8*)d, 0x100);
                      if (uvmax > 0x101)
-                        *d++ = (char)UTF_TO_NATIVE(0xff);
+                        *d++ = (char) ILLEGAL_UTF8_BYTE;
                      if (uvmax > 0x100)
                          d = (char*)uvchr_to_utf8((U8*)d, uvmax);
                  }
@@ -3119,7 +3210,7 @@ S_scan_const(pTHX_ char *start)
                     && !native_range
  #endif
                     ) {
-                   *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
+                   *d++ = (char) ILLEGAL_UTF8_BYTE;    /* use illegal utf8 byte--see pmtrans */
                     s++;
                     continue;
                 }
@@ -3159,12 +3250,12 @@ S_scan_const(pTHX_ char *start)
          * char, which will be done separately.
          * Stop on (?{..}) and friends */
  
-       else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
+       else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
             if (s[2] == '#') {
                 while (s+1 < send && *s != ')')
-                   *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+                   *d++ = *s++;
             }
-           else if (!PL_lex_casemods && !in_charclass &&
+           else if (!PL_lex_casemods &&
                      (    s[2] == '{' /* This should match regcomp.c */
                       || (s[2] == '?' && s[3] == '{')))
             {
@@ -3173,10 +3264,10 @@ S_scan_const(pTHX_ char *start)
         }
  
         /* likewise skip #-initiated comments in //x patterns */
-       else if (*s == '#' && PL_lex_inpat &&
+       else if (*s == '#' && PL_lex_inpat && !in_charclass &&
           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
             while (s+1 < send && *s != '\n')
-               *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+               *d++ = *s++;
         }
  
         /* no further processing of single-quoted regex */
@@ -3251,7 +3342,7 @@ S_scan_const(pTHX_ char *start)
                         || s[1] != '{'
                         || regcurly(s + 1, FALSE)))
             {
-               *d++ = NATIVE_TO_NEED(has_utf8,'\\');
+               *d++ = '\\';
                 goto default_action;
             }
  
@@ -3280,7 +3371,7 @@ S_scan_const(pTHX_ char *start)
                 {
                      I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
                      STRLEN len = 3;
-                   uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
+                   uv = grok_oct(s, &len, &flags, NULL);
                     s += len;
                      if (len < 3 && s < send && isDIGIT(*s)
                          && ckWARN(WARN_MISC))
@@ -3332,9 +3423,8 @@ S_scan_const(pTHX_ char *start)
                  * UTF-8 sequence they can end up as, except if they force us
                  * to recode the rest of the string into utf8 */
                 
-               /* Here uv is the ordinal of the next character being added in
-                * unicode (converted from native). */
-               if (!UNI_IS_INVARIANT(uv)) {
+               /* Here uv is the ordinal of the next character being added */
+               if (!NATIVE_IS_INVARIANT(uv)) {
                     if (!has_utf8 && uv > 255) {
                         /* Might need to recode whatever we have accumulated so
                          * far if it contains any chars variant in utf8 or
@@ -3352,7 +3442,7 @@ S_scan_const(pTHX_ char *start)
                      }
  
                      if (has_utf8) {
-                       d = (char*)uvuni_to_utf8((U8*)d, uv);
+                       d = (char*)uvchr_to_utf8((U8*)d, uv);
                         if (PL_lex_inwhat == OP_TRANS &&
                             PL_sublex_info.sub_op) {
                             PL_sublex_info.sub_op->op_private |=
@@ -3385,16 +3475,6 @@ S_scan_const(pTHX_ char *start)
                  * now, while preserving the fact that it was a named character
                  * so that the regex compiler knows this */
  
-               /* This section of code doesn't generally use the
-                * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
-                * a close examination of this macro and determined it is a
-                * no-op except on utfebcdic variant characters.  Every
-                * character generated by this that would normally need to be
-                * enclosed by this macro is invariant, so the macro is not
-                * needed, and would complicate use of copy().  XXX There are
-                * other parts of this file where the macro is used
-                * inconsistently, but are saved by it being a no-op */
-
                 /* The structure of this section of code (besides checking for
                  * errors and upgrading to utf8) is:
                  *  Further disambiguate between the two meanings of \N, and if
@@ -3482,11 +3562,13 @@ S_scan_const(pTHX_ char *start)
                             has_utf8 = TRUE;
                         }
  
-                       /* Add the string to the output */
+                        /* Add the (Unicode) code point to the output. */
                         if (UNI_IS_INVARIANT(uv)) {
-                           *d++ = (char) uv;
+                           *d++ = (char) LATIN1_TO_NATIVE(uv);
                         }
-                       else d = (char*)uvuni_to_utf8((U8*)d, uv);
+                       else {
+                            d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
+                        }
                     }
                 }
                 else /* Here is \N{NAME} but not \N{U+...}. */
@@ -3546,19 +3628,16 @@ S_scan_const(pTHX_ char *start)
                                  char hex_string[2 * UTF8_MAXBYTES + 5];
  
                                  /* Get the first character of the result. */
-                                U32 uv = utf8n_to_uvuni((U8 *) str,
+                                U32 uv = utf8n_to_uvchr((U8 *) str,
                                                          len,
                                                          &char_length,
                                                          UTF8_ALLOW_ANYUV);
                                  /* Convert first code point to hex, including
-                                 * the boiler plate before it.  For all these,
-                                 * we convert to native format so that
-                                 * downstream code can continue to assume the
-                                 * input is native */
+                                 * the boilerplate before it. */
                                  output_length =
                                      my_snprintf(hex_string, sizeof(hex_string),
-                                            "\\N{U+%X",
-                                            (unsigned int) UNI_TO_NATIVE(uv));
+                                                "\\N{U+%X",
+                                                (unsigned int) uv);
  
                                  /* Make sure there is enough space to hold it */
                                  d = off + SvGROW(sv, off
@@ -3573,15 +3652,15 @@ S_scan_const(pTHX_ char *start)
                                  * its ordinal in hex */
                                  while ((str += char_length) < str_end) {
                                      const STRLEN off = d - SvPVX_const(sv);
-                                    U32 uv = utf8n_to_uvuni((U8 *) str,
+                                    U32 uv = utf8n_to_uvchr((U8 *) str,
                                                              str_end - str,
                                                              &char_length,
                                                              UTF8_ALLOW_ANYUV);
                                      output_length =
                                          my_snprintf(hex_string,
-                                            sizeof(hex_string),
-                                            ".%X",
-                                            (unsigned int) UNI_TO_NATIVE(uv));
+                                                    sizeof(hex_string),
+                                                    ".%X",
+                                                    (unsigned int) uv);
  
                                      d = off + SvGROW(sv, off
                                                          + output_length
@@ -3646,25 +3725,25 @@ S_scan_const(pTHX_ char *start)
  
             /* printf-style backslashes, formfeeds, newlines, etc */
             case 'b':
-               *d++ = NATIVE_TO_NEED(has_utf8,'\b');
+               *d++ = '\b';
                 break;
             case 'n':
-               *d++ = NATIVE_TO_NEED(has_utf8,'\n');
+               *d++ = '\n';
                 break;
             case 'r':
-               *d++ = NATIVE_TO_NEED(has_utf8,'\r');
+               *d++ = '\r';
                 break;
             case 'f':
-               *d++ = NATIVE_TO_NEED(has_utf8,'\f');
+               *d++ = '\f';
                 break;
             case 't':
-               *d++ = NATIVE_TO_NEED(has_utf8,'\t');
+               *d++ = '\t';
                 break;
             case 'e':
-               *d++ = ASCII_TO_NEED(has_utf8,'\033');
+               *d++ = ASCII_TO_NATIVE('\033');
                 break;
             case 'a':
-               *d++ = ASCII_TO_NEED(has_utf8,'\007');
+               *d++ = '\a';
                 break;
             } /* end switch */
  
@@ -3718,7 +3797,7 @@ S_scan_const(pTHX_ char *start)
  #endif
         }
         else {
-           *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+           *d++ = *s++;
         }
      } /* while loop to process each character */
  
@@ -3751,7 +3830,9 @@ S_scan_const(pTHX_ char *start)
      /* return the substring (via pl_yylval) only if we parsed anything */
      if (s > PL_bufptr) {
         SvREFCNT_inc_simple_void_NN(sv);
-       if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
+       if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
+            && ! PL_parser->lex_re_reparsing)
+        {
             const char *const key = PL_lex_inpat ? "qr" : "q";
             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
             const char *type;
@@ -3972,19 +4053,14 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
      if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
             return 0;
      if (cv && SvPOK(cv)) {
-               const char *proto = CvPROTO(cv);
-               if (proto) {
-                   if (*proto == ';')
-                       proto++;
-                   if (*proto == '*')
-                       return 0;
-               }
+       const char *proto = CvPROTO(cv);
+       if (proto) {
+           while (*proto && (isSPACE(*proto) || *proto == ';'))
+               proto++;
+           if (*proto == '*')
+               return 0;
+       }
      }
-    s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
-    /* start is the beginning of the possible filehandle/object,
-     * and s is the end of it
-     * tmpbuf is a copy of it
-     */
  
      if (*start == '$') {
         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
@@ -4001,6 +4077,13 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
         PL_expect = XREF;
         return *s == '(' ? FUNCMETH : METHOD;
      }
+
+    s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
+    /* start is the beginning of the possible filehandle/object,
+     * and s is the end of it
+     * tmpbuf is a copy of it (but with single quotes as double colons)
+     */
+
      if (!keyword(tmpbuf, len, 0)) {
         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
             len -= 2;
@@ -4531,12 +4614,12 @@ S_tokenize_use(pTHX_ int is_use, char *s) {
             force_next(WORD);
         }
         else if (*s == 'v') {
-           s = force_word(s,WORD,FALSE,TRUE,FALSE);
+           s = force_word(s,WORD,FALSE,TRUE);
             s = force_version(s, FALSE);
         }
      }
      else {
-       s = force_word(s,WORD,FALSE,TRUE,FALSE);
+       s = force_word(s,WORD,FALSE,TRUE);
         s = force_version(s, FALSE);
      }
      pl_yylval.ival = is_use;
@@ -4615,6 +4698,7 @@ Perl_yylex(pTHX)
      char *d;
      STRLEN len;
      bool bof = FALSE;
+    const bool saw_infix_sigil = PL_parser->saw_infix_sigil;
      U8 formbrack = 0;
      U32 fake_eof = 0;
  
@@ -4839,7 +4923,10 @@ Perl_yylex(pTHX)
         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
                "### Interpolated variable\n"); });
         PL_expect = XTERM;
-       PL_lex_dojoin = (*PL_bufptr == '@');
+        /* for /@a/, we leave the joining for the regex engine to do
+         * (unless we're within \Q etc) */
+       PL_lex_dojoin = (*PL_bufptr == '@'
+                            && (!PL_lex_inpat || PL_lex_casemods));
         PL_lex_state = LEX_INTERPNORMAL;
         if (PL_lex_dojoin) {
             start_force(PL_curforce);
@@ -5011,9 +5098,12 @@ Perl_yylex(pTHX)
         return yylex();
      }
  
+    /* We really do *not* want PL_linestr ever becoming a COW. */
+    assert (!SvIsCOW(PL_linestr));
      s = PL_bufptr;
      PL_oldoldbufptr = PL_oldbufptr;
      PL_oldbufptr = s;
+    PL_parser->saw_infix_sigil = 0;
  
    retry:
  #ifdef PERL_MAD
@@ -5025,7 +5115,7 @@ Perl_yylex(pTHX)
  #endif
      switch (*s) {
      default:
-       if (isIDFIRST_lazy_if(s,UTF))
+       if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
             goto keylookup;
         {
          SV *dsv = newSVpvs_flags("", SVs_TEMP);
@@ -5484,14 +5574,19 @@ Perl_yylex(pTHX)
                 s = SKIPSPACE0(s);
             }
             else {
-/*             if (PL_madskills && PL_lex_formbrack) { */
-                   d = s;
-                   while (d < PL_bufend && *d != '\n')
-                       d++;
-                   if (d < PL_bufend)
-                       d++;
-                   else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
+#endif
+                   if (PL_madskills) d = s;
+                   while (s < PL_bufend && *s != '\n')
+                       s++;
+                   if (s < PL_bufend)
+                   {
+                       s++;
+                       if (s < PL_bufend)
+                           incline(s);
+                   }
+                   else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
                       Perl_croak(aTHX_ "panic: input overflow");
+#ifdef PERL_MAD
                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
                         if (!PL_thiswhite)
                             PL_thiswhite = newSVpvs("");
@@ -5499,16 +5594,9 @@ Perl_yylex(pTHX)
                             sv_setpvs(PL_thiswhite, "");
                             PL_faketokens = 0;
                         }
-                       sv_catpvn(PL_thiswhite, s, d - s);
+                       sv_catpvn(PL_thiswhite, d, s - d);
                     }
-                   s = d;
-/*             }
-               *s = '\0';
-               PL_bufend = s; */
             }
-#else
-           *s = '\0';
-           PL_bufend = s;
  #endif
         }
         goto retry;
@@ -5525,7 +5613,7 @@ Perl_yylex(pTHX)
                 s++;
  
             if (strnEQ(s,"=>",2)) {
-               s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
+               s = force_word(PL_bufptr,WORD,FALSE,FALSE);
                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
                 OPERATOR('-');          /* unary minus */
             }
@@ -5597,7 +5685,7 @@ Perl_yylex(pTHX)
                 s++;
                 s = SKIPSPACE1(s);
                 if (isIDFIRST_lazy_if(s,UTF)) {
-                   s = force_word(s,METHOD,FALSE,TRUE,FALSE);
+                   s = force_word(s,METHOD,FALSE,TRUE);
                     TOKEN(ARROW);
                 }
                 else if (*s == '$')
@@ -5669,6 +5757,7 @@ Perl_yylex(pTHX)
             s--;
             TOKEN(0);
         }
+       PL_parser->saw_infix_sigil = 1;
         Mop(OP_MULTIPLY);
  
      case '%':
@@ -5677,6 +5766,7 @@ Perl_yylex(pTHX)
                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
                 TOKEN(0);
             ++s;
+           PL_parser->saw_infix_sigil = 1;
             Mop(OP_MODULO);
         }
         PL_tokenbuf[0] = '%';
@@ -5711,6 +5801,9 @@ Perl_yylex(pTHX)
             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
                 TOKEN(0);
             s += 2;
+            Perl_ck_warner_d(aTHX_
+                packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
+                "Smartmatch is experimental");
             Eop(OP_SMARTMATCH);
         }
         s++;
@@ -5957,7 +6050,7 @@ Perl_yylex(pTHX)
                     d++;
                 if (*d == '}') {
                     const char minus = (PL_tokenbuf[0] == '-');
-                   s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
+                   s = force_word(s + minus, WORD, FALSE, TRUE);
                     if (minus)
                         force_next('-');
                 }
@@ -6167,6 +6260,7 @@ Perl_yylex(pTHX)
                 s--;
                 TOKEN(0);
             }
+           PL_parser->saw_infix_sigil = 1;
             BAop(OP_BIT_AND);
         }
  
@@ -6470,9 +6564,8 @@ Perl_yylex(pTHX)
                                 if (*t == ';'
                                         && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                                               "You need to quote \"%"SVf"\"",
-                                                 SVfARG(newSVpvn_flags(tmpbuf, len, 
-                                                    SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+                                       "You need to quote \"%"UTF8f"\"",
+                                        UTF8fARG(UTF, len, tmpbuf));
                             }
                         }
                 }
@@ -6557,11 +6650,9 @@ Perl_yylex(pTHX)
                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
         /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                           "Scalar value %"SVf" better written as $%"SVf,
-                           SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
-                                                SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
-                            SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
-                                                SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
+                        "Scalar value %"UTF8f" better written as $%"UTF8f,
+                         UTF8fARG(UTF, t-PL_bufptr, PL_bufptr),
+                         UTF8fARG(UTF, t-PL_bufptr-1, PL_bufptr+1));
                     }
                 }
             }
@@ -6833,6 +6924,7 @@ Perl_yylex(pTHX)
  
         /* Is this a word before a => operator? */
         if (*d == '=' && d[1] == '>') {
+         fat_arrow:
             CLINE;
             pl_yylval.opval
                 = (OP*)newSVOP(OP_CONST, 0,
@@ -6909,8 +7001,7 @@ Perl_yylex(pTHX)
                 else {
                     rv2cv_op = newOP(OP_PADANY, 0);
                     rv2cv_op->op_targ = off;
-                   rv2cv_op = (OP*)newCVREF(0, rv2cv_op);
-                   cv = (CV *)PAD_SV(off);
+                   cv = find_lexical_cv(off);
                 }
                 lex = TRUE;
                 goto just_a_word;
@@ -6967,6 +7058,18 @@ Perl_yylex(pTHX)
             }
         }
  
+       if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
+        && (!anydelim || *s != '#')) {
+           /* no override, and not s### either; skipspace is safe here.
+            * Check for => on the following line. */
+           STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
+           STRLEN   soff = s         - SvPVX(PL_linestr);
+           s = skipspace_flags(s, LEX_NO_INCLINE);
+           if (*s == '=' && s[1] == '>') goto fat_arrow;
+           PL_bufptr = SvPVX(PL_linestr) + bufoff;
+           s         = SvPVX(PL_linestr) +   soff;
+       }
+
        reserved_word:
         switch (tmp) {
  
@@ -7005,9 +7108,8 @@ Perl_yylex(pTHX)
                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
                                   TRUE, &morelen);
                     if (!morelen)
-                       Perl_croak(aTHX_ "Bad name after %"SVf"%s",
-                                        SVfARG(newSVpvn_flags(PL_tokenbuf, len,
-                                            (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
+                       Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
+                               UTF8fARG(UTF, len, PL_tokenbuf),
                                 *s == '\'' ? "'" : "::");
                     len += morelen;
                     pkgname = 1;
@@ -7034,9 +7136,8 @@ Perl_yylex(pTHX)
                     if (ckWARN(WARN_BAREWORD)
                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
-                           "Bareword \"%"SVf"\" refers to nonexistent package",
-                            SVfARG(newSVpvn_flags(PL_tokenbuf, len,
-                                        (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
+                         "Bareword \"%"UTF8f"\" refers to nonexistent package",
+                          UTF8fARG(UTF, len, PL_tokenbuf));
                     len -= 2;
                     PL_tokenbuf[len] = '\0';
                     gv = NULL;
@@ -7152,9 +7253,13 @@ Perl_yylex(pTHX)
                 if (*s == '=' && s[1] == '>' && !pkgname) {
                     op_free(rv2cv_op);
                     CLINE;
+                   /* This is our own scalar, created a few lines above,
+                      so this is safe. */
+                   SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv);
                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
+                   SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv);
                     TERM(WORD);
                 }
  
@@ -7165,7 +7270,7 @@ Perl_yylex(pTHX)
                         d = s + 1;
                         while (SPACE_OR_TAB(*d))
                             d++;
-                       if (*d == ')' && (sv = cv_const_sv(cv))) {
+                       if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
                             s = d + 1;
                             goto its_constant;
                         }
@@ -7222,24 +7327,32 @@ Perl_yylex(pTHX)
  
                 if (cv) {
                     if (lastchar == '-' && penultchar != '-') {
-                        const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
+                       const STRLEN l = len ? len : strlen(PL_tokenbuf);
                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
-                               "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
-                               SVfARG(tmpsv), SVfARG(tmpsv));
+                           "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
+                            UTF8fARG(UTF, l, PL_tokenbuf),
+                            UTF8fARG(UTF, l, PL_tokenbuf));
                      }
                     /* Check for a constant sub */
-                   if ((sv = cv_const_sv(cv))) {
+                   if ((sv = cv_const_sv_or_av(cv))) {
                   its_constant:
                         op_free(rv2cv_op);
                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
-                       pl_yylval.opval->op_private = OPpCONST_FOLDED;
-                       pl_yylval.opval->op_flags |= OPf_SPECIAL;
+                       if (SvTYPE(sv) == SVt_PVAV)
+                           pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
+                                                     pl_yylval.opval);
+                       else {
+                           pl_yylval.opval->op_private = OPpCONST_FOLDED;
+                           pl_yylval.opval->op_folded = 1;
+                           pl_yylval.opval->op_flags |= OPf_SPECIAL;
+                       }
                         TOKEN(WORD);
                     }
  
                     op_free(pl_yylval.opval);
-                   pl_yylval.opval = rv2cv_op;
+                   pl_yylval.opval =
+                       off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
                     PL_last_lop = PL_oldbufptr;
                     PL_last_lop_op = OP_ENTERSUB;
@@ -7253,6 +7366,7 @@ Perl_yylex(pTHX)
                         STRLEN protolen = CvPROTOLEN(cv);
                         const char *proto = CvPROTO(cv);
                         bool optional;
+                       proto = S_strip_spaces(aTHX_ proto, &protolen);
                         if (!protolen)
                             TERM(FUNC0SUB);
                         if ((optional = *proto == ';'))
@@ -7335,7 +7449,8 @@ Perl_yylex(pTHX)
                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
                                          SVt_PVCV);
                         op_free(pl_yylval.opval);
-                       pl_yylval.opval = rv2cv_op;
+                       pl_yylval.opval =
+                           off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
                         PL_last_lop = PL_oldbufptr;
                         PL_last_lop_op = OP_ENTERSUB;
@@ -7396,12 +7511,13 @@ Perl_yylex(pTHX)
                 op_free(rv2cv_op);
  
             safe_bareword:
-               if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
+               if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
+                && saw_infix_sigil) {
                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
-                                    "Operator or semicolon missing before %c%"SVf,
-                                    lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
-                                                    strlen(PL_tokenbuf),
-                                                    SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+                                    "Operator or semicolon missing before %c%"UTF8f,
+                                    lastchar,
+                                    UTF8fARG(UTF, strlen(PL_tokenbuf),
+                                             PL_tokenbuf));
                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
                                      "Ambiguous use of %c resolved as operator %c",
                                      lastchar, lastchar);
@@ -7432,21 +7548,12 @@ Perl_yylex(pTHX)
         case KEY___END__: {
             GV *gv;
             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
-               const char *pname = "main";
-               STRLEN plen = 4;
-               U32 putf8 = 0;
-               if (PL_tokenbuf[2] == 'D')
-               {
-                   HV * const stash =
-                       PL_curstash ? PL_curstash : PL_defstash;
-                   pname = HvNAME_get(stash);
-                   plen  = HvNAMELEN (stash);
-                   if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
-               }
-               gv = gv_fetchpvn_flags(
-                       Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
-                       plen+6, GV_ADD|putf8, SVt_PVIO
-               );
+               HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
+                                       ? PL_curstash
+                                       : PL_defstash;
+               gv = (GV *)*hv_fetchs(stash, "DATA", 1);
+               if (!isGV(gv))
+                   gv_init(gv,stash,"DATA",4,0);
                 GvMULTI_on(gv);
                 if (!GvIO(gv))
                     GvIOp(gv) = newIO();
@@ -7561,9 +7668,8 @@ Perl_yylex(pTHX)
                     goto just_a_word;
                 }
                 if (!tmp)
-                   Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
-                                    SVfARG(newSVpvn_flags(PL_tokenbuf, len,
-                                                (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
+                   Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
+                                     UTF8fARG(UTF, len, PL_tokenbuf));
                 if (tmp < 0)
                     tmp = -tmp;
                 else if (tmp == KEY_require || tmp == KEY_do
@@ -7716,7 +7822,7 @@ Perl_yylex(pTHX)
  
         case KEY_dump:
             PL_expect = XOPERATOR;
-           s = force_word(s,WORD,TRUE,FALSE,FALSE);
+           s = force_word(s,WORD,TRUE,FALSE);
             LOOPX(OP_DUMP);
  
         case KEY_else:
@@ -7849,7 +7955,7 @@ Perl_yylex(pTHX)
  
         case KEY_goto:
             PL_expect = XOPERATOR;
-           s = force_word(s,WORD,TRUE,FALSE,FALSE);
+           s = force_word(s,WORD,TRUE,FALSE);
             LOOPX(OP_GOTO);
  
         case KEY_gmtime:
@@ -7935,6 +8041,9 @@ Perl_yylex(pTHX)
  
         case KEY_given:
             pl_yylval.ival = CopLINE(PL_curcop);
+            Perl_ck_warner_d(aTHX_
+                packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
+                "given is experimental");
             OPERATOR(GIVEN);
  
         case KEY_glob:
@@ -7972,7 +8081,7 @@ Perl_yylex(pTHX)
  
         case KEY_last:
             PL_expect = XOPERATOR;
-           s = force_word(s,WORD,TRUE,FALSE,FALSE);
+           s = force_word(s,WORD,TRUE,FALSE);
             LOOPX(OP_LAST);
         
         case KEY_lc:
@@ -8080,7 +8189,7 @@ Perl_yylex(pTHX)
  
         case KEY_next:
             PL_expect = XOPERATOR;
-           s = force_word(s,WORD,TRUE,FALSE,FALSE);
+           s = force_word(s,WORD,TRUE,FALSE);
             LOOPX(OP_NEXT);
  
         case KEY_ne:
@@ -8105,15 +8214,9 @@ Perl_yylex(pTHX)
         case KEY_open:
             s = SKIPSPACE1(s);
             if (isIDFIRST_lazy_if(s,UTF)) {
-               const char *t;
-               for (d = s; isWORDCHAR_lazy_if(d,UTF);) {
-                   d += UTF ? UTF8SKIP(d) : 1;
-                    if (UTF) {
-                        while (UTF8_IS_CONTINUED(*d) && _is_utf8_mark((U8*)d)) {
-                            d += UTF ? UTF8SKIP(d) : 1;
-                        }
-                    }
-                }
+          const char *t;
+          d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
+              &len);
                 for (t=d; isSPACE(*t);)
                     t++;
                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
@@ -8122,11 +8225,9 @@ Perl_yylex(pTHX)
                     && !(t[0] == ':' && t[1] == ':')
                     && !keyword(s, d-s, 0)
                 ) {
-                   SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
-                                                SVs_TEMP | (UTF ? SVf_UTF8 : 0));
                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
-                          "Precedence problem: open %"SVf" should be open(%"SVf")",
-                           SVfARG(tmpsv), SVfARG(tmpsv));
+                      "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
+                       UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
                 }
             }
             LOP(OP_OPEN,XTERM);
@@ -8170,7 +8271,7 @@ Perl_yylex(pTHX)
             LOP(OP_PACK,XTERM);
  
         case KEY_package:
-           s = force_word(s,WORD,FALSE,TRUE,FALSE);
+           s = force_word(s,WORD,FALSE,TRUE);
             s = SKIPSPACE1(s);
             s = force_strict_version(s);
             PL_lex_expect = XBLOCK;
@@ -8273,7 +8374,7 @@ Perl_yylex(pTHX)
                     || (s = force_version(s, TRUE), *s == 'v'))
             {
                 *PL_tokenbuf = '\0';
-               s = force_word(s,WORD,TRUE,TRUE,FALSE);
+               s = force_word(s,WORD,TRUE,TRUE);
                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
                                  GV_ADD | (UTF ? SVf_UTF8 : 0));
@@ -8298,7 +8399,7 @@ Perl_yylex(pTHX)
  
         case KEY_redo:
             PL_expect = XOPERATOR;
-           s = force_word(s,WORD,TRUE,FALSE,FALSE);
+           s = force_word(s,WORD,TRUE,FALSE);
             LOOPX(OP_REDO);
  
         case KEY_rename:
@@ -8439,7 +8540,7 @@ Perl_yylex(pTHX)
             checkcomma(s,PL_tokenbuf,"subroutine name");
             s = SKIPSPACE1(s);
             PL_expect = XTERM;
-           s = force_word(s,WORD,TRUE,TRUE,FALSE);
+           s = force_word(s,WORD,TRUE,TRUE);
             LOP(OP_SORT,XREF);
  
         case KEY_split:
@@ -8471,10 +8572,12 @@ Perl_yylex(pTHX)
           really_sub:
             {
                 char * const tmpbuf = PL_tokenbuf + 1;
-               SSize_t tboffset = 0;
                 expectation attrful;
                 bool have_name, have_proto;
                 const int key = tmp;
+#ifndef PERL_MAD
+                SV *format_name = NULL;
+#endif
  
  #ifdef PERL_MAD
                 SV *tmpwhite = 0;
@@ -8501,13 +8604,14 @@ Perl_yylex(pTHX)
  
                     PL_expect = XBLOCK;
                     attrful = XATTRBLOCK;
-                   /* remember buffer pos'n for later force_word */
-                   tboffset = s - PL_oldbufptr;
                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
                                   &len);
  #ifdef PERL_MAD
                     if (PL_madskills)
                         nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
+#else
+                    if (key == KEY_format)
+                       format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
  #endif
                     *PL_tokenbuf = '&';
                     if (memchr(tmpbuf, ':', len) || key != KEY_sub
@@ -8555,87 +8659,23 @@ Perl_yylex(pTHX)
                     PL_thistoken = subtoken;
                     s = d;
  #else
-                   if (have_name)
-                       (void) force_word(PL_oldbufptr + tboffset, WORD,
-                                         FALSE, TRUE, TRUE);
+                   if (format_name) {
+                        start_force(PL_curforce);
+                        NEXTVAL_NEXTTOKE.opval
+                            = (OP*)newSVOP(OP_CONST,0, format_name);
+                        NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
+                        force_next(WORD);
+                    }
  #endif
                     PREBLOCK(FORMAT);
                 }
  
                 /* Look for a prototype */
                 if (*s == '(') {
-                   char *p;
-                   bool bad_proto = FALSE;
-                   bool in_brackets = FALSE;
-                   char greedy_proto = ' ';
-                   bool proto_after_greedy_proto = FALSE;
-                   bool must_be_last = FALSE;
-                   bool underscore = FALSE;
-                   bool seen_underscore = FALSE;
-                   const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
-                    STRLEN tmplen;
-
                     s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
                     if (!s)
                         Perl_croak(aTHX_ "Prototype not terminated");
-                   /* strip spaces and check for bad characters */
-                   d = SvPV(PL_lex_stuff, tmplen);
-                   tmp = 0;
-                   for (p = d; tmplen; tmplen--, ++p) {
-                       if (!isSPACE(*p)) {
-                            d[tmp++] = *p;
-
-                           if (warnillegalproto) {
-                               if (must_be_last)
-                                   proto_after_greedy_proto = TRUE;
-                               if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
-                                   bad_proto = TRUE;
-                               }
-                               else {
-                                   if ( underscore ) {
-                                       if ( !strchr(";@%", *p) )
-                                           bad_proto = TRUE;
-                                       underscore = FALSE;
-                                   }
-                                   if ( *p == '[' ) {
-                                       in_brackets = TRUE;
-                                   }
-                                   else if ( *p == ']' ) {
-                                       in_brackets = FALSE;
-                                   }
-                                   else if ( (*p == '@' || *p == '%') &&
-                                        ( tmp < 2 || d[tmp-2] != '\\' ) &&
-                                        !in_brackets ) {
-                                       must_be_last = TRUE;
-                                       greedy_proto = *p;
-                                   }
-                                   else if ( *p == '_' ) {
-                                       underscore = seen_underscore = TRUE;
-                                   }
-                               }
-                           }
-                       }
-                   }
-                    d[tmp] = '\0';
-                   if (proto_after_greedy_proto)
-                       Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
-                                   "Prototype after '%c' for %"SVf" : %s",
-                                   greedy_proto, SVfARG(PL_subname), d);
-                   if (bad_proto) {
-                        SV *dsv = newSVpvs_flags("", SVs_TEMP);
-                       Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
-                                   "Illegal character %sin prototype for %"SVf" : %s",
-                                   seen_underscore ? "after '_' " : "",
-                                   SVfARG(PL_subname),
-                                    SvUTF8(PL_lex_stuff)
-                                        ? sv_uni_display(dsv,
-                                            newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
-                                            tmp,
-                                            UNI_DISPLAY_ISPRINT)
-                                        : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
-                                            PERL_PV_ESCAPE_NONASCII));
-                    }
-                    SvCUR_set(PL_lex_stuff, tmp);
+                   (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
                     have_proto = TRUE;
  
  #ifdef PERL_MAD
@@ -8676,6 +8716,7 @@ Perl_yylex(pTHX)
                 force_next(0);
  
                 PL_thistoken = subtoken;
+                PERL_UNUSED_VAR(have_proto);
  #else
                 if (have_proto) {
                     NEXTVAL_NEXTTOKE.opval =
@@ -8797,6 +8838,9 @@ Perl_yylex(pTHX)
             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
                 return REPORT(0);
             pl_yylval.ival = CopLINE(PL_curcop);
+            Perl_ck_warner_d(aTHX_
+                packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
+                "when is experimental");
             OPERATOR(WHEN);
  
         case KEY_while:
@@ -8968,9 +9012,9 @@ S_pending_ident(pTHX)
          {
              /* Downgraded from fatal to warning 20000522 mjd */
              Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
-                       "Possible unintended interpolation of %"SVf" in string",
-                       SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
-                                        SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
+                       "Possible unintended interpolation of %"UTF8f
+                       " in string",
+                       UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
          }
      }
  
@@ -9044,7 +9088,9 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
      }
  }
  
-/* Either returns sv, or mortalizes/frees sv and returns a new SV*.
+/* S_new_constant(): do any overload::constant lookup.
+
+   Either returns sv, or mortalizes/frees sv and returns a new SV*.
     Best used as sv=new_constant(..., sv, ...).
     If s, pv are NULL, calls subroutine with one argument,
     and <type> is used with error messages only.
@@ -9196,11 +9242,14 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool
      for (;;) {
          if (*d >= e)
              Perl_croak(aTHX_ "%s", ident_too_long);
-        if (isWORDCHAR(**s)) /* UTF handled below */
-            *(*d)++ = *(*s)++;
-        else if (is_utf8 && UTF8_IS_START(**s) && isWORDCHAR_utf8((U8*)*s)) {
+        if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
+             /* The UTF-8 case must come first, otherwise things
+             * like c\N{COMBINING TILDE} would start failing, as the
+             * isWORDCHAR_A case below would gobble the 'c' up.
+             */
+
              char *t = *s + UTF8SKIP(*s);
-            while (UTF8_IS_CONTINUED(*t) && _is_utf8_mark((U8*)t))
+            while (isIDCONT_utf8((U8*)t))
                  t += UTF8SKIP(t);
              if (*d + (t - *s) > e)
                  Perl_croak(aTHX_ "%s", ident_too_long);
@@ -9208,6 +9257,11 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool
              *d += t - *s;
              *s = t;
          }
+        else if ( isWORDCHAR_A(**s) ) {  /* run of ASCII word characters */
+            do {
+                *(*d)++ = *(*s)++;
+            } while (isWORDCHAR_A(**s) && *d < e); /* at e: loop top croaks */
+        }
          else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
              *(*d)++ = ':';
              *(*d)++ = ':';
@@ -9274,20 +9328,39 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck
      *d = '\0';
      d = dest;
      if (*d) {
+        /* Either a digit variable, or parse_ident() found an identifier
+           (anything valid as a bareword), so job done and return.  */
         if (PL_lex_state != LEX_NORMAL)
             PL_lex_state = LEX_INTERPENDMAYBE;
         return s;
      }
      if (*s == '$' && s[1] &&
-       (isWORDCHAR_lazy_if(s+1,is_utf8) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
+      (isIDFIRST_lazy_if(s+1,is_utf8)
+         || isDIGIT_A((U8)s[1])
+         || s[1] == '$'
+         || s[1] == '{'
+         || strnEQ(s+1,"::",2)) )
      {
+        /* Dereferencing a value in a scalar variable.
+           The alternatives are different syntaxes for a scalar variable.
+           Using ' as a leading package separator isn't allowed. :: is.   */
         return s;
      }
+    /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
      if (*s == '{') {
         bracket = s;
         s++;
+       while (s < send && SPACE_OR_TAB(*s))
+          s++;
      }
-    if (s < send) {
+
+#define VALID_LEN_ONE_IDENT(d, u)     (isPUNCT_A((U8)(d))     \
+                                        || isCNTRL_A((U8)(d)) \
+                                        || isDIGIT_A((U8)(d)) \
+                                        || (!(u) && !UTF8_IS_INVARIANT((U8)(d))))
+    if (s < send
+        && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
+    {
          if (is_utf8) {
              const STRLEN skip = UTF8SKIP(s);
              STRLEN i;
@@ -9300,29 +9373,29 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck
              d[1] = '\0';
          }
      }
+    /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
      if (*d == '^' && *s && isCONTROLVAR(*s)) {
         *d = toCTRL(*s);
         s++;
      }
+    /* Warn about ambiguous code after unary operators if {...} notation isn't
+       used.  There's no difference in ambiguity; it's merely a heuristic
+       about when not to warn.  */
      else if (ck_uni && !bracket)
         check_uni();
      if (bracket) {
-       if (isSPACE(s[-1])) {
-           while (s < send) {
-               const char ch = *s++;
-               if (!SPACE_OR_TAB(ch)) {
-                   *d = ch;
-                   break;
-               }
-           }
-       }
+        /* If we were processing {...} notation then...  */
         if (isIDFIRST_lazy_if(d,is_utf8)) {
+            /* if it starts as a valid identifier, assume that it is one.
+               (the later check for } being at the expected point will trap
+               cases where this doesn't pan out.)  */
          d += is_utf8 ? UTF8SKIP(d) : 1;
          parse_ident(&s, &d, e, 1, is_utf8);
             *d = '\0';
             while (s < send && SPACE_OR_TAB(*s))
                 s++;
             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
+                /* ${foo[0]} and ${foo{bar}} notation.  */
                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
                     const char * const brack =
                         (const char *)
@@ -9340,7 +9413,7 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck
         }
         /* Handle extended ${^Foo} variables
          * 1999-02-27 mjd-perl-patch@plover.com */
-       else if (!isWORDCHAR(*d) && !isPRINT(*d) /* isCTRL(d) */
+       else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
                  && isWORDCHAR(*s))
         {
             d++;
@@ -9351,6 +9424,12 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck
                 Perl_croak(aTHX_ "%s", ident_too_long);
             *d = '\0';
         }
+
+        while (s < send && SPACE_OR_TAB(*s))
+           s++;
+
+        /* Expect to find a closing } after consuming any trailing whitespace.
+         */
         if (*s == '}') {
             s++;
             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
@@ -9373,6 +9452,8 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck
             }
         }
         else {
+            /* Didn't find the closing } at the point we expected, so restore
+               state such that the next thing to process is the opening { and */
             s = bracket;                /* let the parser handle it */
             *dest = '\0';
         }
@@ -9483,8 +9564,7 @@ S_scan_pat(pTHX_ char *start, I32 type)
  {
      dVAR;
      PMOP *pm;
-    char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing,
-                       TRUE /* look for escaped bracketed metas */ );
+    char *s;
      const char * const valid_flags =
         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
      char charset = '\0';    /* character set modifier */
@@ -9494,9 +9574,9 @@ S_scan_pat(pTHX_ char *start, I32 type)
  
      PERL_ARGS_ASSERT_SCAN_PAT;
  
-    /* this was only needed for the initial scan_str; set it to false
-     * so that any (?{}) code blocks etc are parsed normally */
-    PL_reg_state.re_reparsing = FALSE;
+    s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING),
+                       TRUE /* look for escaped bracketed metas */ );
+
      if (!s) {
         const char * const delimiter = skipspace(start);
         Perl_croak(aTHX_
@@ -9931,7 +10011,7 @@ S_scan_heredoc(pTHX_ char *s)
             /* shared is only null if we have gone beyond the outermost
                lexing scope.  In a file, we will have broken out of the
                loop in the previous iteration.  In an eval, the string buf-
-              fer ends with "\n;", so the while condition below will have
+              fer ends with "\n;", so the while condition above will have
                evaluated to false.  So shared can never be null. */
             assert(shared);
             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
@@ -9949,12 +10029,12 @@ S_scan_heredoc(pTHX_ char *s)
         linestr = shared->ls_linestr;
         bufend = SvEND(linestr);
         d = s;
-       while (s < bufend &&
-         (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) {
+       while (s < bufend - len + 1 &&
+          memNE(s,PL_tokenbuf,len) ) {
             if (*s++ == '\n')
                 ++shared->herelines;
         }
-       if (s >= bufend) {
+       if (s >= bufend - len + 1) {
             goto interminable;
         }
         sv_setpvn(tmpstr,d+1,s-d);
@@ -10035,8 +10115,11 @@ S_scan_heredoc(pTHX_ char *s)
         }
         CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
-           lex_grow_linestr(SvCUR(PL_linestr) + 2);
+            s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
+            /* ^That should be enough to avoid this needing to grow:  */
             sv_catpvs(PL_linestr, "\n\0");
+            assert(s == SvPVX(PL_linestr));
+            PL_bufend = SvEND(PL_linestr);
         }
         s = PL_bufptr;
  #ifdef PERL_MAD
@@ -10254,11 +10337,15 @@ intro_sym:
  
  
  /* scan_str
-   takes: start position in buffer
-         keep_quoted preserve \ on the embedded delimiter(s)
-         keep_delims preserve the delimiters around the string
-         re_reparse  compiling a run-time /(?{})/:
-                       collapse // to /,  and skip encoding src
+   takes:
+       start                   position in buffer
+       keep_quoted             preserve \ on the embedded delimiter(s)
+       keep_delims             preserve the delimiters around the string
+       re_reparse              compiling a run-time /(?{})/:
+                                  collapse // to /,  and skip encoding src
+       deprecate_escaped_meta  issue a deprecation warning for cer-
+                               tain paired metacharacters that appear
+                               escaped within it
     returns: position to continue reading from buffer
     side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
         updates the read buffer.
@@ -10300,9 +10387,7 @@ intro_sym:
  
  STATIC char *
  S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
-        bool deprecate_escaped_meta /* Should we issue a deprecation warning
-                                       for certain paired metacharacters that
-                                       appear escaped within it */
+                bool deprecate_escaped_meta
      )
  {
      dVAR;
@@ -10402,8 +10487,49 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
                 int offset = s - SvPVX_const(PL_linestr);
                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
                                            &offset, (char*)termstr, termlen);
-               const char * const ns = SvPVX_const(PL_linestr) + offset;
-               char * const svlast = SvEND(sv) - 1;
+               const char *ns;
+               char *svlast;
+
+               if (SvIsCOW(PL_linestr)) {
+                   STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
+                   STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
+                   STRLEN last_lop_pos, re_eval_start_pos, s_pos;
+                   char *buf = SvPVX(PL_linestr);
+                   bufend_pos = PL_parser->bufend - buf;
+                   bufptr_pos = PL_parser->bufptr - buf;
+                   oldbufptr_pos = PL_parser->oldbufptr - buf;
+                   oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
+                   linestart_pos = PL_parser->linestart - buf;
+                   last_uni_pos = PL_parser->last_uni
+                       ? PL_parser->last_uni - buf
+                       : 0;
+                   last_lop_pos = PL_parser->last_lop
+                       ? PL_parser->last_lop - buf
+                       : 0;
+                   re_eval_start_pos =
+                       PL_parser->lex_shared->re_eval_start ?
+                            PL_parser->lex_shared->re_eval_start - buf : 0;
+                   s_pos = s - buf;
+
+                   sv_force_normal(PL_linestr);
+
+                   buf = SvPVX(PL_linestr);
+                   PL_parser->bufend = buf + bufend_pos;
+                   PL_parser->bufptr = buf + bufptr_pos;
+                   PL_parser->oldbufptr = buf + oldbufptr_pos;
+                   PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
+                   PL_parser->linestart = buf + linestart_pos;
+                   if (PL_parser->last_uni)
+                       PL_parser->last_uni = buf + last_uni_pos;
+                   if (PL_parser->last_lop)
+                       PL_parser->last_lop = buf + last_lop_pos;
+                   if (PL_parser->lex_shared->re_eval_start)
+                       PL_parser->lex_shared->re_eval_start  =
+                           buf + re_eval_start_pos;
+                   s = buf + s_pos;
+               }
+               ns = SvPVX_const(PL_linestr) + offset;
+               svlast = SvEND(sv) - 1;
  
                 for (; s < ns; s++) {
                     if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
@@ -10524,26 +10650,39 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
                           * context where the delimiter is also a metacharacter,
                           * the backslash is useless, and deprecated.  () and []
                           * are meta in any context. {} are meta only when
-                         * appearing in a quantifier or in things like '\p{'.
-                         * They also aren't meta unless there is a matching
-                         * closed, escaped char later on within the string.
-                         * If 's' points to an open, set a flag; if to a close,
-                         * test that flag, and raise a warning if it was set */
+                         * appearing in a quantifier or in things like '\p{'
+                         * (but '\\p{' isn't meta).  They also aren't meta
+                         * unless there is a matching closed, escaped char
+                         * later on within the string.  If 's' points to an
+                         * open, set a flag; if to a close, test that flag, and
+                         * raise a warning if it was set */
  
                         if (deprecate_escaped_meta) {
                              if (*s == PL_multi_open) {
                                  if (*s != '{') {
                                      escaped_open = s;
                                  }
-                                else if (regcurly(s,
-                                                  TRUE /* Look for a closing
-                                                          '\}' */)
-                                         || (s - start > 2  /* Look for e.g.
-                                                               '\x{' */
-                                             && _generic_isCC(*(s-2), _CC_BACKSLASH_FOO_LBRACE_IS_META)))
-                                {
+                                     /* Look for a closing '\}' */
+                                else if (regcurly(s, TRUE)) {
                                      escaped_open = s;
                                  }
+                                     /* Look for e.g.  '\x{' */
+                                else if (s - start > 2
+                                         && _generic_isCC(*(s-2),
+                                             _CC_BACKSLASH_FOO_LBRACE_IS_META))
+                                { /* Exclude '\\x', '\\\\x', etc. */
+                                    char *lookbehind = s - 4;
+                                    bool is_meta = TRUE;
+                                    while (lookbehind >= start
+                                           && *lookbehind == '\\')
+                                    {
+                                        is_meta = ! is_meta;
+                                        lookbehind--;
+                                    }
+                                    if (is_meta) {
+                                        escaped_open = s;
+                                    }
+                                }
                              }
                              else if (escaped_open) {
                                  Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
@@ -11325,9 +11464,8 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
      Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
          OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
      if (context)
-       Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n",
-                            SVfARG(newSVpvn_flags(context, contlen,
-                                        SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+       Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
+                            UTF8fARG(UTF, contlen, context));
      else
         Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
      if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
@@ -11679,7 +11817,7 @@ Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
             /* Append native character for the rev point */
             tmpend = uvchr_to_utf8(tmpbuf, rev);
             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
-           if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
+           if (!NATIVE_IS_INVARIANT(rev))
                  SvUTF8_on(sv);
             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
                  s = ++pos;