Change Constant(undef) error to something meaningful

[perl5.git] / toke.c
diff --git a/toke.c b/toke.c

index 0a74efe..423bebc 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -66,7 +66,6 @@ Individual members of C<PL_parser> have their own documentation.
  #define PL_multi_start         (PL_parser->multi_start)
  #define PL_multi_open          (PL_parser->multi_open)
  #define PL_multi_close         (PL_parser->multi_close)
-#define PL_pending_ident        (PL_parser->pending_ident)
  #define PL_preambled           (PL_parser->preambled)
  #define PL_sublex_info         (PL_parser->sublex_info)
  #define PL_linestr             (PL_parser->linestr)
@@ -111,11 +110,6 @@ Individual members of C<PL_parser> have their own documentation.
  #  define PL_nextval           (PL_parser->nextval)
  #endif
  
-/* This can't be done with embed.fnc, because struct yy_parser contains a
-   member named pending_ident, which clashes with the generated #define  */
-static int
-S_pending_ident(pTHX);
-
  static const char ident_too_long[] = "Identifier too long";
  
  #ifdef PERL_MAD
@@ -364,7 +358,7 @@ static struct debug_tokens {
      { GIVEN,           TOKENTYPE_IVAL,         "GIVEN" },
      { HASHBRACK,       TOKENTYPE_NONE,         "HASHBRACK" },
      { IF,              TOKENTYPE_IVAL,         "IF" },
-    { LABEL,           TOKENTYPE_OPVAL,        "LABEL" },
+    { LABEL,           TOKENTYPE_PVAL,         "LABEL" },
      { LOCAL,           TOKENTYPE_IVAL,         "LOCAL" },
      { LOOPEX,          TOKENTYPE_OPNUM,        "LOOPEX" },
      { LSTOP,           TOKENTYPE_OPNUM,        "LSTOP" },
@@ -373,7 +367,6 @@ static struct debug_tokens {
      { METHOD,          TOKENTYPE_OPVAL,        "METHOD" },
      { MULOP,           TOKENTYPE_OPNUM,        "MULOP" },
      { MY,              TOKENTYPE_IVAL,         "MY" },
-    { MYSUB,           TOKENTYPE_NONE,         "MYSUB" },
      { NOAMP,           TOKENTYPE_NONE,         "NOAMP" },
      { NOTOP,           TOKENTYPE_NONE,         "NOTOP" },
      { OROP,            TOKENTYPE_IVAL,         "OROP" },
@@ -433,7 +426,7 @@ S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
         }
         if (name)
             Perl_sv_catpv(aTHX_ report, name);
-       else if ((char)rv > ' ' && (char)rv < '~')
+       else if ((char)rv > ' ' && (char)rv <= '~')
             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
         else if (!rv)
             sv_catpvs(report, "EOF");
@@ -791,6 +784,8 @@ Perl_parser_free(pTHX_  const yy_parser *parser)
                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
         PerlIO_close(parser->rsfp);
      SvREFCNT_dec(parser->rsfp_filters);
+    SvREFCNT_dec(parser->lex_stuff);
+    SvREFCNT_dec(parser->sublex_info.repl);
  
      Safefree(parser->lex_brackstack);
      Safefree(parser->lex_casestack);
@@ -799,6 +794,37 @@ Perl_parser_free(pTHX_  const yy_parser *parser)
      Safefree(parser);
  }
  
+void
+Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
+{   /* Free every op held in parser's pending next-token entries that was allocated from slab. */
+#ifdef PERL_MAD
+    I32 nexttoke = parser->lasttoke;   /* MAD builds: lasttoke is the entry count */
+#else
+    I32 nexttoke = parser->nexttoke;   /* otherwise nexttoke is the entry count */
+#endif
+    PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
+    while (nexttoke--) {       /* visit indexes count-1 down to 0 */
+#ifdef PERL_MAD
+       if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
+                               & 0xffff)        /* only the low 16 bits of the type are tested */
+        && parser->nexttoke[nexttoke].next_val.opval
+        && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
+        && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
+               op_free(parser->nexttoke[nexttoke].next_val.opval);
+               parser->nexttoke[nexttoke].next_val.opval = NULL;       /* no dangling pointer to the freed op */
+       }
+#else
+       if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)      /* low 16 bits of the type only */
+        && parser->nextval[nexttoke].opval
+        && parser->nextval[nexttoke].opval->op_slabbed
+        && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
+           op_free(parser->nextval[nexttoke].opval);
+           parser->nextval[nexttoke].opval = NULL;     /* no dangling pointer to the freed op */
+       }
+#endif
+    }
+}
+
  
  /*
  =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
@@ -930,8 +956,8 @@ Perl_lex_grow_linestr(pTHX_ STRLEN len)
      linestart_pos = PL_parser->linestart - buf;
      last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
      last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
-    re_eval_start_pos = PL_sublex_info.re_eval_start ?
-                            PL_sublex_info.re_eval_start - buf : 0;
+    re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
+                            PL_parser->lex_shared->re_eval_start - buf : 0;
  
      buf = sv_grow(linestr, len);
  
@@ -944,8 +970,8 @@ Perl_lex_grow_linestr(pTHX_ STRLEN len)
         PL_parser->last_uni = buf + last_uni_pos;
      if (PL_parser->last_lop)
         PL_parser->last_lop = buf + last_lop_pos;
-    if (PL_sublex_info.re_eval_start)
-        PL_sublex_info.re_eval_start  = buf + re_eval_start_pos;
+    if (PL_parser->lex_shared->re_eval_start)
+        PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
      return buf;
  }
  
@@ -983,10 +1009,13 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
         if (flags & LEX_STUFF_UTF8) {
             goto plain_copy;
         } else {
-           STRLEN highhalf = 0;
+           STRLEN highhalf = 0;    /* Count of variants */
             const char *p, *e = pv+len;
-           for (p = pv; p != e; p++)
-               highhalf += !!(((U8)*p) & 0x80);
+           for (p = pv; p != e; p++) {
+               if (! UTF8_IS_INVARIANT(*p)) {
+                    highhalf++;
+                }
+            }
             if (!highhalf)
                 goto plain_copy;
             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
@@ -997,9 +1026,9 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
             PL_parser->bufend += len+highhalf;
             for (p = pv; p != e; p++) {
                 U8 c = (U8)*p;
-               if (c & 0x80) {
-                   *bufptr++ = (char)(0xc0 | (c >> 6));
-                   *bufptr++ = (char)(0x80 | (c & 0x3f));
+               if (! UTF8_IS_INVARIANT(c)) {
+                   *bufptr++ = UTF8_TWO_BYTE_HI(c);
+                   *bufptr++ = UTF8_TWO_BYTE_LO(c);
                 } else {
                     *bufptr++ = (char)c;
                 }
@@ -1011,14 +1040,13 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
             const char *p, *e = pv+len;
             for (p = pv; p != e; p++) {
                 U8 c = (U8)*p;
-               if (c >= 0xc4) {
+               if (UTF8_IS_ABOVE_LATIN1(c)) {
                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
                                 "non-Latin-1 character into Latin-1 input");
-               } else if (c >= 0xc2 && p+1 != e &&
-                           (((U8)p[1]) & 0xc0) == 0x80) {
+               } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
                     p++;
                     highhalf++;
-               } else if (c >= 0x80) {
+               } else if (! UTF8_IS_INVARIANT(c)) {
                     /* malformed UTF-8 */
                     ENTER;
                     SAVESPTR(PL_warnhook);
@@ -1035,17 +1063,20 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
             SvCUR_set(PL_parser->linestr,
                 SvCUR(PL_parser->linestr) + len-highhalf);
             PL_parser->bufend += len-highhalf;
-           for (p = pv; p != e; p++) {
-               U8 c = (U8)*p;
-               if (c & 0x80) {
-                   *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
-                   p++;
-               } else {
-                   *bufptr++ = (char)c;
+           p = pv;
+           while (p < e) {
+               if (UTF8_IS_INVARIANT(*p)) {
+                   *bufptr++ = *p;
+                    p++;
                 }
+               else {
+                    assert(p < e -1 );
+                   *bufptr++ = TWO_BYTE_UTF8_TO_UNI(*p, *(p+1));
+                   p += 2;
+                }
             }
         } else {
-           plain_copy:
+         plain_copy:
             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
             bufptr = PL_parser->bufptr;
             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
@@ -1393,10 +1424,10 @@ Perl_lex_peek_unichar(pTHX_ U32 flags)
             bufend = PL_parser->bufend;
         }
         head = (U8)*s;
-       if (!(head & 0x80))
+       if (UTF8_IS_INVARIANT(head))
             return head;
-       if (head & 0x40) {
-           len = PL_utf8skip[head];
+       if (UTF8_IS_START(head)) {
+           len = UTF8SKIP(&head);
             while ((STRLEN)(bufend-s) < len) {
                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
                     break;
@@ -1566,6 +1597,12 @@ S_incline(pTHX_ const char *s)
      PERL_ARGS_ASSERT_INCLINE;
  
      COPLINE_INC_WITH_HERELINES;
+    if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
+     && s+1 == PL_bufend && *s == ';') {
+       /* fake newline in string eval */
+       CopLINE_dec(PL_curcop);
+       return;
+    }
      if (*s++ != '#')
         return;
      while (SPACE_OR_TAB(*s))
@@ -1698,7 +1735,7 @@ S_incline(pTHX_ const char *s)
  /* skip space before PL_thistoken */
  
  STATIC char *
-S_skipspace0(pTHX_ register char *s)
+S_skipspace0(pTHX_ char *s)
  {
      PERL_ARGS_ASSERT_SKIPSPACE0;
  
@@ -1719,7 +1756,7 @@ S_skipspace0(pTHX_ register char *s)
  /* skip space after PL_thistoken */
  
  STATIC char *
-S_skipspace1(pTHX_ register char *s)
+S_skipspace1(pTHX_ char *s)
  {
      const char *start = s;
      I32 startoff = start - SvPVX(PL_linestr);
@@ -1746,7 +1783,7 @@ S_skipspace1(pTHX_ register char *s)
  }
  
  STATIC char *
-S_skipspace2(pTHX_ register char *s, SV **svp)
+S_skipspace2(pTHX_ char *s, SV **svp)
  {
      char *start;
      const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
@@ -1799,7 +1836,7 @@ S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
   */
  
  STATIC char *
-S_skipspace(pTHX_ register char *s)
+S_skipspace(pTHX_ char *s)
  {
  #ifdef PERL_MAD
      char *start = s;
@@ -2001,11 +2038,6 @@ S_force_next(pTHX_ I32 type)
         tokereport(type, &NEXTVAL_NEXTTOKE);
      }
  #endif
-    /* Don’t let opslab_force_free snatch it */
-    if (S_is_opval_token(type & 0xffff) && NEXTVAL_NEXTTOKE.opval) {
-       assert(!NEXTVAL_NEXTTOKE.opval->op_savefree);
-       NEXTVAL_NEXTTOKE.opval->op_savefree = 1;
-    }  
  #ifdef PERL_MAD
      if (PL_curforce < 0)
         start_force(PL_lasttoke);
@@ -2078,7 +2110,7 @@ S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
   */
  
  STATIC char *
-S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
+S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
  {
      dVAR;
      char *s;
@@ -2127,14 +2159,14 @@ S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow
   */
  
  STATIC void
-S_force_ident(pTHX_ register const char *s, int kind)
+S_force_ident(pTHX_ const char *s, int kind)
  {
      dVAR;
  
      PERL_ARGS_ASSERT_FORCE_IDENT;
  
-    if (*s) {
-       const STRLEN len = strlen(s);
+    if (s[0]) {
+       const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
                                                                  UTF ? SVf_UTF8 : 0));
         start_force(PL_curforce);
@@ -2157,6 +2189,14 @@ S_force_ident(pTHX_ register const char *s, int kind)
      }
  }
  
+static void
+S_force_ident_maybe_lex(pTHX_ char pit)
+{   /* Queue a forced token of type 'p' whose value (ival) is pit. */
+    start_force(PL_curforce);
+    NEXTVAL_NEXTTOKE.ival = pit;       /* the value must be stored before force_next() queues the token */
+    force_next('p');   /* NOTE(review): presumably 'p' marks a pending identifier for yylex to resolve -- confirm */
+}
+
  NV
  Perl_str_to_version(pTHX_ SV *sv)
  {
@@ -2471,8 +2511,6 @@ S_sublex_push(pTHX)
      SAVEI32(PL_lex_starts);
      SAVEI8(PL_lex_state);
      SAVESPTR(PL_lex_repl);
-    SAVEPPTR(PL_sublex_info.re_eval_start);
-    SAVESPTR(PL_sublex_info.re_eval_str);
      SAVEVPTR(PL_lex_inpat);
      SAVEI16(PL_lex_inwhat);
      SAVECOPLINE(PL_curcop);
@@ -2499,8 +2537,6 @@ S_sublex_push(pTHX)
      PL_lex_repl = PL_sublex_info.repl;
      PL_lex_stuff = NULL;
      PL_sublex_info.repl = NULL;
-    PL_sublex_info.re_eval_start = NULL;
-    PL_sublex_info.re_eval_str = NULL;
  
      PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
         = SvPVX(PL_linestr);
@@ -2611,6 +2647,188 @@ S_sublex_done(pTHX)
      }
  }
  
+PERL_STATIC_INLINE SV*
+S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
+{
+    /* <s> points to first character of interior of \N{}, <e> to one beyond the
+     * interior, hence to the "}".  Finds what the name resolves to, returning
+     * an SV* containing it (the caller frees it); NULL if no valid one found */
+
+    SV* res;    /* created only once the input has passed the UTF-8 check, so the early return below cannot leak it */
+
+    HV * table;
+    SV **cvp;
+    SV *cv;
+    SV *rv;
+    HV *stash;
+    const U8* first_bad_char_loc;
+    const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
+
+    PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
+
+    if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
+                                     e - backslash_ptr,
+                                     &first_bad_char_loc))
+    {
+        /* If warnings are on, this will print a more detailed analysis of what
+         * is wrong than the error message below */
+        utf8n_to_uvuni(first_bad_char_loc,
+                       e - ((char *) first_bad_char_loc),
+                       NULL, 0);
+
+        /* We deliberately don't try to print the malformed character, which
+         * might not print very well; it also may be just the first of many
+         * malformations, so don't print what comes after it */
+        yyerror(Perl_form(aTHX_
+            "Malformed UTF-8 character immediately after '%.*s'",
+            (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
+       return NULL;
+    }
+    res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
+    res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
+                        /* include the <}> */
+                        e - backslash_ptr + 1);
+    if (! SvPOK(res)) {
+        return NULL;    /* NOTE(review): res is not released here or on the later error returns -- confirm who owns new_constant()'s result */
+    }
+
+    /* See if the charnames handler is the Perl core's, and if so, we can skip
+     * the validation needed for a user-supplied one, as Perl's does its own
+     * validation. */
+    table = GvHV(PL_hintgv);            /* ^H */
+    cvp = hv_fetchs(table, "charnames", FALSE);
+    if (cvp && (cv = *cvp) && SvROK(cv) && ((rv = SvRV(cv)) != NULL)
+        && SvTYPE(rv) == SVt_PVCV && ((stash = CvSTASH(rv)) != NULL))
+    {
+        const char * const name = HvNAME(stash);    /* may be NULL for a nameless stash */
+        if (name && strEQ(name, "_charnames")) {
+           return res;
+       }
+    }
+
+    /* Here, it isn't Perl's charname handler.  We can't rely on a
+     * user-supplied handler to validate the input name.  For non-utf8 input,
+     * look to see that the first character is legal.  Then loop through the
+     * rest checking that each is a continuation */
+
+    /* This code needs to be sync'ed with a regex in _charnames.pm which does
+     * the same thing */
+
+    if (! UTF) {
+        if (! isALPHAU(*s)) {
+            goto bad_charname;
+        }
+        s++;
+        while (s < e) {
+            if (! isCHARNAME_CONT(*s)) {
+                goto bad_charname;
+            }
+            s++;
+        }
+    }
+    else {
+        /* Similarly for utf8.  For invariants can check directly; for other
+         * Latin1, can calculate their code point and check; otherwise  use a
+         * swash */
+        if (UTF8_IS_INVARIANT(*s)) {
+            if (! isALPHAU(*s)) {
+                goto bad_charname;
+            }
+            s++;
+        } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
+            if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))))) {
+                goto bad_charname;
+            }
+            s += 2;
+        }
+        else {
+            if (! PL_utf8_charname_begin) {
+                U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
+                PL_utf8_charname_begin = _core_swash_init("utf8",
+                                                        "_Perl_Charname_Begin",
+                                                        &PL_sv_undef,
+                                                        1, 0, NULL, &flags);
+            }
+            if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
+                goto bad_charname;
+            }
+            s += UTF8SKIP(s);
+        }
+
+        while (s < e) {
+            if (UTF8_IS_INVARIANT(*s)) {
+                if (! isCHARNAME_CONT(*s)) {
+                    goto bad_charname;
+                }
+                s++;
+            }
+            else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
+                if (! isCHARNAME_CONT(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s,
+                                                                    *(s+1)))))
+                {
+                    goto bad_charname;
+                }
+                s += 2;
+            }
+            else {
+                if (! PL_utf8_charname_continue) {
+                    U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
+                    PL_utf8_charname_continue = _core_swash_init("utf8",
+                                                "_Perl_Charname_Continue",
+                                                &PL_sv_undef,
+                                                1, 0, NULL, &flags);
+                }
+                if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
+                    goto bad_charname;
+                }
+                s += UTF8SKIP(s);
+            }
+        }
+    }
+
+    if (SvUTF8(res)) { /* Don't accept malformed input */
+        const U8* first_bad_char_loc;
+        STRLEN len;
+        const char* const str = SvPV_const(res, len);
+        if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
+            /* If warnings are on, this will print a more detailed analysis of
+             * what is wrong than the error message below.  The length passed
+             * is the number of bytes remaining from the bad character on */
+            utf8n_to_uvuni(first_bad_char_loc,
+                           str + len - (const char *) first_bad_char_loc,
+                           NULL, 0);
+
+            /* We deliberately don't try to print the malformed character,
+             * which might not print very well; it also may be just the first
+             * of many malformations, so don't print what comes after it */
+            yyerror_pv(
+              Perl_form(aTHX_
+                "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
+                 (int) (e - backslash_ptr + 1), backslash_ptr,
+                 (int) ((char *) first_bad_char_loc - str), str
+              ),
+              SVf_UTF8);
+            return NULL;
+        }
+    }
+
+    return res;
+
+  bad_charname: {
+        int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
+
+        /* The final %.*s makes sure that should the trailing NUL be missing
+         * that this print won't run off the end of the string */
+        yyerror_pv(
+          Perl_form(aTHX_
+            "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
+            (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
+            (int)(e - s + bad_char_size), s + bad_char_size
+          ),
+          UTF ? SVf_UTF8 : 0);
+        return NULL;
+    }
+}
+
  /*
    scan_const
  
@@ -2718,6 +2936,7 @@ S_scan_const(pTHX_ char *start)
                                                    isn't utf8, as for example
                                                    when it is entirely composed
                                                    of hex constants */
+    SV *res;                           /* result from charnames */
  
      /* Note on sizing:  The scanned constant is placed into sv, which is
       * initialized by newSV() assuming one byte of output for every byte of
@@ -2746,6 +2965,9 @@ S_scan_const(pTHX_ char *start)
         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
      }
  
+    /* Protect sv from errors and fatal warnings. */
+    ENTER_with_name("scan_const");
+    SAVEFREESV(sv);
  
      while (s < send || dorange) {
  
@@ -2765,7 +2987,7 @@ S_scan_const(pTHX_ char *start)
  #ifdef EBCDIC
                     && !native_range
  #endif
-                   ) {
+                ) {
                     char * const c = (char*)utf8_hop((U8*)d, -1);
                     char *e = d++;
                     while (e-- > c)
@@ -2817,7 +3039,6 @@ S_scan_const(pTHX_ char *start)
  #endif
  
                  if (min > max) {
-                   SvREFCNT_dec(sv);
                     Perl_croak(aTHX_
                                "Invalid range \"%c-%c\" in transliteration operator",
                                (char)min, (char)max);
@@ -2876,7 +3097,6 @@ S_scan_const(pTHX_ char *start)
             /* range begins (ignore - as first or last char) */
             else if (*s == '-' && s+1 < send  && s != start) {
                 if (didrange) {
-                   SvREFCNT_dec(sv);
                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
                 }
                 if (has_utf8
@@ -3181,31 +3401,6 @@ S_scan_const(pTHX_ char *start)
  
                 /* Here it looks like a named character */
  
-               if (PL_lex_inpat) {
-
-                   /* XXX This block is temporary code.  \N{} implies that the
-                    * pattern is to have Unicode semantics, and therefore
-                    * currently has to be encoded in utf8.  By putting it in
-                    * utf8 now, we save a whole pass in the regular expression
-                    * compiler.  Once that code is changed so Unicode
-                    * semantics doesn't necessarily have to be in utf8, this
-                    * block should be removed.  However, the code that parses
-                    * the output of this would have to be changed to not
-                    * necessarily expect utf8 */
-                   if (!has_utf8) {
-                       SvCUR_set(sv, d - SvPVX_const(sv));
-                       SvPOK_on(sv);
-                       *d = '\0';
-                       /* See Note on sizing above.  */
-                       sv_utf8_upgrade_flags_grow(sv,
-                                       SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
-                                       /* 5 = '\N{' + cur char + NUL */
-                                       (STRLEN)(send - s) + 5);
-                       d = SvPVX(sv) + SvCUR(sv);
-                       has_utf8 = TRUE;
-                   }
-               }
-
                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
                                 | PERL_SCAN_DISALLOW_PREFIX;
@@ -3267,32 +3462,12 @@ S_scan_const(pTHX_ char *start)
                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
                     }
                 }
-               else { /* Here is \N{NAME} but not \N{U+...}. */
-
-                   SV *res;            /* result from charnames */
-                   const char *str;    /* the string in 'res' */
-                   STRLEN len;         /* its length */
-
-                   /* Get the value for NAME */
-                   res = newSVpvn(s, e - s);
-                   res = new_constant( NULL, 0, "charnames",
-                                       /* includes all of: \N{...} */
-                                       res, NULL, s - 3, e - s + 4 );
-
-                   /* Most likely res will be in utf8 already since the
-                    * standard charnames uses pack U, but a custom translator
-                    * can leave it otherwise, so make sure.  XXX This can be
-                    * revisited to not have charnames use utf8 for characters
-                    * that don't need it when regexes don't have to be in utf8
-                    * for Unicode semantics.  If doing so, remember EBCDIC */
-                   sv_utf8_upgrade(res);
-                   str = SvPV_const(res, len);
-
-                   /* Don't accept malformed input */
-                   if (! is_utf8_string((U8 *) str, len)) {
-                       yyerror("Malformed UTF-8 returned by \\N");
-                   }
-                   else if (PL_lex_inpat) {
+               else /* Here is \N{NAME} but not \N{U+...}. */
+                     if ((res = get_and_check_backslash_N_name(s, e)))
+                {
+                    STRLEN len;
+                    const char *str = SvPV_const(res, len);
+                    if (PL_lex_inpat) {
  
                         if (! len) { /* The name resolved to an empty string */
                             Copy("\\N{}", d, 4, char);
@@ -3306,73 +3481,88 @@ S_scan_const(pTHX_ char *start)
                             * returned by charnames */
  
                             const char *str_end = str + len;
-                           STRLEN char_length;     /* cur char's byte length */
-                           STRLEN output_length;   /* and the number of bytes
-                                                      after this is translated
-                                                      into hex digits */
                             const STRLEN off = d - SvPVX_const(sv);
  
-                           /* 2 hex per byte; 2 chars for '\N'; 2 chars for
-                            * max('U+', '.'); and 1 for NUL */
-                           char hex_string[2 * UTF8_MAXBYTES + 5];
-
-                           /* Get the first character of the result. */
-                           U32 uv = utf8n_to_uvuni((U8 *) str,
-                                                   len,
-                                                   &char_length,
-                                                   UTF8_ALLOW_ANYUV);
-
-                           /* The call to is_utf8_string() above hopefully
-                            * guarantees that there won't be an error.  But
-                            * it's easy here to make sure.  The function just
-                            * above warns and returns 0 if invalid utf8, but
-                            * it can also return 0 if the input is validly a
-                            * NUL. Disambiguate */
-                           if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
-                               uv = UNICODE_REPLACEMENT;
-                           }
-
-                           /* Convert first code point to hex, including the
-                            * boiler plate before it.  For all these, we
-                            * convert to native format so that downstream code
-                            * can continue to assume the input is native */
-                           output_length =
-                               my_snprintf(hex_string, sizeof(hex_string),
-                                           "\\N{U+%X",
-                                           (unsigned int) UNI_TO_NATIVE(uv));
-
-                           /* Make sure there is enough space to hold it */
-                           d = off + SvGROW(sv, off
-                                                + output_length
-                                                + (STRLEN)(send - e)
-                                                + 2);  /* '}' + NUL */
-                           /* And output it */
-                           Copy(hex_string, d, output_length, char);
-                           d += output_length;
-
-                           /* For each subsequent character, append dot and
-                            * its ordinal in hex */
-                           while ((str += char_length) < str_end) {
-                               const STRLEN off = d - SvPVX_const(sv);
-                               U32 uv = utf8n_to_uvuni((U8 *) str,
-                                                       str_end - str,
-                                                       &char_length,
-                                                       UTF8_ALLOW_ANYUV);
-                               if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
-                                   uv = UNICODE_REPLACEMENT;
-                               }
-
-                               output_length =
-                                   my_snprintf(hex_string, sizeof(hex_string),
-                                           ".%X",
-                                           (unsigned int) UNI_TO_NATIVE(uv));
-
-                               d = off + SvGROW(sv, off
-                                                    + output_length
-                                                    + (STRLEN)(send - e)
-                                                    + 2);      /* '}' +  NUL */
-                               Copy(hex_string, d, output_length, char);
-                               d += output_length;
+                            if (! SvUTF8(res)) {
+                                /* For the non-UTF-8 case, we can determine the
+                                 * exact length needed without having to parse
+                                 * through the string.  Each character takes up
+                                 * 2 hex digits plus either a trailing dot or
+                                 * the "}" */
+                                d = off + SvGROW(sv, off
+                                                    + 3 * len
+                                                    + 6 /* For the "\N{U+", and
+                                                           trailing NUL */
+                                                    + (STRLEN)(send - e));
+                                Copy("\\N{U+", d, 5, char);
+                                d += 5;
+                                while (str < str_end) {
+                                    char hex_string[4];
+                                    my_snprintf(hex_string, sizeof(hex_string),
+                                                "%02X.", (U8) *str);
+                                    Copy(hex_string, d, 3, char);
+                                    d += 3;
+                                    str++;
+                                }
+                                d--;    /* We will overwrite below the final
+                                           dot with a right brace */
+                            }
+                            else {
+                                STRLEN char_length; /* cur char's byte length */
+
+                                /* and the number of bytes after this is
+                                 * translated into hex digits */
+                                STRLEN output_length;
+
+                                /* 2 hex per byte; 2 chars for '\N'; 2 chars
+                                 * for max('U+', '.'); and 1 for NUL */
+                                char hex_string[2 * UTF8_MAXBYTES + 5];
+
+                                /* Get the first character of the result. */
+                                U32 uv = utf8n_to_uvuni((U8 *) str,
+                                                        len,
+                                                        &char_length,
+                                                        UTF8_ALLOW_ANYUV);
+                                /* Convert first code point to hex, including
+                                 * the boiler plate before it.  For all these,
+                                 * we convert to native format so that
+                                 * downstream code can continue to assume the
+                                 * input is native */
+                                output_length =
+                                    my_snprintf(hex_string, sizeof(hex_string),
+                                            "\\N{U+%X",
+                                            (unsigned int) UNI_TO_NATIVE(uv));
+
+                                /* Make sure there is enough space to hold it */
+                                d = off + SvGROW(sv, off
+                                                    + output_length
+                                                    + (STRLEN)(send - e)
+                                                    + 2);      /* '}' + NUL */
+                                /* And output it */
+                                Copy(hex_string, d, output_length, char);
+                                d += output_length;
+
+                                /* For each subsequent character, append dot and
+                                * its ordinal in hex */
+                                while ((str += char_length) < str_end) {
+                                    const STRLEN off = d - SvPVX_const(sv);
+                                    U32 uv = utf8n_to_uvuni((U8 *) str,
+                                                            str_end - str,
+                                                            &char_length,
+                                                            UTF8_ALLOW_ANYUV);
+                                    output_length =
+                                        my_snprintf(hex_string,
+                                            sizeof(hex_string),
+                                            ".%X",
+                                            (unsigned int) UNI_TO_NATIVE(uv));
+
+                                    d = off + SvGROW(sv, off
+                                                        + output_length
+                                                        + (STRLEN)(send - e)
+                                                        + 2);  /* '}' +  NUL */
+                                    Copy(hex_string, d, output_length, char);
+                                    d += output_length;
+                                }
                             }
  
                             *d++ = '}'; /* Done.  Add the trailing brace */
@@ -3405,68 +3595,9 @@ S_scan_const(pTHX_ char *start)
                         Copy(str, d, len, char);
                         d += len;
                     }
+
                     SvREFCNT_dec(res);
  
-                   /* Deprecate non-approved name syntax */
-                   if (ckWARN_d(WARN_DEPRECATED)) {
-                       bool problematic = FALSE;
-                       char* i = s;
-
-                       /* For non-ut8 input, look to see that the first
-                        * character is an alpha, then loop through the rest
-                        * checking that each is a continuation */
-                       if (! this_utf8) {
-                           if (! isALPHAU(*i)) problematic = TRUE;
-                           else for (i = s + 1; i < e; i++) {
-                               if (isCHARNAME_CONT(*i)) continue;
-                               problematic = TRUE;
-                               break;
-                           }
-                       }
-                       else {
-                           /* Similarly for utf8.  For invariants can check
-                            * directly.  We accept anything above the latin1
-                            * range because it is immaterial to Perl if it is
-                            * correct or not, and is expensive to check.  But
-                            * it is fairly easy in the latin1 range to convert
-                            * the variants into a single character and check
-                            * those */
-                           if (UTF8_IS_INVARIANT(*i)) {
-                               if (! isALPHAU(*i)) problematic = TRUE;
-                           } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
-                               if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
-                                                                           *(i+1)))))
-                               {
-                                   problematic = TRUE;
-                               }
-                           }
-                           if (! problematic) for (i = s + UTF8SKIP(s);
-                                                   i < e;
-                                                   i+= UTF8SKIP(i))
-                           {
-                               if (UTF8_IS_INVARIANT(*i)) {
-                                   if (isCHARNAME_CONT(*i)) continue;
-                               } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
-                                   continue;
-                               } else if (isCHARNAME_CONT(
-                                           UNI_TO_NATIVE(
-                                           TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
-                               {
-                                   continue;
-                               }
-                               problematic = TRUE;
-                               break;
-                           }
-                       }
-                       if (problematic) {
-                           /* The e-i passed to the final %.*s makes sure that
-                            * should the trailing NUL be missing that this
-                            * print won't run off the end of the string */
-                           Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
-                                       "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
-                                       (int)(i - s + 1), s, (int)(e - i), i + 1);
-                       }
-                   }
                 } /* End \N{NAME} */
  #ifdef EBCDIC
                 if (!dorange) 
@@ -3592,6 +3723,7 @@ S_scan_const(pTHX_ char *start)
  
      /* return the substring (via pl_yylval) only if we parsed anything */
      if (s > PL_bufptr) {
+       SvREFCNT_inc_simple_void_NN(sv);
         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
             const char *const key = PL_lex_inpat ? "qr" : "q";
             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
@@ -3616,8 +3748,8 @@ S_scan_const(pTHX_ char *start)
                                 type, typelen);
         }
         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
-    } else
-       SvREFCNT_dec(sv);
+    }
+    LEAVE_with_name("scan_const");
      return s;
  }
  
@@ -3643,7 +3775,7 @@ S_scan_const(pTHX_ char *start)
  /* This is the one truly awful dwimmer necessary to conflate C and sed. */
  
  STATIC int
-S_intuit_more(pTHX_ register char *s)
+S_intuit_more(pTHX_ char *s)
  {
      dVAR;
  
@@ -4092,7 +4224,7 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
  }
  
  STATIC char *
-S_filter_gets(pTHX_ register SV *sv, STRLEN append)
+S_filter_gets(pTHX_ SV *sv, STRLEN append)
  {
      dVAR;
  
@@ -4187,10 +4319,6 @@ Perl_madlex(pTHX)
      PL_thiswhite = 0;
      PL_thismad = 0;
  
-    /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
-    if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
-        return S_pending_ident(aTHX);
-
      /* previous token ate up our whitespace? */
      if (!PL_lasttoke && PL_nextwhite) {
         PL_thiswhite = PL_nextwhite;
@@ -4277,7 +4405,6 @@ Perl_madlex(pTHX)
      case FUNC0SUB:
      case UNIOPSUB:
      case LSTOPSUB:
-    case LABEL:
         if (pl_yylval.opval)
             append_madprops(PL_thismad, pl_yylval.opval, 0);
         PL_thismad = 0;
@@ -4292,6 +4419,10 @@ Perl_madlex(pTHX)
         }
         break;
  
+    /* pval */
+    case LABEL:
+       break;
+
      case ']':
      case '}':
         if (PL_faketokens)
@@ -4407,21 +4538,40 @@ S_word_takes_any_delimeter(char *p, STRLEN len)
    stitching them into a tree.
  
    Returns:
-    PRIVATEREF
+    The type of the next token
  
    Structure:
-      if read an identifier
-          if we're in a my declaration
-             croak if they tried to say my($foo::bar)
-             build the ops for a my() declaration
-         if it's an access to a my() variable
-             are we in a sort block?
-                 croak if my($a); $a <=> $b
-             build ops for access to a my() variable
-         if in a dq string, and they've said @foo and we can't find @foo
-             croak
-         build ops for a bareword
-      if we already built the token before, use it.
+      Switch based on the current state:
+         - if we already built the token before, use it
+         - if we have a case modifier in a string, deal with that
+         - handle other cases of interpolation inside a string
+         - scan the next line if we are inside a format
+      In the normal state switch on the next character:
+         - default:
+           if alphabetic, go to key lookup
+           unrecognized character - croak
+         - 0/4/26: handle end-of-line or EOF
+         - cases for whitespace
+         - \n and #: handle comments and line numbers
+         - various operators, brackets and sigils
+         - numbers
+         - quotes
+         - 'v': vstrings (or go to key lookup)
+         - 'x' repetition operator (or go to key lookup)
+         - other ASCII alphanumerics (key lookup begins here):
+             word before => ?
+             keyword plugin
+             scan built-in keyword (but do nothing with it yet)
+             check for statement label
+             check for lexical subs
+                 goto just_a_word if there is one
+             see whether built-in keyword is overridden
+             switch on keyword number:
+                 - default: just_a_word:
+                     not a built-in keyword; handle bareword lookup
+                     disambiguate between method and sub call
+                     fall back to bareword
+                 - cases for built-in keywords
  */
  
  
@@ -4455,11 +4605,6 @@ Perl_yylex(pTHX)
             pv_display(tmp, s, strlen(s), 0, 60));
         SvREFCNT_dec(tmp);
      } );
-    /* check if there's an identifier for us to look at */
-    if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
-        return REPORT(S_pending_ident(aTHX));
-
-    /* no identifier pending identification */
  
      switch (PL_lex_state) {
  #ifdef COMMENTARY
@@ -4519,9 +4664,7 @@ Perl_yylex(pTHX)
                     PL_lex_allbrackets--;
                 next_type &= 0xffff;
             }
-           if (S_is_opval_token(next_type) && pl_yylval.opval)
-               pl_yylval.opval->op_savefree = 0; /* release */
-           return REPORT(next_type);
+           return REPORT(next_type == 'p' ? pending_ident() : next_type);
         }
  
      /* interpolated case modifiers like \L \U, including \Q and \E.
@@ -4562,9 +4705,11 @@ Perl_yylex(pTHX)
  #ifdef PERL_MAD
             while (PL_bufptr != PL_bufend &&
               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
-               if (!PL_thiswhite)
+               if (PL_madskills) {
+                 if (!PL_thiswhite)
                     PL_thiswhite = newSVpvs("");
-               sv_catpvn(PL_thiswhite, PL_bufptr, 2);
+                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
+               }
                 PL_bufptr += 2;
             }
  #else
@@ -4580,9 +4725,11 @@ Perl_yylex(pTHX)
             s = PL_bufptr + 1;
             if (s[1] == '\\' && s[2] == 'E') {
  #ifdef PERL_MAD
-               if (!PL_thiswhite)
+               if (PL_madskills) {
+                 if (!PL_thiswhite)
                     PL_thiswhite = newSVpvs("");
-               sv_catpvn(PL_thiswhite, PL_bufptr, 4);
+                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
+               }
  #endif
                 PL_bufptr = s + 3;
                 PL_lex_state = LEX_INTERPCONCAT;
@@ -4683,7 +4830,7 @@ Perl_yylex(pTHX)
         }
         /* Convert (?{...}) and friends to 'do {...}' */
         if (PL_lex_inpat && *PL_bufptr == '(') {
-           PL_sublex_info.re_eval_start = PL_bufptr;
+           PL_parser->lex_shared->re_eval_start = PL_bufptr;
             PL_bufptr += 2;
             if (*PL_bufptr != '{')
                 PL_bufptr++;
@@ -4742,28 +4889,30 @@ Perl_yylex(pTHX)
            re_eval_str.  If the here-doc body’s length equals the previous
            value of re_eval_start, re_eval_start will now be null.  So
            check re_eval_str as well. */
-       if (PL_sublex_info.re_eval_start || PL_sublex_info.re_eval_str) {
+       if (PL_parser->lex_shared->re_eval_start
+        || PL_parser->lex_shared->re_eval_str) {
             SV *sv;
             if (*PL_bufptr != ')')
                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
             PL_bufptr++;
             /* having compiled a (?{..}) expression, return the original
              * text too, as a const */
-           if (PL_sublex_info.re_eval_str) {
-               sv = PL_sublex_info.re_eval_str;
-               PL_sublex_info.re_eval_str = NULL;
-               SvCUR_set(sv, PL_bufptr - PL_sublex_info.re_eval_start);
+           if (PL_parser->lex_shared->re_eval_str) {
+               sv = PL_parser->lex_shared->re_eval_str;
+               PL_parser->lex_shared->re_eval_str = NULL;
+               SvCUR_set(sv,
+                        PL_bufptr - PL_parser->lex_shared->re_eval_start);
                 SvPV_shrink_to_cur(sv);
             }
-           else sv = newSVpvn(PL_sublex_info.re_eval_start,
-                              PL_bufptr - PL_sublex_info.re_eval_start);
+           else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
+                        PL_bufptr - PL_parser->lex_shared->re_eval_start);
             start_force(PL_curforce);
             /* XXX probably need a CURMAD(something) here */
             NEXTVAL_NEXTTOKE.opval =
                     (OP*)newSVOP(OP_CONST, 0,
                                  sv);
             force_next(THING);
-           PL_sublex_info.re_eval_start = NULL;
+           PL_parser->lex_shared->re_eval_start = NULL;
             PL_expect = XTERM;
             return REPORT(',');
         }
@@ -5238,9 +5387,11 @@ Perl_yylex(pTHX)
      case ' ': case '\t': case '\f': case 013:
  #ifdef PERL_MAD
         PL_realtokenstart = -1;
-       if (!PL_thiswhite)
+       if (PL_madskills) {
+         if (!PL_thiswhite)
             PL_thiswhite = newSVpvs("");
-       sv_catpvn(PL_thiswhite, s, 1);
+         sv_catpvn(PL_thiswhite, s, 1);
+       }
  #endif
         s++;
         goto retry;
@@ -5265,6 +5416,7 @@ Perl_yylex(pTHX)
                     incline(s);
             }
             else {
+               const bool in_comment = *s == '#';
                 d = s;
                 while (d < PL_bufend && *d != '\n')
                     d++;
@@ -5278,7 +5430,11 @@ Perl_yylex(pTHX)
                     PL_thiswhite = newSVpvn(s, d - s);
  #endif
                 s = d;
-               incline(s);
+               if (in_comment && d == PL_bufend
+                && PL_lex_state == LEX_INTERPNORMAL
+                && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
+                && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
+               else incline(s);
             }
             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
                 PL_lex_state = LEX_FORMLINE;
@@ -5500,7 +5656,8 @@ Perl_yylex(pTHX)
         if (!PL_tokenbuf[1]) {
             PREREF('%');
         }
-       PL_pending_ident = '%';
+       PL_expect = XOPERATOR;
+       force_ident_maybe_lex('%');
         TERM('%');
  
      case '^':
@@ -5750,10 +5907,7 @@ Perl_yylex(pTHX)
         }
         switch (PL_expect) {
         case XTERM:
-           if (PL_oldoldbufptr == PL_last_lop)
-               PL_lex_brackstack[PL_lex_brackets++] = XTERM;
-           else
-               PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
+           PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
             PL_lex_allbrackets++;
             OPERATOR(HASHBRACK);
         case XOPERATOR:
@@ -5929,7 +6083,10 @@ Perl_yylex(pTHX)
  #endif
                     return yylex();     /* ignore fake brackets */
                 }
-               if (*s == '-' && s[1] == '>')
+               if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
+                && SvEVALED(PL_lex_repl))
+                   PL_lex_state = LEX_INTERPEND;
+               else if (*s == '-' && s[1] == '>')
                     PL_lex_state = LEX_INTERPENDMAYBE;
                 else if (*s != '[' && *s != '{')
                     PL_lex_state = LEX_INTERPEND;
@@ -5948,7 +6105,7 @@ Perl_yylex(pTHX)
         force_next(formbrack ? '.' : '}');
         if (formbrack) LEAVE;
  #ifdef PERL_MAD
-       if (!PL_thistoken)
+       if (PL_madskills && !PL_thistoken)
             PL_thistoken = newSVpvs("");
  #endif
         if (formbrack == 2) { /* means . where arguments were expected */
@@ -5984,10 +6141,12 @@ Perl_yylex(pTHX)
             BAop(OP_BIT_AND);
         }
  
-       s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
-       if (*PL_tokenbuf) {
+       PL_tokenbuf[0] = '&';
+       s = scan_ident(s - 1, PL_bufend, PL_tokenbuf + 1,
+                      sizeof PL_tokenbuf - 1, TRUE);
+       if (PL_tokenbuf[1]) {
             PL_expect = XOPERATOR;
-           force_ident(PL_tokenbuf, '&');
+           force_ident_maybe_lex('&');
         }
         else
             PREREF('&');
@@ -6223,7 +6382,7 @@ Perl_yylex(pTHX)
             if (!PL_tokenbuf[1])
                 PREREF(DOLSHARP);
             PL_expect = XOPERATOR;
-           PL_pending_ident = '#';
+           force_ident_maybe_lex('#');
             TOKEN(DOLSHARP);
         }
  
@@ -6341,7 +6500,7 @@ Perl_yylex(pTHX)
                     PL_expect = XTERM;          /* print $fh <<"EOF" */
             }
         }
-       PL_pending_ident = '$';
+       force_ident_maybe_lex('$');
         TOKEN('$');
  
      case '@':
@@ -6378,7 +6537,8 @@ Perl_yylex(pTHX)
                 }
             }
         }
-       PL_pending_ident = '@';
+       PL_expect = XOPERATOR;
+       force_ident_maybe_lex('@');
         TERM('@');
  
       case '/':                 /* may be division, defined-or, or pattern */
@@ -6554,8 +6714,16 @@ Perl_yylex(pTHX)
                 s = scan_num(s, &pl_yylval);
                 TERM(THING);
             }
+           else if ((*start == ':' && start[1] == ':')
+                 || (PL_expect == XSTATE && *start == ':'))
+               goto keylookup;
+           else if (PL_expect == XSTATE) {
+               d = start;
+               while (d < PL_bufend && isSPACE(*d)) d++;
+               if (*d == ':') goto keylookup;
+           }
             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
-           else if (!isALPHA(*start) && (PL_expect == XTERM
+           if (!isALPHA(*start) && (PL_expect == XTERM
                         || PL_expect == XREF || PL_expect == XSTATE
                         || PL_expect == XTERMORDORDOR)) {
                 GV *const gv = gv_fetchpvn_flags(s, start - s,
@@ -6604,11 +6772,21 @@ Perl_yylex(pTHX)
  
        keylookup: {
         bool anydelim;
+       bool lex;
         I32 tmp;
+       SV *sv;
+       CV *cv;
+       PADOFFSET off;
+       OP *rv2cv_op;
  
+       lex = FALSE;
         orig_keyword = 0;
+       off = 0;
+       sv = NULL;
+       cv = NULL;
         gv = NULL;
         gvp = NULL;
+       rv2cv_op = NULL;
  
         PL_bufptr = s;
         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
@@ -6668,13 +6846,44 @@ Perl_yylex(pTHX)
         if (!anydelim && PL_expect == XSTATE
               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
             s = d + 1;
-           pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
-                                            newSVpvn_flags(PL_tokenbuf,
-                                                        len, UTF ? SVf_UTF8 : 0));
+           pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
+           pl_yylval.pval[len] = '\0';
+           pl_yylval.pval[len+1] = UTF ? 1 : 0;
             CLINE;
             TOKEN(LABEL);
         }
  
+       /* Check for lexical sub */
+       if (PL_expect != XOPERATOR) {
+           char tmpbuf[sizeof PL_tokenbuf + 1];
+           *tmpbuf = '&';
+           Copy(PL_tokenbuf, tmpbuf+1, len, char);
+           off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
+           if (off != NOT_IN_PAD) {
+               assert(off); /* we assume this is boolean-true below */
+               if (PAD_COMPNAME_FLAGS_isOUR(off)) {
+                   HV *  const stash = PAD_COMPNAME_OURSTASH(off);
+                   HEK * const stashname = HvNAME_HEK(stash);
+                   sv = newSVhek(stashname);
+                    sv_catpvs(sv, "::");
+                    sv_catpvn_flags(sv, PL_tokenbuf, len,
+                                   (UTF ? SV_CATUTF8 : SV_CATBYTES));
+                   gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
+                                   SVt_PVCV);
+                   off = 0;
+               }
+               else {
+                   rv2cv_op = newOP(OP_PADANY, 0);
+                   rv2cv_op->op_targ = off;
+                   rv2cv_op = (OP*)newCVREF(0, rv2cv_op);
+                   cv = (CV *)PAD_SV(off);
+               }
+               lex = TRUE;
+               goto just_a_word;
+           }
+           off = 0;
+       }
+
         if (tmp < 0) {                  /* second-class keyword? */
             GV *ogv = NULL;     /* override (winner) */
             GV *hgv = NULL;     /* hidden (loser) */
@@ -6734,16 +6943,22 @@ Perl_yylex(pTHX)
                earlier ':' case doesn't bypass the initialisation.  */
             if (0) {
             just_a_word_zero_gv:
+               sv = NULL;
+               cv = NULL;
                 gv = NULL;
                 gvp = NULL;
+               rv2cv_op = NULL;
                 orig_keyword = 0;
+               lex = 0;
+               off = 0;
             }
           just_a_word: {
-               SV *sv;
                 int pkgname = 0;
                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
-               OP *rv2cv_op;
-               CV *cv;
+               const char penultchar =
+                   lastchar && PL_bufptr - 2 >= PL_linestart
+                        ? PL_bufptr[-2]
+                        : 0;
  #ifdef PERL_MAD
                 SV *nextPL_nextwhite = 0;
  #endif
@@ -6775,7 +6990,8 @@ Perl_yylex(pTHX)
                 }
  
                 /* Look for a subroutine with this name in current package,
-                  unless name is "Foo::", in which case Foo is a bareword
+                  unless this is a lexical sub, or name is "Foo::",
+                  in which case Foo is a bareword
                    (and a package name). */
  
                 if (len > 2 && !PL_madskills &&
@@ -6793,7 +7009,7 @@ Perl_yylex(pTHX)
                     gvp = 0;
                 }
                 else {
-                   if (!gv) {
+                   if (!lex && !gv) {
                         /* Mustn't actually add anything to a symbol table.
                            But also don't want to "initialise" any placeholder
                            constants that might already be there into full
@@ -6807,7 +7023,8 @@ Perl_yylex(pTHX)
  
                 /* if we saw a global override before, get the right name */
  
-               sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
+               if (!sv)
+                 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
                     len ? len : strlen(PL_tokenbuf));
                 if (gvp) {
                     SV * const tmp_sv = sv;
@@ -6833,12 +7050,13 @@ Perl_yylex(pTHX)
                 if (len)
                     goto safe_bareword;
  
+               if (!off)
                 {
                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
                     const_op->op_private = OPpCONST_BARE;
                     rv2cv_op = newCVREF(0, const_op);
+                   cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
                 }
-               cv = rv2cv_op_cv(rv2cv_op, 0);
  
                 /* See if it's the indirect object for a list operator. */
  
@@ -6925,7 +7143,8 @@ Perl_yylex(pTHX)
                     }
                     start_force(PL_curforce);
  #endif
-                   NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
+                   NEXTVAL_NEXTTOKE.opval =
+                       off ? rv2cv_op : pl_yylval.opval;
                     PL_expect = XOPERATOR;
  #ifdef PERL_MAD
                     if (PL_madskills) {
@@ -6934,8 +7153,9 @@ Perl_yylex(pTHX)
                         PL_thistoken = newSVpvs("");
                     }
  #endif
-                   op_free(rv2cv_op);
-                   force_next(WORD);
+                   if (off)
+                        op_free(pl_yylval.opval), force_next(PRIVATEREF);
+                   else op_free(rv2cv_op),        force_next(WORD);
                     pl_yylval.ival = 0;
                     TOKEN('&');
                 }
@@ -6967,7 +7187,7 @@ Perl_yylex(pTHX)
                 /* Not a method, so call it a subroutine (if defined) */
  
                 if (cv) {
-                   if (lastchar == '-') {
+                   if (lastchar == '-' && penultchar != '-') {
                          const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
                                 "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
@@ -7050,7 +7270,7 @@ Perl_yylex(pTHX)
                             curmad('X', PL_thistoken);
                             PL_thistoken = newSVpvs("");
                         }
-                       force_next(WORD);
+                       force_next(off ? PRIVATEREF : WORD);
                         if (!PL_lex_allbrackets &&
                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
@@ -7093,7 +7313,7 @@ Perl_yylex(pTHX)
                         PL_nextwhite = nextPL_nextwhite;
                         curmad('X', PL_thistoken);
                         PL_thistoken = newSVpvs("");
-                       force_next(WORD);
+                       force_next(off ? PRIVATEREF : WORD);
                         if (!PL_lex_allbrackets &&
                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
@@ -7102,7 +7322,7 @@ Perl_yylex(pTHX)
  #else
                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
                     PL_expect = XTERM;
-                   force_next(WORD);
+                   force_next(off ? PRIVATEREF : WORD);
                     if (!PL_lex_allbrackets &&
                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
@@ -7418,10 +7638,15 @@ Perl_yylex(pTHX)
             if (*s == '{')
                 PRETERMBLOCK(DO);
             if (*s != '\'') {
-               d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, 1, &len);
-               if (len) {
+               *PL_tokenbuf = '&';
+               d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
+                             1, &len);
+               if (len && !keyword(PL_tokenbuf + 1, len, 0)) {
                     d = SKIPSPACE1(d);
-                   if (*d == '(') s = force_word(s,WORD,TRUE,TRUE,FALSE);
+                   if (*d == '(') {
+                       force_ident_maybe_lex('&');
+                       s = d;
+                   }
                 }
             }
             if (orig_keyword == KEY_do) {
@@ -7456,6 +7681,7 @@ Perl_yylex(pTHX)
             UNI(OP_DBMCLOSE);
  
         case KEY_dump:
+           PL_expect = XOPERATOR;
             s = force_word(s,WORD,TRUE,FALSE,FALSE);
             LOOPX(OP_DUMP);
  
@@ -7588,6 +7814,7 @@ Perl_yylex(pTHX)
             LOP(OP_GREPSTART, XREF);
  
         case KEY_goto:
+           PL_expect = XOPERATOR;
             s = force_word(s,WORD,TRUE,FALSE,FALSE);
             LOOPX(OP_GOTO);
  
@@ -7710,6 +7937,7 @@ Perl_yylex(pTHX)
             LOP(OP_KILL,XTERM);
  
         case KEY_last:
+           PL_expect = XOPERATOR;
             s = force_word(s,WORD,TRUE,FALSE,FALSE);
             LOOPX(OP_LAST);
         
@@ -7787,7 +8015,17 @@ Perl_yylex(pTHX)
  #endif
                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
+               {
+                   if (!FEATURE_LEXSUBS_IS_ENABLED)
+                       Perl_croak(aTHX_
+                                 "Experimental \"%s\" subs not enabled",
+                                  tmp == KEY_my    ? "my"    :
+                                  tmp == KEY_state ? "state" : "our");
+                   Perl_ck_warner_d(aTHX_
+                       packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
+                       "The lexical_subs feature is experimental");
                     goto really_sub;
+               }
                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
                 if (!PL_in_my_stash) {
                     char tmpbuf[1024];
@@ -7807,6 +8045,7 @@ Perl_yylex(pTHX)
             OPERATOR(MY);
  
         case KEY_next:
+           PL_expect = XOPERATOR;
             s = force_word(s,WORD,TRUE,FALSE,FALSE);
             LOOPX(OP_NEXT);
  
@@ -7992,6 +8231,7 @@ Perl_yylex(pTHX)
  
         case KEY_require:
             s = SKIPSPACE1(s);
+           PL_expect = XOPERATOR;
             if (isDIGIT(*s)) {
                 s = force_version(s, FALSE);
             }
@@ -8023,6 +8263,7 @@ Perl_yylex(pTHX)
             UNI(OP_RESET);
  
         case KEY_redo:
+           PL_expect = XOPERATOR;
             s = force_word(s,WORD,TRUE,FALSE,FALSE);
             LOOPX(OP_REDO);
  
@@ -8195,7 +8436,7 @@ Perl_yylex(pTHX)
         case KEY_sub:
           really_sub:
             {
-               char tmpbuf[sizeof PL_tokenbuf];
+               char * const tmpbuf = PL_tokenbuf + 1;
                 SSize_t tboffset = 0;
                 expectation attrful;
                 bool have_name, have_proto;
@@ -8205,12 +8446,15 @@ Perl_yylex(pTHX)
                 SV *tmpwhite = 0;
  
                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
-               SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
+               SV *subtoken = PL_madskills
+                  ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
+                  : NULL;
                 PL_thistoken = 0;
  
                 d = s;
                 s = SKIPSPACE2(s,tmpwhite);
  #else
+               d = s;
                 s = skipspace(s);
  #endif
  
@@ -8225,12 +8469,17 @@ Perl_yylex(pTHX)
                     attrful = XATTRBLOCK;
                     /* remember buffer pos'n for later force_word */
                     tboffset = s - PL_oldbufptr;
-                   d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
+                   d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
+                                 &len);
  #ifdef PERL_MAD
                     if (PL_madskills)
                         nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
  #endif
-                   if (memchr(tmpbuf, ':', len))
+                   *PL_tokenbuf = '&';
+                   if (memchr(tmpbuf, ':', len) || key != KEY_sub
+                    || pad_findmy_pvn(
+                           PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
+                       ) != NOT_IN_PAD)
                         sv_setpvn(PL_subname, tmpbuf, len);
                     else {
                         sv_setsv(PL_subname,PL_curstname);
@@ -8241,13 +8490,12 @@ Perl_yylex(pTHX)
                          SvUTF8_on(PL_subname);
                     have_name = TRUE;
  
-#ifdef PERL_MAD
  
+#ifdef PERL_MAD
                     start_force(0);
                     CURMAD('X', nametoke);
                     CURMAD('_', tmpwhite);
-                   (void) force_word(PL_oldbufptr + tboffset, WORD,
-                                     FALSE, TRUE, TRUE);
+                   force_ident_maybe_lex('&');
  
                     s = SKIPSPACE2(d,tmpwhite);
  #else
@@ -8255,8 +8503,13 @@ Perl_yylex(pTHX)
  #endif
                 }
                 else {
-                   if (key == KEY_my)
-                       Perl_croak(aTHX_ "Missing name in \"my sub\"");
+                   if (key == KEY_my || key == KEY_our || key==KEY_state)
+                   {
+                       *d = '\0';
+                       /* diag_listed_as: Missing name in "%s sub" */
+                       Perl_croak(aTHX_
+                                 "Missing name in \"%s\"", PL_bufptr);
+                   }
                     PL_expect = XTERMBLOCK;
                     attrful = XATTRTERM;
                     sv_setpvs(PL_subname,"?");
@@ -8405,11 +8658,8 @@ Perl_yylex(pTHX)
                     TOKEN(ANONSUB);
                 }
  #ifndef PERL_MAD
-               (void) force_word(PL_oldbufptr + tboffset, WORD,
-                                 FALSE, TRUE, TRUE);
+               force_ident_maybe_lex('&');
  #endif
-               if (key == KEY_my)
-                   TOKEN(MYSUB);
                 TOKEN(SUB);
             }
  
@@ -8570,19 +8820,36 @@ Perl_yylex(pTHX)
  #pragma segment Main
  #endif
  
+/*
+  S_pending_ident
+
+  Looks up an identifier in the pad or in a package
+
+  Returns:
+    PRIVATEREF if this is a lexical name.
+    WORD       if this belongs to a package.
+
+  Structure:
+      if we're in a my declaration
+         croak if they tried to say my($foo::bar)
+         build the ops for a my() declaration
+      if it's an access to a my() variable
+         build ops for access to a my() variable
+      if in a dq string, and they've said @foo and we can't find @foo
+         warn
+      build ops for a bareword
+*/
+
  static int
  S_pending_ident(pTHX)
  {
      dVAR;
      PADOFFSET tmp = 0;
-    /* pit holds the identifier we read and pending_ident is reset */
-    char pit = PL_pending_ident;
+    const char pit = (char)pl_yylval.ival;
      const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
      /* All routes through this function want to know if there is a colon.  */
      const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
-    PL_pending_ident = 0;
  
-    /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
      DEBUG_T({ PerlIO_printf(Perl_debug_log,
            "### Pending identifier '%s'\n", PL_tokenbuf); });
  
@@ -8609,7 +8876,7 @@ S_pending_ident(pTHX)
              pl_yylval.opval = newOP(OP_PADANY, 0);
              pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
                                                          UTF ? SVf_UTF8 : 0);
-            return PRIVATEREF;
+           return PRIVATEREF;
          }
      }
  
@@ -8632,7 +8899,8 @@ S_pending_ident(pTHX)
                  sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
                  pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
                  pl_yylval.opval->op_private = OPpCONST_ENTERED;
-                gv_fetchsv(sym,
+                if (pit != '&')
+                  gv_fetchsv(sym,
                      (PL_in_eval
                          ? (GV_ADDMULTI | GV_ADDINEVAL)
                          : GV_ADDMULTI
@@ -8673,11 +8941,13 @@ S_pending_ident(pTHX)
      }
  
      /* build ops for a bareword */
-    pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(PL_tokenbuf + 1,
+    pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
+                                  newSVpvn_flags(PL_tokenbuf + 1,
                                                       tokenbuf_len - 1,
                                                        UTF ? SVf_UTF8 : 0 ));
      pl_yylval.opval->op_private = OPpCONST_ENTERED;
-    gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
+    if (pit != '&')
+       gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
                      (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
                       | ( UTF ? SVf_UTF8 : 0 ),
                      ((PL_tokenbuf[0] == '$') ? SVt_PV
@@ -8740,10 +9010,11 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
      }
  }
  
-/* Either returns sv, or mortalizes sv and returns a new SV*.
+/* Either returns sv, or mortalizes/frees sv and returns a new SV*.
     Best used as sv=new_constant(..., sv, ...).
     If s, pv are NULL, calls subroutine with one argument,
-   and type is used with error messages only. */
+   and <type> is used with error messages only.
+   <type> is assumed to be well formed UTF-8 */
  
  STATIC SV *
  S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
@@ -8752,15 +9023,22 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
      dVAR; dSP;
      HV * table = GvHV(PL_hintgv);               /* ^H */
      SV *res;
+    SV *errsv = NULL;
      SV **cvp;
      SV *cv, *typesv;
      const char *why1 = "", *why2 = "", *why3 = "";
  
      PERL_ARGS_ASSERT_NEW_CONSTANT;
+    /* We assume that this is true: */
+    if (*key == 'c') { assert (strEQ(key, "charnames")); }
+    assert(type || s);
  
      /* charnames doesn't work well if there have been errors found */
-    if (PL_error_count > 0 && strEQ(key,"charnames"))
+    if (PL_error_count > 0 && *key == 'c')
+    {
+       SvREFCNT_dec_NN(sv);
         return &PL_sv_undef;
+    }
  
      if (!table
         || ! (PL_hints & HINT_LOCALIZE_HH)
@@ -8771,7 +9049,7 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
         
         /* Here haven't found what we're looking for.  If it is charnames,
          * perhaps it needs to be loaded.  Try doing that before giving up */
-       if (strEQ(key,"charnames")) {
+       if (*key == 'c') {
             Perl_load_module(aTHX_
                             0,
                             newSVpvs("_charnames"),
@@ -8794,16 +9072,29 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
         }
         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
             msg = Perl_newSVpvf(aTHX_
-                           "Constant(%s) unknown", (type ? type: "undef"));
+                              "Constant(%.*s) unknown",
+                               (int)(type ? typelen : len),
+                               (type ? type: s));
         }
         else {
-       why1 = "$^H{";
-       why2 = key;
-       why3 = "} is not defined";
-    report:
-       msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
-                           (type ? type: "undef"), why1, why2, why3);
-       }
+            why1 = "$^H{";
+            why2 = key;
+            why3 = "} is not defined";
+        report:
+            if (*key == 'c') {
+                yyerror_pv(Perl_form(aTHX_
+                            /* The +3 is for '\N{'; -4 for that, plus '}' */
+                            "Unknown charname '%.*s'", (int)typelen - 4, type + 3
+                           ),
+                           UTF ? SVf_UTF8 : 0);
+                return sv;
+            }
+            else {
+                msg = Perl_newSVpvf(aTHX_ "Constant(%.*s): %s%s%s",
+                                    (int)(type ? typelen : len),
+                                    (type ? type: s), why1, why2, why3);
+            }
+        }
         yyerror(SvPVX_const(msg));
         SvREFCNT_dec(msg);
         return sv;
@@ -8835,9 +9126,12 @@ now_ok:
      SPAGAIN ;
  
      /* Check the eval first */
-    if (!PL_in_eval && SvTRUE(ERRSV)) {
-       sv_catpvs(ERRSV, "Propagated");
-       yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
+    if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
+       STRLEN errlen;
+       const char * errstr;
+       sv_catpvs(errsv, "Propagated");
+       errstr = SvPV_const(errsv, errlen);
+       yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
         (void)POPs;
         res = SvREFCNT_inc_simple(sv);
      }
@@ -8866,7 +9160,7 @@ now_ok:
     *slp
     */
  STATIC char *
-S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
+S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
  {
      dVAR;
      char *d = dest;
@@ -8909,7 +9203,7 @@ S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_packag
  }
  
  STATIC char *
-S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
+S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck_uni)
  {
      dVAR;
      char *bracket = NULL;
@@ -9363,8 +9657,6 @@ S_scan_subst(pTHX_ char *start)
         }
         sv_catpvs(repl, "{");
         sv_catsv(repl, PL_sublex_info.repl);
-       if (strchr(SvPVX(PL_sublex_info.repl), '#'))
-           sv_catpvs(repl, "\n");
         sv_catpvs(repl, "}");
         SvEVALED_on(repl);
         SvREFCNT_dec(PL_sublex_info.repl);
@@ -9480,36 +9772,28 @@ S_scan_trans(pTHX_ char *start)
     a whole string being evalled, or the contents of the current quote-
     like operator.
  
-   The three methods are:
-    - Steal lines from the input stream (stream)
-    - Scan the heredoc in PL_linestr and remove it therefrom (linestr)
-    - Peek at the PL_linestr of outer lexing scopes (peek)
-
-   They are used in these cases:
-     file scope or filtered eval                       stream
-     string eval                                       linestr
-     multiline quoted construct                                linestr
-     single-line quoted construct in file              stream
-     single-line quoted construct in eval or quote     peek
+   The two basic methods are:
+    - Steal lines from the input stream
+    - Scan the heredoc in PL_linestr and remove it therefrom
  
-   Single-line also applies to heredocs that begin on the last line of a
-   quote-like operator.
+   In a file scope or filtered eval, the first method is used; in a
+   string eval, the second.
  
-   Peeking within a quote also involves falling back to the stream method,
-   if the outer quote-like operators are all on one line (or the heredoc
-   marker is on the last line).
+   In a quote-like operator, we have to choose between the two,
+   depending on where we can find a newline.  We peek into outer
+   lexing scopes until we find one with a newline in it.  If we reach
+   the outermost lexing scope and it is a file, we use the stream
+   method.  Otherwise it is treated as an eval.
  */
  
  STATIC char *
-S_scan_heredoc(pTHX_ register char *s)
+S_scan_heredoc(pTHX_ char *s)
  {
      dVAR;
-    SV *herewas;
      I32 op_type = OP_SCALAR;
      I32 len;
      SV *tmpstr;
      char term;
-    const char *found_newline = 0;
      char *d;
      char *e;
      char *peek;
@@ -9592,18 +9876,6 @@ S_scan_heredoc(pTHX_ register char *s)
         s = olds;
      }
  #endif
-    if ((infile && !PL_lex_inwhat)
-     || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s))) {
-        herewas = newSVpvn(s,PL_bufend-s);
-    }
-    else {
-#ifdef PERL_MAD
-        herewas = newSVpvn(s-1,found_newline-s+1);
-#else
-        s--;
-        herewas = newSVpvn(s,found_newline-s);
-#endif
-    }
  #ifdef PERL_MAD
      if (PL_madskills) {
         tstart = SvPVX(PL_linestr) + stuffstart;
@@ -9612,14 +9884,8 @@ S_scan_heredoc(pTHX_ register char *s)
         else
             PL_thisstuff = newSVpvn(tstart, s - tstart);
      }
-#endif
-    s += SvCUR(herewas);
  
-#ifdef PERL_MAD
      stuffstart = s - SvPVX(PL_linestr);
-
-    if (found_newline)
-       s--;
  #endif
  
      tmpstr = newSV_type(SVt_PVIV);
@@ -9635,18 +9901,26 @@ S_scan_heredoc(pTHX_ register char *s)
  
      PL_multi_start = CopLINE(PL_curcop) + 1;
      PL_multi_open = PL_multi_close = '<';
-    if (PL_lex_inwhat && !found_newline) {
-       /* Peek into the line buffer of the parent lexing scope, going up
-          as many levels as necessary to find one with a newline after
-          bufptr.  See the comments in sublex_push for how IVX and NVX
-          are abused.
-        */
+    /* inside a string eval or quote-like operator */
+    if (!infile || PL_lex_inwhat) {
         SV *linestr;
-       char *bufptr, *bufend;
-       char * const olds = s - SvCUR(herewas);
-       char * const real_olds = s;
+       char *bufend;
+       char * const olds = s;
         PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
-       do {
+       /* These two fields are not set until an inner lexing scope is
+          entered.  But we need them set here. */
+       shared->ls_bufptr  = s;
+       shared->ls_linestr = PL_linestr;
+       if (PL_lex_inwhat)
+         /* Look for a newline.  If the current buffer does not have one,
+            peek into the line buffer of the parent lexing scope, going
+            up as many levels as necessary to find one with a newline
+            after bufptr.
+          */
+         while (!(s = (char *)memchr(
+                   (void *)shared->ls_bufptr, '\n',
+                   SvEND(shared->ls_linestr)-shared->ls_bufptr
+               ))) {
             shared = shared->ls_prev;
             /* shared is only null if we have gone beyond the outermost
                lexing scope.  In a file, we will have broken out of the
@@ -9658,14 +9932,14 @@ S_scan_heredoc(pTHX_ register char *s)
                most lexing scope.  In a file, shared->ls_linestr at that
                level is just one line, so there is no body to steal. */
             if (infile && !shared->ls_prev) {
-               s = real_olds;
+               s = olds;
                 goto streaming;
             }
-       } while (!(s = (char *)memchr(
-                   (void *)shared->ls_bufptr, '\n',
-                   SvEND(shared->ls_linestr)-shared->ls_bufptr
-               )));
-       bufptr = shared->ls_bufptr;
+         }
+       else {  /* eval */
+           s = (char*)memchr((void*)s, '\n', PL_bufend - s);
+           assert(s);
+       }
         linestr = shared->ls_linestr;
         bufend = SvEND(linestr);
         d = s;
@@ -9675,42 +9949,7 @@ S_scan_heredoc(pTHX_ register char *s)
                 ++shared->herelines;
         }
         if (s >= bufend) {
-           SvREFCNT_dec(herewas);
-           SvREFCNT_dec(tmpstr);
-           CopLINE_set(PL_curcop, (line_t)PL_multi_start-1);
-           missingterm(PL_tokenbuf + 1);
-       }
-       if (CxTYPE(cx) == CXt_EVAL && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
-        && cx->blk_eval.cur_text == linestr) {
-           cx->blk_eval.cur_text = newSVsv(linestr);
-           SvSCREAM_on(cx->blk_eval.cur_text);
-       }
-       sv_setpvn(herewas,bufptr,d-bufptr+1);
-       sv_setpvn(tmpstr,d+1,s-d);
-       s += len - 1;
-       sv_catpvn(herewas,s,bufend-s);
-       Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
-       SvCUR_set(linestr,
-                 bufptr-SvPVX_const(linestr)
-                  + SvCUR(herewas));
-
-       s = olds;
-       goto retval;
-    }
-    else if (!infile || found_newline) {
-       char * const olds = s - SvCUR(herewas);
-       PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
-       d = s;
-       while (s < PL_bufend &&
-         (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) {
-           if (*s++ == '\n')
-               ++shared->herelines;
-       }
-       if (s >= PL_bufend) {
-           SvREFCNT_dec(herewas);
-           SvREFCNT_dec(tmpstr);
-           CopLINE_set(PL_curcop, (line_t)PL_multi_start-1);
-           missingterm(PL_tokenbuf + 1);
+           goto interminable;
         }
         sv_setpvn(tmpstr,d+1,s-d);
  #ifdef PERL_MAD
@@ -9729,33 +9968,48 @@ S_scan_heredoc(pTHX_ register char *s)
         /* s now points to the newline after the heredoc terminator.
            d points to the newline before the body of the heredoc.
          */
+
+       /* We are going to modify linestr in place here, so set
+          aside copies of the string if necessary for re-evals or
+          (caller $n)[6]. */
         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
-          check PL_sublex_info.re_eval_str. */
-       if (PL_sublex_info.re_eval_start || PL_sublex_info.re_eval_str) {
+          check shared->re_eval_str. */
+       if (shared->re_eval_start || shared->re_eval_str) {
             /* Set aside the rest of the regexp */
-           if (!PL_sublex_info.re_eval_str)
-               PL_sublex_info.re_eval_str =
-                      newSVpvn(PL_sublex_info.re_eval_start,
-                               PL_bufend - PL_sublex_info.re_eval_start);
-           PL_sublex_info.re_eval_start -= s-d;
-       }
-       if (CxTYPE(cx) == CXt_EVAL && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
-        && cx->blk_eval.cur_text == PL_linestr) {
-           cx->blk_eval.cur_text = newSVsv(PL_linestr);
+           if (!shared->re_eval_str)
+               shared->re_eval_str =
+                      newSVpvn(shared->re_eval_start,
+                               bufend - shared->re_eval_start);
+           shared->re_eval_start -= s-d;
+       }
+       if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
+            CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
+            cx->blk_eval.cur_text == linestr)
+        {
+           cx->blk_eval.cur_text = newSVsv(linestr);
             SvSCREAM_on(cx->blk_eval.cur_text);
         }
         /* Copy everything from s onwards back to d. */
-       Move(s,d,PL_bufend-s + 1,char);
-       SvCUR_set(PL_linestr, SvCUR(PL_linestr) - (s-d));
-       PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+       Move(s,d,bufend-s + 1,char);
+       SvCUR_set(linestr, SvCUR(linestr) - (s-d));
+       /* Setting PL_bufend only applies when we have not dug deeper
+          into other scopes, because sublex_done sets PL_bufend to
+          SvEND(PL_linestr). */
+       if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
         s = olds;
      }
      else
-      streaming:
-       sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
-    term = PL_tokenbuf[1];
-    len--;
-    while (s >= PL_bufend) {   /* multiple line string? */
+    {
+      SV *linestr_save;
+     streaming:
+      sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
+      term = PL_tokenbuf[1];
+      len--;
+      linestr_save = PL_linestr; /* must restore this afterwards */
+      d = s;                    /* and this */
+      PL_linestr = newSVpvs("");
+      PL_bufend = SvPVX(PL_linestr);
+      while (1) {
  #ifdef PERL_MAD
         if (PL_madskills) {
             tstart = SvPVX(PL_linestr) + stuffstart;
@@ -9765,15 +10019,13 @@ S_scan_heredoc(pTHX_ register char *s)
                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
         }
  #endif
-       PL_bufptr = s;
+       PL_bufptr = PL_bufend;
         CopLINE_set(PL_curcop,
                     PL_multi_start + shared->herelines);
         if (!lex_next_chunk(LEX_NO_TERM)
          && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
-           SvREFCNT_dec(herewas);
-           SvREFCNT_dec(tmpstr);
-           CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
-           missingterm(PL_tokenbuf + 1);
+           SvREFCNT_dec(linestr_save);
+           goto interminable;
         }
         CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
@@ -9785,7 +10037,6 @@ S_scan_heredoc(pTHX_ register char *s)
         stuffstart = s - SvPVX(PL_linestr);
  #endif
         shared->herelines++;
-       PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
         PL_last_lop = PL_last_uni = NULL;
  #ifndef PERL_STRICT_CR
         if (PL_bufend - PL_linestart >= 2) {
@@ -9803,25 +10054,22 @@ S_scan_heredoc(pTHX_ register char *s)
             PL_bufend[-1] = '\n';
  #endif
         if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
-           STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
-           *(SvPVX(PL_linestr) + off ) = ' ';
-           lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
-           sv_catsv(PL_linestr,herewas);
+           SvREFCNT_dec(PL_linestr);
+           PL_linestr = linestr_save;
+           PL_linestart = SvPVX(linestr_save);
             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
-           s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
+           s = d;
+           break;
         }
         else {
-           s = PL_bufend;
             sv_catsv(tmpstr,PL_linestr);
         }
+      }
      }
-    s++;
-retval:
      PL_multi_end = CopLINE(PL_curcop);
      if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
         SvPV_shrink_to_cur(tmpstr);
      }
-    SvREFCNT_dec(herewas);
      if (!IN_BYTES) {
         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
             SvUTF8_on(tmpstr);
@@ -9831,6 +10079,11 @@ retval:
      PL_lex_stuff = tmpstr;
      pl_yylval.ival = op_type;
      return s;
+
+  interminable:
+    SvREFCNT_dec(tmpstr);
+    CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
+    missingterm(PL_tokenbuf + 1);
  }
  
  /* scan_inputsymbol
@@ -10113,7 +10366,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
      s += termlen;
  #ifdef PERL_MAD
      tstart = SvPVX(PL_linestr) + stuffstart;
-    if (!PL_thisopen && !keep_delims) {
+    if (PL_madskills && !PL_thisopen && !keep_delims) {
         PL_thisopen = newSVpvn(tstart, s - tstart);
         stuffstart = s - SvPVX(PL_linestr);
      }
@@ -10742,7 +10995,11 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
      case 'v':
  vstring:
                 sv = newSV(5); /* preallocate storage space */
+               ENTER_with_name("scan_vstring");
+               SAVEFREESV(sv);
                 s = scan_vstring(s, PL_bufend, sv);
+               SvREFCNT_inc_simple_void_NN(sv);
+               LEAVE_with_name("scan_vstring");
         break;
      }
  
@@ -10757,7 +11014,7 @@ vstring:
  }
  
  STATIC char *
-S_scan_formline(pTHX_ register char *s)
+S_scan_formline(pTHX_ char *s)
  {
      dVAR;
      char *eol;
@@ -10899,7 +11156,8 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
      CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
      CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
      if (outsidecv && CvPADLIST(outsidecv))
-       CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
+       CvPADLIST(PL_compcv)->xpadl_outid =
+           PadlistNAMES(CvPADLIST(outsidecv));
  
      return oldsavestack_ix;
  }
@@ -10943,7 +11201,6 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
      SV *msg;
      SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
      int yychar  = PL_parser->yychar;
-    U32 is_utf8 = flags & SVf_UTF8;
  
      PERL_ARGS_ASSERT_YYERROR_PVN;
  
@@ -11004,7 +11261,7 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
         else
             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
      }
-    msg = sv_2mortal(newSVpvn_flags(s, len, is_utf8));
+    msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
      Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
          OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
      if (context)
@@ -11025,9 +11282,10 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
      else
         qerror(msg);
      if (PL_error_count >= 10) {
-       if (PL_in_eval && SvCUR(ERRSV))
+       SV * errsv;
+       if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
-                      SVfARG(ERRSV), OutCopFILE(PL_curcop));
+                      SVfARG(errsv), OutCopFILE(PL_curcop));
         else
             Perl_croak(aTHX_ "%s has too many errors.\n",
              OutCopFILE(PL_curcop));
@@ -11291,13 +11549,18 @@ vstring, as well as updating the passed in sv.
  
  Function must be called like
  
-       sv = newSV(5);
+       sv = sv_2mortal(newSV(5));
         s = scan_vstring(s,e,sv);
  
  where s and e are the start and end of the string.
  The sv should already be large enough to store the vstring
  passed in, for performance reasons.
  
+This function may croak if fatal warnings are enabled in the
+calling scope, hence the sv_2mortal in the example (to prevent
+a leak).  Make sure to do SvREFCNT_inc afterwards if you use
+sv_2mortal.
+
  */
  
  char *
@@ -11663,11 +11926,10 @@ Perl_parse_label(pTHX_ U32 flags)
      if (PL_lex_state == LEX_KNOWNEXT) {
         PL_parser->yychar = yylex();
         if (PL_parser->yychar == LABEL) {
-           SV *lsv;
+           char * const lpv = pl_yylval.pval;
+           STRLEN llen = strlen(lpv);
             PL_parser->yychar = YYEMPTY;
-           lsv = newSV_type(SVt_PV);
-           sv_copypv(lsv, cSVOPx(pl_yylval.opval)->op_sv);
-           return lsv;
+           return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
         } else {
             yyunlex();
             goto no_label;