X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/451f421fe4742646fa2efbed0f45a19f0713d00f..0a520fced6c7f8a21494d4e9c42cd89f3a8ff5a5:/toke.c

diff --git a/toke.c b/toke.c
index 1079e94..332f653 100644
--- a/toke.c
+++ b/toke.c
@@ -110,7 +110,7 @@ Individual members of C<PL_parser> have their own documentation.
 #  define PL_nextval		(PL_parser->nextval)
 #endif
 
-static const char ident_too_long[] = "Identifier too long";
+static const char* const ident_too_long = "Identifier too long";
 
 #ifdef PERL_MAD
 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
@@ -358,7 +358,7 @@ static struct debug_tokens {
     { GIVEN,		TOKENTYPE_IVAL,		"GIVEN" },
     { HASHBRACK,	TOKENTYPE_NONE,		"HASHBRACK" },
     { IF,		TOKENTYPE_IVAL,		"IF" },
-    { LABEL,		TOKENTYPE_OPVAL,	"LABEL" },
+    { LABEL,		TOKENTYPE_PVAL,		"LABEL" },
     { LOCAL,		TOKENTYPE_IVAL,		"LOCAL" },
     { LOOPEX,		TOKENTYPE_OPNUM,	"LOOPEX" },
     { LSTOP,		TOKENTYPE_OPNUM,	"LSTOP" },
@@ -544,7 +544,7 @@ S_no_op(pTHX_ const char *const what, char *s)
 		    "\t(Missing semicolon on previous line?)\n");
 	else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 	    const char *t;
-	    for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':');
+	    for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
                                                             t += UTF ? UTF8SKIP(t) : 1)
 		NOOP;
 	    if (t < PL_bufptr && isSPACE(*t))
@@ -784,6 +784,8 @@ Perl_parser_free(pTHX_  const yy_parser *parser)
 		(parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 	PerlIO_close(parser->rsfp);
     SvREFCNT_dec(parser->rsfp_filters);
+    SvREFCNT_dec(parser->lex_stuff);
+    SvREFCNT_dec(parser->sublex_info.repl);
 
     Safefree(parser->lex_brackstack);
     Safefree(parser->lex_casestack);
@@ -792,6 +794,37 @@ Perl_parser_free(pTHX_  const yy_parser *parser)
     Safefree(parser);
 }
 
+void
+Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
+{
+#ifdef PERL_MAD
+    I32 nexttoke = parser->lasttoke;
+#else
+    I32 nexttoke = parser->nexttoke;
+#endif
+    PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
+    while (nexttoke--) {
+#ifdef PERL_MAD
+	if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
+				& 0xffff)
+	 && parser->nexttoke[nexttoke].next_val.opval
+	 && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
+	 && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
+		op_free(parser->nexttoke[nexttoke].next_val.opval);
+		parser->nexttoke[nexttoke].next_val.opval = NULL;
+	}
+#else
+	if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
+	 && parser->nextval[nexttoke].opval
+	 && parser->nextval[nexttoke].opval->op_slabbed
+	 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
+	    op_free(parser->nextval[nexttoke].opval);
+	    parser->nextval[nexttoke].opval = NULL;
+	}
+#endif
+    }
+}
+
 
 /*
 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
@@ -976,10 +1009,13 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 	if (flags & LEX_STUFF_UTF8) {
 	    goto plain_copy;
 	} else {
-	    STRLEN highhalf = 0;
+	    STRLEN highhalf = 0;    /* Count of variants */
 	    const char *p, *e = pv+len;
-	    for (p = pv; p != e; p++)
-		highhalf += !!(((U8)*p) & 0x80);
+	    for (p = pv; p != e; p++) {
+		if (! UTF8_IS_INVARIANT(*p)) {
+                    highhalf++;
+                }
+            }
 	    if (!highhalf)
 		goto plain_copy;
 	    lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
@@ -990,9 +1026,9 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 	    PL_parser->bufend += len+highhalf;
 	    for (p = pv; p != e; p++) {
 		U8 c = (U8)*p;
-		if (c & 0x80) {
-		    *bufptr++ = (char)(0xc0 | (c >> 6));
-		    *bufptr++ = (char)(0x80 | (c & 0x3f));
+		if (! UTF8_IS_INVARIANT(c)) {
+		    *bufptr++ = UTF8_TWO_BYTE_HI(c);
+		    *bufptr++ = UTF8_TWO_BYTE_LO(c);
 		} else {
 		    *bufptr++ = (char)c;
 		}
@@ -1004,14 +1040,13 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 	    const char *p, *e = pv+len;
 	    for (p = pv; p != e; p++) {
 		U8 c = (U8)*p;
-		if (c >= 0xc4) {
+		if (UTF8_IS_ABOVE_LATIN1(c)) {
 		    Perl_croak(aTHX_ "Lexing code attempted to stuff "
 				"non-Latin-1 character into Latin-1 input");
-		} else if (c >= 0xc2 && p+1 != e &&
-			    (((U8)p[1]) & 0xc0) == 0x80) {
+		} else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
 		    p++;
 		    highhalf++;
-		} else if (c >= 0x80) {
+		} else if (! UTF8_IS_INVARIANT(c)) {
 		    /* malformed UTF-8 */
 		    ENTER;
 		    SAVESPTR(PL_warnhook);
@@ -1028,17 +1063,20 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 	    SvCUR_set(PL_parser->linestr,
 	    	SvCUR(PL_parser->linestr) + len-highhalf);
 	    PL_parser->bufend += len-highhalf;
-	    for (p = pv; p != e; p++) {
-		U8 c = (U8)*p;
-		if (c & 0x80) {
-		    *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
-		    p++;
-		} else {
-		    *bufptr++ = (char)c;
+	    p = pv;
+	    while (p < e) {
+		if (UTF8_IS_INVARIANT(*p)) {
+		    *bufptr++ = *p;
+                    p++;
 		}
+		else {
+                    assert(p < e -1 );
+		    *bufptr++ = TWO_BYTE_UTF8_TO_UNI(*p, *(p+1));
+		    p += 2;
+                }
 	    }
 	} else {
-	    plain_copy:
+	  plain_copy:
 	    lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
 	    bufptr = PL_parser->bufptr;
 	    Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
@@ -1386,10 +1424,10 @@ Perl_lex_peek_unichar(pTHX_ U32 flags)
 	    bufend = PL_parser->bufend;
 	}
 	head = (U8)*s;
-	if (!(head & 0x80))
+	if (UTF8_IS_INVARIANT(head))
 	    return head;
-	if (head & 0x40) {
-	    len = PL_utf8skip[head];
+	if (UTF8_IS_START(head)) {
+	    len = UTF8SKIP(&head);
 	    while ((STRLEN)(bufend-s) < len) {
 		if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
 		    break;
@@ -1697,7 +1735,7 @@ S_incline(pTHX_ const char *s)
 /* skip space before PL_thistoken */
 
 STATIC char *
-S_skipspace0(pTHX_ register char *s)
+S_skipspace0(pTHX_ char *s)
 {
     PERL_ARGS_ASSERT_SKIPSPACE0;
 
@@ -1718,7 +1756,7 @@ S_skipspace0(pTHX_ register char *s)
 /* skip space after PL_thistoken */
 
 STATIC char *
-S_skipspace1(pTHX_ register char *s)
+S_skipspace1(pTHX_ char *s)
 {
     const char *start = s;
     I32 startoff = start - SvPVX(PL_linestr);
@@ -1745,7 +1783,7 @@ S_skipspace1(pTHX_ register char *s)
 }
 
 STATIC char *
-S_skipspace2(pTHX_ register char *s, SV **svp)
+S_skipspace2(pTHX_ char *s, SV **svp)
 {
     char *start;
     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
@@ -1798,7 +1836,7 @@ S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
  */
 
 STATIC char *
-S_skipspace(pTHX_ register char *s)
+S_skipspace(pTHX_ char *s)
 {
 #ifdef PERL_MAD
     char *start = s;
@@ -1853,7 +1891,7 @@ S_check_uni(pTHX)
     while (isSPACE(*PL_last_uni))
 	PL_last_uni++;
     s = PL_last_uni;
-    while (isALNUM_lazy_if(s,UTF) || *s == '-')
+    while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
 	s++;
     if ((t = strchr(s, '(')) && t < PL_bufptr)
 	return;
@@ -2000,11 +2038,6 @@ S_force_next(pTHX_ I32 type)
 	tokereport(type, &NEXTVAL_NEXTTOKE);
     }
 #endif
-    /* Donât let opslab_force_free snatch it */
-    if (S_is_opval_token(type & 0xffff) && NEXTVAL_NEXTTOKE.opval) {
-	assert(!NEXTVAL_NEXTTOKE.opval->op_savefree);
-	NEXTVAL_NEXTTOKE.opval->op_savefree = 1;
-    }	
 #ifdef PERL_MAD
     if (PL_curforce < 0)
 	start_force(PL_lasttoke);
@@ -2077,7 +2110,7 @@ S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
  */
 
 STATIC char *
-S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
+S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
 {
     dVAR;
     char *s;
@@ -2126,14 +2159,14 @@ S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow
  */
 
 STATIC void
-S_force_ident(pTHX_ register const char *s, int kind)
+S_force_ident(pTHX_ const char *s, int kind)
 {
     dVAR;
 
     PERL_ARGS_ASSERT_FORCE_IDENT;
 
-    if (*s) {
-	const STRLEN len = strlen(s);
+    if (s[0]) {
+	const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
 	OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
                                                                 UTF ? SVf_UTF8 : 0));
 	start_force(PL_curforce);
@@ -2614,6 +2647,201 @@ S_sublex_done(pTHX)
     }
 }
 
+PERL_STATIC_INLINE SV*
+S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
+{
+    /* <s> points to first character of interior of \N{}, <e> to one beyond the
+     * interior, hence to the "}".  Finds what the name resolves to, returning
+     * an SV* containing it; NULL if no valid one found */
+
+    SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
+
+    HV * table;
+    SV **cvp;
+    SV *cv;
+    SV *rv;
+    HV *stash;
+    const U8* first_bad_char_loc;
+    const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
+
+    PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
+
+    if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
+                                     e - backslash_ptr,
+                                     &first_bad_char_loc))
+    {
+        /* If warnings are on, this will print a more detailed analysis of what
+         * is wrong than the error message below */
+        utf8n_to_uvuni(first_bad_char_loc,
+                       e - ((char *) first_bad_char_loc),
+                       NULL, 0);
+
+        /* We deliberately don't try to print the malformed character, which
+         * might not print very well; it also may be just the first of many
+         * malformations, so don't print what comes after it */
+        yyerror(Perl_form(aTHX_
+            "Malformed UTF-8 character immediately after '%.*s'",
+            (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
+	return NULL;
+    }
+
+    res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
+                        /* include the <}> */
+                        e - backslash_ptr + 1);
+    if (! SvPOK(res)) {
+        SvREFCNT_dec_NN(res);
+        return NULL;
+    }
+
+    /* See if the charnames handler is the Perl core's, and if so, we can skip
+     * the validation needed for a user-supplied one, as Perl's does its own
+     * validation. */
+    table = GvHV(PL_hintgv);		 /* ^H */
+    cvp = hv_fetchs(table, "charnames", FALSE);
+    if (cvp && (cv = *cvp) && SvROK(cv) && ((rv = SvRV(cv)) != NULL)
+        && SvTYPE(rv) == SVt_PVCV && ((stash = CvSTASH(rv)) != NULL))
+    {
+        const char * const name = HvNAME(stash);
+        if strEQ(name, "_charnames") {
+           return res;
+       }
+    }
+
+    /* Here, it isn't Perl's charname handler.  We can't rely on a
+     * user-supplied handler to validate the input name.  For non-ut8 input,
+     * look to see that the first character is legal.  Then loop through the
+     * rest checking that each is a continuation */
+
+    /* This code needs to be sync'ed with a regex in _charnames.pm which does
+     * the same thing */
+
+    if (! UTF) {
+        if (! isALPHAU(*s)) {
+            goto bad_charname;
+        }
+        s++;
+        while (s < e) {
+            if (! isCHARNAME_CONT(*s)) {
+                goto bad_charname;
+            }
+	    if (*s == ' ' && *(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) {
+                Perl_warn(aTHX_ "A sequence of multiple spaces in a charnames alias definition is deprecated");
+            }
+            s++;
+        }
+        if (*(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) {
+            Perl_warn(aTHX_ "Trailing white-space in a charnames alias definition is deprecated");
+        }
+    }
+    else {
+        /* Similarly for utf8.  For invariants can check directly; for other
+         * Latin1, can calculate their code point and check; otherwise  use a
+         * swash */
+        if (UTF8_IS_INVARIANT(*s)) {
+            if (! isALPHAU(*s)) {
+                goto bad_charname;
+            }
+            s++;
+        } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
+            if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))))) {
+                goto bad_charname;
+            }
+            s += 2;
+        }
+        else {
+            if (! PL_utf8_charname_begin) {
+                U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
+                PL_utf8_charname_begin = _core_swash_init("utf8",
+                                                        "_Perl_Charname_Begin",
+                                                        &PL_sv_undef,
+                                                        1, 0, NULL, &flags);
+            }
+            if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
+                goto bad_charname;
+            }
+            s += UTF8SKIP(s);
+        }
+
+        while (s < e) {
+            if (UTF8_IS_INVARIANT(*s)) {
+                if (! isCHARNAME_CONT(*s)) {
+                    goto bad_charname;
+                }
+                if (*s == ' ' && *(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) {
+                    Perl_warn(aTHX_ "A sequence of multiple spaces in a charnames alias definition is deprecated");
+                }
+                s++;
+            }
+            else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
+                if (! isCHARNAME_CONT(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s,
+                                                                    *(s+1)))))
+                {
+                    goto bad_charname;
+                }
+                s += 2;
+            }
+            else {
+                if (! PL_utf8_charname_continue) {
+                    U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
+                    PL_utf8_charname_continue = _core_swash_init("utf8",
+                                                "_Perl_Charname_Continue",
+                                                &PL_sv_undef,
+                                                1, 0, NULL, &flags);
+                }
+                if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
+                    goto bad_charname;
+                }
+                s += UTF8SKIP(s);
+            }
+        }
+        if (*(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) {
+            Perl_warn(aTHX_ "Trailing white-space in a charnames alias definition is deprecated");
+        }
+    }
+
+    if (SvUTF8(res)) { /* Don't accept malformed input */
+        const U8* first_bad_char_loc;
+        STRLEN len;
+        const char* const str = SvPV_const(res, len);
+        if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
+            /* If warnings are on, this will print a more detailed analysis of
+             * what is wrong than the error message below */
+            utf8n_to_uvuni(first_bad_char_loc,
+                           (char *) first_bad_char_loc - str,
+                           NULL, 0);
+
+            /* We deliberately don't try to print the malformed character,
+             * which might not print very well; it also may be just the first
+             * of many malformations, so don't print what comes after it */
+            yyerror_pv(
+              Perl_form(aTHX_
+                "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
+                 (int) (e - backslash_ptr + 1), backslash_ptr,
+                 (int) ((char *) first_bad_char_loc - str), str
+              ),
+              SVf_UTF8);
+            return NULL;
+        }
+    }
+
+    return res;
+
+  bad_charname: {
+        int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
+
+        /* The final %.*s makes sure that should the trailing NUL be missing
+         * that this print won't run off the end of the string */
+        yyerror_pv(
+          Perl_form(aTHX_
+            "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
+            (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
+            (int)(e - s + bad_char_size), s + bad_char_size
+          ),
+          UTF ? SVf_UTF8 : 0);
+        return NULL;
+    }
+}
+
 /*
   scan_const
 
@@ -2635,7 +2863,8 @@ S_sublex_done(pTHX)
 
   In patterns:
     expand:
-      \N{ABC}  => \N{U+41.42.43}
+      \N{FOO}  => \N{U+hex_for_character_FOO}
+      (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
 
     pass through:
 	all other \-char, including \N and \N{ apart from \N{ABC}
@@ -2721,6 +2950,7 @@ S_scan_const(pTHX_ char *start)
 						   isn't utf8, as for example
 						   when it is entirely composed
 						   of hex constants */
+    SV *res;		                /* result from charnames */
 
     /* Note on sizing:  The scanned constant is placed into sv, which is
      * initialized by newSV() assuming one byte of output for every byte of
@@ -2734,7 +2964,8 @@ S_scan_const(pTHX_ char *start)
      * far, plus the length the current construct will occupy, plus room for
      * the trailing NUL, plus one byte for every input byte still unscanned */ 
 
-    UV uv;
+    UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
+                       before set */
 #ifdef EBCDIC
     UV literal_endpoint = 0;
     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
@@ -2749,6 +2980,9 @@ S_scan_const(pTHX_ char *start)
 	this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
     }
 
+    /* Protect sv from errors and fatal warnings. */
+    ENTER_with_name("scan_const");
+    SAVEFREESV(sv);
 
     while (s < send || dorange) {
 
@@ -2768,7 +3002,7 @@ S_scan_const(pTHX_ char *start)
 #ifdef EBCDIC
 		    && !native_range
 #endif
-		    ) {
+                ) {
 		    char * const c = (char*)utf8_hop((U8*)d, -1);
 		    char *e = d++;
 		    while (e-- > c)
@@ -2820,7 +3054,6 @@ S_scan_const(pTHX_ char *start)
 #endif
 
                 if (min > max) {
-		    SvREFCNT_dec(sv);
 		    Perl_croak(aTHX_
 			       "Invalid range \"%c-%c\" in transliteration operator",
 			       (char)min, (char)max);
@@ -2879,7 +3112,6 @@ S_scan_const(pTHX_ char *start)
 	    /* range begins (ignore - as first or last char) */
 	    else if (*s == '-' && s+1 < send  && s != start) {
 		if (didrange) {
-		    SvREFCNT_dec(sv);
 		    Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
 		}
 		if (has_utf8
@@ -2955,7 +3187,7 @@ S_scan_const(pTHX_ char *start)
 	   (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
 	   */
 	else if (*s == '@' && s[1]) {
-	    if (isALNUM_lazy_if(s+1,UTF))
+	    if (isWORDCHAR_lazy_if(s+1,UTF))
 		break;
 	    if (strchr(":'{$", s[1]))
 		break;
@@ -3017,7 +3249,7 @@ S_scan_const(pTHX_ char *start)
 	    else if (PL_lex_inpat
 		    && (*s != 'N'
 			|| s[1] != '{'
-			|| regcurly(s + 1)))
+			|| regcurly(s + 1, FALSE)))
 	    {
 		*d++ = NATIVE_TO_NEED(has_utf8,'\\');
 		goto default_action;
@@ -3034,7 +3266,7 @@ S_scan_const(pTHX_ char *start)
 		/* FALL THROUGH */
 	    default:
 	        {
-		    if ((isALNUMC(*s)))
+		    if ((isALPHANUMERIC(*s)))
 			Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
 				       "Unrecognized escape \\%c passed through",
 				       *s);
@@ -3046,21 +3278,30 @@ S_scan_const(pTHX_ char *start)
 	    case '0': case '1': case '2': case '3':
 	    case '4': case '5': case '6': case '7':
 		{
-                    I32 flags = 0;
+                    I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
                     STRLEN len = 3;
 		    uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
 		    s += len;
+                    if (len < 3 && s < send && isDIGIT(*s)
+                        && ckWARN(WARN_MISC))
+                    {
+                        Perl_warner(aTHX_ packWARN(WARN_MISC),
+                                    "%s", form_short_octal_warning(s, len));
+                    }
 		}
 		goto NUM_ESCAPE_INSERT;
 
 	    /* eg. \o{24} indicates the octal constant \024 */
 	    case 'o':
 		{
-		    STRLEN len;
 		    const char* error;
 
-		    bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
-		    s += len;
+		    bool valid = grok_bslash_o(&s, &uv, &error,
+                                               TRUE, /* Output warning */
+                                               FALSE, /* Not strict */
+                                               TRUE, /* Output warnings for
+                                                         non-portables */
+                                               UTF);
 		    if (! valid) {
 			yyerror(error);
 			continue;
@@ -3071,11 +3312,14 @@ S_scan_const(pTHX_ char *start)
 	    /* eg. \x24 indicates the hex constant 0x24 */
 	    case 'x':
 		{
-		    STRLEN len;
 		    const char* error;
 
-		    bool valid = grok_bslash_x(s, &uv, &len, &error, 1);
-		    s += len;
+		    bool valid = grok_bslash_x(&s, &uv, &error,
+                                               TRUE, /* Output warning */
+                                               FALSE, /* Not strict */
+                                               TRUE,  /* Output warnings for
+                                                         non-portables */
+                                               UTF);
 		    if (! valid) {
 			yyerror(error);
 			continue;
@@ -3184,31 +3428,6 @@ S_scan_const(pTHX_ char *start)
 
 		/* Here it looks like a named character */
 
-		if (PL_lex_inpat) {
-
-		    /* XXX This block is temporary code.  \N{} implies that the
-		     * pattern is to have Unicode semantics, and therefore
-		     * currently has to be encoded in utf8.  By putting it in
-		     * utf8 now, we save a whole pass in the regular expression
-		     * compiler.  Once that code is changed so Unicode
-		     * semantics doesn't necessarily have to be in utf8, this
-		     * block should be removed.  However, the code that parses
-		     * the output of this would have to be changed to not
-		     * necessarily expect utf8 */
-		    if (!has_utf8) {
-			SvCUR_set(sv, d - SvPVX_const(sv));
-			SvPOK_on(sv);
-			*d = '\0';
-			/* See Note on sizing above.  */
-			sv_utf8_upgrade_flags_grow(sv,
-					SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
-					/* 5 = '\N{' + cur char + NUL */
-					(STRLEN)(send - s) + 5);
-			d = SvPVX(sv) + SvCUR(sv);
-			has_utf8 = TRUE;
-		    }
-		}
-
 		if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
 		    I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
 				| PERL_SCAN_DISALLOW_PREFIX;
@@ -3270,32 +3489,12 @@ S_scan_const(pTHX_ char *start)
 			else d = (char*)uvuni_to_utf8((U8*)d, uv);
 		    }
 		}
-		else { /* Here is \N{NAME} but not \N{U+...}. */
-
-		    SV *res;		/* result from charnames */
-		    const char *str;    /* the string in 'res' */
-		    STRLEN len;		/* its length */
-
-		    /* Get the value for NAME */
-		    res = newSVpvn(s, e - s);
-		    res = new_constant( NULL, 0, "charnames",
-					/* includes all of: \N{...} */
-					res, NULL, s - 3, e - s + 4 );
-
-		    /* Most likely res will be in utf8 already since the
-		     * standard charnames uses pack U, but a custom translator
-		     * can leave it otherwise, so make sure.  XXX This can be
-		     * revisited to not have charnames use utf8 for characters
-		     * that don't need it when regexes don't have to be in utf8
-		     * for Unicode semantics.  If doing so, remember EBCDIC */
-		    sv_utf8_upgrade(res);
-		    str = SvPV_const(res, len);
-
-		    /* Don't accept malformed input */
-		    if (! is_utf8_string((U8 *) str, len)) {
-			yyerror("Malformed UTF-8 returned by \\N");
-		    }
-		    else if (PL_lex_inpat) {
+		else /* Here is \N{NAME} but not \N{U+...}. */
+                     if ((res = get_and_check_backslash_N_name(s, e)))
+                {
+                    STRLEN len;
+                    const char *str = SvPV_const(res, len);
+                    if (PL_lex_inpat) {
 
 			if (! len) { /* The name resolved to an empty string */
 			    Copy("\\N{}", d, 4, char);
@@ -3309,73 +3508,88 @@ S_scan_const(pTHX_ char *start)
 			    * returned by charnames */
 
 			    const char *str_end = str + len;
-			    STRLEN char_length;	    /* cur char's byte length */
-			    STRLEN output_length;   /* and the number of bytes
-						       after this is translated
-						       into hex digits */
 			    const STRLEN off = d - SvPVX_const(sv);
 
-			    /* 2 hex per byte; 2 chars for '\N'; 2 chars for
-			     * max('U+', '.'); and 1 for NUL */
-			    char hex_string[2 * UTF8_MAXBYTES + 5];
-
-			    /* Get the first character of the result. */
-			    U32 uv = utf8n_to_uvuni((U8 *) str,
-						    len,
-						    &char_length,
-						    UTF8_ALLOW_ANYUV);
-
-			    /* The call to is_utf8_string() above hopefully
-			     * guarantees that there won't be an error.  But
-			     * it's easy here to make sure.  The function just
-			     * above warns and returns 0 if invalid utf8, but
-			     * it can also return 0 if the input is validly a
-			     * NUL. Disambiguate */
-			    if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
-				uv = UNICODE_REPLACEMENT;
-			    }
-
-			    /* Convert first code point to hex, including the
-			     * boiler plate before it.  For all these, we
-			     * convert to native format so that downstream code
-			     * can continue to assume the input is native */
-			    output_length =
-				my_snprintf(hex_string, sizeof(hex_string),
-					    "\\N{U+%X",
-					    (unsigned int) UNI_TO_NATIVE(uv));
-
-			    /* Make sure there is enough space to hold it */
-			    d = off + SvGROW(sv, off
-						 + output_length
-						 + (STRLEN)(send - e)
-						 + 2);	/* '}' + NUL */
-			    /* And output it */
-			    Copy(hex_string, d, output_length, char);
-			    d += output_length;
-
-			    /* For each subsequent character, append dot and
-			     * its ordinal in hex */
-			    while ((str += char_length) < str_end) {
-				const STRLEN off = d - SvPVX_const(sv);
-				U32 uv = utf8n_to_uvuni((U8 *) str,
-							str_end - str,
-							&char_length,
-							UTF8_ALLOW_ANYUV);
-				if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
-				    uv = UNICODE_REPLACEMENT;
-				}
-
-				output_length =
-				    my_snprintf(hex_string, sizeof(hex_string),
-					    ".%X",
-					    (unsigned int) UNI_TO_NATIVE(uv));
-
-				d = off + SvGROW(sv, off
-						     + output_length
-						     + (STRLEN)(send - e)
-						     + 2);	/* '}' +  NUL */
-				Copy(hex_string, d, output_length, char);
-				d += output_length;
+                            if (! SvUTF8(res)) {
+                                /* For the non-UTF-8 case, we can determine the
+                                 * exact length needed without having to parse
+                                 * through the string.  Each character takes up
+                                 * 2 hex digits plus either a trailing dot or
+                                 * the "}" */
+                                d = off + SvGROW(sv, off
+                                                    + 3 * len
+                                                    + 6 /* For the "\N{U+", and
+                                                           trailing NUL */
+                                                    + (STRLEN)(send - e));
+                                Copy("\\N{U+", d, 5, char);
+                                d += 5;
+                                while (str < str_end) {
+                                    char hex_string[4];
+                                    my_snprintf(hex_string, sizeof(hex_string),
+                                                "%02X.", (U8) *str);
+                                    Copy(hex_string, d, 3, char);
+                                    d += 3;
+                                    str++;
+                                }
+                                d--;    /* We will overwrite below the final
+                                           dot with a right brace */
+                            }
+                            else {
+                                STRLEN char_length; /* cur char's byte length */
+
+                                /* and the number of bytes after this is
+                                 * translated into hex digits */
+                                STRLEN output_length;
+
+                                /* 2 hex per byte; 2 chars for '\N'; 2 chars
+                                 * for max('U+', '.'); and 1 for NUL */
+                                char hex_string[2 * UTF8_MAXBYTES + 5];
+
+                                /* Get the first character of the result. */
+                                U32 uv = utf8n_to_uvuni((U8 *) str,
+                                                        len,
+                                                        &char_length,
+                                                        UTF8_ALLOW_ANYUV);
+                                /* Convert first code point to hex, including
+                                 * the boiler plate before it.  For all these,
+                                 * we convert to native format so that
+                                 * downstream code can continue to assume the
+                                 * input is native */
+                                output_length =
+                                    my_snprintf(hex_string, sizeof(hex_string),
+                                            "\\N{U+%X",
+                                            (unsigned int) UNI_TO_NATIVE(uv));
+
+                                /* Make sure there is enough space to hold it */
+                                d = off + SvGROW(sv, off
+                                                    + output_length
+                                                    + (STRLEN)(send - e)
+                                                    + 2);	/* '}' + NUL */
+                                /* And output it */
+                                Copy(hex_string, d, output_length, char);
+                                d += output_length;
+
+                                /* For each subsequent character, append dot and
+                                * its ordinal in hex */
+                                while ((str += char_length) < str_end) {
+                                    const STRLEN off = d - SvPVX_const(sv);
+                                    U32 uv = utf8n_to_uvuni((U8 *) str,
+                                                            str_end - str,
+                                                            &char_length,
+                                                            UTF8_ALLOW_ANYUV);
+                                    output_length =
+                                        my_snprintf(hex_string,
+                                            sizeof(hex_string),
+                                            ".%X",
+                                            (unsigned int) UNI_TO_NATIVE(uv));
+
+                                    d = off + SvGROW(sv, off
+                                                        + output_length
+                                                        + (STRLEN)(send - e)
+                                                        + 2);	/* '}' +  NUL */
+                                    Copy(hex_string, d, output_length, char);
+                                    d += output_length;
+                                }
 			    }
 
 			    *d++ = '}';	/* Done.  Add the trailing brace */
@@ -3408,68 +3622,9 @@ S_scan_const(pTHX_ char *start)
 			Copy(str, d, len, char);
 			d += len;
 		    }
+
 		    SvREFCNT_dec(res);
 
-		    /* Deprecate non-approved name syntax */
-		    if (ckWARN_d(WARN_DEPRECATED)) {
-			bool problematic = FALSE;
-			char* i = s;
-
-			/* For non-ut8 input, look to see that the first
-			 * character is an alpha, then loop through the rest
-			 * checking that each is a continuation */
-			if (! this_utf8) {
-			    if (! isALPHAU(*i)) problematic = TRUE;
-			    else for (i = s + 1; i < e; i++) {
-				if (isCHARNAME_CONT(*i)) continue;
-				problematic = TRUE;
-				break;
-			    }
-			}
-			else {
-			    /* Similarly for utf8.  For invariants can check
-			     * directly.  We accept anything above the latin1
-			     * range because it is immaterial to Perl if it is
-			     * correct or not, and is expensive to check.  But
-			     * it is fairly easy in the latin1 range to convert
-			     * the variants into a single character and check
-			     * those */
-			    if (UTF8_IS_INVARIANT(*i)) {
-				if (! isALPHAU(*i)) problematic = TRUE;
-			    } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
-				if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
-									    *(i+1)))))
-				{
-				    problematic = TRUE;
-				}
-			    }
-			    if (! problematic) for (i = s + UTF8SKIP(s);
-						    i < e;
-						    i+= UTF8SKIP(i))
-			    {
-				if (UTF8_IS_INVARIANT(*i)) {
-				    if (isCHARNAME_CONT(*i)) continue;
-				} else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
-				    continue;
-				} else if (isCHARNAME_CONT(
-					    UNI_TO_NATIVE(
-					    TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
-				{
-				    continue;
-				}
-				problematic = TRUE;
-				break;
-			    }
-			}
-			if (problematic) {
-			    /* The e-i passed to the final %.*s makes sure that
-			     * should the trailing NUL be missing that this
-			     * print won't run off the end of the string */
-			    Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
-					"Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
-					(int)(i - s + 1), s, (int)(e - i), i + 1);
-			}
-		    }
 		} /* End \N{NAME} */
 #ifdef EBCDIC
 		if (!dorange) 
@@ -3595,6 +3750,7 @@ S_scan_const(pTHX_ char *start)
 
     /* return the substring (via pl_yylval) only if we parsed anything */
     if (s > PL_bufptr) {
+	SvREFCNT_inc_simple_void_NN(sv);
 	if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
 	    const char *const key = PL_lex_inpat ? "qr" : "q";
 	    const STRLEN keylen = PL_lex_inpat ? 2 : 1;
@@ -3619,8 +3775,8 @@ S_scan_const(pTHX_ char *start)
 				type, typelen);
 	}
 	pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
-    } else
-	SvREFCNT_dec(sv);
+    }
+    LEAVE_with_name("scan_const");
     return s;
 }
 
@@ -3646,7 +3802,7 @@ S_scan_const(pTHX_ char *start)
 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
 
 STATIC int
-S_intuit_more(pTHX_ register char *s)
+S_intuit_more(pTHX_ char *s)
 {
     dVAR;
 
@@ -3663,7 +3819,7 @@ S_intuit_more(pTHX_ register char *s)
 
     /* In a pattern, so maybe we have {n,m}. */
     if (*s == '{') {
-	if (regcurly(s)) {
+	if (regcurly(s, FALSE)) {
 	    return FALSE;
 	}
 	return TRUE;
@@ -3676,16 +3832,16 @@ S_intuit_more(pTHX_ register char *s)
 	return FALSE;
     else {
         /* this is terrifying, and it works */
-	int weight = 2;		/* let's weigh the evidence */
+	int weight;
 	char seen[256];
-	unsigned char un_char = 255, last_un_char;
 	const char * const send = strchr(s,']');
+	unsigned char un_char, last_un_char;
 	char tmpbuf[sizeof PL_tokenbuf * 4];
 
 	if (!send)		/* has to be an expression */
 	    return TRUE;
+	weight = 2;		/* let's weigh the evidence */
 
-	Zero(seen,256,char);
 	if (*s == '$')
 	    weight -= 3;
 	else if (isDIGIT(*s)) {
@@ -3696,6 +3852,8 @@ S_intuit_more(pTHX_ register char *s)
 	    else
 		weight -= 100;
 	}
+	Zero(seen,256,char);
+	un_char = 255;
 	for (; s < send; s++) {
 	    last_un_char = un_char;
 	    un_char = (unsigned char)*s;
@@ -3704,7 +3862,7 @@ S_intuit_more(pTHX_ register char *s)
 	    case '&':
 	    case '$':
 		weight -= seen[un_char] * 10;
-		if (isALNUM_lazy_if(s+1,UTF)) {
+		if (isWORDCHAR_lazy_if(s+1,UTF)) {
 		    int len;
 		    scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
 		    len = (int)strlen(tmpbuf);
@@ -3751,7 +3909,7 @@ S_intuit_more(pTHX_ register char *s)
 		    weight -= 5;	/* cope with negative subscript */
 		break;
 	    default:
-		if (!isALNUM(last_un_char)
+		if (!isWORDCHAR(last_un_char)
 		    && !(last_un_char == '$' || last_un_char == '@'
 			 || last_un_char == '&')
 		    && isALPHA(*s) && s[1] && isALPHA(s[1])) {
@@ -4095,7 +4253,7 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
 }
 
 STATIC char *
-S_filter_gets(pTHX_ register SV *sv, STRLEN append)
+S_filter_gets(pTHX_ SV *sv, STRLEN append)
 {
     dVAR;
 
@@ -4276,7 +4434,6 @@ Perl_madlex(pTHX)
     case FUNC0SUB:
     case UNIOPSUB:
     case LSTOPSUB:
-    case LABEL:
 	if (pl_yylval.opval)
 	    append_madprops(PL_thismad, pl_yylval.opval, 0);
 	PL_thismad = 0;
@@ -4291,6 +4448,10 @@ Perl_madlex(pTHX)
 	}
 	break;
 
+    /* pval */
+    case LABEL:
+	break;
+
     case ']':
     case '}':
 	if (PL_faketokens)
@@ -4406,21 +4567,40 @@ S_word_takes_any_delimeter(char *p, STRLEN len)
   stitching them into a tree.
 
   Returns:
-    PRIVATEREF
+    The type of the next token
 
   Structure:
-      if read an identifier
-          if we're in a my declaration
-	      croak if they tried to say my($foo::bar)
-	      build the ops for a my() declaration
-	  if it's an access to a my() variable
-	      are we in a sort block?
-	          croak if my($a); $a <=> $b
-	      build ops for access to a my() variable
-	  if in a dq string, and they've said @foo and we can't find @foo
-	      croak
-	  build ops for a bareword
-      if we already built the token before, use it.
+      Switch based on the current state:
+	  - if we already built the token before, use it
+	  - if we have a case modifier in a string, deal with that
+	  - handle other cases of interpolation inside a string
+	  - scan the next line if we are inside a format
+      In the normal state switch on the next character:
+	  - default:
+	    if alphabetic, go to key lookup
+	    unrecoginized character - croak
+	  - 0/4/26: handle end-of-line or EOF
+	  - cases for whitespace
+	  - \n and #: handle comments and line numbers
+	  - various operators, brackets and sigils
+	  - numbers
+	  - quotes
+	  - 'v': vstrings (or go to key lookup)
+	  - 'x' repetition operator (or go to key lookup)
+	  - other ASCII alphanumerics (key lookup begins here):
+	      word before => ?
+	      keyword plugin
+	      scan built-in keyword (but do nothing with it yet)
+	      check for statement label
+	      check for lexical subs
+		  goto just_a_word if there is one
+	      see whether built-in keyword is overridden
+	      switch on keyword number:
+		  - default: just_a_word:
+		      not a built-in keyword; handle bareword lookup
+		      disambiguate between method and sub call
+		      fall back to bareword
+		  - cases for built-in keywords
 */
 
 
@@ -4513,8 +4693,6 @@ Perl_yylex(pTHX)
 		    PL_lex_allbrackets--;
 		next_type &= 0xffff;
 	    }
-	    if (S_is_opval_token(next_type) && pl_yylval.opval)
-		pl_yylval.opval->op_savefree = 0; /* release */
 	    return REPORT(next_type == 'p' ? pending_ident() : next_type);
 	}
 
@@ -4556,9 +4734,11 @@ Perl_yylex(pTHX)
 #ifdef PERL_MAD
 	    while (PL_bufptr != PL_bufend &&
 	      PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
-		if (!PL_thiswhite)
+		if (PL_madskills) {
+		  if (!PL_thiswhite)
 		    PL_thiswhite = newSVpvs("");
-		sv_catpvn(PL_thiswhite, PL_bufptr, 2);
+		  sv_catpvn(PL_thiswhite, PL_bufptr, 2);
+		}
 		PL_bufptr += 2;
 	    }
 #else
@@ -4574,9 +4754,11 @@ Perl_yylex(pTHX)
 	    s = PL_bufptr + 1;
 	    if (s[1] == '\\' && s[2] == 'E') {
 #ifdef PERL_MAD
-		if (!PL_thiswhite)
+		if (PL_madskills) {
+		  if (!PL_thiswhite)
 		    PL_thiswhite = newSVpvs("");
-		sv_catpvn(PL_thiswhite, PL_bufptr, 4);
+		  sv_catpvn(PL_thiswhite, PL_bufptr, 4);
+		}
 #endif
 	        PL_bufptr = s + 3;
 		PL_lex_state = LEX_INTERPCONCAT;
@@ -4843,7 +5025,7 @@ Perl_yylex(pTHX)
 #endif
     switch (*s) {
     default:
-	if (isIDFIRST_lazy_if(s,UTF))
+	if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
 	    goto keylookup;
 	{
         SV *dsv = newSVpvs_flags("", SVs_TEMP);
@@ -5234,9 +5416,11 @@ Perl_yylex(pTHX)
     case ' ': case '\t': case '\f': case 013:
 #ifdef PERL_MAD
 	PL_realtokenstart = -1;
-	if (!PL_thiswhite)
+	if (PL_madskills) {
+	  if (!PL_thiswhite)
 	    PL_thiswhite = newSVpvs("");
-	sv_catpvn(PL_thiswhite, s, 1);
+	  sv_catpvn(PL_thiswhite, s, 1);
+	}
 #endif
 	s++;
 	goto retry;
@@ -5329,7 +5513,7 @@ Perl_yylex(pTHX)
 	}
 	goto retry;
     case '-':
-	if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
+	if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
 	    I32 ftst = 0;
 	    char tmp;
 
@@ -5589,7 +5773,7 @@ Perl_yylex(pTHX)
 		}
 		sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
 		if (*d == '(') {
-		    d = scan_str(d,TRUE,TRUE,FALSE);
+		    d = scan_str(d,TRUE,TRUE,FALSE, FALSE);
 		    if (!d) {
 			/* MUST advance bufptr here to avoid bogus
 			   "at end of line" context messages from yyerror().
@@ -5834,9 +6018,9 @@ Perl_yylex(pTHX)
 		}
 		else if (*s == 'q') {
 		    if (++t < PL_bufend
-			&& (!isALNUM(*t)
+			&& (!isWORDCHAR(*t)
 			    || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
-				&& !isALNUM(*t))))
+				&& !isWORDCHAR(*t))))
 		    {
 			/* skip q//-like construct */
 			const char *tmps;
@@ -5875,12 +6059,12 @@ Perl_yylex(pTHX)
 		    }
 		    else
 			/* skip plain q word */
-			while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
+			while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
 			     t += UTF8SKIP(t);
 		}
-		else if (isALNUM_lazy_if(t,UTF)) {
+		else if (isWORDCHAR_lazy_if(t,UTF)) {
 		    t += UTF8SKIP(t);
-		    while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
+		    while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
 			 t += UTF8SKIP(t);
 		}
 		while (t < PL_bufend && isSPACE(*t))
@@ -5950,7 +6134,7 @@ Perl_yylex(pTHX)
 	force_next(formbrack ? '.' : '}');
 	if (formbrack) LEAVE;
 #ifdef PERL_MAD
-	if (!PL_thistoken)
+	if (PL_madskills && !PL_thistoken)
 	    PL_thistoken = newSVpvs("");
 #endif
 	if (formbrack == 2) { /* means . where arguments were expected */
@@ -6117,8 +6301,8 @@ Perl_yylex(pTHX)
 
 		    if (*t == '/' || *t == '?' ||
 			((*t == 'm' || *t == 's' || *t == 'y')
-			 && !isALNUM(t[1])) ||
-			(*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
+			 && !isWORDCHAR(t[1])) ||
+			(*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
 			Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 				    "!=~ should be !~");
 		}
@@ -6255,7 +6439,7 @@ Perl_yylex(pTHX)
 		    if (ckWARN(WARN_SYNTAX)) {
 			char *t = s+1;
 
-			while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
+			while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
 			    t++;
 			if (*t++ == ',') {
 			    PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
@@ -6366,7 +6550,7 @@ Perl_yylex(pTHX)
 	    if (*s == '[' || *s == '{') {
 		if (ckWARN(WARN_SYNTAX)) {
 		    const char *t = s + 1;
-		    while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
+		    while (*t && (isWORDCHAR_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
 			t += UTF ? UTF8SKIP(t) : 1;
 		    if (*t == '}' || *t == ']') {
 			t++;
@@ -6434,7 +6618,7 @@ Perl_yylex(pTHX)
 	     if (PL_oldoldbufptr == PL_last_uni
 	      && (*PL_last_uni != 's' || s - PL_last_uni < 5
 	          || memNE(PL_last_uni, "study", 5)
-	          || isALNUM_lazy_if(PL_last_uni+5,UTF)
+	          || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
 	      ))
 	         check_uni();
 	     if (*s == '?')
@@ -6494,7 +6678,7 @@ Perl_yylex(pTHX)
 	TERM(THING);
 
     case '\'':
-	s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+	s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
 	DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
 	if (PL_expect == XOPERATOR) {
 	    if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
@@ -6509,7 +6693,7 @@ Perl_yylex(pTHX)
 	TERM(sublex_start());
 
     case '"':
-	s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+	s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
 	DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
 	if (PL_expect == XOPERATOR) {
 	    if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
@@ -6532,7 +6716,7 @@ Perl_yylex(pTHX)
 	TERM(sublex_start());
 
     case '`':
-	s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+	s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
 	DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
 	if (PL_expect == XOPERATOR)
 	    no_op("Backticks",s);
@@ -6691,9 +6875,9 @@ Perl_yylex(pTHX)
 	if (!anydelim && PL_expect == XSTATE
 	      && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
 	    s = d + 1;
-	    pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
-                                            newSVpvn_flags(PL_tokenbuf,
-                                                        len, UTF ? SVf_UTF8 : 0));
+	    pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
+	    pl_yylval.pval[len] = '\0';
+	    pl_yylval.pval[len+1] = UTF ? 1 : 0;
 	    CLINE;
 	    TOKEN(LABEL);
 	}
@@ -6716,6 +6900,11 @@ Perl_yylex(pTHX)
 		    gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
 				    SVt_PVCV);
 		    off = 0;
+		    if (!gv) {
+			sv_free(sv);
+			sv = NULL;
+			goto just_a_word;
+		    }
 		}
 		else {
 		    rv2cv_op = newOP(OP_PADANY, 0);
@@ -7866,6 +8055,9 @@ Perl_yylex(pTHX)
 				  "Experimental \"%s\" subs not enabled",
 				   tmp == KEY_my    ? "my"    :
 				   tmp == KEY_state ? "state" : "our");
+		    Perl_ck_warner_d(aTHX_
+			packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
+			"The lexical_subs feature is experimental");
 		    goto really_sub;
 		}
 		PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
@@ -7913,15 +8105,9 @@ Perl_yylex(pTHX)
 	case KEY_open:
 	    s = SKIPSPACE1(s);
 	    if (isIDFIRST_lazy_if(s,UTF)) {
-		const char *t;
-		for (d = s; isALNUM_lazy_if(d,UTF);) {
-		    d += UTF ? UTF8SKIP(d) : 1;
-                    if (UTF) {
-                        while (UTF8_IS_CONTINUED(*d) && is_utf8_mark((U8*)d)) {
-                            d += UTF ? UTF8SKIP(d) : 1;
-                        }
-                    }
-                }
+          const char *t;
+          d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
+              &len);
 		for (t=d; isSPACE(*t);)
 		    t++;
 		if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
@@ -7988,7 +8174,7 @@ Perl_yylex(pTHX)
 	    LOP(OP_PIPE_OP,XTERM);
 
 	case KEY_q:
-	    s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+	    s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
 	    if (!s)
 		missingterm(NULL);
 	    pl_yylval.ival = OP_CONST;
@@ -7999,7 +8185,7 @@ Perl_yylex(pTHX)
 
 	case KEY_qw: {
 	    OP *words = NULL;
-	    s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+	    s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
 	    if (!s)
 		missingterm(NULL);
 	    PL_expect = XOPERATOR;
@@ -8049,7 +8235,7 @@ Perl_yylex(pTHX)
 	}
 
 	case KEY_qq:
-	    s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+	    s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
 	    if (!s)
 		missingterm(NULL);
 	    pl_yylval.ival = OP_STRINGIFY;
@@ -8062,7 +8248,7 @@ Perl_yylex(pTHX)
 	    TERM(sublex_start());
 
 	case KEY_qx:
-	    s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+	    s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
 	    if (!s)
 		missingterm(NULL);
 	    readpipe_override();
@@ -8288,7 +8474,9 @@ Perl_yylex(pTHX)
 		SV *tmpwhite = 0;
 
 		char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
-		SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
+		SV *subtoken = PL_madskills
+		   ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
+		   : NULL;
 		PL_thistoken = 0;
 
 		d = s;
@@ -8381,7 +8569,7 @@ Perl_yylex(pTHX)
 		    const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
                     STRLEN tmplen;
 
-		    s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+		    s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
 		    if (!s)
 			Perl_croak(aTHX_ "Prototype not terminated");
 		    /* strip spaces and check for bad characters */
@@ -8660,6 +8848,26 @@ Perl_yylex(pTHX)
 #pragma segment Main
 #endif
 
+/*
+  S_pending_ident
+
+  Looks up an identifier in the pad or in a package
+
+  Returns:
+    PRIVATEREF if this is a lexical name.
+    WORD       if this belongs to a package.
+
+  Structure:
+      if we're in a my declaration
+	  croak if they tried to say my($foo::bar)
+	  build the ops for a my() declaration
+      if it's an access to a my() variable
+	  build ops for access to a my() variable
+      if in a dq string, and they've said @foo and we can't find @foo
+	  warn
+      build ops for a bareword
+*/
+
 static int
 S_pending_ident(pTHX)
 {
@@ -8813,7 +9021,7 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
     if (isIDFIRST_lazy_if(s,UTF)) {
 	const char * const w = s;
         s += UTF ? UTF8SKIP(s) : 1;
-	while (isALNUM_lazy_if(s,UTF))
+	while (isWORDCHAR_lazy_if(s,UTF))
 	    s += UTF ? UTF8SKIP(s) : 1;
 	while (s < PL_bufend && isSPACE(*s))
 	    s++;
@@ -8830,10 +9038,11 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
     }
 }
 
-/* Either returns sv, or mortalizes sv and returns a new SV*.
+/* Either returns sv, or mortalizes/frees sv and returns a new SV*.
    Best used as sv=new_constant(..., sv, ...).
    If s, pv are NULL, calls subroutine with one argument,
-   and type is used with error messages only. */
+   and <type> is used with error messages only.
+   <type> is assumed to be well formed UTF-8 */
 
 STATIC SV *
 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
@@ -8842,26 +9051,34 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
     dVAR; dSP;
     HV * table = GvHV(PL_hintgv);		 /* ^H */
     SV *res;
+    SV *errsv = NULL;
     SV **cvp;
     SV *cv, *typesv;
     const char *why1 = "", *why2 = "", *why3 = "";
 
     PERL_ARGS_ASSERT_NEW_CONSTANT;
+    /* We assume that this is true: */
+    if (*key == 'c') { assert (strEQ(key, "charnames")); }
+    assert(type || s);
 
     /* charnames doesn't work well if there have been errors found */
-    if (PL_error_count > 0 && strEQ(key,"charnames"))
+    if (PL_error_count > 0 && *key == 'c')
+    {
+	SvREFCNT_dec_NN(sv);
 	return &PL_sv_undef;
+    }
 
+    sv_2mortal(sv);			/* Parent created it permanently */
     if (!table
 	|| ! (PL_hints & HINT_LOCALIZE_HH)
 	|| ! (cvp = hv_fetch(table, key, keylen, FALSE))
 	|| ! SvOK(*cvp))
     {
-	SV *msg;
+	char *msg;
 	
 	/* Here haven't found what we're looking for.  If it is charnames,
 	 * perhaps it needs to be loaded.  Try doing that before giving up */
-	if (strEQ(key,"charnames")) {
+	if (*key == 'c') {
 	    Perl_load_module(aTHX_
 		            0,
 			    newSVpvs("_charnames"),
@@ -8883,23 +9100,32 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
 	    }
 	}
 	if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
-	    msg = Perl_newSVpvf(aTHX_
-			    "Constant(%s) unknown", (type ? type: "undef"));
+	    msg = Perl_form(aTHX_
+			       "Constant(%.*s) unknown",
+				(int)(type ? typelen : len),
+				(type ? type: s));
 	}
 	else {
-	why1 = "$^H{";
-	why2 = key;
-	why3 = "} is not defined";
-    report:
-	msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
-			    (type ? type: "undef"), why1, why2, why3);
-	}
-	yyerror(SvPVX_const(msg));
- 	SvREFCNT_dec(msg);
-  	return sv;
+            why1 = "$^H{";
+            why2 = key;
+            why3 = "} is not defined";
+        report:
+            if (*key == 'c') {
+                msg = Perl_form(aTHX_
+                            /* The +3 is for '\N{'; -4 for that, plus '}' */
+                            "Unknown charname '%.*s'", (int)typelen - 4, type + 3
+                      );
+            }
+            else {
+                msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
+                                    (int)(type ? typelen : len),
+                                    (type ? type: s), why1, why2, why3);
+            }
+        }
+	yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
+  	return SvREFCNT_inc_simple_NN(sv);
     }
 now_ok:
-    sv_2mortal(sv);			/* Parent created it permanently */
     cv = *cvp;
     if (!pv && s)
   	pv = newSVpvn_flags(s, len, SVs_TEMP);
@@ -8925,15 +9151,18 @@ now_ok:
     SPAGAIN ;
 
     /* Check the eval first */
-    if (!PL_in_eval && SvTRUE(ERRSV)) {
- 	sv_catpvs(ERRSV, "Propagated");
-	yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
+    if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
+	STRLEN errlen;
+	const char * errstr;
+	sv_catpvs(errsv, "Propagated");
+	errstr = SvPV_const(errsv, errlen);
+	yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
 	(void)POPs;
-	res = SvREFCNT_inc_simple(sv);
+	res = SvREFCNT_inc_simple_NN(sv);
     }
     else {
  	res = POPs;
-	SvREFCNT_inc_simple_void(res);
+	SvREFCNT_inc_simple_void_NN(res);
     }
 
     PUTBACK ;
@@ -8946,66 +9175,89 @@ now_ok:
  	why2 = key;
  	why3 = "}} did not return a defined value";
  	sv = res;
+	(void)sv_2mortal(sv);
  	goto report;
     }
 
     return res;
 }
 
+PERL_STATIC_INLINE void
+S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
+    dVAR;
+    PERL_ARGS_ASSERT_PARSE_IDENT;
+
+    for (;;) {
+        if (*d >= e)
+            Perl_croak(aTHX_ "%s", ident_too_long);
+        if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
+             /* The UTF-8 case must come first, otherwise things
+             * like c\N{COMBINING TILDE} would start failing, as the
+             * isWORDCHAR_A case below would gobble the 'c' up.
+             */
+
+            char *t = *s + UTF8SKIP(*s);
+            while (isIDCONT_utf8((U8*)t))
+                t += UTF8SKIP(t);
+            if (*d + (t - *s) > e)
+                Perl_croak(aTHX_ "%s", ident_too_long);
+            Copy(*s, *d, t - *s, char);
+            *d += t - *s;
+            *s = t;
+        }
+        else if ( isWORDCHAR_A(**s) ) {
+            do {
+                *(*d)++ = *(*s)++;
+            } while isWORDCHAR_A(**s);
+        }
+        else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
+            *(*d)++ = ':';
+            *(*d)++ = ':';
+            (*s)++;
+        }
+        else if (allow_package && **s == ':' && (*s)[1] == ':'
+           /* Disallow things like Foo::$bar. For the curious, this is
+            * the code path that triggers the "Bad name after" warning
+            * when looking for barewords.
+            */
+           && (*s)[2] != '$') {
+            *(*d)++ = *(*s)++;
+            *(*d)++ = *(*s)++;
+        }
+        else
+            break;
+    }
+    return;
+}
+
 /* Returns a NUL terminated string, with the length of the string written to
    *slp
    */
 STATIC char *
-S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
+S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
 {
     dVAR;
     char *d = dest;
     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
+    bool is_utf8 = cBOOL(UTF);
 
     PERL_ARGS_ASSERT_SCAN_WORD;
 
-    for (;;) {
-	if (d >= e)
-	    Perl_croak(aTHX_ ident_too_long);
-	if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s)))	/* UTF handled below */
-	    *d++ = *s++;
-	else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
-	    *d++ = ':';
-	    *d++ = ':';
-	    s++;
-	}
-	else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
-	    *d++ = *s++;
-	    *d++ = *s++;
-	}
-	else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
-	    char *t = s + UTF8SKIP(s);
-	    size_t len;
-	    while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
-		t += UTF8SKIP(t);
-	    len = t - s;
-	    if (d + len > e)
-		Perl_croak(aTHX_ ident_too_long);
-	    Copy(s, d, len, char);
-	    d += len;
-	    s = t;
-	}
-	else {
-	    *d = '\0';
-	    *slp = d - dest;
-	    return s;
-	}
-    }
+    parse_ident(&s, &d, e, allow_package, is_utf8);
+    *d = '\0';
+    *slp = d - dest;
+    return s;
 }
 
 STATIC char *
-S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
+S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck_uni)
 {
     dVAR;
     char *bracket = NULL;
     char funny = *s++;
     char *d = dest;
     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
+    bool is_utf8 = cBOOL(UTF);
 
     PERL_ARGS_ASSERT_SCAN_IDENT;
 
@@ -9014,38 +9266,12 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
     if (isDIGIT(*s)) {
 	while (isDIGIT(*s)) {
 	    if (d >= e)
-		Perl_croak(aTHX_ ident_too_long);
+		Perl_croak(aTHX_ "%s", ident_too_long);
 	    *d++ = *s++;
 	}
     }
     else {
-	for (;;) {
-	    if (d >= e)
-		Perl_croak(aTHX_ ident_too_long);
-	    if (isALNUM(*s))	/* UTF handled below */
-		*d++ = *s++;
-	    else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
-		*d++ = ':';
-		*d++ = ':';
-		s++;
-	    }
-	    else if (*s == ':' && s[1] == ':') {
-		*d++ = *s++;
-		*d++ = *s++;
-	    }
-	    else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
-		char *t = s + UTF8SKIP(s);
-		while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
-		    t += UTF8SKIP(t);
-		if (d + (t - s) > e)
-		    Perl_croak(aTHX_ ident_too_long);
-		Copy(s, d, t - s, char);
-		d += t - s;
-		s = t;
-	    }
-	    else
-		break;
-	}
+        parse_ident(&s, &d, e, 1, is_utf8);
     }
     *d = '\0';
     d = dest;
@@ -9055,7 +9281,11 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
 	return s;
     }
     if (*s == '$' && s[1] &&
-	(isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
+      (isIDFIRST_lazy_if(s+1,is_utf8)
+         || isDIGIT_A((U8)s[1])
+         || s[1] == '$'
+         || s[1] == '{'
+         || strnEQ(s+1,"::",2)) )
     {
 	return s;
     }
@@ -9063,8 +9293,15 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
 	bracket = s;
 	s++;
     }
-    if (s < send) {
-        if (UTF) {
+
+#define VALID_LEN_ONE_IDENT(d, u)     (isPUNCT_A((U8)*(d))     \
+                                        || isCNTRL_A((U8)*(d)) \
+                                        || isDIGIT_A((U8)*(d)) \
+                                        || (!(u) && !UTF8_IS_INVARIANT((U8)*(d))))
+    if (s < send
+        && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(s, is_utf8)))
+    {
+        if (is_utf8) {
             const STRLEN skip = UTF8SKIP(s);
             STRLEN i;
             d[skip] = '\0';
@@ -9092,25 +9329,9 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
 		}
 	    }
 	}
-	if (isIDFIRST_lazy_if(d,UTF)) {
-	    d += UTF8SKIP(d);
-	    if (UTF) {
-		char *end = s;
-		while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
-		    end += UTF8SKIP(end);
-		    while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
-			end += UTF8SKIP(end);
-		}
-		Copy(s, d, end - s, char);
-		d += end - s;
-		s = end;
-	    }
-	    else {
-		while ((isALNUM(*s) || *s == ':') && d < e)
-		    *d++ = *s++;
-		if (d >= e)
-		    Perl_croak(aTHX_ ident_too_long);
-	    }
+	if (isIDFIRST_lazy_if(d,is_utf8)) {
+        d += is_utf8 ? UTF8SKIP(d) : 1;
+        parse_ident(&s, &d, e, 1, is_utf8);
 	    *d = '\0';
 	    while (s < send && SPACE_OR_TAB(*s))
 		s++;
@@ -9132,15 +9353,15 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
 	}
 	/* Handle extended ${^Foo} variables
 	 * 1999-02-27 mjd-perl-patch@plover.com */
-	else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
-		 && isALNUM(*s))
+	else if (!isWORDCHAR(*d) && !isPRINT(*d) /* isCTRL(d) */
+		 && isWORDCHAR(*s))
 	{
 	    d++;
-	    while (isALNUM(*s) && d < e) {
+	    while (isWORDCHAR(*s) && d < e) {
 		*d++ = *s++;
 	    }
 	    if (d >= e)
-		Perl_croak(aTHX_ ident_too_long);
+		Perl_croak(aTHX_ "%s", ident_too_long);
 	    *d = '\0';
 	}
 	if (*s == '}') {
@@ -9152,10 +9373,10 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
 	    if (PL_lex_state == LEX_NORMAL) {
 		if (ckWARN(WARN_AMBIGUOUS) &&
 		    (keyword(dest, d - dest, 0)
-		     || get_cvn_flags(dest, d - dest, UTF ? SVf_UTF8 : 0)))
+		     || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
 		{
                     SV *tmp = newSVpvn_flags( dest, d - dest,
-                                            SVs_TEMP | (UTF ? SVf_UTF8 : 0) );
+                                            SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
 		    if (funny == '#')
 			funny = '@';
 		    Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
@@ -9191,7 +9412,7 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse
     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
 
     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
-        if (isALNUM_lazy_if(*s, UTF)) {
+        if (isWORDCHAR_lazy_if(*s, UTF)) {
             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
                        UTF ? SVf_UTF8 : 0);
             (*s) += charlen;
@@ -9275,7 +9496,8 @@ S_scan_pat(pTHX_ char *start, I32 type)
 {
     dVAR;
     PMOP *pm;
-    char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing);
+    char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing,
+                       TRUE /* look for escaped bracketed metas */ );
     const char * const valid_flags =
 	(const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
     char charset = '\0';    /* character set modifier */
@@ -9378,7 +9600,8 @@ S_scan_subst(pTHX_ char *start)
 
     pl_yylval.ival = OP_NULL;
 
-    s = scan_str(start,!!PL_madskills,FALSE,FALSE);
+    s = scan_str(start,!!PL_madskills,FALSE,FALSE,
+                 TRUE /* look for escaped bracketed metas */ );
 
     if (!s)
 	Perl_croak(aTHX_ "Substitution pattern not terminated");
@@ -9396,7 +9619,7 @@ S_scan_subst(pTHX_ char *start)
 #endif
 
     first_start = PL_multi_start;
-    s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+    s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
     if (!s) {
 	if (PL_lex_stuff) {
 	    SvREFCNT_dec(PL_lex_stuff);
@@ -9482,7 +9705,7 @@ S_scan_trans(pTHX_ char *start)
 
     pl_yylval.ival = OP_NULL;
 
-    s = scan_str(start,!!PL_madskills,FALSE,FALSE);
+    s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
     if (!s)
 	Perl_croak(aTHX_ "Transliteration pattern not terminated");
 
@@ -9498,7 +9721,7 @@ S_scan_trans(pTHX_ char *start)
     }
 #endif
 
-    s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+    s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
     if (!s) {
 	if (PL_lex_stuff) {
 	    SvREFCNT_dec(PL_lex_stuff);
@@ -9583,7 +9806,7 @@ S_scan_trans(pTHX_ char *start)
 */
 
 STATIC char *
-S_scan_heredoc(pTHX_ register char *s)
+S_scan_heredoc(pTHX_ char *s)
 {
     dVAR;
     I32 op_type = OP_SCALAR;
@@ -9626,9 +9849,9 @@ S_scan_heredoc(pTHX_ register char *s)
 	    s++, term = '\'';
 	else
 	    term = '"';
-	if (!isALNUM_lazy_if(s,UTF))
+	if (!isWORDCHAR_lazy_if(s,UTF))
 	    deprecate("bare << to mean <<\"\"");
-	for (; isALNUM_lazy_if(s,UTF); s++) {
+	for (; isWORDCHAR_lazy_if(s,UTF); s++) {
 	    if (d < e)
 		*d++ = *s;
 	}
@@ -9778,8 +10001,10 @@ S_scan_heredoc(pTHX_ register char *s)
 				bufend - shared->re_eval_start);
 	    shared->re_eval_start -= s-d;
 	}
-	if (CxTYPE(cx) == CXt_EVAL && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
-	 && cx->blk_eval.cur_text == linestr) {
+	if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
+            CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
+            cx->blk_eval.cur_text == linestr)
+        {
 	    cx->blk_eval.cur_text = newSVsv(linestr);
 	    SvSCREAM_on(cx->blk_eval.cur_text);
 	}
@@ -9934,7 +10159,7 @@ S_scan_inputsymbol(pTHX_ char *start)
     if (*d == '$' && d[1]) d++;
 
     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
-    while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
+    while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
 	d += UTF ? UTF8SKIP(d) : 1;
 
     /* If we've tried to read what we allow filehandles to look like, and
@@ -9945,7 +10170,7 @@ S_scan_inputsymbol(pTHX_ char *start)
 
     if (d - PL_tokenbuf != len) {
 	pl_yylval.ival = OP_GLOB;
-	s = scan_str(start,!!PL_madskills,FALSE,FALSE);
+	s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
 	if (!s)
 	   Perl_croak(aTHX_ "Glob not terminated");
 	return s;
@@ -10087,20 +10312,25 @@ intro_sym:
 */
 
 STATIC char *
-S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
+S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
+        bool deprecate_escaped_meta /* Should we issue a deprecation warning
+                                       for certain paired metacharacters that
+                                       appear escaped within it */
+    )
 {
     dVAR;
-    SV *sv;				/* scalar value: string */
-    const char *tmps;			/* temp string, used for delimiter matching */
+    SV *sv;			/* scalar value: string */
+    const char *tmps;		/* temp string, used for delimiter matching */
     char *s = start;		/* current position in the buffer */
     char term;			/* terminating character */
     char *to;			/* current position in the sv's data */
-    I32 brackets = 1;			/* bracket nesting level */
-    bool has_utf8 = FALSE;		/* is there any utf8 content? */
-    I32 termcode;			/* terminating char. code */
-    U8 termstr[UTF8_MAXBYTES];		/* terminating string */
-    STRLEN termlen;			/* length of terminating string */
-    int last_off = 0;			/* last position for nesting bracket */
+    I32 brackets = 1;		/* bracket nesting level */
+    bool has_utf8 = FALSE;	/* is there any utf8 content? */
+    I32 termcode;		/* terminating char. code */
+    U8 termstr[UTF8_MAXBYTES];	/* terminating string */
+    STRLEN termlen;		/* length of terminating string */
+    int last_off = 0;		/* last position for nesting bracket */
+    char *escaped_open = NULL;
 #ifdef PERL_MAD
     int stuffstart;
     char *tstart;
@@ -10147,6 +10377,18 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
 
     PL_multi_close = term;
 
+    /* A warning is raised if the input parameter requires it for escaped (by a
+     * backslash) paired metacharacters {} [] and () when the delimiters are
+     * those same characters, and the backslash is ineffective.  This doesn't
+     * happen for <>, as they aren't metas. */
+    if (deprecate_escaped_meta
+        && (PL_multi_open == PL_multi_close
+            || ! ckWARN_d(WARN_DEPRECATED)
+            || PL_multi_open == '<'))
+    {
+        deprecate_escaped_meta = FALSE;
+    }
+
     /* create a new SV to hold the contents.  79 is the SV's initial length.
        What a random number. */
     sv = newSV_type(SVt_PVIV);
@@ -10160,7 +10402,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
     s += termlen;
 #ifdef PERL_MAD
     tstart = SvPVX(PL_linestr) + stuffstart;
-    if (!PL_thisopen && !keep_delims) {
+    if (PL_madskills && !PL_thisopen && !keep_delims) {
 	PL_thisopen = newSVpvn(tstart, s - tstart);
 	stuffstart = s - SvPVX(PL_linestr);
     }
@@ -10285,7 +10527,44 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
 		if (*s == '\\' && s+1 < PL_bufend) {
 		    if (!keep_quoted &&
 			((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
+                    {
 			s++;
+
+                        /* Here, 'deprecate_escaped_meta' is true iff the
+                         * delimiters are paired metacharacters, and 's' points
+                         * to an occurrence of one of them within the string,
+                         * which was preceded by a backslash.  If this is a
+                         * context where the delimiter is also a metacharacter,
+                         * the backslash is useless, and deprecated.  () and []
+                         * are meta in any context. {} are meta only when
+                         * appearing in a quantifier or in things like '\p{'.
+                         * They also aren't meta unless there is a matching
+                         * closed, escaped char later on within the string.
+                         * If 's' points to an open, set a flag; if to a close,
+                         * test that flag, and raise a warning if it was set */
+
+			if (deprecate_escaped_meta) {
+                            if (*s == PL_multi_open) {
+                                if (*s != '{') {
+                                    escaped_open = s;
+                                }
+                                else if (regcurly(s,
+                                                  TRUE /* Look for a closing
+                                                          '\}' */)
+                                         || (s - start > 2  /* Look for e.g.
+                                                               '\x{' */
+                                             && _generic_isCC(*(s-2), _CC_BACKSLASH_FOO_LBRACE_IS_META)))
+                                {
+                                    escaped_open = s;
+                                }
+                            }
+                            else if (escaped_open) {
+                                Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+                                    "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open);
+                                escaped_open = NULL;
+                            }
+                        }
+                    }
 		    else
 			*to++ = *s++;
 		}
@@ -10441,7 +10720,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
     SV *sv = NULL;			/* place to put the converted number */
     bool floatit;			/* boolean: int or float? */
     const char *lastub = NULL;		/* position of last underbar */
-    static char const number_too_long[] = "Number too long";
+    static const char* const number_too_long = "Number too long";
 
     PERL_ARGS_ASSERT_SCAN_NUM;
 
@@ -10651,7 +10930,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 	    else {
 	        /* check for end of fixed-length buffer */
 		if (d >= e)
-		    Perl_croak(aTHX_ number_too_long);
+		    Perl_croak(aTHX_ "%s", number_too_long);
 		/* if we're ok, copy the character */
 		*d++ = *s++;
 	    }
@@ -10681,7 +10960,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 	    for (; isDIGIT(*s) || *s == '_'; s++) {
 	        /* fixed length buffer check */
 		if (d >= e)
-		    Perl_croak(aTHX_ number_too_long);
+		    Perl_croak(aTHX_ "%s", number_too_long);
 		if (*s == '_') {
 		   if (lastub && s == lastub + 1)
 		       Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
@@ -10733,7 +11012,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 	    while (isDIGIT(*s) || *s == '_') {
 	        if (isDIGIT(*s)) {
 		    if (d >= e)
-		        Perl_croak(aTHX_ number_too_long);
+		        Perl_croak(aTHX_ "%s", number_too_long);
 		    *d++ = *s++;
 		}
 		else {
@@ -10789,7 +11068,11 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
     case 'v':
 vstring:
 		sv = newSV(5); /* preallocate storage space */
+		ENTER_with_name("scan_vstring");
+		SAVEFREESV(sv);
 		s = scan_vstring(s, PL_bufend, sv);
+		SvREFCNT_inc_simple_void_NN(sv);
+		LEAVE_with_name("scan_vstring");
 	break;
     }
 
@@ -10804,7 +11087,7 @@ vstring:
 }
 
 STATIC char *
-S_scan_formline(pTHX_ register char *s)
+S_scan_formline(pTHX_ char *s)
 {
     dVAR;
     char *eol;
@@ -10946,7 +11229,8 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
     if (outsidecv && CvPADLIST(outsidecv))
-	CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
+	CvPADLIST(PL_compcv)->xpadl_outid =
+	    PadlistNAMES(CvPADLIST(outsidecv));
 
     return oldsavestack_ix;
 }
@@ -10990,7 +11274,6 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
     SV *msg;
     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
     int yychar  = PL_parser->yychar;
-    U32 is_utf8 = flags & SVf_UTF8;
 
     PERL_ARGS_ASSERT_YYERROR_PVN;
 
@@ -11051,7 +11334,7 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
 	else
 	    Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
     }
-    msg = sv_2mortal(newSVpvn_flags(s, len, is_utf8));
+    msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
     if (context)
@@ -11072,9 +11355,10 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
     else
 	qerror(msg);
     if (PL_error_count >= 10) {
-	if (PL_in_eval && SvCUR(ERRSV))
+	SV * errsv;
+	if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
 	    Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
-		       SVfARG(ERRSV), OutCopFILE(PL_curcop));
+		       SVfARG(errsv), OutCopFILE(PL_curcop));
 	else
 	    Perl_croak(aTHX_ "%s has too many errors.\n",
             OutCopFILE(PL_curcop));
@@ -11338,13 +11622,18 @@ vstring, as well as updating the passed in sv.
 
 Function must be called like
 
-	sv = newSV(5);
+	sv = sv_2mortal(newSV(5));
 	s = scan_vstring(s,e,sv);
 
 where s and e are the start and end of the string.
 The sv should already be large enough to store the vstring
 passed in, for performance reasons.
 
+This function may croak if fatal warnings are enabled in the
+calling scope, hence the sv_2mortal in the example (to prevent
+a leak).  Make sure to do SvREFCNT_inc afterwards if you use
+sv_2mortal.
+
 */
 
 char *
@@ -11710,11 +11999,10 @@ Perl_parse_label(pTHX_ U32 flags)
     if (PL_lex_state == LEX_KNOWNEXT) {
 	PL_parser->yychar = yylex();
 	if (PL_parser->yychar == LABEL) {
-	    SV *lsv;
+	    char * const lpv = pl_yylval.pval;
+	    STRLEN llen = strlen(lpv);
 	    PL_parser->yychar = YYEMPTY;
-	    lsv = newSV_type(SVt_PV);
-	    sv_copypv(lsv, cSVOPx(pl_yylval.opval)->op_sv);
-	    return lsv;
+	    return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
 	} else {
 	    yyunlex();
 	    goto no_label;