X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/16d1d8bdaf1e7c1c3ad11abc246622d1e5c3b26a..607733213512652a8c98b3055264d6baf5019eb5:/toke.c

diff --git a/toke.c b/toke.c
index 5a181c3..a91a4fc 100644
--- a/toke.c
+++ b/toke.c
@@ -413,12 +413,12 @@ S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 	else if (!rv)
 	    sv_catpvs(report, "EOF");
 	else
-	    Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
+	    Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
 	switch (type) {
 	case TOKENTYPE_NONE:
 	    break;
 	case TOKENTYPE_IVAL:
-	    Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
+	    Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
 	    break;
 	case TOKENTYPE_OPNUM:
 	    Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
@@ -464,13 +464,6 @@ S_printbuf(pTHX_ const char *const fmt, const char *const s)
 
 #endif
 
-static int
-S_deprecate_commaless_var_list(pTHX) {
-    PL_expect = XTERM;
-    deprecate("comma-less variable list");
-    return REPORT(','); /* grandfather non-comma-format format */
-}
-
 /*
  * S_ao
  *
@@ -527,20 +520,26 @@ S_no_op(pTHX_ const char *const what, char *s)
 	if (is_first)
 	    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 		    "\t(Missing semicolon on previous line?)\n");
-	else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
+        else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
+                                                           PL_bufend,
+                                                           UTF))
+        {
 	    const char *t;
-	    for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
-                                                            t += UTF ? UTF8SKIP(t) : 1)
+	    for (t = PL_oldoldbufptr;
+                 (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
+                 t += UTF ? UTF8SKIP(t) : 1)
+            {
 		NOOP;
+            }
 	    if (t < PL_bufptr && isSPACE(*t))
 		Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-			"\t(Do you need to predeclare %"UTF8f"?)\n",
+			"\t(Do you need to predeclare %" UTF8f "?)\n",
 		      UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
 	}
 	else {
 	    assert(s >= oldbp);
 	    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-		    "\t(Missing operator before %"UTF8f"?)\n",
+		    "\t(Missing operator before %" UTF8f "?)\n",
 		     UTF8fARG(UTF, s - oldbp, oldbp));
 	}
     }
@@ -590,7 +589,7 @@ S_missingterm(pTHX_ char *s)
     sv = sv_2mortal(newSVpv(s,0));
     if (uni)
 	SvUTF8_on(sv);
-    Perl_croak(aTHX_ "Can't find string terminator %c%"SVf
+    Perl_croak(aTHX_ "Can't find string terminator %c%" SVf
 		     "%c anywhere before EOF",q,SVfARG(sv),q);
 }
 
@@ -663,7 +662,7 @@ S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 Creates and initialises a new lexer/parser state object, supplying
 a context in which to lex and parse from a new source of Perl code.
 A pointer to the new state object is placed in L</PL_parser>.  An entry
-is made on the save stack so that upon unwinding the new state object
+is made on the save stack so that upon unwinding, the new state object
 will be destroyed and the former value of L</PL_parser> will be restored.
 Nothing else need be done to clean up the parsing context.
 
@@ -695,6 +694,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 {
     const char *s = NULL;
     yy_parser *parser, *oparser;
+
     if (flags && flags & ~LEX_START_FLAGS)
 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 
@@ -705,8 +705,8 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
     PL_parser = parser;
 
     parser->stack = NULL;
+    parser->stack_max1 = NULL;
     parser->ps = NULL;
-    parser->stack_size = 0;
 
     /* on scope exit, free this parser and restore any outer one */
     SAVEPARSER(parser);
@@ -720,6 +720,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
     parser->lex_state = LEX_NORMAL;
     parser->expect = XSTATE;
     parser->rsfp = rsfp;
+    parser->recheck_utf8_validity = FALSE;
     parser->rsfp_filters =
       !(flags & LEX_START_SAME_FILTER) || !oparser
         ? NULL
@@ -736,7 +737,22 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 
     if (line) {
 	STRLEN len;
+        const U8* first_bad_char_loc;
+
 	s = SvPV_const(line, len);
+
+        if (   SvUTF8(line)
+            && UNLIKELY(! is_utf8_string_loc((U8 *) s,
+                                             SvCUR(line),
+                                             &first_bad_char_loc)))
+        {
+            _force_out_malformed_utf8_message(first_bad_char_loc,
+                                              (U8 *) s + SvCUR(line),
+                                              0,
+                                              1 /* 1 means die */ );
+            NOT_REACHED; /* NOTREACHED */
+        }
+
 	parser->linestr = flags & LEX_START_COPIED
 			    ? SvREFCNT_inc_simple_NN(line)
 			    : newSVpvn_flags(s, len, SvUTF8(line));
@@ -745,6 +761,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
     } else {
 	parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
     }
+
     parser->oldoldbufptr =
 	parser->oldbufptr =
 	parser->bufptr =
@@ -923,10 +940,19 @@ Perl_lex_grow_linestr(pTHX_ STRLEN len)
     char *buf;
     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
+    bool current;
+
     linestr = PL_parser->linestr;
     buf = SvPVX(linestr);
     if (len <= SvLEN(linestr))
 	return buf;
+
+    /* Is the lex_shared linestr SV the same as the current linestr SV?
+     * Only in this case does re_eval_start need adjusting, since it
+     * points within lex_shared->ls_linestr's buffer */
+    current = (   !PL_parser->lex_shared->ls_linestr
+               || linestr == PL_parser->lex_shared->ls_linestr);
+
     bufend_pos = PL_parser->bufend - buf;
     bufptr_pos = PL_parser->bufptr - buf;
     oldbufptr_pos = PL_parser->oldbufptr - buf;
@@ -934,7 +960,7 @@ Perl_lex_grow_linestr(pTHX_ STRLEN len)
     linestart_pos = PL_parser->linestart - buf;
     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
-    re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
+    re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
                             PL_parser->lex_shared->re_eval_start - buf : 0;
 
     buf = sv_grow(linestr, len);
@@ -948,7 +974,7 @@ Perl_lex_grow_linestr(pTHX_ STRLEN len)
 	PL_parser->last_uni = buf + last_uni_pos;
     if (PL_parser->last_lop)
 	PL_parser->last_lop = buf + last_lop_pos;
-    if (PL_parser->lex_shared->re_eval_start)
+    if (current && PL_parser->lex_shared->re_eval_start)
         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
     return buf;
 }
@@ -1024,14 +1050,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 		} else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
 		    p++;
 		    highhalf++;
-		} else if (! UTF8_IS_INVARIANT(c)) {
-		    /* malformed UTF-8 */
-		    ENTER;
-		    SAVESPTR(PL_warnhook);
-		    PL_warnhook = PERL_WARNHOOK_FATAL;
-		    utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
-		    LEAVE;
-		}
+                } else assert(UTF8_IS_INVARIANT(c));
 	    }
 	    if (!highhalf)
 		goto plain_copy;
@@ -1245,6 +1264,24 @@ Perl_lex_discard_to(pTHX_ char *ptr)
 	PL_parser->last_lop -= discard_len;
 }
 
+void
+Perl_notify_parser_that_changed_to_utf8(pTHX)
+{
+    /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
+     * off to on.  At compile time, this has the effect of entering a 'use
+     * utf8' section.  This means that any input was not previously checked for
+     * UTF-8 (because it was off), but now we do need to check it, or our
+     * assumptions about the input being sane could be wrong, and we could
+     * segfault.  This routine just sets a flag so that the next time we look
+     * at the input we do the well-formed UTF-8 check.  If we aren't in the
+     * proper phase, there may not be a parser object, but if there is, setting
+     * the flag is harmless */
+
+    if (PL_parser) {
+        PL_parser->recheck_utf8_validity = TRUE;
+    }
+}
+
 /*
 =for apidoc Amx|bool|lex_next_chunk|U32 flags
 
@@ -1280,6 +1317,7 @@ Perl_lex_next_chunk(pTHX_ U32 flags)
     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
     bool got_some_for_debugger = 0;
     bool got_some;
+
     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
 	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
     if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
@@ -1344,6 +1382,22 @@ Perl_lex_next_chunk(pTHX_ U32 flags)
     new_bufend_pos = SvCUR(linestr);
     PL_parser->bufend = buf + new_bufend_pos;
     PL_parser->bufptr = buf + bufptr_pos;
+
+    if (UTF) {
+        const U8* first_bad_char_loc;
+        if (UNLIKELY(! is_utf8_string_loc(
+                            (U8 *) PL_parser->bufptr,
+                                   PL_parser->bufend - PL_parser->bufptr,
+                                   &first_bad_char_loc)))
+        {
+            _force_out_malformed_utf8_message(first_bad_char_loc,
+                                              (U8 *) PL_parser->bufend,
+                                              0,
+                                              1 /* 1 means die */ );
+            NOT_REACHED; /* NOTREACHED */
+        }
+    }
+
     PL_parser->oldbufptr = buf + oldbufptr_pos;
     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
     PL_parser->linestart = buf + linestart_pos;
@@ -1420,12 +1474,11 @@ Perl_lex_peek_unichar(pTHX_ U32 flags)
 	}
 	unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
 	if (retlen == (STRLEN)-1) {
-	    /* malformed UTF-8 */
-	    ENTER;
-	    SAVESPTR(PL_warnhook);
-	    PL_warnhook = PERL_WARNHOOK_FATAL;
-	    utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
-	    LEAVE;
+            _force_out_malformed_utf8_message((U8 *) s,
+                                              (U8 *) bufend,
+                                              0,
+                                              1 /* 1 means die */ );
+            NOT_REACHED; /* NOTREACHED */
 	}
 	return unichar;
     } else {
@@ -1570,7 +1623,7 @@ Note that C<NULL> is a valid C<proto> and will always return C<true>.
  */
 
 bool
-Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
+Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
 {
     STRLEN len, origlen;
     char *p;
@@ -1632,21 +1685,28 @@ Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
 	                     origlen, UNI_DISPLAY_ISPRINT)
 	    : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
 
+	if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
+	    SV *name2 = sv_2mortal(newSVsv(PL_curstname));
+	    sv_catpvs(name2, "::");
+	    sv_catsv(name2, (SV *)name);
+	    name = name2;
+	}
+
 	if (proto_after_greedy_proto)
 	    Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
-			"Prototype after '%c' for %"SVf" : %s",
+			"Prototype after '%c' for %" SVf " : %s",
 			greedy_proto, SVfARG(name), p);
 	if (in_brackets)
 	    Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
-			"Missing ']' in prototype for %"SVf" : %s",
+			"Missing ']' in prototype for %" SVf " : %s",
 			SVfARG(name), p);
 	if (bad_proto)
 	    Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
-			"Illegal character in prototype for %"SVf" : %s",
+			"Illegal character in prototype for %" SVf " : %s",
 			SVfARG(name), p);
 	if (bad_proto_after_underscore)
 	    Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
-			"Illegal character after '_' in prototype for %"SVf" : %s",
+			"Illegal character after '_' in prototype for %" SVf " : %s",
 			SVfARG(name), p);
     }
 
@@ -1866,13 +1926,13 @@ S_check_uni(pTHX)
     while (isSPACE(*PL_last_uni))
 	PL_last_uni++;
     s = PL_last_uni;
-    while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
+    while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
 	s += UTF ? UTF8SKIP(s) : 1;
     if ((t = strchr(s, '(')) && t < PL_bufptr)
 	return;
 
     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
-		     "Warning: Use of \"%"UTF8f"\" without parentheses is ambiguous",
+		     "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
 		     UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
 }
 
@@ -2036,7 +2096,7 @@ S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
 
     start = skipspace(start);
     s = start;
-    if (isIDFIRST_lazy_if(s,UTF)
+    if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
         || (allow_pack && *s == ':' && s[1] == ':') )
     {
 	s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
@@ -2117,7 +2177,7 @@ Perl_str_to_version(pTHX_ SV *sv)
     STRLEN len;
     const char *start = SvPV_const(sv,len);
     const char * const end = start + len;
-    const bool utf = SvUTF8(sv) ? TRUE : FALSE;
+    const bool utf = cBOOL(SvUTF8(sv));
 
     PERL_ARGS_ASSERT_STR_TO_VERSION;
 
@@ -2223,10 +2283,9 @@ S_force_strict_version(pTHX_ char *s)
 
 /*
  * S_tokeq
- * Tokenize a quoted string passed in as an SV.  It finds the next
- * chunk, up to end of string or a backslash.  It may make a new
- * SV containing that chunk (if HINT_NEW_STRING is on).  It also
- * turns \\ into \.
+ * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
+ * modified as necessary.  However, if HINT_NEW_STRING is on, 'sv' is
+ * unchanged, and a new SV containing the modified input is returned.
  */
 
 STATIC SV *
@@ -2429,7 +2488,7 @@ S_sublex_push(pTHX)
     if (is_heredoc)
 	CopLINE_set(PL_curcop, (line_t)PL_multi_start);
     PL_copline = NOLINE;
-    
+
     Newxz(shared, 1, LEXSHARED);
     shared->ls_prev = PL_parser->lex_shared;
     PL_parser->lex_shared = shared;
@@ -2517,7 +2576,7 @@ S_sublex_done(pTHX)
     }
 }
 
-PERL_STATIC_INLINE SV*
+STATIC SV*
 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
 {
     /* <s> points to first character of interior of \N{}, <e> to one beyond the
@@ -2531,35 +2590,14 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
     SV *cv;
     SV *rv;
     HV *stash;
-    const U8* first_bad_char_loc;
     const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
 
     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
 
     if (!SvCUR(res)) {
-        Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
-                       "Unknown charname '' is deprecated");
-        return res;
-    }
-
-    if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
-                                     e - backslash_ptr,
-                                     &first_bad_char_loc))
-    {
-        /* If warnings are on, this will print a more detailed analysis of what
-         * is wrong than the error message below */
-        utf8n_to_uvchr(first_bad_char_loc,
-                       e - ((char *) first_bad_char_loc),
-                       NULL, 0);
-
-        /* We deliberately don't try to print the malformed character, which
-         * might not print very well; it also may be just the first of many
-         * malformations, so don't print what comes after it */
-        yyerror_pv(Perl_form(aTHX_
-            "Malformed UTF-8 character immediately after '%.*s'",
-            (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr),
-                   SVf_UTF8);
-	return NULL;
+        /* diag_listed_as: Unknown charname '%s' */
+        yyerror("Unknown charname ''");
+        return NULL;
     }
 
     res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
@@ -2671,6 +2709,9 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
         }
     }
     if (*(s-1) == ' ') {
+        /* diag_listed_as: charnames alias definitions may not contain
+                           trailing white-space; marked by <-- HERE in %s
+         */
         yyerror_pv(
             Perl_form(aTHX_
             "charnames alias definitions may not contain trailing "
@@ -2686,16 +2727,15 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
         const U8* first_bad_char_loc;
         STRLEN len;
         const char* const str = SvPV_const(res, len);
-        if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
-            /* If warnings are on, this will print a more detailed analysis of
-             * what is wrong than the error message below */
-            utf8n_to_uvchr(first_bad_char_loc,
-                           (char *) first_bad_char_loc - str,
-                           NULL, 0);
-
-            /* We deliberately don't try to print the malformed character,
-             * which might not print very well; it also may be just the first
-             * of many malformations, so don't print what comes after it */
+        if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
+                                          &first_bad_char_loc)))
+        {
+            _force_out_malformed_utf8_message(first_bad_char_loc,
+                                              (U8 *) PL_parser->bufend,
+                                              0,
+                                              0 /* 0 means don't die */ );
+            /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
+                               immediately after '%s' */
             yyerror_pv(
               Perl_form(aTHX_
                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
@@ -2713,6 +2753,8 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
 
         /* The final %.*s makes sure that should the trailing NUL be missing
          * that this print won't run off the end of the string */
+        /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
+                           in \N{%s} */
         yyerror_pv(
           Perl_form(aTHX_
             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
@@ -2724,6 +2766,9 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
     }
 
   multi_spaces:
+        /* diag_listed_as: charnames alias definitions may not contain a
+                           sequence of multiple spaces; marked by <-- HERE
+                           in %s */
         yyerror_pv(
           Perl_form(aTHX_
             "charnames alias definitions may not contain a sequence of "
@@ -2769,15 +2814,17 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
 
   In transliterations:
     characters are VERY literal, except for - not at the start or end
-    of the string, which indicates a range. If the range is in bytes,
+    of the string, which indicates a range.  However some backslash sequences
+    are recognized: \r, \n, and the like
+                    \007 \o{}, \x{}, \N{}
+    If all elements in the transliteration are below 256,
     scan_const expands the range to the full set of intermediate
     characters. If the range is in utf8, the hyphen is replaced with
     a certain range mark which will be handled by pmtrans() in op.c.
 
   In double-quoted strings:
     backslashes:
-      double-quoted style: \r and \n
-      constants: \x31, etc.
+      all those recognized in transliterations
       deprecated backrefs: \1 (in substitution replacements)
       case and quoting: \U \Q \E
     stops on @ and $
@@ -2820,7 +2867,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
 	  } (end if backslash)
           handle regular character
     } (end while character to read)
-		
+
 */
 
 STATIC char *
@@ -2840,10 +2887,23 @@ S_scan_const(pTHX_ char *start)
                                            when the source isn't utf8, as for
                                            example when it is entirely composed
                                            of hex constants */
+    STRLEN utf8_variant_count = 0;      /* When not in UTF-8, this counts the
+                                           number of characters found so far
+                                           that will expand (into 2 bytes)
+                                           should we have to convert to
+                                           UTF-8) */
     SV *res;		                /* result from charnames */
     STRLEN offset_to_max;   /* The offset in the output to where the range
                                high-end character is temporarily placed */
 
+    /* Does something require special handling in tr/// ?  This avoids extra
+     * work in a less likely case.  As such, khw didn't feel it was worth
+     * adding any branches to the more mainline code to handle this, which
+     * means that this doesn't get set in some circumstances when things like
+     * \x{100} get expanded out.  As a result there needs to be extra testing
+     * done in the tr code */
+    bool has_above_latin1 = FALSE;
+
     /* Note on sizing:  The scanned constant is placed into sv, which is
      * initialized by newSV() assuming one byte of output for every byte of
      * input.  This routine expects newSV() to allocate an extra byte for a
@@ -2854,7 +2914,7 @@ S_scan_const(pTHX_ char *start)
      * the needed size, SvGROW() is called.  Its size parameter each time is
      * based on the best guess estimate at the time, namely the length used so
      * far, plus the length the current construct will occupy, plus room for
-     * the trailing NUL, plus one byte for every input byte still unscanned */ 
+     * the trailing NUL, plus one byte for every input byte still unscanned */
 
     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
                        before set */
@@ -2888,30 +2948,29 @@ S_scan_const(pTHX_ char *start)
              * range, so for most cases we just drop down and handle the value
              * as any other.  There are two exceptions.
              *
-             * 1.  A minus sign indicates that we are actually going to have
-             *     a range.  In this case, skip the '-', set a flag, then drop
+             * 1.  A hyphen indicates that we are actually going to have a
+             *     range.  In this case, skip the '-', set a flag, then drop
              *     down to handle what should be the end range value.
              * 2.  After we've handled that value, the next time through, that
              *     flag is set and we fix up the range.
              *
              * Ranges entirely within Latin1 are expanded out entirely, in
-             * order to avoid the significant overhead of making a swash.
-             * Ranges that extend above Latin1 have to have a swash, so there
-             * is no advantage to abbreviating them here, so they are stored
-             * here as Min, ILLEGAL_UTF8_BYTE, Max.  The illegal byte signifies
-             * a hyphen without any possible ambiguity.  On EBCDIC machines, if
-             * the range is expressed as Unicode, the Latin1 portion is
-             * expanded out even if the entire range extends above Latin1.
-             * This is because each code point in it has to be processed here
-             * individually to get its native translation */
+             * order to make the transliteration a simple table look-up.
+             * Ranges that extend above Latin1 have to be done differently, so
+             * there is no advantage to expanding them here, so they are
+             * stored here as Min, ILLEGAL_UTF8_BYTE, Max.  The illegal byte
+             * signifies a hyphen without any possible ambiguity.  On EBCDIC
+             * machines, if the range is expressed as Unicode, the Latin1
+             * portion is expanded out even if the range extends above
+             * Latin1.  This is because each code point in it has to be
+             * processed here individually to get its native translation */
 
 	    if (! dorange) {
 
-                /* Here, we don't think we're in a range.  If we've processed
-                 * at least one character, then see if this next one is a '-',
-                 * indicating the previous one was the start of a range.  But
-                 * don't bother if we're too close to the end for the minus to
-                 * mean that. */
+                /* Here, we don't think we're in a range.  If the new character
+                 * is not a hyphen; or if it is a hyphen, but it's too close to
+                 * either edge to indicate a range, then it's a regular
+                 * character. */
                 if (*s != '-' || s >= send - 1 || s == start) {
 
                     /* A regular character.  Process like any other, but first
@@ -2922,16 +2981,26 @@ S_scan_const(pTHX_ char *start)
                     non_portable_endpoint = 0;
                     backslash_N = 0;
 #endif
+                    /* The tests here for being above Latin1 and similar ones
+                     * in the following 'else' suffice to find all such
+                     * occurences in the constant, except those added by a
+                     * backslash escape sequence, like \x{100}.  Mostly, those
+                     * set 'has_above_latin1' as appropriate */
+                    if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
+                        has_above_latin1 = TRUE;
+                    }
+
                     /* Drops down to generic code to process current byte */
                 }
-                else {
+                else {  /* Is a '-' in the context where it means a range */
                     if (didrange) { /* Something like y/A-C-Z// */
-                        Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
+                        Perl_croak(aTHX_ "Ambiguous range in transliteration"
+                                         " operator");
                     }
 
                     dorange = TRUE;
 
-                    s++;    /* Skip past the minus */
+                    s++;    /* Skip past the hyphen */
 
                     /* d now points to where the end-range character will be
                      * placed.  Save it so won't have to go finding it later,
@@ -2941,6 +3010,12 @@ S_scan_const(pTHX_ char *start)
                      * pointer).  We'll finish processing the range the next
                      * time through the loop */
                     offset_to_max = d - SvPVX_const(sv);
+
+                    if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
+                        has_above_latin1 = TRUE;
+                    }
+
+                    /* Drops down to generic code to process current byte */
                 }
             }  /* End of not a range */
             else {
@@ -2952,26 +3027,33 @@ S_scan_const(pTHX_ char *start)
                  * 'd'  points to just beyond the range end in the 'sv' string,
                  *      where we would next place something
                  * 'offset_to_max' is the offset in 'sv' at which the character
-                 *      before 'd' begins.
+                 *      (the range's maximum end point) before 'd'  begins.
                  */
-                const char * max_ptr = SvPVX_const(sv) + offset_to_max;
-                const char * min_ptr;
+                char * max_ptr = SvPVX(sv) + offset_to_max;
+                char * min_ptr;
                 IV range_min;
 		IV range_max;	/* last character in range */
-                STRLEN save_offset;
                 STRLEN grow;
+                Size_t offset_to_min = 0;
+                Size_t extras = 0;
 #ifdef EBCDIC
                 bool convert_unicode;
                 IV real_range_max = 0;
 #endif
-
-                /* Get the range-ends code point values. */
+                /* Get the code point values of the range ends. */
                 if (has_utf8) {
                     /* We know the utf8 is valid, because we just constructed
                      * it ourselves in previous loop iterations */
                     min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
                     range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
                     range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
+
+                    /* This compensates for not all code setting
+                     * 'has_above_latin1', so that we don't skip stuff that
+                     * should be executed */
+                    if (range_max > 255) {
+                        has_above_latin1 = TRUE;
+                    }
                 }
                 else {
                     min_ptr = max_ptr - 1;
@@ -2979,23 +3061,40 @@ S_scan_const(pTHX_ char *start)
                     range_max = * (U8*) max_ptr;
                 }
 
+                /* If the range is just a single code point, like tr/a-a/.../,
+                 * that code point is already in the output, twice.  We can
+                 * just back up over the second instance and avoid all the rest
+                 * of the work.  But if it is a variant character, it's been
+                 * counted twice, so decrement.  (This unlikely scenario is
+                 * special cased, like the one for a range of 2 code points
+                 * below, only because the main-line code below needs a range
+                 * of 3 or more to work without special casing.  Might as well
+                 * get it out of the way now.) */
+                if (UNLIKELY(range_max == range_min)) {
+                    d = max_ptr;
+                    if (! has_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
+                        utf8_variant_count--;
+                    }
+                    goto range_done;
+                }
+
 #ifdef EBCDIC
                 /* On EBCDIC platforms, we may have to deal with portable
                  * ranges.  These happen if at least one range endpoint is a
                  * Unicode value (\N{...}), or if the range is a subset of
                  * [A-Z] or [a-z], and both ends are literal characters,
                  * like 'A', and not like \x{C1} */
-                if ((convert_unicode
-                     = cBOOL(backslash_N)   /* \N{} forces Unicode, hence
-                                               portable range */
-                      || (   ! non_portable_endpoint
-                          && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
-                             || (isUPPER_A(range_min) && isUPPER_A(range_max))))
-                )) {
+                convert_unicode =
+                               cBOOL(backslash_N)   /* \N{} forces Unicode,
+                                                       hence portable range */
+                    || (     ! non_portable_endpoint
+                        && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
+                           || (isUPPER_A(range_min) && isUPPER_A(range_max))));
+                if (convert_unicode) {
 
                     /* Special handling is needed for these portable ranges.
-                     * They are defined to all be in Unicode terms, which
-                     * include all Unicode code points between the end points.
+                     * They are defined to be in Unicode terms, which includes
+                     * all the Unicode code points between the end points.
                      * Convert to Unicode to get the Unicode range.  Later we
                      * will convert each code point in the range back to
                      * native.  */
@@ -3013,7 +3112,6 @@ S_scan_const(pTHX_ char *start)
                         range_max = UNI_TO_NATIVE(range_max);
                     }
 #endif
-
                     /* Use the characters themselves for the error message if
                      * ASCII printables; otherwise some visible representation
                      * of them */
@@ -3024,32 +3122,41 @@ S_scan_const(pTHX_ char *start)
                     }
 #ifdef EBCDIC
                     else if (convert_unicode) {
-                        /* diag_listed_as: Invalid range "%s" in transliteration operator */
+        /* diag_listed_as: Invalid range "%s" in transliteration operator */
                         Perl_croak(aTHX_
-			       "Invalid range \"\\N{U+%04"UVXf"}-\\N{U+%04"UVXf"}\""
-                               " in transliteration operator",
-			       range_min, range_max);
+                           "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
+                           UVXf "}\" in transliteration operator",
+                           range_min, range_max);
                     }
 #endif
                     else {
-                        /* diag_listed_as: Invalid range "%s" in transliteration operator */
+        /* diag_listed_as: Invalid range "%s" in transliteration operator */
                         Perl_croak(aTHX_
-			       "Invalid range \"\\x{%04"UVXf"}-\\x{%04"UVXf"}\""
-                               " in transliteration operator",
-			       range_min, range_max);
+                           "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
+                           " in transliteration operator",
+                           range_min, range_max);
                     }
                 }
 
+                /* If the range is exactly two code points long, they are
+                 * already both in the output */
+                if (UNLIKELY(range_min + 1 == range_max)) {
+                    goto range_done;
+                }
+
+                /* Here the range contains at least 3 code points */
+
 		if (has_utf8) {
 
-                    /* We try to avoid creating a swash.  If the upper end of
-                     * this range is below 256, this range won't force a swash;
-                     * otherwise it does force a swash, and as long as we have
-                     * to have one, we might as well not expand things out.
-                     * But if it's EBCDIC, we may have to look at each
-                     * character below 256 if we have to convert to/from
-                     * Unicode values */
-                    if (range_max > 255
+                    /* If everything in the transliteration is below 256, we
+                     * can avoid special handling later.  A translation table
+                     * for each of those bytes is created by op.c.  So we
+                     * expand out all ranges to their constituent code points.
+                     * But if we've encountered something above 255, the
+                     * expanding won't help, so skip doing that.  But if it's
+                     * EBCDIC, we may have to look at each character below 256
+                     * if we have to convert to/from Unicode values */
+                    if (   has_above_latin1
 #ifdef EBCDIC
 		        && (range_min > 255 || ! convert_unicode)
 #endif
@@ -3057,7 +3164,7 @@ S_scan_const(pTHX_ char *start)
                         /* Move the high character one byte to the right; then
                          * insert between it and the range begin, an illegal
                          * byte which serves to indicate this is a range (using
-                         * a '-' could be ambiguous). */
+                         * a '-' would be ambiguous). */
                         char *e = d++;
                         while (e-- > max_ptr) {
                             *(e + 1) = *e;
@@ -3079,51 +3186,94 @@ S_scan_const(pTHX_ char *start)
 		}
 
                 /* Here we need to expand out the string to contain each
-                 * character in the range.  Grow the output to handle this */
+                 * character in the range.  Grow the output to handle this.
+                 * For non-UTF8, we need a byte for each code point in the
+                 * range, minus the three that we've already allocated for: the
+                 * hyphen, the min, and the max.  For UTF-8, we need this
+                 * plus an extra byte for each code point that occupies two
+                 * bytes (is variant) when in UTF-8 (except we've already
+                 * allocated for the end points, including if they are
+                 * variants).  For ASCII platforms and Unicode ranges on EBCDIC
+                 * platforms, it's easy to calculate a precise number.  To
+                 * start, we count the variants in the range, which we need
+                 * elsewhere in this function anyway.  (For the case where it
+                 * isn't easy to calculate, 'extras' has been initialized to 0,
+                 * and the calculation is done in a loop further down.) */
+#ifdef EBCDIC
+                if (convert_unicode)
+#endif
+                {
+                    /* This is executed unconditionally on ASCII, and for
+                     * Unicode ranges on EBCDIC.  Under these conditions, all
+                     * code points above a certain value are variant; and none
+                     * under that value are.  We just need to find out how much
+                     * of the range is above that value.  We don't count the
+                     * end points here, as they will already have been counted
+                     * as they were parsed. */
+                    if (range_min >= UTF_CONTINUATION_MARK) {
+
+                        /* The whole range is made up of variants */
+                        extras = (range_max - 1) - (range_min + 1) + 1;
+                    }
+                    else if (range_max >= UTF_CONTINUATION_MARK) {
+
+                        /* Only the higher portion of the range is variants */
+                        extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
+                    }
 
-                save_offset  = min_ptr - SvPVX_const(sv);
+                    utf8_variant_count += extras;
+                }
 
-                /* The base growth is the number of code points in the range */
-                grow = range_max - range_min + 1;
-                if (has_utf8) {
+                /* The base growth is the number of code points in the range,
+                 * not including the endpoints, which have already been sized
+                 * for (and output).  We don't subtract for the hyphen, as it
+                 * has been parsed but not output, and the SvGROW below is
+                 * based only on what's been output plus what's left to parse.
+                 * */
+                grow = (range_max - 1) - (range_min + 1) + 1;
 
-                    /* But if the output is UTF-8, some of those characters may
-                     * need two bytes (since the maximum range value here is
-                     * 255, the max bytes per character is two).  On ASCII
-                     * platforms, it's not much trouble to get an accurate
-                     * count of what's needed.  But on EBCDIC, the ones that
-                     * need 2 bytes are scattered around, so just use a worst
-                     * case value instead of calculating for that platform.  */
+                if (has_utf8) {
 #ifdef EBCDIC
-                    grow *= 2;
-#else
-                    /* Only those above 127 require 2 bytes.  This may be
-                     * everything in the range, or not */
-                    if (range_min > 127) {
+                    /* In some cases in EBCDIC, we haven't yet calculated a
+                     * precise amount needed for the UTF-8 variants.  Just
+                     * assume the worst case, that everything will expand by a
+                     * byte */
+                    if (! convert_unicode) {
                         grow *= 2;
                     }
-                    else if (range_max > 127) {
-                        grow += range_max - 127;
-                    }
+                    else
 #endif
+                    {
+                        /* Otherwise we know exactly how many variants there
+                         * are in the range. */
+                        grow += extras;
+                    }
                 }
 
-                /* Subtract 3 for the bytes that were already accounted for
-                 * (min, max, and the hyphen) */
-                d = save_offset + SvGROW(sv, SvLEN(sv) + grow - 3);
+                /* Grow, but position the output to overwrite the range min end
+                 * point, because in some cases we overwrite that */
+                SvCUR_set(sv, d - SvPVX_const(sv));
+                offset_to_min = min_ptr - SvPVX_const(sv);
 
+                /* See Note on sizing above. */
+                d = offset_to_min + SvGROW(sv, SvCUR(sv)
+                                             + (send - s)
+                                             + grow
+                                             + 1 /* Trailing NUL */ );
+
+                /* Now, we can expand out the range. */
 #ifdef EBCDIC
-                /* Here, we expand out the range. */
                 if (convert_unicode) {
-                    IV i;
+                    SSize_t i;
 
                     /* Recall that the min and max are now in Unicode terms, so
                      * we have to convert each character to its native
                      * equivalent */
                     if (has_utf8) {
                         for (i = range_min; i <= range_max; i++) {
-                            append_utf8_from_native_byte(LATIN1_TO_NATIVE((U8) i),
-                                                         (U8 **) &d);
+                            append_utf8_from_native_byte(
+                                                    LATIN1_TO_NATIVE((U8) i),
+                                                    (U8 **) &d);
                         }
                     }
                     else {
@@ -3136,34 +3286,51 @@ S_scan_const(pTHX_ char *start)
 #endif
                 /* Always gets run for ASCII, and sometimes for EBCDIC. */
                 {
-                    IV i;
-
                     /* Here, no conversions are necessary, which means that the
                      * first character in the range is already in 'd' and
                      * valid, so we can skip overwriting it */
                     if (has_utf8) {
+                        SSize_t i;
                         d += UTF8SKIP(d);
                         for (i = range_min + 1; i <= range_max; i++) {
                             append_utf8_from_native_byte((U8) i, (U8 **) &d);
                         }
                     }
                     else {
+                        SSize_t i;
                         d++;
-                        for (i = range_min + 1; i <= range_max; i++) {
+                        assert(range_min + 1 <= range_max);
+                        for (i = range_min + 1; i < range_max; i++) {
+#ifdef EBCDIC
+                            /* In this case on EBCDIC, we haven't calculated
+                             * the variants.  Do it here, as we go along */
+                            if (! UVCHR_IS_INVARIANT(i)) {
+                                utf8_variant_count++;
+                            }
+#endif
                             *d++ = (char)i;
                         }
+
+                        /* The range_max is done outside the loop so as to
+                         * avoid having to special case not incrementing
+                         * 'utf8_variant_count' on EBCDIC (it's already been
+                         * counted when originally parsed) */
+                        *d++ = (char) range_max;
 		    }
 		}
 
 #ifdef EBCDIC
-                /* If the original range extended above 255, add in that portion. */
+                /* If the original range extended above 255, add in that
+                 * portion. */
                 if (real_range_max) {
                     *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
                     *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
-                    if (real_range_max > 0x101)
-                        *d++ = (char) ILLEGAL_UTF8_BYTE;
-                    if (real_range_max > 0x100)
+                    if (real_range_max > 0x100) {
+                        if (real_range_max > 0x101) {
+                            *d++ = (char) ILLEGAL_UTF8_BYTE;
+                        }
                         d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
+                    }
                 }
 #endif
 
@@ -3186,8 +3353,7 @@ S_scan_const(pTHX_ char *start)
 	    if (!esc)
 		in_charclass = TRUE;
 	}
-
-	else if (*s == ']' && PL_lex_inpat &&  in_charclass) {
+	else if (*s == ']' && PL_lex_inpat && in_charclass) {
 	    char *s1 = s-1;
 	    int esc = 0;
 	    while (s1 >= start && *s1-- == '\\')
@@ -3195,11 +3361,9 @@ S_scan_const(pTHX_ char *start)
 	    if (!esc)
 		in_charclass = FALSE;
 	}
-
-	/* skip for regexp comments /(?#comment)/, except for the last
-	 * char, which will be done separately.
-	 * Stop on (?{..}) and friends */
-
+            /* skip for regexp comments /(?#comment)/, except for the last
+             * char, which will be done separately.  Stop on (?{..}) and
+             * friends */
 	else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
 	    if (s[2] == '#') {
 		while (s+1 < send && *s != ')')
@@ -3212,36 +3376,36 @@ S_scan_const(pTHX_ char *start)
 		break;
 	    }
 	}
-
-	/* likewise skip #-initiated comments in //x patterns */
+            /* likewise skip #-initiated comments in //x patterns */
 	else if (*s == '#'
                  && PL_lex_inpat
                  && !in_charclass
                  && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
         {
-	    while (s+1 < send && *s != '\n')
+	    while (s < send && *s != '\n')
 		*d++ = *s++;
 	}
-
-	/* no further processing of single-quoted regex */
+            /* no further processing of single-quoted regex */
 	else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
 	    goto default_action;
 
-	/* check for embedded arrays
-	   (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
-	   */
+            /* check for embedded arrays
+             * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
+             */
 	else if (*s == '@' && s[1]) {
-	    if (UTF ? isIDFIRST_utf8((U8*)s+1) : isWORDCHAR_A(s[1]))
+	    if (UTF
+               ? isIDFIRST_utf8_safe(s+1, send)
+               : isWORDCHAR_A(s[1]))
+            {
 		break;
+            }
 	    if (strchr(":'{$", s[1]))
 		break;
 	    if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
 		break; /* in regexp, neither @+ nor @- are interpolated */
 	}
-
-	/* check for embedded scalars.  only stop if we're sure it's a
-	   variable.
-        */
+            /* check for embedded scalars.  only stop if we're sure it's a
+             * variable.  */
 	else if (*s == '$') {
 	    if (!PL_lex_inpat)	/* not a regexp, so $ must be var */
 		break;
@@ -3256,6 +3420,11 @@ S_scan_const(pTHX_ char *start)
 
 	/* End of else if chain - OP_TRANS rejoin rest */
 
+        if (UNLIKELY(s >= send)) {
+            assert(s == send);
+            break;
+        }
+
 	/* backslashes */
 	if (*s == '\\' && s+1 < send) {
 	    char* e;	/* Can be used for ending '}', etc. */
@@ -3345,7 +3514,7 @@ S_scan_const(pTHX_ char *start)
                                                UTF);
 		    if (! valid) {
 			yyerror(error);
-			continue;
+			uv = 0; /* drop through to ensure range ends are set */
 		    }
 		    goto NUM_ESCAPE_INSERT;
 		}
@@ -3363,51 +3532,68 @@ S_scan_const(pTHX_ char *start)
                                                UTF);
 		    if (! valid) {
 			yyerror(error);
-			continue;
+			uv = 0; /* drop through to ensure range ends are set */
 		    }
 		}
 
 	      NUM_ESCAPE_INSERT:
 		/* Insert oct or hex escaped character. */
-		
+
 		/* Here uv is the ordinal of the next character being added */
 		if (UVCHR_IS_INVARIANT(uv)) {
 		    *d++ = (char) uv;
 		}
 		else {
 		    if (!has_utf8 && uv > 255) {
-			/* Might need to recode whatever we have accumulated so
-			 * far if it contains any chars variant in utf8 or
-			 * utf-ebcdic. */
-			  
-			SvCUR_set(sv, d - SvPVX_const(sv));
-			SvPOK_on(sv);
-			*d = '\0';
-			/* See Note on sizing above.  */
-			sv_utf8_upgrade_flags_grow(
-                                       sv,
-                                       SV_GMAGIC|SV_FORCE_UTF8_UPGRADE
-                                                  /* Above-latin1 in string
-                                                   * implies no encoding */
-                                                  |SV_UTF8_NO_ENCODING,
-                                       UVCHR_SKIP(uv) + (STRLEN)(send - s) + 1);
-			d = SvPVX(sv) + SvCUR(sv);
-			has_utf8 = TRUE;
+
+                        /* Here, 'uv' won't fit unless we convert to UTF-8.
+                         * If we've only seen invariants so far, all we have to
+                         * do is turn on the flag */
+                        if (utf8_variant_count == 0) {
+                            SvUTF8_on(sv);
+                        }
+                        else {
+                            SvCUR_set(sv, d - SvPVX_const(sv));
+                            SvPOK_on(sv);
+                            *d = '\0';
+
+                            sv_utf8_upgrade_flags_grow(
+                                           sv,
+                                           SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+
+                                           /* Since we're having to grow here,
+                                            * make sure we have enough room for
+                                            * this escape and a NUL, so the
+                                            * code immediately below won't have
+                                            * to actually grow again */
+                                          UVCHR_SKIP(uv)
+                                        + (STRLEN)(send - s) + 1);
+                            d = SvPVX(sv) + SvCUR(sv);
+                        }
+
+                        has_above_latin1 = TRUE;
+                        has_utf8 = TRUE;
                     }
 
-                    if (has_utf8) {
+                    if (! has_utf8) {
+		        *d++ = (char)uv;
+                        utf8_variant_count++;
+                    }
+		    else {
                        /* Usually, there will already be enough room in 'sv'
                         * since such escapes are likely longer than any UTF-8
                         * sequence they can end up as.  This isn't the case on
                         * EBCDIC where \x{40000000} contains 12 bytes, and the
                         * UTF-8 for it contains 14.  And, we have to allow for
                         * a trailing NUL.  It probably can't happen on ASCII
-                        * platforms, but be safe */
-                        const STRLEN needed = d - SvPVX(sv) + UVCHR_SKIP(uv)
+                        * platforms, but be safe.  See Note on sizing above. */
+                        const STRLEN needed = d - SvPVX(sv)
+                                            + UVCHR_SKIP(uv)
+                                            + (send - s)
                                             + 1;
                         if (UNLIKELY(needed > SvLEN(sv))) {
                             SvCUR_set(sv, d - SvPVX_const(sv));
-                            d = sv_grow(sv, needed) + SvCUR(sv);
+                            d = SvCUR(sv) + SvGROW(sv, needed);
                         }
 
 		        d = (char*)uvchr_to_utf8((U8*)d, uv);
@@ -3418,9 +3604,6 @@ S_scan_const(pTHX_ char *start)
 				(PL_lex_repl ? OPpTRANS_FROM_UTF
 					     : OPpTRANS_TO_UTF);
 			}
-                    }
-		    else {
-		        *d++ = (char)uv;
 		    }
 		}
 #ifdef EBCDIC
@@ -3468,7 +3651,8 @@ S_scan_const(pTHX_ char *start)
                  * braces */
 		s++;
 		if (*s != '{') {
-		    yyerror("Missing braces on \\N{}"); 
+		    yyerror("Missing braces on \\N{}");
+                    *d++ = '\0';
 		    continue;
 		}
 		s++;
@@ -3480,7 +3664,7 @@ S_scan_const(pTHX_ char *start)
 		    } else {
 			yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
 		    }
-		    continue;
+                    yyquit(); /* Have exhausted the input. */
 		}
 
 		/* Here it looks like a named character */
@@ -3499,6 +3683,7 @@ S_scan_const(pTHX_ char *start)
                                 "Invalid hexadecimal number in \\N{U+...}"
                             );
                             s = e + 1;
+                            *d++ = '\0';
                             continue;
                         }
                         while (++s < e) {
@@ -3534,16 +3719,27 @@ S_scan_const(pTHX_ char *start)
 			if (! has_utf8 && (   uv > 0xFF
                                            || PL_lex_inwhat != OP_TRANS))
                         {
+			    /* See Note on sizing above.  */
+                            const STRLEN extra = OFFUNISKIP(uv) + (send - e) + 1;
+
 			    SvCUR_set(sv, d - SvPVX_const(sv));
 			    SvPOK_on(sv);
 			    *d = '\0';
-			    /* See Note on sizing above.  */
-			    sv_utf8_upgrade_flags_grow(
-                                    sv,
-                                    SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
-				    OFFUNISKIP(uv) + (STRLEN)(send - e) + 1);
-			    d = SvPVX(sv) + SvCUR(sv);
+
+                            if (utf8_variant_count == 0) {
+                                SvUTF8_on(sv);
+                                d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
+                            }
+                            else {
+                                sv_utf8_upgrade_flags_grow(
+                                               sv,
+                                               SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+                                               extra);
+                                d = SvPVX(sv) + SvCUR(sv);
+                            }
+
 			    has_utf8 = TRUE;
+                            has_above_latin1 = TRUE;
 			}
 
                         /* Add the (Unicode) code point to the output. */
@@ -3686,36 +3882,52 @@ S_scan_const(pTHX_ char *start)
                                     " in transliteration operator",
                                         /*  +1 to include the "}" */
                                     (int) (e + 1 - start), start));
+                                *d++ = '\0';
                                 goto end_backslash_N;
                             }
+
+                            if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
+                                has_above_latin1 = TRUE;
+                            }
+
                         }
                         else if (! SvUTF8(res)) {
                             /* Make sure \N{} return is UTF-8.  This is because
                              * \N{} implies Unicode semantics, and scalars have
                              * to be in utf8 to guarantee those semantics; but
                              * not needed in tr/// */
-                            sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
+                            sv_utf8_upgrade_flags(res, 0);
                             str = SvPV_const(res, len);
                         }
 
                          /* Upgrade destination to be utf8 if this new
                           * component is */
 			if (! has_utf8 && SvUTF8(res)) {
+			    /* See Note on sizing above.  */
+                            const STRLEN extra = len + (send - s) + 1;
+
 			    SvCUR_set(sv, d - SvPVX_const(sv));
 			    SvPOK_on(sv);
 			    *d = '\0';
-			    /* See Note on sizing above.  */
-			    sv_utf8_upgrade_flags_grow(sv,
+
+                            if (utf8_variant_count == 0) {
+                                SvUTF8_on(sv);
+                                d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
+                            }
+                            else {
+                                sv_utf8_upgrade_flags_grow(sv,
 						SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
-						len + (STRLEN)(send - s) + 1);
-			    d = SvPVX(sv) + SvCUR(sv);
+						extra);
+                                d = SvPVX(sv) + SvCUR(sv);
+                            }
 			    has_utf8 = TRUE;
 			} else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
 
 			    /* See Note on sizing above.  (NOTE: SvCUR() is not
 			     * set correctly here). */
+                            const STRLEN extra = len + (send - e) + 1;
 			    const STRLEN off = d - SvPVX_const(sv);
-			    d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
+			    d = off + SvGROW(sv, off + extra);
 			}
 			Copy(str, d, len, char);
 			d += len;
@@ -3736,15 +3948,16 @@ S_scan_const(pTHX_ char *start)
 	    case 'c':
 		s++;
 		if (s < send) {
-		    *d++ = grok_bslash_c(*s++, 1);
+		    *d++ = grok_bslash_c(*s, 1);
 		}
 		else {
 		    yyerror("Missing control char name in \\c");
+		    yyquit();   /* Are at end of input, no sense continuing */
 		}
 #ifdef EBCDIC
                 non_portable_endpoint++;
 #endif
-		continue;
+                break;
 
 	    /* printf-style backslashes, formfeeds, newlines, etc */
 	    case 'b':
@@ -3775,54 +3988,78 @@ S_scan_const(pTHX_ char *start)
 	} /* end if (backslash) */
 
     default_action:
-	/* If we started with encoded form, or already know we want it,
-	   then encode the next character */
-	if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
-	    STRLEN len  = 1;
-
-	    /* One might think that it is wasted effort in the case of the
-	     * source being utf8 (this_utf8 == TRUE) to take the next character
-	     * in the source, convert it to an unsigned value, and then convert
-	     * it back again.  But the source has not been validated here.  The
-	     * routine that does the conversion checks for errors like
-	     * malformed utf8 */
+        /* Just copy the input to the output, though we may have to convert
+         * to/from UTF-8.
+         *
+         * If the input has the same representation in UTF-8 as not, it will be
+         * a single byte, and we don't care about UTF8ness; just copy the byte */
+        if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
+	    *d++ = *s++;
+        }
+        else if (! this_utf8 && ! has_utf8) {
+            /* If neither source nor output is UTF-8, is also a single byte,
+             * just copy it; but this byte counts should we later have to
+             * convert to UTF-8 */
+	    *d++ = *s++;
+            utf8_variant_count++;
+        }
+        else if (this_utf8 && has_utf8) {   /* Both UTF-8, can just copy */
+	    const STRLEN len = UTF8SKIP(s);
 
+            /* We expect the source to have already been checked for
+             * malformedness */
+            assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
+
+            Copy(s, d, len, U8);
+            d += len;
+            s += len;
+        }
+        else { /* UTF8ness matters and doesn't match, need to convert */
+	    STRLEN len = 1;
 	    const UV nextuv   = (this_utf8)
                                 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
                                 : (UV) ((U8) *s);
-	    const STRLEN need = UVCHR_SKIP(nextuv);
+	    STRLEN need = UVCHR_SKIP(nextuv);
+
 	    if (!has_utf8) {
 		SvCUR_set(sv, d - SvPVX_const(sv));
 		SvPOK_on(sv);
 		*d = '\0';
-		/* See Note on sizing above.  */
-		sv_utf8_upgrade_flags_grow(sv,
-					SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
-					need + (STRLEN)(send - s) + 1);
-		d = SvPVX(sv) + SvCUR(sv);
+
+                /* See Note on sizing above. */
+                need += (STRLEN)(send - s) + 1;
+
+                if (utf8_variant_count == 0) {
+                    SvUTF8_on(sv);
+                    d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
+                }
+                else {
+                    sv_utf8_upgrade_flags_grow(sv,
+                                               SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+                                               need);
+                    d = SvPVX(sv) + SvCUR(sv);
+                }
 		has_utf8 = TRUE;
 	    } else if (need > len) {
 		/* encoded value larger than old, may need extra space (NOTE:
 		 * SvCUR() is not set correctly here).   See Note on sizing
 		 * above.  */
+                const STRLEN extra = need + (send - s) + 1;
 		const STRLEN off = d - SvPVX_const(sv);
-		d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
+		d = off + SvGROW(sv, off + extra);
 	    }
 	    s += len;
 
 	    d = (char*)uvchr_to_utf8((U8*)d, nextuv);
 	}
-	else {
-	    *d++ = *s++;
-	}
     } /* while loop to process each character */
 
     /* terminate the string and set up the sv */
     *d = '\0';
     SvCUR_set(sv, d - SvPVX_const(sv));
     if (SvCUR(sv) >= SvLEN(sv))
-	Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
-		   " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
+	Perl_croak(aTHX_ "panic: constant overflowed allocated space, %" UVuf
+		   " >= %" UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
 
     SvPOK_on(sv);
     if (has_utf8) {
@@ -3963,12 +4200,9 @@ S_intuit_more(pTHX_ char *s)
 	    case '&':
 	    case '$':
 		weight -= seen[un_char] * 10;
-		if (isWORDCHAR_lazy_if(s+1,UTF)) {
+	        if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
 		    int len;
-                    char *tmp = PL_bufend;
-                    PL_bufend = (char*)send;
-                    scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
-                    PL_bufend = tmp;
+		    scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
 		    len = (int)strlen(tmpbuf);
 		    if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
@@ -4089,11 +4323,14 @@ S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
     }
 
     if (*start == '$') {
+        SSize_t start_off = start - SvPVX(PL_linestr);
 	if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
             || isUPPER(*PL_tokenbuf))
 	    return 0;
-	s = skipspace(s);
-	PL_bufptr = start;
+        /* this could be $# */
+        if (isSPACE(*s))
+            s = skipspace(s);
+	PL_bufptr = SvPVX(PL_linestr) + start_off;
 	PL_expect = XREF;
 	return *s == '(' ? FUNCMETH : METHOD;
     }
@@ -4194,7 +4431,7 @@ Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
 		STRLEN const last_lop_pos =
 		    PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 		av_push(PL_rsfp_filters, linestr);
-		PL_parser->linestr = 
+		PL_parser->linestr =
 		    newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
 		buf = SvPVX(PL_parser->linestr);
 		PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
@@ -4393,6 +4630,7 @@ S_tokenize_use(pTHX_ int is_use, char *s) {
     PERL_ARGS_ASSERT_TOKENIZE_USE;
 
     if (PL_expect != XSTATE)
+	/* diag_listed_as: "use" not allowed in expression */
 	yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
 		    is_use ? "use" : "no"));
     PL_expect = XTERM;
@@ -4438,12 +4676,18 @@ static void
 S_check_scalar_slice(pTHX_ char *s)
 {
     s++;
-    while (*s == ' ' || *s == '\t') s++;
-    if (*s == 'q' && s[1] == 'w'
-     && !isWORDCHAR_lazy_if(s+2,UTF))
+    while (SPACE_OR_TAB(*s)) s++;
+    if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
+                                                             PL_bufend,
+                                                             UTF))
+    {
 	return;
-    while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
-	s += UTF ? UTF8SKIP(s) : 1;
+    }
+    while (    isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
+           || (*s && strchr(" \t$#+-'\"", *s)))
+    {
+        s += UTF ? UTF8SKIP(s) : 1;
+    }
     if (*s == '}' || *s == ']')
 	pl_yylval.ival = OPpSLICEWARNING;
 }
@@ -4532,9 +4776,23 @@ Perl_yylex(pTHX)
     GV *gv = NULL;
     GV **gvp = NULL;
 
+    if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
+        const U8* first_bad_char_loc;
+        if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
+                                                        PL_bufend - PL_bufptr,
+                                                        &first_bad_char_loc)))
+        {
+            _force_out_malformed_utf8_message(first_bad_char_loc,
+                                              (U8 *) PL_bufend,
+                                              0,
+                                              1 /* 1 means die */ );
+            NOT_REACHED; /* NOTREACHED */
+        }
+        PL_parser->recheck_utf8_validity = FALSE;
+    }
     DEBUG_T( {
 	SV* tmp = newSVpvs("");
-	PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
+	PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
 	    (IV)CopLINE(PL_curcop),
 	    lex_state_names[PL_lex_state],
 	    exp_name[PL_expect],
@@ -4621,9 +4879,7 @@ Perl_yylex(pTHX)
                 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
                     tmp = *s, *s = s[2], s[2] = (char)tmp;	/* misordered... */
 		if ((*s == 'L' || *s == 'U' || *s == 'F')
-                    && (strchr(PL_lex_casestack, 'L')
-                        || strchr(PL_lex_casestack, 'U')
-                        || strchr(PL_lex_casestack, 'F')))
+                    && (strpbrk(PL_lex_casestack, "LUF")))
                 {
 		    PL_lex_casestack[--PL_lex_casemods] = '\0';
 		    PL_lex_allbrackets--;
@@ -4781,7 +5037,16 @@ Perl_yylex(pTHX)
 	    s = PL_bufend;
 	}
 	else {
+            int save_error_count = PL_error_count;
+
 	    s = scan_const(PL_bufptr);
+
+            /* Set flag if this was a pattern and there were errors.  op.c will
+             * refuse to compile a pattern with this flag set.  Otherwise, we
+             * could get segfaults, etc. */
+            if (PL_lex_inpat && PL_error_count > save_error_count) {
+                ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
+            }
 	    if (*s == '\\')
 		PL_lex_state = LEX_INTERPCASEMOD;
 	    else
@@ -4832,7 +5097,7 @@ Perl_yylex(pTHX)
          * as a var; e.g. ($, ...) would be seen as the var '$,'
          */
 
-        char sigil;
+        U8 sigil;
 
         s = skipspace(s);
         sigil = *s++;
@@ -4852,7 +5117,7 @@ Perl_yylex(pTHX)
                 break;
             }
             s = skipspace(s);
-            if (isIDFIRST_lazy_if(s, UTF)) {
+            if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
                 char *dest = PL_tokenbuf + 1;
                 /* read var name, including sigil, into PL_tokenbuf */
                 PL_tokenbuf[0] = sigil;
@@ -4860,12 +5125,43 @@ Perl_yylex(pTHX)
                     0, cBOOL(UTF), FALSE);
                 *dest = '\0';
                 assert(PL_tokenbuf[1]); /* we have a variable name */
+            }
+            else {
+                *PL_tokenbuf = 0;
+                PL_in_my = 0;
+            }
+
+            s = skipspace(s);
+            /* parse the = for the default ourselves to avoid '+=' etc being accepted here
+             * as the ASSIGNOP, and exclude other tokens that start with =
+             */
+            if (*s == '=' && (!s[1] || strchr("=~>", s[1]) == 0)) {
+                /* save now to report with the same context as we did when
+                 * all ASSIGNOPS were accepted */
+                PL_oldbufptr = s;
+
+                ++s;
+                NEXTVAL_NEXTTOKE.ival = 0;
+                force_next(ASSIGNOP);
+                PL_expect = XTERM;
+            }
+            else if (*s == ',' || *s == ')') {
+                PL_expect = XOPERATOR;
+            }
+            else {
+                /* make sure the context shows the unexpected character and
+                 * hopefully a bit more */
+                if (*s) ++s;
+                while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
+                    s++;
+                PL_bufptr = s; /* for error reporting */
+                yyerror("Illegal operator following parameter in a subroutine signature");
+                PL_in_my = 0;
+            }
+            if (*PL_tokenbuf) {
                 NEXTVAL_NEXTTOKE.ival = sigil;
                 force_next('p'); /* force a signature pending identifier */
             }
-            else
-                PL_in_my = 0;
-            PL_expect = XOPERATOR;
             break;
 
         case ')':
@@ -4890,14 +5186,7 @@ Perl_yylex(pTHX)
     switch (*s) {
     default:
 	if (UTF) {
-            if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) {
-                ENTER;
-                SAVESPTR(PL_warnhook);
-                PL_warnhook = PERL_WARNHOOK_FATAL;
-                utf8n_to_uvchr((U8*)s, PL_bufend-s, NULL, 0);
-                LEAVE;
-            }
-            if (isIDFIRST_utf8((U8*)s)) {
+            if (isIDFIRST_utf8_safe(s, PL_bufend)) {
                 goto keylookup;
             }
         }
@@ -4918,13 +5207,24 @@ Perl_yylex(pTHX)
         else {
             c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
         }
-        len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
-        if (len > UNRECOGNIZED_PRECEDE_COUNT) {
-            d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)PL_linestart) : s - UNRECOGNIZED_PRECEDE_COUNT;
-        } else {
+
+        if (s >= PL_linestart) {
             d = PL_linestart;
         }
-        Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
+        else {
+            /* somehow (probably due to a parse failure), PL_linestart has advanced
+             * pass PL_bufptr, get a reasonable beginning of line
+             */
+            d = s;
+            while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
+                --d;
+        }
+        len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
+        if (len > UNRECOGNIZED_PRECEDE_COUNT) {
+            d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
+        }
+
+        Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
                           UTF8fARG(UTF, (s - d), d),
                          (int) len + 1);
     }
@@ -5027,7 +5327,7 @@ Perl_yylex(pTHX)
 	}
 	do {
 	    fake_eof = 0;
-	    bof = PL_rsfp ? TRUE : FALSE;
+	    bof = cBOOL(PL_rsfp);
 	    if (0) {
 	      fake_eof:
 		fake_eof = LEX_FAKE_EOF;
@@ -5150,8 +5450,6 @@ Perl_yylex(pTHX)
 		d = instr(s,"perl -");
 		if (!d) {
 		    d = instr(s,"perl");
-                    if (d && d[4] == '6')
-                        d = NULL;
 #if defined(DOSISH)
 		    /* avoid getting into infinite loops when shebang
 		     * line contains "Perl" rather than "perl" */
@@ -5430,7 +5728,7 @@ Perl_yylex(pTHX)
 		    PL_expect = XPOSTDEREF;
 		    TOKEN(ARROW);
 		}
-		if (isIDFIRST_lazy_if(s,UTF)) {
+	        if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
 		    s = force_word(s,METHOD,FALSE,TRUE);
 		    TOKEN(ARROW);
 		}
@@ -5529,8 +5827,7 @@ Perl_yylex(pTHX)
 	}
 	else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
 	PL_tokenbuf[0] = '%';
-	s = scan_ident(s, PL_tokenbuf + 1,
-		sizeof PL_tokenbuf - 1, FALSE);
+	s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
 	pl_yylval.ival = 0;
 	if (!PL_tokenbuf[1]) {
 	    PREREF('%');
@@ -5615,7 +5912,7 @@ Perl_yylex(pTHX)
 	 grabattrs:
 	    s = skipspace(s);
 	    attrs = NULL;
-	    while (isIDFIRST_lazy_if(s,UTF)) {
+            while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
 		I32 tmp;
 		SV *sv;
 		d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
@@ -5654,25 +5951,12 @@ Perl_yylex(pTHX)
 		    PL_lex_stuff = NULL;
 		}
 		else {
-		    if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
-			sv_free(sv);
-			if (PL_in_my == KEY_our) {
-			    deprecate(":unique");
-			}
-			else
-			    Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
-		    }
-
 		    /* NOTE: any CV attrs applied here need to be part of
 		       the CVf_BUILTIN_ATTRS define in cv.h! */
-		    else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
+		    if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
 			sv_free(sv);
 			CvLVALUE_on(PL_compcv);
 		    }
-		    else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
-			sv_free(sv);
-			deprecate(":locked");
-		    }
 		    else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
 			sv_free(sv);
 			CvMETHOD_on(PL_compcv);
@@ -5821,7 +6105,7 @@ Perl_yylex(pTHX)
 		while (d < PL_bufend && SPACE_OR_TAB(*d))
 		    d++;
 	    }
-	    if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
+            if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
 		d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
 			      FALSE, &len);
 		while (d < PL_bufend && SPACE_OR_TAB(*d))
@@ -5941,13 +6225,19 @@ Perl_yylex(pTHX)
 		    }
 		    else
 			/* skip plain q word */
-			while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
+			while (   t < PL_bufend
+                               && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
+                        {
 			    t += UTF ? UTF8SKIP(t) : 1;
+                        }
 		}
-		else if (isWORDCHAR_lazy_if(t,UTF)) {
+	        else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
 		    t += UTF ? UTF8SKIP(t) : 1;
-		    while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
+		    while (   t < PL_bufend
+                           && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
+                    {
 			t += UTF ? UTF8SKIP(t) : 1;
+                    }
 		}
 		while (t < PL_bufend && isSPACE(*t))
 		    t++;
@@ -5970,8 +6260,10 @@ Perl_yylex(pTHX)
 			break;
 		    }
 		    if (strEQs(s, "sub")) {
+                        PL_bufptr = s;
 			d = s + 3;
 			d = skipspace(d);
+                        s = PL_bufptr;
 			if (*d == ':') {
 			    PL_expect = XTERM;
 			    break;
@@ -6042,8 +6334,9 @@ Perl_yylex(pTHX)
 	}
 	s--;
 	if (PL_expect == XOPERATOR) {
-	    if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
-		&& isIDFIRST_lazy_if(s,UTF))
+	    if (   PL_bufptr == PL_linestart
+                && ckWARN(WARN_SEMICOLON)
+	        && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
 	    {
 		CopLINE_dec(PL_curcop);
 		Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
@@ -6067,8 +6360,7 @@ Perl_yylex(pTHX)
 	}
 
 	PL_tokenbuf[0] = '&';
-	s = scan_ident(s - 1, PL_tokenbuf + 1,
-		       sizeof PL_tokenbuf - 1, TRUE);
+	s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
 	pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
 	if (PL_tokenbuf[1]) {
 	    force_ident_maybe_lex('&');
@@ -6312,12 +6604,7 @@ Perl_yylex(pTHX)
     case '$':
 	CLINE;
 
-	if (PL_expect == XOPERATOR) {
-	    if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
-		return deprecate_commaless_var_list();
-	    }
-	}
-	else if (PL_expect == XPOSTDEREF) {
+        if (PL_expect == XPOSTDEREF) {
 	    if (s[1] == '#') {
 		s++;
 		POSTDEREF(DOLSHARP);
@@ -6325,7 +6612,10 @@ Perl_yylex(pTHX)
 	    POSTDEREF('$');
 	}
 
-	if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
+	if (   s[1] == '#'
+            && (   isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
+                || strchr("{$:+-@", s[2])))
+        {
 	    PL_tokenbuf[0] = '@';
 	    s = scan_ident(s + 1, PL_tokenbuf + 1,
 			   sizeof PL_tokenbuf - 1, FALSE);
@@ -6345,8 +6635,7 @@ Perl_yylex(pTHX)
 	}
 
 	PL_tokenbuf[0] = '$';
-	s = scan_ident(s, PL_tokenbuf + 1,
-		       sizeof PL_tokenbuf - 1, FALSE);
+	s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
 	if (PL_expect == XOPERATOR) {
 	    d = s;
 	    if (PL_bufptr > s) {
@@ -6374,14 +6663,18 @@ Perl_yylex(pTHX)
 		    if (ckWARN(WARN_SYNTAX)) {
 			char *t = s+1;
 
-			while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
+                        while (   isSPACE(*t)
+                               || isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)
+                               || *t == '$')
+                        {
 			    t += UTF ? UTF8SKIP(t) : 1;
+                        }
 			if (*t++ == ',') {
 			    PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
 			    while (t < PL_bufend && *t != ']')
 				t++;
 			    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-					"Multidimensional syntax %"UTF8f" not supported",
+					"Multidimensional syntax %" UTF8f " not supported",
                                         UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
 			}
 		    }
@@ -6396,17 +6689,21 @@ Perl_yylex(pTHX)
 			    do {
 				t++;
 			    } while (isSPACE(*t));
-			    if (isIDFIRST_lazy_if(t,UTF)) {
+		            if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
 				STRLEN len;
 				t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
 					      &len);
 				while (isSPACE(*t))
 				    t++;
-				if (*t == ';'
-                                       && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
+                                if (  *t == ';'
+                                    && get_cvn_flags(tmpbuf, len, UTF
+                                                                  ? SVf_UTF8
+                                                                  : 0))
+                                {
 				    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-					"You need to quote \"%"UTF8f"\"",
+					"You need to quote \"%" UTF8f "\"",
 					 UTF8fARG(UTF, len, tmpbuf));
+                                }
 			    }
 			}
 		}
@@ -6419,9 +6716,12 @@ Perl_yylex(pTHX)
 		    PL_expect = XOPERATOR;
 		else if (strchr("$@\"'`q", *s))
 		    PL_expect = XTERM;		/* e.g. print $fh "foo" */
-		else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
+		else if (   strchr("&*<%", *s)
+                         && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
+                {
 		    PL_expect = XTERM;		/* e.g. print $fh &sub */
-		else if (isIDFIRST_lazy_if(s,UTF)) {
+                }
+	        else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
 		    char tmpbuf[sizeof PL_tokenbuf];
 		    int t2;
 		    scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
@@ -6520,10 +6820,10 @@ Perl_yylex(pTHX)
         }
 	else {
 	    /* Disable warning on "study /blah/" */
-	    if (PL_oldoldbufptr == PL_last_uni
-	     && (*PL_last_uni != 's' || s - PL_last_uni < 5
-	         || memNE(PL_last_uni, "study", 5)
-	         || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
+	    if (    PL_oldoldbufptr == PL_last_uni
+                && (   *PL_last_uni != 's' || s - PL_last_uni < 5
+                    || memNE(PL_last_uni, "study", 5)
+                    || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
 	     ))
 	        check_uni();
 	    s = scan_pat(s,OP_MATCH);
@@ -6594,10 +6894,6 @@ Perl_yylex(pTHX)
 	TERM(THING);
 
     case '\'':
-	if (   PL_expect == XOPERATOR
-	    && (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack))
-		return deprecate_commaless_var_list();
-
 	s = scan_str(s,FALSE,FALSE,FALSE,NULL);
 	if (!s)
 	    missingterm(NULL);
@@ -6610,10 +6906,6 @@ Perl_yylex(pTHX)
 	TERM(sublex_start());
 
     case '"':
-	if (   PL_expect == XOPERATOR
-	    && (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack))
-		return deprecate_commaless_var_list();
-
 	s = scan_str(s,FALSE,FALSE,FALSE,NULL);
 	DEBUG_T( {
 	    if (s)
@@ -6893,8 +7185,10 @@ Perl_yylex(pTHX)
 	    else {			/* no override */
 		tmp = -tmp;
 		if (tmp == KEY_dump) {
-		    Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
-				   "dump() better written as CORE::dump()");
+		    Perl_ck_warner_d(aTHX_ packWARN2(WARN_MISC,WARN_DEPRECATED),
+				     "dump() better written as CORE::dump(). "
+                                     "dump() will no longer be available "
+                                     "in Perl 5.30");
 		}
 		gv = NULL;
 		gvp = 0;
@@ -6951,7 +7245,7 @@ Perl_yylex(pTHX)
 		    s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
 				  TRUE, &morelen);
 		    if (!morelen)
-			Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
+			Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
 				UTF8fARG(UTF, len, PL_tokenbuf),
 				*s == '\'' ? "'" : "::");
 		    len += morelen;
@@ -6979,8 +7273,9 @@ Perl_yylex(pTHX)
 		    if (ckWARN(WARN_BAREWORD)
 			&& ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
 			Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
-		  	  "Bareword \"%"UTF8f"\" refers to nonexistent package",
-			   UTF8fARG(UTF, len, PL_tokenbuf));
+                                    "Bareword \"%" UTF8f
+                                    "\" refers to nonexistent package",
+                                    UTF8fARG(UTF, len, PL_tokenbuf));
 		    len -= 2;
 		    PL_tokenbuf[len] = '\0';
 		    gv = NULL;
@@ -7044,17 +7339,24 @@ Perl_yylex(pTHX)
                                                                == OA_FILEREF))
 		{
 		    bool immediate_paren = *s == '(';
+                    SSize_t s_off;
 
 		    /* (Now we can afford to cross potential line boundary.) */
 		    s = skipspace(s);
 
+                    /* intuit_method() can indirectly call lex_next_chunk(),
+                     * invalidating s
+                     */
+                    s_off = s - SvPVX(PL_linestr);
 		    /* Two barewords in a row may indicate method call. */
-
-		    if ((isIDFIRST_lazy_if(s,UTF) || *s == '$')
+	            if (   (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
+                            || *s == '$')
                         && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
                     {
+                        /* the code at method: doesn't use s */
 			goto method;
 		    }
+                    s = SvPVX(PL_linestr) + s_off;
 
 		    /* If not a declared subroutine, it's an indirect object. */
 		    /* (But it's an indir obj regardless for sort.) */
@@ -7135,9 +7437,11 @@ Perl_yylex(pTHX)
 
 		/* If followed by a bareword, see if it looks like indir obj. */
 
-		if (tmp == 1 && !orig_keyword
-			&& (isIDFIRST_lazy_if(s,UTF) || *s == '$')
-			&& (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
+		if (   tmp == 1
+                    && !orig_keyword
+                    && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
+                    && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
+                {
 		  method:
 		    if (lex && !off) {
 			assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
@@ -7281,7 +7585,7 @@ Perl_yylex(pTHX)
 		if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
 		 && saw_infix_sigil) {
 		    Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
-				     "Operator or semicolon missing before %c%"UTF8f,
+				     "Operator or semicolon missing before %c%" UTF8f,
 				     lastchar,
 				     UTF8fARG(UTF, strlen(PL_tokenbuf),
 					      PL_tokenbuf));
@@ -7300,7 +7604,7 @@ Perl_yylex(pTHX)
 	case KEY___LINE__:
 	    FUN0OP(
                 newSVOP(OP_CONST, 0,
-		    Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
+		    Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
 	    );
 
 	case KEY___PACKAGE__:
@@ -7405,7 +7709,7 @@ Perl_yylex(pTHX)
 		    goto just_a_word;
 		}
 		if (!tmp)
-		    Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
+		    Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
 				      UTF8fARG(UTF, len, PL_tokenbuf));
 		if (tmp < 0)
 		    tmp = -tmp;
@@ -7576,7 +7880,7 @@ Perl_yylex(pTHX)
 
 	case KEY_exists:
 	    UNI(OP_EXISTS);
-	
+
 	case KEY_exit:
 	    UNI(OP_EXIT);
 
@@ -7631,8 +7935,11 @@ Perl_yylex(pTHX)
 		return REPORT(0);
 	    pl_yylval.ival = CopLINE(PL_curcop);
 	    s = skipspace(s);
-	    if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
+            if (   PL_expect == XSTATE
+                && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
+            {
 		char *p = s;
+                SSize_t s_off = s - SvPVX(PL_linestr);
 
 		if ((PL_bufend - p) >= 3
                     && strEQs(p, "my") && isSPACE(*(p + 2)))
@@ -7644,12 +7951,15 @@ Perl_yylex(pTHX)
 		    p += 3;
 		p = skipspace(p);
                 /* skip optional package name, as in "for my abc $x (..)" */
-		if (isIDFIRST_lazy_if(p,UTF)) {
+	        if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) {
 		    p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
 		    p = skipspace(p);
 		}
 		if (*p != '$' && *p != '\\')
 		    Perl_croak(aTHX_ "Missing $ on loop variable");
+
+                /* The buffer may have been reallocated, update s */
+                s = SvPVX(PL_linestr) + s_off;
 	    }
 	    OPERATOR(FOR);
 
@@ -7810,7 +8120,7 @@ Perl_yylex(pTHX)
 
 	case KEY_last:
 	    LOOPX(OP_LAST);
-	
+
 	case KEY_lc:
 	    UNI(OP_LC);
 
@@ -7887,7 +8197,7 @@ Perl_yylex(pTHX)
 	    }
 	    PL_in_my = (U16)tmp;
 	    s = skipspace(s);
-	    if (isIDFIRST_lazy_if(s,UTF)) {
+            if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
 		s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
 		if (len == 3 && strEQs(PL_tokenbuf, "sub"))
 		    goto really_sub;
@@ -7937,10 +8247,10 @@ Perl_yylex(pTHX)
 
 	case KEY_open:
 	    s = skipspace(s);
-	    if (isIDFIRST_lazy_if(s,UTF)) {
-          const char *t;
-          d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
-              &len);
+            if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
+                const char *t;
+                d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
+                              &len);
 		for (t=d; isSPACE(*t);)
 		    t++;
 		if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
@@ -7950,7 +8260,7 @@ Perl_yylex(pTHX)
 		    && !keyword(s, d-s, 0)
 		) {
 		    Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
-		       "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
+		       "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
 			UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
 		}
 	    }
@@ -7990,7 +8300,7 @@ Perl_yylex(pTHX)
 
 	case KEY_pos:
 	    UNIDOR(OP_POS);
-	
+
 	case KEY_pack:
 	    LOP(OP_PACK,XTERM);
 
@@ -8097,9 +8407,13 @@ Perl_yylex(pTHX)
 	    {
 		*PL_tokenbuf = '\0';
 		s = force_word(s,BAREWORD,TRUE,TRUE);
-		if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
+                if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
+                                           PL_tokenbuf + sizeof(PL_tokenbuf),
+                                           UTF))
+                {
 		    gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
+                }
 		else if (*s == '<')
 		    yyerror("<> at require-statement should be quotes");
 	    }
@@ -8107,7 +8421,7 @@ Perl_yylex(pTHX)
 		orig_keyword = 0;
 		pl_yylval.ival = 1;
 	    }
-	    else 
+	    else
 		pl_yylval.ival = 0;
 	    PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
 	    PL_bufptr = s;
@@ -8174,7 +8488,7 @@ Perl_yylex(pTHX)
 
 	case KEY_chomp:
 	    UNI(OP_CHOMP);
-	
+
 	case KEY_scalar:
 	    UNI(OP_SCALAR);
 
@@ -8301,7 +8615,7 @@ Perl_yylex(pTHX)
 		s = skipspace(s);
                 d = SvPVX(PL_linestr)+off;
 
-		if (isIDFIRST_lazy_if(s,UTF)
+                if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
                     || *s == '\''
                     || (*s == ':' && s[1] == ':'))
 		{
@@ -8360,7 +8674,8 @@ Perl_yylex(pTHX)
 		    COPLINE_SET_FROM_MULTI_END;
 		    if (!s)
 			Perl_croak(aTHX_ "Prototype not terminated");
-		    (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
+		    (void)validate_proto(PL_subname, PL_lex_stuff,
+					 ckWARN(WARN_ILLEGALPROTO), 0);
 		    have_proto = TRUE;
 
 		    s = skipspace(s);
@@ -8380,7 +8695,7 @@ Perl_yylex(pTHX)
 		    if (!have_name)
 			Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
 		    else if (*s != ';' && *s != '}')
-			Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
+			Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
 		}
 
 		if (have_proto) {
@@ -8595,8 +8910,11 @@ S_pending_ident(pTHX)
     if (PL_in_my) {
         if (PL_in_my == KEY_our) {	/* "our" is merely analogous to "my" */
             if (has_colon)
+                /* diag_listed_as: No package name allowed for variable %s
+                                   in "our" */
                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
-                                  "variable %s in \"our\"",
+                                  "%se %s in \"our\"",
+                                  *PL_tokenbuf=='&' ?"subroutin":"variabl",
                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
         }
@@ -8693,7 +9011,7 @@ S_pending_ident(pTHX)
         {
             /* Downgraded from fatal to warning 20000522 mjd */
             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
-			"Possible unintended interpolation of %"UTF8f
+			"Possible unintended interpolation of %" UTF8f
 			" in string",
 			UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
         }
@@ -8747,16 +9065,15 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
 	s++;
     while (s < PL_bufend && isSPACE(*s))
 	s++;
-    if (isIDFIRST_lazy_if(s,UTF)) {
+    if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
 	const char * const w = s;
         s += UTF ? UTF8SKIP(s) : 1;
-	while (isWORDCHAR_lazy_if(s,UTF))
+	while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
 	    s += UTF ? UTF8SKIP(s) : 1;
 	while (s < PL_bufend && isSPACE(*s))
 	    s++;
 	if (*s == ',') {
 	    GV* gv;
-	    PADOFFSET off;
 	    if (keyword(w, s - w, 0))
 		return;
 
@@ -8764,6 +9081,7 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
 	    if (gv && GvCVu(gv))
 		return;
 	    if (s - w <= 254) {
+                PADOFFSET off;
 		char tmpbuf[256];
 		Copy(w, tmpbuf+1, s - w, char);
 		*tmpbuf = '&';
@@ -8814,7 +9132,7 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
 	|| ! SvOK(*cvp))
     {
 	char *msg;
-	
+
 	/* Here haven't found what we're looking for.  If it is charnames,
 	 * perhaps it needs to be loaded.  Try doing that before giving up */
 	if (*key == 'c') {
@@ -8923,21 +9241,23 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
 
 PERL_STATIC_INLINE void
 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
-                    bool is_utf8, bool check_dollar) {
+                    bool is_utf8, bool check_dollar)
+{
     PERL_ARGS_ASSERT_PARSE_IDENT;
 
-    for (;;) {
+    while (*s < PL_bufend) {
         if (*d >= e)
             Perl_croak(aTHX_ "%s", ident_too_long);
-        if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
+        if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
              /* The UTF-8 case must come first, otherwise things
              * like c\N{COMBINING TILDE} would start failing, as the
              * isWORDCHAR_A case below would gobble the 'c' up.
              */
 
             char *t = *s + UTF8SKIP(*s);
-            while (isIDCONT_utf8((U8*)t))
+            while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
                 t += UTF8SKIP(t);
+            }
             if (*d + (t - *s) > e)
                 Perl_croak(aTHX_ "%s", ident_too_long);
             Copy(*s, *d, t - *s, char);
@@ -8949,7 +9269,10 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
                 *(*d)++ = *(*s)++;
             } while (isWORDCHAR_A(**s) && *d < e);
         }
-        else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
+        else if (   allow_package
+                 && **s == '\''
+                 && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
+        {
             *(*d)++ = ':';
             *(*d)++ = ':';
             (*s)++;
@@ -9000,10 +9323,10 @@ S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN
  *      Because all ASCII characters have the same representation whether
  *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
  *      '{' without knowing if is UTF-8 or not. */
-#define VALID_LEN_ONE_IDENT(s, is_utf8)                                       \
-    (isGRAPH_A(*(s)) || ((is_utf8)                                            \
-                         ? isIDFIRST_utf8((U8*) (s))                          \
-                         : (isGRAPH_L1(*s)                                    \
+#define VALID_LEN_ONE_IDENT(s, e, is_utf8)                                  \
+    (isGRAPH_A(*(s)) || ((is_utf8)                                          \
+                         ? isIDFIRST_utf8_safe(s, e)                        \
+                         : (isGRAPH_L1(*s)                                  \
                             && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
 
 STATIC char *
@@ -9044,7 +9367,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
     /* Here, it is not a run-of-the-mill identifier name */
 
     if (*s == '$' && s[1]
-        && (isIDFIRST_lazy_if(s+1,is_utf8)
+        && (   isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
             || isDIGIT_A((U8)s[1])
             || s[1] == '$'
             || s[1] == '{'
@@ -9067,7 +9390,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
     if ((s <= PL_bufend - (is_utf8)
                           ? UTF8SKIP(s)
                           : 1)
-        && VALID_LEN_ONE_IDENT(s, is_utf8))
+        && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
     {
         if (is_utf8) {
             const STRLEN skip = UTF8SKIP(s);
@@ -9095,19 +9418,36 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
         bool skip;
         char *s2;
         /* If we were processing {...} notation then...  */
-	if (isIDFIRST_lazy_if(d,is_utf8)) {
-            /* if it starts as a valid identifier, assume that it is one.
-               (the later check for } being at the expected point will trap
-               cases where this doesn't pan out.)  */
-            d += is_utf8 ? UTF8SKIP(d) : 1;
-            parse_ident(&s, &d, e, 1, is_utf8, TRUE);
-	    *d = '\0';
+        if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
+            || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
+                 && isWORDCHAR(*s))
+        ) {
+            /* note we have to check for a normal identifier first,
+             * as it handles utf8 symbols, and only after that has
+             * been ruled out can we look at the caret words */
+            if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
+                /* if it starts as a valid identifier, assume that it is one.
+                   (the later check for } being at the expected point will trap
+                   cases where this doesn't pan out.)  */
+                d += is_utf8 ? UTF8SKIP(d) : 1;
+                parse_ident(&s, &d, e, 1, is_utf8, TRUE);
+                *d = '\0';
+            }
+            else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
+                d++;
+                while (isWORDCHAR(*s) && d < e) {
+                    *d++ = *s++;
+                }
+                if (d >= e)
+                    Perl_croak(aTHX_ "%s", ident_too_long);
+                *d = '\0';
+            }
             tmp_copline = CopLINE(PL_curcop);
             if (s < PL_bufend && isSPACE(*s)) {
                 s = skipspace(s);
             }
 	    if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
-                /* ${foo[0]} and ${foo{bar}} notation.  */
+                /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation.  */
 		if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
 		    const char * const brack =
 			(const char *)
@@ -9126,29 +9466,19 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
 		return s;
 	    }
 	}
-	/* Handle extended ${^Foo} variables
-	 * 1999-02-27 mjd-perl-patch@plover.com */
-	else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
-		 && isWORDCHAR(*s))
-	{
-	    d++;
-	    while (isWORDCHAR(*s) && d < e) {
-		*d++ = *s++;
-	    }
-	    if (d >= e)
-		Perl_croak(aTHX_ "%s", ident_too_long);
-	    *d = '\0';
-	}
 
         if ( !tmp_copline )
             tmp_copline = CopLINE(PL_curcop);
-        if ((skip = s < PL_bufend && isSPACE(*s)))
+        if ((skip = s < PL_bufend && isSPACE(*s))) {
             /* Avoid incrementing line numbers or resetting PL_linestart,
                in case we have to back up.  */
+            STRLEN s_off = s - SvPVX(PL_linestr);
             s2 = peekspace(s);
+            s = SvPVX(PL_linestr) + s_off;
+        }
         else
             s2 = s;
-	    
+
         /* Expect to find a closing } after consuming any trailing whitespace.
          */
         if (*s2 == '}') {
@@ -9174,7 +9504,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
                     orig_copline = CopLINE(PL_curcop);
                     CopLINE_set(PL_curcop, tmp_copline);
 		    Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
-			"Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
+			"Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
 			funny, SVfARG(tmp), funny, SVfARG(tmp));
                     CopLINE_set(PL_curcop, orig_copline);
 		}
@@ -9211,7 +9541,7 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse
     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
 
     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
-        if (isWORDCHAR_lazy_if(*s, UTF)) {
+        if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
                        UTF ? SVf_UTF8 : 0);
             (*s) += charlen;
@@ -9355,14 +9685,10 @@ S_scan_pat(pTHX_ char *start, I32 type)
     /* issue a warning if /c is specified,but /g is not */
     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
     {
-        Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), 
+        Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
 		       "Use of /c modifier is meaningless without /g" );
     }
 
-    if (UNLIKELY((x_mod_count) > 1)) {
-        yyerror("Only one /x regex modifier is allowed");
-    }
-
     PL_lex_op = (OP*)pm;
     pl_yylval.ival = OP_MATCH;
     return s;
@@ -9417,10 +9743,6 @@ S_scan_subst(pTHX_ char *start)
 	}
     }
 
-    if (UNLIKELY((x_mod_count) > 1)) {
-        yyerror("Only one /x regex modifier is allowed");
-    }
-
     if ((pm->op_pmflags & PMf_CONTINUE)) {
         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
     }
@@ -9430,18 +9752,14 @@ S_scan_subst(pTHX_ char *start)
 
 	PL_multi_end = 0;
 	pm->op_pmflags |= PMf_EVAL;
-	while (es-- > 0) {
-	    if (es)
-		sv_catpvs(repl, "eval ");
-	    else
-		sv_catpvs(repl, "do ");
-	}
-	sv_catpvs(repl, "{");
+        for (; es > 1; es--) {
+            sv_catpvs(repl, "eval ");
+        }
+        sv_catpvs(repl, "do {");
 	sv_catsv(repl, PL_parser->lex_sub_repl);
 	sv_catpvs(repl, "}");
 	SvREFCNT_dec(PL_parser->lex_sub_repl);
 	PL_parser->lex_sub_repl = repl;
-        es = 1;
     }
 
 
@@ -9455,7 +9773,8 @@ S_scan_subst(pTHX_ char *start)
          * spreads over */
         sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
         ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = 0;
-        ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen = es;
+        ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
+                                                                    cBOOL(es);
     }
 
     PL_lex_op = (OP*)pm;
@@ -9595,10 +9914,12 @@ S_scan_heredoc(pTHX_ char *s)
 	    s++, term = '\'';
 	else
 	    term = '"';
-	if (!isWORDCHAR_lazy_if(s,UTF))
-	    deprecate("bare << to mean <<\"\"");
+	if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
+	    Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
 	peek = s;
-	while (isWORDCHAR_lazy_if(peek,UTF)) {
+        while (
+               isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF))
+        {
 	    peek += UTF ? UTF8SKIP(peek) : 1;
 	}
 	len = (peek - s >= e - d) ? (e - d) : (peek - s);
@@ -9713,7 +10034,7 @@ S_scan_heredoc(pTHX_ char *s)
 
 		    /* Only valid if it's preceded by whitespace only */
 		    while (backup != myolds && --backup >= myolds) {
-			if (*backup != ' ' && *backup != '\t') {
+			if (! SPACE_OR_TAB(*backup)) {
 			    break;
 			}
 
@@ -9849,14 +10170,14 @@ S_scan_heredoc(pTHX_ char *s)
 
 		/* Only valid if it's preceded by whitespace only */
 		while (backup != s && --backup >= s) {
-		    if (*backup != ' ' && *backup != '\t') {
+		    if (! SPACE_OR_TAB(*backup)) {
 			break;
 		    }
 		    indent_len++;
 		}
 
 		/* All whitespace or none! */
-		if (backup == found || *backup == ' ' || *backup == '\t') {
+		if (backup == found || SPACE_OR_TAB(*backup)) {
 		    Newxz(indent, indent_len + 1, char);
 		    memcpy(indent, backup, indent_len);
 		    SvREFCNT_dec(PL_linestr);
@@ -9896,8 +10217,8 @@ S_scan_heredoc(pTHX_ char *s)
 	STRLEN herelen = SvCUR(tmpstr);
 	char *ss = SvPVX(tmpstr);
 	char *se = ss + herelen;
-	SV *newstr = newSVpvs("");
-	SvGROW(newstr, herelen);
+        SV *newstr = newSV(herelen+1);
+        SvPOK_on(newstr);
 
 	/* Trim leading whitespace */
 	while (ss < se) {
@@ -9905,6 +10226,7 @@ S_scan_heredoc(pTHX_ char *s)
 	    if (*ss == '\n') {
 		sv_catpv(newstr,"\n");
 		ss++;
+		linecount++;
 
 	    /* Found our indentation? Strip it */
 	    } else if (se - ss >= indent_len
@@ -9928,12 +10250,9 @@ S_scan_heredoc(pTHX_ char *s)
 		    (int)linecount
 		);
 	    }
-
-	    linecount++;
 	}
-
-	sv_setsv(tmpstr,newstr);
-
+        /* avoid sv_setsv() as we dont wan't to COW here */
+        sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
 	Safefree(indent);
 	SvREFCNT_dec_NN(newstr);
     }
@@ -10017,8 +10336,9 @@ S_scan_inputsymbol(pTHX_ char *start)
     if (*d == '$' && d[1]) d++;
 
     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
-    while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
+    while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
 	d += UTF ? UTF8SKIP(d) : 1;
+    }
 
     /* If we've tried to read what we allow filehandles to look like, and
        there's still text left, then it must be a glob() and not a getline.
@@ -10145,7 +10465,7 @@ S_scan_inputsymbol(pTHX_ char *start)
 	($*@)		sub prototypes		sub foo ($)
 	(stuff)		sub attr parameters	sub foo : attr(stuff)
 	<>		readline or globs	<FOO>, <>, <$fh>, or <*.c>
-	
+
    In most of these cases (all but <>, patterns and transliterate)
    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
    calls scan_str().  s/// makes yylex() call scan_subst() which calls
@@ -10182,6 +10502,18 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
     STRLEN termlen;		/* length of terminating string */
     line_t herelines;
 
+    /* The delimiters that have a mirror-image closing one */
+    const char * opening_delims = "([{<";
+    const char * closing_delims = ")]}>";
+
+    const char * non_grapheme_msg = "Use of unassigned code point or"
+                                    " non-standalone grapheme for a delimiter"
+                                    " will be a fatal error starting in Perl"
+                                    " 5.30";
+    /* The only non-UTF character that isn't a stand alone grapheme is
+     * white-space, hence can't be a delimiter.  So can skip for non-UTF-8 */
+    bool check_grapheme = UTF && ckWARN_d(WARN_DEPRECATED);
+
     PERL_ARGS_ASSERT_SCAN_STR;
 
     /* skip space before the delimiter */
@@ -10194,15 +10526,35 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
 
     /* after skipping whitespace, the next character is the terminator */
     term = *s;
-    if (!UTF) {
+    if (!UTF || UTF8_IS_INVARIANT(term)) {
 	termcode = termstr[0] = term;
 	termlen = 1;
     }
     else {
 	termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
+        if (check_grapheme) {
+            if (   UNLIKELY(UNICODE_IS_SUPER(termcode))
+                || UNLIKELY(UNICODE_IS_NONCHAR(termcode)))
+            {
+                /* These are considered graphemes, and since the ending
+                 * delimiter will be the same, we don't have to check the other
+                 * end */
+                check_grapheme = FALSE;
+            }
+            else if (UNLIKELY(! _is_grapheme((U8 *) start,
+                                             (U8 *) s,
+                                             (U8 *) PL_bufend,
+                                             termcode)))
+            {
+                Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "%s", non_grapheme_msg);
+
+                /* Don't have to check the other end, as have already warned at
+                 * this one */
+                check_grapheme = FALSE;
+            }
+        }
+
 	Copy(s, termstr, termlen, U8);
-	if (!UTF8_IS_INVARIANT(term))
-	    has_utf8 = TRUE;
     }
 
     /* mark where we are */
@@ -10210,9 +10562,10 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
     PL_multi_open = termcode;
     herelines = PL_parser->herelines;
 
-    /* find corresponding closing delimiter */
-    if (term && (tmps = strchr("([{< )]}> )]}>",term)))
-	termcode = termstr[0] = term = tmps[5];
+    /* If the delimiter has a mirror-image closing one, get it */
+    if (term && (tmps = strchr(opening_delims, term))) {
+        termcode = termstr[0] = term = closing_delims[tmps - opening_delims];
+    }
 
     PL_multi_close = termcode;
 
@@ -10255,18 +10608,35 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
 		}
 		/* terminate when run out of buffer (the for() condition), or
 		   have found the terminator */
-		else if (*s == term) {
-		    if (termlen == 1)
+		else if (*s == term) {  /* First byte of terminator matches */
+		    if (termlen == 1)   /* If is the only byte, are done */
 			break;
-		    if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
+
+                    /* If the remainder of the terminator matches, also are
+                     * done, after checking that is a separate grapheme */
+                    if (   s + termlen <= PL_bufend
+                        && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
+                    {
+                        if (   check_grapheme
+                            && UNLIKELY(! _is_grapheme((U8 *) start,
+                                                              (U8 *) s,
+                                                              (U8 *) PL_bufend,
+                                                              termcode)))
+                        {
+                            Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+                                        "%s", non_grapheme_msg);
+                        }
 			break;
+                    }
 		}
-		else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
+		else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
 		    has_utf8 = TRUE;
+                }
+
 		*to = *s;
 	    }
 	}
-	
+
 	/* if the terminator isn't the same as the start character (e.g.,
 	   matched brackets), we have to allow more in the quoting, and
 	   be prepared for nested brackets.
@@ -10324,7 +10694,7 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
 	else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
 	    to[-1] = '\n';
 #endif
-	
+
 	/* if we're out of file, or a read fails, bail and reset the current
 	   line marker so we can report where the unterminated string began
 	*/
@@ -10335,7 +10705,7 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
 	    CopLINE_set(PL_curcop, (line_t)PL_multi_start);
 	    return NULL;
 	}
-	s = PL_bufptr;
+	s = start = PL_bufptr;
     }
 
     /* at this point, we have successfully read the delimited string */
@@ -10403,6 +10773,15 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
     bool floatit;			/* boolean: int or float? */
     const char *lastub = NULL;		/* position of last underbar */
     static const char* const number_too_long = "Number too long";
+    bool warned_about_underscore = 0;
+#define WARN_ABOUT_UNDERSCORE() \
+	do { \
+	    if (!warned_about_underscore) { \
+		warned_about_underscore = 1; \
+		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
+			       "Misplaced _ in number"); \
+	    } \
+	} while(0)
     /* Hexadecimal floating point.
      *
      * In many places (where we have quads and NV is IEEE 754 double)
@@ -10487,8 +10866,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 	    }
 
 	    if (*s == '_') {
-		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
-			       "Misplaced _ in number");
+		WARN_ABOUT_UNDERSCORE();
 	       lastub = s++;
 	    }
 
@@ -10511,8 +10889,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 		/* _ are ignored -- but warned about if consecutive */
 		case '_':
 		    if (lastub && s == lastub + 1)
-		        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
-				       "Misplaced _ in number");
+			WARN_ABOUT_UNDERSCORE();
 		    lastub = s++;
 		    break;
 
@@ -10597,9 +10974,8 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 	  out:
 
 	    /* final misplaced underbar check */
-	    if (s[-1] == '_') {
-		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
-	    }
+	    if (s[-1] == '_')
+		WARN_ABOUT_UNDERSCORE();
 
             if (UNLIKELY(HEXFP_PEEK(s))) {
                 /* Do sloppy (on the underbars) but quick detection
@@ -10808,8 +11184,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 	    */
 	    if (*s == '_') {
 		if (lastub && s == lastub + 1)
-		    Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
-				   "Misplaced _ in number");
+		    WARN_ABOUT_UNDERSCORE();
 		lastub = s++;
 	    }
 	    else {
@@ -10822,9 +11197,8 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 	}
 
 	/* final misplaced underbar check */
-	if (lastub && s == lastub + 1) {
-	    Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
-	}
+	if (lastub && s == lastub + 1)
+	    WARN_ABOUT_UNDERSCORE();
 
 	/* read a decimal portion if there is one.  avoid
 	   3..5 being interpreted as the number 3. followed
@@ -10835,8 +11209,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 	    *d++ = *s++;
 
 	    if (*s == '_') {
-		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
-			       "Misplaced _ in number");
+		WARN_ABOUT_UNDERSCORE();
 		lastub = s;
 	    }
 
@@ -10852,18 +11225,15 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 		    Perl_croak(aTHX_ "%s", number_too_long);
 		if (*s == '_') {
 		   if (lastub && s == lastub + 1)
-		       Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
-				      "Misplaced _ in number");
+			WARN_ABOUT_UNDERSCORE();
 		   lastub = s;
 		}
 		else
 		    *d++ = *s;
 	    }
 	    /* fractional part ending in underbar? */
-	    if (s[-1] == '_') {
-		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
-			       "Misplaced _ in number");
-	    }
+	    if (s[-1] == '_')
+		WARN_ABOUT_UNDERSCORE();
 	    if (*s == '.' && isDIGIT(s[1])) {
 		/* oops, it's really a v-string, but without the "v" */
 		s = start;
@@ -10876,9 +11246,11 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
               || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
             && strchr("+-0123456789_", s[1]))
         {
-            floatit = TRUE;
+            int exp_digits = 0;
+            const char *save_s = s;
+            char * save_d = d;
 
-	    /* regardless of whether user said 3E5 or 3e5, use lower 'e',
+            /* regardless of whether user said 3E5 or 3e5, use lower 'e',
                ditto for p (hexfloats) */
             if ((isALPHA_FOLD_EQ(*s, 'e'))) {
 		/* At least some Mach atof()s don't grok 'E' */
@@ -10893,8 +11265,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 
 	    /* stray preinitial _ */
 	    if (*s == '_') {
-		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
-			       "Misplaced _ in number");
+		WARN_ABOUT_UNDERSCORE();
 	        lastub = s++;
 	    }
 
@@ -10904,14 +11275,14 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 
 	    /* stray initial _ */
 	    if (*s == '_') {
-		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
-			       "Misplaced _ in number");
+		WARN_ABOUT_UNDERSCORE();
 	        lastub = s++;
 	    }
 
 	    /* read digits of exponent */
 	    while (isDIGIT(*s) || *s == '_') {
 	        if (isDIGIT(*s)) {
+                    ++exp_digits;
 		    if (d >= e)
 		        Perl_croak(aTHX_ "%s", number_too_long);
 		    *d++ = *s++;
@@ -10919,11 +11290,24 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 		else {
 		   if (((lastub && s == lastub + 1)
                         || (!isDIGIT(s[1]) && s[1] != '_')))
-		       Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
-				      "Misplaced _ in number");
+			WARN_ABOUT_UNDERSCORE();
 		   lastub = s++;
 		}
 	    }
+
+            if (!exp_digits) {
+                /* no exponent digits, the [eEpP] could be for something else,
+                 * though in practice we don't get here for p since that's preparsed
+                 * earlier, and results in only the 0xX being consumed, so behave similarly
+                 * for decimal floats and consume only the D.DD, leaving the [eE] to the
+                 * next token.
+                 */
+                s = save_s;
+                d = save_d;
+            }
+            else {
+                floatit = TRUE;
+            }
 	}
 
 
@@ -11005,8 +11389,6 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 STATIC char *
 S_scan_formline(pTHX_ char *s)
 {
-    char *eol;
-    char *t;
     SV * const stuff = newSVpvs("");
     bool needargs = FALSE;
     bool eofmt = FALSE;
@@ -11014,8 +11396,9 @@ S_scan_formline(pTHX_ char *s)
     PERL_ARGS_ASSERT_SCAN_FORMLINE;
 
     while (!needargs) {
+        char *eol;
 	if (*s == '.') {
-	    t = s+1;
+            char *t = s+1;
 #ifdef PERL_STRICT_CR
 	    while (SPACE_OR_TAB(*t))
 		t++;
@@ -11032,6 +11415,7 @@ S_scan_formline(pTHX_ char *s)
 	if (!eol++)
 		eol = PL_bufend;
 	if (*s != '#') {
+            char *t;
 	    for (t = s; t < eol; t++) {
 		if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
 		    needargs = FALSE;
@@ -11075,8 +11459,7 @@ S_scan_formline(pTHX_ char *s)
 	PL_expect = XSTATE;
 	if (needargs) {
 	    const char *s2 = s;
-	    while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
-		|| *s2 == '\v')
+	    while (isSPACE(*s2) && *s2 != '\n')
 		s2++;
 	    if (*s2 == '{') {
 		PL_expect = XTERMBLOCK;
@@ -11134,6 +11517,29 @@ S_yywarn(pTHX_ const char *const s, U32 flags)
     return 0;
 }
 
+void
+Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
+{
+    PERL_ARGS_ASSERT_ABORT_EXECUTION;
+
+    if (PL_minus_c)
+        Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
+    else {
+        Perl_croak(aTHX_
+                "%sExecution of %s aborted due to compilation errors.\n", msg, name);
+    }
+    NOT_REACHED; /* NOTREACHED */
+}
+
+void
+Perl_yyquit(pTHX)
+{
+    /* Called, after at least one error has been found, to abort the parse now,
+     * instead of trying to forge ahead */
+
+    yyerror_pvn(NULL, 0, 0);
+}
+
 int
 Perl_yyerror(pTHX_ const char *const s)
 {
@@ -11157,100 +11563,120 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
     int yychar  = PL_parser->yychar;
 
-    PERL_ARGS_ASSERT_YYERROR_PVN;
-
-    if (!yychar || (yychar == ';' && !PL_rsfp))
-	sv_catpvs(where_sv, "at EOF");
-    else if (   PL_oldoldbufptr
-             && PL_bufptr > PL_oldoldbufptr
-             && PL_bufptr - PL_oldoldbufptr < 200
-             && PL_oldoldbufptr != PL_oldbufptr
-             && PL_oldbufptr != PL_bufptr)
-    {
-	/*
-		Only for NetWare:
-		The code below is removed for NetWare because it abends/crashes on NetWare
-		when the script has error such as not having the closing quotes like:
-		    if ($var eq "value)
-		Checking of white spaces is anyway done in NetWare code.
-	*/
+    /* Output error message 's' with length 'len'.  'flags' are SV flags that
+     * apply.  If the number of errors found is large enough, it abandons
+     * parsing.  If 's' is NULL, there is no message, and it abandons
+     * processing unconditionally */
+
+    if (s != NULL) {
+        if (!yychar || (yychar == ';' && !PL_rsfp))
+            sv_catpvs(where_sv, "at EOF");
+        else if (   PL_oldoldbufptr
+                 && PL_bufptr > PL_oldoldbufptr
+                 && PL_bufptr - PL_oldoldbufptr < 200
+                 && PL_oldoldbufptr != PL_oldbufptr
+                 && PL_oldbufptr != PL_bufptr)
+        {
+            /*
+                    Only for NetWare:
+                    The code below is removed for NetWare because it
+                    abends/crashes on NetWare when the script has error such as
+                    not having the closing quotes like:
+                        if ($var eq "value)
+                    Checking of white spaces is anyway done in NetWare code.
+            */
 #ifndef NETWARE
-	while (isSPACE(*PL_oldoldbufptr))
-	    PL_oldoldbufptr++;
+            while (isSPACE(*PL_oldoldbufptr))
+                PL_oldoldbufptr++;
 #endif
-	context = PL_oldoldbufptr;
-	contlen = PL_bufptr - PL_oldoldbufptr;
-    }
-    else if (  PL_oldbufptr
-            && PL_bufptr > PL_oldbufptr
-            && PL_bufptr - PL_oldbufptr < 200
-            && PL_oldbufptr != PL_bufptr) {
-	/*
-		Only for NetWare:
-		The code below is removed for NetWare because it abends/crashes on NetWare
-		when the script has error such as not having the closing quotes like:
-		    if ($var eq "value)
-		Checking of white spaces is anyway done in NetWare code.
-	*/
+            context = PL_oldoldbufptr;
+            contlen = PL_bufptr - PL_oldoldbufptr;
+        }
+        else if (  PL_oldbufptr
+                && PL_bufptr > PL_oldbufptr
+                && PL_bufptr - PL_oldbufptr < 200
+                && PL_oldbufptr != PL_bufptr) {
+            /*
+                    Only for NetWare:
+                    The code below is removed for NetWare because it
+                    abends/crashes on NetWare when the script has error such as
+                    not having the closing quotes like:
+                        if ($var eq "value)
+                    Checking of white spaces is anyway done in NetWare code.
+            */
 #ifndef NETWARE
-	while (isSPACE(*PL_oldbufptr))
-	    PL_oldbufptr++;
+            while (isSPACE(*PL_oldbufptr))
+                PL_oldbufptr++;
 #endif
-	context = PL_oldbufptr;
-	contlen = PL_bufptr - PL_oldbufptr;
-    }
-    else if (yychar > 255)
-	sv_catpvs(where_sv, "next token ???");
-    else if (yychar == YYEMPTY) {
-	if (PL_lex_state == LEX_NORMAL)
-	    sv_catpvs(where_sv, "at end of line");
-	else if (PL_lex_inpat)
-	    sv_catpvs(where_sv, "within pattern");
-	else
-	    sv_catpvs(where_sv, "within string");
-    }
-    else {
-	sv_catpvs(where_sv, "next char ");
-	if (yychar < 32)
-	    Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
-	else if (isPRINT_LC(yychar)) {
-	    const char string = yychar;
-	    sv_catpvn(where_sv, &string, 1);
-	}
-	else
-	    Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
-    }
-    msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
-    Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
-        OutCopFILE(PL_curcop),
-        (IV)(PL_parser->preambling == NOLINE
-               ? CopLINE(PL_curcop)
-               : PL_parser->preambling));
-    if (context)
-	Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
-			     UTF8fARG(UTF, contlen, context));
-    else
-	Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
-    if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
-        Perl_sv_catpvf(aTHX_ msg,
-        "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
-                (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
-        PL_multi_end = 0;
-    }
-    if (PL_in_eval & EVAL_WARNONLY) {
-	PL_in_eval &= ~EVAL_WARNONLY;
-	Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
+            context = PL_oldbufptr;
+            contlen = PL_bufptr - PL_oldbufptr;
+        }
+        else if (yychar > 255)
+            sv_catpvs(where_sv, "next token ???");
+        else if (yychar == YYEMPTY) {
+            if (PL_lex_state == LEX_NORMAL)
+                sv_catpvs(where_sv, "at end of line");
+            else if (PL_lex_inpat)
+                sv_catpvs(where_sv, "within pattern");
+            else
+                sv_catpvs(where_sv, "within string");
+        }
+        else {
+            sv_catpvs(where_sv, "next char ");
+            if (yychar < 32)
+                Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
+            else if (isPRINT_LC(yychar)) {
+                const char string = yychar;
+                sv_catpvn(where_sv, &string, 1);
+            }
+            else
+                Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
+        }
+        msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
+        Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
+            OutCopFILE(PL_curcop),
+            (IV)(PL_parser->preambling == NOLINE
+                   ? CopLINE(PL_curcop)
+                   : PL_parser->preambling));
+        if (context)
+            Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
+                                 UTF8fARG(UTF, contlen, context));
+        else
+            Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
+        if (   PL_multi_start < PL_multi_end
+            && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
+        {
+            Perl_sv_catpvf(aTHX_ msg,
+            "  (Might be a runaway multi-line %c%c string starting on"
+            " line %" IVdf ")\n",
+                    (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
+            PL_multi_end = 0;
+        }
+        if (PL_in_eval & EVAL_WARNONLY) {
+            PL_in_eval &= ~EVAL_WARNONLY;
+            Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
+        }
+        else {
+            qerror(msg);
+        }
     }
-    else
-	qerror(msg);
-    if (PL_error_count >= 10) {
-	SV * errsv;
-	if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
-	    Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
-		       SVfARG(errsv), OutCopFILE(PL_curcop));
-	else
-	    Perl_croak(aTHX_ "%s has too many errors.\n",
-            OutCopFILE(PL_curcop));
+    if (s == NULL || PL_error_count >= 10) {
+        const char * msg = "";
+        const char * const name = OutCopFILE(PL_curcop);
+
+	if (PL_in_eval) {
+            SV * errsv = ERRSV;
+            if (SvCUR(errsv)) {
+                msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
+            }
+        }
+
+        if (s == NULL) {
+            abort_execution(msg, name);
+        }
+        else {
+            Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
+        }
     }
     PL_in_my = 0;
     PL_in_my_stash = NULL;
@@ -11369,10 +11795,10 @@ S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 	Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
     }
     if (status < 0) {
-	Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
+	Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
     }
     DEBUG_P(PerlIO_printf(Perl_debug_log,
-			  "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
+			  "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
 			  FPTR2DPTR(void *, S_utf16_textfilter),
 			  reverse ? 'l' : 'b', idx, maxlen, status,
 			  (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
@@ -11427,7 +11853,7 @@ S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 
 	    status = FILTER_READ(idx + 1, utf16_buffer,
 				 160 + (SvCUR(utf16_buffer) & 1));
-	    DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
+	    DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
 	    DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
 	    if (status < 0) {
 		/* Error */
@@ -11463,7 +11889,7 @@ S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 	}
     }
     DEBUG_P(PerlIO_printf(Perl_debug_log,
-			  "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
+			  "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
 			  status,
 			  (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
@@ -11887,7 +12313,7 @@ Perl_parse_label(pTHX_ U32 flags)
 	STRLEN wlen, bufptr_pos;
 	lex_read_space(0);
 	t = s = PL_bufptr;
-        if (!isIDFIRST_lazy_if(s, UTF))
+        if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
 	    goto no_label;
 	t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
 	if (word_takes_any_delimiter(s, wlen))