This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
disallow nested declarations [perl #125587] [perl #121058]
[perl5.git] / toke.c
diff --git a/toke.c b/toke.c
index fb9db0f..7a0f1b6 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -38,7 +38,7 @@ Individual members of C<PL_parser> have their own documentation.
 #include "EXTERN.h"
 #define PERL_IN_TOKE_C
 #include "perl.h"
-#include "dquote_static.c"
+#include "dquote_inline.h"
 
 #define new_constant(a,b,c,d,e,f,g)    \
        S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
@@ -503,6 +503,9 @@ S_ao(pTHX_ int toketype)
  * It prints "Missing operator before end of line" if there's nothing
  * after the missing operator, or "... before <...>" if there is something
  * after the missing operator.
+ *
+ * PL_bufptr is expected to point to the start of the thing that was found,
+ * and s after the next token or partial token.
  */
 
 STATIC void
@@ -649,15 +652,15 @@ is made on the save stack so that upon unwinding the new state object
 will be destroyed and the former value of L</PL_parser> will be restored.
 Nothing else need be done to clean up the parsing context.
 
-The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
+The code to be parsed comes from C<line> and C<rsfp>.  C<line>, if
 non-null, provides a string (in SV form) containing code to be parsed.
-A copy of the string is made, so subsequent modification of I<line>
-does not affect parsing.  I<rsfp>, if non-null, provides an input stream
+A copy of the string is made, so subsequent modification of C<line>
+does not affect parsing.  C<rsfp>, if non-null, provides an input stream
 from which code will be read to be parsed.  If both are non-null, the
-code in I<line> comes first and must consist of complete lines of input,
-and I<rsfp> supplies the remainder of the source.
+code in C<line> comes first and must consist of complete lines of input,
+and C<rsfp> supplies the remainder of the source.
 
-The I<flags> parameter is reserved for future use.  Currently it is only
+The C<flags> parameter is reserved for future use.  Currently it is only
 used by perl internally, so extensions should always pass zero.
 
 =cut
@@ -754,8 +757,8 @@ Perl_parser_free(pTHX_  const yy_parser *parser)
 
     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
        PerlIO_clearerr(parser->rsfp);
-    else if (parser->rsfp && (!parser->old_parser ||
-               (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
+    else if (parser->rsfp && (!parser->old_parser
+          || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
        PerlIO_close(parser->rsfp);
     SvREFCNT_dec(parser->rsfp_filters);
     SvREFCNT_dec(parser->lex_stuff);
@@ -884,7 +887,7 @@ Perl_lex_bufutf8(pTHX)
 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 
 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
-at least I<len> octets (including terminating C<NUL>).  Returns a
+at least C<len> octets (including terminating C<NUL>).  Returns a
 pointer to the reallocated buffer.  This is necessary before making
 any direct modification of the buffer that would increase its length.
 L</lex_stuff_pvn> provides a more convenient way to insert text into
@@ -945,9 +948,9 @@ It is not recommended to do this as part of normal parsing, and most
 uses of this facility run the risk of the inserted characters being
 interpreted in an unintended manner.
 
-The string to be inserted is represented by I<len> octets starting
-at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
-according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
+The string to be inserted is represented by C<len> octets starting
+at C<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
+according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
 The characters are recoded for the lexer buffer, according to how the
 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
@@ -1057,10 +1060,10 @@ It is not recommended to do this as part of normal parsing, and most
 uses of this facility run the risk of the inserted characters being
 interpreted in an unintended manner.
 
-The string to be inserted is represented by octets starting at I<pv>
+The string to be inserted is represented by octets starting at C<pv>
 and continuing to the first nul.  These octets are interpreted as either
 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
-in I<flags>.  The characters are recoded for the lexer buffer, according
+in C<flags>.  The characters are recoded for the lexer buffer, according
 to how the buffer is currently being interpreted (L</lex_bufutf8>).
 If it is not convenient to nul-terminate a string to be inserted, the
 L</lex_stuff_pvn> function is more appropriate.
@@ -1086,7 +1089,7 @@ It is not recommended to do this as part of normal parsing, and most
 uses of this facility run the risk of the inserted characters being
 interpreted in an unintended manner.
 
-The string to be inserted is the string value of I<sv>.  The characters
+The string to be inserted is the string value of C<sv>.  The characters
 are recoded for the lexer buffer, according to how the buffer is currently
 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
@@ -1111,7 +1114,7 @@ Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
 =for apidoc Amx|void|lex_unstuff|char *ptr
 
 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
-I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
+C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
 This hides the discarded text from any lexing code that runs later,
 as if the text had never appeared.
 
@@ -1145,7 +1148,7 @@ Perl_lex_unstuff(pTHX_ char *ptr)
 =for apidoc Amx|void|lex_read_to|char *ptr
 
 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
-to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
+to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
 performing the correct bookkeeping whenever a newline character is passed.
 This is the normal way to consume lexed text.
 
@@ -1176,8 +1179,8 @@ Perl_lex_read_to(pTHX_ char *ptr)
 =for apidoc Amx|void|lex_discard_to|char *ptr
 
 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
-up to I<ptr>.  The remaining content of the buffer will be moved, and
-all pointers into the buffer updated appropriately.  I<ptr> must not
+up to C<ptr>.  The remaining content of the buffer will be moved, and
+all pointers into the buffer updated appropriately.  C<ptr> must not
 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
 it is not permitted to discard text that has yet to be lexed.
 
@@ -1238,7 +1241,7 @@ the current chunk at this time.
 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
 chunk (i.e., the current chunk has been entirely consumed), normally the
 current chunk will be discarded at the same time that the new chunk is
-read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
+read in.  If C<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
 will not be discarded.  If the current chunk has not been entirely
 consumed, then it will not be discarded regardless of the flag.
 
@@ -1249,7 +1252,7 @@ buffer has reached the end of the input text.
 */
 
 #define LEX_FAKE_EOF 0x80000000
-#define LEX_NO_TERM  0x40000000
+#define LEX_NO_TERM  0x40000000 /* here-doc */
 
 bool
 Perl_lex_next_chunk(pTHX_ U32 flags)
@@ -1263,10 +1266,13 @@ Perl_lex_next_chunk(pTHX_ U32 flags)
     bool got_some;
     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
+    if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
+       return FALSE;
     linestr = PL_parser->linestr;
     buf = SvPVX(linestr);
-    if (!(flags & LEX_KEEP_PREVIOUS) &&
-           PL_parser->bufptr == PL_parser->bufend) {
+    if (!(flags & LEX_KEEP_PREVIOUS)
+          && PL_parser->bufptr == PL_parser->bufend)
+    {
        old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
        linestart_pos = 0;
        if (PL_parser->last_uni != PL_parser->bufend)
@@ -1333,8 +1339,10 @@ Perl_lex_next_chunk(pTHX_ U32 flags)
        CopLINE_set(PL_curcop, PL_parser->preambling + 1);
        PL_parser->preambling = NOLINE;
     }
-    if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
-           PL_curstash != PL_debstash) {
+    if (   got_some_for_debugger
+        && PERLDB_LINE_OR_SAVESRC
+        && PL_curstash != PL_debstash)
+    {
        /* debugger active and we're not compiling the debugger code,
         * so store the line into the debugger's array of lines
         */
@@ -1354,7 +1362,7 @@ peeked character, use L</lex_read_unichar>.
 
 If the next character is in (or extends into) the next chunk of input
 text, the next chunk will be read in.  Normally the current chunk will be
-discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
+discarded at the same time, but if C<flags> includes C<LEX_KEEP_PREVIOUS>
 then the current chunk will not be discarded.
 
 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
@@ -1425,7 +1433,7 @@ examine the next character, use L</lex_peek_unichar> instead.
 
 If the next character is in (or extends into) the next chunk of input
 text, the next chunk will be read in.  Normally the current chunk will be
-discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
+discarded at the same time, but if C<flags> includes C<LEX_KEEP_PREVIOUS>
 then the current chunk will not be discarded.
 
 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
@@ -1463,7 +1471,7 @@ at a non-space character (or the end of the input text).
 
 If spaces extend into the next chunk of input text, the next chunk will
 be read in.  Normally the current chunk will be discarded at the same
-time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
+time, but if C<flags> includes C<LEX_KEEP_PREVIOUS> then the current
 chunk will not be discarded.
 
 =cut
@@ -1517,6 +1525,8 @@ Perl_lex_read_space(pTHX_ U32 flags)
                incline(s);
                need_incline = 0;
            }
+       } else if (!c) {
+           s++;
        } else {
            break;
        }
@@ -1580,9 +1590,10 @@ Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
                    in_brackets = TRUE;
                else if (*p == ']')
                    in_brackets = FALSE;
-               else if ((*p == '@' || *p == '%') &&
-                   !after_slash &&
-                   !in_brackets ) {
+               else if ((*p == '@' || *p == '%')
+                         && !after_slash
+                         && !in_brackets )
+                {
                    must_be_last = TRUE;
                    greedy_proto = *p;
                }
@@ -1642,6 +1653,7 @@ S_incline(pTHX_ const char *s)
     const char *n;
     const char *e;
     line_t line_num;
+    UV uv;
 
     PERL_ARGS_ASSERT_INCLINE;
 
@@ -1691,7 +1703,9 @@ S_incline(pTHX_ const char *s)
     if (*e != '\n' && *e != '\0')
        return;         /* false alarm */
 
-    line_num = grok_atou(n, &e) - 1;
+    if (!grok_atoUV(n, &uv, &e))
+        return;
+    line_num = ((line_t)uv) - 1;
 
     if (t - s > 0) {
        const STRLEN len = t - s;
@@ -1793,13 +1807,13 @@ S_skipspace_flags(pTHX_ char *s, U32 flags)
 {
     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
-       while (s < PL_bufend && SPACE_OR_TAB(*s))
+       while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
            s++;
     } else {
        STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
        PL_bufptr = s;
        lex_read_space(flags | LEX_KEEP_PREVIOUS |
-               (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
+               (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
                    LEX_NO_NEXT_CHUNK : 0));
        s = PL_bufptr;
        PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
@@ -1831,13 +1845,13 @@ S_check_uni(pTHX)
        PL_last_uni++;
     s = PL_last_uni;
     while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
-       s++;
+       s += UTF ? UTF8SKIP(s) : 1;
     if ((t = strchr(s, '(')) && t < PL_bufptr)
        return;
 
     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
-                    "Warning: Use of \"%.*s\" without parentheses is ambiguous",
-                    (int)(s - PL_last_uni), PL_last_uni);
+                    "Warning: Use of \"%"UTF8f"\" without parentheses is ambiguous",
+                    UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
 }
 
 /*
@@ -1902,6 +1916,7 @@ S_force_next(pTHX_ I32 type)
        tokereport(type, &NEXTVAL_NEXTTOKE);
     }
 #endif
+    assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
     PL_nexttype[PL_nexttoke] = type;
     PL_nexttoke++;
     if (PL_lex_state != LEX_KNOWNEXT) {
@@ -2002,8 +2017,8 @@ S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
 
     start = skipspace(start);
     s = start;
-    if (isIDFIRST_lazy_if(s,UTF) ||
-       (allow_pack && *s == ':') )
+    if (isIDFIRST_lazy_if(s,UTF)
+        || (allow_pack && *s == ':') )
     {
        s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
        if (check_keyword) {
@@ -2171,8 +2186,8 @@ S_force_strict_version(pTHX_ char *s)
        s = (char *)scan_version(s, ver, 0);
        version = newSVOP(OP_CONST, 0, ver);
     }
-    else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
-           (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
+    else if ((*s != ';' && *s != '{' && *s != '}' )
+             && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
     {
        PL_bufptr = s;
        if (errstr)
@@ -2276,7 +2291,9 @@ S_sublex_start(pTHX)
        return THING;
     }
     if (op_type == OP_CONST) {
-       SV *sv = tokeq(PL_lex_stuff);
+       SV *sv = PL_lex_stuff;
+       PL_lex_stuff = NULL;
+       sv = tokeq(sv);
 
        if (SvTYPE(sv) == SVt_PVIV) {
            /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
@@ -2287,7 +2304,6 @@ S_sublex_start(pTHX)
            sv = nsv;
        }
        pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
-       PL_lex_stuff = NULL;
        return THING;
     }
 
@@ -2330,6 +2346,7 @@ S_sublex_push(pTHX)
     SAVEI32(PL_lex_casemods);
     SAVEI32(PL_lex_starts);
     SAVEI8(PL_lex_state);
+    SAVEI8(PL_lex_defer);
     SAVESPTR(PL_lex_repl);
     SAVEVPTR(PL_lex_inpat);
     SAVEI16(PL_lex_inwhat);
@@ -2367,6 +2384,13 @@ S_sublex_push(pTHX)
     PL_lex_stuff = NULL;
     PL_sublex_info.repl = NULL;
 
+    /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
+       set for an inner quote-like operator and then an error causes scope-
+       popping.  We must not have a PL_lex_stuff value left dangling, as
+       that breaks assumptions elsewhere.  See bug #123617.  */
+    SAVEGENERICSV(PL_lex_stuff);
+    SAVEGENERICSV(PL_sublex_info.repl);
+
     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
        = SvPVX(PL_linestr);
     PL_bufend += SvCUR(PL_linestr);
@@ -2471,7 +2495,6 @@ S_sublex_done(pTHX)
        PL_bufend = SvPVX(PL_linestr);
        PL_bufend += SvCUR(PL_linestr);
        PL_expect = XOPERATOR;
-       PL_sublex_info.sub_inwhat = 0;
        return ')';
     }
 }
@@ -2511,9 +2534,10 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
         /* We deliberately don't try to print the malformed character, which
          * might not print very well; it also may be just the first of many
          * malformations, so don't print what comes after it */
-        yyerror(Perl_form(aTHX_
+        yyerror_pv(Perl_form(aTHX_
             "Malformed UTF-8 character immediately after '%.*s'",
-            (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
+            (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr),
+                   SVf_UTF8);
        return NULL;
     }
 
@@ -2735,7 +2759,6 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
         \l \L \u \U \Q \E
        (?{  or  (??{
 
-
   In transliterations:
     characters are VERY literal, except for - not at the start or end
     of the string, which indicates a range. If the range is in bytes,
@@ -2921,9 +2944,9 @@ S_scan_const(pTHX_ char *start)
 #ifdef EBCDIC
                 /* Because of the discontinuities in EBCDIC A-Z and a-z, expand
                  * any subsets of these ranges into individual characters */
-               if (literal_endpoint == 2 &&
-                   ((isLOWER_A(min) && isLOWER_A(max)) ||
-                    (isUPPER_A(min) && isUPPER_A(max))))
+               if (literal_endpoint == 2
+                    && ((isLOWER_A(min) && isLOWER_A(max))
+                     || (isUPPER_A(min) && isUPPER_A(max))))
                 {
                     for (i = min; i <= max; i++) {
                         if (isALPHA_A(i))
@@ -2986,7 +3009,8 @@ S_scan_const(pTHX_ char *start)
            }
        }
 
-       /* if we get here, we're not doing a transliteration */
+        /* if we get to any of these else's, we're not doing a
+         * transliteration. */
 
        else if (*s == '[' && PL_lex_inpat && !in_charclass) {
            char *s1 = s-1;
@@ -3015,17 +3039,20 @@ S_scan_const(pTHX_ char *start)
                while (s+1 < send && *s != ')')
                    *d++ = *s++;
            }
-           else if (!PL_lex_casemods &&
-                    (    s[2] == '{' /* This should match regcomp.c */
-                     || (s[2] == '?' && s[3] == '{')))
+           else if (!PL_lex_casemods
+                     && (    s[2] == '{' /* This should match regcomp.c */
+                        || (s[2] == '?' && s[3] == '{')))
            {
                break;
            }
        }
 
        /* likewise skip #-initiated comments in //x patterns */
-       else if (*s == '#' && PL_lex_inpat && !in_charclass &&
-         ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
+       else if (*s == '#'
+                 && PL_lex_inpat
+                 && !in_charclass
+                 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
+        {
            while (s+1 < send && *s != '\n')
                *d++ = *s++;
        }
@@ -3038,7 +3065,7 @@ S_scan_const(pTHX_ char *start)
           (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
           */
        else if (*s == '@' && s[1]) {
-           if (isWORDCHAR_lazy_if(s+1,UTF))
+           if (UTF ? isIDFIRST_utf8((U8*)s+1) : isWORDCHAR_A(s[1]))
                break;
            if (strchr(":'{$", s[1]))
                break;
@@ -3071,8 +3098,11 @@ S_scan_const(pTHX_ char *start)
 
            /* warn on \1 - \9 in substitution replacements, but note that \11
             * is an octal; and \19 is \1 followed by '9' */
-           if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
-               isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
+           if (PL_lex_inwhat == OP_SUBST
+                && !PL_lex_inpat
+                && isDIGIT(*s)
+                && *s != '0'
+                && !isDIGIT(s[1]))
            {
                /* diag_listed_as: \%d better written as $%d */
                Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
@@ -3209,8 +3239,9 @@ S_scan_const(pTHX_ char *start)
 
                     if (has_utf8) {
                        d = (char*)uvchr_to_utf8((U8*)d, uv);
-                       if (PL_lex_inwhat == OP_TRANS &&
-                           PL_sublex_info.sub_op) {
+                       if (PL_lex_inwhat == OP_TRANS
+                            && PL_sublex_info.sub_op)
+                        {
                            PL_sublex_info.sub_op->op_private |=
                                (PL_lex_repl ? OPpTRANS_FROM_UTF
                                             : OPpTRANS_TO_UTF);
@@ -3258,12 +3289,7 @@ S_scan_const(pTHX_ char *start)
                  *  Otherwise must be some \N{NAME}: convert to \N{U+c1.c2...}
                  *      if a pattern; otherwise convert to utf8
                  *
-                 * If the regex compiler should ever need to differentiate
-                 * between the \N{U+...} and \N{name} forms, that could easily
-                 * be done here by stripping any leading zeros from the
-                 * \N{U+...} case, and adding them to the other one. */
-
-                /* Here, 's' points to the 'N'; the test below is guaranteed to
+                 * Here, 's' points to the 'N'; the test below is guaranteed to
                 * succeed if we are being called on a pattern, as we already
                  * know from a test above that the next character is a '{'.  A
                  * non-pattern \N must mean 'named character', which requires
@@ -3288,72 +3314,43 @@ S_scan_const(pTHX_ char *start)
                /* Here it looks like a named character */
 
                if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
-                   I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
-                               | PERL_SCAN_SILENT_ILLDIGIT
-                               | PERL_SCAN_DISALLOW_PREFIX;
-                   STRLEN len;
-
                    s += 2;         /* Skip to next char after the 'U+' */
-                   len = e - s;
-                   uv = grok_hex(s, &len, &flags, NULL);
-                   if (len == 0
-                    || (  len != (STRLEN)(e - s) && s[len] != '.'
-                       && PL_lex_inpat))
-                   {
-                     bad_NU:
-                       yyerror("Invalid hexadecimal number in \\N{U+...}");
-                       s = e + 1;
-                       continue;
-                   }
-
                    if (PL_lex_inpat) {
-#ifdef EBCDIC
-                       s -= 5;     /* Include the '\N{U+' */
-                        /* On EBCDIC platforms, in \N{U+...}, the '...' is a
-                         * Unicode value, so convert to native so downstream
-                         * code can continue to assume it's native */
-                        /* XXX This should be in the regexp parser,
-                               because doing it here makes /\N{U+41}/ and
-                               =~ '\N{U+41}' do different things.  */
-                       d += my_snprintf(d, e - s + 1 + 1,  /* includes the '}'
-                                                              and the \0 */
-                                         "\\N{U+%X",
-                                         (unsigned int) UNI_TO_NATIVE(uv));
-                        s += 5 + len;
-                        while (*s == '.') {
-                            s++;
-                            len = e - s;
-                            uv = grok_hex(s, &len, &flags, NULL);
-                            if (!len
-                             || (len != (STRLEN)(e - s) && s[len] != '.'))
-                                goto bad_NU;
-                            s--;
-                            d += my_snprintf(
-                                     d, e - s + 1 + 1, ".%X",
-                                     (unsigned int)UNI_TO_NATIVE(uv)
-                                 );
-                            s += len + 1;
+
+                        /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
+                        /* Check the syntax.  */
+                        const char *orig_s;
+                        orig_s = s - 5;
+                        if (!isXDIGIT(*s)) {
+                          bad_NU:
+                            yyerror(
+                                "Invalid hexadecimal number in \\N{U+...}"
+                            );
+                            s = e + 1;
+                            continue;
                         }
-                        *(d++) = '}';
-#else
-                        /* On non-EBCDIC platforms, pass it through unchanged.
-                         * The reason we evaluate the numbers is to make
-                         * sure there wasn't a syntax error. */
-                        const char * const orig_s = s - 5;
-                        while (*s == '.') {
-                            s++;
-                            len = e - s;
-                            uv = grok_hex(s, &len, &flags, NULL);
-                            if (!len
-                             || (len != (STRLEN)(e - s) && s[len] != '.'))
-                                goto bad_NU;
+                        while (++s < e) {
+                            if (isXDIGIT(*s))
+                                continue;
+                            else if ((*s == '.' || *s == '_')
+                                  && isXDIGIT(s[1]))
+                                continue;
+                            goto bad_NU;
                         }
-                        /* +1 is for the '}' */
+
+                        /* Pass everything through unchanged.
+                         * +1 is for the '}' */
                         Copy(orig_s, d, e - orig_s + 1, char);
                         d += e - orig_s + 1;
-#endif
                    }
                    else {  /* Not a pattern: convert the hex to string */
+                        I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
+                               | PERL_SCAN_SILENT_ILLDIGIT
+                               | PERL_SCAN_DISALLOW_PREFIX;
+                        STRLEN len = e - s;
+                        uv = grok_hex(s, &len, &flags, NULL);
+                        if (len == 0 || (len != (STRLEN)(e - s)))
+                            goto bad_NU;
 
                          /* If the destination is not in utf8, unconditionally
                          * recode it to be so.  This is because \N{} implies
@@ -3424,9 +3421,15 @@ S_scan_const(pTHX_ char *start)
                                     char hex_string[4];
                                     int len =
                                         my_snprintf(hex_string,
-                                                    sizeof(hex_string),
-                                                    "%02X.", (U8) *str);
-                                    PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(hex_string));
+                                                  sizeof(hex_string),
+                                                  "%02X.",
+
+                                                  /* The regex compiler is
+                                                   * expecting Unicode, not
+                                                   * native */
+                                                  (U8) NATIVE_TO_LATIN1(*str));
+                                    PERL_MY_SNPRINTF_POST_GUARD(len,
+                                                           sizeof(hex_string));
                                     Copy(hex_string, d, 3, char);
                                     d += 3;
                                     str++;
@@ -3450,12 +3453,12 @@ S_scan_const(pTHX_ char *start)
                                                         len,
                                                         &char_length,
                                                         UTF8_ALLOW_ANYUV);
-                                /* Convert first code point to hex, including
-                                 * the boiler plate before it. */
+                                /* Convert first code point to Unicode hex,
+                                 * including the boiler plate before it. */
                                 output_length =
                                     my_snprintf(hex_string, sizeof(hex_string),
-                                                "\\N{U+%X",
-                                                (unsigned int) uv);
+                                             "\\N{U+%X",
+                                             (unsigned int) NATIVE_TO_UNI(uv));
 
                                 /* Make sure there is enough space to hold it */
                                 d = off + SvGROW(sv, off
@@ -3467,7 +3470,7 @@ S_scan_const(pTHX_ char *start)
                                 d += output_length;
 
                                 /* For each subsequent character, append dot and
-                                * its ordinal in hex */
+                                * its Unicode code point in hex */
                                 while ((str += char_length) < str_end) {
                                     const STRLEN off = d - SvPVX_const(sv);
                                     U32 uv = utf8n_to_uvchr((U8 *) str,
@@ -3476,9 +3479,9 @@ S_scan_const(pTHX_ char *start)
                                                             UTF8_ALLOW_ANYUV);
                                     output_length =
                                         my_snprintf(hex_string,
-                                                    sizeof(hex_string),
-                                                    ".%X",
-                                                    (unsigned int) uv);
+                                             sizeof(hex_string),
+                                             ".%X",
+                                             (unsigned int) NATIVE_TO_UNI(uv));
 
                                     d = off + SvGROW(sv, off
                                                         + output_length
@@ -3789,8 +3792,10 @@ S_intuit_more(pTHX_ char *s)
                    else
                        weight -= 10;
                }
-               else if (*s == '$' && s[1] &&
-                 strchr("[#!%*<>()-=",s[1])) {
+               else if (*s == '$'
+                         && s[1]
+                         && strchr("[#!%*<>()-=",s[1]))
+                {
                    if (/*{*/ strchr("])} =",s[2]))
                        weight -= 10;
                    else
@@ -3900,8 +3905,8 @@ S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
     }
 
     if (*start == '$') {
-       if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
-               isUPPER(*PL_tokenbuf))
+       if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
+            || isUPPER(*PL_tokenbuf))
            return 0;
        s = skipspace(s);
        PL_bufptr = start;
@@ -4175,9 +4180,11 @@ S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
         return PL_curstash;
 
-    if (len > 2 &&
-        (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
-        (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
+    if (len > 2
+        && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
+        && (gv = gv_fetchpvn_flags(pkgname,
+                                   len,
+                                   ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
     {
         return GvHV(gv);                       /* Foo:: */
     }
@@ -4234,10 +4241,10 @@ S_tokenize_use(pTHX_ int is_use, char *s) {
 STATIC bool
 S_word_takes_any_delimeter(char *p, STRLEN len)
 {
-    return (len == 1 && strchr("msyq", p[0])) ||
-          (len == 2 && (
-           (p[0] == 't' && p[1] == 'r') ||
-           (p[0] == 'q' && strchr("qwxr", p[1]))));
+    return (len == 1 && strchr("msyq", p[0]))
+            || (len == 2
+                && ((p[0] == 't' && p[1] == 'r')
+                    || (p[0] == 'q' && strchr("qwxr", p[1]))));
 }
 
 static void
@@ -4328,13 +4335,8 @@ Perl_yylex(pTHX)
        SvREFCNT_dec(tmp);
     } );
 
-    switch (PL_lex_state) {
-    case LEX_NORMAL:
-    case LEX_INTERPNORMAL:
-       break;
-
     /* when we've already built the next token, just pull it out of the queue */
-    case LEX_KNOWNEXT:
+    if (PL_nexttoke) {
        PL_nexttoke--;
        pl_yylval = PL_nextval[PL_nexttoke];
        if (!PL_nexttoke) {
@@ -4359,6 +4361,12 @@ Perl_yylex(pTHX)
            }
            return REPORT(next_type == 'p' ? pending_ident() : next_type);
        }
+    }
+
+    switch (PL_lex_state) {
+    case LEX_NORMAL:
+    case LEX_INTERPNORMAL:
+       break;
 
     /* interpolated case modifiers like \L \U, including \Q and \E.
        when we get here, PL_bufptr is at the \
@@ -4409,10 +4417,11 @@ Perl_yylex(pTHX)
                I32 tmp;
                 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
                     tmp = *s, *s = s[2], s[2] = (char)tmp;     /* misordered... */
-               if ((*s == 'L' || *s == 'U' || *s == 'F') &&
-                   (strchr(PL_lex_casestack, 'L')
+               if ((*s == 'L' || *s == 'U' || *s == 'F')
+                    && (strchr(PL_lex_casestack, 'L')
                         || strchr(PL_lex_casestack, 'U')
-                        || strchr(PL_lex_casestack, 'F'))) {
+                        || strchr(PL_lex_casestack, 'F')))
+                {
                    PL_lex_casestack[--PL_lex_casemods] = '\0';
                    PL_lex_allbrackets--;
                    return REPORT(')');
@@ -4507,6 +4516,14 @@ Perl_yylex(pTHX)
        /* FALLTHROUGH */
 
     case LEX_INTERPEND:
+       /* Treat state as LEX_NORMAL if we have no inner lexing scope.
+          XXX This hack can be removed if we stop setting PL_lex_state to
+          LEX_KNOWNEXT, as can the hack under LEX_INTREPCONCAT below.  */
+       if (UNLIKELY(!PL_lex_inwhat)) {
+           PL_lex_state = LEX_NORMAL;
+           break;
+       }
+
        if (PL_lex_dojoin) {
            const U8 dojoin_was = PL_lex_dojoin;
            PL_lex_dojoin = FALSE;
@@ -4558,6 +4575,14 @@ Perl_yylex(pTHX)
            Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
                       (long) PL_lex_brackets);
 #endif
+       /* Treat state as LEX_NORMAL when not in an inner lexing scope.
+          XXX This hack can be removed if we stop setting PL_lex_state to
+          LEX_KNOWNEXT.  */
+       if (UNLIKELY(!PL_lex_inwhat)) {
+           PL_lex_state = LEX_NORMAL;
+           break;
+       }
+
        if (PL_bufptr == PL_bufend)
            return REPORT(sublex_done());
 
@@ -4638,11 +4663,13 @@ Perl_yylex(pTHX)
     case 26:
        goto fake_eof;                  /* emulate EOF on ^D or ^Z */
     case 0:
-       if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
+       if ((!PL_rsfp || PL_lex_inwhat)
+        && (!PL_parser->filtered || s+1 < PL_bufend)) {
            PL_last_uni = 0;
            PL_last_lop = 0;
-           if (PL_lex_brackets &&
-                   PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
+           if (PL_lex_brackets
+                && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
+            {
                yyerror((const char *)
                        (PL_lex_formbrack
                         ? "Format not terminated"
@@ -4725,7 +4752,7 @@ Perl_yylex(pTHX)
            PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
            PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
            PL_last_lop = PL_last_uni = NULL;
-           if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
+           if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
                update_debugger_info(PL_linestr, NULL, 0);
            goto retry;
        }
@@ -4747,11 +4774,12 @@ Perl_yylex(pTHX)
            s = PL_bufptr;
            /* If it looks like the start of a BOM or raw UTF-16,
             * check if it in fact is. */
-           if (bof && PL_rsfp &&
-                    (*s == 0 ||
-                     *(U8*)s == BOM_UTF8_FIRST_BYTE ||
-                     *(U8*)s >= 0xFE ||
-                     s[1] == 0)) {
+           if (bof && PL_rsfp
+                && (*s == 0
+                    || *(U8*)s == BOM_UTF8_FIRST_BYTE
+                        || *(U8*)s >= 0xFE
+                        || s[1] == 0))
+            {
                Off_t offset = (IV)PerlIO_tell(PL_rsfp);
                bof = (offset == (Off_t)SvCUR(PL_linestr));
 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
@@ -4853,6 +4881,8 @@ Perl_yylex(pTHX)
                d = instr(s,"perl -");
                if (!d) {
                    d = instr(s,"perl");
+                    if (d && d[4] == '6')
+                        d = NULL;
 #if defined(DOSISH)
                    /* avoid getting into infinite loops when shebang
                     * line contains "Perl" rather than "perl" */
@@ -4889,12 +4919,12 @@ Perl_yylex(pTHX)
                        *s = '#';       /* Don't try to parse shebang line */
                }
 #endif /* ALTERNATE_SHEBANG */
-               if (!d &&
-                   *s == '#' &&
-                   ipathend > ipath &&
-                   !PL_minus_c &&
-                   !instr(s,"indir") &&
-                   instr(PL_origargv[0],"perl"))
+               if (!d
+                    && *s == '#'
+                    && ipathend > ipath
+                    && !PL_minus_c
+                    && !instr(s,"indir")
+                    && instr(PL_origargv[0],"perl"))
                {
                    dVAR;
                    char **newargv;
@@ -4957,8 +4987,8 @@ Perl_yylex(pTHX)
                            } while (argc && argv[0][0] == '-' && argv[0][1]);
                            init_argv_symbols(argc,argv);
                        }
-                       if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
-                           ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
+                       if (   (PERLDB_LINE_OR_SAVESRC && !oldpdb)
+                            || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
                              /* if we have already added "LINE: while (<>) {",
                                 we must not do it again */
                        {
@@ -4967,7 +4997,7 @@ Perl_yylex(pTHX)
                            PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
                            PL_last_lop = PL_last_uni = NULL;
                            PL_preambled = FALSE;
-                           if (PERLDB_LINE || PERLDB_SAVESRC)
+                           if (PERLDB_LINE_OR_SAVESRC)
                                (void)gv_fetchfile(PL_origfilename);
                            goto retry;
                        }
@@ -4977,7 +5007,6 @@ Perl_yylex(pTHX)
        }
        if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
            PL_lex_state = LEX_FORMLINE;
-           NEXTVAL_NEXTTOKE.ival = 0;
            force_next(FORMRBRACK);
            TOKEN(';');
        }
@@ -4993,8 +5022,9 @@ Perl_yylex(pTHX)
        goto retry;
     case '#':
     case '\n':
-       if (PL_lex_state != LEX_NORMAL ||
-            (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
+       if (PL_lex_state != LEX_NORMAL
+            || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
+        {
             const bool in_comment = *s == '#';
            if (*s == '#' && s == PL_linestart && PL_in_eval
             && !PL_rsfp && !PL_parser->filtered) {
@@ -5020,7 +5050,6 @@ Perl_yylex(pTHX)
                 incline(s);
            if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
                PL_lex_state = LEX_FORMLINE;
-               NEXTVAL_NEXTTOKE.ival = 0;
                force_next(FORMRBRACK);
                TOKEN(';');
            }
@@ -5123,17 +5152,12 @@ Perl_yylex(pTHX)
            else if (*s == '>') {
                s++;
                s = skipspace(s);
-               if (FEATURE_POSTDEREF_IS_ENABLED && (
-                   ((*s == '$' || *s == '&') && s[1] == '*')
+               if (((*s == '$' || *s == '&') && s[1] == '*')
                  ||(*s == '$' && s[1] == '#' && s[2] == '*')
                  ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
                  ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
-                ))
+                )
                {
-                   Perl_ck_warner_d(aTHX_
-                       packWARN(WARN_EXPERIMENTAL__POSTDEREF),
-                       "Postfix dereference is experimental"
-                   );
                    PL_expect = XPOSTDEREF;
                    TOKEN(ARROW);
                }
@@ -5147,8 +5171,10 @@ Perl_yylex(pTHX)
                    TERM(ARROW);
            }
            if (PL_expect == XOPERATOR) {
-               if (*s == '=' && !PL_lex_allbrackets &&
-                       PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+               if (*s == '='
+                    && !PL_lex_allbrackets
+                    && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+                {
                    s--;
                    TOKEN(0);
                }
@@ -5172,8 +5198,10 @@ Perl_yylex(pTHX)
                    OPERATOR(PREINC);
            }
            if (PL_expect == XOPERATOR) {
-               if (*s == '=' && !PL_lex_allbrackets &&
-                       PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+               if (*s == '='
+                    && !PL_lex_allbrackets
+                    && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+                {
                    s--;
                    TOKEN(0);
                }
@@ -5199,15 +5227,18 @@ Perl_yylex(pTHX)
        s++;
        if (*s == '*') {
            s++;
-           if (*s == '=' && !PL_lex_allbrackets &&
-                   PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+           if (*s == '=' && !PL_lex_allbrackets
+                && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+            {
                s -= 2;
                TOKEN(0);
            }
            PWop(OP_POW);
        }
-       if (*s == '=' && !PL_lex_allbrackets &&
-               PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+       if (*s == '='
+            && !PL_lex_allbrackets
+            && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+        {
            s--;
            TOKEN(0);
        }
@@ -5217,9 +5248,12 @@ Perl_yylex(pTHX)
     case '%':
     {
        if (PL_expect == XOPERATOR) {
-           if (s[1] == '=' && !PL_lex_allbrackets &&
-                   PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+           if (s[1] == '='
+                && !PL_lex_allbrackets
+                && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+            {
                TOKEN(0);
+            }
            ++s;
            PL_parser->saw_infix_sigil = 1;
            Mop(OP_MODULO);
@@ -5352,7 +5386,7 @@ Perl_yylex(pTHX)
                    sv_catsv(sv, PL_lex_stuff);
                    attrs = op_append_elem(OP_LIST, attrs,
                                        newSVOP(OP_CONST, 0, sv));
-                   SvREFCNT_dec(PL_lex_stuff);
+                   SvREFCNT_dec_NN(PL_lex_stuff);
                    PL_lex_stuff = NULL;
                }
                else {
@@ -5415,10 +5449,12 @@ Perl_yylex(pTHX)
                /* XXX losing whitespace on sequential attributes here */
            }
            {
-               if (*s != ';' && *s != '}' &&
-                   !(PL_expect == XOPERATOR
-                       ? (*s == '=' ||  *s == ')')
-                       : (*s == '{' ||  *s == '('))) {
+               if (*s != ';'
+                    && *s != '}'
+                    && !(PL_expect == XOPERATOR
+                        ? (*s == '=' ||  *s == ')')
+                        : (*s == '{' ||  *s == '(')))
+                {
                    const char q = ((*s == '\'') ? '"' : '\'');
                    /* If here for an expression, and parsed no attrs, back
                       off. */
@@ -5769,13 +5805,12 @@ Perl_yylex(pTHX)
        PL_tokenbuf[0] = '&';
        s = scan_ident(s - 1, PL_tokenbuf + 1,
                       sizeof PL_tokenbuf - 1, TRUE);
+       pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
        if (PL_tokenbuf[1]) {
-           PL_expect = XOPERATOR;
            force_ident_maybe_lex('&');
        }
        else
            PREREF('&');
-       pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
        TERM('&');
 
     case '|':
@@ -5803,16 +5838,18 @@ Perl_yylex(pTHX)
        {
            const char tmp = *s++;
            if (tmp == '=') {
-               if (!PL_lex_allbrackets &&
-                       PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
+               if (!PL_lex_allbrackets
+                    && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
+                {
                    s -= 2;
                    TOKEN(0);
                }
                Eop(OP_EQ);
            }
            if (tmp == '>') {
-               if (!PL_lex_allbrackets &&
-                       PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
+               if (!PL_lex_allbrackets
+                    && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
+                {
                    s -= 2;
                    TOKEN(0);
                }
@@ -5825,8 +5862,9 @@ Perl_yylex(pTHX)
                Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
                            "Reversed %c= operator",(int)tmp);
            s--;
-           if (PL_expect == XSTATE && isALPHA(tmp) &&
-               (s == PL_linestart+1 || s[-2] == '\n') )
+           if (PL_expect == XSTATE
+                && isALPHA(tmp)
+                && (s == PL_linestart+1 || s[-2] == '\n') )
             {
                 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
                     || PL_lex_state != LEX_NORMAL) {
@@ -5890,15 +5928,16 @@ Perl_yylex(pTHX)
                    while (t < PL_bufend && isSPACE(*t))
                        ++t;
 
-                   if (*t == '/' || *t == '?' ||
-                       ((*t == 'm' || *t == 's' || *t == 'y')
-                        && !isWORDCHAR(t[1])) ||
-                       (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
+                   if (*t == '/' || *t == '?'
+                        || ((*t == 'm' || *t == 's' || *t == 'y')
+                           && !isWORDCHAR(t[1]))
+                        || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
                        Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
                                    "!=~ should be !~");
                }
-               if (!PL_lex_allbrackets &&
-                       PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
+               if (!PL_lex_allbrackets
+                    && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
+                {
                    s -= 2;
                    TOKEN(0);
                }
@@ -5924,8 +5963,9 @@ Perl_yylex(pTHX)
        {
            char tmp = *s++;
            if (tmp == '<') {
-               if (*s == '=' && !PL_lex_allbrackets &&
-                       PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+               if (*s == '=' && !PL_lex_allbrackets
+                    && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+                {
                    s -= 2;
                    TOKEN(0);
                }
@@ -5934,16 +5974,18 @@ Perl_yylex(pTHX)
            if (tmp == '=') {
                tmp = *s++;
                if (tmp == '>') {
-                   if (!PL_lex_allbrackets &&
-                           PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
+                   if (!PL_lex_allbrackets
+                        && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
+                    {
                        s -= 3;
                        TOKEN(0);
                    }
                    Eop(OP_NCMP);
                }
                s--;
-               if (!PL_lex_allbrackets &&
-                       PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
+               if (!PL_lex_allbrackets
+                    && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
+                {
                    s -= 2;
                    TOKEN(0);
                }
@@ -5961,16 +6003,18 @@ Perl_yylex(pTHX)
        {
            const char tmp = *s++;
            if (tmp == '>') {
-               if (*s == '=' && !PL_lex_allbrackets &&
-                       PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+               if (*s == '=' && !PL_lex_allbrackets
+                    && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+                {
                    s -= 2;
                    TOKEN(0);
                }
                SHop(OP_RIGHT_SHIFT);
            }
            else if (tmp == '=') {
-               if (!PL_lex_allbrackets &&
-                       PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
+               if (!PL_lex_allbrackets
+                    && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
+                {
                    s -= 2;
                    TOKEN(0);
                }
@@ -6004,8 +6048,14 @@ Perl_yylex(pTHX)
            PL_tokenbuf[0] = '@';
            s = scan_ident(s + 1, PL_tokenbuf + 1,
                           sizeof PL_tokenbuf - 1, FALSE);
-           if (PL_expect == XOPERATOR)
-               no_op("Array length", s);
+            if (PL_expect == XOPERATOR) {
+                d = s;
+                if (PL_bufptr > s) {
+                    d = PL_bufptr-1;
+                    PL_bufptr = PL_oldbufptr;
+                }
+               no_op("Array length", d);
+            }
            if (!PL_tokenbuf[1])
                PREREF(DOLSHARP);
            PL_expect = XOPERATOR;
@@ -6016,8 +6066,14 @@ Perl_yylex(pTHX)
        PL_tokenbuf[0] = '$';
        s = scan_ident(s, PL_tokenbuf + 1,
                       sizeof PL_tokenbuf - 1, FALSE);
-       if (PL_expect == XOPERATOR)
-           no_op("Scalar", s);
+       if (PL_expect == XOPERATOR) {
+           d = s;
+           if (PL_bufptr > s) {
+               d = PL_bufptr-1;
+               PL_bufptr = PL_oldbufptr;
+           }
+           no_op("Scalar", d);
+       }
        if (!PL_tokenbuf[1]) {
            if (s == PL_bufend)
                yyerror("Final $ should be \\$ or $name");
@@ -6038,14 +6094,14 @@ Perl_yylex(pTHX)
                        char *t = s+1;
 
                        while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
-                           t++;
+                           t += UTF ? UTF8SKIP(t) : 1;
                        if (*t++ == ',') {
                            PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
                            while (t < PL_bufend && *t != ']')
                                t++;
                            Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                                       "Multidimensional syntax %.*s not supported",
-                                   (int)((t - PL_bufptr) + 1), PL_bufptr);
+                                       "Multidimensional syntax %"UTF8f" not supported",
+                                        UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
                        }
                    }
                }
@@ -6166,8 +6222,9 @@ Perl_yylex(pTHX)
        }
        else if (PL_expect == XOPERATOR) {
            s++;
-           if (*s == '=' && !PL_lex_allbrackets &&
-               PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+           if (*s == '=' && !PL_lex_allbrackets
+                && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+            {
                s--;
                TOKEN(0);
            }
@@ -6187,8 +6244,9 @@ Perl_yylex(pTHX)
 
      case '?':                 /* conditional */
        s++;
-       if (!PL_lex_allbrackets &&
-           PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
+       if (!PL_lex_allbrackets
+            && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
+        {
            s--;
            TOKEN(0);
        }
@@ -6215,8 +6273,9 @@ Perl_yylex(pTHX)
        if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
            char tmp = *s++;
            if (*s == tmp) {
-               if (!PL_lex_allbrackets &&
-                       PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
+               if (!PL_lex_allbrackets
+                    && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
+                {
                    s--;
                    TOKEN(0);
                }
@@ -6229,8 +6288,9 @@ Perl_yylex(pTHX)
                    pl_yylval.ival = 0;
                OPERATOR(DOTDOT);
            }
-           if (*s == '=' && !PL_lex_allbrackets &&
-                   PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+           if (*s == '=' && !PL_lex_allbrackets
+                && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+            {
                s--;
                TOKEN(0);
            }
@@ -6294,7 +6354,13 @@ Perl_yylex(pTHX)
 
     case '`':
        s = scan_str(s,FALSE,FALSE,FALSE,NULL);
-       DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
+       DEBUG_T( {
+            if (s)
+                printbuf("### Saw backtick string before %s\n", s);
+            else
+               PerlIO_printf(Perl_debug_log,
+                            "### Saw unterminated backtick string\n");
+        } );
        if (PL_expect == XOPERATOR)
            no_op("Backticks",s);
        if (!s)
@@ -6505,24 +6571,23 @@ Perl_yylex(pTHX)
                CV *cv;
                if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
                                            (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
-                                           SVt_PVCV)) &&
-                   (cv = GvCVu(gv)))
+                                           SVt_PVCV))
+                    && (cv = GvCVu(gv)))
                {
                    if (GvIMPORTED_CV(gv))
                        ogv = gv;
                    else if (! CvMETHOD(cv))
                        hgv = gv;
                }
-               if (!ogv &&
-                   (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
-                                         len, FALSE)) &&
-                   (gv = *gvp) && (
-                       isGV_with_GP(gv)
-                           ? GvCVu(gv) && GvIMPORTED_CV(gv)
-                           :   SvPCS_IMPORTED(gv)
-                            && (gv_init(gv, PL_globalstash, PL_tokenbuf,
-                                        len, 0), 1)
-                  ))
+               if (!ogv
+                    && (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
+                                                              len, FALSE))
+                    && (gv = *gvp)
+                    && (isGV_with_GP(gv)
+                       ? GvCVu(gv) && GvIMPORTED_CV(gv)
+                       :   SvPCS_IMPORTED(gv)
+                       && (gv_init(gv, PL_globalstash, PL_tokenbuf,
+                                                                 len, 0), 1)))
                {
                    ogv = gv;
                }
@@ -6621,8 +6686,9 @@ Perl_yylex(pTHX)
                   in which case Foo is a bareword
                   (and a package name). */
 
-               if (len > 2 &&
-                   PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
+               if (len > 2
+                    && PL_tokenbuf[len - 2] == ':'
+                    && PL_tokenbuf[len - 1] == ':')
                {
                    if (ckWARN(WARN_BAREWORD)
                        && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
@@ -6682,13 +6748,14 @@ Perl_yylex(pTHX)
 
                /* See if it's the indirect object for a list operator. */
 
-               if (PL_oldoldbufptr &&
-                   PL_oldoldbufptr < PL_bufptr &&
-                   (PL_oldoldbufptr == PL_last_lop
-                    || PL_oldoldbufptr == PL_last_uni) &&
-                   /* NO SKIPSPACE BEFORE HERE! */
-                   (PL_expect == XREF ||
-                    ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
+               if (PL_oldoldbufptr
+                    && PL_oldoldbufptr < PL_bufptr
+                    && (PL_oldoldbufptr == PL_last_lop
+                       || PL_oldoldbufptr == PL_last_uni)
+                    && /* NO SKIPSPACE BEFORE HERE! */
+                      (PL_expect == XREF
+                        || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
+                                                               == OA_FILEREF))
                {
                    bool immediate_paren = *s == '(';
 
@@ -6697,8 +6764,9 @@ Perl_yylex(pTHX)
 
                    /* Two barewords in a row may indicate method call. */
 
-                   if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
-                       (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
+                   if ((isIDFIRST_lazy_if(s,UTF) || *s == '$')
+                        && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
+                    {
                        goto method;
                    }
 
@@ -6707,12 +6775,13 @@ Perl_yylex(pTHX)
                    /* Also, if "_" follows a filetest operator, it's a bareword */
 
                    if (
-                       ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
-                         (!cv &&
-                        (PL_last_lop_op != OP_MAPSTART &&
-                        PL_last_lop_op != OP_GREPSTART))))
+                       ( !immediate_paren && (PL_last_lop_op == OP_SORT
+                         || (!cv
+                             && (PL_last_lop_op != OP_MAPSTART
+                                 && PL_last_lop_op != OP_GREPSTART))))
                       || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
-                           && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
+                           && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
+                                                            == OA_FILESTATOP))
                       )
                    {
                        PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
@@ -6768,9 +6837,11 @@ Perl_yylex(pTHX)
                    op_free(rv2cv_op);
                    PL_last_lop = PL_oldbufptr;
                    PL_last_lop_op = OP_METHOD;
-                   if (!PL_lex_allbrackets &&
-                           PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+                   if (!PL_lex_allbrackets
+                        && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+                    {
                        PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
+                    }
                    PL_expect = XBLOCKTERM;
                    PL_bufptr = s;
                    return REPORT(METHOD);
@@ -6792,9 +6863,11 @@ Perl_yylex(pTHX)
                        else SvUTF8_off(sv);
                    }
                    op_free(rv2cv_op);
-                   if (tmp == METHOD && !PL_lex_allbrackets &&
-                           PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+                   if (tmp == METHOD && !PL_lex_allbrackets
+                        && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+                    {
                        PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
+                    }
                    return REPORT(tmp);
                }
 
@@ -6863,18 +6936,22 @@ Perl_yylex(pTHX)
                                sv_setpvs(PL_subname, "__ANON__");
                            else
                                sv_setpvs(PL_subname, "__ANON__::__ANON__");
-                           if (!PL_lex_allbrackets &&
-                                   PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+                           if (!PL_lex_allbrackets
+                                && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+                            {
                                PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
+                            }
                            PREBLOCK(LSTOPSUB);
                        }
                    }
                    NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
                    PL_expect = XTERM;
                    force_next(off ? PRIVATEREF : WORD);
-                   if (!PL_lex_allbrackets &&
-                           PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+                   if (!PL_lex_allbrackets
+                        && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+                    {
                        PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
+                    }
                    TOKEN(NOAMP);
                }
 
@@ -7285,11 +7362,13 @@ Perl_yylex(pTHX)
            if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
                char *p = s;
 
-               if ((PL_bufend - p) >= 3 &&
-                   strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
+               if ((PL_bufend - p) >= 3
+                    && strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
+                {
                    p += 2;
-               else if ((PL_bufend - p) >= 4 &&
-                   strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
+                }
+               else if ((PL_bufend - p) >= 4
+                         && strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
                    p += 3;
                p = skipspace(p);
                 /* skip optional package name, as in "for my abc $x (..)" */
@@ -7526,6 +7605,14 @@ Perl_yylex(pTHX)
        case KEY_our:
        case KEY_my:
        case KEY_state:
+           if (PL_in_my) {
+               yyerror(Perl_form(aTHX_
+                                 "Can't redeclare \"%s\" in \"%s\"",
+                                  tmp      == KEY_my    ? "my" :
+                                  tmp      == KEY_state ? "state" : "our",
+                                  PL_in_my == KEY_my    ? "my" :
+                                  PL_in_my == KEY_state ? "state" : "our"));
+           }
            PL_in_my = (U16)tmp;
            s = skipspace(s);
            if (isIDFIRST_lazy_if(s,UTF)) {
@@ -7571,9 +7658,11 @@ Perl_yylex(pTHX)
            if (*s == '(' || (s = skipspace(s), *s == '('))
                FUN1(OP_NOT);
            else {
-               if (!PL_lex_allbrackets &&
-                       PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+               if (!PL_lex_allbrackets
+                    && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+                {
                    PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
+                }
                OPERATOR(NOTOP);
            }
 
@@ -7699,10 +7788,8 @@ Perl_yylex(pTHX)
            }
            if (!words)
                words = newNULLLIST();
-           if (PL_lex_stuff) {
-               SvREFCNT_dec(PL_lex_stuff);
-               PL_lex_stuff = NULL;
-           }
+           SvREFCNT_dec_NN(PL_lex_stuff);
+           PL_lex_stuff = NULL;
            PL_expect = XOPERATOR;
            pl_yylval.opval = sawparens(words);
            TOKEN(QWLIST);
@@ -7944,8 +8031,9 @@ Perl_yylex(pTHX)
                d = s;
                s = skipspace(s);
 
-               if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
-                   (*s == ':' && s[1] == ':'))
+               if (isIDFIRST_lazy_if(s,UTF)
+                    || *s == '\''
+                    || (*s == ':' && s[1] == ':'))
                {
 
                    PL_expect = XBLOCK;
@@ -8168,9 +8256,11 @@ Perl_yylex(pTHX)
 
        case KEY_x:
            if (PL_expect == XOPERATOR) {
-               if (*s == '=' && !PL_lex_allbrackets &&
-                       PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+               if (*s == '=' && !PL_lex_allbrackets
+                    && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+                {
                    return REPORT(0);
+                }
                Mop(OP_REPEAT);
            }
            check_uni();
@@ -8290,14 +8380,17 @@ S_pending_ident(pTHX)
        and @foo isn't a variable we can find in the symbol
        table.
     */
-    if (ckWARN(WARN_AMBIGUOUS) &&
-       pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
+    if (ckWARN(WARN_AMBIGUOUS)
+        && pit == '@'
+        && PL_lex_state != LEX_NORMAL
+        && !PL_lex_brackets)
+    {
         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
                /* DO NOT warn for @- and @+ */
-               && !( PL_tokenbuf[2] == '\0' &&
-                   ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
+               && !( PL_tokenbuf[2] == '\0'
+                      && ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
           )
         {
             /* Downgraded from fatal to warning 20000522 mjd */
@@ -8595,6 +8688,34 @@ S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN
     return s;
 }
 
+/* Is the byte 'd' a legal single character identifier name?  'u' is true
+ * iff Unicode semantics are to be used.  The legal ones are any of:
+ *  a) all ASCII characters except:
+ *          1) control and space-type ones, like NUL, SOH, \t, and SPACE;
+ *          2) '{'
+ *     The final case currently doesn't get this far in the program, so we
+ *     don't test for it.  If that were to change, it would be ok to allow it.
+ *  c) When not under Unicode rules, any upper Latin1 character
+ *  d) Otherwise, when unicode rules are used, all XIDS characters.
+ *
+ *      Because all ASCII characters have the same representation whether
+ *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
+ *      '{' without knowing if is UTF-8 or not.
+ * EBCDIC already uses the rules that ASCII platforms will use after the
+ * deprecation cycle; see comment below about the deprecation. */
+#ifdef EBCDIC
+#   define VALID_LEN_ONE_IDENT(s, is_utf8)                                    \
+    (isGRAPH_A(*(s)) || ((is_utf8)                                            \
+                         ? isIDFIRST_utf8((U8*) (s))                          \
+                         : (isGRAPH_L1(*s)                                    \
+                            && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
+#else
+#   define VALID_LEN_ONE_IDENT(s, is_utf8)                                    \
+    (isGRAPH_A(*(s)) || ((is_utf8)                                            \
+                         ? isIDFIRST_utf8((U8*) (s))                          \
+                         : ! isASCII_utf8((U8*) (s))))
+#endif
+
 STATIC char *
 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
 {
@@ -8608,7 +8729,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
 
     PERL_ARGS_ASSERT_SCAN_IDENT;
 
-    if (isSPACE(*s))
+    if (isSPACE(*s) || !*s)
        s = skipspace(s);
     if (isDIGIT(*s)) {
        while (isDIGIT(*s)) {
@@ -8617,7 +8738,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
            *d++ = *s++;
        }
     }
-    else {
+    else {  /* See if it is a "normal" identifier */
         parse_ident(&s, &d, e, 1, is_utf8);
     }
     *d = '\0';
@@ -8629,12 +8750,15 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
            PL_lex_state = LEX_INTERPENDMAYBE;
        return s;
     }
-    if (*s == '$' && s[1] &&
-      (isIDFIRST_lazy_if(s+1,is_utf8)
-         || isDIGIT_A((U8)s[1])
-         || s[1] == '$'
-         || s[1] == '{'
-         || strnEQ(s+1,"::",2)) )
+
+    /* Here, it is not a run-of-the-mill identifier name */
+
+    if (*s == '$' && s[1]
+        && (isIDFIRST_lazy_if(s+1,is_utf8)
+            || isDIGIT_A((U8)s[1])
+            || s[1] == '$'
+            || s[1] == '{'
+            || strnEQ(s+1,"::",2)) )
     {
         /* Dereferencing a value in a scalar variable.
            The alternatives are different syntaxes for a scalar variable.
@@ -8650,36 +8774,6 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
             s = skipspace(s);
         }
     }
-
-/* Is the byte 'd' a legal single character identifier name?  'u' is true
- * iff Unicode semantics are to be used.  The legal ones are any of:
- *  a) all ASCII characters except:
- *          1) space-type ones, like \t and SPACE;
-            2) NUL;
- *          3) '{'
- *     The final case currently doesn't get this far in the program, so we
- *     don't test for it.  If that were to change, it would be ok to allow it.
- *  c) When not under Unicode rules, any upper Latin1 character
- *  d) Otherwise, when unicode rules are used, all XIDS characters.
- *
- *      Because all ASCII characters have the same representation whether
- *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
- *      '{' without knowing if is UTF-8 or not.
- * EBCDIC already uses the rules that ASCII platforms will use after the
- * deprecation cycle; see comment below about the deprecation. */
-#ifdef EBCDIC
-#   define VALID_LEN_ONE_IDENT(s, is_utf8)                                    \
-    (isGRAPH_A(*(s)) || ((is_utf8)                                            \
-                         ? isIDFIRST_utf8((U8*) (s))                          \
-                         : (isGRAPH_L1(*s)                                    \
-                            && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
-#else
-#   define VALID_LEN_ONE_IDENT(s, is_utf8) (! isSPACE_A(*(s))                 \
-                                            && LIKELY(*(s) != '\0')           \
-                                            && (! is_utf8                     \
-                                                || isASCII_utf8((U8*) (s))    \
-                                                || isIDFIRST_utf8((U8*) (s))))
-#endif
     if ((s <= PL_bufend - (is_utf8)
                           ? UTF8SKIP(s)
                           : 1)
@@ -8694,15 +8788,9 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
             : (! isGRAPH_L1( (U8) *s)
                || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD))))
         {
-            /* Split messages for back compat */
-            if (isCNTRL_A( (U8) *s)) {
-                deprecate("literal control characters in variable names");
-            }
-            else {
-                deprecate("literal non-graphic characters in variable names");
-            }
+            deprecate("literal non-graphic characters in variable names");
         }
-        
+
         if (is_utf8) {
             const STRLEN skip = UTF8SKIP(s);
             STRLEN i;
@@ -8731,8 +8819,8 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
             /* if it starts as a valid identifier, assume that it is one.
                (the later check for } being at the expected point will trap
                cases where this doesn't pan out.)  */
-        d += is_utf8 ? UTF8SKIP(d) : 1;
-        parse_ident(&s, &d, e, 1, is_utf8);
+            d += is_utf8 ? UTF8SKIP(d) : 1;
+            parse_ident(&s, &d, e, 1, is_utf8);
            *d = '\0';
             tmp_copline = CopLINE(PL_curcop);
             if (s < PL_bufend && isSPACE(*s)) {
@@ -8787,12 +8875,14 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
                PL_expect = XREF;
            }
            if (PL_lex_state == LEX_NORMAL) {
-               if (ckWARN(WARN_AMBIGUOUS) &&
-                   (keyword(dest, d - dest, 0)
-                    || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
+               if (ckWARN(WARN_AMBIGUOUS)
+                    && (keyword(dest, d - dest, 0)
+                       || get_cvn_flags(dest, d - dest, is_utf8
+                           ? SVf_UTF8
+                           : 0)))
                {
                     SV *tmp = newSVpvn_flags( dest, d - dest,
-                                            SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
+                                        SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
                    if (funny == '#')
                        funny = '@';
                     orig_copline = CopLINE(PL_curcop);
@@ -9017,10 +9107,8 @@ S_scan_subst(pTHX_ char *start)
     first_line = CopLINE(PL_curcop);
     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
     if (!s) {
-       if (PL_lex_stuff) {
-           SvREFCNT_dec(PL_lex_stuff);
-           PL_lex_stuff = NULL;
-       }
+       SvREFCNT_dec_NN(PL_lex_stuff);
+       PL_lex_stuff = NULL;
        Perl_croak(aTHX_ "Substitution replacement not terminated");
     }
     PL_multi_start = first_start;      /* so whole substitution is taken together */
@@ -9099,10 +9187,8 @@ S_scan_trans(pTHX_ char *start)
 
     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
     if (!s) {
-       if (PL_lex_stuff) {
-           SvREFCNT_dec(PL_lex_stuff);
-           PL_lex_stuff = NULL;
-       }
+       SvREFCNT_dec_NN(PL_lex_stuff);
+       PL_lex_stuff = NULL;
        Perl_croak(aTHX_ "Transliteration replacement not terminated");
     }
 
@@ -9205,10 +9291,14 @@ S_scan_heredoc(pTHX_ char *s)
            term = '"';
        if (!isWORDCHAR_lazy_if(s,UTF))
            deprecate("bare << to mean <<\"\"");
-       for (; isWORDCHAR_lazy_if(s,UTF); s++) {
-           if (d < e)
-               *d++ = *s;
+       peek = s;
+       while (isWORDCHAR_lazy_if(peek,UTF)) {
+           peek += UTF ? UTF8SKIP(peek) : 1;
        }
+       len = (peek - s >= e - d) ? (e - d) : (peek - s);
+       Copy(s, d, len, char);
+       s += len;
+       d += len;
     }
     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
        Perl_croak(aTHX_ "Delimiter for here document is too long");
@@ -9279,8 +9369,13 @@ S_scan_heredoc(pTHX_ char *s)
               lexing scope.  In a file, we will have broken out of the
               loop in the previous iteration.  In an eval, the string buf-
               fer ends with "\n;", so the while condition above will have
-              evaluated to false.  So shared can never be null. */
-           assert(shared);
+              evaluated to false.  So shared can never be null.  Or so you
+              might think.  Odd syntax errors like s;@{<<; can gobble up
+              the implicit semicolon at the end of a flie, causing the
+              file handle to be closed even when we are not in a string
+              eval.  So shared may be null in that case.  */
+           if (UNLIKELY(!shared))
+               goto interminable;
            /* A LEXSHARED struct with a null ls_prev pointer is the outer-
               most lexing scope.  In a file, shared->ls_linestr at that
               level is just one line, so there is no body to steal. */
@@ -9296,8 +9391,9 @@ S_scan_heredoc(pTHX_ char *s)
        linestr = shared->ls_linestr;
        bufend = SvEND(linestr);
        d = s;
-       while (s < bufend - len + 1 &&
-          memNE(s,PL_tokenbuf,len) ) {
+       while (s < bufend - len + 1
+               && memNE(s,PL_tokenbuf,len) )
+        {
            if (*s++ == '\n')
                ++PL_parser->herelines;
        }
@@ -9326,9 +9422,10 @@ S_scan_heredoc(pTHX_ char *s)
                                bufend - shared->re_eval_start);
            shared->re_eval_start -= s-d;
        }
-       if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
-            CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
-            cx->blk_eval.cur_text == linestr)
+       if (cxstack_ix >= 0
+            && CxTYPE(cx) == CXt_EVAL
+            && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
+            && cx->blk_eval.cur_text == linestr)
         {
            cx->blk_eval.cur_text = newSVsv(linestr);
            SvSCREAM_on(cx->blk_eval.cur_text);
@@ -9382,8 +9479,8 @@ S_scan_heredoc(pTHX_ char *s)
        PL_last_lop = PL_last_uni = NULL;
 #ifndef PERL_STRICT_CR
        if (PL_bufend - PL_linestart >= 2) {
-           if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
-               (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
+           if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
+                || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
            {
                PL_bufend[-2] = '\n';
                PL_bufend--;
@@ -9864,8 +9961,8 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
                    COPLINE_INC_WITH_HERELINES;
                /* backslashes can escape the open or closing characters */
                if (*s == '\\' && s+1 < PL_bufend) {
-                   if (!keep_bracketed_quoted &&
-                       ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
+                   if (!keep_bracketed_quoted
+                       && ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
                     {
                        s++;
                     }
@@ -9895,8 +9992,8 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
 
 #ifndef PERL_STRICT_CR
        if (to - SvPVX_const(sv) >= 2) {
-           if ((to[-2] == '\r' && to[-1] == '\n') ||
-               (to[-2] == '\n' && to[-1] == '\r'))
+           if (   (to[-2] == '\r' && to[-1] == '\n')
+                || (to[-2] == '\n' && to[-1] == '\r'))
            {
                to[-2] = '\n';
                to--;
@@ -10241,8 +10338,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                                 hexfp_exp *= 10;
                                 hexfp_exp += *h - '0';
 #ifdef NV_MIN_EXP
-                                if (negexp &&
-                                    -hexfp_exp < NV_MIN_EXP - 1) {
+                                if (negexp
+                                    && -hexfp_exp < NV_MIN_EXP - 1)
+                                {
                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
                                                    "Hexadecimal float: exponent underflow");
 #endif
@@ -10250,8 +10348,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                                 }
                                 else {
 #ifdef NV_MAX_EXP
-                                    if (!negexp &&
-                                        hexfp_exp > NV_MAX_EXP - 1) {
+                                    if (!negexp
+                                        && hexfp_exp > NV_MAX_EXP - 1)
+                                    {
                                         Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
                                                    "Hexadecimal float: exponent overflow");
                                         break;
@@ -10315,8 +10414,10 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
         }
 
        /* read next group of digits and _ and copy into d */
-       while (isDIGIT(*s) || *s == '_' ||
-               UNLIKELY(hexfp && isXDIGIT(*s))) {
+       while (isDIGIT(*s)
+               || *s == '_'
+               || UNLIKELY(hexfp && isXDIGIT(*s)))
+        {
            /* skip underscores, checking for misplaced ones
               if -w is on
            */
@@ -10356,9 +10457,11 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 
            /* copy, ignoring underbars, until we run out of digits.
            */
-           for (; isDIGIT(*s) || *s == '_' ||
-                     UNLIKELY(hexfp && isXDIGIT(*s));
-                 s++) {
+           for (; isDIGIT(*s)
+                   || *s == '_'
+                   || UNLIKELY(hexfp && isXDIGIT(*s));
+                 s++)
+            {
                /* fixed length buffer check */
                if (d >= e)
                    Perl_croak(aTHX_ "%s", number_too_long);
@@ -10429,8 +10532,8 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                    *d++ = *s++;
                }
                else {
-                  if (((lastub && s == lastub + 1) ||
-                       (!isDIGIT(s[1]) && s[1] != '_')))
+                  if (((lastub && s == lastub + 1)
+                        || (!isDIGIT(s[1]) && s[1] != '_')))
                       Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
                                      "Misplaced _ in number");
                   lastub = s++;
@@ -10462,7 +10565,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
               floatit = TRUE;
         }
        if (floatit) {
-            STORE_NUMERIC_LOCAL_SET_STANDARD();
+            STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD();
            /* terminate the string */
            *d = '\0';
             if (UNLIKELY(hexfp)) {
@@ -10479,7 +10582,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
             } else {
                 nv = Atof(PL_tokenbuf);
             }
-            RESTORE_NUMERIC_LOCAL();
+            RESTORE_LC_NUMERIC_UNDERLYING();
             sv = newSVnv(nv);
        }
 
@@ -10675,9 +10778,12 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
 
     if (!yychar || (yychar == ';' && !PL_rsfp))
        sv_catpvs(where_sv, "at EOF");
-    else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
-      PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
-      PL_oldbufptr != PL_bufptr) {
+    else if (   PL_oldoldbufptr
+             && PL_bufptr > PL_oldoldbufptr
+             && PL_bufptr - PL_oldoldbufptr < 200
+             && PL_oldoldbufptr != PL_oldbufptr
+             && PL_oldbufptr != PL_bufptr)
+    {
        /*
                Only for NetWare:
                The code below is removed for NetWare because it abends/crashes on NetWare
@@ -10692,8 +10798,10 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
        context = PL_oldoldbufptr;
        contlen = PL_bufptr - PL_oldoldbufptr;
     }
-    else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
-      PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
+    else if (  PL_oldbufptr
+            && PL_bufptr > PL_oldbufptr
+            && PL_bufptr - PL_oldbufptr < 200
+            && PL_oldbufptr != PL_bufptr) {
        /*
                Only for NetWare:
                The code below is removed for NetWare because it abends/crashes on NetWare
@@ -10711,8 +10819,8 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
     else if (yychar > 255)
        sv_catpvs(where_sv, "next token ???");
     else if (yychar == YYEMPTY) {
-       if (PL_lex_state == LEX_NORMAL ||
-          (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
+       if (    PL_lex_state == LEX_NORMAL
+            || (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
            sv_catpvs(where_sv, "at end of line");
        else if (PL_lex_inpat)
            sv_catpvs(where_sv, "within pattern");
@@ -11163,7 +11271,7 @@ Parse a Perl arithmetic expression.  This may contain operators of precedence
 down to the bit shift operators.  The expression must be followed (and thus
 terminated) either by a comparison or lower-precedence operator or by
 something that would normally terminate an expression such as semicolon.
-If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
+If C<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
 otherwise it is mandatory.  It is up to the caller to ensure that the
 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
 the source of the code to be parsed and the lexical context for the
@@ -11195,7 +11303,7 @@ Parse a Perl term expression.  This may contain operators of precedence
 down to the assignment operators.  The expression must be followed (and thus
 terminated) either by a comma or lower-precedence operator or by
 something that would normally terminate an expression such as semicolon.
-If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
+If C<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
 otherwise it is mandatory.  It is up to the caller to ensure that the
 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
 the source of the code to be parsed and the lexical context for the
@@ -11227,7 +11335,7 @@ Parse a Perl list expression.  This may contain operators of precedence
 down to the comma operator.  The expression must be followed (and thus
 terminated) either by a low-precedence logic operator such as C<or> or by
 something that would normally terminate an expression such as semicolon.
-If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
+If C<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
 otherwise it is mandatory.  It is up to the caller to ensure that the
 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
 the source of the code to be parsed and the lexical context for the
@@ -11260,7 +11368,7 @@ expression grammar, including the lowest-precedence operators such
 as C<or>.  The expression must be followed (and thus terminated) by a
 token that an expression would normally be terminated by: end-of-file,
 closing bracketing punctuation, semicolon, or one of the keywords that
-signals a postfix expression-statement modifier.  If I<flags> includes
+signals a postfix expression-statement modifier.  If C<flags> includes
 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
 mandatory.  It is up to the caller to ensure that the dynamic parser
 state (L</PL_parser> et al) is correctly set to reflect the source of
@@ -11307,7 +11415,7 @@ the parser state, normally resulting in a single exception at the top
 level of parsing which covers all the compilation errors that occurred.
 Some compilation errors, however, will throw an exception immediately.
 
-The I<flags> parameter is reserved for future use, and must always
+The C<flags> parameter is reserved for future use, and must always
 be zero.
 
 =cut
@@ -11345,7 +11453,7 @@ the parser state, normally resulting in a single exception at the top
 level of parsing which covers all the compilation errors that occurred.
 Some compilation errors, however, will throw an exception immediately.
 
-The I<flags> parameter is reserved for future use, and must always
+The C<flags> parameter is reserved for future use, and must always
 be zero.
 
 =cut
@@ -11365,7 +11473,7 @@ Perl_parse_barestmt(pTHX_ U32 flags)
 Parse a single label, possibly optional, of the type that may prefix a
 Perl statement.  It is up to the caller to ensure that the dynamic parser
 state (L</PL_parser> et al) is correctly set to reflect the source of
-the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
+the code to be parsed.  If C<flags> includes C<PARSE_OPTIONAL> then the
 label is optional, otherwise it is mandatory.
 
 The name of the label is returned in the form of a fresh scalar.  If an
@@ -11449,7 +11557,7 @@ the parser state, normally resulting in a single exception at the top
 level of parsing which covers all the compilation errors that occurred.
 Some compilation errors, however, will throw an exception immediately.
 
-The I<flags> parameter is reserved for future use, and must always
+The C<flags> parameter is reserved for future use, and must always
 be zero.
 
 =cut
@@ -11487,7 +11595,7 @@ normally resulting in a single exception at the top level of parsing
 which covers all the compilation errors that occurred.  Some compilation
 errors, however, will throw an exception immediately.
 
-The I<flags> parameter is reserved for future use, and must always
+The C<flags> parameter is reserved for future use, and must always
 be zero.
 
 =cut
@@ -11579,8 +11687,9 @@ Perl_parse_subsignature(pTHX)
                                    "lacks default expression"));
                    } else {
                        OP *defexpr = parse_termexpr(0);
-                       if (defexpr->op_type == OP_UNDEF &&
-                               !(defexpr->op_flags & OPf_KIDS)) {
+                       if (defexpr->op_type == OP_UNDEF
+                            && !(defexpr->op_flags & OPf_KIDS))
+                        {
                            op_free(defexpr);
                        } else {
                            OP *ifop = 
@@ -11727,11 +11836,5 @@ Perl_parse_subsignature(pTHX)
 }
 
 /*
- * Local variables:
- * c-indentation-style: bsd
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- *
  * ex: set ts=8 sts=4 sw=4 et:
  */