Allow my \$a

[perl5.git] / toke.c
diff --git a/toke.c b/toke.c

index 33ae20f..efb8348 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -38,7 +38,7 @@ Individual members of C<PL_parser> have their own documentation.
  #include "EXTERN.h"
  #define PERL_IN_TOKE_C
  #include "perl.h"
-#include "dquote_static.c"
+#include "dquote_inline.h"
  
  #define new_constant(a,b,c,d,e,f,g)    \
         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
@@ -52,7 +52,6 @@ Individual members of C<PL_parser> have their own documentation.
  #define PL_lex_brackstack      (PL_parser->lex_brackstack)
  #define PL_lex_casemods                (PL_parser->lex_casemods)
  #define PL_lex_casestack        (PL_parser->lex_casestack)
-#define PL_lex_defer           (PL_parser->lex_defer)
  #define PL_lex_dojoin          (PL_parser->lex_dojoin)
  #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  #define PL_lex_inpat           (PL_parser->lex_inpat)
@@ -142,7 +141,6 @@ static const char* const ident_too_long = "Identifier too long";
                                         string or after \E, $foo, etc       */
  #define LEX_INTERPCONST                 2 /* NOT USED */
  #define LEX_FORMLINE            1 /* expecting a format line               */
-#define LEX_KNOWNEXT            0 /* next token known; just return it      */
  
  
  #ifdef DEBUGGING
@@ -652,15 +650,15 @@ is made on the save stack so that upon unwinding the new state object
  will be destroyed and the former value of L</PL_parser> will be restored.
  Nothing else need be done to clean up the parsing context.
  
-The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
+The code to be parsed comes from C<line> and C<rsfp>.  C<line>, if
  non-null, provides a string (in SV form) containing code to be parsed.
-A copy of the string is made, so subsequent modification of I<line>
-does not affect parsing.  I<rsfp>, if non-null, provides an input stream
+A copy of the string is made, so subsequent modification of C<line>
+does not affect parsing.  C<rsfp>, if non-null, provides an input stream
  from which code will be read to be parsed.  If both are non-null, the
-code in I<line> comes first and must consist of complete lines of input,
-and I<rsfp> supplies the remainder of the source.
+code in C<line> comes first and must consist of complete lines of input,
+and C<rsfp> supplies the remainder of the source.
  
-The I<flags> parameter is reserved for future use.  Currently it is only
+The C<flags> parameter is reserved for future use.  Currently it is only
  used by perl internally, so extensions should always pass zero.
  
  =cut
@@ -725,7 +723,8 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
         parser->linestr = flags & LEX_START_COPIED
                             ? SvREFCNT_inc_simple_NN(line)
                             : newSVpvn_flags(s, len, SvUTF8(line));
-       sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
+       if (!rsfp)
+           sv_catpvs(parser->linestr, "\n;");
      } else {
         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
      }
@@ -757,8 +756,8 @@ Perl_parser_free(pTHX_  const yy_parser *parser)
  
      if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
         PerlIO_clearerr(parser->rsfp);
-    else if (parser->rsfp && (!parser->old_parser ||
-               (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
+    else if (parser->rsfp && (!parser->old_parser
+          || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
         PerlIO_close(parser->rsfp);
      SvREFCNT_dec(parser->rsfp_filters);
      SvREFCNT_dec(parser->lex_stuff);
@@ -887,7 +886,7 @@ Perl_lex_bufutf8(pTHX)
  =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
  
  Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
-at least I<len> octets (including terminating C<NUL>).  Returns a
+at least C<len> octets (including terminating C<NUL>).  Returns a
  pointer to the reallocated buffer.  This is necessary before making
  any direct modification of the buffer that would increase its length.
  L</lex_stuff_pvn> provides a more convenient way to insert text into
@@ -948,9 +947,9 @@ It is not recommended to do this as part of normal parsing, and most
  uses of this facility run the risk of the inserted characters being
  interpreted in an unintended manner.
  
-The string to be inserted is represented by I<len> octets starting
-at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
-according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
+The string to be inserted is represented by C<len> octets starting
+at C<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
+according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
  The characters are recoded for the lexer buffer, according to how the
  buffer is currently being interpreted (L</lex_bufutf8>).  If a string
  to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
@@ -1033,7 +1032,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
                 }
                 else {
                      assert(p < e -1 );
-                   *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
+                   *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
                     p += 2;
                  }
             }
@@ -1060,10 +1059,10 @@ It is not recommended to do this as part of normal parsing, and most
  uses of this facility run the risk of the inserted characters being
  interpreted in an unintended manner.
  
-The string to be inserted is represented by octets starting at I<pv>
+The string to be inserted is represented by octets starting at C<pv>
  and continuing to the first nul.  These octets are interpreted as either
  UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
-in I<flags>.  The characters are recoded for the lexer buffer, according
+in C<flags>.  The characters are recoded for the lexer buffer, according
  to how the buffer is currently being interpreted (L</lex_bufutf8>).
  If it is not convenient to nul-terminate a string to be inserted, the
  L</lex_stuff_pvn> function is more appropriate.
@@ -1089,7 +1088,7 @@ It is not recommended to do this as part of normal parsing, and most
  uses of this facility run the risk of the inserted characters being
  interpreted in an unintended manner.
  
-The string to be inserted is the string value of I<sv>.  The characters
+The string to be inserted is the string value of C<sv>.  The characters
  are recoded for the lexer buffer, according to how the buffer is currently
  being interpreted (L</lex_bufutf8>).  If a string to be inserted is
  not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
@@ -1114,7 +1113,7 @@ Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
  =for apidoc Amx|void|lex_unstuff|char *ptr
  
  Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
-I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
+C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
  This hides the discarded text from any lexing code that runs later,
  as if the text had never appeared.
  
@@ -1148,7 +1147,7 @@ Perl_lex_unstuff(pTHX_ char *ptr)
  =for apidoc Amx|void|lex_read_to|char *ptr
  
  Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
-to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
+to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
  performing the correct bookkeeping whenever a newline character is passed.
  This is the normal way to consume lexed text.
  
@@ -1179,8 +1178,8 @@ Perl_lex_read_to(pTHX_ char *ptr)
  =for apidoc Amx|void|lex_discard_to|char *ptr
  
  Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
-up to I<ptr>.  The remaining content of the buffer will be moved, and
-all pointers into the buffer updated appropriately.  I<ptr> must not
+up to C<ptr>.  The remaining content of the buffer will be moved, and
+all pointers into the buffer updated appropriately.  C<ptr> must not
  be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
  it is not permitted to discard text that has yet to be lexed.
  
@@ -1241,7 +1240,7 @@ the current chunk at this time.
  If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
  chunk (i.e., the current chunk has been entirely consumed), normally the
  current chunk will be discarded at the same time that the new chunk is
-read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
+read in.  If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
  will not be discarded.  If the current chunk has not been entirely
  consumed, then it will not be discarded regardless of the flag.
  
@@ -1270,8 +1269,9 @@ Perl_lex_next_chunk(pTHX_ U32 flags)
         return FALSE;
      linestr = PL_parser->linestr;
      buf = SvPVX(linestr);
-    if (!(flags & LEX_KEEP_PREVIOUS) &&
-           PL_parser->bufptr == PL_parser->bufend) {
+    if (!(flags & LEX_KEEP_PREVIOUS)
+          && PL_parser->bufptr == PL_parser->bufend)
+    {
         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
         linestart_pos = 0;
         if (PL_parser->last_uni != PL_parser->bufend)
@@ -1338,8 +1338,10 @@ Perl_lex_next_chunk(pTHX_ U32 flags)
         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
         PL_parser->preambling = NOLINE;
      }
-    if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
-           PL_curstash != PL_debstash) {
+    if (   got_some_for_debugger
+        && PERLDB_LINE_OR_SAVESRC
+        && PL_curstash != PL_debstash)
+    {
         /* debugger active and we're not compiling the debugger code,
          * so store the line into the debugger's array of lines
          */
@@ -1359,8 +1361,8 @@ peeked character, use L</lex_read_unichar>.
  
  If the next character is in (or extends into) the next chunk of input
  text, the next chunk will be read in.  Normally the current chunk will be
-discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
-then the current chunk will not be discarded.
+discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
+bit set, then the current chunk will not be discarded.
  
  If the input is being interpreted as UTF-8 and a UTF-8 encoding error
  is encountered, an exception is generated.
@@ -1430,8 +1432,8 @@ examine the next character, use L</lex_peek_unichar> instead.
  
  If the next character is in (or extends into) the next chunk of input
  text, the next chunk will be read in.  Normally the current chunk will be
-discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
-then the current chunk will not be discarded.
+discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
+bit set, then the current chunk will not be discarded.
  
  If the input is being interpreted as UTF-8 and a UTF-8 encoding error
  is encountered, an exception is generated.
@@ -1468,7 +1470,7 @@ at a non-space character (or the end of the input text).
  
  If spaces extend into the next chunk of input text, the next chunk will
  be read in.  Normally the current chunk will be discarded at the same
-time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
+time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
  chunk will not be discarded.
  
  =cut
@@ -1587,9 +1589,10 @@ Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
                     in_brackets = TRUE;
                 else if (*p == ']')
                     in_brackets = FALSE;
-               else if ((*p == '@' || *p == '%') &&
-                   !after_slash &&
-                   !in_brackets ) {
+               else if ((*p == '@' || *p == '%')
+                         && !after_slash
+                         && !in_brackets )
+                {
                     must_be_last = TRUE;
                     greedy_proto = *p;
                 }
@@ -1649,6 +1652,7 @@ S_incline(pTHX_ const char *s)
      const char *n;
      const char *e;
      line_t line_num;
+    UV uv;
  
      PERL_ARGS_ASSERT_INCLINE;
  
@@ -1689,7 +1693,7 @@ S_incline(pTHX_ const char *s)
      }
      else {
         t = s;
-       while (!isSPACE(*t))
+       while (*t && !isSPACE(*t))
             t++;
         e = t;
      }
@@ -1698,7 +1702,9 @@ S_incline(pTHX_ const char *s)
      if (*e != '\n' && *e != '\0')
         return;         /* false alarm */
  
-    line_num = grok_atou(n, &e) - 1;
+    if (!grok_atoUV(n, &uv, &e))
+        return;
+    line_num = ((line_t)uv) - 1;
  
      if (t - s > 0) {
         const STRLEN len = t - s;
@@ -1838,13 +1844,13 @@ S_check_uni(pTHX)
         PL_last_uni++;
      s = PL_last_uni;
      while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
-       s++;
+       s += UTF ? UTF8SKIP(s) : 1;
      if ((t = strchr(s, '(')) && t < PL_bufptr)
         return;
  
      Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
-                    "Warning: Use of \"%.*s\" without parentheses is ambiguous",
-                    (int)(s - PL_last_uni), PL_last_uni);
+                    "Warning: Use of \"%"UTF8f"\" without parentheses is ambiguous",
+                    UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
  }
  
  /*
@@ -1912,10 +1918,6 @@ S_force_next(pTHX_ I32 type)
      assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
      PL_nexttype[PL_nexttoke] = type;
      PL_nexttoke++;
-    if (PL_lex_state != LEX_KNOWNEXT) {
-       PL_lex_defer = PL_lex_state;
-       PL_lex_state = LEX_KNOWNEXT;
-    }
  }
  
  /*
@@ -1931,13 +1933,13 @@ static int
  S_postderef(pTHX_ int const funny, char const next)
  {
      assert(funny == DOLSHARP || strchr("$@%&*", funny));
-    assert(strchr("*[{", next));
      if (next == '*') {
         PL_expect = XOPERATOR;
         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
             assert('@' == funny || '$' == funny || DOLSHARP == funny);
             PL_lex_state = LEX_INTERPEND;
-           force_next(POSTJOIN);
+           if ('@' == funny)
+               force_next(POSTJOIN);
         }
         force_next(next);
         PL_bufptr+=2;
@@ -2010,8 +2012,8 @@ S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
  
      start = skipspace(start);
      s = start;
-    if (isIDFIRST_lazy_if(s,UTF) ||
-       (allow_pack && *s == ':') )
+    if (isIDFIRST_lazy_if(s,UTF)
+        || (allow_pack && *s == ':' && s[1] == ':') )
      {
         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
         if (check_keyword) {
@@ -2179,8 +2181,8 @@ S_force_strict_version(pTHX_ char *s)
         s = (char *)scan_version(s, ver, 0);
         version = newSVOP(OP_CONST, 0, ver);
      }
-    else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
-           (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
+    else if ((*s != ';' && *s != '{' && *s != '}' )
+             && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
      {
         PL_bufptr = s;
         if (errstr)
@@ -2381,6 +2383,7 @@ S_sublex_push(pTHX)
         popping.  We must not have a PL_lex_stuff value left dangling, as
         that breaks assumptions elsewhere.  See bug #123617.  */
      SAVEGENERICSV(PL_lex_stuff);
+    SAVEGENERICSV(PL_sublex_info.repl);
  
      PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
         = SvPVX(PL_linestr);
@@ -2509,8 +2512,11 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
  
      PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
  
-    if (!SvCUR(res))
+    if (!SvCUR(res)) {
+        Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
+                       "Unknown charname '' is deprecated");
          return res;
+    }
  
      if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
                                       e - backslash_ptr,
@@ -2525,9 +2531,10 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
          /* We deliberately don't try to print the malformed character, which
           * might not print very well; it also may be just the first of many
           * malformations, so don't print what comes after it */
-        yyerror(Perl_form(aTHX_
+        yyerror_pv(Perl_form(aTHX_
              "Malformed UTF-8 character immediately after '%.*s'",
-            (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
+            (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr),
+                   SVf_UTF8);
         return NULL;
      }
  
@@ -2575,11 +2582,6 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
             if (*s == ' ' && *(s-1) == ' ') {
                  goto multi_spaces;
              }
-           if ((U8) *s == NBSP_NATIVE && ckWARN_d(WARN_DEPRECATED)) {
-                Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
-                           "NO-BREAK SPACE in a charnames "
-                           "alias definition is deprecated");
-            }
              s++;
          }
      }
@@ -2593,7 +2595,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
              }
              s++;
          } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
-            if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
+            if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
                  goto bad_charname;
              }
              s += 2;
@@ -2623,18 +2625,10 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
                  s++;
              }
              else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
-                if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
+                if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
                  {
                      goto bad_charname;
                  }
-                if (*s == *NBSP_UTF8
-                    && *(s+1) == *(NBSP_UTF8+1)
-                    && ckWARN_d(WARN_DEPRECATED))
-                {
-                    Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
-                                "NO-BREAK SPACE in a charnames "
-                                "alias definition is deprecated");
-                }
                  s += 2;
              }
              else {
@@ -2749,7 +2743,6 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
          \l \L \u \U \Q \E
         (?{  or  (??{
  
-
    In transliterations:
      characters are VERY literal, except for - not at the start or end
      of the string, which indicates a range. If the range is in bytes,
@@ -2824,6 +2817,8 @@ S_scan_const(pTHX_ char *start)
                                             example when it is entirely composed
                                             of hex constants */
      SV *res;                           /* result from charnames */
+    STRLEN offset_to_max;   /* The offset in the output to where the range
+                               high-end character is temporarily placed */
  
      /* Note on sizing:  The scanned constant is placed into sv, which is
       * initialized by newSV() assuming one byte of output for every byte of
@@ -2840,8 +2835,9 @@ S_scan_const(pTHX_ char *start)
      UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
                         before set */
  #ifdef EBCDIC
-    UV literal_endpoint = 0;
-    bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
+    int backslash_N = 0;            /* ? was the character from \N{} */
+    int non_portable_endpoint = 0;  /* ? In a range is an endpoint
+                                       platform-specific like \x65 */
  #endif
  
      PERL_ARGS_ASSERT_SCAN_CONST;
@@ -2857,151 +2853,304 @@ S_scan_const(pTHX_ char *start)
      ENTER_with_name("scan_const");
      SAVEFREESV(sv);
  
-    while (s < send || dorange) {
+    while (s < send
+           || dorange   /* Handle tr/// range at right edge of input */
+    ) {
  
          /* get transliterations out of the way (they're most literal) */
         if (PL_lex_inwhat == OP_TRANS) {
-           /* expand a range A-Z to the full set of characters.  AIE! */
-           if (dorange) {
-               I32 i;                          /* current expanded character */
-               I32 min;                        /* first character in range */
-               I32 max;                        /* last character in range */
  
+            /* But there isn't any special handling necessary unless there is a
+             * range, so for most cases we just drop down and handle the value
+             * as any other.  There are two exceptions.
+             *
+             * 1.  A minus sign indicates that we are actually going to have
+             *     a range.  In this case, skip the '-', set a flag, then drop
+             *     down to handle what should be the end range value.
+             * 2.  After we've handled that value, the next time through, that
+             *     flag is set and we fix up the range.
+             *
+             * Ranges entirely within Latin1 are expanded out entirely, in
+             * order to avoid the significant overhead of making a swash.
+             * Ranges that extend above Latin1 have to have a swash, so there
+             * is no advantage to expanding them here, so they are stored here
+             * as Min, ILLEGAL_UTF8_BYTE, Max.  The illegal byte signifies a
+             * hyphen without any possible ambiguity.  On EBCDIC machines, if
+             * the range is expressed as Unicode, the Latin1 portion is
+             * expanded out even if the entire range extends above Latin1.
+             * This is because each code point in it has to be processed here
+             * individually to get its native translation */
+
+           if (! dorange) {
+
+                /* Here, we don't think we're in a range.  If we've processed
+                 * at least one character, then see if this next one is a '-',
+                 * indicating the previous one was the start of a range.  But
+                 * don't bother if we're too close to the end for the minus to
+                 * mean that. */
+                if (*s != '-' || s >= send - 1 || s == start) {
+
+                    /* A regular character.  Process like any other, but first
+                     * clear any flags */
+                    didrange = FALSE;
+                    dorange = FALSE;
  #ifdef EBCDIC
-               UV uvmax = 0;
+                    non_portable_endpoint = 0;
+                    backslash_N = 0;
  #endif
+                    /* Drops down to generic code to process current byte */
+                }
+                else {
+                    if (didrange) { /* Something like y/A-C-Z// */
+                        Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
+                    }
  
-               if (has_utf8
-#ifdef EBCDIC
-                   && !native_range
-#endif
-                ) {
-                   char * const c = (char*)utf8_hop((U8*)d, -1);
-                   char *e = d++;
-                   while (e-- > c)
-                       *(e + 1) = *e;
-                   *c = (char) ILLEGAL_UTF8_BYTE;
-                   /* mark the range as done, and continue */
-                   dorange = FALSE;
-                   didrange = TRUE;
-                   continue;
-               }
+                    dorange = TRUE;
  
-               i = d - SvPVX_const(sv);                /* remember current offset */
-#ifdef EBCDIC
-                SvGROW(sv,
-                      SvLEN(sv) + ((has_utf8)
-                                    ?  (512 - UTF_CONTINUATION_MARK
-                                        + UNISKIP(0x100))
-                                   : 256));
-                /* How many two-byte within 0..255: 128 in UTF-8,
-                * 96 in UTF-8-mod. */
+                    s++;    /* Skip past the minus */
+
+                    /* d now points to where the end-range character will be
+                     * placed.  Save it so we won't have to go finding it later,
+                     * and drop down to get that character.  (Actually we
+                     * instead save the offset, to handle the case where a
+                     * realloc in the meantime could change the actual
+                     * pointer).  We'll finish processing the range the next
+                     * time through the loop */
+                    offset_to_max = d - SvPVX_const(sv);
+                }
+            }  /* End of not a range */
+            else {
+                /* Here we have parsed a range.  Now we must handle it.  At this
+                 * point:
+                 * 'sv' is a SV* that contains the output string we are
+                 *      constructing.  The final two characters in that string
+                 *      are the range start and range end, in order.
+                 * 'd'  points to just beyond the range end in the 'sv' string,
+                 *      where we would next place something
+                 * 'offset_to_max' is the offset in 'sv' at which the character
+                 *      before 'd' begins.
+                 */
+                const char * max_ptr = SvPVX_const(sv) + offset_to_max;
+                const char * min_ptr;
+                IV range_min;
+               IV range_max;   /* last character in range */
+                STRLEN save_offset;
+                STRLEN grow;
+#ifndef EBCDIC  /* Not meaningful except in EBCDIC, so initialize to false */
+                const bool convert_unicode = FALSE;
+                const IV real_range_max = 0;
  #else
-               SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
+                bool convert_unicode;
+                IV real_range_max = 0;
  #endif
-               d = SvPVX(sv) + i;              /* refresh d after realloc */
-#ifdef EBCDIC
+
+                /* Get the range-ends code point values. */
                  if (has_utf8) {
-                    int j;
-                    for (j = 0; j <= 1; j++) {
-                        char * const c = (char*)utf8_hop((U8*)d, -1);
-                        const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
-                        if (j)
-                            min = (U8)uv;
-                        else if (uv < 256)
-                            max = (U8)uv;
-                        else {
-                            max = (U8)0xff; /* only to \xff */
-                            uvmax = uv; /* \x{100} to uvmax */
-                        }
-                        d = c; /* eat endpoint chars */
-                     }
+                    /* We know the utf8 is valid, because we just constructed
+                     * it ourselves in previous loop iterations */
+                    min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
+                    range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
+                    range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
                  }
-               else {
-#endif
-                  d -= 2;              /* eat the first char and the - */
-                  min = (U8)*d;        /* first char in range */
-                  max = (U8)d[1];      /* last char in range  */
+                else {
+                    min_ptr = max_ptr - 1;
+                    range_min = * (U8*) min_ptr;
+                    range_max = * (U8*) max_ptr;
+                }
+
  #ifdef EBCDIC
-              }
+                /* On EBCDIC platforms, we may have to deal with portable
+                 * ranges.  These happen if at least one range endpoint is a
+                 * Unicode value (\N{...}), or if the range is a subset of
+                 * [A-Z] or [a-z], and both ends are literal characters,
+                 * like 'A', and not like \x{C1} */
+                if ((convert_unicode
+                     = cBOOL(backslash_N)   /* \N{} forces Unicode, hence
+                                               portable range */
+                      || (   ! non_portable_endpoint
+                          && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
+                             || (isUPPER_A(range_min) && isUPPER_A(range_max))))
+                )) {
+
+                    /* Special handling is needed for these portable ranges.
+                     * They are defined to all be in Unicode terms, which
+                     * include all Unicode code points between the end points.
+                     * Convert to Unicode to get the Unicode range.  Later we
+                     * will convert each code point in the range back to
+                     * native.  */
+                    range_min = NATIVE_TO_UNI(range_min);
+                    range_max = NATIVE_TO_UNI(range_max);
+                }
  #endif
  
-                if (min > max) {
-                   Perl_croak(aTHX_
-                              "Invalid range \"%c-%c\" in transliteration operator",
-                              (char)min, (char)max);
+                if (range_min > range_max) {
+                    if (convert_unicode) {
+                        /* Need to convert back to native for meaningful
+                         * messages for this platform */
+                        range_min = UNI_TO_NATIVE(range_min);
+                        range_max = UNI_TO_NATIVE(range_max);
+                    }
+
+                    /* Use the characters themselves for the error message if
+                     * ASCII printables; otherwise some visible representation
+                     * of them */
+                    if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
+                        Perl_croak(aTHX_
+                        "Invalid range \"%c-%c\" in transliteration operator",
+                        (char)range_min, (char)range_max);
+                    }
+                    else if (convert_unicode) {
+                        /* diag_listed_as: Invalid range "%s" in transliteration operator */
+                        Perl_croak(aTHX_
+                              "Invalid range \"\\N{U+%04"UVXf"}-\\N{U+%04"UVXf"}\""
+                               " in transliteration operator",
+                              range_min, range_max);
+                    }
+                    else {
+                        /* diag_listed_as: Invalid range "%s" in transliteration operator */
+                        Perl_croak(aTHX_
+                              "Invalid range \"\\x{%04"UVXf"}-\\x{%04"UVXf"}\""
+                               " in transliteration operator",
+                              range_min, range_max);
+                    }
                  }
  
+               if (has_utf8) {
+
+                    /* We try to avoid creating a swash.  If the upper end of
+                     * this range is below 256, this range won't force a swash;
+                     * otherwise it does force a swash, and as long as we have
+                     * to have one, we might as well not expand things out.
+                     * But if it's EBCDIC, we may have to look at each
+                     * character below 256 if we have to convert to/from
+                     * Unicode values */
+                    if (range_max > 255
  #ifdef EBCDIC
-                /* Because of the discontinuities in EBCDIC A-Z and a-z, expand
-                 * any subsets of these ranges into individual characters */
-               if (literal_endpoint == 2 &&
-                   ((isLOWER_A(min) && isLOWER_A(max)) ||
-                    (isUPPER_A(min) && isUPPER_A(max))))
-                {
-                    for (i = min; i <= max; i++) {
-                        if (isALPHA_A(i))
-                            *d++ = i;
-                   }
-               }
-               else
+                       && (range_min > 255 || ! convert_unicode)
  #endif
-                   for (i = min; i <= max; i++)
-#ifdef EBCDIC
-                        if (has_utf8) {
-                            append_utf8_from_native_byte(i, &d);
+                    ) {
+                        /* Move the high character one byte to the right; then
+                         * insert between it and the range begin, an illegal
+                         * byte which serves to indicate this is a range (using
+                         * a '-' could be ambiguous). */
+                        char *e = d++;
+                        while (e-- > max_ptr) {
+                            *(e + 1) = *e;
                          }
-                        else
-#endif
-                            *d++ = (char)i;
- 
+                        *(e + 1) = (char) ILLEGAL_UTF8_BYTE;
+                        goto range_done;
+                    }
+
+                    /* Here, we're going to expand out the range.  For EBCDIC
+                     * the range can extend above 255 (not so in ASCII), so
+                     * for EBCDIC, split it into the parts above and below
+                     * 255/256 */
  #ifdef EBCDIC
-                if (uvmax) {
-                    d = (char*)uvchr_to_utf8((U8*)d, 0x100);
-                    if (uvmax > 0x101)
-                        *d++ = (char) ILLEGAL_UTF8_BYTE;
-                    if (uvmax > 0x100)
-                        d = (char*)uvchr_to_utf8((U8*)d, uvmax);
-                }
+                    if (range_max > 255) {
+                        real_range_max = range_max;
+                        range_max = 255;
+                    }
  #endif
+               }
  
-               /* mark the range as done, and continue */
-               dorange = FALSE;
-               didrange = TRUE;
+                /* Here we need to expand out the string to contain each
+                 * character in the range.  Grow the output to handle this */
+
+                save_offset  = min_ptr - SvPVX_const(sv);
+
+                /* The base growth is the number of code points in the range */
+                grow = range_max - range_min + 1;
+                if (has_utf8) {
+
+                    /* But if the output is UTF-8, some of those characters may
+                     * need two bytes (since the maximum range value here is
+                     * 255, the max bytes per character is two).  On ASCII
+                     * platforms, it's not much trouble to get an accurate
+                     * count of what's needed.  But on EBCDIC, the ones that
+                     * need 2 bytes are scattered around, so just use a worst
+                     * case value instead of calculating for that platform.  */
  #ifdef EBCDIC
-               literal_endpoint = 0;
+                    grow *= 2;
+#else
+                    /* Only those above 127 require 2 bytes.  This may be
+                     * everything in the range, or not */
+                    if (range_min > 127) {
+                        grow *= 2;
+                    }
+                    else if (range_max > 127) {
+                        grow += range_max - 127;
+                    }
  #endif
-               continue;
-           }
+                }
+
+                /* Subtract 3 for the bytes that were already accounted for
+                 * (min, max, and the hyphen) */
+                SvGROW(sv, SvLEN(sv) + grow - 3);
+               d = SvPVX(sv) + save_offset;    /* refresh d after realloc */
  
-           /* range begins (ignore - as first or last char) */
-           else if (*s == '-' && s+1 < send  && s != start) {
-               if (didrange) {
-                   Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
+                /* Here, we expand out the range.  On ASCII platforms, the
+                 * compiler should optimize out the 'convert_unicode==TRUE'
+                 * portion of this */
+                if (convert_unicode) {
+                    IV i;
+
+                    /* Recall that the min and max are now in Unicode terms, so
+                     * we have to convert each character to its native
+                     * equivalent */
+                    if (has_utf8) {
+                        for (i = range_min; i <= range_max; i++) {
+                            append_utf8_from_native_byte(LATIN1_TO_NATIVE((U8) i),
+                                                         (U8 **) &d);
+                        }
+                    }
+                    else {
+                        for (i = range_min; i <= range_max; i++) {
+                            *d++ = (char)LATIN1_TO_NATIVE((U8) i);
+                        }
+                   }
                 }
-               if (has_utf8
-#ifdef EBCDIC
-                   && !native_range
-#endif
-                   ) {
-                   *d++ = (char) ILLEGAL_UTF8_BYTE;    /* use illegal utf8 byte--see pmtrans */
-                   s++;
-                   continue;
+                else {
+                    IV i;
+
+                    /* Here, no conversions are necessary, which means that the
+                     * first character in the range is already in 'd' and
+                     * valid, so we can skip overwriting it */
+                    if (has_utf8) {
+                        d += UTF8SKIP(d);
+                        for (i = range_min + 1; i <= range_max; i++) {
+                            append_utf8_from_native_byte((U8) i, (U8 **) &d);
+                        }
+                    }
+                    else {
+                        d++;
+                        for (i = range_min + 1; i <= range_max; i++) {
+                            *d++ = (char)i;
+                        }
+                   }
                 }
-               dorange = TRUE;
-               s++;
-           }
-           else {
-               didrange = FALSE;
-#ifdef EBCDIC
-               literal_endpoint = 0;
-               native_range = TRUE;
-#endif
-           }
-       }
  
-       /* if we get here, we're not doing a transliteration */
+                /* (Compilers should optimize this out for non-EBCDIC).  If the
+                 * original range extended above 255, add in that portion */
+                if (real_range_max) {
+                    *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
+                    *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
+                    if (real_range_max > 0x101)
+                        *d++ = (char) ILLEGAL_UTF8_BYTE;
+                    if (real_range_max > 0x100)
+                        d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
+                }
  
+              range_done:
+               /* mark the range as done, and continue */
+               didrange = TRUE;
+               dorange = FALSE;
+#ifdef EBCDIC
+               non_portable_endpoint = 0;
+                backslash_N = 0;
+#endif
+               continue;
+           } /* End of is a range */
+        } /* End of transliteration.  Joins main code after these else's */
         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
             char *s1 = s-1;
             int esc = 0;
@@ -3029,17 +3178,20 @@ S_scan_const(pTHX_ char *start)
                 while (s+1 < send && *s != ')')
                     *d++ = *s++;
             }
-           else if (!PL_lex_casemods &&
-                    (    s[2] == '{' /* This should match regcomp.c */
-                     || (s[2] == '?' && s[3] == '{')))
+           else if (!PL_lex_casemods
+                     && (    s[2] == '{' /* This should match regcomp.c */
+                        || (s[2] == '?' && s[3] == '{')))
             {
                 break;
             }
         }
  
         /* likewise skip #-initiated comments in //x patterns */
-       else if (*s == '#' && PL_lex_inpat && !in_charclass &&
-         ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
+       else if (*s == '#'
+                 && PL_lex_inpat
+                 && !in_charclass
+                 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
+        {
             while (s+1 < send && *s != '\n')
                 *d++ = *s++;
         }
@@ -3052,7 +3204,7 @@ S_scan_const(pTHX_ char *start)
            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
            */
         else if (*s == '@' && s[1]) {
-           if (isWORDCHAR_lazy_if(s+1,UTF))
+           if (UTF ? isIDFIRST_utf8((U8*)s+1) : isWORDCHAR_A(s[1]))
                 break;
             if (strchr(":'{$", s[1]))
                 break;
@@ -3085,8 +3237,11 @@ S_scan_const(pTHX_ char *start)
  
             /* warn on \1 - \9 in substitution replacements, but note that \11
              * is an octal; and \19 is \1 followed by '9' */
-           if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
-               isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
+           if (PL_lex_inwhat == OP_SUBST
+                && !PL_lex_inpat
+                && isDIGIT(*s)
+                && *s != '0'
+                && !isDIGIT(s[1]))
             {
                 /* diag_listed_as: \%d better written as $%d */
                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
@@ -3123,14 +3278,6 @@ S_scan_const(pTHX_ char *start)
             }
  
             switch (*s) {
-
-           /* quoted - in transliterations */
-           case '-':
-               if (PL_lex_inwhat == OP_TRANS) {
-                   *d++ = *s++;
-                   continue;
-               }
-               /* FALLTHROUGH */
             default:
                 {
                     if ((isALPHANUMERIC(*s)))
@@ -3194,13 +3341,13 @@ S_scan_const(pTHX_ char *start)
                 }
  
               NUM_ESCAPE_INSERT:
-               /* Insert oct or hex escaped character.  There will always be
-                * enough room in sv since such escapes will be longer than any
-                * UTF-8 sequence they can end up as, except if they force us
-                * to recode the rest of the string into utf8 */
+               /* Insert oct or hex escaped character. */
                 
                 /* Here uv is the ordinal of the next character being added */
-               if (!UVCHR_IS_INVARIANT(uv)) {
+               if (UVCHR_IS_INVARIANT(uv)) {
+                   *d++ = (char) uv;
+               }
+               else {
                     if (!has_utf8 && uv > 255) {
                         /* Might need to recode whatever we have accumulated so
                          * far if it contains any chars variant in utf8 or
@@ -3211,44 +3358,56 @@ S_scan_const(pTHX_ char *start)
                         *d = '\0';
                         /* See Note on sizing above.  */
                         sv_utf8_upgrade_flags_grow(
-                                         sv,
-                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE
+                                       sv,
+                                       SV_GMAGIC|SV_FORCE_UTF8_UPGRADE
                                                    /* Above-latin1 in string
                                                     * implies no encoding */
                                                    |SV_UTF8_NO_ENCODING,
-                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
+                                       UVCHR_SKIP(uv) + (STRLEN)(send - s) + 1);
                         d = SvPVX(sv) + SvCUR(sv);
                         has_utf8 = TRUE;
                      }
  
                      if (has_utf8) {
+                       /* Usually, there will already be enough room in 'sv'
+                        * since such escapes are likely longer than any UTF-8
+                        * sequence they can end up as.  This isn't the case on
+                        * EBCDIC where \x{40000000} contains 12 bytes, and the
+                        * UTF-8 for it contains 14.  And, we have to allow for
+                        * a trailing NUL.  It probably can't happen on ASCII
+                        * platforms, but be safe */
+                        const STRLEN needed = d - SvPVX(sv) + UVCHR_SKIP(uv)
+                                            + 1;
+                        if (UNLIKELY(needed > SvLEN(sv))) {
+                            SvCUR_set(sv, d - SvPVX_const(sv));
+                            d = sv_grow(sv, needed) + SvCUR(sv);
+                        }
+
                         d = (char*)uvchr_to_utf8((U8*)d, uv);
-                       if (PL_lex_inwhat == OP_TRANS &&
-                           PL_sublex_info.sub_op) {
+                       if (PL_lex_inwhat == OP_TRANS
+                            && PL_sublex_info.sub_op)
+                        {
                             PL_sublex_info.sub_op->op_private |=
                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
                                              : OPpTRANS_TO_UTF);
                         }
-#ifdef EBCDIC
-                       if (uv > 255 && !dorange)
-                           native_range = FALSE;
-#endif
                      }
                     else {
                         *d++ = (char)uv;
                     }
                 }
-               else {
-                   *d++ = (char) uv;
-               }
+#ifdef EBCDIC
+                non_portable_endpoint++;
+#endif
                 continue;
  
             case 'N':
                  /* In a non-pattern \N must be like \N{U+0041}, or it can be a
                   * named character, like \N{LATIN SMALL LETTER A}, or a named
                   * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
-                 * GRAVE}.  For convenience all three forms are referred to as
-                 * "named characters" below.
+                 * GRAVE} (except y/// can't handle the latter, croaking).  For
+                 * convenience all three forms are referred to as "named
+                 * characters" below.
                   *
                   * For patterns, \N also can mean to match a non-newline.  Code
                   * before this 'switch' statement should already have handled
@@ -3266,18 +3425,16 @@ S_scan_const(pTHX_ char *start)
                   *
                  * The structure of this section of code (besides checking for
                  * errors and upgrading to utf8) is:
-                 *  If the named character is of the form \N{U+...}, pass it
+                 *    If the named character is of the form \N{U+...}, pass it
                   *      through if a pattern; otherwise convert the code point
                   *      to utf8
-                 *  Otherwise must be some \N{NAME}: convert to \N{U+c1.c2...}
-                 *      if a pattern; otherwise convert to utf8
+                 *    Otherwise must be some \N{NAME}: convert to
+                 *      \N{U+c1.c2...} if a pattern; otherwise convert to utf8
                   *
-                 * If the regex compiler should ever need to differentiate
-                 * between the \N{U+...} and \N{name} forms, that could easily
-                 * be done here by stripping any leading zeros from the
-                 * \N{U+...} case, and adding them to the other one. */
-
-                /* Here, 's' points to the 'N'; the test below is guaranteed to
+                 * Transliteration is an exception.  The conversion to utf8 is
+                 * only done if the code point requires it to be representable.
+                 *
+                 * Here, 's' points to the 'N'; the test below is guaranteed to
                  * succeed if we are being called on a pattern, as we already
                   * know from a test above that the next character is a '{'.  A
                   * non-pattern \N must mean 'named character', which requires
@@ -3340,25 +3497,30 @@ S_scan_const(pTHX_ char *start)
                          if (len == 0 || (len != (STRLEN)(e - s)))
                              goto bad_NU;
  
-                         /* If the destination is not in utf8, unconditionally
-                         * recode it to be so.  This is because \N{} implies
-                         * Unicode semantics, and scalars have to be in utf8
-                         * to guarantee those semantics */
-                       if (! has_utf8) {
+                         /* For non-tr///, if the destination is not in utf8,
+                          * unconditionally recode it to be so.  This is
+                          * because \N{} implies Unicode semantics, and scalars
+                          * have to be in utf8 to guarantee those semantics.
+                          * tr/// doesn't care about Unicode rules, so no need
+                          * there to upgrade to UTF-8 for small enough code
+                          * points */
+                       if (! has_utf8 && (   uv > 0xFF
+                                           || PL_lex_inwhat != OP_TRANS))
+                        {
                             SvCUR_set(sv, d - SvPVX_const(sv));
                             SvPOK_on(sv);
                             *d = '\0';
                             /* See Note on sizing above.  */
                             sv_utf8_upgrade_flags_grow(
-                                       sv,
-                                       SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
-                                       UNISKIP(uv) + (STRLEN)(send - e) + 1);
+                                    sv,
+                                    SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+                                   UVCHR_SKIP(uv) + (STRLEN)(send - e) + 1);
                             d = SvPVX(sv) + SvCUR(sv);
                             has_utf8 = TRUE;
                         }
  
                          /* Add the (Unicode) code point to the output. */
-                       if (UNI_IS_INVARIANT(uv)) {
+                       if (OFFUNI_IS_INVARIANT(uv)) {
                             *d++ = (char) LATIN1_TO_NATIVE(uv);
                         }
                         else {
@@ -3409,9 +3571,15 @@ S_scan_const(pTHX_ char *start)
                                      char hex_string[4];
                                      int len =
                                          my_snprintf(hex_string,
-                                                    sizeof(hex_string),
-                                                    "%02X.", (U8) *str);
-                                    PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(hex_string));
+                                                  sizeof(hex_string),
+                                                  "%02X.",
+
+                                                  /* The regex compiler is
+                                                   * expecting Unicode, not
+                                                   * native */
+                                                  NATIVE_TO_LATIN1(*str));
+                                    PERL_MY_SNPRINTF_POST_GUARD(len,
+                                                           sizeof(hex_string));
                                      Copy(hex_string, d, 3, char);
                                      d += 3;
                                      str++;
@@ -3435,12 +3603,12 @@ S_scan_const(pTHX_ char *start)
                                                          len,
                                                          &char_length,
                                                          UTF8_ALLOW_ANYUV);
-                                /* Convert first code point to hex, including
-                                 * the boiler plate before it. */
+                                /* Convert first code point to Unicode hex,
+                                 * including the boiler plate before it. */
                                  output_length =
                                      my_snprintf(hex_string, sizeof(hex_string),
-                                                "\\N{U+%X",
-                                                (unsigned int) uv);
+                                             "\\N{U+%X",
+                                             (unsigned int) NATIVE_TO_UNI(uv));
  
                                  /* Make sure there is enough space to hold it */
                                  d = off + SvGROW(sv, off
@@ -3452,7 +3620,7 @@ S_scan_const(pTHX_ char *start)
                                  d += output_length;
  
                                  /* For each subsequent character, append dot and
-                                * its ordinal in hex */
+                                * its Unicode code point in hex */
                                  while ((str += char_length) < str_end) {
                                      const STRLEN off = d - SvPVX_const(sv);
                                      U32 uv = utf8n_to_uvchr((U8 *) str,
@@ -3461,9 +3629,9 @@ S_scan_const(pTHX_ char *start)
                                                              UTF8_ALLOW_ANYUV);
                                      output_length =
                                          my_snprintf(hex_string,
-                                                    sizeof(hex_string),
-                                                    ".%X",
-                                                    (unsigned int) uv);
+                                             sizeof(hex_string),
+                                             ".%X",
+                                             (unsigned int) NATIVE_TO_UNI(uv));
  
                                      d = off + SvGROW(sv, off
                                                          + output_length
@@ -3480,11 +3648,32 @@ S_scan_const(pTHX_ char *start)
                     else { /* Here, not in a pattern.  Convert the name to a
                             * string. */
  
-                        /* If destination is not in utf8, unconditionally
-                         * recode it to be so.  This is because \N{} implies
-                         * Unicode semantics, and scalars have to be in utf8
-                         * to guarantee those semantics */
-                       if (! has_utf8) {
+                        if (PL_lex_inwhat == OP_TRANS) {
+                            str = SvPV_const(res, len);
+                            if (len > ((SvUTF8(res))
+                                       ? UTF8SKIP(str)
+                                       : 1U))
+                            {
+                                yyerror(Perl_form(aTHX_
+                                    "%.*s must not be a named sequence"
+                                    " in transliteration operator",
+                                        /*  +1 to include the "}" */
+                                    (int) (e + 1 - start), start));
+                                goto end_backslash_N;
+                            }
+                        }
+                        else if (! SvUTF8(res)) {
+                            /* Make sure \N{} return is UTF-8.  This is because
+                            * \N{} implies Unicode semantics, and scalars have to
+                            * be in utf8 to guarantee those semantics; but not
+                            * needed in tr/// */
+                            sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
+                            str = SvPV_const(res, len);
+                        }
+
+                         /* Upgrade destination to be utf8 if this new
+                          * component is */
+                       if (! has_utf8 && SvUTF8(res)) {
                             SvCUR_set(sv, d - SvPVX_const(sv));
                             SvPOK_on(sv);
                             *d = '\0';
@@ -3501,10 +3690,6 @@ S_scan_const(pTHX_ char *start)
                             const STRLEN off = d - SvPVX_const(sv);
                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
                         }
-                        if (! SvUTF8(res)) {    /* Make sure \N{} return is UTF-8 */
-                            sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
-                            str = SvPV_const(res, len);
-                        }
                         Copy(str, d, len, char);
                         d += len;
                     }
@@ -3512,9 +3697,10 @@ S_scan_const(pTHX_ char *start)
                     SvREFCNT_dec(res);
  
                 } /* End \N{NAME} */
+
+              end_backslash_N:
  #ifdef EBCDIC
-               if (!dorange) 
-                   native_range = FALSE; /* \N{} is defined to be Unicode */
+                backslash_N++; /* \N{} is defined to be Unicode */
  #endif
                 s = e + 1;  /* Point to just after the '}' */
                 continue;
@@ -3528,6 +3714,9 @@ S_scan_const(pTHX_ char *start)
                 else {
                     yyerror("Missing control char name in \\c");
                 }
+#ifdef EBCDIC
+                non_portable_endpoint++;
+#endif
                 continue;
  
             /* printf-style backslashes, formfeeds, newlines, etc */
@@ -3557,10 +3746,6 @@ S_scan_const(pTHX_ char *start)
             s++;
             continue;
         } /* end if (backslash) */
-#ifdef EBCDIC
-       else
-           literal_endpoint++;
-#endif
  
      default_action:
         /* If we started with encoded form, or already know we want it,
@@ -3568,7 +3753,6 @@ S_scan_const(pTHX_ char *start)
         if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
             STRLEN len  = 1;
  
-
             /* One might think that it is wasted effort in the case of the
              * source being utf8 (this_utf8 == TRUE) to take the next character
              * in the source, convert it to an unsigned value, and then convert
@@ -3579,7 +3763,7 @@ S_scan_const(pTHX_ char *start)
             const UV nextuv   = (this_utf8)
                                  ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
                                  : (UV) ((U8) *s);
-           const STRLEN need = UNISKIP(nextuv);
+           const STRLEN need = UVCHR_SKIP(nextuv);
             if (!has_utf8) {
                 SvCUR_set(sv, d - SvPVX_const(sv));
                 SvPOK_on(sv);
@@ -3600,10 +3784,6 @@ S_scan_const(pTHX_ char *start)
             s += len;
  
             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
-#ifdef EBCDIC
-           if (uv > 255 && !dorange)
-               native_range = FALSE;
-#endif
         }
         else {
             *d++ = *s++;
@@ -3618,11 +3798,6 @@ S_scan_const(pTHX_ char *start)
                    " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
  
      SvPOK_on(sv);
-    if (IN_ENCODING && !has_utf8) {
-       sv_recode_to_utf8(sv, _get_encoding());
-       if (SvUTF8(sv))
-           has_utf8 = TRUE;
-    }
      if (has_utf8) {
         SvUTF8_on(sv);
         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
@@ -3774,8 +3949,10 @@ S_intuit_more(pTHX_ char *s)
                     else
                         weight -= 10;
                 }
-               else if (*s == '$' && s[1] &&
-                 strchr("[#!%*<>()-=",s[1])) {
+               else if (*s == '$'
+                         && s[1]
+                         && strchr("[#!%*<>()-=",s[1]))
+                {
                     if (/*{*/ strchr("])} =",s[2]))
                         weight -= 10;
                     else
@@ -3885,8 +4062,8 @@ S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
      }
  
      if (*start == '$') {
-       if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
-               isUPPER(*PL_tokenbuf))
+       if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
+            || isUPPER(*PL_tokenbuf))
             return 0;
         s = skipspace(s);
         PL_bufptr = start;
@@ -4160,9 +4337,11 @@ S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
      if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
          return PL_curstash;
  
-    if (len > 2 &&
-        (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
-        (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
+    if (len > 2
+        && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
+        && (gv = gv_fetchpvn_flags(pkgname,
+                                   len,
+                                   ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
      {
          return GvHV(gv);                       /* Foo:: */
      }
@@ -4215,14 +4394,14 @@ S_tokenize_use(pTHX_ int is_use, char *s) {
         };
  #endif
  
-#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
+#define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
  STATIC bool
-S_word_takes_any_delimeter(char *p, STRLEN len)
+S_word_takes_any_delimiter(char *p, STRLEN len) /* true iff the len-char word at p is one of: m s y q tr qq qw qx qr */
  {
-    return (len == 1 && strchr("msyq", p[0])) ||
-          (len == 2 && (
-           (p[0] == 't' && p[1] == 'r') ||
-           (p[0] == 'q' && strchr("qwxr", p[1]))));
+    return (len == 1 && strchr("msyq", p[0])) /* one-letter words; NOTE(review): strchr() also matches '\0' -- presumably p never holds a NUL; confirm */
+            || (len == 2 /* two-letter words: */
+                && ((p[0] == 't' && p[1] == 'r') /* "tr" */
+                    || (p[0] == 'q' && strchr("qwxr", p[1])))); /* "qq", "qw", "qx", "qr" */
  }
  
  static void
@@ -4239,6 +4418,26 @@ S_check_scalar_slice(pTHX_ char *s)
         pl_yylval.ival = OPpSLICEWARNING;
  }
  
+#define lex_token_boundary() S_lex_token_boundary(aTHX)
+static void
+S_lex_token_boundary(pTHX) /* Shift the saved buffer pointers one slot: old -> oldold, current -> old */
+{
+    PL_oldoldbufptr = PL_oldbufptr; /* remember the previously saved position one step further back */
+    PL_oldbufptr = PL_bufptr; /* the current PL_bufptr becomes the most recently saved position */
+}
+
+#define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
+static char *
+S_vcs_conflict_marker(pTHX_ char *s) /* Queue a "Version control conflict marker" error at s; return s advanced to the next '\n' or PL_bufend */
+{
+    lex_token_boundary(); /* save the current buffer pointers before PL_bufptr is overwritten */
+    PL_bufptr = s; /* set before yyerror(); presumably so the error is reported at the marker -- confirm */
+    yyerror("Version control conflict marker");
+    while (s < PL_bufend && *s != '\n') /* skip the remainder of the marker's line */
+       s++;
+    return s; /* points at the '\n', or equals PL_bufend if no newline was found */
+}
+
  /*
    yylex
  
@@ -4250,15 +4449,15 @@ S_check_scalar_slice(pTHX_ char *s)
      The type of the next token
  
    Structure:
+      Check if we have already built the token; if so, use it.
        Switch based on the current state:
-         - if we already built the token before, use it
           - if we have a case modifier in a string, deal with that
           - handle other cases of interpolation inside a string
           - scan the next line if we are inside a format
-      In the normal state switch on the next character:
+      In the normal state, switch on the next character:
           - default:
             if alphabetic, go to key lookup
-           unrecoginized character - croak
+           unrecognized character - croak
           - 0/4/26: handle end-of-line or EOF
           - cases for whitespace
           - \n and #: handle comments and line numbers
@@ -4317,10 +4516,6 @@ Perl_yylex(pTHX)
      if (PL_nexttoke) {
         PL_nexttoke--;
         pl_yylval = PL_nextval[PL_nexttoke];
-       if (!PL_nexttoke) {
-           PL_lex_state = PL_lex_defer;
-           PL_lex_defer = LEX_NORMAL;
-       }
         {
             I32 next_type;
             next_type = PL_nexttype[PL_nexttoke];
@@ -4395,10 +4590,11 @@ Perl_yylex(pTHX)
                 I32 tmp;
                  if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
                      tmp = *s, *s = s[2], s[2] = (char)tmp;     /* misordered... */
-               if ((*s == 'L' || *s == 'U' || *s == 'F') &&
-                   (strchr(PL_lex_casestack, 'L')
+               if ((*s == 'L' || *s == 'U' || *s == 'F')
+                    && (strchr(PL_lex_casestack, 'L')
                          || strchr(PL_lex_casestack, 'U')
-                        || strchr(PL_lex_casestack, 'F'))) {
+                        || strchr(PL_lex_casestack, 'F')))
+                {
                     PL_lex_casestack[--PL_lex_casemods] = '\0';
                     PL_lex_allbrackets--;
                     return REPORT(')');
@@ -4493,14 +4689,6 @@ Perl_yylex(pTHX)
         /* FALLTHROUGH */
  
      case LEX_INTERPEND:
-       /* Treat state as LEX_NORMAL if we have no inner lexing scope.
-          XXX This hack can be removed if we stop setting PL_lex_state to
-          LEX_KNOWNEXT, as can the hack under LEX_INTREPCONCAT below.  */
-       if (UNLIKELY(!PL_lex_inwhat)) {
-           PL_lex_state = LEX_NORMAL;
-           break;
-       }
-
         if (PL_lex_dojoin) {
             const U8 dojoin_was = PL_lex_dojoin;
             PL_lex_dojoin = FALSE;
@@ -4555,14 +4743,6 @@ Perl_yylex(pTHX)
         if (PL_bufptr == PL_bufend)
             return REPORT(sublex_done());
  
-       /* Treat state as LEX_NORMAL if SvIVX is not valid on PL_linestr.
-          XXX This hack can be removed if we stop setting PL_lex_state to
-          LEX_KNOWNEXT.  */
-       if (SvTYPE(PL_linestr) == SVt_PV) {
-           PL_lex_state = LEX_NORMAL;
-           break;
-       }
-
         /* m'foo' still needs to be parsed for possible (?{...}) */
         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
             SV *sv = newSVsv(PL_linestr);
@@ -4617,9 +4797,22 @@ Perl_yylex(pTHX)
    retry:
      switch (*s) {
      default:
-       if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
+       if (UTF) {
+            if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) {
+                ENTER;
+                SAVESPTR(PL_warnhook);
+                PL_warnhook = PERL_WARNHOOK_FATAL;
+                utf8n_to_uvchr((U8*)s, PL_bufend-s, NULL, 0);
+                LEAVE;
+            }
+            if (isIDFIRST_utf8((U8*)s)) {
+                goto keylookup;
+            }
+        }
+        else if (isALNUMC(*s)) {
             goto keylookup;
-       {
+       }
+    {
          SV *dsv = newSVpvs_flags("", SVs_TEMP);
          const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
                                                      UTF8SKIP(s),
@@ -4644,8 +4837,9 @@ Perl_yylex(pTHX)
          && (!PL_parser->filtered || s+1 < PL_bufend)) {
             PL_last_uni = 0;
             PL_last_lop = 0;
-           if (PL_lex_brackets &&
-                   PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
+           if (PL_lex_brackets
+                && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
+            {
                 yyerror((const char *)
                         (PL_lex_formbrack
                          ? "Format not terminated"
@@ -4728,7 +4922,7 @@ Perl_yylex(pTHX)
             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
             PL_last_lop = PL_last_uni = NULL;
-           if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
+           if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
                 update_debugger_info(PL_linestr, NULL, 0);
             goto retry;
         }
@@ -4750,11 +4944,12 @@ Perl_yylex(pTHX)
             s = PL_bufptr;
             /* If it looks like the start of a BOM or raw UTF-16,
              * check if it in fact is. */
-           if (bof && PL_rsfp &&
-                    (*s == 0 ||
-                     *(U8*)s == BOM_UTF8_FIRST_BYTE ||
-                     *(U8*)s >= 0xFE ||
-                     s[1] == 0)) {
+           if (bof && PL_rsfp
+                && (*s == 0
+                    || *(U8*)s == BOM_UTF8_FIRST_BYTE
+                        || *(U8*)s >= 0xFE
+                        || s[1] == 0))
+            {
                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
                 bof = (offset == (Off_t)SvCUR(PL_linestr));
  #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
@@ -4856,6 +5051,8 @@ Perl_yylex(pTHX)
                 d = instr(s,"perl -");
                 if (!d) {
                     d = instr(s,"perl");
+                    if (d && d[4] == '6')
+                        d = NULL;
  #if defined(DOSISH)
                     /* avoid getting into infinite loops when shebang
                      * line contains "Perl" rather than "perl" */
@@ -4892,12 +5089,12 @@ Perl_yylex(pTHX)
                         *s = '#';       /* Don't try to parse shebang line */
                 }
  #endif /* ALTERNATE_SHEBANG */
-               if (!d &&
-                   *s == '#' &&
-                   ipathend > ipath &&
-                   !PL_minus_c &&
-                   !instr(s,"indir") &&
-                   instr(PL_origargv[0],"perl"))
+               if (!d
+                    && *s == '#'
+                    && ipathend > ipath
+                    && !PL_minus_c
+                    && !instr(s,"indir")
+                    && instr(PL_origargv[0],"perl"))
                 {
                     dVAR;
                     char **newargv;
@@ -4960,8 +5157,8 @@ Perl_yylex(pTHX)
                             } while (argc && argv[0][0] == '-' && argv[0][1]);
                             init_argv_symbols(argc,argv);
                         }
-                       if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
-                           ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
+                       if (   (PERLDB_LINE_OR_SAVESRC && !oldpdb)
+                            || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
                               /* if we have already added "LINE: while (<>) {",
                                  we must not do it again */
                         {
@@ -4970,7 +5167,7 @@ Perl_yylex(pTHX)
                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
                             PL_last_lop = PL_last_uni = NULL;
                             PL_preambled = FALSE;
-                           if (PERLDB_LINE || PERLDB_SAVESRC)
+                           if (PERLDB_LINE_OR_SAVESRC)
                                 (void)gv_fetchfile(PL_origfilename);
                             goto retry;
                         }
@@ -4995,8 +5192,9 @@ Perl_yylex(pTHX)
         goto retry;
      case '#':
      case '\n':
-       if (PL_lex_state != LEX_NORMAL ||
-            (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
+       if (PL_lex_state != LEX_NORMAL
+            || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
+        {
              const bool in_comment = *s == '#';
             if (*s == '#' && s == PL_linestart && PL_in_eval
              && !PL_rsfp && !PL_parser->filtered) {
@@ -5124,17 +5322,12 @@ Perl_yylex(pTHX)
             else if (*s == '>') {
                 s++;
                 s = skipspace(s);
-               if (FEATURE_POSTDEREF_IS_ENABLED && (
-                   ((*s == '$' || *s == '&') && s[1] == '*')
+               if (((*s == '$' || *s == '&') && s[1] == '*')
                   ||(*s == '$' && s[1] == '#' && s[2] == '*')
                   ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
                   ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
-                ))
+                )
                 {
-                   Perl_ck_warner_d(aTHX_
-                       packWARN(WARN_EXPERIMENTAL__POSTDEREF),
-                       "Postfix dereference is experimental"
-                   );
                     PL_expect = XPOSTDEREF;
                     TOKEN(ARROW);
                 }
@@ -5148,8 +5341,10 @@ Perl_yylex(pTHX)
                     TERM(ARROW);
             }
             if (PL_expect == XOPERATOR) {
-               if (*s == '=' && !PL_lex_allbrackets &&
-                       PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+               if (*s == '='
+                    && !PL_lex_allbrackets
+                    && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+                {
                     s--;
                     TOKEN(0);
                 }
@@ -5173,8 +5368,10 @@ Perl_yylex(pTHX)
                     OPERATOR(PREINC);
             }
             if (PL_expect == XOPERATOR) {
-               if (*s == '=' && !PL_lex_allbrackets &&
-                       PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+               if (*s == '='
+                    && !PL_lex_allbrackets
+                    && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+                {
                     s--;
                     TOKEN(0);
                 }
@@ -5200,15 +5397,18 @@ Perl_yylex(pTHX)
         s++;
         if (*s == '*') {
             s++;
-           if (*s == '=' && !PL_lex_allbrackets &&
-                   PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+           if (*s == '=' && !PL_lex_allbrackets
+                && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+            {
                 s -= 2;
                 TOKEN(0);
             }
             PWop(OP_POW);
         }
-       if (*s == '=' && !PL_lex_allbrackets &&
-               PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+       if (*s == '='
+            && !PL_lex_allbrackets
+            && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+        {
             s--;
             TOKEN(0);
         }
@@ -5218,9 +5418,12 @@ Perl_yylex(pTHX)
      case '%':
      {
         if (PL_expect == XOPERATOR) {
-           if (s[1] == '=' && !PL_lex_allbrackets &&
-                   PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+           if (s[1] == '='
+                && !PL_lex_allbrackets
+                && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+            {
                 TOKEN(0);
+            }
             ++s;
             PL_parser->saw_infix_sigil = 1;
             Mop(OP_MODULO);
@@ -5416,10 +5619,12 @@ Perl_yylex(pTHX)
                 /* XXX losing whitespace on sequential attributes here */
             }
             {
-               if (*s != ';' && *s != '}' &&
-                   !(PL_expect == XOPERATOR
-                       ? (*s == '=' ||  *s == ')')
-                       : (*s == '{' ||  *s == '('))) {
+               if (*s != ';'
+                    && *s != '}'
+                    && !(PL_expect == XOPERATOR
+                        ? (*s == '=' ||  *s == ')')
+                        : (*s == '{' ||  *s == '(')))
+                {
                     const char q = ((*s == '\'') ? '"' : '\'');
                     /* If here for an expression, and parsed no attrs, back
                        off. */
@@ -5643,12 +5848,12 @@ Perl_yylex(pTHX)
                     else
                         /* skip plain q word */
                         while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
-                            t += UTF8SKIP(t);
+                           t += UTF ? UTF8SKIP(t) : 1;
                 }
                 else if (isWORDCHAR_lazy_if(t,UTF)) {
-                   t += UTF8SKIP(t);
+                   t += UTF ? UTF8SKIP(t) : 1;
                     while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
-                        t += UTF8SKIP(t);
+                       t += UTF ? UTF8SKIP(t) : 1;
                 }
                 while (t < PL_bufend && isSPACE(*t))
                     t++;
@@ -5803,16 +6008,22 @@ Perl_yylex(pTHX)
         {
             const char tmp = *s++;
             if (tmp == '=') {
-               if (!PL_lex_allbrackets &&
-                       PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
+               if ((s == PL_linestart+2 || s[-3] == '\n') && strnEQ(s, "=====", 5)) {
+                   s = vcs_conflict_marker(s + 5);
+                   goto retry;
+               }
+               if (!PL_lex_allbrackets
+                    && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
+                {
                     s -= 2;
                     TOKEN(0);
                 }
                 Eop(OP_EQ);
             }
             if (tmp == '>') {
-               if (!PL_lex_allbrackets &&
-                       PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
+               if (!PL_lex_allbrackets
+                    && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
+                {
                     s -= 2;
                     TOKEN(0);
                 }
@@ -5825,8 +6036,9 @@ Perl_yylex(pTHX)
                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
                             "Reversed %c= operator",(int)tmp);
             s--;
-           if (PL_expect == XSTATE && isALPHA(tmp) &&
-               (s == PL_linestart+1 || s[-2] == '\n') )
+           if (PL_expect == XSTATE
+                && isALPHA(tmp)
+                && (s == PL_linestart+1 || s[-2] == '\n') )
              {
                  if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
                      || PL_lex_state != LEX_NORMAL) {
@@ -5890,15 +6102,16 @@ Perl_yylex(pTHX)
                     while (t < PL_bufend && isSPACE(*t))
                         ++t;
  
-                   if (*t == '/' || *t == '?' ||
-                       ((*t == 'm' || *t == 's' || *t == 'y')
-                        && !isWORDCHAR(t[1])) ||
-                       (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
+                   if (*t == '/' || *t == '?'
+                        || ((*t == 'm' || *t == 's' || *t == 'y')
+                           && !isWORDCHAR(t[1]))
+                        || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
                                     "!=~ should be !~");
                 }
-               if (!PL_lex_allbrackets &&
-                       PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
+               if (!PL_lex_allbrackets
+                    && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
+                {
                     s -= 2;
                     TOKEN(0);
                 }
@@ -5913,8 +6126,13 @@ Perl_yylex(pTHX)
         if (PL_expect != XOPERATOR) {
             if (s[1] != '<' && !strchr(s,'>'))
                 check_uni();
-           if (s[1] == '<' && s[2] != '>')
+           if (s[1] == '<' && s[2] != '>') {
+               if ((s == PL_linestart || s[-1] == '\n') && strnEQ(s+2, "<<<<<", 5)) {
+                   s = vcs_conflict_marker(s + 7);
+                   goto retry;
+               }
                 s = scan_heredoc(s);
+           }
             else
                 s = scan_inputsymbol(s);
             PL_expect = XOPERATOR;
@@ -5924,8 +6142,13 @@ Perl_yylex(pTHX)
         {
             char tmp = *s++;
             if (tmp == '<') {
-               if (*s == '=' && !PL_lex_allbrackets &&
-                       PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+               if ((s == PL_linestart+2 || s[-3] == '\n') && strnEQ(s, "<<<<<", 5)) {
+                    s = vcs_conflict_marker(s + 5);
+                   goto retry;
+               }
+               if (*s == '=' && !PL_lex_allbrackets
+                    && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+                {
                     s -= 2;
                     TOKEN(0);
                 }
@@ -5934,16 +6157,18 @@ Perl_yylex(pTHX)
             if (tmp == '=') {
                 tmp = *s++;
                 if (tmp == '>') {
-                   if (!PL_lex_allbrackets &&
-                           PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
+                   if (!PL_lex_allbrackets
+                        && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
+                    {
                         s -= 3;
                         TOKEN(0);
                     }
                     Eop(OP_NCMP);
                 }
                 s--;
-               if (!PL_lex_allbrackets &&
-                       PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
+               if (!PL_lex_allbrackets
+                    && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
+                {
                     s -= 2;
                     TOKEN(0);
                 }
@@ -5961,16 +6186,22 @@ Perl_yylex(pTHX)
         {
             const char tmp = *s++;
             if (tmp == '>') {
-               if (*s == '=' && !PL_lex_allbrackets &&
-                       PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+               if ((s == PL_linestart+2 || s[-3] == '\n') && strnEQ(s, ">>>>>", 5)) {
+                   s = vcs_conflict_marker(s + 5);
+                   goto retry;
+               }
+               if (*s == '=' && !PL_lex_allbrackets
+                    && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+                {
                     s -= 2;
                     TOKEN(0);
                 }
                 SHop(OP_RIGHT_SHIFT);
             }
             else if (tmp == '=') {
-               if (!PL_lex_allbrackets &&
-                       PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
+               if (!PL_lex_allbrackets
+                    && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
+                {
                     s -= 2;
                     TOKEN(0);
                 }
@@ -6004,8 +6235,14 @@ Perl_yylex(pTHX)
             PL_tokenbuf[0] = '@';
             s = scan_ident(s + 1, PL_tokenbuf + 1,
                            sizeof PL_tokenbuf - 1, FALSE);
-           if (PL_expect == XOPERATOR)
-               no_op("Array length", s);
+            if (PL_expect == XOPERATOR) {
+                d = s;
+                if (PL_bufptr > s) {
+                    d = PL_bufptr-1;
+                    PL_bufptr = PL_oldbufptr;
+                }
+               no_op("Array length", d);
+            }
             if (!PL_tokenbuf[1])
                 PREREF(DOLSHARP);
             PL_expect = XOPERATOR;
@@ -6044,14 +6281,14 @@ Perl_yylex(pTHX)
                         char *t = s+1;
  
                         while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
-                           t++;
+                           t += UTF ? UTF8SKIP(t) : 1;
                         if (*t++ == ',') {
                             PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
                             while (t < PL_bufend && *t != ']')
                                 t++;
                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                                       "Multidimensional syntax %.*s not supported",
-                                   (int)((t - PL_bufptr) + 1), PL_bufptr);
+                                       "Multidimensional syntax %"UTF8f" not supported",
+                                        UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
                         }
                     }
                 }
@@ -6136,11 +6373,18 @@ Perl_yylex(pTHX)
         TOKEN('$');
  
      case '@':
-       if (PL_expect == XOPERATOR)
-           no_op("Array", s);
-       else if (PL_expect == XPOSTDEREF) POSTDEREF('@');
+        if (PL_expect == XPOSTDEREF)
+            POSTDEREF('@');
         PL_tokenbuf[0] = '@';
         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
+       if (PL_expect == XOPERATOR) {
+            d = s;
+            if (PL_bufptr > s) {
+                d = PL_bufptr-1;
+                PL_bufptr = PL_oldbufptr;
+            }
+           no_op("Array", d);
+        }
         pl_yylval.ival = 0;
         if (!PL_tokenbuf[1]) {
             PREREF('@');
@@ -6172,8 +6416,9 @@ Perl_yylex(pTHX)
         }
         else if (PL_expect == XOPERATOR) {
             s++;
-           if (*s == '=' && !PL_lex_allbrackets &&
-               PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+           if (*s == '=' && !PL_lex_allbrackets
+                && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+            {
                 s--;
                 TOKEN(0);
             }
@@ -6193,8 +6438,9 @@ Perl_yylex(pTHX)
  
       case '?':                 /* conditional */
         s++;
-       if (!PL_lex_allbrackets &&
-           PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
+       if (!PL_lex_allbrackets
+            && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
+        {
             s--;
             TOKEN(0);
         }
@@ -6221,8 +6467,9 @@ Perl_yylex(pTHX)
         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
             char tmp = *s++;
             if (*s == tmp) {
-               if (!PL_lex_allbrackets &&
-                       PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
+               if (!PL_lex_allbrackets
+                    && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
+                {
                     s--;
                     TOKEN(0);
                 }
@@ -6235,8 +6482,9 @@ Perl_yylex(pTHX)
                     pl_yylval.ival = 0;
                 OPERATOR(DOTDOT);
             }
-           if (*s == '=' && !PL_lex_allbrackets &&
-                   PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+           if (*s == '=' && !PL_lex_allbrackets
+                && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+            {
                 s--;
                 TOKEN(0);
             }
@@ -6252,22 +6500,26 @@ Perl_yylex(pTHX)
         TERM(THING);
  
      case '\'':
+       if (   PL_expect == XOPERATOR
+           && (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack))
+               return deprecate_commaless_var_list();
+
         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
         if (!s)
             missingterm(NULL);
         COPLINE_SET_FROM_MULTI_END;
         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
         if (PL_expect == XOPERATOR) {
-           if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
-               return deprecate_commaless_var_list();
-           }
-           else
-               no_op("String",s);
+            no_op("String",s);
         }
         pl_yylval.ival = OP_CONST;
         TERM(sublex_start());
  
      case '"':
+       if (   PL_expect == XOPERATOR
+           && (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack))
+               return deprecate_commaless_var_list();
+
         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
         DEBUG_T( {
             if (s)
@@ -6277,10 +6529,6 @@ Perl_yylex(pTHX)
                              "### Saw unterminated string\n");
         } );
         if (PL_expect == XOPERATOR) {
-           if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
-               return deprecate_commaless_var_list();
-           }
-           else
                 no_op("String",s);
         }
         if (!s)
@@ -6300,7 +6548,13 @@ Perl_yylex(pTHX)
  
      case '`':
         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
-       DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
+       DEBUG_T( {
+            if (s)
+                printbuf("### Saw backtick string before %s\n", s);
+            else
+               PerlIO_printf(Perl_debug_log,
+                            "### Saw unterminated backtick string\n");
+        } );
         if (PL_expect == XOPERATOR)
             no_op("Backticks",s);
         if (!s)
@@ -6405,7 +6659,7 @@ Perl_yylex(pTHX)
         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
  
         /* Some keywords can be followed by any delimiter, including ':' */
-       anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
+       anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
  
         /* x::* is just a word, unless x is "CORE" */
         if (!anydelim && *s == ':' && s[1] == ':') {
@@ -6511,24 +6765,23 @@ Perl_yylex(pTHX)
                 CV *cv;
                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
                                             (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
-                                           SVt_PVCV)) &&
-                   (cv = GvCVu(gv)))
+                                           SVt_PVCV))
+                    && (cv = GvCVu(gv)))
                 {
                     if (GvIMPORTED_CV(gv))
                         ogv = gv;
                     else if (! CvMETHOD(cv))
                         hgv = gv;
                 }
-               if (!ogv &&
-                   (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
-                                         len, FALSE)) &&
-                   (gv = *gvp) && (
-                       isGV_with_GP(gv)
-                           ? GvCVu(gv) && GvIMPORTED_CV(gv)
-                           :   SvPCS_IMPORTED(gv)
-                            && (gv_init(gv, PL_globalstash, PL_tokenbuf,
-                                        len, 0), 1)
-                  ))
+               if (!ogv
+                    && (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
+                                                              len, FALSE))
+                    && (gv = *gvp)
+                    && (isGV_with_GP(gv)
+                       ? GvCVu(gv) && GvIMPORTED_CV(gv)
+                       :   SvPCS_IMPORTED(gv)
+                       && (gv_init(gv, PL_globalstash, PL_tokenbuf,
+                                                                 len, 0), 1)))
                 {
                     ogv = gv;
                 }
@@ -6627,8 +6880,9 @@ Perl_yylex(pTHX)
                    in which case Foo is a bareword
                    (and a package name). */
  
-               if (len > 2 &&
-                   PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
+               if (len > 2
+                    && PL_tokenbuf[len - 2] == ':'
+                    && PL_tokenbuf[len - 1] == ':')
                 {
                     if (ckWARN(WARN_BAREWORD)
                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
@@ -6688,13 +6942,14 @@ Perl_yylex(pTHX)
  
                 /* See if it's the indirect object for a list operator. */
  
-               if (PL_oldoldbufptr &&
-                   PL_oldoldbufptr < PL_bufptr &&
-                   (PL_oldoldbufptr == PL_last_lop
-                    || PL_oldoldbufptr == PL_last_uni) &&
-                   /* NO SKIPSPACE BEFORE HERE! */
-                   (PL_expect == XREF ||
-                    ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
+               if (PL_oldoldbufptr
+                    && PL_oldoldbufptr < PL_bufptr
+                    && (PL_oldoldbufptr == PL_last_lop
+                       || PL_oldoldbufptr == PL_last_uni)
+                    && /* NO SKIPSPACE BEFORE HERE! */
+                      (PL_expect == XREF
+                        || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
+                                                               == OA_FILEREF))
                 {
                     bool immediate_paren = *s == '(';
  
@@ -6703,8 +6958,9 @@ Perl_yylex(pTHX)
  
                     /* Two barewords in a row may indicate method call. */
  
-                   if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
-                       (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
+                   if ((isIDFIRST_lazy_if(s,UTF) || *s == '$')
+                        && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
+                    {
                         goto method;
                     }
  
@@ -6713,12 +6969,13 @@ Perl_yylex(pTHX)
                     /* Also, if "_" follows a filetest operator, it's a bareword */
  
                     if (
-                       ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
-                         (!cv &&
-                        (PL_last_lop_op != OP_MAPSTART &&
-                        PL_last_lop_op != OP_GREPSTART))))
+                       ( !immediate_paren && (PL_last_lop_op == OP_SORT
+                         || (!cv
+                             && (PL_last_lop_op != OP_MAPSTART
+                                 && PL_last_lop_op != OP_GREPSTART))))
                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
-                           && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
+                           && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
+                                                            == OA_FILESTATOP))
                        )
                     {
                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
@@ -6774,9 +7031,11 @@ Perl_yylex(pTHX)
                     op_free(rv2cv_op);
                     PL_last_lop = PL_oldbufptr;
                     PL_last_lop_op = OP_METHOD;
-                   if (!PL_lex_allbrackets &&
-                           PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+                   if (!PL_lex_allbrackets
+                        && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+                    {
                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
+                    }
                     PL_expect = XBLOCKTERM;
                     PL_bufptr = s;
                     return REPORT(METHOD);
@@ -6798,9 +7057,11 @@ Perl_yylex(pTHX)
                         else SvUTF8_off(sv);
                     }
                     op_free(rv2cv_op);
-                   if (tmp == METHOD && !PL_lex_allbrackets &&
-                           PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+                   if (tmp == METHOD && !PL_lex_allbrackets
+                        && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+                    {
                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
+                    }
                     return REPORT(tmp);
                 }
  
@@ -6869,18 +7130,22 @@ Perl_yylex(pTHX)
                                 sv_setpvs(PL_subname, "__ANON__");
                             else
                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
-                           if (!PL_lex_allbrackets &&
-                                   PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+                           if (!PL_lex_allbrackets
+                                && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+                            {
                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
+                            }
                             PREBLOCK(LSTOPSUB);
                         }
                     }
                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
                     PL_expect = XTERM;
                     force_next(off ? PRIVATEREF : WORD);
-                   if (!PL_lex_allbrackets &&
-                           PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+                   if (!PL_lex_allbrackets
+                        && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+                    {
                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
+                    }
                     TOKEN(NOAMP);
                 }
  
@@ -6968,10 +7233,12 @@ Perl_yylex(pTHX)
                 if (!GvIO(gv))
                     GvIOp(gv) = newIO();
                 IoIFP(GvIOp(gv)) = PL_rsfp;
-#if defined(HAS_FCNTL) && defined(F_SETFD)
+#if defined(HAS_FCNTL) && defined(F_SETFD) && defined(FD_CLOEXEC)
                 {
                     const int fd = PerlIO_fileno(PL_rsfp);
-                   fcntl(fd,F_SETFD,fd >= 3);
+                    if (fd >= 3) {
+                        fcntl(fd,F_SETFD, FD_CLOEXEC);
+                    }
                 }
  #endif
                 /* Mark this internal pseudo-handle as clean */
@@ -7006,24 +7273,6 @@ Perl_yylex(pTHX)
                 if (!IN_BYTES) {
                     if (UTF)
                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
-                   else if (IN_ENCODING) {
-                       SV *name;
-                       dSP;
-                       ENTER;
-                       SAVETMPS;
-                       PUSHMARK(sp);
-                       XPUSHs(_get_encoding());
-                       PUTBACK;
-                       call_method("name", G_SCALAR);
-                       SPAGAIN;
-                       name = POPs;
-                       PUTBACK;
-                       PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
-                                           Perl_form(aTHX_ ":encoding(%"SVf")",
-                                                     SVfARG(name)));
-                       FREETMPS;
-                       LEAVE;
-                   }
                 }
  #endif
                 PL_rsfp = NULL;
@@ -7291,11 +7540,13 @@ Perl_yylex(pTHX)
             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
                 char *p = s;
  
-               if ((PL_bufend - p) >= 3 &&
-                   strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
+               if ((PL_bufend - p) >= 3
+                    && strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
+                {
                     p += 2;
-               else if ((PL_bufend - p) >= 4 &&
-                   strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
+                }
+               else if ((PL_bufend - p) >= 4
+                         && strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
                     p += 3;
                 p = skipspace(p);
                  /* skip optional package name, as in "for my abc $x (..)" */
@@ -7303,7 +7554,7 @@ Perl_yylex(pTHX)
                     p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
                     p = skipspace(p);
                 }
-               if (*p != '$')
+               if (*p != '$' && *p != '\\')
                     Perl_croak(aTHX_ "Missing $ on loop variable");
             }
             OPERATOR(FOR);
@@ -7473,7 +7724,6 @@ Perl_yylex(pTHX)
             UNI(OP_LCFIRST);
  
         case KEY_local:
-           pl_yylval.ival = 0;
             OPERATOR(LOCAL);
  
         case KEY_length:
@@ -7532,22 +7782,21 @@ Perl_yylex(pTHX)
         case KEY_our:
         case KEY_my:
         case KEY_state:
+           if (PL_in_my) {
+               PL_bufptr = s;
+               yyerror(Perl_form(aTHX_
+                                 "Can't redeclare \"%s\" in \"%s\"",
+                                  tmp      == KEY_my    ? "my" :
+                                  tmp      == KEY_state ? "state" : "our",
+                                  PL_in_my == KEY_my    ? "my" :
+                                  PL_in_my == KEY_state ? "state" : "our"));
+           }
             PL_in_my = (U16)tmp;
             s = skipspace(s);
             if (isIDFIRST_lazy_if(s,UTF)) {
                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
-               {
-                   if (!FEATURE_LEXSUBS_IS_ENABLED)
-                       Perl_croak(aTHX_
-                                 "Experimental \"%s\" subs not enabled",
-                                  tmp == KEY_my    ? "my"    :
-                                  tmp == KEY_state ? "state" : "our");
-                   Perl_ck_warner_d(aTHX_
-                       packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
-                       "The lexical_subs feature is experimental");
                     goto really_sub;
-               }
                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
                 if (!PL_in_my_stash) {
                     char tmpbuf[1024];
@@ -7558,7 +7807,6 @@ Perl_yylex(pTHX)
                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
                 }
             }
-           pl_yylval.ival = 1;
             OPERATOR(MY);
  
         case KEY_next:
@@ -7577,9 +7825,11 @@ Perl_yylex(pTHX)
             if (*s == '(' || (s = skipspace(s), *s == '('))
                 FUN1(OP_NOT);
             else {
-               if (!PL_lex_allbrackets &&
-                       PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+               if (!PL_lex_allbrackets
+                    && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+                {
                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
+                }
                 OPERATOR(NOTOP);
             }
  
@@ -7948,8 +8198,9 @@ Perl_yylex(pTHX)
                 d = s;
                 s = skipspace(s);
  
-               if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
-                   (*s == ':' && s[1] == ':'))
+               if (isIDFIRST_lazy_if(s,UTF)
+                    || *s == '\''
+                    || (*s == ':' && s[1] == ':'))
                 {
  
                     PL_expect = XBLOCK;
@@ -8016,7 +8267,13 @@ Perl_yylex(pTHX)
  
                 if (*s == ':' && s[1] != ':')
                     PL_expect = attrful;
-               else if ((*s != '{' && *s != '(') && key == KEY_sub) {
+               else if ((*s != '{' && *s != '(') && key != KEY_format) {
+                    assert(key == KEY_sub || key == KEY_AUTOLOAD ||
+                           key == KEY_DESTROY || key == KEY_BEGIN ||
+                           key == KEY_UNITCHECK || key == KEY_CHECK ||
+                           key == KEY_INIT || key == KEY_END ||
+                           key == KEY_my || key == KEY_state ||
+                           key == KEY_our);
                     if (!have_name)
                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
                     else if (*s != ';' && *s != '}')
@@ -8172,9 +8429,11 @@ Perl_yylex(pTHX)
  
         case KEY_x:
             if (PL_expect == XOPERATOR) {
-               if (*s == '=' && !PL_lex_allbrackets &&
-                       PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+               if (*s == '=' && !PL_lex_allbrackets
+                    && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+                {
                     return REPORT(0);
+                }
                 Mop(OP_REPEAT);
             }
             check_uni();
@@ -8294,14 +8553,17 @@ S_pending_ident(pTHX)
         and @foo isn't a variable we can find in the symbol
         table.
      */
-    if (ckWARN(WARN_AMBIGUOUS) &&
-       pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
+    if (ckWARN(WARN_AMBIGUOUS)
+        && pit == '@'
+        && PL_lex_state != LEX_NORMAL
+        && !PL_lex_brackets)
+    {
          GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
                                          ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
          if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
                 /* DO NOT warn for @- and @+ */
-               && !( PL_tokenbuf[2] == '\0' &&
-                   ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
+               && !( PL_tokenbuf[2] == '\0'
+                      && ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
            )
          {
              /* Downgraded from fatal to warning 20000522 mjd */
@@ -8535,7 +8797,8 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
  }
  
  PERL_STATIC_INLINE void
-S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
+S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
+                    bool is_utf8, bool check_dollar) {
      PERL_ARGS_ASSERT_PARSE_IDENT;
  
      for (;;) {
@@ -8571,7 +8834,7 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool
              * the code path that triggers the "Bad name after" warning
              * when looking for barewords.
              */
-           && (*s)[2] != '$') {
+           && !(check_dollar && (*s)[2] == '$')) {
              *(*d)++ = *(*s)++;
              *(*d)++ = *(*s)++;
          }
@@ -8593,12 +8856,31 @@ S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN
  
      PERL_ARGS_ASSERT_SCAN_WORD;
  
-    parse_ident(&s, &d, e, allow_package, is_utf8);
+    parse_ident(&s, &d, e, allow_package, is_utf8, TRUE);
      *d = '\0';
      *slp = d - dest;
      return s;
  }
  
+/* Is the character at 's' a legal single character identifier name?
+ * 'is_utf8' is true iff Unicode semantics are to be used.  The legal ones are:
+ *  a) all ASCII characters except:
+ *          1) control and space-type ones, like NUL, SOH, \t, and SPACE;
+ *          2) '{'
+ *     The final case currently doesn't get this far in the program, so we
+ *     don't test for it.  If that were to change, it would be ok to allow it.
+ *  b) When not under Unicode rules, any upper Latin1 character accepted by
+ *     isGRAPH_L1(), except SOFT HYPHEN (0xAD);
+ *  c) Otherwise, when Unicode rules are used, all XIDS characters.
+ *      Because all ASCII characters have the same representation whether
+ *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
+ *      '{' without knowing if the input is UTF-8 or not. */
+#define VALID_LEN_ONE_IDENT(s, is_utf8)                                       \
+    (isGRAPH_A(*(s)) || ((is_utf8)                                            \
+                         ? isIDFIRST_utf8((U8*) (s))                          \
+                         : (isGRAPH_L1(*(s))                                  \
+                            && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
+
  STATIC char *
  S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
  {
@@ -8621,8 +8903,8 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
             *d++ = *s++;
         }
      }
-    else {
-        parse_ident(&s, &d, e, 1, is_utf8);
+    else {  /* See if it is a "normal" identifier */
+        parse_ident(&s, &d, e, 1, is_utf8, FALSE);
      }
      *d = '\0';
      d = dest;
@@ -8633,12 +8915,15 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
             PL_lex_state = LEX_INTERPENDMAYBE;
         return s;
      }
-    if (*s == '$' && s[1] &&
-      (isIDFIRST_lazy_if(s+1,is_utf8)
-         || isDIGIT_A((U8)s[1])
-         || s[1] == '$'
-         || s[1] == '{'
-         || strnEQ(s+1,"::",2)) )
+
+    /* Here, it is not a run-of-the-mill identifier name */
+
+    if (*s == '$' && s[1]
+        && (isIDFIRST_lazy_if(s+1,is_utf8)
+            || isDIGIT_A((U8)s[1])
+            || s[1] == '$'
+            || s[1] == '{'
+            || strnEQ(s+1,"::",2)) )
      {
          /* Dereferencing a value in a scalar variable.
             The alternatives are different syntaxes for a scalar variable.
@@ -8654,59 +8939,11 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
              s = skipspace(s);
          }
      }
-
-/* Is the byte 'd' a legal single character identifier name?  'u' is true
- * iff Unicode semantics are to be used.  The legal ones are any of:
- *  a) all ASCII characters except:
- *          1) space-type ones, like \t and SPACE;
-            2) NUL;
- *          3) '{'
- *     The final case currently doesn't get this far in the program, so we
- *     don't test for it.  If that were to change, it would be ok to allow it.
- *  c) When not under Unicode rules, any upper Latin1 character
- *  d) Otherwise, when unicode rules are used, all XIDS characters.
- *
- *      Because all ASCII characters have the same representation whether
- *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
- *      '{' without knowing if is UTF-8 or not.
- * EBCDIC already uses the rules that ASCII platforms will use after the
- * deprecation cycle; see comment below about the deprecation. */
-#ifdef EBCDIC
-#   define VALID_LEN_ONE_IDENT(s, is_utf8)                                    \
-    (isGRAPH_A(*(s)) || ((is_utf8)                                            \
-                         ? isIDFIRST_utf8((U8*) (s))                          \
-                         : (isGRAPH_L1(*s)                                    \
-                            && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
-#else
-#   define VALID_LEN_ONE_IDENT(s, is_utf8) (! isSPACE_A(*(s))                 \
-                                            && LIKELY(*(s) != '\0')           \
-                                            && (! is_utf8                     \
-                                                || isASCII_utf8((U8*) (s))    \
-                                                || isIDFIRST_utf8((U8*) (s))))
-#endif
      if ((s <= PL_bufend - (is_utf8)
                            ? UTF8SKIP(s)
                            : 1)
          && VALID_LEN_ONE_IDENT(s, is_utf8))
      {
-        /* Deprecate all non-graphic characters.  Include SHY as a non-graphic,
-         * because often it has no graphic representation.  (We can't get to
-         * here with SHY when 'is_utf8' is true, so no need to include a UTF-8
-         * test for it.) */
-        if ((is_utf8)
-            ? ! isGRAPH_utf8( (U8*) s)
-            : (! isGRAPH_L1( (U8) *s)
-               || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD))))
-        {
-            /* Split messages for back compat */
-            if (isCNTRL_A( (U8) *s)) {
-                deprecate("literal control characters in variable names");
-            }
-            else {
-                deprecate("literal non-graphic characters in variable names");
-            }
-        }
-        
          if (is_utf8) {
              const STRLEN skip = UTF8SKIP(s);
              STRLEN i;
@@ -8735,8 +8972,8 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
              /* if it starts as a valid identifier, assume that it is one.
                 (the later check for } being at the expected point will trap
                 cases where this doesn't pan out.)  */
-        d += is_utf8 ? UTF8SKIP(d) : 1;
-        parse_ident(&s, &d, e, 1, is_utf8);
+            d += is_utf8 ? UTF8SKIP(d) : 1;
+            parse_ident(&s, &d, e, 1, is_utf8, TRUE);
             *d = '\0';
              tmp_copline = CopLINE(PL_curcop);
              if (s < PL_bufend && isSPACE(*s)) {
@@ -8791,12 +9028,14 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
                 PL_expect = XREF;
             }
             if (PL_lex_state == LEX_NORMAL) {
-               if (ckWARN(WARN_AMBIGUOUS) &&
-                   (keyword(dest, d - dest, 0)
-                    || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
+               if (ckWARN(WARN_AMBIGUOUS)
+                    && (keyword(dest, d - dest, 0)
+                       || get_cvn_flags(dest, d - dest, is_utf8
+                           ? SVf_UTF8
+                           : 0)))
                 {
                      SV *tmp = newSVpvn_flags( dest, d - dest,
-                                            SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
+                                        SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
                     if (funny == '#')
                         funny = '@';
                      orig_copline = CopLINE(PL_curcop);
@@ -8987,7 +9226,9 @@ S_scan_pat(pTHX_ char *start, I32 type)
                        "Use of /c modifier is meaningless without /g" );
      }
  
-    STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
+    if (UNLIKELY((x_mod_count) > 1)) {
+        yyerror("Only one /x regex modifier is allowed");
+    }
  
      PL_lex_op = (OP*)pm;
      pl_yylval.ival = OP_MATCH;
@@ -9042,7 +9283,9 @@ S_scan_subst(pTHX_ char *start)
         }
      }
  
-    STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
+    if (UNLIKELY((x_mod_count) > 1)) {
+        yyerror("Only one /x regex modifier is allowed");
+    }
  
      if ((pm->op_pmflags & PMf_CONTINUE)) {
          Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
@@ -9205,10 +9448,14 @@ S_scan_heredoc(pTHX_ char *s)
             term = '"';
         if (!isWORDCHAR_lazy_if(s,UTF))
             deprecate("bare << to mean <<\"\"");
-       for (; isWORDCHAR_lazy_if(s,UTF); s++) {
-           if (d < e)
-               *d++ = *s;
+       peek = s;
+       while (isWORDCHAR_lazy_if(peek,UTF)) {
+           peek += UTF ? UTF8SKIP(peek) : 1;
         }
+       len = (peek - s >= e - d) ? (e - d) : (peek - s);
+       Copy(s, d, len, char);
+       s += len;
+       d += len;
      }
      if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
         Perl_croak(aTHX_ "Delimiter for here document is too long");
@@ -9259,7 +9506,7 @@ S_scan_heredoc(pTHX_ char *s)
         SV *linestr;
         char *bufend;
         char * const olds = s;
-       PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
+       PERL_CONTEXT * const cx = CX_CUR();
         /* These two fields are not set until an inner lexing scope is
            entered.  But we need them set here. */
         shared->ls_bufptr  = s;
@@ -9279,8 +9526,13 @@ S_scan_heredoc(pTHX_ char *s)
                lexing scope.  In a file, we will have broken out of the
                loop in the previous iteration.  In an eval, the string buf-
                fer ends with "\n;", so the while condition above will have
-              evaluated to false.  So shared can never be null. */
-           assert(shared);
+              evaluated to false.  So shared can never be null.  Or so you
+              might think.  Odd syntax errors like s;@{<<; can gobble up
+              the implicit semicolon at the end of a file, causing the
+              file handle to be closed even when we are not in a string
+              eval.  So shared may be null in that case.  */
+           if (UNLIKELY(!shared))
+               goto interminable;
             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
                most lexing scope.  In a file, shared->ls_linestr at that
                level is just one line, so there is no body to steal. */
@@ -9289,15 +9541,17 @@ S_scan_heredoc(pTHX_ char *s)
                 goto streaming;
             }
           }
-       else {  /* eval */
+       else {  /* eval or we've already hit EOF */
             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
-           assert(s);
+           if (!s)
+                goto interminable;
         }
         linestr = shared->ls_linestr;
         bufend = SvEND(linestr);
         d = s;
-       while (s < bufend - len + 1 &&
-          memNE(s,PL_tokenbuf,len) ) {
+       while (s < bufend - len + 1
+               && memNE(s,PL_tokenbuf,len) )
+        {
             if (*s++ == '\n')
                 ++PL_parser->herelines;
         }
@@ -9326,9 +9580,10 @@ S_scan_heredoc(pTHX_ char *s)
                                 bufend - shared->re_eval_start);
             shared->re_eval_start -= s-d;
         }
-       if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
-            CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
-            cx->blk_eval.cur_text == linestr)
+       if (cxstack_ix >= 0
+            && CxTYPE(cx) == CXt_EVAL
+            && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
+            && cx->blk_eval.cur_text == linestr)
          {
             cx->blk_eval.cur_text = newSVsv(linestr);
             SvSCREAM_on(cx->blk_eval.cur_text);
@@ -9345,12 +9600,14 @@ S_scan_heredoc(pTHX_ char *s)
      else
      {
        SV *linestr_save;
+      char *oldbufptr_save;
       streaming:
        sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
        term = PL_tokenbuf[1];
        len--;
        linestr_save = PL_linestr; /* must restore this afterwards */
        d = s;                    /* and this */
+      oldbufptr_save = PL_oldbufptr;
        PL_linestr = newSVpvs("");
        PL_bufend = SvPVX(PL_linestr);
        while (1) {
@@ -9367,6 +9624,7 @@ S_scan_heredoc(pTHX_ char *s)
                restore PL_linestr. */
             SvREFCNT_dec_NN(PL_linestr);
             PL_linestr = linestr_save;
+            PL_oldbufptr = oldbufptr_save;
             goto interminable;
         }
         CopLINE_set(PL_curcop, origline);
@@ -9382,8 +9640,8 @@ S_scan_heredoc(pTHX_ char *s)
         PL_last_lop = PL_last_uni = NULL;
  #ifndef PERL_STRICT_CR
         if (PL_bufend - PL_linestart >= 2) {
-           if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
-               (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
+           if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
+                || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
             {
                 PL_bufend[-2] = '\n';
                 PL_bufend--;
@@ -9401,6 +9659,7 @@ S_scan_heredoc(pTHX_ char *s)
             PL_linestr = linestr_save;
             PL_linestart = SvPVX(linestr_save);
             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+            PL_oldbufptr = oldbufptr_save;
             s = d;
             break;
         }
@@ -9416,8 +9675,6 @@ S_scan_heredoc(pTHX_ char *s)
      if (!IN_BYTES) {
         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
             SvUTF8_on(tmpstr);
-       else if (IN_ENCODING)
-           sv_recode_to_utf8(tmpstr, _get_encoding());
      }
      PL_lex_stuff = tmpstr;
      pl_yylval.ival = op_type;
@@ -9654,7 +9911,6 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
      I32 termcode;              /* terminating char. code */
      U8 termstr[UTF8_MAXBYTES]; /* terminating string */
      STRLEN termlen;            /* length of terminating string */
-    int last_off = 0;          /* last position for nesting bracket */
      line_t herelines;
  
      PERL_ARGS_ASSERT_SCAN_STR;
@@ -9707,116 +9963,6 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
         sv_catpvn(sv, s, termlen);
      s += termlen;
      for (;;) {
-       if (IN_ENCODING && !UTF && !re_reparse) {
-           bool cont = TRUE;
-
-           while (cont) {
-               int offset = s - SvPVX_const(PL_linestr);
-               const bool found = sv_cat_decode(sv, _get_encoding(), PL_linestr,
-                                          &offset, (char*)termstr, termlen);
-               const char *ns;
-               char *svlast;
-
-               if (SvIsCOW(PL_linestr)) {
-                   STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
-                   STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
-                   STRLEN last_lop_pos, re_eval_start_pos, s_pos;
-                   char *buf = SvPVX(PL_linestr);
-                   bufend_pos = PL_parser->bufend - buf;
-                   bufptr_pos = PL_parser->bufptr - buf;
-                   oldbufptr_pos = PL_parser->oldbufptr - buf;
-                   oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
-                   linestart_pos = PL_parser->linestart - buf;
-                   last_uni_pos = PL_parser->last_uni
-                       ? PL_parser->last_uni - buf
-                       : 0;
-                   last_lop_pos = PL_parser->last_lop
-                       ? PL_parser->last_lop - buf
-                       : 0;
-                   re_eval_start_pos =
-                       PL_parser->lex_shared->re_eval_start ?
-                            PL_parser->lex_shared->re_eval_start - buf : 0;
-                   s_pos = s - buf;
-
-                   sv_force_normal(PL_linestr);
-
-                   buf = SvPVX(PL_linestr);
-                   PL_parser->bufend = buf + bufend_pos;
-                   PL_parser->bufptr = buf + bufptr_pos;
-                   PL_parser->oldbufptr = buf + oldbufptr_pos;
-                   PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
-                   PL_parser->linestart = buf + linestart_pos;
-                   if (PL_parser->last_uni)
-                       PL_parser->last_uni = buf + last_uni_pos;
-                   if (PL_parser->last_lop)
-                       PL_parser->last_lop = buf + last_lop_pos;
-                   if (PL_parser->lex_shared->re_eval_start)
-                       PL_parser->lex_shared->re_eval_start  =
-                           buf + re_eval_start_pos;
-                   s = buf + s_pos;
-               }
-               ns = SvPVX_const(PL_linestr) + offset;
-               svlast = SvEND(sv) - 1;
-
-               for (; s < ns; s++) {
-                   if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
-                       COPLINE_INC_WITH_HERELINES;
-               }
-               if (!found)
-                   goto read_more_line;
-               else {
-                   /* handle quoted delimiters */
-                   if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
-                       const char *t;
-                       for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
-                           t--;
-                       if ((svlast-1 - t) % 2) {
-                           if (!keep_bracketed_quoted) {
-                               *(svlast-1) = term;
-                               *svlast = '\0';
-                               SvCUR_set(sv, SvCUR(sv) - 1);
-                           }
-                           continue;
-                       }
-                   }
-                   if (PL_multi_open == PL_multi_close) {
-                       cont = FALSE;
-                   }
-                   else {
-                       const char *t;
-                       char *w;
-                       for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
-                           /* At here, all closes are "was quoted" one,
-                              so we don't check PL_multi_close. */
-                           if (*t == '\\') {
-                               if (!keep_bracketed_quoted && *(t+1) == PL_multi_open)
-                                   t++;
-                               else
-                                   *w++ = *t++;
-                           }
-                           else if (*t == PL_multi_open)
-                               brackets++;
-
-                           *w = *t;
-                       }
-                       if (w < t) {
-                           *w++ = term;
-                           *w = '\0';
-                           SvCUR_set(sv, w - SvPVX_const(sv));
-                       }
-                       last_off = w - SvPVX(sv);
-                       if (--brackets <= 0)
-                           cont = FALSE;
-                   }
-               }
-           }
-           if (!keep_delims) {
-               SvCUR_set(sv, SvCUR(sv) - 1);
-               *SvEND(sv) = '\0';
-           }
-           break;
-       }
-
         /* extend sv if need be */
         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
         /* set 'to' to the next character in the sv's string */
@@ -9864,8 +10010,8 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
                     COPLINE_INC_WITH_HERELINES;
                 /* backslashes can escape the open or closing characters */
                 if (*s == '\\' && s+1 < PL_bufend) {
-                   if (!keep_bracketed_quoted &&
-                       ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
+                   if (!keep_bracketed_quoted
+                       && ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
                      {
                         s++;
                      }
@@ -9895,8 +10041,8 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
  
  #ifndef PERL_STRICT_CR
         if (to - SvPVX_const(sv) >= 2) {
-           if ((to[-2] == '\r' && to[-1] == '\n') ||
-               (to[-2] == '\n' && to[-1] == '\r'))
+           if (   (to[-2] == '\r' && to[-1] == '\n')
+                || (to[-2] == '\n' && to[-1] == '\r'))
             {
                 to[-2] = '\n';
                 to--;
@@ -9909,7 +10055,6 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
             to[-1] = '\n';
  #endif
         
-     read_more_line:
         /* if we're out of file, or a read fails, bail and reset the current
            line marker so we can report where the unterminated string began
         */
@@ -9925,13 +10070,11 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
  
      /* at this point, we have successfully read the delimited string */
  
-    if (!IN_ENCODING || UTF || re_reparse) {
-
-       if (keep_delims)
+    if (keep_delims)
             sv_catpvn(sv, s, termlen);
-       s += termlen;
-    }
-    if (has_utf8 || (IN_ENCODING && !re_reparse))
+    s += termlen;
+
+    if (has_utf8)
         SvUTF8_on(sv);
  
      PL_multi_end = CopLINE(PL_curcop);
@@ -10001,6 +10144,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
       * multiple fp operations. */
      bool hexfp = FALSE;
      int total_bits = 0;
+    int significant_bits = 0;
  #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
  #  define HEXFP_UQUAD
      Uquad_t hexfp_uquad = 0;
@@ -10011,6 +10155,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
  #endif
      NV hexfp_mult = 1.0;
      UV high_non_zero = 0; /* highest digit */
+    int non_zero_integer_digits = 0;
  
      PERL_ARGS_ASSERT_SCAN_NUM;
  
@@ -10163,6 +10308,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                      if (high_non_zero == 0 && b > 0)
                          high_non_zero = b;
  
+                    if (high_non_zero)
+                        non_zero_integer_digits++;
+
                      /* this could be hexfp, but peek ahead
                       * to avoid matching ".." */
                      if (UNLIKELY(HEXFP_PEEK(s))) {
@@ -10189,43 +10337,103 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                   * detection will shortly be more thorough with the
                   * underbar checks. */
                  const char* h = s;
+                significant_bits = non_zero_integer_digits * shift;
  #ifdef HEXFP_UQUAD
                  hexfp_uquad = u;
  #else /* HEXFP_NV */
                  hexfp_nv = u;
  #endif
+                /* Ignore the leading zero bits of
+                 * the high (first) non-zero digit. */
+                if (high_non_zero) {
+                    if (high_non_zero < 0x8)
+                        significant_bits--;
+                    if (high_non_zero < 0x4)
+                        significant_bits--;
+                    if (high_non_zero < 0x2)
+                        significant_bits--;
+                }
+
                  if (*h == '.') {
  #ifdef HEXFP_NV
-                    NV mult = 1 / 16.0;
+                    NV nv_mult = 1.0;
  #endif
-                    h++;
-                    while (isXDIGIT(*h) || *h == '_') {
+                    bool accumulate = TRUE;
+                    for (h++; (isXDIGIT(*h) || *h == '_'); h++) {
                          if (isXDIGIT(*h)) {
                              U8 b = XDIGIT_VALUE(*h);
-                            total_bits += shift;
+                            significant_bits += shift;
  #ifdef HEXFP_UQUAD
-                            hexfp_uquad <<= shift;
-                            hexfp_uquad |= b;
-                            hexfp_frac_bits += shift;
+                            if (accumulate) {
+                                if (significant_bits < NV_MANT_DIG) {
+                                    /* We are in the long "run" of xdigits,
+                                     * accumulate the full four bits. */
+                                    hexfp_uquad <<= shift;
+                                    hexfp_uquad |= b;
+                                    hexfp_frac_bits += shift;
+                                } else {
+                                    /* We are at a hexdigit either at,
+                                     * or straddling, the edge of mantissa.
+                                     * We will try to grab as many bits
+                                     * as possible. */
+                                    int tail =
+                                      significant_bits - NV_MANT_DIG;
+                                    if (tail <= 0)
+                                       tail += shift;
+                                    hexfp_uquad <<= tail;
+                                    hexfp_uquad |= b >> (shift - tail);
+                                    hexfp_frac_bits += tail;
+
+                                    /* Ignore the trailing zero bits
+                                     * of the last non-zero xdigit.
+                                     *
+                                     * The assumption here is that if
+                                     * one has input of e.g. the xdigit
+                                     * eight (0x8), there is only one
+                                     * bit being input, not the full
+                                     * four bits.  Conversely, if one
+                                     * specifies a zero xdigit, the
+                                     * assumption is that one really
+                                     * wants all those bits to be zero. */
+                                    if (b) {
+                                        if ((b & 0x1) == 0x0) {
+                                            significant_bits--;
+                                            if ((b & 0x2) == 0x0) {
+                                                significant_bits--;
+                                                if ((b & 0x4) == 0x0) {
+                                                    significant_bits--;
+                                                }
+                                            }
+                                        }
+                                    }
+
+                                    accumulate = FALSE;
+                                }
+                            } else {
+                                /* Keep skipping the xdigits, and
+                                 * accumulating the significant bits,
+                                 * but do not shift the uquad
+                                 * (which would catastrophically drop
+                                 * high-order bits) or accumulate the
+                                 * xdigits anymore. */
+                            }
  #else /* HEXFP_NV */
-                            hexfp_nv += b * mult;
-                            mult /= 16.0;
+                            if (accumulate) {
+                                nv_mult /= 16.0;
+                                if (nv_mult > 0.0)
+                                    hexfp_nv += b * nv_mult;
+                                else
+                                    accumulate = FALSE;
+                            }
  #endif
                          }
-                        h++;
+                        if (significant_bits >= NV_MANT_DIG)
+                            accumulate = FALSE;
                      }
                  }
  
-                if (total_bits >= 4) {
-                    if (high_non_zero < 0x8)
-                        total_bits--;
-                    if (high_non_zero < 0x4)
-                        total_bits--;
-                    if (high_non_zero < 0x2)
-                        total_bits--;
-                }
-
-                if (total_bits > 0 && (isALPHA_FOLD_EQ(*h, 'p'))) {
+                if ((total_bits > 0 || significant_bits > 0) &&
+                    isALPHA_FOLD_EQ(*h, 'p')) {
                      bool negexp = FALSE;
                      h++;
                      if (*h == '+')
@@ -10241,23 +10449,21 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                                  hexfp_exp *= 10;
                                  hexfp_exp += *h - '0';
  #ifdef NV_MIN_EXP
-                                if (negexp &&
-                                    -hexfp_exp < NV_MIN_EXP - 1) {
+                                if (negexp
+                                    && -hexfp_exp < NV_MIN_EXP - 1) {
                                      Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
                                                     "Hexadecimal float: exponent underflow");
-#endif
                                      break;
                                  }
-                                else {
+#endif
  #ifdef NV_MAX_EXP
-                                    if (!negexp &&
-                                        hexfp_exp > NV_MAX_EXP - 1) {
-                                        Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
+                                if (!negexp
+                                    && hexfp_exp > NV_MAX_EXP - 1) {
+                                    Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
                                                     "Hexadecimal float: exponent overflow");
-                                        break;
-                                    }
-#endif
+                                    break;
                                  }
+#endif
                              }
                              h++;
                          }
@@ -10315,8 +10521,10 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
          }
  
         /* read next group of digits and _ and copy into d */
-       while (isDIGIT(*s) || *s == '_' ||
-               UNLIKELY(hexfp && isXDIGIT(*s))) {
+       while (isDIGIT(*s)
+               || *s == '_'
+               || UNLIKELY(hexfp && isXDIGIT(*s)))
+        {
             /* skip underscores, checking for misplaced ones
                if -w is on
             */
@@ -10356,9 +10564,11 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
  
             /* copy, ignoring underbars, until we run out of digits.
             */
-           for (; isDIGIT(*s) || *s == '_' ||
-                     UNLIKELY(hexfp && isXDIGIT(*s));
-                 s++) {
+           for (; isDIGIT(*s)
+                   || *s == '_'
+                   || UNLIKELY(hexfp && isXDIGIT(*s));
+                 s++)
+            {
                 /* fixed length buffer check */
                 if (d >= e)
                     Perl_croak(aTHX_ "%s", number_too_long);
@@ -10429,8 +10639,8 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                     *d++ = *s++;
                 }
                 else {
-                  if (((lastub && s == lastub + 1) ||
-                       (!isDIGIT(s[1]) && s[1] != '_')))
+                  if (((lastub && s == lastub + 1)
+                        || (!isDIGIT(s[1]) && s[1] != '_')))
                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
                                       "Misplaced _ in number");
                    lastub = s++;
@@ -10462,12 +10672,12 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                floatit = TRUE;
          }
         if (floatit) {
-            STORE_NUMERIC_LOCAL_SET_STANDARD();
+            STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD();
             /* terminate the string */
             *d = '\0';
              if (UNLIKELY(hexfp)) {
  #  ifdef NV_MANT_DIG
-                if (total_bits > NV_MANT_DIG)
+                if (significant_bits > NV_MANT_DIG)
                      Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
                                     "Hexadecimal float: mantissa overflow");
  #  endif
@@ -10479,7 +10689,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
              } else {
                  nv = Atof(PL_tokenbuf);
              }
-            RESTORE_NUMERIC_LOCAL();
+            RESTORE_LC_NUMERIC_UNDERLYING();
              sv = newSVnv(nv);
         }
  
@@ -10601,8 +10811,6 @@ S_scan_formline(pTHX_ char *s)
         if (!IN_BYTES) {
             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
                 SvUTF8_on(stuff);
-           else if (IN_ENCODING)
-               sv_recode_to_utf8(stuff, _get_encoding());
         }
         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
         force_next(THING);
@@ -10675,9 +10883,12 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
  
      if (!yychar || (yychar == ';' && !PL_rsfp))
         sv_catpvs(where_sv, "at EOF");
-    else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
-      PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
-      PL_oldbufptr != PL_bufptr) {
+    else if (   PL_oldoldbufptr
+             && PL_bufptr > PL_oldoldbufptr
+             && PL_bufptr - PL_oldoldbufptr < 200
+             && PL_oldoldbufptr != PL_oldbufptr
+             && PL_oldbufptr != PL_bufptr)
+    {
         /*
                 Only for NetWare:
                 The code below is removed for NetWare because it abends/crashes on NetWare
@@ -10692,8 +10903,10 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
         context = PL_oldoldbufptr;
         contlen = PL_bufptr - PL_oldoldbufptr;
      }
-    else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
-      PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
+    else if (  PL_oldbufptr
+            && PL_bufptr > PL_oldbufptr
+            && PL_bufptr - PL_oldbufptr < 200
+            && PL_oldbufptr != PL_bufptr) {
         /*
                 Only for NetWare:
                 The code below is removed for NetWare because it abends/crashes on NetWare
@@ -10711,8 +10924,7 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
      else if (yychar > 255)
         sv_catpvs(where_sv, "next token ???");
      else if (yychar == YYEMPTY) {
-       if (PL_lex_state == LEX_NORMAL ||
-          (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
+       if (PL_lex_state == LEX_NORMAL)
             sv_catpvs(where_sv, "at end of line");
         else if (PL_lex_inpat)
             sv_catpvs(where_sv, "within pattern");
@@ -11073,10 +11285,7 @@ Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
                                          "Integer overflow in decimal number");
                 }
             }
-#ifdef EBCDIC
-           if (rev > 0x7FFFFFFF)
-                Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
-#endif
+
             /* Append native character for the rev point */
             tmpend = uvchr_to_utf8(tmpbuf, rev);
             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
@@ -11163,7 +11372,7 @@ Parse a Perl arithmetic expression.  This may contain operators of precedence
  down to the bit shift operators.  The expression must be followed (and thus
  terminated) either by a comparison or lower-precedence operator or by
  something that would normally terminate an expression such as semicolon.
-If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
+If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
  otherwise it is mandatory.  It is up to the caller to ensure that the
  dynamic parser state (L</PL_parser> et al) is correctly set to reflect
  the source of the code to be parsed and the lexical context for the
@@ -11195,7 +11404,7 @@ Parse a Perl term expression.  This may contain operators of precedence
  down to the assignment operators.  The expression must be followed (and thus
  terminated) either by a comma or lower-precedence operator or by
  something that would normally terminate an expression such as semicolon.
-If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
+If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
  otherwise it is mandatory.  It is up to the caller to ensure that the
  dynamic parser state (L</PL_parser> et al) is correctly set to reflect
  the source of the code to be parsed and the lexical context for the
@@ -11227,7 +11436,7 @@ Parse a Perl list expression.  This may contain operators of precedence
  down to the comma operator.  The expression must be followed (and thus
  terminated) either by a low-precedence logic operator such as C<or> or by
  something that would normally terminate an expression such as semicolon.
-If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
+If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
  otherwise it is mandatory.  It is up to the caller to ensure that the
  dynamic parser state (L</PL_parser> et al) is correctly set to reflect
  the source of the code to be parsed and the lexical context for the
@@ -11260,8 +11469,8 @@ expression grammar, including the lowest-precedence operators such
  as C<or>.  The expression must be followed (and thus terminated) by a
  token that an expression would normally be terminated by: end-of-file,
  closing bracketing punctuation, semicolon, or one of the keywords that
-signals a postfix expression-statement modifier.  If I<flags> includes
-C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
+signals a postfix expression-statement modifier.  If C<flags> has the
+C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
  mandatory.  It is up to the caller to ensure that the dynamic parser
  state (L</PL_parser> et al) is correctly set to reflect the source of
  the code to be parsed and the lexical context for the expression.
@@ -11307,7 +11516,7 @@ the parser state, normally resulting in a single exception at the top
  level of parsing which covers all the compilation errors that occurred.
  Some compilation errors, however, will throw an exception immediately.
  
-The I<flags> parameter is reserved for future use, and must always
+The C<flags> parameter is reserved for future use, and must always
  be zero.
  
  =cut
@@ -11345,7 +11554,7 @@ the parser state, normally resulting in a single exception at the top
  level of parsing which covers all the compilation errors that occurred.
  Some compilation errors, however, will throw an exception immediately.
  
-The I<flags> parameter is reserved for future use, and must always
+The C<flags> parameter is reserved for future use, and must always
  be zero.
  
  =cut
@@ -11365,7 +11574,7 @@ Perl_parse_barestmt(pTHX_ U32 flags)
  Parse a single label, possibly optional, of the type that may prefix a
  Perl statement.  It is up to the caller to ensure that the dynamic parser
  state (L</PL_parser> et al) is correctly set to reflect the source of
-the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
+the code to be parsed.  If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
  label is optional, otherwise it is mandatory.
  
  The name of the label is returned in the form of a fresh scalar.  If an
@@ -11384,7 +11593,7 @@ Perl_parse_label(pTHX_ U32 flags)
  {
      if (flags & ~PARSE_OPTIONAL)
         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
-    if (PL_lex_state == LEX_KNOWNEXT) {
+    if (PL_nexttoke) {
         PL_parser->yychar = yylex();
         if (PL_parser->yychar == LABEL) {
             char * const lpv = pl_yylval.pval;
@@ -11403,7 +11612,7 @@ Perl_parse_label(pTHX_ U32 flags)
          if (!isIDFIRST_lazy_if(s, UTF))
             goto no_label;
         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
-       if (word_takes_any_delimeter(s, wlen))
+       if (word_takes_any_delimiter(s, wlen))
             goto no_label;
         bufptr_pos = s - SvPVX(PL_linestr);
         PL_bufptr = t;
@@ -11449,7 +11658,7 @@ the parser state, normally resulting in a single exception at the top
  level of parsing which covers all the compilation errors that occurred.
  Some compilation errors, however, will throw an exception immediately.
  
-The I<flags> parameter is reserved for future use, and must always
+The C<flags> parameter is reserved for future use, and must always
  be zero.
  
  =cut
@@ -11487,7 +11696,7 @@ normally resulting in a single exception at the top level of parsing
  which covers all the compilation errors that occurred.  Some compilation
  errors, however, will throw an exception immediately.
  
-The I<flags> parameter is reserved for future use, and must always
+The C<flags> parameter is reserved for future use, and must always
  be zero.
  
  =cut
@@ -11507,14 +11716,6 @@ Perl_parse_stmtseq(pTHX_ U32 flags)
      return stmtseqop;
  }
  
-#define lex_token_boundary() S_lex_token_boundary(aTHX)
-static void
-S_lex_token_boundary(pTHX)
-{
-    PL_oldoldbufptr = PL_oldbufptr;
-    PL_oldbufptr = PL_bufptr;
-}
-
  #define parse_opt_lexvar() S_parse_opt_lexvar(aTHX)
  static OP *
  S_parse_opt_lexvar(pTHX)
@@ -11535,7 +11736,8 @@ S_parse_opt_lexvar(pTHX)
      s = PL_bufptr;
      d = PL_tokenbuf + 1;
      PL_tokenbuf[0] = (char)sigil;
-    parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0, cBOOL(UTF));
+    parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0,
+               cBOOL(UTF), FALSE);
      PL_bufptr = s;
      if (d == PL_tokenbuf+1)
         return NULL;
@@ -11579,8 +11781,9 @@ Perl_parse_subsignature(pTHX)
                                     "lacks default expression"));
                     } else {
                         OP *defexpr = parse_termexpr(0);
-                       if (defexpr->op_type == OP_UNDEF &&
-                               !(defexpr->op_flags & OPf_KIDS)) {
+                       if (defexpr->op_type == OP_UNDEF
+                            && !(defexpr->op_flags & OPf_KIDS))
+                        {
                             op_free(defexpr);
                         } else {
                             OP *ifop = 
@@ -11727,11 +11930,5 @@ Perl_parse_subsignature(pTHX)
  }
  
  /*
- * Local variables:
- * c-indentation-style: bsd
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- *
   * ex: set ts=8 sts=4 sw=4 et:
   */