Added note about directories not being created with File::Copy::copy

[perl5.git] / toke.c
diff --git a/toke.c b/toke.c

index aedccc5..5a65548 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -303,9 +303,17 @@ static const char* const lex_state_names[] = {
  #define COPLINE_INC_WITH_HERELINES                 \
      STMT_START {                                    \
         CopLINE_inc(PL_curcop);                       \
-       if (PL_parser->lex_shared->herelines)          \
-           CopLINE(PL_curcop) += PL_parser->lex_shared->herelines, \
-           PL_parser->lex_shared->herelines = 0;                    \
+       if (PL_parser->herelines)                      \
+           CopLINE(PL_curcop) += PL_parser->herelines, \
+           PL_parser->herelines = 0;                    \
+    } STMT_END
+/* Called after scan_str to update CopLINE(PL_curcop), but only when there
+ * is no sublex_push to follow. */
+#define COPLINE_SET_FROM_MULTI_END           \
+    STMT_START {                              \
+       CopLINE_set(PL_curcop, PL_multi_end);   \
+       if (PL_multi_end != PL_multi_start)      \
+           PL_parser->herelines = 0;             \
      } STMT_END
  
  
@@ -553,16 +561,14 @@ S_no_op(pTHX_ const char *const what, char *s)
                 NOOP;
             if (t < PL_bufptr && isSPACE(*t))
                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                       "\t(Do you need to predeclare %"SVf"?)\n",
-                   SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
-                                   SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+                       "\t(Do you need to predeclare %"UTF8f"?)\n",
+                     UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
         }
         else {
             assert(s >= oldbp);
             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                   "\t(Missing operator before %"SVf"?)\n",
-                    SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
-                                    SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+                   "\t(Missing operator before %"UTF8f"?)\n",
+                    UTF8fARG(UTF, s - oldbp, oldbp));
         }
      }
      PL_bufptr = oldbp;
@@ -588,7 +594,7 @@ S_missingterm(pTHX_ char *s)
         if (nl)
             *nl = '\0';
      }
-    else if (isCNTRL(PL_multi_close)) {
+    else if ((U8) PL_multi_close < 32) {
         *tmpbuf = '^';
         tmpbuf[1] = (char)toCTRL(PL_multi_close);
         tmpbuf[2] = '\0';
@@ -731,7 +737,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
      parser->nexttoke = 0;
  #endif
      parser->error_count = oparser ? oparser->error_count : 0;
-    parser->copline = NOLINE;
+    parser->copline = parser->preambling = NOLINE;
      parser->lex_state = LEX_NORMAL;
      parser->expect = XSTATE;
      parser->rsfp = rsfp;
@@ -755,9 +761,9 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
         parser->linestr = flags & LEX_START_COPIED
                             ? SvREFCNT_inc_simple_NN(line)
                             : newSVpvn_flags(s, len, SvUTF8(line));
-       sv_catpvs(parser->linestr, "\n;");
+       sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
      } else {
-       parser->linestr = newSVpvs("\n;");
+       parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
      }
      parser->oldoldbufptr =
         parser->oldbufptr =
@@ -1055,7 +1061,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
                     ENTER;
                     SAVESPTR(PL_warnhook);
                     PL_warnhook = PERL_WARNHOOK_FATAL;
-                   utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
+                   utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
                     LEAVE;
                 }
             }
@@ -1075,7 +1081,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
                 }
                 else {
                      assert(p < e -1 );
-                   *bufptr++ = TWO_BYTE_UTF8_TO_UNI(*p, *(p+1));
+                   *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
                     p += 2;
                  }
             }
@@ -1378,6 +1384,10 @@ Perl_lex_next_chunk(pTHX_ U32 flags)
         PL_parser->last_uni = buf + last_uni_pos;
      if (PL_parser->last_lop)
         PL_parser->last_lop = buf + last_lop_pos;
+    if (PL_parser->preambling != NOLINE) {
+       CopLINE_set(PL_curcop, PL_parser->preambling + 1);
+       PL_parser->preambling = NOLINE;
+    }
      if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
             PL_curstash != PL_debstash) {
         /* debugger active and we're not compiling the debugger code,
@@ -1439,13 +1449,13 @@ Perl_lex_peek_unichar(pTHX_ U32 flags)
                 bufend = PL_parser->bufend;
             }
         }
-       unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
+       unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
         if (retlen == (STRLEN)-1) {
             /* malformed UTF-8 */
             ENTER;
             SAVESPTR(PL_warnhook);
             PL_warnhook = PERL_WARNHOOK_FATAL;
-           utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
+           utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
             LEAVE;
         }
         return unichar;
@@ -1514,14 +1524,16 @@ chunk will not be discarded.
  =cut
  */
  
+#define LEX_NO_INCLINE    0x40000000
  #define LEX_NO_NEXT_CHUNK 0x80000000
  
  void
  Perl_lex_read_space(pTHX_ U32 flags)
  {
      char *s, *bufend;
+    const bool can_incline = !(flags & LEX_NO_INCLINE);
      bool need_incline = 0;
-    if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
+    if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
  #ifdef PERL_MAD
      if (PL_skipwhite) {
@@ -1541,15 +1553,18 @@ Perl_lex_read_space(pTHX_ U32 flags)
             } while (!(c == '\n' || (c == 0 && s == bufend)));
         } else if (c == '\n') {
             s++;
-           PL_parser->linestart = s;
-           if (s == bufend)
-               need_incline = 1;
-           else
-               incline(s);
+           if (can_incline) {
+               PL_parser->linestart = s;
+               if (s == bufend)
+                   need_incline = 1;
+               else
+                   incline(s);
+           }
         } else if (isSPACE(c)) {
             s++;
         } else if (c == 0 && s == bufend) {
             bool got_more;
+           line_t l;
  #ifdef PERL_MAD
             if (PL_madskills)
                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
@@ -1557,14 +1572,15 @@ Perl_lex_read_space(pTHX_ U32 flags)
             if (flags & LEX_NO_NEXT_CHUNK)
                 break;
             PL_parser->bufptr = s;
-           COPLINE_INC_WITH_HERELINES;
+           l = CopLINE(PL_curcop);
+           CopLINE(PL_curcop) += PL_parser->herelines + 1;
             got_more = lex_next_chunk(flags);
-           CopLINE_dec(PL_curcop);
+           CopLINE_set(PL_curcop, l);
             s = PL_parser->bufptr;
             bufend = PL_parser->bufend;
             if (!got_more)
                 break;
-           if (need_incline && PL_parser->rsfp) {
+           if (can_incline && need_incline && PL_parser->rsfp) {
                 incline(s);
                 need_incline = 0;
             }
@@ -1580,6 +1596,107 @@ Perl_lex_read_space(pTHX_ U32 flags)
  }
  
  /*
+
+=for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
+
+This function performs syntax checking on a prototype, C<proto>.
+If C<warn> is true, any illegal characters or mismatched brackets
+will trigger illegalproto warnings, declaring that they were
+detected in the prototype for C<name>.
+
+The return value is C<true> if this is a valid prototype, and
+C<false> if it is not, regardless of whether C<warn> was C<true> or
+C<false>.
+
+Note that C<NULL> is a valid C<proto> and will always return C<true>.
+
+=cut
+
+ */
+
+bool
+Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
+{
+    STRLEN len, origlen;
+    char *p = proto ? SvPV(proto, len) : NULL;
+    bool bad_proto = FALSE;
+    bool in_brackets = FALSE;
+    bool after_slash = FALSE;
+    char greedy_proto = ' ';
+    bool proto_after_greedy_proto = FALSE;
+    bool must_be_last = FALSE;
+    bool underscore = FALSE;
+    bool bad_proto_after_underscore = FALSE;
+
+    PERL_ARGS_ASSERT_VALIDATE_PROTO;
+
+    if (!proto)
+       return TRUE;
+
+    origlen = len;
+    for (; len--; p++) {
+       if (!isSPACE(*p)) {
+           if (must_be_last)
+               proto_after_greedy_proto = TRUE;
+           if (underscore) {
+               if (!strchr(";@%", *p))
+                   bad_proto_after_underscore = TRUE;
+               underscore = FALSE;
+           }
+           if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
+               bad_proto = TRUE;
+           }
+           else {
+               if (*p == '[')
+                   in_brackets = TRUE;
+               else if (*p == ']')
+                   in_brackets = FALSE;
+               else if ((*p == '@' || *p == '%') &&
+                   !after_slash &&
+                   !in_brackets ) {
+                   must_be_last = TRUE;
+                   greedy_proto = *p;
+               }
+               else if (*p == '_')
+                   underscore = TRUE;
+           }
+           if (*p == '\\')
+               after_slash = TRUE;
+           else
+               after_slash = FALSE;
+       }
+    }
+
+    if (warn) {
+       SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
+       p -= origlen;
+       p = SvUTF8(proto)
+           ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
+                            origlen, UNI_DISPLAY_ISPRINT)
+           : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
+
+       if (proto_after_greedy_proto)
+           Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
+                       "Prototype after '%c' for %"SVf" : %s",
+                       greedy_proto, SVfARG(name), p);
+       if (in_brackets)
+           Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
+                       "Missing ']' in prototype for %"SVf" : %s",
+                       SVfARG(name), p);
+       if (bad_proto)
+           Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
+                       "Illegal character in prototype for %"SVf" : %s",
+                       SVfARG(name), p);
+       if (bad_proto_after_underscore)
+           Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
+                       "Illegal character after '_' in prototype for %"SVf" : %s",
+                       SVfARG(name), p);
+    }
+
+    return (! (proto_after_greedy_proto || bad_proto) );
+}
+
+/*
   * S_incline
   * This subroutine has nothing to do with tilting, whether at windmills
   * or pinball tables.  Its name is short for "increment line".  It
@@ -1650,37 +1767,16 @@ S_incline(pTHX_ const char *s)
  
      if (t - s > 0) {
         const STRLEN len = t - s;
-       SV *const temp_sv = CopFILESV(PL_curcop);
-       const char *cf;
-       STRLEN tmplen;
-
-       if (temp_sv) {
-           cf = SvPVX(temp_sv);
-           tmplen = SvCUR(temp_sv);
-       } else {
-           cf = NULL;
-           tmplen = 0;
-       }
  
         if (!PL_rsfp && !PL_parser->filtered) {
             /* must copy *{"::_<(eval N)[oldfilename:L]"}
              * to *{"::_<newfilename"} */
             /* However, the long form of evals is only turned on by the
                debugger - usually they're "(eval %lu)" */
-           char smallbuf[128];
-           char *tmpbuf;
-           GV **gvp;
-           STRLEN tmplen2 = len;
-           if (tmplen + 2 <= sizeof smallbuf)
-               tmpbuf = smallbuf;
-           else
-               Newx(tmpbuf, tmplen + 2, char);
-           tmpbuf[0] = '_';
-           tmpbuf[1] = '<';
-           memcpy(tmpbuf + 2, cf, tmplen);
-           tmplen += 2;
-           gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
-           if (gvp) {
+           GV * const cfgv = CopFILEGV(PL_curcop);
+           if (cfgv) {
+               char smallbuf[128];
+               STRLEN tmplen2 = len;
                 char *tmpbuf2;
                 GV *gv2;
  
@@ -1689,12 +1785,8 @@ S_incline(pTHX_ const char *s)
                 else
                     Newx(tmpbuf2, tmplen2 + 2, char);
  
-               if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
-                   /* Either they malloc'd it, or we malloc'd it,
-                      so no prefix is present in ours.  */
-                   tmpbuf2[0] = '_';
-                   tmpbuf2[1] = '<';
-               }
+               tmpbuf2[0] = '_';
+               tmpbuf2[1] = '<';
  
                 memcpy(tmpbuf2 + 2, s, tmplen2);
                 tmplen2 += 2;
@@ -1708,11 +1800,11 @@ S_incline(pTHX_ const char *s)
                        alias the saved lines that are in the array.
                        Otherwise alias the whole array. */
                     if (CopLINE(PL_curcop) == line_num) {
-                       GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
-                       GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
+                       GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
+                       GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
                     }
-                   else if (GvAV(*gvp)) {
-                       AV * const av = GvAV(*gvp);
+                   else if (GvAV(cfgv)) {
+                       AV * const av = GvAV(cfgv);
                         const I32 start = CopLINE(PL_curcop)+1;
                         I32 items = AvFILLp(av) - start;
                         if (items > 0) {
@@ -1727,7 +1819,6 @@ S_incline(pTHX_ const char *s)
  
                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
             }
-           if (tmpbuf != smallbuf) Safefree(tmpbuf);
         }
         CopFILE_free(PL_curcop);
         CopFILE_setn(PL_curcop, s, len);
@@ -1735,6 +1826,8 @@ S_incline(pTHX_ const char *s)
      CopLINE_set(PL_curcop, line_num);
  }
  
+#define skipspace(s) skipspace_flags(s, 0)
+
  #ifdef PERL_MAD
  /* skip space before PL_thistoken */
  
@@ -1790,13 +1883,11 @@ STATIC char *
  S_skipspace2(pTHX_ char *s, SV **svp)
  {
      char *start;
-    const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
      const I32 startoff = s - SvPVX(PL_linestr);
  
      PERL_ARGS_ASSERT_SKIPSPACE2;
  
      s = skipspace(s);
-    PL_bufptr = SvPVX(PL_linestr) + bufptroff;
      if (!PL_madskills || !svp)
         return s;
      start = SvPVX(PL_linestr) + startoff;
@@ -1822,14 +1913,23 @@ S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
  {
      AV *av = CopFILEAVx(PL_curcop);
      if (av) {
-       SV * const sv = newSV_type(SVt_PVMG);
+       SV * sv;
+       if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
+       else {
+           sv = *av_fetch(av, 0, 1);
+           SvUPGRADE(sv, SVt_PVMG);
+       }
+       if (!SvPOK(sv)) sv_setpvs(sv,"");
         if (orig_sv)
-           sv_setsv(sv, orig_sv);
+           sv_catsv(sv, orig_sv);
         else
-           sv_setpvn(sv, buf, len);
-       (void)SvIOK_on(sv);
-       SvIV_set(sv, 0);
-       av_store(av, (I32)CopLINE(PL_curcop), sv);
+           sv_catpvn(sv, buf, len);
+       if (!SvIOK(sv)) {
+           (void)SvIOK_on(sv);
+           SvIV_set(sv, 0);
+       }
+       if (PL_parser->preambling == NOLINE)
+           av_store(av, CopLINE(PL_curcop), sv);
      }
  }
  
@@ -1840,12 +1940,12 @@ S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
   */
  
  STATIC char *
-S_skipspace(pTHX_ char *s)
+S_skipspace_flags(pTHX_ char *s, U32 flags)
  {
  #ifdef PERL_MAD
      char *start = s;
  #endif /* PERL_MAD */
-    PERL_ARGS_ASSERT_SKIPSPACE;
+    PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
  #ifdef PERL_MAD
      if (PL_skipwhite) {
         sv_free(PL_skipwhite);
@@ -1858,7 +1958,7 @@ S_skipspace(pTHX_ char *s)
      } else {
         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
         PL_bufptr = s;
-       lex_read_space(LEX_KEEP_PREVIOUS |
+       lex_read_space(flags | LEX_KEEP_PREVIOUS |
                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
                     LEX_NO_NEXT_CHUNK : 0));
         s = PL_bufptr;
@@ -2471,7 +2571,7 @@ S_sublex_start(pTHX)
         return THING;
      }
      else if (op_type == OP_BACKTICK && PL_lex_op) {
-       /* readpipe() vas overriden */
+       /* readpipe() was overridden */
         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
         pl_yylval.opval = PL_lex_op;
         PL_lex_op = NULL;
@@ -2507,6 +2607,7 @@ S_sublex_push(pTHX)
  {
      dVAR;
      LEXSHARED *shared;
+    const bool is_heredoc = PL_multi_close == '<';
      ENTER;
  
      PL_lex_state = PL_sublex_info.super_state;
@@ -2521,7 +2622,14 @@ S_sublex_push(pTHX)
      SAVESPTR(PL_lex_repl);
      SAVEVPTR(PL_lex_inpat);
      SAVEI16(PL_lex_inwhat);
-    SAVECOPLINE(PL_curcop);
+    if (is_heredoc)
+    {
+       SAVECOPLINE(PL_curcop);
+       SAVEI32(PL_multi_end);
+       SAVEI32(PL_parser->herelines);
+       PL_parser->herelines = 0;
+    }
+    SAVEI8(PL_multi_close);
      SAVEPPTR(PL_bufptr);
      SAVEPPTR(PL_bufend);
      SAVEPPTR(PL_oldbufptr);
@@ -2534,6 +2642,7 @@ S_sublex_push(pTHX)
      SAVEGENERICPV(PL_lex_casestack);
      SAVEGENERICPV(PL_parser->lex_shared);
      SAVEBOOL(PL_parser->lex_re_reparsing);
+    SAVEI32(PL_copline);
  
      /* The here-doc parser needs to be able to peek into outer lexing
         scopes to find the body of the here-doc.  So we put PL_linestr and
@@ -2564,7 +2673,9 @@ S_sublex_push(pTHX)
      *PL_lex_casestack = '\0';
      PL_lex_starts = 0;
      PL_lex_state = LEX_INTERPCONCAT;
-    CopLINE_set(PL_curcop, (line_t)PL_multi_start);
+    if (is_heredoc)
+       CopLINE_set(PL_curcop, (line_t)PL_multi_start);
+    PL_copline = NOLINE;
      
      Newxz(shared, 1, LEXSHARED);
      shared->ls_prev = PL_parser->lex_shared;
@@ -2633,9 +2744,16 @@ S_sublex_done(pTHX)
             PL_lex_state = LEX_INTERPCONCAT;
             PL_lex_repl = NULL;
         }
+       if (SvTYPE(PL_linestr) >= SVt_PVNV) {
+           CopLINE(PL_curcop) +=
+               ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
+                + PL_parser->herelines;
+           PL_parser->herelines = 0;
+       }
         return ',';
      }
      else {
+       const line_t l = CopLINE(PL_curcop);
  #ifdef PERL_MAD
         if (PL_madskills) {
             if (PL_thiswhite) {
@@ -2651,6 +2769,8 @@ S_sublex_done(pTHX)
         }
  #endif
         LEAVE;
+       if (PL_multi_close == '<')
+           PL_parser->herelines += l - PL_multi_end;
         PL_bufend = SvPVX(PL_linestr);
         PL_bufend += SvCUR(PL_linestr);
         PL_expect = XOPERATOR;
@@ -2684,7 +2804,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
      {
          /* If warnings are on, this will print a more detailed analysis of what
           * is wrong than the error message below */
-        utf8n_to_uvuni(first_bad_char_loc,
+        utf8n_to_uvchr(first_bad_char_loc,
                         e - ((char *) first_bad_char_loc),
                         NULL, 0);
  
@@ -2759,7 +2879,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
              }
              s++;
          } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
-            if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))))) {
+            if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
                  goto bad_charname;
              }
              s += 2;
@@ -2792,8 +2912,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
                  s++;
              }
              else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
-                if (! isCHARNAME_CONT(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s,
-                                                                    *(s+1)))))
+                if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
                  {
                      goto bad_charname;
                  }
@@ -2827,7 +2946,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
          if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
              /* If warnings are on, this will print a more detailed analysis of
               * what is wrong than the error message below */
-            utf8n_to_uvuni(first_bad_char_loc,
+            utf8n_to_uvchr(first_bad_char_loc,
                             (char *) first_bad_char_loc - str,
                             NULL, 0);
  
@@ -3028,7 +3147,7 @@ S_scan_const(pTHX_ char *start)
                     char *e = d++;
                     while (e-- > c)
                         *(e + 1) = *e;
-                   *c = (char)UTF_TO_NATIVE(0xff);
+                   *c = (char) ILLEGAL_UTF8_BYTE;
                     /* mark the range as done, and continue */
                     dorange = FALSE;
                     didrange = TRUE;
@@ -3082,16 +3201,12 @@ S_scan_const(pTHX_ char *start)
  
  #ifdef EBCDIC
                 if (literal_endpoint == 2 &&
-                   ((isLOWER(min) && isLOWER(max)) ||
-                    (isUPPER(min) && isUPPER(max)))) {
-                   if (isLOWER(min)) {
-                       for (i = min; i <= max; i++)
-                           if (isLOWER(i))
-                               *d++ = NATIVE_TO_NEED(has_utf8,i);
-                   } else {
-                       for (i = min; i <= max; i++)
-                           if (isUPPER(i))
-                               *d++ = NATIVE_TO_NEED(has_utf8,i);
+                   ((isLOWER_A(min) && isLOWER_A(max)) ||
+                    (isUPPER_A(min) && isUPPER_A(max))))
+                {
+                    for (i = min; i <= max; i++) {
+                        if (isALPHA_A(i))
+                            *d++ = i;
                     }
                 }
                 else
@@ -3099,13 +3214,7 @@ S_scan_const(pTHX_ char *start)
                     for (i = min; i <= max; i++)
  #ifdef EBCDIC
                          if (has_utf8) {
-                            const U8 ch = (U8)NATIVE_TO_UTF(i);
-                            if (UNI_IS_INVARIANT(ch))
-                                *d++ = (U8)i;
-                            else {
-                                *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
-                                *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
-                            }
+                            append_utf8_from_native_byte(i, &d);
                          }
                          else
  #endif
@@ -3115,7 +3224,7 @@ S_scan_const(pTHX_ char *start)
                  if (uvmax) {
                      d = (char*)uvchr_to_utf8((U8*)d, 0x100);
                      if (uvmax > 0x101)
-                        *d++ = (char)UTF_TO_NATIVE(0xff);
+                        *d++ = (char) ILLEGAL_UTF8_BYTE;
                      if (uvmax > 0x100)
                          d = (char*)uvchr_to_utf8((U8*)d, uvmax);
                  }
@@ -3140,7 +3249,7 @@ S_scan_const(pTHX_ char *start)
                     && !native_range
  #endif
                     ) {
-                   *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
+                   *d++ = (char) ILLEGAL_UTF8_BYTE;    /* use illegal utf8 byte--see pmtrans */
                     s++;
                     continue;
                 }
@@ -3180,12 +3289,12 @@ S_scan_const(pTHX_ char *start)
          * char, which will be done separately.
          * Stop on (?{..}) and friends */
  
-       else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
+       else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
             if (s[2] == '#') {
                 while (s+1 < send && *s != ')')
-                   *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+                   *d++ = *s++;
             }
-           else if (!PL_lex_casemods && !in_charclass &&
+           else if (!PL_lex_casemods &&
                      (    s[2] == '{' /* This should match regcomp.c */
                       || (s[2] == '?' && s[3] == '{')))
             {
@@ -3194,10 +3303,10 @@ S_scan_const(pTHX_ char *start)
         }
  
         /* likewise skip #-initiated comments in //x patterns */
-       else if (*s == '#' && PL_lex_inpat &&
+       else if (*s == '#' && PL_lex_inpat && !in_charclass &&
           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
             while (s+1 < send && *s != '\n')
-               *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+               *d++ = *s++;
         }
  
         /* no further processing of single-quoted regex */
@@ -3272,7 +3381,7 @@ S_scan_const(pTHX_ char *start)
                         || s[1] != '{'
                         || regcurly(s + 1, FALSE)))
             {
-               *d++ = NATIVE_TO_NEED(has_utf8,'\\');
+               *d++ = '\\';
                 goto default_action;
             }
  
@@ -3301,7 +3410,7 @@ S_scan_const(pTHX_ char *start)
                 {
                      I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
                      STRLEN len = 3;
-                   uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
+                   uv = grok_oct(s, &len, &flags, NULL);
                     s += len;
                      if (len < 3 && s < send && isDIGIT(*s)
                          && ckWARN(WARN_MISC))
@@ -3353,9 +3462,8 @@ S_scan_const(pTHX_ char *start)
                  * UTF-8 sequence they can end up as, except if they force us
                  * to recode the rest of the string into utf8 */
                 
-               /* Here uv is the ordinal of the next character being added in
-                * unicode (converted from native). */
-               if (!UNI_IS_INVARIANT(uv)) {
+               /* Here uv is the ordinal of the next character being added */
+               if (!UVCHR_IS_INVARIANT(uv)) {
                     if (!has_utf8 && uv > 255) {
                         /* Might need to recode whatever we have accumulated so
                          * far if it contains any chars variant in utf8 or
@@ -3373,7 +3481,7 @@ S_scan_const(pTHX_ char *start)
                      }
  
                      if (has_utf8) {
-                       d = (char*)uvuni_to_utf8((U8*)d, uv);
+                       d = (char*)uvchr_to_utf8((U8*)d, uv);
                         if (PL_lex_inwhat == OP_TRANS &&
                             PL_sublex_info.sub_op) {
                             PL_sublex_info.sub_op->op_private |=
@@ -3406,16 +3514,6 @@ S_scan_const(pTHX_ char *start)
                  * now, while preserving the fact that it was a named character
                  * so that the regex compiler knows this */
  
-               /* This section of code doesn't generally use the
-                * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
-                * a close examination of this macro and determined it is a
-                * no-op except on utfebcdic variant characters.  Every
-                * character generated by this that would normally need to be
-                * enclosed by this macro is invariant, so the macro is not
-                * needed, and would complicate use of copy().  XXX There are
-                * other parts of this file where the macro is used
-                * inconsistently, but are saved by it being a no-op */
-
                 /* The structure of this section of code (besides checking for
                  * errors and upgrading to utf8) is:
                  *  Further disambiguate between the two meanings of \N, and if
@@ -3503,11 +3601,13 @@ S_scan_const(pTHX_ char *start)
                             has_utf8 = TRUE;
                         }
  
-                       /* Add the string to the output */
+                        /* Add the (Unicode) code point to the output. */
                         if (UNI_IS_INVARIANT(uv)) {
-                           *d++ = (char) uv;
+                           *d++ = (char) LATIN1_TO_NATIVE(uv);
                         }
-                       else d = (char*)uvuni_to_utf8((U8*)d, uv);
+                       else {
+                            d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
+                        }
                     }
                 }
                 else /* Here is \N{NAME} but not \N{U+...}. */
@@ -3567,19 +3667,16 @@ S_scan_const(pTHX_ char *start)
                                  char hex_string[2 * UTF8_MAXBYTES + 5];
  
                                  /* Get the first character of the result. */
-                                U32 uv = utf8n_to_uvuni((U8 *) str,
+                                U32 uv = utf8n_to_uvchr((U8 *) str,
                                                          len,
                                                          &char_length,
                                                          UTF8_ALLOW_ANYUV);
                                  /* Convert first code point to hex, including
-                                 * the boiler plate before it.  For all these,
-                                 * we convert to native format so that
-                                 * downstream code can continue to assume the
-                                 * input is native */
+                                 * the boiler plate before it. */
                                  output_length =
                                      my_snprintf(hex_string, sizeof(hex_string),
-                                            "\\N{U+%X",
-                                            (unsigned int) UNI_TO_NATIVE(uv));
+                                                "\\N{U+%X",
+                                                (unsigned int) uv);
  
                                  /* Make sure there is enough space to hold it */
                                  d = off + SvGROW(sv, off
@@ -3594,15 +3691,15 @@ S_scan_const(pTHX_ char *start)
                                  * its ordinal in hex */
                                  while ((str += char_length) < str_end) {
                                      const STRLEN off = d - SvPVX_const(sv);
-                                    U32 uv = utf8n_to_uvuni((U8 *) str,
+                                    U32 uv = utf8n_to_uvchr((U8 *) str,
                                                              str_end - str,
                                                              &char_length,
                                                              UTF8_ALLOW_ANYUV);
                                      output_length =
                                          my_snprintf(hex_string,
-                                            sizeof(hex_string),
-                                            ".%X",
-                                            (unsigned int) UNI_TO_NATIVE(uv));
+                                                    sizeof(hex_string),
+                                                    ".%X",
+                                                    (unsigned int) uv);
  
                                      d = off + SvGROW(sv, off
                                                          + output_length
@@ -3667,25 +3764,25 @@ S_scan_const(pTHX_ char *start)
  
             /* printf-style backslashes, formfeeds, newlines, etc */
             case 'b':
-               *d++ = NATIVE_TO_NEED(has_utf8,'\b');
+               *d++ = '\b';
                 break;
             case 'n':
-               *d++ = NATIVE_TO_NEED(has_utf8,'\n');
+               *d++ = '\n';
                 break;
             case 'r':
-               *d++ = NATIVE_TO_NEED(has_utf8,'\r');
+               *d++ = '\r';
                 break;
             case 'f':
-               *d++ = NATIVE_TO_NEED(has_utf8,'\f');
+               *d++ = '\f';
                 break;
             case 't':
-               *d++ = NATIVE_TO_NEED(has_utf8,'\t');
+               *d++ = '\t';
                 break;
             case 'e':
-               *d++ = ASCII_TO_NEED(has_utf8,'\033');
+               *d++ = ASCII_TO_NATIVE('\033');
                 break;
             case 'a':
-               *d++ = ASCII_TO_NEED(has_utf8,'\007');
+               *d++ = '\a';
                 break;
             } /* end switch */
  
@@ -3700,7 +3797,7 @@ S_scan_const(pTHX_ char *start)
      default_action:
         /* If we started with encoded form, or already know we want it,
            then encode the next character */
-       if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
+       if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
             STRLEN len  = 1;
  
  
@@ -3711,8 +3808,10 @@ S_scan_const(pTHX_ char *start)
              * routine that does the conversion checks for errors like
              * malformed utf8 */
  
-           const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
-           const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
+           const UV nextuv   = (this_utf8)
+                                ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
+                                : (UV) ((U8) *s);
+           const STRLEN need = UNISKIP(nextuv);
             if (!has_utf8) {
                 SvCUR_set(sv, d - SvPVX_const(sv));
                 SvPOK_on(sv);
@@ -3739,7 +3838,7 @@ S_scan_const(pTHX_ char *start)
  #endif
         }
         else {
-           *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+           *d++ = *s++;
         }
      } /* while loop to process each character */
  
@@ -3770,7 +3869,12 @@ S_scan_const(pTHX_ char *start)
      }
  
      /* return the substring (via pl_yylval) only if we parsed anything */
-    if (s > PL_bufptr) {
+    if (s > start) {
+       char *s2 = start;
+       for (; s2 < s; s2++) {
+           if (*s2 == '\n')
+               COPLINE_INC_WITH_HERELINES;
+       }
         SvREFCNT_inc_simple_void_NN(sv);
         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
              && ! PL_parser->lex_re_reparsing)
@@ -4189,14 +4293,7 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
      /* This API is bad. It should have been using unsigned int for maxlen.
         Not sure if we want to change the API, but if not we should sanity
         check the value here.  */
-    unsigned int correct_length
-       = maxlen < 0 ?
-#ifdef PERL_MICRO
-       0x7FFFFFFF
-#else
-       INT_MAX
-#endif
-       : maxlen;
+    unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
  
      PERL_ARGS_ASSERT_FILTER_READ;
  
@@ -4348,6 +4445,7 @@ S_readpipe_override(pTHX)
              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
      {
+       COPLINE_SET_FROM_MULTI_END;
         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
             op_append_elem(OP_LIST,
                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
@@ -4584,6 +4682,20 @@ S_word_takes_any_delimeter(char *p, STRLEN len)
             (p[0] == 'q' && strchr("qwxr", p[1]))));
  }
  
+static void
+S_check_scalar_slice(pTHX_ char *s)
+{
+    s++;
+    while (*s == ' ' || *s == '\t') s++;
+    if (*s == 'q' && s[1] == 'w'
+     && !isWORDCHAR_lazy_if(s+2,UTF))
+       return;
+    while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
+       s += UTF ? UTF8SKIP(s) : 1;
+    if (*s == '}' || *s == ']')
+       pl_yylval.ival = OPpSLICEWARNING;
+}
+
  /*
    yylex
  
@@ -4640,6 +4752,7 @@ Perl_yylex(pTHX)
      char *d;
      STRLEN len;
      bool bof = FALSE;
+    const bool saw_infix_sigil = PL_parser->saw_infix_sigil;
      U8 formbrack = 0;
      U32 fake_eof = 0;
  
@@ -5039,9 +5152,12 @@ Perl_yylex(pTHX)
         return yylex();
      }
  
+    /* We really do *not* want PL_linestr ever becoming a COW. */
+    assert (!SvIsCOW(PL_linestr));
      s = PL_bufptr;
      PL_oldoldbufptr = PL_oldbufptr;
      PL_oldbufptr = s;
+    PL_parser->saw_infix_sigil = 0;
  
    retry:
  #ifdef PERL_MAD
@@ -5057,22 +5173,20 @@ Perl_yylex(pTHX)
             goto keylookup;
         {
          SV *dsv = newSVpvs_flags("", SVs_TEMP);
-        const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
+        const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
                                                      UTF8SKIP(s),
                                                      SVs_TEMP | SVf_UTF8),
-                                            10, UNI_DISPLAY_ISPRINT))
+                                            10, UNI_DISPLAY_ISPRINT)
                              : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
          len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
          if (len > UNRECOGNIZED_PRECEDE_COUNT) {
              d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
          } else {
              d = PL_linestart;
-        }      
-        *s = '\0';
-        sv_setpv(dsv, d);
-        if (UTF)
-            SvUTF8_on(dsv);
-        Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
+        }
+        Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
+                          UTF8fARG(UTF, (s - d), d),
+                         (int) len + 1);
      }
      case 4:
      case 26:
@@ -5121,6 +5235,7 @@ Perl_yylex(pTHX)
                     SETERRNO(0,SS_NORMAL);
                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
                 }
+               PL_parser->preambling = CopLINE(PL_curcop);
             } else
                 sv_setpvs(PL_linestr,"");
             if (PL_preambleav) {
@@ -5200,7 +5315,7 @@ Perl_yylex(pTHX)
              * check if it in fact is. */
             if (bof && PL_rsfp &&
                      (*s == 0 ||
-                     *(U8*)s == 0xEF ||
+                     *(U8*)s == BOM_UTF8_FIRST_BYTE ||
                       *(U8*)s >= 0xFE ||
                       s[1] == 0)) {
                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
@@ -5512,14 +5627,19 @@ Perl_yylex(pTHX)
                 s = SKIPSPACE0(s);
             }
             else {
-/*             if (PL_madskills && PL_lex_formbrack) { */
-                   d = s;
-                   while (d < PL_bufend && *d != '\n')
-                       d++;
-                   if (d < PL_bufend)
-                       d++;
-                   else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
+#endif
+                   if (PL_madskills) d = s;
+                   while (s < PL_bufend && *s != '\n')
+                       s++;
+                   if (s < PL_bufend)
+                   {
+                       s++;
+                       if (s < PL_bufend)
+                           incline(s);
+                   }
+                   else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
                       Perl_croak(aTHX_ "panic: input overflow");
+#ifdef PERL_MAD
                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
                         if (!PL_thiswhite)
                             PL_thiswhite = newSVpvs("");
@@ -5527,16 +5647,9 @@ Perl_yylex(pTHX)
                             sv_setpvs(PL_thiswhite, "");
                             PL_faketokens = 0;
                         }
-                       sv_catpvn(PL_thiswhite, s, d - s);
+                       sv_catpvn(PL_thiswhite, d, s - d);
                     }
-                   s = d;
-/*             }
-               *s = '\0';
-               PL_bufend = s; */
             }
-#else
-           *s = '\0';
-           PL_bufend = s;
  #endif
         }
         goto retry;
@@ -5697,26 +5810,41 @@ Perl_yylex(pTHX)
             s--;
             TOKEN(0);
         }
+       PL_parser->saw_infix_sigil = 1;
         Mop(OP_MULTIPLY);
  
      case '%':
+    {
         if (PL_expect == XOPERATOR) {
             if (s[1] == '=' && !PL_lex_allbrackets &&
                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
                 TOKEN(0);
             ++s;
+           PL_parser->saw_infix_sigil = 1;
             Mop(OP_MODULO);
         }
         PL_tokenbuf[0] = '%';
         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
                 sizeof PL_tokenbuf - 1, FALSE);
+       pl_yylval.ival = 0;
         if (!PL_tokenbuf[1]) {
             PREREF('%');
         }
+       if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
+           if (*s == '[')
+               PL_tokenbuf[0] = '@';
+
+           /* Warn about % where they meant $. */
+           if (*s == '[' || *s == '{') {
+               if (ckWARN(WARN_SYNTAX)) {
+                   S_check_scalar_slice(aTHX_ s);
+               }
+           }
+       }
         PL_expect = XOPERATOR;
         force_ident_maybe_lex('%');
         TERM('%');
-
+    }
      case '^':
         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
@@ -5805,6 +5933,7 @@ Perl_yylex(pTHX)
                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
                 if (*d == '(') {
                     d = scan_str(d,TRUE,TRUE,FALSE, FALSE);
+                   COPLINE_SET_FROM_MULTI_END;
                     if (!d) {
                         /* MUST advance bufptr here to avoid bogus
                            "at end of line" context messages from yyerror().
@@ -6198,6 +6327,7 @@ Perl_yylex(pTHX)
                 s--;
                 TOKEN(0);
             }
+           PL_parser->saw_infix_sigil = 1;
             BAop(OP_BIT_AND);
         }
  
@@ -6501,9 +6631,8 @@ Perl_yylex(pTHX)
                                 if (*t == ';'
                                         && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                                               "You need to quote \"%"SVf"\"",
-                                                 SVfARG(newSVpvn_flags(tmpbuf, len, 
-                                                    SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+                                       "You need to quote \"%"UTF8f"\"",
+                                        UTF8fARG(UTF, len, tmpbuf));
                             }
                         }
                 }
@@ -6568,6 +6697,7 @@ Perl_yylex(pTHX)
             no_op("Array", s);
         PL_tokenbuf[0] = '@';
         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
+       pl_yylval.ival = 0;
         if (!PL_tokenbuf[1]) {
             PREREF('@');
         }
@@ -6580,20 +6710,7 @@ Perl_yylex(pTHX)
             /* Warn about @ where they meant $. */
             if (*s == '[' || *s == '{') {
                 if (ckWARN(WARN_SYNTAX)) {
-                   const char *t = s + 1;
-                   while (*t && (isWORDCHAR_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
-                       t += UTF ? UTF8SKIP(t) : 1;
-                   if (*t == '}' || *t == ']') {
-                       t++;
-                       PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
-       /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
-                       Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                           "Scalar value %"SVf" better written as $%"SVf,
-                           SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
-                                                SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
-                            SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
-                                                SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
-                   }
+                   S_check_scalar_slice(aTHX_ s);
                 }
             }
         }
@@ -6710,6 +6827,7 @@ Perl_yylex(pTHX)
  
      case '\'':
         s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
+       COPLINE_SET_FROM_MULTI_END;
         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
         if (PL_expect == XOPERATOR) {
             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
@@ -6725,7 +6843,13 @@ Perl_yylex(pTHX)
  
      case '"':
         s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
-       DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
+       DEBUG_T( {
+           if (s)
+               printbuf("### Saw string before %s\n", s);
+           else
+               PerlIO_printf(Perl_debug_log,
+                            "### Saw unterminated string\n");
+       } );
         if (PL_expect == XOPERATOR) {
             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
                 return deprecate_commaless_var_list();
@@ -6744,6 +6868,8 @@ Perl_yylex(pTHX)
                 break;
             }
         }
+       if (pl_yylval.ival == OP_CONST)
+           COPLINE_SET_FROM_MULTI_END;
         TERM(sublex_start());
  
      case '`':
@@ -6864,6 +6990,7 @@ Perl_yylex(pTHX)
  
         /* Is this a word before a => operator? */
         if (*d == '=' && d[1] == '>') {
+         fat_arrow:
             CLINE;
             pl_yylval.opval
                 = (OP*)newSVOP(OP_CONST, 0,
@@ -6997,6 +7124,21 @@ Perl_yylex(pTHX)
             }
         }
  
+       if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
+        && (!anydelim || *s != '#')) {
+           /* no override, and not s### either; skipspace is safe here
+            * check for => on following line */
+           bool arrow;
+           STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
+           STRLEN   soff = s         - SvPVX(PL_linestr);
+           s = skipspace_flags(s, LEX_NO_INCLINE);
+           arrow = *s == '=' && s[1] == '>';
+           PL_bufptr = SvPVX(PL_linestr) + bufoff;
+           s         = SvPVX(PL_linestr) +   soff;
+           if (arrow)
+               goto fat_arrow;
+       }
+
        reserved_word:
         switch (tmp) {
  
@@ -7035,9 +7177,8 @@ Perl_yylex(pTHX)
                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
                                   TRUE, &morelen);
                     if (!morelen)
-                       Perl_croak(aTHX_ "Bad name after %"SVf"%s",
-                                        SVfARG(newSVpvn_flags(PL_tokenbuf, len,
-                                            (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
+                       Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
+                               UTF8fARG(UTF, len, PL_tokenbuf),
                                 *s == '\'' ? "'" : "::");
                     len += morelen;
                     pkgname = 1;
@@ -7064,9 +7205,8 @@ Perl_yylex(pTHX)
                     if (ckWARN(WARN_BAREWORD)
                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
-                           "Bareword \"%"SVf"\" refers to nonexistent package",
-                            SVfARG(newSVpvn_flags(PL_tokenbuf, len,
-                                        (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
+                         "Bareword \"%"UTF8f"\" refers to nonexistent package",
+                          UTF8fARG(UTF, len, PL_tokenbuf));
                     len -= 2;
                     PL_tokenbuf[len] = '\0';
                     gv = NULL;
@@ -7199,7 +7339,7 @@ Perl_yylex(pTHX)
                         d = s + 1;
                         while (SPACE_OR_TAB(*d))
                             d++;
-                       if (*d == ')' && (sv = cv_const_sv(cv))) {
+                       if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
                             s = d + 1;
                             goto its_constant;
                         }
@@ -7256,19 +7396,26 @@ Perl_yylex(pTHX)
  
                 if (cv) {
                     if (lastchar == '-' && penultchar != '-') {
-                        const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
+                       const STRLEN l = len ? len : strlen(PL_tokenbuf);
                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
-                               "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
-                               SVfARG(tmpsv), SVfARG(tmpsv));
+                           "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
+                            UTF8fARG(UTF, l, PL_tokenbuf),
+                            UTF8fARG(UTF, l, PL_tokenbuf));
                      }
                     /* Check for a constant sub */
-                   if ((sv = cv_const_sv(cv))) {
+                   if ((sv = cv_const_sv_or_av(cv))) {
                   its_constant:
                         op_free(rv2cv_op);
                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
-                       pl_yylval.opval->op_private = OPpCONST_FOLDED;
-                       pl_yylval.opval->op_flags |= OPf_SPECIAL;
+                       if (SvTYPE(sv) == SVt_PVAV)
+                           pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
+                                                     pl_yylval.opval);
+                       else {
+                           pl_yylval.opval->op_private = 0;
+                           pl_yylval.opval->op_folded = 1;
+                           pl_yylval.opval->op_flags |= OPf_SPECIAL;
+                       }
                         TOKEN(WORD);
                     }
  
@@ -7433,12 +7580,13 @@ Perl_yylex(pTHX)
                 op_free(rv2cv_op);
  
             safe_bareword:
-               if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
+               if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
+                && saw_infix_sigil) {
                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
-                                    "Operator or semicolon missing before %c%"SVf,
-                                    lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
-                                                    strlen(PL_tokenbuf),
-                                                    SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+                                    "Operator or semicolon missing before %c%"UTF8f,
+                                    lastchar,
+                                    UTF8fARG(UTF, strlen(PL_tokenbuf),
+                                             PL_tokenbuf));
                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
                                      "Ambiguous use of %c resolved as operator %c",
                                      lastchar, lastchar);
@@ -7469,21 +7617,12 @@ Perl_yylex(pTHX)
         case KEY___END__: {
             GV *gv;
             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
-               const char *pname = "main";
-               STRLEN plen = 4;
-               U32 putf8 = 0;
-               if (PL_tokenbuf[2] == 'D')
-               {
-                   HV * const stash =
-                       PL_curstash ? PL_curstash : PL_defstash;
-                   pname = HvNAME_get(stash);
-                   plen  = HvNAMELEN (stash);
-                   if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
-               }
-               gv = gv_fetchpvn_flags(
-                       Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
-                       plen+6, GV_ADD|putf8, SVt_PVIO
-               );
+               HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
+                                       ? PL_curstash
+                                       : PL_defstash;
+               gv = (GV *)*hv_fetchs(stash, "DATA", 1);
+               if (!isGV(gv))
+                   gv_init(gv,stash,"DATA",4,0);
                 GvMULTI_on(gv);
                 if (!GvIO(gv))
                     GvIOp(gv) = newIO();
@@ -7598,9 +7737,8 @@ Perl_yylex(pTHX)
                     goto just_a_word;
                 }
                 if (!tmp)
-                   Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
-                                    SVfARG(newSVpvn_flags(PL_tokenbuf, len,
-                                                (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
+                   Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
+                                     UTF8fARG(UTF, len, PL_tokenbuf));
                 if (tmp < 0)
                     tmp = -tmp;
                 else if (tmp == KEY_require || tmp == KEY_do
@@ -8156,11 +8294,9 @@ Perl_yylex(pTHX)
                     && !(t[0] == ':' && t[1] == ':')
                     && !keyword(s, d-s, 0)
                 ) {
-                   SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
-                                                SVs_TEMP | (UTF ? SVf_UTF8 : 0));
                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
-                          "Precedence problem: open %"SVf" should be open(%"SVf")",
-                           SVfARG(tmpsv), SVfARG(tmpsv));
+                      "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
+                       UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
                 }
             }
             LOP(OP_OPEN,XTERM);
@@ -8215,6 +8351,7 @@ Perl_yylex(pTHX)
  
         case KEY_q:
             s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
+           COPLINE_SET_FROM_MULTI_END;
             if (!s)
                 missingterm(NULL);
             pl_yylval.ival = OP_CONST;
@@ -8226,6 +8363,7 @@ Perl_yylex(pTHX)
         case KEY_qw: {
             OP *words = NULL;
             s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
+           COPLINE_SET_FROM_MULTI_END;
             if (!s)
                 missingterm(NULL);
             PL_expect = XOPERATOR;
@@ -8508,7 +8646,9 @@ Perl_yylex(pTHX)
                 expectation attrful;
                 bool have_name, have_proto;
                 const int key = tmp;
+#ifndef PERL_MAD
                  SV *format_name = NULL;
+#endif
  
  #ifdef PERL_MAD
                 SV *tmpwhite = 0;
@@ -8540,9 +8680,10 @@ Perl_yylex(pTHX)
  #ifdef PERL_MAD
                     if (PL_madskills)
                         nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
-#endif
+#else
                      if (key == KEY_format)
                         format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
+#endif
                     *PL_tokenbuf = '&';
                     if (memchr(tmpbuf, ':', len) || key != KEY_sub
                      || pad_findmy_pvn(
@@ -8591,8 +8732,6 @@ Perl_yylex(pTHX)
  #else
                     if (format_name) {
                          start_force(PL_curforce);
-                        if (PL_madskills)
-                            curmad('X', newSVpvn(start,s-start));
                          NEXTVAL_NEXTTOKE.opval
                              = (OP*)newSVOP(OP_CONST,0, format_name);
                          NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
@@ -8604,78 +8743,11 @@ Perl_yylex(pTHX)
  
                 /* Look for a prototype */
                 if (*s == '(') {
-                   char *p;
-                   bool bad_proto = FALSE;
-                   bool in_brackets = FALSE;
-                   char greedy_proto = ' ';
-                   bool proto_after_greedy_proto = FALSE;
-                   bool must_be_last = FALSE;
-                   bool underscore = FALSE;
-                   bool seen_underscore = FALSE;
-                   const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
-                    STRLEN tmplen;
-
                     s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
+                   COPLINE_SET_FROM_MULTI_END;
                     if (!s)
                         Perl_croak(aTHX_ "Prototype not terminated");
-                   /* strip spaces and check for bad characters */
-                   d = SvPV(PL_lex_stuff, tmplen);
-                   tmp = 0;
-                   for (p = d; tmplen; tmplen--, ++p) {
-                       if (!isSPACE(*p)) {
-                            d[tmp++] = *p;
-
-                           if (warnillegalproto) {
-                               if (must_be_last)
-                                   proto_after_greedy_proto = TRUE;
-                               if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
-                                   bad_proto = TRUE;
-                               }
-                               else {
-                                   if ( underscore ) {
-                                       if ( !strchr(";@%", *p) )
-                                           bad_proto = TRUE;
-                                       underscore = FALSE;
-                                   }
-                                   if ( *p == '[' ) {
-                                       in_brackets = TRUE;
-                                   }
-                                   else if ( *p == ']' ) {
-                                       in_brackets = FALSE;
-                                   }
-                                   else if ( (*p == '@' || *p == '%') &&
-                                        ( tmp < 2 || d[tmp-2] != '\\' ) &&
-                                        !in_brackets ) {
-                                       must_be_last = TRUE;
-                                       greedy_proto = *p;
-                                   }
-                                   else if ( *p == '_' ) {
-                                       underscore = seen_underscore = TRUE;
-                                   }
-                               }
-                           }
-                       }
-                   }
-                    d[tmp] = '\0';
-                   if (proto_after_greedy_proto)
-                       Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
-                                   "Prototype after '%c' for %"SVf" : %s",
-                                   greedy_proto, SVfARG(PL_subname), d);
-                   if (bad_proto) {
-                        SV *dsv = newSVpvs_flags("", SVs_TEMP);
-                       Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
-                                   "Illegal character %sin prototype for %"SVf" : %s",
-                                   seen_underscore ? "after '_' " : "",
-                                   SVfARG(PL_subname),
-                                    SvUTF8(PL_lex_stuff)
-                                        ? sv_uni_display(dsv,
-                                            newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
-                                            tmp,
-                                            UNI_DISPLAY_ISPRINT)
-                                        : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
-                                            PERL_PV_ESCAPE_NONASCII));
-                    }
-                    SvCUR_set(PL_lex_stuff, tmp);
+                   (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
                     have_proto = TRUE;
  
  #ifdef PERL_MAD
@@ -8863,17 +8935,9 @@ Perl_yylex(pTHX)
             FUN0(OP_WANTARRAY);
  
         case KEY_write:
-#ifdef EBCDIC
-       {
-           char ctl_l[2];
-           ctl_l[0] = toCTRL('L');
-           ctl_l[1] = '\0';
-           gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
-       }
-#else
-           /* Make sure $^L is defined */
-           gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
-#endif
+            /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
+             * we use the same number on EBCDIC */
+           gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
             UNI(OP_ENTERWRITE);
  
         case KEY_x:
@@ -9012,9 +9076,9 @@ S_pending_ident(pTHX)
          {
              /* Downgraded from fatal to warning 20000522 mjd */
              Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
-                       "Possible unintended interpolation of %"SVf" in string",
-                       SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
-                                        SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
+                       "Possible unintended interpolation of %"UTF8f
+                       " in string",
+                       UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
          }
      }
  
@@ -9260,7 +9324,7 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool
          else if ( isWORDCHAR_A(**s) ) {
              do {
                  *(*d)++ = *(*s)++;
-            } while isWORDCHAR_A(**s);
+            } while (isWORDCHAR_A(**s) && *d < e);
          }
          else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
              *(*d)++ = ':';
@@ -9354,10 +9418,22 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck
            s++;
      }
  
-#define VALID_LEN_ONE_IDENT(d, u)     (isPUNCT_A((U8)(d))     \
-                                        || isCNTRL_A((U8)(d)) \
-                                        || isDIGIT_A((U8)(d)) \
-                                        || (!(u) && !UTF8_IS_INVARIANT((U8)(d))))
+/* Is the byte 'd' a legal single character identifier name?  'u' is true
+ * iff Unicode semantics are to be used.  The legal ones are any of:
+ *  a) ASCII digits
+ *  b) ASCII punctuation
+ *  c) When not under Unicode rules, any upper Latin1 character
+ *  d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally
+ *     been matched by \s on ASCII platforms.  That is: \c?, plus 1-32, minus
+ *     the \s ones. */
+#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d))                       \
+                                   || isDIGIT_A((U8)(d))                    \
+                                   || (!(u) && !isASCII((U8)(d)))           \
+                                   || ((((U8)(d)) < 32)                     \
+                                       && (((((U8)(d)) >= 14)               \
+                                           || (((U8)(d)) <= 8 && (d) != 0) \
+                                           || (((U8)(d)) == 13))))          \
+                                   || (((U8)(d)) == toCTRL('?')))
      if (s < send
          && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
      {
@@ -9657,6 +9733,7 @@ S_scan_subst(pTHX_ char *start)
      char *s;
      PMOP *pm;
      I32 first_start;
+    line_t first_line;
      I32 es = 0;
      char charset = '\0';    /* character set modifier */
  #ifdef PERL_MAD
@@ -9686,6 +9763,7 @@ S_scan_subst(pTHX_ char *start)
  #endif
  
      first_start = PL_multi_start;
+    first_line = CopLINE(PL_curcop);
      s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
      if (!s) {
         if (PL_lex_stuff) {
@@ -9748,6 +9826,12 @@ S_scan_subst(pTHX_ char *start)
         SvREFCNT_dec(PL_sublex_info.repl);
         PL_sublex_info.repl = repl;
      }
+    if (CopLINE(PL_curcop) != first_line) {
+       sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
+       ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
+           CopLINE(PL_curcop) - first_line;
+       CopLINE_set(PL_curcop, first_line);
+    }
  
      PL_lex_op = (OP*)pm;
      pl_yylval.ival = OP_SUBST;
@@ -9884,6 +9968,7 @@ S_scan_heredoc(pTHX_ char *s)
      char *e;
      char *peek;
      const bool infile = PL_rsfp || PL_parser->filtered;
+    const line_t origline = CopLINE(PL_curcop);
      LEXSHARED *shared = PL_parser->lex_shared;
  #ifdef PERL_MAD
      I32 stuffstart = s - SvPVX(PL_linestr);
@@ -9985,7 +10070,7 @@ S_scan_heredoc(pTHX_ char *s)
         SvIV_set(tmpstr, '\\');
      }
  
-    PL_multi_start = CopLINE(PL_curcop) + 1;
+    PL_multi_start = origline + 1 + PL_parser->herelines;
      PL_multi_open = PL_multi_close = '<';
      /* inside a string eval or quote-like operator */
      if (!infile || PL_lex_inwhat) {
@@ -10011,7 +10096,7 @@ S_scan_heredoc(pTHX_ char *s)
             /* shared is only null if we have gone beyond the outermost
                lexing scope.  In a file, we will have broken out of the
                loop in the previous iteration.  In an eval, the string buf-
-              fer ends with "\n;", so the while condition below will have
+              fer ends with "\n;", so the while condition above will have
                evaluated to false.  So shared can never be null. */
             assert(shared);
             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
@@ -10032,7 +10117,7 @@ S_scan_heredoc(pTHX_ char *s)
         while (s < bufend - len + 1 &&
            memNE(s,PL_tokenbuf,len) ) {
             if (*s++ == '\n')
-               ++shared->herelines;
+               ++PL_parser->herelines;
         }
         if (s >= bufend - len + 1) {
             goto interminable;
@@ -10049,7 +10134,7 @@ S_scan_heredoc(pTHX_ char *s)
  #endif
         s += len - 1;
         /* the preceding stmt passes a newline */
-       shared->herelines++;
+       PL_parser->herelines++;
  
         /* s now points to the newline after the heredoc terminator.
            d points to the newline before the body of the heredoc.
@@ -10107,22 +10192,25 @@ S_scan_heredoc(pTHX_ char *s)
  #endif
         PL_bufptr = PL_bufend;
         CopLINE_set(PL_curcop,
-                   PL_multi_start + shared->herelines);
+                   origline + 1 + PL_parser->herelines);
         if (!lex_next_chunk(LEX_NO_TERM)
          && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
             SvREFCNT_dec(linestr_save);
             goto interminable;
         }
-       CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
+       CopLINE_set(PL_curcop, origline);
         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
-           lex_grow_linestr(SvCUR(PL_linestr) + 2);
+            s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
+            /* ^That should be enough to avoid this needing to grow:  */
             sv_catpvs(PL_linestr, "\n\0");
+            assert(s == SvPVX(PL_linestr));
+            PL_bufend = SvEND(PL_linestr);
         }
         s = PL_bufptr;
  #ifdef PERL_MAD
         stuffstart = s - SvPVX(PL_linestr);
  #endif
-       shared->herelines++;
+       PL_parser->herelines++;
         PL_last_lop = PL_last_uni = NULL;
  #ifndef PERL_STRICT_CR
         if (PL_bufend - PL_linestart >= 2) {
@@ -10152,7 +10240,7 @@ S_scan_heredoc(pTHX_ char *s)
         }
        }
      }
-    PL_multi_end = CopLINE(PL_curcop);
+    PL_multi_end = origline + PL_parser->herelines;
      if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
         SvPV_shrink_to_cur(tmpstr);
      }
@@ -10168,7 +10256,7 @@ S_scan_heredoc(pTHX_ char *s)
  
    interminable:
      SvREFCNT_dec(tmpstr);
-    CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
+    CopLINE_set(PL_curcop, origline);
      missingterm(PL_tokenbuf + 1);
  }
  
@@ -10334,11 +10422,15 @@ intro_sym:
  
  
  /* scan_str
-   takes: start position in buffer
-         keep_quoted preserve \ on the embedded delimiter(s)
-         keep_delims preserve the delimiters around the string
-         re_reparse  compiling a run-time /(?{})/:
-                       collapse // to /,  and skip encoding src
+   takes:
+       start                   position in buffer
+       keep_quoted             preserve \ on the embedded delimiter(s)
+       keep_delims             preserve the delimiters around the string
+       re_reparse              compiling a run-time /(?{})/:
+                                  collapse // to /,  and skip encoding src
+       deprecate_escaped_meta  issue a deprecation warning for cer-
+                               tain paired metacharacters that appear
+                               escaped within it
     returns: position to continue reading from buffer
     side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
         updates the read buffer.
@@ -10380,9 +10472,7 @@ intro_sym:
  
  STATIC char *
  S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
-        bool deprecate_escaped_meta /* Should we issue a deprecation warning
-                                       for certain paired metacharacters that
-                                       appear escaped within it */
+                bool deprecate_escaped_meta
      )
  {
      dVAR;
@@ -10398,6 +10488,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
      STRLEN termlen;            /* length of terminating string */
      int last_off = 0;          /* last position for nesting bracket */
      char *escaped_open = NULL;
+    line_t herelines;
  #ifdef PERL_MAD
      int stuffstart;
      char *tstart;
@@ -10437,6 +10528,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
      /* mark where we are */
      PL_multi_start = CopLINE(PL_curcop);
      PL_multi_open = term;
+    herelines = PL_parser->herelines;
  
      /* find corresponding closing delimiter */
      if (term && (tmps = strchr("([{< )]}> )]}>",term)))
@@ -10450,8 +10542,8 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
       * happen for <>, as they aren't metas. */
      if (deprecate_escaped_meta
          && (PL_multi_open == PL_multi_close
-            || ! ckWARN_d(WARN_DEPRECATED)
-            || PL_multi_open == '<'))
+            || PL_multi_open == '<'
+            || ! ckWARN_d(WARN_DEPRECATED)))
      {
          deprecate_escaped_meta = FALSE;
      }
@@ -10482,8 +10574,49 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
                 int offset = s - SvPVX_const(PL_linestr);
                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
                                            &offset, (char*)termstr, termlen);
-               const char * const ns = SvPVX_const(PL_linestr) + offset;
-               char * const svlast = SvEND(sv) - 1;
+               const char *ns;
+               char *svlast;
+
+               if (SvIsCOW(PL_linestr)) {
+                   STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
+                   STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
+                   STRLEN last_lop_pos, re_eval_start_pos, s_pos;
+                   char *buf = SvPVX(PL_linestr);
+                   bufend_pos = PL_parser->bufend - buf;
+                   bufptr_pos = PL_parser->bufptr - buf;
+                   oldbufptr_pos = PL_parser->oldbufptr - buf;
+                   oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
+                   linestart_pos = PL_parser->linestart - buf;
+                   last_uni_pos = PL_parser->last_uni
+                       ? PL_parser->last_uni - buf
+                       : 0;
+                   last_lop_pos = PL_parser->last_lop
+                       ? PL_parser->last_lop - buf
+                       : 0;
+                   re_eval_start_pos =
+                       PL_parser->lex_shared->re_eval_start ?
+                            PL_parser->lex_shared->re_eval_start - buf : 0;
+                   s_pos = s - buf;
+
+                   sv_force_normal(PL_linestr);
+
+                   buf = SvPVX(PL_linestr);
+                   PL_parser->bufend = buf + bufend_pos;
+                   PL_parser->bufptr = buf + bufptr_pos;
+                   PL_parser->oldbufptr = buf + oldbufptr_pos;
+                   PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
+                   PL_parser->linestart = buf + linestart_pos;
+                   if (PL_parser->last_uni)
+                       PL_parser->last_uni = buf + last_uni_pos;
+                   if (PL_parser->last_lop)
+                       PL_parser->last_lop = buf + last_lop_pos;
+                   if (PL_parser->lex_shared->re_eval_start)
+                       PL_parser->lex_shared->re_eval_start  =
+                           buf + re_eval_start_pos;
+                   s = buf + s_pos;
+               }
+               ns = SvPVX_const(PL_linestr) + offset;
+               svlast = SvEND(sv) - 1;
  
                 for (; s < ns; s++) {
                     if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
@@ -10604,26 +10737,39 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
                           * context where the delimiter is also a metacharacter,
                           * the backslash is useless, and deprecated.  () and []
                           * are meta in any context. {} are meta only when
-                         * appearing in a quantifier or in things like '\p{'.
-                         * They also aren't meta unless there is a matching
-                         * closed, escaped char later on within the string.
-                         * If 's' points to an open, set a flag; if to a close,
-                         * test that flag, and raise a warning if it was set */
+                         * appearing in a quantifier or in things like '\p{'
+                         * (but '\\p{' isn't meta).  They also aren't meta
+                         * unless there is a matching closed, escaped char
+                         * later on within the string.  If 's' points to an
+                         * open, set a flag; if to a close, test that flag, and
+                         * raise a warning if it was set */
  
                         if (deprecate_escaped_meta) {
                              if (*s == PL_multi_open) {
                                  if (*s != '{') {
                                      escaped_open = s;
                                  }
-                                else if (regcurly(s,
-                                                  TRUE /* Look for a closing
-                                                          '\}' */)
-                                         || (s - start > 2  /* Look for e.g.
-                                                               '\x{' */
-                                             && _generic_isCC(*(s-2), _CC_BACKSLASH_FOO_LBRACE_IS_META)))
-                                {
+                                     /* Look for a closing '\}' */
+                                else if (regcurly(s, TRUE)) {
                                      escaped_open = s;
                                  }
+                                     /* Look for e.g.  '\x{' */
+                                else if (s - start > 2
+                                         && _generic_isCC(*(s-2),
+                                             _CC_BACKSLASH_FOO_LBRACE_IS_META))
+                                { /* Exclude '\\x', '\\\\x', etc. */
+                                    char *lookbehind = s - 4;
+                                    bool is_meta = TRUE;
+                                    while (lookbehind >= start
+                                           && *lookbehind == '\\')
+                                    {
+                                        is_meta = ! is_meta;
+                                        lookbehind--;
+                                    }
+                                    if (is_meta) {
+                                        escaped_open = s;
+                                    }
+                                }
                              }
                              else if (escaped_open) {
                                  Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
@@ -10736,6 +10882,8 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
         SvUTF8_on(sv);
  
      PL_multi_end = CopLINE(PL_curcop);
+    CopLINE_set(PL_curcop, PL_multi_start);
+    PL_parser->herelines = herelines;
  
      /* if we allocated too much space, give some back */
      if (SvCUR(sv) + 5 < SvLEN(sv)) {
@@ -11403,11 +11551,13 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
      }
      msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
      Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
-        OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
+        OutCopFILE(PL_curcop),
+        (IV)(PL_parser->preambling == NOLINE
+               ? CopLINE(PL_curcop)
+               : PL_parser->preambling));
      if (context)
-       Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n",
-                            SVfARG(newSVpvn_flags(context, contlen,
-                                        SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+       Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
+                            UTF8fARG(UTF, contlen, context));
      else
         Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
      if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
@@ -11479,12 +11629,14 @@ S_swallow_bom(pTHX_ U8 *s)
  #endif
         }
         break;
-    case 0xEF:
-       if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
-           if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
-           s += 3;                      /* UTF-8 */
-       }
-       break;
+    case BOM_UTF8_FIRST_BYTE: {
+        const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
+        if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
+            if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
+            s += len + 1;                      /* UTF-8 */
+        }
+        break;
+    }
      case 0:
         if (slen > 3) {
              if (s[1] == 0) {
@@ -11507,14 +11659,6 @@ S_swallow_bom(pTHX_ U8 *s)
  #endif
              }
         }
-#ifdef EBCDIC
-    case 0xDD:
-        if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
-            if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
-            s += 4;                      /* UTF-8 */
-        }
-        break;
-#endif
  
      default:
          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
@@ -11759,7 +11903,7 @@ Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
             /* Append native character for the rev point */
             tmpend = uvchr_to_utf8(tmpbuf, rev);
             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
-           if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
+           if (!UVCHR_IS_INVARIANT(rev))
                  SvUTF8_on(sv);
             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
                  s = ++pos;