toke.c: Reorder checks around deprecate_escaped_meta

[perl5.git] / toke.c
diff --git a/toke.c b/toke.c

index a8ce485..bf9d160 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -586,7 +586,7 @@ S_missingterm(pTHX_ char *s)
         if (nl)
             *nl = '\0';
      }
-    else if (isCNTRL(PL_multi_close)) {
+    else if ((U8) PL_multi_close < 32) {
         *tmpbuf = '^';
         tmpbuf[1] = (char)toCTRL(PL_multi_close);
         tmpbuf[2] = '\0';
@@ -753,9 +753,9 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
         parser->linestr = flags & LEX_START_COPIED
                             ? SvREFCNT_inc_simple_NN(line)
                             : newSVpvn_flags(s, len, SvUTF8(line));
-       sv_catpvs(parser->linestr, "\n;");
+       sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
      } else {
-       parser->linestr = newSVpvs("\n;");
+       parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
      }
      parser->oldoldbufptr =
         parser->oldbufptr =
@@ -1053,7 +1053,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
                     ENTER;
                     SAVESPTR(PL_warnhook);
                     PL_warnhook = PERL_WARNHOOK_FATAL;
-                   utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
+                   utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
                     LEAVE;
                 }
             }
@@ -1073,7 +1073,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
                 }
                 else {
                      assert(p < e -1 );
-                   *bufptr++ = TWO_BYTE_UTF8_TO_UNI(*p, *(p+1));
+                   *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
                     p += 2;
                  }
             }
@@ -1437,13 +1437,13 @@ Perl_lex_peek_unichar(pTHX_ U32 flags)
                 bufend = PL_parser->bufend;
             }
         }
-       unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
+       unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
         if (retlen == (STRLEN)-1) {
             /* malformed UTF-8 */
             ENTER;
             SAVESPTR(PL_warnhook);
             PL_warnhook = PERL_WARNHOOK_FATAL;
-           utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
+           utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
             LEAVE;
         }
         return unichar;
@@ -1753,37 +1753,16 @@ S_incline(pTHX_ const char *s)
  
      if (t - s > 0) {
         const STRLEN len = t - s;
-       SV *const temp_sv = CopFILESV(PL_curcop);
-       const char *cf;
-       STRLEN tmplen;
-
-       if (temp_sv) {
-           cf = SvPVX(temp_sv);
-           tmplen = SvCUR(temp_sv);
-       } else {
-           cf = NULL;
-           tmplen = 0;
-       }
  
         if (!PL_rsfp && !PL_parser->filtered) {
             /* must copy *{"::_<(eval N)[oldfilename:L]"}
              * to *{"::_<newfilename"} */
             /* However, the long form of evals is only turned on by the
                debugger - usually they're "(eval %lu)" */
-           char smallbuf[128];
-           char *tmpbuf;
-           GV **gvp;
-           STRLEN tmplen2 = len;
-           if (tmplen + 2 <= sizeof smallbuf)
-               tmpbuf = smallbuf;
-           else
-               Newx(tmpbuf, tmplen + 2, char);
-           tmpbuf[0] = '_';
-           tmpbuf[1] = '<';
-           memcpy(tmpbuf + 2, cf, tmplen);
-           tmplen += 2;
-           gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
-           if (gvp) {
+           GV * const cfgv = CopFILEGV(PL_curcop);
+           if (cfgv) {
+               char smallbuf[128];
+               STRLEN tmplen2 = len;
                 char *tmpbuf2;
                 GV *gv2;
  
@@ -1792,12 +1771,8 @@ S_incline(pTHX_ const char *s)
                 else
                     Newx(tmpbuf2, tmplen2 + 2, char);
  
-               if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
-                   /* Either they malloc'd it, or we malloc'd it,
-                      so no prefix is present in ours.  */
-                   tmpbuf2[0] = '_';
-                   tmpbuf2[1] = '<';
-               }
+               tmpbuf2[0] = '_';
+               tmpbuf2[1] = '<';
  
                 memcpy(tmpbuf2 + 2, s, tmplen2);
                 tmplen2 += 2;
@@ -1811,11 +1786,11 @@ S_incline(pTHX_ const char *s)
                        alias the saved lines that are in the array.
                        Otherwise alias the whole array. */
                     if (CopLINE(PL_curcop) == line_num) {
-                       GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
-                       GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
+                       GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
+                       GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
                     }
-                   else if (GvAV(*gvp)) {
-                       AV * const av = GvAV(*gvp);
+                   else if (GvAV(cfgv)) {
+                       AV * const av = GvAV(cfgv);
                         const I32 start = CopLINE(PL_curcop)+1;
                         I32 items = AvFILLp(av) - start;
                         if (items > 0) {
@@ -1830,7 +1805,6 @@ S_incline(pTHX_ const char *s)
  
                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
             }
-           if (tmpbuf != smallbuf) Safefree(tmpbuf);
         }
         CopFILE_free(PL_curcop);
         CopFILE_setn(PL_curcop, s, len);
@@ -1895,13 +1869,11 @@ STATIC char *
  S_skipspace2(pTHX_ char *s, SV **svp)
  {
      char *start;
-    const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
      const I32 startoff = s - SvPVX(PL_linestr);
  
      PERL_ARGS_ASSERT_SKIPSPACE2;
  
      s = skipspace(s);
-    PL_bufptr = SvPVX(PL_linestr) + bufptroff;
      if (!PL_madskills || !svp)
         return s;
      start = SvPVX(PL_linestr) + startoff;
@@ -1934,7 +1906,7 @@ S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
             sv_setpvn(sv, buf, len);
         (void)SvIOK_on(sv);
         SvIV_set(sv, 0);
-       av_store(av, (I32)CopLINE(PL_curcop), sv);
+       av_store(av, CopLINE(PL_curcop), sv);
      }
  }
  
@@ -2576,7 +2548,7 @@ S_sublex_start(pTHX)
         return THING;
      }
      else if (op_type == OP_BACKTICK && PL_lex_op) {
-       /* readpipe() vas overriden */
+       /* readpipe() was overridden */
         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
         pl_yylval.opval = PL_lex_op;
         PL_lex_op = NULL;
@@ -2789,7 +2761,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
      {
          /* If warnings are on, this will print a more detailed analysis of what
           * is wrong than the error message below */
-        utf8n_to_uvuni(first_bad_char_loc,
+        utf8n_to_uvchr(first_bad_char_loc,
                         e - ((char *) first_bad_char_loc),
                         NULL, 0);
  
@@ -2864,7 +2836,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
              }
              s++;
          } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
-            if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))))) {
+            if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
                  goto bad_charname;
              }
              s += 2;
@@ -2897,8 +2869,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
                  s++;
              }
              else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
-                if (! isCHARNAME_CONT(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s,
-                                                                    *(s+1)))))
+                if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
                  {
                      goto bad_charname;
                  }
@@ -2932,7 +2903,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
          if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
              /* If warnings are on, this will print a more detailed analysis of
               * what is wrong than the error message below */
-            utf8n_to_uvuni(first_bad_char_loc,
+            utf8n_to_uvchr(first_bad_char_loc,
                             (char *) first_bad_char_loc - str,
                             NULL, 0);
  
@@ -3133,7 +3104,7 @@ S_scan_const(pTHX_ char *start)
                     char *e = d++;
                     while (e-- > c)
                         *(e + 1) = *e;
-                   *c = (char)UTF_TO_NATIVE(0xff);
+                   *c = (char) ILLEGAL_UTF8_BYTE;
                     /* mark the range as done, and continue */
                     dorange = FALSE;
                     didrange = TRUE;
@@ -3187,16 +3158,12 @@ S_scan_const(pTHX_ char *start)
  
  #ifdef EBCDIC
                 if (literal_endpoint == 2 &&
-                   ((isLOWER(min) && isLOWER(max)) ||
-                    (isUPPER(min) && isUPPER(max)))) {
-                   if (isLOWER(min)) {
-                       for (i = min; i <= max; i++)
-                           if (isLOWER(i))
-                               *d++ = NATIVE_TO_NEED(has_utf8,i);
-                   } else {
-                       for (i = min; i <= max; i++)
-                           if (isUPPER(i))
-                               *d++ = NATIVE_TO_NEED(has_utf8,i);
+                   ((isLOWER_A(min) && isLOWER_A(max)) ||
+                    (isUPPER_A(min) && isUPPER_A(max))))
+                {
+                    for (i = min; i <= max; i++) {
+                        if (isALPHA_A(i))
+                            *d++ = i;
                     }
                 }
                 else
@@ -3204,13 +3171,7 @@ S_scan_const(pTHX_ char *start)
                     for (i = min; i <= max; i++)
  #ifdef EBCDIC
                          if (has_utf8) {
-                            const U8 ch = (U8)NATIVE_TO_UTF(i);
-                            if (UNI_IS_INVARIANT(ch))
-                                *d++ = (U8)i;
-                            else {
-                                *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
-                                *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
-                            }
+                            append_utf8_from_native_byte(i, &d);
                          }
                          else
  #endif
@@ -3220,7 +3181,7 @@ S_scan_const(pTHX_ char *start)
                  if (uvmax) {
                      d = (char*)uvchr_to_utf8((U8*)d, 0x100);
                      if (uvmax > 0x101)
-                        *d++ = (char)UTF_TO_NATIVE(0xff);
+                        *d++ = (char) ILLEGAL_UTF8_BYTE;
                      if (uvmax > 0x100)
                          d = (char*)uvchr_to_utf8((U8*)d, uvmax);
                  }
@@ -3245,7 +3206,7 @@ S_scan_const(pTHX_ char *start)
                     && !native_range
  #endif
                     ) {
-                   *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
+                   *d++ = (char) ILLEGAL_UTF8_BYTE;    /* use illegal utf8 byte--see pmtrans */
                     s++;
                     continue;
                 }
@@ -3285,12 +3246,12 @@ S_scan_const(pTHX_ char *start)
          * char, which will be done separately.
          * Stop on (?{..}) and friends */
  
-       else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
+       else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
             if (s[2] == '#') {
                 while (s+1 < send && *s != ')')
-                   *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+                   *d++ = *s++;
             }
-           else if (!PL_lex_casemods && !in_charclass &&
+           else if (!PL_lex_casemods &&
                      (    s[2] == '{' /* This should match regcomp.c */
                       || (s[2] == '?' && s[3] == '{')))
             {
@@ -3299,10 +3260,10 @@ S_scan_const(pTHX_ char *start)
         }
  
         /* likewise skip #-initiated comments in //x patterns */
-       else if (*s == '#' && PL_lex_inpat &&
+       else if (*s == '#' && PL_lex_inpat && !in_charclass &&
           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
             while (s+1 < send && *s != '\n')
-               *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+               *d++ = *s++;
         }
  
         /* no further processing of single-quoted regex */
@@ -3377,7 +3338,7 @@ S_scan_const(pTHX_ char *start)
                         || s[1] != '{'
                         || regcurly(s + 1, FALSE)))
             {
-               *d++ = NATIVE_TO_NEED(has_utf8,'\\');
+               *d++ = '\\';
                 goto default_action;
             }
  
@@ -3406,7 +3367,7 @@ S_scan_const(pTHX_ char *start)
                 {
                      I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
                      STRLEN len = 3;
-                   uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
+                   uv = grok_oct(s, &len, &flags, NULL);
                     s += len;
                      if (len < 3 && s < send && isDIGIT(*s)
                          && ckWARN(WARN_MISC))
@@ -3458,9 +3419,8 @@ S_scan_const(pTHX_ char *start)
                  * UTF-8 sequence they can end up as, except if they force us
                  * to recode the rest of the string into utf8 */
                 
-               /* Here uv is the ordinal of the next character being added in
-                * unicode (converted from native). */
-               if (!UNI_IS_INVARIANT(uv)) {
+               /* Here uv is the ordinal of the next character being added */
+               if (!NATIVE_IS_INVARIANT(uv)) {
                     if (!has_utf8 && uv > 255) {
                         /* Might need to recode whatever we have accumulated so
                          * far if it contains any chars variant in utf8 or
@@ -3478,7 +3438,7 @@ S_scan_const(pTHX_ char *start)
                      }
  
                      if (has_utf8) {
-                       d = (char*)uvuni_to_utf8((U8*)d, uv);
+                       d = (char*)uvchr_to_utf8((U8*)d, uv);
                         if (PL_lex_inwhat == OP_TRANS &&
                             PL_sublex_info.sub_op) {
                             PL_sublex_info.sub_op->op_private |=
@@ -3511,16 +3471,6 @@ S_scan_const(pTHX_ char *start)
                  * now, while preserving the fact that it was a named character
                  * so that the regex compiler knows this */
  
-               /* This section of code doesn't generally use the
-                * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
-                * a close examination of this macro and determined it is a
-                * no-op except on utfebcdic variant characters.  Every
-                * character generated by this that would normally need to be
-                * enclosed by this macro is invariant, so the macro is not
-                * needed, and would complicate use of copy().  XXX There are
-                * other parts of this file where the macro is used
-                * inconsistently, but are saved by it being a no-op */
-
                 /* The structure of this section of code (besides checking for
                  * errors and upgrading to utf8) is:
                  *  Further disambiguate between the two meanings of \N, and if
@@ -3608,11 +3558,13 @@ S_scan_const(pTHX_ char *start)
                             has_utf8 = TRUE;
                         }
  
-                       /* Add the string to the output */
+                        /* Add the (Unicode) code point to the output. */
                         if (UNI_IS_INVARIANT(uv)) {
-                           *d++ = (char) uv;
+                           *d++ = (char) LATIN1_TO_NATIVE(uv);
                         }
-                       else d = (char*)uvuni_to_utf8((U8*)d, uv);
+                       else {
+                            d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
+                        }
                     }
                 }
                 else /* Here is \N{NAME} but not \N{U+...}. */
@@ -3672,19 +3624,16 @@ S_scan_const(pTHX_ char *start)
                                  char hex_string[2 * UTF8_MAXBYTES + 5];
  
                                  /* Get the first character of the result. */
-                                U32 uv = utf8n_to_uvuni((U8 *) str,
+                                U32 uv = utf8n_to_uvchr((U8 *) str,
                                                          len,
                                                          &char_length,
                                                          UTF8_ALLOW_ANYUV);
                                  /* Convert first code point to hex, including
-                                 * the boiler plate before it.  For all these,
-                                 * we convert to native format so that
-                                 * downstream code can continue to assume the
-                                 * input is native */
+                                 * the boiler plate before it. */
                                  output_length =
                                      my_snprintf(hex_string, sizeof(hex_string),
-                                            "\\N{U+%X",
-                                            (unsigned int) UNI_TO_NATIVE(uv));
+                                                "\\N{U+%X",
+                                                (unsigned int) uv);
  
                                  /* Make sure there is enough space to hold it */
                                  d = off + SvGROW(sv, off
@@ -3699,15 +3648,15 @@ S_scan_const(pTHX_ char *start)
                                  * its ordinal in hex */
                                  while ((str += char_length) < str_end) {
                                      const STRLEN off = d - SvPVX_const(sv);
-                                    U32 uv = utf8n_to_uvuni((U8 *) str,
+                                    U32 uv = utf8n_to_uvchr((U8 *) str,
                                                              str_end - str,
                                                              &char_length,
                                                              UTF8_ALLOW_ANYUV);
                                      output_length =
                                          my_snprintf(hex_string,
-                                            sizeof(hex_string),
-                                            ".%X",
-                                            (unsigned int) UNI_TO_NATIVE(uv));
+                                                    sizeof(hex_string),
+                                                    ".%X",
+                                                    (unsigned int) uv);
  
                                      d = off + SvGROW(sv, off
                                                          + output_length
@@ -3772,25 +3721,25 @@ S_scan_const(pTHX_ char *start)
  
             /* printf-style backslashes, formfeeds, newlines, etc */
             case 'b':
-               *d++ = NATIVE_TO_NEED(has_utf8,'\b');
+               *d++ = '\b';
                 break;
             case 'n':
-               *d++ = NATIVE_TO_NEED(has_utf8,'\n');
+               *d++ = '\n';
                 break;
             case 'r':
-               *d++ = NATIVE_TO_NEED(has_utf8,'\r');
+               *d++ = '\r';
                 break;
             case 'f':
-               *d++ = NATIVE_TO_NEED(has_utf8,'\f');
+               *d++ = '\f';
                 break;
             case 't':
-               *d++ = NATIVE_TO_NEED(has_utf8,'\t');
+               *d++ = '\t';
                 break;
             case 'e':
-               *d++ = ASCII_TO_NEED(has_utf8,'\033');
+               *d++ = ASCII_TO_NATIVE('\033');
                 break;
             case 'a':
-               *d++ = ASCII_TO_NEED(has_utf8,'\007');
+               *d++ = '\a';
                 break;
             } /* end switch */
  
@@ -3816,8 +3765,10 @@ S_scan_const(pTHX_ char *start)
              * routine that does the conversion checks for errors like
              * malformed utf8 */
  
-           const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
-           const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
+           const UV nextuv   = (this_utf8)
+                                ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
+                                : (UV) ((U8) *s);
+           const STRLEN need = UNISKIP(nextuv);
             if (!has_utf8) {
                 SvCUR_set(sv, d - SvPVX_const(sv));
                 SvPOK_on(sv);
@@ -3844,7 +3795,7 @@ S_scan_const(pTHX_ char *start)
  #endif
         }
         else {
-           *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+           *d++ = *s++;
         }
      } /* while loop to process each character */
  
@@ -5309,7 +5260,7 @@ Perl_yylex(pTHX)
              * check if it in fact is. */
             if (bof && PL_rsfp &&
                      (*s == 0 ||
-                     *(U8*)s == 0xEF ||
+                     *(U8*)s == BOM_UTF8_FIRST_BYTE ||
                       *(U8*)s >= 0xFE ||
                       s[1] == 0)) {
                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
@@ -5621,14 +5572,19 @@ Perl_yylex(pTHX)
                 s = SKIPSPACE0(s);
             }
             else {
-/*             if (PL_madskills && PL_lex_formbrack) { */
-                   d = s;
-                   while (d < PL_bufend && *d != '\n')
-                       d++;
-                   if (d < PL_bufend)
-                       d++;
-                   else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
+#endif
+                   if (PL_madskills) d = s;
+                   while (s < PL_bufend && *s != '\n')
+                       s++;
+                   if (s < PL_bufend)
+                   {
+                       s++;
+                       if (s < PL_bufend)
+                           incline(s);
+                   }
+                   else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
                       Perl_croak(aTHX_ "panic: input overflow");
+#ifdef PERL_MAD
                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
                         if (!PL_thiswhite)
                             PL_thiswhite = newSVpvs("");
@@ -5636,20 +5592,9 @@ Perl_yylex(pTHX)
                             sv_setpvs(PL_thiswhite, "");
                             PL_faketokens = 0;
                         }
-                       sv_catpvn(PL_thiswhite, s, d - s);
+                       sv_catpvn(PL_thiswhite, d, s - d);
                     }
-                   s = d;
-/*             }
-               *s = '\0';
-               PL_bufend = s; */
             }
-#else
-           while (s < PL_bufend && *s != '\n')
-               s++;
-           if (s < PL_bufend)
-               s++;
-           else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
-             Perl_croak(aTHX_ "panic: input overflow");
  #endif
         }
         goto retry;
@@ -7323,7 +7268,7 @@ Perl_yylex(pTHX)
                         d = s + 1;
                         while (SPACE_OR_TAB(*d))
                             d++;
-                       if (*d == ')' && (sv = cv_const_sv(cv))) {
+                       if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
                             s = d + 1;
                             goto its_constant;
                         }
@@ -7387,13 +7332,19 @@ Perl_yylex(pTHX)
                              UTF8fARG(UTF, l, PL_tokenbuf));
                      }
                     /* Check for a constant sub */
-                   if ((sv = cv_const_sv(cv))) {
+                   if ((sv = cv_const_sv_or_av(cv))) {
                   its_constant:
                         op_free(rv2cv_op);
                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
-                       pl_yylval.opval->op_private = OPpCONST_FOLDED;
-                       pl_yylval.opval->op_flags |= OPf_SPECIAL;
+                       if (SvTYPE(sv) == SVt_PVAV)
+                           pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
+                                                     pl_yylval.opval);
+                       else {
+                           pl_yylval.opval->op_private = OPpCONST_FOLDED;
+                           pl_yylval.opval->op_folded = 1;
+                           pl_yylval.opval->op_flags |= OPf_SPECIAL;
+                       }
                         TOKEN(WORD);
                     }
  
@@ -7595,21 +7546,12 @@ Perl_yylex(pTHX)
         case KEY___END__: {
             GV *gv;
             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
-               const char *pname = "main";
-               STRLEN plen = 4;
-               U32 putf8 = 0;
-               if (PL_tokenbuf[2] == 'D')
-               {
-                   HV * const stash =
-                       PL_curstash ? PL_curstash : PL_defstash;
-                   pname = HvNAME_get(stash);
-                   plen  = HvNAMELEN (stash);
-                   if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
-               }
-               gv = gv_fetchpvn_flags(
-                       Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
-                       plen+6, GV_ADD|putf8, SVt_PVIO
-               );
+               HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
+                                       ? PL_curstash
+                                       : PL_defstash;
+               gv = (GV *)*hv_fetchs(stash, "DATA", 1);
+               if (!isGV(gv))
+                   gv_init(gv,stash,"DATA",4,0);
                 GvMULTI_on(gv);
                 if (!GvIO(gv))
                     GvIOp(gv) = newIO();
@@ -8919,17 +8861,9 @@ Perl_yylex(pTHX)
             FUN0(OP_WANTARRAY);
  
         case KEY_write:
-#ifdef EBCDIC
-       {
-           char ctl_l[2];
-           ctl_l[0] = toCTRL('L');
-           ctl_l[1] = '\0';
-           gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
-       }
-#else
-           /* Make sure $^L is defined */
-           gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
-#endif
+            /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
+             * we use the same number on EBCDIC */
+           gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
             UNI(OP_ENTERWRITE);
  
         case KEY_x:
@@ -9410,10 +9344,17 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck
            s++;
      }
  
-#define VALID_LEN_ONE_IDENT(d, u)     (isPUNCT_A((U8)(d))     \
-                                        || isCNTRL_A((U8)(d)) \
-                                        || isDIGIT_A((U8)(d)) \
-                                        || (!(u) && !UTF8_IS_INVARIANT((U8)(d))))
+/*  \c?, \c\, \c^, \c_, and \cA..\cZ minus the ones that have traditionally
+ *  been matched by \s on ASCII platforms, are the legal control char names
+ *  here, that is \c? plus 1-32 minus the \s ones. */
+#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d))                       \
+                                   || isDIGIT_A((U8)(d))                    \
+                                   || (!(u) && !isASCII((U8)(d)))           \
+                                   || ((((U8)(d)) < 32)                     \
+                                       && (((((U8)(d)) >= 14)               \
+                                           || (((U8)(d)) <= 8 && (d) != 0) \
+                                           || (((U8)(d)) == 13))))          \
+                                   || (((U8)(d)) == toCTRL('?')))
      if (s < send
          && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
      {
@@ -10067,7 +10008,7 @@ S_scan_heredoc(pTHX_ char *s)
             /* shared is only null if we have gone beyond the outermost
                lexing scope.  In a file, we will have broken out of the
                loop in the previous iteration.  In an eval, the string buf-
-              fer ends with "\n;", so the while condition below will have
+              fer ends with "\n;", so the while condition above will have
                evaluated to false.  So shared can never be null. */
             assert(shared);
             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
@@ -10171,8 +10112,11 @@ S_scan_heredoc(pTHX_ char *s)
         }
         CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
-           lex_grow_linestr(SvCUR(PL_linestr) + 2);
+            s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
+            /* ^That should be enough to avoid this needing to grow:  */
             sv_catpvs(PL_linestr, "\n\0");
+            assert(s == SvPVX(PL_linestr));
+            PL_bufend = SvEND(PL_linestr);
         }
         s = PL_bufptr;
  #ifdef PERL_MAD
@@ -10390,11 +10334,15 @@ intro_sym:
  
  
  /* scan_str
-   takes: start position in buffer
-         keep_quoted preserve \ on the embedded delimiter(s)
-         keep_delims preserve the delimiters around the string
-         re_reparse  compiling a run-time /(?{})/:
-                       collapse // to /,  and skip encoding src
+   takes:
+       start                   position in buffer
+       keep_quoted             preserve \ on the embedded delimiter(s)
+       keep_delims             preserve the delimiters around the string
+       re_reparse              compiling a run-time /(?{})/:
+                                  collapse // to /,  and skip encoding src
+       deprecate_escaped_meta  issue a deprecation warning for cer-
+                               tain paired metacharacters that appear
+                               escaped within it
     returns: position to continue reading from buffer
     side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
         updates the read buffer.
@@ -10436,9 +10384,7 @@ intro_sym:
  
  STATIC char *
  S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
-        bool deprecate_escaped_meta /* Should we issue a deprecation warning
-                                       for certain paired metacharacters that
-                                       appear escaped within it */
+                bool deprecate_escaped_meta
      )
  {
      dVAR;
@@ -10506,8 +10452,8 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
       * happen for <>, as they aren't metas. */
      if (deprecate_escaped_meta
          && (PL_multi_open == PL_multi_close
-            || ! ckWARN_d(WARN_DEPRECATED)
-            || PL_multi_open == '<'))
+            || PL_multi_open == '<'
+            || ! ckWARN_d(WARN_DEPRECATED)))
      {
          deprecate_escaped_meta = FALSE;
      }
@@ -10701,26 +10647,39 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
                           * context where the delimiter is also a metacharacter,
                           * the backslash is useless, and deprecated.  () and []
                           * are meta in any context. {} are meta only when
-                         * appearing in a quantifier or in things like '\p{'.
-                         * They also aren't meta unless there is a matching
-                         * closed, escaped char later on within the string.
-                         * If 's' points to an open, set a flag; if to a close,
-                         * test that flag, and raise a warning if it was set */
+                         * appearing in a quantifier or in things like '\p{'
+                         * (but '\\p{' isn't meta).  They also aren't meta
+                         * unless there is a matching closed, escaped char
+                         * later on within the string.  If 's' points to an
+                         * open, set a flag; if to a close, test that flag, and
+                         * raise a warning if it was set */
  
                         if (deprecate_escaped_meta) {
                              if (*s == PL_multi_open) {
                                  if (*s != '{') {
                                      escaped_open = s;
                                  }
-                                else if (regcurly(s,
-                                                  TRUE /* Look for a closing
-                                                          '\}' */)
-                                         || (s - start > 2  /* Look for e.g.
-                                                               '\x{' */
-                                             && _generic_isCC(*(s-2), _CC_BACKSLASH_FOO_LBRACE_IS_META)))
-                                {
+                                     /* Look for a closing '\}' */
+                                else if (regcurly(s, TRUE)) {
                                      escaped_open = s;
                                  }
+                                     /* Look for e.g.  '\x{' */
+                                else if (s - start > 2
+                                         && _generic_isCC(*(s-2),
+                                             _CC_BACKSLASH_FOO_LBRACE_IS_META))
+                                { /* Exclude '\\x', '\\\\x', etc. */
+                                    char *lookbehind = s - 4;
+                                    bool is_meta = TRUE;
+                                    while (lookbehind >= start
+                                           && *lookbehind == '\\')
+                                    {
+                                        is_meta = ! is_meta;
+                                        lookbehind--;
+                                    }
+                                    if (is_meta) {
+                                        escaped_open = s;
+                                    }
+                                }
                              }
                              else if (escaped_open) {
                                  Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
@@ -11575,12 +11534,14 @@ S_swallow_bom(pTHX_ U8 *s)
  #endif
         }
         break;
-    case 0xEF:
-       if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
-           if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
-           s += 3;                      /* UTF-8 */
-       }
-       break;
+    case BOM_UTF8_FIRST_BYTE: {
+        const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
+        if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
+            if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
+            s += len + 1;                      /* UTF-8 */
+        }
+        break;
+    }
      case 0:
         if (slen > 3) {
              if (s[1] == 0) {
@@ -11603,14 +11564,6 @@ S_swallow_bom(pTHX_ U8 *s)
  #endif
              }
         }
-#ifdef EBCDIC
-    case 0xDD:
-        if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
-            if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
-            s += 4;                      /* UTF-8 */
-        }
-        break;
-#endif
  
      default:
          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
@@ -11855,7 +11808,7 @@ Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
             /* Append native character for the rev point */
             tmpend = uvchr_to_utf8(tmpbuf, rev);
             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
-           if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
+           if (!NATIVE_IS_INVARIANT(rev))
                  SvUTF8_on(sv);
             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
                  s = ++pos;