This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
toke.c: Reorder checks around deprecate_escaped_meta
[perl5.git] / toke.c
diff --git a/toke.c b/toke.c
index a8ce485..bf9d160 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -586,7 +586,7 @@ S_missingterm(pTHX_ char *s)
        if (nl)
            *nl = '\0';
     }
-    else if (isCNTRL(PL_multi_close)) {
+    else if ((U8) PL_multi_close < 32) {
        *tmpbuf = '^';
        tmpbuf[1] = (char)toCTRL(PL_multi_close);
        tmpbuf[2] = '\0';
@@ -753,9 +753,9 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
        parser->linestr = flags & LEX_START_COPIED
                            ? SvREFCNT_inc_simple_NN(line)
                            : newSVpvn_flags(s, len, SvUTF8(line));
-       sv_catpvs(parser->linestr, "\n;");
+       sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
     } else {
-       parser->linestr = newSVpvs("\n;");
+       parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
     }
     parser->oldoldbufptr =
        parser->oldbufptr =
@@ -1053,7 +1053,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
                    ENTER;
                    SAVESPTR(PL_warnhook);
                    PL_warnhook = PERL_WARNHOOK_FATAL;
-                   utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
+                   utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
                    LEAVE;
                }
            }
@@ -1073,7 +1073,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
                }
                else {
                     assert(p < e -1 );
-                   *bufptr++ = TWO_BYTE_UTF8_TO_UNI(*p, *(p+1));
+                   *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
                    p += 2;
                 }
            }
@@ -1437,13 +1437,13 @@ Perl_lex_peek_unichar(pTHX_ U32 flags)
                bufend = PL_parser->bufend;
            }
        }
-       unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
+       unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
        if (retlen == (STRLEN)-1) {
            /* malformed UTF-8 */
            ENTER;
            SAVESPTR(PL_warnhook);
            PL_warnhook = PERL_WARNHOOK_FATAL;
-           utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
+           utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
            LEAVE;
        }
        return unichar;
@@ -1753,37 +1753,16 @@ S_incline(pTHX_ const char *s)
 
     if (t - s > 0) {
        const STRLEN len = t - s;
-       SV *const temp_sv = CopFILESV(PL_curcop);
-       const char *cf;
-       STRLEN tmplen;
-
-       if (temp_sv) {
-           cf = SvPVX(temp_sv);
-           tmplen = SvCUR(temp_sv);
-       } else {
-           cf = NULL;
-           tmplen = 0;
-       }
 
        if (!PL_rsfp && !PL_parser->filtered) {
            /* must copy *{"::_<(eval N)[oldfilename:L]"}
             * to *{"::_<newfilename"} */
            /* However, the long form of evals is only turned on by the
               debugger - usually they're "(eval %lu)" */
-           char smallbuf[128];
-           char *tmpbuf;
-           GV **gvp;
-           STRLEN tmplen2 = len;
-           if (tmplen + 2 <= sizeof smallbuf)
-               tmpbuf = smallbuf;
-           else
-               Newx(tmpbuf, tmplen + 2, char);
-           tmpbuf[0] = '_';
-           tmpbuf[1] = '<';
-           memcpy(tmpbuf + 2, cf, tmplen);
-           tmplen += 2;
-           gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
-           if (gvp) {
+           GV * const cfgv = CopFILEGV(PL_curcop);
+           if (cfgv) {
+               char smallbuf[128];
+               STRLEN tmplen2 = len;
                char *tmpbuf2;
                GV *gv2;
 
@@ -1792,12 +1771,8 @@ S_incline(pTHX_ const char *s)
                else
                    Newx(tmpbuf2, tmplen2 + 2, char);
 
-               if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
-                   /* Either they malloc'd it, or we malloc'd it,
-                      so no prefix is present in ours.  */
-                   tmpbuf2[0] = '_';
-                   tmpbuf2[1] = '<';
-               }
+               tmpbuf2[0] = '_';
+               tmpbuf2[1] = '<';
 
                memcpy(tmpbuf2 + 2, s, tmplen2);
                tmplen2 += 2;
@@ -1811,11 +1786,11 @@ S_incline(pTHX_ const char *s)
                       alias the saved lines that are in the array.
                       Otherwise alias the whole array. */
                    if (CopLINE(PL_curcop) == line_num) {
-                       GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
-                       GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
+                       GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
+                       GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
                    }
-                   else if (GvAV(*gvp)) {
-                       AV * const av = GvAV(*gvp);
+                   else if (GvAV(cfgv)) {
+                       AV * const av = GvAV(cfgv);
                        const I32 start = CopLINE(PL_curcop)+1;
                        I32 items = AvFILLp(av) - start;
                        if (items > 0) {
@@ -1830,7 +1805,6 @@ S_incline(pTHX_ const char *s)
 
                if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
            }
-           if (tmpbuf != smallbuf) Safefree(tmpbuf);
        }
        CopFILE_free(PL_curcop);
        CopFILE_setn(PL_curcop, s, len);
@@ -1895,13 +1869,11 @@ STATIC char *
 S_skipspace2(pTHX_ char *s, SV **svp)
 {
     char *start;
-    const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
     const I32 startoff = s - SvPVX(PL_linestr);
 
     PERL_ARGS_ASSERT_SKIPSPACE2;
 
     s = skipspace(s);
-    PL_bufptr = SvPVX(PL_linestr) + bufptroff;
     if (!PL_madskills || !svp)
        return s;
     start = SvPVX(PL_linestr) + startoff;
@@ -1934,7 +1906,7 @@ S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
            sv_setpvn(sv, buf, len);
        (void)SvIOK_on(sv);
        SvIV_set(sv, 0);
-       av_store(av, (I32)CopLINE(PL_curcop), sv);
+       av_store(av, CopLINE(PL_curcop), sv);
     }
 }
 
@@ -2576,7 +2548,7 @@ S_sublex_start(pTHX)
        return THING;
     }
     else if (op_type == OP_BACKTICK && PL_lex_op) {
-       /* readpipe() vas overriden */
+       /* readpipe() was overridden */
        cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
        pl_yylval.opval = PL_lex_op;
        PL_lex_op = NULL;
@@ -2789,7 +2761,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
     {
         /* If warnings are on, this will print a more detailed analysis of what
          * is wrong than the error message below */
-        utf8n_to_uvuni(first_bad_char_loc,
+        utf8n_to_uvchr(first_bad_char_loc,
                        e - ((char *) first_bad_char_loc),
                        NULL, 0);
 
@@ -2864,7 +2836,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
             }
             s++;
         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
-            if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))))) {
+            if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
                 goto bad_charname;
             }
             s += 2;
@@ -2897,8 +2869,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
                 s++;
             }
             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
-                if (! isCHARNAME_CONT(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s,
-                                                                    *(s+1)))))
+                if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
                 {
                     goto bad_charname;
                 }
@@ -2932,7 +2903,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
         if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
             /* If warnings are on, this will print a more detailed analysis of
              * what is wrong than the error message below */
-            utf8n_to_uvuni(first_bad_char_loc,
+            utf8n_to_uvchr(first_bad_char_loc,
                            (char *) first_bad_char_loc - str,
                            NULL, 0);
 
@@ -3133,7 +3104,7 @@ S_scan_const(pTHX_ char *start)
                    char *e = d++;
                    while (e-- > c)
                        *(e + 1) = *e;
-                   *c = (char)UTF_TO_NATIVE(0xff);
+                   *c = (char) ILLEGAL_UTF8_BYTE;
                    /* mark the range as done, and continue */
                    dorange = FALSE;
                    didrange = TRUE;
@@ -3187,16 +3158,12 @@ S_scan_const(pTHX_ char *start)
 
 #ifdef EBCDIC
                if (literal_endpoint == 2 &&
-                   ((isLOWER(min) && isLOWER(max)) ||
-                    (isUPPER(min) && isUPPER(max)))) {
-                   if (isLOWER(min)) {
-                       for (i = min; i <= max; i++)
-                           if (isLOWER(i))
-                               *d++ = NATIVE_TO_NEED(has_utf8,i);
-                   } else {
-                       for (i = min; i <= max; i++)
-                           if (isUPPER(i))
-                               *d++ = NATIVE_TO_NEED(has_utf8,i);
+                   ((isLOWER_A(min) && isLOWER_A(max)) ||
+                    (isUPPER_A(min) && isUPPER_A(max))))
+                {
+                    for (i = min; i <= max; i++) {
+                        if (isALPHA_A(i))
+                            *d++ = i;
                    }
                }
                else
@@ -3204,13 +3171,7 @@ S_scan_const(pTHX_ char *start)
                    for (i = min; i <= max; i++)
 #ifdef EBCDIC
                         if (has_utf8) {
-                            const U8 ch = (U8)NATIVE_TO_UTF(i);
-                            if (UNI_IS_INVARIANT(ch))
-                                *d++ = (U8)i;
-                            else {
-                                *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
-                                *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
-                            }
+                            append_utf8_from_native_byte(i, &d);
                         }
                         else
 #endif
@@ -3220,7 +3181,7 @@ S_scan_const(pTHX_ char *start)
                 if (uvmax) {
                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
                     if (uvmax > 0x101)
-                        *d++ = (char)UTF_TO_NATIVE(0xff);
+                        *d++ = (char) ILLEGAL_UTF8_BYTE;
                     if (uvmax > 0x100)
                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
                 }
@@ -3245,7 +3206,7 @@ S_scan_const(pTHX_ char *start)
                    && !native_range
 #endif
                    ) {
-                   *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
+                   *d++ = (char) ILLEGAL_UTF8_BYTE;    /* use illegal utf8 byte--see pmtrans */
                    s++;
                    continue;
                }
@@ -3285,12 +3246,12 @@ S_scan_const(pTHX_ char *start)
         * char, which will be done separately.
         * Stop on (?{..}) and friends */
 
-       else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
+       else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
            if (s[2] == '#') {
                while (s+1 < send && *s != ')')
-                   *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+                   *d++ = *s++;
            }
-           else if (!PL_lex_casemods && !in_charclass &&
+           else if (!PL_lex_casemods &&
                     (    s[2] == '{' /* This should match regcomp.c */
                      || (s[2] == '?' && s[3] == '{')))
            {
@@ -3299,10 +3260,10 @@ S_scan_const(pTHX_ char *start)
        }
 
        /* likewise skip #-initiated comments in //x patterns */
-       else if (*s == '#' && PL_lex_inpat &&
+       else if (*s == '#' && PL_lex_inpat && !in_charclass &&
          ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
            while (s+1 < send && *s != '\n')
-               *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+               *d++ = *s++;
        }
 
        /* no further processing of single-quoted regex */
@@ -3377,7 +3338,7 @@ S_scan_const(pTHX_ char *start)
                        || s[1] != '{'
                        || regcurly(s + 1, FALSE)))
            {
-               *d++ = NATIVE_TO_NEED(has_utf8,'\\');
+               *d++ = '\\';
                goto default_action;
            }
 
@@ -3406,7 +3367,7 @@ S_scan_const(pTHX_ char *start)
                {
                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
                     STRLEN len = 3;
-                   uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
+                   uv = grok_oct(s, &len, &flags, NULL);
                    s += len;
                     if (len < 3 && s < send && isDIGIT(*s)
                         && ckWARN(WARN_MISC))
@@ -3458,9 +3419,8 @@ S_scan_const(pTHX_ char *start)
                 * UTF-8 sequence they can end up as, except if they force us
                 * to recode the rest of the string into utf8 */
                
-               /* Here uv is the ordinal of the next character being added in
-                * unicode (converted from native). */
-               if (!UNI_IS_INVARIANT(uv)) {
+               /* Here uv is the ordinal of the next character being added */
+               if (!NATIVE_IS_INVARIANT(uv)) {
                    if (!has_utf8 && uv > 255) {
                        /* Might need to recode whatever we have accumulated so
                         * far if it contains any chars variant in utf8 or
@@ -3478,7 +3438,7 @@ S_scan_const(pTHX_ char *start)
                     }
 
                     if (has_utf8) {
-                       d = (char*)uvuni_to_utf8((U8*)d, uv);
+                       d = (char*)uvchr_to_utf8((U8*)d, uv);
                        if (PL_lex_inwhat == OP_TRANS &&
                            PL_sublex_info.sub_op) {
                            PL_sublex_info.sub_op->op_private |=
@@ -3511,16 +3471,6 @@ S_scan_const(pTHX_ char *start)
                 * now, while preserving the fact that it was a named character
                 * so that the regex compiler knows this */
 
-               /* This section of code doesn't generally use the
-                * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
-                * a close examination of this macro and determined it is a
-                * no-op except on utfebcdic variant characters.  Every
-                * character generated by this that would normally need to be
-                * enclosed by this macro is invariant, so the macro is not
-                * needed, and would complicate use of copy().  XXX There are
-                * other parts of this file where the macro is used
-                * inconsistently, but are saved by it being a no-op */
-
                /* The structure of this section of code (besides checking for
                 * errors and upgrading to utf8) is:
                 *  Further disambiguate between the two meanings of \N, and if
@@ -3608,11 +3558,13 @@ S_scan_const(pTHX_ char *start)
                            has_utf8 = TRUE;
                        }
 
-                       /* Add the string to the output */
+                        /* Add the (Unicode) code point to the output. */
                        if (UNI_IS_INVARIANT(uv)) {
-                           *d++ = (char) uv;
+                           *d++ = (char) LATIN1_TO_NATIVE(uv);
                        }
-                       else d = (char*)uvuni_to_utf8((U8*)d, uv);
+                       else {
+                            d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
+                        }
                    }
                }
                else /* Here is \N{NAME} but not \N{U+...}. */
@@ -3672,19 +3624,16 @@ S_scan_const(pTHX_ char *start)
                                 char hex_string[2 * UTF8_MAXBYTES + 5];
 
                                 /* Get the first character of the result. */
-                                U32 uv = utf8n_to_uvuni((U8 *) str,
+                                U32 uv = utf8n_to_uvchr((U8 *) str,
                                                         len,
                                                         &char_length,
                                                         UTF8_ALLOW_ANYUV);
                                 /* Convert first code point to hex, including
-                                 * the boiler plate before it.  For all these,
-                                 * we convert to native format so that
-                                 * downstream code can continue to assume the
-                                 * input is native */
+                                 * the boiler plate before it. */
                                 output_length =
                                     my_snprintf(hex_string, sizeof(hex_string),
-                                            "\\N{U+%X",
-                                            (unsigned int) UNI_TO_NATIVE(uv));
+                                                "\\N{U+%X",
+                                                (unsigned int) uv);
 
                                 /* Make sure there is enough space to hold it */
                                 d = off + SvGROW(sv, off
@@ -3699,15 +3648,15 @@ S_scan_const(pTHX_ char *start)
                                 * its ordinal in hex */
                                 while ((str += char_length) < str_end) {
                                     const STRLEN off = d - SvPVX_const(sv);
-                                    U32 uv = utf8n_to_uvuni((U8 *) str,
+                                    U32 uv = utf8n_to_uvchr((U8 *) str,
                                                             str_end - str,
                                                             &char_length,
                                                             UTF8_ALLOW_ANYUV);
                                     output_length =
                                         my_snprintf(hex_string,
-                                            sizeof(hex_string),
-                                            ".%X",
-                                            (unsigned int) UNI_TO_NATIVE(uv));
+                                                    sizeof(hex_string),
+                                                    ".%X",
+                                                    (unsigned int) uv);
 
                                     d = off + SvGROW(sv, off
                                                         + output_length
@@ -3772,25 +3721,25 @@ S_scan_const(pTHX_ char *start)
 
            /* printf-style backslashes, formfeeds, newlines, etc */
            case 'b':
-               *d++ = NATIVE_TO_NEED(has_utf8,'\b');
+               *d++ = '\b';
                break;
            case 'n':
-               *d++ = NATIVE_TO_NEED(has_utf8,'\n');
+               *d++ = '\n';
                break;
            case 'r':
-               *d++ = NATIVE_TO_NEED(has_utf8,'\r');
+               *d++ = '\r';
                break;
            case 'f':
-               *d++ = NATIVE_TO_NEED(has_utf8,'\f');
+               *d++ = '\f';
                break;
            case 't':
-               *d++ = NATIVE_TO_NEED(has_utf8,'\t');
+               *d++ = '\t';
                break;
            case 'e':
-               *d++ = ASCII_TO_NEED(has_utf8,'\033');
+               *d++ = ASCII_TO_NATIVE('\033');
                break;
            case 'a':
-               *d++ = ASCII_TO_NEED(has_utf8,'\007');
+               *d++ = '\a';
                break;
            } /* end switch */
 
@@ -3816,8 +3765,10 @@ S_scan_const(pTHX_ char *start)
             * routine that does the conversion checks for errors like
             * malformed utf8 */
 
-           const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
-           const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
+           const UV nextuv   = (this_utf8)
+                                ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
+                                : (UV) ((U8) *s);
+           const STRLEN need = UNISKIP(nextuv);
            if (!has_utf8) {
                SvCUR_set(sv, d - SvPVX_const(sv));
                SvPOK_on(sv);
@@ -3844,7 +3795,7 @@ S_scan_const(pTHX_ char *start)
 #endif
        }
        else {
-           *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+           *d++ = *s++;
        }
     } /* while loop to process each character */
 
@@ -5309,7 +5260,7 @@ Perl_yylex(pTHX)
             * check if it in fact is. */
            if (bof && PL_rsfp &&
                     (*s == 0 ||
-                     *(U8*)s == 0xEF ||
+                     *(U8*)s == BOM_UTF8_FIRST_BYTE ||
                      *(U8*)s >= 0xFE ||
                      s[1] == 0)) {
                Off_t offset = (IV)PerlIO_tell(PL_rsfp);
@@ -5621,14 +5572,19 @@ Perl_yylex(pTHX)
                s = SKIPSPACE0(s);
            }
            else {
-/*             if (PL_madskills && PL_lex_formbrack) { */
-                   d = s;
-                   while (d < PL_bufend && *d != '\n')
-                       d++;
-                   if (d < PL_bufend)
-                       d++;
-                   else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
+#endif
+                   if (PL_madskills) d = s;
+                   while (s < PL_bufend && *s != '\n')
+                       s++;
+                   if (s < PL_bufend)
+                   {
+                       s++;
+                       if (s < PL_bufend)
+                           incline(s);
+                   }
+                   else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
                      Perl_croak(aTHX_ "panic: input overflow");
+#ifdef PERL_MAD
                    if (PL_madskills && CopLINE(PL_curcop) >= 1) {
                        if (!PL_thiswhite)
                            PL_thiswhite = newSVpvs("");
@@ -5636,20 +5592,9 @@ Perl_yylex(pTHX)
                            sv_setpvs(PL_thiswhite, "");
                            PL_faketokens = 0;
                        }
-                       sv_catpvn(PL_thiswhite, s, d - s);
+                       sv_catpvn(PL_thiswhite, d, s - d);
                    }
-                   s = d;
-/*             }
-               *s = '\0';
-               PL_bufend = s; */
            }
-#else
-           while (s < PL_bufend && *s != '\n')
-               s++;
-           if (s < PL_bufend)
-               s++;
-           else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
-             Perl_croak(aTHX_ "panic: input overflow");
 #endif
        }
        goto retry;
@@ -7323,7 +7268,7 @@ Perl_yylex(pTHX)
                        d = s + 1;
                        while (SPACE_OR_TAB(*d))
                            d++;
-                       if (*d == ')' && (sv = cv_const_sv(cv))) {
+                       if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
                            s = d + 1;
                            goto its_constant;
                        }
@@ -7387,13 +7332,19 @@ Perl_yylex(pTHX)
                             UTF8fARG(UTF, l, PL_tokenbuf));
                     }
                    /* Check for a constant sub */
-                   if ((sv = cv_const_sv(cv))) {
+                   if ((sv = cv_const_sv_or_av(cv))) {
                  its_constant:
                        op_free(rv2cv_op);
                        SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
                        ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
-                       pl_yylval.opval->op_private = OPpCONST_FOLDED;
-                       pl_yylval.opval->op_flags |= OPf_SPECIAL;
+                       if (SvTYPE(sv) == SVt_PVAV)
+                           pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
+                                                     pl_yylval.opval);
+                       else {
+                           pl_yylval.opval->op_private = OPpCONST_FOLDED;
+                           pl_yylval.opval->op_folded = 1;
+                           pl_yylval.opval->op_flags |= OPf_SPECIAL;
+                       }
                        TOKEN(WORD);
                    }
 
@@ -7595,21 +7546,12 @@ Perl_yylex(pTHX)
        case KEY___END__: {
            GV *gv;
            if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
-               const char *pname = "main";
-               STRLEN plen = 4;
-               U32 putf8 = 0;
-               if (PL_tokenbuf[2] == 'D')
-               {
-                   HV * const stash =
-                       PL_curstash ? PL_curstash : PL_defstash;
-                   pname = HvNAME_get(stash);
-                   plen  = HvNAMELEN (stash);
-                   if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
-               }
-               gv = gv_fetchpvn_flags(
-                       Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
-                       plen+6, GV_ADD|putf8, SVt_PVIO
-               );
+               HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
+                                       ? PL_curstash
+                                       : PL_defstash;
+               gv = (GV *)*hv_fetchs(stash, "DATA", 1);
+               if (!isGV(gv))
+                   gv_init(gv,stash,"DATA",4,0);
                GvMULTI_on(gv);
                if (!GvIO(gv))
                    GvIOp(gv) = newIO();
@@ -8919,17 +8861,9 @@ Perl_yylex(pTHX)
            FUN0(OP_WANTARRAY);
 
        case KEY_write:
-#ifdef EBCDIC
-       {
-           char ctl_l[2];
-           ctl_l[0] = toCTRL('L');
-           ctl_l[1] = '\0';
-           gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
-       }
-#else
-           /* Make sure $^L is defined */
-           gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
-#endif
+            /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
+             * we use the same number on EBCDIC */
+           gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
            UNI(OP_ENTERWRITE);
 
        case KEY_x:
@@ -9410,10 +9344,17 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck
           s++;
     }
 
-#define VALID_LEN_ONE_IDENT(d, u)     (isPUNCT_A((U8)(d))     \
-                                        || isCNTRL_A((U8)(d)) \
-                                        || isDIGIT_A((U8)(d)) \
-                                        || (!(u) && !UTF8_IS_INVARIANT((U8)(d))))
+/*  \c?, \c\, \c^, \c_, and \cA..\cZ minus the ones that have traditionally
+ *  been matched by \s on ASCII platforms, are the legal control char names
+ *  here, that is \c? plus 1-32 minus the \s ones. */
+#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d))                       \
+                                   || isDIGIT_A((U8)(d))                    \
+                                   || (!(u) && !isASCII((U8)(d)))           \
+                                   || ((((U8)(d)) < 32)                     \
+                                       && (((((U8)(d)) >= 14)               \
+                                           || (((U8)(d)) <= 8 && (d) != 0) \
+                                           || (((U8)(d)) == 13))))          \
+                                   || (((U8)(d)) == toCTRL('?')))
     if (s < send
         && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
     {
@@ -10067,7 +10008,7 @@ S_scan_heredoc(pTHX_ char *s)
            /* shared is only null if we have gone beyond the outermost
               lexing scope.  In a file, we will have broken out of the
               loop in the previous iteration.  In an eval, the string buf-
-              fer ends with "\n;", so the while condition below will have
+              fer ends with "\n;", so the while condition above will have
               evaluated to false.  So shared can never be null. */
            assert(shared);
            /* A LEXSHARED struct with a null ls_prev pointer is the outer-
@@ -10171,8 +10112,11 @@ S_scan_heredoc(pTHX_ char *s)
        }
        CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
        if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
-           lex_grow_linestr(SvCUR(PL_linestr) + 2);
+            s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
+            /* ^That should be enough to avoid this needing to grow:  */
            sv_catpvs(PL_linestr, "\n\0");
+            assert(s == SvPVX(PL_linestr));
+            PL_bufend = SvEND(PL_linestr);
        }
        s = PL_bufptr;
 #ifdef PERL_MAD
@@ -10390,11 +10334,15 @@ intro_sym:
 
 
 /* scan_str
-   takes: start position in buffer
-         keep_quoted preserve \ on the embedded delimiter(s)
-         keep_delims preserve the delimiters around the string
-         re_reparse  compiling a run-time /(?{})/:
-                       collapse // to /,  and skip encoding src
+   takes:
+       start                   position in buffer
+       keep_quoted             preserve \ on the embedded delimiter(s)
+       keep_delims             preserve the delimiters around the string
+       re_reparse              compiling a run-time /(?{})/:
+                                  collapse // to /,  and skip encoding src
+       deprecate_escaped_meta  issue a deprecation warning for cer-
+                               tain paired metacharacters that appear
+                               escaped within it
    returns: position to continue reading from buffer
    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
        updates the read buffer.
@@ -10436,9 +10384,7 @@ intro_sym:
 
 STATIC char *
 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
-        bool deprecate_escaped_meta /* Should we issue a deprecation warning
-                                       for certain paired metacharacters that
-                                       appear escaped within it */
+                bool deprecate_escaped_meta
     )
 {
     dVAR;
@@ -10506,8 +10452,8 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
      * happen for <>, as they aren't metas. */
     if (deprecate_escaped_meta
         && (PL_multi_open == PL_multi_close
-            || ! ckWARN_d(WARN_DEPRECATED)
-            || PL_multi_open == '<'))
+            || PL_multi_open == '<'
+            || ! ckWARN_d(WARN_DEPRECATED)))
     {
         deprecate_escaped_meta = FALSE;
     }
@@ -10701,26 +10647,39 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
                          * context where the delimiter is also a metacharacter,
                          * the backslash is useless, and deprecated.  () and []
                          * are meta in any context. {} are meta only when
-                         * appearing in a quantifier or in things like '\p{'.
-                         * They also aren't meta unless there is a matching
-                         * closed, escaped char later on within the string.
-                         * If 's' points to an open, set a flag; if to a close,
-                         * test that flag, and raise a warning if it was set */
+                         * appearing in a quantifier or in things like '\p{'
+                         * (but '\\p{' isn't meta).  They also aren't meta
+                         * unless there is a matching closed, escaped char
+                         * later on within the string.  If 's' points to an
+                         * open, set a flag; if to a close, test that flag, and
+                         * raise a warning if it was set */
 
                        if (deprecate_escaped_meta) {
                             if (*s == PL_multi_open) {
                                 if (*s != '{') {
                                     escaped_open = s;
                                 }
-                                else if (regcurly(s,
-                                                  TRUE /* Look for a closing
-                                                          '\}' */)
-                                         || (s - start > 2  /* Look for e.g.
-                                                               '\x{' */
-                                             && _generic_isCC(*(s-2), _CC_BACKSLASH_FOO_LBRACE_IS_META)))
-                                {
+                                     /* Look for a closing '\}' */
+                                else if (regcurly(s, TRUE)) {
                                     escaped_open = s;
                                 }
+                                     /* Look for e.g.  '\x{' */
+                                else if (s - start > 2
+                                         && _generic_isCC(*(s-2),
+                                             _CC_BACKSLASH_FOO_LBRACE_IS_META))
+                                { /* Exclude '\\x', '\\\\x', etc. */
+                                    char *lookbehind = s - 4;
+                                    bool is_meta = TRUE;
+                                    while (lookbehind >= start
+                                           && *lookbehind == '\\')
+                                    {
+                                        is_meta = ! is_meta;
+                                        lookbehind--;
+                                    }
+                                    if (is_meta) {
+                                        escaped_open = s;
+                                    }
+                                }
                             }
                             else if (escaped_open) {
                                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
@@ -11575,12 +11534,14 @@ S_swallow_bom(pTHX_ U8 *s)
 #endif
        }
        break;
-    case 0xEF:
-       if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
-           if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
-           s += 3;                      /* UTF-8 */
-       }
-       break;
+    case BOM_UTF8_FIRST_BYTE: {
+        const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
+        if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
+            if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
+            s += len + 1;                      /* UTF-8 */
+        }
+        break;
+    }
     case 0:
        if (slen > 3) {
             if (s[1] == 0) {
@@ -11603,14 +11564,6 @@ S_swallow_bom(pTHX_ U8 *s)
 #endif
             }
        }
-#ifdef EBCDIC
-    case 0xDD:
-        if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
-            if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
-            s += 4;                      /* UTF-8 */
-        }
-        break;
-#endif
 
     default:
         if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
@@ -11855,7 +11808,7 @@ Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
            /* Append native character for the rev point */
            tmpend = uvchr_to_utf8(tmpbuf, rev);
            sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
-           if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
+           if (!NATIVE_IS_INVARIANT(rev))
                 SvUTF8_on(sv);
            if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
                 s = ++pos;