X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/2cc6fe62efccaf47e15982ddbe988a976469d887..a446b943f7a5b38f67cca69a513d873bc8335552:/toke.c

diff --git a/toke.c b/toke.c
index a8ce485..bf9d160 100644
--- a/toke.c
+++ b/toke.c
@@ -586,7 +586,7 @@ S_missingterm(pTHX_ char *s)
 	if (nl)
 	    *nl = '\0';
     }
-    else if (isCNTRL(PL_multi_close)) {
+    else if ((U8) PL_multi_close < 32) {
 	*tmpbuf = '^';
 	tmpbuf[1] = (char)toCTRL(PL_multi_close);
 	tmpbuf[2] = '\0';
@@ -753,9 +753,9 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 	parser->linestr = flags & LEX_START_COPIED
 			    ? SvREFCNT_inc_simple_NN(line)
 			    : newSVpvn_flags(s, len, SvUTF8(line));
-	sv_catpvs(parser->linestr, "\n;");
+	sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
     } else {
-	parser->linestr = newSVpvs("\n;");
+	parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
     }
     parser->oldoldbufptr =
 	parser->oldbufptr =
@@ -1053,7 +1053,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 		    ENTER;
 		    SAVESPTR(PL_warnhook);
 		    PL_warnhook = PERL_WARNHOOK_FATAL;
-		    utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
+		    utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
 		    LEAVE;
 		}
 	    }
@@ -1073,7 +1073,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 		}
 		else {
                     assert(p < e -1 );
-		    *bufptr++ = TWO_BYTE_UTF8_TO_UNI(*p, *(p+1));
+		    *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
 		    p += 2;
                 }
 	    }
@@ -1437,13 +1437,13 @@ Perl_lex_peek_unichar(pTHX_ U32 flags)
 		bufend = PL_parser->bufend;
 	    }
 	}
-	unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
+	unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
 	if (retlen == (STRLEN)-1) {
 	    /* malformed UTF-8 */
 	    ENTER;
 	    SAVESPTR(PL_warnhook);
 	    PL_warnhook = PERL_WARNHOOK_FATAL;
-	    utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
+	    utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
 	    LEAVE;
 	}
 	return unichar;
@@ -1753,37 +1753,16 @@ S_incline(pTHX_ const char *s)
 
     if (t - s > 0) {
 	const STRLEN len = t - s;
-	SV *const temp_sv = CopFILESV(PL_curcop);
-	const char *cf;
-	STRLEN tmplen;
-
-	if (temp_sv) {
-	    cf = SvPVX(temp_sv);
-	    tmplen = SvCUR(temp_sv);
-	} else {
-	    cf = NULL;
-	    tmplen = 0;
-	}
 
 	if (!PL_rsfp && !PL_parser->filtered) {
 	    /* must copy *{"::_<(eval N)[oldfilename:L]"}
 	     * to *{"::_<newfilename"} */
 	    /* However, the long form of evals is only turned on by the
 	       debugger - usually they're "(eval %lu)" */
-	    char smallbuf[128];
-	    char *tmpbuf;
-	    GV **gvp;
-	    STRLEN tmplen2 = len;
-	    if (tmplen + 2 <= sizeof smallbuf)
-		tmpbuf = smallbuf;
-	    else
-		Newx(tmpbuf, tmplen + 2, char);
-	    tmpbuf[0] = '_';
-	    tmpbuf[1] = '<';
-	    memcpy(tmpbuf + 2, cf, tmplen);
-	    tmplen += 2;
-	    gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
-	    if (gvp) {
+	    GV * const cfgv = CopFILEGV(PL_curcop);
+	    if (cfgv) {
+		char smallbuf[128];
+		STRLEN tmplen2 = len;
 		char *tmpbuf2;
 		GV *gv2;
 
@@ -1792,12 +1771,8 @@ S_incline(pTHX_ const char *s)
 		else
 		    Newx(tmpbuf2, tmplen2 + 2, char);
 
-		if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
-		    /* Either they malloc'd it, or we malloc'd it,
-		       so no prefix is present in ours.  */
-		    tmpbuf2[0] = '_';
-		    tmpbuf2[1] = '<';
-		}
+		tmpbuf2[0] = '_';
+		tmpbuf2[1] = '<';
 
 		memcpy(tmpbuf2 + 2, s, tmplen2);
 		tmplen2 += 2;
@@ -1811,11 +1786,11 @@ S_incline(pTHX_ const char *s)
 		       alias the saved lines that are in the array.
 		       Otherwise alias the whole array. */
 		    if (CopLINE(PL_curcop) == line_num) {
-			GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
-			GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
+			GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
+			GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
 		    }
-		    else if (GvAV(*gvp)) {
-			AV * const av = GvAV(*gvp);
+		    else if (GvAV(cfgv)) {
+			AV * const av = GvAV(cfgv);
 			const I32 start = CopLINE(PL_curcop)+1;
 			I32 items = AvFILLp(av) - start;
 			if (items > 0) {
@@ -1830,7 +1805,6 @@ S_incline(pTHX_ const char *s)
 
 		if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
 	    }
-	    if (tmpbuf != smallbuf) Safefree(tmpbuf);
 	}
 	CopFILE_free(PL_curcop);
 	CopFILE_setn(PL_curcop, s, len);
@@ -1895,13 +1869,11 @@ STATIC char *
 S_skipspace2(pTHX_ char *s, SV **svp)
 {
     char *start;
-    const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
     const I32 startoff = s - SvPVX(PL_linestr);
 
     PERL_ARGS_ASSERT_SKIPSPACE2;
 
     s = skipspace(s);
-    PL_bufptr = SvPVX(PL_linestr) + bufptroff;
     if (!PL_madskills || !svp)
 	return s;
     start = SvPVX(PL_linestr) + startoff;
@@ -1934,7 +1906,7 @@ S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
 	    sv_setpvn(sv, buf, len);
 	(void)SvIOK_on(sv);
 	SvIV_set(sv, 0);
-	av_store(av, (I32)CopLINE(PL_curcop), sv);
+	av_store(av, CopLINE(PL_curcop), sv);
     }
 }
 
@@ -2576,7 +2548,7 @@ S_sublex_start(pTHX)
 	return THING;
     }
     else if (op_type == OP_BACKTICK && PL_lex_op) {
-	/* readpipe() vas overriden */
+	/* readpipe() was overridden */
 	cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
 	pl_yylval.opval = PL_lex_op;
 	PL_lex_op = NULL;
@@ -2789,7 +2761,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
     {
         /* If warnings are on, this will print a more detailed analysis of what
          * is wrong than the error message below */
-        utf8n_to_uvuni(first_bad_char_loc,
+        utf8n_to_uvchr(first_bad_char_loc,
                        e - ((char *) first_bad_char_loc),
                        NULL, 0);
 
@@ -2864,7 +2836,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
             }
             s++;
         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
-            if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))))) {
+            if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
                 goto bad_charname;
             }
             s += 2;
@@ -2897,8 +2869,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
                 s++;
             }
             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
-                if (! isCHARNAME_CONT(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s,
-                                                                    *(s+1)))))
+                if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
                 {
                     goto bad_charname;
                 }
@@ -2932,7 +2903,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
         if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
             /* If warnings are on, this will print a more detailed analysis of
              * what is wrong than the error message below */
-            utf8n_to_uvuni(first_bad_char_loc,
+            utf8n_to_uvchr(first_bad_char_loc,
                            (char *) first_bad_char_loc - str,
                            NULL, 0);
 
@@ -3133,7 +3104,7 @@ S_scan_const(pTHX_ char *start)
 		    char *e = d++;
 		    while (e-- > c)
 			*(e + 1) = *e;
-		    *c = (char)UTF_TO_NATIVE(0xff);
+		    *c = (char) ILLEGAL_UTF8_BYTE;
 		    /* mark the range as done, and continue */
 		    dorange = FALSE;
 		    didrange = TRUE;
@@ -3187,16 +3158,12 @@ S_scan_const(pTHX_ char *start)
 
 #ifdef EBCDIC
 		if (literal_endpoint == 2 &&
-		    ((isLOWER(min) && isLOWER(max)) ||
-		     (isUPPER(min) && isUPPER(max)))) {
-		    if (isLOWER(min)) {
-			for (i = min; i <= max; i++)
-			    if (isLOWER(i))
-				*d++ = NATIVE_TO_NEED(has_utf8,i);
-		    } else {
-			for (i = min; i <= max; i++)
-			    if (isUPPER(i))
-				*d++ = NATIVE_TO_NEED(has_utf8,i);
+		    ((isLOWER_A(min) && isLOWER_A(max)) ||
+		     (isUPPER_A(min) && isUPPER_A(max))))
+                {
+                    for (i = min; i <= max; i++) {
+                        if (isALPHA_A(i))
+                            *d++ = i;
 		    }
 		}
 		else
@@ -3204,13 +3171,7 @@ S_scan_const(pTHX_ char *start)
 		    for (i = min; i <= max; i++)
 #ifdef EBCDIC
                         if (has_utf8) {
-                            const U8 ch = (U8)NATIVE_TO_UTF(i);
-                            if (UNI_IS_INVARIANT(ch))
-                                *d++ = (U8)i;
-                            else {
-                                *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
-                                *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
-                            }
+                            append_utf8_from_native_byte(i, &d);
                         }
                         else
 #endif
@@ -3220,7 +3181,7 @@ S_scan_const(pTHX_ char *start)
                 if (uvmax) {
                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
                     if (uvmax > 0x101)
-                        *d++ = (char)UTF_TO_NATIVE(0xff);
+                        *d++ = (char) ILLEGAL_UTF8_BYTE;
                     if (uvmax > 0x100)
                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
                 }
@@ -3245,7 +3206,7 @@ S_scan_const(pTHX_ char *start)
 		    && !native_range
 #endif
 		    ) {
-		    *d++ = (char)UTF_TO_NATIVE(0xff);	/* use illegal utf8 byte--see pmtrans */
+		    *d++ = (char) ILLEGAL_UTF8_BYTE;	/* use illegal utf8 byte--see pmtrans */
 		    s++;
 		    continue;
 		}
@@ -3285,12 +3246,12 @@ S_scan_const(pTHX_ char *start)
 	 * char, which will be done separately.
 	 * Stop on (?{..}) and friends */
 
-	else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
+	else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
 	    if (s[2] == '#') {
 		while (s+1 < send && *s != ')')
-		    *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+		    *d++ = *s++;
 	    }
-	    else if (!PL_lex_casemods && !in_charclass &&
+	    else if (!PL_lex_casemods &&
 		     (    s[2] == '{' /* This should match regcomp.c */
 		      || (s[2] == '?' && s[3] == '{')))
 	    {
@@ -3299,10 +3260,10 @@ S_scan_const(pTHX_ char *start)
 	}
 
 	/* likewise skip #-initiated comments in //x patterns */
-	else if (*s == '#' && PL_lex_inpat &&
+	else if (*s == '#' && PL_lex_inpat && !in_charclass &&
 	  ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
 	    while (s+1 < send && *s != '\n')
-		*d++ = NATIVE_TO_NEED(has_utf8,*s++);
+		*d++ = *s++;
 	}
 
 	/* no further processing of single-quoted regex */
@@ -3377,7 +3338,7 @@ S_scan_const(pTHX_ char *start)
 			|| s[1] != '{'
 			|| regcurly(s + 1, FALSE)))
 	    {
-		*d++ = NATIVE_TO_NEED(has_utf8,'\\');
+		*d++ = '\\';
 		goto default_action;
 	    }
 
@@ -3406,7 +3367,7 @@ S_scan_const(pTHX_ char *start)
 		{
                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
                     STRLEN len = 3;
-		    uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
+		    uv = grok_oct(s, &len, &flags, NULL);
 		    s += len;
                     if (len < 3 && s < send && isDIGIT(*s)
                         && ckWARN(WARN_MISC))
@@ -3458,9 +3419,8 @@ S_scan_const(pTHX_ char *start)
 		 * UTF-8 sequence they can end up as, except if they force us
 		 * to recode the rest of the string into utf8 */
 		
-		/* Here uv is the ordinal of the next character being added in
-		 * unicode (converted from native). */
-		if (!UNI_IS_INVARIANT(uv)) {
+		/* Here uv is the ordinal of the next character being added */
+		if (!NATIVE_IS_INVARIANT(uv)) {
 		    if (!has_utf8 && uv > 255) {
 			/* Might need to recode whatever we have accumulated so
 			 * far if it contains any chars variant in utf8 or
@@ -3478,7 +3438,7 @@ S_scan_const(pTHX_ char *start)
                     }
 
                     if (has_utf8) {
-		        d = (char*)uvuni_to_utf8((U8*)d, uv);
+		        d = (char*)uvchr_to_utf8((U8*)d, uv);
 			if (PL_lex_inwhat == OP_TRANS &&
 			    PL_sublex_info.sub_op) {
 			    PL_sublex_info.sub_op->op_private |=
@@ -3511,16 +3471,6 @@ S_scan_const(pTHX_ char *start)
 		 * now, while preserving the fact that it was a named character
 		 * so that the regex compiler knows this */
 
-		/* This section of code doesn't generally use the
-		 * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
-		 * a close examination of this macro and determined it is a
-		 * no-op except on utfebcdic variant characters.  Every
-		 * character generated by this that would normally need to be
-		 * enclosed by this macro is invariant, so the macro is not
-		 * needed, and would complicate use of copy().  XXX There are
-		 * other parts of this file where the macro is used
-		 * inconsistently, but are saved by it being a no-op */
-
 		/* The structure of this section of code (besides checking for
 		 * errors and upgrading to utf8) is:
 		 *  Further disambiguate between the two meanings of \N, and if
@@ -3608,11 +3558,13 @@ S_scan_const(pTHX_ char *start)
 			    has_utf8 = TRUE;
 			}
 
-			/* Add the string to the output */
+                        /* Add the (Unicode) code point to the output. */
 			if (UNI_IS_INVARIANT(uv)) {
-			    *d++ = (char) uv;
+			    *d++ = (char) LATIN1_TO_NATIVE(uv);
 			}
-			else d = (char*)uvuni_to_utf8((U8*)d, uv);
+			else {
+                            d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
+                        }
 		    }
 		}
 		else /* Here is \N{NAME} but not \N{U+...}. */
@@ -3672,19 +3624,16 @@ S_scan_const(pTHX_ char *start)
                                 char hex_string[2 * UTF8_MAXBYTES + 5];
 
                                 /* Get the first character of the result. */
-                                U32 uv = utf8n_to_uvuni((U8 *) str,
+                                U32 uv = utf8n_to_uvchr((U8 *) str,
                                                         len,
                                                         &char_length,
                                                         UTF8_ALLOW_ANYUV);
                                 /* Convert first code point to hex, including
-                                 * the boiler plate before it.  For all these,
-                                 * we convert to native format so that
-                                 * downstream code can continue to assume the
-                                 * input is native */
+                                 * the boiler plate before it. */
                                 output_length =
                                     my_snprintf(hex_string, sizeof(hex_string),
-                                            "\\N{U+%X",
-                                            (unsigned int) UNI_TO_NATIVE(uv));
+                                                "\\N{U+%X",
+                                                (unsigned int) uv);
 
                                 /* Make sure there is enough space to hold it */
                                 d = off + SvGROW(sv, off
@@ -3699,15 +3648,15 @@ S_scan_const(pTHX_ char *start)
                                 * its ordinal in hex */
                                 while ((str += char_length) < str_end) {
                                     const STRLEN off = d - SvPVX_const(sv);
-                                    U32 uv = utf8n_to_uvuni((U8 *) str,
+                                    U32 uv = utf8n_to_uvchr((U8 *) str,
                                                             str_end - str,
                                                             &char_length,
                                                             UTF8_ALLOW_ANYUV);
                                     output_length =
                                         my_snprintf(hex_string,
-                                            sizeof(hex_string),
-                                            ".%X",
-                                            (unsigned int) UNI_TO_NATIVE(uv));
+                                                    sizeof(hex_string),
+                                                    ".%X",
+                                                    (unsigned int) uv);
 
                                     d = off + SvGROW(sv, off
                                                         + output_length
@@ -3772,25 +3721,25 @@ S_scan_const(pTHX_ char *start)
 
 	    /* printf-style backslashes, formfeeds, newlines, etc */
 	    case 'b':
-		*d++ = NATIVE_TO_NEED(has_utf8,'\b');
+		*d++ = '\b';
 		break;
 	    case 'n':
-		*d++ = NATIVE_TO_NEED(has_utf8,'\n');
+		*d++ = '\n';
 		break;
 	    case 'r':
-		*d++ = NATIVE_TO_NEED(has_utf8,'\r');
+		*d++ = '\r';
 		break;
 	    case 'f':
-		*d++ = NATIVE_TO_NEED(has_utf8,'\f');
+		*d++ = '\f';
 		break;
 	    case 't':
-		*d++ = NATIVE_TO_NEED(has_utf8,'\t');
+		*d++ = '\t';
 		break;
 	    case 'e':
-		*d++ = ASCII_TO_NEED(has_utf8,'\033');
+		*d++ = ASCII_TO_NATIVE('\033');
 		break;
 	    case 'a':
-		*d++ = ASCII_TO_NEED(has_utf8,'\007');
+		*d++ = '\a';
 		break;
 	    } /* end switch */
 
@@ -3816,8 +3765,10 @@ S_scan_const(pTHX_ char *start)
 	     * routine that does the conversion checks for errors like
 	     * malformed utf8 */
 
-	    const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
-	    const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
+	    const UV nextuv   = (this_utf8)
+                                ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
+                                : (UV) ((U8) *s);
+	    const STRLEN need = UNISKIP(nextuv);
 	    if (!has_utf8) {
 		SvCUR_set(sv, d - SvPVX_const(sv));
 		SvPOK_on(sv);
@@ -3844,7 +3795,7 @@ S_scan_const(pTHX_ char *start)
 #endif
 	}
 	else {
-	    *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+	    *d++ = *s++;
 	}
     } /* while loop to process each character */
 
@@ -5309,7 +5260,7 @@ Perl_yylex(pTHX)
 	     * check if it in fact is. */
 	    if (bof && PL_rsfp &&
 		     (*s == 0 ||
-		      *(U8*)s == 0xEF ||
+		      *(U8*)s == BOM_UTF8_FIRST_BYTE ||
 		      *(U8*)s >= 0xFE ||
 		      s[1] == 0)) {
 		Off_t offset = (IV)PerlIO_tell(PL_rsfp);
@@ -5621,14 +5572,19 @@ Perl_yylex(pTHX)
 		s = SKIPSPACE0(s);
 	    }
 	    else {
-/*		if (PL_madskills && PL_lex_formbrack) { */
-		    d = s;
-		    while (d < PL_bufend && *d != '\n')
-			d++;
-		    if (d < PL_bufend)
-			d++;
-		    else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
+#endif
+		    if (PL_madskills) d = s;
+		    while (s < PL_bufend && *s != '\n')
+			s++;
+		    if (s < PL_bufend)
+		    {
+			s++;
+			if (s < PL_bufend)
+			    incline(s);
+		    }
+		    else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
 		      Perl_croak(aTHX_ "panic: input overflow");
+#ifdef PERL_MAD
 		    if (PL_madskills && CopLINE(PL_curcop) >= 1) {
 			if (!PL_thiswhite)
 			    PL_thiswhite = newSVpvs("");
@@ -5636,20 +5592,9 @@ Perl_yylex(pTHX)
 			    sv_setpvs(PL_thiswhite, "");
 			    PL_faketokens = 0;
 			}
-			sv_catpvn(PL_thiswhite, s, d - s);
+			sv_catpvn(PL_thiswhite, d, s - d);
 		    }
-		    s = d;
-/*		}
-		*s = '\0';
-		PL_bufend = s; */
 	    }
-#else
-	    while (s < PL_bufend && *s != '\n')
-		s++;
-	    if (s < PL_bufend)
-		s++;
-	    else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
-	      Perl_croak(aTHX_ "panic: input overflow");
 #endif
 	}
 	goto retry;
@@ -7323,7 +7268,7 @@ Perl_yylex(pTHX)
 			d = s + 1;
 			while (SPACE_OR_TAB(*d))
 			    d++;
-			if (*d == ')' && (sv = cv_const_sv(cv))) {
+			if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
 			    s = d + 1;
 			    goto its_constant;
 			}
@@ -7387,13 +7332,19 @@ Perl_yylex(pTHX)
 			     UTF8fARG(UTF, l, PL_tokenbuf));
                     }
 		    /* Check for a constant sub */
-		    if ((sv = cv_const_sv(cv))) {
+		    if ((sv = cv_const_sv_or_av(cv))) {
 		  its_constant:
 			op_free(rv2cv_op);
 			SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
 			((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
-			pl_yylval.opval->op_private = OPpCONST_FOLDED;
-			pl_yylval.opval->op_flags |= OPf_SPECIAL;
+			if (SvTYPE(sv) == SVt_PVAV)
+			    pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
+						      pl_yylval.opval);
+			else {
+			    pl_yylval.opval->op_private = OPpCONST_FOLDED;
+			    pl_yylval.opval->op_folded = 1;
+			    pl_yylval.opval->op_flags |= OPf_SPECIAL;
+			}
 			TOKEN(WORD);
 		    }
 
@@ -7595,21 +7546,12 @@ Perl_yylex(pTHX)
 	case KEY___END__: {
 	    GV *gv;
 	    if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
-		const char *pname = "main";
-		STRLEN plen = 4;
-		U32 putf8 = 0;
-		if (PL_tokenbuf[2] == 'D')
-		{
-		    HV * const stash =
-			PL_curstash ? PL_curstash : PL_defstash;
-		    pname = HvNAME_get(stash);
-		    plen  = HvNAMELEN (stash);
-		    if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
-		}
-		gv = gv_fetchpvn_flags(
-			Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
-			plen+6, GV_ADD|putf8, SVt_PVIO
-		);
+		HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
+					? PL_curstash
+					: PL_defstash;
+		gv = (GV *)*hv_fetchs(stash, "DATA", 1);
+		if (!isGV(gv))
+		    gv_init(gv,stash,"DATA",4,0);
 		GvMULTI_on(gv);
 		if (!GvIO(gv))
 		    GvIOp(gv) = newIO();
@@ -8919,17 +8861,9 @@ Perl_yylex(pTHX)
 	    FUN0(OP_WANTARRAY);
 
 	case KEY_write:
-#ifdef EBCDIC
-	{
-	    char ctl_l[2];
-	    ctl_l[0] = toCTRL('L');
-	    ctl_l[1] = '\0';
-	    gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
-	}
-#else
-	    /* Make sure $^L is defined */
-	    gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
-#endif
+            /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
+             * we use the same number on EBCDIC */
+	    gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
 	    UNI(OP_ENTERWRITE);
 
 	case KEY_x:
@@ -9410,10 +9344,17 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck
 	   s++;
     }
 
-#define VALID_LEN_ONE_IDENT(d, u)     (isPUNCT_A((U8)(d))     \
-                                        || isCNTRL_A((U8)(d)) \
-                                        || isDIGIT_A((U8)(d)) \
-                                        || (!(u) && !UTF8_IS_INVARIANT((U8)(d))))
+/*  \c?, \c\, \c^, \c_, and \cA..\cZ minus the ones that have traditionally
+ *  been matched by \s on ASCII platforms, are the legal control char names
+ *  here, that is \c? plus 1-32 minus the \s ones. */
+#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d))                       \
+                                   || isDIGIT_A((U8)(d))                    \
+                                   || (!(u) && !isASCII((U8)(d)))           \
+                                   || ((((U8)(d)) < 32)                     \
+                                       && (((((U8)(d)) >= 14)               \
+                                           || (((U8)(d)) <= 8 && (d) != 0) \
+                                           || (((U8)(d)) == 13))))          \
+                                   || (((U8)(d)) == toCTRL('?')))
     if (s < send
         && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
     {
@@ -10067,7 +10008,7 @@ S_scan_heredoc(pTHX_ char *s)
 	    /* shared is only null if we have gone beyond the outermost
 	       lexing scope.  In a file, we will have broken out of the
 	       loop in the previous iteration.  In an eval, the string buf-
-	       fer ends with "\n;", so the while condition below will have
+	       fer ends with "\n;", so the while condition above will have
 	       evaluated to false.  So shared can never be null. */
 	    assert(shared);
 	    /* A LEXSHARED struct with a null ls_prev pointer is the outer-
@@ -10171,8 +10112,11 @@ S_scan_heredoc(pTHX_ char *s)
 	}
 	CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
 	if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
-	    lex_grow_linestr(SvCUR(PL_linestr) + 2);
+            s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
+            /* ^That should be enough to avoid this needing to grow:  */
 	    sv_catpvs(PL_linestr, "\n\0");
+            assert(s == SvPVX(PL_linestr));
+            PL_bufend = SvEND(PL_linestr);
 	}
 	s = PL_bufptr;
 #ifdef PERL_MAD
@@ -10390,11 +10334,15 @@ intro_sym:
 
 
 /* scan_str
-   takes: start position in buffer
-	  keep_quoted preserve \ on the embedded delimiter(s)
-	  keep_delims preserve the delimiters around the string
-	  re_reparse  compiling a run-time /(?{})/:
-			collapse // to /,  and skip encoding src
+   takes:
+	start			position in buffer
+	keep_quoted		preserve \ on the embedded delimiter(s)
+	keep_delims		preserve the delimiters around the string
+	re_reparse		compiling a run-time /(?{})/:
+				   collapse // to /,  and skip encoding src
+	deprecate_escaped_meta	issue a deprecation warning for cer-
+				tain paired metacharacters that appear
+				escaped within it
    returns: position to continue reading from buffer
    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
    	updates the read buffer.
@@ -10436,9 +10384,7 @@ intro_sym:
 
 STATIC char *
 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
-        bool deprecate_escaped_meta /* Should we issue a deprecation warning
-                                       for certain paired metacharacters that
-                                       appear escaped within it */
+		 bool deprecate_escaped_meta
     )
 {
     dVAR;
@@ -10506,8 +10452,8 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
      * happen for <>, as they aren't metas. */
     if (deprecate_escaped_meta
         && (PL_multi_open == PL_multi_close
-            || ! ckWARN_d(WARN_DEPRECATED)
-            || PL_multi_open == '<'))
+            || PL_multi_open == '<'
+            || ! ckWARN_d(WARN_DEPRECATED)))
     {
         deprecate_escaped_meta = FALSE;
     }
@@ -10701,26 +10647,39 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
                          * context where the delimiter is also a metacharacter,
                          * the backslash is useless, and deprecated.  () and []
                          * are meta in any context. {} are meta only when
-                         * appearing in a quantifier or in things like '\p{'.
-                         * They also aren't meta unless there is a matching
-                         * closed, escaped char later on within the string.
-                         * If 's' points to an open, set a flag; if to a close,
-                         * test that flag, and raise a warning if it was set */
+                         * appearing in a quantifier or in things like '\p{'
+                         * (but '\\p{' isn't meta).  They also aren't meta
+                         * unless there is a matching closed, escaped char
+                         * later on within the string.  If 's' points to an
+                         * open, set a flag; if to a close, test that flag, and
+                         * raise a warning if it was set */
 
 			if (deprecate_escaped_meta) {
                             if (*s == PL_multi_open) {
                                 if (*s != '{') {
                                     escaped_open = s;
                                 }
-                                else if (regcurly(s,
-                                                  TRUE /* Look for a closing
-                                                          '\}' */)
-                                         || (s - start > 2  /* Look for e.g.
-                                                               '\x{' */
-                                             && _generic_isCC(*(s-2), _CC_BACKSLASH_FOO_LBRACE_IS_META)))
-                                {
+                                     /* Look for a closing '\}' */
+                                else if (regcurly(s, TRUE)) {
                                     escaped_open = s;
                                 }
+                                     /* Look for e.g.  '\x{' */
+                                else if (s - start > 2
+                                         && _generic_isCC(*(s-2),
+                                             _CC_BACKSLASH_FOO_LBRACE_IS_META))
+                                { /* Exclude '\\x', '\\\\x', etc. */
+                                    char *lookbehind = s - 4;
+                                    bool is_meta = TRUE;
+                                    while (lookbehind >= start
+                                           && *lookbehind == '\\')
+                                    {
+                                        is_meta = ! is_meta;
+                                        lookbehind--;
+                                    }
+                                    if (is_meta) {
+                                        escaped_open = s;
+                                    }
+                                }
                             }
                             else if (escaped_open) {
                                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
@@ -11575,12 +11534,14 @@ S_swallow_bom(pTHX_ U8 *s)
 #endif
 	}
 	break;
-    case 0xEF:
-	if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
-	    if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
-	    s += 3;                      /* UTF-8 */
-	}
-	break;
+    case BOM_UTF8_FIRST_BYTE: {
+        const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
+        if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
+            if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
+            s += len + 1;                      /* UTF-8 */
+        }
+        break;
+    }
     case 0:
 	if (slen > 3) {
 	     if (s[1] == 0) {
@@ -11603,14 +11564,6 @@ S_swallow_bom(pTHX_ U8 *s)
 #endif
 	     }
 	}
-#ifdef EBCDIC
-    case 0xDD:
-        if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
-            if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
-            s += 4;                      /* UTF-8 */
-        }
-        break;
-#endif
 
     default:
 	 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
@@ -11855,7 +11808,7 @@ Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
 	    /* Append native character for the rev point */
 	    tmpend = uvchr_to_utf8(tmpbuf, rev);
 	    sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
-	    if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
+	    if (!NATIVE_IS_INVARIANT(rev))
 		 SvUTF8_on(sv);
 	    if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
 		 s = ++pos;