X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/7918f24d20384771923d344a382e1d16d9552018..f533c11bbc025019148e213de55420f51676c83f:/toke.c?ds=sidebyside

diff --git a/toke.c b/toke.c
index cb73104..de163fb 100644
--- a/toke.c
+++ b/toke.c
@@ -1,7 +1,7 @@
 /*    toke.c
  *
- *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
- *    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
+ *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+ *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
  *
  *    You may distribute under the terms of either the GNU General Public
  *    License or the Artistic License, as specified in the README file.
@@ -9,7 +9,9 @@
  */
 
 /*
- *   "It all comes from here, the stench and the peril."  --Frodo
+ *  'It all comes from here, the stench and the peril.'    --Frodo
+ *
+ *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  */
 
 /*
@@ -122,16 +124,14 @@ static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 #endif
 
+/* The maximum number of characters preceding the unrecognized one to display */
+#define UNRECOGNIZED_PRECEDE_COUNT 10
+
 /* In variables named $^X, these are the legal values for X.
  * 1999-02-27 mjd-perl-patch@plover.com */
 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 
-/* On MacOS, respect nonbreaking spaces */
-#ifdef MACOS_TRADITIONAL
-#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
-#else
 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
-#endif
 
 /* LEX_* are values for PL_lex_state, the state of the lexer.
  * They are arranged oddly so that the guard on the switch statement
@@ -368,6 +368,7 @@ static struct debug_tokens {
     { WHEN,		TOKENTYPE_IVAL,		"WHEN" },
     { WHILE,		TOKENTYPE_IVAL,		"WHILE" },
     { WORD,		TOKENTYPE_OPVAL,	"WORD" },
+    { YADAYADA,		TOKENTYPE_IVAL,		"YADAYADA" },
     { 0,		TOKENTYPE_NONE,		NULL }
 };
 
@@ -543,13 +544,7 @@ S_missingterm(pTHX_ char *s)
 	if (nl)
 	    *nl = '\0';
     }
-    else if (
-#ifdef EBCDIC
-	iscntrl(PL_multi_close)
-#else
-	PL_multi_close < 32 || PL_multi_close == 127
-#endif
-	) {
+    else if (isCNTRL(PL_multi_close)) {
 	*tmpbuf = '^';
 	tmpbuf[1] = (char)toCTRL(PL_multi_close);
 	tmpbuf[2] = '\0';
@@ -716,7 +711,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
     parser->expect = XSTATE;
     parser->rsfp = rsfp;
     parser->rsfp_filters = (new_filter || !oparser) ? newAV()
-		: (AV*)SvREFCNT_inc(oparser->rsfp_filters);
+		: MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
 
     Newx(parser->lex_brackstack, 120, char);
     Newx(parser->lex_casestack, 12, char);
@@ -760,8 +755,8 @@ Perl_parser_free(pTHX_  const yy_parser *parser)
 
     if (parser->rsfp == PerlIO_stdin())
 	PerlIO_clearerr(parser->rsfp);
-    else if (parser->rsfp && parser->old_parser
-			  && parser->rsfp != parser->old_parser->rsfp)
+    else if (parser->rsfp && (!parser->old_parser ||
+		(parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 	PerlIO_close(parser->rsfp);
     SvREFCNT_dec(parser->rsfp_filters);
 
@@ -827,6 +822,8 @@ S_incline(pTHX_ const char *s)
     n = s;
     while (isDIGIT(*s))
 	s++;
+    if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
+	return;
     while (SPACE_OR_TAB(*s))
 	s++;
     if (*s == '"' && (t = strchr(s+1, '"'))) {
@@ -901,8 +898,8 @@ S_incline(pTHX_ const char *s)
 		    gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
 		    /* adjust ${"::_<newfilename"} to store the new file name */
 		    GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
-		    GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
-		    GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
+		    GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
+		    GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
 		}
 
 		if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
@@ -1118,17 +1115,17 @@ S_skipspace(pTHX_ register char *s)
 	    }
 	    else if (PL_minus_n) {
 #ifdef PERL_MAD
-		sv_catpvn(PL_linestr, ";}", 2);
+		sv_catpvs(PL_linestr, ";}");
 #else
-		sv_setpvn(PL_linestr, ";}", 2);
+		sv_setpvs(PL_linestr, ";}");
 #endif
 		PL_minus_n = 0;
 	    }
 	    else
 #ifdef PERL_MAD
-		sv_catpvn(PL_linestr,";", 1);
+		sv_catpvs(PL_linestr,";");
 #else
-		sv_setpvn(PL_linestr,";", 1);
+		sv_setpvs(PL_linestr,";");
 #endif
 
 	    /* reset variables for next time we lex */
@@ -1178,7 +1175,7 @@ S_skipspace(pTHX_ register char *s)
 	/* debugger active and we're not compiling the debugger code,
 	 * so store the line into the debugger's array of lines
 	 */
-	if (PERLDB_LINE && PL_curstash != PL_debstash)
+	if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
 	    update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
     }
 
@@ -1318,7 +1315,7 @@ S_curmad(pTHX_ char slot, SV *sv)
 	where = &PL_nexttoke[PL_curforce].next_mad;
 
     if (PL_faketokens)
-	sv_setpvn(sv, "", 0);
+	sv_setpvs(sv, "");
     else {
 	if (!IN_BYTES) {
 	    if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
@@ -1332,7 +1329,7 @@ S_curmad(pTHX_ char slot, SV *sv)
     /* keep a slot open for the head of the list? */
     if (slot != '_' && *where && (*where)->mad_key == '^') {
 	(*where)->mad_key = slot;
-	sv_free((SV*)((*where)->mad_val));
+	sv_free(MUTABLE_SV(((*where)->mad_val)));
 	(*where)->mad_val = (void*)sv;
     }
     else
@@ -1359,7 +1356,7 @@ S_force_next(pTHX_ I32 type)
 #ifdef DEBUGGING
     if (DEBUG_T_TEST) {
         PerlIO_printf(Perl_debug_log, "### forced token:\n");
-	tokereport(THING, &NEXTVAL_NEXTTOKE);
+	tokereport(type, &NEXTVAL_NEXTTOKE);
     }
 #endif
 #ifdef PERL_MAD
@@ -1387,7 +1384,9 @@ S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
 {
     dVAR;
     SV * const sv = newSVpvn_utf8(start, len,
-				  UTF && !IN_BYTES
+				  !IN_BYTES
+				  && UTF
+				  && !is_ascii_string((const U8*)start, len)
 				  && is_utf8_string((const U8*)start, len));
     return sv;
 }
@@ -1845,7 +1844,7 @@ S_sublex_done(pTHX)
 		PL_thiswhite = 0;
 	    }
 	    if (PL_thistoken)
-		sv_setpvn(PL_thistoken,"",0);
+		sv_setpvs(PL_thistoken,"");
 	    else
 		PL_realtokenstart = -1;
 	}
@@ -1933,7 +1932,9 @@ S_sublex_done(pTHX)
 		  handle \cV (control characters)
 		  handle printf-style backslashes (\f, \r, \n, etc)
 	      } (end switch)
+	      continue
 	  } (end if backslash)
+          handle regular character
     } (end while character to read)
 		
 */
@@ -1943,13 +1944,32 @@ S_scan_const(pTHX_ char *start)
 {
     dVAR;
     register char *send = PL_bufend;		/* end of the constant */
-    SV *sv = newSV(send - start);		/* sv for the constant */
+    SV *sv = newSV(send - start);		/* sv for the constant.  See
+						   note below on sizing. */
     register char *s = start;			/* start of the constant */
     register char *d = SvPVX(sv);		/* destination for copies */
     bool dorange = FALSE;			/* are we in a translit range? */
     bool didrange = FALSE;		        /* did we just finish a range? */
     I32  has_utf8 = FALSE;			/* Output constant is UTF8 */
-    I32  this_utf8 = UTF;			/* The source string is assumed to be UTF8 */
+    I32  this_utf8 = UTF;			/* Is the source string assumed
+						   to be UTF8?  But, this can
+						   show as true when the source
+						   isn't utf8, as for example
+						   when it is entirely composed
+						   of hex constants */
+
+    /* Note on sizing:  The scanned constant is placed into sv, which is
+     * initialized by newSV() assuming one byte of output for every byte of
+     * input.  This routine expects newSV() to allocate an extra byte for a
+     * trailing NUL, which this routine will append if it gets to the end of
+     * the input.  There may be more bytes of input than output (eg., \N{LATIN
+     * CAPITAL LETTER A}), or more output than input if the constant ends up
+     * recoded to utf8, but each time a construct is found that might increase
+     * the needed size, SvGROW() is called.  Its size parameter each time is
+     * based on the best guess estimate at the time, namely the length used so
+     * far, plus the length the current construct will occupy, plus room for
+     * the trailing NUL, plus one byte for every input byte still unscanned */ 
+
     UV uv;
 #ifdef EBCDIC
     UV literal_endpoint = 0;
@@ -2172,8 +2192,13 @@ S_scan_const(pTHX_ char *start)
 	else if (*s == '$') {
 	    if (!PL_lex_inpat)	/* not a regexp, so $ must be var */
 		break;
-	    if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
+	    if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
+		if (s[1] == '\\' && ckWARN(WARN_AMBIGUOUS)) {
+		    Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
+				"Possible unintended interpolation of $\\ in regex");
+		}
 		break;		/* in regexp, $ might be tail anchor */
+            }
 	}
 
 	/* End of else if chain - OP_TRANS rejoin rest */
@@ -2224,18 +2249,18 @@ S_scan_const(pTHX_ char *start)
 		    goto default_action;
 		}
 
-	    /* \132 indicates an octal constant */
+	    /* eg. \132 indicates the octal constant 0x132 */
 	    case '0': case '1': case '2': case '3':
 	    case '4': case '5': case '6': case '7':
 		{
                     I32 flags = 0;
                     STRLEN len = 3;
-		    uv = grok_oct(s, &len, &flags, NULL);
+		    uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
 		    s += len;
 		}
 		goto NUM_ESCAPE_INSERT;
 
-	    /* \x24 indicates a hex constant */
+	    /* eg. \x24 indicates the hex constant 0x24 */
 	    case 'x':
 		++s;
 		if (*s == '{') {
@@ -2250,67 +2275,47 @@ S_scan_const(pTHX_ char *start)
 			continue;
 		    }
                     len = e - s;
-		    uv = grok_hex(s, &len, &flags, NULL);
+		    uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
 		    s = e + 1;
 		}
 		else {
 		    {
 			STRLEN len = 2;
                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
-			uv = grok_hex(s, &len, &flags, NULL);
+			uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
 			s += len;
 		    }
 		}
 
 	      NUM_ESCAPE_INSERT:
-		/* Insert oct or hex escaped character.
-		 * There will always enough room in sv since such
-		 * escapes will be longer than any UTF-8 sequence
-		 * they can end up as. */
+		/* Insert oct, hex, or \N{U+...} escaped character.  There will
+		 * always be enough room in sv since such escapes will be
+		 * longer than any UTF-8 sequence they can end up as, except if
+		 * they force us to recode the rest of the string into utf8 */
 		
-		/* We need to map to chars to ASCII before doing the tests
-		   to cover EBCDIC
-		*/
-		if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
+		/* Here uv is the ordinal of the next character being added in
+		 * unicode (converted from native).  (It has to be done before
+		 * here because \N is interpreted as unicode, and oct and hex
+		 * as native.) */
+		if (!UNI_IS_INVARIANT(uv)) {
 		    if (!has_utf8 && uv > 255) {
-		        /* Might need to recode whatever we have
-			 * accumulated so far if it contains any
-			 * hibit chars.
-			 *
-			 * (Can't we keep track of that and avoid
-			 *  this rescan? --jhi)
-			 */
-			int hicount = 0;
-			U8 *c;
-			for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
-			    if (!NATIVE_IS_INVARIANT(*c)) {
-			        hicount++;
-			    }
-			}
-			if (hicount) {
-			    const STRLEN offset = d - SvPVX_const(sv);
-			    U8 *src, *dst;
-			    d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
-			    src = (U8 *)d - 1;
-			    dst = src+hicount;
-			    d  += hicount;
-			    while (src >= (const U8 *)SvPVX_const(sv)) {
-			        if (!NATIVE_IS_INVARIANT(*src)) {
-				    const U8 ch = NATIVE_TO_ASCII(*src);
-				    *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
-				    *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
-			        }
-			        else {
-				    *dst-- = *src;
-			        }
-				src--;
-			    }
-                        }
+			/* Might need to recode whatever we have accumulated so
+			 * far if it contains any chars variant in utf8 or
+			 * utf-ebcdic. */
+			  
+			SvCUR_set(sv, d - SvPVX_const(sv));
+			SvPOK_on(sv);
+			*d = '\0';
+			/* See Note on sizing above.  */
+			sv_utf8_upgrade_flags_grow(sv,
+					SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+					UNISKIP(uv) + (STRLEN)(send - s) + 1);
+			d = SvPVX(sv) + SvCUR(sv);
+			has_utf8 = TRUE;
                     }
 
-                    if (has_utf8 || uv > 255) {
-		        d = (char*)uvchr_to_utf8((U8*)d, uv);
-			has_utf8 = TRUE;
+                    if (has_utf8) {
+		        d = (char*)uvuni_to_utf8((U8*)d, uv);
 			if (PL_lex_inwhat == OP_TRANS &&
 			    PL_sublex_info.sub_op) {
 			    PL_sublex_info.sub_op->op_private |=
@@ -2331,7 +2336,8 @@ S_scan_const(pTHX_ char *start)
 		}
 		continue;
 
- 	    /* \N{LATIN SMALL LETTER A} is a named character */
+	    /* \N{LATIN SMALL LETTER A} is a named character, and so is
+	     * \N{U+0041} */
  	    case 'N':
  		++s;
  		if (*s == '{') {
@@ -2346,7 +2352,8 @@ S_scan_const(pTHX_ char *start)
 			goto cont_scan;
 		    }
 		    if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
-		        /* \N{U+...} */
+			/* \N{U+...} The ... is a unicode value even on EBCDIC
+			 * machines */
 		        I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
 			  PERL_SCAN_DISALLOW_PREFIX;
 		        s += 3;
@@ -2384,22 +2391,24 @@ S_scan_const(pTHX_ char *start)
 			 }
 		    }
 #endif
+		    /* If destination is not in utf8 but this new character is,
+		     * recode the dest to utf8 */
 		    if (!has_utf8 && SvUTF8(res)) {
-			const char * const ostart = SvPVX_const(sv);
-			SvCUR_set(sv, d - ostart);
+			SvCUR_set(sv, d - SvPVX_const(sv));
 			SvPOK_on(sv);
 			*d = '\0';
-			sv_utf8_upgrade(sv);
-			/* this just broke our allocation above... */
-			SvGROW(sv, (STRLEN)(send - start));
+			/* See Note on sizing above.  */
+			sv_utf8_upgrade_flags_grow(sv,
+					    SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+					    len + (STRLEN)(send - s) + 1);
 			d = SvPVX(sv) + SvCUR(sv);
 			has_utf8 = TRUE;
-		    }
-		    if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
-			const char * const odest = SvPVX_const(sv);
+		    } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
 
-			SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
-			d = SvPVX(sv) + (d - odest);
+			/* See Note on sizing above.  (NOTE: SvCUR() is not set
+			 * correctly here). */
+			const STRLEN off = d - SvPVX_const(sv);
+			d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off;
 		    }
 #ifdef EBCDIC
 		    if (!dorange)
@@ -2464,20 +2473,41 @@ S_scan_const(pTHX_ char *start)
 #endif
 
     default_action:
-	/* If we started with encoded form, or already know we want it
-	   and then encode the next character */
-	if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
+	/* If we started with encoded form, or already know we want it,
+	   then encode the next character */
+	if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
 	    STRLEN len  = 1;
+
+
+	    /* One might think that it is wasted effort in the case of the
+	     * source being utf8 (this_utf8 == TRUE) to take the next character
+	     * in the source, convert it to an unsigned value, and then convert
+	     * it back again.  But the source has not been validated here.  The
+	     * routine that does the conversion checks for errors like
+	     * malformed utf8 */
+
 	    const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
 	    const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
-	    s += len;
-	    if (need > len) {
-		/* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
+	    if (!has_utf8) {
+		SvCUR_set(sv, d - SvPVX_const(sv));
+		SvPOK_on(sv);
+		*d = '\0';
+		/* See Note on sizing above.  */
+		sv_utf8_upgrade_flags_grow(sv,
+					SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+					need + (STRLEN)(send - s) + 1);
+		d = SvPVX(sv) + SvCUR(sv);
+		has_utf8 = TRUE;
+	    } else if (need > len) {
+		/* encoded value larger than old, may need extra space (NOTE:
+		 * SvCUR() is not set correctly here).   See Note on sizing
+		 * above.  */
 		const STRLEN off = d - SvPVX_const(sv);
-		d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
+		d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
 	    }
+	    s += len;
+
 	    d = (char*)uvchr_to_utf8((U8*)d, nextuv);
-	    has_utf8 = TRUE;
 #ifdef EBCDIC
 	    if (uv > 255 && !dorange)
 		native_range = FALSE;
@@ -2796,7 +2826,7 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
       bare_package:
 	    start_force(PL_curforce);
 	    NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
-						   newSVpvn(tmpbuf,len));
+						  S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
 	    NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
 	    if (PL_madskills)
 		curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
@@ -3332,7 +3362,7 @@ Perl_yylex(pTHX)
 	    PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
 	    PL_nexttoke[PL_lasttoke].next_mad = 0;
 	    if (PL_thismad && PL_thismad->mad_key == '_') {
-		PL_thiswhite = (SV*)PL_thismad->mad_val;
+		PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
 		PL_thismad->mad_val = 0;
 		mad_free(PL_thismad);
 		PL_thismad = 0;
@@ -3627,8 +3657,17 @@ Perl_yylex(pTHX)
     default:
 	if (isIDFIRST_lazy_if(s,UTF))
 	    goto keylookup;
-	len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
-	Perl_croak(aTHX_ "Unrecognized character \\x%02X in column %d", *s & 255, (int) len + 1);
+	{
+        unsigned char c = *s;
+        len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
+        if (len > UNRECOGNIZED_PRECEDE_COUNT) {
+            d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
+        } else {
+            d = PL_linestart;
+        }	
+        *s = '\0';
+        Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
+    }
     case 4:
     case 26:
 	goto fake_eof;			/* emulate EOF on ^D or ^Z */
@@ -3685,9 +3724,12 @@ Perl_yylex(pTHX)
 		    ++svp;
 		    sv_catpvs(PL_linestr, ";");
 		}
-		sv_free((SV*)PL_preambleav);
+		sv_free(MUTABLE_SV(PL_preambleav));
 		PL_preambleav = NULL;
 	    }
+	    if (PL_minus_E)
+		sv_catpvs(PL_linestr,
+			  "use feature ':5." STRINGIFY(PERL_VERSION) "';");
 	    if (PL_minus_n || PL_minus_p) {
 		sv_catpvs(PL_linestr, "LINE: while (<>) {");
 		if (PL_minus_l)
@@ -3719,14 +3761,11 @@ Perl_yylex(pTHX)
 		        sv_catpvs(PL_linestr,"our @F=split(' ');");
 		}
 	    }
-	    if (PL_minus_E)
-		sv_catpvs(PL_linestr,
-			  "use feature ':5." STRINGIFY(PERL_VERSION) "';");
 	    sv_catpvs(PL_linestr, "\n");
 	    PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
 	    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
 	    PL_last_lop = PL_last_uni = NULL;
-	    if (PERLDB_LINE && PL_curstash != PL_debstash)
+	    if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
 		update_debugger_info(PL_linestr, NULL, 0);
 	    goto retry;
 	}
@@ -3762,7 +3801,7 @@ Perl_yylex(pTHX)
 		}
 		PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
 		PL_last_lop = PL_last_uni = NULL;
-		sv_setpvn(PL_linestr,"",0);
+		sv_setpvs(PL_linestr,"");
 		TOKEN(';');	/* not infinite loop because rsfp is NULL now */
 	    }
 	    /* If it looks like the start of a BOM or raw UTF-16,
@@ -3798,7 +3837,7 @@ Perl_yylex(pTHX)
 		    sv_catsv(PL_thiswhite, PL_linestr);
 #endif
 		if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
-		    sv_setpvn(PL_linestr, "", 0);
+		    sv_setpvs(PL_linestr, "");
 		    PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
 		    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
 		    PL_last_lop = PL_last_uni = NULL;
@@ -3808,7 +3847,7 @@ Perl_yylex(pTHX)
 	    incline(s);
 	} while (PL_doextract);
 	PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
-	if (PERLDB_LINE && PL_curstash != PL_debstash)
+	if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
 	    update_debugger_info(PL_linestr, NULL, 0);
 	PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
 	PL_last_lop = PL_last_uni = NULL;
@@ -3918,7 +3957,6 @@ Perl_yylex(pTHX)
 			*s = '#';	/* Don't try to parse shebang line */
 		}
 #endif /* ALTERNATE_SHEBANG */
-#ifndef MACOS_TRADITIONAL
 		if (!d &&
 		    *s == '#' &&
 		    ipathend > ipath &&
@@ -3934,7 +3972,7 @@ Perl_yylex(pTHX)
 		    while (s < PL_bufend && isSPACE(*s))
 			s++;
 		    if (s < PL_bufend) {
-			Newxz(newargv,PL_origargc+3,char*);
+			Newx(newargv,PL_origargc+3,char*);
 			newargv[1] = s;
 			while (s < PL_bufend && !isSPACE(*s))
 			    s++;
@@ -3949,7 +3987,6 @@ Perl_yylex(pTHX)
 		    PERL_FPU_POST_EXEC
 		    Perl_croak(aTHX_ "Can't exec %s", ipath);
 		}
-#endif
 		if (d) {
 		    while (*d && !isSPACE(*d))
 			d++;
@@ -3964,7 +4001,14 @@ Perl_yylex(pTHX)
 			const char *d1 = d;
 
 			do {
-			    if (*d1 == 'M' || *d1 == 'm' || *d1 == 'C') {
+			    bool baduni = FALSE;
+			    if (*d1 == 'C') {
+				const char *d2 = d1 + 1;
+				if (parse_unicode_opts((const char **)&d2)
+				    != PL_unicode)
+				    baduni = TRUE;
+			    }
+			    if (baduni || *d1 == 'M' || *d1 == 'm') {
 				const char * const m = d1;
 				while (*d1 && !isSPACE(*d1))
 				    d1++;
@@ -3981,17 +4025,17 @@ Perl_yylex(pTHX)
 			    } while (argc && argv[0][0] == '-' && argv[0][1]);
 			    init_argv_symbols(argc,argv);
 			}
-			if ((PERLDB_LINE && !oldpdb) ||
+			if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
 			    ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
 			      /* if we have already added "LINE: while (<>) {",
 			         we must not do it again */
 			{
-			    sv_setpvn(PL_linestr, "", 0);
+			    sv_setpvs(PL_linestr, "");
 			    PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
 			    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
 			    PL_last_lop = PL_last_uni = NULL;
 			    PL_preambled = FALSE;
-			    if (PERLDB_LINE)
+			    if (PERLDB_LINE || PERLDB_SAVESRC)
 				(void)gv_fetchfile(PL_origfilename);
 			    goto retry;
 			}
@@ -4012,9 +4056,6 @@ Perl_yylex(pTHX)
       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
 #endif
     case ' ': case '\t': case '\f': case 013:
-#ifdef MACOS_TRADITIONAL
-    case '\312':
-#endif
 #ifdef PERL_MAD
 	PL_realtokenstart = -1;
 	if (!PL_thiswhite)
@@ -4085,7 +4126,7 @@ Perl_yylex(pTHX)
 			if (!PL_thiswhite)
 			    PL_thiswhite = newSVpvs("");
 			if (CopLINE(PL_curcop) == 1) {
-			    sv_setpvn(PL_thiswhite, "", 0);
+			    sv_setpvs(PL_thiswhite, "");
 			    PL_faketokens = 0;
 			}
 			sv_catpvn(PL_thiswhite, s, d - s);
@@ -4257,7 +4298,10 @@ Perl_yylex(pTHX)
 	BOop(OP_BIT_XOR);
     case '[':
 	PL_lex_brackets++;
-	/* FALL THROUGH */
+	{
+	    const char tmp = *s++;
+	    OPERATOR(tmp);
+	}
     case '~':
 	if (s[1] == '~'
 	    && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
@@ -4307,6 +4351,7 @@ Perl_yylex(pTHX)
 		    case KEY_or:
 		    case KEY_and:
 		    case KEY_for:
+		    case KEY_foreach:
 		    case KEY_unless:
 		    case KEY_if:
 		    case KEY_while:
@@ -4342,11 +4387,6 @@ Perl_yylex(pTHX)
 		    if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
 			sv_free(sv);
 			if (PL_in_my == KEY_our) {
-#ifdef USE_ITHREADS
-			    GvUNIQUE_on(cGVOPx_gv(pl_yylval.opval));
-#else
-			    /* skip to avoid loading attributes.pm */
-#endif
 			    deprecate(":unique");
 			}
 			else
@@ -4361,7 +4401,7 @@ Perl_yylex(pTHX)
 		    }
 		    else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
 			sv_free(sv);
-			CvLOCKED_on(PL_compcv);
+			deprecate(":locked");
 		    }
 		    else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
 			sv_free(sv);
@@ -4649,7 +4689,7 @@ Perl_yylex(pTHX)
 		    if (PL_madskills) {
 			if (!PL_thiswhite)
 			    PL_thiswhite = newSVpvs("");
-			sv_catpvn(PL_thiswhite,"}",1);
+			sv_catpvs(PL_thiswhite,"}");
 		    }
 #endif
 		    return yylex();	/* ignore fake brackets */
@@ -4686,7 +4726,7 @@ Perl_yylex(pTHX)
 		&& isIDFIRST_lazy_if(s,UTF))
 	    {
 		CopLINE_dec(PL_curcop);
-		Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
+		Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
 		CopLINE_inc(PL_curcop);
 	    }
 	    BAop(OP_BIT_AND);
@@ -5025,10 +5065,10 @@ Perl_yylex(pTHX)
 	    AOPERATOR(DORDOR);
 	}
      case '?':			/* may either be conditional or pattern */
-	 if(PL_expect == XOPERATOR) {
+	if (PL_expect == XOPERATOR) {
 	     char tmp = *s++;
 	     if(tmp == '?') {
-    	          OPERATOR('?');
+		OPERATOR('?');
 	     }
              else {
 	         tmp = *s++;
@@ -5067,6 +5107,10 @@ Perl_yylex(pTHX)
 	    PL_expect = XSTATE;
 	    goto rightbracket;
 	}
+	if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
+	    s += 3;
+	    OPERATOR(YADAYADA);
+	}
 	if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
 	    char tmp = *s++;
 	    if (*s == tmp) {
@@ -5236,14 +5280,17 @@ Perl_yylex(pTHX)
 	/* Is this a label? */
 	if (!tmp && PL_expect == XSTATE
 	      && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
+	    tmp = keyword(PL_tokenbuf, len, 0);
+	    if (tmp)
+		Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf);
 	    s = d + 1;
 	    pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
 	    CLINE;
 	    TOKEN(LABEL);
 	}
-
-	/* Check for keywords */
-	tmp = keyword(PL_tokenbuf, len, 0);
+	else
+	    /* Check for keywords */
+	    tmp = keyword(PL_tokenbuf, len, 0);
 
 	/* Is this a word before a => operator? */
 	if (*d == '=' && d[1] == '>') {
@@ -5342,7 +5389,7 @@ Perl_yylex(pTHX)
 		if (PL_expect == XOPERATOR) {
 		    if (PL_bufptr == PL_linestart) {
 			CopLINE_dec(PL_curcop);
-			Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
+			Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
 			CopLINE_inc(PL_curcop);
 		    }
 		    else
@@ -5424,7 +5471,7 @@ Perl_yylex(pTHX)
 		    /* Real typeglob, so get the real subroutine: */
 			   ? GvCVu(gv)
 		    /* A proxy for a subroutine in this package? */
-			   : SvOK(gv) ? (CV *) gv : NULL)
+			   : SvOK(gv) ? MUTABLE_CV(gv) : NULL)
 		    : NULL;
 
 		/* See if it's the indirect object for a list operator. */
@@ -5573,7 +5620,7 @@ Perl_yylex(pTHX)
 			SvPOK(cv))
 		    {
 			STRLEN protolen;
-			const char *proto = SvPV_const((SV*)cv, protolen);
+			const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
 			if (!protolen)
 			    TERM(FUNC0SUB);
 			if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
@@ -5658,6 +5705,18 @@ Perl_yylex(pTHX)
 		    pl_yylval.opval->op_private |= OPpCONST_STRICT;
 		else {
 		bareword:
+		    /* after "print" and similar functions (corresponding to
+		     * "F? L" in opcode.pl), whatever wasn't already parsed as
+		     * a filehandle should be subject to "strict subs".
+		     * Likewise for the optional indirect-object argument to system
+		     * or exec, which can't be a bareword */
+		    if ((PL_last_lop_op == OP_PRINT
+			    || PL_last_lop_op == OP_PRTF
+			    || PL_last_lop_op == OP_SAY
+			    || PL_last_lop_op == OP_SYSTEM
+			    || PL_last_lop_op == OP_EXEC)
+			    && (PL_hints & HINT_STRICT_SUBS))
+			pl_yylval.opval->op_private |= OPpCONST_STRICT;
 		    if (lastchar != '-') {
 			if (ckWARN(WARN_RESERVED)) {
 			    d = PL_tokenbuf;
@@ -6670,7 +6729,7 @@ Perl_yylex(pTHX)
 		    (*s == ':' && s[1] == ':'))
 		{
 #ifdef PERL_MAD
-		    SV *nametoke;
+		    SV *nametoke = NULL;
 #endif
 
 		    PL_expect = XBLOCK;
@@ -6709,7 +6768,7 @@ Perl_yylex(pTHX)
 			Perl_croak(aTHX_ "Missing name in \"my sub\"");
 		    PL_expect = XTERMBLOCK;
 		    attrful = XATTRTERM;
-		    sv_setpvn(PL_subname,"?",1);
+		    sv_setpvs(PL_subname,"?");
 		    have_name = FALSE;
 		}
 
@@ -6731,6 +6790,12 @@ Perl_yylex(pTHX)
 		if (*s == '(') {
 		    char *p;
 		    bool bad_proto = FALSE;
+		    bool in_brackets = FALSE;
+		    char greedy_proto = ' ';
+		    bool proto_after_greedy_proto = FALSE;
+		    bool must_be_last = FALSE;
+		    bool underscore = FALSE;
+		    bool seen_underscore = FALSE;
 		    const bool warnsyntax = ckWARN(WARN_SYNTAX);
 
 		    s = scan_str(s,!!PL_madskills,FALSE);
@@ -6742,14 +6807,47 @@ Perl_yylex(pTHX)
 		    for (p = d; *p; ++p) {
 			if (!isSPACE(*p)) {
 			    d[tmp++] = *p;
-			    if (warnsyntax && !strchr("$@%*;[]&\\_", *p))
-				bad_proto = TRUE;
+
+			    if (warnsyntax) {
+				if (must_be_last)
+				    proto_after_greedy_proto = TRUE;
+				if (!strchr("$@%*;[]&\\_", *p)) {
+				    bad_proto = TRUE;
+				}
+				else {
+				    if ( underscore ) {
+					if ( *p != ';' )
+					    bad_proto = TRUE;
+					underscore = FALSE;
+				    }
+				    if ( *p == '[' ) {
+					in_brackets = TRUE;
+				    }
+				    else if ( *p == ']' ) {
+					in_brackets = FALSE;
+				    }
+				    else if ( (*p == '@' || *p == '%') &&
+					 ( tmp < 2 || d[tmp-2] != '\\' ) &&
+					 !in_brackets ) {
+					must_be_last = TRUE;
+					greedy_proto = *p;
+				    }
+				    else if ( *p == '_' ) {
+					underscore = seen_underscore = TRUE;
+				    }
+				}
+			    }
 			}
 		    }
 		    d[tmp] = '\0';
+		    if (proto_after_greedy_proto)
+			Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+				    "Prototype after '%c' for %"SVf" : %s",
+				    greedy_proto, SVfARG(PL_subname), d);
 		    if (bad_proto)
 			Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-				    "Illegal character in prototype for %"SVf" : %s",
+				    "Illegal character %sin prototype for %"SVf" : %s",
+				    seen_underscore ? "after '_' " : "",
 				    SVfARG(PL_subname), d);
 		    SvCUR_set(PL_lex_stuff, tmp);
 		    have_proto = TRUE;
@@ -10924,10 +11022,10 @@ S_scan_pat(pTHX_ char *start, I32 type)
 	   matches.  */
 	assert(type != OP_TRANS);
 	if (PL_curstash) {
-	    MAGIC *mg = mg_find((SV*)PL_curstash, PERL_MAGIC_symtab);
+	    MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
 	    U32 elements;
 	    if (!mg) {
-		mg = sv_magicext((SV*)PL_curstash, 0, PERL_MAGIC_symtab, 0, 0,
+		mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
 				 0);
 	    }
 	    elements = mg->mg_len / sizeof(PMOP**);
@@ -11074,9 +11172,9 @@ S_scan_trans(pTHX_ char *start)
     register char* s;
     OP *o;
     short *tbl;
-    I32 squash;
-    I32 del;
-    I32 complement;
+    U8 squash;
+    U8 del;
+    U8 complement;
 #ifdef PERL_MAD
     char *modstart;
 #endif
@@ -11353,7 +11451,7 @@ S_scan_heredoc(pTHX_ register char *s)
 	PL_last_lop = PL_last_uni = NULL;
     }
     else
-	sv_setpvn(tmpstr,"",0);   /* avoid "uninitialized" warning */
+	sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
     while (s >= PL_bufend) {	/* multiple line string? */
 #ifdef PERL_MAD
 	if (PL_madskills) {
@@ -11390,7 +11488,7 @@ S_scan_heredoc(pTHX_ register char *s)
 	else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
 	    PL_bufend[-1] = '\n';
 #endif
-	if (PERLDB_LINE && PL_curstash != PL_debstash)
+	if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
 	    update_debugger_info(PL_linestr, NULL, 0);
 	if (*s == term && memEQ(s,PL_tokenbuf,len)) {
 	    STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
@@ -11889,7 +11987,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
 	CopLINE_inc(PL_curcop);
 
 	/* update debugger info */
-	if (PERLDB_LINE && PL_curstash != PL_debstash)
+	if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
 	    update_debugger_info(PL_linestr, NULL, 0);
 
 	/* having changed the buffer, we must update PL_bufend */
@@ -12372,8 +12470,8 @@ S_scan_formline(pTHX_ register char *s)
     bool eofmt = FALSE;
 #ifdef PERL_MAD
     char *tokenstart = s;
-    SV* savewhite;
-    
+    SV* savewhite = NULL;
+
     if (PL_madskills) {
 	savewhite = PL_thiswhite;
 	PL_thiswhite = 0;
@@ -12508,12 +12606,12 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
     save_item(PL_subname);
     SAVESPTR(PL_compcv);
 
-    PL_compcv = (CV*)newSV_type(is_format ? SVt_PVFM : SVt_PVCV);
+    PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
     CvFLAGS(PL_compcv) |= flags;
 
     PL_subline = CopLINE(PL_curcop);
     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
-    CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc_simple(outsidecv);
+    CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
 
     return oldsavestack_ix;
@@ -12522,8 +12620,8 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
 #ifdef __SC__
 #pragma segment Perl_yylex
 #endif
-int
-Perl_yywarn(pTHX_ const char *const s)
+static int
+S_yywarn(pTHX_ const char *const s)
 {
     dVAR;
 
@@ -12850,7 +12948,7 @@ Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
 	if (*s == 'v')
 	    s++;  /* get past 'v' */
 
-	sv_setpvn(sv, "", 0);
+	sv_setpvs(sv, "");
 
 	for (;;) {
 	    /* this is atoi() that tolerates underscores */