X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/4bff32c56f5c1d1515069d4ce0d05531758f6561..43275f00a97a14a80f9493c38895a5c77f0fc88a:/toke.c

diff --git a/toke.c b/toke.c
index 028c685..b6da013 100644
--- a/toke.c
+++ b/toke.c
@@ -99,9 +99,9 @@ static const char* const ident_too_long = "Identifier too long";
 #define XFAKEBRACK 0x80
 
 #ifdef USE_UTF8_SCRIPTS
-#   define UTF (!IN_BYTES)
+#   define UTF cBOOL(!IN_BYTES)
 #else
-#   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
+#   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 #endif
 
 /* The maximum number of characters preceding the unrecognized one to display */
@@ -206,7 +206,7 @@ static const char* const lex_state_names[] = {
 
 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
-#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
+#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
@@ -220,14 +220,14 @@ static const char* const lex_state_names[] = {
 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
-#define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
-#define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
-#define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
-#define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
+#define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
+#define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
+#define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
+#define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
-#define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
+#define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
-#define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
+#define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 
@@ -486,7 +486,7 @@ S_ao(pTHX_ int toketype)
 	    pl_yylval.ival = OP_DORASSIGN;
 	toketype = ASSIGNOP;
     }
-    return toketype;
+    return REPORT(toketype);
 }
 
 /*
@@ -1987,7 +1987,6 @@ S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
  *       a keyword (do this if the word is a label, e.g. goto FOO)
  *   int allow_pack : if true, : characters will also be allowed (require,
  *       use, etc. do this)
- *   int allow_initial_tick : used by the "sub" lexer only.
  */
 
 STATIC char *
@@ -3085,6 +3084,7 @@ S_scan_const(pTHX_ char *start)
 	     * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
 	     * in spite of this, we do have to process \N here while the proper
 	     * charnames handler is in scope.  See bugs #56444 and #62056.
+             *
 	     * There is a complication because \N in a pattern may also stand
 	     * for 'match a non-nl', and not mean a charname, in which case its
 	     * processing should be deferred to the regex compiler.  To be a
@@ -3219,31 +3219,44 @@ S_scan_const(pTHX_ char *start)
 		continue;
 
  	    case 'N':
-		/* In a non-pattern \N must be a named character, like \N{LATIN
-		 * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
-		 * mean to match a non-newline.  For non-patterns, named
-		 * characters are converted to their string equivalents. In
-		 * patterns, named characters are not converted to their
-		 * ultimate forms for the same reasons that other escapes
-		 * aren't.  Instead, they are converted to the \N{U+...} form
-		 * to get the value from the charnames that is in effect right
-		 * now, while preserving the fact that it was a named character
-		 * so that the regex compiler knows this */
-
-		/* The structure of this section of code (besides checking for
+                /* In a non-pattern \N must be like \N{U+0041}, or it can be a
+                 * named character, like \N{LATIN SMALL LETTER A}, or a named
+                 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
+                 * GRAVE}.  For convenience all three forms are referred to as
+                 * "named characters" below.
+                 *
+                 * For patterns, \N also can mean to match a non-newline.  Code
+                 * before this 'switch' statement should already have handled
+                 * this situation, and hence this code only has to deal with
+                 * the named character cases.
+                 *
+                 * For non-patterns, the named characters are converted to
+                 * their string equivalents.  In patterns, named characters are
+                 * not converted to their ultimate forms for the same reasons
+                 * that other escapes aren't.  Instead, they are converted to
+                 * the \N{U+...} form to get the value from the charnames that
+                 * is in effect right now, while preserving the fact that it
+                 * was a named character, so that the regex compiler knows
+                 * this.
+                 *
+		 * The structure of this section of code (besides checking for
 		 * errors and upgrading to utf8) is:
-		 *  Further disambiguate between the two meanings of \N, and if
-		 *	not a charname, go process it elsewhere
-		 *  If of form \N{U+...}, pass it through if a pattern;
-		 *	otherwise convert to utf8
-		 *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
-		 *  pattern; otherwise convert to utf8 */
-
-		/* Here, s points to the 'N'; the test below is guaranteed to
-		 * succeed if we are being called on a pattern as we already
-		 * know from a test above that the next character is a '{'.
-		 * On a non-pattern \N must mean 'named sequence, which
-		 * requires braces */
+                 *  If the named character is of the form \N{U+...}, pass it
+                 *      through if a pattern; otherwise convert the code point
+                 *      to utf8
+                 *  Otherwise must be some \N{NAME}: convert to \N{U+c1.c2...}
+                 *      if a pattern; otherwise convert to utf8
+                 *
+                 * If the regex compiler should ever need to differentiate
+                 * between the \N{U+...} and \N{name} forms, that could easily
+                 * be done here by stripping any leading zeros from the
+                 * \N{U+...} case, and adding them to the other one. */
+
+                /* Here, 's' points to the 'N'; the test below is guaranteed to
+		 * succeed if we are being called on a pattern, as we already
+                 * know from a test above that the next character is a '{'.  A
+                 * non-pattern \N must mean 'named character', which requires
+                 * braces */
 		s++;
 		if (*s != '{') {
 		    yyerror("Missing braces on \\N{}"); 
@@ -3268,8 +3281,6 @@ S_scan_const(pTHX_ char *start)
 				| PERL_SCAN_DISALLOW_PREFIX;
 		    STRLEN len;
 
-		    /* For \N{U+...}, the '...' is a unicode value even on
-		     * EBCDIC machines */
 		    s += 2;	    /* Skip to next char after the 'U+' */
 		    len = e - s;
 		    uv = grok_hex(s, &len, &flags, NULL);
@@ -3280,27 +3291,26 @@ S_scan_const(pTHX_ char *start)
 		    }
 
 		    if (PL_lex_inpat) {
-
-			/* On non-EBCDIC platforms, pass through to the regex
-			 * compiler unchanged.  The reason we evaluated the
-			 * number above is to make sure there wasn't a syntax
-			 * error.  But on EBCDIC we convert to native so
-			 * downstream code can continue to assume it's native
-			 */
 			s -= 5;	    /* Include the '\N{U+' */
 #ifdef EBCDIC
-			d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
+                        /* On EBCDIC platforms, in \N{U+...}, the '...' is a
+                         * Unicode value, so convert to native so downstream
+                         * code can continue to assume it's native */
+			d += my_snprintf(d, e - s + 1 + 1,  /* includes the '}'
 							       and the \0 */
-				    "\\N{U+%X}",
-				    (unsigned int) UNI_TO_NATIVE(uv));
+                                         "\\N{U+%X}",
+                                         (unsigned int) UNI_TO_NATIVE(uv));
 #else
-			Copy(s, d, e - s + 1, char);	/* 1 = include the } */
+                        /* On non-EBCDIC platforms, pass it through unchanged.
+                         * The reason we evaluated the number above is to make
+                         * sure there wasn't a syntax error. */
+			Copy(s, d, e - s + 1, char);	/* +1 is for the '}' */
 			d += e - s + 1;
 #endif
 		    }
 		    else {  /* Not a pattern: convert the hex to string */
 
-			 /* If destination is not in utf8, unconditionally
+                         /* If the destination is not in utf8, unconditionally
 			  * recode it to be so.  This is because \N{} implies
 			  * Unicode semantics, and scalars have to be in utf8
 			  * to guarantee those semantics */
@@ -3353,13 +3363,18 @@ S_scan_const(pTHX_ char *start)
                                  * through the string.  Each character takes up
                                  * 2 hex digits plus either a trailing dot or
                                  * the "}" */
+                                const char initial_text[] = "\\N{U+";
+                                const STRLEN initial_len = sizeof(initial_text)
+                                                           - 1;
                                 d = off + SvGROW(sv, off
                                                     + 3 * len
-                                                    + 6 /* For the "\N{U+", and
-                                                           trailing NUL */
+
+                                                    /* +1 for trailing NUL */
+                                                    + initial_len + 1
+
                                                     + (STRLEN)(send - e));
-                                Copy("\\N{U+", d, 5, char);
-                                d += 5;
+                                Copy(initial_text, d, initial_len, char);
+                                d += initial_len;
                                 while (str < str_end) {
                                     char hex_string[4];
                                     int len =
@@ -3371,7 +3386,7 @@ S_scan_const(pTHX_ char *start)
                                     d += 3;
                                     str++;
                                 }
-                                d--;    /* We will overwrite below the final
+                                d--;    /* Below, we will overwrite the final
                                            dot with a right brace */
                             }
                             else {
@@ -5486,9 +5501,10 @@ Perl_yylex(pTHX)
 		    OPERATOR(HASHBRACK);
 		}
 		if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
-		    /* ${...} or @{...} etc., but not print {...} */
-		    PL_expect = XTERM;
-		    break;
+		    /* ${...} or @{...} etc., but not print {...}
+		     * Skip the disambiguation and treat this as a block.
+		     */
+		    goto block_expectation;
 		}
 		/* This hack serves to disambiguate a pair of curlies
 		 * as being a block or an anon hash.  Normally, expectation
@@ -5572,7 +5588,28 @@ Perl_yylex(pTHX)
 				   || (*t == '=' && t[1] == '>')))
 		    OPERATOR(HASHBRACK);
 		if (PL_expect == XREF)
-		    PL_expect = XTERM;
+		{
+		  block_expectation:
+		    /* If there is an opening brace or 'sub:', treat it
+		       as a term to make ${{...}}{k} and &{sub:attr...}
+		       dwim.  Otherwise, treat it as a statement, so
+		       map {no strict; ...} works.
+		     */
+		    s = skipspace(s);
+		    if (*s == '{') {
+			PL_expect = XTERM;
+			break;
+		    }
+		    if (strnEQ(s, "sub", 3)) {
+			d = s + 3;
+			d = skipspace(d);
+			if (*d == ':') {
+			    PL_expect = XTERM;
+			    break;
+			}
+		    }
+		    PL_expect = XSTATE;
+		}
 		else {
 		    PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
 		    PL_expect = XSTATE;
@@ -5581,8 +5618,7 @@ Perl_yylex(pTHX)
 	    break;
 	}
 	pl_yylval.ival = CopLINE(PL_curcop);
-	if (isSPACE(*s) || *s == '#')
-	    PL_copline = NOLINE;   /* invalidate current command line number */
+	PL_copline = NOLINE;   /* invalidate current command line number */
 	TOKEN(formbrack ? '=' : '{');
     case '}':
 	if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
@@ -5797,7 +5833,7 @@ Perl_yylex(pTHX)
 	if (PL_expect != XOPERATOR) {
 	    if (s[1] != '<' && !strchr(s,'>'))
 		check_uni();
-	    if (s[1] == '<')
+	    if (s[1] == '<' && s[2] != '>')
 		s = scan_heredoc(s);
 	    else
 		s = scan_inputsymbol(s);
@@ -6215,7 +6251,7 @@ Perl_yylex(pTHX)
 	    }
 	    /* avoid v123abc() or $h{v1}, allow C<print v10;> */
 	    if (!isALPHA(*start) && (PL_expect == XTERM
-			|| PL_expect == XREF || PL_expect == XSTATE
+			|| PL_expect == XSTATE
 			|| PL_expect == XTERMORDORDOR)) {
 		GV *const gv = gv_fetchpvn_flags(s, start - s,
                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
@@ -6552,7 +6588,11 @@ Perl_yylex(pTHX)
 		    rv2cv_op =
 			newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
 		    cv = lex
-			? isGV(gv) ? GvCV(gv) : (CV *)gv
+			? isGV(gv)
+			    ? GvCV(gv)
+			    : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV
+				? (CV *)SvRV(gv)
+				: ((CV *)gv)
 			: rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB);
 		}
 
@@ -6681,7 +6721,6 @@ Perl_yylex(pTHX)
 		/* Not a method, so call it a subroutine (if defined) */
 
 		if (cv) {
-		    OP *gvop;
 		    /* Check for a constant sub */
 		    if ((sv = cv_const_sv_or_av(cv))) {
 		  its_constant:
@@ -6699,20 +6738,6 @@ Perl_yylex(pTHX)
 			TOKEN(WORD);
 		    }
 
-		    /* Resolve to GV now if this is a placeholder. */
-		    if (!off && (gvop = cUNOPx(rv2cv_op)->op_first)
-		     && gvop->op_type == OP_GV) {
-			GV *gv2 = cGVOPx_gv(gvop);
-			if (gv2 && !isGV(gv2)) {
-			    gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
-			    assert (SvTYPE(gv) == SVt_PVGV);
-			    /* cv must have been some sort of placeholder,
-			       so now needs replacing with a real code
-			       reference.  */
-			    cv = GvCV(gv);
-			}
-		    }
-
 		    op_free(pl_yylval.opval);
 		    pl_yylval.opval =
 			off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
@@ -6921,7 +6946,9 @@ Perl_yylex(pTHX)
 	}
 
 	case KEY___SUB__:
-	    FUN0OP(newPVOP(OP_RUNCV,0,NULL));
+	    FUN0OP(CvCLONE(PL_compcv)
+			? newOP(OP_RUNCV, 0)
+			: newPVOP(OP_RUNCV,0,NULL));
 
 	case KEY_AUTOLOAD:
 	case KEY_DESTROY:
@@ -8126,10 +8153,13 @@ S_pending_ident(pTHX)
         }
         else {
             if (has_colon) {
+                /* "my" variable %s can't be in a package */
                 /* PL_no_myglob is constant */
                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
-			    PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
+                            PL_in_my == KEY_my ? "my" : "state",
+                            *PL_tokenbuf == '&' ? "subroutin" : "variabl",
+                            PL_tokenbuf),
                             UTF ? SVf_UTF8 : 0);
                 GCC_DIAG_RESTORE;
             }
@@ -8255,12 +8285,20 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
 	    s++;
 	if (*s == ',') {
 	    GV* gv;
+	    PADOFFSET off;
 	    if (keyword(w, s - w, 0))
 		return;
 
 	    gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
 	    if (gv && GvCVu(gv))
 		return;
+	    if (s - w <= 254) {
+		char tmpbuf[256];
+		Copy(w, tmpbuf+1, s - w, char);
+		*tmpbuf = '&';
+		off = pad_findmy_pvn(tmpbuf, s-w+1, UTF ? SVf_UTF8 : 0);
+		if (off != NOT_IN_PAD) return;
+	    }
 	    Perl_croak(aTHX_ "No comma allowed after %s", what);
 	}
     }
@@ -8535,25 +8573,52 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
 
 /* Is the byte 'd' a legal single character identifier name?  'u' is true
  * iff Unicode semantics are to be used.  The legal ones are any of:
- *  a) ASCII digits
- *  b) ASCII punctuation
+ *  a) all ASCII characters except:
+ *          1) space-type ones, like \t and SPACE;
+            2) NUL;
+ *          3) '{'
+ *     The final case currently doesn't get this far in the program, so we
+ *     don't test for it.  If that were to change, it would be ok to allow it.
  *  c) When not under Unicode rules, any upper Latin1 character
- *  d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally
- *     been matched by \s on ASCII platforms.  That is: \c?, plus 1-32, minus
- *     the \s ones. */
-#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d))                       \
-                                   || isDIGIT_A((U8)(d))                    \
-                                   || (!(u) && !isASCII((U8)(d)))           \
-                                   || ((((U8)(d)) < 32)                     \
-                                       && (((((U8)(d)) >= 14)               \
-                                           || (((U8)(d)) <= 8 && (d) != 0) \
-                                           || (((U8)(d)) == 13))))          \
-                                   || (((U8)(d)) == toCTRL('?')))
-    if (s < PL_bufend
-        && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
+ *  d) Otherwise, when unicode rules are used, all XIDS characters.
+ *
+ *      Because all ASCII characters have the same representation whether
+ *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
+ *      '{' without knowing if is UTF-8 or not */
+#ifdef EBCDIC
+#   define VALID_LEN_ONE_IDENT(s, is_utf8)                                    \
+    (isGRAPH_A(*(s)) || ((is_utf8)                                            \
+                         ? isIDFIRST_utf8((U8*) (s))                          \
+                         : (isGRAPH_L1(*s)                                    \
+                            && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
+#else
+#   define VALID_LEN_ONE_IDENT(s, is_utf8) (! isSPACE_A(*(s))                 \
+                                            && LIKELY(*(s) != '\0')           \
+                                            && (! is_utf8                     \
+                                                || isASCII_utf8((U8*) (s))    \
+                                                || isIDFIRST_utf8((U8*) (s))))
+#endif
+    if ((s <= PL_bufend - (is_utf8)
+                          ? UTF8SKIP(s)
+                          : 1)
+        && VALID_LEN_ONE_IDENT(s, is_utf8))
     {
-        if ( isCNTRL_A((U8)*s) ) {
-            deprecate("literal control characters in variable names");
+        /* Deprecate all non-graphic characters.  Include SHY as a non-graphic,
+         * because often it has no graphic representation.  (We can't get to
+         * here with SHY when 'is_utf8' is true, so no need to include a UTF-8
+         * test for it.) */
+        if ((is_utf8)
+            ? ! isGRAPH_utf8( (U8*) s)
+            : (! isGRAPH_L1( (U8) *s)
+               || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD))))
+        {
+            /* Split messages for back compat */
+            if (isCNTRL_A( (U8) *s)) {
+                deprecate("literal control characters in variable names");
+            }
+            else {
+                deprecate("literal non-graphic characters in variable names");
+            }
         }
         
         if (is_utf8) {
@@ -8672,14 +8737,14 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
 }
 
 static bool
-S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
-
-    /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
-     * the parse starting at 's', based on the subset that are valid in this
-     * context input to this routine in 'valid_flags'. Advances s.  Returns
-     * TRUE if the input should be treated as a valid flag, so the next char
-     * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
-     * first call on the current regex.  This routine will set it to any
+S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
+
+    /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
+     * found in the parse starting at 's', based on the subset that are valid
+     * in this context input to this routine in 'valid_flags'. Advances s.
+     * Returns TRUE if the input should be treated as a valid flag, so the next
+     * char may be as well; otherwise FALSE. 'charset' should point to a NUL
+     * upon first call on the current regex.  This routine will set it to any
      * charset modifier found.  The caller shouldn't change it.  This way,
      * another charset modifier encountered in the parse can be detected as an
      * error, as we have decided to allow only one */
@@ -8701,7 +8766,7 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse
 
     switch (c) {
 
-        CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
+        CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
@@ -8776,6 +8841,7 @@ S_scan_pat(pTHX_ char *start, I32 type)
     const char * const valid_flags =
 	(const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
     char charset = '\0';    /* character set modifier */
+    unsigned int x_mod_count = 0;
 
     PERL_ARGS_ASSERT_SCAN_PAT;
 
@@ -8825,7 +8891,9 @@ S_scan_pat(pTHX_ char *start, I32 type)
 	pm->op_pmflags |= PMf_IS_QR;
     }
 
-    while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
+    while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
+                                &s, &charset, &x_mod_count))
+    {};
     /* issue a warning if /c is specified,but /g is not */
     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
     {
@@ -8833,6 +8901,8 @@ S_scan_pat(pTHX_ char *start, I32 type)
 		       "Use of /c modifier is meaningless without /g" );
     }
 
+    STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
+
     PL_lex_op = (OP*)pm;
     pl_yylval.ival = OP_MATCH;
     return s;
@@ -8847,6 +8917,7 @@ S_scan_subst(pTHX_ char *start)
     line_t first_line;
     I32 es = 0;
     char charset = '\0';    /* character set modifier */
+    unsigned int x_mod_count = 0;
     char *t;
 
     PERL_ARGS_ASSERT_SCAN_SUBST;
@@ -8880,12 +8951,15 @@ S_scan_subst(pTHX_ char *start)
 	    s++;
 	    es++;
 	}
-	else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
+	else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
+                                  &s, &charset, &x_mod_count))
 	{
 	    break;
 	}
     }
 
+    STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
+
     if ((pm->op_pmflags & PMf_CONTINUE)) {
         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
     }
@@ -9203,7 +9277,14 @@ S_scan_heredoc(pTHX_ char *s)
 		    origline + 1 + PL_parser->herelines);
 	if (!lex_next_chunk(LEX_NO_TERM)
 	 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
-	    SvREFCNT_dec(linestr_save);
+	    /* Simply freeing linestr_save might seem simpler here, as it
+	       does not matter what PL_linestr points to, since we are
+	       about to croak; but in a quote-like op, linestr_save
+	       will have been prospectively freed already, via
+	       SAVEFREESV(PL_linestr) in sublex_push, so itâs easier to
+	       restore PL_linestr. */
+	    SvREFCNT_dec_NN(PL_linestr);
+	    PL_linestr = linestr_save;
 	    goto interminable;
 	}
 	CopLINE_set(PL_curcop, origline);
@@ -9232,7 +9313,8 @@ S_scan_heredoc(pTHX_ char *s)
 	else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
 	    PL_bufend[-1] = '\n';
 #endif
-	if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
+	if (*s == term && PL_bufend-s >= len
+	 && memEQ(s,PL_tokenbuf + 1,len)) {
 	    SvREFCNT_dec(PL_linestr);
 	    PL_linestr = linestr_save;
 	    PL_linestart = SvPVX(linestr_save);
@@ -9273,6 +9355,7 @@ S_scan_heredoc(pTHX_ char *s)
    This code handles:
 
    <>		read from ARGV
+   <<>>		read from ARGV without magic open
    <FH> 	read from filehandle
    <pkg::FH>	read from package qualified filehandle
    <pkg'FH>	read from package qualified filehandle
@@ -9287,6 +9370,7 @@ S_scan_inputsymbol(pTHX_ char *start)
     char *s = start;		/* current position in buffer */
     char *end;
     I32 len;
+    bool nomagicopen = FALSE;
     char *d = PL_tokenbuf;					/* start of temp holding space */
     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;	/* end of temp holding space */
 
@@ -9295,7 +9379,14 @@ S_scan_inputsymbol(pTHX_ char *start)
     end = strchr(s, '\n');
     if (!end)
 	end = PL_bufend;
-    s = delimcpy(d, e, s + 1, end, '>', &len);	/* extract until > */
+    if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
+        nomagicopen = TRUE;
+        *d = '\0';
+        len = 0;
+        s += 3;
+    }
+    else
+        s = delimcpy(d, e, s + 1, end, '>', &len);	/* extract until > */
 
     /* die if we didn't have space for the contents of the <>,
        or if it didn't end, or if we see a newline
@@ -9405,7 +9496,7 @@ intro_sym:
 			op_append_elem(OP_LIST,
 			    newGVOP(OP_GV, 0, gv),
 			    newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
-		: (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
+		: (OP*)newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
 	    pl_yylval.ival = OP_NULL;
 	}
     }
@@ -10456,7 +10547,7 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
     CvFLAGS(PL_compcv) |= flags;
 
     PL_subline = CopLINE(PL_curcop);
-    CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
+    CvPADLIST_set(PL_compcv, pad_new(padnew_SAVE|padnew_SAVESUB));
     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
     if (outsidecv && CvPADLIST(outsidecv))
@@ -10473,7 +10564,6 @@ S_yywarn(pTHX_ const char *const s, U32 flags)
 
     PL_in_eval |= EVAL_WARNONLY;
     yyerror_pv(s, flags);
-    PL_in_eval &= ~EVAL_WARNONLY;
     return 0;
 }
 
@@ -10577,6 +10667,7 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
         PL_multi_end = 0;
     }
     if (PL_in_eval & EVAL_WARNONLY) {
+	PL_in_eval &= ~EVAL_WARNONLY;
 	Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
     }
     else
@@ -11367,7 +11458,6 @@ S_parse_opt_lexvar(pTHX)
     PL_bufptr = s;
     if (d == PL_tokenbuf+1)
 	return NULL;
-    *d = 0;
     var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV,
 		OPf_MOD | (OPpLVAL_INTRO<<8));
     var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0);
@@ -11448,10 +11538,16 @@ Perl_parse_subsignature(pTHX)
 				scalar(newUNOP(OP_RV2AV, 0,
 				    newGVOP(OP_GV, 0, PL_defgv))),
 				newSVOP(OP_CONST, 0, newSViv(1))),
-			    newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
-				newSVOP(OP_CONST, 0,
-				    newSVpvs("Odd name/value argument "
-					"for subroutine"))));
+		            op_convert_list(OP_DIE, 0,
+		                op_convert_list(OP_SPRINTF, 0,
+		                    op_append_list(OP_LIST,
+		                        newSVOP(OP_CONST, 0,
+		                            newSVpvs("Odd name/value argument for subroutine at %s line %d.\n")),
+		                        newSLICEOP(0,
+		                            op_append_list(OP_LIST,
+		                                newSVOP(OP_CONST, 0, newSViv(1)),
+		                                newSVOP(OP_CONST, 0, newSViv(2))),
+		                            newOP(OP_CALLER, 0))))));
 		    if (pos != min_arity)
 			chkop = newLOGOP(OP_AND, 0,
 				    newBINOP(OP_GT, 0,
@@ -11514,9 +11610,16 @@ Perl_parse_subsignature(pTHX)
 			scalar(newUNOP(OP_RV2AV, 0,
 			    newGVOP(OP_GV, 0, PL_defgv))),
 			newSVOP(OP_CONST, 0, newSViv(min_arity))),
-		    newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
-			newSVOP(OP_CONST, 0,
-			    newSVpvs("Too few arguments for subroutine"))))),
+		    op_convert_list(OP_DIE, 0,
+		        op_convert_list(OP_SPRINTF, 0,
+		            op_append_list(OP_LIST,
+		                newSVOP(OP_CONST, 0,
+		                    newSVpvs("Too few arguments for subroutine at %s line %d.\n")),
+		                newSLICEOP(0,
+		                    op_append_list(OP_LIST,
+		                        newSVOP(OP_CONST, 0, newSViv(1)),
+		                        newSVOP(OP_CONST, 0, newSViv(2))),
+		                    newOP(OP_CALLER, 0))))))),
 	    initops);
     }
     if (max_arity != -1) {
@@ -11527,9 +11630,16 @@ Perl_parse_subsignature(pTHX)
 			scalar(newUNOP(OP_RV2AV, 0,
 			    newGVOP(OP_GV, 0, PL_defgv))),
 			newSVOP(OP_CONST, 0, newSViv(max_arity))),
-		    newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
-			newSVOP(OP_CONST, 0,
-			    newSVpvs("Too many arguments for subroutine"))))),
+		    op_convert_list(OP_DIE, 0,
+		        op_convert_list(OP_SPRINTF, 0,
+		            op_append_list(OP_LIST,
+		                newSVOP(OP_CONST, 0,
+		                    newSVpvs("Too many arguments for subroutine at %s line %d.\n")),
+		                newSLICEOP(0,
+		                    op_append_list(OP_LIST,
+		                        newSVOP(OP_CONST, 0, newSViv(1)),
+		                        newSVOP(OP_CONST, 0, newSViv(2))),
+		                    newOP(OP_CALLER, 0))))))),
 	    initops);
     }
     return initops;