X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/19c6248174a2dd57782271308b166ab6bc3e63c8..39c012bc2fc2f1cf310f6d4ba499ec58a7bad900:/toke.c

diff --git a/toke.c b/toke.c
index 08607ef..439ce15 100644
--- a/toke.c
+++ b/toke.c
@@ -148,6 +148,9 @@ static const char ident_too_long[] = "Identifier too long";
 /* LEX_* are values for PL_lex_state, the state of the lexer.
  * They are arranged oddly so that the guard on the switch statement
  * can get by with a single comparison (if the compiler is smart enough).
+ *
+ * These values refer to the various states within a sublex parse,
+ * i.e. within a double quotish string
  */
 
 /* #define LEX_NOTPARSING		11 is done in perl.h. */
@@ -272,9 +275,9 @@ static const char* const lex_state_names[] = {
  * The UNIDOR macro is for unary functions that can be followed by the //
  * operator (such as C<shift // 0>).
  */
-#define UNI2(f,x) { \
+#define UNI3(f,x,have_x) { \
 	pl_yylval.ival = f; \
-	PL_expect = x; \
+	if (have_x) PL_expect = x; \
 	PL_bufptr = s; \
 	PL_last_uni = PL_oldbufptr; \
 	PL_last_lop_op = f; \
@@ -283,22 +286,14 @@ static const char* const lex_state_names[] = {
 	s = PEEKSPACE(s); \
 	return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 	}
-#define UNI(f)    UNI2(f,XTERM)
-#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
+#define UNI(f)    UNI3(f,XTERM,1)
+#define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 #define UNIPROTO(f,optional) { \
 	if (optional) PL_last_uni = PL_oldbufptr; \
 	OPERATOR(f); \
 	}
 
-#define UNIBRACK(f) { \
-	pl_yylval.ival = f; \
-	PL_bufptr = s; \
-	PL_last_uni = PL_oldbufptr; \
-	if (*s == '(') \
-	    return REPORT( (int)FUNC1 ); \
-	s = PEEKSPACE(s); \
-	return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
-	}
+#define UNIBRACK(f) UNI3(f,0,0)
 
 /* grandfather return to old style */
 #define OLDLOP(f) \
@@ -359,7 +354,7 @@ static struct debug_tokens {
     { GIVEN,		TOKENTYPE_IVAL,		"GIVEN" },
     { HASHBRACK,	TOKENTYPE_NONE,		"HASHBRACK" },
     { IF,		TOKENTYPE_IVAL,		"IF" },
-    { LABEL,		TOKENTYPE_PVAL,		"LABEL" },
+    { LABEL,		TOKENTYPE_OPVAL,	"LABEL" },
     { LOCAL,		TOKENTYPE_IVAL,		"LOCAL" },
     { LOOPEX,		TOKENTYPE_OPNUM,	"LOOPEX" },
     { LSTOP,		TOKENTYPE_OPNUM,	"LSTOP" },
@@ -537,24 +532,28 @@ S_no_op(pTHX_ const char *const what, char *s)
 	s = oldbp;
     else
 	PL_bufptr = s;
-    yywarn(Perl_form(aTHX_ "%s found where operator expected", what), 0);
+    yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
     if (ckWARN_d(WARN_SYNTAX)) {
 	if (is_first)
 	    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 		    "\t(Missing semicolon on previous line?)\n");
 	else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 	    const char *t;
-	    for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
+	    for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':');
+                                                            t += UTF ? UTF8SKIP(t) : 1)
 		NOOP;
 	    if (t < PL_bufptr && isSPACE(*t))
 		Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-			"\t(Do you need to predeclare %.*s?)\n",
-		    (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
+			"\t(Do you need to predeclare %"SVf"?)\n",
+		    SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
+                                   SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
 	}
 	else {
 	    assert(s >= oldbp);
 	    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-		    "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
+		    "\t(Missing operator before %"SVf"?)\n",
+                    SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
+                                    SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
 	}
     }
     PL_bufptr = oldbp;
@@ -2345,14 +2344,10 @@ S_tokeq(pTHX_ SV *sv)
  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
  * interact with PL_lex_state, and create fake ( ... ) argument lists
  * to handle functions and concatenation.
- * They assume that whoever calls them will be setting up a fake
- * join call, because each subthing puts a ',' after it.  This lets
- *   "lower \luPpEr"
- * become
- *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
- *
- * (I'm not sure whether the spurious commas at the end of lcfirst's
- * arguments and join's arguments are created or not).
+ * For example,
+ *   "foo\lbar"
+ * is tokenised as
+ *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
  */
 
 /*
@@ -2446,6 +2441,7 @@ S_sublex_push(pTHX)
     SAVEI32(PL_lex_casemods);
     SAVEI32(PL_lex_starts);
     SAVEI8(PL_lex_state);
+    SAVEPPTR(PL_sublex_info.re_eval_start);
     SAVEVPTR(PL_lex_inpat);
     SAVEI16(PL_lex_inwhat);
     SAVECOPLINE(PL_curcop);
@@ -2462,6 +2458,7 @@ S_sublex_push(pTHX)
 
     PL_linestr = PL_lex_stuff;
     PL_lex_stuff = NULL;
+    PL_sublex_info.re_eval_start = NULL;
 
     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
 	= SvPVX(PL_linestr);
@@ -2571,8 +2568,11 @@ S_sublex_done(pTHX)
 /*
   scan_const
 
-  Extracts a pattern, double-quoted string, or transliteration.  This
-  is terrifying code.
+  Extracts the next constant part of a pattern, double-quoted string,
+  or transliteration.  This is terrifying code.
+
+  For example, in parsing the double-quoted string "ab\x63$d", it would
+  stop at the '$' and return an OP_CONST containing 'abc'.
 
   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
   processing a pattern (PL_lex_inpat is true), a transliteration
@@ -2580,15 +2580,22 @@ S_sublex_done(pTHX)
 
   Returns a pointer to the character scanned up to. If this is
   advanced from the start pointer supplied (i.e. if anything was
-  successfully parsed), will leave an OP for the substring scanned
+  successfully parsed), will leave an OP_CONST for the substring scanned
   in pl_yylval. Caller must intuit reason for not parsing further
   by looking at the next characters herself.
 
   In patterns:
-    backslashes:
-      constants: \N{NAME} only
-      case and quoting: \U \Q \E
-    stops on @ and $, but not for $ as tail anchor
+    expand:
+      \N{ABC}  => \N{U+41.42.43}
+
+    pass through:
+	all other \-char, including \N and \N{ apart from \N{ABC}
+
+    stops on:
+	@ and $ where it appears to be a var, but not for $ as tail anchor
+        \l \L \u \U \Q \E
+	(?{  or  (??{
+
 
   In transliterations:
     characters are VERY literal, except for - not at the start or end
@@ -2618,7 +2625,7 @@ S_sublex_done(pTHX)
   it's a tail anchor if $ is the last thing in the string, or if it's
   followed by one of "()| \r\n\t"
 
-  \1 (backreferences) are turned into $1
+  \1 (backreferences) are turned into $1 in substitutions
 
   The structure of the code is
       while (there's a character to process) {
@@ -2657,6 +2664,7 @@ S_scan_const(pTHX_ char *start)
     register char *d = SvPVX(sv);		/* destination for copies */
     bool dorange = FALSE;			/* are we in a translit range? */
     bool didrange = FALSE;		        /* did we just finish a range? */
+    bool in_charclass = FALSE;			/* within /[...]/ */
     bool has_utf8 = FALSE;			/* Output constant is UTF8 */
     bool  this_utf8 = cBOOL(UTF);		/* Is the source string assumed
 						   to be UTF8?  But, this can
@@ -2846,33 +2854,38 @@ S_scan_const(pTHX_ char *start)
 
 	/* if we get here, we're not doing a transliteration */
 
-	/* skip for regexp comments /(?#comment)/ and code /(?{code})/,
-	   except for the last char, which will be done separately. */
+	else if (*s == '[' && PL_lex_inpat && !in_charclass) {
+	    char *s1 = s-1;
+	    int esc = 0;
+	    while (s1 >= start && *s1-- == '\\')
+		esc = !esc;
+	    if (!esc)
+		in_charclass = TRUE;
+	}
+
+	else if (*s == ']' && PL_lex_inpat &&  in_charclass) {
+	    char *s1 = s-1;
+	    int esc = 0;
+	    while (s1 >= start && *s1-- == '\\')
+		esc = !esc;
+	    if (!esc)
+		in_charclass = FALSE;
+	}
+
+	/* skip for regexp comments /(?#comment)/, except for the last
+	 * char, which will be done separately.
+	 * Stop on (?{..}) and friends */
+
 	else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
 	    if (s[2] == '#') {
 		while (s+1 < send && *s != ')')
 		    *d++ = NATIVE_TO_NEED(has_utf8,*s++);
 	    }
-	    else if (s[2] == '{' /* This should match regcomp.c */
-		    || (s[2] == '?' && s[3] == '{'))
+	    else if (!PL_lex_casemods && !in_charclass &&
+		     (    s[2] == '{' /* This should match regcomp.c */
+		      || (s[2] == '?' && s[3] == '{')))
 	    {
-		I32 count = 1;
-		char *regparse = s + (s[2] == '{' ? 3 : 4);
-		char c;
-
-		while (count && (c = *regparse)) {
-		    if (c == '\\' && regparse[1])
-			regparse++;
-		    else if (c == '{')
-			count++;
-		    else if (c == '}')
-			count--;
-		    regparse++;
-		}
-		if (*regparse != ')')
-		    regparse--;		/* Leave one char for continuation. */
-		while (s < regparse)
-		    *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+		break;
 	    }
 	}
 
@@ -2883,6 +2896,10 @@ S_scan_const(pTHX_ char *start)
 		*d++ = NATIVE_TO_NEED(has_utf8,*s++);
 	}
 
+	/* no further processing of single-quoted regex */
+	else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
+	    goto default_action;
+
 	/* check for embedded arrays
 	   (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
 	   */
@@ -2966,7 +2983,7 @@ S_scan_const(pTHX_ char *start)
 		/* FALL THROUGH */
 	    default:
 	        {
-		    if ((isALPHA(*s) || isDIGIT(*s)))
+		    if ((isALNUMC(*s)))
 			Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
 				       "Unrecognized escape \\%c passed through",
 				       *s);
@@ -3002,29 +3019,16 @@ S_scan_const(pTHX_ char *start)
 
 	    /* eg. \x24 indicates the hex constant 0x24 */
 	    case 'x':
-		++s;
-		if (*s == '{') {
-		    char* const e = strchr(s, '}');
-                    I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
-                      PERL_SCAN_DISALLOW_PREFIX;
+		{
 		    STRLEN len;
+		    const char* error;
 
-                    ++s;
-		    if (!e) {
-			yyerror("Missing right brace on \\x{}");
+		    bool valid = grok_bslash_x(s, &uv, &len, &error, 1);
+		    s += len;
+		    if (! valid) {
+			yyerror(error);
 			continue;
 		    }
-                    len = e - s;
-		    uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
-		    s = e + 1;
-		}
-		else {
-		    {
-			STRLEN len = 2;
-                        I32 flags = PERL_SCAN_DISALLOW_PREFIX;
-			uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
-			s += len;
-		    }
 		}
 
 	      NUM_ESCAPE_INSERT:
@@ -3552,6 +3556,9 @@ S_scan_const(pTHX_ char *start)
 	    } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
 		type = "s";
 		typelen = 1;
+	    } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
+		type = "q";
+		typelen = 1;
 	    } else  {
 		type = "qq";
 		typelen = 2;
@@ -3728,7 +3735,7 @@ S_intuit_more(pTHX_ register char *s)
  *
  * First argument is the stuff after the first token, e.g. "bar".
  *
- * Not a method if bar is a filehandle.
+ * Not a method if foo is a filehandle.
  * Not a method if foo is a subroutine prototyped to take a filehandle.
  * Not a method if it's really "Foo $bar"
  * Method if it's "foo $bar"
@@ -3766,8 +3773,7 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
 			return 0;
 		}
 	    }
-	} else
-	    gv = NULL;
+	}
     }
     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
     /* start is the beginning of the possible filehandle/object,
@@ -3776,7 +3782,7 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
      */
 
     if (*start == '$') {
-	if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
+	if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
 		isUPPER(*PL_tokenbuf))
 	    return 0;
 #ifdef PERL_MAD
@@ -3803,7 +3809,7 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
 	if (indirgv && GvCVu(indirgv))
 	    return 0;
 	/* filehandle or package name makes it a method */
-	if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
+	if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
 #ifdef PERL_MAD
 	    soff = s - SvPVX(PL_linestr);
 #endif
@@ -4227,6 +4233,7 @@ Perl_madlex(pTHX)
     case FUNC0SUB:
     case UNIOPSUB:
     case LSTOPSUB:
+    case LABEL:
 	if (pl_yylval.opval)
 	    append_madprops(PL_thismad, pl_yylval.opval, 0);
 	PL_thismad = 0;
@@ -4287,10 +4294,6 @@ Perl_madlex(pTHX)
 	}
 	break;
 
-    /* pval */
-    case LABEL:
-	break;
-
     /* ival */
     default:
 	break;
@@ -4614,7 +4617,7 @@ Perl_yylex(pTHX)
     case LEX_INTERPSTART:
 	if (PL_bufptr == PL_bufend)
 	    return REPORT(sublex_done());
-	DEBUG_T({ PerlIO_printf(Perl_debug_log,
+	DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
               "### Interpolated variable\n"); });
 	PL_expect = XTERM;
 	PL_lex_dojoin = (*PL_bufptr == '@');
@@ -4635,6 +4638,18 @@ Perl_yylex(pTHX)
 	    NEXTVAL_NEXTTOKE.ival = OP_JOIN;	/* emulate join($", ...) */
 	    force_next(FUNC);
 	}
+	/* Convert (?{...}) and friends to 'do {...}' */
+	if (PL_lex_inpat && *PL_bufptr == '(') {
+	    PL_sublex_info.re_eval_start = PL_bufptr;
+	    PL_bufptr += 2;
+	    if (*PL_bufptr != '{')
+		PL_bufptr++;
+	    start_force(PL_curforce);
+	    /* XXX probably need a CURMAD(something) here */
+	    PL_expect = XTERMBLOCK;
+	    force_next(DO);
+	}
+
 	if (PL_lex_starts++) {
 	    s = PL_bufptr;
 #ifdef PERL_MAD
@@ -4680,6 +4695,24 @@ Perl_yylex(pTHX)
 		Perl_croak(aTHX_ "Bad evalled substitution pattern");
 	    PL_lex_repl = NULL;
 	}
+	if (PL_sublex_info.re_eval_start) {
+	    if (*PL_bufptr != ')')
+		Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
+	    PL_bufptr++;
+	    /* having compiled a (?{..}) expression, return the original
+	     * text too, as a const */
+	    start_force(PL_curforce);
+	    /* XXX probably need a CURMAD(something) here */
+	    NEXTVAL_NEXTTOKE.opval =
+		    (OP*)newSVOP(OP_CONST, 0,
+			newSVpvn(PL_sublex_info.re_eval_start,
+				PL_bufptr - PL_sublex_info.re_eval_start));
+	    force_next(THING);
+	    PL_sublex_info.re_eval_start = NULL;
+	    PL_expect = XTERM;
+	    return REPORT(',');
+	}
+
 	/* FALLTHROUGH */
     case LEX_INTERPCONCAT:
 #ifdef DEBUGGING
@@ -4690,12 +4723,10 @@ Perl_yylex(pTHX)
 	if (PL_bufptr == PL_bufend)
 	    return REPORT(sublex_done());
 
-	if (SvIVX(PL_linestr) == '\'') {
+	/* m'foo' still needs to be parsed for possible (?{...}) */
+	if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
 	    SV *sv = newSVsv(PL_linestr);
-	    if (!PL_lex_inpat)
-		sv = tokeq(sv);
-	    else if ( PL_hints & HINT_NEW_RE )
-		sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
+	    sv = tokeq(sv);
 	    pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
 	    s = PL_bufend;
 	}
@@ -4761,7 +4792,12 @@ Perl_yylex(pTHX)
 	if (isIDFIRST_lazy_if(s,UTF))
 	    goto keylookup;
 	{
-        unsigned char c = *s;
+        SV *dsv = newSVpvs_flags("", SVs_TEMP);
+        const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
+                                                    UTF8SKIP(s),
+                                                    SVs_TEMP | SVf_UTF8),
+                                            10, UNI_DISPLAY_ISPRINT))
+                            : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
@@ -4769,7 +4805,10 @@ Perl_yylex(pTHX)
             d = PL_linestart;
         }	
         *s = '\0';
-        Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
+        sv_setpv(dsv, d);
+        if (UTF)
+            SvUTF8_on(dsv);
+        Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
     }
     case 4:
     case 26:
@@ -5486,7 +5525,7 @@ Perl_yylex(pTHX)
 		}
 		sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
 		if (*d == '(') {
-		    d = scan_str(d,TRUE,TRUE);
+		    d = scan_str(d,TRUE,TRUE,FALSE);
 		    if (!d) {
 			/* MUST advance bufptr here to avoid bogus
 			   "at end of line" context messages from yyerror().
@@ -6172,10 +6211,12 @@ Perl_yylex(pTHX)
 					      &len);
 				while (isSPACE(*t))
 				    t++;
-				if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
+				if (*t == ';'
+                                       && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
 				    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-						"You need to quote \"%s\"",
-						tmpbuf);
+						"You need to quote \"%"SVf"\"",
+						  SVfARG(newSVpvn_flags(tmpbuf, len, 
+                                                    SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
 			    }
 			}
 		}
@@ -6254,15 +6295,17 @@ Perl_yylex(pTHX)
 		if (ckWARN(WARN_SYNTAX)) {
 		    const char *t = s + 1;
 		    while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
-			t++;
+			t += UTF ? UTF8SKIP(t) : 1;
 		    if (*t == '}' || *t == ']') {
 			t++;
 			PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
        /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
 			Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-			    "Scalar value %.*s better written as $%.*s",
-			    (int)(t-PL_bufptr), PL_bufptr,
-			    (int)(t-PL_bufptr-1), PL_bufptr+1);
+			    "Scalar value %"SVf" better written as $%"SVf,
+			    SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
+                                                SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
+                            SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
+                                                SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
 		    }
 		}
 	    }
@@ -6378,7 +6421,7 @@ Perl_yylex(pTHX)
 	TERM(THING);
 
     case '\'':
-	s = scan_str(s,!!PL_madskills,FALSE);
+	s = scan_str(s,!!PL_madskills,FALSE,FALSE);
 	DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
 	if (PL_expect == XOPERATOR) {
 	    if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
@@ -6393,7 +6436,7 @@ Perl_yylex(pTHX)
 	TERM(sublex_start());
 
     case '"':
-	s = scan_str(s,!!PL_madskills,FALSE);
+	s = scan_str(s,!!PL_madskills,FALSE,FALSE);
 	DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
 	if (PL_expect == XOPERATOR) {
 	    if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
@@ -6416,7 +6459,7 @@ Perl_yylex(pTHX)
 	TERM(sublex_start());
 
     case '`':
-	s = scan_str(s,!!PL_madskills,FALSE);
+	s = scan_str(s,!!PL_madskills,FALSE,FALSE);
 	DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
 	if (PL_expect == XOPERATOR)
 	    no_op("Backticks",s);
@@ -6557,7 +6600,9 @@ Perl_yylex(pTHX)
 	if (!anydelim && PL_expect == XSTATE
 	      && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
 	    s = d + 1;
-	    pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
+	    pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
+                                            newSVpvn_flags(PL_tokenbuf,
+                                                        len, UTF ? SVf_UTF8 : 0));
 	    CLINE;
 	    TOKEN(LABEL);
 	}
@@ -6643,7 +6688,9 @@ Perl_yylex(pTHX)
 		    s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
 				  TRUE, &morelen);
 		    if (!morelen)
-			Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
+			Perl_croak(aTHX_ "Bad name after %"SVf"%s",
+                                        SVfARG(newSVpvn_flags(PL_tokenbuf, len,
+                                            (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
 				*s == '\'' ? "'" : "::");
 		    len += morelen;
 		    pkgname = 1;
@@ -6669,8 +6716,9 @@ Perl_yylex(pTHX)
 		    if (ckWARN(WARN_BAREWORD)
 			&& ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
 			Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
-		  	    "Bareword \"%s\" refers to nonexistent package",
-			     PL_tokenbuf);
+		  	    "Bareword \"%"SVf"\" refers to nonexistent package",
+			     SVfARG(newSVpvn_flags(PL_tokenbuf, len,
+                                        (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
 		    len -= 2;
 		    PL_tokenbuf[len] = '\0';
 		    gv = NULL;
@@ -6851,17 +6899,19 @@ Perl_yylex(pTHX)
 		/* Not a method, so call it a subroutine (if defined) */
 
 		if (cv) {
-		    if (lastchar == '-')
-			Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
-					 "Ambiguous use of -%s resolved as -&%s()",
-					 PL_tokenbuf, PL_tokenbuf);
+		    if (lastchar == '-') {
+                        const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
+ 			Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
+				"Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
+				SVfARG(tmpsv), SVfARG(tmpsv));
+                    }
 		    /* Check for a constant sub */
 		    if ((sv = cv_const_sv(cv))) {
 		  its_constant:
 			op_free(rv2cv_op);
 			SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
 			((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
-			pl_yylval.opval->op_private = 0;
+			pl_yylval.opval->op_private = OPpCONST_FOLDED;
 			pl_yylval.opval->op_flags |= OPf_SPECIAL;
 			TOKEN(WORD);
 		    }
@@ -7026,8 +7076,10 @@ Perl_yylex(pTHX)
 	    safe_bareword:
 		if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
 		    Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
-				     "Operator or semicolon missing before %c%s",
-				     lastchar, PL_tokenbuf);
+				     "Operator or semicolon missing before %c%"SVf,
+				     lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
+                                                    strlen(PL_tokenbuf),
+                                                    SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
 		    Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
 				     "Ambiguous use of %c resolved as operator %c",
 				     lastchar, lastchar);
@@ -7178,7 +7230,9 @@ Perl_yylex(pTHX)
 		d = s;
 		s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
 		if (!(tmp = keyword(PL_tokenbuf, len, 1)))
-		    Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
+		    Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
+                                    SVfARG(newSVpvn_flags(PL_tokenbuf, len,
+                                                (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
 		if (tmp < 0)
 		    tmp = -tmp;
 		else if (tmp == KEY_require || tmp == KEY_do
@@ -7286,8 +7340,13 @@ Perl_yylex(pTHX)
 	    s = SKIPSPACE1(s);
 	    if (*s == '{')
 		PRETERMBLOCK(DO);
-	    if (*s != '\'')
-		s = force_word(s,WORD,TRUE,TRUE,FALSE);
+	    if (*s != '\'') {
+		d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, 1, &len);
+		if (len) {
+		    d = SKIPSPACE1(d);
+		    if (*d == '(') s = force_word(s,WORD,TRUE,TRUE,FALSE);
+		}
+	    }
 	    if (orig_keyword == KEY_do) {
 		orig_keyword = 0;
 		pl_yylval.ival = 1;
@@ -7657,7 +7716,7 @@ Perl_yylex(pTHX)
 		    char tmpbuf[1024];
 		    PL_bufptr = s;
 		    my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
-		    yyerror(tmpbuf);
+		    yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
 		}
 #ifdef PERL_MAD
 		if (PL_madskills) {	/* just add type to declarator token */
@@ -7697,8 +7756,14 @@ Perl_yylex(pTHX)
 	    s = SKIPSPACE1(s);
 	    if (isIDFIRST_lazy_if(s,UTF)) {
 		const char *t;
-		for (d = s; isALNUM_lazy_if(d,UTF);)
-		    d++;
+		for (d = s; isALNUM_lazy_if(d,UTF);) {
+		    d += UTF ? UTF8SKIP(d) : 1;
+                    if (UTF) {
+                        while (UTF8_IS_CONTINUED(*d) && is_utf8_mark((U8*)d)) {
+                            d += UTF ? UTF8SKIP(d) : 1;
+                        }
+                    }
+                }
 		for (t=d; isSPACE(*t);)
 		    t++;
 		if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
@@ -7707,10 +7772,11 @@ Perl_yylex(pTHX)
 		    && !(t[0] == ':' && t[1] == ':')
 		    && !keyword(s, d-s, 0)
 		) {
-		    int parms_len = (int)(d-s);
+		    SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
+                                                SVs_TEMP | (UTF ? SVf_UTF8 : 0));
 		    Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
-			   "Precedence problem: open %.*s should be open(%.*s)",
-			    parms_len, s, parms_len, s);
+			   "Precedence problem: open %"SVf" should be open(%"SVf")",
+			    SVfARG(tmpsv), SVfARG(tmpsv));
 		}
 	    }
 	    LOP(OP_OPEN,XTERM);
@@ -7764,7 +7830,7 @@ Perl_yylex(pTHX)
 	    LOP(OP_PIPE_OP,XTERM);
 
 	case KEY_q:
-	    s = scan_str(s,!!PL_madskills,FALSE);
+	    s = scan_str(s,!!PL_madskills,FALSE,FALSE);
 	    if (!s)
 		missingterm(NULL);
 	    pl_yylval.ival = OP_CONST;
@@ -7775,7 +7841,7 @@ Perl_yylex(pTHX)
 
 	case KEY_qw: {
 	    OP *words = NULL;
-	    s = scan_str(s,!!PL_madskills,FALSE);
+	    s = scan_str(s,!!PL_madskills,FALSE,FALSE);
 	    if (!s)
 		missingterm(NULL);
 	    PL_expect = XOPERATOR;
@@ -7825,7 +7891,7 @@ Perl_yylex(pTHX)
 	}
 
 	case KEY_qq:
-	    s = scan_str(s,!!PL_madskills,FALSE);
+	    s = scan_str(s,!!PL_madskills,FALSE,FALSE);
 	    if (!s)
 		missingterm(NULL);
 	    pl_yylval.ival = OP_STRINGIFY;
@@ -7838,7 +7904,7 @@ Perl_yylex(pTHX)
 	    TERM(sublex_start());
 
 	case KEY_qx:
-	    s = scan_str(s,!!PL_madskills,FALSE);
+	    s = scan_str(s,!!PL_madskills,FALSE,FALSE);
 	    if (!s)
 		missingterm(NULL);
 	    readpipe_override();
@@ -8147,7 +8213,7 @@ Perl_yylex(pTHX)
 		    const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
                     STRLEN tmplen;
 
-		    s = scan_str(s,!!PL_madskills,FALSE);
+		    s = scan_str(s,!!PL_madskills,FALSE,FALSE);
 		    if (!s)
 			Perl_croak(aTHX_ "Prototype not terminated");
 		    /* strip spaces and check for bad characters */
@@ -8199,9 +8265,13 @@ Perl_yylex(pTHX)
 				    "Illegal character %sin prototype for %"SVf" : %s",
 				    seen_underscore ? "after '_' " : "",
 				    SVfARG(PL_subname),
-                                    sv_uni_display(dsv,
-                                         newSVpvn_flags(d, tmp, SVs_TEMP | SvUTF8(PL_lex_stuff)),
-                                         tmp, UNI_DISPLAY_ISPRINT));
+                                    SvUTF8(PL_lex_stuff)
+                                        ? sv_uni_display(dsv,
+                                            newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
+                                            tmp,
+                                            UNI_DISPLAY_ISPRINT)
+                                        : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
+                                            PERL_PV_ESCAPE_NONASCII));
                     }
                     SvCUR_set(PL_lex_stuff, tmp);
 		    have_proto = TRUE;
@@ -8432,7 +8502,6 @@ static int
 S_pending_ident(pTHX)
 {
     dVAR;
-    register char *d;
     PADOFFSET tmp = 0;
     /* pit holds the identifier we read and pending_ident is reset */
     char pit = PL_pending_ident;
@@ -8454,15 +8523,16 @@ S_pending_ident(pTHX)
     if (PL_in_my) {
         if (PL_in_my == KEY_our) {	/* "our" is merely analogous to "my" */
             if (has_colon)
-                yyerror(Perl_form(aTHX_ "No package name allowed for "
+                yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
                                   "variable %s in \"our\"",
-                                  PL_tokenbuf));
+                                  PL_tokenbuf), UTF ? SVf_UTF8 : 0);
             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
         }
         else {
             if (has_colon)
-                yyerror(Perl_form(aTHX_ PL_no_myglob,
-			    PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
+                yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
+			    PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
+                            UTF ? SVf_UTF8 : 0);
 
             pl_yylval.opval = newOP(OP_PADANY, 0);
             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
@@ -8473,14 +8543,6 @@ S_pending_ident(pTHX)
 
     /*
        build the ops for accesses to a my() variable.
-
-       Deny my($a) or my($b) in a sort block, *if* $a or $b is
-       then used in a comparison.  This catches most, but not
-       all cases.  For instance, it catches
-           sort { my($a); $a <=> $b }
-       but not
-           sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
-       (although why you'd do that is anyone's guess).
     */
 
     if (!has_colon) {
@@ -8509,23 +8571,6 @@ S_pending_ident(pTHX)
                 return WORD;
             }
 
-            /* if it's a sort block and they're naming $a or $b */
-            if (PL_last_lop_op == OP_SORT &&
-                PL_tokenbuf[0] == '$' &&
-                (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
-                && !PL_tokenbuf[2])
-            {
-                for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
-                     d < PL_bufend && *d != '\n';
-                     d++)
-                {
-                    if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
-                        Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
-                              PL_tokenbuf);
-                    }
-                }
-            }
-
             pl_yylval.opval = newOP(OP_PADANY, 0);
             pl_yylval.opval->op_targ = tmp;
             return PRIVATEREF;
@@ -8549,8 +8594,9 @@ S_pending_ident(pTHX)
         {
             /* Downgraded from fatal to warning 20000522 mjd */
             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
-			"Possible unintended interpolation of %s in string",
-			PL_tokenbuf);
+			"Possible unintended interpolation of %"SVf" in string",
+			SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
+                                        SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
         }
     }
 
@@ -8603,9 +8649,10 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
     while (s < PL_bufend && isSPACE(*s))
 	s++;
     if (isIDFIRST_lazy_if(s,UTF)) {
-	const char * const w = s++;
+	const char * const w = s;
+        s += UTF ? UTF8SKIP(s) : 1;
 	while (isALNUM_lazy_if(s,UTF))
-	    s++;
+	    s += UTF ? UTF8SKIP(s) : 1;
 	while (s < PL_bufend && isSPACE(*s))
 	    s++;
 	if (*s == ',') {
@@ -8758,7 +8805,7 @@ S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_packag
     for (;;) {
 	if (d >= e)
 	    Perl_croak(aTHX_ ident_too_long);
-	if (isALNUM(*s))	/* UTF handled below */
+	if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s)))	/* UTF handled below */
 	    *d++ = *s++;
 	else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
 	    *d++ = ':';
@@ -8854,8 +8901,6 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
 	bracket = s;
 	s++;
     }
-    else if (ck_uni)
-	check_uni();
     if (s < send) {
         if (UTF) {
             const STRLEN skip = UTF8SKIP(s);
@@ -8873,6 +8918,8 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
 	*d = toCTRL(*s);
 	s++;
     }
+    else if (ck_uni && !bracket)
+	check_uni();
     if (bracket) {
 	if (isSPACE(s[-1])) {
 	    while (s < send) {
@@ -8943,13 +8990,15 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
 	    if (PL_lex_state == LEX_NORMAL) {
 		if (ckWARN(WARN_AMBIGUOUS) &&
 		    (keyword(dest, d - dest, 0)
-		     || get_cvn_flags(dest, d - dest, 0)))
+		     || get_cvn_flags(dest, d - dest, UTF ? SVf_UTF8 : 0)))
 		{
+                    SV *tmp = newSVpvn_flags( dest, d - dest,
+                                            SVs_TEMP | (UTF ? SVf_UTF8 : 0) );
 		    if (funny == '#')
 			funny = '@';
 		    Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
-			"Ambiguous use of %c{%s} resolved to %c%s",
-			funny, dest, funny, dest);
+			"Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
+			funny, SVfARG(tmp), funny, SVfARG(tmp));
 		}
 	    }
 	}
@@ -8969,18 +9018,24 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse
     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
      * the parse starting at 's', based on the subset that are valid in this
      * context input to this routine in 'valid_flags'. Advances s.  Returns
-     * TRUE if the input was a valid flag, so the next char may be as well;
-     * otherwise FALSE. 'charset' should point to a NUL upon first call on the
-     * current regex.  This routine will set it to any charset modifier found.
-     * The caller shouldn't change it.  This way, another charset modifier
-     * encountered in the parse can be detected as an error, as we have decided
-     * allow only one */
+     * TRUE if the input should be treated as a valid flag, so the next char
+     * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
+     * first call on the current regex.  This routine will set it to any
+     * charset modifier found.  The caller shouldn't change it.  This way,
+     * another charset modifier encountered in the parse can be detected as an
+     * error, as we have decided to allow only one */
 
     const char c = **s;
-
-    if (! strchr(valid_flags, c)) {
-        if (isALNUM(c)) {
-	    goto deprecate;
+    STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
+
+    if ( charlen != 1 || ! strchr(valid_flags, c) ) {
+        if (isALNUM_lazy_if(*s, UTF)) {
+            yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
+                       UTF ? SVf_UTF8 : 0);
+            (*s) += charlen;
+            /* Pretend that it worked, so will continue processing before
+             * dying */
+            return TRUE;
         }
         return FALSE;
     }
@@ -8994,34 +9049,6 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse
         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
 	case LOCALE_PAT_MOD:
-
-	    /* In 5.14, qr//lt is legal but deprecated; the 't' means they
-	     * can't be regex modifiers.
-	     * In 5.14, s///le is legal and ambiguous.  Try to disambiguate as
-	     * much as easily done.  s///lei, for example, has to mean regex
-	     * modifiers if it's not an error (as does any word character
-	     * following the 'e').  Otherwise, we resolve to the backwards-
-	     * compatible, but less likely 's/// le ...', i.e. as meaning
-	     * less-than-or-equal.  The reason it's not likely is that s//
-	     * returns a number for code in the field (/r returns a string, but
-	     * that wasn't added until the 5.13 series), and so '<=' should be
-	     * used for comparing, not 'le'. */
-	    if (*((*s) + 1) == 't') {
-		goto deprecate;
-	    }
-	    else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) {
-
-		/* 'e' is valid only for substitutes, s///e.  If it is not
-		 * valid in the current context, then 'm//le' must mean the
-		 * comparison operator, so use the regular deprecation message.
-		 */
-		if (! strchr(valid_flags, 'e')) {
-		    goto deprecate;
-		}
-		Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
-		    "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'.  In Perl 5.18, it will be resolved the other way");
-		return FALSE;
-	    }
 	    if (*charset) {
 		goto multiple_charsets;
 	    }
@@ -9029,11 +9056,6 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse
 	    *charset = c;
 	    break;
 	case UNICODE_PAT_MOD:
-	    /* In 5.14, qr//unless and qr//until are legal but deprecated; the
-	     * 'n' means they can't be regex modifiers */
-	    if (*((*s) + 1) == 'n') {
-		goto deprecate;
-	    }
 	    if (*charset) {
 		goto multiple_charsets;
 	    }
@@ -9041,12 +9063,6 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse
 	    *charset = c;
 	    break;
 	case ASCII_RESTRICT_PAT_MOD:
-	    /* In 5.14, qr//and is legal but deprecated; the 'n' means they
-	     * can't be regex modifiers */
-	    if (*((*s) + 1) == 'n') {
-		goto deprecate;
-	    }
-
 	    if (! *charset) {
 		set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
 	    }
@@ -9076,11 +9092,6 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse
     (*s)++;
     return TRUE;
 
-    deprecate:
-	Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
-	    "Having no space between pattern and following word is deprecated");
-        return FALSE;
-
     multiple_charsets:
 	if (*charset != c) {
 	    yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
@@ -9102,7 +9113,7 @@ S_scan_pat(pTHX_ char *start, I32 type)
 {
     dVAR;
     PMOP *pm;
-    char *s = scan_str(start,!!PL_madskills,FALSE);
+    char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing);
     const char * const valid_flags =
 	(const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
     char charset = '\0';    /* character set modifier */
@@ -9112,6 +9123,9 @@ S_scan_pat(pTHX_ char *start, I32 type)
 
     PERL_ARGS_ASSERT_SCAN_PAT;
 
+    /* this was only needed for the initial scan_str; set it to false
+     * so that any (?{}) code blocks etc are parsed normally */
+    PL_reg_state.re_reparsing = FALSE;
     if (!s) {
 	const char * const delimiter = skipspace(start);
 	Perl_croak(aTHX_
@@ -9147,6 +9161,25 @@ S_scan_pat(pTHX_ char *start, I32 type)
 #ifdef PERL_MAD
     modstart = s;
 #endif
+
+    /* if qr/...(?{..}).../, then need to parse the pattern within a new
+     * anon CV. False positives like qr/[(?{]/ are harmless */
+
+    if (type == OP_QR) {
+	STRLEN len;
+	char *e, *p = SvPV(PL_lex_stuff, len);
+	e = p + len;
+	for (; p < e; p++) {
+	    if (p[0] == '(' && p[1] == '?'
+		&& (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
+	    {
+		pm->op_pmflags |= PMf_HAS_CV;
+		break;
+	    }
+	}
+	pm->op_pmflags |= PMf_IS_QR;
+    }
+
     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
 #ifdef PERL_MAD
     if (PL_madskills && modstart != s) {
@@ -9183,7 +9216,7 @@ S_scan_subst(pTHX_ char *start)
 
     pl_yylval.ival = OP_NULL;
 
-    s = scan_str(start,!!PL_madskills,FALSE);
+    s = scan_str(start,!!PL_madskills,FALSE,FALSE);
 
     if (!s)
 	Perl_croak(aTHX_ "Substitution pattern not terminated");
@@ -9201,7 +9234,7 @@ S_scan_subst(pTHX_ char *start)
 #endif
 
     first_start = PL_multi_start;
-    s = scan_str(s,!!PL_madskills,FALSE);
+    s = scan_str(s,!!PL_madskills,FALSE,FALSE);
     if (!s) {
 	if (PL_lex_stuff) {
 	    SvREFCNT_dec(PL_lex_stuff);
@@ -9291,7 +9324,7 @@ S_scan_trans(pTHX_ char *start)
 
     pl_yylval.ival = OP_NULL;
 
-    s = scan_str(start,!!PL_madskills,FALSE);
+    s = scan_str(start,!!PL_madskills,FALSE,FALSE);
     if (!s)
 	Perl_croak(aTHX_ "Transliteration pattern not terminated");
 
@@ -9307,7 +9340,7 @@ S_scan_trans(pTHX_ char *start)
     }
 #endif
 
-    s = scan_str(s,!!PL_madskills,FALSE);
+    s = scan_str(s,!!PL_madskills,FALSE,FALSE);
     if (!s) {
 	if (PL_lex_stuff) {
 	    SvREFCNT_dec(PL_lex_stuff);
@@ -9699,7 +9732,7 @@ S_scan_inputsymbol(pTHX_ char *start)
 
     if (d - PL_tokenbuf != len) {
 	pl_yylval.ival = OP_GLOB;
-	s = scan_str(start,!!PL_madskills,FALSE);
+	s = scan_str(start,!!PL_madskills,FALSE,FALSE);
 	if (!s)
 	   Perl_croak(aTHX_ "Glob not terminated");
 	return s;
@@ -9799,6 +9832,8 @@ intro_sym:
    takes: start position in buffer
 	  keep_quoted preserve \ on the embedded delimiter(s)
 	  keep_delims preserve the delimiters around the string
+	  re_reparse  compiling a run-time /(?{})/:
+			collapse \\ to \, and skip encoding src
    returns: position to continue reading from buffer
    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
    	updates the read buffer.
@@ -9839,7 +9874,7 @@ intro_sym:
 */
 
 STATIC char *
-S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
+S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
 {
     dVAR;
     SV *sv;				/* scalar value: string */
@@ -9918,7 +9953,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
     }
 #endif
     for (;;) {
-	if (PL_encoding && !UTF) {
+	if (PL_encoding && !UTF && !re_reparse) {
 	    bool cont = TRUE;
 
 	    while (cont) {
@@ -10000,9 +10035,12 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
 		    CopLINE_inc(PL_curcop);
 		/* handle quoted delimiters */
 		if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
-		    if (!keep_quoted && s[1] == term)
+		    if (!keep_quoted
+		        && (s[1] == term
+			    || (re_reparse && s[1] == '\\'))
+		    )
 			s++;
-		/* any other quotes are simply copied straight through */
+		    /* any other quotes are simply copied straight through */
 		    else
 			*to++ = *s++;
 		}
@@ -10103,7 +10141,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
 
     /* at this point, we have successfully read the delimited string */
 
-    if (!PL_encoding || UTF) {
+    if (!PL_encoding || UTF || re_reparse) {
 #ifdef PERL_MAD
 	if (PL_madskills) {
 	    char * const tstart = SvPVX(PL_linestr) + stuffstart;
@@ -10135,7 +10173,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
 	}
     }
 #endif
-    if (has_utf8 || PL_encoding)
+    if (has_utf8 || (PL_encoding && !re_reparse))
 	SvUTF8_on(sv);
 
     PL_multi_end = CopLINE(PL_curcop);
@@ -10740,18 +10778,6 @@ Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
 }
 
 int
-Perl_yyerror_sv(pTHX_ SV * sv, U32 flags)
-{
-    char *s;
-    STRLEN len;
-    PERL_ARGS_ASSERT_YYERROR_SV;
-    s = SvPV(sv, len);
-    if (SvUTF8(sv))
-       flags |= SVf_UTF8;
-    return yyerror_pvn(s, len, flags);
-}
-
-int
 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
 {
     dVAR;
@@ -11480,15 +11506,10 @@ Perl_parse_label(pTHX_ U32 flags)
     if (PL_lex_state == LEX_KNOWNEXT) {
 	PL_parser->yychar = yylex();
 	if (PL_parser->yychar == LABEL) {
-	    char *lpv = pl_yylval.pval;
-	    STRLEN llen = strlen(lpv);
 	    SV *lsv;
 	    PL_parser->yychar = YYEMPTY;
 	    lsv = newSV_type(SVt_PV);
-	    SvPV_set(lsv, lpv);
-	    SvCUR_set(lsv, llen);
-	    SvLEN_set(lsv, llen+1);
-	    SvPOK_on(lsv);
+	    sv_copypv(lsv, cSVOPx(pl_yylval.opval)->op_sv);
 	    return lsv;
 	} else {
 	    yyunlex();
@@ -11496,17 +11517,12 @@ Perl_parse_label(pTHX_ U32 flags)
 	}
     } else {
 	char *s, *t;
-	U8 c;
 	STRLEN wlen, bufptr_pos;
 	lex_read_space(0);
 	t = s = PL_bufptr;
-	c = (U8)*s;
-	if (!isIDFIRST_A(c))
+        if (!isIDFIRST_lazy_if(s, UTF))
 	    goto no_label;
-	do {
-	    c = (U8)*++t;
-	} while(isWORDCHAR_A(c));
-	wlen = t - s;
+	t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
 	if (word_takes_any_delimeter(s, wlen))
 	    goto no_label;
 	bufptr_pos = s - SvPVX(PL_linestr);
@@ -11518,7 +11534,7 @@ Perl_parse_label(pTHX_ U32 flags)
 	    PL_oldoldbufptr = PL_oldbufptr;
 	    PL_oldbufptr = s;
 	    PL_bufptr = t+1;
-	    return newSVpvn(s, wlen);
+	    return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
 	} else {
 	    PL_bufptr = s;
 	    no_label:
@@ -11611,29 +11627,12 @@ Perl_parse_stmtseq(pTHX_ U32 flags)
     return stmtseqop;
 }
 
-void
-Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
-{
-    PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
-    deprecate("qw(...) as parentheses");
-    force_next((4<<24)|')');
-    if (qwlist->op_type == OP_STUB) {
-	op_free(qwlist);
-    }
-    else {
-	start_force(PL_curforce);
-	NEXTVAL_NEXTTOKE.opval = qwlist;
-	force_next(THING);
-    }
-    force_next((2<<24)|'(');
-}
-
 /*
  * Local variables:
  * c-indentation-style: bsd
  * c-basic-offset: 4
- * indent-tabs-mode: t
+ * indent-tabs-mode: nil
  * End:
  *
- * ex: set ts=8 sts=4 sw=4 noet:
+ * ex: set ts=8 sts=4 sw=4 et:
  */