This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
fix scanning for code blocks
[perl5.git] / toke.c
diff --git a/toke.c b/toke.c
index 346a39d..0570a15 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -148,6 +148,9 @@ static const char ident_too_long[] = "Identifier too long";
 /* LEX_* are values for PL_lex_state, the state of the lexer.
  * They are arranged oddly so that the guard on the switch statement
  * can get by with a single comparison (if the compiler is smart enough).
+ *
+ * These values refer to the various states within a sublex parse,
+ * i.e. within a double quotish string
  */
 
 /* #define LEX_NOTPARSING              11 is done in perl.h. */
@@ -359,7 +362,7 @@ static struct debug_tokens {
     { GIVEN,           TOKENTYPE_IVAL,         "GIVEN" },
     { HASHBRACK,       TOKENTYPE_NONE,         "HASHBRACK" },
     { IF,              TOKENTYPE_IVAL,         "IF" },
-    { LABEL,           TOKENTYPE_PVAL,         "LABEL" },
+    { LABEL,           TOKENTYPE_OPVAL,        "LABEL" },
     { LOCAL,           TOKENTYPE_IVAL,         "LOCAL" },
     { LOOPEX,          TOKENTYPE_OPNUM,        "LOOPEX" },
     { LSTOP,           TOKENTYPE_OPNUM,        "LSTOP" },
@@ -537,24 +540,28 @@ S_no_op(pTHX_ const char *const what, char *s)
        s = oldbp;
     else
        PL_bufptr = s;
-    yywarn(Perl_form(aTHX_ "%s found where operator expected", what), 0);
+    yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
     if (ckWARN_d(WARN_SYNTAX)) {
        if (is_first)
            Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
                    "\t(Missing semicolon on previous line?)\n");
        else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
            const char *t;
-           for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
+           for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':');
+                                                            t += UTF ? UTF8SKIP(t) : 1)
                NOOP;
            if (t < PL_bufptr && isSPACE(*t))
                Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                       "\t(Do you need to predeclare %.*s?)\n",
-                   (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
+                       "\t(Do you need to predeclare %"SVf"?)\n",
+                   SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
+                                   SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
        }
        else {
            assert(s >= oldbp);
            Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                   "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
+                   "\t(Missing operator before %"SVf"?)\n",
+                    SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
+                                    SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
        }
     }
     PL_bufptr = oldbp;
@@ -2345,14 +2352,10 @@ S_tokeq(pTHX_ SV *sv)
  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
  * interact with PL_lex_state, and create fake ( ... ) argument lists
  * to handle functions and concatenation.
- * They assume that whoever calls them will be setting up a fake
- * join call, because each subthing puts a ',' after it.  This lets
- *   "lower \luPpEr"
- * become
- *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
- *
- * (I'm not sure whether the spurious commas at the end of lcfirst's
- * arguments and join's arguments are created or not).
+ * For example,
+ *   "foo\lbar"
+ * is tokenised as
+ *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
  */
 
 /*
@@ -2446,6 +2449,7 @@ S_sublex_push(pTHX)
     SAVEI32(PL_lex_casemods);
     SAVEI32(PL_lex_starts);
     SAVEI8(PL_lex_state);
+    SAVEPPTR(PL_sublex_info.re_eval_start);
     SAVEVPTR(PL_lex_inpat);
     SAVEI16(PL_lex_inwhat);
     SAVECOPLINE(PL_curcop);
@@ -2462,6 +2466,7 @@ S_sublex_push(pTHX)
 
     PL_linestr = PL_lex_stuff;
     PL_lex_stuff = NULL;
+    PL_sublex_info.re_eval_start = NULL;
 
     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
        = SvPVX(PL_linestr);
@@ -2571,8 +2576,11 @@ S_sublex_done(pTHX)
 /*
   scan_const
 
-  Extracts a pattern, double-quoted string, or transliteration.  This
-  is terrifying code.
+  Extracts the next constant part of a pattern, double-quoted string,
+  or transliteration.  This is terrifying code.
+
+  For example, in parsing the double-quoted string "ab\x63$d", it would
+  stop at the '$' and return an OP_CONST containing 'abc'.
 
   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
   processing a pattern (PL_lex_inpat is true), a transliteration
@@ -2580,15 +2588,22 @@ S_sublex_done(pTHX)
 
   Returns a pointer to the character scanned up to. If this is
   advanced from the start pointer supplied (i.e. if anything was
-  successfully parsed), will leave an OP for the substring scanned
+  successfully parsed), will leave an OP_CONST for the substring scanned
   in pl_yylval. Caller must intuit reason for not parsing further
   by looking at the next characters herself.
 
   In patterns:
-    backslashes:
-      constants: \N{NAME} only
-      case and quoting: \U \Q \E
-    stops on @ and $, but not for $ as tail anchor
+    expand:
+      \N{ABC}  => \N{U+41.42.43}
+
+    pass through:
+       all other \-char, including \N and \N{ apart from \N{ABC}
+
+    stops on:
+       @ and $ where it appears to be a var, but not for $ as tail anchor
+        \l \L \u \U \Q \E
+       (?{  or  (??{
+
 
   In transliterations:
     characters are VERY literal, except for - not at the start or end
@@ -2618,7 +2633,7 @@ S_sublex_done(pTHX)
   it's a tail anchor if $ is the last thing in the string, or if it's
   followed by one of "()| \r\n\t"
 
-  \1 (backreferences) are turned into $1
+  \1 (backreferences) are turned into $1 in substitutions
 
   The structure of the code is
       while (there's a character to process) {
@@ -2657,6 +2672,7 @@ S_scan_const(pTHX_ char *start)
     register char *d = SvPVX(sv);              /* destination for copies */
     bool dorange = FALSE;                      /* are we in a translit range? */
     bool didrange = FALSE;                     /* did we just finish a range? */
+    bool in_charclass = FALSE;                 /* within /[...]/ */
     bool has_utf8 = FALSE;                     /* Output constant is UTF8 */
     bool  this_utf8 = cBOOL(UTF);              /* Is the source string assumed
                                                   to be UTF8?  But, this can
@@ -2846,33 +2862,26 @@ S_scan_const(pTHX_ char *start)
 
        /* if we get here, we're not doing a transliteration */
 
-       /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
-          except for the last char, which will be done separately. */
+       else if (in_charclass && *s == ']' && ! (s>start+1 && s[-1] == '\\'))
+           in_charclass = FALSE;
+
+       else if (PL_lex_inpat && *s == '[')
+           in_charclass = TRUE;
+
+       /* skip for regexp comments /(?#comment)/, except for the last
+        * char, which will be done separately.
+        * Stop on (?{..}) and friends */
+
        else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
            if (s[2] == '#') {
                while (s+1 < send && *s != ')')
                    *d++ = NATIVE_TO_NEED(has_utf8,*s++);
            }
-           else if (s[2] == '{' /* This should match regcomp.c */
-                   || (s[2] == '?' && s[3] == '{'))
+           else if (!PL_lex_casemods && !in_charclass &&
+                    (    s[2] == '{' /* This should match regcomp.c */
+                     || (s[2] == '?' && s[3] == '{')))
            {
-               I32 count = 1;
-               char *regparse = s + (s[2] == '{' ? 3 : 4);
-               char c;
-
-               while (count && (c = *regparse)) {
-                   if (c == '\\' && regparse[1])
-                       regparse++;
-                   else if (c == '{')
-                       count++;
-                   else if (c == '}')
-                       count--;
-                   regparse++;
-               }
-               if (*regparse != ')')
-                   regparse--;         /* Leave one char for continuation. */
-               while (s < regparse)
-                   *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+               break;
            }
        }
 
@@ -2883,6 +2892,10 @@ S_scan_const(pTHX_ char *start)
                *d++ = NATIVE_TO_NEED(has_utf8,*s++);
        }
 
+       /* no further processing of single-quoted regex */
+       else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
+           goto default_action;
+
        /* check for embedded arrays
           (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
           */
@@ -3552,6 +3565,9 @@ S_scan_const(pTHX_ char *start)
            } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
                type = "s";
                typelen = 1;
+           } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
+               type = "q";
+               typelen = 1;
            } else  {
                type = "qq";
                typelen = 2;
@@ -4227,6 +4243,7 @@ Perl_madlex(pTHX)
     case FUNC0SUB:
     case UNIOPSUB:
     case LSTOPSUB:
+    case LABEL:
        if (pl_yylval.opval)
            append_madprops(PL_thismad, pl_yylval.opval, 0);
        PL_thismad = 0;
@@ -4287,10 +4304,6 @@ Perl_madlex(pTHX)
        }
        break;
 
-    /* pval */
-    case LABEL:
-       break;
-
     /* ival */
     default:
        break;
@@ -4614,7 +4627,7 @@ Perl_yylex(pTHX)
     case LEX_INTERPSTART:
        if (PL_bufptr == PL_bufend)
            return REPORT(sublex_done());
-       DEBUG_T({ PerlIO_printf(Perl_debug_log,
+       DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
               "### Interpolated variable\n"); });
        PL_expect = XTERM;
        PL_lex_dojoin = (*PL_bufptr == '@');
@@ -4635,6 +4648,18 @@ Perl_yylex(pTHX)
            NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
            force_next(FUNC);
        }
+       /* Convert (?{...}) and friends to 'do {...}' */
+       if (PL_lex_inpat && *PL_bufptr == '(') {
+           PL_sublex_info.re_eval_start = PL_bufptr;
+           PL_bufptr += 2;
+           if (*PL_bufptr != '{')
+               PL_bufptr++;
+           start_force(PL_curforce);
+           /* XXX probably need a CURMAD(something) here */
+           PL_expect = XTERMBLOCK;
+           force_next(DO);
+       }
+
        if (PL_lex_starts++) {
            s = PL_bufptr;
 #ifdef PERL_MAD
@@ -4680,6 +4705,24 @@ Perl_yylex(pTHX)
                Perl_croak(aTHX_ "Bad evalled substitution pattern");
            PL_lex_repl = NULL;
        }
+       if (PL_sublex_info.re_eval_start) {
+           if (*PL_bufptr != ')')
+               Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
+           PL_bufptr++;
+           /* having compiled a (?{..}) expression, return the original
+            * text too, as a const */
+           start_force(PL_curforce);
+           /* XXX probably need a CURMAD(something) here */
+           NEXTVAL_NEXTTOKE.opval =
+                   (OP*)newSVOP(OP_CONST, 0,
+                       newSVpvn(PL_sublex_info.re_eval_start,
+                               PL_bufptr - PL_sublex_info.re_eval_start));
+           force_next(THING);
+           PL_sublex_info.re_eval_start = NULL;
+           PL_expect = XTERM;
+           return REPORT(',');
+       }
+
        /* FALLTHROUGH */
     case LEX_INTERPCONCAT:
 #ifdef DEBUGGING
@@ -4690,12 +4733,10 @@ Perl_yylex(pTHX)
        if (PL_bufptr == PL_bufend)
            return REPORT(sublex_done());
 
-       if (SvIVX(PL_linestr) == '\'') {
+       /* m'foo' still needs to be parsed for possible (?{...}) */
+       if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
            SV *sv = newSVsv(PL_linestr);
-           if (!PL_lex_inpat)
-               sv = tokeq(sv);
-           else if ( PL_hints & HINT_NEW_RE )
-               sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
+           sv = tokeq(sv);
            pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
            s = PL_bufend;
        }
@@ -4761,7 +4802,12 @@ Perl_yylex(pTHX)
        if (isIDFIRST_lazy_if(s,UTF))
            goto keylookup;
        {
-        unsigned char c = *s;
+        SV *dsv = newSVpvs_flags("", SVs_TEMP);
+        const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
+                                                    UTF8SKIP(s),
+                                                    SVs_TEMP | SVf_UTF8),
+                                            10, UNI_DISPLAY_ISPRINT))
+                            : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
@@ -4769,7 +4815,10 @@ Perl_yylex(pTHX)
             d = PL_linestart;
         }      
         *s = '\0';
-        Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
+        sv_setpv(dsv, d);
+        if (UTF)
+            SvUTF8_on(dsv);
+        Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
     }
     case 4:
     case 26:
@@ -6172,10 +6221,12 @@ Perl_yylex(pTHX)
                                              &len);
                                while (isSPACE(*t))
                                    t++;
-                               if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
+                               if (*t == ';'
+                                       && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
                                    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                                               "You need to quote \"%s\"",
-                                               tmpbuf);
+                                               "You need to quote \"%"SVf"\"",
+                                                 SVfARG(newSVpvn_flags(tmpbuf, len, 
+                                                    SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
                            }
                        }
                }
@@ -6254,15 +6305,17 @@ Perl_yylex(pTHX)
                if (ckWARN(WARN_SYNTAX)) {
                    const char *t = s + 1;
                    while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
-                       t++;
+                       t += UTF ? UTF8SKIP(t) : 1;
                    if (*t == '}' || *t == ']') {
                        t++;
                        PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
        /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
                        Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                           "Scalar value %.*s better written as $%.*s",
-                           (int)(t-PL_bufptr), PL_bufptr,
-                           (int)(t-PL_bufptr-1), PL_bufptr+1);
+                           "Scalar value %"SVf" better written as $%"SVf,
+                           SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
+                                                SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
+                            SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
+                                                SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
                    }
                }
            }
@@ -6557,7 +6610,9 @@ Perl_yylex(pTHX)
        if (!anydelim && PL_expect == XSTATE
              && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
            s = d + 1;
-           pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
+           pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
+                                            newSVpvn_flags(PL_tokenbuf,
+                                                        len, UTF ? SVf_UTF8 : 0));
            CLINE;
            TOKEN(LABEL);
        }
@@ -6643,7 +6698,9 @@ Perl_yylex(pTHX)
                    s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
                                  TRUE, &morelen);
                    if (!morelen)
-                       Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
+                       Perl_croak(aTHX_ "Bad name after %"SVf"%s",
+                                        SVfARG(newSVpvn_flags(PL_tokenbuf, len,
+                                            (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
                                *s == '\'' ? "'" : "::");
                    len += morelen;
                    pkgname = 1;
@@ -6669,8 +6726,9 @@ Perl_yylex(pTHX)
                    if (ckWARN(WARN_BAREWORD)
                        && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
                        Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
-                           "Bareword \"%s\" refers to nonexistent package",
-                            PL_tokenbuf);
+                           "Bareword \"%"SVf"\" refers to nonexistent package",
+                            SVfARG(newSVpvn_flags(PL_tokenbuf, len,
+                                        (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
                    len -= 2;
                    PL_tokenbuf[len] = '\0';
                    gv = NULL;
@@ -6851,10 +6909,12 @@ Perl_yylex(pTHX)
                /* Not a method, so call it a subroutine (if defined) */
 
                if (cv) {
-                   if (lastchar == '-')
-                       Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
-                                        "Ambiguous use of -%s resolved as -&%s()",
-                                        PL_tokenbuf, PL_tokenbuf);
+                   if (lastchar == '-') {
+                        const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
+                       Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
+                               "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
+                               SVfARG(tmpsv), SVfARG(tmpsv));
+                    }
                    /* Check for a constant sub */
                    if ((sv = cv_const_sv(cv))) {
                  its_constant:
@@ -7026,8 +7086,10 @@ Perl_yylex(pTHX)
            safe_bareword:
                if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
                    Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
-                                    "Operator or semicolon missing before %c%s",
-                                    lastchar, PL_tokenbuf);
+                                    "Operator or semicolon missing before %c%"SVf,
+                                    lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
+                                                    strlen(PL_tokenbuf),
+                                                    SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
                    Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
                                     "Ambiguous use of %c resolved as operator %c",
                                     lastchar, lastchar);
@@ -7178,7 +7240,9 @@ Perl_yylex(pTHX)
                d = s;
                s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
                if (!(tmp = keyword(PL_tokenbuf, len, 1)))
-                   Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
+                   Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
+                                    SVfARG(newSVpvn_flags(PL_tokenbuf, len,
+                                                (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
                if (tmp < 0)
                    tmp = -tmp;
                else if (tmp == KEY_require || tmp == KEY_do
@@ -7657,7 +7721,7 @@ Perl_yylex(pTHX)
                    char tmpbuf[1024];
                    PL_bufptr = s;
                    my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
-                   yyerror(tmpbuf);
+                   yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
                }
 #ifdef PERL_MAD
                if (PL_madskills) {     /* just add type to declarator token */
@@ -7697,8 +7761,14 @@ Perl_yylex(pTHX)
            s = SKIPSPACE1(s);
            if (isIDFIRST_lazy_if(s,UTF)) {
                const char *t;
-               for (d = s; isALNUM_lazy_if(d,UTF);)
-                   d++;
+               for (d = s; isALNUM_lazy_if(d,UTF);) {
+                   d += UTF ? UTF8SKIP(d) : 1;
+                    if (UTF) {
+                        while (UTF8_IS_CONTINUED(*d) && is_utf8_mark((U8*)d)) {
+                            d += UTF ? UTF8SKIP(d) : 1;
+                        }
+                    }
+                }
                for (t=d; isSPACE(*t);)
                    t++;
                if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
@@ -7707,10 +7777,11 @@ Perl_yylex(pTHX)
                    && !(t[0] == ':' && t[1] == ':')
                    && !keyword(s, d-s, 0)
                ) {
-                   int parms_len = (int)(d-s);
+                   SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
+                                                SVs_TEMP | (UTF ? SVf_UTF8 : 0));
                    Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
-                          "Precedence problem: open %.*s should be open(%.*s)",
-                           parms_len, s, parms_len, s);
+                          "Precedence problem: open %"SVf" should be open(%"SVf")",
+                           SVfARG(tmpsv), SVfARG(tmpsv));
                }
            }
            LOP(OP_OPEN,XTERM);
@@ -8199,9 +8270,13 @@ Perl_yylex(pTHX)
                                    "Illegal character %sin prototype for %"SVf" : %s",
                                    seen_underscore ? "after '_' " : "",
                                    SVfARG(PL_subname),
-                                    sv_uni_display(dsv,
-                                         newSVpvn_flags(d, tmp, SVs_TEMP | SvUTF8(PL_lex_stuff)),
-                                         tmp, UNI_DISPLAY_ISPRINT));
+                                    SvUTF8(PL_lex_stuff)
+                                        ? sv_uni_display(dsv,
+                                            newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
+                                            tmp,
+                                            UNI_DISPLAY_ISPRINT)
+                                        : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
+                                            PERL_PV_ESCAPE_NONASCII));
                     }
                     SvCUR_set(PL_lex_stuff, tmp);
                    have_proto = TRUE;
@@ -8454,15 +8529,16 @@ S_pending_ident(pTHX)
     if (PL_in_my) {
         if (PL_in_my == KEY_our) {     /* "our" is merely analogous to "my" */
             if (has_colon)
-                yyerror(Perl_form(aTHX_ "No package name allowed for "
+                yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
                                   "variable %s in \"our\"",
-                                  PL_tokenbuf));
+                                  PL_tokenbuf), UTF ? SVf_UTF8 : 0);
             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
         }
         else {
             if (has_colon)
-                yyerror(Perl_form(aTHX_ PL_no_myglob,
-                           PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
+                yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
+                           PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
+                            UTF ? SVf_UTF8 : 0);
 
             pl_yylval.opval = newOP(OP_PADANY, 0);
             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
@@ -8549,8 +8625,9 @@ S_pending_ident(pTHX)
         {
             /* Downgraded from fatal to warning 20000522 mjd */
             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
-                       "Possible unintended interpolation of %s in string",
-                       PL_tokenbuf);
+                       "Possible unintended interpolation of %"SVf" in string",
+                       SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
+                                        SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
         }
     }
 
@@ -8603,9 +8680,10 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
     while (s < PL_bufend && isSPACE(*s))
        s++;
     if (isIDFIRST_lazy_if(s,UTF)) {
-       const char * const w = s++;
+       const char * const w = s;
+        s += UTF ? UTF8SKIP(s) : 1;
        while (isALNUM_lazy_if(s,UTF))
-           s++;
+           s += UTF ? UTF8SKIP(s) : 1;
        while (s < PL_bufend && isSPACE(*s))
            s++;
        if (*s == ',') {
@@ -8758,7 +8836,7 @@ S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_packag
     for (;;) {
        if (d >= e)
            Perl_croak(aTHX_ ident_too_long);
-       if (isALNUM(*s))        /* UTF handled below */
+       if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s)))   /* UTF handled below */
            *d++ = *s++;
        else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
            *d++ = ':';
@@ -8854,8 +8932,6 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
        bracket = s;
        s++;
     }
-    else if (ck_uni)
-       check_uni();
     if (s < send) {
         if (UTF) {
             const STRLEN skip = UTF8SKIP(s);
@@ -8873,6 +8949,8 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
        *d = toCTRL(*s);
        s++;
     }
+    else if (ck_uni && !bracket)
+       check_uni();
     if (bracket) {
        if (isSPACE(s[-1])) {
            while (s < send) {
@@ -8943,13 +9021,15 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
            if (PL_lex_state == LEX_NORMAL) {
                if (ckWARN(WARN_AMBIGUOUS) &&
                    (keyword(dest, d - dest, 0)
-                    || get_cvn_flags(dest, d - dest, 0)))
+                    || get_cvn_flags(dest, d - dest, UTF ? SVf_UTF8 : 0)))
                {
+                    SV *tmp = newSVpvn_flags( dest, d - dest,
+                                            SVs_TEMP | (UTF ? SVf_UTF8 : 0) );
                    if (funny == '#')
                        funny = '@';
                    Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
-                       "Ambiguous use of %c{%s} resolved to %c%s",
-                       funny, dest, funny, dest);
+                       "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
+                       funny, tmp, funny, tmp);
                }
            }
        }
@@ -8974,13 +9054,15 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse
      * current regex.  This routine will set it to any charset modifier found.
      * The caller shouldn't change it.  This way, another charset modifier
      * encountered in the parse can be detected as an error, as we have decided
-     * allow only one */
+     * to allow only one */
 
     const char c = **s;
 
     if (! strchr(valid_flags, c)) {
         if (isALNUM(c)) {
-           goto deprecate;
+           yyerror(Perl_form(aTHX_ "Unknown regexp modifier \"/%c\"", c));
+            (*s)++;
+            return TRUE;
         }
         return FALSE;
     }
@@ -8994,34 +9076,6 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse
         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
        case LOCALE_PAT_MOD:
-
-           /* In 5.14, qr//lt is legal but deprecated; the 't' means they
-            * can't be regex modifiers.
-            * In 5.14, s///le is legal and ambiguous.  Try to disambiguate as
-            * much as easily done.  s///lei, for example, has to mean regex
-            * modifiers if it's not an error (as does any word character
-            * following the 'e').  Otherwise, we resolve to the backwards-
-            * compatible, but less likely 's/// le ...', i.e. as meaning
-            * less-than-or-equal.  The reason it's not likely is that s//
-            * returns a number for code in the field (/r returns a string, but
-            * that wasn't added until the 5.13 series), and so '<=' should be
-            * used for comparing, not 'le'. */
-           if (*((*s) + 1) == 't') {
-               goto deprecate;
-           }
-           else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) {
-
-               /* 'e' is valid only for substitutes, s///e.  If it is not
-                * valid in the current context, then 'm//le' must mean the
-                * comparison operator, so use the regular deprecation message.
-                */
-               if (! strchr(valid_flags, 'e')) {
-                   goto deprecate;
-               }
-               Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
-                   "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'.  In Perl 5.18, it will be resolved the other way");
-               return FALSE;
-           }
            if (*charset) {
                goto multiple_charsets;
            }
@@ -9029,11 +9083,6 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse
            *charset = c;
            break;
        case UNICODE_PAT_MOD:
-           /* In 5.14, qr//unless and qr//until are legal but deprecated; the
-            * 'n' means they can't be regex modifiers */
-           if (*((*s) + 1) == 'n') {
-               goto deprecate;
-           }
            if (*charset) {
                goto multiple_charsets;
            }
@@ -9041,12 +9090,6 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse
            *charset = c;
            break;
        case ASCII_RESTRICT_PAT_MOD:
-           /* In 5.14, qr//and is legal but deprecated; the 'n' means they
-            * can't be regex modifiers */
-           if (*((*s) + 1) == 'n') {
-               goto deprecate;
-           }
-
            if (! *charset) {
                set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
            }
@@ -9076,11 +9119,6 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse
     (*s)++;
     return TRUE;
 
-    deprecate:
-       Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
-           "Having no space between pattern and following word is deprecated");
-        return FALSE;
-
     multiple_charsets:
        if (*charset != c) {
            yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
@@ -9147,6 +9185,25 @@ S_scan_pat(pTHX_ char *start, I32 type)
 #ifdef PERL_MAD
     modstart = s;
 #endif
+
+    /* if qr/...(?{..}).../, then need to parse the pattern within a new
+     * anon CV. False positives like qr/[(?{]/ are harmless */
+
+    if (type == OP_QR) {
+       STRLEN len;
+       char *e, *p = SvPV(PL_lex_stuff, len);
+       e = p + len;
+       for (; p < e; p++) {
+           if (p[0] == '(' && p[1] == '?'
+               && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
+           {
+               pm->op_pmflags |= PMf_HAS_CV;
+               break;
+           }
+       }
+       pm->op_pmflags |= PMf_IS_QR;
+    }
+
     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
 #ifdef PERL_MAD
     if (PL_madskills && modstart != s) {
@@ -11468,15 +11525,10 @@ Perl_parse_label(pTHX_ U32 flags)
     if (PL_lex_state == LEX_KNOWNEXT) {
        PL_parser->yychar = yylex();
        if (PL_parser->yychar == LABEL) {
-           char *lpv = pl_yylval.pval;
-           STRLEN llen = strlen(lpv);
            SV *lsv;
            PL_parser->yychar = YYEMPTY;
            lsv = newSV_type(SVt_PV);
-           SvPV_set(lsv, lpv);
-           SvCUR_set(lsv, llen);
-           SvLEN_set(lsv, llen+1);
-           SvPOK_on(lsv);
+           sv_copypv(lsv, cSVOPx(pl_yylval.opval)->op_sv);
            return lsv;
        } else {
            yyunlex();
@@ -11484,17 +11536,12 @@ Perl_parse_label(pTHX_ U32 flags)
        }
     } else {
        char *s, *t;
-       U8 c;
        STRLEN wlen, bufptr_pos;
        lex_read_space(0);
        t = s = PL_bufptr;
-       c = (U8)*s;
-       if (!isIDFIRST_A(c))
+        if (!isIDFIRST_lazy_if(s, UTF))
            goto no_label;
-       do {
-           c = (U8)*++t;
-       } while(isWORDCHAR_A(c));
-       wlen = t - s;
+       t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
        if (word_takes_any_delimeter(s, wlen))
            goto no_label;
        bufptr_pos = s - SvPVX(PL_linestr);
@@ -11506,7 +11553,7 @@ Perl_parse_label(pTHX_ U32 flags)
            PL_oldoldbufptr = PL_oldbufptr;
            PL_oldbufptr = s;
            PL_bufptr = t+1;
-           return newSVpvn(s, wlen);
+           return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
        } else {
            PL_bufptr = s;
            no_label:
@@ -11599,29 +11646,12 @@ Perl_parse_stmtseq(pTHX_ U32 flags)
     return stmtseqop;
 }
 
-void
-Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
-{
-    PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
-    deprecate("qw(...) as parentheses");
-    force_next((4<<24)|')');
-    if (qwlist->op_type == OP_STUB) {
-       op_free(qwlist);
-    }
-    else {
-       start_force(PL_curforce);
-       NEXTVAL_NEXTTOKE.opval = qwlist;
-       force_next(THING);
-    }
-    force_next((2<<24)|'(');
-}
-
 /*
  * Local variables:
  * c-indentation-style: bsd
  * c-basic-offset: 4
- * indent-tabs-mode: t
+ * indent-tabs-mode: nil
  * End:
  *
- * ex: set ts=8 sts=4 sw=4 noet:
+ * ex: set ts=8 sts=4 sw=4 et:
  */