This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Version bumps
[perl5.git] / toke.c
diff --git a/toke.c b/toke.c
index b05ad96..7b5c465 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -133,7 +133,7 @@ static const char ident_too_long[] = "Identifier too long";
 #ifdef USE_UTF8_SCRIPTS
 #   define UTF (!IN_BYTES)
 #else
-#   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
+#   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 #endif
 
 /* The maximum number of characters preceding the unrecognized one to display */
@@ -669,9 +669,8 @@ from which code will be read to be parsed.  If both are non-null, the
 code in I<line> comes first and must consist of complete lines of input,
 and I<rsfp> supplies the remainder of the source.
 
-The I<flags> parameter is reserved for future use, and must always
-be zero, except for one flag that is currently reserved for perl's internal
-use.
+The I<flags> parameter is reserved for future use.  Currently it is only
+used by perl internally, so extensions should always pass zero.
 
 =cut
 */
@@ -684,9 +683,8 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 {
     dVAR;
     const char *s = NULL;
-    STRLEN len;
     yy_parser *parser, *oparser;
-    if (flags && flags != LEX_START_SAME_FILTER)
+    if (flags && flags & ~LEX_START_FLAGS)
        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 
     /* create and initialise a parser */
@@ -717,25 +715,27 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
     parser->rsfp = rsfp;
     parser->rsfp_filters =
       !(flags & LEX_START_SAME_FILTER) || !oparser
-        ? newAV()
-        : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
+        ? NULL
+        : MUTABLE_AV(SvREFCNT_inc(
+            oparser->rsfp_filters
+             ? oparser->rsfp_filters
+             : (oparser->rsfp_filters = newAV())
+          ));
 
     Newx(parser->lex_brackstack, 120, char);
     Newx(parser->lex_casestack, 12, char);
     *parser->lex_casestack = '\0';
 
     if (line) {
+       STRLEN len;
        s = SvPV_const(line, len);
+       parser->linestr = flags & LEX_START_COPIED
+                           ? SvREFCNT_inc_simple_NN(line)
+                           : newSVpvn_flags(s, len, SvUTF8(line));
+       if (!len || s[len-1] != ';')
+           sv_catpvs(parser->linestr, "\n;");
     } else {
-       len = 0;
-    }
-
-    if (!len) {
        parser->linestr = newSVpvs("\n;");
-    } else {
-       parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
-       if (s[len-1] != ';')
-           sv_catpvs(parser->linestr, "\n;");
     }
     parser->oldoldbufptr =
        parser->oldbufptr =
@@ -743,8 +743,9 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
        parser->linestart = SvPVX(parser->linestr);
     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
     parser->last_lop = parser->last_uni = NULL;
+    parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES);
 
-    parser->in_pod = 0;
+    parser->in_pod = parser->filtered = 0;
 }
 
 
@@ -1262,7 +1263,7 @@ Perl_lex_next_chunk(pTHX_ U32 flags)
     }
     if (flags & LEX_FAKE_EOF) {
        goto eof;
-    } else if (!PL_parser->rsfp) {
+    } else if (!PL_parser->rsfp && !PL_parser->filtered) {
        got_some = 0;
     } else if (filter_gets(linestr, old_bufend_pos)) {
        got_some = 1;
@@ -1279,7 +1280,7 @@ Perl_lex_next_chunk(pTHX_ U32 flags)
        else if (PL_parser->rsfp)
            (void)PerlIO_close(PL_parser->rsfp);
        PL_parser->rsfp = NULL;
-       PL_parser->in_pod = 0;
+       PL_parser->in_pod = PL_parser->filtered = 0;
 #ifdef PERL_MAD
        if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
            PL_faketokens = 1;
@@ -1584,7 +1585,7 @@ S_incline(pTHX_ const char *s)
            tmplen = 0;
        }
 
-       if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
+       if (!PL_rsfp && !PL_parser->filtered) {
            /* must copy *{"::_<(eval N)[oldfilename:L]"}
             * to *{"::_<newfilename"} */
            /* However, the long form of evals is only turned on by the
@@ -3739,7 +3740,7 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
            return 0;
        if (cv) {
            if (SvPOK(cv)) {
-               const char *proto = SvPVX_const(cv);
+               const char *proto = CvPROTO(cv);
                if (proto) {
                    if (*proto == ';')
                        proto++;
@@ -3838,6 +3839,9 @@ Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
     if (!PL_parser)
        return NULL;
 
+    if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
+       Perl_croak(aTHX_ "Source filters apply only to byte streams");
+
     if (!PL_rsfp_filters)
        PL_rsfp_filters = newAV();
     if (!datasv)
@@ -3850,6 +3854,45 @@ Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
                          SvPV_nolen(datasv)));
     av_unshift(PL_rsfp_filters, 1);
     av_store(PL_rsfp_filters, 0, datasv) ;
+    if (
+       !PL_parser->filtered
+     && PL_parser->lex_flags & LEX_EVALBYTES
+     && PL_bufptr < PL_bufend
+    ) {
+       const char *s = PL_bufptr;
+       while (s < PL_bufend) {
+           if (*s == '\n') {
+               SV *linestr = PL_parser->linestr;
+               char *buf = SvPVX(linestr);
+               STRLEN const bufptr_pos = PL_parser->bufptr - buf;
+               STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
+               STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
+               STRLEN const linestart_pos = PL_parser->linestart - buf;
+               STRLEN const last_uni_pos =
+                   PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
+               STRLEN const last_lop_pos =
+                   PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
+               av_push(PL_rsfp_filters, linestr);
+               PL_parser->linestr = 
+                   newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
+               buf = SvPVX(PL_parser->linestr);
+               PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
+               PL_parser->bufptr = buf + bufptr_pos;
+               PL_parser->oldbufptr = buf + oldbufptr_pos;
+               PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
+               PL_parser->linestart = buf + linestart_pos;
+               if (PL_parser->last_uni)
+                   PL_parser->last_uni = buf + last_uni_pos;
+               if (PL_parser->last_lop)
+                   PL_parser->last_lop = buf + last_lop_pos;
+               SvLEN(linestr) = SvCUR(linestr);
+               SvCUR(linestr) = s-SvPVX(linestr);
+               PL_parser->filtered = 1;
+               break;
+           }
+           s++;
+       }
+    }
     return(datasv);
 }
 
@@ -3892,7 +3935,7 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
     /* This API is bad. It should have been using unsigned int for maxlen.
        Not sure if we want to change the API, but if not we should sanity
        check the value here.  */
-    const unsigned int correct_length
+    unsigned int correct_length
        = maxlen < 0 ?
 #ifdef PERL_MICRO
        0x7FFFFFFF
@@ -3944,6 +3987,31 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
                              idx));
        return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
     }
+    if (SvTYPE(datasv) != SVt_PVIO) {
+       if (correct_length) {
+           /* Want a block */
+           const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
+           if (!remainder) return 0; /* eof */
+           if (correct_length > remainder) correct_length = remainder;
+           sv_catpvn(buf_sv, SvEND(datasv), correct_length);
+           SvCUR_set(datasv, SvCUR(datasv) + correct_length);
+       } else {
+           /* Want a line */
+           const char *s = SvEND(datasv);
+           const char *send = SvPVX(datasv) + SvLEN(datasv);
+           while (s < send) {
+               if (*s == '\n') {
+                   s++;
+                   break;
+               }
+               s++;
+           }
+           if (s == send) return 0; /* eof */
+           sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
+           SvCUR_set(datasv, s-SvPVX(datasv));
+       }
+       return SvCUR(buf_sv);
+    }
     /* Get function pointer hidden within datasv       */
     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
     DEBUG_P(PerlIO_printf(Perl_debug_log,
@@ -3992,20 +4060,20 @@ S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
 
     if (len > 2 &&
         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
-        (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
+        (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
     {
         return GvHV(gv);                       /* Foo:: */
     }
 
     /* use constant CLASS => 'MyClass' */
-    gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
+    gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
     if (gv && GvCV(gv)) {
        SV * const sv = cv_const_sv(GvCV(gv));
        if (sv)
             pkgname = SvPV_const(sv, len);
     }
 
-    return gv_stashpvn(pkgname, len, 0);
+    return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
 }
 
 /*
@@ -4096,7 +4164,7 @@ Perl_madlex(pTHX)
        }
 
        /* put off final whitespace till peg */
-       if (optype == ';' && !PL_rsfp) {
+       if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
            PL_nextwhite = PL_thiswhite;
            PL_thiswhite = 0;
        }
@@ -4680,7 +4748,7 @@ Perl_yylex(pTHX)
        if (PL_madskills)
            PL_faketokens = 0;
 #endif
-       if (!PL_rsfp) {
+       if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
            PL_last_uni = 0;
            PL_last_lop = 0;
            if (PL_lex_brackets &&
@@ -4827,7 +4895,7 @@ Perl_yylex(pTHX)
                    PL_parser->in_pod = 0;
                }
            }
-           if (PL_rsfp)
+           if (PL_rsfp || PL_parser->filtered)
                incline(s);
        } while (PL_parser->in_pod);
        PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
@@ -5053,15 +5121,17 @@ Perl_yylex(pTHX)
        if (PL_madskills)
            PL_faketokens = 0;
 #endif
-       if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
-           if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
+       if (PL_lex_state != LEX_NORMAL ||
+            (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
+           if (*s == '#' && s == PL_linestart && PL_in_eval
+            && !PL_rsfp && !PL_parser->filtered) {
                /* handle eval qq[#line 1 "foo"\n ...] */
                CopLINE_dec(PL_curcop);
                incline(s);
            }
            if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
                s = SKIPSPACE0(s);
-               if (!PL_in_eval || PL_rsfp)
+               if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
                    incline(s);
            }
            else {
@@ -5382,7 +5452,7 @@ Perl_yylex(pTHX)
                        break;
                    }
                }
-               sv = newSVpvn(s, len);
+               sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
                if (*d == '(') {
                    d = scan_str(d,TRUE,TRUE);
                    if (!d) {
@@ -5838,7 +5908,7 @@ Perl_yylex(pTHX)
            if (PL_expect == XSTATE && isALPHA(tmp) &&
                (s == PL_linestart+1 || s[-2] == '\n') )
                {
-                   if (PL_in_eval && !PL_rsfp) {
+                   if (PL_in_eval && !PL_rsfp && !PL_parser->filtered) {
                        d = PL_bufend;
                        while (s < d) {
                            if (*s++ == '\n') {
@@ -6344,7 +6414,8 @@ Perl_yylex(pTHX)
            else if (!isALPHA(*start) && (PL_expect == XTERM
                        || PL_expect == XREF || PL_expect == XSTATE
                        || PL_expect == XTERMORDORDOR)) {
-               GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
+               GV *const gv = gv_fetchpvn_flags(s, start - s,
+                                                    UTF ? SVf_UTF8 : 0, SVt_PVCV);
                if (!gv) {
                    s = scan_num(s, &pl_yylval);
                    TERM(THING);
@@ -6463,7 +6534,8 @@ Perl_yylex(pTHX)
            GV *hgv = NULL;     /* hidden (loser) */
            if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
                CV *cv;
-               if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
+               if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
+                                            UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
                    (cv = GvCVu(gv)))
                {
                    if (GvIMPORTED_CV(gv))
@@ -6472,7 +6544,8 @@ Perl_yylex(pTHX)
                        hgv = gv;
                }
                if (!ogv &&
-                   (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
+                   (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
+                                            UTF ? -(I32)len : (I32)len, FALSE)) &&
                    (gv = *gvp) && isGV_with_GP(gv) &&
                    GvCVu(gv) && GvIMPORTED_CV(gv))
                {
@@ -6561,7 +6634,7 @@ Perl_yylex(pTHX)
                    PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
                {
                    if (ckWARN(WARN_BAREWORD)
-                       && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
+                       && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
                        Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
                            "Bareword \"%s\" refers to nonexistent package",
                             PL_tokenbuf);
@@ -6577,7 +6650,8 @@ Perl_yylex(pTHX)
                           constants that might already be there into full
                           blown PVGVs with attached PVCV.  */
                        gv = gv_fetchpvn_flags(PL_tokenbuf, len,
-                                              GV_NOADD_NOINIT, SVt_PVCV);
+                                              GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
+                                              SVt_PVCV);
                    }
                    len = 0;
                }
@@ -6771,8 +6845,8 @@ Perl_yylex(pTHX)
 #endif
                        SvPOK(cv))
                    {
-                       STRLEN protolen;
-                       const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
+                       STRLEN protolen = CvPROTOLEN(cv);
+                       const char *proto = CvPROTO(cv);
                        if (!protolen)
                            TERM(FUNC0SUB);
                        while (*proto == ';')
@@ -6849,7 +6923,8 @@ Perl_yylex(pTHX)
                        }
                    }
                    if (probable_sub) {
-                       gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
+                       gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
+                                        SVt_PVCV);
                        op_free(pl_yylval.opval);
                        pl_yylval.opval = rv2cv_op;
                        pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
@@ -6903,7 +6978,7 @@ Perl_yylex(pTHX)
                            d = PL_tokenbuf;
                            while (isLOWER(*d))
                                d++;
-                           if (!*d && !gv_stashpv(PL_tokenbuf, 0))
+                           if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
                                Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
                                       PL_tokenbuf);
                        }
@@ -6947,10 +7022,20 @@ Perl_yylex(pTHX)
            GV *gv;
            if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
                const char *pname = "main";
+               STRLEN plen = 4;
+               U32 putf8 = 0;
                if (PL_tokenbuf[2] == 'D')
-                   pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
-               gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
-                               SVt_PVIO);
+               {
+                   HV * const stash =
+                       PL_curstash ? PL_curstash : PL_defstash;
+                   pname = HvNAME_get(stash);
+                   plen  = HvNAMELEN (stash);
+                   if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
+               }
+               gv = gv_fetchpvn_flags(
+                       Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
+                       plen+6, GV_ADD|putf8, SVt_PVIO
+               );
                GvMULTI_on(gv);
                if (!GvIO(gv))
                    GvIOp(gv) = newIO();
@@ -7034,6 +7119,9 @@ Perl_yylex(pTHX)
            goto fake_eof;
        }
 
+       case KEY___SUB__:
+           FUN0(OP_RUNCV);
+
        case KEY_AUTOLOAD:
        case KEY_DESTROY:
        case KEY_BEGIN:
@@ -7056,7 +7144,8 @@ Perl_yylex(pTHX)
                    Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
                if (tmp < 0)
                    tmp = -tmp;
-               else if (tmp == KEY_require || tmp == KEY_do)
+               else if (tmp == KEY_require || tmp == KEY_do
+                     || tmp == KEY_glob)
                    /* that's a way to remember we saw "CORE::" */
                    orig_keyword = tmp;
                goto reserved_word;
@@ -7228,6 +7317,10 @@ Perl_yylex(pTHX)
                UNIBRACK(OP_ENTEREVAL);
            }
 
+       case KEY_evalbytes:
+           PL_expect = XTERM;
+           UNIBRACK(-OP_ENTEREVAL);
+
        case KEY_eof:
            UNI(OP_EOF);
 
@@ -7408,7 +7501,10 @@ Perl_yylex(pTHX)
            OPERATOR(GIVEN);
 
        case KEY_glob:
-           LOP(OP_GLOB,XTERM);
+           LOP(
+            orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
+            XTERM
+           );
 
        case KEY_hex:
            UNI(OP_HEX);
@@ -7720,7 +7816,8 @@ Perl_yylex(pTHX)
                *PL_tokenbuf = '\0';
                s = force_word(s,WORD,TRUE,TRUE,FALSE);
                if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
-                   gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
+                   gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
+                                GV_ADD | (UTF ? SVf_UTF8 : 0));
                else if (*s == '<')
                    yyerror("<> should be quotes");
            }
@@ -7925,7 +8022,7 @@ Perl_yylex(pTHX)
                SV *tmpwhite = 0;
 
                char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
-               SV *subtoken = newSVpvn(tstart, s - tstart);
+               SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
                PL_thistoken = 0;
 
                d = s;
@@ -7948,7 +8045,7 @@ Perl_yylex(pTHX)
                    d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
 #ifdef PERL_MAD
                    if (PL_madskills)
-                       nametoke = newSVpvn(s, d - s);
+                       nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
 #endif
                    if (memchr(tmpbuf, ':', len))
                        sv_setpvn(PL_subname, tmpbuf, len);
@@ -7957,6 +8054,8 @@ Perl_yylex(pTHX)
                        sv_catpvs(PL_subname,"::");
                        sv_catpvn(PL_subname,tmpbuf,len);
                    }
+                    if (SvUTF8(PL_linestr))
+                        SvUTF8_on(PL_subname);
                    have_name = TRUE;
 
 #ifdef PERL_MAD
@@ -8006,21 +8105,22 @@ Perl_yylex(pTHX)
                    bool underscore = FALSE;
                    bool seen_underscore = FALSE;
                    const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
+                    STRLEN tmplen;
 
                    s = scan_str(s,!!PL_madskills,FALSE);
                    if (!s)
                        Perl_croak(aTHX_ "Prototype not terminated");
                    /* strip spaces and check for bad characters */
-                   d = SvPVX(PL_lex_stuff);
+                   d = SvPV(PL_lex_stuff, tmplen);
                    tmp = 0;
-                   for (p = d; *p; ++p) {
+                   for (p = d; tmplen; tmplen--, ++p) {
                        if (!isSPACE(*p)) {
-                           d[tmp++] = *p;
+                            d[tmp++] = *p;
 
                            if (warnillegalproto) {
                                if (must_be_last)
                                    proto_after_greedy_proto = TRUE;
-                               if (!strchr("$@%*;[]&\\_+", *p)) {
+                               if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
                                    bad_proto = TRUE;
                                }
                                else {
@@ -8048,17 +8148,22 @@ Perl_yylex(pTHX)
                            }
                        }
                    }
-                   d[tmp] = '\0';
+                    d[tmp] = '\0';
                    if (proto_after_greedy_proto)
                        Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
                                    "Prototype after '%c' for %"SVf" : %s",
                                    greedy_proto, SVfARG(PL_subname), d);
-                   if (bad_proto)
+                   if (bad_proto) {
+                        SV *dsv = newSVpvs_flags("", SVs_TEMP);
                        Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
                                    "Illegal character %sin prototype for %"SVf" : %s",
                                    seen_underscore ? "after '_' " : "",
-                                   SVfARG(PL_subname), d);
-                   SvCUR_set(PL_lex_stuff, tmp);
+                                   SVfARG(PL_subname),
+                                    sv_uni_display(dsv,
+                                         newSVpvn_flags(d, tmp, SVs_TEMP | SvUTF8(PL_lex_stuff)),
+                                         tmp, UNI_DISPLAY_ISPRINT));
+                    }
+                    SvCUR_set(PL_lex_stuff, tmp);
                    have_proto = TRUE;
 
 #ifdef PERL_MAD
@@ -8350,7 +8455,7 @@ S_pending_ident(pTHX)
                HEK * const stashname = HvNAME_HEK(stash);
                SV *  const sym = newSVhek(stashname);
                 sv_catpvs(sym, "::");
                HEK * const stashname = HvNAME_HEK(stash);
                SV *  const sym = newSVhek(stashname);
                 sv_catpvs(sym, "::");
-                sv_catsv(sym, newSVpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1, SVs_TEMP | (UTF ? SVf_UTF8 : 0 )));
+                sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
                 gv_fetchsv(sym,
                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
                 gv_fetchsv(sym,
@@ -8468,7 +8573,7 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
            if (keyword(w, s - w, 0))
                return;
 
            if (keyword(w, s - w, 0))
                return;
 
-           gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
+           gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
            if (gv && GvCVu(gv))
                return;
            Perl_croak(aTHX_ "No comma allowed after %s", what);
            if (gv && GvCVu(gv))
                return;
            Perl_croak(aTHX_ "No comma allowed after %s", what);
@@ -8697,9 +8802,19 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
     }
     else if (ck_uni)
        check_uni();
     }
     else if (ck_uni)
        check_uni();
-    if (s < send)
-       *d = *s++;
-    d[1] = '\0';
+    if (s < send) {
+        if (UTF) {
+            const STRLEN skip = UTF8SKIP(s);
+            STRLEN i;
+            d[skip] = '\0';
+            for ( i = 0; i < skip; i++ )
+                d[i] = *s++;
+        }
+        else {
+            *d = *s++;
+            d[1] = '\0';
+        }
+    }
     if (*d == '^' && *s && isCONTROLVAR(*s)) {
        *d = toCTRL(*s);
        s++;
     if (*d == '^' && *s && isCONTROLVAR(*s)) {
        *d = toCTRL(*s);
        s++;
@@ -8715,7 +8830,7 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
            }
        }
        if (isIDFIRST_lazy_if(d,UTF)) {
            }
        }
        if (isIDFIRST_lazy_if(d,UTF)) {
-           d++;
+           d += UTF8SKIP(d);
            if (UTF) {
                char *end = s;
                while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
            if (UTF) {
                char *end = s;
                while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
@@ -9213,7 +9328,8 @@ S_scan_heredoc(pTHX_ register char *s)
     register char *d;
     register char *e;
     char *peek;
     register char *d;
     register char *e;
     char *peek;
-    const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
+    const int outer = (PL_rsfp || PL_parser->filtered)
+                  && !(PL_lex_inwhat == OP_SCALAR);
 #ifdef PERL_MAD
     I32 stuffstart = s - SvPVX(PL_linestr);
     char *tstart;
 #ifdef PERL_MAD
     I32 stuffstart = s - SvPVX(PL_linestr);
     char *tstart;
@@ -9337,7 +9453,8 @@ S_scan_heredoc(pTHX_ register char *s)
     PL_multi_start = CopLINE(PL_curcop);
     PL_multi_open = PL_multi_close = '<';
     term = *PL_tokenbuf;
     PL_multi_start = CopLINE(PL_curcop);
     PL_multi_open = PL_multi_close = '<';
     term = *PL_tokenbuf;
-    if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
+    if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp
+     && !PL_parser->filtered) {
        char * const bufptr = PL_sublex_info.super_bufptr;
        char * const bufend = PL_sublex_info.super_bufend;
        char * const olds = s - SvCUR(herewas);
        char * const bufptr = PL_sublex_info.super_bufptr;
        char * const bufend = PL_sublex_info.super_bufend;
        char * const olds = s - SvCUR(herewas);
@@ -9520,7 +9637,7 @@ S_scan_inputsymbol(pTHX_ char *start)
 
     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
 
     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
-       d++;
+       d += UTF ? UTF8SKIP(d) : 1;
 
     /* If we've tried to read what we allow filehandles to look like, and
        there's still text left, then it must be a glob() and not a getline.
 
     /* If we've tried to read what we allow filehandles to look like, and
        there's still text left, then it must be a glob() and not a getline.
@@ -9591,7 +9708,7 @@ intro_sym:
                gv = gv_fetchpv(d,
                                (PL_in_eval
                                 ? (GV_ADDMULTI | GV_ADDINEVAL)
                gv = gv_fetchpv(d,
                                (PL_in_eval
                                 ? (GV_ADDMULTI | GV_ADDINEVAL)
-                                : GV_ADDMULTI),
+                                : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
                                SVt_PV);
                PL_lex_op = readline_overriden
                    ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
                                SVt_PV);
                PL_lex_op = readline_overriden
                    ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
@@ -9611,7 +9728,7 @@ intro_sym:
        /* If it's none of the above, it must be a literal filehandle
           (<Foo::BAR> or <FOO>) so build a simple readline OP */
        else {
        /* If it's none of the above, it must be a literal filehandle
           (<Foo::BAR> or <FOO>) so build a simple readline OP */
        else {
-           GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
+           GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
            PL_lex_op = readline_overriden
                ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
                        op_append_elem(OP_LIST,
            PL_lex_op = readline_overriden
                ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
                        op_append_elem(OP_LIST,
@@ -9760,7 +9877,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
                char * const svlast = SvEND(sv) - 1;
 
                for (; s < ns; s++) {
                char * const svlast = SvEND(sv) - 1;
 
                for (; s < ns; s++) {
-                   if (*s == '\n' && !PL_rsfp)
+                   if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
                        CopLINE_inc(PL_curcop);
                }
                if (!found)
                        CopLINE_inc(PL_curcop);
                }
                if (!found)
@@ -9827,7 +9944,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
        if (PL_multi_open == PL_multi_close) {
            for (; s < PL_bufend; s++,to++) {
                /* embedded newlines increment the current line number */
        if (PL_multi_open == PL_multi_close) {
            for (; s < PL_bufend; s++,to++) {
                /* embedded newlines increment the current line number */
-               if (*s == '\n' && !PL_rsfp)
+               if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
                    CopLINE_inc(PL_curcop);
                /* handle quoted delimiters */
                if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
                    CopLINE_inc(PL_curcop);
                /* handle quoted delimiters */
                if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
@@ -9859,7 +9976,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
            /* read until we run out of string, or we find the terminator */
            for (; s < PL_bufend; s++,to++) {
                /* embedded newlines increment the line count */
            /* read until we run out of string, or we find the terminator */
            for (; s < PL_bufend; s++,to++) {
                /* embedded newlines increment the line count */
-               if (*s == '\n' && !PL_rsfp)
+               if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
                    CopLINE_inc(PL_curcop);
                /* backslashes can escape the open or closing characters */
                if (*s == '\\' && s+1 < PL_bufend) {
                    CopLINE_inc(PL_curcop);
                /* backslashes can escape the open or closing characters */
                if (*s == '\\' && s+1 < PL_bufend) {
@@ -10419,7 +10536,7 @@ S_scan_formline(pTHX_ register char *s)
                break;
             }
        }
                break;
             }
        }
-       if (PL_in_eval && !PL_rsfp) {
+       if (PL_in_eval && !PL_rsfp && !PL_parser->filtered) {
            eol = (char *) memchr(s,'\n',PL_bufend-s);
            if (!eol++)
                eol = PL_bufend;
            eol = (char *) memchr(s,'\n',PL_bufend-s);
            if (!eol++)
                eol = PL_bufend;
@@ -10450,7 +10567,7 @@ S_scan_formline(pTHX_ register char *s)
              break;
        }
        s = (char*)eol;
              break;
        }
        s = (char*)eol;
-       if (PL_rsfp) {
+       if (PL_rsfp || PL_parser->filtered) {
            bool got_some;
 #ifdef PERL_MAD
            if (PL_madskills) {
            bool got_some;
 #ifdef PERL_MAD
            if (PL_madskills) {