X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/d63c20f27b4a88c274844b2b635deb3c6588cccd..b71c54b89aa7c965d23cc7cfdb7dff179d8f4163:/toke.c?ds=sidebyside diff --git a/toke.c b/toke.c index d42f653..d6ac752 100644 --- a/toke.c +++ b/toke.c @@ -275,9 +275,9 @@ static const char* const lex_state_names[] = { * The UNIDOR macro is for unary functions that can be followed by the // * operator (such as C). */ -#define UNI2(f,x) { \ +#define UNI3(f,x,have_x) { \ pl_yylval.ival = f; \ - PL_expect = x; \ + if (have_x) PL_expect = x; \ PL_bufptr = s; \ PL_last_uni = PL_oldbufptr; \ PL_last_lop_op = f; \ @@ -286,22 +286,14 @@ static const char* const lex_state_names[] = { s = PEEKSPACE(s); \ return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \ } -#define UNI(f) UNI2(f,XTERM) -#define UNIDOR(f) UNI2(f,XTERMORDORDOR) +#define UNI(f) UNI3(f,XTERM,1) +#define UNIDOR(f) UNI3(f,XTERMORDORDOR,1) #define UNIPROTO(f,optional) { \ if (optional) PL_last_uni = PL_oldbufptr; \ OPERATOR(f); \ } -#define UNIBRACK(f) { \ - pl_yylval.ival = f; \ - PL_bufptr = s; \ - PL_last_uni = PL_oldbufptr; \ - if (*s == '(') \ - return REPORT( (int)FUNC1 ); \ - s = PEEKSPACE(s); \ - return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \ - } +#define UNIBRACK(f) UNI3(f,0,0) /* grandfather return to old style */ #define OLDLOP(f) \ @@ -314,6 +306,15 @@ static const char* const lex_state_names[] = { return (int)LSTOP; \ } while(0) +#define COPLINE_INC_WITH_HERELINES \ + STMT_START { \ + CopLINE_inc(PL_curcop); \ + if (PL_parser->lex_shared->herelines) \ + CopLINE(PL_curcop) += PL_parser->lex_shared->herelines, \ + PL_parser->lex_shared->herelines = 0; \ + } STMT_END + + #ifdef DEBUGGING /* how to interpret the pl_yylval associated with the token */ @@ -322,8 +323,7 @@ enum token_type { TOKENTYPE_IVAL, TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */ TOKENTYPE_PVAL, - TOKENTYPE_OPVAL, - TOKENTYPE_GVVAL + TOKENTYPE_OPVAL }; static struct debug_tokens { @@ -353,6 +353,8 @@ static struct debug_tokens { { EQOP, TOKENTYPE_OPNUM, "EQOP" }, { FOR, TOKENTYPE_IVAL, "FOR" }, { FORMAT, TOKENTYPE_NONE, "FORMAT" }, + { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" }, + { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" }, { FUNC, TOKENTYPE_OPNUM, "FUNC" }, { FUNC0, TOKENTYPE_OPNUM, "FUNC0" }, { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" }, @@ -377,6 +379,7 @@ static struct debug_tokens { { OROP, TOKENTYPE_IVAL, "OROP" }, { OROR, TOKENTYPE_NONE, "OROR" }, { PACKAGE, TOKENTYPE_NONE, "PACKAGE" }, + { PEG, TOKENTYPE_NONE, "PEG" }, { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" }, { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" }, { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" }, @@ -386,8 +389,10 @@ static struct debug_tokens { { PREDEC, TOKENTYPE_NONE, "PREDEC" }, { PREINC, TOKENTYPE_NONE, "PREINC" }, { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" }, + { QWLIST, TOKENTYPE_OPVAL, "QWLIST" }, { REFGEN, TOKENTYPE_NONE, "REFGEN" }, { RELOP, TOKENTYPE_OPNUM, "RELOP" }, + { REQUIRE, TOKENTYPE_NONE, "REQUIRE" }, { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" }, { SUB, TOKENTYPE_NONE, "SUB" }, { THING, TOKENTYPE_OPVAL, "THING" }, @@ -428,7 +433,7 @@ S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp) } if (name) Perl_sv_catpv(aTHX_ report, name); - else if ((char)rv > ' ' && (char)rv < '~') + else if ((char)rv > ' ' && (char)rv <= '~') Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv); else if (!rv) sv_catpvs(report, "EOF"); @@ -436,7 +441,6 @@ S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp) Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv); switch (type) { case TOKENTYPE_NONE: - case TOKENTYPE_GVVAL: /* doesn't appear to be used */ break; case TOKENTYPE_IVAL: Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival); @@ -634,8 +638,8 @@ Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen) static void strip_return(SV *sv) { - register const char *s = SvPVX_const(sv); - register const char * const e = s + SvCUR(sv); + const char *s = SvPVX_const(sv); + const char * const e = s + SvCUR(sv); PERL_ARGS_ASSERT_STRIP_RETURN; @@ -643,7 +647,7 @@ strip_return(SV *sv) while (s < e) { if (*s++ == '\r' && *s == '\n') { /* hit a CR-LF, need to copy the rest */ - register char *d = s - 1; + char *d = s - 1; *d++ = *s++; while (s < e) { if (*s == '\r' && s[1] == '\n') @@ -746,6 +750,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) Newx(parser->lex_brackstack, 120, char); Newx(parser->lex_casestack, 12, char); *parser->lex_casestack = '\0'; + Newxz(parser->lex_shared, 1, LEXSHARED); if (line) { STRLEN len; @@ -753,8 +758,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) parser->linestr = flags & LEX_START_COPIED ? SvREFCNT_inc_simple_NN(line) : newSVpvn_flags(s, len, SvUTF8(line)); - if (!len || s[len-1] != ';') - sv_catpvs(parser->linestr, "\n;"); + sv_catpvs(parser->linestr, "\n;"); } else { parser->linestr = newSVpvs("\n;"); } @@ -790,6 +794,7 @@ Perl_parser_free(pTHX_ const yy_parser *parser) Safefree(parser->lex_brackstack); Safefree(parser->lex_casestack); + Safefree(parser->lex_shared); PL_parser = parser->old_parser; Safefree(parser); } @@ -913,7 +918,7 @@ Perl_lex_grow_linestr(pTHX_ STRLEN len) SV *linestr; char *buf; STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos; - STRLEN linestart_pos, last_uni_pos, last_lop_pos; + STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos; linestr = PL_parser->linestr; buf = SvPVX(linestr); if (len <= SvLEN(linestr)) @@ -925,7 +930,11 @@ Perl_lex_grow_linestr(pTHX_ STRLEN len) linestart_pos = PL_parser->linestart - buf; last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0; last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0; + re_eval_start_pos = PL_parser->lex_shared->re_eval_start ? + PL_parser->lex_shared->re_eval_start - buf : 0; + buf = sv_grow(linestr, len); + PL_parser->bufend = buf + bufend_pos; PL_parser->bufptr = buf + bufptr_pos; PL_parser->oldbufptr = buf + oldbufptr_pos; @@ -935,6 +944,8 @@ Perl_lex_grow_linestr(pTHX_ STRLEN len) PL_parser->last_uni = buf + last_uni_pos; if (PL_parser->last_lop) PL_parser->last_lop = buf + last_lop_pos; + if (PL_parser->lex_shared->re_eval_start) + PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos; return buf; } @@ -1165,7 +1176,7 @@ Perl_lex_read_to(pTHX_ char *ptr) Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to"); for (; s != ptr; s++) if (*s == '\n') { - CopLINE_inc(PL_curcop); + COPLINE_INC_WITH_HERELINES; PL_parser->linestart = s+1; } PL_parser->bufptr = ptr; @@ -1248,6 +1259,7 @@ buffer has reached the end of the input text. */ #define LEX_FAKE_EOF 0x80000000 +#define LEX_NO_TERM 0x40000000 bool Perl_lex_next_chunk(pTHX_ U32 flags) @@ -1259,7 +1271,7 @@ Perl_lex_next_chunk(pTHX_ U32 flags) STRLEN linestart_pos, last_uni_pos, last_lop_pos; bool got_some_for_debugger = 0; bool got_some; - if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF)) + if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM)) Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk"); linestr = PL_parser->linestr; buf = SvPVX(linestr); @@ -1290,6 +1302,8 @@ Perl_lex_next_chunk(pTHX_ U32 flags) } else if (filter_gets(linestr, old_bufend_pos)) { got_some = 1; got_some_for_debugger = 1; + } else if (flags & LEX_NO_TERM) { + got_some = 0; } else { if (!SvPOK(linestr)) /* can get undefined by filter_gets */ sv_setpvs(linestr, ""); @@ -1439,7 +1453,7 @@ Perl_lex_read_unichar(pTHX_ U32 flags) c = lex_peek_unichar(flags); if (c != -1) { if (c == '\n') - CopLINE_inc(PL_curcop); + COPLINE_INC_WITH_HERELINES; if (UTF) PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr); else @@ -1508,7 +1522,7 @@ Perl_lex_read_space(pTHX_ U32 flags) if (flags & LEX_NO_NEXT_CHUNK) break; PL_parser->bufptr = s; - CopLINE_inc(PL_curcop); + COPLINE_INC_WITH_HERELINES; got_more = lex_next_chunk(flags); CopLINE_dec(PL_curcop); s = PL_parser->bufptr; @@ -1551,7 +1565,7 @@ S_incline(pTHX_ const char *s) PERL_ARGS_ASSERT_INCLINE; - CopLINE_inc(PL_curcop); + COPLINE_INC_WITH_HERELINES; if (*s++ != '#') return; while (SPACE_OR_TAB(*s)) @@ -1987,6 +2001,11 @@ S_force_next(pTHX_ I32 type) tokereport(type, &NEXTVAL_NEXTTOKE); } #endif + /* Don’t let opslab_force_free snatch it */ + if (S_is_opval_token(type & 0xffff) && NEXTVAL_NEXTTOKE.opval) { + assert(!NEXTVAL_NEXTTOKE.opval->op_savefree); + NEXTVAL_NEXTTOKE.opval->op_savefree = 1; + } #ifdef PERL_MAD if (PL_curforce < 0) start_force(PL_lasttoke); @@ -2062,7 +2081,7 @@ STATIC char * S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick) { dVAR; - register char *s; + char *s; STRLEN len; PERL_ARGS_ASSERT_FORCE_WORD; @@ -2307,9 +2326,9 @@ STATIC SV * S_tokeq(pTHX_ SV *sv) { dVAR; - register char *s; - register char *send; - register char *d; + char *s; + char *send; + char *d; STRLEN len = 0; SV *pv = sv; @@ -2378,7 +2397,7 @@ STATIC I32 S_sublex_start(pTHX) { dVAR; - register const I32 op_type = pl_yylval.ival; + const I32 op_type = pl_yylval.ival; if (op_type == OP_NULL) { pl_yylval.opval = PL_lex_op; @@ -2439,17 +2458,19 @@ STATIC I32 S_sublex_push(pTHX) { dVAR; + LEXSHARED *shared; ENTER; PL_lex_state = PL_sublex_info.super_state; SAVEBOOL(PL_lex_dojoin); SAVEI32(PL_lex_brackets); SAVEI32(PL_lex_allbrackets); + SAVEI32(PL_lex_formbrack); SAVEI8(PL_lex_fakeeof); SAVEI32(PL_lex_casemods); SAVEI32(PL_lex_starts); SAVEI8(PL_lex_state); - SAVEPPTR(PL_sublex_info.re_eval_start); + SAVESPTR(PL_lex_repl); SAVEVPTR(PL_lex_inpat); SAVEI16(PL_lex_inwhat); SAVECOPLINE(PL_curcop); @@ -2463,19 +2484,29 @@ S_sublex_push(pTHX) SAVESPTR(PL_linestr); SAVEGENERICPV(PL_lex_brackstack); SAVEGENERICPV(PL_lex_casestack); + SAVEGENERICPV(PL_parser->lex_shared); + + /* The here-doc parser needs to be able to peek into outer lexing + scopes to find the body of the here-doc. So we put PL_linestr and + PL_bufptr into lex_shared, to ‘share’ those values. + */ + PL_parser->lex_shared->ls_linestr = PL_linestr; + PL_parser->lex_shared->ls_bufptr = PL_bufptr; PL_linestr = PL_lex_stuff; + PL_lex_repl = PL_sublex_info.repl; PL_lex_stuff = NULL; - PL_sublex_info.re_eval_start = NULL; + PL_sublex_info.repl = NULL; PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr); PL_bufend += SvCUR(PL_linestr); PL_last_lop = PL_last_uni = NULL; SAVEFREESV(PL_linestr); + if (PL_lex_repl) SAVEFREESV(PL_lex_repl); PL_lex_dojoin = FALSE; - PL_lex_brackets = 0; + PL_lex_brackets = PL_lex_formbrack = 0; PL_lex_allbrackets = 0; PL_lex_fakeeof = LEX_FAKEEOF_NEVER; Newx(PL_lex_brackstack, 120, char); @@ -2485,6 +2516,10 @@ S_sublex_push(pTHX) PL_lex_starts = 0; PL_lex_state = LEX_INTERPCONCAT; CopLINE_set(PL_curcop, (line_t)PL_multi_start); + + Newxz(shared, 1, LEXSHARED); + shared->ls_prev = PL_parser->lex_shared; + PL_parser->lex_shared = shared; PL_lex_inwhat = PL_sublex_info.sub_inwhat; if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS; @@ -2527,7 +2562,6 @@ S_sublex_done(pTHX) PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr); PL_bufend += SvCUR(PL_linestr); PL_last_lop = PL_last_uni = NULL; - SAVEFREESV(PL_linestr); PL_lex_dojoin = FALSE; PL_lex_brackets = 0; PL_lex_allbrackets = 0; @@ -2665,11 +2699,11 @@ STATIC char * S_scan_const(pTHX_ char *start) { dVAR; - register char *send = PL_bufend; /* end of the constant */ + char *send = PL_bufend; /* end of the constant */ SV *sv = newSV(send - start); /* sv for the constant. See note below on sizing. */ - register char *s = start; /* start of the constant */ - register char *d = SvPVX(sv); /* destination for copies */ + char *s = start; /* start of the constant */ + char *d = SvPVX(sv); /* destination for copies */ bool dorange = FALSE; /* are we in a translit range? */ bool didrange = FALSE; /* did we just finish a range? */ bool in_charclass = FALSE; /* within /[...]/ */ @@ -2779,6 +2813,7 @@ S_scan_const(pTHX_ char *start) #endif if (min > max) { + SvREFCNT_dec(sv); Perl_croak(aTHX_ "Invalid range \"%c-%c\" in transliteration operator", (char)min, (char)max); @@ -2837,6 +2872,7 @@ S_scan_const(pTHX_ char *start) /* range begins (ignore - as first or last char) */ else if (*s == '-' && s+1 < send && s != start) { if (didrange) { + SvREFCNT_dec(sv); Perl_croak(aTHX_ "Ambiguous range in transliteration operator"); } if (has_utf8 @@ -2862,11 +2898,23 @@ S_scan_const(pTHX_ char *start) /* if we get here, we're not doing a transliteration */ - else if (in_charclass && *s == ']' && ! (s>start+1 && s[-1] == '\\')) - in_charclass = FALSE; + else if (*s == '[' && PL_lex_inpat && !in_charclass) { + char *s1 = s-1; + int esc = 0; + while (s1 >= start && *s1-- == '\\') + esc = !esc; + if (!esc) + in_charclass = TRUE; + } - else if (PL_lex_inpat && *s == '[') - in_charclass = TRUE; + else if (*s == ']' && PL_lex_inpat && in_charclass) { + char *s1 = s-1; + int esc = 0; + while (s1 >= start && *s1-- == '\\') + esc = !esc; + if (!esc) + in_charclass = FALSE; + } /* skip for regexp comments /(?#comment)/, except for the last * char, which will be done separately. @@ -2979,7 +3027,7 @@ S_scan_const(pTHX_ char *start) /* FALL THROUGH */ default: { - if ((isALPHA(*s) || isDIGIT(*s))) + if ((isALNUMC(*s))) Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Unrecognized escape \\%c passed through", *s); @@ -3015,29 +3063,16 @@ S_scan_const(pTHX_ char *start) /* eg. \x24 indicates the hex constant 0x24 */ case 'x': - ++s; - if (*s == '{') { - char* const e = strchr(s, '}'); - I32 flags = PERL_SCAN_ALLOW_UNDERSCORES | - PERL_SCAN_DISALLOW_PREFIX; + { STRLEN len; + const char* error; - ++s; - if (!e) { - yyerror("Missing right brace on \\x{}"); + bool valid = grok_bslash_x(s, &uv, &len, &error, 1); + s += len; + if (! valid) { + yyerror(error); continue; } - len = e - s; - uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL)); - s = e + 1; - } - else { - { - STRLEN len = 2; - I32 flags = PERL_SCAN_DISALLOW_PREFIX; - uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL)); - s += len; - } } NUM_ESCAPE_INSERT: @@ -3744,7 +3779,7 @@ S_intuit_more(pTHX_ register char *s) * * First argument is the stuff after the first token, e.g. "bar". * - * Not a method if bar is a filehandle. + * Not a method if foo is a filehandle. * Not a method if foo is a subroutine prototyped to take a filehandle. * Not a method if it's really "Foo $bar" * Method if it's "foo $bar" @@ -3769,11 +3804,9 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) PERL_ARGS_ASSERT_INTUIT_METHOD; - if (gv) { - if (SvTYPE(gv) == SVt_PVGV && GvIO(gv)) + if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv)) return 0; - if (cv) { - if (SvPOK(cv)) { + if (cv && SvPOK(cv)) { const char *proto = CvPROTO(cv); if (proto) { if (*proto == ';') @@ -3781,9 +3814,6 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) if (*proto == '*') return 0; } - } - } else - gv = NULL; } s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); /* start is the beginning of the possible filehandle/object, @@ -3792,7 +3822,7 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) */ if (*start == '$') { - if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY || + if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY || isUPPER(*PL_tokenbuf)) return 0; #ifdef PERL_MAD @@ -3819,7 +3849,7 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) if (indirgv && GvCVu(indirgv)) return 0; /* filehandle or package name makes it a method */ - if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) { + if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) { #ifdef PERL_MAD soff = s - SvPVX(PL_linestr); #endif @@ -4326,6 +4356,7 @@ S_tokenize_use(pTHX_ int is_use, char *s) { if (PL_expect != XSTATE) yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression", is_use ? "use" : "no")); + PL_expect = XTERM; s = SKIPSPACE1(s); if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) { s = force_version(s, TRUE); @@ -4397,10 +4428,11 @@ int Perl_yylex(pTHX) { dVAR; - register char *s = PL_bufptr; - register char *d; + char *s = PL_bufptr; + char *d; STRLEN len; bool bof = FALSE; + U8 formbrack = 0; U32 fake_eof = 0; /* orig_keyword, gvp, and gv are initialized here because @@ -4483,12 +4515,9 @@ Perl_yylex(pTHX) PL_lex_allbrackets--; next_type &= 0xffff; } -#ifdef PERL_MAD - /* FIXME - can these be merged? */ - return next_type; -#else + if (S_is_opval_token(next_type) && pl_yylval.opval) + pl_yylval.opval->op_savefree = 0; /* release */ return REPORT(next_type); -#endif } /* interpolated case modifiers like \L \U, including \Q and \E. @@ -4650,7 +4679,7 @@ Perl_yylex(pTHX) } /* Convert (?{...}) and friends to 'do {...}' */ if (PL_lex_inpat && *PL_bufptr == '(') { - PL_sublex_info.re_eval_start = PL_bufptr; + PL_parser->lex_shared->re_eval_start = PL_bufptr; PL_bufptr += 2; if (*PL_bufptr != '{') PL_bufptr++; @@ -4705,20 +4734,34 @@ Perl_yylex(pTHX) Perl_croak(aTHX_ "Bad evalled substitution pattern"); PL_lex_repl = NULL; } - if (PL_sublex_info.re_eval_start) { + /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets + re_eval_str. If the here-doc body’s length equals the previous + value of re_eval_start, re_eval_start will now be null. So + check re_eval_str as well. */ + if (PL_parser->lex_shared->re_eval_start + || PL_parser->lex_shared->re_eval_str) { + SV *sv; if (*PL_bufptr != ')') Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'"); PL_bufptr++; /* having compiled a (?{..}) expression, return the original * text too, as a const */ + if (PL_parser->lex_shared->re_eval_str) { + sv = PL_parser->lex_shared->re_eval_str; + PL_parser->lex_shared->re_eval_str = NULL; + SvCUR_set(sv, + PL_bufptr - PL_parser->lex_shared->re_eval_start); + SvPV_shrink_to_cur(sv); + } + else sv = newSVpvn(PL_parser->lex_shared->re_eval_start, + PL_bufptr - PL_parser->lex_shared->re_eval_start); start_force(PL_curforce); /* XXX probably need a CURMAD(something) here */ NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, - newSVpvn(PL_sublex_info.re_eval_start, - PL_bufptr - PL_sublex_info.re_eval_start)); + sv); force_next(THING); - PL_sublex_info.re_eval_start = NULL; + PL_parser->lex_shared->re_eval_start = NULL; PL_expect = XTERM; return REPORT(','); } @@ -4778,11 +4821,14 @@ Perl_yylex(pTHX) return yylex(); case LEX_FORMLINE: - PL_lex_state = LEX_NORMAL; s = scan_formline(PL_bufptr); if (!PL_lex_formbrack) + { + formbrack = 1; goto rightbracket; - OPERATOR(';'); + } + PL_bufptr = s; + return yylex(); } s = PL_bufptr; @@ -4930,7 +4976,7 @@ Perl_yylex(pTHX) fake_eof = LEX_FAKE_EOF; } PL_bufptr = PL_bufend; - CopLINE_inc(PL_curcop); + COPLINE_INC_WITH_HERELINES; if (!lex_next_chunk(fake_eof)) { CopLINE_dec(PL_curcop); s = PL_bufptr; @@ -5174,9 +5220,11 @@ Perl_yylex(pTHX) } } if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { - PL_bufptr = s; PL_lex_state = LEX_FORMLINE; - return yylex(); + start_force(PL_curforce); + NEXTVAL_NEXTTOKE.ival = 0; + force_next(FORMRBRACK); + TOKEN(';'); } goto retry; case '\r': @@ -5215,6 +5263,7 @@ Perl_yylex(pTHX) incline(s); } else { + const bool in_comment = *s == '#'; d = s; while (d < PL_bufend && *d != '\n') d++; @@ -5228,12 +5277,18 @@ Perl_yylex(pTHX) PL_thiswhite = newSVpvn(s, d - s); #endif s = d; - incline(s); + if (in_comment && d == PL_bufend + && PL_lex_state == LEX_INTERPNORMAL + && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr + && SvEVALED(PL_lex_repl) && d[-1] == '}') s--; + else incline(s); } if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { - PL_bufptr = s; PL_lex_state = LEX_FORMLINE; - return yylex(); + start_force(PL_curforce); + NEXTVAL_NEXTTOKE.ival = 0; + force_next(FORMRBRACK); + TOKEN(';'); } } else { @@ -5535,7 +5590,7 @@ Perl_yylex(pTHX) } sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0); if (*d == '(') { - d = scan_str(d,TRUE,TRUE); + d = scan_str(d,TRUE,TRUE,FALSE); if (!d) { /* MUST advance bufptr here to avoid bogus "at end of line" context messages from yyerror(). @@ -5691,21 +5746,14 @@ Perl_yylex(pTHX) } TERM(']'); case '{': - leftbracket: s++; + leftbracket: if (PL_lex_brackets > 100) { Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); } switch (PL_expect) { case XTERM: - if (PL_lex_formbrack) { - s--; - PRETERMBLOCK(DO); - } - if (PL_oldoldbufptr == PL_last_lop) - PL_lex_brackstack[PL_lex_brackets++] = XTERM; - else - PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; + PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; PL_lex_allbrackets++; OPERATOR(HASHBRACK); case XOPERATOR: @@ -5855,7 +5903,7 @@ Perl_yylex(pTHX) pl_yylval.ival = CopLINE(PL_curcop); if (isSPACE(*s) || *s == '#') PL_copline = NOLINE; /* invalidate current command line number */ - TOKEN('{'); + TOKEN(formbrack ? '=' : '{'); case '}': if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) TOKEN(0); @@ -5866,8 +5914,6 @@ Perl_yylex(pTHX) else PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets]; PL_lex_allbrackets--; - if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL) - PL_lex_formbrack = 0; if (PL_lex_state == LEX_INTERPNORMAL) { if (PL_lex_brackets == 0) { if (PL_expect & XFAKEBRACK) { @@ -5883,7 +5929,10 @@ Perl_yylex(pTHX) #endif return yylex(); /* ignore fake brackets */ } - if (*s == '-' && s[1] == '>') + if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr + && SvEVALED(PL_lex_repl)) + PL_lex_state = LEX_INTERPEND; + else if (*s == '-' && s[1] == '>') PL_lex_state = LEX_INTERPENDMAYBE; else if (*s != '[' && *s != '{') PL_lex_state = LEX_INTERPEND; @@ -5899,11 +5948,17 @@ Perl_yylex(pTHX) curmad('X', newSVpvn(s-1,1)); CURMAD('_', PL_thiswhite); } - force_next('}'); + force_next(formbrack ? '.' : '}'); + if (formbrack) LEAVE; #ifdef PERL_MAD if (!PL_thistoken) PL_thistoken = newSVpvs(""); #endif + if (formbrack == 2) { /* means . where arguments were expected */ + start_force(PL_curforce); + force_next(';'); + TOKEN(FORMRBRACK); + } TOKEN(';'); case '&': s++; @@ -5989,7 +6044,8 @@ Perl_yylex(pTHX) if (PL_expect == XSTATE && isALPHA(tmp) && (s == PL_linestart+1 || s[-2] == '\n') ) { - if (PL_in_eval && !PL_rsfp && !PL_parser->filtered) { + if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered) + || PL_lex_state != LEX_NORMAL) { d = PL_bufend; while (s < d) { if (*s++ == '\n') { @@ -6020,7 +6076,7 @@ Perl_yylex(pTHX) goto retry; } } - if (PL_lex_brackets < PL_lex_formbrack) { + if (PL_expect == XBLOCK) { const char *t = s; #ifdef PERL_STRICT_CR while (SPACE_OR_TAB(*t)) @@ -6029,8 +6085,12 @@ Perl_yylex(pTHX) #endif t++; if (*t == '\n' || *t == '#') { - s--; - PL_expect = XBLOCK; + formbrack = 1; + ENTER; + SAVEI8(PL_parser->form_lex_state); + SAVEI32(PL_lex_formbrack); + PL_parser->form_lex_state = PL_lex_state; + PL_lex_formbrack = PL_lex_brackets + 1; goto leftbracket; } } @@ -6081,7 +6141,8 @@ Perl_yylex(pTHX) s = scan_heredoc(s); else s = scan_inputsymbol(s); - TERM(sublex_start()); + PL_expect = XOPERATOR; + TOKEN(sublex_start()); } s++; { @@ -6389,8 +6450,8 @@ Perl_yylex(pTHX) #endif && (s == PL_linestart || s[-1] == '\n') ) { - PL_lex_formbrack = 0; PL_expect = XSTATE; + formbrack = 2; /* dot seen where arguments expected */ goto rightbracket; } if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') { @@ -6431,7 +6492,7 @@ Perl_yylex(pTHX) TERM(THING); case '\'': - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); if (PL_expect == XOPERATOR) { if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { @@ -6446,7 +6507,7 @@ Perl_yylex(pTHX) TERM(sublex_start()); case '"': - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); if (PL_expect == XOPERATOR) { if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { @@ -6469,7 +6530,7 @@ Perl_yylex(pTHX) TERM(sublex_start()); case '`': - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } ); if (PL_expect == XOPERATOR) no_op("Backticks",s); @@ -6921,7 +6982,7 @@ Perl_yylex(pTHX) op_free(rv2cv_op); SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv); ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv); - pl_yylval.opval->op_private = 0; + pl_yylval.opval->op_private = OPpCONST_FOLDED; pl_yylval.opval->op_flags |= OPf_SPECIAL; TOKEN(WORD); } @@ -7236,10 +7297,19 @@ Perl_yylex(pTHX) case KEY_CORE: if (*s == ':' && s[1] == ':') { - s += 2; + STRLEN olen = len; d = s; + s += 2; s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); - if (!(tmp = keyword(PL_tokenbuf, len, 1))) + if ((*s == ':' && s[1] == ':') + || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\'')) + { + s = d; + len = olen; + Copy(PL_bufptr, PL_tokenbuf, olen, char); + goto just_a_word; + } + if (!tmp) Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword", SVfARG(newSVpvn_flags(PL_tokenbuf, len, (UTF ? SVf_UTF8 : 0) | SVs_TEMP))); @@ -7350,8 +7420,13 @@ Perl_yylex(pTHX) s = SKIPSPACE1(s); if (*s == '{') PRETERMBLOCK(DO); - if (*s != '\'') - s = force_word(s,WORD,TRUE,TRUE,FALSE); + if (*s != '\'') { + d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, 1, &len); + if (len) { + d = SKIPSPACE1(d); + if (*d == '(') s = force_word(s,WORD,TRUE,TRUE,FALSE); + } + } if (orig_keyword == KEY_do) { orig_keyword = 0; pl_yylval.ival = 1; @@ -7745,7 +7820,7 @@ Perl_yylex(pTHX) case KEY_no: s = tokenize_use(0, s); - OPERATOR(USE); + TERM(USE); case KEY_not: if (*s == '(' || (s = SKIPSPACE1(s), *s == '(')) @@ -7835,7 +7910,7 @@ Perl_yylex(pTHX) LOP(OP_PIPE_OP,XTERM); case KEY_q: - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); if (!s) missingterm(NULL); pl_yylval.ival = OP_CONST; @@ -7846,7 +7921,7 @@ Perl_yylex(pTHX) case KEY_qw: { OP *words = NULL; - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); if (!s) missingterm(NULL); PL_expect = XOPERATOR; @@ -7896,7 +7971,7 @@ Perl_yylex(pTHX) } case KEY_qq: - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); if (!s) missingterm(NULL); pl_yylval.ival = OP_STRINGIFY; @@ -7909,7 +7984,7 @@ Perl_yylex(pTHX) TERM(sublex_start()); case KEY_qx: - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); if (!s) missingterm(NULL); readpipe_override(); @@ -8192,8 +8267,6 @@ Perl_yylex(pTHX) } if (key == KEY_format) { - if (*s == '=') - PL_lex_formbrack = PL_lex_brackets + 1; #ifdef PERL_MAD PL_thistoken = subtoken; s = d; @@ -8202,7 +8275,7 @@ Perl_yylex(pTHX) (void) force_word(PL_oldbufptr + tboffset, WORD, FALSE, TRUE, TRUE); #endif - OPERATOR(FORMAT); + PREBLOCK(FORMAT); } /* Look for a prototype */ @@ -8218,7 +8291,7 @@ Perl_yylex(pTHX) const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO); STRLEN tmplen; - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); if (!s) Perl_croak(aTHX_ "Prototype not terminated"); /* strip spaces and check for bad characters */ @@ -8365,6 +8438,7 @@ Perl_yylex(pTHX) LOP(OP_SYSWRITE,XTERM); case KEY_tr: + case KEY_y: s = scan_trans(s); TERM(sublex_start()); @@ -8492,10 +8566,6 @@ Perl_yylex(pTHX) return REPORT(0); pl_yylval.ival = OP_XOR; OPERATOR(OROP); - - case KEY_y: - s = scan_trans(s); - TERM(sublex_start()); } }} } @@ -8507,7 +8577,6 @@ static int S_pending_ident(pTHX) { dVAR; - register char *d; PADOFFSET tmp = 0; /* pit holds the identifier we read and pending_ident is reset */ char pit = PL_pending_ident; @@ -8549,14 +8618,6 @@ S_pending_ident(pTHX) /* build the ops for accesses to a my() variable. - - Deny my($a) or my($b) in a sort block, *if* $a or $b is - then used in a comparison. This catches most, but not - all cases. For instance, it catches - sort { my($a); $a <=> $b } - but not - sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; } - (although why you'd do that is anyone's guess). */ if (!has_colon) { @@ -8585,23 +8646,6 @@ S_pending_ident(pTHX) return WORD; } - /* if it's a sort block and they're naming $a or $b */ - if (PL_last_lop_op == OP_SORT && - PL_tokenbuf[0] == '$' && - (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b') - && !PL_tokenbuf[2]) - { - for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart; - d < PL_bufend && *d != '\n'; - d++) - { - if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) { - Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison", - PL_tokenbuf); - } - } - } - pl_yylval.opval = newOP(OP_PADANY, 0); pl_yylval.opval->op_targ = tmp; return PRIVATEREF; @@ -8828,8 +8872,8 @@ STATIC char * S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp) { dVAR; - register char *d = dest; - register char * const e = d + destlen - 3; /* two-character token, ending NUL */ + char *d = dest; + char * const e = d + destlen - 3; /* two-character token, ending NUL */ PERL_ARGS_ASSERT_SCAN_WORD; @@ -8873,8 +8917,8 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL dVAR; char *bracket = NULL; char funny = *s++; - register char *d = dest; - register char * const e = d + destlen - 3; /* two-character token, ending NUL */ + char *d = dest; + char * const e = d + destlen - 3; /* two-character token, ending NUL */ PERL_ARGS_ASSERT_SCAN_IDENT; @@ -9049,19 +9093,23 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in * the parse starting at 's', based on the subset that are valid in this * context input to this routine in 'valid_flags'. Advances s. Returns - * TRUE if the input was a valid flag, so the next char may be as well; - * otherwise FALSE. 'charset' should point to a NUL upon first call on the - * current regex. This routine will set it to any charset modifier found. - * The caller shouldn't change it. This way, another charset modifier - * encountered in the parse can be detected as an error, as we have decided - * to allow only one */ + * TRUE if the input should be treated as a valid flag, so the next char + * may be as well; otherwise FALSE. 'charset' should point to a NUL upon + * first call on the current regex. This routine will set it to any + * charset modifier found. The caller shouldn't change it. This way, + * another charset modifier encountered in the parse can be detected as an + * error, as we have decided to allow only one */ const char c = **s; - - if (! strchr(valid_flags, c)) { - if (isALNUM(c)) { - yyerror(Perl_form(aTHX_ "Unknown regexp modifier \"/%c\"", c)); - (*s)++; + STRLEN charlen = UTF ? UTF8SKIP(*s) : 1; + + if ( charlen != 1 || ! strchr(valid_flags, c) ) { + if (isALNUM_lazy_if(*s, UTF)) { + yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s), + UTF ? SVf_UTF8 : 0); + (*s) += charlen; + /* Pretend that it worked, so will continue processing before + * dieing */ return TRUE; } return FALSE; @@ -9140,7 +9188,7 @@ S_scan_pat(pTHX_ char *start, I32 type) { dVAR; PMOP *pm; - char *s = scan_str(start,!!PL_madskills,FALSE); + char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing); const char * const valid_flags = (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS); char charset = '\0'; /* character set modifier */ @@ -9150,6 +9198,9 @@ S_scan_pat(pTHX_ char *start, I32 type) PERL_ARGS_ASSERT_SCAN_PAT; + /* this was only needed for the initial scan_str; set it to false + * so that any (?{}) code blocks etc are parsed normally */ + PL_reg_state.re_reparsing = FALSE; if (!s) { const char * const delimiter = skipspace(start); Perl_croak(aTHX_ @@ -9190,8 +9241,10 @@ S_scan_pat(pTHX_ char *start, I32 type) * anon CV. False positives like qr/[(?{]/ are harmless */ if (type == OP_QR) { - char *p; - for (p = start; p < s; p++) { + STRLEN len; + char *e, *p = SvPV(PL_lex_stuff, len); + e = p + len; + for (; p < e; p++) { if (p[0] == '(' && p[1] == '?' && (p[2] == '{' || (p[2] == '?' && p[3] == '{'))) { @@ -9199,6 +9252,7 @@ S_scan_pat(pTHX_ char *start, I32 type) break; } } + pm->op_pmflags |= PMf_IS_QR; } while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {}; @@ -9225,7 +9279,7 @@ S_scan_subst(pTHX_ char *start) { dVAR; char *s; - register PMOP *pm; + PMOP *pm; I32 first_start; I32 es = 0; char charset = '\0'; /* character set modifier */ @@ -9237,7 +9291,7 @@ S_scan_subst(pTHX_ char *start) pl_yylval.ival = OP_NULL; - s = scan_str(start,!!PL_madskills,FALSE); + s = scan_str(start,!!PL_madskills,FALSE,FALSE); if (!s) Perl_croak(aTHX_ "Substitution pattern not terminated"); @@ -9255,7 +9309,7 @@ S_scan_subst(pTHX_ char *start) #endif first_start = PL_multi_start; - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); if (!s) { if (PL_lex_stuff) { SvREFCNT_dec(PL_lex_stuff); @@ -9302,8 +9356,6 @@ S_scan_subst(pTHX_ char *start) if (es) { SV * const repl = newSVpvs(""); - PL_sublex_info.super_bufptr = s; - PL_sublex_info.super_bufend = PL_bufend; PL_multi_end = 0; pm->op_pmflags |= PMf_EVAL; while (es-- > 0) { @@ -9313,13 +9365,11 @@ S_scan_subst(pTHX_ char *start) sv_catpvs(repl, "do "); } sv_catpvs(repl, "{"); - sv_catsv(repl, PL_lex_repl); - if (strchr(SvPVX(PL_lex_repl), '#')) - sv_catpvs(repl, "\n"); + sv_catsv(repl, PL_sublex_info.repl); sv_catpvs(repl, "}"); SvEVALED_on(repl); - SvREFCNT_dec(PL_lex_repl); - PL_lex_repl = repl; + SvREFCNT_dec(PL_sublex_info.repl); + PL_sublex_info.repl = repl; } PL_lex_op = (OP*)pm; @@ -9331,7 +9381,7 @@ STATIC char * S_scan_trans(pTHX_ char *start) { dVAR; - register char* s; + char* s; OP *o; U8 squash; U8 del; @@ -9345,7 +9395,7 @@ S_scan_trans(pTHX_ char *start) pl_yylval.ival = OP_NULL; - s = scan_str(start,!!PL_madskills,FALSE); + s = scan_str(start,!!PL_madskills,FALSE,FALSE); if (!s) Perl_croak(aTHX_ "Transliteration pattern not terminated"); @@ -9361,7 +9411,7 @@ S_scan_trans(pTHX_ char *start) } #endif - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); if (!s) { if (PL_lex_stuff) { SvREFCNT_dec(PL_lex_stuff); @@ -9404,7 +9454,7 @@ S_scan_trans(pTHX_ char *start) o->op_private &= ~OPpTRANS_ALL; o->op_private |= del|squash|complement| (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)| - (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0); + (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0); PL_lex_op = o; pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS; @@ -9421,21 +9471,43 @@ S_scan_trans(pTHX_ char *start) return s; } +/* scan_heredoc + Takes a pointer to the first < in <filtered) - && !(PL_lex_inwhat == OP_SCALAR); + const bool infile = PL_rsfp || PL_parser->filtered; + LEXSHARED *shared = PL_parser->lex_shared; #ifdef PERL_MAD I32 stuffstart = s - SvPVX(PL_linestr); char *tstart; @@ -9446,10 +9518,9 @@ S_scan_heredoc(pTHX_ register char *s) PERL_ARGS_ASSERT_SCAN_HEREDOC; s += 2; - d = PL_tokenbuf; + d = PL_tokenbuf + 1; e = PL_tokenbuf + sizeof PL_tokenbuf - 1; - if (!outer) - *d++ = '\n'; + *PL_tokenbuf = '\n'; peek = s; while (SPACE_OR_TAB(*peek)) peek++; @@ -9457,12 +9528,14 @@ S_scan_heredoc(pTHX_ register char *s) s = peek; term = *s++; s = delimcpy(d, e, s, PL_bufend, term, &len); + if (s == PL_bufend) + Perl_croak(aTHX_ "Unterminated delimiter for here document"); d += len; - if (s < PL_bufend) - s++; + s++; } else { if (*s == '\\') + /* <<\FOO is equivalent to <<'FOO' */ s++, term = '\''; else term = '"'; @@ -9481,8 +9554,8 @@ S_scan_heredoc(pTHX_ register char *s) #ifdef PERL_MAD if (PL_madskills) { - tstart = PL_tokenbuf + !outer; - PL_thisclose = newSVpvn(tstart, len - !outer); + tstart = PL_tokenbuf + 1; + PL_thisclose = newSVpvn(tstart, len - 1); tstart = SvPVX(PL_linestr) + stuffstart; PL_thisopen = newSVpvn(tstart, s - tstart); stuffstart = s - SvPVX(PL_linestr); @@ -9513,20 +9586,6 @@ S_scan_heredoc(pTHX_ register char *s) } #endif #ifdef PERL_MAD - found_newline = 0; -#endif - if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) { - herewas = newSVpvn(s,PL_bufend-s); - } - else { -#ifdef PERL_MAD - herewas = newSVpvn(s-1,found_newline-s+1); -#else - s--; - herewas = newSVpvn(s,found_newline-s); -#endif - } -#ifdef PERL_MAD if (PL_madskills) { tstart = SvPVX(PL_linestr) + stuffstart; if (PL_thisstuff) @@ -9534,14 +9593,8 @@ S_scan_heredoc(pTHX_ register char *s) else PL_thisstuff = newSVpvn(tstart, s - tstart); } -#endif - s += SvCUR(herewas); -#ifdef PERL_MAD stuffstart = s - SvPVX(PL_linestr); - - if (found_newline) - s--; #endif tmpstr = newSV_type(SVt_PVIV); @@ -9555,47 +9608,57 @@ S_scan_heredoc(pTHX_ register char *s) SvIV_set(tmpstr, '\\'); } - CLINE; - PL_multi_start = CopLINE(PL_curcop); + PL_multi_start = CopLINE(PL_curcop) + 1; PL_multi_open = PL_multi_close = '<'; - term = *PL_tokenbuf; - if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp - && !PL_parser->filtered) { - char * const bufptr = PL_sublex_info.super_bufptr; - char * const bufend = PL_sublex_info.super_bufend; - char * const olds = s - SvCUR(herewas); - s = strchr(bufptr, '\n'); - if (!s) - s = bufend; + /* inside a string eval or quote-like operator */ + if (!infile || PL_lex_inwhat) { + SV *linestr; + char *bufend; + char * const olds = s; + PERL_CONTEXT * const cx = &cxstack[cxstack_ix]; + /* These two fields are not set until an inner lexing scope is + entered. But we need them set here. */ + shared->ls_bufptr = s; + shared->ls_linestr = PL_linestr; + if (PL_lex_inwhat) + /* Look for a newline. If the current buffer does not have one, + peek into the line buffer of the parent lexing scope, going + up as many levels as necessary to find one with a newline + after bufptr. + */ + while (!(s = (char *)memchr( + (void *)shared->ls_bufptr, '\n', + SvEND(shared->ls_linestr)-shared->ls_bufptr + ))) { + shared = shared->ls_prev; + /* shared is only null if we have gone beyond the outermost + lexing scope. In a file, we will have broken out of the + loop in the previous iteration. In an eval, the string buf- + fer ends with "\n;", so the while condition below will have + evaluated to false. So shared can never be null. */ + assert(shared); + /* A LEXSHARED struct with a null ls_prev pointer is the outer- + most lexing scope. In a file, shared->ls_linestr at that + level is just one line, so there is no body to steal. */ + if (infile && !shared->ls_prev) { + s = olds; + goto streaming; + } + } + else { /* eval */ + s = (char*)memchr((void*)s, '\n', PL_bufend - s); + assert(s); + } + linestr = shared->ls_linestr; + bufend = SvEND(linestr); d = s; while (s < bufend && - (*s != term || memNE(s,PL_tokenbuf,len)) ) { + (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) { if (*s++ == '\n') - CopLINE_inc(PL_curcop); + ++shared->herelines; } if (s >= bufend) { - CopLINE_set(PL_curcop, (line_t)PL_multi_start); - missingterm(PL_tokenbuf); - } - sv_setpvn(herewas,bufptr,d-bufptr+1); - sv_setpvn(tmpstr,d+1,s-d); - s += len - 1; - sv_catpvn(herewas,s,bufend-s); - Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char); - - s = olds; - goto retval; - } - else if (!outer) { - d = s; - while (s < PL_bufend && - (*s != term || memNE(s,PL_tokenbuf,len)) ) { - if (*s++ == '\n') - CopLINE_inc(PL_curcop); - } - if (s >= PL_bufend) { - CopLINE_set(PL_curcop, (line_t)PL_multi_start); - missingterm(PL_tokenbuf); + goto interminable; } sv_setpvn(tmpstr,d+1,s-d); #ifdef PERL_MAD @@ -9608,17 +9671,52 @@ S_scan_heredoc(pTHX_ register char *s) } #endif s += len - 1; - CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */ + /* the preceding stmt passes a newline */ + shared->herelines++; - sv_catpvn(herewas,s,PL_bufend-s); - sv_setsv(PL_linestr,herewas); - PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr); - PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); - PL_last_lop = PL_last_uni = NULL; + /* s now points to the newline after the heredoc terminator. + d points to the newline before the body of the heredoc. + */ + + /* We are going to modify linestr in place here, so set + aside copies of the string if necessary for re-evals or + (caller $n)[6]. */ + /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we + check shared->re_eval_str. */ + if (shared->re_eval_start || shared->re_eval_str) { + /* Set aside the rest of the regexp */ + if (!shared->re_eval_str) + shared->re_eval_str = + newSVpvn(shared->re_eval_start, + bufend - shared->re_eval_start); + shared->re_eval_start -= s-d; + } + if (CxTYPE(cx) == CXt_EVAL && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL + && cx->blk_eval.cur_text == linestr) { + cx->blk_eval.cur_text = newSVsv(linestr); + SvSCREAM_on(cx->blk_eval.cur_text); + } + /* Copy everything from s onwards back to d. */ + Move(s,d,bufend-s + 1,char); + SvCUR_set(linestr, SvCUR(linestr) - (s-d)); + /* Setting PL_bufend only applies when we have not dug deeper + into other scopes, because sublex_done sets PL_bufend to + SvEND(PL_linestr). */ + if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr); + s = olds; } else - sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */ - while (s >= PL_bufend) { /* multiple line string? */ + { + SV *linestr_save; + streaming: + sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */ + term = PL_tokenbuf[1]; + len--; + linestr_save = PL_linestr; /* must restore this afterwards */ + d = s; /* and this */ + PL_linestr = newSVpvs(""); + PL_bufend = SvPVX(PL_linestr); + while (1) { #ifdef PERL_MAD if (PL_madskills) { tstart = SvPVX(PL_linestr) + stuffstart; @@ -9628,19 +9726,24 @@ S_scan_heredoc(pTHX_ register char *s) PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart); } #endif - PL_bufptr = s; - CopLINE_inc(PL_curcop); - if (!outer || !lex_next_chunk(0)) { - CopLINE_set(PL_curcop, (line_t)PL_multi_start); - missingterm(PL_tokenbuf); + PL_bufptr = PL_bufend; + CopLINE_set(PL_curcop, + PL_multi_start + shared->herelines); + if (!lex_next_chunk(LEX_NO_TERM) + && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) { + SvREFCNT_dec(linestr_save); + goto interminable; + } + CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1); + if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') { + lex_grow_linestr(SvCUR(PL_linestr) + 2); + sv_catpvs(PL_linestr, "\n\0"); } - CopLINE_dec(PL_curcop); s = PL_bufptr; #ifdef PERL_MAD stuffstart = s - SvPVX(PL_linestr); #endif - CopLINE_inc(PL_curcop); - PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); + shared->herelines++; PL_last_lop = PL_last_uni = NULL; #ifndef PERL_STRICT_CR if (PL_bufend - PL_linestart >= 2) { @@ -9657,26 +9760,23 @@ S_scan_heredoc(pTHX_ register char *s) else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r') PL_bufend[-1] = '\n'; #endif - if (*s == term && memEQ(s,PL_tokenbuf,len)) { - STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr); - *(SvPVX(PL_linestr) + off ) = ' '; - lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1); - sv_catsv(PL_linestr,herewas); + if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) { + SvREFCNT_dec(PL_linestr); + PL_linestr = linestr_save; + PL_linestart = SvPVX(linestr_save); PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); - s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */ + s = d; + break; } else { - s = PL_bufend; sv_catsv(tmpstr,PL_linestr); } + } } - s++; -retval: PL_multi_end = CopLINE(PL_curcop); if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) { SvPV_shrink_to_cur(tmpstr); } - SvREFCNT_dec(herewas); if (!IN_BYTES) { if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr))) SvUTF8_on(tmpstr); @@ -9686,6 +9786,11 @@ retval: PL_lex_stuff = tmpstr; pl_yylval.ival = op_type; return s; + + interminable: + SvREFCNT_dec(tmpstr); + CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1); + missingterm(PL_tokenbuf + 1); } /* scan_inputsymbol @@ -9708,7 +9813,7 @@ STATIC char * S_scan_inputsymbol(pTHX_ char *start) { dVAR; - register char *s = start; /* current position in buffer */ + char *s = start; /* current position in buffer */ char *end; I32 len; char *d = PL_tokenbuf; /* start of temp holding space */ @@ -9753,7 +9858,7 @@ S_scan_inputsymbol(pTHX_ char *start) if (d - PL_tokenbuf != len) { pl_yylval.ival = OP_GLOB; - s = scan_str(start,!!PL_madskills,FALSE); + s = scan_str(start,!!PL_madskills,FALSE,FALSE); if (!s) Perl_croak(aTHX_ "Glob not terminated"); return s; @@ -9853,6 +9958,8 @@ intro_sym: takes: start position in buffer keep_quoted preserve \ on the embedded delimiter(s) keep_delims preserve the delimiters around the string + re_reparse compiling a run-time /(?{})/: + collapse // to /, and skip encoding src returns: position to continue reading from buffer side-effects: multi_start, multi_close, lex_repl or lex_stuff, and updates the read buffer. @@ -9893,14 +10000,14 @@ intro_sym: */ STATIC char * -S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) +S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse) { dVAR; SV *sv; /* scalar value: string */ const char *tmps; /* temp string, used for delimiter matching */ - register char *s = start; /* current position in the buffer */ - register char term; /* terminating character */ - register char *to; /* current position in the sv's data */ + char *s = start; /* current position in the buffer */ + char term; /* terminating character */ + char *to; /* current position in the sv's data */ I32 brackets = 1; /* bracket nesting level */ bool has_utf8 = FALSE; /* is there any utf8 content? */ I32 termcode; /* terminating char. code */ @@ -9972,7 +10079,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) } #endif for (;;) { - if (PL_encoding && !UTF) { + if (PL_encoding && !UTF && !re_reparse) { bool cont = TRUE; while (cont) { @@ -9984,7 +10091,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) for (; s < ns; s++) { if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) - CopLINE_inc(PL_curcop); + COPLINE_INC_WITH_HERELINES; } if (!found) goto read_more_line; @@ -10051,12 +10158,15 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) for (; s < PL_bufend; s++,to++) { /* embedded newlines increment the current line number */ if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) - CopLINE_inc(PL_curcop); + COPLINE_INC_WITH_HERELINES; /* handle quoted delimiters */ if (*s == '\\' && s+1 < PL_bufend && term != '\\') { - if (!keep_quoted && s[1] == term) + if (!keep_quoted + && (s[1] == term + || (re_reparse && s[1] == '\\')) + ) s++; - /* any other quotes are simply copied straight through */ + /* any other quotes are simply copied straight through */ else *to++ = *s++; } @@ -10083,7 +10193,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) for (; s < PL_bufend; s++,to++) { /* embedded newlines increment the line count */ if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) - CopLINE_inc(PL_curcop); + COPLINE_INC_WITH_HERELINES; /* backslashes can escape the open or closing characters */ if (*s == '\\' && s+1 < PL_bufend) { if (!keep_quoted && @@ -10142,7 +10252,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart); } #endif - CopLINE_inc(PL_curcop); + COPLINE_INC_WITH_HERELINES; PL_bufptr = PL_bufend; if (!lex_next_chunk(0)) { sv_free(sv); @@ -10157,7 +10267,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) /* at this point, we have successfully read the delimited string */ - if (!PL_encoding || UTF) { + if (!PL_encoding || UTF || re_reparse) { #ifdef PERL_MAD if (PL_madskills) { char * const tstart = SvPVX(PL_linestr) + stuffstart; @@ -10189,7 +10299,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) } } #endif - if (has_utf8 || PL_encoding) + if (has_utf8 || (PL_encoding && !re_reparse)) SvUTF8_on(sv); PL_multi_end = CopLINE(PL_curcop); @@ -10205,7 +10315,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) */ if (PL_lex_stuff) - PL_lex_repl = sv; + PL_sublex_info.repl = sv; else PL_lex_stuff = sv; return s; @@ -10237,9 +10347,9 @@ char * Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) { dVAR; - register const char *s = start; /* current position in buffer */ - register char *d; /* destination in temp buffer */ - register char *e; /* end of temp buffer */ + const char *s = start; /* current position in buffer */ + char *d; /* destination in temp buffer */ + char *e; /* end of temp buffer */ NV nv; /* number read, as a double */ SV *sv = NULL; /* place to put the converted number */ bool floatit; /* boolean: int or float? */ @@ -10610,8 +10720,8 @@ STATIC char * S_scan_formline(pTHX_ register char *s) { dVAR; - register char *eol; - register char *t; + char *eol; + char *t; SV * const stuff = newSVpvs(""); bool needargs = FALSE; bool eofmt = FALSE; @@ -10642,13 +10752,9 @@ S_scan_formline(pTHX_ register char *s) break; } } - if (PL_in_eval && !PL_rsfp && !PL_parser->filtered) { - eol = (char *) memchr(s,'\n',PL_bufend-s); - if (!eol++) + eol = (char *) memchr(s,'\n',PL_bufend-s); + if (!eol++) eol = PL_bufend; - } - else - eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); if (*s != '#') { for (t = s; t < eol; t++) { if (*t == '~' && t[1] == '~' && SvCUR(stuff)) { @@ -10673,7 +10779,8 @@ S_scan_formline(pTHX_ register char *s) break; } s = (char*)eol; - if (PL_rsfp || PL_parser->filtered) { + if ((PL_rsfp || PL_parser->filtered) + && PL_parser->form_lex_state == LEX_NORMAL) { bool got_some; #ifdef PERL_MAD if (PL_madskills) { @@ -10684,7 +10791,7 @@ S_scan_formline(pTHX_ register char *s) } #endif PL_bufptr = PL_bufend; - CopLINE_inc(PL_curcop); + COPLINE_INC_WITH_HERELINES; got_some = lex_next_chunk(0); CopLINE_dec(PL_curcop); s = PL_bufptr; @@ -10697,16 +10804,15 @@ S_scan_formline(pTHX_ register char *s) incline(s); } enough: + if (!SvCUR(stuff) || needargs) + PL_lex_state = PL_parser->form_lex_state; if (SvCUR(stuff)) { - PL_expect = XTERM; + PL_expect = XSTATE; if (needargs) { - PL_lex_state = LEX_NORMAL; start_force(PL_curforce); NEXTVAL_NEXTTOKE.ival = 0; - force_next(','); + force_next(FORMLBRACK); } - else - PL_lex_state = LEX_FORMLINE; if (!IN_BYTES) { if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff))) SvUTF8_on(stuff); @@ -10716,15 +10822,11 @@ S_scan_formline(pTHX_ register char *s) start_force(PL_curforce); NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff); force_next(THING); - start_force(PL_curforce); - NEXTVAL_NEXTTOKE.ival = OP_FORMLINE; - force_next(LSTOP); } else { SvREFCNT_dec(stuff); if (eofmt) PL_lex_formbrack = 0; - PL_bufptr = s; } #ifdef PERL_MAD if (PL_madskills) { @@ -10745,9 +10847,6 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags) const I32 oldsavestack_ix = PL_savestack_ix; CV* const outsidecv = PL_compcv; - if (PL_compcv) { - assert(SvTYPE(PL_compcv) == SVt_PVCV); - } SAVEI32(PL_subline); save_item(PL_subname); SAVESPTR(PL_compcv); @@ -10759,6 +10858,8 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags) CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB); CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv)); CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax; + if (outsidecv && CvPADLIST(outsidecv)) + CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id; return oldsavestack_ix; }