X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/74e8ce349633219f5a1aba2c2aaa959675e24299..74401f9c0e7d7248493d3f2bb0f4fdb0b20a92aa:/toke.c diff --git a/toke.c b/toke.c index 832b9e9..b6f9cc9 100644 --- a/toke.c +++ b/toke.c @@ -124,8 +124,9 @@ static const char ident_too_long[] = "Identifier too long"; # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke] #endif -#define XFAKEBRACK 128 -#define XENUMMASK 127 +#define XENUMMASK 0x3f +#define XFAKEEOF 0x40 +#define XFAKEBRACK 0x80 #ifdef USE_UTF8_SCRIPTS # define UTF (!IN_BYTES) @@ -643,29 +644,39 @@ S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen) } #endif +/* +=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags + +Creates and initialises a new lexer/parser state object, supplying +a context in which to lex and parse from a new source of Perl code. +A pointer to the new state object is placed in L. An entry +is made on the save stack so that upon unwinding the new state object +will be destroyed and the former value of L will be restored. +Nothing else need be done to clean up the parsing context. + +The code to be parsed comes from I and I. I, if +non-null, provides a string (in SV form) containing code to be parsed. +A copy of the string is made, so subsequent modification of I +does not affect parsing. I, if non-null, provides an input stream +from which code will be read to be parsed. If both are non-null, the +code in I comes first and must consist of complete lines of input, +and I supplies the remainder of the source. +The I parameter is reserved for future use, and must always +be zero. -/* - * Perl_lex_start - * - * Create a parser object and initialise its parser and lexer fields - * - * rsfp is the opened file handle to read from (if any), - * - * line holds any initial content already read from the file (or in - * the case of no file, such as an eval, the whole contents); - * - * new_filter indicates that this is a new file and it shouldn't inherit - * the filters from the current parser (ie require). - */ +=cut +*/ void -Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter) +Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) { dVAR; const char *s = NULL; STRLEN len; yy_parser *parser, *oparser; + if (flags) + Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start"); /* create and initialise a parser */ @@ -693,8 +704,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter) parser->lex_state = LEX_NORMAL; parser->expect = XSTATE; parser->rsfp = rsfp; - parser->rsfp_filters = (new_filter || !oparser) ? newAV() - : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters)); + parser->rsfp_filters = newAV(); Newx(parser->lex_brackstack, 120, char); Newx(parser->lex_casestack, 12, char); @@ -708,15 +718,10 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter) if (!len) { parser->linestr = newSVpvs("\n;"); - } else if (SvREADONLY(line) || s[len-1] != ';' || !SvPOK(line)) { - /* avoid tie/overload weirdness */ + } else { parser->linestr = newSVpvn_flags(s, len, SvUTF8(line)); if (s[len-1] != ';') sv_catpvs(parser->linestr, "\n;"); - } else { - SvTEMP_off(line); - SvREFCNT_inc_simple_void_NN(line); - parser->linestr = line; } parser->oldoldbufptr = parser->oldbufptr = @@ -724,6 +729,8 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter) parser->linestart = SvPVX(parser->linestr); parser->bufend = parser->bufptr + SvCUR(parser->linestr); parser->last_lop = parser->last_uni = NULL; + + parser->in_pod = 0; } @@ -752,19 +759,6 @@ Perl_parser_free(pTHX_ const yy_parser *parser) /* - * Perl_lex_end - * Finalizer for lexing operations. Must be called when the parser is - * done with the lexer. - */ - -void -Perl_lex_end(pTHX) -{ - dVAR; - PL_doextract = FALSE; -} - -/* =for apidoc AmxU|SV *|PL_parser-Elinestr Buffer scalar containing the chunk currently under consideration of the @@ -1271,7 +1265,7 @@ Perl_lex_next_chunk(pTHX_ U32 flags) else if (PL_parser->rsfp) (void)PerlIO_close(PL_parser->rsfp); PL_parser->rsfp = NULL; - PL_doextract = FALSE; + PL_parser->in_pod = 0; #ifdef PERL_MAD if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n)) PL_faketokens = 1; @@ -1954,10 +1948,17 @@ S_force_next(pTHX_ I32 type) void Perl_yyunlex(pTHX) { - if (PL_parser->yychar != YYEMPTY) { - start_force(-1); - NEXTVAL_NEXTTOKE = PL_parser->yylval; - force_next(PL_parser->yychar); + int yyc = PL_parser->yychar; + if (yyc != YYEMPTY) { + if (yyc) { + start_force(-1); + NEXTVAL_NEXTTOKE = PL_parser->yylval; + if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) { + PL_lex_brackets--; + yyc |= (1<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16); + } + force_next(yyc); + } PL_parser->yychar = YYEMPTY; } } @@ -2252,7 +2253,8 @@ S_tokeq(pTHX_ SV *sv) if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) goto finish; send = s + len; - while (s < send && *s != '\\') + /* This is relying on the SV being "well formed" with a trailing '\0' */ + while (s < send && !(*s == '\\' && s[1] == '\\')) s++; if (s == send) goto finish; @@ -3952,7 +3954,7 @@ S_readpipe_override(pTHX) && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))) { PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, - append_elem(OP_LIST, + op_append_elem(OP_LIST, newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */ newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe)))); } @@ -4279,12 +4281,26 @@ Perl_yylex(pTHX) PL_lex_defer = LEX_NORMAL; } #endif + { + I32 next_type; +#ifdef PERL_MAD + next_type = PL_nexttoke[PL_lasttoke].next_type; +#else + next_type = PL_nexttype[PL_nexttoke]; +#endif + if (next_type & (1<<24)) { + if (PL_lex_brackets > 100) + Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); + PL_lex_brackstack[PL_lex_brackets++] = (next_type >> 16) & 0xff; + next_type &= 0xffff; + } #ifdef PERL_MAD - /* FIXME - can these be merged? */ - return(PL_nexttoke[PL_lasttoke].next_type); + /* FIXME - can these be merged? */ + return next_type; #else - return REPORT(PL_nexttype[PL_nexttoke]); + return REPORT(next_type); #endif + } /* interpolated case modifiers like \L \U, including \Q and \E. when we get here, PL_bufptr is at the \ @@ -4574,7 +4590,8 @@ Perl_yylex(pTHX) if (!PL_rsfp) { PL_last_uni = 0; PL_last_lop = 0; - if (PL_lex_brackets) { + if (PL_lex_brackets && + PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) { yyerror((const char *) (PL_lex_formbrack ? "Format not terminated" @@ -4697,7 +4714,7 @@ Perl_yylex(pTHX) s = swallow_bom((U8*)s); } } - if (PL_doextract) { + if (PL_parser->in_pod) { /* Incest with pod. */ #ifdef PERL_MAD if (PL_madskills) @@ -4708,12 +4725,12 @@ Perl_yylex(pTHX) PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); PL_last_lop = PL_last_uni = NULL; - PL_doextract = FALSE; + PL_parser->in_pod = 0; } } if (PL_rsfp) incline(s); - } while (PL_doextract); + } while (PL_parser->in_pod); PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s; PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); PL_last_lop = PL_last_uni = NULL; @@ -5163,7 +5180,9 @@ Perl_yylex(pTHX) s++; BOop(OP_BIT_XOR); case '[': - PL_lex_brackets++; + if (PL_lex_brackets > 100) + Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); + PL_lex_brackstack[PL_lex_brackets++] = 0; { const char tmp = *s++; OPERATOR(tmp); @@ -5247,7 +5266,7 @@ Perl_yylex(pTHX) } if (PL_lex_stuff) { sv_catsv(sv, PL_lex_stuff); - attrs = append_elem(OP_LIST, attrs, + attrs = op_append_elem(OP_LIST, attrs, newSVOP(OP_CONST, 0, sv)); SvREFCNT_dec(PL_lex_stuff); PL_lex_stuff = NULL; @@ -5287,7 +5306,7 @@ Perl_yylex(pTHX) justified by the performance win for the common case of applying only built-in attributes.) */ else - attrs = append_elem(OP_LIST, attrs, + attrs = op_append_elem(OP_LIST, attrs, newSVOP(OP_CONST, 0, sv)); } @@ -5363,6 +5382,8 @@ Perl_yylex(pTHX) TERM(tmp); } case ']': + if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) + TOKEN(0); s++; if (PL_lex_brackets <= 0) yyerror("Unmatched right square bracket"); @@ -5540,6 +5561,8 @@ Perl_yylex(pTHX) PL_copline = NOLINE; /* invalidate current command line number */ TOKEN('{'); case '}': + if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) + TOKEN(0); rightbracket: s++; if (PL_lex_brackets <= 0) @@ -5662,7 +5685,7 @@ Perl_yylex(pTHX) } #endif s = PL_bufend; - PL_doextract = TRUE; + PL_parser->in_pod = 1; goto retry; } } @@ -6340,29 +6363,12 @@ Perl_yylex(pTHX) if (len) goto safe_bareword; - cv = NULL; { OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv)); const_op->op_private = OPpCONST_BARE; rv2cv_op = newCVREF(0, const_op); } - if (rv2cv_op->op_type == OP_RV2CV && - (rv2cv_op->op_flags & OPf_KIDS)) { - OP *rv_op = cUNOPx(rv2cv_op)->op_first; - switch (rv_op->op_type) { - case OP_CONST: { - SV *sv = cSVOPx_sv(rv_op); - if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV) - cv = (CV*)SvRV(sv); - } break; - case OP_GV: { - GV *gv = cGVOPx_gv(rv_op); - CV *maybe_cv = GvCVu(gv); - if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV) - cv = maybe_cv; - } break; - } - } + cv = rv2cv_op_cv(rv2cv_op, 0); /* See if it's the indirect object for a list operator. */ @@ -6518,7 +6524,7 @@ Perl_yylex(pTHX) ( ( *proto == '$' || *proto == '_' - || *proto == '*' + || *proto == '*' || *proto == '+' ) && proto[1] == '\0' ) @@ -7380,7 +7386,7 @@ Perl_yylex(pTHX) /**/; } sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff)); - words = append_elem(OP_LIST, words, + words = op_append_elem(OP_LIST, words, newSVOP(OP_CONST, 0, tokeq(sv))); } } @@ -7730,7 +7736,7 @@ Perl_yylex(pTHX) if (warnillegalproto) { if (must_be_last) proto_after_greedy_proto = TRUE; - if (!strchr("$@%*;[]&\\_", *p)) { + if (!strchr("$@%*;[]&\\_+", *p)) { bad_proto = TRUE; } else { @@ -12547,7 +12553,7 @@ S_scan_inputsymbol(pTHX_ char *start) o->op_targ = tmp; PL_lex_op = readline_overriden ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, - append_elem(OP_LIST, o, + op_append_elem(OP_LIST, o, newCVREF(0, newGVOP(OP_GV,0,gv_readline)))) : (OP*)newUNOP(OP_READLINE, 0, o); } @@ -12563,7 +12569,7 @@ intro_sym: SVt_PV); PL_lex_op = readline_overriden ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, - append_elem(OP_LIST, + op_append_elem(OP_LIST, newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)), newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) : (OP*)newUNOP(OP_READLINE, 0, @@ -12582,7 +12588,7 @@ intro_sym: GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO); PL_lex_op = readline_overriden ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, - append_elem(OP_LIST, + op_append_elem(OP_LIST, newGVOP(OP_GV, 0, gv), newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv)); @@ -13967,6 +13973,68 @@ Perl_keyword_plugin_standard(pTHX_ return KEYWORD_PLUGIN_DECLINE; } +#define parse_recdescent(g) S_parse_recdescent(aTHX_ g) +static void +S_parse_recdescent(pTHX_ int gramtype) +{ + SAVEI32(PL_lex_brackets); + if (PL_lex_brackets > 100) + Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); + PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF; + if(yyparse(gramtype) && !PL_parser->error_count) + qerror(Perl_mess(aTHX_ "Parse error")); +} + +#define parse_recdescent_for_op(g) S_parse_recdescent_for_op(aTHX_ g) +static OP * +S_parse_recdescent_for_op(pTHX_ int gramtype) +{ + OP *o; + ENTER; + SAVEVPTR(PL_eval_root); + PL_eval_root = NULL; + parse_recdescent(gramtype); + o = PL_eval_root; + LEAVE; + return o; +} + +/* +=for apidoc Amx|OP *|parse_block|U32 flags + +Parse a single complete Perl code block. This consists of an opening +brace, a sequence of statements, and a closing brace. The block +constitutes a lexical scope, so C variables and various compile-time +effects can be contained within it. It is up to the caller to ensure +that the dynamic parser state (L et al) is correctly set to +reflect the source of the code to be parsed and the lexical context for +the statement. + +The op tree representing the code block is returned. This is always a +real op, never a null pointer. It will normally be a C list, +including C or equivalent ops. No ops to construct any kind +of runtime scope are included by virtue of it being a block. + +If an error occurs in parsing or compilation, in most cases a valid op +tree (most likely null) is returned anyway. The error is reflected in +the parser state, normally resulting in a single exception at the top +level of parsing which covers all the compilation errors that occurred. +Some compilation errors, however, will throw an exception immediately. + +The I parameter is reserved for future use, and must always +be zero. + +=cut +*/ + +OP * +Perl_parse_block(pTHX_ U32 flags) +{ + if (flags) + Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block"); + return parse_recdescent_for_op(GRAMBLOCK); +} + /* =for apidoc Amx|OP *|parse_fullstmt|U32 flags @@ -13997,17 +14065,9 @@ be zero. OP * Perl_parse_fullstmt(pTHX_ U32 flags) { - OP *fullstmtop; if (flags) Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt"); - ENTER; - SAVEVPTR(PL_eval_root); - PL_eval_root = NULL; - if(yyparse(GRAMFULLSTMT) && !PL_parser->error_count) - qerror(Perl_mess(aTHX_ "Parse error")); - fullstmtop = PL_eval_root; - LEAVE; - return fullstmtop; + return parse_recdescent_for_op(GRAMFULLSTMT); } /* @@ -14044,15 +14104,13 @@ OP * Perl_parse_stmtseq(pTHX_ U32 flags) { OP *stmtseqop; + I32 c; if (flags) Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt"); - ENTER; - SAVEVPTR(PL_eval_root); - PL_eval_root = NULL; - if(yyparse(GRAMSTMTSEQ) && !PL_parser->error_count) + stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ); + c = lex_peek_unichar(0); + if (c != -1 && c != /*{*/'}') qerror(Perl_mess(aTHX_ "Parse error")); - stmtseqop = PL_eval_root; - LEAVE; return stmtseqop; }