# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
#endif
-#define XFAKEBRACK 128
-#define XENUMMASK 127
+#define XENUMMASK 0x3f
+#define XFAKEEOF 0x40
+#define XFAKEBRACK 0x80
#ifdef USE_UTF8_SCRIPTS
# define UTF (!IN_BYTES)
}
#endif
+/*
+=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
+
+Creates and initialises a new lexer/parser state object, supplying
+a context in which to lex and parse from a new source of Perl code.
+A pointer to the new state object is placed in L</PL_parser>. An entry
+is made on the save stack so that upon unwinding the new state object
+will be destroyed and the former value of L</PL_parser> will be restored.
+Nothing else need be done to clean up the parsing context.
+
+The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
+non-null, provides a string (in SV form) containing code to be parsed.
+A copy of the string is made, so subsequent modification of I<line>
+does not affect parsing. I<rsfp>, if non-null, provides an input stream
+from which code will be read to be parsed. If both are non-null, the
+code in I<line> comes first and must consist of complete lines of input,
+and I<rsfp> supplies the remainder of the source.
+The I<flags> parameter is reserved for future use, and must always
+be zero.
-/*
- * Perl_lex_start
- *
- * Create a parser object and initialise its parser and lexer fields
- *
- * rsfp is the opened file handle to read from (if any),
- *
- * line holds any initial content already read from the file (or in
- * the case of no file, such as an eval, the whole contents);
- *
- * new_filter indicates that this is a new file and it shouldn't inherit
- * the filters from the current parser (ie require).
- */
+=cut
+*/
void
-Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
+Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
{
dVAR;
const char *s = NULL;
STRLEN len;
yy_parser *parser, *oparser;
+ if (flags)
+ Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
/* create and initialise a parser */
parser->lex_state = LEX_NORMAL;
parser->expect = XSTATE;
parser->rsfp = rsfp;
- parser->rsfp_filters = (new_filter || !oparser) ? newAV()
- : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
+ parser->rsfp_filters = newAV();
Newx(parser->lex_brackstack, 120, char);
Newx(parser->lex_casestack, 12, char);
if (!len) {
parser->linestr = newSVpvs("\n;");
- } else if (SvREADONLY(line) || s[len-1] != ';' || !SvPOK(line)) {
- /* avoid tie/overload weirdness */
+ } else {
parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
if (s[len-1] != ';')
sv_catpvs(parser->linestr, "\n;");
- } else {
- SvTEMP_off(line);
- SvREFCNT_inc_simple_void_NN(line);
- parser->linestr = line;
}
parser->oldoldbufptr =
parser->oldbufptr =
parser->linestart = SvPVX(parser->linestr);
parser->bufend = parser->bufptr + SvCUR(parser->linestr);
parser->last_lop = parser->last_uni = NULL;
+
+ parser->in_pod = 0;
}
/*
- * Perl_lex_end
- * Finalizer for lexing operations. Must be called when the parser is
- * done with the lexer.
- */
-
-void
-Perl_lex_end(pTHX)
-{
- dVAR;
- PL_doextract = FALSE;
-}
-
-/*
=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
Buffer scalar containing the chunk currently under consideration of the
else if (PL_parser->rsfp)
(void)PerlIO_close(PL_parser->rsfp);
PL_parser->rsfp = NULL;
- PL_doextract = FALSE;
+ PL_parser->in_pod = 0;
#ifdef PERL_MAD
if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
PL_faketokens = 1;
void
Perl_yyunlex(pTHX)
{
- if (PL_parser->yychar != YYEMPTY) {
- start_force(-1);
- NEXTVAL_NEXTTOKE = PL_parser->yylval;
- force_next(PL_parser->yychar);
+ int yyc = PL_parser->yychar;
+ if (yyc != YYEMPTY) {
+ if (yyc) {
+ start_force(-1);
+ NEXTVAL_NEXTTOKE = PL_parser->yylval;
+ if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
+ PL_lex_brackets--;
+ yyc |= (1<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
+ }
+ force_next(yyc);
+ }
PL_parser->yychar = YYEMPTY;
}
}
if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
goto finish;
send = s + len;
- while (s < send && *s != '\\')
+ /* This is relying on the SV being "well formed" with a trailing '\0' */
+ while (s < send && !(*s == '\\' && s[1] == '\\'))
s++;
if (s == send)
goto finish;
&& GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
{
PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
- append_elem(OP_LIST,
+ op_append_elem(OP_LIST,
newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
}
PL_lex_defer = LEX_NORMAL;
}
#endif
+ {
+ I32 next_type;
+#ifdef PERL_MAD
+ next_type = PL_nexttoke[PL_lasttoke].next_type;
+#else
+ next_type = PL_nexttype[PL_nexttoke];
+#endif
+ if (next_type & (1<<24)) {
+ if (PL_lex_brackets > 100)
+ Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
+ PL_lex_brackstack[PL_lex_brackets++] = (next_type >> 16) & 0xff;
+ next_type &= 0xffff;
+ }
#ifdef PERL_MAD
- /* FIXME - can these be merged? */
- return(PL_nexttoke[PL_lasttoke].next_type);
+ /* FIXME - can these be merged? */
+ return next_type;
#else
- return REPORT(PL_nexttype[PL_nexttoke]);
+ return REPORT(next_type);
#endif
+ }
/* interpolated case modifiers like \L \U, including \Q and \E.
when we get here, PL_bufptr is at the \
if (!PL_rsfp) {
PL_last_uni = 0;
PL_last_lop = 0;
- if (PL_lex_brackets) {
+ if (PL_lex_brackets &&
+ PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
yyerror((const char *)
(PL_lex_formbrack
? "Format not terminated"
s = swallow_bom((U8*)s);
}
}
- if (PL_doextract) {
+ if (PL_parser->in_pod) {
/* Incest with pod. */
#ifdef PERL_MAD
if (PL_madskills)
PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
PL_last_lop = PL_last_uni = NULL;
- PL_doextract = FALSE;
+ PL_parser->in_pod = 0;
}
}
if (PL_rsfp)
incline(s);
- } while (PL_doextract);
+ } while (PL_parser->in_pod);
PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
PL_last_lop = PL_last_uni = NULL;
s++;
BOop(OP_BIT_XOR);
case '[':
- PL_lex_brackets++;
+ if (PL_lex_brackets > 100)
+ Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
+ PL_lex_brackstack[PL_lex_brackets++] = 0;
{
const char tmp = *s++;
OPERATOR(tmp);
}
if (PL_lex_stuff) {
sv_catsv(sv, PL_lex_stuff);
- attrs = append_elem(OP_LIST, attrs,
+ attrs = op_append_elem(OP_LIST, attrs,
newSVOP(OP_CONST, 0, sv));
SvREFCNT_dec(PL_lex_stuff);
PL_lex_stuff = NULL;
justified by the performance win for the common case
of applying only built-in attributes.) */
else
- attrs = append_elem(OP_LIST, attrs,
+ attrs = op_append_elem(OP_LIST, attrs,
newSVOP(OP_CONST, 0,
sv));
}
TERM(tmp);
}
case ']':
+ if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
+ TOKEN(0);
s++;
if (PL_lex_brackets <= 0)
yyerror("Unmatched right square bracket");
PL_copline = NOLINE; /* invalidate current command line number */
TOKEN('{');
case '}':
+ if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
+ TOKEN(0);
rightbracket:
s++;
if (PL_lex_brackets <= 0)
}
#endif
s = PL_bufend;
- PL_doextract = TRUE;
+ PL_parser->in_pod = 1;
goto retry;
}
}
if (len)
goto safe_bareword;
- cv = NULL;
{
OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
const_op->op_private = OPpCONST_BARE;
rv2cv_op = newCVREF(0, const_op);
}
- if (rv2cv_op->op_type == OP_RV2CV &&
- (rv2cv_op->op_flags & OPf_KIDS)) {
- OP *rv_op = cUNOPx(rv2cv_op)->op_first;
- switch (rv_op->op_type) {
- case OP_CONST: {
- SV *sv = cSVOPx_sv(rv_op);
- if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
- cv = (CV*)SvRV(sv);
- } break;
- case OP_GV: {
- GV *gv = cGVOPx_gv(rv_op);
- CV *maybe_cv = GvCVu(gv);
- if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
- cv = maybe_cv;
- } break;
- }
- }
+ cv = rv2cv_op_cv(rv2cv_op, 0);
/* See if it's the indirect object for a list operator. */
(
(
*proto == '$' || *proto == '_'
- || *proto == '*'
+ || *proto == '*' || *proto == '+'
)
&& proto[1] == '\0'
)
/**/;
}
sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
- words = append_elem(OP_LIST, words,
+ words = op_append_elem(OP_LIST, words,
newSVOP(OP_CONST, 0, tokeq(sv)));
}
}
if (warnillegalproto) {
if (must_be_last)
proto_after_greedy_proto = TRUE;
- if (!strchr("$@%*;[]&\\_", *p)) {
+ if (!strchr("$@%*;[]&\\_+", *p)) {
bad_proto = TRUE;
}
else {
o->op_targ = tmp;
PL_lex_op = readline_overriden
? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
- append_elem(OP_LIST, o,
+ op_append_elem(OP_LIST, o,
newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
: (OP*)newUNOP(OP_READLINE, 0, o);
}
SVt_PV);
PL_lex_op = readline_overriden
? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
- append_elem(OP_LIST,
+ op_append_elem(OP_LIST,
newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
: (OP*)newUNOP(OP_READLINE, 0,
GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
PL_lex_op = readline_overriden
? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
- append_elem(OP_LIST,
+ op_append_elem(OP_LIST,
newGVOP(OP_GV, 0, gv),
newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
: (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
return KEYWORD_PLUGIN_DECLINE;
}
+#define parse_recdescent(g) S_parse_recdescent(aTHX_ g)
+static void
+S_parse_recdescent(pTHX_ int gramtype)
+{
+ SAVEI32(PL_lex_brackets);
+ if (PL_lex_brackets > 100)
+ Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
+ PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
+ if(yyparse(gramtype) && !PL_parser->error_count)
+ qerror(Perl_mess(aTHX_ "Parse error"));
+}
+
+#define parse_recdescent_for_op(g) S_parse_recdescent_for_op(aTHX_ g)
+static OP *
+S_parse_recdescent_for_op(pTHX_ int gramtype)
+{
+ OP *o;
+ ENTER;
+ SAVEVPTR(PL_eval_root);
+ PL_eval_root = NULL;
+ parse_recdescent(gramtype);
+ o = PL_eval_root;
+ LEAVE;
+ return o;
+}
+
+/*
+=for apidoc Amx|OP *|parse_block|U32 flags
+
+Parse a single complete Perl code block. This consists of an opening
+brace, a sequence of statements, and a closing brace. The block
+constitutes a lexical scope, so C<my> variables and various compile-time
+effects can be contained within it. It is up to the caller to ensure
+that the dynamic parser state (L</PL_parser> et al) is correctly set to
+reflect the source of the code to be parsed and the lexical context for
+the statement.
+
+The op tree representing the code block is returned. This is always a
+real op, never a null pointer. It will normally be a C<lineseq> list,
+including C<nextstate> or equivalent ops. No ops to construct any kind
+of runtime scope are included by virtue of it being a block.
+
+If an error occurs in parsing or compilation, in most cases a valid op
+tree (most likely null) is returned anyway. The error is reflected in
+the parser state, normally resulting in a single exception at the top
+level of parsing which covers all the compilation errors that occurred.
+Some compilation errors, however, will throw an exception immediately.
+
+The I<flags> parameter is reserved for future use, and must always
+be zero.
+
+=cut
+*/
+
+OP *
+Perl_parse_block(pTHX_ U32 flags)
+{
+ if (flags)
+ Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
+ return parse_recdescent_for_op(GRAMBLOCK);
+}
+
/*
=for apidoc Amx|OP *|parse_fullstmt|U32 flags
OP *
Perl_parse_fullstmt(pTHX_ U32 flags)
{
- OP *fullstmtop;
if (flags)
Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
- ENTER;
- SAVEVPTR(PL_eval_root);
- PL_eval_root = NULL;
- if(yyparse(GRAMFULLSTMT) && !PL_parser->error_count)
- qerror(Perl_mess(aTHX_ "Parse error"));
- fullstmtop = PL_eval_root;
- LEAVE;
- return fullstmtop;
+ return parse_recdescent_for_op(GRAMFULLSTMT);
}
/*
Perl_parse_stmtseq(pTHX_ U32 flags)
{
OP *stmtseqop;
+ I32 c;
if (flags)
Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
- ENTER;
- SAVEVPTR(PL_eval_root);
- PL_eval_root = NULL;
- if(yyparse(GRAMSTMTSEQ) && !PL_parser->error_count)
+ stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ);
+ c = lex_peek_unichar(0);
+ if (c != -1 && c != /*{*/'}')
qerror(Perl_mess(aTHX_ "Parse error"));
- stmtseqop = PL_eval_root;
- LEAVE;
return stmtseqop;
}