/* LEX_* are values for PL_lex_state, the state of the lexer.
* They are arranged oddly so that the guard on the switch statement
* can get by with a single comparison (if the compiler is smart enough).
+ *
+ * These values refer to the various states within a sublex parse,
+ * i.e. within a double quotish string
*/
/* #define LEX_NOTPARSING 11 is done in perl.h. */
* The UNIDOR macro is for unary functions that can be followed by the //
* operator (such as C<shift // 0>).
*/
-#define UNI2(f,x) { \
+#define UNI3(f,x,have_x) { \
pl_yylval.ival = f; \
- PL_expect = x; \
+ if (have_x) PL_expect = x; \
PL_bufptr = s; \
PL_last_uni = PL_oldbufptr; \
PL_last_lop_op = f; \
s = PEEKSPACE(s); \
return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
}
-#define UNI(f) UNI2(f,XTERM)
-#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
+#define UNI(f) UNI3(f,XTERM,1)
+#define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
#define UNIPROTO(f,optional) { \
if (optional) PL_last_uni = PL_oldbufptr; \
OPERATOR(f); \
}
-#define UNIBRACK(f) { \
- pl_yylval.ival = f; \
- PL_bufptr = s; \
- PL_last_uni = PL_oldbufptr; \
- if (*s == '(') \
- return REPORT( (int)FUNC1 ); \
- s = PEEKSPACE(s); \
- return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
- }
+#define UNIBRACK(f) UNI3(f,0,0)
/* grandfather return to old style */
#define OLDLOP(f) \
return (int)LSTOP; \
} while(0)
+#define COPLINE_INC_WITH_HERELINES \
+ STMT_START { \
+ CopLINE_inc(PL_curcop); \
+ if (PL_parser->lex_shared->herelines) \
+ CopLINE(PL_curcop) += PL_parser->lex_shared->herelines, \
+ PL_parser->lex_shared->herelines = 0; \
+ } STMT_END
+
+
#ifdef DEBUGGING
/* how to interpret the pl_yylval associated with the token */
TOKENTYPE_IVAL,
TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
TOKENTYPE_PVAL,
- TOKENTYPE_OPVAL,
- TOKENTYPE_GVVAL
+ TOKENTYPE_OPVAL
};
static struct debug_tokens {
{ EQOP, TOKENTYPE_OPNUM, "EQOP" },
{ FOR, TOKENTYPE_IVAL, "FOR" },
{ FORMAT, TOKENTYPE_NONE, "FORMAT" },
+ { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
+ { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
{ FUNC, TOKENTYPE_OPNUM, "FUNC" },
{ FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
{ FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
{ OROP, TOKENTYPE_IVAL, "OROP" },
{ OROR, TOKENTYPE_NONE, "OROR" },
{ PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
+ { PEG, TOKENTYPE_NONE, "PEG" },
{ PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
{ PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
{ PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
{ PREDEC, TOKENTYPE_NONE, "PREDEC" },
{ PREINC, TOKENTYPE_NONE, "PREINC" },
{ PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
+ { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
{ REFGEN, TOKENTYPE_NONE, "REFGEN" },
{ RELOP, TOKENTYPE_OPNUM, "RELOP" },
+ { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
{ SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
{ SUB, TOKENTYPE_NONE, "SUB" },
{ THING, TOKENTYPE_OPVAL, "THING" },
Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
switch (type) {
case TOKENTYPE_NONE:
- case TOKENTYPE_GVVAL: /* doesn't appear to be used */
break;
case TOKENTYPE_IVAL:
Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
static void
strip_return(SV *sv)
{
- register const char *s = SvPVX_const(sv);
- register const char * const e = s + SvCUR(sv);
+ const char *s = SvPVX_const(sv);
+ const char * const e = s + SvCUR(sv);
PERL_ARGS_ASSERT_STRIP_RETURN;
while (s < e) {
if (*s++ == '\r' && *s == '\n') {
/* hit a CR-LF, need to copy the rest */
- register char *d = s - 1;
+ char *d = s - 1;
*d++ = *s++;
while (s < e) {
if (*s == '\r' && s[1] == '\n')
Newx(parser->lex_brackstack, 120, char);
Newx(parser->lex_casestack, 12, char);
*parser->lex_casestack = '\0';
+ Newxz(parser->lex_shared, 1, LEXSHARED);
if (line) {
STRLEN len;
parser->linestr = flags & LEX_START_COPIED
? SvREFCNT_inc_simple_NN(line)
: newSVpvn_flags(s, len, SvUTF8(line));
- if (!len || s[len-1] != ';')
- sv_catpvs(parser->linestr, "\n;");
+ sv_catpvs(parser->linestr, "\n;");
} else {
parser->linestr = newSVpvs("\n;");
}
Safefree(parser->lex_brackstack);
Safefree(parser->lex_casestack);
+ Safefree(parser->lex_shared);
PL_parser = parser->old_parser;
Safefree(parser);
}
SV *linestr;
char *buf;
STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
- STRLEN linestart_pos, last_uni_pos, last_lop_pos;
+ STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
linestr = PL_parser->linestr;
buf = SvPVX(linestr);
if (len <= SvLEN(linestr))
linestart_pos = PL_parser->linestart - buf;
last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
+ re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
+ PL_parser->lex_shared->re_eval_start - buf : 0;
+
buf = sv_grow(linestr, len);
+
PL_parser->bufend = buf + bufend_pos;
PL_parser->bufptr = buf + bufptr_pos;
PL_parser->oldbufptr = buf + oldbufptr_pos;
PL_parser->last_uni = buf + last_uni_pos;
if (PL_parser->last_lop)
PL_parser->last_lop = buf + last_lop_pos;
+ if (PL_parser->lex_shared->re_eval_start)
+ PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
return buf;
}
Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
for (; s != ptr; s++)
if (*s == '\n') {
- CopLINE_inc(PL_curcop);
+ COPLINE_INC_WITH_HERELINES;
PL_parser->linestart = s+1;
}
PL_parser->bufptr = ptr;
*/
#define LEX_FAKE_EOF 0x80000000
+#define LEX_NO_TERM 0x40000000
bool
Perl_lex_next_chunk(pTHX_ U32 flags)
STRLEN linestart_pos, last_uni_pos, last_lop_pos;
bool got_some_for_debugger = 0;
bool got_some;
- if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
+ if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
linestr = PL_parser->linestr;
buf = SvPVX(linestr);
} else if (filter_gets(linestr, old_bufend_pos)) {
got_some = 1;
got_some_for_debugger = 1;
+ } else if (flags & LEX_NO_TERM) {
+ got_some = 0;
} else {
if (!SvPOK(linestr)) /* can get undefined by filter_gets */
sv_setpvs(linestr, "");
c = lex_peek_unichar(flags);
if (c != -1) {
if (c == '\n')
- CopLINE_inc(PL_curcop);
+ COPLINE_INC_WITH_HERELINES;
if (UTF)
PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
else
if (flags & LEX_NO_NEXT_CHUNK)
break;
PL_parser->bufptr = s;
- CopLINE_inc(PL_curcop);
+ COPLINE_INC_WITH_HERELINES;
got_more = lex_next_chunk(flags);
CopLINE_dec(PL_curcop);
s = PL_parser->bufptr;
PERL_ARGS_ASSERT_INCLINE;
- CopLINE_inc(PL_curcop);
+ COPLINE_INC_WITH_HERELINES;
if (*s++ != '#')
return;
while (SPACE_OR_TAB(*s))
tokereport(type, &NEXTVAL_NEXTTOKE);
}
#endif
+ /* Don’t let opslab_force_free snatch it */
+ if (S_is_opval_token(type & 0xffff) && NEXTVAL_NEXTTOKE.opval) {
+ assert(!NEXTVAL_NEXTTOKE.opval->op_savefree);
+ NEXTVAL_NEXTTOKE.opval->op_savefree = 1;
+ }
#ifdef PERL_MAD
if (PL_curforce < 0)
start_force(PL_lasttoke);
S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
{
dVAR;
- register char *s;
+ char *s;
STRLEN len;
PERL_ARGS_ASSERT_FORCE_WORD;
S_tokeq(pTHX_ SV *sv)
{
dVAR;
- register char *s;
- register char *send;
- register char *d;
+ char *s;
+ char *send;
+ char *d;
STRLEN len = 0;
SV *pv = sv;
* converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
* interact with PL_lex_state, and create fake ( ... ) argument lists
* to handle functions and concatenation.
- * They assume that whoever calls them will be setting up a fake
- * join call, because each subthing puts a ',' after it. This lets
- * "lower \luPpEr"
- * become
- * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
- *
- * (I'm not sure whether the spurious commas at the end of lcfirst's
- * arguments and join's arguments are created or not).
+ * For example,
+ * "foo\lbar"
+ * is tokenised as
+ * stringify ( const[foo] concat lcfirst ( const[bar] ) )
*/
/*
S_sublex_start(pTHX)
{
dVAR;
- register const I32 op_type = pl_yylval.ival;
+ const I32 op_type = pl_yylval.ival;
if (op_type == OP_NULL) {
pl_yylval.opval = PL_lex_op;
S_sublex_push(pTHX)
{
dVAR;
+ LEXSHARED *shared;
ENTER;
PL_lex_state = PL_sublex_info.super_state;
SAVEBOOL(PL_lex_dojoin);
SAVEI32(PL_lex_brackets);
SAVEI32(PL_lex_allbrackets);
+ SAVEI32(PL_lex_formbrack);
SAVEI8(PL_lex_fakeeof);
SAVEI32(PL_lex_casemods);
SAVEI32(PL_lex_starts);
SAVEI8(PL_lex_state);
+ SAVESPTR(PL_lex_repl);
SAVEVPTR(PL_lex_inpat);
SAVEI16(PL_lex_inwhat);
SAVECOPLINE(PL_curcop);
SAVESPTR(PL_linestr);
SAVEGENERICPV(PL_lex_brackstack);
SAVEGENERICPV(PL_lex_casestack);
+ SAVEGENERICPV(PL_parser->lex_shared);
+
+ /* The here-doc parser needs to be able to peek into outer lexing
+ scopes to find the body of the here-doc. So we put PL_linestr and
+ PL_bufptr into lex_shared, to ‘share’ those values.
+ */
+ PL_parser->lex_shared->ls_linestr = PL_linestr;
+ PL_parser->lex_shared->ls_bufptr = PL_bufptr;
PL_linestr = PL_lex_stuff;
+ PL_lex_repl = PL_sublex_info.repl;
PL_lex_stuff = NULL;
+ PL_sublex_info.repl = NULL;
PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
= SvPVX(PL_linestr);
PL_bufend += SvCUR(PL_linestr);
PL_last_lop = PL_last_uni = NULL;
SAVEFREESV(PL_linestr);
+ if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
PL_lex_dojoin = FALSE;
- PL_lex_brackets = 0;
+ PL_lex_brackets = PL_lex_formbrack = 0;
PL_lex_allbrackets = 0;
PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
Newx(PL_lex_brackstack, 120, char);
PL_lex_starts = 0;
PL_lex_state = LEX_INTERPCONCAT;
CopLINE_set(PL_curcop, (line_t)PL_multi_start);
+
+ Newxz(shared, 1, LEXSHARED);
+ shared->ls_prev = PL_parser->lex_shared;
+ PL_parser->lex_shared = shared;
PL_lex_inwhat = PL_sublex_info.sub_inwhat;
if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
PL_bufend += SvCUR(PL_linestr);
PL_last_lop = PL_last_uni = NULL;
- SAVEFREESV(PL_linestr);
PL_lex_dojoin = FALSE;
PL_lex_brackets = 0;
PL_lex_allbrackets = 0;
/*
scan_const
- Extracts a pattern, double-quoted string, or transliteration. This
- is terrifying code.
+ Extracts the next constant part of a pattern, double-quoted string,
+ or transliteration. This is terrifying code.
+
+ For example, in parsing the double-quoted string "ab\x63$d", it would
+ stop at the '$' and return an OP_CONST containing 'abc'.
It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
processing a pattern (PL_lex_inpat is true), a transliteration
Returns a pointer to the character scanned up to. If this is
advanced from the start pointer supplied (i.e. if anything was
- successfully parsed), will leave an OP for the substring scanned
+ successfully parsed), will leave an OP_CONST for the substring scanned
in pl_yylval. Caller must intuit reason for not parsing further
by looking at the next characters herself.
In patterns:
- backslashes:
- constants: \N{NAME} only
- case and quoting: \U \Q \E
- stops on @ and $, but not for $ as tail anchor
+ expand:
+ \N{ABC} => \N{U+41.42.43}
+
+ pass through:
+ all other \-char, including \N and \N{ apart from \N{ABC}
+
+ stops on:
+ @ and $ where it appears to be a var, but not for $ as tail anchor
+ \l \L \u \U \Q \E
+ (?{ or (??{
+
In transliterations:
characters are VERY literal, except for - not at the start or end
it's a tail anchor if $ is the last thing in the string, or if it's
followed by one of "()| \r\n\t"
- \1 (backreferences) are turned into $1
+ \1 (backreferences) are turned into $1 in substitutions
The structure of the code is
while (there's a character to process) {
S_scan_const(pTHX_ char *start)
{
dVAR;
- register char *send = PL_bufend; /* end of the constant */
+ char *send = PL_bufend; /* end of the constant */
SV *sv = newSV(send - start); /* sv for the constant. See
note below on sizing. */
- register char *s = start; /* start of the constant */
- register char *d = SvPVX(sv); /* destination for copies */
+ char *s = start; /* start of the constant */
+ char *d = SvPVX(sv); /* destination for copies */
bool dorange = FALSE; /* are we in a translit range? */
bool didrange = FALSE; /* did we just finish a range? */
+ bool in_charclass = FALSE; /* within /[...]/ */
bool has_utf8 = FALSE; /* Output constant is UTF8 */
bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
to be UTF8? But, this can
#endif
if (min > max) {
+ SvREFCNT_dec(sv);
Perl_croak(aTHX_
"Invalid range \"%c-%c\" in transliteration operator",
(char)min, (char)max);
/* range begins (ignore - as first or last char) */
else if (*s == '-' && s+1 < send && s != start) {
if (didrange) {
+ SvREFCNT_dec(sv);
Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
}
if (has_utf8
/* if we get here, we're not doing a transliteration */
- /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
- except for the last char, which will be done separately. */
+ else if (*s == '[' && PL_lex_inpat && !in_charclass) {
+ char *s1 = s-1;
+ int esc = 0;
+ while (s1 >= start && *s1-- == '\\')
+ esc = !esc;
+ if (!esc)
+ in_charclass = TRUE;
+ }
+
+ else if (*s == ']' && PL_lex_inpat && in_charclass) {
+ char *s1 = s-1;
+ int esc = 0;
+ while (s1 >= start && *s1-- == '\\')
+ esc = !esc;
+ if (!esc)
+ in_charclass = FALSE;
+ }
+
+ /* skip for regexp comments /(?#comment)/, except for the last
+ * char, which will be done separately.
+ * Stop on (?{..}) and friends */
+
else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
if (s[2] == '#') {
while (s+1 < send && *s != ')')
*d++ = NATIVE_TO_NEED(has_utf8,*s++);
}
- else if (s[2] == '{' /* This should match regcomp.c */
- || (s[2] == '?' && s[3] == '{'))
+ else if (!PL_lex_casemods && !in_charclass &&
+ ( s[2] == '{' /* This should match regcomp.c */
+ || (s[2] == '?' && s[3] == '{')))
{
- I32 count = 1;
- char *regparse = s + (s[2] == '{' ? 3 : 4);
- char c;
-
- while (count && (c = *regparse)) {
- if (c == '\\' && regparse[1])
- regparse++;
- else if (c == '{')
- count++;
- else if (c == '}')
- count--;
- regparse++;
- }
- if (*regparse != ')')
- regparse--; /* Leave one char for continuation. */
- while (s < regparse)
- *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+ break;
}
}
*d++ = NATIVE_TO_NEED(has_utf8,*s++);
}
+ /* no further processing of single-quoted regex */
+ else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
+ goto default_action;
+
/* check for embedded arrays
(@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
*/
/* FALL THROUGH */
default:
{
- if ((isALPHA(*s) || isDIGIT(*s)))
+ if ((isALNUMC(*s)))
Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
"Unrecognized escape \\%c passed through",
*s);
/* eg. \x24 indicates the hex constant 0x24 */
case 'x':
- ++s;
- if (*s == '{') {
- char* const e = strchr(s, '}');
- I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
- PERL_SCAN_DISALLOW_PREFIX;
+ {
STRLEN len;
+ const char* error;
- ++s;
- if (!e) {
- yyerror("Missing right brace on \\x{}");
+ bool valid = grok_bslash_x(s, &uv, &len, &error, 1);
+ s += len;
+ if (! valid) {
+ yyerror(error);
continue;
}
- len = e - s;
- uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
- s = e + 1;
- }
- else {
- {
- STRLEN len = 2;
- I32 flags = PERL_SCAN_DISALLOW_PREFIX;
- uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
- s += len;
- }
}
NUM_ESCAPE_INSERT:
} else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
type = "s";
typelen = 1;
+ } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
+ type = "q";
+ typelen = 1;
} else {
type = "qq";
typelen = 2;
*
* First argument is the stuff after the first token, e.g. "bar".
*
- * Not a method if bar is a filehandle.
+ * Not a method if foo is a filehandle.
* Not a method if foo is a subroutine prototyped to take a filehandle.
* Not a method if it's really "Foo $bar"
* Method if it's "foo $bar"
PERL_ARGS_ASSERT_INTUIT_METHOD;
- if (gv) {
- if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
+ if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
return 0;
- if (cv) {
- if (SvPOK(cv)) {
+ if (cv && SvPOK(cv)) {
const char *proto = CvPROTO(cv);
if (proto) {
if (*proto == ';')
if (*proto == '*')
return 0;
}
- }
- } else
- gv = NULL;
}
s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
/* start is the beginning of the possible filehandle/object,
*/
if (*start == '$') {
- if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
+ if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
isUPPER(*PL_tokenbuf))
return 0;
#ifdef PERL_MAD
if (indirgv && GvCVu(indirgv))
return 0;
/* filehandle or package name makes it a method */
- if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
+ if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
#ifdef PERL_MAD
soff = s - SvPVX(PL_linestr);
#endif
if (PL_expect != XSTATE)
yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
is_use ? "use" : "no"));
+ PL_expect = XTERM;
s = SKIPSPACE1(s);
if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
s = force_version(s, TRUE);
Perl_yylex(pTHX)
{
dVAR;
- register char *s = PL_bufptr;
- register char *d;
+ char *s = PL_bufptr;
+ char *d;
STRLEN len;
bool bof = FALSE;
+ U8 formbrack = 0;
U32 fake_eof = 0;
/* orig_keyword, gvp, and gv are initialized here because
PL_lex_allbrackets--;
next_type &= 0xffff;
}
-#ifdef PERL_MAD
- /* FIXME - can these be merged? */
- return next_type;
-#else
+ if (S_is_opval_token(next_type) && pl_yylval.opval)
+ pl_yylval.opval->op_savefree = 0; /* release */
return REPORT(next_type);
-#endif
}
/* interpolated case modifiers like \L \U, including \Q and \E.
case LEX_INTERPSTART:
if (PL_bufptr == PL_bufend)
return REPORT(sublex_done());
- DEBUG_T({ PerlIO_printf(Perl_debug_log,
+ DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
"### Interpolated variable\n"); });
PL_expect = XTERM;
PL_lex_dojoin = (*PL_bufptr == '@');
NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
force_next(FUNC);
}
+ /* Convert (?{...}) and friends to 'do {...}' */
+ if (PL_lex_inpat && *PL_bufptr == '(') {
+ PL_parser->lex_shared->re_eval_start = PL_bufptr;
+ PL_bufptr += 2;
+ if (*PL_bufptr != '{')
+ PL_bufptr++;
+ start_force(PL_curforce);
+ /* XXX probably need a CURMAD(something) here */
+ PL_expect = XTERMBLOCK;
+ force_next(DO);
+ }
+
if (PL_lex_starts++) {
s = PL_bufptr;
#ifdef PERL_MAD
Perl_croak(aTHX_ "Bad evalled substitution pattern");
PL_lex_repl = NULL;
}
+ /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
+ re_eval_str. If the here-doc body’s length equals the previous
+ value of re_eval_start, re_eval_start will now be null. So
+ check re_eval_str as well. */
+ if (PL_parser->lex_shared->re_eval_start
+ || PL_parser->lex_shared->re_eval_str) {
+ SV *sv;
+ if (*PL_bufptr != ')')
+ Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
+ PL_bufptr++;
+ /* having compiled a (?{..}) expression, return the original
+ * text too, as a const */
+ if (PL_parser->lex_shared->re_eval_str) {
+ sv = PL_parser->lex_shared->re_eval_str;
+ PL_parser->lex_shared->re_eval_str = NULL;
+ SvCUR_set(sv,
+ PL_bufptr - PL_parser->lex_shared->re_eval_start);
+ SvPV_shrink_to_cur(sv);
+ }
+ else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
+ PL_bufptr - PL_parser->lex_shared->re_eval_start);
+ start_force(PL_curforce);
+ /* XXX probably need a CURMAD(something) here */
+ NEXTVAL_NEXTTOKE.opval =
+ (OP*)newSVOP(OP_CONST, 0,
+ sv);
+ force_next(THING);
+ PL_parser->lex_shared->re_eval_start = NULL;
+ PL_expect = XTERM;
+ return REPORT(',');
+ }
+
/* FALLTHROUGH */
case LEX_INTERPCONCAT:
#ifdef DEBUGGING
if (PL_bufptr == PL_bufend)
return REPORT(sublex_done());
- if (SvIVX(PL_linestr) == '\'') {
+ /* m'foo' still needs to be parsed for possible (?{...}) */
+ if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
SV *sv = newSVsv(PL_linestr);
- if (!PL_lex_inpat)
- sv = tokeq(sv);
- else if ( PL_hints & HINT_NEW_RE )
- sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
+ sv = tokeq(sv);
pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
s = PL_bufend;
}
return yylex();
case LEX_FORMLINE:
- PL_lex_state = LEX_NORMAL;
s = scan_formline(PL_bufptr);
if (!PL_lex_formbrack)
+ {
+ formbrack = 1;
goto rightbracket;
- OPERATOR(';');
+ }
+ PL_bufptr = s;
+ return yylex();
}
s = PL_bufptr;
fake_eof = LEX_FAKE_EOF;
}
PL_bufptr = PL_bufend;
- CopLINE_inc(PL_curcop);
+ COPLINE_INC_WITH_HERELINES;
if (!lex_next_chunk(fake_eof)) {
CopLINE_dec(PL_curcop);
s = PL_bufptr;
}
}
if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
- PL_bufptr = s;
PL_lex_state = LEX_FORMLINE;
- return yylex();
+ start_force(PL_curforce);
+ NEXTVAL_NEXTTOKE.ival = 0;
+ force_next(FORMRBRACK);
+ TOKEN(';');
}
goto retry;
case '\r':
incline(s);
}
else {
+ const bool in_comment = *s == '#';
d = s;
while (d < PL_bufend && *d != '\n')
d++;
PL_thiswhite = newSVpvn(s, d - s);
#endif
s = d;
- incline(s);
+ if (in_comment && d == PL_bufend
+ && PL_lex_state == LEX_INTERPNORMAL
+ && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
+ && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
+ else incline(s);
}
if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
- PL_bufptr = s;
PL_lex_state = LEX_FORMLINE;
- return yylex();
+ start_force(PL_curforce);
+ NEXTVAL_NEXTTOKE.ival = 0;
+ force_next(FORMRBRACK);
+ TOKEN(';');
}
}
else {
}
sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
if (*d == '(') {
- d = scan_str(d,TRUE,TRUE);
+ d = scan_str(d,TRUE,TRUE,FALSE);
if (!d) {
/* MUST advance bufptr here to avoid bogus
"at end of line" context messages from yyerror().
}
TERM(']');
case '{':
- leftbracket:
s++;
+ leftbracket:
if (PL_lex_brackets > 100) {
Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
}
switch (PL_expect) {
case XTERM:
- if (PL_lex_formbrack) {
- s--;
- PRETERMBLOCK(DO);
- }
if (PL_oldoldbufptr == PL_last_lop)
PL_lex_brackstack[PL_lex_brackets++] = XTERM;
else
pl_yylval.ival = CopLINE(PL_curcop);
if (isSPACE(*s) || *s == '#')
PL_copline = NOLINE; /* invalidate current command line number */
- TOKEN('{');
+ TOKEN(formbrack ? '=' : '{');
case '}':
if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
TOKEN(0);
else
PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
PL_lex_allbrackets--;
- if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
- PL_lex_formbrack = 0;
if (PL_lex_state == LEX_INTERPNORMAL) {
if (PL_lex_brackets == 0) {
if (PL_expect & XFAKEBRACK) {
#endif
return yylex(); /* ignore fake brackets */
}
- if (*s == '-' && s[1] == '>')
+ if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
+ && SvEVALED(PL_lex_repl))
+ PL_lex_state = LEX_INTERPEND;
+ else if (*s == '-' && s[1] == '>')
PL_lex_state = LEX_INTERPENDMAYBE;
else if (*s != '[' && *s != '{')
PL_lex_state = LEX_INTERPEND;
curmad('X', newSVpvn(s-1,1));
CURMAD('_', PL_thiswhite);
}
- force_next('}');
+ force_next(formbrack ? '.' : '}');
+ if (formbrack) LEAVE;
#ifdef PERL_MAD
if (!PL_thistoken)
PL_thistoken = newSVpvs("");
#endif
+ if (formbrack == 2) { /* means . where arguments were expected */
+ start_force(PL_curforce);
+ force_next(';');
+ TOKEN(FORMRBRACK);
+ }
TOKEN(';');
case '&':
s++;
if (PL_expect == XSTATE && isALPHA(tmp) &&
(s == PL_linestart+1 || s[-2] == '\n') )
{
- if (PL_in_eval && !PL_rsfp && !PL_parser->filtered) {
+ if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
+ || PL_lex_state != LEX_NORMAL) {
d = PL_bufend;
while (s < d) {
if (*s++ == '\n') {
goto retry;
}
}
- if (PL_lex_brackets < PL_lex_formbrack) {
+ if (PL_expect == XBLOCK) {
const char *t = s;
#ifdef PERL_STRICT_CR
while (SPACE_OR_TAB(*t))
#endif
t++;
if (*t == '\n' || *t == '#') {
- s--;
- PL_expect = XBLOCK;
+ formbrack = 1;
+ ENTER;
+ SAVEI8(PL_parser->form_lex_state);
+ SAVEI32(PL_lex_formbrack);
+ PL_parser->form_lex_state = PL_lex_state;
+ PL_lex_formbrack = PL_lex_brackets + 1;
goto leftbracket;
}
}
s = scan_heredoc(s);
else
s = scan_inputsymbol(s);
- TERM(sublex_start());
+ PL_expect = XOPERATOR;
+ TOKEN(sublex_start());
}
s++;
{
#endif
&& (s == PL_linestart || s[-1] == '\n') )
{
- PL_lex_formbrack = 0;
PL_expect = XSTATE;
+ formbrack = 2; /* dot seen where arguments expected */
goto rightbracket;
}
if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
TERM(THING);
case '\'':
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
if (PL_expect == XOPERATOR) {
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
TERM(sublex_start());
case '"':
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
if (PL_expect == XOPERATOR) {
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
TERM(sublex_start());
case '`':
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
if (PL_expect == XOPERATOR)
no_op("Backticks",s);
op_free(rv2cv_op);
SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
- pl_yylval.opval->op_private = 0;
+ pl_yylval.opval->op_private = OPpCONST_FOLDED;
pl_yylval.opval->op_flags |= OPf_SPECIAL;
TOKEN(WORD);
}
case KEY_CORE:
if (*s == ':' && s[1] == ':') {
- s += 2;
+ STRLEN olen = len;
d = s;
+ s += 2;
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
- if (!(tmp = keyword(PL_tokenbuf, len, 1)))
+ if ((*s == ':' && s[1] == ':')
+ || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
+ {
+ s = d;
+ len = olen;
+ Copy(PL_bufptr, PL_tokenbuf, olen, char);
+ goto just_a_word;
+ }
+ if (!tmp)
Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
SVfARG(newSVpvn_flags(PL_tokenbuf, len,
(UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
s = SKIPSPACE1(s);
if (*s == '{')
PRETERMBLOCK(DO);
- if (*s != '\'')
- s = force_word(s,WORD,TRUE,TRUE,FALSE);
+ if (*s != '\'') {
+ d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, 1, &len);
+ if (len) {
+ d = SKIPSPACE1(d);
+ if (*d == '(') s = force_word(s,WORD,TRUE,TRUE,FALSE);
+ }
+ }
if (orig_keyword == KEY_do) {
orig_keyword = 0;
pl_yylval.ival = 1;
case KEY_no:
s = tokenize_use(0, s);
- OPERATOR(USE);
+ TERM(USE);
case KEY_not:
if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
LOP(OP_PIPE_OP,XTERM);
case KEY_q:
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s)
missingterm(NULL);
pl_yylval.ival = OP_CONST;
case KEY_qw: {
OP *words = NULL;
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s)
missingterm(NULL);
PL_expect = XOPERATOR;
}
case KEY_qq:
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s)
missingterm(NULL);
pl_yylval.ival = OP_STRINGIFY;
TERM(sublex_start());
case KEY_qx:
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s)
missingterm(NULL);
readpipe_override();
}
if (key == KEY_format) {
- if (*s == '=')
- PL_lex_formbrack = PL_lex_brackets + 1;
#ifdef PERL_MAD
PL_thistoken = subtoken;
s = d;
(void) force_word(PL_oldbufptr + tboffset, WORD,
FALSE, TRUE, TRUE);
#endif
- OPERATOR(FORMAT);
+ PREBLOCK(FORMAT);
}
/* Look for a prototype */
const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
STRLEN tmplen;
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s)
Perl_croak(aTHX_ "Prototype not terminated");
/* strip spaces and check for bad characters */
LOP(OP_SYSWRITE,XTERM);
case KEY_tr:
+ case KEY_y:
s = scan_trans(s);
TERM(sublex_start());
return REPORT(0);
pl_yylval.ival = OP_XOR;
OPERATOR(OROP);
-
- case KEY_y:
- s = scan_trans(s);
- TERM(sublex_start());
}
}}
}
S_pending_ident(pTHX)
{
dVAR;
- register char *d;
PADOFFSET tmp = 0;
/* pit holds the identifier we read and pending_ident is reset */
char pit = PL_pending_ident;
/*
build the ops for accesses to a my() variable.
-
- Deny my($a) or my($b) in a sort block, *if* $a or $b is
- then used in a comparison. This catches most, but not
- all cases. For instance, it catches
- sort { my($a); $a <=> $b }
- but not
- sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
- (although why you'd do that is anyone's guess).
*/
if (!has_colon) {
return WORD;
}
- /* if it's a sort block and they're naming $a or $b */
- if (PL_last_lop_op == OP_SORT &&
- PL_tokenbuf[0] == '$' &&
- (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
- && !PL_tokenbuf[2])
- {
- for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
- d < PL_bufend && *d != '\n';
- d++)
- {
- if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
- Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
- PL_tokenbuf);
- }
- }
- }
-
pl_yylval.opval = newOP(OP_PADANY, 0);
pl_yylval.opval->op_targ = tmp;
return PRIVATEREF;
S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
{
dVAR;
- register char *d = dest;
- register char * const e = d + destlen - 3; /* two-character token, ending NUL */
+ char *d = dest;
+ char * const e = d + destlen - 3; /* two-character token, ending NUL */
PERL_ARGS_ASSERT_SCAN_WORD;
dVAR;
char *bracket = NULL;
char funny = *s++;
- register char *d = dest;
- register char * const e = d + destlen - 3; /* two-character token, ending NUL */
+ char *d = dest;
+ char * const e = d + destlen - 3; /* two-character token, ending NUL */
PERL_ARGS_ASSERT_SCAN_IDENT;
bracket = s;
s++;
}
- else if (ck_uni)
- check_uni();
if (s < send) {
if (UTF) {
const STRLEN skip = UTF8SKIP(s);
*d = toCTRL(*s);
s++;
}
+ else if (ck_uni && !bracket)
+ check_uni();
if (bracket) {
if (isSPACE(s[-1])) {
while (s < send) {
/* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
* the parse starting at 's', based on the subset that are valid in this
* context input to this routine in 'valid_flags'. Advances s. Returns
- * TRUE if the input was a valid flag, so the next char may be as well;
- * otherwise FALSE. 'charset' should point to a NUL upon first call on the
- * current regex. This routine will set it to any charset modifier found.
- * The caller shouldn't change it. This way, another charset modifier
- * encountered in the parse can be detected as an error, as we have decided
- * allow only one */
+ * TRUE if the input should be treated as a valid flag, so the next char
+ * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
+ * first call on the current regex. This routine will set it to any
+ * charset modifier found. The caller shouldn't change it. This way,
+ * another charset modifier encountered in the parse can be detected as an
+ * error, as we have decided to allow only one */
const char c = **s;
-
- if (! strchr(valid_flags, c)) {
- if (isALNUM(c)) {
- goto deprecate;
+ STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
+
+ if ( charlen != 1 || ! strchr(valid_flags, c) ) {
+ if (isALNUM_lazy_if(*s, UTF)) {
+ yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
+ UTF ? SVf_UTF8 : 0);
+ (*s) += charlen;
+ /* Pretend that it worked, so will continue processing before
+ * dieing */
+ return TRUE;
}
return FALSE;
}
case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
case LOCALE_PAT_MOD:
-
- /* In 5.14, qr//lt is legal but deprecated; the 't' means they
- * can't be regex modifiers.
- * In 5.14, s///le is legal and ambiguous. Try to disambiguate as
- * much as easily done. s///lei, for example, has to mean regex
- * modifiers if it's not an error (as does any word character
- * following the 'e'). Otherwise, we resolve to the backwards-
- * compatible, but less likely 's/// le ...', i.e. as meaning
- * less-than-or-equal. The reason it's not likely is that s//
- * returns a number for code in the field (/r returns a string, but
- * that wasn't added until the 5.13 series), and so '<=' should be
- * used for comparing, not 'le'. */
- if (*((*s) + 1) == 't') {
- goto deprecate;
- }
- else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) {
-
- /* 'e' is valid only for substitutes, s///e. If it is not
- * valid in the current context, then 'm//le' must mean the
- * comparison operator, so use the regular deprecation message.
- */
- if (! strchr(valid_flags, 'e')) {
- goto deprecate;
- }
- Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'. In Perl 5.18, it will be resolved the other way");
- return FALSE;
- }
if (*charset) {
goto multiple_charsets;
}
*charset = c;
break;
case UNICODE_PAT_MOD:
- /* In 5.14, qr//unless and qr//until are legal but deprecated; the
- * 'n' means they can't be regex modifiers */
- if (*((*s) + 1) == 'n') {
- goto deprecate;
- }
if (*charset) {
goto multiple_charsets;
}
*charset = c;
break;
case ASCII_RESTRICT_PAT_MOD:
- /* In 5.14, qr//and is legal but deprecated; the 'n' means they
- * can't be regex modifiers */
- if (*((*s) + 1) == 'n') {
- goto deprecate;
- }
-
if (! *charset) {
set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
}
(*s)++;
return TRUE;
- deprecate:
- Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
- "Having no space between pattern and following word is deprecated");
- return FALSE;
-
multiple_charsets:
if (*charset != c) {
yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
{
dVAR;
PMOP *pm;
- char *s = scan_str(start,!!PL_madskills,FALSE);
+ char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing);
const char * const valid_flags =
(const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
char charset = '\0'; /* character set modifier */
PERL_ARGS_ASSERT_SCAN_PAT;
+ /* this was only needed for the initial scan_str; set it to false
+ * so that any (?{}) code blocks etc are parsed normally */
+ PL_reg_state.re_reparsing = FALSE;
if (!s) {
const char * const delimiter = skipspace(start);
Perl_croak(aTHX_
#ifdef PERL_MAD
modstart = s;
#endif
+
+ /* if qr/...(?{..}).../, then need to parse the pattern within a new
+ * anon CV. False positives like qr/[(?{]/ are harmless */
+
+ if (type == OP_QR) {
+ STRLEN len;
+ char *e, *p = SvPV(PL_lex_stuff, len);
+ e = p + len;
+ for (; p < e; p++) {
+ if (p[0] == '(' && p[1] == '?'
+ && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
+ {
+ pm->op_pmflags |= PMf_HAS_CV;
+ break;
+ }
+ }
+ pm->op_pmflags |= PMf_IS_QR;
+ }
+
while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
#ifdef PERL_MAD
if (PL_madskills && modstart != s) {
{
dVAR;
char *s;
- register PMOP *pm;
+ PMOP *pm;
I32 first_start;
I32 es = 0;
char charset = '\0'; /* character set modifier */
pl_yylval.ival = OP_NULL;
- s = scan_str(start,!!PL_madskills,FALSE);
+ s = scan_str(start,!!PL_madskills,FALSE,FALSE);
if (!s)
Perl_croak(aTHX_ "Substitution pattern not terminated");
#endif
first_start = PL_multi_start;
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s) {
if (PL_lex_stuff) {
SvREFCNT_dec(PL_lex_stuff);
if (es) {
SV * const repl = newSVpvs("");
- PL_sublex_info.super_bufptr = s;
- PL_sublex_info.super_bufend = PL_bufend;
PL_multi_end = 0;
pm->op_pmflags |= PMf_EVAL;
while (es-- > 0) {
sv_catpvs(repl, "do ");
}
sv_catpvs(repl, "{");
- sv_catsv(repl, PL_lex_repl);
- if (strchr(SvPVX(PL_lex_repl), '#'))
- sv_catpvs(repl, "\n");
+ sv_catsv(repl, PL_sublex_info.repl);
sv_catpvs(repl, "}");
SvEVALED_on(repl);
- SvREFCNT_dec(PL_lex_repl);
- PL_lex_repl = repl;
+ SvREFCNT_dec(PL_sublex_info.repl);
+ PL_sublex_info.repl = repl;
}
PL_lex_op = (OP*)pm;
S_scan_trans(pTHX_ char *start)
{
dVAR;
- register char* s;
+ char* s;
OP *o;
U8 squash;
U8 del;
pl_yylval.ival = OP_NULL;
- s = scan_str(start,!!PL_madskills,FALSE);
+ s = scan_str(start,!!PL_madskills,FALSE,FALSE);
if (!s)
Perl_croak(aTHX_ "Transliteration pattern not terminated");
}
#endif
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s) {
if (PL_lex_stuff) {
SvREFCNT_dec(PL_lex_stuff);
o->op_private &= ~OPpTRANS_ALL;
o->op_private |= del|squash|complement|
(DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
- (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
+ (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
PL_lex_op = o;
pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
return s;
}
+/* scan_heredoc
+ Takes a pointer to the first < in <<FOO.
+ Returns a pointer to the byte following <<FOO.
+
+ This function scans a heredoc, which involves different methods
+ depending on whether we are in a string eval, quoted construct, etc.
+ This is because PL_linestr could containing a single line of input, or
+ a whole string being evalled, or the contents of the current quote-
+ like operator.
+
+ The two basic methods are:
+ - Steal lines from the input stream
+ - Scan the heredoc in PL_linestr and remove it therefrom
+
+ In a file scope or filtered eval, the first method is used; in a
+ string eval, the second.
+
+ In a quote-like operator, we have to choose between the two,
+ depending on where we can find a newline. We peek into outer lex-
+ ing scopes until we find one with a newline in it. If we reach the
+ outermost lexing scope and it is a file, we use the stream method.
+ Otherwise it is treated as an eval.
+*/
+
STATIC char *
S_scan_heredoc(pTHX_ register char *s)
{
dVAR;
- SV *herewas;
I32 op_type = OP_SCALAR;
I32 len;
SV *tmpstr;
char term;
- const char *found_newline;
- register char *d;
- register char *e;
+ char *d;
+ char *e;
char *peek;
- const int outer = (PL_rsfp || PL_parser->filtered)
- && !(PL_lex_inwhat == OP_SCALAR);
+ const bool infile = PL_rsfp || PL_parser->filtered;
+ LEXSHARED *shared = PL_parser->lex_shared;
#ifdef PERL_MAD
I32 stuffstart = s - SvPVX(PL_linestr);
char *tstart;
PERL_ARGS_ASSERT_SCAN_HEREDOC;
s += 2;
- d = PL_tokenbuf;
+ d = PL_tokenbuf + 1;
e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
- if (!outer)
- *d++ = '\n';
+ *PL_tokenbuf = '\n';
peek = s;
while (SPACE_OR_TAB(*peek))
peek++;
s = peek;
term = *s++;
s = delimcpy(d, e, s, PL_bufend, term, &len);
+ if (s == PL_bufend)
+ Perl_croak(aTHX_ "Unterminated delimiter for here document");
d += len;
- if (s < PL_bufend)
- s++;
+ s++;
}
else {
if (*s == '\\')
+ /* <<\FOO is equivalent to <<'FOO' */
s++, term = '\'';
else
term = '"';
#ifdef PERL_MAD
if (PL_madskills) {
- tstart = PL_tokenbuf + !outer;
- PL_thisclose = newSVpvn(tstart, len - !outer);
+ tstart = PL_tokenbuf + 1;
+ PL_thisclose = newSVpvn(tstart, len - 1);
tstart = SvPVX(PL_linestr) + stuffstart;
PL_thisopen = newSVpvn(tstart, s - tstart);
stuffstart = s - SvPVX(PL_linestr);
}
#endif
#ifdef PERL_MAD
- found_newline = 0;
-#endif
- if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
- herewas = newSVpvn(s,PL_bufend-s);
- }
- else {
-#ifdef PERL_MAD
- herewas = newSVpvn(s-1,found_newline-s+1);
-#else
- s--;
- herewas = newSVpvn(s,found_newline-s);
-#endif
- }
-#ifdef PERL_MAD
if (PL_madskills) {
tstart = SvPVX(PL_linestr) + stuffstart;
if (PL_thisstuff)
else
PL_thisstuff = newSVpvn(tstart, s - tstart);
}
-#endif
- s += SvCUR(herewas);
-#ifdef PERL_MAD
stuffstart = s - SvPVX(PL_linestr);
-
- if (found_newline)
- s--;
#endif
tmpstr = newSV_type(SVt_PVIV);
SvIV_set(tmpstr, '\\');
}
- CLINE;
- PL_multi_start = CopLINE(PL_curcop);
+ PL_multi_start = CopLINE(PL_curcop) + 1;
PL_multi_open = PL_multi_close = '<';
- term = *PL_tokenbuf;
- if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp
- && !PL_parser->filtered) {
- char * const bufptr = PL_sublex_info.super_bufptr;
- char * const bufend = PL_sublex_info.super_bufend;
- char * const olds = s - SvCUR(herewas);
- s = strchr(bufptr, '\n');
- if (!s)
- s = bufend;
+ /* inside a string eval or quote-like operator */
+ if (!infile || PL_lex_inwhat) {
+ SV *linestr;
+ char *bufptr, *bufend;
+ char * const olds = s;
+ PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
+ /* These two fields are not set until an inner lexing scope is
+ entered. But we need them set here. */
+ shared->ls_bufptr = s;
+ shared->ls_linestr = PL_linestr;
+ if (PL_lex_inwhat)
+ /* Look for a newline. If the current buffer does not have one,
+ peek into the line buffer of the parent lexing scope, going
+ up as many levels as necessary to find one with a newline
+ after bufptr.
+ */
+ while (!(s = (char *)memchr(
+ (void *)shared->ls_bufptr, '\n',
+ SvEND(shared->ls_linestr)-shared->ls_bufptr
+ ))) {
+ shared = shared->ls_prev;
+ /* shared is only null if we have gone beyond the outermost
+ lexing scope. In a file, we will have broken out of the
+ loop in the previous iteration. In an eval, the string buf-
+ fer ends with "\n;", so the while condition below will have
+ evaluated to false. So shared can never be null. */
+ assert(shared);
+ /* A LEXSHARED struct with a null ls_prev pointer is the outer-
+ most lexing scope. In a file, shared->ls_linestr at that
+ level is just one line, so there is no body to steal. */
+ if (infile && !shared->ls_prev) {
+ s = olds;
+ goto streaming;
+ }
+ }
+ else { /* eval */
+ s = (char*)memchr((void*)s, '\n', PL_bufend - s);
+ assert(s);
+ }
+ bufptr = shared->ls_bufptr;
+ linestr = shared->ls_linestr;
+ bufend = SvEND(linestr);
d = s;
while (s < bufend &&
- (*s != term || memNE(s,PL_tokenbuf,len)) ) {
+ (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) {
if (*s++ == '\n')
- CopLINE_inc(PL_curcop);
+ ++shared->herelines;
}
if (s >= bufend) {
- CopLINE_set(PL_curcop, (line_t)PL_multi_start);
- missingterm(PL_tokenbuf);
- }
- sv_setpvn(herewas,bufptr,d-bufptr+1);
- sv_setpvn(tmpstr,d+1,s-d);
- s += len - 1;
- sv_catpvn(herewas,s,bufend-s);
- Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
-
- s = olds;
- goto retval;
- }
- else if (!outer) {
- d = s;
- while (s < PL_bufend &&
- (*s != term || memNE(s,PL_tokenbuf,len)) ) {
- if (*s++ == '\n')
- CopLINE_inc(PL_curcop);
- }
- if (s >= PL_bufend) {
- CopLINE_set(PL_curcop, (line_t)PL_multi_start);
- missingterm(PL_tokenbuf);
+ goto interminable;
}
sv_setpvn(tmpstr,d+1,s-d);
#ifdef PERL_MAD
}
#endif
s += len - 1;
- CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
+ /* the preceding stmt passes a newline */
+ shared->herelines++;
- sv_catpvn(herewas,s,PL_bufend-s);
- sv_setsv(PL_linestr,herewas);
- PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
- PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
- PL_last_lop = PL_last_uni = NULL;
+ /* s now points to the newline after the heredoc terminator.
+ d points to the newline before the body of the heredoc.
+ */
+
+ /* We are going to modify linestr in place here, so set
+ aside copies of the string if necessary for re-evals or
+ (caller $n)[6]. */
+ /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
+ check shared->re_eval_str. */
+ if (shared->re_eval_start || shared->re_eval_str) {
+ /* Set aside the rest of the regexp */
+ if (!shared->re_eval_str)
+ shared->re_eval_str =
+ newSVpvn(shared->re_eval_start,
+ bufend - shared->re_eval_start);
+ shared->re_eval_start -= s-d;
+ }
+ if (CxTYPE(cx) == CXt_EVAL && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
+ && cx->blk_eval.cur_text == linestr) {
+ cx->blk_eval.cur_text = newSVsv(linestr);
+ SvSCREAM_on(cx->blk_eval.cur_text);
+ }
+ /* Copy everything from s onwards back to d. */
+ Move(s,d,bufend-s + 1,char);
+ SvCUR_set(linestr, SvCUR(linestr) - (s-d));
+ /* Setting PL_bufend only applies when we have not dug deeper
+ into other scopes, because sublex_done sets PL_bufend to
+ SvEND(PL_linestr). */
+ if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
+ s = olds;
}
else
- sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
- while (s >= PL_bufend) { /* multiple line string? */
+ {
+ SV *linestr_save;
+ streaming:
+ sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
+ term = PL_tokenbuf[1];
+ len--;
+ linestr_save = PL_linestr; /* must restore this afterwards */
+ d = s; /* and this */
+ PL_linestr = newSVpvs("");
+ PL_bufend = SvPVX(PL_linestr);
+ while (1) {
#ifdef PERL_MAD
if (PL_madskills) {
tstart = SvPVX(PL_linestr) + stuffstart;
PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
}
#endif
- PL_bufptr = s;
- CopLINE_inc(PL_curcop);
- if (!outer || !lex_next_chunk(0)) {
- CopLINE_set(PL_curcop, (line_t)PL_multi_start);
- missingterm(PL_tokenbuf);
+ PL_bufptr = PL_bufend;
+ CopLINE_set(PL_curcop,
+ PL_multi_start + shared->herelines);
+ if (!lex_next_chunk(LEX_NO_TERM)
+ && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
+ SvREFCNT_dec(linestr_save);
+ goto interminable;
+ }
+ CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
+ if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
+ lex_grow_linestr(SvCUR(PL_linestr) + 2);
+ sv_catpvs(PL_linestr, "\n\0");
}
- CopLINE_dec(PL_curcop);
s = PL_bufptr;
#ifdef PERL_MAD
stuffstart = s - SvPVX(PL_linestr);
#endif
- CopLINE_inc(PL_curcop);
- PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ shared->herelines++;
PL_last_lop = PL_last_uni = NULL;
#ifndef PERL_STRICT_CR
if (PL_bufend - PL_linestart >= 2) {
else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
PL_bufend[-1] = '\n';
#endif
- if (*s == term && memEQ(s,PL_tokenbuf,len)) {
- STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
- *(SvPVX(PL_linestr) + off ) = ' ';
- lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
- sv_catsv(PL_linestr,herewas);
+ if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
+ SvREFCNT_dec(PL_linestr);
+ PL_linestr = linestr_save;
+ PL_linestart = SvPVX(linestr_save);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
- s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
+ s = d;
+ break;
}
else {
- s = PL_bufend;
sv_catsv(tmpstr,PL_linestr);
}
+ }
}
- s++;
-retval:
PL_multi_end = CopLINE(PL_curcop);
if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
SvPV_shrink_to_cur(tmpstr);
}
- SvREFCNT_dec(herewas);
if (!IN_BYTES) {
if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
SvUTF8_on(tmpstr);
PL_lex_stuff = tmpstr;
pl_yylval.ival = op_type;
return s;
+
+ interminable:
+ SvREFCNT_dec(tmpstr);
+ CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
+ missingterm(PL_tokenbuf + 1);
}
/* scan_inputsymbol
S_scan_inputsymbol(pTHX_ char *start)
{
dVAR;
- register char *s = start; /* current position in buffer */
+ char *s = start; /* current position in buffer */
char *end;
I32 len;
char *d = PL_tokenbuf; /* start of temp holding space */
if (d - PL_tokenbuf != len) {
pl_yylval.ival = OP_GLOB;
- s = scan_str(start,!!PL_madskills,FALSE);
+ s = scan_str(start,!!PL_madskills,FALSE,FALSE);
if (!s)
Perl_croak(aTHX_ "Glob not terminated");
return s;
takes: start position in buffer
keep_quoted preserve \ on the embedded delimiter(s)
keep_delims preserve the delimiters around the string
+ re_reparse compiling a run-time /(?{})/:
+ collapse // to /, and skip encoding src
returns: position to continue reading from buffer
side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
updates the read buffer.
*/
STATIC char *
-S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
+S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
{
dVAR;
SV *sv; /* scalar value: string */
const char *tmps; /* temp string, used for delimiter matching */
- register char *s = start; /* current position in the buffer */
- register char term; /* terminating character */
- register char *to; /* current position in the sv's data */
+ char *s = start; /* current position in the buffer */
+ char term; /* terminating character */
+ char *to; /* current position in the sv's data */
I32 brackets = 1; /* bracket nesting level */
bool has_utf8 = FALSE; /* is there any utf8 content? */
I32 termcode; /* terminating char. code */
}
#endif
for (;;) {
- if (PL_encoding && !UTF) {
+ if (PL_encoding && !UTF && !re_reparse) {
bool cont = TRUE;
while (cont) {
for (; s < ns; s++) {
if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
- CopLINE_inc(PL_curcop);
+ COPLINE_INC_WITH_HERELINES;
}
if (!found)
goto read_more_line;
for (; s < PL_bufend; s++,to++) {
/* embedded newlines increment the current line number */
if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
- CopLINE_inc(PL_curcop);
+ COPLINE_INC_WITH_HERELINES;
/* handle quoted delimiters */
if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
- if (!keep_quoted && s[1] == term)
+ if (!keep_quoted
+ && (s[1] == term
+ || (re_reparse && s[1] == '\\'))
+ )
s++;
- /* any other quotes are simply copied straight through */
+ /* any other quotes are simply copied straight through */
else
*to++ = *s++;
}
for (; s < PL_bufend; s++,to++) {
/* embedded newlines increment the line count */
if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
- CopLINE_inc(PL_curcop);
+ COPLINE_INC_WITH_HERELINES;
/* backslashes can escape the open or closing characters */
if (*s == '\\' && s+1 < PL_bufend) {
if (!keep_quoted &&
PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
}
#endif
- CopLINE_inc(PL_curcop);
+ COPLINE_INC_WITH_HERELINES;
PL_bufptr = PL_bufend;
if (!lex_next_chunk(0)) {
sv_free(sv);
/* at this point, we have successfully read the delimited string */
- if (!PL_encoding || UTF) {
+ if (!PL_encoding || UTF || re_reparse) {
#ifdef PERL_MAD
if (PL_madskills) {
char * const tstart = SvPVX(PL_linestr) + stuffstart;
}
}
#endif
- if (has_utf8 || PL_encoding)
+ if (has_utf8 || (PL_encoding && !re_reparse))
SvUTF8_on(sv);
PL_multi_end = CopLINE(PL_curcop);
*/
if (PL_lex_stuff)
- PL_lex_repl = sv;
+ PL_sublex_info.repl = sv;
else
PL_lex_stuff = sv;
return s;
Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
{
dVAR;
- register const char *s = start; /* current position in buffer */
- register char *d; /* destination in temp buffer */
- register char *e; /* end of temp buffer */
+ const char *s = start; /* current position in buffer */
+ char *d; /* destination in temp buffer */
+ char *e; /* end of temp buffer */
NV nv; /* number read, as a double */
SV *sv = NULL; /* place to put the converted number */
bool floatit; /* boolean: int or float? */
S_scan_formline(pTHX_ register char *s)
{
dVAR;
- register char *eol;
- register char *t;
+ char *eol;
+ char *t;
SV * const stuff = newSVpvs("");
bool needargs = FALSE;
bool eofmt = FALSE;
break;
}
}
- if (PL_in_eval && !PL_rsfp && !PL_parser->filtered) {
- eol = (char *) memchr(s,'\n',PL_bufend-s);
- if (!eol++)
+ eol = (char *) memchr(s,'\n',PL_bufend-s);
+ if (!eol++)
eol = PL_bufend;
- }
- else
- eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
if (*s != '#') {
for (t = s; t < eol; t++) {
if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
break;
}
s = (char*)eol;
- if (PL_rsfp || PL_parser->filtered) {
+ if ((PL_rsfp || PL_parser->filtered)
+ && PL_parser->form_lex_state == LEX_NORMAL) {
bool got_some;
#ifdef PERL_MAD
if (PL_madskills) {
}
#endif
PL_bufptr = PL_bufend;
- CopLINE_inc(PL_curcop);
+ COPLINE_INC_WITH_HERELINES;
got_some = lex_next_chunk(0);
CopLINE_dec(PL_curcop);
s = PL_bufptr;
incline(s);
}
enough:
+ if (!SvCUR(stuff) || needargs)
+ PL_lex_state = PL_parser->form_lex_state;
if (SvCUR(stuff)) {
- PL_expect = XTERM;
+ PL_expect = XSTATE;
if (needargs) {
- PL_lex_state = LEX_NORMAL;
start_force(PL_curforce);
NEXTVAL_NEXTTOKE.ival = 0;
- force_next(',');
+ force_next(FORMLBRACK);
}
- else
- PL_lex_state = LEX_FORMLINE;
if (!IN_BYTES) {
if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
SvUTF8_on(stuff);
start_force(PL_curforce);
NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
force_next(THING);
- start_force(PL_curforce);
- NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
- force_next(LSTOP);
}
else {
SvREFCNT_dec(stuff);
if (eofmt)
PL_lex_formbrack = 0;
- PL_bufptr = s;
}
#ifdef PERL_MAD
if (PL_madskills) {
const I32 oldsavestack_ix = PL_savestack_ix;
CV* const outsidecv = PL_compcv;
- if (PL_compcv) {
- assert(SvTYPE(PL_compcv) == SVt_PVCV);
- }
SAVEI32(PL_subline);
save_item(PL_subname);
SAVESPTR(PL_compcv);
CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
+ if (outsidecv && CvPADLIST(outsidecv))
+ CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
return oldsavestack_ix;
}
return stmtseqop;
}
-void
-Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
-{
- PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
- deprecate("qw(...) as parentheses");
- force_next((4<<24)|')');
- if (qwlist->op_type == OP_STUB) {
- op_free(qwlist);
- }
- else {
- start_force(PL_curforce);
- NEXTVAL_NEXTTOKE.opval = qwlist;
- force_next(THING);
- }
- force_next((2<<24)|'(');
-}
-
/*
* Local variables:
* c-indentation-style: bsd
* c-basic-offset: 4
- * indent-tabs-mode: t
+ * indent-tabs-mode: nil
* End:
*
- * ex: set ts=8 sts=4 sw=4 noet:
+ * ex: set ts=8 sts=4 sw=4 et:
*/