/* LEX_* are values for PL_lex_state, the state of the lexer.
* They are arranged oddly so that the guard on the switch statement
* can get by with a single comparison (if the compiler is smart enough).
+ *
+ * These values refer to the various states within a sublex parse,
+ * i.e. within a double quotish string
*/
/* #define LEX_NOTPARSING 11 is done in perl.h. */
SAVEI32(PL_lex_casemods);
SAVEI32(PL_lex_starts);
SAVEI8(PL_lex_state);
+ SAVEPPTR(PL_sublex_info.re_eval_start);
SAVEVPTR(PL_lex_inpat);
SAVEI16(PL_lex_inwhat);
SAVECOPLINE(PL_curcop);
PL_linestr = PL_lex_stuff;
PL_lex_stuff = NULL;
+ PL_sublex_info.re_eval_start = NULL;
PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
= SvPVX(PL_linestr);
/*
scan_const
- Extracts a pattern, double-quoted string, or transliteration. This
- is terrifying code.
+ Extracts the next constant part of a pattern, double-quoted string,
+ or transliteration. This is terrifying code.
+
+ For example, in parsing the double-quoted string "ab\x63$d", it would
+ stop at the '$' and return an OP_CONST containing 'abc'.
It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
processing a pattern (PL_lex_inpat is true), a transliteration
Returns a pointer to the character scanned up to. If this is
advanced from the start pointer supplied (i.e. if anything was
- successfully parsed), will leave an OP for the substring scanned
+ successfully parsed), will leave an OP_CONST for the substring scanned
in pl_yylval. Caller must intuit reason for not parsing further
by looking at the next characters herself.
In patterns:
- backslashes:
- constants: \N{NAME} only
- case and quoting: \U \Q \E
- stops on @ and $, but not for $ as tail anchor
+ expand:
+ \N{ABC} => \N{U+41.42.43}
+
+ pass through:
+ all other \-char, including \N and \N{ apart from \N{ABC}
+
+ stops on:
+ @ and $ where it appears to be a var, but not for $ as tail anchor
+ \l \L \u \U \Q \E
+ (?{ or (??{
+
In transliterations:
characters are VERY literal, except for - not at the start or end
it's a tail anchor if $ is the last thing in the string, or if it's
followed by one of "()| \r\n\t"
- \1 (backreferences) are turned into $1
+ \1 (backreferences) are turned into $1 in substitutions
The structure of the code is
while (there's a character to process) {
/* if we get here, we're not doing a transliteration */
- /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
- except for the last char, which will be done separately. */
+ /* skip for regexp comments /(?#comment)/, except for the last
+ * char, which will be done separately.
+ * Stop on (?{..}) and friends */
+
else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
if (s[2] == '#') {
while (s+1 < send && *s != ')')
else if (s[2] == '{' /* This should match regcomp.c */
|| (s[2] == '?' && s[3] == '{'))
{
- I32 count = 1;
- char *regparse = s + (s[2] == '{' ? 3 : 4);
- char c;
-
- while (count && (c = *regparse)) {
- if (c == '\\' && regparse[1])
- regparse++;
- else if (c == '{')
- count++;
- else if (c == '}')
- count--;
- regparse++;
- }
- if (*regparse != ')')
- regparse--; /* Leave one char for continuation. */
- while (s < regparse)
- *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+ break;
}
}
*d++ = NATIVE_TO_NEED(has_utf8,*s++);
}
+ /* no further processing of single-quoted regex */
+ else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
+ goto default_action;
+
/* check for embedded arrays
(@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
*/
} else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
type = "s";
typelen = 1;
+ } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
+ type = "q";
+ typelen = 1;
} else {
type = "qq";
typelen = 2;
case LEX_INTERPSTART:
if (PL_bufptr == PL_bufend)
return REPORT(sublex_done());
- DEBUG_T({ PerlIO_printf(Perl_debug_log,
+ DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
"### Interpolated variable\n"); });
PL_expect = XTERM;
PL_lex_dojoin = (*PL_bufptr == '@');
NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
force_next(FUNC);
}
+ /* Convert (?{...}) and friends to 'do {...}' */
+ if (PL_lex_inpat && *PL_bufptr == '(') {
+ PL_sublex_info.re_eval_start = PL_bufptr;
+ PL_bufptr += 2;
+ if (*PL_bufptr != '{')
+ PL_bufptr++;
+ PL_expect = XTERMBLOCK;
+ force_next(DO);
+ }
+
if (PL_lex_starts++) {
s = PL_bufptr;
#ifdef PERL_MAD
Perl_croak(aTHX_ "Bad evalled substitution pattern");
PL_lex_repl = NULL;
}
+ if (PL_sublex_info.re_eval_start) {
+ if (*PL_bufptr != ')')
+ Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
+ PL_bufptr++;
+ /* having compiled a (?{..}) expression, return the original
+ * text too, as a const */
+ PL_nextval[PL_nexttoke].opval =
+ (OP*)newSVOP(OP_CONST, 0,
+ newSVpvn(PL_sublex_info.re_eval_start,
+ PL_bufptr - PL_sublex_info.re_eval_start));
+ force_next(THING);
+ PL_sublex_info.re_eval_start = NULL;
+ PL_expect = XTERM;
+ return REPORT(',');
+ }
+
/* FALLTHROUGH */
case LEX_INTERPCONCAT:
#ifdef DEBUGGING
if (PL_bufptr == PL_bufend)
return REPORT(sublex_done());
- if (SvIVX(PL_linestr) == '\'') {
+ /* m'foo' still needs to be parsed for possible (?{...}) */
+ if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
SV *sv = newSVsv(PL_linestr);
- if (!PL_lex_inpat)
- sv = tokeq(sv);
- else if ( PL_hints & HINT_NEW_RE )
- sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
+ sv = tokeq(sv);
pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
s = PL_bufend;
}