register char *d = SvPVX(sv); /* destination for copies */
bool dorange = FALSE; /* are we in a translit range? */
bool didrange = FALSE; /* did we just finish a range? */
+ bool in_charclass = FALSE; /* within /[...]/ */
bool has_utf8 = FALSE; /* Output constant is UTF8 */
bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
to be UTF8? But, this can
/* if we get here, we're not doing a transliteration */
+ else if (*s == '[' && PL_lex_inpat && !in_charclass) {
+ char *s1 = s-1;
+ int esc = 0;
+ while (s1 >= start && *s1-- == '\\')
+ esc = !esc;
+ if (!esc)
+ in_charclass = TRUE;
+ }
+
+ else if (*s == ']' && PL_lex_inpat && in_charclass) {
+ char *s1 = s-1;
+ int esc = 0;
+ while (s1 >= start && *s1-- == '\\')
+ esc = !esc;
+ if (!esc)
+ in_charclass = FALSE;
+ }
+
/* skip for regexp comments /(?#comment)/, except for the last
* char, which will be done separately.
* Stop on (?{..}) and friends */
while (s+1 < send && *s != ')')
*d++ = NATIVE_TO_NEED(has_utf8,*s++);
}
- else if (s[2] == '{' /* This should match regcomp.c */
- || (s[2] == '?' && s[3] == '{'))
+ else if (!PL_lex_casemods && !in_charclass &&
+ ( s[2] == '{' /* This should match regcomp.c */
+ || (s[2] == '?' && s[3] == '{')))
{
break;
}
/* FALL THROUGH */
default:
{
- if ((isALPHA(*s) || isDIGIT(*s)))
+ if ((isALNUMC(*s)))
Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
"Unrecognized escape \\%c passed through",
*s);
/* eg. \x24 indicates the hex constant 0x24 */
case 'x':
- ++s;
- if (*s == '{') {
- char* const e = strchr(s, '}');
- I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
- PERL_SCAN_DISALLOW_PREFIX;
+ {
STRLEN len;
+ const char* error;
- ++s;
- if (!e) {
- yyerror("Missing right brace on \\x{}");
+ bool valid = grok_bslash_x(s, &uv, &len, &error, 1);
+ s += len;
+ if (! valid) {
+ yyerror(error);
continue;
}
- len = e - s;
- uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
- s = e + 1;
- }
- else {
- {
- STRLEN len = 2;
- I32 flags = PERL_SCAN_DISALLOW_PREFIX;
- uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
- s += len;
- }
}
NUM_ESCAPE_INSERT:
PL_bufptr += 2;
if (*PL_bufptr != '{')
PL_bufptr++;
+ start_force(PL_curforce);
+ /* XXX probably need a CURMAD(something) here */
PL_expect = XTERMBLOCK;
force_next(DO);
}
PL_bufptr++;
/* having compiled a (?{..}) expression, return the original
* text too, as a const */
- PL_nextval[PL_nexttoke].opval =
+ start_force(PL_curforce);
+ /* XXX probably need a CURMAD(something) here */
+ NEXTVAL_NEXTTOKE.opval =
(OP*)newSVOP(OP_CONST, 0,
newSVpvn(PL_sublex_info.re_eval_start,
PL_bufptr - PL_sublex_info.re_eval_start));
}
sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
if (*d == '(') {
- d = scan_str(d,TRUE,TRUE);
+ d = scan_str(d,TRUE,TRUE,FALSE);
if (!d) {
/* MUST advance bufptr here to avoid bogus
"at end of line" context messages from yyerror().
TERM(THING);
case '\'':
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
if (PL_expect == XOPERATOR) {
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
TERM(sublex_start());
case '"':
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
if (PL_expect == XOPERATOR) {
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
TERM(sublex_start());
case '`':
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
if (PL_expect == XOPERATOR)
no_op("Backticks",s);
LOP(OP_PIPE_OP,XTERM);
case KEY_q:
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s)
missingterm(NULL);
pl_yylval.ival = OP_CONST;
case KEY_qw: {
OP *words = NULL;
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s)
missingterm(NULL);
PL_expect = XOPERATOR;
}
case KEY_qq:
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s)
missingterm(NULL);
pl_yylval.ival = OP_STRINGIFY;
TERM(sublex_start());
case KEY_qx:
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s)
missingterm(NULL);
readpipe_override();
const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
STRLEN tmplen;
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s)
Perl_croak(aTHX_ "Prototype not terminated");
/* strip spaces and check for bad characters */
/* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
* the parse starting at 's', based on the subset that are valid in this
* context input to this routine in 'valid_flags'. Advances s. Returns
- * TRUE if the input was a valid flag, so the next char may be as well;
- * otherwise FALSE. 'charset' should point to a NUL upon first call on the
- * current regex. This routine will set it to any charset modifier found.
- * The caller shouldn't change it. This way, another charset modifier
- * encountered in the parse can be detected as an error, as we have decided
- * to allow only one */
+ * TRUE if the input should be treated as a valid flag, so the next char
+ * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
+ * first call on the current regex. This routine will set it to any
+ * charset modifier found. The caller shouldn't change it. This way,
+ * another charset modifier encountered in the parse can be detected as an
+ * error, as we have decided to allow only one */
const char c = **s;
-
- if (! strchr(valid_flags, c)) {
- if (isALNUM(c)) {
- yyerror(Perl_form(aTHX_ "Unknown regexp modifier \"/%c\"", c));
- (*s)++;
+ STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
+
+ if ( charlen != 1 || ! strchr(valid_flags, c) ) {
+ if (isALNUM_lazy_if(*s, UTF)) {
+ yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", charlen, *s),
+ UTF ? SVf_UTF8 : 0);
+ (*s) += charlen;
+ /* Pretend that it worked, so will continue processing before
+ * dieing */
return TRUE;
}
return FALSE;
{
dVAR;
PMOP *pm;
- char *s = scan_str(start,!!PL_madskills,FALSE);
+ char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing);
const char * const valid_flags =
(const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
char charset = '\0'; /* character set modifier */
PERL_ARGS_ASSERT_SCAN_PAT;
+ /* this was only needed for the initial scan_str; set it to false
+ * so that any (?{}) code blocks etc are parsed normally */
+ PL_reg_state.re_reparsing = FALSE;
if (!s) {
const char * const delimiter = skipspace(start);
Perl_croak(aTHX_
#ifdef PERL_MAD
modstart = s;
#endif
+
+ /* if qr/...(?{..}).../, then need to parse the pattern within a new
+ * anon CV. False positives like qr/[(?{]/ are harmless */
+
+ if (type == OP_QR) {
+ STRLEN len;
+ char *e, *p = SvPV(PL_lex_stuff, len);
+ e = p + len;
+ for (; p < e; p++) {
+ if (p[0] == '(' && p[1] == '?'
+ && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
+ {
+ pm->op_pmflags |= PMf_HAS_CV;
+ break;
+ }
+ }
+ pm->op_pmflags |= PMf_IS_QR;
+ }
+
while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
#ifdef PERL_MAD
if (PL_madskills && modstart != s) {
pl_yylval.ival = OP_NULL;
- s = scan_str(start,!!PL_madskills,FALSE);
+ s = scan_str(start,!!PL_madskills,FALSE,FALSE);
if (!s)
Perl_croak(aTHX_ "Substitution pattern not terminated");
#endif
first_start = PL_multi_start;
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s) {
if (PL_lex_stuff) {
SvREFCNT_dec(PL_lex_stuff);
pl_yylval.ival = OP_NULL;
- s = scan_str(start,!!PL_madskills,FALSE);
+ s = scan_str(start,!!PL_madskills,FALSE,FALSE);
if (!s)
Perl_croak(aTHX_ "Transliteration pattern not terminated");
}
#endif
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s) {
if (PL_lex_stuff) {
SvREFCNT_dec(PL_lex_stuff);
if (d - PL_tokenbuf != len) {
pl_yylval.ival = OP_GLOB;
- s = scan_str(start,!!PL_madskills,FALSE);
+ s = scan_str(start,!!PL_madskills,FALSE,FALSE);
if (!s)
Perl_croak(aTHX_ "Glob not terminated");
return s;
takes: start position in buffer
keep_quoted preserve \ on the embedded delimiter(s)
keep_delims preserve the delimiters around the string
+ re_reparse compiling a run-time /(?{})/:
+ collapse // to /, and skip encoding src
returns: position to continue reading from buffer
side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
updates the read buffer.
*/
STATIC char *
-S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
+S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
{
dVAR;
SV *sv; /* scalar value: string */
}
#endif
for (;;) {
- if (PL_encoding && !UTF) {
+ if (PL_encoding && !UTF && !re_reparse) {
bool cont = TRUE;
while (cont) {
CopLINE_inc(PL_curcop);
/* handle quoted delimiters */
if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
- if (!keep_quoted && s[1] == term)
+ if (!keep_quoted
+ && (s[1] == term
+ || (re_reparse && s[1] == '\\'))
+ )
s++;
- /* any other quotes are simply copied straight through */
+ /* any other quotes are simply copied straight through */
else
*to++ = *s++;
}
/* at this point, we have successfully read the delimited string */
- if (!PL_encoding || UTF) {
+ if (!PL_encoding || UTF || re_reparse) {
#ifdef PERL_MAD
if (PL_madskills) {
char * const tstart = SvPVX(PL_linestr) + stuffstart;
}
}
#endif
- if (has_utf8 || PL_encoding)
+ if (has_utf8 || (PL_encoding && !re_reparse))
SvUTF8_on(sv);
PL_multi_end = CopLINE(PL_curcop);