/* if we get here, we're not doing a transliteration */
- else if (in_charclass && *s == ']' && ! (s>start+1 && s[-1] == '\\'))
- in_charclass = FALSE;
+ else if (*s == '[' && PL_lex_inpat && !in_charclass) {
+ char *s1 = s-1;
+ int esc = 0;
+ while (s1 >= start && *s1-- == '\\')
+ esc = !esc;
+ if (!esc)
+ in_charclass = TRUE;
+ }
- else if (PL_lex_inpat && *s == '[')
- in_charclass = TRUE;
+ else if (*s == ']' && PL_lex_inpat && in_charclass) {
+ char *s1 = s-1;
+ int esc = 0;
+ while (s1 >= start && *s1-- == '\\')
+ esc = !esc;
+ if (!esc)
+ in_charclass = FALSE;
+ }
/* skip for regexp comments /(?#comment)/, except for the last
* char, which will be done separately.
/* FALL THROUGH */
default:
{
- if ((isALPHA(*s) || isDIGIT(*s)))
+ if ((isALNUMC(*s)))
Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
"Unrecognized escape \\%c passed through",
*s);
/* eg. \x24 indicates the hex constant 0x24 */
case 'x':
- ++s;
- if (*s == '{') {
- char* const e = strchr(s, '}');
- I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
- PERL_SCAN_DISALLOW_PREFIX;
+ {
STRLEN len;
+ const char* error;
- ++s;
- if (!e) {
- yyerror("Missing right brace on \\x{}");
+ bool valid = grok_bslash_x(s, &uv, &len, &error, 1);
+ s += len;
+ if (! valid) {
+ yyerror(error);
continue;
}
- len = e - s;
- uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
- s = e + 1;
- }
- else {
- {
- STRLEN len = 2;
- I32 flags = PERL_SCAN_DISALLOW_PREFIX;
- uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
- s += len;
- }
}
NUM_ESCAPE_INSERT:
}
sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
if (*d == '(') {
- d = scan_str(d,TRUE,TRUE);
+ d = scan_str(d,TRUE,TRUE,FALSE);
if (!d) {
/* MUST advance bufptr here to avoid bogus
"at end of line" context messages from yyerror().
TERM(THING);
case '\'':
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
if (PL_expect == XOPERATOR) {
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
TERM(sublex_start());
case '"':
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
if (PL_expect == XOPERATOR) {
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
TERM(sublex_start());
case '`':
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
if (PL_expect == XOPERATOR)
no_op("Backticks",s);
LOP(OP_PIPE_OP,XTERM);
case KEY_q:
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s)
missingterm(NULL);
pl_yylval.ival = OP_CONST;
case KEY_qw: {
OP *words = NULL;
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s)
missingterm(NULL);
PL_expect = XOPERATOR;
}
case KEY_qq:
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s)
missingterm(NULL);
pl_yylval.ival = OP_STRINGIFY;
TERM(sublex_start());
case KEY_qx:
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s)
missingterm(NULL);
readpipe_override();
const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
STRLEN tmplen;
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s)
Perl_croak(aTHX_ "Prototype not terminated");
/* strip spaces and check for bad characters */
/* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
* the parse starting at 's', based on the subset that are valid in this
* context input to this routine in 'valid_flags'. Advances s. Returns
- * TRUE if the input was a valid flag, so the next char may be as well;
- * otherwise FALSE. 'charset' should point to a NUL upon first call on the
- * current regex. This routine will set it to any charset modifier found.
- * The caller shouldn't change it. This way, another charset modifier
- * encountered in the parse can be detected as an error, as we have decided
- * to allow only one */
+ * TRUE if the input should be treated as a valid flag, so the next char
+ * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
+ * first call on the current regex. This routine will set it to any
+ * charset modifier found. The caller shouldn't change it. This way,
+ * another charset modifier encountered in the parse can be detected as an
+ * error, as we have decided to allow only one */
const char c = **s;
-
- if (! strchr(valid_flags, c)) {
- if (isALNUM(c)) {
- yyerror(Perl_form(aTHX_ "Unknown regexp modifier \"/%c\"", c));
- (*s)++;
+ STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
+
+ if ( charlen != 1 || ! strchr(valid_flags, c) ) {
+ if (isALNUM_lazy_if(*s, UTF)) {
+ yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", charlen, *s),
+ UTF ? SVf_UTF8 : 0);
+ (*s) += charlen;
+ /* Pretend that it worked, so will continue processing before
+ * dieing */
return TRUE;
}
return FALSE;
{
dVAR;
PMOP *pm;
- char *s = scan_str(start,!!PL_madskills,FALSE);
+ char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing);
const char * const valid_flags =
(const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
char charset = '\0'; /* character set modifier */
PERL_ARGS_ASSERT_SCAN_PAT;
+ /* this was only needed for the initial scan_str; set it to false
+ * so that any (?{}) code blocks etc are parsed normally */
+ PL_reg_state.re_reparsing = FALSE;
if (!s) {
const char * const delimiter = skipspace(start);
Perl_croak(aTHX_
pl_yylval.ival = OP_NULL;
- s = scan_str(start,!!PL_madskills,FALSE);
+ s = scan_str(start,!!PL_madskills,FALSE,FALSE);
if (!s)
Perl_croak(aTHX_ "Substitution pattern not terminated");
#endif
first_start = PL_multi_start;
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s) {
if (PL_lex_stuff) {
SvREFCNT_dec(PL_lex_stuff);
pl_yylval.ival = OP_NULL;
- s = scan_str(start,!!PL_madskills,FALSE);
+ s = scan_str(start,!!PL_madskills,FALSE,FALSE);
if (!s)
Perl_croak(aTHX_ "Transliteration pattern not terminated");
}
#endif
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s) {
if (PL_lex_stuff) {
SvREFCNT_dec(PL_lex_stuff);
if (d - PL_tokenbuf != len) {
pl_yylval.ival = OP_GLOB;
- s = scan_str(start,!!PL_madskills,FALSE);
+ s = scan_str(start,!!PL_madskills,FALSE,FALSE);
if (!s)
Perl_croak(aTHX_ "Glob not terminated");
return s;
takes: start position in buffer
keep_quoted preserve \ on the embedded delimiter(s)
keep_delims preserve the delimiters around the string
+ re_reparse compiling a run-time /(?{})/:
+ collapse // to /, and skip encoding src
returns: position to continue reading from buffer
side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
updates the read buffer.
*/
STATIC char *
-S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
+S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
{
dVAR;
SV *sv; /* scalar value: string */
}
#endif
for (;;) {
- if (PL_encoding && !UTF) {
+ if (PL_encoding && !UTF && !re_reparse) {
bool cont = TRUE;
while (cont) {
CopLINE_inc(PL_curcop);
/* handle quoted delimiters */
if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
- if (!keep_quoted && s[1] == term)
+ if (!keep_quoted
+ && (s[1] == term
+ || (re_reparse && s[1] == '\\'))
+ )
s++;
- /* any other quotes are simply copied straight through */
+ /* any other quotes are simply copied straight through */
else
*to++ = *s++;
}
/* at this point, we have successfully read the delimited string */
- if (!PL_encoding || UTF) {
+ if (!PL_encoding || UTF || re_reparse) {
#ifdef PERL_MAD
if (PL_madskills) {
char * const tstart = SvPVX(PL_linestr) + stuffstart;
}
}
#endif
- if (has_utf8 || PL_encoding)
+ if (has_utf8 || (PL_encoding && !re_reparse))
SvUTF8_on(sv);
PL_multi_end = CopLINE(PL_curcop);