X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/2866decb530d50f622ad6853fed7f30562e474ce..6429869b8e4c941462c3b00b0e44dcccb91d5564:/toke.c?ds=sidebyside diff --git a/toke.c b/toke.c index 59d3d6b..df73b88 100644 --- a/toke.c +++ b/toke.c @@ -2862,11 +2862,23 @@ S_scan_const(pTHX_ char *start) /* if we get here, we're not doing a transliteration */ - else if (in_charclass && *s == ']' && ! (s>start+1 && s[-1] == '\\')) - in_charclass = FALSE; + else if (*s == '[' && PL_lex_inpat && !in_charclass) { + char *s1 = s-1; + int esc = 0; + while (s1 >= start && *s1-- == '\\') + esc = !esc; + if (!esc) + in_charclass = TRUE; + } - else if (PL_lex_inpat && *s == '[') - in_charclass = TRUE; + else if (*s == ']' && PL_lex_inpat && in_charclass) { + char *s1 = s-1; + int esc = 0; + while (s1 >= start && *s1-- == '\\') + esc = !esc; + if (!esc) + in_charclass = FALSE; + } /* skip for regexp comments /(?#comment)/, except for the last * char, which will be done separately. @@ -2979,7 +2991,7 @@ S_scan_const(pTHX_ char *start) /* FALL THROUGH */ default: { - if ((isALPHA(*s) || isDIGIT(*s))) + if ((isALNUMC(*s))) Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Unrecognized escape \\%c passed through", *s); @@ -3015,29 +3027,16 @@ S_scan_const(pTHX_ char *start) /* eg. \x24 indicates the hex constant 0x24 */ case 'x': - ++s; - if (*s == '{') { - char* const e = strchr(s, '}'); - I32 flags = PERL_SCAN_ALLOW_UNDERSCORES | - PERL_SCAN_DISALLOW_PREFIX; + { STRLEN len; + const char* error; - ++s; - if (!e) { - yyerror("Missing right brace on \\x{}"); + bool valid = grok_bslash_x(s, &uv, &len, &error, 1); + s += len; + if (! valid) { + yyerror(error); continue; } - len = e - s; - uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL)); - s = e + 1; - } - else { - { - STRLEN len = 2; - I32 flags = PERL_SCAN_DISALLOW_PREFIX; - uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL)); - s += len; - } } NUM_ESCAPE_INSERT: @@ -5535,7 +5534,7 @@ Perl_yylex(pTHX) } sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0); if (*d == '(') { - d = scan_str(d,TRUE,TRUE); + d = scan_str(d,TRUE,TRUE,FALSE); if (!d) { /* MUST advance bufptr here to avoid bogus "at end of line" context messages from yyerror(). @@ -6431,7 +6430,7 @@ Perl_yylex(pTHX) TERM(THING); case '\'': - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); if (PL_expect == XOPERATOR) { if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { @@ -6446,7 +6445,7 @@ Perl_yylex(pTHX) TERM(sublex_start()); case '"': - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); if (PL_expect == XOPERATOR) { if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { @@ -6469,7 +6468,7 @@ Perl_yylex(pTHX) TERM(sublex_start()); case '`': - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } ); if (PL_expect == XOPERATOR) no_op("Backticks",s); @@ -7835,7 +7834,7 @@ Perl_yylex(pTHX) LOP(OP_PIPE_OP,XTERM); case KEY_q: - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); if (!s) missingterm(NULL); pl_yylval.ival = OP_CONST; @@ -7846,7 +7845,7 @@ Perl_yylex(pTHX) case KEY_qw: { OP *words = NULL; - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); if (!s) missingterm(NULL); PL_expect = XOPERATOR; @@ -7896,7 +7895,7 @@ Perl_yylex(pTHX) } case KEY_qq: - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); if (!s) missingterm(NULL); pl_yylval.ival = OP_STRINGIFY; @@ -7909,7 +7908,7 @@ Perl_yylex(pTHX) TERM(sublex_start()); case KEY_qx: - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); if (!s) missingterm(NULL); readpipe_override(); @@ -8218,7 +8217,7 @@ Perl_yylex(pTHX) const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO); STRLEN tmplen; - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); if (!s) Perl_croak(aTHX_ "Prototype not terminated"); /* strip spaces and check for bad characters */ @@ -9049,19 +9048,23 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in * the parse starting at 's', based on the subset that are valid in this * context input to this routine in 'valid_flags'. Advances s. Returns - * TRUE if the input was a valid flag, so the next char may be as well; - * otherwise FALSE. 'charset' should point to a NUL upon first call on the - * current regex. This routine will set it to any charset modifier found. - * The caller shouldn't change it. This way, another charset modifier - * encountered in the parse can be detected as an error, as we have decided - * to allow only one */ + * TRUE if the input should be treated as a valid flag, so the next char + * may be as well; otherwise FALSE. 'charset' should point to a NUL upon + * first call on the current regex. This routine will set it to any + * charset modifier found. The caller shouldn't change it. This way, + * another charset modifier encountered in the parse can be detected as an + * error, as we have decided to allow only one */ const char c = **s; - - if (! strchr(valid_flags, c)) { - if (isALNUM(c)) { - yyerror(Perl_form(aTHX_ "Unknown regexp modifier \"/%c\"", c)); - (*s)++; + STRLEN charlen = UTF ? UTF8SKIP(*s) : 1; + + if ( charlen != 1 || ! strchr(valid_flags, c) ) { + if (isALNUM_lazy_if(*s, UTF)) { + yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", charlen, *s), + UTF ? SVf_UTF8 : 0); + (*s) += charlen; + /* Pretend that it worked, so will continue processing before + * dieing */ return TRUE; } return FALSE; @@ -9140,7 +9143,7 @@ S_scan_pat(pTHX_ char *start, I32 type) { dVAR; PMOP *pm; - char *s = scan_str(start,!!PL_madskills,FALSE); + char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing); const char * const valid_flags = (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS); char charset = '\0'; /* character set modifier */ @@ -9150,6 +9153,9 @@ S_scan_pat(pTHX_ char *start, I32 type) PERL_ARGS_ASSERT_SCAN_PAT; + /* this was only needed for the initial scan_str; set it to false + * so that any (?{}) code blocks etc are parsed normally */ + PL_reg_state.re_reparsing = FALSE; if (!s) { const char * const delimiter = skipspace(start); Perl_croak(aTHX_ @@ -9185,6 +9191,25 @@ S_scan_pat(pTHX_ char *start, I32 type) #ifdef PERL_MAD modstart = s; #endif + + /* if qr/...(?{..}).../, then need to parse the pattern within a new + * anon CV. False positives like qr/[(?{]/ are harmless */ + + if (type == OP_QR) { + STRLEN len; + char *e, *p = SvPV(PL_lex_stuff, len); + e = p + len; + for (; p < e; p++) { + if (p[0] == '(' && p[1] == '?' + && (p[2] == '{' || (p[2] == '?' && p[3] == '{'))) + { + pm->op_pmflags |= PMf_HAS_CV; + break; + } + } + pm->op_pmflags |= PMf_IS_QR; + } + while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {}; #ifdef PERL_MAD if (PL_madskills && modstart != s) { @@ -9221,7 +9246,7 @@ S_scan_subst(pTHX_ char *start) pl_yylval.ival = OP_NULL; - s = scan_str(start,!!PL_madskills,FALSE); + s = scan_str(start,!!PL_madskills,FALSE,FALSE); if (!s) Perl_croak(aTHX_ "Substitution pattern not terminated"); @@ -9239,7 +9264,7 @@ S_scan_subst(pTHX_ char *start) #endif first_start = PL_multi_start; - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); if (!s) { if (PL_lex_stuff) { SvREFCNT_dec(PL_lex_stuff); @@ -9329,7 +9354,7 @@ S_scan_trans(pTHX_ char *start) pl_yylval.ival = OP_NULL; - s = scan_str(start,!!PL_madskills,FALSE); + s = scan_str(start,!!PL_madskills,FALSE,FALSE); if (!s) Perl_croak(aTHX_ "Transliteration pattern not terminated"); @@ -9345,7 +9370,7 @@ S_scan_trans(pTHX_ char *start) } #endif - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); if (!s) { if (PL_lex_stuff) { SvREFCNT_dec(PL_lex_stuff); @@ -9737,7 +9762,7 @@ S_scan_inputsymbol(pTHX_ char *start) if (d - PL_tokenbuf != len) { pl_yylval.ival = OP_GLOB; - s = scan_str(start,!!PL_madskills,FALSE); + s = scan_str(start,!!PL_madskills,FALSE,FALSE); if (!s) Perl_croak(aTHX_ "Glob not terminated"); return s; @@ -9837,6 +9862,8 @@ intro_sym: takes: start position in buffer keep_quoted preserve \ on the embedded delimiter(s) keep_delims preserve the delimiters around the string + re_reparse compiling a run-time /(?{})/: + collapse // to /, and skip encoding src returns: position to continue reading from buffer side-effects: multi_start, multi_close, lex_repl or lex_stuff, and updates the read buffer. @@ -9877,7 +9904,7 @@ intro_sym: */ STATIC char * -S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) +S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse) { dVAR; SV *sv; /* scalar value: string */ @@ -9956,7 +9983,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) } #endif for (;;) { - if (PL_encoding && !UTF) { + if (PL_encoding && !UTF && !re_reparse) { bool cont = TRUE; while (cont) { @@ -10038,9 +10065,12 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) CopLINE_inc(PL_curcop); /* handle quoted delimiters */ if (*s == '\\' && s+1 < PL_bufend && term != '\\') { - if (!keep_quoted && s[1] == term) + if (!keep_quoted + && (s[1] == term + || (re_reparse && s[1] == '\\')) + ) s++; - /* any other quotes are simply copied straight through */ + /* any other quotes are simply copied straight through */ else *to++ = *s++; } @@ -10141,7 +10171,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) /* at this point, we have successfully read the delimited string */ - if (!PL_encoding || UTF) { + if (!PL_encoding || UTF || re_reparse) { #ifdef PERL_MAD if (PL_madskills) { char * const tstart = SvPVX(PL_linestr) + stuffstart; @@ -10173,7 +10203,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) } } #endif - if (has_utf8 || PL_encoding) + if (has_utf8 || (PL_encoding && !re_reparse)) SvUTF8_on(sv); PL_multi_end = CopLINE(PL_curcop);