From fee505829585692618c3f9bb28a8f0464553ec94 Mon Sep 17 00:00:00 2001 From: Yves Orton Date: Mon, 5 Oct 2015 00:03:10 +0200 Subject: [PATCH] fix perl #126186 make all verbs allow an optional arg In perl #126186 it was pointed out we had started allowing name arguments for verbs where we did not document them to be supported, albeit in an inconsistent way. The previous patch cleaned up some of the cause of this, but it seems better to just generally allow the existing verbs to all support a mark name argument. So this patch reverses the effect of the previous patch, and makes all verbs, FAIL, ACCEPT, etc, allow an optional argument, and set REGERROR/REGMARK appropriately as well. --- pod/perldebguts.pod | 5 +-- pod/perlre.pod | 23 +++++++------- regcomp.c | 90 +++++++++++++++++++++++++---------------------------- regcomp.sym | 5 ++- regexec.c | 17 +++++++--- regnodes.h | 8 ++--- t/re/pat_advanced.t | 9 ++++-- t/re/re_tests | 5 +-- 8 files changed, 84 insertions(+), 78 deletions(-) diff --git a/pod/perldebguts.pod b/pod/perldebguts.pod index 6bd38c1..5024d98 100644 --- a/pod/perldebguts.pod +++ b/pod/perldebguts.pod @@ -755,8 +755,9 @@ will be lost. # Backtracking Verbs ENDLIKE none Used only for the type field of verbs - OPFAIL none Same as (?!) - ACCEPT parno 1 Accepts the current matched string. + OPFAIL no-sv 1 Same as (?!), but with verb arg + ACCEPT no-sv/num Accepts the current matched string, with + 2L verbar # Verbs With Arguments VERB no-sv 1 Used only for the type field of verbs diff --git a/pod/perlre.pod b/pod/perlre.pod index a262b4c..d97cfa3 100644 --- a/pod/perlre.pod +++ b/pod/perlre.pod @@ -1856,7 +1856,7 @@ See L. These special patterns are generally of the form C<(*VERB:ARG)>. Unless otherwise stated the ARG argument is optional; in some cases, it is -forbidden. +mandatory. Any pattern containing a special backtracking verb that allows an argument has the special behaviour that when executed it sets the current package's @@ -1884,7 +1884,7 @@ argument, then C<$REGERROR> and C<$REGMARK> are not touched at all. =over 3 -=item Verbs that take an argument +=item Verbs =over 4 @@ -2040,13 +2040,7 @@ is not the same as as after matching the A but failing on the B the C<(*THEN)> verb will backtrack and try C; but the C<(*PRUNE)> verb will simply fail. -=back - -=item Verbs without an argument - -=over 4 - -=item C<(*COMMIT)> +=item C<(*COMMIT)> C<(*COMMIT:args)> X<(*COMMIT)> This is the Perl 6 "commit pattern" C<< >> or C<:::>. It's a @@ -2067,16 +2061,18 @@ In other words, once the C<(*COMMIT)> has been entered, and if the pattern does not match, the regex engine will not try any further matching on the rest of the string. -=item C<(*FAIL)> C<(*F)> +=item C<(*FAIL)> C<(*F)> C<(*FAIL:arg)> X<(*FAIL)> X<(*F)> This pattern matches nothing and always fails. It can be used to force the engine to backtrack. It is equivalent to C<(?!)>, but easier to read. In -fact, C<(?!)> gets optimised into C<(*FAIL)> internally. +fact, C<(?!)> gets optimised into C<(*FAIL)> internally. You can provide +an argument so that if the match fails because of this FAIL directive +the argument can be obtained from $REGERROR. It is probably useful only when combined with C<(?{})> or C<(??{})>. -=item C<(*ACCEPT)> +=item C<(*ACCEPT)> C<(*ACCEPT:arg)> X<(*ACCEPT)> This pattern matches nothing and causes the end of successful matching at @@ -2095,6 +2091,9 @@ will match, and C<$1> will be C and C<$2> will be C, C<$3> will not be set. If another branch in the inner parentheses was matched, such as in the string 'ACDE', then the C and C would have to be matched as well. +You can provide an argument, which will be available in the var $REGMARK +after the match completes. + =back =back diff --git a/regcomp.c b/regcomp.c index b5d98e7..f6dd9f0 100644 --- a/regcomp.c +++ b/regcomp.c @@ -9860,7 +9860,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) STRLEN verb_len = 0; char *start_arg = NULL; unsigned char op = 0; - int argok = 1; + int arg_required = 0; int internal_argval = -1; /* if >-1 we are not allowed an argument*/ if (has_intervening_patws) { @@ -9903,14 +9903,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) case 'F': /* (*FAIL) */ if ( verb_len==1 || memEQs(start_verb,verb_len,"FAIL") ) { op = OPFAIL; - argok = 0; } break; case ':': /* (*:NAME) */ case 'M': /* (*MARK:NAME) */ if ( verb_len==0 || memEQs(start_verb,verb_len,"MARK") ) { op = MARKPOINT; - argok = -1; + arg_required = 1; } break; case 'P': /* (*PRUNE) */ @@ -9935,40 +9934,30 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) "Unknown verb pattern '%"UTF8f"'", UTF8fARG(UTF, verb_len, start_verb)); } - if ( argok ) { - if ( start_arg && internal_argval != -1 ) { - vFAIL3("Verb pattern '%.*s' may not have an argument", - verb_len, start_verb); - } else if ( argok < 0 && !start_arg ) { - vFAIL3("Verb pattern '%.*s' has a mandatory argument", - verb_len, start_verb); - } else if ( internal_argval == -1 ) { - ret = reganode(pRExC_state, op, 0); - if ( ! SIZE_ONLY ) { - if (start_arg) { - SV *sv = newSVpvn( start_arg, - RExC_parse - start_arg); - ARG(ret) = add_data( pRExC_state, - STR_WITH_LEN("S")); - RExC_rxi->data->data[ARG(ret)]=(void*)sv; - ret->flags = 0; - } else { - ret->flags = 1; - } - } - RExC_seen |= REG_VERBARG_SEEN; - } else { - /* ACCEPT does not allow :args like the rest of the verbs - * as it currently uses its arg slot for something else. - * We can change that in a future commit. */ - ret = reganode(pRExC_state, op, internal_argval); + if ( arg_required && !start_arg ) { + vFAIL3("Verb pattern '%.*s' has a mandatory argument", + verb_len, start_verb); + } + if (internal_argval == -1) { + ret = reganode(pRExC_state, op, 0); + } else { + ret = reg2Lanode(pRExC_state, op, 0, internal_argval); + } + RExC_seen |= REG_VERBARG_SEEN; + if ( ! SIZE_ONLY ) { + if (start_arg) { + SV *sv = newSVpvn( start_arg, + RExC_parse - start_arg); + ARG(ret) = add_data( pRExC_state, + STR_WITH_LEN("S")); + RExC_rxi->data->data[ARG(ret)]=(void*)sv; + ret->flags = 1; + } else { + ret->flags = 0; } - } else if ( start_arg ) { - vFAIL3("Verb pattern '%.*s' may not have an argument", - verb_len, start_verb); - } else { - ret = reg_node(pRExC_state, op); - } + if ( internal_argval != -1 ) + ARG2L_SET(ret, internal_argval); + } nextchar(pRExC_state); return ret; } @@ -10131,7 +10120,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) --RExC_parse; nextchar(pRExC_state); if (*RExC_parse == ')') { - ret=reg_node(pRExC_state, OPFAIL); + ret=reganode(pRExC_state, OPFAIL, 0); nextchar(pRExC_state); return ret; } @@ -10948,7 +10937,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match"); RExC_emit = orig_emit; } - ret = reg_node(pRExC_state, OPFAIL); + ret = reganode(pRExC_state, OPFAIL, 0); return ret; } else if (min == max && RExC_parse < RExC_end && *RExC_parse == '?') @@ -16094,7 +16083,11 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, RExC_parse = (char *)orig_parse; RExC_emit = (regnode *)orig_emit; - ret = reg_node(pRExC_state, op); + if (regarglen[op]) { + ret = reganode(pRExC_state, op, 0); + } else { + ret = reg_node(pRExC_state, op); + } RExC_parse = (char *)cur_parse; @@ -17090,7 +17083,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ || k == GROUPP || OP(o)==ACCEPT) { AV *name_list= NULL; - Perl_sv_catpvf(aTHX_ sv, "%d", (int)ARG(o)); /* Parenth number */ + U32 parno= OP(o) == ACCEPT ? (U32)ARG2L(o) : ARG(o); + Perl_sv_catpvf(aTHX_ sv, "%"UVuf, (UV)parno); /* Parenth number */ if ( RXp_PAREN_NAMES(prog) ) { name_list= MUTABLE_AV(progi->data->data[progi->name_list_idx]); } else if ( pRExC_state ) { @@ -17098,12 +17092,12 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ } if (name_list) { if ( k != REF || (OP(o) < NREF)) { - SV **name= av_fetch(name_list, ARG(o), 0 ); + SV **name= av_fetch(name_list, parno, 0 ); if (name) Perl_sv_catpvf(aTHX_ sv, " '%"SVf"'", SVfARG(*name)); } else { - SV *sv_dat= MUTABLE_SV(progi->data->data[ ARG( o ) ]); + SV *sv_dat= MUTABLE_SV(progi->data->data[ parno ]); I32 *nums=(I32*)SvPVX(sv_dat); SV **name= av_fetch(name_list, nums[0], 0 ); I32 n; @@ -17146,11 +17140,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ Perl_sv_catpvf(aTHX_ sv, " '%"SVf"'", SVfARG(*name)); } } - else if (k == VERB) { - if (!o->flags) - Perl_sv_catpvf(aTHX_ sv, ":%"SVf, - SVfARG((MUTABLE_SV(progi->data->data[ ARG( o ) ])))); - } else if (k == LOGICAL) + else if (k == LOGICAL) /* 2: embedded, otherwise 1 */ Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); else if (k == ANYOF) { @@ -17329,6 +17319,12 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ Perl_sv_catpvf(aTHX_ sv, "[%d]", -(o->flags)); else if (OP(o) == SBOL) Perl_sv_catpvf(aTHX_ sv, " /%s/", o->flags ? "\\A" : "^"); + + /* add on the verb argument if there is one */ + if ( ( k == VERB || OP(o) == ACCEPT || OP(o) == OPFAIL ) && o->flags) { + Perl_sv_catpvf(aTHX_ sv, ":%"SVf, + SVfARG((MUTABLE_SV(progi->data->data[ ARG( o ) ])))); + } #else PERL_UNUSED_CONTEXT; PERL_UNUSED_ARG(sv); diff --git a/regcomp.sym b/regcomp.sym index 201c65e..8f9861a 100644 --- a/regcomp.sym +++ b/regcomp.sym @@ -199,9 +199,8 @@ DEFINEP DEFINEP, none 1 ; Never execute directly. #*Backtracking Verbs ENDLIKE ENDLIKE, none ; Used only for the type field of verbs -OPFAIL ENDLIKE, none ; Same as (?!) -ACCEPT ENDLIKE, parno 1 ; Accepts the current matched string. - +OPFAIL ENDLIKE, no-sv 1 ; Same as (?!), but with verb arg +ACCEPT ENDLIKE, no-sv/num 2L ; Accepts the current matched string, with verbar #*Verbs With Arguments VERB VERB, no-sv 1 ; Used only for the type field of verbs diff --git a/regexec.c b/regexec.c index 96def0f..5535a0e 100644 --- a/regexec.c +++ b/regexec.c @@ -6541,7 +6541,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) break; case ACCEPT: /* (*ACCEPT) */ - if (ARG(scan)){ + if (scan->flags) + sv_yes_mark = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]); + if (ARG2L(scan)){ regnode *cursor; for (cursor=scan; cursor && OP(cursor)!=END; @@ -7013,8 +7015,9 @@ NULL NOT_REACHED; /* NOTREACHED */ case CUTGROUP: /* /(*THEN)/ */ - sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL : - MUTABLE_SV(rexi->data->data[ ARG( scan ) ]); + sv_yes_mark = st->u.mark.mark_name = scan->flags + ? MUTABLE_SV(rexi->data->data[ ARG( scan ) ]) + : NULL; PUSH_STATE_GOTO(CUTGROUP_next, next, locinput); /* NOTREACHED */ NOT_REACHED; /* NOTREACHED */ @@ -7711,7 +7714,7 @@ NULL /* FALLTHROUGH */ case PRUNE: /* (*PRUNE) */ - if (!scan->flags) + if (scan->flags) sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]); PUSH_STATE_GOTO(COMMIT_next, next, locinput); /* NOTREACHED */ @@ -7720,8 +7723,12 @@ NULL case COMMIT_next_fail: no_final = 1; /* FALLTHROUGH */ + sayNO; + NOT_REACHED; /* NOTREACHED */ case OPFAIL: /* (*FAIL) */ + if (scan->flags) + sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]); if (logical) { /* deal with (?(?!)X|Y) properly, * make sure we trigger the no branch @@ -7774,7 +7781,7 @@ NULL NOT_REACHED; /* NOTREACHED */ case SKIP: /* (*SKIP) */ - if (scan->flags) { + if (!scan->flags) { /* (*SKIP) : if we fail we cut here*/ ST.mark_name = NULL; ST.mark_loc = locinput; diff --git a/regnodes.h b/regnodes.h index cc3da9d..c05003d 100644 --- a/regnodes.h +++ b/regnodes.h @@ -93,8 +93,8 @@ #define INSUBP 79 /* 0x4f Whether we are in a specific recurse. */ #define DEFINEP 80 /* 0x50 Never execute directly. */ #define ENDLIKE 81 /* 0x51 Used only for the type field of verbs */ -#define OPFAIL 82 /* 0x52 Same as (?!) */ -#define ACCEPT 83 /* 0x53 Accepts the current matched string. */ +#define OPFAIL 82 /* 0x52 Same as (?!), but with verb arg */ +#define ACCEPT 83 /* 0x53 Accepts the current matched string, with verbar */ #define VERB 84 /* 0x54 Used only for the type field of verbs */ #define PRUNE 85 /* 0x55 Pattern fails at this startpoint if no-backtracking through this */ #define MARKPOINT 86 /* 0x56 Push the current location for rollback by cut. */ @@ -377,8 +377,8 @@ static const U8 regarglen[] = { EXTRA_SIZE(struct regnode_1), /* INSUBP */ EXTRA_SIZE(struct regnode_1), /* DEFINEP */ 0, /* ENDLIKE */ - 0, /* OPFAIL */ - EXTRA_SIZE(struct regnode_1), /* ACCEPT */ + EXTRA_SIZE(struct regnode_1), /* OPFAIL */ + EXTRA_SIZE(struct regnode_2L), /* ACCEPT */ EXTRA_SIZE(struct regnode_1), /* VERB */ EXTRA_SIZE(struct regnode_1), /* PRUNE */ EXTRA_SIZE(struct regnode_1), /* MARKPOINT */ diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t index b267649..a32af20 100644 --- a/t/re/pat_advanced.t +++ b/t/re/pat_advanced.t @@ -15,6 +15,7 @@ BEGIN { use strict; use warnings; use 5.010; +our ($REGMARK, $REGERROR); sub run_tests; @@ -1286,7 +1287,7 @@ sub run_tests { { # Test named commits and the $REGERROR var - our $REGERROR; + local $REGERROR; for my $name ('', ':foo') { for my $pat ("(*PRUNE$name)", ($name ? "(*MARK$name)" : "") . "(*SKIP$name)", @@ -1305,6 +1306,7 @@ sub run_tests { # Test named commits and the $REGERROR var package Fnorble; our $REGERROR; + local $REGERROR; for my $name ('', ':foo') { for my $pat ("(*PRUNE$name)", ($name ? "(*MARK$name)" : "") . "(*SKIP$name)", @@ -1322,7 +1324,7 @@ sub run_tests { { # Test named commits and the $REGERROR var my $message = '$REGERROR'; - our $REGERROR; + local $REGERROR; for my $word (qw (bar baz bop)) { $REGERROR = ""; "aaaaa$word" =~ @@ -1392,7 +1394,8 @@ sub run_tests { { my $message = '$REGMARK'; our @r = (); - our ($REGMARK, $REGERROR); + local $REGMARK; + local $REGERROR; like('foofoo', qr/foo (*MARK:foo) (?{push @r,$REGMARK}) /x, $message); is("@r","foo", $message); is($REGMARK, "foo", $message); diff --git a/t/re/re_tests b/t/re/re_tests index 8da653a..4255fbc 100644 --- a/t/re/re_tests +++ b/t/re/re_tests @@ -1933,7 +1933,8 @@ A+(*PRUNE)BC(?{}) AAABC y $& AAABC ^((?(?!)xb|ya)z) xbz n - - ^((?(?!)xb|ya)z) yaz y $1 yaz # [perl-126222] -(*ACCEPT:foo) foo c - Verb pattern 'ACCEPT' may not have an argument -((*ACCEPT:foo)) foo c - Verb pattern 'ACCEPT' may not have an argument +foo(*ACCEPT:foo) foo y $::REGMARK foo +(foo(*ACCEPT:foo)) foo y $::REGMARK foo +A(*FAIL:foo)[BC] A n $::REGERROR foo # Keep these lines at the end of the file # vim: softtabstop=0 noexpandtab -- 1.8.3.1