This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add more backtracking control verbs to regex engine (?CUT), (?ERROR)
authorYves Orton <demerphq@gmail.com>
Thu, 2 Nov 2006 12:35:10 +0000 (13:35 +0100)
committerRafael Garcia-Suarez <rgarciasuarez@gmail.com>
Thu, 2 Nov 2006 12:26:47 +0000 (12:26 +0000)
Message-ID: <9b18b3110611020335h7ea469a8g28ca483f6832816d@mail.gmail.com>

p4raw-id: //depot/perl@29189

13 files changed:
embed.fnc
ext/re/re.pm
pod/perl595delta.pod
pod/perlre.pod
proto.h
regcomp.c
regcomp.h
regcomp.pl
regcomp.sym
regexec.c
regexp.h
regnodes.h
t/op/pat.t

index 2d88011..7511a88 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -1357,9 +1357,9 @@ Es        |U8     |regtail_study  |NN struct RExC_state_t *state|NN regnode *p|NN const regn
 #endif
 
 #if defined(PERL_IN_REGEXEC_C) || defined(PERL_DECL_PROT)
-ERs    |I32    |regmatch       |NN const regmatch_info *reginfo|NN regnode *prog
+ERs    |I32    |regmatch       |NN regmatch_info *reginfo|NN regnode *prog
 ERs    |I32    |regrepeat      |NN const regexp *prog|NN const regnode *p|I32 max
-ERs    |I32    |regtry         |NN const regmatch_info *reginfo|NN char *startpos
+ERs    |I32    |regtry         |NN regmatch_info *reginfo|NN char **startpos
 ERs    |bool   |reginclass     |NULLOK const regexp *prog|NN const regnode *n|NN const U8 *p|NULLOK STRLEN *lenp\
                                |bool do_utf8sv_is_utf8
 Es     |CHECKPOINT|regcppush   |I32 parenfloor
@@ -1369,7 +1369,7 @@ ERsn      |U8*    |reghop3        |NN U8 *pos|I32 off|NN const U8 *lim
 ERsn   |U8*    |reghop4        |NN U8 *pos|I32 off|NN const U8 *llim|NN const U8 *rlim
 #endif
 ERsn   |U8*    |reghopmaybe3   |NN U8 *pos|I32 off|NN const U8 *lim
-ERs    |char*  |find_byclass   |NN regexp * prog|NN const regnode *c|NN char *s|NN const char *strend|NULLOK const regmatch_info *reginfo
+ERs    |char*  |find_byclass   |NN regexp * prog|NN const regnode *c|NN char *s|NN const char *strend|NULLOK regmatch_info *reginfo
 Es     |void   |to_utf8_substr |NN regexp * prog
 Es     |void   |to_byte_substr |NN regexp * prog
 ERs    |I32    |reg_check_named_buff_matched   |NN const regexp *rex|NN const regnode *prog
index 5c54ae3..ac71f0a 100644 (file)
@@ -95,7 +95,7 @@ Turns on debug output related to the process of parsing the pattern.
 
 Enables output related to the optimisation phase of compilation.
 
-=item TRIE_COMPILE
+=item TRIEC
 
 Detailed info about trie compilation.
 
@@ -103,16 +103,6 @@ Detailed info about trie compilation.
 
 Dump the final program out after it is compiled and optimised.
 
-=item OFFSETS
-
-Dump offset information. This can be used to see how regops correlate
-to the pattern. Output format is
-
-   NODENUM:POSITION[LENGTH]
-
-Where 1 is the position of the first char in the string. Note that position
-can be 0, or larger than the actual length of the pattern, likewise length
-can be zero.
 
 =back
 
@@ -128,7 +118,7 @@ Turns on all execute related debug options.
 
 Turns on debugging of the main matching loop.
 
-=item TRIE_EXECUTE
+=item TRIEE
 
 Extra debugging of how tries execute.
 
@@ -146,12 +136,38 @@ Enable debugging of start point optimisations.
 
 Turns on all "extra" debugging options.
 
-=item TRIE_MORE
+=item TRIEM
+
+Enable enhanced TRIE debugging. Enhances both TRIEE
+and TRIEC.
+
+=item STATE
+
+Enable debugging of states in the engine. 
+
+=item STACK
 
-Enable enhanced TRIE debugging. Enhances both TRIE_EXECUTE
-and TRIE_COMPILE.
+Enable debugging of the recursion stack in the engine. Enabling
+or disabling this option automatically does the same for debugging
+states as well. This output from this can be quite large.
+
+=item OPTIMISEM
+
+Enable enhanced optimisation debugging and start point optimisations.
+Probably not useful except when debugging the regex engine itself.
+
+=item OFFSETS
+
+Dump offset information. This can be used to see how regops correlate
+to the pattern. Output format is
+
+   NODENUM:POSITION[LENGTH]
+
+Where 1 is the position of the first char in the string. Note that position
+can be 0, or larger than the actual length of the pattern, likewise length
+can be zero.
 
-=item OFFSETS_DEBUG
+=item OFFSETSDBG
 
 Enable debugging of offsets information. This emits copious
 amounts of trace information and doesn't mesh well with other
@@ -182,7 +198,7 @@ Enable DUMP and all execute options. Equivalent to:
 
 =item More
 
-Enable TRIE_MORE and all execute compile and execute options.
+Enable TRIEM and all execute compile and execute options.
 
 =back
 
@@ -239,6 +255,7 @@ my %flags = (
     OFFSETSDBG      => 0x040000,
     STATE           => 0x080000,
     OPTIMISEM       => 0x100000,
+    STACK           => 0x280000,
 );
 $flags{ALL} = -1;
 $flags{All} = $flags{all} = $flags{DUMP} | $flags{EXECUTE};
index 5d7e686..ff8efcd 100644 (file)
@@ -104,6 +104,12 @@ similar to non-greedy matching, except instead of using a '?' as the modifier
 the '+' is used. Thus C<?+>, C<*+>, C<++>, C<{min,max}+> are now legal
 quantifiers. (Yves Orton)
 
+=item Backtracking control verbs
+
+The regex engine now supports a number of special purpose backtrack
+control verbs: (?COMMIT), (?CUT), (?ERROR) and (?FAIL). See L<perlre>
+for their descriptions.
+
 =back
 
 =head2 The C<_> prototype
index 4e683a3..bce7291 100644 (file)
@@ -1094,6 +1094,48 @@ Any number of C<(?COMMIT)> assertions may be used in a pattern.
 See also C<< (?>pattern) >> and possessive quantifiers for other
 ways to control backtracking.
 
+=item C<(?CUT)>
+X<(?CUT)>
+
+This zero-width pattern is similar to C<(?COMMIT)>, except that on
+failure it also signifies that whatever text that was matched leading
+up to the C<(?CUT)> pattern cannot match, I<even from another
+starting point>.
+
+Compare the following to the examples in C<(?COMMIT)>, note the string
+is twice as long:
+
+    'aaabaaab'=~/a+b?(?CUT)(?{print "$&\n"; $count++})(?FAIL)/;
+    print "Count=$count\n";
+
+outputs
+
+    aaab
+    aaab
+    Count=2
+
+Once the 'aaab' at the start of the string has matched and the C<(?CUT)>
+executed the next startpoint will be where the cursor was when the
+C<(?CUT)> was executed.
+
+=item C<(?ERROR)>
+X<(?ERROR)>
+
+This zero-width pattern is similar to C<(?CUT)> except that it causes
+the match to fail outright. No attempts to match will occur again.
+
+    'aaabaaab'=~/a+b?(?ERROR)(?{print "$&\n"; $count++})(?FAIL)/;
+    print "Count=$count\n";
+
+outputs
+
+    aaab
+    Count=1
+
+In other words, once the C<(?ERROR)> has been entered and then pattern
+does not match then the regex engine will not try any further matching at
+all on the rest of the string.
+
 =item C<(?(condition)yes-pattern|no-pattern)>
 X<(?()>
 
diff --git a/proto.h b/proto.h
index c6f398a..47f302e 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -3691,7 +3691,7 @@ STATIC U8 S_regtail_study(pTHX_ struct RExC_state_t *state, regnode *p, const re
 #endif
 
 #if defined(PERL_IN_REGEXEC_C) || defined(PERL_DECL_PROT)
-STATIC I32     S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
+STATIC I32     S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                        __attribute__warn_unused_result__
                        __attribute__nonnull__(pTHX_1)
                        __attribute__nonnull__(pTHX_2);
@@ -3701,7 +3701,7 @@ STATIC I32        S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max)
                        __attribute__nonnull__(pTHX_1)
                        __attribute__nonnull__(pTHX_2);
 
-STATIC I32     S_regtry(pTHX_ const regmatch_info *reginfo, char *startpos)
+STATIC I32     S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
                        __attribute__warn_unused_result__
                        __attribute__nonnull__(pTHX_1)
                        __attribute__nonnull__(pTHX_2);
@@ -3733,7 +3733,7 @@ STATIC U8*        S_reghopmaybe3(U8 *pos, I32 off, const U8 *lim)
                        __attribute__nonnull__(1)
                        __attribute__nonnull__(3);
 
-STATIC char*   S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *strend, const regmatch_info *reginfo)
+STATIC char*   S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *strend, regmatch_info *reginfo)
                        __attribute__warn_unused_result__
                        __attribute__nonnull__(pTHX_1)
                        __attribute__nonnull__(pTHX_2)
index 6938954..1523fc1 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -4717,7 +4717,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
            case ':':           /* (?:...) */
            case '>':           /* (?>...) */
                break;
-           case 'C':
+            case 'C':           /* (?CUT) and (?COMMIT) */
                if (RExC_parse[0] == 'O' &&
                    RExC_parse[1] == 'M' &&
                    RExC_parse[2] == 'M' &&
@@ -4727,12 +4727,34 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                {
                    RExC_parse+=5;
                    ret = reg_node(pRExC_state, COMMIT);
+                } else if (
+                    RExC_parse[0] == 'U' &&
+                    RExC_parse[1] == 'T' &&
+                    RExC_parse[2] == ')') 
+                {
+                    RExC_parse+=2;
+                    ret = reg_node(pRExC_state, CUT);
                } else {
                    vFAIL("Sequence (?C... not terminated");
                }
                nextchar(pRExC_state);
                return ret;
                break;
+            case 'E':            /* (?ERROR) */
+                if (RExC_parse[0] == 'R' &&
+                    RExC_parse[1] == 'R' &&
+                    RExC_parse[2] == 'O' &&
+                    RExC_parse[3] == 'R' &&
+                    RExC_parse[4] == ')') 
+                {
+                    RExC_parse+=4;
+                    ret = reg_node(pRExC_state, OPERROR);
+                } else {
+                    vFAIL("Sequence (?E... not terminated"); 
+                }
+               nextchar(pRExC_state);
+               return ret;
+                break;                
             case 'F':
                 if (RExC_parse[0] == 'A' &&
                     RExC_parse[1] == 'I' &&
@@ -8669,7 +8691,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
                         (dist ? this_trie + dist : next) - start);
                     if (dist) {
                         if (!nextbranch)
-                           nextbranch = this_trie + trie->jump[0];
+                            nextbranch= this_trie + trie->jump[0];    
                        DUMPUNTIL(this_trie + dist, nextbranch);
                     }
                     if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
index f7082bf..360e2a9 100644 (file)
--- a/regcomp.h
+++ b/regcomp.h
@@ -624,6 +624,8 @@ re.pm, especially to the documentation.
 #define RE_DEBUG_EXTRA_OFFDEBUG    0x040000
 #define RE_DEBUG_EXTRA_STATE       0x080000
 #define RE_DEBUG_EXTRA_OPTIMISE    0x100000
+/* combined */
+#define RE_DEBUG_EXTRA_STACK       0x280000
 
 #define RE_DEBUG_FLAG(x) (re_debug_flags & x)
 /* Compile */
@@ -657,6 +659,8 @@ re.pm, especially to the documentation.
     if (re_debug_flags & RE_DEBUG_EXTRA_OFFSETS) x  )
 #define DEBUG_STATE_r(x) DEBUG_r( \
     if (re_debug_flags & RE_DEBUG_EXTRA_STATE) x )
+#define DEBUG_STACK_r(x) DEBUG_r( \
+    if (re_debug_flags & RE_DEBUG_EXTRA_STACK) x )
 #define DEBUG_OPTIMISE_MORE_r(x) DEBUG_r( \
     if ((RE_DEBUG_EXTRA_OPTIMISE|RE_DEBUG_COMPILE_OPTIMISE) == \
          (re_debug_flags & (RE_DEBUG_EXTRA_OPTIMISE|RE_DEBUG_COMPILE_OPTIMISE)) ) x )
index 2e84604..700268d 100644 (file)
@@ -48,7 +48,7 @@ while (<DESC>) {
                     $ind++;
                     $name[$ind]="$real$suffix";
                     $type[$ind]=$type;
-                    $rest[$ind]="Regmatch state for $type";
+                    $rest[$ind]="state for $type";
                 }
             }
         }
@@ -92,13 +92,16 @@ EOP
     -$width, REGMATCH_STATE_MAX => $tot - 1
 ;
 
-$ind = 0;
-while (++$ind <= $tot) {
+
+for ($ind=1; $ind <= $lastregop ; $ind++) {
   my $oind = $ind - 1;
   printf OUT "#define\t%*s\t%d\t/* %#04x %s */\n",
     -$width, $name[$ind], $ind-1, $ind-1, $rest[$ind];
-  print OUT "\n\t/* ------------ States ------------- */\n\n"
-    if $ind == $lastregop and $lastregop != $tot;
+}
+print OUT "\t/* ------------ States ------------- */\n";
+for ( ; $ind <= $tot ; $ind++) {
+  printf OUT "#define\t%*s\t(REGNODE_MAX + %d)\t/* %s */\n",
+    -$width, $name[$ind], $ind - $lastregop, $rest[$ind];
 }
 
 print OUT <<EOP;
@@ -164,13 +167,19 @@ const char * reg_name[] = {
 EOP
 
 $ind = 0;
+my $ofs = 1;
+my $sym = "";
 while (++$ind <= $tot) {
   my $size = $longj[$ind] || 0;
 
-  printf OUT "\t%*s\t/* %#04x */\n",
-       -3-$width,qq("$name[$ind]",),$ind-1;
-  print OUT "\t/* ------------ States ------------- */\n"
-    if $ind == $lastregop and $lastregop != $tot;
+  printf OUT "\t%*s\t/* $sym%#04x */\n",
+       -3-$width,qq("$name[$ind]",), $ind - $ofs;
+  if ($ind == $lastregop and $lastregop != $tot) {
+    print OUT "\t/* ------------ States ------------- */\n";
+    $ofs = $lastregop;
+    $sym = 'REGNODE_MAX +';
+  }
+    
 }
 
 print OUT <<EOP;
index f60368c..e673313 100644 (file)
@@ -170,7 +170,9 @@ DEFINEP             DEFINEP,   none 1       Never execute directly.
 
 #*Bactracking 
 OPFAIL         OPFAIL, none            Same as (?!)
-COMMIT         COMMIT, node            Pattern fails if backtracking through this 
+COMMIT         COMMIT, none            Pattern fails if backtracking through this 
+CUT            COMMIT, none            ... and restarts at the cursor point
+OPERROR                OPERROR,none            Pattern fails outright if backtracking through this
 
 # NEW STUFF ABOVE THIS LINE -- Please update counts below. 
 
@@ -207,4 +209,5 @@ BRANCH      next:FAIL
 CURLYM         A,B:FAIL        
 IFMATCH        A:FAIL  
 CURLY          B_min_known,B_min,B_max:FAIL    
-COMMIT         next:FAIL       
+COMMIT         next:FAIL
+
index 2380b3e..f7fd347 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -1018,7 +1018,7 @@ foldlen, foldbuf, uniflags) STMT_START {                                    \
         && (ln == len ||                              \
             ibcmp_utf8(s, NULL, 0,  do_utf8,          \
                        m, NULL, ln, (bool)UTF))       \
-        && (!reginfo || regtry(reginfo, s)) )         \
+        && (!reginfo || regtry(reginfo, &s)) )         \
        goto got_it;                                   \
     else {                                             \
         U8 foldbuf[UTF8_MAXBYTES_CASE+1];             \
@@ -1031,7 +1031,7 @@ foldlen, foldbuf, uniflags) STMT_START {                                    \
                              NULL, foldlen, do_utf8,  \
                              m,                       \
                              NULL, ln, (bool)UTF))    \
-             && (!reginfo || regtry(reginfo, s)) )    \
+             && (!reginfo || regtry(reginfo, &s)) )    \
              goto got_it;                             \
     }                                                  \
     s += len
@@ -1043,7 +1043,7 @@ STMT_START {                                              \
             && (ln == 1 || !(OP(c) == EXACTF             \
                              ? ibcmp(s, m, ln)           \
                              : ibcmp_locale(s, m, ln)))  \
-            && (!reginfo || regtry(reginfo, s)) )        \
+            && (!reginfo || regtry(reginfo, &s)) )        \
            goto got_it;                                  \
        s++;                                              \
     }                                                     \
@@ -1068,7 +1068,7 @@ STMT_START {                                          \
 #define REXEC_FBC_UTF8_CLASS_SCAN(CoNd)               \
 REXEC_FBC_UTF8_SCAN(                                  \
     if (CoNd) {                                       \
-       if (tmp && (!reginfo || regtry(reginfo, s)))  \
+       if (tmp && (!reginfo || regtry(reginfo, &s)))  \
            goto got_it;                              \
        else                                          \
            tmp = doevery;                            \
@@ -1080,7 +1080,7 @@ REXEC_FBC_UTF8_SCAN(                                  \
 #define REXEC_FBC_CLASS_SCAN(CoNd)                    \
 REXEC_FBC_SCAN(                                       \
     if (CoNd) {                                       \
-       if (tmp && (!reginfo || regtry(reginfo, s)))  \
+       if (tmp && (!reginfo || regtry(reginfo, &s)))  \
            goto got_it;                              \
        else                                          \
            tmp = doevery;                            \
@@ -1090,7 +1090,7 @@ REXEC_FBC_SCAN(                                       \
 )
 
 #define REXEC_FBC_TRYIT               \
-if ((!reginfo || regtry(reginfo, s))) \
+if ((!reginfo || regtry(reginfo, &s))) \
     goto got_it
 
 #define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd)      \
@@ -1123,7 +1123,7 @@ if ((!reginfo || regtry(reginfo, s))) \
 
 STATIC char *
 S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, 
-    const char *strend, const regmatch_info *reginfo)
+    const char *strend, regmatch_info *reginfo)
 {
        dVAR;
        const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
@@ -1155,7 +1155,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                           /* The assignment of 2 is intentional:
                            * for the folded sharp s, the skip is 2. */
                           (skip = SHARP_S_SKIP))) {
-                          if (tmp && (!reginfo || regtry(reginfo, s)))
+                          if (tmp && (!reginfo || regtry(reginfo, &s)))
                                goto got_it;
                           else
                                tmp = doevery;
@@ -1168,7 +1168,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
            break;
        case CANY:
            REXEC_FBC_SCAN(
-               if (tmp && (!reginfo || regtry(reginfo, s)))
+               if (tmp && (!reginfo || regtry(reginfo, &s)))
                    goto got_it;
                else
                    tmp = doevery;
@@ -1302,7 +1302,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                }
                );
            }
-           if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, s)))
+           if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
                goto got_it;
            break;
        case NBOUNDL:
@@ -1338,7 +1338,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                    else REXEC_FBC_TRYIT;
                );
            }
-           if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, s)))
+           if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
                goto got_it;
            break;
        case ALNUM:
@@ -1598,7 +1598,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                                 (UV)accepted_word, s - real_start
                             );
                         });
-                        if (!reginfo || regtry(reginfo, s)) {
+                        if (!reginfo || regtry(reginfo, &s)) {
                             FREETMPS;
                            LEAVE;
                             goto got_it;
@@ -1639,9 +1639,9 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
 /* nosave: For optimizations. */
 {
     dVAR;
-    register char *s;
+    /*register*/ char *s;
     register regnode *c;
-    register char *startpos = stringarg;
+    /*register*/ char *startpos = stringarg;
     I32 minlen;                /* must match at least this many chars */
     I32 dontbother = 0;        /* how many characters not to try at end */
     I32 end_shift = 0;                 /* Same for the end. */         /* CC */
@@ -1744,7 +1744,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
     /* Simplest case:  anchored match need be tried only once. */
     /*  [unless only anchor is BOL and multiline is set] */
     if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
-       if (s == startpos && regtry(&reginfo, startpos))
+       if (s == startpos && regtry(&reginfo, &startpos))
            goto got_it;
        else if (multiline || (prog->reganch & ROPT_IMPLICIT)
                 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
@@ -1759,7 +1759,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
                if (s == startpos)
                    goto after_try;
                while (1) {
-                   if (regtry(&reginfo, s))
+                   if (regtry(&reginfo, &s))
                        goto got_it;
                  after_try:
                    if (s >= end)
@@ -1777,7 +1777,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
                    s--;
                while (s < end) {
                    if (*s++ == '\n') { /* don't need PL_utf8skip here */
-                       if (regtry(&reginfo, s))
+                       if (regtry(&reginfo, &s))
                            goto got_it;
                    }
                }               
@@ -1789,7 +1789,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
         /* the warning about reginfo.ganch being used without intialization
            is bogus -- we set it above, when prog->reganch & ROPT_GPOS_SEEN 
            and we only enter this block when the same bit is set. */
-       if (regtry(&reginfo, reginfo.ganch))
+       if (regtry(&reginfo, &reginfo.ganch))
            goto got_it;
        goto phooey;
     }
@@ -1810,7 +1810,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
            REXEC_FBC_SCAN(
                if (*s == ch) {
                    DEBUG_EXECUTE_r( did_match = 1 );
-                   if (regtry(&reginfo, s)) goto got_it;
+                   if (regtry(&reginfo, &s)) goto got_it;
                    s += UTF8SKIP(s);
                    while (s < strend && *s == ch)
                        s += UTF8SKIP(s);
@@ -1821,7 +1821,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
            REXEC_FBC_SCAN(
                if (*s == ch) {
                    DEBUG_EXECUTE_r( did_match = 1 );
-                   if (regtry(&reginfo, s)) goto got_it;
+                   if (regtry(&reginfo, &s)) goto got_it;
                    s++;
                    while (s < strend && *s == ch)
                        s++;
@@ -1903,14 +1903,14 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
            }
            if (do_utf8) {
                while (s <= last1) {
-                   if (regtry(&reginfo, s))
+                   if (regtry(&reginfo, &s))
                        goto got_it;
                    s += UTF8SKIP(s);
                }
            }
            else {
                while (s <= last1) {
-                   if (regtry(&reginfo, s))
+                   if (regtry(&reginfo, &s))
                        goto got_it;
                    s++;
                }
@@ -2004,7 +2004,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
        /* We don't know much -- general case. */
        if (do_utf8) {
            for (;;) {
-               if (regtry(&reginfo, s))
+               if (regtry(&reginfo, &s))
                    goto got_it;
                if (s >= strend)
                    break;
@@ -2013,7 +2013,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
        }
        else {
            do {
-               if (regtry(&reginfo, s))
+               if (regtry(&reginfo, &s))
                    goto got_it;
            } while (s++ < strend);
        }
@@ -2082,7 +2082,7 @@ phooey:
  - regtry - try match at specific point
  */
 STATIC I32                     /* 0 failure, 1 success */
-S_regtry(pTHX_ const regmatch_info *reginfo, char *startpos)
+S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
 {
     dVAR;
     register I32 *sp;
@@ -2090,6 +2090,7 @@ S_regtry(pTHX_ const regmatch_info *reginfo, char *startpos)
     CHECKPOINT lastcp;
     regexp *prog = reginfo->prog;
     GET_RE_DEBUG_FLAGS_DECL;
+    reginfo->cutpoint=NULL;
 
     if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
        MAGIC *mg;
@@ -2161,9 +2162,9 @@ S_regtry(pTHX_ const regmatch_info *reginfo, char *startpos)
        prog->subbeg = PL_bostr;
        prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
     }
-    DEBUG_EXECUTE_r(PL_reg_starttry = startpos);
-    prog->startp[0] = startpos - PL_bostr;
-    PL_reginput = startpos;
+    DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
+    prog->startp[0] = *startpos - PL_bostr;
+    PL_reginput = *startpos;
     PL_reglastparen = &prog->lastparen;
     PL_reglastcloseparen = &prog->lastcloseparen;
     prog->lastparen = 0;
@@ -2209,6 +2210,8 @@ S_regtry(pTHX_ const regmatch_info *reginfo, char *startpos)
        prog->endp[0] = PL_reginput - PL_bostr;
        return 1;
     }
+    if (reginfo->cutpoint)
+        *startpos= reginfo->cutpoint;
     REGCP_UNWIND(lastcp);
     return 0;
 }
@@ -2538,7 +2541,7 @@ S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan) {
 }
 
 STATIC I32                     /* 0 failure, 1 success */
-S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
+S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
 {
 #if PERL_VERSION < 9
     dMY_CXT;
@@ -2571,7 +2574,7 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
     regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
     struct regmatch_state  *cur_curlyx = NULL; /* most recent curlyx */
     U32 state_num;
-    bool no_final = 0;      /* if true then we dont backtrack on failure */
+    bool no_final = 0;
 
     /* these three flags are set by various ops to signal information to
      * the very next op. They have a useful lifetime of exactly one loop
@@ -2592,6 +2595,9 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
     GET_RE_DEBUG_FLAGS_DECL;
 #endif
 
+    DEBUG_STACK_r( {    
+           PerlIO_printf(Perl_debug_log,"regmatch start\n");
+    });
     /* on first ever call to regmatch, allocate first slab */
     if (!PL_regmatch_slab) {
        Newx(PL_regmatch_slab, 1, regmatch_slab);
@@ -3414,7 +3420,7 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
        case GOSTART:
        case GOSUB: /*    /(...(?1))/      */
             if (cur_eval && cur_eval->locinput==locinput) {
-                if (cur_eval->u.eval.close_paren == ARG(scan)) 
+                if (cur_eval->u.eval.close_paren == (U32)ARG(scan)) 
                     Perl_croak(aTHX_ "Infinite recursion in regex");
                 if ( ++nochange_depth > MAX_RECURSE_EVAL_NOCHANGE_DEPTH ) 
                     Perl_croak(aTHX_ 
@@ -3451,7 +3457,8 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
            
                n = ARG(scan);
                PL_op = (OP_4tree*)rex->data->data[n];
-               DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
+               DEBUG_STATE_r( PerlIO_printf(Perl_debug_log, 
+                   "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
                PAD_SAVE_LOCAL(old_comppad, (PAD*)rex->data->data[n + 2]);
                PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
 
@@ -4083,7 +4090,7 @@ NULL
            locinput = PL_reginput;
                        
            if (cur_eval && cur_eval->u.eval.close_paren && 
-               cur_eval->u.eval.close_paren == ST.me->flags) 
+               cur_eval->u.eval.close_paren == (U32)ST.me->flags) 
                goto fake_end;
                
            if ( ST.count < (ST.minmod ? ARG1(ST.me) : ARG2(ST.me)) )
@@ -4095,7 +4102,7 @@ NULL
 
            if (ST.minmod || ST.count < ARG1(ST.me) /* min*/ 
                || (cur_eval && cur_eval->u.eval.close_paren &&
-                   cur_eval->u.eval.close_paren == ST.me->flags))
+                   cur_eval->u.eval.close_paren == (U32)ST.me->flags))
                sayNO;
 
          curlym_do_B: /* execute the B in /A{m,n}B/  */
@@ -4149,7 +4156,7 @@ NULL
                else
                    PL_regendp[paren] = -1;
                if (cur_eval && cur_eval->u.eval.close_paren &&
-                   cur_eval->u.eval.close_paren == ST.me->flags) 
+                   cur_eval->u.eval.close_paren == (U32)ST.me->flags) 
                {
                    if (ST.count) 
                        goto fake_end;
@@ -4615,7 +4622,17 @@ NULL
            if (next == scan)
                next = NULL;
            break;
+       case OPERROR:
+           reginfo->cutpoint=PL_regeol;
+           goto do_commit;
+           /* NOTREACHED */
+       case CUT:
+           if ( locinput > reginfo->bol )
+               reginfo->cutpoint = HOPBACKc(locinput, 1);
+           /* FALLTHROUGH */       
        case COMMIT:
+         do_commit:
+           PL_reginput = locinput;
            PUSH_STATE_GOTO(COMMIT_next,next);
            /* NOTREACHED */
        case COMMIT_next_fail:
@@ -4643,7 +4660,27 @@ NULL
        {
            regmatch_state *newst;
 
-           DEBUG_STATE_pp("push");
+           DEBUG_STACK_r({
+               regmatch_state *cur = st;
+               regmatch_state *curyes = yes_state;
+               int curd = depth;
+               regmatch_slab *slab = PL_regmatch_slab;
+                for (;curd > -1;cur--,curd--) {
+                    if (cur < SLAB_FIRST(slab)) {
+                       slab = slab->prev;
+                       cur = SLAB_LAST(slab);
+                    }
+                    PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
+                        REPORT_CODE_OFF + 2 + depth * 2,"",
+                        curd, reg_name[cur->resume_state],
+                        (curyes == cur) ? "yes" : ""
+                    );
+                    if (curyes == cur)
+                       curyes = cur->u.yes.prev_yes_state;
+                }
+            } else 
+                DEBUG_STATE_pp("push")
+            );
            depth++;
            st->locinput = locinput;
            newst = st+1; 
@@ -4702,6 +4739,7 @@ yes:
        st = yes_state;
        yes_state = st->u.yes.prev_yes_state;
        PL_regmatch_state = st;
+        
 
        state_num = st->resume_state + no_final;
        goto reenter_switch;
index 89fcea7..f13a5c5 100644 (file)
--- a/regexp.h
+++ b/regexp.h
@@ -202,6 +202,7 @@ typedef struct {
     char *till;
     SV *sv;
     char *ganch;
+    char *cutpoint;
 } regmatch_info;
  
 
index c42fcf8..010b943 100644 (file)
@@ -6,8 +6,8 @@
 
 /* Regops and State definitions */
 
-#define REGNODE_MAX            76
-#define REGMATCH_STATE_MAX     108
+#define REGNODE_MAX            78
+#define REGMATCH_STATE_MAX     110
 
 #define        END                     0       /* 0000 End of program. */
 #define        SUCCEED                 1       /* 0x01 Return from a subroutine, basically. */
 #define        DEFINEP                 72      /* 0x48 Never execute directly. */
 #define        OPFAIL                  73      /* 0x49 Same as (?!) */
 #define        COMMIT                  74      /* 0x4a Pattern fails if backtracking through this */
-#define        OPTIMIZED               75      /* 0x4b Placeholder for dump. */
-#define        PSEUDO                  76      /* 0x4c Pseudo opcode for internal use. */
-
+#define        CUT                     75      /* 0x4b ... and restarts at the cursor point */
+#define        OPERROR                 76      /* 0x4c Pattern fails outright if backtracking through this */
+#define        OPTIMIZED               77      /* 0x4d Placeholder for dump. */
+#define        PSEUDO                  78      /* 0x4e Pseudo opcode for internal use. */
        /* ------------ States ------------- */
-
-#define        TRIE_next               77      /* 0x4d Regmatch state for TRIE */
-#define        TRIE_next_fail          78      /* 0x4e Regmatch state for TRIE */
-#define        EVAL_AB                 79      /* 0x4f Regmatch state for EVAL */
-#define        EVAL_AB_fail            80      /* 0x50 Regmatch state for EVAL */
-#define        CURLYX_end              81      /* 0x51 Regmatch state for CURLYX */
-#define        CURLYX_end_fail         82      /* 0x52 Regmatch state for CURLYX */
-#define        WHILEM_A_pre            83      /* 0x53 Regmatch state for WHILEM */
-#define        WHILEM_A_pre_fail       84      /* 0x54 Regmatch state for WHILEM */
-#define        WHILEM_A_min            85      /* 0x55 Regmatch state for WHILEM */
-#define        WHILEM_A_min_fail       86      /* 0x56 Regmatch state for WHILEM */
-#define        WHILEM_A_max            87      /* 0x57 Regmatch state for WHILEM */
-#define        WHILEM_A_max_fail       88      /* 0x58 Regmatch state for WHILEM */
-#define        WHILEM_B_min            89      /* 0x59 Regmatch state for WHILEM */
-#define        WHILEM_B_min_fail       90      /* 0x5a Regmatch state for WHILEM */
-#define        WHILEM_B_max            91      /* 0x5b Regmatch state for WHILEM */
-#define        WHILEM_B_max_fail       92      /* 0x5c Regmatch state for WHILEM */
-#define        BRANCH_next             93      /* 0x5d Regmatch state for BRANCH */
-#define        BRANCH_next_fail        94      /* 0x5e Regmatch state for BRANCH */
-#define        CURLYM_A                95      /* 0x5f Regmatch state for CURLYM */
-#define        CURLYM_A_fail           96      /* 0x60 Regmatch state for CURLYM */
-#define        CURLYM_B                97      /* 0x61 Regmatch state for CURLYM */
-#define        CURLYM_B_fail           98      /* 0x62 Regmatch state for CURLYM */
-#define        IFMATCH_A               99      /* 0x63 Regmatch state for IFMATCH */
-#define        IFMATCH_A_fail          100     /* 0x64 Regmatch state for IFMATCH */
-#define        CURLY_B_min_known       101     /* 0x65 Regmatch state for CURLY */
-#define        CURLY_B_min_known_fail  102     /* 0x66 Regmatch state for CURLY */
-#define        CURLY_B_min             103     /* 0x67 Regmatch state for CURLY */
-#define        CURLY_B_min_fail        104     /* 0x68 Regmatch state for CURLY */
-#define        CURLY_B_max             105     /* 0x69 Regmatch state for CURLY */
-#define        CURLY_B_max_fail        106     /* 0x6a Regmatch state for CURLY */
-#define        COMMIT_next             107     /* 0x6b Regmatch state for COMMIT */
-#define        COMMIT_next_fail        108     /* 0x6c Regmatch state for COMMIT */
+#define        TRIE_next               (REGNODE_MAX + 1)       /* state for TRIE */
+#define        TRIE_next_fail          (REGNODE_MAX + 2)       /* state for TRIE */
+#define        EVAL_AB                 (REGNODE_MAX + 3)       /* state for EVAL */
+#define        EVAL_AB_fail            (REGNODE_MAX + 4)       /* state for EVAL */
+#define        CURLYX_end              (REGNODE_MAX + 5)       /* state for CURLYX */
+#define        CURLYX_end_fail         (REGNODE_MAX + 6)       /* state for CURLYX */
+#define        WHILEM_A_pre            (REGNODE_MAX + 7)       /* state for WHILEM */
+#define        WHILEM_A_pre_fail       (REGNODE_MAX + 8)       /* state for WHILEM */
+#define        WHILEM_A_min            (REGNODE_MAX + 9)       /* state for WHILEM */
+#define        WHILEM_A_min_fail       (REGNODE_MAX + 10)      /* state for WHILEM */
+#define        WHILEM_A_max            (REGNODE_MAX + 11)      /* state for WHILEM */
+#define        WHILEM_A_max_fail       (REGNODE_MAX + 12)      /* state for WHILEM */
+#define        WHILEM_B_min            (REGNODE_MAX + 13)      /* state for WHILEM */
+#define        WHILEM_B_min_fail       (REGNODE_MAX + 14)      /* state for WHILEM */
+#define        WHILEM_B_max            (REGNODE_MAX + 15)      /* state for WHILEM */
+#define        WHILEM_B_max_fail       (REGNODE_MAX + 16)      /* state for WHILEM */
+#define        BRANCH_next             (REGNODE_MAX + 17)      /* state for BRANCH */
+#define        BRANCH_next_fail        (REGNODE_MAX + 18)      /* state for BRANCH */
+#define        CURLYM_A                (REGNODE_MAX + 19)      /* state for CURLYM */
+#define        CURLYM_A_fail           (REGNODE_MAX + 20)      /* state for CURLYM */
+#define        CURLYM_B                (REGNODE_MAX + 21)      /* state for CURLYM */
+#define        CURLYM_B_fail           (REGNODE_MAX + 22)      /* state for CURLYM */
+#define        IFMATCH_A               (REGNODE_MAX + 23)      /* state for IFMATCH */
+#define        IFMATCH_A_fail          (REGNODE_MAX + 24)      /* state for IFMATCH */
+#define        CURLY_B_min_known       (REGNODE_MAX + 25)      /* state for CURLY */
+#define        CURLY_B_min_known_fail  (REGNODE_MAX + 26)      /* state for CURLY */
+#define        CURLY_B_min             (REGNODE_MAX + 27)      /* state for CURLY */
+#define        CURLY_B_min_fail        (REGNODE_MAX + 28)      /* state for CURLY */
+#define        CURLY_B_max             (REGNODE_MAX + 29)      /* state for CURLY */
+#define        CURLY_B_max_fail        (REGNODE_MAX + 30)      /* state for CURLY */
+#define        COMMIT_next             (REGNODE_MAX + 31)      /* state for COMMIT */
+#define        COMMIT_next_fail        (REGNODE_MAX + 32)      /* state for COMMIT */
 
 /* PL_regkind[] What type of regop or state is this. */
 
@@ -203,6 +203,8 @@ EXTCONST U8 PL_regkind[] = {
        DEFINEP,        /* DEFINEP                */
        OPFAIL,         /* OPFAIL                 */
        COMMIT,         /* COMMIT                 */
+       COMMIT,         /* CUT                    */
+       OPERROR,        /* OPERROR                */
        NOTHING,        /* OPTIMIZED              */
        PSEUDO,         /* PSEUDO                 */
        /* ------------ States ------------- */
@@ -320,6 +322,8 @@ static const U8 regarglen[] = {
        EXTRA_SIZE(struct regnode_1),           /* DEFINEP      */
        0,                                      /* OPFAIL       */
        0,                                      /* COMMIT       */
+       0,                                      /* CUT          */
+       0,                                      /* OPERROR      */
        0,                                      /* OPTIMIZED    */
        0,                                      /* PSEUDO       */
 };
@@ -402,6 +406,8 @@ static const char reg_off_by_arg[] = {
        0,      /* DEFINEP      */
        0,      /* OPFAIL       */
        0,      /* COMMIT       */
+       0,      /* CUT          */
+       0,      /* OPERROR      */
        0,      /* OPTIMIZED    */
        0,      /* PSEUDO       */
 };
@@ -485,41 +491,43 @@ const char * reg_name[] = {
        "DEFINEP",                      /* 0x48 */
        "OPFAIL",                       /* 0x49 */
        "COMMIT",                       /* 0x4a */
-       "OPTIMIZED",                    /* 0x4b */
-       "PSEUDO",                       /* 0x4c */
+       "CUT",                          /* 0x4b */
+       "OPERROR",                      /* 0x4c */
+       "OPTIMIZED",                    /* 0x4d */
+       "PSEUDO",                       /* 0x4e */
        /* ------------ States ------------- */
-       "TRIE_next",                    /* 0x4d */
-       "TRIE_next_fail",               /* 0x4e */
-       "EVAL_AB",                      /* 0x4f */
-       "EVAL_AB_fail",                 /* 0x50 */
-       "CURLYX_end",                   /* 0x51 */
-       "CURLYX_end_fail",              /* 0x52 */
-       "WHILEM_A_pre",                 /* 0x53 */
-       "WHILEM_A_pre_fail",            /* 0x54 */
-       "WHILEM_A_min",                 /* 0x55 */
-       "WHILEM_A_min_fail",            /* 0x56 */
-       "WHILEM_A_max",                 /* 0x57 */
-       "WHILEM_A_max_fail",            /* 0x58 */
-       "WHILEM_B_min",                 /* 0x59 */
-       "WHILEM_B_min_fail",            /* 0x5a */
-       "WHILEM_B_max",                 /* 0x5b */
-       "WHILEM_B_max_fail",            /* 0x5c */
-       "BRANCH_next",                  /* 0x5d */
-       "BRANCH_next_fail",             /* 0x5e */
-       "CURLYM_A",                     /* 0x5f */
-       "CURLYM_A_fail",                /* 0x60 */
-       "CURLYM_B",                     /* 0x61 */
-       "CURLYM_B_fail",                /* 0x62 */
-       "IFMATCH_A",                    /* 0x63 */
-       "IFMATCH_A_fail",               /* 0x64 */
-       "CURLY_B_min_known",            /* 0x65 */
-       "CURLY_B_min_known_fail",       /* 0x66 */
-       "CURLY_B_min",                  /* 0x67 */
-       "CURLY_B_min_fail",             /* 0x68 */
-       "CURLY_B_max",                  /* 0x69 */
-       "CURLY_B_max_fail",             /* 0x6a */
-       "COMMIT_next",                  /* 0x6b */
-       "COMMIT_next_fail",             /* 0x6c */
+       "TRIE_next",                    /* REGNODE_MAX +0x01 */
+       "TRIE_next_fail",               /* REGNODE_MAX +0x02 */
+       "EVAL_AB",                      /* REGNODE_MAX +0x03 */
+       "EVAL_AB_fail",                 /* REGNODE_MAX +0x04 */
+       "CURLYX_end",                   /* REGNODE_MAX +0x05 */
+       "CURLYX_end_fail",              /* REGNODE_MAX +0x06 */
+       "WHILEM_A_pre",                 /* REGNODE_MAX +0x07 */
+       "WHILEM_A_pre_fail",            /* REGNODE_MAX +0x08 */
+       "WHILEM_A_min",                 /* REGNODE_MAX +0x09 */
+       "WHILEM_A_min_fail",            /* REGNODE_MAX +0x0a */
+       "WHILEM_A_max",                 /* REGNODE_MAX +0x0b */
+       "WHILEM_A_max_fail",            /* REGNODE_MAX +0x0c */
+       "WHILEM_B_min",                 /* REGNODE_MAX +0x0d */
+       "WHILEM_B_min_fail",            /* REGNODE_MAX +0x0e */
+       "WHILEM_B_max",                 /* REGNODE_MAX +0x0f */
+       "WHILEM_B_max_fail",            /* REGNODE_MAX +0x10 */
+       "BRANCH_next",                  /* REGNODE_MAX +0x11 */
+       "BRANCH_next_fail",             /* REGNODE_MAX +0x12 */
+       "CURLYM_A",                     /* REGNODE_MAX +0x13 */
+       "CURLYM_A_fail",                /* REGNODE_MAX +0x14 */
+       "CURLYM_B",                     /* REGNODE_MAX +0x15 */
+       "CURLYM_B_fail",                /* REGNODE_MAX +0x16 */
+       "IFMATCH_A",                    /* REGNODE_MAX +0x17 */
+       "IFMATCH_A_fail",               /* REGNODE_MAX +0x18 */
+       "CURLY_B_min_known",            /* REGNODE_MAX +0x19 */
+       "CURLY_B_min_known_fail",       /* REGNODE_MAX +0x1a */
+       "CURLY_B_min",                  /* REGNODE_MAX +0x1b */
+       "CURLY_B_min_fail",             /* REGNODE_MAX +0x1c */
+       "CURLY_B_max",                  /* REGNODE_MAX +0x1d */
+       "CURLY_B_max_fail",             /* REGNODE_MAX +0x1e */
+       "COMMIT_next",                  /* REGNODE_MAX +0x1f */
+       "COMMIT_next_fail",             /* REGNODE_MAX +0x20 */
 };
 #endif /* DEBUGGING */
 #else
index 1686234..67be900 100755 (executable)
@@ -3719,14 +3719,7 @@ sub iseq($$;$) {
     ';
     ok(!$@,'lvalue $+{...} should not throw an exception');
 }
-{
-    our $count = 0;
-    'aaab'=~/a+b?(?{$count++})(?FAIL)/;
-    iseq($count,9,"expect 9 for no (?COMMIT)");
-    $count = 0;
-    'aaab'=~/a+b?(?COMMIT)(?{$count++})(?FAIL)/;
-    iseq($count,3,"expect 3 with (?COMMIT)");
-}
+
 # stress test CURLYX/WHILEM.
 #
 # This test includes varying levels of nesting, and according to
@@ -3734,7 +3727,9 @@ sub iseq($$;$) {
 # CURLYX and WHILEM blocks, except those related to LONGJMP, the
 # super-linear cache and warnings. It executes about 0.5M regexes
 
-{
+if ($ENV{PERL_SKIP_PSYCHO_TEST}){
+  printf "ok %d Skip: No psycho tests\n", $test++;
+} else {    
   my $r = qr/^
            (?:
                ( (?:a|z+)+ )
@@ -3856,6 +3851,57 @@ for my $c ("z", "\0", "!", chr(254), chr(256)) {
     iseq($count,1,"should have matched once only [RT#36046]");
 }
 
+{   # Test the (?COMMIT) pattern
+    our $count = 0;
+    'aaab'=~/a+b?(?{$count++})(?FAIL)/;
+    iseq($count,9,"expect 9 for no (?COMMIT)");
+    $count = 0;
+    'aaab'=~/a+b?(?COMMIT)(?{$count++})(?FAIL)/;
+    iseq($count,3,"expect 3 with (?COMMIT)");
+    local $_='aaab';
+    $count=0;
+    1 while /.(?COMMIT)(?{$count++})(?FAIL)/g;
+    iseq($count,4,"/.(?COMMIT)/");
+    $count = 0;
+    'aaab'=~/a+b?(??{'(?COMMIT)'})(?{$count++})(?FAIL)/;
+    iseq($count,3,"expect 3 with (?COMMIT)");
+    local $_='aaab';
+    $count=0;
+    1 while /.(??{'(?COMMIT)'})(?{$count++})(?FAIL)/g;
+    iseq($count,4,"/.(?COMMIT)/");
+}
+{   # Test the (?CUT) pattern
+    our $count = 0;
+    'aaab'=~/a+b?(?CUT)(?{$count++})(?FAIL)/;
+    iseq($count,1,"expect 1 with (?CUT)");
+    local $_='aaab';
+    $count=0;
+    1 while /.(?CUT)(?{$count++})(?FAIL)/g;
+    iseq($count,4,"/.(?CUT)/");
+    $_='aaabaaab';
+    $count=0;
+    our @res=();
+    1 while /(a+b?)(?CUT)(?{$count++; push @res,$1})(?FAIL)/g;
+    iseq($count,2,"Expect 2 with (?CUT)" );
+    iseq("@res","aaab aaab","adjacent (?CUT) works as expected" );
+}
+{   # Test the (?ERROR) pattern
+    our $count = 0;
+    'aaabaaab'=~/a+b?(?ERROR)(?{$count++})(?FAIL)/;
+    iseq($count,1,"expect 1 with (?ERROR)");
+    local $_='aaab';
+    $count=0;
+    1 while /.(?ERROR)(?{$count++})(?FAIL)/g;
+    iseq($count,1,"/.(?ERROR)/");
+    $_='aaabaaab';
+    $count=0;
+    our @res=();
+    1 while /(a+b?)(?ERROR)(?{$count++; push @res,$1})(?FAIL)/g;
+    iseq($count,1,"Expect 1 with (?ERROR)" );
+    iseq("@res","aaab","adjacent (?ERROR) works as expected" );
+}
+#-------------------------------------------------------------------
+
 # Keep the following tests last -- they may crash perl
 
 ok(("a" x (2**15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker: #24274")
@@ -3865,6 +3911,8 @@ ok((q(a)x 100) =~ /^(??{'(.)'x 100})/,
         "Regexp /^(??{'(.)'x 100})/ crashes older perls")
     or print "# Unexpected outcome: should pass or crash perl\n";
 
+# Put new tests above the line, not here.
+
 # Don't forget to update this!
-BEGIN{print "1..1289\n"};
+BEGIN{print "1..1300\n"};