This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
8947cce1aeef9d09cbad11e8317e13ebc08caf7c
[perl5.git] / regexec.c
1 /*    regexec.c
2  */
3
4 /*
5  * "One Ring to rule them all, One Ring to find them..."
6  */
7
8 /* This file contains functions for executing a regular expression.  See
9  * also regcomp.c which, funnily enough, contains functions for compiling
10  * a regular expression.
11  *
12  * This file is also copied at build time to ext/re/re_exec.c, where
13  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14  * This causes the main functions to be compiled under new names and with
15  * debugging support added, which makes "use re 'debug'" work.
16  
17  */
18
19 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
20  * be confused with the original package (see point 3 below).  Thanks, Henry!
21  */
22
23 /* Additional note: this code is very heavily munged from Henry's version
24  * in places.  In some spots I've traded clarity for efficiency, so don't
25  * blame Henry for some of the lack of readability.
26  */
27
28 /* The names of the functions have been changed from regcomp and
29  * regexec to  pregcomp and pregexec in order to avoid conflicts
30  * with the POSIX routines of the same names.
31 */
32
33 #ifdef PERL_EXT_RE_BUILD
34 /* need to replace pregcomp et al, so enable that */
35 #  ifndef PERL_IN_XSUB_RE
36 #    define PERL_IN_XSUB_RE
37 #  endif
38 /* need access to debugger hooks */
39 #  if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
40 #    define DEBUGGING
41 #  endif
42 #endif
43
44 #ifdef PERL_IN_XSUB_RE
45 /* We *really* need to overwrite these symbols: */
46 #  define Perl_regexec_flags my_regexec
47 #  define Perl_regdump my_regdump
48 #  define Perl_regprop my_regprop
49 #  define Perl_re_intuit_start my_re_intuit_start
50 /* *These* symbols are masked to allow static link. */
51 #  define Perl_pregexec my_pregexec
52 #  define Perl_reginitcolors my_reginitcolors
53 #  define Perl_regclass_swash my_regclass_swash
54
55 #  define PERL_NO_GET_CONTEXT
56 #endif
57
58 /*SUPPRESS 112*/
59 /*
60  * pregcomp and pregexec -- regsub and regerror are not used in perl
61  *
62  *      Copyright (c) 1986 by University of Toronto.
63  *      Written by Henry Spencer.  Not derived from licensed software.
64  *
65  *      Permission is granted to anyone to use this software for any
66  *      purpose on any computer system, and to redistribute it freely,
67  *      subject to the following restrictions:
68  *
69  *      1. The author is not responsible for the consequences of use of
70  *              this software, no matter how awful, even if they arise
71  *              from defects in it.
72  *
73  *      2. The origin of this software must not be misrepresented, either
74  *              by explicit claim or by omission.
75  *
76  *      3. Altered versions must be plainly marked as such, and must not
77  *              be misrepresented as being the original software.
78  *
79  ****    Alterations to Henry's code are...
80  ****
81  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
82  ****    2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
83  ****
84  ****    You may distribute under the terms of either the GNU General Public
85  ****    License or the Artistic License, as specified in the README file.
86  *
87  * Beware that some of this code is subtly aware of the way operator
88  * precedence is structured in regular expressions.  Serious changes in
89  * regular-expression syntax might require a total rethink.
90  */
91 #include "EXTERN.h"
92 #define PERL_IN_REGEXEC_C
93 #include "perl.h"
94
95 #include "regcomp.h"
96
/* Regex-runtime flag bits.  RF_utf8 is the bit the UTF macro below tests in
 * PL_reg_flags; the other RF_* values are presumably bits of that same word
 * -- confirm at their use sites. */
97 #define RF_tainted      1               /* tainted information used? */
98 #define RF_warned       2               /* warned about big count? */
99 #define RF_evaled       4               /* Did an EVAL with setting? */
100 #define RF_utf8         8               /* String contains multibyte chars? */
101
/* True when the RF_utf8 bit is set in PL_reg_flags. */
102 #define UTF ((PL_reg_flags & RF_utf8) != 0)
103
/* RS_* values; where they are stored is decided by their users elsewhere. */
104 #define RS_init         1               /* eval environment created */
105 #define RS_set          2               /* replsv value is set */
106
/* Supply a default for STATIC when nothing earlier has defined it. */
107 #ifndef STATIC
108 #define STATIC  static
109 #endif
110
/* Class-membership test for node p against the character at c: when
 * ANYOF_FLAGS(p) is nonzero, defer to reginclass(p, c, 0, 0); otherwise test
 * the single byte *(c) directly with ANYOF_BITMAP_TEST. */
111 #define REGINCLASS(p,c)  (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
112
113 /*
114  * Forwards.
115  */
116
/*
 * Length, distance and "hop" helpers.
 *
 *  CHR_SVLEN(sv)   sv_len_utf8(sv) when do_utf8 is true, else SvCUR(sv).
 *                  do_utf8 is not a parameter: it must be in scope where
 *                  the macro is expanded.
 *  CHR_DIST(a,b)   utf8_distance(a,b) when PL_reg_match_utf8 is set, else
 *                  the plain pointer difference a - b.
 *  HOP / HOPMAYBE  move pos by off via reghop()/reghopmaybe() when
 *                  PL_reg_match_utf8 is set, else plain pointer arithmetic.
 *  HOPBACK         step back by off: reghopmaybe(pos, -off) when
 *                  PL_reg_match_utf8 is set; otherwise pos - off, or NULL if
 *                  that would land before PL_bostr.
 *  *3 variants     same as above but hand a limit pointer to
 *                  reghop3()/reghopmaybe3().  Note the limit is NOT consulted
 *                  in the non-UTF-8 branch, which is plain pos + off.
 *  *c / *_c        the same result cast to char*.
 *
 * Every macro parameter that is cast, negated or used in arithmetic is
 * parenthesized, and cast-only replacement lists are wrapped, so expression
 * arguments such as "s + 1" or "a - b" expand with the intended precedence.
 */
117 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
118 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : (a) - (b))
119
120 #define reghop_c(pos,off) ((char*)reghop((U8*)(pos), off))
121 #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)(pos), off))
122 #define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)(pos), off) : (U8*)((pos) + (off)))
123 #define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)(pos), off) : (U8*)((pos) + (off)))
124 #define HOPc(pos,off) ((char*)HOP(pos,off))
125 #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
126
127 #define HOPBACK(pos, off) (             \
128     (PL_reg_match_utf8)                 \
129         ? reghopmaybe((U8*)(pos), -(off))   \
130     : ((pos) - (off) >= PL_bostr)           \
131         ? (U8*)((pos) - (off))              \
132     : (U8*)NULL                         \
133 )
134 #define HOPBACKc(pos, off) ((char*)HOPBACK(pos, off))
135
136 #define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)(pos), off, (U8*)(lim)))
137 #define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)(pos), off, (U8*)(lim)))
138 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)((pos) + (off)))
139 #define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)(pos), off, (U8*)(lim)) : (U8*)((pos) + (off)))
140 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
141 #define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
142
/* LOAD_UTF8_CHARCLASS(a,b): when PL_utf8_<a> is still false/NULL, call
 * is_utf8_<a>() on b between ENTER/save_re_context() and LEAVE, discarding
 * the result.  NOTE(review): presumably done for the side effect of
 * populating PL_utf8_<a> -- confirm in the is_utf8_* implementations. */
143 #define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) { ENTER; save_re_context(); (void)CAT2(is_utf8_, a)((const U8*)b); LEAVE; } } STMT_END
144
/* JUMPABLE(rn): true when node rn is an OPEN, CLOSE, EVAL, SUSPEND, IFMATCH,
 * PLUS or MINMOD, or a CURLY-kind node whose ARG1 (first argument) is > 0.
 * rn is evaluated several times. */
145 /* for use after a quantifier and before an EXACT-like node -- japhy */
146 #define JUMPABLE(rn) ( \
147     OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
148     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
149     OP(rn) == PLUS || OP(rn) == MINMOD || \
150     (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
151 )
152
/* HAS_TEXT(rn): true when the node's kind (per PL_regkind) is EXACT or REF. */
153 #define HAS_TEXT(rn) ( \
154     PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
155 )
156
157 /*
158   Search for mandatory following text node; for lookahead, the text must
159   follow but for lookbehind (rn->flags != 0) we skip to the next step.
160 */
/* FIND_NEXT_IMPT(rn) reassigns rn (so rn must be an assignable lvalue) for
 * as long as JUMPABLE(rn) holds:
 *   - SUSPEND or CURLY-kind : step over two NEXTOPERs;
 *   - PLUS                  : step over one NEXTOPER;
 *   - IFMATCH               : two NEXTOPERs when rn->flags == 0, otherwise
 *                             jump by ARG(rn);
 *   - anything else jumpable: advance by NEXT_OFF(rn).
 */
161 #define FIND_NEXT_IMPT(rn) STMT_START { \
162     while (JUMPABLE(rn)) \
163         if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
164             rn = NEXTOPER(NEXTOPER(rn)); \
165         else if (OP(rn) == PLUS) \
166             rn = NEXTOPER(rn); \
167         else if (OP(rn) == IFMATCH) \
168             rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
169         else rn += NEXT_OFF(rn); \
170 } STMT_END 
171
/* Forward declaration of the file-local (static) restore_pos(), so that code
 * placed ahead of its definition can refer to it. */
172 static void restore_pos(pTHX_ void *arg);
173
/*
 * S_regcppush - push a regexp checkpoint frame onto the savestack.
 *
 * For every paren p with parenfloor < p <= PL_regsize (highest first) it
 * pushes PL_regendp[p], PL_regstartp[p], PL_reg_start_tmp[p] and p itself.
 * It then pushes PL_regsize, *PL_reglastparen, *PL_reglastcloseparen,
 * PL_reginput, an element count and the SAVEt_REGCONTEXT cookie.
 * S_regcppop() pops the same items in the reverse order, so the push order
 * here is the frame layout and must not be changed independently.
 *
 * Returns the value PL_savestack_ix had on entry (before anything was
 * pushed).  Croaks when PL_regsize < parenfloor.
 */
174 STATIC CHECKPOINT
175 S_regcppush(pTHX_ I32 parenfloor)
176 {
177     int retval = PL_savestack_ix;       /* savestack index before this frame */
178 #define REGCP_PAREN_ELEMS 4
179     int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
180     int p;
181
182     if (paren_elems_to_push < 0)
183         Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
184
185 #define REGCP_OTHER_ELEMS 6
186     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
187     for (p = PL_regsize; p > parenfloor; p--) {
188 /* REGCP_PAREN_ELEMS items are pushed per pair of parentheses. */
189         SSPUSHINT(PL_regendp[p]);
190         SSPUSHINT(PL_regstartp[p]);
191         SSPUSHPTR(PL_reg_start_tmp[p]);
192         SSPUSHINT(p);
193     }
194 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
195     SSPUSHINT(PL_regsize);
196     SSPUSHINT(*PL_reglastparen);
197     SSPUSHINT(*PL_reglastcloseparen);
198     SSPUSHPTR(PL_reginput);
199 #define REGCP_FRAME_ELEMS 2
200 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
201  * are needed for the regexp context stack bookkeeping. */
202     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS); /* count excludes itself and the cookie */
203     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
204
205     return retval;
206 }
207
/* REGCP_SET(cp): emit a debug line showing PL_savestack_ix, then store the
 * current PL_savestack_ix in cp.
 * REGCP_UNWIND(cp): emit a debug line when cp differs from PL_savestack_ix,
 * then call regcpblow(cp).
 * NOTE(review): each macro expands to two statements with no enclosing
 * STMT_START/STMT_END, so neither is safe as the sole body of an unbraced
 * if/else/loop; cp is also evaluated more than once. */
208 /* These are needed since we do not localize EVAL nodes: */
209 #  define REGCP_SET(cp)  DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,          \
210                              "  Setting an EVAL scope, savestack=%"IVdf"\n",    \
211                              (IV)PL_savestack_ix)); cp = PL_savestack_ix
212
213 #  define REGCP_UNWIND(cp)  DEBUG_EXECUTE_r(cp != PL_savestack_ix ?             \
214                                 PerlIO_printf(Perl_debug_log,           \
215                                 "  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
216                                 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
217
/*
 * S_regcppop - pop one frame written by S_regcppush() off the savestack.
 *
 * Items are popped in exactly the reverse of the push order.  Restores
 * *PL_reglastcloseparen, *PL_reglastparen and PL_regsize, then for each saved
 * paren restores PL_reg_start_tmp[] and PL_regstartp[] unconditionally and
 * PL_regendp[] only when the paren is <= the restored *PL_reglastparen.
 * Finally every paren from *PL_reglastparen + 1 through PL_regnpar gets
 * PL_regendp[] = -1, and PL_regstartp[] = -1 as well if it lies above
 * PL_regsize.
 *
 * Returns the input pointer stored in the frame; this function does not
 * itself assign it to PL_reginput.
 */
218 STATIC char *
219 S_regcppop(pTHX)
220 {
221     I32 i;
222     U32 paren = 0;
223     char *input;
224     I32 tmps;
225
226     GET_RE_DEBUG_FLAGS_DECL;
227
228     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
229     i = SSPOPINT;
230     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
231     i = SSPOPINT; /* Parentheses elements to pop. */
232     input = (char *) SSPOPPTR;
233     *PL_reglastcloseparen = SSPOPINT;
234     *PL_reglastparen = SSPOPINT;
235     PL_regsize = SSPOPINT;
236
237     /* Now restore the parentheses context. */
     /* The stored count also covers the REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS
      * non-paren items popped above; subtract them to leave only the
      * per-paren elements. */
238     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
239          i > 0; i -= REGCP_PAREN_ELEMS) {
240         paren = (U32)SSPOPINT;
241         PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
242         PL_regstartp[paren] = SSPOPINT;
243         tmps = SSPOPINT;        /* saved end offset; always popped */
244         if (paren <= *PL_reglastparen)  /* ...but only applied up to lastparen */
245             PL_regendp[paren] = tmps;
246         DEBUG_EXECUTE_r(
247             PerlIO_printf(Perl_debug_log,
248                           "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
249                           (UV)paren, (IV)PL_regstartp[paren],
250                           (IV)(PL_reg_start_tmp[paren] - PL_bostr),
251                           (IV)PL_regendp[paren],
252                           (paren > *PL_reglastparen ? "(no)" : ""));
253         );
254     }
255     DEBUG_EXECUTE_r(
256         if ((I32)(*PL_reglastparen + 1) <= PL_regnpar) {
257             PerlIO_printf(Perl_debug_log,
258                           "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
259                           (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
260         }
261     );
262 #if 1
263     /* It would seem that the similar code in regtry()
264      * already takes care of this, and in fact it is in
265      * a better location too, since this code can be #if 0-ed out
266      * but the code in regtry() is needed or otherwise tests
267      * requiring null fields (pat.t#187 and split.t#{13,14})
268      * (as of patchlevel 7877) will fail.  Then again,
269      * this code seems to be necessary or otherwise
270      * building DynaLoader will fail:
271      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
272      * --jhi */
273     for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) {
274         if ((I32)paren > PL_regsize)
275             PL_regstartp[paren] = -1;
276         PL_regendp[paren] = -1;
277     }
278 #endif
279     return input;
280 }
281
/* Run regcppop() as though the savestack ended at index ss, then put the
 * real savestack index back.  The pointer regcppop() returns is discarded
 * and this function always returns Nullch. */
282 STATIC char *
283 S_regcp_set_to(pTHX_ I32 ss)
284 {
285     const I32 saved_ix = PL_savestack_ix;   /* the real savestack index */
286
287     PL_savestack_ix = ss;                   /* pop from ss, not from the top */
288     (void)regcppop();
289     PL_savestack_ix = saved_ix;             /* undo the temporary override */
290     return Nullch;
291 }
292
/* State record that can be chained through `prev`.  The code that fills in
 * and walks these records is not part of this excerpt, so the per-field
 * notes below are hedged. */
293 typedef struct re_cc_state
294 {
295     I32 ss;                     /* NOTE(review): presumably a savestack index, like S_regcp_set_to()'s ss -- confirm */
296     regnode *node;              /* a regnode associated with this state */
297     struct re_cc_state *prev;   /* link to another re_cc_state (presumably the previous/outer one -- confirm) */
298     CURCUR *cc;                 /* a CURCUR associated with this state */
299     regexp *re;                 /* a regexp associated with this state */
300 } re_cc_state;
301
/* regcpblow(cp): simply LEAVE_SCOPE(cp). */
302 #define regcpblow(cp) LEAVE_SCOPE(cp)   /* Ignores regcppush()ed data. */
303
/* TRYPAREN(paren, n, input):
 *   - when paren is nonzero and n is nonzero, record capture `paren` as
 *     starting one HOPc() step before input and ending at input (both stored
 *     as offsets from PL_bostr); when paren is nonzero and n is zero, set
 *     PL_regendp[paren] to -1;
 *   - then try regmatch(next) and do sayYES if it succeeds;
 *   - on failure, when both paren and n are nonzero, reset PL_regendp[paren]
 *     to -1.
 * `next` and sayYES are not parameters: they must be available where the
 * macro is expanded.
 * NOTE(review): expands to a bare { ... } block (not STMT_START/STMT_END) and
 * evaluates its arguments more than once without parenthesizing them. */
304 #define TRYPAREN(paren, n, input) {                             \
305     if (paren) {                                                \
306         if (n) {                                                \
307             PL_regstartp[paren] = HOPc(input, -1) - PL_bostr;   \
308             PL_regendp[paren] = input - PL_bostr;               \
309         }                                                       \
310         else                                                    \
311             PL_regendp[paren] = -1;                             \
312     }                                                           \
313     if (regmatch(next))                                         \
314         sayYES;                                                 \
315     if (paren && n)                                             \
316         PL_regendp[paren] = -1;                                 \
317 }
318
319
320 /*
321  * pregexec and friends
322  */
323
324 /*
325  - pregexec - match a regexp against a string
326  */
327 I32
328 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
329          char *strbeg, I32 minend, SV *screamer, U32 nosave)
330 /* strend - points at the null that terminates the string
331  * strbeg - the real beginning of the string
332  * minend - the match must end at least minend past stringarg
333  * nosave - optimization switch; when nonzero REXEC_COPY_STR is not passed */
334 {
335     const U32 exec_flags = nosave ? 0 : REXEC_COPY_STR;
336     return regexec_flags(prog, stringarg, strend, strbeg, minend, screamer,
337                          NULL, exec_flags);
338 }
339
/* Copy the parts of prog that the matcher reads through PL_reg* variables. */
340 STATIC void
341 S_cache_re(pTHX_ regexp *prog)
342 {
343     PL_reg_re = prog;
344     PL_regdata = prog->data;
345     PL_regnpar = prog->nparens;
346     PL_regprecomp = prog->precomp;      /* FAIL needs the pattern text */
347 #ifdef DEBUGGING
348     PL_regprogram = prog->program;
349 #endif
350 }
351
352 /*
353  * Need to implement the following flags for reg_anch:
354  *
355  * USE_INTUIT_NOML              - Useful to call re_intuit_start() first
356  * USE_INTUIT_ML
357  * INTUIT_AUTORITATIVE_NOML     - Can trust a positive answer
358  * INTUIT_AUTORITATIVE_ML
359  * INTUIT_ONCE_NOML             - Intuit can match in one location only.
360  * INTUIT_ONCE_ML
361  *
362  * Another flag for this function: SECOND_TIME (so that float substrs
363  * with giant delta may be not rechecked).
364  */
365
366 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
367
368 /* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
369    Otherwise, only SvCUR(sv) is used to get strbeg. */
370
371 /* XXXX We assume that strpos is strbeg unless sv. */
372
373 /* XXXX Some places assume that there is a fixed substring.
374         An update may be needed if optimizer marks as "INTUITable"
375         RExen without fixed substrings.  Similarly, it is assumed that
376         lengths of all the strings are no more than minlen, thus they
377         cannot come from lookahead.
378         (Or minlen should take into account lookahead.) */
379
380 /* A failure to find a constant substring means that there is no need to make
381    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
382    finding a substring too deep into the string means that fewer calls to
383    regtry() should be needed.
384
385    REx compiler's optimizer found 4 possible hints:
386         a) Anchored substring;
387         b) Fixed substring;
388         c) Whether we are anchored (beginning-of-line or \G);
389         d) First node (of those at offset 0) which may distinguish positions;
390    We use a)b)d) and multiline-part of c), and try to find a position in the
391    string which does not contradict any of them.
392  */
393
394 /* Most of the decisions we make here should have been made at compile time.
395    The nodes of the REx which we used for the search should have been
396    deleted from the finite automaton. */
397
398 char *
399 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
400                      char *strend, U32 flags, re_scream_pos_data *data)
401 {
402     register I32 start_shift = 0;
403     /* Should be nonnegative! */
404     register I32 end_shift   = 0;
405     register char *s;
406     register SV *check;
407     char *strbeg;
408     char *t;
409     int do_utf8 = sv ? SvUTF8(sv) : 0;  /* if no sv we have to assume bytes */
410     I32 ml_anch;
411     register char *other_last = Nullch; /* other substr checked before this */
412     char *check_at = Nullch;            /* check substr found at this pos */
413     I32 multiline = prog->reganch & PMf_MULTILINE;
414 #ifdef DEBUGGING
415     char *i_strpos = strpos;
416     SV *dsv = PERL_DEBUG_PAD_ZERO(0);
417 #endif
418
419     GET_RE_DEBUG_FLAGS_DECL;
420
421     RX_MATCH_UTF8_set(prog,do_utf8);
422
423     if (prog->reganch & ROPT_UTF8) {
424         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
425                               "UTF-8 regex...\n"));
426         PL_reg_flags |= RF_utf8;
427     }
428
429     DEBUG_EXECUTE_r({
430          char *s   = PL_reg_match_utf8 ?
431                          sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
432                          strpos;
433          int   len = PL_reg_match_utf8 ?
434                          strlen(s) : strend - strpos;
435          if (!PL_colorset)
436               reginitcolors();
437          if (PL_reg_match_utf8)
438              DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
439                                    "UTF-8 target...\n"));
440          PerlIO_printf(Perl_debug_log,
441                        "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
442                        PL_colors[4], PL_colors[5], PL_colors[0],
443                        prog->precomp,
444                        PL_colors[1],
445                        (strlen(prog->precomp) > 60 ? "..." : ""),
446                        PL_colors[0],
447                        (int)(len > 60 ? 60 : len),
448                        s, PL_colors[1],
449                        (len > 60 ? "..." : "")
450               );
451     });
452
453     /* CHR_DIST() would be more correct here but it makes things slow. */
454     if (prog->minlen > strend - strpos) {
455         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
456                               "String too short... [re_intuit_start]\n"));
457         goto fail;
458     }
459     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
460     PL_regeol = strend;
461     if (do_utf8) {
462         if (!prog->check_utf8 && prog->check_substr)
463             to_utf8_substr(prog);
464         check = prog->check_utf8;
465     } else {
466         if (!prog->check_substr && prog->check_utf8)
467             to_byte_substr(prog);
468         check = prog->check_substr;
469     }
470    if (check == &PL_sv_undef) {
471         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
472                 "Non-utf string cannot match utf check string\n"));
473         goto fail;
474     }
475     if (prog->reganch & ROPT_ANCH) {    /* Match at beg-of-str or after \n */
476         ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
477                      || ( (prog->reganch & ROPT_ANCH_BOL)
478                           && !multiline ) );    /* Check after \n? */
479
480         if (!ml_anch) {
481           if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
482                                   | ROPT_IMPLICIT)) /* not a real BOL */
483                /* SvCUR is not set on references: SvRV and SvPVX overlap */
484                && sv && !SvROK(sv)
485                && (strpos != strbeg)) {
486               DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
487               goto fail;
488           }
489           if (prog->check_offset_min == prog->check_offset_max &&
490               !(prog->reganch & ROPT_CANY_SEEN)) {
491             /* Substring at constant offset from beg-of-str... */
492             I32 slen;
493
494             s = HOP3c(strpos, prog->check_offset_min, strend);
495             if (SvTAIL(check)) {
496                 slen = SvCUR(check);    /* >= 1 */
497
498                 if ( strend - s > slen || strend - s < slen - 1
499                      || (strend - s == slen && strend[-1] != '\n')) {
500                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
501                     goto fail_finish;
502                 }
503                 /* Now should match s[0..slen-2] */
504                 slen--;
505                 if (slen && (*SvPVX(check) != *s
506                              || (slen > 1
507                                  && memNE(SvPVX(check), s, slen)))) {
508                   report_neq:
509                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
510                     goto fail_finish;
511                 }
512             }
513             else if (*SvPVX(check) != *s
514                      || ((slen = SvCUR(check)) > 1
515                          && memNE(SvPVX(check), s, slen)))
516                 goto report_neq;
517             goto success_at_start;
518           }
519         }
520         /* Match is anchored, but substr is not anchored wrt beg-of-str. */
521         s = strpos;
522         start_shift = prog->check_offset_min; /* okay to underestimate on CC */
523         end_shift = prog->minlen - start_shift -
524             CHR_SVLEN(check) + (SvTAIL(check) != 0);
525         if (!ml_anch) {
526             I32 end = prog->check_offset_max + CHR_SVLEN(check)
527                                          - (SvTAIL(check) != 0);
528             I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
529
530             if (end_shift < eshift)
531                 end_shift = eshift;
532         }
533     }
534     else {                              /* Can match at random position */
535         ml_anch = 0;
536         s = strpos;
537         start_shift = prog->check_offset_min; /* okay to underestimate on CC */
538         /* Should be nonnegative! */
539         end_shift = prog->minlen - start_shift -
540             CHR_SVLEN(check) + (SvTAIL(check) != 0);
541     }
542
543 #ifdef DEBUGGING        /* 7/99: reports of failure (with the older version) */
544     if (end_shift < 0)
545         Perl_croak(aTHX_ "panic: end_shift");
546 #endif
547
548   restart:
549     /* Find a possible match in the region s..strend by looking for
550        the "check" substring in the region corrected by start/end_shift. */
551     if (flags & REXEC_SCREAM) {
552         I32 p = -1;                     /* Internal iterator of scream. */
553         I32 *pp = data ? data->scream_pos : &p;
554
555         if (PL_screamfirst[BmRARE(check)] >= 0
556             || ( BmRARE(check) == '\n'
557                  && (BmPREVIOUS(check) == SvCUR(check) - 1)
558                  && SvTAIL(check) ))
559             s = screaminstr(sv, check,
560                             start_shift + (s - strbeg), end_shift, pp, 0);
561         else
562             goto fail_finish;
563         /* we may be pointing at the wrong string */
564         if (s && RX_MATCH_COPIED(prog))
565             s = strbeg + (s - SvPVX(sv));
566         if (data)
567             *data->scream_olds = s;
568     }
569     else if (prog->reganch & ROPT_CANY_SEEN)
570         s = fbm_instr((U8*)(s + start_shift),
571                       (U8*)(strend - end_shift),
572                       check, multiline ? FBMrf_MULTILINE : 0);
573     else
574         s = fbm_instr(HOP3(s, start_shift, strend),
575                       HOP3(strend, -end_shift, strbeg),
576                       check, multiline ? FBMrf_MULTILINE : 0);
577
578     /* Update the count-of-usability, remove useless subpatterns,
579         unshift s.  */
580
581     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
582                           (s ? "Found" : "Did not find"),
583                           (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
584                           PL_colors[0],
585                           (int)(SvCUR(check) - (SvTAIL(check)!=0)),
586                           SvPVX(check),
587                           PL_colors[1], (SvTAIL(check) ? "$" : ""),
588                           (s ? " at offset " : "...\n") ) );
589
590     if (!s)
591         goto fail_finish;
592
593     check_at = s;
594
595     /* Finish the diagnostic message */
596     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
597
598     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
599        Start with the other substr.
600        XXXX no SCREAM optimization yet - and a very coarse implementation
601        XXXX /ttx+/ results in anchored=`ttx', floating=`x'.  floating will
602                 *always* match.  Probably should be marked during compile...
603        Probably it is right to do no SCREAM here...
604      */
605
606     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
607         /* Take into account the "other" substring. */
608         /* XXXX May be hopelessly wrong for UTF... */
609         if (!other_last)
610             other_last = strpos;
611         if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
612           do_other_anchored:
613             {
614                 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
615                 char *s1 = s;
616                 SV* must;
617
618                 t = s - prog->check_offset_max;
619                 if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
620                     && (!do_utf8
621                         || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
622                             && t > strpos)))
623                     /* EMPTY */;
624                 else
625                     t = strpos;
626                 t = HOP3c(t, prog->anchored_offset, strend);
627                 if (t < other_last)     /* These positions already checked */
628                     t = other_last;
629                 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
630                 if (last < last1)
631                     last1 = last;
632  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
633                 /* On end-of-str: see comment below. */
634                 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
635                 if (must == &PL_sv_undef) {
636                     s = (char*)NULL;
637                     DEBUG_EXECUTE_r(must = prog->anchored_utf8);        /* for debug */
638                 }
639                 else
640                     s = fbm_instr(
641                         (unsigned char*)t,
642                         HOP3(HOP3(last1, prog->anchored_offset, strend)
643                                 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
644                         must,
645                         multiline ? FBMrf_MULTILINE : 0
646                     );
647                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
648                         "%s anchored substr `%s%.*s%s'%s",
649                         (s ? "Found" : "Contradicts"),
650                         PL_colors[0],
651                           (int)(SvCUR(must)
652                           - (SvTAIL(must)!=0)),
653                           SvPVX(must),
654                           PL_colors[1], (SvTAIL(must) ? "$" : "")));
655                 if (!s) {
656                     if (last1 >= last2) {
657                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
658                                                 ", giving up...\n"));
659                         goto fail_finish;
660                     }
661                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
662                         ", trying floating at offset %ld...\n",
663                         (long)(HOP3c(s1, 1, strend) - i_strpos)));
664                     other_last = HOP3c(last1, prog->anchored_offset+1, strend);
665                     s = HOP3c(last, 1, strend);
666                     goto restart;
667                 }
668                 else {
669                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
670                           (long)(s - i_strpos)));
671                     t = HOP3c(s, -prog->anchored_offset, strbeg);
672                     other_last = HOP3c(s, 1, strend);
673                     s = s1;
674                     if (t == strpos)
675                         goto try_at_start;
676                     goto try_at_offset;
677                 }
678             }
679         }
680         else {          /* Take into account the floating substring. */
681             char *last, *last1;
682             char *s1 = s;
683             SV* must;
684
685             t = HOP3c(s, -start_shift, strbeg);
686             last1 = last =
687                 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
688             if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
689                 last = HOP3c(t, prog->float_max_offset, strend);
690             s = HOP3c(t, prog->float_min_offset, strend);
691             if (s < other_last)
692                 s = other_last;
693  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
694             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
695             /* fbm_instr() takes into account exact value of end-of-str
696                if the check is SvTAIL(ed).  Since false positives are OK,
697                and end-of-str is not later than strend we are OK. */
698             if (must == &PL_sv_undef) {
699                 s = (char*)NULL;
700                 DEBUG_EXECUTE_r(must = prog->float_utf8);       /* for debug message */
701             }
702             else
703                 s = fbm_instr((unsigned char*)s,
704                               (unsigned char*)last + SvCUR(must)
705                                   - (SvTAIL(must)!=0),
706                               must, multiline ? FBMrf_MULTILINE : 0);
707             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
708                     (s ? "Found" : "Contradicts"),
709                     PL_colors[0],
710                       (int)(SvCUR(must) - (SvTAIL(must)!=0)),
711                       SvPVX(must),
712                       PL_colors[1], (SvTAIL(must) ? "$" : "")));
713             if (!s) {
714                 if (last1 == last) {
715                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
716                                             ", giving up...\n"));
717                     goto fail_finish;
718                 }
719                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
720                     ", trying anchored starting at offset %ld...\n",
721                     (long)(s1 + 1 - i_strpos)));
722                 other_last = last;
723                 s = HOP3c(t, 1, strend);
724                 goto restart;
725             }
726             else {
727                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
728                       (long)(s - i_strpos)));
729                 other_last = s; /* Fix this later. --Hugo */
730                 s = s1;
731                 if (t == strpos)
732                     goto try_at_start;
733                 goto try_at_offset;
734             }
735         }
736     }
737
738     t = s - prog->check_offset_max;
739     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
740         && (!do_utf8
741             || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
742                  && t > strpos))) {
743         /* Fixed substring is found far enough so that the match
744            cannot start at strpos. */
745       try_at_offset:
746         if (ml_anch && t[-1] != '\n') {
747             /* Eventually fbm_*() should handle this, but often
748                anchored_offset is not 0, so this check will not be wasted. */
749             /* XXXX In the code below we prefer to look for "^" even in
750                presence of anchored substrings.  And we search even
751                beyond the found float position.  These pessimizations
752                are historical artefacts only.  */
753           find_anchor:
754             while (t < strend - prog->minlen) {
755                 if (*t == '\n') {
756                     if (t < check_at - prog->check_offset_min) {
757                         if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
758                             /* Since we moved from the found position,
759                                we definitely contradict the found anchored
760                                substr.  Due to the above check we do not
761                                contradict "check" substr.
762                                Thus we can arrive here only if check substr
763                                is float.  Redo checking for "other"=="fixed".
764                              */
765                             strpos = t + 1;                     
766                             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
767                                 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
768                             goto do_other_anchored;
769                         }
770                         /* We don't contradict the found floating substring. */
771                         /* XXXX Why not check for STCLASS? */
772                         s = t + 1;
773                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
774                             PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
775                         goto set_useful;
776                     }
777                     /* Position contradicts check-string */
778                     /* XXXX probably better to look for check-string
779                        than for "\n", so one should lower the limit for t? */
780                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
781                         PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
782                     other_last = strpos = s = t + 1;
783                     goto restart;
784                 }
785                 t++;
786             }
787             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
788                         PL_colors[0], PL_colors[1]));
789             goto fail_finish;
790         }
791         else {
792             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
793                         PL_colors[0], PL_colors[1]));
794         }
795         s = t;
796       set_useful:
797         ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);    /* hooray/5 */
798     }
799     else {
800         /* The found string does not prohibit matching at strpos,
801            - no optimization of calling REx engine can be performed,
802            unless it was an MBOL and we are not after MBOL,
803            or a future STCLASS check will fail this. */
804       try_at_start:
805         /* Even in this situation we may use MBOL flag if strpos is offset
806            wrt the start of the string. */
807         if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
808             && (strpos != strbeg) && strpos[-1] != '\n'
809             /* May be due to an implicit anchor of m{.*foo}  */
810             && !(prog->reganch & ROPT_IMPLICIT))
811         {
812             t = strpos;
813             goto find_anchor;
814         }
815         DEBUG_EXECUTE_r( if (ml_anch)
816             PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
817                         (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
818         );
819       success_at_start:
820         if (!(prog->reganch & ROPT_NAUGHTY)     /* XXXX If strpos moved? */
821             && (do_utf8 ? (
822                 prog->check_utf8                /* Could be deleted already */
823                 && --BmUSEFUL(prog->check_utf8) < 0
824                 && (prog->check_utf8 == prog->float_utf8)
825             ) : (
826                 prog->check_substr              /* Could be deleted already */
827                 && --BmUSEFUL(prog->check_substr) < 0
828                 && (prog->check_substr == prog->float_substr)
829             )))
830         {
831             /* If flags & SOMETHING - do not do it many times on the same match */
832             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
833             SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
834             if (do_utf8 ? prog->check_substr : prog->check_utf8)
835                 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
836             prog->check_substr = prog->check_utf8 = Nullsv;     /* disable */
837             prog->float_substr = prog->float_utf8 = Nullsv;     /* clear */
838             check = Nullsv;                     /* abort */
839             s = strpos;
840             /* XXXX This is a remnant of the old implementation.  It
841                     looks wasteful, since now INTUIT can use many
842                     other heuristics. */
843             prog->reganch &= ~RE_USE_INTUIT;
844         }
845         else
846             s = strpos;
847     }
848
849     /* Last resort... */
850     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
851     if (prog->regstclass) {
852         /* minlen == 0 is possible if regstclass is \b or \B,
853            and the fixed substr is ''$.
854            Since minlen is already taken into account, s+1 is before strend;
855            accidentally, minlen >= 1 guarantees no false positives at s + 1
856            even for \b or \B.  But (minlen? 1 : 0) below assumes that
857            regstclass does not come from lookahead...  */
858         /* If regstclass takes bytelength more than 1: If charlength==1, OK.
859            This leaves EXACTF only, which is dealt with in find_byclass().  */
860         const U8* str = (U8*)STRING(prog->regstclass);
861         const int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
862                     ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
863                     : 1);
864         char *endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
865                 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
866                 : (prog->float_substr || prog->float_utf8
867                    ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
868                            cl_l, strend)
869                    : strend);
870
871         t = s;
872         cache_re(prog);
873         s = find_byclass(prog, prog->regstclass, s, endpos, 1);
874         if (!s) {
875 #ifdef DEBUGGING
876             const char *what = 0;
877 #endif
878             if (endpos == strend) {
879                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
880                                 "Could not match STCLASS...\n") );
881                 goto fail;
882             }
883             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
884                                    "This position contradicts STCLASS...\n") );
885             if ((prog->reganch & ROPT_ANCH) && !ml_anch)
886                 goto fail;
887             /* Contradict one of substrings */
888             if (prog->anchored_substr || prog->anchored_utf8) {
889                 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
890                     DEBUG_EXECUTE_r( what = "anchored" );
891                   hop_and_restart:
892                     s = HOP3c(t, 1, strend);
893                     if (s + start_shift + end_shift > strend) {
894                         /* XXXX Should be taken into account earlier? */
895                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
896                                                "Could not match STCLASS...\n") );
897                         goto fail;
898                     }
899                     if (!check)
900                         goto giveup;
901                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
902                                 "Looking for %s substr starting at offset %ld...\n",
903                                  what, (long)(s + start_shift - i_strpos)) );
904                     goto restart;
905                 }
906                 /* Have both, check_string is floating */
907                 if (t + start_shift >= check_at) /* Contradicts floating=check */
908                     goto retry_floating_check;
909                 /* Recheck anchored substring, but not floating... */
910                 s = check_at;
911                 if (!check)
912                     goto giveup;
913                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
914                           "Looking for anchored substr starting at offset %ld...\n",
915                           (long)(other_last - i_strpos)) );
916                 goto do_other_anchored;
917             }
918             /* Another way we could have checked stclass at the
919                current position only: */
920             if (ml_anch) {
921                 s = t = t + 1;
922                 if (!check)
923                     goto giveup;
924                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
925                           "Looking for /%s^%s/m starting at offset %ld...\n",
926                           PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
927                 goto try_at_offset;
928             }
929             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))     /* Could have been deleted */
930                 goto fail;
931             /* Check is floating substring. */
932           retry_floating_check:
933             t = check_at - start_shift;
934             DEBUG_EXECUTE_r( what = "floating" );
935             goto hop_and_restart;
936         }
937         if (t != s) {
938             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
939                         "By STCLASS: moving %ld --> %ld\n",
940                                   (long)(t - i_strpos), (long)(s - i_strpos))
941                    );
942         }
943         else {
944             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
945                                   "Does not contradict STCLASS...\n"); 
946                    );
947         }
948     }
949   giveup:
950     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
951                           PL_colors[4], (check ? "Guessed" : "Giving up"),
952                           PL_colors[5], (long)(s - i_strpos)) );
953     return s;
954
955   fail_finish:                          /* Substring not found */
956     if (prog->check_substr || prog->check_utf8)         /* could be removed already */
957         BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
958   fail:
959     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
960                           PL_colors[4], PL_colors[5]));
961     return Nullch;
962 }
963
964 /* We know what class REx starts with.  Try to find this position... */
965 STATIC char *
966 S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, I32 norun)
967 {
968         dVAR;
969         I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
970         char *m;
971         STRLEN ln;
972         STRLEN lnc;
973         register STRLEN uskip;
974         unsigned int c1;
975         unsigned int c2;
976         char *e;
977         register I32 tmp = 1;   /* Scratch variable? */
978         register bool do_utf8 = PL_reg_match_utf8;
979
980         /* We know what class it must start with. */
981         switch (OP(c)) {
982         case ANYOF:
983             if (do_utf8) {
984                  while (s + (uskip = UTF8SKIP(s)) <= strend) {
985                       if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
986                           !UTF8_IS_INVARIANT((U8)s[0]) ?
987                           reginclass(c, (U8*)s, 0, do_utf8) :
988                           REGINCLASS(c, (U8*)s)) {
989                            if (tmp && (norun || regtry(prog, s)))
990                                 goto got_it;
991                            else
992                                 tmp = doevery;
993                       }
994                       else 
995                            tmp = 1;
996                       s += uskip;
997                  }
998             }
999             else {
1000                  while (s < strend) {
1001                       STRLEN skip = 1;
1002
1003                       if (REGINCLASS(c, (U8*)s) ||
1004                           (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1005                            /* The assignment of 2 is intentional:
1006                             * for the folded sharp s, the skip is 2. */
1007                            (skip = SHARP_S_SKIP))) {
1008                            if (tmp && (norun || regtry(prog, s)))
1009                                 goto got_it;
1010                            else
1011                                 tmp = doevery;
1012                       }
1013                       else 
1014                            tmp = 1;
1015                       s += skip;
1016                  }
1017             }
1018             break;
1019         case CANY:
1020             while (s < strend) {
1021                 if (tmp && (norun || regtry(prog, s)))
1022                     goto got_it;
1023                 else
1024                     tmp = doevery;
1025                 s++;
1026             }
1027             break;
1028         case EXACTF:
1029             m   = STRING(c);
1030             ln  = STR_LEN(c);   /* length to match in octets/bytes */
1031             lnc = (I32) ln;     /* length to match in characters */
1032             if (UTF) {
1033                 STRLEN ulen1, ulen2;
1034                 U8 *sm = (U8 *) m;
1035                 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1036                 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1037
1038                 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1039                 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1040
1041                 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE, 
1042                                     0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1043                 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1044                                     0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1045                 lnc = 0;
1046                 while (sm < ((U8 *) m + ln)) {
1047                     lnc++;
1048                     sm += UTF8SKIP(sm);
1049                 }
1050             }
1051             else {
1052                 c1 = *(U8*)m;
1053                 c2 = PL_fold[c1];
1054             }
1055             goto do_exactf;
1056         case EXACTFL:
1057             m   = STRING(c);
1058             ln  = STR_LEN(c);
1059             lnc = (I32) ln;
1060             c1 = *(U8*)m;
1061             c2 = PL_fold_locale[c1];
1062           do_exactf:
1063             e = HOP3c(strend, -((I32)lnc), s);
1064
1065             if (norun && e < s)
1066                 e = s;                  /* Due to minlen logic of intuit() */
1067
1068             /* The idea in the EXACTF* cases is to first find the
1069              * first character of the EXACTF* node and then, if
1070              * necessary, case-insensitively compare the full
1071              * text of the node.  The c1 and c2 are the first
1072              * characters (though in Unicode it gets a bit
1073              * more complicated because there are more cases
1074              * than just upper and lower: one needs to use
1075              * the so-called folding case for case-insensitive
1076              * matching (called "loose matching" in Unicode)).
1077              * ibcmp_utf8() will do just that. */
1078
1079             if (do_utf8) {
1080                 UV c, f;
1081                 U8 tmpbuf [UTF8_MAXBYTES+1];
1082                 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
1083                 STRLEN len, foldlen;
1084                 
1085                 if (c1 == c2) {
1086                     /* Upper and lower of 1st char are equal -
1087                      * probably not a "letter". */
1088                     while (s <= e) {
1089                         c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1090                                            ckWARN(WARN_UTF8) ?
1091                                            0 : UTF8_ALLOW_ANY);
1092                         if ( c == c1
1093                              && (ln == len ||
1094                                  ibcmp_utf8(s, (char **)0, 0,  do_utf8,
1095                                             m, (char **)0, ln, (bool)UTF))
1096                              && (norun || regtry(prog, s)) )
1097                             goto got_it;
1098                         else {
1099                              uvchr_to_utf8(tmpbuf, c);
1100                              f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1101                              if ( f != c
1102                                   && (f == c1 || f == c2)
1103                                   && (ln == foldlen ||
1104                                       !ibcmp_utf8((char *) foldbuf,
1105                                                   (char **)0, foldlen, do_utf8,
1106                                                   m,
1107                                                   (char **)0, ln, (bool)UTF))
1108                                   && (norun || regtry(prog, s)) )
1109                                   goto got_it;
1110                         }
1111                         s += len;
1112                     }
1113                 }
1114                 else {
1115                     while (s <= e) {
1116                       c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1117                                            ckWARN(WARN_UTF8) ?
1118                                            0 : UTF8_ALLOW_ANY);
1119
1120                         /* Handle some of the three Greek sigmas cases.
1121                          * Note that not all the possible combinations
1122                          * are handled here: some of them are handled
1123                          * by the standard folding rules, and some of
1124                          * them (the character class or ANYOF cases)
1125                          * are handled during compiletime in
1126                          * regcomp.c:S_regclass(). */
1127                         if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1128                             c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1129                             c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1130
1131                         if ( (c == c1 || c == c2)
1132                              && (ln == len ||
1133                                  ibcmp_utf8(s, (char **)0, 0,  do_utf8,
1134                                             m, (char **)0, ln, (bool)UTF))
1135                              && (norun || regtry(prog, s)) )
1136                             goto got_it;
1137                         else {
1138                              uvchr_to_utf8(tmpbuf, c);
1139                              f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1140                              if ( f != c
1141                                   && (f == c1 || f == c2)
1142                                   && (ln == foldlen ||
1143                                       !ibcmp_utf8((char *) foldbuf,
1144                                                   (char **)0, foldlen, do_utf8,
1145                                                   m,
1146                                                   (char **)0, ln, (bool)UTF))
1147                                   && (norun || regtry(prog, s)) )
1148                                   goto got_it;
1149                         }
1150                         s += len;
1151                     }
1152                 }
1153             }
1154             else {
1155                 if (c1 == c2)
1156                     while (s <= e) {
1157                         if ( *(U8*)s == c1
1158                              && (ln == 1 || !(OP(c) == EXACTF
1159                                               ? ibcmp(s, m, ln)
1160                                               : ibcmp_locale(s, m, ln)))
1161                              && (norun || regtry(prog, s)) )
1162                             goto got_it;
1163                         s++;
1164                     }
1165                 else
1166                     while (s <= e) {
1167                         if ( (*(U8*)s == c1 || *(U8*)s == c2)
1168                              && (ln == 1 || !(OP(c) == EXACTF
1169                                               ? ibcmp(s, m, ln)
1170                                               : ibcmp_locale(s, m, ln)))
1171                              && (norun || regtry(prog, s)) )
1172                             goto got_it;
1173                         s++;
1174                     }
1175             }
1176             break;
1177         case BOUNDL:
1178             PL_reg_flags |= RF_tainted;
1179             /* FALL THROUGH */
1180         case BOUND:
1181             if (do_utf8) {
1182                 if (s == PL_bostr)
1183                     tmp = '\n';
1184                 else {
1185                     U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1186                 
1187                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1188                 }
1189                 tmp = ((OP(c) == BOUND ?
1190                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1191                 LOAD_UTF8_CHARCLASS(alnum,"a");
1192                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1193                     if (tmp == !(OP(c) == BOUND ?
1194                                  swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1195                                  isALNUM_LC_utf8((U8*)s)))
1196                     {
1197                         tmp = !tmp;
1198                         if ((norun || regtry(prog, s)))
1199                             goto got_it;
1200                     }
1201                     s += uskip;
1202                 }
1203             }
1204             else {
1205                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1206                 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1207                 while (s < strend) {
1208                     if (tmp ==
1209                         !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1210                         tmp = !tmp;
1211                         if ((norun || regtry(prog, s)))
1212                             goto got_it;
1213                     }
1214                     s++;
1215                 }
1216             }
1217             if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
1218                 goto got_it;
1219             break;
1220         case NBOUNDL:
1221             PL_reg_flags |= RF_tainted;
1222             /* FALL THROUGH */
1223         case NBOUND:
1224             if (do_utf8) {
1225                 if (s == PL_bostr)
1226                     tmp = '\n';
1227                 else {
1228                     U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1229                 
1230                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1231                 }
1232                 tmp = ((OP(c) == NBOUND ?
1233                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1234                 LOAD_UTF8_CHARCLASS(alnum,"a");
1235                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1236                     if (tmp == !(OP(c) == NBOUND ?
1237                                  swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1238                                  isALNUM_LC_utf8((U8*)s)))
1239                         tmp = !tmp;
1240                     else if ((norun || regtry(prog, s)))
1241                         goto got_it;
1242                     s += uskip;
1243                 }
1244             }
1245             else {
1246                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1247                 tmp = ((OP(c) == NBOUND ?
1248                         isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1249                 while (s < strend) {
1250                     if (tmp ==
1251                         !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1252                         tmp = !tmp;
1253                     else if ((norun || regtry(prog, s)))
1254                         goto got_it;
1255                     s++;
1256                 }
1257             }
1258             if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
1259                 goto got_it;
1260             break;
1261         case ALNUM:
1262             if (do_utf8) {
1263                 LOAD_UTF8_CHARCLASS(alnum,"a");
1264                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1265                     if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1266                         if (tmp && (norun || regtry(prog, s)))
1267                             goto got_it;
1268                         else
1269                             tmp = doevery;
1270                     }
1271                     else
1272                         tmp = 1;
1273                     s += uskip;
1274                 }
1275             }
1276             else {
1277                 while (s < strend) {
1278                     if (isALNUM(*s)) {
1279                         if (tmp && (norun || regtry(prog, s)))
1280                             goto got_it;
1281                         else
1282                             tmp = doevery;
1283                     }
1284                     else
1285                         tmp = 1;
1286                     s++;
1287                 }
1288             }
1289             break;
1290         case ALNUML:
1291             PL_reg_flags |= RF_tainted;
1292             if (do_utf8) {
1293                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1294                     if (isALNUM_LC_utf8((U8*)s)) {
1295                         if (tmp && (norun || regtry(prog, s)))
1296                             goto got_it;
1297                         else
1298                             tmp = doevery;
1299                     }
1300                     else
1301                         tmp = 1;
1302                     s += uskip;
1303                 }
1304             }
1305             else {
1306                 while (s < strend) {
1307                     if (isALNUM_LC(*s)) {
1308                         if (tmp && (norun || regtry(prog, s)))
1309                             goto got_it;
1310                         else
1311                             tmp = doevery;
1312                     }
1313                     else
1314                         tmp = 1;
1315                     s++;
1316                 }
1317             }
1318             break;
1319         case NALNUM:
1320             if (do_utf8) {
1321                 LOAD_UTF8_CHARCLASS(alnum,"a");
1322                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1323                     if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1324                         if (tmp && (norun || regtry(prog, s)))
1325                             goto got_it;
1326                         else
1327                             tmp = doevery;
1328                     }
1329                     else
1330                         tmp = 1;
1331                     s += uskip;
1332                 }
1333             }
1334             else {
1335                 while (s < strend) {
1336                     if (!isALNUM(*s)) {
1337                         if (tmp && (norun || regtry(prog, s)))
1338                             goto got_it;
1339                         else
1340                             tmp = doevery;
1341                     }
1342                     else
1343                         tmp = 1;
1344                     s++;
1345                 }
1346             }
1347             break;
1348         case NALNUML:
1349             PL_reg_flags |= RF_tainted;
1350             if (do_utf8) {
1351                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1352                     if (!isALNUM_LC_utf8((U8*)s)) {
1353                         if (tmp && (norun || regtry(prog, s)))
1354                             goto got_it;
1355                         else
1356                             tmp = doevery;
1357                     }
1358                     else
1359                         tmp = 1;
1360                     s += uskip;
1361                 }
1362             }
1363             else {
1364                 while (s < strend) {
1365                     if (!isALNUM_LC(*s)) {
1366                         if (tmp && (norun || regtry(prog, s)))
1367                             goto got_it;
1368                         else
1369                             tmp = doevery;
1370                     }
1371                     else
1372                         tmp = 1;
1373                     s++;
1374                 }
1375             }
1376             break;
1377         case SPACE:
1378             if (do_utf8) {
1379                 LOAD_UTF8_CHARCLASS(space," ");
1380                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1381                     if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
1382                         if (tmp && (norun || regtry(prog, s)))
1383                             goto got_it;
1384                         else
1385                             tmp = doevery;
1386                     }
1387                     else
1388                         tmp = 1;
1389                     s += uskip;
1390                 }
1391             }
1392             else {
1393                 while (s < strend) {
1394                     if (isSPACE(*s)) {
1395                         if (tmp && (norun || regtry(prog, s)))
1396                             goto got_it;
1397                         else
1398                             tmp = doevery;
1399                     }
1400                     else
1401                         tmp = 1;
1402                     s++;
1403                 }
1404             }
1405             break;
1406         case SPACEL:
1407             PL_reg_flags |= RF_tainted;
1408             if (do_utf8) {
1409                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1410                     if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1411                         if (tmp && (norun || regtry(prog, s)))
1412                             goto got_it;
1413                         else
1414                             tmp = doevery;
1415                     }
1416                     else
1417                         tmp = 1;
1418                     s += uskip;
1419                 }
1420             }
1421             else {
1422                 while (s < strend) {
1423                     if (isSPACE_LC(*s)) {
1424                         if (tmp && (norun || regtry(prog, s)))
1425                             goto got_it;
1426                         else
1427                             tmp = doevery;
1428                     }
1429                     else
1430                         tmp = 1;
1431                     s++;
1432                 }
1433             }
1434             break;
1435         case NSPACE:
1436             if (do_utf8) {
1437                 LOAD_UTF8_CHARCLASS(space," ");
1438                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1439                     if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
1440                         if (tmp && (norun || regtry(prog, s)))
1441                             goto got_it;
1442                         else
1443                             tmp = doevery;
1444                     }
1445                     else
1446                         tmp = 1;
1447                     s += uskip;
1448                 }
1449             }
1450             else {
1451                 while (s < strend) {
1452                     if (!isSPACE(*s)) {
1453                         if (tmp && (norun || regtry(prog, s)))
1454                             goto got_it;
1455                         else
1456                             tmp = doevery;
1457                     }
1458                     else
1459                         tmp = 1;
1460                     s++;
1461                 }
1462             }
1463             break;
1464         case NSPACEL:
1465             PL_reg_flags |= RF_tainted;
1466             if (do_utf8) {
1467                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1468                     if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1469                         if (tmp && (norun || regtry(prog, s)))
1470                             goto got_it;
1471                         else
1472                             tmp = doevery;
1473                     }
1474                     else
1475                         tmp = 1;
1476                     s += uskip;
1477                 }
1478             }
1479             else {
1480                 while (s < strend) {
1481                     if (!isSPACE_LC(*s)) {
1482                         if (tmp && (norun || regtry(prog, s)))
1483                             goto got_it;
1484                         else
1485                             tmp = doevery;
1486                     }
1487                     else
1488                         tmp = 1;
1489                     s++;
1490                 }
1491             }
1492             break;
1493         case DIGIT:
1494             if (do_utf8) {
1495                 LOAD_UTF8_CHARCLASS(digit,"0");
1496                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1497                     if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1498                         if (tmp && (norun || regtry(prog, s)))
1499                             goto got_it;
1500                         else
1501                             tmp = doevery;
1502                     }
1503                     else
1504                         tmp = 1;
1505                     s += uskip;
1506                 }
1507             }
1508             else {
1509                 while (s < strend) {
1510                     if (isDIGIT(*s)) {
1511                         if (tmp && (norun || regtry(prog, s)))
1512                             goto got_it;
1513                         else
1514                             tmp = doevery;
1515                     }
1516                     else
1517                         tmp = 1;
1518                     s++;
1519                 }
1520             }
1521             break;
1522         case DIGITL:
1523             PL_reg_flags |= RF_tainted;
1524             if (do_utf8) {
1525                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1526                     if (isDIGIT_LC_utf8((U8*)s)) {
1527                         if (tmp && (norun || regtry(prog, s)))
1528                             goto got_it;
1529                         else
1530                             tmp = doevery;
1531                     }
1532                     else
1533                         tmp = 1;
1534                     s += uskip;
1535                 }
1536             }
1537             else {
1538                 while (s < strend) {
1539                     if (isDIGIT_LC(*s)) {
1540                         if (tmp && (norun || regtry(prog, s)))
1541                             goto got_it;
1542                         else
1543                             tmp = doevery;
1544                     }
1545                     else
1546                         tmp = 1;
1547                     s++;
1548                 }
1549             }
1550             break;
1551         case NDIGIT:
1552             if (do_utf8) {
1553                 LOAD_UTF8_CHARCLASS(digit,"0");
1554                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1555                     if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1556                         if (tmp && (norun || regtry(prog, s)))
1557                             goto got_it;
1558                         else
1559                             tmp = doevery;
1560                     }
1561                     else
1562                         tmp = 1;
1563                     s += uskip;
1564                 }
1565             }
1566             else {
1567                 while (s < strend) {
1568                     if (!isDIGIT(*s)) {
1569                         if (tmp && (norun || regtry(prog, s)))
1570                             goto got_it;
1571                         else
1572                             tmp = doevery;
1573                     }
1574                     else
1575                         tmp = 1;
1576                     s++;
1577                 }
1578             }
1579             break;
1580         case NDIGITL:
1581             PL_reg_flags |= RF_tainted;
1582             if (do_utf8) {
1583                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1584                     if (!isDIGIT_LC_utf8((U8*)s)) {
1585                         if (tmp && (norun || regtry(prog, s)))
1586                             goto got_it;
1587                         else
1588                             tmp = doevery;
1589                     }
1590                     else
1591                         tmp = 1;
1592                     s += uskip;
1593                 }
1594             }
1595             else {
1596                 while (s < strend) {
1597                     if (!isDIGIT_LC(*s)) {
1598                         if (tmp && (norun || regtry(prog, s)))
1599                             goto got_it;
1600                         else
1601                             tmp = doevery;
1602                     }
1603                     else
1604                         tmp = 1;
1605                     s++;
1606                 }
1607             }
1608             break;
1609         default:
1610             Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1611             break;
1612         }
1613         return 0;
1614       got_it:
1615         return s;
1616 }
1617
1618 /*
1619  - regexec_flags - match a regexp against a string
1620  */
1621 I32
1622 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1623               char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1624 /* strend: pointer to null at end of string */
1625 /* strbeg: real beginning of string */
1626 /* minend: end of match must be >=minend after stringarg. */
1627 /* data: May be used for some additional optimizations. */
1628 /* nosave: For optimizations. */
1629 {
1630     register char *s;
1631     register regnode *c;
1632     register char *startpos = stringarg;
1633     I32 minlen;         /* must match at least this many chars */
1634     I32 dontbother = 0; /* how many characters not to try at end */
1635     /* I32 start_shift = 0; */          /* Offset of the start to find
1636                                          constant substr. */            /* CC */
1637     I32 end_shift = 0;                  /* Same for the end. */         /* CC */
1638     I32 scream_pos = -1;                /* Internal iterator of scream. */
1639     char *scream_olds;
1640     SV* oreplsv = GvSV(PL_replgv);
1641     bool do_utf8 = DO_UTF8(sv);
1642     I32 multiline = prog->reganch & PMf_MULTILINE;
1643 #ifdef DEBUGGING
1644     SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
1645     SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
1646 #endif
1647
1648     GET_RE_DEBUG_FLAGS_DECL;
1649
1650     RX_MATCH_UTF8_set(prog,do_utf8);
1651
1652     PL_regcc = 0;
1653
1654     cache_re(prog);
1655 #ifdef DEBUGGING
1656     PL_regnarrate = DEBUG_r_TEST;
1657 #endif
1658
1659     /* Be paranoid... */
1660     if (prog == NULL || startpos == NULL) {
1661         Perl_croak(aTHX_ "NULL regexp parameter");
1662         return 0;
1663     }
1664
1665     minlen = prog->minlen;
1666     if (strend - startpos < minlen) {
1667         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1668                               "String too short [regexec_flags]...\n"));
1669         goto phooey;
1670     }
1671
1672     /* Check validity of program. */
1673     if (UCHARAT(prog->program) != REG_MAGIC) {
1674         Perl_croak(aTHX_ "corrupted regexp program");
1675     }
1676
1677     PL_reg_flags = 0;
1678     PL_reg_eval_set = 0;
1679     PL_reg_maxiter = 0;
1680
1681     if (prog->reganch & ROPT_UTF8)
1682         PL_reg_flags |= RF_utf8;
1683
1684     /* Mark beginning of line for ^ and lookbehind. */
1685     PL_regbol = startpos;
1686     PL_bostr  = strbeg;
1687     PL_reg_sv = sv;
1688
1689     /* Mark end of line for $ (and such) */
1690     PL_regeol = strend;
1691
1692     /* see how far we have to get to not match where we matched before */
1693     PL_regtill = startpos+minend;
1694
1695     /* We start without call_cc context.  */
1696     PL_reg_call_cc = 0;
1697
1698     /* If there is a "must appear" string, look for it. */
1699     s = startpos;
1700
1701     if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1702         MAGIC *mg;
1703
1704         if (flags & REXEC_IGNOREPOS)    /* Means: check only at start */
1705             PL_reg_ganch = startpos;
1706         else if (sv && SvTYPE(sv) >= SVt_PVMG
1707                   && SvMAGIC(sv)
1708                   && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1709                   && mg->mg_len >= 0) {
1710             PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1711             if (prog->reganch & ROPT_ANCH_GPOS) {
1712                 if (s > PL_reg_ganch)
1713                     goto phooey;
1714                 s = PL_reg_ganch;
1715             }
1716         }
1717         else                            /* pos() not defined */
1718             PL_reg_ganch = strbeg;
1719     }
1720
1721     if (!(flags & REXEC_CHECKED) && (prog->check_substr != Nullsv || prog->check_utf8 != Nullsv)) {
1722         re_scream_pos_data d;
1723
1724         d.scream_olds = &scream_olds;
1725         d.scream_pos = &scream_pos;
1726         s = re_intuit_start(prog, sv, s, strend, flags, &d);
1727         if (!s) {
1728             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1729             goto phooey;        /* not present */
1730         }
1731     }
1732
1733     DEBUG_EXECUTE_r({
1734          char *s0   = UTF ?
1735            pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1736                           UNI_DISPLAY_REGEX) :
1737            prog->precomp;
1738          int   len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1739          char *s1   = do_utf8 ? sv_uni_display(dsv1, sv, 60,
1740                                                UNI_DISPLAY_REGEX) : startpos;
1741          int   len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
1742          if (!PL_colorset)
1743              reginitcolors();
1744          PerlIO_printf(Perl_debug_log,
1745                        "%sMatching REx%s `%s%*.*s%s%s' against `%s%.*s%s%s'\n",
1746                        PL_colors[4], PL_colors[5], PL_colors[0],
1747                        len0, len0, s0,
1748                        PL_colors[1],
1749                        len0 > 60 ? "..." : "",
1750                        PL_colors[0],
1751                        (int)(len1 > 60 ? 60 : len1),
1752                        s1, PL_colors[1],
1753                        (len1 > 60 ? "..." : "")
1754               );
1755     });
1756
1757     /* Simplest case:  anchored match need be tried only once. */
1758     /*  [unless only anchor is BOL and multiline is set] */
1759     if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1760         if (s == startpos && regtry(prog, startpos))
1761             goto got_it;
1762         else if (multiline || (prog->reganch & ROPT_IMPLICIT)
1763                  || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1764         {
1765             char *end;
1766
1767             if (minlen)
1768                 dontbother = minlen - 1;
1769             end = HOP3c(strend, -dontbother, strbeg) - 1;
1770             /* for multiline we only have to try after newlines */
1771             if (prog->check_substr || prog->check_utf8) {
1772                 if (s == startpos)
1773                     goto after_try;
1774                 while (1) {
1775                     if (regtry(prog, s))
1776                         goto got_it;
1777                   after_try:
1778                     if (s >= end)
1779                         goto phooey;
1780                     if (prog->reganch & RE_USE_INTUIT) {
1781                         s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1782                         if (!s)
1783                             goto phooey;
1784                     }
1785                     else
1786                         s++;
1787                 }               
1788             } else {
1789                 if (s > startpos)
1790                     s--;
1791                 while (s < end) {
1792                     if (*s++ == '\n') { /* don't need PL_utf8skip here */
1793                         if (regtry(prog, s))
1794                             goto got_it;
1795                     }
1796                 }               
1797             }
1798         }
1799         goto phooey;
1800     } else if (prog->reganch & ROPT_ANCH_GPOS) {
1801         if (regtry(prog, PL_reg_ganch))
1802             goto got_it;
1803         goto phooey;
1804     }
1805
1806     /* Messy cases:  unanchored match. */
1807     if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
1808         /* we have /x+whatever/ */
1809         /* it must be a one character string (XXXX Except UTF?) */
1810         char ch;
1811 #ifdef DEBUGGING
1812         int did_match = 0;
1813 #endif
1814         if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1815             do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1816         ch = SvPVX(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1817
1818         if (do_utf8) {
1819             while (s < strend) {
1820                 if (*s == ch) {
1821                     DEBUG_EXECUTE_r( did_match = 1 );
1822                     if (regtry(prog, s)) goto got_it;
1823                     s += UTF8SKIP(s);
1824                     while (s < strend && *s == ch)
1825                         s += UTF8SKIP(s);
1826                 }
1827                 s += UTF8SKIP(s);
1828             }
1829         }
1830         else {
1831             while (s < strend) {
1832                 if (*s == ch) {
1833                     DEBUG_EXECUTE_r( did_match = 1 );
1834                     if (regtry(prog, s)) goto got_it;
1835                     s++;
1836                     while (s < strend && *s == ch)
1837                         s++;
1838                 }
1839                 s++;
1840             }
1841         }
1842         DEBUG_EXECUTE_r(if (!did_match)
1843                 PerlIO_printf(Perl_debug_log,
1844                                   "Did not find anchored character...\n")
1845                );
1846     }
1847     /*SUPPRESS 560*/
1848     else if (prog->anchored_substr != Nullsv
1849               || prog->anchored_utf8 != Nullsv
1850               || ((prog->float_substr != Nullsv || prog->float_utf8 != Nullsv)
1851                   && prog->float_max_offset < strend - s)) {
1852         SV *must;
1853         I32 back_max;
1854         I32 back_min;
1855         char *last;
1856         char *last1;            /* Last position checked before */
1857 #ifdef DEBUGGING
1858         int did_match = 0;
1859 #endif
1860         if (prog->anchored_substr || prog->anchored_utf8) {
1861             if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1862                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1863             must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1864             back_max = back_min = prog->anchored_offset;
1865         } else {
1866             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1867                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1868             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1869             back_max = prog->float_max_offset;
1870             back_min = prog->float_min_offset;
1871         }
1872         if (must == &PL_sv_undef)
1873             /* could not downgrade utf8 check substring, so must fail */
1874             goto phooey;
1875
1876         last = HOP3c(strend,    /* Cannot start after this */
1877                           -(I32)(CHR_SVLEN(must)
1878                                  - (SvTAIL(must) != 0) + back_min), strbeg);
1879
1880         if (s > PL_bostr)
1881             last1 = HOPc(s, -1);
1882         else
1883             last1 = s - 1;      /* bogus */
1884
1885         /* XXXX check_substr already used to find `s', can optimize if
1886            check_substr==must. */
1887         scream_pos = -1;
1888         dontbother = end_shift;
1889         strend = HOPc(strend, -dontbother);
1890         while ( (s <= last) &&
1891                 ((flags & REXEC_SCREAM)
1892                  ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
1893                                     end_shift, &scream_pos, 0))
1894                  : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
1895                                   (unsigned char*)strend, must,
1896                                   multiline ? FBMrf_MULTILINE : 0))) ) {
1897             /* we may be pointing at the wrong string */
1898             if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
1899                 s = strbeg + (s - SvPVX(sv));
1900             DEBUG_EXECUTE_r( did_match = 1 );
1901             if (HOPc(s, -back_max) > last1) {
1902                 last1 = HOPc(s, -back_min);
1903                 s = HOPc(s, -back_max);
1904             }
1905             else {
1906                 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1907
1908                 last1 = HOPc(s, -back_min);
1909                 s = t;          
1910             }
1911             if (do_utf8) {
1912                 while (s <= last1) {
1913                     if (regtry(prog, s))
1914                         goto got_it;
1915                     s += UTF8SKIP(s);
1916                 }
1917             }
1918             else {
1919                 while (s <= last1) {
1920                     if (regtry(prog, s))
1921                         goto got_it;
1922                     s++;
1923                 }
1924             }
1925         }
1926         DEBUG_EXECUTE_r(if (!did_match)
1927                     PerlIO_printf(Perl_debug_log, 
1928                                   "Did not find %s substr `%s%.*s%s'%s...\n",
1929                               ((must == prog->anchored_substr || must == prog->anchored_utf8)
1930                                ? "anchored" : "floating"),
1931                               PL_colors[0],
1932                               (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1933                               SvPVX(must),
1934                                   PL_colors[1], (SvTAIL(must) ? "$" : ""))
1935                );
1936         goto phooey;
1937     }
1938     else if ((c = prog->regstclass)) {
1939         if (minlen) {
1940             I32 op = (U8)OP(prog->regstclass);
1941             /* don't bother with what can't match */
1942             if (PL_regkind[op] != EXACT && op != CANY)
1943                 strend = HOPc(strend, -(minlen - 1));
1944         }
1945         DEBUG_EXECUTE_r({
1946             SV *prop = sv_newmortal();
1947             char *s0;
1948             char *s1;
1949             int len0;
1950             int len1;
1951
1952             regprop(prop, c);
1953             s0 = UTF ?
1954               pv_uni_display(dsv0, (U8*)SvPVX(prop), SvCUR(prop), 60,
1955                              UNI_DISPLAY_REGEX) :
1956               SvPVX(prop);
1957             len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1958             s1 = UTF ?
1959               sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
1960             len1 = UTF ? SvCUR(dsv1) : strend - s;
1961             PerlIO_printf(Perl_debug_log,
1962                           "Matching stclass `%*.*s' against `%*.*s'\n",
1963                           len0, len0, s0,
1964                           len1, len1, s1);
1965         });
1966         if (find_byclass(prog, c, s, strend, 0))
1967             goto got_it;
1968         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
1969     }
1970     else {
1971         dontbother = 0;
1972         if (prog->float_substr != Nullsv || prog->float_utf8 != Nullsv) {
1973             /* Trim the end. */
1974             char *last;
1975             SV* float_real;
1976
1977             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1978                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1979             float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
1980
1981             if (flags & REXEC_SCREAM) {
1982                 last = screaminstr(sv, float_real, s - strbeg,
1983                                    end_shift, &scream_pos, 1); /* last one */
1984                 if (!last)
1985                     last = scream_olds; /* Only one occurrence. */
1986                 /* we may be pointing at the wrong string */
1987                 else if (RX_MATCH_COPIED(prog))
1988                     s = strbeg + (s - SvPVX(sv));
1989             }
1990             else {
1991                 STRLEN len;
1992                 const char * const little = SvPV(float_real, len);
1993
1994                 if (SvTAIL(float_real)) {
1995                     if (memEQ(strend - len + 1, little, len - 1))
1996                         last = strend - len + 1;
1997                     else if (!multiline)
1998                         last = memEQ(strend - len, little, len)
1999                             ? strend - len : Nullch;
2000                     else
2001                         goto find_last;
2002                 } else {
2003                   find_last:
2004                     if (len)
2005                         last = rninstr(s, strend, little, little + len);
2006                     else
2007                         last = strend;  /* matching `$' */
2008                 }
2009             }
2010             if (last == NULL) {
2011                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2012                                       "%sCan't trim the tail, match fails (should not happen)%s\n",
2013                                       PL_colors[4], PL_colors[5]));
2014                 goto phooey; /* Should not happen! */
2015             }
2016             dontbother = strend - last + prog->float_min_offset;
2017         }
2018         if (minlen && (dontbother < minlen))
2019             dontbother = minlen - 1;
2020         strend -= dontbother;              /* this one's always in bytes! */
2021         /* We don't know much -- general case. */
2022         if (do_utf8) {
2023             for (;;) {
2024                 if (regtry(prog, s))
2025                     goto got_it;
2026                 if (s >= strend)
2027                     break;
2028                 s += UTF8SKIP(s);
2029             };
2030         }
2031         else {
2032             do {
2033                 if (regtry(prog, s))
2034                     goto got_it;
2035             } while (s++ < strend);
2036         }
2037     }
2038
2039     /* Failure. */
2040     goto phooey;
2041
2042 got_it:
2043     RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2044
2045     if (PL_reg_eval_set) {
2046         /* Preserve the current value of $^R */
2047         if (oreplsv != GvSV(PL_replgv))
2048             sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2049                                                   restored, the value remains
2050                                                   the same. */
2051         restore_pos(aTHX_ 0);
2052     }
2053
2054     /* make sure $`, $&, $', and $digit will work later */
2055     if ( !(flags & REXEC_NOT_FIRST) ) {
2056         RX_MATCH_COPY_FREE(prog);
2057         if (flags & REXEC_COPY_STR) {
2058             I32 i = PL_regeol - startpos + (stringarg - strbeg);
2059 #ifdef PERL_COPY_ON_WRITE
2060             if ((SvIsCOW(sv)
2061                  || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2062                 if (DEBUG_C_TEST) {
2063                     PerlIO_printf(Perl_debug_log,
2064                                   "Copy on write: regexp capture, type %d\n",
2065                                   (int) SvTYPE(sv));
2066                 }
2067                 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2068                 prog->subbeg = SvPVX(prog->saved_copy);
2069                 assert (SvPOKp(prog->saved_copy));
2070             } else
2071 #endif
2072             {
2073                 RX_MATCH_COPIED_on(prog);
2074                 s = savepvn(strbeg, i);
2075                 prog->subbeg = s;
2076             }
2077             prog->sublen = i;
2078         }
2079         else {
2080             prog->subbeg = strbeg;
2081             prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
2082         }
2083     }
2084
2085     return 1;
2086
2087 phooey:
2088     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2089                           PL_colors[4], PL_colors[5]));
2090     if (PL_reg_eval_set)
2091         restore_pos(aTHX_ 0);
2092     return 0;
2093 }
2094
2095 /*
2096  - regtry - try match at specific point
      -
      - Attempts to match `prog' with the match starting exactly at
      - `startpos'.  Returns 1 on success, after storing the end offset
      - (relative to PL_bostr) in prog->endp[0]; returns 0 on failure, after
      - REGCP_UNWIND(lastcp).
      -
      - When the pattern has ROPT_EVAL_SEEN and PL_reg_eval_set is still 0,
      - the first part of the function prepares the environment for embedded
      - code: it saves blk_oldsp and the temps floor, makes PL_reg_sv the
      - default SV, makes sure PL_reg_sv carries PERL_MAGIC_regex_global
      - magic, registers restore_pos() as a destructor, and switches PL_curpm
      - to PL_reg_curpm.
2097  */
2098 STATIC I32                      /* 0 failure, 1 success */
2099 S_regtry(pTHX_ regexp *prog, char *startpos)
2100 {
2101     register I32 i;
2102     register I32 *sp;
2103     register I32 *ep;
2104     CHECKPOINT lastcp;
2105     GET_RE_DEBUG_FLAGS_DECL;
2106
2107 #ifdef DEBUGGING
2108     PL_regindent = 0;   /* XXXX Not good when matches are reenterable... */
2109 #endif
2110     if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2111         MAGIC *mg;
2112
             /* Non-zero from now on, so this setup is skipped on later calls
              * until PL_reg_eval_set is reset to 0. */
2113         PL_reg_eval_set = RS_init;
2114         DEBUG_EXECUTE_r(DEBUG_s(
2115             PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2116                           (IV)(PL_stack_sp - PL_stack_base));
2117             ));
2118         SAVEI32(cxstack[cxstack_ix].blk_oldsp);
2119         cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2120         /* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2121         SAVETMPS;
2122         /* Apparently this is not needed, judging by wantarray. */
2123         /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2124            cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2125
2126         if (PL_reg_sv) {
2127             /* Make $_ available to executed code. */
2128             if (PL_reg_sv != DEFSV) {
2129                 SAVE_DEFSV;
2130                 DEFSV = PL_reg_sv;
2131             }
2132         
                 /* Reuse existing pos() magic if there is any; mg is assigned
                  * inside the condition, so it is only valid afterwards when
                  * the whole test succeeded or the body below has run. */
2133             if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
2134                   && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
2135                 /* prepare for quick setting of pos */
2136                 sv_magic(PL_reg_sv, (SV*)0,
2137                         PERL_MAGIC_regex_global, Nullch, 0);
2138                 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
                     /* a negative mg_len is what regexec_flags treats as
                      * "pos() not defined" */
2139                 mg->mg_len = -1;
2140             }
                 /* Remember the magic and the old pos so restore_pos() can
                  * presumably put it back -- restore_pos() is not visible
                  * here, confirm. */
2141             PL_reg_magic    = mg;
2142             PL_reg_oldpos   = mg->mg_len;
2143             SAVEDESTRUCTOR_X(restore_pos, 0);
2144         }
             /* Allocate the shared PMOP the first time it is needed. */
2145         if (!PL_reg_curpm) {
2146             Newz(22, PL_reg_curpm, 1, PMOP);
2147 #ifdef USE_ITHREADS
2148             {
2149                 SV* repointer = newSViv(0);
2150                 /* so we know which PL_regex_padav element is PL_reg_curpm */
2151                 SvFLAGS(repointer) |= SVf_BREAK;
2152                 av_push(PL_regex_padav,repointer);
2153                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2154                 PL_regex_pad = AvARRAY(PL_regex_padav);
2155             }
2156 #endif      
2157         }
             /* Point PL_curpm at a PMOP that refers to this regexp, keeping
              * the previous value in PL_reg_oldcurpm. */
2158         PM_SETRE(PL_reg_curpm, prog);
2159         PL_reg_oldcurpm = PL_curpm;
2160         PL_curpm = PL_reg_curpm;
2161         if (RX_MATCH_COPIED(prog)) {
2162             /*  Here is a serious problem: we cannot rewrite subbeg,
2163                 since it may be needed if this match fails.  Thus
2164                 $` inside (?{}) could fail... */
2165             PL_reg_oldsaved = prog->subbeg;
2166             PL_reg_oldsavedlen = prog->sublen;
2167 #ifdef PERL_COPY_ON_WRITE
2168             PL_nrs = prog->saved_copy;
2169 #endif
2170             RX_MATCH_COPIED_off(prog);
2171         }
2172         else
2173             PL_reg_oldsaved = Nullch;
             /* While the match runs, subbeg/sublen describe the live string. */
2174         prog->subbeg = PL_bostr;
2175         prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2176     }
         /* Per-attempt state: record the start offset and reset the paren
          * bookkeeping that the matcher reaches through the PL_reg* pointers. */
2177     prog->startp[0] = startpos - PL_bostr;
2178     PL_reginput = startpos;
2179     PL_regstartp = prog->startp;
2180     PL_regendp = prog->endp;
2181     PL_reglastparen = &prog->lastparen;
2182     PL_reglastcloseparen = &prog->lastcloseparen;
2183     prog->lastparen = 0;
2184     prog->lastcloseparen = 0;
2185     PL_regsize = 0;
2186     DEBUG_EXECUTE_r(PL_reg_starttry = startpos);
         /* Grow PL_reg_start_tmp so that it has more than nparens slots. */
2187     if (PL_reg_start_tmpl <= prog->nparens) {
2188         PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2189         if(PL_reg_start_tmp)
2190             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2191         else
2192             New(22, PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2193     }
2194
2195     /* XXXX What this code is doing here?!!!  There should be no need
2196        to do this again and again, PL_reglastparen should take care of
2197        this!  --ilya*/
2198
2199     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2200      * Actually, the code in regcppop() (which Ilya may be meaning by
2201      * PL_reglastparen), is not needed at all by the test suite
2202      * (op/regexp, op/pat, op/split), but that code is needed, oddly
2203      * enough, for building DynaLoader, or otherwise this
2204      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2205      * will happen.  Meanwhile, this code *is* needed for the
2206      * above-mentioned test suite tests to succeed.  The common theme
2207      * on those tests seems to be returning null fields from matches.
2208      * --jhi */
2209 #if 1
         /* *PL_reglastparen is prog->lastparen, which was just set to 0, so
          * the loop runs nparens times and stores -1 into startp[1..nparens]
          * and endp[1..nparens]; slot 0 is left alone because of the
          * pre-increment. */
2210     sp = prog->startp;
2211     ep = prog->endp;
2212     if (prog->nparens) {
2213         for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2214             *++sp = -1;
2215             *++ep = -1;
2216         }
2217     }
2218 #endif
         /* Take a checkpoint so a failed attempt can be undone below. */
2219     REGCP_SET(lastcp);
2220     if (regmatch(prog->program + 1)) {
2221         prog->endp[0] = PL_reginput - PL_bostr;
2222         return 1;
2223     }
2224     REGCP_UNWIND(lastcp);
2225     return 0;
2226 }
2227
/* Tags naming the two kinds of unwind record.  NOTE(review): presumably
 * these are the values stored in the `type' member of the records below;
 * the code that sets and tests it is outside this part of the file. */
2228 #define RE_UNWIND_BRANCH        1
2229 #define RE_UNWIND_BRANCHJ       2
2230
2231 union re_unwind_t;
2232
/* Common header of an unwind record.  re_unwind_branch_t below starts with
 * the same three members, in the same order. */
2233 typedef struct {                /* XX: makes sense to enlarge it... */
2234     I32 type;
2235     I32 prev;           /* NOTE(review): presumably identifies the previous record - confirm */
2236     CHECKPOINT lastcp;
2237 } re_unwind_generic_t;
2238
/* Branch flavour of the record: the generic header followed by extra
 * members.  next, locinput and nextchr share their names with locals of
 * S_regmatch(), and regindent with PL_regindent; presumably they hold saved
 * copies of those values - confirm against the code that fills them in. */
2239 typedef struct {
2240     I32 type;
2241     I32 prev;
2242     CHECKPOINT lastcp;
2243     I32 lastparen;
2244     regnode *next;
2245     char *locinput;
2246     I32 nextchr;
2247 #ifdef DEBUGGING
2248     int regindent;      /* member exists only in DEBUGGING builds */
2249 #endif
2250 } re_unwind_branch_t;
2251
/* Any unwind record.  All three members begin with an I32 `type', so the
 * leading `type' field is common to every view of the union. */
2252 typedef union re_unwind_t {
2253     I32 type;
2254     re_unwind_generic_t generic;
2255     re_unwind_branch_t branch;
2256 } re_unwind_t;
2257
/*
 * Exit shorthands for the matcher.  Each one expands to a goto, so it can
 * only be used inside a function that defines the corresponding label
 * (yes, no, no_anyof, yes_final, yes_loud, no_final, do_no).
 */
#define sayYES goto yes
#define sayNO goto no
#define sayNO_ANYOF goto no_anyof
#define sayYES_FINAL goto yes_final
#define sayYES_LOUD  goto yes_loud
#define sayNO_FINAL  goto no_final
#define sayNO_SILENT goto do_no
/* Jump to `yes' when x is true, otherwise to `no'.  Wrapped in
 * STMT_START/STMT_END so that the expansion is exactly one statement: the
 * bare if/else form broke when used as the body of an enclosing if that
 * has its own else.  Like before, the use must be followed by a ';'. */
#define saySAME(x) STMT_START { if (x) goto yes; else goto no; } STMT_END
2266
/*
 * Bit numbers within PL_reg_poscache[0].  The two lowest bits are
 * bookkeeping flags; the position bits proper start at POSCACHE_START.
 */
#define POSCACHE_SUCCESS 0      /* caching success rather than failure */
#define POSCACHE_SEEN 1         /* we know what we're caching */
#define POSCACHE_START 2        /* the real cache: this bit maps to pos 0 */

/*
 * CACHEsayYES - update the position cache, then leave via sayYES.
 *
 * Uses cache_offset and cache_bit from the enclosing scope; nothing is
 * touched when both are zero.  Otherwise:
 *   - if the cache polarity has not been decided yet (SEEN clear), mark the
 *     cache as a success cache by setting both SUCCESS and SEEN.  The two
 *     masks must be combined with bitwise '|': logical '||' evaluates to 1,
 *     which sets only bit 0 and leaves SEEN clear.
 *   - if the cache is a failure cache (SEEN set, SUCCESS clear), this
 *     success contradicts the recorded entry, so that entry's bit is cleared.
 *   - if the cache is already a success cache, nothing is changed here.
 */
#define CACHEsayYES STMT_START { \
    if (cache_offset | cache_bit) { \
        if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
            PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) | (1<<POSCACHE_SEEN); \
        else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
            /* cache records failure, but this is success */ \
            DEBUG_r( \
                PerlIO_printf(Perl_debug_log, \
                    "%*s  (remove success from failure cache)\n", \
                    REPORT_CODE_OFF+PL_regindent*2, "") \
            ); \
            PL_reg_poscache[cache_offset] &= ~(1<<cache_bit); \
        } \
    } \
    sayYES; \
} STMT_END
/*
 * CACHEsayNO - update the position cache, then leave via sayNO.
 *
 * Uses cache_offset and cache_bit from the enclosing scope; nothing is
 * touched when both are zero.  Otherwise, if the SEEN flag is still clear
 * it is set (SUCCESS is left alone); if SEEN is set and the cache is a
 * success cache, the entry at cache_offset/cache_bit is cleared because
 * this failure contradicts it.
 */
#define CACHEsayNO STMT_START { \
    if (cache_offset | cache_bit) { \
        if (PL_reg_poscache[0] & (1<<POSCACHE_SEEN)) { \
            if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS)) { \
                /* cache records success, but this is failure */ \
                DEBUG_r( \
                    PerlIO_printf(Perl_debug_log, \
                        "%*s  (remove failure from success cache)\n", \
                        REPORT_CODE_OFF+PL_regindent*2, "") \
                ); \
                PL_reg_poscache[cache_offset] &= ~(1<<cache_bit); \
            } \
        } \
        else \
            PL_reg_poscache[0] |= (1<<POSCACHE_SEEN); \
    } \
    sayNO; \
} STMT_END
2302
2303 /* Column offset used when printing debug messages such as
2304    'failed...'.  It is added to PL_regindent*2 to form the width of
2305    the leading pad.  Currently 29 makes these messages line up with
2306    the opcode they refer to.  Earlier perls used 25, which left the
2307    messages outdented and made the debug output hard to review.
2308 */
2309 #define REPORT_CODE_OFF 29
2310
2311
/* Initial capacity, counted in reg_trie_accepted entries, of the buffer
 * created by TRIE_CHECK_STATE_IS_ACCEPTING.  The definition deliberately
 * has no trailing semicolon so the macro can be used inside expressions.
 * Make sure there is a test for this +1 options in re_tests */
#define TRIE_INITAL_ACCEPT_BUFFLEN 4
2314
/*
 * TRIE_CHECK_STATE_IS_ACCEPTING
 *
 * If the current trie state has a non-zero wordnum, record
 * { wordnum, endpos = uc } in accept_buff[accepted] and increment accepted.
 *
 * Uses from the enclosing scope: trie, state, uc, accepted, bufflen,
 * sv_accept_buff and accept_buff.  The record storage is the string buffer
 * of sv_accept_buff, whose SvCUR is advanced by one record per hit.
 */
#define TRIE_CHECK_STATE_IS_ACCEPTING STMT_START {                       \
    if ( trie->states[ state ].wordnum ) {                               \
        if ( accepted ) {                                                \
            /* buffer already exists: double it when it is full */       \
            if ( accepted >= bufflen ) {                                 \
                bufflen *= 2;                                            \
                accept_buff =(reg_trie_accepted*)SvGROW( sv_accept_buff, \
                    bufflen * sizeof(reg_trie_accepted) );               \
            }                                                            \
            SvCUR_set( sv_accept_buff,SvCUR( sv_accept_buff )            \
                + sizeof( reg_trie_accepted ) );                         \
        } else {                                                         \
            /* first hit: ENTER/SAVETMPS, then create the mortal SV */   \
            /* that holds the records, sized for the initial capacity */ \
            ENTER;                                                       \
            SAVETMPS;                                                    \
            bufflen = TRIE_INITAL_ACCEPT_BUFFLEN ;                       \
            sv_accept_buff=NEWSV( 1234,                                  \
              bufflen * sizeof(reg_trie_accepted) - 1 );                 \
            SvCUR_set( sv_accept_buff, sizeof(reg_trie_accepted) );      \
            SvPOK_on( sv_accept_buff );                                  \
            sv_2mortal( sv_accept_buff );                                \
            accept_buff = (reg_trie_accepted*)SvPV_nolen( sv_accept_buff );\
        }                                                                \
        accept_buff[ accepted ].wordnum = trie->states[ state ].wordnum; \
        accept_buff[ accepted ].endpos = uc;                             \
        ++accepted;                                                      \
    } } STMT_END
2340
/*
 * TRIE_HANDLE_CHAR
 *
 * Map the code point uvc to a charid, follow the transition for it out of
 * the current state, and advance uc by len.
 *
 * Code points below 256 are looked up in trie->charmap; others are looked
 * up in the trie->widecharmap hash (keyed by the raw bytes of the UV) when
 * that hash exists, and get charid 0 otherwise.  The transition is taken
 * only if charid is non-zero, the computed index into trie->trans is in
 * range, and the slot's check field equals the current state; in every
 * other case state becomes 0.
 *
 * Uses from the enclosing scope: trie, uvc, charid, base, state, uc, len.
 */
#define TRIE_HANDLE_CHAR STMT_START {                                   \
        if ( uvc >= 256 ) {                                             \
            charid = 0;                                                 \
            if ( trie->widecharmap ) {                                  \
                SV **svpp = hv_fetch( trie->widecharmap, (char*)&uvc,   \
                                      sizeof( UV ), 0 );                \
                if ( svpp )                                             \
                    charid = (U16)SvIV( *svpp );                        \
            }                                                           \
        } else {                                                        \
            charid = trie->charmap[ uvc ];                              \
        }                                                               \
        if ( !charid ||                                                 \
             ( base + charid <= trie->uniquecharcount ) ||              \
             ( base + charid - 1 - trie->uniquecharcount >= trie->lasttrans) || \
             trie->trans[ base + charid - 1 - trie->uniquecharcount ].check != state ) \
        {                                                               \
            state = 0;                                                  \
        } else {                                                        \
            state = trie->trans[ base + charid - 1 - trie->uniquecharcount ].next;     \
        }                                                               \
        uc += len;                                                      \
    } STMT_END
2366
2367 /*
2368  - regmatch - main matching routine
2369  *
2370  * Conceptually the strategy is simple:  check to see whether the current
2371  * node matches, call self recursively to see whether the rest matches,
2372  * and then act accordingly.  In practice we make some effort to avoid
2373  * recursion, in particular by going through "ordinary" nodes (that don't
2374  * need to know whether the rest of the match failed) by a loop instead of
2375  * by recursion.
2376  */
2377 /* [lwall] I've hoisted the register declarations to the outer block in order to
2378  * maybe save a little bit of pushing and popping on the stack.  It also takes
2379  * advantage of machines that use a register save mask on subroutine entry.
2380  */
2381 STATIC I32                      /* 0 failure, 1 success */
2382 S_regmatch(pTHX_ regnode *prog)
2383 {
2384     dVAR;
2385     register regnode *scan;     /* Current node. */
2386     regnode *next;              /* Next node. */
2387     regnode *inner;             /* Next node in internal branch. */
2388     register I32 nextchr;       /* renamed nextchr - nextchar colides with
2389                                    function of same name */
2390     register I32 n;             /* no or next */
2391     register I32 ln = 0;        /* len or last */
2392     register char *s = Nullch;  /* operand or save */
2393     register char *locinput = PL_reginput;
2394     register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
2395     int minmod = 0, sw = 0, logical = 0;
2396     I32 unwind = 0;
2397
2398     /* used by the trie code */
2399     SV                 *sv_accept_buff = 0;  /* accepting states we have traversed */
2400     reg_trie_accepted  *accept_buff = 0;     /* "" */
2401     reg_trie_data      *trie;                /* what trie are we using right now */
2402     U32 accepted = 0;                        /* how many accepting states we have seen*/
2403
2404 #if 0
2405     I32 firstcp = PL_savestack_ix;
2406 #endif
2407     register bool do_utf8 = PL_reg_match_utf8;
2408 #ifdef DEBUGGING
2409     SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2410     SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2411     SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2412
2413     SV *re_debug_flags = NULL;
2414 #endif
2415
2416     GET_RE_DEBUG_FLAGS;
2417
2418 #ifdef DEBUGGING
2419     PL_regindent++;
2420 #endif
2421
2422
2423     /* Note that nextchr is a byte even in UTF */
2424     nextchr = UCHARAT(locinput);
2425     scan = prog;
2426     while (scan != NULL) {
2427
2428         DEBUG_EXECUTE_r( {
2429             SV *prop = sv_newmortal();
2430             int docolor = *PL_colors[0];
2431             int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2432             int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
2433             /* The part of the string before starttry has one color
2434                (pref0_len chars), between starttry and current
2435                position another one (pref_len - pref0_len chars),
2436                after the current position the third one.
2437                We assume that pref0_len <= pref_len, otherwise we
2438                decrease pref0_len.  */
2439             int pref_len = (locinput - PL_bostr) > (5 + taill) - l
2440                 ? (5 + taill) - l : locinput - PL_bostr;
2441             int pref0_len;
2442
2443             while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2444                 pref_len++;
2445             pref0_len = pref_len  - (locinput - PL_reg_starttry);
2446             if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
2447                 l = ( PL_regeol - locinput > (5 + taill) - pref_len
2448                       ? (5 + taill) - pref_len : PL_regeol - locinput);
2449             while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2450                 l--;
2451             if (pref0_len < 0)
2452                 pref0_len = 0;
2453             if (pref0_len > pref_len)
2454                 pref0_len = pref_len;
2455             regprop(prop, scan);
2456             {
2457               char *s0 =
2458                 do_utf8 && OP(scan) != CANY ?
2459                 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
2460                                pref0_len, 60, UNI_DISPLAY_REGEX) :
2461                 locinput - pref_len;
2462               int len0 = do_utf8 ? strlen(s0) : pref0_len;
2463               char *s1 = do_utf8 && OP(scan) != CANY ?
2464                 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
2465                                pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2466                 locinput - pref_len + pref0_len;
2467               int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2468               char *s2 = do_utf8 && OP(scan) != CANY ?
2469                 pv_uni_display(dsv2, (U8*)locinput,
2470                                PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2471                 locinput;
2472               int len2 = do_utf8 ? strlen(s2) : l;
2473               PerlIO_printf(Perl_debug_log,
2474                             "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2475                             (IV)(locinput - PL_bostr),
2476                             PL_colors[4],
2477                             len0, s0,
2478                             PL_colors[5],
2479                             PL_colors[2],
2480                             len1, s1,
2481                             PL_colors[3],
2482                             (docolor ? "" : "> <"),
2483                             PL_colors[0],
2484                             len2, s2,
2485                             PL_colors[1],
2486                             15 - l - pref_len + 1,
2487                             "",
2488                             (IV)(scan - PL_regprogram), PL_regindent*2, "",
2489                             SvPVX(prop));
2490             }
2491         });
2492
2493         next = scan + NEXT_OFF(scan);
2494         if (next == scan)
2495             next = NULL;
2496
2497         switch (OP(scan)) {
2498         case BOL:
2499             if (locinput == PL_bostr)
2500             {
2501                 /* regtill = regbol; */
2502                 break;
2503             }
2504             sayNO;
2505         case MBOL:
2506             if (locinput == PL_bostr ||
2507                 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2508             {
2509                 break;
2510             }
2511             sayNO;
2512         case SBOL:
2513             if (locinput == PL_bostr)
2514                 break;
2515             sayNO;
2516         case GPOS:
2517             if (locinput == PL_reg_ganch)
2518                 break;
2519             sayNO;
2520         case EOL:
2521                 goto seol;
2522         case MEOL:
2523             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2524                 sayNO;
2525             break;
2526         case SEOL:
2527           seol:
2528             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2529                 sayNO;
2530             if (PL_regeol - locinput > 1)
2531                 sayNO;
2532             break;
2533         case EOS:
2534             if (PL_regeol != locinput)
2535                 sayNO;
2536             break;
2537         case SANY:
2538             if (!nextchr && locinput >= PL_regeol)
2539                 sayNO;
2540             if (do_utf8) {
2541                 locinput += PL_utf8skip[nextchr];
2542                 if (locinput > PL_regeol)
2543                     sayNO;
2544                 nextchr = UCHARAT(locinput);
2545             }
2546             else
2547                 nextchr = UCHARAT(++locinput);
2548             break;
2549         case CANY:
2550             if (!nextchr && locinput >= PL_regeol)
2551                 sayNO;
2552             nextchr = UCHARAT(++locinput);
2553             break;
2554         case REG_ANY:
2555             if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2556                 sayNO;
2557             if (do_utf8) {
2558                 locinput += PL_utf8skip[nextchr];
2559                 if (locinput > PL_regeol)
2560                     sayNO;
2561                 nextchr = UCHARAT(locinput);
2562             }
2563             else
2564                 nextchr = UCHARAT(++locinput);
2565             break;
2566
2567
2568
2569         /*
2570            traverse the TRIE keeping track of all accepting states
2571            we transition through until we get to a failing node.
2572
2573            we use two slightly different pieces of code to handle
2574            the traversal depending on whether its case sensitive or
2575            not. we reuse the accept code however. (this should probably
2576            be turned into a macro.)
2577
2578         */
2579         case TRIEF:
2580         case TRIEFL:
2581             {
2582
2583                 U32 uniflags = ckWARN( WARN_UTF8 ) ? 0 : UTF8_ALLOW_ANY;
2584                 U8 *uc = ( U8* )locinput;
2585                 U32 state = 1;
2586                 U16 charid = 0;
2587                 U32 base = 0;
2588                 UV uvc = 0;
2589                 STRLEN len = 0;
2590                 STRLEN foldlen = 0;
2591                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2592                 U8 *uscan = (U8*)NULL;
2593                 STRLEN bufflen=0;
2594                 accepted = 0;
2595
2596                 trie = (reg_trie_data*)PL_regdata->data[ ARG( scan ) ];
2597
2598                 while ( state && uc <= (U8*)PL_regeol ) {
2599
2600                     TRIE_CHECK_STATE_IS_ACCEPTING;
2601
2602                     base = trie->states[ state ].trans.base;
2603
2604                     DEBUG_TRIE_EXECUTE_r(
2605                                 PerlIO_printf( Perl_debug_log,
2606                                     "%*s  %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ",
2607                                     REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
2608                                     (UV)state, (UV)base, (UV)accepted );
2609                     );
2610
2611                     if ( base ) {
2612
2613                         if ( do_utf8 || UTF ) {
2614                             if ( foldlen>0 ) {
2615                                 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
2616                                 foldlen -= len;
2617                                 uscan += len;
2618                                 len=0;
2619                             } else {
2620                                 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2621                                 uvc = to_uni_fold( uvc, foldbuf, &foldlen );
2622                                 foldlen -= UNISKIP( uvc );
2623                                 uscan = foldbuf + UNISKIP( uvc );
2624                             }
2625                         } else {
2626                             uvc = (UV)*uc;
2627                             len = 1;
2628                         }
2629
2630                         TRIE_HANDLE_CHAR;
2631
2632                     } else {
2633                         state = 0;
2634                     }
2635                     DEBUG_TRIE_EXECUTE_r(
2636                         PerlIO_printf( Perl_debug_log,
2637                             "Charid:%3x CV:%4"UVxf" After State: %4"UVxf"%s\n",
2638                             charid, uvc, (UV)state, PL_colors[5] );
2639                     );
2640                 }
2641                 if ( !accepted ) {
2642                    sayNO;
2643                 } else {
2644                     goto TrieAccept;
2645                 }
2646             }
2647             /* unreached codepoint: we jump into the middle of the next case
2648                from previous if blocks */
2649         case TRIE:
2650             {
2651                 U32 uniflags = ckWARN( WARN_UTF8 ) ? 0 : UTF8_ALLOW_ANY;
2652                 U8 *uc = (U8*)locinput;
2653                 U32 state = 1;
2654                 U16 charid = 0;
2655                 U32 base = 0;
2656                 UV uvc = 0;
2657                 STRLEN len = 0;
2658                 STRLEN bufflen = 0;
2659                 accepted = 0;
2660
2661                 trie = (reg_trie_data*)PL_regdata->data[ ARG( scan ) ];
2662
2663                 while ( state && uc <= (U8*)PL_regeol ) {
2664
2665                     TRIE_CHECK_STATE_IS_ACCEPTING;
2666
2667                     base = trie->states[ state ].trans.base;
2668
2669                     DEBUG_TRIE_EXECUTE_r(
2670                             PerlIO_printf( Perl_debug_log,
2671                                 "%*s  %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ",
2672                                 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
2673                                 (UV)state, (UV)base, (UV)accepted );
2674                     );
2675
2676                     if ( base ) {
2677
2678                         if ( do_utf8 || UTF ) {
2679                             uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2680                         } else {
2681                             uvc = (U32)*uc;
2682                             len = 1;
2683                         }
2684
2685                         TRIE_HANDLE_CHAR;
2686
2687                     } else {
2688                         state = 0;
2689                     }
2690                     DEBUG_TRIE_EXECUTE_r(
2691                             PerlIO_printf( Perl_debug_log,
2692                                 "Charid:%3x CV:%4"UVxf" After State: %4"UVxf"%s\n",
2693                                 charid, uvc, (UV)state, PL_colors[5] );
2694                     );
2695                 }
2696                 if ( !accepted ) {
2697                    sayNO;
2698                 }
2699             }
2700
2701
2702             /*
2703                There was at least one accepting state that we
2704                transitioned through. Presumably the number of accepting
2705                states is going to be low, typically one or two. So we
2706                simply scan through to find the one with lowest wordnum.
2707                Once we find it, we swap the last state into its place
2708                and decrement the size. We then try to match the rest of
2709                the pattern at the point where the word ends, if we
2710                succeed then we end the loop, otherwise the loop
2711                eventually terminates once all of the accepting states
2712                have been tried.
2713             */
2714         TrieAccept:
2715             {
2716                 int gotit = 0;
2717
2718                 if ( accepted == 1 ) {
2719                     DEBUG_EXECUTE_r({
2720                         SV **tmp = av_fetch( trie->words, accept_buff[ 0 ].wordnum-1, 0 );
2721                         PerlIO_printf( Perl_debug_log,
2722                             "%*s  %sonly one match : #%d <%s>%s\n",
2723                             REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
2724                             accept_buff[ 0 ].wordnum,
2725                             tmp ? SvPV_nolen( *tmp ) : "not compiled under -Dr",
2726                             PL_colors[5] );
2727                     });
2728                     PL_reginput = (char *)accept_buff[ 0 ].endpos;
2729                     /* in this case we free tmps/leave before we call regmatch
2730                        as we wont be using accept_buff again. */
2731                     FREETMPS;
2732                     LEAVE;
2733                     gotit = regmatch( scan + NEXT_OFF( scan ) );
2734                 } else {
2735                     DEBUG_EXECUTE_r(
2736                         PerlIO_printf( Perl_debug_log,"%*s  %sgot %"IVdf" possible matches%s\n",
2737                             REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], (IV)accepted,
2738                             PL_colors[5] );
2739                     );
2740                     while ( !gotit && accepted-- ) {
2741                         U32 best = 0;
2742                         U32 cur;
2743                         for( cur = 1 ; cur <= accepted ; cur++ ) {
2744                             DEBUG_TRIE_EXECUTE_r(
2745                                 PerlIO_printf( Perl_debug_log,
2746                                     "%*s  %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
2747                                     REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
2748                                     (IV)best, accept_buff[ best ].wordnum, (IV)cur,
2749                                     accept_buff[ cur ].wordnum, PL_colors[5] );
2750                             );
2751
2752                             if ( accept_buff[ cur ].wordnum < accept_buff[ best ].wordnum )
2753                                     best = cur;
2754                         }
2755                         DEBUG_EXECUTE_r({
2756                             SV **tmp = av_fetch( trie->words, accept_buff[ best ].wordnum - 1, 0 );
2757                             PerlIO_printf( Perl_debug_log, "%*s  %strying alternation #%d <%s> at 0x%p%s\n",
2758                                 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
2759                                 accept_buff[best].wordnum,
2760                                 tmp ? SvPV_nolen( *tmp ) : "not compiled under -Dr",scan,
2761                                 PL_colors[5] );
2762                         });
2763                         if ( best<accepted ) {
2764                             reg_trie_accepted tmp = accept_buff[ best ];
2765                             accept_buff[ best ] = accept_buff[ accepted ];
2766                             accept_buff[ accepted ] = tmp;
2767                             best = accepted;
2768                         }
2769                         PL_reginput = (char *)accept_buff[ best ].endpos;
2770
2771                         /* 
2772                            as far as I can tell we only need the SAVETMPS/FREETMPS 
2773                            for re's with EVAL in them but I'm leaving them in for 
2774                            all until I can be sure.
2775                          */
2776                         SAVETMPS;
2777                         gotit = regmatch( scan + NEXT_OFF( scan ) ) ;
2778                         FREETMPS;
2779                     }
2780                     FREETMPS;
2781                     LEAVE;
2782                 }
2783                 
2784                 if ( gotit ) {
2785                     sayYES;
2786                 } else {
2787                     sayNO;
2788                 }
2789             }
2790             /* unreached codepoint */
2791         case EXACT:
2792             s = STRING(scan);
2793             ln = STR_LEN(scan);
2794             if (do_utf8 != UTF) {
2795                 /* The target and the pattern have differing utf8ness. */
2796                 char *l = locinput;
2797                 char *e = s + ln;
2798                 STRLEN ulen;
2799
2800                 if (do_utf8) {
2801                     /* The target is utf8, the pattern is not utf8. */
2802                     while (s < e) {
2803                         if (l >= PL_regeol)
2804                              sayNO;
2805                         if (NATIVE_TO_UNI(*(U8*)s) !=
2806                             utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
2807                                            ckWARN(WARN_UTF8) ?
2808                                            0 : UTF8_ALLOW_ANY))
2809                              sayNO;
2810                         l += ulen;
2811                         s ++;
2812                     }
2813                 }
2814                 else {
2815                     /* The target is not utf8, the pattern is utf8. */
2816                     while (s < e) {
2817                         if (l >= PL_regeol)
2818                             sayNO;
2819                         if (NATIVE_TO_UNI(*((U8*)l)) !=
2820                             utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
2821                                            ckWARN(WARN_UTF8) ?
2822                                            0 : UTF8_ALLOW_ANY))
2823                             sayNO;
2824                         s += ulen;
2825                         l ++;
2826                     }
2827                 }
2828                 locinput = l;
2829                 nextchr = UCHARAT(locinput);
2830                 break;
2831             }
2832             /* The target and the pattern have the same utf8ness. */
2833             /* Inline the first character, for speed. */
2834             if (UCHARAT(s) != nextchr)
2835                 sayNO;
2836             if (PL_regeol - locinput < ln)
2837                 sayNO;
2838             if (ln > 1 && memNE(s, locinput, ln))
2839                 sayNO;
2840             locinput += ln;
2841             nextchr = UCHARAT(locinput);
2842             break;
2843         case EXACTFL:
2844             PL_reg_flags |= RF_tainted;
2845             /* FALL THROUGH */
2846         case EXACTF:
2847             s = STRING(scan);
2848             ln = STR_LEN(scan);
2849
2850             if (do_utf8 || UTF) {
2851               /* Either target or the pattern are utf8. */
2852                 char *l = locinput;
2853                 char *e = PL_regeol;
2854
2855                 if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
2856                                l, &e, 0,  do_utf8)) {
2857                      /* One more case for the sharp s:
2858                       * pack("U0U*", 0xDF) =~ /ss/i,
2859                       * the 0xC3 0x9F are the UTF-8
2860                       * byte sequence for the U+00DF. */
2861                      if (!(do_utf8 &&
2862                            toLOWER(s[0]) == 's' &&
2863                            ln >= 2 &&
2864                            toLOWER(s[1]) == 's' &&
2865                            (U8)l[0] == 0xC3 &&
2866                            e - l >= 2 &&
2867                            (U8)l[1] == 0x9F))
2868                           sayNO;
2869                 }
2870                 locinput = e;
2871                 nextchr = UCHARAT(locinput);
2872                 break;
2873             }
2874
2875             /* Neither the target and the pattern are utf8. */
2876
2877             /* Inline the first character, for speed. */
2878             if (UCHARAT(s) != nextchr &&
2879                 UCHARAT(s) != ((OP(scan) == EXACTF)
2880                                ? PL_fold : PL_fold_locale)[nextchr])
2881                 sayNO;
2882             if (PL_regeol - locinput < ln)
2883                 sayNO;
2884             if (ln > 1 && (OP(scan) == EXACTF
2885                            ? ibcmp(s, locinput, ln)
2886                            : ibcmp_locale(s, locinput, ln)))
2887                 sayNO;
2888             locinput += ln;
2889             nextchr = UCHARAT(locinput);
2890             break;
2891         case ANYOF:
2892             if (do_utf8) {
2893                 STRLEN inclasslen = PL_regeol - locinput;
2894
2895                 if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
2896                     sayNO_ANYOF;
2897                 if (locinput >= PL_regeol)
2898                     sayNO;
2899                 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
2900                 nextchr = UCHARAT(locinput);
2901                 break;
2902             }
2903             else {
2904                 if (nextchr < 0)
2905                     nextchr = UCHARAT(locinput);
2906                 if (!REGINCLASS(scan, (U8*)locinput))
2907                     sayNO_ANYOF;
2908                 if (!nextchr && locinput >= PL_regeol)
2909                     sayNO;
2910                 nextchr = UCHARAT(++locinput);
2911                 break;
2912             }
2913         no_anyof:
2914             /* If we might have the case of the German sharp s
2915              * in a casefolding Unicode character class. */
2916
2917             if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2918                  locinput += SHARP_S_SKIP;
2919                  nextchr = UCHARAT(locinput);
2920             }
2921             else
2922                  sayNO;
2923             break;
2924         case ALNUML:
2925             PL_reg_flags |= RF_tainted;
2926             /* FALL THROUGH */
2927         case ALNUM:
2928             if (!nextchr)
2929                 sayNO;
2930             if (do_utf8) {
2931                 LOAD_UTF8_CHARCLASS(alnum,"a");
2932                 if (!(OP(scan) == ALNUM
2933                       ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2934                       : isALNUM_LC_utf8((U8*)locinput)))
2935                 {
2936                     sayNO;
2937                 }
2938                 locinput += PL_utf8skip[nextchr];
2939                 nextchr = UCHARAT(locinput);
2940                 break;
2941             }
2942             if (!(OP(scan) == ALNUM
2943                   ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2944                 sayNO;
2945             nextchr = UCHARAT(++locinput);
2946             break;
2947         case NALNUML:
2948             PL_reg_flags |= RF_tainted;
2949             /* FALL THROUGH */
2950         case NALNUM:
2951             if (!nextchr && locinput >= PL_regeol)
2952                 sayNO;
2953             if (do_utf8) {
2954                 LOAD_UTF8_CHARCLASS(alnum,"a");
2955                 if (OP(scan) == NALNUM
2956                     ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2957                     : isALNUM_LC_utf8((U8*)locinput))
2958                 {
2959                     sayNO;
2960                 }
2961                 locinput += PL_utf8skip[nextchr];
2962                 nextchr = UCHARAT(locinput);
2963                 break;
2964             }
2965             if (OP(scan) == NALNUM
2966                 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2967                 sayNO;
2968             nextchr = UCHARAT(++locinput);
2969             break;
2970         case BOUNDL:
2971         case NBOUNDL:
2972             PL_reg_flags |= RF_tainted;
2973             /* FALL THROUGH */
2974         case BOUND:
2975         case NBOUND:
2976             /* was last char in word? */
2977             if (do_utf8) {
2978                 if (locinput == PL_bostr)
2979                     ln = '\n';
2980                 else {
2981                     U8 *r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
2982                 
2983                     ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
2984                 }
2985                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2986                     ln = isALNUM_uni(ln);
2987                     LOAD_UTF8_CHARCLASS(alnum,"a");
2988                     n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
2989                 }
2990                 else {
2991                     ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
2992                     n = isALNUM_LC_utf8((U8*)locinput);
2993                 }
2994             }
2995             else {
2996                 ln = (locinput != PL_bostr) ?
2997                     UCHARAT(locinput - 1) : '\n';
2998                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2999                     ln = isALNUM(ln);
3000                     n = isALNUM(nextchr);
3001                 }
3002                 else {
3003                     ln = isALNUM_LC(ln);
3004                     n = isALNUM_LC(nextchr);
3005                 }
3006             }
3007             if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3008                                     OP(scan) == BOUNDL))
3009                     sayNO;
3010             break;
3011         case SPACEL:
3012             PL_reg_flags |= RF_tainted;
3013             /* FALL THROUGH */
3014         case SPACE:
3015             if (!nextchr)
3016                 sayNO;
3017             if (do_utf8) {
3018                 if (UTF8_IS_CONTINUED(nextchr)) {
3019                     LOAD_UTF8_CHARCLASS(space," ");
3020                     if (!(OP(scan) == SPACE
3021                           ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3022                           : isSPACE_LC_utf8((U8*)locinput)))
3023                     {
3024                         sayNO;
3025                     }
3026                     locinput += PL_utf8skip[nextchr];
3027                     nextchr = UCHARAT(locinput);
3028                     break;
3029                 }
3030                 if (!(OP(scan) == SPACE
3031                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3032                     sayNO;
3033                 nextchr = UCHARAT(++locinput);
3034             }
3035             else {
3036                 if (!(OP(scan) == SPACE
3037                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3038                     sayNO;
3039                 nextchr = UCHARAT(++locinput);
3040             }
3041             break;
3042         case NSPACEL:
3043             PL_reg_flags |= RF_tainted;
3044             /* FALL THROUGH */
3045         case NSPACE:
3046             if (!nextchr && locinput >= PL_regeol)
3047                 sayNO;
3048             if (do_utf8) {
3049                 LOAD_UTF8_CHARCLASS(space," ");
3050                 if (OP(scan) == NSPACE
3051                     ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3052                     : isSPACE_LC_utf8((U8*)locinput))
3053                 {
3054                     sayNO;
3055                 }
3056                 locinput += PL_utf8skip[nextchr];
3057                 nextchr = UCHARAT(locinput);
3058                 break;
3059             }
3060             if (OP(scan) == NSPACE
3061                 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
3062                 sayNO;
3063             nextchr = UCHARAT(++locinput);
3064             break;
3065         case DIGITL:
3066             PL_reg_flags |= RF_tainted;
3067             /* FALL THROUGH */
3068         case DIGIT:
3069             if (!nextchr)
3070                 sayNO;
3071             if (do_utf8) {
3072                 LOAD_UTF8_CHARCLASS(digit,"0");
3073                 if (!(OP(scan) == DIGIT
3074                       ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3075                       : isDIGIT_LC_utf8((U8*)locinput)))
3076                 {
3077                     sayNO;
3078                 }
3079                 locinput += PL_utf8skip[nextchr];
3080                 nextchr = UCHARAT(locinput);
3081                 break;
3082             }
3083             if (!(OP(scan) == DIGIT
3084                   ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
3085                 sayNO;
3086             nextchr = UCHARAT(++locinput);
3087             break;
3088         case NDIGITL:
3089             PL_reg_flags |= RF_tainted;
3090             /* FALL THROUGH */
3091         case NDIGIT:
3092             if (!nextchr && locinput >= PL_regeol)
3093                 sayNO;
3094             if (do_utf8) {
3095                 LOAD_UTF8_CHARCLASS(digit,"0");
3096                 if (OP(scan) == NDIGIT
3097                     ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3098                     : isDIGIT_LC_utf8((U8*)locinput))
3099                 {
3100                     sayNO;
3101                 }
3102                 locinput += PL_utf8skip[nextchr];
3103                 nextchr = UCHARAT(locinput);
3104                 break;
3105             }
3106             if (OP(scan) == NDIGIT
3107                 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
3108                 sayNO;
3109             nextchr = UCHARAT(++locinput);
3110             break;
3111         case CLUMP:
3112             if (locinput >= PL_regeol)
3113                 sayNO;
3114             if  (do_utf8) {
3115                 LOAD_UTF8_CHARCLASS(mark,"~");
3116                 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3117                     sayNO;
3118                 locinput += PL_utf8skip[nextchr];
3119                 while (locinput < PL_regeol &&
3120                        swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3121                     locinput += UTF8SKIP(locinput);
3122                 if (locinput > PL_regeol)
3123                     sayNO;
3124             } 
3125             else
3126                locinput++;
3127             nextchr = UCHARAT(locinput);
3128             break;
3129         case REFFL:
3130             PL_reg_flags |= RF_tainted;
3131             /* FALL THROUGH */
3132         case REF:
3133         case REFF:
3134             n = ARG(scan);  /* which paren pair */
3135             ln = PL_regstartp[n];
3136             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3137             if ((I32)*PL_reglastparen < n || ln == -1)
3138                 sayNO;                  /* Do not match unless seen CLOSEn. */
3139             if (ln == PL_regendp[n])
3140                 break;
3141
3142             s = PL_bostr + ln;
3143             if (do_utf8 && OP(scan) != REF) {   /* REF can do byte comparison */
3144                 char *l = locinput;
3145                 char *e = PL_bostr + PL_regendp[n];
3146                 /*
3147                  * Note that we can't do the "other character" lookup trick as
3148                  * in the 8-bit case (no pun intended) because in Unicode we
3149                  * have to map both upper and title case to lower case.
3150                  */
3151                 if (OP(scan) == REFF) {
3152                     STRLEN ulen1, ulen2;
3153                     U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3154                     U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3155                     while (s < e) {
3156                         if (l >= PL_regeol)
3157                             sayNO;
3158                         toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3159                         toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3160                         if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3161                             sayNO;
3162                         s += ulen1;
3163                         l += ulen2;
3164                     }
3165                 }
3166                 locinput = l;
3167                 nextchr = UCHARAT(locinput);
3168                 break;
3169             }
3170
3171             /* Inline the first character, for speed. */
3172             if (UCHARAT(s) != nextchr &&
3173                 (OP(scan) == REF ||
3174                  (UCHARAT(s) != ((OP(scan) == REFF
3175                                   ? PL_fold : PL_fold_locale)[nextchr]))))
3176                 sayNO;
3177             ln = PL_regendp[n] - ln;
3178             if (locinput + ln > PL_regeol)
3179                 sayNO;
3180             if (ln > 1 && (OP(scan) == REF
3181                            ? memNE(s, locinput, ln)
3182                            : (OP(scan) == REFF
3183                               ? ibcmp(s, locinput, ln)
3184                               : ibcmp_locale(s, locinput, ln))))
3185                 sayNO;
3186             locinput += ln;
3187             nextchr = UCHARAT(locinput);
3188             break;
3189
3190         case NOTHING:
3191         case TAIL:
3192             break;
3193         case BACK:
3194             break;
3195         case EVAL:
3196         {
3197             dSP;
3198             OP_4tree *oop = PL_op;
3199             COP *ocurcop = PL_curcop;
3200             PAD *old_comppad;
3201             SV *ret;
3202             struct regexp *oreg = PL_reg_re;
3203         
3204             n = ARG(scan);
3205             PL_op = (OP_4tree*)PL_regdata->data[n];
3206             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3207             PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_regdata->data[n + 2]);
3208             PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
3209
3210             {
3211                 SV **before = SP;
3212                 CALLRUNOPS(aTHX);                       /* Scalar context. */
3213                 SPAGAIN;
3214                 if (SP == before)
3215                     ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
3216                 else {
3217                     ret = POPs;
3218                     PUTBACK;
3219                 }
3220             }
3221
3222             PL_op = oop;
3223             PAD_RESTORE_LOCAL(old_comppad);
3224             PL_curcop = ocurcop;
3225             if (logical) {
3226                 if (logical == 2) {     /* Postponed subexpression. */
3227                     regexp *re;
3228                     MAGIC *mg = Null(MAGIC*);
3229                     re_cc_state state;
3230                     CHECKPOINT cp, lastcp;
3231                     int toggleutf;
3232                     register SV *sv;
3233
3234                     if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3235                         mg = mg_find(sv, PERL_MAGIC_qr);
3236                     else if (SvSMAGICAL(ret)) {
3237                         if (SvGMAGICAL(ret))
3238                             sv_unmagic(ret, PERL_MAGIC_qr);
3239                         else
3240                             mg = mg_find(ret, PERL_MAGIC_qr);
3241                     }
3242
3243                     if (mg) {
3244                         re = (regexp *)mg->mg_obj;
3245                         (void)ReREFCNT_inc(re);
3246                     }
3247                     else {
3248                         STRLEN len;
3249                         char *t = SvPV(ret, len);
3250                         PMOP pm;
3251                         char *oprecomp = PL_regprecomp;
3252                         I32 osize = PL_regsize;
3253                         I32 onpar = PL_regnpar;
3254
3255                         Zero(&pm, 1, PMOP);
3256                         if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
3257                         re = CALLREGCOMP(aTHX_ t, t + len, &pm);
3258                         if (!(SvFLAGS(ret)
3259                               & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3260                                 | SVs_GMG)))
3261                             sv_magic(ret,(SV*)ReREFCNT_inc(re),
3262                                         PERL_MAGIC_qr,0,0);
3263                         PL_regprecomp = oprecomp;
3264                         PL_regsize = osize;
3265                         PL_regnpar = onpar;
3266                     }
3267                     DEBUG_EXECUTE_r(
3268                         PerlIO_printf(Perl_debug_log,
3269                                       "Entering embedded `%s%.60s%s%s'\n",
3270                                       PL_colors[0],
3271                                       re->precomp,
3272                                       PL_colors[1],
3273                                       (strlen(re->precomp) > 60 ? "..." : ""))
3274                         );
3275                     state.node = next;
3276                     state.prev = PL_reg_call_cc;
3277                     state.cc = PL_regcc;
3278                     state.re = PL_reg_re;
3279
3280                     PL_regcc = 0;
3281                 
3282                     cp = regcppush(0);  /* Save *all* the positions. */
3283                     REGCP_SET(lastcp);
3284                     cache_re(re);
3285                     state.ss = PL_savestack_ix;
3286                     *PL_reglastparen = 0;
3287                     *PL_reglastcloseparen = 0;
3288                     PL_reg_call_cc = &state;
3289                     PL_reginput = locinput;
3290                     toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
3291                                 ((re->reganch & ROPT_UTF8) != 0);
3292                     if (toggleutf) PL_reg_flags ^= RF_utf8;
3293
3294                     /* XXXX This is too dramatic a measure... */
3295                     PL_reg_maxiter = 0;
3296
3297                     if (regmatch(re->program + 1)) {
3298                         /* Even though we succeeded, we need to restore
3299                            global variables, since we may be wrapped inside
3300                            SUSPEND, thus the match may be not finished yet. */
3301
3302                         /* XXXX Do this only if SUSPENDed? */
3303                         PL_reg_call_cc = state.prev;
3304                         PL_regcc = state.cc;
3305                         PL_reg_re = state.re;
3306                         cache_re(PL_reg_re);
3307                         if (toggleutf) PL_reg_flags ^= RF_utf8;
3308
3309                         /* XXXX This is too dramatic a measure... */
3310                         PL_reg_maxiter = 0;
3311
3312                         /* These are needed even if not SUSPEND. */
3313                         ReREFCNT_dec(re);
3314                         regcpblow(cp);
3315                         sayYES;
3316                     }
3317                     ReREFCNT_dec(re);
3318                     REGCP_UNWIND(lastcp);
3319                     regcppop();
3320                     PL_reg_call_cc = state.prev;
3321                     PL_regcc = state.cc;
3322                     PL_reg_re = state.re;
3323                     cache_re(PL_reg_re);
3324                     if (toggleutf) PL_reg_flags ^= RF_utf8;
3325
3326                     /* XXXX This is too dramatic a measure... */
3327                     PL_reg_maxiter = 0;
3328
3329                     logical = 0;
3330                     sayNO;
3331                 }
3332                 sw = SvTRUE(ret);
3333                 logical = 0;
3334             }
3335             else {
3336                 sv_setsv(save_scalar(PL_replgv), ret);
3337                 cache_re(oreg);
3338             }
3339             break;
3340         }
3341         case OPEN:
3342             n = ARG(scan);  /* which paren pair */
3343             PL_reg_start_tmp[n] = locinput;
3344             if (n > PL_regsize)
3345                 PL_regsize = n;
3346             break;
3347         case CLOSE:
3348             n = ARG(scan);  /* which paren pair */
3349             PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3350             PL_regendp[n] = locinput - PL_bostr;
3351             if (n > (I32)*PL_reglastparen)
3352                 *PL_reglastparen = n;
3353             *PL_reglastcloseparen = n;
3354             break;
3355         case GROUPP:
3356             n = ARG(scan);  /* which paren pair */
3357             sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
3358             break;
3359         case IFTHEN:
3360             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3361             if (sw)
3362                 next = NEXTOPER(NEXTOPER(scan));
3363             else {
3364                 next = scan + ARG(scan);
3365                 if (OP(next) == IFTHEN) /* Fake one. */
3366                     next = NEXTOPER(NEXTOPER(next));
3367             }
3368             break;
3369         case LOGICAL:
3370             logical = scan->flags;
3371             break;
3372 /*******************************************************************
3373  PL_regcc contains infoblock about the innermost (...)* loop, and
3374  a pointer to the next outer infoblock.
3375
3376  Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3377
3378    1) After matching Y, regnode for CURLYX is processed;
3379
3380    2) This regnode creates infoblock on the stack, and calls
3381       regmatch() recursively with the starting point at WHILEM node;
3382
3383    3) Each hit of WHILEM node tries to match A and Z (in the order
3384       depending on the current iteration, min/max of {min,max} and
3385       greediness).  The information about where are nodes for "A"
3386       and "Z" is read from the infoblock, as is info on how many times "A"
3387       was already matched, and greediness.
3388
3389    4) After A matches, the same WHILEM node is hit again.
3390
3391    5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
3392       of the same pair.  Thus when WHILEM tries to match Z, it temporarily
3393       resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
3394       as in (Y(A)*Z)*.  If Z matches, the automaton will hit the WHILEM node
3395       of the external loop.
3396
3397  Currently present infoblocks form a tree with a stem formed by PL_regcc
3398  and whatever it mentions via ->next, and additional attached trees
3399  corresponding to temporarily unset infoblocks as in "5" above.
3400
3401  In the following picture infoblocks for outer loop of
3402  (Y(A)*?Z)*?T are denoted O, for inner I.  NULL starting block
3403  is denoted by x.  The matched string is YAAZYAZT.  Temporarily postponed
3404  infoblocks are drawn below the "reset" infoblock.
3405
3406  In fact in the picture below we do not show failed matches for Z and T
3407  by WHILEM blocks.  [We illustrate minimal matches, since for them it is
3408  more obvious *why* one needs to *temporarily* unset infoblocks.]
3409
3410   Matched       REx position    InfoBlocks      Comment
3411                 (Y(A)*?Z)*?T    x
3412                 Y(A)*?Z)*?T     x <- O
3413   Y             (A)*?Z)*?T      x <- O
3414   Y             A)*?Z)*?T       x <- O <- I
3415   YA            )*?Z)*?T        x <- O <- I
3416   YA            A)*?Z)*?T       x <- O <- I
3417   YAA           )*?Z)*?T        x <- O <- I
3418   YAA           Z)*?T           x <- O          # Temporary unset I
3419                                      I
3420
3421   YAAZ          Y(A)*?Z)*?T     x <- O
3422                                      I
3423
3424   YAAZY         (A)*?Z)*?T      x <- O
3425                                      I
3426
3427   YAAZY         A)*?Z)*?T       x <- O <- I
3428                                      I
3429
3430   YAAZYA        )*?Z)*?T        x <- O <- I     
3431                                      I
3432
3433   YAAZYA        Z)*?T           x <- O          # Temporary unset I
3434                                      I,I
3435
3436   YAAZYAZ       )*?T            x <- O
3437                                      I,I
3438
3439   YAAZYAZ       T               x               # Temporary unset O
3440                                 O
3441                                 I,I
3442
3443   YAAZYAZT                      x
3444                                 O
3445                                 I,I
3446  *******************************************************************/
3447         case CURLYX: {
3448                 CURCUR cc;
3449                 CHECKPOINT cp = PL_savestack_ix;
3450                 /* No need to save/restore up to this paren */
3451                 I32 parenfloor = scan->flags;
3452
3453                 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3454                     next += ARG(next);
3455                 cc.oldcc = PL_regcc;
3456                 PL_regcc = &cc;
3457                 /* XXXX Probably it is better to teach regpush to support
3458                    parenfloor > PL_regsize... */
3459                 if (parenfloor > (I32)*PL_reglastparen)
3460                     parenfloor = *PL_reglastparen; /* Pessimization... */
3461                 cc.parenfloor = parenfloor;
3462                 cc.cur = -1;
3463                 cc.min = ARG1(scan);
3464                 cc.max  = ARG2(scan);
3465                 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3466                 cc.next = next;
3467                 cc.minmod = minmod;
3468                 cc.lastloc = 0;
3469                 PL_reginput = locinput;
3470                 n = regmatch(PREVOPER(next));   /* start on the WHILEM */
3471                 regcpblow(cp);
3472                 PL_regcc = cc.oldcc;
3473                 saySAME(n);
3474             }
3475             /* NOT REACHED */
3476         case WHILEM: {
3477                 /*
3478                  * This is really hard to understand, because after we match
3479                  * what we're trying to match, we must make sure the rest of
3480                  * the REx is going to match for sure, and to do that we have
3481                  * to go back UP the parse tree by recursing ever deeper.  And
3482                  * if it fails, we have to reset our parent's current state
3483                  * so that we can try again after backing off.
3484                  */
3485
3486                 CHECKPOINT cp, lastcp;
3487                 CURCUR* cc = PL_regcc;
3488                 char *lastloc = cc->lastloc; /* Detection of 0-len. */
3489                 I32 cache_offset = 0, cache_bit = 0;
3490                 
3491                 n = cc->cur + 1;        /* how many we know we matched */
3492                 PL_reginput = locinput;
3493
3494                 DEBUG_EXECUTE_r(
3495                     PerlIO_printf(Perl_debug_log,
3496                                   "%*s  %ld out of %ld..%ld  cc=%"UVxf"\n",
3497                                   REPORT_CODE_OFF+PL_regindent*2, "",
3498                                   (long)n, (long)cc->min,
3499                                   (long)cc->max, PTR2UV(cc))
3500                     );
3501
3502                 /* If degenerate scan matches "", assume scan done. */
3503
3504                 if (locinput == cc->lastloc && n >= cc->min) {
3505                     PL_regcc = cc->oldcc;
3506                     if (PL_regcc)
3507                         ln = PL_regcc->cur;
3508                     DEBUG_EXECUTE_r(
3509                         PerlIO_printf(Perl_debug_log,
3510                            "%*s  empty match detected, try continuation...\n",
3511                            REPORT_CODE_OFF+PL_regindent*2, "")
3512                         );
3513                     if (regmatch(cc->next))
3514                         sayYES;
3515                     if (PL_regcc)
3516                         PL_regcc->cur = ln;
3517                     PL_regcc = cc;
3518                     sayNO;
3519                 }
3520
3521                 /* First just match a string of min scans. */
3522
3523                 if (n < cc->min) {
3524                     cc->cur = n;
3525                     cc->lastloc = locinput;
3526                     if (regmatch(cc->scan))
3527                         sayYES;
3528                     cc->cur = n - 1;
3529                     cc->lastloc = lastloc;
3530                     sayNO;
3531                 }
3532
3533                 if (scan->flags) {
3534                     /* Check whether we already were at this position.
3535                         Postpone detection until we know the match is not
3536                         *that* much linear. */
3537                 if (!PL_reg_maxiter) {
3538                     PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3539                     PL_reg_leftiter = PL_reg_maxiter;
3540                 }
3541                 if (PL_reg_leftiter-- == 0) {
3542                     I32 size = (PL_reg_maxiter + 7 + POSCACHE_START)/8;
3543                     if (PL_reg_poscache) {
3544                         if ((I32)PL_reg_poscache_size < size) {
3545                             Renew(PL_reg_poscache, size, char);
3546                             PL_reg_poscache_size = size;
3547                         }
3548                         Zero(PL_reg_poscache, size, char);
3549                     }
3550                     else {
3551                         PL_reg_poscache_size = size;
3552                         Newz(29, PL_reg_poscache, size, char);
3553                     }
3554                     DEBUG_EXECUTE_r(
3555                         PerlIO_printf(Perl_debug_log,
3556               "%sDetected a super-linear match, switching on caching%s...\n",
3557                                       PL_colors[4], PL_colors[5])
3558                         );
3559                 }
3560                 if (PL_reg_leftiter < 0) {
3561                     cache_offset = locinput - PL_bostr;
3562
3563                     cache_offset = (scan->flags & 0xf) - 1 + POSCACHE_START
3564                             + cache_offset * (scan->flags>>4);
3565                     cache_bit = cache_offset % 8;
3566                     cache_offset /= 8;
3567                     if (PL_reg_poscache[cache_offset] & (1<<cache_bit)) {
3568                     DEBUG_EXECUTE_r(
3569                         PerlIO_printf(Perl_debug_log,
3570                                       "%*s  already tried at this position...\n",
3571                                       REPORT_CODE_OFF+PL_regindent*2, "")
3572                         );
3573                         if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))
3574                             /* cache records success */
3575                             sayYES;
3576                         else
3577                             /* cache records failure */
3578                             sayNO_SILENT;
3579                     }
3580                     PL_reg_poscache[cache_offset] |= (1<<cache_bit);
3581                 }
3582                 }
3583
3584                 /* Prefer next over scan for minimal matching. */
3585
3586                 if (cc->minmod) {
3587                     PL_regcc = cc->oldcc;
3588                     if (PL_regcc)
3589                         ln = PL_regcc->cur;
3590                     cp = regcppush(cc->parenfloor);
3591                     REGCP_SET(lastcp);
3592                     if (regmatch(cc->next)) {
3593                         regcpblow(cp);
3594                         CACHEsayYES;    /* All done. */
3595                     }
3596                     REGCP_UNWIND(lastcp);
3597                     regcppop();
3598                     if (PL_regcc)
3599                         PL_regcc->cur = ln;
3600                     PL_regcc = cc;
3601
3602                     if (n >= cc->max) { /* Maximum greed exceeded? */
3603                         if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3604                             && !(PL_reg_flags & RF_warned)) {
3605                             PL_reg_flags |= RF_warned;
3606                             Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3607                                  "Complex regular subexpression recursion",
3608                                  REG_INFTY - 1);
3609                         }
3610                         CACHEsayNO;
3611                     }
3612
3613                     DEBUG_EXECUTE_r(
3614                         PerlIO_printf(Perl_debug_log,
3615                                       "%*s  trying longer...\n",
3616                                       REPORT_CODE_OFF+PL_regindent*2, "")
3617                         );
3618                     /* Try scanning more and see if it helps. */
3619                     PL_reginput = locinput;
3620                     cc->cur = n;
3621                     cc->lastloc = locinput;
3622                     cp = regcppush(cc->parenfloor);
3623                     REGCP_SET(lastcp);
3624                     if (regmatch(cc->scan)) {
3625                         regcpblow(cp);
3626                         CACHEsayYES;
3627                     }
3628                     REGCP_UNWIND(lastcp);
3629                     regcppop();
3630                     cc->cur = n - 1;
3631                     cc->lastloc = lastloc;
3632                     CACHEsayNO;
3633                 }
3634
3635                 /* Prefer scan over next for maximal matching. */
3636
3637                 if (n < cc->max) {      /* More greed allowed? */
3638                     cp = regcppush(cc->parenfloor);
3639                     cc->cur = n;
3640                     cc->lastloc = locinput;
3641                     REGCP_SET(lastcp);
3642                     if (regmatch(cc->scan)) {
3643                         regcpblow(cp);
3644                         CACHEsayYES;
3645                     }
3646                     REGCP_UNWIND(lastcp);
3647                     regcppop();         /* Restore some previous $<digit>s? */
3648                     PL_reginput = locinput;
3649                     DEBUG_EXECUTE_r(
3650                         PerlIO_printf(Perl_debug_log,
3651                                       "%*s  failed, try continuation...\n",
3652                                       REPORT_CODE_OFF+PL_regindent*2, "")
3653                         );
3654                 }
3655                 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3656                         && !(PL_reg_flags & RF_warned)) {
3657                     PL_reg_flags |= RF_warned;
3658                     Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3659                          "Complex regular subexpression recursion",
3660                          REG_INFTY - 1);
3661                 }
3662
3663                 /* Failed deeper matches of scan, so see if this one works. */
3664                 PL_regcc = cc->oldcc;
3665                 if (PL_regcc)
3666                     ln = PL_regcc->cur;
3667                 if (regmatch(cc->next))
3668                     CACHEsayYES;
3669                 if (PL_regcc)
3670                     PL_regcc->cur = ln;
3671                 PL_regcc = cc;
3672                 cc->cur = n - 1;
3673                 cc->lastloc = lastloc;
3674                 CACHEsayNO;
3675             }
3676             /* NOT REACHED */
3677         case BRANCHJ:
3678             next = scan + ARG(scan);
3679             if (next == scan)
3680                 next = NULL;
3681             inner = NEXTOPER(NEXTOPER(scan));
3682             goto do_branch;
3683         case BRANCH:
3684             inner = NEXTOPER(scan);
3685           do_branch:
3686             {
3687                 c1 = OP(scan);
3688                 if (OP(next) != c1)     /* No choice. */
3689                     next = inner;       /* Avoid recursion. */
3690                 else {
3691                     I32 lastparen = *PL_reglastparen;
3692                     I32 unwind1;
3693                     re_unwind_branch_t *uw;
3694
3695                     /* Put unwinding data on stack */
3696                     unwind1 = SSNEWt(1,re_unwind_branch_t);
3697                     uw = SSPTRt(unwind1,re_unwind_branch_t);
3698                     uw->prev = unwind;
3699                     unwind = unwind1;
3700                     uw->type = ((c1 == BRANCH)
3701                                 ? RE_UNWIND_BRANCH
3702                                 : RE_UNWIND_BRANCHJ);
3703                     uw->lastparen = lastparen;
3704                     uw->next = next;
3705                     uw->locinput = locinput;
3706                     uw->nextchr = nextchr;
3707 #ifdef DEBUGGING
3708                     uw->regindent = ++PL_regindent;
3709 #endif
3710
3711                     REGCP_SET(uw->lastcp);
3712
3713                     /* Now go into the first branch */
3714                     next = inner;
3715                 }
3716             }
3717             break;
3718         case MINMOD:
3719             minmod = 1;
3720             break;
3721         case CURLYM:
3722         {
3723             I32 l = 0;
3724             CHECKPOINT lastcp;
3725         
3726             /* We suppose that the next guy does not need
3727                backtracking: in particular, it is of constant non-zero length,
3728                and has no parenths to influence future backrefs. */
3729             ln = ARG1(scan);  /* min to match */
3730             n  = ARG2(scan);  /* max to match */
3731             paren = scan->flags;
3732             if (paren) {
3733                 if (paren > PL_regsize)
3734                     PL_regsize = paren;
3735                 if (paren > (I32)*PL_reglastparen)
3736                     *PL_reglastparen = paren;
3737             }
3738             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3739             if (paren)
3740                 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3741             PL_reginput = locinput;
3742             if (minmod) {
3743                 minmod = 0;
3744                 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3745                     sayNO;
3746                 locinput = PL_reginput;
3747                 if (HAS_TEXT(next) || JUMPABLE(next)) {
3748                     regnode *text_node = next;
3749
3750                     if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3751
3752                     if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3753                     else {
3754                         if (PL_regkind[(U8)OP(text_node)] == REF) {
3755                             c1 = c2 = -1000;
3756                             goto assume_ok_MM;
3757                         }
3758                         else { c1 = (U8)*STRING(text_node); }
3759                         if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3760                             c2 = PL_fold[c1];
3761                         else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3762                             c2 = PL_fold_locale[c1];
3763                         else
3764                             c2 = c1;
3765                     }
3766                 }
3767                 else
3768                     c1 = c2 = -1000;
3769             assume_ok_MM:
3770                 REGCP_SET(lastcp);
3771                 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3772                     /* If it could work, try it. */
3773                     if (c1 == -1000 ||
3774                         UCHARAT(PL_reginput) == c1 ||
3775                         UCHARAT(PL_reginput) == c2)
3776                     {
3777                         if (paren) {
3778                             if (ln) {
3779                                 PL_regstartp[paren] =
3780                                     HOPc(PL_reginput, -l) - PL_bostr;
3781                                 PL_regendp[paren] = PL_reginput - PL_bostr;
3782                             }
3783                             else
3784                                 PL_regendp[paren] = -1;
3785                         }
3786                         if (regmatch(next))
3787                             sayYES;
3788                         REGCP_UNWIND(lastcp);
3789                     }
3790                     /* Couldn't or didn't -- move forward. */
3791                     PL_reginput = locinput;
3792                     if (regrepeat_hard(scan, 1, &l)) {
3793                         ln++;
3794                         locinput = PL_reginput;
3795                     }
3796                     else
3797                         sayNO;
3798                 }
3799             }
3800             else {
3801                 n = regrepeat_hard(scan, n, &l);
3802                 locinput = PL_reginput;
3803                 DEBUG_EXECUTE_r(
3804                     PerlIO_printf(Perl_debug_log,
3805                                   "%*s  matched %"IVdf" times, len=%"IVdf"...\n",
3806                                   (int)(REPORT_CODE_OFF+PL_regindent*2), "",
3807                                   (IV) n, (IV)l)
3808                     );
3809                 if (n >= ln) {
3810                     if (HAS_TEXT(next) || JUMPABLE(next)) {
3811                         regnode *text_node = next;
3812
3813                         if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3814
3815                         if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3816                         else {
3817                             if (PL_regkind[(U8)OP(text_node)] == REF) {
3818                                 c1 = c2 = -1000;
3819                                 goto assume_ok_REG;
3820                             }
3821                             else { c1 = (U8)*STRING(text_node); }
3822
3823                             if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3824                                 c2 = PL_fold[c1];
3825                             else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3826                                 c2 = PL_fold_locale[c1];
3827                             else
3828                                 c2 = c1;
3829                         }
3830                     }
3831                     else
3832                         c1 = c2 = -1000;
3833                 }
3834             assume_ok_REG:
3835                 REGCP_SET(lastcp);
3836                 while (n >= ln) {
3837                     /* If it could work, try it. */
3838                     if (c1 == -1000 ||
3839                         UCHARAT(PL_reginput) == c1 ||
3840                         UCHARAT(PL_reginput) == c2)
3841                     {
3842                         DEBUG_EXECUTE_r(
3843                                 PerlIO_printf(Perl_debug_log,
3844                                               "%*s  trying tail with n=%"IVdf"...\n",
3845                                               (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
3846                             );
3847                         if (paren) {
3848                             if (n) {
3849                                 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3850                                 PL_regendp[paren] = PL_reginput - PL_bostr;
3851                             }
3852                             else
3853                                 PL_regendp[paren] = -1;
3854                         }
3855                         if (regmatch(next))
3856                             sayYES;
3857                         REGCP_UNWIND(lastcp);
3858                     }
3859                     /* Couldn't or didn't -- back up. */
3860                     n--;
3861                     locinput = HOPc(locinput, -l);
3862                     PL_reginput = locinput;
3863                 }
3864             }
3865             sayNO;
3866             break;
3867         }
3868         case CURLYN:
3869             paren = scan->flags;        /* Which paren to set */
3870             if (paren > PL_regsize)
3871                 PL_regsize = paren;
3872             if (paren > (I32)*PL_reglastparen)
3873                 *PL_reglastparen = paren;
3874             ln = ARG1(scan);  /* min to match */
3875             n  = ARG2(scan);  /* max to match */
3876             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
3877             goto repeat;
3878         case CURLY:
3879             paren = 0;
3880             ln = ARG1(scan);  /* min to match */
3881             n  = ARG2(scan);  /* max to match */
3882             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3883             goto repeat;
3884         case STAR:
3885             ln = 0;
3886             n = REG_INFTY;
3887             scan = NEXTOPER(scan);
3888             paren = 0;
3889             goto repeat;
3890         case PLUS:
3891             ln = 1;
3892             n = REG_INFTY;
3893             scan = NEXTOPER(scan);
3894             paren = 0;
3895           repeat:
3896             /*
3897             * Lookahead to avoid useless match attempts
3898             * when we know what character comes next.
3899             */
3900
3901             /*
3902             * Used to only do .*x and .*?x, but now it allows
3903             * for )'s, ('s and (?{ ... })'s to be in the way
3904             * of the quantifier and the EXACT-like node.  -- japhy
3905             */
3906
3907             if (HAS_TEXT(next) || JUMPABLE(next)) {
3908                 U8 *s;
3909                 regnode *text_node = next;
3910
3911                 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3912
3913                 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3914                 else {
3915                     if (PL_regkind[(U8)OP(text_node)] == REF) {
3916                         c1 = c2 = -1000;
3917                         goto assume_ok_easy;
3918                     }
3919                     else { s = (U8*)STRING(text_node); }
3920
3921                     if (!UTF) {
3922                         c2 = c1 = *s;
3923                         if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3924                             c2 = PL_fold[c1];
3925                         else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3926                             c2 = PL_fold_locale[c1];
3927                     }
3928                     else { /* UTF */
3929                         if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
3930                              STRLEN ulen1, ulen2;
3931                              U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3932                              U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3933
3934                              to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3935                              to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3936
3937                              c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
3938                                                  ckWARN(WARN_UTF8) ?
3939                                                  0 : UTF8_ALLOW_ANY);
3940                              c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
3941                                                  ckWARN(WARN_UTF8) ?
3942                                                  0 : UTF8_ALLOW_ANY);
3943                         }
3944                         else {
3945                             c2 = c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
3946                                                      ckWARN(WARN_UTF8) ?
3947                                                      0 : UTF8_ALLOW_ANY);
3948                         }
3949                     }
3950                 }
3951             }
3952             else
3953                 c1 = c2 = -1000;
3954         assume_ok_easy:
3955             PL_reginput = locinput;
3956             if (minmod) {
3957                 CHECKPOINT lastcp;
3958                 minmod = 0;
3959                 if (ln && regrepeat(scan, ln) < ln)
3960                     sayNO;
3961                 locinput = PL_reginput;
3962                 REGCP_SET(lastcp);
3963                 if (c1 != -1000) {
3964                     char *e; /* Should not check after this */
3965                     char *old = locinput;
3966                     int count = 0;
3967
3968                     if  (n == REG_INFTY) {
3969                         e = PL_regeol - 1;
3970                         if (do_utf8)
3971                             while (UTF8_IS_CONTINUATION(*(U8*)e))
3972                                 e--;
3973                     }
3974                     else if (do_utf8) {
3975                         int m = n - ln;
3976                         for (e = locinput;
3977                              m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3978                             e += UTF8SKIP(e);
3979                     }
3980                     else {
3981                         e = locinput + n - ln;
3982                         if (e >= PL_regeol)
3983                             e = PL_regeol - 1;
3984                     }
3985                     while (1) {
3986                         /* Find place 'next' could work */
3987                         if (!do_utf8) {
3988                             if (c1 == c2) {
3989                                 while (locinput <= e &&
3990                                        UCHARAT(locinput) != c1)
3991                                     locinput++;
3992                             } else {
3993                                 while (locinput <= e
3994                                        && UCHARAT(locinput) != c1
3995                                        && UCHARAT(locinput) != c2)
3996                                     locinput++;
3997                             }
3998                             count = locinput - old;
3999                         }
4000                         else {
4001                             STRLEN len;
4002                             if (c1 == c2) {
4003                                 /* count initialised to
4004                                  * utf8_distance(old, locinput) */
4005                                 while (locinput <= e &&
4006                                        utf8n_to_uvchr((U8*)locinput,
4007                                                       UTF8_MAXBYTES, &len,
4008                                                       ckWARN(WARN_UTF8) ?
4009                                                       0 : UTF8_ALLOW_ANY) != (UV)c1) {
4010                                     locinput += len;
4011                                     count++;
4012                                 }
4013                             } else {
4014                                 /* count initialised to
4015                                  * utf8_distance(old, locinput) */
4016                                 while (locinput <= e) {
4017                                     UV c = utf8n_to_uvchr((U8*)locinput,
4018                                                           UTF8_MAXBYTES, &len,
4019                                                           ckWARN(WARN_UTF8) ?
4020                                                           0 : UTF8_ALLOW_ANY);
4021                                     if (c == (UV)c1 || c == (UV)c2)
4022                                         break;
4023                                     locinput += len;
4024                                     count++;
4025                                 }
4026                             }
4027                         }
4028                         if (locinput > e)
4029                             sayNO;
4030                         /* PL_reginput == old now */
4031                         if (locinput != old) {
4032                             ln = 1;     /* Did some */
4033                             if (regrepeat(scan, count) < count)
4034                                 sayNO;
4035                         }
4036                         /* PL_reginput == locinput now */
4037                         TRYPAREN(paren, ln, locinput);
4038                         PL_reginput = locinput; /* Could be reset... */
4039                         REGCP_UNWIND(lastcp);
4040                         /* Couldn't or didn't -- move forward. */
4041                         old = locinput;
4042                         if (do_utf8)
4043                             locinput += UTF8SKIP(locinput);
4044                         else
4045                             locinput++;
4046                         count = 1;
4047                     }
4048                 }
4049                 else
4050                 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
4051                     UV c;
4052                     if (c1 != -1000) {
4053                         if (do_utf8)
4054                             c = utf8n_to_uvchr((U8*)PL_reginput,
4055                                                UTF8_MAXBYTES, 0,
4056                                                ckWARN(WARN_UTF8) ?
4057                                                0 : UTF8_ALLOW_ANY);
4058                         else
4059                             c = UCHARAT(PL_reginput);
4060                         /* If it could work, try it. */
4061                         if (c == (UV)c1 || c == (UV)c2)
4062                         {
4063                             TRYPAREN(paren, ln, PL_reginput);
4064                             REGCP_UNWIND(lastcp);
4065                         }
4066                     }
4067                     /* If it could work, try it. */
4068                     else if (c1 == -1000)
4069                     {
4070                         TRYPAREN(paren, ln, PL_reginput);
4071                         REGCP_UNWIND(lastcp);
4072                     }
4073                     /* Couldn't or didn't -- move forward. */
4074                     PL_reginput = locinput;
4075                     if (regrepeat(scan, 1)) {
4076                         ln++;
4077                         locinput = PL_reginput;
4078                     }
4079                     else
4080                         sayNO;
4081                 }
4082             }
4083             else {
4084                 CHECKPOINT lastcp;
4085                 n = regrepeat(scan, n);
4086                 locinput = PL_reginput;
4087                 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
4088                     (OP(next) != MEOL ||
4089                         OP(next) == SEOL || OP(next) == EOS))
4090                 {
4091                     ln = n;                     /* why back off? */
4092                     /* ...because $ and \Z can match before *and* after
4093                        newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
4094                        We should back off by one in this case. */
4095                     if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
4096                         ln--;
4097                 }
4098                 REGCP_SET(lastcp);
4099                 if (paren) {
4100                     UV c = 0;
4101                     while (n >= ln) {
4102                         if (c1 != -1000) {
4103                             if (do_utf8)
4104                                 c = utf8n_to_uvchr((U8*)PL_reginput,
4105                                                    UTF8_MAXBYTES, 0,
4106                                                    ckWARN(WARN_UTF8) ?
4107                                                    0 : UTF8_ALLOW_ANY);
4108                             else
4109                                 c = UCHARAT(PL_reginput);
4110                         }
4111                         /* If it could work, try it. */
4112                         if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
4113                             {
4114                                 TRYPAREN(paren, n, PL_reginput);
4115                                 REGCP_UNWIND(lastcp);
4116                             }
4117                         /* Couldn't or didn't -- back up. */
4118                         n--;
4119                         PL_reginput = locinput = HOPc(locinput, -1);
4120                     }
4121                 }
4122                 else {
4123                     UV c = 0;
4124                     while (n >= ln) {
4125                         if (c1 != -1000) {
4126                             if (do_utf8)
4127                                 c = utf8n_to_uvchr((U8*)PL_reginput,
4128                                                    UTF8_MAXBYTES, 0,
4129                                                    ckWARN(WARN_UTF8) ?
4130                                                    0 : UTF8_ALLOW_ANY);
4131                             else
4132                                 c = UCHARAT(PL_reginput);
4133                         }
4134                         /* If it could work, try it. */
4135                         if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
4136                             {
4137                                 TRYPAREN(paren, n, PL_reginput);
4138                                 REGCP_UNWIND(lastcp);
4139                             }
4140                         /* Couldn't or didn't -- back up. */
4141                         n--;
4142                         PL_reginput = locinput = HOPc(locinput, -1);
4143                     }
4144                 }
4145             }
4146             sayNO;
4147             break;
4148         case END:
4149             if (PL_reg_call_cc) {
4150                 re_cc_state *cur_call_cc = PL_reg_call_cc;
4151                 CURCUR *cctmp = PL_regcc;
4152                 regexp *re = PL_reg_re;
4153                 CHECKPOINT cp, lastcp;
4154                 
4155                 cp = regcppush(0);      /* Save *all* the positions. */
4156                 REGCP_SET(lastcp);
4157                 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
4158                                                     the caller. */
4159                 PL_reginput = locinput; /* Make position available to
4160                                            the callcc. */
4161                 cache_re(PL_reg_call_cc->re);
4162                 PL_regcc = PL_reg_call_cc->cc;
4163                 PL_reg_call_cc = PL_reg_call_cc->prev;
4164                 if (regmatch(cur_call_cc->node)) {
4165                     PL_reg_call_cc = cur_call_cc;
4166                     regcpblow(cp);
4167                     sayYES;
4168                 }
4169                 REGCP_UNWIND(lastcp);
4170                 regcppop();
4171                 PL_reg_call_cc = cur_call_cc;
4172                 PL_regcc = cctmp;
4173                 PL_reg_re = re;
4174                 cache_re(re);
4175
4176                 DEBUG_EXECUTE_r(
4177                     PerlIO_printf(Perl_debug_log,
4178                                   "%*s  continuation failed...\n",
4179                                   REPORT_CODE_OFF+PL_regindent*2, "")
4180                     );
4181                 sayNO_SILENT;
4182             }
4183             if (locinput < PL_regtill) {
4184                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4185                                       "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4186                                       PL_colors[4],
4187                                       (long)(locinput - PL_reg_starttry),
4188                                       (long)(PL_regtill - PL_reg_starttry),
4189                                       PL_colors[5]));
4190                 sayNO_FINAL;            /* Cannot match: too short. */
4191             }
4192             PL_reginput = locinput;     /* put where regtry can find it */
4193             sayYES_FINAL;               /* Success! */
4194         case SUCCEED:
4195             PL_reginput = locinput;     /* put where regtry can find it */
4196             sayYES_LOUD;                /* Success! */
4197         case SUSPEND:
4198             n = 1;
4199             PL_reginput = locinput;
4200             goto do_ifmatch;    
4201         case UNLESSM:
4202             n = 0;
4203             if (scan->flags) {
4204                 s = HOPBACKc(locinput, scan->flags);
4205                 if (!s)
4206                     goto say_yes;
4207                 PL_reginput = s;
4208             }
4209             else
4210                 PL_reginput = locinput;
4211             goto do_ifmatch;
4212         case IFMATCH:
4213             n = 1;
4214             if (scan->flags) {
4215                 s = HOPBACKc(locinput, scan->flags);
4216                 if (!s)
4217                     goto say_no;
4218                 PL_reginput = s;
4219             }
4220             else
4221                 PL_reginput = locinput;
4222
4223           do_ifmatch:
4224             inner = NEXTOPER(NEXTOPER(scan));
4225             if (regmatch(inner) != n) {
4226               say_no:
4227                 if (logical) {
4228                     logical = 0;
4229                     sw = 0;
4230                     goto do_longjump;
4231                 }
4232                 else
4233                     sayNO;
4234             }
4235           say_yes:
4236             if (logical) {
4237                 logical = 0;
4238                 sw = 1;
4239             }
4240             if (OP(scan) == SUSPEND) {
4241                 locinput = PL_reginput;
4242                 nextchr = UCHARAT(locinput);
4243             }
4244             /* FALL THROUGH. */
4245         case LONGJMP:
4246           do_longjump:
4247             next = scan + ARG(scan);
4248             if (next == scan)
4249                 next = NULL;
4250             break;
4251         default:
4252             PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
4253                           PTR2UV(scan), OP(scan));
4254             Perl_croak(aTHX_ "regexp memory corruption");
4255         }
4256       reenter:
4257         scan = next;
4258     }
4259
4260     /*
4261     * We get here only if there's trouble -- normally "case END" is
4262     * the terminating point.
4263     */
4264     Perl_croak(aTHX_ "corrupted regexp pointers");
4265     /*NOTREACHED*/
4266     sayNO;
4267
4268 yes_loud:
4269     DEBUG_EXECUTE_r(
4270         PerlIO_printf(Perl_debug_log,
4271                       "%*s  %scould match...%s\n",
4272                       REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
4273         );
4274     goto yes;
4275 yes_final:
4276     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
4277                           PL_colors[4], PL_colors[5]));
4278 yes:
4279 #ifdef DEBUGGING
4280     PL_regindent--;
4281 #endif
4282
4283 #if 0                                   /* Breaks $^R */
4284     if (unwind)
4285         regcpblow(firstcp);
4286 #endif
4287     return 1;
4288
4289 no:
4290     DEBUG_EXECUTE_r(
4291         PerlIO_printf(Perl_debug_log,
4292                       "%*s  %sfailed...%s\n",
4293                       REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
4294         );
4295     goto do_no;
4296 no_final:
4297 do_no:
4298     if (unwind) {
4299         re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
4300
4301         switch (uw->type) {
4302         case RE_UNWIND_BRANCH:
4303         case RE_UNWIND_BRANCHJ:
4304         {
4305             re_unwind_branch_t *uwb = &(uw->branch);
4306             I32 lastparen = uwb->lastparen;
4307         
4308             REGCP_UNWIND(uwb->lastcp);
4309             for (n = *PL_reglastparen; n > lastparen; n--)
4310                 PL_regendp[n] = -1;
4311             *PL_reglastparen = n;
4312             scan = next = uwb->next;
4313             if ( !scan ||
4314                  OP(scan) != (uwb->type == RE_UNWIND_BRANCH
4315                               ? BRANCH : BRANCHJ) ) {           /* Failure */
4316                 unwind = uwb->prev;
4317 #ifdef DEBUGGING
4318                 PL_regindent--;
4319 #endif
4320                 goto do_no;
4321             }
4322             /* Have more choice yet.  Reuse the same uwb.  */
4323             /*SUPPRESS 560*/
4324             if ((n = (uwb->type == RE_UNWIND_BRANCH
4325                       ? NEXT_OFF(next) : ARG(next))))
4326                 next += n;
4327             else
4328                 next = NULL;    /* XXXX Needn't unwinding in this case... */
4329             uwb->next = next;
4330             next = NEXTOPER(scan);
4331             if (uwb->type == RE_UNWIND_BRANCHJ)
4332                 next = NEXTOPER(next);
4333             locinput = uwb->locinput;
4334             nextchr = uwb->nextchr;
4335 #ifdef DEBUGGING
4336             PL_regindent = uwb->regindent;
4337 #endif
4338
4339             goto reenter;
4340         }
4341         /* NOT REACHED */
4342         default:
4343             Perl_croak(aTHX_ "regexp unwind memory corruption");
4344         }
4345         /* NOT REACHED */
4346     }
4347 #ifdef DEBUGGING
4348     PL_regindent--;
4349 #endif
4350     return 0;
4351 }
4352
4353 /*
4354  - regrepeat - repeatedly match something simple, report how many
4355  */
4356 /*
4357  * [This routine now assumes that it will only match on things of length 1.
4358  * That was true before, but now we assume scan - reginput is the count,
4359  * rather than incrementing count on every character.  [Er, except utf8.]]
4360  */
4361 STATIC I32
4362 S_regrepeat(pTHX_ regnode *p, I32 max)
4363 {
4364     dVAR;
4365     register char *scan;
4366     register I32 c;
4367     register char *loceol = PL_regeol;
4368     register I32 hardcount = 0;
4369     register bool do_utf8 = PL_reg_match_utf8;
4370
4371     scan = PL_reginput;
4372     if (max == REG_INFTY)
4373         max = I32_MAX;
4374     else if (max < loceol - scan)
4375       loceol = scan + max;
4376     switch (OP(p)) {
4377     case REG_ANY:
4378         if (do_utf8) {
4379             loceol = PL_regeol;
4380             while (scan < loceol && hardcount < max && *scan != '\n') {
4381                 scan += UTF8SKIP(scan);
4382                 hardcount++;
4383             }
4384         } else {
4385             while (scan < loceol && *scan != '\n')
4386                 scan++;
4387         }
4388         break;
4389     case SANY:
4390         if (do_utf8) {
4391             loceol = PL_regeol;
4392             while (scan < loceol && hardcount < max) {
4393                 scan += UTF8SKIP(scan);
4394                 hardcount++;
4395             }
4396         }
4397         else
4398             scan = loceol;
4399         break;
4400     case CANY:
4401         scan = loceol;
4402         break;
4403     case EXACT:         /* length of string is 1 */
4404         c = (U8)*STRING(p);
4405         while (scan < loceol && UCHARAT(scan) == c)
4406             scan++;
4407         break;
4408     case EXACTF:        /* length of string is 1 */
4409         c = (U8)*STRING(p);
4410         while (scan < loceol &&
4411                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
4412             scan++;
4413         break;
4414     case EXACTFL:       /* length of string is 1 */
4415         PL_reg_flags |= RF_tainted;
4416         c = (U8)*STRING(p);
4417         while (scan < loceol &&
4418                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
4419             scan++;
4420         break;
4421     case ANYOF:
4422         if (do_utf8) {
4423             loceol = PL_regeol;
4424             while (hardcount < max && scan < loceol &&
4425                    reginclass(p, (U8*)scan, 0, do_utf8)) {
4426                 scan += UTF8SKIP(scan);
4427                 hardcount++;
4428             }
4429         } else {
4430             while (scan < loceol && REGINCLASS(p, (U8*)scan))
4431                 scan++;
4432         }
4433         break;
4434     case ALNUM:
4435         if (do_utf8) {
4436             loceol = PL_regeol;
4437             LOAD_UTF8_CHARCLASS(alnum,"a");
4438             while (hardcount < max && scan < loceol &&
4439                    swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4440                 scan += UTF8SKIP(scan);
4441                 hardcount++;
4442             }
4443         } else {
4444             while (scan < loceol && isALNUM(*scan))
4445                 scan++;
4446         }
4447         break;
4448     case ALNUML:
4449         PL_reg_flags |= RF_tainted;
4450         if (do_utf8) {
4451             loceol = PL_regeol;
4452             while (hardcount < max && scan < loceol &&
4453                    isALNUM_LC_utf8((U8*)scan)) {
4454                 scan += UTF8SKIP(scan);
4455                 hardcount++;
4456             }
4457         } else {
4458             while (scan < loceol && isALNUM_LC(*scan))
4459                 scan++;
4460         }
4461         break;
4462     case NALNUM:
4463         if (do_utf8) {
4464             loceol = PL_regeol;
4465             LOAD_UTF8_CHARCLASS(alnum,"a");
4466             while (hardcount < max && scan < loceol &&
4467                    !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4468                 scan += UTF8SKIP(scan);
4469                 hardcount++;
4470             }
4471         } else {
4472             while (scan < loceol && !isALNUM(*scan))
4473                 scan++;
4474         }
4475         break;
4476     case NALNUML:
4477         PL_reg_flags |= RF_tainted;
4478         if (do_utf8) {
4479             loceol = PL_regeol;
4480             while (hardcount < max && scan < loceol &&
4481                    !isALNUM_LC_utf8((U8*)scan)) {
4482                 scan += UTF8SKIP(scan);
4483                 hardcount++;
4484             }
4485         } else {
4486             while (scan < loceol && !isALNUM_LC(*scan))
4487                 scan++;
4488         }
4489         break;
4490     case SPACE:
4491         if (do_utf8) {
4492             loceol = PL_regeol;
4493             LOAD_UTF8_CHARCLASS(space," ");
4494             while (hardcount < max && scan < loceol &&
4495                    (*scan == ' ' ||
4496                     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4497                 scan += UTF8SKIP(scan);
4498                 hardcount++;
4499             }
4500         } else {
4501             while (scan < loceol && isSPACE(*scan))
4502                 scan++;
4503         }
4504         break;
4505     case SPACEL:
4506         PL_reg_flags |= RF_tainted;
4507         if (do_utf8) {
4508             loceol = PL_regeol;
4509             while (hardcount < max && scan < loceol &&
4510                    (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4511                 scan += UTF8SKIP(scan);
4512                 hardcount++;
4513             }
4514         } else {
4515             while (scan < loceol && isSPACE_LC(*scan))
4516                 scan++;
4517         }
4518         break;
4519     case NSPACE:
4520         if (do_utf8) {
4521             loceol = PL_regeol;
4522             LOAD_UTF8_CHARCLASS(space," ");
4523             while (hardcount < max && scan < loceol &&
4524                    !(*scan == ' ' ||
4525                      swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4526                 scan += UTF8SKIP(scan);
4527                 hardcount++;
4528             }
4529         } else {
4530             while (scan < loceol && !isSPACE(*scan))
4531                 scan++;
4532             break;
4533         }
4534     case NSPACEL:
4535         PL_reg_flags |= RF_tainted;
4536         if (do_utf8) {
4537             loceol = PL_regeol;
4538             while (hardcount < max && scan < loceol &&
4539                    !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4540                 scan += UTF8SKIP(scan);
4541                 hardcount++;
4542             }
4543         } else {
4544             while (scan < loceol && !isSPACE_LC(*scan))
4545                 scan++;
4546         }
4547         break;
4548     case DIGIT:
4549         if (do_utf8) {
4550             loceol = PL_regeol;
4551             LOAD_UTF8_CHARCLASS(digit,"0");
4552             while (hardcount < max && scan < loceol &&
4553                    swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4554                 scan += UTF8SKIP(scan);
4555                 hardcount++;
4556             }
4557         } else {
4558             while (scan < loceol && isDIGIT(*scan))
4559                 scan++;
4560         }
4561         break;
4562     case NDIGIT:
4563         if (do_utf8) {
4564             loceol = PL_regeol;
4565             LOAD_UTF8_CHARCLASS(digit,"0");
4566             while (hardcount < max && scan < loceol &&
4567                    !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4568                 scan += UTF8SKIP(scan);
4569                 hardcount++;
4570             }
4571         } else {
4572             while (scan < loceol && !isDIGIT(*scan))
4573                 scan++;
4574         }
4575         break;
4576     default:            /* Called on something of 0 width. */
4577         break;          /* So match right here or not at all. */
4578     }
4579
4580     if (hardcount)
4581         c = hardcount;
4582     else
4583         c = scan - PL_reginput;
4584     PL_reginput = scan;
4585
4586     DEBUG_r({
4587                 SV *re_debug_flags = NULL;
4588                 SV *prop = sv_newmortal();
4589                 GET_RE_DEBUG_FLAGS;
4590                 DEBUG_EXECUTE_r({
4591                 regprop(prop, p);
4592                 PerlIO_printf(Perl_debug_log,
4593                               "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
4594                               REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
4595         });
4596         });
4597
4598     return(c);
4599 }
4600
4601 /*
4602  - regrepeat_hard - repeatedly match something, report total lenth and length
4603  *
4604  * The repeater is supposed to have constant non-zero length.
4605  */
4606
4607 STATIC I32
4608 S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
4609 {
4610     register char *scan = Nullch;
4611     register char *start;
4612     register char *loceol = PL_regeol;
4613     I32 l = 0;
4614     I32 count = 0, res = 1;
4615
4616     if (!max)
4617         return 0;
4618
4619     start = PL_reginput;
4620     if (PL_reg_match_utf8) {
4621         while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4622             if (!count++) {
4623                 l = 0;
4624                 while (start < PL_reginput) {
4625                     l++;
4626                     start += UTF8SKIP(start);
4627                 }
4628                 *lp = l;
4629                 if (l == 0)
4630                     return max;
4631             }
4632             if (count == max)
4633                 return count;
4634         }
4635     }
4636     else {
4637         while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4638             if (!count++) {
4639                 *lp = l = PL_reginput - start;
4640                 if (max != REG_INFTY && l*max < loceol - scan)
4641                     loceol = scan + l*max;
4642                 if (l == 0)
4643                     return max;
4644             }
4645         }
4646     }
4647     if (!res)
4648         PL_reginput = scan;
4649
4650     return count;
4651 }
4652
4653 /*
4654 - regclass_swash - prepare the utf8 swash
4655 */
4656
4657 SV *
4658 Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV **altsvp)
4659 {
4660     SV *sw  = NULL;
4661     SV *si  = NULL;
4662     SV *alt = NULL;
4663
4664     if (PL_regdata && PL_regdata->count) {
4665         U32 n = ARG(node);
4666
4667         if (PL_regdata->what[n] == 's') {
4668             SV *rv = (SV*)PL_regdata->data[n];
4669             AV *av = (AV*)SvRV((SV*)rv);
4670             SV **ary = AvARRAY(av);
4671             SV **a, **b;
4672         
4673             /* See the end of regcomp.c:S_reglass() for
4674              * documentation of these array elements. */
4675
4676             si = *ary;
4677             a  = SvTYPE(ary[1]) == SVt_RV   ? &ary[1] : 0;
4678             b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4679
4680             if (a)
4681                 sw = *a;
4682             else if (si && doinit) {
4683                 sw = swash_init("utf8", "", si, 1, 0);
4684                 (void)av_store(av, 1, sw);
4685             }
4686             if (b)
4687                 alt = *b;
4688         }
4689     }
4690         
4691     if (listsvp)
4692         *listsvp = si;
4693     if (altsvp)
4694         *altsvp  = alt;
4695
4696     return sw;
4697 }
4698
4699 /*
4700  - reginclass - determine if a character falls into a character class
4701  
4702   The n is the ANYOF regnode, the p is the target string, lenp
4703   is pointer to the maximum length of how far to go in the p
4704   (if the lenp is zero, UTF8SKIP(p) is used),
4705   do_utf8 tells whether the target string is in UTF-8.
4706
4707  */
4708
4709 STATIC bool
4710 S_reginclass(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, register bool do_utf8)
4711 {
4712     dVAR;
4713     char flags = ANYOF_FLAGS(n);
4714     bool match = FALSE;
4715     UV c = *p;
4716     STRLEN len = 0;
4717     STRLEN plen;
4718
4719     if (do_utf8 && !UTF8_IS_INVARIANT(c))
4720          c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
4721                             ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
4722
4723     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
4724     if (do_utf8 || (flags & ANYOF_UNICODE)) {
4725         if (lenp)
4726             *lenp = 0;
4727         if (do_utf8 && !ANYOF_RUNTIME(n)) {
4728             if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4729                 match = TRUE;
4730         }
4731         if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
4732             match = TRUE;
4733         if (!match) {
4734             AV *av;
4735             SV *sw = regclass_swash(n, TRUE, 0, (SV**)&av);
4736         
4737             if (sw) {
4738                 if (swash_fetch(sw, p, do_utf8))
4739                     match = TRUE;
4740                 else if (flags & ANYOF_FOLD) {
4741                     if (!match && lenp && av) {
4742                         I32 i;
4743                       
4744                         for (i = 0; i <= av_len(av); i++) {
4745                             SV* sv = *av_fetch(av, i, FALSE);
4746                             STRLEN len;
4747                             char *s = SvPV(sv, len);
4748                         
4749                             if (len <= plen && memEQ(s, (char*)p, len)) {
4750                                 *lenp = len;
4751                                 match = TRUE;
4752                                 break;
4753                             }
4754                         }
4755                     }
4756                     if (!match) {
4757                         U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4758                         STRLEN tmplen;
4759
4760                         to_utf8_fold(p, tmpbuf, &tmplen);
4761                         if (swash_fetch(sw, tmpbuf, do_utf8))
4762                             match = TRUE;
4763                     }
4764                 }
4765             }
4766         }
4767         if (match && lenp && *lenp == 0)
4768             *lenp = UNISKIP(NATIVE_TO_UNI(c));
4769     }
4770     if (!match && c < 256) {
4771         if (ANYOF_BITMAP_TEST(n, c))
4772             match = TRUE;
4773         else if (flags & ANYOF_FOLD) {
4774             U8 f;
4775
4776             if (flags & ANYOF_LOCALE) {
4777                 PL_reg_flags |= RF_tainted;
4778                 f = PL_fold_locale[c];
4779             }
4780             else
4781                 f = PL_fold[c];
4782             if (f != c && ANYOF_BITMAP_TEST(n, f))
4783                 match = TRUE;
4784         }
4785         
4786         if (!match && (flags & ANYOF_CLASS)) {
4787             PL_reg_flags |= RF_tainted;
4788             if (
4789                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
4790                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
4791                 (ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
4792                 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
4793                 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
4794                 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
4795                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
4796                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4797                 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
4798                 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
4799                 (ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
4800                 (ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
4801                 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
4802                 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
4803                 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
4804                 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
4805                 (ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
4806                 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
4807                 (ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
4808                 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
4809                 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
4810                 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
4811                 (ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
4812                 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
4813                 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
4814                 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
4815                 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
4816                 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
4817                 (ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
4818                 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
4819                 ) /* How's that for a conditional? */
4820             {
4821                 match = TRUE;
4822             }
4823         }
4824     }
4825
4826     return (flags & ANYOF_INVERT) ? !match : match;
4827 }
4828
4829 STATIC U8 *
4830 S_reghop(pTHX_ U8 *s, I32 off)
4831 {
4832     return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4833 }
4834
4835 STATIC U8 *
4836 S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
4837 {
4838     if (off >= 0) {
4839         while (off-- && s < lim) {
4840             /* XXX could check well-formedness here */
4841             s += UTF8SKIP(s);
4842         }
4843     }
4844     else {
4845         while (off++) {
4846             if (s > lim) {
4847                 s--;
4848                 if (UTF8_IS_CONTINUED(*s)) {
4849                     while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4850                         s--;
4851                 }
4852                 /* XXX could check well-formedness here */
4853             }
4854         }
4855     }
4856     return s;
4857 }
4858
4859 STATIC U8 *
4860 S_reghopmaybe(pTHX_ U8 *s, I32 off)
4861 {
4862     return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4863 }
4864
4865 STATIC U8 *
4866 S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
4867 {
4868     if (off >= 0) {
4869         while (off-- && s < lim) {
4870             /* XXX could check well-formedness here */
4871             s += UTF8SKIP(s);
4872         }
4873         if (off >= 0)
4874             return 0;
4875     }
4876     else {
4877         while (off++) {
4878             if (s > lim) {
4879                 s--;
4880                 if (UTF8_IS_CONTINUED(*s)) {
4881                     while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4882                         s--;
4883                 }
4884                 /* XXX could check well-formedness here */
4885             }
4886             else
4887                 break;
4888         }
4889         if (off <= 0)
4890             return 0;
4891     }
4892     return s;
4893 }
4894
4895 static void
4896 restore_pos(pTHX_ void *arg)
4897 {
4898     (void)arg; /* unused */
4899     if (PL_reg_eval_set) {
4900         if (PL_reg_oldsaved) {
4901             PL_reg_re->subbeg = PL_reg_oldsaved;
4902             PL_reg_re->sublen = PL_reg_oldsavedlen;
4903 #ifdef PERL_COPY_ON_WRITE
4904             PL_reg_re->saved_copy = PL_nrs;
4905 #endif
4906             RX_MATCH_COPIED_on(PL_reg_re);
4907         }
4908         PL_reg_magic->mg_len = PL_reg_oldpos;
4909         PL_reg_eval_set = 0;
4910         PL_curpm = PL_reg_oldcurpm;
4911     }   
4912 }
4913
4914 STATIC void
4915 S_to_utf8_substr(pTHX_ register regexp *prog)
4916 {
4917     SV* sv;
4918     if (prog->float_substr && !prog->float_utf8) {
4919         prog->float_utf8 = sv = newSVsv(prog->float_substr);
4920         sv_utf8_upgrade(sv);
4921         if (SvTAIL(prog->float_substr))
4922             SvTAIL_on(sv);
4923         if (prog->float_substr == prog->check_substr)
4924             prog->check_utf8 = sv;
4925     }
4926     if (prog->anchored_substr && !prog->anchored_utf8) {
4927         prog->anchored_utf8 = sv = newSVsv(prog->anchored_substr);
4928         sv_utf8_upgrade(sv);
4929         if (SvTAIL(prog->anchored_substr))
4930             SvTAIL_on(sv);
4931         if (prog->anchored_substr == prog->check_substr)
4932             prog->check_utf8 = sv;
4933     }
4934 }
4935
4936 STATIC void
4937 S_to_byte_substr(pTHX_ register regexp *prog)
4938 {
4939     SV* sv;
4940     if (prog->float_utf8 && !prog->float_substr) {
4941         prog->float_substr = sv = newSVsv(prog->float_utf8);
4942         if (sv_utf8_downgrade(sv, TRUE)) {
4943             if (SvTAIL(prog->float_utf8))
4944                 SvTAIL_on(sv);
4945         } else {
4946             SvREFCNT_dec(sv);
4947             prog->float_substr = sv = &PL_sv_undef;
4948         }
4949         if (prog->float_utf8 == prog->check_utf8)
4950             prog->check_substr = sv;
4951     }
4952     if (prog->anchored_utf8 && !prog->anchored_substr) {
4953         prog->anchored_substr = sv = newSVsv(prog->anchored_utf8);
4954         if (sv_utf8_downgrade(sv, TRUE)) {
4955             if (SvTAIL(prog->anchored_utf8))
4956                 SvTAIL_on(sv);
4957         } else {
4958             SvREFCNT_dec(sv);
4959             prog->anchored_substr = sv = &PL_sv_undef;
4960         }
4961         if (prog->anchored_utf8 == prog->check_utf8)
4962             prog->check_substr = sv;
4963     }
4964 }