This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Third consting batch
[perl5.git] / regexec.c
1 /*    regexec.c
2  */
3
4 /*
5  * "One Ring to rule them all, One Ring to find them..."
6  */
7
8 /* This file contains functions for executing a regular expression.  See
9  * also regcomp.c which funnily enough, contains functions for compiling
10  * a regular expression.
11  *
12  * This file is also copied at build time to ext/re/re_exec.c, where
13  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14  * This causes the main functions to be compiled under new names and with
15  * debugging support added, which makes "use re 'debug'" work.
16  
17  */
18
19 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
20  * confused with the original package (see point 3 below).  Thanks, Henry!
21  */
22
23 /* Additional note: this code is very heavily munged from Henry's version
24  * in places.  In some spots I've traded clarity for efficiency, so don't
25  * blame Henry for some of the lack of readability.
26  */
27
28 /* The names of the functions have been changed from regcomp and
29  * regexec to  pregcomp and pregexec in order to avoid conflicts
30  * with the POSIX routines of the same names.
31 */
32
33 #ifdef PERL_EXT_RE_BUILD
34 /* need to replace pregcomp et al, so enable that */
35 #  ifndef PERL_IN_XSUB_RE
36 #    define PERL_IN_XSUB_RE
37 #  endif
38 /* need access to debugger hooks */
39 #  if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
40 #    define DEBUGGING
41 #  endif
42 #endif
43
44 #ifdef PERL_IN_XSUB_RE
45 /* We *really* need to overwrite these symbols: */
46 #  define Perl_regexec_flags my_regexec
47 #  define Perl_regdump my_regdump
48 #  define Perl_regprop my_regprop
49 #  define Perl_re_intuit_start my_re_intuit_start
50 /* *These* symbols are masked to allow static link. */
51 #  define Perl_pregexec my_pregexec
52 #  define Perl_reginitcolors my_reginitcolors
53 #  define Perl_regclass_swash my_regclass_swash
54
55 #  define PERL_NO_GET_CONTEXT
56 #endif
57
58 /*SUPPRESS 112*/
59 /*
60  * pregcomp and pregexec -- regsub and regerror are not used in perl
61  *
62  *      Copyright (c) 1986 by University of Toronto.
63  *      Written by Henry Spencer.  Not derived from licensed software.
64  *
65  *      Permission is granted to anyone to use this software for any
66  *      purpose on any computer system, and to redistribute it freely,
67  *      subject to the following restrictions:
68  *
69  *      1. The author is not responsible for the consequences of use of
70  *              this software, no matter how awful, even if they arise
71  *              from defects in it.
72  *
73  *      2. The origin of this software must not be misrepresented, either
74  *              by explicit claim or by omission.
75  *
76  *      3. Altered versions must be plainly marked as such, and must not
77  *              be misrepresented as being the original software.
78  *
79  ****    Alterations to Henry's code are...
80  ****
81  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
82  ****    2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
83  ****
84  ****    You may distribute under the terms of either the GNU General Public
85  ****    License or the Artistic License, as specified in the README file.
86  *
87  * Beware that some of this code is subtly aware of the way operator
88  * precedence is structured in regular expressions.  Serious changes in
89  * regular-expression syntax might require a total rethink.
90  */
91 #include "EXTERN.h"
92 #define PERL_IN_REGEXEC_C
93 #include "perl.h"
94
95 #include "regcomp.h"
96
97 #define RF_tainted      1               /* tainted information used? */
98 #define RF_warned       2               /* warned about big count? */
99 #define RF_evaled       4               /* Did an EVAL with setting? */
100 #define RF_utf8         8               /* String contains multibyte chars? */
101 #define RF_false        16              /* odd number of nested negatives */
102
103 #define UTF ((PL_reg_flags & RF_utf8) != 0)
104
105 #define RS_init         1               /* eval environment created */
106 #define RS_set          2               /* replsv value is set */
107
108 #ifndef STATIC
109 #define STATIC  static
110 #endif
111
112 #define REGINCLASS(p,c)  (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
113
114 /*
115  * Forwards.
116  */
117
118 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
119 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
120
121 #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
122 #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
123 #define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
124 #define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
125 #define HOPc(pos,off) ((char*)HOP(pos,off))
126 #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
127
128 #define HOPBACK(pos, off) (             \
129     (PL_reg_match_utf8)                 \
130         ? reghopmaybe((U8*)pos, -off)   \
131     : (pos - off >= PL_bostr)           \
132         ? (U8*)(pos - off)              \
133     : (U8*)NULL                         \
134 )
135 #define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
136
137 #define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
138 #define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
139 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
140 #define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
141 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
142 #define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
143
144 #define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) { ENTER; save_re_context(); (void)CAT2(is_utf8_, a)((const U8*)b); LEAVE; } } STMT_END
145
146 /* for use after a quantifier and before an EXACT-like node -- japhy */
147 #define JUMPABLE(rn) ( \
148     OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
149     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
150     OP(rn) == PLUS || OP(rn) == MINMOD || \
151     (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
152 )
153
154 #define HAS_TEXT(rn) ( \
155     PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
156 )
157
158 /*
159   Search for mandatory following text node; for lookahead, the text must
160   follow but for lookbehind (rn->flags != 0) we skip to the next step.
161 */
162 #define FIND_NEXT_IMPT(rn) STMT_START { \
163     while (JUMPABLE(rn)) \
164         if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
165             rn = NEXTOPER(NEXTOPER(rn)); \
166         else if (OP(rn) == PLUS) \
167             rn = NEXTOPER(rn); \
168         else if (OP(rn) == IFMATCH) \
169             rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
170         else rn += NEXT_OFF(rn); \
171 } STMT_END 
172
173 static void restore_pos(pTHX_ void *arg);
174
175 STATIC CHECKPOINT
176 S_regcppush(pTHX_ I32 parenfloor)
177 {
178     int retval = PL_savestack_ix;
179 #define REGCP_PAREN_ELEMS 4
180     int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
181     int p;
182
183     if (paren_elems_to_push < 0)
184         Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
185
186 #define REGCP_OTHER_ELEMS 6
187     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
188     for (p = PL_regsize; p > parenfloor; p--) {
189 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
190         SSPUSHINT(PL_regendp[p]);
191         SSPUSHINT(PL_regstartp[p]);
192         SSPUSHPTR(PL_reg_start_tmp[p]);
193         SSPUSHINT(p);
194     }
195 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
196     SSPUSHINT(PL_regsize);
197     SSPUSHINT(*PL_reglastparen);
198     SSPUSHINT(*PL_reglastcloseparen);
199     SSPUSHPTR(PL_reginput);
200 #define REGCP_FRAME_ELEMS 2
201 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
202  * are needed for the regexp context stack bookkeeping. */
203     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
204     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
205
206     return retval;
207 }
208
209 /* These are needed since we do not localize EVAL nodes: */
210 #  define REGCP_SET(cp)  DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,          \
211                              "  Setting an EVAL scope, savestack=%"IVdf"\n",    \
212                              (IV)PL_savestack_ix)); cp = PL_savestack_ix
213
214 #  define REGCP_UNWIND(cp)  DEBUG_EXECUTE_r(cp != PL_savestack_ix ?             \
215                                 PerlIO_printf(Perl_debug_log,           \
216                                 "  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
217                                 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
218
219 STATIC char *
220 S_regcppop(pTHX)
221 {
222     I32 i;
223     U32 paren = 0;
224     char *input;
225     I32 tmps;
226
227     GET_RE_DEBUG_FLAGS_DECL;
228
229     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
230     i = SSPOPINT;
231     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
232     i = SSPOPINT; /* Parentheses elements to pop. */
233     input = (char *) SSPOPPTR;
234     *PL_reglastcloseparen = SSPOPINT;
235     *PL_reglastparen = SSPOPINT;
236     PL_regsize = SSPOPINT;
237
238     /* Now restore the parentheses context. */
239     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
240          i > 0; i -= REGCP_PAREN_ELEMS) {
241         paren = (U32)SSPOPINT;
242         PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
243         PL_regstartp[paren] = SSPOPINT;
244         tmps = SSPOPINT;
245         if (paren <= *PL_reglastparen)
246             PL_regendp[paren] = tmps;
247         DEBUG_EXECUTE_r(
248             PerlIO_printf(Perl_debug_log,
249                           "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
250                           (UV)paren, (IV)PL_regstartp[paren],
251                           (IV)(PL_reg_start_tmp[paren] - PL_bostr),
252                           (IV)PL_regendp[paren],
253                           (paren > *PL_reglastparen ? "(no)" : ""));
254         );
255     }
256     DEBUG_EXECUTE_r(
257         if ((I32)(*PL_reglastparen + 1) <= PL_regnpar) {
258             PerlIO_printf(Perl_debug_log,
259                           "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
260                           (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
261         }
262     );
263 #if 1
264     /* It would seem that the similar code in regtry()
265      * already takes care of this, and in fact it is in
266      * a better location to since this code can #if 0-ed out
267      * but the code in regtry() is needed or otherwise tests
268      * requiring null fields (pat.t#187 and split.t#{13,14}
269      * (as of patchlevel 7877)  will fail.  Then again,
270      * this code seems to be necessary or otherwise
271      * building DynaLoader will fail:
272      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
273      * --jhi */
274     for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) {
275         if ((I32)paren > PL_regsize)
276             PL_regstartp[paren] = -1;
277         PL_regendp[paren] = -1;
278     }
279 #endif
280     return input;
281 }
282
283 STATIC char *
284 S_regcp_set_to(pTHX_ I32 ss)
285 {
286     I32 tmp = PL_savestack_ix;
287
288     PL_savestack_ix = ss;
289     regcppop();
290     PL_savestack_ix = tmp;
291     return Nullch;
292 }
293
294 typedef struct re_cc_state
295 {
296     I32 ss;
297     regnode *node;
298     struct re_cc_state *prev;
299     CURCUR *cc;
300     regexp *re;
301 } re_cc_state;
302
303 #define regcpblow(cp) LEAVE_SCOPE(cp)   /* Ignores regcppush()ed data. */
304
305 #define TRYPAREN(paren, n, input) {                             \
306     if (paren) {                                                \
307         if (n) {                                                \
308             PL_regstartp[paren] = HOPc(input, -1) - PL_bostr;   \
309             PL_regendp[paren] = input - PL_bostr;               \
310         }                                                       \
311         else                                                    \
312             PL_regendp[paren] = -1;                             \
313     }                                                           \
314     if (regmatch(next))                                         \
315         sayYES;                                                 \
316     if (paren && n)                                             \
317         PL_regendp[paren] = -1;                                 \
318 }
319
320
321 /*
322  * pregexec and friends
323  */
324
325 /*
326  - pregexec - match a regexp against a string
327  */
328 I32
329 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
330          char *strbeg, I32 minend, SV *screamer, U32 nosave)
331 /* strend: pointer to null at end of string */
332 /* strbeg: real beginning of string */
333 /* minend: end of match must be >=minend after stringarg. */
334 /* nosave: For optimizations. */
335 {
336     return
337         regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
338                       nosave ? 0 : REXEC_COPY_STR);
339 }
340
341 STATIC void
342 S_cache_re(pTHX_ regexp *prog)
343 {
344     PL_regprecomp = prog->precomp;              /* Needed for FAIL. */
345 #ifdef DEBUGGING
346     PL_regprogram = prog->program;
347 #endif
348     PL_regnpar = prog->nparens;
349     PL_regdata = prog->data;
350     PL_reg_re = prog;
351 }
352
353 /*
354  * Need to implement the following flags for reg_anch:
355  *
356  * USE_INTUIT_NOML              - Useful to call re_intuit_start() first
357  * USE_INTUIT_ML
358  * INTUIT_AUTORITATIVE_NOML     - Can trust a positive answer
359  * INTUIT_AUTORITATIVE_ML
360  * INTUIT_ONCE_NOML             - Intuit can match in one location only.
361  * INTUIT_ONCE_ML
362  *
363  * Another flag for this function: SECOND_TIME (so that float substrs
364  * with giant delta may be not rechecked).
365  */
366
367 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
368
369 /* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
370    Otherwise, only SvCUR(sv) is used to get strbeg. */
371
372 /* XXXX We assume that strpos is strbeg unless sv. */
373
374 /* XXXX Some places assume that there is a fixed substring.
375         An update may be needed if optimizer marks as "INTUITable"
376         RExen without fixed substrings.  Similarly, it is assumed that
377         lengths of all the strings are no more than minlen, thus they
378         cannot come from lookahead.
379         (Or minlen should take into account lookahead.) */
380
381 /* A failure to find a constant substring means that there is no need to make
382    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
383    finding a substring too deep into the string means that less calls to
384    regtry() should be needed.
385
386    REx compiler's optimizer found 4 possible hints:
387         a) Anchored substring;
388         b) Fixed substring;
389         c) Whether we are anchored (beginning-of-line or \G);
390         d) First node (of those at offset 0) which may distingush positions;
391    We use a)b)d) and multiline-part of c), and try to find a position in the
392    string which does not contradict any of them.
393  */
394
395 /* Most of decisions we do here should have been done at compile time.
396    The nodes of the REx which we used for the search should have been
397    deleted from the finite automaton. */
398
399 char *
400 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
401                      char *strend, U32 flags, re_scream_pos_data *data)
402 {
403     register I32 start_shift = 0;
404     /* Should be nonnegative! */
405     register I32 end_shift   = 0;
406     register char *s;
407     register SV *check;
408     char *strbeg;
409     char *t;
410     int do_utf8 = sv ? SvUTF8(sv) : 0;  /* if no sv we have to assume bytes */
411     I32 ml_anch;
412     register char *other_last = Nullch; /* other substr checked before this */
413     char *check_at = Nullch;            /* check substr found at this pos */
414     I32 multiline = prog->reganch & PMf_MULTILINE;
415 #ifdef DEBUGGING
416     char *i_strpos = strpos;
417     SV *dsv = PERL_DEBUG_PAD_ZERO(0);
418 #endif
419
420     GET_RE_DEBUG_FLAGS_DECL;
421
422     RX_MATCH_UTF8_set(prog,do_utf8);
423
424     if (prog->reganch & ROPT_UTF8) {
425         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
426                               "UTF-8 regex...\n"));
427         PL_reg_flags |= RF_utf8;
428     }
429
430     DEBUG_EXECUTE_r({
431          char *s   = PL_reg_match_utf8 ?
432                          sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
433                          strpos;
434          int   len = PL_reg_match_utf8 ?
435                          strlen(s) : strend - strpos;
436          if (!PL_colorset)
437               reginitcolors();
438          if (PL_reg_match_utf8)
439              DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
440                                    "UTF-8 target...\n"));
441          PerlIO_printf(Perl_debug_log,
442                        "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
443                        PL_colors[4],PL_colors[5],PL_colors[0],
444                        prog->precomp,
445                        PL_colors[1],
446                        (strlen(prog->precomp) > 60 ? "..." : ""),
447                        PL_colors[0],
448                        (int)(len > 60 ? 60 : len),
449                        s, PL_colors[1],
450                        (len > 60 ? "..." : "")
451               );
452     });
453
454     /* CHR_DIST() would be more correct here but it makes things slow. */
455     if (prog->minlen > strend - strpos) {
456         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
457                               "String too short... [re_intuit_start]\n"));
458         goto fail;
459     }
460     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
461     PL_regeol = strend;
462     if (do_utf8) {
463         if (!prog->check_utf8 && prog->check_substr)
464             to_utf8_substr(prog);
465         check = prog->check_utf8;
466     } else {
467         if (!prog->check_substr && prog->check_utf8)
468             to_byte_substr(prog);
469         check = prog->check_substr;
470     }
471    if (check == &PL_sv_undef) {
472         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
473                 "Non-utf string cannot match utf check string\n"));
474         goto fail;
475     }
476     if (prog->reganch & ROPT_ANCH) {    /* Match at beg-of-str or after \n */
477         ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
478                      || ( (prog->reganch & ROPT_ANCH_BOL)
479                           && !multiline ) );    /* Check after \n? */
480
481         if (!ml_anch) {
482           if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
483                                   | ROPT_IMPLICIT)) /* not a real BOL */
484                /* SvCUR is not set on references: SvRV and SvPVX overlap */
485                && sv && !SvROK(sv)
486                && (strpos != strbeg)) {
487               DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
488               goto fail;
489           }
490           if (prog->check_offset_min == prog->check_offset_max &&
491               !(prog->reganch & ROPT_CANY_SEEN)) {
492             /* Substring at constant offset from beg-of-str... */
493             I32 slen;
494
495             s = HOP3c(strpos, prog->check_offset_min, strend);
496             if (SvTAIL(check)) {
497                 slen = SvCUR(check);    /* >= 1 */
498
499                 if ( strend - s > slen || strend - s < slen - 1
500                      || (strend - s == slen && strend[-1] != '\n')) {
501                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
502                     goto fail_finish;
503                 }
504                 /* Now should match s[0..slen-2] */
505                 slen--;
506                 if (slen && (*SvPVX(check) != *s
507                              || (slen > 1
508                                  && memNE(SvPVX(check), s, slen)))) {
509                   report_neq:
510                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
511                     goto fail_finish;
512                 }
513             }
514             else if (*SvPVX(check) != *s
515                      || ((slen = SvCUR(check)) > 1
516                          && memNE(SvPVX(check), s, slen)))
517                 goto report_neq;
518             goto success_at_start;
519           }
520         }
521         /* Match is anchored, but substr is not anchored wrt beg-of-str. */
522         s = strpos;
523         start_shift = prog->check_offset_min; /* okay to underestimate on CC */
524         end_shift = prog->minlen - start_shift -
525             CHR_SVLEN(check) + (SvTAIL(check) != 0);
526         if (!ml_anch) {
527             I32 end = prog->check_offset_max + CHR_SVLEN(check)
528                                          - (SvTAIL(check) != 0);
529             I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
530
531             if (end_shift < eshift)
532                 end_shift = eshift;
533         }
534     }
535     else {                              /* Can match at random position */
536         ml_anch = 0;
537         s = strpos;
538         start_shift = prog->check_offset_min; /* okay to underestimate on CC */
539         /* Should be nonnegative! */
540         end_shift = prog->minlen - start_shift -
541             CHR_SVLEN(check) + (SvTAIL(check) != 0);
542     }
543
544 #ifdef DEBUGGING        /* 7/99: reports of failure (with the older version) */
545     if (end_shift < 0)
546         Perl_croak(aTHX_ "panic: end_shift");
547 #endif
548
549   restart:
550     /* Find a possible match in the region s..strend by looking for
551        the "check" substring in the region corrected by start/end_shift. */
552     if (flags & REXEC_SCREAM) {
553         I32 p = -1;                     /* Internal iterator of scream. */
554         I32 *pp = data ? data->scream_pos : &p;
555
556         if (PL_screamfirst[BmRARE(check)] >= 0
557             || ( BmRARE(check) == '\n'
558                  && (BmPREVIOUS(check) == SvCUR(check) - 1)
559                  && SvTAIL(check) ))
560             s = screaminstr(sv, check,
561                             start_shift + (s - strbeg), end_shift, pp, 0);
562         else
563             goto fail_finish;
564         /* we may be pointing at the wrong string */
565         if (s && RX_MATCH_COPIED(prog))
566             s = strbeg + (s - SvPVX(sv));
567         if (data)
568             *data->scream_olds = s;
569     }
570     else if (prog->reganch & ROPT_CANY_SEEN)
571         s = fbm_instr((U8*)(s + start_shift),
572                       (U8*)(strend - end_shift),
573                       check, multiline ? FBMrf_MULTILINE : 0);
574     else
575         s = fbm_instr(HOP3(s, start_shift, strend),
576                       HOP3(strend, -end_shift, strbeg),
577                       check, multiline ? FBMrf_MULTILINE : 0);
578
579     /* Update the count-of-usability, remove useless subpatterns,
580         unshift s.  */
581
582     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
583                           (s ? "Found" : "Did not find"),
584                           (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
585                           PL_colors[0],
586                           (int)(SvCUR(check) - (SvTAIL(check)!=0)),
587                           SvPVX(check),
588                           PL_colors[1], (SvTAIL(check) ? "$" : ""),
589                           (s ? " at offset " : "...\n") ) );
590
591     if (!s)
592         goto fail_finish;
593
594     check_at = s;
595
596     /* Finish the diagnostic message */
597     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
598
599     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
600        Start with the other substr.
601        XXXX no SCREAM optimization yet - and a very coarse implementation
602        XXXX /ttx+/ results in anchored=`ttx', floating=`x'.  floating will
603                 *always* match.  Probably should be marked during compile...
604        Probably it is right to do no SCREAM here...
605      */
606
607     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
608         /* Take into account the "other" substring. */
609         /* XXXX May be hopelessly wrong for UTF... */
610         if (!other_last)
611             other_last = strpos;
612         if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
613           do_other_anchored:
614             {
615                 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
616                 char *s1 = s;
617                 SV* must;
618
619                 t = s - prog->check_offset_max;
620                 if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
621                     && (!do_utf8
622                         || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
623                             && t > strpos)))
624                     /* EMPTY */;
625                 else
626                     t = strpos;
627                 t = HOP3c(t, prog->anchored_offset, strend);
628                 if (t < other_last)     /* These positions already checked */
629                     t = other_last;
630                 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
631                 if (last < last1)
632                     last1 = last;
633  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
634                 /* On end-of-str: see comment below. */
635                 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
636                 if (must == &PL_sv_undef) {
637                     s = (char*)NULL;
638                     DEBUG_EXECUTE_r(must = prog->anchored_utf8);        /* for debug */
639                 }
640                 else
641                     s = fbm_instr(
642                         (unsigned char*)t,
643                         HOP3(HOP3(last1, prog->anchored_offset, strend)
644                                 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
645                         must,
646                         multiline ? FBMrf_MULTILINE : 0
647                     );
648                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
649                         "%s anchored substr `%s%.*s%s'%s",
650                         (s ? "Found" : "Contradicts"),
651                         PL_colors[0],
652                           (int)(SvCUR(must)
653                           - (SvTAIL(must)!=0)),
654                           SvPVX(must),
655                           PL_colors[1], (SvTAIL(must) ? "$" : "")));
656                 if (!s) {
657                     if (last1 >= last2) {
658                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
659                                                 ", giving up...\n"));
660                         goto fail_finish;
661                     }
662                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
663                         ", trying floating at offset %ld...\n",
664                         (long)(HOP3c(s1, 1, strend) - i_strpos)));
665                     other_last = HOP3c(last1, prog->anchored_offset+1, strend);
666                     s = HOP3c(last, 1, strend);
667                     goto restart;
668                 }
669                 else {
670                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
671                           (long)(s - i_strpos)));
672                     t = HOP3c(s, -prog->anchored_offset, strbeg);
673                     other_last = HOP3c(s, 1, strend);
674                     s = s1;
675                     if (t == strpos)
676                         goto try_at_start;
677                     goto try_at_offset;
678                 }
679             }
680         }
681         else {          /* Take into account the floating substring. */
682             char *last, *last1;
683             char *s1 = s;
684             SV* must;
685
686             t = HOP3c(s, -start_shift, strbeg);
687             last1 = last =
688                 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
689             if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
690                 last = HOP3c(t, prog->float_max_offset, strend);
691             s = HOP3c(t, prog->float_min_offset, strend);
692             if (s < other_last)
693                 s = other_last;
694  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
695             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
696             /* fbm_instr() takes into account exact value of end-of-str
697                if the check is SvTAIL(ed).  Since false positives are OK,
698                and end-of-str is not later than strend we are OK. */
699             if (must == &PL_sv_undef) {
700                 s = (char*)NULL;
701                 DEBUG_EXECUTE_r(must = prog->float_utf8);       /* for debug message */
702             }
703             else
704                 s = fbm_instr((unsigned char*)s,
705                               (unsigned char*)last + SvCUR(must)
706                                   - (SvTAIL(must)!=0),
707                               must, multiline ? FBMrf_MULTILINE : 0);
708             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
709                     (s ? "Found" : "Contradicts"),
710                     PL_colors[0],
711                       (int)(SvCUR(must) - (SvTAIL(must)!=0)),
712                       SvPVX(must),
713                       PL_colors[1], (SvTAIL(must) ? "$" : "")));
714             if (!s) {
715                 if (last1 == last) {
716                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
717                                             ", giving up...\n"));
718                     goto fail_finish;
719                 }
720                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
721                     ", trying anchored starting at offset %ld...\n",
722                     (long)(s1 + 1 - i_strpos)));
723                 other_last = last;
724                 s = HOP3c(t, 1, strend);
725                 goto restart;
726             }
727             else {
728                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
729                       (long)(s - i_strpos)));
730                 other_last = s; /* Fix this later. --Hugo */
731                 s = s1;
732                 if (t == strpos)
733                     goto try_at_start;
734                 goto try_at_offset;
735             }
736         }
737     }
738
739     t = s - prog->check_offset_max;
740     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
741         && (!do_utf8
742             || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
743                  && t > strpos))) {
744         /* Fixed substring is found far enough so that the match
745            cannot start at strpos. */
746       try_at_offset:
747         if (ml_anch && t[-1] != '\n') {
748             /* Eventually fbm_*() should handle this, but often
749                anchored_offset is not 0, so this check will not be wasted. */
750             /* XXXX In the code below we prefer to look for "^" even in
751                presence of anchored substrings.  And we search even
752                beyond the found float position.  These pessimizations
753                are historical artefacts only.  */
754           find_anchor:
755             while (t < strend - prog->minlen) {
756                 if (*t == '\n') {
757                     if (t < check_at - prog->check_offset_min) {
758                         if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
759                             /* Since we moved from the found position,
760                                we definitely contradict the found anchored
761                                substr.  Due to the above check we do not
762                                contradict "check" substr.
763                                Thus we can arrive here only if check substr
764                                is float.  Redo checking for "other"=="fixed".
765                              */
766                             strpos = t + 1;                     
767                             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
768                                 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
769                             goto do_other_anchored;
770                         }
771                         /* We don't contradict the found floating substring. */
772                         /* XXXX Why not check for STCLASS? */
773                         s = t + 1;
774                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
775                             PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
776                         goto set_useful;
777                     }
778                     /* Position contradicts check-string */
779                     /* XXXX probably better to look for check-string
780                        than for "\n", so one should lower the limit for t? */
781                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
782                         PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
783                     other_last = strpos = s = t + 1;
784                     goto restart;
785                 }
786                 t++;
787             }
788             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
789                         PL_colors[0],PL_colors[1]));
790             goto fail_finish;
791         }
792         else {
793             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
794                         PL_colors[0],PL_colors[1]));
795         }
796         s = t;
797       set_useful:
798         ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);    /* hooray/5 */
799     }
800     else {
801         /* The found string does not prohibit matching at strpos,
802            - no optimization of calling REx engine can be performed,
803            unless it was an MBOL and we are not after MBOL,
804            or a future STCLASS check will fail this. */
805       try_at_start:
806         /* Even in this situation we may use MBOL flag if strpos is offset
807            wrt the start of the string. */
808         if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
809             && (strpos != strbeg) && strpos[-1] != '\n'
810             /* May be due to an implicit anchor of m{.*foo}  */
811             && !(prog->reganch & ROPT_IMPLICIT))
812         {
813             t = strpos;
814             goto find_anchor;
815         }
816         DEBUG_EXECUTE_r( if (ml_anch)
817             PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
818                         (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
819         );
820       success_at_start:
821         if (!(prog->reganch & ROPT_NAUGHTY)     /* XXXX If strpos moved? */
822             && (do_utf8 ? (
823                 prog->check_utf8                /* Could be deleted already */
824                 && --BmUSEFUL(prog->check_utf8) < 0
825                 && (prog->check_utf8 == prog->float_utf8)
826             ) : (
827                 prog->check_substr              /* Could be deleted already */
828                 && --BmUSEFUL(prog->check_substr) < 0
829                 && (prog->check_substr == prog->float_substr)
830             )))
831         {
832             /* If flags & SOMETHING - do not do it many times on the same match */
833             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
834             SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
835             if (do_utf8 ? prog->check_substr : prog->check_utf8)
836                 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
837             prog->check_substr = prog->check_utf8 = Nullsv;     /* disable */
838             prog->float_substr = prog->float_utf8 = Nullsv;     /* clear */
839             check = Nullsv;                     /* abort */
840             s = strpos;
841             /* XXXX This is a remnant of the old implementation.  It
842                     looks wasteful, since now INTUIT can use many
843                     other heuristics. */
844             prog->reganch &= ~RE_USE_INTUIT;
845         }
846         else
847             s = strpos;
848     }
849
850     /* Last resort... */
851     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
852     if (prog->regstclass) {
853         /* minlen == 0 is possible if regstclass is \b or \B,
854            and the fixed substr is ''$.
855            Since minlen is already taken into account, s+1 is before strend;
856            accidentally, minlen >= 1 guaranties no false positives at s + 1
857            even for \b or \B.  But (minlen? 1 : 0) below assumes that
858            regstclass does not come from lookahead...  */
859         /* If regstclass takes bytelength more than 1: If charlength==1, OK.
860            This leaves EXACTF only, which is dealt with in find_byclass().  */
861         U8* str = (U8*)STRING(prog->regstclass);
862         int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
863                     ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
864                     : 1);
865         char *endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
866                 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
867                 : (prog->float_substr || prog->float_utf8
868                    ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
869                            cl_l, strend)
870                    : strend);
871         char *startpos = strbeg;
872
873         t = s;
874         cache_re(prog);
875         s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
876         if (!s) {
877 #ifdef DEBUGGING
878             const char *what = 0;
879 #endif
880             if (endpos == strend) {
881                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
882                                 "Could not match STCLASS...\n") );
883                 goto fail;
884             }
885             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
886                                    "This position contradicts STCLASS...\n") );
887             if ((prog->reganch & ROPT_ANCH) && !ml_anch)
888                 goto fail;
889             /* Contradict one of substrings */
890             if (prog->anchored_substr || prog->anchored_utf8) {
891                 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
892                     DEBUG_EXECUTE_r( what = "anchored" );
893                   hop_and_restart:
894                     s = HOP3c(t, 1, strend);
895                     if (s + start_shift + end_shift > strend) {
896                         /* XXXX Should be taken into account earlier? */
897                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
898                                                "Could not match STCLASS...\n") );
899                         goto fail;
900                     }
901                     if (!check)
902                         goto giveup;
903                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
904                                 "Looking for %s substr starting at offset %ld...\n",
905                                  what, (long)(s + start_shift - i_strpos)) );
906                     goto restart;
907                 }
908                 /* Have both, check_string is floating */
909                 if (t + start_shift >= check_at) /* Contradicts floating=check */
910                     goto retry_floating_check;
911                 /* Recheck anchored substring, but not floating... */
912                 s = check_at;
913                 if (!check)
914                     goto giveup;
915                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
916                           "Looking for anchored substr starting at offset %ld...\n",
917                           (long)(other_last - i_strpos)) );
918                 goto do_other_anchored;
919             }
920             /* Another way we could have checked stclass at the
921                current position only: */
922             if (ml_anch) {
923                 s = t = t + 1;
924                 if (!check)
925                     goto giveup;
926                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
927                           "Looking for /%s^%s/m starting at offset %ld...\n",
928                           PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
929                 goto try_at_offset;
930             }
931             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))     /* Could have been deleted */
932                 goto fail;
933             /* Check is floating subtring. */
934           retry_floating_check:
935             t = check_at - start_shift;
936             DEBUG_EXECUTE_r( what = "floating" );
937             goto hop_and_restart;
938         }
939         if (t != s) {
940             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
941                         "By STCLASS: moving %ld --> %ld\n",
942                                   (long)(t - i_strpos), (long)(s - i_strpos))
943                    );
944         }
945         else {
946             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
947                                   "Does not contradict STCLASS...\n"); 
948                    );
949         }
950     }
951   giveup:
952     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
953                           PL_colors[4], (check ? "Guessed" : "Giving up"),
954                           PL_colors[5], (long)(s - i_strpos)) );
955     return s;
956
957   fail_finish:                          /* Substring not found */
958     if (prog->check_substr || prog->check_utf8)         /* could be removed already */
959         BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
960   fail:
961     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
962                           PL_colors[4],PL_colors[5]));
963     return Nullch;
964 }
965
966 /* We know what class REx starts with.  Try to find this position... */
967 STATIC char *
968 S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
969 {
970         I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
971         char *m;
972         STRLEN ln;
973         STRLEN lnc;
974         register STRLEN uskip;
975         unsigned int c1;
976         unsigned int c2;
977         char *e;
978         register I32 tmp = 1;   /* Scratch variable? */
979         register bool do_utf8 = PL_reg_match_utf8;
980
981         /* We know what class it must start with. */
982         switch (OP(c)) {
983         case ANYOF:
984             if (do_utf8) {
985                  while (s + (uskip = UTF8SKIP(s)) <= strend) {
986                       if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
987                           !UTF8_IS_INVARIANT((U8)s[0]) ?
988                           reginclass(c, (U8*)s, 0, do_utf8) :
989                           REGINCLASS(c, (U8*)s)) {
990                            if (tmp && (norun || regtry(prog, s)))
991                                 goto got_it;
992                            else
993                                 tmp = doevery;
994                       }
995                       else 
996                            tmp = 1;
997                       s += uskip;
998                  }
999             }
1000             else {
1001                  while (s < strend) {
1002                       STRLEN skip = 1;
1003
1004                       if (REGINCLASS(c, (U8*)s) ||
1005                           (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1006                            /* The assignment of 2 is intentional:
1007                             * for the folded sharp s, the skip is 2. */
1008                            (skip = SHARP_S_SKIP))) {
1009                            if (tmp && (norun || regtry(prog, s)))
1010                                 goto got_it;
1011                            else
1012                                 tmp = doevery;
1013                       }
1014                       else 
1015                            tmp = 1;
1016                       s += skip;
1017                  }
1018             }
1019             break;
1020         case CANY:
1021             while (s < strend) {
1022                 if (tmp && (norun || regtry(prog, s)))
1023                     goto got_it;
1024                 else
1025                     tmp = doevery;
1026                 s++;
1027             }
1028             break;
1029         case EXACTF:
1030             m   = STRING(c);
1031             ln  = STR_LEN(c);   /* length to match in octets/bytes */
1032             lnc = (I32) ln;     /* length to match in characters */
1033             if (UTF) {
1034                 STRLEN ulen1, ulen2;
1035                 U8 *sm = (U8 *) m;
1036                 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1037                 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1038
1039                 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1040                 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1041
1042                 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE, 
1043                                     0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1044                 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1045                                     0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1046                 lnc = 0;
1047                 while (sm < ((U8 *) m + ln)) {
1048                     lnc++;
1049                     sm += UTF8SKIP(sm);
1050                 }
1051             }
1052             else {
1053                 c1 = *(U8*)m;
1054                 c2 = PL_fold[c1];
1055             }
1056             goto do_exactf;
1057         case EXACTFL:
1058             m   = STRING(c);
1059             ln  = STR_LEN(c);
1060             lnc = (I32) ln;
1061             c1 = *(U8*)m;
1062             c2 = PL_fold_locale[c1];
1063           do_exactf:
1064             e = HOP3c(strend, -((I32)lnc), s);
1065
1066             if (norun && e < s)
1067                 e = s;                  /* Due to minlen logic of intuit() */
1068
1069             /* The idea in the EXACTF* cases is to first find the
1070              * first character of the EXACTF* node and then, if
1071              * necessary, case-insensitively compare the full
1072              * text of the node.  The c1 and c2 are the first
1073              * characters (though in Unicode it gets a bit
1074              * more complicated because there are more cases
1075              * than just upper and lower: one needs to use
1076              * the so-called folding case for case-insensitive
1077              * matching (called "loose matching" in Unicode).
1078              * ibcmp_utf8() will do just that. */
1079
1080             if (do_utf8) {
1081                 UV c, f;
1082                 U8 tmpbuf [UTF8_MAXBYTES+1];
1083                 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
1084                 STRLEN len, foldlen;
1085                 
1086                 if (c1 == c2) {
1087                     /* Upper and lower of 1st char are equal -
1088                      * probably not a "letter". */
1089                     while (s <= e) {
1090                         c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1091                                            ckWARN(WARN_UTF8) ?
1092                                            0 : UTF8_ALLOW_ANY);
1093                         if ( c == c1
1094                              && (ln == len ||
1095                                  ibcmp_utf8(s, (char **)0, 0,  do_utf8,
1096                                             m, (char **)0, ln, (bool)UTF))
1097                              && (norun || regtry(prog, s)) )
1098                             goto got_it;
1099                         else {
1100                              uvchr_to_utf8(tmpbuf, c);
1101                              f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1102                              if ( f != c
1103                                   && (f == c1 || f == c2)
1104                                   && (ln == foldlen ||
1105                                       !ibcmp_utf8((char *) foldbuf,
1106                                                   (char **)0, foldlen, do_utf8,
1107                                                   m,
1108                                                   (char **)0, ln, (bool)UTF))
1109                                   && (norun || regtry(prog, s)) )
1110                                   goto got_it;
1111                         }
1112                         s += len;
1113                     }
1114                 }
1115                 else {
1116                     while (s <= e) {
1117                       c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1118                                            ckWARN(WARN_UTF8) ?
1119                                            0 : UTF8_ALLOW_ANY);
1120
1121                         /* Handle some of the three Greek sigmas cases.
1122                          * Note that not all the possible combinations
1123                          * are handled here: some of them are handled
1124                          * by the standard folding rules, and some of
1125                          * them (the character class or ANYOF cases)
1126                          * are handled during compiletime in
1127                          * regexec.c:S_regclass(). */
1128                         if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1129                             c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1130                             c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1131
1132                         if ( (c == c1 || c == c2)
1133                              && (ln == len ||
1134                                  ibcmp_utf8(s, (char **)0, 0,  do_utf8,
1135                                             m, (char **)0, ln, (bool)UTF))
1136                              && (norun || regtry(prog, s)) )
1137                             goto got_it;
1138                         else {
1139                              uvchr_to_utf8(tmpbuf, c);
1140                              f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1141                              if ( f != c
1142                                   && (f == c1 || f == c2)
1143                                   && (ln == foldlen ||
1144                                       !ibcmp_utf8((char *) foldbuf,
1145                                                   (char **)0, foldlen, do_utf8,
1146                                                   m,
1147                                                   (char **)0, ln, (bool)UTF))
1148                                   && (norun || regtry(prog, s)) )
1149                                   goto got_it;
1150                         }
1151                         s += len;
1152                     }
1153                 }
1154             }
1155             else {
1156                 if (c1 == c2)
1157                     while (s <= e) {
1158                         if ( *(U8*)s == c1
1159                              && (ln == 1 || !(OP(c) == EXACTF
1160                                               ? ibcmp(s, m, ln)
1161                                               : ibcmp_locale(s, m, ln)))
1162                              && (norun || regtry(prog, s)) )
1163                             goto got_it;
1164                         s++;
1165                     }
1166                 else
1167                     while (s <= e) {
1168                         if ( (*(U8*)s == c1 || *(U8*)s == c2)
1169                              && (ln == 1 || !(OP(c) == EXACTF
1170                                               ? ibcmp(s, m, ln)
1171                                               : ibcmp_locale(s, m, ln)))
1172                              && (norun || regtry(prog, s)) )
1173                             goto got_it;
1174                         s++;
1175                     }
1176             }
1177             break;
1178         case BOUNDL:
1179             PL_reg_flags |= RF_tainted;
1180             /* FALL THROUGH */
1181         case BOUND:
1182             if (do_utf8) {
1183                 if (s == PL_bostr)
1184                     tmp = '\n';
1185                 else {
1186                     U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1187                 
1188                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1189                 }
1190                 tmp = ((OP(c) == BOUND ?
1191                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1192                 LOAD_UTF8_CHARCLASS(alnum,"a");
1193                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1194                     if (tmp == !(OP(c) == BOUND ?
1195                                  swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1196                                  isALNUM_LC_utf8((U8*)s)))
1197                     {
1198                         tmp = !tmp;
1199                         if ((norun || regtry(prog, s)))
1200                             goto got_it;
1201                     }
1202                     s += uskip;
1203                 }
1204             }
1205             else {
1206                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1207                 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1208                 while (s < strend) {
1209                     if (tmp ==
1210                         !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1211                         tmp = !tmp;
1212                         if ((norun || regtry(prog, s)))
1213                             goto got_it;
1214                     }
1215                     s++;
1216                 }
1217             }
1218             if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
1219                 goto got_it;
1220             break;
1221         case NBOUNDL:
1222             PL_reg_flags |= RF_tainted;
1223             /* FALL THROUGH */
1224         case NBOUND:
1225             if (do_utf8) {
1226                 if (s == PL_bostr)
1227                     tmp = '\n';
1228                 else {
1229                     U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1230                 
1231                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1232                 }
1233                 tmp = ((OP(c) == NBOUND ?
1234                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1235                 LOAD_UTF8_CHARCLASS(alnum,"a");
1236                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1237                     if (tmp == !(OP(c) == NBOUND ?
1238                                  swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1239                                  isALNUM_LC_utf8((U8*)s)))
1240                         tmp = !tmp;
1241                     else if ((norun || regtry(prog, s)))
1242                         goto got_it;
1243                     s += uskip;
1244                 }
1245             }
1246             else {
1247                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1248                 tmp = ((OP(c) == NBOUND ?
1249                         isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1250                 while (s < strend) {
1251                     if (tmp ==
1252                         !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1253                         tmp = !tmp;
1254                     else if ((norun || regtry(prog, s)))
1255                         goto got_it;
1256                     s++;
1257                 }
1258             }
1259             if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
1260                 goto got_it;
1261             break;
1262         case ALNUM:
1263             if (do_utf8) {
1264                 LOAD_UTF8_CHARCLASS(alnum,"a");
1265                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1266                     if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1267                         if (tmp && (norun || regtry(prog, s)))
1268                             goto got_it;
1269                         else
1270                             tmp = doevery;
1271                     }
1272                     else
1273                         tmp = 1;
1274                     s += uskip;
1275                 }
1276             }
1277             else {
1278                 while (s < strend) {
1279                     if (isALNUM(*s)) {
1280                         if (tmp && (norun || regtry(prog, s)))
1281                             goto got_it;
1282                         else
1283                             tmp = doevery;
1284                     }
1285                     else
1286                         tmp = 1;
1287                     s++;
1288                 }
1289             }
1290             break;
1291         case ALNUML:
1292             PL_reg_flags |= RF_tainted;
1293             if (do_utf8) {
1294                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1295                     if (isALNUM_LC_utf8((U8*)s)) {
1296                         if (tmp && (norun || regtry(prog, s)))
1297                             goto got_it;
1298                         else
1299                             tmp = doevery;
1300                     }
1301                     else
1302                         tmp = 1;
1303                     s += uskip;
1304                 }
1305             }
1306             else {
1307                 while (s < strend) {
1308                     if (isALNUM_LC(*s)) {
1309                         if (tmp && (norun || regtry(prog, s)))
1310                             goto got_it;
1311                         else
1312                             tmp = doevery;
1313                     }
1314                     else
1315                         tmp = 1;
1316                     s++;
1317                 }
1318             }
1319             break;
1320         case NALNUM:
1321             if (do_utf8) {
1322                 LOAD_UTF8_CHARCLASS(alnum,"a");
1323                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1324                     if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1325                         if (tmp && (norun || regtry(prog, s)))
1326                             goto got_it;
1327                         else
1328                             tmp = doevery;
1329                     }
1330                     else
1331                         tmp = 1;
1332                     s += uskip;
1333                 }
1334             }
1335             else {
1336                 while (s < strend) {
1337                     if (!isALNUM(*s)) {
1338                         if (tmp && (norun || regtry(prog, s)))
1339                             goto got_it;
1340                         else
1341                             tmp = doevery;
1342                     }
1343                     else
1344                         tmp = 1;
1345                     s++;
1346                 }
1347             }
1348             break;
1349         case NALNUML:
1350             PL_reg_flags |= RF_tainted;
1351             if (do_utf8) {
1352                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1353                     if (!isALNUM_LC_utf8((U8*)s)) {
1354                         if (tmp && (norun || regtry(prog, s)))
1355                             goto got_it;
1356                         else
1357                             tmp = doevery;
1358                     }
1359                     else
1360                         tmp = 1;
1361                     s += uskip;
1362                 }
1363             }
1364             else {
1365                 while (s < strend) {
1366                     if (!isALNUM_LC(*s)) {
1367                         if (tmp && (norun || regtry(prog, s)))
1368                             goto got_it;
1369                         else
1370                             tmp = doevery;
1371                     }
1372                     else
1373                         tmp = 1;
1374                     s++;
1375                 }
1376             }
1377             break;
1378         case SPACE:
1379             if (do_utf8) {
1380                 LOAD_UTF8_CHARCLASS(space," ");
1381                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1382                     if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
1383                         if (tmp && (norun || regtry(prog, s)))
1384                             goto got_it;
1385                         else
1386                             tmp = doevery;
1387                     }
1388                     else
1389                         tmp = 1;
1390                     s += uskip;
1391                 }
1392             }
1393             else {
1394                 while (s < strend) {
1395                     if (isSPACE(*s)) {
1396                         if (tmp && (norun || regtry(prog, s)))
1397                             goto got_it;
1398                         else
1399                             tmp = doevery;
1400                     }
1401                     else
1402                         tmp = 1;
1403                     s++;
1404                 }
1405             }
1406             break;
1407         case SPACEL:
1408             PL_reg_flags |= RF_tainted;
1409             if (do_utf8) {
1410                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1411                     if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1412                         if (tmp && (norun || regtry(prog, s)))
1413                             goto got_it;
1414                         else
1415                             tmp = doevery;
1416                     }
1417                     else
1418                         tmp = 1;
1419                     s += uskip;
1420                 }
1421             }
1422             else {
1423                 while (s < strend) {
1424                     if (isSPACE_LC(*s)) {
1425                         if (tmp && (norun || regtry(prog, s)))
1426                             goto got_it;
1427                         else
1428                             tmp = doevery;
1429                     }
1430                     else
1431                         tmp = 1;
1432                     s++;
1433                 }
1434             }
1435             break;
1436         case NSPACE:
1437             if (do_utf8) {
1438                 LOAD_UTF8_CHARCLASS(space," ");
1439                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1440                     if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
1441                         if (tmp && (norun || regtry(prog, s)))
1442                             goto got_it;
1443                         else
1444                             tmp = doevery;
1445                     }
1446                     else
1447                         tmp = 1;
1448                     s += uskip;
1449                 }
1450             }
1451             else {
1452                 while (s < strend) {
1453                     if (!isSPACE(*s)) {
1454                         if (tmp && (norun || regtry(prog, s)))
1455                             goto got_it;
1456                         else
1457                             tmp = doevery;
1458                     }
1459                     else
1460                         tmp = 1;
1461                     s++;
1462                 }
1463             }
1464             break;
1465         case NSPACEL:
1466             PL_reg_flags |= RF_tainted;
1467             if (do_utf8) {
1468                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1469                     if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1470                         if (tmp && (norun || regtry(prog, s)))
1471                             goto got_it;
1472                         else
1473                             tmp = doevery;
1474                     }
1475                     else
1476                         tmp = 1;
1477                     s += uskip;
1478                 }
1479             }
1480             else {
1481                 while (s < strend) {
1482                     if (!isSPACE_LC(*s)) {
1483                         if (tmp && (norun || regtry(prog, s)))
1484                             goto got_it;
1485                         else
1486                             tmp = doevery;
1487                     }
1488                     else
1489                         tmp = 1;
1490                     s++;
1491                 }
1492             }
1493             break;
1494         case DIGIT:
1495             if (do_utf8) {
1496                 LOAD_UTF8_CHARCLASS(digit,"0");
1497                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1498                     if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1499                         if (tmp && (norun || regtry(prog, s)))
1500                             goto got_it;
1501                         else
1502                             tmp = doevery;
1503                     }
1504                     else
1505                         tmp = 1;
1506                     s += uskip;
1507                 }
1508             }
1509             else {
1510                 while (s < strend) {
1511                     if (isDIGIT(*s)) {
1512                         if (tmp && (norun || regtry(prog, s)))
1513                             goto got_it;
1514                         else
1515                             tmp = doevery;
1516                     }
1517                     else
1518                         tmp = 1;
1519                     s++;
1520                 }
1521             }
1522             break;
1523         case DIGITL:
1524             PL_reg_flags |= RF_tainted;
1525             if (do_utf8) {
1526                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1527                     if (isDIGIT_LC_utf8((U8*)s)) {
1528                         if (tmp && (norun || regtry(prog, s)))
1529                             goto got_it;
1530                         else
1531                             tmp = doevery;
1532                     }
1533                     else
1534                         tmp = 1;
1535                     s += uskip;
1536                 }
1537             }
1538             else {
1539                 while (s < strend) {
1540                     if (isDIGIT_LC(*s)) {
1541                         if (tmp && (norun || regtry(prog, s)))
1542                             goto got_it;
1543                         else
1544                             tmp = doevery;
1545                     }
1546                     else
1547                         tmp = 1;
1548                     s++;
1549                 }
1550             }
1551             break;
1552         case NDIGIT:
1553             if (do_utf8) {
1554                 LOAD_UTF8_CHARCLASS(digit,"0");
1555                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1556                     if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1557                         if (tmp && (norun || regtry(prog, s)))
1558                             goto got_it;
1559                         else
1560                             tmp = doevery;
1561                     }
1562                     else
1563                         tmp = 1;
1564                     s += uskip;
1565                 }
1566             }
1567             else {
1568                 while (s < strend) {
1569                     if (!isDIGIT(*s)) {
1570                         if (tmp && (norun || regtry(prog, s)))
1571                             goto got_it;
1572                         else
1573                             tmp = doevery;
1574                     }
1575                     else
1576                         tmp = 1;
1577                     s++;
1578                 }
1579             }
1580             break;
1581         case NDIGITL:
1582             PL_reg_flags |= RF_tainted;
1583             if (do_utf8) {
1584                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1585                     if (!isDIGIT_LC_utf8((U8*)s)) {
1586                         if (tmp && (norun || regtry(prog, s)))
1587                             goto got_it;
1588                         else
1589                             tmp = doevery;
1590                     }
1591                     else
1592                         tmp = 1;
1593                     s += uskip;
1594                 }
1595             }
1596             else {
1597                 while (s < strend) {
1598                     if (!isDIGIT_LC(*s)) {
1599                         if (tmp && (norun || regtry(prog, s)))
1600                             goto got_it;
1601                         else
1602                             tmp = doevery;
1603                     }
1604                     else
1605                         tmp = 1;
1606                     s++;
1607                 }
1608             }
1609             break;
1610         default:
1611             Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1612             break;
1613         }
1614         return 0;
1615       got_it:
1616         return s;
1617 }
1618
1619 /*
1620  - regexec_flags - match a regexp against a string
1621  */
1622 I32
1623 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1624               char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1625 /* strend: pointer to null at end of string */
1626 /* strbeg: real beginning of string */
1627 /* minend: end of match must be >=minend after stringarg. */
1628 /* data: May be used for some additional optimizations. */
1629 /* nosave: For optimizations. */
1630 {
1631     register char *s;
1632     register regnode *c;
1633     register char *startpos = stringarg;
1634     I32 minlen;         /* must match at least this many chars */
1635     I32 dontbother = 0; /* how many characters not to try at end */
1636     /* I32 start_shift = 0; */          /* Offset of the start to find
1637                                          constant substr. */            /* CC */
1638     I32 end_shift = 0;                  /* Same for the end. */         /* CC */
1639     I32 scream_pos = -1;                /* Internal iterator of scream. */
1640     char *scream_olds;
1641     SV* oreplsv = GvSV(PL_replgv);
1642     bool do_utf8 = DO_UTF8(sv);
1643     I32 multiline = prog->reganch & PMf_MULTILINE;
1644 #ifdef DEBUGGING
1645     SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
1646     SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
1647 #endif
1648
1649     GET_RE_DEBUG_FLAGS_DECL;
1650
1651     RX_MATCH_UTF8_set(prog,do_utf8);
1652
1653     PL_regcc = 0;
1654
1655     cache_re(prog);
1656 #ifdef DEBUGGING
1657     PL_regnarrate = DEBUG_r_TEST;
1658 #endif
1659
1660     /* Be paranoid... */
1661     if (prog == NULL || startpos == NULL) {
1662         Perl_croak(aTHX_ "NULL regexp parameter");
1663         return 0;
1664     }
1665
1666     minlen = prog->minlen;
1667     if (strend - startpos < minlen) {
1668         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1669                               "String too short [regexec_flags]...\n"));
1670         goto phooey;
1671     }
1672
1673     /* Check validity of program. */
1674     if (UCHARAT(prog->program) != REG_MAGIC) {
1675         Perl_croak(aTHX_ "corrupted regexp program");
1676     }
1677
1678     PL_reg_flags = 0;
1679     PL_reg_eval_set = 0;
1680     PL_reg_maxiter = 0;
1681
1682     if (prog->reganch & ROPT_UTF8)
1683         PL_reg_flags |= RF_utf8;
1684
1685     /* Mark beginning of line for ^ and lookbehind. */
1686     PL_regbol = startpos;
1687     PL_bostr  = strbeg;
1688     PL_reg_sv = sv;
1689
1690     /* Mark end of line for $ (and such) */
1691     PL_regeol = strend;
1692
1693     /* see how far we have to get to not match where we matched before */
1694     PL_regtill = startpos+minend;
1695
1696     /* We start without call_cc context.  */
1697     PL_reg_call_cc = 0;
1698
1699     /* If there is a "must appear" string, look for it. */
1700     s = startpos;
1701
1702     if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1703         MAGIC *mg;
1704
1705         if (flags & REXEC_IGNOREPOS)    /* Means: check only at start */
1706             PL_reg_ganch = startpos;
1707         else if (sv && SvTYPE(sv) >= SVt_PVMG
1708                   && SvMAGIC(sv)
1709                   && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1710                   && mg->mg_len >= 0) {
1711             PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1712             if (prog->reganch & ROPT_ANCH_GPOS) {
1713                 if (s > PL_reg_ganch)
1714                     goto phooey;
1715                 s = PL_reg_ganch;
1716             }
1717         }
1718         else                            /* pos() not defined */
1719             PL_reg_ganch = strbeg;
1720     }
1721
1722     if (!(flags & REXEC_CHECKED) && (prog->check_substr != Nullsv || prog->check_utf8 != Nullsv)) {
1723         re_scream_pos_data d;
1724
1725         d.scream_olds = &scream_olds;
1726         d.scream_pos = &scream_pos;
1727         s = re_intuit_start(prog, sv, s, strend, flags, &d);
1728         if (!s) {
1729             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1730             goto phooey;        /* not present */
1731         }
1732     }
1733
1734     DEBUG_EXECUTE_r({
1735          char *s0   = UTF ?
1736            pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1737                           UNI_DISPLAY_REGEX) :
1738            prog->precomp;
1739          int   len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1740          char *s1   = do_utf8 ? sv_uni_display(dsv1, sv, 60,
1741                                                UNI_DISPLAY_REGEX) : startpos;
1742          int   len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
1743          if (!PL_colorset)
1744              reginitcolors();
1745          PerlIO_printf(Perl_debug_log,
1746                        "%sMatching REx%s `%s%*.*s%s%s' against `%s%.*s%s%s'\n",
1747                        PL_colors[4],PL_colors[5],PL_colors[0],
1748                        len0, len0, s0,
1749                        PL_colors[1],
1750                        len0 > 60 ? "..." : "",
1751                        PL_colors[0],
1752                        (int)(len1 > 60 ? 60 : len1),
1753                        s1, PL_colors[1],
1754                        (len1 > 60 ? "..." : "")
1755               );
1756     });
1757
1758     /* Simplest case:  anchored match need be tried only once. */
1759     /*  [unless only anchor is BOL and multiline is set] */
1760     if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1761         if (s == startpos && regtry(prog, startpos))
1762             goto got_it;
1763         else if (multiline || (prog->reganch & ROPT_IMPLICIT)
1764                  || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1765         {
1766             char *end;
1767
1768             if (minlen)
1769                 dontbother = minlen - 1;
1770             end = HOP3c(strend, -dontbother, strbeg) - 1;
1771             /* for multiline we only have to try after newlines */
1772             if (prog->check_substr || prog->check_utf8) {
1773                 if (s == startpos)
1774                     goto after_try;
1775                 while (1) {
1776                     if (regtry(prog, s))
1777                         goto got_it;
1778                   after_try:
1779                     if (s >= end)
1780                         goto phooey;
1781                     if (prog->reganch & RE_USE_INTUIT) {
1782                         s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1783                         if (!s)
1784                             goto phooey;
1785                     }
1786                     else
1787                         s++;
1788                 }               
1789             } else {
1790                 if (s > startpos)
1791                     s--;
1792                 while (s < end) {
1793                     if (*s++ == '\n') { /* don't need PL_utf8skip here */
1794                         if (regtry(prog, s))
1795                             goto got_it;
1796                     }
1797                 }               
1798             }
1799         }
1800         goto phooey;
1801     } else if (prog->reganch & ROPT_ANCH_GPOS) {
1802         if (regtry(prog, PL_reg_ganch))
1803             goto got_it;
1804         goto phooey;
1805     }
1806
1807     /* Messy cases:  unanchored match. */
1808     if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
1809         /* we have /x+whatever/ */
1810         /* it must be a one character string (XXXX Except UTF?) */
1811         char ch;
1812 #ifdef DEBUGGING
1813         int did_match = 0;
1814 #endif
1815         if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1816             do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1817         ch = SvPVX(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1818
1819         if (do_utf8) {
1820             while (s < strend) {
1821                 if (*s == ch) {
1822                     DEBUG_EXECUTE_r( did_match = 1 );
1823                     if (regtry(prog, s)) goto got_it;
1824                     s += UTF8SKIP(s);
1825                     while (s < strend && *s == ch)
1826                         s += UTF8SKIP(s);
1827                 }
1828                 s += UTF8SKIP(s);
1829             }
1830         }
1831         else {
1832             while (s < strend) {
1833                 if (*s == ch) {
1834                     DEBUG_EXECUTE_r( did_match = 1 );
1835                     if (regtry(prog, s)) goto got_it;
1836                     s++;
1837                     while (s < strend && *s == ch)
1838                         s++;
1839                 }
1840                 s++;
1841             }
1842         }
1843         DEBUG_EXECUTE_r(if (!did_match)
1844                 PerlIO_printf(Perl_debug_log,
1845                                   "Did not find anchored character...\n")
1846                );
1847     }
1848     /*SUPPRESS 560*/
1849     else if (prog->anchored_substr != Nullsv
1850               || prog->anchored_utf8 != Nullsv
1851               || ((prog->float_substr != Nullsv || prog->float_utf8 != Nullsv)
1852                   && prog->float_max_offset < strend - s)) {
1853         SV *must;
1854         I32 back_max;
1855         I32 back_min;
1856         char *last;
1857         char *last1;            /* Last position checked before */
1858 #ifdef DEBUGGING
1859         int did_match = 0;
1860 #endif
1861         if (prog->anchored_substr || prog->anchored_utf8) {
1862             if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1863                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1864             must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1865             back_max = back_min = prog->anchored_offset;
1866         } else {
1867             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1868                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1869             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1870             back_max = prog->float_max_offset;
1871             back_min = prog->float_min_offset;
1872         }
1873         if (must == &PL_sv_undef)
1874             /* could not downgrade utf8 check substring, so must fail */
1875             goto phooey;
1876
1877         last = HOP3c(strend,    /* Cannot start after this */
1878                           -(I32)(CHR_SVLEN(must)
1879                                  - (SvTAIL(must) != 0) + back_min), strbeg);
1880
1881         if (s > PL_bostr)
1882             last1 = HOPc(s, -1);
1883         else
1884             last1 = s - 1;      /* bogus */
1885
1886         /* XXXX check_substr already used to find `s', can optimize if
1887            check_substr==must. */
1888         scream_pos = -1;
1889         dontbother = end_shift;
1890         strend = HOPc(strend, -dontbother);
1891         while ( (s <= last) &&
1892                 ((flags & REXEC_SCREAM)
1893                  ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
1894                                     end_shift, &scream_pos, 0))
1895                  : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
1896                                   (unsigned char*)strend, must,
1897                                   multiline ? FBMrf_MULTILINE : 0))) ) {
1898             /* we may be pointing at the wrong string */
1899             if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
1900                 s = strbeg + (s - SvPVX(sv));
1901             DEBUG_EXECUTE_r( did_match = 1 );
1902             if (HOPc(s, -back_max) > last1) {
1903                 last1 = HOPc(s, -back_min);
1904                 s = HOPc(s, -back_max);
1905             }
1906             else {
1907                 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1908
1909                 last1 = HOPc(s, -back_min);
1910                 s = t;          
1911             }
1912             if (do_utf8) {
1913                 while (s <= last1) {
1914                     if (regtry(prog, s))
1915                         goto got_it;
1916                     s += UTF8SKIP(s);
1917                 }
1918             }
1919             else {
1920                 while (s <= last1) {
1921                     if (regtry(prog, s))
1922                         goto got_it;
1923                     s++;
1924                 }
1925             }
1926         }
1927         DEBUG_EXECUTE_r(if (!did_match)
1928                     PerlIO_printf(Perl_debug_log, 
1929                                   "Did not find %s substr `%s%.*s%s'%s...\n",
1930                               ((must == prog->anchored_substr || must == prog->anchored_utf8)
1931                                ? "anchored" : "floating"),
1932                               PL_colors[0],
1933                               (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1934                               SvPVX(must),
1935                                   PL_colors[1], (SvTAIL(must) ? "$" : ""))
1936                );
1937         goto phooey;
1938     }
1939     else if ((c = prog->regstclass)) {
1940         if (minlen) {
1941             I32 op = (U8)OP(prog->regstclass);
1942             /* don't bother with what can't match */
1943             if (PL_regkind[op] != EXACT && op != CANY)
1944                 strend = HOPc(strend, -(minlen - 1));
1945         }
1946         DEBUG_EXECUTE_r({
1947             SV *prop = sv_newmortal();
1948             char *s0;
1949             char *s1;
1950             int len0;
1951             int len1;
1952
1953             regprop(prop, c);
1954             s0 = UTF ?
1955               pv_uni_display(dsv0, (U8*)SvPVX(prop), SvCUR(prop), 60,
1956                              UNI_DISPLAY_REGEX) :
1957               SvPVX(prop);
1958             len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1959             s1 = UTF ?
1960               sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
1961             len1 = UTF ? SvCUR(dsv1) : strend - s;
1962             PerlIO_printf(Perl_debug_log,
1963                           "Matching stclass `%*.*s' against `%*.*s'\n",
1964                           len0, len0, s0,
1965                           len1, len1, s1);
1966         });
1967         if (find_byclass(prog, c, s, strend, startpos, 0))
1968             goto got_it;
1969         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
1970     }
1971     else {
1972         dontbother = 0;
1973         if (prog->float_substr != Nullsv || prog->float_utf8 != Nullsv) {
1974             /* Trim the end. */
1975             char *last;
1976             SV* float_real;
1977
1978             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1979                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1980             float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
1981
1982             if (flags & REXEC_SCREAM) {
1983                 last = screaminstr(sv, float_real, s - strbeg,
1984                                    end_shift, &scream_pos, 1); /* last one */
1985                 if (!last)
1986                     last = scream_olds; /* Only one occurrence. */
1987                 /* we may be pointing at the wrong string */
1988                 else if (RX_MATCH_COPIED(prog))
1989                     s = strbeg + (s - SvPVX(sv));
1990             }
1991             else {
1992                 STRLEN len;
1993                 char *little = SvPV(float_real, len);
1994
1995                 if (SvTAIL(float_real)) {
1996                     if (memEQ(strend - len + 1, little, len - 1))
1997                         last = strend - len + 1;
1998                     else if (!multiline)
1999                         last = memEQ(strend - len, little, len)
2000                             ? strend - len : Nullch;
2001                     else
2002                         goto find_last;
2003                 } else {
2004                   find_last:
2005                     if (len)
2006                         last = rninstr(s, strend, little, little + len);
2007                     else
2008                         last = strend;  /* matching `$' */
2009                 }
2010             }
2011             if (last == NULL) {
2012                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2013                                       "%sCan't trim the tail, match fails (should not happen)%s\n",
2014                                       PL_colors[4],PL_colors[5]));
2015                 goto phooey; /* Should not happen! */
2016             }
2017             dontbother = strend - last + prog->float_min_offset;
2018         }
2019         if (minlen && (dontbother < minlen))
2020             dontbother = minlen - 1;
2021         strend -= dontbother;              /* this one's always in bytes! */
2022         /* We don't know much -- general case. */
2023         if (do_utf8) {
2024             for (;;) {
2025                 if (regtry(prog, s))
2026                     goto got_it;
2027                 if (s >= strend)
2028                     break;
2029                 s += UTF8SKIP(s);
2030             };
2031         }
2032         else {
2033             do {
2034                 if (regtry(prog, s))
2035                     goto got_it;
2036             } while (s++ < strend);
2037         }
2038     }
2039
2040     /* Failure. */
2041     goto phooey;
2042
2043 got_it:
2044     RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2045
2046     if (PL_reg_eval_set) {
2047         /* Preserve the current value of $^R */
2048         if (oreplsv != GvSV(PL_replgv))
2049             sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2050                                                   restored, the value remains
2051                                                   the same. */
2052         restore_pos(aTHX_ 0);
2053     }
2054
2055     /* make sure $`, $&, $', and $digit will work later */
2056     if ( !(flags & REXEC_NOT_FIRST) ) {
2057         RX_MATCH_COPY_FREE(prog);
2058         if (flags & REXEC_COPY_STR) {
2059             I32 i = PL_regeol - startpos + (stringarg - strbeg);
2060 #ifdef PERL_COPY_ON_WRITE
2061             if ((SvIsCOW(sv)
2062                  || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2063                 if (DEBUG_C_TEST) {
2064                     PerlIO_printf(Perl_debug_log,
2065                                   "Copy on write: regexp capture, type %d\n",
2066                                   (int) SvTYPE(sv));
2067                 }
2068                 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2069                 prog->subbeg = SvPVX(prog->saved_copy);
2070                 assert (SvPOKp(prog->saved_copy));
2071             } else
2072 #endif
2073             {
2074                 RX_MATCH_COPIED_on(prog);
2075                 s = savepvn(strbeg, i);
2076                 prog->subbeg = s;
2077             }
2078             prog->sublen = i;
2079         }
2080         else {
2081             prog->subbeg = strbeg;
2082             prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
2083         }
2084     }
2085
2086     return 1;
2087
2088 phooey:
2089     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2090                           PL_colors[4],PL_colors[5]));
2091     if (PL_reg_eval_set)
2092         restore_pos(aTHX_ 0);
2093     return 0;
2094 }
2095
2096 /*
2097  - regtry - try match at specific point
2098  */
2099 STATIC I32                      /* 0 failure, 1 success */
2100 S_regtry(pTHX_ regexp *prog, char *startpos)
2101 {
2102     register I32 i;
2103     register I32 *sp;
2104     register I32 *ep;
2105     CHECKPOINT lastcp;
2106     GET_RE_DEBUG_FLAGS_DECL;
2107
2108 #ifdef DEBUGGING
2109     PL_regindent = 0;   /* XXXX Not good when matches are reenterable... */
2110 #endif
2111     if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2112         MAGIC *mg;
2113
2114         PL_reg_eval_set = RS_init;
2115         DEBUG_EXECUTE_r(DEBUG_s(
2116             PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2117                           (IV)(PL_stack_sp - PL_stack_base));
2118             ));
2119         SAVEI32(cxstack[cxstack_ix].blk_oldsp);
2120         cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2121         /* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2122         SAVETMPS;
2123         /* Apparently this is not needed, judging by wantarray. */
2124         /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2125            cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2126
2127         if (PL_reg_sv) {
2128             /* Make $_ available to executed code. */
2129             if (PL_reg_sv != DEFSV) {
2130                 SAVE_DEFSV;
2131                 DEFSV = PL_reg_sv;
2132             }
2133         
2134             if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
2135                   && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
2136                 /* prepare for quick setting of pos */
2137                 sv_magic(PL_reg_sv, (SV*)0,
2138                         PERL_MAGIC_regex_global, Nullch, 0);
2139                 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
2140                 mg->mg_len = -1;
2141             }
2142             PL_reg_magic    = mg;
2143             PL_reg_oldpos   = mg->mg_len;
2144             SAVEDESTRUCTOR_X(restore_pos, 0);
2145         }
2146         if (!PL_reg_curpm) {
2147             Newz(22,PL_reg_curpm, 1, PMOP);
2148 #ifdef USE_ITHREADS
2149             {
2150                 SV* repointer = newSViv(0);
2151                 /* so we know which PL_regex_padav element is PL_reg_curpm */
2152                 SvFLAGS(repointer) |= SVf_BREAK;
2153                 av_push(PL_regex_padav,repointer);
2154                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2155                 PL_regex_pad = AvARRAY(PL_regex_padav);
2156             }
2157 #endif      
2158         }
2159         PM_SETRE(PL_reg_curpm, prog);
2160         PL_reg_oldcurpm = PL_curpm;
2161         PL_curpm = PL_reg_curpm;
2162         if (RX_MATCH_COPIED(prog)) {
2163             /*  Here is a serious problem: we cannot rewrite subbeg,
2164                 since it may be needed if this match fails.  Thus
2165                 $` inside (?{}) could fail... */
2166             PL_reg_oldsaved = prog->subbeg;
2167             PL_reg_oldsavedlen = prog->sublen;
2168 #ifdef PERL_COPY_ON_WRITE
2169             PL_nrs = prog->saved_copy;
2170 #endif
2171             RX_MATCH_COPIED_off(prog);
2172         }
2173         else
2174             PL_reg_oldsaved = Nullch;
2175         prog->subbeg = PL_bostr;
2176         prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2177     }
2178     prog->startp[0] = startpos - PL_bostr;
2179     PL_reginput = startpos;
2180     PL_regstartp = prog->startp;
2181     PL_regendp = prog->endp;
2182     PL_reglastparen = &prog->lastparen;
2183     PL_reglastcloseparen = &prog->lastcloseparen;
2184     prog->lastparen = 0;
2185     prog->lastcloseparen = 0;
2186     PL_regsize = 0;
2187     DEBUG_EXECUTE_r(PL_reg_starttry = startpos);
2188     if (PL_reg_start_tmpl <= prog->nparens) {
2189         PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2190         if(PL_reg_start_tmp)
2191             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2192         else
2193             New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2194     }
2195
2196     /* XXXX What this code is doing here?!!!  There should be no need
2197        to do this again and again, PL_reglastparen should take care of
2198        this!  --ilya*/
2199
2200     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2201      * Actually, the code in regcppop() (which Ilya may be meaning by
2202      * PL_reglastparen), is not needed at all by the test suite
2203      * (op/regexp, op/pat, op/split), but that code is needed, oddly
2204      * enough, for building DynaLoader, or otherwise this
2205      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2206      * will happen.  Meanwhile, this code *is* needed for the
2207      * above-mentioned test suite tests to succeed.  The common theme
2208      * on those tests seems to be returning null fields from matches.
2209      * --jhi */
2210 #if 1
2211     sp = prog->startp;
2212     ep = prog->endp;
2213     if (prog->nparens) {
2214         for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2215             *++sp = -1;
2216             *++ep = -1;
2217         }
2218     }
2219 #endif
2220     REGCP_SET(lastcp);
2221     if (regmatch(prog->program + 1)) {
2222         prog->endp[0] = PL_reginput - PL_bostr;
2223         return 1;
2224     }
2225     REGCP_UNWIND(lastcp);
2226     return 0;
2227 }
2228
2229 #define RE_UNWIND_BRANCH        1
2230 #define RE_UNWIND_BRANCHJ       2
2231
2232 union re_unwind_t;
2233
2234 typedef struct {                /* XX: makes sense to enlarge it... */
2235     I32 type;
2236     I32 prev;
2237     CHECKPOINT lastcp;
2238 } re_unwind_generic_t;
2239
2240 typedef struct {
2241     I32 type;
2242     I32 prev;
2243     CHECKPOINT lastcp;
2244     I32 lastparen;
2245     regnode *next;
2246     char *locinput;
2247     I32 nextchr;
2248 #ifdef DEBUGGING
2249     int regindent;
2250 #endif
2251 } re_unwind_branch_t;
2252
2253 typedef union re_unwind_t {
2254     I32 type;
2255     re_unwind_generic_t generic;
2256     re_unwind_branch_t branch;
2257 } re_unwind_t;
2258
2259 #define sayYES goto yes
2260 #define sayNO goto no
2261 #define sayNO_ANYOF goto no_anyof
2262 #define sayYES_FINAL goto yes_final
2263 #define sayYES_LOUD  goto yes_loud
2264 #define sayNO_FINAL  goto no_final
2265 #define sayNO_SILENT goto do_no
2266 #define saySAME(x) if (x) goto yes; else goto no
2267
2268 /* this is used to determine how far from the left messages like
2269    'failed...' are printed. Currently 29 makes these messages line
2270    up with the opcode they refer to. Earlier perls used 25 which
2271    left these messages outdented making reviewing a debug output
2272    quite difficult.
2273 */
2274 #define REPORT_CODE_OFF 29
2275
2276
2277 /* Make sure there is a test for this +1 options in re_tests */
2278 #define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2279
2280 #define TRIE_CHECK_STATE_IS_ACCEPTING STMT_START {                       \
2281     if ( trie->states[ state ].wordnum ) {                               \
2282         if ( !accepted ) {                                               \
2283             ENTER;                                                       \
2284             SAVETMPS;                                                    \
2285             bufflen = TRIE_INITAL_ACCEPT_BUFFLEN ;                       \
2286             sv_accept_buff=NEWSV( 1234,                                  \
2287               bufflen * sizeof(reg_trie_accepted) - 1 );                 \
2288             SvCUR_set( sv_accept_buff, sizeof(reg_trie_accepted) );      \
2289             SvPOK_on( sv_accept_buff );                                  \
2290             sv_2mortal( sv_accept_buff );                                \
2291             accept_buff = (reg_trie_accepted*)SvPV_nolen( sv_accept_buff );\
2292         } else {                                                         \
2293             if ( accepted >= bufflen ) {                                 \
2294                 bufflen *= 2;                                            \
2295                 accept_buff =(reg_trie_accepted*)SvGROW( sv_accept_buff, \
2296                     bufflen * sizeof(reg_trie_accepted) );               \
2297             }                                                            \
2298             SvCUR_set( sv_accept_buff,SvCUR( sv_accept_buff )            \
2299                 + sizeof( reg_trie_accepted ) );                         \
2300         }                                                                \
2301         accept_buff[ accepted ].wordnum = trie->states[ state ].wordnum; \
2302         accept_buff[ accepted ].endpos = uc;                             \
2303         ++accepted;                                                      \
2304     } } STMT_END
2305
2306 #define TRIE_HANDLE_CHAR STMT_START {                                   \
2307         if ( uvc < 256 ) {                                              \
2308             charid = trie->charmap[ uvc ];                              \
2309         } else {                                                        \
2310             charid = 0;                                                 \
2311             if( trie->widecharmap ) {                                   \
2312             SV** svpp = (SV**)NULL;                                     \
2313             svpp = hv_fetch( trie->widecharmap, (char*)&uvc,            \
2314                           sizeof( UV ), 0 );                            \
2315             if ( svpp ) {                                               \
2316                 charid = (U16)SvIV( *svpp );                            \
2317                 }                                                       \
2318             }                                                           \
2319         }                                                               \
2320         if ( charid &&                                                  \
2321              ( base + charid > trie->uniquecharcount ) &&               \
2322              ( base + charid - 1 - trie->uniquecharcount < trie->lasttrans) && \
2323              trie->trans[ base + charid - 1 - trie->uniquecharcount ].check == state ) \
2324         {                                                               \
2325             state = trie->trans[ base + charid - 1 - trie->uniquecharcount ].next;     \
2326         } else {                                                        \
2327             state = 0;                                                  \
2328         }                                                               \
2329         uc += len;                                                      \
2330     } STMT_END
2331
2332 /*
2333  - regmatch - main matching routine
2334  *
2335  * Conceptually the strategy is simple:  check to see whether the current
2336  * node matches, call self recursively to see whether the rest matches,
2337  * and then act accordingly.  In practice we make some effort to avoid
2338  * recursion, in particular by going through "ordinary" nodes (that don't
2339  * need to know whether the rest of the match failed) by a loop instead of
2340  * by recursion.
2341  */
2342 /* [lwall] I've hoisted the register declarations to the outer block in order to
2343  * maybe save a little bit of pushing and popping on the stack.  It also takes
2344  * advantage of machines that use a register save mask on subroutine entry.
2345  */
2346 STATIC I32                      /* 0 failure, 1 success */
2347 S_regmatch(pTHX_ regnode *prog)
2348 {
2349     register regnode *scan;     /* Current node. */
2350     regnode *next;              /* Next node. */
2351     regnode *inner;             /* Next node in internal branch. */
2352     register I32 nextchr;       /* renamed nextchr - nextchar colides with
2353                                    function of same name */
2354     register I32 n;             /* no or next */
2355     register I32 ln = 0;        /* len or last */
2356     register char *s = Nullch;  /* operand or save */
2357     register char *locinput = PL_reginput;
2358     register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
2359     int minmod = 0, sw = 0, logical = 0;
2360     I32 unwind = 0;
2361
2362     /* used by the trie code */
2363     SV                 *sv_accept_buff;  /* accepting states we have traversed */
2364     reg_trie_accepted  *accept_buff;     /* "" */
2365     reg_trie_data      *trie;            /* what trie are we using right now */
2366     U32 accepted = 0;                    /* how many accepting states we have seen*/
2367
2368 #if 0
2369     I32 firstcp = PL_savestack_ix;
2370 #endif
2371     register bool do_utf8 = PL_reg_match_utf8;
2372 #ifdef DEBUGGING
2373     SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2374     SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2375     SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2376
2377     SV *re_debug_flags;
2378 #endif
2379
2380     GET_RE_DEBUG_FLAGS;
2381
2382 #ifdef DEBUGGING
2383     PL_regindent++;
2384 #endif
2385
2386
2387     /* Note that nextchr is a byte even in UTF */
2388     nextchr = UCHARAT(locinput);
2389     scan = prog;
2390     while (scan != NULL) {
2391
2392         DEBUG_EXECUTE_r( {
2393             SV *prop = sv_newmortal();
2394             int docolor = *PL_colors[0];
2395             int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2396             int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
2397             /* The part of the string before starttry has one color
2398                (pref0_len chars), between starttry and current
2399                position another one (pref_len - pref0_len chars),
2400                after the current position the third one.
2401                We assume that pref0_len <= pref_len, otherwise we
2402                decrease pref0_len.  */
2403             int pref_len = (locinput - PL_bostr) > (5 + taill) - l
2404                 ? (5 + taill) - l : locinput - PL_bostr;
2405             int pref0_len;
2406
2407             while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2408                 pref_len++;
2409             pref0_len = pref_len  - (locinput - PL_reg_starttry);
2410             if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
2411                 l = ( PL_regeol - locinput > (5 + taill) - pref_len
2412                       ? (5 + taill) - pref_len : PL_regeol - locinput);
2413             while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2414                 l--;
2415             if (pref0_len < 0)
2416                 pref0_len = 0;
2417             if (pref0_len > pref_len)
2418                 pref0_len = pref_len;
2419             regprop(prop, scan);
2420             {
2421               char *s0 =
2422                 do_utf8 && OP(scan) != CANY ?
2423                 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
2424                                pref0_len, 60, UNI_DISPLAY_REGEX) :
2425                 locinput - pref_len;
2426               int len0 = do_utf8 ? strlen(s0) : pref0_len;
2427               char *s1 = do_utf8 && OP(scan) != CANY ?
2428                 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
2429                                pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2430                 locinput - pref_len + pref0_len;
2431               int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2432               char *s2 = do_utf8 && OP(scan) != CANY ?
2433                 pv_uni_display(dsv2, (U8*)locinput,
2434                                PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2435                 locinput;
2436               int len2 = do_utf8 ? strlen(s2) : l;
2437               PerlIO_printf(Perl_debug_log,
2438                             "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2439                             (IV)(locinput - PL_bostr),
2440                             PL_colors[4],
2441                             len0, s0,
2442                             PL_colors[5],
2443                             PL_colors[2],
2444                             len1, s1,
2445                             PL_colors[3],
2446                             (docolor ? "" : "> <"),
2447                             PL_colors[0],
2448                             len2, s2,
2449                             PL_colors[1],
2450                             15 - l - pref_len + 1,
2451                             "",
2452                             (IV)(scan - PL_regprogram), PL_regindent*2, "",
2453                             SvPVX(prop));
2454             }
2455         });
2456
2457         next = scan + NEXT_OFF(scan);
2458         if (next == scan)
2459             next = NULL;
2460
2461         switch (OP(scan)) {
2462         case BOL:
2463             if (locinput == PL_bostr)
2464             {
2465                 /* regtill = regbol; */
2466                 break;
2467             }
2468             sayNO;
2469         case MBOL:
2470             if (locinput == PL_bostr ||
2471                 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2472             {
2473                 break;
2474             }
2475             sayNO;
2476         case SBOL:
2477             if (locinput == PL_bostr)
2478                 break;
2479             sayNO;
2480         case GPOS:
2481             if (locinput == PL_reg_ganch)
2482                 break;
2483             sayNO;
2484         case EOL:
2485                 goto seol;
2486         case MEOL:
2487             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2488                 sayNO;
2489             break;
2490         case SEOL:
2491           seol:
2492             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2493                 sayNO;
2494             if (PL_regeol - locinput > 1)
2495                 sayNO;
2496             break;
2497         case EOS:
2498             if (PL_regeol != locinput)
2499                 sayNO;
2500             break;
2501         case SANY:
2502             if (!nextchr && locinput >= PL_regeol)
2503                 sayNO;
2504             if (do_utf8) {
2505                 locinput += PL_utf8skip[nextchr];
2506                 if (locinput > PL_regeol)
2507                     sayNO;
2508                 nextchr = UCHARAT(locinput);
2509             }
2510             else
2511                 nextchr = UCHARAT(++locinput);
2512             break;
2513         case CANY:
2514             if (!nextchr && locinput >= PL_regeol)
2515                 sayNO;
2516             nextchr = UCHARAT(++locinput);
2517             break;
2518         case REG_ANY:
2519             if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2520                 sayNO;
2521             if (do_utf8) {
2522                 locinput += PL_utf8skip[nextchr];
2523                 if (locinput > PL_regeol)
2524                     sayNO;
2525                 nextchr = UCHARAT(locinput);
2526             }
2527             else
2528                 nextchr = UCHARAT(++locinput);
2529             break;
2530
2531
2532
2533         /*
2534            traverse the TRIE keeping track of all accepting states
2535            we transition through until we get to a failing node.
2536
2537            we use two slightly different pieces of code to handle
2538            the traversal depending on whether its case sensitive or
2539            not. we reuse the accept code however. (this should probably
2540            be turned into a macro.)
2541
2542         */
2543         case TRIEF:
2544         case TRIEFL:
2545             {
2546
2547                 U32 uniflags = ckWARN( WARN_UTF8 ) ? 0 : UTF8_ALLOW_ANY;
2548                 U8 *uc = ( U8* )locinput;
2549                 U32 state = 1;
2550                 U16 charid = 0;
2551                 U32 base = 0;
2552                 UV uvc = 0;
2553                 STRLEN len = 0;
2554                 STRLEN foldlen = 0;
2555                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2556                 U8 *uscan = (U8*)NULL;
2557                 STRLEN bufflen=0;
2558                 accepted = 0;
2559
2560                 trie = (reg_trie_data*)PL_regdata->data[ ARG( scan ) ];
2561
2562                 while ( state && uc <= (U8*)PL_regeol ) {
2563
2564                     TRIE_CHECK_STATE_IS_ACCEPTING;
2565
2566                     base = trie->states[ state ].trans.base;
2567
2568                     DEBUG_TRIE_EXECUTE_r(
2569                                 PerlIO_printf( Perl_debug_log,
2570                                     "%*s  %sState: %4x, Base: %4x Accepted: %4x ",
2571                                     REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
2572                                     state, base, accepted );
2573                     );
2574
2575                     if ( base ) {
2576
2577                         if ( do_utf8 || UTF ) {
2578                             if ( foldlen>0 ) {
2579                                 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
2580                                 foldlen -= len;
2581                                 uscan += len;
2582                                 len=0;
2583                             } else {
2584                                 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2585                                 uvc = to_uni_fold( uvc, foldbuf, &foldlen );
2586                                 foldlen -= UNISKIP( uvc );
2587                                 uscan = foldbuf + UNISKIP( uvc );
2588                             }
2589                         } else {
2590                             uvc = (U32)*uc;
2591                             len = 1;
2592                         }
2593
2594                         TRIE_HANDLE_CHAR;
2595
2596                     } else {
2597                         state = 0;
2598                     }
2599                     DEBUG_TRIE_EXECUTE_r(
2600                                 PerlIO_printf( Perl_debug_log,
2601                                     "Charid:%3x CV:%4x After State: %4x%s\n",
2602                                     charid, uvc, state, PL_colors[5] );
2603                     );
2604                 }
2605                 if ( !accepted ) {
2606                    sayNO;
2607                 } else {
2608                     goto TrieAccept;
2609                 }
2610             }
2611             /* unreached codepoint: we jump into the middle of the next case
2612                from previous if blocks */
2613         case TRIE:
2614             {
2615                 U32 uniflags = ckWARN( WARN_UTF8 ) ? 0 : UTF8_ALLOW_ANY;
2616                 U8 *uc = (U8*)locinput;
2617                 U32 state = 1;
2618                 U16 charid = 0;
2619                 U32 base = 0;
2620                 UV uvc = 0;
2621                 STRLEN len = 0;
2622                 STRLEN bufflen = 0;
2623                 accepted = 0;
2624
2625                 trie = (reg_trie_data*)PL_regdata->data[ ARG( scan ) ];
2626
2627                 while ( state && uc <= (U8*)PL_regeol ) {
2628
2629                     TRIE_CHECK_STATE_IS_ACCEPTING;
2630
2631                     base = trie->states[ state ].trans.base;
2632
2633                     DEBUG_TRIE_EXECUTE_r(
2634                             PerlIO_printf( Perl_debug_log,
2635                                 "%*s  %sState: %4x, Base: %4x Accepted: %4x ",
2636                                 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
2637                                 state, base, accepted );
2638                     );
2639
2640                     if ( base ) {
2641
2642                         if ( do_utf8 || UTF ) {
2643                             uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2644                         } else {
2645                             uvc = (U32)*uc;
2646                             len = 1;
2647                         }
2648
2649                         TRIE_HANDLE_CHAR;
2650
2651                     } else {
2652                         state = 0;
2653                     }
2654                     DEBUG_TRIE_EXECUTE_r(
2655                             PerlIO_printf( Perl_debug_log,
2656                                 "Charid:%3x CV:%4x After State: %4x%s\n",
2657                                 charid, uvc, state, PL_colors[5] );
2658                     );
2659                 }
2660                 if ( !accepted ) {
2661                    sayNO;
2662                 }
2663             }
2664
2665
2666             /*
2667                There was at least one accepting state that we
2668                transitioned through. Presumably the number of accepting
2669                states is going to be low, typically one or two. So we
2670                simply scan through to find the one with lowest wordnum.
2671                Once we find it, we swap the last state into its place
2672                and decrement the size. We then try to match the rest of
2673                the pattern at the point where the word ends, if we
2674                succeed then we end the loop, otherwise the loop
2675                eventually terminates once all of the accepting states
2676                have been tried.
2677             */
2678         TrieAccept:
2679             {
2680                 int gotit = 0;
2681
2682                 if ( accepted == 1 ) {
2683                     DEBUG_EXECUTE_r({
2684                         SV **tmp = av_fetch( trie->words, accept_buff[ 0 ].wordnum-1, 0 );
2685                         PerlIO_printf( Perl_debug_log,
2686                             "%*s  %sonly one match : #%d <%s>%s\n",
2687                             REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],
2688                             accept_buff[ 0 ].wordnum,
2689                             tmp ? SvPV_nolen( *tmp ) : "not compiled under -Dr",
2690                             PL_colors[5] );
2691                     });
2692                     PL_reginput = (char *)accept_buff[ 0 ].endpos;
2693                     /* in this case we free tmps/leave before we call regmatch
2694                        as we wont be using accept_buff again. */
2695                     FREETMPS;
2696                     LEAVE;
2697                     gotit = regmatch( scan + NEXT_OFF( scan ) );
2698                 } else {
2699                     DEBUG_EXECUTE_r(
2700                         PerlIO_printf( Perl_debug_log,"%*s  %sgot %d possible matches%s\n",
2701                             REPORT_CODE_OFF + PL_regindent * 2, "",PL_colors[4], accepted,
2702                             PL_colors[5] );
2703                     );
2704                     while ( !gotit && accepted-- ) {
2705                         U32 best = 0;
2706                         U32 cur;
2707                         for( cur = 1 ; cur <= accepted ; cur++ ) {
2708                              DEBUG_TRIE_EXECUTE_r(
2709                                         PerlIO_printf( Perl_debug_log,
2710                                             "%*s  %sgot %d (%d) as best, looking at %d (%d)%s\n",
2711                                             REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
2712                                             best, accept_buff[ best ].wordnum, cur,
2713                                             accept_buff[ cur ].wordnum, PL_colors[5] );
2714                              );
2715
2716                             if ( accept_buff[ cur ].wordnum < accept_buff[ best ].wordnum )
2717                                     best = cur;
2718                         }
2719                         DEBUG_EXECUTE_r({
2720                             SV **tmp = av_fetch( trie->words, accept_buff[ best ].wordnum - 1, 0 );
2721                             PerlIO_printf( Perl_debug_log, "%*s  %strying alternation #%d <%s> at 0x%p%s\n",
2722                                 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],
2723                                 accept_buff[best].wordnum,
2724                                 tmp ? SvPV_nolen( *tmp ) : "not compiled under -Dr",scan,
2725                                 PL_colors[5] );
2726                         });
2727                         if ( best<accepted ) {
2728                             reg_trie_accepted tmp = accept_buff[ best ];
2729                             accept_buff[ best ] = accept_buff[ accepted ];
2730                             accept_buff[ accepted ] = tmp;
2731                             best = accepted;
2732                         }
2733                         PL_reginput = (char *)accept_buff[ best ].endpos;
2734
2735                         /* 
2736                            as far as I can tell we only need the SAVETMPS/FREETMPS 
2737                            for re's with EVAL in them but I'm leaving them in for 
2738                            all until I can be sure.
2739                          */
2740                         SAVETMPS;
2741                         gotit = regmatch( scan + NEXT_OFF( scan ) ) ;
2742                         FREETMPS;
2743                     }
2744                     FREETMPS;
2745                     LEAVE;
2746                 }
2747                 
2748                 if ( gotit ) {
2749                     sayYES;
2750                 } else {
2751                     sayNO;
2752                 }
2753             }
2754             /* unreached codepoint */
2755         case EXACT:
2756             s = STRING(scan);
2757             ln = STR_LEN(scan);
2758             if (do_utf8 != UTF) {
2759                 /* The target and the pattern have differing utf8ness. */
2760                 char *l = locinput;
2761                 char *e = s + ln;
2762                 STRLEN ulen;
2763
2764                 if (do_utf8) {
2765                     /* The target is utf8, the pattern is not utf8. */
2766                     while (s < e) {
2767                         if (l >= PL_regeol)
2768                              sayNO;
2769                         if (NATIVE_TO_UNI(*(U8*)s) !=
2770                             utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
2771                                            ckWARN(WARN_UTF8) ?
2772                                            0 : UTF8_ALLOW_ANY))
2773                              sayNO;
2774                         l += ulen;
2775                         s ++;
2776                     }
2777                 }
2778                 else {
2779                     /* The target is not utf8, the pattern is utf8. */
2780                     while (s < e) {
2781                         if (l >= PL_regeol)
2782                             sayNO;
2783                         if (NATIVE_TO_UNI(*((U8*)l)) !=
2784                             utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
2785                                            ckWARN(WARN_UTF8) ?
2786                                            0 : UTF8_ALLOW_ANY))
2787                             sayNO;
2788                         s += ulen;
2789                         l ++;
2790                     }
2791                 }
2792                 locinput = l;
2793                 nextchr = UCHARAT(locinput);
2794                 break;
2795             }
2796             /* The target and the pattern have the same utf8ness. */
2797             /* Inline the first character, for speed. */
2798             if (UCHARAT(s) != nextchr)
2799                 sayNO;
2800             if (PL_regeol - locinput < ln)
2801                 sayNO;
2802             if (ln > 1 && memNE(s, locinput, ln))
2803                 sayNO;
2804             locinput += ln;
2805             nextchr = UCHARAT(locinput);
2806             break;
2807         case EXACTFL:
2808             PL_reg_flags |= RF_tainted;
2809             /* FALL THROUGH */
2810         case EXACTF:
2811             s = STRING(scan);
2812             ln = STR_LEN(scan);
2813
2814             if (do_utf8 || UTF) {
2815               /* Either target or the pattern are utf8. */
2816                 char *l = locinput;
2817                 char *e = PL_regeol;
2818
2819                 if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
2820                                l, &e, 0,  do_utf8)) {
2821                      /* One more case for the sharp s:
2822                       * pack("U0U*", 0xDF) =~ /ss/i,
2823                       * the 0xC3 0x9F are the UTF-8
2824                       * byte sequence for the U+00DF. */
2825                      if (!(do_utf8 &&
2826                            toLOWER(s[0]) == 's' &&
2827                            ln >= 2 &&
2828                            toLOWER(s[1]) == 's' &&
2829                            (U8)l[0] == 0xC3 &&
2830                            e - l >= 2 &&
2831                            (U8)l[1] == 0x9F))
2832                           sayNO;
2833                 }
2834                 locinput = e;
2835                 nextchr = UCHARAT(locinput);
2836                 break;
2837             }
2838
2839             /* Neither the target and the pattern are utf8. */
2840
2841             /* Inline the first character, for speed. */
2842             if (UCHARAT(s) != nextchr &&
2843                 UCHARAT(s) != ((OP(scan) == EXACTF)
2844                                ? PL_fold : PL_fold_locale)[nextchr])
2845                 sayNO;
2846             if (PL_regeol - locinput < ln)
2847                 sayNO;
2848             if (ln > 1 && (OP(scan) == EXACTF
2849                            ? ibcmp(s, locinput, ln)
2850                            : ibcmp_locale(s, locinput, ln)))
2851                 sayNO;
2852             locinput += ln;
2853             nextchr = UCHARAT(locinput);
2854             break;
2855         case ANYOF:
2856             if (do_utf8) {
2857                 STRLEN inclasslen = PL_regeol - locinput;
2858
2859                 if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
2860                     sayNO_ANYOF;
2861                 if (locinput >= PL_regeol)
2862                     sayNO;
2863                 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
2864                 nextchr = UCHARAT(locinput);
2865                 break;
2866             }
2867             else {
2868                 if (nextchr < 0)
2869                     nextchr = UCHARAT(locinput);
2870                 if (!REGINCLASS(scan, (U8*)locinput))
2871                     sayNO_ANYOF;
2872                 if (!nextchr && locinput >= PL_regeol)
2873                     sayNO;
2874                 nextchr = UCHARAT(++locinput);
2875                 break;
2876             }
2877         no_anyof:
2878             /* If we might have the case of the German sharp s
2879              * in a casefolding Unicode character class. */
2880
2881             if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2882                  locinput += SHARP_S_SKIP;
2883                  nextchr = UCHARAT(locinput);
2884             }
2885             else
2886                  sayNO;
2887             break;
2888         case ALNUML:
2889             PL_reg_flags |= RF_tainted;
2890             /* FALL THROUGH */
2891         case ALNUM:
2892             if (!nextchr)
2893                 sayNO;
2894             if (do_utf8) {
2895                 LOAD_UTF8_CHARCLASS(alnum,"a");
2896                 if (!(OP(scan) == ALNUM
2897                       ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2898                       : isALNUM_LC_utf8((U8*)locinput)))
2899                 {
2900                     sayNO;
2901                 }
2902                 locinput += PL_utf8skip[nextchr];
2903                 nextchr = UCHARAT(locinput);
2904                 break;
2905             }
2906             if (!(OP(scan) == ALNUM
2907                   ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2908                 sayNO;
2909             nextchr = UCHARAT(++locinput);
2910             break;
2911         case NALNUML:
2912             PL_reg_flags |= RF_tainted;
2913             /* FALL THROUGH */
2914         case NALNUM:
2915             if (!nextchr && locinput >= PL_regeol)
2916                 sayNO;
2917             if (do_utf8) {
2918                 LOAD_UTF8_CHARCLASS(alnum,"a");
2919                 if (OP(scan) == NALNUM
2920                     ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2921                     : isALNUM_LC_utf8((U8*)locinput))
2922                 {
2923                     sayNO;
2924                 }
2925                 locinput += PL_utf8skip[nextchr];
2926                 nextchr = UCHARAT(locinput);
2927                 break;
2928             }
2929             if (OP(scan) == NALNUM
2930                 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2931                 sayNO;
2932             nextchr = UCHARAT(++locinput);
2933             break;
2934         case BOUNDL:
2935         case NBOUNDL:
2936             PL_reg_flags |= RF_tainted;
2937             /* FALL THROUGH */
2938         case BOUND:
2939         case NBOUND:
2940             /* was last char in word? */
2941             if (do_utf8) {
2942                 if (locinput == PL_bostr)
2943                     ln = '\n';
2944                 else {
2945                     U8 *r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
2946                 
2947                     ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
2948                 }
2949                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2950                     ln = isALNUM_uni(ln);
2951                     LOAD_UTF8_CHARCLASS(alnum,"a");
2952                     n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
2953                 }
2954                 else {
2955                     ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
2956                     n = isALNUM_LC_utf8((U8*)locinput);
2957                 }
2958             }
2959             else {
2960                 ln = (locinput != PL_bostr) ?
2961                     UCHARAT(locinput - 1) : '\n';
2962                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2963                     ln = isALNUM(ln);
2964                     n = isALNUM(nextchr);
2965                 }
2966                 else {
2967                     ln = isALNUM_LC(ln);
2968                     n = isALNUM_LC(nextchr);
2969                 }
2970             }
2971             if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2972                                     OP(scan) == BOUNDL))
2973                     sayNO;
2974             break;
2975         case SPACEL:
2976             PL_reg_flags |= RF_tainted;
2977             /* FALL THROUGH */
2978         case SPACE:
2979             if (!nextchr)
2980                 sayNO;
2981             if (do_utf8) {
2982                 if (UTF8_IS_CONTINUED(nextchr)) {
2983                     LOAD_UTF8_CHARCLASS(space," ");
2984                     if (!(OP(scan) == SPACE
2985                           ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
2986                           : isSPACE_LC_utf8((U8*)locinput)))
2987                     {
2988                         sayNO;
2989                     }
2990                     locinput += PL_utf8skip[nextchr];
2991                     nextchr = UCHARAT(locinput);
2992                     break;
2993                 }
2994                 if (!(OP(scan) == SPACE
2995                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2996                     sayNO;
2997                 nextchr = UCHARAT(++locinput);
2998             }
2999             else {
3000                 if (!(OP(scan) == SPACE
3001                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3002                     sayNO;
3003                 nextchr = UCHARAT(++locinput);
3004             }
3005             break;
3006         case NSPACEL:
3007             PL_reg_flags |= RF_tainted;
3008             /* FALL THROUGH */
3009         case NSPACE:
3010             if (!nextchr && locinput >= PL_regeol)
3011                 sayNO;
3012             if (do_utf8) {
3013                 LOAD_UTF8_CHARCLASS(space," ");
3014                 if (OP(scan) == NSPACE
3015                     ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3016                     : isSPACE_LC_utf8((U8*)locinput))
3017                 {
3018                     sayNO;
3019                 }
3020                 locinput += PL_utf8skip[nextchr];
3021                 nextchr = UCHARAT(locinput);
3022                 break;
3023             }
3024             if (OP(scan) == NSPACE
3025                 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
3026                 sayNO;
3027             nextchr = UCHARAT(++locinput);
3028             break;
3029         case DIGITL:
3030             PL_reg_flags |= RF_tainted;
3031             /* FALL THROUGH */
3032         case DIGIT:
3033             if (!nextchr)
3034                 sayNO;
3035             if (do_utf8) {
3036                 LOAD_UTF8_CHARCLASS(digit,"0");
3037                 if (!(OP(scan) == DIGIT
3038                       ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3039                       : isDIGIT_LC_utf8((U8*)locinput)))
3040                 {
3041                     sayNO;
3042                 }
3043                 locinput += PL_utf8skip[nextchr];
3044                 nextchr = UCHARAT(locinput);
3045                 break;
3046             }
3047             if (!(OP(scan) == DIGIT
3048                   ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
3049                 sayNO;
3050             nextchr = UCHARAT(++locinput);
3051             break;
3052         case NDIGITL:
3053             PL_reg_flags |= RF_tainted;
3054             /* FALL THROUGH */
3055         case NDIGIT:
3056             if (!nextchr && locinput >= PL_regeol)
3057                 sayNO;
3058             if (do_utf8) {
3059                 LOAD_UTF8_CHARCLASS(digit,"0");
3060                 if (OP(scan) == NDIGIT
3061                     ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3062                     : isDIGIT_LC_utf8((U8*)locinput))
3063                 {
3064                     sayNO;
3065                 }
3066                 locinput += PL_utf8skip[nextchr];
3067                 nextchr = UCHARAT(locinput);
3068                 break;
3069             }
3070             if (OP(scan) == NDIGIT
3071                 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
3072                 sayNO;
3073             nextchr = UCHARAT(++locinput);
3074             break;
3075         case CLUMP:
3076             if (locinput >= PL_regeol)
3077                 sayNO;
3078             if  (do_utf8) {
3079                 LOAD_UTF8_CHARCLASS(mark,"~");
3080                 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3081                     sayNO;
3082                 locinput += PL_utf8skip[nextchr];
3083                 while (locinput < PL_regeol &&
3084                        swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3085                     locinput += UTF8SKIP(locinput);
3086                 if (locinput > PL_regeol)
3087                     sayNO;
3088             } 
3089             else
3090                locinput++;
3091             nextchr = UCHARAT(locinput);
3092             break;
3093         case REFFL:
3094             PL_reg_flags |= RF_tainted;
3095             /* FALL THROUGH */
3096         case REF:
3097         case REFF:
3098             n = ARG(scan);  /* which paren pair */
3099             ln = PL_regstartp[n];
3100             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3101             if ((I32)*PL_reglastparen < n || ln == -1)
3102                 sayNO;                  /* Do not match unless seen CLOSEn. */
3103             if (ln == PL_regendp[n])
3104                 break;
3105
3106             s = PL_bostr + ln;
3107             if (do_utf8 && OP(scan) != REF) {   /* REF can do byte comparison */
3108                 char *l = locinput;
3109                 char *e = PL_bostr + PL_regendp[n];
3110                 /*
3111                  * Note that we can't do the "other character" lookup trick as
3112                  * in the 8-bit case (no pun intended) because in Unicode we
3113                  * have to map both upper and title case to lower case.
3114                  */
3115                 if (OP(scan) == REFF) {
3116                     STRLEN ulen1, ulen2;
3117                     U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3118                     U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3119                     while (s < e) {
3120                         if (l >= PL_regeol)
3121                             sayNO;
3122                         toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3123                         toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3124                         if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3125                             sayNO;
3126                         s += ulen1;
3127                         l += ulen2;
3128                     }
3129                 }
3130                 locinput = l;
3131                 nextchr = UCHARAT(locinput);
3132                 break;
3133             }
3134
3135             /* Inline the first character, for speed. */
3136             if (UCHARAT(s) != nextchr &&
3137                 (OP(scan) == REF ||
3138                  (UCHARAT(s) != ((OP(scan) == REFF
3139                                   ? PL_fold : PL_fold_locale)[nextchr]))))
3140                 sayNO;
3141             ln = PL_regendp[n] - ln;
3142             if (locinput + ln > PL_regeol)
3143                 sayNO;
3144             if (ln > 1 && (OP(scan) == REF
3145                            ? memNE(s, locinput, ln)
3146                            : (OP(scan) == REFF
3147                               ? ibcmp(s, locinput, ln)
3148                               : ibcmp_locale(s, locinput, ln))))
3149                 sayNO;
3150             locinput += ln;
3151             nextchr = UCHARAT(locinput);
3152             break;
3153
3154         case NOTHING:
3155         case TAIL:
3156             break;
3157         case BACK:
3158             break;
3159         case EVAL:
3160         {
3161             dSP;
3162             OP_4tree *oop = PL_op;
3163             COP *ocurcop = PL_curcop;
3164             PAD *old_comppad;
3165             SV *ret;
3166             struct regexp *oreg = PL_reg_re;
3167         
3168             n = ARG(scan);
3169             PL_op = (OP_4tree*)PL_regdata->data[n];
3170             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3171             PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_regdata->data[n + 2]);
3172             PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
3173
3174             {
3175                 SV **before = SP;
3176                 CALLRUNOPS(aTHX);                       /* Scalar context. */
3177                 SPAGAIN;
3178                 if (SP == before)
3179                     ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
3180                 else {
3181                     ret = POPs;
3182                     PUTBACK;
3183                 }
3184             }
3185
3186             PL_op = oop;
3187             PAD_RESTORE_LOCAL(old_comppad);
3188             PL_curcop = ocurcop;
3189             if (logical) {
3190                 if (logical == 2) {     /* Postponed subexpression. */
3191                     regexp *re;
3192                     MAGIC *mg = Null(MAGIC*);
3193                     re_cc_state state;
3194                     CHECKPOINT cp, lastcp;
3195                     int toggleutf;
3196                     register SV *sv;
3197
3198                     if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3199                         mg = mg_find(sv, PERL_MAGIC_qr);
3200                     else if (SvSMAGICAL(ret)) {
3201                         if (SvGMAGICAL(ret))
3202                             sv_unmagic(ret, PERL_MAGIC_qr);
3203                         else
3204                             mg = mg_find(ret, PERL_MAGIC_qr);
3205                     }
3206
3207                     if (mg) {
3208                         re = (regexp *)mg->mg_obj;
3209                         (void)ReREFCNT_inc(re);
3210                     }
3211                     else {
3212                         STRLEN len;
3213                         char *t = SvPV(ret, len);
3214                         PMOP pm;
3215                         char *oprecomp = PL_regprecomp;
3216                         I32 osize = PL_regsize;
3217                         I32 onpar = PL_regnpar;
3218
3219                         Zero(&pm, 1, PMOP);
3220                         if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
3221                         re = CALLREGCOMP(aTHX_ t, t + len, &pm);
3222                         if (!(SvFLAGS(ret)
3223                               & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3224                                 | SVs_GMG)))
3225                             sv_magic(ret,(SV*)ReREFCNT_inc(re),
3226                                         PERL_MAGIC_qr,0,0);
3227                         PL_regprecomp = oprecomp;
3228                         PL_regsize = osize;
3229                         PL_regnpar = onpar;
3230                     }
3231                     DEBUG_EXECUTE_r(
3232                         PerlIO_printf(Perl_debug_log,
3233                                       "Entering embedded `%s%.60s%s%s'\n",
3234                                       PL_colors[0],
3235                                       re->precomp,
3236                                       PL_colors[1],
3237                                       (strlen(re->precomp) > 60 ? "..." : ""))
3238                         );
3239                     state.node = next;
3240                     state.prev = PL_reg_call_cc;
3241                     state.cc = PL_regcc;
3242                     state.re = PL_reg_re;
3243
3244                     PL_regcc = 0;
3245                 
3246                     cp = regcppush(0);  /* Save *all* the positions. */
3247                     REGCP_SET(lastcp);
3248                     cache_re(re);
3249                     state.ss = PL_savestack_ix;
3250                     *PL_reglastparen = 0;
3251                     *PL_reglastcloseparen = 0;
3252                     PL_reg_call_cc = &state;
3253                     PL_reginput = locinput;
3254                     toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
3255                                 ((re->reganch & ROPT_UTF8) != 0);
3256                     if (toggleutf) PL_reg_flags ^= RF_utf8;
3257
3258                     /* XXXX This is too dramatic a measure... */
3259                     PL_reg_maxiter = 0;
3260
3261                     if (regmatch(re->program + 1)) {
3262                         /* Even though we succeeded, we need to restore
3263                            global variables, since we may be wrapped inside
3264                            SUSPEND, thus the match may be not finished yet. */
3265
3266                         /* XXXX Do this only if SUSPENDed? */
3267                         PL_reg_call_cc = state.prev;
3268                         PL_regcc = state.cc;
3269                         PL_reg_re = state.re;
3270                         cache_re(PL_reg_re);
3271                         if (toggleutf) PL_reg_flags ^= RF_utf8;
3272
3273                         /* XXXX This is too dramatic a measure... */
3274                         PL_reg_maxiter = 0;
3275
3276                         /* These are needed even if not SUSPEND. */
3277                         ReREFCNT_dec(re);
3278                         regcpblow(cp);
3279                         sayYES;
3280                     }
3281                     ReREFCNT_dec(re);
3282                     REGCP_UNWIND(lastcp);
3283                     regcppop();
3284                     PL_reg_call_cc = state.prev;
3285                     PL_regcc = state.cc;
3286                     PL_reg_re = state.re;
3287                     cache_re(PL_reg_re);
3288                     if (toggleutf) PL_reg_flags ^= RF_utf8;
3289
3290                     /* XXXX This is too dramatic a measure... */
3291                     PL_reg_maxiter = 0;
3292
3293                     logical = 0;
3294                     sayNO;
3295                 }
3296                 sw = SvTRUE(ret);
3297                 logical = 0;
3298             }
3299             else {
3300                 sv_setsv(save_scalar(PL_replgv), ret);
3301                 cache_re(oreg);
3302             }
3303             break;
3304         }
3305         case OPEN:
3306             n = ARG(scan);  /* which paren pair */
3307             PL_reg_start_tmp[n] = locinput;
3308             if (n > PL_regsize)
3309                 PL_regsize = n;
3310             break;
3311         case CLOSE:
3312             n = ARG(scan);  /* which paren pair */
3313             PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3314             PL_regendp[n] = locinput - PL_bostr;
3315             if (n > (I32)*PL_reglastparen)
3316                 *PL_reglastparen = n;
3317             *PL_reglastcloseparen = n;
3318             break;
3319         case GROUPP:
3320             n = ARG(scan);  /* which paren pair */
3321             sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
3322             break;
3323         case IFTHEN:
3324             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3325             if (sw)
3326                 next = NEXTOPER(NEXTOPER(scan));
3327             else {
3328                 next = scan + ARG(scan);
3329                 if (OP(next) == IFTHEN) /* Fake one. */
3330                     next = NEXTOPER(NEXTOPER(next));
3331             }
3332             break;
3333         case LOGICAL:
3334             logical = scan->flags;
3335             break;
3336 /*******************************************************************
3337  PL_regcc contains infoblock about the innermost (...)* loop, and
3338  a pointer to the next outer infoblock.
3339
3340  Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3341
3342    1) After matching X, regnode for CURLYX is processed;
3343
3344    2) This regnode creates infoblock on the stack, and calls
3345       regmatch() recursively with the starting point at WHILEM node;
3346
3347    3) Each hit of WHILEM node tries to match A and Z (in the order
3348       depending on the current iteration, min/max of {min,max} and
3349       greediness).  The information about where are nodes for "A"
3350       and "Z" is read from the infoblock, as is info on how many times "A"
3351       was already matched, and greediness.
3352
3353    4) After A matches, the same WHILEM node is hit again.
3354
3355    5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
3356       of the same pair.  Thus when WHILEM tries to match Z, it temporarily
3357       resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
3358       as in (Y(A)*Z)*.  If Z matches, the automaton will hit the WHILEM node
3359       of the external loop.
3360
3361  Currently present infoblocks form a tree with a stem formed by PL_curcc
3362  and whatever it mentions via ->next, and additional attached trees
3363  corresponding to temporarily unset infoblocks as in "5" above.
3364
3365  In the following picture infoblocks for outer loop of
3366  (Y(A)*?Z)*?T are denoted O, for inner I.  NULL starting block
3367  is denoted by x.  The matched string is YAAZYAZT.  Temporarily postponed
3368  infoblocks are drawn below the "reset" infoblock.
3369
3370  In fact in the picture below we do not show failed matches for Z and T
3371  by WHILEM blocks.  [We illustrate minimal matches, since for them it is
3372  more obvious *why* one needs to *temporary* unset infoblocks.]
3373
3374   Matched       REx position    InfoBlocks      Comment
3375                 (Y(A)*?Z)*?T    x
3376                 Y(A)*?Z)*?T     x <- O
3377   Y             (A)*?Z)*?T      x <- O
3378   Y             A)*?Z)*?T       x <- O <- I
3379   YA            )*?Z)*?T        x <- O <- I
3380   YA            A)*?Z)*?T       x <- O <- I
3381   YAA           )*?Z)*?T        x <- O <- I
3382   YAA           Z)*?T           x <- O          # Temporary unset I
3383                                      I
3384
3385   YAAZ          Y(A)*?Z)*?T     x <- O
3386                                      I
3387
3388   YAAZY         (A)*?Z)*?T      x <- O
3389                                      I
3390
3391   YAAZY         A)*?Z)*?T       x <- O <- I
3392                                      I
3393
3394   YAAZYA        )*?Z)*?T        x <- O <- I     
3395                                      I
3396
3397   YAAZYA        Z)*?T           x <- O          # Temporary unset I
3398                                      I,I
3399
3400   YAAZYAZ       )*?T            x <- O
3401                                      I,I
3402
3403   YAAZYAZ       T               x               # Temporary unset O
3404                                 O
3405                                 I,I
3406
3407   YAAZYAZT                      x
3408                                 O
3409                                 I,I
3410  *******************************************************************/
3411         case CURLYX: {
3412                 CURCUR cc;
3413                 CHECKPOINT cp = PL_savestack_ix;
3414                 /* No need to save/restore up to this paren */
3415                 I32 parenfloor = scan->flags;
3416
3417                 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3418                     next += ARG(next);
3419                 cc.oldcc = PL_regcc;
3420                 PL_regcc = &cc;
3421                 /* XXXX Probably it is better to teach regpush to support
3422                    parenfloor > PL_regsize... */
3423                 if (parenfloor > (I32)*PL_reglastparen)
3424                     parenfloor = *PL_reglastparen; /* Pessimization... */
3425                 cc.parenfloor = parenfloor;
3426                 cc.cur = -1;
3427                 cc.min = ARG1(scan);
3428                 cc.max  = ARG2(scan);
3429                 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3430                 cc.next = next;
3431                 cc.minmod = minmod;
3432                 cc.lastloc = 0;
3433                 PL_reginput = locinput;
3434                 n = regmatch(PREVOPER(next));   /* start on the WHILEM */
3435                 regcpblow(cp);
3436                 PL_regcc = cc.oldcc;
3437                 saySAME(n);
3438             }
3439             /* NOT REACHED */
3440         case WHILEM: {
3441                 /*
3442                  * This is really hard to understand, because after we match
3443                  * what we're trying to match, we must make sure the rest of
3444                  * the REx is going to match for sure, and to do that we have
3445                  * to go back UP the parse tree by recursing ever deeper.  And
3446                  * if it fails, we have to reset our parent's current state
3447                  * that we can try again after backing off.
3448                  */
3449
3450                 CHECKPOINT cp, lastcp;
3451                 CURCUR* cc = PL_regcc;
3452                 char *lastloc = cc->lastloc; /* Detection of 0-len. */
3453                 
3454                 n = cc->cur + 1;        /* how many we know we matched */
3455                 PL_reginput = locinput;
3456
3457                 DEBUG_EXECUTE_r(
3458                     PerlIO_printf(Perl_debug_log,
3459                                   "%*s  %ld out of %ld..%ld  cc=%"UVxf"\n",
3460                                   REPORT_CODE_OFF+PL_regindent*2, "",
3461                                   (long)n, (long)cc->min,
3462                                   (long)cc->max, PTR2UV(cc))
3463                     );
3464
3465                 /* If degenerate scan matches "", assume scan done. */
3466
3467                 if (locinput == cc->lastloc && n >= cc->min) {
3468                     PL_regcc = cc->oldcc;
3469                     if (PL_regcc)
3470                         ln = PL_regcc->cur;
3471                     DEBUG_EXECUTE_r(
3472                         PerlIO_printf(Perl_debug_log,
3473                            "%*s  empty match detected, try continuation...\n",
3474                            REPORT_CODE_OFF+PL_regindent*2, "")
3475                         );
3476                     if (regmatch(cc->next))
3477                         sayYES;
3478                     if (PL_regcc)
3479                         PL_regcc->cur = ln;
3480                     PL_regcc = cc;
3481                     sayNO;
3482                 }
3483
3484                 /* First just match a string of min scans. */
3485
3486                 if (n < cc->min) {
3487                     cc->cur = n;
3488                     cc->lastloc = locinput;
3489                     if (regmatch(cc->scan))
3490                         sayYES;
3491                     cc->cur = n - 1;
3492                     cc->lastloc = lastloc;
3493                     sayNO;
3494                 }
3495
3496                 if (scan->flags) {
3497                     /* Check whether we already were at this position.
3498                         Postpone detection until we know the match is not
3499                         *that* much linear. */
3500                 if (!PL_reg_maxiter) {
3501                     PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3502                     PL_reg_leftiter = PL_reg_maxiter;
3503                 }
3504                 if (PL_reg_leftiter-- == 0) {
3505                     I32 size = (PL_reg_maxiter + 7)/8;
3506                     if (PL_reg_poscache) {
3507                         if ((I32)PL_reg_poscache_size < size) {
3508                             Renew(PL_reg_poscache, size, char);
3509                             PL_reg_poscache_size = size;
3510                         }
3511                         Zero(PL_reg_poscache, size, char);
3512                     }
3513                     else {
3514                         PL_reg_poscache_size = size;
3515                         Newz(29, PL_reg_poscache, size, char);
3516                     }
3517                     DEBUG_EXECUTE_r(
3518                         PerlIO_printf(Perl_debug_log,
3519               "%sDetected a super-linear match, switching on caching%s...\n",
3520                                       PL_colors[4], PL_colors[5])
3521                         );
3522                 }
3523                 if (PL_reg_leftiter < 0) {
3524                     I32 o = locinput - PL_bostr, b;
3525
3526                     o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
3527                     b = o % 8;
3528                     o /= 8;
3529                     if (PL_reg_poscache[o] & (1<<b)) {
3530                     DEBUG_EXECUTE_r(
3531                         PerlIO_printf(Perl_debug_log,
3532                                       "%*s  already tried at this position...\n",
3533                                       REPORT_CODE_OFF+PL_regindent*2, "")
3534                         );
3535                         if (PL_reg_flags & RF_false)
3536                             sayYES;
3537                         else
3538                             sayNO_SILENT;
3539                     }
3540                     PL_reg_poscache[o] |= (1<<b);
3541                 }
3542                 }
3543
3544                 /* Prefer next over scan for minimal matching. */
3545
3546                 if (cc->minmod) {
3547                     PL_regcc = cc->oldcc;
3548                     if (PL_regcc)
3549                         ln = PL_regcc->cur;
3550                     cp = regcppush(cc->parenfloor);
3551                     REGCP_SET(lastcp);
3552                     if (regmatch(cc->next)) {
3553                         regcpblow(cp);
3554                         sayYES; /* All done. */
3555                     }
3556                     REGCP_UNWIND(lastcp);
3557                     regcppop();
3558                     if (PL_regcc)
3559                         PL_regcc->cur = ln;
3560                     PL_regcc = cc;
3561
3562                     if (n >= cc->max) { /* Maximum greed exceeded? */
3563                         if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3564                             && !(PL_reg_flags & RF_warned)) {
3565                             PL_reg_flags |= RF_warned;
3566                             Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3567                                  "Complex regular subexpression recursion",
3568                                  REG_INFTY - 1);
3569                         }
3570                         sayNO;
3571                     }
3572
3573                     DEBUG_EXECUTE_r(
3574                         PerlIO_printf(Perl_debug_log,
3575                                       "%*s  trying longer...\n",
3576                                       REPORT_CODE_OFF+PL_regindent*2, "")
3577                         );
3578                     /* Try scanning more and see if it helps. */
3579                     PL_reginput = locinput;
3580                     cc->cur = n;
3581                     cc->lastloc = locinput;
3582                     cp = regcppush(cc->parenfloor);
3583                     REGCP_SET(lastcp);
3584                     if (regmatch(cc->scan)) {
3585                         regcpblow(cp);
3586                         sayYES;
3587                     }
3588                     REGCP_UNWIND(lastcp);
3589                     regcppop();
3590                     cc->cur = n - 1;
3591                     cc->lastloc = lastloc;
3592                     sayNO;
3593                 }
3594
3595                 /* Prefer scan over next for maximal matching. */
3596
3597                 if (n < cc->max) {      /* More greed allowed? */
3598                     cp = regcppush(cc->parenfloor);
3599                     cc->cur = n;
3600                     cc->lastloc = locinput;
3601                     REGCP_SET(lastcp);
3602                     if (regmatch(cc->scan)) {
3603                         regcpblow(cp);
3604                         sayYES;
3605                     }
3606                     REGCP_UNWIND(lastcp);
3607                     regcppop();         /* Restore some previous $<digit>s? */
3608                     PL_reginput = locinput;
3609                     DEBUG_EXECUTE_r(
3610                         PerlIO_printf(Perl_debug_log,
3611                                       "%*s  failed, try continuation...\n",
3612                                       REPORT_CODE_OFF+PL_regindent*2, "")
3613                         );
3614                 }
3615                 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3616                         && !(PL_reg_flags & RF_warned)) {
3617                     PL_reg_flags |= RF_warned;
3618                     Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3619                          "Complex regular subexpression recursion",
3620                          REG_INFTY - 1);
3621                 }
3622
3623                 /* Failed deeper matches of scan, so see if this one works. */
3624                 PL_regcc = cc->oldcc;
3625                 if (PL_regcc)
3626                     ln = PL_regcc->cur;
3627                 if (regmatch(cc->next))
3628                     sayYES;
3629                 if (PL_regcc)
3630                     PL_regcc->cur = ln;
3631                 PL_regcc = cc;
3632                 cc->cur = n - 1;
3633                 cc->lastloc = lastloc;
3634                 sayNO;
3635             }
3636             /* NOT REACHED */
3637         case BRANCHJ:
3638             next = scan + ARG(scan);
3639             if (next == scan)
3640                 next = NULL;
3641             inner = NEXTOPER(NEXTOPER(scan));
3642             goto do_branch;
3643         case BRANCH:
3644             inner = NEXTOPER(scan);
3645           do_branch:
3646             {
3647                 c1 = OP(scan);
3648                 if (OP(next) != c1)     /* No choice. */
3649                     next = inner;       /* Avoid recursion. */
3650                 else {
3651                     I32 lastparen = *PL_reglastparen;
3652                     I32 unwind1;
3653                     re_unwind_branch_t *uw;
3654
3655                     /* Put unwinding data on stack */
3656                     unwind1 = SSNEWt(1,re_unwind_branch_t);
3657                     uw = SSPTRt(unwind1,re_unwind_branch_t);
3658                     uw->prev = unwind;
3659                     unwind = unwind1;
3660                     uw->type = ((c1 == BRANCH)
3661                                 ? RE_UNWIND_BRANCH
3662                                 : RE_UNWIND_BRANCHJ);
3663                     uw->lastparen = lastparen;
3664                     uw->next = next;
3665                     uw->locinput = locinput;
3666                     uw->nextchr = nextchr;
3667 #ifdef DEBUGGING
3668                     uw->regindent = ++PL_regindent;
3669 #endif
3670
3671                     REGCP_SET(uw->lastcp);
3672
3673                     /* Now go into the first branch */
3674                     next = inner;
3675                 }
3676             }
3677             break;
3678         case MINMOD:
3679             minmod = 1;
3680             break;
3681         case CURLYM:
3682         {
3683             I32 l = 0;
3684             CHECKPOINT lastcp;
3685         
3686             /* We suppose that the next guy does not need
3687                backtracking: in particular, it is of constant non-zero length,
3688                and has no parenths to influence future backrefs. */
3689             ln = ARG1(scan);  /* min to match */
3690             n  = ARG2(scan);  /* max to match */
3691             paren = scan->flags;
3692             if (paren) {
3693                 if (paren > PL_regsize)
3694                     PL_regsize = paren;
3695                 if (paren > (I32)*PL_reglastparen)
3696                     *PL_reglastparen = paren;
3697             }
3698             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3699             if (paren)
3700                 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3701             PL_reginput = locinput;
3702             if (minmod) {
3703                 minmod = 0;
3704                 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3705                     sayNO;
3706                 locinput = PL_reginput;
3707                 if (HAS_TEXT(next) || JUMPABLE(next)) {
3708                     regnode *text_node = next;
3709
3710                     if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3711
3712                     if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3713                     else {
3714                         if (PL_regkind[(U8)OP(text_node)] == REF) {
3715                             c1 = c2 = -1000;
3716                             goto assume_ok_MM;
3717                         }
3718                         else { c1 = (U8)*STRING(text_node); }
3719                         if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3720                             c2 = PL_fold[c1];
3721                         else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3722                             c2 = PL_fold_locale[c1];
3723                         else
3724                             c2 = c1;
3725                     }
3726                 }
3727                 else
3728                     c1 = c2 = -1000;
3729             assume_ok_MM:
3730                 REGCP_SET(lastcp);
3731                 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3732                     /* If it could work, try it. */
3733                     if (c1 == -1000 ||
3734                         UCHARAT(PL_reginput) == c1 ||
3735                         UCHARAT(PL_reginput) == c2)
3736                     {
3737                         if (paren) {
3738                             if (ln) {
3739                                 PL_regstartp[paren] =
3740                                     HOPc(PL_reginput, -l) - PL_bostr;
3741                                 PL_regendp[paren] = PL_reginput - PL_bostr;
3742                             }
3743                             else
3744                                 PL_regendp[paren] = -1;
3745                         }
3746                         if (regmatch(next))
3747                             sayYES;
3748                         REGCP_UNWIND(lastcp);
3749                     }
3750                     /* Couldn't or didn't -- move forward. */
3751                     PL_reginput = locinput;
3752                     if (regrepeat_hard(scan, 1, &l)) {
3753                         ln++;
3754                         locinput = PL_reginput;
3755                     }
3756                     else
3757                         sayNO;
3758                 }
3759             }
3760             else {
3761                 n = regrepeat_hard(scan, n, &l);
3762                 locinput = PL_reginput;
3763                 DEBUG_EXECUTE_r(
3764                     PerlIO_printf(Perl_debug_log,
3765                                   "%*s  matched %"IVdf" times, len=%"IVdf"...\n",
3766                                   (int)(REPORT_CODE_OFF+PL_regindent*2), "",
3767                                   (IV) n, (IV)l)
3768                     );
3769                 if (n >= ln) {
3770                     if (HAS_TEXT(next) || JUMPABLE(next)) {
3771                         regnode *text_node = next;
3772
3773                         if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3774
3775                         if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3776                         else {
3777                             if (PL_regkind[(U8)OP(text_node)] == REF) {
3778                                 c1 = c2 = -1000;
3779                                 goto assume_ok_REG;
3780                             }
3781                             else { c1 = (U8)*STRING(text_node); }
3782
3783                             if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3784                                 c2 = PL_fold[c1];
3785                             else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3786                                 c2 = PL_fold_locale[c1];
3787                             else
3788                                 c2 = c1;
3789                         }
3790                     }
3791                     else
3792                         c1 = c2 = -1000;
3793                 }
3794             assume_ok_REG:
3795                 REGCP_SET(lastcp);
3796                 while (n >= ln) {
3797                     /* If it could work, try it. */
3798                     if (c1 == -1000 ||
3799                         UCHARAT(PL_reginput) == c1 ||
3800                         UCHARAT(PL_reginput) == c2)
3801                     {
3802                         DEBUG_EXECUTE_r(
3803                                 PerlIO_printf(Perl_debug_log,
3804                                               "%*s  trying tail with n=%"IVdf"...\n",
3805                                               (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
3806                             );
3807                         if (paren) {
3808                             if (n) {
3809                                 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3810                                 PL_regendp[paren] = PL_reginput - PL_bostr;
3811                             }
3812                             else
3813                                 PL_regendp[paren] = -1;
3814                         }
3815                         if (regmatch(next))
3816                             sayYES;
3817                         REGCP_UNWIND(lastcp);
3818                     }
3819                     /* Couldn't or didn't -- back up. */
3820                     n--;
3821                     locinput = HOPc(locinput, -l);
3822                     PL_reginput = locinput;
3823                 }
3824             }
3825             sayNO;
3826             break;
3827         }
3828         case CURLYN:
3829             paren = scan->flags;        /* Which paren to set */
3830             if (paren > PL_regsize)
3831                 PL_regsize = paren;
3832             if (paren > (I32)*PL_reglastparen)
3833                 *PL_reglastparen = paren;
3834             ln = ARG1(scan);  /* min to match */
3835             n  = ARG2(scan);  /* max to match */
3836             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
3837             goto repeat;
3838         case CURLY:
3839             paren = 0;
3840             ln = ARG1(scan);  /* min to match */
3841             n  = ARG2(scan);  /* max to match */
3842             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3843             goto repeat;
3844         case STAR:
3845             ln = 0;
3846             n = REG_INFTY;
3847             scan = NEXTOPER(scan);
3848             paren = 0;
3849             goto repeat;
3850         case PLUS:
3851             ln = 1;
3852             n = REG_INFTY;
3853             scan = NEXTOPER(scan);
3854             paren = 0;
3855           repeat:
3856             /*
3857             * Lookahead to avoid useless match attempts
3858             * when we know what character comes next.
3859             */
3860
3861             /*
3862             * Used to only do .*x and .*?x, but now it allows
3863             * for )'s, ('s and (?{ ... })'s to be in the way
3864             * of the quantifier and the EXACT-like node.  -- japhy
3865             */
3866
3867             if (HAS_TEXT(next) || JUMPABLE(next)) {
3868                 U8 *s;
3869                 regnode *text_node = next;
3870
3871                 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3872
3873                 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3874                 else {
3875                     if (PL_regkind[(U8)OP(text_node)] == REF) {
3876                         c1 = c2 = -1000;
3877                         goto assume_ok_easy;
3878                     }
3879                     else { s = (U8*)STRING(text_node); }
3880
3881                     if (!UTF) {
3882                         c2 = c1 = *s;
3883                         if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3884                             c2 = PL_fold[c1];
3885                         else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3886                             c2 = PL_fold_locale[c1];
3887                     }
3888                     else { /* UTF */
3889                         if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
3890                              STRLEN ulen1, ulen2;
3891                              U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3892                              U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3893
3894                              to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3895                              to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3896
3897                              c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
3898                                                  ckWARN(WARN_UTF8) ?
3899                                                  0 : UTF8_ALLOW_ANY);
3900                              c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
3901                                                  ckWARN(WARN_UTF8) ?
3902                                                  0 : UTF8_ALLOW_ANY);
3903                         }
3904                         else {
3905                             c2 = c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
3906                                                      ckWARN(WARN_UTF8) ?
3907                                                      0 : UTF8_ALLOW_ANY);
3908                         }
3909                     }
3910                 }
3911             }
3912             else
3913                 c1 = c2 = -1000;
3914         assume_ok_easy:
3915             PL_reginput = locinput;
3916             if (minmod) {
3917                 CHECKPOINT lastcp;
3918                 minmod = 0;
3919                 if (ln && regrepeat(scan, ln) < ln)
3920                     sayNO;
3921                 locinput = PL_reginput;
3922                 REGCP_SET(lastcp);
3923                 if (c1 != -1000) {
3924                     char *e; /* Should not check after this */
3925                     char *old = locinput;
3926                     int count = 0;
3927
3928                     if  (n == REG_INFTY) {
3929                         e = PL_regeol - 1;
3930                         if (do_utf8)
3931                             while (UTF8_IS_CONTINUATION(*(U8*)e))
3932                                 e--;
3933                     }
3934                     else if (do_utf8) {
3935                         int m = n - ln;
3936                         for (e = locinput;
3937                              m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3938                             e += UTF8SKIP(e);
3939                     }
3940                     else {
3941                         e = locinput + n - ln;
3942                         if (e >= PL_regeol)
3943                             e = PL_regeol - 1;
3944                     }
3945                     while (1) {
3946                         /* Find place 'next' could work */
3947                         if (!do_utf8) {
3948                             if (c1 == c2) {
3949                                 while (locinput <= e &&
3950                                        UCHARAT(locinput) != c1)
3951                                     locinput++;
3952                             } else {
3953                                 while (locinput <= e
3954                                        && UCHARAT(locinput) != c1
3955                                        && UCHARAT(locinput) != c2)
3956                                     locinput++;
3957                             }
3958                             count = locinput - old;
3959                         }
3960                         else {
3961                             STRLEN len;
3962                             if (c1 == c2) {
3963                                 /* count initialised to
3964                                  * utf8_distance(old, locinput) */
3965                                 while (locinput <= e &&
3966                                        utf8n_to_uvchr((U8*)locinput,
3967                                                       UTF8_MAXBYTES, &len,
3968                                                       ckWARN(WARN_UTF8) ?
3969                                                       0 : UTF8_ALLOW_ANY) != (UV)c1) {
3970                                     locinput += len;
3971                                     count++;
3972                                 }
3973                             } else {
3974                                 /* count initialised to
3975                                  * utf8_distance(old, locinput) */
3976                                 while (locinput <= e) {
3977                                     UV c = utf8n_to_uvchr((U8*)locinput,
3978                                                           UTF8_MAXBYTES, &len,
3979                                                           ckWARN(WARN_UTF8) ?
3980                                                           0 : UTF8_ALLOW_ANY);
3981                                     if (c == (UV)c1 || c == (UV)c2)
3982                                         break;
3983                                     locinput += len;
3984                                     count++;
3985                                 }
3986                             }
3987                         }
3988                         if (locinput > e)
3989                             sayNO;
3990                         /* PL_reginput == old now */
3991                         if (locinput != old) {
3992                             ln = 1;     /* Did some */
3993                             if (regrepeat(scan, count) < count)
3994                                 sayNO;
3995                         }
3996                         /* PL_reginput == locinput now */
3997                         TRYPAREN(paren, ln, locinput);
3998                         PL_reginput = locinput; /* Could be reset... */
3999                         REGCP_UNWIND(lastcp);
4000                         /* Couldn't or didn't -- move forward. */
4001                         old = locinput;
4002                         if (do_utf8)
4003                             locinput += UTF8SKIP(locinput);
4004                         else
4005                             locinput++;
4006                         count = 1;
4007                     }
4008                 }
4009                 else
4010                 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
4011                     UV c;
4012                     if (c1 != -1000) {
4013                         if (do_utf8)
4014                             c = utf8n_to_uvchr((U8*)PL_reginput,
4015                                                UTF8_MAXBYTES, 0,
4016                                                ckWARN(WARN_UTF8) ?
4017                                                0 : UTF8_ALLOW_ANY);
4018                         else
4019                             c = UCHARAT(PL_reginput);
4020                         /* If it could work, try it. */
4021                         if (c == (UV)c1 || c == (UV)c2)
4022                         {
4023                             TRYPAREN(paren, ln, PL_reginput);
4024                             REGCP_UNWIND(lastcp);
4025                         }
4026                     }
4027                     /* If it could work, try it. */
4028                     else if (c1 == -1000)
4029                     {
4030                         TRYPAREN(paren, ln, PL_reginput);
4031                         REGCP_UNWIND(lastcp);
4032                     }
4033                     /* Couldn't or didn't -- move forward. */
4034                     PL_reginput = locinput;
4035                     if (regrepeat(scan, 1)) {
4036                         ln++;
4037                         locinput = PL_reginput;
4038                     }
4039                     else
4040                         sayNO;
4041                 }
4042             }
4043             else {
4044                 CHECKPOINT lastcp;
4045                 n = regrepeat(scan, n);
4046                 locinput = PL_reginput;
4047                 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
4048                     (OP(next) != MEOL ||
4049                         OP(next) == SEOL || OP(next) == EOS))
4050                 {
4051                     ln = n;                     /* why back off? */
4052                     /* ...because $ and \Z can match before *and* after
4053                        newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
4054                        We should back off by one in this case. */
4055                     if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
4056                         ln--;
4057                 }
4058                 REGCP_SET(lastcp);
4059                 if (paren) {
4060                     UV c = 0;
4061                     while (n >= ln) {
4062                         if (c1 != -1000) {
4063                             if (do_utf8)
4064                                 c = utf8n_to_uvchr((U8*)PL_reginput,
4065                                                    UTF8_MAXBYTES, 0,
4066                                                    ckWARN(WARN_UTF8) ?
4067                                                    0 : UTF8_ALLOW_ANY);
4068                             else
4069                                 c = UCHARAT(PL_reginput);
4070                         }
4071                         /* If it could work, try it. */
4072                         if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
4073                             {
4074                                 TRYPAREN(paren, n, PL_reginput);
4075                                 REGCP_UNWIND(lastcp);
4076                             }
4077                         /* Couldn't or didn't -- back up. */
4078                         n--;
4079                         PL_reginput = locinput = HOPc(locinput, -1);
4080                     }
4081                 }
4082                 else {
4083                     UV c = 0;
4084                     while (n >= ln) {
4085                         if (c1 != -1000) {
4086                             if (do_utf8)
4087                                 c = utf8n_to_uvchr((U8*)PL_reginput,
4088                                                    UTF8_MAXBYTES, 0,
4089                                                    ckWARN(WARN_UTF8) ?
4090                                                    0 : UTF8_ALLOW_ANY);
4091                             else
4092                                 c = UCHARAT(PL_reginput);
4093                         }
4094                         /* If it could work, try it. */
4095                         if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
4096                             {
4097                                 TRYPAREN(paren, n, PL_reginput);
4098                                 REGCP_UNWIND(lastcp);
4099                             }
4100                         /* Couldn't or didn't -- back up. */
4101                         n--;
4102                         PL_reginput = locinput = HOPc(locinput, -1);
4103                     }
4104                 }
4105             }
4106             sayNO;
4107             break;
4108         case END:
4109             if (PL_reg_call_cc) {
4110                 re_cc_state *cur_call_cc = PL_reg_call_cc;
4111                 CURCUR *cctmp = PL_regcc;
4112                 regexp *re = PL_reg_re;
4113                 CHECKPOINT cp, lastcp;
4114                 
4115                 cp = regcppush(0);      /* Save *all* the positions. */
4116                 REGCP_SET(lastcp);
4117                 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
4118                                                     the caller. */
4119                 PL_reginput = locinput; /* Make position available to
4120                                            the callcc. */
4121                 cache_re(PL_reg_call_cc->re);
4122                 PL_regcc = PL_reg_call_cc->cc;
4123                 PL_reg_call_cc = PL_reg_call_cc->prev;
4124                 if (regmatch(cur_call_cc->node)) {
4125                     PL_reg_call_cc = cur_call_cc;
4126                     regcpblow(cp);
4127                     sayYES;
4128                 }
4129                 REGCP_UNWIND(lastcp);
4130                 regcppop();
4131                 PL_reg_call_cc = cur_call_cc;
4132                 PL_regcc = cctmp;
4133                 PL_reg_re = re;
4134                 cache_re(re);
4135
4136                 DEBUG_EXECUTE_r(
4137                     PerlIO_printf(Perl_debug_log,
4138                                   "%*s  continuation failed...\n",
4139                                   REPORT_CODE_OFF+PL_regindent*2, "")
4140                     );
4141                 sayNO_SILENT;
4142             }
4143             if (locinput < PL_regtill) {
4144                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4145                                       "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4146                                       PL_colors[4],
4147                                       (long)(locinput - PL_reg_starttry),
4148                                       (long)(PL_regtill - PL_reg_starttry),
4149                                       PL_colors[5]));
4150                 sayNO_FINAL;            /* Cannot match: too short. */
4151             }
4152             PL_reginput = locinput;     /* put where regtry can find it */
4153             sayYES_FINAL;               /* Success! */
4154         case SUCCEED:
4155             PL_reginput = locinput;     /* put where regtry can find it */
4156             sayYES_LOUD;                /* Success! */
4157         case SUSPEND:
4158             n = 1;
4159             PL_reginput = locinput;
4160             goto do_ifmatch;    
4161         case UNLESSM:
4162             n = 0;
4163             if (scan->flags) {
4164                 s = HOPBACKc(locinput, scan->flags);
4165                 if (!s)
4166                     goto say_yes;
4167                 PL_reginput = s;
4168             }
4169             else
4170                 PL_reginput = locinput;
4171             PL_reg_flags ^= RF_false;
4172             goto do_ifmatch;
4173         case IFMATCH:
4174             n = 1;
4175             if (scan->flags) {
4176                 s = HOPBACKc(locinput, scan->flags);
4177                 if (!s)
4178                     goto say_no;
4179                 PL_reginput = s;
4180             }
4181             else
4182                 PL_reginput = locinput;
4183
4184           do_ifmatch:
4185             inner = NEXTOPER(NEXTOPER(scan));
4186             if (regmatch(inner) != n) {
4187                 if (n == 0)
4188                     PL_reg_flags ^= RF_false;
4189               say_no:
4190                 if (logical) {
4191                     logical = 0;
4192                     sw = 0;
4193                     goto do_longjump;
4194                 }
4195                 else
4196                     sayNO;
4197             }
4198             if (n == 0)
4199                 PL_reg_flags ^= RF_false;
4200           say_yes:
4201             if (logical) {
4202                 logical = 0;
4203                 sw = 1;
4204             }
4205             if (OP(scan) == SUSPEND) {
4206                 locinput = PL_reginput;
4207                 nextchr = UCHARAT(locinput);
4208             }
4209             /* FALL THROUGH. */
4210         case LONGJMP:
4211           do_longjump:
4212             next = scan + ARG(scan);
4213             if (next == scan)
4214                 next = NULL;
4215             break;
4216         default:
4217             PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
4218                           PTR2UV(scan), OP(scan));
4219             Perl_croak(aTHX_ "regexp memory corruption");
4220         }
4221       reenter:
4222         scan = next;
4223     }
4224
4225     /*
4226     * We get here only if there's trouble -- normally "case END" is
4227     * the terminating point.
4228     */
4229     Perl_croak(aTHX_ "corrupted regexp pointers");
4230     /*NOTREACHED*/
4231     sayNO;
4232
4233 yes_loud:
4234     DEBUG_EXECUTE_r(
4235         PerlIO_printf(Perl_debug_log,
4236                       "%*s  %scould match...%s\n",
4237                       REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
4238         );
4239     goto yes;
4240 yes_final:
4241     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
4242                           PL_colors[4],PL_colors[5]));
4243 yes:
4244 #ifdef DEBUGGING
4245     PL_regindent--;
4246 #endif
4247
4248 #if 0                                   /* Breaks $^R */
4249     if (unwind)
4250         regcpblow(firstcp);
4251 #endif
4252     return 1;
4253
4254 no:
4255     DEBUG_EXECUTE_r(
4256         PerlIO_printf(Perl_debug_log,
4257                       "%*s  %sfailed...%s\n",
4258                       REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
4259         );
4260     goto do_no;
4261 no_final:
4262 do_no:
4263     if (unwind) {
4264         re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
4265
4266         switch (uw->type) {
4267         case RE_UNWIND_BRANCH:
4268         case RE_UNWIND_BRANCHJ:
4269         {
4270             re_unwind_branch_t *uwb = &(uw->branch);
4271             I32 lastparen = uwb->lastparen;
4272         
4273             REGCP_UNWIND(uwb->lastcp);
4274             for (n = *PL_reglastparen; n > lastparen; n--)
4275                 PL_regendp[n] = -1;
4276             *PL_reglastparen = n;
4277             scan = next = uwb->next;
4278             if ( !scan ||
4279                  OP(scan) != (uwb->type == RE_UNWIND_BRANCH
4280                               ? BRANCH : BRANCHJ) ) {           /* Failure */
4281                 unwind = uwb->prev;
4282 #ifdef DEBUGGING
4283                 PL_regindent--;
4284 #endif
4285                 goto do_no;
4286             }
4287             /* Have more choice yet.  Reuse the same uwb.  */
4288             /*SUPPRESS 560*/
4289             if ((n = (uwb->type == RE_UNWIND_BRANCH
4290                       ? NEXT_OFF(next) : ARG(next))))
4291                 next += n;
4292             else
4293                 next = NULL;    /* XXXX Needn't unwinding in this case... */
4294             uwb->next = next;
4295             next = NEXTOPER(scan);
4296             if (uwb->type == RE_UNWIND_BRANCHJ)
4297                 next = NEXTOPER(next);
4298             locinput = uwb->locinput;
4299             nextchr = uwb->nextchr;
4300 #ifdef DEBUGGING
4301             PL_regindent = uwb->regindent;
4302 #endif
4303
4304             goto reenter;
4305         }
4306         /* NOT REACHED */
4307         default:
4308             Perl_croak(aTHX_ "regexp unwind memory corruption");
4309         }
4310         /* NOT REACHED */
4311     }
4312 #ifdef DEBUGGING
4313     PL_regindent--;
4314 #endif
4315     return 0;
4316 }
4317
4318 /*
4319  - regrepeat - repeatedly match something simple, report how many
4320  */
4321 /*
4322  * [This routine now assumes that it will only match on things of length 1.
4323  * That was true before, but now we assume scan - reginput is the count,
4324  * rather than incrementing count on every character.  [Er, except utf8.]]
4325  */
4326 STATIC I32
4327 S_regrepeat(pTHX_ regnode *p, I32 max)
4328 {
4329     register char *scan;
4330     register I32 c;
4331     register char *loceol = PL_regeol;
4332     register I32 hardcount = 0;
4333     register bool do_utf8 = PL_reg_match_utf8;
4334
4335     scan = PL_reginput;
4336     if (max == REG_INFTY)
4337         max = I32_MAX;
4338     else if (max < loceol - scan)
4339       loceol = scan + max;
4340     switch (OP(p)) {
4341     case REG_ANY:
4342         if (do_utf8) {
4343             loceol = PL_regeol;
4344             while (scan < loceol && hardcount < max && *scan != '\n') {
4345                 scan += UTF8SKIP(scan);
4346                 hardcount++;
4347             }
4348         } else {
4349             while (scan < loceol && *scan != '\n')
4350                 scan++;
4351         }
4352         break;
4353     case SANY:
4354         if (do_utf8) {
4355             loceol = PL_regeol;
4356             while (scan < loceol && hardcount < max) {
4357                 scan += UTF8SKIP(scan);
4358                 hardcount++;
4359             }
4360         }
4361         else
4362             scan = loceol;
4363         break;
4364     case CANY:
4365         scan = loceol;
4366         break;
4367     case EXACT:         /* length of string is 1 */
4368         c = (U8)*STRING(p);
4369         while (scan < loceol && UCHARAT(scan) == c)
4370             scan++;
4371         break;
4372     case EXACTF:        /* length of string is 1 */
4373         c = (U8)*STRING(p);
4374         while (scan < loceol &&
4375                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
4376             scan++;
4377         break;
4378     case EXACTFL:       /* length of string is 1 */
4379         PL_reg_flags |= RF_tainted;
4380         c = (U8)*STRING(p);
4381         while (scan < loceol &&
4382                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
4383             scan++;
4384         break;
4385     case ANYOF:
4386         if (do_utf8) {
4387             loceol = PL_regeol;
4388             while (hardcount < max && scan < loceol &&
4389                    reginclass(p, (U8*)scan, 0, do_utf8)) {
4390                 scan += UTF8SKIP(scan);
4391                 hardcount++;
4392             }
4393         } else {
4394             while (scan < loceol && REGINCLASS(p, (U8*)scan))
4395                 scan++;
4396         }
4397         break;
4398     case ALNUM:
4399         if (do_utf8) {
4400             loceol = PL_regeol;
4401             LOAD_UTF8_CHARCLASS(alnum,"a");
4402             while (hardcount < max && scan < loceol &&
4403                    swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4404                 scan += UTF8SKIP(scan);
4405                 hardcount++;
4406             }
4407         } else {
4408             while (scan < loceol && isALNUM(*scan))
4409                 scan++;
4410         }
4411         break;
4412     case ALNUML:
4413         PL_reg_flags |= RF_tainted;
4414         if (do_utf8) {
4415             loceol = PL_regeol;
4416             while (hardcount < max && scan < loceol &&
4417                    isALNUM_LC_utf8((U8*)scan)) {
4418                 scan += UTF8SKIP(scan);
4419                 hardcount++;
4420             }
4421         } else {
4422             while (scan < loceol && isALNUM_LC(*scan))
4423                 scan++;
4424         }
4425         break;
4426     case NALNUM:
4427         if (do_utf8) {
4428             loceol = PL_regeol;
4429             LOAD_UTF8_CHARCLASS(alnum,"a");
4430             while (hardcount < max && scan < loceol &&
4431                    !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4432                 scan += UTF8SKIP(scan);
4433                 hardcount++;
4434             }
4435         } else {
4436             while (scan < loceol && !isALNUM(*scan))
4437                 scan++;
4438         }
4439         break;
4440     case NALNUML:
4441         PL_reg_flags |= RF_tainted;
4442         if (do_utf8) {
4443             loceol = PL_regeol;
4444             while (hardcount < max && scan < loceol &&
4445                    !isALNUM_LC_utf8((U8*)scan)) {
4446                 scan += UTF8SKIP(scan);
4447                 hardcount++;
4448             }
4449         } else {
4450             while (scan < loceol && !isALNUM_LC(*scan))
4451                 scan++;
4452         }
4453         break;
4454     case SPACE:
4455         if (do_utf8) {
4456             loceol = PL_regeol;
4457             LOAD_UTF8_CHARCLASS(space," ");
4458             while (hardcount < max && scan < loceol &&
4459                    (*scan == ' ' ||
4460                     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4461                 scan += UTF8SKIP(scan);
4462                 hardcount++;
4463             }
4464         } else {
4465             while (scan < loceol && isSPACE(*scan))
4466                 scan++;
4467         }
4468         break;
4469     case SPACEL:
4470         PL_reg_flags |= RF_tainted;
4471         if (do_utf8) {
4472             loceol = PL_regeol;
4473             while (hardcount < max && scan < loceol &&
4474                    (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4475                 scan += UTF8SKIP(scan);
4476                 hardcount++;
4477             }
4478         } else {
4479             while (scan < loceol && isSPACE_LC(*scan))
4480                 scan++;
4481         }
4482         break;
4483     case NSPACE:
4484         if (do_utf8) {
4485             loceol = PL_regeol;
4486             LOAD_UTF8_CHARCLASS(space," ");
4487             while (hardcount < max && scan < loceol &&
4488                    !(*scan == ' ' ||
4489                      swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4490                 scan += UTF8SKIP(scan);
4491                 hardcount++;
4492             }
4493         } else {
4494             while (scan < loceol && !isSPACE(*scan))
4495                 scan++;
4496             break;
4497         }
4498     case NSPACEL:
4499         PL_reg_flags |= RF_tainted;
4500         if (do_utf8) {
4501             loceol = PL_regeol;
4502             while (hardcount < max && scan < loceol &&
4503                    !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4504                 scan += UTF8SKIP(scan);
4505                 hardcount++;
4506             }
4507         } else {
4508             while (scan < loceol && !isSPACE_LC(*scan))
4509                 scan++;
4510         }
4511         break;
4512     case DIGIT:
4513         if (do_utf8) {
4514             loceol = PL_regeol;
4515             LOAD_UTF8_CHARCLASS(digit,"0");
4516             while (hardcount < max && scan < loceol &&
4517                    swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4518                 scan += UTF8SKIP(scan);
4519                 hardcount++;
4520             }
4521         } else {
4522             while (scan < loceol && isDIGIT(*scan))
4523                 scan++;
4524         }
4525         break;
4526     case NDIGIT:
4527         if (do_utf8) {
4528             loceol = PL_regeol;
4529             LOAD_UTF8_CHARCLASS(digit,"0");
4530             while (hardcount < max && scan < loceol &&
4531                    !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4532                 scan += UTF8SKIP(scan);
4533                 hardcount++;
4534             }
4535         } else {
4536             while (scan < loceol && !isDIGIT(*scan))
4537                 scan++;
4538         }
4539         break;
4540     default:            /* Called on something of 0 width. */
4541         break;          /* So match right here or not at all. */
4542     }
4543
4544     if (hardcount)
4545         c = hardcount;
4546     else
4547         c = scan - PL_reginput;
4548     PL_reginput = scan;
4549
4550     DEBUG_r({
4551                 SV *re_debug_flags;
4552                 SV *prop = sv_newmortal();
4553                 GET_RE_DEBUG_FLAGS;
4554                 DEBUG_EXECUTE_r({
4555                 regprop(prop, p);
4556                 PerlIO_printf(Perl_debug_log,
4557                               "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
4558                               REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
4559         });
4560         });
4561
4562     return(c);
4563 }
4564
4565 /*
4566  - regrepeat_hard - repeatedly match something, report total lenth and length
4567  *
4568  * The repeater is supposed to have constant non-zero length.
4569  */
4570
4571 STATIC I32
4572 S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
4573 {
4574     register char *scan = Nullch;
4575     register char *start;
4576     register char *loceol = PL_regeol;
4577     I32 l = 0;
4578     I32 count = 0, res = 1;
4579
4580     if (!max)
4581         return 0;
4582
4583     start = PL_reginput;
4584     if (PL_reg_match_utf8) {
4585         while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4586             if (!count++) {
4587                 l = 0;
4588                 while (start < PL_reginput) {
4589                     l++;
4590                     start += UTF8SKIP(start);
4591                 }
4592                 *lp = l;
4593                 if (l == 0)
4594                     return max;
4595             }
4596             if (count == max)
4597                 return count;
4598         }
4599     }
4600     else {
4601         while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4602             if (!count++) {
4603                 *lp = l = PL_reginput - start;
4604                 if (max != REG_INFTY && l*max < loceol - scan)
4605                     loceol = scan + l*max;
4606                 if (l == 0)
4607                     return max;
4608             }
4609         }
4610     }
4611     if (!res)
4612         PL_reginput = scan;
4613
4614     return count;
4615 }
4616
4617 /*
4618 - regclass_swash - prepare the utf8 swash
4619 */
4620
4621 SV *
4622 Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV **altsvp)
4623 {
4624     SV *sw  = NULL;
4625     SV *si  = NULL;
4626     SV *alt = NULL;
4627
4628     if (PL_regdata && PL_regdata->count) {
4629         U32 n = ARG(node);
4630
4631         if (PL_regdata->what[n] == 's') {
4632             SV *rv = (SV*)PL_regdata->data[n];
4633             AV *av = (AV*)SvRV((SV*)rv);
4634             SV **ary = AvARRAY(av);
4635             SV **a, **b;
4636         
4637             /* See the end of regcomp.c:S_reglass() for
4638              * documentation of these array elements. */
4639
4640             si = *ary;
4641             a  = SvTYPE(ary[1]) == SVt_RV   ? &ary[1] : 0;
4642             b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4643
4644             if (a)
4645                 sw = *a;
4646             else if (si && doinit) {
4647                 sw = swash_init("utf8", "", si, 1, 0);
4648                 (void)av_store(av, 1, sw);
4649             }
4650             if (b)
4651                 alt = *b;
4652         }
4653     }
4654         
4655     if (listsvp)
4656         *listsvp = si;
4657     if (altsvp)
4658         *altsvp  = alt;
4659
4660     return sw;
4661 }
4662
4663 /*
4664  - reginclass - determine if a character falls into a character class
4665  
4666   The n is the ANYOF regnode, the p is the target string, lenp
4667   is pointer to the maximum length of how far to go in the p
4668   (if the lenp is zero, UTF8SKIP(p) is used),
4669   do_utf8 tells whether the target string is in UTF-8.
4670
4671  */
4672
4673 STATIC bool
4674 S_reginclass(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, register bool do_utf8)
4675 {
4676     char flags = ANYOF_FLAGS(n);
4677     bool match = FALSE;
4678     UV c = *p;
4679     STRLEN len = 0;
4680     STRLEN plen;
4681
4682     if (do_utf8 && !UTF8_IS_INVARIANT(c))
4683          c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
4684                             ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
4685
4686     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
4687     if (do_utf8 || (flags & ANYOF_UNICODE)) {
4688         if (lenp)
4689             *lenp = 0;
4690         if (do_utf8 && !ANYOF_RUNTIME(n)) {
4691             if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4692                 match = TRUE;
4693         }
4694         if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
4695             match = TRUE;
4696         if (!match) {
4697             AV *av;
4698             SV *sw = regclass_swash(n, TRUE, 0, (SV**)&av);
4699         
4700             if (sw) {
4701                 if (swash_fetch(sw, p, do_utf8))
4702                     match = TRUE;
4703                 else if (flags & ANYOF_FOLD) {
4704                     if (!match && lenp && av) {
4705                         I32 i;
4706                       
4707                         for (i = 0; i <= av_len(av); i++) {
4708                             SV* sv = *av_fetch(av, i, FALSE);
4709                             STRLEN len;
4710                             char *s = SvPV(sv, len);
4711                         
4712                             if (len <= plen && memEQ(s, (char*)p, len)) {
4713                                 *lenp = len;
4714                                 match = TRUE;
4715                                 break;
4716                             }
4717                         }
4718                     }
4719                     if (!match) {
4720                         U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4721                         STRLEN tmplen;
4722
4723                         to_utf8_fold(p, tmpbuf, &tmplen);
4724                         if (swash_fetch(sw, tmpbuf, do_utf8))
4725                             match = TRUE;
4726                     }
4727                 }
4728             }
4729         }
4730         if (match && lenp && *lenp == 0)
4731             *lenp = UNISKIP(NATIVE_TO_UNI(c));
4732     }
4733     if (!match && c < 256) {
4734         if (ANYOF_BITMAP_TEST(n, c))
4735             match = TRUE;
4736         else if (flags & ANYOF_FOLD) {
4737             U8 f;
4738
4739             if (flags & ANYOF_LOCALE) {
4740                 PL_reg_flags |= RF_tainted;
4741                 f = PL_fold_locale[c];
4742             }
4743             else
4744                 f = PL_fold[c];
4745             if (f != c && ANYOF_BITMAP_TEST(n, f))
4746                 match = TRUE;
4747         }
4748         
4749         if (!match && (flags & ANYOF_CLASS)) {
4750             PL_reg_flags |= RF_tainted;
4751             if (
4752                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
4753                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
4754                 (ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
4755                 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
4756                 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
4757                 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
4758                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
4759                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4760                 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
4761                 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
4762                 (ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
4763                 (ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
4764                 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
4765                 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
4766                 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
4767                 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
4768                 (ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
4769                 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
4770                 (ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
4771                 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
4772                 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
4773                 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
4774                 (ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
4775                 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
4776                 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
4777                 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
4778                 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
4779                 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
4780                 (ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
4781                 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
4782                 ) /* How's that for a conditional? */
4783             {
4784                 match = TRUE;
4785             }
4786         }
4787     }
4788
4789     return (flags & ANYOF_INVERT) ? !match : match;
4790 }
4791
4792 STATIC U8 *
4793 S_reghop(pTHX_ U8 *s, I32 off)
4794 {
4795     return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4796 }
4797
4798 STATIC U8 *
4799 S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
4800 {
4801     if (off >= 0) {
4802         while (off-- && s < lim) {
4803             /* XXX could check well-formedness here */
4804             s += UTF8SKIP(s);
4805         }
4806     }
4807     else {
4808         while (off++) {
4809             if (s > lim) {
4810                 s--;
4811                 if (UTF8_IS_CONTINUED(*s)) {
4812                     while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4813                         s--;
4814                 }
4815                 /* XXX could check well-formedness here */
4816             }
4817         }
4818     }
4819     return s;
4820 }
4821
4822 STATIC U8 *
4823 S_reghopmaybe(pTHX_ U8 *s, I32 off)
4824 {
4825     return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4826 }
4827
4828 STATIC U8 *
4829 S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
4830 {
4831     if (off >= 0) {
4832         while (off-- && s < lim) {
4833             /* XXX could check well-formedness here */
4834             s += UTF8SKIP(s);
4835         }
4836         if (off >= 0)
4837             return 0;
4838     }
4839     else {
4840         while (off++) {
4841             if (s > lim) {
4842                 s--;
4843                 if (UTF8_IS_CONTINUED(*s)) {
4844                     while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4845                         s--;
4846                 }
4847                 /* XXX could check well-formedness here */
4848             }
4849             else
4850                 break;
4851         }
4852         if (off <= 0)
4853             return 0;
4854     }
4855     return s;
4856 }
4857
4858 static void
4859 restore_pos(pTHX_ void *arg)
4860 {
4861     if (PL_reg_eval_set) {
4862         if (PL_reg_oldsaved) {
4863             PL_reg_re->subbeg = PL_reg_oldsaved;
4864             PL_reg_re->sublen = PL_reg_oldsavedlen;
4865 #ifdef PERL_COPY_ON_WRITE
4866             PL_reg_re->saved_copy = PL_nrs;
4867 #endif
4868             RX_MATCH_COPIED_on(PL_reg_re);
4869         }
4870         PL_reg_magic->mg_len = PL_reg_oldpos;
4871         PL_reg_eval_set = 0;
4872         PL_curpm = PL_reg_oldcurpm;
4873     }   
4874 }
4875
4876 STATIC void
4877 S_to_utf8_substr(pTHX_ register regexp *prog)
4878 {
4879     SV* sv;
4880     if (prog->float_substr && !prog->float_utf8) {
4881         prog->float_utf8 = sv = newSVsv(prog->float_substr);
4882         sv_utf8_upgrade(sv);
4883         if (SvTAIL(prog->float_substr))
4884             SvTAIL_on(sv);
4885         if (prog->float_substr == prog->check_substr)
4886             prog->check_utf8 = sv;
4887     }
4888     if (prog->anchored_substr && !prog->anchored_utf8) {
4889         prog->anchored_utf8 = sv = newSVsv(prog->anchored_substr);
4890         sv_utf8_upgrade(sv);
4891         if (SvTAIL(prog->anchored_substr))
4892             SvTAIL_on(sv);
4893         if (prog->anchored_substr == prog->check_substr)
4894             prog->check_utf8 = sv;
4895     }
4896 }
4897
4898 STATIC void
4899 S_to_byte_substr(pTHX_ register regexp *prog)
4900 {
4901     SV* sv;
4902     if (prog->float_utf8 && !prog->float_substr) {
4903         prog->float_substr = sv = newSVsv(prog->float_utf8);
4904         if (sv_utf8_downgrade(sv, TRUE)) {
4905             if (SvTAIL(prog->float_utf8))
4906                 SvTAIL_on(sv);
4907         } else {
4908             SvREFCNT_dec(sv);
4909             prog->float_substr = sv = &PL_sv_undef;
4910         }
4911         if (prog->float_utf8 == prog->check_utf8)
4912             prog->check_substr = sv;
4913     }
4914     if (prog->anchored_utf8 && !prog->anchored_substr) {
4915         prog->anchored_substr = sv = newSVsv(prog->anchored_utf8);
4916         if (sv_utf8_downgrade(sv, TRUE)) {
4917             if (SvTAIL(prog->anchored_utf8))
4918                 SvTAIL_on(sv);
4919         } else {
4920             SvREFCNT_dec(sv);
4921             prog->anchored_substr = sv = &PL_sv_undef;
4922         }
4923         if (prog->anchored_utf8 == prog->check_utf8)
4924             prog->check_substr = sv;
4925     }
4926 }