This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add a note about installhtml
[perl5.git] / regexec.c
1 /*    regexec.c
2  */
3
4 /*
5  * "One Ring to rule them all, One Ring to find them..."
6  */
7
8 /* This file contains functions for executing a regular expression.  See
9  * also regcomp.c which funnily enough, contains functions for compiling
10  * a regular expression.
11  *
12  * This file is also copied at build time to ext/re/re_exec.c, where
13  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14  * This causes the main functions to be compiled under new names and with
15  * debugging support added, which makes "use re 'debug'" work.
16  
17  */
18
19 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
20  * confused with the original package (see point 3 below).  Thanks, Henry!
21  */
22
23 /* Additional note: this code is very heavily munged from Henry's version
24  * in places.  In some spots I've traded clarity for efficiency, so don't
25  * blame Henry for some of the lack of readability.
26  */
27
28 /* The names of the functions have been changed from regcomp and
29  * regexec to  pregcomp and pregexec in order to avoid conflicts
30  * with the POSIX routines of the same names.
31 */
32
33 #ifdef PERL_EXT_RE_BUILD
34 /* need to replace pregcomp et al, so enable that */
35 #  ifndef PERL_IN_XSUB_RE
36 #    define PERL_IN_XSUB_RE
37 #  endif
38 /* need access to debugger hooks */
39 #  if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
40 #    define DEBUGGING
41 #  endif
42 #endif
43
44 #ifdef PERL_IN_XSUB_RE
45 /* We *really* need to overwrite these symbols: */
46 #  define Perl_regexec_flags my_regexec
47 #  define Perl_regdump my_regdump
48 #  define Perl_regprop my_regprop
49 #  define Perl_re_intuit_start my_re_intuit_start
50 /* *These* symbols are masked to allow static link. */
51 #  define Perl_pregexec my_pregexec
52 #  define Perl_reginitcolors my_reginitcolors
53 #  define Perl_regclass_swash my_regclass_swash
54
55 #  define PERL_NO_GET_CONTEXT
56 #endif
57
58 /*SUPPRESS 112*/
59 /*
60  * pregcomp and pregexec -- regsub and regerror are not used in perl
61  *
62  *      Copyright (c) 1986 by University of Toronto.
63  *      Written by Henry Spencer.  Not derived from licensed software.
64  *
65  *      Permission is granted to anyone to use this software for any
66  *      purpose on any computer system, and to redistribute it freely,
67  *      subject to the following restrictions:
68  *
69  *      1. The author is not responsible for the consequences of use of
70  *              this software, no matter how awful, even if they arise
71  *              from defects in it.
72  *
73  *      2. The origin of this software must not be misrepresented, either
74  *              by explicit claim or by omission.
75  *
76  *      3. Altered versions must be plainly marked as such, and must not
77  *              be misrepresented as being the original software.
78  *
79  ****    Alterations to Henry's code are...
80  ****
81  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
82  ****    2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
83  ****
84  ****    You may distribute under the terms of either the GNU General Public
85  ****    License or the Artistic License, as specified in the README file.
86  *
87  * Beware that some of this code is subtly aware of the way operator
88  * precedence is structured in regular expressions.  Serious changes in
89  * regular-expression syntax might require a total rethink.
90  */
91 #include "EXTERN.h"
92 #define PERL_IN_REGEXEC_C
93 #include "perl.h"
94
95 #include "regcomp.h"
96
97 #define RF_tainted      1               /* tainted information used? */
98 #define RF_warned       2               /* warned about big count? */
99 #define RF_evaled       4               /* Did an EVAL with setting? */
100 #define RF_utf8         8               /* String contains multibyte chars? */
101
102 #define UTF ((PL_reg_flags & RF_utf8) != 0)
103
104 #define RS_init         1               /* eval environment created */
105 #define RS_set          2               /* replsv value is set */
106
107 #ifndef STATIC
108 #define STATIC  static
109 #endif
110
111 #define REGINCLASS(p,c)  (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
112
113 /*
114  * Forwards.
115  */
116
117 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
118 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
119
120 #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
121 #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
122 #define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
123 #define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
124 #define HOPc(pos,off) ((char*)HOP(pos,off))
125 #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
126
127 #define HOPBACK(pos, off) (             \
128     (PL_reg_match_utf8)                 \
129         ? reghopmaybe((U8*)pos, -off)   \
130     : (pos - off >= PL_bostr)           \
131         ? (U8*)(pos - off)              \
132     : (U8*)NULL                         \
133 )
134 #define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
135
136 #define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
137 #define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
138 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
139 #define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
140 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
141 #define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
142
143 #define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) { ENTER; save_re_context(); (void)CAT2(is_utf8_, a)((const U8*)b); LEAVE; } } STMT_END
144
145 /* for use after a quantifier and before an EXACT-like node -- japhy */
146 #define JUMPABLE(rn) ( \
147     OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
148     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
149     OP(rn) == PLUS || OP(rn) == MINMOD || \
150     (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
151 )
152
153 #define HAS_TEXT(rn) ( \
154     PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
155 )
156
157 /*
158   Search for mandatory following text node; for lookahead, the text must
159   follow but for lookbehind (rn->flags != 0) we skip to the next step.
160 */
161 #define FIND_NEXT_IMPT(rn) STMT_START { \
162     while (JUMPABLE(rn)) \
163         if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
164             rn = NEXTOPER(NEXTOPER(rn)); \
165         else if (OP(rn) == PLUS) \
166             rn = NEXTOPER(rn); \
167         else if (OP(rn) == IFMATCH) \
168             rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
169         else rn += NEXT_OFF(rn); \
170 } STMT_END 
171
172 static void restore_pos(pTHX_ void *arg);
173
174 STATIC CHECKPOINT
175 S_regcppush(pTHX_ I32 parenfloor)
176 {
177     int retval = PL_savestack_ix;
178 #define REGCP_PAREN_ELEMS 4
179     int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
180     int p;
181
182     if (paren_elems_to_push < 0)
183         Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
184
185 #define REGCP_OTHER_ELEMS 6
186     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
187     for (p = PL_regsize; p > parenfloor; p--) {
188 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
189         SSPUSHINT(PL_regendp[p]);
190         SSPUSHINT(PL_regstartp[p]);
191         SSPUSHPTR(PL_reg_start_tmp[p]);
192         SSPUSHINT(p);
193     }
194 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
195     SSPUSHINT(PL_regsize);
196     SSPUSHINT(*PL_reglastparen);
197     SSPUSHINT(*PL_reglastcloseparen);
198     SSPUSHPTR(PL_reginput);
199 #define REGCP_FRAME_ELEMS 2
200 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
201  * are needed for the regexp context stack bookkeeping. */
202     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
203     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
204
205     return retval;
206 }
207
208 /* These are needed since we do not localize EVAL nodes: */
209 #  define REGCP_SET(cp)  DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,          \
210                              "  Setting an EVAL scope, savestack=%"IVdf"\n",    \
211                              (IV)PL_savestack_ix)); cp = PL_savestack_ix
212
213 #  define REGCP_UNWIND(cp)  DEBUG_EXECUTE_r(cp != PL_savestack_ix ?             \
214                                 PerlIO_printf(Perl_debug_log,           \
215                                 "  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
216                                 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
217
218 STATIC char *
219 S_regcppop(pTHX)
220 {
221     I32 i;
222     U32 paren = 0;
223     char *input;
224     I32 tmps;
225
226     GET_RE_DEBUG_FLAGS_DECL;
227
228     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
229     i = SSPOPINT;
230     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
231     i = SSPOPINT; /* Parentheses elements to pop. */
232     input = (char *) SSPOPPTR;
233     *PL_reglastcloseparen = SSPOPINT;
234     *PL_reglastparen = SSPOPINT;
235     PL_regsize = SSPOPINT;
236
237     /* Now restore the parentheses context. */
238     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
239          i > 0; i -= REGCP_PAREN_ELEMS) {
240         paren = (U32)SSPOPINT;
241         PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
242         PL_regstartp[paren] = SSPOPINT;
243         tmps = SSPOPINT;
244         if (paren <= *PL_reglastparen)
245             PL_regendp[paren] = tmps;
246         DEBUG_EXECUTE_r(
247             PerlIO_printf(Perl_debug_log,
248                           "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
249                           (UV)paren, (IV)PL_regstartp[paren],
250                           (IV)(PL_reg_start_tmp[paren] - PL_bostr),
251                           (IV)PL_regendp[paren],
252                           (paren > *PL_reglastparen ? "(no)" : ""));
253         );
254     }
255     DEBUG_EXECUTE_r(
256         if ((I32)(*PL_reglastparen + 1) <= PL_regnpar) {
257             PerlIO_printf(Perl_debug_log,
258                           "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
259                           (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
260         }
261     );
262 #if 1
263     /* It would seem that the similar code in regtry()
264      * already takes care of this, and in fact it is in
265      * a better location to since this code can #if 0-ed out
266      * but the code in regtry() is needed or otherwise tests
267      * requiring null fields (pat.t#187 and split.t#{13,14}
268      * (as of patchlevel 7877)  will fail.  Then again,
269      * this code seems to be necessary or otherwise
270      * building DynaLoader will fail:
271      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
272      * --jhi */
273     for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) {
274         if ((I32)paren > PL_regsize)
275             PL_regstartp[paren] = -1;
276         PL_regendp[paren] = -1;
277     }
278 #endif
279     return input;
280 }
281
282 STATIC char *
283 S_regcp_set_to(pTHX_ I32 ss)
284 {
285     I32 tmp = PL_savestack_ix;
286
287     PL_savestack_ix = ss;
288     regcppop();
289     PL_savestack_ix = tmp;
290     return Nullch;
291 }
292
293 typedef struct re_cc_state
294 {
295     I32 ss;
296     regnode *node;
297     struct re_cc_state *prev;
298     CURCUR *cc;
299     regexp *re;
300 } re_cc_state;
301
302 #define regcpblow(cp) LEAVE_SCOPE(cp)   /* Ignores regcppush()ed data. */
303
304 #define TRYPAREN(paren, n, input) {                             \
305     if (paren) {                                                \
306         if (n) {                                                \
307             PL_regstartp[paren] = HOPc(input, -1) - PL_bostr;   \
308             PL_regendp[paren] = input - PL_bostr;               \
309         }                                                       \
310         else                                                    \
311             PL_regendp[paren] = -1;                             \
312     }                                                           \
313     if (regmatch(next))                                         \
314         sayYES;                                                 \
315     if (paren && n)                                             \
316         PL_regendp[paren] = -1;                                 \
317 }
318
319
320 /*
321  * pregexec and friends
322  */
323
324 /*
325  - pregexec - match a regexp against a string
326  */
327 I32
328 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
329          char *strbeg, I32 minend, SV *screamer, U32 nosave)
330 /* strend: pointer to null at end of string */
331 /* strbeg: real beginning of string */
332 /* minend: end of match must be >=minend after stringarg. */
333 /* nosave: For optimizations. */
334 {
335     return
336         regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
337                       nosave ? 0 : REXEC_COPY_STR);
338 }
339
340 STATIC void
341 S_cache_re(pTHX_ regexp *prog)
342 {
343     PL_regprecomp = prog->precomp;              /* Needed for FAIL. */
344 #ifdef DEBUGGING
345     PL_regprogram = prog->program;
346 #endif
347     PL_regnpar = prog->nparens;
348     PL_regdata = prog->data;
349     PL_reg_re = prog;
350 }
351
352 /*
353  * Need to implement the following flags for reg_anch:
354  *
355  * USE_INTUIT_NOML              - Useful to call re_intuit_start() first
356  * USE_INTUIT_ML
357  * INTUIT_AUTORITATIVE_NOML     - Can trust a positive answer
358  * INTUIT_AUTORITATIVE_ML
359  * INTUIT_ONCE_NOML             - Intuit can match in one location only.
360  * INTUIT_ONCE_ML
361  *
362  * Another flag for this function: SECOND_TIME (so that float substrs
363  * with giant delta may be not rechecked).
364  */
365
366 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
367
368 /* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
369    Otherwise, only SvCUR(sv) is used to get strbeg. */
370
371 /* XXXX We assume that strpos is strbeg unless sv. */
372
373 /* XXXX Some places assume that there is a fixed substring.
374         An update may be needed if optimizer marks as "INTUITable"
375         RExen without fixed substrings.  Similarly, it is assumed that
376         lengths of all the strings are no more than minlen, thus they
377         cannot come from lookahead.
378         (Or minlen should take into account lookahead.) */
379
380 /* A failure to find a constant substring means that there is no need to make
381    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
382    finding a substring too deep into the string means that less calls to
383    regtry() should be needed.
384
385    REx compiler's optimizer found 4 possible hints:
386         a) Anchored substring;
387         b) Fixed substring;
388         c) Whether we are anchored (beginning-of-line or \G);
389         d) First node (of those at offset 0) which may distingush positions;
390    We use a)b)d) and multiline-part of c), and try to find a position in the
391    string which does not contradict any of them.
392  */
393
394 /* Most of decisions we do here should have been done at compile time.
395    The nodes of the REx which we used for the search should have been
396    deleted from the finite automaton. */
397
398 char *
399 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
400                      char *strend, U32 flags, re_scream_pos_data *data)
401 {
402     register I32 start_shift = 0;
403     /* Should be nonnegative! */
404     register I32 end_shift   = 0;
405     register char *s;
406     register SV *check;
407     char *strbeg;
408     char *t;
409     int do_utf8 = sv ? SvUTF8(sv) : 0;  /* if no sv we have to assume bytes */
410     I32 ml_anch;
411     register char *other_last = Nullch; /* other substr checked before this */
412     char *check_at = Nullch;            /* check substr found at this pos */
413     I32 multiline = prog->reganch & PMf_MULTILINE;
414 #ifdef DEBUGGING
415     char *i_strpos = strpos;
416     SV *dsv = PERL_DEBUG_PAD_ZERO(0);
417 #endif
418
419     GET_RE_DEBUG_FLAGS_DECL;
420
421     RX_MATCH_UTF8_set(prog,do_utf8);
422
423     if (prog->reganch & ROPT_UTF8) {
424         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
425                               "UTF-8 regex...\n"));
426         PL_reg_flags |= RF_utf8;
427     }
428
429     DEBUG_EXECUTE_r({
430          char *s   = PL_reg_match_utf8 ?
431                          sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
432                          strpos;
433          int   len = PL_reg_match_utf8 ?
434                          strlen(s) : strend - strpos;
435          if (!PL_colorset)
436               reginitcolors();
437          if (PL_reg_match_utf8)
438              DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
439                                    "UTF-8 target...\n"));
440          PerlIO_printf(Perl_debug_log,
441                        "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
442                        PL_colors[4],PL_colors[5],PL_colors[0],
443                        prog->precomp,
444                        PL_colors[1],
445                        (strlen(prog->precomp) > 60 ? "..." : ""),
446                        PL_colors[0],
447                        (int)(len > 60 ? 60 : len),
448                        s, PL_colors[1],
449                        (len > 60 ? "..." : "")
450               );
451     });
452
453     /* CHR_DIST() would be more correct here but it makes things slow. */
454     if (prog->minlen > strend - strpos) {
455         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
456                               "String too short... [re_intuit_start]\n"));
457         goto fail;
458     }
459     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
460     PL_regeol = strend;
461     if (do_utf8) {
462         if (!prog->check_utf8 && prog->check_substr)
463             to_utf8_substr(prog);
464         check = prog->check_utf8;
465     } else {
466         if (!prog->check_substr && prog->check_utf8)
467             to_byte_substr(prog);
468         check = prog->check_substr;
469     }
470    if (check == &PL_sv_undef) {
471         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
472                 "Non-utf string cannot match utf check string\n"));
473         goto fail;
474     }
475     if (prog->reganch & ROPT_ANCH) {    /* Match at beg-of-str or after \n */
476         ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
477                      || ( (prog->reganch & ROPT_ANCH_BOL)
478                           && !multiline ) );    /* Check after \n? */
479
480         if (!ml_anch) {
481           if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
482                                   | ROPT_IMPLICIT)) /* not a real BOL */
483                /* SvCUR is not set on references: SvRV and SvPVX overlap */
484                && sv && !SvROK(sv)
485                && (strpos != strbeg)) {
486               DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
487               goto fail;
488           }
489           if (prog->check_offset_min == prog->check_offset_max &&
490               !(prog->reganch & ROPT_CANY_SEEN)) {
491             /* Substring at constant offset from beg-of-str... */
492             I32 slen;
493
494             s = HOP3c(strpos, prog->check_offset_min, strend);
495             if (SvTAIL(check)) {
496                 slen = SvCUR(check);    /* >= 1 */
497
498                 if ( strend - s > slen || strend - s < slen - 1
499                      || (strend - s == slen && strend[-1] != '\n')) {
500                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
501                     goto fail_finish;
502                 }
503                 /* Now should match s[0..slen-2] */
504                 slen--;
505                 if (slen && (*SvPVX(check) != *s
506                              || (slen > 1
507                                  && memNE(SvPVX(check), s, slen)))) {
508                   report_neq:
509                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
510                     goto fail_finish;
511                 }
512             }
513             else if (*SvPVX(check) != *s
514                      || ((slen = SvCUR(check)) > 1
515                          && memNE(SvPVX(check), s, slen)))
516                 goto report_neq;
517             goto success_at_start;
518           }
519         }
520         /* Match is anchored, but substr is not anchored wrt beg-of-str. */
521         s = strpos;
522         start_shift = prog->check_offset_min; /* okay to underestimate on CC */
523         end_shift = prog->minlen - start_shift -
524             CHR_SVLEN(check) + (SvTAIL(check) != 0);
525         if (!ml_anch) {
526             I32 end = prog->check_offset_max + CHR_SVLEN(check)
527                                          - (SvTAIL(check) != 0);
528             I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
529
530             if (end_shift < eshift)
531                 end_shift = eshift;
532         }
533     }
534     else {                              /* Can match at random position */
535         ml_anch = 0;
536         s = strpos;
537         start_shift = prog->check_offset_min; /* okay to underestimate on CC */
538         /* Should be nonnegative! */
539         end_shift = prog->minlen - start_shift -
540             CHR_SVLEN(check) + (SvTAIL(check) != 0);
541     }
542
543 #ifdef DEBUGGING        /* 7/99: reports of failure (with the older version) */
544     if (end_shift < 0)
545         Perl_croak(aTHX_ "panic: end_shift");
546 #endif
547
548   restart:
549     /* Find a possible match in the region s..strend by looking for
550        the "check" substring in the region corrected by start/end_shift. */
551     if (flags & REXEC_SCREAM) {
552         I32 p = -1;                     /* Internal iterator of scream. */
553         I32 *pp = data ? data->scream_pos : &p;
554
555         if (PL_screamfirst[BmRARE(check)] >= 0
556             || ( BmRARE(check) == '\n'
557                  && (BmPREVIOUS(check) == SvCUR(check) - 1)
558                  && SvTAIL(check) ))
559             s = screaminstr(sv, check,
560                             start_shift + (s - strbeg), end_shift, pp, 0);
561         else
562             goto fail_finish;
563         /* we may be pointing at the wrong string */
564         if (s && RX_MATCH_COPIED(prog))
565             s = strbeg + (s - SvPVX(sv));
566         if (data)
567             *data->scream_olds = s;
568     }
569     else if (prog->reganch & ROPT_CANY_SEEN)
570         s = fbm_instr((U8*)(s + start_shift),
571                       (U8*)(strend - end_shift),
572                       check, multiline ? FBMrf_MULTILINE : 0);
573     else
574         s = fbm_instr(HOP3(s, start_shift, strend),
575                       HOP3(strend, -end_shift, strbeg),
576                       check, multiline ? FBMrf_MULTILINE : 0);
577
578     /* Update the count-of-usability, remove useless subpatterns,
579         unshift s.  */
580
581     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
582                           (s ? "Found" : "Did not find"),
583                           (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
584                           PL_colors[0],
585                           (int)(SvCUR(check) - (SvTAIL(check)!=0)),
586                           SvPVX(check),
587                           PL_colors[1], (SvTAIL(check) ? "$" : ""),
588                           (s ? " at offset " : "...\n") ) );
589
590     if (!s)
591         goto fail_finish;
592
593     check_at = s;
594
595     /* Finish the diagnostic message */
596     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
597
598     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
599        Start with the other substr.
600        XXXX no SCREAM optimization yet - and a very coarse implementation
601        XXXX /ttx+/ results in anchored=`ttx', floating=`x'.  floating will
602                 *always* match.  Probably should be marked during compile...
603        Probably it is right to do no SCREAM here...
604      */
605
606     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
607         /* Take into account the "other" substring. */
608         /* XXXX May be hopelessly wrong for UTF... */
609         if (!other_last)
610             other_last = strpos;
611         if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
612           do_other_anchored:
613             {
614                 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
615                 char *s1 = s;
616                 SV* must;
617
618                 t = s - prog->check_offset_max;
619                 if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
620                     && (!do_utf8
621                         || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
622                             && t > strpos)))
623                     /* EMPTY */;
624                 else
625                     t = strpos;
626                 t = HOP3c(t, prog->anchored_offset, strend);
627                 if (t < other_last)     /* These positions already checked */
628                     t = other_last;
629                 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
630                 if (last < last1)
631                     last1 = last;
632  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
633                 /* On end-of-str: see comment below. */
634                 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
635                 if (must == &PL_sv_undef) {
636                     s = (char*)NULL;
637                     DEBUG_EXECUTE_r(must = prog->anchored_utf8);        /* for debug */
638                 }
639                 else
640                     s = fbm_instr(
641                         (unsigned char*)t,
642                         HOP3(HOP3(last1, prog->anchored_offset, strend)
643                                 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
644                         must,
645                         multiline ? FBMrf_MULTILINE : 0
646                     );
647                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
648                         "%s anchored substr `%s%.*s%s'%s",
649                         (s ? "Found" : "Contradicts"),
650                         PL_colors[0],
651                           (int)(SvCUR(must)
652                           - (SvTAIL(must)!=0)),
653                           SvPVX(must),
654                           PL_colors[1], (SvTAIL(must) ? "$" : "")));
655                 if (!s) {
656                     if (last1 >= last2) {
657                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
658                                                 ", giving up...\n"));
659                         goto fail_finish;
660                     }
661                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
662                         ", trying floating at offset %ld...\n",
663                         (long)(HOP3c(s1, 1, strend) - i_strpos)));
664                     other_last = HOP3c(last1, prog->anchored_offset+1, strend);
665                     s = HOP3c(last, 1, strend);
666                     goto restart;
667                 }
668                 else {
669                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
670                           (long)(s - i_strpos)));
671                     t = HOP3c(s, -prog->anchored_offset, strbeg);
672                     other_last = HOP3c(s, 1, strend);
673                     s = s1;
674                     if (t == strpos)
675                         goto try_at_start;
676                     goto try_at_offset;
677                 }
678             }
679         }
680         else {          /* Take into account the floating substring. */
681             char *last, *last1;
682             char *s1 = s;
683             SV* must;
684
685             t = HOP3c(s, -start_shift, strbeg);
686             last1 = last =
687                 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
688             if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
689                 last = HOP3c(t, prog->float_max_offset, strend);
690             s = HOP3c(t, prog->float_min_offset, strend);
691             if (s < other_last)
692                 s = other_last;
693  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
694             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
695             /* fbm_instr() takes into account exact value of end-of-str
696                if the check is SvTAIL(ed).  Since false positives are OK,
697                and end-of-str is not later than strend we are OK. */
698             if (must == &PL_sv_undef) {
699                 s = (char*)NULL;
700                 DEBUG_EXECUTE_r(must = prog->float_utf8);       /* for debug message */
701             }
702             else
703                 s = fbm_instr((unsigned char*)s,
704                               (unsigned char*)last + SvCUR(must)
705                                   - (SvTAIL(must)!=0),
706                               must, multiline ? FBMrf_MULTILINE : 0);
707             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
708                     (s ? "Found" : "Contradicts"),
709                     PL_colors[0],
710                       (int)(SvCUR(must) - (SvTAIL(must)!=0)),
711                       SvPVX(must),
712                       PL_colors[1], (SvTAIL(must) ? "$" : "")));
713             if (!s) {
714                 if (last1 == last) {
715                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
716                                             ", giving up...\n"));
717                     goto fail_finish;
718                 }
719                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
720                     ", trying anchored starting at offset %ld...\n",
721                     (long)(s1 + 1 - i_strpos)));
722                 other_last = last;
723                 s = HOP3c(t, 1, strend);
724                 goto restart;
725             }
726             else {
727                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
728                       (long)(s - i_strpos)));
729                 other_last = s; /* Fix this later. --Hugo */
730                 s = s1;
731                 if (t == strpos)
732                     goto try_at_start;
733                 goto try_at_offset;
734             }
735         }
736     }
737
738     t = s - prog->check_offset_max;
739     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
740         && (!do_utf8
741             || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
742                  && t > strpos))) {
743         /* Fixed substring is found far enough so that the match
744            cannot start at strpos. */
745       try_at_offset:
746         if (ml_anch && t[-1] != '\n') {
747             /* Eventually fbm_*() should handle this, but often
748                anchored_offset is not 0, so this check will not be wasted. */
749             /* XXXX In the code below we prefer to look for "^" even in
750                presence of anchored substrings.  And we search even
751                beyond the found float position.  These pessimizations
752                are historical artefacts only.  */
753           find_anchor:
754             while (t < strend - prog->minlen) {
755                 if (*t == '\n') {
756                     if (t < check_at - prog->check_offset_min) {
757                         if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
758                             /* Since we moved from the found position,
759                                we definitely contradict the found anchored
760                                substr.  Due to the above check we do not
761                                contradict "check" substr.
762                                Thus we can arrive here only if check substr
763                                is float.  Redo checking for "other"=="fixed".
764                              */
765                             strpos = t + 1;                     
766                             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
767                                 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
768                             goto do_other_anchored;
769                         }
770                         /* We don't contradict the found floating substring. */
771                         /* XXXX Why not check for STCLASS? */
772                         s = t + 1;
773                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
774                             PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
775                         goto set_useful;
776                     }
777                     /* Position contradicts check-string */
778                     /* XXXX probably better to look for check-string
779                        than for "\n", so one should lower the limit for t? */
780                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
781                         PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
782                     other_last = strpos = s = t + 1;
783                     goto restart;
784                 }
785                 t++;
786             }
787             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
788                         PL_colors[0],PL_colors[1]));
789             goto fail_finish;
790         }
791         else {
792             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
793                         PL_colors[0],PL_colors[1]));
794         }
795         s = t;
796       set_useful:
797         ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);    /* hooray/5 */
798     }
799     else {
800         /* The found string does not prohibit matching at strpos,
801            - no optimization of calling REx engine can be performed,
802            unless it was an MBOL and we are not after MBOL,
803            or a future STCLASS check will fail this. */
804       try_at_start:
805         /* Even in this situation we may use MBOL flag if strpos is offset
806            wrt the start of the string. */
807         if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
808             && (strpos != strbeg) && strpos[-1] != '\n'
809             /* May be due to an implicit anchor of m{.*foo}  */
810             && !(prog->reganch & ROPT_IMPLICIT))
811         {
812             t = strpos;
813             goto find_anchor;
814         }
815         DEBUG_EXECUTE_r( if (ml_anch)
816             PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
817                         (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
818         );
819       success_at_start:
820         if (!(prog->reganch & ROPT_NAUGHTY)     /* XXXX If strpos moved? */
821             && (do_utf8 ? (
822                 prog->check_utf8                /* Could be deleted already */
823                 && --BmUSEFUL(prog->check_utf8) < 0
824                 && (prog->check_utf8 == prog->float_utf8)
825             ) : (
826                 prog->check_substr              /* Could be deleted already */
827                 && --BmUSEFUL(prog->check_substr) < 0
828                 && (prog->check_substr == prog->float_substr)
829             )))
830         {
831             /* If flags & SOMETHING - do not do it many times on the same match */
832             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
833             SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
834             if (do_utf8 ? prog->check_substr : prog->check_utf8)
835                 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
836             prog->check_substr = prog->check_utf8 = Nullsv;     /* disable */
837             prog->float_substr = prog->float_utf8 = Nullsv;     /* clear */
838             check = Nullsv;                     /* abort */
839             s = strpos;
840             /* XXXX This is a remnant of the old implementation.  It
841                     looks wasteful, since now INTUIT can use many
842                     other heuristics. */
843             prog->reganch &= ~RE_USE_INTUIT;
844         }
845         else
846             s = strpos;
847     }
848
849     /* Last resort... */
850     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
851     if (prog->regstclass) {
852         /* minlen == 0 is possible if regstclass is \b or \B,
853            and the fixed substr is ''$.
854            Since minlen is already taken into account, s+1 is before strend;
855            accidentally, minlen >= 1 guaranties no false positives at s + 1
856            even for \b or \B.  But (minlen? 1 : 0) below assumes that
857            regstclass does not come from lookahead...  */
858         /* If regstclass takes bytelength more than 1: If charlength==1, OK.
859            This leaves EXACTF only, which is dealt with in find_byclass().  */
860         const U8* str = (U8*)STRING(prog->regstclass);
861         const int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
862                     ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
863                     : 1);
864         char *endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
865                 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
866                 : (prog->float_substr || prog->float_utf8
867                    ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
868                            cl_l, strend)
869                    : strend);
870
871         t = s;
872         cache_re(prog);
873         s = find_byclass(prog, prog->regstclass, s, endpos, 1);
874         if (!s) {
875 #ifdef DEBUGGING
876             const char *what = 0;
877 #endif
878             if (endpos == strend) {
879                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
880                                 "Could not match STCLASS...\n") );
881                 goto fail;
882             }
883             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
884                                    "This position contradicts STCLASS...\n") );
885             if ((prog->reganch & ROPT_ANCH) && !ml_anch)
886                 goto fail;
887             /* Contradict one of substrings */
888             if (prog->anchored_substr || prog->anchored_utf8) {
889                 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
890                     DEBUG_EXECUTE_r( what = "anchored" );
891                   hop_and_restart:
892                     s = HOP3c(t, 1, strend);
893                     if (s + start_shift + end_shift > strend) {
894                         /* XXXX Should be taken into account earlier? */
895                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
896                                                "Could not match STCLASS...\n") );
897                         goto fail;
898                     }
899                     if (!check)
900                         goto giveup;
901                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
902                                 "Looking for %s substr starting at offset %ld...\n",
903                                  what, (long)(s + start_shift - i_strpos)) );
904                     goto restart;
905                 }
906                 /* Have both, check_string is floating */
907                 if (t + start_shift >= check_at) /* Contradicts floating=check */
908                     goto retry_floating_check;
909                 /* Recheck anchored substring, but not floating... */
910                 s = check_at;
911                 if (!check)
912                     goto giveup;
913                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
914                           "Looking for anchored substr starting at offset %ld...\n",
915                           (long)(other_last - i_strpos)) );
916                 goto do_other_anchored;
917             }
918             /* Another way we could have checked stclass at the
919                current position only: */
920             if (ml_anch) {
921                 s = t = t + 1;
922                 if (!check)
923                     goto giveup;
924                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
925                           "Looking for /%s^%s/m starting at offset %ld...\n",
926                           PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
927                 goto try_at_offset;
928             }
929             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))     /* Could have been deleted */
930                 goto fail;
931             /* Check is floating subtring. */
932           retry_floating_check:
933             t = check_at - start_shift;
934             DEBUG_EXECUTE_r( what = "floating" );
935             goto hop_and_restart;
936         }
937         if (t != s) {
938             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
939                         "By STCLASS: moving %ld --> %ld\n",
940                                   (long)(t - i_strpos), (long)(s - i_strpos))
941                    );
942         }
943         else {
944             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
945                                   "Does not contradict STCLASS...\n"); 
946                    );
947         }
948     }
949   giveup:
950     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
951                           PL_colors[4], (check ? "Guessed" : "Giving up"),
952                           PL_colors[5], (long)(s - i_strpos)) );
953     return s;
954
955   fail_finish:                          /* Substring not found */
956     if (prog->check_substr || prog->check_utf8)         /* could be removed already */
957         BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
958   fail:
959     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
960                           PL_colors[4],PL_colors[5]));
961     return Nullch;
962 }
963
964 /* We know what class REx starts with.  Try to find this position... */
965 STATIC char *
966 S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, I32 norun)
967 {
968         I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
969         char *m;
970         STRLEN ln;
971         STRLEN lnc;
972         register STRLEN uskip;
973         unsigned int c1;
974         unsigned int c2;
975         char *e;
976         register I32 tmp = 1;   /* Scratch variable? */
977         register bool do_utf8 = PL_reg_match_utf8;
978
979         /* We know what class it must start with. */
980         switch (OP(c)) {
981         case ANYOF:
982             if (do_utf8) {
983                  while (s + (uskip = UTF8SKIP(s)) <= strend) {
984                       if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
985                           !UTF8_IS_INVARIANT((U8)s[0]) ?
986                           reginclass(c, (U8*)s, 0, do_utf8) :
987                           REGINCLASS(c, (U8*)s)) {
988                            if (tmp && (norun || regtry(prog, s)))
989                                 goto got_it;
990                            else
991                                 tmp = doevery;
992                       }
993                       else 
994                            tmp = 1;
995                       s += uskip;
996                  }
997             }
998             else {
999                  while (s < strend) {
1000                       STRLEN skip = 1;
1001
1002                       if (REGINCLASS(c, (U8*)s) ||
1003                           (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1004                            /* The assignment of 2 is intentional:
1005                             * for the folded sharp s, the skip is 2. */
1006                            (skip = SHARP_S_SKIP))) {
1007                            if (tmp && (norun || regtry(prog, s)))
1008                                 goto got_it;
1009                            else
1010                                 tmp = doevery;
1011                       }
1012                       else 
1013                            tmp = 1;
1014                       s += skip;
1015                  }
1016             }
1017             break;
1018         case CANY:
1019             while (s < strend) {
1020                 if (tmp && (norun || regtry(prog, s)))
1021                     goto got_it;
1022                 else
1023                     tmp = doevery;
1024                 s++;
1025             }
1026             break;
1027         case EXACTF:
1028             m   = STRING(c);
1029             ln  = STR_LEN(c);   /* length to match in octets/bytes */
1030             lnc = (I32) ln;     /* length to match in characters */
1031             if (UTF) {
1032                 STRLEN ulen1, ulen2;
1033                 U8 *sm = (U8 *) m;
1034                 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1035                 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1036
1037                 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1038                 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1039
1040                 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE, 
1041                                     0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1042                 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1043                                     0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1044                 lnc = 0;
1045                 while (sm < ((U8 *) m + ln)) {
1046                     lnc++;
1047                     sm += UTF8SKIP(sm);
1048                 }
1049             }
1050             else {
1051                 c1 = *(U8*)m;
1052                 c2 = PL_fold[c1];
1053             }
1054             goto do_exactf;
1055         case EXACTFL:
1056             m   = STRING(c);
1057             ln  = STR_LEN(c);
1058             lnc = (I32) ln;
1059             c1 = *(U8*)m;
1060             c2 = PL_fold_locale[c1];
1061           do_exactf:
1062             e = HOP3c(strend, -((I32)lnc), s);
1063
1064             if (norun && e < s)
1065                 e = s;                  /* Due to minlen logic of intuit() */
1066
1067             /* The idea in the EXACTF* cases is to first find the
1068              * first character of the EXACTF* node and then, if
1069              * necessary, case-insensitively compare the full
1070              * text of the node.  The c1 and c2 are the first
1071              * characters (though in Unicode it gets a bit
1072              * more complicated because there are more cases
1073              * than just upper and lower: one needs to use
1074              * the so-called folding case for case-insensitive
1075              * matching (called "loose matching" in Unicode).
1076              * ibcmp_utf8() will do just that. */
1077
1078             if (do_utf8) {
1079                 UV c, f;
1080                 U8 tmpbuf [UTF8_MAXBYTES+1];
1081                 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
1082                 STRLEN len, foldlen;
1083                 
1084                 if (c1 == c2) {
1085                     /* Upper and lower of 1st char are equal -
1086                      * probably not a "letter". */
1087                     while (s <= e) {
1088                         c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1089                                            ckWARN(WARN_UTF8) ?
1090                                            0 : UTF8_ALLOW_ANY);
1091                         if ( c == c1
1092                              && (ln == len ||
1093                                  ibcmp_utf8(s, (char **)0, 0,  do_utf8,
1094                                             m, (char **)0, ln, (bool)UTF))
1095                              && (norun || regtry(prog, s)) )
1096                             goto got_it;
1097                         else {
1098                              uvchr_to_utf8(tmpbuf, c);
1099                              f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1100                              if ( f != c
1101                                   && (f == c1 || f == c2)
1102                                   && (ln == foldlen ||
1103                                       !ibcmp_utf8((char *) foldbuf,
1104                                                   (char **)0, foldlen, do_utf8,
1105                                                   m,
1106                                                   (char **)0, ln, (bool)UTF))
1107                                   && (norun || regtry(prog, s)) )
1108                                   goto got_it;
1109                         }
1110                         s += len;
1111                     }
1112                 }
1113                 else {
1114                     while (s <= e) {
1115                       c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1116                                            ckWARN(WARN_UTF8) ?
1117                                            0 : UTF8_ALLOW_ANY);
1118
1119                         /* Handle some of the three Greek sigmas cases.
1120                          * Note that not all the possible combinations
1121                          * are handled here: some of them are handled
1122                          * by the standard folding rules, and some of
1123                          * them (the character class or ANYOF cases)
1124                          * are handled during compiletime in
1125                          * regexec.c:S_regclass(). */
1126                         if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1127                             c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1128                             c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1129
1130                         if ( (c == c1 || c == c2)
1131                              && (ln == len ||
1132                                  ibcmp_utf8(s, (char **)0, 0,  do_utf8,
1133                                             m, (char **)0, ln, (bool)UTF))
1134                              && (norun || regtry(prog, s)) )
1135                             goto got_it;
1136                         else {
1137                              uvchr_to_utf8(tmpbuf, c);
1138                              f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1139                              if ( f != c
1140                                   && (f == c1 || f == c2)
1141                                   && (ln == foldlen ||
1142                                       !ibcmp_utf8((char *) foldbuf,
1143                                                   (char **)0, foldlen, do_utf8,
1144                                                   m,
1145                                                   (char **)0, ln, (bool)UTF))
1146                                   && (norun || regtry(prog, s)) )
1147                                   goto got_it;
1148                         }
1149                         s += len;
1150                     }
1151                 }
1152             }
1153             else {
1154                 if (c1 == c2)
1155                     while (s <= e) {
1156                         if ( *(U8*)s == c1
1157                              && (ln == 1 || !(OP(c) == EXACTF
1158                                               ? ibcmp(s, m, ln)
1159                                               : ibcmp_locale(s, m, ln)))
1160                              && (norun || regtry(prog, s)) )
1161                             goto got_it;
1162                         s++;
1163                     }
1164                 else
1165                     while (s <= e) {
1166                         if ( (*(U8*)s == c1 || *(U8*)s == c2)
1167                              && (ln == 1 || !(OP(c) == EXACTF
1168                                               ? ibcmp(s, m, ln)
1169                                               : ibcmp_locale(s, m, ln)))
1170                              && (norun || regtry(prog, s)) )
1171                             goto got_it;
1172                         s++;
1173                     }
1174             }
1175             break;
1176         case BOUNDL:
1177             PL_reg_flags |= RF_tainted;
1178             /* FALL THROUGH */
1179         case BOUND:
1180             if (do_utf8) {
1181                 if (s == PL_bostr)
1182                     tmp = '\n';
1183                 else {
1184                     U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1185                 
1186                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1187                 }
1188                 tmp = ((OP(c) == BOUND ?
1189                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1190                 LOAD_UTF8_CHARCLASS(alnum,"a");
1191                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1192                     if (tmp == !(OP(c) == BOUND ?
1193                                  swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1194                                  isALNUM_LC_utf8((U8*)s)))
1195                     {
1196                         tmp = !tmp;
1197                         if ((norun || regtry(prog, s)))
1198                             goto got_it;
1199                     }
1200                     s += uskip;
1201                 }
1202             }
1203             else {
1204                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1205                 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1206                 while (s < strend) {
1207                     if (tmp ==
1208                         !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1209                         tmp = !tmp;
1210                         if ((norun || regtry(prog, s)))
1211                             goto got_it;
1212                     }
1213                     s++;
1214                 }
1215             }
1216             if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
1217                 goto got_it;
1218             break;
1219         case NBOUNDL:
1220             PL_reg_flags |= RF_tainted;
1221             /* FALL THROUGH */
1222         case NBOUND:
1223             if (do_utf8) {
1224                 if (s == PL_bostr)
1225                     tmp = '\n';
1226                 else {
1227                     U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1228                 
1229                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1230                 }
1231                 tmp = ((OP(c) == NBOUND ?
1232                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1233                 LOAD_UTF8_CHARCLASS(alnum,"a");
1234                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1235                     if (tmp == !(OP(c) == NBOUND ?
1236                                  swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1237                                  isALNUM_LC_utf8((U8*)s)))
1238                         tmp = !tmp;
1239                     else if ((norun || regtry(prog, s)))
1240                         goto got_it;
1241                     s += uskip;
1242                 }
1243             }
1244             else {
1245                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1246                 tmp = ((OP(c) == NBOUND ?
1247                         isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1248                 while (s < strend) {
1249                     if (tmp ==
1250                         !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1251                         tmp = !tmp;
1252                     else if ((norun || regtry(prog, s)))
1253                         goto got_it;
1254                     s++;
1255                 }
1256             }
1257             if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
1258                 goto got_it;
1259             break;
1260         case ALNUM:
1261             if (do_utf8) {
1262                 LOAD_UTF8_CHARCLASS(alnum,"a");
1263                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1264                     if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1265                         if (tmp && (norun || regtry(prog, s)))
1266                             goto got_it;
1267                         else
1268                             tmp = doevery;
1269                     }
1270                     else
1271                         tmp = 1;
1272                     s += uskip;
1273                 }
1274             }
1275             else {
1276                 while (s < strend) {
1277                     if (isALNUM(*s)) {
1278                         if (tmp && (norun || regtry(prog, s)))
1279                             goto got_it;
1280                         else
1281                             tmp = doevery;
1282                     }
1283                     else
1284                         tmp = 1;
1285                     s++;
1286                 }
1287             }
1288             break;
1289         case ALNUML:
1290             PL_reg_flags |= RF_tainted;
1291             if (do_utf8) {
1292                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1293                     if (isALNUM_LC_utf8((U8*)s)) {
1294                         if (tmp && (norun || regtry(prog, s)))
1295                             goto got_it;
1296                         else
1297                             tmp = doevery;
1298                     }
1299                     else
1300                         tmp = 1;
1301                     s += uskip;
1302                 }
1303             }
1304             else {
1305                 while (s < strend) {
1306                     if (isALNUM_LC(*s)) {
1307                         if (tmp && (norun || regtry(prog, s)))
1308                             goto got_it;
1309                         else
1310                             tmp = doevery;
1311                     }
1312                     else
1313                         tmp = 1;
1314                     s++;
1315                 }
1316             }
1317             break;
1318         case NALNUM:
1319             if (do_utf8) {
1320                 LOAD_UTF8_CHARCLASS(alnum,"a");
1321                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1322                     if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1323                         if (tmp && (norun || regtry(prog, s)))
1324                             goto got_it;
1325                         else
1326                             tmp = doevery;
1327                     }
1328                     else
1329                         tmp = 1;
1330                     s += uskip;
1331                 }
1332             }
1333             else {
1334                 while (s < strend) {
1335                     if (!isALNUM(*s)) {
1336                         if (tmp && (norun || regtry(prog, s)))
1337                             goto got_it;
1338                         else
1339                             tmp = doevery;
1340                     }
1341                     else
1342                         tmp = 1;
1343                     s++;
1344                 }
1345             }
1346             break;
1347         case NALNUML:
1348             PL_reg_flags |= RF_tainted;
1349             if (do_utf8) {
1350                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1351                     if (!isALNUM_LC_utf8((U8*)s)) {
1352                         if (tmp && (norun || regtry(prog, s)))
1353                             goto got_it;
1354                         else
1355                             tmp = doevery;
1356                     }
1357                     else
1358                         tmp = 1;
1359                     s += uskip;
1360                 }
1361             }
1362             else {
1363                 while (s < strend) {
1364                     if (!isALNUM_LC(*s)) {
1365                         if (tmp && (norun || regtry(prog, s)))
1366                             goto got_it;
1367                         else
1368                             tmp = doevery;
1369                     }
1370                     else
1371                         tmp = 1;
1372                     s++;
1373                 }
1374             }
1375             break;
1376         case SPACE:
1377             if (do_utf8) {
1378                 LOAD_UTF8_CHARCLASS(space," ");
1379                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1380                     if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
1381                         if (tmp && (norun || regtry(prog, s)))
1382                             goto got_it;
1383                         else
1384                             tmp = doevery;
1385                     }
1386                     else
1387                         tmp = 1;
1388                     s += uskip;
1389                 }
1390             }
1391             else {
1392                 while (s < strend) {
1393                     if (isSPACE(*s)) {
1394                         if (tmp && (norun || regtry(prog, s)))
1395                             goto got_it;
1396                         else
1397                             tmp = doevery;
1398                     }
1399                     else
1400                         tmp = 1;
1401                     s++;
1402                 }
1403             }
1404             break;
1405         case SPACEL:
1406             PL_reg_flags |= RF_tainted;
1407             if (do_utf8) {
1408                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1409                     if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1410                         if (tmp && (norun || regtry(prog, s)))
1411                             goto got_it;
1412                         else
1413                             tmp = doevery;
1414                     }
1415                     else
1416                         tmp = 1;
1417                     s += uskip;
1418                 }
1419             }
1420             else {
1421                 while (s < strend) {
1422                     if (isSPACE_LC(*s)) {
1423                         if (tmp && (norun || regtry(prog, s)))
1424                             goto got_it;
1425                         else
1426                             tmp = doevery;
1427                     }
1428                     else
1429                         tmp = 1;
1430                     s++;
1431                 }
1432             }
1433             break;
1434         case NSPACE:
1435             if (do_utf8) {
1436                 LOAD_UTF8_CHARCLASS(space," ");
1437                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1438                     if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
1439                         if (tmp && (norun || regtry(prog, s)))
1440                             goto got_it;
1441                         else
1442                             tmp = doevery;
1443                     }
1444                     else
1445                         tmp = 1;
1446                     s += uskip;
1447                 }
1448             }
1449             else {
1450                 while (s < strend) {
1451                     if (!isSPACE(*s)) {
1452                         if (tmp && (norun || regtry(prog, s)))
1453                             goto got_it;
1454                         else
1455                             tmp = doevery;
1456                     }
1457                     else
1458                         tmp = 1;
1459                     s++;
1460                 }
1461             }
1462             break;
1463         case NSPACEL:
1464             PL_reg_flags |= RF_tainted;
1465             if (do_utf8) {
1466                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1467                     if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1468                         if (tmp && (norun || regtry(prog, s)))
1469                             goto got_it;
1470                         else
1471                             tmp = doevery;
1472                     }
1473                     else
1474                         tmp = 1;
1475                     s += uskip;
1476                 }
1477             }
1478             else {
1479                 while (s < strend) {
1480                     if (!isSPACE_LC(*s)) {
1481                         if (tmp && (norun || regtry(prog, s)))
1482                             goto got_it;
1483                         else
1484                             tmp = doevery;
1485                     }
1486                     else
1487                         tmp = 1;
1488                     s++;
1489                 }
1490             }
1491             break;
1492         case DIGIT:
1493             if (do_utf8) {
1494                 LOAD_UTF8_CHARCLASS(digit,"0");
1495                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1496                     if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1497                         if (tmp && (norun || regtry(prog, s)))
1498                             goto got_it;
1499                         else
1500                             tmp = doevery;
1501                     }
1502                     else
1503                         tmp = 1;
1504                     s += uskip;
1505                 }
1506             }
1507             else {
1508                 while (s < strend) {
1509                     if (isDIGIT(*s)) {
1510                         if (tmp && (norun || regtry(prog, s)))
1511                             goto got_it;
1512                         else
1513                             tmp = doevery;
1514                     }
1515                     else
1516                         tmp = 1;
1517                     s++;
1518                 }
1519             }
1520             break;
1521         case DIGITL:
1522             PL_reg_flags |= RF_tainted;
1523             if (do_utf8) {
1524                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1525                     if (isDIGIT_LC_utf8((U8*)s)) {
1526                         if (tmp && (norun || regtry(prog, s)))
1527                             goto got_it;
1528                         else
1529                             tmp = doevery;
1530                     }
1531                     else
1532                         tmp = 1;
1533                     s += uskip;
1534                 }
1535             }
1536             else {
1537                 while (s < strend) {
1538                     if (isDIGIT_LC(*s)) {
1539                         if (tmp && (norun || regtry(prog, s)))
1540                             goto got_it;
1541                         else
1542                             tmp = doevery;
1543                     }
1544                     else
1545                         tmp = 1;
1546                     s++;
1547                 }
1548             }
1549             break;
1550         case NDIGIT:
1551             if (do_utf8) {
1552                 LOAD_UTF8_CHARCLASS(digit,"0");
1553                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1554                     if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1555                         if (tmp && (norun || regtry(prog, s)))
1556                             goto got_it;
1557                         else
1558                             tmp = doevery;
1559                     }
1560                     else
1561                         tmp = 1;
1562                     s += uskip;
1563                 }
1564             }
1565             else {
1566                 while (s < strend) {
1567                     if (!isDIGIT(*s)) {
1568                         if (tmp && (norun || regtry(prog, s)))
1569                             goto got_it;
1570                         else
1571                             tmp = doevery;
1572                     }
1573                     else
1574                         tmp = 1;
1575                     s++;
1576                 }
1577             }
1578             break;
1579         case NDIGITL:
1580             PL_reg_flags |= RF_tainted;
1581             if (do_utf8) {
1582                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1583                     if (!isDIGIT_LC_utf8((U8*)s)) {
1584                         if (tmp && (norun || regtry(prog, s)))
1585                             goto got_it;
1586                         else
1587                             tmp = doevery;
1588                     }
1589                     else
1590                         tmp = 1;
1591                     s += uskip;
1592                 }
1593             }
1594             else {
1595                 while (s < strend) {
1596                     if (!isDIGIT_LC(*s)) {
1597                         if (tmp && (norun || regtry(prog, s)))
1598                             goto got_it;
1599                         else
1600                             tmp = doevery;
1601                     }
1602                     else
1603                         tmp = 1;
1604                     s++;
1605                 }
1606             }
1607             break;
1608         default:
1609             Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1610             break;
1611         }
1612         return 0;
1613       got_it:
1614         return s;
1615 }
1616
1617 /*
1618  - regexec_flags - match a regexp against a string
1619  */
1620 I32
1621 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1622               char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1623 /* strend: pointer to null at end of string */
1624 /* strbeg: real beginning of string */
1625 /* minend: end of match must be >=minend after stringarg. */
1626 /* data: May be used for some additional optimizations. */
1627 /* nosave: For optimizations. */
1628 {
1629     register char *s;
1630     register regnode *c;
1631     register char *startpos = stringarg;
1632     I32 minlen;         /* must match at least this many chars */
1633     I32 dontbother = 0; /* how many characters not to try at end */
1634     /* I32 start_shift = 0; */          /* Offset of the start to find
1635                                          constant substr. */            /* CC */
1636     I32 end_shift = 0;                  /* Same for the end. */         /* CC */
1637     I32 scream_pos = -1;                /* Internal iterator of scream. */
1638     char *scream_olds;
1639     SV* oreplsv = GvSV(PL_replgv);
1640     bool do_utf8 = DO_UTF8(sv);
1641     I32 multiline = prog->reganch & PMf_MULTILINE;
1642 #ifdef DEBUGGING
1643     SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
1644     SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
1645 #endif
1646
1647     GET_RE_DEBUG_FLAGS_DECL;
1648
1649     RX_MATCH_UTF8_set(prog,do_utf8);
1650
1651     PL_regcc = 0;
1652
1653     cache_re(prog);
1654 #ifdef DEBUGGING
1655     PL_regnarrate = DEBUG_r_TEST;
1656 #endif
1657
1658     /* Be paranoid... */
1659     if (prog == NULL || startpos == NULL) {
1660         Perl_croak(aTHX_ "NULL regexp parameter");
1661         return 0;
1662     }
1663
1664     minlen = prog->minlen;
1665     if (strend - startpos < minlen) {
1666         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1667                               "String too short [regexec_flags]...\n"));
1668         goto phooey;
1669     }
1670
1671     /* Check validity of program. */
1672     if (UCHARAT(prog->program) != REG_MAGIC) {
1673         Perl_croak(aTHX_ "corrupted regexp program");
1674     }
1675
1676     PL_reg_flags = 0;
1677     PL_reg_eval_set = 0;
1678     PL_reg_maxiter = 0;
1679
1680     if (prog->reganch & ROPT_UTF8)
1681         PL_reg_flags |= RF_utf8;
1682
1683     /* Mark beginning of line for ^ and lookbehind. */
1684     PL_regbol = startpos;
1685     PL_bostr  = strbeg;
1686     PL_reg_sv = sv;
1687
1688     /* Mark end of line for $ (and such) */
1689     PL_regeol = strend;
1690
1691     /* see how far we have to get to not match where we matched before */
1692     PL_regtill = startpos+minend;
1693
1694     /* We start without call_cc context.  */
1695     PL_reg_call_cc = 0;
1696
1697     /* If there is a "must appear" string, look for it. */
1698     s = startpos;
1699
1700     if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1701         MAGIC *mg;
1702
1703         if (flags & REXEC_IGNOREPOS)    /* Means: check only at start */
1704             PL_reg_ganch = startpos;
1705         else if (sv && SvTYPE(sv) >= SVt_PVMG
1706                   && SvMAGIC(sv)
1707                   && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1708                   && mg->mg_len >= 0) {
1709             PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1710             if (prog->reganch & ROPT_ANCH_GPOS) {
1711                 if (s > PL_reg_ganch)
1712                     goto phooey;
1713                 s = PL_reg_ganch;
1714             }
1715         }
1716         else                            /* pos() not defined */
1717             PL_reg_ganch = strbeg;
1718     }
1719
1720     if (!(flags & REXEC_CHECKED) && (prog->check_substr != Nullsv || prog->check_utf8 != Nullsv)) {
1721         re_scream_pos_data d;
1722
1723         d.scream_olds = &scream_olds;
1724         d.scream_pos = &scream_pos;
1725         s = re_intuit_start(prog, sv, s, strend, flags, &d);
1726         if (!s) {
1727             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1728             goto phooey;        /* not present */
1729         }
1730     }
1731
1732     DEBUG_EXECUTE_r({
1733          char *s0   = UTF ?
1734            pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1735                           UNI_DISPLAY_REGEX) :
1736            prog->precomp;
1737          int   len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1738          char *s1   = do_utf8 ? sv_uni_display(dsv1, sv, 60,
1739                                                UNI_DISPLAY_REGEX) : startpos;
1740          int   len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
1741          if (!PL_colorset)
1742              reginitcolors();
1743          PerlIO_printf(Perl_debug_log,
1744                        "%sMatching REx%s `%s%*.*s%s%s' against `%s%.*s%s%s'\n",
1745                        PL_colors[4],PL_colors[5],PL_colors[0],
1746                        len0, len0, s0,
1747                        PL_colors[1],
1748                        len0 > 60 ? "..." : "",
1749                        PL_colors[0],
1750                        (int)(len1 > 60 ? 60 : len1),
1751                        s1, PL_colors[1],
1752                        (len1 > 60 ? "..." : "")
1753               );
1754     });
1755
1756     /* Simplest case:  anchored match need be tried only once. */
1757     /*  [unless only anchor is BOL and multiline is set] */
1758     if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1759         if (s == startpos && regtry(prog, startpos))
1760             goto got_it;
1761         else if (multiline || (prog->reganch & ROPT_IMPLICIT)
1762                  || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1763         {
1764             char *end;
1765
1766             if (minlen)
1767                 dontbother = minlen - 1;
1768             end = HOP3c(strend, -dontbother, strbeg) - 1;
1769             /* for multiline we only have to try after newlines */
1770             if (prog->check_substr || prog->check_utf8) {
1771                 if (s == startpos)
1772                     goto after_try;
1773                 while (1) {
1774                     if (regtry(prog, s))
1775                         goto got_it;
1776                   after_try:
1777                     if (s >= end)
1778                         goto phooey;
1779                     if (prog->reganch & RE_USE_INTUIT) {
1780                         s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1781                         if (!s)
1782                             goto phooey;
1783                     }
1784                     else
1785                         s++;
1786                 }               
1787             } else {
1788                 if (s > startpos)
1789                     s--;
1790                 while (s < end) {
1791                     if (*s++ == '\n') { /* don't need PL_utf8skip here */
1792                         if (regtry(prog, s))
1793                             goto got_it;
1794                     }
1795                 }               
1796             }
1797         }
1798         goto phooey;
1799     } else if (prog->reganch & ROPT_ANCH_GPOS) {
1800         if (regtry(prog, PL_reg_ganch))
1801             goto got_it;
1802         goto phooey;
1803     }
1804
1805     /* Messy cases:  unanchored match. */
1806     if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
1807         /* we have /x+whatever/ */
1808         /* it must be a one character string (XXXX Except UTF?) */
1809         char ch;
1810 #ifdef DEBUGGING
1811         int did_match = 0;
1812 #endif
1813         if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1814             do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1815         ch = SvPVX(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1816
1817         if (do_utf8) {
1818             while (s < strend) {
1819                 if (*s == ch) {
1820                     DEBUG_EXECUTE_r( did_match = 1 );
1821                     if (regtry(prog, s)) goto got_it;
1822                     s += UTF8SKIP(s);
1823                     while (s < strend && *s == ch)
1824                         s += UTF8SKIP(s);
1825                 }
1826                 s += UTF8SKIP(s);
1827             }
1828         }
1829         else {
1830             while (s < strend) {
1831                 if (*s == ch) {
1832                     DEBUG_EXECUTE_r( did_match = 1 );
1833                     if (regtry(prog, s)) goto got_it;
1834                     s++;
1835                     while (s < strend && *s == ch)
1836                         s++;
1837                 }
1838                 s++;
1839             }
1840         }
1841         DEBUG_EXECUTE_r(if (!did_match)
1842                 PerlIO_printf(Perl_debug_log,
1843                                   "Did not find anchored character...\n")
1844                );
1845     }
1846     /*SUPPRESS 560*/
1847     else if (prog->anchored_substr != Nullsv
1848               || prog->anchored_utf8 != Nullsv
1849               || ((prog->float_substr != Nullsv || prog->float_utf8 != Nullsv)
1850                   && prog->float_max_offset < strend - s)) {
1851         SV *must;
1852         I32 back_max;
1853         I32 back_min;
1854         char *last;
1855         char *last1;            /* Last position checked before */
1856 #ifdef DEBUGGING
1857         int did_match = 0;
1858 #endif
1859         if (prog->anchored_substr || prog->anchored_utf8) {
1860             if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1861                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1862             must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1863             back_max = back_min = prog->anchored_offset;
1864         } else {
1865             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1866                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1867             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1868             back_max = prog->float_max_offset;
1869             back_min = prog->float_min_offset;
1870         }
1871         if (must == &PL_sv_undef)
1872             /* could not downgrade utf8 check substring, so must fail */
1873             goto phooey;
1874
1875         last = HOP3c(strend,    /* Cannot start after this */
1876                           -(I32)(CHR_SVLEN(must)
1877                                  - (SvTAIL(must) != 0) + back_min), strbeg);
1878
1879         if (s > PL_bostr)
1880             last1 = HOPc(s, -1);
1881         else
1882             last1 = s - 1;      /* bogus */
1883
1884         /* XXXX check_substr already used to find `s', can optimize if
1885            check_substr==must. */
1886         scream_pos = -1;
1887         dontbother = end_shift;
1888         strend = HOPc(strend, -dontbother);
1889         while ( (s <= last) &&
1890                 ((flags & REXEC_SCREAM)
1891                  ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
1892                                     end_shift, &scream_pos, 0))
1893                  : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
1894                                   (unsigned char*)strend, must,
1895                                   multiline ? FBMrf_MULTILINE : 0))) ) {
1896             /* we may be pointing at the wrong string */
1897             if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
1898                 s = strbeg + (s - SvPVX(sv));
1899             DEBUG_EXECUTE_r( did_match = 1 );
1900             if (HOPc(s, -back_max) > last1) {
1901                 last1 = HOPc(s, -back_min);
1902                 s = HOPc(s, -back_max);
1903             }
1904             else {
1905                 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1906
1907                 last1 = HOPc(s, -back_min);
1908                 s = t;          
1909             }
1910             if (do_utf8) {
1911                 while (s <= last1) {
1912                     if (regtry(prog, s))
1913                         goto got_it;
1914                     s += UTF8SKIP(s);
1915                 }
1916             }
1917             else {
1918                 while (s <= last1) {
1919                     if (regtry(prog, s))
1920                         goto got_it;
1921                     s++;
1922                 }
1923             }
1924         }
1925         DEBUG_EXECUTE_r(if (!did_match)
1926                     PerlIO_printf(Perl_debug_log, 
1927                                   "Did not find %s substr `%s%.*s%s'%s...\n",
1928                               ((must == prog->anchored_substr || must == prog->anchored_utf8)
1929                                ? "anchored" : "floating"),
1930                               PL_colors[0],
1931                               (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1932                               SvPVX(must),
1933                                   PL_colors[1], (SvTAIL(must) ? "$" : ""))
1934                );
1935         goto phooey;
1936     }
1937     else if ((c = prog->regstclass)) {
1938         if (minlen) {
1939             I32 op = (U8)OP(prog->regstclass);
1940             /* don't bother with what can't match */
1941             if (PL_regkind[op] != EXACT && op != CANY)
1942                 strend = HOPc(strend, -(minlen - 1));
1943         }
1944         DEBUG_EXECUTE_r({
1945             SV *prop = sv_newmortal();
1946             char *s0;
1947             char *s1;
1948             int len0;
1949             int len1;
1950
1951             regprop(prop, c);
1952             s0 = UTF ?
1953               pv_uni_display(dsv0, (U8*)SvPVX(prop), SvCUR(prop), 60,
1954                              UNI_DISPLAY_REGEX) :
1955               SvPVX(prop);
1956             len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1957             s1 = UTF ?
1958               sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
1959             len1 = UTF ? SvCUR(dsv1) : strend - s;
1960             PerlIO_printf(Perl_debug_log,
1961                           "Matching stclass `%*.*s' against `%*.*s'\n",
1962                           len0, len0, s0,
1963                           len1, len1, s1);
1964         });
1965         if (find_byclass(prog, c, s, strend, 0))
1966             goto got_it;
1967         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
1968     }
1969     else {
1970         dontbother = 0;
1971         if (prog->float_substr != Nullsv || prog->float_utf8 != Nullsv) {
1972             /* Trim the end. */
1973             char *last;
1974             SV* float_real;
1975
1976             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1977                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1978             float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
1979
1980             if (flags & REXEC_SCREAM) {
1981                 last = screaminstr(sv, float_real, s - strbeg,
1982                                    end_shift, &scream_pos, 1); /* last one */
1983                 if (!last)
1984                     last = scream_olds; /* Only one occurrence. */
1985                 /* we may be pointing at the wrong string */
1986                 else if (RX_MATCH_COPIED(prog))
1987                     s = strbeg + (s - SvPVX(sv));
1988             }
1989             else {
1990                 STRLEN len;
1991                 const char * const little = SvPV(float_real, len);
1992
1993                 if (SvTAIL(float_real)) {
1994                     if (memEQ(strend - len + 1, little, len - 1))
1995                         last = strend - len + 1;
1996                     else if (!multiline)
1997                         last = memEQ(strend - len, little, len)
1998                             ? strend - len : Nullch;
1999                     else
2000                         goto find_last;
2001                 } else {
2002                   find_last:
2003                     if (len)
2004                         last = rninstr(s, strend, little, little + len);
2005                     else
2006                         last = strend;  /* matching `$' */
2007                 }
2008             }
2009             if (last == NULL) {
2010                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2011                                       "%sCan't trim the tail, match fails (should not happen)%s\n",
2012                                       PL_colors[4],PL_colors[5]));
2013                 goto phooey; /* Should not happen! */
2014             }
2015             dontbother = strend - last + prog->float_min_offset;
2016         }
2017         if (minlen && (dontbother < minlen))
2018             dontbother = minlen - 1;
2019         strend -= dontbother;              /* this one's always in bytes! */
2020         /* We don't know much -- general case. */
2021         if (do_utf8) {
2022             for (;;) {
2023                 if (regtry(prog, s))
2024                     goto got_it;
2025                 if (s >= strend)
2026                     break;
2027                 s += UTF8SKIP(s);
2028             };
2029         }
2030         else {
2031             do {
2032                 if (regtry(prog, s))
2033                     goto got_it;
2034             } while (s++ < strend);
2035         }
2036     }
2037
2038     /* Failure. */
2039     goto phooey;
2040
2041 got_it:
2042     RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2043
2044     if (PL_reg_eval_set) {
2045         /* Preserve the current value of $^R */
2046         if (oreplsv != GvSV(PL_replgv))
2047             sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2048                                                   restored, the value remains
2049                                                   the same. */
2050         restore_pos(aTHX_ 0);
2051     }
2052
2053     /* make sure $`, $&, $', and $digit will work later */
2054     if ( !(flags & REXEC_NOT_FIRST) ) {
2055         RX_MATCH_COPY_FREE(prog);
2056         if (flags & REXEC_COPY_STR) {
2057             I32 i = PL_regeol - startpos + (stringarg - strbeg);
2058 #ifdef PERL_COPY_ON_WRITE
2059             if ((SvIsCOW(sv)
2060                  || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2061                 if (DEBUG_C_TEST) {
2062                     PerlIO_printf(Perl_debug_log,
2063                                   "Copy on write: regexp capture, type %d\n",
2064                                   (int) SvTYPE(sv));
2065                 }
2066                 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2067                 prog->subbeg = SvPVX(prog->saved_copy);
2068                 assert (SvPOKp(prog->saved_copy));
2069             } else
2070 #endif
2071             {
2072                 RX_MATCH_COPIED_on(prog);
2073                 s = savepvn(strbeg, i);
2074                 prog->subbeg = s;
2075             }
2076             prog->sublen = i;
2077         }
2078         else {
2079             prog->subbeg = strbeg;
2080             prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
2081         }
2082     }
2083
2084     return 1;
2085
2086 phooey:
2087     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2088                           PL_colors[4],PL_colors[5]));
2089     if (PL_reg_eval_set)
2090         restore_pos(aTHX_ 0);
2091     return 0;
2092 }
2093
2094 /*
2095  - regtry - try match at specific point
2096  */
2097 STATIC I32                      /* 0 failure, 1 success */
2098 S_regtry(pTHX_ regexp *prog, char *startpos)
2099 {
2100     register I32 i;
2101     register I32 *sp;
2102     register I32 *ep;
2103     CHECKPOINT lastcp;
2104     GET_RE_DEBUG_FLAGS_DECL;
2105
2106 #ifdef DEBUGGING
2107     PL_regindent = 0;   /* XXXX Not good when matches are reenterable... */
2108 #endif
2109     if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2110         MAGIC *mg;
2111
2112         PL_reg_eval_set = RS_init;
2113         DEBUG_EXECUTE_r(DEBUG_s(
2114             PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2115                           (IV)(PL_stack_sp - PL_stack_base));
2116             ));
2117         SAVEI32(cxstack[cxstack_ix].blk_oldsp);
2118         cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2119         /* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2120         SAVETMPS;
2121         /* Apparently this is not needed, judging by wantarray. */
2122         /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2123            cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2124
2125         if (PL_reg_sv) {
2126             /* Make $_ available to executed code. */
2127             if (PL_reg_sv != DEFSV) {
2128                 SAVE_DEFSV;
2129                 DEFSV = PL_reg_sv;
2130             }
2131         
2132             if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
2133                   && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
2134                 /* prepare for quick setting of pos */
2135                 sv_magic(PL_reg_sv, (SV*)0,
2136                         PERL_MAGIC_regex_global, Nullch, 0);
2137                 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
2138                 mg->mg_len = -1;
2139             }
2140             PL_reg_magic    = mg;
2141             PL_reg_oldpos   = mg->mg_len;
2142             SAVEDESTRUCTOR_X(restore_pos, 0);
2143         }
2144         if (!PL_reg_curpm) {
2145             Newz(22,PL_reg_curpm, 1, PMOP);
2146 #ifdef USE_ITHREADS
2147             {
2148                 SV* repointer = newSViv(0);
2149                 /* so we know which PL_regex_padav element is PL_reg_curpm */
2150                 SvFLAGS(repointer) |= SVf_BREAK;
2151                 av_push(PL_regex_padav,repointer);
2152                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2153                 PL_regex_pad = AvARRAY(PL_regex_padav);
2154             }
2155 #endif      
2156         }
2157         PM_SETRE(PL_reg_curpm, prog);
2158         PL_reg_oldcurpm = PL_curpm;
2159         PL_curpm = PL_reg_curpm;
2160         if (RX_MATCH_COPIED(prog)) {
2161             /*  Here is a serious problem: we cannot rewrite subbeg,
2162                 since it may be needed if this match fails.  Thus
2163                 $` inside (?{}) could fail... */
2164             PL_reg_oldsaved = prog->subbeg;
2165             PL_reg_oldsavedlen = prog->sublen;
2166 #ifdef PERL_COPY_ON_WRITE
2167             PL_nrs = prog->saved_copy;
2168 #endif
2169             RX_MATCH_COPIED_off(prog);
2170         }
2171         else
2172             PL_reg_oldsaved = Nullch;
2173         prog->subbeg = PL_bostr;
2174         prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2175     }
2176     prog->startp[0] = startpos - PL_bostr;
2177     PL_reginput = startpos;
2178     PL_regstartp = prog->startp;
2179     PL_regendp = prog->endp;
2180     PL_reglastparen = &prog->lastparen;
2181     PL_reglastcloseparen = &prog->lastcloseparen;
2182     prog->lastparen = 0;
2183     prog->lastcloseparen = 0;
2184     PL_regsize = 0;
2185     DEBUG_EXECUTE_r(PL_reg_starttry = startpos);
2186     if (PL_reg_start_tmpl <= prog->nparens) {
2187         PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2188         if(PL_reg_start_tmp)
2189             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2190         else
2191             New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2192     }
2193
2194     /* XXXX What this code is doing here?!!!  There should be no need
2195        to do this again and again, PL_reglastparen should take care of
2196        this!  --ilya*/
2197
2198     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2199      * Actually, the code in regcppop() (which Ilya may be meaning by
2200      * PL_reglastparen), is not needed at all by the test suite
2201      * (op/regexp, op/pat, op/split), but that code is needed, oddly
2202      * enough, for building DynaLoader, or otherwise this
2203      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2204      * will happen.  Meanwhile, this code *is* needed for the
2205      * above-mentioned test suite tests to succeed.  The common theme
2206      * on those tests seems to be returning null fields from matches.
2207      * --jhi */
2208 #if 1
2209     sp = prog->startp;
2210     ep = prog->endp;
2211     if (prog->nparens) {
2212         for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2213             *++sp = -1;
2214             *++ep = -1;
2215         }
2216     }
2217 #endif
2218     REGCP_SET(lastcp);
2219     if (regmatch(prog->program + 1)) {
2220         prog->endp[0] = PL_reginput - PL_bostr;
2221         return 1;
2222     }
2223     REGCP_UNWIND(lastcp);
2224     return 0;
2225 }
2226
2227 #define RE_UNWIND_BRANCH        1
2228 #define RE_UNWIND_BRANCHJ       2
2229
2230 union re_unwind_t;
2231
2232 typedef struct {                /* XX: makes sense to enlarge it... */
2233     I32 type;
2234     I32 prev;
2235     CHECKPOINT lastcp;
2236 } re_unwind_generic_t;
2237
2238 typedef struct {
2239     I32 type;
2240     I32 prev;
2241     CHECKPOINT lastcp;
2242     I32 lastparen;
2243     regnode *next;
2244     char *locinput;
2245     I32 nextchr;
2246 #ifdef DEBUGGING
2247     int regindent;
2248 #endif
2249 } re_unwind_branch_t;
2250
2251 typedef union re_unwind_t {
2252     I32 type;
2253     re_unwind_generic_t generic;
2254     re_unwind_branch_t branch;
2255 } re_unwind_t;
2256
2257 #define sayYES goto yes
2258 #define sayNO goto no
2259 #define sayNO_ANYOF goto no_anyof
2260 #define sayYES_FINAL goto yes_final
2261 #define sayYES_LOUD  goto yes_loud
2262 #define sayNO_FINAL  goto no_final
2263 #define sayNO_SILENT goto do_no
2264 #define saySAME(x) if (x) goto yes; else goto no
2265
2266 #define POSCACHE_SUCCESS 0      /* caching success rather than failure */
2267 #define POSCACHE_SEEN 1         /* we know what we're caching */
2268 #define POSCACHE_START 2        /* the real cache: this bit maps to pos 0 */
2269 #define CACHEsayYES STMT_START { \
2270     if (cache_offset | cache_bit) { \
2271         if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2272             PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) || (1<<POSCACHE_SEEN); \
2273         else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2274             /* cache records failure, but this is success */ \
2275             DEBUG_r( \
2276                 PerlIO_printf(Perl_debug_log, \
2277                     "%*s  (remove success from failure cache)\n", \
2278                     REPORT_CODE_OFF+PL_regindent*2, "") \
2279             ); \
2280             PL_reg_poscache[cache_offset] &= ~(1<<cache_bit); \
2281         } \
2282     } \
2283     sayYES; \
2284 } STMT_END
2285 #define CACHEsayNO STMT_START { \
2286     if (cache_offset | cache_bit) { \
2287         if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2288             PL_reg_poscache[0] |= (1<<POSCACHE_SEEN); \
2289         else if ((PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2290             /* cache records success, but this is failure */ \
2291             DEBUG_r( \
2292                 PerlIO_printf(Perl_debug_log, \
2293                     "%*s  (remove failure from success cache)\n", \
2294                     REPORT_CODE_OFF+PL_regindent*2, "") \
2295             ); \
2296             PL_reg_poscache[cache_offset] &= ~(1<<cache_bit); \
2297         } \
2298     } \
2299     sayNO; \
2300 } STMT_END
2301
2302 /* this is used to determine how far from the left messages like
2303    'failed...' are printed. Currently 29 makes these messages line
2304    up with the opcode they refer to. Earlier perls used 25 which
2305    left these messages outdented making reviewing a debug output
2306    quite difficult.
2307 */
2308 #define REPORT_CODE_OFF 29
2309
2310
2311 /* Make sure there is a test for this +1 options in re_tests */
2312 #define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2313
2314 #define TRIE_CHECK_STATE_IS_ACCEPTING STMT_START {                       \
2315     if ( trie->states[ state ].wordnum ) {                               \
2316         if ( !accepted ) {                                               \
2317             ENTER;                                                       \
2318             SAVETMPS;                                                    \
2319             bufflen = TRIE_INITAL_ACCEPT_BUFFLEN ;                       \
2320             sv_accept_buff=NEWSV( 1234,                                  \
2321               bufflen * sizeof(reg_trie_accepted) - 1 );                 \
2322             SvCUR_set( sv_accept_buff, sizeof(reg_trie_accepted) );      \
2323             SvPOK_on( sv_accept_buff );                                  \
2324             sv_2mortal( sv_accept_buff );                                \
2325             accept_buff = (reg_trie_accepted*)SvPV_nolen( sv_accept_buff );\
2326         } else {                                                         \
2327             if ( accepted >= bufflen ) {                                 \
2328                 bufflen *= 2;                                            \
2329                 accept_buff =(reg_trie_accepted*)SvGROW( sv_accept_buff, \
2330                     bufflen * sizeof(reg_trie_accepted) );               \
2331             }                                                            \
2332             SvCUR_set( sv_accept_buff,SvCUR( sv_accept_buff )            \
2333                 + sizeof( reg_trie_accepted ) );                         \
2334         }                                                                \
2335         accept_buff[ accepted ].wordnum = trie->states[ state ].wordnum; \
2336         accept_buff[ accepted ].endpos = uc;                             \
2337         ++accepted;                                                      \
2338     } } STMT_END
2339
2340 #define TRIE_HANDLE_CHAR STMT_START {                                   \
2341         if ( uvc < 256 ) {                                              \
2342             charid = trie->charmap[ uvc ];                              \
2343         } else {                                                        \
2344             charid = 0;                                                 \
2345             if( trie->widecharmap ) {                                   \
2346             SV** svpp = (SV**)NULL;                                     \
2347             svpp = hv_fetch( trie->widecharmap, (char*)&uvc,            \
2348                           sizeof( UV ), 0 );                            \
2349             if ( svpp ) {                                               \
2350                 charid = (U16)SvIV( *svpp );                            \
2351                 }                                                       \
2352             }                                                           \
2353         }                                                               \
2354         if ( charid &&                                                  \
2355              ( base + charid > trie->uniquecharcount ) &&               \
2356              ( base + charid - 1 - trie->uniquecharcount < trie->lasttrans) && \
2357              trie->trans[ base + charid - 1 - trie->uniquecharcount ].check == state ) \
2358         {                                                               \
2359             state = trie->trans[ base + charid - 1 - trie->uniquecharcount ].next;     \
2360         } else {                                                        \
2361             state = 0;                                                  \
2362         }                                                               \
2363         uc += len;                                                      \
2364     } STMT_END
2365
2366 /*
2367  - regmatch - main matching routine
2368  *
2369  * Conceptually the strategy is simple:  check to see whether the current
2370  * node matches, call self recursively to see whether the rest matches,
2371  * and then act accordingly.  In practice we make some effort to avoid
2372  * recursion, in particular by going through "ordinary" nodes (that don't
2373  * need to know whether the rest of the match failed) by a loop instead of
2374  * by recursion.
2375  */
2376 /* [lwall] I've hoisted the register declarations to the outer block in order to
2377  * maybe save a little bit of pushing and popping on the stack.  It also takes
2378  * advantage of machines that use a register save mask on subroutine entry.
2379  */
2380 STATIC I32                      /* 0 failure, 1 success */
2381 S_regmatch(pTHX_ regnode *prog)
2382 {
2383     register regnode *scan;     /* Current node. */
2384     regnode *next;              /* Next node. */
2385     regnode *inner;             /* Next node in internal branch. */
2386     register I32 nextchr;       /* renamed nextchr - nextchar colides with
2387                                    function of same name */
2388     register I32 n;             /* no or next */
2389     register I32 ln = 0;        /* len or last */
2390     register char *s = Nullch;  /* operand or save */
2391     register char *locinput = PL_reginput;
2392     register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
2393     int minmod = 0, sw = 0, logical = 0;
2394     I32 unwind = 0;
2395
2396     /* used by the trie code */
2397     SV                 *sv_accept_buff;  /* accepting states we have traversed */
2398     reg_trie_accepted  *accept_buff;     /* "" */
2399     reg_trie_data      *trie;            /* what trie are we using right now */
2400     U32 accepted = 0;                    /* how many accepting states we have seen*/
2401
2402 #if 0
2403     I32 firstcp = PL_savestack_ix;
2404 #endif
2405     register bool do_utf8 = PL_reg_match_utf8;
2406 #ifdef DEBUGGING
2407     SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2408     SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2409     SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2410
2411     SV *re_debug_flags;
2412 #endif
2413
2414     GET_RE_DEBUG_FLAGS;
2415
2416 #ifdef DEBUGGING
2417     PL_regindent++;
2418 #endif
2419
2420
2421     /* Note that nextchr is a byte even in UTF */
2422     nextchr = UCHARAT(locinput);
2423     scan = prog;
2424     while (scan != NULL) {
2425
2426         DEBUG_EXECUTE_r( {
2427             SV *prop = sv_newmortal();
2428             int docolor = *PL_colors[0];
2429             int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2430             int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
2431             /* The part of the string before starttry has one color
2432                (pref0_len chars), between starttry and current
2433                position another one (pref_len - pref0_len chars),
2434                after the current position the third one.
2435                We assume that pref0_len <= pref_len, otherwise we
2436                decrease pref0_len.  */
2437             int pref_len = (locinput - PL_bostr) > (5 + taill) - l
2438                 ? (5 + taill) - l : locinput - PL_bostr;
2439             int pref0_len;
2440
2441             while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2442                 pref_len++;
2443             pref0_len = pref_len  - (locinput - PL_reg_starttry);
2444             if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
2445                 l = ( PL_regeol - locinput > (5 + taill) - pref_len
2446                       ? (5 + taill) - pref_len : PL_regeol - locinput);
2447             while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2448                 l--;
2449             if (pref0_len < 0)
2450                 pref0_len = 0;
2451             if (pref0_len > pref_len)
2452                 pref0_len = pref_len;
2453             regprop(prop, scan);
2454             {
2455               char *s0 =
2456                 do_utf8 && OP(scan) != CANY ?
2457                 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
2458                                pref0_len, 60, UNI_DISPLAY_REGEX) :
2459                 locinput - pref_len;
2460               int len0 = do_utf8 ? strlen(s0) : pref0_len;
2461               char *s1 = do_utf8 && OP(scan) != CANY ?
2462                 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
2463                                pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2464                 locinput - pref_len + pref0_len;
2465               int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2466               char *s2 = do_utf8 && OP(scan) != CANY ?
2467                 pv_uni_display(dsv2, (U8*)locinput,
2468                                PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2469                 locinput;
2470               int len2 = do_utf8 ? strlen(s2) : l;
2471               PerlIO_printf(Perl_debug_log,
2472                             "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2473                             (IV)(locinput - PL_bostr),
2474                             PL_colors[4],
2475                             len0, s0,
2476                             PL_colors[5],
2477                             PL_colors[2],
2478                             len1, s1,
2479                             PL_colors[3],
2480                             (docolor ? "" : "> <"),
2481                             PL_colors[0],
2482                             len2, s2,
2483                             PL_colors[1],
2484                             15 - l - pref_len + 1,
2485                             "",
2486                             (IV)(scan - PL_regprogram), PL_regindent*2, "",
2487                             SvPVX(prop));
2488             }
2489         });
2490
2491         next = scan + NEXT_OFF(scan);
2492         if (next == scan)
2493             next = NULL;
2494
2495         switch (OP(scan)) {
2496         case BOL:
2497             if (locinput == PL_bostr)
2498             {
2499                 /* regtill = regbol; */
2500                 break;
2501             }
2502             sayNO;
2503         case MBOL:
2504             if (locinput == PL_bostr ||
2505                 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2506             {
2507                 break;
2508             }
2509             sayNO;
2510         case SBOL:
2511             if (locinput == PL_bostr)
2512                 break;
2513             sayNO;
2514         case GPOS:
2515             if (locinput == PL_reg_ganch)
2516                 break;
2517             sayNO;
2518         case EOL:
2519                 goto seol;
2520         case MEOL:
2521             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2522                 sayNO;
2523             break;
2524         case SEOL:
2525           seol:
2526             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2527                 sayNO;
2528             if (PL_regeol - locinput > 1)
2529                 sayNO;
2530             break;
2531         case EOS:
2532             if (PL_regeol != locinput)
2533                 sayNO;
2534             break;
2535         case SANY:
2536             if (!nextchr && locinput >= PL_regeol)
2537                 sayNO;
2538             if (do_utf8) {
2539                 locinput += PL_utf8skip[nextchr];
2540                 if (locinput > PL_regeol)
2541                     sayNO;
2542                 nextchr = UCHARAT(locinput);
2543             }
2544             else
2545                 nextchr = UCHARAT(++locinput);
2546             break;
2547         case CANY:
2548             if (!nextchr && locinput >= PL_regeol)
2549                 sayNO;
2550             nextchr = UCHARAT(++locinput);
2551             break;
2552         case REG_ANY:
2553             if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2554                 sayNO;
2555             if (do_utf8) {
2556                 locinput += PL_utf8skip[nextchr];
2557                 if (locinput > PL_regeol)
2558                     sayNO;
2559                 nextchr = UCHARAT(locinput);
2560             }
2561             else
2562                 nextchr = UCHARAT(++locinput);
2563             break;
2564
2565
2566
2567         /*
2568            traverse the TRIE keeping track of all accepting states
2569            we transition through until we get to a failing node.
2570
2571            we use two slightly different pieces of code to handle
2572            the traversal depending on whether its case sensitive or
2573            not. we reuse the accept code however. (this should probably
2574            be turned into a macro.)
2575
2576         */
2577         case TRIEF:
2578         case TRIEFL:
2579             {
2580
2581                 U32 uniflags = ckWARN( WARN_UTF8 ) ? 0 : UTF8_ALLOW_ANY;
2582                 U8 *uc = ( U8* )locinput;
2583                 U32 state = 1;
2584                 U16 charid = 0;
2585                 U32 base = 0;
2586                 UV uvc = 0;
2587                 STRLEN len = 0;
2588                 STRLEN foldlen = 0;
2589                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2590                 U8 *uscan = (U8*)NULL;
2591                 STRLEN bufflen=0;
2592                 accepted = 0;
2593
2594                 trie = (reg_trie_data*)PL_regdata->data[ ARG( scan ) ];
2595
2596                 while ( state && uc <= (U8*)PL_regeol ) {
2597
2598                     TRIE_CHECK_STATE_IS_ACCEPTING;
2599
2600                     base = trie->states[ state ].trans.base;
2601
2602                     DEBUG_TRIE_EXECUTE_r(
2603                                 PerlIO_printf( Perl_debug_log,
2604                                     "%*s  %sState: %4x, Base: %4x Accepted: %4x ",
2605                                     REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
2606                                     state, base, accepted );
2607                     );
2608
2609                     if ( base ) {
2610
2611                         if ( do_utf8 || UTF ) {
2612                             if ( foldlen>0 ) {
2613                                 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
2614                                 foldlen -= len;
2615                                 uscan += len;
2616                                 len=0;
2617                             } else {
2618                                 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2619                                 uvc = to_uni_fold( uvc, foldbuf, &foldlen );
2620                                 foldlen -= UNISKIP( uvc );
2621                                 uscan = foldbuf + UNISKIP( uvc );
2622                             }
2623                         } else {
2624                             uvc = (U32)*uc;
2625                             len = 1;
2626                         }
2627
2628                         TRIE_HANDLE_CHAR;
2629
2630                     } else {
2631                         state = 0;
2632                     }
2633                     DEBUG_TRIE_EXECUTE_r(
2634                                 PerlIO_printf( Perl_debug_log,
2635                                     "Charid:%3x CV:%4x After State: %4x%s\n",
2636                                     charid, uvc, state, PL_colors[5] );
2637                     );
2638                 }
2639                 if ( !accepted ) {
2640                    sayNO;
2641                 } else {
2642                     goto TrieAccept;
2643                 }
2644             }
2645             /* unreached codepoint: we jump into the middle of the next case
2646                from previous if blocks */
2647         case TRIE:
2648             {
2649                 U32 uniflags = ckWARN( WARN_UTF8 ) ? 0 : UTF8_ALLOW_ANY;
2650                 U8 *uc = (U8*)locinput;
2651                 U32 state = 1;
2652                 U16 charid = 0;
2653                 U32 base = 0;
2654                 UV uvc = 0;
2655                 STRLEN len = 0;
2656                 STRLEN bufflen = 0;
2657                 accepted = 0;
2658
2659                 trie = (reg_trie_data*)PL_regdata->data[ ARG( scan ) ];
2660
2661                 while ( state && uc <= (U8*)PL_regeol ) {
2662
2663                     TRIE_CHECK_STATE_IS_ACCEPTING;
2664
2665                     base = trie->states[ state ].trans.base;
2666
2667                     DEBUG_TRIE_EXECUTE_r(
2668                             PerlIO_printf( Perl_debug_log,
2669                                 "%*s  %sState: %4x, Base: %4x Accepted: %4x ",
2670                                 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
2671                                 state, base, accepted );
2672                     );
2673
2674                     if ( base ) {
2675
2676                         if ( do_utf8 || UTF ) {
2677                             uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2678                         } else {
2679                             uvc = (U32)*uc;
2680                             len = 1;
2681                         }
2682
2683                         TRIE_HANDLE_CHAR;
2684
2685                     } else {
2686                         state = 0;
2687                     }
2688                     DEBUG_TRIE_EXECUTE_r(
2689                             PerlIO_printf( Perl_debug_log,
2690                                 "Charid:%3x CV:%4x After State: %4x%s\n",
2691                                 charid, uvc, state, PL_colors[5] );
2692                     );
2693                 }
2694                 if ( !accepted ) {
2695                    sayNO;
2696                 }
2697             }
2698
2699
2700             /*
2701                There was at least one accepting state that we
2702                transitioned through. Presumably the number of accepting
2703                states is going to be low, typically one or two. So we
2704                simply scan through to find the one with lowest wordnum.
2705                Once we find it, we swap the last state into its place
2706                and decrement the size. We then try to match the rest of
2707                the pattern at the point where the word ends, if we
2708                succeed then we end the loop, otherwise the loop
2709                eventually terminates once all of the accepting states
2710                have been tried.
2711             */
2712         TrieAccept:
2713             {
2714                 int gotit = 0;
2715
2716                 if ( accepted == 1 ) {
2717                     DEBUG_EXECUTE_r({
2718                         SV **tmp = av_fetch( trie->words, accept_buff[ 0 ].wordnum-1, 0 );
2719                         PerlIO_printf( Perl_debug_log,
2720                             "%*s  %sonly one match : #%d <%s>%s\n",
2721                             REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],
2722                             accept_buff[ 0 ].wordnum,
2723                             tmp ? SvPV_nolen( *tmp ) : "not compiled under -Dr",
2724                             PL_colors[5] );
2725                     });
2726                     PL_reginput = (char *)accept_buff[ 0 ].endpos;
2727                     /* in this case we free tmps/leave before we call regmatch
2728                        as we wont be using accept_buff again. */
2729                     FREETMPS;
2730                     LEAVE;
2731                     gotit = regmatch( scan + NEXT_OFF( scan ) );
2732                 } else {
2733                     DEBUG_EXECUTE_r(
2734                         PerlIO_printf( Perl_debug_log,"%*s  %sgot %d possible matches%s\n",
2735                             REPORT_CODE_OFF + PL_regindent * 2, "",PL_colors[4], accepted,
2736                             PL_colors[5] );
2737                     );
2738                     while ( !gotit && accepted-- ) {
2739                         U32 best = 0;
2740                         U32 cur;
2741                         for( cur = 1 ; cur <= accepted ; cur++ ) {
2742                              DEBUG_TRIE_EXECUTE_r(
2743                                         PerlIO_printf( Perl_debug_log,
2744                                             "%*s  %sgot %d (%d) as best, looking at %d (%d)%s\n",
2745                                             REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
2746                                             best, accept_buff[ best ].wordnum, cur,
2747                                             accept_buff[ cur ].wordnum, PL_colors[5] );
2748                              );
2749
2750                             if ( accept_buff[ cur ].wordnum < accept_buff[ best ].wordnum )
2751                                     best = cur;
2752                         }
2753                         DEBUG_EXECUTE_r({
2754                             SV **tmp = av_fetch( trie->words, accept_buff[ best ].wordnum - 1, 0 );
2755                             PerlIO_printf( Perl_debug_log, "%*s  %strying alternation #%d <%s> at 0x%p%s\n",
2756                                 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],
2757                                 accept_buff[best].wordnum,
2758                                 tmp ? SvPV_nolen( *tmp ) : "not compiled under -Dr",scan,
2759                                 PL_colors[5] );
2760                         });
2761                         if ( best<accepted ) {
2762                             reg_trie_accepted tmp = accept_buff[ best ];
2763                             accept_buff[ best ] = accept_buff[ accepted ];
2764                             accept_buff[ accepted ] = tmp;
2765                             best = accepted;
2766                         }
2767                         PL_reginput = (char *)accept_buff[ best ].endpos;
2768
2769                         /* 
2770                            as far as I can tell we only need the SAVETMPS/FREETMPS 
2771                            for re's with EVAL in them but I'm leaving them in for 
2772                            all until I can be sure.
2773                          */
2774                         SAVETMPS;
2775                         gotit = regmatch( scan + NEXT_OFF( scan ) ) ;
2776                         FREETMPS;
2777                     }
2778                     FREETMPS;
2779                     LEAVE;
2780                 }
2781                 
2782                 if ( gotit ) {
2783                     sayYES;
2784                 } else {
2785                     sayNO;
2786                 }
2787             }
2788             /* unreached codepoint */
2789         case EXACT:
2790             s = STRING(scan);
2791             ln = STR_LEN(scan);
2792             if (do_utf8 != UTF) {
2793                 /* The target and the pattern have differing utf8ness. */
2794                 char *l = locinput;
2795                 char *e = s + ln;
2796                 STRLEN ulen;
2797
2798                 if (do_utf8) {
2799                     /* The target is utf8, the pattern is not utf8. */
2800                     while (s < e) {
2801                         if (l >= PL_regeol)
2802                              sayNO;
2803                         if (NATIVE_TO_UNI(*(U8*)s) !=
2804                             utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
2805                                            ckWARN(WARN_UTF8) ?
2806                                            0 : UTF8_ALLOW_ANY))
2807                              sayNO;
2808                         l += ulen;
2809                         s ++;
2810                     }
2811                 }
2812                 else {
2813                     /* The target is not utf8, the pattern is utf8. */
2814                     while (s < e) {
2815                         if (l >= PL_regeol)
2816                             sayNO;
2817                         if (NATIVE_TO_UNI(*((U8*)l)) !=
2818                             utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
2819                                            ckWARN(WARN_UTF8) ?
2820                                            0 : UTF8_ALLOW_ANY))
2821                             sayNO;
2822                         s += ulen;
2823                         l ++;
2824                     }
2825                 }
2826                 locinput = l;
2827                 nextchr = UCHARAT(locinput);
2828                 break;
2829             }
2830             /* The target and the pattern have the same utf8ness. */
2831             /* Inline the first character, for speed. */
2832             if (UCHARAT(s) != nextchr)
2833                 sayNO;
2834             if (PL_regeol - locinput < ln)
2835                 sayNO;
2836             if (ln > 1 && memNE(s, locinput, ln))
2837                 sayNO;
2838             locinput += ln;
2839             nextchr = UCHARAT(locinput);
2840             break;
2841         case EXACTFL:
2842             PL_reg_flags |= RF_tainted;
2843             /* FALL THROUGH */
2844         case EXACTF:
2845             s = STRING(scan);
2846             ln = STR_LEN(scan);
2847
2848             if (do_utf8 || UTF) {
2849               /* Either target or the pattern are utf8. */
2850                 char *l = locinput;
2851                 char *e = PL_regeol;
2852
2853                 if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
2854                                l, &e, 0,  do_utf8)) {
2855                      /* One more case for the sharp s:
2856                       * pack("U0U*", 0xDF) =~ /ss/i,
2857                       * the 0xC3 0x9F are the UTF-8
2858                       * byte sequence for the U+00DF. */
2859                      if (!(do_utf8 &&
2860                            toLOWER(s[0]) == 's' &&
2861                            ln >= 2 &&
2862                            toLOWER(s[1]) == 's' &&
2863                            (U8)l[0] == 0xC3 &&
2864                            e - l >= 2 &&
2865                            (U8)l[1] == 0x9F))
2866                           sayNO;
2867                 }
2868                 locinput = e;
2869                 nextchr = UCHARAT(locinput);
2870                 break;
2871             }
2872
2873             /* Neither the target and the pattern are utf8. */
2874
2875             /* Inline the first character, for speed. */
2876             if (UCHARAT(s) != nextchr &&
2877                 UCHARAT(s) != ((OP(scan) == EXACTF)
2878                                ? PL_fold : PL_fold_locale)[nextchr])
2879                 sayNO;
2880             if (PL_regeol - locinput < ln)
2881                 sayNO;
2882             if (ln > 1 && (OP(scan) == EXACTF
2883                            ? ibcmp(s, locinput, ln)
2884                            : ibcmp_locale(s, locinput, ln)))
2885                 sayNO;
2886             locinput += ln;
2887             nextchr = UCHARAT(locinput);
2888             break;
2889         case ANYOF:
2890             if (do_utf8) {
2891                 STRLEN inclasslen = PL_regeol - locinput;
2892
2893                 if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
2894                     sayNO_ANYOF;
2895                 if (locinput >= PL_regeol)
2896                     sayNO;
2897                 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
2898                 nextchr = UCHARAT(locinput);
2899                 break;
2900             }
2901             else {
2902                 if (nextchr < 0)
2903                     nextchr = UCHARAT(locinput);
2904                 if (!REGINCLASS(scan, (U8*)locinput))
2905                     sayNO_ANYOF;
2906                 if (!nextchr && locinput >= PL_regeol)
2907                     sayNO;
2908                 nextchr = UCHARAT(++locinput);
2909                 break;
2910             }
2911         no_anyof:
2912             /* If we might have the case of the German sharp s
2913              * in a casefolding Unicode character class. */
2914
2915             if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2916                  locinput += SHARP_S_SKIP;
2917                  nextchr = UCHARAT(locinput);
2918             }
2919             else
2920                  sayNO;
2921             break;
2922         case ALNUML:
2923             PL_reg_flags |= RF_tainted;
2924             /* FALL THROUGH */
2925         case ALNUM:
2926             if (!nextchr)
2927                 sayNO;
2928             if (do_utf8) {
2929                 LOAD_UTF8_CHARCLASS(alnum,"a");
2930                 if (!(OP(scan) == ALNUM
2931                       ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2932                       : isALNUM_LC_utf8((U8*)locinput)))
2933                 {
2934                     sayNO;
2935                 }
2936                 locinput += PL_utf8skip[nextchr];
2937                 nextchr = UCHARAT(locinput);
2938                 break;
2939             }
2940             if (!(OP(scan) == ALNUM
2941                   ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2942                 sayNO;
2943             nextchr = UCHARAT(++locinput);
2944             break;
2945         case NALNUML:
2946             PL_reg_flags |= RF_tainted;
2947             /* FALL THROUGH */
2948         case NALNUM:
2949             if (!nextchr && locinput >= PL_regeol)
2950                 sayNO;
2951             if (do_utf8) {
2952                 LOAD_UTF8_CHARCLASS(alnum,"a");
2953                 if (OP(scan) == NALNUM
2954                     ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2955                     : isALNUM_LC_utf8((U8*)locinput))
2956                 {
2957                     sayNO;
2958                 }
2959                 locinput += PL_utf8skip[nextchr];
2960                 nextchr = UCHARAT(locinput);
2961                 break;
2962             }
2963             if (OP(scan) == NALNUM
2964                 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2965                 sayNO;
2966             nextchr = UCHARAT(++locinput);
2967             break;
2968         case BOUNDL:
2969         case NBOUNDL:
2970             PL_reg_flags |= RF_tainted;
2971             /* FALL THROUGH */
2972         case BOUND:
2973         case NBOUND:
2974             /* was last char in word? */
2975             if (do_utf8) {
2976                 if (locinput == PL_bostr)
2977                     ln = '\n';
2978                 else {
2979                     U8 *r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
2980                 
2981                     ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
2982                 }
2983                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2984                     ln = isALNUM_uni(ln);
2985                     LOAD_UTF8_CHARCLASS(alnum,"a");
2986                     n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
2987                 }
2988                 else {
2989                     ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
2990                     n = isALNUM_LC_utf8((U8*)locinput);
2991                 }
2992             }
2993             else {
2994                 ln = (locinput != PL_bostr) ?
2995                     UCHARAT(locinput - 1) : '\n';
2996                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2997                     ln = isALNUM(ln);
2998                     n = isALNUM(nextchr);
2999                 }
3000                 else {
3001                     ln = isALNUM_LC(ln);
3002                     n = isALNUM_LC(nextchr);
3003                 }
3004             }
3005             if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3006                                     OP(scan) == BOUNDL))
3007                     sayNO;
3008             break;
3009         case SPACEL:
3010             PL_reg_flags |= RF_tainted;
3011             /* FALL THROUGH */
3012         case SPACE:
3013             if (!nextchr)
3014                 sayNO;
3015             if (do_utf8) {
3016                 if (UTF8_IS_CONTINUED(nextchr)) {
3017                     LOAD_UTF8_CHARCLASS(space," ");
3018                     if (!(OP(scan) == SPACE
3019                           ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3020                           : isSPACE_LC_utf8((U8*)locinput)))
3021                     {
3022                         sayNO;
3023                     }
3024                     locinput += PL_utf8skip[nextchr];
3025                     nextchr = UCHARAT(locinput);
3026                     break;
3027                 }
3028                 if (!(OP(scan) == SPACE
3029                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3030                     sayNO;
3031                 nextchr = UCHARAT(++locinput);
3032             }
3033             else {
3034                 if (!(OP(scan) == SPACE
3035                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3036                     sayNO;
3037                 nextchr = UCHARAT(++locinput);
3038             }
3039             break;
3040         case NSPACEL:
3041             PL_reg_flags |= RF_tainted;
3042             /* FALL THROUGH */
3043         case NSPACE:
3044             if (!nextchr && locinput >= PL_regeol)
3045                 sayNO;
3046             if (do_utf8) {
3047                 LOAD_UTF8_CHARCLASS(space," ");
3048                 if (OP(scan) == NSPACE
3049                     ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3050                     : isSPACE_LC_utf8((U8*)locinput))
3051                 {
3052                     sayNO;
3053                 }
3054                 locinput += PL_utf8skip[nextchr];
3055                 nextchr = UCHARAT(locinput);
3056                 break;
3057             }
3058             if (OP(scan) == NSPACE
3059                 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
3060                 sayNO;
3061             nextchr = UCHARAT(++locinput);
3062             break;
3063         case DIGITL:
3064             PL_reg_flags |= RF_tainted;
3065             /* FALL THROUGH */
3066         case DIGIT:
3067             if (!nextchr)
3068                 sayNO;
3069             if (do_utf8) {
3070                 LOAD_UTF8_CHARCLASS(digit,"0");
3071                 if (!(OP(scan) == DIGIT
3072                       ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3073                       : isDIGIT_LC_utf8((U8*)locinput)))
3074                 {
3075                     sayNO;
3076                 }
3077                 locinput += PL_utf8skip[nextchr];
3078                 nextchr = UCHARAT(locinput);
3079                 break;
3080             }
3081             if (!(OP(scan) == DIGIT
3082                   ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
3083                 sayNO;
3084             nextchr = UCHARAT(++locinput);
3085             break;
3086         case NDIGITL:
3087             PL_reg_flags |= RF_tainted;
3088             /* FALL THROUGH */
3089         case NDIGIT:
3090             if (!nextchr && locinput >= PL_regeol)
3091                 sayNO;
3092             if (do_utf8) {
3093                 LOAD_UTF8_CHARCLASS(digit,"0");
3094                 if (OP(scan) == NDIGIT
3095                     ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3096                     : isDIGIT_LC_utf8((U8*)locinput))
3097                 {
3098                     sayNO;
3099                 }
3100                 locinput += PL_utf8skip[nextchr];
3101                 nextchr = UCHARAT(locinput);
3102                 break;
3103             }
3104             if (OP(scan) == NDIGIT
3105                 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
3106                 sayNO;
3107             nextchr = UCHARAT(++locinput);
3108             break;
3109         case CLUMP:
3110             if (locinput >= PL_regeol)
3111                 sayNO;
3112             if  (do_utf8) {
3113                 LOAD_UTF8_CHARCLASS(mark,"~");
3114                 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3115                     sayNO;
3116                 locinput += PL_utf8skip[nextchr];
3117                 while (locinput < PL_regeol &&
3118                        swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3119                     locinput += UTF8SKIP(locinput);
3120                 if (locinput > PL_regeol)
3121                     sayNO;
3122             } 
3123             else
3124                locinput++;
3125             nextchr = UCHARAT(locinput);
3126             break;
3127         case REFFL:
3128             PL_reg_flags |= RF_tainted;
3129             /* FALL THROUGH */
3130         case REF:
3131         case REFF:
3132             n = ARG(scan);  /* which paren pair */
3133             ln = PL_regstartp[n];
3134             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3135             if ((I32)*PL_reglastparen < n || ln == -1)
3136                 sayNO;                  /* Do not match unless seen CLOSEn. */
3137             if (ln == PL_regendp[n])
3138                 break;
3139
3140             s = PL_bostr + ln;
3141             if (do_utf8 && OP(scan) != REF) {   /* REF can do byte comparison */
3142                 char *l = locinput;
3143                 char *e = PL_bostr + PL_regendp[n];
3144                 /*
3145                  * Note that we can't do the "other character" lookup trick as
3146                  * in the 8-bit case (no pun intended) because in Unicode we
3147                  * have to map both upper and title case to lower case.
3148                  */
3149                 if (OP(scan) == REFF) {
3150                     STRLEN ulen1, ulen2;
3151                     U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3152                     U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3153                     while (s < e) {
3154                         if (l >= PL_regeol)
3155                             sayNO;
3156                         toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3157                         toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3158                         if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3159                             sayNO;
3160                         s += ulen1;
3161                         l += ulen2;
3162                     }
3163                 }
3164                 locinput = l;
3165                 nextchr = UCHARAT(locinput);
3166                 break;
3167             }
3168
3169             /* Inline the first character, for speed. */
3170             if (UCHARAT(s) != nextchr &&
3171                 (OP(scan) == REF ||
3172                  (UCHARAT(s) != ((OP(scan) == REFF
3173                                   ? PL_fold : PL_fold_locale)[nextchr]))))
3174                 sayNO;
3175             ln = PL_regendp[n] - ln;
3176             if (locinput + ln > PL_regeol)
3177                 sayNO;
3178             if (ln > 1 && (OP(scan) == REF
3179                            ? memNE(s, locinput, ln)
3180                            : (OP(scan) == REFF
3181                               ? ibcmp(s, locinput, ln)
3182                               : ibcmp_locale(s, locinput, ln))))
3183                 sayNO;
3184             locinput += ln;
3185             nextchr = UCHARAT(locinput);
3186             break;
3187
3188         case NOTHING:
3189         case TAIL:
3190             break;
3191         case BACK:
3192             break;
3193         case EVAL:
3194         {
3195             dSP;
3196             OP_4tree *oop = PL_op;
3197             COP *ocurcop = PL_curcop;
3198             PAD *old_comppad;
3199             SV *ret;
3200             struct regexp *oreg = PL_reg_re;
3201         
3202             n = ARG(scan);
3203             PL_op = (OP_4tree*)PL_regdata->data[n];
3204             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3205             PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_regdata->data[n + 2]);
3206             PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
3207
3208             {
3209                 SV **before = SP;
3210                 CALLRUNOPS(aTHX);                       /* Scalar context. */
3211                 SPAGAIN;
3212                 if (SP == before)
3213                     ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
3214                 else {
3215                     ret = POPs;
3216                     PUTBACK;
3217                 }
3218             }
3219
3220             PL_op = oop;
3221             PAD_RESTORE_LOCAL(old_comppad);
3222             PL_curcop = ocurcop;
3223             if (logical) {
3224                 if (logical == 2) {     /* Postponed subexpression. */
3225                     regexp *re;
3226                     MAGIC *mg = Null(MAGIC*);
3227                     re_cc_state state;
3228                     CHECKPOINT cp, lastcp;
3229                     int toggleutf;
3230                     register SV *sv;
3231
3232                     if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3233                         mg = mg_find(sv, PERL_MAGIC_qr);
3234                     else if (SvSMAGICAL(ret)) {
3235                         if (SvGMAGICAL(ret))
3236                             sv_unmagic(ret, PERL_MAGIC_qr);
3237                         else
3238                             mg = mg_find(ret, PERL_MAGIC_qr);
3239                     }
3240
3241                     if (mg) {
3242                         re = (regexp *)mg->mg_obj;
3243                         (void)ReREFCNT_inc(re);
3244                     }
3245                     else {
3246                         STRLEN len;
3247                         char *t = SvPV(ret, len);
3248                         PMOP pm;
3249                         char *oprecomp = PL_regprecomp;
3250                         I32 osize = PL_regsize;
3251                         I32 onpar = PL_regnpar;
3252
3253                         Zero(&pm, 1, PMOP);
3254                         if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
3255                         re = CALLREGCOMP(aTHX_ t, t + len, &pm);
3256                         if (!(SvFLAGS(ret)
3257                               & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3258                                 | SVs_GMG)))
3259                             sv_magic(ret,(SV*)ReREFCNT_inc(re),
3260                                         PERL_MAGIC_qr,0,0);
3261                         PL_regprecomp = oprecomp;
3262                         PL_regsize = osize;
3263                         PL_regnpar = onpar;
3264                     }
3265                     DEBUG_EXECUTE_r(
3266                         PerlIO_printf(Perl_debug_log,
3267                                       "Entering embedded `%s%.60s%s%s'\n",
3268                                       PL_colors[0],
3269                                       re->precomp,
3270                                       PL_colors[1],
3271                                       (strlen(re->precomp) > 60 ? "..." : ""))
3272                         );
3273                     state.node = next;
3274                     state.prev = PL_reg_call_cc;
3275                     state.cc = PL_regcc;
3276                     state.re = PL_reg_re;
3277
3278                     PL_regcc = 0;
3279                 
3280                     cp = regcppush(0);  /* Save *all* the positions. */
3281                     REGCP_SET(lastcp);
3282                     cache_re(re);
3283                     state.ss = PL_savestack_ix;
3284                     *PL_reglastparen = 0;
3285                     *PL_reglastcloseparen = 0;
3286                     PL_reg_call_cc = &state;
3287                     PL_reginput = locinput;
3288                     toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
3289                                 ((re->reganch & ROPT_UTF8) != 0);
3290                     if (toggleutf) PL_reg_flags ^= RF_utf8;
3291
3292                     /* XXXX This is too dramatic a measure... */
3293                     PL_reg_maxiter = 0;
3294
3295                     if (regmatch(re->program + 1)) {
3296                         /* Even though we succeeded, we need to restore
3297                            global variables, since we may be wrapped inside
3298                            SUSPEND, thus the match may be not finished yet. */
3299
3300                         /* XXXX Do this only if SUSPENDed? */
3301                         PL_reg_call_cc = state.prev;
3302                         PL_regcc = state.cc;
3303                         PL_reg_re = state.re;
3304                         cache_re(PL_reg_re);
3305                         if (toggleutf) PL_reg_flags ^= RF_utf8;
3306
3307                         /* XXXX This is too dramatic a measure... */
3308                         PL_reg_maxiter = 0;
3309
3310                         /* These are needed even if not SUSPEND. */
3311                         ReREFCNT_dec(re);
3312                         regcpblow(cp);
3313                         sayYES;
3314                     }
3315                     ReREFCNT_dec(re);
3316                     REGCP_UNWIND(lastcp);
3317                     regcppop();
3318                     PL_reg_call_cc = state.prev;
3319                     PL_regcc = state.cc;
3320                     PL_reg_re = state.re;
3321                     cache_re(PL_reg_re);
3322                     if (toggleutf) PL_reg_flags ^= RF_utf8;
3323
3324                     /* XXXX This is too dramatic a measure... */
3325                     PL_reg_maxiter = 0;
3326
3327                     logical = 0;
3328                     sayNO;
3329                 }
3330                 sw = SvTRUE(ret);
3331                 logical = 0;
3332             }
3333             else {
3334                 sv_setsv(save_scalar(PL_replgv), ret);
3335                 cache_re(oreg);
3336             }
3337             break;
3338         }
3339         case OPEN:
3340             n = ARG(scan);  /* which paren pair */
3341             PL_reg_start_tmp[n] = locinput;
3342             if (n > PL_regsize)
3343                 PL_regsize = n;
3344             break;
3345         case CLOSE:
3346             n = ARG(scan);  /* which paren pair */
3347             PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3348             PL_regendp[n] = locinput - PL_bostr;
3349             if (n > (I32)*PL_reglastparen)
3350                 *PL_reglastparen = n;
3351             *PL_reglastcloseparen = n;
3352             break;
3353         case GROUPP:
3354             n = ARG(scan);  /* which paren pair */
3355             sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
3356             break;
3357         case IFTHEN:
3358             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3359             if (sw)
3360                 next = NEXTOPER(NEXTOPER(scan));
3361             else {
3362                 next = scan + ARG(scan);
3363                 if (OP(next) == IFTHEN) /* Fake one. */
3364                     next = NEXTOPER(NEXTOPER(next));
3365             }
3366             break;
3367         case LOGICAL:
3368             logical = scan->flags;
3369             break;
3370 /*******************************************************************
3371  PL_regcc contains infoblock about the innermost (...)* loop, and
3372  a pointer to the next outer infoblock.
3373
3374  Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3375
3376    1) After matching X, regnode for CURLYX is processed;
3377
3378    2) This regnode creates infoblock on the stack, and calls
3379       regmatch() recursively with the starting point at WHILEM node;
3380
3381    3) Each hit of WHILEM node tries to match A and Z (in the order
3382       depending on the current iteration, min/max of {min,max} and
3383       greediness).  The information about where are nodes for "A"
3384       and "Z" is read from the infoblock, as is info on how many times "A"
3385       was already matched, and greediness.
3386
3387    4) After A matches, the same WHILEM node is hit again.
3388
3389    5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
3390       of the same pair.  Thus when WHILEM tries to match Z, it temporarily
3391       resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
3392       as in (Y(A)*Z)*.  If Z matches, the automaton will hit the WHILEM node
3393       of the external loop.
3394
3395  Currently present infoblocks form a tree with a stem formed by PL_curcc
3396  and whatever it mentions via ->next, and additional attached trees
3397  corresponding to temporarily unset infoblocks as in "5" above.
3398
3399  In the following picture infoblocks for outer loop of
3400  (Y(A)*?Z)*?T are denoted O, for inner I.  NULL starting block
3401  is denoted by x.  The matched string is YAAZYAZT.  Temporarily postponed
3402  infoblocks are drawn below the "reset" infoblock.
3403
3404  In fact in the picture below we do not show failed matches for Z and T
3405  by WHILEM blocks.  [We illustrate minimal matches, since for them it is
3406  more obvious *why* one needs to *temporary* unset infoblocks.]
3407
3408   Matched       REx position    InfoBlocks      Comment
3409                 (Y(A)*?Z)*?T    x
3410                 Y(A)*?Z)*?T     x <- O
3411   Y             (A)*?Z)*?T      x <- O
3412   Y             A)*?Z)*?T       x <- O <- I
3413   YA            )*?Z)*?T        x <- O <- I
3414   YA            A)*?Z)*?T       x <- O <- I
3415   YAA           )*?Z)*?T        x <- O <- I
3416   YAA           Z)*?T           x <- O          # Temporary unset I
3417                                      I
3418
3419   YAAZ          Y(A)*?Z)*?T     x <- O
3420                                      I
3421
3422   YAAZY         (A)*?Z)*?T      x <- O
3423                                      I
3424
3425   YAAZY         A)*?Z)*?T       x <- O <- I
3426                                      I
3427
3428   YAAZYA        )*?Z)*?T        x <- O <- I     
3429                                      I
3430
3431   YAAZYA        Z)*?T           x <- O          # Temporary unset I
3432                                      I,I
3433
3434   YAAZYAZ       )*?T            x <- O
3435                                      I,I
3436
3437   YAAZYAZ       T               x               # Temporary unset O
3438                                 O
3439                                 I,I
3440
3441   YAAZYAZT                      x
3442                                 O
3443                                 I,I
3444  *******************************************************************/
3445         case CURLYX: {
3446                 CURCUR cc;
3447                 CHECKPOINT cp = PL_savestack_ix;
3448                 /* No need to save/restore up to this paren */
3449                 I32 parenfloor = scan->flags;
3450
3451                 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3452                     next += ARG(next);
3453                 cc.oldcc = PL_regcc;
3454                 PL_regcc = &cc;
3455                 /* XXXX Probably it is better to teach regpush to support
3456                    parenfloor > PL_regsize... */
3457                 if (parenfloor > (I32)*PL_reglastparen)
3458                     parenfloor = *PL_reglastparen; /* Pessimization... */
3459                 cc.parenfloor = parenfloor;
3460                 cc.cur = -1;
3461                 cc.min = ARG1(scan);
3462                 cc.max  = ARG2(scan);
3463                 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3464                 cc.next = next;
3465                 cc.minmod = minmod;
3466                 cc.lastloc = 0;
3467                 PL_reginput = locinput;
3468                 n = regmatch(PREVOPER(next));   /* start on the WHILEM */
3469                 regcpblow(cp);
3470                 PL_regcc = cc.oldcc;
3471                 saySAME(n);
3472             }
3473             /* NOT REACHED */
3474         case WHILEM: {
3475                 /*
3476                  * This is really hard to understand, because after we match
3477                  * what we're trying to match, we must make sure the rest of
3478                  * the REx is going to match for sure, and to do that we have
3479                  * to go back UP the parse tree by recursing ever deeper.  And
3480                  * if it fails, we have to reset our parent's current state
3481                  * that we can try again after backing off.
3482                  */
3483
3484                 CHECKPOINT cp, lastcp;
3485                 CURCUR* cc = PL_regcc;
3486                 char *lastloc = cc->lastloc; /* Detection of 0-len. */
3487                 I32 cache_offset = 0, cache_bit = 0;
3488                 
3489                 n = cc->cur + 1;        /* how many we know we matched */
3490                 PL_reginput = locinput;
3491
3492                 DEBUG_EXECUTE_r(
3493                     PerlIO_printf(Perl_debug_log,
3494                                   "%*s  %ld out of %ld..%ld  cc=%"UVxf"\n",
3495                                   REPORT_CODE_OFF+PL_regindent*2, "",
3496                                   (long)n, (long)cc->min,
3497                                   (long)cc->max, PTR2UV(cc))
3498                     );
3499
3500                 /* If degenerate scan matches "", assume scan done. */
3501
3502                 if (locinput == cc->lastloc && n >= cc->min) {
3503                     PL_regcc = cc->oldcc;
3504                     if (PL_regcc)
3505                         ln = PL_regcc->cur;
3506                     DEBUG_EXECUTE_r(
3507                         PerlIO_printf(Perl_debug_log,
3508                            "%*s  empty match detected, try continuation...\n",
3509                            REPORT_CODE_OFF+PL_regindent*2, "")
3510                         );
3511                     if (regmatch(cc->next))
3512                         sayYES;
3513                     if (PL_regcc)
3514                         PL_regcc->cur = ln;
3515                     PL_regcc = cc;
3516                     sayNO;
3517                 }
3518
3519                 /* First just match a string of min scans. */
3520
3521                 if (n < cc->min) {
3522                     cc->cur = n;
3523                     cc->lastloc = locinput;
3524                     if (regmatch(cc->scan))
3525                         sayYES;
3526                     cc->cur = n - 1;
3527                     cc->lastloc = lastloc;
3528                     sayNO;
3529                 }
3530
3531                 if (scan->flags) {
3532                     /* Check whether we already were at this position.
3533                         Postpone detection until we know the match is not
3534                         *that* much linear. */
3535                 if (!PL_reg_maxiter) {
3536                     PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3537                     PL_reg_leftiter = PL_reg_maxiter;
3538                 }
3539                 if (PL_reg_leftiter-- == 0) {
3540                     I32 size = (PL_reg_maxiter + 7 + POSCACHE_START)/8;
3541                     if (PL_reg_poscache) {
3542                         if ((I32)PL_reg_poscache_size < size) {
3543                             Renew(PL_reg_poscache, size, char);
3544                             PL_reg_poscache_size = size;
3545                         }
3546                         Zero(PL_reg_poscache, size, char);
3547                     }
3548                     else {
3549                         PL_reg_poscache_size = size;
3550                         Newz(29, PL_reg_poscache, size, char);
3551                     }
3552                     DEBUG_EXECUTE_r(
3553                         PerlIO_printf(Perl_debug_log,
3554               "%sDetected a super-linear match, switching on caching%s...\n",
3555                                       PL_colors[4], PL_colors[5])
3556                         );
3557                 }
3558                 if (PL_reg_leftiter < 0) {
3559                     cache_offset = locinput - PL_bostr;
3560
3561                     cache_offset = (scan->flags & 0xf) - 1 + POSCACHE_START
3562                             + cache_offset * (scan->flags>>4);
3563                     cache_bit = cache_offset % 8;
3564                     cache_offset /= 8;
3565                     if (PL_reg_poscache[cache_offset] & (1<<cache_bit)) {
3566                     DEBUG_EXECUTE_r(
3567                         PerlIO_printf(Perl_debug_log,
3568                                       "%*s  already tried at this position...\n",
3569                                       REPORT_CODE_OFF+PL_regindent*2, "")
3570                         );
3571                         if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))
3572                             /* cache records success */
3573                             sayYES;
3574                         else
3575                             /* cache records failure */
3576                             sayNO_SILENT;
3577                     }
3578                     PL_reg_poscache[cache_offset] |= (1<<cache_bit);
3579                 }
3580                 }
3581
3582                 /* Prefer next over scan for minimal matching. */
3583
3584                 if (cc->minmod) {
3585                     PL_regcc = cc->oldcc;
3586                     if (PL_regcc)
3587                         ln = PL_regcc->cur;
3588                     cp = regcppush(cc->parenfloor);
3589                     REGCP_SET(lastcp);
3590                     if (regmatch(cc->next)) {
3591                         regcpblow(cp);
3592                         CACHEsayYES;    /* All done. */
3593                     }
3594                     REGCP_UNWIND(lastcp);
3595                     regcppop();
3596                     if (PL_regcc)
3597                         PL_regcc->cur = ln;
3598                     PL_regcc = cc;
3599
3600                     if (n >= cc->max) { /* Maximum greed exceeded? */
3601                         if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3602                             && !(PL_reg_flags & RF_warned)) {
3603                             PL_reg_flags |= RF_warned;
3604                             Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3605                                  "Complex regular subexpression recursion",
3606                                  REG_INFTY - 1);
3607                         }
3608                         CACHEsayNO;
3609                     }
3610
3611                     DEBUG_EXECUTE_r(
3612                         PerlIO_printf(Perl_debug_log,
3613                                       "%*s  trying longer...\n",
3614                                       REPORT_CODE_OFF+PL_regindent*2, "")
3615                         );
3616                     /* Try scanning more and see if it helps. */
3617                     PL_reginput = locinput;
3618                     cc->cur = n;
3619                     cc->lastloc = locinput;
3620                     cp = regcppush(cc->parenfloor);
3621                     REGCP_SET(lastcp);
3622                     if (regmatch(cc->scan)) {
3623                         regcpblow(cp);
3624                         CACHEsayYES;
3625                     }
3626                     REGCP_UNWIND(lastcp);
3627                     regcppop();
3628                     cc->cur = n - 1;
3629                     cc->lastloc = lastloc;
3630                     CACHEsayNO;
3631                 }
3632
3633                 /* Prefer scan over next for maximal matching. */
3634
3635                 if (n < cc->max) {      /* More greed allowed? */
3636                     cp = regcppush(cc->parenfloor);
3637                     cc->cur = n;
3638                     cc->lastloc = locinput;
3639                     REGCP_SET(lastcp);
3640                     if (regmatch(cc->scan)) {
3641                         regcpblow(cp);
3642                         CACHEsayYES;
3643                     }
3644                     REGCP_UNWIND(lastcp);
3645                     regcppop();         /* Restore some previous $<digit>s? */
3646                     PL_reginput = locinput;
3647                     DEBUG_EXECUTE_r(
3648                         PerlIO_printf(Perl_debug_log,
3649                                       "%*s  failed, try continuation...\n",
3650                                       REPORT_CODE_OFF+PL_regindent*2, "")
3651                         );
3652                 }
3653                 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3654                         && !(PL_reg_flags & RF_warned)) {
3655                     PL_reg_flags |= RF_warned;
3656                     Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3657                          "Complex regular subexpression recursion",
3658                          REG_INFTY - 1);
3659                 }
3660
3661                 /* Failed deeper matches of scan, so see if this one works. */
3662                 PL_regcc = cc->oldcc;
3663                 if (PL_regcc)
3664                     ln = PL_regcc->cur;
3665                 if (regmatch(cc->next))
3666                     CACHEsayYES;
3667                 if (PL_regcc)
3668                     PL_regcc->cur = ln;
3669                 PL_regcc = cc;
3670                 cc->cur = n - 1;
3671                 cc->lastloc = lastloc;
3672                 CACHEsayNO;
3673             }
3674             /* NOT REACHED */
3675         case BRANCHJ:
3676             next = scan + ARG(scan);
3677             if (next == scan)
3678                 next = NULL;
3679             inner = NEXTOPER(NEXTOPER(scan));
3680             goto do_branch;
3681         case BRANCH:
3682             inner = NEXTOPER(scan);
3683           do_branch:
3684             {
3685                 c1 = OP(scan);
3686                 if (OP(next) != c1)     /* No choice. */
3687                     next = inner;       /* Avoid recursion. */
3688                 else {
3689                     I32 lastparen = *PL_reglastparen;
3690                     I32 unwind1;
3691                     re_unwind_branch_t *uw;
3692
3693                     /* Put unwinding data on stack */
3694                     unwind1 = SSNEWt(1,re_unwind_branch_t);
3695                     uw = SSPTRt(unwind1,re_unwind_branch_t);
3696                     uw->prev = unwind;
3697                     unwind = unwind1;
3698                     uw->type = ((c1 == BRANCH)
3699                                 ? RE_UNWIND_BRANCH
3700                                 : RE_UNWIND_BRANCHJ);
3701                     uw->lastparen = lastparen;
3702                     uw->next = next;
3703                     uw->locinput = locinput;
3704                     uw->nextchr = nextchr;
3705 #ifdef DEBUGGING
3706                     uw->regindent = ++PL_regindent;
3707 #endif
3708
3709                     REGCP_SET(uw->lastcp);
3710
3711                     /* Now go into the first branch */
3712                     next = inner;
3713                 }
3714             }
3715             break;
3716         case MINMOD:
3717             minmod = 1;
3718             break;
3719         case CURLYM:
3720         {
3721             I32 l = 0;
3722             CHECKPOINT lastcp;
3723         
3724             /* We suppose that the next guy does not need
3725                backtracking: in particular, it is of constant non-zero length,
3726                and has no parenths to influence future backrefs. */
3727             ln = ARG1(scan);  /* min to match */
3728             n  = ARG2(scan);  /* max to match */
3729             paren = scan->flags;
3730             if (paren) {
3731                 if (paren > PL_regsize)
3732                     PL_regsize = paren;
3733                 if (paren > (I32)*PL_reglastparen)
3734                     *PL_reglastparen = paren;
3735             }
3736             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3737             if (paren)
3738                 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3739             PL_reginput = locinput;
3740             if (minmod) {
3741                 minmod = 0;
3742                 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3743                     sayNO;
3744                 locinput = PL_reginput;
3745                 if (HAS_TEXT(next) || JUMPABLE(next)) {
3746                     regnode *text_node = next;
3747
3748                     if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3749
3750                     if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3751                     else {
3752                         if (PL_regkind[(U8)OP(text_node)] == REF) {
3753                             c1 = c2 = -1000;
3754                             goto assume_ok_MM;
3755                         }
3756                         else { c1 = (U8)*STRING(text_node); }
3757                         if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3758                             c2 = PL_fold[c1];
3759                         else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3760                             c2 = PL_fold_locale[c1];
3761                         else
3762                             c2 = c1;
3763                     }
3764                 }
3765                 else
3766                     c1 = c2 = -1000;
3767             assume_ok_MM:
3768                 REGCP_SET(lastcp);
3769                 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3770                     /* If it could work, try it. */
3771                     if (c1 == -1000 ||
3772                         UCHARAT(PL_reginput) == c1 ||
3773                         UCHARAT(PL_reginput) == c2)
3774                     {
3775                         if (paren) {
3776                             if (ln) {
3777                                 PL_regstartp[paren] =
3778                                     HOPc(PL_reginput, -l) - PL_bostr;
3779                                 PL_regendp[paren] = PL_reginput - PL_bostr;
3780                             }
3781                             else
3782                                 PL_regendp[paren] = -1;
3783                         }
3784                         if (regmatch(next))
3785                             sayYES;
3786                         REGCP_UNWIND(lastcp);
3787                     }
3788                     /* Couldn't or didn't -- move forward. */
3789                     PL_reginput = locinput;
3790                     if (regrepeat_hard(scan, 1, &l)) {
3791                         ln++;
3792                         locinput = PL_reginput;
3793                     }
3794                     else
3795                         sayNO;
3796                 }
3797             }
3798             else {
3799                 n = regrepeat_hard(scan, n, &l);
3800                 locinput = PL_reginput;
3801                 DEBUG_EXECUTE_r(
3802                     PerlIO_printf(Perl_debug_log,
3803                                   "%*s  matched %"IVdf" times, len=%"IVdf"...\n",
3804                                   (int)(REPORT_CODE_OFF+PL_regindent*2), "",
3805                                   (IV) n, (IV)l)
3806                     );
3807                 if (n >= ln) {
3808                     if (HAS_TEXT(next) || JUMPABLE(next)) {
3809                         regnode *text_node = next;
3810
3811                         if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3812
3813                         if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3814                         else {
3815                             if (PL_regkind[(U8)OP(text_node)] == REF) {
3816                                 c1 = c2 = -1000;
3817                                 goto assume_ok_REG;
3818                             }
3819                             else { c1 = (U8)*STRING(text_node); }
3820
3821                             if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3822                                 c2 = PL_fold[c1];
3823                             else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3824                                 c2 = PL_fold_locale[c1];
3825                             else
3826                                 c2 = c1;
3827                         }
3828                     }
3829                     else
3830                         c1 = c2 = -1000;
3831                 }
3832             assume_ok_REG:
3833                 REGCP_SET(lastcp);
3834                 while (n >= ln) {
3835                     /* If it could work, try it. */
3836                     if (c1 == -1000 ||
3837                         UCHARAT(PL_reginput) == c1 ||
3838                         UCHARAT(PL_reginput) == c2)
3839                     {
3840                         DEBUG_EXECUTE_r(
3841                                 PerlIO_printf(Perl_debug_log,
3842                                               "%*s  trying tail with n=%"IVdf"...\n",
3843                                               (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
3844                             );
3845                         if (paren) {
3846                             if (n) {
3847                                 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3848                                 PL_regendp[paren] = PL_reginput - PL_bostr;
3849                             }
3850                             else
3851                                 PL_regendp[paren] = -1;
3852                         }
3853                         if (regmatch(next))
3854                             sayYES;
3855                         REGCP_UNWIND(lastcp);
3856                     }
3857                     /* Couldn't or didn't -- back up. */
3858                     n--;
3859                     locinput = HOPc(locinput, -l);
3860                     PL_reginput = locinput;
3861                 }
3862             }
3863             sayNO;
3864             break;
3865         }
3866         case CURLYN:
3867             paren = scan->flags;        /* Which paren to set */
3868             if (paren > PL_regsize)
3869                 PL_regsize = paren;
3870             if (paren > (I32)*PL_reglastparen)
3871                 *PL_reglastparen = paren;
3872             ln = ARG1(scan);  /* min to match */
3873             n  = ARG2(scan);  /* max to match */
3874             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
3875             goto repeat;
3876         case CURLY:
3877             paren = 0;
3878             ln = ARG1(scan);  /* min to match */
3879             n  = ARG2(scan);  /* max to match */
3880             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3881             goto repeat;
3882         case STAR:
3883             ln = 0;
3884             n = REG_INFTY;
3885             scan = NEXTOPER(scan);
3886             paren = 0;
3887             goto repeat;
3888         case PLUS:
3889             ln = 1;
3890             n = REG_INFTY;
3891             scan = NEXTOPER(scan);
3892             paren = 0;
3893           repeat:
3894             /*
3895             * Lookahead to avoid useless match attempts
3896             * when we know what character comes next.
3897             */
3898
3899             /*
3900             * Used to only do .*x and .*?x, but now it allows
3901             * for )'s, ('s and (?{ ... })'s to be in the way
3902             * of the quantifier and the EXACT-like node.  -- japhy
3903             */
3904
3905             if (HAS_TEXT(next) || JUMPABLE(next)) {
3906                 U8 *s;
3907                 regnode *text_node = next;
3908
3909                 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3910
3911                 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3912                 else {
3913                     if (PL_regkind[(U8)OP(text_node)] == REF) {
3914                         c1 = c2 = -1000;
3915                         goto assume_ok_easy;
3916                     }
3917                     else { s = (U8*)STRING(text_node); }
3918
3919                     if (!UTF) {
3920                         c2 = c1 = *s;
3921                         if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3922                             c2 = PL_fold[c1];
3923                         else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3924                             c2 = PL_fold_locale[c1];
3925                     }
3926                     else { /* UTF */
3927                         if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
3928                              STRLEN ulen1, ulen2;
3929                              U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3930                              U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3931
3932                              to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3933                              to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3934
3935                              c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
3936                                                  ckWARN(WARN_UTF8) ?
3937                                                  0 : UTF8_ALLOW_ANY);
3938                              c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
3939                                                  ckWARN(WARN_UTF8) ?
3940                                                  0 : UTF8_ALLOW_ANY);
3941                         }
3942                         else {
3943                             c2 = c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
3944                                                      ckWARN(WARN_UTF8) ?
3945                                                      0 : UTF8_ALLOW_ANY);
3946                         }
3947                     }
3948                 }
3949             }
3950             else
3951                 c1 = c2 = -1000;
3952         assume_ok_easy:
3953             PL_reginput = locinput;
3954             if (minmod) {
3955                 CHECKPOINT lastcp;
3956                 minmod = 0;
3957                 if (ln && regrepeat(scan, ln) < ln)
3958                     sayNO;
3959                 locinput = PL_reginput;
3960                 REGCP_SET(lastcp);
3961                 if (c1 != -1000) {
3962                     char *e; /* Should not check after this */
3963                     char *old = locinput;
3964                     int count = 0;
3965
3966                     if  (n == REG_INFTY) {
3967                         e = PL_regeol - 1;
3968                         if (do_utf8)
3969                             while (UTF8_IS_CONTINUATION(*(U8*)e))
3970                                 e--;
3971                     }
3972                     else if (do_utf8) {
3973                         int m = n - ln;
3974                         for (e = locinput;
3975                              m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3976                             e += UTF8SKIP(e);
3977                     }
3978                     else {
3979                         e = locinput + n - ln;
3980                         if (e >= PL_regeol)
3981                             e = PL_regeol - 1;
3982                     }
3983                     while (1) {
3984                         /* Find place 'next' could work */
3985                         if (!do_utf8) {
3986                             if (c1 == c2) {
3987                                 while (locinput <= e &&
3988                                        UCHARAT(locinput) != c1)
3989                                     locinput++;
3990                             } else {
3991                                 while (locinput <= e
3992                                        && UCHARAT(locinput) != c1
3993                                        && UCHARAT(locinput) != c2)
3994                                     locinput++;
3995                             }
3996                             count = locinput - old;
3997                         }
3998                         else {
3999                             STRLEN len;
4000                             if (c1 == c2) {
4001                                 /* count initialised to
4002                                  * utf8_distance(old, locinput) */
4003                                 while (locinput <= e &&
4004                                        utf8n_to_uvchr((U8*)locinput,
4005                                                       UTF8_MAXBYTES, &len,
4006                                                       ckWARN(WARN_UTF8) ?
4007                                                       0 : UTF8_ALLOW_ANY) != (UV)c1) {
4008                                     locinput += len;
4009                                     count++;
4010                                 }
4011                             } else {
4012                                 /* count initialised to
4013                                  * utf8_distance(old, locinput) */
4014                                 while (locinput <= e) {
4015                                     UV c = utf8n_to_uvchr((U8*)locinput,
4016                                                           UTF8_MAXBYTES, &len,
4017                                                           ckWARN(WARN_UTF8) ?
4018                                                           0 : UTF8_ALLOW_ANY);
4019                                     if (c == (UV)c1 || c == (UV)c2)
4020                                         break;
4021                                     locinput += len;
4022                                     count++;
4023                                 }
4024                             }
4025                         }
4026                         if (locinput > e)
4027                             sayNO;
4028                         /* PL_reginput == old now */
4029                         if (locinput != old) {
4030                             ln = 1;     /* Did some */
4031                             if (regrepeat(scan, count) < count)
4032                                 sayNO;
4033                         }
4034                         /* PL_reginput == locinput now */
4035                         TRYPAREN(paren, ln, locinput);
4036                         PL_reginput = locinput; /* Could be reset... */
4037                         REGCP_UNWIND(lastcp);
4038                         /* Couldn't or didn't -- move forward. */
4039                         old = locinput;
4040                         if (do_utf8)
4041                             locinput += UTF8SKIP(locinput);
4042                         else
4043                             locinput++;
4044                         count = 1;
4045                     }
4046                 }
4047                 else
4048                 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
4049                     UV c;
4050                     if (c1 != -1000) {
4051                         if (do_utf8)
4052                             c = utf8n_to_uvchr((U8*)PL_reginput,
4053                                                UTF8_MAXBYTES, 0,
4054                                                ckWARN(WARN_UTF8) ?
4055                                                0 : UTF8_ALLOW_ANY);
4056                         else
4057                             c = UCHARAT(PL_reginput);
4058                         /* If it could work, try it. */
4059                         if (c == (UV)c1 || c == (UV)c2)
4060                         {
4061                             TRYPAREN(paren, ln, PL_reginput);
4062                             REGCP_UNWIND(lastcp);
4063                         }
4064                     }
4065                     /* If it could work, try it. */
4066                     else if (c1 == -1000)
4067                     {
4068                         TRYPAREN(paren, ln, PL_reginput);
4069                         REGCP_UNWIND(lastcp);
4070                     }
4071                     /* Couldn't or didn't -- move forward. */
4072                     PL_reginput = locinput;
4073                     if (regrepeat(scan, 1)) {
4074                         ln++;
4075                         locinput = PL_reginput;
4076                     }
4077                     else
4078                         sayNO;
4079                 }
4080             }
4081             else {
4082                 CHECKPOINT lastcp;
4083                 n = regrepeat(scan, n);
4084                 locinput = PL_reginput;
4085                 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
4086                     (OP(next) != MEOL ||
4087                         OP(next) == SEOL || OP(next) == EOS))
4088                 {
4089                     ln = n;                     /* why back off? */
4090                     /* ...because $ and \Z can match before *and* after
4091                        newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
4092                        We should back off by one in this case. */
4093                     if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
4094                         ln--;
4095                 }
4096                 REGCP_SET(lastcp);
4097                 if (paren) {
4098                     UV c = 0;
4099                     while (n >= ln) {
4100                         if (c1 != -1000) {
4101                             if (do_utf8)
4102                                 c = utf8n_to_uvchr((U8*)PL_reginput,
4103                                                    UTF8_MAXBYTES, 0,
4104                                                    ckWARN(WARN_UTF8) ?
4105                                                    0 : UTF8_ALLOW_ANY);
4106                             else
4107                                 c = UCHARAT(PL_reginput);
4108                         }
4109                         /* If it could work, try it. */
4110                         if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
4111                             {
4112                                 TRYPAREN(paren, n, PL_reginput);
4113                                 REGCP_UNWIND(lastcp);
4114                             }
4115                         /* Couldn't or didn't -- back up. */
4116                         n--;
4117                         PL_reginput = locinput = HOPc(locinput, -1);
4118                     }
4119                 }
4120                 else {
4121                     UV c = 0;
4122                     while (n >= ln) {
4123                         if (c1 != -1000) {
4124                             if (do_utf8)
4125                                 c = utf8n_to_uvchr((U8*)PL_reginput,
4126                                                    UTF8_MAXBYTES, 0,
4127                                                    ckWARN(WARN_UTF8) ?
4128                                                    0 : UTF8_ALLOW_ANY);
4129                             else
4130                                 c = UCHARAT(PL_reginput);
4131                         }
4132                         /* If it could work, try it. */
4133                         if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
4134                             {
4135                                 TRYPAREN(paren, n, PL_reginput);
4136                                 REGCP_UNWIND(lastcp);
4137                             }
4138                         /* Couldn't or didn't -- back up. */
4139                         n--;
4140                         PL_reginput = locinput = HOPc(locinput, -1);
4141                     }
4142                 }
4143             }
4144             sayNO;
4145             break;
4146         case END:
4147             if (PL_reg_call_cc) {
4148                 re_cc_state *cur_call_cc = PL_reg_call_cc;
4149                 CURCUR *cctmp = PL_regcc;
4150                 regexp *re = PL_reg_re;
4151                 CHECKPOINT cp, lastcp;
4152                 
4153                 cp = regcppush(0);      /* Save *all* the positions. */
4154                 REGCP_SET(lastcp);
4155                 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
4156                                                     the caller. */
4157                 PL_reginput = locinput; /* Make position available to
4158                                            the callcc. */
4159                 cache_re(PL_reg_call_cc->re);
4160                 PL_regcc = PL_reg_call_cc->cc;
4161                 PL_reg_call_cc = PL_reg_call_cc->prev;
4162                 if (regmatch(cur_call_cc->node)) {
4163                     PL_reg_call_cc = cur_call_cc;
4164                     regcpblow(cp);
4165                     sayYES;
4166                 }
4167                 REGCP_UNWIND(lastcp);
4168                 regcppop();
4169                 PL_reg_call_cc = cur_call_cc;
4170                 PL_regcc = cctmp;
4171                 PL_reg_re = re;
4172                 cache_re(re);
4173
4174                 DEBUG_EXECUTE_r(
4175                     PerlIO_printf(Perl_debug_log,
4176                                   "%*s  continuation failed...\n",
4177                                   REPORT_CODE_OFF+PL_regindent*2, "")
4178                     );
4179                 sayNO_SILENT;
4180             }
4181             if (locinput < PL_regtill) {
4182                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4183                                       "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4184                                       PL_colors[4],
4185                                       (long)(locinput - PL_reg_starttry),
4186                                       (long)(PL_regtill - PL_reg_starttry),
4187                                       PL_colors[5]));
4188                 sayNO_FINAL;            /* Cannot match: too short. */
4189             }
4190             PL_reginput = locinput;     /* put where regtry can find it */
4191             sayYES_FINAL;               /* Success! */
4192         case SUCCEED:
4193             PL_reginput = locinput;     /* put where regtry can find it */
4194             sayYES_LOUD;                /* Success! */
4195         case SUSPEND:
4196             n = 1;
4197             PL_reginput = locinput;
4198             goto do_ifmatch;    
4199         case UNLESSM:
4200             n = 0;
4201             if (scan->flags) {
4202                 s = HOPBACKc(locinput, scan->flags);
4203                 if (!s)
4204                     goto say_yes;
4205                 PL_reginput = s;
4206             }
4207             else
4208                 PL_reginput = locinput;
4209             goto do_ifmatch;
4210         case IFMATCH:
4211             n = 1;
4212             if (scan->flags) {
4213                 s = HOPBACKc(locinput, scan->flags);
4214                 if (!s)
4215                     goto say_no;
4216                 PL_reginput = s;
4217             }
4218             else
4219                 PL_reginput = locinput;
4220
4221           do_ifmatch:
4222             inner = NEXTOPER(NEXTOPER(scan));
4223             if (regmatch(inner) != n) {
4224               say_no:
4225                 if (logical) {
4226                     logical = 0;
4227                     sw = 0;
4228                     goto do_longjump;
4229                 }
4230                 else
4231                     sayNO;
4232             }
4233           say_yes:
4234             if (logical) {
4235                 logical = 0;
4236                 sw = 1;
4237             }
4238             if (OP(scan) == SUSPEND) {
4239                 locinput = PL_reginput;
4240                 nextchr = UCHARAT(locinput);
4241             }
4242             /* FALL THROUGH. */
4243         case LONGJMP:
4244           do_longjump:
4245             next = scan + ARG(scan);
4246             if (next == scan)
4247                 next = NULL;
4248             break;
4249         default:
4250             PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
4251                           PTR2UV(scan), OP(scan));
4252             Perl_croak(aTHX_ "regexp memory corruption");
4253         }
4254       reenter:
4255         scan = next;
4256     }
4257
4258     /*
4259     * We get here only if there's trouble -- normally "case END" is
4260     * the terminating point.
4261     */
4262     Perl_croak(aTHX_ "corrupted regexp pointers");
4263     /*NOTREACHED*/
4264     sayNO;
4265
4266 yes_loud:
4267     DEBUG_EXECUTE_r(
4268         PerlIO_printf(Perl_debug_log,
4269                       "%*s  %scould match...%s\n",
4270                       REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
4271         );
4272     goto yes;
4273 yes_final:
4274     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
4275                           PL_colors[4],PL_colors[5]));
4276 yes:
4277 #ifdef DEBUGGING
4278     PL_regindent--;
4279 #endif
4280
4281 #if 0                                   /* Breaks $^R */
4282     if (unwind)
4283         regcpblow(firstcp);
4284 #endif
4285     return 1;
4286
4287 no:
4288     DEBUG_EXECUTE_r(
4289         PerlIO_printf(Perl_debug_log,
4290                       "%*s  %sfailed...%s\n",
4291                       REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
4292         );
4293     goto do_no;
4294 no_final:
4295 do_no:
4296     if (unwind) {
4297         re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
4298
4299         switch (uw->type) {
4300         case RE_UNWIND_BRANCH:
4301         case RE_UNWIND_BRANCHJ:
4302         {
4303             re_unwind_branch_t *uwb = &(uw->branch);
4304             I32 lastparen = uwb->lastparen;
4305         
4306             REGCP_UNWIND(uwb->lastcp);
4307             for (n = *PL_reglastparen; n > lastparen; n--)
4308                 PL_regendp[n] = -1;
4309             *PL_reglastparen = n;
4310             scan = next = uwb->next;
4311             if ( !scan ||
4312                  OP(scan) != (uwb->type == RE_UNWIND_BRANCH
4313                               ? BRANCH : BRANCHJ) ) {           /* Failure */
4314                 unwind = uwb->prev;
4315 #ifdef DEBUGGING
4316                 PL_regindent--;
4317 #endif
4318                 goto do_no;
4319             }
4320             /* Have more choice yet.  Reuse the same uwb.  */
4321             /*SUPPRESS 560*/
4322             if ((n = (uwb->type == RE_UNWIND_BRANCH
4323                       ? NEXT_OFF(next) : ARG(next))))
4324                 next += n;
4325             else
4326                 next = NULL;    /* XXXX Needn't unwinding in this case... */
4327             uwb->next = next;
4328             next = NEXTOPER(scan);
4329             if (uwb->type == RE_UNWIND_BRANCHJ)
4330                 next = NEXTOPER(next);
4331             locinput = uwb->locinput;
4332             nextchr = uwb->nextchr;
4333 #ifdef DEBUGGING
4334             PL_regindent = uwb->regindent;
4335 #endif
4336
4337             goto reenter;
4338         }
4339         /* NOT REACHED */
4340         default:
4341             Perl_croak(aTHX_ "regexp unwind memory corruption");
4342         }
4343         /* NOT REACHED */
4344     }
4345 #ifdef DEBUGGING
4346     PL_regindent--;
4347 #endif
4348     return 0;
4349 }
4350
4351 /*
4352  - regrepeat - repeatedly match something simple, report how many
4353  */
4354 /*
4355  * [This routine now assumes that it will only match on things of length 1.
4356  * That was true before, but now we assume scan - reginput is the count,
4357  * rather than incrementing count on every character.  [Er, except utf8.]]
4358  */
4359 STATIC I32
4360 S_regrepeat(pTHX_ regnode *p, I32 max)
4361 {
4362     register char *scan;
4363     register I32 c;
4364     register char *loceol = PL_regeol;
4365     register I32 hardcount = 0;
4366     register bool do_utf8 = PL_reg_match_utf8;
4367
4368     scan = PL_reginput;
4369     if (max == REG_INFTY)
4370         max = I32_MAX;
4371     else if (max < loceol - scan)
4372       loceol = scan + max;
4373     switch (OP(p)) {
4374     case REG_ANY:
4375         if (do_utf8) {
4376             loceol = PL_regeol;
4377             while (scan < loceol && hardcount < max && *scan != '\n') {
4378                 scan += UTF8SKIP(scan);
4379                 hardcount++;
4380             }
4381         } else {
4382             while (scan < loceol && *scan != '\n')
4383                 scan++;
4384         }
4385         break;
4386     case SANY:
4387         if (do_utf8) {
4388             loceol = PL_regeol;
4389             while (scan < loceol && hardcount < max) {
4390                 scan += UTF8SKIP(scan);
4391                 hardcount++;
4392             }
4393         }
4394         else
4395             scan = loceol;
4396         break;
4397     case CANY:
4398         scan = loceol;
4399         break;
4400     case EXACT:         /* length of string is 1 */
4401         c = (U8)*STRING(p);
4402         while (scan < loceol && UCHARAT(scan) == c)
4403             scan++;
4404         break;
4405     case EXACTF:        /* length of string is 1 */
4406         c = (U8)*STRING(p);
4407         while (scan < loceol &&
4408                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
4409             scan++;
4410         break;
4411     case EXACTFL:       /* length of string is 1 */
4412         PL_reg_flags |= RF_tainted;
4413         c = (U8)*STRING(p);
4414         while (scan < loceol &&
4415                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
4416             scan++;
4417         break;
4418     case ANYOF:
4419         if (do_utf8) {
4420             loceol = PL_regeol;
4421             while (hardcount < max && scan < loceol &&
4422                    reginclass(p, (U8*)scan, 0, do_utf8)) {
4423                 scan += UTF8SKIP(scan);
4424                 hardcount++;
4425             }
4426         } else {
4427             while (scan < loceol && REGINCLASS(p, (U8*)scan))
4428                 scan++;
4429         }
4430         break;
4431     case ALNUM:
4432         if (do_utf8) {
4433             loceol = PL_regeol;
4434             LOAD_UTF8_CHARCLASS(alnum,"a");
4435             while (hardcount < max && scan < loceol &&
4436                    swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4437                 scan += UTF8SKIP(scan);
4438                 hardcount++;
4439             }
4440         } else {
4441             while (scan < loceol && isALNUM(*scan))
4442                 scan++;
4443         }
4444         break;
4445     case ALNUML:
4446         PL_reg_flags |= RF_tainted;
4447         if (do_utf8) {
4448             loceol = PL_regeol;
4449             while (hardcount < max && scan < loceol &&
4450                    isALNUM_LC_utf8((U8*)scan)) {
4451                 scan += UTF8SKIP(scan);
4452                 hardcount++;
4453             }
4454         } else {
4455             while (scan < loceol && isALNUM_LC(*scan))
4456                 scan++;
4457         }
4458         break;
4459     case NALNUM:
4460         if (do_utf8) {
4461             loceol = PL_regeol;
4462             LOAD_UTF8_CHARCLASS(alnum,"a");
4463             while (hardcount < max && scan < loceol &&
4464                    !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4465                 scan += UTF8SKIP(scan);
4466                 hardcount++;
4467             }
4468         } else {
4469             while (scan < loceol && !isALNUM(*scan))
4470                 scan++;
4471         }
4472         break;
4473     case NALNUML:
4474         PL_reg_flags |= RF_tainted;
4475         if (do_utf8) {
4476             loceol = PL_regeol;
4477             while (hardcount < max && scan < loceol &&
4478                    !isALNUM_LC_utf8((U8*)scan)) {
4479                 scan += UTF8SKIP(scan);
4480                 hardcount++;
4481             }
4482         } else {
4483             while (scan < loceol && !isALNUM_LC(*scan))
4484                 scan++;
4485         }
4486         break;
4487     case SPACE:
4488         if (do_utf8) {
4489             loceol = PL_regeol;
4490             LOAD_UTF8_CHARCLASS(space," ");
4491             while (hardcount < max && scan < loceol &&
4492                    (*scan == ' ' ||
4493                     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4494                 scan += UTF8SKIP(scan);
4495                 hardcount++;
4496             }
4497         } else {
4498             while (scan < loceol && isSPACE(*scan))
4499                 scan++;
4500         }
4501         break;
4502     case SPACEL:
4503         PL_reg_flags |= RF_tainted;
4504         if (do_utf8) {
4505             loceol = PL_regeol;
4506             while (hardcount < max && scan < loceol &&
4507                    (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4508                 scan += UTF8SKIP(scan);
4509                 hardcount++;
4510             }
4511         } else {
4512             while (scan < loceol && isSPACE_LC(*scan))
4513                 scan++;
4514         }
4515         break;
4516     case NSPACE:
4517         if (do_utf8) {
4518             loceol = PL_regeol;
4519             LOAD_UTF8_CHARCLASS(space," ");
4520             while (hardcount < max && scan < loceol &&
4521                    !(*scan == ' ' ||
4522                      swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4523                 scan += UTF8SKIP(scan);
4524                 hardcount++;
4525             }
4526         } else {
4527             while (scan < loceol && !isSPACE(*scan))
4528                 scan++;
4529             break;
4530         }
4531     case NSPACEL:
4532         PL_reg_flags |= RF_tainted;
4533         if (do_utf8) {
4534             loceol = PL_regeol;
4535             while (hardcount < max && scan < loceol &&
4536                    !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4537                 scan += UTF8SKIP(scan);
4538                 hardcount++;
4539             }
4540         } else {
4541             while (scan < loceol && !isSPACE_LC(*scan))
4542                 scan++;
4543         }
4544         break;
4545     case DIGIT:
4546         if (do_utf8) {
4547             loceol = PL_regeol;
4548             LOAD_UTF8_CHARCLASS(digit,"0");
4549             while (hardcount < max && scan < loceol &&
4550                    swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4551                 scan += UTF8SKIP(scan);
4552                 hardcount++;
4553             }
4554         } else {
4555             while (scan < loceol && isDIGIT(*scan))
4556                 scan++;
4557         }
4558         break;
4559     case NDIGIT:
4560         if (do_utf8) {
4561             loceol = PL_regeol;
4562             LOAD_UTF8_CHARCLASS(digit,"0");
4563             while (hardcount < max && scan < loceol &&
4564                    !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4565                 scan += UTF8SKIP(scan);
4566                 hardcount++;
4567             }
4568         } else {
4569             while (scan < loceol && !isDIGIT(*scan))
4570                 scan++;
4571         }
4572         break;
4573     default:            /* Called on something of 0 width. */
4574         break;          /* So match right here or not at all. */
4575     }
4576
4577     if (hardcount)
4578         c = hardcount;
4579     else
4580         c = scan - PL_reginput;
4581     PL_reginput = scan;
4582
4583     DEBUG_r({
4584                 SV *re_debug_flags;
4585                 SV *prop = sv_newmortal();
4586                 GET_RE_DEBUG_FLAGS;
4587                 DEBUG_EXECUTE_r({
4588                 regprop(prop, p);
4589                 PerlIO_printf(Perl_debug_log,
4590                               "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
4591                               REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
4592         });
4593         });
4594
4595     return(c);
4596 }
4597
4598 /*
4599  - regrepeat_hard - repeatedly match something, report total lenth and length
4600  *
4601  * The repeater is supposed to have constant non-zero length.
4602  */
4603
4604 STATIC I32
4605 S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
4606 {
4607     register char *scan = Nullch;
4608     register char *start;
4609     register char *loceol = PL_regeol;
4610     I32 l = 0;
4611     I32 count = 0, res = 1;
4612
4613     if (!max)
4614         return 0;
4615
4616     start = PL_reginput;
4617     if (PL_reg_match_utf8) {
4618         while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4619             if (!count++) {
4620                 l = 0;
4621                 while (start < PL_reginput) {
4622                     l++;
4623                     start += UTF8SKIP(start);
4624                 }
4625                 *lp = l;
4626                 if (l == 0)
4627                     return max;
4628             }
4629             if (count == max)
4630                 return count;
4631         }
4632     }
4633     else {
4634         while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4635             if (!count++) {
4636                 *lp = l = PL_reginput - start;
4637                 if (max != REG_INFTY && l*max < loceol - scan)
4638                     loceol = scan + l*max;
4639                 if (l == 0)
4640                     return max;
4641             }
4642         }
4643     }
4644     if (!res)
4645         PL_reginput = scan;
4646
4647     return count;
4648 }
4649
4650 /*
4651 - regclass_swash - prepare the utf8 swash
4652 */
4653
4654 SV *
4655 Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV **altsvp)
4656 {
4657     SV *sw  = NULL;
4658     SV *si  = NULL;
4659     SV *alt = NULL;
4660
4661     if (PL_regdata && PL_regdata->count) {
4662         U32 n = ARG(node);
4663
4664         if (PL_regdata->what[n] == 's') {
4665             SV *rv = (SV*)PL_regdata->data[n];
4666             AV *av = (AV*)SvRV((SV*)rv);
4667             SV **ary = AvARRAY(av);
4668             SV **a, **b;
4669         
4670             /* See the end of regcomp.c:S_reglass() for
4671              * documentation of these array elements. */
4672
4673             si = *ary;
4674             a  = SvTYPE(ary[1]) == SVt_RV   ? &ary[1] : 0;
4675             b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4676
4677             if (a)
4678                 sw = *a;
4679             else if (si && doinit) {
4680                 sw = swash_init("utf8", "", si, 1, 0);
4681                 (void)av_store(av, 1, sw);
4682             }
4683             if (b)
4684                 alt = *b;
4685         }
4686     }
4687         
4688     if (listsvp)
4689         *listsvp = si;
4690     if (altsvp)
4691         *altsvp  = alt;
4692
4693     return sw;
4694 }
4695
4696 /*
4697  - reginclass - determine if a character falls into a character class
4698  
4699   The n is the ANYOF regnode, the p is the target string, lenp
4700   is pointer to the maximum length of how far to go in the p
4701   (if the lenp is zero, UTF8SKIP(p) is used),
4702   do_utf8 tells whether the target string is in UTF-8.
4703
4704  */
4705
4706 STATIC bool
4707 S_reginclass(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, register bool do_utf8)
4708 {
4709     char flags = ANYOF_FLAGS(n);
4710     bool match = FALSE;
4711     UV c = *p;
4712     STRLEN len = 0;
4713     STRLEN plen;
4714
4715     if (do_utf8 && !UTF8_IS_INVARIANT(c))
4716          c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
4717                             ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
4718
4719     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
4720     if (do_utf8 || (flags & ANYOF_UNICODE)) {
4721         if (lenp)
4722             *lenp = 0;
4723         if (do_utf8 && !ANYOF_RUNTIME(n)) {
4724             if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4725                 match = TRUE;
4726         }
4727         if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
4728             match = TRUE;
4729         if (!match) {
4730             AV *av;
4731             SV *sw = regclass_swash(n, TRUE, 0, (SV**)&av);
4732         
4733             if (sw) {
4734                 if (swash_fetch(sw, p, do_utf8))
4735                     match = TRUE;
4736                 else if (flags & ANYOF_FOLD) {
4737                     if (!match && lenp && av) {
4738                         I32 i;
4739                       
4740                         for (i = 0; i <= av_len(av); i++) {
4741                             SV* sv = *av_fetch(av, i, FALSE);
4742                             STRLEN len;
4743                             char *s = SvPV(sv, len);
4744                         
4745                             if (len <= plen && memEQ(s, (char*)p, len)) {
4746                                 *lenp = len;
4747                                 match = TRUE;
4748                                 break;
4749                             }
4750                         }
4751                     }
4752                     if (!match) {
4753                         U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4754                         STRLEN tmplen;
4755
4756                         to_utf8_fold(p, tmpbuf, &tmplen);
4757                         if (swash_fetch(sw, tmpbuf, do_utf8))
4758                             match = TRUE;
4759                     }
4760                 }
4761             }
4762         }
4763         if (match && lenp && *lenp == 0)
4764             *lenp = UNISKIP(NATIVE_TO_UNI(c));
4765     }
4766     if (!match && c < 256) {
4767         if (ANYOF_BITMAP_TEST(n, c))
4768             match = TRUE;
4769         else if (flags & ANYOF_FOLD) {
4770             U8 f;
4771
4772             if (flags & ANYOF_LOCALE) {
4773                 PL_reg_flags |= RF_tainted;
4774                 f = PL_fold_locale[c];
4775             }
4776             else
4777                 f = PL_fold[c];
4778             if (f != c && ANYOF_BITMAP_TEST(n, f))
4779                 match = TRUE;
4780         }
4781         
4782         if (!match && (flags & ANYOF_CLASS)) {
4783             PL_reg_flags |= RF_tainted;
4784             if (
4785                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
4786                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
4787                 (ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
4788                 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
4789                 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
4790                 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
4791                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
4792                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4793                 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
4794                 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
4795                 (ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
4796                 (ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
4797                 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
4798                 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
4799                 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
4800                 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
4801                 (ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
4802                 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
4803                 (ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
4804                 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
4805                 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
4806                 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
4807                 (ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
4808                 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
4809                 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
4810                 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
4811                 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
4812                 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
4813                 (ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
4814                 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
4815                 ) /* How's that for a conditional? */
4816             {
4817                 match = TRUE;
4818             }
4819         }
4820     }
4821
4822     return (flags & ANYOF_INVERT) ? !match : match;
4823 }
4824
4825 STATIC U8 *
4826 S_reghop(pTHX_ U8 *s, I32 off)
4827 {
4828     return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4829 }
4830
4831 STATIC U8 *
4832 S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
4833 {
4834     if (off >= 0) {
4835         while (off-- && s < lim) {
4836             /* XXX could check well-formedness here */
4837             s += UTF8SKIP(s);
4838         }
4839     }
4840     else {
4841         while (off++) {
4842             if (s > lim) {
4843                 s--;
4844                 if (UTF8_IS_CONTINUED(*s)) {
4845                     while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4846                         s--;
4847                 }
4848                 /* XXX could check well-formedness here */
4849             }
4850         }
4851     }
4852     return s;
4853 }
4854
4855 STATIC U8 *
4856 S_reghopmaybe(pTHX_ U8 *s, I32 off)
4857 {
4858     return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4859 }
4860
4861 STATIC U8 *
4862 S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
4863 {
4864     if (off >= 0) {
4865         while (off-- && s < lim) {
4866             /* XXX could check well-formedness here */
4867             s += UTF8SKIP(s);
4868         }
4869         if (off >= 0)
4870             return 0;
4871     }
4872     else {
4873         while (off++) {
4874             if (s > lim) {
4875                 s--;
4876                 if (UTF8_IS_CONTINUED(*s)) {
4877                     while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4878                         s--;
4879                 }
4880                 /* XXX could check well-formedness here */
4881             }
4882             else
4883                 break;
4884         }
4885         if (off <= 0)
4886             return 0;
4887     }
4888     return s;
4889 }
4890
4891 static void
4892 restore_pos(pTHX_ void *arg)
4893 {
4894     (void)arg; /* unused */
4895     if (PL_reg_eval_set) {
4896         if (PL_reg_oldsaved) {
4897             PL_reg_re->subbeg = PL_reg_oldsaved;
4898             PL_reg_re->sublen = PL_reg_oldsavedlen;
4899 #ifdef PERL_COPY_ON_WRITE
4900             PL_reg_re->saved_copy = PL_nrs;
4901 #endif
4902             RX_MATCH_COPIED_on(PL_reg_re);
4903         }
4904         PL_reg_magic->mg_len = PL_reg_oldpos;
4905         PL_reg_eval_set = 0;
4906         PL_curpm = PL_reg_oldcurpm;
4907     }   
4908 }
4909
4910 STATIC void
4911 S_to_utf8_substr(pTHX_ register regexp *prog)
4912 {
4913     SV* sv;
4914     if (prog->float_substr && !prog->float_utf8) {
4915         prog->float_utf8 = sv = newSVsv(prog->float_substr);
4916         sv_utf8_upgrade(sv);
4917         if (SvTAIL(prog->float_substr))
4918             SvTAIL_on(sv);
4919         if (prog->float_substr == prog->check_substr)
4920             prog->check_utf8 = sv;
4921     }
4922     if (prog->anchored_substr && !prog->anchored_utf8) {
4923         prog->anchored_utf8 = sv = newSVsv(prog->anchored_substr);
4924         sv_utf8_upgrade(sv);
4925         if (SvTAIL(prog->anchored_substr))
4926             SvTAIL_on(sv);
4927         if (prog->anchored_substr == prog->check_substr)
4928             prog->check_utf8 = sv;
4929     }
4930 }
4931
4932 STATIC void
4933 S_to_byte_substr(pTHX_ register regexp *prog)
4934 {
4935     SV* sv;
4936     if (prog->float_utf8 && !prog->float_substr) {
4937         prog->float_substr = sv = newSVsv(prog->float_utf8);
4938         if (sv_utf8_downgrade(sv, TRUE)) {
4939             if (SvTAIL(prog->float_utf8))
4940                 SvTAIL_on(sv);
4941         } else {
4942             SvREFCNT_dec(sv);
4943             prog->float_substr = sv = &PL_sv_undef;
4944         }
4945         if (prog->float_utf8 == prog->check_utf8)
4946             prog->check_substr = sv;
4947     }
4948     if (prog->anchored_utf8 && !prog->anchored_substr) {
4949         prog->anchored_substr = sv = newSVsv(prog->anchored_utf8);
4950         if (sv_utf8_downgrade(sv, TRUE)) {
4951             if (SvTAIL(prog->anchored_utf8))
4952                 SvTAIL_on(sv);
4953         } else {
4954             SvREFCNT_dec(sv);
4955             prog->anchored_substr = sv = &PL_sv_undef;
4956         }
4957         if (prog->anchored_utf8 == prog->check_utf8)
4958             prog->check_substr = sv;
4959     }
4960 }