regexec.c

   1 /*    regexec.c
   2  */
   3
   4 /*
   5  * "One Ring to rule them all, One Ring to find them..."
   6  */
   7
   8 /* This file contains functions for executing a regular expression.  See
   9  * also regcomp.c which funnily enough, contains functions for compiling
  10  * a regular expression.
  11  *
  12  * This file is also copied at build time to ext/re/re_exec.c, where
  13  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
  14  * This causes the main functions to be compiled under new names and with
  15  * debugging support added, which makes "use re 'debug'" work.
  16  */
  17
  18 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
  19  * confused with the original package (see point 3 below).  Thanks, Henry!
  20  */
  21
  22 /* Additional note: this code is very heavily munged from Henry's version
  23  * in places.  In some spots I've traded clarity for efficiency, so don't
  24  * blame Henry for some of the lack of readability.
  25  */
  26
  27 /* The names of the functions have been changed from regcomp and
  28  * regexec to  pregcomp and pregexec in order to avoid conflicts
  29  * with the POSIX routines of the same names.
  30 */
  31
  32 #ifdef PERL_EXT_RE_BUILD
  33 /* need to replace pregcomp et al, so enable that */
  34 #  ifndef PERL_IN_XSUB_RE
  35 #    define PERL_IN_XSUB_RE
  36 #  endif
  37 /* need access to debugger hooks */
  38 #  if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
  39 #    define DEBUGGING
  40 #  endif
  41 #endif
  42
  43 #ifdef PERL_IN_XSUB_RE
  44 /* We *really* need to overwrite these symbols: */
  45 #  define Perl_regexec_flags my_regexec
  46 #  define Perl_regdump my_regdump
  47 #  define Perl_regprop my_regprop
  48 #  define Perl_re_intuit_start my_re_intuit_start
  49 /* *These* symbols are masked to allow static link. */
  50 #  define Perl_pregexec my_pregexec
  51 #  define Perl_reginitcolors my_reginitcolors
  52 #  define Perl_regclass_swash my_regclass_swash
  53
  54 #  define PERL_NO_GET_CONTEXT
  55 #endif
  56
  57 /*
  58  * pregcomp and pregexec -- regsub and regerror are not used in perl
  59  *
  60  *      Copyright (c) 1986 by University of Toronto.
  61  *      Written by Henry Spencer.  Not derived from licensed software.
  62  *
  63  *      Permission is granted to anyone to use this software for any
  64  *      purpose on any computer system, and to redistribute it freely,
  65  *      subject to the following restrictions:
  66  *
  67  *      1. The author is not responsible for the consequences of use of
  68  *              this software, no matter how awful, even if they arise
  69  *              from defects in it.
  70  *
  71  *      2. The origin of this software must not be misrepresented, either
  72  *              by explicit claim or by omission.
  73  *
  74  *      3. Altered versions must be plainly marked as such, and must not
  75  *              be misrepresented as being the original software.
  76  *
  77  ****    Alterations to Henry's code are...
  78  ****
  79  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
  80  ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
  81  ****
  82  ****    You may distribute under the terms of either the GNU General Public
  83  ****    License or the Artistic License, as specified in the README file.
  84  *
  85  * Beware that some of this code is subtly aware of the way operator
  86  * precedence is structured in regular expressions.  Serious changes in
  87  * regular-expression syntax might require a total rethink.
  88  */
  89 #include "EXTERN.h"
  90 #define PERL_IN_REGEXEC_C
  91 #include "perl.h"
  92
  93 #include "regcomp.h"
  94
  95 #define RF_tainted      1               /* tainted information used? */
  96 #define RF_warned       2               /* warned about big count? */
  97 #define RF_evaled       4               /* Did an EVAL with setting? */
  98 #define RF_utf8         8               /* String contains multibyte chars? */
  99
 100 #define UTF ((PL_reg_flags & RF_utf8) != 0)
 101
 102 #define RS_init         1               /* eval environment created */
 103 #define RS_set          2               /* replsv value is set */
 104
 105 #ifndef STATIC
 106 #define STATIC  static
 107 #endif
 108
 109 #define REGINCLASS(p,c)  (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
 110
 111 /*
 112  * Forwards.
 113  */
 114
 115 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
 116 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
 117
 118 #define HOPc(pos,off) ((char *)(PL_reg_match_utf8 \
 119             ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
 120             : (U8*)(pos + off)))
 121 #define HOPBACKc(pos, off) ((char*)     \
 122     ((PL_reg_match_utf8)                \
 123         ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
 124     : (pos - off >= PL_bostr)           \
 125         ? (U8*)(pos - off)              \
 126     : (U8*)NULL)                        \
 127 )
 128
 129 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
 130 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
 131
 132 #define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
 133     if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((U8*)str); assert(ok); LEAVE; } } STMT_END
 134 #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
 135 #define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
 136 #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
 137 #define LOAD_UTF8_CHARCLASS_MARK()  LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
 138
 139 /* for use after a quantifier and before an EXACT-like node -- japhy */
 140 #define JUMPABLE(rn) ( \
 141     OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
 142     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
 143     OP(rn) == PLUS || OP(rn) == MINMOD || \
 144     (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
 145 )
 146
 147 #define HAS_TEXT(rn) ( \
 148     PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
 149 )
 150
 151 /*
 152   Search for mandatory following text node; for lookahead, the text must
 153   follow but for lookbehind (rn->flags != 0) we skip to the next step.
 154 */
 155 #define FIND_NEXT_IMPT(rn) STMT_START { \
 156     while (JUMPABLE(rn)) \
 157         if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
 158             rn = NEXTOPER(NEXTOPER(rn)); \
 159         else if (OP(rn) == PLUS) \
 160             rn = NEXTOPER(rn); \
 161         else if (OP(rn) == IFMATCH) \
 162             rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
 163         else rn += NEXT_OFF(rn); \
 164 } STMT_END
 165
 166 static void restore_pos(pTHX_ void *arg);
 167
 168 STATIC CHECKPOINT
 169 S_regcppush(pTHX_ I32 parenfloor)
 170 {
 171     const int retval = PL_savestack_ix;
 172 #define REGCP_PAREN_ELEMS 4
 173     const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
 174     int p;
 175
 176     if (paren_elems_to_push < 0)
 177         Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
 178
 179 #define REGCP_OTHER_ELEMS 6
 180     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
 181     for (p = PL_regsize; p > parenfloor; p--) {
 182 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
 183         SSPUSHINT(PL_regendp[p]);
 184         SSPUSHINT(PL_regstartp[p]);
 185         SSPUSHPTR(PL_reg_start_tmp[p]);
 186         SSPUSHINT(p);
 187     }
 188 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
 189     SSPUSHINT(PL_regsize);
 190     SSPUSHINT(*PL_reglastparen);
 191     SSPUSHINT(*PL_reglastcloseparen);
 192     SSPUSHPTR(PL_reginput);
 193 #define REGCP_FRAME_ELEMS 2
 194 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
 195  * are needed for the regexp context stack bookkeeping. */
 196     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 197     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
 198
 199     return retval;
 200 }
 201
 202 /* These are needed since we do not localize EVAL nodes: */
 203 #  define REGCP_SET(cp)  DEBUG_r(PerlIO_printf(Perl_debug_log,          \
 204                              "  Setting an EVAL scope, savestack=%"IVdf"\n",    \
 205                              (IV)PL_savestack_ix)); cp = PL_savestack_ix
 206
 207 #  define REGCP_UNWIND(cp)  DEBUG_r(cp != PL_savestack_ix ?             \
 208                                 PerlIO_printf(Perl_debug_log,           \
 209                                 "  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
 210                                 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
 211
 212 STATIC char *
 213 S_regcppop(pTHX)
 214 {
 215     I32 i;
 216     char *input;
 217
 218     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
 219     i = SSPOPINT;
 220     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
 221     i = SSPOPINT; /* Parentheses elements to pop. */
 222     input = (char *) SSPOPPTR;
 223     *PL_reglastcloseparen = SSPOPINT;
 224     *PL_reglastparen = SSPOPINT;
 225     PL_regsize = SSPOPINT;
 226
 227     /* Now restore the parentheses context. */
 228     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 229          i > 0; i -= REGCP_PAREN_ELEMS) {
 230         I32 tmps;
 231         U32 paren = (U32)SSPOPINT;
 232         PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
 233         PL_regstartp[paren] = SSPOPINT;
 234         tmps = SSPOPINT;
 235         if (paren <= *PL_reglastparen)
 236             PL_regendp[paren] = tmps;
 237         DEBUG_r(
 238             PerlIO_printf(Perl_debug_log,
 239                           "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
 240                           (UV)paren, (IV)PL_regstartp[paren],
 241                           (IV)(PL_reg_start_tmp[paren] - PL_bostr),
 242                           (IV)PL_regendp[paren],
 243                           (paren > *PL_reglastparen ? "(no)" : ""));
 244         );
 245     }
 246     DEBUG_r(
 247         if (*PL_reglastparen + 1 <= PL_regnpar) {
 248             PerlIO_printf(Perl_debug_log,
 249                           "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
 250                           (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
 251         }
 252     );
 253 #if 1
 254     /* It would seem that the similar code in regtry()
 255      * already takes care of this, and in fact it is in
 256      * a better location to since this code can #if 0-ed out
 257      * but the code in regtry() is needed or otherwise tests
 258      * requiring null fields (pat.t#187 and split.t#{13,14}
 259      * (as of patchlevel 7877)  will fail.  Then again,
 260      * this code seems to be necessary or otherwise
 261      * building DynaLoader will fail:
 262      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
 263      * --jhi */
 264     for (i = *PL_reglastparen + 1; (U32)i <= PL_regnpar; i++) {
 265         if (i > PL_regsize)
 266             PL_regstartp[i] = -1;
 267         PL_regendp[i] = -1;
 268     }
 269 #endif
 270     return input;
 271 }
 272
 273 typedef struct re_cc_state
 274 {
 275     I32 ss;
 276     regnode *node;
 277     struct re_cc_state *prev;
 278     CURCUR *cc;
 279     regexp *re;
 280 } re_cc_state;
 281
 282 #define regcpblow(cp) LEAVE_SCOPE(cp)   /* Ignores regcppush()ed data. */
 283
 284 #define TRYPAREN(paren, n, input) {                             \
 285     if (paren) {                                                \
 286         if (n) {                                                \
 287             PL_regstartp[paren] = HOPc(input, -1) - PL_bostr;   \
 288             PL_regendp[paren] = input - PL_bostr;               \
 289         }                                                       \
 290         else                                                    \
 291             PL_regendp[paren] = -1;                             \
 292     }                                                           \
 293     if (regmatch(next))                                         \
 294         sayYES;                                                 \
 295     if (paren && n)                                             \
 296         PL_regendp[paren] = -1;                                 \
 297 }
 298
 299
 300 /*
 301  * pregexec and friends
 302  */
 303
 304 /*
 305  - pregexec - match a regexp against a string
 306  */
 307 I32
 308 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
 309          char *strbeg, I32 minend, SV *screamer, U32 nosave)
 310 /* strend: pointer to null at end of string */
 311 /* strbeg: real beginning of string */
 312 /* minend: end of match must be >=minend after stringarg. */
 313 /* nosave: For optimizations. */
 314 {
 315     return
 316         regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
 317                       nosave ? 0 : REXEC_COPY_STR);
 318 }
 319
 320 STATIC void
 321 S_cache_re(pTHX_ regexp *prog)
 322 {
 323     PL_regprecomp = prog->precomp;              /* Needed for FAIL. */
 324 #ifdef DEBUGGING
 325     PL_regprogram = prog->program;
 326 #endif
 327     PL_regnpar = prog->nparens;
 328     PL_regdata = prog->data;
 329     PL_reg_re = prog;
 330 }
 331
 332 /*
 333  * Need to implement the following flags for reg_anch:
 334  *
 335  * USE_INTUIT_NOML              - Useful to call re_intuit_start() first
 336  * USE_INTUIT_ML
 337  * INTUIT_AUTORITATIVE_NOML     - Can trust a positive answer
 338  * INTUIT_AUTORITATIVE_ML
 339  * INTUIT_ONCE_NOML             - Intuit can match in one location only.
 340  * INTUIT_ONCE_ML
 341  *
 342  * Another flag for this function: SECOND_TIME (so that float substrs
 343  * with giant delta may be not rechecked).
 344  */
 345
 346 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
 347
 348 /* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
 349    Otherwise, only SvCUR(sv) is used to get strbeg. */
 350
 351 /* XXXX We assume that strpos is strbeg unless sv. */
 352
 353 /* XXXX Some places assume that there is a fixed substring.
 354         An update may be needed if optimizer marks as "INTUITable"
 355         RExen without fixed substrings.  Similarly, it is assumed that
 356         lengths of all the strings are no more than minlen, thus they
 357         cannot come from lookahead.
 358         (Or minlen should take into account lookahead.) */
 359
 360 /* A failure to find a constant substring means that there is no need to make
 361    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
 362    finding a substring too deep into the string means that less calls to
 363    regtry() should be needed.
 364
 365    REx compiler's optimizer found 4 possible hints:
 366         a) Anchored substring;
 367         b) Fixed substring;
 368         c) Whether we are anchored (beginning-of-line or \G);
 369         d) First node (of those at offset 0) which may distingush positions;
 370    We use a)b)d) and multiline-part of c), and try to find a position in the
 371    string which does not contradict any of them.
 372  */
 373
 374 /* Most of decisions we do here should have been done at compile time.
 375    The nodes of the REx which we used for the search should have been
 376    deleted from the finite automaton. */
 377
 378 char *
 379 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
 380                      char *strend, U32 flags, re_scream_pos_data *data)
 381 {
 382     register I32 start_shift = 0;
 383     /* Should be nonnegative! */
 384     register I32 end_shift   = 0;
 385     register char *s;
 386     register SV *check;
 387     char *strbeg;
 388     char *t;
 389     const int do_utf8 = sv ? SvUTF8(sv) : 0;    /* if no sv we have to assume bytes */
 390     I32 ml_anch;
 391     register char *other_last = NULL;   /* other substr checked before this */
 392     char *check_at = NULL;              /* check substr found at this pos */
 393     const I32 multiline = PL_multiline | (prog->reganch & PMf_MULTILINE);
 394 #ifdef DEBUGGING
 395     const char * const i_strpos = strpos;
 396     SV * const dsv = PERL_DEBUG_PAD_ZERO(0);
 397 #endif
 398     RX_MATCH_UTF8_set(prog,do_utf8);
 399
 400     if (prog->reganch & ROPT_UTF8) {
 401         DEBUG_r(PerlIO_printf(Perl_debug_log,
 402                               "UTF-8 regex...\n"));
 403         PL_reg_flags |= RF_utf8;
 404     }
 405
 406     DEBUG_r({
 407          const char *s   = PL_reg_match_utf8 ?
 408                          sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
 409                          strpos;
 410          const int   len = PL_reg_match_utf8 ?
 411                          (int)strlen(s) : strend - strpos;
 412          if (!PL_colorset)
 413               reginitcolors();
 414          if (PL_reg_match_utf8)
 415              DEBUG_r(PerlIO_printf(Perl_debug_log,
 416                                    "UTF-8 target...\n"));
 417          PerlIO_printf(Perl_debug_log,
 418                        "%sGuessing start of match, REx%s \"%s%.60s%s%s\" against \"%s%.*s%s%s\"...\n",
 419                        PL_colors[4],PL_colors[5],PL_colors[0],
 420                        prog->precomp,
 421                        PL_colors[1],
 422                        (strlen(prog->precomp) > 60 ? "..." : ""),
 423                        PL_colors[0],
 424                        (int)(len > 60 ? 60 : len),
 425                        s, PL_colors[1],
 426                        (len > 60 ? "..." : "")
 427               );
 428     });
 429
 430     /* CHR_DIST() would be more correct here but it makes things slow. */
 431     if (prog->minlen > strend - strpos) {
 432         DEBUG_r(PerlIO_printf(Perl_debug_log,
 433                               "String too short... [re_intuit_start]\n"));
 434         goto fail;
 435     }
 436     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
 437     PL_regeol = strend;
 438     if (do_utf8) {
 439         if (!prog->check_utf8 && prog->check_substr)
 440             to_utf8_substr(prog);
 441         check = prog->check_utf8;
 442     } else {
 443         if (!prog->check_substr && prog->check_utf8)
 444             to_byte_substr(prog);
 445         check = prog->check_substr;
 446     }
 447    if (check == &PL_sv_undef) {
 448         DEBUG_r(PerlIO_printf(Perl_debug_log,
 449                 "Non-utf string cannot match utf check string\n"));
 450         goto fail;
 451     }
 452     if (prog->reganch & ROPT_ANCH) {    /* Match at beg-of-str or after \n */
 453         ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
 454                      || ( (prog->reganch & ROPT_ANCH_BOL)
 455                           && !multiline ) );    /* Check after \n? */
 456
 457         if (!ml_anch) {
 458           if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
 459                                   | ROPT_IMPLICIT)) /* not a real BOL */
 460                /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
 461                && sv && !SvROK(sv)
 462                && (strpos != strbeg)) {
 463               DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
 464               goto fail;
 465           }
 466           if (prog->check_offset_min == prog->check_offset_max &&
 467               !(prog->reganch & ROPT_CANY_SEEN)) {
 468             /* Substring at constant offset from beg-of-str... */
 469             I32 slen;
 470
 471             s = HOP3c(strpos, prog->check_offset_min, strend);
 472             if (SvTAIL(check)) {
 473                 slen = SvCUR(check);    /* >= 1 */
 474
 475                 if ( strend - s > slen || strend - s < slen - 1
 476                      || (strend - s == slen && strend[-1] != '\n')) {
 477                     DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
 478                     goto fail_finish;
 479                 }
 480                 /* Now should match s[0..slen-2] */
 481                 slen--;
 482                 if (slen && (*SvPVX_const(check) != *s
 483                              || (slen > 1
 484                                  && memNE(SvPVX_const(check), s, slen)))) {
 485                   report_neq:
 486                     DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
 487                     goto fail_finish;
 488                 }
 489             }
 490             else if (*SvPVX_const(check) != *s
 491                      || ((slen = SvCUR(check)) > 1
 492                          && memNE(SvPVX_const(check), s, slen)))
 493                 goto report_neq;
 494             check_at = s;
 495             goto success_at_start;
 496           }
 497         }
 498         /* Match is anchored, but substr is not anchored wrt beg-of-str. */
 499         s = strpos;
 500         start_shift = prog->check_offset_min; /* okay to underestimate on CC */
 501         end_shift = prog->minlen - start_shift -
 502             CHR_SVLEN(check) + (SvTAIL(check) != 0);
 503         if (!ml_anch) {
 504             const I32 end = prog->check_offset_max + CHR_SVLEN(check)
 505                                          - (SvTAIL(check) != 0);
 506             const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
 507
 508             if (end_shift < eshift)
 509                 end_shift = eshift;
 510         }
 511     }
 512     else {                              /* Can match at random position */
 513         ml_anch = 0;
 514         s = strpos;
 515         start_shift = prog->check_offset_min; /* okay to underestimate on CC */
 516         /* Should be nonnegative! */
 517         end_shift = prog->minlen - start_shift -
 518             CHR_SVLEN(check) + (SvTAIL(check) != 0);
 519     }
 520
 521 #ifdef DEBUGGING        /* 7/99: reports of failure (with the older version) */
 522     if (end_shift < 0)
 523         Perl_croak(aTHX_ "panic: end_shift");
 524 #endif
 525
 526   restart:
 527     /* Find a possible match in the region s..strend by looking for
 528        the "check" substring in the region corrected by start/end_shift. */
 529     if (flags & REXEC_SCREAM) {
 530         I32 p = -1;                     /* Internal iterator of scream. */
 531         I32 * const pp = data ? data->scream_pos : &p;
 532
 533         if (PL_screamfirst[BmRARE(check)] >= 0
 534             || ( BmRARE(check) == '\n'
 535                  && (BmPREVIOUS(check) == SvCUR(check) - 1)
 536                  && SvTAIL(check) ))
 537             s = screaminstr(sv, check,
 538                             start_shift + (s - strbeg), end_shift, pp, 0);
 539         else
 540             goto fail_finish;
 541         /* we may be pointing at the wrong string */
 542         if (s && RX_MATCH_COPIED(prog))
 543             s = strbeg + (s - SvPVX_const(sv));
 544         if (data)
 545             *data->scream_olds = s;
 546     }
 547     else if (prog->reganch & ROPT_CANY_SEEN)
 548         s = fbm_instr((U8*)(s + start_shift),
 549                       (U8*)(strend - end_shift),
 550                       check, multiline ? FBMrf_MULTILINE : 0);
 551     else
 552         s = fbm_instr(HOP3(s, start_shift, strend),
 553                       HOP3(strend, -end_shift, strbeg),
 554                       check, multiline ? FBMrf_MULTILINE : 0);
 555
 556     /* Update the count-of-usability, remove useless subpatterns,
 557         unshift s.  */
 558
 559         /* FIXME - DEBUG_EXECUTE_r if that is merged to maint.  */
 560     DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr \"%s%.*s%s\"%s%s",
 561                           (s ? "Found" : "Did not find"),
 562                           (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
 563                           PL_colors[0],
 564                           (int)(SvCUR(check) - (SvTAIL(check)!=0)),
 565                           SvPVX_const(check),
 566                           PL_colors[1], (SvTAIL(check) ? "$" : ""),
 567                           (s ? " at offset " : "...\n") ) );
 568
 569     if (!s)
 570         goto fail_finish;
 571
 572     check_at = s;
 573
 574     /* Finish the diagnostic message */
 575     DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
 576
 577     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
 578        Start with the other substr.
 579        XXXX no SCREAM optimization yet - and a very coarse implementation
 580        XXXX /ttx+/ results in anchored="ttx", floating="x".  floating will
 581                 *always* match.  Probably should be marked during compile...
 582        Probably it is right to do no SCREAM here...
 583      */
 584
 585     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
 586         /* Take into account the "other" substring. */
 587         /* XXXX May be hopelessly wrong for UTF... */
 588         if (!other_last)
 589             other_last = strpos;
 590         if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
 591           do_other_anchored:
 592             {
 593                 char * const last = HOP3c(s, -start_shift, strbeg);
 594                 char *last1, *last2;
 595                 char *s1 = s;
 596                 SV* must;
 597
 598                 t = s - prog->check_offset_max;
 599                 if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 600                     && (!do_utf8
 601                         || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
 602                             && t > strpos)))
 603                     NOOP;
 604                 else
 605                     t = strpos;
 606                 t = HOP3c(t, prog->anchored_offset, strend);
 607                 if (t < other_last)     /* These positions already checked */
 608                     t = other_last;
 609                 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
 610                 if (last < last1)
 611                     last1 = last;
 612  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
 613                 /* On end-of-str: see comment below. */
 614                 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
 615                 if (must == &PL_sv_undef) {
 616                     s = (char*)NULL;
 617                     DEBUG_r(must = prog->anchored_utf8);        /* for debug */
 618                 }
 619                 else
 620                     s = fbm_instr(
 621                         (unsigned char*)t,
 622                         HOP3(HOP3(last1, prog->anchored_offset, strend)
 623                                 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
 624                         must,
 625                         multiline ? FBMrf_MULTILINE : 0
 626                     );
 627                 DEBUG_r(PerlIO_printf(Perl_debug_log,
 628                         "%s anchored substr \"%s%.*s%s\"%s",
 629                         (s ? "Found" : "Contradicts"),
 630                         PL_colors[0],
 631                           (int)(SvCUR(must)
 632                           - (SvTAIL(must)!=0)),
 633                           SvPVX_const(must),
 634                           PL_colors[1], (SvTAIL(must) ? "$" : "")));
 635                 if (!s) {
 636                     if (last1 >= last2) {
 637                         DEBUG_r(PerlIO_printf(Perl_debug_log,
 638                                                 ", giving up...\n"));
 639                         goto fail_finish;
 640                     }
 641                     DEBUG_r(PerlIO_printf(Perl_debug_log,
 642                         ", trying floating at offset %ld...\n",
 643                         (long)(HOP3c(s1, 1, strend) - i_strpos)));
 644                     other_last = HOP3c(last1, prog->anchored_offset+1, strend);
 645                     s = HOP3c(last, 1, strend);
 646                     goto restart;
 647                 }
 648                 else {
 649                     DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 650                           (long)(s - i_strpos)));
 651                     t = HOP3c(s, -prog->anchored_offset, strbeg);
 652                     other_last = HOP3c(s, 1, strend);
 653                     s = s1;
 654                     if (t == strpos)
 655                         goto try_at_start;
 656                     goto try_at_offset;
 657                 }
 658             }
 659         }
 660         else {          /* Take into account the floating substring. */
 661             char *last, *last1;
 662             char *s1 = s;
 663             SV* must;
 664
 665             t = HOP3c(s, -start_shift, strbeg);
 666             last1 = last =
 667                 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
 668             if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
 669                 last = HOP3c(t, prog->float_max_offset, strend);
 670             s = HOP3c(t, prog->float_min_offset, strend);
 671             if (s < other_last)
 672                 s = other_last;
 673  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
 674             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
 675             /* fbm_instr() takes into account exact value of end-of-str
 676                if the check is SvTAIL(ed).  Since false positives are OK,
 677                and end-of-str is not later than strend we are OK. */
 678             if (must == &PL_sv_undef) {
 679                 s = (char*)NULL;
 680                 DEBUG_r(must = prog->float_utf8);       /* for debug message */
 681             }
 682             else
 683                 s = fbm_instr((unsigned char*)s,
 684                               (unsigned char*)last + SvCUR(must)
 685                                   - (SvTAIL(must)!=0),
 686                               must, multiline ? FBMrf_MULTILINE : 0);
 687             /* FIXME - DEBUG_EXECUTE_r if that is merged to maint  */
 688             DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr \"%s%.*s%s\"%s",
 689                     (s ? "Found" : "Contradicts"),
 690                     PL_colors[0],
 691                       (int)(SvCUR(must) - (SvTAIL(must)!=0)),
 692                       SvPVX_const(must),
 693                       PL_colors[1], (SvTAIL(must) ? "$" : "")));
 694             if (!s) {
 695                 if (last1 == last) {
 696                     DEBUG_r(PerlIO_printf(Perl_debug_log,
 697                                             ", giving up...\n"));
 698                     goto fail_finish;
 699                 }
 700                 DEBUG_r(PerlIO_printf(Perl_debug_log,
 701                     ", trying anchored starting at offset %ld...\n",
 702                     (long)(s1 + 1 - i_strpos)));
 703                 other_last = last;
 704                 s = HOP3c(t, 1, strend);
 705                 goto restart;
 706             }
 707             else {
 708                 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 709                       (long)(s - i_strpos)));
 710                 other_last = s; /* Fix this later. --Hugo */
 711                 s = s1;
 712                 if (t == strpos)
 713                     goto try_at_start;
 714                 goto try_at_offset;
 715             }
 716         }
 717     }
 718
 719     t = s - prog->check_offset_max;
 720     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 721         && (!do_utf8
 722             || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*)strpos))
 723                  && t > strpos))) {
 724         /* Fixed substring is found far enough so that the match
 725            cannot start at strpos. */
 726       try_at_offset:
 727         if (ml_anch && t[-1] != '\n') {
 728             /* Eventually fbm_*() should handle this, but often
 729                anchored_offset is not 0, so this check will not be wasted. */
 730             /* XXXX In the code below we prefer to look for "^" even in
 731                presence of anchored substrings.  And we search even
 732                beyond the found float position.  These pessimizations
 733                are historical artefacts only.  */
 734           find_anchor:
 735             while (t < strend - prog->minlen) {
 736                 if (*t == '\n') {
 737                     if (t < check_at - prog->check_offset_min) {
 738                         if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
 739                             /* Since we moved from the found position,
 740                                we definitely contradict the found anchored
 741                                substr.  Due to the above check we do not
 742                                contradict "check" substr.
 743                                Thus we can arrive here only if check substr
 744                                is float.  Redo checking for "other"=="fixed".
 745                              */
 746                             strpos = t + 1;
 747                             DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
 748                                 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
 749                             goto do_other_anchored;
 750                         }
 751                         /* We don't contradict the found floating substring. */
 752                         /* XXXX Why not check for STCLASS? */
 753                         s = t + 1;
 754                         DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
 755                             PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
 756                         goto set_useful;
 757                     }
 758                     /* Position contradicts check-string */
 759                     /* XXXX probably better to look for check-string
 760                        than for "\n", so one should lower the limit for t? */
 761                     DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
 762                         PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
 763                     other_last = strpos = s = t + 1;
 764                     goto restart;
 765                 }
 766                 t++;
 767             }
 768             DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
 769                         PL_colors[0],PL_colors[1]));
 770             goto fail_finish;
 771         }
 772         else {
 773             DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
 774                         PL_colors[0],PL_colors[1]));
 775         }
 776         s = t;
 777       set_useful:
 778         ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);    /* hooray/5 */
 779     }
 780     else {
 781         /* The found string does not prohibit matching at strpos,
 782            - no optimization of calling REx engine can be performed,
 783            unless it was an MBOL and we are not after MBOL,
 784            or a future STCLASS check will fail this. */
 785       try_at_start:
 786         /* Even in this situation we may use MBOL flag if strpos is offset
 787            wrt the start of the string. */
 788         if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
 789             && (strpos != strbeg) && strpos[-1] != '\n'
 790             /* May be due to an implicit anchor of m{.*foo}  */
 791             && !(prog->reganch & ROPT_IMPLICIT))
 792         {
 793             t = strpos;
 794             goto find_anchor;
 795         }
 796         DEBUG_r( if (ml_anch)
 797             PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
 798                         (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
 799         );
 800       success_at_start:
 801         if (!(prog->reganch & ROPT_NAUGHTY)     /* XXXX If strpos moved? */
 802             && (do_utf8 ? (
 803                 prog->check_utf8                /* Could be deleted already */
 804                 && --BmUSEFUL(prog->check_utf8) < 0
 805                 && (prog->check_utf8 == prog->float_utf8)
 806             ) : (
 807                 prog->check_substr              /* Could be deleted already */
 808                 && --BmUSEFUL(prog->check_substr) < 0
 809                 && (prog->check_substr == prog->float_substr)
 810             )))
 811         {
 812             /* If flags & SOMETHING - do not do it many times on the same match */
 813             DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
 814             SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
 815             if (do_utf8 ? prog->check_substr : prog->check_utf8)
 816                 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
 817             prog->check_substr = prog->check_utf8 = NULL;       /* disable */
 818             prog->float_substr = prog->float_utf8 = NULL;       /* clear */
 819             check = NULL;                       /* abort */
 820             s = strpos;
 821             /* XXXX This is a remnant of the old implementation.  It
 822                     looks wasteful, since now INTUIT can use many
 823                     other heuristics. */
 824             prog->reganch &= ~RE_USE_INTUIT;
 825         }
 826         else
 827             s = strpos;
 828     }
 829
 830     /* Last resort... */
 831     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
 832     if (prog->regstclass) {
 833         /* minlen == 0 is possible if regstclass is \b or \B,
 834            and the fixed substr is ''$.
 835            Since minlen is already taken into account, s+1 is before strend;
 836            accidentally, minlen >= 1 guaranties no false positives at s + 1
 837            even for \b or \B.  But (minlen? 1 : 0) below assumes that
 838            regstclass does not come from lookahead...  */
 839         /* If regstclass takes bytelength more than 1: If charlength==1, OK.
 840            This leaves EXACTF only, which is dealt with in find_byclass().  */
 841         const U8* const str = (U8*)STRING(prog->regstclass);
 842         const int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
 843                           ? CHR_DIST((U8 *)str+STR_LEN(prog->regstclass),
 844                                      (U8 *)str)
 845                     : 1);
 846         const char * const endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
 847                 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
 848                 : (prog->float_substr || prog->float_utf8
 849                    ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
 850                            cl_l, strend)
 851                    : strend);
 852
 853         t = s;
 854         cache_re(prog);
 855         s = find_byclass(prog, prog->regstclass, s, endpos, 1);
 856         if (!s) {
 857 #ifdef DEBUGGING
 858             const char *what = NULL;
 859 #endif
 860             if (endpos == strend) {
 861                 DEBUG_r( PerlIO_printf(Perl_debug_log,
 862                                 "Could not match STCLASS...\n") );
 863                 goto fail;
 864             }
 865             DEBUG_r( PerlIO_printf(Perl_debug_log,
 866                                    "This position contradicts STCLASS...\n") );
 867             if ((prog->reganch & ROPT_ANCH) && !ml_anch)
 868                 goto fail;
 869             /* Contradict one of substrings */
 870             if (prog->anchored_substr || prog->anchored_utf8) {
 871                 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
 872                     DEBUG_r( what = "anchored" );
 873                   hop_and_restart:
 874                     s = HOP3c(t, 1, strend);
 875                     if (s + start_shift + end_shift > strend) {
 876                         /* XXXX Should be taken into account earlier? */
 877                         DEBUG_r( PerlIO_printf(Perl_debug_log,
 878                                                "Could not match STCLASS...\n") );
 879                         goto fail;
 880                     }
 881                     if (!check)
 882                         goto giveup;
 883                     DEBUG_r( PerlIO_printf(Perl_debug_log,
 884                                 "Looking for %s substr starting at offset %ld...\n",
 885                                  what, (long)(s + start_shift - i_strpos)) );
 886                     goto restart;
 887                 }
 888                 /* Have both, check_string is floating */
 889                 if (t + start_shift >= check_at) /* Contradicts floating=check */
 890                     goto retry_floating_check;
 891                 /* Recheck anchored substring, but not floating... */
 892                 s = check_at;
 893                 if (!check)
 894                     goto giveup;
 895                 DEBUG_r( PerlIO_printf(Perl_debug_log,
 896                           "Looking for anchored substr starting at offset %ld...\n",
 897                           (long)(other_last - i_strpos)) );
 898                 goto do_other_anchored;
 899             }
 900             /* Another way we could have checked stclass at the
 901                current position only: */
 902             if (ml_anch) {
 903                 s = t = t + 1;
 904                 if (!check)
 905                     goto giveup;
 906                 DEBUG_r( PerlIO_printf(Perl_debug_log,
 907                           "Looking for /%s^%s/m starting at offset %ld...\n",
 908                           PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
 909                 goto try_at_offset;
 910             }
 911             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))     /* Could have been deleted */
 912                 goto fail;
 913             /* Check is floating subtring. */
 914           retry_floating_check:
 915             t = check_at - start_shift;
 916             DEBUG_r( what = "floating" );
 917             goto hop_and_restart;
 918         }
 919         if (t != s) {
 920             DEBUG_r(PerlIO_printf(Perl_debug_log,
 921                         "By STCLASS: moving %ld --> %ld\n",
 922                                   (long)(t - i_strpos), (long)(s - i_strpos))
 923                    );
 924         }
 925         else {
 926             DEBUG_r(PerlIO_printf(Perl_debug_log,
 927                                   "Does not contradict STCLASS...\n");
 928                    );
 929         }
 930     }
 931   giveup:
 932     DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
 933                           PL_colors[4], (check ? "Guessed" : "Giving up"),
 934                           PL_colors[5], (long)(s - i_strpos)) );
 935     return s;
 936
 937   fail_finish:                          /* Substring not found */
 938     if (prog->check_substr || prog->check_utf8)         /* could be removed already */
 939         BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
 940   fail:
 941     DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
 942                           PL_colors[4],PL_colors[5]));
 943     return NULL;
 944 }
 945
 946 #define REXEC_FBC_EXACTISH_CHECK(CoNd)                 \
 947     if ( (CoNd)                                        \
 948          && (ln == len ||                              \
 949              ibcmp_utf8(s, NULL, 0,  do_utf8,          \
 950                         m, NULL, ln, (bool)UTF))       \
 951          && (norun || regtry(prog, s)) )               \
 952         goto got_it;                                   \
 953     else {                                             \
 954          U8 foldbuf[UTF8_MAXBYTES_CASE+1];             \
 955          uvchr_to_utf8(tmpbuf, c);                     \
 956          f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);  \
 957          if ( f != c                                   \
 958               && (f == c1 || f == c2)                  \
 959               && (ln == foldlen ||                     \
 960                   !ibcmp_utf8((char *) foldbuf,        \
 961                               NULL, foldlen, do_utf8,  \
 962                               m,                       \
 963                               NULL, ln, (bool)UTF))    \
 964               && (norun || regtry(prog, s)) )          \
 965               goto got_it;                             \
 966     }                                                  \
 967     s += len
 968
 969 #define REXEC_FBC_EXACTISH_SCAN(CoNd)                     \
 970 STMT_START {                                              \
 971     while (s <= e) {                                      \
 972         if ( (CoNd)                                       \
 973              && (ln == 1 || !(OP(c) == EXACTF             \
 974                               ? ibcmp(s, m, ln)           \
 975                               : ibcmp_locale(s, m, ln)))  \
 976              && (norun || regtry(prog, s)) )              \
 977             goto got_it;                                  \
 978         s++;                                              \
 979     }                                                     \
 980 } STMT_END
 981
 982 #define REXEC_FBC_UTF8_SCAN(CoDe)                     \
 983 STMT_START {                                          \
 984     while (s + (uskip = UTF8SKIP(s)) <= strend) {     \
 985         CoDe                                          \
 986         s += uskip;                                   \
 987     }                                                 \
 988 } STMT_END
 989
 990 #define REXEC_FBC_SCAN(CoDe)                          \
 991 STMT_START {                                          \
 992     while (s < strend) {                              \
 993         CoDe                                          \
 994         s++;                                          \
 995     }                                                 \
 996 } STMT_END
 997
 998 #define REXEC_FBC_UTF8_CLASS_SCAN(CoNd)               \
 999 REXEC_FBC_UTF8_SCAN(                                  \
1000     if (CoNd) {                                       \
1001         if (tmp && (norun || regtry(prog, s)))        \
1002             goto got_it;                              \
1003         else                                          \
1004             tmp = doevery;                            \
1005     }                                                 \
1006     else                                              \
1007         tmp = 1;                                      \
1008 )
1009
1010 #define REXEC_FBC_CLASS_SCAN(CoNd)                    \
1011 REXEC_FBC_SCAN(                                       \
1012     if (CoNd) {                                       \
1013         if (tmp && (norun || regtry(prog, s)))        \
1014             goto got_it;                              \
1015         else                                          \
1016             tmp = doevery;                            \
1017     }                                                 \
1018     else                                              \
1019         tmp = 1;                                      \
1020 )
1021
1022 #define REXEC_FBC_TRYIT               \
1023 if ((norun || regtry(prog, s)))       \
1024     goto got_it
1025
1026 #define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd)      \
1027     if (do_utf8) {                                             \
1028         UtFpReLoAd;                                            \
1029         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1030     }                                                          \
1031     else {                                                     \
1032         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1033     }                                                          \
1034     break
1035
1036 #define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd)                   \
1037     PL_reg_flags |= RF_tainted;                                \
1038     if (do_utf8) {                                             \
1039         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1040     }                                                          \
1041     else {                                                     \
1042         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1043     }                                                          \
1044     break
1045
1046 STATIC char *
1047 S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *strend, I32 norun)
1048 {
1049         const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
1050         char *m;
1051         STRLEN ln;
1052         STRLEN lnc;
1053         register STRLEN uskip;
1054         unsigned int c1;
1055         unsigned int c2;
1056         char *e;
1057         register I32 tmp = 1;   /* Scratch variable? */
1058         register const bool do_utf8 = PL_reg_match_utf8;
1059
1060         /* We know what class it must start with. */
1061         switch (OP(c)) {
1062         case ANYOF:
1063             if (do_utf8) {
1064                  REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
1065                           !UTF8_IS_INVARIANT((U8)s[0]) ?
1066                           reginclass(c, (U8*)s, 0, do_utf8) :
1067                           REGINCLASS(c, (U8*)s));
1068             }
1069             else {
1070                  while (s < strend) {
1071                       STRLEN skip = 1;
1072
1073                       if (REGINCLASS(c, (U8*)s) ||
1074                           (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1075                            /* The assignment of 2 is intentional:
1076                             * for the folded sharp s, the skip is 2. */
1077                            (skip = SHARP_S_SKIP))) {
1078                            if (tmp && (norun || regtry(prog, s)))
1079                                 goto got_it;
1080                            else
1081                                 tmp = doevery;
1082                       }
1083                       else
1084                            tmp = 1;
1085                       s += skip;
1086                  }
1087             }
1088             break;
1089         case CANY:
1090             REXEC_FBC_SCAN(
1091                 if (tmp && (norun || regtry(prog, s)))
1092                     goto got_it;
1093                 else
1094                     tmp = doevery;
1095             );
1096             break;
1097         case EXACTF:
1098             m   = STRING(c);
1099             ln  = STR_LEN(c);   /* length to match in octets/bytes */
1100             lnc = (I32) ln;     /* length to match in characters */
1101             if (UTF) {
1102                 STRLEN ulen1, ulen2;
1103                 U8 *sm = (U8 *) m;
1104                 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1105                 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1106                 const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
1107
1108                 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1109                 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1110
1111                 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
1112                                     0, uniflags);
1113                 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1114                                     0, uniflags);
1115                 lnc = 0;
1116                 while (sm < ((U8 *) m + ln)) {
1117                     lnc++;
1118                     sm += UTF8SKIP(sm);
1119                 }
1120             }
1121             else {
1122                 c1 = *(U8*)m;
1123                 c2 = PL_fold[c1];
1124             }
1125             goto do_exactf;
1126         case EXACTFL:
1127             m   = STRING(c);
1128             ln  = STR_LEN(c);
1129             lnc = (I32) ln;
1130             c1 = *(U8*)m;
1131             c2 = PL_fold_locale[c1];
1132           do_exactf:
1133             e = HOP3c(strend, -((I32)lnc), s);
1134
1135             if (norun && e < s)
1136                 e = s;                  /* Due to minlen logic of intuit() */
1137
1138             /* The idea in the EXACTF* cases is to first find the
1139              * first character of the EXACTF* node and then, if
1140              * necessary, case-insensitively compare the full
1141              * text of the node.  The c1 and c2 are the first
1142              * characters (though in Unicode it gets a bit
1143              * more complicated because there are more cases
1144              * than just upper and lower: one needs to use
1145              * the so-called folding case for case-insensitive
1146              * matching (called "loose matching" in Unicode).
1147              * ibcmp_utf8() will do just that. */
1148
1149             if (do_utf8) {
1150                 UV c, f;
1151                 U8 tmpbuf [UTF8_MAXBYTES+1];
1152                 STRLEN len, foldlen;
1153                 const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
1154                 if (c1 == c2) {
1155                     /* Upper and lower of 1st char are equal -
1156                      * probably not a "letter". */
1157                     while (s <= e) {
1158                         c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1159                                            uniflags);
1160                         REXEC_FBC_EXACTISH_CHECK(c == c1);
1161                     }
1162                 }
1163                 else {
1164                     while (s <= e) {
1165                       c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1166                                            uniflags);
1167
1168                         /* Handle some of the three Greek sigmas cases.
1169                          * Note that not all the possible combinations
1170                          * are handled here: some of them are handled
1171                          * by the standard folding rules, and some of
1172                          * them (the character class or ANYOF cases)
1173                          * are handled during compiletime in
1174                          * regexec.c:S_regclass(). */
1175                         if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1176                             c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1177                             c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1178
1179                         REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1180                     }
1181                 }
1182             }
1183             else {
1184                 if (c1 == c2)
1185                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1186                 else
1187                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
1188             }
1189             break;
1190         case BOUNDL:
1191             PL_reg_flags |= RF_tainted;
1192             /* FALL THROUGH */
1193         case BOUND:
1194             if (do_utf8) {
1195                 if (s == PL_bostr)
1196                     tmp = '\n';
1197                 else {
1198                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1199                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1200                 }
1201                 tmp = ((OP(c) == BOUND ?
1202                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1203                 LOAD_UTF8_CHARCLASS_ALNUM();
1204                 REXEC_FBC_UTF8_SCAN(
1205                     if (tmp == !(OP(c) == BOUND ?
1206                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1207                                  isALNUM_LC_utf8((U8*)s)))
1208                     {
1209                         tmp = !tmp;
1210                         REXEC_FBC_TRYIT;
1211                 }
1212                 );
1213             }
1214             else {
1215                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1216                 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1217                 REXEC_FBC_SCAN(
1218                     if (tmp ==
1219                         !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1220                         tmp = !tmp;
1221                         REXEC_FBC_TRYIT;
1222                 }
1223                 );
1224             }
1225             if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
1226                 goto got_it;
1227             break;
1228         case NBOUNDL:
1229             PL_reg_flags |= RF_tainted;
1230             /* FALL THROUGH */
1231         case NBOUND:
1232             if (do_utf8) {
1233                 if (s == PL_bostr)
1234                     tmp = '\n';
1235                 else {
1236                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1237                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1238                 }
1239                 tmp = ((OP(c) == NBOUND ?
1240                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1241                 LOAD_UTF8_CHARCLASS_ALNUM();
1242                 REXEC_FBC_UTF8_SCAN(
1243                     if (tmp == !(OP(c) == NBOUND ?
1244                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1245                                  isALNUM_LC_utf8((U8*)s)))
1246                         tmp = !tmp;
1247                     else REXEC_FBC_TRYIT;
1248                 );
1249             }
1250             else {
1251                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1252                 tmp = ((OP(c) == NBOUND ?
1253                         isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1254                 REXEC_FBC_SCAN(
1255                     if (tmp ==
1256                         !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1257                         tmp = !tmp;
1258                     else REXEC_FBC_TRYIT;
1259                 );
1260             }
1261             if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
1262                 goto got_it;
1263             break;
1264         case ALNUM:
1265             REXEC_FBC_CSCAN_PRELOAD(
1266                 LOAD_UTF8_CHARCLASS_ALNUM(),
1267                 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1268                 isALNUM(*s)
1269             );
1270         case ALNUML:
1271             REXEC_FBC_CSCAN_TAINT(
1272                 isALNUM_LC_utf8((U8*)s),
1273                 isALNUM_LC(*s)
1274             );
1275         case NALNUM:
1276             REXEC_FBC_CSCAN_PRELOAD(
1277                 LOAD_UTF8_CHARCLASS_ALNUM(),
1278                 !swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1279                 !isALNUM(*s)
1280             );
1281         case NALNUML:
1282             REXEC_FBC_CSCAN_TAINT(
1283                 !isALNUM_LC_utf8((U8*)s),
1284                 !isALNUM_LC(*s)
1285             );
1286         case SPACE:
1287             REXEC_FBC_CSCAN_PRELOAD(
1288                 LOAD_UTF8_CHARCLASS_SPACE(),
1289                 *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
1290                 isSPACE(*s)
1291             );
1292         case SPACEL:
1293             REXEC_FBC_CSCAN_TAINT(
1294                 *s == ' ' || isSPACE_LC_utf8((U8*)s),
1295                 isSPACE_LC(*s)
1296             );
1297         case NSPACE:
1298             REXEC_FBC_CSCAN_PRELOAD(
1299                 LOAD_UTF8_CHARCLASS_SPACE(),
1300                 !(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
1301                 !isSPACE(*s)
1302             );
1303         case NSPACEL:
1304             REXEC_FBC_CSCAN_TAINT(
1305                 !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1306                 !isSPACE_LC(*s)
1307             );
1308         case DIGIT:
1309             REXEC_FBC_CSCAN_PRELOAD(
1310                 LOAD_UTF8_CHARCLASS_DIGIT(),
1311                 swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1312                 isDIGIT(*s)
1313             );
1314         case DIGITL:
1315             REXEC_FBC_CSCAN_TAINT(
1316                 isDIGIT_LC_utf8((U8*)s),
1317                 isDIGIT_LC(*s)
1318             );
1319         case NDIGIT:
1320             REXEC_FBC_CSCAN_PRELOAD(
1321                 LOAD_UTF8_CHARCLASS_DIGIT(),
1322                 !swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1323                 !isDIGIT(*s)
1324             );
1325         case NDIGITL:
1326             REXEC_FBC_CSCAN_TAINT(
1327                 !isDIGIT_LC_utf8((U8*)s),
1328                 !isDIGIT_LC(*s)
1329             );
1330         default:
1331             Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1332             break;
1333         }
1334         return 0;
1335       got_it:
1336         return s;
1337 }
1338
1339 /*
1340  - regexec_flags - match a regexp against a string
1341  */
1342 I32
1343 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1344               char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1345 /* strend: pointer to null at end of string */
1346 /* strbeg: real beginning of string */
1347 /* minend: end of match must be >=minend after stringarg. */
1348 /* data: May be used for some additional optimizations. */
1349 /* nosave: For optimizations. */
1350 {
1351     register char *s;
1352     register regnode *c;
1353     register char *startpos = stringarg;
1354     I32 minlen;         /* must match at least this many chars */
1355     I32 dontbother = 0; /* how many characters not to try at end */
1356     I32 end_shift = 0;                  /* Same for the end. */         /* CC */
1357     I32 scream_pos = -1;                /* Internal iterator of scream. */
1358     char *scream_olds = NULL;
1359     SV* oreplsv = GvSV(PL_replgv);
1360     const bool do_utf8 = DO_UTF8(sv);
1361     I32 multiline;
1362 #ifdef DEBUGGING
1363     SV* dsv0;
1364     SV* dsv1;
1365 #endif
1366     PERL_UNUSED_ARG(data);
1367
1368     /* Be paranoid... */
1369     if (prog == NULL || startpos == NULL) {
1370         Perl_croak(aTHX_ "NULL regexp parameter");
1371         return 0;
1372     }
1373
1374     PL_regcc = 0;
1375
1376     cache_re(prog);
1377 #ifdef DEBUGGING
1378     PL_regnarrate = DEBUG_r_TEST;
1379 #endif
1380
1381     RX_MATCH_UTF8_set(prog, do_utf8);
1382
1383     multiline = PL_multiline | (prog->reganch & PMf_MULTILINE);
1384
1385 #ifdef DEBUGGING
1386     dsv0 = PERL_DEBUG_PAD_ZERO(0);
1387     dsv1 = PERL_DEBUG_PAD_ZERO(1);
1388 #endif
1389
1390     minlen = prog->minlen;
1391     if (strend - startpos < minlen) {
1392         DEBUG_r(PerlIO_printf(Perl_debug_log,
1393                               "String too short [regexec_flags]...\n"));
1394         goto phooey;
1395     }
1396
1397     /* Check validity of program. */
1398     if (UCHARAT(prog->program) != REG_MAGIC) {
1399         Perl_croak(aTHX_ "corrupted regexp program");
1400     }
1401
1402     PL_reg_flags = 0;
1403     PL_reg_eval_set = 0;
1404     PL_reg_maxiter = 0;
1405
1406     if (prog->reganch & ROPT_UTF8)
1407         PL_reg_flags |= RF_utf8;
1408
1409     /* Mark beginning of line for ^ and lookbehind. */
1410     PL_regbol = startpos;
1411     PL_bostr  = strbeg;
1412     PL_reg_sv = sv;
1413
1414     /* Mark end of line for $ (and such) */
1415     PL_regeol = strend;
1416
1417     /* see how far we have to get to not match where we matched before */
1418     PL_regtill = startpos+minend;
1419
1420     /* We start without call_cc context.  */
1421     PL_reg_call_cc = 0;
1422
1423     /* If there is a "must appear" string, look for it. */
1424     s = startpos;
1425
1426     if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1427         MAGIC *mg;
1428
1429         if (flags & REXEC_IGNOREPOS)    /* Means: check only at start */
1430             PL_reg_ganch = startpos;
1431         else if (sv && SvTYPE(sv) >= SVt_PVMG
1432                   && SvMAGIC(sv)
1433                   && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1434                   && mg->mg_len >= 0) {
1435             PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1436             if (prog->reganch & ROPT_ANCH_GPOS) {
1437                 if (s > PL_reg_ganch)
1438                     goto phooey;
1439                 s = PL_reg_ganch;
1440             }
1441         }
1442         else                            /* pos() not defined */
1443             PL_reg_ganch = strbeg;
1444     }
1445
1446     if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
1447         re_scream_pos_data d;
1448
1449         d.scream_olds = &scream_olds;
1450         d.scream_pos = &scream_pos;
1451         s = re_intuit_start(prog, sv, s, strend, flags, &d);
1452         if (!s) {
1453             DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1454             goto phooey;        /* not present */
1455         }
1456     }
1457
1458     DEBUG_r({
1459         const char * const s0   = UTF
1460             ? pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1461                           UNI_DISPLAY_REGEX)
1462             : prog->precomp;
1463         const int len0 = UTF ? (int)SvCUR(dsv0) : prog->prelen;
1464         const char * const s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
1465                                                UNI_DISPLAY_REGEX) : startpos;
1466         const int len1 = do_utf8 ? (int)SvCUR(dsv1) : strend - startpos;
1467          if (!PL_colorset)
1468              reginitcolors();
1469          PerlIO_printf(Perl_debug_log,
1470                        "%sMatching REx%s \"%s%*.*s%s%s\" against \"%s%.*s%s%s\"\n",
1471                        PL_colors[4],PL_colors[5],PL_colors[0],
1472                        len0, len0, s0,
1473                        PL_colors[1],
1474                        len0 > 60 ? "..." : "",
1475                        PL_colors[0],
1476                        (int)(len1 > 60 ? 60 : len1),
1477                        s1, PL_colors[1],
1478                        (len1 > 60 ? "..." : "")
1479               );
1480     });
1481
1482     /* Simplest case:  anchored match need be tried only once. */
1483     /*  [unless only anchor is BOL and multiline is set] */
1484     if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1485         if (s == startpos && regtry(prog, startpos))
1486             goto got_it;
1487         else if (multiline || (prog->reganch & ROPT_IMPLICIT)
1488                  || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1489         {
1490             char *end;
1491
1492             if (minlen)
1493                 dontbother = minlen - 1;
1494             end = HOP3c(strend, -dontbother, strbeg) - 1;
1495             /* for multiline we only have to try after newlines */
1496             if (prog->check_substr || prog->check_utf8) {
1497                 if (s == startpos)
1498                     goto after_try;
1499                 while (1) {
1500                     if (regtry(prog, s))
1501                         goto got_it;
1502                   after_try:
1503                     if (s >= end)
1504                         goto phooey;
1505                     if (prog->reganch & RE_USE_INTUIT) {
1506                         s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1507                         if (!s)
1508                             goto phooey;
1509                     }
1510                     else
1511                         s++;
1512                 }
1513             } else {
1514                 if (s > startpos)
1515                     s--;
1516                 while (s < end) {
1517                     if (*s++ == '\n') { /* don't need PL_utf8skip here */
1518                         if (regtry(prog, s))
1519                             goto got_it;
1520                     }
1521                 }
1522             }
1523         }
1524         goto phooey;
1525     } else if (prog->reganch & ROPT_ANCH_GPOS) {
1526         if (regtry(prog, PL_reg_ganch))
1527             goto got_it;
1528         goto phooey;
1529     }
1530
1531     /* Messy cases:  unanchored match. */
1532     if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
1533         /* we have /x+whatever/ */
1534         /* it must be a one character string (XXXX Except UTF?) */
1535         char ch;
1536 #ifdef DEBUGGING
1537         int did_match = 0;
1538 #endif
1539         if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1540             do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1541         ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1542
1543         if (do_utf8) {
1544             REXEC_FBC_SCAN(
1545                 if (*s == ch) {
1546                     DEBUG_r( did_match = 1 );
1547                     if (regtry(prog, s)) goto got_it;
1548                     s += UTF8SKIP(s);
1549                     while (s < strend && *s == ch)
1550                         s += UTF8SKIP(s);
1551                 }
1552             );
1553         }
1554         else {
1555             REXEC_FBC_SCAN(
1556                 if (*s == ch) {
1557                     DEBUG_r( did_match = 1 );
1558                     if (regtry(prog, s)) goto got_it;
1559                     s++;
1560                     while (s < strend && *s == ch)
1561                         s++;
1562                 }
1563             );
1564         }
1565         DEBUG_r(if (!did_match)
1566                 PerlIO_printf(Perl_debug_log,
1567                                   "Did not find anchored character...\n")
1568                );
1569     }
1570     else if (prog->anchored_substr != NULL
1571               || prog->anchored_utf8 != NULL
1572               || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
1573                   && prog->float_max_offset < strend - s)) {
1574         SV *must;
1575         I32 back_max;
1576         I32 back_min;
1577         char *last;
1578         char *last1;            /* Last position checked before */
1579 #ifdef DEBUGGING
1580         int did_match = 0;
1581 #endif
1582         if (prog->anchored_substr || prog->anchored_utf8) {
1583             if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1584                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1585             must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1586             back_max = back_min = prog->anchored_offset;
1587         } else {
1588             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1589                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1590             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1591             back_max = prog->float_max_offset;
1592             back_min = prog->float_min_offset;
1593         }
1594         if (must == &PL_sv_undef)
1595             /* could not downgrade utf8 check substring, so must fail */
1596             goto phooey;
1597
1598         last = HOP3c(strend,    /* Cannot start after this */
1599                           -(I32)(CHR_SVLEN(must)
1600                                  - (SvTAIL(must) != 0) + back_min), strbeg);
1601
1602         if (s > PL_bostr)
1603             last1 = HOPc(s, -1);
1604         else
1605             last1 = s - 1;      /* bogus */
1606
1607         /* XXXX check_substr already used to find "s", can optimize if
1608            check_substr==must. */
1609         scream_pos = -1;
1610         dontbother = end_shift;
1611         strend = HOPc(strend, -dontbother);
1612         while ( (s <= last) &&
1613                 ((flags & REXEC_SCREAM)
1614                  ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
1615                                     end_shift, &scream_pos, 0))
1616                  : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
1617                                   (unsigned char*)strend, must,
1618                                   multiline ? FBMrf_MULTILINE : 0))) ) {
1619             /* we may be pointing at the wrong string */
1620             if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
1621                 s = strbeg + (s - SvPVX_const(sv));
1622             DEBUG_r( did_match = 1 );
1623             if (HOPc(s, -back_max) > last1) {
1624                 last1 = HOPc(s, -back_min);
1625                 s = HOPc(s, -back_max);
1626             }
1627             else {
1628                 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1629
1630                 last1 = HOPc(s, -back_min);
1631                 s = t;
1632             }
1633             if (do_utf8) {
1634                 while (s <= last1) {
1635                     if (regtry(prog, s))
1636                         goto got_it;
1637                     s += UTF8SKIP(s);
1638                 }
1639             }
1640             else {
1641                 while (s <= last1) {
1642                     if (regtry(prog, s))
1643                         goto got_it;
1644                     s++;
1645                 }
1646             }
1647         }
1648         DEBUG_r(if (!did_match)
1649                     PerlIO_printf(Perl_debug_log,
1650                                   "Did not find %s substr \"%s%.*s%s\"%s...\n",
1651                               ((must == prog->anchored_substr || must == prog->anchored_utf8)
1652                                ? "anchored" : "floating"),
1653                               PL_colors[0],
1654                               (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1655                               SvPVX_const(must),
1656                                   PL_colors[1], (SvTAIL(must) ? "$" : ""))
1657                );
1658         goto phooey;
1659     }
1660     else if ((c = prog->regstclass)) {
1661         if (minlen) {
1662             I32 op = (U8)OP(prog->regstclass);
1663             /* don't bother with what can't match */
1664             if (PL_regkind[op] != EXACT && op != CANY)
1665                 strend = HOPc(strend, -(minlen - 1));
1666         }
1667         DEBUG_r({
1668             SV *prop = sv_newmortal();
1669             const char *s0;
1670             const char *s1;
1671             int len0;
1672             int len1;
1673
1674             regprop(prop, c);
1675             s0 = UTF ?
1676               pv_uni_display(dsv0, (U8*)SvPVX_const(prop), SvCUR(prop), 60,
1677                              UNI_DISPLAY_REGEX) :
1678               SvPVX_const(prop);
1679             len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1680             s1 = UTF ?
1681               sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
1682             len1 = UTF ? (int)SvCUR(dsv1) : strend - s;
1683             PerlIO_printf(Perl_debug_log,
1684                           "Matching stclass \"%*.*s\" against \"%*.*s\"\n",
1685                           len0, len0, s0,
1686                           len1, len1, s1);
1687         });
1688         if (find_byclass(prog, c, s, strend, 0))
1689             goto got_it;
1690         DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
1691     }
1692     else {
1693         dontbother = 0;
1694         if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
1695             /* Trim the end. */
1696             char *last;
1697             SV* float_real;
1698
1699             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1700                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1701             float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
1702
1703             if (flags & REXEC_SCREAM) {
1704                 last = screaminstr(sv, float_real, s - strbeg,
1705                                    end_shift, &scream_pos, 1); /* last one */
1706                 if (!last)
1707                     last = scream_olds; /* Only one occurrence. */
1708                 /* we may be pointing at the wrong string */
1709                 else if (RX_MATCH_COPIED(prog))
1710                     s = strbeg + (s - SvPVX_const(sv));
1711             }
1712             else {
1713                 STRLEN len;
1714                 const char * const little = SvPV_const(float_real, len);
1715
1716                 if (SvTAIL(float_real)) {
1717                     if (memEQ(strend - len + 1, little, len - 1))
1718                         last = strend - len + 1;
1719                     else if (!multiline)
1720                         last = memEQ(strend - len, little, len)
1721                             ? strend - len : NULL;
1722                     else
1723                         goto find_last;
1724                 } else {
1725                   find_last:
1726                     if (len)
1727                         last = rninstr(s, strend, little, little + len);
1728                     else
1729                         last = strend;  /* matching "$" */
1730                 }
1731             }
1732             if (last == NULL) {
1733                 DEBUG_r(PerlIO_printf(Perl_debug_log,
1734                                       "%sCan't trim the tail, match fails (should not happen)%s\n",
1735                                       PL_colors[4],PL_colors[5]));
1736                 goto phooey; /* Should not happen! */
1737             }
1738             dontbother = strend - last + prog->float_min_offset;
1739         }
1740         if (minlen && (dontbother < minlen))
1741             dontbother = minlen - 1;
1742         strend -= dontbother;              /* this one's always in bytes! */
1743         /* We don't know much -- general case. */
1744         if (do_utf8) {
1745             for (;;) {
1746                 if (regtry(prog, s))
1747                     goto got_it;
1748                 if (s >= strend)
1749                     break;
1750                 s += UTF8SKIP(s);
1751             };
1752         }
1753         else {
1754             do {
1755                 if (regtry(prog, s))
1756                     goto got_it;
1757             } while (s++ < strend);
1758         }
1759     }
1760
1761     /* Failure. */
1762     goto phooey;
1763
1764 got_it:
1765     RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1766
1767     if (PL_reg_eval_set) {
1768         /* Preserve the current value of $^R */
1769         if (oreplsv != GvSV(PL_replgv))
1770             sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1771                                                   restored, the value remains
1772                                                   the same. */
1773         restore_pos(aTHX_ 0);
1774     }
1775
1776     /* make sure $`, $&, $', and $digit will work later */
1777     if ( !(flags & REXEC_NOT_FIRST) ) {
1778         if (RX_MATCH_COPIED(prog)) {
1779             Safefree(prog->subbeg);
1780             RX_MATCH_COPIED_off(prog);
1781         }
1782         if (flags & REXEC_COPY_STR) {
1783             I32 i = PL_regeol - startpos + (stringarg - strbeg);
1784
1785             s = savepvn(strbeg, i);
1786             prog->subbeg = s;
1787             prog->sublen = i;
1788             RX_MATCH_COPIED_on(prog);
1789         }
1790         else {
1791             prog->subbeg = strbeg;
1792             prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
1793         }
1794     }
1795
1796     return 1;
1797
1798 phooey:
1799     DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1800                           PL_colors[4],PL_colors[5]));
1801     if (PL_reg_eval_set)
1802         restore_pos(aTHX_ 0);
1803     return 0;
1804 }
1805
1806 /*
1807  - regtry - try match at specific point
1808  */
1809 STATIC I32                      /* 0 failure, 1 success */
1810 S_regtry(pTHX_ regexp *prog, char *startpos)
1811 {
1812     register I32 *sp;
1813     register I32 *ep;
1814     CHECKPOINT lastcp;
1815
1816 #ifdef DEBUGGING
1817     PL_regindent = 0;   /* XXXX Not good when matches are reenterable... */
1818 #endif
1819     if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1820         MAGIC *mg;
1821
1822         PL_reg_eval_set = RS_init;
1823         DEBUG_r(DEBUG_s(
1824             PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
1825                           (IV)(PL_stack_sp - PL_stack_base));
1826             ));
1827         SAVESTACK_CXPOS();
1828         cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1829         /* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
1830         SAVETMPS;
1831         /* Apparently this is not needed, judging by wantarray. */
1832         /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
1833            cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1834
1835         if (PL_reg_sv) {
1836             /* Make $_ available to executed code. */
1837             if (PL_reg_sv != DEFSV) {
1838                 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
1839                 SAVESPTR(DEFSV);
1840                 DEFSV = PL_reg_sv;
1841             }
1842
1843             if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1844                   && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
1845                 /* prepare for quick setting of pos */
1846 #ifdef PERL_OLD_COPY_ON_WRITE
1847                 if (SvIsCOW(sv))
1848                     sv_force_normal_flags(sv, 0);
1849 #endif
1850                 mg = sv_magicext(PL_reg_sv, (SV*)0, PERL_MAGIC_regex_global,
1851                                  &PL_vtbl_mglob, NULL, 0);
1852                 mg->mg_len = -1;
1853             }
1854             PL_reg_magic    = mg;
1855             PL_reg_oldpos   = mg->mg_len;
1856             SAVEDESTRUCTOR_X(restore_pos, 0);
1857         }
1858         if (!PL_reg_curpm) {
1859             Newxz(PL_reg_curpm, 1, PMOP);
1860 #ifdef USE_ITHREADS
1861             {
1862                 SV* repointer = newSViv(0);
1863                 /* so we know which PL_regex_padav element is PL_reg_curpm */
1864                 SvFLAGS(repointer) |= SVf_BREAK;
1865                 av_push(PL_regex_padav,repointer);
1866                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
1867                 PL_regex_pad = AvARRAY(PL_regex_padav);
1868             }
1869 #endif
1870         }
1871         PM_SETRE(PL_reg_curpm, prog);
1872         PL_reg_oldcurpm = PL_curpm;
1873         PL_curpm = PL_reg_curpm;
1874         if (RX_MATCH_COPIED(prog)) {
1875             /*  Here is a serious problem: we cannot rewrite subbeg,
1876                 since it may be needed if this match fails.  Thus
1877                 $` inside (?{}) could fail... */
1878             PL_reg_oldsaved = prog->subbeg;
1879             PL_reg_oldsavedlen = prog->sublen;
1880             RX_MATCH_COPIED_off(prog);
1881         }
1882         else
1883             PL_reg_oldsaved = NULL;
1884         prog->subbeg = PL_bostr;
1885         prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1886     }
1887     prog->startp[0] = startpos - PL_bostr;
1888     PL_reginput = startpos;
1889     PL_regstartp = prog->startp;
1890     PL_regendp = prog->endp;
1891     PL_reglastparen = &prog->lastparen;
1892     PL_reglastcloseparen = &prog->lastcloseparen;
1893     prog->lastparen = 0;
1894     prog->lastcloseparen = 0;
1895     PL_regsize = 0;
1896     DEBUG_r(PL_reg_starttry = startpos);
1897     if (PL_reg_start_tmpl <= prog->nparens) {
1898         PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1899         if(PL_reg_start_tmp)
1900             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1901         else
1902             Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1903     }
1904
1905     /* XXXX What this code is doing here?!!!  There should be no need
1906        to do this again and again, PL_reglastparen should take care of
1907        this!  --ilya*/
1908
1909     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1910      * Actually, the code in regcppop() (which Ilya may be meaning by
1911      * PL_reglastparen), is not needed at all by the test suite
1912      * (op/regexp, op/pat, op/split), but that code is needed, oddly
1913      * enough, for building DynaLoader, or otherwise this
1914      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1915      * will happen.  Meanwhile, this code *is* needed for the
1916      * above-mentioned test suite tests to succeed.  The common theme
1917      * on those tests seems to be returning null fields from matches.
1918      * --jhi */
1919 #if 1
1920     sp = prog->startp;
1921     ep = prog->endp;
1922     if (prog->nparens) {
1923         register I32 i;
1924         for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
1925             *++sp = -1;
1926             *++ep = -1;
1927         }
1928     }
1929 #endif
1930     REGCP_SET(lastcp);
1931     if (regmatch(prog->program + 1)) {
1932         prog->endp[0] = PL_reginput - PL_bostr;
1933         return 1;
1934     }
1935     REGCP_UNWIND(lastcp);
1936     return 0;
1937 }
1938
1939 #define RE_UNWIND_BRANCH        1
1940 #define RE_UNWIND_BRANCHJ       2
1941
1942 union re_unwind_t;
1943
1944 typedef struct {                /* XX: makes sense to enlarge it... */
1945     I32 type;
1946     I32 prev;
1947     CHECKPOINT lastcp;
1948 } re_unwind_generic_t;
1949
1950 typedef struct {
1951     I32 type;
1952     I32 prev;
1953     CHECKPOINT lastcp;
1954     I32 lastparen;
1955     regnode *next;
1956     char *locinput;
1957     I32 nextchr;
1958 #ifdef DEBUGGING
1959     int regindent;
1960 #endif
1961 } re_unwind_branch_t;
1962
1963 typedef union re_unwind_t {
1964     I32 type;
1965     re_unwind_generic_t generic;
1966     re_unwind_branch_t branch;
1967 } re_unwind_t;
1968
1969 #define sayYES goto yes
1970 #define sayNO goto no
1971 #define sayNO_ANYOF goto no_anyof
1972 #define sayYES_FINAL goto yes_final
1973 #define sayYES_LOUD  goto yes_loud
1974 #define sayNO_FINAL  goto no_final
1975 #define sayNO_SILENT goto do_no
1976 #define saySAME(x) if (x) goto yes; else goto no
1977
1978 #define POSCACHE_SUCCESS 0      /* caching success rather than failure */
1979 #define POSCACHE_SEEN 1         /* we know what we're caching */
1980 #define POSCACHE_START 2        /* the real cache: this bit maps to pos 0 */
1981 #define CACHEsayYES STMT_START { \
1982     if (cache_offset | cache_bit) { \
1983         if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
1984             PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) || (1<<POSCACHE_SEEN); \
1985         else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
1986             /* cache records failure, but this is success */ \
1987             DEBUG_r( \
1988                 PerlIO_printf(Perl_debug_log, \
1989                     "%*s  (remove success from failure cache)\n", \
1990                     REPORT_CODE_OFF+PL_regindent*2, "") \
1991             ); \
1992             PL_reg_poscache[cache_offset] &= ~(1<<cache_bit); \
1993         } \
1994     } \
1995     sayYES; \
1996 } STMT_END
1997 #define CACHEsayNO STMT_START { \
1998     if (cache_offset | cache_bit) { \
1999         if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2000             PL_reg_poscache[0] |= (1<<POSCACHE_SEEN); \
2001         else if ((PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2002             /* cache records success, but this is failure */ \
2003             DEBUG_r( \
2004                 PerlIO_printf(Perl_debug_log, \
2005                     "%*s  (remove failure from success cache)\n", \
2006                     REPORT_CODE_OFF+PL_regindent*2, "") \
2007             ); \
2008             PL_reg_poscache[cache_offset] &= ~(1<<cache_bit); \
2009         } \
2010     } \
2011     sayNO; \
2012 } STMT_END
2013
2014
2015 /* this value indiciates that the c1/c2 "next char" test should be skipped */
2016 #define CHRTEST_VOID -1000
2017
2018 #define REPORT_CODE_OFF 24
2019
2020 /*
2021  - regmatch - main matching routine
2022  *
2023  * Conceptually the strategy is simple:  check to see whether the current
2024  * node matches, call self recursively to see whether the rest matches,
2025  * and then act accordingly.  In practice we make some effort to avoid
2026  * recursion, in particular by going through "ordinary" nodes (that don't
2027  * need to know whether the rest of the match failed) by a loop instead of
2028  * by recursion.
2029  */
2030 /* [lwall] I've hoisted the register declarations to the outer block in order to
2031  * maybe save a little bit of pushing and popping on the stack.  It also takes
2032  * advantage of machines that use a register save mask on subroutine entry.
2033  */
2034 STATIC I32                      /* 0 failure, 1 success */
2035 S_regmatch(pTHX_ regnode *prog)
2036 {
2037     register regnode *scan;     /* Current node. */
2038     regnode *next;              /* Next node. */
2039     regnode *inner;             /* Next node in internal branch. */
2040     register I32 nextchr;       /* renamed nextchr - nextchar colides with
2041                                    function of same name */
2042     register I32 n;             /* no or next */
2043     register I32 ln = 0;        /* len or last */
2044     register char *s = NULL;    /* operand or save */
2045     register char *locinput = PL_reginput;
2046     register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
2047     int minmod = 0, sw = 0, logical = 0;
2048     I32 unwind = 0;
2049 #if 0
2050     I32 firstcp = PL_savestack_ix;
2051 #endif
2052     register const bool do_utf8 = PL_reg_match_utf8;
2053 #ifdef DEBUGGING
2054     SV * const dsv0 = PERL_DEBUG_PAD_ZERO(0);
2055     SV * const dsv1 = PERL_DEBUG_PAD_ZERO(1);
2056     SV * const dsv2 = PERL_DEBUG_PAD_ZERO(2);
2057 #endif
2058     U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2059
2060     I32 parenfloor = 0;
2061
2062 #ifdef DEBUGGING
2063     PL_regindent++;
2064 #endif
2065
2066     /* Note that nextchr is a byte even in UTF */
2067     nextchr = UCHARAT(locinput);
2068     scan = prog;
2069     while (scan != NULL) {
2070
2071         DEBUG_r( {
2072             SV * const prop = sv_newmortal();
2073             const int docolor = *PL_colors[0];
2074             const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2075             int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
2076             /* The part of the string before starttry has one color
2077                (pref0_len chars), between starttry and current
2078                position another one (pref_len - pref0_len chars),
2079                after the current position the third one.
2080                We assume that pref0_len <= pref_len, otherwise we
2081                decrease pref0_len.  */
2082             int pref_len = (locinput - PL_bostr) > (5 + taill) - l
2083                 ? (5 + taill) - l : locinput - PL_bostr;
2084             int pref0_len;
2085
2086             while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2087                 pref_len++;
2088             pref0_len = pref_len  - (locinput - PL_reg_starttry);
2089             if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
2090                 l = ( PL_regeol - locinput > (5 + taill) - pref_len
2091                       ? (5 + taill) - pref_len : PL_regeol - locinput);
2092             while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2093                 l--;
2094             if (pref0_len < 0)
2095                 pref0_len = 0;
2096             if (pref0_len > pref_len)
2097                 pref0_len = pref_len;
2098             regprop(prop, scan);
2099             {
2100               const char * const s0 =
2101                 do_utf8 && OP(scan) != CANY ?
2102                 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
2103                                pref0_len, 60, UNI_DISPLAY_REGEX) :
2104                 locinput - pref_len;
2105               const int len0 = do_utf8 ? (int)strlen(s0) : pref0_len;
2106               const char * const s1 = do_utf8 && OP(scan) != CANY ?
2107                 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
2108                                pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2109                 locinput - pref_len + pref0_len;
2110               const int len1 = do_utf8 ? (int)strlen(s1) : pref_len - pref0_len;
2111               const char * const s2 = do_utf8 && OP(scan) != CANY ?
2112                 pv_uni_display(dsv2, (U8*)locinput,
2113                                PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2114                 locinput;
2115               const int len2 = do_utf8 ? (int)strlen(s2) : l;
2116               PerlIO_printf(Perl_debug_log,
2117                             "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2118                             (IV)(locinput - PL_bostr),
2119                             PL_colors[4],
2120                             len0, s0,
2121                             PL_colors[5],
2122                             PL_colors[2],
2123                             len1, s1,
2124                             PL_colors[3],
2125                             (docolor ? "" : "> <"),
2126                             PL_colors[0],
2127                             len2, s2,
2128                             PL_colors[1],
2129                             15 - l - pref_len + 1,
2130                             "",
2131                             (IV)(scan - PL_regprogram), PL_regindent*2, "",
2132                             SvPVX_const(prop));
2133             }
2134         });
2135
2136         next = scan + NEXT_OFF(scan);
2137         if (next == scan)
2138             next = NULL;
2139
2140         switch (OP(scan)) {
2141         case BOL:
2142             if (locinput == PL_bostr || (PL_multiline &&
2143                 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2144             {
2145                 /* regtill = regbol; */
2146                 break;
2147             }
2148             sayNO;
2149         case MBOL:
2150             if (locinput == PL_bostr ||
2151                 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2152             {
2153                 break;
2154             }
2155             sayNO;
2156         case SBOL:
2157             if (locinput == PL_bostr)
2158                 break;
2159             sayNO;
2160         case GPOS:
2161             if (locinput == PL_reg_ganch)
2162                 break;
2163             sayNO;
2164         case EOL:
2165             if (PL_multiline)
2166                 goto meol;
2167             else
2168                 goto seol;
2169         case MEOL:
2170           meol:
2171             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2172                 sayNO;
2173             break;
2174         case SEOL:
2175           seol:
2176             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2177                 sayNO;
2178             if (PL_regeol - locinput > 1)
2179                 sayNO;
2180             break;
2181         case EOS:
2182             if (PL_regeol != locinput)
2183                 sayNO;
2184             break;
2185         case SANY:
2186             if (!nextchr && locinput >= PL_regeol)
2187                 sayNO;
2188             if (do_utf8) {
2189                 locinput += PL_utf8skip[nextchr];
2190                 if (locinput > PL_regeol)
2191                     sayNO;
2192                 nextchr = UCHARAT(locinput);
2193             }
2194             else
2195                 nextchr = UCHARAT(++locinput);
2196             break;
2197         case CANY:
2198             if (!nextchr && locinput >= PL_regeol)
2199                 sayNO;
2200             nextchr = UCHARAT(++locinput);
2201             break;
2202         case REG_ANY:
2203             if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2204                 sayNO;
2205             if (do_utf8) {
2206                 locinput += PL_utf8skip[nextchr];
2207                 if (locinput > PL_regeol)
2208                     sayNO;
2209                 nextchr = UCHARAT(locinput);
2210             }
2211             else
2212                 nextchr = UCHARAT(++locinput);
2213             break;
2214         case EXACT:
2215             s = STRING(scan);
2216             ln = STR_LEN(scan);
2217             if (do_utf8 != UTF) {
2218                 /* The target and the pattern have differing utf8ness. */
2219                 char *l = locinput;
2220                 const char *e = s + ln;
2221
2222                 if (do_utf8) {
2223                     /* The target is utf8, the pattern is not utf8. */
2224                     while (s < e) {
2225                         STRLEN ulen;
2226                         if (l >= PL_regeol)
2227                              sayNO;
2228                         if (NATIVE_TO_UNI(*(U8*)s) !=
2229                             utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
2230                                             uniflags))
2231                              sayNO;
2232                         l += ulen;
2233                         s ++;
2234                     }
2235                 }
2236                 else {
2237                     /* The target is not utf8, the pattern is utf8. */
2238                     while (s < e) {
2239                         STRLEN ulen;
2240                         if (l >= PL_regeol)
2241                             sayNO;
2242                         if (NATIVE_TO_UNI(*((U8*)l)) !=
2243                             utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
2244                                            uniflags))
2245                             sayNO;
2246                         s += ulen;
2247                         l ++;
2248                     }
2249                 }
2250                 locinput = l;
2251                 nextchr = UCHARAT(locinput);
2252                 break;
2253             }
2254             /* The target and the pattern have the same utf8ness. */
2255             /* Inline the first character, for speed. */
2256             if (UCHARAT(s) != nextchr)
2257                 sayNO;
2258             if (PL_regeol - locinput < ln)
2259                 sayNO;
2260             if (ln > 1 && memNE(s, locinput, ln))
2261                 sayNO;
2262             locinput += ln;
2263             nextchr = UCHARAT(locinput);
2264             break;
2265         case EXACTFL:
2266             PL_reg_flags |= RF_tainted;
2267             /* FALL THROUGH */
2268         case EXACTF:
2269             s = STRING(scan);
2270             ln = STR_LEN(scan);
2271
2272             if (do_utf8 || UTF) {
2273               /* Either target or the pattern are utf8. */
2274                 char *l = locinput;
2275                 char *e = PL_regeol;
2276
2277                 if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
2278                                l, &e, 0,  do_utf8)) {
2279                      /* One more case for the sharp s:
2280                       * pack("U0U*", 0xDF) =~ /ss/i,
2281                       * the 0xC3 0x9F are the UTF-8
2282                       * byte sequence for the U+00DF. */
2283                      if (!(do_utf8 &&
2284                            toLOWER(s[0]) == 's' &&
2285                            ln >= 2 &&
2286                            toLOWER(s[1]) == 's' &&
2287                            (U8)l[0] == 0xC3 &&
2288                            e - l >= 2 &&
2289                            (U8)l[1] == 0x9F))
2290                           sayNO;
2291                 }
2292                 locinput = e;
2293                 nextchr = UCHARAT(locinput);
2294                 break;
2295             }
2296
2297             /* Neither the target and the pattern are utf8. */
2298
2299             /* Inline the first character, for speed. */
2300             if (UCHARAT(s) != nextchr &&
2301                 UCHARAT(s) != ((OP(scan) == EXACTF)
2302                                ? PL_fold : PL_fold_locale)[nextchr])
2303                 sayNO;
2304             if (PL_regeol - locinput < ln)
2305                 sayNO;
2306             if (ln > 1 && (OP(scan) == EXACTF
2307                            ? ibcmp(s, locinput, ln)
2308                            : ibcmp_locale(s, locinput, ln)))
2309                 sayNO;
2310             locinput += ln;
2311             nextchr = UCHARAT(locinput);
2312             break;
2313         case ANYOF:
2314             if (do_utf8) {
2315                 STRLEN inclasslen = PL_regeol - locinput;
2316
2317                 if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
2318                     sayNO_ANYOF;
2319                 if (locinput >= PL_regeol)
2320                     sayNO;
2321                 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
2322                 nextchr = UCHARAT(locinput);
2323                 break;
2324             }
2325             else {
2326                 if (nextchr < 0)
2327                     nextchr = UCHARAT(locinput);
2328                 if (!REGINCLASS(scan, (U8*)locinput))
2329                     sayNO_ANYOF;
2330                 if (!nextchr && locinput >= PL_regeol)
2331                     sayNO;
2332                 nextchr = UCHARAT(++locinput);
2333                 break;
2334             }
2335         no_anyof:
2336             /* If we might have the case of the German sharp s
2337              * in a casefolding Unicode character class. */
2338
2339             if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2340                  locinput += SHARP_S_SKIP;
2341                  nextchr = UCHARAT(locinput);
2342             }
2343             else
2344                  sayNO;
2345             break;
2346         case ALNUML:
2347             PL_reg_flags |= RF_tainted;
2348             /* FALL THROUGH */
2349         case ALNUM:
2350             if (!nextchr)
2351                 sayNO;
2352             if (do_utf8) {
2353                 LOAD_UTF8_CHARCLASS_ALNUM();
2354                 if (!(OP(scan) == ALNUM
2355                       ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2356                       : isALNUM_LC_utf8((U8*)locinput)))
2357                 {
2358                     sayNO;
2359                 }
2360                 locinput += PL_utf8skip[nextchr];
2361                 nextchr = UCHARAT(locinput);
2362                 break;
2363             }
2364             if (!(OP(scan) == ALNUM
2365                   ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2366                 sayNO;
2367             nextchr = UCHARAT(++locinput);
2368             break;
2369         case NALNUML:
2370             PL_reg_flags |= RF_tainted;
2371             /* FALL THROUGH */
2372         case NALNUM:
2373             if (!nextchr && locinput >= PL_regeol)
2374                 sayNO;
2375             if (do_utf8) {
2376                 LOAD_UTF8_CHARCLASS_ALNUM();
2377                 if (OP(scan) == NALNUM
2378                     ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2379                     : isALNUM_LC_utf8((U8*)locinput))
2380                 {
2381                     sayNO;
2382                 }
2383                 locinput += PL_utf8skip[nextchr];
2384                 nextchr = UCHARAT(locinput);
2385                 break;
2386             }
2387             if (OP(scan) == NALNUM
2388                 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2389                 sayNO;
2390             nextchr = UCHARAT(++locinput);
2391             break;
2392         case BOUNDL:
2393         case NBOUNDL:
2394             PL_reg_flags |= RF_tainted;
2395             /* FALL THROUGH */
2396         case BOUND:
2397         case NBOUND:
2398             /* was last char in word? */
2399             if (do_utf8) {
2400                 if (locinput == PL_bostr)
2401                     ln = '\n';
2402                 else {
2403                     const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
2404
2405                     ln = utf8n_to_uvchr((U8 *)r, UTF8SKIP(r), 0, 0);
2406                 }
2407                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2408                     ln = isALNUM_uni(ln);
2409                     LOAD_UTF8_CHARCLASS_ALNUM();
2410                     n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
2411                 }
2412                 else {
2413                     ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
2414                     n = isALNUM_LC_utf8((U8*)locinput);
2415                 }
2416             }
2417             else {
2418                 ln = (locinput != PL_bostr) ?
2419                     UCHARAT(locinput - 1) : '\n';
2420                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2421                     ln = isALNUM(ln);
2422                     n = isALNUM(nextchr);
2423                 }
2424                 else {
2425                     ln = isALNUM_LC(ln);
2426                     n = isALNUM_LC(nextchr);
2427                 }
2428             }
2429             if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2430                                     OP(scan) == BOUNDL))
2431                     sayNO;
2432             break;
2433         case SPACEL:
2434             PL_reg_flags |= RF_tainted;
2435             /* FALL THROUGH */
2436         case SPACE:
2437             if (!nextchr)
2438                 sayNO;
2439             if (do_utf8) {
2440                 if (UTF8_IS_CONTINUED(nextchr)) {
2441                     LOAD_UTF8_CHARCLASS_SPACE();
2442                     if (!(OP(scan) == SPACE
2443                           ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
2444                           : isSPACE_LC_utf8((U8*)locinput)))
2445                     {
2446                         sayNO;
2447                     }
2448                     locinput += PL_utf8skip[nextchr];
2449                     nextchr = UCHARAT(locinput);
2450                     break;
2451                 }
2452                 if (!(OP(scan) == SPACE
2453                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2454                     sayNO;
2455                 nextchr = UCHARAT(++locinput);
2456             }
2457             else {
2458                 if (!(OP(scan) == SPACE
2459                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2460                     sayNO;
2461                 nextchr = UCHARAT(++locinput);
2462             }
2463             break;
2464         case NSPACEL:
2465             PL_reg_flags |= RF_tainted;
2466             /* FALL THROUGH */
2467         case NSPACE:
2468             if (!nextchr && locinput >= PL_regeol)
2469                 sayNO;
2470             if (do_utf8) {
2471                 LOAD_UTF8_CHARCLASS_SPACE();
2472                 if (OP(scan) == NSPACE
2473                     ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
2474                     : isSPACE_LC_utf8((U8*)locinput))
2475                 {
2476                     sayNO;
2477                 }
2478                 locinput += PL_utf8skip[nextchr];
2479                 nextchr = UCHARAT(locinput);
2480                 break;
2481             }
2482             if (OP(scan) == NSPACE
2483                 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2484                 sayNO;
2485             nextchr = UCHARAT(++locinput);
2486             break;
2487         case DIGITL:
2488             PL_reg_flags |= RF_tainted;
2489             /* FALL THROUGH */
2490         case DIGIT:
2491             if (!nextchr)
2492                 sayNO;
2493             if (do_utf8) {
2494                 LOAD_UTF8_CHARCLASS_DIGIT();
2495                 if (!(OP(scan) == DIGIT
2496                       ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
2497                       : isDIGIT_LC_utf8((U8*)locinput)))
2498                 {
2499                     sayNO;
2500                 }
2501                 locinput += PL_utf8skip[nextchr];
2502                 nextchr = UCHARAT(locinput);
2503                 break;
2504             }
2505             if (!(OP(scan) == DIGIT
2506                   ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2507                 sayNO;
2508             nextchr = UCHARAT(++locinput);
2509             break;
2510         case NDIGITL:
2511             PL_reg_flags |= RF_tainted;
2512             /* FALL THROUGH */
2513         case NDIGIT:
2514             if (!nextchr && locinput >= PL_regeol)
2515                 sayNO;
2516             if (do_utf8) {
2517                 LOAD_UTF8_CHARCLASS_DIGIT();
2518                 if (OP(scan) == NDIGIT
2519                     ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
2520                     : isDIGIT_LC_utf8((U8*)locinput))
2521                 {
2522                     sayNO;
2523                 }
2524                 locinput += PL_utf8skip[nextchr];
2525                 nextchr = UCHARAT(locinput);
2526                 break;
2527             }
2528             if (OP(scan) == NDIGIT
2529                 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2530                 sayNO;
2531             nextchr = UCHARAT(++locinput);
2532             break;
2533         case CLUMP:
2534             if (locinput >= PL_regeol)
2535                 sayNO;
2536             if  (do_utf8) {
2537                 LOAD_UTF8_CHARCLASS_MARK();
2538                 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2539                     sayNO;
2540                 locinput += PL_utf8skip[nextchr];
2541                 while (locinput < PL_regeol &&
2542                        swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2543                     locinput += UTF8SKIP(locinput);
2544                 if (locinput > PL_regeol)
2545                     sayNO;
2546             }
2547             else
2548                locinput++;
2549             nextchr = UCHARAT(locinput);
2550             break;
2551         case REFFL:
2552             PL_reg_flags |= RF_tainted;
2553             /* FALL THROUGH */
2554         case REF:
2555         case REFF:
2556             n = ARG(scan);  /* which paren pair */
2557             ln = PL_regstartp[n];
2558             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
2559             if ((I32)*PL_reglastparen < n || ln == -1)
2560                 sayNO;                  /* Do not match unless seen CLOSEn. */
2561             if (ln == PL_regendp[n])
2562                 break;
2563
2564             s = PL_bostr + ln;
2565             if (do_utf8 && OP(scan) != REF) {   /* REF can do byte comparison */
2566                 char *l = locinput;
2567                 const char *e = PL_bostr + PL_regendp[n];
2568                 /*
2569                  * Note that we can't do the "other character" lookup trick as
2570                  * in the 8-bit case (no pun intended) because in Unicode we
2571                  * have to map both upper and title case to lower case.
2572                  */
2573                 if (OP(scan) == REFF) {
2574                     while (s < e) {
2575                         STRLEN ulen1, ulen2;
2576                         U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
2577                         U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
2578
2579                         if (l >= PL_regeol)
2580                             sayNO;
2581                         toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2582                         toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
2583                         if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
2584                             sayNO;
2585                         s += ulen1;
2586                         l += ulen2;
2587                     }
2588                 }
2589                 locinput = l;
2590                 nextchr = UCHARAT(locinput);
2591                 break;
2592             }
2593
2594             /* Inline the first character, for speed. */
2595             if (UCHARAT(s) != nextchr &&
2596                 (OP(scan) == REF ||
2597                  (UCHARAT(s) != ((OP(scan) == REFF
2598                                   ? PL_fold : PL_fold_locale)[nextchr]))))
2599                 sayNO;
2600             ln = PL_regendp[n] - ln;
2601             if (locinput + ln > PL_regeol)
2602                 sayNO;
2603             if (ln > 1 && (OP(scan) == REF
2604                            ? memNE(s, locinput, ln)
2605                            : (OP(scan) == REFF
2606                               ? ibcmp(s, locinput, ln)
2607                               : ibcmp_locale(s, locinput, ln))))
2608                 sayNO;
2609             locinput += ln;
2610             nextchr = UCHARAT(locinput);
2611             break;
2612
2613         case NOTHING:
2614         case TAIL:
2615             break;
2616         case BACK:
2617             break;
2618         case EVAL:
2619         {
2620             dSP;
2621             OP_4tree * const oop = PL_op;
2622             COP * const ocurcop = PL_curcop;
2623             PAD *old_comppad;
2624             SV *ret;
2625             struct regexp * const oreg = PL_reg_re;
2626
2627             n = ARG(scan);
2628             PL_op = (OP_4tree*)PL_regdata->data[n];
2629             DEBUG_r( PerlIO_printf(Perl_debug_log, "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
2630             PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_regdata->data[n + 2]);
2631             PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
2632
2633             {
2634                 SV ** const before = SP;
2635                 CALLRUNOPS(aTHX);                       /* Scalar context. */
2636                 SPAGAIN;
2637                 if (SP == before)
2638                     ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
2639                 else {
2640                     ret = POPs;
2641                     PUTBACK;
2642                 }
2643             }
2644
2645             PL_op = oop;
2646             PAD_RESTORE_LOCAL(old_comppad);
2647             PL_curcop = ocurcop;
2648             if (logical) {
2649                 if (logical == 2) {     /* Postponed subexpression. */
2650                     regexp *re;
2651                     MAGIC *mg = NULL;
2652                     re_cc_state state;
2653                     CHECKPOINT cp, lastcp;
2654                     int toggleutf;
2655                     register SV *sv;
2656
2657                     if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
2658                         mg = mg_find(sv, PERL_MAGIC_qr);
2659                     else if (SvSMAGICAL(ret)) {
2660                         if (SvGMAGICAL(ret))
2661                             sv_unmagic(ret, PERL_MAGIC_qr);
2662                         else
2663                             mg = mg_find(ret, PERL_MAGIC_qr);
2664                     }
2665
2666                     if (mg) {
2667                         re = (regexp *)mg->mg_obj;
2668                         (void)ReREFCNT_inc(re);
2669                     }
2670                     else {
2671                         STRLEN len;
2672                         const char * const t = SvPV_const(ret, len);
2673                         PMOP pm;
2674                         char * const oprecomp = PL_regprecomp;
2675                         const I32 osize = PL_regsize;
2676                         const I32 onpar = PL_regnpar;
2677
2678                         Zero(&pm, 1, PMOP);
2679                         if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
2680                         re = CALLREGCOMP(aTHX_ (char*)t, (char*)t + len, &pm);
2681                         if (!(SvFLAGS(ret)
2682                               & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
2683                                 | SVs_GMG)))
2684                             sv_magic(ret,(SV*)ReREFCNT_inc(re),
2685                                         PERL_MAGIC_qr,0,0);
2686                         PL_regprecomp = oprecomp;
2687                         PL_regsize = osize;
2688                         PL_regnpar = onpar;
2689                     }
2690                     DEBUG_r(
2691                         PerlIO_printf(Perl_debug_log,
2692                                       "Entering embedded \"%s%.60s%s%s\"\n",
2693                                       PL_colors[0],
2694                                       re->precomp,
2695                                       PL_colors[1],
2696                                       (strlen(re->precomp) > 60 ? "..." : ""))
2697                         );
2698                     state.node = next;
2699                     state.prev = PL_reg_call_cc;
2700                     state.cc = PL_regcc;
2701                     state.re = PL_reg_re;
2702
2703                     PL_regcc = 0;
2704
2705                     cp = regcppush(0);  /* Save *all* the positions. */
2706                     REGCP_SET(lastcp);
2707                     cache_re(re);
2708                     state.ss = PL_savestack_ix;
2709                     *PL_reglastparen = 0;
2710                     *PL_reglastcloseparen = 0;
2711                     PL_reg_call_cc = &state;
2712                     PL_reginput = locinput;
2713                     toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
2714                                 ((re->reganch & ROPT_UTF8) != 0);
2715                     if (toggleutf) PL_reg_flags ^= RF_utf8;
2716
2717                     /* XXXX This is too dramatic a measure... */
2718                     PL_reg_maxiter = 0;
2719
2720                     if (regmatch(re->program + 1)) {
2721                         /* Even though we succeeded, we need to restore
2722                            global variables, since we may be wrapped inside
2723                            SUSPEND, thus the match may be not finished yet. */
2724
2725                         /* XXXX Do this only if SUSPENDed? */
2726                         PL_reg_call_cc = state.prev;
2727                         PL_regcc = state.cc;
2728                         PL_reg_re = state.re;
2729                         cache_re(PL_reg_re);
2730                         if (toggleutf) PL_reg_flags ^= RF_utf8;
2731
2732                         /* XXXX This is too dramatic a measure... */
2733                         PL_reg_maxiter = 0;
2734
2735                         /* These are needed even if not SUSPEND. */
2736                         ReREFCNT_dec(re);
2737                         regcpblow(cp);
2738                         sayYES;
2739                     }
2740                     ReREFCNT_dec(re);
2741                     REGCP_UNWIND(lastcp);
2742                     regcppop();
2743                     PL_reg_call_cc = state.prev;
2744                     PL_regcc = state.cc;
2745                     PL_reg_re = state.re;
2746                     cache_re(PL_reg_re);
2747                     if (toggleutf) PL_reg_flags ^= RF_utf8;
2748
2749                     /* XXXX This is too dramatic a measure... */
2750                     PL_reg_maxiter = 0;
2751
2752                     logical = 0;
2753                     sayNO;
2754                 }
2755                 sw = SvTRUE(ret);
2756                 logical = 0;
2757             }
2758             else {
2759                 sv_setsv(save_scalar(PL_replgv), ret);
2760                 cache_re(oreg);
2761             }
2762             break;
2763         }
2764         case OPEN:
2765             n = ARG(scan);  /* which paren pair */
2766             PL_reg_start_tmp[n] = locinput;
2767             if (n > PL_regsize)
2768                 PL_regsize = n;
2769             break;
2770         case CLOSE:
2771             n = ARG(scan);  /* which paren pair */
2772             PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2773             PL_regendp[n] = locinput - PL_bostr;
2774             if (n > (I32)*PL_reglastparen)
2775                 *PL_reglastparen = n;
2776             *PL_reglastcloseparen = n;
2777             break;
2778         case GROUPP:
2779             n = ARG(scan);  /* which paren pair */
2780             sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
2781             break;
2782         case IFTHEN:
2783             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
2784             if (sw)
2785                 next = NEXTOPER(NEXTOPER(scan));
2786             else {
2787                 next = scan + ARG(scan);
2788                 if (OP(next) == IFTHEN) /* Fake one. */
2789                     next = NEXTOPER(NEXTOPER(next));
2790             }
2791             break;
2792         case LOGICAL:
2793             logical = scan->flags;
2794             break;
2795 /*******************************************************************
2796  PL_regcc contains infoblock about the innermost (...)* loop, and
2797  a pointer to the next outer infoblock.
2798
2799  Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2800
2801    1) After matching X, regnode for CURLYX is processed;
2802
2803    2) This regnode creates infoblock on the stack, and calls
2804       regmatch() recursively with the starting point at WHILEM node;
2805
2806    3) Each hit of WHILEM node tries to match A and Z (in the order
2807       depending on the current iteration, min/max of {min,max} and
2808       greediness).  The information about where are nodes for "A"
2809       and "Z" is read from the infoblock, as is info on how many times "A"
2810       was already matched, and greediness.
2811
2812    4) After A matches, the same WHILEM node is hit again.
2813
2814    5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2815       of the same pair.  Thus when WHILEM tries to match Z, it temporarily
2816       resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2817       as in (Y(A)*Z)*.  If Z matches, the automaton will hit the WHILEM node
2818       of the external loop.
2819
2820  Currently present infoblocks form a tree with a stem formed by PL_curcc
2821  and whatever it mentions via ->next, and additional attached trees
2822  corresponding to temporarily unset infoblocks as in "5" above.
2823
2824  In the following picture infoblocks for outer loop of
2825  (Y(A)*?Z)*?T are denoted O, for inner I.  NULL starting block
2826  is denoted by x.  The matched string is YAAZYAZT.  Temporarily postponed
2827  infoblocks are drawn below the "reset" infoblock.
2828
2829  In fact in the picture below we do not show failed matches for Z and T
2830  by WHILEM blocks.  [We illustrate minimal matches, since for them it is
2831  more obvious *why* one needs to *temporary* unset infoblocks.]
2832
2833   Matched       REx position    InfoBlocks      Comment
2834                 (Y(A)*?Z)*?T    x
2835                 Y(A)*?Z)*?T     x <- O
2836   Y             (A)*?Z)*?T      x <- O
2837   Y             A)*?Z)*?T       x <- O <- I
2838   YA            )*?Z)*?T        x <- O <- I
2839   YA            A)*?Z)*?T       x <- O <- I
2840   YAA           )*?Z)*?T        x <- O <- I
2841   YAA           Z)*?T           x <- O          # Temporary unset I
2842                                      I
2843
2844   YAAZ          Y(A)*?Z)*?T     x <- O
2845                                      I
2846
2847   YAAZY         (A)*?Z)*?T      x <- O
2848                                      I
2849
2850   YAAZY         A)*?Z)*?T       x <- O <- I
2851                                      I
2852
2853   YAAZYA        )*?Z)*?T        x <- O <- I
2854                                      I
2855
2856   YAAZYA        Z)*?T           x <- O          # Temporary unset I
2857                                      I,I
2858
2859   YAAZYAZ       )*?T            x <- O
2860                                      I,I
2861
2862   YAAZYAZ       T               x               # Temporary unset O
2863                                 O
2864                                 I,I
2865
2866   YAAZYAZT                      x
2867                                 O
2868                                 I,I
2869  *******************************************************************/
2870         case CURLYX: {
2871                 CURCUR cc;
2872                 CHECKPOINT cp = PL_savestack_ix;
2873                 /* No need to save/restore up to this paren */
2874                 parenfloor = scan->flags;
2875
2876                 /* Dave says:
2877
2878                    CURLYX and WHILEM are always paired: they're the moral
2879                    equivalent of pp_enteriter anbd pp_iter.
2880
2881                    The only time next could be null is if the node tree is
2882                    corrupt. This was mentioned on p5p a few days ago.
2883
2884                    See http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2006-04/msg00556.html
2885                    So we'll assert that this is true:
2886                 */
2887                 assert(next);
2888                 if (next && (OP(PREVOPER(next)) == NOTHING)) /* LONGJMP */
2889                     next += ARG(next);
2890                 cc.oldcc = PL_regcc;
2891                 PL_regcc = &cc;
2892                 /* XXXX Probably it is better to teach regpush to support
2893                    parenfloor > PL_regsize... */
2894                 if (parenfloor > (I32)*PL_reglastparen)
2895                     parenfloor = *PL_reglastparen; /* Pessimization... */
2896                 cc.parenfloor = parenfloor;
2897                 cc.cur = -1;
2898                 cc.min = ARG1(scan);
2899                 cc.max  = ARG2(scan);
2900                 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
2901                 cc.next = next;
2902                 cc.minmod = minmod;
2903                 cc.lastloc = 0;
2904                 PL_reginput = locinput;
2905                 n = regmatch(PREVOPER(next));   /* start on the WHILEM */
2906                 if (PL_reg_eval_set){
2907                   SV *pres= GvSV(PL_replgv);
2908                   SvREFCNT_inc(pres);
2909                   regcpblow(cp);
2910                   sv_setsv(GvSV(PL_replgv), pres);
2911                   SvREFCNT_dec(pres);
2912                 } else {
2913                   regcpblow(cp);
2914                 }
2915                 PL_regcc = cc.oldcc;
2916                 saySAME(n);
2917             }
2918             /* NOTREACHED */
2919         case WHILEM:
2920             /* Dave says:
2921
2922             st->cc gets initialised by CURLYX ready for use by WHILEM.
2923             So again, unless somethings been corrupted, st->cc cannot
2924             be null at that point in WHILEM.
2925
2926             See http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2006-04/msg00556.html
2927             So we'll assert that this is true:
2928             */
2929             assert(PL_regcc);
2930             {
2931                 /*
2932                  * This is really hard to understand, because after we match
2933                  * what we're trying to match, we must make sure the rest of
2934                  * the REx is going to match for sure, and to do that we have
2935                  * to go back UP the parse tree by recursing ever deeper.  And
2936                  * if it fails, we have to reset our parent's current state
2937                  * that we can try again after backing off.
2938                  */
2939
2940                 CHECKPOINT cp, lastcp;
2941                 CURCUR* cc = PL_regcc;
2942                 char * const lastloc = cc->lastloc; /* Detection of 0-len. */
2943                 I32 cache_offset = 0, cache_bit = 0;
2944
2945                 n = cc->cur + 1;        /* how many we know we matched */
2946                 PL_reginput = locinput;
2947
2948                 DEBUG_r(
2949                     PerlIO_printf(Perl_debug_log,
2950                                   "%*s  %ld out of %ld..%ld  cc=%"UVxf"\n",
2951                                   REPORT_CODE_OFF+PL_regindent*2, "",
2952                                   (long)n, (long)cc->min,
2953                                   (long)cc->max, PTR2UV(cc))
2954                     );
2955
2956                 /* If degenerate scan matches "", assume scan done. */
2957
2958                 if (locinput == cc->lastloc && n >= cc->min) {
2959                     PL_regcc = cc->oldcc;
2960                     if (PL_regcc)
2961                         ln = PL_regcc->cur;
2962                     DEBUG_r(
2963                         PerlIO_printf(Perl_debug_log,
2964                            "%*s  empty match detected, try continuation...\n",
2965                            REPORT_CODE_OFF+PL_regindent*2, "")
2966                         );
2967                     if (regmatch(cc->next))
2968                         sayYES;
2969                     if (PL_regcc)
2970                         PL_regcc->cur = ln;
2971                     PL_regcc = cc;
2972                     sayNO;
2973                 }
2974
2975                 /* First just match a string of min scans. */
2976
2977                 if (n < cc->min) {
2978                     cc->cur = n;
2979                     cc->lastloc = locinput;
2980                     if (regmatch(cc->scan))
2981                         sayYES;
2982                     cc->cur = n - 1;
2983                     cc->lastloc = lastloc;
2984                     sayNO;
2985                 }
2986
2987                 if (scan->flags) {
2988                     /* Check whether we already were at this position.
2989                         Postpone detection until we know the match is not
2990                         *that* much linear. */
2991                 if (!PL_reg_maxiter) {
2992                     PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2993                     /* possible overflow for long strings and many CURLYX's */
2994                     if (PL_reg_maxiter < 0)
2995                         PL_reg_maxiter = I32_MAX;
2996                     PL_reg_leftiter = PL_reg_maxiter;
2997                 }
2998                 if (PL_reg_leftiter-- == 0) {
2999                     const I32 size = (PL_reg_maxiter + 7 + POSCACHE_START)/8;
3000                     if (PL_reg_poscache) {
3001                         if ((I32)PL_reg_poscache_size < size) {
3002                             Renew(PL_reg_poscache, size, char);
3003                             PL_reg_poscache_size = size;
3004                         }
3005                         Zero(PL_reg_poscache, size, char);
3006                     }
3007                     else {
3008                         PL_reg_poscache_size = size;
3009                         Newxz(PL_reg_poscache, size, char);
3010                     }
3011                     DEBUG_r(
3012                         PerlIO_printf(Perl_debug_log,
3013               "%sDetected a super-linear match, switching on caching%s...\n",
3014                                       PL_colors[4], PL_colors[5])
3015                         );
3016                 }
3017                 if (PL_reg_leftiter < 0) {
3018                     cache_offset = locinput - PL_bostr;
3019
3020                     cache_offset = (scan->flags & 0xf) - 1 + POSCACHE_START
3021                             + cache_offset * (scan->flags>>4);
3022                     cache_bit = cache_offset % 8;
3023                     cache_offset /= 8;
3024                     if (PL_reg_poscache[cache_offset] & (1<<cache_bit)) {
3025                     DEBUG_r(
3026                         PerlIO_printf(Perl_debug_log,
3027                                       "%*s  already tried at this position...\n",
3028                                       REPORT_CODE_OFF+PL_regindent*2, "")
3029                         );
3030                         if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))
3031                             /* cache records success */
3032                             sayYES;
3033                         else
3034                             /* cache records failure */
3035                             sayNO_SILENT;
3036                     }
3037                     PL_reg_poscache[cache_offset] |= (1<<cache_bit);
3038                 }
3039                 }
3040
3041                 /* Prefer next over scan for minimal matching. */
3042
3043                 if (cc->minmod) {
3044                     PL_regcc = cc->oldcc;
3045                     if (PL_regcc)
3046                         ln = PL_regcc->cur;
3047                     cp = regcppush(cc->parenfloor);
3048                     REGCP_SET(lastcp);
3049                     if (regmatch(cc->next)) {
3050                         regcpblow(cp);
3051                         CACHEsayYES;    /* All done. */
3052                     }
3053                     REGCP_UNWIND(lastcp);
3054                     regcppop();
3055                     if (PL_regcc)
3056                         PL_regcc->cur = ln;
3057                     PL_regcc = cc;
3058
3059                     if (n >= cc->max) { /* Maximum greed exceeded? */
3060                         if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3061                             && !(PL_reg_flags & RF_warned)) {
3062                             PL_reg_flags |= RF_warned;
3063                             Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3064                                  "Complex regular subexpression recursion",
3065                                  REG_INFTY - 1);
3066                         }
3067                         CACHEsayNO;
3068                     }
3069
3070                     DEBUG_r(
3071                         PerlIO_printf(Perl_debug_log,
3072                                       "%*s  trying longer...\n",
3073                                       REPORT_CODE_OFF+PL_regindent*2, "")
3074                         );
3075                     /* Try scanning more and see if it helps. */
3076                     PL_reginput = locinput;
3077                     cc->cur = n;
3078                     cc->lastloc = locinput;
3079                     cp = regcppush(cc->parenfloor);
3080                     REGCP_SET(lastcp);
3081                     if (regmatch(cc->scan)) {
3082                         regcpblow(cp);
3083                         CACHEsayYES;
3084                     }
3085                     REGCP_UNWIND(lastcp);
3086                     regcppop();
3087                     cc->cur = n - 1;
3088                     cc->lastloc = lastloc;
3089                     CACHEsayNO;
3090                 }
3091
3092                 /* Prefer scan over next for maximal matching. */
3093
3094                 if (n < cc->max) {      /* More greed allowed? */
3095                     cp = regcppush(cc->parenfloor);
3096                     cc->cur = n;
3097                     cc->lastloc = locinput;
3098                     REGCP_SET(lastcp);
3099                     if (regmatch(cc->scan)) {
3100                         regcpblow(cp);
3101                         CACHEsayYES;
3102                     }
3103                     REGCP_UNWIND(lastcp);
3104                     regcppop();         /* Restore some previous $<digit>s? */
3105                     PL_reginput = locinput;
3106                     DEBUG_r(
3107                         PerlIO_printf(Perl_debug_log,
3108                                       "%*s  failed, try continuation...\n",
3109                                       REPORT_CODE_OFF+PL_regindent*2, "")
3110                         );
3111                 }
3112                 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3113                         && !(PL_reg_flags & RF_warned)) {
3114                     PL_reg_flags |= RF_warned;
3115                     Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3116                          "Complex regular subexpression recursion",
3117                          REG_INFTY - 1);
3118                 }
3119
3120                 /* Failed deeper matches of scan, so see if this one works. */
3121                 PL_regcc = cc->oldcc;
3122                 if (PL_regcc)
3123                     ln = PL_regcc->cur;
3124                 if (regmatch(cc->next))
3125                     CACHEsayYES;
3126                 if (PL_regcc)
3127                     PL_regcc->cur = ln;
3128                 PL_regcc = cc;
3129                 cc->cur = n - 1;
3130                 cc->lastloc = lastloc;
3131                 CACHEsayNO;
3132             }
3133             /* NOTREACHED */
3134         case BRANCHJ:
3135             next = scan + ARG(scan);
3136             if (next == scan)
3137                 next = NULL;
3138             inner = NEXTOPER(NEXTOPER(scan));
3139             goto do_branch;
3140         case BRANCH:
3141             inner = NEXTOPER(scan);
3142           do_branch:
3143             {
3144                 c1 = OP(scan);
3145                 if (!next || OP(next) != c1)    /* No choice. */
3146                     next = inner;       /* Avoid recursion. */
3147                 else {
3148                     const I32 lastparen = *PL_reglastparen;
3149                     /* Put unwinding data on stack */
3150                     const I32 unwind1 = SSNEWt(1,re_unwind_branch_t);
3151                     re_unwind_branch_t * const uw = SSPTRt(unwind1,re_unwind_branch_t);
3152
3153                     uw->prev = unwind;
3154                     unwind = unwind1;
3155                     uw->type = ((c1 == BRANCH)
3156                                 ? RE_UNWIND_BRANCH
3157                                 : RE_UNWIND_BRANCHJ);
3158                     uw->lastparen = lastparen;
3159                     uw->next = next;
3160                     uw->locinput = locinput;
3161                     uw->nextchr = nextchr;
3162 #ifdef DEBUGGING
3163                     uw->regindent = ++PL_regindent;
3164 #endif
3165
3166                     REGCP_SET(uw->lastcp);
3167
3168                     /* Now go into the first branch */
3169                     next = inner;
3170                 }
3171             }
3172             break;
3173         case MINMOD:
3174             minmod = 1;
3175             break;
3176         case CURLYM:
3177         {
3178             I32 l = 0;
3179             I32 matches = 0;
3180             CHECKPOINT lastcp;
3181             I32 maxwanted;
3182
3183             /* We suppose that the next guy does not need
3184                backtracking: in particular, it is of constant non-zero length,
3185                and has no parenths to influence future backrefs. */
3186             ln = ARG1(scan);  /* min to match */
3187             n  = ARG2(scan);  /* max to match */
3188             paren = scan->flags;
3189             if (paren) {
3190                 if (paren > PL_regsize)
3191                     PL_regsize = paren;
3192                 if (paren > (I32)*PL_reglastparen)
3193                     *PL_reglastparen = paren;
3194             }
3195             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3196             if (paren)
3197                 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3198             PL_reginput = locinput;
3199             maxwanted = minmod ? ln : n;
3200             if (maxwanted) {
3201                 while (PL_reginput < PL_regeol && matches < maxwanted) {
3202                     if (!regmatch(scan))
3203                         break;
3204                     /* on first match, determine length, l */
3205                     if (!matches++) {
3206                         if (PL_reg_match_utf8) {
3207                             char *s = locinput;
3208                             while (s < PL_reginput) {
3209                                 l++;
3210                                 s += UTF8SKIP(s);
3211                             }
3212                         }
3213                         else {
3214                             l = PL_reginput - locinput;
3215                         }
3216                         if (l == 0) {
3217                             matches = maxwanted;
3218                             break;
3219                         }
3220                     }
3221                     locinput = PL_reginput;
3222                 }
3223             }
3224
3225             PL_reginput = locinput;
3226
3227             if (minmod) {
3228                 minmod = 0;
3229                 if (ln && matches < ln)
3230                     sayNO;
3231                 if (HAS_TEXT(next) || JUMPABLE(next)) {
3232                     regnode *text_node = next;
3233
3234                     if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3235
3236                     if (! HAS_TEXT(text_node)) c1 = c2 = CHRTEST_VOID;
3237                     else {
3238                         if (PL_regkind[(U8)OP(text_node)] == REF) {
3239                             c1 = c2 = CHRTEST_VOID;
3240                             goto assume_ok_MM;
3241                         }
3242                         else { c1 = (U8)*STRING(text_node); }
3243                         if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3244                             c2 = PL_fold[c1];
3245                         else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3246                             c2 = PL_fold_locale[c1];
3247                         else
3248                             c2 = c1;
3249                     }
3250                 }
3251                 else
3252                     c1 = c2 = CHRTEST_VOID;
3253             assume_ok_MM:
3254                 REGCP_SET(lastcp);
3255                 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3256                     /* If it could work, try it. */
3257                     if (c1 == CHRTEST_VOID ||
3258                         UCHARAT(PL_reginput) == c1 ||
3259                         UCHARAT(PL_reginput) == c2)
3260                     {
3261                         if (paren) {
3262                             if (ln) {
3263                                 PL_regstartp[paren] =
3264                                     HOPc(PL_reginput, -l) - PL_bostr;
3265                                 PL_regendp[paren] = PL_reginput - PL_bostr;
3266                             }
3267                             else
3268                                 PL_regendp[paren] = -1;
3269                         }
3270                         if (regmatch(next))
3271                             sayYES;
3272                         REGCP_UNWIND(lastcp);
3273                     }
3274                     /* Couldn't or didn't -- move forward. */
3275                     PL_reginput = locinput;
3276                     if (regmatch(scan)) {
3277                         ln++;
3278                         locinput = PL_reginput;
3279                     }
3280                     else
3281                         sayNO;
3282                 }
3283             }
3284             else {
3285                 DEBUG_r(
3286                     PerlIO_printf(Perl_debug_log,
3287                                   "%*s  matched %"IVdf" times, len=%"IVdf"...\n",
3288                                   (int)(REPORT_CODE_OFF+PL_regindent*2), "",
3289                                   (IV) matches, (IV)l)
3290                     );
3291                 if (matches >= ln) {
3292                     if (HAS_TEXT(next) || JUMPABLE(next)) {
3293                         regnode *text_node = next;
3294
3295                         if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3296
3297                         if (! HAS_TEXT(text_node)) c1 = c2 = CHRTEST_VOID;
3298                         else {
3299                             if (PL_regkind[(U8)OP(text_node)] == REF) {
3300                                 c1 = c2 = CHRTEST_VOID;
3301                                 goto assume_ok_REG;
3302                             }
3303                             else { c1 = (U8)*STRING(text_node); }
3304
3305                             if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3306                                 c2 = PL_fold[c1];
3307                             else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3308                                 c2 = PL_fold_locale[c1];
3309                             else
3310                                 c2 = c1;
3311                         }
3312                     }
3313                     else
3314                         c1 = c2 = CHRTEST_VOID;
3315                 }
3316             assume_ok_REG:
3317                 REGCP_SET(lastcp);
3318                 while (matches >= ln) {
3319                     /* If it could work, try it. */
3320                     if (c1 == CHRTEST_VOID ||
3321                         UCHARAT(PL_reginput) == c1 ||
3322                         UCHARAT(PL_reginput) == c2)
3323                     {
3324                         DEBUG_r(
3325                             PerlIO_printf(Perl_debug_log,
3326                                 "%*s  trying tail with matches=%"IVdf"...\n",
3327                                 (int)(REPORT_CODE_OFF+PL_regindent*2),
3328                                 "", (IV)matches)
3329                             );
3330                         if (paren) {
3331                             if (matches) {
3332                                 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3333                                 PL_regendp[paren] = PL_reginput - PL_bostr;
3334                             }
3335                             else
3336                                 PL_regendp[paren] = -1;
3337                         }
3338                         if (regmatch(next))
3339                             sayYES;
3340                         REGCP_UNWIND(lastcp);
3341                     }
3342                     /* Couldn't or didn't -- back up. */
3343                     matches--;
3344                     locinput = HOPc(locinput, -l);
3345                     PL_reginput = locinput;
3346                 }
3347             }
3348             sayNO;
3349             /* NOTREACHED */
3350             break;
3351         }
3352         case CURLYN:
3353             paren = scan->flags;        /* Which paren to set */
3354             if (paren > PL_regsize)
3355                 PL_regsize = paren;
3356             if (paren > (I32)*PL_reglastparen)
3357                 *PL_reglastparen = paren;
3358             ln = ARG1(scan);  /* min to match */
3359             n  = ARG2(scan);  /* max to match */
3360             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
3361             goto repeat;
3362         case CURLY:
3363             paren = 0;
3364             ln = ARG1(scan);  /* min to match */
3365             n  = ARG2(scan);  /* max to match */
3366             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3367             goto repeat;
3368         case STAR:
3369             ln = 0;
3370             n = REG_INFTY;
3371             scan = NEXTOPER(scan);
3372             paren = 0;
3373             goto repeat;
3374         case PLUS:
3375             ln = 1;
3376             n = REG_INFTY;
3377             scan = NEXTOPER(scan);
3378             paren = 0;
3379           repeat:
3380             /*
3381             * Lookahead to avoid useless match attempts
3382             * when we know what character comes next.
3383             */
3384
3385             /*
3386             * Used to only do .*x and .*?x, but now it allows
3387             * for )'s, ('s and (?{ ... })'s to be in the way
3388             * of the quantifier and the EXACT-like node.  -- japhy
3389             */
3390
3391             if (HAS_TEXT(next) || JUMPABLE(next)) {
3392                 U8 *s;
3393                 regnode *text_node = next;
3394
3395                 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3396
3397                 if (! HAS_TEXT(text_node)) c1 = c2 = CHRTEST_VOID;
3398                 else {
3399                     if (PL_regkind[(U8)OP(text_node)] == REF) {
3400                         c1 = c2 = CHRTEST_VOID;
3401                         goto assume_ok_easy;
3402                     }
3403                     else { s = (U8*)STRING(text_node); }
3404
3405                     if (!UTF) {
3406                         c2 = c1 = *s;
3407                         if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3408                             c2 = PL_fold[c1];
3409                         else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3410                             c2 = PL_fold_locale[c1];
3411                     }
3412                     else { /* UTF */
3413                         if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
3414                              STRLEN ulen1, ulen2;
3415                              U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3416                              U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3417
3418                              to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3419                              to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3420 #ifdef EBCDIC
3421                              ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
3422                                                     ckWARN(WARN_UTF8) ?
3423                                                     0 : UTF8_ALLOW_ANY);
3424                              ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
3425                                                     ckWARN(WARN_UTF8) ?
3426                                                     0 : UTF8_ALLOW_ANY);
3427 #else
3428                              c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
3429                                                     uniflags);
3430                              c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
3431                                                     uniflags);
3432 #endif
3433                         }
3434                         else {
3435                             c2 = c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
3436                                                      uniflags);
3437                         }
3438                     }
3439                 }
3440             }
3441             else
3442                 c1 = c2 = CHRTEST_VOID;
3443         assume_ok_easy:
3444             PL_reginput = locinput;
3445             if (minmod) {
3446                 CHECKPOINT lastcp;
3447                 minmod = 0;
3448                 if (ln && regrepeat(scan, ln) < ln)
3449                     sayNO;
3450                 locinput = PL_reginput;
3451                 REGCP_SET(lastcp);
3452                 if (c1 != CHRTEST_VOID) {
3453                     char *e; /* Should not check after this */
3454                     char *old = locinput;
3455                     int count = 0;
3456
3457                     if  (n == REG_INFTY) {
3458                         e = PL_regeol - 1;
3459                         if (do_utf8)
3460                             while (UTF8_IS_CONTINUATION(*(U8*)e))
3461                                 e--;
3462                     }
3463                     else if (do_utf8) {
3464                         int m = n - ln;
3465                         for (e = locinput;
3466                              m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3467                             e += UTF8SKIP(e);
3468                     }
3469                     else {
3470                         e = locinput + n - ln;
3471                         if (e >= PL_regeol)
3472                             e = PL_regeol - 1;
3473                     }
3474                     while (1) {
3475                         /* Find place 'next' could work */
3476                         if (!do_utf8) {
3477                             if (c1 == c2) {
3478                                 while (locinput <= e &&
3479                                        UCHARAT(locinput) != c1)
3480                                     locinput++;
3481                             } else {
3482                                 while (locinput <= e
3483                                        && UCHARAT(locinput) != c1
3484                                        && UCHARAT(locinput) != c2)
3485                                     locinput++;
3486                             }
3487                             count = locinput - old;
3488                         }
3489                         else {
3490                             if (c1 == c2) {
3491                                 STRLEN len;
3492                                 /* count initialised to
3493                                  * utf8_distance(old, locinput) */
3494                                 while (locinput <= e &&
3495                                        utf8n_to_uvchr((U8*)locinput,
3496                                                       UTF8_MAXBYTES, &len,
3497                                                       uniflags) != (UV)c1) {
3498                                     locinput += len;
3499                                     count++;
3500                                 }
3501                             } else {
3502                                 /* count initialised to
3503                                  * utf8_distance(old, locinput) */
3504                                 while (locinput <= e) {
3505                                     STRLEN len;
3506                                     const UV c = utf8n_to_uvchr((U8*)locinput,
3507                                                           UTF8_MAXBYTES, &len,
3508                                                           uniflags);
3509                                     if (c == (UV)c1 || c == (UV)c2)
3510                                         break;
3511                                     locinput += len;
3512                                     count++;
3513                                 }
3514                             }
3515                         }
3516                         if (locinput > e)
3517                             sayNO;
3518                         /* PL_reginput == old now */
3519                         if (locinput != old) {
3520                             ln = 1;     /* Did some */
3521                             if (regrepeat(scan, count) < count)
3522                                 sayNO;
3523                         }
3524                         /* PL_reginput == locinput now */
3525                         TRYPAREN(paren, ln, locinput);
3526                         PL_reginput = locinput; /* Could be reset... */
3527                         REGCP_UNWIND(lastcp);
3528                         /* Couldn't or didn't -- move forward. */
3529                         old = locinput;
3530                         if (do_utf8)
3531                             locinput += UTF8SKIP(locinput);
3532                         else
3533                             locinput++;
3534                         count = 1;
3535                     }
3536                 }
3537                 else
3538                 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3539                     UV c;
3540                     if (c1 != CHRTEST_VOID) {
3541                         if (do_utf8)
3542                             c = utf8n_to_uvchr((U8*)PL_reginput,
3543                                                UTF8_MAXBYTES, 0,
3544                                                uniflags);
3545                         else
3546                             c = UCHARAT(PL_reginput);
3547                         /* If it could work, try it. */
3548                         if (c == (UV)c1 || c == (UV)c2)
3549                         {
3550                             TRYPAREN(paren, ln, PL_reginput);
3551                             REGCP_UNWIND(lastcp);
3552                         }
3553                     }
3554                     /* If it could work, try it. */
3555                     else if (c1 == CHRTEST_VOID)
3556                     {
3557                         TRYPAREN(paren, ln, PL_reginput);
3558                         REGCP_UNWIND(lastcp);
3559                     }
3560                     /* Couldn't or didn't -- move forward. */
3561                     PL_reginput = locinput;
3562                     if (regrepeat(scan, 1)) {
3563                         ln++;
3564                         locinput = PL_reginput;
3565                     }
3566                     else
3567                         sayNO;
3568                 }
3569             }
3570             else {
3571                 CHECKPOINT lastcp;
3572                 n = regrepeat(scan, n);
3573                 locinput = PL_reginput;
3574                 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3575                     ((!PL_multiline && OP(next) != MEOL) ||
3576                         OP(next) == SEOL || OP(next) == EOS))
3577                 {
3578                     ln = n;                     /* why back off? */
3579                     /* ...because $ and \Z can match before *and* after
3580                        newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
3581                        We should back off by one in this case. */
3582                     if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3583                         ln--;
3584                 }
3585                 REGCP_SET(lastcp);
3586                 {
3587                     UV c = 0;
3588                     while (n >= ln) {
3589                         if (c1 != CHRTEST_VOID) {
3590                             if (do_utf8)
3591                                 c = utf8n_to_uvchr((U8*)PL_reginput,
3592                                                    UTF8_MAXBYTES, 0,
3593                                                    uniflags);
3594                             else
3595                                 c = UCHARAT(PL_reginput);
3596                         }
3597                         /* If it could work, try it. */
3598                         if (c1 == CHRTEST_VOID || c == (UV)c1 || c == (UV)c2)
3599                             {
3600                                 TRYPAREN(paren, n, PL_reginput);
3601                                 REGCP_UNWIND(lastcp);
3602                             }
3603                         /* Couldn't or didn't -- back up. */
3604                         n--;
3605                         PL_reginput = locinput = HOPc(locinput, -1);
3606                     }
3607                 }
3608             }
3609             sayNO;
3610             break;
3611         case END:
3612             if (PL_reg_call_cc) {
3613                 re_cc_state *cur_call_cc = PL_reg_call_cc;
3614                 CURCUR *cctmp = PL_regcc;
3615                 regexp *re = PL_reg_re;
3616                 CHECKPOINT lastcp;
3617                 I32 tmp;
3618
3619                 /* Save *all* the positions. */
3620                 const CHECKPOINT cp = regcppush(0);
3621                 REGCP_SET(lastcp);
3622
3623                 /* Restore parens of the caller. */
3624                 tmp = PL_savestack_ix;
3625                 PL_savestack_ix = PL_reg_call_cc->ss;
3626                 regcppop();
3627                 PL_savestack_ix = tmp;
3628
3629                 /* Make position available to the callcc. */
3630                 PL_reginput = locinput;
3631
3632                 cache_re(PL_reg_call_cc->re);
3633                 PL_regcc = PL_reg_call_cc->cc;
3634                 PL_reg_call_cc = PL_reg_call_cc->prev;
3635                 if (regmatch(cur_call_cc->node)) {
3636                     PL_reg_call_cc = cur_call_cc;
3637                     regcpblow(cp);
3638                     sayYES;
3639                 }
3640                 REGCP_UNWIND(lastcp);
3641                 regcppop();
3642                 PL_reg_call_cc = cur_call_cc;
3643                 PL_regcc = cctmp;
3644                 PL_reg_re = re;
3645                 cache_re(re);
3646
3647                 DEBUG_r(
3648                     PerlIO_printf(Perl_debug_log,
3649                                   "%*s  continuation failed...\n",
3650                                   REPORT_CODE_OFF+PL_regindent*2, "")
3651                     );
3652                 sayNO_SILENT;
3653             }
3654             if (locinput < PL_regtill) {
3655                 DEBUG_r(PerlIO_printf(Perl_debug_log,
3656                                       "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3657                                       PL_colors[4],
3658                                       (long)(locinput - PL_reg_starttry),
3659                                       (long)(PL_regtill - PL_reg_starttry),
3660                                       PL_colors[5]));
3661                 sayNO_FINAL;            /* Cannot match: too short. */
3662             }
3663             PL_reginput = locinput;     /* put where regtry can find it */
3664             sayYES_FINAL;               /* Success! */
3665         case SUCCEED:
3666             PL_reginput = locinput;     /* put where regtry can find it */
3667             sayYES_LOUD;                /* Success! */
3668         case SUSPEND:
3669             n = 1;
3670             PL_reginput = locinput;
3671             goto do_ifmatch;
3672         case UNLESSM:
3673             n = 0;
3674             if (scan->flags) {
3675                 s = HOPBACKc(locinput, scan->flags);
3676                 if (!s)
3677                     goto say_yes;
3678                 PL_reginput = s;
3679             }
3680             else
3681                 PL_reginput = locinput;
3682             goto do_ifmatch;
3683         case IFMATCH:
3684             n = 1;
3685             if (scan->flags) {
3686                 s = HOPBACKc(locinput, scan->flags);
3687                 if (!s)
3688                     goto say_no;
3689                 PL_reginput = s;
3690             }
3691             else
3692                 PL_reginput = locinput;
3693
3694           do_ifmatch:
3695             inner = NEXTOPER(NEXTOPER(scan));
3696             if (regmatch(inner) != n) {
3697               say_no:
3698                 if (logical) {
3699                     logical = 0;
3700                     sw = 0;
3701                     goto do_longjump;
3702                 }
3703                 else
3704                     sayNO;
3705             }
3706           say_yes:
3707             if (logical) {
3708                 logical = 0;
3709                 sw = 1;
3710             }
3711             if (OP(scan) == SUSPEND) {
3712                 locinput = PL_reginput;
3713                 nextchr = UCHARAT(locinput);
3714             }
3715             /* FALL THROUGH. */
3716         case LONGJMP:
3717           do_longjump:
3718             next = scan + ARG(scan);
3719             if (next == scan)
3720                 next = NULL;
3721             break;
3722         default:
3723             PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
3724                           PTR2UV(scan), OP(scan));
3725             Perl_croak(aTHX_ "regexp memory corruption");
3726         }
3727       reenter:
3728         scan = next;
3729     }
3730
3731     /*
3732     * We get here only if there's trouble -- normally "case END" is
3733     * the terminating point.
3734     */
3735     Perl_croak(aTHX_ "corrupted regexp pointers");
3736     /*NOTREACHED*/
3737     sayNO;
3738
3739 yes_loud:
3740     DEBUG_r(
3741         PerlIO_printf(Perl_debug_log,
3742                       "%*s  %scould match...%s\n",
3743                       REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3744         );
3745     goto yes;
3746 yes_final:
3747     DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3748                           PL_colors[4],PL_colors[5]));
3749 yes:
3750 #ifdef DEBUGGING
3751     PL_regindent--;
3752 #endif
3753
3754 #if 0                                   /* Breaks $^R */
3755     if (unwind)
3756         regcpblow(firstcp);
3757 #endif
3758     return 1;
3759
3760 no:
3761     DEBUG_r(
3762         PerlIO_printf(Perl_debug_log,
3763                       "%*s  %sfailed...%s\n",
3764                       REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3765         );
3766     goto do_no;
3767 no_final:
3768 do_no:
3769     if (unwind) {
3770         re_unwind_t * const uw = SSPTRt(unwind,re_unwind_t);
3771
3772         switch (uw->type) {
3773         case RE_UNWIND_BRANCH:
3774         case RE_UNWIND_BRANCHJ:
3775         {
3776             re_unwind_branch_t * const uwb = &(uw->branch);
3777             const I32 lastparen = uwb->lastparen;
3778
3779             REGCP_UNWIND(uwb->lastcp);
3780             for (n = *PL_reglastparen; n > lastparen; n--)
3781                 PL_regendp[n] = -1;
3782             *PL_reglastparen = n;
3783             scan = next = uwb->next;
3784             if ( !scan ||
3785                  OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3786                               ? BRANCH : BRANCHJ) ) {           /* Failure */
3787                 unwind = uwb->prev;
3788 #ifdef DEBUGGING
3789                 PL_regindent--;
3790 #endif
3791                 goto do_no;
3792             }
3793             /* Have more choice yet.  Reuse the same uwb.  */
3794             if ((n = (uwb->type == RE_UNWIND_BRANCH
3795                       ? NEXT_OFF(next) : ARG(next))))
3796                 next += n;
3797             else
3798                 next = NULL;    /* XXXX Needn't unwinding in this case... */
3799             uwb->next = next;
3800             next = NEXTOPER(scan);
3801             if (uwb->type == RE_UNWIND_BRANCHJ)
3802                 next = NEXTOPER(next);
3803             locinput = uwb->locinput;
3804             nextchr = uwb->nextchr;
3805 #ifdef DEBUGGING
3806             PL_regindent = uwb->regindent;
3807 #endif
3808
3809             goto reenter;
3810         }
3811         /* NOTREACHED */
3812         default:
3813             Perl_croak(aTHX_ "regexp unwind memory corruption");
3814         }
3815         /* NOTREACHED */
3816     }
3817 #ifdef DEBUGGING
3818     PL_regindent--;
3819 #endif
3820     return 0;
3821 }
3822
3823 /*
3824  - regrepeat - repeatedly match something simple, report how many
3825  */
3826 /*
3827  * [This routine now assumes that it will only match on things of length 1.
3828  * That was true before, but now we assume scan - reginput is the count,
3829  * rather than incrementing count on every character.  [Er, except utf8.]]
3830  */
3831 STATIC I32
3832 S_regrepeat(pTHX_ const regnode *p, I32 max)
3833 {
3834     register char *scan;
3835     register I32 c;
3836     register char *loceol = PL_regeol;
3837     register I32 hardcount = 0;
3838     register bool do_utf8 = PL_reg_match_utf8;
3839
3840     scan = PL_reginput;
3841     if (max == REG_INFTY)
3842         max = I32_MAX;
3843     else if (max < loceol - scan)
3844       loceol = scan + max;
3845     switch (OP(p)) {
3846     case REG_ANY:
3847         if (do_utf8) {
3848             loceol = PL_regeol;
3849             while (scan < loceol && hardcount < max && *scan != '\n') {
3850                 scan += UTF8SKIP(scan);
3851                 hardcount++;
3852             }
3853         } else {
3854             while (scan < loceol && *scan != '\n')
3855                 scan++;
3856         }
3857         break;
3858     case SANY:
3859         if (do_utf8) {
3860             loceol = PL_regeol;
3861             while (scan < loceol && hardcount < max) {
3862                 scan += UTF8SKIP(scan);
3863                 hardcount++;
3864             }
3865         }
3866         else
3867             scan = loceol;
3868         break;
3869     case CANY:
3870         scan = loceol;
3871         break;
3872     case EXACT:         /* length of string is 1 */
3873         c = (U8)*STRING(p);
3874         while (scan < loceol && UCHARAT(scan) == c)
3875             scan++;
3876         break;
3877     case EXACTF:        /* length of string is 1 */
3878         c = (U8)*STRING(p);
3879         while (scan < loceol &&
3880                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
3881             scan++;
3882         break;
3883     case EXACTFL:       /* length of string is 1 */
3884         PL_reg_flags |= RF_tainted;
3885         c = (U8)*STRING(p);
3886         while (scan < loceol &&
3887                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
3888             scan++;
3889         break;
3890     case ANYOF:
3891         if (do_utf8) {
3892             loceol = PL_regeol;
3893             while (hardcount < max && scan < loceol &&
3894                    reginclass(p, (U8*)scan, 0, do_utf8)) {
3895                 scan += UTF8SKIP(scan);
3896                 hardcount++;
3897             }
3898         } else {
3899             while (scan < loceol && REGINCLASS(p, (U8*)scan))
3900                 scan++;
3901         }
3902         break;
3903     case ALNUM:
3904         if (do_utf8) {
3905             loceol = PL_regeol;
3906             LOAD_UTF8_CHARCLASS_ALNUM();
3907             while (hardcount < max && scan < loceol &&
3908                    swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
3909                 scan += UTF8SKIP(scan);
3910                 hardcount++;
3911             }
3912         } else {
3913             while (scan < loceol && isALNUM(*scan))
3914                 scan++;
3915         }
3916         break;
3917     case ALNUML:
3918         PL_reg_flags |= RF_tainted;
3919         if (do_utf8) {
3920             loceol = PL_regeol;
3921             while (hardcount < max && scan < loceol &&
3922                    isALNUM_LC_utf8((U8*)scan)) {
3923                 scan += UTF8SKIP(scan);
3924                 hardcount++;
3925             }
3926         } else {
3927             while (scan < loceol && isALNUM_LC(*scan))
3928                 scan++;
3929         }
3930         break;
3931     case NALNUM:
3932         if (do_utf8) {
3933             loceol = PL_regeol;
3934             LOAD_UTF8_CHARCLASS_ALNUM();
3935             while (hardcount < max && scan < loceol &&
3936                    !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
3937                 scan += UTF8SKIP(scan);
3938                 hardcount++;
3939             }
3940         } else {
3941             while (scan < loceol && !isALNUM(*scan))
3942                 scan++;
3943         }
3944         break;
3945     case NALNUML:
3946         PL_reg_flags |= RF_tainted;
3947         if (do_utf8) {
3948             loceol = PL_regeol;
3949             while (hardcount < max && scan < loceol &&
3950                    !isALNUM_LC_utf8((U8*)scan)) {
3951                 scan += UTF8SKIP(scan);
3952                 hardcount++;
3953             }
3954         } else {
3955             while (scan < loceol && !isALNUM_LC(*scan))
3956                 scan++;
3957         }
3958         break;
3959     case SPACE:
3960         if (do_utf8) {
3961             loceol = PL_regeol;
3962             LOAD_UTF8_CHARCLASS_SPACE();
3963             while (hardcount < max && scan < loceol &&
3964                    (*scan == ' ' ||
3965                     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
3966                 scan += UTF8SKIP(scan);
3967                 hardcount++;
3968             }
3969         } else {
3970             while (scan < loceol && isSPACE(*scan))
3971                 scan++;
3972         }
3973         break;
3974     case SPACEL:
3975         PL_reg_flags |= RF_tainted;
3976         if (do_utf8) {
3977             loceol = PL_regeol;
3978             while (hardcount < max && scan < loceol &&
3979                    (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3980                 scan += UTF8SKIP(scan);
3981                 hardcount++;
3982             }
3983         } else {
3984             while (scan < loceol && isSPACE_LC(*scan))
3985                 scan++;
3986         }
3987         break;
3988     case NSPACE:
3989         if (do_utf8) {
3990             loceol = PL_regeol;
3991             LOAD_UTF8_CHARCLASS_SPACE();
3992             while (hardcount < max && scan < loceol &&
3993                    !(*scan == ' ' ||
3994                      swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
3995                 scan += UTF8SKIP(scan);
3996                 hardcount++;
3997             }
3998         } else {
3999             while (scan < loceol && !isSPACE(*scan))
4000                 scan++;
4001             break;
4002         }
4003     case NSPACEL:
4004         PL_reg_flags |= RF_tainted;
4005         if (do_utf8) {
4006             loceol = PL_regeol;
4007             while (hardcount < max && scan < loceol &&
4008                    !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4009                 scan += UTF8SKIP(scan);
4010                 hardcount++;
4011             }
4012         } else {
4013             while (scan < loceol && !isSPACE_LC(*scan))
4014                 scan++;
4015         }
4016         break;
4017     case DIGIT:
4018         if (do_utf8) {
4019             loceol = PL_regeol;
4020             LOAD_UTF8_CHARCLASS_DIGIT();
4021             while (hardcount < max && scan < loceol &&
4022                    swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4023                 scan += UTF8SKIP(scan);
4024                 hardcount++;
4025             }
4026         } else {
4027             while (scan < loceol && isDIGIT(*scan))
4028                 scan++;
4029         }
4030         break;
4031     case NDIGIT:
4032         if (do_utf8) {
4033             loceol = PL_regeol;
4034             LOAD_UTF8_CHARCLASS_DIGIT();
4035             while (hardcount < max && scan < loceol &&
4036                    !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4037                 scan += UTF8SKIP(scan);
4038                 hardcount++;
4039             }
4040         } else {
4041             while (scan < loceol && !isDIGIT(*scan))
4042                 scan++;
4043         }
4044         break;
4045     default:            /* Called on something of 0 width. */
4046         break;          /* So match right here or not at all. */
4047     }
4048
4049     if (hardcount)
4050         c = hardcount;
4051     else
4052         c = scan - PL_reginput;
4053     PL_reginput = scan;
4054
4055     DEBUG_r(
4056         {
4057                 SV * const prop = sv_newmortal();
4058
4059                 regprop(prop, (regnode *)p);
4060                 PerlIO_printf(Perl_debug_log,
4061                               "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
4062                               REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max);
4063         });
4064
4065     return(c);
4066 }
4067
4068
4069 /*
4070 - regclass_swash - prepare the utf8 swash
4071 */
4072
4073 SV *
4074 Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV **altsvp)
4075 {
4076     SV *sw  = NULL;
4077     SV *si  = NULL;
4078     SV *alt = NULL;
4079
4080     if (PL_regdata && PL_regdata->count) {
4081         const U32 n = ARG(node);
4082
4083         if (PL_regdata->what[n] == 's') {
4084             SV * const rv = (SV*)PL_regdata->data[n];
4085             AV * const av = (AV*)SvRV((SV*)rv);
4086             SV **const ary = AvARRAY(av);
4087             SV **a, **b;
4088
4089             /* See the end of regcomp.c:S_regclass() for
4090              * documentation of these array elements. */
4091
4092             si = *ary;
4093             a  = SvROK(ary[1]) ? &ary[1] : 0;
4094             b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4095
4096             if (a)
4097                 sw = *a;
4098             else if (si && doinit) {
4099                 sw = swash_init("utf8", "", si, 1, 0);
4100                 (void)av_store(av, 1, sw);
4101             }
4102             if (b)
4103                 alt = *b;
4104         }
4105     }
4106
4107     if (listsvp)
4108         *listsvp = si;
4109     if (altsvp)
4110         *altsvp  = alt;
4111
4112     return sw;
4113 }
4114
4115 /*
4116  - reginclass - determine if a character falls into a character class
4117
4118   The n is the ANYOF regnode, the p is the target string, lenp
4119   is pointer to the maximum length of how far to go in the p
4120   (if the lenp is zero, UTF8SKIP(p) is used),
4121   do_utf8 tells whether the target string is in UTF-8.
4122
4123  */
4124
4125 STATIC bool
4126 S_reginclass(pTHX_ register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
4127 {
4128     const char flags = ANYOF_FLAGS(n);
4129     bool match = FALSE;
4130     UV c = *p;
4131     STRLEN len = 0;
4132     STRLEN plen;
4133
4134     if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
4135         c = utf8n_to_uvchr((U8 *)p, UTF8_MAXBYTES, &len,
4136                             ckWARN(WARN_UTF8) ? UTF8_CHECK_ONLY :
4137                                         UTF8_ALLOW_ANYUV|UTF8_CHECK_ONLY);
4138         if (len == (STRLEN)-1)
4139             Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
4140     }
4141
4142     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
4143     if (do_utf8 || (flags & ANYOF_UNICODE)) {
4144         if (lenp)
4145             *lenp = 0;
4146         if (do_utf8 && !ANYOF_RUNTIME(n)) {
4147             if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4148                 match = TRUE;
4149         }
4150         if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
4151             match = TRUE;
4152         if (!match) {
4153             AV *av;
4154             SV * const sw = regclass_swash((regnode *)n, TRUE, 0, (SV**)&av);
4155
4156             if (sw) {
4157                 if (swash_fetch(sw, (U8 *)p, do_utf8))
4158                     match = TRUE;
4159                 else if (flags & ANYOF_FOLD) {
4160                     if (!match && lenp && av) {
4161                         I32 i;
4162                         for (i = 0; i <= av_len(av); i++) {
4163                             SV* const sv = *av_fetch(av, i, FALSE);
4164                             STRLEN len;
4165                             const char * const s = SvPV_const(sv, len);
4166
4167                             if (len <= plen && memEQ(s, (char*)p, len)) {
4168                                 *lenp = len;
4169                                 match = TRUE;
4170                                 break;
4171                             }
4172                         }
4173                     }
4174                     if (!match) {
4175                         U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4176                         STRLEN tmplen;
4177
4178                         to_utf8_fold((U8 *)p, tmpbuf, &tmplen);
4179                         if (swash_fetch(sw, tmpbuf, do_utf8))
4180                             match = TRUE;
4181                     }
4182                 }
4183             }
4184         }
4185         if (match && lenp && *lenp == 0)
4186             *lenp = UNISKIP(NATIVE_TO_UNI(c));
4187     }
4188     if (!match && c < 256) {
4189         if (ANYOF_BITMAP_TEST(n, c))
4190             match = TRUE;
4191         else if (flags & ANYOF_FOLD) {
4192             U8 f;
4193
4194             if (flags & ANYOF_LOCALE) {
4195                 PL_reg_flags |= RF_tainted;
4196                 f = PL_fold_locale[c];
4197             }
4198             else
4199                 f = PL_fold[c];
4200             if (f != c && ANYOF_BITMAP_TEST(n, f))
4201                 match = TRUE;
4202         }
4203
4204         if (!match && (flags & ANYOF_CLASS)) {
4205             PL_reg_flags |= RF_tainted;
4206             if (
4207                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
4208                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
4209                 (ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
4210                 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
4211                 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
4212                 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
4213                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
4214                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4215                 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
4216                 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
4217                 (ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
4218                 (ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
4219                 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
4220                 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
4221                 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
4222                 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
4223                 (ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
4224                 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
4225                 (ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
4226                 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
4227                 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
4228                 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
4229                 (ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
4230                 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
4231                 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
4232                 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
4233                 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
4234                 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
4235                 (ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
4236                 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
4237                 ) /* How's that for a conditional? */
4238             {
4239                 match = TRUE;
4240             }
4241         }
4242     }
4243
4244     return (flags & ANYOF_INVERT) ? !match : match;
4245 }
4246
4247 STATIC U8 *
4248 S_reghop3(U8 *s, I32 off, const U8* lim)
4249 {
4250     if (off >= 0) {
4251         while (off-- && s < lim) {
4252             /* XXX could check well-formedness here */
4253             s += UTF8SKIP(s);
4254         }
4255     }
4256     else {
4257         while (off++) {
4258             if (s > lim) {
4259                 s--;
4260                 if (UTF8_IS_CONTINUED(*s)) {
4261                     while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4262                         s--;
4263                 }
4264                 /* XXX could check well-formedness here */
4265             }
4266         }
4267     }
4268     return s;
4269 }
4270
4271 STATIC U8 *
4272 S_reghopmaybe3(U8* s, I32 off, const U8* lim)
4273 {
4274     if (off >= 0) {
4275         while (off-- && s < lim) {
4276             /* XXX could check well-formedness here */
4277             s += UTF8SKIP(s);
4278         }
4279         if (off >= 0)
4280             return 0;
4281     }
4282     else {
4283         while (off++) {
4284             if (s > lim) {
4285                 s--;
4286                 if (UTF8_IS_CONTINUED(*s)) {
4287                     while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4288                         s--;
4289                 }
4290                 /* XXX could check well-formedness here */
4291             }
4292             else
4293                 break;
4294         }
4295         if (off <= 0)
4296             return 0;
4297     }
4298     return s;
4299 }
4300
4301 static void
4302 restore_pos(pTHX_ void *arg)
4303 {
4304     PERL_UNUSED_ARG(arg);
4305     if (PL_reg_eval_set) {
4306         if (PL_reg_oldsaved) {
4307             PL_reg_re->subbeg = PL_reg_oldsaved;
4308             PL_reg_re->sublen = PL_reg_oldsavedlen;
4309             RX_MATCH_COPIED_on(PL_reg_re);
4310         }
4311         PL_reg_magic->mg_len = PL_reg_oldpos;
4312         PL_reg_eval_set = 0;
4313         PL_curpm = PL_reg_oldcurpm;
4314     }
4315 }
4316
4317 STATIC void
4318 S_to_utf8_substr(pTHX_ register regexp *prog)
4319 {
4320     int i = 1;
4321     do {
4322         if (prog->substrs->data[i].substr
4323             && !prog->substrs->data[i].utf8_substr) {
4324             SV* const sv = newSVsv(prog->substrs->data[i].substr);
4325             prog->substrs->data[i].utf8_substr = sv;
4326             sv_utf8_upgrade(sv);
4327             if (SvVALID(prog->substrs->data[i].substr)) {
4328                 const U8 flags = SvTAIL(prog->substrs->data[i].substr)
4329                     ? FBMcf_TAIL : 0;
4330                 if (flags) {
4331                     /* Trim the trailing \n that fbm_compile added last
4332                        time.  */
4333                     SvCUR_set(sv, SvCUR(sv) - 1);
4334                     /* Whilst this makes the SV technically "invalid" (as its
4335                        buffer is no longer followed by "\0") when fbm_compile()
4336                        adds the "\n" back, a "\0" is restored.  */
4337                 }
4338                 fbm_compile(sv, flags);
4339             }
4340             if (prog->substrs->data[i].substr == prog->check_substr)
4341                 prog->check_utf8 = sv;
4342         }
4343     } while (i--);
4344 }
4345
4346 STATIC void
4347 S_to_byte_substr(pTHX_ register regexp *prog)
4348 {
4349     int i = 1;
4350     do {
4351         if (prog->substrs->data[i].utf8_substr
4352             && !prog->substrs->data[i].substr) {
4353             SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
4354             if (sv_utf8_downgrade(sv, TRUE)) {
4355                 if (SvVALID(prog->substrs->data[i].utf8_substr)) {
4356                     const U8 flags = SvTAIL(prog->substrs->data[i].utf8_substr)
4357                         ? FBMcf_TAIL : 0;
4358                     if (flags) {
4359                         /* Trim the trailing \n that fbm_compile added last
4360                            time.  */
4361                         SvCUR_set(sv, SvCUR(sv) - 1);
4362                     }
4363                     fbm_compile(sv, flags);
4364                 }
4365             } else {
4366                 SvREFCNT_dec(sv);
4367                 sv = &PL_sv_undef;
4368             }
4369             prog->substrs->data[i].substr = sv;
4370             if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
4371                 prog->check_substr = sv;
4372         }
4373     } while (i--);
4374 }
4375
4376 /*
4377  * Local variables:
4378  * c-indentation-style: bsd
4379  * c-basic-offset: 4
4380  * indent-tabs-mode: t
4381  * End:
4382  *
4383  * ex: set ts=8 sts=4 sw=4 noet:
4384  */