This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Perl_croak() needs an aTHX_ in PerlIO_vsprintf().
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
61296642
DM
8/* This file contains functions for executing a regular expression. See
9 * also regcomp.c which funnily enough, contains functions for compiling
166f8a29 10 * a regular expression.
e4a054ea
DM
11 *
12 * This file is also copied at build time to ext/re/re_exec.c, where
13 * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14 * This causes the main functions to be compiled under new names and with
15 * debugging support added, which makes "use re 'debug'" work.
16
166f8a29
DM
17 */
18
a687059c
LW
19/* NOTE: this is derived from Henry Spencer's regexp code, and should not
20 * confused with the original package (see point 3 below). Thanks, Henry!
21 */
22
23/* Additional note: this code is very heavily munged from Henry's version
24 * in places. In some spots I've traded clarity for efficiency, so don't
25 * blame Henry for some of the lack of readability.
26 */
27
e50aee73
AD
28/* The names of the functions have been changed from regcomp and
29 * regexec to pregcomp and pregexec in order to avoid conflicts
30 * with the POSIX routines of the same names.
31*/
32
b9d5759e 33#ifdef PERL_EXT_RE_BUILD
54df2634 34#include "re_top.h"
9041c2e3 35#endif
56953603 36
a687059c 37/*
e50aee73 38 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
39 *
40 * Copyright (c) 1986 by University of Toronto.
41 * Written by Henry Spencer. Not derived from licensed software.
42 *
43 * Permission is granted to anyone to use this software for any
44 * purpose on any computer system, and to redistribute it freely,
45 * subject to the following restrictions:
46 *
47 * 1. The author is not responsible for the consequences of use of
48 * this software, no matter how awful, even if they arise
49 * from defects in it.
50 *
51 * 2. The origin of this software must not be misrepresented, either
52 * by explicit claim or by omission.
53 *
54 * 3. Altered versions must be plainly marked as such, and must not
55 * be misrepresented as being the original software.
56 *
57 **** Alterations to Henry's code are...
58 ****
4bb101f2 59 **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 60 **** 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 61 ****
9ef589d8
LW
62 **** You may distribute under the terms of either the GNU General Public
63 **** License or the Artistic License, as specified in the README file.
a687059c
LW
64 *
65 * Beware that some of this code is subtly aware of the way operator
66 * precedence is structured in regular expressions. Serious changes in
67 * regular-expression syntax might require a total rethink.
68 */
69#include "EXTERN.h"
864dbfa3 70#define PERL_IN_REGEXEC_C
a687059c 71#include "perl.h"
0f5d15d6 72
54df2634
NC
73#ifdef PERL_IN_XSUB_RE
74# include "re_comp.h"
75#else
76# include "regcomp.h"
77#endif
a687059c 78
c277df42
IZ
79#define RF_tainted 1 /* tainted information used? */
80#define RF_warned 2 /* warned about big count? */
ce862d02 81#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
82#define RF_utf8 8 /* String contains multibyte chars? */
83
eb160463 84#define UTF ((PL_reg_flags & RF_utf8) != 0)
ce862d02
IZ
85
86#define RS_init 1 /* eval environment created */
87#define RS_set 2 /* replsv value is set */
c277df42 88
a687059c
LW
89#ifndef STATIC
90#define STATIC static
91#endif
92
32fc9b6a 93#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
7d3e948e 94
c277df42
IZ
95/*
96 * Forwards.
97 */
98
33b8afdf 99#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 100#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 101
52657f30
AL
102#define HOPc(pos,off) ((char *)(PL_reg_match_utf8 \
103 ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
104 : (U8*)(pos + off)))
105#define HOPBACKc(pos, off) ((char*) \
106 ((PL_reg_match_utf8) \
b9ea4ed6 107 ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
efb30f32
HS
108 : (pos - off >= PL_bostr) \
109 ? (U8*)(pos - off) \
52657f30 110 : (U8*)NULL) \
efb30f32 111)
efb30f32 112
53c4c00c 113#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b 114#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
1aa99e6b 115
1a4fad37
AL
116#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
117 if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
118#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
119#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
120#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
121#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
51371543 122
5f80c4cf 123/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
124#define JUMPABLE(rn) ( \
125 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
126 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
127 OP(rn) == PLUS || OP(rn) == MINMOD || \
128 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
129)
130
cca55fe3
JP
131#define HAS_TEXT(rn) ( \
132 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
133)
e2d8ce26 134
a84d97b6
HS
135/*
136 Search for mandatory following text node; for lookahead, the text must
137 follow but for lookbehind (rn->flags != 0) we skip to the next step.
138*/
cca55fe3 139#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 140 while (JUMPABLE(rn)) \
a84d97b6 141 if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 142 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
143 else if (OP(rn) == PLUS) \
144 rn = NEXTOPER(rn); \
a84d97b6
HS
145 else if (OP(rn) == IFMATCH) \
146 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
e2d8ce26 147 else rn += NEXT_OFF(rn); \
5f80c4cf 148} STMT_END
74750237 149
acfe0abc 150static void restore_pos(pTHX_ void *arg);
51371543 151
76e3520e 152STATIC CHECKPOINT
cea2e8a9 153S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 154{
97aff369 155 dVAR;
a3b680e6 156 const int retval = PL_savestack_ix;
b1ce53c5 157#define REGCP_PAREN_ELEMS 4
a3b680e6 158 const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
159 int p;
160
e49a9654
IH
161 if (paren_elems_to_push < 0)
162 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
163
a01268b5 164#define REGCP_OTHER_ELEMS 6
4b3c1a47 165 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 166 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 167/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
168 SSPUSHINT(PL_regendp[p]);
169 SSPUSHINT(PL_regstartp[p]);
3280af22 170 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
171 SSPUSHINT(p);
172 }
b1ce53c5 173/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
174 SSPUSHINT(PL_regsize);
175 SSPUSHINT(*PL_reglastparen);
a01268b5 176 SSPUSHINT(*PL_reglastcloseparen);
3280af22 177 SSPUSHPTR(PL_reginput);
41123dfd
JH
178#define REGCP_FRAME_ELEMS 2
179/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
180 * are needed for the regexp context stack bookkeeping. */
181 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 182 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 183
a0d0e21e
LW
184 return retval;
185}
186
c277df42 187/* These are needed since we do not localize EVAL nodes: */
a3621e74 188# define REGCP_SET(cp) DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, \
faccc32b 189 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 190 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 191
a3621e74 192# define REGCP_UNWIND(cp) DEBUG_EXECUTE_r(cp != PL_savestack_ix ? \
c3464db5 193 PerlIO_printf(Perl_debug_log, \
faccc32b 194 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 195 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 196
76e3520e 197STATIC char *
097eb12c 198S_regcppop(pTHX_ const regexp *rex)
a0d0e21e 199{
97aff369 200 dVAR;
b1ce53c5 201 I32 i;
a0d0e21e 202 char *input;
b1ce53c5 203
a3621e74
YO
204 GET_RE_DEBUG_FLAGS_DECL;
205
b1ce53c5 206 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 207 i = SSPOPINT;
b1ce53c5
JH
208 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
209 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 210 input = (char *) SSPOPPTR;
a01268b5 211 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
212 *PL_reglastparen = SSPOPINT;
213 PL_regsize = SSPOPINT;
b1ce53c5
JH
214
215 /* Now restore the parentheses context. */
41123dfd
JH
216 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
217 i > 0; i -= REGCP_PAREN_ELEMS) {
1df70142 218 I32 tmps;
097eb12c 219 U32 paren = (U32)SSPOPINT;
3280af22 220 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
221 PL_regstartp[paren] = SSPOPINT;
222 tmps = SSPOPINT;
3280af22
NIS
223 if (paren <= *PL_reglastparen)
224 PL_regendp[paren] = tmps;
a3621e74 225 DEBUG_EXECUTE_r(
c3464db5 226 PerlIO_printf(Perl_debug_log,
b900a521 227 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 228 (UV)paren, (IV)PL_regstartp[paren],
b900a521 229 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 230 (IV)PL_regendp[paren],
3280af22 231 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 232 );
a0d0e21e 233 }
a3621e74 234 DEBUG_EXECUTE_r(
bb7a0f54 235 if (*PL_reglastparen + 1 <= rex->nparens) {
c3464db5 236 PerlIO_printf(Perl_debug_log,
faccc32b 237 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
4f639d21 238 (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
c277df42
IZ
239 }
240 );
daf18116 241#if 1
dafc8851
JH
242 /* It would seem that the similar code in regtry()
243 * already takes care of this, and in fact it is in
244 * a better location to since this code can #if 0-ed out
245 * but the code in regtry() is needed or otherwise tests
246 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
247 * (as of patchlevel 7877) will fail. Then again,
248 * this code seems to be necessary or otherwise
249 * building DynaLoader will fail:
250 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
251 * --jhi */
bb7a0f54 252 for (i = *PL_reglastparen + 1; (U32)i <= rex->nparens; i++) {
097eb12c
AL
253 if (i > PL_regsize)
254 PL_regstartp[i] = -1;
255 PL_regendp[i] = -1;
a0d0e21e 256 }
dafc8851 257#endif
a0d0e21e
LW
258 return input;
259}
260
02db2b7b 261#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 262
95b24440 263#define TRYPAREN(paren, n, input, where) { \
29d1e993
HS
264 if (paren) { \
265 if (n) { \
266 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
267 PL_regendp[paren] = input - PL_bostr; \
268 } \
269 else \
270 PL_regendp[paren] = -1; \
271 } \
95b24440
DM
272 REGMATCH(next, where); \
273 if (result) \
29d1e993
HS
274 sayYES; \
275 if (paren && n) \
276 PL_regendp[paren] = -1; \
277}
278
279
a687059c 280/*
e50aee73 281 * pregexec and friends
a687059c
LW
282 */
283
76234dfb 284#ifndef PERL_IN_XSUB_RE
a687059c 285/*
c277df42 286 - pregexec - match a regexp against a string
a687059c 287 */
c277df42 288I32
864dbfa3 289Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 290 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
291/* strend: pointer to null at end of string */
292/* strbeg: real beginning of string */
293/* minend: end of match must be >=minend after stringarg. */
294/* nosave: For optimizations. */
295{
296 return
9041c2e3 297 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
298 nosave ? 0 : REXEC_COPY_STR);
299}
76234dfb 300#endif
22e551b9 301
9041c2e3 302/*
cad2e5aa
JH
303 * Need to implement the following flags for reg_anch:
304 *
305 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
306 * USE_INTUIT_ML
307 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
308 * INTUIT_AUTORITATIVE_ML
309 * INTUIT_ONCE_NOML - Intuit can match in one location only.
310 * INTUIT_ONCE_ML
311 *
312 * Another flag for this function: SECOND_TIME (so that float substrs
313 * with giant delta may be not rechecked).
314 */
315
316/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
317
3f7c398e 318/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
cad2e5aa
JH
319 Otherwise, only SvCUR(sv) is used to get strbeg. */
320
321/* XXXX We assume that strpos is strbeg unless sv. */
322
6eb5f6b9
JH
323/* XXXX Some places assume that there is a fixed substring.
324 An update may be needed if optimizer marks as "INTUITable"
325 RExen without fixed substrings. Similarly, it is assumed that
326 lengths of all the strings are no more than minlen, thus they
327 cannot come from lookahead.
328 (Or minlen should take into account lookahead.) */
329
2c2d71f5
JH
330/* A failure to find a constant substring means that there is no need to make
331 an expensive call to REx engine, thus we celebrate a failure. Similarly,
332 finding a substring too deep into the string means that less calls to
30944b6d
IZ
333 regtry() should be needed.
334
335 REx compiler's optimizer found 4 possible hints:
336 a) Anchored substring;
337 b) Fixed substring;
338 c) Whether we are anchored (beginning-of-line or \G);
339 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 340 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
341 string which does not contradict any of them.
342 */
2c2d71f5 343
6eb5f6b9
JH
344/* Most of decisions we do here should have been done at compile time.
345 The nodes of the REx which we used for the search should have been
346 deleted from the finite automaton. */
347
cad2e5aa
JH
348char *
349Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
350 char *strend, U32 flags, re_scream_pos_data *data)
351{
97aff369 352 dVAR;
b7953727 353 register I32 start_shift = 0;
cad2e5aa 354 /* Should be nonnegative! */
b7953727 355 register I32 end_shift = 0;
2c2d71f5
JH
356 register char *s;
357 register SV *check;
a1933d95 358 char *strbeg;
cad2e5aa 359 char *t;
a3b680e6 360 const int do_utf8 = sv ? SvUTF8(sv) : 0; /* if no sv we have to assume bytes */
cad2e5aa 361 I32 ml_anch;
bd61b366
SS
362 register char *other_last = NULL; /* other substr checked before this */
363 char *check_at = NULL; /* check substr found at this pos */
1df70142 364 const I32 multiline = prog->reganch & PMf_MULTILINE;
30944b6d 365#ifdef DEBUGGING
890ce7af
AL
366 const char * const i_strpos = strpos;
367 SV * const dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 368#endif
a3621e74
YO
369
370 GET_RE_DEBUG_FLAGS_DECL;
371
a30b2f1f 372 RX_MATCH_UTF8_set(prog,do_utf8);
cad2e5aa 373
b8d68ded 374 if (prog->reganch & ROPT_UTF8) {
a3621e74 375 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded
JH
376 "UTF-8 regex...\n"));
377 PL_reg_flags |= RF_utf8;
378 }
379
a3621e74 380 DEBUG_EXECUTE_r({
1df70142 381 const char *s = PL_reg_match_utf8 ?
c728cb41
JH
382 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
383 strpos;
1df70142 384 const int len = PL_reg_match_utf8 ?
bb7a0f54 385 (int)strlen(s) : strend - strpos;
2a782b5b
JH
386 if (!PL_colorset)
387 reginitcolors();
b8d68ded 388 if (PL_reg_match_utf8)
a3621e74 389 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded 390 "UTF-8 target...\n"));
2a782b5b 391 PerlIO_printf(Perl_debug_log,
a0288114 392 "%sGuessing start of match, REx%s \"%s%.60s%s%s\" against \"%s%.*s%s%s\"...\n",
e4584336 393 PL_colors[4], PL_colors[5], PL_colors[0],
2a782b5b
JH
394 prog->precomp,
395 PL_colors[1],
396 (strlen(prog->precomp) > 60 ? "..." : ""),
397 PL_colors[0],
398 (int)(len > 60 ? 60 : len),
399 s, PL_colors[1],
400 (len > 60 ? "..." : "")
401 );
402 });
cad2e5aa 403
c344f387
JH
404 /* CHR_DIST() would be more correct here but it makes things slow. */
405 if (prog->minlen > strend - strpos) {
a3621e74 406 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584 407 "String too short... [re_intuit_start]\n"));
cad2e5aa 408 goto fail;
2c2d71f5 409 }
a1933d95 410 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 411 PL_regeol = strend;
33b8afdf
JH
412 if (do_utf8) {
413 if (!prog->check_utf8 && prog->check_substr)
414 to_utf8_substr(prog);
415 check = prog->check_utf8;
416 } else {
417 if (!prog->check_substr && prog->check_utf8)
418 to_byte_substr(prog);
419 check = prog->check_substr;
420 }
421 if (check == &PL_sv_undef) {
a3621e74 422 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
423 "Non-utf string cannot match utf check string\n"));
424 goto fail;
425 }
2c2d71f5 426 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
427 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
428 || ( (prog->reganch & ROPT_ANCH_BOL)
7fba1cd6 429 && !multiline ) ); /* Check after \n? */
cad2e5aa 430
7e25d62c
JH
431 if (!ml_anch) {
432 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
433 | ROPT_IMPLICIT)) /* not a real BOL */
3f7c398e 434 /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
7e25d62c
JH
435 && sv && !SvROK(sv)
436 && (strpos != strbeg)) {
a3621e74 437 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
7e25d62c
JH
438 goto fail;
439 }
440 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 441 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 442 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
443 I32 slen;
444
1aa99e6b 445 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
446 if (SvTAIL(check)) {
447 slen = SvCUR(check); /* >= 1 */
cad2e5aa 448
9041c2e3 449 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5 450 || (strend - s == slen && strend[-1] != '\n')) {
a3621e74 451 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
2c2d71f5 452 goto fail_finish;
cad2e5aa
JH
453 }
454 /* Now should match s[0..slen-2] */
455 slen--;
3f7c398e 456 if (slen && (*SvPVX_const(check) != *s
cad2e5aa 457 || (slen > 1
3f7c398e 458 && memNE(SvPVX_const(check), s, slen)))) {
2c2d71f5 459 report_neq:
a3621e74 460 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
2c2d71f5
JH
461 goto fail_finish;
462 }
cad2e5aa 463 }
3f7c398e 464 else if (*SvPVX_const(check) != *s
653099ff 465 || ((slen = SvCUR(check)) > 1
3f7c398e 466 && memNE(SvPVX_const(check), s, slen)))
2c2d71f5 467 goto report_neq;
c315bfe8 468 check_at = s;
2c2d71f5 469 goto success_at_start;
7e25d62c 470 }
cad2e5aa 471 }
2c2d71f5 472 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 473 s = strpos;
2c2d71f5 474 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 475 end_shift = prog->minlen - start_shift -
653099ff 476 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 477 if (!ml_anch) {
a3b680e6 478 const I32 end = prog->check_offset_max + CHR_SVLEN(check)
653099ff 479 - (SvTAIL(check) != 0);
a3b680e6 480 const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
481
482 if (end_shift < eshift)
483 end_shift = eshift;
484 }
cad2e5aa 485 }
2c2d71f5 486 else { /* Can match at random position */
cad2e5aa
JH
487 ml_anch = 0;
488 s = strpos;
2c2d71f5
JH
489 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
490 /* Should be nonnegative! */
491 end_shift = prog->minlen - start_shift -
653099ff 492 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
493 }
494
2c2d71f5 495#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 496 if (end_shift < 0)
6bbae5e6 497 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
498#endif
499
2c2d71f5
JH
500 restart:
501 /* Find a possible match in the region s..strend by looking for
502 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 503 if (flags & REXEC_SCREAM) {
cad2e5aa 504 I32 p = -1; /* Internal iterator of scream. */
a3b680e6 505 I32 * const pp = data ? data->scream_pos : &p;
cad2e5aa 506
2c2d71f5
JH
507 if (PL_screamfirst[BmRARE(check)] >= 0
508 || ( BmRARE(check) == '\n'
509 && (BmPREVIOUS(check) == SvCUR(check) - 1)
510 && SvTAIL(check) ))
9041c2e3 511 s = screaminstr(sv, check,
2c2d71f5 512 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 513 else
2c2d71f5 514 goto fail_finish;
4addbd3b
HS
515 /* we may be pointing at the wrong string */
516 if (s && RX_MATCH_COPIED(prog))
3f7c398e 517 s = strbeg + (s - SvPVX_const(sv));
cad2e5aa
JH
518 if (data)
519 *data->scream_olds = s;
520 }
f33976b4 521 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
522 s = fbm_instr((U8*)(s + start_shift),
523 (U8*)(strend - end_shift),
7fba1cd6 524 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 525 else
1aa99e6b
IH
526 s = fbm_instr(HOP3(s, start_shift, strend),
527 HOP3(strend, -end_shift, strbeg),
7fba1cd6 528 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
529
530 /* Update the count-of-usability, remove useless subpatterns,
531 unshift s. */
2c2d71f5 532
a0288114 533 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s %s substr \"%s%.*s%s\"%s%s",
2c2d71f5 534 (s ? "Found" : "Did not find"),
33b8afdf 535 (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
2c2d71f5 536 PL_colors[0],
7b0972df 537 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
3f7c398e 538 SvPVX_const(check),
2c2d71f5
JH
539 PL_colors[1], (SvTAIL(check) ? "$" : ""),
540 (s ? " at offset " : "...\n") ) );
541
542 if (!s)
543 goto fail_finish;
544
6eb5f6b9
JH
545 check_at = s;
546
2c2d71f5 547 /* Finish the diagnostic message */
a3621e74 548 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
549
550 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
551 Start with the other substr.
552 XXXX no SCREAM optimization yet - and a very coarse implementation
a0288114 553 XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
2c2d71f5
JH
554 *always* match. Probably should be marked during compile...
555 Probably it is right to do no SCREAM here...
556 */
557
33b8afdf 558 if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
30944b6d 559 /* Take into account the "other" substring. */
2c2d71f5
JH
560 /* XXXX May be hopelessly wrong for UTF... */
561 if (!other_last)
6eb5f6b9 562 other_last = strpos;
33b8afdf 563 if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
30944b6d
IZ
564 do_other_anchored:
565 {
890ce7af
AL
566 char * const last = HOP3c(s, -start_shift, strbeg);
567 char *last1, *last2;
2c2d71f5 568 char *s1 = s;
33b8afdf 569 SV* must;
2c2d71f5 570
2c2d71f5
JH
571 t = s - prog->check_offset_max;
572 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 573 && (!do_utf8
0ce71af7 574 || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
2c2d71f5 575 && t > strpos)))
6f207bd3 576 NOOP;
2c2d71f5
JH
577 else
578 t = strpos;
1aa99e6b 579 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
580 if (t < other_last) /* These positions already checked */
581 t = other_last;
1aa99e6b 582 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
583 if (last < last1)
584 last1 = last;
585 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
586 /* On end-of-str: see comment below. */
33b8afdf
JH
587 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
588 if (must == &PL_sv_undef) {
589 s = (char*)NULL;
a3621e74 590 DEBUG_EXECUTE_r(must = prog->anchored_utf8); /* for debug */
33b8afdf
JH
591 }
592 else
593 s = fbm_instr(
594 (unsigned char*)t,
595 HOP3(HOP3(last1, prog->anchored_offset, strend)
596 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
597 must,
7fba1cd6 598 multiline ? FBMrf_MULTILINE : 0
33b8afdf 599 );
a3621e74 600 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a0288114 601 "%s anchored substr \"%s%.*s%s\"%s",
2c2d71f5
JH
602 (s ? "Found" : "Contradicts"),
603 PL_colors[0],
33b8afdf
JH
604 (int)(SvCUR(must)
605 - (SvTAIL(must)!=0)),
3f7c398e 606 SvPVX_const(must),
33b8afdf 607 PL_colors[1], (SvTAIL(must) ? "$" : "")));
2c2d71f5
JH
608 if (!s) {
609 if (last1 >= last2) {
a3621e74 610 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5
JH
611 ", giving up...\n"));
612 goto fail_finish;
613 }
a3621e74 614 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 615 ", trying floating at offset %ld...\n",
1aa99e6b
IH
616 (long)(HOP3c(s1, 1, strend) - i_strpos)));
617 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
618 s = HOP3c(last, 1, strend);
2c2d71f5
JH
619 goto restart;
620 }
621 else {
a3621e74 622 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 623 (long)(s - i_strpos)));
1aa99e6b
IH
624 t = HOP3c(s, -prog->anchored_offset, strbeg);
625 other_last = HOP3c(s, 1, strend);
30944b6d 626 s = s1;
2c2d71f5
JH
627 if (t == strpos)
628 goto try_at_start;
2c2d71f5
JH
629 goto try_at_offset;
630 }
30944b6d 631 }
2c2d71f5
JH
632 }
633 else { /* Take into account the floating substring. */
33b8afdf
JH
634 char *last, *last1;
635 char *s1 = s;
636 SV* must;
637
638 t = HOP3c(s, -start_shift, strbeg);
639 last1 = last =
640 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
641 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
642 last = HOP3c(t, prog->float_max_offset, strend);
643 s = HOP3c(t, prog->float_min_offset, strend);
644 if (s < other_last)
645 s = other_last;
2c2d71f5 646 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
33b8afdf
JH
647 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
648 /* fbm_instr() takes into account exact value of end-of-str
649 if the check is SvTAIL(ed). Since false positives are OK,
650 and end-of-str is not later than strend we are OK. */
651 if (must == &PL_sv_undef) {
652 s = (char*)NULL;
a3621e74 653 DEBUG_EXECUTE_r(must = prog->float_utf8); /* for debug message */
33b8afdf
JH
654 }
655 else
2c2d71f5 656 s = fbm_instr((unsigned char*)s,
33b8afdf
JH
657 (unsigned char*)last + SvCUR(must)
658 - (SvTAIL(must)!=0),
7fba1cd6 659 must, multiline ? FBMrf_MULTILINE : 0);
a0288114 660 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s floating substr \"%s%.*s%s\"%s",
33b8afdf
JH
661 (s ? "Found" : "Contradicts"),
662 PL_colors[0],
663 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 664 SvPVX_const(must),
33b8afdf
JH
665 PL_colors[1], (SvTAIL(must) ? "$" : "")));
666 if (!s) {
667 if (last1 == last) {
a3621e74 668 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
669 ", giving up...\n"));
670 goto fail_finish;
2c2d71f5 671 }
a3621e74 672 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
673 ", trying anchored starting at offset %ld...\n",
674 (long)(s1 + 1 - i_strpos)));
675 other_last = last;
676 s = HOP3c(t, 1, strend);
677 goto restart;
678 }
679 else {
a3621e74 680 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
33b8afdf
JH
681 (long)(s - i_strpos)));
682 other_last = s; /* Fix this later. --Hugo */
683 s = s1;
684 if (t == strpos)
685 goto try_at_start;
686 goto try_at_offset;
687 }
2c2d71f5 688 }
cad2e5aa 689 }
2c2d71f5
JH
690
691 t = s - prog->check_offset_max;
2c2d71f5 692 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 693 && (!do_utf8
0ce71af7 694 || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*)strpos))
1aa99e6b 695 && t > strpos))) {
2c2d71f5
JH
696 /* Fixed substring is found far enough so that the match
697 cannot start at strpos. */
698 try_at_offset:
cad2e5aa 699 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
700 /* Eventually fbm_*() should handle this, but often
701 anchored_offset is not 0, so this check will not be wasted. */
702 /* XXXX In the code below we prefer to look for "^" even in
703 presence of anchored substrings. And we search even
704 beyond the found float position. These pessimizations
705 are historical artefacts only. */
706 find_anchor:
2c2d71f5 707 while (t < strend - prog->minlen) {
cad2e5aa 708 if (*t == '\n') {
4ee3650e 709 if (t < check_at - prog->check_offset_min) {
33b8afdf 710 if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
4ee3650e
GS
711 /* Since we moved from the found position,
712 we definitely contradict the found anchored
30944b6d
IZ
713 substr. Due to the above check we do not
714 contradict "check" substr.
715 Thus we can arrive here only if check substr
716 is float. Redo checking for "other"=="fixed".
717 */
9041c2e3 718 strpos = t + 1;
a3621e74 719 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
e4584336 720 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
30944b6d
IZ
721 goto do_other_anchored;
722 }
4ee3650e
GS
723 /* We don't contradict the found floating substring. */
724 /* XXXX Why not check for STCLASS? */
cad2e5aa 725 s = t + 1;
a3621e74 726 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
e4584336 727 PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
728 goto set_useful;
729 }
4ee3650e
GS
730 /* Position contradicts check-string */
731 /* XXXX probably better to look for check-string
732 than for "\n", so one should lower the limit for t? */
a3621e74 733 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
e4584336 734 PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 735 other_last = strpos = s = t + 1;
cad2e5aa
JH
736 goto restart;
737 }
738 t++;
739 }
a3621e74 740 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
e4584336 741 PL_colors[0], PL_colors[1]));
2c2d71f5 742 goto fail_finish;
cad2e5aa 743 }
f5952150 744 else {
a3621e74 745 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
e4584336 746 PL_colors[0], PL_colors[1]));
f5952150 747 }
cad2e5aa
JH
748 s = t;
749 set_useful:
33b8afdf 750 ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
cad2e5aa
JH
751 }
752 else {
f5952150 753 /* The found string does not prohibit matching at strpos,
2c2d71f5 754 - no optimization of calling REx engine can be performed,
f5952150
GS
755 unless it was an MBOL and we are not after MBOL,
756 or a future STCLASS check will fail this. */
2c2d71f5
JH
757 try_at_start:
758 /* Even in this situation we may use MBOL flag if strpos is offset
759 wrt the start of the string. */
05b4157f 760 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 761 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
762 /* May be due to an implicit anchor of m{.*foo} */
763 && !(prog->reganch & ROPT_IMPLICIT))
764 {
cad2e5aa
JH
765 t = strpos;
766 goto find_anchor;
767 }
a3621e74 768 DEBUG_EXECUTE_r( if (ml_anch)
f5952150 769 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
e4584336 770 (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
30944b6d 771 );
2c2d71f5 772 success_at_start:
30944b6d 773 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
33b8afdf
JH
774 && (do_utf8 ? (
775 prog->check_utf8 /* Could be deleted already */
776 && --BmUSEFUL(prog->check_utf8) < 0
777 && (prog->check_utf8 == prog->float_utf8)
778 ) : (
779 prog->check_substr /* Could be deleted already */
780 && --BmUSEFUL(prog->check_substr) < 0
781 && (prog->check_substr == prog->float_substr)
782 )))
66e933ab 783 {
cad2e5aa 784 /* If flags & SOMETHING - do not do it many times on the same match */
a3621e74 785 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
33b8afdf
JH
786 SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
787 if (do_utf8 ? prog->check_substr : prog->check_utf8)
788 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
a0714e2c
SS
789 prog->check_substr = prog->check_utf8 = NULL; /* disable */
790 prog->float_substr = prog->float_utf8 = NULL; /* clear */
791 check = NULL; /* abort */
cad2e5aa 792 s = strpos;
3cf5c195
IZ
793 /* XXXX This is a remnant of the old implementation. It
794 looks wasteful, since now INTUIT can use many
6eb5f6b9 795 other heuristics. */
cad2e5aa
JH
796 prog->reganch &= ~RE_USE_INTUIT;
797 }
798 else
799 s = strpos;
800 }
801
6eb5f6b9
JH
802 /* Last resort... */
803 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
804 if (prog->regstclass) {
805 /* minlen == 0 is possible if regstclass is \b or \B,
806 and the fixed substr is ''$.
807 Since minlen is already taken into account, s+1 is before strend;
808 accidentally, minlen >= 1 guaranties no false positives at s + 1
809 even for \b or \B. But (minlen? 1 : 0) below assumes that
810 regstclass does not come from lookahead... */
811 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
812 This leaves EXACTF only, which is dealt with in find_byclass(). */
890ce7af 813 const U8* const str = (U8*)STRING(prog->regstclass);
06b5626a 814 const int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 815 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 816 : 1);
a3b680e6 817 const char * const endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
1aa99e6b 818 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
33b8afdf 819 : (prog->float_substr || prog->float_utf8
1aa99e6b
IH
820 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
821 cl_l, strend)
822 : strend);
6eb5f6b9
JH
823
824 t = s;
3b0527fe 825 s = find_byclass(prog, prog->regstclass, s, endpos, NULL);
6eb5f6b9
JH
826 if (!s) {
827#ifdef DEBUGGING
cbbf8932 828 const char *what = NULL;
6eb5f6b9
JH
829#endif
830 if (endpos == strend) {
a3621e74 831 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
832 "Could not match STCLASS...\n") );
833 goto fail;
834 }
a3621e74 835 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab 836 "This position contradicts STCLASS...\n") );
653099ff
GS
837 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
838 goto fail;
6eb5f6b9 839 /* Contradict one of substrings */
33b8afdf
JH
840 if (prog->anchored_substr || prog->anchored_utf8) {
841 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
a3621e74 842 DEBUG_EXECUTE_r( what = "anchored" );
6eb5f6b9 843 hop_and_restart:
1aa99e6b 844 s = HOP3c(t, 1, strend);
66e933ab
GS
845 if (s + start_shift + end_shift > strend) {
846 /* XXXX Should be taken into account earlier? */
a3621e74 847 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab
GS
848 "Could not match STCLASS...\n") );
849 goto fail;
850 }
5e39e1e5
HS
851 if (!check)
852 goto giveup;
a3621e74 853 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 854 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
855 what, (long)(s + start_shift - i_strpos)) );
856 goto restart;
857 }
66e933ab 858 /* Have both, check_string is floating */
6eb5f6b9
JH
859 if (t + start_shift >= check_at) /* Contradicts floating=check */
860 goto retry_floating_check;
861 /* Recheck anchored substring, but not floating... */
9041c2e3 862 s = check_at;
5e39e1e5
HS
863 if (!check)
864 goto giveup;
a3621e74 865 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 866 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
867 (long)(other_last - i_strpos)) );
868 goto do_other_anchored;
869 }
60e71179
GS
870 /* Another way we could have checked stclass at the
871 current position only: */
872 if (ml_anch) {
873 s = t = t + 1;
5e39e1e5
HS
874 if (!check)
875 goto giveup;
a3621e74 876 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 877 "Looking for /%s^%s/m starting at offset %ld...\n",
e4584336 878 PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
60e71179 879 goto try_at_offset;
66e933ab 880 }
33b8afdf 881 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
60e71179 882 goto fail;
6eb5f6b9
JH
883 /* Check is floating subtring. */
884 retry_floating_check:
885 t = check_at - start_shift;
a3621e74 886 DEBUG_EXECUTE_r( what = "floating" );
6eb5f6b9
JH
887 goto hop_and_restart;
888 }
b7953727 889 if (t != s) {
a3621e74 890 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 891 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
892 (long)(t - i_strpos), (long)(s - i_strpos))
893 );
894 }
895 else {
a3621e74 896 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b7953727
JH
897 "Does not contradict STCLASS...\n");
898 );
899 }
6eb5f6b9 900 }
5e39e1e5 901 giveup:
a3621e74 902 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
5e39e1e5
HS
903 PL_colors[4], (check ? "Guessed" : "Giving up"),
904 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 905 return s;
2c2d71f5
JH
906
907 fail_finish: /* Substring not found */
33b8afdf
JH
908 if (prog->check_substr || prog->check_utf8) /* could be removed already */
909 BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
cad2e5aa 910 fail:
a3621e74 911 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
e4584336 912 PL_colors[4], PL_colors[5]));
bd61b366 913 return NULL;
cad2e5aa 914}
9661b544 915
6eb5f6b9 916/* We know what class REx starts with. Try to find this position... */
3b0527fe
DM
917/* if reginfo is NULL, its a dryrun */
918
3c3eec57 919STATIC char *
3b0527fe
DM
920S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char
921*strend, const regmatch_info *reginfo)
a687059c 922{
27da23d5 923 dVAR;
1df70142 924 const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
6eb5f6b9 925 char *m;
d8093b23 926 STRLEN ln;
5dab1207 927 STRLEN lnc;
078c425b 928 register STRLEN uskip;
d8093b23
G
929 unsigned int c1;
930 unsigned int c2;
6eb5f6b9
JH
931 char *e;
932 register I32 tmp = 1; /* Scratch variable? */
a3b680e6 933 register const bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 934
6eb5f6b9
JH
935 /* We know what class it must start with. */
936 switch (OP(c)) {
6eb5f6b9 937 case ANYOF:
388cc4de 938 if (do_utf8) {
078c425b 939 while (s + (uskip = UTF8SKIP(s)) <= strend) {
388cc4de
HS
940 if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
941 !UTF8_IS_INVARIANT((U8)s[0]) ?
32fc9b6a
DM
942 reginclass(prog, c, (U8*)s, 0, do_utf8) :
943 REGINCLASS(prog, c, (U8*)s)) {
3b0527fe 944 if (tmp && (!reginfo || regtry(reginfo, s)))
388cc4de
HS
945 goto got_it;
946 else
947 tmp = doevery;
948 }
949 else
950 tmp = 1;
078c425b 951 s += uskip;
388cc4de
HS
952 }
953 }
954 else {
955 while (s < strend) {
956 STRLEN skip = 1;
957
32fc9b6a 958 if (REGINCLASS(prog, c, (U8*)s) ||
388cc4de
HS
959 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
960 /* The assignment of 2 is intentional:
961 * for the folded sharp s, the skip is 2. */
962 (skip = SHARP_S_SKIP))) {
3b0527fe 963 if (tmp && (!reginfo || regtry(reginfo, s)))
388cc4de
HS
964 goto got_it;
965 else
966 tmp = doevery;
967 }
968 else
969 tmp = 1;
970 s += skip;
971 }
a0d0e21e 972 }
6eb5f6b9 973 break;
f33976b4
DB
974 case CANY:
975 while (s < strend) {
3b0527fe 976 if (tmp && (!reginfo || regtry(reginfo, s)))
f33976b4
DB
977 goto got_it;
978 else
979 tmp = doevery;
980 s++;
981 }
982 break;
6eb5f6b9 983 case EXACTF:
5dab1207
NIS
984 m = STRING(c);
985 ln = STR_LEN(c); /* length to match in octets/bytes */
986 lnc = (I32) ln; /* length to match in characters */
1aa99e6b 987 if (UTF) {
a2a2844f 988 STRLEN ulen1, ulen2;
5dab1207 989 U8 *sm = (U8 *) m;
89ebb4a3
JH
990 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
991 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4ad0818d 992 const U32 uniflags = UTF8_ALLOW_DEFAULT;
a2a2844f
JH
993
994 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
995 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
996
89ebb4a3 997 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
041457d9 998 0, uniflags);
89ebb4a3 999 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
041457d9 1000 0, uniflags);
5dab1207
NIS
1001 lnc = 0;
1002 while (sm < ((U8 *) m + ln)) {
1003 lnc++;
1004 sm += UTF8SKIP(sm);
1005 }
1aa99e6b
IH
1006 }
1007 else {
1008 c1 = *(U8*)m;
1009 c2 = PL_fold[c1];
1010 }
6eb5f6b9
JH
1011 goto do_exactf;
1012 case EXACTFL:
5dab1207
NIS
1013 m = STRING(c);
1014 ln = STR_LEN(c);
1015 lnc = (I32) ln;
d8093b23 1016 c1 = *(U8*)m;
6eb5f6b9
JH
1017 c2 = PL_fold_locale[c1];
1018 do_exactf:
db12adc6 1019 e = HOP3c(strend, -((I32)lnc), s);
b3c9acc1 1020
3b0527fe 1021 if (!reginfo && e < s)
6eb5f6b9 1022 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 1023
60a8b682
JH
1024 /* The idea in the EXACTF* cases is to first find the
1025 * first character of the EXACTF* node and then, if
1026 * necessary, case-insensitively compare the full
1027 * text of the node. The c1 and c2 are the first
1028 * characters (though in Unicode it gets a bit
1029 * more complicated because there are more cases
7f16dd3d
JH
1030 * than just upper and lower: one needs to use
1031 * the so-called folding case for case-insensitive
1032 * matching (called "loose matching" in Unicode).
1033 * ibcmp_utf8() will do just that. */
60a8b682 1034
1aa99e6b 1035 if (do_utf8) {
575cac57 1036 UV c, f;
89ebb4a3 1037 U8 tmpbuf [UTF8_MAXBYTES+1];
575cac57 1038 STRLEN len, foldlen;
4ad0818d 1039 const U32 uniflags = UTF8_ALLOW_DEFAULT;
09091399 1040 if (c1 == c2) {
5dab1207
NIS
1041 /* Upper and lower of 1st char are equal -
1042 * probably not a "letter". */
1aa99e6b 1043 while (s <= e) {
89ebb4a3 1044 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1045 uniflags);
80aecb99
JH
1046 if ( c == c1
1047 && (ln == len ||
66423254 1048 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1049 m, (char **)0, ln, (bool)UTF))
3b0527fe 1050 && (!reginfo || regtry(reginfo, s)) )
1aa99e6b 1051 goto got_it;
80aecb99 1052 else {
1df70142 1053 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1054 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1055 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1056 if ( f != c
1057 && (f == c1 || f == c2)
1058 && (ln == foldlen ||
66423254
JH
1059 !ibcmp_utf8((char *) foldbuf,
1060 (char **)0, foldlen, do_utf8,
d07ddd77 1061 m,
eb160463 1062 (char **)0, ln, (bool)UTF))
3b0527fe 1063 && (!reginfo || regtry(reginfo, s)) )
80aecb99
JH
1064 goto got_it;
1065 }
1aa99e6b
IH
1066 s += len;
1067 }
09091399
JH
1068 }
1069 else {
1aa99e6b 1070 while (s <= e) {
89ebb4a3 1071 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1072 uniflags);
80aecb99 1073
60a8b682 1074 /* Handle some of the three Greek sigmas cases.
8c01da3c
JH
1075 * Note that not all the possible combinations
1076 * are handled here: some of them are handled
1077 * by the standard folding rules, and some of
1078 * them (the character class or ANYOF cases)
1079 * are handled during compiletime in
1080 * regexec.c:S_regclass(). */
880bd946
JH
1081 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1082 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1083 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99
JH
1084
1085 if ( (c == c1 || c == c2)
1086 && (ln == len ||
66423254 1087 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1088 m, (char **)0, ln, (bool)UTF))
3b0527fe 1089 && (!reginfo || regtry(reginfo, s)) )
1aa99e6b 1090 goto got_it;
80aecb99 1091 else {
1df70142 1092 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1093 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1094 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1095 if ( f != c
1096 && (f == c1 || f == c2)
1097 && (ln == foldlen ||
a6872d42 1098 !ibcmp_utf8((char *) foldbuf,
66423254 1099 (char **)0, foldlen, do_utf8,
d07ddd77 1100 m,
eb160463 1101 (char **)0, ln, (bool)UTF))
3b0527fe 1102 && (!reginfo || regtry(reginfo, s)) )
80aecb99
JH
1103 goto got_it;
1104 }
1aa99e6b
IH
1105 s += len;
1106 }
09091399 1107 }
1aa99e6b
IH
1108 }
1109 else {
1110 if (c1 == c2)
1111 while (s <= e) {
1112 if ( *(U8*)s == c1
1113 && (ln == 1 || !(OP(c) == EXACTF
1114 ? ibcmp(s, m, ln)
1115 : ibcmp_locale(s, m, ln)))
3b0527fe 1116 && (!reginfo || regtry(reginfo, s)) )
1aa99e6b
IH
1117 goto got_it;
1118 s++;
1119 }
1120 else
1121 while (s <= e) {
1122 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1123 && (ln == 1 || !(OP(c) == EXACTF
1124 ? ibcmp(s, m, ln)
1125 : ibcmp_locale(s, m, ln)))
3b0527fe 1126 && (!reginfo || regtry(reginfo, s)) )
1aa99e6b
IH
1127 goto got_it;
1128 s++;
1129 }
b3c9acc1
IZ
1130 }
1131 break;
bbce6d69 1132 case BOUNDL:
3280af22 1133 PL_reg_flags |= RF_tainted;
bbce6d69 1134 /* FALL THROUGH */
a0d0e21e 1135 case BOUND:
ffc61ed2 1136 if (do_utf8) {
12d33761 1137 if (s == PL_bostr)
ffc61ed2
JH
1138 tmp = '\n';
1139 else {
6136c704 1140 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1141 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1142 }
1143 tmp = ((OP(c) == BOUND ?
9041c2e3 1144 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1145 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1146 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1147 if (tmp == !(OP(c) == BOUND ?
bb7a0f54 1148 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1149 isALNUM_LC_utf8((U8*)s)))
1150 {
1151 tmp = !tmp;
3b0527fe 1152 if ((!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1153 goto got_it;
1154 }
078c425b 1155 s += uskip;
a687059c 1156 }
a0d0e21e 1157 }
667bb95a 1158 else {
12d33761 1159 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1160 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1161 while (s < strend) {
1162 if (tmp ==
1163 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1164 tmp = !tmp;
3b0527fe 1165 if ((!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1166 goto got_it;
1167 }
1168 s++;
a0ed51b3 1169 }
a0ed51b3 1170 }
3b0527fe 1171 if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, s)))
a0ed51b3
LW
1172 goto got_it;
1173 break;
bbce6d69 1174 case NBOUNDL:
3280af22 1175 PL_reg_flags |= RF_tainted;
bbce6d69 1176 /* FALL THROUGH */
a0d0e21e 1177 case NBOUND:
ffc61ed2 1178 if (do_utf8) {
12d33761 1179 if (s == PL_bostr)
ffc61ed2
JH
1180 tmp = '\n';
1181 else {
6136c704 1182 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1183 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1184 }
1185 tmp = ((OP(c) == NBOUND ?
9041c2e3 1186 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1187 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1188 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1189 if (tmp == !(OP(c) == NBOUND ?
bb7a0f54 1190 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1191 isALNUM_LC_utf8((U8*)s)))
1192 tmp = !tmp;
3b0527fe 1193 else if ((!reginfo || regtry(reginfo, s)))
ffc61ed2 1194 goto got_it;
078c425b 1195 s += uskip;
ffc61ed2 1196 }
a0d0e21e 1197 }
667bb95a 1198 else {
12d33761 1199 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1200 tmp = ((OP(c) == NBOUND ?
1201 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1202 while (s < strend) {
1203 if (tmp ==
1204 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1205 tmp = !tmp;
3b0527fe 1206 else if ((!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1207 goto got_it;
1208 s++;
1209 }
a0ed51b3 1210 }
3b0527fe 1211 if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, s)))
a0ed51b3
LW
1212 goto got_it;
1213 break;
a0d0e21e 1214 case ALNUM:
ffc61ed2 1215 if (do_utf8) {
1a4fad37 1216 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1217 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1218 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
3b0527fe 1219 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1220 goto got_it;
1221 else
1222 tmp = doevery;
1223 }
bbce6d69 1224 else
ffc61ed2 1225 tmp = 1;
078c425b 1226 s += uskip;
bbce6d69 1227 }
bbce6d69 1228 }
ffc61ed2
JH
1229 else {
1230 while (s < strend) {
1231 if (isALNUM(*s)) {
3b0527fe 1232 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1233 goto got_it;
1234 else
1235 tmp = doevery;
1236 }
a0ed51b3 1237 else
ffc61ed2
JH
1238 tmp = 1;
1239 s++;
a0ed51b3 1240 }
a0ed51b3
LW
1241 }
1242 break;
bbce6d69 1243 case ALNUML:
3280af22 1244 PL_reg_flags |= RF_tainted;
ffc61ed2 1245 if (do_utf8) {
078c425b 1246 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1247 if (isALNUM_LC_utf8((U8*)s)) {
3b0527fe 1248 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1249 goto got_it;
1250 else
1251 tmp = doevery;
1252 }
a687059c 1253 else
ffc61ed2 1254 tmp = 1;
078c425b 1255 s += uskip;
a0d0e21e 1256 }
a0d0e21e 1257 }
ffc61ed2
JH
1258 else {
1259 while (s < strend) {
1260 if (isALNUM_LC(*s)) {
3b0527fe 1261 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1262 goto got_it;
1263 else
1264 tmp = doevery;
1265 }
a0ed51b3 1266 else
ffc61ed2
JH
1267 tmp = 1;
1268 s++;
a0ed51b3 1269 }
a0ed51b3
LW
1270 }
1271 break;
a0d0e21e 1272 case NALNUM:
ffc61ed2 1273 if (do_utf8) {
1a4fad37 1274 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1275 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1276 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
3b0527fe 1277 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1278 goto got_it;
1279 else
1280 tmp = doevery;
1281 }
bbce6d69 1282 else
ffc61ed2 1283 tmp = 1;
078c425b 1284 s += uskip;
bbce6d69 1285 }
bbce6d69 1286 }
ffc61ed2
JH
1287 else {
1288 while (s < strend) {
1289 if (!isALNUM(*s)) {
3b0527fe 1290 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1291 goto got_it;
1292 else
1293 tmp = doevery;
1294 }
a0ed51b3 1295 else
ffc61ed2
JH
1296 tmp = 1;
1297 s++;
a0ed51b3 1298 }
a0ed51b3
LW
1299 }
1300 break;
bbce6d69 1301 case NALNUML:
3280af22 1302 PL_reg_flags |= RF_tainted;
ffc61ed2 1303 if (do_utf8) {
078c425b 1304 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1305 if (!isALNUM_LC_utf8((U8*)s)) {
3b0527fe 1306 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1307 goto got_it;
1308 else
1309 tmp = doevery;
1310 }
a687059c 1311 else
ffc61ed2 1312 tmp = 1;
078c425b 1313 s += uskip;
a687059c 1314 }
a0d0e21e 1315 }
ffc61ed2
JH
1316 else {
1317 while (s < strend) {
1318 if (!isALNUM_LC(*s)) {
3b0527fe 1319 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1320 goto got_it;
1321 else
1322 tmp = doevery;
1323 }
a0ed51b3 1324 else
ffc61ed2
JH
1325 tmp = 1;
1326 s++;
a0ed51b3 1327 }
a0ed51b3
LW
1328 }
1329 break;
a0d0e21e 1330 case SPACE:
ffc61ed2 1331 if (do_utf8) {
1a4fad37 1332 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1333 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1334 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
3b0527fe 1335 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1336 goto got_it;
1337 else
1338 tmp = doevery;
1339 }
a0d0e21e 1340 else
ffc61ed2 1341 tmp = 1;
078c425b 1342 s += uskip;
2304df62 1343 }
a0d0e21e 1344 }
ffc61ed2
JH
1345 else {
1346 while (s < strend) {
1347 if (isSPACE(*s)) {
3b0527fe 1348 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1349 goto got_it;
1350 else
1351 tmp = doevery;
1352 }
a0ed51b3 1353 else
ffc61ed2
JH
1354 tmp = 1;
1355 s++;
a0ed51b3 1356 }
a0ed51b3
LW
1357 }
1358 break;
bbce6d69 1359 case SPACEL:
3280af22 1360 PL_reg_flags |= RF_tainted;
ffc61ed2 1361 if (do_utf8) {
078c425b 1362 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1363 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
3b0527fe 1364 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1365 goto got_it;
1366 else
1367 tmp = doevery;
1368 }
bbce6d69 1369 else
ffc61ed2 1370 tmp = 1;
078c425b 1371 s += uskip;
bbce6d69 1372 }
bbce6d69 1373 }
ffc61ed2
JH
1374 else {
1375 while (s < strend) {
1376 if (isSPACE_LC(*s)) {
3b0527fe 1377 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1378 goto got_it;
1379 else
1380 tmp = doevery;
1381 }
a0ed51b3 1382 else
ffc61ed2
JH
1383 tmp = 1;
1384 s++;
a0ed51b3 1385 }
a0ed51b3
LW
1386 }
1387 break;
a0d0e21e 1388 case NSPACE:
ffc61ed2 1389 if (do_utf8) {
1a4fad37 1390 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1391 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1392 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
3b0527fe 1393 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1394 goto got_it;
1395 else
1396 tmp = doevery;
1397 }
a0d0e21e 1398 else
ffc61ed2 1399 tmp = 1;
078c425b 1400 s += uskip;
a687059c 1401 }
a0d0e21e 1402 }
ffc61ed2
JH
1403 else {
1404 while (s < strend) {
1405 if (!isSPACE(*s)) {
3b0527fe 1406 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1407 goto got_it;
1408 else
1409 tmp = doevery;
1410 }
a0ed51b3 1411 else
ffc61ed2
JH
1412 tmp = 1;
1413 s++;
a0ed51b3 1414 }
a0ed51b3
LW
1415 }
1416 break;
bbce6d69 1417 case NSPACEL:
3280af22 1418 PL_reg_flags |= RF_tainted;
ffc61ed2 1419 if (do_utf8) {
078c425b 1420 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1421 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
3b0527fe 1422 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1423 goto got_it;
1424 else
1425 tmp = doevery;
1426 }
bbce6d69 1427 else
ffc61ed2 1428 tmp = 1;
078c425b 1429 s += uskip;
bbce6d69 1430 }
bbce6d69 1431 }
ffc61ed2
JH
1432 else {
1433 while (s < strend) {
1434 if (!isSPACE_LC(*s)) {
3b0527fe 1435 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1436 goto got_it;
1437 else
1438 tmp = doevery;
1439 }
a0ed51b3 1440 else
ffc61ed2
JH
1441 tmp = 1;
1442 s++;
a0ed51b3 1443 }
a0ed51b3
LW
1444 }
1445 break;
a0d0e21e 1446 case DIGIT:
ffc61ed2 1447 if (do_utf8) {
1a4fad37 1448 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1449 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1450 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
3b0527fe 1451 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1452 goto got_it;
1453 else
1454 tmp = doevery;
1455 }
a0d0e21e 1456 else
ffc61ed2 1457 tmp = 1;
078c425b 1458 s += uskip;
2b69d0c2 1459 }
a0d0e21e 1460 }
ffc61ed2
JH
1461 else {
1462 while (s < strend) {
1463 if (isDIGIT(*s)) {
3b0527fe 1464 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1465 goto got_it;
1466 else
1467 tmp = doevery;
1468 }
a0ed51b3 1469 else
ffc61ed2
JH
1470 tmp = 1;
1471 s++;
a0ed51b3 1472 }
a0ed51b3
LW
1473 }
1474 break;
b8c5462f
JH
1475 case DIGITL:
1476 PL_reg_flags |= RF_tainted;
ffc61ed2 1477 if (do_utf8) {
078c425b 1478 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1479 if (isDIGIT_LC_utf8((U8*)s)) {
3b0527fe 1480 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1481 goto got_it;
1482 else
1483 tmp = doevery;
1484 }
b8c5462f 1485 else
ffc61ed2 1486 tmp = 1;
078c425b 1487 s += uskip;
b8c5462f 1488 }
b8c5462f 1489 }
ffc61ed2
JH
1490 else {
1491 while (s < strend) {
1492 if (isDIGIT_LC(*s)) {
3b0527fe 1493 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1494 goto got_it;
1495 else
1496 tmp = doevery;
1497 }
b8c5462f 1498 else
ffc61ed2
JH
1499 tmp = 1;
1500 s++;
b8c5462f 1501 }
b8c5462f
JH
1502 }
1503 break;
a0d0e21e 1504 case NDIGIT:
ffc61ed2 1505 if (do_utf8) {
1a4fad37 1506 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1507 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1508 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
3b0527fe 1509 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1510 goto got_it;
1511 else
1512 tmp = doevery;
1513 }
a0d0e21e 1514 else
ffc61ed2 1515 tmp = 1;
078c425b 1516 s += uskip;
a687059c 1517 }
a0d0e21e 1518 }
ffc61ed2
JH
1519 else {
1520 while (s < strend) {
1521 if (!isDIGIT(*s)) {
3b0527fe 1522 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1523 goto got_it;
1524 else
1525 tmp = doevery;
1526 }
a0ed51b3 1527 else
ffc61ed2
JH
1528 tmp = 1;
1529 s++;
a0ed51b3 1530 }
a0ed51b3
LW
1531 }
1532 break;
b8c5462f
JH
1533 case NDIGITL:
1534 PL_reg_flags |= RF_tainted;
ffc61ed2 1535 if (do_utf8) {
078c425b 1536 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1537 if (!isDIGIT_LC_utf8((U8*)s)) {
3b0527fe 1538 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1539 goto got_it;
1540 else
1541 tmp = doevery;
1542 }
b8c5462f 1543 else
ffc61ed2 1544 tmp = 1;
078c425b 1545 s += uskip;
b8c5462f 1546 }
a0ed51b3 1547 }
ffc61ed2
JH
1548 else {
1549 while (s < strend) {
1550 if (!isDIGIT_LC(*s)) {
3b0527fe 1551 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1552 goto got_it;
1553 else
1554 tmp = doevery;
1555 }
cf93c79d 1556 else
ffc61ed2
JH
1557 tmp = 1;
1558 s++;
b8c5462f 1559 }
b8c5462f
JH
1560 }
1561 break;
b3c9acc1 1562 default:
3c3eec57
GS
1563 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1564 break;
d6a28714 1565 }
6eb5f6b9
JH
1566 return 0;
1567 got_it:
1568 return s;
1569}
1570
1571/*
1572 - regexec_flags - match a regexp against a string
1573 */
1574I32
1575Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1576 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1577/* strend: pointer to null at end of string */
1578/* strbeg: real beginning of string */
1579/* minend: end of match must be >=minend after stringarg. */
1580/* data: May be used for some additional optimizations. */
1581/* nosave: For optimizations. */
1582{
97aff369 1583 dVAR;
6eb5f6b9
JH
1584 register char *s;
1585 register regnode *c;
1586 register char *startpos = stringarg;
6eb5f6b9
JH
1587 I32 minlen; /* must match at least this many chars */
1588 I32 dontbother = 0; /* how many characters not to try at end */
6eb5f6b9
JH
1589 I32 end_shift = 0; /* Same for the end. */ /* CC */
1590 I32 scream_pos = -1; /* Internal iterator of scream. */
ccac19ea 1591 char *scream_olds = NULL;
6eb5f6b9 1592 SV* oreplsv = GvSV(PL_replgv);
1df70142 1593 const bool do_utf8 = DO_UTF8(sv);
2757e526 1594 I32 multiline;
2a782b5b 1595#ifdef DEBUGGING
2757e526
JH
1596 SV* dsv0;
1597 SV* dsv1;
2a782b5b 1598#endif
3b0527fe 1599 regmatch_info reginfo; /* create some info to pass to regtry etc */
a3621e74
YO
1600
1601 GET_RE_DEBUG_FLAGS_DECL;
1602
9d4ba2ae 1603 PERL_UNUSED_ARG(data);
6eb5f6b9
JH
1604
1605 /* Be paranoid... */
1606 if (prog == NULL || startpos == NULL) {
1607 Perl_croak(aTHX_ "NULL regexp parameter");
1608 return 0;
1609 }
1610
2757e526 1611 multiline = prog->reganch & PMf_MULTILINE;
3b0527fe 1612 reginfo.prog = prog;
2757e526
JH
1613
1614#ifdef DEBUGGING
1615 dsv0 = PERL_DEBUG_PAD_ZERO(0);
1616 dsv1 = PERL_DEBUG_PAD_ZERO(1);
1617#endif
1618
bac06658
JH
1619 RX_MATCH_UTF8_set(prog, do_utf8);
1620
6eb5f6b9 1621 minlen = prog->minlen;
61a36c01 1622 if (strend - startpos < minlen) {
a3621e74 1623 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584
JH
1624 "String too short [regexec_flags]...\n"));
1625 goto phooey;
1aa99e6b 1626 }
6eb5f6b9 1627
6eb5f6b9
JH
1628 /* Check validity of program. */
1629 if (UCHARAT(prog->program) != REG_MAGIC) {
1630 Perl_croak(aTHX_ "corrupted regexp program");
1631 }
1632
1633 PL_reg_flags = 0;
1634 PL_reg_eval_set = 0;
1635 PL_reg_maxiter = 0;
1636
1637 if (prog->reganch & ROPT_UTF8)
1638 PL_reg_flags |= RF_utf8;
1639
1640 /* Mark beginning of line for ^ and lookbehind. */
3b0527fe 1641 reginfo.bol = startpos; /* XXX not used ??? */
6eb5f6b9 1642 PL_bostr = strbeg;
3b0527fe 1643 reginfo.sv = sv;
6eb5f6b9
JH
1644
1645 /* Mark end of line for $ (and such) */
1646 PL_regeol = strend;
1647
1648 /* see how far we have to get to not match where we matched before */
3b0527fe 1649 reginfo.till = startpos+minend;
6eb5f6b9 1650
6eb5f6b9
JH
1651 /* If there is a "must appear" string, look for it. */
1652 s = startpos;
1653
3b0527fe 1654 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to set reginfo->ganch */
6eb5f6b9
JH
1655 MAGIC *mg;
1656
1657 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
3b0527fe 1658 reginfo.ganch = startpos;
6eb5f6b9
JH
1659 else if (sv && SvTYPE(sv) >= SVt_PVMG
1660 && SvMAGIC(sv)
14befaf4
DM
1661 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1662 && mg->mg_len >= 0) {
3b0527fe 1663 reginfo.ganch = strbeg + mg->mg_len; /* Defined pos() */
6eb5f6b9 1664 if (prog->reganch & ROPT_ANCH_GPOS) {
3b0527fe 1665 if (s > reginfo.ganch)
6eb5f6b9 1666 goto phooey;
3b0527fe 1667 s = reginfo.ganch;
6eb5f6b9
JH
1668 }
1669 }
1670 else /* pos() not defined */
3b0527fe 1671 reginfo.ganch = strbeg;
6eb5f6b9
JH
1672 }
1673
a0714e2c 1674 if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
6eb5f6b9
JH
1675 re_scream_pos_data d;
1676
1677 d.scream_olds = &scream_olds;
1678 d.scream_pos = &scream_pos;
1679 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7 1680 if (!s) {
a3621e74 1681 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1682 goto phooey; /* not present */
3fa9c3d7 1683 }
6eb5f6b9
JH
1684 }
1685
a3621e74 1686 DEBUG_EXECUTE_r({
1df70142
AL
1687 const char * const s0 = UTF
1688 ? pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1689 UNI_DISPLAY_REGEX)
1690 : prog->precomp;
bb7a0f54 1691 const int len0 = UTF ? (int)SvCUR(dsv0) : prog->prelen;
1df70142 1692 const char * const s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
c728cb41 1693 UNI_DISPLAY_REGEX) : startpos;
bb7a0f54 1694 const int len1 = do_utf8 ? (int)SvCUR(dsv1) : strend - startpos;
2a782b5b
JH
1695 if (!PL_colorset)
1696 reginitcolors();
1697 PerlIO_printf(Perl_debug_log,
a0288114 1698 "%sMatching REx%s \"%s%*.*s%s%s\" against \"%s%.*s%s%s\"\n",
e4584336 1699 PL_colors[4], PL_colors[5], PL_colors[0],
9e55ce06 1700 len0, len0, s0,
2a782b5b 1701 PL_colors[1],
9e55ce06 1702 len0 > 60 ? "..." : "",
2a782b5b 1703 PL_colors[0],
9e55ce06
JH
1704 (int)(len1 > 60 ? 60 : len1),
1705 s1, PL_colors[1],
1706 (len1 > 60 ? "..." : "")
2a782b5b
JH
1707 );
1708 });
6eb5f6b9
JH
1709
1710 /* Simplest case: anchored match need be tried only once. */
1711 /* [unless only anchor is BOL and multiline is set] */
1712 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
3b0527fe 1713 if (s == startpos && regtry(&reginfo, startpos))
6eb5f6b9 1714 goto got_it;
7fba1cd6 1715 else if (multiline || (prog->reganch & ROPT_IMPLICIT)
6eb5f6b9
JH
1716 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1717 {
1718 char *end;
1719
1720 if (minlen)
1721 dontbother = minlen - 1;
1aa99e6b 1722 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9 1723 /* for multiline we only have to try after newlines */
33b8afdf 1724 if (prog->check_substr || prog->check_utf8) {
6eb5f6b9
JH
1725 if (s == startpos)
1726 goto after_try;
1727 while (1) {
3b0527fe 1728 if (regtry(&reginfo, s))
6eb5f6b9
JH
1729 goto got_it;
1730 after_try:
1731 if (s >= end)
1732 goto phooey;
1733 if (prog->reganch & RE_USE_INTUIT) {
1734 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1735 if (!s)
1736 goto phooey;
1737 }
1738 else
1739 s++;
1740 }
1741 } else {
1742 if (s > startpos)
1743 s--;
1744 while (s < end) {
1745 if (*s++ == '\n') { /* don't need PL_utf8skip here */
3b0527fe 1746 if (regtry(&reginfo, s))
6eb5f6b9
JH
1747 goto got_it;
1748 }
1749 }
1750 }
1751 }
1752 goto phooey;
1753 } else if (prog->reganch & ROPT_ANCH_GPOS) {
3b0527fe 1754 if (regtry(&reginfo, reginfo.ganch))
6eb5f6b9
JH
1755 goto got_it;
1756 goto phooey;
1757 }
1758
1759 /* Messy cases: unanchored match. */
33b8afdf 1760 if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1761 /* we have /x+whatever/ */
1762 /* it must be a one character string (XXXX Except UTF?) */
33b8afdf 1763 char ch;
bf93d4cc
GS
1764#ifdef DEBUGGING
1765 int did_match = 0;
1766#endif
33b8afdf
JH
1767 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1768 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
3f7c398e 1769 ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
bf93d4cc 1770
1aa99e6b 1771 if (do_utf8) {
6eb5f6b9
JH
1772 while (s < strend) {
1773 if (*s == ch) {
a3621e74 1774 DEBUG_EXECUTE_r( did_match = 1 );
3b0527fe 1775 if (regtry(&reginfo, s)) goto got_it;
6eb5f6b9
JH
1776 s += UTF8SKIP(s);
1777 while (s < strend && *s == ch)
1778 s += UTF8SKIP(s);
1779 }
1780 s += UTF8SKIP(s);
1781 }
1782 }
1783 else {
1784 while (s < strend) {
1785 if (*s == ch) {
a3621e74 1786 DEBUG_EXECUTE_r( did_match = 1 );
3b0527fe 1787 if (regtry(&reginfo, s)) goto got_it;
6eb5f6b9
JH
1788 s++;
1789 while (s < strend && *s == ch)
1790 s++;
1791 }
1792 s++;
1793 }
1794 }
a3621e74 1795 DEBUG_EXECUTE_r(if (!did_match)
bf93d4cc 1796 PerlIO_printf(Perl_debug_log,
b7953727
JH
1797 "Did not find anchored character...\n")
1798 );
6eb5f6b9 1799 }
a0714e2c
SS
1800 else if (prog->anchored_substr != NULL
1801 || prog->anchored_utf8 != NULL
1802 || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
33b8afdf
JH
1803 && prog->float_max_offset < strend - s)) {
1804 SV *must;
1805 I32 back_max;
1806 I32 back_min;
1807 char *last;
6eb5f6b9 1808 char *last1; /* Last position checked before */
bf93d4cc
GS
1809#ifdef DEBUGGING
1810 int did_match = 0;
1811#endif
33b8afdf
JH
1812 if (prog->anchored_substr || prog->anchored_utf8) {
1813 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1814 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1815 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1816 back_max = back_min = prog->anchored_offset;
1817 } else {
1818 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1819 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1820 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1821 back_max = prog->float_max_offset;
1822 back_min = prog->float_min_offset;
1823 }
1824 if (must == &PL_sv_undef)
1825 /* could not downgrade utf8 check substring, so must fail */
1826 goto phooey;
1827
1828 last = HOP3c(strend, /* Cannot start after this */
1829 -(I32)(CHR_SVLEN(must)
1830 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9
JH
1831
1832 if (s > PL_bostr)
1833 last1 = HOPc(s, -1);
1834 else
1835 last1 = s - 1; /* bogus */
1836
a0288114 1837 /* XXXX check_substr already used to find "s", can optimize if
6eb5f6b9
JH
1838 check_substr==must. */
1839 scream_pos = -1;
1840 dontbother = end_shift;
1841 strend = HOPc(strend, -dontbother);
1842 while ( (s <= last) &&
9041c2e3 1843 ((flags & REXEC_SCREAM)
1aa99e6b 1844 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1845 end_shift, &scream_pos, 0))
1aa99e6b 1846 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1847 (unsigned char*)strend, must,
7fba1cd6 1848 multiline ? FBMrf_MULTILINE : 0))) ) {
4addbd3b
HS
1849 /* we may be pointing at the wrong string */
1850 if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
3f7c398e 1851 s = strbeg + (s - SvPVX_const(sv));
a3621e74 1852 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1853 if (HOPc(s, -back_max) > last1) {
1854 last1 = HOPc(s, -back_min);
1855 s = HOPc(s, -back_max);
1856 }
1857 else {
52657f30 1858 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
6eb5f6b9
JH
1859
1860 last1 = HOPc(s, -back_min);
52657f30 1861 s = t;
6eb5f6b9 1862 }
1aa99e6b 1863 if (do_utf8) {
6eb5f6b9 1864 while (s <= last1) {
3b0527fe 1865 if (regtry(&reginfo, s))
6eb5f6b9
JH
1866 goto got_it;
1867 s += UTF8SKIP(s);
1868 }
1869 }
1870 else {
1871 while (s <= last1) {
3b0527fe 1872 if (regtry(&reginfo, s))
6eb5f6b9
JH
1873 goto got_it;
1874 s++;
1875 }
1876 }
1877 }
a3621e74 1878 DEBUG_EXECUTE_r(if (!did_match)
b7953727 1879 PerlIO_printf(Perl_debug_log,
a0288114 1880 "Did not find %s substr \"%s%.*s%s\"%s...\n",
33b8afdf 1881 ((must == prog->anchored_substr || must == prog->anchored_utf8)
bf93d4cc
GS
1882 ? "anchored" : "floating"),
1883 PL_colors[0],
1884 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 1885 SvPVX_const(must),
b7953727
JH
1886 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1887 );
6eb5f6b9
JH
1888 goto phooey;
1889 }
155aba94 1890 else if ((c = prog->regstclass)) {
f14c76ed
RGS
1891 if (minlen) {
1892 I32 op = (U8)OP(prog->regstclass);
66e933ab 1893 /* don't bother with what can't match */
f14c76ed
RGS
1894 if (PL_regkind[op] != EXACT && op != CANY)
1895 strend = HOPc(strend, -(minlen - 1));
1896 }
a3621e74 1897 DEBUG_EXECUTE_r({
ffc61ed2 1898 SV *prop = sv_newmortal();
cfd0369c
NC
1899 const char *s0;
1900 const char *s1;
9e55ce06
JH
1901 int len0;
1902 int len1;
1903
32fc9b6a 1904 regprop(prog, prop, c);
9e55ce06 1905 s0 = UTF ?
3f7c398e 1906 pv_uni_display(dsv0, (U8*)SvPVX_const(prop), SvCUR(prop), 60,
c728cb41 1907 UNI_DISPLAY_REGEX) :
cfd0369c 1908 SvPVX_const(prop);
9e55ce06
JH
1909 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1910 s1 = UTF ?
c728cb41 1911 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
bb7a0f54 1912 len1 = UTF ? (int)SvCUR(dsv1) : strend - s;
9e55ce06 1913 PerlIO_printf(Perl_debug_log,
a0288114 1914 "Matching stclass \"%*.*s\" against \"%*.*s\"\n",
9e55ce06
JH
1915 len0, len0, s0,
1916 len1, len1, s1);
ffc61ed2 1917 });
3b0527fe 1918 if (find_byclass(prog, c, s, strend, &reginfo))
6eb5f6b9 1919 goto got_it;
a3621e74 1920 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1921 }
1922 else {
1923 dontbother = 0;
a0714e2c 1924 if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
33b8afdf 1925 /* Trim the end. */
d6a28714 1926 char *last;
33b8afdf
JH
1927 SV* float_real;
1928
1929 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1930 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1931 float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
d6a28714
JH
1932
1933 if (flags & REXEC_SCREAM) {
33b8afdf 1934 last = screaminstr(sv, float_real, s - strbeg,
d6a28714
JH
1935 end_shift, &scream_pos, 1); /* last one */
1936 if (!last)
ffc61ed2 1937 last = scream_olds; /* Only one occurrence. */
4addbd3b
HS
1938 /* we may be pointing at the wrong string */
1939 else if (RX_MATCH_COPIED(prog))
3f7c398e 1940 s = strbeg + (s - SvPVX_const(sv));
b8c5462f 1941 }
d6a28714
JH
1942 else {
1943 STRLEN len;
cfd0369c 1944 const char * const little = SvPV_const(float_real, len);
d6a28714 1945
33b8afdf 1946 if (SvTAIL(float_real)) {
d6a28714
JH
1947 if (memEQ(strend - len + 1, little, len - 1))
1948 last = strend - len + 1;
7fba1cd6 1949 else if (!multiline)
9041c2e3 1950 last = memEQ(strend - len, little, len)
bd61b366 1951 ? strend - len : NULL;
b8c5462f 1952 else
d6a28714
JH
1953 goto find_last;
1954 } else {
1955 find_last:
9041c2e3 1956 if (len)
d6a28714 1957 last = rninstr(s, strend, little, little + len);
b8c5462f 1958 else
a0288114 1959 last = strend; /* matching "$" */
b8c5462f 1960 }
b8c5462f 1961 }
bf93d4cc 1962 if (last == NULL) {
a3621e74 1963 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
bf93d4cc 1964 "%sCan't trim the tail, match fails (should not happen)%s\n",
e4584336 1965 PL_colors[4], PL_colors[5]));
bf93d4cc
GS
1966 goto phooey; /* Should not happen! */
1967 }
d6a28714
JH
1968 dontbother = strend - last + prog->float_min_offset;
1969 }
1970 if (minlen && (dontbother < minlen))
1971 dontbother = minlen - 1;
1972 strend -= dontbother; /* this one's always in bytes! */
1973 /* We don't know much -- general case. */
1aa99e6b 1974 if (do_utf8) {
d6a28714 1975 for (;;) {
3b0527fe 1976 if (regtry(&reginfo, s))
d6a28714
JH
1977 goto got_it;
1978 if (s >= strend)
1979 break;
b8c5462f 1980 s += UTF8SKIP(s);
d6a28714
JH
1981 };
1982 }
1983 else {
1984 do {
3b0527fe 1985 if (regtry(&reginfo, s))
d6a28714
JH
1986 goto got_it;
1987 } while (s++ < strend);
1988 }
1989 }
1990
1991 /* Failure. */
1992 goto phooey;
1993
1994got_it:
1995 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1996
1997 if (PL_reg_eval_set) {
1998 /* Preserve the current value of $^R */
1999 if (oreplsv != GvSV(PL_replgv))
2000 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2001 restored, the value remains
2002 the same. */
4f639d21 2003 restore_pos(aTHX_ prog);
d6a28714
JH
2004 }
2005
2006 /* make sure $`, $&, $', and $digit will work later */
2007 if ( !(flags & REXEC_NOT_FIRST) ) {
ed252734 2008 RX_MATCH_COPY_FREE(prog);
d6a28714
JH
2009 if (flags & REXEC_COPY_STR) {
2010 I32 i = PL_regeol - startpos + (stringarg - strbeg);
f8c7b90f 2011#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2012 if ((SvIsCOW(sv)
2013 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2014 if (DEBUG_C_TEST) {
2015 PerlIO_printf(Perl_debug_log,
2016 "Copy on write: regexp capture, type %d\n",
2017 (int) SvTYPE(sv));
2018 }
2019 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
d5263905 2020 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
ed252734
NC
2021 assert (SvPOKp(prog->saved_copy));
2022 } else
2023#endif
2024 {
2025 RX_MATCH_COPIED_on(prog);
2026 s = savepvn(strbeg, i);
2027 prog->subbeg = s;
2028 }
d6a28714 2029 prog->sublen = i;
d6a28714
JH
2030 }
2031 else {
2032 prog->subbeg = strbeg;
2033 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2034 }
2035 }
9041c2e3 2036
d6a28714
JH
2037 return 1;
2038
2039phooey:
a3621e74 2040 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
e4584336 2041 PL_colors[4], PL_colors[5]));
d6a28714 2042 if (PL_reg_eval_set)
4f639d21 2043 restore_pos(aTHX_ prog);
d6a28714
JH
2044 return 0;
2045}
2046
2047/*
2048 - regtry - try match at specific point
2049 */
2050STATIC I32 /* 0 failure, 1 success */
3b0527fe 2051S_regtry(pTHX_ const regmatch_info *reginfo, char *startpos)
d6a28714 2052{
97aff369 2053 dVAR;
d6a28714
JH
2054 register I32 *sp;
2055 register I32 *ep;
2056 CHECKPOINT lastcp;
3b0527fe 2057 regexp *prog = reginfo->prog;
a3621e74 2058 GET_RE_DEBUG_FLAGS_DECL;
d6a28714 2059
02db2b7b
IZ
2060#ifdef DEBUGGING
2061 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
2062#endif
d6a28714
JH
2063 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2064 MAGIC *mg;
2065
2066 PL_reg_eval_set = RS_init;
a3621e74 2067 DEBUG_EXECUTE_r(DEBUG_s(
b900a521
JH
2068 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2069 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 2070 ));
e8347627 2071 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
2072 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2073 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2074 SAVETMPS;
2075 /* Apparently this is not needed, judging by wantarray. */
e8347627 2076 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
2077 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2078
3b0527fe 2079 if (reginfo->sv) {
d6a28714 2080 /* Make $_ available to executed code. */
3b0527fe 2081 if (reginfo->sv != DEFSV) {
59f00321 2082 SAVE_DEFSV;
3b0527fe 2083 DEFSV = reginfo->sv;
b8c5462f 2084 }
d6a28714 2085
3b0527fe
DM
2086 if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2087 && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
d6a28714 2088 /* prepare for quick setting of pos */
d300d9fa
NC
2089#ifdef PERL_OLD_COPY_ON_WRITE
2090 if (SvIsCOW(sv))
2091 sv_force_normal_flags(sv, 0);
2092#endif
3b0527fe 2093 mg = sv_magicext(reginfo->sv, (SV*)0, PERL_MAGIC_regex_global,
d300d9fa 2094 &PL_vtbl_mglob, NULL, 0);
d6a28714 2095 mg->mg_len = -1;
b8c5462f 2096 }
d6a28714
JH
2097 PL_reg_magic = mg;
2098 PL_reg_oldpos = mg->mg_len;
4f639d21 2099 SAVEDESTRUCTOR_X(restore_pos, prog);
d6a28714 2100 }
09687e5a 2101 if (!PL_reg_curpm) {
a02a5408 2102 Newxz(PL_reg_curpm, 1, PMOP);
09687e5a
AB
2103#ifdef USE_ITHREADS
2104 {
2105 SV* repointer = newSViv(0);
577e12cc 2106 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2107 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2108 av_push(PL_regex_padav,repointer);
2109 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2110 PL_regex_pad = AvARRAY(PL_regex_padav);
2111 }
2112#endif
2113 }
aaa362c4 2114 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2115 PL_reg_oldcurpm = PL_curpm;
2116 PL_curpm = PL_reg_curpm;
2117 if (RX_MATCH_COPIED(prog)) {
2118 /* Here is a serious problem: we cannot rewrite subbeg,
2119 since it may be needed if this match fails. Thus
2120 $` inside (?{}) could fail... */
2121 PL_reg_oldsaved = prog->subbeg;
2122 PL_reg_oldsavedlen = prog->sublen;
f8c7b90f 2123#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2124 PL_nrs = prog->saved_copy;
2125#endif
d6a28714
JH
2126 RX_MATCH_COPIED_off(prog);
2127 }
2128 else
bd61b366 2129 PL_reg_oldsaved = NULL;
d6a28714
JH
2130 prog->subbeg = PL_bostr;
2131 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2132 }
973dddac 2133 prog->startp[0] = startpos - PL_bostr;
d6a28714
JH
2134 PL_reginput = startpos;
2135 PL_regstartp = prog->startp;
2136 PL_regendp = prog->endp;
2137 PL_reglastparen = &prog->lastparen;
a01268b5 2138 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714 2139 prog->lastparen = 0;
03994de8 2140 prog->lastcloseparen = 0;
d6a28714 2141 PL_regsize = 0;
a3621e74 2142 DEBUG_EXECUTE_r(PL_reg_starttry = startpos);
d6a28714
JH
2143 if (PL_reg_start_tmpl <= prog->nparens) {
2144 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2145 if(PL_reg_start_tmp)
2146 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2147 else
a02a5408 2148 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
d6a28714
JH
2149 }
2150
2151 /* XXXX What this code is doing here?!!! There should be no need
2152 to do this again and again, PL_reglastparen should take care of
3dd2943c 2153 this! --ilya*/
dafc8851
JH
2154
2155 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2156 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2157 * PL_reglastparen), is not needed at all by the test suite
2158 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2159 * enough, for building DynaLoader, or otherwise this
2160 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2161 * will happen. Meanwhile, this code *is* needed for the
2162 * above-mentioned test suite tests to succeed. The common theme
2163 * on those tests seems to be returning null fields from matches.
2164 * --jhi */
dafc8851 2165#if 1
d6a28714
JH
2166 sp = prog->startp;
2167 ep = prog->endp;
2168 if (prog->nparens) {
097eb12c 2169 register I32 i;
eb160463 2170 for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
d6a28714
JH
2171 *++sp = -1;
2172 *++ep = -1;
2173 }
2174 }
dafc8851 2175#endif
02db2b7b 2176 REGCP_SET(lastcp);
3b0527fe 2177 if (regmatch(reginfo, prog->program + 1)) {
d6a28714
JH
2178 prog->endp[0] = PL_reginput - PL_bostr;
2179 return 1;
2180 }
02db2b7b 2181 REGCP_UNWIND(lastcp);
d6a28714
JH
2182 return 0;
2183}
2184
02db2b7b
IZ
2185#define RE_UNWIND_BRANCH 1
2186#define RE_UNWIND_BRANCHJ 2
2187
2188union re_unwind_t;
2189
2190typedef struct { /* XX: makes sense to enlarge it... */
2191 I32 type;
2192 I32 prev;
2193 CHECKPOINT lastcp;
2194} re_unwind_generic_t;
2195
2196typedef struct {
2197 I32 type;
2198 I32 prev;
2199 CHECKPOINT lastcp;
2200 I32 lastparen;
2201 regnode *next;
2202 char *locinput;
2203 I32 nextchr;
3a2830be 2204 int minmod;
02db2b7b
IZ
2205#ifdef DEBUGGING
2206 int regindent;
2207#endif
2208} re_unwind_branch_t;
2209
2210typedef union re_unwind_t {
2211 I32 type;
2212 re_unwind_generic_t generic;
2213 re_unwind_branch_t branch;
2214} re_unwind_t;
2215
8ba1375e
MJD
2216#define sayYES goto yes
2217#define sayNO goto no
e0f9d4a8 2218#define sayNO_ANYOF goto no_anyof
8ba1375e 2219#define sayYES_FINAL goto yes_final
8ba1375e
MJD
2220#define sayNO_FINAL goto no_final
2221#define sayNO_SILENT goto do_no
2222#define saySAME(x) if (x) goto yes; else goto no
2223
3ab3c9b4
HS
2224#define POSCACHE_SUCCESS 0 /* caching success rather than failure */
2225#define POSCACHE_SEEN 1 /* we know what we're caching */
2226#define POSCACHE_START 2 /* the real cache: this bit maps to pos 0 */
7409bbd3 2227
3ab3c9b4 2228#define CACHEsayYES STMT_START { \
d8319b27 2229 if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \
7409bbd3
DM
2230 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) { \
2231 PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) | (1<<POSCACHE_SEEN); \
2232 PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
2233 } \
2234 else if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS)) { \
2235 PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
2236 } \
2237 else { \
3ab3c9b4
HS
2238 /* cache records failure, but this is success */ \
2239 DEBUG_r( \
2240 PerlIO_printf(Perl_debug_log, \
2241 "%*s (remove success from failure cache)\n", \
2242 REPORT_CODE_OFF+PL_regindent*2, "") \
2243 ); \
d8319b27 2244 PL_reg_poscache[st->u.whilem.cache_offset] &= ~(1<<st->u.whilem.cache_bit); \
3ab3c9b4
HS
2245 } \
2246 } \
2247 sayYES; \
2248} STMT_END
7409bbd3 2249
3ab3c9b4 2250#define CACHEsayNO STMT_START { \
d8319b27 2251 if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \
7409bbd3 2252 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) { \
3ab3c9b4 2253 PL_reg_poscache[0] |= (1<<POSCACHE_SEEN); \
7409bbd3
DM
2254 PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
2255 } \
2256 else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2257 PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
2258 } \
2259 else { \
3ab3c9b4
HS
2260 /* cache records success, but this is failure */ \
2261 DEBUG_r( \
2262 PerlIO_printf(Perl_debug_log, \
2263 "%*s (remove failure from success cache)\n", \
2264 REPORT_CODE_OFF+PL_regindent*2, "") \
2265 ); \
d8319b27 2266 PL_reg_poscache[st->u.whilem.cache_offset] &= ~(1<<st->u.whilem.cache_bit); \
3ab3c9b4
HS
2267 } \
2268 } \
2269 sayNO; \
2270} STMT_END
2271
a3621e74
YO
2272/* this is used to determine how far from the left messages like
2273 'failed...' are printed. Currently 29 makes these messages line
2274 up with the opcode they refer to. Earlier perls used 25 which
2275 left these messages outdented making reviewing a debug output
2276 quite difficult.
2277*/
2278#define REPORT_CODE_OFF 29
2279
2280
2281/* Make sure there is a test for this +1 options in re_tests */
2282#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2283
9e137952
DM
2284/* this value indiciates that the c1/c2 "next char" test should be skipped */
2285#define CHRTEST_VOID -1000
2286
86545054
DM
2287#define SLAB_FIRST(s) (&(s)->states[0])
2288#define SLAB_LAST(s) (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2289
5d9a96ca
DM
2290/* grab a new slab and return the first slot in it */
2291
2292STATIC regmatch_state *
2293S_push_slab(pTHX)
2294{
54df2634
NC
2295#if PERL_VERSION < 9
2296 dMY_CXT;
2297#endif
5d9a96ca
DM
2298 regmatch_slab *s = PL_regmatch_slab->next;
2299 if (!s) {
2300 Newx(s, 1, regmatch_slab);
2301 s->prev = PL_regmatch_slab;
2302 s->next = NULL;
2303 PL_regmatch_slab->next = s;
2304 }
2305 PL_regmatch_slab = s;
86545054 2306 return SLAB_FIRST(s);
5d9a96ca 2307}
5b47454d 2308
95b24440
DM
2309/* simulate a recursive call to regmatch */
2310
2311#define REGMATCH(ns, where) \
5d9a96ca
DM
2312 st->scan = scan; \
2313 scan = (ns); \
2314 st->resume_state = resume_##where; \
95b24440
DM
2315 goto start_recurse; \
2316 resume_point_##where:
2317
aa283a38
DM
2318
2319/* push a new regex state. Set newst to point to it */
2320
2321#define PUSH_STATE(newst, resume) \
2322 depth++; \
2323 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "PUSH STATE(%d)\n", depth)); \
2324 st->scan = scan; \
2325 st->next = next; \
2326 st->n = n; \
2327 st->locinput = locinput; \
2328 st->resume_state = resume; \
2329 newst = st+1; \
86545054 2330 if (newst > SLAB_LAST(PL_regmatch_slab)) \
aa283a38
DM
2331 newst = S_push_slab(aTHX); \
2332 PL_regmatch_state = newst; \
2333 newst->cc = 0; \
2334 newst->minmod = 0; \
2335 newst->sw = 0; \
2336 newst->logical = 0; \
2337 newst->unwind = 0; \
2338 locinput = PL_reginput; \
2339 nextchr = UCHARAT(locinput);
2340
2341#define POP_STATE \
2342 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE(%d)\n", depth)); \
2343 depth--; \
2344 st--; \
86545054 2345 if (st < SLAB_FIRST(PL_regmatch_slab)) { \
aa283a38 2346 PL_regmatch_slab = PL_regmatch_slab->prev; \
86545054 2347 st = SLAB_LAST(PL_regmatch_slab); \
aa283a38
DM
2348 } \
2349 PL_regmatch_state = st; \
2350 scan = st->scan; \
2351 next = st->next; \
2352 n = st->n; \
2353 locinput = st->locinput; \
2354 nextchr = UCHARAT(locinput);
2355
d6a28714
JH
2356/*
2357 - regmatch - main matching routine
2358 *
2359 * Conceptually the strategy is simple: check to see whether the current
2360 * node matches, call self recursively to see whether the rest matches,
2361 * and then act accordingly. In practice we make some effort to avoid
2362 * recursion, in particular by going through "ordinary" nodes (that don't
2363 * need to know whether the rest of the match failed) by a loop instead of
2364 * by recursion.
2365 */
2366/* [lwall] I've hoisted the register declarations to the outer block in order to
2367 * maybe save a little bit of pushing and popping on the stack. It also takes
2368 * advantage of machines that use a register save mask on subroutine entry.
95b24440
DM
2369 *
2370 * This function used to be heavily recursive, but since this had the
2371 * effect of blowing the CPU stack on complex regexes, it has been
2372 * restructured to be iterative, and to save state onto the heap rather
2373 * than the stack. Essentially whereever regmatch() used to be called, it
2374 * pushes the current state, notes where to return, then jumps back into
2375 * the main loop.
2376 *
2377 * Originally the structure of this function used to look something like
2378
2379 S_regmatch() {
2380 int a = 1, b = 2;
2381 ...
2382 while (scan != NULL) {
5d9a96ca 2383 a++; // do stuff with a and b
95b24440
DM
2384 ...
2385 switch (OP(scan)) {
2386 case FOO: {
2387 int local = 3;
2388 ...
2389 if (regmatch(...)) // recurse
2390 goto yes;
2391 }
2392 ...
2393 }
2394 }
2395 yes:
2396 return 1;
2397 }
2398
2399 * Now it looks something like this:
2400
5d9a96ca 2401 typedef struct {
95b24440
DM
2402 int a, b, local;
2403 int resume_state;
5d9a96ca 2404 } regmatch_state;
95b24440
DM
2405
2406 S_regmatch() {
5d9a96ca
DM
2407 regmatch_state *st = new();
2408 int depth=0;
2409 st->a++; // do stuff with a and b
95b24440
DM
2410 ...
2411 while (scan != NULL) {
2412 ...
2413 switch (OP(scan)) {
2414 case FOO: {
5d9a96ca 2415 st->local = 3;
95b24440 2416 ...
5d9a96ca
DM
2417 st->scan = scan;
2418 scan = ...;
2419 st->resume_state = resume_FOO;
2420 goto start_recurse; // recurse
95b24440 2421
5d9a96ca
DM
2422 resume_point_FOO:
2423 if (result)
95b24440
DM
2424 goto yes;
2425 }
2426 ...
2427 }
5d9a96ca
DM
2428 start_recurse:
2429 st = new(); push a new state
2430 st->a = 1; st->b = 2;
2431 depth++;
95b24440 2432 }
5d9a96ca 2433 yes:
95b24440 2434 result = 1;
5d9a96ca
DM
2435 if (depth--) {
2436 st = pop();
95b24440
DM
2437 switch (resume_state) {
2438 case resume_FOO:
2439 goto resume_point_FOO;
2440 ...
2441 }
2442 }
2443 return result
2444 }
2445
2446 * WARNING: this means that any line in this function that contains a
2447 * REGMATCH() or TRYPAREN() is actually simulating a recursive call to
2448 * regmatch() using gotos instead. Thus the values of any local variables
2449 * not saved in the regmatch_state structure will have been lost when
2450 * execution resumes on the next line .
5d9a96ca
DM
2451 *
2452 * States (ie the st pointer) are allocated in slabs of about 4K in size.
2453 * PL_regmatch_state always points to the currently active state, and
2454 * PL_regmatch_slab points to the slab currently containing PL_regmatch_state.
2455 * The first time regmatch is called, the first slab is allocated, and is
2456 * never freed until interpreter desctruction. When the slab is full,
2457 * a new one is allocated chained to the end. At exit from regmatch, slabs
2458 * allocated since entry are freed.
d6a28714 2459 */
95b24440
DM
2460
2461
d6a28714 2462STATIC I32 /* 0 failure, 1 success */
3b0527fe 2463S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
d6a28714 2464{
54df2634
NC
2465#if PERL_VERSION < 9
2466 dMY_CXT;
2467#endif
27da23d5 2468 dVAR;
95b24440 2469 register const bool do_utf8 = PL_reg_match_utf8;
4ad0818d 2470 const U32 uniflags = UTF8_ALLOW_DEFAULT;
95b24440 2471
3b0527fe
DM
2472 regexp *rex = reginfo->prog;
2473
5d9a96ca
DM
2474 regmatch_slab *orig_slab;
2475 regmatch_state *orig_state;
a3621e74 2476
5d9a96ca
DM
2477 /* the current state. This is a cached copy of PL_regmatch_state */
2478 register regmatch_state *st;
95b24440 2479
5d9a96ca
DM
2480 /* cache heavy used fields of st in registers */
2481 register regnode *scan;
2482 register regnode *next;
2483 register I32 n = 0; /* initialize to shut up compiler warning */
2484 register char *locinput = PL_reginput;
95b24440 2485
5d9a96ca
DM
2486 /* these variables are NOT saved during a recusive RFEGMATCH: */
2487 register I32 nextchr; /* is always set to UCHARAT(locinput) */
2488 bool result; /* return value of S_regmatch */
2489 regnode *inner; /* Next node in internal branch. */
2490 int depth = 0; /* depth of recursion */
aa283a38 2491 regmatch_state *newst; /* when pushing a state, this is the new one */
77cb431f
DM
2492 regmatch_state *yes_state = NULL; /* state to pop to on success of
2493 subpattern */
95b24440
DM
2494
2495#ifdef DEBUGGING
ab74612d 2496 SV *re_debug_flags = NULL;
a3621e74 2497 GET_RE_DEBUG_FLAGS;
d6a28714
JH
2498 PL_regindent++;
2499#endif
2500
5d9a96ca
DM
2501 /* on first ever call to regmatch, allocate first slab */
2502 if (!PL_regmatch_slab) {
2503 Newx(PL_regmatch_slab, 1, regmatch_slab);
2504 PL_regmatch_slab->prev = NULL;
2505 PL_regmatch_slab->next = NULL;
86545054 2506 PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
5d9a96ca
DM
2507 }
2508
2509 /* remember current high-water mark for exit */
2510 /* XXX this should be done with SAVE* instead */
2511 orig_slab = PL_regmatch_slab;
2512 orig_state = PL_regmatch_state;
2513
2514 /* grab next free state slot */
2515 st = ++PL_regmatch_state;
86545054 2516 if (st > SLAB_LAST(PL_regmatch_slab))
5d9a96ca
DM
2517 st = PL_regmatch_state = S_push_slab(aTHX);
2518
2519 st->minmod = 0;
2520 st->sw = 0;
2521 st->logical = 0;
2522 st->unwind = 0;
2523 st->cc = NULL;
d6a28714
JH
2524 /* Note that nextchr is a byte even in UTF */
2525 nextchr = UCHARAT(locinput);
2526 scan = prog;
2527 while (scan != NULL) {
8ba1375e 2528
a3621e74 2529 DEBUG_EXECUTE_r( {
6136c704 2530 SV * const prop = sv_newmortal();
1df70142
AL
2531 const int docolor = *PL_colors[0];
2532 const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2533 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2534 /* The part of the string before starttry has one color
2535 (pref0_len chars), between starttry and current
2536 position another one (pref_len - pref0_len chars),
2537 after the current position the third one.
2538 We assume that pref0_len <= pref_len, otherwise we
2539 decrease pref0_len. */
9041c2e3 2540 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2541 ? (5 + taill) - l : locinput - PL_bostr;
2542 int pref0_len;
d6a28714 2543
df1ffd02 2544 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2545 pref_len++;
2546 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2547 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2548 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2549 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2550 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2551 l--;
d6a28714
JH
2552 if (pref0_len < 0)
2553 pref0_len = 0;
2554 if (pref0_len > pref_len)
2555 pref0_len = pref_len;
32fc9b6a 2556 regprop(rex, prop, scan);
2a782b5b 2557 {
1df70142 2558 const char * const s0 =
f14c76ed 2559 do_utf8 && OP(scan) != CANY ?
95b24440 2560 pv_uni_display(PERL_DEBUG_PAD(0), (U8*)(locinput - pref_len),
c728cb41 2561 pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2562 locinput - pref_len;
bb7a0f54 2563 const int len0 = do_utf8 ? (int)strlen(s0) : pref0_len;
1df70142 2564 const char * const s1 = do_utf8 && OP(scan) != CANY ?
95b24440
DM
2565 pv_uni_display(PERL_DEBUG_PAD(1),
2566 (U8*)(locinput - pref_len + pref0_len),
c728cb41 2567 pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2568 locinput - pref_len + pref0_len;
bb7a0f54 2569 const int len1 = do_utf8 ? (int)strlen(s1) : pref_len - pref0_len;
1df70142 2570 const char * const s2 = do_utf8 && OP(scan) != CANY ?
95b24440 2571 pv_uni_display(PERL_DEBUG_PAD(2), (U8*)locinput,
c728cb41 2572 PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2573 locinput;
bb7a0f54 2574 const int len2 = do_utf8 ? (int)strlen(s2) : l;
2a782b5b
JH
2575 PerlIO_printf(Perl_debug_log,
2576 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2577 (IV)(locinput - PL_bostr),
2578 PL_colors[4],
2579 len0, s0,
2580 PL_colors[5],
2581 PL_colors[2],
2582 len1, s1,
2583 PL_colors[3],
2584 (docolor ? "" : "> <"),
2585 PL_colors[0],
2586 len2, s2,
2587 PL_colors[1],
2588 15 - l - pref_len + 1,
2589 "",
4f639d21 2590 (IV)(scan - rex->program), PL_regindent*2, "",
3f7c398e 2591 SvPVX_const(prop));
2a782b5b
JH
2592 }
2593 });
d6a28714
JH
2594
2595 next = scan + NEXT_OFF(scan);
2596 if (next == scan)
2597 next = NULL;
2598
2599 switch (OP(scan)) {
2600 case BOL:
7fba1cd6 2601 if (locinput == PL_bostr)
d6a28714 2602 {
3b0527fe 2603 /* reginfo->till = reginfo->bol; */
b8c5462f
JH
2604 break;
2605 }
d6a28714
JH
2606 sayNO;
2607 case MBOL:
12d33761
HS
2608 if (locinput == PL_bostr ||
2609 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2610 {
b8c5462f
JH
2611 break;
2612 }
d6a28714
JH
2613 sayNO;
2614 case SBOL:
c2a73568 2615 if (locinput == PL_bostr)
b8c5462f 2616 break;
d6a28714
JH
2617 sayNO;
2618 case GPOS:
3b0527fe 2619 if (locinput == reginfo->ganch)
d6a28714
JH
2620 break;
2621 sayNO;
2622 case EOL:
d6a28714
JH
2623 goto seol;
2624 case MEOL:
d6a28714 2625 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2626 sayNO;
b8c5462f 2627 break;
d6a28714
JH
2628 case SEOL:
2629 seol:
2630 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2631 sayNO;
d6a28714 2632 if (PL_regeol - locinput > 1)
b8c5462f 2633 sayNO;
b8c5462f 2634 break;
d6a28714
JH
2635 case EOS:
2636 if (PL_regeol != locinput)
b8c5462f 2637 sayNO;
d6a28714 2638 break;
ffc61ed2 2639 case SANY:
d6a28714 2640 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2641 sayNO;
f33976b4
DB
2642 if (do_utf8) {
2643 locinput += PL_utf8skip[nextchr];
2644 if (locinput > PL_regeol)
2645 sayNO;
2646 nextchr = UCHARAT(locinput);
2647 }
2648 else
2649 nextchr = UCHARAT(++locinput);
2650 break;
2651 case CANY:
2652 if (!nextchr && locinput >= PL_regeol)
2653 sayNO;
b8c5462f 2654 nextchr = UCHARAT(++locinput);
a0d0e21e 2655 break;
ffc61ed2 2656 case REG_ANY:
1aa99e6b
IH
2657 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2658 sayNO;
2659 if (do_utf8) {
b8c5462f 2660 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2661 if (locinput > PL_regeol)
2662 sayNO;
a0ed51b3 2663 nextchr = UCHARAT(locinput);
a0ed51b3 2664 }
1aa99e6b
IH
2665 else
2666 nextchr = UCHARAT(++locinput);
a0ed51b3 2667 break;
a3621e74
YO
2668
2669
2670
2671 /*
2672 traverse the TRIE keeping track of all accepting states
2673 we transition through until we get to a failing node.
2674
a3621e74
YO
2675
2676 */
5b47454d 2677 case TRIE:
a3621e74
YO
2678 case TRIEF:
2679 case TRIEFL:
2680 {
a3621e74
YO
2681 U8 *uc = ( U8* )locinput;
2682 U32 state = 1;
2683 U16 charid = 0;
2684 U32 base = 0;
2685 UV uvc = 0;
2686 STRLEN len = 0;
2687 STRLEN foldlen = 0;
a3621e74
YO
2688 U8 *uscan = (U8*)NULL;
2689 STRLEN bufflen=0;
95b24440 2690 SV *sv_accept_buff = NULL;
5b47454d
DM
2691 const enum { trie_plain, trie_utf8, trie_uft8_fold }
2692 trie_type = do_utf8 ?
2693 (OP(scan) == TRIE ? trie_utf8 : trie_uft8_fold)
2694 : trie_plain;
2695
7087a21c
NC
2696 /* what trie are we using right now */
2697 reg_trie_data *trie
32fc9b6a 2698 = (reg_trie_data*)rex->data->data[ ARG( scan ) ];
d8319b27 2699 st->u.trie.accepted = 0; /* how many accepting states we have seen */
95b24440 2700 result = 0;
a3621e74
YO
2701
2702 while ( state && uc <= (U8*)PL_regeol ) {
2703
5b47454d 2704 if (trie->states[ state ].wordnum) {
d8319b27 2705 if (!st->u.trie.accepted ) {
5b47454d
DM
2706 ENTER;
2707 SAVETMPS;
2708 bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
2709 sv_accept_buff=newSV(bufflen *
2710 sizeof(reg_trie_accepted) - 1);
2711 SvCUR_set(sv_accept_buff,
2712 sizeof(reg_trie_accepted));
2713 SvPOK_on(sv_accept_buff);
2714 sv_2mortal(sv_accept_buff);
d8319b27 2715 st->u.trie.accept_buff =
5b47454d
DM
2716 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
2717 }
2718 else {
d8319b27 2719 if (st->u.trie.accepted >= bufflen) {
5b47454d 2720 bufflen *= 2;
d8319b27 2721 st->u.trie.accept_buff =(reg_trie_accepted*)
5b47454d
DM
2722 SvGROW(sv_accept_buff,
2723 bufflen * sizeof(reg_trie_accepted));
2724 }
2725 SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
2726 + sizeof(reg_trie_accepted));
2727 }
d8319b27
DM
2728 st->u.trie.accept_buff[st->u.trie.accepted].wordnum = trie->states[state].wordnum;
2729 st->u.trie.accept_buff[st->u.trie.accepted].endpos = uc;
2730 ++st->u.trie.accepted;
5b47454d 2731 }
a3621e74
YO
2732
2733 base = trie->states[ state ].trans.base;
2734
2735 DEBUG_TRIE_EXECUTE_r(
2736 PerlIO_printf( Perl_debug_log,
e4584336 2737 "%*s %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ",
a3621e74 2738 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
d8319b27 2739 (UV)state, (UV)base, (UV)st->u.trie.accepted );
a3621e74
YO
2740 );
2741
2742 if ( base ) {
5b47454d
DM
2743 switch (trie_type) {
2744 case trie_uft8_fold:
a3621e74
YO
2745 if ( foldlen>0 ) {
2746 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
2747 foldlen -= len;
2748 uscan += len;
2749 len=0;
2750 } else {
1df70142 2751 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
a3621e74
YO
2752 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2753 uvc = to_uni_fold( uvc, foldbuf, &foldlen );
2754 foldlen -= UNISKIP( uvc );
2755 uscan = foldbuf + UNISKIP( uvc );
2756 }
5b47454d
DM
2757 break;
2758 case trie_utf8:
2759 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN,
2760 &len, uniflags );
2761 break;
2762 case trie_plain:
e4584336 2763 uvc = (UV)*uc;
a3621e74
YO
2764 len = 1;
2765 }
2766
5b47454d
DM
2767 if (uvc < 256) {
2768 charid = trie->charmap[ uvc ];
2769 }
2770 else {
2771 charid = 0;
2772 if (trie->widecharmap) {
2773 SV** svpp = (SV**)NULL;
2774 svpp = hv_fetch(trie->widecharmap,
2775 (char*)&uvc, sizeof(UV), 0);
2776 if (svpp)
2777 charid = (U16)SvIV(*svpp);
2778 }
2779 }
a3621e74 2780
5b47454d
DM
2781 if (charid &&
2782 (base + charid > trie->uniquecharcount )
2783 && (base + charid - 1 - trie->uniquecharcount
2784 < trie->lasttrans)
2785 && trie->trans[base + charid - 1 -
2786 trie->uniquecharcount].check == state)
2787 {
2788 state = trie->trans[base + charid - 1 -
2789 trie->uniquecharcount ].next;
2790 }
2791 else {
2792 state = 0;
2793 }
2794 uc += len;
2795
2796 }
2797 else {
a3621e74
YO
2798 state = 0;
2799 }
2800 DEBUG_TRIE_EXECUTE_r(
e4584336
RB
2801 PerlIO_printf( Perl_debug_log,
2802 "Charid:%3x CV:%4"UVxf" After State: %4"UVxf"%s\n",
2803 charid, uvc, (UV)state, PL_colors[5] );
a3621e74
YO
2804 );
2805 }
d8319b27 2806 if (!st->u.trie.accepted )
a3621e74 2807 sayNO;
a3621e74
YO
2808
2809 /*
2810 There was at least one accepting state that we
2811 transitioned through. Presumably the number of accepting
2812 states is going to be low, typically one or two. So we
2813 simply scan through to find the one with lowest wordnum.
2814 Once we find it, we swap the last state into its place
2815 and decrement the size. We then try to match the rest of
2816 the pattern at the point where the word ends, if we
2817 succeed then we end the loop, otherwise the loop
2818 eventually terminates once all of the accepting states
2819 have been tried.
2820 */
a3621e74 2821
d8319b27 2822 if ( st->u.trie.accepted == 1 ) {
a3621e74 2823 DEBUG_EXECUTE_r({
097eb12c 2824 SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ 0 ].wordnum-1, 0 );
a3621e74
YO
2825 PerlIO_printf( Perl_debug_log,
2826 "%*s %sonly one match : #%d <%s>%s\n",
e4584336 2827 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
d8319b27 2828 st->u.trie.accept_buff[ 0 ].wordnum,
cfd0369c 2829 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",
a3621e74
YO
2830 PL_colors[5] );
2831 });
d8319b27 2832 PL_reginput = (char *)st->u.trie.accept_buff[ 0 ].endpos;
a3621e74
YO
2833 /* in this case we free tmps/leave before we call regmatch
2834 as we wont be using accept_buff again. */
2835 FREETMPS;
2836 LEAVE;
95b24440
DM
2837 REGMATCH(scan + NEXT_OFF(scan), TRIE1);
2838 /*** all unsaved local vars undefined at this point */
a3621e74
YO
2839 } else {
2840 DEBUG_EXECUTE_r(
e4584336 2841 PerlIO_printf( Perl_debug_log,"%*s %sgot %"IVdf" possible matches%s\n",
d8319b27 2842 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], (IV)st->u.trie.accepted,
a3621e74
YO
2843 PL_colors[5] );
2844 );
d8319b27 2845 while ( !result && st->u.trie.accepted-- ) {
a3621e74
YO
2846 U32 best = 0;
2847 U32 cur;
d8319b27 2848 for( cur = 1 ; cur <= st->u.trie.accepted ; cur++ ) {
e4584336
RB
2849 DEBUG_TRIE_EXECUTE_r(
2850 PerlIO_printf( Perl_debug_log,
2851 "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
2852 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
d8319b27
DM
2853 (IV)best, st->u.trie.accept_buff[ best ].wordnum, (IV)cur,
2854 st->u.trie.accept_buff[ cur ].wordnum, PL_colors[5] );
e4584336 2855 );
a3621e74 2856
d8319b27
DM
2857 if (st->u.trie.accept_buff[cur].wordnum <
2858 st->u.trie.accept_buff[best].wordnum)
e822a8b4 2859 best = cur;
a3621e74
YO
2860 }
2861 DEBUG_EXECUTE_r({
87830502 2862 reg_trie_data * const trie = (reg_trie_data*)
32fc9b6a 2863 rex->data->data[ARG(scan)];
d8319b27 2864 SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ best ].wordnum - 1, 0 );
a3621e74 2865 PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at 0x%p%s\n",
e4584336 2866 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
d8319b27 2867 st->u.trie.accept_buff[best].wordnum,
ca0270c4 2868 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr", (void*)scan,
a3621e74
YO
2869 PL_colors[5] );
2870 });
d8319b27
DM
2871 if ( best<st->u.trie.accepted ) {
2872 reg_trie_accepted tmp = st->u.trie.accept_buff[ best ];
2873 st->u.trie.accept_buff[ best ] = st->u.trie.accept_buff[ st->u.trie.accepted ];
2874 st->u.trie.accept_buff[ st->u.trie.accepted ] = tmp;
2875 best = st->u.trie.accepted;
a3621e74 2876 }
d8319b27 2877 PL_reginput = (char *)st->u.trie.accept_buff[ best ].endpos;
a3621e74
YO
2878
2879 /*
2880 as far as I can tell we only need the SAVETMPS/FREETMPS
2881 for re's with EVAL in them but I'm leaving them in for
2882 all until I can be sure.
2883 */
2884 SAVETMPS;
95b24440
DM
2885 REGMATCH(scan + NEXT_OFF(scan), TRIE2);
2886 /*** all unsaved local vars undefined at this point */
a3621e74
YO
2887 FREETMPS;
2888 }
2889 FREETMPS;
2890 LEAVE;
2891 }
2892
95b24440 2893 if (result) {
a3621e74
YO
2894 sayYES;
2895 } else {
2896 sayNO;
2897 }
2898 }
2899 /* unreached codepoint */
95b24440
DM
2900 case EXACT: {
2901 char *s = STRING(scan);
5d9a96ca 2902 st->ln = STR_LEN(scan);
eb160463 2903 if (do_utf8 != UTF) {
bc517b45 2904 /* The target and the pattern have differing utf8ness. */
1aa99e6b 2905 char *l = locinput;
5d9a96ca 2906 const char *e = s + st->ln;
a72c7584 2907
5ff6fc6d
JH
2908 if (do_utf8) {
2909 /* The target is utf8, the pattern is not utf8. */
1aa99e6b 2910 while (s < e) {
a3b680e6 2911 STRLEN ulen;
1aa99e6b 2912 if (l >= PL_regeol)
5ff6fc6d
JH
2913 sayNO;
2914 if (NATIVE_TO_UNI(*(U8*)s) !=
89ebb4a3 2915 utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
041457d9 2916 uniflags))
5ff6fc6d 2917 sayNO;
bc517b45 2918 l += ulen;
5ff6fc6d 2919 s ++;
1aa99e6b 2920 }
5ff6fc6d
JH
2921 }
2922 else {
2923 /* The target is not utf8, the pattern is utf8. */
1aa99e6b 2924 while (s < e) {
a3b680e6 2925 STRLEN ulen;
1aa99e6b
IH
2926 if (l >= PL_regeol)
2927 sayNO;
5ff6fc6d 2928 if (NATIVE_TO_UNI(*((U8*)l)) !=
89ebb4a3 2929 utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
041457d9 2930 uniflags))
1aa99e6b 2931 sayNO;
bc517b45 2932 s += ulen;
a72c7584 2933 l ++;
1aa99e6b 2934 }
5ff6fc6d 2935 }
1aa99e6b
IH
2936 locinput = l;
2937 nextchr = UCHARAT(locinput);
2938 break;
2939 }
bc517b45 2940 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2941 /* Inline the first character, for speed. */
2942 if (UCHARAT(s) != nextchr)
2943 sayNO;
5d9a96ca 2944 if (PL_regeol - locinput < st->ln)
d6a28714 2945 sayNO;
5d9a96ca 2946 if (st->ln > 1 && memNE(s, locinput, st->ln))
d6a28714 2947 sayNO;
5d9a96ca 2948 locinput += st->ln;
d6a28714
JH
2949 nextchr = UCHARAT(locinput);
2950 break;
95b24440 2951 }
d6a28714 2952 case EXACTFL:
b8c5462f
JH
2953 PL_reg_flags |= RF_tainted;
2954 /* FALL THROUGH */
95b24440
DM
2955 case EXACTF: {
2956 char *s = STRING(scan);
5d9a96ca 2957 st->ln = STR_LEN(scan);
d6a28714 2958
d07ddd77
JH
2959 if (do_utf8 || UTF) {
2960 /* Either target or the pattern are utf8. */
d6a28714 2961 char *l = locinput;
d07ddd77 2962 char *e = PL_regeol;
bc517b45 2963
5d9a96ca 2964 if (ibcmp_utf8(s, 0, st->ln, (bool)UTF,
1feea2c7 2965 l, &e, 0, do_utf8)) {
5486206c
JH
2966 /* One more case for the sharp s:
2967 * pack("U0U*", 0xDF) =~ /ss/i,
2968 * the 0xC3 0x9F are the UTF-8
2969 * byte sequence for the U+00DF. */
2970 if (!(do_utf8 &&
2971 toLOWER(s[0]) == 's' &&
5d9a96ca 2972 st->ln >= 2 &&
5486206c
JH
2973 toLOWER(s[1]) == 's' &&
2974 (U8)l[0] == 0xC3 &&
2975 e - l >= 2 &&
2976 (U8)l[1] == 0x9F))
2977 sayNO;
2978 }
d07ddd77
JH
2979 locinput = e;
2980 nextchr = UCHARAT(locinput);
2981 break;
a0ed51b3 2982 }
d6a28714 2983
bc517b45
JH
2984 /* Neither the target and the pattern are utf8. */
2985
d6a28714
JH
2986 /* Inline the first character, for speed. */
2987 if (UCHARAT(s) != nextchr &&
2988 UCHARAT(s) != ((OP(scan) == EXACTF)
2989 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2990 sayNO;
5d9a96ca 2991 if (PL_regeol - locinput < st->ln)
b8c5462f 2992 sayNO;
5d9a96ca
DM
2993 if (st->ln > 1 && (OP(scan) == EXACTF
2994 ? ibcmp(s, locinput, st->ln)
2995 : ibcmp_locale(s, locinput, st->ln)))
4633a7c4 2996 sayNO;
5d9a96ca 2997 locinput += st->ln;
d6a28714 2998 nextchr = UCHARAT(locinput);
a0d0e21e 2999 break;
95b24440 3000 }
d6a28714 3001 case ANYOF:
ffc61ed2 3002 if (do_utf8) {
9e55ce06
JH
3003 STRLEN inclasslen = PL_regeol - locinput;
3004
32fc9b6a 3005 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
e0f9d4a8 3006 sayNO_ANYOF;
ffc61ed2
JH
3007 if (locinput >= PL_regeol)
3008 sayNO;
0f0076b4 3009 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
b8c5462f 3010 nextchr = UCHARAT(locinput);
e0f9d4a8 3011 break;
ffc61ed2
JH
3012 }
3013 else {
3014 if (nextchr < 0)
3015 nextchr = UCHARAT(locinput);
32fc9b6a 3016 if (!REGINCLASS(rex, scan, (U8*)locinput))
e0f9d4a8 3017 sayNO_ANYOF;
ffc61ed2
JH
3018 if (!nextchr && locinput >= PL_regeol)
3019 sayNO;
3020 nextchr = UCHARAT(++locinput);
e0f9d4a8
JH
3021 break;
3022 }
3023 no_anyof:
3024 /* If we might have the case of the German sharp s
3025 * in a casefolding Unicode character class. */
3026
ebc501f0
JH
3027 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3028 locinput += SHARP_S_SKIP;
e0f9d4a8 3029 nextchr = UCHARAT(locinput);
ffc61ed2 3030 }
e0f9d4a8
JH
3031 else
3032 sayNO;
b8c5462f 3033 break;
d6a28714 3034 case ALNUML:
b8c5462f
JH
3035 PL_reg_flags |= RF_tainted;
3036 /* FALL THROUGH */
d6a28714 3037 case ALNUM:
b8c5462f 3038 if (!nextchr)
4633a7c4 3039 sayNO;
ffc61ed2 3040 if (do_utf8) {
1a4fad37 3041 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3042 if (!(OP(scan) == ALNUM
bb7a0f54 3043 ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 3044 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
3045 {
3046 sayNO;
a0ed51b3 3047 }
b8c5462f 3048 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3049 nextchr = UCHARAT(locinput);
3050 break;
3051 }
ffc61ed2 3052 if (!(OP(scan) == ALNUM
d6a28714 3053 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 3054 sayNO;
b8c5462f 3055 nextchr = UCHARAT(++locinput);
a0d0e21e 3056 break;
d6a28714 3057 case NALNUML:
b8c5462f
JH
3058 PL_reg_flags |= RF_tainted;
3059 /* FALL THROUGH */
d6a28714
JH
3060 case NALNUM:
3061 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 3062 sayNO;
ffc61ed2 3063 if (do_utf8) {
1a4fad37 3064 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3065 if (OP(scan) == NALNUM
bb7a0f54 3066 ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
3067 : isALNUM_LC_utf8((U8*)locinput))
3068 {
b8c5462f 3069 sayNO;
d6a28714 3070 }
b8c5462f
JH
3071 locinput += PL_utf8skip[nextchr];
3072 nextchr = UCHARAT(locinput);
3073 break;
3074 }
ffc61ed2 3075 if (OP(scan) == NALNUM
d6a28714 3076 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 3077 sayNO;
76e3520e 3078 nextchr = UCHARAT(++locinput);
a0d0e21e 3079 break;
d6a28714
JH
3080 case BOUNDL:
3081 case NBOUNDL:
3280af22 3082 PL_reg_flags |= RF_tainted;
bbce6d69 3083 /* FALL THROUGH */
d6a28714
JH
3084 case BOUND:
3085 case NBOUND:
3086 /* was last char in word? */
ffc61ed2 3087 if (do_utf8) {
12d33761 3088 if (locinput == PL_bostr)
5d9a96ca 3089 st->ln = '\n';
ffc61ed2 3090 else {
a3b680e6 3091 const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
9041c2e3 3092
4ad0818d 3093 st->ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
ffc61ed2
JH
3094 }
3095 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
5d9a96ca 3096 st->ln = isALNUM_uni(st->ln);
1a4fad37 3097 LOAD_UTF8_CHARCLASS_ALNUM();
3568d838 3098 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
3099 }
3100 else {
5d9a96ca 3101 st->ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(st->ln));
ffc61ed2
JH
3102 n = isALNUM_LC_utf8((U8*)locinput);
3103 }
a0ed51b3 3104 }
d6a28714 3105 else {
5d9a96ca 3106 st->ln = (locinput != PL_bostr) ?
12d33761 3107 UCHARAT(locinput - 1) : '\n';
ffc61ed2 3108 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
5d9a96ca 3109 st->ln = isALNUM(st->ln);
ffc61ed2
JH
3110 n = isALNUM(nextchr);
3111 }
3112 else {
5d9a96ca 3113 st->ln = isALNUM_LC(st->ln);
ffc61ed2
JH
3114 n = isALNUM_LC(nextchr);
3115 }
d6a28714 3116 }
5d9a96ca 3117 if (((!st->ln) == (!n)) == (OP(scan) == BOUND ||
ffc61ed2
JH
3118 OP(scan) == BOUNDL))
3119 sayNO;
a0ed51b3 3120 break;
d6a28714 3121 case SPACEL:
3280af22 3122 PL_reg_flags |= RF_tainted;
bbce6d69 3123 /* FALL THROUGH */
d6a28714 3124 case SPACE:
9442cb0e 3125 if (!nextchr)
4633a7c4 3126 sayNO;
1aa99e6b 3127 if (do_utf8) {
fd400ab9 3128 if (UTF8_IS_CONTINUED(nextchr)) {
1a4fad37 3129 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3130 if (!(OP(scan) == SPACE
bb7a0f54 3131 ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
3132 : isSPACE_LC_utf8((U8*)locinput)))
3133 {
3134 sayNO;
3135 }
3136 locinput += PL_utf8skip[nextchr];
3137 nextchr = UCHARAT(locinput);
3138 break;
d6a28714 3139 }
ffc61ed2
JH
3140 if (!(OP(scan) == SPACE
3141 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3142 sayNO;
3143 nextchr = UCHARAT(++locinput);
3144 }
3145 else {
3146 if (!(OP(scan) == SPACE
3147 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3148 sayNO;
3149 nextchr = UCHARAT(++locinput);
a0ed51b3 3150 }
a0ed51b3 3151 break;
d6a28714 3152 case NSPACEL:
3280af22 3153 PL_reg_flags |= RF_tainted;
bbce6d69 3154 /* FALL THROUGH */
d6a28714 3155 case NSPACE:
9442cb0e 3156 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3157 sayNO;
1aa99e6b 3158 if (do_utf8) {
1a4fad37 3159 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3160 if (OP(scan) == NSPACE
bb7a0f54 3161 ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 3162 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
3163 {
3164 sayNO;
3165 }
3166 locinput += PL_utf8skip[nextchr];
3167 nextchr = UCHARAT(locinput);
3168 break;
a0ed51b3 3169 }
ffc61ed2 3170 if (OP(scan) == NSPACE
d6a28714 3171 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 3172 sayNO;
76e3520e 3173 nextchr = UCHARAT(++locinput);
a0d0e21e 3174 break;
d6a28714 3175 case DIGITL:
a0ed51b3
LW
3176 PL_reg_flags |= RF_tainted;
3177 /* FALL THROUGH */
d6a28714 3178 case DIGIT:
9442cb0e 3179 if (!nextchr)
a0ed51b3 3180 sayNO;
1aa99e6b 3181 if (do_utf8) {
1a4fad37 3182 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3183 if (!(OP(scan) == DIGIT
bb7a0f54 3184 ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 3185 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 3186 {
a0ed51b3 3187 sayNO;
dfe13c55 3188 }
6f06b55f 3189 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3190 nextchr = UCHARAT(locinput);
3191 break;
3192 }
ffc61ed2 3193 if (!(OP(scan) == DIGIT
9442cb0e 3194 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
3195 sayNO;
3196 nextchr = UCHARAT(++locinput);
3197 break;
d6a28714 3198 case NDIGITL:
b8c5462f
JH
3199 PL_reg_flags |= RF_tainted;
3200 /* FALL THROUGH */
d6a28714 3201 case NDIGIT:
9442cb0e 3202 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3203 sayNO;
1aa99e6b 3204 if (do_utf8) {
1a4fad37 3205 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3206 if (OP(scan) == NDIGIT
bb7a0f54 3207 ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
3208 : isDIGIT_LC_utf8((U8*)locinput))
3209 {
a0ed51b3 3210 sayNO;
9442cb0e 3211 }
6f06b55f 3212 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3213 nextchr = UCHARAT(locinput);
3214 break;
3215 }
ffc61ed2 3216 if (OP(scan) == NDIGIT
9442cb0e 3217 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
3218 sayNO;
3219 nextchr = UCHARAT(++locinput);
3220 break;
3221 case CLUMP:
b7c83a7e 3222 if (locinput >= PL_regeol)
a0ed51b3 3223 sayNO;
b7c83a7e 3224 if (do_utf8) {
1a4fad37 3225 LOAD_UTF8_CHARCLASS_MARK();
b7c83a7e
JH
3226 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3227 sayNO;
3228 locinput += PL_utf8skip[nextchr];
3229 while (locinput < PL_regeol &&
3230 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3231 locinput += UTF8SKIP(locinput);
3232 if (locinput > PL_regeol)
3233 sayNO;
eb08e2da
JH
3234 }
3235 else
3236 locinput++;
a0ed51b3
LW
3237 nextchr = UCHARAT(locinput);
3238 break;
c8756f30 3239 case REFFL:
3280af22 3240 PL_reg_flags |= RF_tainted;
c8756f30 3241 /* FALL THROUGH */
c277df42 3242 case REF:
95b24440
DM
3243 case REFF: {
3244 char *s;
c277df42 3245 n = ARG(scan); /* which paren pair */
5d9a96ca 3246 st->ln = PL_regstartp[n];
2c2d71f5 3247 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
5d9a96ca 3248 if ((I32)*PL_reglastparen < n || st->ln == -1)
af3f8c16 3249 sayNO; /* Do not match unless seen CLOSEn. */
5d9a96ca 3250 if (st->ln == PL_regendp[n])
a0d0e21e 3251 break;
a0ed51b3 3252
5d9a96ca 3253 s = PL_bostr + st->ln;
1aa99e6b 3254 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 3255 char *l = locinput;
a3b680e6 3256 const char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
3257 /*
3258 * Note that we can't do the "other character" lookup trick as
3259 * in the 8-bit case (no pun intended) because in Unicode we
3260 * have to map both upper and title case to lower case.
3261 */
3262 if (OP(scan) == REFF) {
3263 while (s < e) {
a3b680e6
AL
3264 STRLEN ulen1, ulen2;
3265 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3266 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3267
a0ed51b3
LW
3268 if (l >= PL_regeol)
3269 sayNO;
a2a2844f
JH
3270 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3271 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 3272 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 3273 sayNO;
a2a2844f
JH
3274 s += ulen1;
3275 l += ulen2;
a0ed51b3
LW
3276 }
3277 }
3278 locinput = l;
3279 nextchr = UCHARAT(locinput);
3280 break;
3281 }
3282
a0d0e21e 3283 /* Inline the first character, for speed. */
76e3520e 3284 if (UCHARAT(s) != nextchr &&
c8756f30
AK
3285 (OP(scan) == REF ||
3286 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 3287 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 3288 sayNO;
5d9a96ca
DM
3289 st->ln = PL_regendp[n] - st->ln;
3290 if (locinput + st->ln > PL_regeol)
4633a7c4 3291 sayNO;
5d9a96ca
DM
3292 if (st->ln > 1 && (OP(scan) == REF
3293 ? memNE(s, locinput, st->ln)
c8756f30 3294 : (OP(scan) == REFF
5d9a96ca
DM
3295 ? ibcmp(s, locinput, st->ln)
3296 : ibcmp_locale(s, locinput, st->ln))))
4633a7c4 3297 sayNO;
5d9a96ca 3298 locinput += st->ln;
76e3520e 3299 nextchr = UCHARAT(locinput);
a0d0e21e 3300 break;
95b24440 3301 }
a0d0e21e
LW
3302
3303 case NOTHING:
c277df42 3304 case TAIL:
a0d0e21e
LW
3305 break;
3306 case BACK:
3307 break;
c277df42
IZ
3308 case EVAL:
3309 {
c277df42 3310 SV *ret;
8e5e9ebe 3311 {
4aabdb9b
DM
3312 /* execute the code in the {...} */
3313 dSP;
6136c704 3314 SV ** const before = SP;
4aabdb9b
DM
3315 OP_4tree * const oop = PL_op;
3316 COP * const ocurcop = PL_curcop;
3317 PAD *old_comppad;
4aabdb9b
DM
3318
3319 n = ARG(scan);
32fc9b6a 3320 PL_op = (OP_4tree*)rex->data->data[n];
4aabdb9b 3321 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
32fc9b6a 3322 PAD_SAVE_LOCAL(old_comppad, (PAD*)rex->data->data[n + 2]);
4aabdb9b
DM
3323 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
3324
8e5e9ebe
RGS
3325 CALLRUNOPS(aTHX); /* Scalar context. */
3326 SPAGAIN;
3327 if (SP == before)
075aa684 3328 ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
8e5e9ebe
RGS
3329 else {
3330 ret = POPs;
3331 PUTBACK;
3332 }
4aabdb9b
DM
3333
3334 PL_op = oop;
3335 PAD_RESTORE_LOCAL(old_comppad);
3336 PL_curcop = ocurcop;
3337 if (!st->logical) {
3338 /* /(?{...})/ */
3339 sv_setsv(save_scalar(PL_replgv), ret);
4aabdb9b
DM
3340 break;
3341 }
8e5e9ebe 3342 }
4aabdb9b
DM
3343 if (st->logical == 2) { /* Postponed subexpression: /(??{...})/ */
3344 regexp *re;
4aabdb9b 3345 {
4f639d21
DM
3346 /* extract RE object from returned value; compiling if
3347 * necessary */
3348
6136c704 3349 MAGIC *mg = NULL;
4aabdb9b 3350 SV *sv;
faf82a0b
AE
3351 if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3352 mg = mg_find(sv, PERL_MAGIC_qr);
3353 else if (SvSMAGICAL(ret)) {
3354 if (SvGMAGICAL(ret))
3355 sv_unmagic(ret, PERL_MAGIC_qr);
3356 else
3357 mg = mg_find(ret, PERL_MAGIC_qr);
0f5d15d6 3358 }
faf82a0b 3359
0f5d15d6
IZ
3360 if (mg) {
3361 re = (regexp *)mg->mg_obj;
df0003d4 3362 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
3363 }
3364 else {
3365 STRLEN len;
6136c704 3366 const char * const t = SvPV_const(ret, len);
0f5d15d6 3367 PMOP pm;
a3b680e6 3368 const I32 osize = PL_regsize;
0f5d15d6 3369
5fcd1c1b 3370 Zero(&pm, 1, PMOP);
4aabdb9b 3371 if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
83003860 3372 re = CALLREGCOMP(aTHX_ (char*)t, (char*)t + len, &pm);
9041c2e3 3373 if (!(SvFLAGS(ret)
faf82a0b
AE
3374 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3375 | SVs_GMG)))
14befaf4
DM
3376 sv_magic(ret,(SV*)ReREFCNT_inc(re),
3377 PERL_MAGIC_qr,0,0);
0f5d15d6 3378 PL_regsize = osize;
0f5d15d6 3379 }
4aabdb9b 3380 }
aa283a38
DM
3381
3382 /* run the pattern returned from (??{...}) */
3383
4aabdb9b
DM
3384 DEBUG_EXECUTE_r(
3385 PerlIO_printf(Perl_debug_log,
3386 "Entering embedded \"%s%.60s%s%s\"\n",
3387 PL_colors[0],
3388 re->precomp,
3389 PL_colors[1],
3390 (strlen(re->precomp) > 60 ? "..." : ""))
3391 );
2c2d71f5 3392
4aabdb9b
DM
3393 st->u.eval.cp = regcppush(0); /* Save *all* the positions. */
3394 REGCP_SET(st->u.eval.lastcp);
4aabdb9b
DM
3395 *PL_reglastparen = 0;
3396 *PL_reglastcloseparen = 0;
4aabdb9b 3397 PL_reginput = locinput;
4aabdb9b
DM
3398
3399 /* XXXX This is too dramatic a measure... */
3400 PL_reg_maxiter = 0;
3401
5d9a96ca 3402 st->logical = 0;
aa283a38
DM
3403 st->u.eval.toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
3404 ((re->reganch & ROPT_UTF8) != 0);
3405 if (st->u.eval.toggleutf) PL_reg_flags ^= RF_utf8;
3406 st->u.eval.prev_rex = rex;
aa283a38 3407 rex = re;
aa283a38 3408
77cb431f
DM
3409 /* resume to current state on success */
3410 st->u.yes.prev_yes_state = yes_state;
3411 yes_state = st;
aa283a38
DM
3412 PUSH_STATE(newst, resume_EVAL);
3413 st = newst;
3414
3415 /* now continue from first node in postoned RE */
3416 next = re->program + 1;
3417 break;
4aabdb9b 3418 /* NOTREACHED */
a0ed51b3 3419 }
4aabdb9b
DM
3420 /* /(?(?{...})X|Y)/ */
3421 st->sw = SvTRUE(ret);
3422 st->logical = 0;
c277df42
IZ
3423 break;
3424 }
a0d0e21e 3425 case OPEN:
c277df42 3426 n = ARG(scan); /* which paren pair */
3280af22
NIS
3427 PL_reg_start_tmp[n] = locinput;
3428 if (n > PL_regsize)
3429 PL_regsize = n;
a0d0e21e
LW
3430 break;
3431 case CLOSE:
c277df42 3432 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
3433 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3434 PL_regendp[n] = locinput - PL_bostr;
eb160463 3435 if (n > (I32)*PL_reglastparen)
3280af22 3436 *PL_reglastparen = n;
a01268b5 3437 *PL_reglastcloseparen = n;
a0d0e21e 3438 break;
c277df42
IZ
3439 case GROUPP:
3440 n = ARG(scan); /* which paren pair */
5d9a96ca 3441 st->sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
3442 break;
3443 case IFTHEN:
2c2d71f5 3444 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
5d9a96ca 3445 if (st->sw)
c277df42
IZ
3446 next = NEXTOPER(NEXTOPER(scan));
3447 else {
3448 next = scan + ARG(scan);
3449 if (OP(next) == IFTHEN) /* Fake one. */
3450 next = NEXTOPER(NEXTOPER(next));
3451 }
3452 break;
3453 case LOGICAL:
5d9a96ca 3454 st->logical = scan->flags;
c277df42 3455 break;
2ab05381 3456/*******************************************************************
a0374537
DM
3457 cc points to the regmatch_state associated with the most recent CURLYX.
3458 This struct contains info about the innermost (...)* loop (an
3459 "infoblock"), and a pointer to the next outer cc.
2ab05381
IZ
3460
3461 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3462
95b24440 3463 1) After matching Y, regnode for CURLYX is processed;
2ab05381 3464
a0374537 3465 2) This regnode populates cc, and calls regmatch() recursively
95b24440 3466 with the starting point at WHILEM node;
2ab05381
IZ
3467
3468 3) Each hit of WHILEM node tries to match A and Z (in the order
3469 depending on the current iteration, min/max of {min,max} and
3470 greediness). The information about where are nodes for "A"
a0374537 3471 and "Z" is read from cc, as is info on how many times "A"
2ab05381
IZ
3472 was already matched, and greediness.
3473
3474 4) After A matches, the same WHILEM node is hit again.
3475
95b24440 3476 5) Each time WHILEM is hit, cc is the infoblock created by CURLYX
2ab05381 3477 of the same pair. Thus when WHILEM tries to match Z, it temporarily
95b24440 3478 resets cc, since this Y(A)*Z can be a part of some other loop:
2ab05381
IZ
3479 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
3480 of the external loop.
3481
a0374537 3482 Currently present infoblocks form a tree with a stem formed by st->cc
2ab05381
IZ
3483 and whatever it mentions via ->next, and additional attached trees
3484 corresponding to temporarily unset infoblocks as in "5" above.
3485
95b24440 3486 In the following picture, infoblocks for outer loop of
2ab05381
IZ
3487 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
3488 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
3489 infoblocks are drawn below the "reset" infoblock.
3490
3491 In fact in the picture below we do not show failed matches for Z and T
3492 by WHILEM blocks. [We illustrate minimal matches, since for them it is
3493 more obvious *why* one needs to *temporary* unset infoblocks.]
3494
3495 Matched REx position InfoBlocks Comment
3496 (Y(A)*?Z)*?T x
3497 Y(A)*?Z)*?T x <- O
3498 Y (A)*?Z)*?T x <- O
3499 Y A)*?Z)*?T x <- O <- I
3500 YA )*?Z)*?T x <- O <- I
3501 YA A)*?Z)*?T x <- O <- I
3502 YAA )*?Z)*?T x <- O <- I
3503 YAA Z)*?T x <- O # Temporary unset I
3504 I
3505
3506 YAAZ Y(A)*?Z)*?T x <- O
3507 I
3508
3509 YAAZY (A)*?Z)*?T x <- O
3510 I
3511
3512 YAAZY A)*?Z)*?T x <- O <- I
3513 I
3514
3515 YAAZYA )*?Z)*?T x <- O <- I
3516 I
3517
3518 YAAZYA Z)*?T x <- O # Temporary unset I
3519 I,I
3520
3521 YAAZYAZ )*?T x <- O
3522 I,I
3523
3524 YAAZYAZ T x # Temporary unset O
3525 O
3526 I,I
3527
3528 YAAZYAZT x
3529 O
3530 I,I
3531 *******************************************************************/
95b24440 3532
a0d0e21e 3533 case CURLYX: {
cb434fcc
IZ
3534 /* No need to save/restore up to this paren */
3535 I32 parenfloor = scan->flags;
c277df42 3536
c2b7afd3
NC
3537 /* Dave says:
3538
3539 CURLYX and WHILEM are always paired: they're the moral
3540 equivalent of pp_enteriter anbd pp_iter.
3541
3542 The only time next could be null is if the node tree is
3543 corrupt. This was mentioned on p5p a few days ago.
3544
3545 See http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2006-04/msg00556.html
3546 So we'll assert that this is true:
3547 */
3548 assert(next);
30b2893d 3549 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
c277df42 3550 next += ARG(next);
cb434fcc
IZ
3551 /* XXXX Probably it is better to teach regpush to support
3552 parenfloor > PL_regsize... */
eb160463 3553 if (parenfloor > (I32)*PL_reglastparen)
cb434fcc 3554 parenfloor = *PL_reglastparen; /* Pessimization... */
a0374537 3555
d8319b27
DM
3556 st->u.curlyx.cp = PL_savestack_ix;
3557 st->u.curlyx.outercc = st->cc;
a0374537
DM
3558 st->cc = st;
3559 /* these fields contain the state of the current curly.
3560 * they are accessed by subsequent WHILEMs;
3561 * cur and lastloc are also updated by WHILEM */
d8319b27
DM
3562 st->u.curlyx.parenfloor = parenfloor;
3563 st->u.curlyx.cur = -1; /* this will be updated by WHILEM */
3564 st->u.curlyx.min = ARG1(scan);
3565 st->u.curlyx.max = ARG2(scan);
3566 st->u.curlyx.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3567 st->u.curlyx.lastloc = 0;
a0374537
DM
3568 /* st->next and st->minmod are also read by WHILEM */
3569
3280af22 3570 PL_reginput = locinput;
95b24440
DM
3571 REGMATCH(PREVOPER(next), CURLYX); /* start on the WHILEM */
3572 /*** all unsaved local vars undefined at this point */
d8319b27
DM
3573 regcpblow(st->u.curlyx.cp);
3574 st->cc = st->u.curlyx.outercc;
95b24440 3575 saySAME(result);
a0d0e21e 3576 }
5f66b61c 3577 /* NOTREACHED */
a0d0e21e
LW
3578 case WHILEM: {
3579 /*
3580 * This is really hard to understand, because after we match
3581 * what we're trying to match, we must make sure the rest of
2c2d71f5 3582 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
3583 * to go back UP the parse tree by recursing ever deeper. And
3584 * if it fails, we have to reset our parent's current state
3585 * that we can try again after backing off.
3586 */
3587
c2b7afd3
NC
3588 /* Dave says:
3589
3590 st->cc gets initialised by CURLYX ready for use by WHILEM.
3591 So again, unless somethings been corrupted, st->cc cannot
3592 be null at that point in WHILEM.
3593
3594 See http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2006-04/msg00556.html
3595 So we'll assert that this is true:
3596 */
3597 assert(st->cc);
d8319b27
DM
3598 st->u.whilem.lastloc = st->cc->u.curlyx.lastloc; /* Detection of 0-len. */
3599 st->u.whilem.cache_offset = 0;
3600 st->u.whilem.cache_bit = 0;
c277df42 3601
d8319b27 3602 n = st->cc->u.curlyx.cur + 1; /* how many we know we matched */
3280af22 3603 PL_reginput = locinput;
a0d0e21e 3604
a3621e74 3605 DEBUG_EXECUTE_r(
9041c2e3 3606 PerlIO_printf(Perl_debug_log,
91f3b821 3607 "%*s %ld out of %ld..%ld cc=%"UVxf"\n",
3280af22 3608 REPORT_CODE_OFF+PL_regindent*2, "",
d8319b27
DM
3609 (long)n, (long)st->cc->u.curlyx.min,
3610 (long)st->cc->u.curlyx.max, PTR2UV(st->cc))
c277df42 3611 );
4633a7c4 3612
a0d0e21e
LW
3613 /* If degenerate scan matches "", assume scan done. */
3614
d8319b27
DM
3615 if (locinput == st->cc->u.curlyx.lastloc && n >= st->cc->u.curlyx.min) {
3616 st->u.whilem.savecc = st->cc;
3617 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3618 if (st->cc)
d8319b27 3619 st->ln = st->cc->u.curlyx.cur;
a3621e74 3620 DEBUG_EXECUTE_r(
c3464db5
DD
3621 PerlIO_printf(Perl_debug_log,
3622 "%*s empty match detected, try continuation...\n",
3280af22 3623 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3624 );
d8319b27 3625 REGMATCH(st->u.whilem.savecc->next, WHILEM1);
95b24440 3626 /*** all unsaved local vars undefined at this point */
d8319b27 3627 st->cc = st->u.whilem.savecc;
95b24440 3628 if (result)
4633a7c4 3629 sayYES;
d8319b27
DM
3630 if (st->cc->u.curlyx.outercc)
3631 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
4633a7c4 3632 sayNO;
a0d0e21e
LW
3633 }
3634
3635 /* First just match a string of min scans. */
3636
d8319b27
DM
3637 if (n < st->cc->u.curlyx.min) {
3638 st->cc->u.curlyx.cur = n;
3639 st->cc->u.curlyx.lastloc = locinput;
3640 REGMATCH(st->cc->u.curlyx.scan, WHILEM2);
95b24440
DM
3641 /*** all unsaved local vars undefined at this point */
3642 if (result)
4633a7c4 3643 sayYES;
d8319b27
DM
3644 st->cc->u.curlyx.cur = n - 1;
3645 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
4633a7c4 3646 sayNO;
a0d0e21e
LW
3647 }
3648
2c2d71f5
JH
3649 if (scan->flags) {
3650 /* Check whether we already were at this position.
3651 Postpone detection until we know the match is not
3652 *that* much linear. */
3653 if (!PL_reg_maxiter) {
3654 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3655 PL_reg_leftiter = PL_reg_maxiter;
3656 }
3657 if (PL_reg_leftiter-- == 0) {
a3b680e6 3658 const I32 size = (PL_reg_maxiter + 7 + POSCACHE_START)/8;
2c2d71f5 3659 if (PL_reg_poscache) {
eb160463 3660 if ((I32)PL_reg_poscache_size < size) {
2c2d71f5
JH
3661 Renew(PL_reg_poscache, size, char);
3662 PL_reg_poscache_size = size;
3663 }
3664 Zero(PL_reg_poscache, size, char);
3665 }
3666 else {
3667 PL_reg_poscache_size = size;
a02a5408 3668 Newxz(PL_reg_poscache, size, char);
2c2d71f5 3669 }
a3621e74 3670 DEBUG_EXECUTE_r(
2c2d71f5
JH
3671 PerlIO_printf(Perl_debug_log,
3672 "%sDetected a super-linear match, switching on caching%s...\n",
3673 PL_colors[4], PL_colors[5])
3674 );
3675 }
3676 if (PL_reg_leftiter < 0) {
d8319b27 3677 st->u.whilem.cache_offset = locinput - PL_bostr;
2c2d71f5 3678
d8319b27
DM
3679 st->u.whilem.cache_offset = (scan->flags & 0xf) - 1 + POSCACHE_START
3680 + st->u.whilem.cache_offset * (scan->flags>>4);
3681 st->u.whilem.cache_bit = st->u.whilem.cache_offset % 8;
3682 st->u.whilem.cache_offset /= 8;
3683 if (PL_reg_poscache[st->u.whilem.cache_offset] & (1<<st->u.whilem.cache_bit)) {
a3621e74 3684 DEBUG_EXECUTE_r(
2c2d71f5
JH
3685 PerlIO_printf(Perl_debug_log,
3686 "%*s already tried at this position...\n",
3687 REPORT_CODE_OFF+PL_regindent*2, "")
3688 );
3ab3c9b4
HS
3689 if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))
3690 /* cache records success */
c2b0868c
HS
3691 sayYES;
3692 else
3ab3c9b4 3693 /* cache records failure */
c2b0868c 3694 sayNO_SILENT;
2c2d71f5 3695 }
2c2d71f5
JH
3696 }
3697 }
3698
a0d0e21e
LW
3699 /* Prefer next over scan for minimal matching. */
3700
5d9a96ca 3701 if (st->cc->minmod) {
d8319b27
DM
3702 st->u.whilem.savecc = st->cc;
3703 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3704 if (st->cc)
d8319b27
DM
3705 st->ln = st->cc->u.curlyx.cur;
3706 st->u.whilem.cp = regcppush(st->u.whilem.savecc->u.curlyx.parenfloor);
3707 REGCP_SET(st->u.whilem.lastcp);
3708 REGMATCH(st->u.whilem.savecc->next, WHILEM3);
95b24440 3709 /*** all unsaved local vars undefined at this point */
d8319b27 3710 st->cc = st->u.whilem.savecc;
95b24440 3711 if (result) {
d8319b27 3712 regcpblow(st->u.whilem.cp);
3ab3c9b4 3713 CACHEsayYES; /* All done. */
5f05dabc 3714 }
d8319b27 3715 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3716 regcppop(rex);
d8319b27
DM
3717 if (st->cc->u.curlyx.outercc)
3718 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
a0d0e21e 3719
d8319b27 3720 if (n >= st->cc->u.curlyx.max) { /* Maximum greed exceeded? */
9041c2e3 3721 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3722 && !(PL_reg_flags & RF_warned)) {
3723 PL_reg_flags |= RF_warned;
9014280d 3724 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
2f3ca594
GS
3725 "Complex regular subexpression recursion",
3726 REG_INFTY - 1);
c277df42 3727 }
3ab3c9b4 3728 CACHEsayNO;
c277df42 3729 }
a687059c 3730
a3621e74 3731 DEBUG_EXECUTE_r(
c3464db5
DD
3732 PerlIO_printf(Perl_debug_log,
3733 "%*s trying longer...\n",
3280af22 3734 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3735 );
a0d0e21e 3736 /* Try scanning more and see if it helps. */
3280af22 3737 PL_reginput = locinput;
d8319b27
DM
3738 st->cc->u.curlyx.cur = n;
3739 st->cc->u.curlyx.lastloc = locinput;
3740 st->u.whilem.cp = regcppush(st->cc->u.curlyx.parenfloor);
3741 REGCP_SET(st->u.whilem.lastcp);
3742 REGMATCH(st->cc->u.curlyx.scan, WHILEM4);
95b24440
DM
3743 /*** all unsaved local vars undefined at this point */
3744 if (result) {
d8319b27 3745 regcpblow(st->u.whilem.cp);
3ab3c9b4 3746 CACHEsayYES;
5f05dabc 3747 }
d8319b27 3748 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3749 regcppop(rex);
d8319b27
DM
3750 st->cc->u.curlyx.cur = n - 1;
3751 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
3ab3c9b4 3752 CACHEsayNO;
a0d0e21e
LW
3753 }
3754
3755 /* Prefer scan over next for maximal matching. */
3756
d8319b27
DM
3757 if (n < st->cc->u.curlyx.max) { /* More greed allowed? */
3758 st->u.whilem.cp = regcppush(st->cc->u.curlyx.parenfloor);
3759 st->cc->u.curlyx.cur = n;
3760 st->cc->u.curlyx.lastloc = locinput;
3761 REGCP_SET(st->u.whilem.lastcp);
3762 REGMATCH(st->cc->u.curlyx.scan, WHILEM5);
95b24440
DM
3763 /*** all unsaved local vars undefined at this point */
3764 if (result) {
d8319b27 3765 regcpblow(st->u.whilem.cp);
3ab3c9b4 3766 CACHEsayYES;
5f05dabc 3767 }
d8319b27 3768 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3769 regcppop(rex); /* Restore some previous $<digit>s? */
3280af22 3770 PL_reginput = locinput;
a3621e74 3771 DEBUG_EXECUTE_r(
c3464db5
DD
3772 PerlIO_printf(Perl_debug_log,
3773 "%*s failed, try continuation...\n",
3280af22 3774 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3775 );
3776 }
9041c2e3 3777 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3778 && !(PL_reg_flags & RF_warned)) {
3280af22 3779 PL_reg_flags |= RF_warned;
9014280d 3780 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
cb5d145d
GS
3781 "Complex regular subexpression recursion",
3782 REG_INFTY - 1);
a0d0e21e
LW
3783 }
3784
3785 /* Failed deeper matches of scan, so see if this one works. */
d8319b27
DM
3786 st->u.whilem.savecc = st->cc;
3787 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3788 if (st->cc)
d8319b27
DM
3789 st->ln = st->cc->u.curlyx.cur;
3790 REGMATCH(st->u.whilem.savecc->next, WHILEM6);
95b24440 3791 /*** all unsaved local vars undefined at this point */
d8319b27 3792 st->cc = st->u.whilem.savecc;
95b24440 3793 if (result)
3ab3c9b4 3794 CACHEsayYES;
d8319b27
DM
3795 if (st->cc->u.curlyx.outercc)
3796 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
3797 st->cc->u.curlyx.cur = n - 1;
3798 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
3ab3c9b4 3799 CACHEsayNO;
a0d0e21e 3800 }
5f66b61c 3801 /* NOTREACHED */
9041c2e3 3802 case BRANCHJ:
c277df42
IZ
3803 next = scan + ARG(scan);
3804 if (next == scan)
3805 next = NULL;
3806 inner = NEXTOPER(NEXTOPER(scan));
3807 goto do_branch;
9041c2e3 3808 case BRANCH:
c277df42
IZ
3809 inner = NEXTOPER(scan);
3810 do_branch:
3811 {
e822a8b4
DM
3812 I32 type;
3813 type = OP(scan);
ae5031b3 3814 if (!next || OP(next) != type) /* No choice. */
c277df42 3815 next = inner; /* Avoid recursion. */
a0d0e21e 3816 else {
a3b680e6 3817 const I32 lastparen = *PL_reglastparen;
02db2b7b 3818 /* Put unwinding data on stack */
6136c704
AL
3819 const I32 unwind1 = SSNEWt(1,re_unwind_branch_t);
3820 re_unwind_branch_t * const uw = SSPTRt(unwind1,re_unwind_branch_t);
3821
5d9a96ca
DM
3822 uw->prev = st->unwind;
3823 st->unwind = unwind1;
e822a8b4 3824 uw->type = ((type == BRANCH)
02db2b7b
IZ
3825 ? RE_UNWIND_BRANCH
3826 : RE_UNWIND_BRANCHJ);
3827 uw->lastparen = lastparen;
3828 uw->next = next;
3829 uw->locinput = locinput;
3830 uw->nextchr = nextchr;
3a2830be 3831 uw->minmod = st->minmod;
02db2b7b
IZ
3832#ifdef DEBUGGING
3833 uw->regindent = ++PL_regindent;
3834#endif
c277df42 3835
02db2b7b
IZ
3836 REGCP_SET(uw->lastcp);
3837
3838 /* Now go into the first branch */
3839 next = inner;
a687059c 3840 }
a0d0e21e
LW
3841 }
3842 break;
3843 case MINMOD:
5d9a96ca 3844 st->minmod = 1;
a0d0e21e 3845 break;
c277df42
IZ
3846 case CURLYM:
3847 {
d8319b27 3848 st->u.curlym.l = st->u.curlym.matches = 0;
9041c2e3 3849
c277df42 3850 /* We suppose that the next guy does not need
0e788c72 3851 backtracking: in particular, it is of constant non-zero length,
c277df42 3852 and has no parenths to influence future backrefs. */
5d9a96ca 3853 st->ln = ARG1(scan); /* min to match */
c277df42 3854 n = ARG2(scan); /* max to match */
d8319b27
DM
3855 st->u.curlym.paren = scan->flags;
3856 if (st->u.curlym.paren) {
3857 if (st->u.curlym.paren > PL_regsize)
3858 PL_regsize = st->u.curlym.paren;
3859 if (st->u.curlym.paren > (I32)*PL_reglastparen)
3860 *PL_reglastparen = st->u.curlym.paren;
c277df42 3861 }
dc45a647 3862 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
d8319b27 3863 if (st->u.curlym.paren)
c277df42 3864 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3865 PL_reginput = locinput;
d8319b27 3866 st->u.curlym.maxwanted = st->minmod ? st->ln : n;
0cadcf80
DM
3867 while (PL_reginput < PL_regeol && st->u.curlym.matches < st->u.curlym.maxwanted) {
3868 /* resume to current state on success */
3869 st->u.yes.prev_yes_state = yes_state;
3870 yes_state = st;
3871 REGMATCH(scan, CURLYM1);
3872 yes_state = st->u.yes.prev_yes_state;
3873 /*** all unsaved local vars undefined at this point */
3874 if (!result)
3875 break;
3876 /* on first match, determine length, u.curlym.l */
3877 if (!st->u.curlym.matches++) {
3878 if (PL_reg_match_utf8) {
3879 char *s = locinput;
3880 while (s < PL_reginput) {
3881 st->u.curlym.l++;
3882 s += UTF8SKIP(s);
6407bf3b
DM
3883 }
3884 }
0cadcf80
DM
3885 else {
3886 st->u.curlym.l = PL_reginput - locinput;
3887 }
3888 if (st->u.curlym.l == 0) {
3889 st->u.curlym.matches = st->u.curlym.maxwanted;
3890 break;
3891 }
6407bf3b 3892 }
0cadcf80 3893 locinput = PL_reginput;
6407bf3b
DM
3894 }
3895
3896 PL_reginput = locinput;
0cadcf80 3897 if (st->u.curlym.matches < st->ln) {
5d9a96ca 3898 st->minmod = 0;
0cadcf80
DM
3899 sayNO;
3900 }
5f80c4cf 3901
0cadcf80
DM
3902 DEBUG_EXECUTE_r(
3903 PerlIO_printf(Perl_debug_log,
3904 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
3905 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
3906 (IV) st->u.curlym.matches, (IV)st->u.curlym.l)
3907 );
3908
3909 /* calculate c1 and c1 for possible match of 1st char
3910 * following curly */
9e137952 3911 st->u.curlym.c1 = st->u.curlym.c2 = CHRTEST_VOID;
0cadcf80
DM
3912 if (HAS_TEXT(next) || JUMPABLE(next)) {
3913 regnode *text_node = next;
3914 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3915 if (HAS_TEXT(text_node)
3916 && PL_regkind[(U8)OP(text_node)] != REF)
3917 {
3918 st->u.curlym.c1 = (U8)*STRING(text_node);
3919 st->u.curlym.c2 =
3920 (OP(text_node) == EXACTF || OP(text_node) == REFF)
3921 ? PL_fold[st->u.curlym.c1]
3922 : (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3923 ? PL_fold_locale[st->u.curlym.c1]
3924 : st->u.curlym.c1;
3925 }
3926 }
5f80c4cf 3927
0cadcf80
DM
3928 REGCP_SET(st->u.curlym.lastcp);
3929
3930 st->u.curlym.minmod = st->minmod;
3931 st->minmod = 0;
3932 while (st->u.curlym.matches >= st->ln
3933 && (st->u.curlym.matches <= n
3934 /* for REG_INFTY, ln could overflow to negative */
3935 || (n == REG_INFTY && st->u.curlym.matches >= 0)))
3936 {
3937 /* If it could work, try it. */
9e137952 3938 if (st->u.curlym.c1 == CHRTEST_VOID ||
0cadcf80
DM
3939 UCHARAT(PL_reginput) == st->u.curlym.c1 ||
3940 UCHARAT(PL_reginput) == st->u.curlym.c2)
3941 {
3942 DEBUG_EXECUTE_r(
3943 PerlIO_printf(Perl_debug_log,
3944 "%*s trying tail with matches=%"IVdf"...\n",
3945 (int)(REPORT_CODE_OFF+PL_regindent*2),
3946 "", (IV)st->u.curlym.matches)
3947 );
3948 if (st->u.curlym.paren) {
3949 if (st->u.curlym.matches) {
3950 PL_regstartp[st->u.curlym.paren]
3951 = HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr;
3952 PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr;
cca55fe3 3953 }
5f80c4cf 3954 else
0cadcf80 3955 PL_regendp[st->u.curlym.paren] = -1;
5f80c4cf 3956 }
0cadcf80
DM
3957 /* resume to current state on success */
3958 st->u.yes.prev_yes_state = yes_state;
3959 yes_state = st;
3960 REGMATCH(next, CURLYM2);
3961 yes_state = st->u.yes.prev_yes_state;
3962 /*** all unsaved local vars undefined at this point */
3963 if (result)
3964 /* XXX tmp sayYES; */
3965 sayYES_FINAL;
3966 REGCP_UNWIND(st->u.curlym.lastcp);
a0ed51b3 3967 }
0cadcf80
DM
3968 /* Couldn't or didn't -- move forward/backward. */
3969 if (st->u.curlym.minmod) {
3280af22 3970 PL_reginput = locinput;
dad79028
DM
3971 /* resume to current state on success */
3972 st->u.yes.prev_yes_state = yes_state;
3973 yes_state = st;
95b24440 3974 REGMATCH(scan, CURLYM3);
dad79028 3975 yes_state = st->u.yes.prev_yes_state;
95b24440
DM
3976 /*** all unsaved local vars undefined at this point */
3977 if (result) {
0cadcf80 3978 st->u.curlym.matches++;
3280af22 3979 locinput = PL_reginput;
c277df42
IZ
3980 }
3981 else
3982 sayNO;
3983 }
0cadcf80 3984 else {
d8319b27
DM
3985 st->u.curlym.matches--;
3986 locinput = HOPc(locinput, -st->u.curlym.l);
3280af22 3987 PL_reginput = locinput;
c277df42
IZ
3988 }
3989 }
3990 sayNO;
5f66b61c 3991 /* NOTREACHED */
c277df42
IZ
3992 break;
3993 }
3994 case CURLYN:
d8319b27
DM
3995 st->u.plus.paren = scan->flags; /* Which paren to set */
3996 if (st->u.plus.paren > PL_regsize)
3997 PL_regsize = st->u.plus.paren;
3998 if (st->u.plus.paren > (I32)*PL_reglastparen)
3999 *PL_reglastparen = st->u.plus.paren;
5d9a96ca 4000 st->ln = ARG1(scan); /* min to match */
c277df42 4001 n = ARG2(scan); /* max to match */
dc45a647 4002 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 4003 goto repeat;
a0d0e21e 4004 case CURLY:
d8319b27 4005 st->u.plus.paren = 0;
5d9a96ca 4006 st->ln = ARG1(scan); /* min to match */
a0d0e21e 4007 n = ARG2(scan); /* max to match */
dc45a647 4008 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
4009 goto repeat;
4010 case STAR:
5d9a96ca 4011 st->ln = 0;
c277df42 4012 n = REG_INFTY;
a0d0e21e 4013 scan = NEXTOPER(scan);
d8319b27 4014 st->u.plus.paren = 0;
a0d0e21e
LW
4015 goto repeat;
4016 case PLUS:
5d9a96ca 4017 st->ln = 1;
c277df42
IZ
4018 n = REG_INFTY;
4019 scan = NEXTOPER(scan);
d8319b27 4020 st->u.plus.paren = 0;
c277df42 4021 repeat:
a0d0e21e
LW
4022 /*
4023 * Lookahead to avoid useless match attempts
4024 * when we know what character comes next.
4025 */
5f80c4cf
JP
4026
4027 /*
4028 * Used to only do .*x and .*?x, but now it allows
4029 * for )'s, ('s and (?{ ... })'s to be in the way
4030 * of the quantifier and the EXACT-like node. -- japhy
4031 */
4032
cca55fe3 4033 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
4034 U8 *s;
4035 regnode *text_node = next;
4036
cca55fe3 4037 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 4038
9e137952
DM
4039 if (! HAS_TEXT(text_node))
4040 st->u.plus.c1 = st->u.plus.c2 = CHRTEST_VOID;
5f80c4cf 4041 else {
cca55fe3 4042 if (PL_regkind[(U8)OP(text_node)] == REF) {
9e137952 4043 st->u.plus.c1 = st->u.plus.c2 = CHRTEST_VOID;
44a68960 4044 goto assume_ok_easy;
cca55fe3
JP
4045 }
4046 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
4047
4048 if (!UTF) {
d8319b27 4049 st->u.plus.c2 = st->u.plus.c1 = *s;
f65d3ee7 4050 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
d8319b27 4051 st->u.plus.c2 = PL_fold[st->u.plus.c1];
f65d3ee7 4052 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
d8319b27 4053 st->u.plus.c2 = PL_fold_locale[st->u.plus.c1];
1aa99e6b 4054 }
5f80c4cf 4055 else { /* UTF */
f65d3ee7 4056 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 4057 STRLEN ulen1, ulen2;
89ebb4a3
JH
4058 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4059 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
a2a2844f
JH
4060
4061 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4062 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4063
d8319b27 4064 st->u.plus.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
041457d9 4065 uniflags);
d8319b27 4066 st->u.plus.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
041457d9 4067 uniflags);
5f80c4cf
JP
4068 }
4069 else {
d8319b27 4070 st->u.plus.c2 = st->u.plus.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
041457d9 4071 uniflags);
5f80c4cf 4072 }
1aa99e6b
IH
4073 }
4074 }
bbce6d69 4075 }
a0d0e21e 4076 else
9e137952 4077 st->u.plus.c1 = st->u.plus.c2 = CHRTEST_VOID;
cca55fe3 4078 assume_ok_easy:
3280af22 4079 PL_reginput = locinput;
5d9a96ca
DM
4080 if (st->minmod) {
4081 st->minmod = 0;
32fc9b6a 4082 if (st->ln && regrepeat(rex, scan, st->ln) < st->ln)
4633a7c4 4083 sayNO;
a0ed51b3 4084 locinput = PL_reginput;
d8319b27 4085 REGCP_SET(st->u.plus.lastcp);
9e137952 4086 if (st->u.plus.c1 != CHRTEST_VOID) {
d8319b27
DM
4087 st->u.plus.old = locinput;
4088 st->u.plus.count = 0;
0fe9bf95 4089
1aa99e6b 4090 if (n == REG_INFTY) {
d8319b27 4091 st->u.plus.e = PL_regeol - 1;
1aa99e6b 4092 if (do_utf8)
d8319b27
DM
4093 while (UTF8_IS_CONTINUATION(*(U8*)st->u.plus.e))
4094 st->u.plus.e--;
1aa99e6b
IH
4095 }
4096 else if (do_utf8) {
5d9a96ca 4097 int m = n - st->ln;
d8319b27
DM
4098 for (st->u.plus.e = locinput;
4099 m >0 && st->u.plus.e + UTF8SKIP(st->u.plus.e) <= PL_regeol; m--)
4100 st->u.plus.e += UTF8SKIP(st->u.plus.e);
1aa99e6b
IH
4101 }
4102 else {
d8319b27
DM
4103 st->u.plus.e = locinput + n - st->ln;
4104 if (st->u.plus.e >= PL_regeol)
4105 st->u.plus.e = PL_regeol - 1;
1aa99e6b 4106 }
0fe9bf95
IZ
4107 while (1) {
4108 /* Find place 'next' could work */
1aa99e6b 4109 if (!do_utf8) {
d8319b27
DM
4110 if (st->u.plus.c1 == st->u.plus.c2) {
4111 while (locinput <= st->u.plus.e &&
4112 UCHARAT(locinput) != st->u.plus.c1)
1aa99e6b
IH
4113 locinput++;
4114 } else {
d8319b27
DM
4115 while (locinput <= st->u.plus.e
4116 && UCHARAT(locinput) != st->u.plus.c1
4117 && UCHARAT(locinput) != st->u.plus.c2)
1aa99e6b
IH
4118 locinput++;
4119 }
d8319b27 4120 st->u.plus.count = locinput - st->u.plus.old;
1aa99e6b
IH
4121 }
4122 else {
d8319b27 4123 if (st->u.plus.c1 == st->u.plus.c2) {
a3b680e6 4124 STRLEN len;
872c91ae
JH
4125 /* count initialised to
4126 * utf8_distance(old, locinput) */
d8319b27 4127 while (locinput <= st->u.plus.e &&
872c91ae 4128 utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4129 UTF8_MAXBYTES, &len,
d8319b27 4130 uniflags) != (UV)st->u.plus.c1) {
1aa99e6b 4131 locinput += len;
d8319b27 4132 st->u.plus.count++;
b2f2f093 4133 }
1aa99e6b 4134 } else {
872c91ae
JH
4135 /* count initialised to
4136 * utf8_distance(old, locinput) */
d8319b27 4137 while (locinput <= st->u.plus.e) {
c4fd8992
AL
4138 STRLEN len;
4139 const UV c = utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4140 UTF8_MAXBYTES, &len,
041457d9 4141 uniflags);
d8319b27 4142 if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
1aa99e6b 4143 break;
b2f2f093 4144 locinput += len;
d8319b27 4145 st->u.plus.count++;
1aa99e6b
IH
4146 }
4147 }
0fe9bf95 4148 }
d8319b27 4149 if (locinput > st->u.plus.e)
0fe9bf95
IZ
4150 sayNO;
4151 /* PL_reginput == old now */
d8319b27 4152 if (locinput != st->u.plus.old) {
5d9a96ca 4153 st->ln = 1; /* Did some */
32fc9b6a 4154 if (regrepeat(rex, scan, st->u.plus.count) < st->u.plus.count)
0fe9bf95
IZ
4155 sayNO;
4156 }
4157 /* PL_reginput == locinput now */
d8319b27 4158 TRYPAREN(st->u.plus.paren, st->ln, locinput, PLUS1);
95b24440 4159 /*** all unsaved local vars undefined at this point */
0fe9bf95 4160 PL_reginput = locinput; /* Could be reset... */
d8319b27 4161 REGCP_UNWIND(st->u.plus.lastcp);
0fe9bf95 4162 /* Couldn't or didn't -- move forward. */
d8319b27 4163 st->u.plus.old = locinput;
1aa99e6b
IH
4164 if (do_utf8)
4165 locinput += UTF8SKIP(locinput);
4166 else
4167 locinput++;
d8319b27 4168 st->u.plus.count = 1;
0fe9bf95
IZ
4169 }
4170 }
4171 else
5d9a96ca 4172 while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */
1aa99e6b 4173 UV c;
9e137952 4174 if (st->u.plus.c1 != CHRTEST_VOID) {
1aa99e6b 4175 if (do_utf8)
872c91ae 4176 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4177 UTF8_MAXBYTES, 0,
041457d9 4178 uniflags);
1aa99e6b 4179 else
9041c2e3 4180 c = UCHARAT(PL_reginput);
2390ecbc 4181 /* If it could work, try it. */
d8319b27 4182 if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
2390ecbc 4183 {
d8319b27 4184 TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS2);
95b24440 4185 /*** all unsaved local vars undefined at this point */
d8319b27 4186 REGCP_UNWIND(st->u.plus.lastcp);
2390ecbc 4187 }
1aa99e6b 4188 }
a0d0e21e 4189 /* If it could work, try it. */
9e137952 4190 else if (st->u.plus.c1 == CHRTEST_VOID)
bbce6d69 4191 {
d8319b27 4192 TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS3);
95b24440 4193 /*** all unsaved local vars undefined at this point */
d8319b27 4194 REGCP_UNWIND(st->u.plus.lastcp);
bbce6d69 4195 }
c277df42 4196 /* Couldn't or didn't -- move forward. */
a0ed51b3 4197 PL_reginput = locinput;
32fc9b6a 4198 if (regrepeat(rex, scan, 1)) {
5d9a96ca 4199 st->ln++;
a0ed51b3
LW
4200 locinput = PL_reginput;
4201 }
4202 else
4633a7c4 4203 sayNO;
a0d0e21e
LW
4204 }
4205 }
4206 else {
32fc9b6a 4207 n = regrepeat(rex, scan, n);
a0ed51b3 4208 locinput = PL_reginput;
5d9a96ca 4209 if (st->ln < n && PL_regkind[(U8)OP(next)] == EOL &&
7fba1cd6 4210 (OP(next) != MEOL ||
15272685
HS
4211 OP(next) == SEOL || OP(next) == EOS))
4212 {
5d9a96ca 4213 st->ln = n; /* why back off? */
1aeab75a
GS
4214 /* ...because $ and \Z can match before *and* after
4215 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
4216 We should back off by one in this case. */
4217 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
5d9a96ca 4218 st->ln--;
1aeab75a 4219 }
d8319b27 4220 REGCP_SET(st->u.plus.lastcp);
1d5c262f 4221 {
8fa7f367 4222 UV c = 0;
5d9a96ca 4223 while (n >= st->ln) {
9e137952 4224 if (st->u.plus.c1 != CHRTEST_VOID) {
1aa99e6b 4225 if (do_utf8)
872c91ae 4226 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4227 UTF8_MAXBYTES, 0,
041457d9 4228 uniflags);
1aa99e6b 4229 else
9041c2e3 4230 c = UCHARAT(PL_reginput);
1aa99e6b 4231 }
c277df42 4232 /* If it could work, try it. */
9e137952 4233 if (st->u.plus.c1 == CHRTEST_VOID || c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
c277df42 4234 {
d8319b27 4235 TRYPAREN(st->u.plus.paren, n, PL_reginput, PLUS4);
95b24440 4236 /*** all unsaved local vars undefined at this point */
d8319b27 4237 REGCP_UNWIND(st->u.plus.lastcp);
c277df42
IZ
4238 }
4239 /* Couldn't or didn't -- back up. */
4240 n--;
dfe13c55 4241 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 4242 }
a0d0e21e
LW
4243 }
4244 }
4633a7c4 4245 sayNO;
c277df42 4246 break;
a0d0e21e 4247 case END:
3b0527fe 4248 if (locinput < reginfo->till) {
a3621e74 4249 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
7821416a
IZ
4250 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4251 PL_colors[4],
4252 (long)(locinput - PL_reg_starttry),
3b0527fe 4253 (long)(reginfo->till - PL_reg_starttry),
7821416a
IZ
4254 PL_colors[5]));
4255 sayNO_FINAL; /* Cannot match: too short. */
4256 }
4257 PL_reginput = locinput; /* put where regtry can find it */
4258 sayYES_FINAL; /* Success! */
dad79028
DM
4259
4260 case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4261 DEBUG_EXECUTE_r(
4262 PerlIO_printf(Perl_debug_log,
4263 "%*s %ssubpattern success...%s\n",
4264 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5]));
3280af22 4265 PL_reginput = locinput; /* put where regtry can find it */
dad79028
DM
4266 sayYES_FINAL; /* Success! */
4267
4268 case SUSPEND: /* (?>FOO) */
4269 st->u.ifmatch.wanted = 1;
9fe1d20c 4270 PL_reginput = locinput;
9041c2e3 4271 goto do_ifmatch;
dad79028
DM
4272
4273 case UNLESSM: /* -ve lookaround: (?!FOO), or with flags, (?<!foo) */
4274 st->u.ifmatch.wanted = 0;
4275 goto ifmatch_trivial_fail_test;
4276
4277 case IFMATCH: /* +ve lookaround: (?=FOO), or with flags, (?<=foo) */
4278 st->u.ifmatch.wanted = 1;
4279 ifmatch_trivial_fail_test:
a0ed51b3 4280 if (scan->flags) {
52657f30 4281 char * const s = HOPBACKc(locinput, scan->flags);
dad79028
DM
4282 if (!s) {
4283 /* trivial fail */
4284 if (st->logical) {
4285 st->logical = 0;
4286 st->sw = 1 - st->u.ifmatch.wanted;
4287 }
4288 else if (st->u.ifmatch.wanted)
4289 sayNO;
4290 next = scan + ARG(scan);
4291 if (next == scan)
4292 next = NULL;
4293 break;
4294 }
efb30f32 4295 PL_reginput = s;
a0ed51b3
LW
4296 }
4297 else
4298 PL_reginput = locinput;
4299
c277df42 4300 do_ifmatch:
dad79028
DM
4301 /* resume to current state on success */
4302 st->u.yes.prev_yes_state = yes_state;
4303 yes_state = st;
4304 PUSH_STATE(newst, resume_IFMATCH);
4305 st = newst;
4306 next = NEXTOPER(NEXTOPER(scan));
4307 break;
4308
c277df42 4309 case LONGJMP:
c277df42
IZ
4310 next = scan + ARG(scan);
4311 if (next == scan)
4312 next = NULL;
a0d0e21e
LW
4313 break;
4314 default:
b900a521 4315 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 4316 PTR2UV(scan), OP(scan));
cea2e8a9 4317 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 4318 }
95b24440 4319
02db2b7b 4320 reenter:
a0d0e21e 4321 scan = next;
95b24440
DM
4322 continue;
4323 /* NOTREACHED */
4324
4325 /* simulate recursively calling regmatch(), but without actually
4326 * recursing - ie save the current state on the heap rather than on
4327 * the stack, then re-enter the loop. This avoids complex regexes
4328 * blowing the processor stack */
4329
4330 start_recurse:
4331 {
5d9a96ca
DM
4332 /* push new state */
4333 regmatch_state *oldst = st;
4334
4335 depth++;
4336
4337 /* grab the next free state slot */
4338 st++;
86545054 4339 if (st > SLAB_LAST(PL_regmatch_slab))
5d9a96ca
DM
4340 st = S_push_slab(aTHX);
4341 PL_regmatch_state = st;
4342
4343 oldst->next = next;
4344 oldst->n = n;
4345 oldst->locinput = locinput;
5d9a96ca
DM
4346
4347 st->cc = oldst->cc;
95b24440
DM
4348 locinput = PL_reginput;
4349 nextchr = UCHARAT(locinput);
5d9a96ca
DM
4350 st->minmod = 0;
4351 st->sw = 0;
4352 st->logical = 0;
4353 st->unwind = 0;
95b24440
DM
4354#ifdef DEBUGGING
4355 PL_regindent++;
4356#endif
4357 }
a0d0e21e 4358 }
a687059c 4359
aa283a38
DM
4360
4361
a0d0e21e
LW
4362 /*
4363 * We get here only if there's trouble -- normally "case END" is
4364 * the terminating point.
4365 */
cea2e8a9 4366 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 4367 /*NOTREACHED*/
4633a7c4
LW
4368 sayNO;
4369
7821416a 4370yes_final:
77cb431f
DM
4371
4372 if (yes_state) {
4373 /* we have successfully completed a subexpression, but we must now
4374 * pop to the state marked by yes_state and continue from there */
4375
dad79028 4376 /*XXX tmp for CURLYM*/
c4fd8992
AL
4377 regmatch_slab * const oslab = PL_regmatch_slab;
4378 regmatch_state * const ost = st;
4379 regmatch_state * const oys = yes_state;
dad79028
DM
4380 int odepth = depth;
4381
77cb431f
DM
4382 assert(st != yes_state);
4383 while (yes_state < SLAB_FIRST(PL_regmatch_slab)
4384 || yes_state > SLAB_LAST(PL_regmatch_slab))
4385 {
4386 /* not in this slab, pop slab */
4387 depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
4388 PL_regmatch_slab = PL_regmatch_slab->prev;
4389 st = SLAB_LAST(PL_regmatch_slab);
4390 }
4391 depth -= (st - yes_state);
dad79028 4392 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE TO (%d)\n", depth));
77cb431f
DM
4393 st = yes_state;
4394 yes_state = st->u.yes.prev_yes_state;
4395 PL_regmatch_state = st;
4396
4397 switch (st->resume_state) {
4398 case resume_EVAL:
4399 if (st->u.eval.toggleutf)
4400 PL_reg_flags ^= RF_utf8;
4401 ReREFCNT_dec(rex);
4402 rex = st->u.eval.prev_rex;
4403 /* XXXX This is too dramatic a measure... */
4404 PL_reg_maxiter = 0;
4405 /* Restore parens of the caller without popping the
4406 * savestack */
4407 {
c4fd8992 4408 const I32 tmp = PL_savestack_ix;
77cb431f
DM
4409 PL_savestack_ix = st->u.eval.lastcp;
4410 regcppop(rex);
4411 PL_savestack_ix = tmp;
4412 }
4413 PL_reginput = locinput;
4414 /* continue at the node following the (??{...}) */
4415 next = st->next;
4416 goto reenter;
4417
dad79028
DM
4418 case resume_IFMATCH:
4419 if (st->logical) {
4420 st->logical = 0;
4421 st->sw = st->u.ifmatch.wanted;
4422 }
4423 else if (!st->u.ifmatch.wanted)
4424 sayNO;
4425
4426 if (OP(st->scan) == SUSPEND)
4427 locinput = PL_reginput;
4428 else {
4429 locinput = PL_reginput = st->locinput;
4430 nextchr = UCHARAT(locinput);
4431 }
4432 next = st->scan + ARG(st->scan);
4433 if (next == st->scan)
4434 next = NULL;
4435 goto reenter;
4436
4437 /* XXX tmp don't handle yes_state yet */
4438 case resume_CURLYM1:
4439 case resume_CURLYM2:
4440 case resume_CURLYM3:
dad79028
DM
4441 PL_regmatch_slab =oslab;
4442 st = ost;
4443 PL_regmatch_state = st;
4444 depth = odepth;
4445 yes_state = oys;
4446 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "XXX revering a CURLYM\n"));
4447 goto yes;
4448
77cb431f
DM
4449 default:
4450 Perl_croak(aTHX_ "unexpected yes reume state");
4451 }
4452 }
4453
a3621e74 4454 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
e4584336 4455 PL_colors[4], PL_colors[5]));
4633a7c4
LW
4456yes:
4457#ifdef DEBUGGING
3280af22 4458 PL_regindent--;
4633a7c4 4459#endif
02db2b7b 4460
95b24440 4461 result = 1;
aa283a38 4462 /* XXX this is duplicate(ish) code to that in the do_no section.
77cb431f
DM
4463 * eventually a yes should just pop the stack back to the current
4464 * yes_state */
aa283a38
DM
4465 if (depth) {
4466 /* restore previous state and re-enter */
4467 POP_STATE;
4468
4469 switch (st->resume_state) {
4470 case resume_TRIE1:
4471 goto resume_point_TRIE1;
4472 case resume_TRIE2:
4473 goto resume_point_TRIE2;
aa283a38
DM
4474 case resume_CURLYX:
4475 goto resume_point_CURLYX;
4476 case resume_WHILEM1:
4477 goto resume_point_WHILEM1;
4478 case resume_WHILEM2:
4479 goto resume_point_WHILEM2;
4480 case resume_WHILEM3:
4481 goto resume_point_WHILEM3;
4482 case resume_WHILEM4:
4483 goto resume_point_WHILEM4;
4484 case resume_WHILEM5:
4485 goto resume_point_WHILEM5;
4486 case resume_WHILEM6:
4487 goto resume_point_WHILEM6;
4488 case resume_CURLYM1:
4489 goto resume_point_CURLYM1;
4490 case resume_CURLYM2:
4491 goto resume_point_CURLYM2;
4492 case resume_CURLYM3:
4493 goto resume_point_CURLYM3;
aa283a38
DM
4494 case resume_PLUS1:
4495 goto resume_point_PLUS1;
4496 case resume_PLUS2:
4497 goto resume_point_PLUS2;
4498 case resume_PLUS3:
4499 goto resume_point_PLUS3;
4500 case resume_PLUS4:
4501 goto resume_point_PLUS4;
77cb431f 4502
dad79028 4503 case resume_IFMATCH:
77cb431f 4504 case resume_EVAL:
aa283a38
DM
4505 default:
4506 Perl_croak(aTHX_ "regexp resume memory corruption");
4507 }
4508 }
4509 goto final_exit;
4633a7c4
LW
4510
4511no:
a3621e74 4512 DEBUG_EXECUTE_r(
7821416a
IZ
4513 PerlIO_printf(Perl_debug_log,
4514 "%*s %sfailed...%s\n",
e4584336 4515 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
7821416a
IZ
4516 );
4517 goto do_no;
4518no_final:
4519do_no:
5d9a96ca
DM
4520 if (st->unwind) {
4521 re_unwind_t * const uw = SSPTRt(st->unwind,re_unwind_t);
02db2b7b
IZ
4522
4523 switch (uw->type) {
4524 case RE_UNWIND_BRANCH:
4525 case RE_UNWIND_BRANCHJ:
4526 {
6136c704 4527 re_unwind_branch_t * const uwb = &(uw->branch);
a3b680e6 4528 const I32 lastparen = uwb->lastparen;
9041c2e3 4529
02db2b7b
IZ
4530 REGCP_UNWIND(uwb->lastcp);
4531 for (n = *PL_reglastparen; n > lastparen; n--)
4532 PL_regendp[n] = -1;
4533 *PL_reglastparen = n;
4534 scan = next = uwb->next;
3a2830be 4535 st->minmod = uwb->minmod;
9041c2e3
NIS
4536 if ( !scan ||
4537 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b 4538 ? BRANCH : BRANCHJ) ) { /* Failure */
5d9a96ca 4539 st->unwind = uwb->prev;
02db2b7b
IZ
4540#ifdef DEBUGGING
4541 PL_regindent--;
4542#endif
4543 goto do_no;
4544 }
4545 /* Have more choice yet. Reuse the same uwb. */
02db2b7b
IZ
4546 if ((n = (uwb->type == RE_UNWIND_BRANCH
4547 ? NEXT_OFF(next) : ARG(next))))
4548 next += n;
4549 else
4550 next = NULL; /* XXXX Needn't unwinding in this case... */
4551 uwb->next = next;
4552 next = NEXTOPER(scan);
4553 if (uwb->type == RE_UNWIND_BRANCHJ)
4554 next = NEXTOPER(next);
4555 locinput = uwb->locinput;
4556 nextchr = uwb->nextchr;
4557#ifdef DEBUGGING
4558 PL_regindent = uwb->regindent;
4559#endif
4560
4561 goto reenter;
4562 }
5f66b61c 4563 /* NOTREACHED */
02db2b7b
IZ
4564 default:
4565 Perl_croak(aTHX_ "regexp unwind memory corruption");
4566 }
5f66b61c 4567 /* NOTREACHED */
02db2b7b 4568 }
aa283a38 4569
4633a7c4 4570#ifdef DEBUGGING
3280af22 4571 PL_regindent--;
4633a7c4 4572#endif
95b24440 4573 result = 0;
5d9a96ca 4574
aa283a38
DM
4575 if (depth) {
4576 /* there's a previous state to backtrack to */
4577 POP_STATE;
5d9a96ca 4578 switch (st->resume_state) {
95b24440
DM
4579 case resume_TRIE1:
4580 goto resume_point_TRIE1;
4581 case resume_TRIE2:
4582 goto resume_point_TRIE2;
aa283a38
DM
4583 case resume_EVAL:
4584 /* we have failed an (??{...}). Restore state to the outer re
4585 * then re-throw the failure */
4586 if (st->u.eval.toggleutf)
4587 PL_reg_flags ^= RF_utf8;
4588 ReREFCNT_dec(rex);
4589 rex = st->u.eval.prev_rex;
77cb431f 4590 yes_state = st->u.yes.prev_yes_state;
aa283a38
DM
4591
4592 /* XXXX This is too dramatic a measure... */
4593 PL_reg_maxiter = 0;
4594
4595 PL_reginput = locinput;
4596 REGCP_UNWIND(st->u.eval.lastcp);
4597 regcppop(rex);
4598 goto do_no;
4599
95b24440
DM
4600 case resume_CURLYX:
4601 goto resume_point_CURLYX;
4602 case resume_WHILEM1:
4603 goto resume_point_WHILEM1;
4604 case resume_WHILEM2:
4605 goto resume_point_WHILEM2;
4606 case resume_WHILEM3:
4607 goto resume_point_WHILEM3;
4608 case resume_WHILEM4:
4609 goto resume_point_WHILEM4;
4610 case resume_WHILEM5:
4611 goto resume_point_WHILEM5;
4612 case resume_WHILEM6:
4613 goto resume_point_WHILEM6;
4614 case resume_CURLYM1:
4615 goto resume_point_CURLYM1;
4616 case resume_CURLYM2:
4617 goto resume_point_CURLYM2;
4618 case resume_CURLYM3:
4619 goto resume_point_CURLYM3;
95b24440 4620 case resume_IFMATCH:
dad79028
DM
4621 yes_state = st->u.yes.prev_yes_state;
4622 if (st->logical) {
4623 st->logical = 0;
4624 st->sw = !st->u.ifmatch.wanted;
4625 }
4626 else if (st->u.ifmatch.wanted)
4627 sayNO;
4628
4629 assert(OP(scan) != SUSPEND); /* XXX DAPM tmp */
4630 locinput = PL_reginput = st->locinput;
4631 nextchr = UCHARAT(locinput);
4632 next = scan + ARG(scan);
4633 if (next == scan)
4634 next = NULL;
4635 goto reenter;
4636
95b24440
DM
4637 case resume_PLUS1:
4638 goto resume_point_PLUS1;
4639 case resume_PLUS2:
4640 goto resume_point_PLUS2;
4641 case resume_PLUS3:
4642 goto resume_point_PLUS3;
4643 case resume_PLUS4:
4644 goto resume_point_PLUS4;
95b24440
DM
4645 default:
4646 Perl_croak(aTHX_ "regexp resume memory corruption");
4647 }
95b24440 4648 }
aa283a38
DM
4649
4650final_exit:
4651
5d9a96ca
DM
4652 /* restore original high-water mark */
4653 PL_regmatch_slab = orig_slab;
4654 PL_regmatch_state = orig_state;
4655
4656 /* free all slabs above current one */
4657 if (orig_slab->next) {
c4fd8992 4658 regmatch_slab *sl = orig_slab->next;
5d9a96ca
DM
4659 orig_slab->next = NULL;
4660 while (sl) {
c4fd8992 4661 regmatch_slab * const osl = sl;
5d9a96ca 4662 sl = sl->next;
ad65c075 4663 Safefree(osl);
5d9a96ca
DM
4664 }
4665 }
4666
95b24440
DM
4667 return result;
4668
a687059c
LW
4669}
4670
4671/*
4672 - regrepeat - repeatedly match something simple, report how many
4673 */
4674/*
4675 * [This routine now assumes that it will only match on things of length 1.
4676 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 4677 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 4678 */
76e3520e 4679STATIC I32
32fc9b6a 4680S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max)
a687059c 4681{
27da23d5 4682 dVAR;
a0d0e21e 4683 register char *scan;
a0d0e21e 4684 register I32 c;
3280af22 4685 register char *loceol = PL_regeol;
a0ed51b3 4686 register I32 hardcount = 0;
53c4c00c 4687 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 4688
3280af22 4689 scan = PL_reginput;
faf11cac
HS
4690 if (max == REG_INFTY)
4691 max = I32_MAX;
4692 else if (max < loceol - scan)
7f596f4c 4693 loceol = scan + max;
a0d0e21e 4694 switch (OP(p)) {
22c35a8c 4695 case REG_ANY:
1aa99e6b 4696 if (do_utf8) {
ffc61ed2 4697 loceol = PL_regeol;
1aa99e6b 4698 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
4699 scan += UTF8SKIP(scan);
4700 hardcount++;
4701 }
4702 } else {
4703 while (scan < loceol && *scan != '\n')
4704 scan++;
a0ed51b3
LW
4705 }
4706 break;
ffc61ed2 4707 case SANY:
def8e4ea
JH
4708 if (do_utf8) {
4709 loceol = PL_regeol;
a0804c9e 4710 while (scan < loceol && hardcount < max) {
def8e4ea
JH
4711 scan += UTF8SKIP(scan);
4712 hardcount++;
4713 }
4714 }
4715 else
4716 scan = loceol;
a0ed51b3 4717 break;
f33976b4
DB
4718 case CANY:
4719 scan = loceol;
4720 break;
090f7165
JH
4721 case EXACT: /* length of string is 1 */
4722 c = (U8)*STRING(p);
4723 while (scan < loceol && UCHARAT(scan) == c)
4724 scan++;
bbce6d69 4725 break;
4726 case EXACTF: /* length of string is 1 */
cd439c50 4727 c = (U8)*STRING(p);
bbce6d69 4728 while (scan < loceol &&
22c35a8c 4729 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 4730 scan++;
4731 break;
4732 case EXACTFL: /* length of string is 1 */
3280af22 4733 PL_reg_flags |= RF_tainted;
cd439c50 4734 c = (U8)*STRING(p);
bbce6d69 4735 while (scan < loceol &&
22c35a8c 4736 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
4737 scan++;
4738 break;
4739 case ANYOF:
ffc61ed2
JH
4740 if (do_utf8) {
4741 loceol = PL_regeol;
cfc92286 4742 while (hardcount < max && scan < loceol &&
32fc9b6a 4743 reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
ffc61ed2
JH
4744 scan += UTF8SKIP(scan);
4745 hardcount++;
4746 }
4747 } else {
32fc9b6a 4748 while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
ffc61ed2
JH
4749 scan++;
4750 }
a0d0e21e
LW
4751 break;
4752 case ALNUM:
1aa99e6b 4753 if (do_utf8) {
ffc61ed2 4754 loceol = PL_regeol;
1a4fad37 4755 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4756 while (hardcount < max && scan < loceol &&
3568d838 4757 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4758 scan += UTF8SKIP(scan);
4759 hardcount++;
4760 }
4761 } else {
4762 while (scan < loceol && isALNUM(*scan))
4763 scan++;
a0ed51b3
LW
4764 }
4765 break;
bbce6d69 4766 case ALNUML:
3280af22 4767 PL_reg_flags |= RF_tainted;
1aa99e6b 4768 if (do_utf8) {
ffc61ed2 4769 loceol = PL_regeol;
1aa99e6b
IH
4770 while (hardcount < max && scan < loceol &&
4771 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4772 scan += UTF8SKIP(scan);
4773 hardcount++;
4774 }
4775 } else {
4776 while (scan < loceol && isALNUM_LC(*scan))
4777 scan++;
a0ed51b3
LW
4778 }
4779 break;
a0d0e21e 4780 case NALNUM:
1aa99e6b 4781 if (do_utf8) {
ffc61ed2 4782 loceol = PL_regeol;
1a4fad37 4783 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4784 while (hardcount < max && scan < loceol &&
3568d838 4785 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4786 scan += UTF8SKIP(scan);
4787 hardcount++;
4788 }
4789 } else {
4790 while (scan < loceol && !isALNUM(*scan))
4791 scan++;
a0ed51b3
LW
4792 }
4793 break;
bbce6d69 4794 case NALNUML:
3280af22 4795 PL_reg_flags |= RF_tainted;
1aa99e6b 4796 if (do_utf8) {
ffc61ed2 4797 loceol = PL_regeol;
1aa99e6b
IH
4798 while (hardcount < max && scan < loceol &&
4799 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4800 scan += UTF8SKIP(scan);
4801 hardcount++;
4802 }
4803 } else {
4804 while (scan < loceol && !isALNUM_LC(*scan))
4805 scan++;
a0ed51b3
LW
4806 }
4807 break;
a0d0e21e 4808 case SPACE:
1aa99e6b 4809 if (do_utf8) {
ffc61ed2 4810 loceol = PL_regeol;
1a4fad37 4811 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4812 while (hardcount < max && scan < loceol &&
3568d838
JH
4813 (*scan == ' ' ||
4814 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4815 scan += UTF8SKIP(scan);
4816 hardcount++;
4817 }
4818 } else {
4819 while (scan < loceol && isSPACE(*scan))
4820 scan++;
a0ed51b3
LW
4821 }
4822 break;
bbce6d69 4823 case SPACEL:
3280af22 4824 PL_reg_flags |= RF_tainted;
1aa99e6b 4825 if (do_utf8) {
ffc61ed2 4826 loceol = PL_regeol;
1aa99e6b 4827 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4828 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4829 scan += UTF8SKIP(scan);
4830 hardcount++;
4831 }
4832 } else {
4833 while (scan < loceol && isSPACE_LC(*scan))
4834 scan++;
a0ed51b3
LW
4835 }
4836 break;
a0d0e21e 4837 case NSPACE:
1aa99e6b 4838 if (do_utf8) {
ffc61ed2 4839 loceol = PL_regeol;
1a4fad37 4840 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4841 while (hardcount < max && scan < loceol &&
3568d838
JH
4842 !(*scan == ' ' ||
4843 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4844 scan += UTF8SKIP(scan);
4845 hardcount++;
4846 }
4847 } else {
4848 while (scan < loceol && !isSPACE(*scan))
4849 scan++;
4850 break;
a0ed51b3 4851 }
bbce6d69 4852 case NSPACEL:
3280af22 4853 PL_reg_flags |= RF_tainted;
1aa99e6b 4854 if (do_utf8) {
ffc61ed2 4855 loceol = PL_regeol;
1aa99e6b 4856 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4857 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4858 scan += UTF8SKIP(scan);
4859 hardcount++;
4860 }
4861 } else {
4862 while (scan < loceol && !isSPACE_LC(*scan))
4863 scan++;
a0ed51b3
LW
4864 }
4865 break;
a0d0e21e 4866 case DIGIT:
1aa99e6b 4867 if (do_utf8) {
ffc61ed2 4868 loceol = PL_regeol;
1a4fad37 4869 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4870 while (hardcount < max && scan < loceol &&
3568d838 4871 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4872 scan += UTF8SKIP(scan);
4873 hardcount++;
4874 }
4875 } else {
4876 while (scan < loceol && isDIGIT(*scan))
4877 scan++;
a0ed51b3
LW
4878 }
4879 break;
a0d0e21e 4880 case NDIGIT:
1aa99e6b 4881 if (do_utf8) {
ffc61ed2 4882 loceol = PL_regeol;
1a4fad37 4883 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4884 while (hardcount < max && scan < loceol &&
3568d838 4885 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4886 scan += UTF8SKIP(scan);
4887 hardcount++;
4888 }
4889 } else {
4890 while (scan < loceol && !isDIGIT(*scan))
4891 scan++;
a0ed51b3
LW
4892 }
4893 break;
a0d0e21e
LW
4894 default: /* Called on something of 0 width. */
4895 break; /* So match right here or not at all. */
4896 }
a687059c 4897
a0ed51b3
LW
4898 if (hardcount)
4899 c = hardcount;
4900 else
4901 c = scan - PL_reginput;
3280af22 4902 PL_reginput = scan;
a687059c 4903
a3621e74 4904 DEBUG_r({
ab74612d 4905 SV *re_debug_flags = NULL;
6136c704 4906 SV * const prop = sv_newmortal();
a3621e74
YO
4907 GET_RE_DEBUG_FLAGS;
4908 DEBUG_EXECUTE_r({
32fc9b6a 4909 regprop(prog, prop, p);
9041c2e3
NIS
4910 PerlIO_printf(Perl_debug_log,
4911 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3f7c398e 4912 REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max);
c277df42 4913 });
a3621e74 4914 });
9041c2e3 4915
a0d0e21e 4916 return(c);
a687059c
LW
4917}
4918
c277df42 4919
76234dfb 4920#ifndef PERL_IN_XSUB_RE
c277df42 4921/*
ffc61ed2
JH
4922- regclass_swash - prepare the utf8 swash
4923*/
4924
4925SV *
32fc9b6a 4926Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
ffc61ed2 4927{
97aff369 4928 dVAR;
9e55ce06
JH
4929 SV *sw = NULL;
4930 SV *si = NULL;
4931 SV *alt = NULL;
32fc9b6a 4932 const struct reg_data *data = prog ? prog->data : NULL;
ffc61ed2 4933
4f639d21 4934 if (data && data->count) {
a3b680e6 4935 const U32 n = ARG(node);
ffc61ed2 4936
4f639d21
DM
4937 if (data->what[n] == 's') {
4938 SV * const rv = (SV*)data->data[n];
890ce7af 4939 AV * const av = (AV*)SvRV((SV*)rv);
2d03de9c 4940 SV **const ary = AvARRAY(av);
9e55ce06 4941 SV **a, **b;
9041c2e3 4942
711a919c 4943 /* See the end of regcomp.c:S_regclass() for
9e55ce06
JH
4944 * documentation of these array elements. */
4945
b11f357e 4946 si = *ary;
8f7f7219 4947 a = SvROK(ary[1]) ? &ary[1] : 0;
b11f357e
JH
4948 b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4949
ffc61ed2
JH
4950 if (a)
4951 sw = *a;
4952 else if (si && doinit) {
4953 sw = swash_init("utf8", "", si, 1, 0);
4954 (void)av_store(av, 1, sw);
4955 }
9e55ce06
JH
4956 if (b)
4957 alt = *b;
ffc61ed2
JH
4958 }
4959 }
4960
9e55ce06
JH
4961 if (listsvp)
4962 *listsvp = si;
4963 if (altsvp)
4964 *altsvp = alt;
ffc61ed2
JH
4965
4966 return sw;
4967}
76234dfb 4968#endif
ffc61ed2
JH
4969
4970/*
ba7b4546 4971 - reginclass - determine if a character falls into a character class
832705d4
JH
4972
4973 The n is the ANYOF regnode, the p is the target string, lenp
4974 is pointer to the maximum length of how far to go in the p
4975 (if the lenp is zero, UTF8SKIP(p) is used),
4976 do_utf8 tells whether the target string is in UTF-8.
4977
bbce6d69 4978 */
4979
76e3520e 4980STATIC bool
32fc9b6a 4981S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
bbce6d69 4982{
27da23d5 4983 dVAR;
a3b680e6 4984 const char flags = ANYOF_FLAGS(n);
bbce6d69 4985 bool match = FALSE;
cc07378b 4986 UV c = *p;
ae9ddab8 4987 STRLEN len = 0;
9e55ce06 4988 STRLEN plen;
1aa99e6b 4989
19f67299
TS
4990 if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
4991 c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
4ad0818d
DM
4992 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
4993 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
19f67299
TS
4994 if (len == (STRLEN)-1)
4995 Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
4996 }
bbce6d69 4997
0f0076b4 4998 plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
ffc61ed2 4999 if (do_utf8 || (flags & ANYOF_UNICODE)) {
9e55ce06
JH
5000 if (lenp)
5001 *lenp = 0;
ffc61ed2 5002 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
5003 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5004 match = TRUE;
bbce6d69 5005 }
3568d838 5006 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 5007 match = TRUE;
ffc61ed2 5008 if (!match) {
9e55ce06 5009 AV *av;
32fc9b6a 5010 SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
ffc61ed2
JH
5011
5012 if (sw) {
3568d838 5013 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
5014 match = TRUE;
5015 else if (flags & ANYOF_FOLD) {
9e55ce06
JH
5016 if (!match && lenp && av) {
5017 I32 i;
9e55ce06 5018 for (i = 0; i <= av_len(av); i++) {
890ce7af 5019 SV* const sv = *av_fetch(av, i, FALSE);
9e55ce06 5020 STRLEN len;
890ce7af 5021 const char * const s = SvPV_const(sv, len);
9e55ce06 5022
061b10df 5023 if (len <= plen && memEQ(s, (char*)p, len)) {
9e55ce06
JH
5024 *lenp = len;
5025 match = TRUE;
5026 break;
5027 }
5028 }
5029 }
5030 if (!match) {
89ebb4a3 5031 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4a623e43
JH
5032 STRLEN tmplen;
5033
9e55ce06
JH
5034 to_utf8_fold(p, tmpbuf, &tmplen);
5035 if (swash_fetch(sw, tmpbuf, do_utf8))
5036 match = TRUE;
5037 }
ffc61ed2
JH
5038 }
5039 }
bbce6d69 5040 }
9e55ce06 5041 if (match && lenp && *lenp == 0)
0f0076b4 5042 *lenp = UNISKIP(NATIVE_TO_UNI(c));
bbce6d69 5043 }
1aa99e6b 5044 if (!match && c < 256) {
ffc61ed2
JH
5045 if (ANYOF_BITMAP_TEST(n, c))
5046 match = TRUE;
5047 else if (flags & ANYOF_FOLD) {
eb160463 5048 U8 f;
a0ed51b3 5049
ffc61ed2
JH
5050 if (flags & ANYOF_LOCALE) {
5051 PL_reg_flags |= RF_tainted;
5052 f = PL_fold_locale[c];
5053 }
5054 else
5055 f = PL_fold[c];
5056 if (f != c && ANYOF_BITMAP_TEST(n, f))
5057 match = TRUE;
5058 }
5059
5060 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 5061 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
5062 if (
5063 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
5064 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
5065 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
5066 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
5067 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
5068 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
5069 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
5070 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5071 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
5072 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
5073 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
5074 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
5075 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
5076 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
5077 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
5078 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
5079 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
5080 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
5081 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
5082 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
5083 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
5084 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
5085 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
5086 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
5087 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
5088 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
5089 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
5090 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
5091 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
5092 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
5093 ) /* How's that for a conditional? */
5094 {
5095 match = TRUE;
5096 }
a0ed51b3 5097 }
a0ed51b3
LW
5098 }
5099
a0ed51b3
LW
5100 return (flags & ANYOF_INVERT) ? !match : match;
5101}
161b471a 5102
dfe13c55 5103STATIC U8 *
0ce71af7 5104S_reghop3(U8 *s, I32 off, const U8* lim)
9041c2e3 5105{
97aff369 5106 dVAR;
a0ed51b3 5107 if (off >= 0) {
1aa99e6b 5108 while (off-- && s < lim) {
ffc61ed2 5109 /* XXX could check well-formedness here */
a0ed51b3 5110 s += UTF8SKIP(s);
ffc61ed2 5111 }
a0ed51b3
LW
5112 }
5113 else {
5114 while (off++) {
1aa99e6b 5115 if (s > lim) {
a0ed51b3 5116 s--;
ffc61ed2 5117 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 5118 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 5119 s--;
ffc61ed2
JH
5120 }
5121 /* XXX could check well-formedness here */
a0ed51b3
LW
5122 }
5123 }
5124 }
5125 return s;
5126}
161b471a 5127
dfe13c55 5128STATIC U8 *
0ce71af7 5129S_reghopmaybe3(U8* s, I32 off, const U8* lim)
a0ed51b3 5130{
97aff369 5131 dVAR;
a0ed51b3 5132 if (off >= 0) {
1aa99e6b 5133 while (off-- && s < lim) {
ffc61ed2 5134 /* XXX could check well-formedness here */
a0ed51b3 5135 s += UTF8SKIP(s);
ffc61ed2 5136 }
a0ed51b3
LW
5137 if (off >= 0)
5138 return 0;
5139 }
5140 else {
5141 while (off++) {
1aa99e6b 5142 if (s > lim) {
a0ed51b3 5143 s--;
ffc61ed2 5144 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 5145 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 5146 s--;
ffc61ed2
JH
5147 }
5148 /* XXX could check well-formedness here */
a0ed51b3
LW
5149 }
5150 else
5151 break;
5152 }
5153 if (off <= 0)
5154 return 0;
5155 }
5156 return s;
5157}
51371543 5158
51371543 5159static void
acfe0abc 5160restore_pos(pTHX_ void *arg)
51371543 5161{
97aff369 5162 dVAR;
097eb12c 5163 regexp * const rex = (regexp *)arg;
51371543
GS
5164 if (PL_reg_eval_set) {
5165 if (PL_reg_oldsaved) {
4f639d21
DM
5166 rex->subbeg = PL_reg_oldsaved;
5167 rex->sublen = PL_reg_oldsavedlen;
f8c7b90f 5168#ifdef PERL_OLD_COPY_ON_WRITE
4f639d21 5169 rex->saved_copy = PL_nrs;
ed252734 5170#endif
4f639d21 5171 RX_MATCH_COPIED_on(rex);
51371543
GS
5172 }
5173 PL_reg_magic->mg_len = PL_reg_oldpos;
5174 PL_reg_eval_set = 0;
5175 PL_curpm = PL_reg_oldcurpm;
5176 }
5177}
33b8afdf
JH
5178
5179STATIC void
5180S_to_utf8_substr(pTHX_ register regexp *prog)
5181{
33b8afdf 5182 if (prog->float_substr && !prog->float_utf8) {
097eb12c
AL
5183 SV* const sv = newSVsv(prog->float_substr);
5184 prog->float_utf8 = sv;
33b8afdf
JH
5185 sv_utf8_upgrade(sv);
5186 if (SvTAIL(prog->float_substr))
5187 SvTAIL_on(sv);
5188 if (prog->float_substr == prog->check_substr)
5189 prog->check_utf8 = sv;
5190 }
5191 if (prog->anchored_substr && !prog->anchored_utf8) {
097eb12c
AL
5192 SV* const sv = newSVsv(prog->anchored_substr);
5193 prog->anchored_utf8 = sv;
33b8afdf
JH
5194 sv_utf8_upgrade(sv);
5195 if (SvTAIL(prog->anchored_substr))
5196 SvTAIL_on(sv);
5197 if (prog->anchored_substr == prog->check_substr)
5198 prog->check_utf8 = sv;
5199 }
5200}
5201
5202STATIC void
5203S_to_byte_substr(pTHX_ register regexp *prog)
5204{
97aff369 5205 dVAR;
33b8afdf 5206 if (prog->float_utf8 && !prog->float_substr) {
097eb12c
AL
5207 SV* sv = newSVsv(prog->float_utf8);
5208 prog->float_substr = sv;
33b8afdf
JH
5209 if (sv_utf8_downgrade(sv, TRUE)) {
5210 if (SvTAIL(prog->float_utf8))
5211 SvTAIL_on(sv);
5212 } else {
5213 SvREFCNT_dec(sv);
5214 prog->float_substr = sv = &PL_sv_undef;
5215 }
5216 if (prog->float_utf8 == prog->check_utf8)
5217 prog->check_substr = sv;
5218 }
5219 if (prog->anchored_utf8 && !prog->anchored_substr) {
097eb12c
AL
5220 SV* sv = newSVsv(prog->anchored_utf8);
5221 prog->anchored_substr = sv;
33b8afdf
JH
5222 if (sv_utf8_downgrade(sv, TRUE)) {
5223 if (SvTAIL(prog->anchored_utf8))
5224 SvTAIL_on(sv);
5225 } else {
5226 SvREFCNT_dec(sv);
5227 prog->anchored_substr = sv = &PL_sv_undef;
5228 }
5229 if (prog->anchored_utf8 == prog->check_utf8)
5230 prog->check_substr = sv;
5231 }
5232}
66610fdd
RGS
5233
5234/*
5235 * Local variables:
5236 * c-indentation-style: bsd
5237 * c-basic-offset: 4
5238 * indent-tabs-mode: t
5239 * End:
5240 *
37442d52
RGS
5241 * ex: set ts=8 sts=4 sw=4 noet:
5242 */