This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
skip t/op/incfilter.t for "make clean;make miniperl;make minitest"
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
61296642
DM
8/* This file contains functions for executing a regular expression. See
9 * also regcomp.c which funnily enough, contains functions for compiling
166f8a29 10 * a regular expression.
e4a054ea
DM
11 *
12 * This file is also copied at build time to ext/re/re_exec.c, where
13 * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14 * This causes the main functions to be compiled under new names and with
15 * debugging support added, which makes "use re 'debug'" work.
16
166f8a29
DM
17 */
18
a687059c
LW
19/* NOTE: this is derived from Henry Spencer's regexp code, and should not
20 * confused with the original package (see point 3 below). Thanks, Henry!
21 */
22
23/* Additional note: this code is very heavily munged from Henry's version
24 * in places. In some spots I've traded clarity for efficiency, so don't
25 * blame Henry for some of the lack of readability.
26 */
27
e50aee73
AD
28/* The names of the functions have been changed from regcomp and
29 * regexec to pregcomp and pregexec in order to avoid conflicts
30 * with the POSIX routines of the same names.
31*/
32
b9d5759e 33#ifdef PERL_EXT_RE_BUILD
54df2634 34#include "re_top.h"
9041c2e3 35#endif
56953603 36
a687059c 37/*
e50aee73 38 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
39 *
40 * Copyright (c) 1986 by University of Toronto.
41 * Written by Henry Spencer. Not derived from licensed software.
42 *
43 * Permission is granted to anyone to use this software for any
44 * purpose on any computer system, and to redistribute it freely,
45 * subject to the following restrictions:
46 *
47 * 1. The author is not responsible for the consequences of use of
48 * this software, no matter how awful, even if they arise
49 * from defects in it.
50 *
51 * 2. The origin of this software must not be misrepresented, either
52 * by explicit claim or by omission.
53 *
54 * 3. Altered versions must be plainly marked as such, and must not
55 * be misrepresented as being the original software.
56 *
57 **** Alterations to Henry's code are...
58 ****
4bb101f2 59 **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 60 **** 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 61 ****
9ef589d8
LW
62 **** You may distribute under the terms of either the GNU General Public
63 **** License or the Artistic License, as specified in the README file.
a687059c
LW
64 *
65 * Beware that some of this code is subtly aware of the way operator
66 * precedence is structured in regular expressions. Serious changes in
67 * regular-expression syntax might require a total rethink.
68 */
69#include "EXTERN.h"
864dbfa3 70#define PERL_IN_REGEXEC_C
a687059c 71#include "perl.h"
0f5d15d6 72
54df2634
NC
73#ifdef PERL_IN_XSUB_RE
74# include "re_comp.h"
75#else
76# include "regcomp.h"
77#endif
a687059c 78
c277df42
IZ
79#define RF_tainted 1 /* tainted information used? */
80#define RF_warned 2 /* warned about big count? */
ce862d02 81#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
82#define RF_utf8 8 /* String contains multibyte chars? */
83
eb160463 84#define UTF ((PL_reg_flags & RF_utf8) != 0)
ce862d02
IZ
85
86#define RS_init 1 /* eval environment created */
87#define RS_set 2 /* replsv value is set */
c277df42 88
a687059c
LW
89#ifndef STATIC
90#define STATIC static
91#endif
92
32fc9b6a 93#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
7d3e948e 94
c277df42
IZ
95/*
96 * Forwards.
97 */
98
33b8afdf 99#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 100#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 101
52657f30
AL
102#define HOPc(pos,off) ((char *)(PL_reg_match_utf8 \
103 ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
104 : (U8*)(pos + off)))
105#define HOPBACKc(pos, off) ((char*) \
106 ((PL_reg_match_utf8) \
b9ea4ed6 107 ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
efb30f32
HS
108 : (pos - off >= PL_bostr) \
109 ? (U8*)(pos - off) \
52657f30 110 : (U8*)NULL) \
efb30f32 111)
efb30f32 112
53c4c00c 113#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b 114#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
1aa99e6b 115
1a4fad37
AL
116#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
117 if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
118#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
119#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
120#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
121#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
51371543 122
5f80c4cf 123/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
124#define JUMPABLE(rn) ( \
125 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
126 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
127 OP(rn) == PLUS || OP(rn) == MINMOD || \
128 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
129)
130
cca55fe3
JP
131#define HAS_TEXT(rn) ( \
132 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
133)
e2d8ce26 134
a84d97b6
HS
135/*
136 Search for mandatory following text node; for lookahead, the text must
137 follow but for lookbehind (rn->flags != 0) we skip to the next step.
138*/
cca55fe3 139#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 140 while (JUMPABLE(rn)) \
a84d97b6 141 if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 142 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
143 else if (OP(rn) == PLUS) \
144 rn = NEXTOPER(rn); \
a84d97b6
HS
145 else if (OP(rn) == IFMATCH) \
146 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
e2d8ce26 147 else rn += NEXT_OFF(rn); \
5f80c4cf 148} STMT_END
74750237 149
acfe0abc 150static void restore_pos(pTHX_ void *arg);
51371543 151
76e3520e 152STATIC CHECKPOINT
cea2e8a9 153S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 154{
97aff369 155 dVAR;
a3b680e6 156 const int retval = PL_savestack_ix;
b1ce53c5 157#define REGCP_PAREN_ELEMS 4
a3b680e6 158 const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
159 int p;
160
e49a9654
IH
161 if (paren_elems_to_push < 0)
162 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
163
a01268b5 164#define REGCP_OTHER_ELEMS 6
4b3c1a47 165 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 166 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 167/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
168 SSPUSHINT(PL_regendp[p]);
169 SSPUSHINT(PL_regstartp[p]);
3280af22 170 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
171 SSPUSHINT(p);
172 }
b1ce53c5 173/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
174 SSPUSHINT(PL_regsize);
175 SSPUSHINT(*PL_reglastparen);
a01268b5 176 SSPUSHINT(*PL_reglastcloseparen);
3280af22 177 SSPUSHPTR(PL_reginput);
41123dfd
JH
178#define REGCP_FRAME_ELEMS 2
179/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
180 * are needed for the regexp context stack bookkeeping. */
181 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 182 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 183
a0d0e21e
LW
184 return retval;
185}
186
c277df42 187/* These are needed since we do not localize EVAL nodes: */
a3621e74 188# define REGCP_SET(cp) DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, \
faccc32b 189 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 190 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 191
a3621e74 192# define REGCP_UNWIND(cp) DEBUG_EXECUTE_r(cp != PL_savestack_ix ? \
c3464db5 193 PerlIO_printf(Perl_debug_log, \
faccc32b 194 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 195 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 196
76e3520e 197STATIC char *
097eb12c 198S_regcppop(pTHX_ const regexp *rex)
a0d0e21e 199{
97aff369 200 dVAR;
b1ce53c5 201 I32 i;
a0d0e21e 202 char *input;
b1ce53c5 203
a3621e74
YO
204 GET_RE_DEBUG_FLAGS_DECL;
205
b1ce53c5 206 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 207 i = SSPOPINT;
b1ce53c5
JH
208 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
209 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 210 input = (char *) SSPOPPTR;
a01268b5 211 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
212 *PL_reglastparen = SSPOPINT;
213 PL_regsize = SSPOPINT;
b1ce53c5
JH
214
215 /* Now restore the parentheses context. */
41123dfd
JH
216 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
217 i > 0; i -= REGCP_PAREN_ELEMS) {
1df70142 218 I32 tmps;
097eb12c 219 U32 paren = (U32)SSPOPINT;
3280af22 220 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
221 PL_regstartp[paren] = SSPOPINT;
222 tmps = SSPOPINT;
3280af22
NIS
223 if (paren <= *PL_reglastparen)
224 PL_regendp[paren] = tmps;
a3621e74 225 DEBUG_EXECUTE_r(
c3464db5 226 PerlIO_printf(Perl_debug_log,
b900a521 227 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 228 (UV)paren, (IV)PL_regstartp[paren],
b900a521 229 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 230 (IV)PL_regendp[paren],
3280af22 231 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 232 );
a0d0e21e 233 }
a3621e74 234 DEBUG_EXECUTE_r(
bb7a0f54 235 if (*PL_reglastparen + 1 <= rex->nparens) {
c3464db5 236 PerlIO_printf(Perl_debug_log,
faccc32b 237 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
4f639d21 238 (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
c277df42
IZ
239 }
240 );
daf18116 241#if 1
dafc8851
JH
242 /* It would seem that the similar code in regtry()
243 * already takes care of this, and in fact it is in
244 * a better location to since this code can #if 0-ed out
245 * but the code in regtry() is needed or otherwise tests
246 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
247 * (as of patchlevel 7877) will fail. Then again,
248 * this code seems to be necessary or otherwise
249 * building DynaLoader will fail:
250 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
251 * --jhi */
bb7a0f54 252 for (i = *PL_reglastparen + 1; (U32)i <= rex->nparens; i++) {
097eb12c
AL
253 if (i > PL_regsize)
254 PL_regstartp[i] = -1;
255 PL_regendp[i] = -1;
a0d0e21e 256 }
dafc8851 257#endif
a0d0e21e
LW
258 return input;
259}
260
02db2b7b 261#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 262
95b24440 263#define TRYPAREN(paren, n, input, where) { \
29d1e993
HS
264 if (paren) { \
265 if (n) { \
266 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
267 PL_regendp[paren] = input - PL_bostr; \
268 } \
269 else \
270 PL_regendp[paren] = -1; \
271 } \
95b24440
DM
272 REGMATCH(next, where); \
273 if (result) \
29d1e993
HS
274 sayYES; \
275 if (paren && n) \
276 PL_regendp[paren] = -1; \
277}
278
279
a687059c 280/*
e50aee73 281 * pregexec and friends
a687059c
LW
282 */
283
76234dfb 284#ifndef PERL_IN_XSUB_RE
a687059c 285/*
c277df42 286 - pregexec - match a regexp against a string
a687059c 287 */
c277df42 288I32
864dbfa3 289Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 290 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
291/* strend: pointer to null at end of string */
292/* strbeg: real beginning of string */
293/* minend: end of match must be >=minend after stringarg. */
294/* nosave: For optimizations. */
295{
296 return
9041c2e3 297 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
298 nosave ? 0 : REXEC_COPY_STR);
299}
76234dfb 300#endif
22e551b9 301
9041c2e3 302/*
cad2e5aa
JH
303 * Need to implement the following flags for reg_anch:
304 *
305 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
306 * USE_INTUIT_ML
307 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
308 * INTUIT_AUTORITATIVE_ML
309 * INTUIT_ONCE_NOML - Intuit can match in one location only.
310 * INTUIT_ONCE_ML
311 *
312 * Another flag for this function: SECOND_TIME (so that float substrs
313 * with giant delta may be not rechecked).
314 */
315
316/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
317
3f7c398e 318/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
cad2e5aa
JH
319 Otherwise, only SvCUR(sv) is used to get strbeg. */
320
321/* XXXX We assume that strpos is strbeg unless sv. */
322
6eb5f6b9
JH
323/* XXXX Some places assume that there is a fixed substring.
324 An update may be needed if optimizer marks as "INTUITable"
325 RExen without fixed substrings. Similarly, it is assumed that
326 lengths of all the strings are no more than minlen, thus they
327 cannot come from lookahead.
328 (Or minlen should take into account lookahead.) */
329
2c2d71f5
JH
330/* A failure to find a constant substring means that there is no need to make
331 an expensive call to REx engine, thus we celebrate a failure. Similarly,
332 finding a substring too deep into the string means that less calls to
30944b6d
IZ
333 regtry() should be needed.
334
335 REx compiler's optimizer found 4 possible hints:
336 a) Anchored substring;
337 b) Fixed substring;
338 c) Whether we are anchored (beginning-of-line or \G);
339 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 340 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
341 string which does not contradict any of them.
342 */
2c2d71f5 343
6eb5f6b9
JH
344/* Most of decisions we do here should have been done at compile time.
345 The nodes of the REx which we used for the search should have been
346 deleted from the finite automaton. */
347
cad2e5aa
JH
348char *
349Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
350 char *strend, U32 flags, re_scream_pos_data *data)
351{
97aff369 352 dVAR;
b7953727 353 register I32 start_shift = 0;
cad2e5aa 354 /* Should be nonnegative! */
b7953727 355 register I32 end_shift = 0;
2c2d71f5
JH
356 register char *s;
357 register SV *check;
a1933d95 358 char *strbeg;
cad2e5aa 359 char *t;
a3b680e6 360 const int do_utf8 = sv ? SvUTF8(sv) : 0; /* if no sv we have to assume bytes */
cad2e5aa 361 I32 ml_anch;
bd61b366
SS
362 register char *other_last = NULL; /* other substr checked before this */
363 char *check_at = NULL; /* check substr found at this pos */
1df70142 364 const I32 multiline = prog->reganch & PMf_MULTILINE;
30944b6d 365#ifdef DEBUGGING
890ce7af
AL
366 const char * const i_strpos = strpos;
367 SV * const dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 368#endif
a3621e74
YO
369
370 GET_RE_DEBUG_FLAGS_DECL;
371
a30b2f1f 372 RX_MATCH_UTF8_set(prog,do_utf8);
cad2e5aa 373
b8d68ded 374 if (prog->reganch & ROPT_UTF8) {
a3621e74 375 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded
JH
376 "UTF-8 regex...\n"));
377 PL_reg_flags |= RF_utf8;
378 }
379
a3621e74 380 DEBUG_EXECUTE_r({
1df70142 381 const char *s = PL_reg_match_utf8 ?
c728cb41
JH
382 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
383 strpos;
1df70142 384 const int len = PL_reg_match_utf8 ?
bb7a0f54 385 (int)strlen(s) : strend - strpos;
2a782b5b
JH
386 if (!PL_colorset)
387 reginitcolors();
b8d68ded 388 if (PL_reg_match_utf8)
a3621e74 389 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded 390 "UTF-8 target...\n"));
2a782b5b 391 PerlIO_printf(Perl_debug_log,
a0288114 392 "%sGuessing start of match, REx%s \"%s%.60s%s%s\" against \"%s%.*s%s%s\"...\n",
e4584336 393 PL_colors[4], PL_colors[5], PL_colors[0],
2a782b5b
JH
394 prog->precomp,
395 PL_colors[1],
396 (strlen(prog->precomp) > 60 ? "..." : ""),
397 PL_colors[0],
398 (int)(len > 60 ? 60 : len),
399 s, PL_colors[1],
400 (len > 60 ? "..." : "")
401 );
402 });
cad2e5aa 403
c344f387
JH
404 /* CHR_DIST() would be more correct here but it makes things slow. */
405 if (prog->minlen > strend - strpos) {
a3621e74 406 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584 407 "String too short... [re_intuit_start]\n"));
cad2e5aa 408 goto fail;
2c2d71f5 409 }
a1933d95 410 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 411 PL_regeol = strend;
33b8afdf
JH
412 if (do_utf8) {
413 if (!prog->check_utf8 && prog->check_substr)
414 to_utf8_substr(prog);
415 check = prog->check_utf8;
416 } else {
417 if (!prog->check_substr && prog->check_utf8)
418 to_byte_substr(prog);
419 check = prog->check_substr;
420 }
421 if (check == &PL_sv_undef) {
a3621e74 422 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
423 "Non-utf string cannot match utf check string\n"));
424 goto fail;
425 }
2c2d71f5 426 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
427 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
428 || ( (prog->reganch & ROPT_ANCH_BOL)
7fba1cd6 429 && !multiline ) ); /* Check after \n? */
cad2e5aa 430
7e25d62c
JH
431 if (!ml_anch) {
432 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
433 | ROPT_IMPLICIT)) /* not a real BOL */
3f7c398e 434 /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
7e25d62c
JH
435 && sv && !SvROK(sv)
436 && (strpos != strbeg)) {
a3621e74 437 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
7e25d62c
JH
438 goto fail;
439 }
440 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 441 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 442 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
443 I32 slen;
444
1aa99e6b 445 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
446 if (SvTAIL(check)) {
447 slen = SvCUR(check); /* >= 1 */
cad2e5aa 448
9041c2e3 449 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5 450 || (strend - s == slen && strend[-1] != '\n')) {
a3621e74 451 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
2c2d71f5 452 goto fail_finish;
cad2e5aa
JH
453 }
454 /* Now should match s[0..slen-2] */
455 slen--;
3f7c398e 456 if (slen && (*SvPVX_const(check) != *s
cad2e5aa 457 || (slen > 1
3f7c398e 458 && memNE(SvPVX_const(check), s, slen)))) {
2c2d71f5 459 report_neq:
a3621e74 460 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
2c2d71f5
JH
461 goto fail_finish;
462 }
cad2e5aa 463 }
3f7c398e 464 else if (*SvPVX_const(check) != *s
653099ff 465 || ((slen = SvCUR(check)) > 1
3f7c398e 466 && memNE(SvPVX_const(check), s, slen)))
2c2d71f5 467 goto report_neq;
c315bfe8 468 check_at = s;
2c2d71f5 469 goto success_at_start;
7e25d62c 470 }
cad2e5aa 471 }
2c2d71f5 472 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 473 s = strpos;
2c2d71f5 474 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 475 end_shift = prog->minlen - start_shift -
653099ff 476 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 477 if (!ml_anch) {
a3b680e6 478 const I32 end = prog->check_offset_max + CHR_SVLEN(check)
653099ff 479 - (SvTAIL(check) != 0);
a3b680e6 480 const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
481
482 if (end_shift < eshift)
483 end_shift = eshift;
484 }
cad2e5aa 485 }
2c2d71f5 486 else { /* Can match at random position */
cad2e5aa
JH
487 ml_anch = 0;
488 s = strpos;
2c2d71f5
JH
489 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
490 /* Should be nonnegative! */
491 end_shift = prog->minlen - start_shift -
653099ff 492 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
493 }
494
2c2d71f5 495#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 496 if (end_shift < 0)
6bbae5e6 497 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
498#endif
499
2c2d71f5
JH
500 restart:
501 /* Find a possible match in the region s..strend by looking for
502 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 503 if (flags & REXEC_SCREAM) {
cad2e5aa 504 I32 p = -1; /* Internal iterator of scream. */
a3b680e6 505 I32 * const pp = data ? data->scream_pos : &p;
cad2e5aa 506
2c2d71f5
JH
507 if (PL_screamfirst[BmRARE(check)] >= 0
508 || ( BmRARE(check) == '\n'
509 && (BmPREVIOUS(check) == SvCUR(check) - 1)
510 && SvTAIL(check) ))
9041c2e3 511 s = screaminstr(sv, check,
2c2d71f5 512 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 513 else
2c2d71f5 514 goto fail_finish;
4addbd3b
HS
515 /* we may be pointing at the wrong string */
516 if (s && RX_MATCH_COPIED(prog))
3f7c398e 517 s = strbeg + (s - SvPVX_const(sv));
cad2e5aa
JH
518 if (data)
519 *data->scream_olds = s;
520 }
f33976b4 521 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
522 s = fbm_instr((U8*)(s + start_shift),
523 (U8*)(strend - end_shift),
7fba1cd6 524 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 525 else
1aa99e6b
IH
526 s = fbm_instr(HOP3(s, start_shift, strend),
527 HOP3(strend, -end_shift, strbeg),
7fba1cd6 528 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
529
530 /* Update the count-of-usability, remove useless subpatterns,
531 unshift s. */
2c2d71f5 532
a0288114 533 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s %s substr \"%s%.*s%s\"%s%s",
2c2d71f5 534 (s ? "Found" : "Did not find"),
33b8afdf 535 (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
2c2d71f5 536 PL_colors[0],
7b0972df 537 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
3f7c398e 538 SvPVX_const(check),
2c2d71f5
JH
539 PL_colors[1], (SvTAIL(check) ? "$" : ""),
540 (s ? " at offset " : "...\n") ) );
541
542 if (!s)
543 goto fail_finish;
544
6eb5f6b9
JH
545 check_at = s;
546
2c2d71f5 547 /* Finish the diagnostic message */
a3621e74 548 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
549
550 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
551 Start with the other substr.
552 XXXX no SCREAM optimization yet - and a very coarse implementation
a0288114 553 XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
2c2d71f5
JH
554 *always* match. Probably should be marked during compile...
555 Probably it is right to do no SCREAM here...
556 */
557
33b8afdf 558 if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
30944b6d 559 /* Take into account the "other" substring. */
2c2d71f5
JH
560 /* XXXX May be hopelessly wrong for UTF... */
561 if (!other_last)
6eb5f6b9 562 other_last = strpos;
33b8afdf 563 if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
30944b6d
IZ
564 do_other_anchored:
565 {
890ce7af
AL
566 char * const last = HOP3c(s, -start_shift, strbeg);
567 char *last1, *last2;
2c2d71f5 568 char *s1 = s;
33b8afdf 569 SV* must;
2c2d71f5 570
2c2d71f5
JH
571 t = s - prog->check_offset_max;
572 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 573 && (!do_utf8
0ce71af7 574 || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
2c2d71f5 575 && t > strpos)))
6f207bd3 576 NOOP;
2c2d71f5
JH
577 else
578 t = strpos;
1aa99e6b 579 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
580 if (t < other_last) /* These positions already checked */
581 t = other_last;
1aa99e6b 582 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
583 if (last < last1)
584 last1 = last;
585 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
586 /* On end-of-str: see comment below. */
33b8afdf
JH
587 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
588 if (must == &PL_sv_undef) {
589 s = (char*)NULL;
a3621e74 590 DEBUG_EXECUTE_r(must = prog->anchored_utf8); /* for debug */
33b8afdf
JH
591 }
592 else
593 s = fbm_instr(
594 (unsigned char*)t,
595 HOP3(HOP3(last1, prog->anchored_offset, strend)
596 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
597 must,
7fba1cd6 598 multiline ? FBMrf_MULTILINE : 0
33b8afdf 599 );
a3621e74 600 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a0288114 601 "%s anchored substr \"%s%.*s%s\"%s",
2c2d71f5
JH
602 (s ? "Found" : "Contradicts"),
603 PL_colors[0],
33b8afdf
JH
604 (int)(SvCUR(must)
605 - (SvTAIL(must)!=0)),
3f7c398e 606 SvPVX_const(must),
33b8afdf 607 PL_colors[1], (SvTAIL(must) ? "$" : "")));
2c2d71f5
JH
608 if (!s) {
609 if (last1 >= last2) {
a3621e74 610 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5
JH
611 ", giving up...\n"));
612 goto fail_finish;
613 }
a3621e74 614 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 615 ", trying floating at offset %ld...\n",
1aa99e6b
IH
616 (long)(HOP3c(s1, 1, strend) - i_strpos)));
617 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
618 s = HOP3c(last, 1, strend);
2c2d71f5
JH
619 goto restart;
620 }
621 else {
a3621e74 622 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 623 (long)(s - i_strpos)));
1aa99e6b
IH
624 t = HOP3c(s, -prog->anchored_offset, strbeg);
625 other_last = HOP3c(s, 1, strend);
30944b6d 626 s = s1;
2c2d71f5
JH
627 if (t == strpos)
628 goto try_at_start;
2c2d71f5
JH
629 goto try_at_offset;
630 }
30944b6d 631 }
2c2d71f5
JH
632 }
633 else { /* Take into account the floating substring. */
33b8afdf
JH
634 char *last, *last1;
635 char *s1 = s;
636 SV* must;
637
638 t = HOP3c(s, -start_shift, strbeg);
639 last1 = last =
640 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
641 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
642 last = HOP3c(t, prog->float_max_offset, strend);
643 s = HOP3c(t, prog->float_min_offset, strend);
644 if (s < other_last)
645 s = other_last;
2c2d71f5 646 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
33b8afdf
JH
647 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
648 /* fbm_instr() takes into account exact value of end-of-str
649 if the check is SvTAIL(ed). Since false positives are OK,
650 and end-of-str is not later than strend we are OK. */
651 if (must == &PL_sv_undef) {
652 s = (char*)NULL;
a3621e74 653 DEBUG_EXECUTE_r(must = prog->float_utf8); /* for debug message */
33b8afdf
JH
654 }
655 else
2c2d71f5 656 s = fbm_instr((unsigned char*)s,
33b8afdf
JH
657 (unsigned char*)last + SvCUR(must)
658 - (SvTAIL(must)!=0),
7fba1cd6 659 must, multiline ? FBMrf_MULTILINE : 0);
a0288114 660 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s floating substr \"%s%.*s%s\"%s",
33b8afdf
JH
661 (s ? "Found" : "Contradicts"),
662 PL_colors[0],
663 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 664 SvPVX_const(must),
33b8afdf
JH
665 PL_colors[1], (SvTAIL(must) ? "$" : "")));
666 if (!s) {
667 if (last1 == last) {
a3621e74 668 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
669 ", giving up...\n"));
670 goto fail_finish;
2c2d71f5 671 }
a3621e74 672 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
673 ", trying anchored starting at offset %ld...\n",
674 (long)(s1 + 1 - i_strpos)));
675 other_last = last;
676 s = HOP3c(t, 1, strend);
677 goto restart;
678 }
679 else {
a3621e74 680 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
33b8afdf
JH
681 (long)(s - i_strpos)));
682 other_last = s; /* Fix this later. --Hugo */
683 s = s1;
684 if (t == strpos)
685 goto try_at_start;
686 goto try_at_offset;
687 }
2c2d71f5 688 }
cad2e5aa 689 }
2c2d71f5
JH
690
691 t = s - prog->check_offset_max;
2c2d71f5 692 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 693 && (!do_utf8
0ce71af7 694 || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*)strpos))
1aa99e6b 695 && t > strpos))) {
2c2d71f5
JH
696 /* Fixed substring is found far enough so that the match
697 cannot start at strpos. */
698 try_at_offset:
cad2e5aa 699 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
700 /* Eventually fbm_*() should handle this, but often
701 anchored_offset is not 0, so this check will not be wasted. */
702 /* XXXX In the code below we prefer to look for "^" even in
703 presence of anchored substrings. And we search even
704 beyond the found float position. These pessimizations
705 are historical artefacts only. */
706 find_anchor:
2c2d71f5 707 while (t < strend - prog->minlen) {
cad2e5aa 708 if (*t == '\n') {
4ee3650e 709 if (t < check_at - prog->check_offset_min) {
33b8afdf 710 if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
4ee3650e
GS
711 /* Since we moved from the found position,
712 we definitely contradict the found anchored
30944b6d
IZ
713 substr. Due to the above check we do not
714 contradict "check" substr.
715 Thus we can arrive here only if check substr
716 is float. Redo checking for "other"=="fixed".
717 */
9041c2e3 718 strpos = t + 1;
a3621e74 719 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
e4584336 720 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
30944b6d
IZ
721 goto do_other_anchored;
722 }
4ee3650e
GS
723 /* We don't contradict the found floating substring. */
724 /* XXXX Why not check for STCLASS? */
cad2e5aa 725 s = t + 1;
a3621e74 726 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
e4584336 727 PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
728 goto set_useful;
729 }
4ee3650e
GS
730 /* Position contradicts check-string */
731 /* XXXX probably better to look for check-string
732 than for "\n", so one should lower the limit for t? */
a3621e74 733 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
e4584336 734 PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 735 other_last = strpos = s = t + 1;
cad2e5aa
JH
736 goto restart;
737 }
738 t++;
739 }
a3621e74 740 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
e4584336 741 PL_colors[0], PL_colors[1]));
2c2d71f5 742 goto fail_finish;
cad2e5aa 743 }
f5952150 744 else {
a3621e74 745 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
e4584336 746 PL_colors[0], PL_colors[1]));
f5952150 747 }
cad2e5aa
JH
748 s = t;
749 set_useful:
33b8afdf 750 ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
cad2e5aa
JH
751 }
752 else {
f5952150 753 /* The found string does not prohibit matching at strpos,
2c2d71f5 754 - no optimization of calling REx engine can be performed,
f5952150
GS
755 unless it was an MBOL and we are not after MBOL,
756 or a future STCLASS check will fail this. */
2c2d71f5
JH
757 try_at_start:
758 /* Even in this situation we may use MBOL flag if strpos is offset
759 wrt the start of the string. */
05b4157f 760 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 761 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
762 /* May be due to an implicit anchor of m{.*foo} */
763 && !(prog->reganch & ROPT_IMPLICIT))
764 {
cad2e5aa
JH
765 t = strpos;
766 goto find_anchor;
767 }
a3621e74 768 DEBUG_EXECUTE_r( if (ml_anch)
f5952150 769 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
e4584336 770 (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
30944b6d 771 );
2c2d71f5 772 success_at_start:
30944b6d 773 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
33b8afdf
JH
774 && (do_utf8 ? (
775 prog->check_utf8 /* Could be deleted already */
776 && --BmUSEFUL(prog->check_utf8) < 0
777 && (prog->check_utf8 == prog->float_utf8)
778 ) : (
779 prog->check_substr /* Could be deleted already */
780 && --BmUSEFUL(prog->check_substr) < 0
781 && (prog->check_substr == prog->float_substr)
782 )))
66e933ab 783 {
cad2e5aa 784 /* If flags & SOMETHING - do not do it many times on the same match */
a3621e74 785 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
33b8afdf
JH
786 SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
787 if (do_utf8 ? prog->check_substr : prog->check_utf8)
788 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
a0714e2c
SS
789 prog->check_substr = prog->check_utf8 = NULL; /* disable */
790 prog->float_substr = prog->float_utf8 = NULL; /* clear */
791 check = NULL; /* abort */
cad2e5aa 792 s = strpos;
3cf5c195
IZ
793 /* XXXX This is a remnant of the old implementation. It
794 looks wasteful, since now INTUIT can use many
6eb5f6b9 795 other heuristics. */
cad2e5aa
JH
796 prog->reganch &= ~RE_USE_INTUIT;
797 }
798 else
799 s = strpos;
800 }
801
6eb5f6b9
JH
802 /* Last resort... */
803 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
804 if (prog->regstclass) {
805 /* minlen == 0 is possible if regstclass is \b or \B,
806 and the fixed substr is ''$.
807 Since minlen is already taken into account, s+1 is before strend;
808 accidentally, minlen >= 1 guaranties no false positives at s + 1
809 even for \b or \B. But (minlen? 1 : 0) below assumes that
810 regstclass does not come from lookahead... */
811 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
812 This leaves EXACTF only, which is dealt with in find_byclass(). */
890ce7af 813 const U8* const str = (U8*)STRING(prog->regstclass);
06b5626a 814 const int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 815 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 816 : 1);
a3b680e6 817 const char * const endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
1aa99e6b 818 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
33b8afdf 819 : (prog->float_substr || prog->float_utf8
1aa99e6b
IH
820 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
821 cl_l, strend)
822 : strend);
6eb5f6b9
JH
823
824 t = s;
3b0527fe 825 s = find_byclass(prog, prog->regstclass, s, endpos, NULL);
6eb5f6b9
JH
826 if (!s) {
827#ifdef DEBUGGING
cbbf8932 828 const char *what = NULL;
6eb5f6b9
JH
829#endif
830 if (endpos == strend) {
a3621e74 831 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
832 "Could not match STCLASS...\n") );
833 goto fail;
834 }
a3621e74 835 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab 836 "This position contradicts STCLASS...\n") );
653099ff
GS
837 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
838 goto fail;
6eb5f6b9 839 /* Contradict one of substrings */
33b8afdf
JH
840 if (prog->anchored_substr || prog->anchored_utf8) {
841 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
a3621e74 842 DEBUG_EXECUTE_r( what = "anchored" );
6eb5f6b9 843 hop_and_restart:
1aa99e6b 844 s = HOP3c(t, 1, strend);
66e933ab
GS
845 if (s + start_shift + end_shift > strend) {
846 /* XXXX Should be taken into account earlier? */
a3621e74 847 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab
GS
848 "Could not match STCLASS...\n") );
849 goto fail;
850 }
5e39e1e5
HS
851 if (!check)
852 goto giveup;
a3621e74 853 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 854 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
855 what, (long)(s + start_shift - i_strpos)) );
856 goto restart;
857 }
66e933ab 858 /* Have both, check_string is floating */
6eb5f6b9
JH
859 if (t + start_shift >= check_at) /* Contradicts floating=check */
860 goto retry_floating_check;
861 /* Recheck anchored substring, but not floating... */
9041c2e3 862 s = check_at;
5e39e1e5
HS
863 if (!check)
864 goto giveup;
a3621e74 865 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 866 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
867 (long)(other_last - i_strpos)) );
868 goto do_other_anchored;
869 }
60e71179
GS
870 /* Another way we could have checked stclass at the
871 current position only: */
872 if (ml_anch) {
873 s = t = t + 1;
5e39e1e5
HS
874 if (!check)
875 goto giveup;
a3621e74 876 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 877 "Looking for /%s^%s/m starting at offset %ld...\n",
e4584336 878 PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
60e71179 879 goto try_at_offset;
66e933ab 880 }
33b8afdf 881 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
60e71179 882 goto fail;
6eb5f6b9
JH
883 /* Check is floating subtring. */
884 retry_floating_check:
885 t = check_at - start_shift;
a3621e74 886 DEBUG_EXECUTE_r( what = "floating" );
6eb5f6b9
JH
887 goto hop_and_restart;
888 }
b7953727 889 if (t != s) {
a3621e74 890 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 891 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
892 (long)(t - i_strpos), (long)(s - i_strpos))
893 );
894 }
895 else {
a3621e74 896 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b7953727
JH
897 "Does not contradict STCLASS...\n");
898 );
899 }
6eb5f6b9 900 }
5e39e1e5 901 giveup:
a3621e74 902 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
5e39e1e5
HS
903 PL_colors[4], (check ? "Guessed" : "Giving up"),
904 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 905 return s;
2c2d71f5
JH
906
907 fail_finish: /* Substring not found */
33b8afdf
JH
908 if (prog->check_substr || prog->check_utf8) /* could be removed already */
909 BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
cad2e5aa 910 fail:
a3621e74 911 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
e4584336 912 PL_colors[4], PL_colors[5]));
bd61b366 913 return NULL;
cad2e5aa 914}
9661b544 915
6eb5f6b9 916/* We know what class REx starts with. Try to find this position... */
3b0527fe
DM
917/* if reginfo is NULL, its a dryrun */
918
3c3eec57 919STATIC char *
3b0527fe
DM
920S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char
921*strend, const regmatch_info *reginfo)
a687059c 922{
27da23d5 923 dVAR;
1df70142 924 const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
6eb5f6b9 925 char *m;
d8093b23 926 STRLEN ln;
5dab1207 927 STRLEN lnc;
078c425b 928 register STRLEN uskip;
d8093b23
G
929 unsigned int c1;
930 unsigned int c2;
6eb5f6b9
JH
931 char *e;
932 register I32 tmp = 1; /* Scratch variable? */
a3b680e6 933 register const bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 934
6eb5f6b9
JH
935 /* We know what class it must start with. */
936 switch (OP(c)) {
6eb5f6b9 937 case ANYOF:
388cc4de 938 if (do_utf8) {
078c425b 939 while (s + (uskip = UTF8SKIP(s)) <= strend) {
388cc4de
HS
940 if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
941 !UTF8_IS_INVARIANT((U8)s[0]) ?
32fc9b6a
DM
942 reginclass(prog, c, (U8*)s, 0, do_utf8) :
943 REGINCLASS(prog, c, (U8*)s)) {
3b0527fe 944 if (tmp && (!reginfo || regtry(reginfo, s)))
388cc4de
HS
945 goto got_it;
946 else
947 tmp = doevery;
948 }
949 else
950 tmp = 1;
078c425b 951 s += uskip;
388cc4de
HS
952 }
953 }
954 else {
955 while (s < strend) {
956 STRLEN skip = 1;
957
32fc9b6a 958 if (REGINCLASS(prog, c, (U8*)s) ||
388cc4de
HS
959 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
960 /* The assignment of 2 is intentional:
961 * for the folded sharp s, the skip is 2. */
962 (skip = SHARP_S_SKIP))) {
3b0527fe 963 if (tmp && (!reginfo || regtry(reginfo, s)))
388cc4de
HS
964 goto got_it;
965 else
966 tmp = doevery;
967 }
968 else
969 tmp = 1;
970 s += skip;
971 }
a0d0e21e 972 }
6eb5f6b9 973 break;
f33976b4
DB
974 case CANY:
975 while (s < strend) {
3b0527fe 976 if (tmp && (!reginfo || regtry(reginfo, s)))
f33976b4
DB
977 goto got_it;
978 else
979 tmp = doevery;
980 s++;
981 }
982 break;
6eb5f6b9 983 case EXACTF:
5dab1207
NIS
984 m = STRING(c);
985 ln = STR_LEN(c); /* length to match in octets/bytes */
986 lnc = (I32) ln; /* length to match in characters */
1aa99e6b 987 if (UTF) {
a2a2844f 988 STRLEN ulen1, ulen2;
5dab1207 989 U8 *sm = (U8 *) m;
89ebb4a3
JH
990 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
991 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4ad0818d 992 const U32 uniflags = UTF8_ALLOW_DEFAULT;
a2a2844f
JH
993
994 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
995 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
996
89ebb4a3 997 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
041457d9 998 0, uniflags);
89ebb4a3 999 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
041457d9 1000 0, uniflags);
5dab1207
NIS
1001 lnc = 0;
1002 while (sm < ((U8 *) m + ln)) {
1003 lnc++;
1004 sm += UTF8SKIP(sm);
1005 }
1aa99e6b
IH
1006 }
1007 else {
1008 c1 = *(U8*)m;
1009 c2 = PL_fold[c1];
1010 }
6eb5f6b9
JH
1011 goto do_exactf;
1012 case EXACTFL:
5dab1207
NIS
1013 m = STRING(c);
1014 ln = STR_LEN(c);
1015 lnc = (I32) ln;
d8093b23 1016 c1 = *(U8*)m;
6eb5f6b9
JH
1017 c2 = PL_fold_locale[c1];
1018 do_exactf:
db12adc6 1019 e = HOP3c(strend, -((I32)lnc), s);
b3c9acc1 1020
3b0527fe 1021 if (!reginfo && e < s)
6eb5f6b9 1022 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 1023
60a8b682
JH
1024 /* The idea in the EXACTF* cases is to first find the
1025 * first character of the EXACTF* node and then, if
1026 * necessary, case-insensitively compare the full
1027 * text of the node. The c1 and c2 are the first
1028 * characters (though in Unicode it gets a bit
1029 * more complicated because there are more cases
7f16dd3d
JH
1030 * than just upper and lower: one needs to use
1031 * the so-called folding case for case-insensitive
1032 * matching (called "loose matching" in Unicode).
1033 * ibcmp_utf8() will do just that. */
60a8b682 1034
1aa99e6b 1035 if (do_utf8) {
575cac57 1036 UV c, f;
89ebb4a3 1037 U8 tmpbuf [UTF8_MAXBYTES+1];
575cac57 1038 STRLEN len, foldlen;
4ad0818d 1039 const U32 uniflags = UTF8_ALLOW_DEFAULT;
09091399 1040 if (c1 == c2) {
5dab1207
NIS
1041 /* Upper and lower of 1st char are equal -
1042 * probably not a "letter". */
1aa99e6b 1043 while (s <= e) {
89ebb4a3 1044 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1045 uniflags);
80aecb99
JH
1046 if ( c == c1
1047 && (ln == len ||
66423254 1048 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1049 m, (char **)0, ln, (bool)UTF))
3b0527fe 1050 && (!reginfo || regtry(reginfo, s)) )
1aa99e6b 1051 goto got_it;
80aecb99 1052 else {
1df70142 1053 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1054 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1055 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1056 if ( f != c
1057 && (f == c1 || f == c2)
1058 && (ln == foldlen ||
66423254
JH
1059 !ibcmp_utf8((char *) foldbuf,
1060 (char **)0, foldlen, do_utf8,
d07ddd77 1061 m,
eb160463 1062 (char **)0, ln, (bool)UTF))
3b0527fe 1063 && (!reginfo || regtry(reginfo, s)) )
80aecb99
JH
1064 goto got_it;
1065 }
1aa99e6b
IH
1066 s += len;
1067 }
09091399
JH
1068 }
1069 else {
1aa99e6b 1070 while (s <= e) {
89ebb4a3 1071 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1072 uniflags);
80aecb99 1073
60a8b682 1074 /* Handle some of the three Greek sigmas cases.
8c01da3c
JH
1075 * Note that not all the possible combinations
1076 * are handled here: some of them are handled
1077 * by the standard folding rules, and some of
1078 * them (the character class or ANYOF cases)
1079 * are handled during compiletime in
1080 * regexec.c:S_regclass(). */
880bd946
JH
1081 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1082 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1083 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99
JH
1084
1085 if ( (c == c1 || c == c2)
1086 && (ln == len ||
66423254 1087 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1088 m, (char **)0, ln, (bool)UTF))
3b0527fe 1089 && (!reginfo || regtry(reginfo, s)) )
1aa99e6b 1090 goto got_it;
80aecb99 1091 else {
1df70142 1092 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1093 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1094 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1095 if ( f != c
1096 && (f == c1 || f == c2)
1097 && (ln == foldlen ||
a6872d42 1098 !ibcmp_utf8((char *) foldbuf,
66423254 1099 (char **)0, foldlen, do_utf8,
d07ddd77 1100 m,
eb160463 1101 (char **)0, ln, (bool)UTF))
3b0527fe 1102 && (!reginfo || regtry(reginfo, s)) )
80aecb99
JH
1103 goto got_it;
1104 }
1aa99e6b
IH
1105 s += len;
1106 }
09091399 1107 }
1aa99e6b
IH
1108 }
1109 else {
1110 if (c1 == c2)
1111 while (s <= e) {
1112 if ( *(U8*)s == c1
1113 && (ln == 1 || !(OP(c) == EXACTF
1114 ? ibcmp(s, m, ln)
1115 : ibcmp_locale(s, m, ln)))
3b0527fe 1116 && (!reginfo || regtry(reginfo, s)) )
1aa99e6b
IH
1117 goto got_it;
1118 s++;
1119 }
1120 else
1121 while (s <= e) {
1122 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1123 && (ln == 1 || !(OP(c) == EXACTF
1124 ? ibcmp(s, m, ln)
1125 : ibcmp_locale(s, m, ln)))
3b0527fe 1126 && (!reginfo || regtry(reginfo, s)) )
1aa99e6b
IH
1127 goto got_it;
1128 s++;
1129 }
b3c9acc1
IZ
1130 }
1131 break;
bbce6d69 1132 case BOUNDL:
3280af22 1133 PL_reg_flags |= RF_tainted;
bbce6d69 1134 /* FALL THROUGH */
a0d0e21e 1135 case BOUND:
ffc61ed2 1136 if (do_utf8) {
12d33761 1137 if (s == PL_bostr)
ffc61ed2
JH
1138 tmp = '\n';
1139 else {
6136c704 1140 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1141 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1142 }
1143 tmp = ((OP(c) == BOUND ?
9041c2e3 1144 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1145 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1146 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1147 if (tmp == !(OP(c) == BOUND ?
bb7a0f54 1148 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1149 isALNUM_LC_utf8((U8*)s)))
1150 {
1151 tmp = !tmp;
3b0527fe 1152 if ((!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1153 goto got_it;
1154 }
078c425b 1155 s += uskip;
a687059c 1156 }
a0d0e21e 1157 }
667bb95a 1158 else {
12d33761 1159 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1160 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1161 while (s < strend) {
1162 if (tmp ==
1163 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1164 tmp = !tmp;
3b0527fe 1165 if ((!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1166 goto got_it;
1167 }
1168 s++;
a0ed51b3 1169 }
a0ed51b3 1170 }
3b0527fe 1171 if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, s)))
a0ed51b3
LW
1172 goto got_it;
1173 break;
bbce6d69 1174 case NBOUNDL:
3280af22 1175 PL_reg_flags |= RF_tainted;
bbce6d69 1176 /* FALL THROUGH */
a0d0e21e 1177 case NBOUND:
ffc61ed2 1178 if (do_utf8) {
12d33761 1179 if (s == PL_bostr)
ffc61ed2
JH
1180 tmp = '\n';
1181 else {
6136c704 1182 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1183 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1184 }
1185 tmp = ((OP(c) == NBOUND ?
9041c2e3 1186 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1187 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1188 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1189 if (tmp == !(OP(c) == NBOUND ?
bb7a0f54 1190 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1191 isALNUM_LC_utf8((U8*)s)))
1192 tmp = !tmp;
3b0527fe 1193 else if ((!reginfo || regtry(reginfo, s)))
ffc61ed2 1194 goto got_it;
078c425b 1195 s += uskip;
ffc61ed2 1196 }
a0d0e21e 1197 }
667bb95a 1198 else {
12d33761 1199 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1200 tmp = ((OP(c) == NBOUND ?
1201 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1202 while (s < strend) {
1203 if (tmp ==
1204 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1205 tmp = !tmp;
3b0527fe 1206 else if ((!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1207 goto got_it;
1208 s++;
1209 }
a0ed51b3 1210 }
3b0527fe 1211 if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, s)))
a0ed51b3
LW
1212 goto got_it;
1213 break;
a0d0e21e 1214 case ALNUM:
ffc61ed2 1215 if (do_utf8) {
1a4fad37 1216 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1217 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1218 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
3b0527fe 1219 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1220 goto got_it;
1221 else
1222 tmp = doevery;
1223 }
bbce6d69 1224 else
ffc61ed2 1225 tmp = 1;
078c425b 1226 s += uskip;
bbce6d69 1227 }
bbce6d69 1228 }
ffc61ed2
JH
1229 else {
1230 while (s < strend) {
1231 if (isALNUM(*s)) {
3b0527fe 1232 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1233 goto got_it;
1234 else
1235 tmp = doevery;
1236 }
a0ed51b3 1237 else
ffc61ed2
JH
1238 tmp = 1;
1239 s++;
a0ed51b3 1240 }
a0ed51b3
LW
1241 }
1242 break;
bbce6d69 1243 case ALNUML:
3280af22 1244 PL_reg_flags |= RF_tainted;
ffc61ed2 1245 if (do_utf8) {
078c425b 1246 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1247 if (isALNUM_LC_utf8((U8*)s)) {
3b0527fe 1248 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1249 goto got_it;
1250 else
1251 tmp = doevery;
1252 }
a687059c 1253 else
ffc61ed2 1254 tmp = 1;
078c425b 1255 s += uskip;
a0d0e21e 1256 }
a0d0e21e 1257 }
ffc61ed2
JH
1258 else {
1259 while (s < strend) {
1260 if (isALNUM_LC(*s)) {
3b0527fe 1261 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1262 goto got_it;
1263 else
1264 tmp = doevery;
1265 }
a0ed51b3 1266 else
ffc61ed2
JH
1267 tmp = 1;
1268 s++;
a0ed51b3 1269 }
a0ed51b3
LW
1270 }
1271 break;
a0d0e21e 1272 case NALNUM:
ffc61ed2 1273 if (do_utf8) {
1a4fad37 1274 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1275 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1276 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
3b0527fe 1277 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1278 goto got_it;
1279 else
1280 tmp = doevery;
1281 }
bbce6d69 1282 else
ffc61ed2 1283 tmp = 1;
078c425b 1284 s += uskip;
bbce6d69 1285 }
bbce6d69 1286 }
ffc61ed2
JH
1287 else {
1288 while (s < strend) {
1289 if (!isALNUM(*s)) {
3b0527fe 1290 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1291 goto got_it;
1292 else
1293 tmp = doevery;
1294 }
a0ed51b3 1295 else
ffc61ed2
JH
1296 tmp = 1;
1297 s++;
a0ed51b3 1298 }
a0ed51b3
LW
1299 }
1300 break;
bbce6d69 1301 case NALNUML:
3280af22 1302 PL_reg_flags |= RF_tainted;
ffc61ed2 1303 if (do_utf8) {
078c425b 1304 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1305 if (!isALNUM_LC_utf8((U8*)s)) {
3b0527fe 1306 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1307 goto got_it;
1308 else
1309 tmp = doevery;
1310 }
a687059c 1311 else
ffc61ed2 1312 tmp = 1;
078c425b 1313 s += uskip;
a687059c 1314 }
a0d0e21e 1315 }
ffc61ed2
JH
1316 else {
1317 while (s < strend) {
1318 if (!isALNUM_LC(*s)) {
3b0527fe 1319 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1320 goto got_it;
1321 else
1322 tmp = doevery;
1323 }
a0ed51b3 1324 else
ffc61ed2
JH
1325 tmp = 1;
1326 s++;
a0ed51b3 1327 }
a0ed51b3
LW
1328 }
1329 break;
a0d0e21e 1330 case SPACE:
ffc61ed2 1331 if (do_utf8) {
1a4fad37 1332 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1333 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1334 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
3b0527fe 1335 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1336 goto got_it;
1337 else
1338 tmp = doevery;
1339 }
a0d0e21e 1340 else
ffc61ed2 1341 tmp = 1;
078c425b 1342 s += uskip;
2304df62 1343 }
a0d0e21e 1344 }
ffc61ed2
JH
1345 else {
1346 while (s < strend) {
1347 if (isSPACE(*s)) {
3b0527fe 1348 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1349 goto got_it;
1350 else
1351 tmp = doevery;
1352 }
a0ed51b3 1353 else
ffc61ed2
JH
1354 tmp = 1;
1355 s++;
a0ed51b3 1356 }
a0ed51b3
LW
1357 }
1358 break;
bbce6d69 1359 case SPACEL:
3280af22 1360 PL_reg_flags |= RF_tainted;
ffc61ed2 1361 if (do_utf8) {
078c425b 1362 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1363 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
3b0527fe 1364 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1365 goto got_it;
1366 else
1367 tmp = doevery;
1368 }
bbce6d69 1369 else
ffc61ed2 1370 tmp = 1;
078c425b 1371 s += uskip;
bbce6d69 1372 }
bbce6d69 1373 }
ffc61ed2
JH
1374 else {
1375 while (s < strend) {
1376 if (isSPACE_LC(*s)) {
3b0527fe 1377 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1378 goto got_it;
1379 else
1380 tmp = doevery;
1381 }
a0ed51b3 1382 else
ffc61ed2
JH
1383 tmp = 1;
1384 s++;
a0ed51b3 1385 }
a0ed51b3
LW
1386 }
1387 break;
a0d0e21e 1388 case NSPACE:
ffc61ed2 1389 if (do_utf8) {
1a4fad37 1390 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1391 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1392 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
3b0527fe 1393 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1394 goto got_it;
1395 else
1396 tmp = doevery;
1397 }
a0d0e21e 1398 else
ffc61ed2 1399 tmp = 1;
078c425b 1400 s += uskip;
a687059c 1401 }
a0d0e21e 1402 }
ffc61ed2
JH
1403 else {
1404 while (s < strend) {
1405 if (!isSPACE(*s)) {
3b0527fe 1406 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1407 goto got_it;
1408 else
1409 tmp = doevery;
1410 }
a0ed51b3 1411 else
ffc61ed2
JH
1412 tmp = 1;
1413 s++;
a0ed51b3 1414 }
a0ed51b3
LW
1415 }
1416 break;
bbce6d69 1417 case NSPACEL:
3280af22 1418 PL_reg_flags |= RF_tainted;
ffc61ed2 1419 if (do_utf8) {
078c425b 1420 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1421 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
3b0527fe 1422 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1423 goto got_it;
1424 else
1425 tmp = doevery;
1426 }
bbce6d69 1427 else
ffc61ed2 1428 tmp = 1;
078c425b 1429 s += uskip;
bbce6d69 1430 }
bbce6d69 1431 }
ffc61ed2
JH
1432 else {
1433 while (s < strend) {
1434 if (!isSPACE_LC(*s)) {
3b0527fe 1435 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1436 goto got_it;
1437 else
1438 tmp = doevery;
1439 }
a0ed51b3 1440 else
ffc61ed2
JH
1441 tmp = 1;
1442 s++;
a0ed51b3 1443 }
a0ed51b3
LW
1444 }
1445 break;
a0d0e21e 1446 case DIGIT:
ffc61ed2 1447 if (do_utf8) {
1a4fad37 1448 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1449 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1450 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
3b0527fe 1451 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1452 goto got_it;
1453 else
1454 tmp = doevery;
1455 }
a0d0e21e 1456 else
ffc61ed2 1457 tmp = 1;
078c425b 1458 s += uskip;
2b69d0c2 1459 }
a0d0e21e 1460 }
ffc61ed2
JH
1461 else {
1462 while (s < strend) {
1463 if (isDIGIT(*s)) {
3b0527fe 1464 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1465 goto got_it;
1466 else
1467 tmp = doevery;
1468 }
a0ed51b3 1469 else
ffc61ed2
JH
1470 tmp = 1;
1471 s++;
a0ed51b3 1472 }
a0ed51b3
LW
1473 }
1474 break;
b8c5462f
JH
1475 case DIGITL:
1476 PL_reg_flags |= RF_tainted;
ffc61ed2 1477 if (do_utf8) {
078c425b 1478 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1479 if (isDIGIT_LC_utf8((U8*)s)) {
3b0527fe 1480 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1481 goto got_it;
1482 else
1483 tmp = doevery;
1484 }
b8c5462f 1485 else
ffc61ed2 1486 tmp = 1;
078c425b 1487 s += uskip;
b8c5462f 1488 }
b8c5462f 1489 }
ffc61ed2
JH
1490 else {
1491 while (s < strend) {
1492 if (isDIGIT_LC(*s)) {
3b0527fe 1493 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1494 goto got_it;
1495 else
1496 tmp = doevery;
1497 }
b8c5462f 1498 else
ffc61ed2
JH
1499 tmp = 1;
1500 s++;
b8c5462f 1501 }
b8c5462f
JH
1502 }
1503 break;
a0d0e21e 1504 case NDIGIT:
ffc61ed2 1505 if (do_utf8) {
1a4fad37 1506 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1507 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1508 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
3b0527fe 1509 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1510 goto got_it;
1511 else
1512 tmp = doevery;
1513 }
a0d0e21e 1514 else
ffc61ed2 1515 tmp = 1;
078c425b 1516 s += uskip;
a687059c 1517 }
a0d0e21e 1518 }
ffc61ed2
JH
1519 else {
1520 while (s < strend) {
1521 if (!isDIGIT(*s)) {
3b0527fe 1522 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1523 goto got_it;
1524 else
1525 tmp = doevery;
1526 }
a0ed51b3 1527 else
ffc61ed2
JH
1528 tmp = 1;
1529 s++;
a0ed51b3 1530 }
a0ed51b3
LW
1531 }
1532 break;
b8c5462f
JH
1533 case NDIGITL:
1534 PL_reg_flags |= RF_tainted;
ffc61ed2 1535 if (do_utf8) {
078c425b 1536 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1537 if (!isDIGIT_LC_utf8((U8*)s)) {
3b0527fe 1538 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1539 goto got_it;
1540 else
1541 tmp = doevery;
1542 }
b8c5462f 1543 else
ffc61ed2 1544 tmp = 1;
078c425b 1545 s += uskip;
b8c5462f 1546 }
a0ed51b3 1547 }
ffc61ed2
JH
1548 else {
1549 while (s < strend) {
1550 if (!isDIGIT_LC(*s)) {
3b0527fe 1551 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1552 goto got_it;
1553 else
1554 tmp = doevery;
1555 }
cf93c79d 1556 else
ffc61ed2
JH
1557 tmp = 1;
1558 s++;
b8c5462f 1559 }
b8c5462f
JH
1560 }
1561 break;
b3c9acc1 1562 default:
3c3eec57
GS
1563 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1564 break;
d6a28714 1565 }
6eb5f6b9
JH
1566 return 0;
1567 got_it:
1568 return s;
1569}
1570
1571/*
1572 - regexec_flags - match a regexp against a string
1573 */
1574I32
1575Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1576 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1577/* strend: pointer to null at end of string */
1578/* strbeg: real beginning of string */
1579/* minend: end of match must be >=minend after stringarg. */
1580/* data: May be used for some additional optimizations. */
1581/* nosave: For optimizations. */
1582{
97aff369 1583 dVAR;
6eb5f6b9
JH
1584 register char *s;
1585 register regnode *c;
1586 register char *startpos = stringarg;
6eb5f6b9
JH
1587 I32 minlen; /* must match at least this many chars */
1588 I32 dontbother = 0; /* how many characters not to try at end */
6eb5f6b9
JH
1589 I32 end_shift = 0; /* Same for the end. */ /* CC */
1590 I32 scream_pos = -1; /* Internal iterator of scream. */
ccac19ea 1591 char *scream_olds = NULL;
6eb5f6b9 1592 SV* oreplsv = GvSV(PL_replgv);
1df70142 1593 const bool do_utf8 = DO_UTF8(sv);
2757e526 1594 I32 multiline;
2a782b5b 1595#ifdef DEBUGGING
2757e526
JH
1596 SV* dsv0;
1597 SV* dsv1;
2a782b5b 1598#endif
3b0527fe 1599 regmatch_info reginfo; /* create some info to pass to regtry etc */
a3621e74
YO
1600
1601 GET_RE_DEBUG_FLAGS_DECL;
1602
9d4ba2ae 1603 PERL_UNUSED_ARG(data);
6eb5f6b9
JH
1604
1605 /* Be paranoid... */
1606 if (prog == NULL || startpos == NULL) {
1607 Perl_croak(aTHX_ "NULL regexp parameter");
1608 return 0;
1609 }
1610
2757e526 1611 multiline = prog->reganch & PMf_MULTILINE;
3b0527fe 1612 reginfo.prog = prog;
2757e526
JH
1613
1614#ifdef DEBUGGING
1615 dsv0 = PERL_DEBUG_PAD_ZERO(0);
1616 dsv1 = PERL_DEBUG_PAD_ZERO(1);
1617#endif
1618
bac06658
JH
1619 RX_MATCH_UTF8_set(prog, do_utf8);
1620
6eb5f6b9 1621 minlen = prog->minlen;
61a36c01 1622 if (strend - startpos < minlen) {
a3621e74 1623 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584
JH
1624 "String too short [regexec_flags]...\n"));
1625 goto phooey;
1aa99e6b 1626 }
6eb5f6b9 1627
6eb5f6b9
JH
1628 /* Check validity of program. */
1629 if (UCHARAT(prog->program) != REG_MAGIC) {
1630 Perl_croak(aTHX_ "corrupted regexp program");
1631 }
1632
1633 PL_reg_flags = 0;
1634 PL_reg_eval_set = 0;
1635 PL_reg_maxiter = 0;
1636
1637 if (prog->reganch & ROPT_UTF8)
1638 PL_reg_flags |= RF_utf8;
1639
1640 /* Mark beginning of line for ^ and lookbehind. */
3b0527fe 1641 reginfo.bol = startpos; /* XXX not used ??? */
6eb5f6b9 1642 PL_bostr = strbeg;
3b0527fe 1643 reginfo.sv = sv;
6eb5f6b9
JH
1644
1645 /* Mark end of line for $ (and such) */
1646 PL_regeol = strend;
1647
1648 /* see how far we have to get to not match where we matched before */
3b0527fe 1649 reginfo.till = startpos+minend;
6eb5f6b9 1650
6eb5f6b9
JH
1651 /* If there is a "must appear" string, look for it. */
1652 s = startpos;
1653
3b0527fe 1654 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to set reginfo->ganch */
6eb5f6b9
JH
1655 MAGIC *mg;
1656
1657 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
3b0527fe 1658 reginfo.ganch = startpos;
6eb5f6b9
JH
1659 else if (sv && SvTYPE(sv) >= SVt_PVMG
1660 && SvMAGIC(sv)
14befaf4
DM
1661 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1662 && mg->mg_len >= 0) {
3b0527fe 1663 reginfo.ganch = strbeg + mg->mg_len; /* Defined pos() */
6eb5f6b9 1664 if (prog->reganch & ROPT_ANCH_GPOS) {
3b0527fe 1665 if (s > reginfo.ganch)
6eb5f6b9 1666 goto phooey;
3b0527fe 1667 s = reginfo.ganch;
6eb5f6b9
JH
1668 }
1669 }
1670 else /* pos() not defined */
3b0527fe 1671 reginfo.ganch = strbeg;
6eb5f6b9
JH
1672 }
1673
a0714e2c 1674 if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
6eb5f6b9
JH
1675 re_scream_pos_data d;
1676
1677 d.scream_olds = &scream_olds;
1678 d.scream_pos = &scream_pos;
1679 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7 1680 if (!s) {
a3621e74 1681 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1682 goto phooey; /* not present */
3fa9c3d7 1683 }
6eb5f6b9
JH
1684 }
1685
a3621e74 1686 DEBUG_EXECUTE_r({
1df70142
AL
1687 const char * const s0 = UTF
1688 ? pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1689 UNI_DISPLAY_REGEX)
1690 : prog->precomp;
bb7a0f54 1691 const int len0 = UTF ? (int)SvCUR(dsv0) : prog->prelen;
1df70142 1692 const char * const s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
c728cb41 1693 UNI_DISPLAY_REGEX) : startpos;
bb7a0f54 1694 const int len1 = do_utf8 ? (int)SvCUR(dsv1) : strend - startpos;
2a782b5b
JH
1695 if (!PL_colorset)
1696 reginitcolors();
1697 PerlIO_printf(Perl_debug_log,
a0288114 1698 "%sMatching REx%s \"%s%*.*s%s%s\" against \"%s%.*s%s%s\"\n",
e4584336 1699 PL_colors[4], PL_colors[5], PL_colors[0],
9e55ce06 1700 len0, len0, s0,
2a782b5b 1701 PL_colors[1],
9e55ce06 1702 len0 > 60 ? "..." : "",
2a782b5b 1703 PL_colors[0],
9e55ce06
JH
1704 (int)(len1 > 60 ? 60 : len1),
1705 s1, PL_colors[1],
1706 (len1 > 60 ? "..." : "")
2a782b5b
JH
1707 );
1708 });
6eb5f6b9
JH
1709
1710 /* Simplest case: anchored match need be tried only once. */
1711 /* [unless only anchor is BOL and multiline is set] */
1712 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
3b0527fe 1713 if (s == startpos && regtry(&reginfo, startpos))
6eb5f6b9 1714 goto got_it;
7fba1cd6 1715 else if (multiline || (prog->reganch & ROPT_IMPLICIT)
6eb5f6b9
JH
1716 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1717 {
1718 char *end;
1719
1720 if (minlen)
1721 dontbother = minlen - 1;
1aa99e6b 1722 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9 1723 /* for multiline we only have to try after newlines */
33b8afdf 1724 if (prog->check_substr || prog->check_utf8) {
6eb5f6b9
JH
1725 if (s == startpos)
1726 goto after_try;
1727 while (1) {
3b0527fe 1728 if (regtry(&reginfo, s))
6eb5f6b9
JH
1729 goto got_it;
1730 after_try:
1731 if (s >= end)
1732 goto phooey;
1733 if (prog->reganch & RE_USE_INTUIT) {
1734 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1735 if (!s)
1736 goto phooey;
1737 }
1738 else
1739 s++;
1740 }
1741 } else {
1742 if (s > startpos)
1743 s--;
1744 while (s < end) {
1745 if (*s++ == '\n') { /* don't need PL_utf8skip here */
3b0527fe 1746 if (regtry(&reginfo, s))
6eb5f6b9
JH
1747 goto got_it;
1748 }
1749 }
1750 }
1751 }
1752 goto phooey;
1753 } else if (prog->reganch & ROPT_ANCH_GPOS) {
3b0527fe 1754 if (regtry(&reginfo, reginfo.ganch))
6eb5f6b9
JH
1755 goto got_it;
1756 goto phooey;
1757 }
1758
1759 /* Messy cases: unanchored match. */
33b8afdf 1760 if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1761 /* we have /x+whatever/ */
1762 /* it must be a one character string (XXXX Except UTF?) */
33b8afdf 1763 char ch;
bf93d4cc
GS
1764#ifdef DEBUGGING
1765 int did_match = 0;
1766#endif
33b8afdf
JH
1767 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1768 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
3f7c398e 1769 ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
bf93d4cc 1770
1aa99e6b 1771 if (do_utf8) {
6eb5f6b9
JH
1772 while (s < strend) {
1773 if (*s == ch) {
a3621e74 1774 DEBUG_EXECUTE_r( did_match = 1 );
3b0527fe 1775 if (regtry(&reginfo, s)) goto got_it;
6eb5f6b9
JH
1776 s += UTF8SKIP(s);
1777 while (s < strend && *s == ch)
1778 s += UTF8SKIP(s);
1779 }
1780 s += UTF8SKIP(s);
1781 }
1782 }
1783 else {
1784 while (s < strend) {
1785 if (*s == ch) {
a3621e74 1786 DEBUG_EXECUTE_r( did_match = 1 );
3b0527fe 1787 if (regtry(&reginfo, s)) goto got_it;
6eb5f6b9
JH
1788 s++;
1789 while (s < strend && *s == ch)
1790 s++;
1791 }
1792 s++;
1793 }
1794 }
a3621e74 1795 DEBUG_EXECUTE_r(if (!did_match)
bf93d4cc 1796 PerlIO_printf(Perl_debug_log,
b7953727
JH
1797 "Did not find anchored character...\n")
1798 );
6eb5f6b9 1799 }
a0714e2c
SS
1800 else if (prog->anchored_substr != NULL
1801 || prog->anchored_utf8 != NULL
1802 || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
33b8afdf
JH
1803 && prog->float_max_offset < strend - s)) {
1804 SV *must;
1805 I32 back_max;
1806 I32 back_min;
1807 char *last;
6eb5f6b9 1808 char *last1; /* Last position checked before */
bf93d4cc
GS
1809#ifdef DEBUGGING
1810 int did_match = 0;
1811#endif
33b8afdf
JH
1812 if (prog->anchored_substr || prog->anchored_utf8) {
1813 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1814 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1815 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1816 back_max = back_min = prog->anchored_offset;
1817 } else {
1818 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1819 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1820 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1821 back_max = prog->float_max_offset;
1822 back_min = prog->float_min_offset;
1823 }
1824 if (must == &PL_sv_undef)
1825 /* could not downgrade utf8 check substring, so must fail */
1826 goto phooey;
1827
1828 last = HOP3c(strend, /* Cannot start after this */
1829 -(I32)(CHR_SVLEN(must)
1830 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9
JH
1831
1832 if (s > PL_bostr)
1833 last1 = HOPc(s, -1);
1834 else
1835 last1 = s - 1; /* bogus */
1836
a0288114 1837 /* XXXX check_substr already used to find "s", can optimize if
6eb5f6b9
JH
1838 check_substr==must. */
1839 scream_pos = -1;
1840 dontbother = end_shift;
1841 strend = HOPc(strend, -dontbother);
1842 while ( (s <= last) &&
9041c2e3 1843 ((flags & REXEC_SCREAM)
1aa99e6b 1844 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1845 end_shift, &scream_pos, 0))
1aa99e6b 1846 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1847 (unsigned char*)strend, must,
7fba1cd6 1848 multiline ? FBMrf_MULTILINE : 0))) ) {
4addbd3b
HS
1849 /* we may be pointing at the wrong string */
1850 if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
3f7c398e 1851 s = strbeg + (s - SvPVX_const(sv));
a3621e74 1852 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1853 if (HOPc(s, -back_max) > last1) {
1854 last1 = HOPc(s, -back_min);
1855 s = HOPc(s, -back_max);
1856 }
1857 else {
52657f30 1858 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
6eb5f6b9
JH
1859
1860 last1 = HOPc(s, -back_min);
52657f30 1861 s = t;
6eb5f6b9 1862 }
1aa99e6b 1863 if (do_utf8) {
6eb5f6b9 1864 while (s <= last1) {
3b0527fe 1865 if (regtry(&reginfo, s))
6eb5f6b9
JH
1866 goto got_it;
1867 s += UTF8SKIP(s);
1868 }
1869 }
1870 else {
1871 while (s <= last1) {
3b0527fe 1872 if (regtry(&reginfo, s))
6eb5f6b9
JH
1873 goto got_it;
1874 s++;
1875 }
1876 }
1877 }
a3621e74 1878 DEBUG_EXECUTE_r(if (!did_match)
b7953727 1879 PerlIO_printf(Perl_debug_log,
a0288114 1880 "Did not find %s substr \"%s%.*s%s\"%s...\n",
33b8afdf 1881 ((must == prog->anchored_substr || must == prog->anchored_utf8)
bf93d4cc
GS
1882 ? "anchored" : "floating"),
1883 PL_colors[0],
1884 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 1885 SvPVX_const(must),
b7953727
JH
1886 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1887 );
6eb5f6b9
JH
1888 goto phooey;
1889 }
155aba94 1890 else if ((c = prog->regstclass)) {
f14c76ed
RGS
1891 if (minlen) {
1892 I32 op = (U8)OP(prog->regstclass);
66e933ab 1893 /* don't bother with what can't match */
f14c76ed
RGS
1894 if (PL_regkind[op] != EXACT && op != CANY)
1895 strend = HOPc(strend, -(minlen - 1));
1896 }
a3621e74 1897 DEBUG_EXECUTE_r({
ffc61ed2 1898 SV *prop = sv_newmortal();
cfd0369c
NC
1899 const char *s0;
1900 const char *s1;
9e55ce06
JH
1901 int len0;
1902 int len1;
1903
32fc9b6a 1904 regprop(prog, prop, c);
9e55ce06 1905 s0 = UTF ?
3f7c398e 1906 pv_uni_display(dsv0, (U8*)SvPVX_const(prop), SvCUR(prop), 60,
c728cb41 1907 UNI_DISPLAY_REGEX) :
cfd0369c 1908 SvPVX_const(prop);
9e55ce06
JH
1909 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1910 s1 = UTF ?
c728cb41 1911 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
bb7a0f54 1912 len1 = UTF ? (int)SvCUR(dsv1) : strend - s;
9e55ce06 1913 PerlIO_printf(Perl_debug_log,
a0288114 1914 "Matching stclass \"%*.*s\" against \"%*.*s\"\n",
9e55ce06
JH
1915 len0, len0, s0,
1916 len1, len1, s1);
ffc61ed2 1917 });
3b0527fe 1918 if (find_byclass(prog, c, s, strend, &reginfo))
6eb5f6b9 1919 goto got_it;
a3621e74 1920 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1921 }
1922 else {
1923 dontbother = 0;
a0714e2c 1924 if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
33b8afdf 1925 /* Trim the end. */
d6a28714 1926 char *last;
33b8afdf
JH
1927 SV* float_real;
1928
1929 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1930 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1931 float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
d6a28714
JH
1932
1933 if (flags & REXEC_SCREAM) {
33b8afdf 1934 last = screaminstr(sv, float_real, s - strbeg,
d6a28714
JH
1935 end_shift, &scream_pos, 1); /* last one */
1936 if (!last)
ffc61ed2 1937 last = scream_olds; /* Only one occurrence. */
4addbd3b
HS
1938 /* we may be pointing at the wrong string */
1939 else if (RX_MATCH_COPIED(prog))
3f7c398e 1940 s = strbeg + (s - SvPVX_const(sv));
b8c5462f 1941 }
d6a28714
JH
1942 else {
1943 STRLEN len;
cfd0369c 1944 const char * const little = SvPV_const(float_real, len);
d6a28714 1945
33b8afdf 1946 if (SvTAIL(float_real)) {
d6a28714
JH
1947 if (memEQ(strend - len + 1, little, len - 1))
1948 last = strend - len + 1;
7fba1cd6 1949 else if (!multiline)
9041c2e3 1950 last = memEQ(strend - len, little, len)
bd61b366 1951 ? strend - len : NULL;
b8c5462f 1952 else
d6a28714
JH
1953 goto find_last;
1954 } else {
1955 find_last:
9041c2e3 1956 if (len)
d6a28714 1957 last = rninstr(s, strend, little, little + len);
b8c5462f 1958 else
a0288114 1959 last = strend; /* matching "$" */
b8c5462f 1960 }
b8c5462f 1961 }
bf93d4cc 1962 if (last == NULL) {
a3621e74 1963 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
bf93d4cc 1964 "%sCan't trim the tail, match fails (should not happen)%s\n",
e4584336 1965 PL_colors[4], PL_colors[5]));
bf93d4cc
GS
1966 goto phooey; /* Should not happen! */
1967 }
d6a28714
JH
1968 dontbother = strend - last + prog->float_min_offset;
1969 }
1970 if (minlen && (dontbother < minlen))
1971 dontbother = minlen - 1;
1972 strend -= dontbother; /* this one's always in bytes! */
1973 /* We don't know much -- general case. */
1aa99e6b 1974 if (do_utf8) {
d6a28714 1975 for (;;) {
3b0527fe 1976 if (regtry(&reginfo, s))
d6a28714
JH
1977 goto got_it;
1978 if (s >= strend)
1979 break;
b8c5462f 1980 s += UTF8SKIP(s);
d6a28714
JH
1981 };
1982 }
1983 else {
1984 do {
3b0527fe 1985 if (regtry(&reginfo, s))
d6a28714
JH
1986 goto got_it;
1987 } while (s++ < strend);
1988 }
1989 }
1990
1991 /* Failure. */
1992 goto phooey;
1993
1994got_it:
1995 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1996
1997 if (PL_reg_eval_set) {
1998 /* Preserve the current value of $^R */
1999 if (oreplsv != GvSV(PL_replgv))
2000 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2001 restored, the value remains
2002 the same. */
4f639d21 2003 restore_pos(aTHX_ prog);
d6a28714
JH
2004 }
2005
2006 /* make sure $`, $&, $', and $digit will work later */
2007 if ( !(flags & REXEC_NOT_FIRST) ) {
ed252734 2008 RX_MATCH_COPY_FREE(prog);
d6a28714
JH
2009 if (flags & REXEC_COPY_STR) {
2010 I32 i = PL_regeol - startpos + (stringarg - strbeg);
f8c7b90f 2011#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2012 if ((SvIsCOW(sv)
2013 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2014 if (DEBUG_C_TEST) {
2015 PerlIO_printf(Perl_debug_log,
2016 "Copy on write: regexp capture, type %d\n",
2017 (int) SvTYPE(sv));
2018 }
2019 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
d5263905 2020 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
ed252734
NC
2021 assert (SvPOKp(prog->saved_copy));
2022 } else
2023#endif
2024 {
2025 RX_MATCH_COPIED_on(prog);
2026 s = savepvn(strbeg, i);
2027 prog->subbeg = s;
2028 }
d6a28714 2029 prog->sublen = i;
d6a28714
JH
2030 }
2031 else {
2032 prog->subbeg = strbeg;
2033 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2034 }
2035 }
9041c2e3 2036
d6a28714
JH
2037 return 1;
2038
2039phooey:
a3621e74 2040 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
e4584336 2041 PL_colors[4], PL_colors[5]));
d6a28714 2042 if (PL_reg_eval_set)
4f639d21 2043 restore_pos(aTHX_ prog);
d6a28714
JH
2044 return 0;
2045}
2046
2047/*
2048 - regtry - try match at specific point
2049 */
2050STATIC I32 /* 0 failure, 1 success */
3b0527fe 2051S_regtry(pTHX_ const regmatch_info *reginfo, char *startpos)
d6a28714 2052{
97aff369 2053 dVAR;
d6a28714
JH
2054 register I32 *sp;
2055 register I32 *ep;
2056 CHECKPOINT lastcp;
3b0527fe 2057 regexp *prog = reginfo->prog;
a3621e74 2058 GET_RE_DEBUG_FLAGS_DECL;
d6a28714 2059
02db2b7b
IZ
2060#ifdef DEBUGGING
2061 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
2062#endif
d6a28714
JH
2063 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2064 MAGIC *mg;
2065
2066 PL_reg_eval_set = RS_init;
a3621e74 2067 DEBUG_EXECUTE_r(DEBUG_s(
b900a521
JH
2068 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2069 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 2070 ));
e8347627 2071 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
2072 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2073 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2074 SAVETMPS;
2075 /* Apparently this is not needed, judging by wantarray. */
e8347627 2076 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
2077 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2078
3b0527fe 2079 if (reginfo->sv) {
d6a28714 2080 /* Make $_ available to executed code. */
3b0527fe 2081 if (reginfo->sv != DEFSV) {
59f00321 2082 SAVE_DEFSV;
3b0527fe 2083 DEFSV = reginfo->sv;
b8c5462f 2084 }
d6a28714 2085
3b0527fe
DM
2086 if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2087 && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
d6a28714 2088 /* prepare for quick setting of pos */
d300d9fa
NC
2089#ifdef PERL_OLD_COPY_ON_WRITE
2090 if (SvIsCOW(sv))
2091 sv_force_normal_flags(sv, 0);
2092#endif
3b0527fe 2093 mg = sv_magicext(reginfo->sv, (SV*)0, PERL_MAGIC_regex_global,
d300d9fa 2094 &PL_vtbl_mglob, NULL, 0);
d6a28714 2095 mg->mg_len = -1;
b8c5462f 2096 }
d6a28714
JH
2097 PL_reg_magic = mg;
2098 PL_reg_oldpos = mg->mg_len;
4f639d21 2099 SAVEDESTRUCTOR_X(restore_pos, prog);
d6a28714 2100 }
09687e5a 2101 if (!PL_reg_curpm) {
a02a5408 2102 Newxz(PL_reg_curpm, 1, PMOP);
09687e5a
AB
2103#ifdef USE_ITHREADS
2104 {
2105 SV* repointer = newSViv(0);
577e12cc 2106 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2107 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2108 av_push(PL_regex_padav,repointer);
2109 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2110 PL_regex_pad = AvARRAY(PL_regex_padav);
2111 }
2112#endif
2113 }
aaa362c4 2114 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2115 PL_reg_oldcurpm = PL_curpm;
2116 PL_curpm = PL_reg_curpm;
2117 if (RX_MATCH_COPIED(prog)) {
2118 /* Here is a serious problem: we cannot rewrite subbeg,
2119 since it may be needed if this match fails. Thus
2120 $` inside (?{}) could fail... */
2121 PL_reg_oldsaved = prog->subbeg;
2122 PL_reg_oldsavedlen = prog->sublen;
f8c7b90f 2123#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2124 PL_nrs = prog->saved_copy;
2125#endif
d6a28714
JH
2126 RX_MATCH_COPIED_off(prog);
2127 }
2128 else
bd61b366 2129 PL_reg_oldsaved = NULL;
d6a28714
JH
2130 prog->subbeg = PL_bostr;
2131 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2132 }
973dddac 2133 prog->startp[0] = startpos - PL_bostr;
d6a28714
JH
2134 PL_reginput = startpos;
2135 PL_regstartp = prog->startp;
2136 PL_regendp = prog->endp;
2137 PL_reglastparen = &prog->lastparen;
a01268b5 2138 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714 2139 prog->lastparen = 0;
03994de8 2140 prog->lastcloseparen = 0;
d6a28714 2141 PL_regsize = 0;
a3621e74 2142 DEBUG_EXECUTE_r(PL_reg_starttry = startpos);
d6a28714
JH
2143 if (PL_reg_start_tmpl <= prog->nparens) {
2144 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2145 if(PL_reg_start_tmp)
2146 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2147 else
a02a5408 2148 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
d6a28714
JH
2149 }
2150
2151 /* XXXX What this code is doing here?!!! There should be no need
2152 to do this again and again, PL_reglastparen should take care of
3dd2943c 2153 this! --ilya*/
dafc8851
JH
2154
2155 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2156 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2157 * PL_reglastparen), is not needed at all by the test suite
2158 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2159 * enough, for building DynaLoader, or otherwise this
2160 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2161 * will happen. Meanwhile, this code *is* needed for the
2162 * above-mentioned test suite tests to succeed. The common theme
2163 * on those tests seems to be returning null fields from matches.
2164 * --jhi */
dafc8851 2165#if 1
d6a28714
JH
2166 sp = prog->startp;
2167 ep = prog->endp;
2168 if (prog->nparens) {
097eb12c 2169 register I32 i;
eb160463 2170 for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
d6a28714
JH
2171 *++sp = -1;
2172 *++ep = -1;
2173 }
2174 }
dafc8851 2175#endif
02db2b7b 2176 REGCP_SET(lastcp);
3b0527fe 2177 if (regmatch(reginfo, prog->program + 1)) {
d6a28714
JH
2178 prog->endp[0] = PL_reginput - PL_bostr;
2179 return 1;
2180 }
02db2b7b 2181 REGCP_UNWIND(lastcp);
d6a28714
JH
2182 return 0;
2183}
2184
02db2b7b
IZ
2185#define RE_UNWIND_BRANCH 1
2186#define RE_UNWIND_BRANCHJ 2
2187
2188union re_unwind_t;
2189
2190typedef struct { /* XX: makes sense to enlarge it... */
2191 I32 type;
2192 I32 prev;
2193 CHECKPOINT lastcp;
2194} re_unwind_generic_t;
2195
2196typedef struct {
2197 I32 type;
2198 I32 prev;
2199 CHECKPOINT lastcp;
2200 I32 lastparen;
2201 regnode *next;
2202 char *locinput;
2203 I32 nextchr;
3a2830be 2204 int minmod;
02db2b7b
IZ
2205#ifdef DEBUGGING
2206 int regindent;
2207#endif
2208} re_unwind_branch_t;
2209
2210typedef union re_unwind_t {
2211 I32 type;
2212 re_unwind_generic_t generic;
2213 re_unwind_branch_t branch;
2214} re_unwind_t;
2215
8ba1375e
MJD
2216#define sayYES goto yes
2217#define sayNO goto no
e0f9d4a8 2218#define sayNO_ANYOF goto no_anyof
8ba1375e 2219#define sayYES_FINAL goto yes_final
8ba1375e
MJD
2220#define sayNO_FINAL goto no_final
2221#define sayNO_SILENT goto do_no
2222#define saySAME(x) if (x) goto yes; else goto no
2223
3ab3c9b4
HS
2224#define POSCACHE_SUCCESS 0 /* caching success rather than failure */
2225#define POSCACHE_SEEN 1 /* we know what we're caching */
2226#define POSCACHE_START 2 /* the real cache: this bit maps to pos 0 */
7409bbd3 2227
3ab3c9b4 2228#define CACHEsayYES STMT_START { \
d8319b27 2229 if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \
7409bbd3
DM
2230 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) { \
2231 PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) | (1<<POSCACHE_SEEN); \
2232 PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
2233 } \
2234 else if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS)) { \
2235 PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
2236 } \
2237 else { \
3ab3c9b4
HS
2238 /* cache records failure, but this is success */ \
2239 DEBUG_r( \
2240 PerlIO_printf(Perl_debug_log, \
2241 "%*s (remove success from failure cache)\n", \
2242 REPORT_CODE_OFF+PL_regindent*2, "") \
2243 ); \
d8319b27 2244 PL_reg_poscache[st->u.whilem.cache_offset] &= ~(1<<st->u.whilem.cache_bit); \
3ab3c9b4
HS
2245 } \
2246 } \
2247 sayYES; \
2248} STMT_END
7409bbd3 2249
3ab3c9b4 2250#define CACHEsayNO STMT_START { \
d8319b27 2251 if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \
7409bbd3 2252 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) { \
3ab3c9b4 2253 PL_reg_poscache[0] |= (1<<POSCACHE_SEEN); \
7409bbd3
DM
2254 PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
2255 } \
2256 else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2257 PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
2258 } \
2259 else { \
3ab3c9b4
HS
2260 /* cache records success, but this is failure */ \
2261 DEBUG_r( \
2262 PerlIO_printf(Perl_debug_log, \
2263 "%*s (remove failure from success cache)\n", \
2264 REPORT_CODE_OFF+PL_regindent*2, "") \
2265 ); \
d8319b27 2266 PL_reg_poscache[st->u.whilem.cache_offset] &= ~(1<<st->u.whilem.cache_bit); \
3ab3c9b4
HS
2267 } \
2268 } \
2269 sayNO; \
2270} STMT_END
2271
a3621e74
YO
2272/* this is used to determine how far from the left messages like
2273 'failed...' are printed. Currently 29 makes these messages line
2274 up with the opcode they refer to. Earlier perls used 25 which
2275 left these messages outdented making reviewing a debug output
2276 quite difficult.
2277*/
2278#define REPORT_CODE_OFF 29
2279
2280
2281/* Make sure there is a test for this +1 options in re_tests */
2282#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2283
9e137952
DM
2284/* this value indiciates that the c1/c2 "next char" test should be skipped */
2285#define CHRTEST_VOID -1000
2286
86545054
DM
2287#define SLAB_FIRST(s) (&(s)->states[0])
2288#define SLAB_LAST(s) (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2289
5d9a96ca
DM
2290/* grab a new slab and return the first slot in it */
2291
2292STATIC regmatch_state *
2293S_push_slab(pTHX)
2294{
54df2634
NC
2295#if PERL_VERSION < 9
2296 dMY_CXT;
2297#endif
5d9a96ca
DM
2298 regmatch_slab *s = PL_regmatch_slab->next;
2299 if (!s) {
2300 Newx(s, 1, regmatch_slab);
2301 s->prev = PL_regmatch_slab;
2302 s->next = NULL;
2303 PL_regmatch_slab->next = s;
2304 }
2305 PL_regmatch_slab = s;
86545054 2306 return SLAB_FIRST(s);
5d9a96ca 2307}
5b47454d 2308
95b24440
DM
2309/* simulate a recursive call to regmatch */
2310
2311#define REGMATCH(ns, where) \
5d9a96ca
DM
2312 st->scan = scan; \
2313 scan = (ns); \
2314 st->resume_state = resume_##where; \
95b24440
DM
2315 goto start_recurse; \
2316 resume_point_##where:
2317
aa283a38
DM
2318
2319/* push a new regex state. Set newst to point to it */
2320
2321#define PUSH_STATE(newst, resume) \
2322 depth++; \
2323 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "PUSH STATE(%d)\n", depth)); \
2324 st->scan = scan; \
2325 st->next = next; \
2326 st->n = n; \
2327 st->locinput = locinput; \
2328 st->resume_state = resume; \
2329 newst = st+1; \
86545054 2330 if (newst > SLAB_LAST(PL_regmatch_slab)) \
aa283a38
DM
2331 newst = S_push_slab(aTHX); \
2332 PL_regmatch_state = newst; \
2333 newst->cc = 0; \
2334 newst->minmod = 0; \
2335 newst->sw = 0; \
2336 newst->logical = 0; \
2337 newst->unwind = 0; \
2338 locinput = PL_reginput; \
2339 nextchr = UCHARAT(locinput);
2340
2341#define POP_STATE \
2342 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE(%d)\n", depth)); \
2343 depth--; \
2344 st--; \
86545054 2345 if (st < SLAB_FIRST(PL_regmatch_slab)) { \
aa283a38 2346 PL_regmatch_slab = PL_regmatch_slab->prev; \
86545054 2347 st = SLAB_LAST(PL_regmatch_slab); \
aa283a38
DM
2348 } \
2349 PL_regmatch_state = st; \
2350 scan = st->scan; \
2351 next = st->next; \
2352 n = st->n; \
2353 locinput = st->locinput; \
2354 nextchr = UCHARAT(locinput);
2355
d6a28714
JH
2356/*
2357 - regmatch - main matching routine
2358 *
2359 * Conceptually the strategy is simple: check to see whether the current
2360 * node matches, call self recursively to see whether the rest matches,
2361 * and then act accordingly. In practice we make some effort to avoid
2362 * recursion, in particular by going through "ordinary" nodes (that don't
2363 * need to know whether the rest of the match failed) by a loop instead of
2364 * by recursion.
2365 */
2366/* [lwall] I've hoisted the register declarations to the outer block in order to
2367 * maybe save a little bit of pushing and popping on the stack. It also takes
2368 * advantage of machines that use a register save mask on subroutine entry.
95b24440
DM
2369 *
2370 * This function used to be heavily recursive, but since this had the
2371 * effect of blowing the CPU stack on complex regexes, it has been
2372 * restructured to be iterative, and to save state onto the heap rather
2373 * than the stack. Essentially whereever regmatch() used to be called, it
2374 * pushes the current state, notes where to return, then jumps back into
2375 * the main loop.
2376 *
2377 * Originally the structure of this function used to look something like
2378
2379 S_regmatch() {
2380 int a = 1, b = 2;
2381 ...
2382 while (scan != NULL) {
5d9a96ca 2383 a++; // do stuff with a and b
95b24440
DM
2384 ...
2385 switch (OP(scan)) {
2386 case FOO: {
2387 int local = 3;
2388 ...
2389 if (regmatch(...)) // recurse
2390 goto yes;
2391 }
2392 ...
2393 }
2394 }
2395 yes:
2396 return 1;
2397 }
2398
2399 * Now it looks something like this:
2400
5d9a96ca 2401 typedef struct {
95b24440
DM
2402 int a, b, local;
2403 int resume_state;
5d9a96ca 2404 } regmatch_state;
95b24440
DM
2405
2406 S_regmatch() {
5d9a96ca
DM
2407 regmatch_state *st = new();
2408 int depth=0;
2409 st->a++; // do stuff with a and b
95b24440
DM
2410 ...
2411 while (scan != NULL) {
2412 ...
2413 switch (OP(scan)) {
2414 case FOO: {
5d9a96ca 2415 st->local = 3;
95b24440 2416 ...
5d9a96ca
DM
2417 st->scan = scan;
2418 scan = ...;
2419 st->resume_state = resume_FOO;
2420 goto start_recurse; // recurse
95b24440 2421
5d9a96ca
DM
2422 resume_point_FOO:
2423 if (result)
95b24440
DM
2424 goto yes;
2425 }
2426 ...
2427 }
5d9a96ca
DM
2428 start_recurse:
2429 st = new(); push a new state
2430 st->a = 1; st->b = 2;
2431 depth++;
95b24440 2432 }
5d9a96ca 2433 yes:
95b24440 2434 result = 1;
5d9a96ca
DM
2435 if (depth--) {
2436 st = pop();
95b24440
DM
2437 switch (resume_state) {
2438 case resume_FOO:
2439 goto resume_point_FOO;
2440 ...
2441 }
2442 }
2443 return result
2444 }
2445
2446 * WARNING: this means that any line in this function that contains a
2447 * REGMATCH() or TRYPAREN() is actually simulating a recursive call to
2448 * regmatch() using gotos instead. Thus the values of any local variables
2449 * not saved in the regmatch_state structure will have been lost when
2450 * execution resumes on the next line .
5d9a96ca
DM
2451 *
2452 * States (ie the st pointer) are allocated in slabs of about 4K in size.
2453 * PL_regmatch_state always points to the currently active state, and
2454 * PL_regmatch_slab points to the slab currently containing PL_regmatch_state.
2455 * The first time regmatch is called, the first slab is allocated, and is
2456 * never freed until interpreter desctruction. When the slab is full,
2457 * a new one is allocated chained to the end. At exit from regmatch, slabs
2458 * allocated since entry are freed.
d6a28714 2459 */
95b24440
DM
2460
2461
d6a28714 2462STATIC I32 /* 0 failure, 1 success */
3b0527fe 2463S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
d6a28714 2464{
54df2634
NC
2465#if PERL_VERSION < 9
2466 dMY_CXT;
2467#endif
27da23d5 2468 dVAR;
95b24440 2469 register const bool do_utf8 = PL_reg_match_utf8;
4ad0818d 2470 const U32 uniflags = UTF8_ALLOW_DEFAULT;
95b24440 2471
3b0527fe
DM
2472 regexp *rex = reginfo->prog;
2473
5d9a96ca
DM
2474 regmatch_slab *orig_slab;
2475 regmatch_state *orig_state;
a3621e74 2476
5d9a96ca
DM
2477 /* the current state. This is a cached copy of PL_regmatch_state */
2478 register regmatch_state *st;
95b24440 2479
5d9a96ca
DM
2480 /* cache heavy used fields of st in registers */
2481 register regnode *scan;
2482 register regnode *next;
2483 register I32 n = 0; /* initialize to shut up compiler warning */
2484 register char *locinput = PL_reginput;
95b24440 2485
5d9a96ca
DM
2486 /* these variables are NOT saved during a recusive RFEGMATCH: */
2487 register I32 nextchr; /* is always set to UCHARAT(locinput) */
2488 bool result; /* return value of S_regmatch */
2489 regnode *inner; /* Next node in internal branch. */
2490 int depth = 0; /* depth of recursion */
aa283a38 2491 regmatch_state *newst; /* when pushing a state, this is the new one */
77cb431f
DM
2492 regmatch_state *yes_state = NULL; /* state to pop to on success of
2493 subpattern */
95b24440
DM
2494
2495#ifdef DEBUGGING
ab74612d 2496 SV *re_debug_flags = NULL;
a3621e74 2497 GET_RE_DEBUG_FLAGS;
d6a28714
JH
2498 PL_regindent++;
2499#endif
2500
5d9a96ca
DM
2501 /* on first ever call to regmatch, allocate first slab */
2502 if (!PL_regmatch_slab) {
2503 Newx(PL_regmatch_slab, 1, regmatch_slab);
2504 PL_regmatch_slab->prev = NULL;
2505 PL_regmatch_slab->next = NULL;
86545054 2506 PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
5d9a96ca
DM
2507 }
2508
2509 /* remember current high-water mark for exit */
2510 /* XXX this should be done with SAVE* instead */
2511 orig_slab = PL_regmatch_slab;
2512 orig_state = PL_regmatch_state;
2513
2514 /* grab next free state slot */
2515 st = ++PL_regmatch_state;
86545054 2516 if (st > SLAB_LAST(PL_regmatch_slab))
5d9a96ca
DM
2517 st = PL_regmatch_state = S_push_slab(aTHX);
2518
2519 st->minmod = 0;
2520 st->sw = 0;
2521 st->logical = 0;
2522 st->unwind = 0;
2523 st->cc = NULL;
d6a28714
JH
2524 /* Note that nextchr is a byte even in UTF */
2525 nextchr = UCHARAT(locinput);
2526 scan = prog;
2527 while (scan != NULL) {
8ba1375e 2528
a3621e74 2529 DEBUG_EXECUTE_r( {
6136c704 2530 SV * const prop = sv_newmortal();
1df70142
AL
2531 const int docolor = *PL_colors[0];
2532 const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2533 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2534 /* The part of the string before starttry has one color
2535 (pref0_len chars), between starttry and current
2536 position another one (pref_len - pref0_len chars),
2537 after the current position the third one.
2538 We assume that pref0_len <= pref_len, otherwise we
2539 decrease pref0_len. */
9041c2e3 2540 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2541 ? (5 + taill) - l : locinput - PL_bostr;
2542 int pref0_len;
d6a28714 2543
df1ffd02 2544 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2545 pref_len++;
2546 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2547 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2548 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2549 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2550 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2551 l--;
d6a28714
JH
2552 if (pref0_len < 0)
2553 pref0_len = 0;
2554 if (pref0_len > pref_len)
2555 pref0_len = pref_len;
32fc9b6a 2556 regprop(rex, prop, scan);
2a782b5b 2557 {
1df70142 2558 const char * const s0 =
f14c76ed 2559 do_utf8 && OP(scan) != CANY ?
95b24440 2560 pv_uni_display(PERL_DEBUG_PAD(0), (U8*)(locinput - pref_len),
c728cb41 2561 pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2562 locinput - pref_len;
bb7a0f54 2563 const int len0 = do_utf8 ? (int)strlen(s0) : pref0_len;
1df70142 2564 const char * const s1 = do_utf8 && OP(scan) != CANY ?
95b24440
DM
2565 pv_uni_display(PERL_DEBUG_PAD(1),
2566 (U8*)(locinput - pref_len + pref0_len),
c728cb41 2567 pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2568 locinput - pref_len + pref0_len;
bb7a0f54 2569 const int len1 = do_utf8 ? (int)strlen(s1) : pref_len - pref0_len;
1df70142 2570 const char * const s2 = do_utf8 && OP(scan) != CANY ?
95b24440 2571 pv_uni_display(PERL_DEBUG_PAD(2), (U8*)locinput,
c728cb41 2572 PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2573 locinput;
bb7a0f54 2574 const int len2 = do_utf8 ? (int)strlen(s2) : l;
2a782b5b
JH
2575 PerlIO_printf(Perl_debug_log,
2576 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2577 (IV)(locinput - PL_bostr),
2578 PL_colors[4],
2579 len0, s0,
2580 PL_colors[5],
2581 PL_colors[2],
2582 len1, s1,
2583 PL_colors[3],
2584 (docolor ? "" : "> <"),
2585 PL_colors[0],
2586 len2, s2,
2587 PL_colors[1],
2588 15 - l - pref_len + 1,
2589 "",
4f639d21 2590 (IV)(scan - rex->program), PL_regindent*2, "",
3f7c398e 2591 SvPVX_const(prop));
2a782b5b
JH
2592 }
2593 });
d6a28714
JH
2594
2595 next = scan + NEXT_OFF(scan);
2596 if (next == scan)
2597 next = NULL;
2598
2599 switch (OP(scan)) {
2600 case BOL:
7fba1cd6 2601 if (locinput == PL_bostr)
d6a28714 2602 {
3b0527fe 2603 /* reginfo->till = reginfo->bol; */
b8c5462f
JH
2604 break;
2605 }
d6a28714
JH
2606 sayNO;
2607 case MBOL:
12d33761
HS
2608 if (locinput == PL_bostr ||
2609 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2610 {
b8c5462f
JH
2611 break;
2612 }
d6a28714
JH
2613 sayNO;
2614 case SBOL:
c2a73568 2615 if (locinput == PL_bostr)
b8c5462f 2616 break;
d6a28714
JH
2617 sayNO;
2618 case GPOS:
3b0527fe 2619 if (locinput == reginfo->ganch)
d6a28714
JH
2620 break;
2621 sayNO;
2622 case EOL:
d6a28714
JH
2623 goto seol;
2624 case MEOL:
d6a28714 2625 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2626 sayNO;
b8c5462f 2627 break;
d6a28714
JH
2628 case SEOL:
2629 seol:
2630 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2631 sayNO;
d6a28714 2632 if (PL_regeol - locinput > 1)
b8c5462f 2633 sayNO;
b8c5462f 2634 break;
d6a28714
JH
2635 case EOS:
2636 if (PL_regeol != locinput)
b8c5462f 2637 sayNO;
d6a28714 2638 break;
ffc61ed2 2639 case SANY:
d6a28714 2640 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2641 sayNO;
f33976b4
DB
2642 if (do_utf8) {
2643 locinput += PL_utf8skip[nextchr];
2644 if (locinput > PL_regeol)
2645 sayNO;
2646 nextchr = UCHARAT(locinput);
2647 }
2648 else
2649 nextchr = UCHARAT(++locinput);
2650 break;
2651 case CANY:
2652 if (!nextchr && locinput >= PL_regeol)
2653 sayNO;
b8c5462f 2654 nextchr = UCHARAT(++locinput);
a0d0e21e 2655 break;
ffc61ed2 2656 case REG_ANY:
1aa99e6b
IH
2657 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2658 sayNO;
2659 if (do_utf8) {
b8c5462f 2660 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2661 if (locinput > PL_regeol)
2662 sayNO;
a0ed51b3 2663 nextchr = UCHARAT(locinput);
a0ed51b3 2664 }
1aa99e6b
IH
2665 else
2666 nextchr = UCHARAT(++locinput);
a0ed51b3 2667 break;
a3621e74
YO
2668
2669
2670
2671 /*
2672 traverse the TRIE keeping track of all accepting states
2673 we transition through until we get to a failing node.
2674
a3621e74
YO
2675
2676 */
5b47454d 2677 case TRIE:
a3621e74
YO
2678 case TRIEF:
2679 case TRIEFL:
2680 {
a3621e74
YO
2681 U8 *uc = ( U8* )locinput;
2682 U32 state = 1;
2683 U16 charid = 0;
2684 U32 base = 0;
2685 UV uvc = 0;
2686 STRLEN len = 0;
2687 STRLEN foldlen = 0;
a3621e74
YO
2688 U8 *uscan = (U8*)NULL;
2689 STRLEN bufflen=0;
95b24440 2690 SV *sv_accept_buff = NULL;
5b47454d
DM
2691 const enum { trie_plain, trie_utf8, trie_uft8_fold }
2692 trie_type = do_utf8 ?
2693 (OP(scan) == TRIE ? trie_utf8 : trie_uft8_fold)
2694 : trie_plain;
2695
7087a21c
NC
2696 /* what trie are we using right now */
2697 reg_trie_data *trie
32fc9b6a 2698 = (reg_trie_data*)rex->data->data[ ARG( scan ) ];
d8319b27 2699 st->u.trie.accepted = 0; /* how many accepting states we have seen */
95b24440 2700 result = 0;
a3621e74
YO
2701
2702 while ( state && uc <= (U8*)PL_regeol ) {
2703
5b47454d 2704 if (trie->states[ state ].wordnum) {
d8319b27 2705 if (!st->u.trie.accepted ) {
5b47454d
DM
2706 ENTER;
2707 SAVETMPS;
2708 bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
2709 sv_accept_buff=newSV(bufflen *
2710 sizeof(reg_trie_accepted) - 1);
2711 SvCUR_set(sv_accept_buff,
2712 sizeof(reg_trie_accepted));
2713 SvPOK_on(sv_accept_buff);
2714 sv_2mortal(sv_accept_buff);
d8319b27 2715 st->u.trie.accept_buff =
5b47454d
DM
2716 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
2717 }
2718 else {
d8319b27 2719 if (st->u.trie.accepted >= bufflen) {
5b47454d 2720 bufflen *= 2;
d8319b27 2721 st->u.trie.accept_buff =(reg_trie_accepted*)
5b47454d
DM
2722 SvGROW(sv_accept_buff,
2723 bufflen * sizeof(reg_trie_accepted));
2724 }
2725 SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
2726 + sizeof(reg_trie_accepted));
2727 }
d8319b27
DM
2728 st->u.trie.accept_buff[st->u.trie.accepted].wordnum = trie->states[state].wordnum;
2729 st->u.trie.accept_buff[st->u.trie.accepted].endpos = uc;
2730 ++st->u.trie.accepted;
5b47454d 2731 }
a3621e74
YO
2732
2733 base = trie->states[ state ].trans.base;
2734
2735 DEBUG_TRIE_EXECUTE_r(
2736 PerlIO_printf( Perl_debug_log,
e4584336 2737 "%*s %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ",
a3621e74 2738 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
d8319b27 2739 (UV)state, (UV)base, (UV)st->u.trie.accepted );
a3621e74
YO
2740 );
2741
2742 if ( base ) {
5b47454d
DM
2743 switch (trie_type) {
2744 case trie_uft8_fold:
a3621e74
YO
2745 if ( foldlen>0 ) {
2746 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
2747 foldlen -= len;
2748 uscan += len;
2749 len=0;
2750 } else {
1df70142 2751 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
a3621e74
YO
2752 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2753 uvc = to_uni_fold( uvc, foldbuf, &foldlen );
2754 foldlen -= UNISKIP( uvc );
2755 uscan = foldbuf + UNISKIP( uvc );
2756 }
5b47454d
DM
2757 break;
2758 case trie_utf8:
2759 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN,
2760 &len, uniflags );
2761 break;
2762 case trie_plain:
e4584336 2763 uvc = (UV)*uc;
a3621e74
YO
2764 len = 1;
2765 }
2766
5b47454d
DM
2767 if (uvc < 256) {
2768 charid = trie->charmap[ uvc ];
2769 }
2770 else {
2771 charid = 0;
2772 if (trie->widecharmap) {
2773 SV** svpp = (SV**)NULL;
2774 svpp = hv_fetch(trie->widecharmap,
2775 (char*)&uvc, sizeof(UV), 0);
2776 if (svpp)
2777 charid = (U16)SvIV(*svpp);
2778 }
2779 }
a3621e74 2780
5b47454d
DM
2781 if (charid &&
2782 (base + charid > trie->uniquecharcount )
2783 && (base + charid - 1 - trie->uniquecharcount
2784 < trie->lasttrans)
2785 && trie->trans[base + charid - 1 -
2786 trie->uniquecharcount].check == state)
2787 {
2788 state = trie->trans[base + charid - 1 -
2789 trie->uniquecharcount ].next;
2790 }
2791 else {
2792 state = 0;
2793 }
2794 uc += len;
2795
2796 }
2797 else {
a3621e74
YO
2798 state = 0;
2799 }
2800 DEBUG_TRIE_EXECUTE_r(
e4584336
RB
2801 PerlIO_printf( Perl_debug_log,
2802 "Charid:%3x CV:%4"UVxf" After State: %4"UVxf"%s\n",
2803 charid, uvc, (UV)state, PL_colors[5] );
a3621e74
YO
2804 );
2805 }
d8319b27 2806 if (!st->u.trie.accepted )
a3621e74 2807 sayNO;
a3621e74
YO
2808
2809 /*
2810 There was at least one accepting state that we
2811 transitioned through. Presumably the number of accepting
2812 states is going to be low, typically one or two. So we
2813 simply scan through to find the one with lowest wordnum.
2814 Once we find it, we swap the last state into its place
2815 and decrement the size. We then try to match the rest of
2816 the pattern at the point where the word ends, if we
2817 succeed then we end the loop, otherwise the loop
2818 eventually terminates once all of the accepting states
2819 have been tried.
2820 */
a3621e74 2821
d8319b27 2822 if ( st->u.trie.accepted == 1 ) {
a3621e74 2823 DEBUG_EXECUTE_r({
097eb12c 2824 SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ 0 ].wordnum-1, 0 );
a3621e74
YO
2825 PerlIO_printf( Perl_debug_log,
2826 "%*s %sonly one match : #%d <%s>%s\n",
e4584336 2827 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
d8319b27 2828 st->u.trie.accept_buff[ 0 ].wordnum,
cfd0369c 2829 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",
a3621e74
YO
2830 PL_colors[5] );
2831 });
d8319b27 2832 PL_reginput = (char *)st->u.trie.accept_buff[ 0 ].endpos;
a3621e74
YO
2833 /* in this case we free tmps/leave before we call regmatch
2834 as we wont be using accept_buff again. */
2835 FREETMPS;
2836 LEAVE;
95b24440
DM
2837 REGMATCH(scan + NEXT_OFF(scan), TRIE1);
2838 /*** all unsaved local vars undefined at this point */
a3621e74
YO
2839 } else {
2840 DEBUG_EXECUTE_r(
e4584336 2841 PerlIO_printf( Perl_debug_log,"%*s %sgot %"IVdf" possible matches%s\n",
d8319b27 2842 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], (IV)st->u.trie.accepted,
a3621e74
YO
2843 PL_colors[5] );
2844 );
d8319b27 2845 while ( !result && st->u.trie.accepted-- ) {
a3621e74
YO
2846 U32 best = 0;
2847 U32 cur;
d8319b27 2848 for( cur = 1 ; cur <= st->u.trie.accepted ; cur++ ) {
e4584336
RB
2849 DEBUG_TRIE_EXECUTE_r(
2850 PerlIO_printf( Perl_debug_log,
2851 "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
2852 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
d8319b27
DM
2853 (IV)best, st->u.trie.accept_buff[ best ].wordnum, (IV)cur,
2854 st->u.trie.accept_buff[ cur ].wordnum, PL_colors[5] );
e4584336 2855 );
a3621e74 2856
d8319b27
DM
2857 if (st->u.trie.accept_buff[cur].wordnum <
2858 st->u.trie.accept_buff[best].wordnum)
e822a8b4 2859 best = cur;
a3621e74
YO
2860 }
2861 DEBUG_EXECUTE_r({
87830502 2862 reg_trie_data * const trie = (reg_trie_data*)
32fc9b6a 2863 rex->data->data[ARG(scan)];
d8319b27 2864 SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ best ].wordnum - 1, 0 );
a3621e74 2865 PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at 0x%p%s\n",
e4584336 2866 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
d8319b27 2867 st->u.trie.accept_buff[best].wordnum,
ca0270c4 2868 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr", (void*)scan,
a3621e74
YO
2869 PL_colors[5] );
2870 });
d8319b27
DM
2871 if ( best<st->u.trie.accepted ) {
2872 reg_trie_accepted tmp = st->u.trie.accept_buff[ best ];
2873 st->u.trie.accept_buff[ best ] = st->u.trie.accept_buff[ st->u.trie.accepted ];
2874 st->u.trie.accept_buff[ st->u.trie.accepted ] = tmp;
2875 best = st->u.trie.accepted;
a3621e74 2876 }
d8319b27 2877 PL_reginput = (char *)st->u.trie.accept_buff[ best ].endpos;
a3621e74
YO
2878
2879 /*
2880 as far as I can tell we only need the SAVETMPS/FREETMPS
2881 for re's with EVAL in them but I'm leaving them in for
2882 all until I can be sure.
2883 */
2884 SAVETMPS;
95b24440
DM
2885 REGMATCH(scan + NEXT_OFF(scan), TRIE2);
2886 /*** all unsaved local vars undefined at this point */
a3621e74
YO
2887 FREETMPS;
2888 }
2889 FREETMPS;
2890 LEAVE;
2891 }
2892
95b24440 2893 if (result) {
a3621e74
YO
2894 sayYES;
2895 } else {
2896 sayNO;
2897 }
2898 }
2899 /* unreached codepoint */
95b24440
DM
2900 case EXACT: {
2901 char *s = STRING(scan);
5d9a96ca 2902 st->ln = STR_LEN(scan);
eb160463 2903 if (do_utf8 != UTF) {
bc517b45 2904 /* The target and the pattern have differing utf8ness. */
1aa99e6b 2905 char *l = locinput;
5d9a96ca 2906 const char *e = s + st->ln;
a72c7584 2907
5ff6fc6d
JH
2908 if (do_utf8) {
2909 /* The target is utf8, the pattern is not utf8. */
1aa99e6b 2910 while (s < e) {
a3b680e6 2911 STRLEN ulen;
1aa99e6b 2912 if (l >= PL_regeol)
5ff6fc6d
JH
2913 sayNO;
2914 if (NATIVE_TO_UNI(*(U8*)s) !=
89ebb4a3 2915 utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
041457d9 2916 uniflags))
5ff6fc6d 2917 sayNO;
bc517b45 2918 l += ulen;
5ff6fc6d 2919 s ++;
1aa99e6b 2920 }
5ff6fc6d
JH
2921 }
2922 else {
2923 /* The target is not utf8, the pattern is utf8. */
1aa99e6b 2924 while (s < e) {
a3b680e6 2925 STRLEN ulen;
1aa99e6b
IH
2926 if (l >= PL_regeol)
2927 sayNO;
5ff6fc6d 2928 if (NATIVE_TO_UNI(*((U8*)l)) !=
89ebb4a3 2929 utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
041457d9 2930 uniflags))
1aa99e6b 2931 sayNO;
bc517b45 2932 s += ulen;
a72c7584 2933 l ++;
1aa99e6b 2934 }
5ff6fc6d 2935 }
1aa99e6b
IH
2936 locinput = l;
2937 nextchr = UCHARAT(locinput);
2938 break;
2939 }
bc517b45 2940 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2941 /* Inline the first character, for speed. */
2942 if (UCHARAT(s) != nextchr)
2943 sayNO;
5d9a96ca 2944 if (PL_regeol - locinput < st->ln)
d6a28714 2945 sayNO;
5d9a96ca 2946 if (st->ln > 1 && memNE(s, locinput, st->ln))
d6a28714 2947 sayNO;
5d9a96ca 2948 locinput += st->ln;
d6a28714
JH
2949 nextchr = UCHARAT(locinput);
2950 break;
95b24440 2951 }
d6a28714 2952 case EXACTFL:
b8c5462f
JH
2953 PL_reg_flags |= RF_tainted;
2954 /* FALL THROUGH */
95b24440
DM
2955 case EXACTF: {
2956 char *s = STRING(scan);
5d9a96ca 2957 st->ln = STR_LEN(scan);
d6a28714 2958
d07ddd77
JH
2959 if (do_utf8 || UTF) {
2960 /* Either target or the pattern are utf8. */
d6a28714 2961 char *l = locinput;
d07ddd77 2962 char *e = PL_regeol;
bc517b45 2963
5d9a96ca 2964 if (ibcmp_utf8(s, 0, st->ln, (bool)UTF,
1feea2c7 2965 l, &e, 0, do_utf8)) {
5486206c
JH
2966 /* One more case for the sharp s:
2967 * pack("U0U*", 0xDF) =~ /ss/i,
2968 * the 0xC3 0x9F are the UTF-8
2969 * byte sequence for the U+00DF. */
2970 if (!(do_utf8 &&
2971 toLOWER(s[0]) == 's' &&
5d9a96ca 2972 st->ln >= 2 &&
5486206c
JH
2973 toLOWER(s[1]) == 's' &&
2974 (U8)l[0] == 0xC3 &&
2975 e - l >= 2 &&
2976 (U8)l[1] == 0x9F))
2977 sayNO;
2978 }
d07ddd77
JH
2979 locinput = e;
2980 nextchr = UCHARAT(locinput);
2981 break;
a0ed51b3 2982 }
d6a28714 2983
bc517b45
JH
2984 /* Neither the target and the pattern are utf8. */
2985
d6a28714
JH
2986 /* Inline the first character, for speed. */
2987 if (UCHARAT(s) != nextchr &&
2988 UCHARAT(s) != ((OP(scan) == EXACTF)
2989 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2990 sayNO;
5d9a96ca 2991 if (PL_regeol - locinput < st->ln)
b8c5462f 2992 sayNO;
5d9a96ca
DM
2993 if (st->ln > 1 && (OP(scan) == EXACTF
2994 ? ibcmp(s, locinput, st->ln)
2995 : ibcmp_locale(s, locinput, st->ln)))
4633a7c4 2996 sayNO;
5d9a96ca 2997 locinput += st->ln;
d6a28714 2998 nextchr = UCHARAT(locinput);
a0d0e21e 2999 break;
95b24440 3000 }
d6a28714 3001 case ANYOF:
ffc61ed2 3002 if (do_utf8) {
9e55ce06
JH
3003 STRLEN inclasslen = PL_regeol - locinput;
3004
32fc9b6a 3005 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
e0f9d4a8 3006 sayNO_ANYOF;
ffc61ed2
JH
3007 if (locinput >= PL_regeol)
3008 sayNO;
0f0076b4 3009 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
b8c5462f 3010 nextchr = UCHARAT(locinput);
e0f9d4a8 3011 break;
ffc61ed2
JH
3012 }
3013 else {
3014 if (nextchr < 0)
3015 nextchr = UCHARAT(locinput);
32fc9b6a 3016 if (!REGINCLASS(rex, scan, (U8*)locinput))
e0f9d4a8 3017 sayNO_ANYOF;
ffc61ed2
JH
3018 if (!nextchr && locinput >= PL_regeol)
3019 sayNO;
3020 nextchr = UCHARAT(++locinput);
e0f9d4a8
JH
3021 break;
3022 }
3023 no_anyof:
3024 /* If we might have the case of the German sharp s
3025 * in a casefolding Unicode character class. */
3026
ebc501f0
JH
3027 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3028 locinput += SHARP_S_SKIP;
e0f9d4a8 3029 nextchr = UCHARAT(locinput);
ffc61ed2 3030 }
e0f9d4a8
JH
3031 else
3032 sayNO;
b8c5462f 3033 break;
d6a28714 3034 case ALNUML:
b8c5462f
JH
3035 PL_reg_flags |= RF_tainted;
3036 /* FALL THROUGH */
d6a28714 3037 case ALNUM:
b8c5462f 3038 if (!nextchr)
4633a7c4 3039 sayNO;
ffc61ed2 3040 if (do_utf8) {
1a4fad37 3041 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3042 if (!(OP(scan) == ALNUM
bb7a0f54 3043 ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 3044 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
3045 {
3046 sayNO;
a0ed51b3 3047 }
b8c5462f 3048 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3049 nextchr = UCHARAT(locinput);
3050 break;
3051 }
ffc61ed2 3052 if (!(OP(scan) == ALNUM
d6a28714 3053 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 3054 sayNO;
b8c5462f 3055 nextchr = UCHARAT(++locinput);
a0d0e21e 3056 break;
d6a28714 3057 case NALNUML:
b8c5462f
JH
3058 PL_reg_flags |= RF_tainted;
3059 /* FALL THROUGH */
d6a28714
JH
3060 case NALNUM:
3061 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 3062 sayNO;
ffc61ed2 3063 if (do_utf8) {
1a4fad37 3064 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3065 if (OP(scan) == NALNUM
bb7a0f54 3066 ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
3067 : isALNUM_LC_utf8((U8*)locinput))
3068 {
b8c5462f 3069 sayNO;
d6a28714 3070 }
b8c5462f
JH
3071 locinput += PL_utf8skip[nextchr];
3072 nextchr = UCHARAT(locinput);
3073 break;
3074 }
ffc61ed2 3075 if (OP(scan) == NALNUM
d6a28714 3076 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 3077 sayNO;
76e3520e 3078 nextchr = UCHARAT(++locinput);
a0d0e21e 3079 break;
d6a28714
JH
3080 case BOUNDL:
3081 case NBOUNDL:
3280af22 3082 PL_reg_flags |= RF_tainted;
bbce6d69 3083 /* FALL THROUGH */
d6a28714
JH
3084 case BOUND:
3085 case NBOUND:
3086 /* was last char in word? */
ffc61ed2 3087 if (do_utf8) {
12d33761 3088 if (locinput == PL_bostr)
5d9a96ca 3089 st->ln = '\n';
ffc61ed2 3090 else {
a3b680e6 3091 const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
9041c2e3 3092
4ad0818d 3093 st->ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
ffc61ed2
JH
3094 }
3095 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
5d9a96ca 3096 st->ln = isALNUM_uni(st->ln);
1a4fad37 3097 LOAD_UTF8_CHARCLASS_ALNUM();
3568d838 3098 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
3099 }
3100 else {
5d9a96ca 3101 st->ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(st->ln));
ffc61ed2
JH
3102 n = isALNUM_LC_utf8((U8*)locinput);
3103 }
a0ed51b3 3104 }
d6a28714 3105 else {
5d9a96ca 3106 st->ln = (locinput != PL_bostr) ?
12d33761 3107 UCHARAT(locinput - 1) : '\n';
ffc61ed2 3108 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
5d9a96ca 3109 st->ln = isALNUM(st->ln);
ffc61ed2
JH
3110 n = isALNUM(nextchr);
3111 }
3112 else {
5d9a96ca 3113 st->ln = isALNUM_LC(st->ln);
ffc61ed2
JH
3114 n = isALNUM_LC(nextchr);
3115 }
d6a28714 3116 }
5d9a96ca 3117 if (((!st->ln) == (!n)) == (OP(scan) == BOUND ||
ffc61ed2
JH
3118 OP(scan) == BOUNDL))
3119 sayNO;
a0ed51b3 3120 break;
d6a28714 3121 case SPACEL:
3280af22 3122 PL_reg_flags |= RF_tainted;
bbce6d69 3123 /* FALL THROUGH */
d6a28714 3124 case SPACE:
9442cb0e 3125 if (!nextchr)
4633a7c4 3126 sayNO;
1aa99e6b 3127 if (do_utf8) {
fd400ab9 3128 if (UTF8_IS_CONTINUED(nextchr)) {
1a4fad37 3129 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3130 if (!(OP(scan) == SPACE
bb7a0f54 3131 ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
3132 : isSPACE_LC_utf8((U8*)locinput)))
3133 {
3134 sayNO;
3135 }
3136 locinput += PL_utf8skip[nextchr];
3137 nextchr = UCHARAT(locinput);
3138 break;
d6a28714 3139 }
ffc61ed2
JH
3140 if (!(OP(scan) == SPACE
3141 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3142 sayNO;
3143 nextchr = UCHARAT(++locinput);
3144 }
3145 else {
3146 if (!(OP(scan) == SPACE
3147 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3148 sayNO;
3149 nextchr = UCHARAT(++locinput);
a0ed51b3 3150 }
a0ed51b3 3151 break;
d6a28714 3152 case NSPACEL:
3280af22 3153 PL_reg_flags |= RF_tainted;
bbce6d69 3154 /* FALL THROUGH */
d6a28714 3155 case NSPACE:
9442cb0e 3156 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3157 sayNO;
1aa99e6b 3158 if (do_utf8) {
1a4fad37 3159 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3160 if (OP(scan) == NSPACE
bb7a0f54 3161 ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 3162 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
3163 {
3164 sayNO;
3165 }
3166 locinput += PL_utf8skip[nextchr];
3167 nextchr = UCHARAT(locinput);
3168 break;
a0ed51b3 3169 }
ffc61ed2 3170 if (OP(scan) == NSPACE
d6a28714 3171 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 3172 sayNO;
76e3520e 3173 nextchr = UCHARAT(++locinput);
a0d0e21e 3174 break;
d6a28714 3175 case DIGITL:
a0ed51b3
LW
3176 PL_reg_flags |= RF_tainted;
3177 /* FALL THROUGH */
d6a28714 3178 case DIGIT:
9442cb0e 3179 if (!nextchr)
a0ed51b3 3180 sayNO;
1aa99e6b 3181 if (do_utf8) {
1a4fad37 3182 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3183 if (!(OP(scan) == DIGIT
bb7a0f54 3184 ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 3185 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 3186 {
a0ed51b3 3187 sayNO;
dfe13c55 3188 }
6f06b55f 3189 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3190 nextchr = UCHARAT(locinput);
3191 break;
3192 }
ffc61ed2 3193 if (!(OP(scan) == DIGIT
9442cb0e 3194 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
3195 sayNO;
3196 nextchr = UCHARAT(++locinput);
3197 break;
d6a28714 3198 case NDIGITL:
b8c5462f
JH
3199 PL_reg_flags |= RF_tainted;
3200 /* FALL THROUGH */
d6a28714 3201 case NDIGIT:
9442cb0e 3202 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3203 sayNO;
1aa99e6b 3204 if (do_utf8) {
1a4fad37 3205 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3206 if (OP(scan) == NDIGIT
bb7a0f54 3207 ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
3208 : isDIGIT_LC_utf8((U8*)locinput))
3209 {
a0ed51b3 3210 sayNO;
9442cb0e 3211 }
6f06b55f 3212 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3213 nextchr = UCHARAT(locinput);
3214 break;
3215 }
ffc61ed2 3216 if (OP(scan) == NDIGIT
9442cb0e 3217 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
3218 sayNO;
3219 nextchr = UCHARAT(++locinput);
3220 break;
3221 case CLUMP:
b7c83a7e 3222 if (locinput >= PL_regeol)
a0ed51b3 3223 sayNO;
b7c83a7e 3224 if (do_utf8) {
1a4fad37 3225 LOAD_UTF8_CHARCLASS_MARK();
b7c83a7e
JH
3226 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3227 sayNO;
3228 locinput += PL_utf8skip[nextchr];
3229 while (locinput < PL_regeol &&
3230 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3231 locinput += UTF8SKIP(locinput);
3232 if (locinput > PL_regeol)
3233 sayNO;
eb08e2da
JH
3234 }
3235 else
3236 locinput++;
a0ed51b3
LW
3237 nextchr = UCHARAT(locinput);
3238 break;
c8756f30 3239 case REFFL:
3280af22 3240 PL_reg_flags |= RF_tainted;
c8756f30 3241 /* FALL THROUGH */
c277df42 3242 case REF:
95b24440
DM
3243 case REFF: {
3244 char *s;
c277df42 3245 n = ARG(scan); /* which paren pair */
5d9a96ca 3246 st->ln = PL_regstartp[n];
2c2d71f5 3247 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
5d9a96ca 3248 if ((I32)*PL_reglastparen < n || st->ln == -1)
af3f8c16 3249 sayNO; /* Do not match unless seen CLOSEn. */
5d9a96ca 3250 if (st->ln == PL_regendp[n])
a0d0e21e 3251 break;
a0ed51b3 3252
5d9a96ca 3253 s = PL_bostr + st->ln;
1aa99e6b 3254 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 3255 char *l = locinput;
a3b680e6 3256 const char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
3257 /*
3258 * Note that we can't do the "other character" lookup trick as
3259 * in the 8-bit case (no pun intended) because in Unicode we
3260 * have to map both upper and title case to lower case.
3261 */
3262 if (OP(scan) == REFF) {
3263 while (s < e) {
a3b680e6
AL
3264 STRLEN ulen1, ulen2;
3265 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3266 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3267
a0ed51b3
LW
3268 if (l >= PL_regeol)
3269 sayNO;
a2a2844f
JH
3270 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3271 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 3272 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 3273 sayNO;
a2a2844f
JH
3274 s += ulen1;
3275 l += ulen2;
a0ed51b3
LW
3276 }
3277 }
3278 locinput = l;
3279 nextchr = UCHARAT(locinput);
3280 break;
3281 }
3282
a0d0e21e 3283 /* Inline the first character, for speed. */
76e3520e 3284 if (UCHARAT(s) != nextchr &&
c8756f30
AK
3285 (OP(scan) == REF ||
3286 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 3287 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 3288 sayNO;
5d9a96ca
DM
3289 st->ln = PL_regendp[n] - st->ln;
3290 if (locinput + st->ln > PL_regeol)
4633a7c4 3291 sayNO;
5d9a96ca
DM
3292 if (st->ln > 1 && (OP(scan) == REF
3293 ? memNE(s, locinput, st->ln)
c8756f30 3294 : (OP(scan) == REFF
5d9a96ca
DM
3295 ? ibcmp(s, locinput, st->ln)
3296 : ibcmp_locale(s, locinput, st->ln))))
4633a7c4 3297 sayNO;
5d9a96ca 3298 locinput += st->ln;
76e3520e 3299 nextchr = UCHARAT(locinput);
a0d0e21e 3300 break;
95b24440 3301 }
a0d0e21e
LW
3302
3303 case NOTHING:
c277df42 3304 case TAIL:
a0d0e21e
LW
3305 break;
3306 case BACK:
3307 break;
c277df42
IZ
3308 case EVAL:
3309 {
c277df42 3310 SV *ret;
8e5e9ebe 3311 {
4aabdb9b
DM
3312 /* execute the code in the {...} */
3313 dSP;
6136c704 3314 SV ** const before = SP;
4aabdb9b
DM
3315 OP_4tree * const oop = PL_op;
3316 COP * const ocurcop = PL_curcop;
3317 PAD *old_comppad;
4aabdb9b
DM
3318
3319 n = ARG(scan);
32fc9b6a 3320 PL_op = (OP_4tree*)rex->data->data[n];
4aabdb9b 3321 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
32fc9b6a 3322 PAD_SAVE_LOCAL(old_comppad, (PAD*)rex->data->data[n + 2]);
4aabdb9b
DM
3323 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
3324
8e5e9ebe
RGS
3325 CALLRUNOPS(aTHX); /* Scalar context. */
3326 SPAGAIN;
3327 if (SP == before)
075aa684 3328 ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
8e5e9ebe
RGS
3329 else {
3330 ret = POPs;
3331 PUTBACK;
3332 }
4aabdb9b
DM
3333
3334 PL_op = oop;
3335 PAD_RESTORE_LOCAL(old_comppad);
3336 PL_curcop = ocurcop;
3337 if (!st->logical) {
3338 /* /(?{...})/ */
3339 sv_setsv(save_scalar(PL_replgv), ret);
4aabdb9b
DM
3340 break;
3341 }
8e5e9ebe 3342 }
4aabdb9b
DM
3343 if (st->logical == 2) { /* Postponed subexpression: /(??{...})/ */
3344 regexp *re;
4aabdb9b 3345 {
4f639d21
DM
3346 /* extract RE object from returned value; compiling if
3347 * necessary */
3348
6136c704 3349 MAGIC *mg = NULL;
4aabdb9b 3350 SV *sv;
faf82a0b
AE
3351 if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3352 mg = mg_find(sv, PERL_MAGIC_qr);
3353 else if (SvSMAGICAL(ret)) {
3354 if (SvGMAGICAL(ret))
3355 sv_unmagic(ret, PERL_MAGIC_qr);
3356 else
3357 mg = mg_find(ret, PERL_MAGIC_qr);
0f5d15d6 3358 }
faf82a0b 3359
0f5d15d6
IZ
3360 if (mg) {
3361 re = (regexp *)mg->mg_obj;
df0003d4 3362 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
3363 }
3364 else {
3365 STRLEN len;
6136c704 3366 const char * const t = SvPV_const(ret, len);
0f5d15d6 3367 PMOP pm;
a3b680e6 3368 const I32 osize = PL_regsize;
0f5d15d6 3369
5fcd1c1b 3370 Zero(&pm, 1, PMOP);
4aabdb9b 3371 if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
83003860 3372 re = CALLREGCOMP(aTHX_ (char*)t, (char*)t + len, &pm);
9041c2e3 3373 if (!(SvFLAGS(ret)
faf82a0b
AE
3374 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3375 | SVs_GMG)))
14befaf4
DM
3376 sv_magic(ret,(SV*)ReREFCNT_inc(re),
3377 PERL_MAGIC_qr,0,0);
0f5d15d6 3378 PL_regsize = osize;
0f5d15d6 3379 }
4aabdb9b 3380 }
aa283a38
DM
3381
3382 /* run the pattern returned from (??{...}) */
3383
4aabdb9b
DM
3384 DEBUG_EXECUTE_r(
3385 PerlIO_printf(Perl_debug_log,
3386 "Entering embedded \"%s%.60s%s%s\"\n",
3387 PL_colors[0],
3388 re->precomp,
3389 PL_colors[1],
3390 (strlen(re->precomp) > 60 ? "..." : ""))
3391 );
2c2d71f5 3392
4aabdb9b
DM
3393 st->u.eval.cp = regcppush(0); /* Save *all* the positions. */
3394 REGCP_SET(st->u.eval.lastcp);
4aabdb9b
DM
3395 *PL_reglastparen = 0;
3396 *PL_reglastcloseparen = 0;
4aabdb9b 3397 PL_reginput = locinput;
4aabdb9b
DM
3398
3399 /* XXXX This is too dramatic a measure... */
3400 PL_reg_maxiter = 0;
3401
5d9a96ca 3402 st->logical = 0;
aa283a38
DM
3403 st->u.eval.toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
3404 ((re->reganch & ROPT_UTF8) != 0);
3405 if (st->u.eval.toggleutf) PL_reg_flags ^= RF_utf8;
3406 st->u.eval.prev_rex = rex;
aa283a38 3407 rex = re;
aa283a38 3408
77cb431f
DM
3409 /* resume to current state on success */
3410 st->u.yes.prev_yes_state = yes_state;
3411 yes_state = st;
aa283a38
DM
3412 PUSH_STATE(newst, resume_EVAL);
3413 st = newst;
3414
3415 /* now continue from first node in postoned RE */
3416 next = re->program + 1;
3417 break;
4aabdb9b 3418 /* NOTREACHED */
a0ed51b3 3419 }
4aabdb9b
DM
3420 /* /(?(?{...})X|Y)/ */
3421 st->sw = SvTRUE(ret);
3422 st->logical = 0;
c277df42
IZ
3423 break;
3424 }
a0d0e21e 3425 case OPEN:
c277df42 3426 n = ARG(scan); /* which paren pair */
3280af22
NIS
3427 PL_reg_start_tmp[n] = locinput;
3428 if (n > PL_regsize)
3429 PL_regsize = n;
a0d0e21e
LW
3430 break;
3431 case CLOSE:
c277df42 3432 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
3433 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3434 PL_regendp[n] = locinput - PL_bostr;
eb160463 3435 if (n > (I32)*PL_reglastparen)
3280af22 3436 *PL_reglastparen = n;
a01268b5 3437 *PL_reglastcloseparen = n;
a0d0e21e 3438 break;
c277df42
IZ
3439 case GROUPP:
3440 n = ARG(scan); /* which paren pair */
5d9a96ca 3441 st->sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
3442 break;
3443 case IFTHEN:
2c2d71f5 3444 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
5d9a96ca 3445 if (st->sw)
c277df42
IZ
3446 next = NEXTOPER(NEXTOPER(scan));
3447 else {
3448 next = scan + ARG(scan);
3449 if (OP(next) == IFTHEN) /* Fake one. */
3450 next = NEXTOPER(NEXTOPER(next));
3451 }
3452 break;
3453 case LOGICAL:
5d9a96ca 3454 st->logical = scan->flags;
c277df42 3455 break;
2ab05381 3456/*******************************************************************
a0374537
DM
3457 cc points to the regmatch_state associated with the most recent CURLYX.
3458 This struct contains info about the innermost (...)* loop (an
3459 "infoblock"), and a pointer to the next outer cc.
2ab05381
IZ
3460
3461 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3462
95b24440 3463 1) After matching Y, regnode for CURLYX is processed;
2ab05381 3464
a0374537 3465 2) This regnode populates cc, and calls regmatch() recursively
95b24440 3466 with the starting point at WHILEM node;
2ab05381
IZ
3467
3468 3) Each hit of WHILEM node tries to match A and Z (in the order
3469 depending on the current iteration, min/max of {min,max} and
3470 greediness). The information about where are nodes for "A"
a0374537 3471 and "Z" is read from cc, as is info on how many times "A"
2ab05381
IZ
3472 was already matched, and greediness.
3473
3474 4) After A matches, the same WHILEM node is hit again.
3475
95b24440 3476 5) Each time WHILEM is hit, cc is the infoblock created by CURLYX
2ab05381 3477 of the same pair. Thus when WHILEM tries to match Z, it temporarily
95b24440 3478 resets cc, since this Y(A)*Z can be a part of some other loop:
2ab05381
IZ
3479 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
3480 of the external loop.
3481
a0374537 3482 Currently present infoblocks form a tree with a stem formed by st->cc
2ab05381
IZ
3483 and whatever it mentions via ->next, and additional attached trees
3484 corresponding to temporarily unset infoblocks as in "5" above.
3485
95b24440 3486 In the following picture, infoblocks for outer loop of
2ab05381
IZ
3487 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
3488 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
3489 infoblocks are drawn below the "reset" infoblock.
3490
3491 In fact in the picture below we do not show failed matches for Z and T
3492 by WHILEM blocks. [We illustrate minimal matches, since for them it is
3493 more obvious *why* one needs to *temporary* unset infoblocks.]
3494
3495 Matched REx position InfoBlocks Comment
3496 (Y(A)*?Z)*?T x
3497 Y(A)*?Z)*?T x <- O
3498 Y (A)*?Z)*?T x <- O
3499 Y A)*?Z)*?T x <- O <- I
3500 YA )*?Z)*?T x <- O <- I
3501 YA A)*?Z)*?T x <- O <- I
3502 YAA )*?Z)*?T x <- O <- I
3503 YAA Z)*?T x <- O # Temporary unset I
3504 I
3505
3506 YAAZ Y(A)*?Z)*?T x <- O
3507 I
3508
3509 YAAZY (A)*?Z)*?T x <- O
3510 I
3511
3512 YAAZY A)*?Z)*?T x <- O <- I
3513 I
3514
3515 YAAZYA )*?Z)*?T x <- O <- I
3516 I
3517
3518 YAAZYA Z)*?T x <- O # Temporary unset I
3519 I,I
3520
3521 YAAZYAZ )*?T x <- O
3522 I,I
3523
3524 YAAZYAZ T x # Temporary unset O
3525 O
3526 I,I
3527
3528 YAAZYAZT x
3529 O
3530 I,I
3531 *******************************************************************/
95b24440 3532
a0d0e21e 3533 case CURLYX: {
cb434fcc
IZ
3534 /* No need to save/restore up to this paren */
3535 I32 parenfloor = scan->flags;
c277df42 3536
c2b7afd3
NC
3537 /* Dave says:
3538
3539 CURLYX and WHILEM are always paired: they're the moral
3540 equivalent of pp_enteriter anbd pp_iter.
3541
3542 The only time next could be null is if the node tree is
3543 corrupt. This was mentioned on p5p a few days ago.
3544
3545 See http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2006-04/msg00556.html
3546 So we'll assert that this is true:
3547 */
3548 assert(next);
30b2893d 3549 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
c277df42 3550 next += ARG(next);
cb434fcc
IZ
3551 /* XXXX Probably it is better to teach regpush to support
3552 parenfloor > PL_regsize... */
eb160463 3553 if (parenfloor > (I32)*PL_reglastparen)
cb434fcc 3554 parenfloor = *PL_reglastparen; /* Pessimization... */
a0374537 3555
d8319b27
DM
3556 st->u.curlyx.cp = PL_savestack_ix;
3557 st->u.curlyx.outercc = st->cc;
a0374537
DM
3558 st->cc = st;
3559 /* these fields contain the state of the current curly.
3560 * they are accessed by subsequent WHILEMs;
3561 * cur and lastloc are also updated by WHILEM */
d8319b27
DM
3562 st->u.curlyx.parenfloor = parenfloor;
3563 st->u.curlyx.cur = -1; /* this will be updated by WHILEM */
3564 st->u.curlyx.min = ARG1(scan);
3565 st->u.curlyx.max = ARG2(scan);
3566 st->u.curlyx.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3567 st->u.curlyx.lastloc = 0;
a0374537
DM
3568 /* st->next and st->minmod are also read by WHILEM */
3569
3280af22 3570 PL_reginput = locinput;
95b24440
DM
3571 REGMATCH(PREVOPER(next), CURLYX); /* start on the WHILEM */
3572 /*** all unsaved local vars undefined at this point */
d8319b27
DM
3573 regcpblow(st->u.curlyx.cp);
3574 st->cc = st->u.curlyx.outercc;
95b24440 3575 saySAME(result);
a0d0e21e 3576 }
5f66b61c 3577 /* NOTREACHED */
a0d0e21e
LW
3578 case WHILEM: {
3579 /*
3580 * This is really hard to understand, because after we match
3581 * what we're trying to match, we must make sure the rest of
2c2d71f5 3582 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
3583 * to go back UP the parse tree by recursing ever deeper. And
3584 * if it fails, we have to reset our parent's current state
3585 * that we can try again after backing off.
3586 */
3587
c2b7afd3
NC
3588 /* Dave says:
3589
3590 st->cc gets initialised by CURLYX ready for use by WHILEM.
3591 So again, unless somethings been corrupted, st->cc cannot
3592 be null at that point in WHILEM.
3593
3594 See http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2006-04/msg00556.html
3595 So we'll assert that this is true:
3596 */
3597 assert(st->cc);
d8319b27
DM
3598 st->u.whilem.lastloc = st->cc->u.curlyx.lastloc; /* Detection of 0-len. */
3599 st->u.whilem.cache_offset = 0;
3600 st->u.whilem.cache_bit = 0;
c277df42 3601
d8319b27 3602 n = st->cc->u.curlyx.cur + 1; /* how many we know we matched */
3280af22 3603 PL_reginput = locinput;
a0d0e21e 3604
a3621e74 3605 DEBUG_EXECUTE_r(
9041c2e3 3606 PerlIO_printf(Perl_debug_log,
91f3b821 3607 "%*s %ld out of %ld..%ld cc=%"UVxf"\n",
3280af22 3608 REPORT_CODE_OFF+PL_regindent*2, "",
d8319b27
DM
3609 (long)n, (long)st->cc->u.curlyx.min,
3610 (long)st->cc->u.curlyx.max, PTR2UV(st->cc))
c277df42 3611 );
4633a7c4 3612
a0d0e21e
LW
3613 /* If degenerate scan matches "", assume scan done. */
3614
d8319b27
DM
3615 if (locinput == st->cc->u.curlyx.lastloc && n >= st->cc->u.curlyx.min) {
3616 st->u.whilem.savecc = st->cc;
3617 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3618 if (st->cc)
d8319b27 3619 st->ln = st->cc->u.curlyx.cur;
a3621e74 3620 DEBUG_EXECUTE_r(
c3464db5
DD
3621 PerlIO_printf(Perl_debug_log,
3622 "%*s empty match detected, try continuation...\n",
3280af22 3623 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3624 );
d8319b27 3625 REGMATCH(st->u.whilem.savecc->next, WHILEM1);
95b24440 3626 /*** all unsaved local vars undefined at this point */
d8319b27 3627 st->cc = st->u.whilem.savecc;
95b24440 3628 if (result)
4633a7c4 3629 sayYES;
d8319b27
DM
3630 if (st->cc->u.curlyx.outercc)
3631 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
4633a7c4 3632 sayNO;
a0d0e21e
LW
3633 }
3634
3635 /* First just match a string of min scans. */
3636
d8319b27
DM
3637 if (n < st->cc->u.curlyx.min) {
3638 st->cc->u.curlyx.cur = n;
3639 st->cc->u.curlyx.lastloc = locinput;
3640 REGMATCH(st->cc->u.curlyx.scan, WHILEM2);
95b24440
DM
3641 /*** all unsaved local vars undefined at this point */
3642 if (result)
4633a7c4 3643 sayYES;
d8319b27
DM
3644 st->cc->u.curlyx.cur = n - 1;
3645 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
4633a7c4 3646 sayNO;
a0d0e21e
LW
3647 }
3648
2c2d71f5
JH
3649 if (scan->flags) {
3650 /* Check whether we already were at this position.
3651 Postpone detection until we know the match is not
3652 *that* much linear. */
3653 if (!PL_reg_maxiter) {
3654 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
66bf836d
DM
3655 /* possible overflow for long strings and many CURLYX's */
3656 if (PL_reg_maxiter < 0)
3657 PL_reg_maxiter = I32_MAX;
2c2d71f5
JH
3658 PL_reg_leftiter = PL_reg_maxiter;
3659 }
3660 if (PL_reg_leftiter-- == 0) {
a3b680e6 3661 const I32 size = (PL_reg_maxiter + 7 + POSCACHE_START)/8;
2c2d71f5 3662 if (PL_reg_poscache) {
eb160463 3663 if ((I32)PL_reg_poscache_size < size) {
2c2d71f5
JH
3664 Renew(PL_reg_poscache, size, char);
3665 PL_reg_poscache_size = size;
3666 }
3667 Zero(PL_reg_poscache, size, char);
3668 }
3669 else {
3670 PL_reg_poscache_size = size;
a02a5408 3671 Newxz(PL_reg_poscache, size, char);
2c2d71f5 3672 }
a3621e74 3673 DEBUG_EXECUTE_r(
2c2d71f5
JH
3674 PerlIO_printf(Perl_debug_log,
3675 "%sDetected a super-linear match, switching on caching%s...\n",
3676 PL_colors[4], PL_colors[5])
3677 );
3678 }
3679 if (PL_reg_leftiter < 0) {
d8319b27 3680 st->u.whilem.cache_offset = locinput - PL_bostr;
2c2d71f5 3681
d8319b27
DM
3682 st->u.whilem.cache_offset = (scan->flags & 0xf) - 1 + POSCACHE_START
3683 + st->u.whilem.cache_offset * (scan->flags>>4);
3684 st->u.whilem.cache_bit = st->u.whilem.cache_offset % 8;
3685 st->u.whilem.cache_offset /= 8;
3686 if (PL_reg_poscache[st->u.whilem.cache_offset] & (1<<st->u.whilem.cache_bit)) {
a3621e74 3687 DEBUG_EXECUTE_r(
2c2d71f5
JH
3688 PerlIO_printf(Perl_debug_log,
3689 "%*s already tried at this position...\n",
3690 REPORT_CODE_OFF+PL_regindent*2, "")
3691 );
3ab3c9b4
HS
3692 if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))
3693 /* cache records success */
c2b0868c
HS
3694 sayYES;
3695 else
3ab3c9b4 3696 /* cache records failure */
c2b0868c 3697 sayNO_SILENT;
2c2d71f5 3698 }
2c2d71f5
JH
3699 }
3700 }
3701
a0d0e21e
LW
3702 /* Prefer next over scan for minimal matching. */
3703
5d9a96ca 3704 if (st->cc->minmod) {
d8319b27
DM
3705 st->u.whilem.savecc = st->cc;
3706 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3707 if (st->cc)
d8319b27
DM
3708 st->ln = st->cc->u.curlyx.cur;
3709 st->u.whilem.cp = regcppush(st->u.whilem.savecc->u.curlyx.parenfloor);
3710 REGCP_SET(st->u.whilem.lastcp);
3711 REGMATCH(st->u.whilem.savecc->next, WHILEM3);
95b24440 3712 /*** all unsaved local vars undefined at this point */
d8319b27 3713 st->cc = st->u.whilem.savecc;
95b24440 3714 if (result) {
d8319b27 3715 regcpblow(st->u.whilem.cp);
3ab3c9b4 3716 CACHEsayYES; /* All done. */
5f05dabc 3717 }
d8319b27 3718 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3719 regcppop(rex);
d8319b27
DM
3720 if (st->cc->u.curlyx.outercc)
3721 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
a0d0e21e 3722
d8319b27 3723 if (n >= st->cc->u.curlyx.max) { /* Maximum greed exceeded? */
9041c2e3 3724 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3725 && !(PL_reg_flags & RF_warned)) {
3726 PL_reg_flags |= RF_warned;
9014280d 3727 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
2f3ca594
GS
3728 "Complex regular subexpression recursion",
3729 REG_INFTY - 1);
c277df42 3730 }
3ab3c9b4 3731 CACHEsayNO;
c277df42 3732 }
a687059c 3733
a3621e74 3734 DEBUG_EXECUTE_r(
c3464db5
DD
3735 PerlIO_printf(Perl_debug_log,
3736 "%*s trying longer...\n",
3280af22 3737 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3738 );
a0d0e21e 3739 /* Try scanning more and see if it helps. */
3280af22 3740 PL_reginput = locinput;
d8319b27
DM
3741 st->cc->u.curlyx.cur = n;
3742 st->cc->u.curlyx.lastloc = locinput;
3743 st->u.whilem.cp = regcppush(st->cc->u.curlyx.parenfloor);
3744 REGCP_SET(st->u.whilem.lastcp);
3745 REGMATCH(st->cc->u.curlyx.scan, WHILEM4);
95b24440
DM
3746 /*** all unsaved local vars undefined at this point */
3747 if (result) {
d8319b27 3748 regcpblow(st->u.whilem.cp);
3ab3c9b4 3749 CACHEsayYES;
5f05dabc 3750 }
d8319b27 3751 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3752 regcppop(rex);
d8319b27
DM
3753 st->cc->u.curlyx.cur = n - 1;
3754 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
3ab3c9b4 3755 CACHEsayNO;
a0d0e21e
LW
3756 }
3757
3758 /* Prefer scan over next for maximal matching. */
3759
d8319b27
DM
3760 if (n < st->cc->u.curlyx.max) { /* More greed allowed? */
3761 st->u.whilem.cp = regcppush(st->cc->u.curlyx.parenfloor);
3762 st->cc->u.curlyx.cur = n;
3763 st->cc->u.curlyx.lastloc = locinput;
3764 REGCP_SET(st->u.whilem.lastcp);
3765 REGMATCH(st->cc->u.curlyx.scan, WHILEM5);
95b24440
DM
3766 /*** all unsaved local vars undefined at this point */
3767 if (result) {
d8319b27 3768 regcpblow(st->u.whilem.cp);
3ab3c9b4 3769 CACHEsayYES;
5f05dabc 3770 }
d8319b27 3771 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3772 regcppop(rex); /* Restore some previous $<digit>s? */
3280af22 3773 PL_reginput = locinput;
a3621e74 3774 DEBUG_EXECUTE_r(
c3464db5
DD
3775 PerlIO_printf(Perl_debug_log,
3776 "%*s failed, try continuation...\n",
3280af22 3777 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3778 );
3779 }
9041c2e3 3780 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3781 && !(PL_reg_flags & RF_warned)) {
3280af22 3782 PL_reg_flags |= RF_warned;
9014280d 3783 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
cb5d145d
GS
3784 "Complex regular subexpression recursion",
3785 REG_INFTY - 1);
a0d0e21e
LW
3786 }
3787
3788 /* Failed deeper matches of scan, so see if this one works. */
d8319b27
DM
3789 st->u.whilem.savecc = st->cc;
3790 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3791 if (st->cc)
d8319b27
DM
3792 st->ln = st->cc->u.curlyx.cur;
3793 REGMATCH(st->u.whilem.savecc->next, WHILEM6);
95b24440 3794 /*** all unsaved local vars undefined at this point */
d8319b27 3795 st->cc = st->u.whilem.savecc;
95b24440 3796 if (result)
3ab3c9b4 3797 CACHEsayYES;
d8319b27
DM
3798 if (st->cc->u.curlyx.outercc)
3799 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
3800 st->cc->u.curlyx.cur = n - 1;
3801 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
3ab3c9b4 3802 CACHEsayNO;
a0d0e21e 3803 }
5f66b61c 3804 /* NOTREACHED */
9041c2e3 3805 case BRANCHJ:
c277df42
IZ
3806 next = scan + ARG(scan);
3807 if (next == scan)
3808 next = NULL;
3809 inner = NEXTOPER(NEXTOPER(scan));
3810 goto do_branch;
9041c2e3 3811 case BRANCH:
c277df42
IZ
3812 inner = NEXTOPER(scan);
3813 do_branch:
3814 {
e822a8b4
DM
3815 I32 type;
3816 type = OP(scan);
ae5031b3 3817 if (!next || OP(next) != type) /* No choice. */
c277df42 3818 next = inner; /* Avoid recursion. */
a0d0e21e 3819 else {
a3b680e6 3820 const I32 lastparen = *PL_reglastparen;
02db2b7b 3821 /* Put unwinding data on stack */
6136c704
AL
3822 const I32 unwind1 = SSNEWt(1,re_unwind_branch_t);
3823 re_unwind_branch_t * const uw = SSPTRt(unwind1,re_unwind_branch_t);
3824
5d9a96ca
DM
3825 uw->prev = st->unwind;
3826 st->unwind = unwind1;
e822a8b4 3827 uw->type = ((type == BRANCH)
02db2b7b
IZ
3828 ? RE_UNWIND_BRANCH
3829 : RE_UNWIND_BRANCHJ);
3830 uw->lastparen = lastparen;
3831 uw->next = next;
3832 uw->locinput = locinput;
3833 uw->nextchr = nextchr;
3a2830be 3834 uw->minmod = st->minmod;
02db2b7b
IZ
3835#ifdef DEBUGGING
3836 uw->regindent = ++PL_regindent;
3837#endif
c277df42 3838
02db2b7b
IZ
3839 REGCP_SET(uw->lastcp);
3840
3841 /* Now go into the first branch */
3842 next = inner;
a687059c 3843 }
a0d0e21e
LW
3844 }
3845 break;
3846 case MINMOD:
5d9a96ca 3847 st->minmod = 1;
a0d0e21e 3848 break;
c277df42
IZ
3849 case CURLYM:
3850 {
d8319b27 3851 st->u.curlym.l = st->u.curlym.matches = 0;
9041c2e3 3852
c277df42 3853 /* We suppose that the next guy does not need
0e788c72 3854 backtracking: in particular, it is of constant non-zero length,
c277df42 3855 and has no parenths to influence future backrefs. */
5d9a96ca 3856 st->ln = ARG1(scan); /* min to match */
c277df42 3857 n = ARG2(scan); /* max to match */
d8319b27
DM
3858 st->u.curlym.paren = scan->flags;
3859 if (st->u.curlym.paren) {
3860 if (st->u.curlym.paren > PL_regsize)
3861 PL_regsize = st->u.curlym.paren;
3862 if (st->u.curlym.paren > (I32)*PL_reglastparen)
3863 *PL_reglastparen = st->u.curlym.paren;
c277df42 3864 }
dc45a647 3865 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
d8319b27 3866 if (st->u.curlym.paren)
c277df42 3867 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3868 PL_reginput = locinput;
d8319b27 3869 st->u.curlym.maxwanted = st->minmod ? st->ln : n;
0cadcf80
DM
3870 while (PL_reginput < PL_regeol && st->u.curlym.matches < st->u.curlym.maxwanted) {
3871 /* resume to current state on success */
3872 st->u.yes.prev_yes_state = yes_state;
3873 yes_state = st;
3874 REGMATCH(scan, CURLYM1);
3875 yes_state = st->u.yes.prev_yes_state;
3876 /*** all unsaved local vars undefined at this point */
3877 if (!result)
3878 break;
3879 /* on first match, determine length, u.curlym.l */
3880 if (!st->u.curlym.matches++) {
3881 if (PL_reg_match_utf8) {
3882 char *s = locinput;
3883 while (s < PL_reginput) {
3884 st->u.curlym.l++;
3885 s += UTF8SKIP(s);
6407bf3b
DM
3886 }
3887 }
0cadcf80
DM
3888 else {
3889 st->u.curlym.l = PL_reginput - locinput;
3890 }
3891 if (st->u.curlym.l == 0) {
3892 st->u.curlym.matches = st->u.curlym.maxwanted;
3893 break;
3894 }
6407bf3b 3895 }
0cadcf80 3896 locinput = PL_reginput;
6407bf3b
DM
3897 }
3898
3899 PL_reginput = locinput;
0cadcf80 3900 if (st->u.curlym.matches < st->ln) {
5d9a96ca 3901 st->minmod = 0;
0cadcf80
DM
3902 sayNO;
3903 }
5f80c4cf 3904
0cadcf80
DM
3905 DEBUG_EXECUTE_r(
3906 PerlIO_printf(Perl_debug_log,
3907 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
3908 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
3909 (IV) st->u.curlym.matches, (IV)st->u.curlym.l)
3910 );
3911
3912 /* calculate c1 and c1 for possible match of 1st char
3913 * following curly */
9e137952 3914 st->u.curlym.c1 = st->u.curlym.c2 = CHRTEST_VOID;
0cadcf80
DM
3915 if (HAS_TEXT(next) || JUMPABLE(next)) {
3916 regnode *text_node = next;
3917 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3918 if (HAS_TEXT(text_node)
3919 && PL_regkind[(U8)OP(text_node)] != REF)
3920 {
3921 st->u.curlym.c1 = (U8)*STRING(text_node);
3922 st->u.curlym.c2 =
3923 (OP(text_node) == EXACTF || OP(text_node) == REFF)
3924 ? PL_fold[st->u.curlym.c1]
3925 : (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3926 ? PL_fold_locale[st->u.curlym.c1]
3927 : st->u.curlym.c1;
3928 }
3929 }
5f80c4cf 3930
0cadcf80
DM
3931 REGCP_SET(st->u.curlym.lastcp);
3932
3933 st->u.curlym.minmod = st->minmod;
3934 st->minmod = 0;
3935 while (st->u.curlym.matches >= st->ln
3936 && (st->u.curlym.matches <= n
3937 /* for REG_INFTY, ln could overflow to negative */
3938 || (n == REG_INFTY && st->u.curlym.matches >= 0)))
3939 {
3940 /* If it could work, try it. */
9e137952 3941 if (st->u.curlym.c1 == CHRTEST_VOID ||
0cadcf80
DM
3942 UCHARAT(PL_reginput) == st->u.curlym.c1 ||
3943 UCHARAT(PL_reginput) == st->u.curlym.c2)
3944 {
3945 DEBUG_EXECUTE_r(
3946 PerlIO_printf(Perl_debug_log,
3947 "%*s trying tail with matches=%"IVdf"...\n",
3948 (int)(REPORT_CODE_OFF+PL_regindent*2),
3949 "", (IV)st->u.curlym.matches)
3950 );
3951 if (st->u.curlym.paren) {
3952 if (st->u.curlym.matches) {
3953 PL_regstartp[st->u.curlym.paren]
3954 = HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr;
3955 PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr;
cca55fe3 3956 }
5f80c4cf 3957 else
0cadcf80 3958 PL_regendp[st->u.curlym.paren] = -1;
5f80c4cf 3959 }
0cadcf80
DM
3960 /* resume to current state on success */
3961 st->u.yes.prev_yes_state = yes_state;
3962 yes_state = st;
3963 REGMATCH(next, CURLYM2);
3964 yes_state = st->u.yes.prev_yes_state;
3965 /*** all unsaved local vars undefined at this point */
3966 if (result)
3967 /* XXX tmp sayYES; */
3968 sayYES_FINAL;
3969 REGCP_UNWIND(st->u.curlym.lastcp);
a0ed51b3 3970 }
0cadcf80
DM
3971 /* Couldn't or didn't -- move forward/backward. */
3972 if (st->u.curlym.minmod) {
3280af22 3973 PL_reginput = locinput;
dad79028
DM
3974 /* resume to current state on success */
3975 st->u.yes.prev_yes_state = yes_state;
3976 yes_state = st;
95b24440 3977 REGMATCH(scan, CURLYM3);
dad79028 3978 yes_state = st->u.yes.prev_yes_state;
95b24440
DM
3979 /*** all unsaved local vars undefined at this point */
3980 if (result) {
0cadcf80 3981 st->u.curlym.matches++;
3280af22 3982 locinput = PL_reginput;
c277df42
IZ
3983 }
3984 else
3985 sayNO;
3986 }
0cadcf80 3987 else {
d8319b27
DM
3988 st->u.curlym.matches--;
3989 locinput = HOPc(locinput, -st->u.curlym.l);
3280af22 3990 PL_reginput = locinput;
c277df42
IZ
3991 }
3992 }
3993 sayNO;
5f66b61c 3994 /* NOTREACHED */
c277df42
IZ
3995 break;
3996 }
3997 case CURLYN:
d8319b27
DM
3998 st->u.plus.paren = scan->flags; /* Which paren to set */
3999 if (st->u.plus.paren > PL_regsize)
4000 PL_regsize = st->u.plus.paren;
4001 if (st->u.plus.paren > (I32)*PL_reglastparen)
4002 *PL_reglastparen = st->u.plus.paren;
5d9a96ca 4003 st->ln = ARG1(scan); /* min to match */
c277df42 4004 n = ARG2(scan); /* max to match */
dc45a647 4005 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 4006 goto repeat;
a0d0e21e 4007 case CURLY:
d8319b27 4008 st->u.plus.paren = 0;
5d9a96ca 4009 st->ln = ARG1(scan); /* min to match */
a0d0e21e 4010 n = ARG2(scan); /* max to match */
dc45a647 4011 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
4012 goto repeat;
4013 case STAR:
5d9a96ca 4014 st->ln = 0;
c277df42 4015 n = REG_INFTY;
a0d0e21e 4016 scan = NEXTOPER(scan);
d8319b27 4017 st->u.plus.paren = 0;
a0d0e21e
LW
4018 goto repeat;
4019 case PLUS:
5d9a96ca 4020 st->ln = 1;
c277df42
IZ
4021 n = REG_INFTY;
4022 scan = NEXTOPER(scan);
d8319b27 4023 st->u.plus.paren = 0;
c277df42 4024 repeat:
a0d0e21e
LW
4025 /*
4026 * Lookahead to avoid useless match attempts
4027 * when we know what character comes next.
4028 */
5f80c4cf
JP
4029
4030 /*
4031 * Used to only do .*x and .*?x, but now it allows
4032 * for )'s, ('s and (?{ ... })'s to be in the way
4033 * of the quantifier and the EXACT-like node. -- japhy
4034 */
4035
cca55fe3 4036 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
4037 U8 *s;
4038 regnode *text_node = next;
4039
cca55fe3 4040 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 4041
9e137952
DM
4042 if (! HAS_TEXT(text_node))
4043 st->u.plus.c1 = st->u.plus.c2 = CHRTEST_VOID;
5f80c4cf 4044 else {
cca55fe3 4045 if (PL_regkind[(U8)OP(text_node)] == REF) {
9e137952 4046 st->u.plus.c1 = st->u.plus.c2 = CHRTEST_VOID;
44a68960 4047 goto assume_ok_easy;
cca55fe3
JP
4048 }
4049 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
4050
4051 if (!UTF) {
d8319b27 4052 st->u.plus.c2 = st->u.plus.c1 = *s;
f65d3ee7 4053 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
d8319b27 4054 st->u.plus.c2 = PL_fold[st->u.plus.c1];
f65d3ee7 4055 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
d8319b27 4056 st->u.plus.c2 = PL_fold_locale[st->u.plus.c1];
1aa99e6b 4057 }
5f80c4cf 4058 else { /* UTF */
f65d3ee7 4059 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 4060 STRLEN ulen1, ulen2;
89ebb4a3
JH
4061 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4062 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
a2a2844f
JH
4063
4064 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4065 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4066
d8319b27 4067 st->u.plus.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
041457d9 4068 uniflags);
d8319b27 4069 st->u.plus.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
041457d9 4070 uniflags);
5f80c4cf
JP
4071 }
4072 else {
d8319b27 4073 st->u.plus.c2 = st->u.plus.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
041457d9 4074 uniflags);
5f80c4cf 4075 }
1aa99e6b
IH
4076 }
4077 }
bbce6d69 4078 }
a0d0e21e 4079 else
9e137952 4080 st->u.plus.c1 = st->u.plus.c2 = CHRTEST_VOID;
cca55fe3 4081 assume_ok_easy:
3280af22 4082 PL_reginput = locinput;
5d9a96ca
DM
4083 if (st->minmod) {
4084 st->minmod = 0;
32fc9b6a 4085 if (st->ln && regrepeat(rex, scan, st->ln) < st->ln)
4633a7c4 4086 sayNO;
a0ed51b3 4087 locinput = PL_reginput;
d8319b27 4088 REGCP_SET(st->u.plus.lastcp);
9e137952 4089 if (st->u.plus.c1 != CHRTEST_VOID) {
d8319b27
DM
4090 st->u.plus.old = locinput;
4091 st->u.plus.count = 0;
0fe9bf95 4092
1aa99e6b 4093 if (n == REG_INFTY) {
d8319b27 4094 st->u.plus.e = PL_regeol - 1;
1aa99e6b 4095 if (do_utf8)
d8319b27
DM
4096 while (UTF8_IS_CONTINUATION(*(U8*)st->u.plus.e))
4097 st->u.plus.e--;
1aa99e6b
IH
4098 }
4099 else if (do_utf8) {
5d9a96ca 4100 int m = n - st->ln;
d8319b27
DM
4101 for (st->u.plus.e = locinput;
4102 m >0 && st->u.plus.e + UTF8SKIP(st->u.plus.e) <= PL_regeol; m--)
4103 st->u.plus.e += UTF8SKIP(st->u.plus.e);
1aa99e6b
IH
4104 }
4105 else {
d8319b27
DM
4106 st->u.plus.e = locinput + n - st->ln;
4107 if (st->u.plus.e >= PL_regeol)
4108 st->u.plus.e = PL_regeol - 1;
1aa99e6b 4109 }
0fe9bf95
IZ
4110 while (1) {
4111 /* Find place 'next' could work */
1aa99e6b 4112 if (!do_utf8) {
d8319b27
DM
4113 if (st->u.plus.c1 == st->u.plus.c2) {
4114 while (locinput <= st->u.plus.e &&
4115 UCHARAT(locinput) != st->u.plus.c1)
1aa99e6b
IH
4116 locinput++;
4117 } else {
d8319b27
DM
4118 while (locinput <= st->u.plus.e
4119 && UCHARAT(locinput) != st->u.plus.c1
4120 && UCHARAT(locinput) != st->u.plus.c2)
1aa99e6b
IH
4121 locinput++;
4122 }
d8319b27 4123 st->u.plus.count = locinput - st->u.plus.old;
1aa99e6b
IH
4124 }
4125 else {
d8319b27 4126 if (st->u.plus.c1 == st->u.plus.c2) {
a3b680e6 4127 STRLEN len;
872c91ae
JH
4128 /* count initialised to
4129 * utf8_distance(old, locinput) */
d8319b27 4130 while (locinput <= st->u.plus.e &&
872c91ae 4131 utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4132 UTF8_MAXBYTES, &len,
d8319b27 4133 uniflags) != (UV)st->u.plus.c1) {
1aa99e6b 4134 locinput += len;
d8319b27 4135 st->u.plus.count++;
b2f2f093 4136 }
1aa99e6b 4137 } else {
872c91ae
JH
4138 /* count initialised to
4139 * utf8_distance(old, locinput) */
d8319b27 4140 while (locinput <= st->u.plus.e) {
c4fd8992
AL
4141 STRLEN len;
4142 const UV c = utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4143 UTF8_MAXBYTES, &len,
041457d9 4144 uniflags);
d8319b27 4145 if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
1aa99e6b 4146 break;
b2f2f093 4147 locinput += len;
d8319b27 4148 st->u.plus.count++;
1aa99e6b
IH
4149 }
4150 }
0fe9bf95 4151 }
d8319b27 4152 if (locinput > st->u.plus.e)
0fe9bf95
IZ
4153 sayNO;
4154 /* PL_reginput == old now */
d8319b27 4155 if (locinput != st->u.plus.old) {
5d9a96ca 4156 st->ln = 1; /* Did some */
32fc9b6a 4157 if (regrepeat(rex, scan, st->u.plus.count) < st->u.plus.count)
0fe9bf95
IZ
4158 sayNO;
4159 }
4160 /* PL_reginput == locinput now */
d8319b27 4161 TRYPAREN(st->u.plus.paren, st->ln, locinput, PLUS1);
95b24440 4162 /*** all unsaved local vars undefined at this point */
0fe9bf95 4163 PL_reginput = locinput; /* Could be reset... */
d8319b27 4164 REGCP_UNWIND(st->u.plus.lastcp);
0fe9bf95 4165 /* Couldn't or didn't -- move forward. */
d8319b27 4166 st->u.plus.old = locinput;
1aa99e6b
IH
4167 if (do_utf8)
4168 locinput += UTF8SKIP(locinput);
4169 else
4170 locinput++;
d8319b27 4171 st->u.plus.count = 1;
0fe9bf95
IZ
4172 }
4173 }
4174 else
5d9a96ca 4175 while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */
1aa99e6b 4176 UV c;
9e137952 4177 if (st->u.plus.c1 != CHRTEST_VOID) {
1aa99e6b 4178 if (do_utf8)
872c91ae 4179 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4180 UTF8_MAXBYTES, 0,
041457d9 4181 uniflags);
1aa99e6b 4182 else
9041c2e3 4183 c = UCHARAT(PL_reginput);
2390ecbc 4184 /* If it could work, try it. */
d8319b27 4185 if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
2390ecbc 4186 {
d8319b27 4187 TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS2);
95b24440 4188 /*** all unsaved local vars undefined at this point */
d8319b27 4189 REGCP_UNWIND(st->u.plus.lastcp);
2390ecbc 4190 }
1aa99e6b 4191 }
a0d0e21e 4192 /* If it could work, try it. */
9e137952 4193 else if (st->u.plus.c1 == CHRTEST_VOID)
bbce6d69 4194 {
d8319b27 4195 TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS3);
95b24440 4196 /*** all unsaved local vars undefined at this point */
d8319b27 4197 REGCP_UNWIND(st->u.plus.lastcp);
bbce6d69 4198 }
c277df42 4199 /* Couldn't or didn't -- move forward. */
a0ed51b3 4200 PL_reginput = locinput;
32fc9b6a 4201 if (regrepeat(rex, scan, 1)) {
5d9a96ca 4202 st->ln++;
a0ed51b3
LW
4203 locinput = PL_reginput;
4204 }
4205 else
4633a7c4 4206 sayNO;
a0d0e21e
LW
4207 }
4208 }
4209 else {
32fc9b6a 4210 n = regrepeat(rex, scan, n);
a0ed51b3 4211 locinput = PL_reginput;
5d9a96ca 4212 if (st->ln < n && PL_regkind[(U8)OP(next)] == EOL &&
7fba1cd6 4213 (OP(next) != MEOL ||
15272685
HS
4214 OP(next) == SEOL || OP(next) == EOS))
4215 {
5d9a96ca 4216 st->ln = n; /* why back off? */
1aeab75a
GS
4217 /* ...because $ and \Z can match before *and* after
4218 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
4219 We should back off by one in this case. */
4220 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
5d9a96ca 4221 st->ln--;
1aeab75a 4222 }
d8319b27 4223 REGCP_SET(st->u.plus.lastcp);
1d5c262f 4224 {
8fa7f367 4225 UV c = 0;
5d9a96ca 4226 while (n >= st->ln) {
9e137952 4227 if (st->u.plus.c1 != CHRTEST_VOID) {
1aa99e6b 4228 if (do_utf8)
872c91ae 4229 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4230 UTF8_MAXBYTES, 0,
041457d9 4231 uniflags);
1aa99e6b 4232 else
9041c2e3 4233 c = UCHARAT(PL_reginput);
1aa99e6b 4234 }
c277df42 4235 /* If it could work, try it. */
9e137952 4236 if (st->u.plus.c1 == CHRTEST_VOID || c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
c277df42 4237 {
d8319b27 4238 TRYPAREN(st->u.plus.paren, n, PL_reginput, PLUS4);
95b24440 4239 /*** all unsaved local vars undefined at this point */
d8319b27 4240 REGCP_UNWIND(st->u.plus.lastcp);
c277df42
IZ
4241 }
4242 /* Couldn't or didn't -- back up. */
4243 n--;
dfe13c55 4244 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 4245 }
a0d0e21e
LW
4246 }
4247 }
4633a7c4 4248 sayNO;
c277df42 4249 break;
a0d0e21e 4250 case END:
3b0527fe 4251 if (locinput < reginfo->till) {
a3621e74 4252 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
7821416a
IZ
4253 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4254 PL_colors[4],
4255 (long)(locinput - PL_reg_starttry),
3b0527fe 4256 (long)(reginfo->till - PL_reg_starttry),
7821416a
IZ
4257 PL_colors[5]));
4258 sayNO_FINAL; /* Cannot match: too short. */
4259 }
4260 PL_reginput = locinput; /* put where regtry can find it */
4261 sayYES_FINAL; /* Success! */
dad79028
DM
4262
4263 case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4264 DEBUG_EXECUTE_r(
4265 PerlIO_printf(Perl_debug_log,
4266 "%*s %ssubpattern success...%s\n",
4267 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5]));
3280af22 4268 PL_reginput = locinput; /* put where regtry can find it */
dad79028
DM
4269 sayYES_FINAL; /* Success! */
4270
4271 case SUSPEND: /* (?>FOO) */
4272 st->u.ifmatch.wanted = 1;
9fe1d20c 4273 PL_reginput = locinput;
9041c2e3 4274 goto do_ifmatch;
dad79028
DM
4275
4276 case UNLESSM: /* -ve lookaround: (?!FOO), or with flags, (?<!foo) */
4277 st->u.ifmatch.wanted = 0;
4278 goto ifmatch_trivial_fail_test;
4279
4280 case IFMATCH: /* +ve lookaround: (?=FOO), or with flags, (?<=foo) */
4281 st->u.ifmatch.wanted = 1;
4282 ifmatch_trivial_fail_test:
a0ed51b3 4283 if (scan->flags) {
52657f30 4284 char * const s = HOPBACKc(locinput, scan->flags);
dad79028
DM
4285 if (!s) {
4286 /* trivial fail */
4287 if (st->logical) {
4288 st->logical = 0;
4289 st->sw = 1 - st->u.ifmatch.wanted;
4290 }
4291 else if (st->u.ifmatch.wanted)
4292 sayNO;
4293 next = scan + ARG(scan);
4294 if (next == scan)
4295 next = NULL;
4296 break;
4297 }
efb30f32 4298 PL_reginput = s;
a0ed51b3
LW
4299 }
4300 else
4301 PL_reginput = locinput;
4302
c277df42 4303 do_ifmatch:
dad79028
DM
4304 /* resume to current state on success */
4305 st->u.yes.prev_yes_state = yes_state;
4306 yes_state = st;
4307 PUSH_STATE(newst, resume_IFMATCH);
4308 st = newst;
4309 next = NEXTOPER(NEXTOPER(scan));
4310 break;
4311
c277df42 4312 case LONGJMP:
c277df42
IZ
4313 next = scan + ARG(scan);
4314 if (next == scan)
4315 next = NULL;
a0d0e21e
LW
4316 break;
4317 default:
b900a521 4318 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 4319 PTR2UV(scan), OP(scan));
cea2e8a9 4320 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 4321 }
95b24440 4322
02db2b7b 4323 reenter:
a0d0e21e 4324 scan = next;
95b24440
DM
4325 continue;
4326 /* NOTREACHED */
4327
4328 /* simulate recursively calling regmatch(), but without actually
4329 * recursing - ie save the current state on the heap rather than on
4330 * the stack, then re-enter the loop. This avoids complex regexes
4331 * blowing the processor stack */
4332
4333 start_recurse:
4334 {
5d9a96ca
DM
4335 /* push new state */
4336 regmatch_state *oldst = st;
4337
4338 depth++;
4339
4340 /* grab the next free state slot */
4341 st++;
86545054 4342 if (st > SLAB_LAST(PL_regmatch_slab))
5d9a96ca
DM
4343 st = S_push_slab(aTHX);
4344 PL_regmatch_state = st;
4345
4346 oldst->next = next;
4347 oldst->n = n;
4348 oldst->locinput = locinput;
5d9a96ca
DM
4349
4350 st->cc = oldst->cc;
95b24440
DM
4351 locinput = PL_reginput;
4352 nextchr = UCHARAT(locinput);
5d9a96ca
DM
4353 st->minmod = 0;
4354 st->sw = 0;
4355 st->logical = 0;
4356 st->unwind = 0;
95b24440
DM
4357#ifdef DEBUGGING
4358 PL_regindent++;
4359#endif
4360 }
a0d0e21e 4361 }
a687059c 4362
aa283a38
DM
4363
4364
a0d0e21e
LW
4365 /*
4366 * We get here only if there's trouble -- normally "case END" is
4367 * the terminating point.
4368 */
cea2e8a9 4369 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 4370 /*NOTREACHED*/
4633a7c4
LW
4371 sayNO;
4372
7821416a 4373yes_final:
77cb431f
DM
4374
4375 if (yes_state) {
4376 /* we have successfully completed a subexpression, but we must now
4377 * pop to the state marked by yes_state and continue from there */
4378
dad79028 4379 /*XXX tmp for CURLYM*/
c4fd8992
AL
4380 regmatch_slab * const oslab = PL_regmatch_slab;
4381 regmatch_state * const ost = st;
4382 regmatch_state * const oys = yes_state;
dad79028
DM
4383 int odepth = depth;
4384
77cb431f
DM
4385 assert(st != yes_state);
4386 while (yes_state < SLAB_FIRST(PL_regmatch_slab)
4387 || yes_state > SLAB_LAST(PL_regmatch_slab))
4388 {
4389 /* not in this slab, pop slab */
4390 depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
4391 PL_regmatch_slab = PL_regmatch_slab->prev;
4392 st = SLAB_LAST(PL_regmatch_slab);
4393 }
4394 depth -= (st - yes_state);
dad79028 4395 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE TO (%d)\n", depth));
77cb431f
DM
4396 st = yes_state;
4397 yes_state = st->u.yes.prev_yes_state;
4398 PL_regmatch_state = st;
4399
4400 switch (st->resume_state) {
4401 case resume_EVAL:
4402 if (st->u.eval.toggleutf)
4403 PL_reg_flags ^= RF_utf8;
4404 ReREFCNT_dec(rex);
4405 rex = st->u.eval.prev_rex;
4406 /* XXXX This is too dramatic a measure... */
4407 PL_reg_maxiter = 0;
4408 /* Restore parens of the caller without popping the
4409 * savestack */
4410 {
c4fd8992 4411 const I32 tmp = PL_savestack_ix;
77cb431f
DM
4412 PL_savestack_ix = st->u.eval.lastcp;
4413 regcppop(rex);
4414 PL_savestack_ix = tmp;
4415 }
4416 PL_reginput = locinput;
4417 /* continue at the node following the (??{...}) */
4418 next = st->next;
4419 goto reenter;
4420
dad79028
DM
4421 case resume_IFMATCH:
4422 if (st->logical) {
4423 st->logical = 0;
4424 st->sw = st->u.ifmatch.wanted;
4425 }
4426 else if (!st->u.ifmatch.wanted)
4427 sayNO;
4428
4429 if (OP(st->scan) == SUSPEND)
4430 locinput = PL_reginput;
4431 else {
4432 locinput = PL_reginput = st->locinput;
4433 nextchr = UCHARAT(locinput);
4434 }
4435 next = st->scan + ARG(st->scan);
4436 if (next == st->scan)
4437 next = NULL;
4438 goto reenter;
4439
4440 /* XXX tmp don't handle yes_state yet */
4441 case resume_CURLYM1:
4442 case resume_CURLYM2:
4443 case resume_CURLYM3:
dad79028
DM
4444 PL_regmatch_slab =oslab;
4445 st = ost;
4446 PL_regmatch_state = st;
4447 depth = odepth;
4448 yes_state = oys;
4449 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "XXX revering a CURLYM\n"));
4450 goto yes;
4451
77cb431f
DM
4452 default:
4453 Perl_croak(aTHX_ "unexpected yes reume state");
4454 }
4455 }
4456
a3621e74 4457 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
e4584336 4458 PL_colors[4], PL_colors[5]));
4633a7c4
LW
4459yes:
4460#ifdef DEBUGGING
3280af22 4461 PL_regindent--;
4633a7c4 4462#endif
02db2b7b 4463
95b24440 4464 result = 1;
aa283a38 4465 /* XXX this is duplicate(ish) code to that in the do_no section.
77cb431f
DM
4466 * eventually a yes should just pop the stack back to the current
4467 * yes_state */
aa283a38
DM
4468 if (depth) {
4469 /* restore previous state and re-enter */
4470 POP_STATE;
4471
4472 switch (st->resume_state) {
4473 case resume_TRIE1:
4474 goto resume_point_TRIE1;
4475 case resume_TRIE2:
4476 goto resume_point_TRIE2;
aa283a38
DM
4477 case resume_CURLYX:
4478 goto resume_point_CURLYX;
4479 case resume_WHILEM1:
4480 goto resume_point_WHILEM1;
4481 case resume_WHILEM2:
4482 goto resume_point_WHILEM2;
4483 case resume_WHILEM3:
4484 goto resume_point_WHILEM3;
4485 case resume_WHILEM4:
4486 goto resume_point_WHILEM4;
4487 case resume_WHILEM5:
4488 goto resume_point_WHILEM5;
4489 case resume_WHILEM6:
4490 goto resume_point_WHILEM6;
4491 case resume_CURLYM1:
4492 goto resume_point_CURLYM1;
4493 case resume_CURLYM2:
4494 goto resume_point_CURLYM2;
4495 case resume_CURLYM3:
4496 goto resume_point_CURLYM3;
aa283a38
DM
4497 case resume_PLUS1:
4498 goto resume_point_PLUS1;
4499 case resume_PLUS2:
4500 goto resume_point_PLUS2;
4501 case resume_PLUS3:
4502 goto resume_point_PLUS3;
4503 case resume_PLUS4:
4504 goto resume_point_PLUS4;
77cb431f 4505
dad79028 4506 case resume_IFMATCH:
77cb431f 4507 case resume_EVAL:
aa283a38
DM
4508 default:
4509 Perl_croak(aTHX_ "regexp resume memory corruption");
4510 }
4511 }
4512 goto final_exit;
4633a7c4
LW
4513
4514no:
a3621e74 4515 DEBUG_EXECUTE_r(
7821416a
IZ
4516 PerlIO_printf(Perl_debug_log,
4517 "%*s %sfailed...%s\n",
e4584336 4518 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
7821416a
IZ
4519 );
4520 goto do_no;
4521no_final:
4522do_no:
5d9a96ca
DM
4523 if (st->unwind) {
4524 re_unwind_t * const uw = SSPTRt(st->unwind,re_unwind_t);
02db2b7b
IZ
4525
4526 switch (uw->type) {
4527 case RE_UNWIND_BRANCH:
4528 case RE_UNWIND_BRANCHJ:
4529 {
6136c704 4530 re_unwind_branch_t * const uwb = &(uw->branch);
a3b680e6 4531 const I32 lastparen = uwb->lastparen;
9041c2e3 4532
02db2b7b
IZ
4533 REGCP_UNWIND(uwb->lastcp);
4534 for (n = *PL_reglastparen; n > lastparen; n--)
4535 PL_regendp[n] = -1;
4536 *PL_reglastparen = n;
4537 scan = next = uwb->next;
3a2830be 4538 st->minmod = uwb->minmod;
9041c2e3
NIS
4539 if ( !scan ||
4540 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b 4541 ? BRANCH : BRANCHJ) ) { /* Failure */
5d9a96ca 4542 st->unwind = uwb->prev;
02db2b7b
IZ
4543#ifdef DEBUGGING
4544 PL_regindent--;
4545#endif
4546 goto do_no;
4547 }
4548 /* Have more choice yet. Reuse the same uwb. */
02db2b7b
IZ
4549 if ((n = (uwb->type == RE_UNWIND_BRANCH
4550 ? NEXT_OFF(next) : ARG(next))))
4551 next += n;
4552 else
4553 next = NULL; /* XXXX Needn't unwinding in this case... */
4554 uwb->next = next;
4555 next = NEXTOPER(scan);
4556 if (uwb->type == RE_UNWIND_BRANCHJ)
4557 next = NEXTOPER(next);
4558 locinput = uwb->locinput;
4559 nextchr = uwb->nextchr;
4560#ifdef DEBUGGING
4561 PL_regindent = uwb->regindent;
4562#endif
4563
4564 goto reenter;
4565 }
5f66b61c 4566 /* NOTREACHED */
02db2b7b
IZ
4567 default:
4568 Perl_croak(aTHX_ "regexp unwind memory corruption");
4569 }
5f66b61c 4570 /* NOTREACHED */
02db2b7b 4571 }
aa283a38 4572
4633a7c4 4573#ifdef DEBUGGING
3280af22 4574 PL_regindent--;
4633a7c4 4575#endif
95b24440 4576 result = 0;
5d9a96ca 4577
aa283a38
DM
4578 if (depth) {
4579 /* there's a previous state to backtrack to */
4580 POP_STATE;
5d9a96ca 4581 switch (st->resume_state) {
95b24440
DM
4582 case resume_TRIE1:
4583 goto resume_point_TRIE1;
4584 case resume_TRIE2:
4585 goto resume_point_TRIE2;
aa283a38
DM
4586 case resume_EVAL:
4587 /* we have failed an (??{...}). Restore state to the outer re
4588 * then re-throw the failure */
4589 if (st->u.eval.toggleutf)
4590 PL_reg_flags ^= RF_utf8;
4591 ReREFCNT_dec(rex);
4592 rex = st->u.eval.prev_rex;
77cb431f 4593 yes_state = st->u.yes.prev_yes_state;
aa283a38
DM
4594
4595 /* XXXX This is too dramatic a measure... */
4596 PL_reg_maxiter = 0;
4597
4598 PL_reginput = locinput;
4599 REGCP_UNWIND(st->u.eval.lastcp);
4600 regcppop(rex);
4601 goto do_no;
4602
95b24440
DM
4603 case resume_CURLYX:
4604 goto resume_point_CURLYX;
4605 case resume_WHILEM1:
4606 goto resume_point_WHILEM1;
4607 case resume_WHILEM2:
4608 goto resume_point_WHILEM2;
4609 case resume_WHILEM3:
4610 goto resume_point_WHILEM3;
4611 case resume_WHILEM4:
4612 goto resume_point_WHILEM4;
4613 case resume_WHILEM5:
4614 goto resume_point_WHILEM5;
4615 case resume_WHILEM6:
4616 goto resume_point_WHILEM6;
4617 case resume_CURLYM1:
4618 goto resume_point_CURLYM1;
4619 case resume_CURLYM2:
4620 goto resume_point_CURLYM2;
4621 case resume_CURLYM3:
4622 goto resume_point_CURLYM3;
95b24440 4623 case resume_IFMATCH:
dad79028
DM
4624 yes_state = st->u.yes.prev_yes_state;
4625 if (st->logical) {
4626 st->logical = 0;
4627 st->sw = !st->u.ifmatch.wanted;
4628 }
4629 else if (st->u.ifmatch.wanted)
4630 sayNO;
4631
4632 assert(OP(scan) != SUSPEND); /* XXX DAPM tmp */
4633 locinput = PL_reginput = st->locinput;
4634 nextchr = UCHARAT(locinput);
4635 next = scan + ARG(scan);
4636 if (next == scan)
4637 next = NULL;
4638 goto reenter;
4639
95b24440
DM
4640 case resume_PLUS1:
4641 goto resume_point_PLUS1;
4642 case resume_PLUS2:
4643 goto resume_point_PLUS2;
4644 case resume_PLUS3:
4645 goto resume_point_PLUS3;
4646 case resume_PLUS4:
4647 goto resume_point_PLUS4;
95b24440
DM
4648 default:
4649 Perl_croak(aTHX_ "regexp resume memory corruption");
4650 }
95b24440 4651 }
aa283a38
DM
4652
4653final_exit:
4654
5d9a96ca
DM
4655 /* restore original high-water mark */
4656 PL_regmatch_slab = orig_slab;
4657 PL_regmatch_state = orig_state;
4658
4659 /* free all slabs above current one */
4660 if (orig_slab->next) {
c4fd8992 4661 regmatch_slab *sl = orig_slab->next;
5d9a96ca
DM
4662 orig_slab->next = NULL;
4663 while (sl) {
c4fd8992 4664 regmatch_slab * const osl = sl;
5d9a96ca 4665 sl = sl->next;
ad65c075 4666 Safefree(osl);
5d9a96ca
DM
4667 }
4668 }
4669
95b24440
DM
4670 return result;
4671
a687059c
LW
4672}
4673
4674/*
4675 - regrepeat - repeatedly match something simple, report how many
4676 */
4677/*
4678 * [This routine now assumes that it will only match on things of length 1.
4679 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 4680 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 4681 */
76e3520e 4682STATIC I32
32fc9b6a 4683S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max)
a687059c 4684{
27da23d5 4685 dVAR;
a0d0e21e 4686 register char *scan;
a0d0e21e 4687 register I32 c;
3280af22 4688 register char *loceol = PL_regeol;
a0ed51b3 4689 register I32 hardcount = 0;
53c4c00c 4690 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 4691
3280af22 4692 scan = PL_reginput;
faf11cac
HS
4693 if (max == REG_INFTY)
4694 max = I32_MAX;
4695 else if (max < loceol - scan)
7f596f4c 4696 loceol = scan + max;
a0d0e21e 4697 switch (OP(p)) {
22c35a8c 4698 case REG_ANY:
1aa99e6b 4699 if (do_utf8) {
ffc61ed2 4700 loceol = PL_regeol;
1aa99e6b 4701 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
4702 scan += UTF8SKIP(scan);
4703 hardcount++;
4704 }
4705 } else {
4706 while (scan < loceol && *scan != '\n')
4707 scan++;
a0ed51b3
LW
4708 }
4709 break;
ffc61ed2 4710 case SANY:
def8e4ea
JH
4711 if (do_utf8) {
4712 loceol = PL_regeol;
a0804c9e 4713 while (scan < loceol && hardcount < max) {
def8e4ea
JH
4714 scan += UTF8SKIP(scan);
4715 hardcount++;
4716 }
4717 }
4718 else
4719 scan = loceol;
a0ed51b3 4720 break;
f33976b4
DB
4721 case CANY:
4722 scan = loceol;
4723 break;
090f7165
JH
4724 case EXACT: /* length of string is 1 */
4725 c = (U8)*STRING(p);
4726 while (scan < loceol && UCHARAT(scan) == c)
4727 scan++;
bbce6d69 4728 break;
4729 case EXACTF: /* length of string is 1 */
cd439c50 4730 c = (U8)*STRING(p);
bbce6d69 4731 while (scan < loceol &&
22c35a8c 4732 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 4733 scan++;
4734 break;
4735 case EXACTFL: /* length of string is 1 */
3280af22 4736 PL_reg_flags |= RF_tainted;
cd439c50 4737 c = (U8)*STRING(p);
bbce6d69 4738 while (scan < loceol &&
22c35a8c 4739 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
4740 scan++;
4741 break;
4742 case ANYOF:
ffc61ed2
JH
4743 if (do_utf8) {
4744 loceol = PL_regeol;
cfc92286 4745 while (hardcount < max && scan < loceol &&
32fc9b6a 4746 reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
ffc61ed2
JH
4747 scan += UTF8SKIP(scan);
4748 hardcount++;
4749 }
4750 } else {
32fc9b6a 4751 while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
ffc61ed2
JH
4752 scan++;
4753 }
a0d0e21e
LW
4754 break;
4755 case ALNUM:
1aa99e6b 4756 if (do_utf8) {
ffc61ed2 4757 loceol = PL_regeol;
1a4fad37 4758 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4759 while (hardcount < max && scan < loceol &&
3568d838 4760 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4761 scan += UTF8SKIP(scan);
4762 hardcount++;
4763 }
4764 } else {
4765 while (scan < loceol && isALNUM(*scan))
4766 scan++;
a0ed51b3
LW
4767 }
4768 break;
bbce6d69 4769 case ALNUML:
3280af22 4770 PL_reg_flags |= RF_tainted;
1aa99e6b 4771 if (do_utf8) {
ffc61ed2 4772 loceol = PL_regeol;
1aa99e6b
IH
4773 while (hardcount < max && scan < loceol &&
4774 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4775 scan += UTF8SKIP(scan);
4776 hardcount++;
4777 }
4778 } else {
4779 while (scan < loceol && isALNUM_LC(*scan))
4780 scan++;
a0ed51b3
LW
4781 }
4782 break;
a0d0e21e 4783 case NALNUM:
1aa99e6b 4784 if (do_utf8) {
ffc61ed2 4785 loceol = PL_regeol;
1a4fad37 4786 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4787 while (hardcount < max && scan < loceol &&
3568d838 4788 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4789 scan += UTF8SKIP(scan);
4790 hardcount++;
4791 }
4792 } else {
4793 while (scan < loceol && !isALNUM(*scan))
4794 scan++;
a0ed51b3
LW
4795 }
4796 break;
bbce6d69 4797 case NALNUML:
3280af22 4798 PL_reg_flags |= RF_tainted;
1aa99e6b 4799 if (do_utf8) {
ffc61ed2 4800 loceol = PL_regeol;
1aa99e6b
IH
4801 while (hardcount < max && scan < loceol &&
4802 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4803 scan += UTF8SKIP(scan);
4804 hardcount++;
4805 }
4806 } else {
4807 while (scan < loceol && !isALNUM_LC(*scan))
4808 scan++;
a0ed51b3
LW
4809 }
4810 break;
a0d0e21e 4811 case SPACE:
1aa99e6b 4812 if (do_utf8) {
ffc61ed2 4813 loceol = PL_regeol;
1a4fad37 4814 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4815 while (hardcount < max && scan < loceol &&
3568d838
JH
4816 (*scan == ' ' ||
4817 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4818 scan += UTF8SKIP(scan);
4819 hardcount++;
4820 }
4821 } else {
4822 while (scan < loceol && isSPACE(*scan))
4823 scan++;
a0ed51b3
LW
4824 }
4825 break;
bbce6d69 4826 case SPACEL:
3280af22 4827 PL_reg_flags |= RF_tainted;
1aa99e6b 4828 if (do_utf8) {
ffc61ed2 4829 loceol = PL_regeol;
1aa99e6b 4830 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4831 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4832 scan += UTF8SKIP(scan);
4833 hardcount++;
4834 }
4835 } else {
4836 while (scan < loceol && isSPACE_LC(*scan))
4837 scan++;
a0ed51b3
LW
4838 }
4839 break;
a0d0e21e 4840 case NSPACE:
1aa99e6b 4841 if (do_utf8) {
ffc61ed2 4842 loceol = PL_regeol;
1a4fad37 4843 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4844 while (hardcount < max && scan < loceol &&
3568d838
JH
4845 !(*scan == ' ' ||
4846 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4847 scan += UTF8SKIP(scan);
4848 hardcount++;
4849 }
4850 } else {
4851 while (scan < loceol && !isSPACE(*scan))
4852 scan++;
4853 break;
a0ed51b3 4854 }
bbce6d69 4855 case NSPACEL:
3280af22 4856 PL_reg_flags |= RF_tainted;
1aa99e6b 4857 if (do_utf8) {
ffc61ed2 4858 loceol = PL_regeol;
1aa99e6b 4859 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4860 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4861 scan += UTF8SKIP(scan);
4862 hardcount++;
4863 }
4864 } else {
4865 while (scan < loceol && !isSPACE_LC(*scan))
4866 scan++;
a0ed51b3
LW
4867 }
4868 break;
a0d0e21e 4869 case DIGIT:
1aa99e6b 4870 if (do_utf8) {
ffc61ed2 4871 loceol = PL_regeol;
1a4fad37 4872 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4873 while (hardcount < max && scan < loceol &&
3568d838 4874 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4875 scan += UTF8SKIP(scan);
4876 hardcount++;
4877 }
4878 } else {
4879 while (scan < loceol && isDIGIT(*scan))
4880 scan++;
a0ed51b3
LW
4881 }
4882 break;
a0d0e21e 4883 case NDIGIT:
1aa99e6b 4884 if (do_utf8) {
ffc61ed2 4885 loceol = PL_regeol;
1a4fad37 4886 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4887 while (hardcount < max && scan < loceol &&
3568d838 4888 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4889 scan += UTF8SKIP(scan);
4890 hardcount++;
4891 }
4892 } else {
4893 while (scan < loceol && !isDIGIT(*scan))
4894 scan++;
a0ed51b3
LW
4895 }
4896 break;
a0d0e21e
LW
4897 default: /* Called on something of 0 width. */
4898 break; /* So match right here or not at all. */
4899 }
a687059c 4900
a0ed51b3
LW
4901 if (hardcount)
4902 c = hardcount;
4903 else
4904 c = scan - PL_reginput;
3280af22 4905 PL_reginput = scan;
a687059c 4906
a3621e74 4907 DEBUG_r({
ab74612d 4908 SV *re_debug_flags = NULL;
6136c704 4909 SV * const prop = sv_newmortal();
a3621e74
YO
4910 GET_RE_DEBUG_FLAGS;
4911 DEBUG_EXECUTE_r({
32fc9b6a 4912 regprop(prog, prop, p);
9041c2e3
NIS
4913 PerlIO_printf(Perl_debug_log,
4914 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3f7c398e 4915 REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max);
c277df42 4916 });
a3621e74 4917 });
9041c2e3 4918
a0d0e21e 4919 return(c);
a687059c
LW
4920}
4921
c277df42 4922
76234dfb 4923#ifndef PERL_IN_XSUB_RE
c277df42 4924/*
ffc61ed2
JH
4925- regclass_swash - prepare the utf8 swash
4926*/
4927
4928SV *
32fc9b6a 4929Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
ffc61ed2 4930{
97aff369 4931 dVAR;
9e55ce06
JH
4932 SV *sw = NULL;
4933 SV *si = NULL;
4934 SV *alt = NULL;
32fc9b6a 4935 const struct reg_data *data = prog ? prog->data : NULL;
ffc61ed2 4936
4f639d21 4937 if (data && data->count) {
a3b680e6 4938 const U32 n = ARG(node);
ffc61ed2 4939
4f639d21
DM
4940 if (data->what[n] == 's') {
4941 SV * const rv = (SV*)data->data[n];
890ce7af 4942 AV * const av = (AV*)SvRV((SV*)rv);
2d03de9c 4943 SV **const ary = AvARRAY(av);
9e55ce06 4944 SV **a, **b;
9041c2e3 4945
711a919c 4946 /* See the end of regcomp.c:S_regclass() for
9e55ce06
JH
4947 * documentation of these array elements. */
4948
b11f357e 4949 si = *ary;
8f7f7219 4950 a = SvROK(ary[1]) ? &ary[1] : 0;
b11f357e
JH
4951 b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4952
ffc61ed2
JH
4953 if (a)
4954 sw = *a;
4955 else if (si && doinit) {
4956 sw = swash_init("utf8", "", si, 1, 0);
4957 (void)av_store(av, 1, sw);
4958 }
9e55ce06
JH
4959 if (b)
4960 alt = *b;
ffc61ed2
JH
4961 }
4962 }
4963
9e55ce06
JH
4964 if (listsvp)
4965 *listsvp = si;
4966 if (altsvp)
4967 *altsvp = alt;
ffc61ed2
JH
4968
4969 return sw;
4970}
76234dfb 4971#endif
ffc61ed2
JH
4972
4973/*
ba7b4546 4974 - reginclass - determine if a character falls into a character class
832705d4
JH
4975
4976 The n is the ANYOF regnode, the p is the target string, lenp
4977 is pointer to the maximum length of how far to go in the p
4978 (if the lenp is zero, UTF8SKIP(p) is used),
4979 do_utf8 tells whether the target string is in UTF-8.
4980
bbce6d69 4981 */
4982
76e3520e 4983STATIC bool
32fc9b6a 4984S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
bbce6d69 4985{
27da23d5 4986 dVAR;
a3b680e6 4987 const char flags = ANYOF_FLAGS(n);
bbce6d69 4988 bool match = FALSE;
cc07378b 4989 UV c = *p;
ae9ddab8 4990 STRLEN len = 0;
9e55ce06 4991 STRLEN plen;
1aa99e6b 4992
19f67299
TS
4993 if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
4994 c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
4ad0818d
DM
4995 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
4996 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
19f67299
TS
4997 if (len == (STRLEN)-1)
4998 Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
4999 }
bbce6d69 5000
0f0076b4 5001 plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
ffc61ed2 5002 if (do_utf8 || (flags & ANYOF_UNICODE)) {
9e55ce06
JH
5003 if (lenp)
5004 *lenp = 0;
ffc61ed2 5005 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
5006 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5007 match = TRUE;
bbce6d69 5008 }
3568d838 5009 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 5010 match = TRUE;
ffc61ed2 5011 if (!match) {
9e55ce06 5012 AV *av;
32fc9b6a 5013 SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
ffc61ed2
JH
5014
5015 if (sw) {
3568d838 5016 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
5017 match = TRUE;
5018 else if (flags & ANYOF_FOLD) {
9e55ce06
JH
5019 if (!match && lenp && av) {
5020 I32 i;
9e55ce06 5021 for (i = 0; i <= av_len(av); i++) {
890ce7af 5022 SV* const sv = *av_fetch(av, i, FALSE);
9e55ce06 5023 STRLEN len;
890ce7af 5024 const char * const s = SvPV_const(sv, len);
9e55ce06 5025
061b10df 5026 if (len <= plen && memEQ(s, (char*)p, len)) {
9e55ce06
JH
5027 *lenp = len;
5028 match = TRUE;
5029 break;
5030 }
5031 }
5032 }
5033 if (!match) {
89ebb4a3 5034 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4a623e43
JH
5035 STRLEN tmplen;
5036
9e55ce06
JH
5037 to_utf8_fold(p, tmpbuf, &tmplen);
5038 if (swash_fetch(sw, tmpbuf, do_utf8))
5039 match = TRUE;
5040 }
ffc61ed2
JH
5041 }
5042 }
bbce6d69 5043 }
9e55ce06 5044 if (match && lenp && *lenp == 0)
0f0076b4 5045 *lenp = UNISKIP(NATIVE_TO_UNI(c));
bbce6d69 5046 }
1aa99e6b 5047 if (!match && c < 256) {
ffc61ed2
JH
5048 if (ANYOF_BITMAP_TEST(n, c))
5049 match = TRUE;
5050 else if (flags & ANYOF_FOLD) {
eb160463 5051 U8 f;
a0ed51b3 5052
ffc61ed2
JH
5053 if (flags & ANYOF_LOCALE) {
5054 PL_reg_flags |= RF_tainted;
5055 f = PL_fold_locale[c];
5056 }
5057 else
5058 f = PL_fold[c];
5059 if (f != c && ANYOF_BITMAP_TEST(n, f))
5060 match = TRUE;
5061 }
5062
5063 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 5064 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
5065 if (
5066 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
5067 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
5068 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
5069 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
5070 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
5071 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
5072 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
5073 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5074 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
5075 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
5076 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
5077 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
5078 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
5079 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
5080 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
5081 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
5082 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
5083 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
5084 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
5085 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
5086 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
5087 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
5088 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
5089 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
5090 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
5091 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
5092 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
5093 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
5094 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
5095 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
5096 ) /* How's that for a conditional? */
5097 {
5098 match = TRUE;
5099 }
a0ed51b3 5100 }
a0ed51b3
LW
5101 }
5102
a0ed51b3
LW
5103 return (flags & ANYOF_INVERT) ? !match : match;
5104}
161b471a 5105
dfe13c55 5106STATIC U8 *
0ce71af7 5107S_reghop3(U8 *s, I32 off, const U8* lim)
9041c2e3 5108{
97aff369 5109 dVAR;
a0ed51b3 5110 if (off >= 0) {
1aa99e6b 5111 while (off-- && s < lim) {
ffc61ed2 5112 /* XXX could check well-formedness here */
a0ed51b3 5113 s += UTF8SKIP(s);
ffc61ed2 5114 }
a0ed51b3
LW
5115 }
5116 else {
5117 while (off++) {
1aa99e6b 5118 if (s > lim) {
a0ed51b3 5119 s--;
ffc61ed2 5120 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 5121 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 5122 s--;
ffc61ed2
JH
5123 }
5124 /* XXX could check well-formedness here */
a0ed51b3
LW
5125 }
5126 }
5127 }
5128 return s;
5129}
161b471a 5130
dfe13c55 5131STATIC U8 *
0ce71af7 5132S_reghopmaybe3(U8* s, I32 off, const U8* lim)
a0ed51b3 5133{
97aff369 5134 dVAR;
a0ed51b3 5135 if (off >= 0) {
1aa99e6b 5136 while (off-- && s < lim) {
ffc61ed2 5137 /* XXX could check well-formedness here */
a0ed51b3 5138 s += UTF8SKIP(s);
ffc61ed2 5139 }
a0ed51b3
LW
5140 if (off >= 0)
5141 return 0;
5142 }
5143 else {
5144 while (off++) {
1aa99e6b 5145 if (s > lim) {
a0ed51b3 5146 s--;
ffc61ed2 5147 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 5148 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 5149 s--;
ffc61ed2
JH
5150 }
5151 /* XXX could check well-formedness here */
a0ed51b3
LW
5152 }
5153 else
5154 break;
5155 }
5156 if (off <= 0)
5157 return 0;
5158 }
5159 return s;
5160}
51371543 5161
51371543 5162static void
acfe0abc 5163restore_pos(pTHX_ void *arg)
51371543 5164{
97aff369 5165 dVAR;
097eb12c 5166 regexp * const rex = (regexp *)arg;
51371543
GS
5167 if (PL_reg_eval_set) {
5168 if (PL_reg_oldsaved) {
4f639d21
DM
5169 rex->subbeg = PL_reg_oldsaved;
5170 rex->sublen = PL_reg_oldsavedlen;
f8c7b90f 5171#ifdef PERL_OLD_COPY_ON_WRITE
4f639d21 5172 rex->saved_copy = PL_nrs;
ed252734 5173#endif
4f639d21 5174 RX_MATCH_COPIED_on(rex);
51371543
GS
5175 }
5176 PL_reg_magic->mg_len = PL_reg_oldpos;
5177 PL_reg_eval_set = 0;
5178 PL_curpm = PL_reg_oldcurpm;
5179 }
5180}
33b8afdf
JH
5181
5182STATIC void
5183S_to_utf8_substr(pTHX_ register regexp *prog)
5184{
33b8afdf 5185 if (prog->float_substr && !prog->float_utf8) {
097eb12c
AL
5186 SV* const sv = newSVsv(prog->float_substr);
5187 prog->float_utf8 = sv;
33b8afdf
JH
5188 sv_utf8_upgrade(sv);
5189 if (SvTAIL(prog->float_substr))
5190 SvTAIL_on(sv);
5191 if (prog->float_substr == prog->check_substr)
5192 prog->check_utf8 = sv;
5193 }
5194 if (prog->anchored_substr && !prog->anchored_utf8) {
097eb12c
AL
5195 SV* const sv = newSVsv(prog->anchored_substr);
5196 prog->anchored_utf8 = sv;
33b8afdf
JH
5197 sv_utf8_upgrade(sv);
5198 if (SvTAIL(prog->anchored_substr))
5199 SvTAIL_on(sv);
5200 if (prog->anchored_substr == prog->check_substr)
5201 prog->check_utf8 = sv;
5202 }
5203}
5204
5205STATIC void
5206S_to_byte_substr(pTHX_ register regexp *prog)
5207{
97aff369 5208 dVAR;
33b8afdf 5209 if (prog->float_utf8 && !prog->float_substr) {
097eb12c
AL
5210 SV* sv = newSVsv(prog->float_utf8);
5211 prog->float_substr = sv;
33b8afdf
JH
5212 if (sv_utf8_downgrade(sv, TRUE)) {
5213 if (SvTAIL(prog->float_utf8))
5214 SvTAIL_on(sv);
5215 } else {
5216 SvREFCNT_dec(sv);
5217 prog->float_substr = sv = &PL_sv_undef;
5218 }
5219 if (prog->float_utf8 == prog->check_utf8)
5220 prog->check_substr = sv;
5221 }
5222 if (prog->anchored_utf8 && !prog->anchored_substr) {
097eb12c
AL
5223 SV* sv = newSVsv(prog->anchored_utf8);
5224 prog->anchored_substr = sv;
33b8afdf
JH
5225 if (sv_utf8_downgrade(sv, TRUE)) {
5226 if (SvTAIL(prog->anchored_utf8))
5227 SvTAIL_on(sv);
5228 } else {
5229 SvREFCNT_dec(sv);
5230 prog->anchored_substr = sv = &PL_sv_undef;
5231 }
5232 if (prog->anchored_utf8 == prog->check_utf8)
5233 prog->check_substr = sv;
5234 }
5235}
66610fdd
RGS
5236
5237/*
5238 * Local variables:
5239 * c-indentation-style: bsd
5240 * c-basic-offset: 4
5241 * indent-tabs-mode: t
5242 * End:
5243 *
37442d52
RGS
5244 * ex: set ts=8 sts=4 sw=4 noet:
5245 */