This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: [PATCH] More regex optimisations and debug enhancements (including Andys stuff...
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
61296642
DM
8/* This file contains functions for executing a regular expression. See
9 * also regcomp.c which funnily enough, contains functions for compiling
166f8a29 10 * a regular expression.
e4a054ea
DM
11 *
12 * This file is also copied at build time to ext/re/re_exec.c, where
13 * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14 * This causes the main functions to be compiled under new names and with
15 * debugging support added, which makes "use re 'debug'" work.
16
166f8a29
DM
17 */
18
a687059c
LW
19/* NOTE: this is derived from Henry Spencer's regexp code, and should not
20 * confused with the original package (see point 3 below). Thanks, Henry!
21 */
22
23/* Additional note: this code is very heavily munged from Henry's version
24 * in places. In some spots I've traded clarity for efficiency, so don't
25 * blame Henry for some of the lack of readability.
26 */
27
e50aee73
AD
28/* The names of the functions have been changed from regcomp and
29 * regexec to pregcomp and pregexec in order to avoid conflicts
30 * with the POSIX routines of the same names.
31*/
32
b9d5759e 33#ifdef PERL_EXT_RE_BUILD
54df2634 34#include "re_top.h"
9041c2e3 35#endif
56953603 36
a687059c 37/*
e50aee73 38 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
39 *
40 * Copyright (c) 1986 by University of Toronto.
41 * Written by Henry Spencer. Not derived from licensed software.
42 *
43 * Permission is granted to anyone to use this software for any
44 * purpose on any computer system, and to redistribute it freely,
45 * subject to the following restrictions:
46 *
47 * 1. The author is not responsible for the consequences of use of
48 * this software, no matter how awful, even if they arise
49 * from defects in it.
50 *
51 * 2. The origin of this software must not be misrepresented, either
52 * by explicit claim or by omission.
53 *
54 * 3. Altered versions must be plainly marked as such, and must not
55 * be misrepresented as being the original software.
56 *
57 **** Alterations to Henry's code are...
58 ****
4bb101f2 59 **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 60 **** 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 61 ****
9ef589d8
LW
62 **** You may distribute under the terms of either the GNU General Public
63 **** License or the Artistic License, as specified in the README file.
a687059c
LW
64 *
65 * Beware that some of this code is subtly aware of the way operator
66 * precedence is structured in regular expressions. Serious changes in
67 * regular-expression syntax might require a total rethink.
68 */
69#include "EXTERN.h"
864dbfa3 70#define PERL_IN_REGEXEC_C
a687059c 71#include "perl.h"
0f5d15d6 72
54df2634
NC
73#ifdef PERL_IN_XSUB_RE
74# include "re_comp.h"
75#else
76# include "regcomp.h"
77#endif
a687059c 78
c277df42
IZ
79#define RF_tainted 1 /* tainted information used? */
80#define RF_warned 2 /* warned about big count? */
ce862d02 81#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
82#define RF_utf8 8 /* String contains multibyte chars? */
83
eb160463 84#define UTF ((PL_reg_flags & RF_utf8) != 0)
ce862d02
IZ
85
86#define RS_init 1 /* eval environment created */
87#define RS_set 2 /* replsv value is set */
c277df42 88
a687059c
LW
89#ifndef STATIC
90#define STATIC static
91#endif
92
32fc9b6a 93#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
7d3e948e 94
c277df42
IZ
95/*
96 * Forwards.
97 */
98
33b8afdf 99#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 100#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 101
3dab1dad
YO
102#define HOPc(pos,off) \
103 (char *)(PL_reg_match_utf8 \
52657f30 104 ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
3dab1dad
YO
105 : (U8*)(pos + off))
106#define HOPBACKc(pos, off) \
107 (char*)(PL_reg_match_utf8\
b9ea4ed6 108 ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
efb30f32 109 : (pos - off >= PL_bostr) \
3dab1dad
YO
110 ? pos - off \
111 : NULL)
efb30f32 112
53c4c00c 113#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b 114#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
1aa99e6b 115
1a4fad37
AL
116#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
117 if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
118#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
119#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
120#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
121#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
51371543 122
3dab1dad
YO
123/* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
124
5f80c4cf 125/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
126#define JUMPABLE(rn) ( \
127 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
128 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
129 OP(rn) == PLUS || OP(rn) == MINMOD || \
3dab1dad 130 (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
131)
132
cca55fe3 133#define HAS_TEXT(rn) ( \
3dab1dad 134 PL_regkind[OP(rn)] == EXACT || PL_regkind[OP(rn)] == REF \
cca55fe3 135)
e2d8ce26 136
a84d97b6
HS
137/*
138 Search for mandatory following text node; for lookahead, the text must
139 follow but for lookbehind (rn->flags != 0) we skip to the next step.
140*/
cca55fe3 141#define FIND_NEXT_IMPT(rn) STMT_START { \
3dab1dad
YO
142 while (JUMPABLE(rn)) { \
143 const OPCODE type = OP(rn); \
144 if (type == SUSPEND || PL_regkind[type] == CURLY) \
e2d8ce26 145 rn = NEXTOPER(NEXTOPER(rn)); \
3dab1dad 146 else if (type == PLUS) \
cca55fe3 147 rn = NEXTOPER(rn); \
3dab1dad 148 else if (type == IFMATCH) \
a84d97b6 149 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
e2d8ce26 150 else rn += NEXT_OFF(rn); \
3dab1dad 151 } \
5f80c4cf 152} STMT_END
74750237 153
acfe0abc 154static void restore_pos(pTHX_ void *arg);
51371543 155
76e3520e 156STATIC CHECKPOINT
cea2e8a9 157S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 158{
97aff369 159 dVAR;
a3b680e6 160 const int retval = PL_savestack_ix;
b1ce53c5 161#define REGCP_PAREN_ELEMS 4
a3b680e6 162 const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
163 int p;
164
e49a9654
IH
165 if (paren_elems_to_push < 0)
166 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
167
a01268b5 168#define REGCP_OTHER_ELEMS 6
4b3c1a47 169 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 170 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 171/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
172 SSPUSHINT(PL_regendp[p]);
173 SSPUSHINT(PL_regstartp[p]);
3280af22 174 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
175 SSPUSHINT(p);
176 }
b1ce53c5 177/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
178 SSPUSHINT(PL_regsize);
179 SSPUSHINT(*PL_reglastparen);
a01268b5 180 SSPUSHINT(*PL_reglastcloseparen);
3280af22 181 SSPUSHPTR(PL_reginput);
41123dfd
JH
182#define REGCP_FRAME_ELEMS 2
183/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
184 * are needed for the regexp context stack bookkeeping. */
185 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 186 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 187
a0d0e21e
LW
188 return retval;
189}
190
c277df42 191/* These are needed since we do not localize EVAL nodes: */
a3621e74 192# define REGCP_SET(cp) DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, \
faccc32b 193 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 194 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 195
a3621e74 196# define REGCP_UNWIND(cp) DEBUG_EXECUTE_r(cp != PL_savestack_ix ? \
c3464db5 197 PerlIO_printf(Perl_debug_log, \
faccc32b 198 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 199 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 200
76e3520e 201STATIC char *
097eb12c 202S_regcppop(pTHX_ const regexp *rex)
a0d0e21e 203{
97aff369 204 dVAR;
b1ce53c5 205 I32 i;
a0d0e21e 206 char *input;
b1ce53c5 207
a3621e74
YO
208 GET_RE_DEBUG_FLAGS_DECL;
209
b1ce53c5 210 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 211 i = SSPOPINT;
b1ce53c5
JH
212 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
213 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 214 input = (char *) SSPOPPTR;
a01268b5 215 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
216 *PL_reglastparen = SSPOPINT;
217 PL_regsize = SSPOPINT;
b1ce53c5
JH
218
219 /* Now restore the parentheses context. */
41123dfd
JH
220 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
221 i > 0; i -= REGCP_PAREN_ELEMS) {
1df70142 222 I32 tmps;
097eb12c 223 U32 paren = (U32)SSPOPINT;
3280af22 224 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
225 PL_regstartp[paren] = SSPOPINT;
226 tmps = SSPOPINT;
3280af22
NIS
227 if (paren <= *PL_reglastparen)
228 PL_regendp[paren] = tmps;
a3621e74 229 DEBUG_EXECUTE_r(
c3464db5 230 PerlIO_printf(Perl_debug_log,
b900a521 231 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 232 (UV)paren, (IV)PL_regstartp[paren],
b900a521 233 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 234 (IV)PL_regendp[paren],
3280af22 235 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 236 );
a0d0e21e 237 }
a3621e74 238 DEBUG_EXECUTE_r(
bb7a0f54 239 if (*PL_reglastparen + 1 <= rex->nparens) {
c3464db5 240 PerlIO_printf(Perl_debug_log,
faccc32b 241 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
4f639d21 242 (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
c277df42
IZ
243 }
244 );
daf18116 245#if 1
dafc8851
JH
246 /* It would seem that the similar code in regtry()
247 * already takes care of this, and in fact it is in
248 * a better location to since this code can #if 0-ed out
249 * but the code in regtry() is needed or otherwise tests
250 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
251 * (as of patchlevel 7877) will fail. Then again,
252 * this code seems to be necessary or otherwise
253 * building DynaLoader will fail:
254 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
255 * --jhi */
bb7a0f54 256 for (i = *PL_reglastparen + 1; (U32)i <= rex->nparens; i++) {
097eb12c
AL
257 if (i > PL_regsize)
258 PL_regstartp[i] = -1;
259 PL_regendp[i] = -1;
a0d0e21e 260 }
dafc8851 261#endif
a0d0e21e
LW
262 return input;
263}
264
02db2b7b 265#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 266
95b24440 267#define TRYPAREN(paren, n, input, where) { \
29d1e993
HS
268 if (paren) { \
269 if (n) { \
270 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
271 PL_regendp[paren] = input - PL_bostr; \
272 } \
273 else \
274 PL_regendp[paren] = -1; \
275 } \
95b24440
DM
276 REGMATCH(next, where); \
277 if (result) \
29d1e993
HS
278 sayYES; \
279 if (paren && n) \
280 PL_regendp[paren] = -1; \
281}
282
283
a687059c 284/*
e50aee73 285 * pregexec and friends
a687059c
LW
286 */
287
76234dfb 288#ifndef PERL_IN_XSUB_RE
a687059c 289/*
c277df42 290 - pregexec - match a regexp against a string
a687059c 291 */
c277df42 292I32
864dbfa3 293Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 294 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
295/* strend: pointer to null at end of string */
296/* strbeg: real beginning of string */
297/* minend: end of match must be >=minend after stringarg. */
298/* nosave: For optimizations. */
299{
300 return
9041c2e3 301 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
302 nosave ? 0 : REXEC_COPY_STR);
303}
76234dfb 304#endif
22e551b9 305
9041c2e3 306/*
cad2e5aa
JH
307 * Need to implement the following flags for reg_anch:
308 *
309 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
310 * USE_INTUIT_ML
311 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
312 * INTUIT_AUTORITATIVE_ML
313 * INTUIT_ONCE_NOML - Intuit can match in one location only.
314 * INTUIT_ONCE_ML
315 *
316 * Another flag for this function: SECOND_TIME (so that float substrs
317 * with giant delta may be not rechecked).
318 */
319
320/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
321
3f7c398e 322/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
cad2e5aa
JH
323 Otherwise, only SvCUR(sv) is used to get strbeg. */
324
325/* XXXX We assume that strpos is strbeg unless sv. */
326
6eb5f6b9
JH
327/* XXXX Some places assume that there is a fixed substring.
328 An update may be needed if optimizer marks as "INTUITable"
329 RExen without fixed substrings. Similarly, it is assumed that
330 lengths of all the strings are no more than minlen, thus they
331 cannot come from lookahead.
332 (Or minlen should take into account lookahead.) */
333
2c2d71f5
JH
334/* A failure to find a constant substring means that there is no need to make
335 an expensive call to REx engine, thus we celebrate a failure. Similarly,
336 finding a substring too deep into the string means that less calls to
30944b6d
IZ
337 regtry() should be needed.
338
339 REx compiler's optimizer found 4 possible hints:
340 a) Anchored substring;
341 b) Fixed substring;
342 c) Whether we are anchored (beginning-of-line or \G);
343 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 344 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
345 string which does not contradict any of them.
346 */
2c2d71f5 347
6eb5f6b9
JH
348/* Most of decisions we do here should have been done at compile time.
349 The nodes of the REx which we used for the search should have been
350 deleted from the finite automaton. */
351
cad2e5aa
JH
352char *
353Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
354 char *strend, U32 flags, re_scream_pos_data *data)
355{
97aff369 356 dVAR;
b7953727 357 register I32 start_shift = 0;
cad2e5aa 358 /* Should be nonnegative! */
b7953727 359 register I32 end_shift = 0;
2c2d71f5
JH
360 register char *s;
361 register SV *check;
a1933d95 362 char *strbeg;
cad2e5aa 363 char *t;
a3b680e6 364 const int do_utf8 = sv ? SvUTF8(sv) : 0; /* if no sv we have to assume bytes */
cad2e5aa 365 I32 ml_anch;
bd61b366
SS
366 register char *other_last = NULL; /* other substr checked before this */
367 char *check_at = NULL; /* check substr found at this pos */
1df70142 368 const I32 multiline = prog->reganch & PMf_MULTILINE;
30944b6d 369#ifdef DEBUGGING
890ce7af
AL
370 const char * const i_strpos = strpos;
371 SV * const dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 372#endif
a3621e74
YO
373
374 GET_RE_DEBUG_FLAGS_DECL;
375
a30b2f1f 376 RX_MATCH_UTF8_set(prog,do_utf8);
cad2e5aa 377
b8d68ded 378 if (prog->reganch & ROPT_UTF8) {
a3621e74 379 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded
JH
380 "UTF-8 regex...\n"));
381 PL_reg_flags |= RF_utf8;
382 }
383
a3621e74 384 DEBUG_EXECUTE_r({
1df70142 385 const char *s = PL_reg_match_utf8 ?
c728cb41
JH
386 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
387 strpos;
1df70142 388 const int len = PL_reg_match_utf8 ?
bb7a0f54 389 (int)strlen(s) : strend - strpos;
2a782b5b
JH
390 if (!PL_colorset)
391 reginitcolors();
b8d68ded 392 if (PL_reg_match_utf8)
a3621e74 393 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded 394 "UTF-8 target...\n"));
2a782b5b 395 PerlIO_printf(Perl_debug_log,
a0288114 396 "%sGuessing start of match, REx%s \"%s%.60s%s%s\" against \"%s%.*s%s%s\"...\n",
e4584336 397 PL_colors[4], PL_colors[5], PL_colors[0],
2a782b5b
JH
398 prog->precomp,
399 PL_colors[1],
400 (strlen(prog->precomp) > 60 ? "..." : ""),
401 PL_colors[0],
402 (int)(len > 60 ? 60 : len),
403 s, PL_colors[1],
404 (len > 60 ? "..." : "")
405 );
406 });
cad2e5aa 407
c344f387
JH
408 /* CHR_DIST() would be more correct here but it makes things slow. */
409 if (prog->minlen > strend - strpos) {
a3621e74 410 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584 411 "String too short... [re_intuit_start]\n"));
cad2e5aa 412 goto fail;
2c2d71f5 413 }
a1933d95 414 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 415 PL_regeol = strend;
33b8afdf
JH
416 if (do_utf8) {
417 if (!prog->check_utf8 && prog->check_substr)
418 to_utf8_substr(prog);
419 check = prog->check_utf8;
420 } else {
421 if (!prog->check_substr && prog->check_utf8)
422 to_byte_substr(prog);
423 check = prog->check_substr;
424 }
425 if (check == &PL_sv_undef) {
a3621e74 426 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
427 "Non-utf string cannot match utf check string\n"));
428 goto fail;
429 }
2c2d71f5 430 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
431 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
432 || ( (prog->reganch & ROPT_ANCH_BOL)
7fba1cd6 433 && !multiline ) ); /* Check after \n? */
cad2e5aa 434
7e25d62c
JH
435 if (!ml_anch) {
436 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
437 | ROPT_IMPLICIT)) /* not a real BOL */
3f7c398e 438 /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
7e25d62c
JH
439 && sv && !SvROK(sv)
440 && (strpos != strbeg)) {
a3621e74 441 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
7e25d62c
JH
442 goto fail;
443 }
444 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 445 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 446 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
447 I32 slen;
448
1aa99e6b 449 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
450 if (SvTAIL(check)) {
451 slen = SvCUR(check); /* >= 1 */
cad2e5aa 452
9041c2e3 453 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5 454 || (strend - s == slen && strend[-1] != '\n')) {
a3621e74 455 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
2c2d71f5 456 goto fail_finish;
cad2e5aa
JH
457 }
458 /* Now should match s[0..slen-2] */
459 slen--;
3f7c398e 460 if (slen && (*SvPVX_const(check) != *s
cad2e5aa 461 || (slen > 1
3f7c398e 462 && memNE(SvPVX_const(check), s, slen)))) {
2c2d71f5 463 report_neq:
a3621e74 464 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
2c2d71f5
JH
465 goto fail_finish;
466 }
cad2e5aa 467 }
3f7c398e 468 else if (*SvPVX_const(check) != *s
653099ff 469 || ((slen = SvCUR(check)) > 1
3f7c398e 470 && memNE(SvPVX_const(check), s, slen)))
2c2d71f5 471 goto report_neq;
c315bfe8 472 check_at = s;
2c2d71f5 473 goto success_at_start;
7e25d62c 474 }
cad2e5aa 475 }
2c2d71f5 476 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 477 s = strpos;
2c2d71f5 478 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 479 end_shift = prog->minlen - start_shift -
653099ff 480 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 481 if (!ml_anch) {
a3b680e6 482 const I32 end = prog->check_offset_max + CHR_SVLEN(check)
653099ff 483 - (SvTAIL(check) != 0);
a3b680e6 484 const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
485
486 if (end_shift < eshift)
487 end_shift = eshift;
488 }
cad2e5aa 489 }
2c2d71f5 490 else { /* Can match at random position */
cad2e5aa
JH
491 ml_anch = 0;
492 s = strpos;
2c2d71f5
JH
493 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
494 /* Should be nonnegative! */
495 end_shift = prog->minlen - start_shift -
653099ff 496 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
497 }
498
2c2d71f5 499#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 500 if (end_shift < 0)
6bbae5e6 501 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
502#endif
503
2c2d71f5
JH
504 restart:
505 /* Find a possible match in the region s..strend by looking for
506 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 507 if (flags & REXEC_SCREAM) {
cad2e5aa 508 I32 p = -1; /* Internal iterator of scream. */
a3b680e6 509 I32 * const pp = data ? data->scream_pos : &p;
cad2e5aa 510
2c2d71f5
JH
511 if (PL_screamfirst[BmRARE(check)] >= 0
512 || ( BmRARE(check) == '\n'
513 && (BmPREVIOUS(check) == SvCUR(check) - 1)
514 && SvTAIL(check) ))
9041c2e3 515 s = screaminstr(sv, check,
2c2d71f5 516 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 517 else
2c2d71f5 518 goto fail_finish;
4addbd3b
HS
519 /* we may be pointing at the wrong string */
520 if (s && RX_MATCH_COPIED(prog))
3f7c398e 521 s = strbeg + (s - SvPVX_const(sv));
cad2e5aa
JH
522 if (data)
523 *data->scream_olds = s;
524 }
f33976b4 525 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
526 s = fbm_instr((U8*)(s + start_shift),
527 (U8*)(strend - end_shift),
7fba1cd6 528 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 529 else
1aa99e6b
IH
530 s = fbm_instr(HOP3(s, start_shift, strend),
531 HOP3(strend, -end_shift, strbeg),
7fba1cd6 532 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
533
534 /* Update the count-of-usability, remove useless subpatterns,
535 unshift s. */
2c2d71f5 536
a0288114 537 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s %s substr \"%s%.*s%s\"%s%s",
2c2d71f5 538 (s ? "Found" : "Did not find"),
33b8afdf 539 (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
2c2d71f5 540 PL_colors[0],
7b0972df 541 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
3f7c398e 542 SvPVX_const(check),
2c2d71f5
JH
543 PL_colors[1], (SvTAIL(check) ? "$" : ""),
544 (s ? " at offset " : "...\n") ) );
545
546 if (!s)
547 goto fail_finish;
548
6eb5f6b9
JH
549 check_at = s;
550
2c2d71f5 551 /* Finish the diagnostic message */
a3621e74 552 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
553
554 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
555 Start with the other substr.
556 XXXX no SCREAM optimization yet - and a very coarse implementation
a0288114 557 XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
2c2d71f5
JH
558 *always* match. Probably should be marked during compile...
559 Probably it is right to do no SCREAM here...
560 */
561
33b8afdf 562 if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
30944b6d 563 /* Take into account the "other" substring. */
2c2d71f5
JH
564 /* XXXX May be hopelessly wrong for UTF... */
565 if (!other_last)
6eb5f6b9 566 other_last = strpos;
33b8afdf 567 if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
30944b6d
IZ
568 do_other_anchored:
569 {
890ce7af
AL
570 char * const last = HOP3c(s, -start_shift, strbeg);
571 char *last1, *last2;
2c2d71f5 572 char *s1 = s;
33b8afdf 573 SV* must;
2c2d71f5 574
2c2d71f5
JH
575 t = s - prog->check_offset_max;
576 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 577 && (!do_utf8
0ce71af7 578 || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
2c2d71f5 579 && t > strpos)))
6f207bd3 580 NOOP;
2c2d71f5
JH
581 else
582 t = strpos;
1aa99e6b 583 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
584 if (t < other_last) /* These positions already checked */
585 t = other_last;
1aa99e6b 586 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
587 if (last < last1)
588 last1 = last;
589 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
590 /* On end-of-str: see comment below. */
33b8afdf
JH
591 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
592 if (must == &PL_sv_undef) {
593 s = (char*)NULL;
a3621e74 594 DEBUG_EXECUTE_r(must = prog->anchored_utf8); /* for debug */
33b8afdf
JH
595 }
596 else
597 s = fbm_instr(
598 (unsigned char*)t,
599 HOP3(HOP3(last1, prog->anchored_offset, strend)
600 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
601 must,
7fba1cd6 602 multiline ? FBMrf_MULTILINE : 0
33b8afdf 603 );
a3621e74 604 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a0288114 605 "%s anchored substr \"%s%.*s%s\"%s",
2c2d71f5
JH
606 (s ? "Found" : "Contradicts"),
607 PL_colors[0],
33b8afdf
JH
608 (int)(SvCUR(must)
609 - (SvTAIL(must)!=0)),
3f7c398e 610 SvPVX_const(must),
33b8afdf 611 PL_colors[1], (SvTAIL(must) ? "$" : "")));
2c2d71f5
JH
612 if (!s) {
613 if (last1 >= last2) {
a3621e74 614 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5
JH
615 ", giving up...\n"));
616 goto fail_finish;
617 }
a3621e74 618 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 619 ", trying floating at offset %ld...\n",
1aa99e6b
IH
620 (long)(HOP3c(s1, 1, strend) - i_strpos)));
621 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
622 s = HOP3c(last, 1, strend);
2c2d71f5
JH
623 goto restart;
624 }
625 else {
a3621e74 626 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 627 (long)(s - i_strpos)));
1aa99e6b
IH
628 t = HOP3c(s, -prog->anchored_offset, strbeg);
629 other_last = HOP3c(s, 1, strend);
30944b6d 630 s = s1;
2c2d71f5
JH
631 if (t == strpos)
632 goto try_at_start;
2c2d71f5
JH
633 goto try_at_offset;
634 }
30944b6d 635 }
2c2d71f5
JH
636 }
637 else { /* Take into account the floating substring. */
33b8afdf
JH
638 char *last, *last1;
639 char *s1 = s;
640 SV* must;
641
642 t = HOP3c(s, -start_shift, strbeg);
643 last1 = last =
644 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
645 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
646 last = HOP3c(t, prog->float_max_offset, strend);
647 s = HOP3c(t, prog->float_min_offset, strend);
648 if (s < other_last)
649 s = other_last;
2c2d71f5 650 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
33b8afdf
JH
651 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
652 /* fbm_instr() takes into account exact value of end-of-str
653 if the check is SvTAIL(ed). Since false positives are OK,
654 and end-of-str is not later than strend we are OK. */
655 if (must == &PL_sv_undef) {
656 s = (char*)NULL;
a3621e74 657 DEBUG_EXECUTE_r(must = prog->float_utf8); /* for debug message */
33b8afdf
JH
658 }
659 else
2c2d71f5 660 s = fbm_instr((unsigned char*)s,
33b8afdf
JH
661 (unsigned char*)last + SvCUR(must)
662 - (SvTAIL(must)!=0),
7fba1cd6 663 must, multiline ? FBMrf_MULTILINE : 0);
a0288114 664 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s floating substr \"%s%.*s%s\"%s",
33b8afdf
JH
665 (s ? "Found" : "Contradicts"),
666 PL_colors[0],
667 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 668 SvPVX_const(must),
33b8afdf
JH
669 PL_colors[1], (SvTAIL(must) ? "$" : "")));
670 if (!s) {
671 if (last1 == last) {
a3621e74 672 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
673 ", giving up...\n"));
674 goto fail_finish;
2c2d71f5 675 }
a3621e74 676 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
677 ", trying anchored starting at offset %ld...\n",
678 (long)(s1 + 1 - i_strpos)));
679 other_last = last;
680 s = HOP3c(t, 1, strend);
681 goto restart;
682 }
683 else {
a3621e74 684 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
33b8afdf
JH
685 (long)(s - i_strpos)));
686 other_last = s; /* Fix this later. --Hugo */
687 s = s1;
688 if (t == strpos)
689 goto try_at_start;
690 goto try_at_offset;
691 }
2c2d71f5 692 }
cad2e5aa 693 }
2c2d71f5
JH
694
695 t = s - prog->check_offset_max;
2c2d71f5 696 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 697 && (!do_utf8
0ce71af7 698 || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*)strpos))
1aa99e6b 699 && t > strpos))) {
2c2d71f5
JH
700 /* Fixed substring is found far enough so that the match
701 cannot start at strpos. */
702 try_at_offset:
cad2e5aa 703 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
704 /* Eventually fbm_*() should handle this, but often
705 anchored_offset is not 0, so this check will not be wasted. */
706 /* XXXX In the code below we prefer to look for "^" even in
707 presence of anchored substrings. And we search even
708 beyond the found float position. These pessimizations
709 are historical artefacts only. */
710 find_anchor:
2c2d71f5 711 while (t < strend - prog->minlen) {
cad2e5aa 712 if (*t == '\n') {
4ee3650e 713 if (t < check_at - prog->check_offset_min) {
33b8afdf 714 if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
4ee3650e
GS
715 /* Since we moved from the found position,
716 we definitely contradict the found anchored
30944b6d
IZ
717 substr. Due to the above check we do not
718 contradict "check" substr.
719 Thus we can arrive here only if check substr
720 is float. Redo checking for "other"=="fixed".
721 */
9041c2e3 722 strpos = t + 1;
a3621e74 723 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
e4584336 724 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
30944b6d
IZ
725 goto do_other_anchored;
726 }
4ee3650e
GS
727 /* We don't contradict the found floating substring. */
728 /* XXXX Why not check for STCLASS? */
cad2e5aa 729 s = t + 1;
a3621e74 730 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
e4584336 731 PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
732 goto set_useful;
733 }
4ee3650e
GS
734 /* Position contradicts check-string */
735 /* XXXX probably better to look for check-string
736 than for "\n", so one should lower the limit for t? */
a3621e74 737 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
e4584336 738 PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 739 other_last = strpos = s = t + 1;
cad2e5aa
JH
740 goto restart;
741 }
742 t++;
743 }
a3621e74 744 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
e4584336 745 PL_colors[0], PL_colors[1]));
2c2d71f5 746 goto fail_finish;
cad2e5aa 747 }
f5952150 748 else {
a3621e74 749 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
e4584336 750 PL_colors[0], PL_colors[1]));
f5952150 751 }
cad2e5aa
JH
752 s = t;
753 set_useful:
33b8afdf 754 ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
cad2e5aa
JH
755 }
756 else {
f5952150 757 /* The found string does not prohibit matching at strpos,
2c2d71f5 758 - no optimization of calling REx engine can be performed,
f5952150
GS
759 unless it was an MBOL and we are not after MBOL,
760 or a future STCLASS check will fail this. */
2c2d71f5
JH
761 try_at_start:
762 /* Even in this situation we may use MBOL flag if strpos is offset
763 wrt the start of the string. */
05b4157f 764 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 765 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
766 /* May be due to an implicit anchor of m{.*foo} */
767 && !(prog->reganch & ROPT_IMPLICIT))
768 {
cad2e5aa
JH
769 t = strpos;
770 goto find_anchor;
771 }
a3621e74 772 DEBUG_EXECUTE_r( if (ml_anch)
f5952150 773 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
e4584336 774 (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
30944b6d 775 );
2c2d71f5 776 success_at_start:
30944b6d 777 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
33b8afdf
JH
778 && (do_utf8 ? (
779 prog->check_utf8 /* Could be deleted already */
780 && --BmUSEFUL(prog->check_utf8) < 0
781 && (prog->check_utf8 == prog->float_utf8)
782 ) : (
783 prog->check_substr /* Could be deleted already */
784 && --BmUSEFUL(prog->check_substr) < 0
785 && (prog->check_substr == prog->float_substr)
786 )))
66e933ab 787 {
cad2e5aa 788 /* If flags & SOMETHING - do not do it many times on the same match */
a3621e74 789 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
33b8afdf
JH
790 SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
791 if (do_utf8 ? prog->check_substr : prog->check_utf8)
792 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
a0714e2c
SS
793 prog->check_substr = prog->check_utf8 = NULL; /* disable */
794 prog->float_substr = prog->float_utf8 = NULL; /* clear */
795 check = NULL; /* abort */
cad2e5aa 796 s = strpos;
3cf5c195
IZ
797 /* XXXX This is a remnant of the old implementation. It
798 looks wasteful, since now INTUIT can use many
6eb5f6b9 799 other heuristics. */
cad2e5aa
JH
800 prog->reganch &= ~RE_USE_INTUIT;
801 }
802 else
803 s = strpos;
804 }
805
6eb5f6b9
JH
806 /* Last resort... */
807 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
808 if (prog->regstclass) {
809 /* minlen == 0 is possible if regstclass is \b or \B,
810 and the fixed substr is ''$.
811 Since minlen is already taken into account, s+1 is before strend;
812 accidentally, minlen >= 1 guaranties no false positives at s + 1
813 even for \b or \B. But (minlen? 1 : 0) below assumes that
814 regstclass does not come from lookahead... */
815 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
816 This leaves EXACTF only, which is dealt with in find_byclass(). */
890ce7af 817 const U8* const str = (U8*)STRING(prog->regstclass);
3dab1dad 818 const int cl_l = (PL_regkind[OP(prog->regstclass)] == EXACT
1aa99e6b 819 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 820 : 1);
a3b680e6 821 const char * const endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
1aa99e6b 822 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
33b8afdf 823 : (prog->float_substr || prog->float_utf8
1aa99e6b
IH
824 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
825 cl_l, strend)
826 : strend);
6eb5f6b9
JH
827
828 t = s;
3b0527fe 829 s = find_byclass(prog, prog->regstclass, s, endpos, NULL);
6eb5f6b9
JH
830 if (!s) {
831#ifdef DEBUGGING
cbbf8932 832 const char *what = NULL;
6eb5f6b9
JH
833#endif
834 if (endpos == strend) {
a3621e74 835 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
836 "Could not match STCLASS...\n") );
837 goto fail;
838 }
a3621e74 839 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab 840 "This position contradicts STCLASS...\n") );
653099ff
GS
841 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
842 goto fail;
6eb5f6b9 843 /* Contradict one of substrings */
33b8afdf
JH
844 if (prog->anchored_substr || prog->anchored_utf8) {
845 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
a3621e74 846 DEBUG_EXECUTE_r( what = "anchored" );
6eb5f6b9 847 hop_and_restart:
1aa99e6b 848 s = HOP3c(t, 1, strend);
66e933ab
GS
849 if (s + start_shift + end_shift > strend) {
850 /* XXXX Should be taken into account earlier? */
a3621e74 851 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab
GS
852 "Could not match STCLASS...\n") );
853 goto fail;
854 }
5e39e1e5
HS
855 if (!check)
856 goto giveup;
a3621e74 857 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 858 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
859 what, (long)(s + start_shift - i_strpos)) );
860 goto restart;
861 }
66e933ab 862 /* Have both, check_string is floating */
6eb5f6b9
JH
863 if (t + start_shift >= check_at) /* Contradicts floating=check */
864 goto retry_floating_check;
865 /* Recheck anchored substring, but not floating... */
9041c2e3 866 s = check_at;
5e39e1e5
HS
867 if (!check)
868 goto giveup;
a3621e74 869 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 870 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
871 (long)(other_last - i_strpos)) );
872 goto do_other_anchored;
873 }
60e71179
GS
874 /* Another way we could have checked stclass at the
875 current position only: */
876 if (ml_anch) {
877 s = t = t + 1;
5e39e1e5
HS
878 if (!check)
879 goto giveup;
a3621e74 880 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 881 "Looking for /%s^%s/m starting at offset %ld...\n",
e4584336 882 PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
60e71179 883 goto try_at_offset;
66e933ab 884 }
33b8afdf 885 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
60e71179 886 goto fail;
6eb5f6b9
JH
887 /* Check is floating subtring. */
888 retry_floating_check:
889 t = check_at - start_shift;
a3621e74 890 DEBUG_EXECUTE_r( what = "floating" );
6eb5f6b9
JH
891 goto hop_and_restart;
892 }
b7953727 893 if (t != s) {
a3621e74 894 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 895 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
896 (long)(t - i_strpos), (long)(s - i_strpos))
897 );
898 }
899 else {
a3621e74 900 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b7953727
JH
901 "Does not contradict STCLASS...\n");
902 );
903 }
6eb5f6b9 904 }
5e39e1e5 905 giveup:
a3621e74 906 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
5e39e1e5
HS
907 PL_colors[4], (check ? "Guessed" : "Giving up"),
908 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 909 return s;
2c2d71f5
JH
910
911 fail_finish: /* Substring not found */
33b8afdf
JH
912 if (prog->check_substr || prog->check_utf8) /* could be removed already */
913 BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
cad2e5aa 914 fail:
a3621e74 915 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
e4584336 916 PL_colors[4], PL_colors[5]));
bd61b366 917 return NULL;
cad2e5aa 918}
9661b544 919
6eb5f6b9 920/* We know what class REx starts with. Try to find this position... */
3b0527fe
DM
921/* if reginfo is NULL, its a dryrun */
922
3c3eec57 923STATIC char *
3b0527fe
DM
924S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char
925*strend, const regmatch_info *reginfo)
a687059c 926{
27da23d5 927 dVAR;
1df70142 928 const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
6eb5f6b9 929 char *m;
d8093b23 930 STRLEN ln;
5dab1207 931 STRLEN lnc;
078c425b 932 register STRLEN uskip;
d8093b23
G
933 unsigned int c1;
934 unsigned int c2;
6eb5f6b9
JH
935 char *e;
936 register I32 tmp = 1; /* Scratch variable? */
a3b680e6 937 register const bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 938
6eb5f6b9
JH
939 /* We know what class it must start with. */
940 switch (OP(c)) {
6eb5f6b9 941 case ANYOF:
388cc4de 942 if (do_utf8) {
078c425b 943 while (s + (uskip = UTF8SKIP(s)) <= strend) {
388cc4de
HS
944 if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
945 !UTF8_IS_INVARIANT((U8)s[0]) ?
32fc9b6a
DM
946 reginclass(prog, c, (U8*)s, 0, do_utf8) :
947 REGINCLASS(prog, c, (U8*)s)) {
3b0527fe 948 if (tmp && (!reginfo || regtry(reginfo, s)))
388cc4de
HS
949 goto got_it;
950 else
951 tmp = doevery;
952 }
953 else
954 tmp = 1;
078c425b 955 s += uskip;
388cc4de
HS
956 }
957 }
958 else {
959 while (s < strend) {
960 STRLEN skip = 1;
961
32fc9b6a 962 if (REGINCLASS(prog, c, (U8*)s) ||
388cc4de
HS
963 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
964 /* The assignment of 2 is intentional:
965 * for the folded sharp s, the skip is 2. */
966 (skip = SHARP_S_SKIP))) {
3b0527fe 967 if (tmp && (!reginfo || regtry(reginfo, s)))
388cc4de
HS
968 goto got_it;
969 else
970 tmp = doevery;
971 }
972 else
973 tmp = 1;
974 s += skip;
975 }
a0d0e21e 976 }
6eb5f6b9 977 break;
f33976b4
DB
978 case CANY:
979 while (s < strend) {
3b0527fe 980 if (tmp && (!reginfo || regtry(reginfo, s)))
f33976b4
DB
981 goto got_it;
982 else
983 tmp = doevery;
984 s++;
985 }
986 break;
6eb5f6b9 987 case EXACTF:
5dab1207
NIS
988 m = STRING(c);
989 ln = STR_LEN(c); /* length to match in octets/bytes */
990 lnc = (I32) ln; /* length to match in characters */
1aa99e6b 991 if (UTF) {
a2a2844f 992 STRLEN ulen1, ulen2;
5dab1207 993 U8 *sm = (U8 *) m;
89ebb4a3
JH
994 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
995 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4ad0818d 996 const U32 uniflags = UTF8_ALLOW_DEFAULT;
a2a2844f
JH
997
998 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
999 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1000
89ebb4a3 1001 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
041457d9 1002 0, uniflags);
89ebb4a3 1003 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
041457d9 1004 0, uniflags);
5dab1207
NIS
1005 lnc = 0;
1006 while (sm < ((U8 *) m + ln)) {
1007 lnc++;
1008 sm += UTF8SKIP(sm);
1009 }
1aa99e6b
IH
1010 }
1011 else {
1012 c1 = *(U8*)m;
1013 c2 = PL_fold[c1];
1014 }
6eb5f6b9
JH
1015 goto do_exactf;
1016 case EXACTFL:
5dab1207
NIS
1017 m = STRING(c);
1018 ln = STR_LEN(c);
1019 lnc = (I32) ln;
d8093b23 1020 c1 = *(U8*)m;
6eb5f6b9
JH
1021 c2 = PL_fold_locale[c1];
1022 do_exactf:
db12adc6 1023 e = HOP3c(strend, -((I32)lnc), s);
b3c9acc1 1024
3b0527fe 1025 if (!reginfo && e < s)
6eb5f6b9 1026 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 1027
60a8b682
JH
1028 /* The idea in the EXACTF* cases is to first find the
1029 * first character of the EXACTF* node and then, if
1030 * necessary, case-insensitively compare the full
1031 * text of the node. The c1 and c2 are the first
1032 * characters (though in Unicode it gets a bit
1033 * more complicated because there are more cases
7f16dd3d
JH
1034 * than just upper and lower: one needs to use
1035 * the so-called folding case for case-insensitive
1036 * matching (called "loose matching" in Unicode).
1037 * ibcmp_utf8() will do just that. */
60a8b682 1038
1aa99e6b 1039 if (do_utf8) {
575cac57 1040 UV c, f;
89ebb4a3 1041 U8 tmpbuf [UTF8_MAXBYTES+1];
575cac57 1042 STRLEN len, foldlen;
4ad0818d 1043 const U32 uniflags = UTF8_ALLOW_DEFAULT;
09091399 1044 if (c1 == c2) {
5dab1207
NIS
1045 /* Upper and lower of 1st char are equal -
1046 * probably not a "letter". */
1aa99e6b 1047 while (s <= e) {
89ebb4a3 1048 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1049 uniflags);
80aecb99
JH
1050 if ( c == c1
1051 && (ln == len ||
3dab1dad
YO
1052 ibcmp_utf8(s, NULL, 0, do_utf8,
1053 m, NULL, ln, (bool)UTF))
3b0527fe 1054 && (!reginfo || regtry(reginfo, s)) )
1aa99e6b 1055 goto got_it;
80aecb99 1056 else {
1df70142 1057 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1058 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1059 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1060 if ( f != c
1061 && (f == c1 || f == c2)
1062 && (ln == foldlen ||
66423254 1063 !ibcmp_utf8((char *) foldbuf,
3dab1dad 1064 NULL, foldlen, do_utf8,
d07ddd77 1065 m,
3dab1dad 1066 NULL, ln, (bool)UTF))
3b0527fe 1067 && (!reginfo || regtry(reginfo, s)) )
80aecb99
JH
1068 goto got_it;
1069 }
1aa99e6b
IH
1070 s += len;
1071 }
09091399
JH
1072 }
1073 else {
1aa99e6b 1074 while (s <= e) {
89ebb4a3 1075 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1076 uniflags);
80aecb99 1077
60a8b682 1078 /* Handle some of the three Greek sigmas cases.
8c01da3c
JH
1079 * Note that not all the possible combinations
1080 * are handled here: some of them are handled
1081 * by the standard folding rules, and some of
1082 * them (the character class or ANYOF cases)
1083 * are handled during compiletime in
1084 * regexec.c:S_regclass(). */
880bd946
JH
1085 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1086 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1087 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99
JH
1088
1089 if ( (c == c1 || c == c2)
1090 && (ln == len ||
3dab1dad
YO
1091 ibcmp_utf8(s, NULL, 0, do_utf8,
1092 m, NULL, ln, (bool)UTF))
3b0527fe 1093 && (!reginfo || regtry(reginfo, s)) )
1aa99e6b 1094 goto got_it;
80aecb99 1095 else {
1df70142 1096 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1097 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1098 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1099 if ( f != c
1100 && (f == c1 || f == c2)
1101 && (ln == foldlen ||
a6872d42 1102 !ibcmp_utf8((char *) foldbuf,
3dab1dad 1103 NULL, foldlen, do_utf8,
d07ddd77 1104 m,
3dab1dad 1105 NULL, ln, (bool)UTF))
3b0527fe 1106 && (!reginfo || regtry(reginfo, s)) )
80aecb99
JH
1107 goto got_it;
1108 }
1aa99e6b
IH
1109 s += len;
1110 }
09091399 1111 }
1aa99e6b
IH
1112 }
1113 else {
1114 if (c1 == c2)
1115 while (s <= e) {
1116 if ( *(U8*)s == c1
1117 && (ln == 1 || !(OP(c) == EXACTF
1118 ? ibcmp(s, m, ln)
1119 : ibcmp_locale(s, m, ln)))
3b0527fe 1120 && (!reginfo || regtry(reginfo, s)) )
1aa99e6b
IH
1121 goto got_it;
1122 s++;
1123 }
1124 else
1125 while (s <= e) {
1126 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1127 && (ln == 1 || !(OP(c) == EXACTF
1128 ? ibcmp(s, m, ln)
1129 : ibcmp_locale(s, m, ln)))
3b0527fe 1130 && (!reginfo || regtry(reginfo, s)) )
1aa99e6b
IH
1131 goto got_it;
1132 s++;
1133 }
b3c9acc1
IZ
1134 }
1135 break;
bbce6d69 1136 case BOUNDL:
3280af22 1137 PL_reg_flags |= RF_tainted;
bbce6d69 1138 /* FALL THROUGH */
a0d0e21e 1139 case BOUND:
ffc61ed2 1140 if (do_utf8) {
12d33761 1141 if (s == PL_bostr)
ffc61ed2
JH
1142 tmp = '\n';
1143 else {
6136c704 1144 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1145 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1146 }
1147 tmp = ((OP(c) == BOUND ?
9041c2e3 1148 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1149 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1150 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1151 if (tmp == !(OP(c) == BOUND ?
bb7a0f54 1152 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1153 isALNUM_LC_utf8((U8*)s)))
1154 {
1155 tmp = !tmp;
3b0527fe 1156 if ((!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1157 goto got_it;
1158 }
078c425b 1159 s += uskip;
a687059c 1160 }
a0d0e21e 1161 }
667bb95a 1162 else {
12d33761 1163 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1164 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1165 while (s < strend) {
1166 if (tmp ==
1167 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1168 tmp = !tmp;
3b0527fe 1169 if ((!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1170 goto got_it;
1171 }
1172 s++;
a0ed51b3 1173 }
a0ed51b3 1174 }
3b0527fe 1175 if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, s)))
a0ed51b3
LW
1176 goto got_it;
1177 break;
bbce6d69 1178 case NBOUNDL:
3280af22 1179 PL_reg_flags |= RF_tainted;
bbce6d69 1180 /* FALL THROUGH */
a0d0e21e 1181 case NBOUND:
ffc61ed2 1182 if (do_utf8) {
12d33761 1183 if (s == PL_bostr)
ffc61ed2
JH
1184 tmp = '\n';
1185 else {
6136c704 1186 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1187 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1188 }
1189 tmp = ((OP(c) == NBOUND ?
9041c2e3 1190 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1191 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1192 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1193 if (tmp == !(OP(c) == NBOUND ?
bb7a0f54 1194 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1195 isALNUM_LC_utf8((U8*)s)))
1196 tmp = !tmp;
3b0527fe 1197 else if ((!reginfo || regtry(reginfo, s)))
ffc61ed2 1198 goto got_it;
078c425b 1199 s += uskip;
ffc61ed2 1200 }
a0d0e21e 1201 }
667bb95a 1202 else {
12d33761 1203 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1204 tmp = ((OP(c) == NBOUND ?
1205 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1206 while (s < strend) {
1207 if (tmp ==
1208 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1209 tmp = !tmp;
3b0527fe 1210 else if ((!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1211 goto got_it;
1212 s++;
1213 }
a0ed51b3 1214 }
3b0527fe 1215 if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, s)))
a0ed51b3
LW
1216 goto got_it;
1217 break;
a0d0e21e 1218 case ALNUM:
ffc61ed2 1219 if (do_utf8) {
1a4fad37 1220 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1221 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1222 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
3b0527fe 1223 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1224 goto got_it;
1225 else
1226 tmp = doevery;
1227 }
bbce6d69 1228 else
ffc61ed2 1229 tmp = 1;
078c425b 1230 s += uskip;
bbce6d69 1231 }
bbce6d69 1232 }
ffc61ed2
JH
1233 else {
1234 while (s < strend) {
1235 if (isALNUM(*s)) {
3b0527fe 1236 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1237 goto got_it;
1238 else
1239 tmp = doevery;
1240 }
a0ed51b3 1241 else
ffc61ed2
JH
1242 tmp = 1;
1243 s++;
a0ed51b3 1244 }
a0ed51b3
LW
1245 }
1246 break;
bbce6d69 1247 case ALNUML:
3280af22 1248 PL_reg_flags |= RF_tainted;
ffc61ed2 1249 if (do_utf8) {
078c425b 1250 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1251 if (isALNUM_LC_utf8((U8*)s)) {
3b0527fe 1252 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1253 goto got_it;
1254 else
1255 tmp = doevery;
1256 }
a687059c 1257 else
ffc61ed2 1258 tmp = 1;
078c425b 1259 s += uskip;
a0d0e21e 1260 }
a0d0e21e 1261 }
ffc61ed2
JH
1262 else {
1263 while (s < strend) {
1264 if (isALNUM_LC(*s)) {
3b0527fe 1265 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1266 goto got_it;
1267 else
1268 tmp = doevery;
1269 }
a0ed51b3 1270 else
ffc61ed2
JH
1271 tmp = 1;
1272 s++;
a0ed51b3 1273 }
a0ed51b3
LW
1274 }
1275 break;
a0d0e21e 1276 case NALNUM:
ffc61ed2 1277 if (do_utf8) {
1a4fad37 1278 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1279 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1280 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
3b0527fe 1281 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1282 goto got_it;
1283 else
1284 tmp = doevery;
1285 }
bbce6d69 1286 else
ffc61ed2 1287 tmp = 1;
078c425b 1288 s += uskip;
bbce6d69 1289 }
bbce6d69 1290 }
ffc61ed2
JH
1291 else {
1292 while (s < strend) {
1293 if (!isALNUM(*s)) {
3b0527fe 1294 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1295 goto got_it;
1296 else
1297 tmp = doevery;
1298 }
a0ed51b3 1299 else
ffc61ed2
JH
1300 tmp = 1;
1301 s++;
a0ed51b3 1302 }
a0ed51b3
LW
1303 }
1304 break;
bbce6d69 1305 case NALNUML:
3280af22 1306 PL_reg_flags |= RF_tainted;
ffc61ed2 1307 if (do_utf8) {
078c425b 1308 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1309 if (!isALNUM_LC_utf8((U8*)s)) {
3b0527fe 1310 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1311 goto got_it;
1312 else
1313 tmp = doevery;
1314 }
a687059c 1315 else
ffc61ed2 1316 tmp = 1;
078c425b 1317 s += uskip;
a687059c 1318 }
a0d0e21e 1319 }
ffc61ed2
JH
1320 else {
1321 while (s < strend) {
1322 if (!isALNUM_LC(*s)) {
3b0527fe 1323 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1324 goto got_it;
1325 else
1326 tmp = doevery;
1327 }
a0ed51b3 1328 else
ffc61ed2
JH
1329 tmp = 1;
1330 s++;
a0ed51b3 1331 }
a0ed51b3
LW
1332 }
1333 break;
a0d0e21e 1334 case SPACE:
ffc61ed2 1335 if (do_utf8) {
1a4fad37 1336 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1337 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1338 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
3b0527fe 1339 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1340 goto got_it;
1341 else
1342 tmp = doevery;
1343 }
a0d0e21e 1344 else
ffc61ed2 1345 tmp = 1;
078c425b 1346 s += uskip;
2304df62 1347 }
a0d0e21e 1348 }
ffc61ed2
JH
1349 else {
1350 while (s < strend) {
1351 if (isSPACE(*s)) {
3b0527fe 1352 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1353 goto got_it;
1354 else
1355 tmp = doevery;
1356 }
a0ed51b3 1357 else
ffc61ed2
JH
1358 tmp = 1;
1359 s++;
a0ed51b3 1360 }
a0ed51b3
LW
1361 }
1362 break;
bbce6d69 1363 case SPACEL:
3280af22 1364 PL_reg_flags |= RF_tainted;
ffc61ed2 1365 if (do_utf8) {
078c425b 1366 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1367 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
3b0527fe 1368 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1369 goto got_it;
1370 else
1371 tmp = doevery;
1372 }
bbce6d69 1373 else
ffc61ed2 1374 tmp = 1;
078c425b 1375 s += uskip;
bbce6d69 1376 }
bbce6d69 1377 }
ffc61ed2
JH
1378 else {
1379 while (s < strend) {
1380 if (isSPACE_LC(*s)) {
3b0527fe 1381 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1382 goto got_it;
1383 else
1384 tmp = doevery;
1385 }
a0ed51b3 1386 else
ffc61ed2
JH
1387 tmp = 1;
1388 s++;
a0ed51b3 1389 }
a0ed51b3
LW
1390 }
1391 break;
a0d0e21e 1392 case NSPACE:
ffc61ed2 1393 if (do_utf8) {
1a4fad37 1394 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1395 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1396 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
3b0527fe 1397 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1398 goto got_it;
1399 else
1400 tmp = doevery;
1401 }
a0d0e21e 1402 else
ffc61ed2 1403 tmp = 1;
078c425b 1404 s += uskip;
a687059c 1405 }
a0d0e21e 1406 }
ffc61ed2
JH
1407 else {
1408 while (s < strend) {
1409 if (!isSPACE(*s)) {
3b0527fe 1410 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1411 goto got_it;
1412 else
1413 tmp = doevery;
1414 }
a0ed51b3 1415 else
ffc61ed2
JH
1416 tmp = 1;
1417 s++;
a0ed51b3 1418 }
a0ed51b3
LW
1419 }
1420 break;
bbce6d69 1421 case NSPACEL:
3280af22 1422 PL_reg_flags |= RF_tainted;
ffc61ed2 1423 if (do_utf8) {
078c425b 1424 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1425 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
3b0527fe 1426 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1427 goto got_it;
1428 else
1429 tmp = doevery;
1430 }
bbce6d69 1431 else
ffc61ed2 1432 tmp = 1;
078c425b 1433 s += uskip;
bbce6d69 1434 }
bbce6d69 1435 }
ffc61ed2
JH
1436 else {
1437 while (s < strend) {
1438 if (!isSPACE_LC(*s)) {
3b0527fe 1439 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1440 goto got_it;
1441 else
1442 tmp = doevery;
1443 }
a0ed51b3 1444 else
ffc61ed2
JH
1445 tmp = 1;
1446 s++;
a0ed51b3 1447 }
a0ed51b3
LW
1448 }
1449 break;
a0d0e21e 1450 case DIGIT:
ffc61ed2 1451 if (do_utf8) {
1a4fad37 1452 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1453 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1454 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
3b0527fe 1455 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1456 goto got_it;
1457 else
1458 tmp = doevery;
1459 }
a0d0e21e 1460 else
ffc61ed2 1461 tmp = 1;
078c425b 1462 s += uskip;
2b69d0c2 1463 }
a0d0e21e 1464 }
ffc61ed2
JH
1465 else {
1466 while (s < strend) {
1467 if (isDIGIT(*s)) {
3b0527fe 1468 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1469 goto got_it;
1470 else
1471 tmp = doevery;
1472 }
a0ed51b3 1473 else
ffc61ed2
JH
1474 tmp = 1;
1475 s++;
a0ed51b3 1476 }
a0ed51b3
LW
1477 }
1478 break;
b8c5462f
JH
1479 case DIGITL:
1480 PL_reg_flags |= RF_tainted;
ffc61ed2 1481 if (do_utf8) {
078c425b 1482 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1483 if (isDIGIT_LC_utf8((U8*)s)) {
3b0527fe 1484 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1485 goto got_it;
1486 else
1487 tmp = doevery;
1488 }
b8c5462f 1489 else
ffc61ed2 1490 tmp = 1;
078c425b 1491 s += uskip;
b8c5462f 1492 }
b8c5462f 1493 }
ffc61ed2
JH
1494 else {
1495 while (s < strend) {
1496 if (isDIGIT_LC(*s)) {
3b0527fe 1497 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1498 goto got_it;
1499 else
1500 tmp = doevery;
1501 }
b8c5462f 1502 else
ffc61ed2
JH
1503 tmp = 1;
1504 s++;
b8c5462f 1505 }
b8c5462f
JH
1506 }
1507 break;
a0d0e21e 1508 case NDIGIT:
ffc61ed2 1509 if (do_utf8) {
1a4fad37 1510 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1511 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1512 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
3b0527fe 1513 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1514 goto got_it;
1515 else
1516 tmp = doevery;
1517 }
a0d0e21e 1518 else
ffc61ed2 1519 tmp = 1;
078c425b 1520 s += uskip;
a687059c 1521 }
a0d0e21e 1522 }
ffc61ed2
JH
1523 else {
1524 while (s < strend) {
1525 if (!isDIGIT(*s)) {
3b0527fe 1526 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1527 goto got_it;
1528 else
1529 tmp = doevery;
1530 }
a0ed51b3 1531 else
ffc61ed2
JH
1532 tmp = 1;
1533 s++;
a0ed51b3 1534 }
a0ed51b3
LW
1535 }
1536 break;
b8c5462f
JH
1537 case NDIGITL:
1538 PL_reg_flags |= RF_tainted;
ffc61ed2 1539 if (do_utf8) {
078c425b 1540 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1541 if (!isDIGIT_LC_utf8((U8*)s)) {
3b0527fe 1542 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1543 goto got_it;
1544 else
1545 tmp = doevery;
1546 }
b8c5462f 1547 else
ffc61ed2 1548 tmp = 1;
078c425b 1549 s += uskip;
b8c5462f 1550 }
a0ed51b3 1551 }
ffc61ed2
JH
1552 else {
1553 while (s < strend) {
1554 if (!isDIGIT_LC(*s)) {
3b0527fe 1555 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1556 goto got_it;
1557 else
1558 tmp = doevery;
1559 }
cf93c79d 1560 else
ffc61ed2
JH
1561 tmp = 1;
1562 s++;
b8c5462f 1563 }
b8c5462f
JH
1564 }
1565 break;
b3c9acc1 1566 default:
3c3eec57
GS
1567 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1568 break;
d6a28714 1569 }
6eb5f6b9
JH
1570 return 0;
1571 got_it:
1572 return s;
1573}
1574
1575/*
1576 - regexec_flags - match a regexp against a string
1577 */
1578I32
1579Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1580 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1581/* strend: pointer to null at end of string */
1582/* strbeg: real beginning of string */
1583/* minend: end of match must be >=minend after stringarg. */
1584/* data: May be used for some additional optimizations. */
1585/* nosave: For optimizations. */
1586{
97aff369 1587 dVAR;
6eb5f6b9
JH
1588 register char *s;
1589 register regnode *c;
1590 register char *startpos = stringarg;
6eb5f6b9
JH
1591 I32 minlen; /* must match at least this many chars */
1592 I32 dontbother = 0; /* how many characters not to try at end */
6eb5f6b9
JH
1593 I32 end_shift = 0; /* Same for the end. */ /* CC */
1594 I32 scream_pos = -1; /* Internal iterator of scream. */
ccac19ea 1595 char *scream_olds = NULL;
3dab1dad 1596 SV* const oreplsv = GvSV(PL_replgv);
1df70142 1597 const bool do_utf8 = DO_UTF8(sv);
2757e526 1598 I32 multiline;
2a782b5b 1599#ifdef DEBUGGING
2757e526
JH
1600 SV* dsv0;
1601 SV* dsv1;
2a782b5b 1602#endif
3b0527fe 1603 regmatch_info reginfo; /* create some info to pass to regtry etc */
a3621e74
YO
1604
1605 GET_RE_DEBUG_FLAGS_DECL;
1606
9d4ba2ae 1607 PERL_UNUSED_ARG(data);
6eb5f6b9
JH
1608
1609 /* Be paranoid... */
1610 if (prog == NULL || startpos == NULL) {
1611 Perl_croak(aTHX_ "NULL regexp parameter");
1612 return 0;
1613 }
1614
2757e526 1615 multiline = prog->reganch & PMf_MULTILINE;
3b0527fe 1616 reginfo.prog = prog;
2757e526
JH
1617
1618#ifdef DEBUGGING
1619 dsv0 = PERL_DEBUG_PAD_ZERO(0);
1620 dsv1 = PERL_DEBUG_PAD_ZERO(1);
1621#endif
1622
bac06658
JH
1623 RX_MATCH_UTF8_set(prog, do_utf8);
1624
6eb5f6b9 1625 minlen = prog->minlen;
61a36c01 1626 if (strend - startpos < minlen) {
a3621e74 1627 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584
JH
1628 "String too short [regexec_flags]...\n"));
1629 goto phooey;
1aa99e6b 1630 }
6eb5f6b9 1631
6eb5f6b9
JH
1632 /* Check validity of program. */
1633 if (UCHARAT(prog->program) != REG_MAGIC) {
1634 Perl_croak(aTHX_ "corrupted regexp program");
1635 }
1636
1637 PL_reg_flags = 0;
1638 PL_reg_eval_set = 0;
1639 PL_reg_maxiter = 0;
1640
1641 if (prog->reganch & ROPT_UTF8)
1642 PL_reg_flags |= RF_utf8;
1643
1644 /* Mark beginning of line for ^ and lookbehind. */
3b0527fe 1645 reginfo.bol = startpos; /* XXX not used ??? */
6eb5f6b9 1646 PL_bostr = strbeg;
3b0527fe 1647 reginfo.sv = sv;
6eb5f6b9
JH
1648
1649 /* Mark end of line for $ (and such) */
1650 PL_regeol = strend;
1651
1652 /* see how far we have to get to not match where we matched before */
3b0527fe 1653 reginfo.till = startpos+minend;
6eb5f6b9 1654
6eb5f6b9
JH
1655 /* If there is a "must appear" string, look for it. */
1656 s = startpos;
1657
3b0527fe 1658 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to set reginfo->ganch */
6eb5f6b9
JH
1659 MAGIC *mg;
1660
1661 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
3b0527fe 1662 reginfo.ganch = startpos;
6eb5f6b9
JH
1663 else if (sv && SvTYPE(sv) >= SVt_PVMG
1664 && SvMAGIC(sv)
14befaf4
DM
1665 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1666 && mg->mg_len >= 0) {
3b0527fe 1667 reginfo.ganch = strbeg + mg->mg_len; /* Defined pos() */
6eb5f6b9 1668 if (prog->reganch & ROPT_ANCH_GPOS) {
3b0527fe 1669 if (s > reginfo.ganch)
6eb5f6b9 1670 goto phooey;
3b0527fe 1671 s = reginfo.ganch;
6eb5f6b9
JH
1672 }
1673 }
1674 else /* pos() not defined */
3b0527fe 1675 reginfo.ganch = strbeg;
6eb5f6b9
JH
1676 }
1677
a0714e2c 1678 if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
6eb5f6b9
JH
1679 re_scream_pos_data d;
1680
1681 d.scream_olds = &scream_olds;
1682 d.scream_pos = &scream_pos;
1683 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7 1684 if (!s) {
a3621e74 1685 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1686 goto phooey; /* not present */
3fa9c3d7 1687 }
6eb5f6b9
JH
1688 }
1689
a3621e74 1690 DEBUG_EXECUTE_r({
1df70142
AL
1691 const char * const s0 = UTF
1692 ? pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1693 UNI_DISPLAY_REGEX)
1694 : prog->precomp;
bb7a0f54 1695 const int len0 = UTF ? (int)SvCUR(dsv0) : prog->prelen;
1df70142 1696 const char * const s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
c728cb41 1697 UNI_DISPLAY_REGEX) : startpos;
bb7a0f54 1698 const int len1 = do_utf8 ? (int)SvCUR(dsv1) : strend - startpos;
2a782b5b
JH
1699 if (!PL_colorset)
1700 reginitcolors();
1701 PerlIO_printf(Perl_debug_log,
a0288114 1702 "%sMatching REx%s \"%s%*.*s%s%s\" against \"%s%.*s%s%s\"\n",
e4584336 1703 PL_colors[4], PL_colors[5], PL_colors[0],
9e55ce06 1704 len0, len0, s0,
2a782b5b 1705 PL_colors[1],
9e55ce06 1706 len0 > 60 ? "..." : "",
2a782b5b 1707 PL_colors[0],
9e55ce06
JH
1708 (int)(len1 > 60 ? 60 : len1),
1709 s1, PL_colors[1],
1710 (len1 > 60 ? "..." : "")
2a782b5b
JH
1711 );
1712 });
6eb5f6b9
JH
1713
1714 /* Simplest case: anchored match need be tried only once. */
1715 /* [unless only anchor is BOL and multiline is set] */
1716 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
3b0527fe 1717 if (s == startpos && regtry(&reginfo, startpos))
6eb5f6b9 1718 goto got_it;
7fba1cd6 1719 else if (multiline || (prog->reganch & ROPT_IMPLICIT)
6eb5f6b9
JH
1720 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1721 {
1722 char *end;
1723
1724 if (minlen)
1725 dontbother = minlen - 1;
1aa99e6b 1726 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9 1727 /* for multiline we only have to try after newlines */
33b8afdf 1728 if (prog->check_substr || prog->check_utf8) {
6eb5f6b9
JH
1729 if (s == startpos)
1730 goto after_try;
1731 while (1) {
3b0527fe 1732 if (regtry(&reginfo, s))
6eb5f6b9
JH
1733 goto got_it;
1734 after_try:
1735 if (s >= end)
1736 goto phooey;
1737 if (prog->reganch & RE_USE_INTUIT) {
1738 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1739 if (!s)
1740 goto phooey;
1741 }
1742 else
1743 s++;
1744 }
1745 } else {
1746 if (s > startpos)
1747 s--;
1748 while (s < end) {
1749 if (*s++ == '\n') { /* don't need PL_utf8skip here */
3b0527fe 1750 if (regtry(&reginfo, s))
6eb5f6b9
JH
1751 goto got_it;
1752 }
1753 }
1754 }
1755 }
1756 goto phooey;
1757 } else if (prog->reganch & ROPT_ANCH_GPOS) {
3b0527fe 1758 if (regtry(&reginfo, reginfo.ganch))
6eb5f6b9
JH
1759 goto got_it;
1760 goto phooey;
1761 }
1762
1763 /* Messy cases: unanchored match. */
33b8afdf 1764 if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1765 /* we have /x+whatever/ */
1766 /* it must be a one character string (XXXX Except UTF?) */
33b8afdf 1767 char ch;
bf93d4cc
GS
1768#ifdef DEBUGGING
1769 int did_match = 0;
1770#endif
33b8afdf
JH
1771 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1772 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
3f7c398e 1773 ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
bf93d4cc 1774
1aa99e6b 1775 if (do_utf8) {
6eb5f6b9
JH
1776 while (s < strend) {
1777 if (*s == ch) {
a3621e74 1778 DEBUG_EXECUTE_r( did_match = 1 );
3b0527fe 1779 if (regtry(&reginfo, s)) goto got_it;
6eb5f6b9
JH
1780 s += UTF8SKIP(s);
1781 while (s < strend && *s == ch)
1782 s += UTF8SKIP(s);
1783 }
1784 s += UTF8SKIP(s);
1785 }
1786 }
1787 else {
1788 while (s < strend) {
1789 if (*s == ch) {
a3621e74 1790 DEBUG_EXECUTE_r( did_match = 1 );
3b0527fe 1791 if (regtry(&reginfo, s)) goto got_it;
6eb5f6b9
JH
1792 s++;
1793 while (s < strend && *s == ch)
1794 s++;
1795 }
1796 s++;
1797 }
1798 }
a3621e74 1799 DEBUG_EXECUTE_r(if (!did_match)
bf93d4cc 1800 PerlIO_printf(Perl_debug_log,
b7953727
JH
1801 "Did not find anchored character...\n")
1802 );
6eb5f6b9 1803 }
a0714e2c
SS
1804 else if (prog->anchored_substr != NULL
1805 || prog->anchored_utf8 != NULL
1806 || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
33b8afdf
JH
1807 && prog->float_max_offset < strend - s)) {
1808 SV *must;
1809 I32 back_max;
1810 I32 back_min;
1811 char *last;
6eb5f6b9 1812 char *last1; /* Last position checked before */
bf93d4cc
GS
1813#ifdef DEBUGGING
1814 int did_match = 0;
1815#endif
33b8afdf
JH
1816 if (prog->anchored_substr || prog->anchored_utf8) {
1817 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1818 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1819 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1820 back_max = back_min = prog->anchored_offset;
1821 } else {
1822 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1823 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1824 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1825 back_max = prog->float_max_offset;
1826 back_min = prog->float_min_offset;
1827 }
1828 if (must == &PL_sv_undef)
1829 /* could not downgrade utf8 check substring, so must fail */
1830 goto phooey;
1831
1832 last = HOP3c(strend, /* Cannot start after this */
1833 -(I32)(CHR_SVLEN(must)
1834 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9
JH
1835
1836 if (s > PL_bostr)
1837 last1 = HOPc(s, -1);
1838 else
1839 last1 = s - 1; /* bogus */
1840
a0288114 1841 /* XXXX check_substr already used to find "s", can optimize if
6eb5f6b9
JH
1842 check_substr==must. */
1843 scream_pos = -1;
1844 dontbother = end_shift;
1845 strend = HOPc(strend, -dontbother);
1846 while ( (s <= last) &&
9041c2e3 1847 ((flags & REXEC_SCREAM)
1aa99e6b 1848 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1849 end_shift, &scream_pos, 0))
1aa99e6b 1850 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1851 (unsigned char*)strend, must,
7fba1cd6 1852 multiline ? FBMrf_MULTILINE : 0))) ) {
4addbd3b
HS
1853 /* we may be pointing at the wrong string */
1854 if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
3f7c398e 1855 s = strbeg + (s - SvPVX_const(sv));
a3621e74 1856 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1857 if (HOPc(s, -back_max) > last1) {
1858 last1 = HOPc(s, -back_min);
1859 s = HOPc(s, -back_max);
1860 }
1861 else {
52657f30 1862 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
6eb5f6b9
JH
1863
1864 last1 = HOPc(s, -back_min);
52657f30 1865 s = t;
6eb5f6b9 1866 }
1aa99e6b 1867 if (do_utf8) {
6eb5f6b9 1868 while (s <= last1) {
3b0527fe 1869 if (regtry(&reginfo, s))
6eb5f6b9
JH
1870 goto got_it;
1871 s += UTF8SKIP(s);
1872 }
1873 }
1874 else {
1875 while (s <= last1) {
3b0527fe 1876 if (regtry(&reginfo, s))
6eb5f6b9
JH
1877 goto got_it;
1878 s++;
1879 }
1880 }
1881 }
a3621e74 1882 DEBUG_EXECUTE_r(if (!did_match)
b7953727 1883 PerlIO_printf(Perl_debug_log,
a0288114 1884 "Did not find %s substr \"%s%.*s%s\"%s...\n",
33b8afdf 1885 ((must == prog->anchored_substr || must == prog->anchored_utf8)
bf93d4cc
GS
1886 ? "anchored" : "floating"),
1887 PL_colors[0],
1888 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 1889 SvPVX_const(must),
b7953727
JH
1890 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1891 );
6eb5f6b9
JH
1892 goto phooey;
1893 }
155aba94 1894 else if ((c = prog->regstclass)) {
f14c76ed 1895 if (minlen) {
3dab1dad 1896 I32 op = OP(prog->regstclass);
66e933ab 1897 /* don't bother with what can't match */
f14c76ed
RGS
1898 if (PL_regkind[op] != EXACT && op != CANY)
1899 strend = HOPc(strend, -(minlen - 1));
1900 }
a3621e74 1901 DEBUG_EXECUTE_r({
ffc61ed2 1902 SV *prop = sv_newmortal();
cfd0369c
NC
1903 const char *s0;
1904 const char *s1;
9e55ce06
JH
1905 int len0;
1906 int len1;
1907
32fc9b6a 1908 regprop(prog, prop, c);
9e55ce06 1909 s0 = UTF ?
3f7c398e 1910 pv_uni_display(dsv0, (U8*)SvPVX_const(prop), SvCUR(prop), 60,
c728cb41 1911 UNI_DISPLAY_REGEX) :
cfd0369c 1912 SvPVX_const(prop);
9e55ce06
JH
1913 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1914 s1 = UTF ?
c728cb41 1915 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
bb7a0f54 1916 len1 = UTF ? (int)SvCUR(dsv1) : strend - s;
9e55ce06 1917 PerlIO_printf(Perl_debug_log,
a0288114 1918 "Matching stclass \"%*.*s\" against \"%*.*s\"\n",
9e55ce06
JH
1919 len0, len0, s0,
1920 len1, len1, s1);
ffc61ed2 1921 });
3b0527fe 1922 if (find_byclass(prog, c, s, strend, &reginfo))
6eb5f6b9 1923 goto got_it;
a3621e74 1924 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1925 }
1926 else {
1927 dontbother = 0;
a0714e2c 1928 if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
33b8afdf 1929 /* Trim the end. */
d6a28714 1930 char *last;
33b8afdf
JH
1931 SV* float_real;
1932
1933 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1934 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1935 float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
d6a28714
JH
1936
1937 if (flags & REXEC_SCREAM) {
33b8afdf 1938 last = screaminstr(sv, float_real, s - strbeg,
d6a28714
JH
1939 end_shift, &scream_pos, 1); /* last one */
1940 if (!last)
ffc61ed2 1941 last = scream_olds; /* Only one occurrence. */
4addbd3b
HS
1942 /* we may be pointing at the wrong string */
1943 else if (RX_MATCH_COPIED(prog))
3f7c398e 1944 s = strbeg + (s - SvPVX_const(sv));
b8c5462f 1945 }
d6a28714
JH
1946 else {
1947 STRLEN len;
cfd0369c 1948 const char * const little = SvPV_const(float_real, len);
d6a28714 1949
33b8afdf 1950 if (SvTAIL(float_real)) {
d6a28714
JH
1951 if (memEQ(strend - len + 1, little, len - 1))
1952 last = strend - len + 1;
7fba1cd6 1953 else if (!multiline)
9041c2e3 1954 last = memEQ(strend - len, little, len)
bd61b366 1955 ? strend - len : NULL;
b8c5462f 1956 else
d6a28714
JH
1957 goto find_last;
1958 } else {
1959 find_last:
9041c2e3 1960 if (len)
d6a28714 1961 last = rninstr(s, strend, little, little + len);
b8c5462f 1962 else
a0288114 1963 last = strend; /* matching "$" */
b8c5462f 1964 }
b8c5462f 1965 }
bf93d4cc 1966 if (last == NULL) {
a3621e74 1967 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
bf93d4cc 1968 "%sCan't trim the tail, match fails (should not happen)%s\n",
e4584336 1969 PL_colors[4], PL_colors[5]));
bf93d4cc
GS
1970 goto phooey; /* Should not happen! */
1971 }
d6a28714
JH
1972 dontbother = strend - last + prog->float_min_offset;
1973 }
1974 if (minlen && (dontbother < minlen))
1975 dontbother = minlen - 1;
1976 strend -= dontbother; /* this one's always in bytes! */
1977 /* We don't know much -- general case. */
1aa99e6b 1978 if (do_utf8) {
d6a28714 1979 for (;;) {
3b0527fe 1980 if (regtry(&reginfo, s))
d6a28714
JH
1981 goto got_it;
1982 if (s >= strend)
1983 break;
b8c5462f 1984 s += UTF8SKIP(s);
d6a28714
JH
1985 };
1986 }
1987 else {
1988 do {
3b0527fe 1989 if (regtry(&reginfo, s))
d6a28714
JH
1990 goto got_it;
1991 } while (s++ < strend);
1992 }
1993 }
1994
1995 /* Failure. */
1996 goto phooey;
1997
1998got_it:
1999 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2000
2001 if (PL_reg_eval_set) {
2002 /* Preserve the current value of $^R */
2003 if (oreplsv != GvSV(PL_replgv))
2004 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2005 restored, the value remains
2006 the same. */
4f639d21 2007 restore_pos(aTHX_ prog);
d6a28714
JH
2008 }
2009
2010 /* make sure $`, $&, $', and $digit will work later */
2011 if ( !(flags & REXEC_NOT_FIRST) ) {
ed252734 2012 RX_MATCH_COPY_FREE(prog);
d6a28714
JH
2013 if (flags & REXEC_COPY_STR) {
2014 I32 i = PL_regeol - startpos + (stringarg - strbeg);
f8c7b90f 2015#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2016 if ((SvIsCOW(sv)
2017 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2018 if (DEBUG_C_TEST) {
2019 PerlIO_printf(Perl_debug_log,
2020 "Copy on write: regexp capture, type %d\n",
2021 (int) SvTYPE(sv));
2022 }
2023 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
d5263905 2024 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
ed252734
NC
2025 assert (SvPOKp(prog->saved_copy));
2026 } else
2027#endif
2028 {
2029 RX_MATCH_COPIED_on(prog);
2030 s = savepvn(strbeg, i);
2031 prog->subbeg = s;
2032 }
d6a28714 2033 prog->sublen = i;
d6a28714
JH
2034 }
2035 else {
2036 prog->subbeg = strbeg;
2037 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2038 }
2039 }
9041c2e3 2040
d6a28714
JH
2041 return 1;
2042
2043phooey:
a3621e74 2044 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
e4584336 2045 PL_colors[4], PL_colors[5]));
d6a28714 2046 if (PL_reg_eval_set)
4f639d21 2047 restore_pos(aTHX_ prog);
d6a28714
JH
2048 return 0;
2049}
2050
2051/*
2052 - regtry - try match at specific point
2053 */
2054STATIC I32 /* 0 failure, 1 success */
3b0527fe 2055S_regtry(pTHX_ const regmatch_info *reginfo, char *startpos)
d6a28714 2056{
97aff369 2057 dVAR;
d6a28714
JH
2058 register I32 *sp;
2059 register I32 *ep;
2060 CHECKPOINT lastcp;
3b0527fe 2061 regexp *prog = reginfo->prog;
a3621e74 2062 GET_RE_DEBUG_FLAGS_DECL;
d6a28714 2063
02db2b7b
IZ
2064#ifdef DEBUGGING
2065 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
2066#endif
d6a28714
JH
2067 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2068 MAGIC *mg;
2069
2070 PL_reg_eval_set = RS_init;
a3621e74 2071 DEBUG_EXECUTE_r(DEBUG_s(
b900a521
JH
2072 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2073 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 2074 ));
e8347627 2075 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
2076 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2077 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2078 SAVETMPS;
2079 /* Apparently this is not needed, judging by wantarray. */
e8347627 2080 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
2081 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2082
3b0527fe 2083 if (reginfo->sv) {
d6a28714 2084 /* Make $_ available to executed code. */
3b0527fe 2085 if (reginfo->sv != DEFSV) {
59f00321 2086 SAVE_DEFSV;
3b0527fe 2087 DEFSV = reginfo->sv;
b8c5462f 2088 }
d6a28714 2089
3b0527fe
DM
2090 if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2091 && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
d6a28714 2092 /* prepare for quick setting of pos */
d300d9fa
NC
2093#ifdef PERL_OLD_COPY_ON_WRITE
2094 if (SvIsCOW(sv))
2095 sv_force_normal_flags(sv, 0);
2096#endif
3dab1dad 2097 mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
d300d9fa 2098 &PL_vtbl_mglob, NULL, 0);
d6a28714 2099 mg->mg_len = -1;
b8c5462f 2100 }
d6a28714
JH
2101 PL_reg_magic = mg;
2102 PL_reg_oldpos = mg->mg_len;
4f639d21 2103 SAVEDESTRUCTOR_X(restore_pos, prog);
d6a28714 2104 }
09687e5a 2105 if (!PL_reg_curpm) {
a02a5408 2106 Newxz(PL_reg_curpm, 1, PMOP);
09687e5a
AB
2107#ifdef USE_ITHREADS
2108 {
2109 SV* repointer = newSViv(0);
577e12cc 2110 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2111 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2112 av_push(PL_regex_padav,repointer);
2113 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2114 PL_regex_pad = AvARRAY(PL_regex_padav);
2115 }
2116#endif
2117 }
aaa362c4 2118 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2119 PL_reg_oldcurpm = PL_curpm;
2120 PL_curpm = PL_reg_curpm;
2121 if (RX_MATCH_COPIED(prog)) {
2122 /* Here is a serious problem: we cannot rewrite subbeg,
2123 since it may be needed if this match fails. Thus
2124 $` inside (?{}) could fail... */
2125 PL_reg_oldsaved = prog->subbeg;
2126 PL_reg_oldsavedlen = prog->sublen;
f8c7b90f 2127#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2128 PL_nrs = prog->saved_copy;
2129#endif
d6a28714
JH
2130 RX_MATCH_COPIED_off(prog);
2131 }
2132 else
bd61b366 2133 PL_reg_oldsaved = NULL;
d6a28714
JH
2134 prog->subbeg = PL_bostr;
2135 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2136 }
973dddac 2137 prog->startp[0] = startpos - PL_bostr;
d6a28714
JH
2138 PL_reginput = startpos;
2139 PL_regstartp = prog->startp;
2140 PL_regendp = prog->endp;
2141 PL_reglastparen = &prog->lastparen;
a01268b5 2142 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714 2143 prog->lastparen = 0;
03994de8 2144 prog->lastcloseparen = 0;
d6a28714 2145 PL_regsize = 0;
a3621e74 2146 DEBUG_EXECUTE_r(PL_reg_starttry = startpos);
d6a28714
JH
2147 if (PL_reg_start_tmpl <= prog->nparens) {
2148 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2149 if(PL_reg_start_tmp)
2150 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2151 else
a02a5408 2152 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
d6a28714
JH
2153 }
2154
2155 /* XXXX What this code is doing here?!!! There should be no need
2156 to do this again and again, PL_reglastparen should take care of
3dd2943c 2157 this! --ilya*/
dafc8851
JH
2158
2159 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2160 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2161 * PL_reglastparen), is not needed at all by the test suite
2162 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2163 * enough, for building DynaLoader, or otherwise this
2164 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2165 * will happen. Meanwhile, this code *is* needed for the
2166 * above-mentioned test suite tests to succeed. The common theme
2167 * on those tests seems to be returning null fields from matches.
2168 * --jhi */
dafc8851 2169#if 1
d6a28714
JH
2170 sp = prog->startp;
2171 ep = prog->endp;
2172 if (prog->nparens) {
097eb12c 2173 register I32 i;
eb160463 2174 for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
d6a28714
JH
2175 *++sp = -1;
2176 *++ep = -1;
2177 }
2178 }
dafc8851 2179#endif
02db2b7b 2180 REGCP_SET(lastcp);
3b0527fe 2181 if (regmatch(reginfo, prog->program + 1)) {
d6a28714
JH
2182 prog->endp[0] = PL_reginput - PL_bostr;
2183 return 1;
2184 }
02db2b7b 2185 REGCP_UNWIND(lastcp);
d6a28714
JH
2186 return 0;
2187}
2188
02db2b7b
IZ
2189#define RE_UNWIND_BRANCH 1
2190#define RE_UNWIND_BRANCHJ 2
2191
2192union re_unwind_t;
2193
2194typedef struct { /* XX: makes sense to enlarge it... */
2195 I32 type;
2196 I32 prev;
2197 CHECKPOINT lastcp;
2198} re_unwind_generic_t;
2199
2200typedef struct {
2201 I32 type;
2202 I32 prev;
2203 CHECKPOINT lastcp;
2204 I32 lastparen;
2205 regnode *next;
2206 char *locinput;
2207 I32 nextchr;
3a2830be 2208 int minmod;
02db2b7b
IZ
2209#ifdef DEBUGGING
2210 int regindent;
2211#endif
2212} re_unwind_branch_t;
2213
2214typedef union re_unwind_t {
2215 I32 type;
2216 re_unwind_generic_t generic;
2217 re_unwind_branch_t branch;
2218} re_unwind_t;
2219
8ba1375e
MJD
2220#define sayYES goto yes
2221#define sayNO goto no
e0f9d4a8 2222#define sayNO_ANYOF goto no_anyof
8ba1375e 2223#define sayYES_FINAL goto yes_final
8ba1375e
MJD
2224#define sayNO_FINAL goto no_final
2225#define sayNO_SILENT goto do_no
2226#define saySAME(x) if (x) goto yes; else goto no
2227
3ab3c9b4
HS
2228#define POSCACHE_SUCCESS 0 /* caching success rather than failure */
2229#define POSCACHE_SEEN 1 /* we know what we're caching */
2230#define POSCACHE_START 2 /* the real cache: this bit maps to pos 0 */
7409bbd3 2231
3ab3c9b4 2232#define CACHEsayYES STMT_START { \
d8319b27 2233 if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \
7409bbd3
DM
2234 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) { \
2235 PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) | (1<<POSCACHE_SEEN); \
2236 PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
2237 } \
2238 else if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS)) { \
2239 PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
2240 } \
2241 else { \
3ab3c9b4
HS
2242 /* cache records failure, but this is success */ \
2243 DEBUG_r( \
2244 PerlIO_printf(Perl_debug_log, \
2245 "%*s (remove success from failure cache)\n", \
2246 REPORT_CODE_OFF+PL_regindent*2, "") \
2247 ); \
d8319b27 2248 PL_reg_poscache[st->u.whilem.cache_offset] &= ~(1<<st->u.whilem.cache_bit); \
3ab3c9b4
HS
2249 } \
2250 } \
2251 sayYES; \
2252} STMT_END
7409bbd3 2253
3ab3c9b4 2254#define CACHEsayNO STMT_START { \
d8319b27 2255 if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \
7409bbd3 2256 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) { \
3ab3c9b4 2257 PL_reg_poscache[0] |= (1<<POSCACHE_SEEN); \
7409bbd3
DM
2258 PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
2259 } \
2260 else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2261 PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
2262 } \
2263 else { \
3ab3c9b4
HS
2264 /* cache records success, but this is failure */ \
2265 DEBUG_r( \
2266 PerlIO_printf(Perl_debug_log, \
2267 "%*s (remove failure from success cache)\n", \
2268 REPORT_CODE_OFF+PL_regindent*2, "") \
2269 ); \
d8319b27 2270 PL_reg_poscache[st->u.whilem.cache_offset] &= ~(1<<st->u.whilem.cache_bit); \
3ab3c9b4
HS
2271 } \
2272 } \
2273 sayNO; \
2274} STMT_END
2275
a3621e74
YO
2276/* this is used to determine how far from the left messages like
2277 'failed...' are printed. Currently 29 makes these messages line
2278 up with the opcode they refer to. Earlier perls used 25 which
2279 left these messages outdented making reviewing a debug output
2280 quite difficult.
2281*/
2282#define REPORT_CODE_OFF 29
2283
2284
2285/* Make sure there is a test for this +1 options in re_tests */
2286#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2287
9e137952
DM
2288/* this value indiciates that the c1/c2 "next char" test should be skipped */
2289#define CHRTEST_VOID -1000
2290
86545054
DM
2291#define SLAB_FIRST(s) (&(s)->states[0])
2292#define SLAB_LAST(s) (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2293
5d9a96ca
DM
2294/* grab a new slab and return the first slot in it */
2295
2296STATIC regmatch_state *
2297S_push_slab(pTHX)
2298{
54df2634
NC
2299#if PERL_VERSION < 9
2300 dMY_CXT;
2301#endif
5d9a96ca
DM
2302 regmatch_slab *s = PL_regmatch_slab->next;
2303 if (!s) {
2304 Newx(s, 1, regmatch_slab);
2305 s->prev = PL_regmatch_slab;
2306 s->next = NULL;
2307 PL_regmatch_slab->next = s;
2308 }
2309 PL_regmatch_slab = s;
86545054 2310 return SLAB_FIRST(s);
5d9a96ca 2311}
5b47454d 2312
95b24440
DM
2313/* simulate a recursive call to regmatch */
2314
2315#define REGMATCH(ns, where) \
5d9a96ca
DM
2316 st->scan = scan; \
2317 scan = (ns); \
2318 st->resume_state = resume_##where; \
95b24440
DM
2319 goto start_recurse; \
2320 resume_point_##where:
2321
aa283a38
DM
2322
2323/* push a new regex state. Set newst to point to it */
2324
2325#define PUSH_STATE(newst, resume) \
2326 depth++; \
2327 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "PUSH STATE(%d)\n", depth)); \
2328 st->scan = scan; \
2329 st->next = next; \
2330 st->n = n; \
2331 st->locinput = locinput; \
2332 st->resume_state = resume; \
2333 newst = st+1; \
86545054 2334 if (newst > SLAB_LAST(PL_regmatch_slab)) \
aa283a38
DM
2335 newst = S_push_slab(aTHX); \
2336 PL_regmatch_state = newst; \
2337 newst->cc = 0; \
2338 newst->minmod = 0; \
2339 newst->sw = 0; \
2340 newst->logical = 0; \
2341 newst->unwind = 0; \
2342 locinput = PL_reginput; \
2343 nextchr = UCHARAT(locinput);
2344
2345#define POP_STATE \
2346 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE(%d)\n", depth)); \
2347 depth--; \
2348 st--; \
86545054 2349 if (st < SLAB_FIRST(PL_regmatch_slab)) { \
aa283a38 2350 PL_regmatch_slab = PL_regmatch_slab->prev; \
86545054 2351 st = SLAB_LAST(PL_regmatch_slab); \
aa283a38
DM
2352 } \
2353 PL_regmatch_state = st; \
2354 scan = st->scan; \
2355 next = st->next; \
2356 n = st->n; \
2357 locinput = st->locinput; \
2358 nextchr = UCHARAT(locinput);
2359
d6a28714
JH
2360/*
2361 - regmatch - main matching routine
2362 *
2363 * Conceptually the strategy is simple: check to see whether the current
2364 * node matches, call self recursively to see whether the rest matches,
2365 * and then act accordingly. In practice we make some effort to avoid
2366 * recursion, in particular by going through "ordinary" nodes (that don't
2367 * need to know whether the rest of the match failed) by a loop instead of
2368 * by recursion.
2369 */
2370/* [lwall] I've hoisted the register declarations to the outer block in order to
2371 * maybe save a little bit of pushing and popping on the stack. It also takes
2372 * advantage of machines that use a register save mask on subroutine entry.
95b24440
DM
2373 *
2374 * This function used to be heavily recursive, but since this had the
2375 * effect of blowing the CPU stack on complex regexes, it has been
2376 * restructured to be iterative, and to save state onto the heap rather
2377 * than the stack. Essentially whereever regmatch() used to be called, it
2378 * pushes the current state, notes where to return, then jumps back into
2379 * the main loop.
2380 *
2381 * Originally the structure of this function used to look something like
2382
2383 S_regmatch() {
2384 int a = 1, b = 2;
2385 ...
2386 while (scan != NULL) {
5d9a96ca 2387 a++; // do stuff with a and b
95b24440
DM
2388 ...
2389 switch (OP(scan)) {
2390 case FOO: {
2391 int local = 3;
2392 ...
2393 if (regmatch(...)) // recurse
2394 goto yes;
2395 }
2396 ...
2397 }
2398 }
2399 yes:
2400 return 1;
2401 }
2402
2403 * Now it looks something like this:
2404
5d9a96ca 2405 typedef struct {
95b24440
DM
2406 int a, b, local;
2407 int resume_state;
5d9a96ca 2408 } regmatch_state;
95b24440
DM
2409
2410 S_regmatch() {
5d9a96ca
DM
2411 regmatch_state *st = new();
2412 int depth=0;
2413 st->a++; // do stuff with a and b
95b24440
DM
2414 ...
2415 while (scan != NULL) {
2416 ...
2417 switch (OP(scan)) {
2418 case FOO: {
5d9a96ca 2419 st->local = 3;
95b24440 2420 ...
5d9a96ca
DM
2421 st->scan = scan;
2422 scan = ...;
2423 st->resume_state = resume_FOO;
2424 goto start_recurse; // recurse
95b24440 2425
5d9a96ca
DM
2426 resume_point_FOO:
2427 if (result)
95b24440
DM
2428 goto yes;
2429 }
2430 ...
2431 }
5d9a96ca
DM
2432 start_recurse:
2433 st = new(); push a new state
2434 st->a = 1; st->b = 2;
2435 depth++;
95b24440 2436 }
5d9a96ca 2437 yes:
95b24440 2438 result = 1;
5d9a96ca
DM
2439 if (depth--) {
2440 st = pop();
95b24440
DM
2441 switch (resume_state) {
2442 case resume_FOO:
2443 goto resume_point_FOO;
2444 ...
2445 }
2446 }
2447 return result
2448 }
2449
2450 * WARNING: this means that any line in this function that contains a
2451 * REGMATCH() or TRYPAREN() is actually simulating a recursive call to
2452 * regmatch() using gotos instead. Thus the values of any local variables
2453 * not saved in the regmatch_state structure will have been lost when
2454 * execution resumes on the next line .
5d9a96ca
DM
2455 *
2456 * States (ie the st pointer) are allocated in slabs of about 4K in size.
2457 * PL_regmatch_state always points to the currently active state, and
2458 * PL_regmatch_slab points to the slab currently containing PL_regmatch_state.
2459 * The first time regmatch is called, the first slab is allocated, and is
2460 * never freed until interpreter desctruction. When the slab is full,
2461 * a new one is allocated chained to the end. At exit from regmatch, slabs
2462 * allocated since entry are freed.
d6a28714 2463 */
95b24440 2464
3dab1dad 2465#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
95b24440 2466
d6a28714 2467STATIC I32 /* 0 failure, 1 success */
3b0527fe 2468S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
d6a28714 2469{
54df2634
NC
2470#if PERL_VERSION < 9
2471 dMY_CXT;
2472#endif
27da23d5 2473 dVAR;
95b24440 2474 register const bool do_utf8 = PL_reg_match_utf8;
4ad0818d 2475 const U32 uniflags = UTF8_ALLOW_DEFAULT;
95b24440 2476
3b0527fe
DM
2477 regexp *rex = reginfo->prog;
2478
5d9a96ca
DM
2479 regmatch_slab *orig_slab;
2480 regmatch_state *orig_state;
a3621e74 2481
5d9a96ca
DM
2482 /* the current state. This is a cached copy of PL_regmatch_state */
2483 register regmatch_state *st;
95b24440 2484
5d9a96ca
DM
2485 /* cache heavy used fields of st in registers */
2486 register regnode *scan;
2487 register regnode *next;
2488 register I32 n = 0; /* initialize to shut up compiler warning */
2489 register char *locinput = PL_reginput;
95b24440 2490
5d9a96ca
DM
2491 /* these variables are NOT saved during a recusive RFEGMATCH: */
2492 register I32 nextchr; /* is always set to UCHARAT(locinput) */
2493 bool result; /* return value of S_regmatch */
2494 regnode *inner; /* Next node in internal branch. */
2495 int depth = 0; /* depth of recursion */
aa283a38 2496 regmatch_state *newst; /* when pushing a state, this is the new one */
77cb431f
DM
2497 regmatch_state *yes_state = NULL; /* state to pop to on success of
2498 subpattern */
95b24440
DM
2499
2500#ifdef DEBUGGING
ab74612d 2501 SV *re_debug_flags = NULL;
a3621e74 2502 GET_RE_DEBUG_FLAGS;
d6a28714
JH
2503 PL_regindent++;
2504#endif
2505
5d9a96ca
DM
2506 /* on first ever call to regmatch, allocate first slab */
2507 if (!PL_regmatch_slab) {
2508 Newx(PL_regmatch_slab, 1, regmatch_slab);
2509 PL_regmatch_slab->prev = NULL;
2510 PL_regmatch_slab->next = NULL;
86545054 2511 PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
5d9a96ca
DM
2512 }
2513
2514 /* remember current high-water mark for exit */
2515 /* XXX this should be done with SAVE* instead */
2516 orig_slab = PL_regmatch_slab;
2517 orig_state = PL_regmatch_state;
2518
2519 /* grab next free state slot */
2520 st = ++PL_regmatch_state;
86545054 2521 if (st > SLAB_LAST(PL_regmatch_slab))
5d9a96ca
DM
2522 st = PL_regmatch_state = S_push_slab(aTHX);
2523
2524 st->minmod = 0;
2525 st->sw = 0;
2526 st->logical = 0;
2527 st->unwind = 0;
2528 st->cc = NULL;
d6a28714
JH
2529 /* Note that nextchr is a byte even in UTF */
2530 nextchr = UCHARAT(locinput);
2531 scan = prog;
2532 while (scan != NULL) {
8ba1375e 2533
a3621e74 2534 DEBUG_EXECUTE_r( {
6136c704 2535 SV * const prop = sv_newmortal();
1df70142
AL
2536 const int docolor = *PL_colors[0];
2537 const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2538 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2539 /* The part of the string before starttry has one color
2540 (pref0_len chars), between starttry and current
2541 position another one (pref_len - pref0_len chars),
2542 after the current position the third one.
2543 We assume that pref0_len <= pref_len, otherwise we
2544 decrease pref0_len. */
9041c2e3 2545 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2546 ? (5 + taill) - l : locinput - PL_bostr;
2547 int pref0_len;
d6a28714 2548
df1ffd02 2549 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2550 pref_len++;
2551 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2552 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2553 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2554 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2555 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2556 l--;
d6a28714
JH
2557 if (pref0_len < 0)
2558 pref0_len = 0;
2559 if (pref0_len > pref_len)
2560 pref0_len = pref_len;
32fc9b6a 2561 regprop(rex, prop, scan);
2a782b5b 2562 {
1df70142 2563 const char * const s0 =
f14c76ed 2564 do_utf8 && OP(scan) != CANY ?
95b24440 2565 pv_uni_display(PERL_DEBUG_PAD(0), (U8*)(locinput - pref_len),
c728cb41 2566 pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2567 locinput - pref_len;
bb7a0f54 2568 const int len0 = do_utf8 ? (int)strlen(s0) : pref0_len;
1df70142 2569 const char * const s1 = do_utf8 && OP(scan) != CANY ?
95b24440
DM
2570 pv_uni_display(PERL_DEBUG_PAD(1),
2571 (U8*)(locinput - pref_len + pref0_len),
c728cb41 2572 pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2573 locinput - pref_len + pref0_len;
bb7a0f54 2574 const int len1 = do_utf8 ? (int)strlen(s1) : pref_len - pref0_len;
1df70142 2575 const char * const s2 = do_utf8 && OP(scan) != CANY ?
95b24440 2576 pv_uni_display(PERL_DEBUG_PAD(2), (U8*)locinput,
c728cb41 2577 PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2578 locinput;
bb7a0f54 2579 const int len2 = do_utf8 ? (int)strlen(s2) : l;
2a782b5b
JH
2580 PerlIO_printf(Perl_debug_log,
2581 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2582 (IV)(locinput - PL_bostr),
2583 PL_colors[4],
2584 len0, s0,
2585 PL_colors[5],
2586 PL_colors[2],
2587 len1, s1,
2588 PL_colors[3],
2589 (docolor ? "" : "> <"),
2590 PL_colors[0],
2591 len2, s2,
2592 PL_colors[1],
2593 15 - l - pref_len + 1,
2594 "",
4f639d21 2595 (IV)(scan - rex->program), PL_regindent*2, "",
3f7c398e 2596 SvPVX_const(prop));
2a782b5b
JH
2597 }
2598 });
d6a28714
JH
2599
2600 next = scan + NEXT_OFF(scan);
2601 if (next == scan)
2602 next = NULL;
2603
2604 switch (OP(scan)) {
2605 case BOL:
7fba1cd6 2606 if (locinput == PL_bostr)
d6a28714 2607 {
3b0527fe 2608 /* reginfo->till = reginfo->bol; */
b8c5462f
JH
2609 break;
2610 }
d6a28714
JH
2611 sayNO;
2612 case MBOL:
12d33761
HS
2613 if (locinput == PL_bostr ||
2614 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2615 {
b8c5462f
JH
2616 break;
2617 }
d6a28714
JH
2618 sayNO;
2619 case SBOL:
c2a73568 2620 if (locinput == PL_bostr)
b8c5462f 2621 break;
d6a28714
JH
2622 sayNO;
2623 case GPOS:
3b0527fe 2624 if (locinput == reginfo->ganch)
d6a28714
JH
2625 break;
2626 sayNO;
2627 case EOL:
d6a28714
JH
2628 goto seol;
2629 case MEOL:
d6a28714 2630 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2631 sayNO;
b8c5462f 2632 break;
d6a28714
JH
2633 case SEOL:
2634 seol:
2635 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2636 sayNO;
d6a28714 2637 if (PL_regeol - locinput > 1)
b8c5462f 2638 sayNO;
b8c5462f 2639 break;
d6a28714
JH
2640 case EOS:
2641 if (PL_regeol != locinput)
b8c5462f 2642 sayNO;
d6a28714 2643 break;
ffc61ed2 2644 case SANY:
d6a28714 2645 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2646 sayNO;
f33976b4
DB
2647 if (do_utf8) {
2648 locinput += PL_utf8skip[nextchr];
2649 if (locinput > PL_regeol)
2650 sayNO;
2651 nextchr = UCHARAT(locinput);
2652 }
2653 else
2654 nextchr = UCHARAT(++locinput);
2655 break;
2656 case CANY:
2657 if (!nextchr && locinput >= PL_regeol)
2658 sayNO;
b8c5462f 2659 nextchr = UCHARAT(++locinput);
a0d0e21e 2660 break;
ffc61ed2 2661 case REG_ANY:
1aa99e6b
IH
2662 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2663 sayNO;
2664 if (do_utf8) {
b8c5462f 2665 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2666 if (locinput > PL_regeol)
2667 sayNO;
a0ed51b3 2668 nextchr = UCHARAT(locinput);
a0ed51b3 2669 }
1aa99e6b
IH
2670 else
2671 nextchr = UCHARAT(++locinput);
a0ed51b3 2672 break;
a3621e74
YO
2673
2674
2675
2676 /*
2677 traverse the TRIE keeping track of all accepting states
2678 we transition through until we get to a failing node.
a3621e74 2679 */
5b47454d 2680 case TRIE:
3dab1dad
YO
2681 {
2682 const enum { trie_plain, trie_utf8, trie_utf8_fold }
2683 trie_type = do_utf8 ?
2684 (scan->flags == EXACT ? trie_utf8 : trie_utf8_fold)
2685 : trie_plain;
2686
2687 /* what trie are we using right now */
2688 reg_trie_data *trie
2689 = (reg_trie_data*)rex->data->data[ ARG( scan ) ];
2690 U32 state = trie->startstate;
2691
2692 if (trie->bitmap && trie_type != trie_utf8_fold &&
2693 !TRIE_BITMAP_TEST(trie,*locinput)
2694 ) {
2695 if (trie->states[ state ].wordnum) {
2696 DEBUG_EXECUTE_r(
2697 PerlIO_printf(Perl_debug_log,
2698 "%*s %smatched empty string...%s\n",
2699 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
2700 );
2701 break;
2702 } else {
2703 DEBUG_EXECUTE_r(
2704 PerlIO_printf(Perl_debug_log,
2705 "%*s %sfailed to match start class...%s\n",
2706 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
2707 );
2708 sayNO_SILENT;
2709 }
2710 }
a3621e74 2711 {
a3621e74 2712 U8 *uc = ( U8* )locinput;
a3621e74
YO
2713 U16 charid = 0;
2714 U32 base = 0;
2715 UV uvc = 0;
2716 STRLEN len = 0;
2717 STRLEN foldlen = 0;
a3621e74
YO
2718 U8 *uscan = (U8*)NULL;
2719 STRLEN bufflen=0;
95b24440 2720 SV *sv_accept_buff = NULL;
5b47454d 2721
d8319b27 2722 st->u.trie.accepted = 0; /* how many accepting states we have seen */
95b24440 2723 result = 0;
a3621e74
YO
2724
2725 while ( state && uc <= (U8*)PL_regeol ) {
2726
5b47454d 2727 if (trie->states[ state ].wordnum) {
d8319b27 2728 if (!st->u.trie.accepted ) {
5b47454d
DM
2729 ENTER;
2730 SAVETMPS;
2731 bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
2732 sv_accept_buff=newSV(bufflen *
2733 sizeof(reg_trie_accepted) - 1);
2734 SvCUR_set(sv_accept_buff,
2735 sizeof(reg_trie_accepted));
2736 SvPOK_on(sv_accept_buff);
2737 sv_2mortal(sv_accept_buff);
d8319b27 2738 st->u.trie.accept_buff =
5b47454d
DM
2739 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
2740 }
2741 else {
d8319b27 2742 if (st->u.trie.accepted >= bufflen) {
5b47454d 2743 bufflen *= 2;
d8319b27 2744 st->u.trie.accept_buff =(reg_trie_accepted*)
5b47454d
DM
2745 SvGROW(sv_accept_buff,
2746 bufflen * sizeof(reg_trie_accepted));
2747 }
2748 SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
2749 + sizeof(reg_trie_accepted));
2750 }
d8319b27
DM
2751 st->u.trie.accept_buff[st->u.trie.accepted].wordnum = trie->states[state].wordnum;
2752 st->u.trie.accept_buff[st->u.trie.accepted].endpos = uc;
2753 ++st->u.trie.accepted;
5b47454d 2754 }
a3621e74
YO
2755
2756 base = trie->states[ state ].trans.base;
2757
2758 DEBUG_TRIE_EXECUTE_r(
2759 PerlIO_printf( Perl_debug_log,
e4584336 2760 "%*s %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ",
a3621e74 2761 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
d8319b27 2762 (UV)state, (UV)base, (UV)st->u.trie.accepted );
a3621e74
YO
2763 );
2764
2765 if ( base ) {
5b47454d 2766 switch (trie_type) {
3dab1dad 2767 case trie_utf8_fold:
a3621e74
YO
2768 if ( foldlen>0 ) {
2769 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
2770 foldlen -= len;
2771 uscan += len;
2772 len=0;
2773 } else {
1df70142 2774 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
a3621e74
YO
2775 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2776 uvc = to_uni_fold( uvc, foldbuf, &foldlen );
2777 foldlen -= UNISKIP( uvc );
2778 uscan = foldbuf + UNISKIP( uvc );
2779 }
5b47454d
DM
2780 break;
2781 case trie_utf8:
2782 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN,
2783 &len, uniflags );
2784 break;
2785 case trie_plain:
e4584336 2786 uvc = (UV)*uc;
a3621e74
YO
2787 len = 1;
2788 }
2789
5b47454d
DM
2790 if (uvc < 256) {
2791 charid = trie->charmap[ uvc ];
2792 }
2793 else {
2794 charid = 0;
2795 if (trie->widecharmap) {
3dab1dad 2796 SV** const svpp = hv_fetch(trie->widecharmap,
5b47454d
DM
2797 (char*)&uvc, sizeof(UV), 0);
2798 if (svpp)
2799 charid = (U16)SvIV(*svpp);
2800 }
2801 }
a3621e74 2802
5b47454d
DM
2803 if (charid &&
2804 (base + charid > trie->uniquecharcount )
2805 && (base + charid - 1 - trie->uniquecharcount
2806 < trie->lasttrans)
2807 && trie->trans[base + charid - 1 -
2808 trie->uniquecharcount].check == state)
2809 {
2810 state = trie->trans[base + charid - 1 -
2811 trie->uniquecharcount ].next;
2812 }
2813 else {
2814 state = 0;
2815 }
2816 uc += len;
2817
2818 }
2819 else {
a3621e74
YO
2820 state = 0;
2821 }
2822 DEBUG_TRIE_EXECUTE_r(
e4584336
RB
2823 PerlIO_printf( Perl_debug_log,
2824 "Charid:%3x CV:%4"UVxf" After State: %4"UVxf"%s\n",
2825 charid, uvc, (UV)state, PL_colors[5] );
a3621e74
YO
2826 );
2827 }
d8319b27 2828 if (!st->u.trie.accepted )
a3621e74 2829 sayNO;
a3621e74
YO
2830
2831 /*
2832 There was at least one accepting state that we
2833 transitioned through. Presumably the number of accepting
2834 states is going to be low, typically one or two. So we
2835 simply scan through to find the one with lowest wordnum.
2836 Once we find it, we swap the last state into its place
2837 and decrement the size. We then try to match the rest of
2838 the pattern at the point where the word ends, if we
2839 succeed then we end the loop, otherwise the loop
2840 eventually terminates once all of the accepting states
2841 have been tried.
2842 */
a3621e74 2843
d8319b27 2844 if ( st->u.trie.accepted == 1 ) {
a3621e74 2845 DEBUG_EXECUTE_r({
097eb12c 2846 SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ 0 ].wordnum-1, 0 );
a3621e74
YO
2847 PerlIO_printf( Perl_debug_log,
2848 "%*s %sonly one match : #%d <%s>%s\n",
e4584336 2849 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
d8319b27 2850 st->u.trie.accept_buff[ 0 ].wordnum,
cfd0369c 2851 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",
a3621e74
YO
2852 PL_colors[5] );
2853 });
d8319b27 2854 PL_reginput = (char *)st->u.trie.accept_buff[ 0 ].endpos;
a3621e74
YO
2855 /* in this case we free tmps/leave before we call regmatch
2856 as we wont be using accept_buff again. */
2857 FREETMPS;
2858 LEAVE;
95b24440
DM
2859 REGMATCH(scan + NEXT_OFF(scan), TRIE1);
2860 /*** all unsaved local vars undefined at this point */
a3621e74
YO
2861 } else {
2862 DEBUG_EXECUTE_r(
e4584336 2863 PerlIO_printf( Perl_debug_log,"%*s %sgot %"IVdf" possible matches%s\n",
d8319b27 2864 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], (IV)st->u.trie.accepted,
a3621e74
YO
2865 PL_colors[5] );
2866 );
d8319b27 2867 while ( !result && st->u.trie.accepted-- ) {
a3621e74
YO
2868 U32 best = 0;
2869 U32 cur;
d8319b27 2870 for( cur = 1 ; cur <= st->u.trie.accepted ; cur++ ) {
e4584336
RB
2871 DEBUG_TRIE_EXECUTE_r(
2872 PerlIO_printf( Perl_debug_log,
2873 "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
2874 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
d8319b27
DM
2875 (IV)best, st->u.trie.accept_buff[ best ].wordnum, (IV)cur,
2876 st->u.trie.accept_buff[ cur ].wordnum, PL_colors[5] );
e4584336 2877 );
a3621e74 2878
d8319b27
DM
2879 if (st->u.trie.accept_buff[cur].wordnum <
2880 st->u.trie.accept_buff[best].wordnum)
e822a8b4 2881 best = cur;
a3621e74
YO
2882 }
2883 DEBUG_EXECUTE_r({
87830502 2884 reg_trie_data * const trie = (reg_trie_data*)
32fc9b6a 2885 rex->data->data[ARG(scan)];
d8319b27 2886 SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ best ].wordnum - 1, 0 );
3dab1dad 2887 PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at node #%d %s\n",
e4584336 2888 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
d8319b27 2889 st->u.trie.accept_buff[best].wordnum,
3dab1dad 2890 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr", REG_NODE_NUM(scan),
a3621e74
YO
2891 PL_colors[5] );
2892 });
d8319b27
DM
2893 if ( best<st->u.trie.accepted ) {
2894 reg_trie_accepted tmp = st->u.trie.accept_buff[ best ];
2895 st->u.trie.accept_buff[ best ] = st->u.trie.accept_buff[ st->u.trie.accepted ];
2896 st->u.trie.accept_buff[ st->u.trie.accepted ] = tmp;
2897 best = st->u.trie.accepted;
a3621e74 2898 }
d8319b27 2899 PL_reginput = (char *)st->u.trie.accept_buff[ best ].endpos;
a3621e74
YO
2900
2901 /*
2902 as far as I can tell we only need the SAVETMPS/FREETMPS
2903 for re's with EVAL in them but I'm leaving them in for
2904 all until I can be sure.
2905 */
2906 SAVETMPS;
95b24440
DM
2907 REGMATCH(scan + NEXT_OFF(scan), TRIE2);
2908 /*** all unsaved local vars undefined at this point */
a3621e74
YO
2909 FREETMPS;
2910 }
2911 FREETMPS;
2912 LEAVE;
2913 }
2914
95b24440 2915 if (result) {
a3621e74
YO
2916 sayYES;
2917 } else {
2918 sayNO;
2919 }
3dab1dad 2920 }}
a3621e74 2921 /* unreached codepoint */
95b24440
DM
2922 case EXACT: {
2923 char *s = STRING(scan);
5d9a96ca 2924 st->ln = STR_LEN(scan);
eb160463 2925 if (do_utf8 != UTF) {
bc517b45 2926 /* The target and the pattern have differing utf8ness. */
1aa99e6b 2927 char *l = locinput;
5d9a96ca 2928 const char *e = s + st->ln;
a72c7584 2929
5ff6fc6d
JH
2930 if (do_utf8) {
2931 /* The target is utf8, the pattern is not utf8. */
1aa99e6b 2932 while (s < e) {
a3b680e6 2933 STRLEN ulen;
1aa99e6b 2934 if (l >= PL_regeol)
5ff6fc6d
JH
2935 sayNO;
2936 if (NATIVE_TO_UNI(*(U8*)s) !=
89ebb4a3 2937 utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
041457d9 2938 uniflags))
5ff6fc6d 2939 sayNO;
bc517b45 2940 l += ulen;
5ff6fc6d 2941 s ++;
1aa99e6b 2942 }
5ff6fc6d
JH
2943 }
2944 else {
2945 /* The target is not utf8, the pattern is utf8. */
1aa99e6b 2946 while (s < e) {
a3b680e6 2947 STRLEN ulen;
1aa99e6b
IH
2948 if (l >= PL_regeol)
2949 sayNO;
5ff6fc6d 2950 if (NATIVE_TO_UNI(*((U8*)l)) !=
89ebb4a3 2951 utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
041457d9 2952 uniflags))
1aa99e6b 2953 sayNO;
bc517b45 2954 s += ulen;
a72c7584 2955 l ++;
1aa99e6b 2956 }
5ff6fc6d 2957 }
1aa99e6b
IH
2958 locinput = l;
2959 nextchr = UCHARAT(locinput);
2960 break;
2961 }
bc517b45 2962 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2963 /* Inline the first character, for speed. */
2964 if (UCHARAT(s) != nextchr)
2965 sayNO;
5d9a96ca 2966 if (PL_regeol - locinput < st->ln)
d6a28714 2967 sayNO;
5d9a96ca 2968 if (st->ln > 1 && memNE(s, locinput, st->ln))
d6a28714 2969 sayNO;
5d9a96ca 2970 locinput += st->ln;
d6a28714
JH
2971 nextchr = UCHARAT(locinput);
2972 break;
95b24440 2973 }
d6a28714 2974 case EXACTFL:
b8c5462f
JH
2975 PL_reg_flags |= RF_tainted;
2976 /* FALL THROUGH */
95b24440
DM
2977 case EXACTF: {
2978 char *s = STRING(scan);
5d9a96ca 2979 st->ln = STR_LEN(scan);
d6a28714 2980
d07ddd77
JH
2981 if (do_utf8 || UTF) {
2982 /* Either target or the pattern are utf8. */
d6a28714 2983 char *l = locinput;
d07ddd77 2984 char *e = PL_regeol;
bc517b45 2985
5d9a96ca 2986 if (ibcmp_utf8(s, 0, st->ln, (bool)UTF,
1feea2c7 2987 l, &e, 0, do_utf8)) {
5486206c
JH
2988 /* One more case for the sharp s:
2989 * pack("U0U*", 0xDF) =~ /ss/i,
2990 * the 0xC3 0x9F are the UTF-8
2991 * byte sequence for the U+00DF. */
2992 if (!(do_utf8 &&
2993 toLOWER(s[0]) == 's' &&
5d9a96ca 2994 st->ln >= 2 &&
5486206c
JH
2995 toLOWER(s[1]) == 's' &&
2996 (U8)l[0] == 0xC3 &&
2997 e - l >= 2 &&
2998 (U8)l[1] == 0x9F))
2999 sayNO;
3000 }
d07ddd77
JH
3001 locinput = e;
3002 nextchr = UCHARAT(locinput);
3003 break;
a0ed51b3 3004 }
d6a28714 3005
bc517b45
JH
3006 /* Neither the target and the pattern are utf8. */
3007
d6a28714
JH
3008 /* Inline the first character, for speed. */
3009 if (UCHARAT(s) != nextchr &&
3010 UCHARAT(s) != ((OP(scan) == EXACTF)
3011 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 3012 sayNO;
5d9a96ca 3013 if (PL_regeol - locinput < st->ln)
b8c5462f 3014 sayNO;
5d9a96ca
DM
3015 if (st->ln > 1 && (OP(scan) == EXACTF
3016 ? ibcmp(s, locinput, st->ln)
3017 : ibcmp_locale(s, locinput, st->ln)))
4633a7c4 3018 sayNO;
5d9a96ca 3019 locinput += st->ln;
d6a28714 3020 nextchr = UCHARAT(locinput);
a0d0e21e 3021 break;
95b24440 3022 }
d6a28714 3023 case ANYOF:
ffc61ed2 3024 if (do_utf8) {
9e55ce06
JH
3025 STRLEN inclasslen = PL_regeol - locinput;
3026
32fc9b6a 3027 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
e0f9d4a8 3028 sayNO_ANYOF;
ffc61ed2
JH
3029 if (locinput >= PL_regeol)
3030 sayNO;
0f0076b4 3031 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
b8c5462f 3032 nextchr = UCHARAT(locinput);
e0f9d4a8 3033 break;
ffc61ed2
JH
3034 }
3035 else {
3036 if (nextchr < 0)
3037 nextchr = UCHARAT(locinput);
32fc9b6a 3038 if (!REGINCLASS(rex, scan, (U8*)locinput))
e0f9d4a8 3039 sayNO_ANYOF;
ffc61ed2
JH
3040 if (!nextchr && locinput >= PL_regeol)
3041 sayNO;
3042 nextchr = UCHARAT(++locinput);
e0f9d4a8
JH
3043 break;
3044 }
3045 no_anyof:
3046 /* If we might have the case of the German sharp s
3047 * in a casefolding Unicode character class. */
3048
ebc501f0
JH
3049 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3050 locinput += SHARP_S_SKIP;
e0f9d4a8 3051 nextchr = UCHARAT(locinput);
ffc61ed2 3052 }
e0f9d4a8
JH
3053 else
3054 sayNO;
b8c5462f 3055 break;
d6a28714 3056 case ALNUML:
b8c5462f
JH
3057 PL_reg_flags |= RF_tainted;
3058 /* FALL THROUGH */
d6a28714 3059 case ALNUM:
b8c5462f 3060 if (!nextchr)
4633a7c4 3061 sayNO;
ffc61ed2 3062 if (do_utf8) {
1a4fad37 3063 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3064 if (!(OP(scan) == ALNUM
bb7a0f54 3065 ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 3066 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
3067 {
3068 sayNO;
a0ed51b3 3069 }
b8c5462f 3070 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3071 nextchr = UCHARAT(locinput);
3072 break;
3073 }
ffc61ed2 3074 if (!(OP(scan) == ALNUM
d6a28714 3075 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 3076 sayNO;
b8c5462f 3077 nextchr = UCHARAT(++locinput);
a0d0e21e 3078 break;
d6a28714 3079 case NALNUML:
b8c5462f
JH
3080 PL_reg_flags |= RF_tainted;
3081 /* FALL THROUGH */
d6a28714
JH
3082 case NALNUM:
3083 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 3084 sayNO;
ffc61ed2 3085 if (do_utf8) {
1a4fad37 3086 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3087 if (OP(scan) == NALNUM
bb7a0f54 3088 ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
3089 : isALNUM_LC_utf8((U8*)locinput))
3090 {
b8c5462f 3091 sayNO;
d6a28714 3092 }
b8c5462f
JH
3093 locinput += PL_utf8skip[nextchr];
3094 nextchr = UCHARAT(locinput);
3095 break;
3096 }
ffc61ed2 3097 if (OP(scan) == NALNUM
d6a28714 3098 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 3099 sayNO;
76e3520e 3100 nextchr = UCHARAT(++locinput);
a0d0e21e 3101 break;
d6a28714
JH
3102 case BOUNDL:
3103 case NBOUNDL:
3280af22 3104 PL_reg_flags |= RF_tainted;
bbce6d69 3105 /* FALL THROUGH */
d6a28714
JH
3106 case BOUND:
3107 case NBOUND:
3108 /* was last char in word? */
ffc61ed2 3109 if (do_utf8) {
12d33761 3110 if (locinput == PL_bostr)
5d9a96ca 3111 st->ln = '\n';
ffc61ed2 3112 else {
a3b680e6 3113 const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
9041c2e3 3114
4ad0818d 3115 st->ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
ffc61ed2
JH
3116 }
3117 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
5d9a96ca 3118 st->ln = isALNUM_uni(st->ln);
1a4fad37 3119 LOAD_UTF8_CHARCLASS_ALNUM();
3568d838 3120 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
3121 }
3122 else {
5d9a96ca 3123 st->ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(st->ln));
ffc61ed2
JH
3124 n = isALNUM_LC_utf8((U8*)locinput);
3125 }
a0ed51b3 3126 }
d6a28714 3127 else {
5d9a96ca 3128 st->ln = (locinput != PL_bostr) ?
12d33761 3129 UCHARAT(locinput - 1) : '\n';
ffc61ed2 3130 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
5d9a96ca 3131 st->ln = isALNUM(st->ln);
ffc61ed2
JH
3132 n = isALNUM(nextchr);
3133 }
3134 else {
5d9a96ca 3135 st->ln = isALNUM_LC(st->ln);
ffc61ed2
JH
3136 n = isALNUM_LC(nextchr);
3137 }
d6a28714 3138 }
5d9a96ca 3139 if (((!st->ln) == (!n)) == (OP(scan) == BOUND ||
ffc61ed2
JH
3140 OP(scan) == BOUNDL))
3141 sayNO;
a0ed51b3 3142 break;
d6a28714 3143 case SPACEL:
3280af22 3144 PL_reg_flags |= RF_tainted;
bbce6d69 3145 /* FALL THROUGH */
d6a28714 3146 case SPACE:
9442cb0e 3147 if (!nextchr)
4633a7c4 3148 sayNO;
1aa99e6b 3149 if (do_utf8) {
fd400ab9 3150 if (UTF8_IS_CONTINUED(nextchr)) {
1a4fad37 3151 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3152 if (!(OP(scan) == SPACE
bb7a0f54 3153 ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
3154 : isSPACE_LC_utf8((U8*)locinput)))
3155 {
3156 sayNO;
3157 }
3158 locinput += PL_utf8skip[nextchr];
3159 nextchr = UCHARAT(locinput);
3160 break;
d6a28714 3161 }
ffc61ed2
JH
3162 if (!(OP(scan) == SPACE
3163 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3164 sayNO;
3165 nextchr = UCHARAT(++locinput);
3166 }
3167 else {
3168 if (!(OP(scan) == SPACE
3169 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3170 sayNO;
3171 nextchr = UCHARAT(++locinput);
a0ed51b3 3172 }
a0ed51b3 3173 break;
d6a28714 3174 case NSPACEL:
3280af22 3175 PL_reg_flags |= RF_tainted;
bbce6d69 3176 /* FALL THROUGH */
d6a28714 3177 case NSPACE:
9442cb0e 3178 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3179 sayNO;
1aa99e6b 3180 if (do_utf8) {
1a4fad37 3181 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3182 if (OP(scan) == NSPACE
bb7a0f54 3183 ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 3184 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
3185 {
3186 sayNO;
3187 }
3188 locinput += PL_utf8skip[nextchr];
3189 nextchr = UCHARAT(locinput);
3190 break;
a0ed51b3 3191 }
ffc61ed2 3192 if (OP(scan) == NSPACE
d6a28714 3193 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 3194 sayNO;
76e3520e 3195 nextchr = UCHARAT(++locinput);
a0d0e21e 3196 break;
d6a28714 3197 case DIGITL:
a0ed51b3
LW
3198 PL_reg_flags |= RF_tainted;
3199 /* FALL THROUGH */
d6a28714 3200 case DIGIT:
9442cb0e 3201 if (!nextchr)
a0ed51b3 3202 sayNO;
1aa99e6b 3203 if (do_utf8) {
1a4fad37 3204 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3205 if (!(OP(scan) == DIGIT
bb7a0f54 3206 ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 3207 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 3208 {
a0ed51b3 3209 sayNO;
dfe13c55 3210 }
6f06b55f 3211 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3212 nextchr = UCHARAT(locinput);
3213 break;
3214 }
ffc61ed2 3215 if (!(OP(scan) == DIGIT
9442cb0e 3216 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
3217 sayNO;
3218 nextchr = UCHARAT(++locinput);
3219 break;
d6a28714 3220 case NDIGITL:
b8c5462f
JH
3221 PL_reg_flags |= RF_tainted;
3222 /* FALL THROUGH */
d6a28714 3223 case NDIGIT:
9442cb0e 3224 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3225 sayNO;
1aa99e6b 3226 if (do_utf8) {
1a4fad37 3227 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3228 if (OP(scan) == NDIGIT
bb7a0f54 3229 ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
3230 : isDIGIT_LC_utf8((U8*)locinput))
3231 {
a0ed51b3 3232 sayNO;
9442cb0e 3233 }
6f06b55f 3234 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3235 nextchr = UCHARAT(locinput);
3236 break;
3237 }
ffc61ed2 3238 if (OP(scan) == NDIGIT
9442cb0e 3239 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
3240 sayNO;
3241 nextchr = UCHARAT(++locinput);
3242 break;
3243 case CLUMP:
b7c83a7e 3244 if (locinput >= PL_regeol)
a0ed51b3 3245 sayNO;
b7c83a7e 3246 if (do_utf8) {
1a4fad37 3247 LOAD_UTF8_CHARCLASS_MARK();
b7c83a7e
JH
3248 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3249 sayNO;
3250 locinput += PL_utf8skip[nextchr];
3251 while (locinput < PL_regeol &&
3252 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3253 locinput += UTF8SKIP(locinput);
3254 if (locinput > PL_regeol)
3255 sayNO;
eb08e2da
JH
3256 }
3257 else
3258 locinput++;
a0ed51b3
LW
3259 nextchr = UCHARAT(locinput);
3260 break;
c8756f30 3261 case REFFL:
3280af22 3262 PL_reg_flags |= RF_tainted;
c8756f30 3263 /* FALL THROUGH */
c277df42 3264 case REF:
95b24440
DM
3265 case REFF: {
3266 char *s;
c277df42 3267 n = ARG(scan); /* which paren pair */
5d9a96ca 3268 st->ln = PL_regstartp[n];
2c2d71f5 3269 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
5d9a96ca 3270 if ((I32)*PL_reglastparen < n || st->ln == -1)
af3f8c16 3271 sayNO; /* Do not match unless seen CLOSEn. */
5d9a96ca 3272 if (st->ln == PL_regendp[n])
a0d0e21e 3273 break;
a0ed51b3 3274
5d9a96ca 3275 s = PL_bostr + st->ln;
1aa99e6b 3276 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 3277 char *l = locinput;
a3b680e6 3278 const char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
3279 /*
3280 * Note that we can't do the "other character" lookup trick as
3281 * in the 8-bit case (no pun intended) because in Unicode we
3282 * have to map both upper and title case to lower case.
3283 */
3284 if (OP(scan) == REFF) {
3285 while (s < e) {
a3b680e6
AL
3286 STRLEN ulen1, ulen2;
3287 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3288 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3289
a0ed51b3
LW
3290 if (l >= PL_regeol)
3291 sayNO;
a2a2844f
JH
3292 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3293 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 3294 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 3295 sayNO;
a2a2844f
JH
3296 s += ulen1;
3297 l += ulen2;
a0ed51b3
LW
3298 }
3299 }
3300 locinput = l;
3301 nextchr = UCHARAT(locinput);
3302 break;
3303 }
3304
a0d0e21e 3305 /* Inline the first character, for speed. */
76e3520e 3306 if (UCHARAT(s) != nextchr &&
c8756f30
AK
3307 (OP(scan) == REF ||
3308 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 3309 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 3310 sayNO;
5d9a96ca
DM
3311 st->ln = PL_regendp[n] - st->ln;
3312 if (locinput + st->ln > PL_regeol)
4633a7c4 3313 sayNO;
5d9a96ca
DM
3314 if (st->ln > 1 && (OP(scan) == REF
3315 ? memNE(s, locinput, st->ln)
c8756f30 3316 : (OP(scan) == REFF
5d9a96ca
DM
3317 ? ibcmp(s, locinput, st->ln)
3318 : ibcmp_locale(s, locinput, st->ln))))
4633a7c4 3319 sayNO;
5d9a96ca 3320 locinput += st->ln;
76e3520e 3321 nextchr = UCHARAT(locinput);
a0d0e21e 3322 break;
95b24440 3323 }
a0d0e21e
LW
3324
3325 case NOTHING:
c277df42 3326 case TAIL:
a0d0e21e
LW
3327 break;
3328 case BACK:
3329 break;
c277df42
IZ
3330 case EVAL:
3331 {
c277df42 3332 SV *ret;
8e5e9ebe 3333 {
4aabdb9b
DM
3334 /* execute the code in the {...} */
3335 dSP;
6136c704 3336 SV ** const before = SP;
4aabdb9b
DM
3337 OP_4tree * const oop = PL_op;
3338 COP * const ocurcop = PL_curcop;
3339 PAD *old_comppad;
4aabdb9b
DM
3340
3341 n = ARG(scan);
32fc9b6a 3342 PL_op = (OP_4tree*)rex->data->data[n];
4aabdb9b 3343 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
32fc9b6a 3344 PAD_SAVE_LOCAL(old_comppad, (PAD*)rex->data->data[n + 2]);
4aabdb9b
DM
3345 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
3346
8e5e9ebe
RGS
3347 CALLRUNOPS(aTHX); /* Scalar context. */
3348 SPAGAIN;
3349 if (SP == before)
075aa684 3350 ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
8e5e9ebe
RGS
3351 else {
3352 ret = POPs;
3353 PUTBACK;
3354 }
4aabdb9b
DM
3355
3356 PL_op = oop;
3357 PAD_RESTORE_LOCAL(old_comppad);
3358 PL_curcop = ocurcop;
3359 if (!st->logical) {
3360 /* /(?{...})/ */
3361 sv_setsv(save_scalar(PL_replgv), ret);
4aabdb9b
DM
3362 break;
3363 }
8e5e9ebe 3364 }
4aabdb9b
DM
3365 if (st->logical == 2) { /* Postponed subexpression: /(??{...})/ */
3366 regexp *re;
4aabdb9b 3367 {
4f639d21
DM
3368 /* extract RE object from returned value; compiling if
3369 * necessary */
3370
6136c704 3371 MAGIC *mg = NULL;
4aabdb9b 3372 SV *sv;
faf82a0b
AE
3373 if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3374 mg = mg_find(sv, PERL_MAGIC_qr);
3375 else if (SvSMAGICAL(ret)) {
3376 if (SvGMAGICAL(ret))
3377 sv_unmagic(ret, PERL_MAGIC_qr);
3378 else
3379 mg = mg_find(ret, PERL_MAGIC_qr);
0f5d15d6 3380 }
faf82a0b 3381
0f5d15d6
IZ
3382 if (mg) {
3383 re = (regexp *)mg->mg_obj;
df0003d4 3384 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
3385 }
3386 else {
3387 STRLEN len;
6136c704 3388 const char * const t = SvPV_const(ret, len);
0f5d15d6 3389 PMOP pm;
a3b680e6 3390 const I32 osize = PL_regsize;
0f5d15d6 3391
5fcd1c1b 3392 Zero(&pm, 1, PMOP);
4aabdb9b 3393 if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
83003860 3394 re = CALLREGCOMP(aTHX_ (char*)t, (char*)t + len, &pm);
9041c2e3 3395 if (!(SvFLAGS(ret)
faf82a0b
AE
3396 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3397 | SVs_GMG)))
14befaf4
DM
3398 sv_magic(ret,(SV*)ReREFCNT_inc(re),
3399 PERL_MAGIC_qr,0,0);
0f5d15d6 3400 PL_regsize = osize;
0f5d15d6 3401 }
4aabdb9b 3402 }
aa283a38
DM
3403
3404 /* run the pattern returned from (??{...}) */
3405
4aabdb9b
DM
3406 DEBUG_EXECUTE_r(
3407 PerlIO_printf(Perl_debug_log,
3408 "Entering embedded \"%s%.60s%s%s\"\n",
3409 PL_colors[0],
3410 re->precomp,
3411 PL_colors[1],
3412 (strlen(re->precomp) > 60 ? "..." : ""))
3413 );
2c2d71f5 3414
4aabdb9b
DM
3415 st->u.eval.cp = regcppush(0); /* Save *all* the positions. */
3416 REGCP_SET(st->u.eval.lastcp);
4aabdb9b
DM
3417 *PL_reglastparen = 0;
3418 *PL_reglastcloseparen = 0;
4aabdb9b 3419 PL_reginput = locinput;
4aabdb9b
DM
3420
3421 /* XXXX This is too dramatic a measure... */
3422 PL_reg_maxiter = 0;
3423
5d9a96ca 3424 st->logical = 0;
aa283a38
DM
3425 st->u.eval.toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
3426 ((re->reganch & ROPT_UTF8) != 0);
3427 if (st->u.eval.toggleutf) PL_reg_flags ^= RF_utf8;
3428 st->u.eval.prev_rex = rex;
aa283a38 3429 rex = re;
aa283a38 3430
77cb431f
DM
3431 /* resume to current state on success */
3432 st->u.yes.prev_yes_state = yes_state;
3433 yes_state = st;
aa283a38
DM
3434 PUSH_STATE(newst, resume_EVAL);
3435 st = newst;
3436
3437 /* now continue from first node in postoned RE */
3438 next = re->program + 1;
3439 break;
4aabdb9b 3440 /* NOTREACHED */
a0ed51b3 3441 }
4aabdb9b
DM
3442 /* /(?(?{...})X|Y)/ */
3443 st->sw = SvTRUE(ret);
3444 st->logical = 0;
c277df42
IZ
3445 break;
3446 }
a0d0e21e 3447 case OPEN:
c277df42 3448 n = ARG(scan); /* which paren pair */
3280af22
NIS
3449 PL_reg_start_tmp[n] = locinput;
3450 if (n > PL_regsize)
3451 PL_regsize = n;
a0d0e21e
LW
3452 break;
3453 case CLOSE:
c277df42 3454 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
3455 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3456 PL_regendp[n] = locinput - PL_bostr;
eb160463 3457 if (n > (I32)*PL_reglastparen)
3280af22 3458 *PL_reglastparen = n;
a01268b5 3459 *PL_reglastcloseparen = n;
a0d0e21e 3460 break;
c277df42
IZ
3461 case GROUPP:
3462 n = ARG(scan); /* which paren pair */
5d9a96ca 3463 st->sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
3464 break;
3465 case IFTHEN:
2c2d71f5 3466 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
5d9a96ca 3467 if (st->sw)
c277df42
IZ
3468 next = NEXTOPER(NEXTOPER(scan));
3469 else {
3470 next = scan + ARG(scan);
3471 if (OP(next) == IFTHEN) /* Fake one. */
3472 next = NEXTOPER(NEXTOPER(next));
3473 }
3474 break;
3475 case LOGICAL:
5d9a96ca 3476 st->logical = scan->flags;
c277df42 3477 break;
2ab05381 3478/*******************************************************************
a0374537
DM
3479 cc points to the regmatch_state associated with the most recent CURLYX.
3480 This struct contains info about the innermost (...)* loop (an
3481 "infoblock"), and a pointer to the next outer cc.
2ab05381
IZ
3482
3483 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3484
95b24440 3485 1) After matching Y, regnode for CURLYX is processed;
2ab05381 3486
a0374537 3487 2) This regnode populates cc, and calls regmatch() recursively
95b24440 3488 with the starting point at WHILEM node;
2ab05381
IZ
3489
3490 3) Each hit of WHILEM node tries to match A and Z (in the order
3491 depending on the current iteration, min/max of {min,max} and
3492 greediness). The information about where are nodes for "A"
a0374537 3493 and "Z" is read from cc, as is info on how many times "A"
2ab05381
IZ
3494 was already matched, and greediness.
3495
3496 4) After A matches, the same WHILEM node is hit again.
3497
95b24440 3498 5) Each time WHILEM is hit, cc is the infoblock created by CURLYX
2ab05381 3499 of the same pair. Thus when WHILEM tries to match Z, it temporarily
95b24440 3500 resets cc, since this Y(A)*Z can be a part of some other loop:
2ab05381
IZ
3501 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
3502 of the external loop.
3503
a0374537 3504 Currently present infoblocks form a tree with a stem formed by st->cc
2ab05381
IZ
3505 and whatever it mentions via ->next, and additional attached trees
3506 corresponding to temporarily unset infoblocks as in "5" above.
3507
95b24440 3508 In the following picture, infoblocks for outer loop of
2ab05381
IZ
3509 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
3510 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
3511 infoblocks are drawn below the "reset" infoblock.
3512
3513 In fact in the picture below we do not show failed matches for Z and T
3514 by WHILEM blocks. [We illustrate minimal matches, since for them it is
3515 more obvious *why* one needs to *temporary* unset infoblocks.]
3516
3517 Matched REx position InfoBlocks Comment
3518 (Y(A)*?Z)*?T x
3519 Y(A)*?Z)*?T x <- O
3520 Y (A)*?Z)*?T x <- O
3521 Y A)*?Z)*?T x <- O <- I
3522 YA )*?Z)*?T x <- O <- I
3523 YA A)*?Z)*?T x <- O <- I
3524 YAA )*?Z)*?T x <- O <- I
3525 YAA Z)*?T x <- O # Temporary unset I
3526 I
3527
3528 YAAZ Y(A)*?Z)*?T x <- O
3529 I
3530
3531 YAAZY (A)*?Z)*?T x <- O
3532 I
3533
3534 YAAZY A)*?Z)*?T x <- O <- I
3535 I
3536
3537 YAAZYA )*?Z)*?T x <- O <- I
3538 I
3539
3540 YAAZYA Z)*?T x <- O # Temporary unset I
3541 I,I
3542
3543 YAAZYAZ )*?T x <- O
3544 I,I
3545
3546 YAAZYAZ T x # Temporary unset O
3547 O
3548 I,I
3549
3550 YAAZYAZT x
3551 O
3552 I,I
3553 *******************************************************************/
95b24440 3554
a0d0e21e 3555 case CURLYX: {
cb434fcc
IZ
3556 /* No need to save/restore up to this paren */
3557 I32 parenfloor = scan->flags;
c277df42 3558
c2b7afd3
NC
3559 /* Dave says:
3560
3561 CURLYX and WHILEM are always paired: they're the moral
3562 equivalent of pp_enteriter anbd pp_iter.
3563
3564 The only time next could be null is if the node tree is
3565 corrupt. This was mentioned on p5p a few days ago.
3566
3567 See http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2006-04/msg00556.html
3568 So we'll assert that this is true:
3569 */
3570 assert(next);
30b2893d 3571 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
c277df42 3572 next += ARG(next);
cb434fcc
IZ
3573 /* XXXX Probably it is better to teach regpush to support
3574 parenfloor > PL_regsize... */
eb160463 3575 if (parenfloor > (I32)*PL_reglastparen)
cb434fcc 3576 parenfloor = *PL_reglastparen; /* Pessimization... */
a0374537 3577
d8319b27
DM
3578 st->u.curlyx.cp = PL_savestack_ix;
3579 st->u.curlyx.outercc = st->cc;
a0374537
DM
3580 st->cc = st;
3581 /* these fields contain the state of the current curly.
3582 * they are accessed by subsequent WHILEMs;
3583 * cur and lastloc are also updated by WHILEM */
d8319b27
DM
3584 st->u.curlyx.parenfloor = parenfloor;
3585 st->u.curlyx.cur = -1; /* this will be updated by WHILEM */
3586 st->u.curlyx.min = ARG1(scan);
3587 st->u.curlyx.max = ARG2(scan);
3588 st->u.curlyx.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3589 st->u.curlyx.lastloc = 0;
a0374537
DM
3590 /* st->next and st->minmod are also read by WHILEM */
3591
3280af22 3592 PL_reginput = locinput;
95b24440
DM
3593 REGMATCH(PREVOPER(next), CURLYX); /* start on the WHILEM */
3594 /*** all unsaved local vars undefined at this point */
d8319b27
DM
3595 regcpblow(st->u.curlyx.cp);
3596 st->cc = st->u.curlyx.outercc;
95b24440 3597 saySAME(result);
a0d0e21e 3598 }
5f66b61c 3599 /* NOTREACHED */
a0d0e21e
LW
3600 case WHILEM: {
3601 /*
3602 * This is really hard to understand, because after we match
3603 * what we're trying to match, we must make sure the rest of
2c2d71f5 3604 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
3605 * to go back UP the parse tree by recursing ever deeper. And
3606 * if it fails, we have to reset our parent's current state
3607 * that we can try again after backing off.
3608 */
3609
c2b7afd3
NC
3610 /* Dave says:
3611
3612 st->cc gets initialised by CURLYX ready for use by WHILEM.
3613 So again, unless somethings been corrupted, st->cc cannot
3614 be null at that point in WHILEM.
3615
3616 See http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2006-04/msg00556.html
3617 So we'll assert that this is true:
3618 */
3619 assert(st->cc);
d8319b27
DM
3620 st->u.whilem.lastloc = st->cc->u.curlyx.lastloc; /* Detection of 0-len. */
3621 st->u.whilem.cache_offset = 0;
3622 st->u.whilem.cache_bit = 0;
c277df42 3623
d8319b27 3624 n = st->cc->u.curlyx.cur + 1; /* how many we know we matched */
3280af22 3625 PL_reginput = locinput;
a0d0e21e 3626
a3621e74 3627 DEBUG_EXECUTE_r(
9041c2e3 3628 PerlIO_printf(Perl_debug_log,
91f3b821 3629 "%*s %ld out of %ld..%ld cc=%"UVxf"\n",
3280af22 3630 REPORT_CODE_OFF+PL_regindent*2, "",
d8319b27
DM
3631 (long)n, (long)st->cc->u.curlyx.min,
3632 (long)st->cc->u.curlyx.max, PTR2UV(st->cc))
c277df42 3633 );
4633a7c4 3634
a0d0e21e
LW
3635 /* If degenerate scan matches "", assume scan done. */
3636
d8319b27
DM
3637 if (locinput == st->cc->u.curlyx.lastloc && n >= st->cc->u.curlyx.min) {
3638 st->u.whilem.savecc = st->cc;
3639 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3640 if (st->cc)
d8319b27 3641 st->ln = st->cc->u.curlyx.cur;
a3621e74 3642 DEBUG_EXECUTE_r(
c3464db5
DD
3643 PerlIO_printf(Perl_debug_log,
3644 "%*s empty match detected, try continuation...\n",
3280af22 3645 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3646 );
d8319b27 3647 REGMATCH(st->u.whilem.savecc->next, WHILEM1);
95b24440 3648 /*** all unsaved local vars undefined at this point */
d8319b27 3649 st->cc = st->u.whilem.savecc;
95b24440 3650 if (result)
4633a7c4 3651 sayYES;
d8319b27
DM
3652 if (st->cc->u.curlyx.outercc)
3653 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
4633a7c4 3654 sayNO;
a0d0e21e
LW
3655 }
3656
3657 /* First just match a string of min scans. */
3658
d8319b27
DM
3659 if (n < st->cc->u.curlyx.min) {
3660 st->cc->u.curlyx.cur = n;
3661 st->cc->u.curlyx.lastloc = locinput;
3662 REGMATCH(st->cc->u.curlyx.scan, WHILEM2);
95b24440
DM
3663 /*** all unsaved local vars undefined at this point */
3664 if (result)
4633a7c4 3665 sayYES;
d8319b27
DM
3666 st->cc->u.curlyx.cur = n - 1;
3667 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
4633a7c4 3668 sayNO;
a0d0e21e
LW
3669 }
3670
2c2d71f5
JH
3671 if (scan->flags) {
3672 /* Check whether we already were at this position.
3673 Postpone detection until we know the match is not
3674 *that* much linear. */
3675 if (!PL_reg_maxiter) {
3676 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
66bf836d
DM
3677 /* possible overflow for long strings and many CURLYX's */
3678 if (PL_reg_maxiter < 0)
3679 PL_reg_maxiter = I32_MAX;
2c2d71f5
JH
3680 PL_reg_leftiter = PL_reg_maxiter;
3681 }
3682 if (PL_reg_leftiter-- == 0) {
a3b680e6 3683 const I32 size = (PL_reg_maxiter + 7 + POSCACHE_START)/8;
2c2d71f5 3684 if (PL_reg_poscache) {
eb160463 3685 if ((I32)PL_reg_poscache_size < size) {
2c2d71f5
JH
3686 Renew(PL_reg_poscache, size, char);
3687 PL_reg_poscache_size = size;
3688 }
3689 Zero(PL_reg_poscache, size, char);
3690 }
3691 else {
3692 PL_reg_poscache_size = size;
a02a5408 3693 Newxz(PL_reg_poscache, size, char);
2c2d71f5 3694 }
a3621e74 3695 DEBUG_EXECUTE_r(
2c2d71f5
JH
3696 PerlIO_printf(Perl_debug_log,
3697 "%sDetected a super-linear match, switching on caching%s...\n",
3698 PL_colors[4], PL_colors[5])
3699 );
3700 }
3701 if (PL_reg_leftiter < 0) {
d8319b27 3702 st->u.whilem.cache_offset = locinput - PL_bostr;
2c2d71f5 3703
d8319b27
DM
3704 st->u.whilem.cache_offset = (scan->flags & 0xf) - 1 + POSCACHE_START
3705 + st->u.whilem.cache_offset * (scan->flags>>4);
3706 st->u.whilem.cache_bit = st->u.whilem.cache_offset % 8;
3707 st->u.whilem.cache_offset /= 8;
3708 if (PL_reg_poscache[st->u.whilem.cache_offset] & (1<<st->u.whilem.cache_bit)) {
a3621e74 3709 DEBUG_EXECUTE_r(
2c2d71f5
JH
3710 PerlIO_printf(Perl_debug_log,
3711 "%*s already tried at this position...\n",
3712 REPORT_CODE_OFF+PL_regindent*2, "")
3713 );
3ab3c9b4
HS
3714 if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))
3715 /* cache records success */
c2b0868c
HS
3716 sayYES;
3717 else
3ab3c9b4 3718 /* cache records failure */
c2b0868c 3719 sayNO_SILENT;
2c2d71f5 3720 }
2c2d71f5
JH
3721 }
3722 }
3723
a0d0e21e
LW
3724 /* Prefer next over scan for minimal matching. */
3725
5d9a96ca 3726 if (st->cc->minmod) {
d8319b27
DM
3727 st->u.whilem.savecc = st->cc;
3728 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3729 if (st->cc)
d8319b27
DM
3730 st->ln = st->cc->u.curlyx.cur;
3731 st->u.whilem.cp = regcppush(st->u.whilem.savecc->u.curlyx.parenfloor);
3732 REGCP_SET(st->u.whilem.lastcp);
3733 REGMATCH(st->u.whilem.savecc->next, WHILEM3);
95b24440 3734 /*** all unsaved local vars undefined at this point */
d8319b27 3735 st->cc = st->u.whilem.savecc;
95b24440 3736 if (result) {
d8319b27 3737 regcpblow(st->u.whilem.cp);
3ab3c9b4 3738 CACHEsayYES; /* All done. */
5f05dabc 3739 }
d8319b27 3740 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3741 regcppop(rex);
d8319b27
DM
3742 if (st->cc->u.curlyx.outercc)
3743 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
a0d0e21e 3744
d8319b27 3745 if (n >= st->cc->u.curlyx.max) { /* Maximum greed exceeded? */
9041c2e3 3746 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3747 && !(PL_reg_flags & RF_warned)) {
3748 PL_reg_flags |= RF_warned;
9014280d 3749 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
2f3ca594
GS
3750 "Complex regular subexpression recursion",
3751 REG_INFTY - 1);
c277df42 3752 }
3ab3c9b4 3753 CACHEsayNO;
c277df42 3754 }
a687059c 3755
a3621e74 3756 DEBUG_EXECUTE_r(
c3464db5
DD
3757 PerlIO_printf(Perl_debug_log,
3758 "%*s trying longer...\n",
3280af22 3759 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3760 );
a0d0e21e 3761 /* Try scanning more and see if it helps. */
3280af22 3762 PL_reginput = locinput;
d8319b27
DM
3763 st->cc->u.curlyx.cur = n;
3764 st->cc->u.curlyx.lastloc = locinput;
3765 st->u.whilem.cp = regcppush(st->cc->u.curlyx.parenfloor);
3766 REGCP_SET(st->u.whilem.lastcp);
3767 REGMATCH(st->cc->u.curlyx.scan, WHILEM4);
95b24440
DM
3768 /*** all unsaved local vars undefined at this point */
3769 if (result) {
d8319b27 3770 regcpblow(st->u.whilem.cp);
3ab3c9b4 3771 CACHEsayYES;
5f05dabc 3772 }
d8319b27 3773 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3774 regcppop(rex);
d8319b27
DM
3775 st->cc->u.curlyx.cur = n - 1;
3776 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
3ab3c9b4 3777 CACHEsayNO;
a0d0e21e
LW
3778 }
3779
3780 /* Prefer scan over next for maximal matching. */
3781
d8319b27
DM
3782 if (n < st->cc->u.curlyx.max) { /* More greed allowed? */
3783 st->u.whilem.cp = regcppush(st->cc->u.curlyx.parenfloor);
3784 st->cc->u.curlyx.cur = n;
3785 st->cc->u.curlyx.lastloc = locinput;
3786 REGCP_SET(st->u.whilem.lastcp);
3787 REGMATCH(st->cc->u.curlyx.scan, WHILEM5);
95b24440
DM
3788 /*** all unsaved local vars undefined at this point */
3789 if (result) {
d8319b27 3790 regcpblow(st->u.whilem.cp);
3ab3c9b4 3791 CACHEsayYES;
5f05dabc 3792 }
d8319b27 3793 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3794 regcppop(rex); /* Restore some previous $<digit>s? */
3280af22 3795 PL_reginput = locinput;
a3621e74 3796 DEBUG_EXECUTE_r(
c3464db5
DD
3797 PerlIO_printf(Perl_debug_log,
3798 "%*s failed, try continuation...\n",
3280af22 3799 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3800 );
3801 }
9041c2e3 3802 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3803 && !(PL_reg_flags & RF_warned)) {
3280af22 3804 PL_reg_flags |= RF_warned;
9014280d 3805 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
cb5d145d
GS
3806 "Complex regular subexpression recursion",
3807 REG_INFTY - 1);
a0d0e21e
LW
3808 }
3809
3810 /* Failed deeper matches of scan, so see if this one works. */
d8319b27
DM
3811 st->u.whilem.savecc = st->cc;
3812 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3813 if (st->cc)
d8319b27
DM
3814 st->ln = st->cc->u.curlyx.cur;
3815 REGMATCH(st->u.whilem.savecc->next, WHILEM6);
95b24440 3816 /*** all unsaved local vars undefined at this point */
d8319b27 3817 st->cc = st->u.whilem.savecc;
95b24440 3818 if (result)
3ab3c9b4 3819 CACHEsayYES;
d8319b27
DM
3820 if (st->cc->u.curlyx.outercc)
3821 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
3822 st->cc->u.curlyx.cur = n - 1;
3823 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
3ab3c9b4 3824 CACHEsayNO;
a0d0e21e 3825 }
5f66b61c 3826 /* NOTREACHED */
9041c2e3 3827 case BRANCHJ:
c277df42
IZ
3828 next = scan + ARG(scan);
3829 if (next == scan)
3830 next = NULL;
3831 inner = NEXTOPER(NEXTOPER(scan));
3832 goto do_branch;
9041c2e3 3833 case BRANCH:
c277df42
IZ
3834 inner = NEXTOPER(scan);
3835 do_branch:
3836 {
e822a8b4
DM
3837 I32 type;
3838 type = OP(scan);
ae5031b3 3839 if (!next || OP(next) != type) /* No choice. */
c277df42 3840 next = inner; /* Avoid recursion. */
a0d0e21e 3841 else {
a3b680e6 3842 const I32 lastparen = *PL_reglastparen;
02db2b7b 3843 /* Put unwinding data on stack */
6136c704
AL
3844 const I32 unwind1 = SSNEWt(1,re_unwind_branch_t);
3845 re_unwind_branch_t * const uw = SSPTRt(unwind1,re_unwind_branch_t);
3846
5d9a96ca
DM
3847 uw->prev = st->unwind;
3848 st->unwind = unwind1;
e822a8b4 3849 uw->type = ((type == BRANCH)
02db2b7b
IZ
3850 ? RE_UNWIND_BRANCH
3851 : RE_UNWIND_BRANCHJ);
3852 uw->lastparen = lastparen;
3853 uw->next = next;
3854 uw->locinput = locinput;
3855 uw->nextchr = nextchr;
3a2830be 3856 uw->minmod = st->minmod;
02db2b7b
IZ
3857#ifdef DEBUGGING
3858 uw->regindent = ++PL_regindent;
3859#endif
c277df42 3860
02db2b7b
IZ
3861 REGCP_SET(uw->lastcp);
3862
3863 /* Now go into the first branch */
3864 next = inner;
a687059c 3865 }
a0d0e21e
LW
3866 }
3867 break;
3868 case MINMOD:
5d9a96ca 3869 st->minmod = 1;
a0d0e21e 3870 break;
c277df42
IZ
3871 case CURLYM:
3872 {
d8319b27 3873 st->u.curlym.l = st->u.curlym.matches = 0;
9041c2e3 3874
c277df42 3875 /* We suppose that the next guy does not need
0e788c72 3876 backtracking: in particular, it is of constant non-zero length,
c277df42 3877 and has no parenths to influence future backrefs. */
5d9a96ca 3878 st->ln = ARG1(scan); /* min to match */
c277df42 3879 n = ARG2(scan); /* max to match */
d8319b27
DM
3880 st->u.curlym.paren = scan->flags;
3881 if (st->u.curlym.paren) {
3882 if (st->u.curlym.paren > PL_regsize)
3883 PL_regsize = st->u.curlym.paren;
3884 if (st->u.curlym.paren > (I32)*PL_reglastparen)
3885 *PL_reglastparen = st->u.curlym.paren;
c277df42 3886 }
dc45a647 3887 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
d8319b27 3888 if (st->u.curlym.paren)
c277df42 3889 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3890 PL_reginput = locinput;
d8319b27 3891 st->u.curlym.maxwanted = st->minmod ? st->ln : n;
0cadcf80
DM
3892 while (PL_reginput < PL_regeol && st->u.curlym.matches < st->u.curlym.maxwanted) {
3893 /* resume to current state on success */
3894 st->u.yes.prev_yes_state = yes_state;
3895 yes_state = st;
3896 REGMATCH(scan, CURLYM1);
3897 yes_state = st->u.yes.prev_yes_state;
3898 /*** all unsaved local vars undefined at this point */
3899 if (!result)
3900 break;
3901 /* on first match, determine length, u.curlym.l */
3902 if (!st->u.curlym.matches++) {
3903 if (PL_reg_match_utf8) {
3904 char *s = locinput;
3905 while (s < PL_reginput) {
3906 st->u.curlym.l++;
3907 s += UTF8SKIP(s);
6407bf3b
DM
3908 }
3909 }
0cadcf80
DM
3910 else {
3911 st->u.curlym.l = PL_reginput - locinput;
3912 }
3913 if (st->u.curlym.l == 0) {
3914 st->u.curlym.matches = st->u.curlym.maxwanted;
3915 break;
3916 }
6407bf3b 3917 }
0cadcf80 3918 locinput = PL_reginput;
6407bf3b
DM
3919 }
3920
3921 PL_reginput = locinput;
0cadcf80 3922 if (st->u.curlym.matches < st->ln) {
5d9a96ca 3923 st->minmod = 0;
0cadcf80
DM
3924 sayNO;
3925 }
5f80c4cf 3926
0cadcf80
DM
3927 DEBUG_EXECUTE_r(
3928 PerlIO_printf(Perl_debug_log,
3929 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
3930 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
3931 (IV) st->u.curlym.matches, (IV)st->u.curlym.l)
3932 );
3933
3934 /* calculate c1 and c1 for possible match of 1st char
3935 * following curly */
9e137952 3936 st->u.curlym.c1 = st->u.curlym.c2 = CHRTEST_VOID;
0cadcf80
DM
3937 if (HAS_TEXT(next) || JUMPABLE(next)) {
3938 regnode *text_node = next;
3dab1dad
YO
3939 if (! HAS_TEXT(text_node))
3940 FIND_NEXT_IMPT(text_node);
0cadcf80 3941 if (HAS_TEXT(text_node)
3dab1dad 3942 && PL_regkind[OP(text_node)] != REF)
0cadcf80
DM
3943 {
3944 st->u.curlym.c1 = (U8)*STRING(text_node);
3945 st->u.curlym.c2 =
3946 (OP(text_node) == EXACTF || OP(text_node) == REFF)
3947 ? PL_fold[st->u.curlym.c1]
3948 : (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3949 ? PL_fold_locale[st->u.curlym.c1]
3950 : st->u.curlym.c1;
3951 }
3952 }
5f80c4cf 3953
0cadcf80
DM
3954 REGCP_SET(st->u.curlym.lastcp);
3955
3956 st->u.curlym.minmod = st->minmod;
3957 st->minmod = 0;
3958 while (st->u.curlym.matches >= st->ln
3959 && (st->u.curlym.matches <= n
3960 /* for REG_INFTY, ln could overflow to negative */
3961 || (n == REG_INFTY && st->u.curlym.matches >= 0)))
3962 {
3963 /* If it could work, try it. */
9e137952 3964 if (st->u.curlym.c1 == CHRTEST_VOID ||
0cadcf80
DM
3965 UCHARAT(PL_reginput) == st->u.curlym.c1 ||
3966 UCHARAT(PL_reginput) == st->u.curlym.c2)
3967 {
3968 DEBUG_EXECUTE_r(
3969 PerlIO_printf(Perl_debug_log,
3970 "%*s trying tail with matches=%"IVdf"...\n",
3971 (int)(REPORT_CODE_OFF+PL_regindent*2),
3972 "", (IV)st->u.curlym.matches)
3973 );
3974 if (st->u.curlym.paren) {
3975 if (st->u.curlym.matches) {
3976 PL_regstartp[st->u.curlym.paren]
3977 = HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr;
3978 PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr;
cca55fe3 3979 }
5f80c4cf 3980 else
0cadcf80 3981 PL_regendp[st->u.curlym.paren] = -1;
5f80c4cf 3982 }
0cadcf80
DM
3983 /* resume to current state on success */
3984 st->u.yes.prev_yes_state = yes_state;
3985 yes_state = st;
3986 REGMATCH(next, CURLYM2);
3987 yes_state = st->u.yes.prev_yes_state;
3988 /*** all unsaved local vars undefined at this point */
3989 if (result)
3990 /* XXX tmp sayYES; */
3991 sayYES_FINAL;
3992 REGCP_UNWIND(st->u.curlym.lastcp);
a0ed51b3 3993 }
0cadcf80
DM
3994 /* Couldn't or didn't -- move forward/backward. */
3995 if (st->u.curlym.minmod) {
3280af22 3996 PL_reginput = locinput;
dad79028
DM
3997 /* resume to current state on success */
3998 st->u.yes.prev_yes_state = yes_state;
3999 yes_state = st;
95b24440 4000 REGMATCH(scan, CURLYM3);
dad79028 4001 yes_state = st->u.yes.prev_yes_state;
95b24440
DM
4002 /*** all unsaved local vars undefined at this point */
4003 if (result) {
0cadcf80 4004 st->u.curlym.matches++;
3280af22 4005 locinput = PL_reginput;
c277df42
IZ
4006 }
4007 else
4008 sayNO;
4009 }
0cadcf80 4010 else {
d8319b27
DM
4011 st->u.curlym.matches--;
4012 locinput = HOPc(locinput, -st->u.curlym.l);
3280af22 4013 PL_reginput = locinput;
c277df42
IZ
4014 }
4015 }
4016 sayNO;
5f66b61c 4017 /* NOTREACHED */
c277df42
IZ
4018 break;
4019 }
4020 case CURLYN:
d8319b27
DM
4021 st->u.plus.paren = scan->flags; /* Which paren to set */
4022 if (st->u.plus.paren > PL_regsize)
4023 PL_regsize = st->u.plus.paren;
4024 if (st->u.plus.paren > (I32)*PL_reglastparen)
4025 *PL_reglastparen = st->u.plus.paren;
5d9a96ca 4026 st->ln = ARG1(scan); /* min to match */
c277df42 4027 n = ARG2(scan); /* max to match */
dc45a647 4028 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 4029 goto repeat;
a0d0e21e 4030 case CURLY:
d8319b27 4031 st->u.plus.paren = 0;
5d9a96ca 4032 st->ln = ARG1(scan); /* min to match */
a0d0e21e 4033 n = ARG2(scan); /* max to match */
dc45a647 4034 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
4035 goto repeat;
4036 case STAR:
5d9a96ca 4037 st->ln = 0;
c277df42 4038 n = REG_INFTY;
a0d0e21e 4039 scan = NEXTOPER(scan);
d8319b27 4040 st->u.plus.paren = 0;
a0d0e21e
LW
4041 goto repeat;
4042 case PLUS:
5d9a96ca 4043 st->ln = 1;
c277df42
IZ
4044 n = REG_INFTY;
4045 scan = NEXTOPER(scan);
d8319b27 4046 st->u.plus.paren = 0;
c277df42 4047 repeat:
a0d0e21e
LW
4048 /*
4049 * Lookahead to avoid useless match attempts
4050 * when we know what character comes next.
4051 */
5f80c4cf
JP
4052
4053 /*
4054 * Used to only do .*x and .*?x, but now it allows
4055 * for )'s, ('s and (?{ ... })'s to be in the way
4056 * of the quantifier and the EXACT-like node. -- japhy
4057 */
4058
cca55fe3 4059 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
4060 U8 *s;
4061 regnode *text_node = next;
4062
3dab1dad
YO
4063 if (! HAS_TEXT(text_node))
4064 FIND_NEXT_IMPT(text_node);
5f80c4cf 4065
9e137952
DM
4066 if (! HAS_TEXT(text_node))
4067 st->u.plus.c1 = st->u.plus.c2 = CHRTEST_VOID;
5f80c4cf 4068 else {
3dab1dad 4069 if (PL_regkind[OP(text_node)] == REF) {
9e137952 4070 st->u.plus.c1 = st->u.plus.c2 = CHRTEST_VOID;
44a68960 4071 goto assume_ok_easy;
cca55fe3
JP
4072 }
4073 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
4074
4075 if (!UTF) {
d8319b27 4076 st->u.plus.c2 = st->u.plus.c1 = *s;
f65d3ee7 4077 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
d8319b27 4078 st->u.plus.c2 = PL_fold[st->u.plus.c1];
f65d3ee7 4079 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
d8319b27 4080 st->u.plus.c2 = PL_fold_locale[st->u.plus.c1];
1aa99e6b 4081 }
5f80c4cf 4082 else { /* UTF */
f65d3ee7 4083 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 4084 STRLEN ulen1, ulen2;
89ebb4a3
JH
4085 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4086 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
a2a2844f
JH
4087
4088 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4089 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4090
d8319b27 4091 st->u.plus.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
041457d9 4092 uniflags);
d8319b27 4093 st->u.plus.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
041457d9 4094 uniflags);
5f80c4cf
JP
4095 }
4096 else {
d8319b27 4097 st->u.plus.c2 = st->u.plus.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
041457d9 4098 uniflags);
5f80c4cf 4099 }
1aa99e6b
IH
4100 }
4101 }
bbce6d69 4102 }
a0d0e21e 4103 else
9e137952 4104 st->u.plus.c1 = st->u.plus.c2 = CHRTEST_VOID;
cca55fe3 4105 assume_ok_easy:
3280af22 4106 PL_reginput = locinput;
5d9a96ca
DM
4107 if (st->minmod) {
4108 st->minmod = 0;
32fc9b6a 4109 if (st->ln && regrepeat(rex, scan, st->ln) < st->ln)
4633a7c4 4110 sayNO;
a0ed51b3 4111 locinput = PL_reginput;
d8319b27 4112 REGCP_SET(st->u.plus.lastcp);
9e137952 4113 if (st->u.plus.c1 != CHRTEST_VOID) {
d8319b27
DM
4114 st->u.plus.old = locinput;
4115 st->u.plus.count = 0;
0fe9bf95 4116
1aa99e6b 4117 if (n == REG_INFTY) {
d8319b27 4118 st->u.plus.e = PL_regeol - 1;
1aa99e6b 4119 if (do_utf8)
d8319b27
DM
4120 while (UTF8_IS_CONTINUATION(*(U8*)st->u.plus.e))
4121 st->u.plus.e--;
1aa99e6b
IH
4122 }
4123 else if (do_utf8) {
5d9a96ca 4124 int m = n - st->ln;
d8319b27
DM
4125 for (st->u.plus.e = locinput;
4126 m >0 && st->u.plus.e + UTF8SKIP(st->u.plus.e) <= PL_regeol; m--)
4127 st->u.plus.e += UTF8SKIP(st->u.plus.e);
1aa99e6b
IH
4128 }
4129 else {
d8319b27
DM
4130 st->u.plus.e = locinput + n - st->ln;
4131 if (st->u.plus.e >= PL_regeol)
4132 st->u.plus.e = PL_regeol - 1;
1aa99e6b 4133 }
0fe9bf95
IZ
4134 while (1) {
4135 /* Find place 'next' could work */
1aa99e6b 4136 if (!do_utf8) {
d8319b27
DM
4137 if (st->u.plus.c1 == st->u.plus.c2) {
4138 while (locinput <= st->u.plus.e &&
4139 UCHARAT(locinput) != st->u.plus.c1)
1aa99e6b
IH
4140 locinput++;
4141 } else {
d8319b27
DM
4142 while (locinput <= st->u.plus.e
4143 && UCHARAT(locinput) != st->u.plus.c1
4144 && UCHARAT(locinput) != st->u.plus.c2)
1aa99e6b
IH
4145 locinput++;
4146 }
d8319b27 4147 st->u.plus.count = locinput - st->u.plus.old;
1aa99e6b
IH
4148 }
4149 else {
d8319b27 4150 if (st->u.plus.c1 == st->u.plus.c2) {
a3b680e6 4151 STRLEN len;
872c91ae
JH
4152 /* count initialised to
4153 * utf8_distance(old, locinput) */
d8319b27 4154 while (locinput <= st->u.plus.e &&
872c91ae 4155 utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4156 UTF8_MAXBYTES, &len,
d8319b27 4157 uniflags) != (UV)st->u.plus.c1) {
1aa99e6b 4158 locinput += len;
d8319b27 4159 st->u.plus.count++;
b2f2f093 4160 }
1aa99e6b 4161 } else {
872c91ae
JH
4162 /* count initialised to
4163 * utf8_distance(old, locinput) */
d8319b27 4164 while (locinput <= st->u.plus.e) {
c4fd8992
AL
4165 STRLEN len;
4166 const UV c = utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4167 UTF8_MAXBYTES, &len,
041457d9 4168 uniflags);
d8319b27 4169 if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
1aa99e6b 4170 break;
b2f2f093 4171 locinput += len;
d8319b27 4172 st->u.plus.count++;
1aa99e6b
IH
4173 }
4174 }
0fe9bf95 4175 }
d8319b27 4176 if (locinput > st->u.plus.e)
0fe9bf95
IZ
4177 sayNO;
4178 /* PL_reginput == old now */
d8319b27 4179 if (locinput != st->u.plus.old) {
5d9a96ca 4180 st->ln = 1; /* Did some */
32fc9b6a 4181 if (regrepeat(rex, scan, st->u.plus.count) < st->u.plus.count)
0fe9bf95
IZ
4182 sayNO;
4183 }
4184 /* PL_reginput == locinput now */
d8319b27 4185 TRYPAREN(st->u.plus.paren, st->ln, locinput, PLUS1);
95b24440 4186 /*** all unsaved local vars undefined at this point */
0fe9bf95 4187 PL_reginput = locinput; /* Could be reset... */
d8319b27 4188 REGCP_UNWIND(st->u.plus.lastcp);
0fe9bf95 4189 /* Couldn't or didn't -- move forward. */
d8319b27 4190 st->u.plus.old = locinput;
1aa99e6b
IH
4191 if (do_utf8)
4192 locinput += UTF8SKIP(locinput);
4193 else
4194 locinput++;
d8319b27 4195 st->u.plus.count = 1;
0fe9bf95
IZ
4196 }
4197 }
4198 else
5d9a96ca 4199 while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */
1aa99e6b 4200 UV c;
9e137952 4201 if (st->u.plus.c1 != CHRTEST_VOID) {
1aa99e6b 4202 if (do_utf8)
872c91ae 4203 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4204 UTF8_MAXBYTES, 0,
041457d9 4205 uniflags);
1aa99e6b 4206 else
9041c2e3 4207 c = UCHARAT(PL_reginput);
2390ecbc 4208 /* If it could work, try it. */
d8319b27 4209 if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
2390ecbc 4210 {
d8319b27 4211 TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS2);
95b24440 4212 /*** all unsaved local vars undefined at this point */
d8319b27 4213 REGCP_UNWIND(st->u.plus.lastcp);
2390ecbc 4214 }
1aa99e6b 4215 }
a0d0e21e 4216 /* If it could work, try it. */
9e137952 4217 else if (st->u.plus.c1 == CHRTEST_VOID)
bbce6d69 4218 {
d8319b27 4219 TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS3);
95b24440 4220 /*** all unsaved local vars undefined at this point */
d8319b27 4221 REGCP_UNWIND(st->u.plus.lastcp);
bbce6d69 4222 }
c277df42 4223 /* Couldn't or didn't -- move forward. */
a0ed51b3 4224 PL_reginput = locinput;
32fc9b6a 4225 if (regrepeat(rex, scan, 1)) {
5d9a96ca 4226 st->ln++;
a0ed51b3
LW
4227 locinput = PL_reginput;
4228 }
4229 else
4633a7c4 4230 sayNO;
a0d0e21e
LW
4231 }
4232 }
4233 else {
32fc9b6a 4234 n = regrepeat(rex, scan, n);
a0ed51b3 4235 locinput = PL_reginput;
3dab1dad
YO
4236 if ((st->ln < n) && (PL_regkind[OP(next)] == EOL) &&
4237 (OP(next) != MEOL || OP(next) == SEOL || OP(next) == EOS))
15272685 4238 {
5d9a96ca 4239 st->ln = n; /* why back off? */
1aeab75a
GS
4240 /* ...because $ and \Z can match before *and* after
4241 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
4242 We should back off by one in this case. */
4243 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
5d9a96ca 4244 st->ln--;
1aeab75a 4245 }
d8319b27 4246 REGCP_SET(st->u.plus.lastcp);
1d5c262f 4247 {
8fa7f367 4248 UV c = 0;
5d9a96ca 4249 while (n >= st->ln) {
9e137952 4250 if (st->u.plus.c1 != CHRTEST_VOID) {
1aa99e6b 4251 if (do_utf8)
872c91ae 4252 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4253 UTF8_MAXBYTES, 0,
041457d9 4254 uniflags);
1aa99e6b 4255 else
9041c2e3 4256 c = UCHARAT(PL_reginput);
1aa99e6b 4257 }
c277df42 4258 /* If it could work, try it. */
9e137952 4259 if (st->u.plus.c1 == CHRTEST_VOID || c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
c277df42 4260 {
d8319b27 4261 TRYPAREN(st->u.plus.paren, n, PL_reginput, PLUS4);
95b24440 4262 /*** all unsaved local vars undefined at this point */
d8319b27 4263 REGCP_UNWIND(st->u.plus.lastcp);
c277df42
IZ
4264 }
4265 /* Couldn't or didn't -- back up. */
4266 n--;
dfe13c55 4267 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 4268 }
a0d0e21e
LW
4269 }
4270 }
4633a7c4 4271 sayNO;
c277df42 4272 break;
a0d0e21e 4273 case END:
3b0527fe 4274 if (locinput < reginfo->till) {
a3621e74 4275 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
7821416a
IZ
4276 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4277 PL_colors[4],
4278 (long)(locinput - PL_reg_starttry),
3b0527fe 4279 (long)(reginfo->till - PL_reg_starttry),
7821416a
IZ
4280 PL_colors[5]));
4281 sayNO_FINAL; /* Cannot match: too short. */
4282 }
4283 PL_reginput = locinput; /* put where regtry can find it */
4284 sayYES_FINAL; /* Success! */
dad79028
DM
4285
4286 case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4287 DEBUG_EXECUTE_r(
4288 PerlIO_printf(Perl_debug_log,
4289 "%*s %ssubpattern success...%s\n",
4290 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5]));
3280af22 4291 PL_reginput = locinput; /* put where regtry can find it */
dad79028
DM
4292 sayYES_FINAL; /* Success! */
4293
4294 case SUSPEND: /* (?>FOO) */
4295 st->u.ifmatch.wanted = 1;
9fe1d20c 4296 PL_reginput = locinput;
9041c2e3 4297 goto do_ifmatch;
dad79028
DM
4298
4299 case UNLESSM: /* -ve lookaround: (?!FOO), or with flags, (?<!foo) */
4300 st->u.ifmatch.wanted = 0;
4301 goto ifmatch_trivial_fail_test;
4302
4303 case IFMATCH: /* +ve lookaround: (?=FOO), or with flags, (?<=foo) */
4304 st->u.ifmatch.wanted = 1;
4305 ifmatch_trivial_fail_test:
a0ed51b3 4306 if (scan->flags) {
52657f30 4307 char * const s = HOPBACKc(locinput, scan->flags);
dad79028
DM
4308 if (!s) {
4309 /* trivial fail */
4310 if (st->logical) {
4311 st->logical = 0;
3dab1dad 4312 st->sw = 1 - (bool)st->u.ifmatch.wanted;
dad79028
DM
4313 }
4314 else if (st->u.ifmatch.wanted)
4315 sayNO;
4316 next = scan + ARG(scan);
4317 if (next == scan)
4318 next = NULL;
4319 break;
4320 }
efb30f32 4321 PL_reginput = s;
a0ed51b3
LW
4322 }
4323 else
4324 PL_reginput = locinput;
4325
c277df42 4326 do_ifmatch:
dad79028
DM
4327 /* resume to current state on success */
4328 st->u.yes.prev_yes_state = yes_state;
4329 yes_state = st;
4330 PUSH_STATE(newst, resume_IFMATCH);
4331 st = newst;
4332 next = NEXTOPER(NEXTOPER(scan));
4333 break;
4334
c277df42 4335 case LONGJMP:
c277df42
IZ
4336 next = scan + ARG(scan);
4337 if (next == scan)
4338 next = NULL;
a0d0e21e
LW
4339 break;
4340 default:
b900a521 4341 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 4342 PTR2UV(scan), OP(scan));
cea2e8a9 4343 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 4344 }
95b24440 4345
02db2b7b 4346 reenter:
a0d0e21e 4347 scan = next;
95b24440
DM
4348 continue;
4349 /* NOTREACHED */
4350
4351 /* simulate recursively calling regmatch(), but without actually
4352 * recursing - ie save the current state on the heap rather than on
4353 * the stack, then re-enter the loop. This avoids complex regexes
4354 * blowing the processor stack */
4355
4356 start_recurse:
4357 {
5d9a96ca
DM
4358 /* push new state */
4359 regmatch_state *oldst = st;
4360
4361 depth++;
4362
4363 /* grab the next free state slot */
4364 st++;
86545054 4365 if (st > SLAB_LAST(PL_regmatch_slab))
5d9a96ca
DM
4366 st = S_push_slab(aTHX);
4367 PL_regmatch_state = st;
4368
4369 oldst->next = next;
4370 oldst->n = n;
4371 oldst->locinput = locinput;
5d9a96ca
DM
4372
4373 st->cc = oldst->cc;
95b24440
DM
4374 locinput = PL_reginput;
4375 nextchr = UCHARAT(locinput);
5d9a96ca
DM
4376 st->minmod = 0;
4377 st->sw = 0;
4378 st->logical = 0;
4379 st->unwind = 0;
95b24440
DM
4380#ifdef DEBUGGING
4381 PL_regindent++;
4382#endif
4383 }
a0d0e21e 4384 }
a687059c 4385
aa283a38
DM
4386
4387
a0d0e21e
LW
4388 /*
4389 * We get here only if there's trouble -- normally "case END" is
4390 * the terminating point.
4391 */
cea2e8a9 4392 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 4393 /*NOTREACHED*/
4633a7c4
LW
4394 sayNO;
4395
7821416a 4396yes_final:
77cb431f
DM
4397
4398 if (yes_state) {
4399 /* we have successfully completed a subexpression, but we must now
4400 * pop to the state marked by yes_state and continue from there */
4401
dad79028 4402 /*XXX tmp for CURLYM*/
c4fd8992
AL
4403 regmatch_slab * const oslab = PL_regmatch_slab;
4404 regmatch_state * const ost = st;
4405 regmatch_state * const oys = yes_state;
dad79028
DM
4406 int odepth = depth;
4407
77cb431f
DM
4408 assert(st != yes_state);
4409 while (yes_state < SLAB_FIRST(PL_regmatch_slab)
4410 || yes_state > SLAB_LAST(PL_regmatch_slab))
4411 {
4412 /* not in this slab, pop slab */
4413 depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
4414 PL_regmatch_slab = PL_regmatch_slab->prev;
4415 st = SLAB_LAST(PL_regmatch_slab);
4416 }
4417 depth -= (st - yes_state);
dad79028 4418 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE TO (%d)\n", depth));
77cb431f
DM
4419 st = yes_state;
4420 yes_state = st->u.yes.prev_yes_state;
4421 PL_regmatch_state = st;
4422
4423 switch (st->resume_state) {
4424 case resume_EVAL:
4425 if (st->u.eval.toggleutf)
4426 PL_reg_flags ^= RF_utf8;
4427 ReREFCNT_dec(rex);
4428 rex = st->u.eval.prev_rex;
4429 /* XXXX This is too dramatic a measure... */
4430 PL_reg_maxiter = 0;
4431 /* Restore parens of the caller without popping the
4432 * savestack */
4433 {
c4fd8992 4434 const I32 tmp = PL_savestack_ix;
77cb431f
DM
4435 PL_savestack_ix = st->u.eval.lastcp;
4436 regcppop(rex);
4437 PL_savestack_ix = tmp;
4438 }
4439 PL_reginput = locinput;
4440 /* continue at the node following the (??{...}) */
4441 next = st->next;
4442 goto reenter;
4443
dad79028
DM
4444 case resume_IFMATCH:
4445 if (st->logical) {
4446 st->logical = 0;
3dab1dad 4447 st->sw = (bool)st->u.ifmatch.wanted;
dad79028
DM
4448 }
4449 else if (!st->u.ifmatch.wanted)
4450 sayNO;
4451
4452 if (OP(st->scan) == SUSPEND)
4453 locinput = PL_reginput;
4454 else {
4455 locinput = PL_reginput = st->locinput;
4456 nextchr = UCHARAT(locinput);
4457 }
4458 next = st->scan + ARG(st->scan);
4459 if (next == st->scan)
4460 next = NULL;
4461 goto reenter;
4462
4463 /* XXX tmp don't handle yes_state yet */
4464 case resume_CURLYM1:
4465 case resume_CURLYM2:
4466 case resume_CURLYM3:
dad79028
DM
4467 PL_regmatch_slab =oslab;
4468 st = ost;
4469 PL_regmatch_state = st;
4470 depth = odepth;
4471 yes_state = oys;
4472 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "XXX revering a CURLYM\n"));
4473 goto yes;
4474
77cb431f
DM
4475 default:
4476 Perl_croak(aTHX_ "unexpected yes reume state");
4477 }
4478 }
4479
a3621e74 4480 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
e4584336 4481 PL_colors[4], PL_colors[5]));
4633a7c4
LW
4482yes:
4483#ifdef DEBUGGING
3280af22 4484 PL_regindent--;
4633a7c4 4485#endif
02db2b7b 4486
95b24440 4487 result = 1;
aa283a38 4488 /* XXX this is duplicate(ish) code to that in the do_no section.
77cb431f
DM
4489 * eventually a yes should just pop the stack back to the current
4490 * yes_state */
aa283a38
DM
4491 if (depth) {
4492 /* restore previous state and re-enter */
4493 POP_STATE;
4494
4495 switch (st->resume_state) {
4496 case resume_TRIE1:
4497 goto resume_point_TRIE1;
4498 case resume_TRIE2:
4499 goto resume_point_TRIE2;
aa283a38
DM
4500 case resume_CURLYX:
4501 goto resume_point_CURLYX;
4502 case resume_WHILEM1:
4503 goto resume_point_WHILEM1;
4504 case resume_WHILEM2:
4505 goto resume_point_WHILEM2;
4506 case resume_WHILEM3:
4507 goto resume_point_WHILEM3;
4508 case resume_WHILEM4:
4509 goto resume_point_WHILEM4;
4510 case resume_WHILEM5:
4511 goto resume_point_WHILEM5;
4512 case resume_WHILEM6:
4513 goto resume_point_WHILEM6;
4514 case resume_CURLYM1:
4515 goto resume_point_CURLYM1;
4516 case resume_CURLYM2:
4517 goto resume_point_CURLYM2;
4518 case resume_CURLYM3:
4519 goto resume_point_CURLYM3;
aa283a38
DM
4520 case resume_PLUS1:
4521 goto resume_point_PLUS1;
4522 case resume_PLUS2:
4523 goto resume_point_PLUS2;
4524 case resume_PLUS3:
4525 goto resume_point_PLUS3;
4526 case resume_PLUS4:
4527 goto resume_point_PLUS4;
77cb431f 4528
dad79028 4529 case resume_IFMATCH:
77cb431f 4530 case resume_EVAL:
aa283a38
DM
4531 default:
4532 Perl_croak(aTHX_ "regexp resume memory corruption");
4533 }
4534 }
4535 goto final_exit;
4633a7c4
LW
4536
4537no:
a3621e74 4538 DEBUG_EXECUTE_r(
7821416a
IZ
4539 PerlIO_printf(Perl_debug_log,
4540 "%*s %sfailed...%s\n",
e4584336 4541 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
7821416a
IZ
4542 );
4543 goto do_no;
4544no_final:
4545do_no:
5d9a96ca
DM
4546 if (st->unwind) {
4547 re_unwind_t * const uw = SSPTRt(st->unwind,re_unwind_t);
02db2b7b
IZ
4548
4549 switch (uw->type) {
4550 case RE_UNWIND_BRANCH:
4551 case RE_UNWIND_BRANCHJ:
4552 {
6136c704 4553 re_unwind_branch_t * const uwb = &(uw->branch);
a3b680e6 4554 const I32 lastparen = uwb->lastparen;
9041c2e3 4555
02db2b7b
IZ
4556 REGCP_UNWIND(uwb->lastcp);
4557 for (n = *PL_reglastparen; n > lastparen; n--)
4558 PL_regendp[n] = -1;
4559 *PL_reglastparen = n;
4560 scan = next = uwb->next;
3a2830be 4561 st->minmod = uwb->minmod;
9041c2e3
NIS
4562 if ( !scan ||
4563 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b 4564 ? BRANCH : BRANCHJ) ) { /* Failure */
5d9a96ca 4565 st->unwind = uwb->prev;
02db2b7b
IZ
4566#ifdef DEBUGGING
4567 PL_regindent--;
4568#endif
4569 goto do_no;
4570 }
4571 /* Have more choice yet. Reuse the same uwb. */
02db2b7b
IZ
4572 if ((n = (uwb->type == RE_UNWIND_BRANCH
4573 ? NEXT_OFF(next) : ARG(next))))
4574 next += n;
4575 else
4576 next = NULL; /* XXXX Needn't unwinding in this case... */
4577 uwb->next = next;
4578 next = NEXTOPER(scan);
4579 if (uwb->type == RE_UNWIND_BRANCHJ)
4580 next = NEXTOPER(next);
4581 locinput = uwb->locinput;
4582 nextchr = uwb->nextchr;
4583#ifdef DEBUGGING
4584 PL_regindent = uwb->regindent;
4585#endif
4586
4587 goto reenter;
4588 }
5f66b61c 4589 /* NOTREACHED */
02db2b7b
IZ
4590 default:
4591 Perl_croak(aTHX_ "regexp unwind memory corruption");
4592 }
5f66b61c 4593 /* NOTREACHED */
02db2b7b 4594 }
aa283a38 4595
4633a7c4 4596#ifdef DEBUGGING
3280af22 4597 PL_regindent--;
4633a7c4 4598#endif
95b24440 4599 result = 0;
5d9a96ca 4600
aa283a38
DM
4601 if (depth) {
4602 /* there's a previous state to backtrack to */
4603 POP_STATE;
5d9a96ca 4604 switch (st->resume_state) {
95b24440
DM
4605 case resume_TRIE1:
4606 goto resume_point_TRIE1;
4607 case resume_TRIE2:
4608 goto resume_point_TRIE2;
aa283a38
DM
4609 case resume_EVAL:
4610 /* we have failed an (??{...}). Restore state to the outer re
4611 * then re-throw the failure */
4612 if (st->u.eval.toggleutf)
4613 PL_reg_flags ^= RF_utf8;
4614 ReREFCNT_dec(rex);
4615 rex = st->u.eval.prev_rex;
77cb431f 4616 yes_state = st->u.yes.prev_yes_state;
aa283a38
DM
4617
4618 /* XXXX This is too dramatic a measure... */
4619 PL_reg_maxiter = 0;
4620
4621 PL_reginput = locinput;
4622 REGCP_UNWIND(st->u.eval.lastcp);
4623 regcppop(rex);
4624 goto do_no;
4625
95b24440
DM
4626 case resume_CURLYX:
4627 goto resume_point_CURLYX;
4628 case resume_WHILEM1:
4629 goto resume_point_WHILEM1;
4630 case resume_WHILEM2:
4631 goto resume_point_WHILEM2;
4632 case resume_WHILEM3:
4633 goto resume_point_WHILEM3;
4634 case resume_WHILEM4:
4635 goto resume_point_WHILEM4;
4636 case resume_WHILEM5:
4637 goto resume_point_WHILEM5;
4638 case resume_WHILEM6:
4639 goto resume_point_WHILEM6;
4640 case resume_CURLYM1:
4641 goto resume_point_CURLYM1;
4642 case resume_CURLYM2:
4643 goto resume_point_CURLYM2;
4644 case resume_CURLYM3:
4645 goto resume_point_CURLYM3;
95b24440 4646 case resume_IFMATCH:
dad79028
DM
4647 yes_state = st->u.yes.prev_yes_state;
4648 if (st->logical) {
4649 st->logical = 0;
4650 st->sw = !st->u.ifmatch.wanted;
4651 }
4652 else if (st->u.ifmatch.wanted)
4653 sayNO;
4654
4655 assert(OP(scan) != SUSPEND); /* XXX DAPM tmp */
4656 locinput = PL_reginput = st->locinput;
4657 nextchr = UCHARAT(locinput);
4658 next = scan + ARG(scan);
4659 if (next == scan)
4660 next = NULL;
4661 goto reenter;
4662
95b24440
DM
4663 case resume_PLUS1:
4664 goto resume_point_PLUS1;
4665 case resume_PLUS2:
4666 goto resume_point_PLUS2;
4667 case resume_PLUS3:
4668 goto resume_point_PLUS3;
4669 case resume_PLUS4:
4670 goto resume_point_PLUS4;
95b24440
DM
4671 default:
4672 Perl_croak(aTHX_ "regexp resume memory corruption");
4673 }
95b24440 4674 }
aa283a38
DM
4675
4676final_exit:
4677
5d9a96ca
DM
4678 /* restore original high-water mark */
4679 PL_regmatch_slab = orig_slab;
4680 PL_regmatch_state = orig_state;
4681
4682 /* free all slabs above current one */
4683 if (orig_slab->next) {
c4fd8992 4684 regmatch_slab *sl = orig_slab->next;
5d9a96ca
DM
4685 orig_slab->next = NULL;
4686 while (sl) {
c4fd8992 4687 regmatch_slab * const osl = sl;
5d9a96ca 4688 sl = sl->next;
ad65c075 4689 Safefree(osl);
5d9a96ca
DM
4690 }
4691 }
4692
95b24440
DM
4693 return result;
4694
a687059c
LW
4695}
4696
4697/*
4698 - regrepeat - repeatedly match something simple, report how many
4699 */
4700/*
4701 * [This routine now assumes that it will only match on things of length 1.
4702 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 4703 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 4704 */
76e3520e 4705STATIC I32
32fc9b6a 4706S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max)
a687059c 4707{
27da23d5 4708 dVAR;
a0d0e21e 4709 register char *scan;
a0d0e21e 4710 register I32 c;
3280af22 4711 register char *loceol = PL_regeol;
a0ed51b3 4712 register I32 hardcount = 0;
53c4c00c 4713 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 4714
3280af22 4715 scan = PL_reginput;
faf11cac
HS
4716 if (max == REG_INFTY)
4717 max = I32_MAX;
4718 else if (max < loceol - scan)
7f596f4c 4719 loceol = scan + max;
a0d0e21e 4720 switch (OP(p)) {
22c35a8c 4721 case REG_ANY:
1aa99e6b 4722 if (do_utf8) {
ffc61ed2 4723 loceol = PL_regeol;
1aa99e6b 4724 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
4725 scan += UTF8SKIP(scan);
4726 hardcount++;
4727 }
4728 } else {
4729 while (scan < loceol && *scan != '\n')
4730 scan++;
a0ed51b3
LW
4731 }
4732 break;
ffc61ed2 4733 case SANY:
def8e4ea
JH
4734 if (do_utf8) {
4735 loceol = PL_regeol;
a0804c9e 4736 while (scan < loceol && hardcount < max) {
def8e4ea
JH
4737 scan += UTF8SKIP(scan);
4738 hardcount++;
4739 }
4740 }
4741 else
4742 scan = loceol;
a0ed51b3 4743 break;
f33976b4
DB
4744 case CANY:
4745 scan = loceol;
4746 break;
090f7165
JH
4747 case EXACT: /* length of string is 1 */
4748 c = (U8)*STRING(p);
4749 while (scan < loceol && UCHARAT(scan) == c)
4750 scan++;
bbce6d69 4751 break;
4752 case EXACTF: /* length of string is 1 */
cd439c50 4753 c = (U8)*STRING(p);
bbce6d69 4754 while (scan < loceol &&
22c35a8c 4755 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 4756 scan++;
4757 break;
4758 case EXACTFL: /* length of string is 1 */
3280af22 4759 PL_reg_flags |= RF_tainted;
cd439c50 4760 c = (U8)*STRING(p);
bbce6d69 4761 while (scan < loceol &&
22c35a8c 4762 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
4763 scan++;
4764 break;
4765 case ANYOF:
ffc61ed2
JH
4766 if (do_utf8) {
4767 loceol = PL_regeol;
cfc92286 4768 while (hardcount < max && scan < loceol &&
32fc9b6a 4769 reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
ffc61ed2
JH
4770 scan += UTF8SKIP(scan);
4771 hardcount++;
4772 }
4773 } else {
32fc9b6a 4774 while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
ffc61ed2
JH
4775 scan++;
4776 }
a0d0e21e
LW
4777 break;
4778 case ALNUM:
1aa99e6b 4779 if (do_utf8) {
ffc61ed2 4780 loceol = PL_regeol;
1a4fad37 4781 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4782 while (hardcount < max && scan < loceol &&
3568d838 4783 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4784 scan += UTF8SKIP(scan);
4785 hardcount++;
4786 }
4787 } else {
4788 while (scan < loceol && isALNUM(*scan))
4789 scan++;
a0ed51b3
LW
4790 }
4791 break;
bbce6d69 4792 case ALNUML:
3280af22 4793 PL_reg_flags |= RF_tainted;
1aa99e6b 4794 if (do_utf8) {
ffc61ed2 4795 loceol = PL_regeol;
1aa99e6b
IH
4796 while (hardcount < max && scan < loceol &&
4797 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4798 scan += UTF8SKIP(scan);
4799 hardcount++;
4800 }
4801 } else {
4802 while (scan < loceol && isALNUM_LC(*scan))
4803 scan++;
a0ed51b3
LW
4804 }
4805 break;
a0d0e21e 4806 case NALNUM:
1aa99e6b 4807 if (do_utf8) {
ffc61ed2 4808 loceol = PL_regeol;
1a4fad37 4809 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4810 while (hardcount < max && scan < loceol &&
3568d838 4811 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4812 scan += UTF8SKIP(scan);
4813 hardcount++;
4814 }
4815 } else {
4816 while (scan < loceol && !isALNUM(*scan))
4817 scan++;
a0ed51b3
LW
4818 }
4819 break;
bbce6d69 4820 case NALNUML:
3280af22 4821 PL_reg_flags |= RF_tainted;
1aa99e6b 4822 if (do_utf8) {
ffc61ed2 4823 loceol = PL_regeol;
1aa99e6b
IH
4824 while (hardcount < max && scan < loceol &&
4825 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4826 scan += UTF8SKIP(scan);
4827 hardcount++;
4828 }
4829 } else {
4830 while (scan < loceol && !isALNUM_LC(*scan))
4831 scan++;
a0ed51b3
LW
4832 }
4833 break;
a0d0e21e 4834 case SPACE:
1aa99e6b 4835 if (do_utf8) {
ffc61ed2 4836 loceol = PL_regeol;
1a4fad37 4837 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4838 while (hardcount < max && scan < loceol &&
3568d838
JH
4839 (*scan == ' ' ||
4840 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4841 scan += UTF8SKIP(scan);
4842 hardcount++;
4843 }
4844 } else {
4845 while (scan < loceol && isSPACE(*scan))
4846 scan++;
a0ed51b3
LW
4847 }
4848 break;
bbce6d69 4849 case SPACEL:
3280af22 4850 PL_reg_flags |= RF_tainted;
1aa99e6b 4851 if (do_utf8) {
ffc61ed2 4852 loceol = PL_regeol;
1aa99e6b 4853 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4854 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4855 scan += UTF8SKIP(scan);
4856 hardcount++;
4857 }
4858 } else {
4859 while (scan < loceol && isSPACE_LC(*scan))
4860 scan++;
a0ed51b3
LW
4861 }
4862 break;
a0d0e21e 4863 case NSPACE:
1aa99e6b 4864 if (do_utf8) {
ffc61ed2 4865 loceol = PL_regeol;
1a4fad37 4866 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4867 while (hardcount < max && scan < loceol &&
3568d838
JH
4868 !(*scan == ' ' ||
4869 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4870 scan += UTF8SKIP(scan);
4871 hardcount++;
4872 }
4873 } else {
4874 while (scan < loceol && !isSPACE(*scan))
4875 scan++;
4876 break;
a0ed51b3 4877 }
bbce6d69 4878 case NSPACEL:
3280af22 4879 PL_reg_flags |= RF_tainted;
1aa99e6b 4880 if (do_utf8) {
ffc61ed2 4881 loceol = PL_regeol;
1aa99e6b 4882 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4883 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4884 scan += UTF8SKIP(scan);
4885 hardcount++;
4886 }
4887 } else {
4888 while (scan < loceol && !isSPACE_LC(*scan))
4889 scan++;
a0ed51b3
LW
4890 }
4891 break;
a0d0e21e 4892 case DIGIT:
1aa99e6b 4893 if (do_utf8) {
ffc61ed2 4894 loceol = PL_regeol;
1a4fad37 4895 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4896 while (hardcount < max && scan < loceol &&
3568d838 4897 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4898 scan += UTF8SKIP(scan);
4899 hardcount++;
4900 }
4901 } else {
4902 while (scan < loceol && isDIGIT(*scan))
4903 scan++;
a0ed51b3
LW
4904 }
4905 break;
a0d0e21e 4906 case NDIGIT:
1aa99e6b 4907 if (do_utf8) {
ffc61ed2 4908 loceol = PL_regeol;
1a4fad37 4909 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4910 while (hardcount < max && scan < loceol &&
3568d838 4911 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4912 scan += UTF8SKIP(scan);
4913 hardcount++;
4914 }
4915 } else {
4916 while (scan < loceol && !isDIGIT(*scan))
4917 scan++;
a0ed51b3
LW
4918 }
4919 break;
a0d0e21e
LW
4920 default: /* Called on something of 0 width. */
4921 break; /* So match right here or not at all. */
4922 }
a687059c 4923
a0ed51b3
LW
4924 if (hardcount)
4925 c = hardcount;
4926 else
4927 c = scan - PL_reginput;
3280af22 4928 PL_reginput = scan;
a687059c 4929
a3621e74 4930 DEBUG_r({
ab74612d 4931 SV *re_debug_flags = NULL;
6136c704 4932 SV * const prop = sv_newmortal();
a3621e74
YO
4933 GET_RE_DEBUG_FLAGS;
4934 DEBUG_EXECUTE_r({
32fc9b6a 4935 regprop(prog, prop, p);
9041c2e3
NIS
4936 PerlIO_printf(Perl_debug_log,
4937 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3f7c398e 4938 REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max);
c277df42 4939 });
a3621e74 4940 });
9041c2e3 4941
a0d0e21e 4942 return(c);
a687059c
LW
4943}
4944
c277df42 4945
76234dfb 4946#ifndef PERL_IN_XSUB_RE
c277df42 4947/*
ffc61ed2
JH
4948- regclass_swash - prepare the utf8 swash
4949*/
4950
4951SV *
32fc9b6a 4952Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
ffc61ed2 4953{
97aff369 4954 dVAR;
9e55ce06
JH
4955 SV *sw = NULL;
4956 SV *si = NULL;
4957 SV *alt = NULL;
3dab1dad 4958 const struct reg_data * const data = prog ? prog->data : NULL;
ffc61ed2 4959
4f639d21 4960 if (data && data->count) {
a3b680e6 4961 const U32 n = ARG(node);
ffc61ed2 4962
4f639d21
DM
4963 if (data->what[n] == 's') {
4964 SV * const rv = (SV*)data->data[n];
890ce7af 4965 AV * const av = (AV*)SvRV((SV*)rv);
2d03de9c 4966 SV **const ary = AvARRAY(av);
9e55ce06 4967 SV **a, **b;
9041c2e3 4968
711a919c 4969 /* See the end of regcomp.c:S_regclass() for
9e55ce06
JH
4970 * documentation of these array elements. */
4971
b11f357e 4972 si = *ary;
8f7f7219 4973 a = SvROK(ary[1]) ? &ary[1] : 0;
b11f357e
JH
4974 b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4975
ffc61ed2
JH
4976 if (a)
4977 sw = *a;
4978 else if (si && doinit) {
4979 sw = swash_init("utf8", "", si, 1, 0);
4980 (void)av_store(av, 1, sw);
4981 }
9e55ce06
JH
4982 if (b)
4983 alt = *b;
ffc61ed2
JH
4984 }
4985 }
4986
9e55ce06
JH
4987 if (listsvp)
4988 *listsvp = si;
4989 if (altsvp)
4990 *altsvp = alt;
ffc61ed2
JH
4991
4992 return sw;
4993}
76234dfb 4994#endif
ffc61ed2
JH
4995
4996/*
ba7b4546 4997 - reginclass - determine if a character falls into a character class
832705d4
JH
4998
4999 The n is the ANYOF regnode, the p is the target string, lenp
5000 is pointer to the maximum length of how far to go in the p
5001 (if the lenp is zero, UTF8SKIP(p) is used),
5002 do_utf8 tells whether the target string is in UTF-8.
5003
bbce6d69 5004 */
5005
76e3520e 5006STATIC bool
32fc9b6a 5007S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
bbce6d69 5008{
27da23d5 5009 dVAR;
a3b680e6 5010 const char flags = ANYOF_FLAGS(n);
bbce6d69 5011 bool match = FALSE;
cc07378b 5012 UV c = *p;
ae9ddab8 5013 STRLEN len = 0;
9e55ce06 5014 STRLEN plen;
1aa99e6b 5015
19f67299
TS
5016 if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5017 c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
4ad0818d
DM
5018 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
5019 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
19f67299
TS
5020 if (len == (STRLEN)-1)
5021 Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
5022 }
bbce6d69 5023
0f0076b4 5024 plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
ffc61ed2 5025 if (do_utf8 || (flags & ANYOF_UNICODE)) {
9e55ce06
JH
5026 if (lenp)
5027 *lenp = 0;
ffc61ed2 5028 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
5029 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5030 match = TRUE;
bbce6d69 5031 }
3568d838 5032 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 5033 match = TRUE;
ffc61ed2 5034 if (!match) {
9e55ce06 5035 AV *av;
32fc9b6a 5036 SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
ffc61ed2
JH
5037
5038 if (sw) {
3568d838 5039 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
5040 match = TRUE;
5041 else if (flags & ANYOF_FOLD) {
9e55ce06
JH
5042 if (!match && lenp && av) {
5043 I32 i;
9e55ce06 5044 for (i = 0; i <= av_len(av); i++) {
890ce7af 5045 SV* const sv = *av_fetch(av, i, FALSE);
9e55ce06 5046 STRLEN len;
890ce7af 5047 const char * const s = SvPV_const(sv, len);
9e55ce06 5048
061b10df 5049 if (len <= plen && memEQ(s, (char*)p, len)) {
9e55ce06
JH
5050 *lenp = len;
5051 match = TRUE;
5052 break;
5053 }
5054 }
5055 }
5056 if (!match) {
89ebb4a3 5057 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4a623e43
JH
5058 STRLEN tmplen;
5059
9e55ce06
JH
5060 to_utf8_fold(p, tmpbuf, &tmplen);
5061 if (swash_fetch(sw, tmpbuf, do_utf8))
5062 match = TRUE;
5063 }
ffc61ed2
JH
5064 }
5065 }
bbce6d69 5066 }
9e55ce06 5067 if (match && lenp && *lenp == 0)
0f0076b4 5068 *lenp = UNISKIP(NATIVE_TO_UNI(c));
bbce6d69 5069 }
1aa99e6b 5070 if (!match && c < 256) {
ffc61ed2
JH
5071 if (ANYOF_BITMAP_TEST(n, c))
5072 match = TRUE;
5073 else if (flags & ANYOF_FOLD) {
eb160463 5074 U8 f;
a0ed51b3 5075
ffc61ed2
JH
5076 if (flags & ANYOF_LOCALE) {
5077 PL_reg_flags |= RF_tainted;
5078 f = PL_fold_locale[c];
5079 }
5080 else
5081 f = PL_fold[c];
5082 if (f != c && ANYOF_BITMAP_TEST(n, f))
5083 match = TRUE;
5084 }
5085
5086 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 5087 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
5088 if (
5089 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
5090 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
5091 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
5092 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
5093 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
5094 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
5095 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
5096 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5097 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
5098 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
5099 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
5100 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
5101 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
5102 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
5103 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
5104 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
5105 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
5106 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
5107 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
5108 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
5109 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
5110 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
5111 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
5112 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
5113 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
5114 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
5115 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
5116 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
5117 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
5118 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
5119 ) /* How's that for a conditional? */
5120 {
5121 match = TRUE;
5122 }
a0ed51b3 5123 }
a0ed51b3
LW
5124 }
5125
a0ed51b3
LW
5126 return (flags & ANYOF_INVERT) ? !match : match;
5127}
161b471a 5128
dfe13c55 5129STATIC U8 *
0ce71af7 5130S_reghop3(U8 *s, I32 off, const U8* lim)
9041c2e3 5131{
97aff369 5132 dVAR;
a0ed51b3 5133 if (off >= 0) {
1aa99e6b 5134 while (off-- && s < lim) {
ffc61ed2 5135 /* XXX could check well-formedness here */
a0ed51b3 5136 s += UTF8SKIP(s);
ffc61ed2 5137 }
a0ed51b3
LW
5138 }
5139 else {
5140 while (off++) {
1aa99e6b 5141 if (s > lim) {
a0ed51b3 5142 s--;
ffc61ed2 5143 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 5144 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 5145 s--;
ffc61ed2
JH
5146 }
5147 /* XXX could check well-formedness here */
a0ed51b3
LW
5148 }
5149 }
5150 }
5151 return s;
5152}
161b471a 5153
dfe13c55 5154STATIC U8 *
0ce71af7 5155S_reghopmaybe3(U8* s, I32 off, const U8* lim)
a0ed51b3 5156{
97aff369 5157 dVAR;
a0ed51b3 5158 if (off >= 0) {
1aa99e6b 5159 while (off-- && s < lim) {
ffc61ed2 5160 /* XXX could check well-formedness here */
a0ed51b3 5161 s += UTF8SKIP(s);
ffc61ed2 5162 }
a0ed51b3 5163 if (off >= 0)
3dab1dad 5164 return NULL;
a0ed51b3
LW
5165 }
5166 else {
5167 while (off++) {
1aa99e6b 5168 if (s > lim) {
a0ed51b3 5169 s--;
ffc61ed2 5170 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 5171 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 5172 s--;
ffc61ed2
JH
5173 }
5174 /* XXX could check well-formedness here */
a0ed51b3
LW
5175 }
5176 else
5177 break;
5178 }
5179 if (off <= 0)
3dab1dad 5180 return NULL;
a0ed51b3
LW
5181 }
5182 return s;
5183}
51371543 5184
51371543 5185static void
acfe0abc 5186restore_pos(pTHX_ void *arg)
51371543 5187{
97aff369 5188 dVAR;
097eb12c 5189 regexp * const rex = (regexp *)arg;
51371543
GS
5190 if (PL_reg_eval_set) {
5191 if (PL_reg_oldsaved) {
4f639d21
DM
5192 rex->subbeg = PL_reg_oldsaved;
5193 rex->sublen = PL_reg_oldsavedlen;
f8c7b90f 5194#ifdef PERL_OLD_COPY_ON_WRITE
4f639d21 5195 rex->saved_copy = PL_nrs;
ed252734 5196#endif
4f639d21 5197 RX_MATCH_COPIED_on(rex);
51371543
GS
5198 }
5199 PL_reg_magic->mg_len = PL_reg_oldpos;
5200 PL_reg_eval_set = 0;
5201 PL_curpm = PL_reg_oldcurpm;
5202 }
5203}
33b8afdf
JH
5204
5205STATIC void
5206S_to_utf8_substr(pTHX_ register regexp *prog)
5207{
33b8afdf 5208 if (prog->float_substr && !prog->float_utf8) {
097eb12c
AL
5209 SV* const sv = newSVsv(prog->float_substr);
5210 prog->float_utf8 = sv;
33b8afdf
JH
5211 sv_utf8_upgrade(sv);
5212 if (SvTAIL(prog->float_substr))
5213 SvTAIL_on(sv);
5214 if (prog->float_substr == prog->check_substr)
5215 prog->check_utf8 = sv;
5216 }
5217 if (prog->anchored_substr && !prog->anchored_utf8) {
097eb12c
AL
5218 SV* const sv = newSVsv(prog->anchored_substr);
5219 prog->anchored_utf8 = sv;
33b8afdf
JH
5220 sv_utf8_upgrade(sv);
5221 if (SvTAIL(prog->anchored_substr))
5222 SvTAIL_on(sv);
5223 if (prog->anchored_substr == prog->check_substr)
5224 prog->check_utf8 = sv;
5225 }
5226}
5227
5228STATIC void
5229S_to_byte_substr(pTHX_ register regexp *prog)
5230{
97aff369 5231 dVAR;
33b8afdf 5232 if (prog->float_utf8 && !prog->float_substr) {
097eb12c
AL
5233 SV* sv = newSVsv(prog->float_utf8);
5234 prog->float_substr = sv;
33b8afdf
JH
5235 if (sv_utf8_downgrade(sv, TRUE)) {
5236 if (SvTAIL(prog->float_utf8))
5237 SvTAIL_on(sv);
5238 } else {
5239 SvREFCNT_dec(sv);
5240 prog->float_substr = sv = &PL_sv_undef;
5241 }
5242 if (prog->float_utf8 == prog->check_utf8)
5243 prog->check_substr = sv;
5244 }
5245 if (prog->anchored_utf8 && !prog->anchored_substr) {
097eb12c
AL
5246 SV* sv = newSVsv(prog->anchored_utf8);
5247 prog->anchored_substr = sv;
33b8afdf
JH
5248 if (sv_utf8_downgrade(sv, TRUE)) {
5249 if (SvTAIL(prog->anchored_utf8))
5250 SvTAIL_on(sv);
5251 } else {
5252 SvREFCNT_dec(sv);
5253 prog->anchored_substr = sv = &PL_sv_undef;
5254 }
5255 if (prog->anchored_utf8 == prog->check_utf8)
5256 prog->check_substr = sv;
5257 }
5258}
66610fdd
RGS
5259
5260/*
5261 * Local variables:
5262 * c-indentation-style: bsd
5263 * c-basic-offset: 4
5264 * indent-tabs-mode: t
5265 * End:
5266 *
37442d52
RGS
5267 * ex: set ts=8 sts=4 sw=4 noet:
5268 */