This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Move the test files for the debugger in a t/ subdirectory, so they
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
61296642
DM
8/* This file contains functions for executing a regular expression. See
9 * also regcomp.c which funnily enough, contains functions for compiling
166f8a29 10 * a regular expression.
e4a054ea
DM
11 *
12 * This file is also copied at build time to ext/re/re_exec.c, where
13 * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14 * This causes the main functions to be compiled under new names and with
15 * debugging support added, which makes "use re 'debug'" work.
166f8a29
DM
16 */
17
a687059c
LW
18/* NOTE: this is derived from Henry Spencer's regexp code, and should not
19 * confused with the original package (see point 3 below). Thanks, Henry!
20 */
21
22/* Additional note: this code is very heavily munged from Henry's version
23 * in places. In some spots I've traded clarity for efficiency, so don't
24 * blame Henry for some of the lack of readability.
25 */
26
e50aee73
AD
27/* The names of the functions have been changed from regcomp and
28 * regexec to pregcomp and pregexec in order to avoid conflicts
29 * with the POSIX routines of the same names.
30*/
31
b9d5759e 32#ifdef PERL_EXT_RE_BUILD
54df2634 33#include "re_top.h"
9041c2e3 34#endif
56953603 35
a687059c 36/*
e50aee73 37 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
38 *
39 * Copyright (c) 1986 by University of Toronto.
40 * Written by Henry Spencer. Not derived from licensed software.
41 *
42 * Permission is granted to anyone to use this software for any
43 * purpose on any computer system, and to redistribute it freely,
44 * subject to the following restrictions:
45 *
46 * 1. The author is not responsible for the consequences of use of
47 * this software, no matter how awful, even if they arise
48 * from defects in it.
49 *
50 * 2. The origin of this software must not be misrepresented, either
51 * by explicit claim or by omission.
52 *
53 * 3. Altered versions must be plainly marked as such, and must not
54 * be misrepresented as being the original software.
55 *
56 **** Alterations to Henry's code are...
57 ****
4bb101f2 58 **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 59 **** 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 60 ****
9ef589d8
LW
61 **** You may distribute under the terms of either the GNU General Public
62 **** License or the Artistic License, as specified in the README file.
a687059c
LW
63 *
64 * Beware that some of this code is subtly aware of the way operator
65 * precedence is structured in regular expressions. Serious changes in
66 * regular-expression syntax might require a total rethink.
67 */
68#include "EXTERN.h"
864dbfa3 69#define PERL_IN_REGEXEC_C
a687059c 70#include "perl.h"
0f5d15d6 71
54df2634
NC
72#ifdef PERL_IN_XSUB_RE
73# include "re_comp.h"
74#else
75# include "regcomp.h"
76#endif
a687059c 77
c277df42
IZ
78#define RF_tainted 1 /* tainted information used? */
79#define RF_warned 2 /* warned about big count? */
faec1544 80
ab3bbdeb 81#define RF_utf8 8 /* Pattern contains multibyte chars? */
a0ed51b3 82
eb160463 83#define UTF ((PL_reg_flags & RF_utf8) != 0)
ce862d02
IZ
84
85#define RS_init 1 /* eval environment created */
86#define RS_set 2 /* replsv value is set */
c277df42 87
a687059c
LW
88#ifndef STATIC
89#define STATIC static
90#endif
91
32fc9b6a 92#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
7d3e948e 93
c277df42
IZ
94/*
95 * Forwards.
96 */
97
33b8afdf 98#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 99#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 100
3dab1dad
YO
101#define HOPc(pos,off) \
102 (char *)(PL_reg_match_utf8 \
52657f30 103 ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
3dab1dad
YO
104 : (U8*)(pos + off))
105#define HOPBACKc(pos, off) \
07be1b83
YO
106 (char*)(PL_reg_match_utf8\
107 ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
108 : (pos - off >= PL_bostr) \
8e11feef 109 ? (U8*)pos - off \
3dab1dad 110 : NULL)
efb30f32 111
e7409c1b 112#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
1aa99e6b 113#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
1aa99e6b 114
1a4fad37
AL
115#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
116 if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
117#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
118#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
119#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
120#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
51371543 121
3dab1dad
YO
122/* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
123
5f80c4cf 124/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
125#define JUMPABLE(rn) ( \
126 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
127 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
128 OP(rn) == PLUS || OP(rn) == MINMOD || \
3dab1dad 129 (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
130)
131
cca55fe3 132#define HAS_TEXT(rn) ( \
3dab1dad 133 PL_regkind[OP(rn)] == EXACT || PL_regkind[OP(rn)] == REF \
cca55fe3 134)
e2d8ce26 135
a84d97b6
HS
136/*
137 Search for mandatory following text node; for lookahead, the text must
138 follow but for lookbehind (rn->flags != 0) we skip to the next step.
139*/
cca55fe3 140#define FIND_NEXT_IMPT(rn) STMT_START { \
3dab1dad
YO
141 while (JUMPABLE(rn)) { \
142 const OPCODE type = OP(rn); \
143 if (type == SUSPEND || PL_regkind[type] == CURLY) \
e2d8ce26 144 rn = NEXTOPER(NEXTOPER(rn)); \
3dab1dad 145 else if (type == PLUS) \
cca55fe3 146 rn = NEXTOPER(rn); \
3dab1dad 147 else if (type == IFMATCH) \
a84d97b6 148 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
e2d8ce26 149 else rn += NEXT_OFF(rn); \
3dab1dad 150 } \
5f80c4cf 151} STMT_END
74750237 152
c476f425 153
acfe0abc 154static void restore_pos(pTHX_ void *arg);
51371543 155
76e3520e 156STATIC CHECKPOINT
cea2e8a9 157S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 158{
97aff369 159 dVAR;
a3b680e6 160 const int retval = PL_savestack_ix;
b1ce53c5 161#define REGCP_PAREN_ELEMS 4
a3b680e6 162 const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e 163 int p;
40a82448 164 GET_RE_DEBUG_FLAGS_DECL;
a0d0e21e 165
e49a9654
IH
166 if (paren_elems_to_push < 0)
167 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
168
6bda09f9 169#define REGCP_OTHER_ELEMS 8
4b3c1a47 170 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
7f69552c 171
3280af22 172 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 173/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
174 SSPUSHINT(PL_regendp[p]);
175 SSPUSHINT(PL_regstartp[p]);
3280af22 176 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e 177 SSPUSHINT(p);
40a82448
DM
178 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
179 " saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
180 (UV)p, (IV)PL_regstartp[p],
181 (IV)(PL_reg_start_tmp[p] - PL_bostr),
182 (IV)PL_regendp[p]
183 ));
a0d0e21e 184 }
b1ce53c5 185/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
6bda09f9
YO
186 SSPUSHPTR(PL_regstartp);
187 SSPUSHPTR(PL_regendp);
3280af22
NIS
188 SSPUSHINT(PL_regsize);
189 SSPUSHINT(*PL_reglastparen);
a01268b5 190 SSPUSHINT(*PL_reglastcloseparen);
3280af22 191 SSPUSHPTR(PL_reginput);
41123dfd
JH
192#define REGCP_FRAME_ELEMS 2
193/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
194 * are needed for the regexp context stack bookkeeping. */
195 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 196 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 197
a0d0e21e
LW
198 return retval;
199}
200
c277df42 201/* These are needed since we do not localize EVAL nodes: */
ab3bbdeb
YO
202#define REGCP_SET(cp) \
203 DEBUG_STATE_r( \
ab3bbdeb 204 PerlIO_printf(Perl_debug_log, \
e4f74956 205 " Setting an EVAL scope, savestack=%"IVdf"\n", \
ab3bbdeb
YO
206 (IV)PL_savestack_ix)); \
207 cp = PL_savestack_ix
c3464db5 208
ab3bbdeb 209#define REGCP_UNWIND(cp) \
e4f74956 210 DEBUG_STATE_r( \
ab3bbdeb 211 if (cp != PL_savestack_ix) \
e4f74956
YO
212 PerlIO_printf(Perl_debug_log, \
213 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
ab3bbdeb
YO
214 (IV)(cp), (IV)PL_savestack_ix)); \
215 regcpblow(cp)
c277df42 216
76e3520e 217STATIC char *
097eb12c 218S_regcppop(pTHX_ const regexp *rex)
a0d0e21e 219{
97aff369 220 dVAR;
3b6647e0 221 U32 i;
a0d0e21e 222 char *input;
b1ce53c5 223
a3621e74
YO
224 GET_RE_DEBUG_FLAGS_DECL;
225
b1ce53c5 226 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 227 i = SSPOPINT;
b1ce53c5
JH
228 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
229 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 230 input = (char *) SSPOPPTR;
a01268b5 231 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
232 *PL_reglastparen = SSPOPINT;
233 PL_regsize = SSPOPINT;
6bda09f9
YO
234 PL_regendp=(I32 *) SSPOPPTR;
235 PL_regstartp=(I32 *) SSPOPPTR;
b1ce53c5 236
6bda09f9 237
b1ce53c5 238 /* Now restore the parentheses context. */
41123dfd
JH
239 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
240 i > 0; i -= REGCP_PAREN_ELEMS) {
1df70142 241 I32 tmps;
097eb12c 242 U32 paren = (U32)SSPOPINT;
3280af22 243 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
244 PL_regstartp[paren] = SSPOPINT;
245 tmps = SSPOPINT;
3280af22
NIS
246 if (paren <= *PL_reglastparen)
247 PL_regendp[paren] = tmps;
a3621e74 248 DEBUG_EXECUTE_r(
c3464db5 249 PerlIO_printf(Perl_debug_log,
b900a521 250 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 251 (UV)paren, (IV)PL_regstartp[paren],
b900a521 252 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 253 (IV)PL_regendp[paren],
3280af22 254 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 255 );
a0d0e21e 256 }
a3621e74 257 DEBUG_EXECUTE_r(
bb7a0f54 258 if (*PL_reglastparen + 1 <= rex->nparens) {
c3464db5 259 PerlIO_printf(Perl_debug_log,
faccc32b 260 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
4f639d21 261 (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
c277df42
IZ
262 }
263 );
daf18116 264#if 1
dafc8851
JH
265 /* It would seem that the similar code in regtry()
266 * already takes care of this, and in fact it is in
267 * a better location to since this code can #if 0-ed out
268 * but the code in regtry() is needed or otherwise tests
269 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
270 * (as of patchlevel 7877) will fail. Then again,
271 * this code seems to be necessary or otherwise
272 * building DynaLoader will fail:
273 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
274 * --jhi */
3b6647e0 275 for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
097eb12c
AL
276 if (i > PL_regsize)
277 PL_regstartp[i] = -1;
278 PL_regendp[i] = -1;
a0d0e21e 279 }
dafc8851 280#endif
a0d0e21e
LW
281 return input;
282}
283
02db2b7b 284#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 285
a687059c 286/*
e50aee73 287 * pregexec and friends
a687059c
LW
288 */
289
76234dfb 290#ifndef PERL_IN_XSUB_RE
a687059c 291/*
c277df42 292 - pregexec - match a regexp against a string
a687059c 293 */
c277df42 294I32
864dbfa3 295Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 296 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
297/* strend: pointer to null at end of string */
298/* strbeg: real beginning of string */
299/* minend: end of match must be >=minend after stringarg. */
300/* nosave: For optimizations. */
301{
302 return
9041c2e3 303 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
304 nosave ? 0 : REXEC_COPY_STR);
305}
76234dfb 306#endif
22e551b9 307
9041c2e3 308/*
cad2e5aa
JH
309 * Need to implement the following flags for reg_anch:
310 *
311 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
312 * USE_INTUIT_ML
313 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
314 * INTUIT_AUTORITATIVE_ML
315 * INTUIT_ONCE_NOML - Intuit can match in one location only.
316 * INTUIT_ONCE_ML
317 *
318 * Another flag for this function: SECOND_TIME (so that float substrs
319 * with giant delta may be not rechecked).
320 */
321
322/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
323
3f7c398e 324/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
cad2e5aa
JH
325 Otherwise, only SvCUR(sv) is used to get strbeg. */
326
327/* XXXX We assume that strpos is strbeg unless sv. */
328
6eb5f6b9
JH
329/* XXXX Some places assume that there is a fixed substring.
330 An update may be needed if optimizer marks as "INTUITable"
331 RExen without fixed substrings. Similarly, it is assumed that
332 lengths of all the strings are no more than minlen, thus they
333 cannot come from lookahead.
40d049e4
YO
334 (Or minlen should take into account lookahead.)
335 NOTE: Some of this comment is not correct. minlen does now take account
336 of lookahead/behind. Further research is required. -- demerphq
337
338*/
6eb5f6b9 339
2c2d71f5
JH
340/* A failure to find a constant substring means that there is no need to make
341 an expensive call to REx engine, thus we celebrate a failure. Similarly,
342 finding a substring too deep into the string means that less calls to
30944b6d
IZ
343 regtry() should be needed.
344
345 REx compiler's optimizer found 4 possible hints:
346 a) Anchored substring;
347 b) Fixed substring;
348 c) Whether we are anchored (beginning-of-line or \G);
349 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 350 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
351 string which does not contradict any of them.
352 */
2c2d71f5 353
6eb5f6b9
JH
354/* Most of decisions we do here should have been done at compile time.
355 The nodes of the REx which we used for the search should have been
356 deleted from the finite automaton. */
357
cad2e5aa
JH
358char *
359Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
360 char *strend, U32 flags, re_scream_pos_data *data)
361{
97aff369 362 dVAR;
b7953727 363 register I32 start_shift = 0;
cad2e5aa 364 /* Should be nonnegative! */
b7953727 365 register I32 end_shift = 0;
2c2d71f5
JH
366 register char *s;
367 register SV *check;
a1933d95 368 char *strbeg;
cad2e5aa 369 char *t;
1de06328 370 const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
cad2e5aa 371 I32 ml_anch;
bd61b366
SS
372 register char *other_last = NULL; /* other substr checked before this */
373 char *check_at = NULL; /* check substr found at this pos */
1df70142 374 const I32 multiline = prog->reganch & PMf_MULTILINE;
30944b6d 375#ifdef DEBUGGING
890ce7af 376 const char * const i_strpos = strpos;
30944b6d 377#endif
a3621e74
YO
378
379 GET_RE_DEBUG_FLAGS_DECL;
380
a30b2f1f 381 RX_MATCH_UTF8_set(prog,do_utf8);
cad2e5aa 382
b8d68ded 383 if (prog->reganch & ROPT_UTF8) {
b8d68ded
JH
384 PL_reg_flags |= RF_utf8;
385 }
ab3bbdeb
YO
386 DEBUG_EXECUTE_r(
387 debug_start_match(prog, do_utf8, strpos, strend,
1de06328
YO
388 sv ? "Guessing start of match in sv for"
389 : "Guessing start of match in string for");
2a782b5b 390 );
cad2e5aa 391
c344f387
JH
392 /* CHR_DIST() would be more correct here but it makes things slow. */
393 if (prog->minlen > strend - strpos) {
a3621e74 394 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584 395 "String too short... [re_intuit_start]\n"));
cad2e5aa 396 goto fail;
2c2d71f5 397 }
1de06328 398
a1933d95 399 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 400 PL_regeol = strend;
33b8afdf
JH
401 if (do_utf8) {
402 if (!prog->check_utf8 && prog->check_substr)
403 to_utf8_substr(prog);
404 check = prog->check_utf8;
405 } else {
406 if (!prog->check_substr && prog->check_utf8)
407 to_byte_substr(prog);
408 check = prog->check_substr;
409 }
1de06328 410 if (check == &PL_sv_undef) {
a3621e74 411 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1de06328 412 "Non-utf8 string cannot match utf8 check string\n"));
33b8afdf
JH
413 goto fail;
414 }
2c2d71f5 415 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
416 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
417 || ( (prog->reganch & ROPT_ANCH_BOL)
7fba1cd6 418 && !multiline ) ); /* Check after \n? */
cad2e5aa 419
7e25d62c
JH
420 if (!ml_anch) {
421 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
422 | ROPT_IMPLICIT)) /* not a real BOL */
3f7c398e 423 /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
7e25d62c
JH
424 && sv && !SvROK(sv)
425 && (strpos != strbeg)) {
a3621e74 426 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
7e25d62c
JH
427 goto fail;
428 }
429 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 430 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 431 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
432 I32 slen;
433
1aa99e6b 434 s = HOP3c(strpos, prog->check_offset_min, strend);
1de06328 435
653099ff
GS
436 if (SvTAIL(check)) {
437 slen = SvCUR(check); /* >= 1 */
cad2e5aa 438
9041c2e3 439 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5 440 || (strend - s == slen && strend[-1] != '\n')) {
a3621e74 441 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
2c2d71f5 442 goto fail_finish;
cad2e5aa
JH
443 }
444 /* Now should match s[0..slen-2] */
445 slen--;
3f7c398e 446 if (slen && (*SvPVX_const(check) != *s
cad2e5aa 447 || (slen > 1
3f7c398e 448 && memNE(SvPVX_const(check), s, slen)))) {
2c2d71f5 449 report_neq:
a3621e74 450 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
2c2d71f5
JH
451 goto fail_finish;
452 }
cad2e5aa 453 }
3f7c398e 454 else if (*SvPVX_const(check) != *s
653099ff 455 || ((slen = SvCUR(check)) > 1
3f7c398e 456 && memNE(SvPVX_const(check), s, slen)))
2c2d71f5 457 goto report_neq;
c315bfe8 458 check_at = s;
2c2d71f5 459 goto success_at_start;
7e25d62c 460 }
cad2e5aa 461 }
2c2d71f5 462 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 463 s = strpos;
2c2d71f5 464 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
1de06328
YO
465 end_shift = prog->check_end_shift;
466
2c2d71f5 467 if (!ml_anch) {
a3b680e6 468 const I32 end = prog->check_offset_max + CHR_SVLEN(check)
653099ff 469 - (SvTAIL(check) != 0);
a3b680e6 470 const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
471
472 if (end_shift < eshift)
473 end_shift = eshift;
474 }
cad2e5aa 475 }
2c2d71f5 476 else { /* Can match at random position */
cad2e5aa
JH
477 ml_anch = 0;
478 s = strpos;
1de06328
YO
479 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
480 end_shift = prog->check_end_shift;
481
482 /* end shift should be non negative here */
cad2e5aa
JH
483 }
484
2c2d71f5 485#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 486 if (end_shift < 0)
1de06328 487 Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
b57a0404 488 (IV)end_shift, prog->precomp);
2c2d71f5
JH
489#endif
490
2c2d71f5
JH
491 restart:
492 /* Find a possible match in the region s..strend by looking for
493 the "check" substring in the region corrected by start/end_shift. */
1de06328
YO
494
495 {
496 I32 srch_start_shift = start_shift;
497 I32 srch_end_shift = end_shift;
498 if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
499 srch_end_shift -= ((strbeg - s) - srch_start_shift);
500 srch_start_shift = strbeg - s;
501 }
6bda09f9 502 DEBUG_OPTIMISE_MORE_r({
1de06328
YO
503 PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
504 (IV)prog->check_offset_min,
505 (IV)srch_start_shift,
506 (IV)srch_end_shift,
507 (IV)prog->check_end_shift);
508 });
509
cad2e5aa 510 if (flags & REXEC_SCREAM) {
cad2e5aa 511 I32 p = -1; /* Internal iterator of scream. */
a3b680e6 512 I32 * const pp = data ? data->scream_pos : &p;
cad2e5aa 513
2c2d71f5
JH
514 if (PL_screamfirst[BmRARE(check)] >= 0
515 || ( BmRARE(check) == '\n'
516 && (BmPREVIOUS(check) == SvCUR(check) - 1)
517 && SvTAIL(check) ))
9041c2e3 518 s = screaminstr(sv, check,
1de06328 519 srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
cad2e5aa 520 else
2c2d71f5 521 goto fail_finish;
4addbd3b
HS
522 /* we may be pointing at the wrong string */
523 if (s && RX_MATCH_COPIED(prog))
3f7c398e 524 s = strbeg + (s - SvPVX_const(sv));
cad2e5aa
JH
525 if (data)
526 *data->scream_olds = s;
527 }
1de06328
YO
528 else {
529 U8* start_point;
530 U8* end_point;
531 if (prog->reganch & ROPT_CANY_SEEN) {
532 start_point= (U8*)(s + srch_start_shift);
533 end_point= (U8*)(strend - srch_end_shift);
534 } else {
535 start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
536 end_point= HOP3(strend, -srch_end_shift, strbeg);
537 }
6bda09f9 538 DEBUG_OPTIMISE_MORE_r({
56570a2c 539 PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
1de06328 540 (int)(end_point - start_point),
fc8cd66c 541 (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
1de06328
YO
542 start_point);
543 });
544
545 s = fbm_instr( start_point, end_point,
7fba1cd6 546 check, multiline ? FBMrf_MULTILINE : 0);
1de06328
YO
547 }
548 }
cad2e5aa
JH
549 /* Update the count-of-usability, remove useless subpatterns,
550 unshift s. */
2c2d71f5 551
ab3bbdeb
YO
552 DEBUG_EXECUTE_r({
553 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
554 SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
555 PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
2c2d71f5 556 (s ? "Found" : "Did not find"),
ab3bbdeb
YO
557 (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
558 ? "anchored" : "floating"),
559 quoted,
560 RE_SV_TAIL(check),
561 (s ? " at offset " : "...\n") );
562 });
2c2d71f5
JH
563
564 if (!s)
565 goto fail_finish;
2c2d71f5 566 /* Finish the diagnostic message */
a3621e74 567 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5 568
1de06328
YO
569 /* XXX dmq: first branch is for positive lookbehind...
570 Our check string is offset from the beginning of the pattern.
571 So we need to do any stclass tests offset forward from that
572 point. I think. :-(
573 */
574
575
576
577 check_at=s;
578
579
2c2d71f5
JH
580 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
581 Start with the other substr.
582 XXXX no SCREAM optimization yet - and a very coarse implementation
a0288114 583 XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
2c2d71f5
JH
584 *always* match. Probably should be marked during compile...
585 Probably it is right to do no SCREAM here...
586 */
587
1de06328
YO
588 if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
589 : (prog->float_substr && prog->anchored_substr))
590 {
30944b6d 591 /* Take into account the "other" substring. */
2c2d71f5
JH
592 /* XXXX May be hopelessly wrong for UTF... */
593 if (!other_last)
6eb5f6b9 594 other_last = strpos;
33b8afdf 595 if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
30944b6d
IZ
596 do_other_anchored:
597 {
890ce7af
AL
598 char * const last = HOP3c(s, -start_shift, strbeg);
599 char *last1, *last2;
be8e71aa 600 char * const saved_s = s;
33b8afdf 601 SV* must;
2c2d71f5 602
2c2d71f5
JH
603 t = s - prog->check_offset_max;
604 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 605 && (!do_utf8
0ce71af7 606 || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
2c2d71f5 607 && t > strpos)))
6f207bd3 608 NOOP;
2c2d71f5
JH
609 else
610 t = strpos;
1aa99e6b 611 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
612 if (t < other_last) /* These positions already checked */
613 t = other_last;
1aa99e6b 614 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
615 if (last < last1)
616 last1 = last;
1de06328
YO
617 /* XXXX It is not documented what units *_offsets are in.
618 We assume bytes, but this is clearly wrong.
619 Meaning this code needs to be carefully reviewed for errors.
620 dmq.
621 */
622
2c2d71f5 623 /* On end-of-str: see comment below. */
33b8afdf
JH
624 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
625 if (must == &PL_sv_undef) {
626 s = (char*)NULL;
1de06328 627 DEBUG_r(must = prog->anchored_utf8); /* for debug */
33b8afdf
JH
628 }
629 else
630 s = fbm_instr(
631 (unsigned char*)t,
632 HOP3(HOP3(last1, prog->anchored_offset, strend)
633 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
634 must,
7fba1cd6 635 multiline ? FBMrf_MULTILINE : 0
33b8afdf 636 );
ab3bbdeb
YO
637 DEBUG_EXECUTE_r({
638 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
639 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
640 PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
2c2d71f5 641 (s ? "Found" : "Contradicts"),
ab3bbdeb
YO
642 quoted, RE_SV_TAIL(must));
643 });
644
645
2c2d71f5
JH
646 if (!s) {
647 if (last1 >= last2) {
a3621e74 648 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5
JH
649 ", giving up...\n"));
650 goto fail_finish;
651 }
a3621e74 652 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 653 ", trying floating at offset %ld...\n",
be8e71aa 654 (long)(HOP3c(saved_s, 1, strend) - i_strpos)));
1aa99e6b
IH
655 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
656 s = HOP3c(last, 1, strend);
2c2d71f5
JH
657 goto restart;
658 }
659 else {
a3621e74 660 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 661 (long)(s - i_strpos)));
1aa99e6b
IH
662 t = HOP3c(s, -prog->anchored_offset, strbeg);
663 other_last = HOP3c(s, 1, strend);
be8e71aa 664 s = saved_s;
2c2d71f5
JH
665 if (t == strpos)
666 goto try_at_start;
2c2d71f5
JH
667 goto try_at_offset;
668 }
30944b6d 669 }
2c2d71f5
JH
670 }
671 else { /* Take into account the floating substring. */
33b8afdf 672 char *last, *last1;
be8e71aa 673 char * const saved_s = s;
33b8afdf
JH
674 SV* must;
675
676 t = HOP3c(s, -start_shift, strbeg);
677 last1 = last =
678 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
679 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
680 last = HOP3c(t, prog->float_max_offset, strend);
681 s = HOP3c(t, prog->float_min_offset, strend);
682 if (s < other_last)
683 s = other_last;
2c2d71f5 684 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
33b8afdf
JH
685 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
686 /* fbm_instr() takes into account exact value of end-of-str
687 if the check is SvTAIL(ed). Since false positives are OK,
688 and end-of-str is not later than strend we are OK. */
689 if (must == &PL_sv_undef) {
690 s = (char*)NULL;
1de06328 691 DEBUG_r(must = prog->float_utf8); /* for debug message */
33b8afdf
JH
692 }
693 else
2c2d71f5 694 s = fbm_instr((unsigned char*)s,
33b8afdf
JH
695 (unsigned char*)last + SvCUR(must)
696 - (SvTAIL(must)!=0),
7fba1cd6 697 must, multiline ? FBMrf_MULTILINE : 0);
ab3bbdeb
YO
698 DEBUG_EXECUTE_r({
699 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
700 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
701 PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
33b8afdf 702 (s ? "Found" : "Contradicts"),
ab3bbdeb
YO
703 quoted, RE_SV_TAIL(must));
704 });
33b8afdf
JH
705 if (!s) {
706 if (last1 == last) {
a3621e74 707 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
708 ", giving up...\n"));
709 goto fail_finish;
2c2d71f5 710 }
a3621e74 711 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf 712 ", trying anchored starting at offset %ld...\n",
be8e71aa 713 (long)(saved_s + 1 - i_strpos)));
33b8afdf
JH
714 other_last = last;
715 s = HOP3c(t, 1, strend);
716 goto restart;
717 }
718 else {
a3621e74 719 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
33b8afdf
JH
720 (long)(s - i_strpos)));
721 other_last = s; /* Fix this later. --Hugo */
be8e71aa 722 s = saved_s;
33b8afdf
JH
723 if (t == strpos)
724 goto try_at_start;
725 goto try_at_offset;
726 }
2c2d71f5 727 }
cad2e5aa 728 }
2c2d71f5 729
1de06328 730
9ef43ace 731 t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
1de06328 732
6bda09f9 733 DEBUG_OPTIMISE_MORE_r(
1de06328
YO
734 PerlIO_printf(Perl_debug_log,
735 "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
736 (IV)prog->check_offset_min,
737 (IV)prog->check_offset_max,
738 (IV)(s-strpos),
739 (IV)(t-strpos),
740 (IV)(t-s),
741 (IV)(strend-strpos)
742 )
743 );
744
2c2d71f5 745 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 746 && (!do_utf8
9ef43ace 747 || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
1de06328
YO
748 && t > strpos)))
749 {
2c2d71f5
JH
750 /* Fixed substring is found far enough so that the match
751 cannot start at strpos. */
752 try_at_offset:
cad2e5aa 753 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
754 /* Eventually fbm_*() should handle this, but often
755 anchored_offset is not 0, so this check will not be wasted. */
756 /* XXXX In the code below we prefer to look for "^" even in
757 presence of anchored substrings. And we search even
758 beyond the found float position. These pessimizations
759 are historical artefacts only. */
760 find_anchor:
2c2d71f5 761 while (t < strend - prog->minlen) {
cad2e5aa 762 if (*t == '\n') {
4ee3650e 763 if (t < check_at - prog->check_offset_min) {
33b8afdf 764 if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
4ee3650e
GS
765 /* Since we moved from the found position,
766 we definitely contradict the found anchored
30944b6d
IZ
767 substr. Due to the above check we do not
768 contradict "check" substr.
769 Thus we can arrive here only if check substr
770 is float. Redo checking for "other"=="fixed".
771 */
9041c2e3 772 strpos = t + 1;
a3621e74 773 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
e4584336 774 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
30944b6d
IZ
775 goto do_other_anchored;
776 }
4ee3650e
GS
777 /* We don't contradict the found floating substring. */
778 /* XXXX Why not check for STCLASS? */
cad2e5aa 779 s = t + 1;
a3621e74 780 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
e4584336 781 PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
782 goto set_useful;
783 }
4ee3650e
GS
784 /* Position contradicts check-string */
785 /* XXXX probably better to look for check-string
786 than for "\n", so one should lower the limit for t? */
a3621e74 787 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
e4584336 788 PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 789 other_last = strpos = s = t + 1;
cad2e5aa
JH
790 goto restart;
791 }
792 t++;
793 }
a3621e74 794 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
e4584336 795 PL_colors[0], PL_colors[1]));
2c2d71f5 796 goto fail_finish;
cad2e5aa 797 }
f5952150 798 else {
a3621e74 799 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
e4584336 800 PL_colors[0], PL_colors[1]));
f5952150 801 }
cad2e5aa
JH
802 s = t;
803 set_useful:
33b8afdf 804 ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
cad2e5aa
JH
805 }
806 else {
f5952150 807 /* The found string does not prohibit matching at strpos,
2c2d71f5 808 - no optimization of calling REx engine can be performed,
f5952150
GS
809 unless it was an MBOL and we are not after MBOL,
810 or a future STCLASS check will fail this. */
2c2d71f5
JH
811 try_at_start:
812 /* Even in this situation we may use MBOL flag if strpos is offset
813 wrt the start of the string. */
05b4157f 814 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 815 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
816 /* May be due to an implicit anchor of m{.*foo} */
817 && !(prog->reganch & ROPT_IMPLICIT))
818 {
cad2e5aa
JH
819 t = strpos;
820 goto find_anchor;
821 }
a3621e74 822 DEBUG_EXECUTE_r( if (ml_anch)
f5952150 823 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
e4584336 824 (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
30944b6d 825 );
2c2d71f5 826 success_at_start:
30944b6d 827 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
33b8afdf
JH
828 && (do_utf8 ? (
829 prog->check_utf8 /* Could be deleted already */
830 && --BmUSEFUL(prog->check_utf8) < 0
831 && (prog->check_utf8 == prog->float_utf8)
832 ) : (
833 prog->check_substr /* Could be deleted already */
834 && --BmUSEFUL(prog->check_substr) < 0
835 && (prog->check_substr == prog->float_substr)
836 )))
66e933ab 837 {
cad2e5aa 838 /* If flags & SOMETHING - do not do it many times on the same match */
a3621e74 839 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
33b8afdf
JH
840 SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
841 if (do_utf8 ? prog->check_substr : prog->check_utf8)
842 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
a0714e2c
SS
843 prog->check_substr = prog->check_utf8 = NULL; /* disable */
844 prog->float_substr = prog->float_utf8 = NULL; /* clear */
845 check = NULL; /* abort */
cad2e5aa 846 s = strpos;
3cf5c195
IZ
847 /* XXXX This is a remnant of the old implementation. It
848 looks wasteful, since now INTUIT can use many
6eb5f6b9 849 other heuristics. */
cad2e5aa
JH
850 prog->reganch &= ~RE_USE_INTUIT;
851 }
852 else
853 s = strpos;
854 }
855
6eb5f6b9
JH
856 /* Last resort... */
857 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
1de06328
YO
858 /* trie stclasses are too expensive to use here, we are better off to
859 leave it to regmatch itself */
786e8c11 860 if (prog->regstclass && PL_regkind[OP(prog->regstclass)]!=TRIE) {
6eb5f6b9
JH
861 /* minlen == 0 is possible if regstclass is \b or \B,
862 and the fixed substr is ''$.
863 Since minlen is already taken into account, s+1 is before strend;
864 accidentally, minlen >= 1 guaranties no false positives at s + 1
865 even for \b or \B. But (minlen? 1 : 0) below assumes that
866 regstclass does not come from lookahead... */
867 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
868 This leaves EXACTF only, which is dealt with in find_byclass(). */
890ce7af 869 const U8* const str = (U8*)STRING(prog->regstclass);
3dab1dad 870 const int cl_l = (PL_regkind[OP(prog->regstclass)] == EXACT
1aa99e6b 871 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 872 : 1);
1de06328
YO
873 char * endpos;
874 if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
875 endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
876 else if (prog->float_substr || prog->float_utf8)
877 endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
878 else
879 endpos= strend;
880
56570a2c 881 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %d s: %d endpos: %d\n",
b57a0404 882 (IV)start_shift, check_at - strbeg, s - strbeg, endpos - strbeg));
1de06328 883
6eb5f6b9 884 t = s;
3b0527fe 885 s = find_byclass(prog, prog->regstclass, s, endpos, NULL);
6eb5f6b9
JH
886 if (!s) {
887#ifdef DEBUGGING
cbbf8932 888 const char *what = NULL;
6eb5f6b9
JH
889#endif
890 if (endpos == strend) {
a3621e74 891 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
892 "Could not match STCLASS...\n") );
893 goto fail;
894 }
a3621e74 895 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab 896 "This position contradicts STCLASS...\n") );
653099ff
GS
897 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
898 goto fail;
6eb5f6b9 899 /* Contradict one of substrings */
33b8afdf
JH
900 if (prog->anchored_substr || prog->anchored_utf8) {
901 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
a3621e74 902 DEBUG_EXECUTE_r( what = "anchored" );
6eb5f6b9 903 hop_and_restart:
1aa99e6b 904 s = HOP3c(t, 1, strend);
66e933ab
GS
905 if (s + start_shift + end_shift > strend) {
906 /* XXXX Should be taken into account earlier? */
a3621e74 907 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab
GS
908 "Could not match STCLASS...\n") );
909 goto fail;
910 }
5e39e1e5
HS
911 if (!check)
912 goto giveup;
a3621e74 913 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 914 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
915 what, (long)(s + start_shift - i_strpos)) );
916 goto restart;
917 }
66e933ab 918 /* Have both, check_string is floating */
6eb5f6b9
JH
919 if (t + start_shift >= check_at) /* Contradicts floating=check */
920 goto retry_floating_check;
921 /* Recheck anchored substring, but not floating... */
9041c2e3 922 s = check_at;
5e39e1e5
HS
923 if (!check)
924 goto giveup;
a3621e74 925 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 926 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
927 (long)(other_last - i_strpos)) );
928 goto do_other_anchored;
929 }
60e71179
GS
930 /* Another way we could have checked stclass at the
931 current position only: */
932 if (ml_anch) {
933 s = t = t + 1;
5e39e1e5
HS
934 if (!check)
935 goto giveup;
a3621e74 936 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 937 "Looking for /%s^%s/m starting at offset %ld...\n",
e4584336 938 PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
60e71179 939 goto try_at_offset;
66e933ab 940 }
33b8afdf 941 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
60e71179 942 goto fail;
6eb5f6b9
JH
943 /* Check is floating subtring. */
944 retry_floating_check:
945 t = check_at - start_shift;
a3621e74 946 DEBUG_EXECUTE_r( what = "floating" );
6eb5f6b9
JH
947 goto hop_and_restart;
948 }
b7953727 949 if (t != s) {
a3621e74 950 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 951 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
952 (long)(t - i_strpos), (long)(s - i_strpos))
953 );
954 }
955 else {
a3621e74 956 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b7953727
JH
957 "Does not contradict STCLASS...\n");
958 );
959 }
6eb5f6b9 960 }
5e39e1e5 961 giveup:
a3621e74 962 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
5e39e1e5
HS
963 PL_colors[4], (check ? "Guessed" : "Giving up"),
964 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 965 return s;
2c2d71f5
JH
966
967 fail_finish: /* Substring not found */
33b8afdf
JH
968 if (prog->check_substr || prog->check_utf8) /* could be removed already */
969 BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
cad2e5aa 970 fail:
a3621e74 971 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
e4584336 972 PL_colors[4], PL_colors[5]));
bd61b366 973 return NULL;
cad2e5aa 974}
9661b544 975
786e8c11 976
3b0527fe 977
4cadc6a9
YO
978#define REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len, uvc, charid, \
979foldlen, foldbuf, uniflags) STMT_START { \
980 switch (trie_type) { \
981 case trie_utf8_fold: \
982 if ( foldlen>0 ) { \
983 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
984 foldlen -= len; \
985 uscan += len; \
986 len=0; \
987 } else { \
988 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
989 uvc = to_uni_fold( uvc, foldbuf, &foldlen ); \
990 foldlen -= UNISKIP( uvc ); \
991 uscan = foldbuf + UNISKIP( uvc ); \
992 } \
993 break; \
994 case trie_utf8: \
995 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
996 break; \
997 case trie_plain: \
998 uvc = (UV)*uc; \
999 len = 1; \
1000 } \
1001 \
1002 if (uvc < 256) { \
1003 charid = trie->charmap[ uvc ]; \
1004 } \
1005 else { \
1006 charid = 0; \
1007 if (trie->widecharmap) { \
1008 SV** const svpp = hv_fetch(trie->widecharmap, \
1009 (char*)&uvc, sizeof(UV), 0); \
1010 if (svpp) \
1011 charid = (U16)SvIV(*svpp); \
1012 } \
1013 } \
1014} STMT_END
1015
1016#define REXEC_FBC_EXACTISH_CHECK(CoNd) \
1017 if ( (CoNd) \
1018 && (ln == len || \
1019 ibcmp_utf8(s, NULL, 0, do_utf8, \
1020 m, NULL, ln, (bool)UTF)) \
24b23f37 1021 && (!reginfo || regtry(reginfo, &s)) ) \
4cadc6a9
YO
1022 goto got_it; \
1023 else { \
1024 U8 foldbuf[UTF8_MAXBYTES_CASE+1]; \
1025 uvchr_to_utf8(tmpbuf, c); \
1026 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen); \
1027 if ( f != c \
1028 && (f == c1 || f == c2) \
1029 && (ln == foldlen || \
1030 !ibcmp_utf8((char *) foldbuf, \
1031 NULL, foldlen, do_utf8, \
1032 m, \
1033 NULL, ln, (bool)UTF)) \
24b23f37 1034 && (!reginfo || regtry(reginfo, &s)) ) \
4cadc6a9
YO
1035 goto got_it; \
1036 } \
1037 s += len
1038
1039#define REXEC_FBC_EXACTISH_SCAN(CoNd) \
1040STMT_START { \
1041 while (s <= e) { \
1042 if ( (CoNd) \
1043 && (ln == 1 || !(OP(c) == EXACTF \
1044 ? ibcmp(s, m, ln) \
1045 : ibcmp_locale(s, m, ln))) \
24b23f37 1046 && (!reginfo || regtry(reginfo, &s)) ) \
4cadc6a9
YO
1047 goto got_it; \
1048 s++; \
1049 } \
1050} STMT_END
1051
1052#define REXEC_FBC_UTF8_SCAN(CoDe) \
1053STMT_START { \
1054 while (s + (uskip = UTF8SKIP(s)) <= strend) { \
1055 CoDe \
1056 s += uskip; \
1057 } \
1058} STMT_END
1059
1060#define REXEC_FBC_SCAN(CoDe) \
1061STMT_START { \
1062 while (s < strend) { \
1063 CoDe \
1064 s++; \
1065 } \
1066} STMT_END
1067
1068#define REXEC_FBC_UTF8_CLASS_SCAN(CoNd) \
1069REXEC_FBC_UTF8_SCAN( \
1070 if (CoNd) { \
24b23f37 1071 if (tmp && (!reginfo || regtry(reginfo, &s))) \
4cadc6a9
YO
1072 goto got_it; \
1073 else \
1074 tmp = doevery; \
1075 } \
1076 else \
1077 tmp = 1; \
1078)
1079
1080#define REXEC_FBC_CLASS_SCAN(CoNd) \
1081REXEC_FBC_SCAN( \
1082 if (CoNd) { \
24b23f37 1083 if (tmp && (!reginfo || regtry(reginfo, &s))) \
4cadc6a9
YO
1084 goto got_it; \
1085 else \
1086 tmp = doevery; \
1087 } \
1088 else \
1089 tmp = 1; \
1090)
1091
1092#define REXEC_FBC_TRYIT \
24b23f37 1093if ((!reginfo || regtry(reginfo, &s))) \
4cadc6a9
YO
1094 goto got_it
1095
1096#define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd) \
1097 if (do_utf8) { \
1098 UtFpReLoAd; \
1099 REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
1100 } \
1101 else { \
1102 REXEC_FBC_CLASS_SCAN(CoNd); \
1103 } \
1104 break
1105
1106#define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd) \
1107 PL_reg_flags |= RF_tainted; \
1108 if (do_utf8) { \
1109 REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
1110 } \
1111 else { \
1112 REXEC_FBC_CLASS_SCAN(CoNd); \
1113 } \
1114 break
1115
786e8c11
YO
1116#define DUMP_EXEC_POS(li,s,doutf8) \
1117 dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1118
1119/* We know what class REx starts with. Try to find this position... */
1120/* if reginfo is NULL, its a dryrun */
1121/* annoyingly all the vars in this routine have different names from their counterparts
1122 in regmatch. /grrr */
1123
3c3eec57 1124STATIC char *
07be1b83 1125S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
24b23f37 1126 const char *strend, regmatch_info *reginfo)
a687059c 1127{
27da23d5 1128 dVAR;
1df70142 1129 const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
6eb5f6b9 1130 char *m;
d8093b23 1131 STRLEN ln;
5dab1207 1132 STRLEN lnc;
078c425b 1133 register STRLEN uskip;
d8093b23
G
1134 unsigned int c1;
1135 unsigned int c2;
6eb5f6b9
JH
1136 char *e;
1137 register I32 tmp = 1; /* Scratch variable? */
a3b680e6 1138 register const bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 1139
6eb5f6b9
JH
1140 /* We know what class it must start with. */
1141 switch (OP(c)) {
6eb5f6b9 1142 case ANYOF:
388cc4de 1143 if (do_utf8) {
4cadc6a9 1144 REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
388cc4de 1145 !UTF8_IS_INVARIANT((U8)s[0]) ?
32fc9b6a 1146 reginclass(prog, c, (U8*)s, 0, do_utf8) :
4cadc6a9 1147 REGINCLASS(prog, c, (U8*)s));
388cc4de
HS
1148 }
1149 else {
1150 while (s < strend) {
1151 STRLEN skip = 1;
1152
32fc9b6a 1153 if (REGINCLASS(prog, c, (U8*)s) ||
388cc4de
HS
1154 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1155 /* The assignment of 2 is intentional:
1156 * for the folded sharp s, the skip is 2. */
1157 (skip = SHARP_S_SKIP))) {
24b23f37 1158 if (tmp && (!reginfo || regtry(reginfo, &s)))
388cc4de
HS
1159 goto got_it;
1160 else
1161 tmp = doevery;
1162 }
1163 else
1164 tmp = 1;
1165 s += skip;
1166 }
a0d0e21e 1167 }
6eb5f6b9 1168 break;
f33976b4 1169 case CANY:
4cadc6a9 1170 REXEC_FBC_SCAN(
24b23f37 1171 if (tmp && (!reginfo || regtry(reginfo, &s)))
f33976b4
DB
1172 goto got_it;
1173 else
1174 tmp = doevery;
4cadc6a9 1175 );
f33976b4 1176 break;
6eb5f6b9 1177 case EXACTF:
5dab1207
NIS
1178 m = STRING(c);
1179 ln = STR_LEN(c); /* length to match in octets/bytes */
1180 lnc = (I32) ln; /* length to match in characters */
1aa99e6b 1181 if (UTF) {
a2a2844f 1182 STRLEN ulen1, ulen2;
5dab1207 1183 U8 *sm = (U8 *) m;
89ebb4a3
JH
1184 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1185 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4ad0818d 1186 const U32 uniflags = UTF8_ALLOW_DEFAULT;
a2a2844f
JH
1187
1188 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1189 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1190
89ebb4a3 1191 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
041457d9 1192 0, uniflags);
89ebb4a3 1193 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
041457d9 1194 0, uniflags);
5dab1207
NIS
1195 lnc = 0;
1196 while (sm < ((U8 *) m + ln)) {
1197 lnc++;
1198 sm += UTF8SKIP(sm);
1199 }
1aa99e6b
IH
1200 }
1201 else {
1202 c1 = *(U8*)m;
1203 c2 = PL_fold[c1];
1204 }
6eb5f6b9
JH
1205 goto do_exactf;
1206 case EXACTFL:
5dab1207
NIS
1207 m = STRING(c);
1208 ln = STR_LEN(c);
1209 lnc = (I32) ln;
d8093b23 1210 c1 = *(U8*)m;
6eb5f6b9
JH
1211 c2 = PL_fold_locale[c1];
1212 do_exactf:
db12adc6 1213 e = HOP3c(strend, -((I32)lnc), s);
b3c9acc1 1214
3b0527fe 1215 if (!reginfo && e < s)
6eb5f6b9 1216 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 1217
60a8b682
JH
1218 /* The idea in the EXACTF* cases is to first find the
1219 * first character of the EXACTF* node and then, if
1220 * necessary, case-insensitively compare the full
1221 * text of the node. The c1 and c2 are the first
1222 * characters (though in Unicode it gets a bit
1223 * more complicated because there are more cases
7f16dd3d
JH
1224 * than just upper and lower: one needs to use
1225 * the so-called folding case for case-insensitive
1226 * matching (called "loose matching" in Unicode).
1227 * ibcmp_utf8() will do just that. */
60a8b682 1228
1aa99e6b 1229 if (do_utf8) {
575cac57 1230 UV c, f;
89ebb4a3 1231 U8 tmpbuf [UTF8_MAXBYTES+1];
575cac57 1232 STRLEN len, foldlen;
4ad0818d 1233 const U32 uniflags = UTF8_ALLOW_DEFAULT;
09091399 1234 if (c1 == c2) {
5dab1207
NIS
1235 /* Upper and lower of 1st char are equal -
1236 * probably not a "letter". */
1aa99e6b 1237 while (s <= e) {
89ebb4a3 1238 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1239 uniflags);
4cadc6a9 1240 REXEC_FBC_EXACTISH_CHECK(c == c1);
1aa99e6b 1241 }
09091399
JH
1242 }
1243 else {
1aa99e6b 1244 while (s <= e) {
89ebb4a3 1245 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1246 uniflags);
80aecb99 1247
60a8b682 1248 /* Handle some of the three Greek sigmas cases.
8c01da3c
JH
1249 * Note that not all the possible combinations
1250 * are handled here: some of them are handled
1251 * by the standard folding rules, and some of
1252 * them (the character class or ANYOF cases)
1253 * are handled during compiletime in
1254 * regexec.c:S_regclass(). */
880bd946
JH
1255 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1256 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1257 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99 1258
4cadc6a9 1259 REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1aa99e6b 1260 }
09091399 1261 }
1aa99e6b
IH
1262 }
1263 else {
1264 if (c1 == c2)
4cadc6a9 1265 REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1aa99e6b 1266 else
4cadc6a9 1267 REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
b3c9acc1
IZ
1268 }
1269 break;
bbce6d69 1270 case BOUNDL:
3280af22 1271 PL_reg_flags |= RF_tainted;
bbce6d69 1272 /* FALL THROUGH */
a0d0e21e 1273 case BOUND:
ffc61ed2 1274 if (do_utf8) {
12d33761 1275 if (s == PL_bostr)
ffc61ed2
JH
1276 tmp = '\n';
1277 else {
6136c704 1278 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1279 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1280 }
1281 tmp = ((OP(c) == BOUND ?
9041c2e3 1282 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1283 LOAD_UTF8_CHARCLASS_ALNUM();
4cadc6a9 1284 REXEC_FBC_UTF8_SCAN(
ffc61ed2 1285 if (tmp == !(OP(c) == BOUND ?
bb7a0f54 1286 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1287 isALNUM_LC_utf8((U8*)s)))
1288 {
1289 tmp = !tmp;
4cadc6a9 1290 REXEC_FBC_TRYIT;
a687059c 1291 }
4cadc6a9 1292 );
a0d0e21e 1293 }
667bb95a 1294 else {
12d33761 1295 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2 1296 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
4cadc6a9 1297 REXEC_FBC_SCAN(
ffc61ed2
JH
1298 if (tmp ==
1299 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1300 tmp = !tmp;
4cadc6a9 1301 REXEC_FBC_TRYIT;
a0ed51b3 1302 }
4cadc6a9 1303 );
a0ed51b3 1304 }
24b23f37 1305 if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
a0ed51b3
LW
1306 goto got_it;
1307 break;
bbce6d69 1308 case NBOUNDL:
3280af22 1309 PL_reg_flags |= RF_tainted;
bbce6d69 1310 /* FALL THROUGH */
a0d0e21e 1311 case NBOUND:
ffc61ed2 1312 if (do_utf8) {
12d33761 1313 if (s == PL_bostr)
ffc61ed2
JH
1314 tmp = '\n';
1315 else {
6136c704 1316 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1317 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1318 }
1319 tmp = ((OP(c) == NBOUND ?
9041c2e3 1320 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1321 LOAD_UTF8_CHARCLASS_ALNUM();
4cadc6a9 1322 REXEC_FBC_UTF8_SCAN(
ffc61ed2 1323 if (tmp == !(OP(c) == NBOUND ?
bb7a0f54 1324 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1325 isALNUM_LC_utf8((U8*)s)))
1326 tmp = !tmp;
4cadc6a9
YO
1327 else REXEC_FBC_TRYIT;
1328 );
a0d0e21e 1329 }
667bb95a 1330 else {
12d33761 1331 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1332 tmp = ((OP(c) == NBOUND ?
1333 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
4cadc6a9 1334 REXEC_FBC_SCAN(
ffc61ed2
JH
1335 if (tmp ==
1336 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1337 tmp = !tmp;
4cadc6a9
YO
1338 else REXEC_FBC_TRYIT;
1339 );
a0ed51b3 1340 }
24b23f37 1341 if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
a0ed51b3
LW
1342 goto got_it;
1343 break;
a0d0e21e 1344 case ALNUM:
4cadc6a9
YO
1345 REXEC_FBC_CSCAN_PRELOAD(
1346 LOAD_UTF8_CHARCLASS_ALNUM(),
1347 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1348 isALNUM(*s)
1349 );
bbce6d69 1350 case ALNUML:
4cadc6a9
YO
1351 REXEC_FBC_CSCAN_TAINT(
1352 isALNUM_LC_utf8((U8*)s),
1353 isALNUM_LC(*s)
1354 );
a0d0e21e 1355 case NALNUM:
4cadc6a9
YO
1356 REXEC_FBC_CSCAN_PRELOAD(
1357 LOAD_UTF8_CHARCLASS_ALNUM(),
1358 !swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1359 !isALNUM(*s)
1360 );
bbce6d69 1361 case NALNUML:
4cadc6a9
YO
1362 REXEC_FBC_CSCAN_TAINT(
1363 !isALNUM_LC_utf8((U8*)s),
1364 !isALNUM_LC(*s)
1365 );
a0d0e21e 1366 case SPACE:
4cadc6a9
YO
1367 REXEC_FBC_CSCAN_PRELOAD(
1368 LOAD_UTF8_CHARCLASS_SPACE(),
1369 *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
1370 isSPACE(*s)
1371 );
bbce6d69 1372 case SPACEL:
4cadc6a9
YO
1373 REXEC_FBC_CSCAN_TAINT(
1374 *s == ' ' || isSPACE_LC_utf8((U8*)s),
1375 isSPACE_LC(*s)
1376 );
a0d0e21e 1377 case NSPACE:
4cadc6a9
YO
1378 REXEC_FBC_CSCAN_PRELOAD(
1379 LOAD_UTF8_CHARCLASS_SPACE(),
1380 !(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
1381 !isSPACE(*s)
1382 );
bbce6d69 1383 case NSPACEL:
4cadc6a9
YO
1384 REXEC_FBC_CSCAN_TAINT(
1385 !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1386 !isSPACE_LC(*s)
1387 );
a0d0e21e 1388 case DIGIT:
4cadc6a9
YO
1389 REXEC_FBC_CSCAN_PRELOAD(
1390 LOAD_UTF8_CHARCLASS_DIGIT(),
1391 swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1392 isDIGIT(*s)
1393 );
b8c5462f 1394 case DIGITL:
4cadc6a9
YO
1395 REXEC_FBC_CSCAN_TAINT(
1396 isDIGIT_LC_utf8((U8*)s),
1397 isDIGIT_LC(*s)
1398 );
a0d0e21e 1399 case NDIGIT:
4cadc6a9
YO
1400 REXEC_FBC_CSCAN_PRELOAD(
1401 LOAD_UTF8_CHARCLASS_DIGIT(),
1402 !swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1403 !isDIGIT(*s)
1404 );
b8c5462f 1405 case NDIGITL:
4cadc6a9
YO
1406 REXEC_FBC_CSCAN_TAINT(
1407 !isDIGIT_LC_utf8((U8*)s),
1408 !isDIGIT_LC(*s)
1409 );
1de06328
YO
1410 case AHOCORASICKC:
1411 case AHOCORASICK:
07be1b83
YO
1412 {
1413 const enum { trie_plain, trie_utf8, trie_utf8_fold }
1414 trie_type = do_utf8 ?
1415 (c->flags == EXACT ? trie_utf8 : trie_utf8_fold)
1416 : trie_plain;
1417 /* what trie are we using right now */
1418 reg_ac_data *aho
1419 = (reg_ac_data*)prog->data->data[ ARG( c ) ];
1420 reg_trie_data *trie=aho->trie;
1421
1422 const char *last_start = strend - trie->minlen;
6148ee25 1423#ifdef DEBUGGING
07be1b83 1424 const char *real_start = s;
6148ee25 1425#endif
07be1b83 1426 STRLEN maxlen = trie->maxlen;
be8e71aa
YO
1427 SV *sv_points;
1428 U8 **points; /* map of where we were in the input string
786e8c11 1429 when reading a given char. For ASCII this
be8e71aa
YO
1430 is unnecessary overhead as the relationship
1431 is always 1:1, but for unicode, especially
1432 case folded unicode this is not true. */
f9e705e8 1433 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
786e8c11
YO
1434 U8 *bitmap=NULL;
1435
07be1b83
YO
1436
1437 GET_RE_DEBUG_FLAGS_DECL;
1438
be8e71aa
YO
1439 /* We can't just allocate points here. We need to wrap it in
1440 * an SV so it gets freed properly if there is a croak while
1441 * running the match */
1442 ENTER;
1443 SAVETMPS;
1444 sv_points=newSV(maxlen * sizeof(U8 *));
1445 SvCUR_set(sv_points,
1446 maxlen * sizeof(U8 *));
1447 SvPOK_on(sv_points);
1448 sv_2mortal(sv_points);
1449 points=(U8**)SvPV_nolen(sv_points );
1de06328
YO
1450 if ( trie_type != trie_utf8_fold
1451 && (trie->bitmap || OP(c)==AHOCORASICKC) )
1452 {
786e8c11
YO
1453 if (trie->bitmap)
1454 bitmap=(U8*)trie->bitmap;
1455 else
1456 bitmap=(U8*)ANYOF_BITMAP(c);
07be1b83 1457 }
786e8c11
YO
1458 /* this is the Aho-Corasick algorithm modified a touch
1459 to include special handling for long "unknown char"
1460 sequences. The basic idea being that we use AC as long
1461 as we are dealing with a possible matching char, when
1462 we encounter an unknown char (and we have not encountered
1463 an accepting state) we scan forward until we find a legal
1464 starting char.
1465 AC matching is basically that of trie matching, except
1466 that when we encounter a failing transition, we fall back
1467 to the current states "fail state", and try the current char
1468 again, a process we repeat until we reach the root state,
1469 state 1, or a legal transition. If we fail on the root state
1470 then we can either terminate if we have reached an accepting
1471 state previously, or restart the entire process from the beginning
1472 if we have not.
1473
1474 */
07be1b83
YO
1475 while (s <= last_start) {
1476 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1477 U8 *uc = (U8*)s;
1478 U16 charid = 0;
1479 U32 base = 1;
1480 U32 state = 1;
1481 UV uvc = 0;
1482 STRLEN len = 0;
1483 STRLEN foldlen = 0;
1484 U8 *uscan = (U8*)NULL;
1485 U8 *leftmost = NULL;
786e8c11
YO
1486#ifdef DEBUGGING
1487 U32 accepted_word= 0;
1488#endif
07be1b83
YO
1489 U32 pointpos = 0;
1490
1491 while ( state && uc <= (U8*)strend ) {
1492 int failed=0;
786e8c11
YO
1493 U32 word = aho->states[ state ].wordnum;
1494
1de06328
YO
1495 if( state==1 ) {
1496 if ( bitmap ) {
1497 DEBUG_TRIE_EXECUTE_r(
1498 if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1499 dump_exec_pos( (char *)uc, c, strend, real_start,
1500 (char *)uc, do_utf8 );
1501 PerlIO_printf( Perl_debug_log,
1502 " Scanning for legal start char...\n");
1503 }
1504 );
1505 while ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1506 uc++;
786e8c11 1507 }
1de06328 1508 s= (char *)uc;
786e8c11 1509 }
786e8c11
YO
1510 if (uc >(U8*)last_start) break;
1511 }
1512
1513 if ( word ) {
1514 U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1515 if (!leftmost || lpos < leftmost) {
1516 DEBUG_r(accepted_word=word);
07be1b83 1517 leftmost= lpos;
786e8c11 1518 }
07be1b83 1519 if (base==0) break;
786e8c11 1520
07be1b83
YO
1521 }
1522 points[pointpos++ % maxlen]= uc;
4cadc6a9
YO
1523 REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len,
1524 uvc, charid, foldlen, foldbuf, uniflags);
786e8c11
YO
1525 DEBUG_TRIE_EXECUTE_r({
1526 dump_exec_pos( (char *)uc, c, strend, real_start,
1527 s, do_utf8 );
07be1b83 1528 PerlIO_printf(Perl_debug_log,
786e8c11
YO
1529 " Charid:%3u CP:%4"UVxf" ",
1530 charid, uvc);
1531 });
07be1b83
YO
1532
1533 do {
6148ee25 1534#ifdef DEBUGGING
786e8c11 1535 word = aho->states[ state ].wordnum;
6148ee25 1536#endif
07be1b83
YO
1537 base = aho->states[ state ].trans.base;
1538
786e8c11
YO
1539 DEBUG_TRIE_EXECUTE_r({
1540 if (failed)
1541 dump_exec_pos( (char *)uc, c, strend, real_start,
1542 s, do_utf8 );
07be1b83 1543 PerlIO_printf( Perl_debug_log,
786e8c11
YO
1544 "%sState: %4"UVxf", word=%"UVxf,
1545 failed ? " Fail transition to " : "",
1546 (UV)state, (UV)word);
1547 });
07be1b83
YO
1548 if ( base ) {
1549 U32 tmp;
1550 if (charid &&
1551 (base + charid > trie->uniquecharcount )
1552 && (base + charid - 1 - trie->uniquecharcount
1553 < trie->lasttrans)
1554 && trie->trans[base + charid - 1 -
1555 trie->uniquecharcount].check == state
1556 && (tmp=trie->trans[base + charid - 1 -
1557 trie->uniquecharcount ].next))
1558 {
786e8c11
YO
1559 DEBUG_TRIE_EXECUTE_r(
1560 PerlIO_printf( Perl_debug_log," - legal\n"));
07be1b83
YO
1561 state = tmp;
1562 break;
1563 }
1564 else {
786e8c11
YO
1565 DEBUG_TRIE_EXECUTE_r(
1566 PerlIO_printf( Perl_debug_log," - fail\n"));
1567 failed = 1;
1568 state = aho->fail[state];
07be1b83
YO
1569 }
1570 }
1571 else {
1572 /* we must be accepting here */
786e8c11
YO
1573 DEBUG_TRIE_EXECUTE_r(
1574 PerlIO_printf( Perl_debug_log," - accepting\n"));
1575 failed = 1;
07be1b83
YO
1576 break;
1577 }
1578 } while(state);
786e8c11 1579 uc += len;
07be1b83
YO
1580 if (failed) {
1581 if (leftmost)
1582 break;
786e8c11 1583 if (!state) state = 1;
07be1b83
YO
1584 }
1585 }
1586 if ( aho->states[ state ].wordnum ) {
1587 U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
786e8c11
YO
1588 if (!leftmost || lpos < leftmost) {
1589 DEBUG_r(accepted_word=aho->states[ state ].wordnum);
07be1b83 1590 leftmost = lpos;
786e8c11 1591 }
07be1b83 1592 }
07be1b83
YO
1593 if (leftmost) {
1594 s = (char*)leftmost;
786e8c11
YO
1595 DEBUG_TRIE_EXECUTE_r({
1596 PerlIO_printf(
1597 Perl_debug_log,"Matches word #%"UVxf" at position %d. Trying full pattern...\n",
1598 (UV)accepted_word, s - real_start
1599 );
1600 });
24b23f37 1601 if (!reginfo || regtry(reginfo, &s)) {
be8e71aa
YO
1602 FREETMPS;
1603 LEAVE;
07be1b83 1604 goto got_it;
be8e71aa 1605 }
07be1b83 1606 s = HOPc(s,1);
786e8c11
YO
1607 DEBUG_TRIE_EXECUTE_r({
1608 PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1609 });
07be1b83 1610 } else {
786e8c11
YO
1611 DEBUG_TRIE_EXECUTE_r(
1612 PerlIO_printf( Perl_debug_log,"No match.\n"));
07be1b83
YO
1613 break;
1614 }
1615 }
be8e71aa
YO
1616 FREETMPS;
1617 LEAVE;
07be1b83
YO
1618 }
1619 break;
b3c9acc1 1620 default:
3c3eec57
GS
1621 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1622 break;
d6a28714 1623 }
6eb5f6b9
JH
1624 return 0;
1625 got_it:
1626 return s;
1627}
1628
1629/*
1630 - regexec_flags - match a regexp against a string
1631 */
1632I32
1633Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1634 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1635/* strend: pointer to null at end of string */
1636/* strbeg: real beginning of string */
1637/* minend: end of match must be >=minend after stringarg. */
1638/* data: May be used for some additional optimizations. */
1639/* nosave: For optimizations. */
1640{
97aff369 1641 dVAR;
24b23f37 1642 /*register*/ char *s;
6eb5f6b9 1643 register regnode *c;
24b23f37 1644 /*register*/ char *startpos = stringarg;
6eb5f6b9
JH
1645 I32 minlen; /* must match at least this many chars */
1646 I32 dontbother = 0; /* how many characters not to try at end */
6eb5f6b9
JH
1647 I32 end_shift = 0; /* Same for the end. */ /* CC */
1648 I32 scream_pos = -1; /* Internal iterator of scream. */
ccac19ea 1649 char *scream_olds = NULL;
3dab1dad 1650 SV* const oreplsv = GvSV(PL_replgv);
f9f4320a 1651 const bool do_utf8 = (bool)DO_UTF8(sv);
2757e526 1652 I32 multiline;
0df25f3d 1653
3b0527fe 1654 regmatch_info reginfo; /* create some info to pass to regtry etc */
a3621e74
YO
1655
1656 GET_RE_DEBUG_FLAGS_DECL;
1657
9d4ba2ae 1658 PERL_UNUSED_ARG(data);
6eb5f6b9
JH
1659
1660 /* Be paranoid... */
1661 if (prog == NULL || startpos == NULL) {
1662 Perl_croak(aTHX_ "NULL regexp parameter");
1663 return 0;
1664 }
1665
2757e526 1666 multiline = prog->reganch & PMf_MULTILINE;
3b0527fe 1667 reginfo.prog = prog;
2757e526 1668
bac06658 1669 RX_MATCH_UTF8_set(prog, do_utf8);
1de06328
YO
1670 DEBUG_EXECUTE_r(
1671 debug_start_match(prog, do_utf8, startpos, strend,
1672 "Matching");
1673 );
bac06658 1674
6eb5f6b9 1675 minlen = prog->minlen;
1de06328
YO
1676
1677 if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
a3621e74 1678 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584
JH
1679 "String too short [regexec_flags]...\n"));
1680 goto phooey;
1aa99e6b 1681 }
6eb5f6b9 1682
1de06328 1683
6eb5f6b9
JH
1684 /* Check validity of program. */
1685 if (UCHARAT(prog->program) != REG_MAGIC) {
1686 Perl_croak(aTHX_ "corrupted regexp program");
1687 }
1688
1689 PL_reg_flags = 0;
1690 PL_reg_eval_set = 0;
1691 PL_reg_maxiter = 0;
1692
1693 if (prog->reganch & ROPT_UTF8)
1694 PL_reg_flags |= RF_utf8;
1695
1696 /* Mark beginning of line for ^ and lookbehind. */
3b0527fe 1697 reginfo.bol = startpos; /* XXX not used ??? */
6eb5f6b9 1698 PL_bostr = strbeg;
3b0527fe 1699 reginfo.sv = sv;
6eb5f6b9
JH
1700
1701 /* Mark end of line for $ (and such) */
1702 PL_regeol = strend;
1703
1704 /* see how far we have to get to not match where we matched before */
3b0527fe 1705 reginfo.till = startpos+minend;
6eb5f6b9 1706
6eb5f6b9
JH
1707 /* If there is a "must appear" string, look for it. */
1708 s = startpos;
1709
3b0527fe 1710 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to set reginfo->ganch */
6eb5f6b9
JH
1711 MAGIC *mg;
1712
1713 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
3b0527fe 1714 reginfo.ganch = startpos;
6eb5f6b9
JH
1715 else if (sv && SvTYPE(sv) >= SVt_PVMG
1716 && SvMAGIC(sv)
14befaf4
DM
1717 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1718 && mg->mg_len >= 0) {
3b0527fe 1719 reginfo.ganch = strbeg + mg->mg_len; /* Defined pos() */
6eb5f6b9 1720 if (prog->reganch & ROPT_ANCH_GPOS) {
3b0527fe 1721 if (s > reginfo.ganch)
6eb5f6b9 1722 goto phooey;
3b0527fe 1723 s = reginfo.ganch;
6eb5f6b9
JH
1724 }
1725 }
1726 else /* pos() not defined */
3b0527fe 1727 reginfo.ganch = strbeg;
6eb5f6b9
JH
1728 }
1729
a0714e2c 1730 if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
6eb5f6b9
JH
1731 re_scream_pos_data d;
1732
1733 d.scream_olds = &scream_olds;
1734 d.scream_pos = &scream_pos;
1735 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7 1736 if (!s) {
a3621e74 1737 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1738 goto phooey; /* not present */
3fa9c3d7 1739 }
6eb5f6b9
JH
1740 }
1741
1de06328 1742
6eb5f6b9
JH
1743
1744 /* Simplest case: anchored match need be tried only once. */
1745 /* [unless only anchor is BOL and multiline is set] */
1746 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
24b23f37 1747 if (s == startpos && regtry(&reginfo, &startpos))
6eb5f6b9 1748 goto got_it;
7fba1cd6 1749 else if (multiline || (prog->reganch & ROPT_IMPLICIT)
6eb5f6b9
JH
1750 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1751 {
1752 char *end;
1753
1754 if (minlen)
1755 dontbother = minlen - 1;
1aa99e6b 1756 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9 1757 /* for multiline we only have to try after newlines */
33b8afdf 1758 if (prog->check_substr || prog->check_utf8) {
6eb5f6b9
JH
1759 if (s == startpos)
1760 goto after_try;
1761 while (1) {
24b23f37 1762 if (regtry(&reginfo, &s))
6eb5f6b9
JH
1763 goto got_it;
1764 after_try:
1765 if (s >= end)
1766 goto phooey;
1767 if (prog->reganch & RE_USE_INTUIT) {
1768 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1769 if (!s)
1770 goto phooey;
1771 }
1772 else
1773 s++;
1774 }
1775 } else {
1776 if (s > startpos)
1777 s--;
1778 while (s < end) {
1779 if (*s++ == '\n') { /* don't need PL_utf8skip here */
24b23f37 1780 if (regtry(&reginfo, &s))
6eb5f6b9
JH
1781 goto got_it;
1782 }
1783 }
1784 }
1785 }
1786 goto phooey;
f9f4320a
YO
1787 } else if (ROPT_GPOS_CHECK == (prog->reganch & ROPT_GPOS_CHECK))
1788 {
1789 /* the warning about reginfo.ganch being used without intialization
1790 is bogus -- we set it above, when prog->reganch & ROPT_GPOS_SEEN
1791 and we only enter this block when the same bit is set. */
24b23f37 1792 if (regtry(&reginfo, &reginfo.ganch))
6eb5f6b9
JH
1793 goto got_it;
1794 goto phooey;
1795 }
1796
1797 /* Messy cases: unanchored match. */
33b8afdf 1798 if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1799 /* we have /x+whatever/ */
1800 /* it must be a one character string (XXXX Except UTF?) */
33b8afdf 1801 char ch;
bf93d4cc
GS
1802#ifdef DEBUGGING
1803 int did_match = 0;
1804#endif
33b8afdf
JH
1805 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1806 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
3f7c398e 1807 ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
bf93d4cc 1808
1aa99e6b 1809 if (do_utf8) {
4cadc6a9 1810 REXEC_FBC_SCAN(
6eb5f6b9 1811 if (*s == ch) {
a3621e74 1812 DEBUG_EXECUTE_r( did_match = 1 );
24b23f37 1813 if (regtry(&reginfo, &s)) goto got_it;
6eb5f6b9
JH
1814 s += UTF8SKIP(s);
1815 while (s < strend && *s == ch)
1816 s += UTF8SKIP(s);
1817 }
4cadc6a9 1818 );
6eb5f6b9
JH
1819 }
1820 else {
4cadc6a9 1821 REXEC_FBC_SCAN(
6eb5f6b9 1822 if (*s == ch) {
a3621e74 1823 DEBUG_EXECUTE_r( did_match = 1 );
24b23f37 1824 if (regtry(&reginfo, &s)) goto got_it;
6eb5f6b9
JH
1825 s++;
1826 while (s < strend && *s == ch)
1827 s++;
1828 }
4cadc6a9 1829 );
6eb5f6b9 1830 }
a3621e74 1831 DEBUG_EXECUTE_r(if (!did_match)
bf93d4cc 1832 PerlIO_printf(Perl_debug_log,
b7953727
JH
1833 "Did not find anchored character...\n")
1834 );
6eb5f6b9 1835 }
a0714e2c
SS
1836 else if (prog->anchored_substr != NULL
1837 || prog->anchored_utf8 != NULL
1838 || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
33b8afdf
JH
1839 && prog->float_max_offset < strend - s)) {
1840 SV *must;
1841 I32 back_max;
1842 I32 back_min;
1843 char *last;
6eb5f6b9 1844 char *last1; /* Last position checked before */
bf93d4cc
GS
1845#ifdef DEBUGGING
1846 int did_match = 0;
1847#endif
33b8afdf
JH
1848 if (prog->anchored_substr || prog->anchored_utf8) {
1849 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1850 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1851 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1852 back_max = back_min = prog->anchored_offset;
1853 } else {
1854 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1855 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1856 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1857 back_max = prog->float_max_offset;
1858 back_min = prog->float_min_offset;
1859 }
1de06328
YO
1860
1861
33b8afdf
JH
1862 if (must == &PL_sv_undef)
1863 /* could not downgrade utf8 check substring, so must fail */
1864 goto phooey;
1865
1de06328
YO
1866 if (back_min<0) {
1867 last = strend;
1868 } else {
1869 last = HOP3c(strend, /* Cannot start after this */
1870 -(I32)(CHR_SVLEN(must)
1871 - (SvTAIL(must) != 0) + back_min), strbeg);
1872 }
6eb5f6b9
JH
1873 if (s > PL_bostr)
1874 last1 = HOPc(s, -1);
1875 else
1876 last1 = s - 1; /* bogus */
1877
a0288114 1878 /* XXXX check_substr already used to find "s", can optimize if
6eb5f6b9
JH
1879 check_substr==must. */
1880 scream_pos = -1;
1881 dontbother = end_shift;
1882 strend = HOPc(strend, -dontbother);
1883 while ( (s <= last) &&
9041c2e3 1884 ((flags & REXEC_SCREAM)
1de06328 1885 ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
6eb5f6b9 1886 end_shift, &scream_pos, 0))
1de06328 1887 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
9041c2e3 1888 (unsigned char*)strend, must,
7fba1cd6 1889 multiline ? FBMrf_MULTILINE : 0))) ) {
4addbd3b
HS
1890 /* we may be pointing at the wrong string */
1891 if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
3f7c398e 1892 s = strbeg + (s - SvPVX_const(sv));
a3621e74 1893 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1894 if (HOPc(s, -back_max) > last1) {
1895 last1 = HOPc(s, -back_min);
1896 s = HOPc(s, -back_max);
1897 }
1898 else {
52657f30 1899 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
6eb5f6b9
JH
1900
1901 last1 = HOPc(s, -back_min);
52657f30 1902 s = t;
6eb5f6b9 1903 }
1aa99e6b 1904 if (do_utf8) {
6eb5f6b9 1905 while (s <= last1) {
24b23f37 1906 if (regtry(&reginfo, &s))
6eb5f6b9
JH
1907 goto got_it;
1908 s += UTF8SKIP(s);
1909 }
1910 }
1911 else {
1912 while (s <= last1) {
24b23f37 1913 if (regtry(&reginfo, &s))
6eb5f6b9
JH
1914 goto got_it;
1915 s++;
1916 }
1917 }
1918 }
ab3bbdeb
YO
1919 DEBUG_EXECUTE_r(if (!did_match) {
1920 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
1921 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
1922 PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
33b8afdf 1923 ((must == prog->anchored_substr || must == prog->anchored_utf8)
bf93d4cc 1924 ? "anchored" : "floating"),
ab3bbdeb
YO
1925 quoted, RE_SV_TAIL(must));
1926 });
6eb5f6b9
JH
1927 goto phooey;
1928 }
786e8c11 1929 else if ( (c = prog->regstclass) ) {
f14c76ed 1930 if (minlen) {
be8e71aa 1931 const OPCODE op = OP(prog->regstclass);
66e933ab 1932 /* don't bother with what can't match */
786e8c11 1933 if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
f14c76ed
RGS
1934 strend = HOPc(strend, -(minlen - 1));
1935 }
a3621e74 1936 DEBUG_EXECUTE_r({
be8e71aa 1937 SV * const prop = sv_newmortal();
32fc9b6a 1938 regprop(prog, prop, c);
0df25f3d 1939 {
ab3bbdeb
YO
1940 RE_PV_QUOTED_DECL(quoted,UTF,PERL_DEBUG_PAD_ZERO(1),
1941 s,strend-s,60);
0df25f3d 1942 PerlIO_printf(Perl_debug_log,
ab3bbdeb 1943 "Matching stclass %.*s against %s (%d chars)\n",
e4f74956 1944 (int)SvCUR(prop), SvPVX_const(prop),
ab3bbdeb 1945 quoted, (int)(strend - s));
0df25f3d 1946 }
ffc61ed2 1947 });
3b0527fe 1948 if (find_byclass(prog, c, s, strend, &reginfo))
6eb5f6b9 1949 goto got_it;
07be1b83 1950 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
d6a28714
JH
1951 }
1952 else {
1953 dontbother = 0;
a0714e2c 1954 if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
33b8afdf 1955 /* Trim the end. */
d6a28714 1956 char *last;
33b8afdf
JH
1957 SV* float_real;
1958
1959 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1960 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1961 float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
d6a28714
JH
1962
1963 if (flags & REXEC_SCREAM) {
33b8afdf 1964 last = screaminstr(sv, float_real, s - strbeg,
d6a28714
JH
1965 end_shift, &scream_pos, 1); /* last one */
1966 if (!last)
ffc61ed2 1967 last = scream_olds; /* Only one occurrence. */
4addbd3b
HS
1968 /* we may be pointing at the wrong string */
1969 else if (RX_MATCH_COPIED(prog))
3f7c398e 1970 s = strbeg + (s - SvPVX_const(sv));
b8c5462f 1971 }
d6a28714
JH
1972 else {
1973 STRLEN len;
cfd0369c 1974 const char * const little = SvPV_const(float_real, len);
d6a28714 1975
33b8afdf 1976 if (SvTAIL(float_real)) {
d6a28714
JH
1977 if (memEQ(strend - len + 1, little, len - 1))
1978 last = strend - len + 1;
7fba1cd6 1979 else if (!multiline)
9041c2e3 1980 last = memEQ(strend - len, little, len)
bd61b366 1981 ? strend - len : NULL;
b8c5462f 1982 else
d6a28714
JH
1983 goto find_last;
1984 } else {
1985 find_last:
9041c2e3 1986 if (len)
d6a28714 1987 last = rninstr(s, strend, little, little + len);
b8c5462f 1988 else
a0288114 1989 last = strend; /* matching "$" */
b8c5462f 1990 }
b8c5462f 1991 }
bf93d4cc 1992 if (last == NULL) {
6bda09f9
YO
1993 DEBUG_EXECUTE_r(
1994 PerlIO_printf(Perl_debug_log,
1995 "%sCan't trim the tail, match fails (should not happen)%s\n",
1996 PL_colors[4], PL_colors[5]));
bf93d4cc
GS
1997 goto phooey; /* Should not happen! */
1998 }
d6a28714
JH
1999 dontbother = strend - last + prog->float_min_offset;
2000 }
2001 if (minlen && (dontbother < minlen))
2002 dontbother = minlen - 1;
2003 strend -= dontbother; /* this one's always in bytes! */
2004 /* We don't know much -- general case. */
1aa99e6b 2005 if (do_utf8) {
d6a28714 2006 for (;;) {
24b23f37 2007 if (regtry(&reginfo, &s))
d6a28714
JH
2008 goto got_it;
2009 if (s >= strend)
2010 break;
b8c5462f 2011 s += UTF8SKIP(s);
d6a28714
JH
2012 };
2013 }
2014 else {
2015 do {
24b23f37 2016 if (regtry(&reginfo, &s))
d6a28714
JH
2017 goto got_it;
2018 } while (s++ < strend);
2019 }
2020 }
2021
2022 /* Failure. */
2023 goto phooey;
2024
2025got_it:
2026 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2027
2028 if (PL_reg_eval_set) {
2029 /* Preserve the current value of $^R */
2030 if (oreplsv != GvSV(PL_replgv))
2031 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2032 restored, the value remains
2033 the same. */
4f639d21 2034 restore_pos(aTHX_ prog);
d6a28714 2035 }
81714fb9
YO
2036 if (prog->paren_names)
2037 (void)hv_iterinit(prog->paren_names);
d6a28714
JH
2038
2039 /* make sure $`, $&, $', and $digit will work later */
2040 if ( !(flags & REXEC_NOT_FIRST) ) {
ed252734 2041 RX_MATCH_COPY_FREE(prog);
d6a28714 2042 if (flags & REXEC_COPY_STR) {
be8e71aa 2043 const I32 i = PL_regeol - startpos + (stringarg - strbeg);
f8c7b90f 2044#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2045 if ((SvIsCOW(sv)
2046 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2047 if (DEBUG_C_TEST) {
2048 PerlIO_printf(Perl_debug_log,
2049 "Copy on write: regexp capture, type %d\n",
2050 (int) SvTYPE(sv));
2051 }
2052 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
d5263905 2053 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
ed252734
NC
2054 assert (SvPOKp(prog->saved_copy));
2055 } else
2056#endif
2057 {
2058 RX_MATCH_COPIED_on(prog);
2059 s = savepvn(strbeg, i);
2060 prog->subbeg = s;
2061 }
d6a28714 2062 prog->sublen = i;
d6a28714
JH
2063 }
2064 else {
2065 prog->subbeg = strbeg;
2066 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2067 }
2068 }
9041c2e3 2069
d6a28714
JH
2070 return 1;
2071
2072phooey:
a3621e74 2073 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
e4584336 2074 PL_colors[4], PL_colors[5]));
d6a28714 2075 if (PL_reg_eval_set)
4f639d21 2076 restore_pos(aTHX_ prog);
d6a28714
JH
2077 return 0;
2078}
2079
6bda09f9 2080
d6a28714
JH
2081/*
2082 - regtry - try match at specific point
2083 */
2084STATIC I32 /* 0 failure, 1 success */
24b23f37 2085S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
d6a28714 2086{
97aff369 2087 dVAR;
d6a28714
JH
2088 register I32 *sp;
2089 register I32 *ep;
2090 CHECKPOINT lastcp;
3b0527fe 2091 regexp *prog = reginfo->prog;
a3621e74 2092 GET_RE_DEBUG_FLAGS_DECL;
24b23f37 2093 reginfo->cutpoint=NULL;
d6a28714
JH
2094
2095 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2096 MAGIC *mg;
2097
2098 PL_reg_eval_set = RS_init;
a3621e74 2099 DEBUG_EXECUTE_r(DEBUG_s(
b900a521
JH
2100 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2101 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 2102 ));
ea8d6ae1 2103 SAVESTACK_CXPOS();
d6a28714
JH
2104 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2105 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2106 SAVETMPS;
2107 /* Apparently this is not needed, judging by wantarray. */
e8347627 2108 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
2109 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2110
3b0527fe 2111 if (reginfo->sv) {
d6a28714 2112 /* Make $_ available to executed code. */
3b0527fe 2113 if (reginfo->sv != DEFSV) {
59f00321 2114 SAVE_DEFSV;
3b0527fe 2115 DEFSV = reginfo->sv;
b8c5462f 2116 }
d6a28714 2117
3b0527fe
DM
2118 if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2119 && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
d6a28714 2120 /* prepare for quick setting of pos */
d300d9fa 2121#ifdef PERL_OLD_COPY_ON_WRITE
51a9ea20
NC
2122 if (SvIsCOW(reginfo->sv))
2123 sv_force_normal_flags(reginfo->sv, 0);
d300d9fa 2124#endif
3dab1dad 2125 mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
d300d9fa 2126 &PL_vtbl_mglob, NULL, 0);
d6a28714 2127 mg->mg_len = -1;
b8c5462f 2128 }
d6a28714
JH
2129 PL_reg_magic = mg;
2130 PL_reg_oldpos = mg->mg_len;
4f639d21 2131 SAVEDESTRUCTOR_X(restore_pos, prog);
d6a28714 2132 }
09687e5a 2133 if (!PL_reg_curpm) {
a02a5408 2134 Newxz(PL_reg_curpm, 1, PMOP);
09687e5a
AB
2135#ifdef USE_ITHREADS
2136 {
be8e71aa 2137 SV* const repointer = newSViv(0);
577e12cc 2138 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2139 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2140 av_push(PL_regex_padav,repointer);
2141 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2142 PL_regex_pad = AvARRAY(PL_regex_padav);
2143 }
2144#endif
2145 }
aaa362c4 2146 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2147 PL_reg_oldcurpm = PL_curpm;
2148 PL_curpm = PL_reg_curpm;
2149 if (RX_MATCH_COPIED(prog)) {
2150 /* Here is a serious problem: we cannot rewrite subbeg,
2151 since it may be needed if this match fails. Thus
2152 $` inside (?{}) could fail... */
2153 PL_reg_oldsaved = prog->subbeg;
2154 PL_reg_oldsavedlen = prog->sublen;
f8c7b90f 2155#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2156 PL_nrs = prog->saved_copy;
2157#endif
d6a28714
JH
2158 RX_MATCH_COPIED_off(prog);
2159 }
2160 else
bd61b366 2161 PL_reg_oldsaved = NULL;
d6a28714
JH
2162 prog->subbeg = PL_bostr;
2163 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2164 }
24b23f37
YO
2165 DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2166 prog->startp[0] = *startpos - PL_bostr;
2167 PL_reginput = *startpos;
d6a28714 2168 PL_reglastparen = &prog->lastparen;
a01268b5 2169 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714 2170 prog->lastparen = 0;
03994de8 2171 prog->lastcloseparen = 0;
d6a28714 2172 PL_regsize = 0;
6bda09f9
YO
2173 PL_regstartp = prog->startp;
2174 PL_regendp = prog->endp;
d6a28714
JH
2175 if (PL_reg_start_tmpl <= prog->nparens) {
2176 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2177 if(PL_reg_start_tmp)
2178 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2179 else
a02a5408 2180 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
d6a28714
JH
2181 }
2182
2183 /* XXXX What this code is doing here?!!! There should be no need
2184 to do this again and again, PL_reglastparen should take care of
3dd2943c 2185 this! --ilya*/
dafc8851
JH
2186
2187 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2188 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2189 * PL_reglastparen), is not needed at all by the test suite
2190 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2191 * enough, for building DynaLoader, or otherwise this
2192 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2193 * will happen. Meanwhile, this code *is* needed for the
2194 * above-mentioned test suite tests to succeed. The common theme
2195 * on those tests seems to be returning null fields from matches.
2196 * --jhi */
dafc8851 2197#if 1
d6a28714
JH
2198 sp = prog->startp;
2199 ep = prog->endp;
2200 if (prog->nparens) {
097eb12c 2201 register I32 i;
eb160463 2202 for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
d6a28714
JH
2203 *++sp = -1;
2204 *++ep = -1;
2205 }
2206 }
dafc8851 2207#endif
02db2b7b 2208 REGCP_SET(lastcp);
3b0527fe 2209 if (regmatch(reginfo, prog->program + 1)) {
d6a28714
JH
2210 prog->endp[0] = PL_reginput - PL_bostr;
2211 return 1;
2212 }
24b23f37
YO
2213 if (reginfo->cutpoint)
2214 *startpos= reginfo->cutpoint;
02db2b7b 2215 REGCP_UNWIND(lastcp);
d6a28714
JH
2216 return 0;
2217}
2218
02db2b7b 2219
8ba1375e
MJD
2220#define sayYES goto yes
2221#define sayNO goto no
262b90c4 2222#define sayNO_SILENT goto no_silent
8ba1375e 2223
f9f4320a
YO
2224/* we dont use STMT_START/END here because it leads to
2225 "unreachable code" warnings, which are bogus, but distracting. */
2226#define CACHEsayNO \
c476f425
DM
2227 if (ST.cache_mask) \
2228 PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
f9f4320a 2229 sayNO
3298f257 2230
a3621e74 2231/* this is used to determine how far from the left messages like
265c4333
YO
2232 'failed...' are printed. It should be set such that messages
2233 are inline with the regop output that created them.
a3621e74 2234*/
265c4333 2235#define REPORT_CODE_OFF 32
a3621e74
YO
2236
2237
2238/* Make sure there is a test for this +1 options in re_tests */
2239#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2240
40a82448
DM
2241#define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2242#define CHRTEST_VOID -1000 /* the c1/c2 "next char" test should be skipped */
9e137952 2243
86545054
DM
2244#define SLAB_FIRST(s) (&(s)->states[0])
2245#define SLAB_LAST(s) (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2246
5d9a96ca
DM
2247/* grab a new slab and return the first slot in it */
2248
2249STATIC regmatch_state *
2250S_push_slab(pTHX)
2251{
54df2634
NC
2252#if PERL_VERSION < 9
2253 dMY_CXT;
2254#endif
5d9a96ca
DM
2255 regmatch_slab *s = PL_regmatch_slab->next;
2256 if (!s) {
2257 Newx(s, 1, regmatch_slab);
2258 s->prev = PL_regmatch_slab;
2259 s->next = NULL;
2260 PL_regmatch_slab->next = s;
2261 }
2262 PL_regmatch_slab = s;
86545054 2263 return SLAB_FIRST(s);
5d9a96ca 2264}
5b47454d 2265
95b24440 2266
40a82448
DM
2267/* push a new state then goto it */
2268
2269#define PUSH_STATE_GOTO(state, node) \
2270 scan = node; \
2271 st->resume_state = state; \
2272 goto push_state;
2273
2274/* push a new state with success backtracking, then goto it */
2275
2276#define PUSH_YES_STATE_GOTO(state, node) \
2277 scan = node; \
2278 st->resume_state = state; \
2279 goto push_yes_state;
2280
aa283a38 2281
aa283a38 2282
d6a28714 2283/*
95b24440 2284
bf1f174e
DM
2285regmatch() - main matching routine
2286
2287This is basically one big switch statement in a loop. We execute an op,
2288set 'next' to point the next op, and continue. If we come to a point which
2289we may need to backtrack to on failure such as (A|B|C), we push a
2290backtrack state onto the backtrack stack. On failure, we pop the top
2291state, and re-enter the loop at the state indicated. If there are no more
2292states to pop, we return failure.
2293
2294Sometimes we also need to backtrack on success; for example /A+/, where
2295after successfully matching one A, we need to go back and try to
2296match another one; similarly for lookahead assertions: if the assertion
2297completes successfully, we backtrack to the state just before the assertion
2298and then carry on. In these cases, the pushed state is marked as
2299'backtrack on success too'. This marking is in fact done by a chain of
2300pointers, each pointing to the previous 'yes' state. On success, we pop to
2301the nearest yes state, discarding any intermediate failure-only states.
2302Sometimes a yes state is pushed just to force some cleanup code to be
2303called at the end of a successful match or submatch; e.g. (??{$re}) uses
2304it to free the inner regex.
2305
2306Note that failure backtracking rewinds the cursor position, while
2307success backtracking leaves it alone.
2308
2309A pattern is complete when the END op is executed, while a subpattern
2310such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2311ops trigger the "pop to last yes state if any, otherwise return true"
2312behaviour.
2313
2314A common convention in this function is to use A and B to refer to the two
2315subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2316the subpattern to be matched possibly multiple times, while B is the entire
2317rest of the pattern. Variable and state names reflect this convention.
2318
2319The states in the main switch are the union of ops and failure/success of
2320substates associated with with that op. For example, IFMATCH is the op
2321that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2322'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2323successfully matched A and IFMATCH_A_fail is a state saying that we have
2324just failed to match A. Resume states always come in pairs. The backtrack
2325state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2326at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2327on success or failure.
2328
2329The struct that holds a backtracking state is actually a big union, with
2330one variant for each major type of op. The variable st points to the
2331top-most backtrack struct. To make the code clearer, within each
2332block of code we #define ST to alias the relevant union.
2333
2334Here's a concrete example of a (vastly oversimplified) IFMATCH
2335implementation:
2336
2337 switch (state) {
2338 ....
2339
2340#define ST st->u.ifmatch
2341
2342 case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2343 ST.foo = ...; // some state we wish to save
95b24440 2344 ...
bf1f174e
DM
2345 // push a yes backtrack state with a resume value of
2346 // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2347 // first node of A:
2348 PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2349 // NOTREACHED
2350
2351 case IFMATCH_A: // we have successfully executed A; now continue with B
2352 next = B;
2353 bar = ST.foo; // do something with the preserved value
2354 break;
2355
2356 case IFMATCH_A_fail: // A failed, so the assertion failed
2357 ...; // do some housekeeping, then ...
2358 sayNO; // propagate the failure
2359
2360#undef ST
95b24440 2361
bf1f174e
DM
2362 ...
2363 }
95b24440 2364
bf1f174e
DM
2365For any old-timers reading this who are familiar with the old recursive
2366approach, the code above is equivalent to:
95b24440 2367
bf1f174e
DM
2368 case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2369 {
2370 int foo = ...
95b24440 2371 ...
bf1f174e
DM
2372 if (regmatch(A)) {
2373 next = B;
2374 bar = foo;
2375 break;
95b24440 2376 }
bf1f174e
DM
2377 ...; // do some housekeeping, then ...
2378 sayNO; // propagate the failure
95b24440 2379 }
bf1f174e
DM
2380
2381The topmost backtrack state, pointed to by st, is usually free. If you
2382want to claim it, populate any ST.foo fields in it with values you wish to
2383save, then do one of
2384
2385 PUSH_STATE_GOTO(resume_state, node);
2386 PUSH_YES_STATE_GOTO(resume_state, node);
2387
2388which sets that backtrack state's resume value to 'resume_state', pushes a
2389new free entry to the top of the backtrack stack, then goes to 'node'.
2390On backtracking, the free slot is popped, and the saved state becomes the
2391new free state. An ST.foo field in this new top state can be temporarily
2392accessed to retrieve values, but once the main loop is re-entered, it
2393becomes available for reuse.
2394
2395Note that the depth of the backtrack stack constantly increases during the
2396left-to-right execution of the pattern, rather than going up and down with
2397the pattern nesting. For example the stack is at its maximum at Z at the
2398end of the pattern, rather than at X in the following:
2399
2400 /(((X)+)+)+....(Y)+....Z/
2401
2402The only exceptions to this are lookahead/behind assertions and the cut,
2403(?>A), which pop all the backtrack states associated with A before
2404continuing.
2405
2406Bascktrack state structs are allocated in slabs of about 4K in size.
2407PL_regmatch_state and st always point to the currently active state,
2408and PL_regmatch_slab points to the slab currently containing
2409PL_regmatch_state. The first time regmatch() is called, the first slab is
2410allocated, and is never freed until interpreter destruction. When the slab
2411is full, a new one is allocated and chained to the end. At exit from
2412regmatch(), slabs allocated since entry are freed.
2413
2414*/
95b24440 2415
40a82448 2416
5bc10b2c 2417#define DEBUG_STATE_pp(pp) \
265c4333 2418 DEBUG_STATE_r({ \
5bc10b2c
DM
2419 DUMP_EXEC_POS(locinput, scan, do_utf8); \
2420 PerlIO_printf(Perl_debug_log, \
5d458dd8 2421 " %*s"pp" %s%s%s%s%s\n", \
5bc10b2c 2422 depth*2, "", \
5d458dd8
YO
2423 reg_name[st->resume_state], \
2424 ((st==yes_state||st==mark_state) ? "[" : ""), \
2425 ((st==yes_state) ? "Y" : ""), \
2426 ((st==mark_state) ? "M" : ""), \
2427 ((st==yes_state||st==mark_state) ? "]" : "") \
2428 ); \
265c4333 2429 });
5bc10b2c 2430
40a82448 2431
3dab1dad 2432#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
95b24440 2433
3df15adc 2434#ifdef DEBUGGING
5bc10b2c 2435
ab3bbdeb
YO
2436STATIC void
2437S_debug_start_match(pTHX_ const regexp *prog, const bool do_utf8,
2438 const char *start, const char *end, const char *blurb)
2439{
2440 const bool utf8_pat= prog->reganch & ROPT_UTF8 ? 1 : 0;
2441 if (!PL_colorset)
2442 reginitcolors();
2443 {
2444 RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2445 prog->precomp, prog->prelen, 60);
2446
2447 RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2448 start, end - start, 60);
2449
2450 PerlIO_printf(Perl_debug_log,
2451 "%s%s REx%s %s against %s\n",
2452 PL_colors[4], blurb, PL_colors[5], s0, s1);
2453
2454 if (do_utf8||utf8_pat)
1de06328
YO
2455 PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2456 utf8_pat ? "pattern" : "",
2457 utf8_pat && do_utf8 ? " and " : "",
2458 do_utf8 ? "string" : ""
ab3bbdeb
YO
2459 );
2460 }
2461}
3df15adc
YO
2462
2463STATIC void
786e8c11
YO
2464S_dump_exec_pos(pTHX_ const char *locinput,
2465 const regnode *scan,
2466 const char *loc_regeol,
2467 const char *loc_bostr,
2468 const char *loc_reg_starttry,
2469 const bool do_utf8)
07be1b83 2470{
786e8c11 2471 const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
07be1b83 2472 const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
786e8c11 2473 int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
07be1b83
YO
2474 /* The part of the string before starttry has one color
2475 (pref0_len chars), between starttry and current
2476 position another one (pref_len - pref0_len chars),
2477 after the current position the third one.
2478 We assume that pref0_len <= pref_len, otherwise we
2479 decrease pref0_len. */
786e8c11
YO
2480 int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2481 ? (5 + taill) - l : locinput - loc_bostr;
07be1b83
YO
2482 int pref0_len;
2483
2484 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2485 pref_len++;
786e8c11
YO
2486 pref0_len = pref_len - (locinput - loc_reg_starttry);
2487 if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2488 l = ( loc_regeol - locinput > (5 + taill) - pref_len
2489 ? (5 + taill) - pref_len : loc_regeol - locinput);
07be1b83
YO
2490 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2491 l--;
2492 if (pref0_len < 0)
2493 pref0_len = 0;
2494 if (pref0_len > pref_len)
2495 pref0_len = pref_len;
2496 {
3df15adc 2497 const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
0df25f3d 2498
ab3bbdeb 2499 RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
1de06328 2500 (locinput - pref_len),pref0_len, 60, 4, 5);
0df25f3d 2501
ab3bbdeb 2502 RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
3df15adc 2503 (locinput - pref_len + pref0_len),
1de06328 2504 pref_len - pref0_len, 60, 2, 3);
0df25f3d 2505
ab3bbdeb 2506 RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
1de06328 2507 locinput, loc_regeol - locinput, 10, 0, 1);
0df25f3d 2508
1de06328 2509 const STRLEN tlen=len0+len1+len2;
3df15adc 2510 PerlIO_printf(Perl_debug_log,
ab3bbdeb 2511 "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
786e8c11 2512 (IV)(locinput - loc_bostr),
07be1b83 2513 len0, s0,
07be1b83 2514 len1, s1,
07be1b83 2515 (docolor ? "" : "> <"),
07be1b83 2516 len2, s2,
f9f4320a 2517 (int)(tlen > 19 ? 0 : 19 - tlen),
07be1b83
YO
2518 "");
2519 }
2520}
3df15adc 2521
07be1b83
YO
2522#endif
2523
0a4db386
YO
2524/* reg_check_named_buff_matched()
2525 * Checks to see if a named buffer has matched. The data array of
2526 * buffer numbers corresponding to the buffer is expected to reside
2527 * in the regexp->data->data array in the slot stored in the ARG() of
2528 * node involved. Note that this routine doesn't actually care about the
2529 * name, that information is not preserved from compilation to execution.
2530 * Returns the index of the leftmost defined buffer with the given name
2531 * or 0 if non of the buffers matched.
2532 */
2533STATIC I32
2534S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan) {
2535 I32 n;
2536 SV *sv_dat=(SV*)rex->data->data[ ARG( scan ) ];
2537 I32 *nums=(I32*)SvPVX(sv_dat);
2538 for ( n=0; n<SvIVX(sv_dat); n++ ) {
2539 if ((I32)*PL_reglastparen >= nums[n] &&
2540 PL_regendp[nums[n]] != -1)
2541 {
2542 return nums[n];
2543 }
2544 }
2545 return 0;
2546}
2547
d6a28714 2548STATIC I32 /* 0 failure, 1 success */
24b23f37 2549S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
d6a28714 2550{
54df2634
NC
2551#if PERL_VERSION < 9
2552 dMY_CXT;
2553#endif
27da23d5 2554 dVAR;
95b24440 2555 register const bool do_utf8 = PL_reg_match_utf8;
4ad0818d 2556 const U32 uniflags = UTF8_ALLOW_DEFAULT;
95b24440 2557
3b0527fe
DM
2558 regexp *rex = reginfo->prog;
2559
5d9a96ca
DM
2560 regmatch_slab *orig_slab;
2561 regmatch_state *orig_state;
a3621e74 2562
5d9a96ca
DM
2563 /* the current state. This is a cached copy of PL_regmatch_state */
2564 register regmatch_state *st;
95b24440 2565
5d9a96ca
DM
2566 /* cache heavy used fields of st in registers */
2567 register regnode *scan;
2568 register regnode *next;
438e9bae 2569 register U32 n = 0; /* general value; init to avoid compiler warning */
24d3c4a9 2570 register I32 ln = 0; /* len or last; init to avoid compiler warning */
5d9a96ca 2571 register char *locinput = PL_reginput;
5d9a96ca 2572 register I32 nextchr; /* is always set to UCHARAT(locinput) */
24d3c4a9 2573
b69b0499 2574 bool result = 0; /* return value of S_regmatch */
24d3c4a9 2575 int depth = 0; /* depth of backtrack stack */
1a147d38 2576 int nochange_depth = 0; /* depth of GOSUB recursion with nochange*/
77cb431f
DM
2577 regmatch_state *yes_state = NULL; /* state to pop to on success of
2578 subpattern */
e2e6a0f1
YO
2579 /* mark_state piggy backs on the yes_state logic so that when we unwind
2580 the stack on success we can update the mark_state as we go */
2581 regmatch_state *mark_state = NULL; /* last mark state we have seen */
5d458dd8 2582
faec1544 2583 regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
b8591aee 2584 struct regmatch_state *cur_curlyx = NULL; /* most recent curlyx */
40a82448 2585 U32 state_num;
5d458dd8
YO
2586 bool no_final = 0; /* prevent failure from backtracking? */
2587 bool do_cutgroup = 0; /* no_final only until next branch/trie entry */
e2e6a0f1 2588 char *startpoint = PL_reginput;
5d458dd8
YO
2589 SV *popmark = NULL; /* are we looking for a mark? */
2590 SV *sv_commit = NULL; /* last mark name seen in failure */
2591 SV *sv_yes_mark = NULL; /* last mark name we have seen
2592 during a successfull match */
2593 U32 lastopen = 0; /* last open we saw */
2594 bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
2595
24d3c4a9
DM
2596 /* these three flags are set by various ops to signal information to
2597 * the very next op. They have a useful lifetime of exactly one loop
2598 * iteration, and are not preserved or restored by state pushes/pops
2599 */
2600 bool sw = 0; /* the condition value in (?(cond)a|b) */
2601 bool minmod = 0; /* the next "{n,m}" is a "{n,m}?" */
2602 int logical = 0; /* the following EVAL is:
2603 0: (?{...})
2604 1: (?(?{...})X|Y)
2605 2: (??{...})
2606 or the following IFMATCH/UNLESSM is:
2607 false: plain (?=foo)
2608 true: used as a condition: (?(?=foo))
2609 */
2610
95b24440 2611#ifdef DEBUGGING
e68ec53f 2612 GET_RE_DEBUG_FLAGS_DECL;
d6a28714
JH
2613#endif
2614
24b23f37
YO
2615 DEBUG_STACK_r( {
2616 PerlIO_printf(Perl_debug_log,"regmatch start\n");
2617 });
5d9a96ca
DM
2618 /* on first ever call to regmatch, allocate first slab */
2619 if (!PL_regmatch_slab) {
2620 Newx(PL_regmatch_slab, 1, regmatch_slab);
2621 PL_regmatch_slab->prev = NULL;
2622 PL_regmatch_slab->next = NULL;
86545054 2623 PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
5d9a96ca
DM
2624 }
2625
2626 /* remember current high-water mark for exit */
2627 /* XXX this should be done with SAVE* instead */
2628 orig_slab = PL_regmatch_slab;
2629 orig_state = PL_regmatch_state;
2630
2631 /* grab next free state slot */
2632 st = ++PL_regmatch_state;
86545054 2633 if (st > SLAB_LAST(PL_regmatch_slab))
5d9a96ca
DM
2634 st = PL_regmatch_state = S_push_slab(aTHX);
2635
d6a28714
JH
2636 /* Note that nextchr is a byte even in UTF */
2637 nextchr = UCHARAT(locinput);
2638 scan = prog;
2639 while (scan != NULL) {
8ba1375e 2640
a3621e74 2641 DEBUG_EXECUTE_r( {
6136c704 2642 SV * const prop = sv_newmortal();
1de06328 2643 regnode *rnext=regnext(scan);
786e8c11 2644 DUMP_EXEC_POS( locinput, scan, do_utf8 );
32fc9b6a 2645 regprop(rex, prop, scan);
07be1b83
YO
2646
2647 PerlIO_printf(Perl_debug_log,
2648 "%3"IVdf":%*s%s(%"IVdf")\n",
5bc10b2c 2649 (IV)(scan - rex->program), depth*2, "",
07be1b83 2650 SvPVX_const(prop),
1de06328
YO
2651 (PL_regkind[OP(scan)] == END || !rnext) ?
2652 0 : (IV)(rnext - rex->program));
2a782b5b 2653 });
d6a28714
JH
2654
2655 next = scan + NEXT_OFF(scan);
2656 if (next == scan)
2657 next = NULL;
40a82448 2658 state_num = OP(scan);
d6a28714 2659
40a82448
DM
2660 reenter_switch:
2661 switch (state_num) {
d6a28714 2662 case BOL:
7fba1cd6 2663 if (locinput == PL_bostr)
d6a28714 2664 {
3b0527fe 2665 /* reginfo->till = reginfo->bol; */
b8c5462f
JH
2666 break;
2667 }
d6a28714
JH
2668 sayNO;
2669 case MBOL:
12d33761
HS
2670 if (locinput == PL_bostr ||
2671 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2672 {
b8c5462f
JH
2673 break;
2674 }
d6a28714
JH
2675 sayNO;
2676 case SBOL:
c2a73568 2677 if (locinput == PL_bostr)
b8c5462f 2678 break;
d6a28714
JH
2679 sayNO;
2680 case GPOS:
3b0527fe 2681 if (locinput == reginfo->ganch)
d6a28714
JH
2682 break;
2683 sayNO;
2684 case EOL:
d6a28714
JH
2685 goto seol;
2686 case MEOL:
d6a28714 2687 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2688 sayNO;
b8c5462f 2689 break;
d6a28714
JH
2690 case SEOL:
2691 seol:
2692 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2693 sayNO;
d6a28714 2694 if (PL_regeol - locinput > 1)
b8c5462f 2695 sayNO;
b8c5462f 2696 break;
d6a28714
JH
2697 case EOS:
2698 if (PL_regeol != locinput)
b8c5462f 2699 sayNO;
d6a28714 2700 break;
ffc61ed2 2701 case SANY:
d6a28714 2702 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2703 sayNO;
f33976b4
DB
2704 if (do_utf8) {
2705 locinput += PL_utf8skip[nextchr];
2706 if (locinput > PL_regeol)
2707 sayNO;
2708 nextchr = UCHARAT(locinput);
2709 }
2710 else
2711 nextchr = UCHARAT(++locinput);
2712 break;
2713 case CANY:
2714 if (!nextchr && locinput >= PL_regeol)
2715 sayNO;
b8c5462f 2716 nextchr = UCHARAT(++locinput);
a0d0e21e 2717 break;
ffc61ed2 2718 case REG_ANY:
1aa99e6b
IH
2719 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2720 sayNO;
2721 if (do_utf8) {
b8c5462f 2722 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2723 if (locinput > PL_regeol)
2724 sayNO;
a0ed51b3 2725 nextchr = UCHARAT(locinput);
a0ed51b3 2726 }
1aa99e6b
IH
2727 else
2728 nextchr = UCHARAT(++locinput);
a0ed51b3 2729 break;
166ba7cd
DM
2730
2731#undef ST
2732#define ST st->u.trie
786e8c11
YO
2733 case TRIEC:
2734 /* In this case the charclass data is available inline so
2735 we can fail fast without a lot of extra overhead.
2736 */
2737 if (scan->flags == EXACT || !do_utf8) {
2738 if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
2739 DEBUG_EXECUTE_r(
2740 PerlIO_printf(Perl_debug_log,
2741 "%*s %sfailed to match trie start class...%s\n",
5bc10b2c 2742 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
786e8c11
YO
2743 );
2744 sayNO_SILENT;
2745 /* NOTREACHED */
2746 }
2747 }
2748 /* FALL THROUGH */
5b47454d 2749 case TRIE:
3dab1dad 2750 {
07be1b83 2751 /* what type of TRIE am I? (utf8 makes this contextual) */
3dab1dad
YO
2752 const enum { trie_plain, trie_utf8, trie_utf8_fold }
2753 trie_type = do_utf8 ?
2754 (scan->flags == EXACT ? trie_utf8 : trie_utf8_fold)
2755 : trie_plain;
2756
2757 /* what trie are we using right now */
be8e71aa 2758 reg_trie_data * const trie
3dab1dad
YO
2759 = (reg_trie_data*)rex->data->data[ ARG( scan ) ];
2760 U32 state = trie->startstate;
166ba7cd 2761
3dab1dad
YO
2762 if (trie->bitmap && trie_type != trie_utf8_fold &&
2763 !TRIE_BITMAP_TEST(trie,*locinput)
2764 ) {
2765 if (trie->states[ state ].wordnum) {
2766 DEBUG_EXECUTE_r(
2767 PerlIO_printf(Perl_debug_log,
2768 "%*s %smatched empty string...%s\n",
5bc10b2c 2769 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3dab1dad
YO
2770 );
2771 break;
2772 } else {
2773 DEBUG_EXECUTE_r(
2774 PerlIO_printf(Perl_debug_log,
786e8c11 2775 "%*s %sfailed to match trie start class...%s\n",
5bc10b2c 2776 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3dab1dad
YO
2777 );
2778 sayNO_SILENT;
2779 }
2780 }
166ba7cd 2781
786e8c11
YO
2782 {
2783 U8 *uc = ( U8* )locinput;
2784
2785 STRLEN len = 0;
2786 STRLEN foldlen = 0;
2787 U8 *uscan = (U8*)NULL;
2788 STRLEN bufflen=0;
2789 SV *sv_accept_buff = NULL;
2790 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2791
2792 ST.accepted = 0; /* how many accepting states we have seen */
2793 ST.B = next;
2794 ST.jump = trie->jump;
786e8c11 2795 ST.me = scan;
786e8c11 2796
07be1b83
YO
2797 /*
2798 traverse the TRIE keeping track of all accepting states
2799 we transition through until we get to a failing node.
2800 */
2801
a3621e74 2802 while ( state && uc <= (U8*)PL_regeol ) {
786e8c11 2803 U32 base = trie->states[ state ].trans.base;
f9f4320a 2804 UV uvc = 0;
786e8c11
YO
2805 U16 charid;
2806 /* We use charid to hold the wordnum as we don't use it
2807 for charid until after we have done the wordnum logic.
2808 We define an alias just so that the wordnum logic reads
2809 more naturally. */
2810
2811#define got_wordnum charid
2812 got_wordnum = trie->states[ state ].wordnum;
2813
2814 if ( got_wordnum ) {
2815 if ( ! ST.accepted ) {
5b47454d
DM
2816 ENTER;
2817 SAVETMPS;
2818 bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
2819 sv_accept_buff=newSV(bufflen *
2820 sizeof(reg_trie_accepted) - 1);
786e8c11 2821 SvCUR_set(sv_accept_buff, 0);
5b47454d
DM
2822 SvPOK_on(sv_accept_buff);
2823 sv_2mortal(sv_accept_buff);
166ba7cd
DM
2824 SAVETMPS;
2825 ST.accept_buff =
5b47454d
DM
2826 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
2827 }
786e8c11 2828 do {
166ba7cd 2829 if (ST.accepted >= bufflen) {
5b47454d 2830 bufflen *= 2;
166ba7cd 2831 ST.accept_buff =(reg_trie_accepted*)
5b47454d
DM
2832 SvGROW(sv_accept_buff,
2833 bufflen * sizeof(reg_trie_accepted));
2834 }
2835 SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
2836 + sizeof(reg_trie_accepted));
a3621e74 2837
786e8c11
YO
2838
2839 ST.accept_buff[ST.accepted].wordnum = got_wordnum;
2840 ST.accept_buff[ST.accepted].endpos = uc;
2841 ++ST.accepted;
2842 } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
2843 }
2844#undef got_wordnum
a3621e74 2845
07be1b83 2846 DEBUG_TRIE_EXECUTE_r({
786e8c11 2847 DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
a3621e74 2848 PerlIO_printf( Perl_debug_log,
786e8c11 2849 "%*s %sState: %4"UVxf" Accepted: %4"UVxf" ",
5bc10b2c 2850 2+depth * 2, "", PL_colors[4],
786e8c11 2851 (UV)state, (UV)ST.accepted );
07be1b83 2852 });
a3621e74
YO
2853
2854 if ( base ) {
4cadc6a9
YO
2855 REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len,
2856 uvc, charid, foldlen, foldbuf, uniflags);
a3621e74 2857
5b47454d
DM
2858 if (charid &&
2859 (base + charid > trie->uniquecharcount )
2860 && (base + charid - 1 - trie->uniquecharcount
2861 < trie->lasttrans)
2862 && trie->trans[base + charid - 1 -
2863 trie->uniquecharcount].check == state)
2864 {
2865 state = trie->trans[base + charid - 1 -
2866 trie->uniquecharcount ].next;
2867 }
2868 else {
2869 state = 0;
2870 }
2871 uc += len;
2872
2873 }
2874 else {
a3621e74
YO
2875 state = 0;
2876 }
2877 DEBUG_TRIE_EXECUTE_r(
e4584336 2878 PerlIO_printf( Perl_debug_log,
786e8c11 2879 "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
e4584336 2880 charid, uvc, (UV)state, PL_colors[5] );
a3621e74
YO
2881 );
2882 }
166ba7cd 2883 if (!ST.accepted )
a3621e74 2884 sayNO;
a3621e74 2885
166ba7cd
DM
2886 DEBUG_EXECUTE_r(
2887 PerlIO_printf( Perl_debug_log,
2888 "%*s %sgot %"IVdf" possible matches%s\n",
5bc10b2c 2889 REPORT_CODE_OFF + depth * 2, "",
166ba7cd
DM
2890 PL_colors[4], (IV)ST.accepted, PL_colors[5] );
2891 );
786e8c11 2892 }}
166ba7cd
DM
2893
2894 /* FALL THROUGH */
166ba7cd 2895 case TRIE_next_fail: /* we failed - try next alterative */
5d458dd8
YO
2896 if (do_cutgroup) {
2897 do_cutgroup = 0;
2898 no_final = 0;
2899 }
166ba7cd
DM
2900 if ( ST.accepted == 1 ) {
2901 /* only one choice left - just continue */
2902 DEBUG_EXECUTE_r({
2903 reg_trie_data * const trie
2904 = (reg_trie_data*)rex->data->data[ ARG(ST.me) ];
5c9f2f80
YO
2905 SV ** const tmp = av_fetch( trie->words,
2906 ST.accept_buff[ 0 ].wordnum-1, 0 );
166ba7cd
DM
2907 PerlIO_printf( Perl_debug_log,
2908 "%*s %sonly one match left: #%d <%s>%s\n",
5bc10b2c 2909 REPORT_CODE_OFF+depth*2, "", PL_colors[4],
166ba7cd
DM
2910 ST.accept_buff[ 0 ].wordnum,
2911 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",
2912 PL_colors[5] );
2913 });
2914 PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
2915 /* in this case we free tmps/leave before we call regmatch
2916 as we wont be using accept_buff again. */
5d458dd8 2917
166ba7cd
DM
2918 locinput = PL_reginput;
2919 nextchr = UCHARAT(locinput);
5d458dd8
YO
2920 if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
2921 scan = ST.B;
2922 else
2923 scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
2924 if (!has_cutgroup) {
2925 FREETMPS;
2926 LEAVE;
2927 } else {
2928 ST.accepted--;
2929 PUSH_YES_STATE_GOTO(TRIE_next, scan);
2930 }
786e8c11 2931
166ba7cd
DM
2932 continue; /* execute rest of RE */
2933 }
2934
2935 if (!ST.accepted-- ) {
5d458dd8
YO
2936 DEBUG_EXECUTE_r({
2937 PerlIO_printf( Perl_debug_log,
2938 "%*s %sTRIE failed...%s\n",
2939 REPORT_CODE_OFF+depth*2, "",
2940 PL_colors[4],
2941 PL_colors[5] );
2942 });
166ba7cd
DM
2943 FREETMPS;
2944 LEAVE;
5d458dd8 2945 sayNO_SILENT;
166ba7cd
DM
2946 }
2947
a3621e74 2948 /*
166ba7cd
DM
2949 There are at least two accepting states left. Presumably
2950 the number of accepting states is going to be low,
2951 typically two. So we simply scan through to find the one
2952 with lowest wordnum. Once we find it, we swap the last
2953 state into its place and decrement the size. We then try to
2954 match the rest of the pattern at the point where the word
2955 ends. If we succeed, control just continues along the
2956 regex; if we fail we return here to try the next accepting
2957 state
2958 */
a3621e74 2959
166ba7cd
DM
2960 {
2961 U32 best = 0;
2962 U32 cur;
2963 for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
2964 DEBUG_TRIE_EXECUTE_r(
f2278c82 2965 PerlIO_printf( Perl_debug_log,
166ba7cd 2966 "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
5bc10b2c 2967 REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
166ba7cd
DM
2968 (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
2969 ST.accept_buff[ cur ].wordnum, PL_colors[5] );
2970 );
2971
2972 if (ST.accept_buff[cur].wordnum <
2973 ST.accept_buff[best].wordnum)
2974 best = cur;
a3621e74 2975 }
166ba7cd
DM
2976
2977 DEBUG_EXECUTE_r({
2978 reg_trie_data * const trie
2979 = (reg_trie_data*)rex->data->data[ ARG(ST.me) ];
5c9f2f80
YO
2980 SV ** const tmp = av_fetch( trie->words,
2981 ST.accept_buff[ best ].wordnum - 1, 0 );
7f69552c 2982 regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
265c4333 2983 ST.B :
7f69552c 2984 ST.me + ST.jump[ST.accept_buff[best].wordnum];
265c4333
YO
2985 PerlIO_printf( Perl_debug_log,
2986 "%*s %strying alternation #%d <%s> at node #%d %s\n",
5bc10b2c 2987 REPORT_CODE_OFF+depth*2, "", PL_colors[4],
166ba7cd 2988 ST.accept_buff[best].wordnum,
265c4333
YO
2989 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",
2990 REG_NODE_NUM(nextop),
166ba7cd
DM
2991 PL_colors[5] );
2992 });
2993
2994 if ( best<ST.accepted ) {
2995 reg_trie_accepted tmp = ST.accept_buff[ best ];
2996 ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
2997 ST.accept_buff[ ST.accepted ] = tmp;
2998 best = ST.accepted;
a3621e74 2999 }
166ba7cd 3000 PL_reginput = (char *)ST.accept_buff[ best ].endpos;
7f69552c 3001 if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
5d458dd8 3002 scan = ST.B;
786e8c11
YO
3003 /* NOTREACHED */
3004 } else {
5d458dd8 3005 scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
786e8c11
YO
3006 /* NOTREACHED */
3007 }
5d458dd8
YO
3008 if (has_cutgroup) {
3009 PUSH_YES_STATE_GOTO(TRIE_next, scan);
3010 /* NOTREACHED */
3011 } else {
3012 PUSH_STATE_GOTO(TRIE_next, scan);
3013 /* NOTREACHED */
3014 }
786e8c11 3015 /* NOTREACHED */
166ba7cd 3016 }
166ba7cd 3017 /* NOTREACHED */
5d458dd8
YO
3018 case TRIE_next:
3019 FREETMPS;
3020 LEAVE;
3021 sayYES;
166ba7cd
DM
3022#undef ST
3023
95b24440
DM
3024 case EXACT: {
3025 char *s = STRING(scan);
24d3c4a9 3026 ln = STR_LEN(scan);
eb160463 3027 if (do_utf8 != UTF) {
bc517b45 3028 /* The target and the pattern have differing utf8ness. */
1aa99e6b 3029 char *l = locinput;
24d3c4a9 3030 const char * const e = s + ln;
a72c7584 3031
5ff6fc6d
JH
3032 if (do_utf8) {
3033 /* The target is utf8, the pattern is not utf8. */
1aa99e6b 3034 while (s < e) {
a3b680e6 3035 STRLEN ulen;
1aa99e6b 3036 if (l >= PL_regeol)
5ff6fc6d
JH
3037 sayNO;
3038 if (NATIVE_TO_UNI(*(U8*)s) !=
89ebb4a3 3039 utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
041457d9 3040 uniflags))
5ff6fc6d 3041 sayNO;
bc517b45 3042 l += ulen;
5ff6fc6d 3043 s ++;
1aa99e6b 3044 }
5ff6fc6d
JH
3045 }
3046 else {
3047 /* The target is not utf8, the pattern is utf8. */
1aa99e6b 3048 while (s < e) {
a3b680e6 3049 STRLEN ulen;
1aa99e6b
IH
3050 if (l >= PL_regeol)
3051 sayNO;
5ff6fc6d 3052 if (NATIVE_TO_UNI(*((U8*)l)) !=
89ebb4a3 3053 utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
041457d9 3054 uniflags))
1aa99e6b 3055 sayNO;
bc517b45 3056 s += ulen;
a72c7584 3057 l ++;
1aa99e6b 3058 }
5ff6fc6d 3059 }
1aa99e6b
IH
3060 locinput = l;
3061 nextchr = UCHARAT(locinput);
3062 break;
3063 }
bc517b45 3064 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
3065 /* Inline the first character, for speed. */
3066 if (UCHARAT(s) != nextchr)
3067 sayNO;
24d3c4a9 3068 if (PL_regeol - locinput < ln)
d6a28714 3069 sayNO;
24d3c4a9 3070 if (ln > 1 && memNE(s, locinput, ln))
d6a28714 3071 sayNO;
24d3c4a9 3072 locinput += ln;
d6a28714
JH
3073 nextchr = UCHARAT(locinput);
3074 break;
95b24440 3075 }
d6a28714 3076 case EXACTFL:
b8c5462f
JH
3077 PL_reg_flags |= RF_tainted;
3078 /* FALL THROUGH */
95b24440 3079 case EXACTF: {
be8e71aa 3080 char * const s = STRING(scan);
24d3c4a9 3081 ln = STR_LEN(scan);
d6a28714 3082
d07ddd77
JH
3083 if (do_utf8 || UTF) {
3084 /* Either target or the pattern are utf8. */
be8e71aa 3085 const char * const l = locinput;
d07ddd77 3086 char *e = PL_regeol;
bc517b45 3087
24d3c4a9 3088 if (ibcmp_utf8(s, 0, ln, (bool)UTF,
1feea2c7 3089 l, &e, 0, do_utf8)) {
5486206c
JH
3090 /* One more case for the sharp s:
3091 * pack("U0U*", 0xDF) =~ /ss/i,
3092 * the 0xC3 0x9F are the UTF-8
3093 * byte sequence for the U+00DF. */
3094 if (!(do_utf8 &&
3095 toLOWER(s[0]) == 's' &&
24d3c4a9 3096 ln >= 2 &&
5486206c
JH
3097 toLOWER(s[1]) == 's' &&
3098 (U8)l[0] == 0xC3 &&
3099 e - l >= 2 &&
3100 (U8)l[1] == 0x9F))
3101 sayNO;
3102 }
d07ddd77
JH
3103 locinput = e;
3104 nextchr = UCHARAT(locinput);
3105 break;
a0ed51b3 3106 }
d6a28714 3107
bc517b45
JH
3108 /* Neither the target and the pattern are utf8. */
3109
d6a28714
JH
3110 /* Inline the first character, for speed. */
3111 if (UCHARAT(s) != nextchr &&
3112 UCHARAT(s) != ((OP(scan) == EXACTF)
3113 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 3114 sayNO;
24d3c4a9 3115 if (PL_regeol - locinput < ln)
b8c5462f 3116 sayNO;
24d3c4a9
DM
3117 if (ln > 1 && (OP(scan) == EXACTF
3118 ? ibcmp(s, locinput, ln)
3119 : ibcmp_locale(s, locinput, ln)))
4633a7c4 3120 sayNO;
24d3c4a9 3121 locinput += ln;
d6a28714 3122 nextchr = UCHARAT(locinput);
a0d0e21e 3123 break;
95b24440 3124 }
d6a28714 3125 case ANYOF:
ffc61ed2 3126 if (do_utf8) {
9e55ce06
JH
3127 STRLEN inclasslen = PL_regeol - locinput;
3128
32fc9b6a 3129 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
262b90c4 3130 goto anyof_fail;
ffc61ed2
JH
3131 if (locinput >= PL_regeol)
3132 sayNO;
0f0076b4 3133 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
b8c5462f 3134 nextchr = UCHARAT(locinput);
e0f9d4a8 3135 break;
ffc61ed2
JH
3136 }
3137 else {
3138 if (nextchr < 0)
3139 nextchr = UCHARAT(locinput);
32fc9b6a 3140 if (!REGINCLASS(rex, scan, (U8*)locinput))
262b90c4 3141 goto anyof_fail;
ffc61ed2
JH
3142 if (!nextchr && locinput >= PL_regeol)
3143 sayNO;
3144 nextchr = UCHARAT(++locinput);
e0f9d4a8
JH
3145 break;
3146 }
262b90c4 3147 anyof_fail:
e0f9d4a8
JH
3148 /* If we might have the case of the German sharp s
3149 * in a casefolding Unicode character class. */
3150
ebc501f0
JH
3151 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3152 locinput += SHARP_S_SKIP;
e0f9d4a8 3153 nextchr = UCHARAT(locinput);
ffc61ed2 3154 }
e0f9d4a8
JH
3155 else
3156 sayNO;
b8c5462f 3157 break;
d6a28714 3158 case ALNUML:
b8c5462f
JH
3159 PL_reg_flags |= RF_tainted;
3160 /* FALL THROUGH */
d6a28714 3161 case ALNUM:
b8c5462f 3162 if (!nextchr)
4633a7c4 3163 sayNO;
ffc61ed2 3164 if (do_utf8) {
1a4fad37 3165 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3166 if (!(OP(scan) == ALNUM
bb7a0f54 3167 ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 3168 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
3169 {
3170 sayNO;
a0ed51b3 3171 }
b8c5462f 3172 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3173 nextchr = UCHARAT(locinput);
3174 break;
3175 }
ffc61ed2 3176 if (!(OP(scan) == ALNUM
d6a28714 3177 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 3178 sayNO;
b8c5462f 3179 nextchr = UCHARAT(++locinput);
a0d0e21e 3180 break;
d6a28714 3181 case NALNUML:
b8c5462f
JH
3182 PL_reg_flags |= RF_tainted;
3183 /* FALL THROUGH */
d6a28714
JH
3184 case NALNUM:
3185 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 3186 sayNO;
ffc61ed2 3187 if (do_utf8) {
1a4fad37 3188 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3189 if (OP(scan) == NALNUM
bb7a0f54 3190 ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
3191 : isALNUM_LC_utf8((U8*)locinput))
3192 {
b8c5462f 3193 sayNO;
d6a28714 3194 }
b8c5462f
JH
3195 locinput += PL_utf8skip[nextchr];
3196 nextchr = UCHARAT(locinput);
3197 break;
3198 }
ffc61ed2 3199 if (OP(scan) == NALNUM
d6a28714 3200 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 3201 sayNO;
76e3520e 3202 nextchr = UCHARAT(++locinput);
a0d0e21e 3203 break;
d6a28714
JH
3204 case BOUNDL:
3205 case NBOUNDL:
3280af22 3206 PL_reg_flags |= RF_tainted;
bbce6d69 3207 /* FALL THROUGH */
d6a28714
JH
3208 case BOUND:
3209 case NBOUND:
3210 /* was last char in word? */
ffc61ed2 3211 if (do_utf8) {
12d33761 3212 if (locinput == PL_bostr)
24d3c4a9 3213 ln = '\n';
ffc61ed2 3214 else {
a3b680e6 3215 const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
9041c2e3 3216
24d3c4a9 3217 ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
ffc61ed2
JH
3218 }
3219 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
24d3c4a9 3220 ln = isALNUM_uni(ln);
1a4fad37 3221 LOAD_UTF8_CHARCLASS_ALNUM();
3568d838 3222 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
3223 }
3224 else {
24d3c4a9 3225 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
3226 n = isALNUM_LC_utf8((U8*)locinput);
3227 }
a0ed51b3 3228 }
d6a28714 3229 else {
24d3c4a9 3230 ln = (locinput != PL_bostr) ?
12d33761 3231 UCHARAT(locinput - 1) : '\n';
ffc61ed2 3232 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
24d3c4a9 3233 ln = isALNUM(ln);
ffc61ed2
JH
3234 n = isALNUM(nextchr);
3235 }
3236 else {
24d3c4a9 3237 ln = isALNUM_LC(ln);
ffc61ed2
JH
3238 n = isALNUM_LC(nextchr);
3239 }
d6a28714 3240 }
24d3c4a9 3241 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
ffc61ed2
JH
3242 OP(scan) == BOUNDL))
3243 sayNO;
a0ed51b3 3244 break;
d6a28714 3245 case SPACEL:
3280af22 3246 PL_reg_flags |= RF_tainted;
bbce6d69 3247 /* FALL THROUGH */
d6a28714 3248 case SPACE:
9442cb0e 3249 if (!nextchr)
4633a7c4 3250 sayNO;
1aa99e6b 3251 if (do_utf8) {
fd400ab9 3252 if (UTF8_IS_CONTINUED(nextchr)) {
1a4fad37 3253 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3254 if (!(OP(scan) == SPACE
bb7a0f54 3255 ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
3256 : isSPACE_LC_utf8((U8*)locinput)))
3257 {
3258 sayNO;
3259 }
3260 locinput += PL_utf8skip[nextchr];
3261 nextchr = UCHARAT(locinput);
3262 break;
d6a28714 3263 }
ffc61ed2
JH
3264 if (!(OP(scan) == SPACE
3265 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3266 sayNO;
3267 nextchr = UCHARAT(++locinput);
3268 }
3269 else {
3270 if (!(OP(scan) == SPACE
3271 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3272 sayNO;
3273 nextchr = UCHARAT(++locinput);
a0ed51b3 3274 }
a0ed51b3 3275 break;
d6a28714 3276 case NSPACEL:
3280af22 3277 PL_reg_flags |= RF_tainted;
bbce6d69 3278 /* FALL THROUGH */
d6a28714 3279 case NSPACE:
9442cb0e 3280 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3281 sayNO;
1aa99e6b 3282 if (do_utf8) {
1a4fad37 3283 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3284 if (OP(scan) == NSPACE
bb7a0f54 3285 ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 3286 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
3287 {
3288 sayNO;
3289 }
3290 locinput += PL_utf8skip[nextchr];
3291 nextchr = UCHARAT(locinput);
3292 break;
a0ed51b3 3293 }
ffc61ed2 3294 if (OP(scan) == NSPACE
d6a28714 3295 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 3296 sayNO;
76e3520e 3297 nextchr = UCHARAT(++locinput);
a0d0e21e 3298 break;
d6a28714 3299 case DIGITL:
a0ed51b3
LW
3300 PL_reg_flags |= RF_tainted;
3301 /* FALL THROUGH */
d6a28714 3302 case DIGIT:
9442cb0e 3303 if (!nextchr)
a0ed51b3 3304 sayNO;
1aa99e6b 3305 if (do_utf8) {
1a4fad37 3306 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3307 if (!(OP(scan) == DIGIT
bb7a0f54 3308 ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 3309 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 3310 {
a0ed51b3 3311 sayNO;
dfe13c55 3312 }
6f06b55f 3313 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3314 nextchr = UCHARAT(locinput);
3315 break;
3316 }
ffc61ed2 3317 if (!(OP(scan) == DIGIT
9442cb0e 3318 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
3319 sayNO;
3320 nextchr = UCHARAT(++locinput);
3321 break;
d6a28714 3322 case NDIGITL:
b8c5462f
JH
3323 PL_reg_flags |= RF_tainted;
3324 /* FALL THROUGH */
d6a28714 3325 case NDIGIT:
9442cb0e 3326 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3327 sayNO;
1aa99e6b 3328 if (do_utf8) {
1a4fad37 3329 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3330 if (OP(scan) == NDIGIT
bb7a0f54 3331 ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
3332 : isDIGIT_LC_utf8((U8*)locinput))
3333 {
a0ed51b3 3334 sayNO;
9442cb0e 3335 }
6f06b55f 3336 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3337 nextchr = UCHARAT(locinput);
3338 break;
3339 }
ffc61ed2 3340 if (OP(scan) == NDIGIT
9442cb0e 3341 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
3342 sayNO;
3343 nextchr = UCHARAT(++locinput);
3344 break;
3345 case CLUMP:
b7c83a7e 3346 if (locinput >= PL_regeol)
a0ed51b3 3347 sayNO;
b7c83a7e 3348 if (do_utf8) {
1a4fad37 3349 LOAD_UTF8_CHARCLASS_MARK();
b7c83a7e
JH
3350 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3351 sayNO;
3352 locinput += PL_utf8skip[nextchr];
3353 while (locinput < PL_regeol &&
3354 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3355 locinput += UTF8SKIP(locinput);
3356 if (locinput > PL_regeol)
3357 sayNO;
eb08e2da
JH
3358 }
3359 else
3360 locinput++;
a0ed51b3
LW
3361 nextchr = UCHARAT(locinput);
3362 break;
81714fb9
YO
3363
3364 case NREFFL:
3365 {
3366 char *s;
ff1157ca 3367 char type;
81714fb9
YO
3368 PL_reg_flags |= RF_tainted;
3369 /* FALL THROUGH */
3370 case NREF:
3371 case NREFF:
ff1157ca 3372 type = OP(scan);
0a4db386
YO
3373 n = reg_check_named_buff_matched(rex,scan);
3374
3375 if ( n ) {
3376 type = REF + ( type - NREF );
3377 goto do_ref;
3378 } else {
81714fb9 3379 sayNO;
0a4db386
YO
3380 }
3381 /* unreached */
c8756f30 3382 case REFFL:
3280af22 3383 PL_reg_flags |= RF_tainted;
c8756f30 3384 /* FALL THROUGH */
c277df42 3385 case REF:
81714fb9 3386 case REFF:
c277df42 3387 n = ARG(scan); /* which paren pair */
81714fb9
YO
3388 type = OP(scan);
3389 do_ref:
24d3c4a9 3390 ln = PL_regstartp[n];
2c2d71f5 3391 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
3b6647e0 3392 if (*PL_reglastparen < n || ln == -1)
af3f8c16 3393 sayNO; /* Do not match unless seen CLOSEn. */
24d3c4a9 3394 if (ln == PL_regendp[n])
a0d0e21e 3395 break;
a0ed51b3 3396
24d3c4a9 3397 s = PL_bostr + ln;
81714fb9 3398 if (do_utf8 && type != REF) { /* REF can do byte comparison */
a0ed51b3 3399 char *l = locinput;
a3b680e6 3400 const char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
3401 /*
3402 * Note that we can't do the "other character" lookup trick as
3403 * in the 8-bit case (no pun intended) because in Unicode we
3404 * have to map both upper and title case to lower case.
3405 */
81714fb9 3406 if (type == REFF) {
a0ed51b3 3407 while (s < e) {
a3b680e6
AL
3408 STRLEN ulen1, ulen2;
3409 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3410 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3411
a0ed51b3
LW
3412 if (l >= PL_regeol)
3413 sayNO;
a2a2844f
JH
3414 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3415 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 3416 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 3417 sayNO;
a2a2844f
JH
3418 s += ulen1;
3419 l += ulen2;
a0ed51b3
LW
3420 }
3421 }
3422 locinput = l;
3423 nextchr = UCHARAT(locinput);
3424 break;
3425 }
3426
a0d0e21e 3427 /* Inline the first character, for speed. */
76e3520e 3428 if (UCHARAT(s) != nextchr &&
81714fb9
YO
3429 (type == REF ||
3430 (UCHARAT(s) != (type == REFF
3431 ? PL_fold : PL_fold_locale)[nextchr])))
4633a7c4 3432 sayNO;
24d3c4a9
DM
3433 ln = PL_regendp[n] - ln;
3434 if (locinput + ln > PL_regeol)
4633a7c4 3435 sayNO;
81714fb9 3436 if (ln > 1 && (type == REF
24d3c4a9 3437 ? memNE(s, locinput, ln)
81714fb9 3438 : (type == REFF
24d3c4a9
DM
3439 ? ibcmp(s, locinput, ln)
3440 : ibcmp_locale(s, locinput, ln))))
4633a7c4 3441 sayNO;
24d3c4a9 3442 locinput += ln;
76e3520e 3443 nextchr = UCHARAT(locinput);
a0d0e21e 3444 break;
81714fb9 3445 }
a0d0e21e 3446 case NOTHING:
c277df42 3447 case TAIL:
a0d0e21e
LW
3448 break;
3449 case BACK:
3450 break;
40a82448
DM
3451
3452#undef ST
3453#define ST st->u.eval
c277df42 3454 {
c277df42 3455 SV *ret;
6bda09f9 3456 regexp *re;
1a147d38
YO
3457 regnode *startpoint;
3458
3459 case GOSTART:
3460 case GOSUB: /* /(...(?1))/ */
6bda09f9 3461 if (cur_eval && cur_eval->locinput==locinput) {
24b23f37 3462 if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
1a147d38 3463 Perl_croak(aTHX_ "Infinite recursion in regex");
6bda09f9 3464 if ( ++nochange_depth > MAX_RECURSE_EVAL_NOCHANGE_DEPTH )
1a147d38
YO
3465 Perl_croak(aTHX_
3466 "Pattern subroutine nesting without pos change"
3467 " exceeded limit in regex");
6bda09f9
YO
3468 } else {
3469 nochange_depth = 0;
1a147d38 3470 }
6bda09f9
YO
3471 re = rex;
3472 (void)ReREFCNT_inc(rex);
1a147d38 3473 if (OP(scan)==GOSUB) {
6bda09f9
YO
3474 startpoint = scan + ARG2L(scan);
3475 ST.close_paren = ARG(scan);
3476 } else {
3477 startpoint = re->program+1;
3478 ST.close_paren = 0;
3479 }
3480 goto eval_recurse_doit;
3481 /* NOTREACHED */
3482 case EVAL: /* /(?{A})B/ /(??{A})B/ and /(?(?{A})X|Y)B/ */
3483 if (cur_eval && cur_eval->locinput==locinput) {
3484 if ( ++nochange_depth > MAX_RECURSE_EVAL_NOCHANGE_DEPTH )
1a147d38 3485 Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
6bda09f9
YO
3486 } else {
3487 nochange_depth = 0;
3488 }
8e5e9ebe 3489 {
4aabdb9b
DM
3490 /* execute the code in the {...} */
3491 dSP;
6136c704 3492 SV ** const before = SP;
4aabdb9b
DM
3493 OP_4tree * const oop = PL_op;
3494 COP * const ocurcop = PL_curcop;
3495 PAD *old_comppad;
4aabdb9b
DM
3496
3497 n = ARG(scan);
32fc9b6a 3498 PL_op = (OP_4tree*)rex->data->data[n];
24b23f37
YO
3499 DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
3500 " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
32fc9b6a 3501 PAD_SAVE_LOCAL(old_comppad, (PAD*)rex->data->data[n + 2]);
4aabdb9b
DM
3502 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
3503
8e5e9ebe
RGS
3504 CALLRUNOPS(aTHX); /* Scalar context. */
3505 SPAGAIN;
3506 if (SP == before)
075aa684 3507 ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
8e5e9ebe
RGS
3508 else {
3509 ret = POPs;
3510 PUTBACK;
3511 }
4aabdb9b
DM
3512
3513 PL_op = oop;
3514 PAD_RESTORE_LOCAL(old_comppad);
3515 PL_curcop = ocurcop;
24d3c4a9 3516 if (!logical) {
4aabdb9b
DM
3517 /* /(?{...})/ */
3518 sv_setsv(save_scalar(PL_replgv), ret);
4aabdb9b
DM
3519 break;
3520 }
8e5e9ebe 3521 }
24d3c4a9
DM
3522 if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
3523 logical = 0;
4aabdb9b 3524 {
4f639d21
DM
3525 /* extract RE object from returned value; compiling if
3526 * necessary */
3527
6136c704 3528 MAGIC *mg = NULL;
be8e71aa 3529 const SV *sv;
faf82a0b
AE
3530 if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3531 mg = mg_find(sv, PERL_MAGIC_qr);
3532 else if (SvSMAGICAL(ret)) {
3533 if (SvGMAGICAL(ret))
3534 sv_unmagic(ret, PERL_MAGIC_qr);
3535 else
3536 mg = mg_find(ret, PERL_MAGIC_qr);
0f5d15d6 3537 }
faf82a0b 3538
0f5d15d6
IZ
3539 if (mg) {
3540 re = (regexp *)mg->mg_obj;
df0003d4 3541 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
3542 }
3543 else {
3544 STRLEN len;
6136c704 3545 const char * const t = SvPV_const(ret, len);
0f5d15d6 3546 PMOP pm;
a3b680e6 3547 const I32 osize = PL_regsize;
0f5d15d6 3548
5fcd1c1b 3549 Zero(&pm, 1, PMOP);
4aabdb9b 3550 if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
f9f4320a 3551 re = CALLREGCOMP((char*)t, (char*)t + len, &pm);
9041c2e3 3552 if (!(SvFLAGS(ret)
faf82a0b
AE
3553 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3554 | SVs_GMG)))
14befaf4
DM
3555 sv_magic(ret,(SV*)ReREFCNT_inc(re),
3556 PERL_MAGIC_qr,0,0);
0f5d15d6 3557 PL_regsize = osize;
0f5d15d6 3558 }
4aabdb9b 3559 }
6bda09f9
YO
3560 DEBUG_EXECUTE_r(
3561 debug_start_match(re, do_utf8, locinput, PL_regeol,
3562 "Matching embedded");
3563 );
3564 startpoint = re->program + 1;
1a147d38 3565 ST.close_paren = 0; /* only used for GOSUB */
6bda09f9
YO
3566 /* borrowed from regtry */
3567 if (PL_reg_start_tmpl <= re->nparens) {
3568 PL_reg_start_tmpl = re->nparens*3/2 + 3;
3569 if(PL_reg_start_tmp)
3570 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3571 else
3572 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3573 }
aa283a38 3574
1a147d38 3575 eval_recurse_doit: /* Share code with GOSUB below this line */
aa283a38 3576 /* run the pattern returned from (??{...}) */
40a82448
DM
3577 ST.cp = regcppush(0); /* Save *all* the positions. */
3578 REGCP_SET(ST.lastcp);
6bda09f9 3579
1a147d38
YO
3580 PL_regstartp = re->startp; /* essentially NOOP on GOSUB */
3581 PL_regendp = re->endp; /* essentially NOOP on GOSUB */
6bda09f9 3582
4aabdb9b
DM
3583 *PL_reglastparen = 0;
3584 *PL_reglastcloseparen = 0;
4aabdb9b 3585 PL_reginput = locinput;
ae0beba1 3586 PL_regsize = 0;
4aabdb9b
DM
3587
3588 /* XXXX This is too dramatic a measure... */
3589 PL_reg_maxiter = 0;
3590
faec1544
DM
3591 ST.toggle_reg_flags = PL_reg_flags;
3592 if (re->reganch & ROPT_UTF8)
3593 PL_reg_flags |= RF_utf8;
3594 else
3595 PL_reg_flags &= ~RF_utf8;
3596 ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
3597
40a82448 3598 ST.prev_rex = rex;
faec1544 3599 ST.prev_curlyx = cur_curlyx;
aa283a38 3600 rex = re;
faec1544 3601 cur_curlyx = NULL;
40a82448 3602 ST.B = next;
faec1544
DM
3603 ST.prev_eval = cur_eval;
3604 cur_eval = st;
faec1544 3605 /* now continue from first node in postoned RE */
6bda09f9 3606 PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
4aabdb9b 3607 /* NOTREACHED */
a0ed51b3 3608 }
24d3c4a9
DM
3609 /* logical is 1, /(?(?{...})X|Y)/ */
3610 sw = (bool)SvTRUE(ret);
3611 logical = 0;
c277df42
IZ
3612 break;
3613 }
40a82448 3614
faec1544
DM
3615 case EVAL_AB: /* cleanup after a successful (??{A})B */
3616 /* note: this is called twice; first after popping B, then A */
3617 PL_reg_flags ^= ST.toggle_reg_flags;
40a82448
DM
3618 ReREFCNT_dec(rex);
3619 rex = ST.prev_rex;
faec1544
DM
3620 regcpblow(ST.cp);
3621 cur_eval = ST.prev_eval;
3622 cur_curlyx = ST.prev_curlyx;
40a82448
DM
3623 /* XXXX This is too dramatic a measure... */
3624 PL_reg_maxiter = 0;
262b90c4 3625 sayYES;
40a82448 3626
40a82448 3627
faec1544
DM
3628 case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
3629 /* note: this is called twice; first after popping B, then A */
3630 PL_reg_flags ^= ST.toggle_reg_flags;
3631 ReREFCNT_dec(rex);
3632 rex = ST.prev_rex;
40a82448
DM
3633 PL_reginput = locinput;
3634 REGCP_UNWIND(ST.lastcp);
3635 regcppop(rex);
faec1544
DM
3636 cur_eval = ST.prev_eval;
3637 cur_curlyx = ST.prev_curlyx;
3638 /* XXXX This is too dramatic a measure... */
3639 PL_reg_maxiter = 0;
40a82448 3640 sayNO_SILENT;
40a82448
DM
3641#undef ST
3642
a0d0e21e 3643 case OPEN:
c277df42 3644 n = ARG(scan); /* which paren pair */
3280af22
NIS
3645 PL_reg_start_tmp[n] = locinput;
3646 if (n > PL_regsize)
3647 PL_regsize = n;
e2e6a0f1 3648 lastopen = n;
a0d0e21e
LW
3649 break;
3650 case CLOSE:
c277df42 3651 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
3652 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3653 PL_regendp[n] = locinput - PL_bostr;
7f69552c
YO
3654 /*if (n > PL_regsize)
3655 PL_regsize = n;*/
3b6647e0 3656 if (n > *PL_reglastparen)
3280af22 3657 *PL_reglastparen = n;
a01268b5 3658 *PL_reglastcloseparen = n;
3b6647e0 3659 if (cur_eval && cur_eval->u.eval.close_paren == n) {
6bda09f9
YO
3660 goto fake_end;
3661 }
a0d0e21e 3662 break;
e2e6a0f1
YO
3663 case ACCEPT:
3664 if (ARG(scan)){
3665 regnode *cursor;
3666 for (cursor=scan;
3667 cursor && OP(cursor)!=END;
3668 cursor=regnext(cursor))
3669 {
3670 if ( OP(cursor)==CLOSE ){
3671 n = ARG(cursor);
3672 if ( n <= lastopen ) {
3673 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3674 PL_regendp[n] = locinput - PL_bostr;
3675 /*if (n > PL_regsize)
3676 PL_regsize = n;*/
3b6647e0 3677 if (n > *PL_reglastparen)
e2e6a0f1
YO
3678 *PL_reglastparen = n;
3679 *PL_reglastcloseparen = n;
3b6647e0
RB
3680 if ( n == ARG(scan) || (cur_eval &&
3681 cur_eval->u.eval.close_paren == n))
e2e6a0f1
YO
3682 break;
3683 }
3684 }
3685 }
3686 }
3687 goto fake_end;
3688 /*NOTREACHED*/
c277df42
IZ
3689 case GROUPP:
3690 n = ARG(scan); /* which paren pair */
3b6647e0 3691 sw = (bool)(*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42 3692 break;
0a4db386
YO
3693 case NGROUPP:
3694 /* reg_check_named_buff_matched returns 0 for no match */
3695 sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
3696 break;
1a147d38 3697 case INSUBP:
0a4db386 3698 n = ARG(scan);
3b6647e0 3699 sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
0a4db386
YO
3700 break;
3701 case DEFINEP:
3702 sw = 0;
3703 break;
c277df42 3704 case IFTHEN:
2c2d71f5 3705 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
24d3c4a9 3706 if (sw)
c277df42
IZ
3707 next = NEXTOPER(NEXTOPER(scan));
3708 else {
3709 next = scan + ARG(scan);
3710 if (OP(next) == IFTHEN) /* Fake one. */
3711 next = NEXTOPER(NEXTOPER(next));
3712 }
3713 break;
3714 case LOGICAL:
24d3c4a9 3715 logical = scan->flags;
c277df42 3716 break;
c476f425 3717
2ab05381 3718/*******************************************************************
2ab05381 3719
c476f425
DM
3720The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
3721pattern, where A and B are subpatterns. (For simple A, CURLYM or
3722STAR/PLUS/CURLY/CURLYN are used instead.)
2ab05381 3723
c476f425 3724A*B is compiled as <CURLYX><A><WHILEM><B>
2ab05381 3725
c476f425
DM
3726On entry to the subpattern, CURLYX is called. This pushes a CURLYX
3727state, which contains the current count, initialised to -1. It also sets
3728cur_curlyx to point to this state, with any previous value saved in the
3729state block.
2ab05381 3730
c476f425
DM
3731CURLYX then jumps straight to the WHILEM op, rather than executing A,
3732since the pattern may possibly match zero times (i.e. it's a while {} loop
3733rather than a do {} while loop).
2ab05381 3734
c476f425
DM
3735Each entry to WHILEM represents a successful match of A. The count in the
3736CURLYX block is incremented, another WHILEM state is pushed, and execution
3737passes to A or B depending on greediness and the current count.
2ab05381 3738
c476f425
DM
3739For example, if matching against the string a1a2a3b (where the aN are
3740substrings that match /A/), then the match progresses as follows: (the
3741pushed states are interspersed with the bits of strings matched so far):
2ab05381 3742
c476f425
DM
3743 <CURLYX cnt=-1>
3744 <CURLYX cnt=0><WHILEM>
3745 <CURLYX cnt=1><WHILEM> a1 <WHILEM>
3746 <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
3747 <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
3748 <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
2ab05381 3749
c476f425
DM
3750(Contrast this with something like CURLYM, which maintains only a single
3751backtrack state:
2ab05381 3752
c476f425
DM
3753 <CURLYM cnt=0> a1
3754 a1 <CURLYM cnt=1> a2
3755 a1 a2 <CURLYM cnt=2> a3
3756 a1 a2 a3 <CURLYM cnt=3> b
3757)
2ab05381 3758
c476f425
DM
3759Each WHILEM state block marks a point to backtrack to upon partial failure
3760of A or B, and also contains some minor state data related to that
3761iteration. The CURLYX block, pointed to by cur_curlyx, contains the
3762overall state, such as the count, and pointers to the A and B ops.
2ab05381 3763
c476f425
DM
3764This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
3765must always point to the *current* CURLYX block, the rules are:
2ab05381 3766
c476f425
DM
3767When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
3768and set cur_curlyx to point the new block.
2ab05381 3769
c476f425
DM
3770When popping the CURLYX block after a successful or unsuccessful match,
3771restore the previous cur_curlyx.
2ab05381 3772
c476f425
DM
3773When WHILEM is about to execute B, save the current cur_curlyx, and set it
3774to the outer one saved in the CURLYX block.
2ab05381 3775
c476f425
DM
3776When popping the WHILEM block after a successful or unsuccessful B match,
3777restore the previous cur_curlyx.
2ab05381 3778
c476f425
DM
3779Here's an example for the pattern (AI* BI)*BO
3780I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
2ab05381 3781
c476f425
DM
3782cur_
3783curlyx backtrack stack
3784------ ---------------
3785NULL
3786CO <CO prev=NULL> <WO>
3787CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
3788CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
3789NULL <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
2ab05381 3790
c476f425
DM
3791At this point the pattern succeeds, and we work back down the stack to
3792clean up, restoring as we go:
95b24440 3793
c476f425
DM
3794CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
3795CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
3796CO <CO prev=NULL> <WO>
3797NULL
a0374537 3798
c476f425
DM
3799*******************************************************************/
3800
3801#define ST st->u.curlyx
3802
3803 case CURLYX: /* start of /A*B/ (for complex A) */
3804 {
3805 /* No need to save/restore up to this paren */
3806 I32 parenfloor = scan->flags;
3807
3808 assert(next); /* keep Coverity happy */
3809 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3810 next += ARG(next);
3811
3812 /* XXXX Probably it is better to teach regpush to support
3813 parenfloor > PL_regsize... */
3814 if (parenfloor > (I32)*PL_reglastparen)
3815 parenfloor = *PL_reglastparen; /* Pessimization... */
3816
3817 ST.prev_curlyx= cur_curlyx;
3818 cur_curlyx = st;
3819 ST.cp = PL_savestack_ix;
3820
3821 /* these fields contain the state of the current curly.
3822 * they are accessed by subsequent WHILEMs */
3823 ST.parenfloor = parenfloor;
3824 ST.min = ARG1(scan);
3825 ST.max = ARG2(scan);
3826 ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3827 ST.B = next;
24d3c4a9
DM
3828 ST.minmod = minmod;
3829 minmod = 0;
c476f425
DM
3830 ST.count = -1; /* this will be updated by WHILEM */
3831 ST.lastloc = NULL; /* this will be updated by WHILEM */
3832
3833 PL_reginput = locinput;
3834 PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
5f66b61c 3835 /* NOTREACHED */
c476f425 3836 }
a0d0e21e 3837
c476f425
DM
3838 case CURLYX_end: /* just finished matching all of A*B */
3839 regcpblow(ST.cp);
3840 cur_curlyx = ST.prev_curlyx;
3841 sayYES;
3842 /* NOTREACHED */
a0d0e21e 3843
c476f425
DM
3844 case CURLYX_end_fail: /* just failed to match all of A*B */
3845 regcpblow(ST.cp);
3846 cur_curlyx = ST.prev_curlyx;
3847 sayNO;
3848 /* NOTREACHED */
4633a7c4 3849
a0d0e21e 3850
c476f425
DM
3851#undef ST
3852#define ST st->u.whilem
3853
3854 case WHILEM: /* just matched an A in /A*B/ (for complex A) */
3855 {
3856 /* see the discussion above about CURLYX/WHILEM */
c476f425
DM
3857 I32 n;
3858 assert(cur_curlyx); /* keep Coverity happy */
3859 n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
3860 ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
3861 ST.cache_offset = 0;
3862 ST.cache_mask = 0;
3863
3864 PL_reginput = locinput;
3865
3866 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
3867 "%*s whilem: matched %ld out of %ld..%ld\n",
3868 REPORT_CODE_OFF+depth*2, "", (long)n,
3869 (long)cur_curlyx->u.curlyx.min,
3870 (long)cur_curlyx->u.curlyx.max)
3871 );
a0d0e21e 3872
c476f425 3873 /* First just match a string of min A's. */
a0d0e21e 3874
c476f425
DM
3875 if (n < cur_curlyx->u.curlyx.min) {
3876 cur_curlyx->u.curlyx.lastloc = locinput;
3877 PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
3878 /* NOTREACHED */
3879 }
3880
3881 /* If degenerate A matches "", assume A done. */
3882
3883 if (locinput == cur_curlyx->u.curlyx.lastloc) {
3884 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
3885 "%*s whilem: empty match detected, trying continuation...\n",
3886 REPORT_CODE_OFF+depth*2, "")
3887 );
3888 goto do_whilem_B_max;
3889 }
3890
3891 /* super-linear cache processing */
3892
3893 if (scan->flags) {
a0d0e21e 3894
2c2d71f5 3895 if (!PL_reg_maxiter) {
c476f425
DM
3896 /* start the countdown: Postpone detection until we
3897 * know the match is not *that* much linear. */
2c2d71f5 3898 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
66bf836d
DM
3899 /* possible overflow for long strings and many CURLYX's */
3900 if (PL_reg_maxiter < 0)
3901 PL_reg_maxiter = I32_MAX;
2c2d71f5
JH
3902 PL_reg_leftiter = PL_reg_maxiter;
3903 }
c476f425 3904
2c2d71f5 3905 if (PL_reg_leftiter-- == 0) {
c476f425 3906 /* initialise cache */
3298f257 3907 const I32 size = (PL_reg_maxiter + 7)/8;
2c2d71f5 3908 if (PL_reg_poscache) {
eb160463 3909 if ((I32)PL_reg_poscache_size < size) {
2c2d71f5
JH
3910 Renew(PL_reg_poscache, size, char);
3911 PL_reg_poscache_size = size;
3912 }
3913 Zero(PL_reg_poscache, size, char);
3914 }
3915 else {
3916 PL_reg_poscache_size = size;
a02a5408 3917 Newxz(PL_reg_poscache, size, char);
2c2d71f5 3918 }
c476f425
DM
3919 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
3920 "%swhilem: Detected a super-linear match, switching on caching%s...\n",
3921 PL_colors[4], PL_colors[5])
3922 );
2c2d71f5 3923 }
c476f425 3924
2c2d71f5 3925 if (PL_reg_leftiter < 0) {
c476f425
DM
3926 /* have we already failed at this position? */
3927 I32 offset, mask;
3928 offset = (scan->flags & 0xf) - 1
3929 + (locinput - PL_bostr) * (scan->flags>>4);
3930 mask = 1 << (offset % 8);
3931 offset /= 8;
3932 if (PL_reg_poscache[offset] & mask) {
3933 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
3934 "%*s whilem: (cache) already tried at this position...\n",
3935 REPORT_CODE_OFF+depth*2, "")
2c2d71f5 3936 );
3298f257 3937 sayNO; /* cache records failure */
2c2d71f5 3938 }
c476f425
DM
3939 ST.cache_offset = offset;
3940 ST.cache_mask = mask;
2c2d71f5 3941 }
c476f425 3942 }
2c2d71f5 3943
c476f425 3944 /* Prefer B over A for minimal matching. */
a687059c 3945
c476f425
DM
3946 if (cur_curlyx->u.curlyx.minmod) {
3947 ST.save_curlyx = cur_curlyx;
3948 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
3949 ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
3950 REGCP_SET(ST.lastcp);
3951 PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
3952 /* NOTREACHED */
3953 }
a0d0e21e 3954
c476f425
DM
3955 /* Prefer A over B for maximal matching. */
3956
3957 if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
3958 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
3959 cur_curlyx->u.curlyx.lastloc = locinput;
3960 REGCP_SET(ST.lastcp);
3961 PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
3962 /* NOTREACHED */
3963 }
3964 goto do_whilem_B_max;
3965 }
3966 /* NOTREACHED */
3967
3968 case WHILEM_B_min: /* just matched B in a minimal match */
3969 case WHILEM_B_max: /* just matched B in a maximal match */
3970 cur_curlyx = ST.save_curlyx;
3971 sayYES;
3972 /* NOTREACHED */
3973
3974 case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
3975 cur_curlyx = ST.save_curlyx;
3976 cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
3977 cur_curlyx->u.curlyx.count--;
3978 CACHEsayNO;
3979 /* NOTREACHED */
3980
3981 case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
3982 REGCP_UNWIND(ST.lastcp);
3983 regcppop(rex);
3984 /* FALL THROUGH */
3985 case WHILEM_A_pre_fail: /* just failed to match even minimal A */
3986 cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
3987 cur_curlyx->u.curlyx.count--;
3988 CACHEsayNO;
3989 /* NOTREACHED */
3990
3991 case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
3992 REGCP_UNWIND(ST.lastcp);
3993 regcppop(rex); /* Restore some previous $<digit>s? */
3994 PL_reginput = locinput;
3995 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
3996 "%*s whilem: failed, trying continuation...\n",
3997 REPORT_CODE_OFF+depth*2, "")
3998 );
3999 do_whilem_B_max:
4000 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4001 && ckWARN(WARN_REGEXP)
4002 && !(PL_reg_flags & RF_warned))
4003 {
4004 PL_reg_flags |= RF_warned;
4005 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4006 "Complex regular subexpression recursion",
4007 REG_INFTY - 1);
4008 }
4009
4010 /* now try B */
4011 ST.save_curlyx = cur_curlyx;
4012 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4013 PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4014 /* NOTREACHED */
4015
4016 case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4017 cur_curlyx = ST.save_curlyx;
4018 REGCP_UNWIND(ST.lastcp);
4019 regcppop(rex);
4020
4021 if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4022 /* Maximum greed exceeded */
4023 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4024 && ckWARN(WARN_REGEXP)
4025 && !(PL_reg_flags & RF_warned))
4026 {
3280af22 4027 PL_reg_flags |= RF_warned;
c476f425
DM
4028 Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4029 "%s limit (%d) exceeded",
4030 "Complex regular subexpression recursion",
4031 REG_INFTY - 1);
a0d0e21e 4032 }
c476f425 4033 cur_curlyx->u.curlyx.count--;
3ab3c9b4 4034 CACHEsayNO;
a0d0e21e 4035 }
c476f425
DM
4036
4037 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4038 "%*s trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4039 );
4040 /* Try grabbing another A and see if it helps. */
4041 PL_reginput = locinput;
4042 cur_curlyx->u.curlyx.lastloc = locinput;
4043 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4044 REGCP_SET(ST.lastcp);
4045 PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
5f66b61c 4046 /* NOTREACHED */
40a82448
DM
4047
4048#undef ST
4049#define ST st->u.branch
4050
4051 case BRANCHJ: /* /(...|A|...)/ with long next pointer */
c277df42
IZ
4052 next = scan + ARG(scan);
4053 if (next == scan)
4054 next = NULL;
40a82448
DM
4055 scan = NEXTOPER(scan);
4056 /* FALL THROUGH */
c277df42 4057
40a82448
DM
4058 case BRANCH: /* /(...|A|...)/ */
4059 scan = NEXTOPER(scan); /* scan now points to inner node */
5d458dd8
YO
4060 if ((!next || (OP(next) != BRANCH && OP(next) != BRANCHJ))
4061 && !has_cutgroup)
4062 {
40a82448
DM
4063 /* last branch; skip state push and jump direct to node */
4064 continue;
5d458dd8 4065 }
40a82448
DM
4066 ST.lastparen = *PL_reglastparen;
4067 ST.next_branch = next;
4068 REGCP_SET(ST.cp);
4069 PL_reginput = locinput;
02db2b7b 4070
40a82448 4071 /* Now go into the branch */
5d458dd8
YO
4072 if (has_cutgroup) {
4073 PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4074 } else {
4075 PUSH_STATE_GOTO(BRANCH_next, scan);
4076 }
40a82448 4077 /* NOTREACHED */
5d458dd8
YO
4078 case CUTGROUP:
4079 PL_reginput = locinput;
4080 sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
4081 (SV*)rex->data->data[ ARG( scan ) ];
4082 PUSH_STATE_GOTO(CUTGROUP_next,next);
4083 /* NOTREACHED */
4084 case CUTGROUP_next_fail:
4085 do_cutgroup = 1;
4086 no_final = 1;
4087 if (st->u.mark.mark_name)
4088 sv_commit = st->u.mark.mark_name;
4089 sayNO;
4090 /* NOTREACHED */
4091 case BRANCH_next:
4092 sayYES;
4093 /* NOTREACHED */
40a82448 4094 case BRANCH_next_fail: /* that branch failed; try the next, if any */
5d458dd8
YO
4095 if (do_cutgroup) {
4096 do_cutgroup = 0;
4097 no_final = 0;
4098 }
40a82448
DM
4099 REGCP_UNWIND(ST.cp);
4100 for (n = *PL_reglastparen; n > ST.lastparen; n--)
4101 PL_regendp[n] = -1;
4102 *PL_reglastparen = n;
0a4db386 4103 /*dmq: *PL_reglastcloseparen = n; */
40a82448
DM
4104 scan = ST.next_branch;
4105 /* no more branches? */
5d458dd8
YO
4106 if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4107 DEBUG_EXECUTE_r({
4108 PerlIO_printf( Perl_debug_log,
4109 "%*s %sBRANCH failed...%s\n",
4110 REPORT_CODE_OFF+depth*2, "",
4111 PL_colors[4],
4112 PL_colors[5] );
4113 });
4114 sayNO_SILENT;
4115 }
40a82448
DM
4116 continue; /* execute next BRANCH[J] op */
4117 /* NOTREACHED */
4118
a0d0e21e 4119 case MINMOD:
24d3c4a9 4120 minmod = 1;
a0d0e21e 4121 break;
40a82448
DM
4122
4123#undef ST
4124#define ST st->u.curlym
4125
4126 case CURLYM: /* /A{m,n}B/ where A is fixed-length */
4127
4128 /* This is an optimisation of CURLYX that enables us to push
4129 * only a single backtracking state, no matter now many matches
4130 * there are in {m,n}. It relies on the pattern being constant
4131 * length, with no parens to influence future backrefs
4132 */
4133
4134 ST.me = scan;
dc45a647 4135 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
40a82448
DM
4136
4137 /* if paren positive, emulate an OPEN/CLOSE around A */
4138 if (ST.me->flags) {
3b6647e0 4139 U32 paren = ST.me->flags;
40a82448
DM
4140 if (paren > PL_regsize)
4141 PL_regsize = paren;
3b6647e0 4142 if (paren > *PL_reglastparen)
40a82448 4143 *PL_reglastparen = paren;
c277df42 4144 scan += NEXT_OFF(scan); /* Skip former OPEN. */
6407bf3b 4145 }
40a82448
DM
4146 ST.A = scan;
4147 ST.B = next;
4148 ST.alen = 0;
4149 ST.count = 0;
24d3c4a9
DM
4150 ST.minmod = minmod;
4151 minmod = 0;
40a82448
DM
4152 ST.c1 = CHRTEST_UNINIT;
4153 REGCP_SET(ST.cp);
6407bf3b 4154
40a82448
DM
4155 if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
4156 goto curlym_do_B;
4157
4158 curlym_do_A: /* execute the A in /A{m,n}B/ */
6407bf3b 4159 PL_reginput = locinput;
40a82448
DM
4160 PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
4161 /* NOTREACHED */
5f80c4cf 4162
40a82448
DM
4163 case CURLYM_A: /* we've just matched an A */
4164 locinput = st->locinput;
4165 nextchr = UCHARAT(locinput);
4166
4167 ST.count++;
4168 /* after first match, determine A's length: u.curlym.alen */
4169 if (ST.count == 1) {
4170 if (PL_reg_match_utf8) {
4171 char *s = locinput;
4172 while (s < PL_reginput) {
4173 ST.alen++;
4174 s += UTF8SKIP(s);
4175 }
4176 }
4177 else {
4178 ST.alen = PL_reginput - locinput;
4179 }
4180 if (ST.alen == 0)
4181 ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4182 }
0cadcf80
DM
4183 DEBUG_EXECUTE_r(
4184 PerlIO_printf(Perl_debug_log,
40a82448 4185 "%*s CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
5bc10b2c 4186 (int)(REPORT_CODE_OFF+(depth*2)), "",
40a82448 4187 (IV) ST.count, (IV)ST.alen)
0cadcf80
DM
4188 );
4189
40a82448 4190 locinput = PL_reginput;
0a4db386
YO
4191
4192 if (cur_eval && cur_eval->u.eval.close_paren &&
24b23f37 4193 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
0a4db386
YO
4194 goto fake_end;
4195
4196 if ( ST.count < (ST.minmod ? ARG1(ST.me) : ARG2(ST.me)) )
40a82448
DM
4197 goto curlym_do_A; /* try to match another A */
4198 goto curlym_do_B; /* try to match B */
5f80c4cf 4199
40a82448
DM
4200 case CURLYM_A_fail: /* just failed to match an A */
4201 REGCP_UNWIND(ST.cp);
0a4db386
YO
4202
4203 if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
4204 || (cur_eval && cur_eval->u.eval.close_paren &&
24b23f37 4205 cur_eval->u.eval.close_paren == (U32)ST.me->flags))
40a82448 4206 sayNO;
0cadcf80 4207
40a82448
DM
4208 curlym_do_B: /* execute the B in /A{m,n}B/ */
4209 PL_reginput = locinput;
4210 if (ST.c1 == CHRTEST_UNINIT) {
4211 /* calculate c1 and c2 for possible match of 1st char
4212 * following curly */
4213 ST.c1 = ST.c2 = CHRTEST_VOID;
4214 if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4215 regnode *text_node = ST.B;
4216 if (! HAS_TEXT(text_node))
4217 FIND_NEXT_IMPT(text_node);
4218 if (HAS_TEXT(text_node)
4219 && PL_regkind[OP(text_node)] != REF)
4220 {
4221 ST.c1 = (U8)*STRING(text_node);
4222 ST.c2 =
4223 (OP(text_node) == EXACTF || OP(text_node) == REFF)
4224 ? PL_fold[ST.c1]
4225 : (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
4226 ? PL_fold_locale[ST.c1]
4227 : ST.c1;
c277df42 4228 }
c277df42 4229 }
40a82448
DM
4230 }
4231
4232 DEBUG_EXECUTE_r(
4233 PerlIO_printf(Perl_debug_log,
4234 "%*s CURLYM trying tail with matches=%"IVdf"...\n",
5bc10b2c 4235 (int)(REPORT_CODE_OFF+(depth*2)),
40a82448
DM
4236 "", (IV)ST.count)
4237 );
4238 if (ST.c1 != CHRTEST_VOID
4239 && UCHARAT(PL_reginput) != ST.c1
4240 && UCHARAT(PL_reginput) != ST.c2)
4241 {
4242 /* simulate B failing */
4243 state_num = CURLYM_B_fail;
4244 goto reenter_switch;
4245 }
4246
4247 if (ST.me->flags) {
4248 /* mark current A as captured */
4249 I32 paren = ST.me->flags;
4250 if (ST.count) {
4251 PL_regstartp[paren]
4252 = HOPc(PL_reginput, -ST.alen) - PL_bostr;
4253 PL_regendp[paren] = PL_reginput - PL_bostr;
0a4db386 4254 /*dmq: *PL_reglastcloseparen = paren; */
c277df42 4255 }
40a82448
DM
4256 else
4257 PL_regendp[paren] = -1;
0a4db386 4258 if (cur_eval && cur_eval->u.eval.close_paren &&
24b23f37 4259 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
0a4db386
YO
4260 {
4261 if (ST.count)
4262 goto fake_end;
4263 else
4264 sayNO;
4265 }
c277df42 4266 }
0a4db386 4267
40a82448 4268 PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
5f66b61c 4269 /* NOTREACHED */
40a82448
DM
4270
4271 case CURLYM_B_fail: /* just failed to match a B */
4272 REGCP_UNWIND(ST.cp);
4273 if (ST.minmod) {
4274 if (ST.count == ARG2(ST.me) /* max */)
4275 sayNO;
4276 goto curlym_do_A; /* try to match a further A */
4277 }
4278 /* backtrack one A */
4279 if (ST.count == ARG1(ST.me) /* min */)
4280 sayNO;
4281 ST.count--;
4282 locinput = HOPc(locinput, -ST.alen);
4283 goto curlym_do_B; /* try to match B */
4284
c255a977
DM
4285#undef ST
4286#define ST st->u.curly
40a82448 4287
c255a977
DM
4288#define CURLY_SETPAREN(paren, success) \
4289 if (paren) { \
4290 if (success) { \
4291 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr; \
4292 PL_regendp[paren] = locinput - PL_bostr; \
0a4db386 4293 *PL_reglastcloseparen = paren; \
c255a977
DM
4294 } \
4295 else \
4296 PL_regendp[paren] = -1; \
4297 }
4298
4299 case STAR: /* /A*B/ where A is width 1 */
4300 ST.paren = 0;
4301 ST.min = 0;
4302 ST.max = REG_INFTY;
a0d0e21e
LW
4303 scan = NEXTOPER(scan);
4304 goto repeat;
c255a977
DM
4305 case PLUS: /* /A+B/ where A is width 1 */
4306 ST.paren = 0;
4307 ST.min = 1;
4308 ST.max = REG_INFTY;
c277df42 4309 scan = NEXTOPER(scan);
c255a977
DM
4310 goto repeat;
4311 case CURLYN: /* /(A){m,n}B/ where A is width 1 */
4312 ST.paren = scan->flags; /* Which paren to set */
4313 if (ST.paren > PL_regsize)
4314 PL_regsize = ST.paren;
3b6647e0 4315 if (ST.paren > *PL_reglastparen)
c255a977
DM
4316 *PL_reglastparen = ST.paren;
4317 ST.min = ARG1(scan); /* min to match */
4318 ST.max = ARG2(scan); /* max to match */
0a4db386 4319 if (cur_eval && cur_eval->u.eval.close_paren &&
86413ec0 4320 cur_eval->u.eval.close_paren == (U32)ST.paren) {
0a4db386
YO
4321 ST.min=1;
4322 ST.max=1;
4323 }
c255a977
DM
4324 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4325 goto repeat;
4326 case CURLY: /* /A{m,n}B/ where A is width 1 */
4327 ST.paren = 0;
4328 ST.min = ARG1(scan); /* min to match */
4329 ST.max = ARG2(scan); /* max to match */
4330 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42 4331 repeat:
a0d0e21e
LW
4332 /*
4333 * Lookahead to avoid useless match attempts
4334 * when we know what character comes next.
c255a977 4335 *
5f80c4cf
JP
4336 * Used to only do .*x and .*?x, but now it allows
4337 * for )'s, ('s and (?{ ... })'s to be in the way
4338 * of the quantifier and the EXACT-like node. -- japhy
4339 */
4340
c255a977
DM
4341 if (ST.min > ST.max) /* XXX make this a compile-time check? */
4342 sayNO;
cca55fe3 4343 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
4344 U8 *s;
4345 regnode *text_node = next;
4346
3dab1dad
YO
4347 if (! HAS_TEXT(text_node))
4348 FIND_NEXT_IMPT(text_node);
5f80c4cf 4349
9e137952 4350 if (! HAS_TEXT(text_node))
c255a977 4351 ST.c1 = ST.c2 = CHRTEST_VOID;
5f80c4cf 4352 else {
3dab1dad 4353 if (PL_regkind[OP(text_node)] == REF) {
c255a977 4354 ST.c1 = ST.c2 = CHRTEST_VOID;
44a68960 4355 goto assume_ok_easy;
cca55fe3 4356 }
be8e71aa
YO
4357 else
4358 s = (U8*)STRING(text_node);
5f80c4cf
JP
4359
4360 if (!UTF) {
c255a977 4361 ST.c2 = ST.c1 = *s;
f65d3ee7 4362 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
c255a977 4363 ST.c2 = PL_fold[ST.c1];
f65d3ee7 4364 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
c255a977 4365 ST.c2 = PL_fold_locale[ST.c1];
1aa99e6b 4366 }
5f80c4cf 4367 else { /* UTF */
f65d3ee7 4368 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 4369 STRLEN ulen1, ulen2;
89ebb4a3
JH
4370 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4371 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
a2a2844f
JH
4372
4373 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4374 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
e294cc5d
JH
4375#ifdef EBCDIC
4376 ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4377 ckWARN(WARN_UTF8) ?
4378 0 : UTF8_ALLOW_ANY);
4379 ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4380 ckWARN(WARN_UTF8) ?
4381 0 : UTF8_ALLOW_ANY);
4382#else
c255a977 4383 ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
e294cc5d 4384 uniflags);
c255a977 4385 ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
e294cc5d
JH
4386 uniflags);
4387#endif
5f80c4cf
JP
4388 }
4389 else {
c255a977 4390 ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
041457d9 4391 uniflags);
5f80c4cf 4392 }
1aa99e6b
IH
4393 }
4394 }
bbce6d69 4395 }
a0d0e21e 4396 else
c255a977 4397 ST.c1 = ST.c2 = CHRTEST_VOID;
cca55fe3 4398 assume_ok_easy:
c255a977
DM
4399
4400 ST.A = scan;
4401 ST.B = next;
3280af22 4402 PL_reginput = locinput;
24d3c4a9
DM
4403 if (minmod) {
4404 minmod = 0;
e2e6a0f1 4405 if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4633a7c4 4406 sayNO;
c255a977 4407 ST.count = ST.min;
a0ed51b3 4408 locinput = PL_reginput;
c255a977
DM
4409 REGCP_SET(ST.cp);
4410 if (ST.c1 == CHRTEST_VOID)
4411 goto curly_try_B_min;
4412
4413 ST.oldloc = locinput;
4414
4415 /* set ST.maxpos to the furthest point along the
4416 * string that could possibly match */
4417 if (ST.max == REG_INFTY) {
4418 ST.maxpos = PL_regeol - 1;
4419 if (do_utf8)
4420 while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4421 ST.maxpos--;
4422 }
4423 else if (do_utf8) {
4424 int m = ST.max - ST.min;
4425 for (ST.maxpos = locinput;
4426 m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4427 ST.maxpos += UTF8SKIP(ST.maxpos);
4428 }
4429 else {
4430 ST.maxpos = locinput + ST.max - ST.min;
4431 if (ST.maxpos >= PL_regeol)
4432 ST.maxpos = PL_regeol - 1;
4433 }
4434 goto curly_try_B_min_known;
4435
4436 }
4437 else {
e2e6a0f1 4438 ST.count = regrepeat(rex, ST.A, ST.max, depth);
c255a977
DM
4439 locinput = PL_reginput;
4440 if (ST.count < ST.min)
4441 sayNO;
4442 if ((ST.count > ST.min)
4443 && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4444 {
4445 /* A{m,n} must come at the end of the string, there's
4446 * no point in backing off ... */
4447 ST.min = ST.count;
4448 /* ...except that $ and \Z can match before *and* after
4449 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
4450 We may back off by one in this case. */
4451 if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4452 ST.min--;
4453 }
4454 REGCP_SET(ST.cp);
4455 goto curly_try_B_max;
4456 }
4457 /* NOTREACHED */
4458
4459
4460 case CURLY_B_min_known_fail:
4461 /* failed to find B in a non-greedy match where c1,c2 valid */
4462 if (ST.paren && ST.count)
4463 PL_regendp[ST.paren] = -1;
4464
4465 PL_reginput = locinput; /* Could be reset... */
4466 REGCP_UNWIND(ST.cp);
4467 /* Couldn't or didn't -- move forward. */
4468 ST.oldloc = locinput;
4469 if (do_utf8)
4470 locinput += UTF8SKIP(locinput);
4471 else
4472 locinput++;
4473 ST.count++;
4474 curly_try_B_min_known:
4475 /* find the next place where 'B' could work, then call B */
4476 {
4477 int n;
4478 if (do_utf8) {
4479 n = (ST.oldloc == locinput) ? 0 : 1;
4480 if (ST.c1 == ST.c2) {
4481 STRLEN len;
4482 /* set n to utf8_distance(oldloc, locinput) */
4483 while (locinput <= ST.maxpos &&
4484 utf8n_to_uvchr((U8*)locinput,
4485 UTF8_MAXBYTES, &len,
4486 uniflags) != (UV)ST.c1) {
4487 locinput += len;
4488 n++;
4489 }
1aa99e6b
IH
4490 }
4491 else {
c255a977
DM
4492 /* set n to utf8_distance(oldloc, locinput) */
4493 while (locinput <= ST.maxpos) {
4494 STRLEN len;
4495 const UV c = utf8n_to_uvchr((U8*)locinput,
4496 UTF8_MAXBYTES, &len,
4497 uniflags);
4498 if (c == (UV)ST.c1 || c == (UV)ST.c2)
4499 break;
4500 locinput += len;
4501 n++;
1aa99e6b 4502 }
0fe9bf95
IZ
4503 }
4504 }
c255a977
DM
4505 else {
4506 if (ST.c1 == ST.c2) {
4507 while (locinput <= ST.maxpos &&
4508 UCHARAT(locinput) != ST.c1)
4509 locinput++;
bbce6d69 4510 }
c255a977
DM
4511 else {
4512 while (locinput <= ST.maxpos
4513 && UCHARAT(locinput) != ST.c1
4514 && UCHARAT(locinput) != ST.c2)
4515 locinput++;
a0ed51b3 4516 }
c255a977
DM
4517 n = locinput - ST.oldloc;
4518 }
4519 if (locinput > ST.maxpos)
4520 sayNO;
4521 /* PL_reginput == oldloc now */
4522 if (n) {
4523 ST.count += n;
e2e6a0f1 4524 if (regrepeat(rex, ST.A, n, depth) < n)
4633a7c4 4525 sayNO;
a0d0e21e 4526 }
c255a977
DM
4527 PL_reginput = locinput;
4528 CURLY_SETPAREN(ST.paren, ST.count);
0a4db386 4529 if (cur_eval && cur_eval->u.eval.close_paren &&
86413ec0 4530 cur_eval->u.eval.close_paren == (U32)ST.paren) {
0a4db386
YO
4531 goto fake_end;
4532 }
c255a977 4533 PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
a0d0e21e 4534 }
c255a977
DM
4535 /* NOTREACHED */
4536
4537
4538 case CURLY_B_min_fail:
4539 /* failed to find B in a non-greedy match where c1,c2 invalid */
4540 if (ST.paren && ST.count)
4541 PL_regendp[ST.paren] = -1;
4542
4543 REGCP_UNWIND(ST.cp);
4544 /* failed -- move forward one */
4545 PL_reginput = locinput;
e2e6a0f1 4546 if (regrepeat(rex, ST.A, 1, depth)) {
c255a977 4547 ST.count++;
a0ed51b3 4548 locinput = PL_reginput;
c255a977
DM
4549 if (ST.count <= ST.max || (ST.max == REG_INFTY &&
4550 ST.count > 0)) /* count overflow ? */
15272685 4551 {
c255a977
DM
4552 curly_try_B_min:
4553 CURLY_SETPAREN(ST.paren, ST.count);
0a4db386 4554 if (cur_eval && cur_eval->u.eval.close_paren &&
86413ec0 4555 cur_eval->u.eval.close_paren == (U32)ST.paren) {
0a4db386
YO
4556 goto fake_end;
4557 }
c255a977 4558 PUSH_STATE_GOTO(CURLY_B_min, ST.B);
a0d0e21e
LW
4559 }
4560 }
4633a7c4 4561 sayNO;
c255a977
DM
4562 /* NOTREACHED */
4563
4564
4565 curly_try_B_max:
4566 /* a successful greedy match: now try to match B */
40d049e4 4567 if (cur_eval && cur_eval->u.eval.close_paren &&
86413ec0 4568 cur_eval->u.eval.close_paren == (U32)ST.paren) {
40d049e4
YO
4569 goto fake_end;
4570 }
c255a977
DM
4571 {
4572 UV c = 0;
4573 if (ST.c1 != CHRTEST_VOID)
4574 c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
4575 UTF8_MAXBYTES, 0, uniflags)
466787eb 4576 : (UV) UCHARAT(PL_reginput);
c255a977
DM
4577 /* If it could work, try it. */
4578 if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
4579 CURLY_SETPAREN(ST.paren, ST.count);
4580 PUSH_STATE_GOTO(CURLY_B_max, ST.B);
4581 /* NOTREACHED */
4582 }
4583 }
4584 /* FALL THROUGH */
4585 case CURLY_B_max_fail:
4586 /* failed to find B in a greedy match */
4587 if (ST.paren && ST.count)
4588 PL_regendp[ST.paren] = -1;
4589
4590 REGCP_UNWIND(ST.cp);
4591 /* back up. */
4592 if (--ST.count < ST.min)
4593 sayNO;
4594 PL_reginput = locinput = HOPc(locinput, -1);
4595 goto curly_try_B_max;
4596
4597#undef ST
4598
a0d0e21e 4599 case END:
6bda09f9 4600 fake_end:
faec1544
DM
4601 if (cur_eval) {
4602 /* we've just finished A in /(??{A})B/; now continue with B */
4603 I32 tmpix;
4604
4605
4606 st->u.eval.toggle_reg_flags
4607 = cur_eval->u.eval.toggle_reg_flags;
4608 PL_reg_flags ^= st->u.eval.toggle_reg_flags;
4609
4610 st->u.eval.prev_rex = rex; /* inner */
4611 rex = cur_eval->u.eval.prev_rex; /* outer */
4612 cur_curlyx = cur_eval->u.eval.prev_curlyx;
4613 ReREFCNT_inc(rex);
4614 st->u.eval.cp = regcppush(0); /* Save *all* the positions. */
4615 REGCP_SET(st->u.eval.lastcp);
4616 PL_reginput = locinput;
4617
4618 /* Restore parens of the outer rex without popping the
4619 * savestack */
4620 tmpix = PL_savestack_ix;
4621 PL_savestack_ix = cur_eval->u.eval.lastcp;
4622 regcppop(rex);
4623 PL_savestack_ix = tmpix;
4624
4625 st->u.eval.prev_eval = cur_eval;
4626 cur_eval = cur_eval->u.eval.prev_eval;
4627 DEBUG_EXECUTE_r(
2a49f0f5
JH
4628 PerlIO_printf(Perl_debug_log, "%*s EVAL trying tail ... %"UVxf"\n",
4629 REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
faec1544
DM
4630 PUSH_YES_STATE_GOTO(EVAL_AB,
4631 st->u.eval.prev_eval->u.eval.B); /* match B */
4632 }
4633
3b0527fe 4634 if (locinput < reginfo->till) {
a3621e74 4635 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
7821416a
IZ
4636 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4637 PL_colors[4],
4638 (long)(locinput - PL_reg_starttry),
3b0527fe 4639 (long)(reginfo->till - PL_reg_starttry),
7821416a 4640 PL_colors[5]));
262b90c4 4641 sayNO_SILENT; /* Cannot match: too short. */
7821416a
IZ
4642 }
4643 PL_reginput = locinput; /* put where regtry can find it */
262b90c4 4644 sayYES; /* Success! */
dad79028
DM
4645
4646 case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4647 DEBUG_EXECUTE_r(
4648 PerlIO_printf(Perl_debug_log,
4649 "%*s %ssubpattern success...%s\n",
5bc10b2c 4650 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
3280af22 4651 PL_reginput = locinput; /* put where regtry can find it */
262b90c4 4652 sayYES; /* Success! */
dad79028 4653
40a82448
DM
4654#undef ST
4655#define ST st->u.ifmatch
4656
4657 case SUSPEND: /* (?>A) */
4658 ST.wanted = 1;
9fe1d20c 4659 PL_reginput = locinput;
9041c2e3 4660 goto do_ifmatch;
dad79028 4661
40a82448
DM
4662 case UNLESSM: /* -ve lookaround: (?!A), or with flags, (?<!A) */
4663 ST.wanted = 0;
dad79028
DM
4664 goto ifmatch_trivial_fail_test;
4665
40a82448
DM
4666 case IFMATCH: /* +ve lookaround: (?=A), or with flags, (?<=A) */
4667 ST.wanted = 1;
dad79028 4668 ifmatch_trivial_fail_test:
a0ed51b3 4669 if (scan->flags) {
52657f30 4670 char * const s = HOPBACKc(locinput, scan->flags);
dad79028
DM
4671 if (!s) {
4672 /* trivial fail */
24d3c4a9
DM
4673 if (logical) {
4674 logical = 0;
4675 sw = 1 - (bool)ST.wanted;
dad79028 4676 }
40a82448 4677 else if (ST.wanted)
dad79028
DM
4678 sayNO;
4679 next = scan + ARG(scan);
4680 if (next == scan)
4681 next = NULL;
4682 break;
4683 }
efb30f32 4684 PL_reginput = s;
a0ed51b3
LW
4685 }
4686 else
4687 PL_reginput = locinput;
4688
c277df42 4689 do_ifmatch:
40a82448 4690 ST.me = scan;
24d3c4a9 4691 ST.logical = logical;
40a82448
DM
4692 /* execute body of (?...A) */
4693 PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
4694 /* NOTREACHED */
4695
4696 case IFMATCH_A_fail: /* body of (?...A) failed */
4697 ST.wanted = !ST.wanted;
4698 /* FALL THROUGH */
4699
4700 case IFMATCH_A: /* body of (?...A) succeeded */
24d3c4a9
DM
4701 if (ST.logical) {
4702 sw = (bool)ST.wanted;
40a82448
DM
4703 }
4704 else if (!ST.wanted)
4705 sayNO;
4706
4707 if (OP(ST.me) == SUSPEND)
4708 locinput = PL_reginput;
4709 else {
4710 locinput = PL_reginput = st->locinput;
4711 nextchr = UCHARAT(locinput);
4712 }
4713 scan = ST.me + ARG(ST.me);
4714 if (scan == ST.me)
4715 scan = NULL;
4716 continue; /* execute B */
4717
4718#undef ST
dad79028 4719
c277df42 4720 case LONGJMP:
c277df42
IZ
4721 next = scan + ARG(scan);
4722 if (next == scan)
4723 next = NULL;
a0d0e21e 4724 break;
54612592 4725 case COMMIT:
e2e6a0f1
YO
4726 reginfo->cutpoint = PL_regeol;
4727 /* FALLTHROUGH */
5d458dd8 4728 case PRUNE:
24b23f37 4729 PL_reginput = locinput;
e2e6a0f1 4730 if (!scan->flags)
5d458dd8 4731 sv_yes_mark = sv_commit = (SV*)rex->data->data[ ARG( scan ) ];
54612592
YO
4732 PUSH_STATE_GOTO(COMMIT_next,next);
4733 /* NOTREACHED */
4734 case COMMIT_next_fail:
4735 no_final = 1;
4736 /* FALLTHROUGH */
7f69552c
YO
4737 case OPFAIL:
4738 sayNO;
e2e6a0f1
YO
4739 /* NOTREACHED */
4740
4741#define ST st->u.mark
4742 case MARKPOINT:
4743 ST.prev_mark = mark_state;
5d458dd8
YO
4744 ST.mark_name = sv_commit = sv_yes_mark
4745 = (SV*)rex->data->data[ ARG( scan ) ];
e2e6a0f1
YO
4746 mark_state = st;
4747 ST.mark_loc = PL_reginput = locinput;
4748 PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
4749 /* NOTREACHED */
4750 case MARKPOINT_next:
4751 mark_state = ST.prev_mark;
4752 sayYES;
4753 /* NOTREACHED */
4754 case MARKPOINT_next_fail:
5d458dd8 4755 if (popmark && sv_eq(ST.mark_name,popmark))
e2e6a0f1
YO
4756 {
4757 if (ST.mark_loc > startpoint)
4758 reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
4759 popmark = NULL; /* we found our mark */
4760 sv_commit = ST.mark_name;
4761
4762 DEBUG_EXECUTE_r({
5d458dd8 4763 PerlIO_printf(Perl_debug_log,
e2e6a0f1
YO
4764 "%*s %ssetting cutpoint to mark:%"SVf"...%s\n",
4765 REPORT_CODE_OFF+depth*2, "",
4766 PL_colors[4], sv_commit, PL_colors[5]);
e2e6a0f1
YO
4767 });
4768 }
4769 mark_state = ST.prev_mark;
5d458dd8
YO
4770 sv_yes_mark = mark_state ?
4771 mark_state->u.mark.mark_name : NULL;
e2e6a0f1
YO
4772 sayNO;
4773 /* NOTREACHED */
5d458dd8
YO
4774 case SKIP:
4775 PL_reginput = locinput;
4776 if (scan->flags) {
4777 /* (*CUT) : if we fail we cut here*/
4778 ST.mark_name = NULL;
e2e6a0f1 4779 ST.mark_loc = locinput;
5d458dd8
YO
4780 PUSH_STATE_GOTO(SKIP_next,next);
4781 } else {
4782 /* (*CUT:NAME) : if there is a (*MARK:NAME) fail where it was,
4783 otherwise do nothing. Meaning we need to scan
4784 */
4785 regmatch_state *cur = mark_state;
4786 SV *find = (SV*)rex->data->data[ ARG( scan ) ];
4787
4788 while (cur) {
4789 if ( sv_eq( cur->u.mark.mark_name,
4790 find ) )
4791 {
4792 ST.mark_name = find;
4793 PUSH_STATE_GOTO( SKIP_next, next );
4794 }
4795 cur = cur->u.mark.prev_mark;
4796 }
e2e6a0f1 4797 }
5d458dd8
YO
4798 /* Didn't find our (*MARK:NAME) so ignore this (*CUT:NAME) */
4799 break;
4800 case SKIP_next_fail:
4801 if (ST.mark_name) {
4802 /* (*CUT:NAME) - Set up to search for the name as we
4803 collapse the stack*/
4804 popmark = ST.mark_name;
4805 } else {
4806 /* (*CUT) - No name, we cut here.*/
e2e6a0f1
YO
4807 if (ST.mark_loc > startpoint)
4808 reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5d458dd8
YO
4809 /* but we set sv_commit to latest mark_name if there
4810 is one so they can test to see how things lead to this
4811 cut */
4812 if (mark_state)
4813 sv_commit=mark_state->u.mark.mark_name;
4814 }
e2e6a0f1
YO
4815 no_final = 1;
4816 sayNO;
4817 /* NOTREACHED */
4818#undef ST
4819
a0d0e21e 4820 default:
b900a521 4821 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 4822 PTR2UV(scan), OP(scan));
cea2e8a9 4823 Perl_croak(aTHX_ "regexp memory corruption");
5d458dd8
YO
4824
4825 } /* end switch */
95b24440 4826
5d458dd8
YO
4827 /* switch break jumps here */
4828 scan = next; /* prepare to execute the next op and ... */
4829 continue; /* ... jump back to the top, reusing st */
95b24440
DM
4830 /* NOTREACHED */
4831
40a82448
DM
4832 push_yes_state:
4833 /* push a state that backtracks on success */
4834 st->u.yes.prev_yes_state = yes_state;
4835 yes_state = st;
4836 /* FALL THROUGH */
4837 push_state:
4838 /* push a new regex state, then continue at scan */
4839 {
4840 regmatch_state *newst;
4841
24b23f37
YO
4842 DEBUG_STACK_r({
4843 regmatch_state *cur = st;
4844 regmatch_state *curyes = yes_state;
4845 int curd = depth;
4846 regmatch_slab *slab = PL_regmatch_slab;
4847 for (;curd > -1;cur--,curd--) {
4848 if (cur < SLAB_FIRST(slab)) {
4849 slab = slab->prev;
4850 cur = SLAB_LAST(slab);
4851 }
4852 PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
4853 REPORT_CODE_OFF + 2 + depth * 2,"",
4854 curd, reg_name[cur->resume_state],
4855 (curyes == cur) ? "yes" : ""
4856 );
4857 if (curyes == cur)
4858 curyes = cur->u.yes.prev_yes_state;
4859 }
4860 } else
4861 DEBUG_STATE_pp("push")
4862 );
40a82448 4863 depth++;
40a82448
DM
4864 st->locinput = locinput;
4865 newst = st+1;
4866 if (newst > SLAB_LAST(PL_regmatch_slab))
4867 newst = S_push_slab(aTHX);
4868 PL_regmatch_state = newst;
786e8c11 4869
40a82448
DM
4870 locinput = PL_reginput;
4871 nextchr = UCHARAT(locinput);
4872 st = newst;
4873 continue;
4874 /* NOTREACHED */
4875 }
a0d0e21e 4876 }
a687059c 4877
a0d0e21e
LW
4878 /*
4879 * We get here only if there's trouble -- normally "case END" is
4880 * the terminating point.
4881 */
cea2e8a9 4882 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 4883 /*NOTREACHED*/
4633a7c4
LW
4884 sayNO;
4885
262b90c4 4886yes:
77cb431f
DM
4887 if (yes_state) {
4888 /* we have successfully completed a subexpression, but we must now
4889 * pop to the state marked by yes_state and continue from there */
77cb431f 4890 assert(st != yes_state);
5bc10b2c
DM
4891#ifdef DEBUGGING
4892 while (st != yes_state) {
4893 st--;
4894 if (st < SLAB_FIRST(PL_regmatch_slab)) {
4895 PL_regmatch_slab = PL_regmatch_slab->prev;
4896 st = SLAB_LAST(PL_regmatch_slab);
4897 }
e2e6a0f1 4898 DEBUG_STATE_r({
54612592
YO
4899 if (no_final) {
4900 DEBUG_STATE_pp("pop (no final)");
4901 } else {
4902 DEBUG_STATE_pp("pop (yes)");
4903 }
e2e6a0f1 4904 });
5bc10b2c
DM
4905 depth--;
4906 }
4907#else
77cb431f
DM
4908 while (yes_state < SLAB_FIRST(PL_regmatch_slab)
4909 || yes_state > SLAB_LAST(PL_regmatch_slab))
4910 {
4911 /* not in this slab, pop slab */
4912 depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
4913 PL_regmatch_slab = PL_regmatch_slab->prev;
4914 st = SLAB_LAST(PL_regmatch_slab);
4915 }
4916 depth -= (st - yes_state);
5bc10b2c 4917#endif
77cb431f
DM
4918 st = yes_state;
4919 yes_state = st->u.yes.prev_yes_state;
4920 PL_regmatch_state = st;
24b23f37 4921
5d458dd8
YO
4922 if (no_final) {
4923 locinput= st->locinput;
4924 nextchr = UCHARAT(locinput);
4925 }
54612592 4926 state_num = st->resume_state + no_final;
24d3c4a9 4927 goto reenter_switch;
77cb431f
DM
4928 }
4929
a3621e74 4930 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
e4584336 4931 PL_colors[4], PL_colors[5]));
02db2b7b 4932
95b24440 4933 result = 1;
aa283a38 4934 goto final_exit;
4633a7c4
LW
4935
4936no:
a3621e74 4937 DEBUG_EXECUTE_r(
7821416a 4938 PerlIO_printf(Perl_debug_log,
786e8c11 4939 "%*s %sfailed...%s\n",
5bc10b2c 4940 REPORT_CODE_OFF+depth*2, "",
786e8c11 4941 PL_colors[4], PL_colors[5])
7821416a 4942 );
aa283a38 4943
262b90c4 4944no_silent:
54612592
YO
4945 if (no_final) {
4946 if (yes_state) {
4947 goto yes;
4948 } else {
4949 goto final_exit;
4950 }
4951 }
aa283a38
DM
4952 if (depth) {
4953 /* there's a previous state to backtrack to */
40a82448
DM
4954 st--;
4955 if (st < SLAB_FIRST(PL_regmatch_slab)) {
4956 PL_regmatch_slab = PL_regmatch_slab->prev;
4957 st = SLAB_LAST(PL_regmatch_slab);
4958 }
4959 PL_regmatch_state = st;
40a82448
DM
4960 locinput= st->locinput;
4961 nextchr = UCHARAT(locinput);
4962
5bc10b2c
DM
4963 DEBUG_STATE_pp("pop");
4964 depth--;
262b90c4
DM
4965 if (yes_state == st)
4966 yes_state = st->u.yes.prev_yes_state;
5bc10b2c 4967
24d3c4a9
DM
4968 state_num = st->resume_state + 1; /* failure = success + 1 */
4969 goto reenter_switch;
95b24440 4970 }
24d3c4a9 4971 result = 0;
aa283a38 4972
262b90c4 4973 final_exit:
e2e6a0f1 4974 if (rex->reganch & ROPT_VERBARG_SEEN) {
5d458dd8
YO
4975 SV *sv_err = get_sv("REGERROR", 1);
4976 SV *sv_mrk = get_sv("REGMARK", 1);
4977 if (result) {
e2e6a0f1 4978 sv_commit = &PL_sv_no;
5d458dd8
YO
4979 if (!sv_yes_mark)
4980 sv_yes_mark = &PL_sv_yes;
4981 } else {
4982 if (!sv_commit)
4983 sv_commit = &PL_sv_yes;
4984 sv_yes_mark = &PL_sv_no;
4985 }
4986 sv_setsv(sv_err, sv_commit);
4987 sv_setsv(sv_mrk, sv_yes_mark);
e2e6a0f1 4988 }
5d9a96ca
DM
4989 /* restore original high-water mark */
4990 PL_regmatch_slab = orig_slab;
4991 PL_regmatch_state = orig_state;
4992
4993 /* free all slabs above current one */
4994 if (orig_slab->next) {
c4fd8992 4995 regmatch_slab *sl = orig_slab->next;
5d9a96ca
DM
4996 orig_slab->next = NULL;
4997 while (sl) {
c4fd8992 4998 regmatch_slab * const osl = sl;
5d9a96ca 4999 sl = sl->next;
ad65c075 5000 Safefree(osl);
5d9a96ca
DM
5001 }
5002 }
5003
95b24440 5004 return result;
a687059c
LW
5005}
5006
5007/*
5008 - regrepeat - repeatedly match something simple, report how many
5009 */
5010/*
5011 * [This routine now assumes that it will only match on things of length 1.
5012 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 5013 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 5014 */
76e3520e 5015STATIC I32
e2e6a0f1 5016S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
a687059c 5017{
27da23d5 5018 dVAR;
a0d0e21e 5019 register char *scan;
a0d0e21e 5020 register I32 c;
3280af22 5021 register char *loceol = PL_regeol;
a0ed51b3 5022 register I32 hardcount = 0;
53c4c00c 5023 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 5024
3280af22 5025 scan = PL_reginput;
faf11cac
HS
5026 if (max == REG_INFTY)
5027 max = I32_MAX;
5028 else if (max < loceol - scan)
7f596f4c 5029 loceol = scan + max;
a0d0e21e 5030 switch (OP(p)) {
22c35a8c 5031 case REG_ANY:
1aa99e6b 5032 if (do_utf8) {
ffc61ed2 5033 loceol = PL_regeol;
1aa99e6b 5034 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
5035 scan += UTF8SKIP(scan);
5036 hardcount++;
5037 }
5038 } else {
5039 while (scan < loceol && *scan != '\n')
5040 scan++;
a0ed51b3
LW
5041 }
5042 break;
ffc61ed2 5043 case SANY:
def8e4ea
JH
5044 if (do_utf8) {
5045 loceol = PL_regeol;
a0804c9e 5046 while (scan < loceol && hardcount < max) {
def8e4ea
JH
5047 scan += UTF8SKIP(scan);
5048 hardcount++;
5049 }
5050 }
5051 else
5052 scan = loceol;
a0ed51b3 5053 break;
f33976b4
DB
5054 case CANY:
5055 scan = loceol;
5056 break;
090f7165
JH
5057 case EXACT: /* length of string is 1 */
5058 c = (U8)*STRING(p);
5059 while (scan < loceol && UCHARAT(scan) == c)
5060 scan++;
bbce6d69 5061 break;
5062 case EXACTF: /* length of string is 1 */
cd439c50 5063 c = (U8)*STRING(p);
bbce6d69 5064 while (scan < loceol &&
22c35a8c 5065 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 5066 scan++;
5067 break;
5068 case EXACTFL: /* length of string is 1 */
3280af22 5069 PL_reg_flags |= RF_tainted;
cd439c50 5070 c = (U8)*STRING(p);
bbce6d69 5071 while (scan < loceol &&
22c35a8c 5072 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
5073 scan++;
5074 break;
5075 case ANYOF:
ffc61ed2
JH
5076 if (do_utf8) {
5077 loceol = PL_regeol;
cfc92286 5078 while (hardcount < max && scan < loceol &&
32fc9b6a 5079 reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
ffc61ed2
JH
5080 scan += UTF8SKIP(scan);
5081 hardcount++;
5082 }
5083 } else {
32fc9b6a 5084 while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
ffc61ed2
JH
5085 scan++;
5086 }
a0d0e21e
LW
5087 break;
5088 case ALNUM:
1aa99e6b 5089 if (do_utf8) {
ffc61ed2 5090 loceol = PL_regeol;
1a4fad37 5091 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 5092 while (hardcount < max && scan < loceol &&
3568d838 5093 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
5094 scan += UTF8SKIP(scan);
5095 hardcount++;
5096 }
5097 } else {
5098 while (scan < loceol && isALNUM(*scan))
5099 scan++;
a0ed51b3
LW
5100 }
5101 break;
bbce6d69 5102 case ALNUML:
3280af22 5103 PL_reg_flags |= RF_tainted;
1aa99e6b 5104 if (do_utf8) {
ffc61ed2 5105 loceol = PL_regeol;
1aa99e6b
IH
5106 while (hardcount < max && scan < loceol &&
5107 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
5108 scan += UTF8SKIP(scan);
5109 hardcount++;
5110 }
5111 } else {
5112 while (scan < loceol && isALNUM_LC(*scan))
5113 scan++;
a0ed51b3
LW
5114 }
5115 break;
a0d0e21e 5116 case NALNUM:
1aa99e6b 5117 if (do_utf8) {
ffc61ed2 5118 loceol = PL_regeol;
1a4fad37 5119 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 5120 while (hardcount < max && scan < loceol &&
3568d838 5121 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
5122 scan += UTF8SKIP(scan);
5123 hardcount++;
5124 }
5125 } else {
5126 while (scan < loceol && !isALNUM(*scan))
5127 scan++;
a0ed51b3
LW
5128 }
5129 break;
bbce6d69 5130 case NALNUML:
3280af22 5131 PL_reg_flags |= RF_tainted;
1aa99e6b 5132 if (do_utf8) {
ffc61ed2 5133 loceol = PL_regeol;
1aa99e6b
IH
5134 while (hardcount < max && scan < loceol &&
5135 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
5136 scan += UTF8SKIP(scan);
5137 hardcount++;
5138 }
5139 } else {
5140 while (scan < loceol && !isALNUM_LC(*scan))
5141 scan++;
a0ed51b3
LW
5142 }
5143 break;
a0d0e21e 5144 case SPACE:
1aa99e6b 5145 if (do_utf8) {
ffc61ed2 5146 loceol = PL_regeol;
1a4fad37 5147 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 5148 while (hardcount < max && scan < loceol &&
3568d838
JH
5149 (*scan == ' ' ||
5150 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
5151 scan += UTF8SKIP(scan);
5152 hardcount++;
5153 }
5154 } else {
5155 while (scan < loceol && isSPACE(*scan))
5156 scan++;
a0ed51b3
LW
5157 }
5158 break;
bbce6d69 5159 case SPACEL:
3280af22 5160 PL_reg_flags |= RF_tainted;
1aa99e6b 5161 if (do_utf8) {
ffc61ed2 5162 loceol = PL_regeol;
1aa99e6b 5163 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
5164 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5165 scan += UTF8SKIP(scan);
5166 hardcount++;
5167 }
5168 } else {
5169 while (scan < loceol && isSPACE_LC(*scan))
5170 scan++;
a0ed51b3
LW
5171 }
5172 break;
a0d0e21e 5173 case NSPACE:
1aa99e6b 5174 if (do_utf8) {
ffc61ed2 5175 loceol = PL_regeol;
1a4fad37 5176 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 5177 while (hardcount < max && scan < loceol &&
3568d838
JH
5178 !(*scan == ' ' ||
5179 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
5180 scan += UTF8SKIP(scan);
5181 hardcount++;
5182 }
5183 } else {
5184 while (scan < loceol && !isSPACE(*scan))
5185 scan++;
5186 break;
a0ed51b3 5187 }
bbce6d69 5188 case NSPACEL:
3280af22 5189 PL_reg_flags |= RF_tainted;
1aa99e6b 5190 if (do_utf8) {
ffc61ed2 5191 loceol = PL_regeol;
1aa99e6b 5192 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
5193 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5194 scan += UTF8SKIP(scan);
5195 hardcount++;
5196 }
5197 } else {
5198 while (scan < loceol && !isSPACE_LC(*scan))
5199 scan++;
a0ed51b3
LW
5200 }
5201 break;
a0d0e21e 5202 case DIGIT:
1aa99e6b 5203 if (do_utf8) {
ffc61ed2 5204 loceol = PL_regeol;
1a4fad37 5205 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 5206 while (hardcount < max && scan < loceol &&
3568d838 5207 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
5208 scan += UTF8SKIP(scan);
5209 hardcount++;
5210 }
5211 } else {
5212 while (scan < loceol && isDIGIT(*scan))
5213 scan++;
a0ed51b3
LW
5214 }
5215 break;
a0d0e21e 5216 case NDIGIT:
1aa99e6b 5217 if (do_utf8) {
ffc61ed2 5218 loceol = PL_regeol;
1a4fad37 5219 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 5220 while (hardcount < max && scan < loceol &&
3568d838 5221 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
5222 scan += UTF8SKIP(scan);
5223 hardcount++;
5224 }
5225 } else {
5226 while (scan < loceol && !isDIGIT(*scan))
5227 scan++;
a0ed51b3
LW
5228 }
5229 break;
a0d0e21e
LW
5230 default: /* Called on something of 0 width. */
5231 break; /* So match right here or not at all. */
5232 }
a687059c 5233
a0ed51b3
LW
5234 if (hardcount)
5235 c = hardcount;
5236 else
5237 c = scan - PL_reginput;
3280af22 5238 PL_reginput = scan;
a687059c 5239
a3621e74 5240 DEBUG_r({
e68ec53f 5241 GET_RE_DEBUG_FLAGS_DECL;
be8e71aa 5242 DEBUG_EXECUTE_r({
e68ec53f
YO
5243 SV * const prop = sv_newmortal();
5244 regprop(prog, prop, p);
5245 PerlIO_printf(Perl_debug_log,
be8e71aa 5246 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
e2e6a0f1 5247 REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
a3621e74 5248 });
be8e71aa 5249 });
9041c2e3 5250
a0d0e21e 5251 return(c);
a687059c
LW
5252}
5253
c277df42 5254
be8e71aa 5255#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
c277df42 5256/*
ffc61ed2
JH
5257- regclass_swash - prepare the utf8 swash
5258*/
5259
5260SV *
32fc9b6a 5261Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
ffc61ed2 5262{
97aff369 5263 dVAR;
9e55ce06
JH
5264 SV *sw = NULL;
5265 SV *si = NULL;
5266 SV *alt = NULL;
3dab1dad 5267 const struct reg_data * const data = prog ? prog->data : NULL;
ffc61ed2 5268
4f639d21 5269 if (data && data->count) {
a3b680e6 5270 const U32 n = ARG(node);
ffc61ed2 5271
4f639d21
DM
5272 if (data->what[n] == 's') {
5273 SV * const rv = (SV*)data->data[n];
890ce7af 5274 AV * const av = (AV*)SvRV((SV*)rv);
2d03de9c 5275 SV **const ary = AvARRAY(av);
9e55ce06 5276 SV **a, **b;
9041c2e3 5277
711a919c 5278 /* See the end of regcomp.c:S_regclass() for
9e55ce06
JH
5279 * documentation of these array elements. */
5280
b11f357e 5281 si = *ary;
8f7f7219 5282 a = SvROK(ary[1]) ? &ary[1] : 0;
b11f357e
JH
5283 b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
5284
ffc61ed2
JH
5285 if (a)
5286 sw = *a;
5287 else if (si && doinit) {
5288 sw = swash_init("utf8", "", si, 1, 0);
5289 (void)av_store(av, 1, sw);
5290 }
9e55ce06
JH
5291 if (b)
5292 alt = *b;
ffc61ed2
JH
5293 }
5294 }
5295
9e55ce06
JH
5296 if (listsvp)
5297 *listsvp = si;
5298 if (altsvp)
5299 *altsvp = alt;
ffc61ed2
JH
5300
5301 return sw;
5302}
76234dfb 5303#endif
ffc61ed2
JH
5304
5305/*
ba7b4546 5306 - reginclass - determine if a character falls into a character class
832705d4
JH
5307
5308 The n is the ANYOF regnode, the p is the target string, lenp
5309 is pointer to the maximum length of how far to go in the p
5310 (if the lenp is zero, UTF8SKIP(p) is used),
5311 do_utf8 tells whether the target string is in UTF-8.
5312
bbce6d69 5313 */
5314
76e3520e 5315STATIC bool
32fc9b6a 5316S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
bbce6d69 5317{
27da23d5 5318 dVAR;
a3b680e6 5319 const char flags = ANYOF_FLAGS(n);
bbce6d69 5320 bool match = FALSE;
cc07378b 5321 UV c = *p;
ae9ddab8 5322 STRLEN len = 0;
9e55ce06 5323 STRLEN plen;
1aa99e6b 5324
19f67299
TS
5325 if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5326 c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
4ad0818d
DM
5327 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
5328 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
e8a70c6f
SP
5329 if (len == (STRLEN)-1)
5330 Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
19f67299 5331 }
bbce6d69 5332
0f0076b4 5333 plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
ffc61ed2 5334 if (do_utf8 || (flags & ANYOF_UNICODE)) {
9e55ce06
JH
5335 if (lenp)
5336 *lenp = 0;
ffc61ed2 5337 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
5338 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5339 match = TRUE;
bbce6d69 5340 }
3568d838 5341 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 5342 match = TRUE;
ffc61ed2 5343 if (!match) {
9e55ce06 5344 AV *av;
32fc9b6a 5345 SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
ffc61ed2
JH
5346
5347 if (sw) {
3568d838 5348 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
5349 match = TRUE;
5350 else if (flags & ANYOF_FOLD) {
9e55ce06
JH
5351 if (!match && lenp && av) {
5352 I32 i;
9e55ce06 5353 for (i = 0; i <= av_len(av); i++) {
890ce7af 5354 SV* const sv = *av_fetch(av, i, FALSE);
9e55ce06 5355 STRLEN len;
890ce7af 5356 const char * const s = SvPV_const(sv, len);
9e55ce06 5357
061b10df 5358 if (len <= plen && memEQ(s, (char*)p, len)) {
9e55ce06
JH
5359 *lenp = len;
5360 match = TRUE;
5361 break;
5362 }
5363 }
5364 }
5365 if (!match) {
89ebb4a3 5366 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4a623e43
JH
5367 STRLEN tmplen;
5368
9e55ce06
JH
5369 to_utf8_fold(p, tmpbuf, &tmplen);
5370 if (swash_fetch(sw, tmpbuf, do_utf8))
5371 match = TRUE;
5372 }
ffc61ed2
JH
5373 }
5374 }
bbce6d69 5375 }
9e55ce06 5376 if (match && lenp && *lenp == 0)
0f0076b4 5377 *lenp = UNISKIP(NATIVE_TO_UNI(c));
bbce6d69 5378 }
1aa99e6b 5379 if (!match && c < 256) {
ffc61ed2
JH
5380 if (ANYOF_BITMAP_TEST(n, c))
5381 match = TRUE;
5382 else if (flags & ANYOF_FOLD) {
eb160463 5383 U8 f;
a0ed51b3 5384
ffc61ed2
JH
5385 if (flags & ANYOF_LOCALE) {
5386 PL_reg_flags |= RF_tainted;
5387 f = PL_fold_locale[c];
5388 }
5389 else
5390 f = PL_fold[c];
5391 if (f != c && ANYOF_BITMAP_TEST(n, f))
5392 match = TRUE;
5393 }
5394
5395 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 5396 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
5397 if (
5398 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
5399 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
5400 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
5401 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
5402 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
5403 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
5404 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
5405 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5406 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
5407 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
5408 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
5409 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
5410 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
5411 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
5412 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
5413 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
5414 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
5415 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
5416 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
5417 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
5418 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
5419 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
5420 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
5421 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
5422 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
5423 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
5424 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
5425 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
5426 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
5427 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
5428 ) /* How's that for a conditional? */
5429 {
5430 match = TRUE;
5431 }
a0ed51b3 5432 }
a0ed51b3
LW
5433 }
5434
a0ed51b3
LW
5435 return (flags & ANYOF_INVERT) ? !match : match;
5436}
161b471a 5437
dfe13c55 5438STATIC U8 *
0ce71af7 5439S_reghop3(U8 *s, I32 off, const U8* lim)
9041c2e3 5440{
97aff369 5441 dVAR;
a0ed51b3 5442 if (off >= 0) {
1aa99e6b 5443 while (off-- && s < lim) {
ffc61ed2 5444 /* XXX could check well-formedness here */
a0ed51b3 5445 s += UTF8SKIP(s);
ffc61ed2 5446 }
a0ed51b3
LW
5447 }
5448 else {
1de06328
YO
5449 while (off++ && s > lim) {
5450 s--;
5451 if (UTF8_IS_CONTINUED(*s)) {
5452 while (s > lim && UTF8_IS_CONTINUATION(*s))
5453 s--;
a0ed51b3 5454 }
1de06328 5455 /* XXX could check well-formedness here */
a0ed51b3
LW
5456 }
5457 }
5458 return s;
5459}
161b471a 5460
f9f4320a
YO
5461#ifdef XXX_dmq
5462/* there are a bunch of places where we use two reghop3's that should
5463 be replaced with this routine. but since thats not done yet
5464 we ifdef it out - dmq
5465*/
dfe13c55 5466STATIC U8 *
1de06328
YO
5467S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
5468{
5469 dVAR;
5470 if (off >= 0) {
5471 while (off-- && s < rlim) {
5472 /* XXX could check well-formedness here */
5473 s += UTF8SKIP(s);
5474 }
5475 }
5476 else {
5477 while (off++ && s > llim) {
5478 s--;
5479 if (UTF8_IS_CONTINUED(*s)) {
5480 while (s > llim && UTF8_IS_CONTINUATION(*s))
5481 s--;
5482 }
5483 /* XXX could check well-formedness here */
5484 }
5485 }
5486 return s;
5487}
f9f4320a 5488#endif
1de06328
YO
5489
5490STATIC U8 *
0ce71af7 5491S_reghopmaybe3(U8* s, I32 off, const U8* lim)
a0ed51b3 5492{
97aff369 5493 dVAR;
a0ed51b3 5494 if (off >= 0) {
1aa99e6b 5495 while (off-- && s < lim) {
ffc61ed2 5496 /* XXX could check well-formedness here */
a0ed51b3 5497 s += UTF8SKIP(s);
ffc61ed2 5498 }
a0ed51b3 5499 if (off >= 0)
3dab1dad 5500 return NULL;
a0ed51b3
LW
5501 }
5502 else {
1de06328
YO
5503 while (off++ && s > lim) {
5504 s--;
5505 if (UTF8_IS_CONTINUED(*s)) {
5506 while (s > lim && UTF8_IS_CONTINUATION(*s))
5507 s--;
a0ed51b3 5508 }
1de06328 5509 /* XXX could check well-formedness here */
a0ed51b3
LW
5510 }
5511 if (off <= 0)
3dab1dad 5512 return NULL;
a0ed51b3
LW
5513 }
5514 return s;
5515}
51371543 5516
51371543 5517static void
acfe0abc 5518restore_pos(pTHX_ void *arg)
51371543 5519{
97aff369 5520 dVAR;
097eb12c 5521 regexp * const rex = (regexp *)arg;
51371543
GS
5522 if (PL_reg_eval_set) {
5523 if (PL_reg_oldsaved) {
4f639d21
DM
5524 rex->subbeg = PL_reg_oldsaved;
5525 rex->sublen = PL_reg_oldsavedlen;
f8c7b90f 5526#ifdef PERL_OLD_COPY_ON_WRITE
4f639d21 5527 rex->saved_copy = PL_nrs;
ed252734 5528#endif
4f639d21 5529 RX_MATCH_COPIED_on(rex);
51371543
GS
5530 }
5531 PL_reg_magic->mg_len = PL_reg_oldpos;
5532 PL_reg_eval_set = 0;
5533 PL_curpm = PL_reg_oldcurpm;
5534 }
5535}
33b8afdf
JH
5536
5537STATIC void
5538S_to_utf8_substr(pTHX_ register regexp *prog)
5539{
33b8afdf 5540 if (prog->float_substr && !prog->float_utf8) {
097eb12c
AL
5541 SV* const sv = newSVsv(prog->float_substr);
5542 prog->float_utf8 = sv;
33b8afdf
JH
5543 sv_utf8_upgrade(sv);
5544 if (SvTAIL(prog->float_substr))
5545 SvTAIL_on(sv);
5546 if (prog->float_substr == prog->check_substr)
5547 prog->check_utf8 = sv;
5548 }
5549 if (prog->anchored_substr && !prog->anchored_utf8) {
097eb12c
AL
5550 SV* const sv = newSVsv(prog->anchored_substr);
5551 prog->anchored_utf8 = sv;
33b8afdf
JH
5552 sv_utf8_upgrade(sv);
5553 if (SvTAIL(prog->anchored_substr))
5554 SvTAIL_on(sv);
5555 if (prog->anchored_substr == prog->check_substr)
5556 prog->check_utf8 = sv;
5557 }
5558}
5559
5560STATIC void
5561S_to_byte_substr(pTHX_ register regexp *prog)
5562{
97aff369 5563 dVAR;
33b8afdf 5564 if (prog->float_utf8 && !prog->float_substr) {
097eb12c
AL
5565 SV* sv = newSVsv(prog->float_utf8);
5566 prog->float_substr = sv;
33b8afdf
JH
5567 if (sv_utf8_downgrade(sv, TRUE)) {
5568 if (SvTAIL(prog->float_utf8))
5569 SvTAIL_on(sv);
5570 } else {
5571 SvREFCNT_dec(sv);
5572 prog->float_substr = sv = &PL_sv_undef;
5573 }
5574 if (prog->float_utf8 == prog->check_utf8)
5575 prog->check_substr = sv;
5576 }
5577 if (prog->anchored_utf8 && !prog->anchored_substr) {
097eb12c
AL
5578 SV* sv = newSVsv(prog->anchored_utf8);
5579 prog->anchored_substr = sv;
33b8afdf
JH
5580 if (sv_utf8_downgrade(sv, TRUE)) {
5581 if (SvTAIL(prog->anchored_utf8))
5582 SvTAIL_on(sv);
5583 } else {
5584 SvREFCNT_dec(sv);
5585 prog->anchored_substr = sv = &PL_sv_undef;
5586 }
5587 if (prog->anchored_utf8 == prog->check_utf8)
5588 prog->check_substr = sv;
5589 }
5590}
66610fdd
RGS
5591
5592/*
5593 * Local variables:
5594 * c-indentation-style: bsd
5595 * c-basic-offset: 4
5596 * indent-tabs-mode: t
5597 * End:
5598 *
37442d52
RGS
5599 * ex: set ts=8 sts=4 sw=4 noet:
5600 */