This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Module::Pluggable diff had some hidden binary files in it. I'm
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
61296642
DM
8/* This file contains functions for executing a regular expression. See
9 * also regcomp.c which funnily enough, contains functions for compiling
166f8a29 10 * a regular expression.
e4a054ea
DM
11 *
12 * This file is also copied at build time to ext/re/re_exec.c, where
13 * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14 * This causes the main functions to be compiled under new names and with
15 * debugging support added, which makes "use re 'debug'" work.
166f8a29
DM
16 */
17
a687059c
LW
18/* NOTE: this is derived from Henry Spencer's regexp code, and should not
19 * confused with the original package (see point 3 below). Thanks, Henry!
20 */
21
22/* Additional note: this code is very heavily munged from Henry's version
23 * in places. In some spots I've traded clarity for efficiency, so don't
24 * blame Henry for some of the lack of readability.
25 */
26
e50aee73
AD
27/* The names of the functions have been changed from regcomp and
28 * regexec to pregcomp and pregexec in order to avoid conflicts
29 * with the POSIX routines of the same names.
30*/
31
b9d5759e 32#ifdef PERL_EXT_RE_BUILD
54df2634 33#include "re_top.h"
9041c2e3 34#endif
56953603 35
a687059c 36/*
e50aee73 37 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
38 *
39 * Copyright (c) 1986 by University of Toronto.
40 * Written by Henry Spencer. Not derived from licensed software.
41 *
42 * Permission is granted to anyone to use this software for any
43 * purpose on any computer system, and to redistribute it freely,
44 * subject to the following restrictions:
45 *
46 * 1. The author is not responsible for the consequences of use of
47 * this software, no matter how awful, even if they arise
48 * from defects in it.
49 *
50 * 2. The origin of this software must not be misrepresented, either
51 * by explicit claim or by omission.
52 *
53 * 3. Altered versions must be plainly marked as such, and must not
54 * be misrepresented as being the original software.
55 *
56 **** Alterations to Henry's code are...
57 ****
4bb101f2 58 **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 59 **** 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 60 ****
9ef589d8
LW
61 **** You may distribute under the terms of either the GNU General Public
62 **** License or the Artistic License, as specified in the README file.
a687059c
LW
63 *
64 * Beware that some of this code is subtly aware of the way operator
65 * precedence is structured in regular expressions. Serious changes in
66 * regular-expression syntax might require a total rethink.
67 */
68#include "EXTERN.h"
864dbfa3 69#define PERL_IN_REGEXEC_C
a687059c 70#include "perl.h"
0f5d15d6 71
54df2634
NC
72#ifdef PERL_IN_XSUB_RE
73# include "re_comp.h"
74#else
75# include "regcomp.h"
76#endif
a687059c 77
c277df42
IZ
78#define RF_tainted 1 /* tainted information used? */
79#define RF_warned 2 /* warned about big count? */
faec1544 80
ab3bbdeb 81#define RF_utf8 8 /* Pattern contains multibyte chars? */
a0ed51b3 82
eb160463 83#define UTF ((PL_reg_flags & RF_utf8) != 0)
ce862d02
IZ
84
85#define RS_init 1 /* eval environment created */
86#define RS_set 2 /* replsv value is set */
c277df42 87
a687059c
LW
88#ifndef STATIC
89#define STATIC static
90#endif
91
32fc9b6a 92#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
7d3e948e 93
c277df42
IZ
94/*
95 * Forwards.
96 */
97
33b8afdf 98#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 99#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 100
3dab1dad
YO
101#define HOPc(pos,off) \
102 (char *)(PL_reg_match_utf8 \
52657f30 103 ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
3dab1dad
YO
104 : (U8*)(pos + off))
105#define HOPBACKc(pos, off) \
07be1b83
YO
106 (char*)(PL_reg_match_utf8\
107 ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
108 : (pos - off >= PL_bostr) \
8e11feef 109 ? (U8*)pos - off \
3dab1dad 110 : NULL)
efb30f32 111
e7409c1b 112#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
1aa99e6b 113#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
1aa99e6b 114
1a4fad37
AL
115#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
116 if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
117#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
118#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
119#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
120#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
51371543 121
3dab1dad
YO
122/* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
123
5f80c4cf 124/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
125#define JUMPABLE(rn) ( \
126 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
127 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
128 OP(rn) == PLUS || OP(rn) == MINMOD || \
3dab1dad 129 (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
130)
131
cca55fe3 132#define HAS_TEXT(rn) ( \
3dab1dad 133 PL_regkind[OP(rn)] == EXACT || PL_regkind[OP(rn)] == REF \
cca55fe3 134)
e2d8ce26 135
a84d97b6
HS
136/*
137 Search for mandatory following text node; for lookahead, the text must
138 follow but for lookbehind (rn->flags != 0) we skip to the next step.
139*/
cca55fe3 140#define FIND_NEXT_IMPT(rn) STMT_START { \
3dab1dad
YO
141 while (JUMPABLE(rn)) { \
142 const OPCODE type = OP(rn); \
143 if (type == SUSPEND || PL_regkind[type] == CURLY) \
e2d8ce26 144 rn = NEXTOPER(NEXTOPER(rn)); \
3dab1dad 145 else if (type == PLUS) \
cca55fe3 146 rn = NEXTOPER(rn); \
3dab1dad 147 else if (type == IFMATCH) \
a84d97b6 148 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
e2d8ce26 149 else rn += NEXT_OFF(rn); \
3dab1dad 150 } \
5f80c4cf 151} STMT_END
74750237 152
c476f425 153
acfe0abc 154static void restore_pos(pTHX_ void *arg);
51371543 155
76e3520e 156STATIC CHECKPOINT
cea2e8a9 157S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 158{
97aff369 159 dVAR;
a3b680e6 160 const int retval = PL_savestack_ix;
b1ce53c5 161#define REGCP_PAREN_ELEMS 4
a3b680e6 162 const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e 163 int p;
40a82448 164 GET_RE_DEBUG_FLAGS_DECL;
a0d0e21e 165
e49a9654
IH
166 if (paren_elems_to_push < 0)
167 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
168
6bda09f9 169#define REGCP_OTHER_ELEMS 8
4b3c1a47 170 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
7f69552c 171
3280af22 172 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 173/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
174 SSPUSHINT(PL_regendp[p]);
175 SSPUSHINT(PL_regstartp[p]);
3280af22 176 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e 177 SSPUSHINT(p);
40a82448
DM
178 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
179 " saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
180 (UV)p, (IV)PL_regstartp[p],
181 (IV)(PL_reg_start_tmp[p] - PL_bostr),
182 (IV)PL_regendp[p]
183 ));
a0d0e21e 184 }
b1ce53c5 185/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
6bda09f9
YO
186 SSPUSHPTR(PL_regstartp);
187 SSPUSHPTR(PL_regendp);
3280af22
NIS
188 SSPUSHINT(PL_regsize);
189 SSPUSHINT(*PL_reglastparen);
a01268b5 190 SSPUSHINT(*PL_reglastcloseparen);
3280af22 191 SSPUSHPTR(PL_reginput);
41123dfd
JH
192#define REGCP_FRAME_ELEMS 2
193/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
194 * are needed for the regexp context stack bookkeeping. */
195 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 196 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 197
a0d0e21e
LW
198 return retval;
199}
200
c277df42 201/* These are needed since we do not localize EVAL nodes: */
ab3bbdeb
YO
202#define REGCP_SET(cp) \
203 DEBUG_STATE_r( \
ab3bbdeb 204 PerlIO_printf(Perl_debug_log, \
e4f74956 205 " Setting an EVAL scope, savestack=%"IVdf"\n", \
ab3bbdeb
YO
206 (IV)PL_savestack_ix)); \
207 cp = PL_savestack_ix
c3464db5 208
ab3bbdeb 209#define REGCP_UNWIND(cp) \
e4f74956 210 DEBUG_STATE_r( \
ab3bbdeb 211 if (cp != PL_savestack_ix) \
e4f74956
YO
212 PerlIO_printf(Perl_debug_log, \
213 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
ab3bbdeb
YO
214 (IV)(cp), (IV)PL_savestack_ix)); \
215 regcpblow(cp)
c277df42 216
76e3520e 217STATIC char *
097eb12c 218S_regcppop(pTHX_ const regexp *rex)
a0d0e21e 219{
97aff369 220 dVAR;
3b6647e0 221 U32 i;
a0d0e21e 222 char *input;
b1ce53c5 223
a3621e74
YO
224 GET_RE_DEBUG_FLAGS_DECL;
225
b1ce53c5 226 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 227 i = SSPOPINT;
b1ce53c5
JH
228 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
229 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 230 input = (char *) SSPOPPTR;
a01268b5 231 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
232 *PL_reglastparen = SSPOPINT;
233 PL_regsize = SSPOPINT;
6bda09f9
YO
234 PL_regendp=(I32 *) SSPOPPTR;
235 PL_regstartp=(I32 *) SSPOPPTR;
b1ce53c5 236
6bda09f9 237
b1ce53c5 238 /* Now restore the parentheses context. */
41123dfd
JH
239 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
240 i > 0; i -= REGCP_PAREN_ELEMS) {
1df70142 241 I32 tmps;
097eb12c 242 U32 paren = (U32)SSPOPINT;
3280af22 243 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
244 PL_regstartp[paren] = SSPOPINT;
245 tmps = SSPOPINT;
3280af22
NIS
246 if (paren <= *PL_reglastparen)
247 PL_regendp[paren] = tmps;
a3621e74 248 DEBUG_EXECUTE_r(
c3464db5 249 PerlIO_printf(Perl_debug_log,
b900a521 250 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 251 (UV)paren, (IV)PL_regstartp[paren],
b900a521 252 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 253 (IV)PL_regendp[paren],
3280af22 254 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 255 );
a0d0e21e 256 }
a3621e74 257 DEBUG_EXECUTE_r(
bb7a0f54 258 if (*PL_reglastparen + 1 <= rex->nparens) {
c3464db5 259 PerlIO_printf(Perl_debug_log,
faccc32b 260 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
4f639d21 261 (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
c277df42
IZ
262 }
263 );
daf18116 264#if 1
dafc8851
JH
265 /* It would seem that the similar code in regtry()
266 * already takes care of this, and in fact it is in
267 * a better location to since this code can #if 0-ed out
268 * but the code in regtry() is needed or otherwise tests
269 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
270 * (as of patchlevel 7877) will fail. Then again,
271 * this code seems to be necessary or otherwise
272 * building DynaLoader will fail:
273 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
274 * --jhi */
3b6647e0 275 for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
097eb12c
AL
276 if (i > PL_regsize)
277 PL_regstartp[i] = -1;
278 PL_regendp[i] = -1;
a0d0e21e 279 }
dafc8851 280#endif
a0d0e21e
LW
281 return input;
282}
283
02db2b7b 284#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 285
a687059c 286/*
e50aee73 287 * pregexec and friends
a687059c
LW
288 */
289
76234dfb 290#ifndef PERL_IN_XSUB_RE
a687059c 291/*
c277df42 292 - pregexec - match a regexp against a string
a687059c 293 */
c277df42 294I32
864dbfa3 295Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 296 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
297/* strend: pointer to null at end of string */
298/* strbeg: real beginning of string */
299/* minend: end of match must be >=minend after stringarg. */
300/* nosave: For optimizations. */
301{
302 return
9041c2e3 303 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
304 nosave ? 0 : REXEC_COPY_STR);
305}
76234dfb 306#endif
22e551b9 307
9041c2e3 308/*
cad2e5aa
JH
309 * Need to implement the following flags for reg_anch:
310 *
311 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
312 * USE_INTUIT_ML
313 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
314 * INTUIT_AUTORITATIVE_ML
315 * INTUIT_ONCE_NOML - Intuit can match in one location only.
316 * INTUIT_ONCE_ML
317 *
318 * Another flag for this function: SECOND_TIME (so that float substrs
319 * with giant delta may be not rechecked).
320 */
321
322/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
323
3f7c398e 324/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
cad2e5aa
JH
325 Otherwise, only SvCUR(sv) is used to get strbeg. */
326
327/* XXXX We assume that strpos is strbeg unless sv. */
328
6eb5f6b9
JH
329/* XXXX Some places assume that there is a fixed substring.
330 An update may be needed if optimizer marks as "INTUITable"
331 RExen without fixed substrings. Similarly, it is assumed that
332 lengths of all the strings are no more than minlen, thus they
333 cannot come from lookahead.
40d049e4
YO
334 (Or minlen should take into account lookahead.)
335 NOTE: Some of this comment is not correct. minlen does now take account
336 of lookahead/behind. Further research is required. -- demerphq
337
338*/
6eb5f6b9 339
2c2d71f5
JH
340/* A failure to find a constant substring means that there is no need to make
341 an expensive call to REx engine, thus we celebrate a failure. Similarly,
342 finding a substring too deep into the string means that less calls to
30944b6d
IZ
343 regtry() should be needed.
344
345 REx compiler's optimizer found 4 possible hints:
346 a) Anchored substring;
347 b) Fixed substring;
348 c) Whether we are anchored (beginning-of-line or \G);
349 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 350 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
351 string which does not contradict any of them.
352 */
2c2d71f5 353
6eb5f6b9
JH
354/* Most of decisions we do here should have been done at compile time.
355 The nodes of the REx which we used for the search should have been
356 deleted from the finite automaton. */
357
cad2e5aa
JH
358char *
359Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
360 char *strend, U32 flags, re_scream_pos_data *data)
361{
97aff369 362 dVAR;
b7953727 363 register I32 start_shift = 0;
cad2e5aa 364 /* Should be nonnegative! */
b7953727 365 register I32 end_shift = 0;
2c2d71f5
JH
366 register char *s;
367 register SV *check;
a1933d95 368 char *strbeg;
cad2e5aa 369 char *t;
1de06328 370 const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
cad2e5aa 371 I32 ml_anch;
bd61b366
SS
372 register char *other_last = NULL; /* other substr checked before this */
373 char *check_at = NULL; /* check substr found at this pos */
bbe252da 374 const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
30944b6d 375#ifdef DEBUGGING
890ce7af 376 const char * const i_strpos = strpos;
30944b6d 377#endif
a3621e74
YO
378
379 GET_RE_DEBUG_FLAGS_DECL;
380
a30b2f1f 381 RX_MATCH_UTF8_set(prog,do_utf8);
cad2e5aa 382
bbe252da 383 if (prog->extflags & RXf_UTF8) {
b8d68ded
JH
384 PL_reg_flags |= RF_utf8;
385 }
ab3bbdeb
YO
386 DEBUG_EXECUTE_r(
387 debug_start_match(prog, do_utf8, strpos, strend,
1de06328
YO
388 sv ? "Guessing start of match in sv for"
389 : "Guessing start of match in string for");
2a782b5b 390 );
cad2e5aa 391
c344f387
JH
392 /* CHR_DIST() would be more correct here but it makes things slow. */
393 if (prog->minlen > strend - strpos) {
a3621e74 394 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584 395 "String too short... [re_intuit_start]\n"));
cad2e5aa 396 goto fail;
2c2d71f5 397 }
1de06328 398
a1933d95 399 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 400 PL_regeol = strend;
33b8afdf
JH
401 if (do_utf8) {
402 if (!prog->check_utf8 && prog->check_substr)
403 to_utf8_substr(prog);
404 check = prog->check_utf8;
405 } else {
406 if (!prog->check_substr && prog->check_utf8)
407 to_byte_substr(prog);
408 check = prog->check_substr;
409 }
1de06328 410 if (check == &PL_sv_undef) {
a3621e74 411 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1de06328 412 "Non-utf8 string cannot match utf8 check string\n"));
33b8afdf
JH
413 goto fail;
414 }
bbe252da
YO
415 if (prog->extflags & RXf_ANCH) { /* Match at beg-of-str or after \n */
416 ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
417 || ( (prog->extflags & RXf_ANCH_BOL)
7fba1cd6 418 && !multiline ) ); /* Check after \n? */
cad2e5aa 419
7e25d62c 420 if (!ml_anch) {
bbe252da
YO
421 if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
422 && !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
3f7c398e 423 /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
7e25d62c
JH
424 && sv && !SvROK(sv)
425 && (strpos != strbeg)) {
a3621e74 426 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
7e25d62c
JH
427 goto fail;
428 }
429 if (prog->check_offset_min == prog->check_offset_max &&
bbe252da 430 !(prog->extflags & RXf_CANY_SEEN)) {
2c2d71f5 431 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
432 I32 slen;
433
1aa99e6b 434 s = HOP3c(strpos, prog->check_offset_min, strend);
1de06328 435
653099ff
GS
436 if (SvTAIL(check)) {
437 slen = SvCUR(check); /* >= 1 */
cad2e5aa 438
9041c2e3 439 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5 440 || (strend - s == slen && strend[-1] != '\n')) {
a3621e74 441 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
2c2d71f5 442 goto fail_finish;
cad2e5aa
JH
443 }
444 /* Now should match s[0..slen-2] */
445 slen--;
3f7c398e 446 if (slen && (*SvPVX_const(check) != *s
cad2e5aa 447 || (slen > 1
3f7c398e 448 && memNE(SvPVX_const(check), s, slen)))) {
2c2d71f5 449 report_neq:
a3621e74 450 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
2c2d71f5
JH
451 goto fail_finish;
452 }
cad2e5aa 453 }
3f7c398e 454 else if (*SvPVX_const(check) != *s
653099ff 455 || ((slen = SvCUR(check)) > 1
3f7c398e 456 && memNE(SvPVX_const(check), s, slen)))
2c2d71f5 457 goto report_neq;
c315bfe8 458 check_at = s;
2c2d71f5 459 goto success_at_start;
7e25d62c 460 }
cad2e5aa 461 }
2c2d71f5 462 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 463 s = strpos;
2c2d71f5 464 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
1de06328
YO
465 end_shift = prog->check_end_shift;
466
2c2d71f5 467 if (!ml_anch) {
a3b680e6 468 const I32 end = prog->check_offset_max + CHR_SVLEN(check)
653099ff 469 - (SvTAIL(check) != 0);
a3b680e6 470 const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
471
472 if (end_shift < eshift)
473 end_shift = eshift;
474 }
cad2e5aa 475 }
2c2d71f5 476 else { /* Can match at random position */
cad2e5aa
JH
477 ml_anch = 0;
478 s = strpos;
1de06328
YO
479 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
480 end_shift = prog->check_end_shift;
481
482 /* end shift should be non negative here */
cad2e5aa
JH
483 }
484
2c2d71f5 485#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 486 if (end_shift < 0)
1de06328 487 Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
b57a0404 488 (IV)end_shift, prog->precomp);
2c2d71f5
JH
489#endif
490
2c2d71f5
JH
491 restart:
492 /* Find a possible match in the region s..strend by looking for
493 the "check" substring in the region corrected by start/end_shift. */
1de06328
YO
494
495 {
496 I32 srch_start_shift = start_shift;
497 I32 srch_end_shift = end_shift;
498 if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
499 srch_end_shift -= ((strbeg - s) - srch_start_shift);
500 srch_start_shift = strbeg - s;
501 }
6bda09f9 502 DEBUG_OPTIMISE_MORE_r({
1de06328
YO
503 PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
504 (IV)prog->check_offset_min,
505 (IV)srch_start_shift,
506 (IV)srch_end_shift,
507 (IV)prog->check_end_shift);
508 });
509
cad2e5aa 510 if (flags & REXEC_SCREAM) {
cad2e5aa 511 I32 p = -1; /* Internal iterator of scream. */
a3b680e6 512 I32 * const pp = data ? data->scream_pos : &p;
cad2e5aa 513
2c2d71f5
JH
514 if (PL_screamfirst[BmRARE(check)] >= 0
515 || ( BmRARE(check) == '\n'
516 && (BmPREVIOUS(check) == SvCUR(check) - 1)
517 && SvTAIL(check) ))
9041c2e3 518 s = screaminstr(sv, check,
1de06328 519 srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
cad2e5aa 520 else
2c2d71f5 521 goto fail_finish;
4addbd3b
HS
522 /* we may be pointing at the wrong string */
523 if (s && RX_MATCH_COPIED(prog))
3f7c398e 524 s = strbeg + (s - SvPVX_const(sv));
cad2e5aa
JH
525 if (data)
526 *data->scream_olds = s;
527 }
1de06328
YO
528 else {
529 U8* start_point;
530 U8* end_point;
bbe252da 531 if (prog->extflags & RXf_CANY_SEEN) {
1de06328
YO
532 start_point= (U8*)(s + srch_start_shift);
533 end_point= (U8*)(strend - srch_end_shift);
534 } else {
535 start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
536 end_point= HOP3(strend, -srch_end_shift, strbeg);
537 }
6bda09f9 538 DEBUG_OPTIMISE_MORE_r({
56570a2c 539 PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
1de06328 540 (int)(end_point - start_point),
fc8cd66c 541 (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
1de06328
YO
542 start_point);
543 });
544
545 s = fbm_instr( start_point, end_point,
7fba1cd6 546 check, multiline ? FBMrf_MULTILINE : 0);
1de06328
YO
547 }
548 }
cad2e5aa
JH
549 /* Update the count-of-usability, remove useless subpatterns,
550 unshift s. */
2c2d71f5 551
ab3bbdeb
YO
552 DEBUG_EXECUTE_r({
553 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
554 SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
555 PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
2c2d71f5 556 (s ? "Found" : "Did not find"),
ab3bbdeb
YO
557 (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
558 ? "anchored" : "floating"),
559 quoted,
560 RE_SV_TAIL(check),
561 (s ? " at offset " : "...\n") );
562 });
2c2d71f5
JH
563
564 if (!s)
565 goto fail_finish;
2c2d71f5 566 /* Finish the diagnostic message */
a3621e74 567 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5 568
1de06328
YO
569 /* XXX dmq: first branch is for positive lookbehind...
570 Our check string is offset from the beginning of the pattern.
571 So we need to do any stclass tests offset forward from that
572 point. I think. :-(
573 */
574
575
576
577 check_at=s;
578
579
2c2d71f5
JH
580 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
581 Start with the other substr.
582 XXXX no SCREAM optimization yet - and a very coarse implementation
a0288114 583 XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
2c2d71f5
JH
584 *always* match. Probably should be marked during compile...
585 Probably it is right to do no SCREAM here...
586 */
587
1de06328
YO
588 if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
589 : (prog->float_substr && prog->anchored_substr))
590 {
30944b6d 591 /* Take into account the "other" substring. */
2c2d71f5
JH
592 /* XXXX May be hopelessly wrong for UTF... */
593 if (!other_last)
6eb5f6b9 594 other_last = strpos;
33b8afdf 595 if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
30944b6d
IZ
596 do_other_anchored:
597 {
890ce7af
AL
598 char * const last = HOP3c(s, -start_shift, strbeg);
599 char *last1, *last2;
be8e71aa 600 char * const saved_s = s;
33b8afdf 601 SV* must;
2c2d71f5 602
2c2d71f5
JH
603 t = s - prog->check_offset_max;
604 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 605 && (!do_utf8
0ce71af7 606 || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
2c2d71f5 607 && t > strpos)))
6f207bd3 608 NOOP;
2c2d71f5
JH
609 else
610 t = strpos;
1aa99e6b 611 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
612 if (t < other_last) /* These positions already checked */
613 t = other_last;
1aa99e6b 614 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
615 if (last < last1)
616 last1 = last;
1de06328
YO
617 /* XXXX It is not documented what units *_offsets are in.
618 We assume bytes, but this is clearly wrong.
619 Meaning this code needs to be carefully reviewed for errors.
620 dmq.
621 */
622
2c2d71f5 623 /* On end-of-str: see comment below. */
33b8afdf
JH
624 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
625 if (must == &PL_sv_undef) {
626 s = (char*)NULL;
1de06328 627 DEBUG_r(must = prog->anchored_utf8); /* for debug */
33b8afdf
JH
628 }
629 else
630 s = fbm_instr(
631 (unsigned char*)t,
632 HOP3(HOP3(last1, prog->anchored_offset, strend)
633 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
634 must,
7fba1cd6 635 multiline ? FBMrf_MULTILINE : 0
33b8afdf 636 );
ab3bbdeb
YO
637 DEBUG_EXECUTE_r({
638 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
639 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
640 PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
2c2d71f5 641 (s ? "Found" : "Contradicts"),
ab3bbdeb
YO
642 quoted, RE_SV_TAIL(must));
643 });
644
645
2c2d71f5
JH
646 if (!s) {
647 if (last1 >= last2) {
a3621e74 648 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5
JH
649 ", giving up...\n"));
650 goto fail_finish;
651 }
a3621e74 652 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 653 ", trying floating at offset %ld...\n",
be8e71aa 654 (long)(HOP3c(saved_s, 1, strend) - i_strpos)));
1aa99e6b
IH
655 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
656 s = HOP3c(last, 1, strend);
2c2d71f5
JH
657 goto restart;
658 }
659 else {
a3621e74 660 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 661 (long)(s - i_strpos)));
1aa99e6b
IH
662 t = HOP3c(s, -prog->anchored_offset, strbeg);
663 other_last = HOP3c(s, 1, strend);
be8e71aa 664 s = saved_s;
2c2d71f5
JH
665 if (t == strpos)
666 goto try_at_start;
2c2d71f5
JH
667 goto try_at_offset;
668 }
30944b6d 669 }
2c2d71f5
JH
670 }
671 else { /* Take into account the floating substring. */
33b8afdf 672 char *last, *last1;
be8e71aa 673 char * const saved_s = s;
33b8afdf
JH
674 SV* must;
675
676 t = HOP3c(s, -start_shift, strbeg);
677 last1 = last =
678 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
679 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
680 last = HOP3c(t, prog->float_max_offset, strend);
681 s = HOP3c(t, prog->float_min_offset, strend);
682 if (s < other_last)
683 s = other_last;
2c2d71f5 684 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
33b8afdf
JH
685 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
686 /* fbm_instr() takes into account exact value of end-of-str
687 if the check is SvTAIL(ed). Since false positives are OK,
688 and end-of-str is not later than strend we are OK. */
689 if (must == &PL_sv_undef) {
690 s = (char*)NULL;
1de06328 691 DEBUG_r(must = prog->float_utf8); /* for debug message */
33b8afdf
JH
692 }
693 else
2c2d71f5 694 s = fbm_instr((unsigned char*)s,
33b8afdf
JH
695 (unsigned char*)last + SvCUR(must)
696 - (SvTAIL(must)!=0),
7fba1cd6 697 must, multiline ? FBMrf_MULTILINE : 0);
ab3bbdeb
YO
698 DEBUG_EXECUTE_r({
699 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
700 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
701 PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
33b8afdf 702 (s ? "Found" : "Contradicts"),
ab3bbdeb
YO
703 quoted, RE_SV_TAIL(must));
704 });
33b8afdf
JH
705 if (!s) {
706 if (last1 == last) {
a3621e74 707 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
708 ", giving up...\n"));
709 goto fail_finish;
2c2d71f5 710 }
a3621e74 711 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf 712 ", trying anchored starting at offset %ld...\n",
be8e71aa 713 (long)(saved_s + 1 - i_strpos)));
33b8afdf
JH
714 other_last = last;
715 s = HOP3c(t, 1, strend);
716 goto restart;
717 }
718 else {
a3621e74 719 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
33b8afdf
JH
720 (long)(s - i_strpos)));
721 other_last = s; /* Fix this later. --Hugo */
be8e71aa 722 s = saved_s;
33b8afdf
JH
723 if (t == strpos)
724 goto try_at_start;
725 goto try_at_offset;
726 }
2c2d71f5 727 }
cad2e5aa 728 }
2c2d71f5 729
1de06328 730
9ef43ace 731 t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
1de06328 732
6bda09f9 733 DEBUG_OPTIMISE_MORE_r(
1de06328
YO
734 PerlIO_printf(Perl_debug_log,
735 "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
736 (IV)prog->check_offset_min,
737 (IV)prog->check_offset_max,
738 (IV)(s-strpos),
739 (IV)(t-strpos),
740 (IV)(t-s),
741 (IV)(strend-strpos)
742 )
743 );
744
2c2d71f5 745 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 746 && (!do_utf8
9ef43ace 747 || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
1de06328
YO
748 && t > strpos)))
749 {
2c2d71f5
JH
750 /* Fixed substring is found far enough so that the match
751 cannot start at strpos. */
752 try_at_offset:
cad2e5aa 753 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
754 /* Eventually fbm_*() should handle this, but often
755 anchored_offset is not 0, so this check will not be wasted. */
756 /* XXXX In the code below we prefer to look for "^" even in
757 presence of anchored substrings. And we search even
758 beyond the found float position. These pessimizations
759 are historical artefacts only. */
760 find_anchor:
2c2d71f5 761 while (t < strend - prog->minlen) {
cad2e5aa 762 if (*t == '\n') {
4ee3650e 763 if (t < check_at - prog->check_offset_min) {
33b8afdf 764 if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
4ee3650e
GS
765 /* Since we moved from the found position,
766 we definitely contradict the found anchored
30944b6d
IZ
767 substr. Due to the above check we do not
768 contradict "check" substr.
769 Thus we can arrive here only if check substr
770 is float. Redo checking for "other"=="fixed".
771 */
9041c2e3 772 strpos = t + 1;
a3621e74 773 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
e4584336 774 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
30944b6d
IZ
775 goto do_other_anchored;
776 }
4ee3650e
GS
777 /* We don't contradict the found floating substring. */
778 /* XXXX Why not check for STCLASS? */
cad2e5aa 779 s = t + 1;
a3621e74 780 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
e4584336 781 PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
782 goto set_useful;
783 }
4ee3650e
GS
784 /* Position contradicts check-string */
785 /* XXXX probably better to look for check-string
786 than for "\n", so one should lower the limit for t? */
a3621e74 787 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
e4584336 788 PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 789 other_last = strpos = s = t + 1;
cad2e5aa
JH
790 goto restart;
791 }
792 t++;
793 }
a3621e74 794 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
e4584336 795 PL_colors[0], PL_colors[1]));
2c2d71f5 796 goto fail_finish;
cad2e5aa 797 }
f5952150 798 else {
a3621e74 799 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
e4584336 800 PL_colors[0], PL_colors[1]));
f5952150 801 }
cad2e5aa
JH
802 s = t;
803 set_useful:
33b8afdf 804 ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
cad2e5aa
JH
805 }
806 else {
f5952150 807 /* The found string does not prohibit matching at strpos,
2c2d71f5 808 - no optimization of calling REx engine can be performed,
f5952150
GS
809 unless it was an MBOL and we are not after MBOL,
810 or a future STCLASS check will fail this. */
2c2d71f5
JH
811 try_at_start:
812 /* Even in this situation we may use MBOL flag if strpos is offset
813 wrt the start of the string. */
05b4157f 814 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 815 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d 816 /* May be due to an implicit anchor of m{.*foo} */
bbe252da 817 && !(prog->intflags & PREGf_IMPLICIT))
d506a20d 818 {
cad2e5aa
JH
819 t = strpos;
820 goto find_anchor;
821 }
a3621e74 822 DEBUG_EXECUTE_r( if (ml_anch)
f5952150 823 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
e4584336 824 (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
30944b6d 825 );
2c2d71f5 826 success_at_start:
bbe252da 827 if (!(prog->intflags & PREGf_NAUGHTY) /* XXXX If strpos moved? */
33b8afdf
JH
828 && (do_utf8 ? (
829 prog->check_utf8 /* Could be deleted already */
830 && --BmUSEFUL(prog->check_utf8) < 0
831 && (prog->check_utf8 == prog->float_utf8)
832 ) : (
833 prog->check_substr /* Could be deleted already */
834 && --BmUSEFUL(prog->check_substr) < 0
835 && (prog->check_substr == prog->float_substr)
836 )))
66e933ab 837 {
cad2e5aa 838 /* If flags & SOMETHING - do not do it many times on the same match */
a3621e74 839 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
33b8afdf
JH
840 SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
841 if (do_utf8 ? prog->check_substr : prog->check_utf8)
842 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
a0714e2c
SS
843 prog->check_substr = prog->check_utf8 = NULL; /* disable */
844 prog->float_substr = prog->float_utf8 = NULL; /* clear */
845 check = NULL; /* abort */
cad2e5aa 846 s = strpos;
3cf5c195
IZ
847 /* XXXX This is a remnant of the old implementation. It
848 looks wasteful, since now INTUIT can use many
6eb5f6b9 849 other heuristics. */
bbe252da 850 prog->extflags &= ~RXf_USE_INTUIT;
cad2e5aa
JH
851 }
852 else
853 s = strpos;
854 }
855
6eb5f6b9
JH
856 /* Last resort... */
857 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
1de06328
YO
858 /* trie stclasses are too expensive to use here, we are better off to
859 leave it to regmatch itself */
786e8c11 860 if (prog->regstclass && PL_regkind[OP(prog->regstclass)]!=TRIE) {
6eb5f6b9
JH
861 /* minlen == 0 is possible if regstclass is \b or \B,
862 and the fixed substr is ''$.
863 Since minlen is already taken into account, s+1 is before strend;
864 accidentally, minlen >= 1 guaranties no false positives at s + 1
865 even for \b or \B. But (minlen? 1 : 0) below assumes that
866 regstclass does not come from lookahead... */
867 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
868 This leaves EXACTF only, which is dealt with in find_byclass(). */
890ce7af 869 const U8* const str = (U8*)STRING(prog->regstclass);
3dab1dad 870 const int cl_l = (PL_regkind[OP(prog->regstclass)] == EXACT
1aa99e6b 871 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 872 : 1);
1de06328
YO
873 char * endpos;
874 if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
875 endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
876 else if (prog->float_substr || prog->float_utf8)
877 endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
878 else
879 endpos= strend;
880
56570a2c 881 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %d s: %d endpos: %d\n",
b57a0404 882 (IV)start_shift, check_at - strbeg, s - strbeg, endpos - strbeg));
1de06328 883
6eb5f6b9 884 t = s;
3b0527fe 885 s = find_byclass(prog, prog->regstclass, s, endpos, NULL);
6eb5f6b9
JH
886 if (!s) {
887#ifdef DEBUGGING
cbbf8932 888 const char *what = NULL;
6eb5f6b9
JH
889#endif
890 if (endpos == strend) {
a3621e74 891 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
892 "Could not match STCLASS...\n") );
893 goto fail;
894 }
a3621e74 895 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab 896 "This position contradicts STCLASS...\n") );
bbe252da 897 if ((prog->extflags & RXf_ANCH) && !ml_anch)
653099ff 898 goto fail;
6eb5f6b9 899 /* Contradict one of substrings */
33b8afdf
JH
900 if (prog->anchored_substr || prog->anchored_utf8) {
901 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
a3621e74 902 DEBUG_EXECUTE_r( what = "anchored" );
6eb5f6b9 903 hop_and_restart:
1aa99e6b 904 s = HOP3c(t, 1, strend);
66e933ab
GS
905 if (s + start_shift + end_shift > strend) {
906 /* XXXX Should be taken into account earlier? */
a3621e74 907 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab
GS
908 "Could not match STCLASS...\n") );
909 goto fail;
910 }
5e39e1e5
HS
911 if (!check)
912 goto giveup;
a3621e74 913 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 914 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
915 what, (long)(s + start_shift - i_strpos)) );
916 goto restart;
917 }
66e933ab 918 /* Have both, check_string is floating */
6eb5f6b9
JH
919 if (t + start_shift >= check_at) /* Contradicts floating=check */
920 goto retry_floating_check;
921 /* Recheck anchored substring, but not floating... */
9041c2e3 922 s = check_at;
5e39e1e5
HS
923 if (!check)
924 goto giveup;
a3621e74 925 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 926 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
927 (long)(other_last - i_strpos)) );
928 goto do_other_anchored;
929 }
60e71179
GS
930 /* Another way we could have checked stclass at the
931 current position only: */
932 if (ml_anch) {
933 s = t = t + 1;
5e39e1e5
HS
934 if (!check)
935 goto giveup;
a3621e74 936 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 937 "Looking for /%s^%s/m starting at offset %ld...\n",
e4584336 938 PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
60e71179 939 goto try_at_offset;
66e933ab 940 }
33b8afdf 941 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
60e71179 942 goto fail;
6eb5f6b9
JH
943 /* Check is floating subtring. */
944 retry_floating_check:
945 t = check_at - start_shift;
a3621e74 946 DEBUG_EXECUTE_r( what = "floating" );
6eb5f6b9
JH
947 goto hop_and_restart;
948 }
b7953727 949 if (t != s) {
a3621e74 950 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 951 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
952 (long)(t - i_strpos), (long)(s - i_strpos))
953 );
954 }
955 else {
a3621e74 956 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b7953727
JH
957 "Does not contradict STCLASS...\n");
958 );
959 }
6eb5f6b9 960 }
5e39e1e5 961 giveup:
a3621e74 962 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
5e39e1e5
HS
963 PL_colors[4], (check ? "Guessed" : "Giving up"),
964 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 965 return s;
2c2d71f5
JH
966
967 fail_finish: /* Substring not found */
33b8afdf
JH
968 if (prog->check_substr || prog->check_utf8) /* could be removed already */
969 BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
cad2e5aa 970 fail:
a3621e74 971 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
e4584336 972 PL_colors[4], PL_colors[5]));
bd61b366 973 return NULL;
cad2e5aa 974}
9661b544 975
786e8c11 976
3b0527fe 977
4cadc6a9
YO
978#define REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len, uvc, charid, \
979foldlen, foldbuf, uniflags) STMT_START { \
980 switch (trie_type) { \
981 case trie_utf8_fold: \
982 if ( foldlen>0 ) { \
983 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
984 foldlen -= len; \
985 uscan += len; \
986 len=0; \
987 } else { \
988 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
989 uvc = to_uni_fold( uvc, foldbuf, &foldlen ); \
990 foldlen -= UNISKIP( uvc ); \
991 uscan = foldbuf + UNISKIP( uvc ); \
992 } \
993 break; \
994 case trie_utf8: \
995 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
996 break; \
997 case trie_plain: \
998 uvc = (UV)*uc; \
999 len = 1; \
1000 } \
1001 \
1002 if (uvc < 256) { \
1003 charid = trie->charmap[ uvc ]; \
1004 } \
1005 else { \
1006 charid = 0; \
1007 if (trie->widecharmap) { \
1008 SV** const svpp = hv_fetch(trie->widecharmap, \
1009 (char*)&uvc, sizeof(UV), 0); \
1010 if (svpp) \
1011 charid = (U16)SvIV(*svpp); \
1012 } \
1013 } \
1014} STMT_END
1015
1016#define REXEC_FBC_EXACTISH_CHECK(CoNd) \
1017 if ( (CoNd) \
1018 && (ln == len || \
1019 ibcmp_utf8(s, NULL, 0, do_utf8, \
1020 m, NULL, ln, (bool)UTF)) \
24b23f37 1021 && (!reginfo || regtry(reginfo, &s)) ) \
4cadc6a9
YO
1022 goto got_it; \
1023 else { \
1024 U8 foldbuf[UTF8_MAXBYTES_CASE+1]; \
1025 uvchr_to_utf8(tmpbuf, c); \
1026 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen); \
1027 if ( f != c \
1028 && (f == c1 || f == c2) \
1029 && (ln == foldlen || \
1030 !ibcmp_utf8((char *) foldbuf, \
1031 NULL, foldlen, do_utf8, \
1032 m, \
1033 NULL, ln, (bool)UTF)) \
24b23f37 1034 && (!reginfo || regtry(reginfo, &s)) ) \
4cadc6a9
YO
1035 goto got_it; \
1036 } \
1037 s += len
1038
1039#define REXEC_FBC_EXACTISH_SCAN(CoNd) \
1040STMT_START { \
1041 while (s <= e) { \
1042 if ( (CoNd) \
1043 && (ln == 1 || !(OP(c) == EXACTF \
1044 ? ibcmp(s, m, ln) \
1045 : ibcmp_locale(s, m, ln))) \
24b23f37 1046 && (!reginfo || regtry(reginfo, &s)) ) \
4cadc6a9
YO
1047 goto got_it; \
1048 s++; \
1049 } \
1050} STMT_END
1051
1052#define REXEC_FBC_UTF8_SCAN(CoDe) \
1053STMT_START { \
1054 while (s + (uskip = UTF8SKIP(s)) <= strend) { \
1055 CoDe \
1056 s += uskip; \
1057 } \
1058} STMT_END
1059
1060#define REXEC_FBC_SCAN(CoDe) \
1061STMT_START { \
1062 while (s < strend) { \
1063 CoDe \
1064 s++; \
1065 } \
1066} STMT_END
1067
1068#define REXEC_FBC_UTF8_CLASS_SCAN(CoNd) \
1069REXEC_FBC_UTF8_SCAN( \
1070 if (CoNd) { \
24b23f37 1071 if (tmp && (!reginfo || regtry(reginfo, &s))) \
4cadc6a9
YO
1072 goto got_it; \
1073 else \
1074 tmp = doevery; \
1075 } \
1076 else \
1077 tmp = 1; \
1078)
1079
1080#define REXEC_FBC_CLASS_SCAN(CoNd) \
1081REXEC_FBC_SCAN( \
1082 if (CoNd) { \
24b23f37 1083 if (tmp && (!reginfo || regtry(reginfo, &s))) \
4cadc6a9
YO
1084 goto got_it; \
1085 else \
1086 tmp = doevery; \
1087 } \
1088 else \
1089 tmp = 1; \
1090)
1091
1092#define REXEC_FBC_TRYIT \
24b23f37 1093if ((!reginfo || regtry(reginfo, &s))) \
4cadc6a9
YO
1094 goto got_it
1095
1096#define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd) \
1097 if (do_utf8) { \
1098 UtFpReLoAd; \
1099 REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
1100 } \
1101 else { \
1102 REXEC_FBC_CLASS_SCAN(CoNd); \
1103 } \
1104 break
1105
1106#define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd) \
1107 PL_reg_flags |= RF_tainted; \
1108 if (do_utf8) { \
1109 REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
1110 } \
1111 else { \
1112 REXEC_FBC_CLASS_SCAN(CoNd); \
1113 } \
1114 break
1115
786e8c11
YO
1116#define DUMP_EXEC_POS(li,s,doutf8) \
1117 dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1118
1119/* We know what class REx starts with. Try to find this position... */
1120/* if reginfo is NULL, its a dryrun */
1121/* annoyingly all the vars in this routine have different names from their counterparts
1122 in regmatch. /grrr */
1123
3c3eec57 1124STATIC char *
07be1b83 1125S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
24b23f37 1126 const char *strend, regmatch_info *reginfo)
a687059c 1127{
27da23d5 1128 dVAR;
bbe252da 1129 const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
6eb5f6b9 1130 char *m;
d8093b23 1131 STRLEN ln;
5dab1207 1132 STRLEN lnc;
078c425b 1133 register STRLEN uskip;
d8093b23
G
1134 unsigned int c1;
1135 unsigned int c2;
6eb5f6b9
JH
1136 char *e;
1137 register I32 tmp = 1; /* Scratch variable? */
a3b680e6 1138 register const bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 1139
6eb5f6b9
JH
1140 /* We know what class it must start with. */
1141 switch (OP(c)) {
6eb5f6b9 1142 case ANYOF:
388cc4de 1143 if (do_utf8) {
4cadc6a9 1144 REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
388cc4de 1145 !UTF8_IS_INVARIANT((U8)s[0]) ?
32fc9b6a 1146 reginclass(prog, c, (U8*)s, 0, do_utf8) :
4cadc6a9 1147 REGINCLASS(prog, c, (U8*)s));
388cc4de
HS
1148 }
1149 else {
1150 while (s < strend) {
1151 STRLEN skip = 1;
1152
32fc9b6a 1153 if (REGINCLASS(prog, c, (U8*)s) ||
388cc4de
HS
1154 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1155 /* The assignment of 2 is intentional:
1156 * for the folded sharp s, the skip is 2. */
1157 (skip = SHARP_S_SKIP))) {
24b23f37 1158 if (tmp && (!reginfo || regtry(reginfo, &s)))
388cc4de
HS
1159 goto got_it;
1160 else
1161 tmp = doevery;
1162 }
1163 else
1164 tmp = 1;
1165 s += skip;
1166 }
a0d0e21e 1167 }
6eb5f6b9 1168 break;
f33976b4 1169 case CANY:
4cadc6a9 1170 REXEC_FBC_SCAN(
24b23f37 1171 if (tmp && (!reginfo || regtry(reginfo, &s)))
f33976b4
DB
1172 goto got_it;
1173 else
1174 tmp = doevery;
4cadc6a9 1175 );
f33976b4 1176 break;
6eb5f6b9 1177 case EXACTF:
5dab1207
NIS
1178 m = STRING(c);
1179 ln = STR_LEN(c); /* length to match in octets/bytes */
1180 lnc = (I32) ln; /* length to match in characters */
1aa99e6b 1181 if (UTF) {
a2a2844f 1182 STRLEN ulen1, ulen2;
5dab1207 1183 U8 *sm = (U8 *) m;
89ebb4a3
JH
1184 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1185 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4ad0818d 1186 const U32 uniflags = UTF8_ALLOW_DEFAULT;
a2a2844f
JH
1187
1188 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1189 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1190
89ebb4a3 1191 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
041457d9 1192 0, uniflags);
89ebb4a3 1193 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
041457d9 1194 0, uniflags);
5dab1207
NIS
1195 lnc = 0;
1196 while (sm < ((U8 *) m + ln)) {
1197 lnc++;
1198 sm += UTF8SKIP(sm);
1199 }
1aa99e6b
IH
1200 }
1201 else {
1202 c1 = *(U8*)m;
1203 c2 = PL_fold[c1];
1204 }
6eb5f6b9
JH
1205 goto do_exactf;
1206 case EXACTFL:
5dab1207
NIS
1207 m = STRING(c);
1208 ln = STR_LEN(c);
1209 lnc = (I32) ln;
d8093b23 1210 c1 = *(U8*)m;
6eb5f6b9
JH
1211 c2 = PL_fold_locale[c1];
1212 do_exactf:
db12adc6 1213 e = HOP3c(strend, -((I32)lnc), s);
b3c9acc1 1214
3b0527fe 1215 if (!reginfo && e < s)
6eb5f6b9 1216 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 1217
60a8b682
JH
1218 /* The idea in the EXACTF* cases is to first find the
1219 * first character of the EXACTF* node and then, if
1220 * necessary, case-insensitively compare the full
1221 * text of the node. The c1 and c2 are the first
1222 * characters (though in Unicode it gets a bit
1223 * more complicated because there are more cases
7f16dd3d
JH
1224 * than just upper and lower: one needs to use
1225 * the so-called folding case for case-insensitive
1226 * matching (called "loose matching" in Unicode).
1227 * ibcmp_utf8() will do just that. */
60a8b682 1228
1aa99e6b 1229 if (do_utf8) {
575cac57 1230 UV c, f;
89ebb4a3 1231 U8 tmpbuf [UTF8_MAXBYTES+1];
575cac57 1232 STRLEN len, foldlen;
4ad0818d 1233 const U32 uniflags = UTF8_ALLOW_DEFAULT;
09091399 1234 if (c1 == c2) {
5dab1207
NIS
1235 /* Upper and lower of 1st char are equal -
1236 * probably not a "letter". */
1aa99e6b 1237 while (s <= e) {
89ebb4a3 1238 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1239 uniflags);
4cadc6a9 1240 REXEC_FBC_EXACTISH_CHECK(c == c1);
1aa99e6b 1241 }
09091399
JH
1242 }
1243 else {
1aa99e6b 1244 while (s <= e) {
89ebb4a3 1245 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1246 uniflags);
80aecb99 1247
60a8b682 1248 /* Handle some of the three Greek sigmas cases.
8c01da3c
JH
1249 * Note that not all the possible combinations
1250 * are handled here: some of them are handled
1251 * by the standard folding rules, and some of
1252 * them (the character class or ANYOF cases)
1253 * are handled during compiletime in
1254 * regexec.c:S_regclass(). */
880bd946
JH
1255 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1256 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1257 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99 1258
4cadc6a9 1259 REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1aa99e6b 1260 }
09091399 1261 }
1aa99e6b
IH
1262 }
1263 else {
1264 if (c1 == c2)
4cadc6a9 1265 REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1aa99e6b 1266 else
4cadc6a9 1267 REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
b3c9acc1
IZ
1268 }
1269 break;
bbce6d69 1270 case BOUNDL:
3280af22 1271 PL_reg_flags |= RF_tainted;
bbce6d69 1272 /* FALL THROUGH */
a0d0e21e 1273 case BOUND:
ffc61ed2 1274 if (do_utf8) {
12d33761 1275 if (s == PL_bostr)
ffc61ed2
JH
1276 tmp = '\n';
1277 else {
6136c704 1278 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1279 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1280 }
1281 tmp = ((OP(c) == BOUND ?
9041c2e3 1282 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1283 LOAD_UTF8_CHARCLASS_ALNUM();
4cadc6a9 1284 REXEC_FBC_UTF8_SCAN(
ffc61ed2 1285 if (tmp == !(OP(c) == BOUND ?
bb7a0f54 1286 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1287 isALNUM_LC_utf8((U8*)s)))
1288 {
1289 tmp = !tmp;
4cadc6a9 1290 REXEC_FBC_TRYIT;
a687059c 1291 }
4cadc6a9 1292 );
a0d0e21e 1293 }
667bb95a 1294 else {
12d33761 1295 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2 1296 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
4cadc6a9 1297 REXEC_FBC_SCAN(
ffc61ed2
JH
1298 if (tmp ==
1299 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1300 tmp = !tmp;
4cadc6a9 1301 REXEC_FBC_TRYIT;
a0ed51b3 1302 }
4cadc6a9 1303 );
a0ed51b3 1304 }
24b23f37 1305 if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
a0ed51b3
LW
1306 goto got_it;
1307 break;
bbce6d69 1308 case NBOUNDL:
3280af22 1309 PL_reg_flags |= RF_tainted;
bbce6d69 1310 /* FALL THROUGH */
a0d0e21e 1311 case NBOUND:
ffc61ed2 1312 if (do_utf8) {
12d33761 1313 if (s == PL_bostr)
ffc61ed2
JH
1314 tmp = '\n';
1315 else {
6136c704 1316 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1317 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1318 }
1319 tmp = ((OP(c) == NBOUND ?
9041c2e3 1320 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1321 LOAD_UTF8_CHARCLASS_ALNUM();
4cadc6a9 1322 REXEC_FBC_UTF8_SCAN(
ffc61ed2 1323 if (tmp == !(OP(c) == NBOUND ?
bb7a0f54 1324 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1325 isALNUM_LC_utf8((U8*)s)))
1326 tmp = !tmp;
4cadc6a9
YO
1327 else REXEC_FBC_TRYIT;
1328 );
a0d0e21e 1329 }
667bb95a 1330 else {
12d33761 1331 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1332 tmp = ((OP(c) == NBOUND ?
1333 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
4cadc6a9 1334 REXEC_FBC_SCAN(
ffc61ed2
JH
1335 if (tmp ==
1336 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1337 tmp = !tmp;
4cadc6a9
YO
1338 else REXEC_FBC_TRYIT;
1339 );
a0ed51b3 1340 }
24b23f37 1341 if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
a0ed51b3
LW
1342 goto got_it;
1343 break;
a0d0e21e 1344 case ALNUM:
4cadc6a9
YO
1345 REXEC_FBC_CSCAN_PRELOAD(
1346 LOAD_UTF8_CHARCLASS_ALNUM(),
1347 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1348 isALNUM(*s)
1349 );
bbce6d69 1350 case ALNUML:
4cadc6a9
YO
1351 REXEC_FBC_CSCAN_TAINT(
1352 isALNUM_LC_utf8((U8*)s),
1353 isALNUM_LC(*s)
1354 );
a0d0e21e 1355 case NALNUM:
4cadc6a9
YO
1356 REXEC_FBC_CSCAN_PRELOAD(
1357 LOAD_UTF8_CHARCLASS_ALNUM(),
1358 !swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1359 !isALNUM(*s)
1360 );
bbce6d69 1361 case NALNUML:
4cadc6a9
YO
1362 REXEC_FBC_CSCAN_TAINT(
1363 !isALNUM_LC_utf8((U8*)s),
1364 !isALNUM_LC(*s)
1365 );
a0d0e21e 1366 case SPACE:
4cadc6a9
YO
1367 REXEC_FBC_CSCAN_PRELOAD(
1368 LOAD_UTF8_CHARCLASS_SPACE(),
1369 *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
1370 isSPACE(*s)
1371 );
bbce6d69 1372 case SPACEL:
4cadc6a9
YO
1373 REXEC_FBC_CSCAN_TAINT(
1374 *s == ' ' || isSPACE_LC_utf8((U8*)s),
1375 isSPACE_LC(*s)
1376 );
a0d0e21e 1377 case NSPACE:
4cadc6a9
YO
1378 REXEC_FBC_CSCAN_PRELOAD(
1379 LOAD_UTF8_CHARCLASS_SPACE(),
1380 !(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
1381 !isSPACE(*s)
1382 );
bbce6d69 1383 case NSPACEL:
4cadc6a9
YO
1384 REXEC_FBC_CSCAN_TAINT(
1385 !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1386 !isSPACE_LC(*s)
1387 );
a0d0e21e 1388 case DIGIT:
4cadc6a9
YO
1389 REXEC_FBC_CSCAN_PRELOAD(
1390 LOAD_UTF8_CHARCLASS_DIGIT(),
1391 swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1392 isDIGIT(*s)
1393 );
b8c5462f 1394 case DIGITL:
4cadc6a9
YO
1395 REXEC_FBC_CSCAN_TAINT(
1396 isDIGIT_LC_utf8((U8*)s),
1397 isDIGIT_LC(*s)
1398 );
a0d0e21e 1399 case NDIGIT:
4cadc6a9
YO
1400 REXEC_FBC_CSCAN_PRELOAD(
1401 LOAD_UTF8_CHARCLASS_DIGIT(),
1402 !swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1403 !isDIGIT(*s)
1404 );
b8c5462f 1405 case NDIGITL:
4cadc6a9
YO
1406 REXEC_FBC_CSCAN_TAINT(
1407 !isDIGIT_LC_utf8((U8*)s),
1408 !isDIGIT_LC(*s)
1409 );
1de06328
YO
1410 case AHOCORASICKC:
1411 case AHOCORASICK:
07be1b83
YO
1412 {
1413 const enum { trie_plain, trie_utf8, trie_utf8_fold }
1414 trie_type = do_utf8 ?
1415 (c->flags == EXACT ? trie_utf8 : trie_utf8_fold)
1416 : trie_plain;
1417 /* what trie are we using right now */
1418 reg_ac_data *aho
1419 = (reg_ac_data*)prog->data->data[ ARG( c ) ];
1420 reg_trie_data *trie=aho->trie;
1421
1422 const char *last_start = strend - trie->minlen;
6148ee25 1423#ifdef DEBUGGING
07be1b83 1424 const char *real_start = s;
6148ee25 1425#endif
07be1b83 1426 STRLEN maxlen = trie->maxlen;
be8e71aa
YO
1427 SV *sv_points;
1428 U8 **points; /* map of where we were in the input string
786e8c11 1429 when reading a given char. For ASCII this
be8e71aa
YO
1430 is unnecessary overhead as the relationship
1431 is always 1:1, but for unicode, especially
1432 case folded unicode this is not true. */
f9e705e8 1433 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
786e8c11
YO
1434 U8 *bitmap=NULL;
1435
07be1b83
YO
1436
1437 GET_RE_DEBUG_FLAGS_DECL;
1438
be8e71aa
YO
1439 /* We can't just allocate points here. We need to wrap it in
1440 * an SV so it gets freed properly if there is a croak while
1441 * running the match */
1442 ENTER;
1443 SAVETMPS;
1444 sv_points=newSV(maxlen * sizeof(U8 *));
1445 SvCUR_set(sv_points,
1446 maxlen * sizeof(U8 *));
1447 SvPOK_on(sv_points);
1448 sv_2mortal(sv_points);
1449 points=(U8**)SvPV_nolen(sv_points );
1de06328
YO
1450 if ( trie_type != trie_utf8_fold
1451 && (trie->bitmap || OP(c)==AHOCORASICKC) )
1452 {
786e8c11
YO
1453 if (trie->bitmap)
1454 bitmap=(U8*)trie->bitmap;
1455 else
1456 bitmap=(U8*)ANYOF_BITMAP(c);
07be1b83 1457 }
786e8c11
YO
1458 /* this is the Aho-Corasick algorithm modified a touch
1459 to include special handling for long "unknown char"
1460 sequences. The basic idea being that we use AC as long
1461 as we are dealing with a possible matching char, when
1462 we encounter an unknown char (and we have not encountered
1463 an accepting state) we scan forward until we find a legal
1464 starting char.
1465 AC matching is basically that of trie matching, except
1466 that when we encounter a failing transition, we fall back
1467 to the current states "fail state", and try the current char
1468 again, a process we repeat until we reach the root state,
1469 state 1, or a legal transition. If we fail on the root state
1470 then we can either terminate if we have reached an accepting
1471 state previously, or restart the entire process from the beginning
1472 if we have not.
1473
1474 */
07be1b83
YO
1475 while (s <= last_start) {
1476 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1477 U8 *uc = (U8*)s;
1478 U16 charid = 0;
1479 U32 base = 1;
1480 U32 state = 1;
1481 UV uvc = 0;
1482 STRLEN len = 0;
1483 STRLEN foldlen = 0;
1484 U8 *uscan = (U8*)NULL;
1485 U8 *leftmost = NULL;
786e8c11
YO
1486#ifdef DEBUGGING
1487 U32 accepted_word= 0;
1488#endif
07be1b83
YO
1489 U32 pointpos = 0;
1490
1491 while ( state && uc <= (U8*)strend ) {
1492 int failed=0;
786e8c11
YO
1493 U32 word = aho->states[ state ].wordnum;
1494
1de06328
YO
1495 if( state==1 ) {
1496 if ( bitmap ) {
1497 DEBUG_TRIE_EXECUTE_r(
1498 if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1499 dump_exec_pos( (char *)uc, c, strend, real_start,
1500 (char *)uc, do_utf8 );
1501 PerlIO_printf( Perl_debug_log,
1502 " Scanning for legal start char...\n");
1503 }
1504 );
1505 while ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1506 uc++;
786e8c11 1507 }
1de06328 1508 s= (char *)uc;
786e8c11 1509 }
786e8c11
YO
1510 if (uc >(U8*)last_start) break;
1511 }
1512
1513 if ( word ) {
1514 U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1515 if (!leftmost || lpos < leftmost) {
1516 DEBUG_r(accepted_word=word);
07be1b83 1517 leftmost= lpos;
786e8c11 1518 }
07be1b83 1519 if (base==0) break;
786e8c11 1520
07be1b83
YO
1521 }
1522 points[pointpos++ % maxlen]= uc;
4cadc6a9
YO
1523 REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len,
1524 uvc, charid, foldlen, foldbuf, uniflags);
786e8c11
YO
1525 DEBUG_TRIE_EXECUTE_r({
1526 dump_exec_pos( (char *)uc, c, strend, real_start,
1527 s, do_utf8 );
07be1b83 1528 PerlIO_printf(Perl_debug_log,
786e8c11
YO
1529 " Charid:%3u CP:%4"UVxf" ",
1530 charid, uvc);
1531 });
07be1b83
YO
1532
1533 do {
6148ee25 1534#ifdef DEBUGGING
786e8c11 1535 word = aho->states[ state ].wordnum;
6148ee25 1536#endif
07be1b83
YO
1537 base = aho->states[ state ].trans.base;
1538
786e8c11
YO
1539 DEBUG_TRIE_EXECUTE_r({
1540 if (failed)
1541 dump_exec_pos( (char *)uc, c, strend, real_start,
1542 s, do_utf8 );
07be1b83 1543 PerlIO_printf( Perl_debug_log,
786e8c11
YO
1544 "%sState: %4"UVxf", word=%"UVxf,
1545 failed ? " Fail transition to " : "",
1546 (UV)state, (UV)word);
1547 });
07be1b83
YO
1548 if ( base ) {
1549 U32 tmp;
1550 if (charid &&
1551 (base + charid > trie->uniquecharcount )
1552 && (base + charid - 1 - trie->uniquecharcount
1553 < trie->lasttrans)
1554 && trie->trans[base + charid - 1 -
1555 trie->uniquecharcount].check == state
1556 && (tmp=trie->trans[base + charid - 1 -
1557 trie->uniquecharcount ].next))
1558 {
786e8c11
YO
1559 DEBUG_TRIE_EXECUTE_r(
1560 PerlIO_printf( Perl_debug_log," - legal\n"));
07be1b83
YO
1561 state = tmp;
1562 break;
1563 }
1564 else {
786e8c11
YO
1565 DEBUG_TRIE_EXECUTE_r(
1566 PerlIO_printf( Perl_debug_log," - fail\n"));
1567 failed = 1;
1568 state = aho->fail[state];
07be1b83
YO
1569 }
1570 }
1571 else {
1572 /* we must be accepting here */
786e8c11
YO
1573 DEBUG_TRIE_EXECUTE_r(
1574 PerlIO_printf( Perl_debug_log," - accepting\n"));
1575 failed = 1;
07be1b83
YO
1576 break;
1577 }
1578 } while(state);
786e8c11 1579 uc += len;
07be1b83
YO
1580 if (failed) {
1581 if (leftmost)
1582 break;
786e8c11 1583 if (!state) state = 1;
07be1b83
YO
1584 }
1585 }
1586 if ( aho->states[ state ].wordnum ) {
1587 U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
786e8c11
YO
1588 if (!leftmost || lpos < leftmost) {
1589 DEBUG_r(accepted_word=aho->states[ state ].wordnum);
07be1b83 1590 leftmost = lpos;
786e8c11 1591 }
07be1b83 1592 }
07be1b83
YO
1593 if (leftmost) {
1594 s = (char*)leftmost;
786e8c11
YO
1595 DEBUG_TRIE_EXECUTE_r({
1596 PerlIO_printf(
1597 Perl_debug_log,"Matches word #%"UVxf" at position %d. Trying full pattern...\n",
1598 (UV)accepted_word, s - real_start
1599 );
1600 });
24b23f37 1601 if (!reginfo || regtry(reginfo, &s)) {
be8e71aa
YO
1602 FREETMPS;
1603 LEAVE;
07be1b83 1604 goto got_it;
be8e71aa 1605 }
07be1b83 1606 s = HOPc(s,1);
786e8c11
YO
1607 DEBUG_TRIE_EXECUTE_r({
1608 PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1609 });
07be1b83 1610 } else {
786e8c11
YO
1611 DEBUG_TRIE_EXECUTE_r(
1612 PerlIO_printf( Perl_debug_log,"No match.\n"));
07be1b83
YO
1613 break;
1614 }
1615 }
be8e71aa
YO
1616 FREETMPS;
1617 LEAVE;
07be1b83
YO
1618 }
1619 break;
b3c9acc1 1620 default:
3c3eec57
GS
1621 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1622 break;
d6a28714 1623 }
6eb5f6b9
JH
1624 return 0;
1625 got_it:
1626 return s;
1627}
1628
1629/*
1630 - regexec_flags - match a regexp against a string
1631 */
1632I32
1633Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1634 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1635/* strend: pointer to null at end of string */
1636/* strbeg: real beginning of string */
1637/* minend: end of match must be >=minend after stringarg. */
58e23c8d
YO
1638/* data: May be used for some additional optimizations.
1639 Currently its only used, with a U32 cast, for transmitting
1640 the ganch offset when doing a /g match. This will change */
6eb5f6b9
JH
1641/* nosave: For optimizations. */
1642{
97aff369 1643 dVAR;
24b23f37 1644 /*register*/ char *s;
6eb5f6b9 1645 register regnode *c;
24b23f37 1646 /*register*/ char *startpos = stringarg;
6eb5f6b9
JH
1647 I32 minlen; /* must match at least this many chars */
1648 I32 dontbother = 0; /* how many characters not to try at end */
6eb5f6b9
JH
1649 I32 end_shift = 0; /* Same for the end. */ /* CC */
1650 I32 scream_pos = -1; /* Internal iterator of scream. */
ccac19ea 1651 char *scream_olds = NULL;
3dab1dad 1652 SV* const oreplsv = GvSV(PL_replgv);
f9f4320a 1653 const bool do_utf8 = (bool)DO_UTF8(sv);
2757e526 1654 I32 multiline;
0df25f3d 1655
3b0527fe 1656 regmatch_info reginfo; /* create some info to pass to regtry etc */
a3621e74
YO
1657
1658 GET_RE_DEBUG_FLAGS_DECL;
1659
9d4ba2ae 1660 PERL_UNUSED_ARG(data);
6eb5f6b9
JH
1661
1662 /* Be paranoid... */
1663 if (prog == NULL || startpos == NULL) {
1664 Perl_croak(aTHX_ "NULL regexp parameter");
1665 return 0;
1666 }
1667
bbe252da 1668 multiline = prog->extflags & RXf_PMf_MULTILINE;
3b0527fe 1669 reginfo.prog = prog;
2757e526 1670
bac06658 1671 RX_MATCH_UTF8_set(prog, do_utf8);
1de06328
YO
1672 DEBUG_EXECUTE_r(
1673 debug_start_match(prog, do_utf8, startpos, strend,
1674 "Matching");
1675 );
bac06658 1676
6eb5f6b9 1677 minlen = prog->minlen;
1de06328
YO
1678
1679 if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
a3621e74 1680 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584
JH
1681 "String too short [regexec_flags]...\n"));
1682 goto phooey;
1aa99e6b 1683 }
6eb5f6b9 1684
1de06328 1685
6eb5f6b9
JH
1686 /* Check validity of program. */
1687 if (UCHARAT(prog->program) != REG_MAGIC) {
1688 Perl_croak(aTHX_ "corrupted regexp program");
1689 }
1690
1691 PL_reg_flags = 0;
1692 PL_reg_eval_set = 0;
1693 PL_reg_maxiter = 0;
1694
bbe252da 1695 if (prog->extflags & RXf_UTF8)
6eb5f6b9
JH
1696 PL_reg_flags |= RF_utf8;
1697
1698 /* Mark beginning of line for ^ and lookbehind. */
3b0527fe 1699 reginfo.bol = startpos; /* XXX not used ??? */
6eb5f6b9 1700 PL_bostr = strbeg;
3b0527fe 1701 reginfo.sv = sv;
6eb5f6b9
JH
1702
1703 /* Mark end of line for $ (and such) */
1704 PL_regeol = strend;
1705
1706 /* see how far we have to get to not match where we matched before */
3b0527fe 1707 reginfo.till = startpos+minend;
6eb5f6b9 1708
6eb5f6b9
JH
1709 /* If there is a "must appear" string, look for it. */
1710 s = startpos;
1711
bbe252da 1712 if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
6eb5f6b9
JH
1713 MAGIC *mg;
1714
1715 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
58e23c8d 1716 reginfo.ganch = startpos + prog->gofs;
6eb5f6b9
JH
1717 else if (sv && SvTYPE(sv) >= SVt_PVMG
1718 && SvMAGIC(sv)
14befaf4
DM
1719 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1720 && mg->mg_len >= 0) {
3b0527fe 1721 reginfo.ganch = strbeg + mg->mg_len; /* Defined pos() */
bbe252da 1722 if (prog->extflags & RXf_ANCH_GPOS) {
3b0527fe 1723 if (s > reginfo.ganch)
6eb5f6b9 1724 goto phooey;
58e23c8d 1725 s = reginfo.ganch - prog->gofs;
6eb5f6b9
JH
1726 }
1727 }
58e23c8d
YO
1728 else if (data) {
1729 reginfo.ganch = strbeg + (UV)data;
1730 } else /* pos() not defined */
3b0527fe 1731 reginfo.ganch = strbeg;
6eb5f6b9 1732 }
c74340f9
YO
1733 if (PL_curpm && (PM_GETRE(PL_curpm) == prog)) {
1734 I32 *t;
1735 if (!prog->swap) {
1736 /* We have to be careful. If the previous successful match
1737 was from this regex we don't want a subsequent paritally
1738 successful match to clobber the old results.
1739 So when we detect this possibility we add a swap buffer
1740 to the re, and switch the buffer each match. If we fail
1741 we switch it back, otherwise we leave it swapped.
1742 */
1743 Newxz(prog->swap, 1, regexp_paren_ofs);
1744 /* no need to copy these */
1745 Newxz(prog->swap->startp, prog->nparens + 1, I32);
1746 Newxz(prog->swap->endp, prog->nparens + 1, I32);
1747 }
1748 t = prog->swap->startp;
1749 prog->swap->startp = prog->startp;
1750 prog->startp = t;
1751 t = prog->swap->endp;
1752 prog->swap->endp = prog->endp;
1753 prog->endp = t;
1754 }
a0714e2c 1755 if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
6eb5f6b9
JH
1756 re_scream_pos_data d;
1757
1758 d.scream_olds = &scream_olds;
1759 d.scream_pos = &scream_pos;
1760 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7 1761 if (!s) {
a3621e74 1762 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1763 goto phooey; /* not present */
3fa9c3d7 1764 }
6eb5f6b9
JH
1765 }
1766
1de06328 1767
6eb5f6b9
JH
1768
1769 /* Simplest case: anchored match need be tried only once. */
1770 /* [unless only anchor is BOL and multiline is set] */
bbe252da 1771 if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
24b23f37 1772 if (s == startpos && regtry(&reginfo, &startpos))
6eb5f6b9 1773 goto got_it;
bbe252da
YO
1774 else if (multiline || (prog->intflags & PREGf_IMPLICIT)
1775 || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
6eb5f6b9
JH
1776 {
1777 char *end;
1778
1779 if (minlen)
1780 dontbother = minlen - 1;
1aa99e6b 1781 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9 1782 /* for multiline we only have to try after newlines */
33b8afdf 1783 if (prog->check_substr || prog->check_utf8) {
6eb5f6b9
JH
1784 if (s == startpos)
1785 goto after_try;
1786 while (1) {
24b23f37 1787 if (regtry(&reginfo, &s))
6eb5f6b9
JH
1788 goto got_it;
1789 after_try:
1790 if (s >= end)
1791 goto phooey;
bbe252da 1792 if (prog->extflags & RXf_USE_INTUIT) {
6eb5f6b9
JH
1793 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1794 if (!s)
1795 goto phooey;
1796 }
1797 else
1798 s++;
1799 }
1800 } else {
1801 if (s > startpos)
1802 s--;
1803 while (s < end) {
1804 if (*s++ == '\n') { /* don't need PL_utf8skip here */
24b23f37 1805 if (regtry(&reginfo, &s))
6eb5f6b9
JH
1806 goto got_it;
1807 }
1808 }
1809 }
1810 }
1811 goto phooey;
bbe252da 1812 } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
f9f4320a
YO
1813 {
1814 /* the warning about reginfo.ganch being used without intialization
bbe252da 1815 is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
f9f4320a 1816 and we only enter this block when the same bit is set. */
58e23c8d
YO
1817 char *tmp_s = reginfo.ganch - prog->gofs;
1818 if (regtry(&reginfo, &tmp_s))
6eb5f6b9
JH
1819 goto got_it;
1820 goto phooey;
1821 }
1822
1823 /* Messy cases: unanchored match. */
bbe252da 1824 if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
6eb5f6b9
JH
1825 /* we have /x+whatever/ */
1826 /* it must be a one character string (XXXX Except UTF?) */
33b8afdf 1827 char ch;
bf93d4cc
GS
1828#ifdef DEBUGGING
1829 int did_match = 0;
1830#endif
33b8afdf
JH
1831 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1832 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
3f7c398e 1833 ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
bf93d4cc 1834
1aa99e6b 1835 if (do_utf8) {
4cadc6a9 1836 REXEC_FBC_SCAN(
6eb5f6b9 1837 if (*s == ch) {
a3621e74 1838 DEBUG_EXECUTE_r( did_match = 1 );
24b23f37 1839 if (regtry(&reginfo, &s)) goto got_it;
6eb5f6b9
JH
1840 s += UTF8SKIP(s);
1841 while (s < strend && *s == ch)
1842 s += UTF8SKIP(s);
1843 }
4cadc6a9 1844 );
6eb5f6b9
JH
1845 }
1846 else {
4cadc6a9 1847 REXEC_FBC_SCAN(
6eb5f6b9 1848 if (*s == ch) {
a3621e74 1849 DEBUG_EXECUTE_r( did_match = 1 );
24b23f37 1850 if (regtry(&reginfo, &s)) goto got_it;
6eb5f6b9
JH
1851 s++;
1852 while (s < strend && *s == ch)
1853 s++;
1854 }
4cadc6a9 1855 );
6eb5f6b9 1856 }
a3621e74 1857 DEBUG_EXECUTE_r(if (!did_match)
bf93d4cc 1858 PerlIO_printf(Perl_debug_log,
b7953727
JH
1859 "Did not find anchored character...\n")
1860 );
6eb5f6b9 1861 }
a0714e2c
SS
1862 else if (prog->anchored_substr != NULL
1863 || prog->anchored_utf8 != NULL
1864 || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
33b8afdf
JH
1865 && prog->float_max_offset < strend - s)) {
1866 SV *must;
1867 I32 back_max;
1868 I32 back_min;
1869 char *last;
6eb5f6b9 1870 char *last1; /* Last position checked before */
bf93d4cc
GS
1871#ifdef DEBUGGING
1872 int did_match = 0;
1873#endif
33b8afdf
JH
1874 if (prog->anchored_substr || prog->anchored_utf8) {
1875 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1876 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1877 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1878 back_max = back_min = prog->anchored_offset;
1879 } else {
1880 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1881 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1882 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1883 back_max = prog->float_max_offset;
1884 back_min = prog->float_min_offset;
1885 }
1de06328
YO
1886
1887
33b8afdf
JH
1888 if (must == &PL_sv_undef)
1889 /* could not downgrade utf8 check substring, so must fail */
1890 goto phooey;
1891
1de06328
YO
1892 if (back_min<0) {
1893 last = strend;
1894 } else {
1895 last = HOP3c(strend, /* Cannot start after this */
1896 -(I32)(CHR_SVLEN(must)
1897 - (SvTAIL(must) != 0) + back_min), strbeg);
1898 }
6eb5f6b9
JH
1899 if (s > PL_bostr)
1900 last1 = HOPc(s, -1);
1901 else
1902 last1 = s - 1; /* bogus */
1903
a0288114 1904 /* XXXX check_substr already used to find "s", can optimize if
6eb5f6b9
JH
1905 check_substr==must. */
1906 scream_pos = -1;
1907 dontbother = end_shift;
1908 strend = HOPc(strend, -dontbother);
1909 while ( (s <= last) &&
9041c2e3 1910 ((flags & REXEC_SCREAM)
1de06328 1911 ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
6eb5f6b9 1912 end_shift, &scream_pos, 0))
1de06328 1913 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
9041c2e3 1914 (unsigned char*)strend, must,
7fba1cd6 1915 multiline ? FBMrf_MULTILINE : 0))) ) {
4addbd3b
HS
1916 /* we may be pointing at the wrong string */
1917 if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
3f7c398e 1918 s = strbeg + (s - SvPVX_const(sv));
a3621e74 1919 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1920 if (HOPc(s, -back_max) > last1) {
1921 last1 = HOPc(s, -back_min);
1922 s = HOPc(s, -back_max);
1923 }
1924 else {
52657f30 1925 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
6eb5f6b9
JH
1926
1927 last1 = HOPc(s, -back_min);
52657f30 1928 s = t;
6eb5f6b9 1929 }
1aa99e6b 1930 if (do_utf8) {
6eb5f6b9 1931 while (s <= last1) {
24b23f37 1932 if (regtry(&reginfo, &s))
6eb5f6b9
JH
1933 goto got_it;
1934 s += UTF8SKIP(s);
1935 }
1936 }
1937 else {
1938 while (s <= last1) {
24b23f37 1939 if (regtry(&reginfo, &s))
6eb5f6b9
JH
1940 goto got_it;
1941 s++;
1942 }
1943 }
1944 }
ab3bbdeb
YO
1945 DEBUG_EXECUTE_r(if (!did_match) {
1946 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
1947 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
1948 PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
33b8afdf 1949 ((must == prog->anchored_substr || must == prog->anchored_utf8)
bf93d4cc 1950 ? "anchored" : "floating"),
ab3bbdeb
YO
1951 quoted, RE_SV_TAIL(must));
1952 });
6eb5f6b9
JH
1953 goto phooey;
1954 }
786e8c11 1955 else if ( (c = prog->regstclass) ) {
f14c76ed 1956 if (minlen) {
be8e71aa 1957 const OPCODE op = OP(prog->regstclass);
66e933ab 1958 /* don't bother with what can't match */
786e8c11 1959 if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
f14c76ed
RGS
1960 strend = HOPc(strend, -(minlen - 1));
1961 }
a3621e74 1962 DEBUG_EXECUTE_r({
be8e71aa 1963 SV * const prop = sv_newmortal();
32fc9b6a 1964 regprop(prog, prop, c);
0df25f3d 1965 {
ab3bbdeb
YO
1966 RE_PV_QUOTED_DECL(quoted,UTF,PERL_DEBUG_PAD_ZERO(1),
1967 s,strend-s,60);
0df25f3d 1968 PerlIO_printf(Perl_debug_log,
ab3bbdeb 1969 "Matching stclass %.*s against %s (%d chars)\n",
e4f74956 1970 (int)SvCUR(prop), SvPVX_const(prop),
ab3bbdeb 1971 quoted, (int)(strend - s));
0df25f3d 1972 }
ffc61ed2 1973 });
3b0527fe 1974 if (find_byclass(prog, c, s, strend, &reginfo))
6eb5f6b9 1975 goto got_it;
07be1b83 1976 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
d6a28714
JH
1977 }
1978 else {
1979 dontbother = 0;
a0714e2c 1980 if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
33b8afdf 1981 /* Trim the end. */
d6a28714 1982 char *last;
33b8afdf
JH
1983 SV* float_real;
1984
1985 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1986 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1987 float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
d6a28714
JH
1988
1989 if (flags & REXEC_SCREAM) {
33b8afdf 1990 last = screaminstr(sv, float_real, s - strbeg,
d6a28714
JH
1991 end_shift, &scream_pos, 1); /* last one */
1992 if (!last)
ffc61ed2 1993 last = scream_olds; /* Only one occurrence. */
4addbd3b
HS
1994 /* we may be pointing at the wrong string */
1995 else if (RX_MATCH_COPIED(prog))
3f7c398e 1996 s = strbeg + (s - SvPVX_const(sv));
b8c5462f 1997 }
d6a28714
JH
1998 else {
1999 STRLEN len;
cfd0369c 2000 const char * const little = SvPV_const(float_real, len);
d6a28714 2001
33b8afdf 2002 if (SvTAIL(float_real)) {
d6a28714
JH
2003 if (memEQ(strend - len + 1, little, len - 1))
2004 last = strend - len + 1;
7fba1cd6 2005 else if (!multiline)
9041c2e3 2006 last = memEQ(strend - len, little, len)
bd61b366 2007 ? strend - len : NULL;
b8c5462f 2008 else
d6a28714
JH
2009 goto find_last;
2010 } else {
2011 find_last:
9041c2e3 2012 if (len)
d6a28714 2013 last = rninstr(s, strend, little, little + len);
b8c5462f 2014 else
a0288114 2015 last = strend; /* matching "$" */
b8c5462f 2016 }
b8c5462f 2017 }
bf93d4cc 2018 if (last == NULL) {
6bda09f9
YO
2019 DEBUG_EXECUTE_r(
2020 PerlIO_printf(Perl_debug_log,
2021 "%sCan't trim the tail, match fails (should not happen)%s\n",
2022 PL_colors[4], PL_colors[5]));
bf93d4cc
GS
2023 goto phooey; /* Should not happen! */
2024 }
d6a28714
JH
2025 dontbother = strend - last + prog->float_min_offset;
2026 }
2027 if (minlen && (dontbother < minlen))
2028 dontbother = minlen - 1;
2029 strend -= dontbother; /* this one's always in bytes! */
2030 /* We don't know much -- general case. */
1aa99e6b 2031 if (do_utf8) {
d6a28714 2032 for (;;) {
24b23f37 2033 if (regtry(&reginfo, &s))
d6a28714
JH
2034 goto got_it;
2035 if (s >= strend)
2036 break;
b8c5462f 2037 s += UTF8SKIP(s);
d6a28714
JH
2038 };
2039 }
2040 else {
2041 do {
24b23f37 2042 if (regtry(&reginfo, &s))
d6a28714
JH
2043 goto got_it;
2044 } while (s++ < strend);
2045 }
2046 }
2047
2048 /* Failure. */
2049 goto phooey;
2050
2051got_it:
2052 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2053
2054 if (PL_reg_eval_set) {
2055 /* Preserve the current value of $^R */
2056 if (oreplsv != GvSV(PL_replgv))
2057 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2058 restored, the value remains
2059 the same. */
4f639d21 2060 restore_pos(aTHX_ prog);
d6a28714 2061 }
81714fb9
YO
2062 if (prog->paren_names)
2063 (void)hv_iterinit(prog->paren_names);
d6a28714
JH
2064
2065 /* make sure $`, $&, $', and $digit will work later */
2066 if ( !(flags & REXEC_NOT_FIRST) ) {
ed252734 2067 RX_MATCH_COPY_FREE(prog);
d6a28714 2068 if (flags & REXEC_COPY_STR) {
be8e71aa 2069 const I32 i = PL_regeol - startpos + (stringarg - strbeg);
f8c7b90f 2070#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2071 if ((SvIsCOW(sv)
2072 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2073 if (DEBUG_C_TEST) {
2074 PerlIO_printf(Perl_debug_log,
2075 "Copy on write: regexp capture, type %d\n",
2076 (int) SvTYPE(sv));
2077 }
2078 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
d5263905 2079 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
ed252734
NC
2080 assert (SvPOKp(prog->saved_copy));
2081 } else
2082#endif
2083 {
2084 RX_MATCH_COPIED_on(prog);
2085 s = savepvn(strbeg, i);
2086 prog->subbeg = s;
2087 }
d6a28714 2088 prog->sublen = i;
d6a28714
JH
2089 }
2090 else {
2091 prog->subbeg = strbeg;
2092 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2093 }
2094 }
9041c2e3 2095
d6a28714
JH
2096 return 1;
2097
2098phooey:
a3621e74 2099 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
e4584336 2100 PL_colors[4], PL_colors[5]));
d6a28714 2101 if (PL_reg_eval_set)
4f639d21 2102 restore_pos(aTHX_ prog);
c74340f9
YO
2103 if (prog->swap) {
2104 /* we failed :-( roll it back */
2105 I32 *t;
2106 t = prog->swap->startp;
2107 prog->swap->startp = prog->startp;
2108 prog->startp = t;
2109 t = prog->swap->endp;
2110 prog->swap->endp = prog->endp;
2111 prog->endp = t;
2112 }
d6a28714
JH
2113 return 0;
2114}
2115
6bda09f9 2116
d6a28714
JH
2117/*
2118 - regtry - try match at specific point
2119 */
2120STATIC I32 /* 0 failure, 1 success */
24b23f37 2121S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
d6a28714 2122{
97aff369 2123 dVAR;
d6a28714
JH
2124 register I32 *sp;
2125 register I32 *ep;
2126 CHECKPOINT lastcp;
3b0527fe 2127 regexp *prog = reginfo->prog;
a3621e74 2128 GET_RE_DEBUG_FLAGS_DECL;
24b23f37 2129 reginfo->cutpoint=NULL;
d6a28714 2130
bbe252da 2131 if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
d6a28714
JH
2132 MAGIC *mg;
2133
2134 PL_reg_eval_set = RS_init;
a3621e74 2135 DEBUG_EXECUTE_r(DEBUG_s(
b900a521
JH
2136 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2137 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 2138 ));
ea8d6ae1 2139 SAVESTACK_CXPOS();
d6a28714
JH
2140 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2141 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2142 SAVETMPS;
2143 /* Apparently this is not needed, judging by wantarray. */
e8347627 2144 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
2145 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2146
3b0527fe 2147 if (reginfo->sv) {
d6a28714 2148 /* Make $_ available to executed code. */
3b0527fe 2149 if (reginfo->sv != DEFSV) {
59f00321 2150 SAVE_DEFSV;
3b0527fe 2151 DEFSV = reginfo->sv;
b8c5462f 2152 }
d6a28714 2153
3b0527fe
DM
2154 if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2155 && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
d6a28714 2156 /* prepare for quick setting of pos */
d300d9fa 2157#ifdef PERL_OLD_COPY_ON_WRITE
51a9ea20
NC
2158 if (SvIsCOW(reginfo->sv))
2159 sv_force_normal_flags(reginfo->sv, 0);
d300d9fa 2160#endif
3dab1dad 2161 mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
d300d9fa 2162 &PL_vtbl_mglob, NULL, 0);
d6a28714 2163 mg->mg_len = -1;
b8c5462f 2164 }
d6a28714
JH
2165 PL_reg_magic = mg;
2166 PL_reg_oldpos = mg->mg_len;
4f639d21 2167 SAVEDESTRUCTOR_X(restore_pos, prog);
d6a28714 2168 }
09687e5a 2169 if (!PL_reg_curpm) {
a02a5408 2170 Newxz(PL_reg_curpm, 1, PMOP);
09687e5a
AB
2171#ifdef USE_ITHREADS
2172 {
be8e71aa 2173 SV* const repointer = newSViv(0);
577e12cc 2174 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2175 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2176 av_push(PL_regex_padav,repointer);
2177 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2178 PL_regex_pad = AvARRAY(PL_regex_padav);
2179 }
2180#endif
2181 }
aaa362c4 2182 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2183 PL_reg_oldcurpm = PL_curpm;
2184 PL_curpm = PL_reg_curpm;
2185 if (RX_MATCH_COPIED(prog)) {
2186 /* Here is a serious problem: we cannot rewrite subbeg,
2187 since it may be needed if this match fails. Thus
2188 $` inside (?{}) could fail... */
2189 PL_reg_oldsaved = prog->subbeg;
2190 PL_reg_oldsavedlen = prog->sublen;
f8c7b90f 2191#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2192 PL_nrs = prog->saved_copy;
2193#endif
d6a28714
JH
2194 RX_MATCH_COPIED_off(prog);
2195 }
2196 else
bd61b366 2197 PL_reg_oldsaved = NULL;
d6a28714
JH
2198 prog->subbeg = PL_bostr;
2199 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2200 }
24b23f37
YO
2201 DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2202 prog->startp[0] = *startpos - PL_bostr;
2203 PL_reginput = *startpos;
d6a28714 2204 PL_reglastparen = &prog->lastparen;
a01268b5 2205 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714 2206 prog->lastparen = 0;
03994de8 2207 prog->lastcloseparen = 0;
d6a28714 2208 PL_regsize = 0;
6bda09f9
YO
2209 PL_regstartp = prog->startp;
2210 PL_regendp = prog->endp;
d6a28714
JH
2211 if (PL_reg_start_tmpl <= prog->nparens) {
2212 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2213 if(PL_reg_start_tmp)
2214 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2215 else
a02a5408 2216 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
d6a28714
JH
2217 }
2218
2219 /* XXXX What this code is doing here?!!! There should be no need
2220 to do this again and again, PL_reglastparen should take care of
3dd2943c 2221 this! --ilya*/
dafc8851
JH
2222
2223 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2224 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2225 * PL_reglastparen), is not needed at all by the test suite
2226 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2227 * enough, for building DynaLoader, or otherwise this
2228 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2229 * will happen. Meanwhile, this code *is* needed for the
2230 * above-mentioned test suite tests to succeed. The common theme
2231 * on those tests seems to be returning null fields from matches.
2232 * --jhi */
dafc8851 2233#if 1
c74340f9
YO
2234 sp = PL_regstartp;
2235 ep = PL_regendp;
d6a28714 2236 if (prog->nparens) {
097eb12c 2237 register I32 i;
eb160463 2238 for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
d6a28714
JH
2239 *++sp = -1;
2240 *++ep = -1;
2241 }
2242 }
dafc8851 2243#endif
02db2b7b 2244 REGCP_SET(lastcp);
3b0527fe 2245 if (regmatch(reginfo, prog->program + 1)) {
c74340f9 2246 PL_regendp[0] = PL_reginput - PL_bostr;
d6a28714
JH
2247 return 1;
2248 }
24b23f37
YO
2249 if (reginfo->cutpoint)
2250 *startpos= reginfo->cutpoint;
02db2b7b 2251 REGCP_UNWIND(lastcp);
d6a28714
JH
2252 return 0;
2253}
2254
02db2b7b 2255
8ba1375e
MJD
2256#define sayYES goto yes
2257#define sayNO goto no
262b90c4 2258#define sayNO_SILENT goto no_silent
8ba1375e 2259
f9f4320a
YO
2260/* we dont use STMT_START/END here because it leads to
2261 "unreachable code" warnings, which are bogus, but distracting. */
2262#define CACHEsayNO \
c476f425
DM
2263 if (ST.cache_mask) \
2264 PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
f9f4320a 2265 sayNO
3298f257 2266
a3621e74 2267/* this is used to determine how far from the left messages like
265c4333
YO
2268 'failed...' are printed. It should be set such that messages
2269 are inline with the regop output that created them.
a3621e74 2270*/
265c4333 2271#define REPORT_CODE_OFF 32
a3621e74
YO
2272
2273
2274/* Make sure there is a test for this +1 options in re_tests */
2275#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2276
40a82448
DM
2277#define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2278#define CHRTEST_VOID -1000 /* the c1/c2 "next char" test should be skipped */
9e137952 2279
86545054
DM
2280#define SLAB_FIRST(s) (&(s)->states[0])
2281#define SLAB_LAST(s) (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2282
5d9a96ca
DM
2283/* grab a new slab and return the first slot in it */
2284
2285STATIC regmatch_state *
2286S_push_slab(pTHX)
2287{
54df2634
NC
2288#if PERL_VERSION < 9
2289 dMY_CXT;
2290#endif
5d9a96ca
DM
2291 regmatch_slab *s = PL_regmatch_slab->next;
2292 if (!s) {
2293 Newx(s, 1, regmatch_slab);
2294 s->prev = PL_regmatch_slab;
2295 s->next = NULL;
2296 PL_regmatch_slab->next = s;
2297 }
2298 PL_regmatch_slab = s;
86545054 2299 return SLAB_FIRST(s);
5d9a96ca 2300}
5b47454d 2301
95b24440 2302
40a82448
DM
2303/* push a new state then goto it */
2304
2305#define PUSH_STATE_GOTO(state, node) \
2306 scan = node; \
2307 st->resume_state = state; \
2308 goto push_state;
2309
2310/* push a new state with success backtracking, then goto it */
2311
2312#define PUSH_YES_STATE_GOTO(state, node) \
2313 scan = node; \
2314 st->resume_state = state; \
2315 goto push_yes_state;
2316
aa283a38 2317
aa283a38 2318
d6a28714 2319/*
95b24440 2320
bf1f174e
DM
2321regmatch() - main matching routine
2322
2323This is basically one big switch statement in a loop. We execute an op,
2324set 'next' to point the next op, and continue. If we come to a point which
2325we may need to backtrack to on failure such as (A|B|C), we push a
2326backtrack state onto the backtrack stack. On failure, we pop the top
2327state, and re-enter the loop at the state indicated. If there are no more
2328states to pop, we return failure.
2329
2330Sometimes we also need to backtrack on success; for example /A+/, where
2331after successfully matching one A, we need to go back and try to
2332match another one; similarly for lookahead assertions: if the assertion
2333completes successfully, we backtrack to the state just before the assertion
2334and then carry on. In these cases, the pushed state is marked as
2335'backtrack on success too'. This marking is in fact done by a chain of
2336pointers, each pointing to the previous 'yes' state. On success, we pop to
2337the nearest yes state, discarding any intermediate failure-only states.
2338Sometimes a yes state is pushed just to force some cleanup code to be
2339called at the end of a successful match or submatch; e.g. (??{$re}) uses
2340it to free the inner regex.
2341
2342Note that failure backtracking rewinds the cursor position, while
2343success backtracking leaves it alone.
2344
2345A pattern is complete when the END op is executed, while a subpattern
2346such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2347ops trigger the "pop to last yes state if any, otherwise return true"
2348behaviour.
2349
2350A common convention in this function is to use A and B to refer to the two
2351subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2352the subpattern to be matched possibly multiple times, while B is the entire
2353rest of the pattern. Variable and state names reflect this convention.
2354
2355The states in the main switch are the union of ops and failure/success of
2356substates associated with with that op. For example, IFMATCH is the op
2357that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2358'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2359successfully matched A and IFMATCH_A_fail is a state saying that we have
2360just failed to match A. Resume states always come in pairs. The backtrack
2361state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2362at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2363on success or failure.
2364
2365The struct that holds a backtracking state is actually a big union, with
2366one variant for each major type of op. The variable st points to the
2367top-most backtrack struct. To make the code clearer, within each
2368block of code we #define ST to alias the relevant union.
2369
2370Here's a concrete example of a (vastly oversimplified) IFMATCH
2371implementation:
2372
2373 switch (state) {
2374 ....
2375
2376#define ST st->u.ifmatch
2377
2378 case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2379 ST.foo = ...; // some state we wish to save
95b24440 2380 ...
bf1f174e
DM
2381 // push a yes backtrack state with a resume value of
2382 // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2383 // first node of A:
2384 PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2385 // NOTREACHED
2386
2387 case IFMATCH_A: // we have successfully executed A; now continue with B
2388 next = B;
2389 bar = ST.foo; // do something with the preserved value
2390 break;
2391
2392 case IFMATCH_A_fail: // A failed, so the assertion failed
2393 ...; // do some housekeeping, then ...
2394 sayNO; // propagate the failure
2395
2396#undef ST
95b24440 2397
bf1f174e
DM
2398 ...
2399 }
95b24440 2400
bf1f174e
DM
2401For any old-timers reading this who are familiar with the old recursive
2402approach, the code above is equivalent to:
95b24440 2403
bf1f174e
DM
2404 case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2405 {
2406 int foo = ...
95b24440 2407 ...
bf1f174e
DM
2408 if (regmatch(A)) {
2409 next = B;
2410 bar = foo;
2411 break;
95b24440 2412 }
bf1f174e
DM
2413 ...; // do some housekeeping, then ...
2414 sayNO; // propagate the failure
95b24440 2415 }
bf1f174e
DM
2416
2417The topmost backtrack state, pointed to by st, is usually free. If you
2418want to claim it, populate any ST.foo fields in it with values you wish to
2419save, then do one of
2420
2421 PUSH_STATE_GOTO(resume_state, node);
2422 PUSH_YES_STATE_GOTO(resume_state, node);
2423
2424which sets that backtrack state's resume value to 'resume_state', pushes a
2425new free entry to the top of the backtrack stack, then goes to 'node'.
2426On backtracking, the free slot is popped, and the saved state becomes the
2427new free state. An ST.foo field in this new top state can be temporarily
2428accessed to retrieve values, but once the main loop is re-entered, it
2429becomes available for reuse.
2430
2431Note that the depth of the backtrack stack constantly increases during the
2432left-to-right execution of the pattern, rather than going up and down with
2433the pattern nesting. For example the stack is at its maximum at Z at the
2434end of the pattern, rather than at X in the following:
2435
2436 /(((X)+)+)+....(Y)+....Z/
2437
2438The only exceptions to this are lookahead/behind assertions and the cut,
2439(?>A), which pop all the backtrack states associated with A before
2440continuing.
2441
2442Bascktrack state structs are allocated in slabs of about 4K in size.
2443PL_regmatch_state and st always point to the currently active state,
2444and PL_regmatch_slab points to the slab currently containing
2445PL_regmatch_state. The first time regmatch() is called, the first slab is
2446allocated, and is never freed until interpreter destruction. When the slab
2447is full, a new one is allocated and chained to the end. At exit from
2448regmatch(), slabs allocated since entry are freed.
2449
2450*/
95b24440 2451
40a82448 2452
5bc10b2c 2453#define DEBUG_STATE_pp(pp) \
265c4333 2454 DEBUG_STATE_r({ \
5bc10b2c
DM
2455 DUMP_EXEC_POS(locinput, scan, do_utf8); \
2456 PerlIO_printf(Perl_debug_log, \
5d458dd8 2457 " %*s"pp" %s%s%s%s%s\n", \
5bc10b2c 2458 depth*2, "", \
5d458dd8
YO
2459 reg_name[st->resume_state], \
2460 ((st==yes_state||st==mark_state) ? "[" : ""), \
2461 ((st==yes_state) ? "Y" : ""), \
2462 ((st==mark_state) ? "M" : ""), \
2463 ((st==yes_state||st==mark_state) ? "]" : "") \
2464 ); \
265c4333 2465 });
5bc10b2c 2466
40a82448 2467
3dab1dad 2468#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
95b24440 2469
3df15adc 2470#ifdef DEBUGGING
5bc10b2c 2471
ab3bbdeb
YO
2472STATIC void
2473S_debug_start_match(pTHX_ const regexp *prog, const bool do_utf8,
2474 const char *start, const char *end, const char *blurb)
2475{
bbe252da 2476 const bool utf8_pat= prog->extflags & RXf_UTF8 ? 1 : 0;
ab3bbdeb
YO
2477 if (!PL_colorset)
2478 reginitcolors();
2479 {
2480 RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2481 prog->precomp, prog->prelen, 60);
2482
2483 RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2484 start, end - start, 60);
2485
2486 PerlIO_printf(Perl_debug_log,
2487 "%s%s REx%s %s against %s\n",
2488 PL_colors[4], blurb, PL_colors[5], s0, s1);
2489
2490 if (do_utf8||utf8_pat)
1de06328
YO
2491 PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2492 utf8_pat ? "pattern" : "",
2493 utf8_pat && do_utf8 ? " and " : "",
2494 do_utf8 ? "string" : ""
ab3bbdeb
YO
2495 );
2496 }
2497}
3df15adc
YO
2498
2499STATIC void
786e8c11
YO
2500S_dump_exec_pos(pTHX_ const char *locinput,
2501 const regnode *scan,
2502 const char *loc_regeol,
2503 const char *loc_bostr,
2504 const char *loc_reg_starttry,
2505 const bool do_utf8)
07be1b83 2506{
786e8c11 2507 const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
07be1b83 2508 const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
786e8c11 2509 int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
07be1b83
YO
2510 /* The part of the string before starttry has one color
2511 (pref0_len chars), between starttry and current
2512 position another one (pref_len - pref0_len chars),
2513 after the current position the third one.
2514 We assume that pref0_len <= pref_len, otherwise we
2515 decrease pref0_len. */
786e8c11
YO
2516 int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2517 ? (5 + taill) - l : locinput - loc_bostr;
07be1b83
YO
2518 int pref0_len;
2519
2520 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2521 pref_len++;
786e8c11
YO
2522 pref0_len = pref_len - (locinput - loc_reg_starttry);
2523 if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2524 l = ( loc_regeol - locinput > (5 + taill) - pref_len
2525 ? (5 + taill) - pref_len : loc_regeol - locinput);
07be1b83
YO
2526 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2527 l--;
2528 if (pref0_len < 0)
2529 pref0_len = 0;
2530 if (pref0_len > pref_len)
2531 pref0_len = pref_len;
2532 {
3df15adc 2533 const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
0df25f3d 2534
ab3bbdeb 2535 RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
1de06328 2536 (locinput - pref_len),pref0_len, 60, 4, 5);
0df25f3d 2537
ab3bbdeb 2538 RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
3df15adc 2539 (locinput - pref_len + pref0_len),
1de06328 2540 pref_len - pref0_len, 60, 2, 3);
0df25f3d 2541
ab3bbdeb 2542 RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
1de06328 2543 locinput, loc_regeol - locinput, 10, 0, 1);
0df25f3d 2544
1de06328 2545 const STRLEN tlen=len0+len1+len2;
3df15adc 2546 PerlIO_printf(Perl_debug_log,
ab3bbdeb 2547 "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
786e8c11 2548 (IV)(locinput - loc_bostr),
07be1b83 2549 len0, s0,
07be1b83 2550 len1, s1,
07be1b83 2551 (docolor ? "" : "> <"),
07be1b83 2552 len2, s2,
f9f4320a 2553 (int)(tlen > 19 ? 0 : 19 - tlen),
07be1b83
YO
2554 "");
2555 }
2556}
3df15adc 2557
07be1b83
YO
2558#endif
2559
0a4db386
YO
2560/* reg_check_named_buff_matched()
2561 * Checks to see if a named buffer has matched. The data array of
2562 * buffer numbers corresponding to the buffer is expected to reside
2563 * in the regexp->data->data array in the slot stored in the ARG() of
2564 * node involved. Note that this routine doesn't actually care about the
2565 * name, that information is not preserved from compilation to execution.
2566 * Returns the index of the leftmost defined buffer with the given name
2567 * or 0 if non of the buffers matched.
2568 */
2569STATIC I32
2570S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan) {
2571 I32 n;
2572 SV *sv_dat=(SV*)rex->data->data[ ARG( scan ) ];
2573 I32 *nums=(I32*)SvPVX(sv_dat);
2574 for ( n=0; n<SvIVX(sv_dat); n++ ) {
2575 if ((I32)*PL_reglastparen >= nums[n] &&
2576 PL_regendp[nums[n]] != -1)
2577 {
2578 return nums[n];
2579 }
2580 }
2581 return 0;
2582}
2583
d6a28714 2584STATIC I32 /* 0 failure, 1 success */
24b23f37 2585S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
d6a28714 2586{
54df2634
NC
2587#if PERL_VERSION < 9
2588 dMY_CXT;
2589#endif
27da23d5 2590 dVAR;
95b24440 2591 register const bool do_utf8 = PL_reg_match_utf8;
4ad0818d 2592 const U32 uniflags = UTF8_ALLOW_DEFAULT;
95b24440 2593
3b0527fe
DM
2594 regexp *rex = reginfo->prog;
2595
5d9a96ca
DM
2596 regmatch_slab *orig_slab;
2597 regmatch_state *orig_state;
a3621e74 2598
5d9a96ca
DM
2599 /* the current state. This is a cached copy of PL_regmatch_state */
2600 register regmatch_state *st;
95b24440 2601
5d9a96ca
DM
2602 /* cache heavy used fields of st in registers */
2603 register regnode *scan;
2604 register regnode *next;
438e9bae 2605 register U32 n = 0; /* general value; init to avoid compiler warning */
24d3c4a9 2606 register I32 ln = 0; /* len or last; init to avoid compiler warning */
5d9a96ca 2607 register char *locinput = PL_reginput;
5d9a96ca 2608 register I32 nextchr; /* is always set to UCHARAT(locinput) */
24d3c4a9 2609
b69b0499 2610 bool result = 0; /* return value of S_regmatch */
24d3c4a9 2611 int depth = 0; /* depth of backtrack stack */
1a147d38 2612 int nochange_depth = 0; /* depth of GOSUB recursion with nochange*/
77cb431f
DM
2613 regmatch_state *yes_state = NULL; /* state to pop to on success of
2614 subpattern */
e2e6a0f1
YO
2615 /* mark_state piggy backs on the yes_state logic so that when we unwind
2616 the stack on success we can update the mark_state as we go */
2617 regmatch_state *mark_state = NULL; /* last mark state we have seen */
5d458dd8 2618
faec1544 2619 regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
b8591aee 2620 struct regmatch_state *cur_curlyx = NULL; /* most recent curlyx */
40a82448 2621 U32 state_num;
5d458dd8
YO
2622 bool no_final = 0; /* prevent failure from backtracking? */
2623 bool do_cutgroup = 0; /* no_final only until next branch/trie entry */
e2e6a0f1 2624 char *startpoint = PL_reginput;
5d458dd8
YO
2625 SV *popmark = NULL; /* are we looking for a mark? */
2626 SV *sv_commit = NULL; /* last mark name seen in failure */
2627 SV *sv_yes_mark = NULL; /* last mark name we have seen
2628 during a successfull match */
2629 U32 lastopen = 0; /* last open we saw */
2630 bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
58e23c8d 2631
5d458dd8 2632
24d3c4a9
DM
2633 /* these three flags are set by various ops to signal information to
2634 * the very next op. They have a useful lifetime of exactly one loop
2635 * iteration, and are not preserved or restored by state pushes/pops
2636 */
2637 bool sw = 0; /* the condition value in (?(cond)a|b) */
2638 bool minmod = 0; /* the next "{n,m}" is a "{n,m}?" */
2639 int logical = 0; /* the following EVAL is:
2640 0: (?{...})
2641 1: (?(?{...})X|Y)
2642 2: (??{...})
2643 or the following IFMATCH/UNLESSM is:
2644 false: plain (?=foo)
2645 true: used as a condition: (?(?=foo))
2646 */
2647
95b24440 2648#ifdef DEBUGGING
e68ec53f 2649 GET_RE_DEBUG_FLAGS_DECL;
d6a28714
JH
2650#endif
2651
58e23c8d 2652 DEBUG_OPTIMISE_r( {
24b23f37
YO
2653 PerlIO_printf(Perl_debug_log,"regmatch start\n");
2654 });
5d9a96ca
DM
2655 /* on first ever call to regmatch, allocate first slab */
2656 if (!PL_regmatch_slab) {
2657 Newx(PL_regmatch_slab, 1, regmatch_slab);
2658 PL_regmatch_slab->prev = NULL;
2659 PL_regmatch_slab->next = NULL;
86545054 2660 PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
5d9a96ca
DM
2661 }
2662
2663 /* remember current high-water mark for exit */
2664 /* XXX this should be done with SAVE* instead */
2665 orig_slab = PL_regmatch_slab;
2666 orig_state = PL_regmatch_state;
2667
2668 /* grab next free state slot */
2669 st = ++PL_regmatch_state;
86545054 2670 if (st > SLAB_LAST(PL_regmatch_slab))
5d9a96ca
DM
2671 st = PL_regmatch_state = S_push_slab(aTHX);
2672
d6a28714
JH
2673 /* Note that nextchr is a byte even in UTF */
2674 nextchr = UCHARAT(locinput);
2675 scan = prog;
2676 while (scan != NULL) {
8ba1375e 2677
a3621e74 2678 DEBUG_EXECUTE_r( {
6136c704 2679 SV * const prop = sv_newmortal();
1de06328 2680 regnode *rnext=regnext(scan);
786e8c11 2681 DUMP_EXEC_POS( locinput, scan, do_utf8 );
32fc9b6a 2682 regprop(rex, prop, scan);
07be1b83
YO
2683
2684 PerlIO_printf(Perl_debug_log,
2685 "%3"IVdf":%*s%s(%"IVdf")\n",
5bc10b2c 2686 (IV)(scan - rex->program), depth*2, "",
07be1b83 2687 SvPVX_const(prop),
1de06328
YO
2688 (PL_regkind[OP(scan)] == END || !rnext) ?
2689 0 : (IV)(rnext - rex->program));
2a782b5b 2690 });
d6a28714
JH
2691
2692 next = scan + NEXT_OFF(scan);
2693 if (next == scan)
2694 next = NULL;
40a82448 2695 state_num = OP(scan);
d6a28714 2696
40a82448
DM
2697 reenter_switch:
2698 switch (state_num) {
d6a28714 2699 case BOL:
7fba1cd6 2700 if (locinput == PL_bostr)
d6a28714 2701 {
3b0527fe 2702 /* reginfo->till = reginfo->bol; */
b8c5462f
JH
2703 break;
2704 }
d6a28714
JH
2705 sayNO;
2706 case MBOL:
12d33761
HS
2707 if (locinput == PL_bostr ||
2708 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2709 {
b8c5462f
JH
2710 break;
2711 }
d6a28714
JH
2712 sayNO;
2713 case SBOL:
c2a73568 2714 if (locinput == PL_bostr)
b8c5462f 2715 break;
d6a28714
JH
2716 sayNO;
2717 case GPOS:
3b0527fe 2718 if (locinput == reginfo->ganch)
d6a28714
JH
2719 break;
2720 sayNO;
2721 case EOL:
d6a28714
JH
2722 goto seol;
2723 case MEOL:
d6a28714 2724 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2725 sayNO;
b8c5462f 2726 break;
d6a28714
JH
2727 case SEOL:
2728 seol:
2729 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2730 sayNO;
d6a28714 2731 if (PL_regeol - locinput > 1)
b8c5462f 2732 sayNO;
b8c5462f 2733 break;
d6a28714
JH
2734 case EOS:
2735 if (PL_regeol != locinput)
b8c5462f 2736 sayNO;
d6a28714 2737 break;
ffc61ed2 2738 case SANY:
d6a28714 2739 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2740 sayNO;
f33976b4
DB
2741 if (do_utf8) {
2742 locinput += PL_utf8skip[nextchr];
2743 if (locinput > PL_regeol)
2744 sayNO;
2745 nextchr = UCHARAT(locinput);
2746 }
2747 else
2748 nextchr = UCHARAT(++locinput);
2749 break;
2750 case CANY:
2751 if (!nextchr && locinput >= PL_regeol)
2752 sayNO;
b8c5462f 2753 nextchr = UCHARAT(++locinput);
a0d0e21e 2754 break;
ffc61ed2 2755 case REG_ANY:
1aa99e6b
IH
2756 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2757 sayNO;
2758 if (do_utf8) {
b8c5462f 2759 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2760 if (locinput > PL_regeol)
2761 sayNO;
a0ed51b3 2762 nextchr = UCHARAT(locinput);
a0ed51b3 2763 }
1aa99e6b
IH
2764 else
2765 nextchr = UCHARAT(++locinput);
a0ed51b3 2766 break;
166ba7cd
DM
2767
2768#undef ST
2769#define ST st->u.trie
786e8c11
YO
2770 case TRIEC:
2771 /* In this case the charclass data is available inline so
2772 we can fail fast without a lot of extra overhead.
2773 */
2774 if (scan->flags == EXACT || !do_utf8) {
2775 if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
2776 DEBUG_EXECUTE_r(
2777 PerlIO_printf(Perl_debug_log,
2778 "%*s %sfailed to match trie start class...%s\n",
5bc10b2c 2779 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
786e8c11
YO
2780 );
2781 sayNO_SILENT;
2782 /* NOTREACHED */
2783 }
2784 }
2785 /* FALL THROUGH */
5b47454d 2786 case TRIE:
3dab1dad 2787 {
07be1b83 2788 /* what type of TRIE am I? (utf8 makes this contextual) */
3dab1dad
YO
2789 const enum { trie_plain, trie_utf8, trie_utf8_fold }
2790 trie_type = do_utf8 ?
2791 (scan->flags == EXACT ? trie_utf8 : trie_utf8_fold)
2792 : trie_plain;
2793
2794 /* what trie are we using right now */
be8e71aa 2795 reg_trie_data * const trie
3dab1dad
YO
2796 = (reg_trie_data*)rex->data->data[ ARG( scan ) ];
2797 U32 state = trie->startstate;
166ba7cd 2798
3dab1dad
YO
2799 if (trie->bitmap && trie_type != trie_utf8_fold &&
2800 !TRIE_BITMAP_TEST(trie,*locinput)
2801 ) {
2802 if (trie->states[ state ].wordnum) {
2803 DEBUG_EXECUTE_r(
2804 PerlIO_printf(Perl_debug_log,
2805 "%*s %smatched empty string...%s\n",
5bc10b2c 2806 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3dab1dad
YO
2807 );
2808 break;
2809 } else {
2810 DEBUG_EXECUTE_r(
2811 PerlIO_printf(Perl_debug_log,
786e8c11 2812 "%*s %sfailed to match trie start class...%s\n",
5bc10b2c 2813 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3dab1dad
YO
2814 );
2815 sayNO_SILENT;
2816 }
2817 }
166ba7cd 2818
786e8c11
YO
2819 {
2820 U8 *uc = ( U8* )locinput;
2821
2822 STRLEN len = 0;
2823 STRLEN foldlen = 0;
2824 U8 *uscan = (U8*)NULL;
2825 STRLEN bufflen=0;
2826 SV *sv_accept_buff = NULL;
2827 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2828
2829 ST.accepted = 0; /* how many accepting states we have seen */
2830 ST.B = next;
2831 ST.jump = trie->jump;
786e8c11 2832 ST.me = scan;
786e8c11 2833
07be1b83
YO
2834 /*
2835 traverse the TRIE keeping track of all accepting states
2836 we transition through until we get to a failing node.
2837 */
2838
a3621e74 2839 while ( state && uc <= (U8*)PL_regeol ) {
786e8c11 2840 U32 base = trie->states[ state ].trans.base;
f9f4320a 2841 UV uvc = 0;
786e8c11
YO
2842 U16 charid;
2843 /* We use charid to hold the wordnum as we don't use it
2844 for charid until after we have done the wordnum logic.
2845 We define an alias just so that the wordnum logic reads
2846 more naturally. */
2847
2848#define got_wordnum charid
2849 got_wordnum = trie->states[ state ].wordnum;
2850
2851 if ( got_wordnum ) {
2852 if ( ! ST.accepted ) {
5b47454d
DM
2853 ENTER;
2854 SAVETMPS;
2855 bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
2856 sv_accept_buff=newSV(bufflen *
2857 sizeof(reg_trie_accepted) - 1);
786e8c11 2858 SvCUR_set(sv_accept_buff, 0);
5b47454d
DM
2859 SvPOK_on(sv_accept_buff);
2860 sv_2mortal(sv_accept_buff);
166ba7cd
DM
2861 SAVETMPS;
2862 ST.accept_buff =
5b47454d
DM
2863 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
2864 }
786e8c11 2865 do {
166ba7cd 2866 if (ST.accepted >= bufflen) {
5b47454d 2867 bufflen *= 2;
166ba7cd 2868 ST.accept_buff =(reg_trie_accepted*)
5b47454d
DM
2869 SvGROW(sv_accept_buff,
2870 bufflen * sizeof(reg_trie_accepted));
2871 }
2872 SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
2873 + sizeof(reg_trie_accepted));
a3621e74 2874
786e8c11
YO
2875
2876 ST.accept_buff[ST.accepted].wordnum = got_wordnum;
2877 ST.accept_buff[ST.accepted].endpos = uc;
2878 ++ST.accepted;
2879 } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
2880 }
2881#undef got_wordnum
a3621e74 2882
07be1b83 2883 DEBUG_TRIE_EXECUTE_r({
786e8c11 2884 DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
a3621e74 2885 PerlIO_printf( Perl_debug_log,
786e8c11 2886 "%*s %sState: %4"UVxf" Accepted: %4"UVxf" ",
5bc10b2c 2887 2+depth * 2, "", PL_colors[4],
786e8c11 2888 (UV)state, (UV)ST.accepted );
07be1b83 2889 });
a3621e74
YO
2890
2891 if ( base ) {
4cadc6a9
YO
2892 REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len,
2893 uvc, charid, foldlen, foldbuf, uniflags);
a3621e74 2894
5b47454d
DM
2895 if (charid &&
2896 (base + charid > trie->uniquecharcount )
2897 && (base + charid - 1 - trie->uniquecharcount
2898 < trie->lasttrans)
2899 && trie->trans[base + charid - 1 -
2900 trie->uniquecharcount].check == state)
2901 {
2902 state = trie->trans[base + charid - 1 -
2903 trie->uniquecharcount ].next;
2904 }
2905 else {
2906 state = 0;
2907 }
2908 uc += len;
2909
2910 }
2911 else {
a3621e74
YO
2912 state = 0;
2913 }
2914 DEBUG_TRIE_EXECUTE_r(
e4584336 2915 PerlIO_printf( Perl_debug_log,
786e8c11 2916 "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
e4584336 2917 charid, uvc, (UV)state, PL_colors[5] );
a3621e74
YO
2918 );
2919 }
166ba7cd 2920 if (!ST.accepted )
a3621e74 2921 sayNO;
a3621e74 2922
166ba7cd
DM
2923 DEBUG_EXECUTE_r(
2924 PerlIO_printf( Perl_debug_log,
2925 "%*s %sgot %"IVdf" possible matches%s\n",
5bc10b2c 2926 REPORT_CODE_OFF + depth * 2, "",
166ba7cd
DM
2927 PL_colors[4], (IV)ST.accepted, PL_colors[5] );
2928 );
786e8c11 2929 }}
166ba7cd
DM
2930
2931 /* FALL THROUGH */
166ba7cd 2932 case TRIE_next_fail: /* we failed - try next alterative */
5d458dd8
YO
2933 if (do_cutgroup) {
2934 do_cutgroup = 0;
2935 no_final = 0;
2936 }
166ba7cd
DM
2937 if ( ST.accepted == 1 ) {
2938 /* only one choice left - just continue */
2939 DEBUG_EXECUTE_r({
2940 reg_trie_data * const trie
2941 = (reg_trie_data*)rex->data->data[ ARG(ST.me) ];
5c9f2f80
YO
2942 SV ** const tmp = av_fetch( trie->words,
2943 ST.accept_buff[ 0 ].wordnum-1, 0 );
de734bd5
A
2944 SV *sv= tmp ? sv_newmortal() : NULL;
2945
166ba7cd
DM
2946 PerlIO_printf( Perl_debug_log,
2947 "%*s %sonly one match left: #%d <%s>%s\n",
5bc10b2c 2948 REPORT_CODE_OFF+depth*2, "", PL_colors[4],
166ba7cd 2949 ST.accept_buff[ 0 ].wordnum,
de734bd5
A
2950 tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
2951 PL_colors[0], PL_colors[1],
2952 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
2953 )
2954 : "not compiled under -Dr",
166ba7cd
DM
2955 PL_colors[5] );
2956 });
2957 PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
2958 /* in this case we free tmps/leave before we call regmatch
2959 as we wont be using accept_buff again. */
5d458dd8 2960
166ba7cd
DM
2961 locinput = PL_reginput;
2962 nextchr = UCHARAT(locinput);
5d458dd8
YO
2963 if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
2964 scan = ST.B;
2965 else
2966 scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
2967 if (!has_cutgroup) {
2968 FREETMPS;
2969 LEAVE;
2970 } else {
2971 ST.accepted--;
2972 PUSH_YES_STATE_GOTO(TRIE_next, scan);
2973 }
786e8c11 2974
166ba7cd
DM
2975 continue; /* execute rest of RE */
2976 }
2977
2978 if (!ST.accepted-- ) {
5d458dd8
YO
2979 DEBUG_EXECUTE_r({
2980 PerlIO_printf( Perl_debug_log,
2981 "%*s %sTRIE failed...%s\n",
2982 REPORT_CODE_OFF+depth*2, "",
2983 PL_colors[4],
2984 PL_colors[5] );
2985 });
166ba7cd
DM
2986 FREETMPS;
2987 LEAVE;
5d458dd8 2988 sayNO_SILENT;
166ba7cd
DM
2989 }
2990
a3621e74 2991 /*
166ba7cd
DM
2992 There are at least two accepting states left. Presumably
2993 the number of accepting states is going to be low,
2994 typically two. So we simply scan through to find the one
2995 with lowest wordnum. Once we find it, we swap the last
2996 state into its place and decrement the size. We then try to
2997 match the rest of the pattern at the point where the word
2998 ends. If we succeed, control just continues along the
2999 regex; if we fail we return here to try the next accepting
3000 state
3001 */
a3621e74 3002
166ba7cd
DM
3003 {
3004 U32 best = 0;
3005 U32 cur;
3006 for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
3007 DEBUG_TRIE_EXECUTE_r(
f2278c82 3008 PerlIO_printf( Perl_debug_log,
166ba7cd 3009 "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
5bc10b2c 3010 REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
166ba7cd
DM
3011 (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
3012 ST.accept_buff[ cur ].wordnum, PL_colors[5] );
3013 );
3014
3015 if (ST.accept_buff[cur].wordnum <
3016 ST.accept_buff[best].wordnum)
3017 best = cur;
a3621e74 3018 }
166ba7cd
DM
3019
3020 DEBUG_EXECUTE_r({
3021 reg_trie_data * const trie
3022 = (reg_trie_data*)rex->data->data[ ARG(ST.me) ];
5c9f2f80
YO
3023 SV ** const tmp = av_fetch( trie->words,
3024 ST.accept_buff[ best ].wordnum - 1, 0 );
7f69552c 3025 regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
265c4333 3026 ST.B :
7f69552c 3027 ST.me + ST.jump[ST.accept_buff[best].wordnum];
de734bd5
A
3028 SV *sv= tmp ? sv_newmortal() : NULL;
3029
265c4333
YO
3030 PerlIO_printf( Perl_debug_log,
3031 "%*s %strying alternation #%d <%s> at node #%d %s\n",
5bc10b2c 3032 REPORT_CODE_OFF+depth*2, "", PL_colors[4],
166ba7cd 3033 ST.accept_buff[best].wordnum,
de734bd5
A
3034 tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3035 PL_colors[0], PL_colors[1],
3036 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3037 ) : "not compiled under -Dr",
265c4333 3038 REG_NODE_NUM(nextop),
166ba7cd
DM
3039 PL_colors[5] );
3040 });
3041
3042 if ( best<ST.accepted ) {
3043 reg_trie_accepted tmp = ST.accept_buff[ best ];
3044 ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
3045 ST.accept_buff[ ST.accepted ] = tmp;
3046 best = ST.accepted;
a3621e74 3047 }
166ba7cd 3048 PL_reginput = (char *)ST.accept_buff[ best ].endpos;
7f69552c 3049 if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
5d458dd8 3050 scan = ST.B;
786e8c11
YO
3051 /* NOTREACHED */
3052 } else {
5d458dd8 3053 scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
786e8c11
YO
3054 /* NOTREACHED */
3055 }
5d458dd8
YO
3056 if (has_cutgroup) {
3057 PUSH_YES_STATE_GOTO(TRIE_next, scan);
3058 /* NOTREACHED */
3059 } else {
3060 PUSH_STATE_GOTO(TRIE_next, scan);
3061 /* NOTREACHED */
3062 }
786e8c11 3063 /* NOTREACHED */
166ba7cd 3064 }
166ba7cd 3065 /* NOTREACHED */
5d458dd8
YO
3066 case TRIE_next:
3067 FREETMPS;
3068 LEAVE;
3069 sayYES;
166ba7cd
DM
3070#undef ST
3071
95b24440
DM
3072 case EXACT: {
3073 char *s = STRING(scan);
24d3c4a9 3074 ln = STR_LEN(scan);
eb160463 3075 if (do_utf8 != UTF) {
bc517b45 3076 /* The target and the pattern have differing utf8ness. */
1aa99e6b 3077 char *l = locinput;
24d3c4a9 3078 const char * const e = s + ln;
a72c7584 3079
5ff6fc6d
JH
3080 if (do_utf8) {
3081 /* The target is utf8, the pattern is not utf8. */
1aa99e6b 3082 while (s < e) {
a3b680e6 3083 STRLEN ulen;
1aa99e6b 3084 if (l >= PL_regeol)
5ff6fc6d
JH
3085 sayNO;
3086 if (NATIVE_TO_UNI(*(U8*)s) !=
89ebb4a3 3087 utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
041457d9 3088 uniflags))
5ff6fc6d 3089 sayNO;
bc517b45 3090 l += ulen;
5ff6fc6d 3091 s ++;
1aa99e6b 3092 }
5ff6fc6d
JH
3093 }
3094 else {
3095 /* The target is not utf8, the pattern is utf8. */
1aa99e6b 3096 while (s < e) {
a3b680e6 3097 STRLEN ulen;
1aa99e6b
IH
3098 if (l >= PL_regeol)
3099 sayNO;
5ff6fc6d 3100 if (NATIVE_TO_UNI(*((U8*)l)) !=
89ebb4a3 3101 utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
041457d9 3102 uniflags))
1aa99e6b 3103 sayNO;
bc517b45 3104 s += ulen;
a72c7584 3105 l ++;
1aa99e6b 3106 }
5ff6fc6d 3107 }
1aa99e6b
IH
3108 locinput = l;
3109 nextchr = UCHARAT(locinput);
3110 break;
3111 }
bc517b45 3112 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
3113 /* Inline the first character, for speed. */
3114 if (UCHARAT(s) != nextchr)
3115 sayNO;
24d3c4a9 3116 if (PL_regeol - locinput < ln)
d6a28714 3117 sayNO;
24d3c4a9 3118 if (ln > 1 && memNE(s, locinput, ln))
d6a28714 3119 sayNO;
24d3c4a9 3120 locinput += ln;
d6a28714
JH
3121 nextchr = UCHARAT(locinput);
3122 break;
95b24440 3123 }
d6a28714 3124 case EXACTFL:
b8c5462f
JH
3125 PL_reg_flags |= RF_tainted;
3126 /* FALL THROUGH */
95b24440 3127 case EXACTF: {
be8e71aa 3128 char * const s = STRING(scan);
24d3c4a9 3129 ln = STR_LEN(scan);
d6a28714 3130
d07ddd77
JH
3131 if (do_utf8 || UTF) {
3132 /* Either target or the pattern are utf8. */
be8e71aa 3133 const char * const l = locinput;
d07ddd77 3134 char *e = PL_regeol;
bc517b45 3135
24d3c4a9 3136 if (ibcmp_utf8(s, 0, ln, (bool)UTF,
1feea2c7 3137 l, &e, 0, do_utf8)) {
5486206c
JH
3138 /* One more case for the sharp s:
3139 * pack("U0U*", 0xDF) =~ /ss/i,
3140 * the 0xC3 0x9F are the UTF-8
3141 * byte sequence for the U+00DF. */
3142 if (!(do_utf8 &&
3143 toLOWER(s[0]) == 's' &&
24d3c4a9 3144 ln >= 2 &&
5486206c
JH
3145 toLOWER(s[1]) == 's' &&
3146 (U8)l[0] == 0xC3 &&
3147 e - l >= 2 &&
3148 (U8)l[1] == 0x9F))
3149 sayNO;
3150 }
d07ddd77
JH
3151 locinput = e;
3152 nextchr = UCHARAT(locinput);
3153 break;
a0ed51b3 3154 }
d6a28714 3155
bc517b45
JH
3156 /* Neither the target and the pattern are utf8. */
3157
d6a28714
JH
3158 /* Inline the first character, for speed. */
3159 if (UCHARAT(s) != nextchr &&
3160 UCHARAT(s) != ((OP(scan) == EXACTF)
3161 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 3162 sayNO;
24d3c4a9 3163 if (PL_regeol - locinput < ln)
b8c5462f 3164 sayNO;
24d3c4a9
DM
3165 if (ln > 1 && (OP(scan) == EXACTF
3166 ? ibcmp(s, locinput, ln)
3167 : ibcmp_locale(s, locinput, ln)))
4633a7c4 3168 sayNO;
24d3c4a9 3169 locinput += ln;
d6a28714 3170 nextchr = UCHARAT(locinput);
a0d0e21e 3171 break;
95b24440 3172 }
d6a28714 3173 case ANYOF:
ffc61ed2 3174 if (do_utf8) {
9e55ce06
JH
3175 STRLEN inclasslen = PL_regeol - locinput;
3176
32fc9b6a 3177 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
262b90c4 3178 goto anyof_fail;
ffc61ed2
JH
3179 if (locinput >= PL_regeol)
3180 sayNO;
0f0076b4 3181 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
b8c5462f 3182 nextchr = UCHARAT(locinput);
e0f9d4a8 3183 break;
ffc61ed2
JH
3184 }
3185 else {
3186 if (nextchr < 0)
3187 nextchr = UCHARAT(locinput);
32fc9b6a 3188 if (!REGINCLASS(rex, scan, (U8*)locinput))
262b90c4 3189 goto anyof_fail;
ffc61ed2
JH
3190 if (!nextchr && locinput >= PL_regeol)
3191 sayNO;
3192 nextchr = UCHARAT(++locinput);
e0f9d4a8
JH
3193 break;
3194 }
262b90c4 3195 anyof_fail:
e0f9d4a8
JH
3196 /* If we might have the case of the German sharp s
3197 * in a casefolding Unicode character class. */
3198
ebc501f0
JH
3199 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3200 locinput += SHARP_S_SKIP;
e0f9d4a8 3201 nextchr = UCHARAT(locinput);
ffc61ed2 3202 }
e0f9d4a8
JH
3203 else
3204 sayNO;
b8c5462f 3205 break;
d6a28714 3206 case ALNUML:
b8c5462f
JH
3207 PL_reg_flags |= RF_tainted;
3208 /* FALL THROUGH */
d6a28714 3209 case ALNUM:
b8c5462f 3210 if (!nextchr)
4633a7c4 3211 sayNO;
ffc61ed2 3212 if (do_utf8) {
1a4fad37 3213 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3214 if (!(OP(scan) == ALNUM
bb7a0f54 3215 ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 3216 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
3217 {
3218 sayNO;
a0ed51b3 3219 }
b8c5462f 3220 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3221 nextchr = UCHARAT(locinput);
3222 break;
3223 }
ffc61ed2 3224 if (!(OP(scan) == ALNUM
d6a28714 3225 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 3226 sayNO;
b8c5462f 3227 nextchr = UCHARAT(++locinput);
a0d0e21e 3228 break;
d6a28714 3229 case NALNUML:
b8c5462f
JH
3230 PL_reg_flags |= RF_tainted;
3231 /* FALL THROUGH */
d6a28714
JH
3232 case NALNUM:
3233 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 3234 sayNO;
ffc61ed2 3235 if (do_utf8) {
1a4fad37 3236 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3237 if (OP(scan) == NALNUM
bb7a0f54 3238 ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
3239 : isALNUM_LC_utf8((U8*)locinput))
3240 {
b8c5462f 3241 sayNO;
d6a28714 3242 }
b8c5462f
JH
3243 locinput += PL_utf8skip[nextchr];
3244 nextchr = UCHARAT(locinput);
3245 break;
3246 }
ffc61ed2 3247 if (OP(scan) == NALNUM
d6a28714 3248 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 3249 sayNO;
76e3520e 3250 nextchr = UCHARAT(++locinput);
a0d0e21e 3251 break;
d6a28714
JH
3252 case BOUNDL:
3253 case NBOUNDL:
3280af22 3254 PL_reg_flags |= RF_tainted;
bbce6d69 3255 /* FALL THROUGH */
d6a28714
JH
3256 case BOUND:
3257 case NBOUND:
3258 /* was last char in word? */
ffc61ed2 3259 if (do_utf8) {
12d33761 3260 if (locinput == PL_bostr)
24d3c4a9 3261 ln = '\n';
ffc61ed2 3262 else {
a3b680e6 3263 const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
9041c2e3 3264
24d3c4a9 3265 ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
ffc61ed2
JH
3266 }
3267 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
24d3c4a9 3268 ln = isALNUM_uni(ln);
1a4fad37 3269 LOAD_UTF8_CHARCLASS_ALNUM();
3568d838 3270 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
3271 }
3272 else {
24d3c4a9 3273 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
3274 n = isALNUM_LC_utf8((U8*)locinput);
3275 }
a0ed51b3 3276 }
d6a28714 3277 else {
24d3c4a9 3278 ln = (locinput != PL_bostr) ?
12d33761 3279 UCHARAT(locinput - 1) : '\n';
ffc61ed2 3280 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
24d3c4a9 3281 ln = isALNUM(ln);
ffc61ed2
JH
3282 n = isALNUM(nextchr);
3283 }
3284 else {
24d3c4a9 3285 ln = isALNUM_LC(ln);
ffc61ed2
JH
3286 n = isALNUM_LC(nextchr);
3287 }
d6a28714 3288 }
24d3c4a9 3289 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
ffc61ed2
JH
3290 OP(scan) == BOUNDL))
3291 sayNO;
a0ed51b3 3292 break;
d6a28714 3293 case SPACEL:
3280af22 3294 PL_reg_flags |= RF_tainted;
bbce6d69 3295 /* FALL THROUGH */
d6a28714 3296 case SPACE:
9442cb0e 3297 if (!nextchr)
4633a7c4 3298 sayNO;
1aa99e6b 3299 if (do_utf8) {
fd400ab9 3300 if (UTF8_IS_CONTINUED(nextchr)) {
1a4fad37 3301 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3302 if (!(OP(scan) == SPACE
bb7a0f54 3303 ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
3304 : isSPACE_LC_utf8((U8*)locinput)))
3305 {
3306 sayNO;
3307 }
3308 locinput += PL_utf8skip[nextchr];
3309 nextchr = UCHARAT(locinput);
3310 break;
d6a28714 3311 }
ffc61ed2
JH
3312 if (!(OP(scan) == SPACE
3313 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3314 sayNO;
3315 nextchr = UCHARAT(++locinput);
3316 }
3317 else {
3318 if (!(OP(scan) == SPACE
3319 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3320 sayNO;
3321 nextchr = UCHARAT(++locinput);
a0ed51b3 3322 }
a0ed51b3 3323 break;
d6a28714 3324 case NSPACEL:
3280af22 3325 PL_reg_flags |= RF_tainted;
bbce6d69 3326 /* FALL THROUGH */
d6a28714 3327 case NSPACE:
9442cb0e 3328 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3329 sayNO;
1aa99e6b 3330 if (do_utf8) {
1a4fad37 3331 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3332 if (OP(scan) == NSPACE
bb7a0f54 3333 ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 3334 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
3335 {
3336 sayNO;
3337 }
3338 locinput += PL_utf8skip[nextchr];
3339 nextchr = UCHARAT(locinput);
3340 break;
a0ed51b3 3341 }
ffc61ed2 3342 if (OP(scan) == NSPACE
d6a28714 3343 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 3344 sayNO;
76e3520e 3345 nextchr = UCHARAT(++locinput);
a0d0e21e 3346 break;
d6a28714 3347 case DIGITL:
a0ed51b3
LW
3348 PL_reg_flags |= RF_tainted;
3349 /* FALL THROUGH */
d6a28714 3350 case DIGIT:
9442cb0e 3351 if (!nextchr)
a0ed51b3 3352 sayNO;
1aa99e6b 3353 if (do_utf8) {
1a4fad37 3354 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3355 if (!(OP(scan) == DIGIT
bb7a0f54 3356 ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 3357 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 3358 {
a0ed51b3 3359 sayNO;
dfe13c55 3360 }
6f06b55f 3361 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3362 nextchr = UCHARAT(locinput);
3363 break;
3364 }
ffc61ed2 3365 if (!(OP(scan) == DIGIT
9442cb0e 3366 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
3367 sayNO;
3368 nextchr = UCHARAT(++locinput);
3369 break;
d6a28714 3370 case NDIGITL:
b8c5462f
JH
3371 PL_reg_flags |= RF_tainted;
3372 /* FALL THROUGH */
d6a28714 3373 case NDIGIT:
9442cb0e 3374 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3375 sayNO;
1aa99e6b 3376 if (do_utf8) {
1a4fad37 3377 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3378 if (OP(scan) == NDIGIT
bb7a0f54 3379 ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
3380 : isDIGIT_LC_utf8((U8*)locinput))
3381 {
a0ed51b3 3382 sayNO;
9442cb0e 3383 }
6f06b55f 3384 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3385 nextchr = UCHARAT(locinput);
3386 break;
3387 }
ffc61ed2 3388 if (OP(scan) == NDIGIT
9442cb0e 3389 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
3390 sayNO;
3391 nextchr = UCHARAT(++locinput);
3392 break;
3393 case CLUMP:
b7c83a7e 3394 if (locinput >= PL_regeol)
a0ed51b3 3395 sayNO;
b7c83a7e 3396 if (do_utf8) {
1a4fad37 3397 LOAD_UTF8_CHARCLASS_MARK();
b7c83a7e
JH
3398 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3399 sayNO;
3400 locinput += PL_utf8skip[nextchr];
3401 while (locinput < PL_regeol &&
3402 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3403 locinput += UTF8SKIP(locinput);
3404 if (locinput > PL_regeol)
3405 sayNO;
eb08e2da
JH
3406 }
3407 else
3408 locinput++;
a0ed51b3
LW
3409 nextchr = UCHARAT(locinput);
3410 break;
81714fb9
YO
3411
3412 case NREFFL:
3413 {
3414 char *s;
ff1157ca 3415 char type;
81714fb9
YO
3416 PL_reg_flags |= RF_tainted;
3417 /* FALL THROUGH */
3418 case NREF:
3419 case NREFF:
ff1157ca 3420 type = OP(scan);
0a4db386
YO
3421 n = reg_check_named_buff_matched(rex,scan);
3422
3423 if ( n ) {
3424 type = REF + ( type - NREF );
3425 goto do_ref;
3426 } else {
81714fb9 3427 sayNO;
0a4db386
YO
3428 }
3429 /* unreached */
c8756f30 3430 case REFFL:
3280af22 3431 PL_reg_flags |= RF_tainted;
c8756f30 3432 /* FALL THROUGH */
c277df42 3433 case REF:
81714fb9 3434 case REFF:
c277df42 3435 n = ARG(scan); /* which paren pair */
81714fb9
YO
3436 type = OP(scan);
3437 do_ref:
24d3c4a9 3438 ln = PL_regstartp[n];
2c2d71f5 3439 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
3b6647e0 3440 if (*PL_reglastparen < n || ln == -1)
af3f8c16 3441 sayNO; /* Do not match unless seen CLOSEn. */
24d3c4a9 3442 if (ln == PL_regendp[n])
a0d0e21e 3443 break;
a0ed51b3 3444
24d3c4a9 3445 s = PL_bostr + ln;
81714fb9 3446 if (do_utf8 && type != REF) { /* REF can do byte comparison */
a0ed51b3 3447 char *l = locinput;
a3b680e6 3448 const char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
3449 /*
3450 * Note that we can't do the "other character" lookup trick as
3451 * in the 8-bit case (no pun intended) because in Unicode we
3452 * have to map both upper and title case to lower case.
3453 */
81714fb9 3454 if (type == REFF) {
a0ed51b3 3455 while (s < e) {
a3b680e6
AL
3456 STRLEN ulen1, ulen2;
3457 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3458 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3459
a0ed51b3
LW
3460 if (l >= PL_regeol)
3461 sayNO;
a2a2844f
JH
3462 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3463 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 3464 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 3465 sayNO;
a2a2844f
JH
3466 s += ulen1;
3467 l += ulen2;
a0ed51b3
LW
3468 }
3469 }
3470 locinput = l;
3471 nextchr = UCHARAT(locinput);
3472 break;
3473 }
3474
a0d0e21e 3475 /* Inline the first character, for speed. */
76e3520e 3476 if (UCHARAT(s) != nextchr &&
81714fb9
YO
3477 (type == REF ||
3478 (UCHARAT(s) != (type == REFF
3479 ? PL_fold : PL_fold_locale)[nextchr])))
4633a7c4 3480 sayNO;
24d3c4a9
DM
3481 ln = PL_regendp[n] - ln;
3482 if (locinput + ln > PL_regeol)
4633a7c4 3483 sayNO;
81714fb9 3484 if (ln > 1 && (type == REF
24d3c4a9 3485 ? memNE(s, locinput, ln)
81714fb9 3486 : (type == REFF
24d3c4a9
DM
3487 ? ibcmp(s, locinput, ln)
3488 : ibcmp_locale(s, locinput, ln))))
4633a7c4 3489 sayNO;
24d3c4a9 3490 locinput += ln;
76e3520e 3491 nextchr = UCHARAT(locinput);
a0d0e21e 3492 break;
81714fb9 3493 }
a0d0e21e 3494 case NOTHING:
c277df42 3495 case TAIL:
a0d0e21e
LW
3496 break;
3497 case BACK:
3498 break;
40a82448
DM
3499
3500#undef ST
3501#define ST st->u.eval
c277df42 3502 {
c277df42 3503 SV *ret;
6bda09f9 3504 regexp *re;
1a147d38
YO
3505 regnode *startpoint;
3506
3507 case GOSTART:
3508 case GOSUB: /* /(...(?1))/ */
6bda09f9 3509 if (cur_eval && cur_eval->locinput==locinput) {
24b23f37 3510 if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
1a147d38 3511 Perl_croak(aTHX_ "Infinite recursion in regex");
6bda09f9 3512 if ( ++nochange_depth > MAX_RECURSE_EVAL_NOCHANGE_DEPTH )
1a147d38
YO
3513 Perl_croak(aTHX_
3514 "Pattern subroutine nesting without pos change"
3515 " exceeded limit in regex");
6bda09f9
YO
3516 } else {
3517 nochange_depth = 0;
1a147d38 3518 }
6bda09f9
YO
3519 re = rex;
3520 (void)ReREFCNT_inc(rex);
1a147d38 3521 if (OP(scan)==GOSUB) {
6bda09f9
YO
3522 startpoint = scan + ARG2L(scan);
3523 ST.close_paren = ARG(scan);
3524 } else {
3525 startpoint = re->program+1;
3526 ST.close_paren = 0;
3527 }
3528 goto eval_recurse_doit;
3529 /* NOTREACHED */
3530 case EVAL: /* /(?{A})B/ /(??{A})B/ and /(?(?{A})X|Y)B/ */
3531 if (cur_eval && cur_eval->locinput==locinput) {
3532 if ( ++nochange_depth > MAX_RECURSE_EVAL_NOCHANGE_DEPTH )
1a147d38 3533 Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
6bda09f9
YO
3534 } else {
3535 nochange_depth = 0;
3536 }
8e5e9ebe 3537 {
4aabdb9b
DM
3538 /* execute the code in the {...} */
3539 dSP;
6136c704 3540 SV ** const before = SP;
4aabdb9b
DM
3541 OP_4tree * const oop = PL_op;
3542 COP * const ocurcop = PL_curcop;
3543 PAD *old_comppad;
4aabdb9b
DM
3544
3545 n = ARG(scan);
32fc9b6a 3546 PL_op = (OP_4tree*)rex->data->data[n];
24b23f37
YO
3547 DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
3548 " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
32fc9b6a 3549 PAD_SAVE_LOCAL(old_comppad, (PAD*)rex->data->data[n + 2]);
4aabdb9b
DM
3550 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
3551
8e5e9ebe
RGS
3552 CALLRUNOPS(aTHX); /* Scalar context. */
3553 SPAGAIN;
3554 if (SP == before)
075aa684 3555 ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
8e5e9ebe
RGS
3556 else {
3557 ret = POPs;
3558 PUTBACK;
3559 }
4aabdb9b
DM
3560
3561 PL_op = oop;
3562 PAD_RESTORE_LOCAL(old_comppad);
3563 PL_curcop = ocurcop;
24d3c4a9 3564 if (!logical) {
4aabdb9b
DM
3565 /* /(?{...})/ */
3566 sv_setsv(save_scalar(PL_replgv), ret);
4aabdb9b
DM
3567 break;
3568 }
8e5e9ebe 3569 }
24d3c4a9
DM
3570 if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
3571 logical = 0;
4aabdb9b 3572 {
4f639d21
DM
3573 /* extract RE object from returned value; compiling if
3574 * necessary */
3575
6136c704 3576 MAGIC *mg = NULL;
be8e71aa 3577 const SV *sv;
faf82a0b
AE
3578 if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3579 mg = mg_find(sv, PERL_MAGIC_qr);
3580 else if (SvSMAGICAL(ret)) {
3581 if (SvGMAGICAL(ret))
3582 sv_unmagic(ret, PERL_MAGIC_qr);
3583 else
3584 mg = mg_find(ret, PERL_MAGIC_qr);
0f5d15d6 3585 }
faf82a0b 3586
0f5d15d6
IZ
3587 if (mg) {
3588 re = (regexp *)mg->mg_obj;
df0003d4 3589 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
3590 }
3591 else {
3592 STRLEN len;
6136c704 3593 const char * const t = SvPV_const(ret, len);
0f5d15d6 3594 PMOP pm;
a3b680e6 3595 const I32 osize = PL_regsize;
0f5d15d6 3596
5fcd1c1b 3597 Zero(&pm, 1, PMOP);
4aabdb9b 3598 if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
f9f4320a 3599 re = CALLREGCOMP((char*)t, (char*)t + len, &pm);
9041c2e3 3600 if (!(SvFLAGS(ret)
faf82a0b
AE
3601 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3602 | SVs_GMG)))
14befaf4
DM
3603 sv_magic(ret,(SV*)ReREFCNT_inc(re),
3604 PERL_MAGIC_qr,0,0);
0f5d15d6 3605 PL_regsize = osize;
0f5d15d6 3606 }
4aabdb9b 3607 }
6bda09f9
YO
3608 DEBUG_EXECUTE_r(
3609 debug_start_match(re, do_utf8, locinput, PL_regeol,
3610 "Matching embedded");
3611 );
3612 startpoint = re->program + 1;
1a147d38 3613 ST.close_paren = 0; /* only used for GOSUB */
6bda09f9
YO
3614 /* borrowed from regtry */
3615 if (PL_reg_start_tmpl <= re->nparens) {
3616 PL_reg_start_tmpl = re->nparens*3/2 + 3;
3617 if(PL_reg_start_tmp)
3618 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3619 else
3620 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3621 }
aa283a38 3622
1a147d38 3623 eval_recurse_doit: /* Share code with GOSUB below this line */
aa283a38 3624 /* run the pattern returned from (??{...}) */
40a82448
DM
3625 ST.cp = regcppush(0); /* Save *all* the positions. */
3626 REGCP_SET(ST.lastcp);
6bda09f9 3627
1a147d38
YO
3628 PL_regstartp = re->startp; /* essentially NOOP on GOSUB */
3629 PL_regendp = re->endp; /* essentially NOOP on GOSUB */
6bda09f9 3630
4aabdb9b
DM
3631 *PL_reglastparen = 0;
3632 *PL_reglastcloseparen = 0;
4aabdb9b 3633 PL_reginput = locinput;
ae0beba1 3634 PL_regsize = 0;
4aabdb9b
DM
3635
3636 /* XXXX This is too dramatic a measure... */
3637 PL_reg_maxiter = 0;
3638
faec1544 3639 ST.toggle_reg_flags = PL_reg_flags;
bbe252da 3640 if (re->extflags & RXf_UTF8)
faec1544
DM
3641 PL_reg_flags |= RF_utf8;
3642 else
3643 PL_reg_flags &= ~RF_utf8;
3644 ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
3645
40a82448 3646 ST.prev_rex = rex;
faec1544 3647 ST.prev_curlyx = cur_curlyx;
aa283a38 3648 rex = re;
faec1544 3649 cur_curlyx = NULL;
40a82448 3650 ST.B = next;
faec1544
DM
3651 ST.prev_eval = cur_eval;
3652 cur_eval = st;
faec1544 3653 /* now continue from first node in postoned RE */
6bda09f9 3654 PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
4aabdb9b 3655 /* NOTREACHED */
a0ed51b3 3656 }
24d3c4a9
DM
3657 /* logical is 1, /(?(?{...})X|Y)/ */
3658 sw = (bool)SvTRUE(ret);
3659 logical = 0;
c277df42
IZ
3660 break;
3661 }
40a82448 3662
faec1544
DM
3663 case EVAL_AB: /* cleanup after a successful (??{A})B */
3664 /* note: this is called twice; first after popping B, then A */
3665 PL_reg_flags ^= ST.toggle_reg_flags;
40a82448
DM
3666 ReREFCNT_dec(rex);
3667 rex = ST.prev_rex;
faec1544
DM
3668 regcpblow(ST.cp);
3669 cur_eval = ST.prev_eval;
3670 cur_curlyx = ST.prev_curlyx;
40a82448
DM
3671 /* XXXX This is too dramatic a measure... */
3672 PL_reg_maxiter = 0;
262b90c4 3673 sayYES;
40a82448 3674
40a82448 3675
faec1544
DM
3676 case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
3677 /* note: this is called twice; first after popping B, then A */
3678 PL_reg_flags ^= ST.toggle_reg_flags;
3679 ReREFCNT_dec(rex);
3680 rex = ST.prev_rex;
40a82448
DM
3681 PL_reginput = locinput;
3682 REGCP_UNWIND(ST.lastcp);
3683 regcppop(rex);
faec1544
DM
3684 cur_eval = ST.prev_eval;
3685 cur_curlyx = ST.prev_curlyx;
3686 /* XXXX This is too dramatic a measure... */
3687 PL_reg_maxiter = 0;
40a82448 3688 sayNO_SILENT;
40a82448
DM
3689#undef ST
3690
a0d0e21e 3691 case OPEN:
c277df42 3692 n = ARG(scan); /* which paren pair */
3280af22
NIS
3693 PL_reg_start_tmp[n] = locinput;
3694 if (n > PL_regsize)
3695 PL_regsize = n;
e2e6a0f1 3696 lastopen = n;
a0d0e21e
LW
3697 break;
3698 case CLOSE:
c277df42 3699 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
3700 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3701 PL_regendp[n] = locinput - PL_bostr;
7f69552c
YO
3702 /*if (n > PL_regsize)
3703 PL_regsize = n;*/
3b6647e0 3704 if (n > *PL_reglastparen)
3280af22 3705 *PL_reglastparen = n;
a01268b5 3706 *PL_reglastcloseparen = n;
3b6647e0 3707 if (cur_eval && cur_eval->u.eval.close_paren == n) {
6bda09f9
YO
3708 goto fake_end;
3709 }
a0d0e21e 3710 break;
e2e6a0f1
YO
3711 case ACCEPT:
3712 if (ARG(scan)){
3713 regnode *cursor;
3714 for (cursor=scan;
3715 cursor && OP(cursor)!=END;
3716 cursor=regnext(cursor))
3717 {
3718 if ( OP(cursor)==CLOSE ){
3719 n = ARG(cursor);
3720 if ( n <= lastopen ) {
3721 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3722 PL_regendp[n] = locinput - PL_bostr;
3723 /*if (n > PL_regsize)
3724 PL_regsize = n;*/
3b6647e0 3725 if (n > *PL_reglastparen)
e2e6a0f1
YO
3726 *PL_reglastparen = n;
3727 *PL_reglastcloseparen = n;
3b6647e0
RB
3728 if ( n == ARG(scan) || (cur_eval &&
3729 cur_eval->u.eval.close_paren == n))
e2e6a0f1
YO
3730 break;
3731 }
3732 }
3733 }
3734 }
3735 goto fake_end;
3736 /*NOTREACHED*/
c277df42
IZ
3737 case GROUPP:
3738 n = ARG(scan); /* which paren pair */
3b6647e0 3739 sw = (bool)(*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42 3740 break;
0a4db386
YO
3741 case NGROUPP:
3742 /* reg_check_named_buff_matched returns 0 for no match */
3743 sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
3744 break;
1a147d38 3745 case INSUBP:
0a4db386 3746 n = ARG(scan);
3b6647e0 3747 sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
0a4db386
YO
3748 break;
3749 case DEFINEP:
3750 sw = 0;
3751 break;
c277df42 3752 case IFTHEN:
2c2d71f5 3753 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
24d3c4a9 3754 if (sw)
c277df42
IZ
3755 next = NEXTOPER(NEXTOPER(scan));
3756 else {
3757 next = scan + ARG(scan);
3758 if (OP(next) == IFTHEN) /* Fake one. */
3759 next = NEXTOPER(NEXTOPER(next));
3760 }
3761 break;
3762 case LOGICAL:
24d3c4a9 3763 logical = scan->flags;
c277df42 3764 break;
c476f425 3765
2ab05381 3766/*******************************************************************
2ab05381 3767
c476f425
DM
3768The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
3769pattern, where A and B are subpatterns. (For simple A, CURLYM or
3770STAR/PLUS/CURLY/CURLYN are used instead.)
2ab05381 3771
c476f425 3772A*B is compiled as <CURLYX><A><WHILEM><B>
2ab05381 3773
c476f425
DM
3774On entry to the subpattern, CURLYX is called. This pushes a CURLYX
3775state, which contains the current count, initialised to -1. It also sets
3776cur_curlyx to point to this state, with any previous value saved in the
3777state block.
2ab05381 3778
c476f425
DM
3779CURLYX then jumps straight to the WHILEM op, rather than executing A,
3780since the pattern may possibly match zero times (i.e. it's a while {} loop
3781rather than a do {} while loop).
2ab05381 3782
c476f425
DM
3783Each entry to WHILEM represents a successful match of A. The count in the
3784CURLYX block is incremented, another WHILEM state is pushed, and execution
3785passes to A or B depending on greediness and the current count.
2ab05381 3786
c476f425
DM
3787For example, if matching against the string a1a2a3b (where the aN are
3788substrings that match /A/), then the match progresses as follows: (the
3789pushed states are interspersed with the bits of strings matched so far):
2ab05381 3790
c476f425
DM
3791 <CURLYX cnt=-1>
3792 <CURLYX cnt=0><WHILEM>
3793 <CURLYX cnt=1><WHILEM> a1 <WHILEM>
3794 <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
3795 <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
3796 <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
2ab05381 3797
c476f425
DM
3798(Contrast this with something like CURLYM, which maintains only a single
3799backtrack state:
2ab05381 3800
c476f425
DM
3801 <CURLYM cnt=0> a1
3802 a1 <CURLYM cnt=1> a2
3803 a1 a2 <CURLYM cnt=2> a3
3804 a1 a2 a3 <CURLYM cnt=3> b
3805)
2ab05381 3806
c476f425
DM
3807Each WHILEM state block marks a point to backtrack to upon partial failure
3808of A or B, and also contains some minor state data related to that
3809iteration. The CURLYX block, pointed to by cur_curlyx, contains the
3810overall state, such as the count, and pointers to the A and B ops.
2ab05381 3811
c476f425
DM
3812This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
3813must always point to the *current* CURLYX block, the rules are:
2ab05381 3814
c476f425
DM
3815When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
3816and set cur_curlyx to point the new block.
2ab05381 3817
c476f425
DM
3818When popping the CURLYX block after a successful or unsuccessful match,
3819restore the previous cur_curlyx.
2ab05381 3820
c476f425
DM
3821When WHILEM is about to execute B, save the current cur_curlyx, and set it
3822to the outer one saved in the CURLYX block.
2ab05381 3823
c476f425
DM
3824When popping the WHILEM block after a successful or unsuccessful B match,
3825restore the previous cur_curlyx.
2ab05381 3826
c476f425
DM
3827Here's an example for the pattern (AI* BI)*BO
3828I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
2ab05381 3829
c476f425
DM
3830cur_
3831curlyx backtrack stack
3832------ ---------------
3833NULL
3834CO <CO prev=NULL> <WO>
3835CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
3836CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
3837NULL <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
2ab05381 3838
c476f425
DM
3839At this point the pattern succeeds, and we work back down the stack to
3840clean up, restoring as we go:
95b24440 3841
c476f425
DM
3842CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
3843CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
3844CO <CO prev=NULL> <WO>
3845NULL
a0374537 3846
c476f425
DM
3847*******************************************************************/
3848
3849#define ST st->u.curlyx
3850
3851 case CURLYX: /* start of /A*B/ (for complex A) */
3852 {
3853 /* No need to save/restore up to this paren */
3854 I32 parenfloor = scan->flags;
3855
3856 assert(next); /* keep Coverity happy */
3857 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3858 next += ARG(next);
3859
3860 /* XXXX Probably it is better to teach regpush to support
3861 parenfloor > PL_regsize... */
3862 if (parenfloor > (I32)*PL_reglastparen)
3863 parenfloor = *PL_reglastparen; /* Pessimization... */
3864
3865 ST.prev_curlyx= cur_curlyx;
3866 cur_curlyx = st;
3867 ST.cp = PL_savestack_ix;
3868
3869 /* these fields contain the state of the current curly.
3870 * they are accessed by subsequent WHILEMs */
3871 ST.parenfloor = parenfloor;
3872 ST.min = ARG1(scan);
3873 ST.max = ARG2(scan);
3874 ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3875 ST.B = next;
24d3c4a9
DM
3876 ST.minmod = minmod;
3877 minmod = 0;
c476f425
DM
3878 ST.count = -1; /* this will be updated by WHILEM */
3879 ST.lastloc = NULL; /* this will be updated by WHILEM */
3880
3881 PL_reginput = locinput;
3882 PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
5f66b61c 3883 /* NOTREACHED */
c476f425 3884 }
a0d0e21e 3885
c476f425 3886 case CURLYX_end: /* just finished matching all of A*B */
f0852a51
YO
3887 if (PL_reg_eval_set){
3888 SV *pres= GvSV(PL_replgv);
3889 SvREFCNT_inc(pres);
3890 regcpblow(ST.cp);
3891 sv_setsv(GvSV(PL_replgv), pres);
3892 SvREFCNT_dec(pres);
3893 } else {
3894 regcpblow(ST.cp);
3895 }
c476f425
DM
3896 cur_curlyx = ST.prev_curlyx;
3897 sayYES;
3898 /* NOTREACHED */
a0d0e21e 3899
c476f425
DM
3900 case CURLYX_end_fail: /* just failed to match all of A*B */
3901 regcpblow(ST.cp);
3902 cur_curlyx = ST.prev_curlyx;
3903 sayNO;
3904 /* NOTREACHED */
4633a7c4 3905
a0d0e21e 3906
c476f425
DM
3907#undef ST
3908#define ST st->u.whilem
3909
3910 case WHILEM: /* just matched an A in /A*B/ (for complex A) */
3911 {
3912 /* see the discussion above about CURLYX/WHILEM */
c476f425
DM
3913 I32 n;
3914 assert(cur_curlyx); /* keep Coverity happy */
3915 n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
3916 ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
3917 ST.cache_offset = 0;
3918 ST.cache_mask = 0;
3919
3920 PL_reginput = locinput;
3921
3922 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
3923 "%*s whilem: matched %ld out of %ld..%ld\n",
3924 REPORT_CODE_OFF+depth*2, "", (long)n,
3925 (long)cur_curlyx->u.curlyx.min,
3926 (long)cur_curlyx->u.curlyx.max)
3927 );
a0d0e21e 3928
c476f425 3929 /* First just match a string of min A's. */
a0d0e21e 3930
c476f425
DM
3931 if (n < cur_curlyx->u.curlyx.min) {
3932 cur_curlyx->u.curlyx.lastloc = locinput;
3933 PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
3934 /* NOTREACHED */
3935 }
3936
3937 /* If degenerate A matches "", assume A done. */
3938
3939 if (locinput == cur_curlyx->u.curlyx.lastloc) {
3940 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
3941 "%*s whilem: empty match detected, trying continuation...\n",
3942 REPORT_CODE_OFF+depth*2, "")
3943 );
3944 goto do_whilem_B_max;
3945 }
3946
3947 /* super-linear cache processing */
3948
3949 if (scan->flags) {
a0d0e21e 3950
2c2d71f5 3951 if (!PL_reg_maxiter) {
c476f425
DM
3952 /* start the countdown: Postpone detection until we
3953 * know the match is not *that* much linear. */
2c2d71f5 3954 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
66bf836d
DM
3955 /* possible overflow for long strings and many CURLYX's */
3956 if (PL_reg_maxiter < 0)
3957 PL_reg_maxiter = I32_MAX;
2c2d71f5
JH
3958 PL_reg_leftiter = PL_reg_maxiter;
3959 }
c476f425 3960
2c2d71f5 3961 if (PL_reg_leftiter-- == 0) {
c476f425 3962 /* initialise cache */
3298f257 3963 const I32 size = (PL_reg_maxiter + 7)/8;
2c2d71f5 3964 if (PL_reg_poscache) {
eb160463 3965 if ((I32)PL_reg_poscache_size < size) {
2c2d71f5
JH
3966 Renew(PL_reg_poscache, size, char);
3967 PL_reg_poscache_size = size;
3968 }
3969 Zero(PL_reg_poscache, size, char);
3970 }
3971 else {
3972 PL_reg_poscache_size = size;
a02a5408 3973 Newxz(PL_reg_poscache, size, char);
2c2d71f5 3974 }
c476f425
DM
3975 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
3976 "%swhilem: Detected a super-linear match, switching on caching%s...\n",
3977 PL_colors[4], PL_colors[5])
3978 );
2c2d71f5 3979 }
c476f425 3980
2c2d71f5 3981 if (PL_reg_leftiter < 0) {
c476f425
DM
3982 /* have we already failed at this position? */
3983 I32 offset, mask;
3984 offset = (scan->flags & 0xf) - 1
3985 + (locinput - PL_bostr) * (scan->flags>>4);
3986 mask = 1 << (offset % 8);
3987 offset /= 8;
3988 if (PL_reg_poscache[offset] & mask) {
3989 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
3990 "%*s whilem: (cache) already tried at this position...\n",
3991 REPORT_CODE_OFF+depth*2, "")
2c2d71f5 3992 );
3298f257 3993 sayNO; /* cache records failure */
2c2d71f5 3994 }
c476f425
DM
3995 ST.cache_offset = offset;
3996 ST.cache_mask = mask;
2c2d71f5 3997 }
c476f425 3998 }
2c2d71f5 3999
c476f425 4000 /* Prefer B over A for minimal matching. */
a687059c 4001
c476f425
DM
4002 if (cur_curlyx->u.curlyx.minmod) {
4003 ST.save_curlyx = cur_curlyx;
4004 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4005 ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4006 REGCP_SET(ST.lastcp);
4007 PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4008 /* NOTREACHED */
4009 }
a0d0e21e 4010
c476f425
DM
4011 /* Prefer A over B for maximal matching. */
4012
4013 if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
4014 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4015 cur_curlyx->u.curlyx.lastloc = locinput;
4016 REGCP_SET(ST.lastcp);
4017 PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
4018 /* NOTREACHED */
4019 }
4020 goto do_whilem_B_max;
4021 }
4022 /* NOTREACHED */
4023
4024 case WHILEM_B_min: /* just matched B in a minimal match */
4025 case WHILEM_B_max: /* just matched B in a maximal match */
4026 cur_curlyx = ST.save_curlyx;
4027 sayYES;
4028 /* NOTREACHED */
4029
4030 case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
4031 cur_curlyx = ST.save_curlyx;
4032 cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4033 cur_curlyx->u.curlyx.count--;
4034 CACHEsayNO;
4035 /* NOTREACHED */
4036
4037 case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
4038 REGCP_UNWIND(ST.lastcp);
4039 regcppop(rex);
4040 /* FALL THROUGH */
4041 case WHILEM_A_pre_fail: /* just failed to match even minimal A */
4042 cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4043 cur_curlyx->u.curlyx.count--;
4044 CACHEsayNO;
4045 /* NOTREACHED */
4046
4047 case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
4048 REGCP_UNWIND(ST.lastcp);
4049 regcppop(rex); /* Restore some previous $<digit>s? */
4050 PL_reginput = locinput;
4051 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4052 "%*s whilem: failed, trying continuation...\n",
4053 REPORT_CODE_OFF+depth*2, "")
4054 );
4055 do_whilem_B_max:
4056 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4057 && ckWARN(WARN_REGEXP)
4058 && !(PL_reg_flags & RF_warned))
4059 {
4060 PL_reg_flags |= RF_warned;
4061 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4062 "Complex regular subexpression recursion",
4063 REG_INFTY - 1);
4064 }
4065
4066 /* now try B */
4067 ST.save_curlyx = cur_curlyx;
4068 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4069 PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4070 /* NOTREACHED */
4071
4072 case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4073 cur_curlyx = ST.save_curlyx;
4074 REGCP_UNWIND(ST.lastcp);
4075 regcppop(rex);
4076
4077 if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4078 /* Maximum greed exceeded */
4079 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4080 && ckWARN(WARN_REGEXP)
4081 && !(PL_reg_flags & RF_warned))
4082 {
3280af22 4083 PL_reg_flags |= RF_warned;
c476f425
DM
4084 Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4085 "%s limit (%d) exceeded",
4086 "Complex regular subexpression recursion",
4087 REG_INFTY - 1);
a0d0e21e 4088 }
c476f425 4089 cur_curlyx->u.curlyx.count--;
3ab3c9b4 4090 CACHEsayNO;
a0d0e21e 4091 }
c476f425
DM
4092
4093 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4094 "%*s trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4095 );
4096 /* Try grabbing another A and see if it helps. */
4097 PL_reginput = locinput;
4098 cur_curlyx->u.curlyx.lastloc = locinput;
4099 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4100 REGCP_SET(ST.lastcp);
4101 PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
5f66b61c 4102 /* NOTREACHED */
40a82448
DM
4103
4104#undef ST
4105#define ST st->u.branch
4106
4107 case BRANCHJ: /* /(...|A|...)/ with long next pointer */
c277df42
IZ
4108 next = scan + ARG(scan);
4109 if (next == scan)
4110 next = NULL;
40a82448
DM
4111 scan = NEXTOPER(scan);
4112 /* FALL THROUGH */
c277df42 4113
40a82448
DM
4114 case BRANCH: /* /(...|A|...)/ */
4115 scan = NEXTOPER(scan); /* scan now points to inner node */
5d458dd8
YO
4116 if ((!next || (OP(next) != BRANCH && OP(next) != BRANCHJ))
4117 && !has_cutgroup)
4118 {
40a82448
DM
4119 /* last branch; skip state push and jump direct to node */
4120 continue;
5d458dd8 4121 }
40a82448
DM
4122 ST.lastparen = *PL_reglastparen;
4123 ST.next_branch = next;
4124 REGCP_SET(ST.cp);
4125 PL_reginput = locinput;
02db2b7b 4126
40a82448 4127 /* Now go into the branch */
5d458dd8
YO
4128 if (has_cutgroup) {
4129 PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4130 } else {
4131 PUSH_STATE_GOTO(BRANCH_next, scan);
4132 }
40a82448 4133 /* NOTREACHED */
5d458dd8
YO
4134 case CUTGROUP:
4135 PL_reginput = locinput;
4136 sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
4137 (SV*)rex->data->data[ ARG( scan ) ];
4138 PUSH_STATE_GOTO(CUTGROUP_next,next);
4139 /* NOTREACHED */
4140 case CUTGROUP_next_fail:
4141 do_cutgroup = 1;
4142 no_final = 1;
4143 if (st->u.mark.mark_name)
4144 sv_commit = st->u.mark.mark_name;
4145 sayNO;
4146 /* NOTREACHED */
4147 case BRANCH_next:
4148 sayYES;
4149 /* NOTREACHED */
40a82448 4150 case BRANCH_next_fail: /* that branch failed; try the next, if any */
5d458dd8
YO
4151 if (do_cutgroup) {
4152 do_cutgroup = 0;
4153 no_final = 0;
4154 }
40a82448
DM
4155 REGCP_UNWIND(ST.cp);
4156 for (n = *PL_reglastparen; n > ST.lastparen; n--)
4157 PL_regendp[n] = -1;
4158 *PL_reglastparen = n;
0a4db386 4159 /*dmq: *PL_reglastcloseparen = n; */
40a82448
DM
4160 scan = ST.next_branch;
4161 /* no more branches? */
5d458dd8
YO
4162 if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4163 DEBUG_EXECUTE_r({
4164 PerlIO_printf( Perl_debug_log,
4165 "%*s %sBRANCH failed...%s\n",
4166 REPORT_CODE_OFF+depth*2, "",
4167 PL_colors[4],
4168 PL_colors[5] );
4169 });
4170 sayNO_SILENT;
4171 }
40a82448
DM
4172 continue; /* execute next BRANCH[J] op */
4173 /* NOTREACHED */
4174
a0d0e21e 4175 case MINMOD:
24d3c4a9 4176 minmod = 1;
a0d0e21e 4177 break;
40a82448
DM
4178
4179#undef ST
4180#define ST st->u.curlym
4181
4182 case CURLYM: /* /A{m,n}B/ where A is fixed-length */
4183
4184 /* This is an optimisation of CURLYX that enables us to push
4185 * only a single backtracking state, no matter now many matches
4186 * there are in {m,n}. It relies on the pattern being constant
4187 * length, with no parens to influence future backrefs
4188 */
4189
4190 ST.me = scan;
dc45a647 4191 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
40a82448
DM
4192
4193 /* if paren positive, emulate an OPEN/CLOSE around A */
4194 if (ST.me->flags) {
3b6647e0 4195 U32 paren = ST.me->flags;
40a82448
DM
4196 if (paren > PL_regsize)
4197 PL_regsize = paren;
3b6647e0 4198 if (paren > *PL_reglastparen)
40a82448 4199 *PL_reglastparen = paren;
c277df42 4200 scan += NEXT_OFF(scan); /* Skip former OPEN. */
6407bf3b 4201 }
40a82448
DM
4202 ST.A = scan;
4203 ST.B = next;
4204 ST.alen = 0;
4205 ST.count = 0;
24d3c4a9
DM
4206 ST.minmod = minmod;
4207 minmod = 0;
40a82448
DM
4208 ST.c1 = CHRTEST_UNINIT;
4209 REGCP_SET(ST.cp);
6407bf3b 4210
40a82448
DM
4211 if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
4212 goto curlym_do_B;
4213
4214 curlym_do_A: /* execute the A in /A{m,n}B/ */
6407bf3b 4215 PL_reginput = locinput;
40a82448
DM
4216 PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
4217 /* NOTREACHED */
5f80c4cf 4218
40a82448
DM
4219 case CURLYM_A: /* we've just matched an A */
4220 locinput = st->locinput;
4221 nextchr = UCHARAT(locinput);
4222
4223 ST.count++;
4224 /* after first match, determine A's length: u.curlym.alen */
4225 if (ST.count == 1) {
4226 if (PL_reg_match_utf8) {
4227 char *s = locinput;
4228 while (s < PL_reginput) {
4229 ST.alen++;
4230 s += UTF8SKIP(s);
4231 }
4232 }
4233 else {
4234 ST.alen = PL_reginput - locinput;
4235 }
4236 if (ST.alen == 0)
4237 ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4238 }
0cadcf80
DM
4239 DEBUG_EXECUTE_r(
4240 PerlIO_printf(Perl_debug_log,
40a82448 4241 "%*s CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
5bc10b2c 4242 (int)(REPORT_CODE_OFF+(depth*2)), "",
40a82448 4243 (IV) ST.count, (IV)ST.alen)
0cadcf80
DM
4244 );
4245
40a82448 4246 locinput = PL_reginput;
0a4db386
YO
4247
4248 if (cur_eval && cur_eval->u.eval.close_paren &&
24b23f37 4249 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
0a4db386
YO
4250 goto fake_end;
4251
4252 if ( ST.count < (ST.minmod ? ARG1(ST.me) : ARG2(ST.me)) )
40a82448
DM
4253 goto curlym_do_A; /* try to match another A */
4254 goto curlym_do_B; /* try to match B */
5f80c4cf 4255
40a82448
DM
4256 case CURLYM_A_fail: /* just failed to match an A */
4257 REGCP_UNWIND(ST.cp);
0a4db386
YO
4258
4259 if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
4260 || (cur_eval && cur_eval->u.eval.close_paren &&
24b23f37 4261 cur_eval->u.eval.close_paren == (U32)ST.me->flags))
40a82448 4262 sayNO;
0cadcf80 4263
40a82448
DM
4264 curlym_do_B: /* execute the B in /A{m,n}B/ */
4265 PL_reginput = locinput;
4266 if (ST.c1 == CHRTEST_UNINIT) {
4267 /* calculate c1 and c2 for possible match of 1st char
4268 * following curly */
4269 ST.c1 = ST.c2 = CHRTEST_VOID;
4270 if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4271 regnode *text_node = ST.B;
4272 if (! HAS_TEXT(text_node))
4273 FIND_NEXT_IMPT(text_node);
4274 if (HAS_TEXT(text_node)
4275 && PL_regkind[OP(text_node)] != REF)
4276 {
4277 ST.c1 = (U8)*STRING(text_node);
4278 ST.c2 =
4279 (OP(text_node) == EXACTF || OP(text_node) == REFF)
4280 ? PL_fold[ST.c1]
4281 : (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
4282 ? PL_fold_locale[ST.c1]
4283 : ST.c1;
c277df42 4284 }
c277df42 4285 }
40a82448
DM
4286 }
4287
4288 DEBUG_EXECUTE_r(
4289 PerlIO_printf(Perl_debug_log,
4290 "%*s CURLYM trying tail with matches=%"IVdf"...\n",
5bc10b2c 4291 (int)(REPORT_CODE_OFF+(depth*2)),
40a82448
DM
4292 "", (IV)ST.count)
4293 );
4294 if (ST.c1 != CHRTEST_VOID
4295 && UCHARAT(PL_reginput) != ST.c1
4296 && UCHARAT(PL_reginput) != ST.c2)
4297 {
4298 /* simulate B failing */
4299 state_num = CURLYM_B_fail;
4300 goto reenter_switch;
4301 }
4302
4303 if (ST.me->flags) {
4304 /* mark current A as captured */
4305 I32 paren = ST.me->flags;
4306 if (ST.count) {
4307 PL_regstartp[paren]
4308 = HOPc(PL_reginput, -ST.alen) - PL_bostr;
4309 PL_regendp[paren] = PL_reginput - PL_bostr;
0a4db386 4310 /*dmq: *PL_reglastcloseparen = paren; */
c277df42 4311 }
40a82448
DM
4312 else
4313 PL_regendp[paren] = -1;
0a4db386 4314 if (cur_eval && cur_eval->u.eval.close_paren &&
24b23f37 4315 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
0a4db386
YO
4316 {
4317 if (ST.count)
4318 goto fake_end;
4319 else
4320 sayNO;
4321 }
c277df42 4322 }
0a4db386 4323
40a82448 4324 PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
5f66b61c 4325 /* NOTREACHED */
40a82448
DM
4326
4327 case CURLYM_B_fail: /* just failed to match a B */
4328 REGCP_UNWIND(ST.cp);
4329 if (ST.minmod) {
4330 if (ST.count == ARG2(ST.me) /* max */)
4331 sayNO;
4332 goto curlym_do_A; /* try to match a further A */
4333 }
4334 /* backtrack one A */
4335 if (ST.count == ARG1(ST.me) /* min */)
4336 sayNO;
4337 ST.count--;
4338 locinput = HOPc(locinput, -ST.alen);
4339 goto curlym_do_B; /* try to match B */
4340
c255a977
DM
4341#undef ST
4342#define ST st->u.curly
40a82448 4343
c255a977
DM
4344#define CURLY_SETPAREN(paren, success) \
4345 if (paren) { \
4346 if (success) { \
4347 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr; \
4348 PL_regendp[paren] = locinput - PL_bostr; \
0a4db386 4349 *PL_reglastcloseparen = paren; \
c255a977
DM
4350 } \
4351 else \
4352 PL_regendp[paren] = -1; \
4353 }
4354
4355 case STAR: /* /A*B/ where A is width 1 */
4356 ST.paren = 0;
4357 ST.min = 0;
4358 ST.max = REG_INFTY;
a0d0e21e
LW
4359 scan = NEXTOPER(scan);
4360 goto repeat;
c255a977
DM
4361 case PLUS: /* /A+B/ where A is width 1 */
4362 ST.paren = 0;
4363 ST.min = 1;
4364 ST.max = REG_INFTY;
c277df42 4365 scan = NEXTOPER(scan);
c255a977
DM
4366 goto repeat;
4367 case CURLYN: /* /(A){m,n}B/ where A is width 1 */
4368 ST.paren = scan->flags; /* Which paren to set */
4369 if (ST.paren > PL_regsize)
4370 PL_regsize = ST.paren;
3b6647e0 4371 if (ST.paren > *PL_reglastparen)
c255a977
DM
4372 *PL_reglastparen = ST.paren;
4373 ST.min = ARG1(scan); /* min to match */
4374 ST.max = ARG2(scan); /* max to match */
0a4db386 4375 if (cur_eval && cur_eval->u.eval.close_paren &&
86413ec0 4376 cur_eval->u.eval.close_paren == (U32)ST.paren) {
0a4db386
YO
4377 ST.min=1;
4378 ST.max=1;
4379 }
c255a977
DM
4380 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4381 goto repeat;
4382 case CURLY: /* /A{m,n}B/ where A is width 1 */
4383 ST.paren = 0;
4384 ST.min = ARG1(scan); /* min to match */
4385 ST.max = ARG2(scan); /* max to match */
4386 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42 4387 repeat:
a0d0e21e
LW
4388 /*
4389 * Lookahead to avoid useless match attempts
4390 * when we know what character comes next.
c255a977 4391 *
5f80c4cf
JP
4392 * Used to only do .*x and .*?x, but now it allows
4393 * for )'s, ('s and (?{ ... })'s to be in the way
4394 * of the quantifier and the EXACT-like node. -- japhy
4395 */
4396
c255a977
DM
4397 if (ST.min > ST.max) /* XXX make this a compile-time check? */
4398 sayNO;
cca55fe3 4399 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
4400 U8 *s;
4401 regnode *text_node = next;
4402
3dab1dad
YO
4403 if (! HAS_TEXT(text_node))
4404 FIND_NEXT_IMPT(text_node);
5f80c4cf 4405
9e137952 4406 if (! HAS_TEXT(text_node))
c255a977 4407 ST.c1 = ST.c2 = CHRTEST_VOID;
5f80c4cf 4408 else {
3dab1dad 4409 if (PL_regkind[OP(text_node)] == REF) {
c255a977 4410 ST.c1 = ST.c2 = CHRTEST_VOID;
44a68960 4411 goto assume_ok_easy;
cca55fe3 4412 }
be8e71aa
YO
4413 else
4414 s = (U8*)STRING(text_node);
5f80c4cf
JP
4415
4416 if (!UTF) {
c255a977 4417 ST.c2 = ST.c1 = *s;
f65d3ee7 4418 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
c255a977 4419 ST.c2 = PL_fold[ST.c1];
f65d3ee7 4420 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
c255a977 4421 ST.c2 = PL_fold_locale[ST.c1];
1aa99e6b 4422 }
5f80c4cf 4423 else { /* UTF */
f65d3ee7 4424 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 4425 STRLEN ulen1, ulen2;
89ebb4a3
JH
4426 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4427 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
a2a2844f
JH
4428
4429 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4430 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
e294cc5d
JH
4431#ifdef EBCDIC
4432 ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4433 ckWARN(WARN_UTF8) ?
4434 0 : UTF8_ALLOW_ANY);
4435 ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4436 ckWARN(WARN_UTF8) ?
4437 0 : UTF8_ALLOW_ANY);
4438#else
c255a977 4439 ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
e294cc5d 4440 uniflags);
c255a977 4441 ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
e294cc5d
JH
4442 uniflags);
4443#endif
5f80c4cf
JP
4444 }
4445 else {
c255a977 4446 ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
041457d9 4447 uniflags);
5f80c4cf 4448 }
1aa99e6b
IH
4449 }
4450 }
bbce6d69 4451 }
a0d0e21e 4452 else
c255a977 4453 ST.c1 = ST.c2 = CHRTEST_VOID;
cca55fe3 4454 assume_ok_easy:
c255a977
DM
4455
4456 ST.A = scan;
4457 ST.B = next;
3280af22 4458 PL_reginput = locinput;
24d3c4a9
DM
4459 if (minmod) {
4460 minmod = 0;
e2e6a0f1 4461 if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4633a7c4 4462 sayNO;
c255a977 4463 ST.count = ST.min;
a0ed51b3 4464 locinput = PL_reginput;
c255a977
DM
4465 REGCP_SET(ST.cp);
4466 if (ST.c1 == CHRTEST_VOID)
4467 goto curly_try_B_min;
4468
4469 ST.oldloc = locinput;
4470
4471 /* set ST.maxpos to the furthest point along the
4472 * string that could possibly match */
4473 if (ST.max == REG_INFTY) {
4474 ST.maxpos = PL_regeol - 1;
4475 if (do_utf8)
4476 while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4477 ST.maxpos--;
4478 }
4479 else if (do_utf8) {
4480 int m = ST.max - ST.min;
4481 for (ST.maxpos = locinput;
4482 m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4483 ST.maxpos += UTF8SKIP(ST.maxpos);
4484 }
4485 else {
4486 ST.maxpos = locinput + ST.max - ST.min;
4487 if (ST.maxpos >= PL_regeol)
4488 ST.maxpos = PL_regeol - 1;
4489 }
4490 goto curly_try_B_min_known;
4491
4492 }
4493 else {
e2e6a0f1 4494 ST.count = regrepeat(rex, ST.A, ST.max, depth);
c255a977
DM
4495 locinput = PL_reginput;
4496 if (ST.count < ST.min)
4497 sayNO;
4498 if ((ST.count > ST.min)
4499 && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4500 {
4501 /* A{m,n} must come at the end of the string, there's
4502 * no point in backing off ... */
4503 ST.min = ST.count;
4504 /* ...except that $ and \Z can match before *and* after
4505 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
4506 We may back off by one in this case. */
4507 if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4508 ST.min--;
4509 }
4510 REGCP_SET(ST.cp);
4511 goto curly_try_B_max;
4512 }
4513 /* NOTREACHED */
4514
4515
4516 case CURLY_B_min_known_fail:
4517 /* failed to find B in a non-greedy match where c1,c2 valid */
4518 if (ST.paren && ST.count)
4519 PL_regendp[ST.paren] = -1;
4520
4521 PL_reginput = locinput; /* Could be reset... */
4522 REGCP_UNWIND(ST.cp);
4523 /* Couldn't or didn't -- move forward. */
4524 ST.oldloc = locinput;
4525 if (do_utf8)
4526 locinput += UTF8SKIP(locinput);
4527 else
4528 locinput++;
4529 ST.count++;
4530 curly_try_B_min_known:
4531 /* find the next place where 'B' could work, then call B */
4532 {
4533 int n;
4534 if (do_utf8) {
4535 n = (ST.oldloc == locinput) ? 0 : 1;
4536 if (ST.c1 == ST.c2) {
4537 STRLEN len;
4538 /* set n to utf8_distance(oldloc, locinput) */
4539 while (locinput <= ST.maxpos &&
4540 utf8n_to_uvchr((U8*)locinput,
4541 UTF8_MAXBYTES, &len,
4542 uniflags) != (UV)ST.c1) {
4543 locinput += len;
4544 n++;
4545 }
1aa99e6b
IH
4546 }
4547 else {
c255a977
DM
4548 /* set n to utf8_distance(oldloc, locinput) */
4549 while (locinput <= ST.maxpos) {
4550 STRLEN len;
4551 const UV c = utf8n_to_uvchr((U8*)locinput,
4552 UTF8_MAXBYTES, &len,
4553 uniflags);
4554 if (c == (UV)ST.c1 || c == (UV)ST.c2)
4555 break;
4556 locinput += len;
4557 n++;
1aa99e6b 4558 }
0fe9bf95
IZ
4559 }
4560 }
c255a977
DM
4561 else {
4562 if (ST.c1 == ST.c2) {
4563 while (locinput <= ST.maxpos &&
4564 UCHARAT(locinput) != ST.c1)
4565 locinput++;
bbce6d69 4566 }
c255a977
DM
4567 else {
4568 while (locinput <= ST.maxpos
4569 && UCHARAT(locinput) != ST.c1
4570 && UCHARAT(locinput) != ST.c2)
4571 locinput++;
a0ed51b3 4572 }
c255a977
DM
4573 n = locinput - ST.oldloc;
4574 }
4575 if (locinput > ST.maxpos)
4576 sayNO;
4577 /* PL_reginput == oldloc now */
4578 if (n) {
4579 ST.count += n;
e2e6a0f1 4580 if (regrepeat(rex, ST.A, n, depth) < n)
4633a7c4 4581 sayNO;
a0d0e21e 4582 }
c255a977
DM
4583 PL_reginput = locinput;
4584 CURLY_SETPAREN(ST.paren, ST.count);
0a4db386 4585 if (cur_eval && cur_eval->u.eval.close_paren &&
86413ec0 4586 cur_eval->u.eval.close_paren == (U32)ST.paren) {
0a4db386
YO
4587 goto fake_end;
4588 }
c255a977 4589 PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
a0d0e21e 4590 }
c255a977
DM
4591 /* NOTREACHED */
4592
4593
4594 case CURLY_B_min_fail:
4595 /* failed to find B in a non-greedy match where c1,c2 invalid */
4596 if (ST.paren && ST.count)
4597 PL_regendp[ST.paren] = -1;
4598
4599 REGCP_UNWIND(ST.cp);
4600 /* failed -- move forward one */
4601 PL_reginput = locinput;
e2e6a0f1 4602 if (regrepeat(rex, ST.A, 1, depth)) {
c255a977 4603 ST.count++;
a0ed51b3 4604 locinput = PL_reginput;
c255a977
DM
4605 if (ST.count <= ST.max || (ST.max == REG_INFTY &&
4606 ST.count > 0)) /* count overflow ? */
15272685 4607 {
c255a977
DM
4608 curly_try_B_min:
4609 CURLY_SETPAREN(ST.paren, ST.count);
0a4db386 4610 if (cur_eval && cur_eval->u.eval.close_paren &&
86413ec0 4611 cur_eval->u.eval.close_paren == (U32)ST.paren) {
0a4db386
YO
4612 goto fake_end;
4613 }
c255a977 4614 PUSH_STATE_GOTO(CURLY_B_min, ST.B);
a0d0e21e
LW
4615 }
4616 }
4633a7c4 4617 sayNO;
c255a977
DM
4618 /* NOTREACHED */
4619
4620
4621 curly_try_B_max:
4622 /* a successful greedy match: now try to match B */
40d049e4 4623 if (cur_eval && cur_eval->u.eval.close_paren &&
86413ec0 4624 cur_eval->u.eval.close_paren == (U32)ST.paren) {
40d049e4
YO
4625 goto fake_end;
4626 }
c255a977
DM
4627 {
4628 UV c = 0;
4629 if (ST.c1 != CHRTEST_VOID)
4630 c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
4631 UTF8_MAXBYTES, 0, uniflags)
466787eb 4632 : (UV) UCHARAT(PL_reginput);
c255a977
DM
4633 /* If it could work, try it. */
4634 if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
4635 CURLY_SETPAREN(ST.paren, ST.count);
4636 PUSH_STATE_GOTO(CURLY_B_max, ST.B);
4637 /* NOTREACHED */
4638 }
4639 }
4640 /* FALL THROUGH */
4641 case CURLY_B_max_fail:
4642 /* failed to find B in a greedy match */
4643 if (ST.paren && ST.count)
4644 PL_regendp[ST.paren] = -1;
4645
4646 REGCP_UNWIND(ST.cp);
4647 /* back up. */
4648 if (--ST.count < ST.min)
4649 sayNO;
4650 PL_reginput = locinput = HOPc(locinput, -1);
4651 goto curly_try_B_max;
4652
4653#undef ST
4654
a0d0e21e 4655 case END:
6bda09f9 4656 fake_end:
faec1544
DM
4657 if (cur_eval) {
4658 /* we've just finished A in /(??{A})B/; now continue with B */
4659 I32 tmpix;
4660
4661
4662 st->u.eval.toggle_reg_flags
4663 = cur_eval->u.eval.toggle_reg_flags;
4664 PL_reg_flags ^= st->u.eval.toggle_reg_flags;
4665
4666 st->u.eval.prev_rex = rex; /* inner */
4667 rex = cur_eval->u.eval.prev_rex; /* outer */
4668 cur_curlyx = cur_eval->u.eval.prev_curlyx;
4669 ReREFCNT_inc(rex);
4670 st->u.eval.cp = regcppush(0); /* Save *all* the positions. */
4671 REGCP_SET(st->u.eval.lastcp);
4672 PL_reginput = locinput;
4673
4674 /* Restore parens of the outer rex without popping the
4675 * savestack */
4676 tmpix = PL_savestack_ix;
4677 PL_savestack_ix = cur_eval->u.eval.lastcp;
4678 regcppop(rex);
4679 PL_savestack_ix = tmpix;
4680
4681 st->u.eval.prev_eval = cur_eval;
4682 cur_eval = cur_eval->u.eval.prev_eval;
4683 DEBUG_EXECUTE_r(
2a49f0f5
JH
4684 PerlIO_printf(Perl_debug_log, "%*s EVAL trying tail ... %"UVxf"\n",
4685 REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
faec1544
DM
4686 PUSH_YES_STATE_GOTO(EVAL_AB,
4687 st->u.eval.prev_eval->u.eval.B); /* match B */
4688 }
4689
3b0527fe 4690 if (locinput < reginfo->till) {
a3621e74 4691 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
7821416a
IZ
4692 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4693 PL_colors[4],
4694 (long)(locinput - PL_reg_starttry),
3b0527fe 4695 (long)(reginfo->till - PL_reg_starttry),
7821416a 4696 PL_colors[5]));
58e23c8d 4697
262b90c4 4698 sayNO_SILENT; /* Cannot match: too short. */
7821416a
IZ
4699 }
4700 PL_reginput = locinput; /* put where regtry can find it */
262b90c4 4701 sayYES; /* Success! */
dad79028
DM
4702
4703 case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4704 DEBUG_EXECUTE_r(
4705 PerlIO_printf(Perl_debug_log,
4706 "%*s %ssubpattern success...%s\n",
5bc10b2c 4707 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
3280af22 4708 PL_reginput = locinput; /* put where regtry can find it */
262b90c4 4709 sayYES; /* Success! */
dad79028 4710
40a82448
DM
4711#undef ST
4712#define ST st->u.ifmatch
4713
4714 case SUSPEND: /* (?>A) */
4715 ST.wanted = 1;
9fe1d20c 4716 PL_reginput = locinput;
9041c2e3 4717 goto do_ifmatch;
dad79028 4718
40a82448
DM
4719 case UNLESSM: /* -ve lookaround: (?!A), or with flags, (?<!A) */
4720 ST.wanted = 0;
dad79028
DM
4721 goto ifmatch_trivial_fail_test;
4722
40a82448
DM
4723 case IFMATCH: /* +ve lookaround: (?=A), or with flags, (?<=A) */
4724 ST.wanted = 1;
dad79028 4725 ifmatch_trivial_fail_test:
a0ed51b3 4726 if (scan->flags) {
52657f30 4727 char * const s = HOPBACKc(locinput, scan->flags);
dad79028
DM
4728 if (!s) {
4729 /* trivial fail */
24d3c4a9
DM
4730 if (logical) {
4731 logical = 0;
4732 sw = 1 - (bool)ST.wanted;
dad79028 4733 }
40a82448 4734 else if (ST.wanted)
dad79028
DM
4735 sayNO;
4736 next = scan + ARG(scan);
4737 if (next == scan)
4738 next = NULL;
4739 break;
4740 }
efb30f32 4741 PL_reginput = s;
a0ed51b3
LW
4742 }
4743 else
4744 PL_reginput = locinput;
4745
c277df42 4746 do_ifmatch:
40a82448 4747 ST.me = scan;
24d3c4a9 4748 ST.logical = logical;
40a82448
DM
4749 /* execute body of (?...A) */
4750 PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
4751 /* NOTREACHED */
4752
4753 case IFMATCH_A_fail: /* body of (?...A) failed */
4754 ST.wanted = !ST.wanted;
4755 /* FALL THROUGH */
4756
4757 case IFMATCH_A: /* body of (?...A) succeeded */
24d3c4a9
DM
4758 if (ST.logical) {
4759 sw = (bool)ST.wanted;
40a82448
DM
4760 }
4761 else if (!ST.wanted)
4762 sayNO;
4763
4764 if (OP(ST.me) == SUSPEND)
4765 locinput = PL_reginput;
4766 else {
4767 locinput = PL_reginput = st->locinput;
4768 nextchr = UCHARAT(locinput);
4769 }
4770 scan = ST.me + ARG(ST.me);
4771 if (scan == ST.me)
4772 scan = NULL;
4773 continue; /* execute B */
4774
4775#undef ST
dad79028 4776
c277df42 4777 case LONGJMP:
c277df42
IZ
4778 next = scan + ARG(scan);
4779 if (next == scan)
4780 next = NULL;
a0d0e21e 4781 break;
54612592 4782 case COMMIT:
e2e6a0f1
YO
4783 reginfo->cutpoint = PL_regeol;
4784 /* FALLTHROUGH */
5d458dd8 4785 case PRUNE:
24b23f37 4786 PL_reginput = locinput;
e2e6a0f1 4787 if (!scan->flags)
5d458dd8 4788 sv_yes_mark = sv_commit = (SV*)rex->data->data[ ARG( scan ) ];
54612592
YO
4789 PUSH_STATE_GOTO(COMMIT_next,next);
4790 /* NOTREACHED */
4791 case COMMIT_next_fail:
4792 no_final = 1;
4793 /* FALLTHROUGH */
7f69552c
YO
4794 case OPFAIL:
4795 sayNO;
e2e6a0f1
YO
4796 /* NOTREACHED */
4797
4798#define ST st->u.mark
4799 case MARKPOINT:
4800 ST.prev_mark = mark_state;
5d458dd8
YO
4801 ST.mark_name = sv_commit = sv_yes_mark
4802 = (SV*)rex->data->data[ ARG( scan ) ];
e2e6a0f1
YO
4803 mark_state = st;
4804 ST.mark_loc = PL_reginput = locinput;
4805 PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
4806 /* NOTREACHED */
4807 case MARKPOINT_next:
4808 mark_state = ST.prev_mark;
4809 sayYES;
4810 /* NOTREACHED */
4811 case MARKPOINT_next_fail:
5d458dd8 4812 if (popmark && sv_eq(ST.mark_name,popmark))
e2e6a0f1
YO
4813 {
4814 if (ST.mark_loc > startpoint)
4815 reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
4816 popmark = NULL; /* we found our mark */
4817 sv_commit = ST.mark_name;
4818
4819 DEBUG_EXECUTE_r({
5d458dd8 4820 PerlIO_printf(Perl_debug_log,
e2e6a0f1
YO
4821 "%*s %ssetting cutpoint to mark:%"SVf"...%s\n",
4822 REPORT_CODE_OFF+depth*2, "",
4823 PL_colors[4], sv_commit, PL_colors[5]);
e2e6a0f1
YO
4824 });
4825 }
4826 mark_state = ST.prev_mark;
5d458dd8
YO
4827 sv_yes_mark = mark_state ?
4828 mark_state->u.mark.mark_name : NULL;
e2e6a0f1
YO
4829 sayNO;
4830 /* NOTREACHED */
5d458dd8
YO
4831 case SKIP:
4832 PL_reginput = locinput;
4833 if (scan->flags) {
4834 /* (*CUT) : if we fail we cut here*/
4835 ST.mark_name = NULL;
e2e6a0f1 4836 ST.mark_loc = locinput;
5d458dd8
YO
4837 PUSH_STATE_GOTO(SKIP_next,next);
4838 } else {
4839 /* (*CUT:NAME) : if there is a (*MARK:NAME) fail where it was,
4840 otherwise do nothing. Meaning we need to scan
4841 */
4842 regmatch_state *cur = mark_state;
4843 SV *find = (SV*)rex->data->data[ ARG( scan ) ];
4844
4845 while (cur) {
4846 if ( sv_eq( cur->u.mark.mark_name,
4847 find ) )
4848 {
4849 ST.mark_name = find;
4850 PUSH_STATE_GOTO( SKIP_next, next );
4851 }
4852 cur = cur->u.mark.prev_mark;
4853 }
e2e6a0f1 4854 }
5d458dd8
YO
4855 /* Didn't find our (*MARK:NAME) so ignore this (*CUT:NAME) */
4856 break;
4857 case SKIP_next_fail:
4858 if (ST.mark_name) {
4859 /* (*CUT:NAME) - Set up to search for the name as we
4860 collapse the stack*/
4861 popmark = ST.mark_name;
4862 } else {
4863 /* (*CUT) - No name, we cut here.*/
e2e6a0f1
YO
4864 if (ST.mark_loc > startpoint)
4865 reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5d458dd8
YO
4866 /* but we set sv_commit to latest mark_name if there
4867 is one so they can test to see how things lead to this
4868 cut */
4869 if (mark_state)
4870 sv_commit=mark_state->u.mark.mark_name;
4871 }
e2e6a0f1
YO
4872 no_final = 1;
4873 sayNO;
4874 /* NOTREACHED */
4875#undef ST
4876
a0d0e21e 4877 default:
b900a521 4878 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 4879 PTR2UV(scan), OP(scan));
cea2e8a9 4880 Perl_croak(aTHX_ "regexp memory corruption");
5d458dd8
YO
4881
4882 } /* end switch */
95b24440 4883
5d458dd8
YO
4884 /* switch break jumps here */
4885 scan = next; /* prepare to execute the next op and ... */
4886 continue; /* ... jump back to the top, reusing st */
95b24440
DM
4887 /* NOTREACHED */
4888
40a82448
DM
4889 push_yes_state:
4890 /* push a state that backtracks on success */
4891 st->u.yes.prev_yes_state = yes_state;
4892 yes_state = st;
4893 /* FALL THROUGH */
4894 push_state:
4895 /* push a new regex state, then continue at scan */
4896 {
4897 regmatch_state *newst;
4898
24b23f37
YO
4899 DEBUG_STACK_r({
4900 regmatch_state *cur = st;
4901 regmatch_state *curyes = yes_state;
4902 int curd = depth;
4903 regmatch_slab *slab = PL_regmatch_slab;
4904 for (;curd > -1;cur--,curd--) {
4905 if (cur < SLAB_FIRST(slab)) {
4906 slab = slab->prev;
4907 cur = SLAB_LAST(slab);
4908 }
4909 PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
4910 REPORT_CODE_OFF + 2 + depth * 2,"",
4911 curd, reg_name[cur->resume_state],
4912 (curyes == cur) ? "yes" : ""
4913 );
4914 if (curyes == cur)
4915 curyes = cur->u.yes.prev_yes_state;
4916 }
4917 } else
4918 DEBUG_STATE_pp("push")
4919 );
40a82448 4920 depth++;
40a82448
DM
4921 st->locinput = locinput;
4922 newst = st+1;
4923 if (newst > SLAB_LAST(PL_regmatch_slab))
4924 newst = S_push_slab(aTHX);
4925 PL_regmatch_state = newst;
786e8c11 4926
40a82448
DM
4927 locinput = PL_reginput;
4928 nextchr = UCHARAT(locinput);
4929 st = newst;
4930 continue;
4931 /* NOTREACHED */
4932 }
a0d0e21e 4933 }
a687059c 4934
a0d0e21e
LW
4935 /*
4936 * We get here only if there's trouble -- normally "case END" is
4937 * the terminating point.
4938 */
cea2e8a9 4939 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 4940 /*NOTREACHED*/
4633a7c4
LW
4941 sayNO;
4942
262b90c4 4943yes:
77cb431f
DM
4944 if (yes_state) {
4945 /* we have successfully completed a subexpression, but we must now
4946 * pop to the state marked by yes_state and continue from there */
77cb431f 4947 assert(st != yes_state);
5bc10b2c
DM
4948#ifdef DEBUGGING
4949 while (st != yes_state) {
4950 st--;
4951 if (st < SLAB_FIRST(PL_regmatch_slab)) {
4952 PL_regmatch_slab = PL_regmatch_slab->prev;
4953 st = SLAB_LAST(PL_regmatch_slab);
4954 }
e2e6a0f1 4955 DEBUG_STATE_r({
54612592
YO
4956 if (no_final) {
4957 DEBUG_STATE_pp("pop (no final)");
4958 } else {
4959 DEBUG_STATE_pp("pop (yes)");
4960 }
e2e6a0f1 4961 });
5bc10b2c
DM
4962 depth--;
4963 }
4964#else
77cb431f
DM
4965 while (yes_state < SLAB_FIRST(PL_regmatch_slab)
4966 || yes_state > SLAB_LAST(PL_regmatch_slab))
4967 {
4968 /* not in this slab, pop slab */
4969 depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
4970 PL_regmatch_slab = PL_regmatch_slab->prev;
4971 st = SLAB_LAST(PL_regmatch_slab);
4972 }
4973 depth -= (st - yes_state);
5bc10b2c 4974#endif
77cb431f
DM
4975 st = yes_state;
4976 yes_state = st->u.yes.prev_yes_state;
4977 PL_regmatch_state = st;
24b23f37 4978
5d458dd8
YO
4979 if (no_final) {
4980 locinput= st->locinput;
4981 nextchr = UCHARAT(locinput);
4982 }
54612592 4983 state_num = st->resume_state + no_final;
24d3c4a9 4984 goto reenter_switch;
77cb431f
DM
4985 }
4986
a3621e74 4987 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
e4584336 4988 PL_colors[4], PL_colors[5]));
02db2b7b 4989
95b24440 4990 result = 1;
aa283a38 4991 goto final_exit;
4633a7c4
LW
4992
4993no:
a3621e74 4994 DEBUG_EXECUTE_r(
7821416a 4995 PerlIO_printf(Perl_debug_log,
786e8c11 4996 "%*s %sfailed...%s\n",
5bc10b2c 4997 REPORT_CODE_OFF+depth*2, "",
786e8c11 4998 PL_colors[4], PL_colors[5])
7821416a 4999 );
aa283a38 5000
262b90c4 5001no_silent:
54612592
YO
5002 if (no_final) {
5003 if (yes_state) {
5004 goto yes;
5005 } else {
5006 goto final_exit;
5007 }
5008 }
aa283a38
DM
5009 if (depth) {
5010 /* there's a previous state to backtrack to */
40a82448
DM
5011 st--;
5012 if (st < SLAB_FIRST(PL_regmatch_slab)) {
5013 PL_regmatch_slab = PL_regmatch_slab->prev;
5014 st = SLAB_LAST(PL_regmatch_slab);
5015 }
5016 PL_regmatch_state = st;
40a82448
DM
5017 locinput= st->locinput;
5018 nextchr = UCHARAT(locinput);
5019
5bc10b2c
DM
5020 DEBUG_STATE_pp("pop");
5021 depth--;
262b90c4
DM
5022 if (yes_state == st)
5023 yes_state = st->u.yes.prev_yes_state;
5bc10b2c 5024
24d3c4a9
DM
5025 state_num = st->resume_state + 1; /* failure = success + 1 */
5026 goto reenter_switch;
95b24440 5027 }
24d3c4a9 5028 result = 0;
aa283a38 5029
262b90c4 5030 final_exit:
bbe252da 5031 if (rex->intflags & PREGf_VERBARG_SEEN) {
5d458dd8
YO
5032 SV *sv_err = get_sv("REGERROR", 1);
5033 SV *sv_mrk = get_sv("REGMARK", 1);
5034 if (result) {
e2e6a0f1 5035 sv_commit = &PL_sv_no;
5d458dd8
YO
5036 if (!sv_yes_mark)
5037 sv_yes_mark = &PL_sv_yes;
5038 } else {
5039 if (!sv_commit)
5040 sv_commit = &PL_sv_yes;
5041 sv_yes_mark = &PL_sv_no;
5042 }
5043 sv_setsv(sv_err, sv_commit);
5044 sv_setsv(sv_mrk, sv_yes_mark);
e2e6a0f1 5045 }
5d9a96ca
DM
5046 /* restore original high-water mark */
5047 PL_regmatch_slab = orig_slab;
5048 PL_regmatch_state = orig_state;
5049
5050 /* free all slabs above current one */
5051 if (orig_slab->next) {
c4fd8992 5052 regmatch_slab *sl = orig_slab->next;
5d9a96ca
DM
5053 orig_slab->next = NULL;
5054 while (sl) {
c4fd8992 5055 regmatch_slab * const osl = sl;
5d9a96ca 5056 sl = sl->next;
ad65c075 5057 Safefree(osl);
5d9a96ca
DM
5058 }
5059 }
5060
95b24440 5061 return result;
a687059c
LW
5062}
5063
5064/*
5065 - regrepeat - repeatedly match something simple, report how many
5066 */
5067/*
5068 * [This routine now assumes that it will only match on things of length 1.
5069 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 5070 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 5071 */
76e3520e 5072STATIC I32
e2e6a0f1 5073S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
a687059c 5074{
27da23d5 5075 dVAR;
a0d0e21e 5076 register char *scan;
a0d0e21e 5077 register I32 c;
3280af22 5078 register char *loceol = PL_regeol;
a0ed51b3 5079 register I32 hardcount = 0;
53c4c00c 5080 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 5081
3280af22 5082 scan = PL_reginput;
faf11cac
HS
5083 if (max == REG_INFTY)
5084 max = I32_MAX;
5085 else if (max < loceol - scan)
7f596f4c 5086 loceol = scan + max;
a0d0e21e 5087 switch (OP(p)) {
22c35a8c 5088 case REG_ANY:
1aa99e6b 5089 if (do_utf8) {
ffc61ed2 5090 loceol = PL_regeol;
1aa99e6b 5091 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
5092 scan += UTF8SKIP(scan);
5093 hardcount++;
5094 }
5095 } else {
5096 while (scan < loceol && *scan != '\n')
5097 scan++;
a0ed51b3
LW
5098 }
5099 break;
ffc61ed2 5100 case SANY:
def8e4ea
JH
5101 if (do_utf8) {
5102 loceol = PL_regeol;
a0804c9e 5103 while (scan < loceol && hardcount < max) {
def8e4ea
JH
5104 scan += UTF8SKIP(scan);
5105 hardcount++;
5106 }
5107 }
5108 else
5109 scan = loceol;
a0ed51b3 5110 break;
f33976b4
DB
5111 case CANY:
5112 scan = loceol;
5113 break;
090f7165
JH
5114 case EXACT: /* length of string is 1 */
5115 c = (U8)*STRING(p);
5116 while (scan < loceol && UCHARAT(scan) == c)
5117 scan++;
bbce6d69 5118 break;
5119 case EXACTF: /* length of string is 1 */
cd439c50 5120 c = (U8)*STRING(p);
bbce6d69 5121 while (scan < loceol &&
22c35a8c 5122 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 5123 scan++;
5124 break;
5125 case EXACTFL: /* length of string is 1 */
3280af22 5126 PL_reg_flags |= RF_tainted;
cd439c50 5127 c = (U8)*STRING(p);
bbce6d69 5128 while (scan < loceol &&
22c35a8c 5129 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
5130 scan++;
5131 break;
5132 case ANYOF:
ffc61ed2
JH
5133 if (do_utf8) {
5134 loceol = PL_regeol;
cfc92286 5135 while (hardcount < max && scan < loceol &&
32fc9b6a 5136 reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
ffc61ed2
JH
5137 scan += UTF8SKIP(scan);
5138 hardcount++;
5139 }
5140 } else {
32fc9b6a 5141 while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
ffc61ed2
JH
5142 scan++;
5143 }
a0d0e21e
LW
5144 break;
5145 case ALNUM:
1aa99e6b 5146 if (do_utf8) {
ffc61ed2 5147 loceol = PL_regeol;
1a4fad37 5148 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 5149 while (hardcount < max && scan < loceol &&
3568d838 5150 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
5151 scan += UTF8SKIP(scan);
5152 hardcount++;
5153 }
5154 } else {
5155 while (scan < loceol && isALNUM(*scan))
5156 scan++;
a0ed51b3
LW
5157 }
5158 break;
bbce6d69 5159 case ALNUML:
3280af22 5160 PL_reg_flags |= RF_tainted;
1aa99e6b 5161 if (do_utf8) {
ffc61ed2 5162 loceol = PL_regeol;
1aa99e6b
IH
5163 while (hardcount < max && scan < loceol &&
5164 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
5165 scan += UTF8SKIP(scan);
5166 hardcount++;
5167 }
5168 } else {
5169 while (scan < loceol && isALNUM_LC(*scan))
5170 scan++;
a0ed51b3
LW
5171 }
5172 break;
a0d0e21e 5173 case NALNUM:
1aa99e6b 5174 if (do_utf8) {
ffc61ed2 5175 loceol = PL_regeol;
1a4fad37 5176 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 5177 while (hardcount < max && scan < loceol &&
3568d838 5178 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
5179 scan += UTF8SKIP(scan);
5180 hardcount++;
5181 }
5182 } else {
5183 while (scan < loceol && !isALNUM(*scan))
5184 scan++;
a0ed51b3
LW
5185 }
5186 break;
bbce6d69 5187 case NALNUML:
3280af22 5188 PL_reg_flags |= RF_tainted;
1aa99e6b 5189 if (do_utf8) {
ffc61ed2 5190 loceol = PL_regeol;
1aa99e6b
IH
5191 while (hardcount < max && scan < loceol &&
5192 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
5193 scan += UTF8SKIP(scan);
5194 hardcount++;
5195 }
5196 } else {
5197 while (scan < loceol && !isALNUM_LC(*scan))
5198 scan++;
a0ed51b3
LW
5199 }
5200 break;
a0d0e21e 5201 case SPACE:
1aa99e6b 5202 if (do_utf8) {
ffc61ed2 5203 loceol = PL_regeol;
1a4fad37 5204 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 5205 while (hardcount < max && scan < loceol &&
3568d838
JH
5206 (*scan == ' ' ||
5207 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
5208 scan += UTF8SKIP(scan);
5209 hardcount++;
5210 }
5211 } else {
5212 while (scan < loceol && isSPACE(*scan))
5213 scan++;
a0ed51b3
LW
5214 }
5215 break;
bbce6d69 5216 case SPACEL:
3280af22 5217 PL_reg_flags |= RF_tainted;
1aa99e6b 5218 if (do_utf8) {
ffc61ed2 5219 loceol = PL_regeol;
1aa99e6b 5220 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
5221 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5222 scan += UTF8SKIP(scan);
5223 hardcount++;
5224 }
5225 } else {
5226 while (scan < loceol && isSPACE_LC(*scan))
5227 scan++;
a0ed51b3
LW
5228 }
5229 break;
a0d0e21e 5230 case NSPACE:
1aa99e6b 5231 if (do_utf8) {
ffc61ed2 5232 loceol = PL_regeol;
1a4fad37 5233 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 5234 while (hardcount < max && scan < loceol &&
3568d838
JH
5235 !(*scan == ' ' ||
5236 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
5237 scan += UTF8SKIP(scan);
5238 hardcount++;
5239 }
5240 } else {
5241 while (scan < loceol && !isSPACE(*scan))
5242 scan++;
5243 break;
a0ed51b3 5244 }
bbce6d69 5245 case NSPACEL:
3280af22 5246 PL_reg_flags |= RF_tainted;
1aa99e6b 5247 if (do_utf8) {
ffc61ed2 5248 loceol = PL_regeol;
1aa99e6b 5249 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
5250 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5251 scan += UTF8SKIP(scan);
5252 hardcount++;
5253 }
5254 } else {
5255 while (scan < loceol && !isSPACE_LC(*scan))
5256 scan++;
a0ed51b3
LW
5257 }
5258 break;
a0d0e21e 5259 case DIGIT:
1aa99e6b 5260 if (do_utf8) {
ffc61ed2 5261 loceol = PL_regeol;
1a4fad37 5262 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 5263 while (hardcount < max && scan < loceol &&
3568d838 5264 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
5265 scan += UTF8SKIP(scan);
5266 hardcount++;
5267 }
5268 } else {
5269 while (scan < loceol && isDIGIT(*scan))
5270 scan++;
a0ed51b3
LW
5271 }
5272 break;
a0d0e21e 5273 case NDIGIT:
1aa99e6b 5274 if (do_utf8) {
ffc61ed2 5275 loceol = PL_regeol;
1a4fad37 5276 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 5277 while (hardcount < max && scan < loceol &&
3568d838 5278 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
5279 scan += UTF8SKIP(scan);
5280 hardcount++;
5281 }
5282 } else {
5283 while (scan < loceol && !isDIGIT(*scan))
5284 scan++;
a0ed51b3
LW
5285 }
5286 break;
a0d0e21e
LW
5287 default: /* Called on something of 0 width. */
5288 break; /* So match right here or not at all. */
5289 }
a687059c 5290
a0ed51b3
LW
5291 if (hardcount)
5292 c = hardcount;
5293 else
5294 c = scan - PL_reginput;
3280af22 5295 PL_reginput = scan;
a687059c 5296
a3621e74 5297 DEBUG_r({
e68ec53f 5298 GET_RE_DEBUG_FLAGS_DECL;
be8e71aa 5299 DEBUG_EXECUTE_r({
e68ec53f
YO
5300 SV * const prop = sv_newmortal();
5301 regprop(prog, prop, p);
5302 PerlIO_printf(Perl_debug_log,
be8e71aa 5303 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
e2e6a0f1 5304 REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
a3621e74 5305 });
be8e71aa 5306 });
9041c2e3 5307
a0d0e21e 5308 return(c);
a687059c
LW
5309}
5310
c277df42 5311
be8e71aa 5312#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
c277df42 5313/*
ffc61ed2
JH
5314- regclass_swash - prepare the utf8 swash
5315*/
5316
5317SV *
32fc9b6a 5318Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
ffc61ed2 5319{
97aff369 5320 dVAR;
9e55ce06
JH
5321 SV *sw = NULL;
5322 SV *si = NULL;
5323 SV *alt = NULL;
3dab1dad 5324 const struct reg_data * const data = prog ? prog->data : NULL;
ffc61ed2 5325
4f639d21 5326 if (data && data->count) {
a3b680e6 5327 const U32 n = ARG(node);
ffc61ed2 5328
4f639d21
DM
5329 if (data->what[n] == 's') {
5330 SV * const rv = (SV*)data->data[n];
890ce7af 5331 AV * const av = (AV*)SvRV((SV*)rv);
2d03de9c 5332 SV **const ary = AvARRAY(av);
9e55ce06 5333 SV **a, **b;
9041c2e3 5334
711a919c 5335 /* See the end of regcomp.c:S_regclass() for
9e55ce06
JH
5336 * documentation of these array elements. */
5337
b11f357e 5338 si = *ary;
8f7f7219 5339 a = SvROK(ary[1]) ? &ary[1] : 0;
b11f357e
JH
5340 b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
5341
ffc61ed2
JH
5342 if (a)
5343 sw = *a;
5344 else if (si && doinit) {
5345 sw = swash_init("utf8", "", si, 1, 0);
5346 (void)av_store(av, 1, sw);
5347 }
9e55ce06
JH
5348 if (b)
5349 alt = *b;
ffc61ed2
JH
5350 }
5351 }
5352
9e55ce06
JH
5353 if (listsvp)
5354 *listsvp = si;
5355 if (altsvp)
5356 *altsvp = alt;
ffc61ed2
JH
5357
5358 return sw;
5359}
76234dfb 5360#endif
ffc61ed2
JH
5361
5362/*
ba7b4546 5363 - reginclass - determine if a character falls into a character class
832705d4
JH
5364
5365 The n is the ANYOF regnode, the p is the target string, lenp
5366 is pointer to the maximum length of how far to go in the p
5367 (if the lenp is zero, UTF8SKIP(p) is used),
5368 do_utf8 tells whether the target string is in UTF-8.
5369
bbce6d69 5370 */
5371
76e3520e 5372STATIC bool
32fc9b6a 5373S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
bbce6d69 5374{
27da23d5 5375 dVAR;
a3b680e6 5376 const char flags = ANYOF_FLAGS(n);
bbce6d69 5377 bool match = FALSE;
cc07378b 5378 UV c = *p;
ae9ddab8 5379 STRLEN len = 0;
9e55ce06 5380 STRLEN plen;
1aa99e6b 5381
19f67299
TS
5382 if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5383 c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
4ad0818d
DM
5384 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
5385 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
e8a70c6f
SP
5386 if (len == (STRLEN)-1)
5387 Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
19f67299 5388 }
bbce6d69 5389
0f0076b4 5390 plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
ffc61ed2 5391 if (do_utf8 || (flags & ANYOF_UNICODE)) {
9e55ce06
JH
5392 if (lenp)
5393 *lenp = 0;
ffc61ed2 5394 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
5395 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5396 match = TRUE;
bbce6d69 5397 }
3568d838 5398 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 5399 match = TRUE;
ffc61ed2 5400 if (!match) {
9e55ce06 5401 AV *av;
32fc9b6a 5402 SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
ffc61ed2
JH
5403
5404 if (sw) {
3568d838 5405 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
5406 match = TRUE;
5407 else if (flags & ANYOF_FOLD) {
9e55ce06
JH
5408 if (!match && lenp && av) {
5409 I32 i;
9e55ce06 5410 for (i = 0; i <= av_len(av); i++) {
890ce7af 5411 SV* const sv = *av_fetch(av, i, FALSE);
9e55ce06 5412 STRLEN len;
890ce7af 5413 const char * const s = SvPV_const(sv, len);
9e55ce06 5414
061b10df 5415 if (len <= plen && memEQ(s, (char*)p, len)) {
9e55ce06
JH
5416 *lenp = len;
5417 match = TRUE;
5418 break;
5419 }
5420 }
5421 }
5422 if (!match) {
89ebb4a3 5423 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4a623e43
JH
5424 STRLEN tmplen;
5425
9e55ce06
JH
5426 to_utf8_fold(p, tmpbuf, &tmplen);
5427 if (swash_fetch(sw, tmpbuf, do_utf8))
5428 match = TRUE;
5429 }
ffc61ed2
JH
5430 }
5431 }
bbce6d69 5432 }
9e55ce06 5433 if (match && lenp && *lenp == 0)
0f0076b4 5434 *lenp = UNISKIP(NATIVE_TO_UNI(c));
bbce6d69 5435 }
1aa99e6b 5436 if (!match && c < 256) {
ffc61ed2
JH
5437 if (ANYOF_BITMAP_TEST(n, c))
5438 match = TRUE;
5439 else if (flags & ANYOF_FOLD) {
eb160463 5440 U8 f;
a0ed51b3 5441
ffc61ed2
JH
5442 if (flags & ANYOF_LOCALE) {
5443 PL_reg_flags |= RF_tainted;
5444 f = PL_fold_locale[c];
5445 }
5446 else
5447 f = PL_fold[c];
5448 if (f != c && ANYOF_BITMAP_TEST(n, f))
5449 match = TRUE;
5450 }
5451
5452 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 5453 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
5454 if (
5455 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
5456 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
5457 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
5458 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
5459 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
5460 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
5461 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
5462 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5463 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
5464 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
5465 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
5466 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
5467 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
5468 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
5469 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
5470 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
5471 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
5472 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
5473 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
5474 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
5475 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
5476 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
5477 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
5478 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
5479 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
5480 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
5481 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
5482 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
5483 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
5484 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
5485 ) /* How's that for a conditional? */
5486 {
5487 match = TRUE;
5488 }
a0ed51b3 5489 }
a0ed51b3
LW
5490 }
5491
a0ed51b3
LW
5492 return (flags & ANYOF_INVERT) ? !match : match;
5493}
161b471a 5494
dfe13c55 5495STATIC U8 *
0ce71af7 5496S_reghop3(U8 *s, I32 off, const U8* lim)
9041c2e3 5497{
97aff369 5498 dVAR;
a0ed51b3 5499 if (off >= 0) {
1aa99e6b 5500 while (off-- && s < lim) {
ffc61ed2 5501 /* XXX could check well-formedness here */
a0ed51b3 5502 s += UTF8SKIP(s);
ffc61ed2 5503 }
a0ed51b3
LW
5504 }
5505 else {
1de06328
YO
5506 while (off++ && s > lim) {
5507 s--;
5508 if (UTF8_IS_CONTINUED(*s)) {
5509 while (s > lim && UTF8_IS_CONTINUATION(*s))
5510 s--;
a0ed51b3 5511 }
1de06328 5512 /* XXX could check well-formedness here */
a0ed51b3
LW
5513 }
5514 }
5515 return s;
5516}
161b471a 5517
f9f4320a
YO
5518#ifdef XXX_dmq
5519/* there are a bunch of places where we use two reghop3's that should
5520 be replaced with this routine. but since thats not done yet
5521 we ifdef it out - dmq
5522*/
dfe13c55 5523STATIC U8 *
1de06328
YO
5524S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
5525{
5526 dVAR;
5527 if (off >= 0) {
5528 while (off-- && s < rlim) {
5529 /* XXX could check well-formedness here */
5530 s += UTF8SKIP(s);
5531 }
5532 }
5533 else {
5534 while (off++ && s > llim) {
5535 s--;
5536 if (UTF8_IS_CONTINUED(*s)) {
5537 while (s > llim && UTF8_IS_CONTINUATION(*s))
5538 s--;
5539 }
5540 /* XXX could check well-formedness here */
5541 }
5542 }
5543 return s;
5544}
f9f4320a 5545#endif
1de06328
YO
5546
5547STATIC U8 *
0ce71af7 5548S_reghopmaybe3(U8* s, I32 off, const U8* lim)
a0ed51b3 5549{
97aff369 5550 dVAR;
a0ed51b3 5551 if (off >= 0) {
1aa99e6b 5552 while (off-- && s < lim) {
ffc61ed2 5553 /* XXX could check well-formedness here */
a0ed51b3 5554 s += UTF8SKIP(s);
ffc61ed2 5555 }
a0ed51b3 5556 if (off >= 0)
3dab1dad 5557 return NULL;
a0ed51b3
LW
5558 }
5559 else {
1de06328
YO
5560 while (off++ && s > lim) {
5561 s--;
5562 if (UTF8_IS_CONTINUED(*s)) {
5563 while (s > lim && UTF8_IS_CONTINUATION(*s))
5564 s--;
a0ed51b3 5565 }
1de06328 5566 /* XXX could check well-formedness here */
a0ed51b3
LW
5567 }
5568 if (off <= 0)
3dab1dad 5569 return NULL;
a0ed51b3
LW
5570 }
5571 return s;
5572}
51371543 5573
51371543 5574static void
acfe0abc 5575restore_pos(pTHX_ void *arg)
51371543 5576{
97aff369 5577 dVAR;
097eb12c 5578 regexp * const rex = (regexp *)arg;
51371543
GS
5579 if (PL_reg_eval_set) {
5580 if (PL_reg_oldsaved) {
4f639d21
DM
5581 rex->subbeg = PL_reg_oldsaved;
5582 rex->sublen = PL_reg_oldsavedlen;
f8c7b90f 5583#ifdef PERL_OLD_COPY_ON_WRITE
4f639d21 5584 rex->saved_copy = PL_nrs;
ed252734 5585#endif
4f639d21 5586 RX_MATCH_COPIED_on(rex);
51371543
GS
5587 }
5588 PL_reg_magic->mg_len = PL_reg_oldpos;
5589 PL_reg_eval_set = 0;
5590 PL_curpm = PL_reg_oldcurpm;
5591 }
5592}
33b8afdf
JH
5593
5594STATIC void
5595S_to_utf8_substr(pTHX_ register regexp *prog)
5596{
33b8afdf 5597 if (prog->float_substr && !prog->float_utf8) {
097eb12c
AL
5598 SV* const sv = newSVsv(prog->float_substr);
5599 prog->float_utf8 = sv;
33b8afdf
JH
5600 sv_utf8_upgrade(sv);
5601 if (SvTAIL(prog->float_substr))
5602 SvTAIL_on(sv);
5603 if (prog->float_substr == prog->check_substr)
5604 prog->check_utf8 = sv;
5605 }
5606 if (prog->anchored_substr && !prog->anchored_utf8) {
097eb12c
AL
5607 SV* const sv = newSVsv(prog->anchored_substr);
5608 prog->anchored_utf8 = sv;
33b8afdf
JH
5609 sv_utf8_upgrade(sv);
5610 if (SvTAIL(prog->anchored_substr))
5611 SvTAIL_on(sv);
5612 if (prog->anchored_substr == prog->check_substr)
5613 prog->check_utf8 = sv;
5614 }
5615}
5616
5617STATIC void
5618S_to_byte_substr(pTHX_ register regexp *prog)
5619{
97aff369 5620 dVAR;
33b8afdf 5621 if (prog->float_utf8 && !prog->float_substr) {
097eb12c
AL
5622 SV* sv = newSVsv(prog->float_utf8);
5623 prog->float_substr = sv;
33b8afdf
JH
5624 if (sv_utf8_downgrade(sv, TRUE)) {
5625 if (SvTAIL(prog->float_utf8))
5626 SvTAIL_on(sv);
5627 } else {
5628 SvREFCNT_dec(sv);
5629 prog->float_substr = sv = &PL_sv_undef;
5630 }
5631 if (prog->float_utf8 == prog->check_utf8)
5632 prog->check_substr = sv;
5633 }
5634 if (prog->anchored_utf8 && !prog->anchored_substr) {
097eb12c
AL
5635 SV* sv = newSVsv(prog->anchored_utf8);
5636 prog->anchored_substr = sv;
33b8afdf
JH
5637 if (sv_utf8_downgrade(sv, TRUE)) {
5638 if (SvTAIL(prog->anchored_utf8))
5639 SvTAIL_on(sv);
5640 } else {
5641 SvREFCNT_dec(sv);
5642 prog->anchored_substr = sv = &PL_sv_undef;
5643 }
5644 if (prog->anchored_utf8 == prog->check_utf8)
5645 prog->check_substr = sv;
5646 }
5647}
66610fdd
RGS
5648
5649/*
5650 * Local variables:
5651 * c-indentation-style: bsd
5652 * c-basic-offset: 4
5653 * indent-tabs-mode: t
5654 * End:
5655 *
37442d52
RGS
5656 * ex: set ts=8 sts=4 sw=4 noet:
5657 */