This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
perlhack: some portability updates
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
61296642
DM
8/* This file contains functions for executing a regular expression. See
9 * also regcomp.c which funnily enough, contains functions for compiling
166f8a29 10 * a regular expression.
e4a054ea
DM
11 *
12 * This file is also copied at build time to ext/re/re_exec.c, where
13 * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14 * This causes the main functions to be compiled under new names and with
15 * debugging support added, which makes "use re 'debug'" work.
166f8a29
DM
16 */
17
a687059c
LW
18/* NOTE: this is derived from Henry Spencer's regexp code, and should not
19 * confused with the original package (see point 3 below). Thanks, Henry!
20 */
21
22/* Additional note: this code is very heavily munged from Henry's version
23 * in places. In some spots I've traded clarity for efficiency, so don't
24 * blame Henry for some of the lack of readability.
25 */
26
e50aee73
AD
27/* The names of the functions have been changed from regcomp and
28 * regexec to pregcomp and pregexec in order to avoid conflicts
29 * with the POSIX routines of the same names.
30*/
31
b9d5759e 32#ifdef PERL_EXT_RE_BUILD
54df2634 33#include "re_top.h"
9041c2e3 34#endif
56953603 35
a687059c 36/*
e50aee73 37 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
38 *
39 * Copyright (c) 1986 by University of Toronto.
40 * Written by Henry Spencer. Not derived from licensed software.
41 *
42 * Permission is granted to anyone to use this software for any
43 * purpose on any computer system, and to redistribute it freely,
44 * subject to the following restrictions:
45 *
46 * 1. The author is not responsible for the consequences of use of
47 * this software, no matter how awful, even if they arise
48 * from defects in it.
49 *
50 * 2. The origin of this software must not be misrepresented, either
51 * by explicit claim or by omission.
52 *
53 * 3. Altered versions must be plainly marked as such, and must not
54 * be misrepresented as being the original software.
55 *
56 **** Alterations to Henry's code are...
57 ****
4bb101f2 58 **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 59 **** 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 60 ****
9ef589d8
LW
61 **** You may distribute under the terms of either the GNU General Public
62 **** License or the Artistic License, as specified in the README file.
a687059c
LW
63 *
64 * Beware that some of this code is subtly aware of the way operator
65 * precedence is structured in regular expressions. Serious changes in
66 * regular-expression syntax might require a total rethink.
67 */
68#include "EXTERN.h"
864dbfa3 69#define PERL_IN_REGEXEC_C
a687059c 70#include "perl.h"
0f5d15d6 71
54df2634
NC
72#ifdef PERL_IN_XSUB_RE
73# include "re_comp.h"
74#else
75# include "regcomp.h"
76#endif
a687059c 77
c277df42
IZ
78#define RF_tainted 1 /* tainted information used? */
79#define RF_warned 2 /* warned about big count? */
faec1544 80
ab3bbdeb 81#define RF_utf8 8 /* Pattern contains multibyte chars? */
a0ed51b3 82
eb160463 83#define UTF ((PL_reg_flags & RF_utf8) != 0)
ce862d02
IZ
84
85#define RS_init 1 /* eval environment created */
86#define RS_set 2 /* replsv value is set */
c277df42 87
a687059c
LW
88#ifndef STATIC
89#define STATIC static
90#endif
91
32fc9b6a 92#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
7d3e948e 93
c277df42
IZ
94/*
95 * Forwards.
96 */
97
33b8afdf 98#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 99#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 100
3dab1dad
YO
101#define HOPc(pos,off) \
102 (char *)(PL_reg_match_utf8 \
52657f30 103 ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
3dab1dad
YO
104 : (U8*)(pos + off))
105#define HOPBACKc(pos, off) \
07be1b83
YO
106 (char*)(PL_reg_match_utf8\
107 ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
108 : (pos - off >= PL_bostr) \
8e11feef 109 ? (U8*)pos - off \
3dab1dad 110 : NULL)
efb30f32 111
e7409c1b 112#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
1aa99e6b 113#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
1aa99e6b 114
1a4fad37
AL
115#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
116 if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
117#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
118#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
119#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
120#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
51371543 121
3dab1dad
YO
122/* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
123
5f80c4cf 124/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
125#define JUMPABLE(rn) ( \
126 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
127 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
128 OP(rn) == PLUS || OP(rn) == MINMOD || \
3dab1dad 129 (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
130)
131
cca55fe3 132#define HAS_TEXT(rn) ( \
3dab1dad 133 PL_regkind[OP(rn)] == EXACT || PL_regkind[OP(rn)] == REF \
cca55fe3 134)
e2d8ce26 135
a84d97b6
HS
136/*
137 Search for mandatory following text node; for lookahead, the text must
138 follow but for lookbehind (rn->flags != 0) we skip to the next step.
139*/
cca55fe3 140#define FIND_NEXT_IMPT(rn) STMT_START { \
3dab1dad
YO
141 while (JUMPABLE(rn)) { \
142 const OPCODE type = OP(rn); \
143 if (type == SUSPEND || PL_regkind[type] == CURLY) \
e2d8ce26 144 rn = NEXTOPER(NEXTOPER(rn)); \
3dab1dad 145 else if (type == PLUS) \
cca55fe3 146 rn = NEXTOPER(rn); \
3dab1dad 147 else if (type == IFMATCH) \
a84d97b6 148 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
e2d8ce26 149 else rn += NEXT_OFF(rn); \
3dab1dad 150 } \
5f80c4cf 151} STMT_END
74750237 152
c476f425 153
acfe0abc 154static void restore_pos(pTHX_ void *arg);
51371543 155
76e3520e 156STATIC CHECKPOINT
cea2e8a9 157S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 158{
97aff369 159 dVAR;
a3b680e6 160 const int retval = PL_savestack_ix;
b1ce53c5 161#define REGCP_PAREN_ELEMS 4
a3b680e6 162 const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e 163 int p;
40a82448 164 GET_RE_DEBUG_FLAGS_DECL;
a0d0e21e 165
e49a9654
IH
166 if (paren_elems_to_push < 0)
167 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
168
6bda09f9 169#define REGCP_OTHER_ELEMS 8
4b3c1a47 170 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
7f69552c 171
3280af22 172 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 173/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
174 SSPUSHINT(PL_regendp[p]);
175 SSPUSHINT(PL_regstartp[p]);
3280af22 176 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e 177 SSPUSHINT(p);
40a82448
DM
178 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
179 " saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
180 (UV)p, (IV)PL_regstartp[p],
181 (IV)(PL_reg_start_tmp[p] - PL_bostr),
182 (IV)PL_regendp[p]
183 ));
a0d0e21e 184 }
b1ce53c5 185/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
6bda09f9
YO
186 SSPUSHPTR(PL_regstartp);
187 SSPUSHPTR(PL_regendp);
3280af22
NIS
188 SSPUSHINT(PL_regsize);
189 SSPUSHINT(*PL_reglastparen);
a01268b5 190 SSPUSHINT(*PL_reglastcloseparen);
3280af22 191 SSPUSHPTR(PL_reginput);
41123dfd
JH
192#define REGCP_FRAME_ELEMS 2
193/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
194 * are needed for the regexp context stack bookkeeping. */
195 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 196 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 197
a0d0e21e
LW
198 return retval;
199}
200
c277df42 201/* These are needed since we do not localize EVAL nodes: */
ab3bbdeb
YO
202#define REGCP_SET(cp) \
203 DEBUG_STATE_r( \
ab3bbdeb 204 PerlIO_printf(Perl_debug_log, \
e4f74956 205 " Setting an EVAL scope, savestack=%"IVdf"\n", \
ab3bbdeb
YO
206 (IV)PL_savestack_ix)); \
207 cp = PL_savestack_ix
c3464db5 208
ab3bbdeb 209#define REGCP_UNWIND(cp) \
e4f74956 210 DEBUG_STATE_r( \
ab3bbdeb 211 if (cp != PL_savestack_ix) \
e4f74956
YO
212 PerlIO_printf(Perl_debug_log, \
213 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
ab3bbdeb
YO
214 (IV)(cp), (IV)PL_savestack_ix)); \
215 regcpblow(cp)
c277df42 216
76e3520e 217STATIC char *
097eb12c 218S_regcppop(pTHX_ const regexp *rex)
a0d0e21e 219{
97aff369 220 dVAR;
3b6647e0 221 U32 i;
a0d0e21e 222 char *input;
b1ce53c5 223
a3621e74
YO
224 GET_RE_DEBUG_FLAGS_DECL;
225
b1ce53c5 226 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 227 i = SSPOPINT;
b1ce53c5
JH
228 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
229 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 230 input = (char *) SSPOPPTR;
a01268b5 231 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
232 *PL_reglastparen = SSPOPINT;
233 PL_regsize = SSPOPINT;
6bda09f9
YO
234 PL_regendp=(I32 *) SSPOPPTR;
235 PL_regstartp=(I32 *) SSPOPPTR;
b1ce53c5 236
6bda09f9 237
b1ce53c5 238 /* Now restore the parentheses context. */
41123dfd
JH
239 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
240 i > 0; i -= REGCP_PAREN_ELEMS) {
1df70142 241 I32 tmps;
097eb12c 242 U32 paren = (U32)SSPOPINT;
3280af22 243 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
244 PL_regstartp[paren] = SSPOPINT;
245 tmps = SSPOPINT;
3280af22
NIS
246 if (paren <= *PL_reglastparen)
247 PL_regendp[paren] = tmps;
a3621e74 248 DEBUG_EXECUTE_r(
c3464db5 249 PerlIO_printf(Perl_debug_log,
b900a521 250 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 251 (UV)paren, (IV)PL_regstartp[paren],
b900a521 252 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 253 (IV)PL_regendp[paren],
3280af22 254 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 255 );
a0d0e21e 256 }
a3621e74 257 DEBUG_EXECUTE_r(
bb7a0f54 258 if (*PL_reglastparen + 1 <= rex->nparens) {
c3464db5 259 PerlIO_printf(Perl_debug_log,
faccc32b 260 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
4f639d21 261 (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
c277df42
IZ
262 }
263 );
daf18116 264#if 1
dafc8851
JH
265 /* It would seem that the similar code in regtry()
266 * already takes care of this, and in fact it is in
267 * a better location to since this code can #if 0-ed out
268 * but the code in regtry() is needed or otherwise tests
269 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
270 * (as of patchlevel 7877) will fail. Then again,
271 * this code seems to be necessary or otherwise
272 * building DynaLoader will fail:
273 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
274 * --jhi */
3b6647e0 275 for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
097eb12c
AL
276 if (i > PL_regsize)
277 PL_regstartp[i] = -1;
278 PL_regendp[i] = -1;
a0d0e21e 279 }
dafc8851 280#endif
a0d0e21e
LW
281 return input;
282}
283
02db2b7b 284#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 285
a687059c 286/*
e50aee73 287 * pregexec and friends
a687059c
LW
288 */
289
76234dfb 290#ifndef PERL_IN_XSUB_RE
a687059c 291/*
c277df42 292 - pregexec - match a regexp against a string
a687059c 293 */
c277df42 294I32
864dbfa3 295Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 296 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
297/* strend: pointer to null at end of string */
298/* strbeg: real beginning of string */
299/* minend: end of match must be >=minend after stringarg. */
300/* nosave: For optimizations. */
301{
302 return
9041c2e3 303 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
304 nosave ? 0 : REXEC_COPY_STR);
305}
76234dfb 306#endif
22e551b9 307
9041c2e3 308/*
cad2e5aa
JH
309 * Need to implement the following flags for reg_anch:
310 *
311 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
312 * USE_INTUIT_ML
313 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
314 * INTUIT_AUTORITATIVE_ML
315 * INTUIT_ONCE_NOML - Intuit can match in one location only.
316 * INTUIT_ONCE_ML
317 *
318 * Another flag for this function: SECOND_TIME (so that float substrs
319 * with giant delta may be not rechecked).
320 */
321
322/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
323
3f7c398e 324/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
cad2e5aa
JH
325 Otherwise, only SvCUR(sv) is used to get strbeg. */
326
327/* XXXX We assume that strpos is strbeg unless sv. */
328
6eb5f6b9
JH
329/* XXXX Some places assume that there is a fixed substring.
330 An update may be needed if optimizer marks as "INTUITable"
331 RExen without fixed substrings. Similarly, it is assumed that
332 lengths of all the strings are no more than minlen, thus they
333 cannot come from lookahead.
40d049e4
YO
334 (Or minlen should take into account lookahead.)
335 NOTE: Some of this comment is not correct. minlen does now take account
336 of lookahead/behind. Further research is required. -- demerphq
337
338*/
6eb5f6b9 339
2c2d71f5
JH
340/* A failure to find a constant substring means that there is no need to make
341 an expensive call to REx engine, thus we celebrate a failure. Similarly,
342 finding a substring too deep into the string means that less calls to
30944b6d
IZ
343 regtry() should be needed.
344
345 REx compiler's optimizer found 4 possible hints:
346 a) Anchored substring;
347 b) Fixed substring;
348 c) Whether we are anchored (beginning-of-line or \G);
349 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 350 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
351 string which does not contradict any of them.
352 */
2c2d71f5 353
6eb5f6b9
JH
354/* Most of decisions we do here should have been done at compile time.
355 The nodes of the REx which we used for the search should have been
356 deleted from the finite automaton. */
357
cad2e5aa
JH
358char *
359Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
360 char *strend, U32 flags, re_scream_pos_data *data)
361{
97aff369 362 dVAR;
b7953727 363 register I32 start_shift = 0;
cad2e5aa 364 /* Should be nonnegative! */
b7953727 365 register I32 end_shift = 0;
2c2d71f5
JH
366 register char *s;
367 register SV *check;
a1933d95 368 char *strbeg;
cad2e5aa 369 char *t;
1de06328 370 const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
cad2e5aa 371 I32 ml_anch;
bd61b366
SS
372 register char *other_last = NULL; /* other substr checked before this */
373 char *check_at = NULL; /* check substr found at this pos */
bbe252da 374 const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
f8fc2ecf 375 RXi_GET_DECL(prog,progi);
30944b6d 376#ifdef DEBUGGING
890ce7af 377 const char * const i_strpos = strpos;
30944b6d 378#endif
a3621e74
YO
379
380 GET_RE_DEBUG_FLAGS_DECL;
381
a30b2f1f 382 RX_MATCH_UTF8_set(prog,do_utf8);
cad2e5aa 383
bbe252da 384 if (prog->extflags & RXf_UTF8) {
b8d68ded
JH
385 PL_reg_flags |= RF_utf8;
386 }
ab3bbdeb
YO
387 DEBUG_EXECUTE_r(
388 debug_start_match(prog, do_utf8, strpos, strend,
1de06328
YO
389 sv ? "Guessing start of match in sv for"
390 : "Guessing start of match in string for");
2a782b5b 391 );
cad2e5aa 392
c344f387
JH
393 /* CHR_DIST() would be more correct here but it makes things slow. */
394 if (prog->minlen > strend - strpos) {
a3621e74 395 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584 396 "String too short... [re_intuit_start]\n"));
cad2e5aa 397 goto fail;
2c2d71f5 398 }
1de06328 399
a1933d95 400 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 401 PL_regeol = strend;
33b8afdf
JH
402 if (do_utf8) {
403 if (!prog->check_utf8 && prog->check_substr)
404 to_utf8_substr(prog);
405 check = prog->check_utf8;
406 } else {
407 if (!prog->check_substr && prog->check_utf8)
408 to_byte_substr(prog);
409 check = prog->check_substr;
410 }
1de06328 411 if (check == &PL_sv_undef) {
a3621e74 412 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1de06328 413 "Non-utf8 string cannot match utf8 check string\n"));
33b8afdf
JH
414 goto fail;
415 }
bbe252da
YO
416 if (prog->extflags & RXf_ANCH) { /* Match at beg-of-str or after \n */
417 ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
418 || ( (prog->extflags & RXf_ANCH_BOL)
7fba1cd6 419 && !multiline ) ); /* Check after \n? */
cad2e5aa 420
7e25d62c 421 if (!ml_anch) {
bbe252da
YO
422 if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
423 && !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
3f7c398e 424 /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
7e25d62c
JH
425 && sv && !SvROK(sv)
426 && (strpos != strbeg)) {
a3621e74 427 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
7e25d62c
JH
428 goto fail;
429 }
430 if (prog->check_offset_min == prog->check_offset_max &&
bbe252da 431 !(prog->extflags & RXf_CANY_SEEN)) {
2c2d71f5 432 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
433 I32 slen;
434
1aa99e6b 435 s = HOP3c(strpos, prog->check_offset_min, strend);
1de06328 436
653099ff
GS
437 if (SvTAIL(check)) {
438 slen = SvCUR(check); /* >= 1 */
cad2e5aa 439
9041c2e3 440 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5 441 || (strend - s == slen && strend[-1] != '\n')) {
a3621e74 442 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
2c2d71f5 443 goto fail_finish;
cad2e5aa
JH
444 }
445 /* Now should match s[0..slen-2] */
446 slen--;
3f7c398e 447 if (slen && (*SvPVX_const(check) != *s
cad2e5aa 448 || (slen > 1
3f7c398e 449 && memNE(SvPVX_const(check), s, slen)))) {
2c2d71f5 450 report_neq:
a3621e74 451 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
2c2d71f5
JH
452 goto fail_finish;
453 }
cad2e5aa 454 }
3f7c398e 455 else if (*SvPVX_const(check) != *s
653099ff 456 || ((slen = SvCUR(check)) > 1
3f7c398e 457 && memNE(SvPVX_const(check), s, slen)))
2c2d71f5 458 goto report_neq;
c315bfe8 459 check_at = s;
2c2d71f5 460 goto success_at_start;
7e25d62c 461 }
cad2e5aa 462 }
2c2d71f5 463 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 464 s = strpos;
2c2d71f5 465 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
1de06328
YO
466 end_shift = prog->check_end_shift;
467
2c2d71f5 468 if (!ml_anch) {
a3b680e6 469 const I32 end = prog->check_offset_max + CHR_SVLEN(check)
653099ff 470 - (SvTAIL(check) != 0);
a3b680e6 471 const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
472
473 if (end_shift < eshift)
474 end_shift = eshift;
475 }
cad2e5aa 476 }
2c2d71f5 477 else { /* Can match at random position */
cad2e5aa
JH
478 ml_anch = 0;
479 s = strpos;
1de06328
YO
480 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
481 end_shift = prog->check_end_shift;
482
483 /* end shift should be non negative here */
cad2e5aa
JH
484 }
485
2c2d71f5 486#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 487 if (end_shift < 0)
1de06328 488 Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
b57a0404 489 (IV)end_shift, prog->precomp);
2c2d71f5
JH
490#endif
491
2c2d71f5
JH
492 restart:
493 /* Find a possible match in the region s..strend by looking for
494 the "check" substring in the region corrected by start/end_shift. */
1de06328
YO
495
496 {
497 I32 srch_start_shift = start_shift;
498 I32 srch_end_shift = end_shift;
499 if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
500 srch_end_shift -= ((strbeg - s) - srch_start_shift);
501 srch_start_shift = strbeg - s;
502 }
6bda09f9 503 DEBUG_OPTIMISE_MORE_r({
1de06328
YO
504 PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
505 (IV)prog->check_offset_min,
506 (IV)srch_start_shift,
507 (IV)srch_end_shift,
508 (IV)prog->check_end_shift);
509 });
510
cad2e5aa 511 if (flags & REXEC_SCREAM) {
cad2e5aa 512 I32 p = -1; /* Internal iterator of scream. */
a3b680e6 513 I32 * const pp = data ? data->scream_pos : &p;
cad2e5aa 514
2c2d71f5
JH
515 if (PL_screamfirst[BmRARE(check)] >= 0
516 || ( BmRARE(check) == '\n'
517 && (BmPREVIOUS(check) == SvCUR(check) - 1)
518 && SvTAIL(check) ))
9041c2e3 519 s = screaminstr(sv, check,
1de06328 520 srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
cad2e5aa 521 else
2c2d71f5 522 goto fail_finish;
4addbd3b
HS
523 /* we may be pointing at the wrong string */
524 if (s && RX_MATCH_COPIED(prog))
3f7c398e 525 s = strbeg + (s - SvPVX_const(sv));
cad2e5aa
JH
526 if (data)
527 *data->scream_olds = s;
528 }
1de06328
YO
529 else {
530 U8* start_point;
531 U8* end_point;
bbe252da 532 if (prog->extflags & RXf_CANY_SEEN) {
1de06328
YO
533 start_point= (U8*)(s + srch_start_shift);
534 end_point= (U8*)(strend - srch_end_shift);
535 } else {
536 start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
537 end_point= HOP3(strend, -srch_end_shift, strbeg);
538 }
6bda09f9 539 DEBUG_OPTIMISE_MORE_r({
56570a2c 540 PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
1de06328 541 (int)(end_point - start_point),
fc8cd66c 542 (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
1de06328
YO
543 start_point);
544 });
545
546 s = fbm_instr( start_point, end_point,
7fba1cd6 547 check, multiline ? FBMrf_MULTILINE : 0);
1de06328
YO
548 }
549 }
cad2e5aa
JH
550 /* Update the count-of-usability, remove useless subpatterns,
551 unshift s. */
2c2d71f5 552
ab3bbdeb
YO
553 DEBUG_EXECUTE_r({
554 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
555 SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
556 PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
2c2d71f5 557 (s ? "Found" : "Did not find"),
ab3bbdeb
YO
558 (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
559 ? "anchored" : "floating"),
560 quoted,
561 RE_SV_TAIL(check),
562 (s ? " at offset " : "...\n") );
563 });
2c2d71f5
JH
564
565 if (!s)
566 goto fail_finish;
2c2d71f5 567 /* Finish the diagnostic message */
a3621e74 568 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5 569
1de06328
YO
570 /* XXX dmq: first branch is for positive lookbehind...
571 Our check string is offset from the beginning of the pattern.
572 So we need to do any stclass tests offset forward from that
573 point. I think. :-(
574 */
575
576
577
578 check_at=s;
579
580
2c2d71f5
JH
581 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
582 Start with the other substr.
583 XXXX no SCREAM optimization yet - and a very coarse implementation
a0288114 584 XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
2c2d71f5
JH
585 *always* match. Probably should be marked during compile...
586 Probably it is right to do no SCREAM here...
587 */
588
1de06328
YO
589 if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
590 : (prog->float_substr && prog->anchored_substr))
591 {
30944b6d 592 /* Take into account the "other" substring. */
2c2d71f5
JH
593 /* XXXX May be hopelessly wrong for UTF... */
594 if (!other_last)
6eb5f6b9 595 other_last = strpos;
33b8afdf 596 if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
30944b6d
IZ
597 do_other_anchored:
598 {
890ce7af
AL
599 char * const last = HOP3c(s, -start_shift, strbeg);
600 char *last1, *last2;
be8e71aa 601 char * const saved_s = s;
33b8afdf 602 SV* must;
2c2d71f5 603
2c2d71f5
JH
604 t = s - prog->check_offset_max;
605 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 606 && (!do_utf8
0ce71af7 607 || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
2c2d71f5 608 && t > strpos)))
6f207bd3 609 NOOP;
2c2d71f5
JH
610 else
611 t = strpos;
1aa99e6b 612 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
613 if (t < other_last) /* These positions already checked */
614 t = other_last;
1aa99e6b 615 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
616 if (last < last1)
617 last1 = last;
1de06328
YO
618 /* XXXX It is not documented what units *_offsets are in.
619 We assume bytes, but this is clearly wrong.
620 Meaning this code needs to be carefully reviewed for errors.
621 dmq.
622 */
623
2c2d71f5 624 /* On end-of-str: see comment below. */
33b8afdf
JH
625 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
626 if (must == &PL_sv_undef) {
627 s = (char*)NULL;
1de06328 628 DEBUG_r(must = prog->anchored_utf8); /* for debug */
33b8afdf
JH
629 }
630 else
631 s = fbm_instr(
632 (unsigned char*)t,
633 HOP3(HOP3(last1, prog->anchored_offset, strend)
634 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
635 must,
7fba1cd6 636 multiline ? FBMrf_MULTILINE : 0
33b8afdf 637 );
ab3bbdeb
YO
638 DEBUG_EXECUTE_r({
639 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
640 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
641 PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
2c2d71f5 642 (s ? "Found" : "Contradicts"),
ab3bbdeb
YO
643 quoted, RE_SV_TAIL(must));
644 });
645
646
2c2d71f5
JH
647 if (!s) {
648 if (last1 >= last2) {
a3621e74 649 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5
JH
650 ", giving up...\n"));
651 goto fail_finish;
652 }
a3621e74 653 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 654 ", trying floating at offset %ld...\n",
be8e71aa 655 (long)(HOP3c(saved_s, 1, strend) - i_strpos)));
1aa99e6b
IH
656 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
657 s = HOP3c(last, 1, strend);
2c2d71f5
JH
658 goto restart;
659 }
660 else {
a3621e74 661 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 662 (long)(s - i_strpos)));
1aa99e6b
IH
663 t = HOP3c(s, -prog->anchored_offset, strbeg);
664 other_last = HOP3c(s, 1, strend);
be8e71aa 665 s = saved_s;
2c2d71f5
JH
666 if (t == strpos)
667 goto try_at_start;
2c2d71f5
JH
668 goto try_at_offset;
669 }
30944b6d 670 }
2c2d71f5
JH
671 }
672 else { /* Take into account the floating substring. */
33b8afdf 673 char *last, *last1;
be8e71aa 674 char * const saved_s = s;
33b8afdf
JH
675 SV* must;
676
677 t = HOP3c(s, -start_shift, strbeg);
678 last1 = last =
679 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
680 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
681 last = HOP3c(t, prog->float_max_offset, strend);
682 s = HOP3c(t, prog->float_min_offset, strend);
683 if (s < other_last)
684 s = other_last;
2c2d71f5 685 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
33b8afdf
JH
686 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
687 /* fbm_instr() takes into account exact value of end-of-str
688 if the check is SvTAIL(ed). Since false positives are OK,
689 and end-of-str is not later than strend we are OK. */
690 if (must == &PL_sv_undef) {
691 s = (char*)NULL;
1de06328 692 DEBUG_r(must = prog->float_utf8); /* for debug message */
33b8afdf
JH
693 }
694 else
2c2d71f5 695 s = fbm_instr((unsigned char*)s,
33b8afdf
JH
696 (unsigned char*)last + SvCUR(must)
697 - (SvTAIL(must)!=0),
7fba1cd6 698 must, multiline ? FBMrf_MULTILINE : 0);
ab3bbdeb
YO
699 DEBUG_EXECUTE_r({
700 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
701 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
702 PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
33b8afdf 703 (s ? "Found" : "Contradicts"),
ab3bbdeb
YO
704 quoted, RE_SV_TAIL(must));
705 });
33b8afdf
JH
706 if (!s) {
707 if (last1 == last) {
a3621e74 708 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
709 ", giving up...\n"));
710 goto fail_finish;
2c2d71f5 711 }
a3621e74 712 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf 713 ", trying anchored starting at offset %ld...\n",
be8e71aa 714 (long)(saved_s + 1 - i_strpos)));
33b8afdf
JH
715 other_last = last;
716 s = HOP3c(t, 1, strend);
717 goto restart;
718 }
719 else {
a3621e74 720 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
33b8afdf
JH
721 (long)(s - i_strpos)));
722 other_last = s; /* Fix this later. --Hugo */
be8e71aa 723 s = saved_s;
33b8afdf
JH
724 if (t == strpos)
725 goto try_at_start;
726 goto try_at_offset;
727 }
2c2d71f5 728 }
cad2e5aa 729 }
2c2d71f5 730
1de06328 731
9ef43ace 732 t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
1de06328 733
6bda09f9 734 DEBUG_OPTIMISE_MORE_r(
1de06328
YO
735 PerlIO_printf(Perl_debug_log,
736 "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
737 (IV)prog->check_offset_min,
738 (IV)prog->check_offset_max,
739 (IV)(s-strpos),
740 (IV)(t-strpos),
741 (IV)(t-s),
742 (IV)(strend-strpos)
743 )
744 );
745
2c2d71f5 746 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 747 && (!do_utf8
9ef43ace 748 || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
1de06328
YO
749 && t > strpos)))
750 {
2c2d71f5
JH
751 /* Fixed substring is found far enough so that the match
752 cannot start at strpos. */
753 try_at_offset:
cad2e5aa 754 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
755 /* Eventually fbm_*() should handle this, but often
756 anchored_offset is not 0, so this check will not be wasted. */
757 /* XXXX In the code below we prefer to look for "^" even in
758 presence of anchored substrings. And we search even
759 beyond the found float position. These pessimizations
760 are historical artefacts only. */
761 find_anchor:
2c2d71f5 762 while (t < strend - prog->minlen) {
cad2e5aa 763 if (*t == '\n') {
4ee3650e 764 if (t < check_at - prog->check_offset_min) {
33b8afdf 765 if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
4ee3650e
GS
766 /* Since we moved from the found position,
767 we definitely contradict the found anchored
30944b6d
IZ
768 substr. Due to the above check we do not
769 contradict "check" substr.
770 Thus we can arrive here only if check substr
771 is float. Redo checking for "other"=="fixed".
772 */
9041c2e3 773 strpos = t + 1;
a3621e74 774 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
e4584336 775 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
30944b6d
IZ
776 goto do_other_anchored;
777 }
4ee3650e
GS
778 /* We don't contradict the found floating substring. */
779 /* XXXX Why not check for STCLASS? */
cad2e5aa 780 s = t + 1;
a3621e74 781 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
e4584336 782 PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
783 goto set_useful;
784 }
4ee3650e
GS
785 /* Position contradicts check-string */
786 /* XXXX probably better to look for check-string
787 than for "\n", so one should lower the limit for t? */
a3621e74 788 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
e4584336 789 PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 790 other_last = strpos = s = t + 1;
cad2e5aa
JH
791 goto restart;
792 }
793 t++;
794 }
a3621e74 795 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
e4584336 796 PL_colors[0], PL_colors[1]));
2c2d71f5 797 goto fail_finish;
cad2e5aa 798 }
f5952150 799 else {
a3621e74 800 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
e4584336 801 PL_colors[0], PL_colors[1]));
f5952150 802 }
cad2e5aa
JH
803 s = t;
804 set_useful:
33b8afdf 805 ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
cad2e5aa
JH
806 }
807 else {
f5952150 808 /* The found string does not prohibit matching at strpos,
2c2d71f5 809 - no optimization of calling REx engine can be performed,
f5952150
GS
810 unless it was an MBOL and we are not after MBOL,
811 or a future STCLASS check will fail this. */
2c2d71f5
JH
812 try_at_start:
813 /* Even in this situation we may use MBOL flag if strpos is offset
814 wrt the start of the string. */
05b4157f 815 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 816 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d 817 /* May be due to an implicit anchor of m{.*foo} */
bbe252da 818 && !(prog->intflags & PREGf_IMPLICIT))
d506a20d 819 {
cad2e5aa
JH
820 t = strpos;
821 goto find_anchor;
822 }
a3621e74 823 DEBUG_EXECUTE_r( if (ml_anch)
f5952150 824 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
e4584336 825 (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
30944b6d 826 );
2c2d71f5 827 success_at_start:
bbe252da 828 if (!(prog->intflags & PREGf_NAUGHTY) /* XXXX If strpos moved? */
33b8afdf
JH
829 && (do_utf8 ? (
830 prog->check_utf8 /* Could be deleted already */
831 && --BmUSEFUL(prog->check_utf8) < 0
832 && (prog->check_utf8 == prog->float_utf8)
833 ) : (
834 prog->check_substr /* Could be deleted already */
835 && --BmUSEFUL(prog->check_substr) < 0
836 && (prog->check_substr == prog->float_substr)
837 )))
66e933ab 838 {
cad2e5aa 839 /* If flags & SOMETHING - do not do it many times on the same match */
a3621e74 840 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
33b8afdf
JH
841 SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
842 if (do_utf8 ? prog->check_substr : prog->check_utf8)
843 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
a0714e2c
SS
844 prog->check_substr = prog->check_utf8 = NULL; /* disable */
845 prog->float_substr = prog->float_utf8 = NULL; /* clear */
846 check = NULL; /* abort */
cad2e5aa 847 s = strpos;
3cf5c195
IZ
848 /* XXXX This is a remnant of the old implementation. It
849 looks wasteful, since now INTUIT can use many
6eb5f6b9 850 other heuristics. */
bbe252da 851 prog->extflags &= ~RXf_USE_INTUIT;
cad2e5aa
JH
852 }
853 else
854 s = strpos;
855 }
856
6eb5f6b9
JH
857 /* Last resort... */
858 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
1de06328
YO
859 /* trie stclasses are too expensive to use here, we are better off to
860 leave it to regmatch itself */
f8fc2ecf 861 if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
6eb5f6b9
JH
862 /* minlen == 0 is possible if regstclass is \b or \B,
863 and the fixed substr is ''$.
864 Since minlen is already taken into account, s+1 is before strend;
865 accidentally, minlen >= 1 guaranties no false positives at s + 1
866 even for \b or \B. But (minlen? 1 : 0) below assumes that
867 regstclass does not come from lookahead... */
868 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
869 This leaves EXACTF only, which is dealt with in find_byclass(). */
f8fc2ecf
YO
870 const U8* const str = (U8*)STRING(progi->regstclass);
871 const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
872 ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
66e933ab 873 : 1);
1de06328
YO
874 char * endpos;
875 if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
876 endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
877 else if (prog->float_substr || prog->float_utf8)
878 endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
879 else
880 endpos= strend;
881
56570a2c 882 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %d s: %d endpos: %d\n",
b57a0404 883 (IV)start_shift, check_at - strbeg, s - strbeg, endpos - strbeg));
1de06328 884
6eb5f6b9 885 t = s;
f8fc2ecf 886 s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
6eb5f6b9
JH
887 if (!s) {
888#ifdef DEBUGGING
cbbf8932 889 const char *what = NULL;
6eb5f6b9
JH
890#endif
891 if (endpos == strend) {
a3621e74 892 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
893 "Could not match STCLASS...\n") );
894 goto fail;
895 }
a3621e74 896 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab 897 "This position contradicts STCLASS...\n") );
bbe252da 898 if ((prog->extflags & RXf_ANCH) && !ml_anch)
653099ff 899 goto fail;
6eb5f6b9 900 /* Contradict one of substrings */
33b8afdf
JH
901 if (prog->anchored_substr || prog->anchored_utf8) {
902 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
a3621e74 903 DEBUG_EXECUTE_r( what = "anchored" );
6eb5f6b9 904 hop_and_restart:
1aa99e6b 905 s = HOP3c(t, 1, strend);
66e933ab
GS
906 if (s + start_shift + end_shift > strend) {
907 /* XXXX Should be taken into account earlier? */
a3621e74 908 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab
GS
909 "Could not match STCLASS...\n") );
910 goto fail;
911 }
5e39e1e5
HS
912 if (!check)
913 goto giveup;
a3621e74 914 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 915 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
916 what, (long)(s + start_shift - i_strpos)) );
917 goto restart;
918 }
66e933ab 919 /* Have both, check_string is floating */
6eb5f6b9
JH
920 if (t + start_shift >= check_at) /* Contradicts floating=check */
921 goto retry_floating_check;
922 /* Recheck anchored substring, but not floating... */
9041c2e3 923 s = check_at;
5e39e1e5
HS
924 if (!check)
925 goto giveup;
a3621e74 926 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 927 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
928 (long)(other_last - i_strpos)) );
929 goto do_other_anchored;
930 }
60e71179
GS
931 /* Another way we could have checked stclass at the
932 current position only: */
933 if (ml_anch) {
934 s = t = t + 1;
5e39e1e5
HS
935 if (!check)
936 goto giveup;
a3621e74 937 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 938 "Looking for /%s^%s/m starting at offset %ld...\n",
e4584336 939 PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
60e71179 940 goto try_at_offset;
66e933ab 941 }
33b8afdf 942 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
60e71179 943 goto fail;
6eb5f6b9
JH
944 /* Check is floating subtring. */
945 retry_floating_check:
946 t = check_at - start_shift;
a3621e74 947 DEBUG_EXECUTE_r( what = "floating" );
6eb5f6b9
JH
948 goto hop_and_restart;
949 }
b7953727 950 if (t != s) {
a3621e74 951 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 952 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
953 (long)(t - i_strpos), (long)(s - i_strpos))
954 );
955 }
956 else {
a3621e74 957 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b7953727
JH
958 "Does not contradict STCLASS...\n");
959 );
960 }
6eb5f6b9 961 }
5e39e1e5 962 giveup:
a3621e74 963 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
5e39e1e5
HS
964 PL_colors[4], (check ? "Guessed" : "Giving up"),
965 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 966 return s;
2c2d71f5
JH
967
968 fail_finish: /* Substring not found */
33b8afdf
JH
969 if (prog->check_substr || prog->check_utf8) /* could be removed already */
970 BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
cad2e5aa 971 fail:
a3621e74 972 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
e4584336 973 PL_colors[4], PL_colors[5]));
bd61b366 974 return NULL;
cad2e5aa 975}
9661b544 976
786e8c11 977
3b0527fe 978
4cadc6a9
YO
979#define REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len, uvc, charid, \
980foldlen, foldbuf, uniflags) STMT_START { \
981 switch (trie_type) { \
982 case trie_utf8_fold: \
983 if ( foldlen>0 ) { \
984 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
985 foldlen -= len; \
986 uscan += len; \
987 len=0; \
988 } else { \
989 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
990 uvc = to_uni_fold( uvc, foldbuf, &foldlen ); \
991 foldlen -= UNISKIP( uvc ); \
992 uscan = foldbuf + UNISKIP( uvc ); \
993 } \
994 break; \
995 case trie_utf8: \
996 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
997 break; \
998 case trie_plain: \
999 uvc = (UV)*uc; \
1000 len = 1; \
1001 } \
1002 \
1003 if (uvc < 256) { \
1004 charid = trie->charmap[ uvc ]; \
1005 } \
1006 else { \
1007 charid = 0; \
1008 if (trie->widecharmap) { \
1009 SV** const svpp = hv_fetch(trie->widecharmap, \
1010 (char*)&uvc, sizeof(UV), 0); \
1011 if (svpp) \
1012 charid = (U16)SvIV(*svpp); \
1013 } \
1014 } \
1015} STMT_END
1016
1017#define REXEC_FBC_EXACTISH_CHECK(CoNd) \
1018 if ( (CoNd) \
1019 && (ln == len || \
1020 ibcmp_utf8(s, NULL, 0, do_utf8, \
1021 m, NULL, ln, (bool)UTF)) \
24b23f37 1022 && (!reginfo || regtry(reginfo, &s)) ) \
4cadc6a9
YO
1023 goto got_it; \
1024 else { \
1025 U8 foldbuf[UTF8_MAXBYTES_CASE+1]; \
1026 uvchr_to_utf8(tmpbuf, c); \
1027 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen); \
1028 if ( f != c \
1029 && (f == c1 || f == c2) \
1030 && (ln == foldlen || \
1031 !ibcmp_utf8((char *) foldbuf, \
1032 NULL, foldlen, do_utf8, \
1033 m, \
1034 NULL, ln, (bool)UTF)) \
24b23f37 1035 && (!reginfo || regtry(reginfo, &s)) ) \
4cadc6a9
YO
1036 goto got_it; \
1037 } \
1038 s += len
1039
1040#define REXEC_FBC_EXACTISH_SCAN(CoNd) \
1041STMT_START { \
1042 while (s <= e) { \
1043 if ( (CoNd) \
1044 && (ln == 1 || !(OP(c) == EXACTF \
1045 ? ibcmp(s, m, ln) \
1046 : ibcmp_locale(s, m, ln))) \
24b23f37 1047 && (!reginfo || regtry(reginfo, &s)) ) \
4cadc6a9
YO
1048 goto got_it; \
1049 s++; \
1050 } \
1051} STMT_END
1052
1053#define REXEC_FBC_UTF8_SCAN(CoDe) \
1054STMT_START { \
1055 while (s + (uskip = UTF8SKIP(s)) <= strend) { \
1056 CoDe \
1057 s += uskip; \
1058 } \
1059} STMT_END
1060
1061#define REXEC_FBC_SCAN(CoDe) \
1062STMT_START { \
1063 while (s < strend) { \
1064 CoDe \
1065 s++; \
1066 } \
1067} STMT_END
1068
1069#define REXEC_FBC_UTF8_CLASS_SCAN(CoNd) \
1070REXEC_FBC_UTF8_SCAN( \
1071 if (CoNd) { \
24b23f37 1072 if (tmp && (!reginfo || regtry(reginfo, &s))) \
4cadc6a9
YO
1073 goto got_it; \
1074 else \
1075 tmp = doevery; \
1076 } \
1077 else \
1078 tmp = 1; \
1079)
1080
1081#define REXEC_FBC_CLASS_SCAN(CoNd) \
1082REXEC_FBC_SCAN( \
1083 if (CoNd) { \
24b23f37 1084 if (tmp && (!reginfo || regtry(reginfo, &s))) \
4cadc6a9
YO
1085 goto got_it; \
1086 else \
1087 tmp = doevery; \
1088 } \
1089 else \
1090 tmp = 1; \
1091)
1092
1093#define REXEC_FBC_TRYIT \
24b23f37 1094if ((!reginfo || regtry(reginfo, &s))) \
4cadc6a9
YO
1095 goto got_it
1096
1097#define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd) \
1098 if (do_utf8) { \
1099 UtFpReLoAd; \
1100 REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
1101 } \
1102 else { \
1103 REXEC_FBC_CLASS_SCAN(CoNd); \
1104 } \
1105 break
1106
1107#define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd) \
1108 PL_reg_flags |= RF_tainted; \
1109 if (do_utf8) { \
1110 REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
1111 } \
1112 else { \
1113 REXEC_FBC_CLASS_SCAN(CoNd); \
1114 } \
1115 break
1116
786e8c11
YO
1117#define DUMP_EXEC_POS(li,s,doutf8) \
1118 dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1119
1120/* We know what class REx starts with. Try to find this position... */
1121/* if reginfo is NULL, its a dryrun */
1122/* annoyingly all the vars in this routine have different names from their counterparts
1123 in regmatch. /grrr */
1124
3c3eec57 1125STATIC char *
07be1b83 1126S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
24b23f37 1127 const char *strend, regmatch_info *reginfo)
a687059c 1128{
27da23d5 1129 dVAR;
bbe252da 1130 const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
6eb5f6b9 1131 char *m;
d8093b23 1132 STRLEN ln;
5dab1207 1133 STRLEN lnc;
078c425b 1134 register STRLEN uskip;
d8093b23
G
1135 unsigned int c1;
1136 unsigned int c2;
6eb5f6b9
JH
1137 char *e;
1138 register I32 tmp = 1; /* Scratch variable? */
a3b680e6 1139 register const bool do_utf8 = PL_reg_match_utf8;
f8fc2ecf
YO
1140 RXi_GET_DECL(prog,progi);
1141
6eb5f6b9
JH
1142 /* We know what class it must start with. */
1143 switch (OP(c)) {
6eb5f6b9 1144 case ANYOF:
388cc4de 1145 if (do_utf8) {
4cadc6a9 1146 REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
388cc4de 1147 !UTF8_IS_INVARIANT((U8)s[0]) ?
32fc9b6a 1148 reginclass(prog, c, (U8*)s, 0, do_utf8) :
4cadc6a9 1149 REGINCLASS(prog, c, (U8*)s));
388cc4de
HS
1150 }
1151 else {
1152 while (s < strend) {
1153 STRLEN skip = 1;
1154
32fc9b6a 1155 if (REGINCLASS(prog, c, (U8*)s) ||
388cc4de
HS
1156 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1157 /* The assignment of 2 is intentional:
1158 * for the folded sharp s, the skip is 2. */
1159 (skip = SHARP_S_SKIP))) {
24b23f37 1160 if (tmp && (!reginfo || regtry(reginfo, &s)))
388cc4de
HS
1161 goto got_it;
1162 else
1163 tmp = doevery;
1164 }
1165 else
1166 tmp = 1;
1167 s += skip;
1168 }
a0d0e21e 1169 }
6eb5f6b9 1170 break;
f33976b4 1171 case CANY:
4cadc6a9 1172 REXEC_FBC_SCAN(
24b23f37 1173 if (tmp && (!reginfo || regtry(reginfo, &s)))
f33976b4
DB
1174 goto got_it;
1175 else
1176 tmp = doevery;
4cadc6a9 1177 );
f33976b4 1178 break;
6eb5f6b9 1179 case EXACTF:
5dab1207
NIS
1180 m = STRING(c);
1181 ln = STR_LEN(c); /* length to match in octets/bytes */
1182 lnc = (I32) ln; /* length to match in characters */
1aa99e6b 1183 if (UTF) {
a2a2844f 1184 STRLEN ulen1, ulen2;
5dab1207 1185 U8 *sm = (U8 *) m;
89ebb4a3
JH
1186 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1187 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4ad0818d 1188 const U32 uniflags = UTF8_ALLOW_DEFAULT;
a2a2844f
JH
1189
1190 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1191 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1192
89ebb4a3 1193 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
041457d9 1194 0, uniflags);
89ebb4a3 1195 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
041457d9 1196 0, uniflags);
5dab1207
NIS
1197 lnc = 0;
1198 while (sm < ((U8 *) m + ln)) {
1199 lnc++;
1200 sm += UTF8SKIP(sm);
1201 }
1aa99e6b
IH
1202 }
1203 else {
1204 c1 = *(U8*)m;
1205 c2 = PL_fold[c1];
1206 }
6eb5f6b9
JH
1207 goto do_exactf;
1208 case EXACTFL:
5dab1207
NIS
1209 m = STRING(c);
1210 ln = STR_LEN(c);
1211 lnc = (I32) ln;
d8093b23 1212 c1 = *(U8*)m;
6eb5f6b9
JH
1213 c2 = PL_fold_locale[c1];
1214 do_exactf:
db12adc6 1215 e = HOP3c(strend, -((I32)lnc), s);
b3c9acc1 1216
3b0527fe 1217 if (!reginfo && e < s)
6eb5f6b9 1218 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 1219
60a8b682
JH
1220 /* The idea in the EXACTF* cases is to first find the
1221 * first character of the EXACTF* node and then, if
1222 * necessary, case-insensitively compare the full
1223 * text of the node. The c1 and c2 are the first
1224 * characters (though in Unicode it gets a bit
1225 * more complicated because there are more cases
7f16dd3d
JH
1226 * than just upper and lower: one needs to use
1227 * the so-called folding case for case-insensitive
1228 * matching (called "loose matching" in Unicode).
1229 * ibcmp_utf8() will do just that. */
60a8b682 1230
1aa99e6b 1231 if (do_utf8) {
575cac57 1232 UV c, f;
89ebb4a3 1233 U8 tmpbuf [UTF8_MAXBYTES+1];
575cac57 1234 STRLEN len, foldlen;
4ad0818d 1235 const U32 uniflags = UTF8_ALLOW_DEFAULT;
09091399 1236 if (c1 == c2) {
5dab1207
NIS
1237 /* Upper and lower of 1st char are equal -
1238 * probably not a "letter". */
1aa99e6b 1239 while (s <= e) {
89ebb4a3 1240 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1241 uniflags);
4cadc6a9 1242 REXEC_FBC_EXACTISH_CHECK(c == c1);
1aa99e6b 1243 }
09091399
JH
1244 }
1245 else {
1aa99e6b 1246 while (s <= e) {
89ebb4a3 1247 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1248 uniflags);
80aecb99 1249
60a8b682 1250 /* Handle some of the three Greek sigmas cases.
8c01da3c
JH
1251 * Note that not all the possible combinations
1252 * are handled here: some of them are handled
1253 * by the standard folding rules, and some of
1254 * them (the character class or ANYOF cases)
1255 * are handled during compiletime in
1256 * regexec.c:S_regclass(). */
880bd946
JH
1257 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1258 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1259 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99 1260
4cadc6a9 1261 REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1aa99e6b 1262 }
09091399 1263 }
1aa99e6b
IH
1264 }
1265 else {
1266 if (c1 == c2)
4cadc6a9 1267 REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1aa99e6b 1268 else
4cadc6a9 1269 REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
b3c9acc1
IZ
1270 }
1271 break;
bbce6d69 1272 case BOUNDL:
3280af22 1273 PL_reg_flags |= RF_tainted;
bbce6d69 1274 /* FALL THROUGH */
a0d0e21e 1275 case BOUND:
ffc61ed2 1276 if (do_utf8) {
12d33761 1277 if (s == PL_bostr)
ffc61ed2
JH
1278 tmp = '\n';
1279 else {
6136c704 1280 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1281 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1282 }
1283 tmp = ((OP(c) == BOUND ?
9041c2e3 1284 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1285 LOAD_UTF8_CHARCLASS_ALNUM();
4cadc6a9 1286 REXEC_FBC_UTF8_SCAN(
ffc61ed2 1287 if (tmp == !(OP(c) == BOUND ?
bb7a0f54 1288 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1289 isALNUM_LC_utf8((U8*)s)))
1290 {
1291 tmp = !tmp;
4cadc6a9 1292 REXEC_FBC_TRYIT;
a687059c 1293 }
4cadc6a9 1294 );
a0d0e21e 1295 }
667bb95a 1296 else {
12d33761 1297 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2 1298 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
4cadc6a9 1299 REXEC_FBC_SCAN(
ffc61ed2
JH
1300 if (tmp ==
1301 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1302 tmp = !tmp;
4cadc6a9 1303 REXEC_FBC_TRYIT;
a0ed51b3 1304 }
4cadc6a9 1305 );
a0ed51b3 1306 }
24b23f37 1307 if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
a0ed51b3
LW
1308 goto got_it;
1309 break;
bbce6d69 1310 case NBOUNDL:
3280af22 1311 PL_reg_flags |= RF_tainted;
bbce6d69 1312 /* FALL THROUGH */
a0d0e21e 1313 case NBOUND:
ffc61ed2 1314 if (do_utf8) {
12d33761 1315 if (s == PL_bostr)
ffc61ed2
JH
1316 tmp = '\n';
1317 else {
6136c704 1318 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1319 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1320 }
1321 tmp = ((OP(c) == NBOUND ?
9041c2e3 1322 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1323 LOAD_UTF8_CHARCLASS_ALNUM();
4cadc6a9 1324 REXEC_FBC_UTF8_SCAN(
ffc61ed2 1325 if (tmp == !(OP(c) == NBOUND ?
bb7a0f54 1326 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1327 isALNUM_LC_utf8((U8*)s)))
1328 tmp = !tmp;
4cadc6a9
YO
1329 else REXEC_FBC_TRYIT;
1330 );
a0d0e21e 1331 }
667bb95a 1332 else {
12d33761 1333 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1334 tmp = ((OP(c) == NBOUND ?
1335 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
4cadc6a9 1336 REXEC_FBC_SCAN(
ffc61ed2
JH
1337 if (tmp ==
1338 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1339 tmp = !tmp;
4cadc6a9
YO
1340 else REXEC_FBC_TRYIT;
1341 );
a0ed51b3 1342 }
24b23f37 1343 if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
a0ed51b3
LW
1344 goto got_it;
1345 break;
a0d0e21e 1346 case ALNUM:
4cadc6a9
YO
1347 REXEC_FBC_CSCAN_PRELOAD(
1348 LOAD_UTF8_CHARCLASS_ALNUM(),
1349 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1350 isALNUM(*s)
1351 );
bbce6d69 1352 case ALNUML:
4cadc6a9
YO
1353 REXEC_FBC_CSCAN_TAINT(
1354 isALNUM_LC_utf8((U8*)s),
1355 isALNUM_LC(*s)
1356 );
a0d0e21e 1357 case NALNUM:
4cadc6a9
YO
1358 REXEC_FBC_CSCAN_PRELOAD(
1359 LOAD_UTF8_CHARCLASS_ALNUM(),
1360 !swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1361 !isALNUM(*s)
1362 );
bbce6d69 1363 case NALNUML:
4cadc6a9
YO
1364 REXEC_FBC_CSCAN_TAINT(
1365 !isALNUM_LC_utf8((U8*)s),
1366 !isALNUM_LC(*s)
1367 );
a0d0e21e 1368 case SPACE:
4cadc6a9
YO
1369 REXEC_FBC_CSCAN_PRELOAD(
1370 LOAD_UTF8_CHARCLASS_SPACE(),
1371 *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
1372 isSPACE(*s)
1373 );
bbce6d69 1374 case SPACEL:
4cadc6a9
YO
1375 REXEC_FBC_CSCAN_TAINT(
1376 *s == ' ' || isSPACE_LC_utf8((U8*)s),
1377 isSPACE_LC(*s)
1378 );
a0d0e21e 1379 case NSPACE:
4cadc6a9
YO
1380 REXEC_FBC_CSCAN_PRELOAD(
1381 LOAD_UTF8_CHARCLASS_SPACE(),
1382 !(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
1383 !isSPACE(*s)
1384 );
bbce6d69 1385 case NSPACEL:
4cadc6a9
YO
1386 REXEC_FBC_CSCAN_TAINT(
1387 !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1388 !isSPACE_LC(*s)
1389 );
a0d0e21e 1390 case DIGIT:
4cadc6a9
YO
1391 REXEC_FBC_CSCAN_PRELOAD(
1392 LOAD_UTF8_CHARCLASS_DIGIT(),
1393 swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1394 isDIGIT(*s)
1395 );
b8c5462f 1396 case DIGITL:
4cadc6a9
YO
1397 REXEC_FBC_CSCAN_TAINT(
1398 isDIGIT_LC_utf8((U8*)s),
1399 isDIGIT_LC(*s)
1400 );
a0d0e21e 1401 case NDIGIT:
4cadc6a9
YO
1402 REXEC_FBC_CSCAN_PRELOAD(
1403 LOAD_UTF8_CHARCLASS_DIGIT(),
1404 !swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1405 !isDIGIT(*s)
1406 );
b8c5462f 1407 case NDIGITL:
4cadc6a9
YO
1408 REXEC_FBC_CSCAN_TAINT(
1409 !isDIGIT_LC_utf8((U8*)s),
1410 !isDIGIT_LC(*s)
1411 );
1de06328
YO
1412 case AHOCORASICKC:
1413 case AHOCORASICK:
07be1b83
YO
1414 {
1415 const enum { trie_plain, trie_utf8, trie_utf8_fold }
1416 trie_type = do_utf8 ?
1417 (c->flags == EXACT ? trie_utf8 : trie_utf8_fold)
1418 : trie_plain;
1419 /* what trie are we using right now */
1420 reg_ac_data *aho
f8fc2ecf 1421 = (reg_ac_data*)progi->data->data[ ARG( c ) ];
07be1b83
YO
1422 reg_trie_data *trie=aho->trie;
1423
1424 const char *last_start = strend - trie->minlen;
6148ee25 1425#ifdef DEBUGGING
07be1b83 1426 const char *real_start = s;
6148ee25 1427#endif
07be1b83 1428 STRLEN maxlen = trie->maxlen;
be8e71aa
YO
1429 SV *sv_points;
1430 U8 **points; /* map of where we were in the input string
786e8c11 1431 when reading a given char. For ASCII this
be8e71aa
YO
1432 is unnecessary overhead as the relationship
1433 is always 1:1, but for unicode, especially
1434 case folded unicode this is not true. */
f9e705e8 1435 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
786e8c11
YO
1436 U8 *bitmap=NULL;
1437
07be1b83
YO
1438
1439 GET_RE_DEBUG_FLAGS_DECL;
1440
be8e71aa
YO
1441 /* We can't just allocate points here. We need to wrap it in
1442 * an SV so it gets freed properly if there is a croak while
1443 * running the match */
1444 ENTER;
1445 SAVETMPS;
1446 sv_points=newSV(maxlen * sizeof(U8 *));
1447 SvCUR_set(sv_points,
1448 maxlen * sizeof(U8 *));
1449 SvPOK_on(sv_points);
1450 sv_2mortal(sv_points);
1451 points=(U8**)SvPV_nolen(sv_points );
1de06328
YO
1452 if ( trie_type != trie_utf8_fold
1453 && (trie->bitmap || OP(c)==AHOCORASICKC) )
1454 {
786e8c11
YO
1455 if (trie->bitmap)
1456 bitmap=(U8*)trie->bitmap;
1457 else
1458 bitmap=(U8*)ANYOF_BITMAP(c);
07be1b83 1459 }
786e8c11
YO
1460 /* this is the Aho-Corasick algorithm modified a touch
1461 to include special handling for long "unknown char"
1462 sequences. The basic idea being that we use AC as long
1463 as we are dealing with a possible matching char, when
1464 we encounter an unknown char (and we have not encountered
1465 an accepting state) we scan forward until we find a legal
1466 starting char.
1467 AC matching is basically that of trie matching, except
1468 that when we encounter a failing transition, we fall back
1469 to the current states "fail state", and try the current char
1470 again, a process we repeat until we reach the root state,
1471 state 1, or a legal transition. If we fail on the root state
1472 then we can either terminate if we have reached an accepting
1473 state previously, or restart the entire process from the beginning
1474 if we have not.
1475
1476 */
07be1b83
YO
1477 while (s <= last_start) {
1478 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1479 U8 *uc = (U8*)s;
1480 U16 charid = 0;
1481 U32 base = 1;
1482 U32 state = 1;
1483 UV uvc = 0;
1484 STRLEN len = 0;
1485 STRLEN foldlen = 0;
1486 U8 *uscan = (U8*)NULL;
1487 U8 *leftmost = NULL;
786e8c11
YO
1488#ifdef DEBUGGING
1489 U32 accepted_word= 0;
1490#endif
07be1b83
YO
1491 U32 pointpos = 0;
1492
1493 while ( state && uc <= (U8*)strend ) {
1494 int failed=0;
786e8c11
YO
1495 U32 word = aho->states[ state ].wordnum;
1496
1de06328
YO
1497 if( state==1 ) {
1498 if ( bitmap ) {
1499 DEBUG_TRIE_EXECUTE_r(
1500 if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1501 dump_exec_pos( (char *)uc, c, strend, real_start,
1502 (char *)uc, do_utf8 );
1503 PerlIO_printf( Perl_debug_log,
1504 " Scanning for legal start char...\n");
1505 }
1506 );
1507 while ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1508 uc++;
786e8c11 1509 }
1de06328 1510 s= (char *)uc;
786e8c11 1511 }
786e8c11
YO
1512 if (uc >(U8*)last_start) break;
1513 }
1514
1515 if ( word ) {
1516 U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1517 if (!leftmost || lpos < leftmost) {
1518 DEBUG_r(accepted_word=word);
07be1b83 1519 leftmost= lpos;
786e8c11 1520 }
07be1b83 1521 if (base==0) break;
786e8c11 1522
07be1b83
YO
1523 }
1524 points[pointpos++ % maxlen]= uc;
4cadc6a9
YO
1525 REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len,
1526 uvc, charid, foldlen, foldbuf, uniflags);
786e8c11
YO
1527 DEBUG_TRIE_EXECUTE_r({
1528 dump_exec_pos( (char *)uc, c, strend, real_start,
1529 s, do_utf8 );
07be1b83 1530 PerlIO_printf(Perl_debug_log,
786e8c11
YO
1531 " Charid:%3u CP:%4"UVxf" ",
1532 charid, uvc);
1533 });
07be1b83
YO
1534
1535 do {
6148ee25 1536#ifdef DEBUGGING
786e8c11 1537 word = aho->states[ state ].wordnum;
6148ee25 1538#endif
07be1b83
YO
1539 base = aho->states[ state ].trans.base;
1540
786e8c11
YO
1541 DEBUG_TRIE_EXECUTE_r({
1542 if (failed)
1543 dump_exec_pos( (char *)uc, c, strend, real_start,
1544 s, do_utf8 );
07be1b83 1545 PerlIO_printf( Perl_debug_log,
786e8c11
YO
1546 "%sState: %4"UVxf", word=%"UVxf,
1547 failed ? " Fail transition to " : "",
1548 (UV)state, (UV)word);
1549 });
07be1b83
YO
1550 if ( base ) {
1551 U32 tmp;
1552 if (charid &&
1553 (base + charid > trie->uniquecharcount )
1554 && (base + charid - 1 - trie->uniquecharcount
1555 < trie->lasttrans)
1556 && trie->trans[base + charid - 1 -
1557 trie->uniquecharcount].check == state
1558 && (tmp=trie->trans[base + charid - 1 -
1559 trie->uniquecharcount ].next))
1560 {
786e8c11
YO
1561 DEBUG_TRIE_EXECUTE_r(
1562 PerlIO_printf( Perl_debug_log," - legal\n"));
07be1b83
YO
1563 state = tmp;
1564 break;
1565 }
1566 else {
786e8c11
YO
1567 DEBUG_TRIE_EXECUTE_r(
1568 PerlIO_printf( Perl_debug_log," - fail\n"));
1569 failed = 1;
1570 state = aho->fail[state];
07be1b83
YO
1571 }
1572 }
1573 else {
1574 /* we must be accepting here */
786e8c11
YO
1575 DEBUG_TRIE_EXECUTE_r(
1576 PerlIO_printf( Perl_debug_log," - accepting\n"));
1577 failed = 1;
07be1b83
YO
1578 break;
1579 }
1580 } while(state);
786e8c11 1581 uc += len;
07be1b83
YO
1582 if (failed) {
1583 if (leftmost)
1584 break;
786e8c11 1585 if (!state) state = 1;
07be1b83
YO
1586 }
1587 }
1588 if ( aho->states[ state ].wordnum ) {
1589 U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
786e8c11
YO
1590 if (!leftmost || lpos < leftmost) {
1591 DEBUG_r(accepted_word=aho->states[ state ].wordnum);
07be1b83 1592 leftmost = lpos;
786e8c11 1593 }
07be1b83 1594 }
07be1b83
YO
1595 if (leftmost) {
1596 s = (char*)leftmost;
786e8c11
YO
1597 DEBUG_TRIE_EXECUTE_r({
1598 PerlIO_printf(
1599 Perl_debug_log,"Matches word #%"UVxf" at position %d. Trying full pattern...\n",
1600 (UV)accepted_word, s - real_start
1601 );
1602 });
24b23f37 1603 if (!reginfo || regtry(reginfo, &s)) {
be8e71aa
YO
1604 FREETMPS;
1605 LEAVE;
07be1b83 1606 goto got_it;
be8e71aa 1607 }
07be1b83 1608 s = HOPc(s,1);
786e8c11
YO
1609 DEBUG_TRIE_EXECUTE_r({
1610 PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1611 });
07be1b83 1612 } else {
786e8c11
YO
1613 DEBUG_TRIE_EXECUTE_r(
1614 PerlIO_printf( Perl_debug_log,"No match.\n"));
07be1b83
YO
1615 break;
1616 }
1617 }
be8e71aa
YO
1618 FREETMPS;
1619 LEAVE;
07be1b83
YO
1620 }
1621 break;
b3c9acc1 1622 default:
3c3eec57
GS
1623 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1624 break;
d6a28714 1625 }
6eb5f6b9
JH
1626 return 0;
1627 got_it:
1628 return s;
1629}
1630
1631/*
1632 - regexec_flags - match a regexp against a string
1633 */
1634I32
1635Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1636 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1637/* strend: pointer to null at end of string */
1638/* strbeg: real beginning of string */
1639/* minend: end of match must be >=minend after stringarg. */
58e23c8d
YO
1640/* data: May be used for some additional optimizations.
1641 Currently its only used, with a U32 cast, for transmitting
1642 the ganch offset when doing a /g match. This will change */
6eb5f6b9
JH
1643/* nosave: For optimizations. */
1644{
97aff369 1645 dVAR;
24b23f37 1646 /*register*/ char *s;
6eb5f6b9 1647 register regnode *c;
24b23f37 1648 /*register*/ char *startpos = stringarg;
6eb5f6b9
JH
1649 I32 minlen; /* must match at least this many chars */
1650 I32 dontbother = 0; /* how many characters not to try at end */
6eb5f6b9
JH
1651 I32 end_shift = 0; /* Same for the end. */ /* CC */
1652 I32 scream_pos = -1; /* Internal iterator of scream. */
ccac19ea 1653 char *scream_olds = NULL;
3dab1dad 1654 SV* const oreplsv = GvSV(PL_replgv);
f9f4320a 1655 const bool do_utf8 = (bool)DO_UTF8(sv);
2757e526 1656 I32 multiline;
f8fc2ecf 1657 RXi_GET_DECL(prog,progi);
3b0527fe 1658 regmatch_info reginfo; /* create some info to pass to regtry etc */
a3621e74
YO
1659
1660 GET_RE_DEBUG_FLAGS_DECL;
1661
9d4ba2ae 1662 PERL_UNUSED_ARG(data);
6eb5f6b9
JH
1663
1664 /* Be paranoid... */
1665 if (prog == NULL || startpos == NULL) {
1666 Perl_croak(aTHX_ "NULL regexp parameter");
1667 return 0;
1668 }
1669
bbe252da 1670 multiline = prog->extflags & RXf_PMf_MULTILINE;
3b0527fe 1671 reginfo.prog = prog;
2757e526 1672
bac06658 1673 RX_MATCH_UTF8_set(prog, do_utf8);
1de06328
YO
1674 DEBUG_EXECUTE_r(
1675 debug_start_match(prog, do_utf8, startpos, strend,
1676 "Matching");
1677 );
bac06658 1678
6eb5f6b9 1679 minlen = prog->minlen;
1de06328
YO
1680
1681 if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
a3621e74 1682 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584
JH
1683 "String too short [regexec_flags]...\n"));
1684 goto phooey;
1aa99e6b 1685 }
6eb5f6b9 1686
1de06328 1687
6eb5f6b9 1688 /* Check validity of program. */
f8fc2ecf 1689 if (UCHARAT(progi->program) != REG_MAGIC) {
6eb5f6b9
JH
1690 Perl_croak(aTHX_ "corrupted regexp program");
1691 }
1692
1693 PL_reg_flags = 0;
1694 PL_reg_eval_set = 0;
1695 PL_reg_maxiter = 0;
1696
bbe252da 1697 if (prog->extflags & RXf_UTF8)
6eb5f6b9
JH
1698 PL_reg_flags |= RF_utf8;
1699
1700 /* Mark beginning of line for ^ and lookbehind. */
3b0527fe 1701 reginfo.bol = startpos; /* XXX not used ??? */
6eb5f6b9 1702 PL_bostr = strbeg;
3b0527fe 1703 reginfo.sv = sv;
6eb5f6b9
JH
1704
1705 /* Mark end of line for $ (and such) */
1706 PL_regeol = strend;
1707
1708 /* see how far we have to get to not match where we matched before */
3b0527fe 1709 reginfo.till = startpos+minend;
6eb5f6b9 1710
6eb5f6b9
JH
1711 /* If there is a "must appear" string, look for it. */
1712 s = startpos;
1713
bbe252da 1714 if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
6eb5f6b9
JH
1715 MAGIC *mg;
1716
1717 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
58e23c8d 1718 reginfo.ganch = startpos + prog->gofs;
6eb5f6b9
JH
1719 else if (sv && SvTYPE(sv) >= SVt_PVMG
1720 && SvMAGIC(sv)
14befaf4
DM
1721 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1722 && mg->mg_len >= 0) {
3b0527fe 1723 reginfo.ganch = strbeg + mg->mg_len; /* Defined pos() */
bbe252da 1724 if (prog->extflags & RXf_ANCH_GPOS) {
3b0527fe 1725 if (s > reginfo.ganch)
6eb5f6b9 1726 goto phooey;
58e23c8d 1727 s = reginfo.ganch - prog->gofs;
6eb5f6b9
JH
1728 }
1729 }
58e23c8d
YO
1730 else if (data) {
1731 reginfo.ganch = strbeg + (UV)data;
1732 } else /* pos() not defined */
3b0527fe 1733 reginfo.ganch = strbeg;
6eb5f6b9 1734 }
c74340f9
YO
1735 if (PL_curpm && (PM_GETRE(PL_curpm) == prog)) {
1736 I32 *t;
f8fc2ecf 1737 if (!progi->swap) {
c74340f9
YO
1738 /* We have to be careful. If the previous successful match
1739 was from this regex we don't want a subsequent paritally
1740 successful match to clobber the old results.
1741 So when we detect this possibility we add a swap buffer
1742 to the re, and switch the buffer each match. If we fail
1743 we switch it back, otherwise we leave it swapped.
1744 */
f8fc2ecf 1745 Newxz(progi->swap, 1, regexp_paren_ofs);
c74340f9 1746 /* no need to copy these */
f8fc2ecf
YO
1747 Newxz(progi->swap->startp, prog->nparens + 1, I32);
1748 Newxz(progi->swap->endp, prog->nparens + 1, I32);
c74340f9 1749 }
f8fc2ecf
YO
1750 t = progi->swap->startp;
1751 progi->swap->startp = prog->startp;
c74340f9 1752 prog->startp = t;
f8fc2ecf
YO
1753 t = progi->swap->endp;
1754 progi->swap->endp = prog->endp;
c74340f9
YO
1755 prog->endp = t;
1756 }
a0714e2c 1757 if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
6eb5f6b9
JH
1758 re_scream_pos_data d;
1759
1760 d.scream_olds = &scream_olds;
1761 d.scream_pos = &scream_pos;
1762 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7 1763 if (!s) {
a3621e74 1764 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1765 goto phooey; /* not present */
3fa9c3d7 1766 }
6eb5f6b9
JH
1767 }
1768
1de06328 1769
6eb5f6b9
JH
1770
1771 /* Simplest case: anchored match need be tried only once. */
1772 /* [unless only anchor is BOL and multiline is set] */
bbe252da 1773 if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
24b23f37 1774 if (s == startpos && regtry(&reginfo, &startpos))
6eb5f6b9 1775 goto got_it;
bbe252da
YO
1776 else if (multiline || (prog->intflags & PREGf_IMPLICIT)
1777 || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
6eb5f6b9
JH
1778 {
1779 char *end;
1780
1781 if (minlen)
1782 dontbother = minlen - 1;
1aa99e6b 1783 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9 1784 /* for multiline we only have to try after newlines */
33b8afdf 1785 if (prog->check_substr || prog->check_utf8) {
6eb5f6b9
JH
1786 if (s == startpos)
1787 goto after_try;
1788 while (1) {
24b23f37 1789 if (regtry(&reginfo, &s))
6eb5f6b9
JH
1790 goto got_it;
1791 after_try:
1792 if (s >= end)
1793 goto phooey;
bbe252da 1794 if (prog->extflags & RXf_USE_INTUIT) {
6eb5f6b9
JH
1795 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1796 if (!s)
1797 goto phooey;
1798 }
1799 else
1800 s++;
1801 }
1802 } else {
1803 if (s > startpos)
1804 s--;
1805 while (s < end) {
1806 if (*s++ == '\n') { /* don't need PL_utf8skip here */
24b23f37 1807 if (regtry(&reginfo, &s))
6eb5f6b9
JH
1808 goto got_it;
1809 }
1810 }
1811 }
1812 }
1813 goto phooey;
bbe252da 1814 } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
f9f4320a
YO
1815 {
1816 /* the warning about reginfo.ganch being used without intialization
bbe252da 1817 is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
f9f4320a 1818 and we only enter this block when the same bit is set. */
58e23c8d
YO
1819 char *tmp_s = reginfo.ganch - prog->gofs;
1820 if (regtry(&reginfo, &tmp_s))
6eb5f6b9
JH
1821 goto got_it;
1822 goto phooey;
1823 }
1824
1825 /* Messy cases: unanchored match. */
bbe252da 1826 if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
6eb5f6b9
JH
1827 /* we have /x+whatever/ */
1828 /* it must be a one character string (XXXX Except UTF?) */
33b8afdf 1829 char ch;
bf93d4cc
GS
1830#ifdef DEBUGGING
1831 int did_match = 0;
1832#endif
33b8afdf
JH
1833 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1834 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
3f7c398e 1835 ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
bf93d4cc 1836
1aa99e6b 1837 if (do_utf8) {
4cadc6a9 1838 REXEC_FBC_SCAN(
6eb5f6b9 1839 if (*s == ch) {
a3621e74 1840 DEBUG_EXECUTE_r( did_match = 1 );
24b23f37 1841 if (regtry(&reginfo, &s)) goto got_it;
6eb5f6b9
JH
1842 s += UTF8SKIP(s);
1843 while (s < strend && *s == ch)
1844 s += UTF8SKIP(s);
1845 }
4cadc6a9 1846 );
6eb5f6b9
JH
1847 }
1848 else {
4cadc6a9 1849 REXEC_FBC_SCAN(
6eb5f6b9 1850 if (*s == ch) {
a3621e74 1851 DEBUG_EXECUTE_r( did_match = 1 );
24b23f37 1852 if (regtry(&reginfo, &s)) goto got_it;
6eb5f6b9
JH
1853 s++;
1854 while (s < strend && *s == ch)
1855 s++;
1856 }
4cadc6a9 1857 );
6eb5f6b9 1858 }
a3621e74 1859 DEBUG_EXECUTE_r(if (!did_match)
bf93d4cc 1860 PerlIO_printf(Perl_debug_log,
b7953727
JH
1861 "Did not find anchored character...\n")
1862 );
6eb5f6b9 1863 }
a0714e2c
SS
1864 else if (prog->anchored_substr != NULL
1865 || prog->anchored_utf8 != NULL
1866 || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
33b8afdf
JH
1867 && prog->float_max_offset < strend - s)) {
1868 SV *must;
1869 I32 back_max;
1870 I32 back_min;
1871 char *last;
6eb5f6b9 1872 char *last1; /* Last position checked before */
bf93d4cc
GS
1873#ifdef DEBUGGING
1874 int did_match = 0;
1875#endif
33b8afdf
JH
1876 if (prog->anchored_substr || prog->anchored_utf8) {
1877 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1878 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1879 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1880 back_max = back_min = prog->anchored_offset;
1881 } else {
1882 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1883 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1884 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1885 back_max = prog->float_max_offset;
1886 back_min = prog->float_min_offset;
1887 }
1de06328
YO
1888
1889
33b8afdf
JH
1890 if (must == &PL_sv_undef)
1891 /* could not downgrade utf8 check substring, so must fail */
1892 goto phooey;
1893
1de06328
YO
1894 if (back_min<0) {
1895 last = strend;
1896 } else {
1897 last = HOP3c(strend, /* Cannot start after this */
1898 -(I32)(CHR_SVLEN(must)
1899 - (SvTAIL(must) != 0) + back_min), strbeg);
1900 }
6eb5f6b9
JH
1901 if (s > PL_bostr)
1902 last1 = HOPc(s, -1);
1903 else
1904 last1 = s - 1; /* bogus */
1905
a0288114 1906 /* XXXX check_substr already used to find "s", can optimize if
6eb5f6b9
JH
1907 check_substr==must. */
1908 scream_pos = -1;
1909 dontbother = end_shift;
1910 strend = HOPc(strend, -dontbother);
1911 while ( (s <= last) &&
9041c2e3 1912 ((flags & REXEC_SCREAM)
1de06328 1913 ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
6eb5f6b9 1914 end_shift, &scream_pos, 0))
1de06328 1915 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
9041c2e3 1916 (unsigned char*)strend, must,
7fba1cd6 1917 multiline ? FBMrf_MULTILINE : 0))) ) {
4addbd3b
HS
1918 /* we may be pointing at the wrong string */
1919 if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
3f7c398e 1920 s = strbeg + (s - SvPVX_const(sv));
a3621e74 1921 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1922 if (HOPc(s, -back_max) > last1) {
1923 last1 = HOPc(s, -back_min);
1924 s = HOPc(s, -back_max);
1925 }
1926 else {
52657f30 1927 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
6eb5f6b9
JH
1928
1929 last1 = HOPc(s, -back_min);
52657f30 1930 s = t;
6eb5f6b9 1931 }
1aa99e6b 1932 if (do_utf8) {
6eb5f6b9 1933 while (s <= last1) {
24b23f37 1934 if (regtry(&reginfo, &s))
6eb5f6b9
JH
1935 goto got_it;
1936 s += UTF8SKIP(s);
1937 }
1938 }
1939 else {
1940 while (s <= last1) {
24b23f37 1941 if (regtry(&reginfo, &s))
6eb5f6b9
JH
1942 goto got_it;
1943 s++;
1944 }
1945 }
1946 }
ab3bbdeb
YO
1947 DEBUG_EXECUTE_r(if (!did_match) {
1948 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
1949 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
1950 PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
33b8afdf 1951 ((must == prog->anchored_substr || must == prog->anchored_utf8)
bf93d4cc 1952 ? "anchored" : "floating"),
ab3bbdeb
YO
1953 quoted, RE_SV_TAIL(must));
1954 });
6eb5f6b9
JH
1955 goto phooey;
1956 }
f8fc2ecf 1957 else if ( (c = progi->regstclass) ) {
f14c76ed 1958 if (minlen) {
f8fc2ecf 1959 const OPCODE op = OP(progi->regstclass);
66e933ab 1960 /* don't bother with what can't match */
786e8c11 1961 if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
f14c76ed
RGS
1962 strend = HOPc(strend, -(minlen - 1));
1963 }
a3621e74 1964 DEBUG_EXECUTE_r({
be8e71aa 1965 SV * const prop = sv_newmortal();
32fc9b6a 1966 regprop(prog, prop, c);
0df25f3d 1967 {
ab3bbdeb
YO
1968 RE_PV_QUOTED_DECL(quoted,UTF,PERL_DEBUG_PAD_ZERO(1),
1969 s,strend-s,60);
0df25f3d 1970 PerlIO_printf(Perl_debug_log,
ab3bbdeb 1971 "Matching stclass %.*s against %s (%d chars)\n",
e4f74956 1972 (int)SvCUR(prop), SvPVX_const(prop),
ab3bbdeb 1973 quoted, (int)(strend - s));
0df25f3d 1974 }
ffc61ed2 1975 });
3b0527fe 1976 if (find_byclass(prog, c, s, strend, &reginfo))
6eb5f6b9 1977 goto got_it;
07be1b83 1978 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
d6a28714
JH
1979 }
1980 else {
1981 dontbother = 0;
a0714e2c 1982 if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
33b8afdf 1983 /* Trim the end. */
d6a28714 1984 char *last;
33b8afdf
JH
1985 SV* float_real;
1986
1987 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1988 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1989 float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
d6a28714
JH
1990
1991 if (flags & REXEC_SCREAM) {
33b8afdf 1992 last = screaminstr(sv, float_real, s - strbeg,
d6a28714
JH
1993 end_shift, &scream_pos, 1); /* last one */
1994 if (!last)
ffc61ed2 1995 last = scream_olds; /* Only one occurrence. */
4addbd3b
HS
1996 /* we may be pointing at the wrong string */
1997 else if (RX_MATCH_COPIED(prog))
3f7c398e 1998 s = strbeg + (s - SvPVX_const(sv));
b8c5462f 1999 }
d6a28714
JH
2000 else {
2001 STRLEN len;
cfd0369c 2002 const char * const little = SvPV_const(float_real, len);
d6a28714 2003
33b8afdf 2004 if (SvTAIL(float_real)) {
d6a28714
JH
2005 if (memEQ(strend - len + 1, little, len - 1))
2006 last = strend - len + 1;
7fba1cd6 2007 else if (!multiline)
9041c2e3 2008 last = memEQ(strend - len, little, len)
bd61b366 2009 ? strend - len : NULL;
b8c5462f 2010 else
d6a28714
JH
2011 goto find_last;
2012 } else {
2013 find_last:
9041c2e3 2014 if (len)
d6a28714 2015 last = rninstr(s, strend, little, little + len);
b8c5462f 2016 else
a0288114 2017 last = strend; /* matching "$" */
b8c5462f 2018 }
b8c5462f 2019 }
bf93d4cc 2020 if (last == NULL) {
6bda09f9
YO
2021 DEBUG_EXECUTE_r(
2022 PerlIO_printf(Perl_debug_log,
2023 "%sCan't trim the tail, match fails (should not happen)%s\n",
2024 PL_colors[4], PL_colors[5]));
bf93d4cc
GS
2025 goto phooey; /* Should not happen! */
2026 }
d6a28714
JH
2027 dontbother = strend - last + prog->float_min_offset;
2028 }
2029 if (minlen && (dontbother < minlen))
2030 dontbother = minlen - 1;
2031 strend -= dontbother; /* this one's always in bytes! */
2032 /* We don't know much -- general case. */
1aa99e6b 2033 if (do_utf8) {
d6a28714 2034 for (;;) {
24b23f37 2035 if (regtry(&reginfo, &s))
d6a28714
JH
2036 goto got_it;
2037 if (s >= strend)
2038 break;
b8c5462f 2039 s += UTF8SKIP(s);
d6a28714
JH
2040 };
2041 }
2042 else {
2043 do {
24b23f37 2044 if (regtry(&reginfo, &s))
d6a28714
JH
2045 goto got_it;
2046 } while (s++ < strend);
2047 }
2048 }
2049
2050 /* Failure. */
2051 goto phooey;
2052
2053got_it:
2054 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2055
2056 if (PL_reg_eval_set) {
2057 /* Preserve the current value of $^R */
2058 if (oreplsv != GvSV(PL_replgv))
2059 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2060 restored, the value remains
2061 the same. */
4f639d21 2062 restore_pos(aTHX_ prog);
d6a28714 2063 }
81714fb9
YO
2064 if (prog->paren_names)
2065 (void)hv_iterinit(prog->paren_names);
d6a28714
JH
2066
2067 /* make sure $`, $&, $', and $digit will work later */
2068 if ( !(flags & REXEC_NOT_FIRST) ) {
ed252734 2069 RX_MATCH_COPY_FREE(prog);
d6a28714 2070 if (flags & REXEC_COPY_STR) {
be8e71aa 2071 const I32 i = PL_regeol - startpos + (stringarg - strbeg);
f8c7b90f 2072#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2073 if ((SvIsCOW(sv)
2074 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2075 if (DEBUG_C_TEST) {
2076 PerlIO_printf(Perl_debug_log,
2077 "Copy on write: regexp capture, type %d\n",
2078 (int) SvTYPE(sv));
2079 }
2080 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
d5263905 2081 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
ed252734
NC
2082 assert (SvPOKp(prog->saved_copy));
2083 } else
2084#endif
2085 {
2086 RX_MATCH_COPIED_on(prog);
2087 s = savepvn(strbeg, i);
2088 prog->subbeg = s;
2089 }
d6a28714 2090 prog->sublen = i;
d6a28714
JH
2091 }
2092 else {
2093 prog->subbeg = strbeg;
2094 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2095 }
2096 }
9041c2e3 2097
d6a28714
JH
2098 return 1;
2099
2100phooey:
a3621e74 2101 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
e4584336 2102 PL_colors[4], PL_colors[5]));
d6a28714 2103 if (PL_reg_eval_set)
4f639d21 2104 restore_pos(aTHX_ prog);
f8fc2ecf 2105 if (progi->swap) {
c74340f9
YO
2106 /* we failed :-( roll it back */
2107 I32 *t;
f8fc2ecf
YO
2108 t = progi->swap->startp;
2109 progi->swap->startp = prog->startp;
c74340f9 2110 prog->startp = t;
f8fc2ecf
YO
2111 t = progi->swap->endp;
2112 progi->swap->endp = prog->endp;
c74340f9
YO
2113 prog->endp = t;
2114 }
d6a28714
JH
2115 return 0;
2116}
2117
6bda09f9 2118
d6a28714
JH
2119/*
2120 - regtry - try match at specific point
2121 */
2122STATIC I32 /* 0 failure, 1 success */
24b23f37 2123S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
d6a28714 2124{
97aff369 2125 dVAR;
d6a28714
JH
2126 register I32 *sp;
2127 register I32 *ep;
2128 CHECKPOINT lastcp;
3b0527fe 2129 regexp *prog = reginfo->prog;
f8fc2ecf 2130 RXi_GET_DECL(prog,progi);
a3621e74 2131 GET_RE_DEBUG_FLAGS_DECL;
24b23f37 2132 reginfo->cutpoint=NULL;
d6a28714 2133
bbe252da 2134 if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
d6a28714
JH
2135 MAGIC *mg;
2136
2137 PL_reg_eval_set = RS_init;
a3621e74 2138 DEBUG_EXECUTE_r(DEBUG_s(
b900a521
JH
2139 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2140 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 2141 ));
ea8d6ae1 2142 SAVESTACK_CXPOS();
d6a28714
JH
2143 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2144 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2145 SAVETMPS;
2146 /* Apparently this is not needed, judging by wantarray. */
e8347627 2147 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
2148 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2149
3b0527fe 2150 if (reginfo->sv) {
d6a28714 2151 /* Make $_ available to executed code. */
3b0527fe 2152 if (reginfo->sv != DEFSV) {
59f00321 2153 SAVE_DEFSV;
3b0527fe 2154 DEFSV = reginfo->sv;
b8c5462f 2155 }
d6a28714 2156
3b0527fe
DM
2157 if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2158 && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
d6a28714 2159 /* prepare for quick setting of pos */
d300d9fa 2160#ifdef PERL_OLD_COPY_ON_WRITE
51a9ea20
NC
2161 if (SvIsCOW(reginfo->sv))
2162 sv_force_normal_flags(reginfo->sv, 0);
d300d9fa 2163#endif
3dab1dad 2164 mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
d300d9fa 2165 &PL_vtbl_mglob, NULL, 0);
d6a28714 2166 mg->mg_len = -1;
b8c5462f 2167 }
d6a28714
JH
2168 PL_reg_magic = mg;
2169 PL_reg_oldpos = mg->mg_len;
4f639d21 2170 SAVEDESTRUCTOR_X(restore_pos, prog);
d6a28714 2171 }
09687e5a 2172 if (!PL_reg_curpm) {
a02a5408 2173 Newxz(PL_reg_curpm, 1, PMOP);
09687e5a
AB
2174#ifdef USE_ITHREADS
2175 {
be8e71aa 2176 SV* const repointer = newSViv(0);
577e12cc 2177 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2178 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2179 av_push(PL_regex_padav,repointer);
2180 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2181 PL_regex_pad = AvARRAY(PL_regex_padav);
2182 }
2183#endif
2184 }
aaa362c4 2185 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2186 PL_reg_oldcurpm = PL_curpm;
2187 PL_curpm = PL_reg_curpm;
2188 if (RX_MATCH_COPIED(prog)) {
2189 /* Here is a serious problem: we cannot rewrite subbeg,
2190 since it may be needed if this match fails. Thus
2191 $` inside (?{}) could fail... */
2192 PL_reg_oldsaved = prog->subbeg;
2193 PL_reg_oldsavedlen = prog->sublen;
f8c7b90f 2194#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2195 PL_nrs = prog->saved_copy;
2196#endif
d6a28714
JH
2197 RX_MATCH_COPIED_off(prog);
2198 }
2199 else
bd61b366 2200 PL_reg_oldsaved = NULL;
d6a28714
JH
2201 prog->subbeg = PL_bostr;
2202 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2203 }
24b23f37
YO
2204 DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2205 prog->startp[0] = *startpos - PL_bostr;
2206 PL_reginput = *startpos;
d6a28714 2207 PL_reglastparen = &prog->lastparen;
a01268b5 2208 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714 2209 prog->lastparen = 0;
03994de8 2210 prog->lastcloseparen = 0;
d6a28714 2211 PL_regsize = 0;
6bda09f9
YO
2212 PL_regstartp = prog->startp;
2213 PL_regendp = prog->endp;
d6a28714
JH
2214 if (PL_reg_start_tmpl <= prog->nparens) {
2215 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2216 if(PL_reg_start_tmp)
2217 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2218 else
a02a5408 2219 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
d6a28714
JH
2220 }
2221
2222 /* XXXX What this code is doing here?!!! There should be no need
2223 to do this again and again, PL_reglastparen should take care of
3dd2943c 2224 this! --ilya*/
dafc8851
JH
2225
2226 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2227 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2228 * PL_reglastparen), is not needed at all by the test suite
2229 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2230 * enough, for building DynaLoader, or otherwise this
2231 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2232 * will happen. Meanwhile, this code *is* needed for the
2233 * above-mentioned test suite tests to succeed. The common theme
2234 * on those tests seems to be returning null fields from matches.
2235 * --jhi */
dafc8851 2236#if 1
c74340f9
YO
2237 sp = PL_regstartp;
2238 ep = PL_regendp;
d6a28714 2239 if (prog->nparens) {
097eb12c 2240 register I32 i;
eb160463 2241 for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
d6a28714
JH
2242 *++sp = -1;
2243 *++ep = -1;
2244 }
2245 }
dafc8851 2246#endif
02db2b7b 2247 REGCP_SET(lastcp);
f8fc2ecf 2248 if (regmatch(reginfo, progi->program + 1)) {
c74340f9 2249 PL_regendp[0] = PL_reginput - PL_bostr;
d6a28714
JH
2250 return 1;
2251 }
24b23f37
YO
2252 if (reginfo->cutpoint)
2253 *startpos= reginfo->cutpoint;
02db2b7b 2254 REGCP_UNWIND(lastcp);
d6a28714
JH
2255 return 0;
2256}
2257
02db2b7b 2258
8ba1375e
MJD
2259#define sayYES goto yes
2260#define sayNO goto no
262b90c4 2261#define sayNO_SILENT goto no_silent
8ba1375e 2262
f9f4320a
YO
2263/* we dont use STMT_START/END here because it leads to
2264 "unreachable code" warnings, which are bogus, but distracting. */
2265#define CACHEsayNO \
c476f425
DM
2266 if (ST.cache_mask) \
2267 PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
f9f4320a 2268 sayNO
3298f257 2269
a3621e74 2270/* this is used to determine how far from the left messages like
265c4333
YO
2271 'failed...' are printed. It should be set such that messages
2272 are inline with the regop output that created them.
a3621e74 2273*/
265c4333 2274#define REPORT_CODE_OFF 32
a3621e74
YO
2275
2276
2277/* Make sure there is a test for this +1 options in re_tests */
2278#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2279
40a82448
DM
2280#define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2281#define CHRTEST_VOID -1000 /* the c1/c2 "next char" test should be skipped */
9e137952 2282
86545054
DM
2283#define SLAB_FIRST(s) (&(s)->states[0])
2284#define SLAB_LAST(s) (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2285
5d9a96ca
DM
2286/* grab a new slab and return the first slot in it */
2287
2288STATIC regmatch_state *
2289S_push_slab(pTHX)
2290{
54df2634
NC
2291#if PERL_VERSION < 9
2292 dMY_CXT;
2293#endif
5d9a96ca
DM
2294 regmatch_slab *s = PL_regmatch_slab->next;
2295 if (!s) {
2296 Newx(s, 1, regmatch_slab);
2297 s->prev = PL_regmatch_slab;
2298 s->next = NULL;
2299 PL_regmatch_slab->next = s;
2300 }
2301 PL_regmatch_slab = s;
86545054 2302 return SLAB_FIRST(s);
5d9a96ca 2303}
5b47454d 2304
95b24440 2305
40a82448
DM
2306/* push a new state then goto it */
2307
2308#define PUSH_STATE_GOTO(state, node) \
2309 scan = node; \
2310 st->resume_state = state; \
2311 goto push_state;
2312
2313/* push a new state with success backtracking, then goto it */
2314
2315#define PUSH_YES_STATE_GOTO(state, node) \
2316 scan = node; \
2317 st->resume_state = state; \
2318 goto push_yes_state;
2319
aa283a38 2320
aa283a38 2321
d6a28714 2322/*
95b24440 2323
bf1f174e
DM
2324regmatch() - main matching routine
2325
2326This is basically one big switch statement in a loop. We execute an op,
2327set 'next' to point the next op, and continue. If we come to a point which
2328we may need to backtrack to on failure such as (A|B|C), we push a
2329backtrack state onto the backtrack stack. On failure, we pop the top
2330state, and re-enter the loop at the state indicated. If there are no more
2331states to pop, we return failure.
2332
2333Sometimes we also need to backtrack on success; for example /A+/, where
2334after successfully matching one A, we need to go back and try to
2335match another one; similarly for lookahead assertions: if the assertion
2336completes successfully, we backtrack to the state just before the assertion
2337and then carry on. In these cases, the pushed state is marked as
2338'backtrack on success too'. This marking is in fact done by a chain of
2339pointers, each pointing to the previous 'yes' state. On success, we pop to
2340the nearest yes state, discarding any intermediate failure-only states.
2341Sometimes a yes state is pushed just to force some cleanup code to be
2342called at the end of a successful match or submatch; e.g. (??{$re}) uses
2343it to free the inner regex.
2344
2345Note that failure backtracking rewinds the cursor position, while
2346success backtracking leaves it alone.
2347
2348A pattern is complete when the END op is executed, while a subpattern
2349such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2350ops trigger the "pop to last yes state if any, otherwise return true"
2351behaviour.
2352
2353A common convention in this function is to use A and B to refer to the two
2354subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2355the subpattern to be matched possibly multiple times, while B is the entire
2356rest of the pattern. Variable and state names reflect this convention.
2357
2358The states in the main switch are the union of ops and failure/success of
2359substates associated with with that op. For example, IFMATCH is the op
2360that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2361'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2362successfully matched A and IFMATCH_A_fail is a state saying that we have
2363just failed to match A. Resume states always come in pairs. The backtrack
2364state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2365at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2366on success or failure.
2367
2368The struct that holds a backtracking state is actually a big union, with
2369one variant for each major type of op. The variable st points to the
2370top-most backtrack struct. To make the code clearer, within each
2371block of code we #define ST to alias the relevant union.
2372
2373Here's a concrete example of a (vastly oversimplified) IFMATCH
2374implementation:
2375
2376 switch (state) {
2377 ....
2378
2379#define ST st->u.ifmatch
2380
2381 case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2382 ST.foo = ...; // some state we wish to save
95b24440 2383 ...
bf1f174e
DM
2384 // push a yes backtrack state with a resume value of
2385 // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2386 // first node of A:
2387 PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2388 // NOTREACHED
2389
2390 case IFMATCH_A: // we have successfully executed A; now continue with B
2391 next = B;
2392 bar = ST.foo; // do something with the preserved value
2393 break;
2394
2395 case IFMATCH_A_fail: // A failed, so the assertion failed
2396 ...; // do some housekeeping, then ...
2397 sayNO; // propagate the failure
2398
2399#undef ST
95b24440 2400
bf1f174e
DM
2401 ...
2402 }
95b24440 2403
bf1f174e
DM
2404For any old-timers reading this who are familiar with the old recursive
2405approach, the code above is equivalent to:
95b24440 2406
bf1f174e
DM
2407 case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2408 {
2409 int foo = ...
95b24440 2410 ...
bf1f174e
DM
2411 if (regmatch(A)) {
2412 next = B;
2413 bar = foo;
2414 break;
95b24440 2415 }
bf1f174e
DM
2416 ...; // do some housekeeping, then ...
2417 sayNO; // propagate the failure
95b24440 2418 }
bf1f174e
DM
2419
2420The topmost backtrack state, pointed to by st, is usually free. If you
2421want to claim it, populate any ST.foo fields in it with values you wish to
2422save, then do one of
2423
2424 PUSH_STATE_GOTO(resume_state, node);
2425 PUSH_YES_STATE_GOTO(resume_state, node);
2426
2427which sets that backtrack state's resume value to 'resume_state', pushes a
2428new free entry to the top of the backtrack stack, then goes to 'node'.
2429On backtracking, the free slot is popped, and the saved state becomes the
2430new free state. An ST.foo field in this new top state can be temporarily
2431accessed to retrieve values, but once the main loop is re-entered, it
2432becomes available for reuse.
2433
2434Note that the depth of the backtrack stack constantly increases during the
2435left-to-right execution of the pattern, rather than going up and down with
2436the pattern nesting. For example the stack is at its maximum at Z at the
2437end of the pattern, rather than at X in the following:
2438
2439 /(((X)+)+)+....(Y)+....Z/
2440
2441The only exceptions to this are lookahead/behind assertions and the cut,
2442(?>A), which pop all the backtrack states associated with A before
2443continuing.
2444
2445Bascktrack state structs are allocated in slabs of about 4K in size.
2446PL_regmatch_state and st always point to the currently active state,
2447and PL_regmatch_slab points to the slab currently containing
2448PL_regmatch_state. The first time regmatch() is called, the first slab is
2449allocated, and is never freed until interpreter destruction. When the slab
2450is full, a new one is allocated and chained to the end. At exit from
2451regmatch(), slabs allocated since entry are freed.
2452
2453*/
95b24440 2454
40a82448 2455
5bc10b2c 2456#define DEBUG_STATE_pp(pp) \
265c4333 2457 DEBUG_STATE_r({ \
5bc10b2c
DM
2458 DUMP_EXEC_POS(locinput, scan, do_utf8); \
2459 PerlIO_printf(Perl_debug_log, \
5d458dd8 2460 " %*s"pp" %s%s%s%s%s\n", \
5bc10b2c 2461 depth*2, "", \
5d458dd8
YO
2462 reg_name[st->resume_state], \
2463 ((st==yes_state||st==mark_state) ? "[" : ""), \
2464 ((st==yes_state) ? "Y" : ""), \
2465 ((st==mark_state) ? "M" : ""), \
2466 ((st==yes_state||st==mark_state) ? "]" : "") \
2467 ); \
265c4333 2468 });
5bc10b2c 2469
40a82448 2470
3dab1dad 2471#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
95b24440 2472
3df15adc 2473#ifdef DEBUGGING
5bc10b2c 2474
ab3bbdeb
YO
2475STATIC void
2476S_debug_start_match(pTHX_ const regexp *prog, const bool do_utf8,
2477 const char *start, const char *end, const char *blurb)
2478{
bbe252da 2479 const bool utf8_pat= prog->extflags & RXf_UTF8 ? 1 : 0;
ab3bbdeb
YO
2480 if (!PL_colorset)
2481 reginitcolors();
2482 {
2483 RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2484 prog->precomp, prog->prelen, 60);
2485
2486 RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2487 start, end - start, 60);
2488
2489 PerlIO_printf(Perl_debug_log,
2490 "%s%s REx%s %s against %s\n",
2491 PL_colors[4], blurb, PL_colors[5], s0, s1);
2492
2493 if (do_utf8||utf8_pat)
1de06328
YO
2494 PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2495 utf8_pat ? "pattern" : "",
2496 utf8_pat && do_utf8 ? " and " : "",
2497 do_utf8 ? "string" : ""
ab3bbdeb
YO
2498 );
2499 }
2500}
3df15adc
YO
2501
2502STATIC void
786e8c11
YO
2503S_dump_exec_pos(pTHX_ const char *locinput,
2504 const regnode *scan,
2505 const char *loc_regeol,
2506 const char *loc_bostr,
2507 const char *loc_reg_starttry,
2508 const bool do_utf8)
07be1b83 2509{
786e8c11 2510 const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
07be1b83 2511 const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
786e8c11 2512 int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
07be1b83
YO
2513 /* The part of the string before starttry has one color
2514 (pref0_len chars), between starttry and current
2515 position another one (pref_len - pref0_len chars),
2516 after the current position the third one.
2517 We assume that pref0_len <= pref_len, otherwise we
2518 decrease pref0_len. */
786e8c11
YO
2519 int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2520 ? (5 + taill) - l : locinput - loc_bostr;
07be1b83
YO
2521 int pref0_len;
2522
2523 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2524 pref_len++;
786e8c11
YO
2525 pref0_len = pref_len - (locinput - loc_reg_starttry);
2526 if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2527 l = ( loc_regeol - locinput > (5 + taill) - pref_len
2528 ? (5 + taill) - pref_len : loc_regeol - locinput);
07be1b83
YO
2529 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2530 l--;
2531 if (pref0_len < 0)
2532 pref0_len = 0;
2533 if (pref0_len > pref_len)
2534 pref0_len = pref_len;
2535 {
3df15adc 2536 const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
0df25f3d 2537
ab3bbdeb 2538 RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
1de06328 2539 (locinput - pref_len),pref0_len, 60, 4, 5);
0df25f3d 2540
ab3bbdeb 2541 RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
3df15adc 2542 (locinput - pref_len + pref0_len),
1de06328 2543 pref_len - pref0_len, 60, 2, 3);
0df25f3d 2544
ab3bbdeb 2545 RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
1de06328 2546 locinput, loc_regeol - locinput, 10, 0, 1);
0df25f3d 2547
1de06328 2548 const STRLEN tlen=len0+len1+len2;
3df15adc 2549 PerlIO_printf(Perl_debug_log,
ab3bbdeb 2550 "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
786e8c11 2551 (IV)(locinput - loc_bostr),
07be1b83 2552 len0, s0,
07be1b83 2553 len1, s1,
07be1b83 2554 (docolor ? "" : "> <"),
07be1b83 2555 len2, s2,
f9f4320a 2556 (int)(tlen > 19 ? 0 : 19 - tlen),
07be1b83
YO
2557 "");
2558 }
2559}
3df15adc 2560
07be1b83
YO
2561#endif
2562
0a4db386
YO
2563/* reg_check_named_buff_matched()
2564 * Checks to see if a named buffer has matched. The data array of
2565 * buffer numbers corresponding to the buffer is expected to reside
2566 * in the regexp->data->data array in the slot stored in the ARG() of
2567 * node involved. Note that this routine doesn't actually care about the
2568 * name, that information is not preserved from compilation to execution.
2569 * Returns the index of the leftmost defined buffer with the given name
2570 * or 0 if non of the buffers matched.
2571 */
2572STATIC I32
2573S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan) {
2574 I32 n;
f8fc2ecf
YO
2575 RXi_GET_DECL(rex,rexi);
2576 SV *sv_dat=(SV*)rexi->data->data[ ARG( scan ) ];
0a4db386
YO
2577 I32 *nums=(I32*)SvPVX(sv_dat);
2578 for ( n=0; n<SvIVX(sv_dat); n++ ) {
2579 if ((I32)*PL_reglastparen >= nums[n] &&
2580 PL_regendp[nums[n]] != -1)
2581 {
2582 return nums[n];
2583 }
2584 }
2585 return 0;
2586}
2587
d6a28714 2588STATIC I32 /* 0 failure, 1 success */
24b23f37 2589S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
d6a28714 2590{
54df2634
NC
2591#if PERL_VERSION < 9
2592 dMY_CXT;
2593#endif
27da23d5 2594 dVAR;
95b24440 2595 register const bool do_utf8 = PL_reg_match_utf8;
4ad0818d 2596 const U32 uniflags = UTF8_ALLOW_DEFAULT;
95b24440 2597
3b0527fe 2598 regexp *rex = reginfo->prog;
f8fc2ecf
YO
2599 RXi_GET_DECL(rex,rexi);
2600
5d9a96ca
DM
2601 regmatch_slab *orig_slab;
2602 regmatch_state *orig_state;
a3621e74 2603
5d9a96ca
DM
2604 /* the current state. This is a cached copy of PL_regmatch_state */
2605 register regmatch_state *st;
95b24440 2606
5d9a96ca
DM
2607 /* cache heavy used fields of st in registers */
2608 register regnode *scan;
2609 register regnode *next;
438e9bae 2610 register U32 n = 0; /* general value; init to avoid compiler warning */
24d3c4a9 2611 register I32 ln = 0; /* len or last; init to avoid compiler warning */
5d9a96ca 2612 register char *locinput = PL_reginput;
5d9a96ca 2613 register I32 nextchr; /* is always set to UCHARAT(locinput) */
24d3c4a9 2614
b69b0499 2615 bool result = 0; /* return value of S_regmatch */
24d3c4a9 2616 int depth = 0; /* depth of backtrack stack */
1a147d38 2617 int nochange_depth = 0; /* depth of GOSUB recursion with nochange*/
77cb431f
DM
2618 regmatch_state *yes_state = NULL; /* state to pop to on success of
2619 subpattern */
e2e6a0f1
YO
2620 /* mark_state piggy backs on the yes_state logic so that when we unwind
2621 the stack on success we can update the mark_state as we go */
2622 regmatch_state *mark_state = NULL; /* last mark state we have seen */
5d458dd8 2623
faec1544 2624 regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
b8591aee 2625 struct regmatch_state *cur_curlyx = NULL; /* most recent curlyx */
40a82448 2626 U32 state_num;
5d458dd8
YO
2627 bool no_final = 0; /* prevent failure from backtracking? */
2628 bool do_cutgroup = 0; /* no_final only until next branch/trie entry */
e2e6a0f1 2629 char *startpoint = PL_reginput;
5d458dd8
YO
2630 SV *popmark = NULL; /* are we looking for a mark? */
2631 SV *sv_commit = NULL; /* last mark name seen in failure */
2632 SV *sv_yes_mark = NULL; /* last mark name we have seen
2633 during a successfull match */
2634 U32 lastopen = 0; /* last open we saw */
2635 bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
58e23c8d 2636
5d458dd8 2637
24d3c4a9
DM
2638 /* these three flags are set by various ops to signal information to
2639 * the very next op. They have a useful lifetime of exactly one loop
2640 * iteration, and are not preserved or restored by state pushes/pops
2641 */
2642 bool sw = 0; /* the condition value in (?(cond)a|b) */
2643 bool minmod = 0; /* the next "{n,m}" is a "{n,m}?" */
2644 int logical = 0; /* the following EVAL is:
2645 0: (?{...})
2646 1: (?(?{...})X|Y)
2647 2: (??{...})
2648 or the following IFMATCH/UNLESSM is:
2649 false: plain (?=foo)
2650 true: used as a condition: (?(?=foo))
2651 */
2652
95b24440 2653#ifdef DEBUGGING
e68ec53f 2654 GET_RE_DEBUG_FLAGS_DECL;
d6a28714
JH
2655#endif
2656
58e23c8d 2657 DEBUG_OPTIMISE_r( {
24b23f37
YO
2658 PerlIO_printf(Perl_debug_log,"regmatch start\n");
2659 });
5d9a96ca
DM
2660 /* on first ever call to regmatch, allocate first slab */
2661 if (!PL_regmatch_slab) {
2662 Newx(PL_regmatch_slab, 1, regmatch_slab);
2663 PL_regmatch_slab->prev = NULL;
2664 PL_regmatch_slab->next = NULL;
86545054 2665 PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
5d9a96ca
DM
2666 }
2667
2668 /* remember current high-water mark for exit */
2669 /* XXX this should be done with SAVE* instead */
2670 orig_slab = PL_regmatch_slab;
2671 orig_state = PL_regmatch_state;
2672
2673 /* grab next free state slot */
2674 st = ++PL_regmatch_state;
86545054 2675 if (st > SLAB_LAST(PL_regmatch_slab))
5d9a96ca
DM
2676 st = PL_regmatch_state = S_push_slab(aTHX);
2677
d6a28714
JH
2678 /* Note that nextchr is a byte even in UTF */
2679 nextchr = UCHARAT(locinput);
2680 scan = prog;
2681 while (scan != NULL) {
8ba1375e 2682
a3621e74 2683 DEBUG_EXECUTE_r( {
6136c704 2684 SV * const prop = sv_newmortal();
1de06328 2685 regnode *rnext=regnext(scan);
786e8c11 2686 DUMP_EXEC_POS( locinput, scan, do_utf8 );
32fc9b6a 2687 regprop(rex, prop, scan);
07be1b83
YO
2688
2689 PerlIO_printf(Perl_debug_log,
2690 "%3"IVdf":%*s%s(%"IVdf")\n",
f8fc2ecf 2691 (IV)(scan - rexi->program), depth*2, "",
07be1b83 2692 SvPVX_const(prop),
1de06328 2693 (PL_regkind[OP(scan)] == END || !rnext) ?
f8fc2ecf 2694 0 : (IV)(rnext - rexi->program));
2a782b5b 2695 });
d6a28714
JH
2696
2697 next = scan + NEXT_OFF(scan);
2698 if (next == scan)
2699 next = NULL;
40a82448 2700 state_num = OP(scan);
d6a28714 2701
40a82448
DM
2702 reenter_switch:
2703 switch (state_num) {
d6a28714 2704 case BOL:
7fba1cd6 2705 if (locinput == PL_bostr)
d6a28714 2706 {
3b0527fe 2707 /* reginfo->till = reginfo->bol; */
b8c5462f
JH
2708 break;
2709 }
d6a28714
JH
2710 sayNO;
2711 case MBOL:
12d33761
HS
2712 if (locinput == PL_bostr ||
2713 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2714 {
b8c5462f
JH
2715 break;
2716 }
d6a28714
JH
2717 sayNO;
2718 case SBOL:
c2a73568 2719 if (locinput == PL_bostr)
b8c5462f 2720 break;
d6a28714
JH
2721 sayNO;
2722 case GPOS:
3b0527fe 2723 if (locinput == reginfo->ganch)
d6a28714
JH
2724 break;
2725 sayNO;
2726 case EOL:
d6a28714
JH
2727 goto seol;
2728 case MEOL:
d6a28714 2729 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2730 sayNO;
b8c5462f 2731 break;
d6a28714
JH
2732 case SEOL:
2733 seol:
2734 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2735 sayNO;
d6a28714 2736 if (PL_regeol - locinput > 1)
b8c5462f 2737 sayNO;
b8c5462f 2738 break;
d6a28714
JH
2739 case EOS:
2740 if (PL_regeol != locinput)
b8c5462f 2741 sayNO;
d6a28714 2742 break;
ffc61ed2 2743 case SANY:
d6a28714 2744 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2745 sayNO;
f33976b4
DB
2746 if (do_utf8) {
2747 locinput += PL_utf8skip[nextchr];
2748 if (locinput > PL_regeol)
2749 sayNO;
2750 nextchr = UCHARAT(locinput);
2751 }
2752 else
2753 nextchr = UCHARAT(++locinput);
2754 break;
2755 case CANY:
2756 if (!nextchr && locinput >= PL_regeol)
2757 sayNO;
b8c5462f 2758 nextchr = UCHARAT(++locinput);
a0d0e21e 2759 break;
ffc61ed2 2760 case REG_ANY:
1aa99e6b
IH
2761 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2762 sayNO;
2763 if (do_utf8) {
b8c5462f 2764 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2765 if (locinput > PL_regeol)
2766 sayNO;
a0ed51b3 2767 nextchr = UCHARAT(locinput);
a0ed51b3 2768 }
1aa99e6b
IH
2769 else
2770 nextchr = UCHARAT(++locinput);
a0ed51b3 2771 break;
166ba7cd
DM
2772
2773#undef ST
2774#define ST st->u.trie
786e8c11
YO
2775 case TRIEC:
2776 /* In this case the charclass data is available inline so
2777 we can fail fast without a lot of extra overhead.
2778 */
2779 if (scan->flags == EXACT || !do_utf8) {
2780 if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
2781 DEBUG_EXECUTE_r(
2782 PerlIO_printf(Perl_debug_log,
2783 "%*s %sfailed to match trie start class...%s\n",
5bc10b2c 2784 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
786e8c11
YO
2785 );
2786 sayNO_SILENT;
2787 /* NOTREACHED */
2788 }
2789 }
2790 /* FALL THROUGH */
5b47454d 2791 case TRIE:
3dab1dad 2792 {
07be1b83 2793 /* what type of TRIE am I? (utf8 makes this contextual) */
3dab1dad
YO
2794 const enum { trie_plain, trie_utf8, trie_utf8_fold }
2795 trie_type = do_utf8 ?
2796 (scan->flags == EXACT ? trie_utf8 : trie_utf8_fold)
2797 : trie_plain;
2798
2799 /* what trie are we using right now */
be8e71aa 2800 reg_trie_data * const trie
f8fc2ecf 2801 = (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
3dab1dad 2802 U32 state = trie->startstate;
166ba7cd 2803
3dab1dad
YO
2804 if (trie->bitmap && trie_type != trie_utf8_fold &&
2805 !TRIE_BITMAP_TEST(trie,*locinput)
2806 ) {
2807 if (trie->states[ state ].wordnum) {
2808 DEBUG_EXECUTE_r(
2809 PerlIO_printf(Perl_debug_log,
2810 "%*s %smatched empty string...%s\n",
5bc10b2c 2811 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3dab1dad
YO
2812 );
2813 break;
2814 } else {
2815 DEBUG_EXECUTE_r(
2816 PerlIO_printf(Perl_debug_log,
786e8c11 2817 "%*s %sfailed to match trie start class...%s\n",
5bc10b2c 2818 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3dab1dad
YO
2819 );
2820 sayNO_SILENT;
2821 }
2822 }
166ba7cd 2823
786e8c11
YO
2824 {
2825 U8 *uc = ( U8* )locinput;
2826
2827 STRLEN len = 0;
2828 STRLEN foldlen = 0;
2829 U8 *uscan = (U8*)NULL;
2830 STRLEN bufflen=0;
2831 SV *sv_accept_buff = NULL;
2832 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2833
2834 ST.accepted = 0; /* how many accepting states we have seen */
2835 ST.B = next;
2836 ST.jump = trie->jump;
786e8c11 2837 ST.me = scan;
786e8c11 2838
07be1b83
YO
2839 /*
2840 traverse the TRIE keeping track of all accepting states
2841 we transition through until we get to a failing node.
2842 */
2843
a3621e74 2844 while ( state && uc <= (U8*)PL_regeol ) {
786e8c11 2845 U32 base = trie->states[ state ].trans.base;
f9f4320a 2846 UV uvc = 0;
786e8c11
YO
2847 U16 charid;
2848 /* We use charid to hold the wordnum as we don't use it
2849 for charid until after we have done the wordnum logic.
2850 We define an alias just so that the wordnum logic reads
2851 more naturally. */
2852
2853#define got_wordnum charid
2854 got_wordnum = trie->states[ state ].wordnum;
2855
2856 if ( got_wordnum ) {
2857 if ( ! ST.accepted ) {
5b47454d
DM
2858 ENTER;
2859 SAVETMPS;
2860 bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
2861 sv_accept_buff=newSV(bufflen *
2862 sizeof(reg_trie_accepted) - 1);
786e8c11 2863 SvCUR_set(sv_accept_buff, 0);
5b47454d
DM
2864 SvPOK_on(sv_accept_buff);
2865 sv_2mortal(sv_accept_buff);
166ba7cd
DM
2866 SAVETMPS;
2867 ST.accept_buff =
5b47454d
DM
2868 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
2869 }
786e8c11 2870 do {
166ba7cd 2871 if (ST.accepted >= bufflen) {
5b47454d 2872 bufflen *= 2;
166ba7cd 2873 ST.accept_buff =(reg_trie_accepted*)
5b47454d
DM
2874 SvGROW(sv_accept_buff,
2875 bufflen * sizeof(reg_trie_accepted));
2876 }
2877 SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
2878 + sizeof(reg_trie_accepted));
a3621e74 2879
786e8c11
YO
2880
2881 ST.accept_buff[ST.accepted].wordnum = got_wordnum;
2882 ST.accept_buff[ST.accepted].endpos = uc;
2883 ++ST.accepted;
2884 } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
2885 }
2886#undef got_wordnum
a3621e74 2887
07be1b83 2888 DEBUG_TRIE_EXECUTE_r({
786e8c11 2889 DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
a3621e74 2890 PerlIO_printf( Perl_debug_log,
786e8c11 2891 "%*s %sState: %4"UVxf" Accepted: %4"UVxf" ",
5bc10b2c 2892 2+depth * 2, "", PL_colors[4],
786e8c11 2893 (UV)state, (UV)ST.accepted );
07be1b83 2894 });
a3621e74
YO
2895
2896 if ( base ) {
4cadc6a9
YO
2897 REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len,
2898 uvc, charid, foldlen, foldbuf, uniflags);
a3621e74 2899
5b47454d
DM
2900 if (charid &&
2901 (base + charid > trie->uniquecharcount )
2902 && (base + charid - 1 - trie->uniquecharcount
2903 < trie->lasttrans)
2904 && trie->trans[base + charid - 1 -
2905 trie->uniquecharcount].check == state)
2906 {
2907 state = trie->trans[base + charid - 1 -
2908 trie->uniquecharcount ].next;
2909 }
2910 else {
2911 state = 0;
2912 }
2913 uc += len;
2914
2915 }
2916 else {
a3621e74
YO
2917 state = 0;
2918 }
2919 DEBUG_TRIE_EXECUTE_r(
e4584336 2920 PerlIO_printf( Perl_debug_log,
786e8c11 2921 "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
e4584336 2922 charid, uvc, (UV)state, PL_colors[5] );
a3621e74
YO
2923 );
2924 }
166ba7cd 2925 if (!ST.accepted )
a3621e74 2926 sayNO;
a3621e74 2927
166ba7cd
DM
2928 DEBUG_EXECUTE_r(
2929 PerlIO_printf( Perl_debug_log,
2930 "%*s %sgot %"IVdf" possible matches%s\n",
5bc10b2c 2931 REPORT_CODE_OFF + depth * 2, "",
166ba7cd
DM
2932 PL_colors[4], (IV)ST.accepted, PL_colors[5] );
2933 );
786e8c11 2934 }}
166ba7cd
DM
2935
2936 /* FALL THROUGH */
166ba7cd 2937 case TRIE_next_fail: /* we failed - try next alterative */
5d458dd8
YO
2938 if (do_cutgroup) {
2939 do_cutgroup = 0;
2940 no_final = 0;
2941 }
166ba7cd
DM
2942 if ( ST.accepted == 1 ) {
2943 /* only one choice left - just continue */
2944 DEBUG_EXECUTE_r({
2945 reg_trie_data * const trie
f8fc2ecf 2946 = (reg_trie_data*)rexi->data->data[ ARG(ST.me) ];
5c9f2f80
YO
2947 SV ** const tmp = av_fetch( trie->words,
2948 ST.accept_buff[ 0 ].wordnum-1, 0 );
de734bd5
A
2949 SV *sv= tmp ? sv_newmortal() : NULL;
2950
166ba7cd
DM
2951 PerlIO_printf( Perl_debug_log,
2952 "%*s %sonly one match left: #%d <%s>%s\n",
5bc10b2c 2953 REPORT_CODE_OFF+depth*2, "", PL_colors[4],
166ba7cd 2954 ST.accept_buff[ 0 ].wordnum,
de734bd5
A
2955 tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
2956 PL_colors[0], PL_colors[1],
2957 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
2958 )
2959 : "not compiled under -Dr",
166ba7cd
DM
2960 PL_colors[5] );
2961 });
2962 PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
2963 /* in this case we free tmps/leave before we call regmatch
2964 as we wont be using accept_buff again. */
5d458dd8 2965
166ba7cd
DM
2966 locinput = PL_reginput;
2967 nextchr = UCHARAT(locinput);
5d458dd8
YO
2968 if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
2969 scan = ST.B;
2970 else
2971 scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
2972 if (!has_cutgroup) {
2973 FREETMPS;
2974 LEAVE;
2975 } else {
2976 ST.accepted--;
2977 PUSH_YES_STATE_GOTO(TRIE_next, scan);
2978 }
786e8c11 2979
166ba7cd
DM
2980 continue; /* execute rest of RE */
2981 }
2982
2983 if (!ST.accepted-- ) {
5d458dd8
YO
2984 DEBUG_EXECUTE_r({
2985 PerlIO_printf( Perl_debug_log,
2986 "%*s %sTRIE failed...%s\n",
2987 REPORT_CODE_OFF+depth*2, "",
2988 PL_colors[4],
2989 PL_colors[5] );
2990 });
166ba7cd
DM
2991 FREETMPS;
2992 LEAVE;
5d458dd8 2993 sayNO_SILENT;
166ba7cd
DM
2994 }
2995
a3621e74 2996 /*
166ba7cd
DM
2997 There are at least two accepting states left. Presumably
2998 the number of accepting states is going to be low,
2999 typically two. So we simply scan through to find the one
3000 with lowest wordnum. Once we find it, we swap the last
3001 state into its place and decrement the size. We then try to
3002 match the rest of the pattern at the point where the word
3003 ends. If we succeed, control just continues along the
3004 regex; if we fail we return here to try the next accepting
3005 state
3006 */
a3621e74 3007
166ba7cd
DM
3008 {
3009 U32 best = 0;
3010 U32 cur;
3011 for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
3012 DEBUG_TRIE_EXECUTE_r(
f2278c82 3013 PerlIO_printf( Perl_debug_log,
166ba7cd 3014 "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
5bc10b2c 3015 REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
166ba7cd
DM
3016 (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
3017 ST.accept_buff[ cur ].wordnum, PL_colors[5] );
3018 );
3019
3020 if (ST.accept_buff[cur].wordnum <
3021 ST.accept_buff[best].wordnum)
3022 best = cur;
a3621e74 3023 }
166ba7cd
DM
3024
3025 DEBUG_EXECUTE_r({
3026 reg_trie_data * const trie
f8fc2ecf 3027 = (reg_trie_data*)rexi->data->data[ ARG(ST.me) ];
5c9f2f80
YO
3028 SV ** const tmp = av_fetch( trie->words,
3029 ST.accept_buff[ best ].wordnum - 1, 0 );
7f69552c 3030 regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
265c4333 3031 ST.B :
7f69552c 3032 ST.me + ST.jump[ST.accept_buff[best].wordnum];
de734bd5
A
3033 SV *sv= tmp ? sv_newmortal() : NULL;
3034
265c4333
YO
3035 PerlIO_printf( Perl_debug_log,
3036 "%*s %strying alternation #%d <%s> at node #%d %s\n",
5bc10b2c 3037 REPORT_CODE_OFF+depth*2, "", PL_colors[4],
166ba7cd 3038 ST.accept_buff[best].wordnum,
de734bd5
A
3039 tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3040 PL_colors[0], PL_colors[1],
3041 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3042 ) : "not compiled under -Dr",
265c4333 3043 REG_NODE_NUM(nextop),
166ba7cd
DM
3044 PL_colors[5] );
3045 });
3046
3047 if ( best<ST.accepted ) {
3048 reg_trie_accepted tmp = ST.accept_buff[ best ];
3049 ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
3050 ST.accept_buff[ ST.accepted ] = tmp;
3051 best = ST.accepted;
a3621e74 3052 }
166ba7cd 3053 PL_reginput = (char *)ST.accept_buff[ best ].endpos;
7f69552c 3054 if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
5d458dd8 3055 scan = ST.B;
786e8c11
YO
3056 /* NOTREACHED */
3057 } else {
5d458dd8 3058 scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
786e8c11
YO
3059 /* NOTREACHED */
3060 }
5d458dd8
YO
3061 if (has_cutgroup) {
3062 PUSH_YES_STATE_GOTO(TRIE_next, scan);
3063 /* NOTREACHED */
3064 } else {
3065 PUSH_STATE_GOTO(TRIE_next, scan);
3066 /* NOTREACHED */
3067 }
786e8c11 3068 /* NOTREACHED */
166ba7cd 3069 }
166ba7cd 3070 /* NOTREACHED */
5d458dd8
YO
3071 case TRIE_next:
3072 FREETMPS;
3073 LEAVE;
3074 sayYES;
166ba7cd
DM
3075#undef ST
3076
95b24440
DM
3077 case EXACT: {
3078 char *s = STRING(scan);
24d3c4a9 3079 ln = STR_LEN(scan);
eb160463 3080 if (do_utf8 != UTF) {
bc517b45 3081 /* The target and the pattern have differing utf8ness. */
1aa99e6b 3082 char *l = locinput;
24d3c4a9 3083 const char * const e = s + ln;
a72c7584 3084
5ff6fc6d
JH
3085 if (do_utf8) {
3086 /* The target is utf8, the pattern is not utf8. */
1aa99e6b 3087 while (s < e) {
a3b680e6 3088 STRLEN ulen;
1aa99e6b 3089 if (l >= PL_regeol)
5ff6fc6d
JH
3090 sayNO;
3091 if (NATIVE_TO_UNI(*(U8*)s) !=
89ebb4a3 3092 utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
041457d9 3093 uniflags))
5ff6fc6d 3094 sayNO;
bc517b45 3095 l += ulen;
5ff6fc6d 3096 s ++;
1aa99e6b 3097 }
5ff6fc6d
JH
3098 }
3099 else {
3100 /* The target is not utf8, the pattern is utf8. */
1aa99e6b 3101 while (s < e) {
a3b680e6 3102 STRLEN ulen;
1aa99e6b
IH
3103 if (l >= PL_regeol)
3104 sayNO;
5ff6fc6d 3105 if (NATIVE_TO_UNI(*((U8*)l)) !=
89ebb4a3 3106 utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
041457d9 3107 uniflags))
1aa99e6b 3108 sayNO;
bc517b45 3109 s += ulen;
a72c7584 3110 l ++;
1aa99e6b 3111 }
5ff6fc6d 3112 }
1aa99e6b
IH
3113 locinput = l;
3114 nextchr = UCHARAT(locinput);
3115 break;
3116 }
bc517b45 3117 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
3118 /* Inline the first character, for speed. */
3119 if (UCHARAT(s) != nextchr)
3120 sayNO;
24d3c4a9 3121 if (PL_regeol - locinput < ln)
d6a28714 3122 sayNO;
24d3c4a9 3123 if (ln > 1 && memNE(s, locinput, ln))
d6a28714 3124 sayNO;
24d3c4a9 3125 locinput += ln;
d6a28714
JH
3126 nextchr = UCHARAT(locinput);
3127 break;
95b24440 3128 }
d6a28714 3129 case EXACTFL:
b8c5462f
JH
3130 PL_reg_flags |= RF_tainted;
3131 /* FALL THROUGH */
95b24440 3132 case EXACTF: {
be8e71aa 3133 char * const s = STRING(scan);
24d3c4a9 3134 ln = STR_LEN(scan);
d6a28714 3135
d07ddd77
JH
3136 if (do_utf8 || UTF) {
3137 /* Either target or the pattern are utf8. */
be8e71aa 3138 const char * const l = locinput;
d07ddd77 3139 char *e = PL_regeol;
bc517b45 3140
24d3c4a9 3141 if (ibcmp_utf8(s, 0, ln, (bool)UTF,
1feea2c7 3142 l, &e, 0, do_utf8)) {
5486206c
JH
3143 /* One more case for the sharp s:
3144 * pack("U0U*", 0xDF) =~ /ss/i,
3145 * the 0xC3 0x9F are the UTF-8
3146 * byte sequence for the U+00DF. */
3147 if (!(do_utf8 &&
3148 toLOWER(s[0]) == 's' &&
24d3c4a9 3149 ln >= 2 &&
5486206c
JH
3150 toLOWER(s[1]) == 's' &&
3151 (U8)l[0] == 0xC3 &&
3152 e - l >= 2 &&
3153 (U8)l[1] == 0x9F))
3154 sayNO;
3155 }
d07ddd77
JH
3156 locinput = e;
3157 nextchr = UCHARAT(locinput);
3158 break;
a0ed51b3 3159 }
d6a28714 3160
bc517b45
JH
3161 /* Neither the target and the pattern are utf8. */
3162
d6a28714
JH
3163 /* Inline the first character, for speed. */
3164 if (UCHARAT(s) != nextchr &&
3165 UCHARAT(s) != ((OP(scan) == EXACTF)
3166 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 3167 sayNO;
24d3c4a9 3168 if (PL_regeol - locinput < ln)
b8c5462f 3169 sayNO;
24d3c4a9
DM
3170 if (ln > 1 && (OP(scan) == EXACTF
3171 ? ibcmp(s, locinput, ln)
3172 : ibcmp_locale(s, locinput, ln)))
4633a7c4 3173 sayNO;
24d3c4a9 3174 locinput += ln;
d6a28714 3175 nextchr = UCHARAT(locinput);
a0d0e21e 3176 break;
95b24440 3177 }
d6a28714 3178 case ANYOF:
ffc61ed2 3179 if (do_utf8) {
9e55ce06
JH
3180 STRLEN inclasslen = PL_regeol - locinput;
3181
32fc9b6a 3182 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
262b90c4 3183 goto anyof_fail;
ffc61ed2
JH
3184 if (locinput >= PL_regeol)
3185 sayNO;
0f0076b4 3186 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
b8c5462f 3187 nextchr = UCHARAT(locinput);
e0f9d4a8 3188 break;
ffc61ed2
JH
3189 }
3190 else {
3191 if (nextchr < 0)
3192 nextchr = UCHARAT(locinput);
32fc9b6a 3193 if (!REGINCLASS(rex, scan, (U8*)locinput))
262b90c4 3194 goto anyof_fail;
ffc61ed2
JH
3195 if (!nextchr && locinput >= PL_regeol)
3196 sayNO;
3197 nextchr = UCHARAT(++locinput);
e0f9d4a8
JH
3198 break;
3199 }
262b90c4 3200 anyof_fail:
e0f9d4a8
JH
3201 /* If we might have the case of the German sharp s
3202 * in a casefolding Unicode character class. */
3203
ebc501f0
JH
3204 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3205 locinput += SHARP_S_SKIP;
e0f9d4a8 3206 nextchr = UCHARAT(locinput);
ffc61ed2 3207 }
e0f9d4a8
JH
3208 else
3209 sayNO;
b8c5462f 3210 break;
d6a28714 3211 case ALNUML:
b8c5462f
JH
3212 PL_reg_flags |= RF_tainted;
3213 /* FALL THROUGH */
d6a28714 3214 case ALNUM:
b8c5462f 3215 if (!nextchr)
4633a7c4 3216 sayNO;
ffc61ed2 3217 if (do_utf8) {
1a4fad37 3218 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3219 if (!(OP(scan) == ALNUM
bb7a0f54 3220 ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 3221 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
3222 {
3223 sayNO;
a0ed51b3 3224 }
b8c5462f 3225 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3226 nextchr = UCHARAT(locinput);
3227 break;
3228 }
ffc61ed2 3229 if (!(OP(scan) == ALNUM
d6a28714 3230 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 3231 sayNO;
b8c5462f 3232 nextchr = UCHARAT(++locinput);
a0d0e21e 3233 break;
d6a28714 3234 case NALNUML:
b8c5462f
JH
3235 PL_reg_flags |= RF_tainted;
3236 /* FALL THROUGH */
d6a28714
JH
3237 case NALNUM:
3238 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 3239 sayNO;
ffc61ed2 3240 if (do_utf8) {
1a4fad37 3241 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3242 if (OP(scan) == NALNUM
bb7a0f54 3243 ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
3244 : isALNUM_LC_utf8((U8*)locinput))
3245 {
b8c5462f 3246 sayNO;
d6a28714 3247 }
b8c5462f
JH
3248 locinput += PL_utf8skip[nextchr];
3249 nextchr = UCHARAT(locinput);
3250 break;
3251 }
ffc61ed2 3252 if (OP(scan) == NALNUM
d6a28714 3253 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 3254 sayNO;
76e3520e 3255 nextchr = UCHARAT(++locinput);
a0d0e21e 3256 break;
d6a28714
JH
3257 case BOUNDL:
3258 case NBOUNDL:
3280af22 3259 PL_reg_flags |= RF_tainted;
bbce6d69 3260 /* FALL THROUGH */
d6a28714
JH
3261 case BOUND:
3262 case NBOUND:
3263 /* was last char in word? */
ffc61ed2 3264 if (do_utf8) {
12d33761 3265 if (locinput == PL_bostr)
24d3c4a9 3266 ln = '\n';
ffc61ed2 3267 else {
a3b680e6 3268 const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
9041c2e3 3269
24d3c4a9 3270 ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
ffc61ed2
JH
3271 }
3272 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
24d3c4a9 3273 ln = isALNUM_uni(ln);
1a4fad37 3274 LOAD_UTF8_CHARCLASS_ALNUM();
3568d838 3275 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
3276 }
3277 else {
24d3c4a9 3278 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
3279 n = isALNUM_LC_utf8((U8*)locinput);
3280 }
a0ed51b3 3281 }
d6a28714 3282 else {
24d3c4a9 3283 ln = (locinput != PL_bostr) ?
12d33761 3284 UCHARAT(locinput - 1) : '\n';
ffc61ed2 3285 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
24d3c4a9 3286 ln = isALNUM(ln);
ffc61ed2
JH
3287 n = isALNUM(nextchr);
3288 }
3289 else {
24d3c4a9 3290 ln = isALNUM_LC(ln);
ffc61ed2
JH
3291 n = isALNUM_LC(nextchr);
3292 }
d6a28714 3293 }
24d3c4a9 3294 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
ffc61ed2
JH
3295 OP(scan) == BOUNDL))
3296 sayNO;
a0ed51b3 3297 break;
d6a28714 3298 case SPACEL:
3280af22 3299 PL_reg_flags |= RF_tainted;
bbce6d69 3300 /* FALL THROUGH */
d6a28714 3301 case SPACE:
9442cb0e 3302 if (!nextchr)
4633a7c4 3303 sayNO;
1aa99e6b 3304 if (do_utf8) {
fd400ab9 3305 if (UTF8_IS_CONTINUED(nextchr)) {
1a4fad37 3306 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3307 if (!(OP(scan) == SPACE
bb7a0f54 3308 ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
3309 : isSPACE_LC_utf8((U8*)locinput)))
3310 {
3311 sayNO;
3312 }
3313 locinput += PL_utf8skip[nextchr];
3314 nextchr = UCHARAT(locinput);
3315 break;
d6a28714 3316 }
ffc61ed2
JH
3317 if (!(OP(scan) == SPACE
3318 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3319 sayNO;
3320 nextchr = UCHARAT(++locinput);
3321 }
3322 else {
3323 if (!(OP(scan) == SPACE
3324 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3325 sayNO;
3326 nextchr = UCHARAT(++locinput);
a0ed51b3 3327 }
a0ed51b3 3328 break;
d6a28714 3329 case NSPACEL:
3280af22 3330 PL_reg_flags |= RF_tainted;
bbce6d69 3331 /* FALL THROUGH */
d6a28714 3332 case NSPACE:
9442cb0e 3333 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3334 sayNO;
1aa99e6b 3335 if (do_utf8) {
1a4fad37 3336 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3337 if (OP(scan) == NSPACE
bb7a0f54 3338 ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 3339 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
3340 {
3341 sayNO;
3342 }
3343 locinput += PL_utf8skip[nextchr];
3344 nextchr = UCHARAT(locinput);
3345 break;
a0ed51b3 3346 }
ffc61ed2 3347 if (OP(scan) == NSPACE
d6a28714 3348 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 3349 sayNO;
76e3520e 3350 nextchr = UCHARAT(++locinput);
a0d0e21e 3351 break;
d6a28714 3352 case DIGITL:
a0ed51b3
LW
3353 PL_reg_flags |= RF_tainted;
3354 /* FALL THROUGH */
d6a28714 3355 case DIGIT:
9442cb0e 3356 if (!nextchr)
a0ed51b3 3357 sayNO;
1aa99e6b 3358 if (do_utf8) {
1a4fad37 3359 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3360 if (!(OP(scan) == DIGIT
bb7a0f54 3361 ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 3362 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 3363 {
a0ed51b3 3364 sayNO;
dfe13c55 3365 }
6f06b55f 3366 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3367 nextchr = UCHARAT(locinput);
3368 break;
3369 }
ffc61ed2 3370 if (!(OP(scan) == DIGIT
9442cb0e 3371 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
3372 sayNO;
3373 nextchr = UCHARAT(++locinput);
3374 break;
d6a28714 3375 case NDIGITL:
b8c5462f
JH
3376 PL_reg_flags |= RF_tainted;
3377 /* FALL THROUGH */
d6a28714 3378 case NDIGIT:
9442cb0e 3379 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3380 sayNO;
1aa99e6b 3381 if (do_utf8) {
1a4fad37 3382 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3383 if (OP(scan) == NDIGIT
bb7a0f54 3384 ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
3385 : isDIGIT_LC_utf8((U8*)locinput))
3386 {
a0ed51b3 3387 sayNO;
9442cb0e 3388 }
6f06b55f 3389 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3390 nextchr = UCHARAT(locinput);
3391 break;
3392 }
ffc61ed2 3393 if (OP(scan) == NDIGIT
9442cb0e 3394 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
3395 sayNO;
3396 nextchr = UCHARAT(++locinput);
3397 break;
3398 case CLUMP:
b7c83a7e 3399 if (locinput >= PL_regeol)
a0ed51b3 3400 sayNO;
b7c83a7e 3401 if (do_utf8) {
1a4fad37 3402 LOAD_UTF8_CHARCLASS_MARK();
b7c83a7e
JH
3403 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3404 sayNO;
3405 locinput += PL_utf8skip[nextchr];
3406 while (locinput < PL_regeol &&
3407 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3408 locinput += UTF8SKIP(locinput);
3409 if (locinput > PL_regeol)
3410 sayNO;
eb08e2da
JH
3411 }
3412 else
3413 locinput++;
a0ed51b3
LW
3414 nextchr = UCHARAT(locinput);
3415 break;
81714fb9
YO
3416
3417 case NREFFL:
3418 {
3419 char *s;
ff1157ca 3420 char type;
81714fb9
YO
3421 PL_reg_flags |= RF_tainted;
3422 /* FALL THROUGH */
3423 case NREF:
3424 case NREFF:
ff1157ca 3425 type = OP(scan);
0a4db386
YO
3426 n = reg_check_named_buff_matched(rex,scan);
3427
3428 if ( n ) {
3429 type = REF + ( type - NREF );
3430 goto do_ref;
3431 } else {
81714fb9 3432 sayNO;
0a4db386
YO
3433 }
3434 /* unreached */
c8756f30 3435 case REFFL:
3280af22 3436 PL_reg_flags |= RF_tainted;
c8756f30 3437 /* FALL THROUGH */
c277df42 3438 case REF:
81714fb9 3439 case REFF:
c277df42 3440 n = ARG(scan); /* which paren pair */
81714fb9
YO
3441 type = OP(scan);
3442 do_ref:
24d3c4a9 3443 ln = PL_regstartp[n];
2c2d71f5 3444 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
3b6647e0 3445 if (*PL_reglastparen < n || ln == -1)
af3f8c16 3446 sayNO; /* Do not match unless seen CLOSEn. */
24d3c4a9 3447 if (ln == PL_regendp[n])
a0d0e21e 3448 break;
a0ed51b3 3449
24d3c4a9 3450 s = PL_bostr + ln;
81714fb9 3451 if (do_utf8 && type != REF) { /* REF can do byte comparison */
a0ed51b3 3452 char *l = locinput;
a3b680e6 3453 const char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
3454 /*
3455 * Note that we can't do the "other character" lookup trick as
3456 * in the 8-bit case (no pun intended) because in Unicode we
3457 * have to map both upper and title case to lower case.
3458 */
81714fb9 3459 if (type == REFF) {
a0ed51b3 3460 while (s < e) {
a3b680e6
AL
3461 STRLEN ulen1, ulen2;
3462 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3463 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3464
a0ed51b3
LW
3465 if (l >= PL_regeol)
3466 sayNO;
a2a2844f
JH
3467 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3468 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 3469 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 3470 sayNO;
a2a2844f
JH
3471 s += ulen1;
3472 l += ulen2;
a0ed51b3
LW
3473 }
3474 }
3475 locinput = l;
3476 nextchr = UCHARAT(locinput);
3477 break;
3478 }
3479
a0d0e21e 3480 /* Inline the first character, for speed. */
76e3520e 3481 if (UCHARAT(s) != nextchr &&
81714fb9
YO
3482 (type == REF ||
3483 (UCHARAT(s) != (type == REFF
3484 ? PL_fold : PL_fold_locale)[nextchr])))
4633a7c4 3485 sayNO;
24d3c4a9
DM
3486 ln = PL_regendp[n] - ln;
3487 if (locinput + ln > PL_regeol)
4633a7c4 3488 sayNO;
81714fb9 3489 if (ln > 1 && (type == REF
24d3c4a9 3490 ? memNE(s, locinput, ln)
81714fb9 3491 : (type == REFF
24d3c4a9
DM
3492 ? ibcmp(s, locinput, ln)
3493 : ibcmp_locale(s, locinput, ln))))
4633a7c4 3494 sayNO;
24d3c4a9 3495 locinput += ln;
76e3520e 3496 nextchr = UCHARAT(locinput);
a0d0e21e 3497 break;
81714fb9 3498 }
a0d0e21e 3499 case NOTHING:
c277df42 3500 case TAIL:
a0d0e21e
LW
3501 break;
3502 case BACK:
3503 break;
40a82448
DM
3504
3505#undef ST
3506#define ST st->u.eval
c277df42 3507 {
c277df42 3508 SV *ret;
6bda09f9 3509 regexp *re;
f8fc2ecf 3510 regexp_internal *rei;
1a147d38
YO
3511 regnode *startpoint;
3512
3513 case GOSTART:
3514 case GOSUB: /* /(...(?1))/ */
6bda09f9 3515 if (cur_eval && cur_eval->locinput==locinput) {
24b23f37 3516 if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
1a147d38 3517 Perl_croak(aTHX_ "Infinite recursion in regex");
6bda09f9 3518 if ( ++nochange_depth > MAX_RECURSE_EVAL_NOCHANGE_DEPTH )
1a147d38
YO
3519 Perl_croak(aTHX_
3520 "Pattern subroutine nesting without pos change"
3521 " exceeded limit in regex");
6bda09f9
YO
3522 } else {
3523 nochange_depth = 0;
1a147d38 3524 }
6bda09f9 3525 re = rex;
f8fc2ecf 3526 rei = rexi;
6bda09f9 3527 (void)ReREFCNT_inc(rex);
1a147d38 3528 if (OP(scan)==GOSUB) {
6bda09f9
YO
3529 startpoint = scan + ARG2L(scan);
3530 ST.close_paren = ARG(scan);
3531 } else {
f8fc2ecf 3532 startpoint = rei->program+1;
6bda09f9
YO
3533 ST.close_paren = 0;
3534 }
3535 goto eval_recurse_doit;
3536 /* NOTREACHED */
3537 case EVAL: /* /(?{A})B/ /(??{A})B/ and /(?(?{A})X|Y)B/ */
3538 if (cur_eval && cur_eval->locinput==locinput) {
3539 if ( ++nochange_depth > MAX_RECURSE_EVAL_NOCHANGE_DEPTH )
1a147d38 3540 Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
6bda09f9
YO
3541 } else {
3542 nochange_depth = 0;
3543 }
8e5e9ebe 3544 {
4aabdb9b
DM
3545 /* execute the code in the {...} */
3546 dSP;
6136c704 3547 SV ** const before = SP;
4aabdb9b
DM
3548 OP_4tree * const oop = PL_op;
3549 COP * const ocurcop = PL_curcop;
3550 PAD *old_comppad;
4aabdb9b
DM
3551
3552 n = ARG(scan);
f8fc2ecf 3553 PL_op = (OP_4tree*)rexi->data->data[n];
24b23f37
YO
3554 DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
3555 " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
f8fc2ecf 3556 PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
4aabdb9b
DM
3557 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
3558
8e5e9ebe
RGS
3559 CALLRUNOPS(aTHX); /* Scalar context. */
3560 SPAGAIN;
3561 if (SP == before)
075aa684 3562 ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
8e5e9ebe
RGS
3563 else {
3564 ret = POPs;
3565 PUTBACK;
3566 }
4aabdb9b
DM
3567
3568 PL_op = oop;
3569 PAD_RESTORE_LOCAL(old_comppad);
3570 PL_curcop = ocurcop;
24d3c4a9 3571 if (!logical) {
4aabdb9b
DM
3572 /* /(?{...})/ */
3573 sv_setsv(save_scalar(PL_replgv), ret);
4aabdb9b
DM
3574 break;
3575 }
8e5e9ebe 3576 }
24d3c4a9
DM
3577 if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
3578 logical = 0;
4aabdb9b 3579 {
4f639d21
DM
3580 /* extract RE object from returned value; compiling if
3581 * necessary */
3582
6136c704 3583 MAGIC *mg = NULL;
be8e71aa 3584 const SV *sv;
faf82a0b
AE
3585 if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3586 mg = mg_find(sv, PERL_MAGIC_qr);
3587 else if (SvSMAGICAL(ret)) {
3588 if (SvGMAGICAL(ret))
3589 sv_unmagic(ret, PERL_MAGIC_qr);
3590 else
3591 mg = mg_find(ret, PERL_MAGIC_qr);
0f5d15d6 3592 }
faf82a0b 3593
0f5d15d6
IZ
3594 if (mg) {
3595 re = (regexp *)mg->mg_obj;
df0003d4 3596 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
3597 }
3598 else {
3599 STRLEN len;
6136c704 3600 const char * const t = SvPV_const(ret, len);
0f5d15d6 3601 PMOP pm;
a3b680e6 3602 const I32 osize = PL_regsize;
0f5d15d6 3603
5fcd1c1b 3604 Zero(&pm, 1, PMOP);
4aabdb9b 3605 if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
f9f4320a 3606 re = CALLREGCOMP((char*)t, (char*)t + len, &pm);
9041c2e3 3607 if (!(SvFLAGS(ret)
faf82a0b
AE
3608 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3609 | SVs_GMG)))
14befaf4
DM
3610 sv_magic(ret,(SV*)ReREFCNT_inc(re),
3611 PERL_MAGIC_qr,0,0);
0f5d15d6 3612 PL_regsize = osize;
0f5d15d6 3613 }
4aabdb9b 3614 }
f8fc2ecf 3615 rei = RXi_GET(re);
6bda09f9
YO
3616 DEBUG_EXECUTE_r(
3617 debug_start_match(re, do_utf8, locinput, PL_regeol,
3618 "Matching embedded");
3619 );
f8fc2ecf 3620 startpoint = rei->program + 1;
1a147d38 3621 ST.close_paren = 0; /* only used for GOSUB */
6bda09f9
YO
3622 /* borrowed from regtry */
3623 if (PL_reg_start_tmpl <= re->nparens) {
3624 PL_reg_start_tmpl = re->nparens*3/2 + 3;
3625 if(PL_reg_start_tmp)
3626 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3627 else
3628 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3629 }
aa283a38 3630
1a147d38 3631 eval_recurse_doit: /* Share code with GOSUB below this line */
aa283a38 3632 /* run the pattern returned from (??{...}) */
40a82448
DM
3633 ST.cp = regcppush(0); /* Save *all* the positions. */
3634 REGCP_SET(ST.lastcp);
6bda09f9 3635
1a147d38
YO
3636 PL_regstartp = re->startp; /* essentially NOOP on GOSUB */
3637 PL_regendp = re->endp; /* essentially NOOP on GOSUB */
6bda09f9 3638
4aabdb9b
DM
3639 *PL_reglastparen = 0;
3640 *PL_reglastcloseparen = 0;
4aabdb9b 3641 PL_reginput = locinput;
ae0beba1 3642 PL_regsize = 0;
4aabdb9b
DM
3643
3644 /* XXXX This is too dramatic a measure... */
3645 PL_reg_maxiter = 0;
3646
faec1544 3647 ST.toggle_reg_flags = PL_reg_flags;
bbe252da 3648 if (re->extflags & RXf_UTF8)
faec1544
DM
3649 PL_reg_flags |= RF_utf8;
3650 else
3651 PL_reg_flags &= ~RF_utf8;
3652 ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
3653
40a82448 3654 ST.prev_rex = rex;
faec1544 3655 ST.prev_curlyx = cur_curlyx;
aa283a38 3656 rex = re;
f8fc2ecf 3657 rexi = rei;
faec1544 3658 cur_curlyx = NULL;
40a82448 3659 ST.B = next;
faec1544
DM
3660 ST.prev_eval = cur_eval;
3661 cur_eval = st;
faec1544 3662 /* now continue from first node in postoned RE */
6bda09f9 3663 PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
4aabdb9b 3664 /* NOTREACHED */
a0ed51b3 3665 }
24d3c4a9
DM
3666 /* logical is 1, /(?(?{...})X|Y)/ */
3667 sw = (bool)SvTRUE(ret);
3668 logical = 0;
c277df42
IZ
3669 break;
3670 }
40a82448 3671
faec1544
DM
3672 case EVAL_AB: /* cleanup after a successful (??{A})B */
3673 /* note: this is called twice; first after popping B, then A */
3674 PL_reg_flags ^= ST.toggle_reg_flags;
40a82448
DM
3675 ReREFCNT_dec(rex);
3676 rex = ST.prev_rex;
f8fc2ecf 3677 rexi = RXi_GET(rex);
faec1544
DM
3678 regcpblow(ST.cp);
3679 cur_eval = ST.prev_eval;
3680 cur_curlyx = ST.prev_curlyx;
40a82448
DM
3681 /* XXXX This is too dramatic a measure... */
3682 PL_reg_maxiter = 0;
262b90c4 3683 sayYES;
40a82448 3684
40a82448 3685
faec1544
DM
3686 case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
3687 /* note: this is called twice; first after popping B, then A */
3688 PL_reg_flags ^= ST.toggle_reg_flags;
3689 ReREFCNT_dec(rex);
3690 rex = ST.prev_rex;
f8fc2ecf 3691 rexi = RXi_GET(rex);
40a82448
DM
3692 PL_reginput = locinput;
3693 REGCP_UNWIND(ST.lastcp);
3694 regcppop(rex);
faec1544
DM
3695 cur_eval = ST.prev_eval;
3696 cur_curlyx = ST.prev_curlyx;
3697 /* XXXX This is too dramatic a measure... */
3698 PL_reg_maxiter = 0;
40a82448 3699 sayNO_SILENT;
40a82448
DM
3700#undef ST
3701
a0d0e21e 3702 case OPEN:
c277df42 3703 n = ARG(scan); /* which paren pair */
3280af22
NIS
3704 PL_reg_start_tmp[n] = locinput;
3705 if (n > PL_regsize)
3706 PL_regsize = n;
e2e6a0f1 3707 lastopen = n;
a0d0e21e
LW
3708 break;
3709 case CLOSE:
c277df42 3710 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
3711 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3712 PL_regendp[n] = locinput - PL_bostr;
7f69552c
YO
3713 /*if (n > PL_regsize)
3714 PL_regsize = n;*/
3b6647e0 3715 if (n > *PL_reglastparen)
3280af22 3716 *PL_reglastparen = n;
a01268b5 3717 *PL_reglastcloseparen = n;
3b6647e0 3718 if (cur_eval && cur_eval->u.eval.close_paren == n) {
6bda09f9
YO
3719 goto fake_end;
3720 }
a0d0e21e 3721 break;
e2e6a0f1
YO
3722 case ACCEPT:
3723 if (ARG(scan)){
3724 regnode *cursor;
3725 for (cursor=scan;
3726 cursor && OP(cursor)!=END;
3727 cursor=regnext(cursor))
3728 {
3729 if ( OP(cursor)==CLOSE ){
3730 n = ARG(cursor);
3731 if ( n <= lastopen ) {
3732 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3733 PL_regendp[n] = locinput - PL_bostr;
3734 /*if (n > PL_regsize)
3735 PL_regsize = n;*/
3b6647e0 3736 if (n > *PL_reglastparen)
e2e6a0f1
YO
3737 *PL_reglastparen = n;
3738 *PL_reglastcloseparen = n;
3b6647e0
RB
3739 if ( n == ARG(scan) || (cur_eval &&
3740 cur_eval->u.eval.close_paren == n))
e2e6a0f1
YO
3741 break;
3742 }
3743 }
3744 }
3745 }
3746 goto fake_end;
3747 /*NOTREACHED*/
c277df42
IZ
3748 case GROUPP:
3749 n = ARG(scan); /* which paren pair */
3b6647e0 3750 sw = (bool)(*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42 3751 break;
0a4db386
YO
3752 case NGROUPP:
3753 /* reg_check_named_buff_matched returns 0 for no match */
3754 sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
3755 break;
1a147d38 3756 case INSUBP:
0a4db386 3757 n = ARG(scan);
3b6647e0 3758 sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
0a4db386
YO
3759 break;
3760 case DEFINEP:
3761 sw = 0;
3762 break;
c277df42 3763 case IFTHEN:
2c2d71f5 3764 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
24d3c4a9 3765 if (sw)
c277df42
IZ
3766 next = NEXTOPER(NEXTOPER(scan));
3767 else {
3768 next = scan + ARG(scan);
3769 if (OP(next) == IFTHEN) /* Fake one. */
3770 next = NEXTOPER(NEXTOPER(next));
3771 }
3772 break;
3773 case LOGICAL:
24d3c4a9 3774 logical = scan->flags;
c277df42 3775 break;
c476f425 3776
2ab05381 3777/*******************************************************************
2ab05381 3778
c476f425
DM
3779The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
3780pattern, where A and B are subpatterns. (For simple A, CURLYM or
3781STAR/PLUS/CURLY/CURLYN are used instead.)
2ab05381 3782
c476f425 3783A*B is compiled as <CURLYX><A><WHILEM><B>
2ab05381 3784
c476f425
DM
3785On entry to the subpattern, CURLYX is called. This pushes a CURLYX
3786state, which contains the current count, initialised to -1. It also sets
3787cur_curlyx to point to this state, with any previous value saved in the
3788state block.
2ab05381 3789
c476f425
DM
3790CURLYX then jumps straight to the WHILEM op, rather than executing A,
3791since the pattern may possibly match zero times (i.e. it's a while {} loop
3792rather than a do {} while loop).
2ab05381 3793
c476f425
DM
3794Each entry to WHILEM represents a successful match of A. The count in the
3795CURLYX block is incremented, another WHILEM state is pushed, and execution
3796passes to A or B depending on greediness and the current count.
2ab05381 3797
c476f425
DM
3798For example, if matching against the string a1a2a3b (where the aN are
3799substrings that match /A/), then the match progresses as follows: (the
3800pushed states are interspersed with the bits of strings matched so far):
2ab05381 3801
c476f425
DM
3802 <CURLYX cnt=-1>
3803 <CURLYX cnt=0><WHILEM>
3804 <CURLYX cnt=1><WHILEM> a1 <WHILEM>
3805 <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
3806 <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
3807 <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
2ab05381 3808
c476f425
DM
3809(Contrast this with something like CURLYM, which maintains only a single
3810backtrack state:
2ab05381 3811
c476f425
DM
3812 <CURLYM cnt=0> a1
3813 a1 <CURLYM cnt=1> a2
3814 a1 a2 <CURLYM cnt=2> a3
3815 a1 a2 a3 <CURLYM cnt=3> b
3816)
2ab05381 3817
c476f425
DM
3818Each WHILEM state block marks a point to backtrack to upon partial failure
3819of A or B, and also contains some minor state data related to that
3820iteration. The CURLYX block, pointed to by cur_curlyx, contains the
3821overall state, such as the count, and pointers to the A and B ops.
2ab05381 3822
c476f425
DM
3823This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
3824must always point to the *current* CURLYX block, the rules are:
2ab05381 3825
c476f425
DM
3826When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
3827and set cur_curlyx to point the new block.
2ab05381 3828
c476f425
DM
3829When popping the CURLYX block after a successful or unsuccessful match,
3830restore the previous cur_curlyx.
2ab05381 3831
c476f425
DM
3832When WHILEM is about to execute B, save the current cur_curlyx, and set it
3833to the outer one saved in the CURLYX block.
2ab05381 3834
c476f425
DM
3835When popping the WHILEM block after a successful or unsuccessful B match,
3836restore the previous cur_curlyx.
2ab05381 3837
c476f425
DM
3838Here's an example for the pattern (AI* BI)*BO
3839I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
2ab05381 3840
c476f425
DM
3841cur_
3842curlyx backtrack stack
3843------ ---------------
3844NULL
3845CO <CO prev=NULL> <WO>
3846CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
3847CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
3848NULL <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
2ab05381 3849
c476f425
DM
3850At this point the pattern succeeds, and we work back down the stack to
3851clean up, restoring as we go:
95b24440 3852
c476f425
DM
3853CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
3854CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
3855CO <CO prev=NULL> <WO>
3856NULL
a0374537 3857
c476f425
DM
3858*******************************************************************/
3859
3860#define ST st->u.curlyx
3861
3862 case CURLYX: /* start of /A*B/ (for complex A) */
3863 {
3864 /* No need to save/restore up to this paren */
3865 I32 parenfloor = scan->flags;
3866
3867 assert(next); /* keep Coverity happy */
3868 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3869 next += ARG(next);
3870
3871 /* XXXX Probably it is better to teach regpush to support
3872 parenfloor > PL_regsize... */
3873 if (parenfloor > (I32)*PL_reglastparen)
3874 parenfloor = *PL_reglastparen; /* Pessimization... */
3875
3876 ST.prev_curlyx= cur_curlyx;
3877 cur_curlyx = st;
3878 ST.cp = PL_savestack_ix;
3879
3880 /* these fields contain the state of the current curly.
3881 * they are accessed by subsequent WHILEMs */
3882 ST.parenfloor = parenfloor;
3883 ST.min = ARG1(scan);
3884 ST.max = ARG2(scan);
3885 ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3886 ST.B = next;
24d3c4a9
DM
3887 ST.minmod = minmod;
3888 minmod = 0;
c476f425
DM
3889 ST.count = -1; /* this will be updated by WHILEM */
3890 ST.lastloc = NULL; /* this will be updated by WHILEM */
3891
3892 PL_reginput = locinput;
3893 PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
5f66b61c 3894 /* NOTREACHED */
c476f425 3895 }
a0d0e21e 3896
c476f425 3897 case CURLYX_end: /* just finished matching all of A*B */
f0852a51
YO
3898 if (PL_reg_eval_set){
3899 SV *pres= GvSV(PL_replgv);
3900 SvREFCNT_inc(pres);
3901 regcpblow(ST.cp);
3902 sv_setsv(GvSV(PL_replgv), pres);
3903 SvREFCNT_dec(pres);
3904 } else {
3905 regcpblow(ST.cp);
3906 }
c476f425
DM
3907 cur_curlyx = ST.prev_curlyx;
3908 sayYES;
3909 /* NOTREACHED */
a0d0e21e 3910
c476f425
DM
3911 case CURLYX_end_fail: /* just failed to match all of A*B */
3912 regcpblow(ST.cp);
3913 cur_curlyx = ST.prev_curlyx;
3914 sayNO;
3915 /* NOTREACHED */
4633a7c4 3916
a0d0e21e 3917
c476f425
DM
3918#undef ST
3919#define ST st->u.whilem
3920
3921 case WHILEM: /* just matched an A in /A*B/ (for complex A) */
3922 {
3923 /* see the discussion above about CURLYX/WHILEM */
c476f425
DM
3924 I32 n;
3925 assert(cur_curlyx); /* keep Coverity happy */
3926 n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
3927 ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
3928 ST.cache_offset = 0;
3929 ST.cache_mask = 0;
3930
3931 PL_reginput = locinput;
3932
3933 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
3934 "%*s whilem: matched %ld out of %ld..%ld\n",
3935 REPORT_CODE_OFF+depth*2, "", (long)n,
3936 (long)cur_curlyx->u.curlyx.min,
3937 (long)cur_curlyx->u.curlyx.max)
3938 );
a0d0e21e 3939
c476f425 3940 /* First just match a string of min A's. */
a0d0e21e 3941
c476f425
DM
3942 if (n < cur_curlyx->u.curlyx.min) {
3943 cur_curlyx->u.curlyx.lastloc = locinput;
3944 PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
3945 /* NOTREACHED */
3946 }
3947
3948 /* If degenerate A matches "", assume A done. */
3949
3950 if (locinput == cur_curlyx->u.curlyx.lastloc) {
3951 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
3952 "%*s whilem: empty match detected, trying continuation...\n",
3953 REPORT_CODE_OFF+depth*2, "")
3954 );
3955 goto do_whilem_B_max;
3956 }
3957
3958 /* super-linear cache processing */
3959
3960 if (scan->flags) {
a0d0e21e 3961
2c2d71f5 3962 if (!PL_reg_maxiter) {
c476f425
DM
3963 /* start the countdown: Postpone detection until we
3964 * know the match is not *that* much linear. */
2c2d71f5 3965 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
66bf836d
DM
3966 /* possible overflow for long strings and many CURLYX's */
3967 if (PL_reg_maxiter < 0)
3968 PL_reg_maxiter = I32_MAX;
2c2d71f5
JH
3969 PL_reg_leftiter = PL_reg_maxiter;
3970 }
c476f425 3971
2c2d71f5 3972 if (PL_reg_leftiter-- == 0) {
c476f425 3973 /* initialise cache */
3298f257 3974 const I32 size = (PL_reg_maxiter + 7)/8;
2c2d71f5 3975 if (PL_reg_poscache) {
eb160463 3976 if ((I32)PL_reg_poscache_size < size) {
2c2d71f5
JH
3977 Renew(PL_reg_poscache, size, char);
3978 PL_reg_poscache_size = size;
3979 }
3980 Zero(PL_reg_poscache, size, char);
3981 }
3982 else {
3983 PL_reg_poscache_size = size;
a02a5408 3984 Newxz(PL_reg_poscache, size, char);
2c2d71f5 3985 }
c476f425
DM
3986 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
3987 "%swhilem: Detected a super-linear match, switching on caching%s...\n",
3988 PL_colors[4], PL_colors[5])
3989 );
2c2d71f5 3990 }
c476f425 3991
2c2d71f5 3992 if (PL_reg_leftiter < 0) {
c476f425
DM
3993 /* have we already failed at this position? */
3994 I32 offset, mask;
3995 offset = (scan->flags & 0xf) - 1
3996 + (locinput - PL_bostr) * (scan->flags>>4);
3997 mask = 1 << (offset % 8);
3998 offset /= 8;
3999 if (PL_reg_poscache[offset] & mask) {
4000 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4001 "%*s whilem: (cache) already tried at this position...\n",
4002 REPORT_CODE_OFF+depth*2, "")
2c2d71f5 4003 );
3298f257 4004 sayNO; /* cache records failure */
2c2d71f5 4005 }
c476f425
DM
4006 ST.cache_offset = offset;
4007 ST.cache_mask = mask;
2c2d71f5 4008 }
c476f425 4009 }
2c2d71f5 4010
c476f425 4011 /* Prefer B over A for minimal matching. */
a687059c 4012
c476f425
DM
4013 if (cur_curlyx->u.curlyx.minmod) {
4014 ST.save_curlyx = cur_curlyx;
4015 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4016 ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4017 REGCP_SET(ST.lastcp);
4018 PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4019 /* NOTREACHED */
4020 }
a0d0e21e 4021
c476f425
DM
4022 /* Prefer A over B for maximal matching. */
4023
4024 if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
4025 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4026 cur_curlyx->u.curlyx.lastloc = locinput;
4027 REGCP_SET(ST.lastcp);
4028 PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
4029 /* NOTREACHED */
4030 }
4031 goto do_whilem_B_max;
4032 }
4033 /* NOTREACHED */
4034
4035 case WHILEM_B_min: /* just matched B in a minimal match */
4036 case WHILEM_B_max: /* just matched B in a maximal match */
4037 cur_curlyx = ST.save_curlyx;
4038 sayYES;
4039 /* NOTREACHED */
4040
4041 case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
4042 cur_curlyx = ST.save_curlyx;
4043 cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4044 cur_curlyx->u.curlyx.count--;
4045 CACHEsayNO;
4046 /* NOTREACHED */
4047
4048 case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
4049 REGCP_UNWIND(ST.lastcp);
4050 regcppop(rex);
4051 /* FALL THROUGH */
4052 case WHILEM_A_pre_fail: /* just failed to match even minimal A */
4053 cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4054 cur_curlyx->u.curlyx.count--;
4055 CACHEsayNO;
4056 /* NOTREACHED */
4057
4058 case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
4059 REGCP_UNWIND(ST.lastcp);
4060 regcppop(rex); /* Restore some previous $<digit>s? */
4061 PL_reginput = locinput;
4062 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4063 "%*s whilem: failed, trying continuation...\n",
4064 REPORT_CODE_OFF+depth*2, "")
4065 );
4066 do_whilem_B_max:
4067 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4068 && ckWARN(WARN_REGEXP)
4069 && !(PL_reg_flags & RF_warned))
4070 {
4071 PL_reg_flags |= RF_warned;
4072 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4073 "Complex regular subexpression recursion",
4074 REG_INFTY - 1);
4075 }
4076
4077 /* now try B */
4078 ST.save_curlyx = cur_curlyx;
4079 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4080 PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4081 /* NOTREACHED */
4082
4083 case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4084 cur_curlyx = ST.save_curlyx;
4085 REGCP_UNWIND(ST.lastcp);
4086 regcppop(rex);
4087
4088 if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4089 /* Maximum greed exceeded */
4090 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4091 && ckWARN(WARN_REGEXP)
4092 && !(PL_reg_flags & RF_warned))
4093 {
3280af22 4094 PL_reg_flags |= RF_warned;
c476f425
DM
4095 Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4096 "%s limit (%d) exceeded",
4097 "Complex regular subexpression recursion",
4098 REG_INFTY - 1);
a0d0e21e 4099 }
c476f425 4100 cur_curlyx->u.curlyx.count--;
3ab3c9b4 4101 CACHEsayNO;
a0d0e21e 4102 }
c476f425
DM
4103
4104 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4105 "%*s trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4106 );
4107 /* Try grabbing another A and see if it helps. */
4108 PL_reginput = locinput;
4109 cur_curlyx->u.curlyx.lastloc = locinput;
4110 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4111 REGCP_SET(ST.lastcp);
4112 PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
5f66b61c 4113 /* NOTREACHED */
40a82448
DM
4114
4115#undef ST
4116#define ST st->u.branch
4117
4118 case BRANCHJ: /* /(...|A|...)/ with long next pointer */
c277df42
IZ
4119 next = scan + ARG(scan);
4120 if (next == scan)
4121 next = NULL;
40a82448
DM
4122 scan = NEXTOPER(scan);
4123 /* FALL THROUGH */
c277df42 4124
40a82448
DM
4125 case BRANCH: /* /(...|A|...)/ */
4126 scan = NEXTOPER(scan); /* scan now points to inner node */
5d458dd8
YO
4127 if ((!next || (OP(next) != BRANCH && OP(next) != BRANCHJ))
4128 && !has_cutgroup)
4129 {
40a82448
DM
4130 /* last branch; skip state push and jump direct to node */
4131 continue;
5d458dd8 4132 }
40a82448
DM
4133 ST.lastparen = *PL_reglastparen;
4134 ST.next_branch = next;
4135 REGCP_SET(ST.cp);
4136 PL_reginput = locinput;
02db2b7b 4137
40a82448 4138 /* Now go into the branch */
5d458dd8
YO
4139 if (has_cutgroup) {
4140 PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4141 } else {
4142 PUSH_STATE_GOTO(BRANCH_next, scan);
4143 }
40a82448 4144 /* NOTREACHED */
5d458dd8
YO
4145 case CUTGROUP:
4146 PL_reginput = locinput;
4147 sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
f8fc2ecf 4148 (SV*)rexi->data->data[ ARG( scan ) ];
5d458dd8
YO
4149 PUSH_STATE_GOTO(CUTGROUP_next,next);
4150 /* NOTREACHED */
4151 case CUTGROUP_next_fail:
4152 do_cutgroup = 1;
4153 no_final = 1;
4154 if (st->u.mark.mark_name)
4155 sv_commit = st->u.mark.mark_name;
4156 sayNO;
4157 /* NOTREACHED */
4158 case BRANCH_next:
4159 sayYES;
4160 /* NOTREACHED */
40a82448 4161 case BRANCH_next_fail: /* that branch failed; try the next, if any */
5d458dd8
YO
4162 if (do_cutgroup) {
4163 do_cutgroup = 0;
4164 no_final = 0;
4165 }
40a82448
DM
4166 REGCP_UNWIND(ST.cp);
4167 for (n = *PL_reglastparen; n > ST.lastparen; n--)
4168 PL_regendp[n] = -1;
4169 *PL_reglastparen = n;
0a4db386 4170 /*dmq: *PL_reglastcloseparen = n; */
40a82448
DM
4171 scan = ST.next_branch;
4172 /* no more branches? */
5d458dd8
YO
4173 if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4174 DEBUG_EXECUTE_r({
4175 PerlIO_printf( Perl_debug_log,
4176 "%*s %sBRANCH failed...%s\n",
4177 REPORT_CODE_OFF+depth*2, "",
4178 PL_colors[4],
4179 PL_colors[5] );
4180 });
4181 sayNO_SILENT;
4182 }
40a82448
DM
4183 continue; /* execute next BRANCH[J] op */
4184 /* NOTREACHED */
4185
a0d0e21e 4186 case MINMOD:
24d3c4a9 4187 minmod = 1;
a0d0e21e 4188 break;
40a82448
DM
4189
4190#undef ST
4191#define ST st->u.curlym
4192
4193 case CURLYM: /* /A{m,n}B/ where A is fixed-length */
4194
4195 /* This is an optimisation of CURLYX that enables us to push
4196 * only a single backtracking state, no matter now many matches
4197 * there are in {m,n}. It relies on the pattern being constant
4198 * length, with no parens to influence future backrefs
4199 */
4200
4201 ST.me = scan;
dc45a647 4202 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
40a82448
DM
4203
4204 /* if paren positive, emulate an OPEN/CLOSE around A */
4205 if (ST.me->flags) {
3b6647e0 4206 U32 paren = ST.me->flags;
40a82448
DM
4207 if (paren > PL_regsize)
4208 PL_regsize = paren;
3b6647e0 4209 if (paren > *PL_reglastparen)
40a82448 4210 *PL_reglastparen = paren;
c277df42 4211 scan += NEXT_OFF(scan); /* Skip former OPEN. */
6407bf3b 4212 }
40a82448
DM
4213 ST.A = scan;
4214 ST.B = next;
4215 ST.alen = 0;
4216 ST.count = 0;
24d3c4a9
DM
4217 ST.minmod = minmod;
4218 minmod = 0;
40a82448
DM
4219 ST.c1 = CHRTEST_UNINIT;
4220 REGCP_SET(ST.cp);
6407bf3b 4221
40a82448
DM
4222 if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
4223 goto curlym_do_B;
4224
4225 curlym_do_A: /* execute the A in /A{m,n}B/ */
6407bf3b 4226 PL_reginput = locinput;
40a82448
DM
4227 PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
4228 /* NOTREACHED */
5f80c4cf 4229
40a82448
DM
4230 case CURLYM_A: /* we've just matched an A */
4231 locinput = st->locinput;
4232 nextchr = UCHARAT(locinput);
4233
4234 ST.count++;
4235 /* after first match, determine A's length: u.curlym.alen */
4236 if (ST.count == 1) {
4237 if (PL_reg_match_utf8) {
4238 char *s = locinput;
4239 while (s < PL_reginput) {
4240 ST.alen++;
4241 s += UTF8SKIP(s);
4242 }
4243 }
4244 else {
4245 ST.alen = PL_reginput - locinput;
4246 }
4247 if (ST.alen == 0)
4248 ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4249 }
0cadcf80
DM
4250 DEBUG_EXECUTE_r(
4251 PerlIO_printf(Perl_debug_log,
40a82448 4252 "%*s CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
5bc10b2c 4253 (int)(REPORT_CODE_OFF+(depth*2)), "",
40a82448 4254 (IV) ST.count, (IV)ST.alen)
0cadcf80
DM
4255 );
4256
40a82448 4257 locinput = PL_reginput;
0a4db386
YO
4258
4259 if (cur_eval && cur_eval->u.eval.close_paren &&
24b23f37 4260 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
0a4db386
YO
4261 goto fake_end;
4262
4263 if ( ST.count < (ST.minmod ? ARG1(ST.me) : ARG2(ST.me)) )
40a82448
DM
4264 goto curlym_do_A; /* try to match another A */
4265 goto curlym_do_B; /* try to match B */
5f80c4cf 4266
40a82448
DM
4267 case CURLYM_A_fail: /* just failed to match an A */
4268 REGCP_UNWIND(ST.cp);
0a4db386
YO
4269
4270 if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
4271 || (cur_eval && cur_eval->u.eval.close_paren &&
24b23f37 4272 cur_eval->u.eval.close_paren == (U32)ST.me->flags))
40a82448 4273 sayNO;
0cadcf80 4274
40a82448
DM
4275 curlym_do_B: /* execute the B in /A{m,n}B/ */
4276 PL_reginput = locinput;
4277 if (ST.c1 == CHRTEST_UNINIT) {
4278 /* calculate c1 and c2 for possible match of 1st char
4279 * following curly */
4280 ST.c1 = ST.c2 = CHRTEST_VOID;
4281 if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4282 regnode *text_node = ST.B;
4283 if (! HAS_TEXT(text_node))
4284 FIND_NEXT_IMPT(text_node);
4285 if (HAS_TEXT(text_node)
4286 && PL_regkind[OP(text_node)] != REF)
4287 {
4288 ST.c1 = (U8)*STRING(text_node);
4289 ST.c2 =
4290 (OP(text_node) == EXACTF || OP(text_node) == REFF)
4291 ? PL_fold[ST.c1]
4292 : (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
4293 ? PL_fold_locale[ST.c1]
4294 : ST.c1;
c277df42 4295 }
c277df42 4296 }
40a82448
DM
4297 }
4298
4299 DEBUG_EXECUTE_r(
4300 PerlIO_printf(Perl_debug_log,
4301 "%*s CURLYM trying tail with matches=%"IVdf"...\n",
5bc10b2c 4302 (int)(REPORT_CODE_OFF+(depth*2)),
40a82448
DM
4303 "", (IV)ST.count)
4304 );
4305 if (ST.c1 != CHRTEST_VOID
4306 && UCHARAT(PL_reginput) != ST.c1
4307 && UCHARAT(PL_reginput) != ST.c2)
4308 {
4309 /* simulate B failing */
4310 state_num = CURLYM_B_fail;
4311 goto reenter_switch;
4312 }
4313
4314 if (ST.me->flags) {
4315 /* mark current A as captured */
4316 I32 paren = ST.me->flags;
4317 if (ST.count) {
4318 PL_regstartp[paren]
4319 = HOPc(PL_reginput, -ST.alen) - PL_bostr;
4320 PL_regendp[paren] = PL_reginput - PL_bostr;
0a4db386 4321 /*dmq: *PL_reglastcloseparen = paren; */
c277df42 4322 }
40a82448
DM
4323 else
4324 PL_regendp[paren] = -1;
0a4db386 4325 if (cur_eval && cur_eval->u.eval.close_paren &&
24b23f37 4326 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
0a4db386
YO
4327 {
4328 if (ST.count)
4329 goto fake_end;
4330 else
4331 sayNO;
4332 }
c277df42 4333 }
0a4db386 4334
40a82448 4335 PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
5f66b61c 4336 /* NOTREACHED */
40a82448
DM
4337
4338 case CURLYM_B_fail: /* just failed to match a B */
4339 REGCP_UNWIND(ST.cp);
4340 if (ST.minmod) {
4341 if (ST.count == ARG2(ST.me) /* max */)
4342 sayNO;
4343 goto curlym_do_A; /* try to match a further A */
4344 }
4345 /* backtrack one A */
4346 if (ST.count == ARG1(ST.me) /* min */)
4347 sayNO;
4348 ST.count--;
4349 locinput = HOPc(locinput, -ST.alen);
4350 goto curlym_do_B; /* try to match B */
4351
c255a977
DM
4352#undef ST
4353#define ST st->u.curly
40a82448 4354
c255a977
DM
4355#define CURLY_SETPAREN(paren, success) \
4356 if (paren) { \
4357 if (success) { \
4358 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr; \
4359 PL_regendp[paren] = locinput - PL_bostr; \
0a4db386 4360 *PL_reglastcloseparen = paren; \
c255a977
DM
4361 } \
4362 else \
4363 PL_regendp[paren] = -1; \
4364 }
4365
4366 case STAR: /* /A*B/ where A is width 1 */
4367 ST.paren = 0;
4368 ST.min = 0;
4369 ST.max = REG_INFTY;
a0d0e21e
LW
4370 scan = NEXTOPER(scan);
4371 goto repeat;
c255a977
DM
4372 case PLUS: /* /A+B/ where A is width 1 */
4373 ST.paren = 0;
4374 ST.min = 1;
4375 ST.max = REG_INFTY;
c277df42 4376 scan = NEXTOPER(scan);
c255a977
DM
4377 goto repeat;
4378 case CURLYN: /* /(A){m,n}B/ where A is width 1 */
4379 ST.paren = scan->flags; /* Which paren to set */
4380 if (ST.paren > PL_regsize)
4381 PL_regsize = ST.paren;
3b6647e0 4382 if (ST.paren > *PL_reglastparen)
c255a977
DM
4383 *PL_reglastparen = ST.paren;
4384 ST.min = ARG1(scan); /* min to match */
4385 ST.max = ARG2(scan); /* max to match */
0a4db386 4386 if (cur_eval && cur_eval->u.eval.close_paren &&
86413ec0 4387 cur_eval->u.eval.close_paren == (U32)ST.paren) {
0a4db386
YO
4388 ST.min=1;
4389 ST.max=1;
4390 }
c255a977
DM
4391 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4392 goto repeat;
4393 case CURLY: /* /A{m,n}B/ where A is width 1 */
4394 ST.paren = 0;
4395 ST.min = ARG1(scan); /* min to match */
4396 ST.max = ARG2(scan); /* max to match */
4397 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42 4398 repeat:
a0d0e21e
LW
4399 /*
4400 * Lookahead to avoid useless match attempts
4401 * when we know what character comes next.
c255a977 4402 *
5f80c4cf
JP
4403 * Used to only do .*x and .*?x, but now it allows
4404 * for )'s, ('s and (?{ ... })'s to be in the way
4405 * of the quantifier and the EXACT-like node. -- japhy
4406 */
4407
c255a977
DM
4408 if (ST.min > ST.max) /* XXX make this a compile-time check? */
4409 sayNO;
cca55fe3 4410 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
4411 U8 *s;
4412 regnode *text_node = next;
4413
3dab1dad
YO
4414 if (! HAS_TEXT(text_node))
4415 FIND_NEXT_IMPT(text_node);
5f80c4cf 4416
9e137952 4417 if (! HAS_TEXT(text_node))
c255a977 4418 ST.c1 = ST.c2 = CHRTEST_VOID;
5f80c4cf 4419 else {
3dab1dad 4420 if (PL_regkind[OP(text_node)] == REF) {
c255a977 4421 ST.c1 = ST.c2 = CHRTEST_VOID;
44a68960 4422 goto assume_ok_easy;
cca55fe3 4423 }
be8e71aa
YO
4424 else
4425 s = (U8*)STRING(text_node);
5f80c4cf
JP
4426
4427 if (!UTF) {
c255a977 4428 ST.c2 = ST.c1 = *s;
f65d3ee7 4429 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
c255a977 4430 ST.c2 = PL_fold[ST.c1];
f65d3ee7 4431 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
c255a977 4432 ST.c2 = PL_fold_locale[ST.c1];
1aa99e6b 4433 }
5f80c4cf 4434 else { /* UTF */
f65d3ee7 4435 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 4436 STRLEN ulen1, ulen2;
89ebb4a3
JH
4437 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4438 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
a2a2844f
JH
4439
4440 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4441 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
e294cc5d
JH
4442#ifdef EBCDIC
4443 ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4444 ckWARN(WARN_UTF8) ?
4445 0 : UTF8_ALLOW_ANY);
4446 ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4447 ckWARN(WARN_UTF8) ?
4448 0 : UTF8_ALLOW_ANY);
4449#else
c255a977 4450 ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
e294cc5d 4451 uniflags);
c255a977 4452 ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
e294cc5d
JH
4453 uniflags);
4454#endif
5f80c4cf
JP
4455 }
4456 else {
c255a977 4457 ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
041457d9 4458 uniflags);
5f80c4cf 4459 }
1aa99e6b
IH
4460 }
4461 }
bbce6d69 4462 }
a0d0e21e 4463 else
c255a977 4464 ST.c1 = ST.c2 = CHRTEST_VOID;
cca55fe3 4465 assume_ok_easy:
c255a977
DM
4466
4467 ST.A = scan;
4468 ST.B = next;
3280af22 4469 PL_reginput = locinput;
24d3c4a9
DM
4470 if (minmod) {
4471 minmod = 0;
e2e6a0f1 4472 if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4633a7c4 4473 sayNO;
c255a977 4474 ST.count = ST.min;
a0ed51b3 4475 locinput = PL_reginput;
c255a977
DM
4476 REGCP_SET(ST.cp);
4477 if (ST.c1 == CHRTEST_VOID)
4478 goto curly_try_B_min;
4479
4480 ST.oldloc = locinput;
4481
4482 /* set ST.maxpos to the furthest point along the
4483 * string that could possibly match */
4484 if (ST.max == REG_INFTY) {
4485 ST.maxpos = PL_regeol - 1;
4486 if (do_utf8)
4487 while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4488 ST.maxpos--;
4489 }
4490 else if (do_utf8) {
4491 int m = ST.max - ST.min;
4492 for (ST.maxpos = locinput;
4493 m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4494 ST.maxpos += UTF8SKIP(ST.maxpos);
4495 }
4496 else {
4497 ST.maxpos = locinput + ST.max - ST.min;
4498 if (ST.maxpos >= PL_regeol)
4499 ST.maxpos = PL_regeol - 1;
4500 }
4501 goto curly_try_B_min_known;
4502
4503 }
4504 else {
e2e6a0f1 4505 ST.count = regrepeat(rex, ST.A, ST.max, depth);
c255a977
DM
4506 locinput = PL_reginput;
4507 if (ST.count < ST.min)
4508 sayNO;
4509 if ((ST.count > ST.min)
4510 && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4511 {
4512 /* A{m,n} must come at the end of the string, there's
4513 * no point in backing off ... */
4514 ST.min = ST.count;
4515 /* ...except that $ and \Z can match before *and* after
4516 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
4517 We may back off by one in this case. */
4518 if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4519 ST.min--;
4520 }
4521 REGCP_SET(ST.cp);
4522 goto curly_try_B_max;
4523 }
4524 /* NOTREACHED */
4525
4526
4527 case CURLY_B_min_known_fail:
4528 /* failed to find B in a non-greedy match where c1,c2 valid */
4529 if (ST.paren && ST.count)
4530 PL_regendp[ST.paren] = -1;
4531
4532 PL_reginput = locinput; /* Could be reset... */
4533 REGCP_UNWIND(ST.cp);
4534 /* Couldn't or didn't -- move forward. */
4535 ST.oldloc = locinput;
4536 if (do_utf8)
4537 locinput += UTF8SKIP(locinput);
4538 else
4539 locinput++;
4540 ST.count++;
4541 curly_try_B_min_known:
4542 /* find the next place where 'B' could work, then call B */
4543 {
4544 int n;
4545 if (do_utf8) {
4546 n = (ST.oldloc == locinput) ? 0 : 1;
4547 if (ST.c1 == ST.c2) {
4548 STRLEN len;
4549 /* set n to utf8_distance(oldloc, locinput) */
4550 while (locinput <= ST.maxpos &&
4551 utf8n_to_uvchr((U8*)locinput,
4552 UTF8_MAXBYTES, &len,
4553 uniflags) != (UV)ST.c1) {
4554 locinput += len;
4555 n++;
4556 }
1aa99e6b
IH
4557 }
4558 else {
c255a977
DM
4559 /* set n to utf8_distance(oldloc, locinput) */
4560 while (locinput <= ST.maxpos) {
4561 STRLEN len;
4562 const UV c = utf8n_to_uvchr((U8*)locinput,
4563 UTF8_MAXBYTES, &len,
4564 uniflags);
4565 if (c == (UV)ST.c1 || c == (UV)ST.c2)
4566 break;
4567 locinput += len;
4568 n++;
1aa99e6b 4569 }
0fe9bf95
IZ
4570 }
4571 }
c255a977
DM
4572 else {
4573 if (ST.c1 == ST.c2) {
4574 while (locinput <= ST.maxpos &&
4575 UCHARAT(locinput) != ST.c1)
4576 locinput++;
bbce6d69 4577 }
c255a977
DM
4578 else {
4579 while (locinput <= ST.maxpos
4580 && UCHARAT(locinput) != ST.c1
4581 && UCHARAT(locinput) != ST.c2)
4582 locinput++;
a0ed51b3 4583 }
c255a977
DM
4584 n = locinput - ST.oldloc;
4585 }
4586 if (locinput > ST.maxpos)
4587 sayNO;
4588 /* PL_reginput == oldloc now */
4589 if (n) {
4590 ST.count += n;
e2e6a0f1 4591 if (regrepeat(rex, ST.A, n, depth) < n)
4633a7c4 4592 sayNO;
a0d0e21e 4593 }
c255a977
DM
4594 PL_reginput = locinput;
4595 CURLY_SETPAREN(ST.paren, ST.count);
0a4db386 4596 if (cur_eval && cur_eval->u.eval.close_paren &&
86413ec0 4597 cur_eval->u.eval.close_paren == (U32)ST.paren) {
0a4db386
YO
4598 goto fake_end;
4599 }
c255a977 4600 PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
a0d0e21e 4601 }
c255a977
DM
4602 /* NOTREACHED */
4603
4604
4605 case CURLY_B_min_fail:
4606 /* failed to find B in a non-greedy match where c1,c2 invalid */
4607 if (ST.paren && ST.count)
4608 PL_regendp[ST.paren] = -1;
4609
4610 REGCP_UNWIND(ST.cp);
4611 /* failed -- move forward one */
4612 PL_reginput = locinput;
e2e6a0f1 4613 if (regrepeat(rex, ST.A, 1, depth)) {
c255a977 4614 ST.count++;
a0ed51b3 4615 locinput = PL_reginput;
c255a977
DM
4616 if (ST.count <= ST.max || (ST.max == REG_INFTY &&
4617 ST.count > 0)) /* count overflow ? */
15272685 4618 {
c255a977
DM
4619 curly_try_B_min:
4620 CURLY_SETPAREN(ST.paren, ST.count);
0a4db386 4621 if (cur_eval && cur_eval->u.eval.close_paren &&
86413ec0 4622 cur_eval->u.eval.close_paren == (U32)ST.paren) {
0a4db386
YO
4623 goto fake_end;
4624 }
c255a977 4625 PUSH_STATE_GOTO(CURLY_B_min, ST.B);
a0d0e21e
LW
4626 }
4627 }
4633a7c4 4628 sayNO;
c255a977
DM
4629 /* NOTREACHED */
4630
4631
4632 curly_try_B_max:
4633 /* a successful greedy match: now try to match B */
40d049e4 4634 if (cur_eval && cur_eval->u.eval.close_paren &&
86413ec0 4635 cur_eval->u.eval.close_paren == (U32)ST.paren) {
40d049e4
YO
4636 goto fake_end;
4637 }
c255a977
DM
4638 {
4639 UV c = 0;
4640 if (ST.c1 != CHRTEST_VOID)
4641 c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
4642 UTF8_MAXBYTES, 0, uniflags)
466787eb 4643 : (UV) UCHARAT(PL_reginput);
c255a977
DM
4644 /* If it could work, try it. */
4645 if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
4646 CURLY_SETPAREN(ST.paren, ST.count);
4647 PUSH_STATE_GOTO(CURLY_B_max, ST.B);
4648 /* NOTREACHED */
4649 }
4650 }
4651 /* FALL THROUGH */
4652 case CURLY_B_max_fail:
4653 /* failed to find B in a greedy match */
4654 if (ST.paren && ST.count)
4655 PL_regendp[ST.paren] = -1;
4656
4657 REGCP_UNWIND(ST.cp);
4658 /* back up. */
4659 if (--ST.count < ST.min)
4660 sayNO;
4661 PL_reginput = locinput = HOPc(locinput, -1);
4662 goto curly_try_B_max;
4663
4664#undef ST
4665
a0d0e21e 4666 case END:
6bda09f9 4667 fake_end:
faec1544
DM
4668 if (cur_eval) {
4669 /* we've just finished A in /(??{A})B/; now continue with B */
4670 I32 tmpix;
4671
4672
4673 st->u.eval.toggle_reg_flags
4674 = cur_eval->u.eval.toggle_reg_flags;
4675 PL_reg_flags ^= st->u.eval.toggle_reg_flags;
4676
4677 st->u.eval.prev_rex = rex; /* inner */
f8fc2ecf
YO
4678 rex = cur_eval->u.eval.prev_rex; /* outer */
4679 rexi = RXi_GET(rex);
faec1544
DM
4680 cur_curlyx = cur_eval->u.eval.prev_curlyx;
4681 ReREFCNT_inc(rex);
4682 st->u.eval.cp = regcppush(0); /* Save *all* the positions. */
4683 REGCP_SET(st->u.eval.lastcp);
4684 PL_reginput = locinput;
4685
4686 /* Restore parens of the outer rex without popping the
4687 * savestack */
4688 tmpix = PL_savestack_ix;
4689 PL_savestack_ix = cur_eval->u.eval.lastcp;
4690 regcppop(rex);
4691 PL_savestack_ix = tmpix;
4692
4693 st->u.eval.prev_eval = cur_eval;
4694 cur_eval = cur_eval->u.eval.prev_eval;
4695 DEBUG_EXECUTE_r(
2a49f0f5
JH
4696 PerlIO_printf(Perl_debug_log, "%*s EVAL trying tail ... %"UVxf"\n",
4697 REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
faec1544
DM
4698 PUSH_YES_STATE_GOTO(EVAL_AB,
4699 st->u.eval.prev_eval->u.eval.B); /* match B */
4700 }
4701
3b0527fe 4702 if (locinput < reginfo->till) {
a3621e74 4703 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
7821416a
IZ
4704 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4705 PL_colors[4],
4706 (long)(locinput - PL_reg_starttry),
3b0527fe 4707 (long)(reginfo->till - PL_reg_starttry),
7821416a 4708 PL_colors[5]));
58e23c8d 4709
262b90c4 4710 sayNO_SILENT; /* Cannot match: too short. */
7821416a
IZ
4711 }
4712 PL_reginput = locinput; /* put where regtry can find it */
262b90c4 4713 sayYES; /* Success! */
dad79028
DM
4714
4715 case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4716 DEBUG_EXECUTE_r(
4717 PerlIO_printf(Perl_debug_log,
4718 "%*s %ssubpattern success...%s\n",
5bc10b2c 4719 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
3280af22 4720 PL_reginput = locinput; /* put where regtry can find it */
262b90c4 4721 sayYES; /* Success! */
dad79028 4722
40a82448
DM
4723#undef ST
4724#define ST st->u.ifmatch
4725
4726 case SUSPEND: /* (?>A) */
4727 ST.wanted = 1;
9fe1d20c 4728 PL_reginput = locinput;
9041c2e3 4729 goto do_ifmatch;
dad79028 4730
40a82448
DM
4731 case UNLESSM: /* -ve lookaround: (?!A), or with flags, (?<!A) */
4732 ST.wanted = 0;
dad79028
DM
4733 goto ifmatch_trivial_fail_test;
4734
40a82448
DM
4735 case IFMATCH: /* +ve lookaround: (?=A), or with flags, (?<=A) */
4736 ST.wanted = 1;
dad79028 4737 ifmatch_trivial_fail_test:
a0ed51b3 4738 if (scan->flags) {
52657f30 4739 char * const s = HOPBACKc(locinput, scan->flags);
dad79028
DM
4740 if (!s) {
4741 /* trivial fail */
24d3c4a9
DM
4742 if (logical) {
4743 logical = 0;
4744 sw = 1 - (bool)ST.wanted;
dad79028 4745 }
40a82448 4746 else if (ST.wanted)
dad79028
DM
4747 sayNO;
4748 next = scan + ARG(scan);
4749 if (next == scan)
4750 next = NULL;
4751 break;
4752 }
efb30f32 4753 PL_reginput = s;
a0ed51b3
LW
4754 }
4755 else
4756 PL_reginput = locinput;
4757
c277df42 4758 do_ifmatch:
40a82448 4759 ST.me = scan;
24d3c4a9 4760 ST.logical = logical;
40a82448
DM
4761 /* execute body of (?...A) */
4762 PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
4763 /* NOTREACHED */
4764
4765 case IFMATCH_A_fail: /* body of (?...A) failed */
4766 ST.wanted = !ST.wanted;
4767 /* FALL THROUGH */
4768
4769 case IFMATCH_A: /* body of (?...A) succeeded */
24d3c4a9
DM
4770 if (ST.logical) {
4771 sw = (bool)ST.wanted;
40a82448
DM
4772 }
4773 else if (!ST.wanted)
4774 sayNO;
4775
4776 if (OP(ST.me) == SUSPEND)
4777 locinput = PL_reginput;
4778 else {
4779 locinput = PL_reginput = st->locinput;
4780 nextchr = UCHARAT(locinput);
4781 }
4782 scan = ST.me + ARG(ST.me);
4783 if (scan == ST.me)
4784 scan = NULL;
4785 continue; /* execute B */
4786
4787#undef ST
dad79028 4788
c277df42 4789 case LONGJMP:
c277df42
IZ
4790 next = scan + ARG(scan);
4791 if (next == scan)
4792 next = NULL;
a0d0e21e 4793 break;
54612592 4794 case COMMIT:
e2e6a0f1
YO
4795 reginfo->cutpoint = PL_regeol;
4796 /* FALLTHROUGH */
5d458dd8 4797 case PRUNE:
24b23f37 4798 PL_reginput = locinput;
e2e6a0f1 4799 if (!scan->flags)
f8fc2ecf 4800 sv_yes_mark = sv_commit = (SV*)rexi->data->data[ ARG( scan ) ];
54612592
YO
4801 PUSH_STATE_GOTO(COMMIT_next,next);
4802 /* NOTREACHED */
4803 case COMMIT_next_fail:
4804 no_final = 1;
4805 /* FALLTHROUGH */
7f69552c
YO
4806 case OPFAIL:
4807 sayNO;
e2e6a0f1
YO
4808 /* NOTREACHED */
4809
4810#define ST st->u.mark
4811 case MARKPOINT:
4812 ST.prev_mark = mark_state;
5d458dd8 4813 ST.mark_name = sv_commit = sv_yes_mark
f8fc2ecf 4814 = (SV*)rexi->data->data[ ARG( scan ) ];
e2e6a0f1
YO
4815 mark_state = st;
4816 ST.mark_loc = PL_reginput = locinput;
4817 PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
4818 /* NOTREACHED */
4819 case MARKPOINT_next:
4820 mark_state = ST.prev_mark;
4821 sayYES;
4822 /* NOTREACHED */
4823 case MARKPOINT_next_fail:
5d458dd8 4824 if (popmark && sv_eq(ST.mark_name,popmark))
e2e6a0f1
YO
4825 {
4826 if (ST.mark_loc > startpoint)
4827 reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
4828 popmark = NULL; /* we found our mark */
4829 sv_commit = ST.mark_name;
4830
4831 DEBUG_EXECUTE_r({
5d458dd8 4832 PerlIO_printf(Perl_debug_log,
e2e6a0f1
YO
4833 "%*s %ssetting cutpoint to mark:%"SVf"...%s\n",
4834 REPORT_CODE_OFF+depth*2, "",
4835 PL_colors[4], sv_commit, PL_colors[5]);
e2e6a0f1
YO
4836 });
4837 }
4838 mark_state = ST.prev_mark;
5d458dd8
YO
4839 sv_yes_mark = mark_state ?
4840 mark_state->u.mark.mark_name : NULL;
e2e6a0f1
YO
4841 sayNO;
4842 /* NOTREACHED */
5d458dd8
YO
4843 case SKIP:
4844 PL_reginput = locinput;
4845 if (scan->flags) {
4846 /* (*CUT) : if we fail we cut here*/
4847 ST.mark_name = NULL;
e2e6a0f1 4848 ST.mark_loc = locinput;
5d458dd8
YO
4849 PUSH_STATE_GOTO(SKIP_next,next);
4850 } else {
4851 /* (*CUT:NAME) : if there is a (*MARK:NAME) fail where it was,
4852 otherwise do nothing. Meaning we need to scan
4853 */
4854 regmatch_state *cur = mark_state;
f8fc2ecf 4855 SV *find = (SV*)rexi->data->data[ ARG( scan ) ];
5d458dd8
YO
4856
4857 while (cur) {
4858 if ( sv_eq( cur->u.mark.mark_name,
4859 find ) )
4860 {
4861 ST.mark_name = find;
4862 PUSH_STATE_GOTO( SKIP_next, next );
4863 }
4864 cur = cur->u.mark.prev_mark;
4865 }
e2e6a0f1 4866 }
5d458dd8
YO
4867 /* Didn't find our (*MARK:NAME) so ignore this (*CUT:NAME) */
4868 break;
4869 case SKIP_next_fail:
4870 if (ST.mark_name) {
4871 /* (*CUT:NAME) - Set up to search for the name as we
4872 collapse the stack*/
4873 popmark = ST.mark_name;
4874 } else {
4875 /* (*CUT) - No name, we cut here.*/
e2e6a0f1
YO
4876 if (ST.mark_loc > startpoint)
4877 reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5d458dd8
YO
4878 /* but we set sv_commit to latest mark_name if there
4879 is one so they can test to see how things lead to this
4880 cut */
4881 if (mark_state)
4882 sv_commit=mark_state->u.mark.mark_name;
4883 }
e2e6a0f1
YO
4884 no_final = 1;
4885 sayNO;
4886 /* NOTREACHED */
4887#undef ST
4888
a0d0e21e 4889 default:
b900a521 4890 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 4891 PTR2UV(scan), OP(scan));
cea2e8a9 4892 Perl_croak(aTHX_ "regexp memory corruption");
5d458dd8
YO
4893
4894 } /* end switch */
95b24440 4895
5d458dd8
YO
4896 /* switch break jumps here */
4897 scan = next; /* prepare to execute the next op and ... */
4898 continue; /* ... jump back to the top, reusing st */
95b24440
DM
4899 /* NOTREACHED */
4900
40a82448
DM
4901 push_yes_state:
4902 /* push a state that backtracks on success */
4903 st->u.yes.prev_yes_state = yes_state;
4904 yes_state = st;
4905 /* FALL THROUGH */
4906 push_state:
4907 /* push a new regex state, then continue at scan */
4908 {
4909 regmatch_state *newst;
4910
24b23f37
YO
4911 DEBUG_STACK_r({
4912 regmatch_state *cur = st;
4913 regmatch_state *curyes = yes_state;
4914 int curd = depth;
4915 regmatch_slab *slab = PL_regmatch_slab;
4916 for (;curd > -1;cur--,curd--) {
4917 if (cur < SLAB_FIRST(slab)) {
4918 slab = slab->prev;
4919 cur = SLAB_LAST(slab);
4920 }
4921 PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
4922 REPORT_CODE_OFF + 2 + depth * 2,"",
4923 curd, reg_name[cur->resume_state],
4924 (curyes == cur) ? "yes" : ""
4925 );
4926 if (curyes == cur)
4927 curyes = cur->u.yes.prev_yes_state;
4928 }
4929 } else
4930 DEBUG_STATE_pp("push")
4931 );
40a82448 4932 depth++;
40a82448
DM
4933 st->locinput = locinput;
4934 newst = st+1;
4935 if (newst > SLAB_LAST(PL_regmatch_slab))
4936 newst = S_push_slab(aTHX);
4937 PL_regmatch_state = newst;
786e8c11 4938
40a82448
DM
4939 locinput = PL_reginput;
4940 nextchr = UCHARAT(locinput);
4941 st = newst;
4942 continue;
4943 /* NOTREACHED */
4944 }
a0d0e21e 4945 }
a687059c 4946
a0d0e21e
LW
4947 /*
4948 * We get here only if there's trouble -- normally "case END" is
4949 * the terminating point.
4950 */
cea2e8a9 4951 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 4952 /*NOTREACHED*/
4633a7c4
LW
4953 sayNO;
4954
262b90c4 4955yes:
77cb431f
DM
4956 if (yes_state) {
4957 /* we have successfully completed a subexpression, but we must now
4958 * pop to the state marked by yes_state and continue from there */
77cb431f 4959 assert(st != yes_state);
5bc10b2c
DM
4960#ifdef DEBUGGING
4961 while (st != yes_state) {
4962 st--;
4963 if (st < SLAB_FIRST(PL_regmatch_slab)) {
4964 PL_regmatch_slab = PL_regmatch_slab->prev;
4965 st = SLAB_LAST(PL_regmatch_slab);
4966 }
e2e6a0f1 4967 DEBUG_STATE_r({
54612592
YO
4968 if (no_final) {
4969 DEBUG_STATE_pp("pop (no final)");
4970 } else {
4971 DEBUG_STATE_pp("pop (yes)");
4972 }
e2e6a0f1 4973 });
5bc10b2c
DM
4974 depth--;
4975 }
4976#else
77cb431f
DM
4977 while (yes_state < SLAB_FIRST(PL_regmatch_slab)
4978 || yes_state > SLAB_LAST(PL_regmatch_slab))
4979 {
4980 /* not in this slab, pop slab */
4981 depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
4982 PL_regmatch_slab = PL_regmatch_slab->prev;
4983 st = SLAB_LAST(PL_regmatch_slab);
4984 }
4985 depth -= (st - yes_state);
5bc10b2c 4986#endif
77cb431f
DM
4987 st = yes_state;
4988 yes_state = st->u.yes.prev_yes_state;
4989 PL_regmatch_state = st;
24b23f37 4990
5d458dd8
YO
4991 if (no_final) {
4992 locinput= st->locinput;
4993 nextchr = UCHARAT(locinput);
4994 }
54612592 4995 state_num = st->resume_state + no_final;
24d3c4a9 4996 goto reenter_switch;
77cb431f
DM
4997 }
4998
a3621e74 4999 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
e4584336 5000 PL_colors[4], PL_colors[5]));
02db2b7b 5001
95b24440 5002 result = 1;
aa283a38 5003 goto final_exit;
4633a7c4
LW
5004
5005no:
a3621e74 5006 DEBUG_EXECUTE_r(
7821416a 5007 PerlIO_printf(Perl_debug_log,
786e8c11 5008 "%*s %sfailed...%s\n",
5bc10b2c 5009 REPORT_CODE_OFF+depth*2, "",
786e8c11 5010 PL_colors[4], PL_colors[5])
7821416a 5011 );
aa283a38 5012
262b90c4 5013no_silent:
54612592
YO
5014 if (no_final) {
5015 if (yes_state) {
5016 goto yes;
5017 } else {
5018 goto final_exit;
5019 }
5020 }
aa283a38
DM
5021 if (depth) {
5022 /* there's a previous state to backtrack to */
40a82448
DM
5023 st--;
5024 if (st < SLAB_FIRST(PL_regmatch_slab)) {
5025 PL_regmatch_slab = PL_regmatch_slab->prev;
5026 st = SLAB_LAST(PL_regmatch_slab);
5027 }
5028 PL_regmatch_state = st;
40a82448
DM
5029 locinput= st->locinput;
5030 nextchr = UCHARAT(locinput);
5031
5bc10b2c
DM
5032 DEBUG_STATE_pp("pop");
5033 depth--;
262b90c4
DM
5034 if (yes_state == st)
5035 yes_state = st->u.yes.prev_yes_state;
5bc10b2c 5036
24d3c4a9
DM
5037 state_num = st->resume_state + 1; /* failure = success + 1 */
5038 goto reenter_switch;
95b24440 5039 }
24d3c4a9 5040 result = 0;
aa283a38 5041
262b90c4 5042 final_exit:
bbe252da 5043 if (rex->intflags & PREGf_VERBARG_SEEN) {
5d458dd8
YO
5044 SV *sv_err = get_sv("REGERROR", 1);
5045 SV *sv_mrk = get_sv("REGMARK", 1);
5046 if (result) {
e2e6a0f1 5047 sv_commit = &PL_sv_no;
5d458dd8
YO
5048 if (!sv_yes_mark)
5049 sv_yes_mark = &PL_sv_yes;
5050 } else {
5051 if (!sv_commit)
5052 sv_commit = &PL_sv_yes;
5053 sv_yes_mark = &PL_sv_no;
5054 }
5055 sv_setsv(sv_err, sv_commit);
5056 sv_setsv(sv_mrk, sv_yes_mark);
e2e6a0f1 5057 }
5d9a96ca
DM
5058 /* restore original high-water mark */
5059 PL_regmatch_slab = orig_slab;
5060 PL_regmatch_state = orig_state;
5061
5062 /* free all slabs above current one */
5063 if (orig_slab->next) {
c4fd8992 5064 regmatch_slab *sl = orig_slab->next;
5d9a96ca
DM
5065 orig_slab->next = NULL;
5066 while (sl) {
c4fd8992 5067 regmatch_slab * const osl = sl;
5d9a96ca 5068 sl = sl->next;
ad65c075 5069 Safefree(osl);
5d9a96ca
DM
5070 }
5071 }
5072
95b24440 5073 return result;
a687059c
LW
5074}
5075
5076/*
5077 - regrepeat - repeatedly match something simple, report how many
5078 */
5079/*
5080 * [This routine now assumes that it will only match on things of length 1.
5081 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 5082 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 5083 */
76e3520e 5084STATIC I32
e2e6a0f1 5085S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
a687059c 5086{
27da23d5 5087 dVAR;
a0d0e21e 5088 register char *scan;
a0d0e21e 5089 register I32 c;
3280af22 5090 register char *loceol = PL_regeol;
a0ed51b3 5091 register I32 hardcount = 0;
53c4c00c 5092 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 5093
3280af22 5094 scan = PL_reginput;
faf11cac
HS
5095 if (max == REG_INFTY)
5096 max = I32_MAX;
5097 else if (max < loceol - scan)
7f596f4c 5098 loceol = scan + max;
a0d0e21e 5099 switch (OP(p)) {
22c35a8c 5100 case REG_ANY:
1aa99e6b 5101 if (do_utf8) {
ffc61ed2 5102 loceol = PL_regeol;
1aa99e6b 5103 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
5104 scan += UTF8SKIP(scan);
5105 hardcount++;
5106 }
5107 } else {
5108 while (scan < loceol && *scan != '\n')
5109 scan++;
a0ed51b3
LW
5110 }
5111 break;
ffc61ed2 5112 case SANY:
def8e4ea
JH
5113 if (do_utf8) {
5114 loceol = PL_regeol;
a0804c9e 5115 while (scan < loceol && hardcount < max) {
def8e4ea
JH
5116 scan += UTF8SKIP(scan);
5117 hardcount++;
5118 }
5119 }
5120 else
5121 scan = loceol;
a0ed51b3 5122 break;
f33976b4
DB
5123 case CANY:
5124 scan = loceol;
5125 break;
090f7165
JH
5126 case EXACT: /* length of string is 1 */
5127 c = (U8)*STRING(p);
5128 while (scan < loceol && UCHARAT(scan) == c)
5129 scan++;
bbce6d69 5130 break;
5131 case EXACTF: /* length of string is 1 */
cd439c50 5132 c = (U8)*STRING(p);
bbce6d69 5133 while (scan < loceol &&
22c35a8c 5134 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 5135 scan++;
5136 break;
5137 case EXACTFL: /* length of string is 1 */
3280af22 5138 PL_reg_flags |= RF_tainted;
cd439c50 5139 c = (U8)*STRING(p);
bbce6d69 5140 while (scan < loceol &&
22c35a8c 5141 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
5142 scan++;
5143 break;
5144 case ANYOF:
ffc61ed2
JH
5145 if (do_utf8) {
5146 loceol = PL_regeol;
cfc92286 5147 while (hardcount < max && scan < loceol &&
32fc9b6a 5148 reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
ffc61ed2
JH
5149 scan += UTF8SKIP(scan);
5150 hardcount++;
5151 }
5152 } else {
32fc9b6a 5153 while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
ffc61ed2
JH
5154 scan++;
5155 }
a0d0e21e
LW
5156 break;
5157 case ALNUM:
1aa99e6b 5158 if (do_utf8) {
ffc61ed2 5159 loceol = PL_regeol;
1a4fad37 5160 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 5161 while (hardcount < max && scan < loceol &&
3568d838 5162 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
5163 scan += UTF8SKIP(scan);
5164 hardcount++;
5165 }
5166 } else {
5167 while (scan < loceol && isALNUM(*scan))
5168 scan++;
a0ed51b3
LW
5169 }
5170 break;
bbce6d69 5171 case ALNUML:
3280af22 5172 PL_reg_flags |= RF_tainted;
1aa99e6b 5173 if (do_utf8) {
ffc61ed2 5174 loceol = PL_regeol;
1aa99e6b
IH
5175 while (hardcount < max && scan < loceol &&
5176 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
5177 scan += UTF8SKIP(scan);
5178 hardcount++;
5179 }
5180 } else {
5181 while (scan < loceol && isALNUM_LC(*scan))
5182 scan++;
a0ed51b3
LW
5183 }
5184 break;
a0d0e21e 5185 case NALNUM:
1aa99e6b 5186 if (do_utf8) {
ffc61ed2 5187 loceol = PL_regeol;
1a4fad37 5188 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 5189 while (hardcount < max && scan < loceol &&
3568d838 5190 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
5191 scan += UTF8SKIP(scan);
5192 hardcount++;
5193 }
5194 } else {
5195 while (scan < loceol && !isALNUM(*scan))
5196 scan++;
a0ed51b3
LW
5197 }
5198 break;
bbce6d69 5199 case NALNUML:
3280af22 5200 PL_reg_flags |= RF_tainted;
1aa99e6b 5201 if (do_utf8) {
ffc61ed2 5202 loceol = PL_regeol;
1aa99e6b
IH
5203 while (hardcount < max && scan < loceol &&
5204 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
5205 scan += UTF8SKIP(scan);
5206 hardcount++;
5207 }
5208 } else {
5209 while (scan < loceol && !isALNUM_LC(*scan))
5210 scan++;
a0ed51b3
LW
5211 }
5212 break;
a0d0e21e 5213 case SPACE:
1aa99e6b 5214 if (do_utf8) {
ffc61ed2 5215 loceol = PL_regeol;
1a4fad37 5216 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 5217 while (hardcount < max && scan < loceol &&
3568d838
JH
5218 (*scan == ' ' ||
5219 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
5220 scan += UTF8SKIP(scan);
5221 hardcount++;
5222 }
5223 } else {
5224 while (scan < loceol && isSPACE(*scan))
5225 scan++;
a0ed51b3
LW
5226 }
5227 break;
bbce6d69 5228 case SPACEL:
3280af22 5229 PL_reg_flags |= RF_tainted;
1aa99e6b 5230 if (do_utf8) {
ffc61ed2 5231 loceol = PL_regeol;
1aa99e6b 5232 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
5233 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5234 scan += UTF8SKIP(scan);
5235 hardcount++;
5236 }
5237 } else {
5238 while (scan < loceol && isSPACE_LC(*scan))
5239 scan++;
a0ed51b3
LW
5240 }
5241 break;
a0d0e21e 5242 case NSPACE:
1aa99e6b 5243 if (do_utf8) {
ffc61ed2 5244 loceol = PL_regeol;
1a4fad37 5245 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 5246 while (hardcount < max && scan < loceol &&
3568d838
JH
5247 !(*scan == ' ' ||
5248 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
5249 scan += UTF8SKIP(scan);
5250 hardcount++;
5251 }
5252 } else {
5253 while (scan < loceol && !isSPACE(*scan))
5254 scan++;
5255 break;
a0ed51b3 5256 }
bbce6d69 5257 case NSPACEL:
3280af22 5258 PL_reg_flags |= RF_tainted;
1aa99e6b 5259 if (do_utf8) {
ffc61ed2 5260 loceol = PL_regeol;
1aa99e6b 5261 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
5262 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5263 scan += UTF8SKIP(scan);
5264 hardcount++;
5265 }
5266 } else {
5267 while (scan < loceol && !isSPACE_LC(*scan))
5268 scan++;
a0ed51b3
LW
5269 }
5270 break;
a0d0e21e 5271 case DIGIT:
1aa99e6b 5272 if (do_utf8) {
ffc61ed2 5273 loceol = PL_regeol;
1a4fad37 5274 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 5275 while (hardcount < max && scan < loceol &&
3568d838 5276 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
5277 scan += UTF8SKIP(scan);
5278 hardcount++;
5279 }
5280 } else {
5281 while (scan < loceol && isDIGIT(*scan))
5282 scan++;
a0ed51b3
LW
5283 }
5284 break;
a0d0e21e 5285 case NDIGIT:
1aa99e6b 5286 if (do_utf8) {
ffc61ed2 5287 loceol = PL_regeol;
1a4fad37 5288 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 5289 while (hardcount < max && scan < loceol &&
3568d838 5290 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
5291 scan += UTF8SKIP(scan);
5292 hardcount++;
5293 }
5294 } else {
5295 while (scan < loceol && !isDIGIT(*scan))
5296 scan++;
a0ed51b3
LW
5297 }
5298 break;
a0d0e21e
LW
5299 default: /* Called on something of 0 width. */
5300 break; /* So match right here or not at all. */
5301 }
a687059c 5302
a0ed51b3
LW
5303 if (hardcount)
5304 c = hardcount;
5305 else
5306 c = scan - PL_reginput;
3280af22 5307 PL_reginput = scan;
a687059c 5308
a3621e74 5309 DEBUG_r({
e68ec53f 5310 GET_RE_DEBUG_FLAGS_DECL;
be8e71aa 5311 DEBUG_EXECUTE_r({
e68ec53f
YO
5312 SV * const prop = sv_newmortal();
5313 regprop(prog, prop, p);
5314 PerlIO_printf(Perl_debug_log,
be8e71aa 5315 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
e2e6a0f1 5316 REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
a3621e74 5317 });
be8e71aa 5318 });
9041c2e3 5319
a0d0e21e 5320 return(c);
a687059c
LW
5321}
5322
c277df42 5323
be8e71aa 5324#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
c277df42 5325/*
ffc61ed2
JH
5326- regclass_swash - prepare the utf8 swash
5327*/
5328
5329SV *
32fc9b6a 5330Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
ffc61ed2 5331{
97aff369 5332 dVAR;
9e55ce06
JH
5333 SV *sw = NULL;
5334 SV *si = NULL;
5335 SV *alt = NULL;
f8fc2ecf
YO
5336 RXi_GET_DECL(prog,progi);
5337 const struct reg_data * const data = prog ? progi->data : NULL;
ffc61ed2 5338
4f639d21 5339 if (data && data->count) {
a3b680e6 5340 const U32 n = ARG(node);
ffc61ed2 5341
4f639d21
DM
5342 if (data->what[n] == 's') {
5343 SV * const rv = (SV*)data->data[n];
890ce7af 5344 AV * const av = (AV*)SvRV((SV*)rv);
2d03de9c 5345 SV **const ary = AvARRAY(av);
9e55ce06 5346 SV **a, **b;
9041c2e3 5347
711a919c 5348 /* See the end of regcomp.c:S_regclass() for
9e55ce06
JH
5349 * documentation of these array elements. */
5350
b11f357e 5351 si = *ary;
8f7f7219 5352 a = SvROK(ary[1]) ? &ary[1] : 0;
b11f357e
JH
5353 b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
5354
ffc61ed2
JH
5355 if (a)
5356 sw = *a;
5357 else if (si && doinit) {
5358 sw = swash_init("utf8", "", si, 1, 0);
5359 (void)av_store(av, 1, sw);
5360 }
9e55ce06
JH
5361 if (b)
5362 alt = *b;
ffc61ed2
JH
5363 }
5364 }
5365
9e55ce06
JH
5366 if (listsvp)
5367 *listsvp = si;
5368 if (altsvp)
5369 *altsvp = alt;
ffc61ed2
JH
5370
5371 return sw;
5372}
76234dfb 5373#endif
ffc61ed2
JH
5374
5375/*
ba7b4546 5376 - reginclass - determine if a character falls into a character class
832705d4
JH
5377
5378 The n is the ANYOF regnode, the p is the target string, lenp
5379 is pointer to the maximum length of how far to go in the p
5380 (if the lenp is zero, UTF8SKIP(p) is used),
5381 do_utf8 tells whether the target string is in UTF-8.
5382
bbce6d69 5383 */
5384
76e3520e 5385STATIC bool
32fc9b6a 5386S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
bbce6d69 5387{
27da23d5 5388 dVAR;
a3b680e6 5389 const char flags = ANYOF_FLAGS(n);
bbce6d69 5390 bool match = FALSE;
cc07378b 5391 UV c = *p;
ae9ddab8 5392 STRLEN len = 0;
9e55ce06 5393 STRLEN plen;
1aa99e6b 5394
19f67299
TS
5395 if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5396 c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
4ad0818d
DM
5397 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
5398 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
e8a70c6f
SP
5399 if (len == (STRLEN)-1)
5400 Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
19f67299 5401 }
bbce6d69 5402
0f0076b4 5403 plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
ffc61ed2 5404 if (do_utf8 || (flags & ANYOF_UNICODE)) {
9e55ce06
JH
5405 if (lenp)
5406 *lenp = 0;
ffc61ed2 5407 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
5408 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5409 match = TRUE;
bbce6d69 5410 }
3568d838 5411 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 5412 match = TRUE;
ffc61ed2 5413 if (!match) {
9e55ce06 5414 AV *av;
32fc9b6a 5415 SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
ffc61ed2
JH
5416
5417 if (sw) {
3568d838 5418 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
5419 match = TRUE;
5420 else if (flags & ANYOF_FOLD) {
9e55ce06
JH
5421 if (!match && lenp && av) {
5422 I32 i;
9e55ce06 5423 for (i = 0; i <= av_len(av); i++) {
890ce7af 5424 SV* const sv = *av_fetch(av, i, FALSE);
9e55ce06 5425 STRLEN len;
890ce7af 5426 const char * const s = SvPV_const(sv, len);
9e55ce06 5427
061b10df 5428 if (len <= plen && memEQ(s, (char*)p, len)) {
9e55ce06
JH
5429 *lenp = len;
5430 match = TRUE;
5431 break;
5432 }
5433 }
5434 }
5435 if (!match) {
89ebb4a3 5436 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4a623e43
JH
5437 STRLEN tmplen;
5438
9e55ce06
JH
5439 to_utf8_fold(p, tmpbuf, &tmplen);
5440 if (swash_fetch(sw, tmpbuf, do_utf8))
5441 match = TRUE;
5442 }
ffc61ed2
JH
5443 }
5444 }
bbce6d69 5445 }
9e55ce06 5446 if (match && lenp && *lenp == 0)
0f0076b4 5447 *lenp = UNISKIP(NATIVE_TO_UNI(c));
bbce6d69 5448 }
1aa99e6b 5449 if (!match && c < 256) {
ffc61ed2
JH
5450 if (ANYOF_BITMAP_TEST(n, c))
5451 match = TRUE;
5452 else if (flags & ANYOF_FOLD) {
eb160463 5453 U8 f;
a0ed51b3 5454
ffc61ed2
JH
5455 if (flags & ANYOF_LOCALE) {
5456 PL_reg_flags |= RF_tainted;
5457 f = PL_fold_locale[c];
5458 }
5459 else
5460 f = PL_fold[c];
5461 if (f != c && ANYOF_BITMAP_TEST(n, f))
5462 match = TRUE;
5463 }
5464
5465 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 5466 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
5467 if (
5468 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
5469 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
5470 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
5471 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
5472 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
5473 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
5474 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
5475 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5476 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
5477 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
5478 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
5479 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
5480 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
5481 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
5482 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
5483 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
5484 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
5485 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
5486 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
5487 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
5488 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
5489 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
5490 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
5491 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
5492 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
5493 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
5494 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
5495 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
5496 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
5497 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
5498 ) /* How's that for a conditional? */
5499 {
5500 match = TRUE;
5501 }
a0ed51b3 5502 }
a0ed51b3
LW
5503 }
5504
a0ed51b3
LW
5505 return (flags & ANYOF_INVERT) ? !match : match;
5506}
161b471a 5507
dfe13c55 5508STATIC U8 *
0ce71af7 5509S_reghop3(U8 *s, I32 off, const U8* lim)
9041c2e3 5510{
97aff369 5511 dVAR;
a0ed51b3 5512 if (off >= 0) {
1aa99e6b 5513 while (off-- && s < lim) {
ffc61ed2 5514 /* XXX could check well-formedness here */
a0ed51b3 5515 s += UTF8SKIP(s);
ffc61ed2 5516 }
a0ed51b3
LW
5517 }
5518 else {
1de06328
YO
5519 while (off++ && s > lim) {
5520 s--;
5521 if (UTF8_IS_CONTINUED(*s)) {
5522 while (s > lim && UTF8_IS_CONTINUATION(*s))
5523 s--;
a0ed51b3 5524 }
1de06328 5525 /* XXX could check well-formedness here */
a0ed51b3
LW
5526 }
5527 }
5528 return s;
5529}
161b471a 5530
f9f4320a
YO
5531#ifdef XXX_dmq
5532/* there are a bunch of places where we use two reghop3's that should
5533 be replaced with this routine. but since thats not done yet
5534 we ifdef it out - dmq
5535*/
dfe13c55 5536STATIC U8 *
1de06328
YO
5537S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
5538{
5539 dVAR;
5540 if (off >= 0) {
5541 while (off-- && s < rlim) {
5542 /* XXX could check well-formedness here */
5543 s += UTF8SKIP(s);
5544 }
5545 }
5546 else {
5547 while (off++ && s > llim) {
5548 s--;
5549 if (UTF8_IS_CONTINUED(*s)) {
5550 while (s > llim && UTF8_IS_CONTINUATION(*s))
5551 s--;
5552 }
5553 /* XXX could check well-formedness here */
5554 }
5555 }
5556 return s;
5557}
f9f4320a 5558#endif
1de06328
YO
5559
5560STATIC U8 *
0ce71af7 5561S_reghopmaybe3(U8* s, I32 off, const U8* lim)
a0ed51b3 5562{
97aff369 5563 dVAR;
a0ed51b3 5564 if (off >= 0) {
1aa99e6b 5565 while (off-- && s < lim) {
ffc61ed2 5566 /* XXX could check well-formedness here */
a0ed51b3 5567 s += UTF8SKIP(s);
ffc61ed2 5568 }
a0ed51b3 5569 if (off >= 0)
3dab1dad 5570 return NULL;
a0ed51b3
LW
5571 }
5572 else {
1de06328
YO
5573 while (off++ && s > lim) {
5574 s--;
5575 if (UTF8_IS_CONTINUED(*s)) {
5576 while (s > lim && UTF8_IS_CONTINUATION(*s))
5577 s--;
a0ed51b3 5578 }
1de06328 5579 /* XXX could check well-formedness here */
a0ed51b3
LW
5580 }
5581 if (off <= 0)
3dab1dad 5582 return NULL;
a0ed51b3
LW
5583 }
5584 return s;
5585}
51371543 5586
51371543 5587static void
acfe0abc 5588restore_pos(pTHX_ void *arg)
51371543 5589{
97aff369 5590 dVAR;
097eb12c 5591 regexp * const rex = (regexp *)arg;
51371543
GS
5592 if (PL_reg_eval_set) {
5593 if (PL_reg_oldsaved) {
4f639d21
DM
5594 rex->subbeg = PL_reg_oldsaved;
5595 rex->sublen = PL_reg_oldsavedlen;
f8c7b90f 5596#ifdef PERL_OLD_COPY_ON_WRITE
4f639d21 5597 rex->saved_copy = PL_nrs;
ed252734 5598#endif
4f639d21 5599 RX_MATCH_COPIED_on(rex);
51371543
GS
5600 }
5601 PL_reg_magic->mg_len = PL_reg_oldpos;
5602 PL_reg_eval_set = 0;
5603 PL_curpm = PL_reg_oldcurpm;
5604 }
5605}
33b8afdf
JH
5606
5607STATIC void
5608S_to_utf8_substr(pTHX_ register regexp *prog)
5609{
33b8afdf 5610 if (prog->float_substr && !prog->float_utf8) {
097eb12c
AL
5611 SV* const sv = newSVsv(prog->float_substr);
5612 prog->float_utf8 = sv;
33b8afdf
JH
5613 sv_utf8_upgrade(sv);
5614 if (SvTAIL(prog->float_substr))
5615 SvTAIL_on(sv);
5616 if (prog->float_substr == prog->check_substr)
5617 prog->check_utf8 = sv;
5618 }
5619 if (prog->anchored_substr && !prog->anchored_utf8) {
097eb12c
AL
5620 SV* const sv = newSVsv(prog->anchored_substr);
5621 prog->anchored_utf8 = sv;
33b8afdf
JH
5622 sv_utf8_upgrade(sv);
5623 if (SvTAIL(prog->anchored_substr))
5624 SvTAIL_on(sv);
5625 if (prog->anchored_substr == prog->check_substr)
5626 prog->check_utf8 = sv;
5627 }
5628}
5629
5630STATIC void
5631S_to_byte_substr(pTHX_ register regexp *prog)
5632{
97aff369 5633 dVAR;
33b8afdf 5634 if (prog->float_utf8 && !prog->float_substr) {
097eb12c
AL
5635 SV* sv = newSVsv(prog->float_utf8);
5636 prog->float_substr = sv;
33b8afdf
JH
5637 if (sv_utf8_downgrade(sv, TRUE)) {
5638 if (SvTAIL(prog->float_utf8))
5639 SvTAIL_on(sv);
5640 } else {
5641 SvREFCNT_dec(sv);
5642 prog->float_substr = sv = &PL_sv_undef;
5643 }
5644 if (prog->float_utf8 == prog->check_utf8)
5645 prog->check_substr = sv;
5646 }
5647 if (prog->anchored_utf8 && !prog->anchored_substr) {
097eb12c
AL
5648 SV* sv = newSVsv(prog->anchored_utf8);
5649 prog->anchored_substr = sv;
33b8afdf
JH
5650 if (sv_utf8_downgrade(sv, TRUE)) {
5651 if (SvTAIL(prog->anchored_utf8))
5652 SvTAIL_on(sv);
5653 } else {
5654 SvREFCNT_dec(sv);
5655 prog->anchored_substr = sv = &PL_sv_undef;
5656 }
5657 if (prog->anchored_utf8 == prog->check_utf8)
5658 prog->check_substr = sv;
5659 }
5660}
66610fdd
RGS
5661
5662/*
5663 * Local variables:
5664 * c-indentation-style: bsd
5665 * c-basic-offset: 4
5666 * indent-tabs-mode: t
5667 * End:
5668 *
37442d52
RGS
5669 * ex: set ts=8 sts=4 sw=4 noet:
5670 */