This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
test.pl fix
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
61296642
DM
8/* This file contains functions for executing a regular expression. See
9 * also regcomp.c which funnily enough, contains functions for compiling
166f8a29 10 * a regular expression.
e4a054ea
DM
11 *
12 * This file is also copied at build time to ext/re/re_exec.c, where
13 * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14 * This causes the main functions to be compiled under new names and with
15 * debugging support added, which makes "use re 'debug'" work.
166f8a29
DM
16 */
17
a687059c
LW
18/* NOTE: this is derived from Henry Spencer's regexp code, and should not
19 * confused with the original package (see point 3 below). Thanks, Henry!
20 */
21
22/* Additional note: this code is very heavily munged from Henry's version
23 * in places. In some spots I've traded clarity for efficiency, so don't
24 * blame Henry for some of the lack of readability.
25 */
26
e50aee73
AD
27/* The names of the functions have been changed from regcomp and
28 * regexec to pregcomp and pregexec in order to avoid conflicts
29 * with the POSIX routines of the same names.
30*/
31
b9d5759e 32#ifdef PERL_EXT_RE_BUILD
54df2634 33#include "re_top.h"
9041c2e3 34#endif
56953603 35
a687059c 36/*
e50aee73 37 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
38 *
39 * Copyright (c) 1986 by University of Toronto.
40 * Written by Henry Spencer. Not derived from licensed software.
41 *
42 * Permission is granted to anyone to use this software for any
43 * purpose on any computer system, and to redistribute it freely,
44 * subject to the following restrictions:
45 *
46 * 1. The author is not responsible for the consequences of use of
47 * this software, no matter how awful, even if they arise
48 * from defects in it.
49 *
50 * 2. The origin of this software must not be misrepresented, either
51 * by explicit claim or by omission.
52 *
53 * 3. Altered versions must be plainly marked as such, and must not
54 * be misrepresented as being the original software.
55 *
56 **** Alterations to Henry's code are...
57 ****
4bb101f2 58 **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
fdf8c088 59 **** 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 by Larry Wall and others
a687059c 60 ****
9ef589d8
LW
61 **** You may distribute under the terms of either the GNU General Public
62 **** License or the Artistic License, as specified in the README file.
a687059c
LW
63 *
64 * Beware that some of this code is subtly aware of the way operator
65 * precedence is structured in regular expressions. Serious changes in
66 * regular-expression syntax might require a total rethink.
67 */
68#include "EXTERN.h"
864dbfa3 69#define PERL_IN_REGEXEC_C
a687059c 70#include "perl.h"
0f5d15d6 71
54df2634
NC
72#ifdef PERL_IN_XSUB_RE
73# include "re_comp.h"
74#else
75# include "regcomp.h"
76#endif
a687059c 77
c277df42
IZ
78#define RF_tainted 1 /* tainted information used? */
79#define RF_warned 2 /* warned about big count? */
faec1544 80
ab3bbdeb 81#define RF_utf8 8 /* Pattern contains multibyte chars? */
a0ed51b3 82
eb160463 83#define UTF ((PL_reg_flags & RF_utf8) != 0)
ce862d02
IZ
84
85#define RS_init 1 /* eval environment created */
86#define RS_set 2 /* replsv value is set */
c277df42 87
a687059c
LW
88#ifndef STATIC
89#define STATIC static
90#endif
91
32fc9b6a 92#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
7d3e948e 93
c277df42
IZ
94/*
95 * Forwards.
96 */
97
33b8afdf 98#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 99#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 100
3dab1dad
YO
101#define HOPc(pos,off) \
102 (char *)(PL_reg_match_utf8 \
52657f30 103 ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
3dab1dad
YO
104 : (U8*)(pos + off))
105#define HOPBACKc(pos, off) \
07be1b83
YO
106 (char*)(PL_reg_match_utf8\
107 ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
108 : (pos - off >= PL_bostr) \
8e11feef 109 ? (U8*)pos - off \
3dab1dad 110 : NULL)
efb30f32 111
e7409c1b 112#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
1aa99e6b 113#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
1aa99e6b 114
1a4fad37
AL
115#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
116 if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
117#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
118#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
119#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
120#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
51371543 121
3dab1dad
YO
122/* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
123
5f80c4cf 124/* for use after a quantifier and before an EXACT-like node -- japhy */
3e901dc0
YO
125/* it would be nice to rework regcomp.sym to generate this stuff. sigh */
126#define JUMPABLE(rn) ( \
127 OP(rn) == OPEN || \
128 (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
129 OP(rn) == EVAL || \
cca55fe3
JP
130 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
131 OP(rn) == PLUS || OP(rn) == MINMOD || \
ee9b8eae 132 OP(rn) == KEEPS || (PL_regkind[OP(rn)] == VERB) || \
3dab1dad 133 (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26 134)
ee9b8eae 135#define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
e2d8ce26 136
ee9b8eae
YO
137#define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )
138
139#if 0
140/* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
141 we don't need this definition. */
142#define IS_TEXT(rn) ( OP(rn)==EXACT || OP(rn)==REF || OP(rn)==NREF )
143#define IS_TEXTF(rn) ( OP(rn)==EXACTF || OP(rn)==REFF || OP(rn)==NREFF )
144#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
145
146#else
147/* ... so we use this as its faster. */
148#define IS_TEXT(rn) ( OP(rn)==EXACT )
149#define IS_TEXTF(rn) ( OP(rn)==EXACTF )
150#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
151
152#endif
e2d8ce26 153
a84d97b6
HS
154/*
155 Search for mandatory following text node; for lookahead, the text must
156 follow but for lookbehind (rn->flags != 0) we skip to the next step.
157*/
cca55fe3 158#define FIND_NEXT_IMPT(rn) STMT_START { \
3dab1dad
YO
159 while (JUMPABLE(rn)) { \
160 const OPCODE type = OP(rn); \
161 if (type == SUSPEND || PL_regkind[type] == CURLY) \
e2d8ce26 162 rn = NEXTOPER(NEXTOPER(rn)); \
3dab1dad 163 else if (type == PLUS) \
cca55fe3 164 rn = NEXTOPER(rn); \
3dab1dad 165 else if (type == IFMATCH) \
a84d97b6 166 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
e2d8ce26 167 else rn += NEXT_OFF(rn); \
3dab1dad 168 } \
5f80c4cf 169} STMT_END
74750237 170
c476f425 171
acfe0abc 172static void restore_pos(pTHX_ void *arg);
51371543 173
76e3520e 174STATIC CHECKPOINT
cea2e8a9 175S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 176{
97aff369 177 dVAR;
a3b680e6 178 const int retval = PL_savestack_ix;
b1ce53c5 179#define REGCP_PAREN_ELEMS 4
a3b680e6 180 const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e 181 int p;
40a82448 182 GET_RE_DEBUG_FLAGS_DECL;
a0d0e21e 183
e49a9654
IH
184 if (paren_elems_to_push < 0)
185 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
186
f0ab9afb 187#define REGCP_OTHER_ELEMS 7
4b3c1a47 188 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
7f69552c 189
3280af22 190 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 191/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
f0ab9afb
NC
192 SSPUSHINT(PL_regoffs[p].end);
193 SSPUSHINT(PL_regoffs[p].start);
3280af22 194 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e 195 SSPUSHINT(p);
e7707071 196 DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
40a82448 197 " saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
f0ab9afb 198 (UV)p, (IV)PL_regoffs[p].start,
40a82448 199 (IV)(PL_reg_start_tmp[p] - PL_bostr),
f0ab9afb 200 (IV)PL_regoffs[p].end
40a82448 201 ));
a0d0e21e 202 }
b1ce53c5 203/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
f0ab9afb 204 SSPUSHPTR(PL_regoffs);
3280af22
NIS
205 SSPUSHINT(PL_regsize);
206 SSPUSHINT(*PL_reglastparen);
a01268b5 207 SSPUSHINT(*PL_reglastcloseparen);
3280af22 208 SSPUSHPTR(PL_reginput);
41123dfd
JH
209#define REGCP_FRAME_ELEMS 2
210/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
211 * are needed for the regexp context stack bookkeeping. */
212 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 213 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 214
a0d0e21e
LW
215 return retval;
216}
217
c277df42 218/* These are needed since we do not localize EVAL nodes: */
ab3bbdeb
YO
219#define REGCP_SET(cp) \
220 DEBUG_STATE_r( \
ab3bbdeb 221 PerlIO_printf(Perl_debug_log, \
e4f74956 222 " Setting an EVAL scope, savestack=%"IVdf"\n", \
ab3bbdeb
YO
223 (IV)PL_savestack_ix)); \
224 cp = PL_savestack_ix
c3464db5 225
ab3bbdeb 226#define REGCP_UNWIND(cp) \
e4f74956 227 DEBUG_STATE_r( \
ab3bbdeb 228 if (cp != PL_savestack_ix) \
e4f74956
YO
229 PerlIO_printf(Perl_debug_log, \
230 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
ab3bbdeb
YO
231 (IV)(cp), (IV)PL_savestack_ix)); \
232 regcpblow(cp)
c277df42 233
76e3520e 234STATIC char *
097eb12c 235S_regcppop(pTHX_ const regexp *rex)
a0d0e21e 236{
97aff369 237 dVAR;
3b6647e0 238 U32 i;
a0d0e21e 239 char *input;
a3621e74
YO
240 GET_RE_DEBUG_FLAGS_DECL;
241
7918f24d
NC
242 PERL_ARGS_ASSERT_REGCPPOP;
243
b1ce53c5 244 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 245 i = SSPOPINT;
b1ce53c5
JH
246 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
247 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 248 input = (char *) SSPOPPTR;
a01268b5 249 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
250 *PL_reglastparen = SSPOPINT;
251 PL_regsize = SSPOPINT;
f0ab9afb 252 PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
b1ce53c5 253
6bda09f9 254
b1ce53c5 255 /* Now restore the parentheses context. */
41123dfd
JH
256 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
257 i > 0; i -= REGCP_PAREN_ELEMS) {
1df70142 258 I32 tmps;
097eb12c 259 U32 paren = (U32)SSPOPINT;
3280af22 260 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
f0ab9afb 261 PL_regoffs[paren].start = SSPOPINT;
cf93c79d 262 tmps = SSPOPINT;
3280af22 263 if (paren <= *PL_reglastparen)
f0ab9afb 264 PL_regoffs[paren].end = tmps;
e7707071 265 DEBUG_BUFFERS_r(
c3464db5 266 PerlIO_printf(Perl_debug_log,
b900a521 267 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
f0ab9afb 268 (UV)paren, (IV)PL_regoffs[paren].start,
b900a521 269 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
f0ab9afb 270 (IV)PL_regoffs[paren].end,
3280af22 271 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 272 );
a0d0e21e 273 }
e7707071 274 DEBUG_BUFFERS_r(
bb7a0f54 275 if (*PL_reglastparen + 1 <= rex->nparens) {
c3464db5 276 PerlIO_printf(Perl_debug_log,
faccc32b 277 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
4f639d21 278 (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
c277df42
IZ
279 }
280 );
daf18116 281#if 1
dafc8851
JH
282 /* It would seem that the similar code in regtry()
283 * already takes care of this, and in fact it is in
284 * a better location to since this code can #if 0-ed out
285 * but the code in regtry() is needed or otherwise tests
286 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
287 * (as of patchlevel 7877) will fail. Then again,
288 * this code seems to be necessary or otherwise
225593e1
DM
289 * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
290 * --jhi updated by dapm */
3b6647e0 291 for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
097eb12c 292 if (i > PL_regsize)
f0ab9afb
NC
293 PL_regoffs[i].start = -1;
294 PL_regoffs[i].end = -1;
a0d0e21e 295 }
dafc8851 296#endif
a0d0e21e
LW
297 return input;
298}
299
02db2b7b 300#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 301
a687059c 302/*
e50aee73 303 * pregexec and friends
a687059c
LW
304 */
305
76234dfb 306#ifndef PERL_IN_XSUB_RE
a687059c 307/*
c277df42 308 - pregexec - match a regexp against a string
a687059c 309 */
c277df42 310I32
49d7dfbc 311Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
c3464db5 312 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
313/* strend: pointer to null at end of string */
314/* strbeg: real beginning of string */
315/* minend: end of match must be >=minend after stringarg. */
316/* nosave: For optimizations. */
317{
7918f24d
NC
318 PERL_ARGS_ASSERT_PREGEXEC;
319
c277df42 320 return
9041c2e3 321 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
322 nosave ? 0 : REXEC_COPY_STR);
323}
76234dfb 324#endif
22e551b9 325
9041c2e3 326/*
cad2e5aa
JH
327 * Need to implement the following flags for reg_anch:
328 *
329 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
330 * USE_INTUIT_ML
331 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
332 * INTUIT_AUTORITATIVE_ML
333 * INTUIT_ONCE_NOML - Intuit can match in one location only.
334 * INTUIT_ONCE_ML
335 *
336 * Another flag for this function: SECOND_TIME (so that float substrs
337 * with giant delta may be not rechecked).
338 */
339
340/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
341
3f7c398e 342/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
cad2e5aa
JH
343 Otherwise, only SvCUR(sv) is used to get strbeg. */
344
345/* XXXX We assume that strpos is strbeg unless sv. */
346
6eb5f6b9
JH
347/* XXXX Some places assume that there is a fixed substring.
348 An update may be needed if optimizer marks as "INTUITable"
349 RExen without fixed substrings. Similarly, it is assumed that
350 lengths of all the strings are no more than minlen, thus they
351 cannot come from lookahead.
40d049e4
YO
352 (Or minlen should take into account lookahead.)
353 NOTE: Some of this comment is not correct. minlen does now take account
354 of lookahead/behind. Further research is required. -- demerphq
355
356*/
6eb5f6b9 357
2c2d71f5
JH
358/* A failure to find a constant substring means that there is no need to make
359 an expensive call to REx engine, thus we celebrate a failure. Similarly,
360 finding a substring too deep into the string means that less calls to
30944b6d
IZ
361 regtry() should be needed.
362
363 REx compiler's optimizer found 4 possible hints:
364 a) Anchored substring;
365 b) Fixed substring;
366 c) Whether we are anchored (beginning-of-line or \G);
367 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 368 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
369 string which does not contradict any of them.
370 */
2c2d71f5 371
6eb5f6b9
JH
372/* Most of decisions we do here should have been done at compile time.
373 The nodes of the REx which we used for the search should have been
374 deleted from the finite automaton. */
375
cad2e5aa 376char *
288b8c02 377Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
9f61653a 378 char *strend, const U32 flags, re_scream_pos_data *data)
cad2e5aa 379{
97aff369 380 dVAR;
288b8c02 381 struct regexp *const prog = (struct regexp *)SvANY(rx);
b7953727 382 register I32 start_shift = 0;
cad2e5aa 383 /* Should be nonnegative! */
b7953727 384 register I32 end_shift = 0;
2c2d71f5
JH
385 register char *s;
386 register SV *check;
a1933d95 387 char *strbeg;
cad2e5aa 388 char *t;
1de06328 389 const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
cad2e5aa 390 I32 ml_anch;
bd61b366
SS
391 register char *other_last = NULL; /* other substr checked before this */
392 char *check_at = NULL; /* check substr found at this pos */
bbe252da 393 const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
f8fc2ecf 394 RXi_GET_DECL(prog,progi);
30944b6d 395#ifdef DEBUGGING
890ce7af 396 const char * const i_strpos = strpos;
30944b6d 397#endif
a3621e74
YO
398 GET_RE_DEBUG_FLAGS_DECL;
399
7918f24d
NC
400 PERL_ARGS_ASSERT_RE_INTUIT_START;
401
288b8c02 402 RX_MATCH_UTF8_set(rx,do_utf8);
cad2e5aa 403
3c8556c3 404 if (RX_UTF8(rx)) {
b8d68ded
JH
405 PL_reg_flags |= RF_utf8;
406 }
ab3bbdeb 407 DEBUG_EXECUTE_r(
efd26800 408 debug_start_match(rx, do_utf8, strpos, strend,
1de06328
YO
409 sv ? "Guessing start of match in sv for"
410 : "Guessing start of match in string for");
2a782b5b 411 );
cad2e5aa 412
c344f387
JH
413 /* CHR_DIST() would be more correct here but it makes things slow. */
414 if (prog->minlen > strend - strpos) {
a3621e74 415 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584 416 "String too short... [re_intuit_start]\n"));
cad2e5aa 417 goto fail;
2c2d71f5 418 }
1de06328 419
a1933d95 420 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 421 PL_regeol = strend;
33b8afdf
JH
422 if (do_utf8) {
423 if (!prog->check_utf8 && prog->check_substr)
424 to_utf8_substr(prog);
425 check = prog->check_utf8;
426 } else {
427 if (!prog->check_substr && prog->check_utf8)
428 to_byte_substr(prog);
429 check = prog->check_substr;
430 }
1de06328 431 if (check == &PL_sv_undef) {
a3621e74 432 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1de06328 433 "Non-utf8 string cannot match utf8 check string\n"));
33b8afdf
JH
434 goto fail;
435 }
bbe252da
YO
436 if (prog->extflags & RXf_ANCH) { /* Match at beg-of-str or after \n */
437 ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
438 || ( (prog->extflags & RXf_ANCH_BOL)
7fba1cd6 439 && !multiline ) ); /* Check after \n? */
cad2e5aa 440
7e25d62c 441 if (!ml_anch) {
bbe252da
YO
442 if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
443 && !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
3f7c398e 444 /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
7e25d62c
JH
445 && sv && !SvROK(sv)
446 && (strpos != strbeg)) {
a3621e74 447 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
7e25d62c
JH
448 goto fail;
449 }
450 if (prog->check_offset_min == prog->check_offset_max &&
bbe252da 451 !(prog->extflags & RXf_CANY_SEEN)) {
2c2d71f5 452 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
453 I32 slen;
454
1aa99e6b 455 s = HOP3c(strpos, prog->check_offset_min, strend);
1de06328 456
653099ff
GS
457 if (SvTAIL(check)) {
458 slen = SvCUR(check); /* >= 1 */
cad2e5aa 459
9041c2e3 460 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5 461 || (strend - s == slen && strend[-1] != '\n')) {
a3621e74 462 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
2c2d71f5 463 goto fail_finish;
cad2e5aa
JH
464 }
465 /* Now should match s[0..slen-2] */
466 slen--;
3f7c398e 467 if (slen && (*SvPVX_const(check) != *s
cad2e5aa 468 || (slen > 1
3f7c398e 469 && memNE(SvPVX_const(check), s, slen)))) {
2c2d71f5 470 report_neq:
a3621e74 471 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
2c2d71f5
JH
472 goto fail_finish;
473 }
cad2e5aa 474 }
3f7c398e 475 else if (*SvPVX_const(check) != *s
653099ff 476 || ((slen = SvCUR(check)) > 1
3f7c398e 477 && memNE(SvPVX_const(check), s, slen)))
2c2d71f5 478 goto report_neq;
c315bfe8 479 check_at = s;
2c2d71f5 480 goto success_at_start;
7e25d62c 481 }
cad2e5aa 482 }
2c2d71f5 483 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 484 s = strpos;
2c2d71f5 485 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
1de06328
YO
486 end_shift = prog->check_end_shift;
487
2c2d71f5 488 if (!ml_anch) {
a3b680e6 489 const I32 end = prog->check_offset_max + CHR_SVLEN(check)
653099ff 490 - (SvTAIL(check) != 0);
a3b680e6 491 const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
492
493 if (end_shift < eshift)
494 end_shift = eshift;
495 }
cad2e5aa 496 }
2c2d71f5 497 else { /* Can match at random position */
cad2e5aa
JH
498 ml_anch = 0;
499 s = strpos;
1de06328
YO
500 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
501 end_shift = prog->check_end_shift;
502
503 /* end shift should be non negative here */
cad2e5aa
JH
504 }
505
bcdf7404 506#ifdef QDEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 507 if (end_shift < 0)
1de06328 508 Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
220fc49f 509 (IV)end_shift, RX_PRECOMP(prog));
2c2d71f5
JH
510#endif
511
2c2d71f5
JH
512 restart:
513 /* Find a possible match in the region s..strend by looking for
514 the "check" substring in the region corrected by start/end_shift. */
1de06328
YO
515
516 {
517 I32 srch_start_shift = start_shift;
518 I32 srch_end_shift = end_shift;
519 if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
520 srch_end_shift -= ((strbeg - s) - srch_start_shift);
521 srch_start_shift = strbeg - s;
522 }
6bda09f9 523 DEBUG_OPTIMISE_MORE_r({
1de06328
YO
524 PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
525 (IV)prog->check_offset_min,
526 (IV)srch_start_shift,
527 (IV)srch_end_shift,
528 (IV)prog->check_end_shift);
529 });
530
cad2e5aa 531 if (flags & REXEC_SCREAM) {
cad2e5aa 532 I32 p = -1; /* Internal iterator of scream. */
a3b680e6 533 I32 * const pp = data ? data->scream_pos : &p;
cad2e5aa 534
2c2d71f5
JH
535 if (PL_screamfirst[BmRARE(check)] >= 0
536 || ( BmRARE(check) == '\n'
85c508c3 537 && (BmPREVIOUS(check) == SvCUR(check) - 1)
2c2d71f5 538 && SvTAIL(check) ))
9041c2e3 539 s = screaminstr(sv, check,
1de06328 540 srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
cad2e5aa 541 else
2c2d71f5 542 goto fail_finish;
4addbd3b 543 /* we may be pointing at the wrong string */
07bc277f 544 if (s && RXp_MATCH_COPIED(prog))
3f7c398e 545 s = strbeg + (s - SvPVX_const(sv));
cad2e5aa
JH
546 if (data)
547 *data->scream_olds = s;
548 }
1de06328
YO
549 else {
550 U8* start_point;
551 U8* end_point;
bbe252da 552 if (prog->extflags & RXf_CANY_SEEN) {
1de06328
YO
553 start_point= (U8*)(s + srch_start_shift);
554 end_point= (U8*)(strend - srch_end_shift);
555 } else {
556 start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
557 end_point= HOP3(strend, -srch_end_shift, strbeg);
558 }
6bda09f9 559 DEBUG_OPTIMISE_MORE_r({
56570a2c 560 PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
1de06328 561 (int)(end_point - start_point),
fc8cd66c 562 (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
1de06328
YO
563 start_point);
564 });
565
566 s = fbm_instr( start_point, end_point,
7fba1cd6 567 check, multiline ? FBMrf_MULTILINE : 0);
1de06328
YO
568 }
569 }
cad2e5aa
JH
570 /* Update the count-of-usability, remove useless subpatterns,
571 unshift s. */
2c2d71f5 572
ab3bbdeb
YO
573 DEBUG_EXECUTE_r({
574 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
575 SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
576 PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
2c2d71f5 577 (s ? "Found" : "Did not find"),
ab3bbdeb
YO
578 (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
579 ? "anchored" : "floating"),
580 quoted,
581 RE_SV_TAIL(check),
582 (s ? " at offset " : "...\n") );
583 });
2c2d71f5
JH
584
585 if (!s)
586 goto fail_finish;
2c2d71f5 587 /* Finish the diagnostic message */
a3621e74 588 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5 589
1de06328
YO
590 /* XXX dmq: first branch is for positive lookbehind...
591 Our check string is offset from the beginning of the pattern.
592 So we need to do any stclass tests offset forward from that
593 point. I think. :-(
594 */
595
596
597
598 check_at=s;
599
600
2c2d71f5
JH
601 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
602 Start with the other substr.
603 XXXX no SCREAM optimization yet - and a very coarse implementation
a0288114 604 XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
2c2d71f5
JH
605 *always* match. Probably should be marked during compile...
606 Probably it is right to do no SCREAM here...
607 */
608
1de06328
YO
609 if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
610 : (prog->float_substr && prog->anchored_substr))
611 {
30944b6d 612 /* Take into account the "other" substring. */
2c2d71f5
JH
613 /* XXXX May be hopelessly wrong for UTF... */
614 if (!other_last)
6eb5f6b9 615 other_last = strpos;
33b8afdf 616 if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
30944b6d
IZ
617 do_other_anchored:
618 {
890ce7af
AL
619 char * const last = HOP3c(s, -start_shift, strbeg);
620 char *last1, *last2;
be8e71aa 621 char * const saved_s = s;
33b8afdf 622 SV* must;
2c2d71f5 623
2c2d71f5
JH
624 t = s - prog->check_offset_max;
625 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 626 && (!do_utf8
0ce71af7 627 || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
2c2d71f5 628 && t > strpos)))
6f207bd3 629 NOOP;
2c2d71f5
JH
630 else
631 t = strpos;
1aa99e6b 632 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
633 if (t < other_last) /* These positions already checked */
634 t = other_last;
1aa99e6b 635 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
636 if (last < last1)
637 last1 = last;
1de06328
YO
638 /* XXXX It is not documented what units *_offsets are in.
639 We assume bytes, but this is clearly wrong.
640 Meaning this code needs to be carefully reviewed for errors.
641 dmq.
642 */
643
2c2d71f5 644 /* On end-of-str: see comment below. */
33b8afdf
JH
645 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
646 if (must == &PL_sv_undef) {
647 s = (char*)NULL;
1de06328 648 DEBUG_r(must = prog->anchored_utf8); /* for debug */
33b8afdf
JH
649 }
650 else
651 s = fbm_instr(
652 (unsigned char*)t,
653 HOP3(HOP3(last1, prog->anchored_offset, strend)
654 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
655 must,
7fba1cd6 656 multiline ? FBMrf_MULTILINE : 0
33b8afdf 657 );
ab3bbdeb
YO
658 DEBUG_EXECUTE_r({
659 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
660 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
661 PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
2c2d71f5 662 (s ? "Found" : "Contradicts"),
ab3bbdeb
YO
663 quoted, RE_SV_TAIL(must));
664 });
665
666
2c2d71f5
JH
667 if (!s) {
668 if (last1 >= last2) {
a3621e74 669 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5
JH
670 ", giving up...\n"));
671 goto fail_finish;
672 }
a3621e74 673 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 674 ", trying floating at offset %ld...\n",
be8e71aa 675 (long)(HOP3c(saved_s, 1, strend) - i_strpos)));
1aa99e6b
IH
676 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
677 s = HOP3c(last, 1, strend);
2c2d71f5
JH
678 goto restart;
679 }
680 else {
a3621e74 681 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 682 (long)(s - i_strpos)));
1aa99e6b
IH
683 t = HOP3c(s, -prog->anchored_offset, strbeg);
684 other_last = HOP3c(s, 1, strend);
be8e71aa 685 s = saved_s;
2c2d71f5
JH
686 if (t == strpos)
687 goto try_at_start;
2c2d71f5
JH
688 goto try_at_offset;
689 }
30944b6d 690 }
2c2d71f5
JH
691 }
692 else { /* Take into account the floating substring. */
33b8afdf 693 char *last, *last1;
be8e71aa 694 char * const saved_s = s;
33b8afdf
JH
695 SV* must;
696
697 t = HOP3c(s, -start_shift, strbeg);
698 last1 = last =
699 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
700 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
701 last = HOP3c(t, prog->float_max_offset, strend);
702 s = HOP3c(t, prog->float_min_offset, strend);
703 if (s < other_last)
704 s = other_last;
2c2d71f5 705 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
33b8afdf
JH
706 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
707 /* fbm_instr() takes into account exact value of end-of-str
708 if the check is SvTAIL(ed). Since false positives are OK,
709 and end-of-str is not later than strend we are OK. */
710 if (must == &PL_sv_undef) {
711 s = (char*)NULL;
1de06328 712 DEBUG_r(must = prog->float_utf8); /* for debug message */
33b8afdf
JH
713 }
714 else
2c2d71f5 715 s = fbm_instr((unsigned char*)s,
33b8afdf
JH
716 (unsigned char*)last + SvCUR(must)
717 - (SvTAIL(must)!=0),
7fba1cd6 718 must, multiline ? FBMrf_MULTILINE : 0);
ab3bbdeb
YO
719 DEBUG_EXECUTE_r({
720 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
721 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
722 PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
33b8afdf 723 (s ? "Found" : "Contradicts"),
ab3bbdeb
YO
724 quoted, RE_SV_TAIL(must));
725 });
33b8afdf
JH
726 if (!s) {
727 if (last1 == last) {
a3621e74 728 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
729 ", giving up...\n"));
730 goto fail_finish;
2c2d71f5 731 }
a3621e74 732 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf 733 ", trying anchored starting at offset %ld...\n",
be8e71aa 734 (long)(saved_s + 1 - i_strpos)));
33b8afdf
JH
735 other_last = last;
736 s = HOP3c(t, 1, strend);
737 goto restart;
738 }
739 else {
a3621e74 740 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
33b8afdf
JH
741 (long)(s - i_strpos)));
742 other_last = s; /* Fix this later. --Hugo */
be8e71aa 743 s = saved_s;
33b8afdf
JH
744 if (t == strpos)
745 goto try_at_start;
746 goto try_at_offset;
747 }
2c2d71f5 748 }
cad2e5aa 749 }
2c2d71f5 750
1de06328 751
9ef43ace 752 t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
1de06328 753
6bda09f9 754 DEBUG_OPTIMISE_MORE_r(
1de06328
YO
755 PerlIO_printf(Perl_debug_log,
756 "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
757 (IV)prog->check_offset_min,
758 (IV)prog->check_offset_max,
759 (IV)(s-strpos),
760 (IV)(t-strpos),
761 (IV)(t-s),
762 (IV)(strend-strpos)
763 )
764 );
765
2c2d71f5 766 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 767 && (!do_utf8
9ef43ace 768 || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
1de06328
YO
769 && t > strpos)))
770 {
2c2d71f5
JH
771 /* Fixed substring is found far enough so that the match
772 cannot start at strpos. */
773 try_at_offset:
cad2e5aa 774 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
775 /* Eventually fbm_*() should handle this, but often
776 anchored_offset is not 0, so this check will not be wasted. */
777 /* XXXX In the code below we prefer to look for "^" even in
778 presence of anchored substrings. And we search even
779 beyond the found float position. These pessimizations
780 are historical artefacts only. */
781 find_anchor:
2c2d71f5 782 while (t < strend - prog->minlen) {
cad2e5aa 783 if (*t == '\n') {
4ee3650e 784 if (t < check_at - prog->check_offset_min) {
33b8afdf 785 if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
4ee3650e
GS
786 /* Since we moved from the found position,
787 we definitely contradict the found anchored
30944b6d
IZ
788 substr. Due to the above check we do not
789 contradict "check" substr.
790 Thus we can arrive here only if check substr
791 is float. Redo checking for "other"=="fixed".
792 */
9041c2e3 793 strpos = t + 1;
a3621e74 794 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
e4584336 795 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
30944b6d
IZ
796 goto do_other_anchored;
797 }
4ee3650e
GS
798 /* We don't contradict the found floating substring. */
799 /* XXXX Why not check for STCLASS? */
cad2e5aa 800 s = t + 1;
a3621e74 801 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
e4584336 802 PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
803 goto set_useful;
804 }
4ee3650e
GS
805 /* Position contradicts check-string */
806 /* XXXX probably better to look for check-string
807 than for "\n", so one should lower the limit for t? */
a3621e74 808 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
e4584336 809 PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 810 other_last = strpos = s = t + 1;
cad2e5aa
JH
811 goto restart;
812 }
813 t++;
814 }
a3621e74 815 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
e4584336 816 PL_colors[0], PL_colors[1]));
2c2d71f5 817 goto fail_finish;
cad2e5aa 818 }
f5952150 819 else {
a3621e74 820 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
e4584336 821 PL_colors[0], PL_colors[1]));
f5952150 822 }
cad2e5aa
JH
823 s = t;
824 set_useful:
33b8afdf 825 ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
cad2e5aa
JH
826 }
827 else {
f5952150 828 /* The found string does not prohibit matching at strpos,
2c2d71f5 829 - no optimization of calling REx engine can be performed,
f5952150
GS
830 unless it was an MBOL and we are not after MBOL,
831 or a future STCLASS check will fail this. */
2c2d71f5
JH
832 try_at_start:
833 /* Even in this situation we may use MBOL flag if strpos is offset
834 wrt the start of the string. */
05b4157f 835 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 836 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d 837 /* May be due to an implicit anchor of m{.*foo} */
bbe252da 838 && !(prog->intflags & PREGf_IMPLICIT))
d506a20d 839 {
cad2e5aa
JH
840 t = strpos;
841 goto find_anchor;
842 }
a3621e74 843 DEBUG_EXECUTE_r( if (ml_anch)
f5952150 844 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
70685ca0 845 (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
30944b6d 846 );
2c2d71f5 847 success_at_start:
bbe252da 848 if (!(prog->intflags & PREGf_NAUGHTY) /* XXXX If strpos moved? */
33b8afdf
JH
849 && (do_utf8 ? (
850 prog->check_utf8 /* Could be deleted already */
851 && --BmUSEFUL(prog->check_utf8) < 0
852 && (prog->check_utf8 == prog->float_utf8)
853 ) : (
854 prog->check_substr /* Could be deleted already */
855 && --BmUSEFUL(prog->check_substr) < 0
856 && (prog->check_substr == prog->float_substr)
857 )))
66e933ab 858 {
cad2e5aa 859 /* If flags & SOMETHING - do not do it many times on the same match */
a3621e74 860 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
33b8afdf
JH
861 SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
862 if (do_utf8 ? prog->check_substr : prog->check_utf8)
863 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
a0714e2c
SS
864 prog->check_substr = prog->check_utf8 = NULL; /* disable */
865 prog->float_substr = prog->float_utf8 = NULL; /* clear */
866 check = NULL; /* abort */
cad2e5aa 867 s = strpos;
3cf5c195
IZ
868 /* XXXX This is a remnant of the old implementation. It
869 looks wasteful, since now INTUIT can use many
6eb5f6b9 870 other heuristics. */
bbe252da 871 prog->extflags &= ~RXf_USE_INTUIT;
cad2e5aa
JH
872 }
873 else
874 s = strpos;
875 }
876
6eb5f6b9
JH
877 /* Last resort... */
878 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
1de06328
YO
879 /* trie stclasses are too expensive to use here, we are better off to
880 leave it to regmatch itself */
f8fc2ecf 881 if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
6eb5f6b9
JH
882 /* minlen == 0 is possible if regstclass is \b or \B,
883 and the fixed substr is ''$.
884 Since minlen is already taken into account, s+1 is before strend;
885 accidentally, minlen >= 1 guaranties no false positives at s + 1
886 even for \b or \B. But (minlen? 1 : 0) below assumes that
887 regstclass does not come from lookahead... */
888 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
889 This leaves EXACTF only, which is dealt with in find_byclass(). */
f8fc2ecf
YO
890 const U8* const str = (U8*)STRING(progi->regstclass);
891 const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
892 ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
66e933ab 893 : 1);
1de06328
YO
894 char * endpos;
895 if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
896 endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
897 else if (prog->float_substr || prog->float_utf8)
898 endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
899 else
900 endpos= strend;
901
70685ca0
JH
902 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
903 (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
1de06328 904
6eb5f6b9 905 t = s;
f8fc2ecf 906 s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
6eb5f6b9
JH
907 if (!s) {
908#ifdef DEBUGGING
cbbf8932 909 const char *what = NULL;
6eb5f6b9
JH
910#endif
911 if (endpos == strend) {
a3621e74 912 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
913 "Could not match STCLASS...\n") );
914 goto fail;
915 }
a3621e74 916 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab 917 "This position contradicts STCLASS...\n") );
bbe252da 918 if ((prog->extflags & RXf_ANCH) && !ml_anch)
653099ff 919 goto fail;
6eb5f6b9 920 /* Contradict one of substrings */
33b8afdf
JH
921 if (prog->anchored_substr || prog->anchored_utf8) {
922 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
a3621e74 923 DEBUG_EXECUTE_r( what = "anchored" );
6eb5f6b9 924 hop_and_restart:
1aa99e6b 925 s = HOP3c(t, 1, strend);
66e933ab
GS
926 if (s + start_shift + end_shift > strend) {
927 /* XXXX Should be taken into account earlier? */
a3621e74 928 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab
GS
929 "Could not match STCLASS...\n") );
930 goto fail;
931 }
5e39e1e5
HS
932 if (!check)
933 goto giveup;
a3621e74 934 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 935 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
936 what, (long)(s + start_shift - i_strpos)) );
937 goto restart;
938 }
66e933ab 939 /* Have both, check_string is floating */
6eb5f6b9
JH
940 if (t + start_shift >= check_at) /* Contradicts floating=check */
941 goto retry_floating_check;
942 /* Recheck anchored substring, but not floating... */
9041c2e3 943 s = check_at;
5e39e1e5
HS
944 if (!check)
945 goto giveup;
a3621e74 946 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 947 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
948 (long)(other_last - i_strpos)) );
949 goto do_other_anchored;
950 }
60e71179
GS
951 /* Another way we could have checked stclass at the
952 current position only: */
953 if (ml_anch) {
954 s = t = t + 1;
5e39e1e5
HS
955 if (!check)
956 goto giveup;
a3621e74 957 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 958 "Looking for /%s^%s/m starting at offset %ld...\n",
e4584336 959 PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
60e71179 960 goto try_at_offset;
66e933ab 961 }
33b8afdf 962 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
60e71179 963 goto fail;
6eb5f6b9
JH
964 /* Check is floating subtring. */
965 retry_floating_check:
966 t = check_at - start_shift;
a3621e74 967 DEBUG_EXECUTE_r( what = "floating" );
6eb5f6b9
JH
968 goto hop_and_restart;
969 }
b7953727 970 if (t != s) {
a3621e74 971 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 972 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
973 (long)(t - i_strpos), (long)(s - i_strpos))
974 );
975 }
976 else {
a3621e74 977 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b7953727
JH
978 "Does not contradict STCLASS...\n");
979 );
980 }
6eb5f6b9 981 }
5e39e1e5 982 giveup:
a3621e74 983 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
5e39e1e5
HS
984 PL_colors[4], (check ? "Guessed" : "Giving up"),
985 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 986 return s;
2c2d71f5
JH
987
988 fail_finish: /* Substring not found */
33b8afdf
JH
989 if (prog->check_substr || prog->check_utf8) /* could be removed already */
990 BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
cad2e5aa 991 fail:
a3621e74 992 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
e4584336 993 PL_colors[4], PL_colors[5]));
bd61b366 994 return NULL;
cad2e5aa 995}
9661b544 996
a0a388a1
YO
997#define DECL_TRIE_TYPE(scan) \
998 const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
999 trie_type = (scan->flags != EXACT) \
1000 ? (do_utf8 ? trie_utf8_fold : (UTF ? trie_latin_utf8_fold : trie_plain)) \
1001 : (do_utf8 ? trie_utf8 : trie_plain)
3b0527fe 1002
55eed653
NC
1003#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, \
1004uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
4cadc6a9
YO
1005 switch (trie_type) { \
1006 case trie_utf8_fold: \
1007 if ( foldlen>0 ) { \
1008 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
1009 foldlen -= len; \
1010 uscan += len; \
1011 len=0; \
1012 } else { \
1013 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
1014 uvc = to_uni_fold( uvc, foldbuf, &foldlen ); \
1015 foldlen -= UNISKIP( uvc ); \
1016 uscan = foldbuf + UNISKIP( uvc ); \
1017 } \
1018 break; \
a0a388a1
YO
1019 case trie_latin_utf8_fold: \
1020 if ( foldlen>0 ) { \
1021 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
1022 foldlen -= len; \
1023 uscan += len; \
1024 len=0; \
1025 } else { \
1026 len = 1; \
1027 uvc = to_uni_fold( *(U8*)uc, foldbuf, &foldlen ); \
1028 foldlen -= UNISKIP( uvc ); \
1029 uscan = foldbuf + UNISKIP( uvc ); \
1030 } \
1031 break; \
4cadc6a9
YO
1032 case trie_utf8: \
1033 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
1034 break; \
1035 case trie_plain: \
1036 uvc = (UV)*uc; \
1037 len = 1; \
1038 } \
1039 \
1040 if (uvc < 256) { \
1041 charid = trie->charmap[ uvc ]; \
1042 } \
1043 else { \
1044 charid = 0; \
55eed653
NC
1045 if (widecharmap) { \
1046 SV** const svpp = hv_fetch(widecharmap, \
4cadc6a9
YO
1047 (char*)&uvc, sizeof(UV), 0); \
1048 if (svpp) \
1049 charid = (U16)SvIV(*svpp); \
1050 } \
1051 } \
1052} STMT_END
1053
a0a388a1
YO
1054#define REXEC_FBC_EXACTISH_CHECK(CoNd) \
1055{ \
1056 char *my_strend= (char *)strend; \
4cadc6a9
YO
1057 if ( (CoNd) \
1058 && (ln == len || \
a0a388a1 1059 !ibcmp_utf8(s, &my_strend, 0, do_utf8, \
4cadc6a9 1060 m, NULL, ln, (bool)UTF)) \
a0a388a1 1061 && (!reginfo || regtry(reginfo, &s)) ) \
4cadc6a9
YO
1062 goto got_it; \
1063 else { \
1064 U8 foldbuf[UTF8_MAXBYTES_CASE+1]; \
1065 uvchr_to_utf8(tmpbuf, c); \
1066 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen); \
1067 if ( f != c \
1068 && (f == c1 || f == c2) \
a0a388a1
YO
1069 && (ln == len || \
1070 !ibcmp_utf8(s, &my_strend, 0, do_utf8,\
1071 m, NULL, ln, (bool)UTF)) \
1072 && (!reginfo || regtry(reginfo, &s)) ) \
4cadc6a9
YO
1073 goto got_it; \
1074 } \
a0a388a1
YO
1075} \
1076s += len
4cadc6a9
YO
1077
1078#define REXEC_FBC_EXACTISH_SCAN(CoNd) \
1079STMT_START { \
1080 while (s <= e) { \
1081 if ( (CoNd) \
1082 && (ln == 1 || !(OP(c) == EXACTF \
1083 ? ibcmp(s, m, ln) \
1084 : ibcmp_locale(s, m, ln))) \
24b23f37 1085 && (!reginfo || regtry(reginfo, &s)) ) \
4cadc6a9
YO
1086 goto got_it; \
1087 s++; \
1088 } \
1089} STMT_END
1090
1091#define REXEC_FBC_UTF8_SCAN(CoDe) \
1092STMT_START { \
1093 while (s + (uskip = UTF8SKIP(s)) <= strend) { \
1094 CoDe \
1095 s += uskip; \
1096 } \
1097} STMT_END
1098
1099#define REXEC_FBC_SCAN(CoDe) \
1100STMT_START { \
1101 while (s < strend) { \
1102 CoDe \
1103 s++; \
1104 } \
1105} STMT_END
1106
1107#define REXEC_FBC_UTF8_CLASS_SCAN(CoNd) \
1108REXEC_FBC_UTF8_SCAN( \
1109 if (CoNd) { \
24b23f37 1110 if (tmp && (!reginfo || regtry(reginfo, &s))) \
4cadc6a9
YO
1111 goto got_it; \
1112 else \
1113 tmp = doevery; \
1114 } \
1115 else \
1116 tmp = 1; \
1117)
1118
1119#define REXEC_FBC_CLASS_SCAN(CoNd) \
1120REXEC_FBC_SCAN( \
1121 if (CoNd) { \
24b23f37 1122 if (tmp && (!reginfo || regtry(reginfo, &s))) \
4cadc6a9
YO
1123 goto got_it; \
1124 else \
1125 tmp = doevery; \
1126 } \
1127 else \
1128 tmp = 1; \
1129)
1130
1131#define REXEC_FBC_TRYIT \
24b23f37 1132if ((!reginfo || regtry(reginfo, &s))) \
4cadc6a9
YO
1133 goto got_it
1134
e1d1eefb
YO
1135#define REXEC_FBC_CSCAN(CoNdUtF8,CoNd) \
1136 if (do_utf8) { \
1137 REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
1138 } \
1139 else { \
1140 REXEC_FBC_CLASS_SCAN(CoNd); \
1141 } \
1142 break
1143
4cadc6a9
YO
1144#define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd) \
1145 if (do_utf8) { \
1146 UtFpReLoAd; \
1147 REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
1148 } \
1149 else { \
1150 REXEC_FBC_CLASS_SCAN(CoNd); \
1151 } \
1152 break
1153
1154#define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd) \
1155 PL_reg_flags |= RF_tainted; \
1156 if (do_utf8) { \
1157 REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
1158 } \
1159 else { \
1160 REXEC_FBC_CLASS_SCAN(CoNd); \
1161 } \
1162 break
1163
786e8c11
YO
1164#define DUMP_EXEC_POS(li,s,doutf8) \
1165 dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1166
1167/* We know what class REx starts with. Try to find this position... */
1168/* if reginfo is NULL, its a dryrun */
1169/* annoyingly all the vars in this routine have different names from their counterparts
1170 in regmatch. /grrr */
1171
3c3eec57 1172STATIC char *
07be1b83 1173S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
24b23f37 1174 const char *strend, regmatch_info *reginfo)
a687059c 1175{
27da23d5 1176 dVAR;
bbe252da 1177 const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
6eb5f6b9 1178 char *m;
d8093b23 1179 STRLEN ln;
5dab1207 1180 STRLEN lnc;
078c425b 1181 register STRLEN uskip;
d8093b23
G
1182 unsigned int c1;
1183 unsigned int c2;
6eb5f6b9
JH
1184 char *e;
1185 register I32 tmp = 1; /* Scratch variable? */
a3b680e6 1186 register const bool do_utf8 = PL_reg_match_utf8;
f8fc2ecf 1187 RXi_GET_DECL(prog,progi);
7918f24d
NC
1188
1189 PERL_ARGS_ASSERT_FIND_BYCLASS;
f8fc2ecf 1190
6eb5f6b9
JH
1191 /* We know what class it must start with. */
1192 switch (OP(c)) {
6eb5f6b9 1193 case ANYOF:
388cc4de 1194 if (do_utf8) {
4cadc6a9 1195 REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
388cc4de 1196 !UTF8_IS_INVARIANT((U8)s[0]) ?
32fc9b6a 1197 reginclass(prog, c, (U8*)s, 0, do_utf8) :
4cadc6a9 1198 REGINCLASS(prog, c, (U8*)s));
388cc4de
HS
1199 }
1200 else {
1201 while (s < strend) {
1202 STRLEN skip = 1;
1203
32fc9b6a 1204 if (REGINCLASS(prog, c, (U8*)s) ||
388cc4de
HS
1205 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1206 /* The assignment of 2 is intentional:
1207 * for the folded sharp s, the skip is 2. */
1208 (skip = SHARP_S_SKIP))) {
24b23f37 1209 if (tmp && (!reginfo || regtry(reginfo, &s)))
388cc4de
HS
1210 goto got_it;
1211 else
1212 tmp = doevery;
1213 }
1214 else
1215 tmp = 1;
1216 s += skip;
1217 }
a0d0e21e 1218 }
6eb5f6b9 1219 break;
f33976b4 1220 case CANY:
4cadc6a9 1221 REXEC_FBC_SCAN(
24b23f37 1222 if (tmp && (!reginfo || regtry(reginfo, &s)))
f33976b4
DB
1223 goto got_it;
1224 else
1225 tmp = doevery;
4cadc6a9 1226 );
f33976b4 1227 break;
6eb5f6b9 1228 case EXACTF:
5dab1207
NIS
1229 m = STRING(c);
1230 ln = STR_LEN(c); /* length to match in octets/bytes */
1231 lnc = (I32) ln; /* length to match in characters */
1aa99e6b 1232 if (UTF) {
a2a2844f 1233 STRLEN ulen1, ulen2;
5dab1207 1234 U8 *sm = (U8 *) m;
89ebb4a3
JH
1235 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1236 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
97dc7d3e
RGS
1237 /* used by commented-out code below */
1238 /*const U32 uniflags = UTF8_ALLOW_DEFAULT;*/
a0a388a1
YO
1239
1240 /* XXX: Since the node will be case folded at compile
1241 time this logic is a little odd, although im not
1242 sure that its actually wrong. --dmq */
1243
1244 c1 = to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1245 c2 = to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1246
1247 /* XXX: This is kinda strange. to_utf8_XYZ returns the
1248 codepoint of the first character in the converted
1249 form, yet originally we did the extra step.
1250 No tests fail by commenting this code out however
1251 so Ive left it out. -- dmq.
1252
89ebb4a3 1253 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
041457d9 1254 0, uniflags);
89ebb4a3 1255 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
041457d9 1256 0, uniflags);
a0a388a1
YO
1257 */
1258
5dab1207
NIS
1259 lnc = 0;
1260 while (sm < ((U8 *) m + ln)) {
1261 lnc++;
1262 sm += UTF8SKIP(sm);
1263 }
1aa99e6b
IH
1264 }
1265 else {
1266 c1 = *(U8*)m;
1267 c2 = PL_fold[c1];
1268 }
6eb5f6b9
JH
1269 goto do_exactf;
1270 case EXACTFL:
5dab1207
NIS
1271 m = STRING(c);
1272 ln = STR_LEN(c);
1273 lnc = (I32) ln;
d8093b23 1274 c1 = *(U8*)m;
6eb5f6b9
JH
1275 c2 = PL_fold_locale[c1];
1276 do_exactf:
db12adc6 1277 e = HOP3c(strend, -((I32)lnc), s);
b3c9acc1 1278
3b0527fe 1279 if (!reginfo && e < s)
6eb5f6b9 1280 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 1281
60a8b682
JH
1282 /* The idea in the EXACTF* cases is to first find the
1283 * first character of the EXACTF* node and then, if
1284 * necessary, case-insensitively compare the full
1285 * text of the node. The c1 and c2 are the first
1286 * characters (though in Unicode it gets a bit
1287 * more complicated because there are more cases
7f16dd3d
JH
1288 * than just upper and lower: one needs to use
1289 * the so-called folding case for case-insensitive
1290 * matching (called "loose matching" in Unicode).
1291 * ibcmp_utf8() will do just that. */
60a8b682 1292
a0a388a1 1293 if (do_utf8 || UTF) {
575cac57 1294 UV c, f;
89ebb4a3 1295 U8 tmpbuf [UTF8_MAXBYTES+1];
a0a388a1
YO
1296 STRLEN len = 1;
1297 STRLEN foldlen;
4ad0818d 1298 const U32 uniflags = UTF8_ALLOW_DEFAULT;
09091399 1299 if (c1 == c2) {
5dab1207
NIS
1300 /* Upper and lower of 1st char are equal -
1301 * probably not a "letter". */
1aa99e6b 1302 while (s <= e) {
a0a388a1
YO
1303 if (do_utf8) {
1304 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1305 uniflags);
a0a388a1
YO
1306 } else {
1307 c = *((U8*)s);
1308 }
4cadc6a9 1309 REXEC_FBC_EXACTISH_CHECK(c == c1);
1aa99e6b 1310 }
09091399
JH
1311 }
1312 else {
1aa99e6b 1313 while (s <= e) {
a0a388a1
YO
1314 if (do_utf8) {
1315 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1316 uniflags);
a0a388a1
YO
1317 } else {
1318 c = *((U8*)s);
1319 }
80aecb99 1320
60a8b682 1321 /* Handle some of the three Greek sigmas cases.
8c01da3c
JH
1322 * Note that not all the possible combinations
1323 * are handled here: some of them are handled
1324 * by the standard folding rules, and some of
1325 * them (the character class or ANYOF cases)
1326 * are handled during compiletime in
1327 * regexec.c:S_regclass(). */
880bd946
JH
1328 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1329 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1330 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99 1331
4cadc6a9 1332 REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1aa99e6b 1333 }
09091399 1334 }
1aa99e6b
IH
1335 }
1336 else {
a0a388a1 1337 /* Neither pattern nor string are UTF8 */
1aa99e6b 1338 if (c1 == c2)
4cadc6a9 1339 REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1aa99e6b 1340 else
4cadc6a9 1341 REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
b3c9acc1
IZ
1342 }
1343 break;
bbce6d69 1344 case BOUNDL:
3280af22 1345 PL_reg_flags |= RF_tainted;
bbce6d69 1346 /* FALL THROUGH */
a0d0e21e 1347 case BOUND:
ffc61ed2 1348 if (do_utf8) {
12d33761 1349 if (s == PL_bostr)
ffc61ed2
JH
1350 tmp = '\n';
1351 else {
6136c704 1352 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1353 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1354 }
1355 tmp = ((OP(c) == BOUND ?
9041c2e3 1356 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1357 LOAD_UTF8_CHARCLASS_ALNUM();
4cadc6a9 1358 REXEC_FBC_UTF8_SCAN(
ffc61ed2 1359 if (tmp == !(OP(c) == BOUND ?
bb7a0f54 1360 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1361 isALNUM_LC_utf8((U8*)s)))
1362 {
1363 tmp = !tmp;
4cadc6a9 1364 REXEC_FBC_TRYIT;
a687059c 1365 }
4cadc6a9 1366 );
a0d0e21e 1367 }
667bb95a 1368 else {
12d33761 1369 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2 1370 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
4cadc6a9 1371 REXEC_FBC_SCAN(
ffc61ed2
JH
1372 if (tmp ==
1373 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1374 tmp = !tmp;
4cadc6a9 1375 REXEC_FBC_TRYIT;
a0ed51b3 1376 }
4cadc6a9 1377 );
a0ed51b3 1378 }
24b23f37 1379 if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
a0ed51b3
LW
1380 goto got_it;
1381 break;
bbce6d69 1382 case NBOUNDL:
3280af22 1383 PL_reg_flags |= RF_tainted;
bbce6d69 1384 /* FALL THROUGH */
a0d0e21e 1385 case NBOUND:
ffc61ed2 1386 if (do_utf8) {
12d33761 1387 if (s == PL_bostr)
ffc61ed2
JH
1388 tmp = '\n';
1389 else {
6136c704 1390 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1391 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1392 }
1393 tmp = ((OP(c) == NBOUND ?
9041c2e3 1394 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1395 LOAD_UTF8_CHARCLASS_ALNUM();
4cadc6a9 1396 REXEC_FBC_UTF8_SCAN(
ffc61ed2 1397 if (tmp == !(OP(c) == NBOUND ?
bb7a0f54 1398 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1399 isALNUM_LC_utf8((U8*)s)))
1400 tmp = !tmp;
4cadc6a9
YO
1401 else REXEC_FBC_TRYIT;
1402 );
a0d0e21e 1403 }
667bb95a 1404 else {
12d33761 1405 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1406 tmp = ((OP(c) == NBOUND ?
1407 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
4cadc6a9 1408 REXEC_FBC_SCAN(
ffc61ed2
JH
1409 if (tmp ==
1410 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1411 tmp = !tmp;
4cadc6a9
YO
1412 else REXEC_FBC_TRYIT;
1413 );
a0ed51b3 1414 }
24b23f37 1415 if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
a0ed51b3
LW
1416 goto got_it;
1417 break;
a0d0e21e 1418 case ALNUM:
4cadc6a9
YO
1419 REXEC_FBC_CSCAN_PRELOAD(
1420 LOAD_UTF8_CHARCLASS_ALNUM(),
1421 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1422 isALNUM(*s)
1423 );
bbce6d69 1424 case ALNUML:
4cadc6a9
YO
1425 REXEC_FBC_CSCAN_TAINT(
1426 isALNUM_LC_utf8((U8*)s),
1427 isALNUM_LC(*s)
1428 );
a0d0e21e 1429 case NALNUM:
4cadc6a9
YO
1430 REXEC_FBC_CSCAN_PRELOAD(
1431 LOAD_UTF8_CHARCLASS_ALNUM(),
1432 !swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1433 !isALNUM(*s)
1434 );
bbce6d69 1435 case NALNUML:
4cadc6a9
YO
1436 REXEC_FBC_CSCAN_TAINT(
1437 !isALNUM_LC_utf8((U8*)s),
1438 !isALNUM_LC(*s)
1439 );
a0d0e21e 1440 case SPACE:
4cadc6a9
YO
1441 REXEC_FBC_CSCAN_PRELOAD(
1442 LOAD_UTF8_CHARCLASS_SPACE(),
1443 *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
1444 isSPACE(*s)
1445 );
bbce6d69 1446 case SPACEL:
4cadc6a9
YO
1447 REXEC_FBC_CSCAN_TAINT(
1448 *s == ' ' || isSPACE_LC_utf8((U8*)s),
1449 isSPACE_LC(*s)
1450 );
a0d0e21e 1451 case NSPACE:
4cadc6a9
YO
1452 REXEC_FBC_CSCAN_PRELOAD(
1453 LOAD_UTF8_CHARCLASS_SPACE(),
1454 !(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
1455 !isSPACE(*s)
1456 );
bbce6d69 1457 case NSPACEL:
4cadc6a9
YO
1458 REXEC_FBC_CSCAN_TAINT(
1459 !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1460 !isSPACE_LC(*s)
1461 );
a0d0e21e 1462 case DIGIT:
4cadc6a9
YO
1463 REXEC_FBC_CSCAN_PRELOAD(
1464 LOAD_UTF8_CHARCLASS_DIGIT(),
1465 swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1466 isDIGIT(*s)
1467 );
b8c5462f 1468 case DIGITL:
4cadc6a9
YO
1469 REXEC_FBC_CSCAN_TAINT(
1470 isDIGIT_LC_utf8((U8*)s),
1471 isDIGIT_LC(*s)
1472 );
a0d0e21e 1473 case NDIGIT:
4cadc6a9
YO
1474 REXEC_FBC_CSCAN_PRELOAD(
1475 LOAD_UTF8_CHARCLASS_DIGIT(),
1476 !swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1477 !isDIGIT(*s)
1478 );
b8c5462f 1479 case NDIGITL:
4cadc6a9
YO
1480 REXEC_FBC_CSCAN_TAINT(
1481 !isDIGIT_LC_utf8((U8*)s),
1482 !isDIGIT_LC(*s)
1483 );
e1d1eefb
YO
1484 case LNBREAK:
1485 REXEC_FBC_CSCAN(
1486 is_LNBREAK_utf8(s),
1487 is_LNBREAK_latin1(s)
1488 );
1489 case VERTWS:
1490 REXEC_FBC_CSCAN(
1491 is_VERTWS_utf8(s),
1492 is_VERTWS_latin1(s)
1493 );
1494 case NVERTWS:
1495 REXEC_FBC_CSCAN(
1496 !is_VERTWS_utf8(s),
1497 !is_VERTWS_latin1(s)
1498 );
1499 case HORIZWS:
1500 REXEC_FBC_CSCAN(
1501 is_HORIZWS_utf8(s),
1502 is_HORIZWS_latin1(s)
1503 );
1504 case NHORIZWS:
1505 REXEC_FBC_CSCAN(
1506 !is_HORIZWS_utf8(s),
1507 !is_HORIZWS_latin1(s)
1508 );
1de06328
YO
1509 case AHOCORASICKC:
1510 case AHOCORASICK:
07be1b83 1511 {
a0a388a1 1512 DECL_TRIE_TYPE(c);
07be1b83
YO
1513 /* what trie are we using right now */
1514 reg_ac_data *aho
f8fc2ecf 1515 = (reg_ac_data*)progi->data->data[ ARG( c ) ];
3251b653
NC
1516 reg_trie_data *trie
1517 = (reg_trie_data*)progi->data->data[ aho->trie ];
55eed653 1518 HV *widecharmap = (HV*) progi->data->data[ aho->trie + 1 ];
07be1b83
YO
1519
1520 const char *last_start = strend - trie->minlen;
6148ee25 1521#ifdef DEBUGGING
07be1b83 1522 const char *real_start = s;
6148ee25 1523#endif
07be1b83 1524 STRLEN maxlen = trie->maxlen;
be8e71aa
YO
1525 SV *sv_points;
1526 U8 **points; /* map of where we were in the input string
786e8c11 1527 when reading a given char. For ASCII this
be8e71aa 1528 is unnecessary overhead as the relationship
38a44b82
NC
1529 is always 1:1, but for Unicode, especially
1530 case folded Unicode this is not true. */
f9e705e8 1531 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
786e8c11
YO
1532 U8 *bitmap=NULL;
1533
07be1b83
YO
1534
1535 GET_RE_DEBUG_FLAGS_DECL;
1536
be8e71aa
YO
1537 /* We can't just allocate points here. We need to wrap it in
1538 * an SV so it gets freed properly if there is a croak while
1539 * running the match */
1540 ENTER;
1541 SAVETMPS;
1542 sv_points=newSV(maxlen * sizeof(U8 *));
1543 SvCUR_set(sv_points,
1544 maxlen * sizeof(U8 *));
1545 SvPOK_on(sv_points);
1546 sv_2mortal(sv_points);
1547 points=(U8**)SvPV_nolen(sv_points );
1de06328
YO
1548 if ( trie_type != trie_utf8_fold
1549 && (trie->bitmap || OP(c)==AHOCORASICKC) )
1550 {
786e8c11
YO
1551 if (trie->bitmap)
1552 bitmap=(U8*)trie->bitmap;
1553 else
1554 bitmap=(U8*)ANYOF_BITMAP(c);
07be1b83 1555 }
786e8c11
YO
1556 /* this is the Aho-Corasick algorithm modified a touch
1557 to include special handling for long "unknown char"
1558 sequences. The basic idea being that we use AC as long
1559 as we are dealing with a possible matching char, when
1560 we encounter an unknown char (and we have not encountered
1561 an accepting state) we scan forward until we find a legal
1562 starting char.
1563 AC matching is basically that of trie matching, except
1564 that when we encounter a failing transition, we fall back
1565 to the current states "fail state", and try the current char
1566 again, a process we repeat until we reach the root state,
1567 state 1, or a legal transition. If we fail on the root state
1568 then we can either terminate if we have reached an accepting
1569 state previously, or restart the entire process from the beginning
1570 if we have not.
1571
1572 */
07be1b83
YO
1573 while (s <= last_start) {
1574 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1575 U8 *uc = (U8*)s;
1576 U16 charid = 0;
1577 U32 base = 1;
1578 U32 state = 1;
1579 UV uvc = 0;
1580 STRLEN len = 0;
1581 STRLEN foldlen = 0;
1582 U8 *uscan = (U8*)NULL;
1583 U8 *leftmost = NULL;
786e8c11
YO
1584#ifdef DEBUGGING
1585 U32 accepted_word= 0;
1586#endif
07be1b83
YO
1587 U32 pointpos = 0;
1588
1589 while ( state && uc <= (U8*)strend ) {
1590 int failed=0;
786e8c11
YO
1591 U32 word = aho->states[ state ].wordnum;
1592
1de06328
YO
1593 if( state==1 ) {
1594 if ( bitmap ) {
1595 DEBUG_TRIE_EXECUTE_r(
1596 if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1597 dump_exec_pos( (char *)uc, c, strend, real_start,
1598 (char *)uc, do_utf8 );
1599 PerlIO_printf( Perl_debug_log,
1600 " Scanning for legal start char...\n");
1601 }
1602 );
1603 while ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1604 uc++;
786e8c11 1605 }
1de06328 1606 s= (char *)uc;
786e8c11 1607 }
786e8c11
YO
1608 if (uc >(U8*)last_start) break;
1609 }
1610
1611 if ( word ) {
1612 U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1613 if (!leftmost || lpos < leftmost) {
1614 DEBUG_r(accepted_word=word);
07be1b83 1615 leftmost= lpos;
786e8c11 1616 }
07be1b83 1617 if (base==0) break;
786e8c11 1618
07be1b83
YO
1619 }
1620 points[pointpos++ % maxlen]= uc;
55eed653
NC
1621 REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
1622 uscan, len, uvc, charid, foldlen,
1623 foldbuf, uniflags);
786e8c11
YO
1624 DEBUG_TRIE_EXECUTE_r({
1625 dump_exec_pos( (char *)uc, c, strend, real_start,
1626 s, do_utf8 );
07be1b83 1627 PerlIO_printf(Perl_debug_log,
786e8c11
YO
1628 " Charid:%3u CP:%4"UVxf" ",
1629 charid, uvc);
1630 });
07be1b83
YO
1631
1632 do {
6148ee25 1633#ifdef DEBUGGING
786e8c11 1634 word = aho->states[ state ].wordnum;
6148ee25 1635#endif
07be1b83
YO
1636 base = aho->states[ state ].trans.base;
1637
786e8c11
YO
1638 DEBUG_TRIE_EXECUTE_r({
1639 if (failed)
1640 dump_exec_pos( (char *)uc, c, strend, real_start,
1641 s, do_utf8 );
07be1b83 1642 PerlIO_printf( Perl_debug_log,
786e8c11
YO
1643 "%sState: %4"UVxf", word=%"UVxf,
1644 failed ? " Fail transition to " : "",
1645 (UV)state, (UV)word);
1646 });
07be1b83
YO
1647 if ( base ) {
1648 U32 tmp;
1649 if (charid &&
1650 (base + charid > trie->uniquecharcount )
1651 && (base + charid - 1 - trie->uniquecharcount
1652 < trie->lasttrans)
1653 && trie->trans[base + charid - 1 -
1654 trie->uniquecharcount].check == state
1655 && (tmp=trie->trans[base + charid - 1 -
1656 trie->uniquecharcount ].next))
1657 {
786e8c11
YO
1658 DEBUG_TRIE_EXECUTE_r(
1659 PerlIO_printf( Perl_debug_log," - legal\n"));
07be1b83
YO
1660 state = tmp;
1661 break;
1662 }
1663 else {
786e8c11
YO
1664 DEBUG_TRIE_EXECUTE_r(
1665 PerlIO_printf( Perl_debug_log," - fail\n"));
1666 failed = 1;
1667 state = aho->fail[state];
07be1b83
YO
1668 }
1669 }
1670 else {
1671 /* we must be accepting here */
786e8c11
YO
1672 DEBUG_TRIE_EXECUTE_r(
1673 PerlIO_printf( Perl_debug_log," - accepting\n"));
1674 failed = 1;
07be1b83
YO
1675 break;
1676 }
1677 } while(state);
786e8c11 1678 uc += len;
07be1b83
YO
1679 if (failed) {
1680 if (leftmost)
1681 break;
786e8c11 1682 if (!state) state = 1;
07be1b83
YO
1683 }
1684 }
1685 if ( aho->states[ state ].wordnum ) {
1686 U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
786e8c11
YO
1687 if (!leftmost || lpos < leftmost) {
1688 DEBUG_r(accepted_word=aho->states[ state ].wordnum);
07be1b83 1689 leftmost = lpos;
786e8c11 1690 }
07be1b83 1691 }
07be1b83
YO
1692 if (leftmost) {
1693 s = (char*)leftmost;
786e8c11
YO
1694 DEBUG_TRIE_EXECUTE_r({
1695 PerlIO_printf(
70685ca0
JH
1696 Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
1697 (UV)accepted_word, (IV)(s - real_start)
786e8c11
YO
1698 );
1699 });
24b23f37 1700 if (!reginfo || regtry(reginfo, &s)) {
be8e71aa
YO
1701 FREETMPS;
1702 LEAVE;
07be1b83 1703 goto got_it;
be8e71aa 1704 }
07be1b83 1705 s = HOPc(s,1);
786e8c11
YO
1706 DEBUG_TRIE_EXECUTE_r({
1707 PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1708 });
07be1b83 1709 } else {
786e8c11
YO
1710 DEBUG_TRIE_EXECUTE_r(
1711 PerlIO_printf( Perl_debug_log,"No match.\n"));
07be1b83
YO
1712 break;
1713 }
1714 }
be8e71aa
YO
1715 FREETMPS;
1716 LEAVE;
07be1b83
YO
1717 }
1718 break;
b3c9acc1 1719 default:
3c3eec57
GS
1720 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1721 break;
d6a28714 1722 }
6eb5f6b9
JH
1723 return 0;
1724 got_it:
1725 return s;
1726}
1727
e6aad7ab 1728static void
7918f24d
NC
1729S_swap_match_buff (pTHX_ regexp *prog)
1730{
f0ab9afb 1731 regexp_paren_pair *t;
fae667d5 1732
7918f24d
NC
1733 PERL_ARGS_ASSERT_SWAP_MATCH_BUFF;
1734
28d8d7f4 1735 if (!prog->swap) {
fae667d5
YO
1736 /* We have to be careful. If the previous successful match
1737 was from this regex we don't want a subsequent paritally
1738 successful match to clobber the old results.
1739 So when we detect this possibility we add a swap buffer
1740 to the re, and switch the buffer each match. If we fail
1741 we switch it back, otherwise we leave it swapped.
1742 */
f0ab9afb 1743 Newxz(prog->swap, (prog->nparens + 1), regexp_paren_pair);
fae667d5 1744 }
f0ab9afb
NC
1745 t = prog->swap;
1746 prog->swap = prog->offs;
1747 prog->offs = t;
fae667d5
YO
1748}
1749
1750
6eb5f6b9
JH
1751/*
1752 - regexec_flags - match a regexp against a string
1753 */
1754I32
288b8c02 1755Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *strend,
6eb5f6b9
JH
1756 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1757/* strend: pointer to null at end of string */
1758/* strbeg: real beginning of string */
1759/* minend: end of match must be >=minend after stringarg. */
58e23c8d
YO
1760/* data: May be used for some additional optimizations.
1761 Currently its only used, with a U32 cast, for transmitting
1762 the ganch offset when doing a /g match. This will change */
6eb5f6b9
JH
1763/* nosave: For optimizations. */
1764{
97aff369 1765 dVAR;
288b8c02 1766 struct regexp *const prog = (struct regexp *)SvANY(rx);
24b23f37 1767 /*register*/ char *s;
6eb5f6b9 1768 register regnode *c;
24b23f37 1769 /*register*/ char *startpos = stringarg;
6eb5f6b9
JH
1770 I32 minlen; /* must match at least this many chars */
1771 I32 dontbother = 0; /* how many characters not to try at end */
6eb5f6b9
JH
1772 I32 end_shift = 0; /* Same for the end. */ /* CC */
1773 I32 scream_pos = -1; /* Internal iterator of scream. */
ccac19ea 1774 char *scream_olds = NULL;
f9f4320a 1775 const bool do_utf8 = (bool)DO_UTF8(sv);
2757e526 1776 I32 multiline;
f8fc2ecf 1777 RXi_GET_DECL(prog,progi);
3b0527fe 1778 regmatch_info reginfo; /* create some info to pass to regtry etc */
fae667d5 1779 bool swap_on_fail = 0;
a3621e74
YO
1780 GET_RE_DEBUG_FLAGS_DECL;
1781
7918f24d 1782 PERL_ARGS_ASSERT_REGEXEC_FLAGS;
9d4ba2ae 1783 PERL_UNUSED_ARG(data);
6eb5f6b9
JH
1784
1785 /* Be paranoid... */
1786 if (prog == NULL || startpos == NULL) {
1787 Perl_croak(aTHX_ "NULL regexp parameter");
1788 return 0;
1789 }
1790
bbe252da 1791 multiline = prog->extflags & RXf_PMf_MULTILINE;
288b8c02 1792 reginfo.prog = rx; /* Yes, sorry that this is confusing. */
2757e526 1793
288b8c02 1794 RX_MATCH_UTF8_set(rx, do_utf8);
1de06328 1795 DEBUG_EXECUTE_r(
efd26800 1796 debug_start_match(rx, do_utf8, startpos, strend,
1de06328
YO
1797 "Matching");
1798 );
bac06658 1799
6eb5f6b9 1800 minlen = prog->minlen;
1de06328
YO
1801
1802 if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
a3621e74 1803 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584
JH
1804 "String too short [regexec_flags]...\n"));
1805 goto phooey;
1aa99e6b 1806 }
6eb5f6b9 1807
1de06328 1808
6eb5f6b9 1809 /* Check validity of program. */
f8fc2ecf 1810 if (UCHARAT(progi->program) != REG_MAGIC) {
6eb5f6b9
JH
1811 Perl_croak(aTHX_ "corrupted regexp program");
1812 }
1813
1814 PL_reg_flags = 0;
1815 PL_reg_eval_set = 0;
1816 PL_reg_maxiter = 0;
1817
3c8556c3 1818 if (RX_UTF8(rx))
6eb5f6b9
JH
1819 PL_reg_flags |= RF_utf8;
1820
1821 /* Mark beginning of line for ^ and lookbehind. */
3b0527fe 1822 reginfo.bol = startpos; /* XXX not used ??? */
6eb5f6b9 1823 PL_bostr = strbeg;
3b0527fe 1824 reginfo.sv = sv;
6eb5f6b9
JH
1825
1826 /* Mark end of line for $ (and such) */
1827 PL_regeol = strend;
1828
1829 /* see how far we have to get to not match where we matched before */
3b0527fe 1830 reginfo.till = startpos+minend;
6eb5f6b9 1831
6eb5f6b9
JH
1832 /* If there is a "must appear" string, look for it. */
1833 s = startpos;
1834
bbe252da 1835 if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
6eb5f6b9
JH
1836 MAGIC *mg;
1837
1838 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
58e23c8d 1839 reginfo.ganch = startpos + prog->gofs;
6eb5f6b9
JH
1840 else if (sv && SvTYPE(sv) >= SVt_PVMG
1841 && SvMAGIC(sv)
14befaf4
DM
1842 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1843 && mg->mg_len >= 0) {
3b0527fe 1844 reginfo.ganch = strbeg + mg->mg_len; /* Defined pos() */
bbe252da 1845 if (prog->extflags & RXf_ANCH_GPOS) {
3b0527fe 1846 if (s > reginfo.ganch)
6eb5f6b9 1847 goto phooey;
58e23c8d 1848 s = reginfo.ganch - prog->gofs;
6eb5f6b9
JH
1849 }
1850 }
58e23c8d 1851 else if (data) {
70685ca0 1852 reginfo.ganch = strbeg + PTR2UV(data);
58e23c8d 1853 } else /* pos() not defined */
3b0527fe 1854 reginfo.ganch = strbeg;
6eb5f6b9 1855 }
288b8c02 1856 if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
fae667d5
YO
1857 swap_on_fail = 1;
1858 swap_match_buff(prog); /* do we need a save destructor here for
1859 eval dies? */
c74340f9 1860 }
a0714e2c 1861 if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
6eb5f6b9
JH
1862 re_scream_pos_data d;
1863
1864 d.scream_olds = &scream_olds;
1865 d.scream_pos = &scream_pos;
288b8c02 1866 s = re_intuit_start(rx, sv, s, strend, flags, &d);
3fa9c3d7 1867 if (!s) {
a3621e74 1868 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1869 goto phooey; /* not present */
3fa9c3d7 1870 }
6eb5f6b9
JH
1871 }
1872
1de06328 1873
6eb5f6b9
JH
1874
1875 /* Simplest case: anchored match need be tried only once. */
1876 /* [unless only anchor is BOL and multiline is set] */
bbe252da 1877 if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
24b23f37 1878 if (s == startpos && regtry(&reginfo, &startpos))
6eb5f6b9 1879 goto got_it;
bbe252da
YO
1880 else if (multiline || (prog->intflags & PREGf_IMPLICIT)
1881 || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
6eb5f6b9
JH
1882 {
1883 char *end;
1884
1885 if (minlen)
1886 dontbother = minlen - 1;
1aa99e6b 1887 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9 1888 /* for multiline we only have to try after newlines */
33b8afdf 1889 if (prog->check_substr || prog->check_utf8) {
6eb5f6b9
JH
1890 if (s == startpos)
1891 goto after_try;
1892 while (1) {
24b23f37 1893 if (regtry(&reginfo, &s))
6eb5f6b9
JH
1894 goto got_it;
1895 after_try:
5339e136 1896 if (s > end)
6eb5f6b9 1897 goto phooey;
bbe252da 1898 if (prog->extflags & RXf_USE_INTUIT) {
288b8c02 1899 s = re_intuit_start(rx, sv, s + 1, strend, flags, NULL);
6eb5f6b9
JH
1900 if (!s)
1901 goto phooey;
1902 }
1903 else
1904 s++;
1905 }
1906 } else {
1907 if (s > startpos)
1908 s--;
1909 while (s < end) {
1910 if (*s++ == '\n') { /* don't need PL_utf8skip here */
24b23f37 1911 if (regtry(&reginfo, &s))
6eb5f6b9
JH
1912 goto got_it;
1913 }
1914 }
1915 }
1916 }
1917 goto phooey;
bbe252da 1918 } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
f9f4320a
YO
1919 {
1920 /* the warning about reginfo.ganch being used without intialization
bbe252da 1921 is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
f9f4320a 1922 and we only enter this block when the same bit is set. */
58e23c8d
YO
1923 char *tmp_s = reginfo.ganch - prog->gofs;
1924 if (regtry(&reginfo, &tmp_s))
6eb5f6b9
JH
1925 goto got_it;
1926 goto phooey;
1927 }
1928
1929 /* Messy cases: unanchored match. */
bbe252da 1930 if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
6eb5f6b9
JH
1931 /* we have /x+whatever/ */
1932 /* it must be a one character string (XXXX Except UTF?) */
33b8afdf 1933 char ch;
bf93d4cc
GS
1934#ifdef DEBUGGING
1935 int did_match = 0;
1936#endif
33b8afdf
JH
1937 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1938 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
3f7c398e 1939 ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
bf93d4cc 1940
1aa99e6b 1941 if (do_utf8) {
4cadc6a9 1942 REXEC_FBC_SCAN(
6eb5f6b9 1943 if (*s == ch) {
a3621e74 1944 DEBUG_EXECUTE_r( did_match = 1 );
24b23f37 1945 if (regtry(&reginfo, &s)) goto got_it;
6eb5f6b9
JH
1946 s += UTF8SKIP(s);
1947 while (s < strend && *s == ch)
1948 s += UTF8SKIP(s);
1949 }
4cadc6a9 1950 );
6eb5f6b9
JH
1951 }
1952 else {
4cadc6a9 1953 REXEC_FBC_SCAN(
6eb5f6b9 1954 if (*s == ch) {
a3621e74 1955 DEBUG_EXECUTE_r( did_match = 1 );
24b23f37 1956 if (regtry(&reginfo, &s)) goto got_it;
6eb5f6b9
JH
1957 s++;
1958 while (s < strend && *s == ch)
1959 s++;
1960 }
4cadc6a9 1961 );
6eb5f6b9 1962 }
a3621e74 1963 DEBUG_EXECUTE_r(if (!did_match)
bf93d4cc 1964 PerlIO_printf(Perl_debug_log,
b7953727
JH
1965 "Did not find anchored character...\n")
1966 );
6eb5f6b9 1967 }
a0714e2c
SS
1968 else if (prog->anchored_substr != NULL
1969 || prog->anchored_utf8 != NULL
1970 || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
33b8afdf
JH
1971 && prog->float_max_offset < strend - s)) {
1972 SV *must;
1973 I32 back_max;
1974 I32 back_min;
1975 char *last;
6eb5f6b9 1976 char *last1; /* Last position checked before */
bf93d4cc
GS
1977#ifdef DEBUGGING
1978 int did_match = 0;
1979#endif
33b8afdf
JH
1980 if (prog->anchored_substr || prog->anchored_utf8) {
1981 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1982 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1983 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1984 back_max = back_min = prog->anchored_offset;
1985 } else {
1986 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1987 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1988 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1989 back_max = prog->float_max_offset;
1990 back_min = prog->float_min_offset;
1991 }
1de06328
YO
1992
1993
33b8afdf
JH
1994 if (must == &PL_sv_undef)
1995 /* could not downgrade utf8 check substring, so must fail */
1996 goto phooey;
1997
1de06328
YO
1998 if (back_min<0) {
1999 last = strend;
2000 } else {
2001 last = HOP3c(strend, /* Cannot start after this */
2002 -(I32)(CHR_SVLEN(must)
2003 - (SvTAIL(must) != 0) + back_min), strbeg);
2004 }
6eb5f6b9
JH
2005 if (s > PL_bostr)
2006 last1 = HOPc(s, -1);
2007 else
2008 last1 = s - 1; /* bogus */
2009
a0288114 2010 /* XXXX check_substr already used to find "s", can optimize if
6eb5f6b9
JH
2011 check_substr==must. */
2012 scream_pos = -1;
2013 dontbother = end_shift;
2014 strend = HOPc(strend, -dontbother);
2015 while ( (s <= last) &&
9041c2e3 2016 ((flags & REXEC_SCREAM)
1de06328 2017 ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
6eb5f6b9 2018 end_shift, &scream_pos, 0))
1de06328 2019 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
9041c2e3 2020 (unsigned char*)strend, must,
7fba1cd6 2021 multiline ? FBMrf_MULTILINE : 0))) ) {
4addbd3b 2022 /* we may be pointing at the wrong string */
07bc277f 2023 if ((flags & REXEC_SCREAM) && RXp_MATCH_COPIED(prog))
3f7c398e 2024 s = strbeg + (s - SvPVX_const(sv));
a3621e74 2025 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
2026 if (HOPc(s, -back_max) > last1) {
2027 last1 = HOPc(s, -back_min);
2028 s = HOPc(s, -back_max);
2029 }
2030 else {
52657f30 2031 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
6eb5f6b9
JH
2032
2033 last1 = HOPc(s, -back_min);
52657f30 2034 s = t;
6eb5f6b9 2035 }
1aa99e6b 2036 if (do_utf8) {
6eb5f6b9 2037 while (s <= last1) {
24b23f37 2038 if (regtry(&reginfo, &s))
6eb5f6b9
JH
2039 goto got_it;
2040 s += UTF8SKIP(s);
2041 }
2042 }
2043 else {
2044 while (s <= last1) {
24b23f37 2045 if (regtry(&reginfo, &s))
6eb5f6b9
JH
2046 goto got_it;
2047 s++;
2048 }
2049 }
2050 }
ab3bbdeb
YO
2051 DEBUG_EXECUTE_r(if (!did_match) {
2052 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
2053 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
2054 PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
33b8afdf 2055 ((must == prog->anchored_substr || must == prog->anchored_utf8)
bf93d4cc 2056 ? "anchored" : "floating"),
ab3bbdeb
YO
2057 quoted, RE_SV_TAIL(must));
2058 });
6eb5f6b9
JH
2059 goto phooey;
2060 }
f8fc2ecf 2061 else if ( (c = progi->regstclass) ) {
f14c76ed 2062 if (minlen) {
f8fc2ecf 2063 const OPCODE op = OP(progi->regstclass);
66e933ab 2064 /* don't bother with what can't match */
786e8c11 2065 if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
f14c76ed
RGS
2066 strend = HOPc(strend, -(minlen - 1));
2067 }
a3621e74 2068 DEBUG_EXECUTE_r({
be8e71aa 2069 SV * const prop = sv_newmortal();
32fc9b6a 2070 regprop(prog, prop, c);
0df25f3d 2071 {
02daf0ab 2072 RE_PV_QUOTED_DECL(quoted,do_utf8,PERL_DEBUG_PAD_ZERO(1),
ab3bbdeb 2073 s,strend-s,60);
0df25f3d 2074 PerlIO_printf(Perl_debug_log,
ab3bbdeb 2075 "Matching stclass %.*s against %s (%d chars)\n",
e4f74956 2076 (int)SvCUR(prop), SvPVX_const(prop),
ab3bbdeb 2077 quoted, (int)(strend - s));
0df25f3d 2078 }
ffc61ed2 2079 });
3b0527fe 2080 if (find_byclass(prog, c, s, strend, &reginfo))
6eb5f6b9 2081 goto got_it;
07be1b83 2082 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
d6a28714
JH
2083 }
2084 else {
2085 dontbother = 0;
a0714e2c 2086 if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
33b8afdf 2087 /* Trim the end. */
d6a28714 2088 char *last;
33b8afdf
JH
2089 SV* float_real;
2090
2091 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2092 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2093 float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
d6a28714
JH
2094
2095 if (flags & REXEC_SCREAM) {
33b8afdf 2096 last = screaminstr(sv, float_real, s - strbeg,
d6a28714
JH
2097 end_shift, &scream_pos, 1); /* last one */
2098 if (!last)
ffc61ed2 2099 last = scream_olds; /* Only one occurrence. */
4addbd3b 2100 /* we may be pointing at the wrong string */
07bc277f 2101 else if (RXp_MATCH_COPIED(prog))
3f7c398e 2102 s = strbeg + (s - SvPVX_const(sv));
b8c5462f 2103 }
d6a28714
JH
2104 else {
2105 STRLEN len;
cfd0369c 2106 const char * const little = SvPV_const(float_real, len);
d6a28714 2107
33b8afdf 2108 if (SvTAIL(float_real)) {
d6a28714
JH
2109 if (memEQ(strend - len + 1, little, len - 1))
2110 last = strend - len + 1;
7fba1cd6 2111 else if (!multiline)
9041c2e3 2112 last = memEQ(strend - len, little, len)
bd61b366 2113 ? strend - len : NULL;
b8c5462f 2114 else
d6a28714
JH
2115 goto find_last;
2116 } else {
2117 find_last:
9041c2e3 2118 if (len)
d6a28714 2119 last = rninstr(s, strend, little, little + len);
b8c5462f 2120 else
a0288114 2121 last = strend; /* matching "$" */
b8c5462f 2122 }
b8c5462f 2123 }
bf93d4cc 2124 if (last == NULL) {
6bda09f9
YO
2125 DEBUG_EXECUTE_r(
2126 PerlIO_printf(Perl_debug_log,
2127 "%sCan't trim the tail, match fails (should not happen)%s\n",
2128 PL_colors[4], PL_colors[5]));
bf93d4cc
GS
2129 goto phooey; /* Should not happen! */
2130 }
d6a28714
JH
2131 dontbother = strend - last + prog->float_min_offset;
2132 }
2133 if (minlen && (dontbother < minlen))
2134 dontbother = minlen - 1;
2135 strend -= dontbother; /* this one's always in bytes! */
2136 /* We don't know much -- general case. */
1aa99e6b 2137 if (do_utf8) {
d6a28714 2138 for (;;) {
24b23f37 2139 if (regtry(&reginfo, &s))
d6a28714
JH
2140 goto got_it;
2141 if (s >= strend)
2142 break;
b8c5462f 2143 s += UTF8SKIP(s);
d6a28714
JH
2144 };
2145 }
2146 else {
2147 do {
24b23f37 2148 if (regtry(&reginfo, &s))
d6a28714
JH
2149 goto got_it;
2150 } while (s++ < strend);
2151 }
2152 }
2153
2154 /* Failure. */
2155 goto phooey;
2156
2157got_it:
288b8c02 2158 RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
d6a28714 2159
19b95bf0 2160 if (PL_reg_eval_set)
4f639d21 2161 restore_pos(aTHX_ prog);
5daac39c
NC
2162 if (RXp_PAREN_NAMES(prog))
2163 (void)hv_iterinit(RXp_PAREN_NAMES(prog));
d6a28714
JH
2164
2165 /* make sure $`, $&, $', and $digit will work later */
2166 if ( !(flags & REXEC_NOT_FIRST) ) {
288b8c02 2167 RX_MATCH_COPY_FREE(rx);
d6a28714 2168 if (flags & REXEC_COPY_STR) {
be8e71aa 2169 const I32 i = PL_regeol - startpos + (stringarg - strbeg);
f8c7b90f 2170#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2171 if ((SvIsCOW(sv)
2172 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2173 if (DEBUG_C_TEST) {
2174 PerlIO_printf(Perl_debug_log,
2175 "Copy on write: regexp capture, type %d\n",
2176 (int) SvTYPE(sv));
2177 }
2178 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
d5263905 2179 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
ed252734
NC
2180 assert (SvPOKp(prog->saved_copy));
2181 } else
2182#endif
2183 {
288b8c02 2184 RX_MATCH_COPIED_on(rx);
ed252734
NC
2185 s = savepvn(strbeg, i);
2186 prog->subbeg = s;
2187 }
d6a28714 2188 prog->sublen = i;
d6a28714
JH
2189 }
2190 else {
2191 prog->subbeg = strbeg;
2192 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2193 }
2194 }
9041c2e3 2195
d6a28714
JH
2196 return 1;
2197
2198phooey:
a3621e74 2199 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
e4584336 2200 PL_colors[4], PL_colors[5]));
d6a28714 2201 if (PL_reg_eval_set)
4f639d21 2202 restore_pos(aTHX_ prog);
fae667d5 2203 if (swap_on_fail)
c74340f9 2204 /* we failed :-( roll it back */
fae667d5
YO
2205 swap_match_buff(prog);
2206
d6a28714
JH
2207 return 0;
2208}
2209
6bda09f9 2210
d6a28714
JH
2211/*
2212 - regtry - try match at specific point
2213 */
2214STATIC I32 /* 0 failure, 1 success */
24b23f37 2215S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
d6a28714 2216{
97aff369 2217 dVAR;
d6a28714 2218 CHECKPOINT lastcp;
288b8c02
NC
2219 REGEXP *const rx = reginfo->prog;
2220 regexp *const prog = (struct regexp *)SvANY(rx);
f8fc2ecf 2221 RXi_GET_DECL(prog,progi);
a3621e74 2222 GET_RE_DEBUG_FLAGS_DECL;
7918f24d
NC
2223
2224 PERL_ARGS_ASSERT_REGTRY;
2225
24b23f37 2226 reginfo->cutpoint=NULL;
d6a28714 2227
bbe252da 2228 if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
d6a28714
JH
2229 MAGIC *mg;
2230
2231 PL_reg_eval_set = RS_init;
a3621e74 2232 DEBUG_EXECUTE_r(DEBUG_s(
b900a521
JH
2233 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2234 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 2235 ));
ea8d6ae1 2236 SAVESTACK_CXPOS();
d6a28714
JH
2237 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2238 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2239 SAVETMPS;
2240 /* Apparently this is not needed, judging by wantarray. */
e8347627 2241 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
2242 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2243
3b0527fe 2244 if (reginfo->sv) {
d6a28714 2245 /* Make $_ available to executed code. */
3b0527fe 2246 if (reginfo->sv != DEFSV) {
59f00321 2247 SAVE_DEFSV;
3b0527fe 2248 DEFSV = reginfo->sv;
b8c5462f 2249 }
d6a28714 2250
3b0527fe
DM
2251 if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2252 && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
d6a28714 2253 /* prepare for quick setting of pos */
d300d9fa 2254#ifdef PERL_OLD_COPY_ON_WRITE
51a9ea20
NC
2255 if (SvIsCOW(reginfo->sv))
2256 sv_force_normal_flags(reginfo->sv, 0);
d300d9fa 2257#endif
3dab1dad 2258 mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
d300d9fa 2259 &PL_vtbl_mglob, NULL, 0);
d6a28714 2260 mg->mg_len = -1;
b8c5462f 2261 }
d6a28714
JH
2262 PL_reg_magic = mg;
2263 PL_reg_oldpos = mg->mg_len;
4f639d21 2264 SAVEDESTRUCTOR_X(restore_pos, prog);
d6a28714 2265 }
09687e5a 2266 if (!PL_reg_curpm) {
a02a5408 2267 Newxz(PL_reg_curpm, 1, PMOP);
09687e5a
AB
2268#ifdef USE_ITHREADS
2269 {
14a49a24 2270 SV* const repointer = &PL_sv_undef;
92313705
NC
2271 /* this regexp is also owned by the new PL_reg_curpm, which
2272 will try to free it. */
d2ece331 2273 av_push(PL_regex_padav, repointer);
09687e5a
AB
2274 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2275 PL_regex_pad = AvARRAY(PL_regex_padav);
2276 }
2277#endif
2278 }
86c29d75
NC
2279#ifdef USE_ITHREADS
2280 /* It seems that non-ithreads works both with and without this code.
2281 So for efficiency reasons it seems best not to have the code
2282 compiled when it is not needed. */
92313705
NC
2283 /* This is safe against NULLs: */
2284 ReREFCNT_dec(PM_GETRE(PL_reg_curpm));
2285 /* PM_reg_curpm owns a reference to this regexp. */
2286 ReREFCNT_inc(rx);
86c29d75 2287#endif
288b8c02 2288 PM_SETRE(PL_reg_curpm, rx);
d6a28714
JH
2289 PL_reg_oldcurpm = PL_curpm;
2290 PL_curpm = PL_reg_curpm;
07bc277f 2291 if (RXp_MATCH_COPIED(prog)) {
d6a28714
JH
2292 /* Here is a serious problem: we cannot rewrite subbeg,
2293 since it may be needed if this match fails. Thus
2294 $` inside (?{}) could fail... */
2295 PL_reg_oldsaved = prog->subbeg;
2296 PL_reg_oldsavedlen = prog->sublen;
f8c7b90f 2297#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2298 PL_nrs = prog->saved_copy;
2299#endif
07bc277f 2300 RXp_MATCH_COPIED_off(prog);
d6a28714
JH
2301 }
2302 else
bd61b366 2303 PL_reg_oldsaved = NULL;
d6a28714
JH
2304 prog->subbeg = PL_bostr;
2305 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2306 }
24b23f37 2307 DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
f0ab9afb 2308 prog->offs[0].start = *startpos - PL_bostr;
24b23f37 2309 PL_reginput = *startpos;
d6a28714 2310 PL_reglastparen = &prog->lastparen;
a01268b5 2311 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714 2312 prog->lastparen = 0;
03994de8 2313 prog->lastcloseparen = 0;
d6a28714 2314 PL_regsize = 0;
f0ab9afb 2315 PL_regoffs = prog->offs;
d6a28714
JH
2316 if (PL_reg_start_tmpl <= prog->nparens) {
2317 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2318 if(PL_reg_start_tmp)
2319 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2320 else
a02a5408 2321 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
d6a28714
JH
2322 }
2323
2324 /* XXXX What this code is doing here?!!! There should be no need
2325 to do this again and again, PL_reglastparen should take care of
3dd2943c 2326 this! --ilya*/
dafc8851
JH
2327
2328 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2329 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116 2330 * PL_reglastparen), is not needed at all by the test suite
225593e1
DM
2331 * (op/regexp, op/pat, op/split), but that code is needed otherwise
2332 * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
2333 * Meanwhile, this code *is* needed for the
daf18116
JH
2334 * above-mentioned test suite tests to succeed. The common theme
2335 * on those tests seems to be returning null fields from matches.
225593e1 2336 * --jhi updated by dapm */
dafc8851 2337#if 1
d6a28714 2338 if (prog->nparens) {
f0ab9afb 2339 regexp_paren_pair *pp = PL_regoffs;
097eb12c 2340 register I32 i;
eb160463 2341 for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
f0ab9afb
NC
2342 ++pp;
2343 pp->start = -1;
2344 pp->end = -1;
d6a28714
JH
2345 }
2346 }
dafc8851 2347#endif
02db2b7b 2348 REGCP_SET(lastcp);
f8fc2ecf 2349 if (regmatch(reginfo, progi->program + 1)) {
f0ab9afb 2350 PL_regoffs[0].end = PL_reginput - PL_bostr;
d6a28714
JH
2351 return 1;
2352 }
24b23f37
YO
2353 if (reginfo->cutpoint)
2354 *startpos= reginfo->cutpoint;
02db2b7b 2355 REGCP_UNWIND(lastcp);
d6a28714
JH
2356 return 0;
2357}
2358
02db2b7b 2359
8ba1375e
MJD
2360#define sayYES goto yes
2361#define sayNO goto no
262b90c4 2362#define sayNO_SILENT goto no_silent
8ba1375e 2363
f9f4320a
YO
2364/* we dont use STMT_START/END here because it leads to
2365 "unreachable code" warnings, which are bogus, but distracting. */
2366#define CACHEsayNO \
c476f425
DM
2367 if (ST.cache_mask) \
2368 PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
f9f4320a 2369 sayNO
3298f257 2370
a3621e74 2371/* this is used to determine how far from the left messages like
265c4333
YO
2372 'failed...' are printed. It should be set such that messages
2373 are inline with the regop output that created them.
a3621e74 2374*/
265c4333 2375#define REPORT_CODE_OFF 32
a3621e74
YO
2376
2377
2378/* Make sure there is a test for this +1 options in re_tests */
2379#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2380
40a82448
DM
2381#define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2382#define CHRTEST_VOID -1000 /* the c1/c2 "next char" test should be skipped */
9e137952 2383
86545054
DM
2384#define SLAB_FIRST(s) (&(s)->states[0])
2385#define SLAB_LAST(s) (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2386
5d9a96ca
DM
2387/* grab a new slab and return the first slot in it */
2388
2389STATIC regmatch_state *
2390S_push_slab(pTHX)
2391{
a35a87e7 2392#if PERL_VERSION < 9 && !defined(PERL_CORE)
54df2634
NC
2393 dMY_CXT;
2394#endif
5d9a96ca
DM
2395 regmatch_slab *s = PL_regmatch_slab->next;
2396 if (!s) {
2397 Newx(s, 1, regmatch_slab);
2398 s->prev = PL_regmatch_slab;
2399 s->next = NULL;
2400 PL_regmatch_slab->next = s;
2401 }
2402 PL_regmatch_slab = s;
86545054 2403 return SLAB_FIRST(s);
5d9a96ca 2404}
5b47454d 2405
95b24440 2406
40a82448
DM
2407/* push a new state then goto it */
2408
2409#define PUSH_STATE_GOTO(state, node) \
2410 scan = node; \
2411 st->resume_state = state; \
2412 goto push_state;
2413
2414/* push a new state with success backtracking, then goto it */
2415
2416#define PUSH_YES_STATE_GOTO(state, node) \
2417 scan = node; \
2418 st->resume_state = state; \
2419 goto push_yes_state;
2420
aa283a38 2421
aa283a38 2422
d6a28714 2423/*
95b24440 2424
bf1f174e
DM
2425regmatch() - main matching routine
2426
2427This is basically one big switch statement in a loop. We execute an op,
2428set 'next' to point the next op, and continue. If we come to a point which
2429we may need to backtrack to on failure such as (A|B|C), we push a
2430backtrack state onto the backtrack stack. On failure, we pop the top
2431state, and re-enter the loop at the state indicated. If there are no more
2432states to pop, we return failure.
2433
2434Sometimes we also need to backtrack on success; for example /A+/, where
2435after successfully matching one A, we need to go back and try to
2436match another one; similarly for lookahead assertions: if the assertion
2437completes successfully, we backtrack to the state just before the assertion
2438and then carry on. In these cases, the pushed state is marked as
2439'backtrack on success too'. This marking is in fact done by a chain of
2440pointers, each pointing to the previous 'yes' state. On success, we pop to
2441the nearest yes state, discarding any intermediate failure-only states.
2442Sometimes a yes state is pushed just to force some cleanup code to be
2443called at the end of a successful match or submatch; e.g. (??{$re}) uses
2444it to free the inner regex.
2445
2446Note that failure backtracking rewinds the cursor position, while
2447success backtracking leaves it alone.
2448
2449A pattern is complete when the END op is executed, while a subpattern
2450such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2451ops trigger the "pop to last yes state if any, otherwise return true"
2452behaviour.
2453
2454A common convention in this function is to use A and B to refer to the two
2455subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2456the subpattern to be matched possibly multiple times, while B is the entire
2457rest of the pattern. Variable and state names reflect this convention.
2458
2459The states in the main switch are the union of ops and failure/success of
2460substates associated with with that op. For example, IFMATCH is the op
2461that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2462'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2463successfully matched A and IFMATCH_A_fail is a state saying that we have
2464just failed to match A. Resume states always come in pairs. The backtrack
2465state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2466at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2467on success or failure.
2468
2469The struct that holds a backtracking state is actually a big union, with
2470one variant for each major type of op. The variable st points to the
2471top-most backtrack struct. To make the code clearer, within each
2472block of code we #define ST to alias the relevant union.
2473
2474Here's a concrete example of a (vastly oversimplified) IFMATCH
2475implementation:
2476
2477 switch (state) {
2478 ....
2479
2480#define ST st->u.ifmatch
2481
2482 case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2483 ST.foo = ...; // some state we wish to save
95b24440 2484 ...
bf1f174e
DM
2485 // push a yes backtrack state with a resume value of
2486 // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2487 // first node of A:
2488 PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2489 // NOTREACHED
2490
2491 case IFMATCH_A: // we have successfully executed A; now continue with B
2492 next = B;
2493 bar = ST.foo; // do something with the preserved value
2494 break;
2495
2496 case IFMATCH_A_fail: // A failed, so the assertion failed
2497 ...; // do some housekeeping, then ...
2498 sayNO; // propagate the failure
2499
2500#undef ST
95b24440 2501
bf1f174e
DM
2502 ...
2503 }
95b24440 2504
bf1f174e
DM
2505For any old-timers reading this who are familiar with the old recursive
2506approach, the code above is equivalent to:
95b24440 2507
bf1f174e
DM
2508 case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2509 {
2510 int foo = ...
95b24440 2511 ...
bf1f174e
DM
2512 if (regmatch(A)) {
2513 next = B;
2514 bar = foo;
2515 break;
95b24440 2516 }
bf1f174e
DM
2517 ...; // do some housekeeping, then ...
2518 sayNO; // propagate the failure
95b24440 2519 }
bf1f174e
DM
2520
2521The topmost backtrack state, pointed to by st, is usually free. If you
2522want to claim it, populate any ST.foo fields in it with values you wish to
2523save, then do one of
2524
2525 PUSH_STATE_GOTO(resume_state, node);
2526 PUSH_YES_STATE_GOTO(resume_state, node);
2527
2528which sets that backtrack state's resume value to 'resume_state', pushes a
2529new free entry to the top of the backtrack stack, then goes to 'node'.
2530On backtracking, the free slot is popped, and the saved state becomes the
2531new free state. An ST.foo field in this new top state can be temporarily
2532accessed to retrieve values, but once the main loop is re-entered, it
2533becomes available for reuse.
2534
2535Note that the depth of the backtrack stack constantly increases during the
2536left-to-right execution of the pattern, rather than going up and down with
2537the pattern nesting. For example the stack is at its maximum at Z at the
2538end of the pattern, rather than at X in the following:
2539
2540 /(((X)+)+)+....(Y)+....Z/
2541
2542The only exceptions to this are lookahead/behind assertions and the cut,
2543(?>A), which pop all the backtrack states associated with A before
2544continuing.
2545
2546Bascktrack state structs are allocated in slabs of about 4K in size.
2547PL_regmatch_state and st always point to the currently active state,
2548and PL_regmatch_slab points to the slab currently containing
2549PL_regmatch_state. The first time regmatch() is called, the first slab is
2550allocated, and is never freed until interpreter destruction. When the slab
2551is full, a new one is allocated and chained to the end. At exit from
2552regmatch(), slabs allocated since entry are freed.
2553
2554*/
95b24440 2555
40a82448 2556
5bc10b2c 2557#define DEBUG_STATE_pp(pp) \
265c4333 2558 DEBUG_STATE_r({ \
5bc10b2c
DM
2559 DUMP_EXEC_POS(locinput, scan, do_utf8); \
2560 PerlIO_printf(Perl_debug_log, \
5d458dd8 2561 " %*s"pp" %s%s%s%s%s\n", \
5bc10b2c 2562 depth*2, "", \
13d6edb4 2563 PL_reg_name[st->resume_state], \
5d458dd8
YO
2564 ((st==yes_state||st==mark_state) ? "[" : ""), \
2565 ((st==yes_state) ? "Y" : ""), \
2566 ((st==mark_state) ? "M" : ""), \
2567 ((st==yes_state||st==mark_state) ? "]" : "") \
2568 ); \
265c4333 2569 });
5bc10b2c 2570
40a82448 2571
3dab1dad 2572#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
95b24440 2573
3df15adc 2574#ifdef DEBUGGING
5bc10b2c 2575
ab3bbdeb 2576STATIC void
efd26800 2577S_debug_start_match(pTHX_ const REGEXP *prog, const bool do_utf8,
ab3bbdeb
YO
2578 const char *start, const char *end, const char *blurb)
2579{
efd26800 2580 const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
7918f24d
NC
2581
2582 PERL_ARGS_ASSERT_DEBUG_START_MATCH;
2583
ab3bbdeb
YO
2584 if (!PL_colorset)
2585 reginitcolors();
2586 {
2587 RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
efd26800 2588 RX_PRECOMP(prog), RX_PRELEN(prog), 60);
ab3bbdeb
YO
2589
2590 RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2591 start, end - start, 60);
2592
2593 PerlIO_printf(Perl_debug_log,
2594 "%s%s REx%s %s against %s\n",
2595 PL_colors[4], blurb, PL_colors[5], s0, s1);
2596
2597 if (do_utf8||utf8_pat)
1de06328
YO
2598 PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2599 utf8_pat ? "pattern" : "",
2600 utf8_pat && do_utf8 ? " and " : "",
2601 do_utf8 ? "string" : ""
ab3bbdeb
YO
2602 );
2603 }
2604}
3df15adc
YO
2605
2606STATIC void
786e8c11
YO
2607S_dump_exec_pos(pTHX_ const char *locinput,
2608 const regnode *scan,
2609 const char *loc_regeol,
2610 const char *loc_bostr,
2611 const char *loc_reg_starttry,
2612 const bool do_utf8)
07be1b83 2613{
786e8c11 2614 const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
07be1b83 2615 const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
786e8c11 2616 int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
07be1b83
YO
2617 /* The part of the string before starttry has one color
2618 (pref0_len chars), between starttry and current
2619 position another one (pref_len - pref0_len chars),
2620 after the current position the third one.
2621 We assume that pref0_len <= pref_len, otherwise we
2622 decrease pref0_len. */
786e8c11
YO
2623 int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2624 ? (5 + taill) - l : locinput - loc_bostr;
07be1b83
YO
2625 int pref0_len;
2626
7918f24d
NC
2627 PERL_ARGS_ASSERT_DUMP_EXEC_POS;
2628
07be1b83
YO
2629 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2630 pref_len++;
786e8c11
YO
2631 pref0_len = pref_len - (locinput - loc_reg_starttry);
2632 if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2633 l = ( loc_regeol - locinput > (5 + taill) - pref_len
2634 ? (5 + taill) - pref_len : loc_regeol - locinput);
07be1b83
YO
2635 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2636 l--;
2637 if (pref0_len < 0)
2638 pref0_len = 0;
2639 if (pref0_len > pref_len)
2640 pref0_len = pref_len;
2641 {
3df15adc 2642 const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
0df25f3d 2643
ab3bbdeb 2644 RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
1de06328 2645 (locinput - pref_len),pref0_len, 60, 4, 5);
0df25f3d 2646
ab3bbdeb 2647 RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
3df15adc 2648 (locinput - pref_len + pref0_len),
1de06328 2649 pref_len - pref0_len, 60, 2, 3);
0df25f3d 2650
ab3bbdeb 2651 RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
1de06328 2652 locinput, loc_regeol - locinput, 10, 0, 1);
0df25f3d 2653
1de06328 2654 const STRLEN tlen=len0+len1+len2;
3df15adc 2655 PerlIO_printf(Perl_debug_log,
ab3bbdeb 2656 "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
786e8c11 2657 (IV)(locinput - loc_bostr),
07be1b83 2658 len0, s0,
07be1b83 2659 len1, s1,
07be1b83 2660 (docolor ? "" : "> <"),
07be1b83 2661 len2, s2,
f9f4320a 2662 (int)(tlen > 19 ? 0 : 19 - tlen),
07be1b83
YO
2663 "");
2664 }
2665}
3df15adc 2666
07be1b83
YO
2667#endif
2668
0a4db386
YO
2669/* reg_check_named_buff_matched()
2670 * Checks to see if a named buffer has matched. The data array of
2671 * buffer numbers corresponding to the buffer is expected to reside
2672 * in the regexp->data->data array in the slot stored in the ARG() of
2673 * node involved. Note that this routine doesn't actually care about the
2674 * name, that information is not preserved from compilation to execution.
2675 * Returns the index of the leftmost defined buffer with the given name
2676 * or 0 if non of the buffers matched.
2677 */
2678STATIC I32
7918f24d
NC
2679S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan)
2680{
0a4db386 2681 I32 n;
f8fc2ecf
YO
2682 RXi_GET_DECL(rex,rexi);
2683 SV *sv_dat=(SV*)rexi->data->data[ ARG( scan ) ];
0a4db386 2684 I32 *nums=(I32*)SvPVX(sv_dat);
7918f24d
NC
2685
2686 PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
2687
0a4db386
YO
2688 for ( n=0; n<SvIVX(sv_dat); n++ ) {
2689 if ((I32)*PL_reglastparen >= nums[n] &&
f0ab9afb 2690 PL_regoffs[nums[n]].end != -1)
0a4db386
YO
2691 {
2692 return nums[n];
2693 }
2694 }
2695 return 0;
2696}
2697
2f554ef7
DM
2698
2699/* free all slabs above current one - called during LEAVE_SCOPE */
2700
2701STATIC void
2702S_clear_backtrack_stack(pTHX_ void *p)
2703{
2704 regmatch_slab *s = PL_regmatch_slab->next;
2705 PERL_UNUSED_ARG(p);
2706
2707 if (!s)
2708 return;
2709 PL_regmatch_slab->next = NULL;
2710 while (s) {
2711 regmatch_slab * const osl = s;
2712 s = s->next;
2713 Safefree(osl);
2714 }
2715}
2716
2717
28d8d7f4
YO
2718#define SETREX(Re1,Re2) \
2719 if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
2720 Re1 = (Re2)
2721
d6a28714 2722STATIC I32 /* 0 failure, 1 success */
24b23f37 2723S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
d6a28714 2724{
a35a87e7 2725#if PERL_VERSION < 9 && !defined(PERL_CORE)
54df2634
NC
2726 dMY_CXT;
2727#endif
27da23d5 2728 dVAR;
95b24440 2729 register const bool do_utf8 = PL_reg_match_utf8;
4ad0818d 2730 const U32 uniflags = UTF8_ALLOW_DEFAULT;
288b8c02
NC
2731 REGEXP *rex_sv = reginfo->prog;
2732 regexp *rex = (struct regexp *)SvANY(rex_sv);
f8fc2ecf 2733 RXi_GET_DECL(rex,rexi);
2f554ef7 2734 I32 oldsave;
5d9a96ca
DM
2735 /* the current state. This is a cached copy of PL_regmatch_state */
2736 register regmatch_state *st;
5d9a96ca
DM
2737 /* cache heavy used fields of st in registers */
2738 register regnode *scan;
2739 register regnode *next;
438e9bae 2740 register U32 n = 0; /* general value; init to avoid compiler warning */
24d3c4a9 2741 register I32 ln = 0; /* len or last; init to avoid compiler warning */
5d9a96ca 2742 register char *locinput = PL_reginput;
5d9a96ca 2743 register I32 nextchr; /* is always set to UCHARAT(locinput) */
24d3c4a9 2744
b69b0499 2745 bool result = 0; /* return value of S_regmatch */
24d3c4a9 2746 int depth = 0; /* depth of backtrack stack */
4b196cd4
YO
2747 U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
2748 const U32 max_nochange_depth =
2749 (3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
2750 3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
77cb431f
DM
2751 regmatch_state *yes_state = NULL; /* state to pop to on success of
2752 subpattern */
e2e6a0f1
YO
2753 /* mark_state piggy backs on the yes_state logic so that when we unwind
2754 the stack on success we can update the mark_state as we go */
2755 regmatch_state *mark_state = NULL; /* last mark state we have seen */
faec1544 2756 regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
b8591aee 2757 struct regmatch_state *cur_curlyx = NULL; /* most recent curlyx */
40a82448 2758 U32 state_num;
5d458dd8
YO
2759 bool no_final = 0; /* prevent failure from backtracking? */
2760 bool do_cutgroup = 0; /* no_final only until next branch/trie entry */
e2e6a0f1 2761 char *startpoint = PL_reginput;
5d458dd8
YO
2762 SV *popmark = NULL; /* are we looking for a mark? */
2763 SV *sv_commit = NULL; /* last mark name seen in failure */
2764 SV *sv_yes_mark = NULL; /* last mark name we have seen
2765 during a successfull match */
2766 U32 lastopen = 0; /* last open we saw */
2767 bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
19b95bf0 2768 SV* const oreplsv = GvSV(PL_replgv);
24d3c4a9
DM
2769 /* these three flags are set by various ops to signal information to
2770 * the very next op. They have a useful lifetime of exactly one loop
2771 * iteration, and are not preserved or restored by state pushes/pops
2772 */
2773 bool sw = 0; /* the condition value in (?(cond)a|b) */
2774 bool minmod = 0; /* the next "{n,m}" is a "{n,m}?" */
2775 int logical = 0; /* the following EVAL is:
2776 0: (?{...})
2777 1: (?(?{...})X|Y)
2778 2: (??{...})
2779 or the following IFMATCH/UNLESSM is:
2780 false: plain (?=foo)
2781 true: used as a condition: (?(?=foo))
2782 */
95b24440 2783#ifdef DEBUGGING
e68ec53f 2784 GET_RE_DEBUG_FLAGS_DECL;
d6a28714
JH
2785#endif
2786
7918f24d
NC
2787 PERL_ARGS_ASSERT_REGMATCH;
2788
3b57cd43 2789 DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
24b23f37 2790 PerlIO_printf(Perl_debug_log,"regmatch start\n");
3b57cd43 2791 }));
5d9a96ca
DM
2792 /* on first ever call to regmatch, allocate first slab */
2793 if (!PL_regmatch_slab) {
2794 Newx(PL_regmatch_slab, 1, regmatch_slab);
2795 PL_regmatch_slab->prev = NULL;
2796 PL_regmatch_slab->next = NULL;
86545054 2797 PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
5d9a96ca
DM
2798 }
2799
2f554ef7
DM
2800 oldsave = PL_savestack_ix;
2801 SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
2802 SAVEVPTR(PL_regmatch_slab);
2803 SAVEVPTR(PL_regmatch_state);
5d9a96ca
DM
2804
2805 /* grab next free state slot */
2806 st = ++PL_regmatch_state;
86545054 2807 if (st > SLAB_LAST(PL_regmatch_slab))
5d9a96ca
DM
2808 st = PL_regmatch_state = S_push_slab(aTHX);
2809
d6a28714
JH
2810 /* Note that nextchr is a byte even in UTF */
2811 nextchr = UCHARAT(locinput);
2812 scan = prog;
2813 while (scan != NULL) {
8ba1375e 2814
a3621e74 2815 DEBUG_EXECUTE_r( {
6136c704 2816 SV * const prop = sv_newmortal();
1de06328 2817 regnode *rnext=regnext(scan);
786e8c11 2818 DUMP_EXEC_POS( locinput, scan, do_utf8 );
32fc9b6a 2819 regprop(rex, prop, scan);
07be1b83
YO
2820
2821 PerlIO_printf(Perl_debug_log,
2822 "%3"IVdf":%*s%s(%"IVdf")\n",
f8fc2ecf 2823 (IV)(scan - rexi->program), depth*2, "",
07be1b83 2824 SvPVX_const(prop),
1de06328 2825 (PL_regkind[OP(scan)] == END || !rnext) ?
f8fc2ecf 2826 0 : (IV)(rnext - rexi->program));
2a782b5b 2827 });
d6a28714
JH
2828
2829 next = scan + NEXT_OFF(scan);
2830 if (next == scan)
2831 next = NULL;
40a82448 2832 state_num = OP(scan);
d6a28714 2833
40a82448
DM
2834 reenter_switch:
2835 switch (state_num) {
d6a28714 2836 case BOL:
7fba1cd6 2837 if (locinput == PL_bostr)
d6a28714 2838 {
3b0527fe 2839 /* reginfo->till = reginfo->bol; */
b8c5462f
JH
2840 break;
2841 }
d6a28714
JH
2842 sayNO;
2843 case MBOL:
12d33761
HS
2844 if (locinput == PL_bostr ||
2845 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2846 {
b8c5462f
JH
2847 break;
2848 }
d6a28714
JH
2849 sayNO;
2850 case SBOL:
c2a73568 2851 if (locinput == PL_bostr)
b8c5462f 2852 break;
d6a28714
JH
2853 sayNO;
2854 case GPOS:
3b0527fe 2855 if (locinput == reginfo->ganch)
d6a28714
JH
2856 break;
2857 sayNO;
ee9b8eae
YO
2858
2859 case KEEPS:
2860 /* update the startpoint */
f0ab9afb 2861 st->u.keeper.val = PL_regoffs[0].start;
ee9b8eae 2862 PL_reginput = locinput;
f0ab9afb 2863 PL_regoffs[0].start = locinput - PL_bostr;
ee9b8eae
YO
2864 PUSH_STATE_GOTO(KEEPS_next, next);
2865 /*NOT-REACHED*/
2866 case KEEPS_next_fail:
2867 /* rollback the start point change */
f0ab9afb 2868 PL_regoffs[0].start = st->u.keeper.val;
ee9b8eae
YO
2869 sayNO_SILENT;
2870 /*NOT-REACHED*/
d6a28714 2871 case EOL:
d6a28714
JH
2872 goto seol;
2873 case MEOL:
d6a28714 2874 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2875 sayNO;
b8c5462f 2876 break;
d6a28714
JH
2877 case SEOL:
2878 seol:
2879 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2880 sayNO;
d6a28714 2881 if (PL_regeol - locinput > 1)
b8c5462f 2882 sayNO;
b8c5462f 2883 break;
d6a28714
JH
2884 case EOS:
2885 if (PL_regeol != locinput)
b8c5462f 2886 sayNO;
d6a28714 2887 break;
ffc61ed2 2888 case SANY:
d6a28714 2889 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2890 sayNO;
f33976b4
DB
2891 if (do_utf8) {
2892 locinput += PL_utf8skip[nextchr];
2893 if (locinput > PL_regeol)
2894 sayNO;
2895 nextchr = UCHARAT(locinput);
2896 }
2897 else
2898 nextchr = UCHARAT(++locinput);
2899 break;
2900 case CANY:
2901 if (!nextchr && locinput >= PL_regeol)
2902 sayNO;
b8c5462f 2903 nextchr = UCHARAT(++locinput);
a0d0e21e 2904 break;
ffc61ed2 2905 case REG_ANY:
1aa99e6b
IH
2906 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2907 sayNO;
2908 if (do_utf8) {
b8c5462f 2909 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2910 if (locinput > PL_regeol)
2911 sayNO;
a0ed51b3 2912 nextchr = UCHARAT(locinput);
a0ed51b3 2913 }
1aa99e6b
IH
2914 else
2915 nextchr = UCHARAT(++locinput);
a0ed51b3 2916 break;
166ba7cd
DM
2917
2918#undef ST
2919#define ST st->u.trie
786e8c11
YO
2920 case TRIEC:
2921 /* In this case the charclass data is available inline so
2922 we can fail fast without a lot of extra overhead.
2923 */
2924 if (scan->flags == EXACT || !do_utf8) {
2925 if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
2926 DEBUG_EXECUTE_r(
2927 PerlIO_printf(Perl_debug_log,
2928 "%*s %sfailed to match trie start class...%s\n",
5bc10b2c 2929 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
786e8c11
YO
2930 );
2931 sayNO_SILENT;
2932 /* NOTREACHED */
2933 }
2934 }
2935 /* FALL THROUGH */
5b47454d 2936 case TRIE:
3dab1dad 2937 {
07be1b83 2938 /* what type of TRIE am I? (utf8 makes this contextual) */
a0a388a1 2939 DECL_TRIE_TYPE(scan);
3dab1dad
YO
2940
2941 /* what trie are we using right now */
be8e71aa 2942 reg_trie_data * const trie
f8fc2ecf 2943 = (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
55eed653 2944 HV * widecharmap = (HV *)rexi->data->data[ ARG( scan ) + 1 ];
3dab1dad 2945 U32 state = trie->startstate;
166ba7cd 2946
3dab1dad
YO
2947 if (trie->bitmap && trie_type != trie_utf8_fold &&
2948 !TRIE_BITMAP_TEST(trie,*locinput)
2949 ) {
2950 if (trie->states[ state ].wordnum) {
2951 DEBUG_EXECUTE_r(
2952 PerlIO_printf(Perl_debug_log,
2953 "%*s %smatched empty string...%s\n",
5bc10b2c 2954 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3dab1dad
YO
2955 );
2956 break;
2957 } else {
2958 DEBUG_EXECUTE_r(
2959 PerlIO_printf(Perl_debug_log,
786e8c11 2960 "%*s %sfailed to match trie start class...%s\n",
5bc10b2c 2961 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3dab1dad
YO
2962 );
2963 sayNO_SILENT;
2964 }
2965 }
166ba7cd 2966
786e8c11
YO
2967 {
2968 U8 *uc = ( U8* )locinput;
2969
2970 STRLEN len = 0;
2971 STRLEN foldlen = 0;
2972 U8 *uscan = (U8*)NULL;
2973 STRLEN bufflen=0;
2974 SV *sv_accept_buff = NULL;
2975 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2976
2977 ST.accepted = 0; /* how many accepting states we have seen */
2978 ST.B = next;
2979 ST.jump = trie->jump;
786e8c11 2980 ST.me = scan;
07be1b83
YO
2981 /*
2982 traverse the TRIE keeping track of all accepting states
2983 we transition through until we get to a failing node.
2984 */
2985
a3621e74 2986 while ( state && uc <= (U8*)PL_regeol ) {
786e8c11 2987 U32 base = trie->states[ state ].trans.base;
f9f4320a 2988 UV uvc = 0;
786e8c11
YO
2989 U16 charid;
2990 /* We use charid to hold the wordnum as we don't use it
2991 for charid until after we have done the wordnum logic.
2992 We define an alias just so that the wordnum logic reads
2993 more naturally. */
2994
2995#define got_wordnum charid
2996 got_wordnum = trie->states[ state ].wordnum;
2997
2998 if ( got_wordnum ) {
2999 if ( ! ST.accepted ) {
5b47454d 3000 ENTER;
6b173516 3001 /* SAVETMPS; */ /* XXX is this necessary? dmq */
5b47454d
DM
3002 bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
3003 sv_accept_buff=newSV(bufflen *
3004 sizeof(reg_trie_accepted) - 1);
786e8c11 3005 SvCUR_set(sv_accept_buff, 0);
5b47454d
DM
3006 SvPOK_on(sv_accept_buff);
3007 sv_2mortal(sv_accept_buff);
166ba7cd
DM
3008 SAVETMPS;
3009 ST.accept_buff =
5b47454d
DM
3010 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
3011 }
786e8c11 3012 do {
166ba7cd 3013 if (ST.accepted >= bufflen) {
5b47454d 3014 bufflen *= 2;
166ba7cd 3015 ST.accept_buff =(reg_trie_accepted*)
5b47454d
DM
3016 SvGROW(sv_accept_buff,
3017 bufflen * sizeof(reg_trie_accepted));
3018 }
3019 SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
3020 + sizeof(reg_trie_accepted));
a3621e74 3021
786e8c11
YO
3022
3023 ST.accept_buff[ST.accepted].wordnum = got_wordnum;
3024 ST.accept_buff[ST.accepted].endpos = uc;
3025 ++ST.accepted;
3026 } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
3027 }
3028#undef got_wordnum
a3621e74 3029
07be1b83 3030 DEBUG_TRIE_EXECUTE_r({
786e8c11 3031 DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
a3621e74 3032 PerlIO_printf( Perl_debug_log,
786e8c11 3033 "%*s %sState: %4"UVxf" Accepted: %4"UVxf" ",
5bc10b2c 3034 2+depth * 2, "", PL_colors[4],
786e8c11 3035 (UV)state, (UV)ST.accepted );
07be1b83 3036 });
a3621e74
YO
3037
3038 if ( base ) {
55eed653
NC
3039 REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
3040 uscan, len, uvc, charid, foldlen,
3041 foldbuf, uniflags);
a3621e74 3042
5b47454d
DM
3043 if (charid &&
3044 (base + charid > trie->uniquecharcount )
3045 && (base + charid - 1 - trie->uniquecharcount
3046 < trie->lasttrans)
3047 && trie->trans[base + charid - 1 -
3048 trie->uniquecharcount].check == state)
3049 {
3050 state = trie->trans[base + charid - 1 -
3051 trie->uniquecharcount ].next;
3052 }
3053 else {
3054 state = 0;
3055 }
3056 uc += len;
3057
3058 }
3059 else {
a3621e74
YO
3060 state = 0;
3061 }
3062 DEBUG_TRIE_EXECUTE_r(
e4584336 3063 PerlIO_printf( Perl_debug_log,
786e8c11 3064 "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
e4584336 3065 charid, uvc, (UV)state, PL_colors[5] );
a3621e74
YO
3066 );
3067 }
166ba7cd 3068 if (!ST.accepted )
a3621e74 3069 sayNO;
a3621e74 3070
166ba7cd
DM
3071 DEBUG_EXECUTE_r(
3072 PerlIO_printf( Perl_debug_log,
3073 "%*s %sgot %"IVdf" possible matches%s\n",
5bc10b2c 3074 REPORT_CODE_OFF + depth * 2, "",
166ba7cd
DM
3075 PL_colors[4], (IV)ST.accepted, PL_colors[5] );
3076 );
786e8c11 3077 }}
fae667d5
YO
3078 goto trie_first_try; /* jump into the fail handler */
3079 /* NOTREACHED */
166ba7cd 3080 case TRIE_next_fail: /* we failed - try next alterative */
fae667d5
YO
3081 if ( ST.jump) {
3082 REGCP_UNWIND(ST.cp);
3083 for (n = *PL_reglastparen; n > ST.lastparen; n--)
f0ab9afb 3084 PL_regoffs[n].end = -1;
fae667d5
YO
3085 *PL_reglastparen = n;
3086 }
3087 trie_first_try:
5d458dd8
YO
3088 if (do_cutgroup) {
3089 do_cutgroup = 0;
3090 no_final = 0;
3091 }
fae667d5
YO
3092
3093 if ( ST.jump) {
3094 ST.lastparen = *PL_reglastparen;
3095 REGCP_SET(ST.cp);
3096 }
166ba7cd
DM
3097 if ( ST.accepted == 1 ) {
3098 /* only one choice left - just continue */
3099 DEBUG_EXECUTE_r({
2b8b4781
NC
3100 AV *const trie_words
3101 = (AV *) rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET];
3102 SV ** const tmp = av_fetch( trie_words,
5c9f2f80 3103 ST.accept_buff[ 0 ].wordnum-1, 0 );
de734bd5
A
3104 SV *sv= tmp ? sv_newmortal() : NULL;
3105
166ba7cd
DM
3106 PerlIO_printf( Perl_debug_log,
3107 "%*s %sonly one match left: #%d <%s>%s\n",
5bc10b2c 3108 REPORT_CODE_OFF+depth*2, "", PL_colors[4],
166ba7cd 3109 ST.accept_buff[ 0 ].wordnum,
de734bd5
A
3110 tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3111 PL_colors[0], PL_colors[1],
3112 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3113 )
3114 : "not compiled under -Dr",
166ba7cd
DM
3115 PL_colors[5] );
3116 });
3117 PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
3118 /* in this case we free tmps/leave before we call regmatch
3119 as we wont be using accept_buff again. */
5d458dd8 3120
166ba7cd
DM
3121 locinput = PL_reginput;
3122 nextchr = UCHARAT(locinput);
5d458dd8
YO
3123 if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
3124 scan = ST.B;
3125 else
3126 scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
3127 if (!has_cutgroup) {
3128 FREETMPS;
3129 LEAVE;
3130 } else {
3131 ST.accepted--;
3132 PUSH_YES_STATE_GOTO(TRIE_next, scan);
3133 }
786e8c11 3134
166ba7cd
DM
3135 continue; /* execute rest of RE */
3136 }
fae667d5
YO
3137
3138 if ( !ST.accepted-- ) {
5d458dd8
YO
3139 DEBUG_EXECUTE_r({
3140 PerlIO_printf( Perl_debug_log,
3141 "%*s %sTRIE failed...%s\n",
3142 REPORT_CODE_OFF+depth*2, "",
3143 PL_colors[4],
3144 PL_colors[5] );
3145 });
166ba7cd
DM
3146 FREETMPS;
3147 LEAVE;
5d458dd8 3148 sayNO_SILENT;
fae667d5
YO
3149 /*NOTREACHED*/
3150 }
166ba7cd 3151
a3621e74 3152 /*
166ba7cd
DM
3153 There are at least two accepting states left. Presumably
3154 the number of accepting states is going to be low,
3155 typically two. So we simply scan through to find the one
3156 with lowest wordnum. Once we find it, we swap the last
3157 state into its place and decrement the size. We then try to
3158 match the rest of the pattern at the point where the word
3159 ends. If we succeed, control just continues along the
3160 regex; if we fail we return here to try the next accepting
3161 state
3162 */
a3621e74 3163
166ba7cd
DM
3164 {
3165 U32 best = 0;
3166 U32 cur;
3167 for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
3168 DEBUG_TRIE_EXECUTE_r(
f2278c82 3169 PerlIO_printf( Perl_debug_log,
166ba7cd 3170 "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
5bc10b2c 3171 REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
166ba7cd
DM
3172 (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
3173 ST.accept_buff[ cur ].wordnum, PL_colors[5] );
3174 );
3175
3176 if (ST.accept_buff[cur].wordnum <
3177 ST.accept_buff[best].wordnum)
3178 best = cur;
a3621e74 3179 }
166ba7cd
DM
3180
3181 DEBUG_EXECUTE_r({
2b8b4781
NC
3182 AV *const trie_words
3183 = (AV *) rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET];
3184 SV ** const tmp = av_fetch( trie_words,
5c9f2f80 3185 ST.accept_buff[ best ].wordnum - 1, 0 );
7f69552c 3186 regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
265c4333 3187 ST.B :
7f69552c 3188 ST.me + ST.jump[ST.accept_buff[best].wordnum];
de734bd5
A
3189 SV *sv= tmp ? sv_newmortal() : NULL;
3190
265c4333
YO
3191 PerlIO_printf( Perl_debug_log,
3192 "%*s %strying alternation #%d <%s> at node #%d %s\n",
5bc10b2c 3193 REPORT_CODE_OFF+depth*2, "", PL_colors[4],
166ba7cd 3194 ST.accept_buff[best].wordnum,
de734bd5
A
3195 tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3196 PL_colors[0], PL_colors[1],
3197 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3198 ) : "not compiled under -Dr",
265c4333 3199 REG_NODE_NUM(nextop),
166ba7cd
DM
3200 PL_colors[5] );
3201 });
3202
3203 if ( best<ST.accepted ) {
3204 reg_trie_accepted tmp = ST.accept_buff[ best ];
3205 ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
3206 ST.accept_buff[ ST.accepted ] = tmp;
3207 best = ST.accepted;
a3621e74 3208 }
166ba7cd 3209 PL_reginput = (char *)ST.accept_buff[ best ].endpos;
7f69552c 3210 if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
5d458dd8 3211 scan = ST.B;
786e8c11 3212 } else {
5d458dd8 3213 scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
5d458dd8 3214 }
6b173516 3215 PUSH_YES_STATE_GOTO(TRIE_next, scan);
786e8c11 3216 /* NOTREACHED */
166ba7cd 3217 }
166ba7cd 3218 /* NOTREACHED */
5d458dd8
YO
3219 case TRIE_next:
3220 FREETMPS;
3221 LEAVE;
3222 sayYES;
166ba7cd
DM
3223#undef ST
3224
95b24440
DM
3225 case EXACT: {
3226 char *s = STRING(scan);
24d3c4a9 3227 ln = STR_LEN(scan);
eb160463 3228 if (do_utf8 != UTF) {
bc517b45 3229 /* The target and the pattern have differing utf8ness. */
1aa99e6b 3230 char *l = locinput;
24d3c4a9 3231 const char * const e = s + ln;
a72c7584 3232
5ff6fc6d
JH
3233 if (do_utf8) {
3234 /* The target is utf8, the pattern is not utf8. */
1aa99e6b 3235 while (s < e) {
a3b680e6 3236 STRLEN ulen;
1aa99e6b 3237 if (l >= PL_regeol)
5ff6fc6d
JH
3238 sayNO;
3239 if (NATIVE_TO_UNI(*(U8*)s) !=
89ebb4a3 3240 utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
041457d9 3241 uniflags))
5ff6fc6d 3242 sayNO;
bc517b45 3243 l += ulen;
5ff6fc6d 3244 s ++;
1aa99e6b 3245 }
5ff6fc6d
JH
3246 }
3247 else {
3248 /* The target is not utf8, the pattern is utf8. */
1aa99e6b 3249 while (s < e) {
a3b680e6 3250 STRLEN ulen;
1aa99e6b
IH
3251 if (l >= PL_regeol)
3252 sayNO;
5ff6fc6d 3253 if (NATIVE_TO_UNI(*((U8*)l)) !=
89ebb4a3 3254 utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
041457d9 3255 uniflags))
1aa99e6b 3256 sayNO;
bc517b45 3257 s += ulen;
a72c7584 3258 l ++;
1aa99e6b 3259 }
5ff6fc6d 3260 }
1aa99e6b
IH
3261 locinput = l;
3262 nextchr = UCHARAT(locinput);
3263 break;
3264 }
bc517b45 3265 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
3266 /* Inline the first character, for speed. */
3267 if (UCHARAT(s) != nextchr)
3268 sayNO;
24d3c4a9 3269 if (PL_regeol - locinput < ln)
d6a28714 3270 sayNO;
24d3c4a9 3271 if (ln > 1 && memNE(s, locinput, ln))
d6a28714 3272 sayNO;
24d3c4a9 3273 locinput += ln;
d6a28714
JH
3274 nextchr = UCHARAT(locinput);
3275 break;
95b24440 3276 }
d6a28714 3277 case EXACTFL:
b8c5462f
JH
3278 PL_reg_flags |= RF_tainted;
3279 /* FALL THROUGH */
95b24440 3280 case EXACTF: {
be8e71aa 3281 char * const s = STRING(scan);
24d3c4a9 3282 ln = STR_LEN(scan);
d6a28714 3283
d07ddd77
JH
3284 if (do_utf8 || UTF) {
3285 /* Either target or the pattern are utf8. */
be8e71aa 3286 const char * const l = locinput;
d07ddd77 3287 char *e = PL_regeol;
bc517b45 3288
24d3c4a9 3289 if (ibcmp_utf8(s, 0, ln, (bool)UTF,
1feea2c7 3290 l, &e, 0, do_utf8)) {
5486206c
JH
3291 /* One more case for the sharp s:
3292 * pack("U0U*", 0xDF) =~ /ss/i,
3293 * the 0xC3 0x9F are the UTF-8
3294 * byte sequence for the U+00DF. */
e1d1eefb 3295
5486206c 3296 if (!(do_utf8 &&
e1d1eefb 3297 toLOWER(s[0]) == 's' &&
24d3c4a9 3298 ln >= 2 &&
5486206c
JH
3299 toLOWER(s[1]) == 's' &&
3300 (U8)l[0] == 0xC3 &&
3301 e - l >= 2 &&
3302 (U8)l[1] == 0x9F))
3303 sayNO;
3304 }
d07ddd77
JH
3305 locinput = e;
3306 nextchr = UCHARAT(locinput);
3307 break;
a0ed51b3 3308 }
d6a28714 3309
bc517b45
JH
3310 /* Neither the target and the pattern are utf8. */
3311
d6a28714
JH
3312 /* Inline the first character, for speed. */
3313 if (UCHARAT(s) != nextchr &&
3314 UCHARAT(s) != ((OP(scan) == EXACTF)
3315 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 3316 sayNO;
24d3c4a9 3317 if (PL_regeol - locinput < ln)
b8c5462f 3318 sayNO;
24d3c4a9
DM
3319 if (ln > 1 && (OP(scan) == EXACTF
3320 ? ibcmp(s, locinput, ln)
3321 : ibcmp_locale(s, locinput, ln)))
4633a7c4 3322 sayNO;
24d3c4a9 3323 locinput += ln;
d6a28714 3324 nextchr = UCHARAT(locinput);
a0d0e21e 3325 break;
95b24440 3326 }
d6a28714 3327 case ANYOF:
ffc61ed2 3328 if (do_utf8) {
9e55ce06
JH
3329 STRLEN inclasslen = PL_regeol - locinput;
3330
32fc9b6a 3331 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
262b90c4 3332 goto anyof_fail;
ffc61ed2
JH
3333 if (locinput >= PL_regeol)
3334 sayNO;
0f0076b4 3335 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
b8c5462f 3336 nextchr = UCHARAT(locinput);
e0f9d4a8 3337 break;
ffc61ed2
JH
3338 }
3339 else {
3340 if (nextchr < 0)
3341 nextchr = UCHARAT(locinput);
32fc9b6a 3342 if (!REGINCLASS(rex, scan, (U8*)locinput))
262b90c4 3343 goto anyof_fail;
ffc61ed2
JH
3344 if (!nextchr && locinput >= PL_regeol)
3345 sayNO;
3346 nextchr = UCHARAT(++locinput);
e0f9d4a8
JH
3347 break;
3348 }
262b90c4 3349 anyof_fail:
e0f9d4a8
JH
3350 /* If we might have the case of the German sharp s
3351 * in a casefolding Unicode character class. */
3352
ebc501f0
JH
3353 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3354 locinput += SHARP_S_SKIP;
e0f9d4a8 3355 nextchr = UCHARAT(locinput);
ffc61ed2 3356 }
e0f9d4a8
JH
3357 else
3358 sayNO;
b8c5462f 3359 break;
d6a28714 3360 case ALNUML:
b8c5462f
JH
3361 PL_reg_flags |= RF_tainted;
3362 /* FALL THROUGH */
d6a28714 3363 case ALNUM:
b8c5462f 3364 if (!nextchr)
4633a7c4 3365 sayNO;
ffc61ed2 3366 if (do_utf8) {
1a4fad37 3367 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3368 if (!(OP(scan) == ALNUM
bb7a0f54 3369 ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 3370 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
3371 {
3372 sayNO;
a0ed51b3 3373 }
b8c5462f 3374 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3375 nextchr = UCHARAT(locinput);
3376 break;
3377 }
ffc61ed2 3378 if (!(OP(scan) == ALNUM
d6a28714 3379 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 3380 sayNO;
b8c5462f 3381 nextchr = UCHARAT(++locinput);
a0d0e21e 3382 break;
d6a28714 3383 case NALNUML:
b8c5462f
JH
3384 PL_reg_flags |= RF_tainted;
3385 /* FALL THROUGH */
d6a28714
JH
3386 case NALNUM:
3387 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 3388 sayNO;
ffc61ed2 3389 if (do_utf8) {
1a4fad37 3390 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3391 if (OP(scan) == NALNUM
bb7a0f54 3392 ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
3393 : isALNUM_LC_utf8((U8*)locinput))
3394 {
b8c5462f 3395 sayNO;
d6a28714 3396 }
b8c5462f
JH
3397 locinput += PL_utf8skip[nextchr];
3398 nextchr = UCHARAT(locinput);
3399 break;
3400 }
ffc61ed2 3401 if (OP(scan) == NALNUM
d6a28714 3402 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 3403 sayNO;
76e3520e 3404 nextchr = UCHARAT(++locinput);
a0d0e21e 3405 break;
d6a28714
JH
3406 case BOUNDL:
3407 case NBOUNDL:
3280af22 3408 PL_reg_flags |= RF_tainted;
bbce6d69 3409 /* FALL THROUGH */
d6a28714
JH
3410 case BOUND:
3411 case NBOUND:
3412 /* was last char in word? */
ffc61ed2 3413 if (do_utf8) {
12d33761 3414 if (locinput == PL_bostr)
24d3c4a9 3415 ln = '\n';
ffc61ed2 3416 else {
a3b680e6 3417 const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
9041c2e3 3418
24d3c4a9 3419 ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
ffc61ed2
JH
3420 }
3421 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
24d3c4a9 3422 ln = isALNUM_uni(ln);
1a4fad37 3423 LOAD_UTF8_CHARCLASS_ALNUM();
3568d838 3424 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
3425 }
3426 else {
24d3c4a9 3427 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
3428 n = isALNUM_LC_utf8((U8*)locinput);
3429 }
a0ed51b3 3430 }
d6a28714 3431 else {
24d3c4a9 3432 ln = (locinput != PL_bostr) ?
12d33761 3433 UCHARAT(locinput - 1) : '\n';
ffc61ed2 3434 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
24d3c4a9 3435 ln = isALNUM(ln);
ffc61ed2
JH
3436 n = isALNUM(nextchr);
3437 }
3438 else {
24d3c4a9 3439 ln = isALNUM_LC(ln);
ffc61ed2
JH
3440 n = isALNUM_LC(nextchr);
3441 }
d6a28714 3442 }
24d3c4a9 3443 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
ffc61ed2
JH
3444 OP(scan) == BOUNDL))
3445 sayNO;
a0ed51b3 3446 break;
d6a28714 3447 case SPACEL:
3280af22 3448 PL_reg_flags |= RF_tainted;
bbce6d69 3449 /* FALL THROUGH */
d6a28714 3450 case SPACE:
9442cb0e 3451 if (!nextchr)
4633a7c4 3452 sayNO;
1aa99e6b 3453 if (do_utf8) {
fd400ab9 3454 if (UTF8_IS_CONTINUED(nextchr)) {
1a4fad37 3455 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3456 if (!(OP(scan) == SPACE
bb7a0f54 3457 ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
3458 : isSPACE_LC_utf8((U8*)locinput)))
3459 {
3460 sayNO;
3461 }
3462 locinput += PL_utf8skip[nextchr];
3463 nextchr = UCHARAT(locinput);
3464 break;
d6a28714 3465 }
ffc61ed2
JH
3466 if (!(OP(scan) == SPACE
3467 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3468 sayNO;
3469 nextchr = UCHARAT(++locinput);
3470 }
3471 else {
3472 if (!(OP(scan) == SPACE
3473 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3474 sayNO;
3475 nextchr = UCHARAT(++locinput);
a0ed51b3 3476 }
a0ed51b3 3477 break;
d6a28714 3478 case NSPACEL:
3280af22 3479 PL_reg_flags |= RF_tainted;
bbce6d69 3480 /* FALL THROUGH */
d6a28714 3481 case NSPACE:
9442cb0e 3482 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3483 sayNO;
1aa99e6b 3484 if (do_utf8) {
1a4fad37 3485 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3486 if (OP(scan) == NSPACE
bb7a0f54 3487 ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 3488 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
3489 {
3490 sayNO;
3491 }
3492 locinput += PL_utf8skip[nextchr];
3493 nextchr = UCHARAT(locinput);
3494 break;
a0ed51b3 3495 }
ffc61ed2 3496 if (OP(scan) == NSPACE
d6a28714 3497 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 3498 sayNO;
76e3520e 3499 nextchr = UCHARAT(++locinput);
a0d0e21e 3500 break;
d6a28714 3501 case DIGITL:
a0ed51b3
LW
3502 PL_reg_flags |= RF_tainted;
3503 /* FALL THROUGH */
d6a28714 3504 case DIGIT:
9442cb0e 3505 if (!nextchr)
a0ed51b3 3506 sayNO;
1aa99e6b 3507 if (do_utf8) {
1a4fad37 3508 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3509 if (!(OP(scan) == DIGIT
bb7a0f54 3510 ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 3511 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 3512 {
a0ed51b3 3513 sayNO;
dfe13c55 3514 }
6f06b55f 3515 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3516 nextchr = UCHARAT(locinput);
3517 break;
3518 }
ffc61ed2 3519 if (!(OP(scan) == DIGIT
9442cb0e 3520 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
3521 sayNO;
3522 nextchr = UCHARAT(++locinput);
3523 break;
d6a28714 3524 case NDIGITL:
b8c5462f
JH
3525 PL_reg_flags |= RF_tainted;
3526 /* FALL THROUGH */
d6a28714 3527 case NDIGIT:
9442cb0e 3528 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3529 sayNO;
1aa99e6b 3530 if (do_utf8) {
1a4fad37 3531 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3532 if (OP(scan) == NDIGIT
bb7a0f54 3533 ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
3534 : isDIGIT_LC_utf8((U8*)locinput))
3535 {
a0ed51b3 3536 sayNO;
9442cb0e 3537 }
6f06b55f 3538 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3539 nextchr = UCHARAT(locinput);
3540 break;
3541 }
ffc61ed2 3542 if (OP(scan) == NDIGIT
9442cb0e 3543 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
3544 sayNO;
3545 nextchr = UCHARAT(++locinput);
3546 break;
3547 case CLUMP:
b7c83a7e 3548 if (locinput >= PL_regeol)
a0ed51b3 3549 sayNO;
b7c83a7e 3550 if (do_utf8) {
1a4fad37 3551 LOAD_UTF8_CHARCLASS_MARK();
b7c83a7e
JH
3552 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3553 sayNO;
3554 locinput += PL_utf8skip[nextchr];
3555 while (locinput < PL_regeol &&
3556 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3557 locinput += UTF8SKIP(locinput);
3558 if (locinput > PL_regeol)
3559 sayNO;
eb08e2da
JH
3560 }
3561 else
3562 locinput++;
a0ed51b3
LW
3563 nextchr = UCHARAT(locinput);
3564 break;
81714fb9
YO
3565
3566 case NREFFL:
3567 {
3568 char *s;
ff1157ca 3569 char type;
81714fb9
YO
3570 PL_reg_flags |= RF_tainted;
3571 /* FALL THROUGH */
3572 case NREF:
3573 case NREFF:
ff1157ca 3574 type = OP(scan);
0a4db386
YO
3575 n = reg_check_named_buff_matched(rex,scan);
3576
3577 if ( n ) {
3578 type = REF + ( type - NREF );
3579 goto do_ref;
3580 } else {
81714fb9 3581 sayNO;
0a4db386
YO
3582 }
3583 /* unreached */
c8756f30 3584 case REFFL:
3280af22 3585 PL_reg_flags |= RF_tainted;
c8756f30 3586 /* FALL THROUGH */
c277df42 3587 case REF:
81714fb9 3588 case REFF:
c277df42 3589 n = ARG(scan); /* which paren pair */
81714fb9
YO
3590 type = OP(scan);
3591 do_ref:
f0ab9afb 3592 ln = PL_regoffs[n].start;
2c2d71f5 3593 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
3b6647e0 3594 if (*PL_reglastparen < n || ln == -1)
af3f8c16 3595 sayNO; /* Do not match unless seen CLOSEn. */
f0ab9afb 3596 if (ln == PL_regoffs[n].end)
a0d0e21e 3597 break;
a0ed51b3 3598
24d3c4a9 3599 s = PL_bostr + ln;
81714fb9 3600 if (do_utf8 && type != REF) { /* REF can do byte comparison */
a0ed51b3 3601 char *l = locinput;
f0ab9afb 3602 const char *e = PL_bostr + PL_regoffs[n].end;
a0ed51b3
LW
3603 /*
3604 * Note that we can't do the "other character" lookup trick as
3605 * in the 8-bit case (no pun intended) because in Unicode we
3606 * have to map both upper and title case to lower case.
3607 */
81714fb9 3608 if (type == REFF) {
a0ed51b3 3609 while (s < e) {
a3b680e6
AL
3610 STRLEN ulen1, ulen2;
3611 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3612 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3613
a0ed51b3
LW
3614 if (l >= PL_regeol)
3615 sayNO;
a2a2844f
JH
3616 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3617 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 3618 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 3619 sayNO;
a2a2844f
JH
3620 s += ulen1;
3621 l += ulen2;
a0ed51b3
LW
3622 }
3623 }
3624 locinput = l;
3625 nextchr = UCHARAT(locinput);
3626 break;
3627 }
3628
a0d0e21e 3629 /* Inline the first character, for speed. */
76e3520e 3630 if (UCHARAT(s) != nextchr &&
81714fb9
YO
3631 (type == REF ||
3632 (UCHARAT(s) != (type == REFF
3633 ? PL_fold : PL_fold_locale)[nextchr])))
4633a7c4 3634 sayNO;
f0ab9afb 3635 ln = PL_regoffs[n].end - ln;
24d3c4a9 3636 if (locinput + ln > PL_regeol)
4633a7c4 3637 sayNO;
81714fb9 3638 if (ln > 1 && (type == REF
24d3c4a9 3639 ? memNE(s, locinput, ln)
81714fb9 3640 : (type == REFF
24d3c4a9
DM
3641 ? ibcmp(s, locinput, ln)
3642 : ibcmp_locale(s, locinput, ln))))
4633a7c4 3643 sayNO;
24d3c4a9 3644 locinput += ln;
76e3520e 3645 nextchr = UCHARAT(locinput);
a0d0e21e 3646 break;
81714fb9 3647 }
a0d0e21e 3648 case NOTHING:
c277df42 3649 case TAIL:
a0d0e21e
LW
3650 break;
3651 case BACK:
3652 break;
40a82448
DM
3653
3654#undef ST
3655#define ST st->u.eval
c277df42 3656 {
c277df42 3657 SV *ret;
d2f13c59 3658 REGEXP *re_sv;
6bda09f9 3659 regexp *re;
f8fc2ecf 3660 regexp_internal *rei;
1a147d38
YO
3661 regnode *startpoint;
3662
3663 case GOSTART:
e7707071
YO
3664 case GOSUB: /* /(...(?1))/ /(...(?&foo))/ */
3665 if (cur_eval && cur_eval->locinput==locinput) {
24b23f37 3666 if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
1a147d38 3667 Perl_croak(aTHX_ "Infinite recursion in regex");
4b196cd4 3668 if ( ++nochange_depth > max_nochange_depth )
1a147d38
YO
3669 Perl_croak(aTHX_
3670 "Pattern subroutine nesting without pos change"
3671 " exceeded limit in regex");
6bda09f9
YO
3672 } else {
3673 nochange_depth = 0;
1a147d38 3674 }
288b8c02 3675 re_sv = rex_sv;
6bda09f9 3676 re = rex;
f8fc2ecf 3677 rei = rexi;
288b8c02 3678 (void)ReREFCNT_inc(rex_sv);
1a147d38 3679 if (OP(scan)==GOSUB) {
6bda09f9
YO
3680 startpoint = scan + ARG2L(scan);
3681 ST.close_paren = ARG(scan);
3682 } else {
f8fc2ecf 3683 startpoint = rei->program+1;
6bda09f9
YO
3684 ST.close_paren = 0;
3685 }
3686 goto eval_recurse_doit;
3687 /* NOTREACHED */
3688 case EVAL: /* /(?{A})B/ /(??{A})B/ and /(?(?{A})X|Y)B/ */
3689 if (cur_eval && cur_eval->locinput==locinput) {
4b196cd4 3690 if ( ++nochange_depth > max_nochange_depth )
1a147d38 3691 Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
6bda09f9
YO
3692 } else {
3693 nochange_depth = 0;
3694 }
8e5e9ebe 3695 {
4aabdb9b
DM
3696 /* execute the code in the {...} */
3697 dSP;
6136c704 3698 SV ** const before = SP;
4aabdb9b
DM
3699 OP_4tree * const oop = PL_op;
3700 COP * const ocurcop = PL_curcop;
3701 PAD *old_comppad;
4aabdb9b
DM
3702
3703 n = ARG(scan);
f8fc2ecf 3704 PL_op = (OP_4tree*)rexi->data->data[n];
24b23f37
YO
3705 DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
3706 " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
f8fc2ecf 3707 PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
f0ab9afb 3708 PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
4aabdb9b 3709
2bf803e2
YO
3710 if (sv_yes_mark) {
3711 SV *sv_mrk = get_sv("REGMARK", 1);
3712 sv_setsv(sv_mrk, sv_yes_mark);
3713 }
3714
8e5e9ebe
RGS
3715 CALLRUNOPS(aTHX); /* Scalar context. */
3716 SPAGAIN;
3717 if (SP == before)
075aa684 3718 ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
8e5e9ebe
RGS
3719 else {
3720 ret = POPs;
3721 PUTBACK;
3722 }
4aabdb9b
DM
3723
3724 PL_op = oop;
3725 PAD_RESTORE_LOCAL(old_comppad);
3726 PL_curcop = ocurcop;
24d3c4a9 3727 if (!logical) {
4aabdb9b
DM
3728 /* /(?{...})/ */
3729 sv_setsv(save_scalar(PL_replgv), ret);
4aabdb9b
DM
3730 break;
3731 }
8e5e9ebe 3732 }
24d3c4a9
DM
3733 if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
3734 logical = 0;
4aabdb9b 3735 {
4f639d21
DM
3736 /* extract RE object from returned value; compiling if
3737 * necessary */
6136c704 3738 MAGIC *mg = NULL;
288b8c02 3739 REGEXP *rx = NULL;
5c35adbb
NC
3740
3741 if (SvROK(ret)) {
288b8c02 3742 SV *const sv = SvRV(ret);
5c35adbb
NC
3743
3744 if (SvTYPE(sv) == SVt_REGEXP) {
d2f13c59 3745 rx = (REGEXP*) sv;
5c35adbb
NC
3746 } else if (SvSMAGICAL(sv)) {
3747 mg = mg_find(sv, PERL_MAGIC_qr);
3748 assert(mg);
3749 }
3750 } else if (SvTYPE(ret) == SVt_REGEXP) {
d2f13c59 3751 rx = (REGEXP*) ret;
5c35adbb 3752 } else if (SvSMAGICAL(ret)) {
124ee91a
NC
3753 if (SvGMAGICAL(ret)) {
3754 /* I don't believe that there is ever qr magic
3755 here. */
3756 assert(!mg_find(ret, PERL_MAGIC_qr));
faf82a0b 3757 sv_unmagic(ret, PERL_MAGIC_qr);
124ee91a
NC
3758 }
3759 else {
faf82a0b 3760 mg = mg_find(ret, PERL_MAGIC_qr);
124ee91a
NC
3761 /* testing suggests mg only ends up non-NULL for
3762 scalars who were upgraded and compiled in the
3763 else block below. In turn, this is only
3764 triggered in the "postponed utf8 string" tests
3765 in t/op/pat.t */
3766 }
0f5d15d6 3767 }
faf82a0b 3768
0f5d15d6 3769 if (mg) {
d2f13c59 3770 rx = (REGEXP *) mg->mg_obj; /*XXX:dmq*/
e2560c33 3771 assert(rx);
0f5d15d6 3772 }
288b8c02
NC
3773 if (rx) {
3774 rx = reg_temp_copy(rx);
3775 }
0f5d15d6 3776 else {
c737faaf 3777 U32 pm_flags = 0;
a3b680e6 3778 const I32 osize = PL_regsize;
0f5d15d6 3779
b9ad30b4
NC
3780 if (DO_UTF8(ret)) {
3781 assert (SvUTF8(ret));
3782 } else if (SvUTF8(ret)) {
3783 /* Not doing UTF-8, despite what the SV says. Is
3784 this only if we're trapped in use 'bytes'? */
3785 /* Make a copy of the octet sequence, but without
3786 the flag on, as the compiler now honours the
3787 SvUTF8 flag on ret. */
3788 STRLEN len;
3789 const char *const p = SvPV(ret, len);
3790 ret = newSVpvn_flags(p, len, SVs_TEMP);
3791 }
288b8c02 3792 rx = CALLREGCOMP(ret, pm_flags);
9041c2e3 3793 if (!(SvFLAGS(ret)
faf82a0b 3794 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3ce3ed55 3795 | SVs_GMG))) {
a2794585
NC
3796 /* This isn't a first class regexp. Instead, it's
3797 caching a regexp onto an existing, Perl visible
3798 scalar. */
d2f13c59 3799 sv_magic(ret, (SV*) rx, PERL_MAGIC_qr, 0, 0);
3ce3ed55 3800 }
0f5d15d6 3801 PL_regsize = osize;
0f5d15d6 3802 }
288b8c02
NC
3803 re_sv = rx;
3804 re = (struct regexp *)SvANY(rx);
4aabdb9b 3805 }
07bc277f 3806 RXp_MATCH_COPIED_off(re);
28d8d7f4
YO
3807 re->subbeg = rex->subbeg;
3808 re->sublen = rex->sublen;
f8fc2ecf 3809 rei = RXi_GET(re);
6bda09f9 3810 DEBUG_EXECUTE_r(
efd26800 3811 debug_start_match(re_sv, do_utf8, locinput, PL_regeol,
6bda09f9
YO
3812 "Matching embedded");
3813 );
f8fc2ecf 3814 startpoint = rei->program + 1;
1a147d38 3815 ST.close_paren = 0; /* only used for GOSUB */
6bda09f9
YO
3816 /* borrowed from regtry */
3817 if (PL_reg_start_tmpl <= re->nparens) {
3818 PL_reg_start_tmpl = re->nparens*3/2 + 3;
3819 if(PL_reg_start_tmp)
3820 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3821 else
3822 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
dd5def09 3823 }
aa283a38 3824
1a147d38 3825 eval_recurse_doit: /* Share code with GOSUB below this line */
aa283a38 3826 /* run the pattern returned from (??{...}) */
40a82448
DM
3827 ST.cp = regcppush(0); /* Save *all* the positions. */
3828 REGCP_SET(ST.lastcp);
6bda09f9 3829
f0ab9afb 3830 PL_regoffs = re->offs; /* essentially NOOP on GOSUB */
6bda09f9 3831
0357f1fd
ML
3832 /* see regtry, specifically PL_reglast(?:close)?paren is a pointer! (i dont know why) :dmq */
3833 PL_reglastparen = &re->lastparen;
3834 PL_reglastcloseparen = &re->lastcloseparen;
3835 re->lastparen = 0;
3836 re->lastcloseparen = 0;
3837
4aabdb9b 3838 PL_reginput = locinput;
ae0beba1 3839 PL_regsize = 0;
4aabdb9b
DM
3840
3841 /* XXXX This is too dramatic a measure... */
3842 PL_reg_maxiter = 0;
3843
faec1544 3844 ST.toggle_reg_flags = PL_reg_flags;
3c8556c3 3845 if (RX_UTF8(re_sv))
faec1544
DM
3846 PL_reg_flags |= RF_utf8;
3847 else
3848 PL_reg_flags &= ~RF_utf8;
3849 ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
3850
288b8c02 3851 ST.prev_rex = rex_sv;
faec1544 3852 ST.prev_curlyx = cur_curlyx;
288b8c02
NC
3853 SETREX(rex_sv,re_sv);
3854 rex = re;
f8fc2ecf 3855 rexi = rei;
faec1544 3856 cur_curlyx = NULL;
40a82448 3857 ST.B = next;
faec1544
DM
3858 ST.prev_eval = cur_eval;
3859 cur_eval = st;
faec1544 3860 /* now continue from first node in postoned RE */
6bda09f9 3861 PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
4aabdb9b 3862 /* NOTREACHED */
a0ed51b3 3863 }
24d3c4a9
DM
3864 /* logical is 1, /(?(?{...})X|Y)/ */
3865 sw = (bool)SvTRUE(ret);
3866 logical = 0;
c277df42
IZ
3867 break;
3868 }
40a82448 3869
faec1544
DM
3870 case EVAL_AB: /* cleanup after a successful (??{A})B */
3871 /* note: this is called twice; first after popping B, then A */
3872 PL_reg_flags ^= ST.toggle_reg_flags;
288b8c02
NC
3873 ReREFCNT_dec(rex_sv);
3874 SETREX(rex_sv,ST.prev_rex);
3875 rex = (struct regexp *)SvANY(rex_sv);
f8fc2ecf 3876 rexi = RXi_GET(rex);
faec1544
DM
3877 regcpblow(ST.cp);
3878 cur_eval = ST.prev_eval;
3879 cur_curlyx = ST.prev_curlyx;
0357f1fd
ML
3880
3881 PL_reglastparen = &rex->lastparen;
3882 PL_reglastcloseparen = &rex->lastcloseparen;
3883
40a82448
DM
3884 /* XXXX This is too dramatic a measure... */
3885 PL_reg_maxiter = 0;
e7707071 3886 if ( nochange_depth )
4b196cd4 3887 nochange_depth--;
262b90c4 3888 sayYES;
40a82448 3889
40a82448 3890
faec1544
DM
3891 case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
3892 /* note: this is called twice; first after popping B, then A */
3893 PL_reg_flags ^= ST.toggle_reg_flags;
288b8c02
NC
3894 ReREFCNT_dec(rex_sv);
3895 SETREX(rex_sv,ST.prev_rex);
3896 rex = (struct regexp *)SvANY(rex_sv);
f8fc2ecf 3897 rexi = RXi_GET(rex);
0357f1fd
ML
3898 PL_reglastparen = &rex->lastparen;
3899 PL_reglastcloseparen = &rex->lastcloseparen;
3900
40a82448
DM
3901 PL_reginput = locinput;
3902 REGCP_UNWIND(ST.lastcp);
3903 regcppop(rex);
faec1544
DM
3904 cur_eval = ST.prev_eval;
3905 cur_curlyx = ST.prev_curlyx;
3906 /* XXXX This is too dramatic a measure... */
3907 PL_reg_maxiter = 0;
e7707071 3908 if ( nochange_depth )
4b196cd4 3909 nochange_depth--;
40a82448 3910 sayNO_SILENT;
40a82448
DM
3911#undef ST
3912
a0d0e21e 3913 case OPEN:
c277df42 3914 n = ARG(scan); /* which paren pair */
3280af22
NIS
3915 PL_reg_start_tmp[n] = locinput;
3916 if (n > PL_regsize)
3917 PL_regsize = n;
e2e6a0f1 3918 lastopen = n;
a0d0e21e
LW
3919 break;
3920 case CLOSE:
c277df42 3921 n = ARG(scan); /* which paren pair */
f0ab9afb
NC
3922 PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
3923 PL_regoffs[n].end = locinput - PL_bostr;
7f69552c
YO
3924 /*if (n > PL_regsize)
3925 PL_regsize = n;*/
3b6647e0 3926 if (n > *PL_reglastparen)
3280af22 3927 *PL_reglastparen = n;
a01268b5 3928 *PL_reglastcloseparen = n;
3b6647e0 3929 if (cur_eval && cur_eval->u.eval.close_paren == n) {
6bda09f9
YO
3930 goto fake_end;
3931 }
a0d0e21e 3932 break;
e2e6a0f1
YO
3933 case ACCEPT:
3934 if (ARG(scan)){
3935 regnode *cursor;
3936 for (cursor=scan;
3937 cursor && OP(cursor)!=END;
3938 cursor=regnext(cursor))
3939 {
3940 if ( OP(cursor)==CLOSE ){
3941 n = ARG(cursor);
3942 if ( n <= lastopen ) {
f0ab9afb
NC
3943 PL_regoffs[n].start
3944 = PL_reg_start_tmp[n] - PL_bostr;
3945 PL_regoffs[n].end = locinput - PL_bostr;
e2e6a0f1
YO
3946 /*if (n > PL_regsize)
3947 PL_regsize = n;*/
3b6647e0 3948 if (n > *PL_reglastparen)
e2e6a0f1
YO
3949 *PL_reglastparen = n;
3950 *PL_reglastcloseparen = n;
3b6647e0
RB
3951 if ( n == ARG(scan) || (cur_eval &&
3952 cur_eval->u.eval.close_paren == n))
e2e6a0f1
YO
3953 break;
3954 }
3955 }
3956 }
3957 }
3958 goto fake_end;
3959 /*NOTREACHED*/
c277df42
IZ
3960 case GROUPP:
3961 n = ARG(scan); /* which paren pair */
f0ab9afb 3962 sw = (bool)(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
c277df42 3963 break;
0a4db386
YO
3964 case NGROUPP:
3965 /* reg_check_named_buff_matched returns 0 for no match */
3966 sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
3967 break;
1a147d38 3968 case INSUBP:
0a4db386 3969 n = ARG(scan);
3b6647e0 3970 sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
0a4db386
YO
3971 break;
3972 case DEFINEP:
3973 sw = 0;
3974 break;
c277df42 3975 case IFTHEN:
2c2d71f5 3976 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
24d3c4a9 3977 if (sw)
c277df42
IZ
3978 next = NEXTOPER(NEXTOPER(scan));
3979 else {
3980 next = scan + ARG(scan);
3981 if (OP(next) == IFTHEN) /* Fake one. */
3982 next = NEXTOPER(NEXTOPER(next));
3983 }
3984 break;
3985 case LOGICAL:
24d3c4a9 3986 logical = scan->flags;
c277df42 3987 break;
c476f425 3988
2ab05381 3989/*******************************************************************
2ab05381 3990
c476f425
DM
3991The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
3992pattern, where A and B are subpatterns. (For simple A, CURLYM or
3993STAR/PLUS/CURLY/CURLYN are used instead.)
2ab05381 3994
c476f425 3995A*B is compiled as <CURLYX><A><WHILEM><B>
2ab05381 3996
c476f425
DM
3997On entry to the subpattern, CURLYX is called. This pushes a CURLYX
3998state, which contains the current count, initialised to -1. It also sets
3999cur_curlyx to point to this state, with any previous value saved in the
4000state block.
2ab05381 4001
c476f425
DM
4002CURLYX then jumps straight to the WHILEM op, rather than executing A,
4003since the pattern may possibly match zero times (i.e. it's a while {} loop
4004rather than a do {} while loop).
2ab05381 4005
c476f425
DM
4006Each entry to WHILEM represents a successful match of A. The count in the
4007CURLYX block is incremented, another WHILEM state is pushed, and execution
4008passes to A or B depending on greediness and the current count.
2ab05381 4009
c476f425
DM
4010For example, if matching against the string a1a2a3b (where the aN are
4011substrings that match /A/), then the match progresses as follows: (the
4012pushed states are interspersed with the bits of strings matched so far):
2ab05381 4013
c476f425
DM
4014 <CURLYX cnt=-1>
4015 <CURLYX cnt=0><WHILEM>
4016 <CURLYX cnt=1><WHILEM> a1 <WHILEM>
4017 <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
4018 <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
4019 <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
2ab05381 4020
c476f425
DM
4021(Contrast this with something like CURLYM, which maintains only a single
4022backtrack state:
2ab05381 4023
c476f425
DM
4024 <CURLYM cnt=0> a1
4025 a1 <CURLYM cnt=1> a2
4026 a1 a2 <CURLYM cnt=2> a3
4027 a1 a2 a3 <CURLYM cnt=3> b
4028)
2ab05381 4029
c476f425
DM
4030Each WHILEM state block marks a point to backtrack to upon partial failure
4031of A or B, and also contains some minor state data related to that
4032iteration. The CURLYX block, pointed to by cur_curlyx, contains the
4033overall state, such as the count, and pointers to the A and B ops.
2ab05381 4034
c476f425
DM
4035This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
4036must always point to the *current* CURLYX block, the rules are:
2ab05381 4037
c476f425
DM
4038When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
4039and set cur_curlyx to point the new block.
2ab05381 4040
c476f425
DM
4041When popping the CURLYX block after a successful or unsuccessful match,
4042restore the previous cur_curlyx.
2ab05381 4043
c476f425
DM
4044When WHILEM is about to execute B, save the current cur_curlyx, and set it
4045to the outer one saved in the CURLYX block.
2ab05381 4046
c476f425
DM
4047When popping the WHILEM block after a successful or unsuccessful B match,
4048restore the previous cur_curlyx.
2ab05381 4049
c476f425
DM
4050Here's an example for the pattern (AI* BI)*BO
4051I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
2ab05381 4052
c476f425
DM
4053cur_
4054curlyx backtrack stack
4055------ ---------------
4056NULL
4057CO <CO prev=NULL> <WO>
4058CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4059CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4060NULL <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
2ab05381 4061
c476f425
DM
4062At this point the pattern succeeds, and we work back down the stack to
4063clean up, restoring as we go:
95b24440 4064
c476f425
DM
4065CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4066CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4067CO <CO prev=NULL> <WO>
4068NULL
a0374537 4069
c476f425
DM
4070*******************************************************************/
4071
4072#define ST st->u.curlyx
4073
4074 case CURLYX: /* start of /A*B/ (for complex A) */
4075 {
4076 /* No need to save/restore up to this paren */
4077 I32 parenfloor = scan->flags;
4078
4079 assert(next); /* keep Coverity happy */
4080 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
4081 next += ARG(next);
4082
4083 /* XXXX Probably it is better to teach regpush to support
4084 parenfloor > PL_regsize... */
4085 if (parenfloor > (I32)*PL_reglastparen)
4086 parenfloor = *PL_reglastparen; /* Pessimization... */
4087
4088 ST.prev_curlyx= cur_curlyx;
4089 cur_curlyx = st;
4090 ST.cp = PL_savestack_ix;
4091
4092 /* these fields contain the state of the current curly.
4093 * they are accessed by subsequent WHILEMs */
4094 ST.parenfloor = parenfloor;
4095 ST.min = ARG1(scan);
4096 ST.max = ARG2(scan);
4097 ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
4098 ST.B = next;
24d3c4a9
DM
4099 ST.minmod = minmod;
4100 minmod = 0;
c476f425
DM
4101 ST.count = -1; /* this will be updated by WHILEM */
4102 ST.lastloc = NULL; /* this will be updated by WHILEM */
4103
4104 PL_reginput = locinput;
4105 PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
5f66b61c 4106 /* NOTREACHED */
c476f425 4107 }
a0d0e21e 4108
c476f425 4109 case CURLYX_end: /* just finished matching all of A*B */
c476f425
DM
4110 cur_curlyx = ST.prev_curlyx;
4111 sayYES;
4112 /* NOTREACHED */
a0d0e21e 4113
c476f425
DM
4114 case CURLYX_end_fail: /* just failed to match all of A*B */
4115 regcpblow(ST.cp);
4116 cur_curlyx = ST.prev_curlyx;
4117 sayNO;
4118 /* NOTREACHED */
4633a7c4 4119
a0d0e21e 4120
c476f425
DM
4121#undef ST
4122#define ST st->u.whilem
4123
4124 case WHILEM: /* just matched an A in /A*B/ (for complex A) */
4125 {
4126 /* see the discussion above about CURLYX/WHILEM */
c476f425
DM
4127 I32 n;
4128 assert(cur_curlyx); /* keep Coverity happy */
4129 n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
4130 ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
4131 ST.cache_offset = 0;
4132 ST.cache_mask = 0;
4133
4134 PL_reginput = locinput;
4135
4136 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4137 "%*s whilem: matched %ld out of %ld..%ld\n",
4138 REPORT_CODE_OFF+depth*2, "", (long)n,
4139 (long)cur_curlyx->u.curlyx.min,
4140 (long)cur_curlyx->u.curlyx.max)
4141 );
a0d0e21e 4142
c476f425 4143 /* First just match a string of min A's. */
a0d0e21e 4144
c476f425
DM
4145 if (n < cur_curlyx->u.curlyx.min) {
4146 cur_curlyx->u.curlyx.lastloc = locinput;
4147 PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
4148 /* NOTREACHED */
4149 }
4150
4151 /* If degenerate A matches "", assume A done. */
4152
4153 if (locinput == cur_curlyx->u.curlyx.lastloc) {
4154 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4155 "%*s whilem: empty match detected, trying continuation...\n",
4156 REPORT_CODE_OFF+depth*2, "")
4157 );
4158 goto do_whilem_B_max;
4159 }
4160
4161 /* super-linear cache processing */
4162
4163 if (scan->flags) {
a0d0e21e 4164
2c2d71f5 4165 if (!PL_reg_maxiter) {
c476f425
DM
4166 /* start the countdown: Postpone detection until we
4167 * know the match is not *that* much linear. */
2c2d71f5 4168 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
66bf836d
DM
4169 /* possible overflow for long strings and many CURLYX's */
4170 if (PL_reg_maxiter < 0)
4171 PL_reg_maxiter = I32_MAX;
2c2d71f5
JH
4172 PL_reg_leftiter = PL_reg_maxiter;
4173 }
c476f425 4174
2c2d71f5 4175 if (PL_reg_leftiter-- == 0) {
c476f425 4176 /* initialise cache */
3298f257 4177 const I32 size = (PL_reg_maxiter + 7)/8;
2c2d71f5 4178 if (PL_reg_poscache) {
eb160463 4179 if ((I32)PL_reg_poscache_size < size) {
2c2d71f5
JH
4180 Renew(PL_reg_poscache, size, char);
4181 PL_reg_poscache_size = size;
4182 }
4183 Zero(PL_reg_poscache, size, char);
4184 }
4185 else {
4186 PL_reg_poscache_size = size;
a02a5408 4187 Newxz(PL_reg_poscache, size, char);
2c2d71f5 4188 }
c476f425
DM
4189 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4190 "%swhilem: Detected a super-linear match, switching on caching%s...\n",
4191 PL_colors[4], PL_colors[5])
4192 );
2c2d71f5 4193 }
c476f425 4194
2c2d71f5 4195 if (PL_reg_leftiter < 0) {
c476f425
DM
4196 /* have we already failed at this position? */
4197 I32 offset, mask;
4198 offset = (scan->flags & 0xf) - 1
4199 + (locinput - PL_bostr) * (scan->flags>>4);
4200 mask = 1 << (offset % 8);
4201 offset /= 8;
4202 if (PL_reg_poscache[offset] & mask) {
4203 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4204 "%*s whilem: (cache) already tried at this position...\n",
4205 REPORT_CODE_OFF+depth*2, "")
2c2d71f5 4206 );
3298f257 4207 sayNO; /* cache records failure */
2c2d71f5 4208 }
c476f425
DM
4209 ST.cache_offset = offset;
4210 ST.cache_mask = mask;
2c2d71f5 4211 }
c476f425 4212 }
2c2d71f5 4213
c476f425 4214 /* Prefer B over A for minimal matching. */
a687059c 4215
c476f425
DM
4216 if (cur_curlyx->u.curlyx.minmod) {
4217 ST.save_curlyx = cur_curlyx;
4218 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4219 ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4220 REGCP_SET(ST.lastcp);
4221 PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4222 /* NOTREACHED */
4223 }
a0d0e21e 4224
c476f425
DM
4225 /* Prefer A over B for maximal matching. */
4226
4227 if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
4228 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4229 cur_curlyx->u.curlyx.lastloc = locinput;
4230 REGCP_SET(ST.lastcp);
4231 PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
4232 /* NOTREACHED */
4233 }
4234 goto do_whilem_B_max;
4235 }
4236 /* NOTREACHED */
4237
4238 case WHILEM_B_min: /* just matched B in a minimal match */
4239 case WHILEM_B_max: /* just matched B in a maximal match */
4240 cur_curlyx = ST.save_curlyx;
4241 sayYES;
4242 /* NOTREACHED */
4243
4244 case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
4245 cur_curlyx = ST.save_curlyx;
4246 cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4247 cur_curlyx->u.curlyx.count--;
4248 CACHEsayNO;
4249 /* NOTREACHED */
4250
4251 case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
4252 REGCP_UNWIND(ST.lastcp);
4253 regcppop(rex);
4254 /* FALL THROUGH */
4255 case WHILEM_A_pre_fail: /* just failed to match even minimal A */
4256 cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4257 cur_curlyx->u.curlyx.count--;
4258 CACHEsayNO;
4259 /* NOTREACHED */
4260
4261 case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
4262 REGCP_UNWIND(ST.lastcp);
4263 regcppop(rex); /* Restore some previous $<digit>s? */
4264 PL_reginput = locinput;
4265 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4266 "%*s whilem: failed, trying continuation...\n",
4267 REPORT_CODE_OFF+depth*2, "")
4268 );
4269 do_whilem_B_max:
4270 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4271 && ckWARN(WARN_REGEXP)
4272 && !(PL_reg_flags & RF_warned))
4273 {
4274 PL_reg_flags |= RF_warned;
4275 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4276 "Complex regular subexpression recursion",
4277 REG_INFTY - 1);
4278 }
4279
4280 /* now try B */
4281 ST.save_curlyx = cur_curlyx;
4282 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4283 PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4284 /* NOTREACHED */
4285
4286 case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4287 cur_curlyx = ST.save_curlyx;
4288 REGCP_UNWIND(ST.lastcp);
4289 regcppop(rex);
4290
4291 if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4292 /* Maximum greed exceeded */
4293 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4294 && ckWARN(WARN_REGEXP)
4295 && !(PL_reg_flags & RF_warned))
4296 {
3280af22 4297 PL_reg_flags |= RF_warned;
c476f425
DM
4298 Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4299 "%s limit (%d) exceeded",
4300 "Complex regular subexpression recursion",
4301 REG_INFTY - 1);
a0d0e21e 4302 }
c476f425 4303 cur_curlyx->u.curlyx.count--;
3ab3c9b4 4304 CACHEsayNO;
a0d0e21e 4305 }
c476f425
DM
4306
4307 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4308 "%*s trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4309 );
4310 /* Try grabbing another A and see if it helps. */
4311 PL_reginput = locinput;
4312 cur_curlyx->u.curlyx.lastloc = locinput;
4313 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4314 REGCP_SET(ST.lastcp);
4315 PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
5f66b61c 4316 /* NOTREACHED */
40a82448
DM
4317
4318#undef ST
4319#define ST st->u.branch
4320
4321 case BRANCHJ: /* /(...|A|...)/ with long next pointer */
c277df42
IZ
4322 next = scan + ARG(scan);
4323 if (next == scan)
4324 next = NULL;
40a82448
DM
4325 scan = NEXTOPER(scan);
4326 /* FALL THROUGH */
c277df42 4327
40a82448
DM
4328 case BRANCH: /* /(...|A|...)/ */
4329 scan = NEXTOPER(scan); /* scan now points to inner node */
40a82448
DM
4330 ST.lastparen = *PL_reglastparen;
4331 ST.next_branch = next;
4332 REGCP_SET(ST.cp);
4333 PL_reginput = locinput;
02db2b7b 4334
40a82448 4335 /* Now go into the branch */
5d458dd8
YO
4336 if (has_cutgroup) {
4337 PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4338 } else {
4339 PUSH_STATE_GOTO(BRANCH_next, scan);
4340 }
40a82448 4341 /* NOTREACHED */
5d458dd8
YO
4342 case CUTGROUP:
4343 PL_reginput = locinput;
4344 sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
f8fc2ecf 4345 (SV*)rexi->data->data[ ARG( scan ) ];
5d458dd8
YO
4346 PUSH_STATE_GOTO(CUTGROUP_next,next);
4347 /* NOTREACHED */
4348 case CUTGROUP_next_fail:
4349 do_cutgroup = 1;
4350 no_final = 1;
4351 if (st->u.mark.mark_name)
4352 sv_commit = st->u.mark.mark_name;
4353 sayNO;
4354 /* NOTREACHED */
4355 case BRANCH_next:
4356 sayYES;
4357 /* NOTREACHED */
40a82448 4358 case BRANCH_next_fail: /* that branch failed; try the next, if any */
5d458dd8
YO
4359 if (do_cutgroup) {
4360 do_cutgroup = 0;
4361 no_final = 0;
4362 }
40a82448
DM
4363 REGCP_UNWIND(ST.cp);
4364 for (n = *PL_reglastparen; n > ST.lastparen; n--)
f0ab9afb 4365 PL_regoffs[n].end = -1;
40a82448 4366 *PL_reglastparen = n;
0a4db386 4367 /*dmq: *PL_reglastcloseparen = n; */
40a82448
DM
4368 scan = ST.next_branch;
4369 /* no more branches? */
5d458dd8
YO
4370 if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4371 DEBUG_EXECUTE_r({
4372 PerlIO_printf( Perl_debug_log,
4373 "%*s %sBRANCH failed...%s\n",
4374 REPORT_CODE_OFF+depth*2, "",
4375 PL_colors[4],
4376 PL_colors[5] );
4377 });
4378 sayNO_SILENT;
4379 }
40a82448
DM
4380 continue; /* execute next BRANCH[J] op */
4381 /* NOTREACHED */
4382
a0d0e21e 4383 case MINMOD:
24d3c4a9 4384 minmod = 1;
a0d0e21e 4385 break;
40a82448
DM
4386
4387#undef ST
4388#define ST st->u.curlym
4389
4390 case CURLYM: /* /A{m,n}B/ where A is fixed-length */
4391
4392 /* This is an optimisation of CURLYX that enables us to push
4393 * only a single backtracking state, no matter now many matches
4394 * there are in {m,n}. It relies on the pattern being constant
4395 * length, with no parens to influence future backrefs
4396 */
4397
4398 ST.me = scan;
dc45a647 4399 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
40a82448
DM
4400
4401 /* if paren positive, emulate an OPEN/CLOSE around A */
4402 if (ST.me->flags) {
3b6647e0 4403 U32 paren = ST.me->flags;
40a82448
DM
4404 if (paren > PL_regsize)
4405 PL_regsize = paren;
3b6647e0 4406 if (paren > *PL_reglastparen)
40a82448 4407 *PL_reglastparen = paren;
c277df42 4408 scan += NEXT_OFF(scan); /* Skip former OPEN. */
6407bf3b 4409 }
40a82448
DM
4410 ST.A = scan;
4411 ST.B = next;
4412 ST.alen = 0;
4413 ST.count = 0;
24d3c4a9
DM
4414 ST.minmod = minmod;
4415 minmod = 0;
40a82448
DM
4416 ST.c1 = CHRTEST_UNINIT;
4417 REGCP_SET(ST.cp);
6407bf3b 4418
40a82448
DM
4419 if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
4420 goto curlym_do_B;
4421
4422 curlym_do_A: /* execute the A in /A{m,n}B/ */
6407bf3b 4423 PL_reginput = locinput;
40a82448
DM
4424 PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
4425 /* NOTREACHED */
5f80c4cf 4426
40a82448
DM
4427 case CURLYM_A: /* we've just matched an A */
4428 locinput = st->locinput;
4429 nextchr = UCHARAT(locinput);
4430
4431 ST.count++;
4432 /* after first match, determine A's length: u.curlym.alen */
4433 if (ST.count == 1) {
4434 if (PL_reg_match_utf8) {
4435 char *s = locinput;
4436 while (s < PL_reginput) {
4437 ST.alen++;
4438 s += UTF8SKIP(s);
4439 }
4440 }
4441 else {
4442 ST.alen = PL_reginput - locinput;
4443 }
4444 if (ST.alen == 0)
4445 ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4446 }
0cadcf80
DM
4447 DEBUG_EXECUTE_r(
4448 PerlIO_printf(Perl_debug_log,
40a82448 4449 "%*s CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
5bc10b2c 4450 (int)(REPORT_CODE_OFF+(depth*2)), "",
40a82448 4451 (IV) ST.count, (IV)ST.alen)
0cadcf80
DM
4452 );
4453
40a82448 4454 locinput = PL_reginput;
0a4db386
YO
4455
4456 if (cur_eval && cur_eval->u.eval.close_paren &&
24b23f37 4457 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
0a4db386
YO
4458 goto fake_end;
4459
4460 if ( ST.count < (ST.minmod ? ARG1(ST.me) : ARG2(ST.me)) )
40a82448
DM
4461 goto curlym_do_A; /* try to match another A */
4462 goto curlym_do_B; /* try to match B */
5f80c4cf 4463
40a82448
DM
4464 case CURLYM_A_fail: /* just failed to match an A */
4465 REGCP_UNWIND(ST.cp);
0a4db386
YO
4466
4467 if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
4468 || (cur_eval && cur_eval->u.eval.close_paren &&
24b23f37 4469 cur_eval->u.eval.close_paren == (U32)ST.me->flags))
40a82448 4470 sayNO;
0cadcf80 4471
40a82448
DM
4472 curlym_do_B: /* execute the B in /A{m,n}B/ */
4473 PL_reginput = locinput;
4474 if (ST.c1 == CHRTEST_UNINIT) {
4475 /* calculate c1 and c2 for possible match of 1st char
4476 * following curly */
4477 ST.c1 = ST.c2 = CHRTEST_VOID;
4478 if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4479 regnode *text_node = ST.B;
4480 if (! HAS_TEXT(text_node))
4481 FIND_NEXT_IMPT(text_node);
ee9b8eae
YO
4482 /* this used to be
4483
4484 (HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
4485
4486 But the former is redundant in light of the latter.
4487
4488 if this changes back then the macro for
4489 IS_TEXT and friends need to change.
4490 */
4491 if (PL_regkind[OP(text_node)] == EXACT)
40a82448 4492 {
ee9b8eae 4493
40a82448
DM
4494 ST.c1 = (U8)*STRING(text_node);
4495 ST.c2 =
ee9b8eae 4496 (IS_TEXTF(text_node))
40a82448 4497 ? PL_fold[ST.c1]
ee9b8eae 4498 : (IS_TEXTFL(text_node))
40a82448
DM
4499 ? PL_fold_locale[ST.c1]
4500 : ST.c1;
c277df42 4501 }
c277df42 4502 }
40a82448
DM
4503 }
4504
4505 DEBUG_EXECUTE_r(
4506 PerlIO_printf(Perl_debug_log,
4507 "%*s CURLYM trying tail with matches=%"IVdf"...\n",
5bc10b2c 4508 (int)(REPORT_CODE_OFF+(depth*2)),
40a82448
DM
4509 "", (IV)ST.count)
4510 );
4511 if (ST.c1 != CHRTEST_VOID
4512 && UCHARAT(PL_reginput) != ST.c1
4513 && UCHARAT(PL_reginput) != ST.c2)
4514 {
4515 /* simulate B failing */
3e901dc0
YO
4516 DEBUG_OPTIMISE_r(
4517 PerlIO_printf(Perl_debug_log,
4518 "%*s CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
4519 (int)(REPORT_CODE_OFF+(depth*2)),"",
4520 (IV)ST.c1,(IV)ST.c2
4521 ));
40a82448
DM
4522 state_num = CURLYM_B_fail;
4523 goto reenter_switch;
4524 }
4525
4526 if (ST.me->flags) {
4527 /* mark current A as captured */
4528 I32 paren = ST.me->flags;
4529 if (ST.count) {
f0ab9afb 4530 PL_regoffs[paren].start
40a82448 4531 = HOPc(PL_reginput, -ST.alen) - PL_bostr;
f0ab9afb 4532 PL_regoffs[paren].end = PL_reginput - PL_bostr;
0a4db386 4533 /*dmq: *PL_reglastcloseparen = paren; */
c277df42 4534 }
40a82448 4535 else
f0ab9afb 4536 PL_regoffs[paren].end = -1;
0a4db386 4537 if (cur_eval && cur_eval->u.eval.close_paren &&
24b23f37 4538 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
0a4db386
YO
4539 {
4540 if (ST.count)
4541 goto fake_end;
4542 else
4543 sayNO;
4544 }
c277df42 4545 }
0a4db386 4546
40a82448 4547 PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
5f66b61c 4548 /* NOTREACHED */
40a82448
DM
4549
4550 case CURLYM_B_fail: /* just failed to match a B */
4551 REGCP_UNWIND(ST.cp);
4552 if (ST.minmod) {
4553 if (ST.count == ARG2(ST.me) /* max */)
4554 sayNO;
4555 goto curlym_do_A; /* try to match a further A */
4556 }
4557 /* backtrack one A */
4558 if (ST.count == ARG1(ST.me) /* min */)
4559 sayNO;
4560 ST.count--;
4561 locinput = HOPc(locinput, -ST.alen);
4562 goto curlym_do_B; /* try to match B */
4563
c255a977
DM
4564#undef ST
4565#define ST st->u.curly
40a82448 4566
c255a977
DM
4567#define CURLY_SETPAREN(paren, success) \
4568 if (paren) { \
4569 if (success) { \
f0ab9afb
NC
4570 PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
4571 PL_regoffs[paren].end = locinput - PL_bostr; \
0a4db386 4572 *PL_reglastcloseparen = paren; \
c255a977
DM
4573 } \
4574 else \
f0ab9afb 4575 PL_regoffs[paren].end = -1; \
c255a977
DM
4576 }
4577
4578 case STAR: /* /A*B/ where A is width 1 */
4579 ST.paren = 0;
4580 ST.min = 0;
4581 ST.max = REG_INFTY;
a0d0e21e
LW
4582 scan = NEXTOPER(scan);
4583 goto repeat;
c255a977
DM
4584 case PLUS: /* /A+B/ where A is width 1 */
4585 ST.paren = 0;
4586 ST.min = 1;
4587 ST.max = REG_INFTY;
c277df42 4588 scan = NEXTOPER(scan);
c255a977
DM
4589 goto repeat;
4590 case CURLYN: /* /(A){m,n}B/ where A is width 1 */
4591 ST.paren = scan->flags; /* Which paren to set */
4592 if (ST.paren > PL_regsize)
4593 PL_regsize = ST.paren;
3b6647e0 4594 if (ST.paren > *PL_reglastparen)
c255a977
DM
4595 *PL_reglastparen = ST.paren;
4596 ST.min = ARG1(scan); /* min to match */
4597 ST.max = ARG2(scan); /* max to match */
0a4db386 4598 if (cur_eval && cur_eval->u.eval.close_paren &&
86413ec0 4599 cur_eval->u.eval.close_paren == (U32)ST.paren) {
0a4db386
YO
4600 ST.min=1;
4601 ST.max=1;
4602 }
c255a977
DM
4603 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4604 goto repeat;
4605 case CURLY: /* /A{m,n}B/ where A is width 1 */
4606 ST.paren = 0;
4607 ST.min = ARG1(scan); /* min to match */
4608 ST.max = ARG2(scan); /* max to match */
4609 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42 4610 repeat:
a0d0e21e
LW
4611 /*
4612 * Lookahead to avoid useless match attempts
4613 * when we know what character comes next.
c255a977 4614 *
5f80c4cf
JP
4615 * Used to only do .*x and .*?x, but now it allows
4616 * for )'s, ('s and (?{ ... })'s to be in the way
4617 * of the quantifier and the EXACT-like node. -- japhy
4618 */
4619
c255a977
DM
4620 if (ST.min > ST.max) /* XXX make this a compile-time check? */
4621 sayNO;
cca55fe3 4622 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
4623 U8 *s;
4624 regnode *text_node = next;
4625
3dab1dad
YO
4626 if (! HAS_TEXT(text_node))
4627 FIND_NEXT_IMPT(text_node);
5f80c4cf 4628
9e137952 4629 if (! HAS_TEXT(text_node))
c255a977 4630 ST.c1 = ST.c2 = CHRTEST_VOID;
5f80c4cf 4631 else {
ee9b8eae 4632 if ( PL_regkind[OP(text_node)] != EXACT ) {
c255a977 4633 ST.c1 = ST.c2 = CHRTEST_VOID;
44a68960 4634 goto assume_ok_easy;
cca55fe3 4635 }
be8e71aa
YO
4636 else
4637 s = (U8*)STRING(text_node);
ee9b8eae
YO
4638
4639 /* Currently we only get here when
4640
4641 PL_rekind[OP(text_node)] == EXACT
4642
4643 if this changes back then the macro for IS_TEXT and
4644 friends need to change. */
5f80c4cf 4645 if (!UTF) {
c255a977 4646 ST.c2 = ST.c1 = *s;
ee9b8eae 4647 if (IS_TEXTF(text_node))
c255a977 4648 ST.c2 = PL_fold[ST.c1];
ee9b8eae 4649 else if (IS_TEXTFL(text_node))
c255a977 4650 ST.c2 = PL_fold_locale[ST.c1];
1aa99e6b 4651 }
5f80c4cf 4652 else { /* UTF */
ee9b8eae 4653 if (IS_TEXTF(text_node)) {
a2a2844f 4654 STRLEN ulen1, ulen2;
89ebb4a3
JH
4655 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4656 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
a2a2844f
JH
4657
4658 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4659 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
e294cc5d
JH
4660#ifdef EBCDIC
4661 ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4662 ckWARN(WARN_UTF8) ?
4663 0 : UTF8_ALLOW_ANY);
4664 ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4665 ckWARN(WARN_UTF8) ?
4666 0 : UTF8_ALLOW_ANY);
4667#else
c255a977 4668 ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
e294cc5d 4669 uniflags);
c255a977 4670 ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
e294cc5d
JH
4671 uniflags);
4672#endif
5f80c4cf
JP
4673 }
4674 else {
c255a977 4675 ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
041457d9 4676 uniflags);
5f80c4cf 4677 }
1aa99e6b
IH
4678 }
4679 }
bbce6d69 4680 }
a0d0e21e 4681 else
c255a977 4682 ST.c1 = ST.c2 = CHRTEST_VOID;
cca55fe3 4683 assume_ok_easy:
c255a977
DM
4684
4685 ST.A = scan;
4686 ST.B = next;
3280af22 4687 PL_reginput = locinput;
24d3c4a9
DM
4688 if (minmod) {
4689 minmod = 0;
e2e6a0f1 4690 if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4633a7c4 4691 sayNO;
c255a977 4692 ST.count = ST.min;
a0ed51b3 4693 locinput = PL_reginput;
c255a977
DM
4694 REGCP_SET(ST.cp);
4695 if (ST.c1 == CHRTEST_VOID)
4696 goto curly_try_B_min;
4697
4698 ST.oldloc = locinput;
4699
4700 /* set ST.maxpos to the furthest point along the
4701 * string that could possibly match */
4702 if (ST.max == REG_INFTY) {
4703 ST.maxpos = PL_regeol - 1;
4704 if (do_utf8)
4705 while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4706 ST.maxpos--;
4707 }
4708 else if (do_utf8) {
4709 int m = ST.max - ST.min;
4710 for (ST.maxpos = locinput;
4711 m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4712 ST.maxpos += UTF8SKIP(ST.maxpos);
4713 }
4714 else {
4715 ST.maxpos = locinput + ST.max - ST.min;
4716 if (ST.maxpos >= PL_regeol)
4717 ST.maxpos = PL_regeol - 1;
4718 }
4719 goto curly_try_B_min_known;
4720
4721 }
4722 else {
e2e6a0f1 4723 ST.count = regrepeat(rex, ST.A, ST.max, depth);
c255a977
DM
4724 locinput = PL_reginput;
4725 if (ST.count < ST.min)
4726 sayNO;
4727 if ((ST.count > ST.min)
4728 && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4729 {
4730 /* A{m,n} must come at the end of the string, there's
4731 * no point in backing off ... */
4732 ST.min = ST.count;
4733 /* ...except that $ and \Z can match before *and* after
4734 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
4735 We may back off by one in this case. */
4736 if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4737 ST.min--;
4738 }
4739 REGCP_SET(ST.cp);
4740 goto curly_try_B_max;
4741 }
4742 /* NOTREACHED */
4743
4744
4745 case CURLY_B_min_known_fail:
4746 /* failed to find B in a non-greedy match where c1,c2 valid */
4747 if (ST.paren && ST.count)
f0ab9afb 4748 PL_regoffs[ST.paren].end = -1;
c255a977
DM
4749
4750 PL_reginput = locinput; /* Could be reset... */
4751 REGCP_UNWIND(ST.cp);
4752 /* Couldn't or didn't -- move forward. */
4753 ST.oldloc = locinput;
4754 if (do_utf8)
4755 locinput += UTF8SKIP(locinput);
4756 else
4757 locinput++;
4758 ST.count++;
4759 curly_try_B_min_known:
4760 /* find the next place where 'B' could work, then call B */
4761 {
4762 int n;
4763 if (do_utf8) {
4764 n = (ST.oldloc == locinput) ? 0 : 1;
4765 if (ST.c1 == ST.c2) {
4766 STRLEN len;
4767 /* set n to utf8_distance(oldloc, locinput) */
4768 while (locinput <= ST.maxpos &&
4769 utf8n_to_uvchr((U8*)locinput,
4770 UTF8_MAXBYTES, &len,
4771 uniflags) != (UV)ST.c1) {
4772 locinput += len;
4773 n++;
4774 }
1aa99e6b
IH
4775 }
4776 else {
c255a977
DM
4777 /* set n to utf8_distance(oldloc, locinput) */
4778 while (locinput <= ST.maxpos) {
4779 STRLEN len;
4780 const UV c = utf8n_to_uvchr((U8*)locinput,
4781 UTF8_MAXBYTES, &len,
4782 uniflags);
4783 if (c == (UV)ST.c1 || c == (UV)ST.c2)
4784 break;
4785 locinput += len;
4786 n++;
1aa99e6b 4787 }
0fe9bf95
IZ
4788 }
4789 }
c255a977
DM
4790 else {
4791 if (ST.c1 == ST.c2) {
4792 while (locinput <= ST.maxpos &&
4793 UCHARAT(locinput) != ST.c1)
4794 locinput++;
bbce6d69 4795 }
c255a977
DM
4796 else {
4797 while (locinput <= ST.maxpos
4798 && UCHARAT(locinput) != ST.c1
4799 && UCHARAT(locinput) != ST.c2)
4800 locinput++;
a0ed51b3 4801 }
c255a977
DM
4802 n = locinput - ST.oldloc;
4803 }
4804 if (locinput > ST.maxpos)
4805 sayNO;
4806 /* PL_reginput == oldloc now */
4807 if (n) {
4808 ST.count += n;
e2e6a0f1 4809 if (regrepeat(rex, ST.A, n, depth) < n)
4633a7c4 4810 sayNO;
a0d0e21e 4811 }
c255a977
DM
4812 PL_reginput = locinput;
4813 CURLY_SETPAREN(ST.paren, ST.count);
0a4db386 4814 if (cur_eval && cur_eval->u.eval.close_paren &&
86413ec0 4815 cur_eval->u.eval.close_paren == (U32)ST.paren) {
0a4db386
YO
4816 goto fake_end;
4817 }
c255a977 4818 PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
a0d0e21e 4819 }
c255a977
DM
4820 /* NOTREACHED */
4821
4822
4823 case CURLY_B_min_fail:
4824 /* failed to find B in a non-greedy match where c1,c2 invalid */
4825 if (ST.paren && ST.count)
f0ab9afb 4826 PL_regoffs[ST.paren].end = -1;
c255a977
DM
4827
4828 REGCP_UNWIND(ST.cp);
4829 /* failed -- move forward one */
4830 PL_reginput = locinput;
e2e6a0f1 4831 if (regrepeat(rex, ST.A, 1, depth)) {
c255a977 4832 ST.count++;
a0ed51b3 4833 locinput = PL_reginput;
c255a977
DM
4834 if (ST.count <= ST.max || (ST.max == REG_INFTY &&
4835 ST.count > 0)) /* count overflow ? */
15272685 4836 {
c255a977
DM
4837 curly_try_B_min:
4838 CURLY_SETPAREN(ST.paren, ST.count);
0a4db386 4839 if (cur_eval && cur_eval->u.eval.close_paren &&
86413ec0 4840 cur_eval->u.eval.close_paren == (U32)ST.paren) {
0a4db386
YO
4841 goto fake_end;
4842 }
c255a977 4843 PUSH_STATE_GOTO(CURLY_B_min, ST.B);
a0d0e21e
LW
4844 }
4845 }
4633a7c4 4846 sayNO;
c255a977
DM
4847 /* NOTREACHED */
4848
4849
4850 curly_try_B_max:
4851 /* a successful greedy match: now try to match B */
40d049e4 4852 if (cur_eval && cur_eval->u.eval.close_paren &&
86413ec0 4853 cur_eval->u.eval.close_paren == (U32)ST.paren) {
40d049e4
YO
4854 goto fake_end;
4855 }
c255a977
DM
4856 {
4857 UV c = 0;
4858 if (ST.c1 != CHRTEST_VOID)
4859 c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
4860 UTF8_MAXBYTES, 0, uniflags)
466787eb 4861 : (UV) UCHARAT(PL_reginput);
c255a977
DM
4862 /* If it could work, try it. */
4863 if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
4864 CURLY_SETPAREN(ST.paren, ST.count);
4865 PUSH_STATE_GOTO(CURLY_B_max, ST.B);
4866 /* NOTREACHED */
4867 }
4868 }
4869 /* FALL THROUGH */
4870 case CURLY_B_max_fail:
4871 /* failed to find B in a greedy match */
4872 if (ST.paren && ST.count)
f0ab9afb 4873 PL_regoffs[ST.paren].end = -1;
c255a977
DM
4874
4875 REGCP_UNWIND(ST.cp);
4876 /* back up. */
4877 if (--ST.count < ST.min)
4878 sayNO;
4879 PL_reginput = locinput = HOPc(locinput, -1);
4880 goto curly_try_B_max;
4881
4882#undef ST
4883
a0d0e21e 4884 case END:
6bda09f9 4885 fake_end:
faec1544
DM
4886 if (cur_eval) {
4887 /* we've just finished A in /(??{A})B/; now continue with B */
4888 I32 tmpix;
faec1544
DM
4889 st->u.eval.toggle_reg_flags
4890 = cur_eval->u.eval.toggle_reg_flags;
4891 PL_reg_flags ^= st->u.eval.toggle_reg_flags;
4892
288b8c02
NC
4893 st->u.eval.prev_rex = rex_sv; /* inner */
4894 SETREX(rex_sv,cur_eval->u.eval.prev_rex);
4895 rex = (struct regexp *)SvANY(rex_sv);
f8fc2ecf 4896 rexi = RXi_GET(rex);
faec1544 4897 cur_curlyx = cur_eval->u.eval.prev_curlyx;
288b8c02 4898 ReREFCNT_inc(rex_sv);
faec1544
DM
4899 st->u.eval.cp = regcppush(0); /* Save *all* the positions. */
4900 REGCP_SET(st->u.eval.lastcp);
4901 PL_reginput = locinput;
4902
4903 /* Restore parens of the outer rex without popping the
4904 * savestack */
4905 tmpix = PL_savestack_ix;
4906 PL_savestack_ix = cur_eval->u.eval.lastcp;
4907 regcppop(rex);
4908 PL_savestack_ix = tmpix;
4909
4910 st->u.eval.prev_eval = cur_eval;
4911 cur_eval = cur_eval->u.eval.prev_eval;
4912 DEBUG_EXECUTE_r(
2a49f0f5
JH
4913 PerlIO_printf(Perl_debug_log, "%*s EVAL trying tail ... %"UVxf"\n",
4914 REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
e7707071
YO
4915 if ( nochange_depth )
4916 nochange_depth--;
4917
4918 PUSH_YES_STATE_GOTO(EVAL_AB,
faec1544
DM
4919 st->u.eval.prev_eval->u.eval.B); /* match B */
4920 }
4921
3b0527fe 4922 if (locinput < reginfo->till) {
a3621e74 4923 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
7821416a
IZ
4924 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4925 PL_colors[4],
4926 (long)(locinput - PL_reg_starttry),
3b0527fe 4927 (long)(reginfo->till - PL_reg_starttry),
7821416a 4928 PL_colors[5]));
58e23c8d 4929
262b90c4 4930 sayNO_SILENT; /* Cannot match: too short. */
7821416a
IZ
4931 }
4932 PL_reginput = locinput; /* put where regtry can find it */
262b90c4 4933 sayYES; /* Success! */
dad79028
DM
4934
4935 case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4936 DEBUG_EXECUTE_r(
4937 PerlIO_printf(Perl_debug_log,
4938 "%*s %ssubpattern success...%s\n",
5bc10b2c 4939 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
3280af22 4940 PL_reginput = locinput; /* put where regtry can find it */
262b90c4 4941 sayYES; /* Success! */
dad79028 4942
40a82448
DM
4943#undef ST
4944#define ST st->u.ifmatch
4945
4946 case SUSPEND: /* (?>A) */
4947 ST.wanted = 1;
9fe1d20c 4948 PL_reginput = locinput;
9041c2e3 4949 goto do_ifmatch;
dad79028 4950
40a82448
DM
4951 case UNLESSM: /* -ve lookaround: (?!A), or with flags, (?<!A) */
4952 ST.wanted = 0;
dad79028
DM
4953 goto ifmatch_trivial_fail_test;
4954
40a82448
DM
4955 case IFMATCH: /* +ve lookaround: (?=A), or with flags, (?<=A) */
4956 ST.wanted = 1;
dad79028 4957 ifmatch_trivial_fail_test:
a0ed51b3 4958 if (scan->flags) {
52657f30 4959 char * const s = HOPBACKc(locinput, scan->flags);
dad79028
DM
4960 if (!s) {
4961 /* trivial fail */
24d3c4a9
DM
4962 if (logical) {
4963 logical = 0;
4964 sw = 1 - (bool)ST.wanted;
dad79028 4965 }
40a82448 4966 else if (ST.wanted)
dad79028
DM
4967 sayNO;
4968 next = scan + ARG(scan);
4969 if (next == scan)
4970 next = NULL;
4971 break;
4972 }
efb30f32 4973 PL_reginput = s;
a0ed51b3
LW
4974 }
4975 else
4976 PL_reginput = locinput;
4977
c277df42 4978 do_ifmatch:
40a82448 4979 ST.me = scan;
24d3c4a9 4980 ST.logical = logical;
40a82448
DM
4981 /* execute body of (?...A) */
4982 PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
4983 /* NOTREACHED */
4984
4985 case IFMATCH_A_fail: /* body of (?...A) failed */
4986 ST.wanted = !ST.wanted;
4987 /* FALL THROUGH */
4988
4989 case IFMATCH_A: /* body of (?...A) succeeded */
24d3c4a9
DM
4990 if (ST.logical) {
4991 sw = (bool)ST.wanted;
40a82448
DM
4992 }
4993 else if (!ST.wanted)
4994 sayNO;
4995
4996 if (OP(ST.me) == SUSPEND)
4997 locinput = PL_reginput;
4998 else {
4999 locinput = PL_reginput = st->locinput;
5000 nextchr = UCHARAT(locinput);
5001 }
5002 scan = ST.me + ARG(ST.me);
5003 if (scan == ST.me)
5004 scan = NULL;
5005 continue; /* execute B */
5006
5007#undef ST
dad79028 5008
c277df42 5009 case LONGJMP:
c277df42
IZ
5010 next = scan + ARG(scan);
5011 if (next == scan)
5012 next = NULL;
a0d0e21e 5013 break;
54612592 5014 case COMMIT:
e2e6a0f1
YO
5015 reginfo->cutpoint = PL_regeol;
5016 /* FALLTHROUGH */
5d458dd8 5017 case PRUNE:
24b23f37 5018 PL_reginput = locinput;
e2e6a0f1 5019 if (!scan->flags)
f8fc2ecf 5020 sv_yes_mark = sv_commit = (SV*)rexi->data->data[ ARG( scan ) ];
54612592
YO
5021 PUSH_STATE_GOTO(COMMIT_next,next);
5022 /* NOTREACHED */
5023 case COMMIT_next_fail:
5024 no_final = 1;
5025 /* FALLTHROUGH */
7f69552c
YO
5026 case OPFAIL:
5027 sayNO;
e2e6a0f1
YO
5028 /* NOTREACHED */
5029
5030#define ST st->u.mark
5031 case MARKPOINT:
5032 ST.prev_mark = mark_state;
5d458dd8 5033 ST.mark_name = sv_commit = sv_yes_mark
f8fc2ecf 5034 = (SV*)rexi->data->data[ ARG( scan ) ];
e2e6a0f1
YO
5035 mark_state = st;
5036 ST.mark_loc = PL_reginput = locinput;
5037 PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
5038 /* NOTREACHED */
5039 case MARKPOINT_next:
5040 mark_state = ST.prev_mark;
5041 sayYES;
5042 /* NOTREACHED */
5043 case MARKPOINT_next_fail:
5d458dd8 5044 if (popmark && sv_eq(ST.mark_name,popmark))
e2e6a0f1
YO
5045 {
5046 if (ST.mark_loc > startpoint)
5047 reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5048 popmark = NULL; /* we found our mark */
5049 sv_commit = ST.mark_name;
5050
5051 DEBUG_EXECUTE_r({
5d458dd8 5052 PerlIO_printf(Perl_debug_log,
e2e6a0f1
YO
5053 "%*s %ssetting cutpoint to mark:%"SVf"...%s\n",
5054 REPORT_CODE_OFF+depth*2, "",
be2597df 5055 PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
e2e6a0f1
YO
5056 });
5057 }
5058 mark_state = ST.prev_mark;
5d458dd8
YO
5059 sv_yes_mark = mark_state ?
5060 mark_state->u.mark.mark_name : NULL;
e2e6a0f1
YO
5061 sayNO;
5062 /* NOTREACHED */
5d458dd8
YO
5063 case SKIP:
5064 PL_reginput = locinput;
5065 if (scan->flags) {
2bf803e2 5066 /* (*SKIP) : if we fail we cut here*/
5d458dd8 5067 ST.mark_name = NULL;
e2e6a0f1 5068 ST.mark_loc = locinput;
5d458dd8
YO
5069 PUSH_STATE_GOTO(SKIP_next,next);
5070 } else {
2bf803e2 5071 /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
5d458dd8
YO
5072 otherwise do nothing. Meaning we need to scan
5073 */
5074 regmatch_state *cur = mark_state;
f8fc2ecf 5075 SV *find = (SV*)rexi->data->data[ ARG( scan ) ];
5d458dd8
YO
5076
5077 while (cur) {
5078 if ( sv_eq( cur->u.mark.mark_name,
5079 find ) )
5080 {
5081 ST.mark_name = find;
5082 PUSH_STATE_GOTO( SKIP_next, next );
5083 }
5084 cur = cur->u.mark.prev_mark;
5085 }
e2e6a0f1 5086 }
2bf803e2 5087 /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
5d458dd8
YO
5088 break;
5089 case SKIP_next_fail:
5090 if (ST.mark_name) {
5091 /* (*CUT:NAME) - Set up to search for the name as we
5092 collapse the stack*/
5093 popmark = ST.mark_name;
5094 } else {
5095 /* (*CUT) - No name, we cut here.*/
e2e6a0f1
YO
5096 if (ST.mark_loc > startpoint)
5097 reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5d458dd8
YO
5098 /* but we set sv_commit to latest mark_name if there
5099 is one so they can test to see how things lead to this
5100 cut */
5101 if (mark_state)
5102 sv_commit=mark_state->u.mark.mark_name;
5103 }
e2e6a0f1
YO
5104 no_final = 1;
5105 sayNO;
5106 /* NOTREACHED */
5107#undef ST
32e6a07c
YO
5108 case FOLDCHAR:
5109 n = ARG(scan);
81d4fa0f 5110 if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) {
e64b1bd1
YO
5111 locinput += ln;
5112 } else if ( 0xDF == n && !do_utf8 && !UTF ) {
5113 sayNO;
5114 } else {
5115 U8 folded[UTF8_MAXBYTES_CASE+1];
5116 STRLEN foldlen;
5117 const char * const l = locinput;
5118 char *e = PL_regeol;
5119 to_uni_fold(n, folded, &foldlen);
5120
59fe32ea 5121 if (ibcmp_utf8((const char*) folded, 0, foldlen, 1,
e64b1bd1 5122 l, &e, 0, do_utf8)) {
32e6a07c 5123 sayNO;
e64b1bd1
YO
5124 }
5125 locinput = e;
32e6a07c
YO
5126 }
5127 nextchr = UCHARAT(locinput);
5128 break;
e1d1eefb
YO
5129 case LNBREAK:
5130 if ((n=is_LNBREAK(locinput,do_utf8))) {
5131 locinput += n;
5132 nextchr = UCHARAT(locinput);
5133 } else
5134 sayNO;
5135 break;
5136
5137#define CASE_CLASS(nAmE) \
5138 case nAmE: \
5139 if ((n=is_##nAmE(locinput,do_utf8))) { \
5140 locinput += n; \
5141 nextchr = UCHARAT(locinput); \
5142 } else \
5143 sayNO; \
5144 break; \
5145 case N##nAmE: \
5146 if ((n=is_##nAmE(locinput,do_utf8))) { \
5147 sayNO; \
5148 } else { \
5149 locinput += UTF8SKIP(locinput); \
5150 nextchr = UCHARAT(locinput); \
5151 } \
5152 break
5153
5154 CASE_CLASS(VERTWS);
5155 CASE_CLASS(HORIZWS);
5156#undef CASE_CLASS
5157
a0d0e21e 5158 default:
b900a521 5159 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 5160 PTR2UV(scan), OP(scan));
cea2e8a9 5161 Perl_croak(aTHX_ "regexp memory corruption");
5d458dd8
YO
5162
5163 } /* end switch */
95b24440 5164
5d458dd8
YO
5165 /* switch break jumps here */
5166 scan = next; /* prepare to execute the next op and ... */
5167 continue; /* ... jump back to the top, reusing st */
95b24440
DM
5168 /* NOTREACHED */
5169
40a82448
DM
5170 push_yes_state:
5171 /* push a state that backtracks on success */
5172 st->u.yes.prev_yes_state = yes_state;
5173 yes_state = st;
5174 /* FALL THROUGH */
5175 push_state:
5176 /* push a new regex state, then continue at scan */
5177 {
5178 regmatch_state *newst;
5179
24b23f37
YO
5180 DEBUG_STACK_r({
5181 regmatch_state *cur = st;
5182 regmatch_state *curyes = yes_state;
5183 int curd = depth;
5184 regmatch_slab *slab = PL_regmatch_slab;
5185 for (;curd > -1;cur--,curd--) {
5186 if (cur < SLAB_FIRST(slab)) {
5187 slab = slab->prev;
5188 cur = SLAB_LAST(slab);
5189 }
5190 PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
5191 REPORT_CODE_OFF + 2 + depth * 2,"",
13d6edb4 5192 curd, PL_reg_name[cur->resume_state],
24b23f37
YO
5193 (curyes == cur) ? "yes" : ""
5194 );
5195 if (curyes == cur)
5196 curyes = cur->u.yes.prev_yes_state;
5197 }
5198 } else
5199 DEBUG_STATE_pp("push")
5200 );
40a82448 5201 depth++;
40a82448
DM
5202 st->locinput = locinput;
5203 newst = st+1;
5204 if (newst > SLAB_LAST(PL_regmatch_slab))
5205 newst = S_push_slab(aTHX);
5206 PL_regmatch_state = newst;
786e8c11 5207
40a82448
DM
5208 locinput = PL_reginput;
5209 nextchr = UCHARAT(locinput);
5210 st = newst;
5211 continue;
5212 /* NOTREACHED */
5213 }
a0d0e21e 5214 }
a687059c 5215
a0d0e21e
LW
5216 /*
5217 * We get here only if there's trouble -- normally "case END" is
5218 * the terminating point.
5219 */
cea2e8a9 5220 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 5221 /*NOTREACHED*/
4633a7c4
LW
5222 sayNO;
5223
262b90c4 5224yes:
77cb431f
DM
5225 if (yes_state) {
5226 /* we have successfully completed a subexpression, but we must now
5227 * pop to the state marked by yes_state and continue from there */
77cb431f 5228 assert(st != yes_state);
5bc10b2c
DM
5229#ifdef DEBUGGING
5230 while (st != yes_state) {
5231 st--;
5232 if (st < SLAB_FIRST(PL_regmatch_slab)) {
5233 PL_regmatch_slab = PL_regmatch_slab->prev;
5234 st = SLAB_LAST(PL_regmatch_slab);
5235 }
e2e6a0f1 5236 DEBUG_STATE_r({
54612592
YO
5237 if (no_final) {
5238 DEBUG_STATE_pp("pop (no final)");
5239 } else {
5240 DEBUG_STATE_pp("pop (yes)");
5241 }
e2e6a0f1 5242 });
5bc10b2c
DM
5243 depth--;
5244 }
5245#else
77cb431f
DM
5246 while (yes_state < SLAB_FIRST(PL_regmatch_slab)
5247 || yes_state > SLAB_LAST(PL_regmatch_slab))
5248 {
5249 /* not in this slab, pop slab */
5250 depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
5251 PL_regmatch_slab = PL_regmatch_slab->prev;
5252 st = SLAB_LAST(PL_regmatch_slab);
5253 }
5254 depth -= (st - yes_state);
5bc10b2c 5255#endif
77cb431f
DM
5256 st = yes_state;
5257 yes_state = st->u.yes.prev_yes_state;
5258 PL_regmatch_state = st;
24b23f37 5259
5d458dd8
YO
5260 if (no_final) {
5261 locinput= st->locinput;
5262 nextchr = UCHARAT(locinput);
5263 }
54612592 5264 state_num = st->resume_state + no_final;
24d3c4a9 5265 goto reenter_switch;
77cb431f
DM
5266 }
5267
a3621e74 5268 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
e4584336 5269 PL_colors[4], PL_colors[5]));
02db2b7b 5270
19b95bf0
DM
5271 if (PL_reg_eval_set) {
5272 /* each successfully executed (?{...}) block does the equivalent of
5273 * local $^R = do {...}
5274 * When popping the save stack, all these locals would be undone;
5275 * bypass this by setting the outermost saved $^R to the latest
5276 * value */
5277 if (oreplsv != GvSV(PL_replgv))
5278 sv_setsv(oreplsv, GvSV(PL_replgv));
5279 }
95b24440 5280 result = 1;
aa283a38 5281 goto final_exit;
4633a7c4
LW
5282
5283no:
a3621e74 5284 DEBUG_EXECUTE_r(
7821416a 5285 PerlIO_printf(Perl_debug_log,
786e8c11 5286 "%*s %sfailed...%s\n",
5bc10b2c 5287 REPORT_CODE_OFF+depth*2, "",
786e8c11 5288 PL_colors[4], PL_colors[5])
7821416a 5289 );
aa283a38 5290
262b90c4 5291no_silent:
54612592
YO
5292 if (no_final) {
5293 if (yes_state) {
5294 goto yes;
5295 } else {
5296 goto final_exit;
5297 }
5298 }
aa283a38
DM
5299 if (depth) {
5300 /* there's a previous state to backtrack to */
40a82448
DM
5301 st--;
5302 if (st < SLAB_FIRST(PL_regmatch_slab)) {
5303 PL_regmatch_slab = PL_regmatch_slab->prev;
5304 st = SLAB_LAST(PL_regmatch_slab);
5305 }
5306 PL_regmatch_state = st;
40a82448
DM
5307 locinput= st->locinput;
5308 nextchr = UCHARAT(locinput);
5309
5bc10b2c
DM
5310 DEBUG_STATE_pp("pop");
5311 depth--;
262b90c4
DM
5312 if (yes_state == st)
5313 yes_state = st->u.yes.prev_yes_state;
5bc10b2c 5314
24d3c4a9
DM
5315 state_num = st->resume_state + 1; /* failure = success + 1 */
5316 goto reenter_switch;
95b24440 5317 }
24d3c4a9 5318 result = 0;
aa283a38 5319
262b90c4 5320 final_exit:
bbe252da 5321 if (rex->intflags & PREGf_VERBARG_SEEN) {
5d458dd8
YO
5322 SV *sv_err = get_sv("REGERROR", 1);
5323 SV *sv_mrk = get_sv("REGMARK", 1);
5324 if (result) {
e2e6a0f1 5325 sv_commit = &PL_sv_no;
5d458dd8
YO
5326 if (!sv_yes_mark)
5327 sv_yes_mark = &PL_sv_yes;
5328 } else {
5329 if (!sv_commit)
5330 sv_commit = &PL_sv_yes;
5331 sv_yes_mark = &PL_sv_no;
5332 }
5333 sv_setsv(sv_err, sv_commit);
5334 sv_setsv(sv_mrk, sv_yes_mark);
e2e6a0f1 5335 }
19b95bf0 5336
2f554ef7
DM
5337 /* clean up; in particular, free all slabs above current one */
5338 LEAVE_SCOPE(oldsave);
5d9a96ca 5339
95b24440 5340 return result;
a687059c
LW
5341}
5342
5343/*
5344 - regrepeat - repeatedly match something simple, report how many
5345 */
5346/*
5347 * [This routine now assumes that it will only match on things of length 1.
5348 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 5349 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 5350 */
76e3520e 5351STATIC I32
e2e6a0f1 5352S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
a687059c 5353{
27da23d5 5354 dVAR;
a0d0e21e 5355 register char *scan;
a0d0e21e 5356 register I32 c;
3280af22 5357 register char *loceol = PL_regeol;
a0ed51b3 5358 register I32 hardcount = 0;
53c4c00c 5359 register bool do_utf8 = PL_reg_match_utf8;
4f55667c
SP
5360#ifndef DEBUGGING
5361 PERL_UNUSED_ARG(depth);
5362#endif
a0d0e21e 5363
7918f24d
NC
5364 PERL_ARGS_ASSERT_REGREPEAT;
5365
3280af22 5366 scan = PL_reginput;
faf11cac
HS
5367 if (max == REG_INFTY)
5368 max = I32_MAX;
5369 else if (max < loceol - scan)
7f596f4c 5370 loceol = scan + max;
a0d0e21e 5371 switch (OP(p)) {
22c35a8c 5372 case REG_ANY:
1aa99e6b 5373 if (do_utf8) {
ffc61ed2 5374 loceol = PL_regeol;
1aa99e6b 5375 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
5376 scan += UTF8SKIP(scan);
5377 hardcount++;
5378 }
5379 } else {
5380 while (scan < loceol && *scan != '\n')
5381 scan++;
a0ed51b3
LW
5382 }
5383 break;
ffc61ed2 5384 case SANY:
def8e4ea
JH
5385 if (do_utf8) {
5386 loceol = PL_regeol;
a0804c9e 5387 while (scan < loceol && hardcount < max) {
def8e4ea
JH
5388 scan += UTF8SKIP(scan);
5389 hardcount++;
5390 }
5391 }
5392 else
5393 scan = loceol;
a0ed51b3 5394 break;
f33976b4
DB
5395 case CANY:
5396 scan = loceol;
5397 break;
090f7165
JH
5398 case EXACT: /* length of string is 1 */
5399 c = (U8)*STRING(p);
5400 while (scan < loceol && UCHARAT(scan) == c)
5401 scan++;
bbce6d69 5402 break;
5403 case EXACTF: /* length of string is 1 */
cd439c50 5404 c = (U8)*STRING(p);
bbce6d69 5405 while (scan < loceol &&
22c35a8c 5406 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 5407 scan++;
5408 break;
5409 case EXACTFL: /* length of string is 1 */
3280af22 5410 PL_reg_flags |= RF_tainted;
cd439c50 5411 c = (U8)*STRING(p);
bbce6d69 5412 while (scan < loceol &&
22c35a8c 5413 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
5414 scan++;
5415 break;
5416 case ANYOF:
ffc61ed2
JH
5417 if (do_utf8) {
5418 loceol = PL_regeol;
cfc92286 5419 while (hardcount < max && scan < loceol &&
32fc9b6a 5420 reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
ffc61ed2
JH
5421 scan += UTF8SKIP(scan);
5422 hardcount++;
5423 }
5424 } else {
32fc9b6a 5425 while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
ffc61ed2
JH
5426 scan++;
5427 }
a0d0e21e
LW
5428 break;
5429 case ALNUM:
1aa99e6b 5430 if (do_utf8) {
ffc61ed2 5431 loceol = PL_regeol;
1a4fad37 5432 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 5433 while (hardcount < max && scan < loceol &&
3568d838 5434 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
5435 scan += UTF8SKIP(scan);
5436 hardcount++;
5437 }
5438 } else {
5439 while (scan < loceol && isALNUM(*scan))
5440 scan++;
a0ed51b3
LW
5441 }
5442 break;
bbce6d69 5443 case ALNUML:
3280af22 5444 PL_reg_flags |= RF_tainted;
1aa99e6b 5445 if (do_utf8) {
ffc61ed2 5446 loceol = PL_regeol;
1aa99e6b
IH
5447 while (hardcount < max && scan < loceol &&
5448 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
5449 scan += UTF8SKIP(scan);
5450 hardcount++;
5451 }
5452 } else {
5453 while (scan < loceol && isALNUM_LC(*scan))
5454 scan++;
a0ed51b3
LW
5455 }
5456 break;
a0d0e21e 5457 case NALNUM:
1aa99e6b 5458 if (do_utf8) {
ffc61ed2 5459 loceol = PL_regeol;
1a4fad37 5460 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 5461 while (hardcount < max && scan < loceol &&
3568d838 5462 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
5463 scan += UTF8SKIP(scan);
5464 hardcount++;
5465 }
5466 } else {
5467 while (scan < loceol && !isALNUM(*scan))
5468 scan++;
a0ed51b3
LW
5469 }
5470 break;
bbce6d69 5471 case NALNUML:
3280af22 5472 PL_reg_flags |= RF_tainted;
1aa99e6b 5473 if (do_utf8) {
ffc61ed2 5474 loceol = PL_regeol;
1aa99e6b
IH
5475 while (hardcount < max && scan < loceol &&
5476 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
5477 scan += UTF8SKIP(scan);
5478 hardcount++;
5479 }
5480 } else {
5481 while (scan < loceol && !isALNUM_LC(*scan))
5482 scan++;
a0ed51b3
LW
5483 }
5484 break;
a0d0e21e 5485 case SPACE:
1aa99e6b 5486 if (do_utf8) {
ffc61ed2 5487 loceol = PL_regeol;
1a4fad37 5488 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 5489 while (hardcount < max && scan < loceol &&
3568d838
JH
5490 (*scan == ' ' ||
5491 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
5492 scan += UTF8SKIP(scan);
5493 hardcount++;
5494 }
5495 } else {
5496 while (scan < loceol && isSPACE(*scan))
5497 scan++;
a0ed51b3
LW
5498 }
5499 break;
bbce6d69 5500 case SPACEL:
3280af22 5501 PL_reg_flags |= RF_tainted;
1aa99e6b 5502 if (do_utf8) {
ffc61ed2 5503 loceol = PL_regeol;
1aa99e6b 5504 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
5505 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5506 scan += UTF8SKIP(scan);
5507 hardcount++;
5508 }
5509 } else {
5510 while (scan < loceol && isSPACE_LC(*scan))
5511 scan++;
a0ed51b3
LW
5512 }
5513 break;
a0d0e21e 5514 case NSPACE:
1aa99e6b 5515 if (do_utf8) {
ffc61ed2 5516 loceol = PL_regeol;
1a4fad37 5517 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 5518 while (hardcount < max && scan < loceol &&
3568d838
JH
5519 !(*scan == ' ' ||
5520 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
5521 scan += UTF8SKIP(scan);
5522 hardcount++;
5523 }
5524 } else {
5525 while (scan < loceol && !isSPACE(*scan))
5526 scan++;
a0ed51b3 5527 }
0008a298 5528 break;
bbce6d69 5529 case NSPACEL:
3280af22 5530 PL_reg_flags |= RF_tainted;
1aa99e6b 5531 if (do_utf8) {
ffc61ed2 5532 loceol = PL_regeol;
1aa99e6b 5533 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
5534 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5535 scan += UTF8SKIP(scan);
5536 hardcount++;
5537 }
5538 } else {
5539 while (scan < loceol && !isSPACE_LC(*scan))
5540 scan++;
a0ed51b3
LW
5541 }
5542 break;
a0d0e21e 5543 case DIGIT:
1aa99e6b 5544 if (do_utf8) {
ffc61ed2 5545 loceol = PL_regeol;
1a4fad37 5546 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 5547 while (hardcount < max && scan < loceol &&
3568d838 5548 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
5549 scan += UTF8SKIP(scan);
5550 hardcount++;
5551 }
5552 } else {
5553 while (scan < loceol && isDIGIT(*scan))
5554 scan++;
a0ed51b3
LW
5555 }
5556 break;
a0d0e21e 5557 case NDIGIT:
1aa99e6b 5558 if (do_utf8) {
ffc61ed2 5559 loceol = PL_regeol;
1a4fad37 5560 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 5561 while (hardcount < max && scan < loceol &&
3568d838 5562 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
5563 scan += UTF8SKIP(scan);
5564 hardcount++;
5565 }
5566 } else {
5567 while (scan < loceol && !isDIGIT(*scan))
5568 scan++;
a0ed51b3 5569 }
e1d1eefb
YO
5570 case LNBREAK:
5571 if (do_utf8) {
5572 loceol = PL_regeol;
5573 while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
5574 scan += c;
5575 hardcount++;
5576 }
5577 } else {
5578 /*
5579 LNBREAK can match two latin chars, which is ok,
5580 because we have a null terminated string, but we
5581 have to use hardcount in this situation
5582 */
5583 while (scan < loceol && (c=is_LNBREAK_latin1(scan))) {
5584 scan+=c;
5585 hardcount++;
5586 }
5587 }
5588 break;
5589 case HORIZWS:
5590 if (do_utf8) {
5591 loceol = PL_regeol;
5592 while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
5593 scan += c;
5594 hardcount++;
5595 }
5596 } else {
5597 while (scan < loceol && is_HORIZWS_latin1(scan))
5598 scan++;
5599 }
a0ed51b3 5600 break;
e1d1eefb
YO
5601 case NHORIZWS:
5602 if (do_utf8) {
5603 loceol = PL_regeol;
5604 while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
5605 scan += UTF8SKIP(scan);
5606 hardcount++;
5607 }
5608 } else {
5609 while (scan < loceol && !is_HORIZWS_latin1(scan))
5610 scan++;
5611
5612 }
5613 break;
5614 case VERTWS:
5615 if (do_utf8) {
5616 loceol = PL_regeol;
5617 while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
5618 scan += c;
5619 hardcount++;
5620 }
5621 } else {
5622 while (scan < loceol && is_VERTWS_latin1(scan))
5623 scan++;
5624
5625 }
5626 break;
5627 case NVERTWS:
5628 if (do_utf8) {
5629 loceol = PL_regeol;
5630 while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
5631 scan += UTF8SKIP(scan);
5632 hardcount++;
5633 }
5634 } else {
5635 while (scan < loceol && !is_VERTWS_latin1(scan))
5636 scan++;
5637
5638 }
5639 break;
5640
a0d0e21e
LW
5641 default: /* Called on something of 0 width. */
5642 break; /* So match right here or not at all. */
5643 }
a687059c 5644
a0ed51b3
LW
5645 if (hardcount)
5646 c = hardcount;
5647 else
5648 c = scan - PL_reginput;
3280af22 5649 PL_reginput = scan;
a687059c 5650
a3621e74 5651 DEBUG_r({
e68ec53f 5652 GET_RE_DEBUG_FLAGS_DECL;
be8e71aa 5653 DEBUG_EXECUTE_r({
e68ec53f
YO
5654 SV * const prop = sv_newmortal();
5655 regprop(prog, prop, p);
5656 PerlIO_printf(Perl_debug_log,
be8e71aa 5657 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
e2e6a0f1 5658 REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
a3621e74 5659 });
be8e71aa 5660 });
9041c2e3 5661
a0d0e21e 5662 return(c);
a687059c
LW
5663}
5664
c277df42 5665
be8e71aa 5666#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
c277df42 5667/*
ffc61ed2
JH
5668- regclass_swash - prepare the utf8 swash
5669*/
5670
5671SV *
32fc9b6a 5672Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
ffc61ed2 5673{
97aff369 5674 dVAR;
9e55ce06
JH
5675 SV *sw = NULL;
5676 SV *si = NULL;
5677 SV *alt = NULL;
f8fc2ecf
YO
5678 RXi_GET_DECL(prog,progi);
5679 const struct reg_data * const data = prog ? progi->data : NULL;
ffc61ed2 5680
7918f24d
NC
5681 PERL_ARGS_ASSERT_REGCLASS_SWASH;
5682
4f639d21 5683 if (data && data->count) {
a3b680e6 5684 const U32 n = ARG(node);
ffc61ed2 5685
4f639d21
DM
5686 if (data->what[n] == 's') {
5687 SV * const rv = (SV*)data->data[n];
890ce7af 5688 AV * const av = (AV*)SvRV((SV*)rv);
2d03de9c 5689 SV **const ary = AvARRAY(av);
9e55ce06 5690 SV **a, **b;
9041c2e3 5691
711a919c 5692 /* See the end of regcomp.c:S_regclass() for
9e55ce06
JH
5693 * documentation of these array elements. */
5694
b11f357e 5695 si = *ary;
fe5bfecd
JH
5696 a = SvROK(ary[1]) ? &ary[1] : NULL;
5697 b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : NULL;
b11f357e 5698
ffc61ed2
JH
5699 if (a)
5700 sw = *a;
5701 else if (si && doinit) {
5702 sw = swash_init("utf8", "", si, 1, 0);
5703 (void)av_store(av, 1, sw);
5704 }
9e55ce06
JH
5705 if (b)
5706 alt = *b;
ffc61ed2
JH
5707 }
5708 }
5709
9e55ce06
JH
5710 if (listsvp)
5711 *listsvp = si;
5712 if (altsvp)
5713 *altsvp = alt;
ffc61ed2
JH
5714
5715 return sw;
5716}
76234dfb 5717#endif
ffc61ed2
JH
5718
5719/*
ba7b4546 5720 - reginclass - determine if a character falls into a character class
832705d4
JH
5721
5722 The n is the ANYOF regnode, the p is the target string, lenp
5723 is pointer to the maximum length of how far to go in the p
5724 (if the lenp is zero, UTF8SKIP(p) is used),
5725 do_utf8 tells whether the target string is in UTF-8.
5726
bbce6d69 5727 */
5728
76e3520e 5729STATIC bool
32fc9b6a 5730S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
bbce6d69 5731{
27da23d5 5732 dVAR;
a3b680e6 5733 const char flags = ANYOF_FLAGS(n);
bbce6d69 5734 bool match = FALSE;
cc07378b 5735 UV c = *p;
ae9ddab8 5736 STRLEN len = 0;
9e55ce06 5737 STRLEN plen;
1aa99e6b 5738
7918f24d
NC
5739 PERL_ARGS_ASSERT_REGINCLASS;
5740
19f67299
TS
5741 if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5742 c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
4ad0818d
DM
5743 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
5744 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
e8a70c6f
SP
5745 if (len == (STRLEN)-1)
5746 Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
19f67299 5747 }
bbce6d69 5748
0f0076b4 5749 plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
ffc61ed2 5750 if (do_utf8 || (flags & ANYOF_UNICODE)) {
9e55ce06
JH
5751 if (lenp)
5752 *lenp = 0;
ffc61ed2 5753 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
5754 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5755 match = TRUE;
bbce6d69 5756 }
3568d838 5757 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 5758 match = TRUE;
ffc61ed2 5759 if (!match) {
9e55ce06 5760 AV *av;
32fc9b6a 5761 SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
ffc61ed2
JH
5762
5763 if (sw) {
3568d838 5764 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
5765 match = TRUE;
5766 else if (flags & ANYOF_FOLD) {
9e55ce06
JH
5767 if (!match && lenp && av) {
5768 I32 i;
9e55ce06 5769 for (i = 0; i <= av_len(av); i++) {
890ce7af 5770 SV* const sv = *av_fetch(av, i, FALSE);
9e55ce06 5771 STRLEN len;
890ce7af 5772 const char * const s = SvPV_const(sv, len);
9e55ce06 5773
061b10df 5774 if (len <= plen && memEQ(s, (char*)p, len)) {
9e55ce06
JH
5775 *lenp = len;
5776 match = TRUE;
5777 break;
5778 }
5779 }
5780 }
5781 if (!match) {
89ebb4a3 5782 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4a623e43
JH
5783 STRLEN tmplen;
5784
9e55ce06
JH
5785 to_utf8_fold(p, tmpbuf, &tmplen);
5786 if (swash_fetch(sw, tmpbuf, do_utf8))
5787 match = TRUE;
5788 }
ffc61ed2
JH
5789 }
5790 }
bbce6d69 5791 }
9e55ce06 5792 if (match && lenp && *lenp == 0)
0f0076b4 5793 *lenp = UNISKIP(NATIVE_TO_UNI(c));
bbce6d69 5794 }
1aa99e6b 5795 if (!match && c < 256) {
ffc61ed2
JH
5796 if (ANYOF_BITMAP_TEST(n, c))
5797 match = TRUE;
5798 else if (flags & ANYOF_FOLD) {
eb160463 5799 U8 f;
a0ed51b3 5800
ffc61ed2
JH
5801 if (flags & ANYOF_LOCALE) {
5802 PL_reg_flags |= RF_tainted;
5803 f = PL_fold_locale[c];
5804 }
5805 else
5806 f = PL_fold[c];
5807 if (f != c && ANYOF_BITMAP_TEST(n, f))
5808 match = TRUE;
5809 }
5810
5811 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 5812 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
5813 if (
5814 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
5815 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
5816 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
5817 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
5818 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
5819 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
5820 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
5821 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5822 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
5823 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
5824 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
5825 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
5826 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
5827 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
5828 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
5829 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
5830 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
5831 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
5832 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
5833 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
5834 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
5835 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
5836 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
5837 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
5838 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
5839 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
5840 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
5841 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
5842 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
5843 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
5844 ) /* How's that for a conditional? */
5845 {
5846 match = TRUE;
5847 }
a0ed51b3 5848 }
a0ed51b3
LW
5849 }
5850
a0ed51b3
LW
5851 return (flags & ANYOF_INVERT) ? !match : match;
5852}
161b471a 5853
dfe13c55 5854STATIC U8 *
0ce71af7 5855S_reghop3(U8 *s, I32 off, const U8* lim)
9041c2e3 5856{
97aff369 5857 dVAR;
7918f24d
NC
5858
5859 PERL_ARGS_ASSERT_REGHOP3;
5860
a0ed51b3 5861 if (off >= 0) {
1aa99e6b 5862 while (off-- && s < lim) {
ffc61ed2 5863 /* XXX could check well-formedness here */
a0ed51b3 5864 s += UTF8SKIP(s);
ffc61ed2 5865 }
a0ed51b3
LW
5866 }
5867 else {
1de06328
YO
5868 while (off++ && s > lim) {
5869 s--;
5870 if (UTF8_IS_CONTINUED(*s)) {
5871 while (s > lim && UTF8_IS_CONTINUATION(*s))
5872 s--;
a0ed51b3 5873 }
1de06328 5874 /* XXX could check well-formedness here */
a0ed51b3
LW
5875 }
5876 }
5877 return s;
5878}
161b471a 5879
f9f4320a
YO
5880#ifdef XXX_dmq
5881/* there are a bunch of places where we use two reghop3's that should
5882 be replaced with this routine. but since thats not done yet
5883 we ifdef it out - dmq
5884*/
dfe13c55 5885STATIC U8 *
1de06328
YO
5886S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
5887{
5888 dVAR;
7918f24d
NC
5889
5890 PERL_ARGS_ASSERT_REGHOP4;
5891
1de06328
YO
5892 if (off >= 0) {
5893 while (off-- && s < rlim) {
5894 /* XXX could check well-formedness here */
5895 s += UTF8SKIP(s);
5896 }
5897 }
5898 else {
5899 while (off++ && s > llim) {
5900 s--;
5901 if (UTF8_IS_CONTINUED(*s)) {
5902 while (s > llim && UTF8_IS_CONTINUATION(*s))
5903 s--;
5904 }
5905 /* XXX could check well-formedness here */
5906 }
5907 }
5908 return s;
5909}
f9f4320a 5910#endif
1de06328
YO
5911
5912STATIC U8 *
0ce71af7 5913S_reghopmaybe3(U8* s, I32 off, const U8* lim)
a0ed51b3 5914{
97aff369 5915 dVAR;
7918f24d
NC
5916
5917 PERL_ARGS_ASSERT_REGHOPMAYBE3;
5918
a0ed51b3 5919 if (off >= 0) {
1aa99e6b 5920 while (off-- && s < lim) {
ffc61ed2 5921 /* XXX could check well-formedness here */
a0ed51b3 5922 s += UTF8SKIP(s);
ffc61ed2 5923 }
a0ed51b3 5924 if (off >= 0)
3dab1dad 5925 return NULL;
a0ed51b3
LW
5926 }
5927 else {
1de06328
YO
5928 while (off++ && s > lim) {
5929 s--;
5930 if (UTF8_IS_CONTINUED(*s)) {
5931 while (s > lim && UTF8_IS_CONTINUATION(*s))
5932 s--;
a0ed51b3 5933 }
1de06328 5934 /* XXX could check well-formedness here */
a0ed51b3
LW
5935 }
5936 if (off <= 0)
3dab1dad 5937 return NULL;
a0ed51b3
LW
5938 }
5939 return s;
5940}
51371543 5941
51371543 5942static void
acfe0abc 5943restore_pos(pTHX_ void *arg)
51371543 5944{
97aff369 5945 dVAR;
097eb12c 5946 regexp * const rex = (regexp *)arg;
51371543
GS
5947 if (PL_reg_eval_set) {
5948 if (PL_reg_oldsaved) {
4f639d21
DM
5949 rex->subbeg = PL_reg_oldsaved;
5950 rex->sublen = PL_reg_oldsavedlen;
f8c7b90f 5951#ifdef PERL_OLD_COPY_ON_WRITE
4f639d21 5952 rex->saved_copy = PL_nrs;
ed252734 5953#endif
07bc277f 5954 RXp_MATCH_COPIED_on(rex);
51371543
GS
5955 }
5956 PL_reg_magic->mg_len = PL_reg_oldpos;
5957 PL_reg_eval_set = 0;
5958 PL_curpm = PL_reg_oldcurpm;
5959 }
5960}
33b8afdf
JH
5961
5962STATIC void
5963S_to_utf8_substr(pTHX_ register regexp *prog)
5964{
a1cac82e 5965 int i = 1;
7918f24d
NC
5966
5967 PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
5968
a1cac82e
NC
5969 do {
5970 if (prog->substrs->data[i].substr
5971 && !prog->substrs->data[i].utf8_substr) {
5972 SV* const sv = newSVsv(prog->substrs->data[i].substr);
5973 prog->substrs->data[i].utf8_substr = sv;
5974 sv_utf8_upgrade(sv);
610460f9
NC
5975 if (SvVALID(prog->substrs->data[i].substr)) {
5976 const U8 flags = BmFLAGS(prog->substrs->data[i].substr);
5977 if (flags & FBMcf_TAIL) {
5978 /* Trim the trailing \n that fbm_compile added last
5979 time. */
5980 SvCUR_set(sv, SvCUR(sv) - 1);
5981 /* Whilst this makes the SV technically "invalid" (as its
5982 buffer is no longer followed by "\0") when fbm_compile()
5983 adds the "\n" back, a "\0" is restored. */
5984 }
5985 fbm_compile(sv, flags);
5986 }
a1cac82e
NC
5987 if (prog->substrs->data[i].substr == prog->check_substr)
5988 prog->check_utf8 = sv;
5989 }
5990 } while (i--);
33b8afdf
JH
5991}
5992
5993STATIC void
5994S_to_byte_substr(pTHX_ register regexp *prog)
5995{
97aff369 5996 dVAR;
a1cac82e 5997 int i = 1;
7918f24d
NC
5998
5999 PERL_ARGS_ASSERT_TO_BYTE_SUBSTR;
6000
a1cac82e
NC
6001 do {
6002 if (prog->substrs->data[i].utf8_substr
6003 && !prog->substrs->data[i].substr) {
6004 SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
6005 if (sv_utf8_downgrade(sv, TRUE)) {
610460f9
NC
6006 if (SvVALID(prog->substrs->data[i].utf8_substr)) {
6007 const U8 flags
6008 = BmFLAGS(prog->substrs->data[i].utf8_substr);
6009 if (flags & FBMcf_TAIL) {
6010 /* Trim the trailing \n that fbm_compile added last
6011 time. */
6012 SvCUR_set(sv, SvCUR(sv) - 1);
6013 }
6014 fbm_compile(sv, flags);
6015 }
a1cac82e
NC
6016 } else {
6017 SvREFCNT_dec(sv);
6018 sv = &PL_sv_undef;
6019 }
6020 prog->substrs->data[i].substr = sv;
6021 if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
6022 prog->check_substr = sv;
33b8afdf 6023 }
a1cac82e 6024 } while (i--);
33b8afdf 6025}
66610fdd
RGS
6026
6027/*
6028 * Local variables:
6029 * c-indentation-style: bsd
6030 * c-basic-offset: 4
6031 * indent-tabs-mode: t
6032 * End:
6033 *
37442d52
RGS
6034 * ex: set ts=8 sts=4 sw=4 noet:
6035 */