This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[perl #38906] Regex engine does not release MINMOD
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
61296642
DM
8/* This file contains functions for executing a regular expression. See
9 * also regcomp.c which funnily enough, contains functions for compiling
166f8a29 10 * a regular expression.
e4a054ea
DM
11 *
12 * This file is also copied at build time to ext/re/re_exec.c, where
13 * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14 * This causes the main functions to be compiled under new names and with
15 * debugging support added, which makes "use re 'debug'" work.
16
166f8a29
DM
17 */
18
a687059c
LW
19/* NOTE: this is derived from Henry Spencer's regexp code, and should not
20 * confused with the original package (see point 3 below). Thanks, Henry!
21 */
22
23/* Additional note: this code is very heavily munged from Henry's version
24 * in places. In some spots I've traded clarity for efficiency, so don't
25 * blame Henry for some of the lack of readability.
26 */
27
e50aee73
AD
28/* The names of the functions have been changed from regcomp and
29 * regexec to pregcomp and pregexec in order to avoid conflicts
30 * with the POSIX routines of the same names.
31*/
32
b9d5759e
AD
33#ifdef PERL_EXT_RE_BUILD
34/* need to replace pregcomp et al, so enable that */
35# ifndef PERL_IN_XSUB_RE
36# define PERL_IN_XSUB_RE
37# endif
38/* need access to debugger hooks */
cad2e5aa 39# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
40# define DEBUGGING
41# endif
42#endif
43
44#ifdef PERL_IN_XSUB_RE
d06ea78c 45/* We *really* need to overwrite these symbols: */
56953603
IZ
46# define Perl_regexec_flags my_regexec
47# define Perl_regdump my_regdump
48# define Perl_regprop my_regprop
cad2e5aa 49# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
50/* *These* symbols are masked to allow static link. */
51# define Perl_pregexec my_pregexec
9041c2e3 52# define Perl_reginitcolors my_reginitcolors
490a3f88 53# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
54
55# define PERL_NO_GET_CONTEXT
9041c2e3 56#endif
56953603 57
a687059c 58/*
e50aee73 59 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
60 *
61 * Copyright (c) 1986 by University of Toronto.
62 * Written by Henry Spencer. Not derived from licensed software.
63 *
64 * Permission is granted to anyone to use this software for any
65 * purpose on any computer system, and to redistribute it freely,
66 * subject to the following restrictions:
67 *
68 * 1. The author is not responsible for the consequences of use of
69 * this software, no matter how awful, even if they arise
70 * from defects in it.
71 *
72 * 2. The origin of this software must not be misrepresented, either
73 * by explicit claim or by omission.
74 *
75 * 3. Altered versions must be plainly marked as such, and must not
76 * be misrepresented as being the original software.
77 *
78 **** Alterations to Henry's code are...
79 ****
4bb101f2 80 **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 81 **** 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 82 ****
9ef589d8
LW
83 **** You may distribute under the terms of either the GNU General Public
84 **** License or the Artistic License, as specified in the README file.
a687059c
LW
85 *
86 * Beware that some of this code is subtly aware of the way operator
87 * precedence is structured in regular expressions. Serious changes in
88 * regular-expression syntax might require a total rethink.
89 */
90#include "EXTERN.h"
864dbfa3 91#define PERL_IN_REGEXEC_C
a687059c 92#include "perl.h"
0f5d15d6 93
a687059c
LW
94#include "regcomp.h"
95
c277df42
IZ
96#define RF_tainted 1 /* tainted information used? */
97#define RF_warned 2 /* warned about big count? */
ce862d02 98#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
99#define RF_utf8 8 /* String contains multibyte chars? */
100
eb160463 101#define UTF ((PL_reg_flags & RF_utf8) != 0)
ce862d02
IZ
102
103#define RS_init 1 /* eval environment created */
104#define RS_set 2 /* replsv value is set */
c277df42 105
a687059c
LW
106#ifndef STATIC
107#define STATIC static
108#endif
109
ba7b4546 110#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
7d3e948e 111
c277df42
IZ
112/*
113 * Forwards.
114 */
115
33b8afdf 116#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 117#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 118
52657f30
AL
119#define HOPc(pos,off) ((char *)(PL_reg_match_utf8 \
120 ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
121 : (U8*)(pos + off)))
122#define HOPBACKc(pos, off) ((char*) \
123 ((PL_reg_match_utf8) \
124 ? reghopmaybe3((U8*)pos, -off, ((U8*)(off < 0 ? PL_regeol : PL_bostr))) \
efb30f32
HS
125 : (pos - off >= PL_bostr) \
126 ? (U8*)(pos - off) \
52657f30 127 : (U8*)NULL) \
efb30f32 128)
efb30f32 129
1aa99e6b 130#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c 131#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b 132#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
1aa99e6b 133
1a4fad37
AL
134#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
135 if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
136#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
137#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
138#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
139#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
51371543 140
5f80c4cf 141/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
142#define JUMPABLE(rn) ( \
143 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
144 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
145 OP(rn) == PLUS || OP(rn) == MINMOD || \
146 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
147)
148
cca55fe3
JP
149#define HAS_TEXT(rn) ( \
150 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
151)
e2d8ce26 152
a84d97b6
HS
153/*
154 Search for mandatory following text node; for lookahead, the text must
155 follow but for lookbehind (rn->flags != 0) we skip to the next step.
156*/
cca55fe3 157#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 158 while (JUMPABLE(rn)) \
a84d97b6 159 if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 160 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
161 else if (OP(rn) == PLUS) \
162 rn = NEXTOPER(rn); \
a84d97b6
HS
163 else if (OP(rn) == IFMATCH) \
164 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
e2d8ce26 165 else rn += NEXT_OFF(rn); \
5f80c4cf 166} STMT_END
74750237 167
acfe0abc 168static void restore_pos(pTHX_ void *arg);
51371543 169
76e3520e 170STATIC CHECKPOINT
cea2e8a9 171S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 172{
97aff369 173 dVAR;
a3b680e6 174 const int retval = PL_savestack_ix;
b1ce53c5 175#define REGCP_PAREN_ELEMS 4
a3b680e6 176 const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
177 int p;
178
e49a9654
IH
179 if (paren_elems_to_push < 0)
180 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
181
a01268b5 182#define REGCP_OTHER_ELEMS 6
4b3c1a47 183 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 184 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 185/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
186 SSPUSHINT(PL_regendp[p]);
187 SSPUSHINT(PL_regstartp[p]);
3280af22 188 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
189 SSPUSHINT(p);
190 }
b1ce53c5 191/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
192 SSPUSHINT(PL_regsize);
193 SSPUSHINT(*PL_reglastparen);
a01268b5 194 SSPUSHINT(*PL_reglastcloseparen);
3280af22 195 SSPUSHPTR(PL_reginput);
41123dfd
JH
196#define REGCP_FRAME_ELEMS 2
197/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
198 * are needed for the regexp context stack bookkeeping. */
199 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 200 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 201
a0d0e21e
LW
202 return retval;
203}
204
c277df42 205/* These are needed since we do not localize EVAL nodes: */
a3621e74 206# define REGCP_SET(cp) DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, \
faccc32b 207 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 208 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 209
a3621e74 210# define REGCP_UNWIND(cp) DEBUG_EXECUTE_r(cp != PL_savestack_ix ? \
c3464db5 211 PerlIO_printf(Perl_debug_log, \
faccc32b 212 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 213 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 214
76e3520e 215STATIC char *
097eb12c 216S_regcppop(pTHX_ const regexp *rex)
a0d0e21e 217{
97aff369 218 dVAR;
b1ce53c5 219 I32 i;
a0d0e21e 220 char *input;
b1ce53c5 221
a3621e74
YO
222 GET_RE_DEBUG_FLAGS_DECL;
223
b1ce53c5 224 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 225 i = SSPOPINT;
b1ce53c5
JH
226 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
227 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 228 input = (char *) SSPOPPTR;
a01268b5 229 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
230 *PL_reglastparen = SSPOPINT;
231 PL_regsize = SSPOPINT;
b1ce53c5
JH
232
233 /* Now restore the parentheses context. */
41123dfd
JH
234 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
235 i > 0; i -= REGCP_PAREN_ELEMS) {
1df70142 236 I32 tmps;
097eb12c 237 U32 paren = (U32)SSPOPINT;
3280af22 238 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
239 PL_regstartp[paren] = SSPOPINT;
240 tmps = SSPOPINT;
3280af22
NIS
241 if (paren <= *PL_reglastparen)
242 PL_regendp[paren] = tmps;
a3621e74 243 DEBUG_EXECUTE_r(
c3464db5 244 PerlIO_printf(Perl_debug_log,
b900a521 245 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 246 (UV)paren, (IV)PL_regstartp[paren],
b900a521 247 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 248 (IV)PL_regendp[paren],
3280af22 249 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 250 );
a0d0e21e 251 }
a3621e74 252 DEBUG_EXECUTE_r(
4f639d21 253 if ((I32)(*PL_reglastparen + 1) <= rex->nparens) {
c3464db5 254 PerlIO_printf(Perl_debug_log,
faccc32b 255 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
4f639d21 256 (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
c277df42
IZ
257 }
258 );
daf18116 259#if 1
dafc8851
JH
260 /* It would seem that the similar code in regtry()
261 * already takes care of this, and in fact it is in
262 * a better location to since this code can #if 0-ed out
263 * but the code in regtry() is needed or otherwise tests
264 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
265 * (as of patchlevel 7877) will fail. Then again,
266 * this code seems to be necessary or otherwise
267 * building DynaLoader will fail:
268 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
269 * --jhi */
097eb12c
AL
270 for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
271 if (i > PL_regsize)
272 PL_regstartp[i] = -1;
273 PL_regendp[i] = -1;
a0d0e21e 274 }
dafc8851 275#endif
a0d0e21e
LW
276 return input;
277}
278
02db2b7b 279#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 280
95b24440 281#define TRYPAREN(paren, n, input, where) { \
29d1e993
HS
282 if (paren) { \
283 if (n) { \
284 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
285 PL_regendp[paren] = input - PL_bostr; \
286 } \
287 else \
288 PL_regendp[paren] = -1; \
289 } \
95b24440
DM
290 REGMATCH(next, where); \
291 if (result) \
29d1e993
HS
292 sayYES; \
293 if (paren && n) \
294 PL_regendp[paren] = -1; \
295}
296
297
a687059c 298/*
e50aee73 299 * pregexec and friends
a687059c
LW
300 */
301
302/*
c277df42 303 - pregexec - match a regexp against a string
a687059c 304 */
c277df42 305I32
864dbfa3 306Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 307 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
308/* strend: pointer to null at end of string */
309/* strbeg: real beginning of string */
310/* minend: end of match must be >=minend after stringarg. */
311/* nosave: For optimizations. */
312{
313 return
9041c2e3 314 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
315 nosave ? 0 : REXEC_COPY_STR);
316}
0f5d15d6 317
22e551b9 318
9041c2e3 319/*
cad2e5aa
JH
320 * Need to implement the following flags for reg_anch:
321 *
322 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
323 * USE_INTUIT_ML
324 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
325 * INTUIT_AUTORITATIVE_ML
326 * INTUIT_ONCE_NOML - Intuit can match in one location only.
327 * INTUIT_ONCE_ML
328 *
329 * Another flag for this function: SECOND_TIME (so that float substrs
330 * with giant delta may be not rechecked).
331 */
332
333/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
334
3f7c398e 335/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
cad2e5aa
JH
336 Otherwise, only SvCUR(sv) is used to get strbeg. */
337
338/* XXXX We assume that strpos is strbeg unless sv. */
339
6eb5f6b9
JH
340/* XXXX Some places assume that there is a fixed substring.
341 An update may be needed if optimizer marks as "INTUITable"
342 RExen without fixed substrings. Similarly, it is assumed that
343 lengths of all the strings are no more than minlen, thus they
344 cannot come from lookahead.
345 (Or minlen should take into account lookahead.) */
346
2c2d71f5
JH
347/* A failure to find a constant substring means that there is no need to make
348 an expensive call to REx engine, thus we celebrate a failure. Similarly,
349 finding a substring too deep into the string means that less calls to
30944b6d
IZ
350 regtry() should be needed.
351
352 REx compiler's optimizer found 4 possible hints:
353 a) Anchored substring;
354 b) Fixed substring;
355 c) Whether we are anchored (beginning-of-line or \G);
356 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 357 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
358 string which does not contradict any of them.
359 */
2c2d71f5 360
6eb5f6b9
JH
361/* Most of decisions we do here should have been done at compile time.
362 The nodes of the REx which we used for the search should have been
363 deleted from the finite automaton. */
364
cad2e5aa
JH
365char *
366Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
367 char *strend, U32 flags, re_scream_pos_data *data)
368{
97aff369 369 dVAR;
b7953727 370 register I32 start_shift = 0;
cad2e5aa 371 /* Should be nonnegative! */
b7953727 372 register I32 end_shift = 0;
2c2d71f5
JH
373 register char *s;
374 register SV *check;
a1933d95 375 char *strbeg;
cad2e5aa 376 char *t;
a3b680e6 377 const int do_utf8 = sv ? SvUTF8(sv) : 0; /* if no sv we have to assume bytes */
cad2e5aa 378 I32 ml_anch;
bd61b366
SS
379 register char *other_last = NULL; /* other substr checked before this */
380 char *check_at = NULL; /* check substr found at this pos */
1df70142 381 const I32 multiline = prog->reganch & PMf_MULTILINE;
30944b6d 382#ifdef DEBUGGING
890ce7af
AL
383 const char * const i_strpos = strpos;
384 SV * const dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 385#endif
a3621e74
YO
386
387 GET_RE_DEBUG_FLAGS_DECL;
388
a30b2f1f 389 RX_MATCH_UTF8_set(prog,do_utf8);
cad2e5aa 390
b8d68ded 391 if (prog->reganch & ROPT_UTF8) {
a3621e74 392 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded
JH
393 "UTF-8 regex...\n"));
394 PL_reg_flags |= RF_utf8;
395 }
396
a3621e74 397 DEBUG_EXECUTE_r({
1df70142 398 const char *s = PL_reg_match_utf8 ?
c728cb41
JH
399 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
400 strpos;
1df70142 401 const int len = PL_reg_match_utf8 ?
b8d68ded 402 strlen(s) : strend - strpos;
2a782b5b
JH
403 if (!PL_colorset)
404 reginitcolors();
b8d68ded 405 if (PL_reg_match_utf8)
a3621e74 406 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded 407 "UTF-8 target...\n"));
2a782b5b 408 PerlIO_printf(Perl_debug_log,
a0288114 409 "%sGuessing start of match, REx%s \"%s%.60s%s%s\" against \"%s%.*s%s%s\"...\n",
e4584336 410 PL_colors[4], PL_colors[5], PL_colors[0],
2a782b5b
JH
411 prog->precomp,
412 PL_colors[1],
413 (strlen(prog->precomp) > 60 ? "..." : ""),
414 PL_colors[0],
415 (int)(len > 60 ? 60 : len),
416 s, PL_colors[1],
417 (len > 60 ? "..." : "")
418 );
419 });
cad2e5aa 420
c344f387
JH
421 /* CHR_DIST() would be more correct here but it makes things slow. */
422 if (prog->minlen > strend - strpos) {
a3621e74 423 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584 424 "String too short... [re_intuit_start]\n"));
cad2e5aa 425 goto fail;
2c2d71f5 426 }
a1933d95 427 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 428 PL_regeol = strend;
33b8afdf
JH
429 if (do_utf8) {
430 if (!prog->check_utf8 && prog->check_substr)
431 to_utf8_substr(prog);
432 check = prog->check_utf8;
433 } else {
434 if (!prog->check_substr && prog->check_utf8)
435 to_byte_substr(prog);
436 check = prog->check_substr;
437 }
438 if (check == &PL_sv_undef) {
a3621e74 439 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
440 "Non-utf string cannot match utf check string\n"));
441 goto fail;
442 }
2c2d71f5 443 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
444 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
445 || ( (prog->reganch & ROPT_ANCH_BOL)
7fba1cd6 446 && !multiline ) ); /* Check after \n? */
cad2e5aa 447
7e25d62c
JH
448 if (!ml_anch) {
449 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
450 | ROPT_IMPLICIT)) /* not a real BOL */
3f7c398e 451 /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
7e25d62c
JH
452 && sv && !SvROK(sv)
453 && (strpos != strbeg)) {
a3621e74 454 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
7e25d62c
JH
455 goto fail;
456 }
457 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 458 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 459 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
460 I32 slen;
461
1aa99e6b 462 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
463 if (SvTAIL(check)) {
464 slen = SvCUR(check); /* >= 1 */
cad2e5aa 465
9041c2e3 466 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5 467 || (strend - s == slen && strend[-1] != '\n')) {
a3621e74 468 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
2c2d71f5 469 goto fail_finish;
cad2e5aa
JH
470 }
471 /* Now should match s[0..slen-2] */
472 slen--;
3f7c398e 473 if (slen && (*SvPVX_const(check) != *s
cad2e5aa 474 || (slen > 1
3f7c398e 475 && memNE(SvPVX_const(check), s, slen)))) {
2c2d71f5 476 report_neq:
a3621e74 477 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
2c2d71f5
JH
478 goto fail_finish;
479 }
cad2e5aa 480 }
3f7c398e 481 else if (*SvPVX_const(check) != *s
653099ff 482 || ((slen = SvCUR(check)) > 1
3f7c398e 483 && memNE(SvPVX_const(check), s, slen)))
2c2d71f5 484 goto report_neq;
c315bfe8 485 check_at = s;
2c2d71f5 486 goto success_at_start;
7e25d62c 487 }
cad2e5aa 488 }
2c2d71f5 489 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 490 s = strpos;
2c2d71f5 491 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 492 end_shift = prog->minlen - start_shift -
653099ff 493 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 494 if (!ml_anch) {
a3b680e6 495 const I32 end = prog->check_offset_max + CHR_SVLEN(check)
653099ff 496 - (SvTAIL(check) != 0);
a3b680e6 497 const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
498
499 if (end_shift < eshift)
500 end_shift = eshift;
501 }
cad2e5aa 502 }
2c2d71f5 503 else { /* Can match at random position */
cad2e5aa
JH
504 ml_anch = 0;
505 s = strpos;
2c2d71f5
JH
506 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
507 /* Should be nonnegative! */
508 end_shift = prog->minlen - start_shift -
653099ff 509 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
510 }
511
2c2d71f5 512#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 513 if (end_shift < 0)
6bbae5e6 514 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
515#endif
516
2c2d71f5
JH
517 restart:
518 /* Find a possible match in the region s..strend by looking for
519 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 520 if (flags & REXEC_SCREAM) {
cad2e5aa 521 I32 p = -1; /* Internal iterator of scream. */
a3b680e6 522 I32 * const pp = data ? data->scream_pos : &p;
cad2e5aa 523
2c2d71f5
JH
524 if (PL_screamfirst[BmRARE(check)] >= 0
525 || ( BmRARE(check) == '\n'
526 && (BmPREVIOUS(check) == SvCUR(check) - 1)
527 && SvTAIL(check) ))
9041c2e3 528 s = screaminstr(sv, check,
2c2d71f5 529 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 530 else
2c2d71f5 531 goto fail_finish;
4addbd3b
HS
532 /* we may be pointing at the wrong string */
533 if (s && RX_MATCH_COPIED(prog))
3f7c398e 534 s = strbeg + (s - SvPVX_const(sv));
cad2e5aa
JH
535 if (data)
536 *data->scream_olds = s;
537 }
f33976b4 538 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
539 s = fbm_instr((U8*)(s + start_shift),
540 (U8*)(strend - end_shift),
7fba1cd6 541 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 542 else
1aa99e6b
IH
543 s = fbm_instr(HOP3(s, start_shift, strend),
544 HOP3(strend, -end_shift, strbeg),
7fba1cd6 545 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
546
547 /* Update the count-of-usability, remove useless subpatterns,
548 unshift s. */
2c2d71f5 549
a0288114 550 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s %s substr \"%s%.*s%s\"%s%s",
2c2d71f5 551 (s ? "Found" : "Did not find"),
33b8afdf 552 (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
2c2d71f5 553 PL_colors[0],
7b0972df 554 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
3f7c398e 555 SvPVX_const(check),
2c2d71f5
JH
556 PL_colors[1], (SvTAIL(check) ? "$" : ""),
557 (s ? " at offset " : "...\n") ) );
558
559 if (!s)
560 goto fail_finish;
561
6eb5f6b9
JH
562 check_at = s;
563
2c2d71f5 564 /* Finish the diagnostic message */
a3621e74 565 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
566
567 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
568 Start with the other substr.
569 XXXX no SCREAM optimization yet - and a very coarse implementation
a0288114 570 XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
2c2d71f5
JH
571 *always* match. Probably should be marked during compile...
572 Probably it is right to do no SCREAM here...
573 */
574
33b8afdf 575 if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
30944b6d 576 /* Take into account the "other" substring. */
2c2d71f5
JH
577 /* XXXX May be hopelessly wrong for UTF... */
578 if (!other_last)
6eb5f6b9 579 other_last = strpos;
33b8afdf 580 if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
30944b6d
IZ
581 do_other_anchored:
582 {
890ce7af
AL
583 char * const last = HOP3c(s, -start_shift, strbeg);
584 char *last1, *last2;
2c2d71f5 585 char *s1 = s;
33b8afdf 586 SV* must;
2c2d71f5 587
2c2d71f5
JH
588 t = s - prog->check_offset_max;
589 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 590 && (!do_utf8
1aa99e6b 591 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 592 && t > strpos)))
30944b6d 593 /* EMPTY */;
2c2d71f5
JH
594 else
595 t = strpos;
1aa99e6b 596 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
597 if (t < other_last) /* These positions already checked */
598 t = other_last;
1aa99e6b 599 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
600 if (last < last1)
601 last1 = last;
602 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
603 /* On end-of-str: see comment below. */
33b8afdf
JH
604 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
605 if (must == &PL_sv_undef) {
606 s = (char*)NULL;
a3621e74 607 DEBUG_EXECUTE_r(must = prog->anchored_utf8); /* for debug */
33b8afdf
JH
608 }
609 else
610 s = fbm_instr(
611 (unsigned char*)t,
612 HOP3(HOP3(last1, prog->anchored_offset, strend)
613 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
614 must,
7fba1cd6 615 multiline ? FBMrf_MULTILINE : 0
33b8afdf 616 );
a3621e74 617 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a0288114 618 "%s anchored substr \"%s%.*s%s\"%s",
2c2d71f5
JH
619 (s ? "Found" : "Contradicts"),
620 PL_colors[0],
33b8afdf
JH
621 (int)(SvCUR(must)
622 - (SvTAIL(must)!=0)),
3f7c398e 623 SvPVX_const(must),
33b8afdf 624 PL_colors[1], (SvTAIL(must) ? "$" : "")));
2c2d71f5
JH
625 if (!s) {
626 if (last1 >= last2) {
a3621e74 627 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5
JH
628 ", giving up...\n"));
629 goto fail_finish;
630 }
a3621e74 631 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 632 ", trying floating at offset %ld...\n",
1aa99e6b
IH
633 (long)(HOP3c(s1, 1, strend) - i_strpos)));
634 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
635 s = HOP3c(last, 1, strend);
2c2d71f5
JH
636 goto restart;
637 }
638 else {
a3621e74 639 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 640 (long)(s - i_strpos)));
1aa99e6b
IH
641 t = HOP3c(s, -prog->anchored_offset, strbeg);
642 other_last = HOP3c(s, 1, strend);
30944b6d 643 s = s1;
2c2d71f5
JH
644 if (t == strpos)
645 goto try_at_start;
2c2d71f5
JH
646 goto try_at_offset;
647 }
30944b6d 648 }
2c2d71f5
JH
649 }
650 else { /* Take into account the floating substring. */
33b8afdf
JH
651 char *last, *last1;
652 char *s1 = s;
653 SV* must;
654
655 t = HOP3c(s, -start_shift, strbeg);
656 last1 = last =
657 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
658 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
659 last = HOP3c(t, prog->float_max_offset, strend);
660 s = HOP3c(t, prog->float_min_offset, strend);
661 if (s < other_last)
662 s = other_last;
2c2d71f5 663 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
33b8afdf
JH
664 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
665 /* fbm_instr() takes into account exact value of end-of-str
666 if the check is SvTAIL(ed). Since false positives are OK,
667 and end-of-str is not later than strend we are OK. */
668 if (must == &PL_sv_undef) {
669 s = (char*)NULL;
a3621e74 670 DEBUG_EXECUTE_r(must = prog->float_utf8); /* for debug message */
33b8afdf
JH
671 }
672 else
2c2d71f5 673 s = fbm_instr((unsigned char*)s,
33b8afdf
JH
674 (unsigned char*)last + SvCUR(must)
675 - (SvTAIL(must)!=0),
7fba1cd6 676 must, multiline ? FBMrf_MULTILINE : 0);
a0288114 677 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s floating substr \"%s%.*s%s\"%s",
33b8afdf
JH
678 (s ? "Found" : "Contradicts"),
679 PL_colors[0],
680 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 681 SvPVX_const(must),
33b8afdf
JH
682 PL_colors[1], (SvTAIL(must) ? "$" : "")));
683 if (!s) {
684 if (last1 == last) {
a3621e74 685 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
686 ", giving up...\n"));
687 goto fail_finish;
2c2d71f5 688 }
a3621e74 689 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
690 ", trying anchored starting at offset %ld...\n",
691 (long)(s1 + 1 - i_strpos)));
692 other_last = last;
693 s = HOP3c(t, 1, strend);
694 goto restart;
695 }
696 else {
a3621e74 697 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
33b8afdf
JH
698 (long)(s - i_strpos)));
699 other_last = s; /* Fix this later. --Hugo */
700 s = s1;
701 if (t == strpos)
702 goto try_at_start;
703 goto try_at_offset;
704 }
2c2d71f5 705 }
cad2e5aa 706 }
2c2d71f5
JH
707
708 t = s - prog->check_offset_max;
2c2d71f5 709 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 710 && (!do_utf8
1aa99e6b
IH
711 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
712 && t > strpos))) {
2c2d71f5
JH
713 /* Fixed substring is found far enough so that the match
714 cannot start at strpos. */
715 try_at_offset:
cad2e5aa 716 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
717 /* Eventually fbm_*() should handle this, but often
718 anchored_offset is not 0, so this check will not be wasted. */
719 /* XXXX In the code below we prefer to look for "^" even in
720 presence of anchored substrings. And we search even
721 beyond the found float position. These pessimizations
722 are historical artefacts only. */
723 find_anchor:
2c2d71f5 724 while (t < strend - prog->minlen) {
cad2e5aa 725 if (*t == '\n') {
4ee3650e 726 if (t < check_at - prog->check_offset_min) {
33b8afdf 727 if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
4ee3650e
GS
728 /* Since we moved from the found position,
729 we definitely contradict the found anchored
30944b6d
IZ
730 substr. Due to the above check we do not
731 contradict "check" substr.
732 Thus we can arrive here only if check substr
733 is float. Redo checking for "other"=="fixed".
734 */
9041c2e3 735 strpos = t + 1;
a3621e74 736 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
e4584336 737 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
30944b6d
IZ
738 goto do_other_anchored;
739 }
4ee3650e
GS
740 /* We don't contradict the found floating substring. */
741 /* XXXX Why not check for STCLASS? */
cad2e5aa 742 s = t + 1;
a3621e74 743 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
e4584336 744 PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
745 goto set_useful;
746 }
4ee3650e
GS
747 /* Position contradicts check-string */
748 /* XXXX probably better to look for check-string
749 than for "\n", so one should lower the limit for t? */
a3621e74 750 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
e4584336 751 PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 752 other_last = strpos = s = t + 1;
cad2e5aa
JH
753 goto restart;
754 }
755 t++;
756 }
a3621e74 757 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
e4584336 758 PL_colors[0], PL_colors[1]));
2c2d71f5 759 goto fail_finish;
cad2e5aa 760 }
f5952150 761 else {
a3621e74 762 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
e4584336 763 PL_colors[0], PL_colors[1]));
f5952150 764 }
cad2e5aa
JH
765 s = t;
766 set_useful:
33b8afdf 767 ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
cad2e5aa
JH
768 }
769 else {
f5952150 770 /* The found string does not prohibit matching at strpos,
2c2d71f5 771 - no optimization of calling REx engine can be performed,
f5952150
GS
772 unless it was an MBOL and we are not after MBOL,
773 or a future STCLASS check will fail this. */
2c2d71f5
JH
774 try_at_start:
775 /* Even in this situation we may use MBOL flag if strpos is offset
776 wrt the start of the string. */
05b4157f 777 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 778 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
779 /* May be due to an implicit anchor of m{.*foo} */
780 && !(prog->reganch & ROPT_IMPLICIT))
781 {
cad2e5aa
JH
782 t = strpos;
783 goto find_anchor;
784 }
a3621e74 785 DEBUG_EXECUTE_r( if (ml_anch)
f5952150 786 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
e4584336 787 (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
30944b6d 788 );
2c2d71f5 789 success_at_start:
30944b6d 790 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
33b8afdf
JH
791 && (do_utf8 ? (
792 prog->check_utf8 /* Could be deleted already */
793 && --BmUSEFUL(prog->check_utf8) < 0
794 && (prog->check_utf8 == prog->float_utf8)
795 ) : (
796 prog->check_substr /* Could be deleted already */
797 && --BmUSEFUL(prog->check_substr) < 0
798 && (prog->check_substr == prog->float_substr)
799 )))
66e933ab 800 {
cad2e5aa 801 /* If flags & SOMETHING - do not do it many times on the same match */
a3621e74 802 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
33b8afdf
JH
803 SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
804 if (do_utf8 ? prog->check_substr : prog->check_utf8)
805 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
a0714e2c
SS
806 prog->check_substr = prog->check_utf8 = NULL; /* disable */
807 prog->float_substr = prog->float_utf8 = NULL; /* clear */
808 check = NULL; /* abort */
cad2e5aa 809 s = strpos;
3cf5c195
IZ
810 /* XXXX This is a remnant of the old implementation. It
811 looks wasteful, since now INTUIT can use many
6eb5f6b9 812 other heuristics. */
cad2e5aa
JH
813 prog->reganch &= ~RE_USE_INTUIT;
814 }
815 else
816 s = strpos;
817 }
818
6eb5f6b9
JH
819 /* Last resort... */
820 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
821 if (prog->regstclass) {
822 /* minlen == 0 is possible if regstclass is \b or \B,
823 and the fixed substr is ''$.
824 Since minlen is already taken into account, s+1 is before strend;
825 accidentally, minlen >= 1 guaranties no false positives at s + 1
826 even for \b or \B. But (minlen? 1 : 0) below assumes that
827 regstclass does not come from lookahead... */
828 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
829 This leaves EXACTF only, which is dealt with in find_byclass(). */
890ce7af 830 const U8* const str = (U8*)STRING(prog->regstclass);
06b5626a 831 const int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 832 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 833 : 1);
a3b680e6 834 const char * const endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
1aa99e6b 835 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
33b8afdf 836 : (prog->float_substr || prog->float_utf8
1aa99e6b
IH
837 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
838 cl_l, strend)
839 : strend);
6eb5f6b9
JH
840
841 t = s;
4f639d21 842 PL_reg_re = prog;
06b5626a 843 s = find_byclass(prog, prog->regstclass, s, endpos, 1);
6eb5f6b9
JH
844 if (!s) {
845#ifdef DEBUGGING
cbbf8932 846 const char *what = NULL;
6eb5f6b9
JH
847#endif
848 if (endpos == strend) {
a3621e74 849 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
850 "Could not match STCLASS...\n") );
851 goto fail;
852 }
a3621e74 853 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab 854 "This position contradicts STCLASS...\n") );
653099ff
GS
855 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
856 goto fail;
6eb5f6b9 857 /* Contradict one of substrings */
33b8afdf
JH
858 if (prog->anchored_substr || prog->anchored_utf8) {
859 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
a3621e74 860 DEBUG_EXECUTE_r( what = "anchored" );
6eb5f6b9 861 hop_and_restart:
1aa99e6b 862 s = HOP3c(t, 1, strend);
66e933ab
GS
863 if (s + start_shift + end_shift > strend) {
864 /* XXXX Should be taken into account earlier? */
a3621e74 865 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab
GS
866 "Could not match STCLASS...\n") );
867 goto fail;
868 }
5e39e1e5
HS
869 if (!check)
870 goto giveup;
a3621e74 871 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 872 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
873 what, (long)(s + start_shift - i_strpos)) );
874 goto restart;
875 }
66e933ab 876 /* Have both, check_string is floating */
6eb5f6b9
JH
877 if (t + start_shift >= check_at) /* Contradicts floating=check */
878 goto retry_floating_check;
879 /* Recheck anchored substring, but not floating... */
9041c2e3 880 s = check_at;
5e39e1e5
HS
881 if (!check)
882 goto giveup;
a3621e74 883 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 884 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
885 (long)(other_last - i_strpos)) );
886 goto do_other_anchored;
887 }
60e71179
GS
888 /* Another way we could have checked stclass at the
889 current position only: */
890 if (ml_anch) {
891 s = t = t + 1;
5e39e1e5
HS
892 if (!check)
893 goto giveup;
a3621e74 894 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 895 "Looking for /%s^%s/m starting at offset %ld...\n",
e4584336 896 PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
60e71179 897 goto try_at_offset;
66e933ab 898 }
33b8afdf 899 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
60e71179 900 goto fail;
6eb5f6b9
JH
901 /* Check is floating subtring. */
902 retry_floating_check:
903 t = check_at - start_shift;
a3621e74 904 DEBUG_EXECUTE_r( what = "floating" );
6eb5f6b9
JH
905 goto hop_and_restart;
906 }
b7953727 907 if (t != s) {
a3621e74 908 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 909 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
910 (long)(t - i_strpos), (long)(s - i_strpos))
911 );
912 }
913 else {
a3621e74 914 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b7953727
JH
915 "Does not contradict STCLASS...\n");
916 );
917 }
6eb5f6b9 918 }
5e39e1e5 919 giveup:
a3621e74 920 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
5e39e1e5
HS
921 PL_colors[4], (check ? "Guessed" : "Giving up"),
922 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 923 return s;
2c2d71f5
JH
924
925 fail_finish: /* Substring not found */
33b8afdf
JH
926 if (prog->check_substr || prog->check_utf8) /* could be removed already */
927 BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
cad2e5aa 928 fail:
a3621e74 929 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
e4584336 930 PL_colors[4], PL_colors[5]));
bd61b366 931 return NULL;
cad2e5aa 932}
9661b544 933
6eb5f6b9 934/* We know what class REx starts with. Try to find this position... */
3c3eec57 935STATIC char *
097eb12c 936S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *strend, I32 norun)
a687059c 937{
27da23d5 938 dVAR;
1df70142 939 const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
6eb5f6b9 940 char *m;
d8093b23 941 STRLEN ln;
5dab1207 942 STRLEN lnc;
078c425b 943 register STRLEN uskip;
d8093b23
G
944 unsigned int c1;
945 unsigned int c2;
6eb5f6b9
JH
946 char *e;
947 register I32 tmp = 1; /* Scratch variable? */
a3b680e6 948 register const bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 949
6eb5f6b9
JH
950 /* We know what class it must start with. */
951 switch (OP(c)) {
6eb5f6b9 952 case ANYOF:
388cc4de 953 if (do_utf8) {
078c425b 954 while (s + (uskip = UTF8SKIP(s)) <= strend) {
388cc4de
HS
955 if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
956 !UTF8_IS_INVARIANT((U8)s[0]) ?
957 reginclass(c, (U8*)s, 0, do_utf8) :
958 REGINCLASS(c, (U8*)s)) {
959 if (tmp && (norun || regtry(prog, s)))
960 goto got_it;
961 else
962 tmp = doevery;
963 }
964 else
965 tmp = 1;
078c425b 966 s += uskip;
388cc4de
HS
967 }
968 }
969 else {
970 while (s < strend) {
971 STRLEN skip = 1;
972
973 if (REGINCLASS(c, (U8*)s) ||
974 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
975 /* The assignment of 2 is intentional:
976 * for the folded sharp s, the skip is 2. */
977 (skip = SHARP_S_SKIP))) {
978 if (tmp && (norun || regtry(prog, s)))
979 goto got_it;
980 else
981 tmp = doevery;
982 }
983 else
984 tmp = 1;
985 s += skip;
986 }
a0d0e21e 987 }
6eb5f6b9 988 break;
f33976b4
DB
989 case CANY:
990 while (s < strend) {
991 if (tmp && (norun || regtry(prog, s)))
992 goto got_it;
993 else
994 tmp = doevery;
995 s++;
996 }
997 break;
6eb5f6b9 998 case EXACTF:
5dab1207
NIS
999 m = STRING(c);
1000 ln = STR_LEN(c); /* length to match in octets/bytes */
1001 lnc = (I32) ln; /* length to match in characters */
1aa99e6b 1002 if (UTF) {
a2a2844f 1003 STRLEN ulen1, ulen2;
5dab1207 1004 U8 *sm = (U8 *) m;
89ebb4a3
JH
1005 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1006 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4ad0818d 1007 const U32 uniflags = UTF8_ALLOW_DEFAULT;
a2a2844f
JH
1008
1009 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1010 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1011
89ebb4a3 1012 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
041457d9 1013 0, uniflags);
89ebb4a3 1014 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
041457d9 1015 0, uniflags);
5dab1207
NIS
1016 lnc = 0;
1017 while (sm < ((U8 *) m + ln)) {
1018 lnc++;
1019 sm += UTF8SKIP(sm);
1020 }
1aa99e6b
IH
1021 }
1022 else {
1023 c1 = *(U8*)m;
1024 c2 = PL_fold[c1];
1025 }
6eb5f6b9
JH
1026 goto do_exactf;
1027 case EXACTFL:
5dab1207
NIS
1028 m = STRING(c);
1029 ln = STR_LEN(c);
1030 lnc = (I32) ln;
d8093b23 1031 c1 = *(U8*)m;
6eb5f6b9
JH
1032 c2 = PL_fold_locale[c1];
1033 do_exactf:
db12adc6 1034 e = HOP3c(strend, -((I32)lnc), s);
b3c9acc1 1035
6eb5f6b9
JH
1036 if (norun && e < s)
1037 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 1038
60a8b682
JH
1039 /* The idea in the EXACTF* cases is to first find the
1040 * first character of the EXACTF* node and then, if
1041 * necessary, case-insensitively compare the full
1042 * text of the node. The c1 and c2 are the first
1043 * characters (though in Unicode it gets a bit
1044 * more complicated because there are more cases
7f16dd3d
JH
1045 * than just upper and lower: one needs to use
1046 * the so-called folding case for case-insensitive
1047 * matching (called "loose matching" in Unicode).
1048 * ibcmp_utf8() will do just that. */
60a8b682 1049
1aa99e6b 1050 if (do_utf8) {
575cac57 1051 UV c, f;
89ebb4a3 1052 U8 tmpbuf [UTF8_MAXBYTES+1];
575cac57 1053 STRLEN len, foldlen;
4ad0818d 1054 const U32 uniflags = UTF8_ALLOW_DEFAULT;
09091399 1055 if (c1 == c2) {
5dab1207
NIS
1056 /* Upper and lower of 1st char are equal -
1057 * probably not a "letter". */
1aa99e6b 1058 while (s <= e) {
89ebb4a3 1059 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1060 uniflags);
80aecb99
JH
1061 if ( c == c1
1062 && (ln == len ||
66423254 1063 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1064 m, (char **)0, ln, (bool)UTF))
55da9344 1065 && (norun || regtry(prog, s)) )
1aa99e6b 1066 goto got_it;
80aecb99 1067 else {
1df70142 1068 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1069 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1070 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1071 if ( f != c
1072 && (f == c1 || f == c2)
1073 && (ln == foldlen ||
66423254
JH
1074 !ibcmp_utf8((char *) foldbuf,
1075 (char **)0, foldlen, do_utf8,
d07ddd77 1076 m,
eb160463 1077 (char **)0, ln, (bool)UTF))
80aecb99
JH
1078 && (norun || regtry(prog, s)) )
1079 goto got_it;
1080 }
1aa99e6b
IH
1081 s += len;
1082 }
09091399
JH
1083 }
1084 else {
1aa99e6b 1085 while (s <= e) {
89ebb4a3 1086 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1087 uniflags);
80aecb99 1088
60a8b682 1089 /* Handle some of the three Greek sigmas cases.
8c01da3c
JH
1090 * Note that not all the possible combinations
1091 * are handled here: some of them are handled
1092 * by the standard folding rules, and some of
1093 * them (the character class or ANYOF cases)
1094 * are handled during compiletime in
1095 * regexec.c:S_regclass(). */
880bd946
JH
1096 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1097 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1098 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99
JH
1099
1100 if ( (c == c1 || c == c2)
1101 && (ln == len ||
66423254 1102 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1103 m, (char **)0, ln, (bool)UTF))
55da9344 1104 && (norun || regtry(prog, s)) )
1aa99e6b 1105 goto got_it;
80aecb99 1106 else {
1df70142 1107 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1108 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1109 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1110 if ( f != c
1111 && (f == c1 || f == c2)
1112 && (ln == foldlen ||
a6872d42 1113 !ibcmp_utf8((char *) foldbuf,
66423254 1114 (char **)0, foldlen, do_utf8,
d07ddd77 1115 m,
eb160463 1116 (char **)0, ln, (bool)UTF))
80aecb99
JH
1117 && (norun || regtry(prog, s)) )
1118 goto got_it;
1119 }
1aa99e6b
IH
1120 s += len;
1121 }
09091399 1122 }
1aa99e6b
IH
1123 }
1124 else {
1125 if (c1 == c2)
1126 while (s <= e) {
1127 if ( *(U8*)s == c1
1128 && (ln == 1 || !(OP(c) == EXACTF
1129 ? ibcmp(s, m, ln)
1130 : ibcmp_locale(s, m, ln)))
1131 && (norun || regtry(prog, s)) )
1132 goto got_it;
1133 s++;
1134 }
1135 else
1136 while (s <= e) {
1137 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1138 && (ln == 1 || !(OP(c) == EXACTF
1139 ? ibcmp(s, m, ln)
1140 : ibcmp_locale(s, m, ln)))
1141 && (norun || regtry(prog, s)) )
1142 goto got_it;
1143 s++;
1144 }
b3c9acc1
IZ
1145 }
1146 break;
bbce6d69 1147 case BOUNDL:
3280af22 1148 PL_reg_flags |= RF_tainted;
bbce6d69 1149 /* FALL THROUGH */
a0d0e21e 1150 case BOUND:
ffc61ed2 1151 if (do_utf8) {
12d33761 1152 if (s == PL_bostr)
ffc61ed2
JH
1153 tmp = '\n';
1154 else {
6136c704 1155 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1156 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1157 }
1158 tmp = ((OP(c) == BOUND ?
9041c2e3 1159 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1160 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1161 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1162 if (tmp == !(OP(c) == BOUND ?
3568d838 1163 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1164 isALNUM_LC_utf8((U8*)s)))
1165 {
1166 tmp = !tmp;
1167 if ((norun || regtry(prog, s)))
1168 goto got_it;
1169 }
078c425b 1170 s += uskip;
a687059c 1171 }
a0d0e21e 1172 }
667bb95a 1173 else {
12d33761 1174 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1175 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1176 while (s < strend) {
1177 if (tmp ==
1178 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1179 tmp = !tmp;
1180 if ((norun || regtry(prog, s)))
1181 goto got_it;
1182 }
1183 s++;
a0ed51b3 1184 }
a0ed51b3 1185 }
6eb5f6b9 1186 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1187 goto got_it;
1188 break;
bbce6d69 1189 case NBOUNDL:
3280af22 1190 PL_reg_flags |= RF_tainted;
bbce6d69 1191 /* FALL THROUGH */
a0d0e21e 1192 case NBOUND:
ffc61ed2 1193 if (do_utf8) {
12d33761 1194 if (s == PL_bostr)
ffc61ed2
JH
1195 tmp = '\n';
1196 else {
6136c704 1197 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1198 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1199 }
1200 tmp = ((OP(c) == NBOUND ?
9041c2e3 1201 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1202 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1203 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1204 if (tmp == !(OP(c) == NBOUND ?
3568d838 1205 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1206 isALNUM_LC_utf8((U8*)s)))
1207 tmp = !tmp;
1208 else if ((norun || regtry(prog, s)))
1209 goto got_it;
078c425b 1210 s += uskip;
ffc61ed2 1211 }
a0d0e21e 1212 }
667bb95a 1213 else {
12d33761 1214 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1215 tmp = ((OP(c) == NBOUND ?
1216 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1217 while (s < strend) {
1218 if (tmp ==
1219 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1220 tmp = !tmp;
1221 else if ((norun || regtry(prog, s)))
1222 goto got_it;
1223 s++;
1224 }
a0ed51b3 1225 }
6eb5f6b9 1226 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1227 goto got_it;
1228 break;
a0d0e21e 1229 case ALNUM:
ffc61ed2 1230 if (do_utf8) {
1a4fad37 1231 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1232 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1233 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1234 if (tmp && (norun || regtry(prog, s)))
1235 goto got_it;
1236 else
1237 tmp = doevery;
1238 }
bbce6d69 1239 else
ffc61ed2 1240 tmp = 1;
078c425b 1241 s += uskip;
bbce6d69 1242 }
bbce6d69 1243 }
ffc61ed2
JH
1244 else {
1245 while (s < strend) {
1246 if (isALNUM(*s)) {
1247 if (tmp && (norun || regtry(prog, s)))
1248 goto got_it;
1249 else
1250 tmp = doevery;
1251 }
a0ed51b3 1252 else
ffc61ed2
JH
1253 tmp = 1;
1254 s++;
a0ed51b3 1255 }
a0ed51b3
LW
1256 }
1257 break;
bbce6d69 1258 case ALNUML:
3280af22 1259 PL_reg_flags |= RF_tainted;
ffc61ed2 1260 if (do_utf8) {
078c425b 1261 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1262 if (isALNUM_LC_utf8((U8*)s)) {
1263 if (tmp && (norun || regtry(prog, s)))
1264 goto got_it;
1265 else
1266 tmp = doevery;
1267 }
a687059c 1268 else
ffc61ed2 1269 tmp = 1;
078c425b 1270 s += uskip;
a0d0e21e 1271 }
a0d0e21e 1272 }
ffc61ed2
JH
1273 else {
1274 while (s < strend) {
1275 if (isALNUM_LC(*s)) {
1276 if (tmp && (norun || regtry(prog, s)))
1277 goto got_it;
1278 else
1279 tmp = doevery;
1280 }
a0ed51b3 1281 else
ffc61ed2
JH
1282 tmp = 1;
1283 s++;
a0ed51b3 1284 }
a0ed51b3
LW
1285 }
1286 break;
a0d0e21e 1287 case NALNUM:
ffc61ed2 1288 if (do_utf8) {
1a4fad37 1289 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1290 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1291 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1292 if (tmp && (norun || regtry(prog, s)))
1293 goto got_it;
1294 else
1295 tmp = doevery;
1296 }
bbce6d69 1297 else
ffc61ed2 1298 tmp = 1;
078c425b 1299 s += uskip;
bbce6d69 1300 }
bbce6d69 1301 }
ffc61ed2
JH
1302 else {
1303 while (s < strend) {
1304 if (!isALNUM(*s)) {
1305 if (tmp && (norun || regtry(prog, s)))
1306 goto got_it;
1307 else
1308 tmp = doevery;
1309 }
a0ed51b3 1310 else
ffc61ed2
JH
1311 tmp = 1;
1312 s++;
a0ed51b3 1313 }
a0ed51b3
LW
1314 }
1315 break;
bbce6d69 1316 case NALNUML:
3280af22 1317 PL_reg_flags |= RF_tainted;
ffc61ed2 1318 if (do_utf8) {
078c425b 1319 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1320 if (!isALNUM_LC_utf8((U8*)s)) {
1321 if (tmp && (norun || regtry(prog, s)))
1322 goto got_it;
1323 else
1324 tmp = doevery;
1325 }
a687059c 1326 else
ffc61ed2 1327 tmp = 1;
078c425b 1328 s += uskip;
a687059c 1329 }
a0d0e21e 1330 }
ffc61ed2
JH
1331 else {
1332 while (s < strend) {
1333 if (!isALNUM_LC(*s)) {
1334 if (tmp && (norun || regtry(prog, s)))
1335 goto got_it;
1336 else
1337 tmp = doevery;
1338 }
a0ed51b3 1339 else
ffc61ed2
JH
1340 tmp = 1;
1341 s++;
a0ed51b3 1342 }
a0ed51b3
LW
1343 }
1344 break;
a0d0e21e 1345 case SPACE:
ffc61ed2 1346 if (do_utf8) {
1a4fad37 1347 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1348 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1349 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1350 if (tmp && (norun || regtry(prog, s)))
1351 goto got_it;
1352 else
1353 tmp = doevery;
1354 }
a0d0e21e 1355 else
ffc61ed2 1356 tmp = 1;
078c425b 1357 s += uskip;
2304df62 1358 }
a0d0e21e 1359 }
ffc61ed2
JH
1360 else {
1361 while (s < strend) {
1362 if (isSPACE(*s)) {
1363 if (tmp && (norun || regtry(prog, s)))
1364 goto got_it;
1365 else
1366 tmp = doevery;
1367 }
a0ed51b3 1368 else
ffc61ed2
JH
1369 tmp = 1;
1370 s++;
a0ed51b3 1371 }
a0ed51b3
LW
1372 }
1373 break;
bbce6d69 1374 case SPACEL:
3280af22 1375 PL_reg_flags |= RF_tainted;
ffc61ed2 1376 if (do_utf8) {
078c425b 1377 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1378 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1379 if (tmp && (norun || regtry(prog, s)))
1380 goto got_it;
1381 else
1382 tmp = doevery;
1383 }
bbce6d69 1384 else
ffc61ed2 1385 tmp = 1;
078c425b 1386 s += uskip;
bbce6d69 1387 }
bbce6d69 1388 }
ffc61ed2
JH
1389 else {
1390 while (s < strend) {
1391 if (isSPACE_LC(*s)) {
1392 if (tmp && (norun || regtry(prog, s)))
1393 goto got_it;
1394 else
1395 tmp = doevery;
1396 }
a0ed51b3 1397 else
ffc61ed2
JH
1398 tmp = 1;
1399 s++;
a0ed51b3 1400 }
a0ed51b3
LW
1401 }
1402 break;
a0d0e21e 1403 case NSPACE:
ffc61ed2 1404 if (do_utf8) {
1a4fad37 1405 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1406 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1407 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1408 if (tmp && (norun || regtry(prog, s)))
1409 goto got_it;
1410 else
1411 tmp = doevery;
1412 }
a0d0e21e 1413 else
ffc61ed2 1414 tmp = 1;
078c425b 1415 s += uskip;
a687059c 1416 }
a0d0e21e 1417 }
ffc61ed2
JH
1418 else {
1419 while (s < strend) {
1420 if (!isSPACE(*s)) {
1421 if (tmp && (norun || regtry(prog, s)))
1422 goto got_it;
1423 else
1424 tmp = doevery;
1425 }
a0ed51b3 1426 else
ffc61ed2
JH
1427 tmp = 1;
1428 s++;
a0ed51b3 1429 }
a0ed51b3
LW
1430 }
1431 break;
bbce6d69 1432 case NSPACEL:
3280af22 1433 PL_reg_flags |= RF_tainted;
ffc61ed2 1434 if (do_utf8) {
078c425b 1435 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1436 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1437 if (tmp && (norun || regtry(prog, s)))
1438 goto got_it;
1439 else
1440 tmp = doevery;
1441 }
bbce6d69 1442 else
ffc61ed2 1443 tmp = 1;
078c425b 1444 s += uskip;
bbce6d69 1445 }
bbce6d69 1446 }
ffc61ed2
JH
1447 else {
1448 while (s < strend) {
1449 if (!isSPACE_LC(*s)) {
1450 if (tmp && (norun || regtry(prog, s)))
1451 goto got_it;
1452 else
1453 tmp = doevery;
1454 }
a0ed51b3 1455 else
ffc61ed2
JH
1456 tmp = 1;
1457 s++;
a0ed51b3 1458 }
a0ed51b3
LW
1459 }
1460 break;
a0d0e21e 1461 case DIGIT:
ffc61ed2 1462 if (do_utf8) {
1a4fad37 1463 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1464 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1465 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1466 if (tmp && (norun || regtry(prog, s)))
1467 goto got_it;
1468 else
1469 tmp = doevery;
1470 }
a0d0e21e 1471 else
ffc61ed2 1472 tmp = 1;
078c425b 1473 s += uskip;
2b69d0c2 1474 }
a0d0e21e 1475 }
ffc61ed2
JH
1476 else {
1477 while (s < strend) {
1478 if (isDIGIT(*s)) {
1479 if (tmp && (norun || regtry(prog, s)))
1480 goto got_it;
1481 else
1482 tmp = doevery;
1483 }
a0ed51b3 1484 else
ffc61ed2
JH
1485 tmp = 1;
1486 s++;
a0ed51b3 1487 }
a0ed51b3
LW
1488 }
1489 break;
b8c5462f
JH
1490 case DIGITL:
1491 PL_reg_flags |= RF_tainted;
ffc61ed2 1492 if (do_utf8) {
078c425b 1493 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1494 if (isDIGIT_LC_utf8((U8*)s)) {
1495 if (tmp && (norun || regtry(prog, s)))
1496 goto got_it;
1497 else
1498 tmp = doevery;
1499 }
b8c5462f 1500 else
ffc61ed2 1501 tmp = 1;
078c425b 1502 s += uskip;
b8c5462f 1503 }
b8c5462f 1504 }
ffc61ed2
JH
1505 else {
1506 while (s < strend) {
1507 if (isDIGIT_LC(*s)) {
1508 if (tmp && (norun || regtry(prog, s)))
1509 goto got_it;
1510 else
1511 tmp = doevery;
1512 }
b8c5462f 1513 else
ffc61ed2
JH
1514 tmp = 1;
1515 s++;
b8c5462f 1516 }
b8c5462f
JH
1517 }
1518 break;
a0d0e21e 1519 case NDIGIT:
ffc61ed2 1520 if (do_utf8) {
1a4fad37 1521 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1522 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1523 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1524 if (tmp && (norun || regtry(prog, s)))
1525 goto got_it;
1526 else
1527 tmp = doevery;
1528 }
a0d0e21e 1529 else
ffc61ed2 1530 tmp = 1;
078c425b 1531 s += uskip;
a687059c 1532 }
a0d0e21e 1533 }
ffc61ed2
JH
1534 else {
1535 while (s < strend) {
1536 if (!isDIGIT(*s)) {
1537 if (tmp && (norun || regtry(prog, s)))
1538 goto got_it;
1539 else
1540 tmp = doevery;
1541 }
a0ed51b3 1542 else
ffc61ed2
JH
1543 tmp = 1;
1544 s++;
a0ed51b3 1545 }
a0ed51b3
LW
1546 }
1547 break;
b8c5462f
JH
1548 case NDIGITL:
1549 PL_reg_flags |= RF_tainted;
ffc61ed2 1550 if (do_utf8) {
078c425b 1551 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1552 if (!isDIGIT_LC_utf8((U8*)s)) {
1553 if (tmp && (norun || regtry(prog, s)))
1554 goto got_it;
1555 else
1556 tmp = doevery;
1557 }
b8c5462f 1558 else
ffc61ed2 1559 tmp = 1;
078c425b 1560 s += uskip;
b8c5462f 1561 }
a0ed51b3 1562 }
ffc61ed2
JH
1563 else {
1564 while (s < strend) {
1565 if (!isDIGIT_LC(*s)) {
1566 if (tmp && (norun || regtry(prog, s)))
1567 goto got_it;
1568 else
1569 tmp = doevery;
1570 }
cf93c79d 1571 else
ffc61ed2
JH
1572 tmp = 1;
1573 s++;
b8c5462f 1574 }
b8c5462f
JH
1575 }
1576 break;
b3c9acc1 1577 default:
3c3eec57
GS
1578 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1579 break;
d6a28714 1580 }
6eb5f6b9
JH
1581 return 0;
1582 got_it:
1583 return s;
1584}
1585
1586/*
1587 - regexec_flags - match a regexp against a string
1588 */
1589I32
1590Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1591 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1592/* strend: pointer to null at end of string */
1593/* strbeg: real beginning of string */
1594/* minend: end of match must be >=minend after stringarg. */
1595/* data: May be used for some additional optimizations. */
1596/* nosave: For optimizations. */
1597{
97aff369 1598 dVAR;
6eb5f6b9
JH
1599 register char *s;
1600 register regnode *c;
1601 register char *startpos = stringarg;
6eb5f6b9
JH
1602 I32 minlen; /* must match at least this many chars */
1603 I32 dontbother = 0; /* how many characters not to try at end */
6eb5f6b9
JH
1604 I32 end_shift = 0; /* Same for the end. */ /* CC */
1605 I32 scream_pos = -1; /* Internal iterator of scream. */
ccac19ea 1606 char *scream_olds = NULL;
6eb5f6b9 1607 SV* oreplsv = GvSV(PL_replgv);
1df70142 1608 const bool do_utf8 = DO_UTF8(sv);
a3b680e6 1609 const I32 multiline = prog->reganch & PMf_MULTILINE;
2a782b5b 1610#ifdef DEBUGGING
6136c704
AL
1611 SV * const dsv0 = PERL_DEBUG_PAD_ZERO(0);
1612 SV * const dsv1 = PERL_DEBUG_PAD_ZERO(1);
2a782b5b 1613#endif
a3621e74
YO
1614
1615 GET_RE_DEBUG_FLAGS_DECL;
1616
9d4ba2ae 1617 PERL_UNUSED_ARG(data);
6eb5f6b9
JH
1618
1619 /* Be paranoid... */
1620 if (prog == NULL || startpos == NULL) {
1621 Perl_croak(aTHX_ "NULL regexp parameter");
1622 return 0;
1623 }
1624
bac06658
JH
1625 PL_reg_re = prog;
1626#ifdef DEBUGGING
1627 PL_regnarrate = DEBUG_r_TEST;
1628#endif
1629
1630 RX_MATCH_UTF8_set(prog, do_utf8);
1631
6eb5f6b9 1632 minlen = prog->minlen;
61a36c01 1633 if (strend - startpos < minlen) {
a3621e74 1634 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584
JH
1635 "String too short [regexec_flags]...\n"));
1636 goto phooey;
1aa99e6b 1637 }
6eb5f6b9 1638
6eb5f6b9
JH
1639 /* Check validity of program. */
1640 if (UCHARAT(prog->program) != REG_MAGIC) {
1641 Perl_croak(aTHX_ "corrupted regexp program");
1642 }
1643
1644 PL_reg_flags = 0;
1645 PL_reg_eval_set = 0;
1646 PL_reg_maxiter = 0;
1647
1648 if (prog->reganch & ROPT_UTF8)
1649 PL_reg_flags |= RF_utf8;
1650
1651 /* Mark beginning of line for ^ and lookbehind. */
1652 PL_regbol = startpos;
1653 PL_bostr = strbeg;
1654 PL_reg_sv = sv;
1655
1656 /* Mark end of line for $ (and such) */
1657 PL_regeol = strend;
1658
1659 /* see how far we have to get to not match where we matched before */
1660 PL_regtill = startpos+minend;
1661
6eb5f6b9
JH
1662 /* If there is a "must appear" string, look for it. */
1663 s = startpos;
1664
1665 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1666 MAGIC *mg;
1667
1668 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1669 PL_reg_ganch = startpos;
1670 else if (sv && SvTYPE(sv) >= SVt_PVMG
1671 && SvMAGIC(sv)
14befaf4
DM
1672 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1673 && mg->mg_len >= 0) {
6eb5f6b9
JH
1674 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1675 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1676 if (s > PL_reg_ganch)
6eb5f6b9
JH
1677 goto phooey;
1678 s = PL_reg_ganch;
1679 }
1680 }
1681 else /* pos() not defined */
1682 PL_reg_ganch = strbeg;
1683 }
1684
a0714e2c 1685 if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
6eb5f6b9
JH
1686 re_scream_pos_data d;
1687
1688 d.scream_olds = &scream_olds;
1689 d.scream_pos = &scream_pos;
1690 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7 1691 if (!s) {
a3621e74 1692 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1693 goto phooey; /* not present */
3fa9c3d7 1694 }
6eb5f6b9
JH
1695 }
1696
a3621e74 1697 DEBUG_EXECUTE_r({
1df70142
AL
1698 const char * const s0 = UTF
1699 ? pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1700 UNI_DISPLAY_REGEX)
1701 : prog->precomp;
1702 const int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1703 const char * const s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
c728cb41 1704 UNI_DISPLAY_REGEX) : startpos;
1df70142 1705 const int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
2a782b5b
JH
1706 if (!PL_colorset)
1707 reginitcolors();
1708 PerlIO_printf(Perl_debug_log,
a0288114 1709 "%sMatching REx%s \"%s%*.*s%s%s\" against \"%s%.*s%s%s\"\n",
e4584336 1710 PL_colors[4], PL_colors[5], PL_colors[0],
9e55ce06 1711 len0, len0, s0,
2a782b5b 1712 PL_colors[1],
9e55ce06 1713 len0 > 60 ? "..." : "",
2a782b5b 1714 PL_colors[0],
9e55ce06
JH
1715 (int)(len1 > 60 ? 60 : len1),
1716 s1, PL_colors[1],
1717 (len1 > 60 ? "..." : "")
2a782b5b
JH
1718 );
1719 });
6eb5f6b9
JH
1720
1721 /* Simplest case: anchored match need be tried only once. */
1722 /* [unless only anchor is BOL and multiline is set] */
1723 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1724 if (s == startpos && regtry(prog, startpos))
1725 goto got_it;
7fba1cd6 1726 else if (multiline || (prog->reganch & ROPT_IMPLICIT)
6eb5f6b9
JH
1727 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1728 {
1729 char *end;
1730
1731 if (minlen)
1732 dontbother = minlen - 1;
1aa99e6b 1733 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9 1734 /* for multiline we only have to try after newlines */
33b8afdf 1735 if (prog->check_substr || prog->check_utf8) {
6eb5f6b9
JH
1736 if (s == startpos)
1737 goto after_try;
1738 while (1) {
1739 if (regtry(prog, s))
1740 goto got_it;
1741 after_try:
1742 if (s >= end)
1743 goto phooey;
1744 if (prog->reganch & RE_USE_INTUIT) {
1745 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1746 if (!s)
1747 goto phooey;
1748 }
1749 else
1750 s++;
1751 }
1752 } else {
1753 if (s > startpos)
1754 s--;
1755 while (s < end) {
1756 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1757 if (regtry(prog, s))
1758 goto got_it;
1759 }
1760 }
1761 }
1762 }
1763 goto phooey;
1764 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1765 if (regtry(prog, PL_reg_ganch))
1766 goto got_it;
1767 goto phooey;
1768 }
1769
1770 /* Messy cases: unanchored match. */
33b8afdf 1771 if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1772 /* we have /x+whatever/ */
1773 /* it must be a one character string (XXXX Except UTF?) */
33b8afdf 1774 char ch;
bf93d4cc
GS
1775#ifdef DEBUGGING
1776 int did_match = 0;
1777#endif
33b8afdf
JH
1778 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1779 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
3f7c398e 1780 ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
bf93d4cc 1781
1aa99e6b 1782 if (do_utf8) {
6eb5f6b9
JH
1783 while (s < strend) {
1784 if (*s == ch) {
a3621e74 1785 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1786 if (regtry(prog, s)) goto got_it;
1787 s += UTF8SKIP(s);
1788 while (s < strend && *s == ch)
1789 s += UTF8SKIP(s);
1790 }
1791 s += UTF8SKIP(s);
1792 }
1793 }
1794 else {
1795 while (s < strend) {
1796 if (*s == ch) {
a3621e74 1797 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1798 if (regtry(prog, s)) goto got_it;
1799 s++;
1800 while (s < strend && *s == ch)
1801 s++;
1802 }
1803 s++;
1804 }
1805 }
a3621e74 1806 DEBUG_EXECUTE_r(if (!did_match)
bf93d4cc 1807 PerlIO_printf(Perl_debug_log,
b7953727
JH
1808 "Did not find anchored character...\n")
1809 );
6eb5f6b9 1810 }
a0714e2c
SS
1811 else if (prog->anchored_substr != NULL
1812 || prog->anchored_utf8 != NULL
1813 || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
33b8afdf
JH
1814 && prog->float_max_offset < strend - s)) {
1815 SV *must;
1816 I32 back_max;
1817 I32 back_min;
1818 char *last;
6eb5f6b9 1819 char *last1; /* Last position checked before */
bf93d4cc
GS
1820#ifdef DEBUGGING
1821 int did_match = 0;
1822#endif
33b8afdf
JH
1823 if (prog->anchored_substr || prog->anchored_utf8) {
1824 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1825 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1826 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1827 back_max = back_min = prog->anchored_offset;
1828 } else {
1829 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1830 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1831 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1832 back_max = prog->float_max_offset;
1833 back_min = prog->float_min_offset;
1834 }
1835 if (must == &PL_sv_undef)
1836 /* could not downgrade utf8 check substring, so must fail */
1837 goto phooey;
1838
1839 last = HOP3c(strend, /* Cannot start after this */
1840 -(I32)(CHR_SVLEN(must)
1841 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9
JH
1842
1843 if (s > PL_bostr)
1844 last1 = HOPc(s, -1);
1845 else
1846 last1 = s - 1; /* bogus */
1847
a0288114 1848 /* XXXX check_substr already used to find "s", can optimize if
6eb5f6b9
JH
1849 check_substr==must. */
1850 scream_pos = -1;
1851 dontbother = end_shift;
1852 strend = HOPc(strend, -dontbother);
1853 while ( (s <= last) &&
9041c2e3 1854 ((flags & REXEC_SCREAM)
1aa99e6b 1855 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1856 end_shift, &scream_pos, 0))
1aa99e6b 1857 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1858 (unsigned char*)strend, must,
7fba1cd6 1859 multiline ? FBMrf_MULTILINE : 0))) ) {
4addbd3b
HS
1860 /* we may be pointing at the wrong string */
1861 if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
3f7c398e 1862 s = strbeg + (s - SvPVX_const(sv));
a3621e74 1863 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1864 if (HOPc(s, -back_max) > last1) {
1865 last1 = HOPc(s, -back_min);
1866 s = HOPc(s, -back_max);
1867 }
1868 else {
52657f30 1869 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
6eb5f6b9
JH
1870
1871 last1 = HOPc(s, -back_min);
52657f30 1872 s = t;
6eb5f6b9 1873 }
1aa99e6b 1874 if (do_utf8) {
6eb5f6b9
JH
1875 while (s <= last1) {
1876 if (regtry(prog, s))
1877 goto got_it;
1878 s += UTF8SKIP(s);
1879 }
1880 }
1881 else {
1882 while (s <= last1) {
1883 if (regtry(prog, s))
1884 goto got_it;
1885 s++;
1886 }
1887 }
1888 }
a3621e74 1889 DEBUG_EXECUTE_r(if (!did_match)
b7953727 1890 PerlIO_printf(Perl_debug_log,
a0288114 1891 "Did not find %s substr \"%s%.*s%s\"%s...\n",
33b8afdf 1892 ((must == prog->anchored_substr || must == prog->anchored_utf8)
bf93d4cc
GS
1893 ? "anchored" : "floating"),
1894 PL_colors[0],
1895 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 1896 SvPVX_const(must),
b7953727
JH
1897 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1898 );
6eb5f6b9
JH
1899 goto phooey;
1900 }
155aba94 1901 else if ((c = prog->regstclass)) {
f14c76ed
RGS
1902 if (minlen) {
1903 I32 op = (U8)OP(prog->regstclass);
66e933ab 1904 /* don't bother with what can't match */
f14c76ed
RGS
1905 if (PL_regkind[op] != EXACT && op != CANY)
1906 strend = HOPc(strend, -(minlen - 1));
1907 }
a3621e74 1908 DEBUG_EXECUTE_r({
ffc61ed2 1909 SV *prop = sv_newmortal();
cfd0369c
NC
1910 const char *s0;
1911 const char *s1;
9e55ce06
JH
1912 int len0;
1913 int len1;
1914
ffc61ed2 1915 regprop(prop, c);
9e55ce06 1916 s0 = UTF ?
3f7c398e 1917 pv_uni_display(dsv0, (U8*)SvPVX_const(prop), SvCUR(prop), 60,
c728cb41 1918 UNI_DISPLAY_REGEX) :
cfd0369c 1919 SvPVX_const(prop);
9e55ce06
JH
1920 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1921 s1 = UTF ?
c728cb41 1922 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
9e55ce06
JH
1923 len1 = UTF ? SvCUR(dsv1) : strend - s;
1924 PerlIO_printf(Perl_debug_log,
a0288114 1925 "Matching stclass \"%*.*s\" against \"%*.*s\"\n",
9e55ce06
JH
1926 len0, len0, s0,
1927 len1, len1, s1);
ffc61ed2 1928 });
06b5626a 1929 if (find_byclass(prog, c, s, strend, 0))
6eb5f6b9 1930 goto got_it;
a3621e74 1931 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1932 }
1933 else {
1934 dontbother = 0;
a0714e2c 1935 if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
33b8afdf 1936 /* Trim the end. */
d6a28714 1937 char *last;
33b8afdf
JH
1938 SV* float_real;
1939
1940 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1941 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1942 float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
d6a28714
JH
1943
1944 if (flags & REXEC_SCREAM) {
33b8afdf 1945 last = screaminstr(sv, float_real, s - strbeg,
d6a28714
JH
1946 end_shift, &scream_pos, 1); /* last one */
1947 if (!last)
ffc61ed2 1948 last = scream_olds; /* Only one occurrence. */
4addbd3b
HS
1949 /* we may be pointing at the wrong string */
1950 else if (RX_MATCH_COPIED(prog))
3f7c398e 1951 s = strbeg + (s - SvPVX_const(sv));
b8c5462f 1952 }
d6a28714
JH
1953 else {
1954 STRLEN len;
cfd0369c 1955 const char * const little = SvPV_const(float_real, len);
d6a28714 1956
33b8afdf 1957 if (SvTAIL(float_real)) {
d6a28714
JH
1958 if (memEQ(strend - len + 1, little, len - 1))
1959 last = strend - len + 1;
7fba1cd6 1960 else if (!multiline)
9041c2e3 1961 last = memEQ(strend - len, little, len)
bd61b366 1962 ? strend - len : NULL;
b8c5462f 1963 else
d6a28714
JH
1964 goto find_last;
1965 } else {
1966 find_last:
9041c2e3 1967 if (len)
d6a28714 1968 last = rninstr(s, strend, little, little + len);
b8c5462f 1969 else
a0288114 1970 last = strend; /* matching "$" */
b8c5462f 1971 }
b8c5462f 1972 }
bf93d4cc 1973 if (last == NULL) {
a3621e74 1974 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
bf93d4cc 1975 "%sCan't trim the tail, match fails (should not happen)%s\n",
e4584336 1976 PL_colors[4], PL_colors[5]));
bf93d4cc
GS
1977 goto phooey; /* Should not happen! */
1978 }
d6a28714
JH
1979 dontbother = strend - last + prog->float_min_offset;
1980 }
1981 if (minlen && (dontbother < minlen))
1982 dontbother = minlen - 1;
1983 strend -= dontbother; /* this one's always in bytes! */
1984 /* We don't know much -- general case. */
1aa99e6b 1985 if (do_utf8) {
d6a28714
JH
1986 for (;;) {
1987 if (regtry(prog, s))
1988 goto got_it;
1989 if (s >= strend)
1990 break;
b8c5462f 1991 s += UTF8SKIP(s);
d6a28714
JH
1992 };
1993 }
1994 else {
1995 do {
1996 if (regtry(prog, s))
1997 goto got_it;
1998 } while (s++ < strend);
1999 }
2000 }
2001
2002 /* Failure. */
2003 goto phooey;
2004
2005got_it:
2006 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2007
2008 if (PL_reg_eval_set) {
2009 /* Preserve the current value of $^R */
2010 if (oreplsv != GvSV(PL_replgv))
2011 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2012 restored, the value remains
2013 the same. */
4f639d21 2014 restore_pos(aTHX_ prog);
d6a28714
JH
2015 }
2016
2017 /* make sure $`, $&, $', and $digit will work later */
2018 if ( !(flags & REXEC_NOT_FIRST) ) {
ed252734 2019 RX_MATCH_COPY_FREE(prog);
d6a28714
JH
2020 if (flags & REXEC_COPY_STR) {
2021 I32 i = PL_regeol - startpos + (stringarg - strbeg);
f8c7b90f 2022#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2023 if ((SvIsCOW(sv)
2024 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2025 if (DEBUG_C_TEST) {
2026 PerlIO_printf(Perl_debug_log,
2027 "Copy on write: regexp capture, type %d\n",
2028 (int) SvTYPE(sv));
2029 }
2030 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
d5263905 2031 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
ed252734
NC
2032 assert (SvPOKp(prog->saved_copy));
2033 } else
2034#endif
2035 {
2036 RX_MATCH_COPIED_on(prog);
2037 s = savepvn(strbeg, i);
2038 prog->subbeg = s;
2039 }
d6a28714 2040 prog->sublen = i;
d6a28714
JH
2041 }
2042 else {
2043 prog->subbeg = strbeg;
2044 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2045 }
2046 }
9041c2e3 2047
d6a28714
JH
2048 return 1;
2049
2050phooey:
a3621e74 2051 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
e4584336 2052 PL_colors[4], PL_colors[5]));
d6a28714 2053 if (PL_reg_eval_set)
4f639d21 2054 restore_pos(aTHX_ prog);
d6a28714
JH
2055 return 0;
2056}
2057
2058/*
2059 - regtry - try match at specific point
2060 */
2061STATIC I32 /* 0 failure, 1 success */
2062S_regtry(pTHX_ regexp *prog, char *startpos)
2063{
97aff369 2064 dVAR;
d6a28714
JH
2065 register I32 *sp;
2066 register I32 *ep;
2067 CHECKPOINT lastcp;
a3621e74 2068 GET_RE_DEBUG_FLAGS_DECL;
d6a28714 2069
02db2b7b
IZ
2070#ifdef DEBUGGING
2071 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
2072#endif
d6a28714
JH
2073 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2074 MAGIC *mg;
2075
2076 PL_reg_eval_set = RS_init;
a3621e74 2077 DEBUG_EXECUTE_r(DEBUG_s(
b900a521
JH
2078 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2079 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 2080 ));
e8347627 2081 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
2082 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2083 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2084 SAVETMPS;
2085 /* Apparently this is not needed, judging by wantarray. */
e8347627 2086 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
2087 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2088
2089 if (PL_reg_sv) {
2090 /* Make $_ available to executed code. */
2091 if (PL_reg_sv != DEFSV) {
59f00321 2092 SAVE_DEFSV;
d6a28714 2093 DEFSV = PL_reg_sv;
b8c5462f 2094 }
d6a28714 2095
9041c2e3 2096 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 2097 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 2098 /* prepare for quick setting of pos */
d300d9fa
NC
2099#ifdef PERL_OLD_COPY_ON_WRITE
2100 if (SvIsCOW(sv))
2101 sv_force_normal_flags(sv, 0);
2102#endif
2103 mg = sv_magicext(PL_reg_sv, (SV*)0, PERL_MAGIC_regex_global,
2104 &PL_vtbl_mglob, NULL, 0);
d6a28714 2105 mg->mg_len = -1;
b8c5462f 2106 }
d6a28714
JH
2107 PL_reg_magic = mg;
2108 PL_reg_oldpos = mg->mg_len;
4f639d21 2109 SAVEDESTRUCTOR_X(restore_pos, prog);
d6a28714 2110 }
09687e5a 2111 if (!PL_reg_curpm) {
a02a5408 2112 Newxz(PL_reg_curpm, 1, PMOP);
09687e5a
AB
2113#ifdef USE_ITHREADS
2114 {
2115 SV* repointer = newSViv(0);
577e12cc 2116 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2117 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2118 av_push(PL_regex_padav,repointer);
2119 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2120 PL_regex_pad = AvARRAY(PL_regex_padav);
2121 }
2122#endif
2123 }
aaa362c4 2124 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2125 PL_reg_oldcurpm = PL_curpm;
2126 PL_curpm = PL_reg_curpm;
2127 if (RX_MATCH_COPIED(prog)) {
2128 /* Here is a serious problem: we cannot rewrite subbeg,
2129 since it may be needed if this match fails. Thus
2130 $` inside (?{}) could fail... */
2131 PL_reg_oldsaved = prog->subbeg;
2132 PL_reg_oldsavedlen = prog->sublen;
f8c7b90f 2133#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2134 PL_nrs = prog->saved_copy;
2135#endif
d6a28714
JH
2136 RX_MATCH_COPIED_off(prog);
2137 }
2138 else
bd61b366 2139 PL_reg_oldsaved = NULL;
d6a28714
JH
2140 prog->subbeg = PL_bostr;
2141 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2142 }
973dddac 2143 prog->startp[0] = startpos - PL_bostr;
d6a28714
JH
2144 PL_reginput = startpos;
2145 PL_regstartp = prog->startp;
2146 PL_regendp = prog->endp;
2147 PL_reglastparen = &prog->lastparen;
a01268b5 2148 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714 2149 prog->lastparen = 0;
03994de8 2150 prog->lastcloseparen = 0;
d6a28714 2151 PL_regsize = 0;
a3621e74 2152 DEBUG_EXECUTE_r(PL_reg_starttry = startpos);
d6a28714
JH
2153 if (PL_reg_start_tmpl <= prog->nparens) {
2154 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2155 if(PL_reg_start_tmp)
2156 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2157 else
a02a5408 2158 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
d6a28714
JH
2159 }
2160
2161 /* XXXX What this code is doing here?!!! There should be no need
2162 to do this again and again, PL_reglastparen should take care of
3dd2943c 2163 this! --ilya*/
dafc8851
JH
2164
2165 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2166 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2167 * PL_reglastparen), is not needed at all by the test suite
2168 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2169 * enough, for building DynaLoader, or otherwise this
2170 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2171 * will happen. Meanwhile, this code *is* needed for the
2172 * above-mentioned test suite tests to succeed. The common theme
2173 * on those tests seems to be returning null fields from matches.
2174 * --jhi */
dafc8851 2175#if 1
d6a28714
JH
2176 sp = prog->startp;
2177 ep = prog->endp;
2178 if (prog->nparens) {
097eb12c 2179 register I32 i;
eb160463 2180 for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
d6a28714
JH
2181 *++sp = -1;
2182 *++ep = -1;
2183 }
2184 }
dafc8851 2185#endif
02db2b7b 2186 REGCP_SET(lastcp);
4f639d21 2187 if (regmatch(prog, prog->program + 1)) {
d6a28714
JH
2188 prog->endp[0] = PL_reginput - PL_bostr;
2189 return 1;
2190 }
02db2b7b 2191 REGCP_UNWIND(lastcp);
d6a28714
JH
2192 return 0;
2193}
2194
02db2b7b
IZ
2195#define RE_UNWIND_BRANCH 1
2196#define RE_UNWIND_BRANCHJ 2
2197
2198union re_unwind_t;
2199
2200typedef struct { /* XX: makes sense to enlarge it... */
2201 I32 type;
2202 I32 prev;
2203 CHECKPOINT lastcp;
2204} re_unwind_generic_t;
2205
2206typedef struct {
2207 I32 type;
2208 I32 prev;
2209 CHECKPOINT lastcp;
2210 I32 lastparen;
2211 regnode *next;
2212 char *locinput;
2213 I32 nextchr;
3a2830be 2214 int minmod;
02db2b7b
IZ
2215#ifdef DEBUGGING
2216 int regindent;
2217#endif
2218} re_unwind_branch_t;
2219
2220typedef union re_unwind_t {
2221 I32 type;
2222 re_unwind_generic_t generic;
2223 re_unwind_branch_t branch;
2224} re_unwind_t;
2225
8ba1375e
MJD
2226#define sayYES goto yes
2227#define sayNO goto no
e0f9d4a8 2228#define sayNO_ANYOF goto no_anyof
8ba1375e
MJD
2229#define sayYES_FINAL goto yes_final
2230#define sayYES_LOUD goto yes_loud
2231#define sayNO_FINAL goto no_final
2232#define sayNO_SILENT goto do_no
2233#define saySAME(x) if (x) goto yes; else goto no
2234
3ab3c9b4
HS
2235#define POSCACHE_SUCCESS 0 /* caching success rather than failure */
2236#define POSCACHE_SEEN 1 /* we know what we're caching */
2237#define POSCACHE_START 2 /* the real cache: this bit maps to pos 0 */
2238#define CACHEsayYES STMT_START { \
d8319b27 2239 if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \
3ab3c9b4
HS
2240 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2241 PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) || (1<<POSCACHE_SEEN); \
2242 else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2243 /* cache records failure, but this is success */ \
2244 DEBUG_r( \
2245 PerlIO_printf(Perl_debug_log, \
2246 "%*s (remove success from failure cache)\n", \
2247 REPORT_CODE_OFF+PL_regindent*2, "") \
2248 ); \
d8319b27 2249 PL_reg_poscache[st->u.whilem.cache_offset] &= ~(1<<st->u.whilem.cache_bit); \
3ab3c9b4
HS
2250 } \
2251 } \
2252 sayYES; \
2253} STMT_END
2254#define CACHEsayNO STMT_START { \
d8319b27 2255 if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \
3ab3c9b4
HS
2256 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2257 PL_reg_poscache[0] |= (1<<POSCACHE_SEEN); \
2258 else if ((PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2259 /* cache records success, but this is failure */ \
2260 DEBUG_r( \
2261 PerlIO_printf(Perl_debug_log, \
2262 "%*s (remove failure from success cache)\n", \
2263 REPORT_CODE_OFF+PL_regindent*2, "") \
2264 ); \
d8319b27 2265 PL_reg_poscache[st->u.whilem.cache_offset] &= ~(1<<st->u.whilem.cache_bit); \
3ab3c9b4
HS
2266 } \
2267 } \
2268 sayNO; \
2269} STMT_END
2270
a3621e74
YO
2271/* this is used to determine how far from the left messages like
2272 'failed...' are printed. Currently 29 makes these messages line
2273 up with the opcode they refer to. Earlier perls used 25 which
2274 left these messages outdented making reviewing a debug output
2275 quite difficult.
2276*/
2277#define REPORT_CODE_OFF 29
2278
2279
2280/* Make sure there is a test for this +1 options in re_tests */
2281#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2282
5d9a96ca
DM
2283/* grab a new slab and return the first slot in it */
2284
2285STATIC regmatch_state *
2286S_push_slab(pTHX)
2287{
2288 regmatch_slab *s = PL_regmatch_slab->next;
2289 if (!s) {
2290 Newx(s, 1, regmatch_slab);
2291 s->prev = PL_regmatch_slab;
2292 s->next = NULL;
2293 PL_regmatch_slab->next = s;
2294 }
2295 PL_regmatch_slab = s;
2296 return &s->states[0];
2297}
5b47454d 2298
95b24440
DM
2299/* simulate a recursive call to regmatch */
2300
2301#define REGMATCH(ns, where) \
5d9a96ca
DM
2302 st->scan = scan; \
2303 scan = (ns); \
2304 st->resume_state = resume_##where; \
95b24440
DM
2305 goto start_recurse; \
2306 resume_point_##where:
2307
aa283a38
DM
2308
2309/* push a new regex state. Set newst to point to it */
2310
2311#define PUSH_STATE(newst, resume) \
2312 depth++; \
2313 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "PUSH STATE(%d)\n", depth)); \
2314 st->scan = scan; \
2315 st->next = next; \
2316 st->n = n; \
2317 st->locinput = locinput; \
2318 st->resume_state = resume; \
2319 newst = st+1; \
2320 if (newst > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1])) \
2321 newst = S_push_slab(aTHX); \
2322 PL_regmatch_state = newst; \
2323 newst->cc = 0; \
2324 newst->minmod = 0; \
2325 newst->sw = 0; \
2326 newst->logical = 0; \
2327 newst->unwind = 0; \
2328 locinput = PL_reginput; \
2329 nextchr = UCHARAT(locinput);
2330
2331#define POP_STATE \
2332 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE(%d)\n", depth)); \
2333 depth--; \
2334 st--; \
2335 if (st < &PL_regmatch_slab->states[0]) { \
2336 PL_regmatch_slab = PL_regmatch_slab->prev; \
2337 st = &PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]; \
2338 } \
2339 PL_regmatch_state = st; \
2340 scan = st->scan; \
2341 next = st->next; \
2342 n = st->n; \
2343 locinput = st->locinput; \
2344 nextchr = UCHARAT(locinput);
2345
d6a28714
JH
2346/*
2347 - regmatch - main matching routine
2348 *
2349 * Conceptually the strategy is simple: check to see whether the current
2350 * node matches, call self recursively to see whether the rest matches,
2351 * and then act accordingly. In practice we make some effort to avoid
2352 * recursion, in particular by going through "ordinary" nodes (that don't
2353 * need to know whether the rest of the match failed) by a loop instead of
2354 * by recursion.
2355 */
2356/* [lwall] I've hoisted the register declarations to the outer block in order to
2357 * maybe save a little bit of pushing and popping on the stack. It also takes
2358 * advantage of machines that use a register save mask on subroutine entry.
95b24440
DM
2359 *
2360 * This function used to be heavily recursive, but since this had the
2361 * effect of blowing the CPU stack on complex regexes, it has been
2362 * restructured to be iterative, and to save state onto the heap rather
2363 * than the stack. Essentially whereever regmatch() used to be called, it
2364 * pushes the current state, notes where to return, then jumps back into
2365 * the main loop.
2366 *
2367 * Originally the structure of this function used to look something like
2368
2369 S_regmatch() {
2370 int a = 1, b = 2;
2371 ...
2372 while (scan != NULL) {
5d9a96ca 2373 a++; // do stuff with a and b
95b24440
DM
2374 ...
2375 switch (OP(scan)) {
2376 case FOO: {
2377 int local = 3;
2378 ...
2379 if (regmatch(...)) // recurse
2380 goto yes;
2381 }
2382 ...
2383 }
2384 }
2385 yes:
2386 return 1;
2387 }
2388
2389 * Now it looks something like this:
2390
5d9a96ca 2391 typedef struct {
95b24440
DM
2392 int a, b, local;
2393 int resume_state;
5d9a96ca 2394 } regmatch_state;
95b24440
DM
2395
2396 S_regmatch() {
5d9a96ca
DM
2397 regmatch_state *st = new();
2398 int depth=0;
2399 st->a++; // do stuff with a and b
95b24440
DM
2400 ...
2401 while (scan != NULL) {
2402 ...
2403 switch (OP(scan)) {
2404 case FOO: {
5d9a96ca 2405 st->local = 3;
95b24440 2406 ...
5d9a96ca
DM
2407 st->scan = scan;
2408 scan = ...;
2409 st->resume_state = resume_FOO;
2410 goto start_recurse; // recurse
95b24440 2411
5d9a96ca
DM
2412 resume_point_FOO:
2413 if (result)
95b24440
DM
2414 goto yes;
2415 }
2416 ...
2417 }
5d9a96ca
DM
2418 start_recurse:
2419 st = new(); push a new state
2420 st->a = 1; st->b = 2;
2421 depth++;
95b24440 2422 }
5d9a96ca 2423 yes:
95b24440 2424 result = 1;
5d9a96ca
DM
2425 if (depth--) {
2426 st = pop();
95b24440
DM
2427 switch (resume_state) {
2428 case resume_FOO:
2429 goto resume_point_FOO;
2430 ...
2431 }
2432 }
2433 return result
2434 }
2435
2436 * WARNING: this means that any line in this function that contains a
2437 * REGMATCH() or TRYPAREN() is actually simulating a recursive call to
2438 * regmatch() using gotos instead. Thus the values of any local variables
2439 * not saved in the regmatch_state structure will have been lost when
2440 * execution resumes on the next line .
5d9a96ca
DM
2441 *
2442 * States (ie the st pointer) are allocated in slabs of about 4K in size.
2443 * PL_regmatch_state always points to the currently active state, and
2444 * PL_regmatch_slab points to the slab currently containing PL_regmatch_state.
2445 * The first time regmatch is called, the first slab is allocated, and is
2446 * never freed until interpreter desctruction. When the slab is full,
2447 * a new one is allocated chained to the end. At exit from regmatch, slabs
2448 * allocated since entry are freed.
d6a28714 2449 */
95b24440
DM
2450
2451
d6a28714 2452STATIC I32 /* 0 failure, 1 success */
4f639d21 2453S_regmatch(pTHX_ regexp *rex, regnode *prog)
d6a28714 2454{
27da23d5 2455 dVAR;
95b24440 2456 register const bool do_utf8 = PL_reg_match_utf8;
4ad0818d 2457 const U32 uniflags = UTF8_ALLOW_DEFAULT;
95b24440 2458
5d9a96ca
DM
2459 regmatch_slab *orig_slab;
2460 regmatch_state *orig_state;
a3621e74 2461
5d9a96ca
DM
2462 /* the current state. This is a cached copy of PL_regmatch_state */
2463 register regmatch_state *st;
95b24440 2464
5d9a96ca
DM
2465 /* cache heavy used fields of st in registers */
2466 register regnode *scan;
2467 register regnode *next;
2468 register I32 n = 0; /* initialize to shut up compiler warning */
2469 register char *locinput = PL_reginput;
95b24440 2470
5d9a96ca
DM
2471 /* these variables are NOT saved during a recusive RFEGMATCH: */
2472 register I32 nextchr; /* is always set to UCHARAT(locinput) */
2473 bool result; /* return value of S_regmatch */
2474 regnode *inner; /* Next node in internal branch. */
2475 int depth = 0; /* depth of recursion */
aa283a38
DM
2476 regmatch_state *newst; /* when pushing a state, this is the new one */
2477 regmatch_state *cur_eval = NULL; /* most recent (??{}) state */
95b24440
DM
2478
2479#ifdef DEBUGGING
ab74612d 2480 SV *re_debug_flags = NULL;
a3621e74 2481 GET_RE_DEBUG_FLAGS;
d6a28714
JH
2482 PL_regindent++;
2483#endif
2484
5d9a96ca
DM
2485 /* on first ever call to regmatch, allocate first slab */
2486 if (!PL_regmatch_slab) {
2487 Newx(PL_regmatch_slab, 1, regmatch_slab);
2488 PL_regmatch_slab->prev = NULL;
2489 PL_regmatch_slab->next = NULL;
2490 PL_regmatch_state = &PL_regmatch_slab->states[0] - 1;
2491 }
2492
2493 /* remember current high-water mark for exit */
2494 /* XXX this should be done with SAVE* instead */
2495 orig_slab = PL_regmatch_slab;
2496 orig_state = PL_regmatch_state;
2497
2498 /* grab next free state slot */
2499 st = ++PL_regmatch_state;
2500 if (st > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]))
2501 st = PL_regmatch_state = S_push_slab(aTHX);
2502
2503 st->minmod = 0;
2504 st->sw = 0;
2505 st->logical = 0;
2506 st->unwind = 0;
2507 st->cc = NULL;
d6a28714
JH
2508 /* Note that nextchr is a byte even in UTF */
2509 nextchr = UCHARAT(locinput);
2510 scan = prog;
2511 while (scan != NULL) {
8ba1375e 2512
a3621e74 2513 DEBUG_EXECUTE_r( {
6136c704 2514 SV * const prop = sv_newmortal();
1df70142
AL
2515 const int docolor = *PL_colors[0];
2516 const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2517 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2518 /* The part of the string before starttry has one color
2519 (pref0_len chars), between starttry and current
2520 position another one (pref_len - pref0_len chars),
2521 after the current position the third one.
2522 We assume that pref0_len <= pref_len, otherwise we
2523 decrease pref0_len. */
9041c2e3 2524 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2525 ? (5 + taill) - l : locinput - PL_bostr;
2526 int pref0_len;
d6a28714 2527
df1ffd02 2528 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2529 pref_len++;
2530 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2531 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2532 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2533 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2534 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2535 l--;
d6a28714
JH
2536 if (pref0_len < 0)
2537 pref0_len = 0;
2538 if (pref0_len > pref_len)
2539 pref0_len = pref_len;
2540 regprop(prop, scan);
2a782b5b 2541 {
1df70142 2542 const char * const s0 =
f14c76ed 2543 do_utf8 && OP(scan) != CANY ?
95b24440 2544 pv_uni_display(PERL_DEBUG_PAD(0), (U8*)(locinput - pref_len),
c728cb41 2545 pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2546 locinput - pref_len;
1df70142
AL
2547 const int len0 = do_utf8 ? strlen(s0) : pref0_len;
2548 const char * const s1 = do_utf8 && OP(scan) != CANY ?
95b24440
DM
2549 pv_uni_display(PERL_DEBUG_PAD(1),
2550 (U8*)(locinput - pref_len + pref0_len),
c728cb41 2551 pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2552 locinput - pref_len + pref0_len;
1df70142
AL
2553 const int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2554 const char * const s2 = do_utf8 && OP(scan) != CANY ?
95b24440 2555 pv_uni_display(PERL_DEBUG_PAD(2), (U8*)locinput,
c728cb41 2556 PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2557 locinput;
1df70142 2558 const int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2559 PerlIO_printf(Perl_debug_log,
2560 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2561 (IV)(locinput - PL_bostr),
2562 PL_colors[4],
2563 len0, s0,
2564 PL_colors[5],
2565 PL_colors[2],
2566 len1, s1,
2567 PL_colors[3],
2568 (docolor ? "" : "> <"),
2569 PL_colors[0],
2570 len2, s2,
2571 PL_colors[1],
2572 15 - l - pref_len + 1,
2573 "",
4f639d21 2574 (IV)(scan - rex->program), PL_regindent*2, "",
3f7c398e 2575 SvPVX_const(prop));
2a782b5b
JH
2576 }
2577 });
d6a28714
JH
2578
2579 next = scan + NEXT_OFF(scan);
2580 if (next == scan)
2581 next = NULL;
2582
2583 switch (OP(scan)) {
2584 case BOL:
7fba1cd6 2585 if (locinput == PL_bostr)
d6a28714
JH
2586 {
2587 /* regtill = regbol; */
b8c5462f
JH
2588 break;
2589 }
d6a28714
JH
2590 sayNO;
2591 case MBOL:
12d33761
HS
2592 if (locinput == PL_bostr ||
2593 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2594 {
b8c5462f
JH
2595 break;
2596 }
d6a28714
JH
2597 sayNO;
2598 case SBOL:
c2a73568 2599 if (locinput == PL_bostr)
b8c5462f 2600 break;
d6a28714
JH
2601 sayNO;
2602 case GPOS:
2603 if (locinput == PL_reg_ganch)
2604 break;
2605 sayNO;
2606 case EOL:
d6a28714
JH
2607 goto seol;
2608 case MEOL:
d6a28714 2609 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2610 sayNO;
b8c5462f 2611 break;
d6a28714
JH
2612 case SEOL:
2613 seol:
2614 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2615 sayNO;
d6a28714 2616 if (PL_regeol - locinput > 1)
b8c5462f 2617 sayNO;
b8c5462f 2618 break;
d6a28714
JH
2619 case EOS:
2620 if (PL_regeol != locinput)
b8c5462f 2621 sayNO;
d6a28714 2622 break;
ffc61ed2 2623 case SANY:
d6a28714 2624 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2625 sayNO;
f33976b4
DB
2626 if (do_utf8) {
2627 locinput += PL_utf8skip[nextchr];
2628 if (locinput > PL_regeol)
2629 sayNO;
2630 nextchr = UCHARAT(locinput);
2631 }
2632 else
2633 nextchr = UCHARAT(++locinput);
2634 break;
2635 case CANY:
2636 if (!nextchr && locinput >= PL_regeol)
2637 sayNO;
b8c5462f 2638 nextchr = UCHARAT(++locinput);
a0d0e21e 2639 break;
ffc61ed2 2640 case REG_ANY:
1aa99e6b
IH
2641 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2642 sayNO;
2643 if (do_utf8) {
b8c5462f 2644 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2645 if (locinput > PL_regeol)
2646 sayNO;
a0ed51b3 2647 nextchr = UCHARAT(locinput);
a0ed51b3 2648 }
1aa99e6b
IH
2649 else
2650 nextchr = UCHARAT(++locinput);
a0ed51b3 2651 break;
a3621e74
YO
2652
2653
2654
2655 /*
2656 traverse the TRIE keeping track of all accepting states
2657 we transition through until we get to a failing node.
2658
a3621e74
YO
2659
2660 */
5b47454d 2661 case TRIE:
a3621e74
YO
2662 case TRIEF:
2663 case TRIEFL:
2664 {
a3621e74
YO
2665 U8 *uc = ( U8* )locinput;
2666 U32 state = 1;
2667 U16 charid = 0;
2668 U32 base = 0;
2669 UV uvc = 0;
2670 STRLEN len = 0;
2671 STRLEN foldlen = 0;
a3621e74
YO
2672 U8 *uscan = (U8*)NULL;
2673 STRLEN bufflen=0;
95b24440 2674 SV *sv_accept_buff = NULL;
5b47454d
DM
2675 const enum { trie_plain, trie_utf8, trie_uft8_fold }
2676 trie_type = do_utf8 ?
2677 (OP(scan) == TRIE ? trie_utf8 : trie_uft8_fold)
2678 : trie_plain;
2679
7087a21c
NC
2680 /* what trie are we using right now */
2681 reg_trie_data *trie
4f639d21 2682 = (reg_trie_data*)PL_reg_re->data->data[ ARG( scan ) ];
d8319b27 2683 st->u.trie.accepted = 0; /* how many accepting states we have seen */
95b24440 2684 result = 0;
a3621e74
YO
2685
2686 while ( state && uc <= (U8*)PL_regeol ) {
2687
5b47454d 2688 if (trie->states[ state ].wordnum) {
d8319b27 2689 if (!st->u.trie.accepted ) {
5b47454d
DM
2690 ENTER;
2691 SAVETMPS;
2692 bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
2693 sv_accept_buff=newSV(bufflen *
2694 sizeof(reg_trie_accepted) - 1);
2695 SvCUR_set(sv_accept_buff,
2696 sizeof(reg_trie_accepted));
2697 SvPOK_on(sv_accept_buff);
2698 sv_2mortal(sv_accept_buff);
d8319b27 2699 st->u.trie.accept_buff =
5b47454d
DM
2700 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
2701 }
2702 else {
d8319b27 2703 if (st->u.trie.accepted >= bufflen) {
5b47454d 2704 bufflen *= 2;
d8319b27 2705 st->u.trie.accept_buff =(reg_trie_accepted*)
5b47454d
DM
2706 SvGROW(sv_accept_buff,
2707 bufflen * sizeof(reg_trie_accepted));
2708 }
2709 SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
2710 + sizeof(reg_trie_accepted));
2711 }
d8319b27
DM
2712 st->u.trie.accept_buff[st->u.trie.accepted].wordnum = trie->states[state].wordnum;
2713 st->u.trie.accept_buff[st->u.trie.accepted].endpos = uc;
2714 ++st->u.trie.accepted;
5b47454d 2715 }
a3621e74
YO
2716
2717 base = trie->states[ state ].trans.base;
2718
2719 DEBUG_TRIE_EXECUTE_r(
2720 PerlIO_printf( Perl_debug_log,
e4584336 2721 "%*s %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ",
a3621e74 2722 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
d8319b27 2723 (UV)state, (UV)base, (UV)st->u.trie.accepted );
a3621e74
YO
2724 );
2725
2726 if ( base ) {
5b47454d
DM
2727 switch (trie_type) {
2728 case trie_uft8_fold:
a3621e74
YO
2729 if ( foldlen>0 ) {
2730 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
2731 foldlen -= len;
2732 uscan += len;
2733 len=0;
2734 } else {
1df70142 2735 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
a3621e74
YO
2736 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2737 uvc = to_uni_fold( uvc, foldbuf, &foldlen );
2738 foldlen -= UNISKIP( uvc );
2739 uscan = foldbuf + UNISKIP( uvc );
2740 }
5b47454d
DM
2741 break;
2742 case trie_utf8:
2743 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN,
2744 &len, uniflags );
2745 break;
2746 case trie_plain:
e4584336 2747 uvc = (UV)*uc;
a3621e74
YO
2748 len = 1;
2749 }
2750
5b47454d
DM
2751 if (uvc < 256) {
2752 charid = trie->charmap[ uvc ];
2753 }
2754 else {
2755 charid = 0;
2756 if (trie->widecharmap) {
2757 SV** svpp = (SV**)NULL;
2758 svpp = hv_fetch(trie->widecharmap,
2759 (char*)&uvc, sizeof(UV), 0);
2760 if (svpp)
2761 charid = (U16)SvIV(*svpp);
2762 }
2763 }
a3621e74 2764
5b47454d
DM
2765 if (charid &&
2766 (base + charid > trie->uniquecharcount )
2767 && (base + charid - 1 - trie->uniquecharcount
2768 < trie->lasttrans)
2769 && trie->trans[base + charid - 1 -
2770 trie->uniquecharcount].check == state)
2771 {
2772 state = trie->trans[base + charid - 1 -
2773 trie->uniquecharcount ].next;
2774 }
2775 else {
2776 state = 0;
2777 }
2778 uc += len;
2779
2780 }
2781 else {
a3621e74
YO
2782 state = 0;
2783 }
2784 DEBUG_TRIE_EXECUTE_r(
e4584336
RB
2785 PerlIO_printf( Perl_debug_log,
2786 "Charid:%3x CV:%4"UVxf" After State: %4"UVxf"%s\n",
2787 charid, uvc, (UV)state, PL_colors[5] );
a3621e74
YO
2788 );
2789 }
d8319b27 2790 if (!st->u.trie.accepted )
a3621e74 2791 sayNO;
a3621e74
YO
2792
2793 /*
2794 There was at least one accepting state that we
2795 transitioned through. Presumably the number of accepting
2796 states is going to be low, typically one or two. So we
2797 simply scan through to find the one with lowest wordnum.
2798 Once we find it, we swap the last state into its place
2799 and decrement the size. We then try to match the rest of
2800 the pattern at the point where the word ends, if we
2801 succeed then we end the loop, otherwise the loop
2802 eventually terminates once all of the accepting states
2803 have been tried.
2804 */
a3621e74 2805
d8319b27 2806 if ( st->u.trie.accepted == 1 ) {
a3621e74 2807 DEBUG_EXECUTE_r({
097eb12c 2808 SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ 0 ].wordnum-1, 0 );
a3621e74
YO
2809 PerlIO_printf( Perl_debug_log,
2810 "%*s %sonly one match : #%d <%s>%s\n",
e4584336 2811 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
d8319b27 2812 st->u.trie.accept_buff[ 0 ].wordnum,
cfd0369c 2813 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",
a3621e74
YO
2814 PL_colors[5] );
2815 });
d8319b27 2816 PL_reginput = (char *)st->u.trie.accept_buff[ 0 ].endpos;
a3621e74
YO
2817 /* in this case we free tmps/leave before we call regmatch
2818 as we wont be using accept_buff again. */
2819 FREETMPS;
2820 LEAVE;
95b24440
DM
2821 REGMATCH(scan + NEXT_OFF(scan), TRIE1);
2822 /*** all unsaved local vars undefined at this point */
a3621e74
YO
2823 } else {
2824 DEBUG_EXECUTE_r(
e4584336 2825 PerlIO_printf( Perl_debug_log,"%*s %sgot %"IVdf" possible matches%s\n",
d8319b27 2826 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], (IV)st->u.trie.accepted,
a3621e74
YO
2827 PL_colors[5] );
2828 );
d8319b27 2829 while ( !result && st->u.trie.accepted-- ) {
a3621e74
YO
2830 U32 best = 0;
2831 U32 cur;
d8319b27 2832 for( cur = 1 ; cur <= st->u.trie.accepted ; cur++ ) {
e4584336
RB
2833 DEBUG_TRIE_EXECUTE_r(
2834 PerlIO_printf( Perl_debug_log,
2835 "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
2836 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
d8319b27
DM
2837 (IV)best, st->u.trie.accept_buff[ best ].wordnum, (IV)cur,
2838 st->u.trie.accept_buff[ cur ].wordnum, PL_colors[5] );
e4584336 2839 );
a3621e74 2840
d8319b27
DM
2841 if (st->u.trie.accept_buff[cur].wordnum <
2842 st->u.trie.accept_buff[best].wordnum)
e822a8b4 2843 best = cur;
a3621e74
YO
2844 }
2845 DEBUG_EXECUTE_r({
87830502
DM
2846 reg_trie_data * const trie = (reg_trie_data*)
2847 PL_reg_re->data->data[ARG(scan)];
d8319b27 2848 SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ best ].wordnum - 1, 0 );
a3621e74 2849 PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at 0x%p%s\n",
e4584336 2850 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
d8319b27 2851 st->u.trie.accept_buff[best].wordnum,
cfd0369c 2852 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",scan,
a3621e74
YO
2853 PL_colors[5] );
2854 });
d8319b27
DM
2855 if ( best<st->u.trie.accepted ) {
2856 reg_trie_accepted tmp = st->u.trie.accept_buff[ best ];
2857 st->u.trie.accept_buff[ best ] = st->u.trie.accept_buff[ st->u.trie.accepted ];
2858 st->u.trie.accept_buff[ st->u.trie.accepted ] = tmp;
2859 best = st->u.trie.accepted;
a3621e74 2860 }
d8319b27 2861 PL_reginput = (char *)st->u.trie.accept_buff[ best ].endpos;
a3621e74
YO
2862
2863 /*
2864 as far as I can tell we only need the SAVETMPS/FREETMPS
2865 for re's with EVAL in them but I'm leaving them in for
2866 all until I can be sure.
2867 */
2868 SAVETMPS;
95b24440
DM
2869 REGMATCH(scan + NEXT_OFF(scan), TRIE2);
2870 /*** all unsaved local vars undefined at this point */
a3621e74
YO
2871 FREETMPS;
2872 }
2873 FREETMPS;
2874 LEAVE;
2875 }
2876
95b24440 2877 if (result) {
a3621e74
YO
2878 sayYES;
2879 } else {
2880 sayNO;
2881 }
2882 }
2883 /* unreached codepoint */
95b24440
DM
2884 case EXACT: {
2885 char *s = STRING(scan);
5d9a96ca 2886 st->ln = STR_LEN(scan);
eb160463 2887 if (do_utf8 != UTF) {
bc517b45 2888 /* The target and the pattern have differing utf8ness. */
1aa99e6b 2889 char *l = locinput;
5d9a96ca 2890 const char *e = s + st->ln;
a72c7584 2891
5ff6fc6d
JH
2892 if (do_utf8) {
2893 /* The target is utf8, the pattern is not utf8. */
1aa99e6b 2894 while (s < e) {
a3b680e6 2895 STRLEN ulen;
1aa99e6b 2896 if (l >= PL_regeol)
5ff6fc6d
JH
2897 sayNO;
2898 if (NATIVE_TO_UNI(*(U8*)s) !=
89ebb4a3 2899 utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
041457d9 2900 uniflags))
5ff6fc6d 2901 sayNO;
bc517b45 2902 l += ulen;
5ff6fc6d 2903 s ++;
1aa99e6b 2904 }
5ff6fc6d
JH
2905 }
2906 else {
2907 /* The target is not utf8, the pattern is utf8. */
1aa99e6b 2908 while (s < e) {
a3b680e6 2909 STRLEN ulen;
1aa99e6b
IH
2910 if (l >= PL_regeol)
2911 sayNO;
5ff6fc6d 2912 if (NATIVE_TO_UNI(*((U8*)l)) !=
89ebb4a3 2913 utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
041457d9 2914 uniflags))
1aa99e6b 2915 sayNO;
bc517b45 2916 s += ulen;
a72c7584 2917 l ++;
1aa99e6b 2918 }
5ff6fc6d 2919 }
1aa99e6b
IH
2920 locinput = l;
2921 nextchr = UCHARAT(locinput);
2922 break;
2923 }
bc517b45 2924 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2925 /* Inline the first character, for speed. */
2926 if (UCHARAT(s) != nextchr)
2927 sayNO;
5d9a96ca 2928 if (PL_regeol - locinput < st->ln)
d6a28714 2929 sayNO;
5d9a96ca 2930 if (st->ln > 1 && memNE(s, locinput, st->ln))
d6a28714 2931 sayNO;
5d9a96ca 2932 locinput += st->ln;
d6a28714
JH
2933 nextchr = UCHARAT(locinput);
2934 break;
95b24440 2935 }
d6a28714 2936 case EXACTFL:
b8c5462f
JH
2937 PL_reg_flags |= RF_tainted;
2938 /* FALL THROUGH */
95b24440
DM
2939 case EXACTF: {
2940 char *s = STRING(scan);
5d9a96ca 2941 st->ln = STR_LEN(scan);
d6a28714 2942
d07ddd77
JH
2943 if (do_utf8 || UTF) {
2944 /* Either target or the pattern are utf8. */
d6a28714 2945 char *l = locinput;
d07ddd77 2946 char *e = PL_regeol;
bc517b45 2947
5d9a96ca 2948 if (ibcmp_utf8(s, 0, st->ln, (bool)UTF,
1feea2c7 2949 l, &e, 0, do_utf8)) {
5486206c
JH
2950 /* One more case for the sharp s:
2951 * pack("U0U*", 0xDF) =~ /ss/i,
2952 * the 0xC3 0x9F are the UTF-8
2953 * byte sequence for the U+00DF. */
2954 if (!(do_utf8 &&
2955 toLOWER(s[0]) == 's' &&
5d9a96ca 2956 st->ln >= 2 &&
5486206c
JH
2957 toLOWER(s[1]) == 's' &&
2958 (U8)l[0] == 0xC3 &&
2959 e - l >= 2 &&
2960 (U8)l[1] == 0x9F))
2961 sayNO;
2962 }
d07ddd77
JH
2963 locinput = e;
2964 nextchr = UCHARAT(locinput);
2965 break;
a0ed51b3 2966 }
d6a28714 2967
bc517b45
JH
2968 /* Neither the target and the pattern are utf8. */
2969
d6a28714
JH
2970 /* Inline the first character, for speed. */
2971 if (UCHARAT(s) != nextchr &&
2972 UCHARAT(s) != ((OP(scan) == EXACTF)
2973 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2974 sayNO;
5d9a96ca 2975 if (PL_regeol - locinput < st->ln)
b8c5462f 2976 sayNO;
5d9a96ca
DM
2977 if (st->ln > 1 && (OP(scan) == EXACTF
2978 ? ibcmp(s, locinput, st->ln)
2979 : ibcmp_locale(s, locinput, st->ln)))
4633a7c4 2980 sayNO;
5d9a96ca 2981 locinput += st->ln;
d6a28714 2982 nextchr = UCHARAT(locinput);
a0d0e21e 2983 break;
95b24440 2984 }
d6a28714 2985 case ANYOF:
ffc61ed2 2986 if (do_utf8) {
9e55ce06
JH
2987 STRLEN inclasslen = PL_regeol - locinput;
2988
ba7b4546 2989 if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
e0f9d4a8 2990 sayNO_ANYOF;
ffc61ed2
JH
2991 if (locinput >= PL_regeol)
2992 sayNO;
0f0076b4 2993 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
b8c5462f 2994 nextchr = UCHARAT(locinput);
e0f9d4a8 2995 break;
ffc61ed2
JH
2996 }
2997 else {
2998 if (nextchr < 0)
2999 nextchr = UCHARAT(locinput);
7d3e948e 3000 if (!REGINCLASS(scan, (U8*)locinput))
e0f9d4a8 3001 sayNO_ANYOF;
ffc61ed2
JH
3002 if (!nextchr && locinput >= PL_regeol)
3003 sayNO;
3004 nextchr = UCHARAT(++locinput);
e0f9d4a8
JH
3005 break;
3006 }
3007 no_anyof:
3008 /* If we might have the case of the German sharp s
3009 * in a casefolding Unicode character class. */
3010
ebc501f0
JH
3011 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3012 locinput += SHARP_S_SKIP;
e0f9d4a8 3013 nextchr = UCHARAT(locinput);
ffc61ed2 3014 }
e0f9d4a8
JH
3015 else
3016 sayNO;
b8c5462f 3017 break;
d6a28714 3018 case ALNUML:
b8c5462f
JH
3019 PL_reg_flags |= RF_tainted;
3020 /* FALL THROUGH */
d6a28714 3021 case ALNUM:
b8c5462f 3022 if (!nextchr)
4633a7c4 3023 sayNO;
ffc61ed2 3024 if (do_utf8) {
1a4fad37 3025 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3026 if (!(OP(scan) == ALNUM
3568d838 3027 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 3028 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
3029 {
3030 sayNO;
a0ed51b3 3031 }
b8c5462f 3032 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3033 nextchr = UCHARAT(locinput);
3034 break;
3035 }
ffc61ed2 3036 if (!(OP(scan) == ALNUM
d6a28714 3037 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 3038 sayNO;
b8c5462f 3039 nextchr = UCHARAT(++locinput);
a0d0e21e 3040 break;
d6a28714 3041 case NALNUML:
b8c5462f
JH
3042 PL_reg_flags |= RF_tainted;
3043 /* FALL THROUGH */
d6a28714
JH
3044 case NALNUM:
3045 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 3046 sayNO;
ffc61ed2 3047 if (do_utf8) {
1a4fad37 3048 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3049 if (OP(scan) == NALNUM
3568d838 3050 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
3051 : isALNUM_LC_utf8((U8*)locinput))
3052 {
b8c5462f 3053 sayNO;
d6a28714 3054 }
b8c5462f
JH
3055 locinput += PL_utf8skip[nextchr];
3056 nextchr = UCHARAT(locinput);
3057 break;
3058 }
ffc61ed2 3059 if (OP(scan) == NALNUM
d6a28714 3060 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 3061 sayNO;
76e3520e 3062 nextchr = UCHARAT(++locinput);
a0d0e21e 3063 break;
d6a28714
JH
3064 case BOUNDL:
3065 case NBOUNDL:
3280af22 3066 PL_reg_flags |= RF_tainted;
bbce6d69 3067 /* FALL THROUGH */
d6a28714
JH
3068 case BOUND:
3069 case NBOUND:
3070 /* was last char in word? */
ffc61ed2 3071 if (do_utf8) {
12d33761 3072 if (locinput == PL_bostr)
5d9a96ca 3073 st->ln = '\n';
ffc61ed2 3074 else {
a3b680e6 3075 const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
9041c2e3 3076
4ad0818d 3077 st->ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
ffc61ed2
JH
3078 }
3079 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
5d9a96ca 3080 st->ln = isALNUM_uni(st->ln);
1a4fad37 3081 LOAD_UTF8_CHARCLASS_ALNUM();
3568d838 3082 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
3083 }
3084 else {
5d9a96ca 3085 st->ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(st->ln));
ffc61ed2
JH
3086 n = isALNUM_LC_utf8((U8*)locinput);
3087 }
a0ed51b3 3088 }
d6a28714 3089 else {
5d9a96ca 3090 st->ln = (locinput != PL_bostr) ?
12d33761 3091 UCHARAT(locinput - 1) : '\n';
ffc61ed2 3092 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
5d9a96ca 3093 st->ln = isALNUM(st->ln);
ffc61ed2
JH
3094 n = isALNUM(nextchr);
3095 }
3096 else {
5d9a96ca 3097 st->ln = isALNUM_LC(st->ln);
ffc61ed2
JH
3098 n = isALNUM_LC(nextchr);
3099 }
d6a28714 3100 }
5d9a96ca 3101 if (((!st->ln) == (!n)) == (OP(scan) == BOUND ||
ffc61ed2
JH
3102 OP(scan) == BOUNDL))
3103 sayNO;
a0ed51b3 3104 break;
d6a28714 3105 case SPACEL:
3280af22 3106 PL_reg_flags |= RF_tainted;
bbce6d69 3107 /* FALL THROUGH */
d6a28714 3108 case SPACE:
9442cb0e 3109 if (!nextchr)
4633a7c4 3110 sayNO;
1aa99e6b 3111 if (do_utf8) {
fd400ab9 3112 if (UTF8_IS_CONTINUED(nextchr)) {
1a4fad37 3113 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3114 if (!(OP(scan) == SPACE
3568d838 3115 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
3116 : isSPACE_LC_utf8((U8*)locinput)))
3117 {
3118 sayNO;
3119 }
3120 locinput += PL_utf8skip[nextchr];
3121 nextchr = UCHARAT(locinput);
3122 break;
d6a28714 3123 }
ffc61ed2
JH
3124 if (!(OP(scan) == SPACE
3125 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3126 sayNO;
3127 nextchr = UCHARAT(++locinput);
3128 }
3129 else {
3130 if (!(OP(scan) == SPACE
3131 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3132 sayNO;
3133 nextchr = UCHARAT(++locinput);
a0ed51b3 3134 }
a0ed51b3 3135 break;
d6a28714 3136 case NSPACEL:
3280af22 3137 PL_reg_flags |= RF_tainted;
bbce6d69 3138 /* FALL THROUGH */
d6a28714 3139 case NSPACE:
9442cb0e 3140 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3141 sayNO;
1aa99e6b 3142 if (do_utf8) {
1a4fad37 3143 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3144 if (OP(scan) == NSPACE
3568d838 3145 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 3146 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
3147 {
3148 sayNO;
3149 }
3150 locinput += PL_utf8skip[nextchr];
3151 nextchr = UCHARAT(locinput);
3152 break;
a0ed51b3 3153 }
ffc61ed2 3154 if (OP(scan) == NSPACE
d6a28714 3155 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 3156 sayNO;
76e3520e 3157 nextchr = UCHARAT(++locinput);
a0d0e21e 3158 break;
d6a28714 3159 case DIGITL:
a0ed51b3
LW
3160 PL_reg_flags |= RF_tainted;
3161 /* FALL THROUGH */
d6a28714 3162 case DIGIT:
9442cb0e 3163 if (!nextchr)
a0ed51b3 3164 sayNO;
1aa99e6b 3165 if (do_utf8) {
1a4fad37 3166 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3167 if (!(OP(scan) == DIGIT
3568d838 3168 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 3169 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 3170 {
a0ed51b3 3171 sayNO;
dfe13c55 3172 }
6f06b55f 3173 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3174 nextchr = UCHARAT(locinput);
3175 break;
3176 }
ffc61ed2 3177 if (!(OP(scan) == DIGIT
9442cb0e 3178 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
3179 sayNO;
3180 nextchr = UCHARAT(++locinput);
3181 break;
d6a28714 3182 case NDIGITL:
b8c5462f
JH
3183 PL_reg_flags |= RF_tainted;
3184 /* FALL THROUGH */
d6a28714 3185 case NDIGIT:
9442cb0e 3186 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3187 sayNO;
1aa99e6b 3188 if (do_utf8) {
1a4fad37 3189 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3190 if (OP(scan) == NDIGIT
3568d838 3191 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
3192 : isDIGIT_LC_utf8((U8*)locinput))
3193 {
a0ed51b3 3194 sayNO;
9442cb0e 3195 }
6f06b55f 3196 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3197 nextchr = UCHARAT(locinput);
3198 break;
3199 }
ffc61ed2 3200 if (OP(scan) == NDIGIT
9442cb0e 3201 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
3202 sayNO;
3203 nextchr = UCHARAT(++locinput);
3204 break;
3205 case CLUMP:
b7c83a7e 3206 if (locinput >= PL_regeol)
a0ed51b3 3207 sayNO;
b7c83a7e 3208 if (do_utf8) {
1a4fad37 3209 LOAD_UTF8_CHARCLASS_MARK();
b7c83a7e
JH
3210 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3211 sayNO;
3212 locinput += PL_utf8skip[nextchr];
3213 while (locinput < PL_regeol &&
3214 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3215 locinput += UTF8SKIP(locinput);
3216 if (locinput > PL_regeol)
3217 sayNO;
eb08e2da
JH
3218 }
3219 else
3220 locinput++;
a0ed51b3
LW
3221 nextchr = UCHARAT(locinput);
3222 break;
c8756f30 3223 case REFFL:
3280af22 3224 PL_reg_flags |= RF_tainted;
c8756f30 3225 /* FALL THROUGH */
c277df42 3226 case REF:
95b24440
DM
3227 case REFF: {
3228 char *s;
c277df42 3229 n = ARG(scan); /* which paren pair */
5d9a96ca 3230 st->ln = PL_regstartp[n];
2c2d71f5 3231 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
5d9a96ca 3232 if ((I32)*PL_reglastparen < n || st->ln == -1)
af3f8c16 3233 sayNO; /* Do not match unless seen CLOSEn. */
5d9a96ca 3234 if (st->ln == PL_regendp[n])
a0d0e21e 3235 break;
a0ed51b3 3236
5d9a96ca 3237 s = PL_bostr + st->ln;
1aa99e6b 3238 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 3239 char *l = locinput;
a3b680e6 3240 const char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
3241 /*
3242 * Note that we can't do the "other character" lookup trick as
3243 * in the 8-bit case (no pun intended) because in Unicode we
3244 * have to map both upper and title case to lower case.
3245 */
3246 if (OP(scan) == REFF) {
3247 while (s < e) {
a3b680e6
AL
3248 STRLEN ulen1, ulen2;
3249 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3250 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3251
a0ed51b3
LW
3252 if (l >= PL_regeol)
3253 sayNO;
a2a2844f
JH
3254 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3255 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 3256 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 3257 sayNO;
a2a2844f
JH
3258 s += ulen1;
3259 l += ulen2;
a0ed51b3
LW
3260 }
3261 }
3262 locinput = l;
3263 nextchr = UCHARAT(locinput);
3264 break;
3265 }
3266
a0d0e21e 3267 /* Inline the first character, for speed. */
76e3520e 3268 if (UCHARAT(s) != nextchr &&
c8756f30
AK
3269 (OP(scan) == REF ||
3270 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 3271 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 3272 sayNO;
5d9a96ca
DM
3273 st->ln = PL_regendp[n] - st->ln;
3274 if (locinput + st->ln > PL_regeol)
4633a7c4 3275 sayNO;
5d9a96ca
DM
3276 if (st->ln > 1 && (OP(scan) == REF
3277 ? memNE(s, locinput, st->ln)
c8756f30 3278 : (OP(scan) == REFF
5d9a96ca
DM
3279 ? ibcmp(s, locinput, st->ln)
3280 : ibcmp_locale(s, locinput, st->ln))))
4633a7c4 3281 sayNO;
5d9a96ca 3282 locinput += st->ln;
76e3520e 3283 nextchr = UCHARAT(locinput);
a0d0e21e 3284 break;
95b24440 3285 }
a0d0e21e
LW
3286
3287 case NOTHING:
c277df42 3288 case TAIL:
a0d0e21e
LW
3289 break;
3290 case BACK:
3291 break;
c277df42
IZ
3292 case EVAL:
3293 {
c277df42 3294 SV *ret;
8e5e9ebe 3295 {
4aabdb9b
DM
3296 /* execute the code in the {...} */
3297 dSP;
6136c704 3298 SV ** const before = SP;
4aabdb9b
DM
3299 OP_4tree * const oop = PL_op;
3300 COP * const ocurcop = PL_curcop;
3301 PAD *old_comppad;
3302 struct regexp * const oreg = PL_reg_re;
3303
3304 n = ARG(scan);
4f639d21 3305 PL_op = (OP_4tree*)PL_reg_re->data->data[n];
4aabdb9b 3306 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
4f639d21 3307 PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_reg_re->data->data[n + 2]);
4aabdb9b
DM
3308 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
3309
8e5e9ebe
RGS
3310 CALLRUNOPS(aTHX); /* Scalar context. */
3311 SPAGAIN;
3312 if (SP == before)
075aa684 3313 ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
8e5e9ebe
RGS
3314 else {
3315 ret = POPs;
3316 PUTBACK;
3317 }
4aabdb9b
DM
3318
3319 PL_op = oop;
3320 PAD_RESTORE_LOCAL(old_comppad);
3321 PL_curcop = ocurcop;
87830502 3322 PL_reg_re = oreg;
4aabdb9b
DM
3323 if (!st->logical) {
3324 /* /(?{...})/ */
3325 sv_setsv(save_scalar(PL_replgv), ret);
4aabdb9b
DM
3326 break;
3327 }
8e5e9ebe 3328 }
4aabdb9b
DM
3329 if (st->logical == 2) { /* Postponed subexpression: /(??{...})/ */
3330 regexp *re;
4aabdb9b 3331 {
4f639d21
DM
3332 /* extract RE object from returned value; compiling if
3333 * necessary */
3334
6136c704 3335 MAGIC *mg = NULL;
4aabdb9b 3336 SV *sv;
faf82a0b
AE
3337 if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3338 mg = mg_find(sv, PERL_MAGIC_qr);
3339 else if (SvSMAGICAL(ret)) {
3340 if (SvGMAGICAL(ret))
3341 sv_unmagic(ret, PERL_MAGIC_qr);
3342 else
3343 mg = mg_find(ret, PERL_MAGIC_qr);
0f5d15d6 3344 }
faf82a0b 3345
0f5d15d6
IZ
3346 if (mg) {
3347 re = (regexp *)mg->mg_obj;
df0003d4 3348 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
3349 }
3350 else {
3351 STRLEN len;
6136c704 3352 const char * const t = SvPV_const(ret, len);
0f5d15d6 3353 PMOP pm;
a3b680e6 3354 const I32 osize = PL_regsize;
0f5d15d6 3355
5fcd1c1b 3356 Zero(&pm, 1, PMOP);
4aabdb9b 3357 if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
83003860 3358 re = CALLREGCOMP(aTHX_ (char*)t, (char*)t + len, &pm);
9041c2e3 3359 if (!(SvFLAGS(ret)
faf82a0b
AE
3360 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3361 | SVs_GMG)))
14befaf4
DM
3362 sv_magic(ret,(SV*)ReREFCNT_inc(re),
3363 PERL_MAGIC_qr,0,0);
0f5d15d6 3364 PL_regsize = osize;
0f5d15d6 3365 }
4aabdb9b 3366 }
aa283a38
DM
3367
3368 /* run the pattern returned from (??{...}) */
3369
4aabdb9b
DM
3370 DEBUG_EXECUTE_r(
3371 PerlIO_printf(Perl_debug_log,
3372 "Entering embedded \"%s%.60s%s%s\"\n",
3373 PL_colors[0],
3374 re->precomp,
3375 PL_colors[1],
3376 (strlen(re->precomp) > 60 ? "..." : ""))
3377 );
2c2d71f5 3378
4aabdb9b
DM
3379 st->u.eval.cp = regcppush(0); /* Save *all* the positions. */
3380 REGCP_SET(st->u.eval.lastcp);
4aabdb9b
DM
3381 *PL_reglastparen = 0;
3382 *PL_reglastcloseparen = 0;
4aabdb9b 3383 PL_reginput = locinput;
4aabdb9b
DM
3384
3385 /* XXXX This is too dramatic a measure... */
3386 PL_reg_maxiter = 0;
3387
5d9a96ca 3388 st->logical = 0;
aa283a38
DM
3389 st->u.eval.toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
3390 ((re->reganch & ROPT_UTF8) != 0);
3391 if (st->u.eval.toggleutf) PL_reg_flags ^= RF_utf8;
3392 st->u.eval.prev_rex = rex;
3393 assert(rex == PL_reg_re); /* XXX */
3394 rex = re;
3395 PL_reg_re = rex; /* XXX */
3396
3397 st->u.eval.prev_eval = cur_eval;
3398 st->u.eval.prev_slab = PL_regmatch_slab;
3399 st->u.eval.depth = depth;
3400 cur_eval = st;
3401 PUSH_STATE(newst, resume_EVAL);
3402 st = newst;
3403
3404 /* now continue from first node in postoned RE */
3405 next = re->program + 1;
3406 break;
4aabdb9b 3407 /* NOTREACHED */
a0ed51b3 3408 }
4aabdb9b
DM
3409 /* /(?(?{...})X|Y)/ */
3410 st->sw = SvTRUE(ret);
3411 st->logical = 0;
c277df42
IZ
3412 break;
3413 }
a0d0e21e 3414 case OPEN:
c277df42 3415 n = ARG(scan); /* which paren pair */
3280af22
NIS
3416 PL_reg_start_tmp[n] = locinput;
3417 if (n > PL_regsize)
3418 PL_regsize = n;
a0d0e21e
LW
3419 break;
3420 case CLOSE:
c277df42 3421 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
3422 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3423 PL_regendp[n] = locinput - PL_bostr;
eb160463 3424 if (n > (I32)*PL_reglastparen)
3280af22 3425 *PL_reglastparen = n;
a01268b5 3426 *PL_reglastcloseparen = n;
a0d0e21e 3427 break;
c277df42
IZ
3428 case GROUPP:
3429 n = ARG(scan); /* which paren pair */
5d9a96ca 3430 st->sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
3431 break;
3432 case IFTHEN:
2c2d71f5 3433 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
5d9a96ca 3434 if (st->sw)
c277df42
IZ
3435 next = NEXTOPER(NEXTOPER(scan));
3436 else {
3437 next = scan + ARG(scan);
3438 if (OP(next) == IFTHEN) /* Fake one. */
3439 next = NEXTOPER(NEXTOPER(next));
3440 }
3441 break;
3442 case LOGICAL:
5d9a96ca 3443 st->logical = scan->flags;
c277df42 3444 break;
2ab05381 3445/*******************************************************************
a0374537
DM
3446 cc points to the regmatch_state associated with the most recent CURLYX.
3447 This struct contains info about the innermost (...)* loop (an
3448 "infoblock"), and a pointer to the next outer cc.
2ab05381
IZ
3449
3450 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3451
95b24440 3452 1) After matching Y, regnode for CURLYX is processed;
2ab05381 3453
a0374537 3454 2) This regnode populates cc, and calls regmatch() recursively
95b24440 3455 with the starting point at WHILEM node;
2ab05381
IZ
3456
3457 3) Each hit of WHILEM node tries to match A and Z (in the order
3458 depending on the current iteration, min/max of {min,max} and
3459 greediness). The information about where are nodes for "A"
a0374537 3460 and "Z" is read from cc, as is info on how many times "A"
2ab05381
IZ
3461 was already matched, and greediness.
3462
3463 4) After A matches, the same WHILEM node is hit again.
3464
95b24440 3465 5) Each time WHILEM is hit, cc is the infoblock created by CURLYX
2ab05381 3466 of the same pair. Thus when WHILEM tries to match Z, it temporarily
95b24440 3467 resets cc, since this Y(A)*Z can be a part of some other loop:
2ab05381
IZ
3468 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
3469 of the external loop.
3470
a0374537 3471 Currently present infoblocks form a tree with a stem formed by st->cc
2ab05381
IZ
3472 and whatever it mentions via ->next, and additional attached trees
3473 corresponding to temporarily unset infoblocks as in "5" above.
3474
95b24440 3475 In the following picture, infoblocks for outer loop of
2ab05381
IZ
3476 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
3477 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
3478 infoblocks are drawn below the "reset" infoblock.
3479
3480 In fact in the picture below we do not show failed matches for Z and T
3481 by WHILEM blocks. [We illustrate minimal matches, since for them it is
3482 more obvious *why* one needs to *temporary* unset infoblocks.]
3483
3484 Matched REx position InfoBlocks Comment
3485 (Y(A)*?Z)*?T x
3486 Y(A)*?Z)*?T x <- O
3487 Y (A)*?Z)*?T x <- O
3488 Y A)*?Z)*?T x <- O <- I
3489 YA )*?Z)*?T x <- O <- I
3490 YA A)*?Z)*?T x <- O <- I
3491 YAA )*?Z)*?T x <- O <- I
3492 YAA Z)*?T x <- O # Temporary unset I
3493 I
3494
3495 YAAZ Y(A)*?Z)*?T x <- O
3496 I
3497
3498 YAAZY (A)*?Z)*?T x <- O
3499 I
3500
3501 YAAZY A)*?Z)*?T x <- O <- I
3502 I
3503
3504 YAAZYA )*?Z)*?T x <- O <- I
3505 I
3506
3507 YAAZYA Z)*?T x <- O # Temporary unset I
3508 I,I
3509
3510 YAAZYAZ )*?T x <- O
3511 I,I
3512
3513 YAAZYAZ T x # Temporary unset O
3514 O
3515 I,I
3516
3517 YAAZYAZT x
3518 O
3519 I,I
3520 *******************************************************************/
95b24440 3521
a0d0e21e 3522 case CURLYX: {
cb434fcc
IZ
3523 /* No need to save/restore up to this paren */
3524 I32 parenfloor = scan->flags;
c277df42
IZ
3525
3526 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3527 next += ARG(next);
cb434fcc
IZ
3528 /* XXXX Probably it is better to teach regpush to support
3529 parenfloor > PL_regsize... */
eb160463 3530 if (parenfloor > (I32)*PL_reglastparen)
cb434fcc 3531 parenfloor = *PL_reglastparen; /* Pessimization... */
a0374537 3532
d8319b27
DM
3533 st->u.curlyx.cp = PL_savestack_ix;
3534 st->u.curlyx.outercc = st->cc;
a0374537
DM
3535 st->cc = st;
3536 /* these fields contain the state of the current curly.
3537 * they are accessed by subsequent WHILEMs;
3538 * cur and lastloc are also updated by WHILEM */
d8319b27
DM
3539 st->u.curlyx.parenfloor = parenfloor;
3540 st->u.curlyx.cur = -1; /* this will be updated by WHILEM */
3541 st->u.curlyx.min = ARG1(scan);
3542 st->u.curlyx.max = ARG2(scan);
3543 st->u.curlyx.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3544 st->u.curlyx.lastloc = 0;
a0374537
DM
3545 /* st->next and st->minmod are also read by WHILEM */
3546
3280af22 3547 PL_reginput = locinput;
95b24440
DM
3548 REGMATCH(PREVOPER(next), CURLYX); /* start on the WHILEM */
3549 /*** all unsaved local vars undefined at this point */
d8319b27
DM
3550 regcpblow(st->u.curlyx.cp);
3551 st->cc = st->u.curlyx.outercc;
95b24440 3552 saySAME(result);
a0d0e21e 3553 }
5f66b61c 3554 /* NOTREACHED */
a0d0e21e
LW
3555 case WHILEM: {
3556 /*
3557 * This is really hard to understand, because after we match
3558 * what we're trying to match, we must make sure the rest of
2c2d71f5 3559 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
3560 * to go back UP the parse tree by recursing ever deeper. And
3561 * if it fails, we have to reset our parent's current state
3562 * that we can try again after backing off.
3563 */
3564
d8319b27
DM
3565 st->u.whilem.lastloc = st->cc->u.curlyx.lastloc; /* Detection of 0-len. */
3566 st->u.whilem.cache_offset = 0;
3567 st->u.whilem.cache_bit = 0;
c277df42 3568
d8319b27 3569 n = st->cc->u.curlyx.cur + 1; /* how many we know we matched */
3280af22 3570 PL_reginput = locinput;
a0d0e21e 3571
a3621e74 3572 DEBUG_EXECUTE_r(
9041c2e3 3573 PerlIO_printf(Perl_debug_log,
91f3b821 3574 "%*s %ld out of %ld..%ld cc=%"UVxf"\n",
3280af22 3575 REPORT_CODE_OFF+PL_regindent*2, "",
d8319b27
DM
3576 (long)n, (long)st->cc->u.curlyx.min,
3577 (long)st->cc->u.curlyx.max, PTR2UV(st->cc))
c277df42 3578 );
4633a7c4 3579
a0d0e21e
LW
3580 /* If degenerate scan matches "", assume scan done. */
3581
d8319b27
DM
3582 if (locinput == st->cc->u.curlyx.lastloc && n >= st->cc->u.curlyx.min) {
3583 st->u.whilem.savecc = st->cc;
3584 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3585 if (st->cc)
d8319b27 3586 st->ln = st->cc->u.curlyx.cur;
a3621e74 3587 DEBUG_EXECUTE_r(
c3464db5
DD
3588 PerlIO_printf(Perl_debug_log,
3589 "%*s empty match detected, try continuation...\n",
3280af22 3590 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3591 );
d8319b27 3592 REGMATCH(st->u.whilem.savecc->next, WHILEM1);
95b24440 3593 /*** all unsaved local vars undefined at this point */
d8319b27 3594 st->cc = st->u.whilem.savecc;
95b24440 3595 if (result)
4633a7c4 3596 sayYES;
d8319b27
DM
3597 if (st->cc->u.curlyx.outercc)
3598 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
4633a7c4 3599 sayNO;
a0d0e21e
LW
3600 }
3601
3602 /* First just match a string of min scans. */
3603
d8319b27
DM
3604 if (n < st->cc->u.curlyx.min) {
3605 st->cc->u.curlyx.cur = n;
3606 st->cc->u.curlyx.lastloc = locinput;
3607 REGMATCH(st->cc->u.curlyx.scan, WHILEM2);
95b24440
DM
3608 /*** all unsaved local vars undefined at this point */
3609 if (result)
4633a7c4 3610 sayYES;
d8319b27
DM
3611 st->cc->u.curlyx.cur = n - 1;
3612 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
4633a7c4 3613 sayNO;
a0d0e21e
LW
3614 }
3615
2c2d71f5
JH
3616 if (scan->flags) {
3617 /* Check whether we already were at this position.
3618 Postpone detection until we know the match is not
3619 *that* much linear. */
3620 if (!PL_reg_maxiter) {
3621 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3622 PL_reg_leftiter = PL_reg_maxiter;
3623 }
3624 if (PL_reg_leftiter-- == 0) {
a3b680e6 3625 const I32 size = (PL_reg_maxiter + 7 + POSCACHE_START)/8;
2c2d71f5 3626 if (PL_reg_poscache) {
eb160463 3627 if ((I32)PL_reg_poscache_size < size) {
2c2d71f5
JH
3628 Renew(PL_reg_poscache, size, char);
3629 PL_reg_poscache_size = size;
3630 }
3631 Zero(PL_reg_poscache, size, char);
3632 }
3633 else {
3634 PL_reg_poscache_size = size;
a02a5408 3635 Newxz(PL_reg_poscache, size, char);
2c2d71f5 3636 }
a3621e74 3637 DEBUG_EXECUTE_r(
2c2d71f5
JH
3638 PerlIO_printf(Perl_debug_log,
3639 "%sDetected a super-linear match, switching on caching%s...\n",
3640 PL_colors[4], PL_colors[5])
3641 );
3642 }
3643 if (PL_reg_leftiter < 0) {
d8319b27 3644 st->u.whilem.cache_offset = locinput - PL_bostr;
2c2d71f5 3645
d8319b27
DM
3646 st->u.whilem.cache_offset = (scan->flags & 0xf) - 1 + POSCACHE_START
3647 + st->u.whilem.cache_offset * (scan->flags>>4);
3648 st->u.whilem.cache_bit = st->u.whilem.cache_offset % 8;
3649 st->u.whilem.cache_offset /= 8;
3650 if (PL_reg_poscache[st->u.whilem.cache_offset] & (1<<st->u.whilem.cache_bit)) {
a3621e74 3651 DEBUG_EXECUTE_r(
2c2d71f5
JH
3652 PerlIO_printf(Perl_debug_log,
3653 "%*s already tried at this position...\n",
3654 REPORT_CODE_OFF+PL_regindent*2, "")
3655 );
3ab3c9b4
HS
3656 if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))
3657 /* cache records success */
c2b0868c
HS
3658 sayYES;
3659 else
3ab3c9b4 3660 /* cache records failure */
c2b0868c 3661 sayNO_SILENT;
2c2d71f5 3662 }
d8319b27 3663 PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit);
2c2d71f5
JH
3664 }
3665 }
3666
a0d0e21e
LW
3667 /* Prefer next over scan for minimal matching. */
3668
5d9a96ca 3669 if (st->cc->minmod) {
d8319b27
DM
3670 st->u.whilem.savecc = st->cc;
3671 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3672 if (st->cc)
d8319b27
DM
3673 st->ln = st->cc->u.curlyx.cur;
3674 st->u.whilem.cp = regcppush(st->u.whilem.savecc->u.curlyx.parenfloor);
3675 REGCP_SET(st->u.whilem.lastcp);
3676 REGMATCH(st->u.whilem.savecc->next, WHILEM3);
95b24440 3677 /*** all unsaved local vars undefined at this point */
d8319b27 3678 st->cc = st->u.whilem.savecc;
95b24440 3679 if (result) {
d8319b27 3680 regcpblow(st->u.whilem.cp);
3ab3c9b4 3681 CACHEsayYES; /* All done. */
5f05dabc 3682 }
d8319b27 3683 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3684 regcppop(rex);
d8319b27
DM
3685 if (st->cc->u.curlyx.outercc)
3686 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
a0d0e21e 3687
d8319b27 3688 if (n >= st->cc->u.curlyx.max) { /* Maximum greed exceeded? */
9041c2e3 3689 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3690 && !(PL_reg_flags & RF_warned)) {
3691 PL_reg_flags |= RF_warned;
9014280d 3692 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
2f3ca594
GS
3693 "Complex regular subexpression recursion",
3694 REG_INFTY - 1);
c277df42 3695 }
3ab3c9b4 3696 CACHEsayNO;
c277df42 3697 }
a687059c 3698
a3621e74 3699 DEBUG_EXECUTE_r(
c3464db5
DD
3700 PerlIO_printf(Perl_debug_log,
3701 "%*s trying longer...\n",
3280af22 3702 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3703 );
a0d0e21e 3704 /* Try scanning more and see if it helps. */
3280af22 3705 PL_reginput = locinput;
d8319b27
DM
3706 st->cc->u.curlyx.cur = n;
3707 st->cc->u.curlyx.lastloc = locinput;
3708 st->u.whilem.cp = regcppush(st->cc->u.curlyx.parenfloor);
3709 REGCP_SET(st->u.whilem.lastcp);
3710 REGMATCH(st->cc->u.curlyx.scan, WHILEM4);
95b24440
DM
3711 /*** all unsaved local vars undefined at this point */
3712 if (result) {
d8319b27 3713 regcpblow(st->u.whilem.cp);
3ab3c9b4 3714 CACHEsayYES;
5f05dabc 3715 }
d8319b27 3716 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3717 regcppop(rex);
d8319b27
DM
3718 st->cc->u.curlyx.cur = n - 1;
3719 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
3ab3c9b4 3720 CACHEsayNO;
a0d0e21e
LW
3721 }
3722
3723 /* Prefer scan over next for maximal matching. */
3724
d8319b27
DM
3725 if (n < st->cc->u.curlyx.max) { /* More greed allowed? */
3726 st->u.whilem.cp = regcppush(st->cc->u.curlyx.parenfloor);
3727 st->cc->u.curlyx.cur = n;
3728 st->cc->u.curlyx.lastloc = locinput;
3729 REGCP_SET(st->u.whilem.lastcp);
3730 REGMATCH(st->cc->u.curlyx.scan, WHILEM5);
95b24440
DM
3731 /*** all unsaved local vars undefined at this point */
3732 if (result) {
d8319b27 3733 regcpblow(st->u.whilem.cp);
3ab3c9b4 3734 CACHEsayYES;
5f05dabc 3735 }
d8319b27 3736 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3737 regcppop(rex); /* Restore some previous $<digit>s? */
3280af22 3738 PL_reginput = locinput;
a3621e74 3739 DEBUG_EXECUTE_r(
c3464db5
DD
3740 PerlIO_printf(Perl_debug_log,
3741 "%*s failed, try continuation...\n",
3280af22 3742 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3743 );
3744 }
9041c2e3 3745 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3746 && !(PL_reg_flags & RF_warned)) {
3280af22 3747 PL_reg_flags |= RF_warned;
9014280d 3748 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
cb5d145d
GS
3749 "Complex regular subexpression recursion",
3750 REG_INFTY - 1);
a0d0e21e
LW
3751 }
3752
3753 /* Failed deeper matches of scan, so see if this one works. */
d8319b27
DM
3754 st->u.whilem.savecc = st->cc;
3755 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3756 if (st->cc)
d8319b27
DM
3757 st->ln = st->cc->u.curlyx.cur;
3758 REGMATCH(st->u.whilem.savecc->next, WHILEM6);
95b24440 3759 /*** all unsaved local vars undefined at this point */
d8319b27 3760 st->cc = st->u.whilem.savecc;
95b24440 3761 if (result)
3ab3c9b4 3762 CACHEsayYES;
d8319b27
DM
3763 if (st->cc->u.curlyx.outercc)
3764 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
3765 st->cc->u.curlyx.cur = n - 1;
3766 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
3ab3c9b4 3767 CACHEsayNO;
a0d0e21e 3768 }
5f66b61c 3769 /* NOTREACHED */
9041c2e3 3770 case BRANCHJ:
c277df42
IZ
3771 next = scan + ARG(scan);
3772 if (next == scan)
3773 next = NULL;
3774 inner = NEXTOPER(NEXTOPER(scan));
3775 goto do_branch;
9041c2e3 3776 case BRANCH:
c277df42
IZ
3777 inner = NEXTOPER(scan);
3778 do_branch:
3779 {
e822a8b4
DM
3780 I32 type;
3781 type = OP(scan);
3782 if (OP(next) != type) /* No choice. */
c277df42 3783 next = inner; /* Avoid recursion. */
a0d0e21e 3784 else {
a3b680e6 3785 const I32 lastparen = *PL_reglastparen;
02db2b7b 3786 /* Put unwinding data on stack */
6136c704
AL
3787 const I32 unwind1 = SSNEWt(1,re_unwind_branch_t);
3788 re_unwind_branch_t * const uw = SSPTRt(unwind1,re_unwind_branch_t);
3789
5d9a96ca
DM
3790 uw->prev = st->unwind;
3791 st->unwind = unwind1;
e822a8b4 3792 uw->type = ((type == BRANCH)
02db2b7b
IZ
3793 ? RE_UNWIND_BRANCH
3794 : RE_UNWIND_BRANCHJ);
3795 uw->lastparen = lastparen;
3796 uw->next = next;
3797 uw->locinput = locinput;
3798 uw->nextchr = nextchr;
3a2830be 3799 uw->minmod = st->minmod;
02db2b7b
IZ
3800#ifdef DEBUGGING
3801 uw->regindent = ++PL_regindent;
3802#endif
c277df42 3803
02db2b7b
IZ
3804 REGCP_SET(uw->lastcp);
3805
3806 /* Now go into the first branch */
3807 next = inner;
a687059c 3808 }
a0d0e21e
LW
3809 }
3810 break;
3811 case MINMOD:
5d9a96ca 3812 st->minmod = 1;
a0d0e21e 3813 break;
c277df42
IZ
3814 case CURLYM:
3815 {
d8319b27 3816 st->u.curlym.l = st->u.curlym.matches = 0;
9041c2e3 3817
c277df42 3818 /* We suppose that the next guy does not need
0e788c72 3819 backtracking: in particular, it is of constant non-zero length,
c277df42 3820 and has no parenths to influence future backrefs. */
5d9a96ca 3821 st->ln = ARG1(scan); /* min to match */
c277df42 3822 n = ARG2(scan); /* max to match */
d8319b27
DM
3823 st->u.curlym.paren = scan->flags;
3824 if (st->u.curlym.paren) {
3825 if (st->u.curlym.paren > PL_regsize)
3826 PL_regsize = st->u.curlym.paren;
3827 if (st->u.curlym.paren > (I32)*PL_reglastparen)
3828 *PL_reglastparen = st->u.curlym.paren;
c277df42 3829 }
dc45a647 3830 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
d8319b27 3831 if (st->u.curlym.paren)
c277df42 3832 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3833 PL_reginput = locinput;
d8319b27
DM
3834 st->u.curlym.maxwanted = st->minmod ? st->ln : n;
3835 if (st->u.curlym.maxwanted) {
3836 while (PL_reginput < PL_regeol && st->u.curlym.matches < st->u.curlym.maxwanted) {
95b24440
DM
3837 REGMATCH(scan, CURLYM1);
3838 /*** all unsaved local vars undefined at this point */
3839 if (!result)
6407bf3b 3840 break;
d8319b27
DM
3841 /* on first match, determine length, u.curlym.l */
3842 if (!st->u.curlym.matches++) {
6407bf3b
DM
3843 if (PL_reg_match_utf8) {
3844 char *s = locinput;
3845 while (s < PL_reginput) {
d8319b27 3846 st->u.curlym.l++;
6407bf3b
DM
3847 s += UTF8SKIP(s);
3848 }
3849 }
3850 else {
d8319b27 3851 st->u.curlym.l = PL_reginput - locinput;
6407bf3b 3852 }
d8319b27
DM
3853 if (st->u.curlym.l == 0) {
3854 st->u.curlym.matches = st->u.curlym.maxwanted;
6407bf3b
DM
3855 break;
3856 }
3857 }
3858 locinput = PL_reginput;
3859 }
3860 }
3861
3862 PL_reginput = locinput;
3863
5d9a96ca
DM
3864 if (st->minmod) {
3865 st->minmod = 0;
d8319b27 3866 if (st->ln && st->u.curlym.matches < st->ln)
c277df42 3867 sayNO;
cca55fe3 3868 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3869 regnode *text_node = next;
3870
cca55fe3 3871 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3872
d8319b27 3873 if (! HAS_TEXT(text_node)) st->u.curlym.c1 = st->u.curlym.c2 = -1000;
5f80c4cf 3874 else {
cca55fe3 3875 if (PL_regkind[(U8)OP(text_node)] == REF) {
d8319b27 3876 st->u.curlym.c1 = st->u.curlym.c2 = -1000;
44a68960 3877 goto assume_ok_MM;
cca55fe3 3878 }
d8319b27 3879 else { st->u.curlym.c1 = (U8)*STRING(text_node); }
af5decee 3880 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
d8319b27 3881 st->u.curlym.c2 = PL_fold[st->u.curlym.c1];
af5decee 3882 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
d8319b27 3883 st->u.curlym.c2 = PL_fold_locale[st->u.curlym.c1];
5f80c4cf 3884 else
d8319b27 3885 st->u.curlym.c2 = st->u.curlym.c1;
5f80c4cf 3886 }
a0ed51b3
LW
3887 }
3888 else
d8319b27 3889 st->u.curlym.c1 = st->u.curlym.c2 = -1000;
cca55fe3 3890 assume_ok_MM:
d8319b27 3891 REGCP_SET(st->u.curlym.lastcp);
5d9a96ca 3892 while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */
c277df42 3893 /* If it could work, try it. */
d8319b27
DM
3894 if (st->u.curlym.c1 == -1000 ||
3895 UCHARAT(PL_reginput) == st->u.curlym.c1 ||
3896 UCHARAT(PL_reginput) == st->u.curlym.c2)
c277df42 3897 {
d8319b27 3898 if (st->u.curlym.paren) {
5d9a96ca 3899 if (st->ln) {
d8319b27
DM
3900 PL_regstartp[st->u.curlym.paren] =
3901 HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr;
3902 PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3903 }
3904 else
d8319b27 3905 PL_regendp[st->u.curlym.paren] = -1;
c277df42 3906 }
95b24440
DM
3907 REGMATCH(next, CURLYM2);
3908 /*** all unsaved local vars undefined at this point */
3909 if (result)
c277df42 3910 sayYES;
d8319b27 3911 REGCP_UNWIND(st->u.curlym.lastcp);
c277df42
IZ
3912 }
3913 /* Couldn't or didn't -- move forward. */
3280af22 3914 PL_reginput = locinput;
95b24440
DM
3915 REGMATCH(scan, CURLYM3);
3916 /*** all unsaved local vars undefined at this point */
3917 if (result) {
5d9a96ca 3918 st->ln++;
3280af22 3919 locinput = PL_reginput;
c277df42
IZ
3920 }
3921 else
3922 sayNO;
3923 }
a0ed51b3
LW
3924 }
3925 else {
a3621e74 3926 DEBUG_EXECUTE_r(
5c0ca799 3927 PerlIO_printf(Perl_debug_log,
95b24440
DM
3928 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
3929 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
d8319b27 3930 (IV) st->u.curlym.matches, (IV)st->u.curlym.l)
c277df42 3931 );
d8319b27 3932 if (st->u.curlym.matches >= st->ln) {
cca55fe3 3933 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3934 regnode *text_node = next;
3935
cca55fe3 3936 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3937
d8319b27 3938 if (! HAS_TEXT(text_node)) st->u.curlym.c1 = st->u.curlym.c2 = -1000;
5f80c4cf 3939 else {
cca55fe3 3940 if (PL_regkind[(U8)OP(text_node)] == REF) {
d8319b27 3941 st->u.curlym.c1 = st->u.curlym.c2 = -1000;
44a68960 3942 goto assume_ok_REG;
cca55fe3 3943 }
d8319b27 3944 else { st->u.curlym.c1 = (U8)*STRING(text_node); }
cca55fe3 3945
af5decee 3946 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
d8319b27 3947 st->u.curlym.c2 = PL_fold[st->u.curlym.c1];
af5decee 3948 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
d8319b27 3949 st->u.curlym.c2 = PL_fold_locale[st->u.curlym.c1];
5f80c4cf 3950 else
d8319b27 3951 st->u.curlym.c2 = st->u.curlym.c1;
5f80c4cf 3952 }
a0ed51b3
LW
3953 }
3954 else
d8319b27 3955 st->u.curlym.c1 = st->u.curlym.c2 = -1000;
c277df42 3956 }
cca55fe3 3957 assume_ok_REG:
d8319b27
DM
3958 REGCP_SET(st->u.curlym.lastcp);
3959 while (st->u.curlym.matches >= st->ln) {
c277df42 3960 /* If it could work, try it. */
d8319b27
DM
3961 if (st->u.curlym.c1 == -1000 ||
3962 UCHARAT(PL_reginput) == st->u.curlym.c1 ||
3963 UCHARAT(PL_reginput) == st->u.curlym.c2)
a0ed51b3 3964 {
a3621e74 3965 DEBUG_EXECUTE_r(
6407bf3b
DM
3966 PerlIO_printf(Perl_debug_log,
3967 "%*s trying tail with matches=%"IVdf"...\n",
3968 (int)(REPORT_CODE_OFF+PL_regindent*2),
d8319b27 3969 "", (IV)st->u.curlym.matches)
a0ed51b3 3970 );
d8319b27
DM
3971 if (st->u.curlym.paren) {
3972 if (st->u.curlym.matches) {
3973 PL_regstartp[st->u.curlym.paren]
3974 = HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr;
3975 PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr;
c277df42 3976 }
a0ed51b3 3977 else
d8319b27 3978 PL_regendp[st->u.curlym.paren] = -1;
c277df42 3979 }
95b24440
DM
3980 REGMATCH(next, CURLYM4);
3981 /*** all unsaved local vars undefined at this point */
3982 if (result)
a0ed51b3 3983 sayYES;
d8319b27 3984 REGCP_UNWIND(st->u.curlym.lastcp);
a0ed51b3 3985 }
c277df42 3986 /* Couldn't or didn't -- back up. */
d8319b27
DM
3987 st->u.curlym.matches--;
3988 locinput = HOPc(locinput, -st->u.curlym.l);
3280af22 3989 PL_reginput = locinput;
c277df42
IZ
3990 }
3991 }
3992 sayNO;
5f66b61c 3993 /* NOTREACHED */
c277df42
IZ
3994 break;
3995 }
3996 case CURLYN:
d8319b27
DM
3997 st->u.plus.paren = scan->flags; /* Which paren to set */
3998 if (st->u.plus.paren > PL_regsize)
3999 PL_regsize = st->u.plus.paren;
4000 if (st->u.plus.paren > (I32)*PL_reglastparen)
4001 *PL_reglastparen = st->u.plus.paren;
5d9a96ca 4002 st->ln = ARG1(scan); /* min to match */
c277df42 4003 n = ARG2(scan); /* max to match */
dc45a647 4004 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 4005 goto repeat;
a0d0e21e 4006 case CURLY:
d8319b27 4007 st->u.plus.paren = 0;
5d9a96ca 4008 st->ln = ARG1(scan); /* min to match */
a0d0e21e 4009 n = ARG2(scan); /* max to match */
dc45a647 4010 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
4011 goto repeat;
4012 case STAR:
5d9a96ca 4013 st->ln = 0;
c277df42 4014 n = REG_INFTY;
a0d0e21e 4015 scan = NEXTOPER(scan);
d8319b27 4016 st->u.plus.paren = 0;
a0d0e21e
LW
4017 goto repeat;
4018 case PLUS:
5d9a96ca 4019 st->ln = 1;
c277df42
IZ
4020 n = REG_INFTY;
4021 scan = NEXTOPER(scan);
d8319b27 4022 st->u.plus.paren = 0;
c277df42 4023 repeat:
a0d0e21e
LW
4024 /*
4025 * Lookahead to avoid useless match attempts
4026 * when we know what character comes next.
4027 */
5f80c4cf
JP
4028
4029 /*
4030 * Used to only do .*x and .*?x, but now it allows
4031 * for )'s, ('s and (?{ ... })'s to be in the way
4032 * of the quantifier and the EXACT-like node. -- japhy
4033 */
4034
cca55fe3 4035 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
4036 U8 *s;
4037 regnode *text_node = next;
4038
cca55fe3 4039 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 4040
d8319b27 4041 if (! HAS_TEXT(text_node)) st->u.plus.c1 = st->u.plus.c2 = -1000;
5f80c4cf 4042 else {
cca55fe3 4043 if (PL_regkind[(U8)OP(text_node)] == REF) {
d8319b27 4044 st->u.plus.c1 = st->u.plus.c2 = -1000;
44a68960 4045 goto assume_ok_easy;
cca55fe3
JP
4046 }
4047 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
4048
4049 if (!UTF) {
d8319b27 4050 st->u.plus.c2 = st->u.plus.c1 = *s;
f65d3ee7 4051 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
d8319b27 4052 st->u.plus.c2 = PL_fold[st->u.plus.c1];
f65d3ee7 4053 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
d8319b27 4054 st->u.plus.c2 = PL_fold_locale[st->u.plus.c1];
1aa99e6b 4055 }
5f80c4cf 4056 else { /* UTF */
f65d3ee7 4057 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 4058 STRLEN ulen1, ulen2;
89ebb4a3
JH
4059 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4060 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
a2a2844f
JH
4061
4062 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4063 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4064
d8319b27 4065 st->u.plus.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
041457d9 4066 uniflags);
d8319b27 4067 st->u.plus.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
041457d9 4068 uniflags);
5f80c4cf
JP
4069 }
4070 else {
d8319b27 4071 st->u.plus.c2 = st->u.plus.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
041457d9 4072 uniflags);
5f80c4cf 4073 }
1aa99e6b
IH
4074 }
4075 }
bbce6d69 4076 }
a0d0e21e 4077 else
d8319b27 4078 st->u.plus.c1 = st->u.plus.c2 = -1000;
cca55fe3 4079 assume_ok_easy:
3280af22 4080 PL_reginput = locinput;
5d9a96ca
DM
4081 if (st->minmod) {
4082 st->minmod = 0;
4083 if (st->ln && regrepeat(scan, st->ln) < st->ln)
4633a7c4 4084 sayNO;
a0ed51b3 4085 locinput = PL_reginput;
d8319b27
DM
4086 REGCP_SET(st->u.plus.lastcp);
4087 if (st->u.plus.c1 != -1000) {
4088 st->u.plus.old = locinput;
4089 st->u.plus.count = 0;
0fe9bf95 4090
1aa99e6b 4091 if (n == REG_INFTY) {
d8319b27 4092 st->u.plus.e = PL_regeol - 1;
1aa99e6b 4093 if (do_utf8)
d8319b27
DM
4094 while (UTF8_IS_CONTINUATION(*(U8*)st->u.plus.e))
4095 st->u.plus.e--;
1aa99e6b
IH
4096 }
4097 else if (do_utf8) {
5d9a96ca 4098 int m = n - st->ln;
d8319b27
DM
4099 for (st->u.plus.e = locinput;
4100 m >0 && st->u.plus.e + UTF8SKIP(st->u.plus.e) <= PL_regeol; m--)
4101 st->u.plus.e += UTF8SKIP(st->u.plus.e);
1aa99e6b
IH
4102 }
4103 else {
d8319b27
DM
4104 st->u.plus.e = locinput + n - st->ln;
4105 if (st->u.plus.e >= PL_regeol)
4106 st->u.plus.e = PL_regeol - 1;
1aa99e6b 4107 }
0fe9bf95
IZ
4108 while (1) {
4109 /* Find place 'next' could work */
1aa99e6b 4110 if (!do_utf8) {
d8319b27
DM
4111 if (st->u.plus.c1 == st->u.plus.c2) {
4112 while (locinput <= st->u.plus.e &&
4113 UCHARAT(locinput) != st->u.plus.c1)
1aa99e6b
IH
4114 locinput++;
4115 } else {
d8319b27
DM
4116 while (locinput <= st->u.plus.e
4117 && UCHARAT(locinput) != st->u.plus.c1
4118 && UCHARAT(locinput) != st->u.plus.c2)
1aa99e6b
IH
4119 locinput++;
4120 }
d8319b27 4121 st->u.plus.count = locinput - st->u.plus.old;
1aa99e6b
IH
4122 }
4123 else {
d8319b27 4124 if (st->u.plus.c1 == st->u.plus.c2) {
a3b680e6 4125 STRLEN len;
872c91ae
JH
4126 /* count initialised to
4127 * utf8_distance(old, locinput) */
d8319b27 4128 while (locinput <= st->u.plus.e &&
872c91ae 4129 utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4130 UTF8_MAXBYTES, &len,
d8319b27 4131 uniflags) != (UV)st->u.plus.c1) {
1aa99e6b 4132 locinput += len;
d8319b27 4133 st->u.plus.count++;
b2f2f093 4134 }
1aa99e6b 4135 } else {
a3b680e6 4136 STRLEN len;
872c91ae
JH
4137 /* count initialised to
4138 * utf8_distance(old, locinput) */
d8319b27 4139 while (locinput <= st->u.plus.e) {
872c91ae 4140 UV c = utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4141 UTF8_MAXBYTES, &len,
041457d9 4142 uniflags);
d8319b27 4143 if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
1aa99e6b 4144 break;
b2f2f093 4145 locinput += len;
d8319b27 4146 st->u.plus.count++;
1aa99e6b
IH
4147 }
4148 }
0fe9bf95 4149 }
d8319b27 4150 if (locinput > st->u.plus.e)
0fe9bf95
IZ
4151 sayNO;
4152 /* PL_reginput == old now */
d8319b27 4153 if (locinput != st->u.plus.old) {
5d9a96ca 4154 st->ln = 1; /* Did some */
d8319b27 4155 if (regrepeat(scan, st->u.plus.count) < st->u.plus.count)
0fe9bf95
IZ
4156 sayNO;
4157 }
4158 /* PL_reginput == locinput now */
d8319b27 4159 TRYPAREN(st->u.plus.paren, st->ln, locinput, PLUS1);
95b24440 4160 /*** all unsaved local vars undefined at this point */
0fe9bf95 4161 PL_reginput = locinput; /* Could be reset... */
d8319b27 4162 REGCP_UNWIND(st->u.plus.lastcp);
0fe9bf95 4163 /* Couldn't or didn't -- move forward. */
d8319b27 4164 st->u.plus.old = locinput;
1aa99e6b
IH
4165 if (do_utf8)
4166 locinput += UTF8SKIP(locinput);
4167 else
4168 locinput++;
d8319b27 4169 st->u.plus.count = 1;
0fe9bf95
IZ
4170 }
4171 }
4172 else
5d9a96ca 4173 while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */
1aa99e6b 4174 UV c;
d8319b27 4175 if (st->u.plus.c1 != -1000) {
1aa99e6b 4176 if (do_utf8)
872c91ae 4177 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4178 UTF8_MAXBYTES, 0,
041457d9 4179 uniflags);
1aa99e6b 4180 else
9041c2e3 4181 c = UCHARAT(PL_reginput);
2390ecbc 4182 /* If it could work, try it. */
d8319b27 4183 if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
2390ecbc 4184 {
d8319b27 4185 TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS2);
95b24440 4186 /*** all unsaved local vars undefined at this point */
d8319b27 4187 REGCP_UNWIND(st->u.plus.lastcp);
2390ecbc 4188 }
1aa99e6b 4189 }
a0d0e21e 4190 /* If it could work, try it. */
d8319b27 4191 else if (st->u.plus.c1 == -1000)
bbce6d69 4192 {
d8319b27 4193 TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS3);
95b24440 4194 /*** all unsaved local vars undefined at this point */
d8319b27 4195 REGCP_UNWIND(st->u.plus.lastcp);
bbce6d69 4196 }
c277df42 4197 /* Couldn't or didn't -- move forward. */
a0ed51b3 4198 PL_reginput = locinput;
a0d0e21e 4199 if (regrepeat(scan, 1)) {
5d9a96ca 4200 st->ln++;
a0ed51b3
LW
4201 locinput = PL_reginput;
4202 }
4203 else
4633a7c4 4204 sayNO;
a0d0e21e
LW
4205 }
4206 }
4207 else {
4208 n = regrepeat(scan, n);
a0ed51b3 4209 locinput = PL_reginput;
5d9a96ca 4210 if (st->ln < n && PL_regkind[(U8)OP(next)] == EOL &&
7fba1cd6 4211 (OP(next) != MEOL ||
15272685
HS
4212 OP(next) == SEOL || OP(next) == EOS))
4213 {
5d9a96ca 4214 st->ln = n; /* why back off? */
1aeab75a
GS
4215 /* ...because $ and \Z can match before *and* after
4216 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
4217 We should back off by one in this case. */
4218 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
5d9a96ca 4219 st->ln--;
1aeab75a 4220 }
d8319b27 4221 REGCP_SET(st->u.plus.lastcp);
1d5c262f 4222 {
8fa7f367 4223 UV c = 0;
5d9a96ca 4224 while (n >= st->ln) {
d8319b27 4225 if (st->u.plus.c1 != -1000) {
1aa99e6b 4226 if (do_utf8)
872c91ae 4227 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4228 UTF8_MAXBYTES, 0,
041457d9 4229 uniflags);
1aa99e6b 4230 else
9041c2e3 4231 c = UCHARAT(PL_reginput);
1aa99e6b 4232 }
c277df42 4233 /* If it could work, try it. */
d8319b27 4234 if (st->u.plus.c1 == -1000 || c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
c277df42 4235 {
d8319b27 4236 TRYPAREN(st->u.plus.paren, n, PL_reginput, PLUS4);
95b24440 4237 /*** all unsaved local vars undefined at this point */
d8319b27 4238 REGCP_UNWIND(st->u.plus.lastcp);
c277df42
IZ
4239 }
4240 /* Couldn't or didn't -- back up. */
4241 n--;
dfe13c55 4242 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 4243 }
a0d0e21e
LW
4244 }
4245 }
4633a7c4 4246 sayNO;
c277df42 4247 break;
a0d0e21e 4248 case END:
aa283a38
DM
4249 if (cur_eval) {
4250 /* we have successfully completed the execution of a
4251 * postponed re. Pop all states back to the last EVAL
4252 * then continue with the node following the (??{...})
4253 */
4254
4255 /* this simulates a POP_STATE, except that it pops several
4256 * levels, and doesn't restore locinput */
6136c704 4257
aa283a38
DM
4258 st = cur_eval;
4259 PL_regmatch_slab = st->u.eval.prev_slab;
4260 cur_eval = st->u.eval.prev_eval;
4261 depth = st->u.eval.depth;
6136c704 4262
aa283a38
DM
4263 PL_regmatch_state = st;
4264 scan = st->scan;
4265 next = st->next;
4266 n = st->n;
4267
4268 if (st->u.eval.toggleutf)
4269 PL_reg_flags ^= RF_utf8;
4270 ReREFCNT_dec(rex);
4271 rex = st->u.eval.prev_rex;
4272 PL_reg_re = rex; /* XXX */
4273 /* XXXX This is too dramatic a measure... */
4274 PL_reg_maxiter = 0;
4275
4276 /* Restore parens of the caller without popping the
4277 * savestack */
95b24440
DM
4278 {
4279 I32 tmp = PL_savestack_ix;
aa283a38 4280 PL_savestack_ix = st->u.eval.lastcp;
4f639d21 4281 regcppop(rex);
95b24440
DM
4282 PL_savestack_ix = tmp;
4283 }
6136c704 4284
6136c704 4285
aa283a38
DM
4286 PL_reginput = locinput;
4287 /* resume at node following the (??{...}) */
4288 break;
0f5d15d6 4289
0f5d15d6 4290 }
aa283a38 4291
7821416a 4292 if (locinput < PL_regtill) {
a3621e74 4293 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
7821416a
IZ
4294 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4295 PL_colors[4],
4296 (long)(locinput - PL_reg_starttry),
4297 (long)(PL_regtill - PL_reg_starttry),
4298 PL_colors[5]));
4299 sayNO_FINAL; /* Cannot match: too short. */
4300 }
4301 PL_reginput = locinput; /* put where regtry can find it */
4302 sayYES_FINAL; /* Success! */
7e5428c5 4303 case SUCCEED:
3280af22 4304 PL_reginput = locinput; /* put where regtry can find it */
7821416a 4305 sayYES_LOUD; /* Success! */
c277df42
IZ
4306 case SUSPEND:
4307 n = 1;
9fe1d20c 4308 PL_reginput = locinput;
9041c2e3 4309 goto do_ifmatch;
a0d0e21e 4310 case UNLESSM:
c277df42 4311 n = 0;
a0ed51b3 4312 if (scan->flags) {
52657f30 4313 char * const s = HOPBACKc(locinput, scan->flags);
efb30f32
HS
4314 if (!s)
4315 goto say_yes;
4316 PL_reginput = s;
a0ed51b3
LW
4317 }
4318 else
4319 PL_reginput = locinput;
c277df42
IZ
4320 goto do_ifmatch;
4321 case IFMATCH:
4322 n = 1;
a0ed51b3 4323 if (scan->flags) {
52657f30 4324 char * const s = HOPBACKc(locinput, scan->flags);
efb30f32
HS
4325 if (!s)
4326 goto say_no;
4327 PL_reginput = s;
a0ed51b3
LW
4328 }
4329 else
4330 PL_reginput = locinput;
4331
c277df42 4332 do_ifmatch:
95b24440
DM
4333 REGMATCH(NEXTOPER(NEXTOPER(scan)), IFMATCH);
4334 /*** all unsaved local vars undefined at this point */
4335 if (result != n) {
c277df42 4336 say_no:
5d9a96ca
DM
4337 if (st->logical) {
4338 st->logical = 0;
4339 st->sw = 0;
c277df42 4340 goto do_longjump;
a0ed51b3
LW
4341 }
4342 else
c277df42
IZ
4343 sayNO;
4344 }
4345 say_yes:
5d9a96ca
DM
4346 if (st->logical) {
4347 st->logical = 0;
4348 st->sw = 1;
c277df42 4349 }
fe44a5e8 4350 if (OP(scan) == SUSPEND) {
3280af22 4351 locinput = PL_reginput;
565764a8 4352 nextchr = UCHARAT(locinput);
fe44a5e8 4353 }
c277df42
IZ
4354 /* FALL THROUGH. */
4355 case LONGJMP:
4356 do_longjump:
4357 next = scan + ARG(scan);
4358 if (next == scan)
4359 next = NULL;
a0d0e21e
LW
4360 break;
4361 default:
b900a521 4362 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 4363 PTR2UV(scan), OP(scan));
cea2e8a9 4364 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 4365 }
95b24440 4366
02db2b7b 4367 reenter:
a0d0e21e 4368 scan = next;
95b24440
DM
4369 continue;
4370 /* NOTREACHED */
4371
4372 /* simulate recursively calling regmatch(), but without actually
4373 * recursing - ie save the current state on the heap rather than on
4374 * the stack, then re-enter the loop. This avoids complex regexes
4375 * blowing the processor stack */
4376
4377 start_recurse:
4378 {
5d9a96ca
DM
4379 /* push new state */
4380 regmatch_state *oldst = st;
4381
4382 depth++;
4383
4384 /* grab the next free state slot */
4385 st++;
4386 if (st > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]))
4387 st = S_push_slab(aTHX);
4388 PL_regmatch_state = st;
4389
4390 oldst->next = next;
4391 oldst->n = n;
4392 oldst->locinput = locinput;
5d9a96ca
DM
4393
4394 st->cc = oldst->cc;
95b24440
DM
4395 locinput = PL_reginput;
4396 nextchr = UCHARAT(locinput);
5d9a96ca
DM
4397 st->minmod = 0;
4398 st->sw = 0;
4399 st->logical = 0;
4400 st->unwind = 0;
95b24440
DM
4401#ifdef DEBUGGING
4402 PL_regindent++;
4403#endif
4404 }
a0d0e21e 4405 }
a687059c 4406
aa283a38
DM
4407
4408
a0d0e21e
LW
4409 /*
4410 * We get here only if there's trouble -- normally "case END" is
4411 * the terminating point.
4412 */
cea2e8a9 4413 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 4414 /*NOTREACHED*/
4633a7c4
LW
4415 sayNO;
4416
7821416a 4417yes_loud:
a3621e74 4418 DEBUG_EXECUTE_r(
7821416a
IZ
4419 PerlIO_printf(Perl_debug_log,
4420 "%*s %scould match...%s\n",
e4584336 4421 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
7821416a
IZ
4422 );
4423 goto yes;
4424yes_final:
a3621e74 4425 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
e4584336 4426 PL_colors[4], PL_colors[5]));
4633a7c4
LW
4427yes:
4428#ifdef DEBUGGING
3280af22 4429 PL_regindent--;
4633a7c4 4430#endif
02db2b7b 4431
95b24440 4432 result = 1;
aa283a38
DM
4433 /* XXX this is duplicate(ish) code to that in the do_no section.
4434 * eventually a yes should just pop the whole stack */
4435 if (depth) {
4436 /* restore previous state and re-enter */
4437 POP_STATE;
4438
4439 switch (st->resume_state) {
4440 case resume_TRIE1:
4441 goto resume_point_TRIE1;
4442 case resume_TRIE2:
4443 goto resume_point_TRIE2;
4444 case resume_EVAL:
4445 break;
4446 case resume_CURLYX:
4447 goto resume_point_CURLYX;
4448 case resume_WHILEM1:
4449 goto resume_point_WHILEM1;
4450 case resume_WHILEM2:
4451 goto resume_point_WHILEM2;
4452 case resume_WHILEM3:
4453 goto resume_point_WHILEM3;
4454 case resume_WHILEM4:
4455 goto resume_point_WHILEM4;
4456 case resume_WHILEM5:
4457 goto resume_point_WHILEM5;
4458 case resume_WHILEM6:
4459 goto resume_point_WHILEM6;
4460 case resume_CURLYM1:
4461 goto resume_point_CURLYM1;
4462 case resume_CURLYM2:
4463 goto resume_point_CURLYM2;
4464 case resume_CURLYM3:
4465 goto resume_point_CURLYM3;
4466 case resume_CURLYM4:
4467 goto resume_point_CURLYM4;
4468 case resume_IFMATCH:
4469 goto resume_point_IFMATCH;
4470 case resume_PLUS1:
4471 goto resume_point_PLUS1;
4472 case resume_PLUS2:
4473 goto resume_point_PLUS2;
4474 case resume_PLUS3:
4475 goto resume_point_PLUS3;
4476 case resume_PLUS4:
4477 goto resume_point_PLUS4;
4478 default:
4479 Perl_croak(aTHX_ "regexp resume memory corruption");
4480 }
4481 }
4482 goto final_exit;
4633a7c4
LW
4483
4484no:
a3621e74 4485 DEBUG_EXECUTE_r(
7821416a
IZ
4486 PerlIO_printf(Perl_debug_log,
4487 "%*s %sfailed...%s\n",
e4584336 4488 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
7821416a
IZ
4489 );
4490 goto do_no;
4491no_final:
4492do_no:
5d9a96ca
DM
4493 if (st->unwind) {
4494 re_unwind_t * const uw = SSPTRt(st->unwind,re_unwind_t);
02db2b7b
IZ
4495
4496 switch (uw->type) {
4497 case RE_UNWIND_BRANCH:
4498 case RE_UNWIND_BRANCHJ:
4499 {
6136c704 4500 re_unwind_branch_t * const uwb = &(uw->branch);
a3b680e6 4501 const I32 lastparen = uwb->lastparen;
9041c2e3 4502
02db2b7b
IZ
4503 REGCP_UNWIND(uwb->lastcp);
4504 for (n = *PL_reglastparen; n > lastparen; n--)
4505 PL_regendp[n] = -1;
4506 *PL_reglastparen = n;
4507 scan = next = uwb->next;
3a2830be 4508 st->minmod = uwb->minmod;
9041c2e3
NIS
4509 if ( !scan ||
4510 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b 4511 ? BRANCH : BRANCHJ) ) { /* Failure */
5d9a96ca 4512 st->unwind = uwb->prev;
02db2b7b
IZ
4513#ifdef DEBUGGING
4514 PL_regindent--;
4515#endif
4516 goto do_no;
4517 }
4518 /* Have more choice yet. Reuse the same uwb. */
02db2b7b
IZ
4519 if ((n = (uwb->type == RE_UNWIND_BRANCH
4520 ? NEXT_OFF(next) : ARG(next))))
4521 next += n;
4522 else
4523 next = NULL; /* XXXX Needn't unwinding in this case... */
4524 uwb->next = next;
4525 next = NEXTOPER(scan);
4526 if (uwb->type == RE_UNWIND_BRANCHJ)
4527 next = NEXTOPER(next);
4528 locinput = uwb->locinput;
4529 nextchr = uwb->nextchr;
4530#ifdef DEBUGGING
4531 PL_regindent = uwb->regindent;
4532#endif
4533
4534 goto reenter;
4535 }
5f66b61c 4536 /* NOTREACHED */
02db2b7b
IZ
4537 default:
4538 Perl_croak(aTHX_ "regexp unwind memory corruption");
4539 }
5f66b61c 4540 /* NOTREACHED */
02db2b7b 4541 }
aa283a38 4542
4633a7c4 4543#ifdef DEBUGGING
3280af22 4544 PL_regindent--;
4633a7c4 4545#endif
95b24440 4546 result = 0;
5d9a96ca 4547
aa283a38
DM
4548 if (depth) {
4549 /* there's a previous state to backtrack to */
4550 POP_STATE;
5d9a96ca 4551 switch (st->resume_state) {
95b24440
DM
4552 case resume_TRIE1:
4553 goto resume_point_TRIE1;
4554 case resume_TRIE2:
4555 goto resume_point_TRIE2;
aa283a38
DM
4556 case resume_EVAL:
4557 /* we have failed an (??{...}). Restore state to the outer re
4558 * then re-throw the failure */
4559 if (st->u.eval.toggleutf)
4560 PL_reg_flags ^= RF_utf8;
4561 ReREFCNT_dec(rex);
4562 rex = st->u.eval.prev_rex;
4563 PL_reg_re = rex; /* XXX */
4564 cur_eval = st->u.eval.prev_eval;
4565
4566 /* XXXX This is too dramatic a measure... */
4567 PL_reg_maxiter = 0;
4568
4569 PL_reginput = locinput;
4570 REGCP_UNWIND(st->u.eval.lastcp);
4571 regcppop(rex);
4572 goto do_no;
4573
95b24440
DM
4574 case resume_CURLYX:
4575 goto resume_point_CURLYX;
4576 case resume_WHILEM1:
4577 goto resume_point_WHILEM1;
4578 case resume_WHILEM2:
4579 goto resume_point_WHILEM2;
4580 case resume_WHILEM3:
4581 goto resume_point_WHILEM3;
4582 case resume_WHILEM4:
4583 goto resume_point_WHILEM4;
4584 case resume_WHILEM5:
4585 goto resume_point_WHILEM5;
4586 case resume_WHILEM6:
4587 goto resume_point_WHILEM6;
4588 case resume_CURLYM1:
4589 goto resume_point_CURLYM1;
4590 case resume_CURLYM2:
4591 goto resume_point_CURLYM2;
4592 case resume_CURLYM3:
4593 goto resume_point_CURLYM3;
4594 case resume_CURLYM4:
4595 goto resume_point_CURLYM4;
4596 case resume_IFMATCH:
4597 goto resume_point_IFMATCH;
4598 case resume_PLUS1:
4599 goto resume_point_PLUS1;
4600 case resume_PLUS2:
4601 goto resume_point_PLUS2;
4602 case resume_PLUS3:
4603 goto resume_point_PLUS3;
4604 case resume_PLUS4:
4605 goto resume_point_PLUS4;
95b24440
DM
4606 default:
4607 Perl_croak(aTHX_ "regexp resume memory corruption");
4608 }
95b24440 4609 }
aa283a38
DM
4610
4611final_exit:
4612
5d9a96ca
DM
4613 /* restore original high-water mark */
4614 PL_regmatch_slab = orig_slab;
4615 PL_regmatch_state = orig_state;
4616
4617 /* free all slabs above current one */
4618 if (orig_slab->next) {
ad65c075 4619 regmatch_slab *osl, *sl = orig_slab->next;
5d9a96ca
DM
4620 orig_slab->next = NULL;
4621 while (sl) {
ad65c075 4622 osl = sl;
5d9a96ca 4623 sl = sl->next;
ad65c075 4624 Safefree(osl);
5d9a96ca
DM
4625 }
4626 }
4627
95b24440
DM
4628 return result;
4629
a687059c
LW
4630}
4631
4632/*
4633 - regrepeat - repeatedly match something simple, report how many
4634 */
4635/*
4636 * [This routine now assumes that it will only match on things of length 1.
4637 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 4638 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 4639 */
76e3520e 4640STATIC I32
a3b680e6 4641S_regrepeat(pTHX_ const regnode *p, I32 max)
a687059c 4642{
27da23d5 4643 dVAR;
a0d0e21e 4644 register char *scan;
a0d0e21e 4645 register I32 c;
3280af22 4646 register char *loceol = PL_regeol;
a0ed51b3 4647 register I32 hardcount = 0;
53c4c00c 4648 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 4649
3280af22 4650 scan = PL_reginput;
faf11cac
HS
4651 if (max == REG_INFTY)
4652 max = I32_MAX;
4653 else if (max < loceol - scan)
7f596f4c 4654 loceol = scan + max;
a0d0e21e 4655 switch (OP(p)) {
22c35a8c 4656 case REG_ANY:
1aa99e6b 4657 if (do_utf8) {
ffc61ed2 4658 loceol = PL_regeol;
1aa99e6b 4659 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
4660 scan += UTF8SKIP(scan);
4661 hardcount++;
4662 }
4663 } else {
4664 while (scan < loceol && *scan != '\n')
4665 scan++;
a0ed51b3
LW
4666 }
4667 break;
ffc61ed2 4668 case SANY:
def8e4ea
JH
4669 if (do_utf8) {
4670 loceol = PL_regeol;
a0804c9e 4671 while (scan < loceol && hardcount < max) {
def8e4ea
JH
4672 scan += UTF8SKIP(scan);
4673 hardcount++;
4674 }
4675 }
4676 else
4677 scan = loceol;
a0ed51b3 4678 break;
f33976b4
DB
4679 case CANY:
4680 scan = loceol;
4681 break;
090f7165
JH
4682 case EXACT: /* length of string is 1 */
4683 c = (U8)*STRING(p);
4684 while (scan < loceol && UCHARAT(scan) == c)
4685 scan++;
bbce6d69 4686 break;
4687 case EXACTF: /* length of string is 1 */
cd439c50 4688 c = (U8)*STRING(p);
bbce6d69 4689 while (scan < loceol &&
22c35a8c 4690 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 4691 scan++;
4692 break;
4693 case EXACTFL: /* length of string is 1 */
3280af22 4694 PL_reg_flags |= RF_tainted;
cd439c50 4695 c = (U8)*STRING(p);
bbce6d69 4696 while (scan < loceol &&
22c35a8c 4697 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
4698 scan++;
4699 break;
4700 case ANYOF:
ffc61ed2
JH
4701 if (do_utf8) {
4702 loceol = PL_regeol;
cfc92286
JH
4703 while (hardcount < max && scan < loceol &&
4704 reginclass(p, (U8*)scan, 0, do_utf8)) {
ffc61ed2
JH
4705 scan += UTF8SKIP(scan);
4706 hardcount++;
4707 }
4708 } else {
7d3e948e 4709 while (scan < loceol && REGINCLASS(p, (U8*)scan))
ffc61ed2
JH
4710 scan++;
4711 }
a0d0e21e
LW
4712 break;
4713 case ALNUM:
1aa99e6b 4714 if (do_utf8) {
ffc61ed2 4715 loceol = PL_regeol;
1a4fad37 4716 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4717 while (hardcount < max && scan < loceol &&
3568d838 4718 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4719 scan += UTF8SKIP(scan);
4720 hardcount++;
4721 }
4722 } else {
4723 while (scan < loceol && isALNUM(*scan))
4724 scan++;
a0ed51b3
LW
4725 }
4726 break;
bbce6d69 4727 case ALNUML:
3280af22 4728 PL_reg_flags |= RF_tainted;
1aa99e6b 4729 if (do_utf8) {
ffc61ed2 4730 loceol = PL_regeol;
1aa99e6b
IH
4731 while (hardcount < max && scan < loceol &&
4732 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4733 scan += UTF8SKIP(scan);
4734 hardcount++;
4735 }
4736 } else {
4737 while (scan < loceol && isALNUM_LC(*scan))
4738 scan++;
a0ed51b3
LW
4739 }
4740 break;
a0d0e21e 4741 case NALNUM:
1aa99e6b 4742 if (do_utf8) {
ffc61ed2 4743 loceol = PL_regeol;
1a4fad37 4744 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4745 while (hardcount < max && scan < loceol &&
3568d838 4746 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4747 scan += UTF8SKIP(scan);
4748 hardcount++;
4749 }
4750 } else {
4751 while (scan < loceol && !isALNUM(*scan))
4752 scan++;
a0ed51b3
LW
4753 }
4754 break;
bbce6d69 4755 case NALNUML:
3280af22 4756 PL_reg_flags |= RF_tainted;
1aa99e6b 4757 if (do_utf8) {
ffc61ed2 4758 loceol = PL_regeol;
1aa99e6b
IH
4759 while (hardcount < max && scan < loceol &&
4760 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4761 scan += UTF8SKIP(scan);
4762 hardcount++;
4763 }
4764 } else {
4765 while (scan < loceol && !isALNUM_LC(*scan))
4766 scan++;
a0ed51b3
LW
4767 }
4768 break;
a0d0e21e 4769 case SPACE:
1aa99e6b 4770 if (do_utf8) {
ffc61ed2 4771 loceol = PL_regeol;
1a4fad37 4772 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4773 while (hardcount < max && scan < loceol &&
3568d838
JH
4774 (*scan == ' ' ||
4775 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4776 scan += UTF8SKIP(scan);
4777 hardcount++;
4778 }
4779 } else {
4780 while (scan < loceol && isSPACE(*scan))
4781 scan++;
a0ed51b3
LW
4782 }
4783 break;
bbce6d69 4784 case SPACEL:
3280af22 4785 PL_reg_flags |= RF_tainted;
1aa99e6b 4786 if (do_utf8) {
ffc61ed2 4787 loceol = PL_regeol;
1aa99e6b 4788 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4789 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4790 scan += UTF8SKIP(scan);
4791 hardcount++;
4792 }
4793 } else {
4794 while (scan < loceol && isSPACE_LC(*scan))
4795 scan++;
a0ed51b3
LW
4796 }
4797 break;
a0d0e21e 4798 case NSPACE:
1aa99e6b 4799 if (do_utf8) {
ffc61ed2 4800 loceol = PL_regeol;
1a4fad37 4801 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4802 while (hardcount < max && scan < loceol &&
3568d838
JH
4803 !(*scan == ' ' ||
4804 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4805 scan += UTF8SKIP(scan);
4806 hardcount++;
4807 }
4808 } else {
4809 while (scan < loceol && !isSPACE(*scan))
4810 scan++;
4811 break;
a0ed51b3 4812 }
bbce6d69 4813 case NSPACEL:
3280af22 4814 PL_reg_flags |= RF_tainted;
1aa99e6b 4815 if (do_utf8) {
ffc61ed2 4816 loceol = PL_regeol;
1aa99e6b 4817 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4818 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4819 scan += UTF8SKIP(scan);
4820 hardcount++;
4821 }
4822 } else {
4823 while (scan < loceol && !isSPACE_LC(*scan))
4824 scan++;
a0ed51b3
LW
4825 }
4826 break;
a0d0e21e 4827 case DIGIT:
1aa99e6b 4828 if (do_utf8) {
ffc61ed2 4829 loceol = PL_regeol;
1a4fad37 4830 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4831 while (hardcount < max && scan < loceol &&
3568d838 4832 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4833 scan += UTF8SKIP(scan);
4834 hardcount++;
4835 }
4836 } else {
4837 while (scan < loceol && isDIGIT(*scan))
4838 scan++;
a0ed51b3
LW
4839 }
4840 break;
a0d0e21e 4841 case NDIGIT:
1aa99e6b 4842 if (do_utf8) {
ffc61ed2 4843 loceol = PL_regeol;
1a4fad37 4844 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4845 while (hardcount < max && scan < loceol &&
3568d838 4846 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4847 scan += UTF8SKIP(scan);
4848 hardcount++;
4849 }
4850 } else {
4851 while (scan < loceol && !isDIGIT(*scan))
4852 scan++;
a0ed51b3
LW
4853 }
4854 break;
a0d0e21e
LW
4855 default: /* Called on something of 0 width. */
4856 break; /* So match right here or not at all. */
4857 }
a687059c 4858
a0ed51b3
LW
4859 if (hardcount)
4860 c = hardcount;
4861 else
4862 c = scan - PL_reginput;
3280af22 4863 PL_reginput = scan;
a687059c 4864
a3621e74 4865 DEBUG_r({
ab74612d 4866 SV *re_debug_flags = NULL;
6136c704 4867 SV * const prop = sv_newmortal();
a3621e74
YO
4868 GET_RE_DEBUG_FLAGS;
4869 DEBUG_EXECUTE_r({
c277df42 4870 regprop(prop, p);
9041c2e3
NIS
4871 PerlIO_printf(Perl_debug_log,
4872 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3f7c398e 4873 REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max);
c277df42 4874 });
a3621e74 4875 });
9041c2e3 4876
a0d0e21e 4877 return(c);
a687059c
LW
4878}
4879
c277df42
IZ
4880
4881/*
ffc61ed2
JH
4882- regclass_swash - prepare the utf8 swash
4883*/
4884
4885SV *
a3b680e6 4886Perl_regclass_swash(pTHX_ register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
ffc61ed2 4887{
97aff369 4888 dVAR;
9e55ce06
JH
4889 SV *sw = NULL;
4890 SV *si = NULL;
4891 SV *alt = NULL;
a8079bba 4892 const struct reg_data *data = PL_reg_re ? PL_reg_re->data : NULL;
ffc61ed2 4893
4f639d21 4894 if (data && data->count) {
a3b680e6 4895 const U32 n = ARG(node);
ffc61ed2 4896
4f639d21
DM
4897 if (data->what[n] == 's') {
4898 SV * const rv = (SV*)data->data[n];
890ce7af 4899 AV * const av = (AV*)SvRV((SV*)rv);
2d03de9c 4900 SV **const ary = AvARRAY(av);
9e55ce06 4901 SV **a, **b;
9041c2e3 4902
711a919c 4903 /* See the end of regcomp.c:S_regclass() for
9e55ce06
JH
4904 * documentation of these array elements. */
4905
b11f357e 4906 si = *ary;
8f7f7219 4907 a = SvROK(ary[1]) ? &ary[1] : 0;
b11f357e
JH
4908 b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4909
ffc61ed2
JH
4910 if (a)
4911 sw = *a;
4912 else if (si && doinit) {
4913 sw = swash_init("utf8", "", si, 1, 0);
4914 (void)av_store(av, 1, sw);
4915 }
9e55ce06
JH
4916 if (b)
4917 alt = *b;
ffc61ed2
JH
4918 }
4919 }
4920
9e55ce06
JH
4921 if (listsvp)
4922 *listsvp = si;
4923 if (altsvp)
4924 *altsvp = alt;
ffc61ed2
JH
4925
4926 return sw;
4927}
4928
4929/*
ba7b4546 4930 - reginclass - determine if a character falls into a character class
832705d4
JH
4931
4932 The n is the ANYOF regnode, the p is the target string, lenp
4933 is pointer to the maximum length of how far to go in the p
4934 (if the lenp is zero, UTF8SKIP(p) is used),
4935 do_utf8 tells whether the target string is in UTF-8.
4936
bbce6d69 4937 */
4938
76e3520e 4939STATIC bool
a3b680e6 4940S_reginclass(pTHX_ register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
bbce6d69 4941{
27da23d5 4942 dVAR;
a3b680e6 4943 const char flags = ANYOF_FLAGS(n);
bbce6d69 4944 bool match = FALSE;
cc07378b 4945 UV c = *p;
ae9ddab8 4946 STRLEN len = 0;
9e55ce06 4947 STRLEN plen;
1aa99e6b 4948
19f67299
TS
4949 if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
4950 c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
4ad0818d
DM
4951 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
4952 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
19f67299
TS
4953 if (len == (STRLEN)-1)
4954 Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
4955 }
bbce6d69 4956
0f0076b4 4957 plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
ffc61ed2 4958 if (do_utf8 || (flags & ANYOF_UNICODE)) {
9e55ce06
JH
4959 if (lenp)
4960 *lenp = 0;
ffc61ed2 4961 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
4962 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4963 match = TRUE;
bbce6d69 4964 }
3568d838 4965 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 4966 match = TRUE;
ffc61ed2 4967 if (!match) {
9e55ce06 4968 AV *av;
890ce7af 4969 SV * const sw = regclass_swash(n, TRUE, 0, (SV**)&av);
ffc61ed2
JH
4970
4971 if (sw) {
3568d838 4972 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
4973 match = TRUE;
4974 else if (flags & ANYOF_FOLD) {
9e55ce06
JH
4975 if (!match && lenp && av) {
4976 I32 i;
9e55ce06 4977 for (i = 0; i <= av_len(av); i++) {
890ce7af 4978 SV* const sv = *av_fetch(av, i, FALSE);
9e55ce06 4979 STRLEN len;
890ce7af 4980 const char * const s = SvPV_const(sv, len);
9e55ce06 4981
061b10df 4982 if (len <= plen && memEQ(s, (char*)p, len)) {
9e55ce06
JH
4983 *lenp = len;
4984 match = TRUE;
4985 break;
4986 }
4987 }
4988 }
4989 if (!match) {
89ebb4a3 4990 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4a623e43
JH
4991 STRLEN tmplen;
4992
9e55ce06
JH
4993 to_utf8_fold(p, tmpbuf, &tmplen);
4994 if (swash_fetch(sw, tmpbuf, do_utf8))
4995 match = TRUE;
4996 }
ffc61ed2
JH
4997 }
4998 }
bbce6d69 4999 }
9e55ce06 5000 if (match && lenp && *lenp == 0)
0f0076b4 5001 *lenp = UNISKIP(NATIVE_TO_UNI(c));
bbce6d69 5002 }
1aa99e6b 5003 if (!match && c < 256) {
ffc61ed2
JH
5004 if (ANYOF_BITMAP_TEST(n, c))
5005 match = TRUE;
5006 else if (flags & ANYOF_FOLD) {
eb160463 5007 U8 f;
a0ed51b3 5008
ffc61ed2
JH
5009 if (flags & ANYOF_LOCALE) {
5010 PL_reg_flags |= RF_tainted;
5011 f = PL_fold_locale[c];
5012 }
5013 else
5014 f = PL_fold[c];
5015 if (f != c && ANYOF_BITMAP_TEST(n, f))
5016 match = TRUE;
5017 }
5018
5019 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 5020 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
5021 if (
5022 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
5023 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
5024 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
5025 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
5026 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
5027 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
5028 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
5029 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5030 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
5031 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
5032 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
5033 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
5034 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
5035 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
5036 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
5037 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
5038 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
5039 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
5040 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
5041 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
5042 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
5043 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
5044 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
5045 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
5046 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
5047 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
5048 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
5049 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
5050 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
5051 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
5052 ) /* How's that for a conditional? */
5053 {
5054 match = TRUE;
5055 }
a0ed51b3 5056 }
a0ed51b3
LW
5057 }
5058
a0ed51b3
LW
5059 return (flags & ANYOF_INVERT) ? !match : match;
5060}
161b471a 5061
dfe13c55 5062STATIC U8 *
5f66b61c 5063S_reghop3(U8 *s, I32 off, U8* lim)
9041c2e3 5064{
97aff369 5065 dVAR;
a0ed51b3 5066 if (off >= 0) {
1aa99e6b 5067 while (off-- && s < lim) {
ffc61ed2 5068 /* XXX could check well-formedness here */
a0ed51b3 5069 s += UTF8SKIP(s);
ffc61ed2 5070 }
a0ed51b3
LW
5071 }
5072 else {
5073 while (off++) {
1aa99e6b 5074 if (s > lim) {
a0ed51b3 5075 s--;
ffc61ed2 5076 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 5077 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 5078 s--;
ffc61ed2
JH
5079 }
5080 /* XXX could check well-formedness here */
a0ed51b3
LW
5081 }
5082 }
5083 }
5084 return s;
5085}
161b471a 5086
dfe13c55 5087STATIC U8 *
5f66b61c 5088S_reghopmaybe3(U8* s, I32 off, U8* lim)
a0ed51b3 5089{
97aff369 5090 dVAR;
a0ed51b3 5091 if (off >= 0) {
1aa99e6b 5092 while (off-- && s < lim) {
ffc61ed2 5093 /* XXX could check well-formedness here */
a0ed51b3 5094 s += UTF8SKIP(s);
ffc61ed2 5095 }
a0ed51b3
LW
5096 if (off >= 0)
5097 return 0;
5098 }
5099 else {
5100 while (off++) {
1aa99e6b 5101 if (s > lim) {
a0ed51b3 5102 s--;
ffc61ed2 5103 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 5104 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 5105 s--;
ffc61ed2
JH
5106 }
5107 /* XXX could check well-formedness here */
a0ed51b3
LW
5108 }
5109 else
5110 break;
5111 }
5112 if (off <= 0)
5113 return 0;
5114 }
5115 return s;
5116}
51371543 5117
51371543 5118static void
acfe0abc 5119restore_pos(pTHX_ void *arg)
51371543 5120{
97aff369 5121 dVAR;
097eb12c 5122 regexp * const rex = (regexp *)arg;
51371543
GS
5123 if (PL_reg_eval_set) {
5124 if (PL_reg_oldsaved) {
4f639d21
DM
5125 rex->subbeg = PL_reg_oldsaved;
5126 rex->sublen = PL_reg_oldsavedlen;
f8c7b90f 5127#ifdef PERL_OLD_COPY_ON_WRITE
4f639d21 5128 rex->saved_copy = PL_nrs;
ed252734 5129#endif
4f639d21 5130 RX_MATCH_COPIED_on(rex);
51371543
GS
5131 }
5132 PL_reg_magic->mg_len = PL_reg_oldpos;
5133 PL_reg_eval_set = 0;
5134 PL_curpm = PL_reg_oldcurpm;
5135 }
5136}
33b8afdf
JH
5137
5138STATIC void
5139S_to_utf8_substr(pTHX_ register regexp *prog)
5140{
33b8afdf 5141 if (prog->float_substr && !prog->float_utf8) {
097eb12c
AL
5142 SV* const sv = newSVsv(prog->float_substr);
5143 prog->float_utf8 = sv;
33b8afdf
JH
5144 sv_utf8_upgrade(sv);
5145 if (SvTAIL(prog->float_substr))
5146 SvTAIL_on(sv);
5147 if (prog->float_substr == prog->check_substr)
5148 prog->check_utf8 = sv;
5149 }
5150 if (prog->anchored_substr && !prog->anchored_utf8) {
097eb12c
AL
5151 SV* const sv = newSVsv(prog->anchored_substr);
5152 prog->anchored_utf8 = sv;
33b8afdf
JH
5153 sv_utf8_upgrade(sv);
5154 if (SvTAIL(prog->anchored_substr))
5155 SvTAIL_on(sv);
5156 if (prog->anchored_substr == prog->check_substr)
5157 prog->check_utf8 = sv;
5158 }
5159}
5160
5161STATIC void
5162S_to_byte_substr(pTHX_ register regexp *prog)
5163{
97aff369 5164 dVAR;
33b8afdf 5165 if (prog->float_utf8 && !prog->float_substr) {
097eb12c
AL
5166 SV* sv = newSVsv(prog->float_utf8);
5167 prog->float_substr = sv;
33b8afdf
JH
5168 if (sv_utf8_downgrade(sv, TRUE)) {
5169 if (SvTAIL(prog->float_utf8))
5170 SvTAIL_on(sv);
5171 } else {
5172 SvREFCNT_dec(sv);
5173 prog->float_substr = sv = &PL_sv_undef;
5174 }
5175 if (prog->float_utf8 == prog->check_utf8)
5176 prog->check_substr = sv;
5177 }
5178 if (prog->anchored_utf8 && !prog->anchored_substr) {
097eb12c
AL
5179 SV* sv = newSVsv(prog->anchored_utf8);
5180 prog->anchored_substr = sv;
33b8afdf
JH
5181 if (sv_utf8_downgrade(sv, TRUE)) {
5182 if (SvTAIL(prog->anchored_utf8))
5183 SvTAIL_on(sv);
5184 } else {
5185 SvREFCNT_dec(sv);
5186 prog->anchored_substr = sv = &PL_sv_undef;
5187 }
5188 if (prog->anchored_utf8 == prog->check_utf8)
5189 prog->check_substr = sv;
5190 }
5191}
66610fdd
RGS
5192
5193/*
5194 * Local variables:
5195 * c-indentation-style: bsd
5196 * c-basic-offset: 4
5197 * indent-tabs-mode: t
5198 * End:
5199 *
37442d52
RGS
5200 * ex: set ts=8 sts=4 sw=4 noet:
5201 */