This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add a break; to the end of the SAVEt_COP_WARNINGS case, and don't try
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
61296642
DM
8/* This file contains functions for executing a regular expression. See
9 * also regcomp.c which funnily enough, contains functions for compiling
166f8a29 10 * a regular expression.
e4a054ea
DM
11 *
12 * This file is also copied at build time to ext/re/re_exec.c, where
13 * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14 * This causes the main functions to be compiled under new names and with
15 * debugging support added, which makes "use re 'debug'" work.
16
166f8a29
DM
17 */
18
a687059c
LW
19/* NOTE: this is derived from Henry Spencer's regexp code, and should not
20 * confused with the original package (see point 3 below). Thanks, Henry!
21 */
22
23/* Additional note: this code is very heavily munged from Henry's version
24 * in places. In some spots I've traded clarity for efficiency, so don't
25 * blame Henry for some of the lack of readability.
26 */
27
e50aee73
AD
28/* The names of the functions have been changed from regcomp and
29 * regexec to pregcomp and pregexec in order to avoid conflicts
30 * with the POSIX routines of the same names.
31*/
32
b9d5759e
AD
33#ifdef PERL_EXT_RE_BUILD
34/* need to replace pregcomp et al, so enable that */
35# ifndef PERL_IN_XSUB_RE
36# define PERL_IN_XSUB_RE
37# endif
38/* need access to debugger hooks */
cad2e5aa 39# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
40# define DEBUGGING
41# endif
42#endif
43
44#ifdef PERL_IN_XSUB_RE
d06ea78c 45/* We *really* need to overwrite these symbols: */
56953603
IZ
46# define Perl_regexec_flags my_regexec
47# define Perl_regdump my_regdump
48# define Perl_regprop my_regprop
cad2e5aa 49# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
50/* *These* symbols are masked to allow static link. */
51# define Perl_pregexec my_pregexec
9041c2e3 52# define Perl_reginitcolors my_reginitcolors
490a3f88 53# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
54
55# define PERL_NO_GET_CONTEXT
9041c2e3 56#endif
56953603 57
a687059c 58/*
e50aee73 59 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
60 *
61 * Copyright (c) 1986 by University of Toronto.
62 * Written by Henry Spencer. Not derived from licensed software.
63 *
64 * Permission is granted to anyone to use this software for any
65 * purpose on any computer system, and to redistribute it freely,
66 * subject to the following restrictions:
67 *
68 * 1. The author is not responsible for the consequences of use of
69 * this software, no matter how awful, even if they arise
70 * from defects in it.
71 *
72 * 2. The origin of this software must not be misrepresented, either
73 * by explicit claim or by omission.
74 *
75 * 3. Altered versions must be plainly marked as such, and must not
76 * be misrepresented as being the original software.
77 *
78 **** Alterations to Henry's code are...
79 ****
4bb101f2 80 **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 81 **** 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 82 ****
9ef589d8
LW
83 **** You may distribute under the terms of either the GNU General Public
84 **** License or the Artistic License, as specified in the README file.
a687059c
LW
85 *
86 * Beware that some of this code is subtly aware of the way operator
87 * precedence is structured in regular expressions. Serious changes in
88 * regular-expression syntax might require a total rethink.
89 */
90#include "EXTERN.h"
864dbfa3 91#define PERL_IN_REGEXEC_C
a687059c 92#include "perl.h"
0f5d15d6 93
a687059c
LW
94#include "regcomp.h"
95
c277df42
IZ
96#define RF_tainted 1 /* tainted information used? */
97#define RF_warned 2 /* warned about big count? */
ce862d02 98#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
99#define RF_utf8 8 /* String contains multibyte chars? */
100
eb160463 101#define UTF ((PL_reg_flags & RF_utf8) != 0)
ce862d02
IZ
102
103#define RS_init 1 /* eval environment created */
104#define RS_set 2 /* replsv value is set */
c277df42 105
a687059c
LW
106#ifndef STATIC
107#define STATIC static
108#endif
109
32fc9b6a 110#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
7d3e948e 111
c277df42
IZ
112/*
113 * Forwards.
114 */
115
33b8afdf 116#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 117#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 118
52657f30
AL
119#define HOPc(pos,off) ((char *)(PL_reg_match_utf8 \
120 ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
121 : (U8*)(pos + off)))
122#define HOPBACKc(pos, off) ((char*) \
123 ((PL_reg_match_utf8) \
124 ? reghopmaybe3((U8*)pos, -off, ((U8*)(off < 0 ? PL_regeol : PL_bostr))) \
efb30f32
HS
125 : (pos - off >= PL_bostr) \
126 ? (U8*)(pos - off) \
52657f30 127 : (U8*)NULL) \
efb30f32 128)
efb30f32 129
1aa99e6b 130#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c 131#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b 132#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
1aa99e6b 133
1a4fad37
AL
134#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
135 if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
136#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
137#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
138#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
139#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
51371543 140
5f80c4cf 141/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
142#define JUMPABLE(rn) ( \
143 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
144 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
145 OP(rn) == PLUS || OP(rn) == MINMOD || \
146 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
147)
148
cca55fe3
JP
149#define HAS_TEXT(rn) ( \
150 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
151)
e2d8ce26 152
a84d97b6
HS
153/*
154 Search for mandatory following text node; for lookahead, the text must
155 follow but for lookbehind (rn->flags != 0) we skip to the next step.
156*/
cca55fe3 157#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 158 while (JUMPABLE(rn)) \
a84d97b6 159 if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 160 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
161 else if (OP(rn) == PLUS) \
162 rn = NEXTOPER(rn); \
a84d97b6
HS
163 else if (OP(rn) == IFMATCH) \
164 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
e2d8ce26 165 else rn += NEXT_OFF(rn); \
5f80c4cf 166} STMT_END
74750237 167
acfe0abc 168static void restore_pos(pTHX_ void *arg);
51371543 169
76e3520e 170STATIC CHECKPOINT
cea2e8a9 171S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 172{
97aff369 173 dVAR;
a3b680e6 174 const int retval = PL_savestack_ix;
b1ce53c5 175#define REGCP_PAREN_ELEMS 4
a3b680e6 176 const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
177 int p;
178
e49a9654
IH
179 if (paren_elems_to_push < 0)
180 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
181
a01268b5 182#define REGCP_OTHER_ELEMS 6
4b3c1a47 183 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 184 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 185/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
186 SSPUSHINT(PL_regendp[p]);
187 SSPUSHINT(PL_regstartp[p]);
3280af22 188 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
189 SSPUSHINT(p);
190 }
b1ce53c5 191/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
192 SSPUSHINT(PL_regsize);
193 SSPUSHINT(*PL_reglastparen);
a01268b5 194 SSPUSHINT(*PL_reglastcloseparen);
3280af22 195 SSPUSHPTR(PL_reginput);
41123dfd
JH
196#define REGCP_FRAME_ELEMS 2
197/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
198 * are needed for the regexp context stack bookkeeping. */
199 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 200 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 201
a0d0e21e
LW
202 return retval;
203}
204
c277df42 205/* These are needed since we do not localize EVAL nodes: */
a3621e74 206# define REGCP_SET(cp) DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, \
faccc32b 207 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 208 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 209
a3621e74 210# define REGCP_UNWIND(cp) DEBUG_EXECUTE_r(cp != PL_savestack_ix ? \
c3464db5 211 PerlIO_printf(Perl_debug_log, \
faccc32b 212 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 213 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 214
76e3520e 215STATIC char *
097eb12c 216S_regcppop(pTHX_ const regexp *rex)
a0d0e21e 217{
97aff369 218 dVAR;
b1ce53c5 219 I32 i;
a0d0e21e 220 char *input;
b1ce53c5 221
a3621e74
YO
222 GET_RE_DEBUG_FLAGS_DECL;
223
b1ce53c5 224 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 225 i = SSPOPINT;
b1ce53c5
JH
226 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
227 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 228 input = (char *) SSPOPPTR;
a01268b5 229 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
230 *PL_reglastparen = SSPOPINT;
231 PL_regsize = SSPOPINT;
b1ce53c5
JH
232
233 /* Now restore the parentheses context. */
41123dfd
JH
234 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
235 i > 0; i -= REGCP_PAREN_ELEMS) {
1df70142 236 I32 tmps;
097eb12c 237 U32 paren = (U32)SSPOPINT;
3280af22 238 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
239 PL_regstartp[paren] = SSPOPINT;
240 tmps = SSPOPINT;
3280af22
NIS
241 if (paren <= *PL_reglastparen)
242 PL_regendp[paren] = tmps;
a3621e74 243 DEBUG_EXECUTE_r(
c3464db5 244 PerlIO_printf(Perl_debug_log,
b900a521 245 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 246 (UV)paren, (IV)PL_regstartp[paren],
b900a521 247 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 248 (IV)PL_regendp[paren],
3280af22 249 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 250 );
a0d0e21e 251 }
a3621e74 252 DEBUG_EXECUTE_r(
4f639d21 253 if ((I32)(*PL_reglastparen + 1) <= rex->nparens) {
c3464db5 254 PerlIO_printf(Perl_debug_log,
faccc32b 255 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
4f639d21 256 (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
c277df42
IZ
257 }
258 );
daf18116 259#if 1
dafc8851
JH
260 /* It would seem that the similar code in regtry()
261 * already takes care of this, and in fact it is in
262 * a better location to since this code can #if 0-ed out
263 * but the code in regtry() is needed or otherwise tests
264 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
265 * (as of patchlevel 7877) will fail. Then again,
266 * this code seems to be necessary or otherwise
267 * building DynaLoader will fail:
268 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
269 * --jhi */
097eb12c
AL
270 for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
271 if (i > PL_regsize)
272 PL_regstartp[i] = -1;
273 PL_regendp[i] = -1;
a0d0e21e 274 }
dafc8851 275#endif
a0d0e21e
LW
276 return input;
277}
278
02db2b7b 279#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 280
95b24440 281#define TRYPAREN(paren, n, input, where) { \
29d1e993
HS
282 if (paren) { \
283 if (n) { \
284 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
285 PL_regendp[paren] = input - PL_bostr; \
286 } \
287 else \
288 PL_regendp[paren] = -1; \
289 } \
95b24440
DM
290 REGMATCH(next, where); \
291 if (result) \
29d1e993
HS
292 sayYES; \
293 if (paren && n) \
294 PL_regendp[paren] = -1; \
295}
296
297
a687059c 298/*
e50aee73 299 * pregexec and friends
a687059c
LW
300 */
301
302/*
c277df42 303 - pregexec - match a regexp against a string
a687059c 304 */
c277df42 305I32
864dbfa3 306Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 307 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
308/* strend: pointer to null at end of string */
309/* strbeg: real beginning of string */
310/* minend: end of match must be >=minend after stringarg. */
311/* nosave: For optimizations. */
312{
313 return
9041c2e3 314 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
315 nosave ? 0 : REXEC_COPY_STR);
316}
0f5d15d6 317
22e551b9 318
9041c2e3 319/*
cad2e5aa
JH
320 * Need to implement the following flags for reg_anch:
321 *
322 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
323 * USE_INTUIT_ML
324 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
325 * INTUIT_AUTORITATIVE_ML
326 * INTUIT_ONCE_NOML - Intuit can match in one location only.
327 * INTUIT_ONCE_ML
328 *
329 * Another flag for this function: SECOND_TIME (so that float substrs
330 * with giant delta may be not rechecked).
331 */
332
333/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
334
3f7c398e 335/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
cad2e5aa
JH
336 Otherwise, only SvCUR(sv) is used to get strbeg. */
337
338/* XXXX We assume that strpos is strbeg unless sv. */
339
6eb5f6b9
JH
340/* XXXX Some places assume that there is a fixed substring.
341 An update may be needed if optimizer marks as "INTUITable"
342 RExen without fixed substrings. Similarly, it is assumed that
343 lengths of all the strings are no more than minlen, thus they
344 cannot come from lookahead.
345 (Or minlen should take into account lookahead.) */
346
2c2d71f5
JH
347/* A failure to find a constant substring means that there is no need to make
348 an expensive call to REx engine, thus we celebrate a failure. Similarly,
349 finding a substring too deep into the string means that less calls to
30944b6d
IZ
350 regtry() should be needed.
351
352 REx compiler's optimizer found 4 possible hints:
353 a) Anchored substring;
354 b) Fixed substring;
355 c) Whether we are anchored (beginning-of-line or \G);
356 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 357 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
358 string which does not contradict any of them.
359 */
2c2d71f5 360
6eb5f6b9
JH
361/* Most of decisions we do here should have been done at compile time.
362 The nodes of the REx which we used for the search should have been
363 deleted from the finite automaton. */
364
cad2e5aa
JH
365char *
366Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
367 char *strend, U32 flags, re_scream_pos_data *data)
368{
97aff369 369 dVAR;
b7953727 370 register I32 start_shift = 0;
cad2e5aa 371 /* Should be nonnegative! */
b7953727 372 register I32 end_shift = 0;
2c2d71f5
JH
373 register char *s;
374 register SV *check;
a1933d95 375 char *strbeg;
cad2e5aa 376 char *t;
a3b680e6 377 const int do_utf8 = sv ? SvUTF8(sv) : 0; /* if no sv we have to assume bytes */
cad2e5aa 378 I32 ml_anch;
bd61b366
SS
379 register char *other_last = NULL; /* other substr checked before this */
380 char *check_at = NULL; /* check substr found at this pos */
1df70142 381 const I32 multiline = prog->reganch & PMf_MULTILINE;
30944b6d 382#ifdef DEBUGGING
890ce7af
AL
383 const char * const i_strpos = strpos;
384 SV * const dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 385#endif
a3621e74
YO
386
387 GET_RE_DEBUG_FLAGS_DECL;
388
a30b2f1f 389 RX_MATCH_UTF8_set(prog,do_utf8);
cad2e5aa 390
b8d68ded 391 if (prog->reganch & ROPT_UTF8) {
a3621e74 392 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded
JH
393 "UTF-8 regex...\n"));
394 PL_reg_flags |= RF_utf8;
395 }
396
a3621e74 397 DEBUG_EXECUTE_r({
1df70142 398 const char *s = PL_reg_match_utf8 ?
c728cb41
JH
399 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
400 strpos;
1df70142 401 const int len = PL_reg_match_utf8 ?
b8d68ded 402 strlen(s) : strend - strpos;
2a782b5b
JH
403 if (!PL_colorset)
404 reginitcolors();
b8d68ded 405 if (PL_reg_match_utf8)
a3621e74 406 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded 407 "UTF-8 target...\n"));
2a782b5b 408 PerlIO_printf(Perl_debug_log,
a0288114 409 "%sGuessing start of match, REx%s \"%s%.60s%s%s\" against \"%s%.*s%s%s\"...\n",
e4584336 410 PL_colors[4], PL_colors[5], PL_colors[0],
2a782b5b
JH
411 prog->precomp,
412 PL_colors[1],
413 (strlen(prog->precomp) > 60 ? "..." : ""),
414 PL_colors[0],
415 (int)(len > 60 ? 60 : len),
416 s, PL_colors[1],
417 (len > 60 ? "..." : "")
418 );
419 });
cad2e5aa 420
c344f387
JH
421 /* CHR_DIST() would be more correct here but it makes things slow. */
422 if (prog->minlen > strend - strpos) {
a3621e74 423 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584 424 "String too short... [re_intuit_start]\n"));
cad2e5aa 425 goto fail;
2c2d71f5 426 }
a1933d95 427 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 428 PL_regeol = strend;
33b8afdf
JH
429 if (do_utf8) {
430 if (!prog->check_utf8 && prog->check_substr)
431 to_utf8_substr(prog);
432 check = prog->check_utf8;
433 } else {
434 if (!prog->check_substr && prog->check_utf8)
435 to_byte_substr(prog);
436 check = prog->check_substr;
437 }
438 if (check == &PL_sv_undef) {
a3621e74 439 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
440 "Non-utf string cannot match utf check string\n"));
441 goto fail;
442 }
2c2d71f5 443 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
444 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
445 || ( (prog->reganch & ROPT_ANCH_BOL)
7fba1cd6 446 && !multiline ) ); /* Check after \n? */
cad2e5aa 447
7e25d62c
JH
448 if (!ml_anch) {
449 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
450 | ROPT_IMPLICIT)) /* not a real BOL */
3f7c398e 451 /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
7e25d62c
JH
452 && sv && !SvROK(sv)
453 && (strpos != strbeg)) {
a3621e74 454 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
7e25d62c
JH
455 goto fail;
456 }
457 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 458 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 459 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
460 I32 slen;
461
1aa99e6b 462 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
463 if (SvTAIL(check)) {
464 slen = SvCUR(check); /* >= 1 */
cad2e5aa 465
9041c2e3 466 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5 467 || (strend - s == slen && strend[-1] != '\n')) {
a3621e74 468 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
2c2d71f5 469 goto fail_finish;
cad2e5aa
JH
470 }
471 /* Now should match s[0..slen-2] */
472 slen--;
3f7c398e 473 if (slen && (*SvPVX_const(check) != *s
cad2e5aa 474 || (slen > 1
3f7c398e 475 && memNE(SvPVX_const(check), s, slen)))) {
2c2d71f5 476 report_neq:
a3621e74 477 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
2c2d71f5
JH
478 goto fail_finish;
479 }
cad2e5aa 480 }
3f7c398e 481 else if (*SvPVX_const(check) != *s
653099ff 482 || ((slen = SvCUR(check)) > 1
3f7c398e 483 && memNE(SvPVX_const(check), s, slen)))
2c2d71f5 484 goto report_neq;
c315bfe8 485 check_at = s;
2c2d71f5 486 goto success_at_start;
7e25d62c 487 }
cad2e5aa 488 }
2c2d71f5 489 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 490 s = strpos;
2c2d71f5 491 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 492 end_shift = prog->minlen - start_shift -
653099ff 493 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 494 if (!ml_anch) {
a3b680e6 495 const I32 end = prog->check_offset_max + CHR_SVLEN(check)
653099ff 496 - (SvTAIL(check) != 0);
a3b680e6 497 const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
498
499 if (end_shift < eshift)
500 end_shift = eshift;
501 }
cad2e5aa 502 }
2c2d71f5 503 else { /* Can match at random position */
cad2e5aa
JH
504 ml_anch = 0;
505 s = strpos;
2c2d71f5
JH
506 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
507 /* Should be nonnegative! */
508 end_shift = prog->minlen - start_shift -
653099ff 509 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
510 }
511
2c2d71f5 512#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 513 if (end_shift < 0)
6bbae5e6 514 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
515#endif
516
2c2d71f5
JH
517 restart:
518 /* Find a possible match in the region s..strend by looking for
519 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 520 if (flags & REXEC_SCREAM) {
cad2e5aa 521 I32 p = -1; /* Internal iterator of scream. */
a3b680e6 522 I32 * const pp = data ? data->scream_pos : &p;
cad2e5aa 523
2c2d71f5
JH
524 if (PL_screamfirst[BmRARE(check)] >= 0
525 || ( BmRARE(check) == '\n'
526 && (BmPREVIOUS(check) == SvCUR(check) - 1)
527 && SvTAIL(check) ))
9041c2e3 528 s = screaminstr(sv, check,
2c2d71f5 529 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 530 else
2c2d71f5 531 goto fail_finish;
4addbd3b
HS
532 /* we may be pointing at the wrong string */
533 if (s && RX_MATCH_COPIED(prog))
3f7c398e 534 s = strbeg + (s - SvPVX_const(sv));
cad2e5aa
JH
535 if (data)
536 *data->scream_olds = s;
537 }
f33976b4 538 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
539 s = fbm_instr((U8*)(s + start_shift),
540 (U8*)(strend - end_shift),
7fba1cd6 541 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 542 else
1aa99e6b
IH
543 s = fbm_instr(HOP3(s, start_shift, strend),
544 HOP3(strend, -end_shift, strbeg),
7fba1cd6 545 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
546
547 /* Update the count-of-usability, remove useless subpatterns,
548 unshift s. */
2c2d71f5 549
a0288114 550 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s %s substr \"%s%.*s%s\"%s%s",
2c2d71f5 551 (s ? "Found" : "Did not find"),
33b8afdf 552 (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
2c2d71f5 553 PL_colors[0],
7b0972df 554 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
3f7c398e 555 SvPVX_const(check),
2c2d71f5
JH
556 PL_colors[1], (SvTAIL(check) ? "$" : ""),
557 (s ? " at offset " : "...\n") ) );
558
559 if (!s)
560 goto fail_finish;
561
6eb5f6b9
JH
562 check_at = s;
563
2c2d71f5 564 /* Finish the diagnostic message */
a3621e74 565 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
566
567 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
568 Start with the other substr.
569 XXXX no SCREAM optimization yet - and a very coarse implementation
a0288114 570 XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
2c2d71f5
JH
571 *always* match. Probably should be marked during compile...
572 Probably it is right to do no SCREAM here...
573 */
574
33b8afdf 575 if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
30944b6d 576 /* Take into account the "other" substring. */
2c2d71f5
JH
577 /* XXXX May be hopelessly wrong for UTF... */
578 if (!other_last)
6eb5f6b9 579 other_last = strpos;
33b8afdf 580 if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
30944b6d
IZ
581 do_other_anchored:
582 {
890ce7af
AL
583 char * const last = HOP3c(s, -start_shift, strbeg);
584 char *last1, *last2;
2c2d71f5 585 char *s1 = s;
33b8afdf 586 SV* must;
2c2d71f5 587
2c2d71f5
JH
588 t = s - prog->check_offset_max;
589 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 590 && (!do_utf8
1aa99e6b 591 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 592 && t > strpos)))
30944b6d 593 /* EMPTY */;
2c2d71f5
JH
594 else
595 t = strpos;
1aa99e6b 596 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
597 if (t < other_last) /* These positions already checked */
598 t = other_last;
1aa99e6b 599 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
600 if (last < last1)
601 last1 = last;
602 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
603 /* On end-of-str: see comment below. */
33b8afdf
JH
604 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
605 if (must == &PL_sv_undef) {
606 s = (char*)NULL;
a3621e74 607 DEBUG_EXECUTE_r(must = prog->anchored_utf8); /* for debug */
33b8afdf
JH
608 }
609 else
610 s = fbm_instr(
611 (unsigned char*)t,
612 HOP3(HOP3(last1, prog->anchored_offset, strend)
613 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
614 must,
7fba1cd6 615 multiline ? FBMrf_MULTILINE : 0
33b8afdf 616 );
a3621e74 617 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a0288114 618 "%s anchored substr \"%s%.*s%s\"%s",
2c2d71f5
JH
619 (s ? "Found" : "Contradicts"),
620 PL_colors[0],
33b8afdf
JH
621 (int)(SvCUR(must)
622 - (SvTAIL(must)!=0)),
3f7c398e 623 SvPVX_const(must),
33b8afdf 624 PL_colors[1], (SvTAIL(must) ? "$" : "")));
2c2d71f5
JH
625 if (!s) {
626 if (last1 >= last2) {
a3621e74 627 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5
JH
628 ", giving up...\n"));
629 goto fail_finish;
630 }
a3621e74 631 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 632 ", trying floating at offset %ld...\n",
1aa99e6b
IH
633 (long)(HOP3c(s1, 1, strend) - i_strpos)));
634 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
635 s = HOP3c(last, 1, strend);
2c2d71f5
JH
636 goto restart;
637 }
638 else {
a3621e74 639 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 640 (long)(s - i_strpos)));
1aa99e6b
IH
641 t = HOP3c(s, -prog->anchored_offset, strbeg);
642 other_last = HOP3c(s, 1, strend);
30944b6d 643 s = s1;
2c2d71f5
JH
644 if (t == strpos)
645 goto try_at_start;
2c2d71f5
JH
646 goto try_at_offset;
647 }
30944b6d 648 }
2c2d71f5
JH
649 }
650 else { /* Take into account the floating substring. */
33b8afdf
JH
651 char *last, *last1;
652 char *s1 = s;
653 SV* must;
654
655 t = HOP3c(s, -start_shift, strbeg);
656 last1 = last =
657 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
658 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
659 last = HOP3c(t, prog->float_max_offset, strend);
660 s = HOP3c(t, prog->float_min_offset, strend);
661 if (s < other_last)
662 s = other_last;
2c2d71f5 663 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
33b8afdf
JH
664 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
665 /* fbm_instr() takes into account exact value of end-of-str
666 if the check is SvTAIL(ed). Since false positives are OK,
667 and end-of-str is not later than strend we are OK. */
668 if (must == &PL_sv_undef) {
669 s = (char*)NULL;
a3621e74 670 DEBUG_EXECUTE_r(must = prog->float_utf8); /* for debug message */
33b8afdf
JH
671 }
672 else
2c2d71f5 673 s = fbm_instr((unsigned char*)s,
33b8afdf
JH
674 (unsigned char*)last + SvCUR(must)
675 - (SvTAIL(must)!=0),
7fba1cd6 676 must, multiline ? FBMrf_MULTILINE : 0);
a0288114 677 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s floating substr \"%s%.*s%s\"%s",
33b8afdf
JH
678 (s ? "Found" : "Contradicts"),
679 PL_colors[0],
680 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 681 SvPVX_const(must),
33b8afdf
JH
682 PL_colors[1], (SvTAIL(must) ? "$" : "")));
683 if (!s) {
684 if (last1 == last) {
a3621e74 685 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
686 ", giving up...\n"));
687 goto fail_finish;
2c2d71f5 688 }
a3621e74 689 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
690 ", trying anchored starting at offset %ld...\n",
691 (long)(s1 + 1 - i_strpos)));
692 other_last = last;
693 s = HOP3c(t, 1, strend);
694 goto restart;
695 }
696 else {
a3621e74 697 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
33b8afdf
JH
698 (long)(s - i_strpos)));
699 other_last = s; /* Fix this later. --Hugo */
700 s = s1;
701 if (t == strpos)
702 goto try_at_start;
703 goto try_at_offset;
704 }
2c2d71f5 705 }
cad2e5aa 706 }
2c2d71f5
JH
707
708 t = s - prog->check_offset_max;
2c2d71f5 709 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 710 && (!do_utf8
1aa99e6b
IH
711 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
712 && t > strpos))) {
2c2d71f5
JH
713 /* Fixed substring is found far enough so that the match
714 cannot start at strpos. */
715 try_at_offset:
cad2e5aa 716 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
717 /* Eventually fbm_*() should handle this, but often
718 anchored_offset is not 0, so this check will not be wasted. */
719 /* XXXX In the code below we prefer to look for "^" even in
720 presence of anchored substrings. And we search even
721 beyond the found float position. These pessimizations
722 are historical artefacts only. */
723 find_anchor:
2c2d71f5 724 while (t < strend - prog->minlen) {
cad2e5aa 725 if (*t == '\n') {
4ee3650e 726 if (t < check_at - prog->check_offset_min) {
33b8afdf 727 if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
4ee3650e
GS
728 /* Since we moved from the found position,
729 we definitely contradict the found anchored
30944b6d
IZ
730 substr. Due to the above check we do not
731 contradict "check" substr.
732 Thus we can arrive here only if check substr
733 is float. Redo checking for "other"=="fixed".
734 */
9041c2e3 735 strpos = t + 1;
a3621e74 736 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
e4584336 737 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
30944b6d
IZ
738 goto do_other_anchored;
739 }
4ee3650e
GS
740 /* We don't contradict the found floating substring. */
741 /* XXXX Why not check for STCLASS? */
cad2e5aa 742 s = t + 1;
a3621e74 743 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
e4584336 744 PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
745 goto set_useful;
746 }
4ee3650e
GS
747 /* Position contradicts check-string */
748 /* XXXX probably better to look for check-string
749 than for "\n", so one should lower the limit for t? */
a3621e74 750 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
e4584336 751 PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 752 other_last = strpos = s = t + 1;
cad2e5aa
JH
753 goto restart;
754 }
755 t++;
756 }
a3621e74 757 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
e4584336 758 PL_colors[0], PL_colors[1]));
2c2d71f5 759 goto fail_finish;
cad2e5aa 760 }
f5952150 761 else {
a3621e74 762 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
e4584336 763 PL_colors[0], PL_colors[1]));
f5952150 764 }
cad2e5aa
JH
765 s = t;
766 set_useful:
33b8afdf 767 ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
cad2e5aa
JH
768 }
769 else {
f5952150 770 /* The found string does not prohibit matching at strpos,
2c2d71f5 771 - no optimization of calling REx engine can be performed,
f5952150
GS
772 unless it was an MBOL and we are not after MBOL,
773 or a future STCLASS check will fail this. */
2c2d71f5
JH
774 try_at_start:
775 /* Even in this situation we may use MBOL flag if strpos is offset
776 wrt the start of the string. */
05b4157f 777 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 778 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
779 /* May be due to an implicit anchor of m{.*foo} */
780 && !(prog->reganch & ROPT_IMPLICIT))
781 {
cad2e5aa
JH
782 t = strpos;
783 goto find_anchor;
784 }
a3621e74 785 DEBUG_EXECUTE_r( if (ml_anch)
f5952150 786 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
e4584336 787 (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
30944b6d 788 );
2c2d71f5 789 success_at_start:
30944b6d 790 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
33b8afdf
JH
791 && (do_utf8 ? (
792 prog->check_utf8 /* Could be deleted already */
793 && --BmUSEFUL(prog->check_utf8) < 0
794 && (prog->check_utf8 == prog->float_utf8)
795 ) : (
796 prog->check_substr /* Could be deleted already */
797 && --BmUSEFUL(prog->check_substr) < 0
798 && (prog->check_substr == prog->float_substr)
799 )))
66e933ab 800 {
cad2e5aa 801 /* If flags & SOMETHING - do not do it many times on the same match */
a3621e74 802 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
33b8afdf
JH
803 SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
804 if (do_utf8 ? prog->check_substr : prog->check_utf8)
805 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
a0714e2c
SS
806 prog->check_substr = prog->check_utf8 = NULL; /* disable */
807 prog->float_substr = prog->float_utf8 = NULL; /* clear */
808 check = NULL; /* abort */
cad2e5aa 809 s = strpos;
3cf5c195
IZ
810 /* XXXX This is a remnant of the old implementation. It
811 looks wasteful, since now INTUIT can use many
6eb5f6b9 812 other heuristics. */
cad2e5aa
JH
813 prog->reganch &= ~RE_USE_INTUIT;
814 }
815 else
816 s = strpos;
817 }
818
6eb5f6b9
JH
819 /* Last resort... */
820 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
821 if (prog->regstclass) {
822 /* minlen == 0 is possible if regstclass is \b or \B,
823 and the fixed substr is ''$.
824 Since minlen is already taken into account, s+1 is before strend;
825 accidentally, minlen >= 1 guaranties no false positives at s + 1
826 even for \b or \B. But (minlen? 1 : 0) below assumes that
827 regstclass does not come from lookahead... */
828 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
829 This leaves EXACTF only, which is dealt with in find_byclass(). */
890ce7af 830 const U8* const str = (U8*)STRING(prog->regstclass);
06b5626a 831 const int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 832 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 833 : 1);
a3b680e6 834 const char * const endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
1aa99e6b 835 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
33b8afdf 836 : (prog->float_substr || prog->float_utf8
1aa99e6b
IH
837 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
838 cl_l, strend)
839 : strend);
6eb5f6b9
JH
840
841 t = s;
06b5626a 842 s = find_byclass(prog, prog->regstclass, s, endpos, 1);
6eb5f6b9
JH
843 if (!s) {
844#ifdef DEBUGGING
cbbf8932 845 const char *what = NULL;
6eb5f6b9
JH
846#endif
847 if (endpos == strend) {
a3621e74 848 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
849 "Could not match STCLASS...\n") );
850 goto fail;
851 }
a3621e74 852 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab 853 "This position contradicts STCLASS...\n") );
653099ff
GS
854 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
855 goto fail;
6eb5f6b9 856 /* Contradict one of substrings */
33b8afdf
JH
857 if (prog->anchored_substr || prog->anchored_utf8) {
858 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
a3621e74 859 DEBUG_EXECUTE_r( what = "anchored" );
6eb5f6b9 860 hop_and_restart:
1aa99e6b 861 s = HOP3c(t, 1, strend);
66e933ab
GS
862 if (s + start_shift + end_shift > strend) {
863 /* XXXX Should be taken into account earlier? */
a3621e74 864 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab
GS
865 "Could not match STCLASS...\n") );
866 goto fail;
867 }
5e39e1e5
HS
868 if (!check)
869 goto giveup;
a3621e74 870 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 871 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
872 what, (long)(s + start_shift - i_strpos)) );
873 goto restart;
874 }
66e933ab 875 /* Have both, check_string is floating */
6eb5f6b9
JH
876 if (t + start_shift >= check_at) /* Contradicts floating=check */
877 goto retry_floating_check;
878 /* Recheck anchored substring, but not floating... */
9041c2e3 879 s = check_at;
5e39e1e5
HS
880 if (!check)
881 goto giveup;
a3621e74 882 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 883 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
884 (long)(other_last - i_strpos)) );
885 goto do_other_anchored;
886 }
60e71179
GS
887 /* Another way we could have checked stclass at the
888 current position only: */
889 if (ml_anch) {
890 s = t = t + 1;
5e39e1e5
HS
891 if (!check)
892 goto giveup;
a3621e74 893 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 894 "Looking for /%s^%s/m starting at offset %ld...\n",
e4584336 895 PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
60e71179 896 goto try_at_offset;
66e933ab 897 }
33b8afdf 898 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
60e71179 899 goto fail;
6eb5f6b9
JH
900 /* Check is floating subtring. */
901 retry_floating_check:
902 t = check_at - start_shift;
a3621e74 903 DEBUG_EXECUTE_r( what = "floating" );
6eb5f6b9
JH
904 goto hop_and_restart;
905 }
b7953727 906 if (t != s) {
a3621e74 907 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 908 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
909 (long)(t - i_strpos), (long)(s - i_strpos))
910 );
911 }
912 else {
a3621e74 913 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b7953727
JH
914 "Does not contradict STCLASS...\n");
915 );
916 }
6eb5f6b9 917 }
5e39e1e5 918 giveup:
a3621e74 919 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
5e39e1e5
HS
920 PL_colors[4], (check ? "Guessed" : "Giving up"),
921 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 922 return s;
2c2d71f5
JH
923
924 fail_finish: /* Substring not found */
33b8afdf
JH
925 if (prog->check_substr || prog->check_utf8) /* could be removed already */
926 BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
cad2e5aa 927 fail:
a3621e74 928 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
e4584336 929 PL_colors[4], PL_colors[5]));
bd61b366 930 return NULL;
cad2e5aa 931}
9661b544 932
6eb5f6b9 933/* We know what class REx starts with. Try to find this position... */
3c3eec57 934STATIC char *
097eb12c 935S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *strend, I32 norun)
a687059c 936{
27da23d5 937 dVAR;
1df70142 938 const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
6eb5f6b9 939 char *m;
d8093b23 940 STRLEN ln;
5dab1207 941 STRLEN lnc;
078c425b 942 register STRLEN uskip;
d8093b23
G
943 unsigned int c1;
944 unsigned int c2;
6eb5f6b9
JH
945 char *e;
946 register I32 tmp = 1; /* Scratch variable? */
a3b680e6 947 register const bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 948
6eb5f6b9
JH
949 /* We know what class it must start with. */
950 switch (OP(c)) {
6eb5f6b9 951 case ANYOF:
388cc4de 952 if (do_utf8) {
078c425b 953 while (s + (uskip = UTF8SKIP(s)) <= strend) {
388cc4de
HS
954 if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
955 !UTF8_IS_INVARIANT((U8)s[0]) ?
32fc9b6a
DM
956 reginclass(prog, c, (U8*)s, 0, do_utf8) :
957 REGINCLASS(prog, c, (U8*)s)) {
388cc4de
HS
958 if (tmp && (norun || regtry(prog, s)))
959 goto got_it;
960 else
961 tmp = doevery;
962 }
963 else
964 tmp = 1;
078c425b 965 s += uskip;
388cc4de
HS
966 }
967 }
968 else {
969 while (s < strend) {
970 STRLEN skip = 1;
971
32fc9b6a 972 if (REGINCLASS(prog, c, (U8*)s) ||
388cc4de
HS
973 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
974 /* The assignment of 2 is intentional:
975 * for the folded sharp s, the skip is 2. */
976 (skip = SHARP_S_SKIP))) {
977 if (tmp && (norun || regtry(prog, s)))
978 goto got_it;
979 else
980 tmp = doevery;
981 }
982 else
983 tmp = 1;
984 s += skip;
985 }
a0d0e21e 986 }
6eb5f6b9 987 break;
f33976b4
DB
988 case CANY:
989 while (s < strend) {
990 if (tmp && (norun || regtry(prog, s)))
991 goto got_it;
992 else
993 tmp = doevery;
994 s++;
995 }
996 break;
6eb5f6b9 997 case EXACTF:
5dab1207
NIS
998 m = STRING(c);
999 ln = STR_LEN(c); /* length to match in octets/bytes */
1000 lnc = (I32) ln; /* length to match in characters */
1aa99e6b 1001 if (UTF) {
a2a2844f 1002 STRLEN ulen1, ulen2;
5dab1207 1003 U8 *sm = (U8 *) m;
89ebb4a3
JH
1004 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1005 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4ad0818d 1006 const U32 uniflags = UTF8_ALLOW_DEFAULT;
a2a2844f
JH
1007
1008 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1009 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1010
89ebb4a3 1011 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
041457d9 1012 0, uniflags);
89ebb4a3 1013 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
041457d9 1014 0, uniflags);
5dab1207
NIS
1015 lnc = 0;
1016 while (sm < ((U8 *) m + ln)) {
1017 lnc++;
1018 sm += UTF8SKIP(sm);
1019 }
1aa99e6b
IH
1020 }
1021 else {
1022 c1 = *(U8*)m;
1023 c2 = PL_fold[c1];
1024 }
6eb5f6b9
JH
1025 goto do_exactf;
1026 case EXACTFL:
5dab1207
NIS
1027 m = STRING(c);
1028 ln = STR_LEN(c);
1029 lnc = (I32) ln;
d8093b23 1030 c1 = *(U8*)m;
6eb5f6b9
JH
1031 c2 = PL_fold_locale[c1];
1032 do_exactf:
db12adc6 1033 e = HOP3c(strend, -((I32)lnc), s);
b3c9acc1 1034
6eb5f6b9
JH
1035 if (norun && e < s)
1036 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 1037
60a8b682
JH
1038 /* The idea in the EXACTF* cases is to first find the
1039 * first character of the EXACTF* node and then, if
1040 * necessary, case-insensitively compare the full
1041 * text of the node. The c1 and c2 are the first
1042 * characters (though in Unicode it gets a bit
1043 * more complicated because there are more cases
7f16dd3d
JH
1044 * than just upper and lower: one needs to use
1045 * the so-called folding case for case-insensitive
1046 * matching (called "loose matching" in Unicode).
1047 * ibcmp_utf8() will do just that. */
60a8b682 1048
1aa99e6b 1049 if (do_utf8) {
575cac57 1050 UV c, f;
89ebb4a3 1051 U8 tmpbuf [UTF8_MAXBYTES+1];
575cac57 1052 STRLEN len, foldlen;
4ad0818d 1053 const U32 uniflags = UTF8_ALLOW_DEFAULT;
09091399 1054 if (c1 == c2) {
5dab1207
NIS
1055 /* Upper and lower of 1st char are equal -
1056 * probably not a "letter". */
1aa99e6b 1057 while (s <= e) {
89ebb4a3 1058 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1059 uniflags);
80aecb99
JH
1060 if ( c == c1
1061 && (ln == len ||
66423254 1062 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1063 m, (char **)0, ln, (bool)UTF))
55da9344 1064 && (norun || regtry(prog, s)) )
1aa99e6b 1065 goto got_it;
80aecb99 1066 else {
1df70142 1067 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1068 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1069 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1070 if ( f != c
1071 && (f == c1 || f == c2)
1072 && (ln == foldlen ||
66423254
JH
1073 !ibcmp_utf8((char *) foldbuf,
1074 (char **)0, foldlen, do_utf8,
d07ddd77 1075 m,
eb160463 1076 (char **)0, ln, (bool)UTF))
80aecb99
JH
1077 && (norun || regtry(prog, s)) )
1078 goto got_it;
1079 }
1aa99e6b
IH
1080 s += len;
1081 }
09091399
JH
1082 }
1083 else {
1aa99e6b 1084 while (s <= e) {
89ebb4a3 1085 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1086 uniflags);
80aecb99 1087
60a8b682 1088 /* Handle some of the three Greek sigmas cases.
8c01da3c
JH
1089 * Note that not all the possible combinations
1090 * are handled here: some of them are handled
1091 * by the standard folding rules, and some of
1092 * them (the character class or ANYOF cases)
1093 * are handled during compiletime in
1094 * regexec.c:S_regclass(). */
880bd946
JH
1095 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1096 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1097 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99
JH
1098
1099 if ( (c == c1 || c == c2)
1100 && (ln == len ||
66423254 1101 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1102 m, (char **)0, ln, (bool)UTF))
55da9344 1103 && (norun || regtry(prog, s)) )
1aa99e6b 1104 goto got_it;
80aecb99 1105 else {
1df70142 1106 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1107 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1108 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1109 if ( f != c
1110 && (f == c1 || f == c2)
1111 && (ln == foldlen ||
a6872d42 1112 !ibcmp_utf8((char *) foldbuf,
66423254 1113 (char **)0, foldlen, do_utf8,
d07ddd77 1114 m,
eb160463 1115 (char **)0, ln, (bool)UTF))
80aecb99
JH
1116 && (norun || regtry(prog, s)) )
1117 goto got_it;
1118 }
1aa99e6b
IH
1119 s += len;
1120 }
09091399 1121 }
1aa99e6b
IH
1122 }
1123 else {
1124 if (c1 == c2)
1125 while (s <= e) {
1126 if ( *(U8*)s == c1
1127 && (ln == 1 || !(OP(c) == EXACTF
1128 ? ibcmp(s, m, ln)
1129 : ibcmp_locale(s, m, ln)))
1130 && (norun || regtry(prog, s)) )
1131 goto got_it;
1132 s++;
1133 }
1134 else
1135 while (s <= e) {
1136 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1137 && (ln == 1 || !(OP(c) == EXACTF
1138 ? ibcmp(s, m, ln)
1139 : ibcmp_locale(s, m, ln)))
1140 && (norun || regtry(prog, s)) )
1141 goto got_it;
1142 s++;
1143 }
b3c9acc1
IZ
1144 }
1145 break;
bbce6d69 1146 case BOUNDL:
3280af22 1147 PL_reg_flags |= RF_tainted;
bbce6d69 1148 /* FALL THROUGH */
a0d0e21e 1149 case BOUND:
ffc61ed2 1150 if (do_utf8) {
12d33761 1151 if (s == PL_bostr)
ffc61ed2
JH
1152 tmp = '\n';
1153 else {
6136c704 1154 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1155 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1156 }
1157 tmp = ((OP(c) == BOUND ?
9041c2e3 1158 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1159 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1160 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1161 if (tmp == !(OP(c) == BOUND ?
3568d838 1162 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1163 isALNUM_LC_utf8((U8*)s)))
1164 {
1165 tmp = !tmp;
1166 if ((norun || regtry(prog, s)))
1167 goto got_it;
1168 }
078c425b 1169 s += uskip;
a687059c 1170 }
a0d0e21e 1171 }
667bb95a 1172 else {
12d33761 1173 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1174 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1175 while (s < strend) {
1176 if (tmp ==
1177 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1178 tmp = !tmp;
1179 if ((norun || regtry(prog, s)))
1180 goto got_it;
1181 }
1182 s++;
a0ed51b3 1183 }
a0ed51b3 1184 }
6eb5f6b9 1185 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1186 goto got_it;
1187 break;
bbce6d69 1188 case NBOUNDL:
3280af22 1189 PL_reg_flags |= RF_tainted;
bbce6d69 1190 /* FALL THROUGH */
a0d0e21e 1191 case NBOUND:
ffc61ed2 1192 if (do_utf8) {
12d33761 1193 if (s == PL_bostr)
ffc61ed2
JH
1194 tmp = '\n';
1195 else {
6136c704 1196 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1197 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1198 }
1199 tmp = ((OP(c) == NBOUND ?
9041c2e3 1200 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1201 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1202 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1203 if (tmp == !(OP(c) == NBOUND ?
3568d838 1204 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1205 isALNUM_LC_utf8((U8*)s)))
1206 tmp = !tmp;
1207 else if ((norun || regtry(prog, s)))
1208 goto got_it;
078c425b 1209 s += uskip;
ffc61ed2 1210 }
a0d0e21e 1211 }
667bb95a 1212 else {
12d33761 1213 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1214 tmp = ((OP(c) == NBOUND ?
1215 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1216 while (s < strend) {
1217 if (tmp ==
1218 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1219 tmp = !tmp;
1220 else if ((norun || regtry(prog, s)))
1221 goto got_it;
1222 s++;
1223 }
a0ed51b3 1224 }
6eb5f6b9 1225 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1226 goto got_it;
1227 break;
a0d0e21e 1228 case ALNUM:
ffc61ed2 1229 if (do_utf8) {
1a4fad37 1230 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1231 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1232 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1233 if (tmp && (norun || regtry(prog, s)))
1234 goto got_it;
1235 else
1236 tmp = doevery;
1237 }
bbce6d69 1238 else
ffc61ed2 1239 tmp = 1;
078c425b 1240 s += uskip;
bbce6d69 1241 }
bbce6d69 1242 }
ffc61ed2
JH
1243 else {
1244 while (s < strend) {
1245 if (isALNUM(*s)) {
1246 if (tmp && (norun || regtry(prog, s)))
1247 goto got_it;
1248 else
1249 tmp = doevery;
1250 }
a0ed51b3 1251 else
ffc61ed2
JH
1252 tmp = 1;
1253 s++;
a0ed51b3 1254 }
a0ed51b3
LW
1255 }
1256 break;
bbce6d69 1257 case ALNUML:
3280af22 1258 PL_reg_flags |= RF_tainted;
ffc61ed2 1259 if (do_utf8) {
078c425b 1260 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1261 if (isALNUM_LC_utf8((U8*)s)) {
1262 if (tmp && (norun || regtry(prog, s)))
1263 goto got_it;
1264 else
1265 tmp = doevery;
1266 }
a687059c 1267 else
ffc61ed2 1268 tmp = 1;
078c425b 1269 s += uskip;
a0d0e21e 1270 }
a0d0e21e 1271 }
ffc61ed2
JH
1272 else {
1273 while (s < strend) {
1274 if (isALNUM_LC(*s)) {
1275 if (tmp && (norun || regtry(prog, s)))
1276 goto got_it;
1277 else
1278 tmp = doevery;
1279 }
a0ed51b3 1280 else
ffc61ed2
JH
1281 tmp = 1;
1282 s++;
a0ed51b3 1283 }
a0ed51b3
LW
1284 }
1285 break;
a0d0e21e 1286 case NALNUM:
ffc61ed2 1287 if (do_utf8) {
1a4fad37 1288 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1289 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1290 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1291 if (tmp && (norun || regtry(prog, s)))
1292 goto got_it;
1293 else
1294 tmp = doevery;
1295 }
bbce6d69 1296 else
ffc61ed2 1297 tmp = 1;
078c425b 1298 s += uskip;
bbce6d69 1299 }
bbce6d69 1300 }
ffc61ed2
JH
1301 else {
1302 while (s < strend) {
1303 if (!isALNUM(*s)) {
1304 if (tmp && (norun || regtry(prog, s)))
1305 goto got_it;
1306 else
1307 tmp = doevery;
1308 }
a0ed51b3 1309 else
ffc61ed2
JH
1310 tmp = 1;
1311 s++;
a0ed51b3 1312 }
a0ed51b3
LW
1313 }
1314 break;
bbce6d69 1315 case NALNUML:
3280af22 1316 PL_reg_flags |= RF_tainted;
ffc61ed2 1317 if (do_utf8) {
078c425b 1318 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1319 if (!isALNUM_LC_utf8((U8*)s)) {
1320 if (tmp && (norun || regtry(prog, s)))
1321 goto got_it;
1322 else
1323 tmp = doevery;
1324 }
a687059c 1325 else
ffc61ed2 1326 tmp = 1;
078c425b 1327 s += uskip;
a687059c 1328 }
a0d0e21e 1329 }
ffc61ed2
JH
1330 else {
1331 while (s < strend) {
1332 if (!isALNUM_LC(*s)) {
1333 if (tmp && (norun || regtry(prog, s)))
1334 goto got_it;
1335 else
1336 tmp = doevery;
1337 }
a0ed51b3 1338 else
ffc61ed2
JH
1339 tmp = 1;
1340 s++;
a0ed51b3 1341 }
a0ed51b3
LW
1342 }
1343 break;
a0d0e21e 1344 case SPACE:
ffc61ed2 1345 if (do_utf8) {
1a4fad37 1346 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1347 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1348 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1349 if (tmp && (norun || regtry(prog, s)))
1350 goto got_it;
1351 else
1352 tmp = doevery;
1353 }
a0d0e21e 1354 else
ffc61ed2 1355 tmp = 1;
078c425b 1356 s += uskip;
2304df62 1357 }
a0d0e21e 1358 }
ffc61ed2
JH
1359 else {
1360 while (s < strend) {
1361 if (isSPACE(*s)) {
1362 if (tmp && (norun || regtry(prog, s)))
1363 goto got_it;
1364 else
1365 tmp = doevery;
1366 }
a0ed51b3 1367 else
ffc61ed2
JH
1368 tmp = 1;
1369 s++;
a0ed51b3 1370 }
a0ed51b3
LW
1371 }
1372 break;
bbce6d69 1373 case SPACEL:
3280af22 1374 PL_reg_flags |= RF_tainted;
ffc61ed2 1375 if (do_utf8) {
078c425b 1376 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1377 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1378 if (tmp && (norun || regtry(prog, s)))
1379 goto got_it;
1380 else
1381 tmp = doevery;
1382 }
bbce6d69 1383 else
ffc61ed2 1384 tmp = 1;
078c425b 1385 s += uskip;
bbce6d69 1386 }
bbce6d69 1387 }
ffc61ed2
JH
1388 else {
1389 while (s < strend) {
1390 if (isSPACE_LC(*s)) {
1391 if (tmp && (norun || regtry(prog, s)))
1392 goto got_it;
1393 else
1394 tmp = doevery;
1395 }
a0ed51b3 1396 else
ffc61ed2
JH
1397 tmp = 1;
1398 s++;
a0ed51b3 1399 }
a0ed51b3
LW
1400 }
1401 break;
a0d0e21e 1402 case NSPACE:
ffc61ed2 1403 if (do_utf8) {
1a4fad37 1404 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1405 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1406 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1407 if (tmp && (norun || regtry(prog, s)))
1408 goto got_it;
1409 else
1410 tmp = doevery;
1411 }
a0d0e21e 1412 else
ffc61ed2 1413 tmp = 1;
078c425b 1414 s += uskip;
a687059c 1415 }
a0d0e21e 1416 }
ffc61ed2
JH
1417 else {
1418 while (s < strend) {
1419 if (!isSPACE(*s)) {
1420 if (tmp && (norun || regtry(prog, s)))
1421 goto got_it;
1422 else
1423 tmp = doevery;
1424 }
a0ed51b3 1425 else
ffc61ed2
JH
1426 tmp = 1;
1427 s++;
a0ed51b3 1428 }
a0ed51b3
LW
1429 }
1430 break;
bbce6d69 1431 case NSPACEL:
3280af22 1432 PL_reg_flags |= RF_tainted;
ffc61ed2 1433 if (do_utf8) {
078c425b 1434 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1435 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1436 if (tmp && (norun || regtry(prog, s)))
1437 goto got_it;
1438 else
1439 tmp = doevery;
1440 }
bbce6d69 1441 else
ffc61ed2 1442 tmp = 1;
078c425b 1443 s += uskip;
bbce6d69 1444 }
bbce6d69 1445 }
ffc61ed2
JH
1446 else {
1447 while (s < strend) {
1448 if (!isSPACE_LC(*s)) {
1449 if (tmp && (norun || regtry(prog, s)))
1450 goto got_it;
1451 else
1452 tmp = doevery;
1453 }
a0ed51b3 1454 else
ffc61ed2
JH
1455 tmp = 1;
1456 s++;
a0ed51b3 1457 }
a0ed51b3
LW
1458 }
1459 break;
a0d0e21e 1460 case DIGIT:
ffc61ed2 1461 if (do_utf8) {
1a4fad37 1462 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1463 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1464 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1465 if (tmp && (norun || regtry(prog, s)))
1466 goto got_it;
1467 else
1468 tmp = doevery;
1469 }
a0d0e21e 1470 else
ffc61ed2 1471 tmp = 1;
078c425b 1472 s += uskip;
2b69d0c2 1473 }
a0d0e21e 1474 }
ffc61ed2
JH
1475 else {
1476 while (s < strend) {
1477 if (isDIGIT(*s)) {
1478 if (tmp && (norun || regtry(prog, s)))
1479 goto got_it;
1480 else
1481 tmp = doevery;
1482 }
a0ed51b3 1483 else
ffc61ed2
JH
1484 tmp = 1;
1485 s++;
a0ed51b3 1486 }
a0ed51b3
LW
1487 }
1488 break;
b8c5462f
JH
1489 case DIGITL:
1490 PL_reg_flags |= RF_tainted;
ffc61ed2 1491 if (do_utf8) {
078c425b 1492 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1493 if (isDIGIT_LC_utf8((U8*)s)) {
1494 if (tmp && (norun || regtry(prog, s)))
1495 goto got_it;
1496 else
1497 tmp = doevery;
1498 }
b8c5462f 1499 else
ffc61ed2 1500 tmp = 1;
078c425b 1501 s += uskip;
b8c5462f 1502 }
b8c5462f 1503 }
ffc61ed2
JH
1504 else {
1505 while (s < strend) {
1506 if (isDIGIT_LC(*s)) {
1507 if (tmp && (norun || regtry(prog, s)))
1508 goto got_it;
1509 else
1510 tmp = doevery;
1511 }
b8c5462f 1512 else
ffc61ed2
JH
1513 tmp = 1;
1514 s++;
b8c5462f 1515 }
b8c5462f
JH
1516 }
1517 break;
a0d0e21e 1518 case NDIGIT:
ffc61ed2 1519 if (do_utf8) {
1a4fad37 1520 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1521 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1522 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1523 if (tmp && (norun || regtry(prog, s)))
1524 goto got_it;
1525 else
1526 tmp = doevery;
1527 }
a0d0e21e 1528 else
ffc61ed2 1529 tmp = 1;
078c425b 1530 s += uskip;
a687059c 1531 }
a0d0e21e 1532 }
ffc61ed2
JH
1533 else {
1534 while (s < strend) {
1535 if (!isDIGIT(*s)) {
1536 if (tmp && (norun || regtry(prog, s)))
1537 goto got_it;
1538 else
1539 tmp = doevery;
1540 }
a0ed51b3 1541 else
ffc61ed2
JH
1542 tmp = 1;
1543 s++;
a0ed51b3 1544 }
a0ed51b3
LW
1545 }
1546 break;
b8c5462f
JH
1547 case NDIGITL:
1548 PL_reg_flags |= RF_tainted;
ffc61ed2 1549 if (do_utf8) {
078c425b 1550 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1551 if (!isDIGIT_LC_utf8((U8*)s)) {
1552 if (tmp && (norun || regtry(prog, s)))
1553 goto got_it;
1554 else
1555 tmp = doevery;
1556 }
b8c5462f 1557 else
ffc61ed2 1558 tmp = 1;
078c425b 1559 s += uskip;
b8c5462f 1560 }
a0ed51b3 1561 }
ffc61ed2
JH
1562 else {
1563 while (s < strend) {
1564 if (!isDIGIT_LC(*s)) {
1565 if (tmp && (norun || regtry(prog, s)))
1566 goto got_it;
1567 else
1568 tmp = doevery;
1569 }
cf93c79d 1570 else
ffc61ed2
JH
1571 tmp = 1;
1572 s++;
b8c5462f 1573 }
b8c5462f
JH
1574 }
1575 break;
b3c9acc1 1576 default:
3c3eec57
GS
1577 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1578 break;
d6a28714 1579 }
6eb5f6b9
JH
1580 return 0;
1581 got_it:
1582 return s;
1583}
1584
1585/*
1586 - regexec_flags - match a regexp against a string
1587 */
1588I32
1589Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1590 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1591/* strend: pointer to null at end of string */
1592/* strbeg: real beginning of string */
1593/* minend: end of match must be >=minend after stringarg. */
1594/* data: May be used for some additional optimizations. */
1595/* nosave: For optimizations. */
1596{
97aff369 1597 dVAR;
6eb5f6b9
JH
1598 register char *s;
1599 register regnode *c;
1600 register char *startpos = stringarg;
6eb5f6b9
JH
1601 I32 minlen; /* must match at least this many chars */
1602 I32 dontbother = 0; /* how many characters not to try at end */
6eb5f6b9
JH
1603 I32 end_shift = 0; /* Same for the end. */ /* CC */
1604 I32 scream_pos = -1; /* Internal iterator of scream. */
ccac19ea 1605 char *scream_olds = NULL;
6eb5f6b9 1606 SV* oreplsv = GvSV(PL_replgv);
1df70142 1607 const bool do_utf8 = DO_UTF8(sv);
a3b680e6 1608 const I32 multiline = prog->reganch & PMf_MULTILINE;
2a782b5b 1609#ifdef DEBUGGING
6136c704
AL
1610 SV * const dsv0 = PERL_DEBUG_PAD_ZERO(0);
1611 SV * const dsv1 = PERL_DEBUG_PAD_ZERO(1);
2a782b5b 1612#endif
a3621e74
YO
1613
1614 GET_RE_DEBUG_FLAGS_DECL;
1615
9d4ba2ae 1616 PERL_UNUSED_ARG(data);
6eb5f6b9
JH
1617
1618 /* Be paranoid... */
1619 if (prog == NULL || startpos == NULL) {
1620 Perl_croak(aTHX_ "NULL regexp parameter");
1621 return 0;
1622 }
1623
bac06658
JH
1624#ifdef DEBUGGING
1625 PL_regnarrate = DEBUG_r_TEST;
1626#endif
1627
1628 RX_MATCH_UTF8_set(prog, do_utf8);
1629
6eb5f6b9 1630 minlen = prog->minlen;
61a36c01 1631 if (strend - startpos < minlen) {
a3621e74 1632 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584
JH
1633 "String too short [regexec_flags]...\n"));
1634 goto phooey;
1aa99e6b 1635 }
6eb5f6b9 1636
6eb5f6b9
JH
1637 /* Check validity of program. */
1638 if (UCHARAT(prog->program) != REG_MAGIC) {
1639 Perl_croak(aTHX_ "corrupted regexp program");
1640 }
1641
1642 PL_reg_flags = 0;
1643 PL_reg_eval_set = 0;
1644 PL_reg_maxiter = 0;
1645
1646 if (prog->reganch & ROPT_UTF8)
1647 PL_reg_flags |= RF_utf8;
1648
1649 /* Mark beginning of line for ^ and lookbehind. */
1650 PL_regbol = startpos;
1651 PL_bostr = strbeg;
1652 PL_reg_sv = sv;
1653
1654 /* Mark end of line for $ (and such) */
1655 PL_regeol = strend;
1656
1657 /* see how far we have to get to not match where we matched before */
1658 PL_regtill = startpos+minend;
1659
6eb5f6b9
JH
1660 /* If there is a "must appear" string, look for it. */
1661 s = startpos;
1662
1663 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1664 MAGIC *mg;
1665
1666 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1667 PL_reg_ganch = startpos;
1668 else if (sv && SvTYPE(sv) >= SVt_PVMG
1669 && SvMAGIC(sv)
14befaf4
DM
1670 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1671 && mg->mg_len >= 0) {
6eb5f6b9
JH
1672 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1673 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1674 if (s > PL_reg_ganch)
6eb5f6b9
JH
1675 goto phooey;
1676 s = PL_reg_ganch;
1677 }
1678 }
1679 else /* pos() not defined */
1680 PL_reg_ganch = strbeg;
1681 }
1682
a0714e2c 1683 if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
6eb5f6b9
JH
1684 re_scream_pos_data d;
1685
1686 d.scream_olds = &scream_olds;
1687 d.scream_pos = &scream_pos;
1688 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7 1689 if (!s) {
a3621e74 1690 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1691 goto phooey; /* not present */
3fa9c3d7 1692 }
6eb5f6b9
JH
1693 }
1694
a3621e74 1695 DEBUG_EXECUTE_r({
1df70142
AL
1696 const char * const s0 = UTF
1697 ? pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1698 UNI_DISPLAY_REGEX)
1699 : prog->precomp;
1700 const int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1701 const char * const s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
c728cb41 1702 UNI_DISPLAY_REGEX) : startpos;
1df70142 1703 const int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
2a782b5b
JH
1704 if (!PL_colorset)
1705 reginitcolors();
1706 PerlIO_printf(Perl_debug_log,
a0288114 1707 "%sMatching REx%s \"%s%*.*s%s%s\" against \"%s%.*s%s%s\"\n",
e4584336 1708 PL_colors[4], PL_colors[5], PL_colors[0],
9e55ce06 1709 len0, len0, s0,
2a782b5b 1710 PL_colors[1],
9e55ce06 1711 len0 > 60 ? "..." : "",
2a782b5b 1712 PL_colors[0],
9e55ce06
JH
1713 (int)(len1 > 60 ? 60 : len1),
1714 s1, PL_colors[1],
1715 (len1 > 60 ? "..." : "")
2a782b5b
JH
1716 );
1717 });
6eb5f6b9
JH
1718
1719 /* Simplest case: anchored match need be tried only once. */
1720 /* [unless only anchor is BOL and multiline is set] */
1721 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1722 if (s == startpos && regtry(prog, startpos))
1723 goto got_it;
7fba1cd6 1724 else if (multiline || (prog->reganch & ROPT_IMPLICIT)
6eb5f6b9
JH
1725 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1726 {
1727 char *end;
1728
1729 if (minlen)
1730 dontbother = minlen - 1;
1aa99e6b 1731 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9 1732 /* for multiline we only have to try after newlines */
33b8afdf 1733 if (prog->check_substr || prog->check_utf8) {
6eb5f6b9
JH
1734 if (s == startpos)
1735 goto after_try;
1736 while (1) {
1737 if (regtry(prog, s))
1738 goto got_it;
1739 after_try:
1740 if (s >= end)
1741 goto phooey;
1742 if (prog->reganch & RE_USE_INTUIT) {
1743 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1744 if (!s)
1745 goto phooey;
1746 }
1747 else
1748 s++;
1749 }
1750 } else {
1751 if (s > startpos)
1752 s--;
1753 while (s < end) {
1754 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1755 if (regtry(prog, s))
1756 goto got_it;
1757 }
1758 }
1759 }
1760 }
1761 goto phooey;
1762 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1763 if (regtry(prog, PL_reg_ganch))
1764 goto got_it;
1765 goto phooey;
1766 }
1767
1768 /* Messy cases: unanchored match. */
33b8afdf 1769 if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1770 /* we have /x+whatever/ */
1771 /* it must be a one character string (XXXX Except UTF?) */
33b8afdf 1772 char ch;
bf93d4cc
GS
1773#ifdef DEBUGGING
1774 int did_match = 0;
1775#endif
33b8afdf
JH
1776 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1777 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
3f7c398e 1778 ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
bf93d4cc 1779
1aa99e6b 1780 if (do_utf8) {
6eb5f6b9
JH
1781 while (s < strend) {
1782 if (*s == ch) {
a3621e74 1783 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1784 if (regtry(prog, s)) goto got_it;
1785 s += UTF8SKIP(s);
1786 while (s < strend && *s == ch)
1787 s += UTF8SKIP(s);
1788 }
1789 s += UTF8SKIP(s);
1790 }
1791 }
1792 else {
1793 while (s < strend) {
1794 if (*s == ch) {
a3621e74 1795 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1796 if (regtry(prog, s)) goto got_it;
1797 s++;
1798 while (s < strend && *s == ch)
1799 s++;
1800 }
1801 s++;
1802 }
1803 }
a3621e74 1804 DEBUG_EXECUTE_r(if (!did_match)
bf93d4cc 1805 PerlIO_printf(Perl_debug_log,
b7953727
JH
1806 "Did not find anchored character...\n")
1807 );
6eb5f6b9 1808 }
a0714e2c
SS
1809 else if (prog->anchored_substr != NULL
1810 || prog->anchored_utf8 != NULL
1811 || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
33b8afdf
JH
1812 && prog->float_max_offset < strend - s)) {
1813 SV *must;
1814 I32 back_max;
1815 I32 back_min;
1816 char *last;
6eb5f6b9 1817 char *last1; /* Last position checked before */
bf93d4cc
GS
1818#ifdef DEBUGGING
1819 int did_match = 0;
1820#endif
33b8afdf
JH
1821 if (prog->anchored_substr || prog->anchored_utf8) {
1822 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1823 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1824 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1825 back_max = back_min = prog->anchored_offset;
1826 } else {
1827 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1828 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1829 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1830 back_max = prog->float_max_offset;
1831 back_min = prog->float_min_offset;
1832 }
1833 if (must == &PL_sv_undef)
1834 /* could not downgrade utf8 check substring, so must fail */
1835 goto phooey;
1836
1837 last = HOP3c(strend, /* Cannot start after this */
1838 -(I32)(CHR_SVLEN(must)
1839 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9
JH
1840
1841 if (s > PL_bostr)
1842 last1 = HOPc(s, -1);
1843 else
1844 last1 = s - 1; /* bogus */
1845
a0288114 1846 /* XXXX check_substr already used to find "s", can optimize if
6eb5f6b9
JH
1847 check_substr==must. */
1848 scream_pos = -1;
1849 dontbother = end_shift;
1850 strend = HOPc(strend, -dontbother);
1851 while ( (s <= last) &&
9041c2e3 1852 ((flags & REXEC_SCREAM)
1aa99e6b 1853 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1854 end_shift, &scream_pos, 0))
1aa99e6b 1855 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1856 (unsigned char*)strend, must,
7fba1cd6 1857 multiline ? FBMrf_MULTILINE : 0))) ) {
4addbd3b
HS
1858 /* we may be pointing at the wrong string */
1859 if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
3f7c398e 1860 s = strbeg + (s - SvPVX_const(sv));
a3621e74 1861 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1862 if (HOPc(s, -back_max) > last1) {
1863 last1 = HOPc(s, -back_min);
1864 s = HOPc(s, -back_max);
1865 }
1866 else {
52657f30 1867 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
6eb5f6b9
JH
1868
1869 last1 = HOPc(s, -back_min);
52657f30 1870 s = t;
6eb5f6b9 1871 }
1aa99e6b 1872 if (do_utf8) {
6eb5f6b9
JH
1873 while (s <= last1) {
1874 if (regtry(prog, s))
1875 goto got_it;
1876 s += UTF8SKIP(s);
1877 }
1878 }
1879 else {
1880 while (s <= last1) {
1881 if (regtry(prog, s))
1882 goto got_it;
1883 s++;
1884 }
1885 }
1886 }
a3621e74 1887 DEBUG_EXECUTE_r(if (!did_match)
b7953727 1888 PerlIO_printf(Perl_debug_log,
a0288114 1889 "Did not find %s substr \"%s%.*s%s\"%s...\n",
33b8afdf 1890 ((must == prog->anchored_substr || must == prog->anchored_utf8)
bf93d4cc
GS
1891 ? "anchored" : "floating"),
1892 PL_colors[0],
1893 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 1894 SvPVX_const(must),
b7953727
JH
1895 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1896 );
6eb5f6b9
JH
1897 goto phooey;
1898 }
155aba94 1899 else if ((c = prog->regstclass)) {
f14c76ed
RGS
1900 if (minlen) {
1901 I32 op = (U8)OP(prog->regstclass);
66e933ab 1902 /* don't bother with what can't match */
f14c76ed
RGS
1903 if (PL_regkind[op] != EXACT && op != CANY)
1904 strend = HOPc(strend, -(minlen - 1));
1905 }
a3621e74 1906 DEBUG_EXECUTE_r({
ffc61ed2 1907 SV *prop = sv_newmortal();
cfd0369c
NC
1908 const char *s0;
1909 const char *s1;
9e55ce06
JH
1910 int len0;
1911 int len1;
1912
32fc9b6a 1913 regprop(prog, prop, c);
9e55ce06 1914 s0 = UTF ?
3f7c398e 1915 pv_uni_display(dsv0, (U8*)SvPVX_const(prop), SvCUR(prop), 60,
c728cb41 1916 UNI_DISPLAY_REGEX) :
cfd0369c 1917 SvPVX_const(prop);
9e55ce06
JH
1918 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1919 s1 = UTF ?
c728cb41 1920 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
9e55ce06
JH
1921 len1 = UTF ? SvCUR(dsv1) : strend - s;
1922 PerlIO_printf(Perl_debug_log,
a0288114 1923 "Matching stclass \"%*.*s\" against \"%*.*s\"\n",
9e55ce06
JH
1924 len0, len0, s0,
1925 len1, len1, s1);
ffc61ed2 1926 });
06b5626a 1927 if (find_byclass(prog, c, s, strend, 0))
6eb5f6b9 1928 goto got_it;
a3621e74 1929 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1930 }
1931 else {
1932 dontbother = 0;
a0714e2c 1933 if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
33b8afdf 1934 /* Trim the end. */
d6a28714 1935 char *last;
33b8afdf
JH
1936 SV* float_real;
1937
1938 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1939 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1940 float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
d6a28714
JH
1941
1942 if (flags & REXEC_SCREAM) {
33b8afdf 1943 last = screaminstr(sv, float_real, s - strbeg,
d6a28714
JH
1944 end_shift, &scream_pos, 1); /* last one */
1945 if (!last)
ffc61ed2 1946 last = scream_olds; /* Only one occurrence. */
4addbd3b
HS
1947 /* we may be pointing at the wrong string */
1948 else if (RX_MATCH_COPIED(prog))
3f7c398e 1949 s = strbeg + (s - SvPVX_const(sv));
b8c5462f 1950 }
d6a28714
JH
1951 else {
1952 STRLEN len;
cfd0369c 1953 const char * const little = SvPV_const(float_real, len);
d6a28714 1954
33b8afdf 1955 if (SvTAIL(float_real)) {
d6a28714
JH
1956 if (memEQ(strend - len + 1, little, len - 1))
1957 last = strend - len + 1;
7fba1cd6 1958 else if (!multiline)
9041c2e3 1959 last = memEQ(strend - len, little, len)
bd61b366 1960 ? strend - len : NULL;
b8c5462f 1961 else
d6a28714
JH
1962 goto find_last;
1963 } else {
1964 find_last:
9041c2e3 1965 if (len)
d6a28714 1966 last = rninstr(s, strend, little, little + len);
b8c5462f 1967 else
a0288114 1968 last = strend; /* matching "$" */
b8c5462f 1969 }
b8c5462f 1970 }
bf93d4cc 1971 if (last == NULL) {
a3621e74 1972 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
bf93d4cc 1973 "%sCan't trim the tail, match fails (should not happen)%s\n",
e4584336 1974 PL_colors[4], PL_colors[5]));
bf93d4cc
GS
1975 goto phooey; /* Should not happen! */
1976 }
d6a28714
JH
1977 dontbother = strend - last + prog->float_min_offset;
1978 }
1979 if (minlen && (dontbother < minlen))
1980 dontbother = minlen - 1;
1981 strend -= dontbother; /* this one's always in bytes! */
1982 /* We don't know much -- general case. */
1aa99e6b 1983 if (do_utf8) {
d6a28714
JH
1984 for (;;) {
1985 if (regtry(prog, s))
1986 goto got_it;
1987 if (s >= strend)
1988 break;
b8c5462f 1989 s += UTF8SKIP(s);
d6a28714
JH
1990 };
1991 }
1992 else {
1993 do {
1994 if (regtry(prog, s))
1995 goto got_it;
1996 } while (s++ < strend);
1997 }
1998 }
1999
2000 /* Failure. */
2001 goto phooey;
2002
2003got_it:
2004 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2005
2006 if (PL_reg_eval_set) {
2007 /* Preserve the current value of $^R */
2008 if (oreplsv != GvSV(PL_replgv))
2009 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2010 restored, the value remains
2011 the same. */
4f639d21 2012 restore_pos(aTHX_ prog);
d6a28714
JH
2013 }
2014
2015 /* make sure $`, $&, $', and $digit will work later */
2016 if ( !(flags & REXEC_NOT_FIRST) ) {
ed252734 2017 RX_MATCH_COPY_FREE(prog);
d6a28714
JH
2018 if (flags & REXEC_COPY_STR) {
2019 I32 i = PL_regeol - startpos + (stringarg - strbeg);
f8c7b90f 2020#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2021 if ((SvIsCOW(sv)
2022 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2023 if (DEBUG_C_TEST) {
2024 PerlIO_printf(Perl_debug_log,
2025 "Copy on write: regexp capture, type %d\n",
2026 (int) SvTYPE(sv));
2027 }
2028 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
d5263905 2029 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
ed252734
NC
2030 assert (SvPOKp(prog->saved_copy));
2031 } else
2032#endif
2033 {
2034 RX_MATCH_COPIED_on(prog);
2035 s = savepvn(strbeg, i);
2036 prog->subbeg = s;
2037 }
d6a28714 2038 prog->sublen = i;
d6a28714
JH
2039 }
2040 else {
2041 prog->subbeg = strbeg;
2042 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2043 }
2044 }
9041c2e3 2045
d6a28714
JH
2046 return 1;
2047
2048phooey:
a3621e74 2049 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
e4584336 2050 PL_colors[4], PL_colors[5]));
d6a28714 2051 if (PL_reg_eval_set)
4f639d21 2052 restore_pos(aTHX_ prog);
d6a28714
JH
2053 return 0;
2054}
2055
2056/*
2057 - regtry - try match at specific point
2058 */
2059STATIC I32 /* 0 failure, 1 success */
2060S_regtry(pTHX_ regexp *prog, char *startpos)
2061{
97aff369 2062 dVAR;
d6a28714
JH
2063 register I32 *sp;
2064 register I32 *ep;
2065 CHECKPOINT lastcp;
a3621e74 2066 GET_RE_DEBUG_FLAGS_DECL;
d6a28714 2067
02db2b7b
IZ
2068#ifdef DEBUGGING
2069 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
2070#endif
d6a28714
JH
2071 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2072 MAGIC *mg;
2073
2074 PL_reg_eval_set = RS_init;
a3621e74 2075 DEBUG_EXECUTE_r(DEBUG_s(
b900a521
JH
2076 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2077 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 2078 ));
e8347627 2079 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
2080 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2081 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2082 SAVETMPS;
2083 /* Apparently this is not needed, judging by wantarray. */
e8347627 2084 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
2085 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2086
2087 if (PL_reg_sv) {
2088 /* Make $_ available to executed code. */
2089 if (PL_reg_sv != DEFSV) {
59f00321 2090 SAVE_DEFSV;
d6a28714 2091 DEFSV = PL_reg_sv;
b8c5462f 2092 }
d6a28714 2093
9041c2e3 2094 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 2095 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 2096 /* prepare for quick setting of pos */
d300d9fa
NC
2097#ifdef PERL_OLD_COPY_ON_WRITE
2098 if (SvIsCOW(sv))
2099 sv_force_normal_flags(sv, 0);
2100#endif
2101 mg = sv_magicext(PL_reg_sv, (SV*)0, PERL_MAGIC_regex_global,
2102 &PL_vtbl_mglob, NULL, 0);
d6a28714 2103 mg->mg_len = -1;
b8c5462f 2104 }
d6a28714
JH
2105 PL_reg_magic = mg;
2106 PL_reg_oldpos = mg->mg_len;
4f639d21 2107 SAVEDESTRUCTOR_X(restore_pos, prog);
d6a28714 2108 }
09687e5a 2109 if (!PL_reg_curpm) {
a02a5408 2110 Newxz(PL_reg_curpm, 1, PMOP);
09687e5a
AB
2111#ifdef USE_ITHREADS
2112 {
2113 SV* repointer = newSViv(0);
577e12cc 2114 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2115 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2116 av_push(PL_regex_padav,repointer);
2117 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2118 PL_regex_pad = AvARRAY(PL_regex_padav);
2119 }
2120#endif
2121 }
aaa362c4 2122 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2123 PL_reg_oldcurpm = PL_curpm;
2124 PL_curpm = PL_reg_curpm;
2125 if (RX_MATCH_COPIED(prog)) {
2126 /* Here is a serious problem: we cannot rewrite subbeg,
2127 since it may be needed if this match fails. Thus
2128 $` inside (?{}) could fail... */
2129 PL_reg_oldsaved = prog->subbeg;
2130 PL_reg_oldsavedlen = prog->sublen;
f8c7b90f 2131#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2132 PL_nrs = prog->saved_copy;
2133#endif
d6a28714
JH
2134 RX_MATCH_COPIED_off(prog);
2135 }
2136 else
bd61b366 2137 PL_reg_oldsaved = NULL;
d6a28714
JH
2138 prog->subbeg = PL_bostr;
2139 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2140 }
973dddac 2141 prog->startp[0] = startpos - PL_bostr;
d6a28714
JH
2142 PL_reginput = startpos;
2143 PL_regstartp = prog->startp;
2144 PL_regendp = prog->endp;
2145 PL_reglastparen = &prog->lastparen;
a01268b5 2146 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714 2147 prog->lastparen = 0;
03994de8 2148 prog->lastcloseparen = 0;
d6a28714 2149 PL_regsize = 0;
a3621e74 2150 DEBUG_EXECUTE_r(PL_reg_starttry = startpos);
d6a28714
JH
2151 if (PL_reg_start_tmpl <= prog->nparens) {
2152 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2153 if(PL_reg_start_tmp)
2154 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2155 else
a02a5408 2156 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
d6a28714
JH
2157 }
2158
2159 /* XXXX What this code is doing here?!!! There should be no need
2160 to do this again and again, PL_reglastparen should take care of
3dd2943c 2161 this! --ilya*/
dafc8851
JH
2162
2163 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2164 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2165 * PL_reglastparen), is not needed at all by the test suite
2166 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2167 * enough, for building DynaLoader, or otherwise this
2168 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2169 * will happen. Meanwhile, this code *is* needed for the
2170 * above-mentioned test suite tests to succeed. The common theme
2171 * on those tests seems to be returning null fields from matches.
2172 * --jhi */
dafc8851 2173#if 1
d6a28714
JH
2174 sp = prog->startp;
2175 ep = prog->endp;
2176 if (prog->nparens) {
097eb12c 2177 register I32 i;
eb160463 2178 for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
d6a28714
JH
2179 *++sp = -1;
2180 *++ep = -1;
2181 }
2182 }
dafc8851 2183#endif
02db2b7b 2184 REGCP_SET(lastcp);
4f639d21 2185 if (regmatch(prog, prog->program + 1)) {
d6a28714
JH
2186 prog->endp[0] = PL_reginput - PL_bostr;
2187 return 1;
2188 }
02db2b7b 2189 REGCP_UNWIND(lastcp);
d6a28714
JH
2190 return 0;
2191}
2192
02db2b7b
IZ
2193#define RE_UNWIND_BRANCH 1
2194#define RE_UNWIND_BRANCHJ 2
2195
2196union re_unwind_t;
2197
2198typedef struct { /* XX: makes sense to enlarge it... */
2199 I32 type;
2200 I32 prev;
2201 CHECKPOINT lastcp;
2202} re_unwind_generic_t;
2203
2204typedef struct {
2205 I32 type;
2206 I32 prev;
2207 CHECKPOINT lastcp;
2208 I32 lastparen;
2209 regnode *next;
2210 char *locinput;
2211 I32 nextchr;
3a2830be 2212 int minmod;
02db2b7b
IZ
2213#ifdef DEBUGGING
2214 int regindent;
2215#endif
2216} re_unwind_branch_t;
2217
2218typedef union re_unwind_t {
2219 I32 type;
2220 re_unwind_generic_t generic;
2221 re_unwind_branch_t branch;
2222} re_unwind_t;
2223
8ba1375e
MJD
2224#define sayYES goto yes
2225#define sayNO goto no
e0f9d4a8 2226#define sayNO_ANYOF goto no_anyof
8ba1375e
MJD
2227#define sayYES_FINAL goto yes_final
2228#define sayYES_LOUD goto yes_loud
2229#define sayNO_FINAL goto no_final
2230#define sayNO_SILENT goto do_no
2231#define saySAME(x) if (x) goto yes; else goto no
2232
3ab3c9b4
HS
2233#define POSCACHE_SUCCESS 0 /* caching success rather than failure */
2234#define POSCACHE_SEEN 1 /* we know what we're caching */
2235#define POSCACHE_START 2 /* the real cache: this bit maps to pos 0 */
2236#define CACHEsayYES STMT_START { \
d8319b27 2237 if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \
3ab3c9b4
HS
2238 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2239 PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) || (1<<POSCACHE_SEEN); \
2240 else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2241 /* cache records failure, but this is success */ \
2242 DEBUG_r( \
2243 PerlIO_printf(Perl_debug_log, \
2244 "%*s (remove success from failure cache)\n", \
2245 REPORT_CODE_OFF+PL_regindent*2, "") \
2246 ); \
d8319b27 2247 PL_reg_poscache[st->u.whilem.cache_offset] &= ~(1<<st->u.whilem.cache_bit); \
3ab3c9b4
HS
2248 } \
2249 } \
2250 sayYES; \
2251} STMT_END
2252#define CACHEsayNO STMT_START { \
d8319b27 2253 if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \
3ab3c9b4
HS
2254 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2255 PL_reg_poscache[0] |= (1<<POSCACHE_SEEN); \
2256 else if ((PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2257 /* cache records success, but this is failure */ \
2258 DEBUG_r( \
2259 PerlIO_printf(Perl_debug_log, \
2260 "%*s (remove failure from success cache)\n", \
2261 REPORT_CODE_OFF+PL_regindent*2, "") \
2262 ); \
d8319b27 2263 PL_reg_poscache[st->u.whilem.cache_offset] &= ~(1<<st->u.whilem.cache_bit); \
3ab3c9b4
HS
2264 } \
2265 } \
2266 sayNO; \
2267} STMT_END
2268
a3621e74
YO
2269/* this is used to determine how far from the left messages like
2270 'failed...' are printed. Currently 29 makes these messages line
2271 up with the opcode they refer to. Earlier perls used 25 which
2272 left these messages outdented making reviewing a debug output
2273 quite difficult.
2274*/
2275#define REPORT_CODE_OFF 29
2276
2277
2278/* Make sure there is a test for this +1 options in re_tests */
2279#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2280
5d9a96ca
DM
2281/* grab a new slab and return the first slot in it */
2282
2283STATIC regmatch_state *
2284S_push_slab(pTHX)
2285{
2286 regmatch_slab *s = PL_regmatch_slab->next;
2287 if (!s) {
2288 Newx(s, 1, regmatch_slab);
2289 s->prev = PL_regmatch_slab;
2290 s->next = NULL;
2291 PL_regmatch_slab->next = s;
2292 }
2293 PL_regmatch_slab = s;
2294 return &s->states[0];
2295}
5b47454d 2296
95b24440
DM
2297/* simulate a recursive call to regmatch */
2298
2299#define REGMATCH(ns, where) \
5d9a96ca
DM
2300 st->scan = scan; \
2301 scan = (ns); \
2302 st->resume_state = resume_##where; \
95b24440
DM
2303 goto start_recurse; \
2304 resume_point_##where:
2305
aa283a38
DM
2306
2307/* push a new regex state. Set newst to point to it */
2308
2309#define PUSH_STATE(newst, resume) \
2310 depth++; \
2311 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "PUSH STATE(%d)\n", depth)); \
2312 st->scan = scan; \
2313 st->next = next; \
2314 st->n = n; \
2315 st->locinput = locinput; \
2316 st->resume_state = resume; \
2317 newst = st+1; \
2318 if (newst > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1])) \
2319 newst = S_push_slab(aTHX); \
2320 PL_regmatch_state = newst; \
2321 newst->cc = 0; \
2322 newst->minmod = 0; \
2323 newst->sw = 0; \
2324 newst->logical = 0; \
2325 newst->unwind = 0; \
2326 locinput = PL_reginput; \
2327 nextchr = UCHARAT(locinput);
2328
2329#define POP_STATE \
2330 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE(%d)\n", depth)); \
2331 depth--; \
2332 st--; \
2333 if (st < &PL_regmatch_slab->states[0]) { \
2334 PL_regmatch_slab = PL_regmatch_slab->prev; \
2335 st = &PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]; \
2336 } \
2337 PL_regmatch_state = st; \
2338 scan = st->scan; \
2339 next = st->next; \
2340 n = st->n; \
2341 locinput = st->locinput; \
2342 nextchr = UCHARAT(locinput);
2343
d6a28714
JH
2344/*
2345 - regmatch - main matching routine
2346 *
2347 * Conceptually the strategy is simple: check to see whether the current
2348 * node matches, call self recursively to see whether the rest matches,
2349 * and then act accordingly. In practice we make some effort to avoid
2350 * recursion, in particular by going through "ordinary" nodes (that don't
2351 * need to know whether the rest of the match failed) by a loop instead of
2352 * by recursion.
2353 */
2354/* [lwall] I've hoisted the register declarations to the outer block in order to
2355 * maybe save a little bit of pushing and popping on the stack. It also takes
2356 * advantage of machines that use a register save mask on subroutine entry.
95b24440
DM
2357 *
2358 * This function used to be heavily recursive, but since this had the
2359 * effect of blowing the CPU stack on complex regexes, it has been
2360 * restructured to be iterative, and to save state onto the heap rather
2361 * than the stack. Essentially whereever regmatch() used to be called, it
2362 * pushes the current state, notes where to return, then jumps back into
2363 * the main loop.
2364 *
2365 * Originally the structure of this function used to look something like
2366
2367 S_regmatch() {
2368 int a = 1, b = 2;
2369 ...
2370 while (scan != NULL) {
5d9a96ca 2371 a++; // do stuff with a and b
95b24440
DM
2372 ...
2373 switch (OP(scan)) {
2374 case FOO: {
2375 int local = 3;
2376 ...
2377 if (regmatch(...)) // recurse
2378 goto yes;
2379 }
2380 ...
2381 }
2382 }
2383 yes:
2384 return 1;
2385 }
2386
2387 * Now it looks something like this:
2388
5d9a96ca 2389 typedef struct {
95b24440
DM
2390 int a, b, local;
2391 int resume_state;
5d9a96ca 2392 } regmatch_state;
95b24440
DM
2393
2394 S_regmatch() {
5d9a96ca
DM
2395 regmatch_state *st = new();
2396 int depth=0;
2397 st->a++; // do stuff with a and b
95b24440
DM
2398 ...
2399 while (scan != NULL) {
2400 ...
2401 switch (OP(scan)) {
2402 case FOO: {
5d9a96ca 2403 st->local = 3;
95b24440 2404 ...
5d9a96ca
DM
2405 st->scan = scan;
2406 scan = ...;
2407 st->resume_state = resume_FOO;
2408 goto start_recurse; // recurse
95b24440 2409
5d9a96ca
DM
2410 resume_point_FOO:
2411 if (result)
95b24440
DM
2412 goto yes;
2413 }
2414 ...
2415 }
5d9a96ca
DM
2416 start_recurse:
2417 st = new(); push a new state
2418 st->a = 1; st->b = 2;
2419 depth++;
95b24440 2420 }
5d9a96ca 2421 yes:
95b24440 2422 result = 1;
5d9a96ca
DM
2423 if (depth--) {
2424 st = pop();
95b24440
DM
2425 switch (resume_state) {
2426 case resume_FOO:
2427 goto resume_point_FOO;
2428 ...
2429 }
2430 }
2431 return result
2432 }
2433
2434 * WARNING: this means that any line in this function that contains a
2435 * REGMATCH() or TRYPAREN() is actually simulating a recursive call to
2436 * regmatch() using gotos instead. Thus the values of any local variables
2437 * not saved in the regmatch_state structure will have been lost when
2438 * execution resumes on the next line .
5d9a96ca
DM
2439 *
2440 * States (ie the st pointer) are allocated in slabs of about 4K in size.
2441 * PL_regmatch_state always points to the currently active state, and
2442 * PL_regmatch_slab points to the slab currently containing PL_regmatch_state.
2443 * The first time regmatch is called, the first slab is allocated, and is
2444 * never freed until interpreter desctruction. When the slab is full,
2445 * a new one is allocated chained to the end. At exit from regmatch, slabs
2446 * allocated since entry are freed.
d6a28714 2447 */
95b24440
DM
2448
2449
d6a28714 2450STATIC I32 /* 0 failure, 1 success */
4f639d21 2451S_regmatch(pTHX_ regexp *rex, regnode *prog)
d6a28714 2452{
27da23d5 2453 dVAR;
95b24440 2454 register const bool do_utf8 = PL_reg_match_utf8;
4ad0818d 2455 const U32 uniflags = UTF8_ALLOW_DEFAULT;
95b24440 2456
5d9a96ca
DM
2457 regmatch_slab *orig_slab;
2458 regmatch_state *orig_state;
a3621e74 2459
5d9a96ca
DM
2460 /* the current state. This is a cached copy of PL_regmatch_state */
2461 register regmatch_state *st;
95b24440 2462
5d9a96ca
DM
2463 /* cache heavy used fields of st in registers */
2464 register regnode *scan;
2465 register regnode *next;
2466 register I32 n = 0; /* initialize to shut up compiler warning */
2467 register char *locinput = PL_reginput;
95b24440 2468
5d9a96ca
DM
2469 /* these variables are NOT saved during a recusive RFEGMATCH: */
2470 register I32 nextchr; /* is always set to UCHARAT(locinput) */
2471 bool result; /* return value of S_regmatch */
2472 regnode *inner; /* Next node in internal branch. */
2473 int depth = 0; /* depth of recursion */
aa283a38
DM
2474 regmatch_state *newst; /* when pushing a state, this is the new one */
2475 regmatch_state *cur_eval = NULL; /* most recent (??{}) state */
95b24440
DM
2476
2477#ifdef DEBUGGING
ab74612d 2478 SV *re_debug_flags = NULL;
a3621e74 2479 GET_RE_DEBUG_FLAGS;
d6a28714
JH
2480 PL_regindent++;
2481#endif
2482
5d9a96ca
DM
2483 /* on first ever call to regmatch, allocate first slab */
2484 if (!PL_regmatch_slab) {
2485 Newx(PL_regmatch_slab, 1, regmatch_slab);
2486 PL_regmatch_slab->prev = NULL;
2487 PL_regmatch_slab->next = NULL;
2488 PL_regmatch_state = &PL_regmatch_slab->states[0] - 1;
2489 }
2490
2491 /* remember current high-water mark for exit */
2492 /* XXX this should be done with SAVE* instead */
2493 orig_slab = PL_regmatch_slab;
2494 orig_state = PL_regmatch_state;
2495
2496 /* grab next free state slot */
2497 st = ++PL_regmatch_state;
2498 if (st > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]))
2499 st = PL_regmatch_state = S_push_slab(aTHX);
2500
2501 st->minmod = 0;
2502 st->sw = 0;
2503 st->logical = 0;
2504 st->unwind = 0;
2505 st->cc = NULL;
d6a28714
JH
2506 /* Note that nextchr is a byte even in UTF */
2507 nextchr = UCHARAT(locinput);
2508 scan = prog;
2509 while (scan != NULL) {
8ba1375e 2510
a3621e74 2511 DEBUG_EXECUTE_r( {
6136c704 2512 SV * const prop = sv_newmortal();
1df70142
AL
2513 const int docolor = *PL_colors[0];
2514 const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2515 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2516 /* The part of the string before starttry has one color
2517 (pref0_len chars), between starttry and current
2518 position another one (pref_len - pref0_len chars),
2519 after the current position the third one.
2520 We assume that pref0_len <= pref_len, otherwise we
2521 decrease pref0_len. */
9041c2e3 2522 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2523 ? (5 + taill) - l : locinput - PL_bostr;
2524 int pref0_len;
d6a28714 2525
df1ffd02 2526 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2527 pref_len++;
2528 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2529 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2530 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2531 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2532 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2533 l--;
d6a28714
JH
2534 if (pref0_len < 0)
2535 pref0_len = 0;
2536 if (pref0_len > pref_len)
2537 pref0_len = pref_len;
32fc9b6a 2538 regprop(rex, prop, scan);
2a782b5b 2539 {
1df70142 2540 const char * const s0 =
f14c76ed 2541 do_utf8 && OP(scan) != CANY ?
95b24440 2542 pv_uni_display(PERL_DEBUG_PAD(0), (U8*)(locinput - pref_len),
c728cb41 2543 pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2544 locinput - pref_len;
1df70142
AL
2545 const int len0 = do_utf8 ? strlen(s0) : pref0_len;
2546 const char * const s1 = do_utf8 && OP(scan) != CANY ?
95b24440
DM
2547 pv_uni_display(PERL_DEBUG_PAD(1),
2548 (U8*)(locinput - pref_len + pref0_len),
c728cb41 2549 pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2550 locinput - pref_len + pref0_len;
1df70142
AL
2551 const int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2552 const char * const s2 = do_utf8 && OP(scan) != CANY ?
95b24440 2553 pv_uni_display(PERL_DEBUG_PAD(2), (U8*)locinput,
c728cb41 2554 PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2555 locinput;
1df70142 2556 const int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2557 PerlIO_printf(Perl_debug_log,
2558 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2559 (IV)(locinput - PL_bostr),
2560 PL_colors[4],
2561 len0, s0,
2562 PL_colors[5],
2563 PL_colors[2],
2564 len1, s1,
2565 PL_colors[3],
2566 (docolor ? "" : "> <"),
2567 PL_colors[0],
2568 len2, s2,
2569 PL_colors[1],
2570 15 - l - pref_len + 1,
2571 "",
4f639d21 2572 (IV)(scan - rex->program), PL_regindent*2, "",
3f7c398e 2573 SvPVX_const(prop));
2a782b5b
JH
2574 }
2575 });
d6a28714
JH
2576
2577 next = scan + NEXT_OFF(scan);
2578 if (next == scan)
2579 next = NULL;
2580
2581 switch (OP(scan)) {
2582 case BOL:
7fba1cd6 2583 if (locinput == PL_bostr)
d6a28714
JH
2584 {
2585 /* regtill = regbol; */
b8c5462f
JH
2586 break;
2587 }
d6a28714
JH
2588 sayNO;
2589 case MBOL:
12d33761
HS
2590 if (locinput == PL_bostr ||
2591 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2592 {
b8c5462f
JH
2593 break;
2594 }
d6a28714
JH
2595 sayNO;
2596 case SBOL:
c2a73568 2597 if (locinput == PL_bostr)
b8c5462f 2598 break;
d6a28714
JH
2599 sayNO;
2600 case GPOS:
2601 if (locinput == PL_reg_ganch)
2602 break;
2603 sayNO;
2604 case EOL:
d6a28714
JH
2605 goto seol;
2606 case MEOL:
d6a28714 2607 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2608 sayNO;
b8c5462f 2609 break;
d6a28714
JH
2610 case SEOL:
2611 seol:
2612 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2613 sayNO;
d6a28714 2614 if (PL_regeol - locinput > 1)
b8c5462f 2615 sayNO;
b8c5462f 2616 break;
d6a28714
JH
2617 case EOS:
2618 if (PL_regeol != locinput)
b8c5462f 2619 sayNO;
d6a28714 2620 break;
ffc61ed2 2621 case SANY:
d6a28714 2622 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2623 sayNO;
f33976b4
DB
2624 if (do_utf8) {
2625 locinput += PL_utf8skip[nextchr];
2626 if (locinput > PL_regeol)
2627 sayNO;
2628 nextchr = UCHARAT(locinput);
2629 }
2630 else
2631 nextchr = UCHARAT(++locinput);
2632 break;
2633 case CANY:
2634 if (!nextchr && locinput >= PL_regeol)
2635 sayNO;
b8c5462f 2636 nextchr = UCHARAT(++locinput);
a0d0e21e 2637 break;
ffc61ed2 2638 case REG_ANY:
1aa99e6b
IH
2639 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2640 sayNO;
2641 if (do_utf8) {
b8c5462f 2642 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2643 if (locinput > PL_regeol)
2644 sayNO;
a0ed51b3 2645 nextchr = UCHARAT(locinput);
a0ed51b3 2646 }
1aa99e6b
IH
2647 else
2648 nextchr = UCHARAT(++locinput);
a0ed51b3 2649 break;
a3621e74
YO
2650
2651
2652
2653 /*
2654 traverse the TRIE keeping track of all accepting states
2655 we transition through until we get to a failing node.
2656
a3621e74
YO
2657
2658 */
5b47454d 2659 case TRIE:
a3621e74
YO
2660 case TRIEF:
2661 case TRIEFL:
2662 {
a3621e74
YO
2663 U8 *uc = ( U8* )locinput;
2664 U32 state = 1;
2665 U16 charid = 0;
2666 U32 base = 0;
2667 UV uvc = 0;
2668 STRLEN len = 0;
2669 STRLEN foldlen = 0;
a3621e74
YO
2670 U8 *uscan = (U8*)NULL;
2671 STRLEN bufflen=0;
95b24440 2672 SV *sv_accept_buff = NULL;
5b47454d
DM
2673 const enum { trie_plain, trie_utf8, trie_uft8_fold }
2674 trie_type = do_utf8 ?
2675 (OP(scan) == TRIE ? trie_utf8 : trie_uft8_fold)
2676 : trie_plain;
2677
7087a21c
NC
2678 /* what trie are we using right now */
2679 reg_trie_data *trie
32fc9b6a 2680 = (reg_trie_data*)rex->data->data[ ARG( scan ) ];
d8319b27 2681 st->u.trie.accepted = 0; /* how many accepting states we have seen */
95b24440 2682 result = 0;
a3621e74
YO
2683
2684 while ( state && uc <= (U8*)PL_regeol ) {
2685
5b47454d 2686 if (trie->states[ state ].wordnum) {
d8319b27 2687 if (!st->u.trie.accepted ) {
5b47454d
DM
2688 ENTER;
2689 SAVETMPS;
2690 bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
2691 sv_accept_buff=newSV(bufflen *
2692 sizeof(reg_trie_accepted) - 1);
2693 SvCUR_set(sv_accept_buff,
2694 sizeof(reg_trie_accepted));
2695 SvPOK_on(sv_accept_buff);
2696 sv_2mortal(sv_accept_buff);
d8319b27 2697 st->u.trie.accept_buff =
5b47454d
DM
2698 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
2699 }
2700 else {
d8319b27 2701 if (st->u.trie.accepted >= bufflen) {
5b47454d 2702 bufflen *= 2;
d8319b27 2703 st->u.trie.accept_buff =(reg_trie_accepted*)
5b47454d
DM
2704 SvGROW(sv_accept_buff,
2705 bufflen * sizeof(reg_trie_accepted));
2706 }
2707 SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
2708 + sizeof(reg_trie_accepted));
2709 }
d8319b27
DM
2710 st->u.trie.accept_buff[st->u.trie.accepted].wordnum = trie->states[state].wordnum;
2711 st->u.trie.accept_buff[st->u.trie.accepted].endpos = uc;
2712 ++st->u.trie.accepted;
5b47454d 2713 }
a3621e74
YO
2714
2715 base = trie->states[ state ].trans.base;
2716
2717 DEBUG_TRIE_EXECUTE_r(
2718 PerlIO_printf( Perl_debug_log,
e4584336 2719 "%*s %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ",
a3621e74 2720 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
d8319b27 2721 (UV)state, (UV)base, (UV)st->u.trie.accepted );
a3621e74
YO
2722 );
2723
2724 if ( base ) {
5b47454d
DM
2725 switch (trie_type) {
2726 case trie_uft8_fold:
a3621e74
YO
2727 if ( foldlen>0 ) {
2728 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
2729 foldlen -= len;
2730 uscan += len;
2731 len=0;
2732 } else {
1df70142 2733 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
a3621e74
YO
2734 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2735 uvc = to_uni_fold( uvc, foldbuf, &foldlen );
2736 foldlen -= UNISKIP( uvc );
2737 uscan = foldbuf + UNISKIP( uvc );
2738 }
5b47454d
DM
2739 break;
2740 case trie_utf8:
2741 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN,
2742 &len, uniflags );
2743 break;
2744 case trie_plain:
e4584336 2745 uvc = (UV)*uc;
a3621e74
YO
2746 len = 1;
2747 }
2748
5b47454d
DM
2749 if (uvc < 256) {
2750 charid = trie->charmap[ uvc ];
2751 }
2752 else {
2753 charid = 0;
2754 if (trie->widecharmap) {
2755 SV** svpp = (SV**)NULL;
2756 svpp = hv_fetch(trie->widecharmap,
2757 (char*)&uvc, sizeof(UV), 0);
2758 if (svpp)
2759 charid = (U16)SvIV(*svpp);
2760 }
2761 }
a3621e74 2762
5b47454d
DM
2763 if (charid &&
2764 (base + charid > trie->uniquecharcount )
2765 && (base + charid - 1 - trie->uniquecharcount
2766 < trie->lasttrans)
2767 && trie->trans[base + charid - 1 -
2768 trie->uniquecharcount].check == state)
2769 {
2770 state = trie->trans[base + charid - 1 -
2771 trie->uniquecharcount ].next;
2772 }
2773 else {
2774 state = 0;
2775 }
2776 uc += len;
2777
2778 }
2779 else {
a3621e74
YO
2780 state = 0;
2781 }
2782 DEBUG_TRIE_EXECUTE_r(
e4584336
RB
2783 PerlIO_printf( Perl_debug_log,
2784 "Charid:%3x CV:%4"UVxf" After State: %4"UVxf"%s\n",
2785 charid, uvc, (UV)state, PL_colors[5] );
a3621e74
YO
2786 );
2787 }
d8319b27 2788 if (!st->u.trie.accepted )
a3621e74 2789 sayNO;
a3621e74
YO
2790
2791 /*
2792 There was at least one accepting state that we
2793 transitioned through. Presumably the number of accepting
2794 states is going to be low, typically one or two. So we
2795 simply scan through to find the one with lowest wordnum.
2796 Once we find it, we swap the last state into its place
2797 and decrement the size. We then try to match the rest of
2798 the pattern at the point where the word ends, if we
2799 succeed then we end the loop, otherwise the loop
2800 eventually terminates once all of the accepting states
2801 have been tried.
2802 */
a3621e74 2803
d8319b27 2804 if ( st->u.trie.accepted == 1 ) {
a3621e74 2805 DEBUG_EXECUTE_r({
097eb12c 2806 SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ 0 ].wordnum-1, 0 );
a3621e74
YO
2807 PerlIO_printf( Perl_debug_log,
2808 "%*s %sonly one match : #%d <%s>%s\n",
e4584336 2809 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
d8319b27 2810 st->u.trie.accept_buff[ 0 ].wordnum,
cfd0369c 2811 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",
a3621e74
YO
2812 PL_colors[5] );
2813 });
d8319b27 2814 PL_reginput = (char *)st->u.trie.accept_buff[ 0 ].endpos;
a3621e74
YO
2815 /* in this case we free tmps/leave before we call regmatch
2816 as we wont be using accept_buff again. */
2817 FREETMPS;
2818 LEAVE;
95b24440
DM
2819 REGMATCH(scan + NEXT_OFF(scan), TRIE1);
2820 /*** all unsaved local vars undefined at this point */
a3621e74
YO
2821 } else {
2822 DEBUG_EXECUTE_r(
e4584336 2823 PerlIO_printf( Perl_debug_log,"%*s %sgot %"IVdf" possible matches%s\n",
d8319b27 2824 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], (IV)st->u.trie.accepted,
a3621e74
YO
2825 PL_colors[5] );
2826 );
d8319b27 2827 while ( !result && st->u.trie.accepted-- ) {
a3621e74
YO
2828 U32 best = 0;
2829 U32 cur;
d8319b27 2830 for( cur = 1 ; cur <= st->u.trie.accepted ; cur++ ) {
e4584336
RB
2831 DEBUG_TRIE_EXECUTE_r(
2832 PerlIO_printf( Perl_debug_log,
2833 "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
2834 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
d8319b27
DM
2835 (IV)best, st->u.trie.accept_buff[ best ].wordnum, (IV)cur,
2836 st->u.trie.accept_buff[ cur ].wordnum, PL_colors[5] );
e4584336 2837 );
a3621e74 2838
d8319b27
DM
2839 if (st->u.trie.accept_buff[cur].wordnum <
2840 st->u.trie.accept_buff[best].wordnum)
e822a8b4 2841 best = cur;
a3621e74
YO
2842 }
2843 DEBUG_EXECUTE_r({
87830502 2844 reg_trie_data * const trie = (reg_trie_data*)
32fc9b6a 2845 rex->data->data[ARG(scan)];
d8319b27 2846 SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ best ].wordnum - 1, 0 );
a3621e74 2847 PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at 0x%p%s\n",
e4584336 2848 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
d8319b27 2849 st->u.trie.accept_buff[best].wordnum,
cfd0369c 2850 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",scan,
a3621e74
YO
2851 PL_colors[5] );
2852 });
d8319b27
DM
2853 if ( best<st->u.trie.accepted ) {
2854 reg_trie_accepted tmp = st->u.trie.accept_buff[ best ];
2855 st->u.trie.accept_buff[ best ] = st->u.trie.accept_buff[ st->u.trie.accepted ];
2856 st->u.trie.accept_buff[ st->u.trie.accepted ] = tmp;
2857 best = st->u.trie.accepted;
a3621e74 2858 }
d8319b27 2859 PL_reginput = (char *)st->u.trie.accept_buff[ best ].endpos;
a3621e74
YO
2860
2861 /*
2862 as far as I can tell we only need the SAVETMPS/FREETMPS
2863 for re's with EVAL in them but I'm leaving them in for
2864 all until I can be sure.
2865 */
2866 SAVETMPS;
95b24440
DM
2867 REGMATCH(scan + NEXT_OFF(scan), TRIE2);
2868 /*** all unsaved local vars undefined at this point */
a3621e74
YO
2869 FREETMPS;
2870 }
2871 FREETMPS;
2872 LEAVE;
2873 }
2874
95b24440 2875 if (result) {
a3621e74
YO
2876 sayYES;
2877 } else {
2878 sayNO;
2879 }
2880 }
2881 /* unreached codepoint */
95b24440
DM
2882 case EXACT: {
2883 char *s = STRING(scan);
5d9a96ca 2884 st->ln = STR_LEN(scan);
eb160463 2885 if (do_utf8 != UTF) {
bc517b45 2886 /* The target and the pattern have differing utf8ness. */
1aa99e6b 2887 char *l = locinput;
5d9a96ca 2888 const char *e = s + st->ln;
a72c7584 2889
5ff6fc6d
JH
2890 if (do_utf8) {
2891 /* The target is utf8, the pattern is not utf8. */
1aa99e6b 2892 while (s < e) {
a3b680e6 2893 STRLEN ulen;
1aa99e6b 2894 if (l >= PL_regeol)
5ff6fc6d
JH
2895 sayNO;
2896 if (NATIVE_TO_UNI(*(U8*)s) !=
89ebb4a3 2897 utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
041457d9 2898 uniflags))
5ff6fc6d 2899 sayNO;
bc517b45 2900 l += ulen;
5ff6fc6d 2901 s ++;
1aa99e6b 2902 }
5ff6fc6d
JH
2903 }
2904 else {
2905 /* The target is not utf8, the pattern is utf8. */
1aa99e6b 2906 while (s < e) {
a3b680e6 2907 STRLEN ulen;
1aa99e6b
IH
2908 if (l >= PL_regeol)
2909 sayNO;
5ff6fc6d 2910 if (NATIVE_TO_UNI(*((U8*)l)) !=
89ebb4a3 2911 utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
041457d9 2912 uniflags))
1aa99e6b 2913 sayNO;
bc517b45 2914 s += ulen;
a72c7584 2915 l ++;
1aa99e6b 2916 }
5ff6fc6d 2917 }
1aa99e6b
IH
2918 locinput = l;
2919 nextchr = UCHARAT(locinput);
2920 break;
2921 }
bc517b45 2922 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2923 /* Inline the first character, for speed. */
2924 if (UCHARAT(s) != nextchr)
2925 sayNO;
5d9a96ca 2926 if (PL_regeol - locinput < st->ln)
d6a28714 2927 sayNO;
5d9a96ca 2928 if (st->ln > 1 && memNE(s, locinput, st->ln))
d6a28714 2929 sayNO;
5d9a96ca 2930 locinput += st->ln;
d6a28714
JH
2931 nextchr = UCHARAT(locinput);
2932 break;
95b24440 2933 }
d6a28714 2934 case EXACTFL:
b8c5462f
JH
2935 PL_reg_flags |= RF_tainted;
2936 /* FALL THROUGH */
95b24440
DM
2937 case EXACTF: {
2938 char *s = STRING(scan);
5d9a96ca 2939 st->ln = STR_LEN(scan);
d6a28714 2940
d07ddd77
JH
2941 if (do_utf8 || UTF) {
2942 /* Either target or the pattern are utf8. */
d6a28714 2943 char *l = locinput;
d07ddd77 2944 char *e = PL_regeol;
bc517b45 2945
5d9a96ca 2946 if (ibcmp_utf8(s, 0, st->ln, (bool)UTF,
1feea2c7 2947 l, &e, 0, do_utf8)) {
5486206c
JH
2948 /* One more case for the sharp s:
2949 * pack("U0U*", 0xDF) =~ /ss/i,
2950 * the 0xC3 0x9F are the UTF-8
2951 * byte sequence for the U+00DF. */
2952 if (!(do_utf8 &&
2953 toLOWER(s[0]) == 's' &&
5d9a96ca 2954 st->ln >= 2 &&
5486206c
JH
2955 toLOWER(s[1]) == 's' &&
2956 (U8)l[0] == 0xC3 &&
2957 e - l >= 2 &&
2958 (U8)l[1] == 0x9F))
2959 sayNO;
2960 }
d07ddd77
JH
2961 locinput = e;
2962 nextchr = UCHARAT(locinput);
2963 break;
a0ed51b3 2964 }
d6a28714 2965
bc517b45
JH
2966 /* Neither the target and the pattern are utf8. */
2967
d6a28714
JH
2968 /* Inline the first character, for speed. */
2969 if (UCHARAT(s) != nextchr &&
2970 UCHARAT(s) != ((OP(scan) == EXACTF)
2971 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2972 sayNO;
5d9a96ca 2973 if (PL_regeol - locinput < st->ln)
b8c5462f 2974 sayNO;
5d9a96ca
DM
2975 if (st->ln > 1 && (OP(scan) == EXACTF
2976 ? ibcmp(s, locinput, st->ln)
2977 : ibcmp_locale(s, locinput, st->ln)))
4633a7c4 2978 sayNO;
5d9a96ca 2979 locinput += st->ln;
d6a28714 2980 nextchr = UCHARAT(locinput);
a0d0e21e 2981 break;
95b24440 2982 }
d6a28714 2983 case ANYOF:
ffc61ed2 2984 if (do_utf8) {
9e55ce06
JH
2985 STRLEN inclasslen = PL_regeol - locinput;
2986
32fc9b6a 2987 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
e0f9d4a8 2988 sayNO_ANYOF;
ffc61ed2
JH
2989 if (locinput >= PL_regeol)
2990 sayNO;
0f0076b4 2991 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
b8c5462f 2992 nextchr = UCHARAT(locinput);
e0f9d4a8 2993 break;
ffc61ed2
JH
2994 }
2995 else {
2996 if (nextchr < 0)
2997 nextchr = UCHARAT(locinput);
32fc9b6a 2998 if (!REGINCLASS(rex, scan, (U8*)locinput))
e0f9d4a8 2999 sayNO_ANYOF;
ffc61ed2
JH
3000 if (!nextchr && locinput >= PL_regeol)
3001 sayNO;
3002 nextchr = UCHARAT(++locinput);
e0f9d4a8
JH
3003 break;
3004 }
3005 no_anyof:
3006 /* If we might have the case of the German sharp s
3007 * in a casefolding Unicode character class. */
3008
ebc501f0
JH
3009 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3010 locinput += SHARP_S_SKIP;
e0f9d4a8 3011 nextchr = UCHARAT(locinput);
ffc61ed2 3012 }
e0f9d4a8
JH
3013 else
3014 sayNO;
b8c5462f 3015 break;
d6a28714 3016 case ALNUML:
b8c5462f
JH
3017 PL_reg_flags |= RF_tainted;
3018 /* FALL THROUGH */
d6a28714 3019 case ALNUM:
b8c5462f 3020 if (!nextchr)
4633a7c4 3021 sayNO;
ffc61ed2 3022 if (do_utf8) {
1a4fad37 3023 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3024 if (!(OP(scan) == ALNUM
3568d838 3025 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 3026 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
3027 {
3028 sayNO;
a0ed51b3 3029 }
b8c5462f 3030 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3031 nextchr = UCHARAT(locinput);
3032 break;
3033 }
ffc61ed2 3034 if (!(OP(scan) == ALNUM
d6a28714 3035 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 3036 sayNO;
b8c5462f 3037 nextchr = UCHARAT(++locinput);
a0d0e21e 3038 break;
d6a28714 3039 case NALNUML:
b8c5462f
JH
3040 PL_reg_flags |= RF_tainted;
3041 /* FALL THROUGH */
d6a28714
JH
3042 case NALNUM:
3043 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 3044 sayNO;
ffc61ed2 3045 if (do_utf8) {
1a4fad37 3046 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3047 if (OP(scan) == NALNUM
3568d838 3048 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
3049 : isALNUM_LC_utf8((U8*)locinput))
3050 {
b8c5462f 3051 sayNO;
d6a28714 3052 }
b8c5462f
JH
3053 locinput += PL_utf8skip[nextchr];
3054 nextchr = UCHARAT(locinput);
3055 break;
3056 }
ffc61ed2 3057 if (OP(scan) == NALNUM
d6a28714 3058 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 3059 sayNO;
76e3520e 3060 nextchr = UCHARAT(++locinput);
a0d0e21e 3061 break;
d6a28714
JH
3062 case BOUNDL:
3063 case NBOUNDL:
3280af22 3064 PL_reg_flags |= RF_tainted;
bbce6d69 3065 /* FALL THROUGH */
d6a28714
JH
3066 case BOUND:
3067 case NBOUND:
3068 /* was last char in word? */
ffc61ed2 3069 if (do_utf8) {
12d33761 3070 if (locinput == PL_bostr)
5d9a96ca 3071 st->ln = '\n';
ffc61ed2 3072 else {
a3b680e6 3073 const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
9041c2e3 3074
4ad0818d 3075 st->ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
ffc61ed2
JH
3076 }
3077 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
5d9a96ca 3078 st->ln = isALNUM_uni(st->ln);
1a4fad37 3079 LOAD_UTF8_CHARCLASS_ALNUM();
3568d838 3080 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
3081 }
3082 else {
5d9a96ca 3083 st->ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(st->ln));
ffc61ed2
JH
3084 n = isALNUM_LC_utf8((U8*)locinput);
3085 }
a0ed51b3 3086 }
d6a28714 3087 else {
5d9a96ca 3088 st->ln = (locinput != PL_bostr) ?
12d33761 3089 UCHARAT(locinput - 1) : '\n';
ffc61ed2 3090 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
5d9a96ca 3091 st->ln = isALNUM(st->ln);
ffc61ed2
JH
3092 n = isALNUM(nextchr);
3093 }
3094 else {
5d9a96ca 3095 st->ln = isALNUM_LC(st->ln);
ffc61ed2
JH
3096 n = isALNUM_LC(nextchr);
3097 }
d6a28714 3098 }
5d9a96ca 3099 if (((!st->ln) == (!n)) == (OP(scan) == BOUND ||
ffc61ed2
JH
3100 OP(scan) == BOUNDL))
3101 sayNO;
a0ed51b3 3102 break;
d6a28714 3103 case SPACEL:
3280af22 3104 PL_reg_flags |= RF_tainted;
bbce6d69 3105 /* FALL THROUGH */
d6a28714 3106 case SPACE:
9442cb0e 3107 if (!nextchr)
4633a7c4 3108 sayNO;
1aa99e6b 3109 if (do_utf8) {
fd400ab9 3110 if (UTF8_IS_CONTINUED(nextchr)) {
1a4fad37 3111 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3112 if (!(OP(scan) == SPACE
3568d838 3113 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
3114 : isSPACE_LC_utf8((U8*)locinput)))
3115 {
3116 sayNO;
3117 }
3118 locinput += PL_utf8skip[nextchr];
3119 nextchr = UCHARAT(locinput);
3120 break;
d6a28714 3121 }
ffc61ed2
JH
3122 if (!(OP(scan) == SPACE
3123 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3124 sayNO;
3125 nextchr = UCHARAT(++locinput);
3126 }
3127 else {
3128 if (!(OP(scan) == SPACE
3129 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3130 sayNO;
3131 nextchr = UCHARAT(++locinput);
a0ed51b3 3132 }
a0ed51b3 3133 break;
d6a28714 3134 case NSPACEL:
3280af22 3135 PL_reg_flags |= RF_tainted;
bbce6d69 3136 /* FALL THROUGH */
d6a28714 3137 case NSPACE:
9442cb0e 3138 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3139 sayNO;
1aa99e6b 3140 if (do_utf8) {
1a4fad37 3141 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3142 if (OP(scan) == NSPACE
3568d838 3143 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 3144 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
3145 {
3146 sayNO;
3147 }
3148 locinput += PL_utf8skip[nextchr];
3149 nextchr = UCHARAT(locinput);
3150 break;
a0ed51b3 3151 }
ffc61ed2 3152 if (OP(scan) == NSPACE
d6a28714 3153 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 3154 sayNO;
76e3520e 3155 nextchr = UCHARAT(++locinput);
a0d0e21e 3156 break;
d6a28714 3157 case DIGITL:
a0ed51b3
LW
3158 PL_reg_flags |= RF_tainted;
3159 /* FALL THROUGH */
d6a28714 3160 case DIGIT:
9442cb0e 3161 if (!nextchr)
a0ed51b3 3162 sayNO;
1aa99e6b 3163 if (do_utf8) {
1a4fad37 3164 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3165 if (!(OP(scan) == DIGIT
3568d838 3166 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 3167 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 3168 {
a0ed51b3 3169 sayNO;
dfe13c55 3170 }
6f06b55f 3171 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3172 nextchr = UCHARAT(locinput);
3173 break;
3174 }
ffc61ed2 3175 if (!(OP(scan) == DIGIT
9442cb0e 3176 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
3177 sayNO;
3178 nextchr = UCHARAT(++locinput);
3179 break;
d6a28714 3180 case NDIGITL:
b8c5462f
JH
3181 PL_reg_flags |= RF_tainted;
3182 /* FALL THROUGH */
d6a28714 3183 case NDIGIT:
9442cb0e 3184 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3185 sayNO;
1aa99e6b 3186 if (do_utf8) {
1a4fad37 3187 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3188 if (OP(scan) == NDIGIT
3568d838 3189 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
3190 : isDIGIT_LC_utf8((U8*)locinput))
3191 {
a0ed51b3 3192 sayNO;
9442cb0e 3193 }
6f06b55f 3194 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3195 nextchr = UCHARAT(locinput);
3196 break;
3197 }
ffc61ed2 3198 if (OP(scan) == NDIGIT
9442cb0e 3199 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
3200 sayNO;
3201 nextchr = UCHARAT(++locinput);
3202 break;
3203 case CLUMP:
b7c83a7e 3204 if (locinput >= PL_regeol)
a0ed51b3 3205 sayNO;
b7c83a7e 3206 if (do_utf8) {
1a4fad37 3207 LOAD_UTF8_CHARCLASS_MARK();
b7c83a7e
JH
3208 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3209 sayNO;
3210 locinput += PL_utf8skip[nextchr];
3211 while (locinput < PL_regeol &&
3212 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3213 locinput += UTF8SKIP(locinput);
3214 if (locinput > PL_regeol)
3215 sayNO;
eb08e2da
JH
3216 }
3217 else
3218 locinput++;
a0ed51b3
LW
3219 nextchr = UCHARAT(locinput);
3220 break;
c8756f30 3221 case REFFL:
3280af22 3222 PL_reg_flags |= RF_tainted;
c8756f30 3223 /* FALL THROUGH */
c277df42 3224 case REF:
95b24440
DM
3225 case REFF: {
3226 char *s;
c277df42 3227 n = ARG(scan); /* which paren pair */
5d9a96ca 3228 st->ln = PL_regstartp[n];
2c2d71f5 3229 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
5d9a96ca 3230 if ((I32)*PL_reglastparen < n || st->ln == -1)
af3f8c16 3231 sayNO; /* Do not match unless seen CLOSEn. */
5d9a96ca 3232 if (st->ln == PL_regendp[n])
a0d0e21e 3233 break;
a0ed51b3 3234
5d9a96ca 3235 s = PL_bostr + st->ln;
1aa99e6b 3236 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 3237 char *l = locinput;
a3b680e6 3238 const char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
3239 /*
3240 * Note that we can't do the "other character" lookup trick as
3241 * in the 8-bit case (no pun intended) because in Unicode we
3242 * have to map both upper and title case to lower case.
3243 */
3244 if (OP(scan) == REFF) {
3245 while (s < e) {
a3b680e6
AL
3246 STRLEN ulen1, ulen2;
3247 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3248 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3249
a0ed51b3
LW
3250 if (l >= PL_regeol)
3251 sayNO;
a2a2844f
JH
3252 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3253 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 3254 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 3255 sayNO;
a2a2844f
JH
3256 s += ulen1;
3257 l += ulen2;
a0ed51b3
LW
3258 }
3259 }
3260 locinput = l;
3261 nextchr = UCHARAT(locinput);
3262 break;
3263 }
3264
a0d0e21e 3265 /* Inline the first character, for speed. */
76e3520e 3266 if (UCHARAT(s) != nextchr &&
c8756f30
AK
3267 (OP(scan) == REF ||
3268 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 3269 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 3270 sayNO;
5d9a96ca
DM
3271 st->ln = PL_regendp[n] - st->ln;
3272 if (locinput + st->ln > PL_regeol)
4633a7c4 3273 sayNO;
5d9a96ca
DM
3274 if (st->ln > 1 && (OP(scan) == REF
3275 ? memNE(s, locinput, st->ln)
c8756f30 3276 : (OP(scan) == REFF
5d9a96ca
DM
3277 ? ibcmp(s, locinput, st->ln)
3278 : ibcmp_locale(s, locinput, st->ln))))
4633a7c4 3279 sayNO;
5d9a96ca 3280 locinput += st->ln;
76e3520e 3281 nextchr = UCHARAT(locinput);
a0d0e21e 3282 break;
95b24440 3283 }
a0d0e21e
LW
3284
3285 case NOTHING:
c277df42 3286 case TAIL:
a0d0e21e
LW
3287 break;
3288 case BACK:
3289 break;
c277df42
IZ
3290 case EVAL:
3291 {
c277df42 3292 SV *ret;
8e5e9ebe 3293 {
4aabdb9b
DM
3294 /* execute the code in the {...} */
3295 dSP;
6136c704 3296 SV ** const before = SP;
4aabdb9b
DM
3297 OP_4tree * const oop = PL_op;
3298 COP * const ocurcop = PL_curcop;
3299 PAD *old_comppad;
4aabdb9b
DM
3300
3301 n = ARG(scan);
32fc9b6a 3302 PL_op = (OP_4tree*)rex->data->data[n];
4aabdb9b 3303 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
32fc9b6a 3304 PAD_SAVE_LOCAL(old_comppad, (PAD*)rex->data->data[n + 2]);
4aabdb9b
DM
3305 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
3306
8e5e9ebe
RGS
3307 CALLRUNOPS(aTHX); /* Scalar context. */
3308 SPAGAIN;
3309 if (SP == before)
075aa684 3310 ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
8e5e9ebe
RGS
3311 else {
3312 ret = POPs;
3313 PUTBACK;
3314 }
4aabdb9b
DM
3315
3316 PL_op = oop;
3317 PAD_RESTORE_LOCAL(old_comppad);
3318 PL_curcop = ocurcop;
3319 if (!st->logical) {
3320 /* /(?{...})/ */
3321 sv_setsv(save_scalar(PL_replgv), ret);
4aabdb9b
DM
3322 break;
3323 }
8e5e9ebe 3324 }
4aabdb9b
DM
3325 if (st->logical == 2) { /* Postponed subexpression: /(??{...})/ */
3326 regexp *re;
4aabdb9b 3327 {
4f639d21
DM
3328 /* extract RE object from returned value; compiling if
3329 * necessary */
3330
6136c704 3331 MAGIC *mg = NULL;
4aabdb9b 3332 SV *sv;
faf82a0b
AE
3333 if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3334 mg = mg_find(sv, PERL_MAGIC_qr);
3335 else if (SvSMAGICAL(ret)) {
3336 if (SvGMAGICAL(ret))
3337 sv_unmagic(ret, PERL_MAGIC_qr);
3338 else
3339 mg = mg_find(ret, PERL_MAGIC_qr);
0f5d15d6 3340 }
faf82a0b 3341
0f5d15d6
IZ
3342 if (mg) {
3343 re = (regexp *)mg->mg_obj;
df0003d4 3344 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
3345 }
3346 else {
3347 STRLEN len;
6136c704 3348 const char * const t = SvPV_const(ret, len);
0f5d15d6 3349 PMOP pm;
a3b680e6 3350 const I32 osize = PL_regsize;
0f5d15d6 3351
5fcd1c1b 3352 Zero(&pm, 1, PMOP);
4aabdb9b 3353 if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
83003860 3354 re = CALLREGCOMP(aTHX_ (char*)t, (char*)t + len, &pm);
9041c2e3 3355 if (!(SvFLAGS(ret)
faf82a0b
AE
3356 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3357 | SVs_GMG)))
14befaf4
DM
3358 sv_magic(ret,(SV*)ReREFCNT_inc(re),
3359 PERL_MAGIC_qr,0,0);
0f5d15d6 3360 PL_regsize = osize;
0f5d15d6 3361 }
4aabdb9b 3362 }
aa283a38
DM
3363
3364 /* run the pattern returned from (??{...}) */
3365
4aabdb9b
DM
3366 DEBUG_EXECUTE_r(
3367 PerlIO_printf(Perl_debug_log,
3368 "Entering embedded \"%s%.60s%s%s\"\n",
3369 PL_colors[0],
3370 re->precomp,
3371 PL_colors[1],
3372 (strlen(re->precomp) > 60 ? "..." : ""))
3373 );
2c2d71f5 3374
4aabdb9b
DM
3375 st->u.eval.cp = regcppush(0); /* Save *all* the positions. */
3376 REGCP_SET(st->u.eval.lastcp);
4aabdb9b
DM
3377 *PL_reglastparen = 0;
3378 *PL_reglastcloseparen = 0;
4aabdb9b 3379 PL_reginput = locinput;
4aabdb9b
DM
3380
3381 /* XXXX This is too dramatic a measure... */
3382 PL_reg_maxiter = 0;
3383
5d9a96ca 3384 st->logical = 0;
aa283a38
DM
3385 st->u.eval.toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
3386 ((re->reganch & ROPT_UTF8) != 0);
3387 if (st->u.eval.toggleutf) PL_reg_flags ^= RF_utf8;
3388 st->u.eval.prev_rex = rex;
aa283a38 3389 rex = re;
aa283a38
DM
3390
3391 st->u.eval.prev_eval = cur_eval;
3392 st->u.eval.prev_slab = PL_regmatch_slab;
3393 st->u.eval.depth = depth;
3394 cur_eval = st;
3395 PUSH_STATE(newst, resume_EVAL);
3396 st = newst;
3397
3398 /* now continue from first node in postoned RE */
3399 next = re->program + 1;
3400 break;
4aabdb9b 3401 /* NOTREACHED */
a0ed51b3 3402 }
4aabdb9b
DM
3403 /* /(?(?{...})X|Y)/ */
3404 st->sw = SvTRUE(ret);
3405 st->logical = 0;
c277df42
IZ
3406 break;
3407 }
a0d0e21e 3408 case OPEN:
c277df42 3409 n = ARG(scan); /* which paren pair */
3280af22
NIS
3410 PL_reg_start_tmp[n] = locinput;
3411 if (n > PL_regsize)
3412 PL_regsize = n;
a0d0e21e
LW
3413 break;
3414 case CLOSE:
c277df42 3415 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
3416 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3417 PL_regendp[n] = locinput - PL_bostr;
eb160463 3418 if (n > (I32)*PL_reglastparen)
3280af22 3419 *PL_reglastparen = n;
a01268b5 3420 *PL_reglastcloseparen = n;
a0d0e21e 3421 break;
c277df42
IZ
3422 case GROUPP:
3423 n = ARG(scan); /* which paren pair */
5d9a96ca 3424 st->sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
3425 break;
3426 case IFTHEN:
2c2d71f5 3427 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
5d9a96ca 3428 if (st->sw)
c277df42
IZ
3429 next = NEXTOPER(NEXTOPER(scan));
3430 else {
3431 next = scan + ARG(scan);
3432 if (OP(next) == IFTHEN) /* Fake one. */
3433 next = NEXTOPER(NEXTOPER(next));
3434 }
3435 break;
3436 case LOGICAL:
5d9a96ca 3437 st->logical = scan->flags;
c277df42 3438 break;
2ab05381 3439/*******************************************************************
a0374537
DM
3440 cc points to the regmatch_state associated with the most recent CURLYX.
3441 This struct contains info about the innermost (...)* loop (an
3442 "infoblock"), and a pointer to the next outer cc.
2ab05381
IZ
3443
3444 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3445
95b24440 3446 1) After matching Y, regnode for CURLYX is processed;
2ab05381 3447
a0374537 3448 2) This regnode populates cc, and calls regmatch() recursively
95b24440 3449 with the starting point at WHILEM node;
2ab05381
IZ
3450
3451 3) Each hit of WHILEM node tries to match A and Z (in the order
3452 depending on the current iteration, min/max of {min,max} and
3453 greediness). The information about where are nodes for "A"
a0374537 3454 and "Z" is read from cc, as is info on how many times "A"
2ab05381
IZ
3455 was already matched, and greediness.
3456
3457 4) After A matches, the same WHILEM node is hit again.
3458
95b24440 3459 5) Each time WHILEM is hit, cc is the infoblock created by CURLYX
2ab05381 3460 of the same pair. Thus when WHILEM tries to match Z, it temporarily
95b24440 3461 resets cc, since this Y(A)*Z can be a part of some other loop:
2ab05381
IZ
3462 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
3463 of the external loop.
3464
a0374537 3465 Currently present infoblocks form a tree with a stem formed by st->cc
2ab05381
IZ
3466 and whatever it mentions via ->next, and additional attached trees
3467 corresponding to temporarily unset infoblocks as in "5" above.
3468
95b24440 3469 In the following picture, infoblocks for outer loop of
2ab05381
IZ
3470 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
3471 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
3472 infoblocks are drawn below the "reset" infoblock.
3473
3474 In fact in the picture below we do not show failed matches for Z and T
3475 by WHILEM blocks. [We illustrate minimal matches, since for them it is
3476 more obvious *why* one needs to *temporary* unset infoblocks.]
3477
3478 Matched REx position InfoBlocks Comment
3479 (Y(A)*?Z)*?T x
3480 Y(A)*?Z)*?T x <- O
3481 Y (A)*?Z)*?T x <- O
3482 Y A)*?Z)*?T x <- O <- I
3483 YA )*?Z)*?T x <- O <- I
3484 YA A)*?Z)*?T x <- O <- I
3485 YAA )*?Z)*?T x <- O <- I
3486 YAA Z)*?T x <- O # Temporary unset I
3487 I
3488
3489 YAAZ Y(A)*?Z)*?T x <- O
3490 I
3491
3492 YAAZY (A)*?Z)*?T x <- O
3493 I
3494
3495 YAAZY A)*?Z)*?T x <- O <- I
3496 I
3497
3498 YAAZYA )*?Z)*?T x <- O <- I
3499 I
3500
3501 YAAZYA Z)*?T x <- O # Temporary unset I
3502 I,I
3503
3504 YAAZYAZ )*?T x <- O
3505 I,I
3506
3507 YAAZYAZ T x # Temporary unset O
3508 O
3509 I,I
3510
3511 YAAZYAZT x
3512 O
3513 I,I
3514 *******************************************************************/
95b24440 3515
a0d0e21e 3516 case CURLYX: {
cb434fcc
IZ
3517 /* No need to save/restore up to this paren */
3518 I32 parenfloor = scan->flags;
c277df42
IZ
3519
3520 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3521 next += ARG(next);
cb434fcc
IZ
3522 /* XXXX Probably it is better to teach regpush to support
3523 parenfloor > PL_regsize... */
eb160463 3524 if (parenfloor > (I32)*PL_reglastparen)
cb434fcc 3525 parenfloor = *PL_reglastparen; /* Pessimization... */
a0374537 3526
d8319b27
DM
3527 st->u.curlyx.cp = PL_savestack_ix;
3528 st->u.curlyx.outercc = st->cc;
a0374537
DM
3529 st->cc = st;
3530 /* these fields contain the state of the current curly.
3531 * they are accessed by subsequent WHILEMs;
3532 * cur and lastloc are also updated by WHILEM */
d8319b27
DM
3533 st->u.curlyx.parenfloor = parenfloor;
3534 st->u.curlyx.cur = -1; /* this will be updated by WHILEM */
3535 st->u.curlyx.min = ARG1(scan);
3536 st->u.curlyx.max = ARG2(scan);
3537 st->u.curlyx.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3538 st->u.curlyx.lastloc = 0;
a0374537
DM
3539 /* st->next and st->minmod are also read by WHILEM */
3540
3280af22 3541 PL_reginput = locinput;
95b24440
DM
3542 REGMATCH(PREVOPER(next), CURLYX); /* start on the WHILEM */
3543 /*** all unsaved local vars undefined at this point */
d8319b27
DM
3544 regcpblow(st->u.curlyx.cp);
3545 st->cc = st->u.curlyx.outercc;
95b24440 3546 saySAME(result);
a0d0e21e 3547 }
5f66b61c 3548 /* NOTREACHED */
a0d0e21e
LW
3549 case WHILEM: {
3550 /*
3551 * This is really hard to understand, because after we match
3552 * what we're trying to match, we must make sure the rest of
2c2d71f5 3553 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
3554 * to go back UP the parse tree by recursing ever deeper. And
3555 * if it fails, we have to reset our parent's current state
3556 * that we can try again after backing off.
3557 */
3558
d8319b27
DM
3559 st->u.whilem.lastloc = st->cc->u.curlyx.lastloc; /* Detection of 0-len. */
3560 st->u.whilem.cache_offset = 0;
3561 st->u.whilem.cache_bit = 0;
c277df42 3562
d8319b27 3563 n = st->cc->u.curlyx.cur + 1; /* how many we know we matched */
3280af22 3564 PL_reginput = locinput;
a0d0e21e 3565
a3621e74 3566 DEBUG_EXECUTE_r(
9041c2e3 3567 PerlIO_printf(Perl_debug_log,
91f3b821 3568 "%*s %ld out of %ld..%ld cc=%"UVxf"\n",
3280af22 3569 REPORT_CODE_OFF+PL_regindent*2, "",
d8319b27
DM
3570 (long)n, (long)st->cc->u.curlyx.min,
3571 (long)st->cc->u.curlyx.max, PTR2UV(st->cc))
c277df42 3572 );
4633a7c4 3573
a0d0e21e
LW
3574 /* If degenerate scan matches "", assume scan done. */
3575
d8319b27
DM
3576 if (locinput == st->cc->u.curlyx.lastloc && n >= st->cc->u.curlyx.min) {
3577 st->u.whilem.savecc = st->cc;
3578 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3579 if (st->cc)
d8319b27 3580 st->ln = st->cc->u.curlyx.cur;
a3621e74 3581 DEBUG_EXECUTE_r(
c3464db5
DD
3582 PerlIO_printf(Perl_debug_log,
3583 "%*s empty match detected, try continuation...\n",
3280af22 3584 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3585 );
d8319b27 3586 REGMATCH(st->u.whilem.savecc->next, WHILEM1);
95b24440 3587 /*** all unsaved local vars undefined at this point */
d8319b27 3588 st->cc = st->u.whilem.savecc;
95b24440 3589 if (result)
4633a7c4 3590 sayYES;
d8319b27
DM
3591 if (st->cc->u.curlyx.outercc)
3592 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
4633a7c4 3593 sayNO;
a0d0e21e
LW
3594 }
3595
3596 /* First just match a string of min scans. */
3597
d8319b27
DM
3598 if (n < st->cc->u.curlyx.min) {
3599 st->cc->u.curlyx.cur = n;
3600 st->cc->u.curlyx.lastloc = locinput;
3601 REGMATCH(st->cc->u.curlyx.scan, WHILEM2);
95b24440
DM
3602 /*** all unsaved local vars undefined at this point */
3603 if (result)
4633a7c4 3604 sayYES;
d8319b27
DM
3605 st->cc->u.curlyx.cur = n - 1;
3606 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
4633a7c4 3607 sayNO;
a0d0e21e
LW
3608 }
3609
2c2d71f5
JH
3610 if (scan->flags) {
3611 /* Check whether we already were at this position.
3612 Postpone detection until we know the match is not
3613 *that* much linear. */
3614 if (!PL_reg_maxiter) {
3615 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3616 PL_reg_leftiter = PL_reg_maxiter;
3617 }
3618 if (PL_reg_leftiter-- == 0) {
a3b680e6 3619 const I32 size = (PL_reg_maxiter + 7 + POSCACHE_START)/8;
2c2d71f5 3620 if (PL_reg_poscache) {
eb160463 3621 if ((I32)PL_reg_poscache_size < size) {
2c2d71f5
JH
3622 Renew(PL_reg_poscache, size, char);
3623 PL_reg_poscache_size = size;
3624 }
3625 Zero(PL_reg_poscache, size, char);
3626 }
3627 else {
3628 PL_reg_poscache_size = size;
a02a5408 3629 Newxz(PL_reg_poscache, size, char);
2c2d71f5 3630 }
a3621e74 3631 DEBUG_EXECUTE_r(
2c2d71f5
JH
3632 PerlIO_printf(Perl_debug_log,
3633 "%sDetected a super-linear match, switching on caching%s...\n",
3634 PL_colors[4], PL_colors[5])
3635 );
3636 }
3637 if (PL_reg_leftiter < 0) {
d8319b27 3638 st->u.whilem.cache_offset = locinput - PL_bostr;
2c2d71f5 3639
d8319b27
DM
3640 st->u.whilem.cache_offset = (scan->flags & 0xf) - 1 + POSCACHE_START
3641 + st->u.whilem.cache_offset * (scan->flags>>4);
3642 st->u.whilem.cache_bit = st->u.whilem.cache_offset % 8;
3643 st->u.whilem.cache_offset /= 8;
3644 if (PL_reg_poscache[st->u.whilem.cache_offset] & (1<<st->u.whilem.cache_bit)) {
a3621e74 3645 DEBUG_EXECUTE_r(
2c2d71f5
JH
3646 PerlIO_printf(Perl_debug_log,
3647 "%*s already tried at this position...\n",
3648 REPORT_CODE_OFF+PL_regindent*2, "")
3649 );
3ab3c9b4
HS
3650 if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))
3651 /* cache records success */
c2b0868c
HS
3652 sayYES;
3653 else
3ab3c9b4 3654 /* cache records failure */
c2b0868c 3655 sayNO_SILENT;
2c2d71f5 3656 }
d8319b27 3657 PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit);
2c2d71f5
JH
3658 }
3659 }
3660
a0d0e21e
LW
3661 /* Prefer next over scan for minimal matching. */
3662
5d9a96ca 3663 if (st->cc->minmod) {
d8319b27
DM
3664 st->u.whilem.savecc = st->cc;
3665 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3666 if (st->cc)
d8319b27
DM
3667 st->ln = st->cc->u.curlyx.cur;
3668 st->u.whilem.cp = regcppush(st->u.whilem.savecc->u.curlyx.parenfloor);
3669 REGCP_SET(st->u.whilem.lastcp);
3670 REGMATCH(st->u.whilem.savecc->next, WHILEM3);
95b24440 3671 /*** all unsaved local vars undefined at this point */
d8319b27 3672 st->cc = st->u.whilem.savecc;
95b24440 3673 if (result) {
d8319b27 3674 regcpblow(st->u.whilem.cp);
3ab3c9b4 3675 CACHEsayYES; /* All done. */
5f05dabc 3676 }
d8319b27 3677 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3678 regcppop(rex);
d8319b27
DM
3679 if (st->cc->u.curlyx.outercc)
3680 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
a0d0e21e 3681
d8319b27 3682 if (n >= st->cc->u.curlyx.max) { /* Maximum greed exceeded? */
9041c2e3 3683 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3684 && !(PL_reg_flags & RF_warned)) {
3685 PL_reg_flags |= RF_warned;
9014280d 3686 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
2f3ca594
GS
3687 "Complex regular subexpression recursion",
3688 REG_INFTY - 1);
c277df42 3689 }
3ab3c9b4 3690 CACHEsayNO;
c277df42 3691 }
a687059c 3692
a3621e74 3693 DEBUG_EXECUTE_r(
c3464db5
DD
3694 PerlIO_printf(Perl_debug_log,
3695 "%*s trying longer...\n",
3280af22 3696 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3697 );
a0d0e21e 3698 /* Try scanning more and see if it helps. */
3280af22 3699 PL_reginput = locinput;
d8319b27
DM
3700 st->cc->u.curlyx.cur = n;
3701 st->cc->u.curlyx.lastloc = locinput;
3702 st->u.whilem.cp = regcppush(st->cc->u.curlyx.parenfloor);
3703 REGCP_SET(st->u.whilem.lastcp);
3704 REGMATCH(st->cc->u.curlyx.scan, WHILEM4);
95b24440
DM
3705 /*** all unsaved local vars undefined at this point */
3706 if (result) {
d8319b27 3707 regcpblow(st->u.whilem.cp);
3ab3c9b4 3708 CACHEsayYES;
5f05dabc 3709 }
d8319b27 3710 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3711 regcppop(rex);
d8319b27
DM
3712 st->cc->u.curlyx.cur = n - 1;
3713 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
3ab3c9b4 3714 CACHEsayNO;
a0d0e21e
LW
3715 }
3716
3717 /* Prefer scan over next for maximal matching. */
3718
d8319b27
DM
3719 if (n < st->cc->u.curlyx.max) { /* More greed allowed? */
3720 st->u.whilem.cp = regcppush(st->cc->u.curlyx.parenfloor);
3721 st->cc->u.curlyx.cur = n;
3722 st->cc->u.curlyx.lastloc = locinput;
3723 REGCP_SET(st->u.whilem.lastcp);
3724 REGMATCH(st->cc->u.curlyx.scan, WHILEM5);
95b24440
DM
3725 /*** all unsaved local vars undefined at this point */
3726 if (result) {
d8319b27 3727 regcpblow(st->u.whilem.cp);
3ab3c9b4 3728 CACHEsayYES;
5f05dabc 3729 }
d8319b27 3730 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3731 regcppop(rex); /* Restore some previous $<digit>s? */
3280af22 3732 PL_reginput = locinput;
a3621e74 3733 DEBUG_EXECUTE_r(
c3464db5
DD
3734 PerlIO_printf(Perl_debug_log,
3735 "%*s failed, try continuation...\n",
3280af22 3736 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3737 );
3738 }
9041c2e3 3739 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3740 && !(PL_reg_flags & RF_warned)) {
3280af22 3741 PL_reg_flags |= RF_warned;
9014280d 3742 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
cb5d145d
GS
3743 "Complex regular subexpression recursion",
3744 REG_INFTY - 1);
a0d0e21e
LW
3745 }
3746
3747 /* Failed deeper matches of scan, so see if this one works. */
d8319b27
DM
3748 st->u.whilem.savecc = st->cc;
3749 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3750 if (st->cc)
d8319b27
DM
3751 st->ln = st->cc->u.curlyx.cur;
3752 REGMATCH(st->u.whilem.savecc->next, WHILEM6);
95b24440 3753 /*** all unsaved local vars undefined at this point */
d8319b27 3754 st->cc = st->u.whilem.savecc;
95b24440 3755 if (result)
3ab3c9b4 3756 CACHEsayYES;
d8319b27
DM
3757 if (st->cc->u.curlyx.outercc)
3758 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
3759 st->cc->u.curlyx.cur = n - 1;
3760 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
3ab3c9b4 3761 CACHEsayNO;
a0d0e21e 3762 }
5f66b61c 3763 /* NOTREACHED */
9041c2e3 3764 case BRANCHJ:
c277df42
IZ
3765 next = scan + ARG(scan);
3766 if (next == scan)
3767 next = NULL;
3768 inner = NEXTOPER(NEXTOPER(scan));
3769 goto do_branch;
9041c2e3 3770 case BRANCH:
c277df42
IZ
3771 inner = NEXTOPER(scan);
3772 do_branch:
3773 {
e822a8b4
DM
3774 I32 type;
3775 type = OP(scan);
3776 if (OP(next) != type) /* No choice. */
c277df42 3777 next = inner; /* Avoid recursion. */
a0d0e21e 3778 else {
a3b680e6 3779 const I32 lastparen = *PL_reglastparen;
02db2b7b 3780 /* Put unwinding data on stack */
6136c704
AL
3781 const I32 unwind1 = SSNEWt(1,re_unwind_branch_t);
3782 re_unwind_branch_t * const uw = SSPTRt(unwind1,re_unwind_branch_t);
3783
5d9a96ca
DM
3784 uw->prev = st->unwind;
3785 st->unwind = unwind1;
e822a8b4 3786 uw->type = ((type == BRANCH)
02db2b7b
IZ
3787 ? RE_UNWIND_BRANCH
3788 : RE_UNWIND_BRANCHJ);
3789 uw->lastparen = lastparen;
3790 uw->next = next;
3791 uw->locinput = locinput;
3792 uw->nextchr = nextchr;
3a2830be 3793 uw->minmod = st->minmod;
02db2b7b
IZ
3794#ifdef DEBUGGING
3795 uw->regindent = ++PL_regindent;
3796#endif
c277df42 3797
02db2b7b
IZ
3798 REGCP_SET(uw->lastcp);
3799
3800 /* Now go into the first branch */
3801 next = inner;
a687059c 3802 }
a0d0e21e
LW
3803 }
3804 break;
3805 case MINMOD:
5d9a96ca 3806 st->minmod = 1;
a0d0e21e 3807 break;
c277df42
IZ
3808 case CURLYM:
3809 {
d8319b27 3810 st->u.curlym.l = st->u.curlym.matches = 0;
9041c2e3 3811
c277df42 3812 /* We suppose that the next guy does not need
0e788c72 3813 backtracking: in particular, it is of constant non-zero length,
c277df42 3814 and has no parenths to influence future backrefs. */
5d9a96ca 3815 st->ln = ARG1(scan); /* min to match */
c277df42 3816 n = ARG2(scan); /* max to match */
d8319b27
DM
3817 st->u.curlym.paren = scan->flags;
3818 if (st->u.curlym.paren) {
3819 if (st->u.curlym.paren > PL_regsize)
3820 PL_regsize = st->u.curlym.paren;
3821 if (st->u.curlym.paren > (I32)*PL_reglastparen)
3822 *PL_reglastparen = st->u.curlym.paren;
c277df42 3823 }
dc45a647 3824 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
d8319b27 3825 if (st->u.curlym.paren)
c277df42 3826 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3827 PL_reginput = locinput;
d8319b27
DM
3828 st->u.curlym.maxwanted = st->minmod ? st->ln : n;
3829 if (st->u.curlym.maxwanted) {
3830 while (PL_reginput < PL_regeol && st->u.curlym.matches < st->u.curlym.maxwanted) {
95b24440
DM
3831 REGMATCH(scan, CURLYM1);
3832 /*** all unsaved local vars undefined at this point */
3833 if (!result)
6407bf3b 3834 break;
d8319b27
DM
3835 /* on first match, determine length, u.curlym.l */
3836 if (!st->u.curlym.matches++) {
6407bf3b
DM
3837 if (PL_reg_match_utf8) {
3838 char *s = locinput;
3839 while (s < PL_reginput) {
d8319b27 3840 st->u.curlym.l++;
6407bf3b
DM
3841 s += UTF8SKIP(s);
3842 }
3843 }
3844 else {
d8319b27 3845 st->u.curlym.l = PL_reginput - locinput;
6407bf3b 3846 }
d8319b27
DM
3847 if (st->u.curlym.l == 0) {
3848 st->u.curlym.matches = st->u.curlym.maxwanted;
6407bf3b
DM
3849 break;
3850 }
3851 }
3852 locinput = PL_reginput;
3853 }
3854 }
3855
3856 PL_reginput = locinput;
3857
5d9a96ca
DM
3858 if (st->minmod) {
3859 st->minmod = 0;
d8319b27 3860 if (st->ln && st->u.curlym.matches < st->ln)
c277df42 3861 sayNO;
cca55fe3 3862 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3863 regnode *text_node = next;
3864
cca55fe3 3865 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3866
d8319b27 3867 if (! HAS_TEXT(text_node)) st->u.curlym.c1 = st->u.curlym.c2 = -1000;
5f80c4cf 3868 else {
cca55fe3 3869 if (PL_regkind[(U8)OP(text_node)] == REF) {
d8319b27 3870 st->u.curlym.c1 = st->u.curlym.c2 = -1000;
44a68960 3871 goto assume_ok_MM;
cca55fe3 3872 }
d8319b27 3873 else { st->u.curlym.c1 = (U8)*STRING(text_node); }
af5decee 3874 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
d8319b27 3875 st->u.curlym.c2 = PL_fold[st->u.curlym.c1];
af5decee 3876 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
d8319b27 3877 st->u.curlym.c2 = PL_fold_locale[st->u.curlym.c1];
5f80c4cf 3878 else
d8319b27 3879 st->u.curlym.c2 = st->u.curlym.c1;
5f80c4cf 3880 }
a0ed51b3
LW
3881 }
3882 else
d8319b27 3883 st->u.curlym.c1 = st->u.curlym.c2 = -1000;
cca55fe3 3884 assume_ok_MM:
d8319b27 3885 REGCP_SET(st->u.curlym.lastcp);
5d9a96ca 3886 while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */
c277df42 3887 /* If it could work, try it. */
d8319b27
DM
3888 if (st->u.curlym.c1 == -1000 ||
3889 UCHARAT(PL_reginput) == st->u.curlym.c1 ||
3890 UCHARAT(PL_reginput) == st->u.curlym.c2)
c277df42 3891 {
d8319b27 3892 if (st->u.curlym.paren) {
5d9a96ca 3893 if (st->ln) {
d8319b27
DM
3894 PL_regstartp[st->u.curlym.paren] =
3895 HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr;
3896 PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3897 }
3898 else
d8319b27 3899 PL_regendp[st->u.curlym.paren] = -1;
c277df42 3900 }
95b24440
DM
3901 REGMATCH(next, CURLYM2);
3902 /*** all unsaved local vars undefined at this point */
3903 if (result)
c277df42 3904 sayYES;
d8319b27 3905 REGCP_UNWIND(st->u.curlym.lastcp);
c277df42
IZ
3906 }
3907 /* Couldn't or didn't -- move forward. */
3280af22 3908 PL_reginput = locinput;
95b24440
DM
3909 REGMATCH(scan, CURLYM3);
3910 /*** all unsaved local vars undefined at this point */
3911 if (result) {
5d9a96ca 3912 st->ln++;
3280af22 3913 locinput = PL_reginput;
c277df42
IZ
3914 }
3915 else
3916 sayNO;
3917 }
a0ed51b3
LW
3918 }
3919 else {
a3621e74 3920 DEBUG_EXECUTE_r(
5c0ca799 3921 PerlIO_printf(Perl_debug_log,
95b24440
DM
3922 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
3923 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
d8319b27 3924 (IV) st->u.curlym.matches, (IV)st->u.curlym.l)
c277df42 3925 );
d8319b27 3926 if (st->u.curlym.matches >= st->ln) {
cca55fe3 3927 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3928 regnode *text_node = next;
3929
cca55fe3 3930 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3931
d8319b27 3932 if (! HAS_TEXT(text_node)) st->u.curlym.c1 = st->u.curlym.c2 = -1000;
5f80c4cf 3933 else {
cca55fe3 3934 if (PL_regkind[(U8)OP(text_node)] == REF) {
d8319b27 3935 st->u.curlym.c1 = st->u.curlym.c2 = -1000;
44a68960 3936 goto assume_ok_REG;
cca55fe3 3937 }
d8319b27 3938 else { st->u.curlym.c1 = (U8)*STRING(text_node); }
cca55fe3 3939
af5decee 3940 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
d8319b27 3941 st->u.curlym.c2 = PL_fold[st->u.curlym.c1];
af5decee 3942 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
d8319b27 3943 st->u.curlym.c2 = PL_fold_locale[st->u.curlym.c1];
5f80c4cf 3944 else
d8319b27 3945 st->u.curlym.c2 = st->u.curlym.c1;
5f80c4cf 3946 }
a0ed51b3
LW
3947 }
3948 else
d8319b27 3949 st->u.curlym.c1 = st->u.curlym.c2 = -1000;
c277df42 3950 }
cca55fe3 3951 assume_ok_REG:
d8319b27
DM
3952 REGCP_SET(st->u.curlym.lastcp);
3953 while (st->u.curlym.matches >= st->ln) {
c277df42 3954 /* If it could work, try it. */
d8319b27
DM
3955 if (st->u.curlym.c1 == -1000 ||
3956 UCHARAT(PL_reginput) == st->u.curlym.c1 ||
3957 UCHARAT(PL_reginput) == st->u.curlym.c2)
a0ed51b3 3958 {
a3621e74 3959 DEBUG_EXECUTE_r(
6407bf3b
DM
3960 PerlIO_printf(Perl_debug_log,
3961 "%*s trying tail with matches=%"IVdf"...\n",
3962 (int)(REPORT_CODE_OFF+PL_regindent*2),
d8319b27 3963 "", (IV)st->u.curlym.matches)
a0ed51b3 3964 );
d8319b27
DM
3965 if (st->u.curlym.paren) {
3966 if (st->u.curlym.matches) {
3967 PL_regstartp[st->u.curlym.paren]
3968 = HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr;
3969 PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr;
c277df42 3970 }
a0ed51b3 3971 else
d8319b27 3972 PL_regendp[st->u.curlym.paren] = -1;
c277df42 3973 }
95b24440
DM
3974 REGMATCH(next, CURLYM4);
3975 /*** all unsaved local vars undefined at this point */
3976 if (result)
a0ed51b3 3977 sayYES;
d8319b27 3978 REGCP_UNWIND(st->u.curlym.lastcp);
a0ed51b3 3979 }
c277df42 3980 /* Couldn't or didn't -- back up. */
d8319b27
DM
3981 st->u.curlym.matches--;
3982 locinput = HOPc(locinput, -st->u.curlym.l);
3280af22 3983 PL_reginput = locinput;
c277df42
IZ
3984 }
3985 }
3986 sayNO;
5f66b61c 3987 /* NOTREACHED */
c277df42
IZ
3988 break;
3989 }
3990 case CURLYN:
d8319b27
DM
3991 st->u.plus.paren = scan->flags; /* Which paren to set */
3992 if (st->u.plus.paren > PL_regsize)
3993 PL_regsize = st->u.plus.paren;
3994 if (st->u.plus.paren > (I32)*PL_reglastparen)
3995 *PL_reglastparen = st->u.plus.paren;
5d9a96ca 3996 st->ln = ARG1(scan); /* min to match */
c277df42 3997 n = ARG2(scan); /* max to match */
dc45a647 3998 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3999 goto repeat;
a0d0e21e 4000 case CURLY:
d8319b27 4001 st->u.plus.paren = 0;
5d9a96ca 4002 st->ln = ARG1(scan); /* min to match */
a0d0e21e 4003 n = ARG2(scan); /* max to match */
dc45a647 4004 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
4005 goto repeat;
4006 case STAR:
5d9a96ca 4007 st->ln = 0;
c277df42 4008 n = REG_INFTY;
a0d0e21e 4009 scan = NEXTOPER(scan);
d8319b27 4010 st->u.plus.paren = 0;
a0d0e21e
LW
4011 goto repeat;
4012 case PLUS:
5d9a96ca 4013 st->ln = 1;
c277df42
IZ
4014 n = REG_INFTY;
4015 scan = NEXTOPER(scan);
d8319b27 4016 st->u.plus.paren = 0;
c277df42 4017 repeat:
a0d0e21e
LW
4018 /*
4019 * Lookahead to avoid useless match attempts
4020 * when we know what character comes next.
4021 */
5f80c4cf
JP
4022
4023 /*
4024 * Used to only do .*x and .*?x, but now it allows
4025 * for )'s, ('s and (?{ ... })'s to be in the way
4026 * of the quantifier and the EXACT-like node. -- japhy
4027 */
4028
cca55fe3 4029 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
4030 U8 *s;
4031 regnode *text_node = next;
4032
cca55fe3 4033 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 4034
d8319b27 4035 if (! HAS_TEXT(text_node)) st->u.plus.c1 = st->u.plus.c2 = -1000;
5f80c4cf 4036 else {
cca55fe3 4037 if (PL_regkind[(U8)OP(text_node)] == REF) {
d8319b27 4038 st->u.plus.c1 = st->u.plus.c2 = -1000;
44a68960 4039 goto assume_ok_easy;
cca55fe3
JP
4040 }
4041 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
4042
4043 if (!UTF) {
d8319b27 4044 st->u.plus.c2 = st->u.plus.c1 = *s;
f65d3ee7 4045 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
d8319b27 4046 st->u.plus.c2 = PL_fold[st->u.plus.c1];
f65d3ee7 4047 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
d8319b27 4048 st->u.plus.c2 = PL_fold_locale[st->u.plus.c1];
1aa99e6b 4049 }
5f80c4cf 4050 else { /* UTF */
f65d3ee7 4051 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 4052 STRLEN ulen1, ulen2;
89ebb4a3
JH
4053 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4054 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
a2a2844f
JH
4055
4056 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4057 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4058
d8319b27 4059 st->u.plus.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
041457d9 4060 uniflags);
d8319b27 4061 st->u.plus.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
041457d9 4062 uniflags);
5f80c4cf
JP
4063 }
4064 else {
d8319b27 4065 st->u.plus.c2 = st->u.plus.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
041457d9 4066 uniflags);
5f80c4cf 4067 }
1aa99e6b
IH
4068 }
4069 }
bbce6d69 4070 }
a0d0e21e 4071 else
d8319b27 4072 st->u.plus.c1 = st->u.plus.c2 = -1000;
cca55fe3 4073 assume_ok_easy:
3280af22 4074 PL_reginput = locinput;
5d9a96ca
DM
4075 if (st->minmod) {
4076 st->minmod = 0;
32fc9b6a 4077 if (st->ln && regrepeat(rex, scan, st->ln) < st->ln)
4633a7c4 4078 sayNO;
a0ed51b3 4079 locinput = PL_reginput;
d8319b27
DM
4080 REGCP_SET(st->u.plus.lastcp);
4081 if (st->u.plus.c1 != -1000) {
4082 st->u.plus.old = locinput;
4083 st->u.plus.count = 0;
0fe9bf95 4084
1aa99e6b 4085 if (n == REG_INFTY) {
d8319b27 4086 st->u.plus.e = PL_regeol - 1;
1aa99e6b 4087 if (do_utf8)
d8319b27
DM
4088 while (UTF8_IS_CONTINUATION(*(U8*)st->u.plus.e))
4089 st->u.plus.e--;
1aa99e6b
IH
4090 }
4091 else if (do_utf8) {
5d9a96ca 4092 int m = n - st->ln;
d8319b27
DM
4093 for (st->u.plus.e = locinput;
4094 m >0 && st->u.plus.e + UTF8SKIP(st->u.plus.e) <= PL_regeol; m--)
4095 st->u.plus.e += UTF8SKIP(st->u.plus.e);
1aa99e6b
IH
4096 }
4097 else {
d8319b27
DM
4098 st->u.plus.e = locinput + n - st->ln;
4099 if (st->u.plus.e >= PL_regeol)
4100 st->u.plus.e = PL_regeol - 1;
1aa99e6b 4101 }
0fe9bf95
IZ
4102 while (1) {
4103 /* Find place 'next' could work */
1aa99e6b 4104 if (!do_utf8) {
d8319b27
DM
4105 if (st->u.plus.c1 == st->u.plus.c2) {
4106 while (locinput <= st->u.plus.e &&
4107 UCHARAT(locinput) != st->u.plus.c1)
1aa99e6b
IH
4108 locinput++;
4109 } else {
d8319b27
DM
4110 while (locinput <= st->u.plus.e
4111 && UCHARAT(locinput) != st->u.plus.c1
4112 && UCHARAT(locinput) != st->u.plus.c2)
1aa99e6b
IH
4113 locinput++;
4114 }
d8319b27 4115 st->u.plus.count = locinput - st->u.plus.old;
1aa99e6b
IH
4116 }
4117 else {
d8319b27 4118 if (st->u.plus.c1 == st->u.plus.c2) {
a3b680e6 4119 STRLEN len;
872c91ae
JH
4120 /* count initialised to
4121 * utf8_distance(old, locinput) */
d8319b27 4122 while (locinput <= st->u.plus.e &&
872c91ae 4123 utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4124 UTF8_MAXBYTES, &len,
d8319b27 4125 uniflags) != (UV)st->u.plus.c1) {
1aa99e6b 4126 locinput += len;
d8319b27 4127 st->u.plus.count++;
b2f2f093 4128 }
1aa99e6b 4129 } else {
a3b680e6 4130 STRLEN len;
872c91ae
JH
4131 /* count initialised to
4132 * utf8_distance(old, locinput) */
d8319b27 4133 while (locinput <= st->u.plus.e) {
872c91ae 4134 UV c = utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4135 UTF8_MAXBYTES, &len,
041457d9 4136 uniflags);
d8319b27 4137 if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
1aa99e6b 4138 break;
b2f2f093 4139 locinput += len;
d8319b27 4140 st->u.plus.count++;
1aa99e6b
IH
4141 }
4142 }
0fe9bf95 4143 }
d8319b27 4144 if (locinput > st->u.plus.e)
0fe9bf95
IZ
4145 sayNO;
4146 /* PL_reginput == old now */
d8319b27 4147 if (locinput != st->u.plus.old) {
5d9a96ca 4148 st->ln = 1; /* Did some */
32fc9b6a 4149 if (regrepeat(rex, scan, st->u.plus.count) < st->u.plus.count)
0fe9bf95
IZ
4150 sayNO;
4151 }
4152 /* PL_reginput == locinput now */
d8319b27 4153 TRYPAREN(st->u.plus.paren, st->ln, locinput, PLUS1);
95b24440 4154 /*** all unsaved local vars undefined at this point */
0fe9bf95 4155 PL_reginput = locinput; /* Could be reset... */
d8319b27 4156 REGCP_UNWIND(st->u.plus.lastcp);
0fe9bf95 4157 /* Couldn't or didn't -- move forward. */
d8319b27 4158 st->u.plus.old = locinput;
1aa99e6b
IH
4159 if (do_utf8)
4160 locinput += UTF8SKIP(locinput);
4161 else
4162 locinput++;
d8319b27 4163 st->u.plus.count = 1;
0fe9bf95
IZ
4164 }
4165 }
4166 else
5d9a96ca 4167 while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */
1aa99e6b 4168 UV c;
d8319b27 4169 if (st->u.plus.c1 != -1000) {
1aa99e6b 4170 if (do_utf8)
872c91ae 4171 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4172 UTF8_MAXBYTES, 0,
041457d9 4173 uniflags);
1aa99e6b 4174 else
9041c2e3 4175 c = UCHARAT(PL_reginput);
2390ecbc 4176 /* If it could work, try it. */
d8319b27 4177 if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
2390ecbc 4178 {
d8319b27 4179 TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS2);
95b24440 4180 /*** all unsaved local vars undefined at this point */
d8319b27 4181 REGCP_UNWIND(st->u.plus.lastcp);
2390ecbc 4182 }
1aa99e6b 4183 }
a0d0e21e 4184 /* If it could work, try it. */
d8319b27 4185 else if (st->u.plus.c1 == -1000)
bbce6d69 4186 {
d8319b27 4187 TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS3);
95b24440 4188 /*** all unsaved local vars undefined at this point */
d8319b27 4189 REGCP_UNWIND(st->u.plus.lastcp);
bbce6d69 4190 }
c277df42 4191 /* Couldn't or didn't -- move forward. */
a0ed51b3 4192 PL_reginput = locinput;
32fc9b6a 4193 if (regrepeat(rex, scan, 1)) {
5d9a96ca 4194 st->ln++;
a0ed51b3
LW
4195 locinput = PL_reginput;
4196 }
4197 else
4633a7c4 4198 sayNO;
a0d0e21e
LW
4199 }
4200 }
4201 else {
32fc9b6a 4202 n = regrepeat(rex, scan, n);
a0ed51b3 4203 locinput = PL_reginput;
5d9a96ca 4204 if (st->ln < n && PL_regkind[(U8)OP(next)] == EOL &&
7fba1cd6 4205 (OP(next) != MEOL ||
15272685
HS
4206 OP(next) == SEOL || OP(next) == EOS))
4207 {
5d9a96ca 4208 st->ln = n; /* why back off? */
1aeab75a
GS
4209 /* ...because $ and \Z can match before *and* after
4210 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
4211 We should back off by one in this case. */
4212 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
5d9a96ca 4213 st->ln--;
1aeab75a 4214 }
d8319b27 4215 REGCP_SET(st->u.plus.lastcp);
1d5c262f 4216 {
8fa7f367 4217 UV c = 0;
5d9a96ca 4218 while (n >= st->ln) {
d8319b27 4219 if (st->u.plus.c1 != -1000) {
1aa99e6b 4220 if (do_utf8)
872c91ae 4221 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4222 UTF8_MAXBYTES, 0,
041457d9 4223 uniflags);
1aa99e6b 4224 else
9041c2e3 4225 c = UCHARAT(PL_reginput);
1aa99e6b 4226 }
c277df42 4227 /* If it could work, try it. */
d8319b27 4228 if (st->u.plus.c1 == -1000 || c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
c277df42 4229 {
d8319b27 4230 TRYPAREN(st->u.plus.paren, n, PL_reginput, PLUS4);
95b24440 4231 /*** all unsaved local vars undefined at this point */
d8319b27 4232 REGCP_UNWIND(st->u.plus.lastcp);
c277df42
IZ
4233 }
4234 /* Couldn't or didn't -- back up. */
4235 n--;
dfe13c55 4236 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 4237 }
a0d0e21e
LW
4238 }
4239 }
4633a7c4 4240 sayNO;
c277df42 4241 break;
a0d0e21e 4242 case END:
aa283a38
DM
4243 if (cur_eval) {
4244 /* we have successfully completed the execution of a
4245 * postponed re. Pop all states back to the last EVAL
4246 * then continue with the node following the (??{...})
4247 */
4248
4249 /* this simulates a POP_STATE, except that it pops several
4250 * levels, and doesn't restore locinput */
6136c704 4251
aa283a38
DM
4252 st = cur_eval;
4253 PL_regmatch_slab = st->u.eval.prev_slab;
4254 cur_eval = st->u.eval.prev_eval;
4255 depth = st->u.eval.depth;
6136c704 4256
aa283a38
DM
4257 PL_regmatch_state = st;
4258 scan = st->scan;
4259 next = st->next;
4260 n = st->n;
4261
4262 if (st->u.eval.toggleutf)
4263 PL_reg_flags ^= RF_utf8;
4264 ReREFCNT_dec(rex);
4265 rex = st->u.eval.prev_rex;
aa283a38
DM
4266 /* XXXX This is too dramatic a measure... */
4267 PL_reg_maxiter = 0;
4268
4269 /* Restore parens of the caller without popping the
4270 * savestack */
95b24440
DM
4271 {
4272 I32 tmp = PL_savestack_ix;
aa283a38 4273 PL_savestack_ix = st->u.eval.lastcp;
4f639d21 4274 regcppop(rex);
95b24440
DM
4275 PL_savestack_ix = tmp;
4276 }
6136c704 4277
6136c704 4278
aa283a38
DM
4279 PL_reginput = locinput;
4280 /* resume at node following the (??{...}) */
4281 break;
0f5d15d6 4282
0f5d15d6 4283 }
aa283a38 4284
7821416a 4285 if (locinput < PL_regtill) {
a3621e74 4286 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
7821416a
IZ
4287 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4288 PL_colors[4],
4289 (long)(locinput - PL_reg_starttry),
4290 (long)(PL_regtill - PL_reg_starttry),
4291 PL_colors[5]));
4292 sayNO_FINAL; /* Cannot match: too short. */
4293 }
4294 PL_reginput = locinput; /* put where regtry can find it */
4295 sayYES_FINAL; /* Success! */
7e5428c5 4296 case SUCCEED:
3280af22 4297 PL_reginput = locinput; /* put where regtry can find it */
7821416a 4298 sayYES_LOUD; /* Success! */
c277df42
IZ
4299 case SUSPEND:
4300 n = 1;
9fe1d20c 4301 PL_reginput = locinput;
9041c2e3 4302 goto do_ifmatch;
a0d0e21e 4303 case UNLESSM:
c277df42 4304 n = 0;
a0ed51b3 4305 if (scan->flags) {
52657f30 4306 char * const s = HOPBACKc(locinput, scan->flags);
efb30f32
HS
4307 if (!s)
4308 goto say_yes;
4309 PL_reginput = s;
a0ed51b3
LW
4310 }
4311 else
4312 PL_reginput = locinput;
c277df42
IZ
4313 goto do_ifmatch;
4314 case IFMATCH:
4315 n = 1;
a0ed51b3 4316 if (scan->flags) {
52657f30 4317 char * const s = HOPBACKc(locinput, scan->flags);
efb30f32
HS
4318 if (!s)
4319 goto say_no;
4320 PL_reginput = s;
a0ed51b3
LW
4321 }
4322 else
4323 PL_reginput = locinput;
4324
c277df42 4325 do_ifmatch:
95b24440
DM
4326 REGMATCH(NEXTOPER(NEXTOPER(scan)), IFMATCH);
4327 /*** all unsaved local vars undefined at this point */
4328 if (result != n) {
c277df42 4329 say_no:
5d9a96ca
DM
4330 if (st->logical) {
4331 st->logical = 0;
4332 st->sw = 0;
c277df42 4333 goto do_longjump;
a0ed51b3
LW
4334 }
4335 else
c277df42
IZ
4336 sayNO;
4337 }
4338 say_yes:
5d9a96ca
DM
4339 if (st->logical) {
4340 st->logical = 0;
4341 st->sw = 1;
c277df42 4342 }
fe44a5e8 4343 if (OP(scan) == SUSPEND) {
3280af22 4344 locinput = PL_reginput;
565764a8 4345 nextchr = UCHARAT(locinput);
fe44a5e8 4346 }
c277df42
IZ
4347 /* FALL THROUGH. */
4348 case LONGJMP:
4349 do_longjump:
4350 next = scan + ARG(scan);
4351 if (next == scan)
4352 next = NULL;
a0d0e21e
LW
4353 break;
4354 default:
b900a521 4355 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 4356 PTR2UV(scan), OP(scan));
cea2e8a9 4357 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 4358 }
95b24440 4359
02db2b7b 4360 reenter:
a0d0e21e 4361 scan = next;
95b24440
DM
4362 continue;
4363 /* NOTREACHED */
4364
4365 /* simulate recursively calling regmatch(), but without actually
4366 * recursing - ie save the current state on the heap rather than on
4367 * the stack, then re-enter the loop. This avoids complex regexes
4368 * blowing the processor stack */
4369
4370 start_recurse:
4371 {
5d9a96ca
DM
4372 /* push new state */
4373 regmatch_state *oldst = st;
4374
4375 depth++;
4376
4377 /* grab the next free state slot */
4378 st++;
4379 if (st > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]))
4380 st = S_push_slab(aTHX);
4381 PL_regmatch_state = st;
4382
4383 oldst->next = next;
4384 oldst->n = n;
4385 oldst->locinput = locinput;
5d9a96ca
DM
4386
4387 st->cc = oldst->cc;
95b24440
DM
4388 locinput = PL_reginput;
4389 nextchr = UCHARAT(locinput);
5d9a96ca
DM
4390 st->minmod = 0;
4391 st->sw = 0;
4392 st->logical = 0;
4393 st->unwind = 0;
95b24440
DM
4394#ifdef DEBUGGING
4395 PL_regindent++;
4396#endif
4397 }
a0d0e21e 4398 }
a687059c 4399
aa283a38
DM
4400
4401
a0d0e21e
LW
4402 /*
4403 * We get here only if there's trouble -- normally "case END" is
4404 * the terminating point.
4405 */
cea2e8a9 4406 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 4407 /*NOTREACHED*/
4633a7c4
LW
4408 sayNO;
4409
7821416a 4410yes_loud:
a3621e74 4411 DEBUG_EXECUTE_r(
7821416a
IZ
4412 PerlIO_printf(Perl_debug_log,
4413 "%*s %scould match...%s\n",
e4584336 4414 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
7821416a
IZ
4415 );
4416 goto yes;
4417yes_final:
a3621e74 4418 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
e4584336 4419 PL_colors[4], PL_colors[5]));
4633a7c4
LW
4420yes:
4421#ifdef DEBUGGING
3280af22 4422 PL_regindent--;
4633a7c4 4423#endif
02db2b7b 4424
95b24440 4425 result = 1;
aa283a38
DM
4426 /* XXX this is duplicate(ish) code to that in the do_no section.
4427 * eventually a yes should just pop the whole stack */
4428 if (depth) {
4429 /* restore previous state and re-enter */
4430 POP_STATE;
4431
4432 switch (st->resume_state) {
4433 case resume_TRIE1:
4434 goto resume_point_TRIE1;
4435 case resume_TRIE2:
4436 goto resume_point_TRIE2;
4437 case resume_EVAL:
4438 break;
4439 case resume_CURLYX:
4440 goto resume_point_CURLYX;
4441 case resume_WHILEM1:
4442 goto resume_point_WHILEM1;
4443 case resume_WHILEM2:
4444 goto resume_point_WHILEM2;
4445 case resume_WHILEM3:
4446 goto resume_point_WHILEM3;
4447 case resume_WHILEM4:
4448 goto resume_point_WHILEM4;
4449 case resume_WHILEM5:
4450 goto resume_point_WHILEM5;
4451 case resume_WHILEM6:
4452 goto resume_point_WHILEM6;
4453 case resume_CURLYM1:
4454 goto resume_point_CURLYM1;
4455 case resume_CURLYM2:
4456 goto resume_point_CURLYM2;
4457 case resume_CURLYM3:
4458 goto resume_point_CURLYM3;
4459 case resume_CURLYM4:
4460 goto resume_point_CURLYM4;
4461 case resume_IFMATCH:
4462 goto resume_point_IFMATCH;
4463 case resume_PLUS1:
4464 goto resume_point_PLUS1;
4465 case resume_PLUS2:
4466 goto resume_point_PLUS2;
4467 case resume_PLUS3:
4468 goto resume_point_PLUS3;
4469 case resume_PLUS4:
4470 goto resume_point_PLUS4;
4471 default:
4472 Perl_croak(aTHX_ "regexp resume memory corruption");
4473 }
4474 }
4475 goto final_exit;
4633a7c4
LW
4476
4477no:
a3621e74 4478 DEBUG_EXECUTE_r(
7821416a
IZ
4479 PerlIO_printf(Perl_debug_log,
4480 "%*s %sfailed...%s\n",
e4584336 4481 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
7821416a
IZ
4482 );
4483 goto do_no;
4484no_final:
4485do_no:
5d9a96ca
DM
4486 if (st->unwind) {
4487 re_unwind_t * const uw = SSPTRt(st->unwind,re_unwind_t);
02db2b7b
IZ
4488
4489 switch (uw->type) {
4490 case RE_UNWIND_BRANCH:
4491 case RE_UNWIND_BRANCHJ:
4492 {
6136c704 4493 re_unwind_branch_t * const uwb = &(uw->branch);
a3b680e6 4494 const I32 lastparen = uwb->lastparen;
9041c2e3 4495
02db2b7b
IZ
4496 REGCP_UNWIND(uwb->lastcp);
4497 for (n = *PL_reglastparen; n > lastparen; n--)
4498 PL_regendp[n] = -1;
4499 *PL_reglastparen = n;
4500 scan = next = uwb->next;
3a2830be 4501 st->minmod = uwb->minmod;
9041c2e3
NIS
4502 if ( !scan ||
4503 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b 4504 ? BRANCH : BRANCHJ) ) { /* Failure */
5d9a96ca 4505 st->unwind = uwb->prev;
02db2b7b
IZ
4506#ifdef DEBUGGING
4507 PL_regindent--;
4508#endif
4509 goto do_no;
4510 }
4511 /* Have more choice yet. Reuse the same uwb. */
02db2b7b
IZ
4512 if ((n = (uwb->type == RE_UNWIND_BRANCH
4513 ? NEXT_OFF(next) : ARG(next))))
4514 next += n;
4515 else
4516 next = NULL; /* XXXX Needn't unwinding in this case... */
4517 uwb->next = next;
4518 next = NEXTOPER(scan);
4519 if (uwb->type == RE_UNWIND_BRANCHJ)
4520 next = NEXTOPER(next);
4521 locinput = uwb->locinput;
4522 nextchr = uwb->nextchr;
4523#ifdef DEBUGGING
4524 PL_regindent = uwb->regindent;
4525#endif
4526
4527 goto reenter;
4528 }
5f66b61c 4529 /* NOTREACHED */
02db2b7b
IZ
4530 default:
4531 Perl_croak(aTHX_ "regexp unwind memory corruption");
4532 }
5f66b61c 4533 /* NOTREACHED */
02db2b7b 4534 }
aa283a38 4535
4633a7c4 4536#ifdef DEBUGGING
3280af22 4537 PL_regindent--;
4633a7c4 4538#endif
95b24440 4539 result = 0;
5d9a96ca 4540
aa283a38
DM
4541 if (depth) {
4542 /* there's a previous state to backtrack to */
4543 POP_STATE;
5d9a96ca 4544 switch (st->resume_state) {
95b24440
DM
4545 case resume_TRIE1:
4546 goto resume_point_TRIE1;
4547 case resume_TRIE2:
4548 goto resume_point_TRIE2;
aa283a38
DM
4549 case resume_EVAL:
4550 /* we have failed an (??{...}). Restore state to the outer re
4551 * then re-throw the failure */
4552 if (st->u.eval.toggleutf)
4553 PL_reg_flags ^= RF_utf8;
4554 ReREFCNT_dec(rex);
4555 rex = st->u.eval.prev_rex;
aa283a38
DM
4556 cur_eval = st->u.eval.prev_eval;
4557
4558 /* XXXX This is too dramatic a measure... */
4559 PL_reg_maxiter = 0;
4560
4561 PL_reginput = locinput;
4562 REGCP_UNWIND(st->u.eval.lastcp);
4563 regcppop(rex);
4564 goto do_no;
4565
95b24440
DM
4566 case resume_CURLYX:
4567 goto resume_point_CURLYX;
4568 case resume_WHILEM1:
4569 goto resume_point_WHILEM1;
4570 case resume_WHILEM2:
4571 goto resume_point_WHILEM2;
4572 case resume_WHILEM3:
4573 goto resume_point_WHILEM3;
4574 case resume_WHILEM4:
4575 goto resume_point_WHILEM4;
4576 case resume_WHILEM5:
4577 goto resume_point_WHILEM5;
4578 case resume_WHILEM6:
4579 goto resume_point_WHILEM6;
4580 case resume_CURLYM1:
4581 goto resume_point_CURLYM1;
4582 case resume_CURLYM2:
4583 goto resume_point_CURLYM2;
4584 case resume_CURLYM3:
4585 goto resume_point_CURLYM3;
4586 case resume_CURLYM4:
4587 goto resume_point_CURLYM4;
4588 case resume_IFMATCH:
4589 goto resume_point_IFMATCH;
4590 case resume_PLUS1:
4591 goto resume_point_PLUS1;
4592 case resume_PLUS2:
4593 goto resume_point_PLUS2;
4594 case resume_PLUS3:
4595 goto resume_point_PLUS3;
4596 case resume_PLUS4:
4597 goto resume_point_PLUS4;
95b24440
DM
4598 default:
4599 Perl_croak(aTHX_ "regexp resume memory corruption");
4600 }
95b24440 4601 }
aa283a38
DM
4602
4603final_exit:
4604
5d9a96ca
DM
4605 /* restore original high-water mark */
4606 PL_regmatch_slab = orig_slab;
4607 PL_regmatch_state = orig_state;
4608
4609 /* free all slabs above current one */
4610 if (orig_slab->next) {
ad65c075 4611 regmatch_slab *osl, *sl = orig_slab->next;
5d9a96ca
DM
4612 orig_slab->next = NULL;
4613 while (sl) {
ad65c075 4614 osl = sl;
5d9a96ca 4615 sl = sl->next;
ad65c075 4616 Safefree(osl);
5d9a96ca
DM
4617 }
4618 }
4619
95b24440
DM
4620 return result;
4621
a687059c
LW
4622}
4623
4624/*
4625 - regrepeat - repeatedly match something simple, report how many
4626 */
4627/*
4628 * [This routine now assumes that it will only match on things of length 1.
4629 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 4630 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 4631 */
76e3520e 4632STATIC I32
32fc9b6a 4633S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max)
a687059c 4634{
27da23d5 4635 dVAR;
a0d0e21e 4636 register char *scan;
a0d0e21e 4637 register I32 c;
3280af22 4638 register char *loceol = PL_regeol;
a0ed51b3 4639 register I32 hardcount = 0;
53c4c00c 4640 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 4641
3280af22 4642 scan = PL_reginput;
faf11cac
HS
4643 if (max == REG_INFTY)
4644 max = I32_MAX;
4645 else if (max < loceol - scan)
7f596f4c 4646 loceol = scan + max;
a0d0e21e 4647 switch (OP(p)) {
22c35a8c 4648 case REG_ANY:
1aa99e6b 4649 if (do_utf8) {
ffc61ed2 4650 loceol = PL_regeol;
1aa99e6b 4651 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
4652 scan += UTF8SKIP(scan);
4653 hardcount++;
4654 }
4655 } else {
4656 while (scan < loceol && *scan != '\n')
4657 scan++;
a0ed51b3
LW
4658 }
4659 break;
ffc61ed2 4660 case SANY:
def8e4ea
JH
4661 if (do_utf8) {
4662 loceol = PL_regeol;
a0804c9e 4663 while (scan < loceol && hardcount < max) {
def8e4ea
JH
4664 scan += UTF8SKIP(scan);
4665 hardcount++;
4666 }
4667 }
4668 else
4669 scan = loceol;
a0ed51b3 4670 break;
f33976b4
DB
4671 case CANY:
4672 scan = loceol;
4673 break;
090f7165
JH
4674 case EXACT: /* length of string is 1 */
4675 c = (U8)*STRING(p);
4676 while (scan < loceol && UCHARAT(scan) == c)
4677 scan++;
bbce6d69 4678 break;
4679 case EXACTF: /* length of string is 1 */
cd439c50 4680 c = (U8)*STRING(p);
bbce6d69 4681 while (scan < loceol &&
22c35a8c 4682 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 4683 scan++;
4684 break;
4685 case EXACTFL: /* length of string is 1 */
3280af22 4686 PL_reg_flags |= RF_tainted;
cd439c50 4687 c = (U8)*STRING(p);
bbce6d69 4688 while (scan < loceol &&
22c35a8c 4689 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
4690 scan++;
4691 break;
4692 case ANYOF:
ffc61ed2
JH
4693 if (do_utf8) {
4694 loceol = PL_regeol;
cfc92286 4695 while (hardcount < max && scan < loceol &&
32fc9b6a 4696 reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
ffc61ed2
JH
4697 scan += UTF8SKIP(scan);
4698 hardcount++;
4699 }
4700 } else {
32fc9b6a 4701 while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
ffc61ed2
JH
4702 scan++;
4703 }
a0d0e21e
LW
4704 break;
4705 case ALNUM:
1aa99e6b 4706 if (do_utf8) {
ffc61ed2 4707 loceol = PL_regeol;
1a4fad37 4708 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4709 while (hardcount < max && scan < loceol &&
3568d838 4710 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4711 scan += UTF8SKIP(scan);
4712 hardcount++;
4713 }
4714 } else {
4715 while (scan < loceol && isALNUM(*scan))
4716 scan++;
a0ed51b3
LW
4717 }
4718 break;
bbce6d69 4719 case ALNUML:
3280af22 4720 PL_reg_flags |= RF_tainted;
1aa99e6b 4721 if (do_utf8) {
ffc61ed2 4722 loceol = PL_regeol;
1aa99e6b
IH
4723 while (hardcount < max && scan < loceol &&
4724 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4725 scan += UTF8SKIP(scan);
4726 hardcount++;
4727 }
4728 } else {
4729 while (scan < loceol && isALNUM_LC(*scan))
4730 scan++;
a0ed51b3
LW
4731 }
4732 break;
a0d0e21e 4733 case NALNUM:
1aa99e6b 4734 if (do_utf8) {
ffc61ed2 4735 loceol = PL_regeol;
1a4fad37 4736 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4737 while (hardcount < max && scan < loceol &&
3568d838 4738 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4739 scan += UTF8SKIP(scan);
4740 hardcount++;
4741 }
4742 } else {
4743 while (scan < loceol && !isALNUM(*scan))
4744 scan++;
a0ed51b3
LW
4745 }
4746 break;
bbce6d69 4747 case NALNUML:
3280af22 4748 PL_reg_flags |= RF_tainted;
1aa99e6b 4749 if (do_utf8) {
ffc61ed2 4750 loceol = PL_regeol;
1aa99e6b
IH
4751 while (hardcount < max && scan < loceol &&
4752 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4753 scan += UTF8SKIP(scan);
4754 hardcount++;
4755 }
4756 } else {
4757 while (scan < loceol && !isALNUM_LC(*scan))
4758 scan++;
a0ed51b3
LW
4759 }
4760 break;
a0d0e21e 4761 case SPACE:
1aa99e6b 4762 if (do_utf8) {
ffc61ed2 4763 loceol = PL_regeol;
1a4fad37 4764 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4765 while (hardcount < max && scan < loceol &&
3568d838
JH
4766 (*scan == ' ' ||
4767 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4768 scan += UTF8SKIP(scan);
4769 hardcount++;
4770 }
4771 } else {
4772 while (scan < loceol && isSPACE(*scan))
4773 scan++;
a0ed51b3
LW
4774 }
4775 break;
bbce6d69 4776 case SPACEL:
3280af22 4777 PL_reg_flags |= RF_tainted;
1aa99e6b 4778 if (do_utf8) {
ffc61ed2 4779 loceol = PL_regeol;
1aa99e6b 4780 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4781 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4782 scan += UTF8SKIP(scan);
4783 hardcount++;
4784 }
4785 } else {
4786 while (scan < loceol && isSPACE_LC(*scan))
4787 scan++;
a0ed51b3
LW
4788 }
4789 break;
a0d0e21e 4790 case NSPACE:
1aa99e6b 4791 if (do_utf8) {
ffc61ed2 4792 loceol = PL_regeol;
1a4fad37 4793 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4794 while (hardcount < max && scan < loceol &&
3568d838
JH
4795 !(*scan == ' ' ||
4796 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4797 scan += UTF8SKIP(scan);
4798 hardcount++;
4799 }
4800 } else {
4801 while (scan < loceol && !isSPACE(*scan))
4802 scan++;
4803 break;
a0ed51b3 4804 }
bbce6d69 4805 case NSPACEL:
3280af22 4806 PL_reg_flags |= RF_tainted;
1aa99e6b 4807 if (do_utf8) {
ffc61ed2 4808 loceol = PL_regeol;
1aa99e6b 4809 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4810 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4811 scan += UTF8SKIP(scan);
4812 hardcount++;
4813 }
4814 } else {
4815 while (scan < loceol && !isSPACE_LC(*scan))
4816 scan++;
a0ed51b3
LW
4817 }
4818 break;
a0d0e21e 4819 case DIGIT:
1aa99e6b 4820 if (do_utf8) {
ffc61ed2 4821 loceol = PL_regeol;
1a4fad37 4822 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4823 while (hardcount < max && scan < loceol &&
3568d838 4824 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4825 scan += UTF8SKIP(scan);
4826 hardcount++;
4827 }
4828 } else {
4829 while (scan < loceol && isDIGIT(*scan))
4830 scan++;
a0ed51b3
LW
4831 }
4832 break;
a0d0e21e 4833 case NDIGIT:
1aa99e6b 4834 if (do_utf8) {
ffc61ed2 4835 loceol = PL_regeol;
1a4fad37 4836 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4837 while (hardcount < max && scan < loceol &&
3568d838 4838 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4839 scan += UTF8SKIP(scan);
4840 hardcount++;
4841 }
4842 } else {
4843 while (scan < loceol && !isDIGIT(*scan))
4844 scan++;
a0ed51b3
LW
4845 }
4846 break;
a0d0e21e
LW
4847 default: /* Called on something of 0 width. */
4848 break; /* So match right here or not at all. */
4849 }
a687059c 4850
a0ed51b3
LW
4851 if (hardcount)
4852 c = hardcount;
4853 else
4854 c = scan - PL_reginput;
3280af22 4855 PL_reginput = scan;
a687059c 4856
a3621e74 4857 DEBUG_r({
ab74612d 4858 SV *re_debug_flags = NULL;
6136c704 4859 SV * const prop = sv_newmortal();
a3621e74
YO
4860 GET_RE_DEBUG_FLAGS;
4861 DEBUG_EXECUTE_r({
32fc9b6a 4862 regprop(prog, prop, p);
9041c2e3
NIS
4863 PerlIO_printf(Perl_debug_log,
4864 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3f7c398e 4865 REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max);
c277df42 4866 });
a3621e74 4867 });
9041c2e3 4868
a0d0e21e 4869 return(c);
a687059c
LW
4870}
4871
c277df42
IZ
4872
4873/*
ffc61ed2
JH
4874- regclass_swash - prepare the utf8 swash
4875*/
4876
4877SV *
32fc9b6a 4878Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
ffc61ed2 4879{
97aff369 4880 dVAR;
9e55ce06
JH
4881 SV *sw = NULL;
4882 SV *si = NULL;
4883 SV *alt = NULL;
32fc9b6a 4884 const struct reg_data *data = prog ? prog->data : NULL;
ffc61ed2 4885
4f639d21 4886 if (data && data->count) {
a3b680e6 4887 const U32 n = ARG(node);
ffc61ed2 4888
4f639d21
DM
4889 if (data->what[n] == 's') {
4890 SV * const rv = (SV*)data->data[n];
890ce7af 4891 AV * const av = (AV*)SvRV((SV*)rv);
2d03de9c 4892 SV **const ary = AvARRAY(av);
9e55ce06 4893 SV **a, **b;
9041c2e3 4894
711a919c 4895 /* See the end of regcomp.c:S_regclass() for
9e55ce06
JH
4896 * documentation of these array elements. */
4897
b11f357e 4898 si = *ary;
8f7f7219 4899 a = SvROK(ary[1]) ? &ary[1] : 0;
b11f357e
JH
4900 b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4901
ffc61ed2
JH
4902 if (a)
4903 sw = *a;
4904 else if (si && doinit) {
4905 sw = swash_init("utf8", "", si, 1, 0);
4906 (void)av_store(av, 1, sw);
4907 }
9e55ce06
JH
4908 if (b)
4909 alt = *b;
ffc61ed2
JH
4910 }
4911 }
4912
9e55ce06
JH
4913 if (listsvp)
4914 *listsvp = si;
4915 if (altsvp)
4916 *altsvp = alt;
ffc61ed2
JH
4917
4918 return sw;
4919}
4920
4921/*
ba7b4546 4922 - reginclass - determine if a character falls into a character class
832705d4
JH
4923
4924 The n is the ANYOF regnode, the p is the target string, lenp
4925 is pointer to the maximum length of how far to go in the p
4926 (if the lenp is zero, UTF8SKIP(p) is used),
4927 do_utf8 tells whether the target string is in UTF-8.
4928
bbce6d69 4929 */
4930
76e3520e 4931STATIC bool
32fc9b6a 4932S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
bbce6d69 4933{
27da23d5 4934 dVAR;
a3b680e6 4935 const char flags = ANYOF_FLAGS(n);
bbce6d69 4936 bool match = FALSE;
cc07378b 4937 UV c = *p;
ae9ddab8 4938 STRLEN len = 0;
9e55ce06 4939 STRLEN plen;
1aa99e6b 4940
19f67299
TS
4941 if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
4942 c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
4ad0818d
DM
4943 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
4944 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
19f67299
TS
4945 if (len == (STRLEN)-1)
4946 Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
4947 }
bbce6d69 4948
0f0076b4 4949 plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
ffc61ed2 4950 if (do_utf8 || (flags & ANYOF_UNICODE)) {
9e55ce06
JH
4951 if (lenp)
4952 *lenp = 0;
ffc61ed2 4953 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
4954 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4955 match = TRUE;
bbce6d69 4956 }
3568d838 4957 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 4958 match = TRUE;
ffc61ed2 4959 if (!match) {
9e55ce06 4960 AV *av;
32fc9b6a 4961 SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
ffc61ed2
JH
4962
4963 if (sw) {
3568d838 4964 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
4965 match = TRUE;
4966 else if (flags & ANYOF_FOLD) {
9e55ce06
JH
4967 if (!match && lenp && av) {
4968 I32 i;
9e55ce06 4969 for (i = 0; i <= av_len(av); i++) {
890ce7af 4970 SV* const sv = *av_fetch(av, i, FALSE);
9e55ce06 4971 STRLEN len;
890ce7af 4972 const char * const s = SvPV_const(sv, len);
9e55ce06 4973
061b10df 4974 if (len <= plen && memEQ(s, (char*)p, len)) {
9e55ce06
JH
4975 *lenp = len;
4976 match = TRUE;
4977 break;
4978 }
4979 }
4980 }
4981 if (!match) {
89ebb4a3 4982 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4a623e43
JH
4983 STRLEN tmplen;
4984
9e55ce06
JH
4985 to_utf8_fold(p, tmpbuf, &tmplen);
4986 if (swash_fetch(sw, tmpbuf, do_utf8))
4987 match = TRUE;
4988 }
ffc61ed2
JH
4989 }
4990 }
bbce6d69 4991 }
9e55ce06 4992 if (match && lenp && *lenp == 0)
0f0076b4 4993 *lenp = UNISKIP(NATIVE_TO_UNI(c));
bbce6d69 4994 }
1aa99e6b 4995 if (!match && c < 256) {
ffc61ed2
JH
4996 if (ANYOF_BITMAP_TEST(n, c))
4997 match = TRUE;
4998 else if (flags & ANYOF_FOLD) {
eb160463 4999 U8 f;
a0ed51b3 5000
ffc61ed2
JH
5001 if (flags & ANYOF_LOCALE) {
5002 PL_reg_flags |= RF_tainted;
5003 f = PL_fold_locale[c];
5004 }
5005 else
5006 f = PL_fold[c];
5007 if (f != c && ANYOF_BITMAP_TEST(n, f))
5008 match = TRUE;
5009 }
5010
5011 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 5012 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
5013 if (
5014 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
5015 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
5016 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
5017 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
5018 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
5019 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
5020 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
5021 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5022 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
5023 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
5024 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
5025 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
5026 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
5027 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
5028 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
5029 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
5030 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
5031 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
5032 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
5033 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
5034 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
5035 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
5036 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
5037 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
5038 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
5039 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
5040 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
5041 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
5042 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
5043 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
5044 ) /* How's that for a conditional? */
5045 {
5046 match = TRUE;
5047 }
a0ed51b3 5048 }
a0ed51b3
LW
5049 }
5050
a0ed51b3
LW
5051 return (flags & ANYOF_INVERT) ? !match : match;
5052}
161b471a 5053
dfe13c55 5054STATIC U8 *
5f66b61c 5055S_reghop3(U8 *s, I32 off, U8* lim)
9041c2e3 5056{
97aff369 5057 dVAR;
a0ed51b3 5058 if (off >= 0) {
1aa99e6b 5059 while (off-- && s < lim) {
ffc61ed2 5060 /* XXX could check well-formedness here */
a0ed51b3 5061 s += UTF8SKIP(s);
ffc61ed2 5062 }
a0ed51b3
LW
5063 }
5064 else {
5065 while (off++) {
1aa99e6b 5066 if (s > lim) {
a0ed51b3 5067 s--;
ffc61ed2 5068 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 5069 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 5070 s--;
ffc61ed2
JH
5071 }
5072 /* XXX could check well-formedness here */
a0ed51b3
LW
5073 }
5074 }
5075 }
5076 return s;
5077}
161b471a 5078
dfe13c55 5079STATIC U8 *
5f66b61c 5080S_reghopmaybe3(U8* s, I32 off, U8* lim)
a0ed51b3 5081{
97aff369 5082 dVAR;
a0ed51b3 5083 if (off >= 0) {
1aa99e6b 5084 while (off-- && s < lim) {
ffc61ed2 5085 /* XXX could check well-formedness here */
a0ed51b3 5086 s += UTF8SKIP(s);
ffc61ed2 5087 }
a0ed51b3
LW
5088 if (off >= 0)
5089 return 0;
5090 }
5091 else {
5092 while (off++) {
1aa99e6b 5093 if (s > lim) {
a0ed51b3 5094 s--;
ffc61ed2 5095 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 5096 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 5097 s--;
ffc61ed2
JH
5098 }
5099 /* XXX could check well-formedness here */
a0ed51b3
LW
5100 }
5101 else
5102 break;
5103 }
5104 if (off <= 0)
5105 return 0;
5106 }
5107 return s;
5108}
51371543 5109
51371543 5110static void
acfe0abc 5111restore_pos(pTHX_ void *arg)
51371543 5112{
97aff369 5113 dVAR;
097eb12c 5114 regexp * const rex = (regexp *)arg;
51371543
GS
5115 if (PL_reg_eval_set) {
5116 if (PL_reg_oldsaved) {
4f639d21
DM
5117 rex->subbeg = PL_reg_oldsaved;
5118 rex->sublen = PL_reg_oldsavedlen;
f8c7b90f 5119#ifdef PERL_OLD_COPY_ON_WRITE
4f639d21 5120 rex->saved_copy = PL_nrs;
ed252734 5121#endif
4f639d21 5122 RX_MATCH_COPIED_on(rex);
51371543
GS
5123 }
5124 PL_reg_magic->mg_len = PL_reg_oldpos;
5125 PL_reg_eval_set = 0;
5126 PL_curpm = PL_reg_oldcurpm;
5127 }
5128}
33b8afdf
JH
5129
5130STATIC void
5131S_to_utf8_substr(pTHX_ register regexp *prog)
5132{
33b8afdf 5133 if (prog->float_substr && !prog->float_utf8) {
097eb12c
AL
5134 SV* const sv = newSVsv(prog->float_substr);
5135 prog->float_utf8 = sv;
33b8afdf
JH
5136 sv_utf8_upgrade(sv);
5137 if (SvTAIL(prog->float_substr))
5138 SvTAIL_on(sv);
5139 if (prog->float_substr == prog->check_substr)
5140 prog->check_utf8 = sv;
5141 }
5142 if (prog->anchored_substr && !prog->anchored_utf8) {
097eb12c
AL
5143 SV* const sv = newSVsv(prog->anchored_substr);
5144 prog->anchored_utf8 = sv;
33b8afdf
JH
5145 sv_utf8_upgrade(sv);
5146 if (SvTAIL(prog->anchored_substr))
5147 SvTAIL_on(sv);
5148 if (prog->anchored_substr == prog->check_substr)
5149 prog->check_utf8 = sv;
5150 }
5151}
5152
5153STATIC void
5154S_to_byte_substr(pTHX_ register regexp *prog)
5155{
97aff369 5156 dVAR;
33b8afdf 5157 if (prog->float_utf8 && !prog->float_substr) {
097eb12c
AL
5158 SV* sv = newSVsv(prog->float_utf8);
5159 prog->float_substr = sv;
33b8afdf
JH
5160 if (sv_utf8_downgrade(sv, TRUE)) {
5161 if (SvTAIL(prog->float_utf8))
5162 SvTAIL_on(sv);
5163 } else {
5164 SvREFCNT_dec(sv);
5165 prog->float_substr = sv = &PL_sv_undef;
5166 }
5167 if (prog->float_utf8 == prog->check_utf8)
5168 prog->check_substr = sv;
5169 }
5170 if (prog->anchored_utf8 && !prog->anchored_substr) {
097eb12c
AL
5171 SV* sv = newSVsv(prog->anchored_utf8);
5172 prog->anchored_substr = sv;
33b8afdf
JH
5173 if (sv_utf8_downgrade(sv, TRUE)) {
5174 if (SvTAIL(prog->anchored_utf8))
5175 SvTAIL_on(sv);
5176 } else {
5177 SvREFCNT_dec(sv);
5178 prog->anchored_substr = sv = &PL_sv_undef;
5179 }
5180 if (prog->anchored_utf8 == prog->check_utf8)
5181 prog->check_substr = sv;
5182 }
5183}
66610fdd
RGS
5184
5185/*
5186 * Local variables:
5187 * c-indentation-style: bsd
5188 * c-basic-offset: 4
5189 * indent-tabs-mode: t
5190 * End:
5191 *
37442d52
RGS
5192 * ex: set ts=8 sts=4 sw=4 noet:
5193 */