This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcomp.c (try II): use &PL_sv_undef for listsv as suggested by Nick
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
61296642
DM
8/* This file contains functions for executing a regular expression. See
9 * also regcomp.c which funnily enough, contains functions for compiling
166f8a29 10 * a regular expression.
e4a054ea
DM
11 *
12 * This file is also copied at build time to ext/re/re_exec.c, where
13 * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14 * This causes the main functions to be compiled under new names and with
15 * debugging support added, which makes "use re 'debug'" work.
16
166f8a29
DM
17 */
18
a687059c
LW
19/* NOTE: this is derived from Henry Spencer's regexp code, and should not
20 * confused with the original package (see point 3 below). Thanks, Henry!
21 */
22
23/* Additional note: this code is very heavily munged from Henry's version
24 * in places. In some spots I've traded clarity for efficiency, so don't
25 * blame Henry for some of the lack of readability.
26 */
27
e50aee73
AD
28/* The names of the functions have been changed from regcomp and
29 * regexec to pregcomp and pregexec in order to avoid conflicts
30 * with the POSIX routines of the same names.
31*/
32
b9d5759e
AD
33#ifdef PERL_EXT_RE_BUILD
34/* need to replace pregcomp et al, so enable that */
35# ifndef PERL_IN_XSUB_RE
36# define PERL_IN_XSUB_RE
37# endif
38/* need access to debugger hooks */
cad2e5aa 39# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
40# define DEBUGGING
41# endif
42#endif
43
44#ifdef PERL_IN_XSUB_RE
d06ea78c 45/* We *really* need to overwrite these symbols: */
56953603
IZ
46# define Perl_regexec_flags my_regexec
47# define Perl_regdump my_regdump
48# define Perl_regprop my_regprop
cad2e5aa 49# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
50/* *These* symbols are masked to allow static link. */
51# define Perl_pregexec my_pregexec
9041c2e3 52# define Perl_reginitcolors my_reginitcolors
490a3f88 53# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
54
55# define PERL_NO_GET_CONTEXT
9041c2e3 56#endif
56953603 57
a687059c 58/*
e50aee73 59 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
60 *
61 * Copyright (c) 1986 by University of Toronto.
62 * Written by Henry Spencer. Not derived from licensed software.
63 *
64 * Permission is granted to anyone to use this software for any
65 * purpose on any computer system, and to redistribute it freely,
66 * subject to the following restrictions:
67 *
68 * 1. The author is not responsible for the consequences of use of
69 * this software, no matter how awful, even if they arise
70 * from defects in it.
71 *
72 * 2. The origin of this software must not be misrepresented, either
73 * by explicit claim or by omission.
74 *
75 * 3. Altered versions must be plainly marked as such, and must not
76 * be misrepresented as being the original software.
77 *
78 **** Alterations to Henry's code are...
79 ****
4bb101f2 80 **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 81 **** 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 82 ****
9ef589d8
LW
83 **** You may distribute under the terms of either the GNU General Public
84 **** License or the Artistic License, as specified in the README file.
a687059c
LW
85 *
86 * Beware that some of this code is subtly aware of the way operator
87 * precedence is structured in regular expressions. Serious changes in
88 * regular-expression syntax might require a total rethink.
89 */
90#include "EXTERN.h"
864dbfa3 91#define PERL_IN_REGEXEC_C
a687059c 92#include "perl.h"
0f5d15d6 93
a687059c
LW
94#include "regcomp.h"
95
c277df42
IZ
96#define RF_tainted 1 /* tainted information used? */
97#define RF_warned 2 /* warned about big count? */
ce862d02 98#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
99#define RF_utf8 8 /* String contains multibyte chars? */
100
eb160463 101#define UTF ((PL_reg_flags & RF_utf8) != 0)
ce862d02
IZ
102
103#define RS_init 1 /* eval environment created */
104#define RS_set 2 /* replsv value is set */
c277df42 105
a687059c
LW
106#ifndef STATIC
107#define STATIC static
108#endif
109
ba7b4546 110#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
7d3e948e 111
c277df42
IZ
112/*
113 * Forwards.
114 */
115
33b8afdf 116#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 117#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 118
52657f30
AL
119#define HOPc(pos,off) ((char *)(PL_reg_match_utf8 \
120 ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
121 : (U8*)(pos + off)))
122#define HOPBACKc(pos, off) ((char*) \
123 ((PL_reg_match_utf8) \
124 ? reghopmaybe3((U8*)pos, -off, ((U8*)(off < 0 ? PL_regeol : PL_bostr))) \
efb30f32
HS
125 : (pos - off >= PL_bostr) \
126 ? (U8*)(pos - off) \
52657f30 127 : (U8*)NULL) \
efb30f32 128)
efb30f32 129
1aa99e6b 130#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c 131#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b 132#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
1aa99e6b 133
1a4fad37
AL
134#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
135 if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
136#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
137#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
138#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
139#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
51371543 140
5f80c4cf 141/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
142#define JUMPABLE(rn) ( \
143 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
144 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
145 OP(rn) == PLUS || OP(rn) == MINMOD || \
146 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
147)
148
cca55fe3
JP
149#define HAS_TEXT(rn) ( \
150 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
151)
e2d8ce26 152
a84d97b6
HS
153/*
154 Search for mandatory following text node; for lookahead, the text must
155 follow but for lookbehind (rn->flags != 0) we skip to the next step.
156*/
cca55fe3 157#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 158 while (JUMPABLE(rn)) \
a84d97b6 159 if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 160 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
161 else if (OP(rn) == PLUS) \
162 rn = NEXTOPER(rn); \
a84d97b6
HS
163 else if (OP(rn) == IFMATCH) \
164 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
e2d8ce26 165 else rn += NEXT_OFF(rn); \
5f80c4cf 166} STMT_END
74750237 167
acfe0abc 168static void restore_pos(pTHX_ void *arg);
51371543 169
76e3520e 170STATIC CHECKPOINT
cea2e8a9 171S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 172{
97aff369 173 dVAR;
a3b680e6 174 const int retval = PL_savestack_ix;
b1ce53c5 175#define REGCP_PAREN_ELEMS 4
a3b680e6 176 const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
177 int p;
178
e49a9654
IH
179 if (paren_elems_to_push < 0)
180 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
181
a01268b5 182#define REGCP_OTHER_ELEMS 6
4b3c1a47 183 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 184 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 185/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
186 SSPUSHINT(PL_regendp[p]);
187 SSPUSHINT(PL_regstartp[p]);
3280af22 188 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
189 SSPUSHINT(p);
190 }
b1ce53c5 191/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
192 SSPUSHINT(PL_regsize);
193 SSPUSHINT(*PL_reglastparen);
a01268b5 194 SSPUSHINT(*PL_reglastcloseparen);
3280af22 195 SSPUSHPTR(PL_reginput);
41123dfd
JH
196#define REGCP_FRAME_ELEMS 2
197/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
198 * are needed for the regexp context stack bookkeeping. */
199 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 200 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 201
a0d0e21e
LW
202 return retval;
203}
204
c277df42 205/* These are needed since we do not localize EVAL nodes: */
a3621e74 206# define REGCP_SET(cp) DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, \
faccc32b 207 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 208 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 209
a3621e74 210# define REGCP_UNWIND(cp) DEBUG_EXECUTE_r(cp != PL_savestack_ix ? \
c3464db5 211 PerlIO_printf(Perl_debug_log, \
faccc32b 212 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 213 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 214
76e3520e 215STATIC char *
097eb12c 216S_regcppop(pTHX_ const regexp *rex)
a0d0e21e 217{
97aff369 218 dVAR;
b1ce53c5 219 I32 i;
a0d0e21e 220 char *input;
b1ce53c5 221
a3621e74
YO
222 GET_RE_DEBUG_FLAGS_DECL;
223
b1ce53c5 224 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 225 i = SSPOPINT;
b1ce53c5
JH
226 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
227 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 228 input = (char *) SSPOPPTR;
a01268b5 229 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
230 *PL_reglastparen = SSPOPINT;
231 PL_regsize = SSPOPINT;
b1ce53c5
JH
232
233 /* Now restore the parentheses context. */
41123dfd
JH
234 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
235 i > 0; i -= REGCP_PAREN_ELEMS) {
1df70142 236 I32 tmps;
097eb12c 237 U32 paren = (U32)SSPOPINT;
3280af22 238 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
239 PL_regstartp[paren] = SSPOPINT;
240 tmps = SSPOPINT;
3280af22
NIS
241 if (paren <= *PL_reglastparen)
242 PL_regendp[paren] = tmps;
a3621e74 243 DEBUG_EXECUTE_r(
c3464db5 244 PerlIO_printf(Perl_debug_log,
b900a521 245 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 246 (UV)paren, (IV)PL_regstartp[paren],
b900a521 247 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 248 (IV)PL_regendp[paren],
3280af22 249 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 250 );
a0d0e21e 251 }
a3621e74 252 DEBUG_EXECUTE_r(
4f639d21 253 if ((I32)(*PL_reglastparen + 1) <= rex->nparens) {
c3464db5 254 PerlIO_printf(Perl_debug_log,
faccc32b 255 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
4f639d21 256 (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
c277df42
IZ
257 }
258 );
daf18116 259#if 1
dafc8851
JH
260 /* It would seem that the similar code in regtry()
261 * already takes care of this, and in fact it is in
262 * a better location to since this code can #if 0-ed out
263 * but the code in regtry() is needed or otherwise tests
264 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
265 * (as of patchlevel 7877) will fail. Then again,
266 * this code seems to be necessary or otherwise
267 * building DynaLoader will fail:
268 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
269 * --jhi */
097eb12c
AL
270 for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
271 if (i > PL_regsize)
272 PL_regstartp[i] = -1;
273 PL_regendp[i] = -1;
a0d0e21e 274 }
dafc8851 275#endif
a0d0e21e
LW
276 return input;
277}
278
02db2b7b 279#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 280
95b24440 281#define TRYPAREN(paren, n, input, where) { \
29d1e993
HS
282 if (paren) { \
283 if (n) { \
284 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
285 PL_regendp[paren] = input - PL_bostr; \
286 } \
287 else \
288 PL_regendp[paren] = -1; \
289 } \
95b24440
DM
290 REGMATCH(next, where); \
291 if (result) \
29d1e993
HS
292 sayYES; \
293 if (paren && n) \
294 PL_regendp[paren] = -1; \
295}
296
297
a687059c 298/*
e50aee73 299 * pregexec and friends
a687059c
LW
300 */
301
302/*
c277df42 303 - pregexec - match a regexp against a string
a687059c 304 */
c277df42 305I32
864dbfa3 306Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 307 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
308/* strend: pointer to null at end of string */
309/* strbeg: real beginning of string */
310/* minend: end of match must be >=minend after stringarg. */
311/* nosave: For optimizations. */
312{
313 return
9041c2e3 314 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
315 nosave ? 0 : REXEC_COPY_STR);
316}
0f5d15d6 317
22e551b9 318
9041c2e3 319/*
cad2e5aa
JH
320 * Need to implement the following flags for reg_anch:
321 *
322 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
323 * USE_INTUIT_ML
324 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
325 * INTUIT_AUTORITATIVE_ML
326 * INTUIT_ONCE_NOML - Intuit can match in one location only.
327 * INTUIT_ONCE_ML
328 *
329 * Another flag for this function: SECOND_TIME (so that float substrs
330 * with giant delta may be not rechecked).
331 */
332
333/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
334
3f7c398e 335/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
cad2e5aa
JH
336 Otherwise, only SvCUR(sv) is used to get strbeg. */
337
338/* XXXX We assume that strpos is strbeg unless sv. */
339
6eb5f6b9
JH
340/* XXXX Some places assume that there is a fixed substring.
341 An update may be needed if optimizer marks as "INTUITable"
342 RExen without fixed substrings. Similarly, it is assumed that
343 lengths of all the strings are no more than minlen, thus they
344 cannot come from lookahead.
345 (Or minlen should take into account lookahead.) */
346
2c2d71f5
JH
347/* A failure to find a constant substring means that there is no need to make
348 an expensive call to REx engine, thus we celebrate a failure. Similarly,
349 finding a substring too deep into the string means that less calls to
30944b6d
IZ
350 regtry() should be needed.
351
352 REx compiler's optimizer found 4 possible hints:
353 a) Anchored substring;
354 b) Fixed substring;
355 c) Whether we are anchored (beginning-of-line or \G);
356 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 357 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
358 string which does not contradict any of them.
359 */
2c2d71f5 360
6eb5f6b9
JH
361/* Most of decisions we do here should have been done at compile time.
362 The nodes of the REx which we used for the search should have been
363 deleted from the finite automaton. */
364
cad2e5aa
JH
365char *
366Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
367 char *strend, U32 flags, re_scream_pos_data *data)
368{
97aff369 369 dVAR;
b7953727 370 register I32 start_shift = 0;
cad2e5aa 371 /* Should be nonnegative! */
b7953727 372 register I32 end_shift = 0;
2c2d71f5
JH
373 register char *s;
374 register SV *check;
a1933d95 375 char *strbeg;
cad2e5aa 376 char *t;
a3b680e6 377 const int do_utf8 = sv ? SvUTF8(sv) : 0; /* if no sv we have to assume bytes */
cad2e5aa 378 I32 ml_anch;
bd61b366
SS
379 register char *other_last = NULL; /* other substr checked before this */
380 char *check_at = NULL; /* check substr found at this pos */
1df70142 381 const I32 multiline = prog->reganch & PMf_MULTILINE;
30944b6d 382#ifdef DEBUGGING
890ce7af
AL
383 const char * const i_strpos = strpos;
384 SV * const dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 385#endif
a3621e74
YO
386
387 GET_RE_DEBUG_FLAGS_DECL;
388
a30b2f1f 389 RX_MATCH_UTF8_set(prog,do_utf8);
cad2e5aa 390
b8d68ded 391 if (prog->reganch & ROPT_UTF8) {
a3621e74 392 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded
JH
393 "UTF-8 regex...\n"));
394 PL_reg_flags |= RF_utf8;
395 }
396
a3621e74 397 DEBUG_EXECUTE_r({
1df70142 398 const char *s = PL_reg_match_utf8 ?
c728cb41
JH
399 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
400 strpos;
1df70142 401 const int len = PL_reg_match_utf8 ?
b8d68ded 402 strlen(s) : strend - strpos;
2a782b5b
JH
403 if (!PL_colorset)
404 reginitcolors();
b8d68ded 405 if (PL_reg_match_utf8)
a3621e74 406 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded 407 "UTF-8 target...\n"));
2a782b5b 408 PerlIO_printf(Perl_debug_log,
a0288114 409 "%sGuessing start of match, REx%s \"%s%.60s%s%s\" against \"%s%.*s%s%s\"...\n",
e4584336 410 PL_colors[4], PL_colors[5], PL_colors[0],
2a782b5b
JH
411 prog->precomp,
412 PL_colors[1],
413 (strlen(prog->precomp) > 60 ? "..." : ""),
414 PL_colors[0],
415 (int)(len > 60 ? 60 : len),
416 s, PL_colors[1],
417 (len > 60 ? "..." : "")
418 );
419 });
cad2e5aa 420
c344f387
JH
421 /* CHR_DIST() would be more correct here but it makes things slow. */
422 if (prog->minlen > strend - strpos) {
a3621e74 423 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584 424 "String too short... [re_intuit_start]\n"));
cad2e5aa 425 goto fail;
2c2d71f5 426 }
a1933d95 427 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 428 PL_regeol = strend;
33b8afdf
JH
429 if (do_utf8) {
430 if (!prog->check_utf8 && prog->check_substr)
431 to_utf8_substr(prog);
432 check = prog->check_utf8;
433 } else {
434 if (!prog->check_substr && prog->check_utf8)
435 to_byte_substr(prog);
436 check = prog->check_substr;
437 }
438 if (check == &PL_sv_undef) {
a3621e74 439 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
440 "Non-utf string cannot match utf check string\n"));
441 goto fail;
442 }
2c2d71f5 443 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
444 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
445 || ( (prog->reganch & ROPT_ANCH_BOL)
7fba1cd6 446 && !multiline ) ); /* Check after \n? */
cad2e5aa 447
7e25d62c
JH
448 if (!ml_anch) {
449 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
450 | ROPT_IMPLICIT)) /* not a real BOL */
3f7c398e 451 /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
7e25d62c
JH
452 && sv && !SvROK(sv)
453 && (strpos != strbeg)) {
a3621e74 454 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
7e25d62c
JH
455 goto fail;
456 }
457 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 458 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 459 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
460 I32 slen;
461
1aa99e6b 462 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
463 if (SvTAIL(check)) {
464 slen = SvCUR(check); /* >= 1 */
cad2e5aa 465
9041c2e3 466 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5 467 || (strend - s == slen && strend[-1] != '\n')) {
a3621e74 468 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
2c2d71f5 469 goto fail_finish;
cad2e5aa
JH
470 }
471 /* Now should match s[0..slen-2] */
472 slen--;
3f7c398e 473 if (slen && (*SvPVX_const(check) != *s
cad2e5aa 474 || (slen > 1
3f7c398e 475 && memNE(SvPVX_const(check), s, slen)))) {
2c2d71f5 476 report_neq:
a3621e74 477 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
2c2d71f5
JH
478 goto fail_finish;
479 }
cad2e5aa 480 }
3f7c398e 481 else if (*SvPVX_const(check) != *s
653099ff 482 || ((slen = SvCUR(check)) > 1
3f7c398e 483 && memNE(SvPVX_const(check), s, slen)))
2c2d71f5 484 goto report_neq;
c315bfe8 485 check_at = s;
2c2d71f5 486 goto success_at_start;
7e25d62c 487 }
cad2e5aa 488 }
2c2d71f5 489 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 490 s = strpos;
2c2d71f5 491 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 492 end_shift = prog->minlen - start_shift -
653099ff 493 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 494 if (!ml_anch) {
a3b680e6 495 const I32 end = prog->check_offset_max + CHR_SVLEN(check)
653099ff 496 - (SvTAIL(check) != 0);
a3b680e6 497 const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
498
499 if (end_shift < eshift)
500 end_shift = eshift;
501 }
cad2e5aa 502 }
2c2d71f5 503 else { /* Can match at random position */
cad2e5aa
JH
504 ml_anch = 0;
505 s = strpos;
2c2d71f5
JH
506 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
507 /* Should be nonnegative! */
508 end_shift = prog->minlen - start_shift -
653099ff 509 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
510 }
511
2c2d71f5 512#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 513 if (end_shift < 0)
6bbae5e6 514 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
515#endif
516
2c2d71f5
JH
517 restart:
518 /* Find a possible match in the region s..strend by looking for
519 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 520 if (flags & REXEC_SCREAM) {
cad2e5aa 521 I32 p = -1; /* Internal iterator of scream. */
a3b680e6 522 I32 * const pp = data ? data->scream_pos : &p;
cad2e5aa 523
2c2d71f5
JH
524 if (PL_screamfirst[BmRARE(check)] >= 0
525 || ( BmRARE(check) == '\n'
526 && (BmPREVIOUS(check) == SvCUR(check) - 1)
527 && SvTAIL(check) ))
9041c2e3 528 s = screaminstr(sv, check,
2c2d71f5 529 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 530 else
2c2d71f5 531 goto fail_finish;
4addbd3b
HS
532 /* we may be pointing at the wrong string */
533 if (s && RX_MATCH_COPIED(prog))
3f7c398e 534 s = strbeg + (s - SvPVX_const(sv));
cad2e5aa
JH
535 if (data)
536 *data->scream_olds = s;
537 }
f33976b4 538 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
539 s = fbm_instr((U8*)(s + start_shift),
540 (U8*)(strend - end_shift),
7fba1cd6 541 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 542 else
1aa99e6b
IH
543 s = fbm_instr(HOP3(s, start_shift, strend),
544 HOP3(strend, -end_shift, strbeg),
7fba1cd6 545 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
546
547 /* Update the count-of-usability, remove useless subpatterns,
548 unshift s. */
2c2d71f5 549
a0288114 550 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s %s substr \"%s%.*s%s\"%s%s",
2c2d71f5 551 (s ? "Found" : "Did not find"),
33b8afdf 552 (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
2c2d71f5 553 PL_colors[0],
7b0972df 554 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
3f7c398e 555 SvPVX_const(check),
2c2d71f5
JH
556 PL_colors[1], (SvTAIL(check) ? "$" : ""),
557 (s ? " at offset " : "...\n") ) );
558
559 if (!s)
560 goto fail_finish;
561
6eb5f6b9
JH
562 check_at = s;
563
2c2d71f5 564 /* Finish the diagnostic message */
a3621e74 565 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
566
567 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
568 Start with the other substr.
569 XXXX no SCREAM optimization yet - and a very coarse implementation
a0288114 570 XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
2c2d71f5
JH
571 *always* match. Probably should be marked during compile...
572 Probably it is right to do no SCREAM here...
573 */
574
33b8afdf 575 if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
30944b6d 576 /* Take into account the "other" substring. */
2c2d71f5
JH
577 /* XXXX May be hopelessly wrong for UTF... */
578 if (!other_last)
6eb5f6b9 579 other_last = strpos;
33b8afdf 580 if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
30944b6d
IZ
581 do_other_anchored:
582 {
890ce7af
AL
583 char * const last = HOP3c(s, -start_shift, strbeg);
584 char *last1, *last2;
2c2d71f5 585 char *s1 = s;
33b8afdf 586 SV* must;
2c2d71f5 587
2c2d71f5
JH
588 t = s - prog->check_offset_max;
589 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 590 && (!do_utf8
1aa99e6b 591 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 592 && t > strpos)))
30944b6d 593 /* EMPTY */;
2c2d71f5
JH
594 else
595 t = strpos;
1aa99e6b 596 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
597 if (t < other_last) /* These positions already checked */
598 t = other_last;
1aa99e6b 599 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
600 if (last < last1)
601 last1 = last;
602 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
603 /* On end-of-str: see comment below. */
33b8afdf
JH
604 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
605 if (must == &PL_sv_undef) {
606 s = (char*)NULL;
a3621e74 607 DEBUG_EXECUTE_r(must = prog->anchored_utf8); /* for debug */
33b8afdf
JH
608 }
609 else
610 s = fbm_instr(
611 (unsigned char*)t,
612 HOP3(HOP3(last1, prog->anchored_offset, strend)
613 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
614 must,
7fba1cd6 615 multiline ? FBMrf_MULTILINE : 0
33b8afdf 616 );
a3621e74 617 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a0288114 618 "%s anchored substr \"%s%.*s%s\"%s",
2c2d71f5
JH
619 (s ? "Found" : "Contradicts"),
620 PL_colors[0],
33b8afdf
JH
621 (int)(SvCUR(must)
622 - (SvTAIL(must)!=0)),
3f7c398e 623 SvPVX_const(must),
33b8afdf 624 PL_colors[1], (SvTAIL(must) ? "$" : "")));
2c2d71f5
JH
625 if (!s) {
626 if (last1 >= last2) {
a3621e74 627 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5
JH
628 ", giving up...\n"));
629 goto fail_finish;
630 }
a3621e74 631 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 632 ", trying floating at offset %ld...\n",
1aa99e6b
IH
633 (long)(HOP3c(s1, 1, strend) - i_strpos)));
634 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
635 s = HOP3c(last, 1, strend);
2c2d71f5
JH
636 goto restart;
637 }
638 else {
a3621e74 639 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 640 (long)(s - i_strpos)));
1aa99e6b
IH
641 t = HOP3c(s, -prog->anchored_offset, strbeg);
642 other_last = HOP3c(s, 1, strend);
30944b6d 643 s = s1;
2c2d71f5
JH
644 if (t == strpos)
645 goto try_at_start;
2c2d71f5
JH
646 goto try_at_offset;
647 }
30944b6d 648 }
2c2d71f5
JH
649 }
650 else { /* Take into account the floating substring. */
33b8afdf
JH
651 char *last, *last1;
652 char *s1 = s;
653 SV* must;
654
655 t = HOP3c(s, -start_shift, strbeg);
656 last1 = last =
657 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
658 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
659 last = HOP3c(t, prog->float_max_offset, strend);
660 s = HOP3c(t, prog->float_min_offset, strend);
661 if (s < other_last)
662 s = other_last;
2c2d71f5 663 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
33b8afdf
JH
664 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
665 /* fbm_instr() takes into account exact value of end-of-str
666 if the check is SvTAIL(ed). Since false positives are OK,
667 and end-of-str is not later than strend we are OK. */
668 if (must == &PL_sv_undef) {
669 s = (char*)NULL;
a3621e74 670 DEBUG_EXECUTE_r(must = prog->float_utf8); /* for debug message */
33b8afdf
JH
671 }
672 else
2c2d71f5 673 s = fbm_instr((unsigned char*)s,
33b8afdf
JH
674 (unsigned char*)last + SvCUR(must)
675 - (SvTAIL(must)!=0),
7fba1cd6 676 must, multiline ? FBMrf_MULTILINE : 0);
a0288114 677 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s floating substr \"%s%.*s%s\"%s",
33b8afdf
JH
678 (s ? "Found" : "Contradicts"),
679 PL_colors[0],
680 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 681 SvPVX_const(must),
33b8afdf
JH
682 PL_colors[1], (SvTAIL(must) ? "$" : "")));
683 if (!s) {
684 if (last1 == last) {
a3621e74 685 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
686 ", giving up...\n"));
687 goto fail_finish;
2c2d71f5 688 }
a3621e74 689 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
690 ", trying anchored starting at offset %ld...\n",
691 (long)(s1 + 1 - i_strpos)));
692 other_last = last;
693 s = HOP3c(t, 1, strend);
694 goto restart;
695 }
696 else {
a3621e74 697 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
33b8afdf
JH
698 (long)(s - i_strpos)));
699 other_last = s; /* Fix this later. --Hugo */
700 s = s1;
701 if (t == strpos)
702 goto try_at_start;
703 goto try_at_offset;
704 }
2c2d71f5 705 }
cad2e5aa 706 }
2c2d71f5
JH
707
708 t = s - prog->check_offset_max;
2c2d71f5 709 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 710 && (!do_utf8
1aa99e6b
IH
711 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
712 && t > strpos))) {
2c2d71f5
JH
713 /* Fixed substring is found far enough so that the match
714 cannot start at strpos. */
715 try_at_offset:
cad2e5aa 716 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
717 /* Eventually fbm_*() should handle this, but often
718 anchored_offset is not 0, so this check will not be wasted. */
719 /* XXXX In the code below we prefer to look for "^" even in
720 presence of anchored substrings. And we search even
721 beyond the found float position. These pessimizations
722 are historical artefacts only. */
723 find_anchor:
2c2d71f5 724 while (t < strend - prog->minlen) {
cad2e5aa 725 if (*t == '\n') {
4ee3650e 726 if (t < check_at - prog->check_offset_min) {
33b8afdf 727 if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
4ee3650e
GS
728 /* Since we moved from the found position,
729 we definitely contradict the found anchored
30944b6d
IZ
730 substr. Due to the above check we do not
731 contradict "check" substr.
732 Thus we can arrive here only if check substr
733 is float. Redo checking for "other"=="fixed".
734 */
9041c2e3 735 strpos = t + 1;
a3621e74 736 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
e4584336 737 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
30944b6d
IZ
738 goto do_other_anchored;
739 }
4ee3650e
GS
740 /* We don't contradict the found floating substring. */
741 /* XXXX Why not check for STCLASS? */
cad2e5aa 742 s = t + 1;
a3621e74 743 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
e4584336 744 PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
745 goto set_useful;
746 }
4ee3650e
GS
747 /* Position contradicts check-string */
748 /* XXXX probably better to look for check-string
749 than for "\n", so one should lower the limit for t? */
a3621e74 750 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
e4584336 751 PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 752 other_last = strpos = s = t + 1;
cad2e5aa
JH
753 goto restart;
754 }
755 t++;
756 }
a3621e74 757 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
e4584336 758 PL_colors[0], PL_colors[1]));
2c2d71f5 759 goto fail_finish;
cad2e5aa 760 }
f5952150 761 else {
a3621e74 762 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
e4584336 763 PL_colors[0], PL_colors[1]));
f5952150 764 }
cad2e5aa
JH
765 s = t;
766 set_useful:
33b8afdf 767 ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
cad2e5aa
JH
768 }
769 else {
f5952150 770 /* The found string does not prohibit matching at strpos,
2c2d71f5 771 - no optimization of calling REx engine can be performed,
f5952150
GS
772 unless it was an MBOL and we are not after MBOL,
773 or a future STCLASS check will fail this. */
2c2d71f5
JH
774 try_at_start:
775 /* Even in this situation we may use MBOL flag if strpos is offset
776 wrt the start of the string. */
05b4157f 777 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 778 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
779 /* May be due to an implicit anchor of m{.*foo} */
780 && !(prog->reganch & ROPT_IMPLICIT))
781 {
cad2e5aa
JH
782 t = strpos;
783 goto find_anchor;
784 }
a3621e74 785 DEBUG_EXECUTE_r( if (ml_anch)
f5952150 786 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
e4584336 787 (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
30944b6d 788 );
2c2d71f5 789 success_at_start:
30944b6d 790 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
33b8afdf
JH
791 && (do_utf8 ? (
792 prog->check_utf8 /* Could be deleted already */
793 && --BmUSEFUL(prog->check_utf8) < 0
794 && (prog->check_utf8 == prog->float_utf8)
795 ) : (
796 prog->check_substr /* Could be deleted already */
797 && --BmUSEFUL(prog->check_substr) < 0
798 && (prog->check_substr == prog->float_substr)
799 )))
66e933ab 800 {
cad2e5aa 801 /* If flags & SOMETHING - do not do it many times on the same match */
a3621e74 802 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
33b8afdf
JH
803 SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
804 if (do_utf8 ? prog->check_substr : prog->check_utf8)
805 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
a0714e2c
SS
806 prog->check_substr = prog->check_utf8 = NULL; /* disable */
807 prog->float_substr = prog->float_utf8 = NULL; /* clear */
808 check = NULL; /* abort */
cad2e5aa 809 s = strpos;
3cf5c195
IZ
810 /* XXXX This is a remnant of the old implementation. It
811 looks wasteful, since now INTUIT can use many
6eb5f6b9 812 other heuristics. */
cad2e5aa
JH
813 prog->reganch &= ~RE_USE_INTUIT;
814 }
815 else
816 s = strpos;
817 }
818
6eb5f6b9
JH
819 /* Last resort... */
820 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
821 if (prog->regstclass) {
822 /* minlen == 0 is possible if regstclass is \b or \B,
823 and the fixed substr is ''$.
824 Since minlen is already taken into account, s+1 is before strend;
825 accidentally, minlen >= 1 guaranties no false positives at s + 1
826 even for \b or \B. But (minlen? 1 : 0) below assumes that
827 regstclass does not come from lookahead... */
828 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
829 This leaves EXACTF only, which is dealt with in find_byclass(). */
890ce7af 830 const U8* const str = (U8*)STRING(prog->regstclass);
06b5626a 831 const int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 832 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 833 : 1);
a3b680e6 834 const char * const endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
1aa99e6b 835 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
33b8afdf 836 : (prog->float_substr || prog->float_utf8
1aa99e6b
IH
837 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
838 cl_l, strend)
839 : strend);
6eb5f6b9
JH
840
841 t = s;
4f639d21 842 PL_reg_re = prog;
06b5626a 843 s = find_byclass(prog, prog->regstclass, s, endpos, 1);
6eb5f6b9
JH
844 if (!s) {
845#ifdef DEBUGGING
cbbf8932 846 const char *what = NULL;
6eb5f6b9
JH
847#endif
848 if (endpos == strend) {
a3621e74 849 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
850 "Could not match STCLASS...\n") );
851 goto fail;
852 }
a3621e74 853 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab 854 "This position contradicts STCLASS...\n") );
653099ff
GS
855 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
856 goto fail;
6eb5f6b9 857 /* Contradict one of substrings */
33b8afdf
JH
858 if (prog->anchored_substr || prog->anchored_utf8) {
859 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
a3621e74 860 DEBUG_EXECUTE_r( what = "anchored" );
6eb5f6b9 861 hop_and_restart:
1aa99e6b 862 s = HOP3c(t, 1, strend);
66e933ab
GS
863 if (s + start_shift + end_shift > strend) {
864 /* XXXX Should be taken into account earlier? */
a3621e74 865 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab
GS
866 "Could not match STCLASS...\n") );
867 goto fail;
868 }
5e39e1e5
HS
869 if (!check)
870 goto giveup;
a3621e74 871 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 872 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
873 what, (long)(s + start_shift - i_strpos)) );
874 goto restart;
875 }
66e933ab 876 /* Have both, check_string is floating */
6eb5f6b9
JH
877 if (t + start_shift >= check_at) /* Contradicts floating=check */
878 goto retry_floating_check;
879 /* Recheck anchored substring, but not floating... */
9041c2e3 880 s = check_at;
5e39e1e5
HS
881 if (!check)
882 goto giveup;
a3621e74 883 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 884 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
885 (long)(other_last - i_strpos)) );
886 goto do_other_anchored;
887 }
60e71179
GS
888 /* Another way we could have checked stclass at the
889 current position only: */
890 if (ml_anch) {
891 s = t = t + 1;
5e39e1e5
HS
892 if (!check)
893 goto giveup;
a3621e74 894 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 895 "Looking for /%s^%s/m starting at offset %ld...\n",
e4584336 896 PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
60e71179 897 goto try_at_offset;
66e933ab 898 }
33b8afdf 899 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
60e71179 900 goto fail;
6eb5f6b9
JH
901 /* Check is floating subtring. */
902 retry_floating_check:
903 t = check_at - start_shift;
a3621e74 904 DEBUG_EXECUTE_r( what = "floating" );
6eb5f6b9
JH
905 goto hop_and_restart;
906 }
b7953727 907 if (t != s) {
a3621e74 908 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 909 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
910 (long)(t - i_strpos), (long)(s - i_strpos))
911 );
912 }
913 else {
a3621e74 914 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b7953727
JH
915 "Does not contradict STCLASS...\n");
916 );
917 }
6eb5f6b9 918 }
5e39e1e5 919 giveup:
a3621e74 920 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
5e39e1e5
HS
921 PL_colors[4], (check ? "Guessed" : "Giving up"),
922 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 923 return s;
2c2d71f5
JH
924
925 fail_finish: /* Substring not found */
33b8afdf
JH
926 if (prog->check_substr || prog->check_utf8) /* could be removed already */
927 BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
cad2e5aa 928 fail:
a3621e74 929 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
e4584336 930 PL_colors[4], PL_colors[5]));
bd61b366 931 return NULL;
cad2e5aa 932}
9661b544 933
6eb5f6b9 934/* We know what class REx starts with. Try to find this position... */
3c3eec57 935STATIC char *
097eb12c 936S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *strend, I32 norun)
a687059c 937{
27da23d5 938 dVAR;
1df70142 939 const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
6eb5f6b9 940 char *m;
d8093b23 941 STRLEN ln;
5dab1207 942 STRLEN lnc;
078c425b 943 register STRLEN uskip;
d8093b23
G
944 unsigned int c1;
945 unsigned int c2;
6eb5f6b9
JH
946 char *e;
947 register I32 tmp = 1; /* Scratch variable? */
a3b680e6 948 register const bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 949
6eb5f6b9
JH
950 /* We know what class it must start with. */
951 switch (OP(c)) {
6eb5f6b9 952 case ANYOF:
388cc4de 953 if (do_utf8) {
078c425b 954 while (s + (uskip = UTF8SKIP(s)) <= strend) {
388cc4de
HS
955 if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
956 !UTF8_IS_INVARIANT((U8)s[0]) ?
957 reginclass(c, (U8*)s, 0, do_utf8) :
958 REGINCLASS(c, (U8*)s)) {
959 if (tmp && (norun || regtry(prog, s)))
960 goto got_it;
961 else
962 tmp = doevery;
963 }
964 else
965 tmp = 1;
078c425b 966 s += uskip;
388cc4de
HS
967 }
968 }
969 else {
970 while (s < strend) {
971 STRLEN skip = 1;
972
973 if (REGINCLASS(c, (U8*)s) ||
974 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
975 /* The assignment of 2 is intentional:
976 * for the folded sharp s, the skip is 2. */
977 (skip = SHARP_S_SKIP))) {
978 if (tmp && (norun || regtry(prog, s)))
979 goto got_it;
980 else
981 tmp = doevery;
982 }
983 else
984 tmp = 1;
985 s += skip;
986 }
a0d0e21e 987 }
6eb5f6b9 988 break;
f33976b4
DB
989 case CANY:
990 while (s < strend) {
991 if (tmp && (norun || regtry(prog, s)))
992 goto got_it;
993 else
994 tmp = doevery;
995 s++;
996 }
997 break;
6eb5f6b9 998 case EXACTF:
5dab1207
NIS
999 m = STRING(c);
1000 ln = STR_LEN(c); /* length to match in octets/bytes */
1001 lnc = (I32) ln; /* length to match in characters */
1aa99e6b 1002 if (UTF) {
a2a2844f 1003 STRLEN ulen1, ulen2;
5dab1207 1004 U8 *sm = (U8 *) m;
89ebb4a3
JH
1005 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1006 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4ad0818d 1007 const U32 uniflags = UTF8_ALLOW_DEFAULT;
a2a2844f
JH
1008
1009 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1010 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1011
89ebb4a3 1012 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
041457d9 1013 0, uniflags);
89ebb4a3 1014 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
041457d9 1015 0, uniflags);
5dab1207
NIS
1016 lnc = 0;
1017 while (sm < ((U8 *) m + ln)) {
1018 lnc++;
1019 sm += UTF8SKIP(sm);
1020 }
1aa99e6b
IH
1021 }
1022 else {
1023 c1 = *(U8*)m;
1024 c2 = PL_fold[c1];
1025 }
6eb5f6b9
JH
1026 goto do_exactf;
1027 case EXACTFL:
5dab1207
NIS
1028 m = STRING(c);
1029 ln = STR_LEN(c);
1030 lnc = (I32) ln;
d8093b23 1031 c1 = *(U8*)m;
6eb5f6b9
JH
1032 c2 = PL_fold_locale[c1];
1033 do_exactf:
db12adc6 1034 e = HOP3c(strend, -((I32)lnc), s);
b3c9acc1 1035
6eb5f6b9
JH
1036 if (norun && e < s)
1037 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 1038
60a8b682
JH
1039 /* The idea in the EXACTF* cases is to first find the
1040 * first character of the EXACTF* node and then, if
1041 * necessary, case-insensitively compare the full
1042 * text of the node. The c1 and c2 are the first
1043 * characters (though in Unicode it gets a bit
1044 * more complicated because there are more cases
7f16dd3d
JH
1045 * than just upper and lower: one needs to use
1046 * the so-called folding case for case-insensitive
1047 * matching (called "loose matching" in Unicode).
1048 * ibcmp_utf8() will do just that. */
60a8b682 1049
1aa99e6b 1050 if (do_utf8) {
575cac57 1051 UV c, f;
89ebb4a3 1052 U8 tmpbuf [UTF8_MAXBYTES+1];
575cac57 1053 STRLEN len, foldlen;
4ad0818d 1054 const U32 uniflags = UTF8_ALLOW_DEFAULT;
09091399 1055 if (c1 == c2) {
5dab1207
NIS
1056 /* Upper and lower of 1st char are equal -
1057 * probably not a "letter". */
1aa99e6b 1058 while (s <= e) {
89ebb4a3 1059 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1060 uniflags);
80aecb99
JH
1061 if ( c == c1
1062 && (ln == len ||
66423254 1063 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1064 m, (char **)0, ln, (bool)UTF))
55da9344 1065 && (norun || regtry(prog, s)) )
1aa99e6b 1066 goto got_it;
80aecb99 1067 else {
1df70142 1068 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1069 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1070 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1071 if ( f != c
1072 && (f == c1 || f == c2)
1073 && (ln == foldlen ||
66423254
JH
1074 !ibcmp_utf8((char *) foldbuf,
1075 (char **)0, foldlen, do_utf8,
d07ddd77 1076 m,
eb160463 1077 (char **)0, ln, (bool)UTF))
80aecb99
JH
1078 && (norun || regtry(prog, s)) )
1079 goto got_it;
1080 }
1aa99e6b
IH
1081 s += len;
1082 }
09091399
JH
1083 }
1084 else {
1aa99e6b 1085 while (s <= e) {
89ebb4a3 1086 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1087 uniflags);
80aecb99 1088
60a8b682 1089 /* Handle some of the three Greek sigmas cases.
8c01da3c
JH
1090 * Note that not all the possible combinations
1091 * are handled here: some of them are handled
1092 * by the standard folding rules, and some of
1093 * them (the character class or ANYOF cases)
1094 * are handled during compiletime in
1095 * regexec.c:S_regclass(). */
880bd946
JH
1096 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1097 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1098 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99
JH
1099
1100 if ( (c == c1 || c == c2)
1101 && (ln == len ||
66423254 1102 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1103 m, (char **)0, ln, (bool)UTF))
55da9344 1104 && (norun || regtry(prog, s)) )
1aa99e6b 1105 goto got_it;
80aecb99 1106 else {
1df70142 1107 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1108 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1109 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1110 if ( f != c
1111 && (f == c1 || f == c2)
1112 && (ln == foldlen ||
a6872d42 1113 !ibcmp_utf8((char *) foldbuf,
66423254 1114 (char **)0, foldlen, do_utf8,
d07ddd77 1115 m,
eb160463 1116 (char **)0, ln, (bool)UTF))
80aecb99
JH
1117 && (norun || regtry(prog, s)) )
1118 goto got_it;
1119 }
1aa99e6b
IH
1120 s += len;
1121 }
09091399 1122 }
1aa99e6b
IH
1123 }
1124 else {
1125 if (c1 == c2)
1126 while (s <= e) {
1127 if ( *(U8*)s == c1
1128 && (ln == 1 || !(OP(c) == EXACTF
1129 ? ibcmp(s, m, ln)
1130 : ibcmp_locale(s, m, ln)))
1131 && (norun || regtry(prog, s)) )
1132 goto got_it;
1133 s++;
1134 }
1135 else
1136 while (s <= e) {
1137 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1138 && (ln == 1 || !(OP(c) == EXACTF
1139 ? ibcmp(s, m, ln)
1140 : ibcmp_locale(s, m, ln)))
1141 && (norun || regtry(prog, s)) )
1142 goto got_it;
1143 s++;
1144 }
b3c9acc1
IZ
1145 }
1146 break;
bbce6d69 1147 case BOUNDL:
3280af22 1148 PL_reg_flags |= RF_tainted;
bbce6d69 1149 /* FALL THROUGH */
a0d0e21e 1150 case BOUND:
ffc61ed2 1151 if (do_utf8) {
12d33761 1152 if (s == PL_bostr)
ffc61ed2
JH
1153 tmp = '\n';
1154 else {
6136c704 1155 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1156 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1157 }
1158 tmp = ((OP(c) == BOUND ?
9041c2e3 1159 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1160 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1161 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1162 if (tmp == !(OP(c) == BOUND ?
3568d838 1163 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1164 isALNUM_LC_utf8((U8*)s)))
1165 {
1166 tmp = !tmp;
1167 if ((norun || regtry(prog, s)))
1168 goto got_it;
1169 }
078c425b 1170 s += uskip;
a687059c 1171 }
a0d0e21e 1172 }
667bb95a 1173 else {
12d33761 1174 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1175 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1176 while (s < strend) {
1177 if (tmp ==
1178 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1179 tmp = !tmp;
1180 if ((norun || regtry(prog, s)))
1181 goto got_it;
1182 }
1183 s++;
a0ed51b3 1184 }
a0ed51b3 1185 }
6eb5f6b9 1186 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1187 goto got_it;
1188 break;
bbce6d69 1189 case NBOUNDL:
3280af22 1190 PL_reg_flags |= RF_tainted;
bbce6d69 1191 /* FALL THROUGH */
a0d0e21e 1192 case NBOUND:
ffc61ed2 1193 if (do_utf8) {
12d33761 1194 if (s == PL_bostr)
ffc61ed2
JH
1195 tmp = '\n';
1196 else {
6136c704 1197 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1198 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1199 }
1200 tmp = ((OP(c) == NBOUND ?
9041c2e3 1201 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1202 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1203 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1204 if (tmp == !(OP(c) == NBOUND ?
3568d838 1205 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1206 isALNUM_LC_utf8((U8*)s)))
1207 tmp = !tmp;
1208 else if ((norun || regtry(prog, s)))
1209 goto got_it;
078c425b 1210 s += uskip;
ffc61ed2 1211 }
a0d0e21e 1212 }
667bb95a 1213 else {
12d33761 1214 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1215 tmp = ((OP(c) == NBOUND ?
1216 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1217 while (s < strend) {
1218 if (tmp ==
1219 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1220 tmp = !tmp;
1221 else if ((norun || regtry(prog, s)))
1222 goto got_it;
1223 s++;
1224 }
a0ed51b3 1225 }
6eb5f6b9 1226 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1227 goto got_it;
1228 break;
a0d0e21e 1229 case ALNUM:
ffc61ed2 1230 if (do_utf8) {
1a4fad37 1231 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1232 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1233 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1234 if (tmp && (norun || regtry(prog, s)))
1235 goto got_it;
1236 else
1237 tmp = doevery;
1238 }
bbce6d69 1239 else
ffc61ed2 1240 tmp = 1;
078c425b 1241 s += uskip;
bbce6d69 1242 }
bbce6d69 1243 }
ffc61ed2
JH
1244 else {
1245 while (s < strend) {
1246 if (isALNUM(*s)) {
1247 if (tmp && (norun || regtry(prog, s)))
1248 goto got_it;
1249 else
1250 tmp = doevery;
1251 }
a0ed51b3 1252 else
ffc61ed2
JH
1253 tmp = 1;
1254 s++;
a0ed51b3 1255 }
a0ed51b3
LW
1256 }
1257 break;
bbce6d69 1258 case ALNUML:
3280af22 1259 PL_reg_flags |= RF_tainted;
ffc61ed2 1260 if (do_utf8) {
078c425b 1261 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1262 if (isALNUM_LC_utf8((U8*)s)) {
1263 if (tmp && (norun || regtry(prog, s)))
1264 goto got_it;
1265 else
1266 tmp = doevery;
1267 }
a687059c 1268 else
ffc61ed2 1269 tmp = 1;
078c425b 1270 s += uskip;
a0d0e21e 1271 }
a0d0e21e 1272 }
ffc61ed2
JH
1273 else {
1274 while (s < strend) {
1275 if (isALNUM_LC(*s)) {
1276 if (tmp && (norun || regtry(prog, s)))
1277 goto got_it;
1278 else
1279 tmp = doevery;
1280 }
a0ed51b3 1281 else
ffc61ed2
JH
1282 tmp = 1;
1283 s++;
a0ed51b3 1284 }
a0ed51b3
LW
1285 }
1286 break;
a0d0e21e 1287 case NALNUM:
ffc61ed2 1288 if (do_utf8) {
1a4fad37 1289 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1290 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1291 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1292 if (tmp && (norun || regtry(prog, s)))
1293 goto got_it;
1294 else
1295 tmp = doevery;
1296 }
bbce6d69 1297 else
ffc61ed2 1298 tmp = 1;
078c425b 1299 s += uskip;
bbce6d69 1300 }
bbce6d69 1301 }
ffc61ed2
JH
1302 else {
1303 while (s < strend) {
1304 if (!isALNUM(*s)) {
1305 if (tmp && (norun || regtry(prog, s)))
1306 goto got_it;
1307 else
1308 tmp = doevery;
1309 }
a0ed51b3 1310 else
ffc61ed2
JH
1311 tmp = 1;
1312 s++;
a0ed51b3 1313 }
a0ed51b3
LW
1314 }
1315 break;
bbce6d69 1316 case NALNUML:
3280af22 1317 PL_reg_flags |= RF_tainted;
ffc61ed2 1318 if (do_utf8) {
078c425b 1319 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1320 if (!isALNUM_LC_utf8((U8*)s)) {
1321 if (tmp && (norun || regtry(prog, s)))
1322 goto got_it;
1323 else
1324 tmp = doevery;
1325 }
a687059c 1326 else
ffc61ed2 1327 tmp = 1;
078c425b 1328 s += uskip;
a687059c 1329 }
a0d0e21e 1330 }
ffc61ed2
JH
1331 else {
1332 while (s < strend) {
1333 if (!isALNUM_LC(*s)) {
1334 if (tmp && (norun || regtry(prog, s)))
1335 goto got_it;
1336 else
1337 tmp = doevery;
1338 }
a0ed51b3 1339 else
ffc61ed2
JH
1340 tmp = 1;
1341 s++;
a0ed51b3 1342 }
a0ed51b3
LW
1343 }
1344 break;
a0d0e21e 1345 case SPACE:
ffc61ed2 1346 if (do_utf8) {
1a4fad37 1347 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1348 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1349 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1350 if (tmp && (norun || regtry(prog, s)))
1351 goto got_it;
1352 else
1353 tmp = doevery;
1354 }
a0d0e21e 1355 else
ffc61ed2 1356 tmp = 1;
078c425b 1357 s += uskip;
2304df62 1358 }
a0d0e21e 1359 }
ffc61ed2
JH
1360 else {
1361 while (s < strend) {
1362 if (isSPACE(*s)) {
1363 if (tmp && (norun || regtry(prog, s)))
1364 goto got_it;
1365 else
1366 tmp = doevery;
1367 }
a0ed51b3 1368 else
ffc61ed2
JH
1369 tmp = 1;
1370 s++;
a0ed51b3 1371 }
a0ed51b3
LW
1372 }
1373 break;
bbce6d69 1374 case SPACEL:
3280af22 1375 PL_reg_flags |= RF_tainted;
ffc61ed2 1376 if (do_utf8) {
078c425b 1377 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1378 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1379 if (tmp && (norun || regtry(prog, s)))
1380 goto got_it;
1381 else
1382 tmp = doevery;
1383 }
bbce6d69 1384 else
ffc61ed2 1385 tmp = 1;
078c425b 1386 s += uskip;
bbce6d69 1387 }
bbce6d69 1388 }
ffc61ed2
JH
1389 else {
1390 while (s < strend) {
1391 if (isSPACE_LC(*s)) {
1392 if (tmp && (norun || regtry(prog, s)))
1393 goto got_it;
1394 else
1395 tmp = doevery;
1396 }
a0ed51b3 1397 else
ffc61ed2
JH
1398 tmp = 1;
1399 s++;
a0ed51b3 1400 }
a0ed51b3
LW
1401 }
1402 break;
a0d0e21e 1403 case NSPACE:
ffc61ed2 1404 if (do_utf8) {
1a4fad37 1405 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1406 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1407 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1408 if (tmp && (norun || regtry(prog, s)))
1409 goto got_it;
1410 else
1411 tmp = doevery;
1412 }
a0d0e21e 1413 else
ffc61ed2 1414 tmp = 1;
078c425b 1415 s += uskip;
a687059c 1416 }
a0d0e21e 1417 }
ffc61ed2
JH
1418 else {
1419 while (s < strend) {
1420 if (!isSPACE(*s)) {
1421 if (tmp && (norun || regtry(prog, s)))
1422 goto got_it;
1423 else
1424 tmp = doevery;
1425 }
a0ed51b3 1426 else
ffc61ed2
JH
1427 tmp = 1;
1428 s++;
a0ed51b3 1429 }
a0ed51b3
LW
1430 }
1431 break;
bbce6d69 1432 case NSPACEL:
3280af22 1433 PL_reg_flags |= RF_tainted;
ffc61ed2 1434 if (do_utf8) {
078c425b 1435 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1436 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1437 if (tmp && (norun || regtry(prog, s)))
1438 goto got_it;
1439 else
1440 tmp = doevery;
1441 }
bbce6d69 1442 else
ffc61ed2 1443 tmp = 1;
078c425b 1444 s += uskip;
bbce6d69 1445 }
bbce6d69 1446 }
ffc61ed2
JH
1447 else {
1448 while (s < strend) {
1449 if (!isSPACE_LC(*s)) {
1450 if (tmp && (norun || regtry(prog, s)))
1451 goto got_it;
1452 else
1453 tmp = doevery;
1454 }
a0ed51b3 1455 else
ffc61ed2
JH
1456 tmp = 1;
1457 s++;
a0ed51b3 1458 }
a0ed51b3
LW
1459 }
1460 break;
a0d0e21e 1461 case DIGIT:
ffc61ed2 1462 if (do_utf8) {
1a4fad37 1463 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1464 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1465 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1466 if (tmp && (norun || regtry(prog, s)))
1467 goto got_it;
1468 else
1469 tmp = doevery;
1470 }
a0d0e21e 1471 else
ffc61ed2 1472 tmp = 1;
078c425b 1473 s += uskip;
2b69d0c2 1474 }
a0d0e21e 1475 }
ffc61ed2
JH
1476 else {
1477 while (s < strend) {
1478 if (isDIGIT(*s)) {
1479 if (tmp && (norun || regtry(prog, s)))
1480 goto got_it;
1481 else
1482 tmp = doevery;
1483 }
a0ed51b3 1484 else
ffc61ed2
JH
1485 tmp = 1;
1486 s++;
a0ed51b3 1487 }
a0ed51b3
LW
1488 }
1489 break;
b8c5462f
JH
1490 case DIGITL:
1491 PL_reg_flags |= RF_tainted;
ffc61ed2 1492 if (do_utf8) {
078c425b 1493 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1494 if (isDIGIT_LC_utf8((U8*)s)) {
1495 if (tmp && (norun || regtry(prog, s)))
1496 goto got_it;
1497 else
1498 tmp = doevery;
1499 }
b8c5462f 1500 else
ffc61ed2 1501 tmp = 1;
078c425b 1502 s += uskip;
b8c5462f 1503 }
b8c5462f 1504 }
ffc61ed2
JH
1505 else {
1506 while (s < strend) {
1507 if (isDIGIT_LC(*s)) {
1508 if (tmp && (norun || regtry(prog, s)))
1509 goto got_it;
1510 else
1511 tmp = doevery;
1512 }
b8c5462f 1513 else
ffc61ed2
JH
1514 tmp = 1;
1515 s++;
b8c5462f 1516 }
b8c5462f
JH
1517 }
1518 break;
a0d0e21e 1519 case NDIGIT:
ffc61ed2 1520 if (do_utf8) {
1a4fad37 1521 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1522 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1523 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1524 if (tmp && (norun || regtry(prog, s)))
1525 goto got_it;
1526 else
1527 tmp = doevery;
1528 }
a0d0e21e 1529 else
ffc61ed2 1530 tmp = 1;
078c425b 1531 s += uskip;
a687059c 1532 }
a0d0e21e 1533 }
ffc61ed2
JH
1534 else {
1535 while (s < strend) {
1536 if (!isDIGIT(*s)) {
1537 if (tmp && (norun || regtry(prog, s)))
1538 goto got_it;
1539 else
1540 tmp = doevery;
1541 }
a0ed51b3 1542 else
ffc61ed2
JH
1543 tmp = 1;
1544 s++;
a0ed51b3 1545 }
a0ed51b3
LW
1546 }
1547 break;
b8c5462f
JH
1548 case NDIGITL:
1549 PL_reg_flags |= RF_tainted;
ffc61ed2 1550 if (do_utf8) {
078c425b 1551 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1552 if (!isDIGIT_LC_utf8((U8*)s)) {
1553 if (tmp && (norun || regtry(prog, s)))
1554 goto got_it;
1555 else
1556 tmp = doevery;
1557 }
b8c5462f 1558 else
ffc61ed2 1559 tmp = 1;
078c425b 1560 s += uskip;
b8c5462f 1561 }
a0ed51b3 1562 }
ffc61ed2
JH
1563 else {
1564 while (s < strend) {
1565 if (!isDIGIT_LC(*s)) {
1566 if (tmp && (norun || regtry(prog, s)))
1567 goto got_it;
1568 else
1569 tmp = doevery;
1570 }
cf93c79d 1571 else
ffc61ed2
JH
1572 tmp = 1;
1573 s++;
b8c5462f 1574 }
b8c5462f
JH
1575 }
1576 break;
b3c9acc1 1577 default:
3c3eec57
GS
1578 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1579 break;
d6a28714 1580 }
6eb5f6b9
JH
1581 return 0;
1582 got_it:
1583 return s;
1584}
1585
1586/*
1587 - regexec_flags - match a regexp against a string
1588 */
1589I32
1590Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1591 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1592/* strend: pointer to null at end of string */
1593/* strbeg: real beginning of string */
1594/* minend: end of match must be >=minend after stringarg. */
1595/* data: May be used for some additional optimizations. */
1596/* nosave: For optimizations. */
1597{
97aff369 1598 dVAR;
6eb5f6b9
JH
1599 register char *s;
1600 register regnode *c;
1601 register char *startpos = stringarg;
6eb5f6b9
JH
1602 I32 minlen; /* must match at least this many chars */
1603 I32 dontbother = 0; /* how many characters not to try at end */
6eb5f6b9
JH
1604 I32 end_shift = 0; /* Same for the end. */ /* CC */
1605 I32 scream_pos = -1; /* Internal iterator of scream. */
ccac19ea 1606 char *scream_olds = NULL;
6eb5f6b9 1607 SV* oreplsv = GvSV(PL_replgv);
1df70142 1608 const bool do_utf8 = DO_UTF8(sv);
a3b680e6 1609 const I32 multiline = prog->reganch & PMf_MULTILINE;
2a782b5b 1610#ifdef DEBUGGING
6136c704
AL
1611 SV * const dsv0 = PERL_DEBUG_PAD_ZERO(0);
1612 SV * const dsv1 = PERL_DEBUG_PAD_ZERO(1);
2a782b5b 1613#endif
a3621e74
YO
1614
1615 GET_RE_DEBUG_FLAGS_DECL;
1616
9d4ba2ae 1617 PERL_UNUSED_ARG(data);
a30b2f1f 1618 RX_MATCH_UTF8_set(prog,do_utf8);
6eb5f6b9 1619
4f639d21 1620 PL_reg_re = prog;
6eb5f6b9 1621#ifdef DEBUGGING
aea4f609 1622 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1623#endif
1624
1625 /* Be paranoid... */
1626 if (prog == NULL || startpos == NULL) {
1627 Perl_croak(aTHX_ "NULL regexp parameter");
1628 return 0;
1629 }
1630
1631 minlen = prog->minlen;
61a36c01 1632 if (strend - startpos < minlen) {
a3621e74 1633 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584
JH
1634 "String too short [regexec_flags]...\n"));
1635 goto phooey;
1aa99e6b 1636 }
6eb5f6b9 1637
6eb5f6b9
JH
1638 /* Check validity of program. */
1639 if (UCHARAT(prog->program) != REG_MAGIC) {
1640 Perl_croak(aTHX_ "corrupted regexp program");
1641 }
1642
1643 PL_reg_flags = 0;
1644 PL_reg_eval_set = 0;
1645 PL_reg_maxiter = 0;
1646
1647 if (prog->reganch & ROPT_UTF8)
1648 PL_reg_flags |= RF_utf8;
1649
1650 /* Mark beginning of line for ^ and lookbehind. */
1651 PL_regbol = startpos;
1652 PL_bostr = strbeg;
1653 PL_reg_sv = sv;
1654
1655 /* Mark end of line for $ (and such) */
1656 PL_regeol = strend;
1657
1658 /* see how far we have to get to not match where we matched before */
1659 PL_regtill = startpos+minend;
1660
1661 /* We start without call_cc context. */
1662 PL_reg_call_cc = 0;
1663
1664 /* If there is a "must appear" string, look for it. */
1665 s = startpos;
1666
1667 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1668 MAGIC *mg;
1669
1670 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1671 PL_reg_ganch = startpos;
1672 else if (sv && SvTYPE(sv) >= SVt_PVMG
1673 && SvMAGIC(sv)
14befaf4
DM
1674 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1675 && mg->mg_len >= 0) {
6eb5f6b9
JH
1676 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1677 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1678 if (s > PL_reg_ganch)
6eb5f6b9
JH
1679 goto phooey;
1680 s = PL_reg_ganch;
1681 }
1682 }
1683 else /* pos() not defined */
1684 PL_reg_ganch = strbeg;
1685 }
1686
a0714e2c 1687 if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
6eb5f6b9
JH
1688 re_scream_pos_data d;
1689
1690 d.scream_olds = &scream_olds;
1691 d.scream_pos = &scream_pos;
1692 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7 1693 if (!s) {
a3621e74 1694 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1695 goto phooey; /* not present */
3fa9c3d7 1696 }
6eb5f6b9
JH
1697 }
1698
a3621e74 1699 DEBUG_EXECUTE_r({
1df70142
AL
1700 const char * const s0 = UTF
1701 ? pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1702 UNI_DISPLAY_REGEX)
1703 : prog->precomp;
1704 const int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1705 const char * const s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
c728cb41 1706 UNI_DISPLAY_REGEX) : startpos;
1df70142 1707 const int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
2a782b5b
JH
1708 if (!PL_colorset)
1709 reginitcolors();
1710 PerlIO_printf(Perl_debug_log,
a0288114 1711 "%sMatching REx%s \"%s%*.*s%s%s\" against \"%s%.*s%s%s\"\n",
e4584336 1712 PL_colors[4], PL_colors[5], PL_colors[0],
9e55ce06 1713 len0, len0, s0,
2a782b5b 1714 PL_colors[1],
9e55ce06 1715 len0 > 60 ? "..." : "",
2a782b5b 1716 PL_colors[0],
9e55ce06
JH
1717 (int)(len1 > 60 ? 60 : len1),
1718 s1, PL_colors[1],
1719 (len1 > 60 ? "..." : "")
2a782b5b
JH
1720 );
1721 });
6eb5f6b9
JH
1722
1723 /* Simplest case: anchored match need be tried only once. */
1724 /* [unless only anchor is BOL and multiline is set] */
1725 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1726 if (s == startpos && regtry(prog, startpos))
1727 goto got_it;
7fba1cd6 1728 else if (multiline || (prog->reganch & ROPT_IMPLICIT)
6eb5f6b9
JH
1729 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1730 {
1731 char *end;
1732
1733 if (minlen)
1734 dontbother = minlen - 1;
1aa99e6b 1735 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9 1736 /* for multiline we only have to try after newlines */
33b8afdf 1737 if (prog->check_substr || prog->check_utf8) {
6eb5f6b9
JH
1738 if (s == startpos)
1739 goto after_try;
1740 while (1) {
1741 if (regtry(prog, s))
1742 goto got_it;
1743 after_try:
1744 if (s >= end)
1745 goto phooey;
1746 if (prog->reganch & RE_USE_INTUIT) {
1747 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1748 if (!s)
1749 goto phooey;
1750 }
1751 else
1752 s++;
1753 }
1754 } else {
1755 if (s > startpos)
1756 s--;
1757 while (s < end) {
1758 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1759 if (regtry(prog, s))
1760 goto got_it;
1761 }
1762 }
1763 }
1764 }
1765 goto phooey;
1766 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1767 if (regtry(prog, PL_reg_ganch))
1768 goto got_it;
1769 goto phooey;
1770 }
1771
1772 /* Messy cases: unanchored match. */
33b8afdf 1773 if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1774 /* we have /x+whatever/ */
1775 /* it must be a one character string (XXXX Except UTF?) */
33b8afdf 1776 char ch;
bf93d4cc
GS
1777#ifdef DEBUGGING
1778 int did_match = 0;
1779#endif
33b8afdf
JH
1780 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1781 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
3f7c398e 1782 ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
bf93d4cc 1783
1aa99e6b 1784 if (do_utf8) {
6eb5f6b9
JH
1785 while (s < strend) {
1786 if (*s == ch) {
a3621e74 1787 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1788 if (regtry(prog, s)) goto got_it;
1789 s += UTF8SKIP(s);
1790 while (s < strend && *s == ch)
1791 s += UTF8SKIP(s);
1792 }
1793 s += UTF8SKIP(s);
1794 }
1795 }
1796 else {
1797 while (s < strend) {
1798 if (*s == ch) {
a3621e74 1799 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1800 if (regtry(prog, s)) goto got_it;
1801 s++;
1802 while (s < strend && *s == ch)
1803 s++;
1804 }
1805 s++;
1806 }
1807 }
a3621e74 1808 DEBUG_EXECUTE_r(if (!did_match)
bf93d4cc 1809 PerlIO_printf(Perl_debug_log,
b7953727
JH
1810 "Did not find anchored character...\n")
1811 );
6eb5f6b9 1812 }
a0714e2c
SS
1813 else if (prog->anchored_substr != NULL
1814 || prog->anchored_utf8 != NULL
1815 || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
33b8afdf
JH
1816 && prog->float_max_offset < strend - s)) {
1817 SV *must;
1818 I32 back_max;
1819 I32 back_min;
1820 char *last;
6eb5f6b9 1821 char *last1; /* Last position checked before */
bf93d4cc
GS
1822#ifdef DEBUGGING
1823 int did_match = 0;
1824#endif
33b8afdf
JH
1825 if (prog->anchored_substr || prog->anchored_utf8) {
1826 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1827 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1828 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1829 back_max = back_min = prog->anchored_offset;
1830 } else {
1831 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1832 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1833 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1834 back_max = prog->float_max_offset;
1835 back_min = prog->float_min_offset;
1836 }
1837 if (must == &PL_sv_undef)
1838 /* could not downgrade utf8 check substring, so must fail */
1839 goto phooey;
1840
1841 last = HOP3c(strend, /* Cannot start after this */
1842 -(I32)(CHR_SVLEN(must)
1843 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9
JH
1844
1845 if (s > PL_bostr)
1846 last1 = HOPc(s, -1);
1847 else
1848 last1 = s - 1; /* bogus */
1849
a0288114 1850 /* XXXX check_substr already used to find "s", can optimize if
6eb5f6b9
JH
1851 check_substr==must. */
1852 scream_pos = -1;
1853 dontbother = end_shift;
1854 strend = HOPc(strend, -dontbother);
1855 while ( (s <= last) &&
9041c2e3 1856 ((flags & REXEC_SCREAM)
1aa99e6b 1857 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1858 end_shift, &scream_pos, 0))
1aa99e6b 1859 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1860 (unsigned char*)strend, must,
7fba1cd6 1861 multiline ? FBMrf_MULTILINE : 0))) ) {
4addbd3b
HS
1862 /* we may be pointing at the wrong string */
1863 if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
3f7c398e 1864 s = strbeg + (s - SvPVX_const(sv));
a3621e74 1865 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1866 if (HOPc(s, -back_max) > last1) {
1867 last1 = HOPc(s, -back_min);
1868 s = HOPc(s, -back_max);
1869 }
1870 else {
52657f30 1871 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
6eb5f6b9
JH
1872
1873 last1 = HOPc(s, -back_min);
52657f30 1874 s = t;
6eb5f6b9 1875 }
1aa99e6b 1876 if (do_utf8) {
6eb5f6b9
JH
1877 while (s <= last1) {
1878 if (regtry(prog, s))
1879 goto got_it;
1880 s += UTF8SKIP(s);
1881 }
1882 }
1883 else {
1884 while (s <= last1) {
1885 if (regtry(prog, s))
1886 goto got_it;
1887 s++;
1888 }
1889 }
1890 }
a3621e74 1891 DEBUG_EXECUTE_r(if (!did_match)
b7953727 1892 PerlIO_printf(Perl_debug_log,
a0288114 1893 "Did not find %s substr \"%s%.*s%s\"%s...\n",
33b8afdf 1894 ((must == prog->anchored_substr || must == prog->anchored_utf8)
bf93d4cc
GS
1895 ? "anchored" : "floating"),
1896 PL_colors[0],
1897 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 1898 SvPVX_const(must),
b7953727
JH
1899 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1900 );
6eb5f6b9
JH
1901 goto phooey;
1902 }
155aba94 1903 else if ((c = prog->regstclass)) {
f14c76ed
RGS
1904 if (minlen) {
1905 I32 op = (U8)OP(prog->regstclass);
66e933ab 1906 /* don't bother with what can't match */
f14c76ed
RGS
1907 if (PL_regkind[op] != EXACT && op != CANY)
1908 strend = HOPc(strend, -(minlen - 1));
1909 }
a3621e74 1910 DEBUG_EXECUTE_r({
ffc61ed2 1911 SV *prop = sv_newmortal();
cfd0369c
NC
1912 const char *s0;
1913 const char *s1;
9e55ce06
JH
1914 int len0;
1915 int len1;
1916
ffc61ed2 1917 regprop(prop, c);
9e55ce06 1918 s0 = UTF ?
3f7c398e 1919 pv_uni_display(dsv0, (U8*)SvPVX_const(prop), SvCUR(prop), 60,
c728cb41 1920 UNI_DISPLAY_REGEX) :
cfd0369c 1921 SvPVX_const(prop);
9e55ce06
JH
1922 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1923 s1 = UTF ?
c728cb41 1924 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
9e55ce06
JH
1925 len1 = UTF ? SvCUR(dsv1) : strend - s;
1926 PerlIO_printf(Perl_debug_log,
a0288114 1927 "Matching stclass \"%*.*s\" against \"%*.*s\"\n",
9e55ce06
JH
1928 len0, len0, s0,
1929 len1, len1, s1);
ffc61ed2 1930 });
06b5626a 1931 if (find_byclass(prog, c, s, strend, 0))
6eb5f6b9 1932 goto got_it;
a3621e74 1933 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1934 }
1935 else {
1936 dontbother = 0;
a0714e2c 1937 if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
33b8afdf 1938 /* Trim the end. */
d6a28714 1939 char *last;
33b8afdf
JH
1940 SV* float_real;
1941
1942 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1943 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1944 float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
d6a28714
JH
1945
1946 if (flags & REXEC_SCREAM) {
33b8afdf 1947 last = screaminstr(sv, float_real, s - strbeg,
d6a28714
JH
1948 end_shift, &scream_pos, 1); /* last one */
1949 if (!last)
ffc61ed2 1950 last = scream_olds; /* Only one occurrence. */
4addbd3b
HS
1951 /* we may be pointing at the wrong string */
1952 else if (RX_MATCH_COPIED(prog))
3f7c398e 1953 s = strbeg + (s - SvPVX_const(sv));
b8c5462f 1954 }
d6a28714
JH
1955 else {
1956 STRLEN len;
cfd0369c 1957 const char * const little = SvPV_const(float_real, len);
d6a28714 1958
33b8afdf 1959 if (SvTAIL(float_real)) {
d6a28714
JH
1960 if (memEQ(strend - len + 1, little, len - 1))
1961 last = strend - len + 1;
7fba1cd6 1962 else if (!multiline)
9041c2e3 1963 last = memEQ(strend - len, little, len)
bd61b366 1964 ? strend - len : NULL;
b8c5462f 1965 else
d6a28714
JH
1966 goto find_last;
1967 } else {
1968 find_last:
9041c2e3 1969 if (len)
d6a28714 1970 last = rninstr(s, strend, little, little + len);
b8c5462f 1971 else
a0288114 1972 last = strend; /* matching "$" */
b8c5462f 1973 }
b8c5462f 1974 }
bf93d4cc 1975 if (last == NULL) {
a3621e74 1976 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
bf93d4cc 1977 "%sCan't trim the tail, match fails (should not happen)%s\n",
e4584336 1978 PL_colors[4], PL_colors[5]));
bf93d4cc
GS
1979 goto phooey; /* Should not happen! */
1980 }
d6a28714
JH
1981 dontbother = strend - last + prog->float_min_offset;
1982 }
1983 if (minlen && (dontbother < minlen))
1984 dontbother = minlen - 1;
1985 strend -= dontbother; /* this one's always in bytes! */
1986 /* We don't know much -- general case. */
1aa99e6b 1987 if (do_utf8) {
d6a28714
JH
1988 for (;;) {
1989 if (regtry(prog, s))
1990 goto got_it;
1991 if (s >= strend)
1992 break;
b8c5462f 1993 s += UTF8SKIP(s);
d6a28714
JH
1994 };
1995 }
1996 else {
1997 do {
1998 if (regtry(prog, s))
1999 goto got_it;
2000 } while (s++ < strend);
2001 }
2002 }
2003
2004 /* Failure. */
2005 goto phooey;
2006
2007got_it:
2008 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2009
2010 if (PL_reg_eval_set) {
2011 /* Preserve the current value of $^R */
2012 if (oreplsv != GvSV(PL_replgv))
2013 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2014 restored, the value remains
2015 the same. */
4f639d21 2016 restore_pos(aTHX_ prog);
d6a28714
JH
2017 }
2018
2019 /* make sure $`, $&, $', and $digit will work later */
2020 if ( !(flags & REXEC_NOT_FIRST) ) {
ed252734 2021 RX_MATCH_COPY_FREE(prog);
d6a28714
JH
2022 if (flags & REXEC_COPY_STR) {
2023 I32 i = PL_regeol - startpos + (stringarg - strbeg);
f8c7b90f 2024#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2025 if ((SvIsCOW(sv)
2026 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2027 if (DEBUG_C_TEST) {
2028 PerlIO_printf(Perl_debug_log,
2029 "Copy on write: regexp capture, type %d\n",
2030 (int) SvTYPE(sv));
2031 }
2032 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
d5263905 2033 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
ed252734
NC
2034 assert (SvPOKp(prog->saved_copy));
2035 } else
2036#endif
2037 {
2038 RX_MATCH_COPIED_on(prog);
2039 s = savepvn(strbeg, i);
2040 prog->subbeg = s;
2041 }
d6a28714 2042 prog->sublen = i;
d6a28714
JH
2043 }
2044 else {
2045 prog->subbeg = strbeg;
2046 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2047 }
2048 }
9041c2e3 2049
d6a28714
JH
2050 return 1;
2051
2052phooey:
a3621e74 2053 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
e4584336 2054 PL_colors[4], PL_colors[5]));
d6a28714 2055 if (PL_reg_eval_set)
4f639d21 2056 restore_pos(aTHX_ prog);
d6a28714
JH
2057 return 0;
2058}
2059
2060/*
2061 - regtry - try match at specific point
2062 */
2063STATIC I32 /* 0 failure, 1 success */
2064S_regtry(pTHX_ regexp *prog, char *startpos)
2065{
97aff369 2066 dVAR;
d6a28714
JH
2067 register I32 *sp;
2068 register I32 *ep;
2069 CHECKPOINT lastcp;
a3621e74 2070 GET_RE_DEBUG_FLAGS_DECL;
d6a28714 2071
02db2b7b
IZ
2072#ifdef DEBUGGING
2073 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
2074#endif
d6a28714
JH
2075 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2076 MAGIC *mg;
2077
2078 PL_reg_eval_set = RS_init;
a3621e74 2079 DEBUG_EXECUTE_r(DEBUG_s(
b900a521
JH
2080 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2081 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 2082 ));
e8347627 2083 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
2084 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2085 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2086 SAVETMPS;
2087 /* Apparently this is not needed, judging by wantarray. */
e8347627 2088 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
2089 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2090
2091 if (PL_reg_sv) {
2092 /* Make $_ available to executed code. */
2093 if (PL_reg_sv != DEFSV) {
59f00321 2094 SAVE_DEFSV;
d6a28714 2095 DEFSV = PL_reg_sv;
b8c5462f 2096 }
d6a28714 2097
9041c2e3 2098 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 2099 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 2100 /* prepare for quick setting of pos */
d300d9fa
NC
2101#ifdef PERL_OLD_COPY_ON_WRITE
2102 if (SvIsCOW(sv))
2103 sv_force_normal_flags(sv, 0);
2104#endif
2105 mg = sv_magicext(PL_reg_sv, (SV*)0, PERL_MAGIC_regex_global,
2106 &PL_vtbl_mglob, NULL, 0);
d6a28714 2107 mg->mg_len = -1;
b8c5462f 2108 }
d6a28714
JH
2109 PL_reg_magic = mg;
2110 PL_reg_oldpos = mg->mg_len;
4f639d21 2111 SAVEDESTRUCTOR_X(restore_pos, prog);
d6a28714 2112 }
09687e5a 2113 if (!PL_reg_curpm) {
a02a5408 2114 Newxz(PL_reg_curpm, 1, PMOP);
09687e5a
AB
2115#ifdef USE_ITHREADS
2116 {
2117 SV* repointer = newSViv(0);
577e12cc 2118 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2119 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2120 av_push(PL_regex_padav,repointer);
2121 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2122 PL_regex_pad = AvARRAY(PL_regex_padav);
2123 }
2124#endif
2125 }
aaa362c4 2126 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2127 PL_reg_oldcurpm = PL_curpm;
2128 PL_curpm = PL_reg_curpm;
2129 if (RX_MATCH_COPIED(prog)) {
2130 /* Here is a serious problem: we cannot rewrite subbeg,
2131 since it may be needed if this match fails. Thus
2132 $` inside (?{}) could fail... */
2133 PL_reg_oldsaved = prog->subbeg;
2134 PL_reg_oldsavedlen = prog->sublen;
f8c7b90f 2135#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2136 PL_nrs = prog->saved_copy;
2137#endif
d6a28714
JH
2138 RX_MATCH_COPIED_off(prog);
2139 }
2140 else
bd61b366 2141 PL_reg_oldsaved = NULL;
d6a28714
JH
2142 prog->subbeg = PL_bostr;
2143 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2144 }
973dddac 2145 prog->startp[0] = startpos - PL_bostr;
d6a28714
JH
2146 PL_reginput = startpos;
2147 PL_regstartp = prog->startp;
2148 PL_regendp = prog->endp;
2149 PL_reglastparen = &prog->lastparen;
a01268b5 2150 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714 2151 prog->lastparen = 0;
03994de8 2152 prog->lastcloseparen = 0;
d6a28714 2153 PL_regsize = 0;
a3621e74 2154 DEBUG_EXECUTE_r(PL_reg_starttry = startpos);
d6a28714
JH
2155 if (PL_reg_start_tmpl <= prog->nparens) {
2156 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2157 if(PL_reg_start_tmp)
2158 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2159 else
a02a5408 2160 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
d6a28714
JH
2161 }
2162
2163 /* XXXX What this code is doing here?!!! There should be no need
2164 to do this again and again, PL_reglastparen should take care of
3dd2943c 2165 this! --ilya*/
dafc8851
JH
2166
2167 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2168 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2169 * PL_reglastparen), is not needed at all by the test suite
2170 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2171 * enough, for building DynaLoader, or otherwise this
2172 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2173 * will happen. Meanwhile, this code *is* needed for the
2174 * above-mentioned test suite tests to succeed. The common theme
2175 * on those tests seems to be returning null fields from matches.
2176 * --jhi */
dafc8851 2177#if 1
d6a28714
JH
2178 sp = prog->startp;
2179 ep = prog->endp;
2180 if (prog->nparens) {
097eb12c 2181 register I32 i;
eb160463 2182 for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
d6a28714
JH
2183 *++sp = -1;
2184 *++ep = -1;
2185 }
2186 }
dafc8851 2187#endif
02db2b7b 2188 REGCP_SET(lastcp);
4f639d21 2189 if (regmatch(prog, prog->program + 1)) {
d6a28714
JH
2190 prog->endp[0] = PL_reginput - PL_bostr;
2191 return 1;
2192 }
02db2b7b 2193 REGCP_UNWIND(lastcp);
d6a28714
JH
2194 return 0;
2195}
2196
02db2b7b
IZ
2197#define RE_UNWIND_BRANCH 1
2198#define RE_UNWIND_BRANCHJ 2
2199
2200union re_unwind_t;
2201
2202typedef struct { /* XX: makes sense to enlarge it... */
2203 I32 type;
2204 I32 prev;
2205 CHECKPOINT lastcp;
2206} re_unwind_generic_t;
2207
2208typedef struct {
2209 I32 type;
2210 I32 prev;
2211 CHECKPOINT lastcp;
2212 I32 lastparen;
2213 regnode *next;
2214 char *locinput;
2215 I32 nextchr;
2216#ifdef DEBUGGING
2217 int regindent;
2218#endif
2219} re_unwind_branch_t;
2220
2221typedef union re_unwind_t {
2222 I32 type;
2223 re_unwind_generic_t generic;
2224 re_unwind_branch_t branch;
2225} re_unwind_t;
2226
8ba1375e
MJD
2227#define sayYES goto yes
2228#define sayNO goto no
e0f9d4a8 2229#define sayNO_ANYOF goto no_anyof
8ba1375e
MJD
2230#define sayYES_FINAL goto yes_final
2231#define sayYES_LOUD goto yes_loud
2232#define sayNO_FINAL goto no_final
2233#define sayNO_SILENT goto do_no
2234#define saySAME(x) if (x) goto yes; else goto no
2235
3ab3c9b4
HS
2236#define POSCACHE_SUCCESS 0 /* caching success rather than failure */
2237#define POSCACHE_SEEN 1 /* we know what we're caching */
2238#define POSCACHE_START 2 /* the real cache: this bit maps to pos 0 */
2239#define CACHEsayYES STMT_START { \
d8319b27 2240 if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \
3ab3c9b4
HS
2241 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2242 PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) || (1<<POSCACHE_SEEN); \
2243 else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2244 /* cache records failure, but this is success */ \
2245 DEBUG_r( \
2246 PerlIO_printf(Perl_debug_log, \
2247 "%*s (remove success from failure cache)\n", \
2248 REPORT_CODE_OFF+PL_regindent*2, "") \
2249 ); \
d8319b27 2250 PL_reg_poscache[st->u.whilem.cache_offset] &= ~(1<<st->u.whilem.cache_bit); \
3ab3c9b4
HS
2251 } \
2252 } \
2253 sayYES; \
2254} STMT_END
2255#define CACHEsayNO STMT_START { \
d8319b27 2256 if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \
3ab3c9b4
HS
2257 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2258 PL_reg_poscache[0] |= (1<<POSCACHE_SEEN); \
2259 else if ((PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2260 /* cache records success, but this is failure */ \
2261 DEBUG_r( \
2262 PerlIO_printf(Perl_debug_log, \
2263 "%*s (remove failure from success cache)\n", \
2264 REPORT_CODE_OFF+PL_regindent*2, "") \
2265 ); \
d8319b27 2266 PL_reg_poscache[st->u.whilem.cache_offset] &= ~(1<<st->u.whilem.cache_bit); \
3ab3c9b4
HS
2267 } \
2268 } \
2269 sayNO; \
2270} STMT_END
2271
a3621e74
YO
2272/* this is used to determine how far from the left messages like
2273 'failed...' are printed. Currently 29 makes these messages line
2274 up with the opcode they refer to. Earlier perls used 25 which
2275 left these messages outdented making reviewing a debug output
2276 quite difficult.
2277*/
2278#define REPORT_CODE_OFF 29
2279
2280
2281/* Make sure there is a test for this +1 options in re_tests */
2282#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2283
5d9a96ca
DM
2284/* grab a new slab and return the first slot in it */
2285
2286STATIC regmatch_state *
2287S_push_slab(pTHX)
2288{
2289 regmatch_slab *s = PL_regmatch_slab->next;
2290 if (!s) {
2291 Newx(s, 1, regmatch_slab);
2292 s->prev = PL_regmatch_slab;
2293 s->next = NULL;
2294 PL_regmatch_slab->next = s;
2295 }
2296 PL_regmatch_slab = s;
2297 return &s->states[0];
2298}
5b47454d 2299
95b24440
DM
2300/* simulate a recursive call to regmatch */
2301
2302#define REGMATCH(ns, where) \
5d9a96ca
DM
2303 st->scan = scan; \
2304 scan = (ns); \
2305 st->resume_state = resume_##where; \
95b24440
DM
2306 goto start_recurse; \
2307 resume_point_##where:
2308
d6a28714
JH
2309/*
2310 - regmatch - main matching routine
2311 *
2312 * Conceptually the strategy is simple: check to see whether the current
2313 * node matches, call self recursively to see whether the rest matches,
2314 * and then act accordingly. In practice we make some effort to avoid
2315 * recursion, in particular by going through "ordinary" nodes (that don't
2316 * need to know whether the rest of the match failed) by a loop instead of
2317 * by recursion.
2318 */
2319/* [lwall] I've hoisted the register declarations to the outer block in order to
2320 * maybe save a little bit of pushing and popping on the stack. It also takes
2321 * advantage of machines that use a register save mask on subroutine entry.
95b24440
DM
2322 *
2323 * This function used to be heavily recursive, but since this had the
2324 * effect of blowing the CPU stack on complex regexes, it has been
2325 * restructured to be iterative, and to save state onto the heap rather
2326 * than the stack. Essentially whereever regmatch() used to be called, it
2327 * pushes the current state, notes where to return, then jumps back into
2328 * the main loop.
2329 *
2330 * Originally the structure of this function used to look something like
2331
2332 S_regmatch() {
2333 int a = 1, b = 2;
2334 ...
2335 while (scan != NULL) {
5d9a96ca 2336 a++; // do stuff with a and b
95b24440
DM
2337 ...
2338 switch (OP(scan)) {
2339 case FOO: {
2340 int local = 3;
2341 ...
2342 if (regmatch(...)) // recurse
2343 goto yes;
2344 }
2345 ...
2346 }
2347 }
2348 yes:
2349 return 1;
2350 }
2351
2352 * Now it looks something like this:
2353
5d9a96ca 2354 typedef struct {
95b24440
DM
2355 int a, b, local;
2356 int resume_state;
5d9a96ca 2357 } regmatch_state;
95b24440
DM
2358
2359 S_regmatch() {
5d9a96ca
DM
2360 regmatch_state *st = new();
2361 int depth=0;
2362 st->a++; // do stuff with a and b
95b24440
DM
2363 ...
2364 while (scan != NULL) {
2365 ...
2366 switch (OP(scan)) {
2367 case FOO: {
5d9a96ca 2368 st->local = 3;
95b24440 2369 ...
5d9a96ca
DM
2370 st->scan = scan;
2371 scan = ...;
2372 st->resume_state = resume_FOO;
2373 goto start_recurse; // recurse
95b24440 2374
5d9a96ca
DM
2375 resume_point_FOO:
2376 if (result)
95b24440
DM
2377 goto yes;
2378 }
2379 ...
2380 }
5d9a96ca
DM
2381 start_recurse:
2382 st = new(); push a new state
2383 st->a = 1; st->b = 2;
2384 depth++;
95b24440 2385 }
5d9a96ca 2386 yes:
95b24440 2387 result = 1;
5d9a96ca
DM
2388 if (depth--) {
2389 st = pop();
95b24440
DM
2390 switch (resume_state) {
2391 case resume_FOO:
2392 goto resume_point_FOO;
2393 ...
2394 }
2395 }
2396 return result
2397 }
2398
2399 * WARNING: this means that any line in this function that contains a
2400 * REGMATCH() or TRYPAREN() is actually simulating a recursive call to
2401 * regmatch() using gotos instead. Thus the values of any local variables
2402 * not saved in the regmatch_state structure will have been lost when
2403 * execution resumes on the next line .
5d9a96ca
DM
2404 *
2405 * States (ie the st pointer) are allocated in slabs of about 4K in size.
2406 * PL_regmatch_state always points to the currently active state, and
2407 * PL_regmatch_slab points to the slab currently containing PL_regmatch_state.
2408 * The first time regmatch is called, the first slab is allocated, and is
2409 * never freed until interpreter desctruction. When the slab is full,
2410 * a new one is allocated chained to the end. At exit from regmatch, slabs
2411 * allocated since entry are freed.
d6a28714 2412 */
95b24440
DM
2413
2414
d6a28714 2415STATIC I32 /* 0 failure, 1 success */
4f639d21 2416S_regmatch(pTHX_ regexp *rex, regnode *prog)
d6a28714 2417{
27da23d5 2418 dVAR;
95b24440 2419 register const bool do_utf8 = PL_reg_match_utf8;
4ad0818d 2420 const U32 uniflags = UTF8_ALLOW_DEFAULT;
95b24440 2421
5d9a96ca
DM
2422 regmatch_slab *orig_slab;
2423 regmatch_state *orig_state;
a3621e74 2424
5d9a96ca
DM
2425 /* the current state. This is a cached copy of PL_regmatch_state */
2426 register regmatch_state *st;
95b24440 2427
5d9a96ca
DM
2428 /* cache heavy used fields of st in registers */
2429 register regnode *scan;
2430 register regnode *next;
2431 register I32 n = 0; /* initialize to shut up compiler warning */
2432 register char *locinput = PL_reginput;
95b24440 2433
5d9a96ca
DM
2434 /* these variables are NOT saved during a recusive RFEGMATCH: */
2435 register I32 nextchr; /* is always set to UCHARAT(locinput) */
2436 bool result; /* return value of S_regmatch */
2437 regnode *inner; /* Next node in internal branch. */
2438 int depth = 0; /* depth of recursion */
95b24440
DM
2439
2440#ifdef DEBUGGING
ab74612d 2441 SV *re_debug_flags = NULL;
a3621e74 2442 GET_RE_DEBUG_FLAGS;
d6a28714
JH
2443 PL_regindent++;
2444#endif
2445
5d9a96ca
DM
2446 /* on first ever call to regmatch, allocate first slab */
2447 if (!PL_regmatch_slab) {
2448 Newx(PL_regmatch_slab, 1, regmatch_slab);
2449 PL_regmatch_slab->prev = NULL;
2450 PL_regmatch_slab->next = NULL;
2451 PL_regmatch_state = &PL_regmatch_slab->states[0] - 1;
2452 }
2453
2454 /* remember current high-water mark for exit */
2455 /* XXX this should be done with SAVE* instead */
2456 orig_slab = PL_regmatch_slab;
2457 orig_state = PL_regmatch_state;
2458
2459 /* grab next free state slot */
2460 st = ++PL_regmatch_state;
2461 if (st > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]))
2462 st = PL_regmatch_state = S_push_slab(aTHX);
2463
2464 st->minmod = 0;
2465 st->sw = 0;
2466 st->logical = 0;
2467 st->unwind = 0;
2468 st->cc = NULL;
d6a28714
JH
2469 /* Note that nextchr is a byte even in UTF */
2470 nextchr = UCHARAT(locinput);
2471 scan = prog;
2472 while (scan != NULL) {
8ba1375e 2473
a3621e74 2474 DEBUG_EXECUTE_r( {
6136c704 2475 SV * const prop = sv_newmortal();
1df70142
AL
2476 const int docolor = *PL_colors[0];
2477 const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2478 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2479 /* The part of the string before starttry has one color
2480 (pref0_len chars), between starttry and current
2481 position another one (pref_len - pref0_len chars),
2482 after the current position the third one.
2483 We assume that pref0_len <= pref_len, otherwise we
2484 decrease pref0_len. */
9041c2e3 2485 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2486 ? (5 + taill) - l : locinput - PL_bostr;
2487 int pref0_len;
d6a28714 2488
df1ffd02 2489 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2490 pref_len++;
2491 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2492 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2493 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2494 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2495 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2496 l--;
d6a28714
JH
2497 if (pref0_len < 0)
2498 pref0_len = 0;
2499 if (pref0_len > pref_len)
2500 pref0_len = pref_len;
2501 regprop(prop, scan);
2a782b5b 2502 {
1df70142 2503 const char * const s0 =
f14c76ed 2504 do_utf8 && OP(scan) != CANY ?
95b24440 2505 pv_uni_display(PERL_DEBUG_PAD(0), (U8*)(locinput - pref_len),
c728cb41 2506 pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2507 locinput - pref_len;
1df70142
AL
2508 const int len0 = do_utf8 ? strlen(s0) : pref0_len;
2509 const char * const s1 = do_utf8 && OP(scan) != CANY ?
95b24440
DM
2510 pv_uni_display(PERL_DEBUG_PAD(1),
2511 (U8*)(locinput - pref_len + pref0_len),
c728cb41 2512 pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2513 locinput - pref_len + pref0_len;
1df70142
AL
2514 const int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2515 const char * const s2 = do_utf8 && OP(scan) != CANY ?
95b24440 2516 pv_uni_display(PERL_DEBUG_PAD(2), (U8*)locinput,
c728cb41 2517 PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2518 locinput;
1df70142 2519 const int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2520 PerlIO_printf(Perl_debug_log,
2521 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2522 (IV)(locinput - PL_bostr),
2523 PL_colors[4],
2524 len0, s0,
2525 PL_colors[5],
2526 PL_colors[2],
2527 len1, s1,
2528 PL_colors[3],
2529 (docolor ? "" : "> <"),
2530 PL_colors[0],
2531 len2, s2,
2532 PL_colors[1],
2533 15 - l - pref_len + 1,
2534 "",
4f639d21 2535 (IV)(scan - rex->program), PL_regindent*2, "",
3f7c398e 2536 SvPVX_const(prop));
2a782b5b
JH
2537 }
2538 });
d6a28714
JH
2539
2540 next = scan + NEXT_OFF(scan);
2541 if (next == scan)
2542 next = NULL;
2543
2544 switch (OP(scan)) {
2545 case BOL:
7fba1cd6 2546 if (locinput == PL_bostr)
d6a28714
JH
2547 {
2548 /* regtill = regbol; */
b8c5462f
JH
2549 break;
2550 }
d6a28714
JH
2551 sayNO;
2552 case MBOL:
12d33761
HS
2553 if (locinput == PL_bostr ||
2554 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2555 {
b8c5462f
JH
2556 break;
2557 }
d6a28714
JH
2558 sayNO;
2559 case SBOL:
c2a73568 2560 if (locinput == PL_bostr)
b8c5462f 2561 break;
d6a28714
JH
2562 sayNO;
2563 case GPOS:
2564 if (locinput == PL_reg_ganch)
2565 break;
2566 sayNO;
2567 case EOL:
d6a28714
JH
2568 goto seol;
2569 case MEOL:
d6a28714 2570 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2571 sayNO;
b8c5462f 2572 break;
d6a28714
JH
2573 case SEOL:
2574 seol:
2575 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2576 sayNO;
d6a28714 2577 if (PL_regeol - locinput > 1)
b8c5462f 2578 sayNO;
b8c5462f 2579 break;
d6a28714
JH
2580 case EOS:
2581 if (PL_regeol != locinput)
b8c5462f 2582 sayNO;
d6a28714 2583 break;
ffc61ed2 2584 case SANY:
d6a28714 2585 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2586 sayNO;
f33976b4
DB
2587 if (do_utf8) {
2588 locinput += PL_utf8skip[nextchr];
2589 if (locinput > PL_regeol)
2590 sayNO;
2591 nextchr = UCHARAT(locinput);
2592 }
2593 else
2594 nextchr = UCHARAT(++locinput);
2595 break;
2596 case CANY:
2597 if (!nextchr && locinput >= PL_regeol)
2598 sayNO;
b8c5462f 2599 nextchr = UCHARAT(++locinput);
a0d0e21e 2600 break;
ffc61ed2 2601 case REG_ANY:
1aa99e6b
IH
2602 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2603 sayNO;
2604 if (do_utf8) {
b8c5462f 2605 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2606 if (locinput > PL_regeol)
2607 sayNO;
a0ed51b3 2608 nextchr = UCHARAT(locinput);
a0ed51b3 2609 }
1aa99e6b
IH
2610 else
2611 nextchr = UCHARAT(++locinput);
a0ed51b3 2612 break;
a3621e74
YO
2613
2614
2615
2616 /*
2617 traverse the TRIE keeping track of all accepting states
2618 we transition through until we get to a failing node.
2619
a3621e74
YO
2620
2621 */
5b47454d 2622 case TRIE:
a3621e74
YO
2623 case TRIEF:
2624 case TRIEFL:
2625 {
a3621e74
YO
2626 U8 *uc = ( U8* )locinput;
2627 U32 state = 1;
2628 U16 charid = 0;
2629 U32 base = 0;
2630 UV uvc = 0;
2631 STRLEN len = 0;
2632 STRLEN foldlen = 0;
a3621e74
YO
2633 U8 *uscan = (U8*)NULL;
2634 STRLEN bufflen=0;
95b24440 2635 SV *sv_accept_buff = NULL;
5b47454d
DM
2636 const enum { trie_plain, trie_utf8, trie_uft8_fold }
2637 trie_type = do_utf8 ?
2638 (OP(scan) == TRIE ? trie_utf8 : trie_uft8_fold)
2639 : trie_plain;
2640
7087a21c
NC
2641 /* what trie are we using right now */
2642 reg_trie_data *trie
4f639d21 2643 = (reg_trie_data*)PL_reg_re->data->data[ ARG( scan ) ];
d8319b27 2644 st->u.trie.accepted = 0; /* how many accepting states we have seen */
95b24440 2645 result = 0;
a3621e74
YO
2646
2647 while ( state && uc <= (U8*)PL_regeol ) {
2648
5b47454d 2649 if (trie->states[ state ].wordnum) {
d8319b27 2650 if (!st->u.trie.accepted ) {
5b47454d
DM
2651 ENTER;
2652 SAVETMPS;
2653 bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
2654 sv_accept_buff=newSV(bufflen *
2655 sizeof(reg_trie_accepted) - 1);
2656 SvCUR_set(sv_accept_buff,
2657 sizeof(reg_trie_accepted));
2658 SvPOK_on(sv_accept_buff);
2659 sv_2mortal(sv_accept_buff);
d8319b27 2660 st->u.trie.accept_buff =
5b47454d
DM
2661 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
2662 }
2663 else {
d8319b27 2664 if (st->u.trie.accepted >= bufflen) {
5b47454d 2665 bufflen *= 2;
d8319b27 2666 st->u.trie.accept_buff =(reg_trie_accepted*)
5b47454d
DM
2667 SvGROW(sv_accept_buff,
2668 bufflen * sizeof(reg_trie_accepted));
2669 }
2670 SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
2671 + sizeof(reg_trie_accepted));
2672 }
d8319b27
DM
2673 st->u.trie.accept_buff[st->u.trie.accepted].wordnum = trie->states[state].wordnum;
2674 st->u.trie.accept_buff[st->u.trie.accepted].endpos = uc;
2675 ++st->u.trie.accepted;
5b47454d 2676 }
a3621e74
YO
2677
2678 base = trie->states[ state ].trans.base;
2679
2680 DEBUG_TRIE_EXECUTE_r(
2681 PerlIO_printf( Perl_debug_log,
e4584336 2682 "%*s %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ",
a3621e74 2683 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
d8319b27 2684 (UV)state, (UV)base, (UV)st->u.trie.accepted );
a3621e74
YO
2685 );
2686
2687 if ( base ) {
5b47454d
DM
2688 switch (trie_type) {
2689 case trie_uft8_fold:
a3621e74
YO
2690 if ( foldlen>0 ) {
2691 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
2692 foldlen -= len;
2693 uscan += len;
2694 len=0;
2695 } else {
1df70142 2696 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
a3621e74
YO
2697 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2698 uvc = to_uni_fold( uvc, foldbuf, &foldlen );
2699 foldlen -= UNISKIP( uvc );
2700 uscan = foldbuf + UNISKIP( uvc );
2701 }
5b47454d
DM
2702 break;
2703 case trie_utf8:
2704 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN,
2705 &len, uniflags );
2706 break;
2707 case trie_plain:
e4584336 2708 uvc = (UV)*uc;
a3621e74
YO
2709 len = 1;
2710 }
2711
5b47454d
DM
2712 if (uvc < 256) {
2713 charid = trie->charmap[ uvc ];
2714 }
2715 else {
2716 charid = 0;
2717 if (trie->widecharmap) {
2718 SV** svpp = (SV**)NULL;
2719 svpp = hv_fetch(trie->widecharmap,
2720 (char*)&uvc, sizeof(UV), 0);
2721 if (svpp)
2722 charid = (U16)SvIV(*svpp);
2723 }
2724 }
a3621e74 2725
5b47454d
DM
2726 if (charid &&
2727 (base + charid > trie->uniquecharcount )
2728 && (base + charid - 1 - trie->uniquecharcount
2729 < trie->lasttrans)
2730 && trie->trans[base + charid - 1 -
2731 trie->uniquecharcount].check == state)
2732 {
2733 state = trie->trans[base + charid - 1 -
2734 trie->uniquecharcount ].next;
2735 }
2736 else {
2737 state = 0;
2738 }
2739 uc += len;
2740
2741 }
2742 else {
a3621e74
YO
2743 state = 0;
2744 }
2745 DEBUG_TRIE_EXECUTE_r(
e4584336
RB
2746 PerlIO_printf( Perl_debug_log,
2747 "Charid:%3x CV:%4"UVxf" After State: %4"UVxf"%s\n",
2748 charid, uvc, (UV)state, PL_colors[5] );
a3621e74
YO
2749 );
2750 }
d8319b27 2751 if (!st->u.trie.accepted )
a3621e74 2752 sayNO;
a3621e74
YO
2753
2754 /*
2755 There was at least one accepting state that we
2756 transitioned through. Presumably the number of accepting
2757 states is going to be low, typically one or two. So we
2758 simply scan through to find the one with lowest wordnum.
2759 Once we find it, we swap the last state into its place
2760 and decrement the size. We then try to match the rest of
2761 the pattern at the point where the word ends, if we
2762 succeed then we end the loop, otherwise the loop
2763 eventually terminates once all of the accepting states
2764 have been tried.
2765 */
a3621e74 2766
d8319b27 2767 if ( st->u.trie.accepted == 1 ) {
a3621e74 2768 DEBUG_EXECUTE_r({
097eb12c 2769 SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ 0 ].wordnum-1, 0 );
a3621e74
YO
2770 PerlIO_printf( Perl_debug_log,
2771 "%*s %sonly one match : #%d <%s>%s\n",
e4584336 2772 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
d8319b27 2773 st->u.trie.accept_buff[ 0 ].wordnum,
cfd0369c 2774 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",
a3621e74
YO
2775 PL_colors[5] );
2776 });
d8319b27 2777 PL_reginput = (char *)st->u.trie.accept_buff[ 0 ].endpos;
a3621e74
YO
2778 /* in this case we free tmps/leave before we call regmatch
2779 as we wont be using accept_buff again. */
2780 FREETMPS;
2781 LEAVE;
95b24440
DM
2782 REGMATCH(scan + NEXT_OFF(scan), TRIE1);
2783 /*** all unsaved local vars undefined at this point */
a3621e74
YO
2784 } else {
2785 DEBUG_EXECUTE_r(
e4584336 2786 PerlIO_printf( Perl_debug_log,"%*s %sgot %"IVdf" possible matches%s\n",
d8319b27 2787 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], (IV)st->u.trie.accepted,
a3621e74
YO
2788 PL_colors[5] );
2789 );
d8319b27 2790 while ( !result && st->u.trie.accepted-- ) {
a3621e74
YO
2791 U32 best = 0;
2792 U32 cur;
d8319b27 2793 for( cur = 1 ; cur <= st->u.trie.accepted ; cur++ ) {
e4584336
RB
2794 DEBUG_TRIE_EXECUTE_r(
2795 PerlIO_printf( Perl_debug_log,
2796 "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
2797 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
d8319b27
DM
2798 (IV)best, st->u.trie.accept_buff[ best ].wordnum, (IV)cur,
2799 st->u.trie.accept_buff[ cur ].wordnum, PL_colors[5] );
e4584336 2800 );
a3621e74 2801
d8319b27
DM
2802 if (st->u.trie.accept_buff[cur].wordnum <
2803 st->u.trie.accept_buff[best].wordnum)
e822a8b4 2804 best = cur;
a3621e74
YO
2805 }
2806 DEBUG_EXECUTE_r({
87830502
DM
2807 reg_trie_data * const trie = (reg_trie_data*)
2808 PL_reg_re->data->data[ARG(scan)];
d8319b27 2809 SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ best ].wordnum - 1, 0 );
a3621e74 2810 PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at 0x%p%s\n",
e4584336 2811 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
d8319b27 2812 st->u.trie.accept_buff[best].wordnum,
cfd0369c 2813 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",scan,
a3621e74
YO
2814 PL_colors[5] );
2815 });
d8319b27
DM
2816 if ( best<st->u.trie.accepted ) {
2817 reg_trie_accepted tmp = st->u.trie.accept_buff[ best ];
2818 st->u.trie.accept_buff[ best ] = st->u.trie.accept_buff[ st->u.trie.accepted ];
2819 st->u.trie.accept_buff[ st->u.trie.accepted ] = tmp;
2820 best = st->u.trie.accepted;
a3621e74 2821 }
d8319b27 2822 PL_reginput = (char *)st->u.trie.accept_buff[ best ].endpos;
a3621e74
YO
2823
2824 /*
2825 as far as I can tell we only need the SAVETMPS/FREETMPS
2826 for re's with EVAL in them but I'm leaving them in for
2827 all until I can be sure.
2828 */
2829 SAVETMPS;
95b24440
DM
2830 REGMATCH(scan + NEXT_OFF(scan), TRIE2);
2831 /*** all unsaved local vars undefined at this point */
a3621e74
YO
2832 FREETMPS;
2833 }
2834 FREETMPS;
2835 LEAVE;
2836 }
2837
95b24440 2838 if (result) {
a3621e74
YO
2839 sayYES;
2840 } else {
2841 sayNO;
2842 }
2843 }
2844 /* unreached codepoint */
95b24440
DM
2845 case EXACT: {
2846 char *s = STRING(scan);
5d9a96ca 2847 st->ln = STR_LEN(scan);
eb160463 2848 if (do_utf8 != UTF) {
bc517b45 2849 /* The target and the pattern have differing utf8ness. */
1aa99e6b 2850 char *l = locinput;
5d9a96ca 2851 const char *e = s + st->ln;
a72c7584 2852
5ff6fc6d
JH
2853 if (do_utf8) {
2854 /* The target is utf8, the pattern is not utf8. */
1aa99e6b 2855 while (s < e) {
a3b680e6 2856 STRLEN ulen;
1aa99e6b 2857 if (l >= PL_regeol)
5ff6fc6d
JH
2858 sayNO;
2859 if (NATIVE_TO_UNI(*(U8*)s) !=
89ebb4a3 2860 utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
041457d9 2861 uniflags))
5ff6fc6d 2862 sayNO;
bc517b45 2863 l += ulen;
5ff6fc6d 2864 s ++;
1aa99e6b 2865 }
5ff6fc6d
JH
2866 }
2867 else {
2868 /* The target is not utf8, the pattern is utf8. */
1aa99e6b 2869 while (s < e) {
a3b680e6 2870 STRLEN ulen;
1aa99e6b
IH
2871 if (l >= PL_regeol)
2872 sayNO;
5ff6fc6d 2873 if (NATIVE_TO_UNI(*((U8*)l)) !=
89ebb4a3 2874 utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
041457d9 2875 uniflags))
1aa99e6b 2876 sayNO;
bc517b45 2877 s += ulen;
a72c7584 2878 l ++;
1aa99e6b 2879 }
5ff6fc6d 2880 }
1aa99e6b
IH
2881 locinput = l;
2882 nextchr = UCHARAT(locinput);
2883 break;
2884 }
bc517b45 2885 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2886 /* Inline the first character, for speed. */
2887 if (UCHARAT(s) != nextchr)
2888 sayNO;
5d9a96ca 2889 if (PL_regeol - locinput < st->ln)
d6a28714 2890 sayNO;
5d9a96ca 2891 if (st->ln > 1 && memNE(s, locinput, st->ln))
d6a28714 2892 sayNO;
5d9a96ca 2893 locinput += st->ln;
d6a28714
JH
2894 nextchr = UCHARAT(locinput);
2895 break;
95b24440 2896 }
d6a28714 2897 case EXACTFL:
b8c5462f
JH
2898 PL_reg_flags |= RF_tainted;
2899 /* FALL THROUGH */
95b24440
DM
2900 case EXACTF: {
2901 char *s = STRING(scan);
5d9a96ca 2902 st->ln = STR_LEN(scan);
d6a28714 2903
d07ddd77
JH
2904 if (do_utf8 || UTF) {
2905 /* Either target or the pattern are utf8. */
d6a28714 2906 char *l = locinput;
d07ddd77 2907 char *e = PL_regeol;
bc517b45 2908
5d9a96ca 2909 if (ibcmp_utf8(s, 0, st->ln, (bool)UTF,
1feea2c7 2910 l, &e, 0, do_utf8)) {
5486206c
JH
2911 /* One more case for the sharp s:
2912 * pack("U0U*", 0xDF) =~ /ss/i,
2913 * the 0xC3 0x9F are the UTF-8
2914 * byte sequence for the U+00DF. */
2915 if (!(do_utf8 &&
2916 toLOWER(s[0]) == 's' &&
5d9a96ca 2917 st->ln >= 2 &&
5486206c
JH
2918 toLOWER(s[1]) == 's' &&
2919 (U8)l[0] == 0xC3 &&
2920 e - l >= 2 &&
2921 (U8)l[1] == 0x9F))
2922 sayNO;
2923 }
d07ddd77
JH
2924 locinput = e;
2925 nextchr = UCHARAT(locinput);
2926 break;
a0ed51b3 2927 }
d6a28714 2928
bc517b45
JH
2929 /* Neither the target and the pattern are utf8. */
2930
d6a28714
JH
2931 /* Inline the first character, for speed. */
2932 if (UCHARAT(s) != nextchr &&
2933 UCHARAT(s) != ((OP(scan) == EXACTF)
2934 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2935 sayNO;
5d9a96ca 2936 if (PL_regeol - locinput < st->ln)
b8c5462f 2937 sayNO;
5d9a96ca
DM
2938 if (st->ln > 1 && (OP(scan) == EXACTF
2939 ? ibcmp(s, locinput, st->ln)
2940 : ibcmp_locale(s, locinput, st->ln)))
4633a7c4 2941 sayNO;
5d9a96ca 2942 locinput += st->ln;
d6a28714 2943 nextchr = UCHARAT(locinput);
a0d0e21e 2944 break;
95b24440 2945 }
d6a28714 2946 case ANYOF:
ffc61ed2 2947 if (do_utf8) {
9e55ce06
JH
2948 STRLEN inclasslen = PL_regeol - locinput;
2949
ba7b4546 2950 if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
e0f9d4a8 2951 sayNO_ANYOF;
ffc61ed2
JH
2952 if (locinput >= PL_regeol)
2953 sayNO;
0f0076b4 2954 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
b8c5462f 2955 nextchr = UCHARAT(locinput);
e0f9d4a8 2956 break;
ffc61ed2
JH
2957 }
2958 else {
2959 if (nextchr < 0)
2960 nextchr = UCHARAT(locinput);
7d3e948e 2961 if (!REGINCLASS(scan, (U8*)locinput))
e0f9d4a8 2962 sayNO_ANYOF;
ffc61ed2
JH
2963 if (!nextchr && locinput >= PL_regeol)
2964 sayNO;
2965 nextchr = UCHARAT(++locinput);
e0f9d4a8
JH
2966 break;
2967 }
2968 no_anyof:
2969 /* If we might have the case of the German sharp s
2970 * in a casefolding Unicode character class. */
2971
ebc501f0
JH
2972 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2973 locinput += SHARP_S_SKIP;
e0f9d4a8 2974 nextchr = UCHARAT(locinput);
ffc61ed2 2975 }
e0f9d4a8
JH
2976 else
2977 sayNO;
b8c5462f 2978 break;
d6a28714 2979 case ALNUML:
b8c5462f
JH
2980 PL_reg_flags |= RF_tainted;
2981 /* FALL THROUGH */
d6a28714 2982 case ALNUM:
b8c5462f 2983 if (!nextchr)
4633a7c4 2984 sayNO;
ffc61ed2 2985 if (do_utf8) {
1a4fad37 2986 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 2987 if (!(OP(scan) == ALNUM
3568d838 2988 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2989 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2990 {
2991 sayNO;
a0ed51b3 2992 }
b8c5462f 2993 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2994 nextchr = UCHARAT(locinput);
2995 break;
2996 }
ffc61ed2 2997 if (!(OP(scan) == ALNUM
d6a28714 2998 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2999 sayNO;
b8c5462f 3000 nextchr = UCHARAT(++locinput);
a0d0e21e 3001 break;
d6a28714 3002 case NALNUML:
b8c5462f
JH
3003 PL_reg_flags |= RF_tainted;
3004 /* FALL THROUGH */
d6a28714
JH
3005 case NALNUM:
3006 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 3007 sayNO;
ffc61ed2 3008 if (do_utf8) {
1a4fad37 3009 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3010 if (OP(scan) == NALNUM
3568d838 3011 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
3012 : isALNUM_LC_utf8((U8*)locinput))
3013 {
b8c5462f 3014 sayNO;
d6a28714 3015 }
b8c5462f
JH
3016 locinput += PL_utf8skip[nextchr];
3017 nextchr = UCHARAT(locinput);
3018 break;
3019 }
ffc61ed2 3020 if (OP(scan) == NALNUM
d6a28714 3021 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 3022 sayNO;
76e3520e 3023 nextchr = UCHARAT(++locinput);
a0d0e21e 3024 break;
d6a28714
JH
3025 case BOUNDL:
3026 case NBOUNDL:
3280af22 3027 PL_reg_flags |= RF_tainted;
bbce6d69 3028 /* FALL THROUGH */
d6a28714
JH
3029 case BOUND:
3030 case NBOUND:
3031 /* was last char in word? */
ffc61ed2 3032 if (do_utf8) {
12d33761 3033 if (locinput == PL_bostr)
5d9a96ca 3034 st->ln = '\n';
ffc61ed2 3035 else {
a3b680e6 3036 const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
9041c2e3 3037
4ad0818d 3038 st->ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
ffc61ed2
JH
3039 }
3040 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
5d9a96ca 3041 st->ln = isALNUM_uni(st->ln);
1a4fad37 3042 LOAD_UTF8_CHARCLASS_ALNUM();
3568d838 3043 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
3044 }
3045 else {
5d9a96ca 3046 st->ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(st->ln));
ffc61ed2
JH
3047 n = isALNUM_LC_utf8((U8*)locinput);
3048 }
a0ed51b3 3049 }
d6a28714 3050 else {
5d9a96ca 3051 st->ln = (locinput != PL_bostr) ?
12d33761 3052 UCHARAT(locinput - 1) : '\n';
ffc61ed2 3053 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
5d9a96ca 3054 st->ln = isALNUM(st->ln);
ffc61ed2
JH
3055 n = isALNUM(nextchr);
3056 }
3057 else {
5d9a96ca 3058 st->ln = isALNUM_LC(st->ln);
ffc61ed2
JH
3059 n = isALNUM_LC(nextchr);
3060 }
d6a28714 3061 }
5d9a96ca 3062 if (((!st->ln) == (!n)) == (OP(scan) == BOUND ||
ffc61ed2
JH
3063 OP(scan) == BOUNDL))
3064 sayNO;
a0ed51b3 3065 break;
d6a28714 3066 case SPACEL:
3280af22 3067 PL_reg_flags |= RF_tainted;
bbce6d69 3068 /* FALL THROUGH */
d6a28714 3069 case SPACE:
9442cb0e 3070 if (!nextchr)
4633a7c4 3071 sayNO;
1aa99e6b 3072 if (do_utf8) {
fd400ab9 3073 if (UTF8_IS_CONTINUED(nextchr)) {
1a4fad37 3074 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3075 if (!(OP(scan) == SPACE
3568d838 3076 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
3077 : isSPACE_LC_utf8((U8*)locinput)))
3078 {
3079 sayNO;
3080 }
3081 locinput += PL_utf8skip[nextchr];
3082 nextchr = UCHARAT(locinput);
3083 break;
d6a28714 3084 }
ffc61ed2
JH
3085 if (!(OP(scan) == SPACE
3086 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3087 sayNO;
3088 nextchr = UCHARAT(++locinput);
3089 }
3090 else {
3091 if (!(OP(scan) == SPACE
3092 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3093 sayNO;
3094 nextchr = UCHARAT(++locinput);
a0ed51b3 3095 }
a0ed51b3 3096 break;
d6a28714 3097 case NSPACEL:
3280af22 3098 PL_reg_flags |= RF_tainted;
bbce6d69 3099 /* FALL THROUGH */
d6a28714 3100 case NSPACE:
9442cb0e 3101 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3102 sayNO;
1aa99e6b 3103 if (do_utf8) {
1a4fad37 3104 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3105 if (OP(scan) == NSPACE
3568d838 3106 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 3107 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
3108 {
3109 sayNO;
3110 }
3111 locinput += PL_utf8skip[nextchr];
3112 nextchr = UCHARAT(locinput);
3113 break;
a0ed51b3 3114 }
ffc61ed2 3115 if (OP(scan) == NSPACE
d6a28714 3116 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 3117 sayNO;
76e3520e 3118 nextchr = UCHARAT(++locinput);
a0d0e21e 3119 break;
d6a28714 3120 case DIGITL:
a0ed51b3
LW
3121 PL_reg_flags |= RF_tainted;
3122 /* FALL THROUGH */
d6a28714 3123 case DIGIT:
9442cb0e 3124 if (!nextchr)
a0ed51b3 3125 sayNO;
1aa99e6b 3126 if (do_utf8) {
1a4fad37 3127 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3128 if (!(OP(scan) == DIGIT
3568d838 3129 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 3130 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 3131 {
a0ed51b3 3132 sayNO;
dfe13c55 3133 }
6f06b55f 3134 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3135 nextchr = UCHARAT(locinput);
3136 break;
3137 }
ffc61ed2 3138 if (!(OP(scan) == DIGIT
9442cb0e 3139 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
3140 sayNO;
3141 nextchr = UCHARAT(++locinput);
3142 break;
d6a28714 3143 case NDIGITL:
b8c5462f
JH
3144 PL_reg_flags |= RF_tainted;
3145 /* FALL THROUGH */
d6a28714 3146 case NDIGIT:
9442cb0e 3147 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3148 sayNO;
1aa99e6b 3149 if (do_utf8) {
1a4fad37 3150 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3151 if (OP(scan) == NDIGIT
3568d838 3152 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
3153 : isDIGIT_LC_utf8((U8*)locinput))
3154 {
a0ed51b3 3155 sayNO;
9442cb0e 3156 }
6f06b55f 3157 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3158 nextchr = UCHARAT(locinput);
3159 break;
3160 }
ffc61ed2 3161 if (OP(scan) == NDIGIT
9442cb0e 3162 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
3163 sayNO;
3164 nextchr = UCHARAT(++locinput);
3165 break;
3166 case CLUMP:
b7c83a7e 3167 if (locinput >= PL_regeol)
a0ed51b3 3168 sayNO;
b7c83a7e 3169 if (do_utf8) {
1a4fad37 3170 LOAD_UTF8_CHARCLASS_MARK();
b7c83a7e
JH
3171 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3172 sayNO;
3173 locinput += PL_utf8skip[nextchr];
3174 while (locinput < PL_regeol &&
3175 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3176 locinput += UTF8SKIP(locinput);
3177 if (locinput > PL_regeol)
3178 sayNO;
eb08e2da
JH
3179 }
3180 else
3181 locinput++;
a0ed51b3
LW
3182 nextchr = UCHARAT(locinput);
3183 break;
c8756f30 3184 case REFFL:
3280af22 3185 PL_reg_flags |= RF_tainted;
c8756f30 3186 /* FALL THROUGH */
c277df42 3187 case REF:
95b24440
DM
3188 case REFF: {
3189 char *s;
c277df42 3190 n = ARG(scan); /* which paren pair */
5d9a96ca 3191 st->ln = PL_regstartp[n];
2c2d71f5 3192 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
5d9a96ca 3193 if ((I32)*PL_reglastparen < n || st->ln == -1)
af3f8c16 3194 sayNO; /* Do not match unless seen CLOSEn. */
5d9a96ca 3195 if (st->ln == PL_regendp[n])
a0d0e21e 3196 break;
a0ed51b3 3197
5d9a96ca 3198 s = PL_bostr + st->ln;
1aa99e6b 3199 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 3200 char *l = locinput;
a3b680e6 3201 const char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
3202 /*
3203 * Note that we can't do the "other character" lookup trick as
3204 * in the 8-bit case (no pun intended) because in Unicode we
3205 * have to map both upper and title case to lower case.
3206 */
3207 if (OP(scan) == REFF) {
3208 while (s < e) {
a3b680e6
AL
3209 STRLEN ulen1, ulen2;
3210 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3211 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3212
a0ed51b3
LW
3213 if (l >= PL_regeol)
3214 sayNO;
a2a2844f
JH
3215 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3216 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 3217 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 3218 sayNO;
a2a2844f
JH
3219 s += ulen1;
3220 l += ulen2;
a0ed51b3
LW
3221 }
3222 }
3223 locinput = l;
3224 nextchr = UCHARAT(locinput);
3225 break;
3226 }
3227
a0d0e21e 3228 /* Inline the first character, for speed. */
76e3520e 3229 if (UCHARAT(s) != nextchr &&
c8756f30
AK
3230 (OP(scan) == REF ||
3231 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 3232 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 3233 sayNO;
5d9a96ca
DM
3234 st->ln = PL_regendp[n] - st->ln;
3235 if (locinput + st->ln > PL_regeol)
4633a7c4 3236 sayNO;
5d9a96ca
DM
3237 if (st->ln > 1 && (OP(scan) == REF
3238 ? memNE(s, locinput, st->ln)
c8756f30 3239 : (OP(scan) == REFF
5d9a96ca
DM
3240 ? ibcmp(s, locinput, st->ln)
3241 : ibcmp_locale(s, locinput, st->ln))))
4633a7c4 3242 sayNO;
5d9a96ca 3243 locinput += st->ln;
76e3520e 3244 nextchr = UCHARAT(locinput);
a0d0e21e 3245 break;
95b24440 3246 }
a0d0e21e
LW
3247
3248 case NOTHING:
c277df42 3249 case TAIL:
a0d0e21e
LW
3250 break;
3251 case BACK:
3252 break;
c277df42
IZ
3253 case EVAL:
3254 {
c277df42 3255 SV *ret;
8e5e9ebe 3256 {
4aabdb9b
DM
3257 /* execute the code in the {...} */
3258 dSP;
6136c704 3259 SV ** const before = SP;
4aabdb9b
DM
3260 OP_4tree * const oop = PL_op;
3261 COP * const ocurcop = PL_curcop;
3262 PAD *old_comppad;
3263 struct regexp * const oreg = PL_reg_re;
3264
3265 n = ARG(scan);
4f639d21 3266 PL_op = (OP_4tree*)PL_reg_re->data->data[n];
4aabdb9b 3267 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
4f639d21 3268 PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_reg_re->data->data[n + 2]);
4aabdb9b
DM
3269 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
3270
8e5e9ebe
RGS
3271 CALLRUNOPS(aTHX); /* Scalar context. */
3272 SPAGAIN;
3273 if (SP == before)
075aa684 3274 ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
8e5e9ebe
RGS
3275 else {
3276 ret = POPs;
3277 PUTBACK;
3278 }
4aabdb9b
DM
3279
3280 PL_op = oop;
3281 PAD_RESTORE_LOCAL(old_comppad);
3282 PL_curcop = ocurcop;
87830502 3283 PL_reg_re = oreg;
4aabdb9b
DM
3284 if (!st->logical) {
3285 /* /(?{...})/ */
3286 sv_setsv(save_scalar(PL_replgv), ret);
4aabdb9b
DM
3287 break;
3288 }
8e5e9ebe 3289 }
4aabdb9b
DM
3290 if (st->logical == 2) { /* Postponed subexpression: /(??{...})/ */
3291 regexp *re;
3292 re_cc_state state;
3293 int toggleutf;
8e5e9ebe 3294
4aabdb9b 3295 {
4f639d21
DM
3296 /* extract RE object from returned value; compiling if
3297 * necessary */
3298
6136c704 3299 MAGIC *mg = NULL;
4aabdb9b 3300 SV *sv;
faf82a0b
AE
3301 if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3302 mg = mg_find(sv, PERL_MAGIC_qr);
3303 else if (SvSMAGICAL(ret)) {
3304 if (SvGMAGICAL(ret))
3305 sv_unmagic(ret, PERL_MAGIC_qr);
3306 else
3307 mg = mg_find(ret, PERL_MAGIC_qr);
0f5d15d6 3308 }
faf82a0b 3309
0f5d15d6
IZ
3310 if (mg) {
3311 re = (regexp *)mg->mg_obj;
df0003d4 3312 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
3313 }
3314 else {
3315 STRLEN len;
6136c704 3316 const char * const t = SvPV_const(ret, len);
0f5d15d6 3317 PMOP pm;
a3b680e6 3318 const I32 osize = PL_regsize;
0f5d15d6 3319
5fcd1c1b 3320 Zero(&pm, 1, PMOP);
4aabdb9b 3321 if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
83003860 3322 re = CALLREGCOMP(aTHX_ (char*)t, (char*)t + len, &pm);
9041c2e3 3323 if (!(SvFLAGS(ret)
faf82a0b
AE
3324 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3325 | SVs_GMG)))
14befaf4
DM
3326 sv_magic(ret,(SV*)ReREFCNT_inc(re),
3327 PERL_MAGIC_qr,0,0);
0f5d15d6 3328 PL_regsize = osize;
0f5d15d6 3329 }
4aabdb9b
DM
3330 }
3331 DEBUG_EXECUTE_r(
3332 PerlIO_printf(Perl_debug_log,
3333 "Entering embedded \"%s%.60s%s%s\"\n",
3334 PL_colors[0],
3335 re->precomp,
3336 PL_colors[1],
3337 (strlen(re->precomp) > 60 ? "..." : ""))
3338 );
3339 state.node = next;
3340 state.prev = PL_reg_call_cc;
3341 state.cc = st->cc;
3342 state.re = PL_reg_re;
2c2d71f5 3343
4aabdb9b
DM
3344 st->cc = 0;
3345
3346 st->u.eval.cp = regcppush(0); /* Save *all* the positions. */
3347 REGCP_SET(st->u.eval.lastcp);
4f639d21 3348 PL_reg_re = re;
4aabdb9b
DM
3349 state.ss = PL_savestack_ix;
3350 *PL_reglastparen = 0;
3351 *PL_reglastcloseparen = 0;
3352 PL_reg_call_cc = &state;
3353 PL_reginput = locinput;
3354 toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
3355 ((re->reganch & ROPT_UTF8) != 0);
3356 if (toggleutf) PL_reg_flags ^= RF_utf8;
3357
3358 /* XXXX This is too dramatic a measure... */
3359 PL_reg_maxiter = 0;
3360
3361 /* XXX the only recursion left in regmatch() */
4f639d21 3362 if (regmatch(re, re->program + 1)) {
4aabdb9b
DM
3363 /* Even though we succeeded, we need to restore
3364 global variables, since we may be wrapped inside
3365 SUSPEND, thus the match may be not finished yet. */
3366
3367 /* XXXX Do this only if SUSPENDed? */
0f5d15d6 3368 PL_reg_call_cc = state.prev;
5d9a96ca 3369 st->cc = state.cc;
0f5d15d6 3370 PL_reg_re = state.re;
4f639d21 3371
cb50f42d 3372 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c2d71f5
JH
3373
3374 /* XXXX This is too dramatic a measure... */
3375 PL_reg_maxiter = 0;
3376
4aabdb9b
DM
3377 /* These are needed even if not SUSPEND. */
3378 ReREFCNT_dec(re);
3379 regcpblow(st->u.eval.cp);
3380 sayYES;
0f5d15d6 3381 }
4aabdb9b
DM
3382 ReREFCNT_dec(re);
3383 REGCP_UNWIND(st->u.eval.lastcp);
4f639d21 3384 regcppop(rex);
4aabdb9b
DM
3385 PL_reg_call_cc = state.prev;
3386 st->cc = state.cc;
3387 PL_reg_re = state.re;
4aabdb9b
DM
3388 if (toggleutf) PL_reg_flags ^= RF_utf8;
3389
3390 /* XXXX This is too dramatic a measure... */
3391 PL_reg_maxiter = 0;
3392
5d9a96ca 3393 st->logical = 0;
4aabdb9b
DM
3394 sayNO;
3395 /* NOTREACHED */
a0ed51b3 3396 }
4aabdb9b
DM
3397 /* /(?(?{...})X|Y)/ */
3398 st->sw = SvTRUE(ret);
3399 st->logical = 0;
c277df42
IZ
3400 break;
3401 }
a0d0e21e 3402 case OPEN:
c277df42 3403 n = ARG(scan); /* which paren pair */
3280af22
NIS
3404 PL_reg_start_tmp[n] = locinput;
3405 if (n > PL_regsize)
3406 PL_regsize = n;
a0d0e21e
LW
3407 break;
3408 case CLOSE:
c277df42 3409 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
3410 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3411 PL_regendp[n] = locinput - PL_bostr;
eb160463 3412 if (n > (I32)*PL_reglastparen)
3280af22 3413 *PL_reglastparen = n;
a01268b5 3414 *PL_reglastcloseparen = n;
a0d0e21e 3415 break;
c277df42
IZ
3416 case GROUPP:
3417 n = ARG(scan); /* which paren pair */
5d9a96ca 3418 st->sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
3419 break;
3420 case IFTHEN:
2c2d71f5 3421 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
5d9a96ca 3422 if (st->sw)
c277df42
IZ
3423 next = NEXTOPER(NEXTOPER(scan));
3424 else {
3425 next = scan + ARG(scan);
3426 if (OP(next) == IFTHEN) /* Fake one. */
3427 next = NEXTOPER(NEXTOPER(next));
3428 }
3429 break;
3430 case LOGICAL:
5d9a96ca 3431 st->logical = scan->flags;
c277df42 3432 break;
2ab05381 3433/*******************************************************************
a0374537
DM
3434 cc points to the regmatch_state associated with the most recent CURLYX.
3435 This struct contains info about the innermost (...)* loop (an
3436 "infoblock"), and a pointer to the next outer cc.
2ab05381
IZ
3437
3438 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3439
95b24440 3440 1) After matching Y, regnode for CURLYX is processed;
2ab05381 3441
a0374537 3442 2) This regnode populates cc, and calls regmatch() recursively
95b24440 3443 with the starting point at WHILEM node;
2ab05381
IZ
3444
3445 3) Each hit of WHILEM node tries to match A and Z (in the order
3446 depending on the current iteration, min/max of {min,max} and
3447 greediness). The information about where are nodes for "A"
a0374537 3448 and "Z" is read from cc, as is info on how many times "A"
2ab05381
IZ
3449 was already matched, and greediness.
3450
3451 4) After A matches, the same WHILEM node is hit again.
3452
95b24440 3453 5) Each time WHILEM is hit, cc is the infoblock created by CURLYX
2ab05381 3454 of the same pair. Thus when WHILEM tries to match Z, it temporarily
95b24440 3455 resets cc, since this Y(A)*Z can be a part of some other loop:
2ab05381
IZ
3456 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
3457 of the external loop.
3458
a0374537 3459 Currently present infoblocks form a tree with a stem formed by st->cc
2ab05381
IZ
3460 and whatever it mentions via ->next, and additional attached trees
3461 corresponding to temporarily unset infoblocks as in "5" above.
3462
95b24440 3463 In the following picture, infoblocks for outer loop of
2ab05381
IZ
3464 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
3465 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
3466 infoblocks are drawn below the "reset" infoblock.
3467
3468 In fact in the picture below we do not show failed matches for Z and T
3469 by WHILEM blocks. [We illustrate minimal matches, since for them it is
3470 more obvious *why* one needs to *temporary* unset infoblocks.]
3471
3472 Matched REx position InfoBlocks Comment
3473 (Y(A)*?Z)*?T x
3474 Y(A)*?Z)*?T x <- O
3475 Y (A)*?Z)*?T x <- O
3476 Y A)*?Z)*?T x <- O <- I
3477 YA )*?Z)*?T x <- O <- I
3478 YA A)*?Z)*?T x <- O <- I
3479 YAA )*?Z)*?T x <- O <- I
3480 YAA Z)*?T x <- O # Temporary unset I
3481 I
3482
3483 YAAZ Y(A)*?Z)*?T x <- O
3484 I
3485
3486 YAAZY (A)*?Z)*?T x <- O
3487 I
3488
3489 YAAZY A)*?Z)*?T x <- O <- I
3490 I
3491
3492 YAAZYA )*?Z)*?T x <- O <- I
3493 I
3494
3495 YAAZYA Z)*?T x <- O # Temporary unset I
3496 I,I
3497
3498 YAAZYAZ )*?T x <- O
3499 I,I
3500
3501 YAAZYAZ T x # Temporary unset O
3502 O
3503 I,I
3504
3505 YAAZYAZT x
3506 O
3507 I,I
3508 *******************************************************************/
95b24440 3509
a0d0e21e 3510 case CURLYX: {
cb434fcc
IZ
3511 /* No need to save/restore up to this paren */
3512 I32 parenfloor = scan->flags;
c277df42
IZ
3513
3514 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3515 next += ARG(next);
cb434fcc
IZ
3516 /* XXXX Probably it is better to teach regpush to support
3517 parenfloor > PL_regsize... */
eb160463 3518 if (parenfloor > (I32)*PL_reglastparen)
cb434fcc 3519 parenfloor = *PL_reglastparen; /* Pessimization... */
a0374537 3520
d8319b27
DM
3521 st->u.curlyx.cp = PL_savestack_ix;
3522 st->u.curlyx.outercc = st->cc;
a0374537
DM
3523 st->cc = st;
3524 /* these fields contain the state of the current curly.
3525 * they are accessed by subsequent WHILEMs;
3526 * cur and lastloc are also updated by WHILEM */
d8319b27
DM
3527 st->u.curlyx.parenfloor = parenfloor;
3528 st->u.curlyx.cur = -1; /* this will be updated by WHILEM */
3529 st->u.curlyx.min = ARG1(scan);
3530 st->u.curlyx.max = ARG2(scan);
3531 st->u.curlyx.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3532 st->u.curlyx.lastloc = 0;
a0374537
DM
3533 /* st->next and st->minmod are also read by WHILEM */
3534
3280af22 3535 PL_reginput = locinput;
95b24440
DM
3536 REGMATCH(PREVOPER(next), CURLYX); /* start on the WHILEM */
3537 /*** all unsaved local vars undefined at this point */
d8319b27
DM
3538 regcpblow(st->u.curlyx.cp);
3539 st->cc = st->u.curlyx.outercc;
95b24440 3540 saySAME(result);
a0d0e21e 3541 }
5f66b61c 3542 /* NOTREACHED */
a0d0e21e
LW
3543 case WHILEM: {
3544 /*
3545 * This is really hard to understand, because after we match
3546 * what we're trying to match, we must make sure the rest of
2c2d71f5 3547 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
3548 * to go back UP the parse tree by recursing ever deeper. And
3549 * if it fails, we have to reset our parent's current state
3550 * that we can try again after backing off.
3551 */
3552
d8319b27
DM
3553 st->u.whilem.lastloc = st->cc->u.curlyx.lastloc; /* Detection of 0-len. */
3554 st->u.whilem.cache_offset = 0;
3555 st->u.whilem.cache_bit = 0;
c277df42 3556
d8319b27 3557 n = st->cc->u.curlyx.cur + 1; /* how many we know we matched */
3280af22 3558 PL_reginput = locinput;
a0d0e21e 3559
a3621e74 3560 DEBUG_EXECUTE_r(
9041c2e3 3561 PerlIO_printf(Perl_debug_log,
91f3b821 3562 "%*s %ld out of %ld..%ld cc=%"UVxf"\n",
3280af22 3563 REPORT_CODE_OFF+PL_regindent*2, "",
d8319b27
DM
3564 (long)n, (long)st->cc->u.curlyx.min,
3565 (long)st->cc->u.curlyx.max, PTR2UV(st->cc))
c277df42 3566 );
4633a7c4 3567
a0d0e21e
LW
3568 /* If degenerate scan matches "", assume scan done. */
3569
d8319b27
DM
3570 if (locinput == st->cc->u.curlyx.lastloc && n >= st->cc->u.curlyx.min) {
3571 st->u.whilem.savecc = st->cc;
3572 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3573 if (st->cc)
d8319b27 3574 st->ln = st->cc->u.curlyx.cur;
a3621e74 3575 DEBUG_EXECUTE_r(
c3464db5
DD
3576 PerlIO_printf(Perl_debug_log,
3577 "%*s empty match detected, try continuation...\n",
3280af22 3578 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3579 );
d8319b27 3580 REGMATCH(st->u.whilem.savecc->next, WHILEM1);
95b24440 3581 /*** all unsaved local vars undefined at this point */
d8319b27 3582 st->cc = st->u.whilem.savecc;
95b24440 3583 if (result)
4633a7c4 3584 sayYES;
d8319b27
DM
3585 if (st->cc->u.curlyx.outercc)
3586 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
4633a7c4 3587 sayNO;
a0d0e21e
LW
3588 }
3589
3590 /* First just match a string of min scans. */
3591
d8319b27
DM
3592 if (n < st->cc->u.curlyx.min) {
3593 st->cc->u.curlyx.cur = n;
3594 st->cc->u.curlyx.lastloc = locinput;
3595 REGMATCH(st->cc->u.curlyx.scan, WHILEM2);
95b24440
DM
3596 /*** all unsaved local vars undefined at this point */
3597 if (result)
4633a7c4 3598 sayYES;
d8319b27
DM
3599 st->cc->u.curlyx.cur = n - 1;
3600 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
4633a7c4 3601 sayNO;
a0d0e21e
LW
3602 }
3603
2c2d71f5
JH
3604 if (scan->flags) {
3605 /* Check whether we already were at this position.
3606 Postpone detection until we know the match is not
3607 *that* much linear. */
3608 if (!PL_reg_maxiter) {
3609 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3610 PL_reg_leftiter = PL_reg_maxiter;
3611 }
3612 if (PL_reg_leftiter-- == 0) {
a3b680e6 3613 const I32 size = (PL_reg_maxiter + 7 + POSCACHE_START)/8;
2c2d71f5 3614 if (PL_reg_poscache) {
eb160463 3615 if ((I32)PL_reg_poscache_size < size) {
2c2d71f5
JH
3616 Renew(PL_reg_poscache, size, char);
3617 PL_reg_poscache_size = size;
3618 }
3619 Zero(PL_reg_poscache, size, char);
3620 }
3621 else {
3622 PL_reg_poscache_size = size;
a02a5408 3623 Newxz(PL_reg_poscache, size, char);
2c2d71f5 3624 }
a3621e74 3625 DEBUG_EXECUTE_r(
2c2d71f5
JH
3626 PerlIO_printf(Perl_debug_log,
3627 "%sDetected a super-linear match, switching on caching%s...\n",
3628 PL_colors[4], PL_colors[5])
3629 );
3630 }
3631 if (PL_reg_leftiter < 0) {
d8319b27 3632 st->u.whilem.cache_offset = locinput - PL_bostr;
2c2d71f5 3633
d8319b27
DM
3634 st->u.whilem.cache_offset = (scan->flags & 0xf) - 1 + POSCACHE_START
3635 + st->u.whilem.cache_offset * (scan->flags>>4);
3636 st->u.whilem.cache_bit = st->u.whilem.cache_offset % 8;
3637 st->u.whilem.cache_offset /= 8;
3638 if (PL_reg_poscache[st->u.whilem.cache_offset] & (1<<st->u.whilem.cache_bit)) {
a3621e74 3639 DEBUG_EXECUTE_r(
2c2d71f5
JH
3640 PerlIO_printf(Perl_debug_log,
3641 "%*s already tried at this position...\n",
3642 REPORT_CODE_OFF+PL_regindent*2, "")
3643 );
3ab3c9b4
HS
3644 if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))
3645 /* cache records success */
c2b0868c
HS
3646 sayYES;
3647 else
3ab3c9b4 3648 /* cache records failure */
c2b0868c 3649 sayNO_SILENT;
2c2d71f5 3650 }
d8319b27 3651 PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit);
2c2d71f5
JH
3652 }
3653 }
3654
a0d0e21e
LW
3655 /* Prefer next over scan for minimal matching. */
3656
5d9a96ca 3657 if (st->cc->minmod) {
d8319b27
DM
3658 st->u.whilem.savecc = st->cc;
3659 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3660 if (st->cc)
d8319b27
DM
3661 st->ln = st->cc->u.curlyx.cur;
3662 st->u.whilem.cp = regcppush(st->u.whilem.savecc->u.curlyx.parenfloor);
3663 REGCP_SET(st->u.whilem.lastcp);
3664 REGMATCH(st->u.whilem.savecc->next, WHILEM3);
95b24440 3665 /*** all unsaved local vars undefined at this point */
d8319b27 3666 st->cc = st->u.whilem.savecc;
95b24440 3667 if (result) {
d8319b27 3668 regcpblow(st->u.whilem.cp);
3ab3c9b4 3669 CACHEsayYES; /* All done. */
5f05dabc 3670 }
d8319b27 3671 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3672 regcppop(rex);
d8319b27
DM
3673 if (st->cc->u.curlyx.outercc)
3674 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
a0d0e21e 3675
d8319b27 3676 if (n >= st->cc->u.curlyx.max) { /* Maximum greed exceeded? */
9041c2e3 3677 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3678 && !(PL_reg_flags & RF_warned)) {
3679 PL_reg_flags |= RF_warned;
9014280d 3680 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
2f3ca594
GS
3681 "Complex regular subexpression recursion",
3682 REG_INFTY - 1);
c277df42 3683 }
3ab3c9b4 3684 CACHEsayNO;
c277df42 3685 }
a687059c 3686
a3621e74 3687 DEBUG_EXECUTE_r(
c3464db5
DD
3688 PerlIO_printf(Perl_debug_log,
3689 "%*s trying longer...\n",
3280af22 3690 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3691 );
a0d0e21e 3692 /* Try scanning more and see if it helps. */
3280af22 3693 PL_reginput = locinput;
d8319b27
DM
3694 st->cc->u.curlyx.cur = n;
3695 st->cc->u.curlyx.lastloc = locinput;
3696 st->u.whilem.cp = regcppush(st->cc->u.curlyx.parenfloor);
3697 REGCP_SET(st->u.whilem.lastcp);
3698 REGMATCH(st->cc->u.curlyx.scan, WHILEM4);
95b24440
DM
3699 /*** all unsaved local vars undefined at this point */
3700 if (result) {
d8319b27 3701 regcpblow(st->u.whilem.cp);
3ab3c9b4 3702 CACHEsayYES;
5f05dabc 3703 }
d8319b27 3704 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3705 regcppop(rex);
d8319b27
DM
3706 st->cc->u.curlyx.cur = n - 1;
3707 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
3ab3c9b4 3708 CACHEsayNO;
a0d0e21e
LW
3709 }
3710
3711 /* Prefer scan over next for maximal matching. */
3712
d8319b27
DM
3713 if (n < st->cc->u.curlyx.max) { /* More greed allowed? */
3714 st->u.whilem.cp = regcppush(st->cc->u.curlyx.parenfloor);
3715 st->cc->u.curlyx.cur = n;
3716 st->cc->u.curlyx.lastloc = locinput;
3717 REGCP_SET(st->u.whilem.lastcp);
3718 REGMATCH(st->cc->u.curlyx.scan, WHILEM5);
95b24440
DM
3719 /*** all unsaved local vars undefined at this point */
3720 if (result) {
d8319b27 3721 regcpblow(st->u.whilem.cp);
3ab3c9b4 3722 CACHEsayYES;
5f05dabc 3723 }
d8319b27 3724 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3725 regcppop(rex); /* Restore some previous $<digit>s? */
3280af22 3726 PL_reginput = locinput;
a3621e74 3727 DEBUG_EXECUTE_r(
c3464db5
DD
3728 PerlIO_printf(Perl_debug_log,
3729 "%*s failed, try continuation...\n",
3280af22 3730 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3731 );
3732 }
9041c2e3 3733 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3734 && !(PL_reg_flags & RF_warned)) {
3280af22 3735 PL_reg_flags |= RF_warned;
9014280d 3736 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
cb5d145d
GS
3737 "Complex regular subexpression recursion",
3738 REG_INFTY - 1);
a0d0e21e
LW
3739 }
3740
3741 /* Failed deeper matches of scan, so see if this one works. */
d8319b27
DM
3742 st->u.whilem.savecc = st->cc;
3743 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3744 if (st->cc)
d8319b27
DM
3745 st->ln = st->cc->u.curlyx.cur;
3746 REGMATCH(st->u.whilem.savecc->next, WHILEM6);
95b24440 3747 /*** all unsaved local vars undefined at this point */
d8319b27 3748 st->cc = st->u.whilem.savecc;
95b24440 3749 if (result)
3ab3c9b4 3750 CACHEsayYES;
d8319b27
DM
3751 if (st->cc->u.curlyx.outercc)
3752 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
3753 st->cc->u.curlyx.cur = n - 1;
3754 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
3ab3c9b4 3755 CACHEsayNO;
a0d0e21e 3756 }
5f66b61c 3757 /* NOTREACHED */
9041c2e3 3758 case BRANCHJ:
c277df42
IZ
3759 next = scan + ARG(scan);
3760 if (next == scan)
3761 next = NULL;
3762 inner = NEXTOPER(NEXTOPER(scan));
3763 goto do_branch;
9041c2e3 3764 case BRANCH:
c277df42
IZ
3765 inner = NEXTOPER(scan);
3766 do_branch:
3767 {
e822a8b4
DM
3768 I32 type;
3769 type = OP(scan);
3770 if (OP(next) != type) /* No choice. */
c277df42 3771 next = inner; /* Avoid recursion. */
a0d0e21e 3772 else {
a3b680e6 3773 const I32 lastparen = *PL_reglastparen;
02db2b7b 3774 /* Put unwinding data on stack */
6136c704
AL
3775 const I32 unwind1 = SSNEWt(1,re_unwind_branch_t);
3776 re_unwind_branch_t * const uw = SSPTRt(unwind1,re_unwind_branch_t);
3777
5d9a96ca
DM
3778 uw->prev = st->unwind;
3779 st->unwind = unwind1;
e822a8b4 3780 uw->type = ((type == BRANCH)
02db2b7b
IZ
3781 ? RE_UNWIND_BRANCH
3782 : RE_UNWIND_BRANCHJ);
3783 uw->lastparen = lastparen;
3784 uw->next = next;
3785 uw->locinput = locinput;
3786 uw->nextchr = nextchr;
3787#ifdef DEBUGGING
3788 uw->regindent = ++PL_regindent;
3789#endif
c277df42 3790
02db2b7b
IZ
3791 REGCP_SET(uw->lastcp);
3792
3793 /* Now go into the first branch */
3794 next = inner;
a687059c 3795 }
a0d0e21e
LW
3796 }
3797 break;
3798 case MINMOD:
5d9a96ca 3799 st->minmod = 1;
a0d0e21e 3800 break;
c277df42
IZ
3801 case CURLYM:
3802 {
d8319b27 3803 st->u.curlym.l = st->u.curlym.matches = 0;
9041c2e3 3804
c277df42 3805 /* We suppose that the next guy does not need
0e788c72 3806 backtracking: in particular, it is of constant non-zero length,
c277df42 3807 and has no parenths to influence future backrefs. */
5d9a96ca 3808 st->ln = ARG1(scan); /* min to match */
c277df42 3809 n = ARG2(scan); /* max to match */
d8319b27
DM
3810 st->u.curlym.paren = scan->flags;
3811 if (st->u.curlym.paren) {
3812 if (st->u.curlym.paren > PL_regsize)
3813 PL_regsize = st->u.curlym.paren;
3814 if (st->u.curlym.paren > (I32)*PL_reglastparen)
3815 *PL_reglastparen = st->u.curlym.paren;
c277df42 3816 }
dc45a647 3817 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
d8319b27 3818 if (st->u.curlym.paren)
c277df42 3819 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3820 PL_reginput = locinput;
d8319b27
DM
3821 st->u.curlym.maxwanted = st->minmod ? st->ln : n;
3822 if (st->u.curlym.maxwanted) {
3823 while (PL_reginput < PL_regeol && st->u.curlym.matches < st->u.curlym.maxwanted) {
95b24440
DM
3824 REGMATCH(scan, CURLYM1);
3825 /*** all unsaved local vars undefined at this point */
3826 if (!result)
6407bf3b 3827 break;
d8319b27
DM
3828 /* on first match, determine length, u.curlym.l */
3829 if (!st->u.curlym.matches++) {
6407bf3b
DM
3830 if (PL_reg_match_utf8) {
3831 char *s = locinput;
3832 while (s < PL_reginput) {
d8319b27 3833 st->u.curlym.l++;
6407bf3b
DM
3834 s += UTF8SKIP(s);
3835 }
3836 }
3837 else {
d8319b27 3838 st->u.curlym.l = PL_reginput - locinput;
6407bf3b 3839 }
d8319b27
DM
3840 if (st->u.curlym.l == 0) {
3841 st->u.curlym.matches = st->u.curlym.maxwanted;
6407bf3b
DM
3842 break;
3843 }
3844 }
3845 locinput = PL_reginput;
3846 }
3847 }
3848
3849 PL_reginput = locinput;
3850
5d9a96ca
DM
3851 if (st->minmod) {
3852 st->minmod = 0;
d8319b27 3853 if (st->ln && st->u.curlym.matches < st->ln)
c277df42 3854 sayNO;
cca55fe3 3855 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3856 regnode *text_node = next;
3857
cca55fe3 3858 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3859
d8319b27 3860 if (! HAS_TEXT(text_node)) st->u.curlym.c1 = st->u.curlym.c2 = -1000;
5f80c4cf 3861 else {
cca55fe3 3862 if (PL_regkind[(U8)OP(text_node)] == REF) {
d8319b27 3863 st->u.curlym.c1 = st->u.curlym.c2 = -1000;
44a68960 3864 goto assume_ok_MM;
cca55fe3 3865 }
d8319b27 3866 else { st->u.curlym.c1 = (U8)*STRING(text_node); }
af5decee 3867 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
d8319b27 3868 st->u.curlym.c2 = PL_fold[st->u.curlym.c1];
af5decee 3869 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
d8319b27 3870 st->u.curlym.c2 = PL_fold_locale[st->u.curlym.c1];
5f80c4cf 3871 else
d8319b27 3872 st->u.curlym.c2 = st->u.curlym.c1;
5f80c4cf 3873 }
a0ed51b3
LW
3874 }
3875 else
d8319b27 3876 st->u.curlym.c1 = st->u.curlym.c2 = -1000;
cca55fe3 3877 assume_ok_MM:
d8319b27 3878 REGCP_SET(st->u.curlym.lastcp);
5d9a96ca 3879 while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */
c277df42 3880 /* If it could work, try it. */
d8319b27
DM
3881 if (st->u.curlym.c1 == -1000 ||
3882 UCHARAT(PL_reginput) == st->u.curlym.c1 ||
3883 UCHARAT(PL_reginput) == st->u.curlym.c2)
c277df42 3884 {
d8319b27 3885 if (st->u.curlym.paren) {
5d9a96ca 3886 if (st->ln) {
d8319b27
DM
3887 PL_regstartp[st->u.curlym.paren] =
3888 HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr;
3889 PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3890 }
3891 else
d8319b27 3892 PL_regendp[st->u.curlym.paren] = -1;
c277df42 3893 }
95b24440
DM
3894 REGMATCH(next, CURLYM2);
3895 /*** all unsaved local vars undefined at this point */
3896 if (result)
c277df42 3897 sayYES;
d8319b27 3898 REGCP_UNWIND(st->u.curlym.lastcp);
c277df42
IZ
3899 }
3900 /* Couldn't or didn't -- move forward. */
3280af22 3901 PL_reginput = locinput;
95b24440
DM
3902 REGMATCH(scan, CURLYM3);
3903 /*** all unsaved local vars undefined at this point */
3904 if (result) {
5d9a96ca 3905 st->ln++;
3280af22 3906 locinput = PL_reginput;
c277df42
IZ
3907 }
3908 else
3909 sayNO;
3910 }
a0ed51b3
LW
3911 }
3912 else {
a3621e74 3913 DEBUG_EXECUTE_r(
5c0ca799 3914 PerlIO_printf(Perl_debug_log,
95b24440
DM
3915 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
3916 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
d8319b27 3917 (IV) st->u.curlym.matches, (IV)st->u.curlym.l)
c277df42 3918 );
d8319b27 3919 if (st->u.curlym.matches >= st->ln) {
cca55fe3 3920 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3921 regnode *text_node = next;
3922
cca55fe3 3923 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3924
d8319b27 3925 if (! HAS_TEXT(text_node)) st->u.curlym.c1 = st->u.curlym.c2 = -1000;
5f80c4cf 3926 else {
cca55fe3 3927 if (PL_regkind[(U8)OP(text_node)] == REF) {
d8319b27 3928 st->u.curlym.c1 = st->u.curlym.c2 = -1000;
44a68960 3929 goto assume_ok_REG;
cca55fe3 3930 }
d8319b27 3931 else { st->u.curlym.c1 = (U8)*STRING(text_node); }
cca55fe3 3932
af5decee 3933 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
d8319b27 3934 st->u.curlym.c2 = PL_fold[st->u.curlym.c1];
af5decee 3935 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
d8319b27 3936 st->u.curlym.c2 = PL_fold_locale[st->u.curlym.c1];
5f80c4cf 3937 else
d8319b27 3938 st->u.curlym.c2 = st->u.curlym.c1;
5f80c4cf 3939 }
a0ed51b3
LW
3940 }
3941 else
d8319b27 3942 st->u.curlym.c1 = st->u.curlym.c2 = -1000;
c277df42 3943 }
cca55fe3 3944 assume_ok_REG:
d8319b27
DM
3945 REGCP_SET(st->u.curlym.lastcp);
3946 while (st->u.curlym.matches >= st->ln) {
c277df42 3947 /* If it could work, try it. */
d8319b27
DM
3948 if (st->u.curlym.c1 == -1000 ||
3949 UCHARAT(PL_reginput) == st->u.curlym.c1 ||
3950 UCHARAT(PL_reginput) == st->u.curlym.c2)
a0ed51b3 3951 {
a3621e74 3952 DEBUG_EXECUTE_r(
6407bf3b
DM
3953 PerlIO_printf(Perl_debug_log,
3954 "%*s trying tail with matches=%"IVdf"...\n",
3955 (int)(REPORT_CODE_OFF+PL_regindent*2),
d8319b27 3956 "", (IV)st->u.curlym.matches)
a0ed51b3 3957 );
d8319b27
DM
3958 if (st->u.curlym.paren) {
3959 if (st->u.curlym.matches) {
3960 PL_regstartp[st->u.curlym.paren]
3961 = HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr;
3962 PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr;
c277df42 3963 }
a0ed51b3 3964 else
d8319b27 3965 PL_regendp[st->u.curlym.paren] = -1;
c277df42 3966 }
95b24440
DM
3967 REGMATCH(next, CURLYM4);
3968 /*** all unsaved local vars undefined at this point */
3969 if (result)
a0ed51b3 3970 sayYES;
d8319b27 3971 REGCP_UNWIND(st->u.curlym.lastcp);
a0ed51b3 3972 }
c277df42 3973 /* Couldn't or didn't -- back up. */
d8319b27
DM
3974 st->u.curlym.matches--;
3975 locinput = HOPc(locinput, -st->u.curlym.l);
3280af22 3976 PL_reginput = locinput;
c277df42
IZ
3977 }
3978 }
3979 sayNO;
5f66b61c 3980 /* NOTREACHED */
c277df42
IZ
3981 break;
3982 }
3983 case CURLYN:
d8319b27
DM
3984 st->u.plus.paren = scan->flags; /* Which paren to set */
3985 if (st->u.plus.paren > PL_regsize)
3986 PL_regsize = st->u.plus.paren;
3987 if (st->u.plus.paren > (I32)*PL_reglastparen)
3988 *PL_reglastparen = st->u.plus.paren;
5d9a96ca 3989 st->ln = ARG1(scan); /* min to match */
c277df42 3990 n = ARG2(scan); /* max to match */
dc45a647 3991 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3992 goto repeat;
a0d0e21e 3993 case CURLY:
d8319b27 3994 st->u.plus.paren = 0;
5d9a96ca 3995 st->ln = ARG1(scan); /* min to match */
a0d0e21e 3996 n = ARG2(scan); /* max to match */
dc45a647 3997 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3998 goto repeat;
3999 case STAR:
5d9a96ca 4000 st->ln = 0;
c277df42 4001 n = REG_INFTY;
a0d0e21e 4002 scan = NEXTOPER(scan);
d8319b27 4003 st->u.plus.paren = 0;
a0d0e21e
LW
4004 goto repeat;
4005 case PLUS:
5d9a96ca 4006 st->ln = 1;
c277df42
IZ
4007 n = REG_INFTY;
4008 scan = NEXTOPER(scan);
d8319b27 4009 st->u.plus.paren = 0;
c277df42 4010 repeat:
a0d0e21e
LW
4011 /*
4012 * Lookahead to avoid useless match attempts
4013 * when we know what character comes next.
4014 */
5f80c4cf
JP
4015
4016 /*
4017 * Used to only do .*x and .*?x, but now it allows
4018 * for )'s, ('s and (?{ ... })'s to be in the way
4019 * of the quantifier and the EXACT-like node. -- japhy
4020 */
4021
cca55fe3 4022 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
4023 U8 *s;
4024 regnode *text_node = next;
4025
cca55fe3 4026 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 4027
d8319b27 4028 if (! HAS_TEXT(text_node)) st->u.plus.c1 = st->u.plus.c2 = -1000;
5f80c4cf 4029 else {
cca55fe3 4030 if (PL_regkind[(U8)OP(text_node)] == REF) {
d8319b27 4031 st->u.plus.c1 = st->u.plus.c2 = -1000;
44a68960 4032 goto assume_ok_easy;
cca55fe3
JP
4033 }
4034 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
4035
4036 if (!UTF) {
d8319b27 4037 st->u.plus.c2 = st->u.plus.c1 = *s;
f65d3ee7 4038 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
d8319b27 4039 st->u.plus.c2 = PL_fold[st->u.plus.c1];
f65d3ee7 4040 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
d8319b27 4041 st->u.plus.c2 = PL_fold_locale[st->u.plus.c1];
1aa99e6b 4042 }
5f80c4cf 4043 else { /* UTF */
f65d3ee7 4044 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 4045 STRLEN ulen1, ulen2;
89ebb4a3
JH
4046 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4047 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
a2a2844f
JH
4048
4049 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4050 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4051
d8319b27 4052 st->u.plus.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
041457d9 4053 uniflags);
d8319b27 4054 st->u.plus.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
041457d9 4055 uniflags);
5f80c4cf
JP
4056 }
4057 else {
d8319b27 4058 st->u.plus.c2 = st->u.plus.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
041457d9 4059 uniflags);
5f80c4cf 4060 }
1aa99e6b
IH
4061 }
4062 }
bbce6d69 4063 }
a0d0e21e 4064 else
d8319b27 4065 st->u.plus.c1 = st->u.plus.c2 = -1000;
cca55fe3 4066 assume_ok_easy:
3280af22 4067 PL_reginput = locinput;
5d9a96ca
DM
4068 if (st->minmod) {
4069 st->minmod = 0;
4070 if (st->ln && regrepeat(scan, st->ln) < st->ln)
4633a7c4 4071 sayNO;
a0ed51b3 4072 locinput = PL_reginput;
d8319b27
DM
4073 REGCP_SET(st->u.plus.lastcp);
4074 if (st->u.plus.c1 != -1000) {
4075 st->u.plus.old = locinput;
4076 st->u.plus.count = 0;
0fe9bf95 4077
1aa99e6b 4078 if (n == REG_INFTY) {
d8319b27 4079 st->u.plus.e = PL_regeol - 1;
1aa99e6b 4080 if (do_utf8)
d8319b27
DM
4081 while (UTF8_IS_CONTINUATION(*(U8*)st->u.plus.e))
4082 st->u.plus.e--;
1aa99e6b
IH
4083 }
4084 else if (do_utf8) {
5d9a96ca 4085 int m = n - st->ln;
d8319b27
DM
4086 for (st->u.plus.e = locinput;
4087 m >0 && st->u.plus.e + UTF8SKIP(st->u.plus.e) <= PL_regeol; m--)
4088 st->u.plus.e += UTF8SKIP(st->u.plus.e);
1aa99e6b
IH
4089 }
4090 else {
d8319b27
DM
4091 st->u.plus.e = locinput + n - st->ln;
4092 if (st->u.plus.e >= PL_regeol)
4093 st->u.plus.e = PL_regeol - 1;
1aa99e6b 4094 }
0fe9bf95
IZ
4095 while (1) {
4096 /* Find place 'next' could work */
1aa99e6b 4097 if (!do_utf8) {
d8319b27
DM
4098 if (st->u.plus.c1 == st->u.plus.c2) {
4099 while (locinput <= st->u.plus.e &&
4100 UCHARAT(locinput) != st->u.plus.c1)
1aa99e6b
IH
4101 locinput++;
4102 } else {
d8319b27
DM
4103 while (locinput <= st->u.plus.e
4104 && UCHARAT(locinput) != st->u.plus.c1
4105 && UCHARAT(locinput) != st->u.plus.c2)
1aa99e6b
IH
4106 locinput++;
4107 }
d8319b27 4108 st->u.plus.count = locinput - st->u.plus.old;
1aa99e6b
IH
4109 }
4110 else {
d8319b27 4111 if (st->u.plus.c1 == st->u.plus.c2) {
a3b680e6 4112 STRLEN len;
872c91ae
JH
4113 /* count initialised to
4114 * utf8_distance(old, locinput) */
d8319b27 4115 while (locinput <= st->u.plus.e &&
872c91ae 4116 utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4117 UTF8_MAXBYTES, &len,
d8319b27 4118 uniflags) != (UV)st->u.plus.c1) {
1aa99e6b 4119 locinput += len;
d8319b27 4120 st->u.plus.count++;
b2f2f093 4121 }
1aa99e6b 4122 } else {
a3b680e6 4123 STRLEN len;
872c91ae
JH
4124 /* count initialised to
4125 * utf8_distance(old, locinput) */
d8319b27 4126 while (locinput <= st->u.plus.e) {
872c91ae 4127 UV c = utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4128 UTF8_MAXBYTES, &len,
041457d9 4129 uniflags);
d8319b27 4130 if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
1aa99e6b 4131 break;
b2f2f093 4132 locinput += len;
d8319b27 4133 st->u.plus.count++;
1aa99e6b
IH
4134 }
4135 }
0fe9bf95 4136 }
d8319b27 4137 if (locinput > st->u.plus.e)
0fe9bf95
IZ
4138 sayNO;
4139 /* PL_reginput == old now */
d8319b27 4140 if (locinput != st->u.plus.old) {
5d9a96ca 4141 st->ln = 1; /* Did some */
d8319b27 4142 if (regrepeat(scan, st->u.plus.count) < st->u.plus.count)
0fe9bf95
IZ
4143 sayNO;
4144 }
4145 /* PL_reginput == locinput now */
d8319b27 4146 TRYPAREN(st->u.plus.paren, st->ln, locinput, PLUS1);
95b24440 4147 /*** all unsaved local vars undefined at this point */
0fe9bf95 4148 PL_reginput = locinput; /* Could be reset... */
d8319b27 4149 REGCP_UNWIND(st->u.plus.lastcp);
0fe9bf95 4150 /* Couldn't or didn't -- move forward. */
d8319b27 4151 st->u.plus.old = locinput;
1aa99e6b
IH
4152 if (do_utf8)
4153 locinput += UTF8SKIP(locinput);
4154 else
4155 locinput++;
d8319b27 4156 st->u.plus.count = 1;
0fe9bf95
IZ
4157 }
4158 }
4159 else
5d9a96ca 4160 while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */
1aa99e6b 4161 UV c;
d8319b27 4162 if (st->u.plus.c1 != -1000) {
1aa99e6b 4163 if (do_utf8)
872c91ae 4164 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4165 UTF8_MAXBYTES, 0,
041457d9 4166 uniflags);
1aa99e6b 4167 else
9041c2e3 4168 c = UCHARAT(PL_reginput);
2390ecbc 4169 /* If it could work, try it. */
d8319b27 4170 if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
2390ecbc 4171 {
d8319b27 4172 TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS2);
95b24440 4173 /*** all unsaved local vars undefined at this point */
d8319b27 4174 REGCP_UNWIND(st->u.plus.lastcp);
2390ecbc 4175 }
1aa99e6b 4176 }
a0d0e21e 4177 /* If it could work, try it. */
d8319b27 4178 else if (st->u.plus.c1 == -1000)
bbce6d69 4179 {
d8319b27 4180 TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS3);
95b24440 4181 /*** all unsaved local vars undefined at this point */
d8319b27 4182 REGCP_UNWIND(st->u.plus.lastcp);
bbce6d69 4183 }
c277df42 4184 /* Couldn't or didn't -- move forward. */
a0ed51b3 4185 PL_reginput = locinput;
a0d0e21e 4186 if (regrepeat(scan, 1)) {
5d9a96ca 4187 st->ln++;
a0ed51b3
LW
4188 locinput = PL_reginput;
4189 }
4190 else
4633a7c4 4191 sayNO;
a0d0e21e
LW
4192 }
4193 }
4194 else {
4195 n = regrepeat(scan, n);
a0ed51b3 4196 locinput = PL_reginput;
5d9a96ca 4197 if (st->ln < n && PL_regkind[(U8)OP(next)] == EOL &&
7fba1cd6 4198 (OP(next) != MEOL ||
15272685
HS
4199 OP(next) == SEOL || OP(next) == EOS))
4200 {
5d9a96ca 4201 st->ln = n; /* why back off? */
1aeab75a
GS
4202 /* ...because $ and \Z can match before *and* after
4203 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
4204 We should back off by one in this case. */
4205 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
5d9a96ca 4206 st->ln--;
1aeab75a 4207 }
d8319b27 4208 REGCP_SET(st->u.plus.lastcp);
1d5c262f 4209 {
8fa7f367 4210 UV c = 0;
5d9a96ca 4211 while (n >= st->ln) {
d8319b27 4212 if (st->u.plus.c1 != -1000) {
1aa99e6b 4213 if (do_utf8)
872c91ae 4214 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4215 UTF8_MAXBYTES, 0,
041457d9 4216 uniflags);
1aa99e6b 4217 else
9041c2e3 4218 c = UCHARAT(PL_reginput);
1aa99e6b 4219 }
c277df42 4220 /* If it could work, try it. */
d8319b27 4221 if (st->u.plus.c1 == -1000 || c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
c277df42 4222 {
d8319b27 4223 TRYPAREN(st->u.plus.paren, n, PL_reginput, PLUS4);
95b24440 4224 /*** all unsaved local vars undefined at this point */
d8319b27 4225 REGCP_UNWIND(st->u.plus.lastcp);
c277df42
IZ
4226 }
4227 /* Couldn't or didn't -- back up. */
4228 n--;
dfe13c55 4229 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 4230 }
a0d0e21e
LW
4231 }
4232 }
4633a7c4 4233 sayNO;
c277df42 4234 break;
a0d0e21e 4235 case END:
0f5d15d6 4236 if (PL_reg_call_cc) {
d8319b27
DM
4237 st->u.end.cur_call_cc = PL_reg_call_cc;
4238 st->u.end.end_re = PL_reg_re;
6136c704
AL
4239
4240 /* Save *all* the positions. */
d8319b27
DM
4241 st->u.end.cp = regcppush(0);
4242 REGCP_SET(st->u.end.lastcp);
6136c704
AL
4243
4244 /* Restore parens of the caller. */
95b24440
DM
4245 {
4246 I32 tmp = PL_savestack_ix;
4247 PL_savestack_ix = PL_reg_call_cc->ss;
4f639d21 4248 regcppop(rex);
95b24440
DM
4249 PL_savestack_ix = tmp;
4250 }
6136c704
AL
4251
4252 /* Make position available to the callcc. */
4253 PL_reginput = locinput;
4254
4f639d21 4255 PL_reg_re = PL_reg_call_cc->re;
d8319b27 4256 st->u.end.savecc = st->cc;
5d9a96ca 4257 st->cc = PL_reg_call_cc->cc;
0f5d15d6 4258 PL_reg_call_cc = PL_reg_call_cc->prev;
d8319b27 4259 REGMATCH(st->u.end.cur_call_cc->node, END);
95b24440
DM
4260 /*** all unsaved local vars undefined at this point */
4261 if (result) {
d8319b27
DM
4262 PL_reg_call_cc = st->u.end.cur_call_cc;
4263 regcpblow(st->u.end.cp);
0f5d15d6
IZ
4264 sayYES;
4265 }
d8319b27 4266 REGCP_UNWIND(st->u.end.lastcp);
4f639d21 4267 regcppop(rex);
d8319b27
DM
4268 PL_reg_call_cc = st->u.end.cur_call_cc;
4269 st->cc = st->u.end.savecc;
4270 PL_reg_re = st->u.end.end_re;
0f5d15d6 4271
a3621e74 4272 DEBUG_EXECUTE_r(
0f5d15d6
IZ
4273 PerlIO_printf(Perl_debug_log,
4274 "%*s continuation failed...\n",
4275 REPORT_CODE_OFF+PL_regindent*2, "")
4276 );
7821416a 4277 sayNO_SILENT;
0f5d15d6 4278 }
7821416a 4279 if (locinput < PL_regtill) {
a3621e74 4280 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
7821416a
IZ
4281 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4282 PL_colors[4],
4283 (long)(locinput - PL_reg_starttry),
4284 (long)(PL_regtill - PL_reg_starttry),
4285 PL_colors[5]));
4286 sayNO_FINAL; /* Cannot match: too short. */
4287 }
4288 PL_reginput = locinput; /* put where regtry can find it */
4289 sayYES_FINAL; /* Success! */
7e5428c5 4290 case SUCCEED:
3280af22 4291 PL_reginput = locinput; /* put where regtry can find it */
7821416a 4292 sayYES_LOUD; /* Success! */
c277df42
IZ
4293 case SUSPEND:
4294 n = 1;
9fe1d20c 4295 PL_reginput = locinput;
9041c2e3 4296 goto do_ifmatch;
a0d0e21e 4297 case UNLESSM:
c277df42 4298 n = 0;
a0ed51b3 4299 if (scan->flags) {
52657f30 4300 char * const s = HOPBACKc(locinput, scan->flags);
efb30f32
HS
4301 if (!s)
4302 goto say_yes;
4303 PL_reginput = s;
a0ed51b3
LW
4304 }
4305 else
4306 PL_reginput = locinput;
c277df42
IZ
4307 goto do_ifmatch;
4308 case IFMATCH:
4309 n = 1;
a0ed51b3 4310 if (scan->flags) {
52657f30 4311 char * const s = HOPBACKc(locinput, scan->flags);
efb30f32
HS
4312 if (!s)
4313 goto say_no;
4314 PL_reginput = s;
a0ed51b3
LW
4315 }
4316 else
4317 PL_reginput = locinput;
4318
c277df42 4319 do_ifmatch:
95b24440
DM
4320 REGMATCH(NEXTOPER(NEXTOPER(scan)), IFMATCH);
4321 /*** all unsaved local vars undefined at this point */
4322 if (result != n) {
c277df42 4323 say_no:
5d9a96ca
DM
4324 if (st->logical) {
4325 st->logical = 0;
4326 st->sw = 0;
c277df42 4327 goto do_longjump;
a0ed51b3
LW
4328 }
4329 else
c277df42
IZ
4330 sayNO;
4331 }
4332 say_yes:
5d9a96ca
DM
4333 if (st->logical) {
4334 st->logical = 0;
4335 st->sw = 1;
c277df42 4336 }
fe44a5e8 4337 if (OP(scan) == SUSPEND) {
3280af22 4338 locinput = PL_reginput;
565764a8 4339 nextchr = UCHARAT(locinput);
fe44a5e8 4340 }
c277df42
IZ
4341 /* FALL THROUGH. */
4342 case LONGJMP:
4343 do_longjump:
4344 next = scan + ARG(scan);
4345 if (next == scan)
4346 next = NULL;
a0d0e21e
LW
4347 break;
4348 default:
b900a521 4349 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 4350 PTR2UV(scan), OP(scan));
cea2e8a9 4351 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 4352 }
95b24440 4353
02db2b7b 4354 reenter:
a0d0e21e 4355 scan = next;
95b24440
DM
4356 continue;
4357 /* NOTREACHED */
4358
4359 /* simulate recursively calling regmatch(), but without actually
4360 * recursing - ie save the current state on the heap rather than on
4361 * the stack, then re-enter the loop. This avoids complex regexes
4362 * blowing the processor stack */
4363
4364 start_recurse:
4365 {
5d9a96ca
DM
4366 /* push new state */
4367 regmatch_state *oldst = st;
4368
4369 depth++;
4370
4371 /* grab the next free state slot */
4372 st++;
4373 if (st > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]))
4374 st = S_push_slab(aTHX);
4375 PL_regmatch_state = st;
4376
4377 oldst->next = next;
4378 oldst->n = n;
4379 oldst->locinput = locinput;
4380 oldst->reg_call_cc = PL_reg_call_cc;
4381
4382 st->cc = oldst->cc;
95b24440
DM
4383 locinput = PL_reginput;
4384 nextchr = UCHARAT(locinput);
5d9a96ca
DM
4385 st->minmod = 0;
4386 st->sw = 0;
4387 st->logical = 0;
4388 st->unwind = 0;
95b24440
DM
4389#ifdef DEBUGGING
4390 PL_regindent++;
4391#endif
4392 }
a0d0e21e 4393 }
a687059c 4394
a0d0e21e
LW
4395 /*
4396 * We get here only if there's trouble -- normally "case END" is
4397 * the terminating point.
4398 */
cea2e8a9 4399 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 4400 /*NOTREACHED*/
4633a7c4
LW
4401 sayNO;
4402
7821416a 4403yes_loud:
a3621e74 4404 DEBUG_EXECUTE_r(
7821416a
IZ
4405 PerlIO_printf(Perl_debug_log,
4406 "%*s %scould match...%s\n",
e4584336 4407 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
7821416a
IZ
4408 );
4409 goto yes;
4410yes_final:
a3621e74 4411 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
e4584336 4412 PL_colors[4], PL_colors[5]));
4633a7c4
LW
4413yes:
4414#ifdef DEBUGGING
3280af22 4415 PL_regindent--;
4633a7c4 4416#endif
02db2b7b 4417
95b24440
DM
4418 result = 1;
4419 goto exit_level;
4633a7c4
LW
4420
4421no:
a3621e74 4422 DEBUG_EXECUTE_r(
7821416a
IZ
4423 PerlIO_printf(Perl_debug_log,
4424 "%*s %sfailed...%s\n",
e4584336 4425 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
7821416a
IZ
4426 );
4427 goto do_no;
4428no_final:
4429do_no:
5d9a96ca
DM
4430 if (st->unwind) {
4431 re_unwind_t * const uw = SSPTRt(st->unwind,re_unwind_t);
02db2b7b
IZ
4432
4433 switch (uw->type) {
4434 case RE_UNWIND_BRANCH:
4435 case RE_UNWIND_BRANCHJ:
4436 {
6136c704 4437 re_unwind_branch_t * const uwb = &(uw->branch);
a3b680e6 4438 const I32 lastparen = uwb->lastparen;
9041c2e3 4439
02db2b7b
IZ
4440 REGCP_UNWIND(uwb->lastcp);
4441 for (n = *PL_reglastparen; n > lastparen; n--)
4442 PL_regendp[n] = -1;
4443 *PL_reglastparen = n;
4444 scan = next = uwb->next;
9041c2e3
NIS
4445 if ( !scan ||
4446 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b 4447 ? BRANCH : BRANCHJ) ) { /* Failure */
5d9a96ca 4448 st->unwind = uwb->prev;
02db2b7b
IZ
4449#ifdef DEBUGGING
4450 PL_regindent--;
4451#endif
4452 goto do_no;
4453 }
4454 /* Have more choice yet. Reuse the same uwb. */
02db2b7b
IZ
4455 if ((n = (uwb->type == RE_UNWIND_BRANCH
4456 ? NEXT_OFF(next) : ARG(next))))
4457 next += n;
4458 else
4459 next = NULL; /* XXXX Needn't unwinding in this case... */
4460 uwb->next = next;
4461 next = NEXTOPER(scan);
4462 if (uwb->type == RE_UNWIND_BRANCHJ)
4463 next = NEXTOPER(next);
4464 locinput = uwb->locinput;
4465 nextchr = uwb->nextchr;
4466#ifdef DEBUGGING
4467 PL_regindent = uwb->regindent;
4468#endif
4469
4470 goto reenter;
4471 }
5f66b61c 4472 /* NOTREACHED */
02db2b7b
IZ
4473 default:
4474 Perl_croak(aTHX_ "regexp unwind memory corruption");
4475 }
5f66b61c 4476 /* NOTREACHED */
02db2b7b 4477 }
4633a7c4 4478#ifdef DEBUGGING
3280af22 4479 PL_regindent--;
4633a7c4 4480#endif
95b24440
DM
4481 result = 0;
4482exit_level:
5d9a96ca
DM
4483
4484 if (depth--) {
95b24440 4485 /* restore previous state and re-enter */
5d9a96ca
DM
4486 st--;
4487 if (st < &PL_regmatch_slab->states[0]) {
4488 PL_regmatch_slab = PL_regmatch_slab->prev;
4489 st = &PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1];
4490 }
4491 PL_regmatch_state = st;
95b24440 4492
5d9a96ca
DM
4493 PL_reg_call_cc = st->reg_call_cc;
4494 scan = st->scan;
4495 next = st->next;
4496 n = st->n;
4497 locinput = st->locinput;
95b24440 4498 nextchr = UCHARAT(locinput);
5d9a96ca
DM
4499
4500 switch (st->resume_state) {
95b24440
DM
4501 case resume_TRIE1:
4502 goto resume_point_TRIE1;
4503 case resume_TRIE2:
4504 goto resume_point_TRIE2;
4505 case resume_CURLYX:
4506 goto resume_point_CURLYX;
4507 case resume_WHILEM1:
4508 goto resume_point_WHILEM1;
4509 case resume_WHILEM2:
4510 goto resume_point_WHILEM2;
4511 case resume_WHILEM3:
4512 goto resume_point_WHILEM3;
4513 case resume_WHILEM4:
4514 goto resume_point_WHILEM4;
4515 case resume_WHILEM5:
4516 goto resume_point_WHILEM5;
4517 case resume_WHILEM6:
4518 goto resume_point_WHILEM6;
4519 case resume_CURLYM1:
4520 goto resume_point_CURLYM1;
4521 case resume_CURLYM2:
4522 goto resume_point_CURLYM2;
4523 case resume_CURLYM3:
4524 goto resume_point_CURLYM3;
4525 case resume_CURLYM4:
4526 goto resume_point_CURLYM4;
4527 case resume_IFMATCH:
4528 goto resume_point_IFMATCH;
4529 case resume_PLUS1:
4530 goto resume_point_PLUS1;
4531 case resume_PLUS2:
4532 goto resume_point_PLUS2;
4533 case resume_PLUS3:
4534 goto resume_point_PLUS3;
4535 case resume_PLUS4:
4536 goto resume_point_PLUS4;
4537 case resume_END:
4538 goto resume_point_END;
4539 default:
4540 Perl_croak(aTHX_ "regexp resume memory corruption");
4541 }
4542 /* NOTREACHED */
4543 }
5d9a96ca
DM
4544 /* restore original high-water mark */
4545 PL_regmatch_slab = orig_slab;
4546 PL_regmatch_state = orig_state;
4547
4548 /* free all slabs above current one */
4549 if (orig_slab->next) {
ad65c075 4550 regmatch_slab *osl, *sl = orig_slab->next;
5d9a96ca
DM
4551 orig_slab->next = NULL;
4552 while (sl) {
ad65c075 4553 osl = sl;
5d9a96ca 4554 sl = sl->next;
ad65c075 4555 Safefree(osl);
5d9a96ca
DM
4556 }
4557 }
4558
95b24440
DM
4559 return result;
4560
a687059c
LW
4561}
4562
4563/*
4564 - regrepeat - repeatedly match something simple, report how many
4565 */
4566/*
4567 * [This routine now assumes that it will only match on things of length 1.
4568 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 4569 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 4570 */
76e3520e 4571STATIC I32
a3b680e6 4572S_regrepeat(pTHX_ const regnode *p, I32 max)
a687059c 4573{
27da23d5 4574 dVAR;
a0d0e21e 4575 register char *scan;
a0d0e21e 4576 register I32 c;
3280af22 4577 register char *loceol = PL_regeol;
a0ed51b3 4578 register I32 hardcount = 0;
53c4c00c 4579 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 4580
3280af22 4581 scan = PL_reginput;
faf11cac
HS
4582 if (max == REG_INFTY)
4583 max = I32_MAX;
4584 else if (max < loceol - scan)
7f596f4c 4585 loceol = scan + max;
a0d0e21e 4586 switch (OP(p)) {
22c35a8c 4587 case REG_ANY:
1aa99e6b 4588 if (do_utf8) {
ffc61ed2 4589 loceol = PL_regeol;
1aa99e6b 4590 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
4591 scan += UTF8SKIP(scan);
4592 hardcount++;
4593 }
4594 } else {
4595 while (scan < loceol && *scan != '\n')
4596 scan++;
a0ed51b3
LW
4597 }
4598 break;
ffc61ed2 4599 case SANY:
def8e4ea
JH
4600 if (do_utf8) {
4601 loceol = PL_regeol;
a0804c9e 4602 while (scan < loceol && hardcount < max) {
def8e4ea
JH
4603 scan += UTF8SKIP(scan);
4604 hardcount++;
4605 }
4606 }
4607 else
4608 scan = loceol;
a0ed51b3 4609 break;
f33976b4
DB
4610 case CANY:
4611 scan = loceol;
4612 break;
090f7165
JH
4613 case EXACT: /* length of string is 1 */
4614 c = (U8)*STRING(p);
4615 while (scan < loceol && UCHARAT(scan) == c)
4616 scan++;
bbce6d69 4617 break;
4618 case EXACTF: /* length of string is 1 */
cd439c50 4619 c = (U8)*STRING(p);
bbce6d69 4620 while (scan < loceol &&
22c35a8c 4621 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 4622 scan++;
4623 break;
4624 case EXACTFL: /* length of string is 1 */
3280af22 4625 PL_reg_flags |= RF_tainted;
cd439c50 4626 c = (U8)*STRING(p);
bbce6d69 4627 while (scan < loceol &&
22c35a8c 4628 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
4629 scan++;
4630 break;
4631 case ANYOF:
ffc61ed2
JH
4632 if (do_utf8) {
4633 loceol = PL_regeol;
cfc92286
JH
4634 while (hardcount < max && scan < loceol &&
4635 reginclass(p, (U8*)scan, 0, do_utf8)) {
ffc61ed2
JH
4636 scan += UTF8SKIP(scan);
4637 hardcount++;
4638 }
4639 } else {
7d3e948e 4640 while (scan < loceol && REGINCLASS(p, (U8*)scan))
ffc61ed2
JH
4641 scan++;
4642 }
a0d0e21e
LW
4643 break;
4644 case ALNUM:
1aa99e6b 4645 if (do_utf8) {
ffc61ed2 4646 loceol = PL_regeol;
1a4fad37 4647 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4648 while (hardcount < max && scan < loceol &&
3568d838 4649 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4650 scan += UTF8SKIP(scan);
4651 hardcount++;
4652 }
4653 } else {
4654 while (scan < loceol && isALNUM(*scan))
4655 scan++;
a0ed51b3
LW
4656 }
4657 break;
bbce6d69 4658 case ALNUML:
3280af22 4659 PL_reg_flags |= RF_tainted;
1aa99e6b 4660 if (do_utf8) {
ffc61ed2 4661 loceol = PL_regeol;
1aa99e6b
IH
4662 while (hardcount < max && scan < loceol &&
4663 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4664 scan += UTF8SKIP(scan);
4665 hardcount++;
4666 }
4667 } else {
4668 while (scan < loceol && isALNUM_LC(*scan))
4669 scan++;
a0ed51b3
LW
4670 }
4671 break;
a0d0e21e 4672 case NALNUM:
1aa99e6b 4673 if (do_utf8) {
ffc61ed2 4674 loceol = PL_regeol;
1a4fad37 4675 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4676 while (hardcount < max && scan < loceol &&
3568d838 4677 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4678 scan += UTF8SKIP(scan);
4679 hardcount++;
4680 }
4681 } else {
4682 while (scan < loceol && !isALNUM(*scan))
4683 scan++;
a0ed51b3
LW
4684 }
4685 break;
bbce6d69 4686 case NALNUML:
3280af22 4687 PL_reg_flags |= RF_tainted;
1aa99e6b 4688 if (do_utf8) {
ffc61ed2 4689 loceol = PL_regeol;
1aa99e6b
IH
4690 while (hardcount < max && scan < loceol &&
4691 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4692 scan += UTF8SKIP(scan);
4693 hardcount++;
4694 }
4695 } else {
4696 while (scan < loceol && !isALNUM_LC(*scan))
4697 scan++;
a0ed51b3
LW
4698 }
4699 break;
a0d0e21e 4700 case SPACE:
1aa99e6b 4701 if (do_utf8) {
ffc61ed2 4702 loceol = PL_regeol;
1a4fad37 4703 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4704 while (hardcount < max && scan < loceol &&
3568d838
JH
4705 (*scan == ' ' ||
4706 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4707 scan += UTF8SKIP(scan);
4708 hardcount++;
4709 }
4710 } else {
4711 while (scan < loceol && isSPACE(*scan))
4712 scan++;
a0ed51b3
LW
4713 }
4714 break;
bbce6d69 4715 case SPACEL:
3280af22 4716 PL_reg_flags |= RF_tainted;
1aa99e6b 4717 if (do_utf8) {
ffc61ed2 4718 loceol = PL_regeol;
1aa99e6b 4719 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4720 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4721 scan += UTF8SKIP(scan);
4722 hardcount++;
4723 }
4724 } else {
4725 while (scan < loceol && isSPACE_LC(*scan))
4726 scan++;
a0ed51b3
LW
4727 }
4728 break;
a0d0e21e 4729 case NSPACE:
1aa99e6b 4730 if (do_utf8) {
ffc61ed2 4731 loceol = PL_regeol;
1a4fad37 4732 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4733 while (hardcount < max && scan < loceol &&
3568d838
JH
4734 !(*scan == ' ' ||
4735 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4736 scan += UTF8SKIP(scan);
4737 hardcount++;
4738 }
4739 } else {
4740 while (scan < loceol && !isSPACE(*scan))
4741 scan++;
4742 break;
a0ed51b3 4743 }
bbce6d69 4744 case NSPACEL:
3280af22 4745 PL_reg_flags |= RF_tainted;
1aa99e6b 4746 if (do_utf8) {
ffc61ed2 4747 loceol = PL_regeol;
1aa99e6b 4748 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4749 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4750 scan += UTF8SKIP(scan);
4751 hardcount++;
4752 }
4753 } else {
4754 while (scan < loceol && !isSPACE_LC(*scan))
4755 scan++;
a0ed51b3
LW
4756 }
4757 break;
a0d0e21e 4758 case DIGIT:
1aa99e6b 4759 if (do_utf8) {
ffc61ed2 4760 loceol = PL_regeol;
1a4fad37 4761 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4762 while (hardcount < max && scan < loceol &&
3568d838 4763 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4764 scan += UTF8SKIP(scan);
4765 hardcount++;
4766 }
4767 } else {
4768 while (scan < loceol && isDIGIT(*scan))
4769 scan++;
a0ed51b3
LW
4770 }
4771 break;
a0d0e21e 4772 case NDIGIT:
1aa99e6b 4773 if (do_utf8) {
ffc61ed2 4774 loceol = PL_regeol;
1a4fad37 4775 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4776 while (hardcount < max && scan < loceol &&
3568d838 4777 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4778 scan += UTF8SKIP(scan);
4779 hardcount++;
4780 }
4781 } else {
4782 while (scan < loceol && !isDIGIT(*scan))
4783 scan++;
a0ed51b3
LW
4784 }
4785 break;
a0d0e21e
LW
4786 default: /* Called on something of 0 width. */
4787 break; /* So match right here or not at all. */
4788 }
a687059c 4789
a0ed51b3
LW
4790 if (hardcount)
4791 c = hardcount;
4792 else
4793 c = scan - PL_reginput;
3280af22 4794 PL_reginput = scan;
a687059c 4795
a3621e74 4796 DEBUG_r({
ab74612d 4797 SV *re_debug_flags = NULL;
6136c704 4798 SV * const prop = sv_newmortal();
a3621e74
YO
4799 GET_RE_DEBUG_FLAGS;
4800 DEBUG_EXECUTE_r({
c277df42 4801 regprop(prop, p);
9041c2e3
NIS
4802 PerlIO_printf(Perl_debug_log,
4803 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3f7c398e 4804 REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max);
c277df42 4805 });
a3621e74 4806 });
9041c2e3 4807
a0d0e21e 4808 return(c);
a687059c
LW
4809}
4810
c277df42
IZ
4811
4812/*
ffc61ed2
JH
4813- regclass_swash - prepare the utf8 swash
4814*/
4815
4816SV *
a3b680e6 4817Perl_regclass_swash(pTHX_ register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
ffc61ed2 4818{
97aff369 4819 dVAR;
9e55ce06
JH
4820 SV *sw = NULL;
4821 SV *si = NULL;
4822 SV *alt = NULL;
a8079bba 4823 const struct reg_data *data = PL_reg_re ? PL_reg_re->data : NULL;
ffc61ed2 4824
4f639d21 4825 if (data && data->count) {
a3b680e6 4826 const U32 n = ARG(node);
ffc61ed2 4827
4f639d21
DM
4828 if (data->what[n] == 's') {
4829 SV * const rv = (SV*)data->data[n];
890ce7af 4830 AV * const av = (AV*)SvRV((SV*)rv);
2d03de9c 4831 SV **const ary = AvARRAY(av);
9e55ce06 4832 SV **a, **b;
9041c2e3 4833
711a919c 4834 /* See the end of regcomp.c:S_regclass() for
9e55ce06
JH
4835 * documentation of these array elements. */
4836
b11f357e 4837 si = *ary;
8f7f7219 4838 a = SvROK(ary[1]) ? &ary[1] : 0;
b11f357e
JH
4839 b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4840
ffc61ed2
JH
4841 if (a)
4842 sw = *a;
4843 else if (si && doinit) {
4844 sw = swash_init("utf8", "", si, 1, 0);
4845 (void)av_store(av, 1, sw);
4846 }
9e55ce06
JH
4847 if (b)
4848 alt = *b;
ffc61ed2
JH
4849 }
4850 }
4851
9e55ce06
JH
4852 if (listsvp)
4853 *listsvp = si;
4854 if (altsvp)
4855 *altsvp = alt;
ffc61ed2
JH
4856
4857 return sw;
4858}
4859
4860/*
ba7b4546 4861 - reginclass - determine if a character falls into a character class
832705d4
JH
4862
4863 The n is the ANYOF regnode, the p is the target string, lenp
4864 is pointer to the maximum length of how far to go in the p
4865 (if the lenp is zero, UTF8SKIP(p) is used),
4866 do_utf8 tells whether the target string is in UTF-8.
4867
bbce6d69 4868 */
4869
76e3520e 4870STATIC bool
a3b680e6 4871S_reginclass(pTHX_ register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
bbce6d69 4872{
27da23d5 4873 dVAR;
a3b680e6 4874 const char flags = ANYOF_FLAGS(n);
bbce6d69 4875 bool match = FALSE;
cc07378b 4876 UV c = *p;
ae9ddab8 4877 STRLEN len = 0;
9e55ce06 4878 STRLEN plen;
1aa99e6b 4879
19f67299
TS
4880 if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
4881 c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
4ad0818d
DM
4882 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
4883 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
19f67299
TS
4884 if (len == (STRLEN)-1)
4885 Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
4886 }
bbce6d69 4887
0f0076b4 4888 plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
ffc61ed2 4889 if (do_utf8 || (flags & ANYOF_UNICODE)) {
9e55ce06
JH
4890 if (lenp)
4891 *lenp = 0;
ffc61ed2 4892 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
4893 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4894 match = TRUE;
bbce6d69 4895 }
3568d838 4896 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 4897 match = TRUE;
ffc61ed2 4898 if (!match) {
9e55ce06 4899 AV *av;
890ce7af 4900 SV * const sw = regclass_swash(n, TRUE, 0, (SV**)&av);
ffc61ed2
JH
4901
4902 if (sw) {
3568d838 4903 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
4904 match = TRUE;
4905 else if (flags & ANYOF_FOLD) {
9e55ce06
JH
4906 if (!match && lenp && av) {
4907 I32 i;
9e55ce06 4908 for (i = 0; i <= av_len(av); i++) {
890ce7af 4909 SV* const sv = *av_fetch(av, i, FALSE);
9e55ce06 4910 STRLEN len;
890ce7af 4911 const char * const s = SvPV_const(sv, len);
9e55ce06 4912
061b10df 4913 if (len <= plen && memEQ(s, (char*)p, len)) {
9e55ce06
JH
4914 *lenp = len;
4915 match = TRUE;
4916 break;
4917 }
4918 }
4919 }
4920 if (!match) {
89ebb4a3 4921 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4a623e43
JH
4922 STRLEN tmplen;
4923
9e55ce06
JH
4924 to_utf8_fold(p, tmpbuf, &tmplen);
4925 if (swash_fetch(sw, tmpbuf, do_utf8))
4926 match = TRUE;
4927 }
ffc61ed2
JH
4928 }
4929 }
bbce6d69 4930 }
9e55ce06 4931 if (match && lenp && *lenp == 0)
0f0076b4 4932 *lenp = UNISKIP(NATIVE_TO_UNI(c));
bbce6d69 4933 }
1aa99e6b 4934 if (!match && c < 256) {
ffc61ed2
JH
4935 if (ANYOF_BITMAP_TEST(n, c))
4936 match = TRUE;
4937 else if (flags & ANYOF_FOLD) {
eb160463 4938 U8 f;
a0ed51b3 4939
ffc61ed2
JH
4940 if (flags & ANYOF_LOCALE) {
4941 PL_reg_flags |= RF_tainted;
4942 f = PL_fold_locale[c];
4943 }
4944 else
4945 f = PL_fold[c];
4946 if (f != c && ANYOF_BITMAP_TEST(n, f))
4947 match = TRUE;
4948 }
4949
4950 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 4951 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
4952 if (
4953 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4954 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4955 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
4956 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4957 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4958 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4959 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4960 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4961 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4962 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4963 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
4964 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
4965 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4966 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4967 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4968 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4969 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
4970 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4971 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
4972 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4973 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4974 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4975 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
4976 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4977 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4978 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
4979 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
4980 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
4981 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
4982 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
4983 ) /* How's that for a conditional? */
4984 {
4985 match = TRUE;
4986 }
a0ed51b3 4987 }
a0ed51b3
LW
4988 }
4989
a0ed51b3
LW
4990 return (flags & ANYOF_INVERT) ? !match : match;
4991}
161b471a 4992
dfe13c55 4993STATIC U8 *
5f66b61c 4994S_reghop3(U8 *s, I32 off, U8* lim)
9041c2e3 4995{
97aff369 4996 dVAR;
a0ed51b3 4997 if (off >= 0) {
1aa99e6b 4998 while (off-- && s < lim) {
ffc61ed2 4999 /* XXX could check well-formedness here */
a0ed51b3 5000 s += UTF8SKIP(s);
ffc61ed2 5001 }
a0ed51b3
LW
5002 }
5003 else {
5004 while (off++) {
1aa99e6b 5005 if (s > lim) {
a0ed51b3 5006 s--;
ffc61ed2 5007 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 5008 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 5009 s--;
ffc61ed2
JH
5010 }
5011 /* XXX could check well-formedness here */
a0ed51b3
LW
5012 }
5013 }
5014 }
5015 return s;
5016}
161b471a 5017
dfe13c55 5018STATIC U8 *
5f66b61c 5019S_reghopmaybe3(U8* s, I32 off, U8* lim)
a0ed51b3 5020{
97aff369 5021 dVAR;
a0ed51b3 5022 if (off >= 0) {
1aa99e6b 5023 while (off-- && s < lim) {
ffc61ed2 5024 /* XXX could check well-formedness here */
a0ed51b3 5025 s += UTF8SKIP(s);
ffc61ed2 5026 }
a0ed51b3
LW
5027 if (off >= 0)
5028 return 0;
5029 }
5030 else {
5031 while (off++) {
1aa99e6b 5032 if (s > lim) {
a0ed51b3 5033 s--;
ffc61ed2 5034 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 5035 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 5036 s--;
ffc61ed2
JH
5037 }
5038 /* XXX could check well-formedness here */
a0ed51b3
LW
5039 }
5040 else
5041 break;
5042 }
5043 if (off <= 0)
5044 return 0;
5045 }
5046 return s;
5047}
51371543 5048
51371543 5049static void
acfe0abc 5050restore_pos(pTHX_ void *arg)
51371543 5051{
97aff369 5052 dVAR;
097eb12c 5053 regexp * const rex = (regexp *)arg;
51371543
GS
5054 if (PL_reg_eval_set) {
5055 if (PL_reg_oldsaved) {
4f639d21
DM
5056 rex->subbeg = PL_reg_oldsaved;
5057 rex->sublen = PL_reg_oldsavedlen;
f8c7b90f 5058#ifdef PERL_OLD_COPY_ON_WRITE
4f639d21 5059 rex->saved_copy = PL_nrs;
ed252734 5060#endif
4f639d21 5061 RX_MATCH_COPIED_on(rex);
51371543
GS
5062 }
5063 PL_reg_magic->mg_len = PL_reg_oldpos;
5064 PL_reg_eval_set = 0;
5065 PL_curpm = PL_reg_oldcurpm;
5066 }
5067}
33b8afdf
JH
5068
5069STATIC void
5070S_to_utf8_substr(pTHX_ register regexp *prog)
5071{
33b8afdf 5072 if (prog->float_substr && !prog->float_utf8) {
097eb12c
AL
5073 SV* const sv = newSVsv(prog->float_substr);
5074 prog->float_utf8 = sv;
33b8afdf
JH
5075 sv_utf8_upgrade(sv);
5076 if (SvTAIL(prog->float_substr))
5077 SvTAIL_on(sv);
5078 if (prog->float_substr == prog->check_substr)
5079 prog->check_utf8 = sv;
5080 }
5081 if (prog->anchored_substr && !prog->anchored_utf8) {
097eb12c
AL
5082 SV* const sv = newSVsv(prog->anchored_substr);
5083 prog->anchored_utf8 = sv;
33b8afdf
JH
5084 sv_utf8_upgrade(sv);
5085 if (SvTAIL(prog->anchored_substr))
5086 SvTAIL_on(sv);
5087 if (prog->anchored_substr == prog->check_substr)
5088 prog->check_utf8 = sv;
5089 }
5090}
5091
5092STATIC void
5093S_to_byte_substr(pTHX_ register regexp *prog)
5094{
97aff369 5095 dVAR;
33b8afdf 5096 if (prog->float_utf8 && !prog->float_substr) {
097eb12c
AL
5097 SV* sv = newSVsv(prog->float_utf8);
5098 prog->float_substr = sv;
33b8afdf
JH
5099 if (sv_utf8_downgrade(sv, TRUE)) {
5100 if (SvTAIL(prog->float_utf8))
5101 SvTAIL_on(sv);
5102 } else {
5103 SvREFCNT_dec(sv);
5104 prog->float_substr = sv = &PL_sv_undef;
5105 }
5106 if (prog->float_utf8 == prog->check_utf8)
5107 prog->check_substr = sv;
5108 }
5109 if (prog->anchored_utf8 && !prog->anchored_substr) {
097eb12c
AL
5110 SV* sv = newSVsv(prog->anchored_utf8);
5111 prog->anchored_substr = sv;
33b8afdf
JH
5112 if (sv_utf8_downgrade(sv, TRUE)) {
5113 if (SvTAIL(prog->anchored_utf8))
5114 SvTAIL_on(sv);
5115 } else {
5116 SvREFCNT_dec(sv);
5117 prog->anchored_substr = sv = &PL_sv_undef;
5118 }
5119 if (prog->anchored_utf8 == prog->check_utf8)
5120 prog->check_substr = sv;
5121 }
5122}
66610fdd
RGS
5123
5124/*
5125 * Local variables:
5126 * c-indentation-style: bsd
5127 * c-basic-offset: 4
5128 * indent-tabs-mode: t
5129 * End:
5130 *
37442d52
RGS
5131 * ex: set ts=8 sts=4 sw=4 noet:
5132 */