This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
disable 'split /(?{ split "" })/' test until recursive split is fixed
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
61296642
DM
8/* This file contains functions for executing a regular expression. See
9 * also regcomp.c which funnily enough, contains functions for compiling
166f8a29 10 * a regular expression.
e4a054ea
DM
11 *
12 * This file is also copied at build time to ext/re/re_exec.c, where
13 * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14 * This causes the main functions to be compiled under new names and with
15 * debugging support added, which makes "use re 'debug'" work.
16
166f8a29
DM
17 */
18
a687059c
LW
19/* NOTE: this is derived from Henry Spencer's regexp code, and should not
20 * confused with the original package (see point 3 below). Thanks, Henry!
21 */
22
23/* Additional note: this code is very heavily munged from Henry's version
24 * in places. In some spots I've traded clarity for efficiency, so don't
25 * blame Henry for some of the lack of readability.
26 */
27
e50aee73
AD
28/* The names of the functions have been changed from regcomp and
29 * regexec to pregcomp and pregexec in order to avoid conflicts
30 * with the POSIX routines of the same names.
31*/
32
b9d5759e
AD
33#ifdef PERL_EXT_RE_BUILD
34/* need to replace pregcomp et al, so enable that */
35# ifndef PERL_IN_XSUB_RE
36# define PERL_IN_XSUB_RE
37# endif
38/* need access to debugger hooks */
cad2e5aa 39# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
40# define DEBUGGING
41# endif
42#endif
43
44#ifdef PERL_IN_XSUB_RE
d06ea78c 45/* We *really* need to overwrite these symbols: */
56953603
IZ
46# define Perl_regexec_flags my_regexec
47# define Perl_regdump my_regdump
48# define Perl_regprop my_regprop
cad2e5aa 49# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
50/* *These* symbols are masked to allow static link. */
51# define Perl_pregexec my_pregexec
9041c2e3 52# define Perl_reginitcolors my_reginitcolors
490a3f88 53# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
54
55# define PERL_NO_GET_CONTEXT
9041c2e3 56#endif
56953603 57
a687059c 58/*
e50aee73 59 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
60 *
61 * Copyright (c) 1986 by University of Toronto.
62 * Written by Henry Spencer. Not derived from licensed software.
63 *
64 * Permission is granted to anyone to use this software for any
65 * purpose on any computer system, and to redistribute it freely,
66 * subject to the following restrictions:
67 *
68 * 1. The author is not responsible for the consequences of use of
69 * this software, no matter how awful, even if they arise
70 * from defects in it.
71 *
72 * 2. The origin of this software must not be misrepresented, either
73 * by explicit claim or by omission.
74 *
75 * 3. Altered versions must be plainly marked as such, and must not
76 * be misrepresented as being the original software.
77 *
78 **** Alterations to Henry's code are...
79 ****
4bb101f2 80 **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 81 **** 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 82 ****
9ef589d8
LW
83 **** You may distribute under the terms of either the GNU General Public
84 **** License or the Artistic License, as specified in the README file.
a687059c
LW
85 *
86 * Beware that some of this code is subtly aware of the way operator
87 * precedence is structured in regular expressions. Serious changes in
88 * regular-expression syntax might require a total rethink.
89 */
90#include "EXTERN.h"
864dbfa3 91#define PERL_IN_REGEXEC_C
a687059c 92#include "perl.h"
0f5d15d6 93
a687059c
LW
94#include "regcomp.h"
95
c277df42
IZ
96#define RF_tainted 1 /* tainted information used? */
97#define RF_warned 2 /* warned about big count? */
ce862d02 98#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
99#define RF_utf8 8 /* String contains multibyte chars? */
100
eb160463 101#define UTF ((PL_reg_flags & RF_utf8) != 0)
ce862d02
IZ
102
103#define RS_init 1 /* eval environment created */
104#define RS_set 2 /* replsv value is set */
c277df42 105
a687059c
LW
106#ifndef STATIC
107#define STATIC static
108#endif
109
32fc9b6a 110#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
7d3e948e 111
c277df42
IZ
112/*
113 * Forwards.
114 */
115
33b8afdf 116#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 117#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 118
52657f30
AL
119#define HOPc(pos,off) ((char *)(PL_reg_match_utf8 \
120 ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
121 : (U8*)(pos + off)))
122#define HOPBACKc(pos, off) ((char*) \
123 ((PL_reg_match_utf8) \
124 ? reghopmaybe3((U8*)pos, -off, ((U8*)(off < 0 ? PL_regeol : PL_bostr))) \
efb30f32
HS
125 : (pos - off >= PL_bostr) \
126 ? (U8*)(pos - off) \
52657f30 127 : (U8*)NULL) \
efb30f32 128)
efb30f32 129
1aa99e6b 130#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c 131#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b 132#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
1aa99e6b 133
1a4fad37
AL
134#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
135 if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
136#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
137#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
138#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
139#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
51371543 140
5f80c4cf 141/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
142#define JUMPABLE(rn) ( \
143 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
144 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
145 OP(rn) == PLUS || OP(rn) == MINMOD || \
146 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
147)
148
cca55fe3
JP
149#define HAS_TEXT(rn) ( \
150 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
151)
e2d8ce26 152
a84d97b6
HS
153/*
154 Search for mandatory following text node; for lookahead, the text must
155 follow but for lookbehind (rn->flags != 0) we skip to the next step.
156*/
cca55fe3 157#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 158 while (JUMPABLE(rn)) \
a84d97b6 159 if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 160 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
161 else if (OP(rn) == PLUS) \
162 rn = NEXTOPER(rn); \
a84d97b6
HS
163 else if (OP(rn) == IFMATCH) \
164 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
e2d8ce26 165 else rn += NEXT_OFF(rn); \
5f80c4cf 166} STMT_END
74750237 167
acfe0abc 168static void restore_pos(pTHX_ void *arg);
51371543 169
76e3520e 170STATIC CHECKPOINT
cea2e8a9 171S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 172{
97aff369 173 dVAR;
a3b680e6 174 const int retval = PL_savestack_ix;
b1ce53c5 175#define REGCP_PAREN_ELEMS 4
a3b680e6 176 const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
177 int p;
178
e49a9654
IH
179 if (paren_elems_to_push < 0)
180 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
181
a01268b5 182#define REGCP_OTHER_ELEMS 6
4b3c1a47 183 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 184 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 185/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
186 SSPUSHINT(PL_regendp[p]);
187 SSPUSHINT(PL_regstartp[p]);
3280af22 188 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
189 SSPUSHINT(p);
190 }
b1ce53c5 191/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
192 SSPUSHINT(PL_regsize);
193 SSPUSHINT(*PL_reglastparen);
a01268b5 194 SSPUSHINT(*PL_reglastcloseparen);
3280af22 195 SSPUSHPTR(PL_reginput);
41123dfd
JH
196#define REGCP_FRAME_ELEMS 2
197/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
198 * are needed for the regexp context stack bookkeeping. */
199 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 200 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 201
a0d0e21e
LW
202 return retval;
203}
204
c277df42 205/* These are needed since we do not localize EVAL nodes: */
a3621e74 206# define REGCP_SET(cp) DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, \
faccc32b 207 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 208 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 209
a3621e74 210# define REGCP_UNWIND(cp) DEBUG_EXECUTE_r(cp != PL_savestack_ix ? \
c3464db5 211 PerlIO_printf(Perl_debug_log, \
faccc32b 212 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 213 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 214
76e3520e 215STATIC char *
097eb12c 216S_regcppop(pTHX_ const regexp *rex)
a0d0e21e 217{
97aff369 218 dVAR;
b1ce53c5 219 I32 i;
a0d0e21e 220 char *input;
b1ce53c5 221
a3621e74
YO
222 GET_RE_DEBUG_FLAGS_DECL;
223
b1ce53c5 224 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 225 i = SSPOPINT;
b1ce53c5
JH
226 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
227 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 228 input = (char *) SSPOPPTR;
a01268b5 229 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
230 *PL_reglastparen = SSPOPINT;
231 PL_regsize = SSPOPINT;
b1ce53c5
JH
232
233 /* Now restore the parentheses context. */
41123dfd
JH
234 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
235 i > 0; i -= REGCP_PAREN_ELEMS) {
1df70142 236 I32 tmps;
097eb12c 237 U32 paren = (U32)SSPOPINT;
3280af22 238 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
239 PL_regstartp[paren] = SSPOPINT;
240 tmps = SSPOPINT;
3280af22
NIS
241 if (paren <= *PL_reglastparen)
242 PL_regendp[paren] = tmps;
a3621e74 243 DEBUG_EXECUTE_r(
c3464db5 244 PerlIO_printf(Perl_debug_log,
b900a521 245 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 246 (UV)paren, (IV)PL_regstartp[paren],
b900a521 247 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 248 (IV)PL_regendp[paren],
3280af22 249 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 250 );
a0d0e21e 251 }
a3621e74 252 DEBUG_EXECUTE_r(
4f639d21 253 if ((I32)(*PL_reglastparen + 1) <= rex->nparens) {
c3464db5 254 PerlIO_printf(Perl_debug_log,
faccc32b 255 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
4f639d21 256 (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
c277df42
IZ
257 }
258 );
daf18116 259#if 1
dafc8851
JH
260 /* It would seem that the similar code in regtry()
261 * already takes care of this, and in fact it is in
262 * a better location to since this code can #if 0-ed out
263 * but the code in regtry() is needed or otherwise tests
264 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
265 * (as of patchlevel 7877) will fail. Then again,
266 * this code seems to be necessary or otherwise
267 * building DynaLoader will fail:
268 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
269 * --jhi */
097eb12c
AL
270 for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
271 if (i > PL_regsize)
272 PL_regstartp[i] = -1;
273 PL_regendp[i] = -1;
a0d0e21e 274 }
dafc8851 275#endif
a0d0e21e
LW
276 return input;
277}
278
02db2b7b 279#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 280
95b24440 281#define TRYPAREN(paren, n, input, where) { \
29d1e993
HS
282 if (paren) { \
283 if (n) { \
284 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
285 PL_regendp[paren] = input - PL_bostr; \
286 } \
287 else \
288 PL_regendp[paren] = -1; \
289 } \
95b24440
DM
290 REGMATCH(next, where); \
291 if (result) \
29d1e993
HS
292 sayYES; \
293 if (paren && n) \
294 PL_regendp[paren] = -1; \
295}
296
297
a687059c 298/*
e50aee73 299 * pregexec and friends
a687059c
LW
300 */
301
302/*
c277df42 303 - pregexec - match a regexp against a string
a687059c 304 */
c277df42 305I32
864dbfa3 306Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 307 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
308/* strend: pointer to null at end of string */
309/* strbeg: real beginning of string */
310/* minend: end of match must be >=minend after stringarg. */
311/* nosave: For optimizations. */
312{
313 return
9041c2e3 314 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
315 nosave ? 0 : REXEC_COPY_STR);
316}
0f5d15d6 317
22e551b9 318
9041c2e3 319/*
cad2e5aa
JH
320 * Need to implement the following flags for reg_anch:
321 *
322 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
323 * USE_INTUIT_ML
324 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
325 * INTUIT_AUTORITATIVE_ML
326 * INTUIT_ONCE_NOML - Intuit can match in one location only.
327 * INTUIT_ONCE_ML
328 *
329 * Another flag for this function: SECOND_TIME (so that float substrs
330 * with giant delta may be not rechecked).
331 */
332
333/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
334
3f7c398e 335/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
cad2e5aa
JH
336 Otherwise, only SvCUR(sv) is used to get strbeg. */
337
338/* XXXX We assume that strpos is strbeg unless sv. */
339
6eb5f6b9
JH
340/* XXXX Some places assume that there is a fixed substring.
341 An update may be needed if optimizer marks as "INTUITable"
342 RExen without fixed substrings. Similarly, it is assumed that
343 lengths of all the strings are no more than minlen, thus they
344 cannot come from lookahead.
345 (Or minlen should take into account lookahead.) */
346
2c2d71f5
JH
347/* A failure to find a constant substring means that there is no need to make
348 an expensive call to REx engine, thus we celebrate a failure. Similarly,
349 finding a substring too deep into the string means that less calls to
30944b6d
IZ
350 regtry() should be needed.
351
352 REx compiler's optimizer found 4 possible hints:
353 a) Anchored substring;
354 b) Fixed substring;
355 c) Whether we are anchored (beginning-of-line or \G);
356 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 357 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
358 string which does not contradict any of them.
359 */
2c2d71f5 360
6eb5f6b9
JH
361/* Most of decisions we do here should have been done at compile time.
362 The nodes of the REx which we used for the search should have been
363 deleted from the finite automaton. */
364
cad2e5aa
JH
365char *
366Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
367 char *strend, U32 flags, re_scream_pos_data *data)
368{
97aff369 369 dVAR;
b7953727 370 register I32 start_shift = 0;
cad2e5aa 371 /* Should be nonnegative! */
b7953727 372 register I32 end_shift = 0;
2c2d71f5
JH
373 register char *s;
374 register SV *check;
a1933d95 375 char *strbeg;
cad2e5aa 376 char *t;
a3b680e6 377 const int do_utf8 = sv ? SvUTF8(sv) : 0; /* if no sv we have to assume bytes */
cad2e5aa 378 I32 ml_anch;
bd61b366
SS
379 register char *other_last = NULL; /* other substr checked before this */
380 char *check_at = NULL; /* check substr found at this pos */
1df70142 381 const I32 multiline = prog->reganch & PMf_MULTILINE;
30944b6d 382#ifdef DEBUGGING
890ce7af
AL
383 const char * const i_strpos = strpos;
384 SV * const dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 385#endif
a3621e74
YO
386
387 GET_RE_DEBUG_FLAGS_DECL;
388
a30b2f1f 389 RX_MATCH_UTF8_set(prog,do_utf8);
cad2e5aa 390
b8d68ded 391 if (prog->reganch & ROPT_UTF8) {
a3621e74 392 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded
JH
393 "UTF-8 regex...\n"));
394 PL_reg_flags |= RF_utf8;
395 }
396
a3621e74 397 DEBUG_EXECUTE_r({
1df70142 398 const char *s = PL_reg_match_utf8 ?
c728cb41
JH
399 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
400 strpos;
1df70142 401 const int len = PL_reg_match_utf8 ?
b8d68ded 402 strlen(s) : strend - strpos;
2a782b5b
JH
403 if (!PL_colorset)
404 reginitcolors();
b8d68ded 405 if (PL_reg_match_utf8)
a3621e74 406 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded 407 "UTF-8 target...\n"));
2a782b5b 408 PerlIO_printf(Perl_debug_log,
a0288114 409 "%sGuessing start of match, REx%s \"%s%.60s%s%s\" against \"%s%.*s%s%s\"...\n",
e4584336 410 PL_colors[4], PL_colors[5], PL_colors[0],
2a782b5b
JH
411 prog->precomp,
412 PL_colors[1],
413 (strlen(prog->precomp) > 60 ? "..." : ""),
414 PL_colors[0],
415 (int)(len > 60 ? 60 : len),
416 s, PL_colors[1],
417 (len > 60 ? "..." : "")
418 );
419 });
cad2e5aa 420
c344f387
JH
421 /* CHR_DIST() would be more correct here but it makes things slow. */
422 if (prog->minlen > strend - strpos) {
a3621e74 423 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584 424 "String too short... [re_intuit_start]\n"));
cad2e5aa 425 goto fail;
2c2d71f5 426 }
a1933d95 427 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 428 PL_regeol = strend;
33b8afdf
JH
429 if (do_utf8) {
430 if (!prog->check_utf8 && prog->check_substr)
431 to_utf8_substr(prog);
432 check = prog->check_utf8;
433 } else {
434 if (!prog->check_substr && prog->check_utf8)
435 to_byte_substr(prog);
436 check = prog->check_substr;
437 }
438 if (check == &PL_sv_undef) {
a3621e74 439 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
440 "Non-utf string cannot match utf check string\n"));
441 goto fail;
442 }
2c2d71f5 443 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
444 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
445 || ( (prog->reganch & ROPT_ANCH_BOL)
7fba1cd6 446 && !multiline ) ); /* Check after \n? */
cad2e5aa 447
7e25d62c
JH
448 if (!ml_anch) {
449 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
450 | ROPT_IMPLICIT)) /* not a real BOL */
3f7c398e 451 /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
7e25d62c
JH
452 && sv && !SvROK(sv)
453 && (strpos != strbeg)) {
a3621e74 454 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
7e25d62c
JH
455 goto fail;
456 }
457 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 458 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 459 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
460 I32 slen;
461
1aa99e6b 462 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
463 if (SvTAIL(check)) {
464 slen = SvCUR(check); /* >= 1 */
cad2e5aa 465
9041c2e3 466 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5 467 || (strend - s == slen && strend[-1] != '\n')) {
a3621e74 468 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
2c2d71f5 469 goto fail_finish;
cad2e5aa
JH
470 }
471 /* Now should match s[0..slen-2] */
472 slen--;
3f7c398e 473 if (slen && (*SvPVX_const(check) != *s
cad2e5aa 474 || (slen > 1
3f7c398e 475 && memNE(SvPVX_const(check), s, slen)))) {
2c2d71f5 476 report_neq:
a3621e74 477 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
2c2d71f5
JH
478 goto fail_finish;
479 }
cad2e5aa 480 }
3f7c398e 481 else if (*SvPVX_const(check) != *s
653099ff 482 || ((slen = SvCUR(check)) > 1
3f7c398e 483 && memNE(SvPVX_const(check), s, slen)))
2c2d71f5 484 goto report_neq;
c315bfe8 485 check_at = s;
2c2d71f5 486 goto success_at_start;
7e25d62c 487 }
cad2e5aa 488 }
2c2d71f5 489 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 490 s = strpos;
2c2d71f5 491 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 492 end_shift = prog->minlen - start_shift -
653099ff 493 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 494 if (!ml_anch) {
a3b680e6 495 const I32 end = prog->check_offset_max + CHR_SVLEN(check)
653099ff 496 - (SvTAIL(check) != 0);
a3b680e6 497 const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
498
499 if (end_shift < eshift)
500 end_shift = eshift;
501 }
cad2e5aa 502 }
2c2d71f5 503 else { /* Can match at random position */
cad2e5aa
JH
504 ml_anch = 0;
505 s = strpos;
2c2d71f5
JH
506 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
507 /* Should be nonnegative! */
508 end_shift = prog->minlen - start_shift -
653099ff 509 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
510 }
511
2c2d71f5 512#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 513 if (end_shift < 0)
6bbae5e6 514 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
515#endif
516
2c2d71f5
JH
517 restart:
518 /* Find a possible match in the region s..strend by looking for
519 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 520 if (flags & REXEC_SCREAM) {
cad2e5aa 521 I32 p = -1; /* Internal iterator of scream. */
a3b680e6 522 I32 * const pp = data ? data->scream_pos : &p;
cad2e5aa 523
2c2d71f5
JH
524 if (PL_screamfirst[BmRARE(check)] >= 0
525 || ( BmRARE(check) == '\n'
526 && (BmPREVIOUS(check) == SvCUR(check) - 1)
527 && SvTAIL(check) ))
9041c2e3 528 s = screaminstr(sv, check,
2c2d71f5 529 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 530 else
2c2d71f5 531 goto fail_finish;
4addbd3b
HS
532 /* we may be pointing at the wrong string */
533 if (s && RX_MATCH_COPIED(prog))
3f7c398e 534 s = strbeg + (s - SvPVX_const(sv));
cad2e5aa
JH
535 if (data)
536 *data->scream_olds = s;
537 }
f33976b4 538 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
539 s = fbm_instr((U8*)(s + start_shift),
540 (U8*)(strend - end_shift),
7fba1cd6 541 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 542 else
1aa99e6b
IH
543 s = fbm_instr(HOP3(s, start_shift, strend),
544 HOP3(strend, -end_shift, strbeg),
7fba1cd6 545 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
546
547 /* Update the count-of-usability, remove useless subpatterns,
548 unshift s. */
2c2d71f5 549
a0288114 550 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s %s substr \"%s%.*s%s\"%s%s",
2c2d71f5 551 (s ? "Found" : "Did not find"),
33b8afdf 552 (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
2c2d71f5 553 PL_colors[0],
7b0972df 554 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
3f7c398e 555 SvPVX_const(check),
2c2d71f5
JH
556 PL_colors[1], (SvTAIL(check) ? "$" : ""),
557 (s ? " at offset " : "...\n") ) );
558
559 if (!s)
560 goto fail_finish;
561
6eb5f6b9
JH
562 check_at = s;
563
2c2d71f5 564 /* Finish the diagnostic message */
a3621e74 565 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
566
567 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
568 Start with the other substr.
569 XXXX no SCREAM optimization yet - and a very coarse implementation
a0288114 570 XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
2c2d71f5
JH
571 *always* match. Probably should be marked during compile...
572 Probably it is right to do no SCREAM here...
573 */
574
33b8afdf 575 if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
30944b6d 576 /* Take into account the "other" substring. */
2c2d71f5
JH
577 /* XXXX May be hopelessly wrong for UTF... */
578 if (!other_last)
6eb5f6b9 579 other_last = strpos;
33b8afdf 580 if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
30944b6d
IZ
581 do_other_anchored:
582 {
890ce7af
AL
583 char * const last = HOP3c(s, -start_shift, strbeg);
584 char *last1, *last2;
2c2d71f5 585 char *s1 = s;
33b8afdf 586 SV* must;
2c2d71f5 587
2c2d71f5
JH
588 t = s - prog->check_offset_max;
589 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 590 && (!do_utf8
1aa99e6b 591 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 592 && t > strpos)))
30944b6d 593 /* EMPTY */;
2c2d71f5
JH
594 else
595 t = strpos;
1aa99e6b 596 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
597 if (t < other_last) /* These positions already checked */
598 t = other_last;
1aa99e6b 599 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
600 if (last < last1)
601 last1 = last;
602 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
603 /* On end-of-str: see comment below. */
33b8afdf
JH
604 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
605 if (must == &PL_sv_undef) {
606 s = (char*)NULL;
a3621e74 607 DEBUG_EXECUTE_r(must = prog->anchored_utf8); /* for debug */
33b8afdf
JH
608 }
609 else
610 s = fbm_instr(
611 (unsigned char*)t,
612 HOP3(HOP3(last1, prog->anchored_offset, strend)
613 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
614 must,
7fba1cd6 615 multiline ? FBMrf_MULTILINE : 0
33b8afdf 616 );
a3621e74 617 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a0288114 618 "%s anchored substr \"%s%.*s%s\"%s",
2c2d71f5
JH
619 (s ? "Found" : "Contradicts"),
620 PL_colors[0],
33b8afdf
JH
621 (int)(SvCUR(must)
622 - (SvTAIL(must)!=0)),
3f7c398e 623 SvPVX_const(must),
33b8afdf 624 PL_colors[1], (SvTAIL(must) ? "$" : "")));
2c2d71f5
JH
625 if (!s) {
626 if (last1 >= last2) {
a3621e74 627 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5
JH
628 ", giving up...\n"));
629 goto fail_finish;
630 }
a3621e74 631 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 632 ", trying floating at offset %ld...\n",
1aa99e6b
IH
633 (long)(HOP3c(s1, 1, strend) - i_strpos)));
634 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
635 s = HOP3c(last, 1, strend);
2c2d71f5
JH
636 goto restart;
637 }
638 else {
a3621e74 639 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 640 (long)(s - i_strpos)));
1aa99e6b
IH
641 t = HOP3c(s, -prog->anchored_offset, strbeg);
642 other_last = HOP3c(s, 1, strend);
30944b6d 643 s = s1;
2c2d71f5
JH
644 if (t == strpos)
645 goto try_at_start;
2c2d71f5
JH
646 goto try_at_offset;
647 }
30944b6d 648 }
2c2d71f5
JH
649 }
650 else { /* Take into account the floating substring. */
33b8afdf
JH
651 char *last, *last1;
652 char *s1 = s;
653 SV* must;
654
655 t = HOP3c(s, -start_shift, strbeg);
656 last1 = last =
657 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
658 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
659 last = HOP3c(t, prog->float_max_offset, strend);
660 s = HOP3c(t, prog->float_min_offset, strend);
661 if (s < other_last)
662 s = other_last;
2c2d71f5 663 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
33b8afdf
JH
664 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
665 /* fbm_instr() takes into account exact value of end-of-str
666 if the check is SvTAIL(ed). Since false positives are OK,
667 and end-of-str is not later than strend we are OK. */
668 if (must == &PL_sv_undef) {
669 s = (char*)NULL;
a3621e74 670 DEBUG_EXECUTE_r(must = prog->float_utf8); /* for debug message */
33b8afdf
JH
671 }
672 else
2c2d71f5 673 s = fbm_instr((unsigned char*)s,
33b8afdf
JH
674 (unsigned char*)last + SvCUR(must)
675 - (SvTAIL(must)!=0),
7fba1cd6 676 must, multiline ? FBMrf_MULTILINE : 0);
a0288114 677 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s floating substr \"%s%.*s%s\"%s",
33b8afdf
JH
678 (s ? "Found" : "Contradicts"),
679 PL_colors[0],
680 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 681 SvPVX_const(must),
33b8afdf
JH
682 PL_colors[1], (SvTAIL(must) ? "$" : "")));
683 if (!s) {
684 if (last1 == last) {
a3621e74 685 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
686 ", giving up...\n"));
687 goto fail_finish;
2c2d71f5 688 }
a3621e74 689 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
690 ", trying anchored starting at offset %ld...\n",
691 (long)(s1 + 1 - i_strpos)));
692 other_last = last;
693 s = HOP3c(t, 1, strend);
694 goto restart;
695 }
696 else {
a3621e74 697 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
33b8afdf
JH
698 (long)(s - i_strpos)));
699 other_last = s; /* Fix this later. --Hugo */
700 s = s1;
701 if (t == strpos)
702 goto try_at_start;
703 goto try_at_offset;
704 }
2c2d71f5 705 }
cad2e5aa 706 }
2c2d71f5
JH
707
708 t = s - prog->check_offset_max;
2c2d71f5 709 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 710 && (!do_utf8
1aa99e6b
IH
711 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
712 && t > strpos))) {
2c2d71f5
JH
713 /* Fixed substring is found far enough so that the match
714 cannot start at strpos. */
715 try_at_offset:
cad2e5aa 716 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
717 /* Eventually fbm_*() should handle this, but often
718 anchored_offset is not 0, so this check will not be wasted. */
719 /* XXXX In the code below we prefer to look for "^" even in
720 presence of anchored substrings. And we search even
721 beyond the found float position. These pessimizations
722 are historical artefacts only. */
723 find_anchor:
2c2d71f5 724 while (t < strend - prog->minlen) {
cad2e5aa 725 if (*t == '\n') {
4ee3650e 726 if (t < check_at - prog->check_offset_min) {
33b8afdf 727 if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
4ee3650e
GS
728 /* Since we moved from the found position,
729 we definitely contradict the found anchored
30944b6d
IZ
730 substr. Due to the above check we do not
731 contradict "check" substr.
732 Thus we can arrive here only if check substr
733 is float. Redo checking for "other"=="fixed".
734 */
9041c2e3 735 strpos = t + 1;
a3621e74 736 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
e4584336 737 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
30944b6d
IZ
738 goto do_other_anchored;
739 }
4ee3650e
GS
740 /* We don't contradict the found floating substring. */
741 /* XXXX Why not check for STCLASS? */
cad2e5aa 742 s = t + 1;
a3621e74 743 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
e4584336 744 PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
745 goto set_useful;
746 }
4ee3650e
GS
747 /* Position contradicts check-string */
748 /* XXXX probably better to look for check-string
749 than for "\n", so one should lower the limit for t? */
a3621e74 750 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
e4584336 751 PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 752 other_last = strpos = s = t + 1;
cad2e5aa
JH
753 goto restart;
754 }
755 t++;
756 }
a3621e74 757 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
e4584336 758 PL_colors[0], PL_colors[1]));
2c2d71f5 759 goto fail_finish;
cad2e5aa 760 }
f5952150 761 else {
a3621e74 762 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
e4584336 763 PL_colors[0], PL_colors[1]));
f5952150 764 }
cad2e5aa
JH
765 s = t;
766 set_useful:
33b8afdf 767 ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
cad2e5aa
JH
768 }
769 else {
f5952150 770 /* The found string does not prohibit matching at strpos,
2c2d71f5 771 - no optimization of calling REx engine can be performed,
f5952150
GS
772 unless it was an MBOL and we are not after MBOL,
773 or a future STCLASS check will fail this. */
2c2d71f5
JH
774 try_at_start:
775 /* Even in this situation we may use MBOL flag if strpos is offset
776 wrt the start of the string. */
05b4157f 777 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 778 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
779 /* May be due to an implicit anchor of m{.*foo} */
780 && !(prog->reganch & ROPT_IMPLICIT))
781 {
cad2e5aa
JH
782 t = strpos;
783 goto find_anchor;
784 }
a3621e74 785 DEBUG_EXECUTE_r( if (ml_anch)
f5952150 786 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
e4584336 787 (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
30944b6d 788 );
2c2d71f5 789 success_at_start:
30944b6d 790 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
33b8afdf
JH
791 && (do_utf8 ? (
792 prog->check_utf8 /* Could be deleted already */
793 && --BmUSEFUL(prog->check_utf8) < 0
794 && (prog->check_utf8 == prog->float_utf8)
795 ) : (
796 prog->check_substr /* Could be deleted already */
797 && --BmUSEFUL(prog->check_substr) < 0
798 && (prog->check_substr == prog->float_substr)
799 )))
66e933ab 800 {
cad2e5aa 801 /* If flags & SOMETHING - do not do it many times on the same match */
a3621e74 802 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
33b8afdf
JH
803 SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
804 if (do_utf8 ? prog->check_substr : prog->check_utf8)
805 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
a0714e2c
SS
806 prog->check_substr = prog->check_utf8 = NULL; /* disable */
807 prog->float_substr = prog->float_utf8 = NULL; /* clear */
808 check = NULL; /* abort */
cad2e5aa 809 s = strpos;
3cf5c195
IZ
810 /* XXXX This is a remnant of the old implementation. It
811 looks wasteful, since now INTUIT can use many
6eb5f6b9 812 other heuristics. */
cad2e5aa
JH
813 prog->reganch &= ~RE_USE_INTUIT;
814 }
815 else
816 s = strpos;
817 }
818
6eb5f6b9
JH
819 /* Last resort... */
820 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
821 if (prog->regstclass) {
822 /* minlen == 0 is possible if regstclass is \b or \B,
823 and the fixed substr is ''$.
824 Since minlen is already taken into account, s+1 is before strend;
825 accidentally, minlen >= 1 guaranties no false positives at s + 1
826 even for \b or \B. But (minlen? 1 : 0) below assumes that
827 regstclass does not come from lookahead... */
828 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
829 This leaves EXACTF only, which is dealt with in find_byclass(). */
890ce7af 830 const U8* const str = (U8*)STRING(prog->regstclass);
06b5626a 831 const int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 832 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 833 : 1);
a3b680e6 834 const char * const endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
1aa99e6b 835 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
33b8afdf 836 : (prog->float_substr || prog->float_utf8
1aa99e6b
IH
837 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
838 cl_l, strend)
839 : strend);
6eb5f6b9
JH
840
841 t = s;
06b5626a 842 s = find_byclass(prog, prog->regstclass, s, endpos, 1);
6eb5f6b9
JH
843 if (!s) {
844#ifdef DEBUGGING
cbbf8932 845 const char *what = NULL;
6eb5f6b9
JH
846#endif
847 if (endpos == strend) {
a3621e74 848 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
849 "Could not match STCLASS...\n") );
850 goto fail;
851 }
a3621e74 852 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab 853 "This position contradicts STCLASS...\n") );
653099ff
GS
854 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
855 goto fail;
6eb5f6b9 856 /* Contradict one of substrings */
33b8afdf
JH
857 if (prog->anchored_substr || prog->anchored_utf8) {
858 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
a3621e74 859 DEBUG_EXECUTE_r( what = "anchored" );
6eb5f6b9 860 hop_and_restart:
1aa99e6b 861 s = HOP3c(t, 1, strend);
66e933ab
GS
862 if (s + start_shift + end_shift > strend) {
863 /* XXXX Should be taken into account earlier? */
a3621e74 864 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab
GS
865 "Could not match STCLASS...\n") );
866 goto fail;
867 }
5e39e1e5
HS
868 if (!check)
869 goto giveup;
a3621e74 870 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 871 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
872 what, (long)(s + start_shift - i_strpos)) );
873 goto restart;
874 }
66e933ab 875 /* Have both, check_string is floating */
6eb5f6b9
JH
876 if (t + start_shift >= check_at) /* Contradicts floating=check */
877 goto retry_floating_check;
878 /* Recheck anchored substring, but not floating... */
9041c2e3 879 s = check_at;
5e39e1e5
HS
880 if (!check)
881 goto giveup;
a3621e74 882 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 883 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
884 (long)(other_last - i_strpos)) );
885 goto do_other_anchored;
886 }
60e71179
GS
887 /* Another way we could have checked stclass at the
888 current position only: */
889 if (ml_anch) {
890 s = t = t + 1;
5e39e1e5
HS
891 if (!check)
892 goto giveup;
a3621e74 893 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 894 "Looking for /%s^%s/m starting at offset %ld...\n",
e4584336 895 PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
60e71179 896 goto try_at_offset;
66e933ab 897 }
33b8afdf 898 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
60e71179 899 goto fail;
6eb5f6b9
JH
900 /* Check is floating subtring. */
901 retry_floating_check:
902 t = check_at - start_shift;
a3621e74 903 DEBUG_EXECUTE_r( what = "floating" );
6eb5f6b9
JH
904 goto hop_and_restart;
905 }
b7953727 906 if (t != s) {
a3621e74 907 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 908 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
909 (long)(t - i_strpos), (long)(s - i_strpos))
910 );
911 }
912 else {
a3621e74 913 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b7953727
JH
914 "Does not contradict STCLASS...\n");
915 );
916 }
6eb5f6b9 917 }
5e39e1e5 918 giveup:
a3621e74 919 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
5e39e1e5
HS
920 PL_colors[4], (check ? "Guessed" : "Giving up"),
921 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 922 return s;
2c2d71f5
JH
923
924 fail_finish: /* Substring not found */
33b8afdf
JH
925 if (prog->check_substr || prog->check_utf8) /* could be removed already */
926 BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
cad2e5aa 927 fail:
a3621e74 928 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
e4584336 929 PL_colors[4], PL_colors[5]));
bd61b366 930 return NULL;
cad2e5aa 931}
9661b544 932
6eb5f6b9 933/* We know what class REx starts with. Try to find this position... */
3c3eec57 934STATIC char *
097eb12c 935S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *strend, I32 norun)
a687059c 936{
27da23d5 937 dVAR;
1df70142 938 const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
6eb5f6b9 939 char *m;
d8093b23 940 STRLEN ln;
5dab1207 941 STRLEN lnc;
078c425b 942 register STRLEN uskip;
d8093b23
G
943 unsigned int c1;
944 unsigned int c2;
6eb5f6b9
JH
945 char *e;
946 register I32 tmp = 1; /* Scratch variable? */
a3b680e6 947 register const bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 948
6eb5f6b9
JH
949 /* We know what class it must start with. */
950 switch (OP(c)) {
6eb5f6b9 951 case ANYOF:
388cc4de 952 if (do_utf8) {
078c425b 953 while (s + (uskip = UTF8SKIP(s)) <= strend) {
388cc4de
HS
954 if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
955 !UTF8_IS_INVARIANT((U8)s[0]) ?
32fc9b6a
DM
956 reginclass(prog, c, (U8*)s, 0, do_utf8) :
957 REGINCLASS(prog, c, (U8*)s)) {
388cc4de
HS
958 if (tmp && (norun || regtry(prog, s)))
959 goto got_it;
960 else
961 tmp = doevery;
962 }
963 else
964 tmp = 1;
078c425b 965 s += uskip;
388cc4de
HS
966 }
967 }
968 else {
969 while (s < strend) {
970 STRLEN skip = 1;
971
32fc9b6a 972 if (REGINCLASS(prog, c, (U8*)s) ||
388cc4de
HS
973 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
974 /* The assignment of 2 is intentional:
975 * for the folded sharp s, the skip is 2. */
976 (skip = SHARP_S_SKIP))) {
977 if (tmp && (norun || regtry(prog, s)))
978 goto got_it;
979 else
980 tmp = doevery;
981 }
982 else
983 tmp = 1;
984 s += skip;
985 }
a0d0e21e 986 }
6eb5f6b9 987 break;
f33976b4
DB
988 case CANY:
989 while (s < strend) {
990 if (tmp && (norun || regtry(prog, s)))
991 goto got_it;
992 else
993 tmp = doevery;
994 s++;
995 }
996 break;
6eb5f6b9 997 case EXACTF:
5dab1207
NIS
998 m = STRING(c);
999 ln = STR_LEN(c); /* length to match in octets/bytes */
1000 lnc = (I32) ln; /* length to match in characters */
1aa99e6b 1001 if (UTF) {
a2a2844f 1002 STRLEN ulen1, ulen2;
5dab1207 1003 U8 *sm = (U8 *) m;
89ebb4a3
JH
1004 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1005 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4ad0818d 1006 const U32 uniflags = UTF8_ALLOW_DEFAULT;
a2a2844f
JH
1007
1008 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1009 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1010
89ebb4a3 1011 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
041457d9 1012 0, uniflags);
89ebb4a3 1013 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
041457d9 1014 0, uniflags);
5dab1207
NIS
1015 lnc = 0;
1016 while (sm < ((U8 *) m + ln)) {
1017 lnc++;
1018 sm += UTF8SKIP(sm);
1019 }
1aa99e6b
IH
1020 }
1021 else {
1022 c1 = *(U8*)m;
1023 c2 = PL_fold[c1];
1024 }
6eb5f6b9
JH
1025 goto do_exactf;
1026 case EXACTFL:
5dab1207
NIS
1027 m = STRING(c);
1028 ln = STR_LEN(c);
1029 lnc = (I32) ln;
d8093b23 1030 c1 = *(U8*)m;
6eb5f6b9
JH
1031 c2 = PL_fold_locale[c1];
1032 do_exactf:
db12adc6 1033 e = HOP3c(strend, -((I32)lnc), s);
b3c9acc1 1034
6eb5f6b9
JH
1035 if (norun && e < s)
1036 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 1037
60a8b682
JH
1038 /* The idea in the EXACTF* cases is to first find the
1039 * first character of the EXACTF* node and then, if
1040 * necessary, case-insensitively compare the full
1041 * text of the node. The c1 and c2 are the first
1042 * characters (though in Unicode it gets a bit
1043 * more complicated because there are more cases
7f16dd3d
JH
1044 * than just upper and lower: one needs to use
1045 * the so-called folding case for case-insensitive
1046 * matching (called "loose matching" in Unicode).
1047 * ibcmp_utf8() will do just that. */
60a8b682 1048
1aa99e6b 1049 if (do_utf8) {
575cac57 1050 UV c, f;
89ebb4a3 1051 U8 tmpbuf [UTF8_MAXBYTES+1];
575cac57 1052 STRLEN len, foldlen;
4ad0818d 1053 const U32 uniflags = UTF8_ALLOW_DEFAULT;
09091399 1054 if (c1 == c2) {
5dab1207
NIS
1055 /* Upper and lower of 1st char are equal -
1056 * probably not a "letter". */
1aa99e6b 1057 while (s <= e) {
89ebb4a3 1058 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1059 uniflags);
80aecb99
JH
1060 if ( c == c1
1061 && (ln == len ||
66423254 1062 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1063 m, (char **)0, ln, (bool)UTF))
55da9344 1064 && (norun || regtry(prog, s)) )
1aa99e6b 1065 goto got_it;
80aecb99 1066 else {
1df70142 1067 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1068 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1069 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1070 if ( f != c
1071 && (f == c1 || f == c2)
1072 && (ln == foldlen ||
66423254
JH
1073 !ibcmp_utf8((char *) foldbuf,
1074 (char **)0, foldlen, do_utf8,
d07ddd77 1075 m,
eb160463 1076 (char **)0, ln, (bool)UTF))
80aecb99
JH
1077 && (norun || regtry(prog, s)) )
1078 goto got_it;
1079 }
1aa99e6b
IH
1080 s += len;
1081 }
09091399
JH
1082 }
1083 else {
1aa99e6b 1084 while (s <= e) {
89ebb4a3 1085 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1086 uniflags);
80aecb99 1087
60a8b682 1088 /* Handle some of the three Greek sigmas cases.
8c01da3c
JH
1089 * Note that not all the possible combinations
1090 * are handled here: some of them are handled
1091 * by the standard folding rules, and some of
1092 * them (the character class or ANYOF cases)
1093 * are handled during compiletime in
1094 * regexec.c:S_regclass(). */
880bd946
JH
1095 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1096 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1097 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99
JH
1098
1099 if ( (c == c1 || c == c2)
1100 && (ln == len ||
66423254 1101 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1102 m, (char **)0, ln, (bool)UTF))
55da9344 1103 && (norun || regtry(prog, s)) )
1aa99e6b 1104 goto got_it;
80aecb99 1105 else {
1df70142 1106 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1107 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1108 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1109 if ( f != c
1110 && (f == c1 || f == c2)
1111 && (ln == foldlen ||
a6872d42 1112 !ibcmp_utf8((char *) foldbuf,
66423254 1113 (char **)0, foldlen, do_utf8,
d07ddd77 1114 m,
eb160463 1115 (char **)0, ln, (bool)UTF))
80aecb99
JH
1116 && (norun || regtry(prog, s)) )
1117 goto got_it;
1118 }
1aa99e6b
IH
1119 s += len;
1120 }
09091399 1121 }
1aa99e6b
IH
1122 }
1123 else {
1124 if (c1 == c2)
1125 while (s <= e) {
1126 if ( *(U8*)s == c1
1127 && (ln == 1 || !(OP(c) == EXACTF
1128 ? ibcmp(s, m, ln)
1129 : ibcmp_locale(s, m, ln)))
1130 && (norun || regtry(prog, s)) )
1131 goto got_it;
1132 s++;
1133 }
1134 else
1135 while (s <= e) {
1136 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1137 && (ln == 1 || !(OP(c) == EXACTF
1138 ? ibcmp(s, m, ln)
1139 : ibcmp_locale(s, m, ln)))
1140 && (norun || regtry(prog, s)) )
1141 goto got_it;
1142 s++;
1143 }
b3c9acc1
IZ
1144 }
1145 break;
bbce6d69 1146 case BOUNDL:
3280af22 1147 PL_reg_flags |= RF_tainted;
bbce6d69 1148 /* FALL THROUGH */
a0d0e21e 1149 case BOUND:
ffc61ed2 1150 if (do_utf8) {
12d33761 1151 if (s == PL_bostr)
ffc61ed2
JH
1152 tmp = '\n';
1153 else {
6136c704 1154 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1155 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1156 }
1157 tmp = ((OP(c) == BOUND ?
9041c2e3 1158 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1159 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1160 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1161 if (tmp == !(OP(c) == BOUND ?
3568d838 1162 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1163 isALNUM_LC_utf8((U8*)s)))
1164 {
1165 tmp = !tmp;
1166 if ((norun || regtry(prog, s)))
1167 goto got_it;
1168 }
078c425b 1169 s += uskip;
a687059c 1170 }
a0d0e21e 1171 }
667bb95a 1172 else {
12d33761 1173 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1174 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1175 while (s < strend) {
1176 if (tmp ==
1177 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1178 tmp = !tmp;
1179 if ((norun || regtry(prog, s)))
1180 goto got_it;
1181 }
1182 s++;
a0ed51b3 1183 }
a0ed51b3 1184 }
6eb5f6b9 1185 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1186 goto got_it;
1187 break;
bbce6d69 1188 case NBOUNDL:
3280af22 1189 PL_reg_flags |= RF_tainted;
bbce6d69 1190 /* FALL THROUGH */
a0d0e21e 1191 case NBOUND:
ffc61ed2 1192 if (do_utf8) {
12d33761 1193 if (s == PL_bostr)
ffc61ed2
JH
1194 tmp = '\n';
1195 else {
6136c704 1196 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1197 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1198 }
1199 tmp = ((OP(c) == NBOUND ?
9041c2e3 1200 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1201 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1202 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1203 if (tmp == !(OP(c) == NBOUND ?
3568d838 1204 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1205 isALNUM_LC_utf8((U8*)s)))
1206 tmp = !tmp;
1207 else if ((norun || regtry(prog, s)))
1208 goto got_it;
078c425b 1209 s += uskip;
ffc61ed2 1210 }
a0d0e21e 1211 }
667bb95a 1212 else {
12d33761 1213 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1214 tmp = ((OP(c) == NBOUND ?
1215 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1216 while (s < strend) {
1217 if (tmp ==
1218 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1219 tmp = !tmp;
1220 else if ((norun || regtry(prog, s)))
1221 goto got_it;
1222 s++;
1223 }
a0ed51b3 1224 }
6eb5f6b9 1225 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1226 goto got_it;
1227 break;
a0d0e21e 1228 case ALNUM:
ffc61ed2 1229 if (do_utf8) {
1a4fad37 1230 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1231 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1232 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1233 if (tmp && (norun || regtry(prog, s)))
1234 goto got_it;
1235 else
1236 tmp = doevery;
1237 }
bbce6d69 1238 else
ffc61ed2 1239 tmp = 1;
078c425b 1240 s += uskip;
bbce6d69 1241 }
bbce6d69 1242 }
ffc61ed2
JH
1243 else {
1244 while (s < strend) {
1245 if (isALNUM(*s)) {
1246 if (tmp && (norun || regtry(prog, s)))
1247 goto got_it;
1248 else
1249 tmp = doevery;
1250 }
a0ed51b3 1251 else
ffc61ed2
JH
1252 tmp = 1;
1253 s++;
a0ed51b3 1254 }
a0ed51b3
LW
1255 }
1256 break;
bbce6d69 1257 case ALNUML:
3280af22 1258 PL_reg_flags |= RF_tainted;
ffc61ed2 1259 if (do_utf8) {
078c425b 1260 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1261 if (isALNUM_LC_utf8((U8*)s)) {
1262 if (tmp && (norun || regtry(prog, s)))
1263 goto got_it;
1264 else
1265 tmp = doevery;
1266 }
a687059c 1267 else
ffc61ed2 1268 tmp = 1;
078c425b 1269 s += uskip;
a0d0e21e 1270 }
a0d0e21e 1271 }
ffc61ed2
JH
1272 else {
1273 while (s < strend) {
1274 if (isALNUM_LC(*s)) {
1275 if (tmp && (norun || regtry(prog, s)))
1276 goto got_it;
1277 else
1278 tmp = doevery;
1279 }
a0ed51b3 1280 else
ffc61ed2
JH
1281 tmp = 1;
1282 s++;
a0ed51b3 1283 }
a0ed51b3
LW
1284 }
1285 break;
a0d0e21e 1286 case NALNUM:
ffc61ed2 1287 if (do_utf8) {
1a4fad37 1288 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1289 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1290 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1291 if (tmp && (norun || regtry(prog, s)))
1292 goto got_it;
1293 else
1294 tmp = doevery;
1295 }
bbce6d69 1296 else
ffc61ed2 1297 tmp = 1;
078c425b 1298 s += uskip;
bbce6d69 1299 }
bbce6d69 1300 }
ffc61ed2
JH
1301 else {
1302 while (s < strend) {
1303 if (!isALNUM(*s)) {
1304 if (tmp && (norun || regtry(prog, s)))
1305 goto got_it;
1306 else
1307 tmp = doevery;
1308 }
a0ed51b3 1309 else
ffc61ed2
JH
1310 tmp = 1;
1311 s++;
a0ed51b3 1312 }
a0ed51b3
LW
1313 }
1314 break;
bbce6d69 1315 case NALNUML:
3280af22 1316 PL_reg_flags |= RF_tainted;
ffc61ed2 1317 if (do_utf8) {
078c425b 1318 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1319 if (!isALNUM_LC_utf8((U8*)s)) {
1320 if (tmp && (norun || regtry(prog, s)))
1321 goto got_it;
1322 else
1323 tmp = doevery;
1324 }
a687059c 1325 else
ffc61ed2 1326 tmp = 1;
078c425b 1327 s += uskip;
a687059c 1328 }
a0d0e21e 1329 }
ffc61ed2
JH
1330 else {
1331 while (s < strend) {
1332 if (!isALNUM_LC(*s)) {
1333 if (tmp && (norun || regtry(prog, s)))
1334 goto got_it;
1335 else
1336 tmp = doevery;
1337 }
a0ed51b3 1338 else
ffc61ed2
JH
1339 tmp = 1;
1340 s++;
a0ed51b3 1341 }
a0ed51b3
LW
1342 }
1343 break;
a0d0e21e 1344 case SPACE:
ffc61ed2 1345 if (do_utf8) {
1a4fad37 1346 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1347 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1348 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1349 if (tmp && (norun || regtry(prog, s)))
1350 goto got_it;
1351 else
1352 tmp = doevery;
1353 }
a0d0e21e 1354 else
ffc61ed2 1355 tmp = 1;
078c425b 1356 s += uskip;
2304df62 1357 }
a0d0e21e 1358 }
ffc61ed2
JH
1359 else {
1360 while (s < strend) {
1361 if (isSPACE(*s)) {
1362 if (tmp && (norun || regtry(prog, s)))
1363 goto got_it;
1364 else
1365 tmp = doevery;
1366 }
a0ed51b3 1367 else
ffc61ed2
JH
1368 tmp = 1;
1369 s++;
a0ed51b3 1370 }
a0ed51b3
LW
1371 }
1372 break;
bbce6d69 1373 case SPACEL:
3280af22 1374 PL_reg_flags |= RF_tainted;
ffc61ed2 1375 if (do_utf8) {
078c425b 1376 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1377 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1378 if (tmp && (norun || regtry(prog, s)))
1379 goto got_it;
1380 else
1381 tmp = doevery;
1382 }
bbce6d69 1383 else
ffc61ed2 1384 tmp = 1;
078c425b 1385 s += uskip;
bbce6d69 1386 }
bbce6d69 1387 }
ffc61ed2
JH
1388 else {
1389 while (s < strend) {
1390 if (isSPACE_LC(*s)) {
1391 if (tmp && (norun || regtry(prog, s)))
1392 goto got_it;
1393 else
1394 tmp = doevery;
1395 }
a0ed51b3 1396 else
ffc61ed2
JH
1397 tmp = 1;
1398 s++;
a0ed51b3 1399 }
a0ed51b3
LW
1400 }
1401 break;
a0d0e21e 1402 case NSPACE:
ffc61ed2 1403 if (do_utf8) {
1a4fad37 1404 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1405 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1406 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1407 if (tmp && (norun || regtry(prog, s)))
1408 goto got_it;
1409 else
1410 tmp = doevery;
1411 }
a0d0e21e 1412 else
ffc61ed2 1413 tmp = 1;
078c425b 1414 s += uskip;
a687059c 1415 }
a0d0e21e 1416 }
ffc61ed2
JH
1417 else {
1418 while (s < strend) {
1419 if (!isSPACE(*s)) {
1420 if (tmp && (norun || regtry(prog, s)))
1421 goto got_it;
1422 else
1423 tmp = doevery;
1424 }
a0ed51b3 1425 else
ffc61ed2
JH
1426 tmp = 1;
1427 s++;
a0ed51b3 1428 }
a0ed51b3
LW
1429 }
1430 break;
bbce6d69 1431 case NSPACEL:
3280af22 1432 PL_reg_flags |= RF_tainted;
ffc61ed2 1433 if (do_utf8) {
078c425b 1434 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1435 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1436 if (tmp && (norun || regtry(prog, s)))
1437 goto got_it;
1438 else
1439 tmp = doevery;
1440 }
bbce6d69 1441 else
ffc61ed2 1442 tmp = 1;
078c425b 1443 s += uskip;
bbce6d69 1444 }
bbce6d69 1445 }
ffc61ed2
JH
1446 else {
1447 while (s < strend) {
1448 if (!isSPACE_LC(*s)) {
1449 if (tmp && (norun || regtry(prog, s)))
1450 goto got_it;
1451 else
1452 tmp = doevery;
1453 }
a0ed51b3 1454 else
ffc61ed2
JH
1455 tmp = 1;
1456 s++;
a0ed51b3 1457 }
a0ed51b3
LW
1458 }
1459 break;
a0d0e21e 1460 case DIGIT:
ffc61ed2 1461 if (do_utf8) {
1a4fad37 1462 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1463 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1464 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1465 if (tmp && (norun || regtry(prog, s)))
1466 goto got_it;
1467 else
1468 tmp = doevery;
1469 }
a0d0e21e 1470 else
ffc61ed2 1471 tmp = 1;
078c425b 1472 s += uskip;
2b69d0c2 1473 }
a0d0e21e 1474 }
ffc61ed2
JH
1475 else {
1476 while (s < strend) {
1477 if (isDIGIT(*s)) {
1478 if (tmp && (norun || regtry(prog, s)))
1479 goto got_it;
1480 else
1481 tmp = doevery;
1482 }
a0ed51b3 1483 else
ffc61ed2
JH
1484 tmp = 1;
1485 s++;
a0ed51b3 1486 }
a0ed51b3
LW
1487 }
1488 break;
b8c5462f
JH
1489 case DIGITL:
1490 PL_reg_flags |= RF_tainted;
ffc61ed2 1491 if (do_utf8) {
078c425b 1492 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1493 if (isDIGIT_LC_utf8((U8*)s)) {
1494 if (tmp && (norun || regtry(prog, s)))
1495 goto got_it;
1496 else
1497 tmp = doevery;
1498 }
b8c5462f 1499 else
ffc61ed2 1500 tmp = 1;
078c425b 1501 s += uskip;
b8c5462f 1502 }
b8c5462f 1503 }
ffc61ed2
JH
1504 else {
1505 while (s < strend) {
1506 if (isDIGIT_LC(*s)) {
1507 if (tmp && (norun || regtry(prog, s)))
1508 goto got_it;
1509 else
1510 tmp = doevery;
1511 }
b8c5462f 1512 else
ffc61ed2
JH
1513 tmp = 1;
1514 s++;
b8c5462f 1515 }
b8c5462f
JH
1516 }
1517 break;
a0d0e21e 1518 case NDIGIT:
ffc61ed2 1519 if (do_utf8) {
1a4fad37 1520 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1521 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1522 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1523 if (tmp && (norun || regtry(prog, s)))
1524 goto got_it;
1525 else
1526 tmp = doevery;
1527 }
a0d0e21e 1528 else
ffc61ed2 1529 tmp = 1;
078c425b 1530 s += uskip;
a687059c 1531 }
a0d0e21e 1532 }
ffc61ed2
JH
1533 else {
1534 while (s < strend) {
1535 if (!isDIGIT(*s)) {
1536 if (tmp && (norun || regtry(prog, s)))
1537 goto got_it;
1538 else
1539 tmp = doevery;
1540 }
a0ed51b3 1541 else
ffc61ed2
JH
1542 tmp = 1;
1543 s++;
a0ed51b3 1544 }
a0ed51b3
LW
1545 }
1546 break;
b8c5462f
JH
1547 case NDIGITL:
1548 PL_reg_flags |= RF_tainted;
ffc61ed2 1549 if (do_utf8) {
078c425b 1550 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1551 if (!isDIGIT_LC_utf8((U8*)s)) {
1552 if (tmp && (norun || regtry(prog, s)))
1553 goto got_it;
1554 else
1555 tmp = doevery;
1556 }
b8c5462f 1557 else
ffc61ed2 1558 tmp = 1;
078c425b 1559 s += uskip;
b8c5462f 1560 }
a0ed51b3 1561 }
ffc61ed2
JH
1562 else {
1563 while (s < strend) {
1564 if (!isDIGIT_LC(*s)) {
1565 if (tmp && (norun || regtry(prog, s)))
1566 goto got_it;
1567 else
1568 tmp = doevery;
1569 }
cf93c79d 1570 else
ffc61ed2
JH
1571 tmp = 1;
1572 s++;
b8c5462f 1573 }
b8c5462f
JH
1574 }
1575 break;
b3c9acc1 1576 default:
3c3eec57
GS
1577 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1578 break;
d6a28714 1579 }
6eb5f6b9
JH
1580 return 0;
1581 got_it:
1582 return s;
1583}
1584
1585/*
1586 - regexec_flags - match a regexp against a string
1587 */
1588I32
1589Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1590 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1591/* strend: pointer to null at end of string */
1592/* strbeg: real beginning of string */
1593/* minend: end of match must be >=minend after stringarg. */
1594/* data: May be used for some additional optimizations. */
1595/* nosave: For optimizations. */
1596{
97aff369 1597 dVAR;
6eb5f6b9
JH
1598 register char *s;
1599 register regnode *c;
1600 register char *startpos = stringarg;
6eb5f6b9
JH
1601 I32 minlen; /* must match at least this many chars */
1602 I32 dontbother = 0; /* how many characters not to try at end */
6eb5f6b9
JH
1603 I32 end_shift = 0; /* Same for the end. */ /* CC */
1604 I32 scream_pos = -1; /* Internal iterator of scream. */
ccac19ea 1605 char *scream_olds = NULL;
6eb5f6b9 1606 SV* oreplsv = GvSV(PL_replgv);
1df70142 1607 const bool do_utf8 = DO_UTF8(sv);
2757e526 1608 I32 multiline;
2a782b5b 1609#ifdef DEBUGGING
2757e526
JH
1610 SV* dsv0;
1611 SV* dsv1;
2a782b5b 1612#endif
a3621e74
YO
1613
1614 GET_RE_DEBUG_FLAGS_DECL;
1615
9d4ba2ae 1616 PERL_UNUSED_ARG(data);
6eb5f6b9
JH
1617
1618 /* Be paranoid... */
1619 if (prog == NULL || startpos == NULL) {
1620 Perl_croak(aTHX_ "NULL regexp parameter");
1621 return 0;
1622 }
1623
2757e526
JH
1624 multiline = prog->reganch & PMf_MULTILINE;
1625
1626#ifdef DEBUGGING
1627 dsv0 = PERL_DEBUG_PAD_ZERO(0);
1628 dsv1 = PERL_DEBUG_PAD_ZERO(1);
1629#endif
1630
bac06658
JH
1631#ifdef DEBUGGING
1632 PL_regnarrate = DEBUG_r_TEST;
1633#endif
1634
1635 RX_MATCH_UTF8_set(prog, do_utf8);
1636
6eb5f6b9 1637 minlen = prog->minlen;
61a36c01 1638 if (strend - startpos < minlen) {
a3621e74 1639 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584
JH
1640 "String too short [regexec_flags]...\n"));
1641 goto phooey;
1aa99e6b 1642 }
6eb5f6b9 1643
6eb5f6b9
JH
1644 /* Check validity of program. */
1645 if (UCHARAT(prog->program) != REG_MAGIC) {
1646 Perl_croak(aTHX_ "corrupted regexp program");
1647 }
1648
1649 PL_reg_flags = 0;
1650 PL_reg_eval_set = 0;
1651 PL_reg_maxiter = 0;
1652
1653 if (prog->reganch & ROPT_UTF8)
1654 PL_reg_flags |= RF_utf8;
1655
1656 /* Mark beginning of line for ^ and lookbehind. */
1657 PL_regbol = startpos;
1658 PL_bostr = strbeg;
1659 PL_reg_sv = sv;
1660
1661 /* Mark end of line for $ (and such) */
1662 PL_regeol = strend;
1663
1664 /* see how far we have to get to not match where we matched before */
1665 PL_regtill = startpos+minend;
1666
6eb5f6b9
JH
1667 /* If there is a "must appear" string, look for it. */
1668 s = startpos;
1669
1670 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1671 MAGIC *mg;
1672
1673 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1674 PL_reg_ganch = startpos;
1675 else if (sv && SvTYPE(sv) >= SVt_PVMG
1676 && SvMAGIC(sv)
14befaf4
DM
1677 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1678 && mg->mg_len >= 0) {
6eb5f6b9
JH
1679 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1680 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1681 if (s > PL_reg_ganch)
6eb5f6b9
JH
1682 goto phooey;
1683 s = PL_reg_ganch;
1684 }
1685 }
1686 else /* pos() not defined */
1687 PL_reg_ganch = strbeg;
1688 }
1689
a0714e2c 1690 if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
6eb5f6b9
JH
1691 re_scream_pos_data d;
1692
1693 d.scream_olds = &scream_olds;
1694 d.scream_pos = &scream_pos;
1695 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7 1696 if (!s) {
a3621e74 1697 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1698 goto phooey; /* not present */
3fa9c3d7 1699 }
6eb5f6b9
JH
1700 }
1701
a3621e74 1702 DEBUG_EXECUTE_r({
1df70142
AL
1703 const char * const s0 = UTF
1704 ? pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1705 UNI_DISPLAY_REGEX)
1706 : prog->precomp;
1707 const int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1708 const char * const s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
c728cb41 1709 UNI_DISPLAY_REGEX) : startpos;
1df70142 1710 const int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
2a782b5b
JH
1711 if (!PL_colorset)
1712 reginitcolors();
1713 PerlIO_printf(Perl_debug_log,
a0288114 1714 "%sMatching REx%s \"%s%*.*s%s%s\" against \"%s%.*s%s%s\"\n",
e4584336 1715 PL_colors[4], PL_colors[5], PL_colors[0],
9e55ce06 1716 len0, len0, s0,
2a782b5b 1717 PL_colors[1],
9e55ce06 1718 len0 > 60 ? "..." : "",
2a782b5b 1719 PL_colors[0],
9e55ce06
JH
1720 (int)(len1 > 60 ? 60 : len1),
1721 s1, PL_colors[1],
1722 (len1 > 60 ? "..." : "")
2a782b5b
JH
1723 );
1724 });
6eb5f6b9
JH
1725
1726 /* Simplest case: anchored match need be tried only once. */
1727 /* [unless only anchor is BOL and multiline is set] */
1728 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1729 if (s == startpos && regtry(prog, startpos))
1730 goto got_it;
7fba1cd6 1731 else if (multiline || (prog->reganch & ROPT_IMPLICIT)
6eb5f6b9
JH
1732 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1733 {
1734 char *end;
1735
1736 if (minlen)
1737 dontbother = minlen - 1;
1aa99e6b 1738 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9 1739 /* for multiline we only have to try after newlines */
33b8afdf 1740 if (prog->check_substr || prog->check_utf8) {
6eb5f6b9
JH
1741 if (s == startpos)
1742 goto after_try;
1743 while (1) {
1744 if (regtry(prog, s))
1745 goto got_it;
1746 after_try:
1747 if (s >= end)
1748 goto phooey;
1749 if (prog->reganch & RE_USE_INTUIT) {
1750 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1751 if (!s)
1752 goto phooey;
1753 }
1754 else
1755 s++;
1756 }
1757 } else {
1758 if (s > startpos)
1759 s--;
1760 while (s < end) {
1761 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1762 if (regtry(prog, s))
1763 goto got_it;
1764 }
1765 }
1766 }
1767 }
1768 goto phooey;
1769 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1770 if (regtry(prog, PL_reg_ganch))
1771 goto got_it;
1772 goto phooey;
1773 }
1774
1775 /* Messy cases: unanchored match. */
33b8afdf 1776 if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1777 /* we have /x+whatever/ */
1778 /* it must be a one character string (XXXX Except UTF?) */
33b8afdf 1779 char ch;
bf93d4cc
GS
1780#ifdef DEBUGGING
1781 int did_match = 0;
1782#endif
33b8afdf
JH
1783 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1784 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
3f7c398e 1785 ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
bf93d4cc 1786
1aa99e6b 1787 if (do_utf8) {
6eb5f6b9
JH
1788 while (s < strend) {
1789 if (*s == ch) {
a3621e74 1790 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1791 if (regtry(prog, s)) goto got_it;
1792 s += UTF8SKIP(s);
1793 while (s < strend && *s == ch)
1794 s += UTF8SKIP(s);
1795 }
1796 s += UTF8SKIP(s);
1797 }
1798 }
1799 else {
1800 while (s < strend) {
1801 if (*s == ch) {
a3621e74 1802 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1803 if (regtry(prog, s)) goto got_it;
1804 s++;
1805 while (s < strend && *s == ch)
1806 s++;
1807 }
1808 s++;
1809 }
1810 }
a3621e74 1811 DEBUG_EXECUTE_r(if (!did_match)
bf93d4cc 1812 PerlIO_printf(Perl_debug_log,
b7953727
JH
1813 "Did not find anchored character...\n")
1814 );
6eb5f6b9 1815 }
a0714e2c
SS
1816 else if (prog->anchored_substr != NULL
1817 || prog->anchored_utf8 != NULL
1818 || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
33b8afdf
JH
1819 && prog->float_max_offset < strend - s)) {
1820 SV *must;
1821 I32 back_max;
1822 I32 back_min;
1823 char *last;
6eb5f6b9 1824 char *last1; /* Last position checked before */
bf93d4cc
GS
1825#ifdef DEBUGGING
1826 int did_match = 0;
1827#endif
33b8afdf
JH
1828 if (prog->anchored_substr || prog->anchored_utf8) {
1829 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1830 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1831 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1832 back_max = back_min = prog->anchored_offset;
1833 } else {
1834 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1835 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1836 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1837 back_max = prog->float_max_offset;
1838 back_min = prog->float_min_offset;
1839 }
1840 if (must == &PL_sv_undef)
1841 /* could not downgrade utf8 check substring, so must fail */
1842 goto phooey;
1843
1844 last = HOP3c(strend, /* Cannot start after this */
1845 -(I32)(CHR_SVLEN(must)
1846 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9
JH
1847
1848 if (s > PL_bostr)
1849 last1 = HOPc(s, -1);
1850 else
1851 last1 = s - 1; /* bogus */
1852
a0288114 1853 /* XXXX check_substr already used to find "s", can optimize if
6eb5f6b9
JH
1854 check_substr==must. */
1855 scream_pos = -1;
1856 dontbother = end_shift;
1857 strend = HOPc(strend, -dontbother);
1858 while ( (s <= last) &&
9041c2e3 1859 ((flags & REXEC_SCREAM)
1aa99e6b 1860 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1861 end_shift, &scream_pos, 0))
1aa99e6b 1862 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1863 (unsigned char*)strend, must,
7fba1cd6 1864 multiline ? FBMrf_MULTILINE : 0))) ) {
4addbd3b
HS
1865 /* we may be pointing at the wrong string */
1866 if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
3f7c398e 1867 s = strbeg + (s - SvPVX_const(sv));
a3621e74 1868 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1869 if (HOPc(s, -back_max) > last1) {
1870 last1 = HOPc(s, -back_min);
1871 s = HOPc(s, -back_max);
1872 }
1873 else {
52657f30 1874 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
6eb5f6b9
JH
1875
1876 last1 = HOPc(s, -back_min);
52657f30 1877 s = t;
6eb5f6b9 1878 }
1aa99e6b 1879 if (do_utf8) {
6eb5f6b9
JH
1880 while (s <= last1) {
1881 if (regtry(prog, s))
1882 goto got_it;
1883 s += UTF8SKIP(s);
1884 }
1885 }
1886 else {
1887 while (s <= last1) {
1888 if (regtry(prog, s))
1889 goto got_it;
1890 s++;
1891 }
1892 }
1893 }
a3621e74 1894 DEBUG_EXECUTE_r(if (!did_match)
b7953727 1895 PerlIO_printf(Perl_debug_log,
a0288114 1896 "Did not find %s substr \"%s%.*s%s\"%s...\n",
33b8afdf 1897 ((must == prog->anchored_substr || must == prog->anchored_utf8)
bf93d4cc
GS
1898 ? "anchored" : "floating"),
1899 PL_colors[0],
1900 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 1901 SvPVX_const(must),
b7953727
JH
1902 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1903 );
6eb5f6b9
JH
1904 goto phooey;
1905 }
155aba94 1906 else if ((c = prog->regstclass)) {
f14c76ed
RGS
1907 if (minlen) {
1908 I32 op = (U8)OP(prog->regstclass);
66e933ab 1909 /* don't bother with what can't match */
f14c76ed
RGS
1910 if (PL_regkind[op] != EXACT && op != CANY)
1911 strend = HOPc(strend, -(minlen - 1));
1912 }
a3621e74 1913 DEBUG_EXECUTE_r({
ffc61ed2 1914 SV *prop = sv_newmortal();
cfd0369c
NC
1915 const char *s0;
1916 const char *s1;
9e55ce06
JH
1917 int len0;
1918 int len1;
1919
32fc9b6a 1920 regprop(prog, prop, c);
9e55ce06 1921 s0 = UTF ?
3f7c398e 1922 pv_uni_display(dsv0, (U8*)SvPVX_const(prop), SvCUR(prop), 60,
c728cb41 1923 UNI_DISPLAY_REGEX) :
cfd0369c 1924 SvPVX_const(prop);
9e55ce06
JH
1925 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1926 s1 = UTF ?
c728cb41 1927 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
9e55ce06
JH
1928 len1 = UTF ? SvCUR(dsv1) : strend - s;
1929 PerlIO_printf(Perl_debug_log,
a0288114 1930 "Matching stclass \"%*.*s\" against \"%*.*s\"\n",
9e55ce06
JH
1931 len0, len0, s0,
1932 len1, len1, s1);
ffc61ed2 1933 });
06b5626a 1934 if (find_byclass(prog, c, s, strend, 0))
6eb5f6b9 1935 goto got_it;
a3621e74 1936 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1937 }
1938 else {
1939 dontbother = 0;
a0714e2c 1940 if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
33b8afdf 1941 /* Trim the end. */
d6a28714 1942 char *last;
33b8afdf
JH
1943 SV* float_real;
1944
1945 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1946 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1947 float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
d6a28714
JH
1948
1949 if (flags & REXEC_SCREAM) {
33b8afdf 1950 last = screaminstr(sv, float_real, s - strbeg,
d6a28714
JH
1951 end_shift, &scream_pos, 1); /* last one */
1952 if (!last)
ffc61ed2 1953 last = scream_olds; /* Only one occurrence. */
4addbd3b
HS
1954 /* we may be pointing at the wrong string */
1955 else if (RX_MATCH_COPIED(prog))
3f7c398e 1956 s = strbeg + (s - SvPVX_const(sv));
b8c5462f 1957 }
d6a28714
JH
1958 else {
1959 STRLEN len;
cfd0369c 1960 const char * const little = SvPV_const(float_real, len);
d6a28714 1961
33b8afdf 1962 if (SvTAIL(float_real)) {
d6a28714
JH
1963 if (memEQ(strend - len + 1, little, len - 1))
1964 last = strend - len + 1;
7fba1cd6 1965 else if (!multiline)
9041c2e3 1966 last = memEQ(strend - len, little, len)
bd61b366 1967 ? strend - len : NULL;
b8c5462f 1968 else
d6a28714
JH
1969 goto find_last;
1970 } else {
1971 find_last:
9041c2e3 1972 if (len)
d6a28714 1973 last = rninstr(s, strend, little, little + len);
b8c5462f 1974 else
a0288114 1975 last = strend; /* matching "$" */
b8c5462f 1976 }
b8c5462f 1977 }
bf93d4cc 1978 if (last == NULL) {
a3621e74 1979 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
bf93d4cc 1980 "%sCan't trim the tail, match fails (should not happen)%s\n",
e4584336 1981 PL_colors[4], PL_colors[5]));
bf93d4cc
GS
1982 goto phooey; /* Should not happen! */
1983 }
d6a28714
JH
1984 dontbother = strend - last + prog->float_min_offset;
1985 }
1986 if (minlen && (dontbother < minlen))
1987 dontbother = minlen - 1;
1988 strend -= dontbother; /* this one's always in bytes! */
1989 /* We don't know much -- general case. */
1aa99e6b 1990 if (do_utf8) {
d6a28714
JH
1991 for (;;) {
1992 if (regtry(prog, s))
1993 goto got_it;
1994 if (s >= strend)
1995 break;
b8c5462f 1996 s += UTF8SKIP(s);
d6a28714
JH
1997 };
1998 }
1999 else {
2000 do {
2001 if (regtry(prog, s))
2002 goto got_it;
2003 } while (s++ < strend);
2004 }
2005 }
2006
2007 /* Failure. */
2008 goto phooey;
2009
2010got_it:
2011 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2012
2013 if (PL_reg_eval_set) {
2014 /* Preserve the current value of $^R */
2015 if (oreplsv != GvSV(PL_replgv))
2016 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2017 restored, the value remains
2018 the same. */
4f639d21 2019 restore_pos(aTHX_ prog);
d6a28714
JH
2020 }
2021
2022 /* make sure $`, $&, $', and $digit will work later */
2023 if ( !(flags & REXEC_NOT_FIRST) ) {
ed252734 2024 RX_MATCH_COPY_FREE(prog);
d6a28714
JH
2025 if (flags & REXEC_COPY_STR) {
2026 I32 i = PL_regeol - startpos + (stringarg - strbeg);
f8c7b90f 2027#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2028 if ((SvIsCOW(sv)
2029 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2030 if (DEBUG_C_TEST) {
2031 PerlIO_printf(Perl_debug_log,
2032 "Copy on write: regexp capture, type %d\n",
2033 (int) SvTYPE(sv));
2034 }
2035 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
d5263905 2036 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
ed252734
NC
2037 assert (SvPOKp(prog->saved_copy));
2038 } else
2039#endif
2040 {
2041 RX_MATCH_COPIED_on(prog);
2042 s = savepvn(strbeg, i);
2043 prog->subbeg = s;
2044 }
d6a28714 2045 prog->sublen = i;
d6a28714
JH
2046 }
2047 else {
2048 prog->subbeg = strbeg;
2049 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2050 }
2051 }
9041c2e3 2052
d6a28714
JH
2053 return 1;
2054
2055phooey:
a3621e74 2056 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
e4584336 2057 PL_colors[4], PL_colors[5]));
d6a28714 2058 if (PL_reg_eval_set)
4f639d21 2059 restore_pos(aTHX_ prog);
d6a28714
JH
2060 return 0;
2061}
2062
2063/*
2064 - regtry - try match at specific point
2065 */
2066STATIC I32 /* 0 failure, 1 success */
2067S_regtry(pTHX_ regexp *prog, char *startpos)
2068{
97aff369 2069 dVAR;
d6a28714
JH
2070 register I32 *sp;
2071 register I32 *ep;
2072 CHECKPOINT lastcp;
a3621e74 2073 GET_RE_DEBUG_FLAGS_DECL;
d6a28714 2074
02db2b7b
IZ
2075#ifdef DEBUGGING
2076 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
2077#endif
d6a28714
JH
2078 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2079 MAGIC *mg;
2080
2081 PL_reg_eval_set = RS_init;
a3621e74 2082 DEBUG_EXECUTE_r(DEBUG_s(
b900a521
JH
2083 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2084 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 2085 ));
e8347627 2086 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
2087 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2088 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2089 SAVETMPS;
2090 /* Apparently this is not needed, judging by wantarray. */
e8347627 2091 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
2092 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2093
2094 if (PL_reg_sv) {
2095 /* Make $_ available to executed code. */
2096 if (PL_reg_sv != DEFSV) {
59f00321 2097 SAVE_DEFSV;
d6a28714 2098 DEFSV = PL_reg_sv;
b8c5462f 2099 }
d6a28714 2100
9041c2e3 2101 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 2102 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 2103 /* prepare for quick setting of pos */
d300d9fa
NC
2104#ifdef PERL_OLD_COPY_ON_WRITE
2105 if (SvIsCOW(sv))
2106 sv_force_normal_flags(sv, 0);
2107#endif
2108 mg = sv_magicext(PL_reg_sv, (SV*)0, PERL_MAGIC_regex_global,
2109 &PL_vtbl_mglob, NULL, 0);
d6a28714 2110 mg->mg_len = -1;
b8c5462f 2111 }
d6a28714
JH
2112 PL_reg_magic = mg;
2113 PL_reg_oldpos = mg->mg_len;
4f639d21 2114 SAVEDESTRUCTOR_X(restore_pos, prog);
d6a28714 2115 }
09687e5a 2116 if (!PL_reg_curpm) {
a02a5408 2117 Newxz(PL_reg_curpm, 1, PMOP);
09687e5a
AB
2118#ifdef USE_ITHREADS
2119 {
2120 SV* repointer = newSViv(0);
577e12cc 2121 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2122 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2123 av_push(PL_regex_padav,repointer);
2124 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2125 PL_regex_pad = AvARRAY(PL_regex_padav);
2126 }
2127#endif
2128 }
aaa362c4 2129 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2130 PL_reg_oldcurpm = PL_curpm;
2131 PL_curpm = PL_reg_curpm;
2132 if (RX_MATCH_COPIED(prog)) {
2133 /* Here is a serious problem: we cannot rewrite subbeg,
2134 since it may be needed if this match fails. Thus
2135 $` inside (?{}) could fail... */
2136 PL_reg_oldsaved = prog->subbeg;
2137 PL_reg_oldsavedlen = prog->sublen;
f8c7b90f 2138#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2139 PL_nrs = prog->saved_copy;
2140#endif
d6a28714
JH
2141 RX_MATCH_COPIED_off(prog);
2142 }
2143 else
bd61b366 2144 PL_reg_oldsaved = NULL;
d6a28714
JH
2145 prog->subbeg = PL_bostr;
2146 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2147 }
973dddac 2148 prog->startp[0] = startpos - PL_bostr;
d6a28714
JH
2149 PL_reginput = startpos;
2150 PL_regstartp = prog->startp;
2151 PL_regendp = prog->endp;
2152 PL_reglastparen = &prog->lastparen;
a01268b5 2153 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714 2154 prog->lastparen = 0;
03994de8 2155 prog->lastcloseparen = 0;
d6a28714 2156 PL_regsize = 0;
a3621e74 2157 DEBUG_EXECUTE_r(PL_reg_starttry = startpos);
d6a28714
JH
2158 if (PL_reg_start_tmpl <= prog->nparens) {
2159 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2160 if(PL_reg_start_tmp)
2161 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2162 else
a02a5408 2163 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
d6a28714
JH
2164 }
2165
2166 /* XXXX What this code is doing here?!!! There should be no need
2167 to do this again and again, PL_reglastparen should take care of
3dd2943c 2168 this! --ilya*/
dafc8851
JH
2169
2170 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2171 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2172 * PL_reglastparen), is not needed at all by the test suite
2173 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2174 * enough, for building DynaLoader, or otherwise this
2175 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2176 * will happen. Meanwhile, this code *is* needed for the
2177 * above-mentioned test suite tests to succeed. The common theme
2178 * on those tests seems to be returning null fields from matches.
2179 * --jhi */
dafc8851 2180#if 1
d6a28714
JH
2181 sp = prog->startp;
2182 ep = prog->endp;
2183 if (prog->nparens) {
097eb12c 2184 register I32 i;
eb160463 2185 for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
d6a28714
JH
2186 *++sp = -1;
2187 *++ep = -1;
2188 }
2189 }
dafc8851 2190#endif
02db2b7b 2191 REGCP_SET(lastcp);
4f639d21 2192 if (regmatch(prog, prog->program + 1)) {
d6a28714
JH
2193 prog->endp[0] = PL_reginput - PL_bostr;
2194 return 1;
2195 }
02db2b7b 2196 REGCP_UNWIND(lastcp);
d6a28714
JH
2197 return 0;
2198}
2199
02db2b7b
IZ
2200#define RE_UNWIND_BRANCH 1
2201#define RE_UNWIND_BRANCHJ 2
2202
2203union re_unwind_t;
2204
2205typedef struct { /* XX: makes sense to enlarge it... */
2206 I32 type;
2207 I32 prev;
2208 CHECKPOINT lastcp;
2209} re_unwind_generic_t;
2210
2211typedef struct {
2212 I32 type;
2213 I32 prev;
2214 CHECKPOINT lastcp;
2215 I32 lastparen;
2216 regnode *next;
2217 char *locinput;
2218 I32 nextchr;
3a2830be 2219 int minmod;
02db2b7b
IZ
2220#ifdef DEBUGGING
2221 int regindent;
2222#endif
2223} re_unwind_branch_t;
2224
2225typedef union re_unwind_t {
2226 I32 type;
2227 re_unwind_generic_t generic;
2228 re_unwind_branch_t branch;
2229} re_unwind_t;
2230
8ba1375e
MJD
2231#define sayYES goto yes
2232#define sayNO goto no
e0f9d4a8 2233#define sayNO_ANYOF goto no_anyof
8ba1375e
MJD
2234#define sayYES_FINAL goto yes_final
2235#define sayYES_LOUD goto yes_loud
2236#define sayNO_FINAL goto no_final
2237#define sayNO_SILENT goto do_no
2238#define saySAME(x) if (x) goto yes; else goto no
2239
3ab3c9b4
HS
2240#define POSCACHE_SUCCESS 0 /* caching success rather than failure */
2241#define POSCACHE_SEEN 1 /* we know what we're caching */
2242#define POSCACHE_START 2 /* the real cache: this bit maps to pos 0 */
2243#define CACHEsayYES STMT_START { \
d8319b27 2244 if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \
3ab3c9b4
HS
2245 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2246 PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) || (1<<POSCACHE_SEEN); \
2247 else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2248 /* cache records failure, but this is success */ \
2249 DEBUG_r( \
2250 PerlIO_printf(Perl_debug_log, \
2251 "%*s (remove success from failure cache)\n", \
2252 REPORT_CODE_OFF+PL_regindent*2, "") \
2253 ); \
d8319b27 2254 PL_reg_poscache[st->u.whilem.cache_offset] &= ~(1<<st->u.whilem.cache_bit); \
3ab3c9b4
HS
2255 } \
2256 } \
2257 sayYES; \
2258} STMT_END
2259#define CACHEsayNO STMT_START { \
d8319b27 2260 if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \
3ab3c9b4
HS
2261 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2262 PL_reg_poscache[0] |= (1<<POSCACHE_SEEN); \
2263 else if ((PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2264 /* cache records success, but this is failure */ \
2265 DEBUG_r( \
2266 PerlIO_printf(Perl_debug_log, \
2267 "%*s (remove failure from success cache)\n", \
2268 REPORT_CODE_OFF+PL_regindent*2, "") \
2269 ); \
d8319b27 2270 PL_reg_poscache[st->u.whilem.cache_offset] &= ~(1<<st->u.whilem.cache_bit); \
3ab3c9b4
HS
2271 } \
2272 } \
2273 sayNO; \
2274} STMT_END
2275
a3621e74
YO
2276/* this is used to determine how far from the left messages like
2277 'failed...' are printed. Currently 29 makes these messages line
2278 up with the opcode they refer to. Earlier perls used 25 which
2279 left these messages outdented making reviewing a debug output
2280 quite difficult.
2281*/
2282#define REPORT_CODE_OFF 29
2283
2284
2285/* Make sure there is a test for this +1 options in re_tests */
2286#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2287
5d9a96ca
DM
2288/* grab a new slab and return the first slot in it */
2289
2290STATIC regmatch_state *
2291S_push_slab(pTHX)
2292{
2293 regmatch_slab *s = PL_regmatch_slab->next;
2294 if (!s) {
2295 Newx(s, 1, regmatch_slab);
2296 s->prev = PL_regmatch_slab;
2297 s->next = NULL;
2298 PL_regmatch_slab->next = s;
2299 }
2300 PL_regmatch_slab = s;
2301 return &s->states[0];
2302}
5b47454d 2303
95b24440
DM
2304/* simulate a recursive call to regmatch */
2305
2306#define REGMATCH(ns, where) \
5d9a96ca
DM
2307 st->scan = scan; \
2308 scan = (ns); \
2309 st->resume_state = resume_##where; \
95b24440
DM
2310 goto start_recurse; \
2311 resume_point_##where:
2312
aa283a38
DM
2313
2314/* push a new regex state. Set newst to point to it */
2315
2316#define PUSH_STATE(newst, resume) \
2317 depth++; \
2318 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "PUSH STATE(%d)\n", depth)); \
2319 st->scan = scan; \
2320 st->next = next; \
2321 st->n = n; \
2322 st->locinput = locinput; \
2323 st->resume_state = resume; \
2324 newst = st+1; \
2325 if (newst > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1])) \
2326 newst = S_push_slab(aTHX); \
2327 PL_regmatch_state = newst; \
2328 newst->cc = 0; \
2329 newst->minmod = 0; \
2330 newst->sw = 0; \
2331 newst->logical = 0; \
2332 newst->unwind = 0; \
2333 locinput = PL_reginput; \
2334 nextchr = UCHARAT(locinput);
2335
2336#define POP_STATE \
2337 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE(%d)\n", depth)); \
2338 depth--; \
2339 st--; \
2340 if (st < &PL_regmatch_slab->states[0]) { \
2341 PL_regmatch_slab = PL_regmatch_slab->prev; \
2342 st = &PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]; \
2343 } \
2344 PL_regmatch_state = st; \
2345 scan = st->scan; \
2346 next = st->next; \
2347 n = st->n; \
2348 locinput = st->locinput; \
2349 nextchr = UCHARAT(locinput);
2350
d6a28714
JH
2351/*
2352 - regmatch - main matching routine
2353 *
2354 * Conceptually the strategy is simple: check to see whether the current
2355 * node matches, call self recursively to see whether the rest matches,
2356 * and then act accordingly. In practice we make some effort to avoid
2357 * recursion, in particular by going through "ordinary" nodes (that don't
2358 * need to know whether the rest of the match failed) by a loop instead of
2359 * by recursion.
2360 */
2361/* [lwall] I've hoisted the register declarations to the outer block in order to
2362 * maybe save a little bit of pushing and popping on the stack. It also takes
2363 * advantage of machines that use a register save mask on subroutine entry.
95b24440
DM
2364 *
2365 * This function used to be heavily recursive, but since this had the
2366 * effect of blowing the CPU stack on complex regexes, it has been
2367 * restructured to be iterative, and to save state onto the heap rather
2368 * than the stack. Essentially whereever regmatch() used to be called, it
2369 * pushes the current state, notes where to return, then jumps back into
2370 * the main loop.
2371 *
2372 * Originally the structure of this function used to look something like
2373
2374 S_regmatch() {
2375 int a = 1, b = 2;
2376 ...
2377 while (scan != NULL) {
5d9a96ca 2378 a++; // do stuff with a and b
95b24440
DM
2379 ...
2380 switch (OP(scan)) {
2381 case FOO: {
2382 int local = 3;
2383 ...
2384 if (regmatch(...)) // recurse
2385 goto yes;
2386 }
2387 ...
2388 }
2389 }
2390 yes:
2391 return 1;
2392 }
2393
2394 * Now it looks something like this:
2395
5d9a96ca 2396 typedef struct {
95b24440
DM
2397 int a, b, local;
2398 int resume_state;
5d9a96ca 2399 } regmatch_state;
95b24440
DM
2400
2401 S_regmatch() {
5d9a96ca
DM
2402 regmatch_state *st = new();
2403 int depth=0;
2404 st->a++; // do stuff with a and b
95b24440
DM
2405 ...
2406 while (scan != NULL) {
2407 ...
2408 switch (OP(scan)) {
2409 case FOO: {
5d9a96ca 2410 st->local = 3;
95b24440 2411 ...
5d9a96ca
DM
2412 st->scan = scan;
2413 scan = ...;
2414 st->resume_state = resume_FOO;
2415 goto start_recurse; // recurse
95b24440 2416
5d9a96ca
DM
2417 resume_point_FOO:
2418 if (result)
95b24440
DM
2419 goto yes;
2420 }
2421 ...
2422 }
5d9a96ca
DM
2423 start_recurse:
2424 st = new(); push a new state
2425 st->a = 1; st->b = 2;
2426 depth++;
95b24440 2427 }
5d9a96ca 2428 yes:
95b24440 2429 result = 1;
5d9a96ca
DM
2430 if (depth--) {
2431 st = pop();
95b24440
DM
2432 switch (resume_state) {
2433 case resume_FOO:
2434 goto resume_point_FOO;
2435 ...
2436 }
2437 }
2438 return result
2439 }
2440
2441 * WARNING: this means that any line in this function that contains a
2442 * REGMATCH() or TRYPAREN() is actually simulating a recursive call to
2443 * regmatch() using gotos instead. Thus the values of any local variables
2444 * not saved in the regmatch_state structure will have been lost when
2445 * execution resumes on the next line .
5d9a96ca
DM
2446 *
2447 * States (ie the st pointer) are allocated in slabs of about 4K in size.
2448 * PL_regmatch_state always points to the currently active state, and
2449 * PL_regmatch_slab points to the slab currently containing PL_regmatch_state.
2450 * The first time regmatch is called, the first slab is allocated, and is
2451 * never freed until interpreter desctruction. When the slab is full,
2452 * a new one is allocated chained to the end. At exit from regmatch, slabs
2453 * allocated since entry are freed.
d6a28714 2454 */
95b24440
DM
2455
2456
d6a28714 2457STATIC I32 /* 0 failure, 1 success */
4f639d21 2458S_regmatch(pTHX_ regexp *rex, regnode *prog)
d6a28714 2459{
27da23d5 2460 dVAR;
95b24440 2461 register const bool do_utf8 = PL_reg_match_utf8;
4ad0818d 2462 const U32 uniflags = UTF8_ALLOW_DEFAULT;
95b24440 2463
5d9a96ca
DM
2464 regmatch_slab *orig_slab;
2465 regmatch_state *orig_state;
a3621e74 2466
5d9a96ca
DM
2467 /* the current state. This is a cached copy of PL_regmatch_state */
2468 register regmatch_state *st;
95b24440 2469
5d9a96ca
DM
2470 /* cache heavy used fields of st in registers */
2471 register regnode *scan;
2472 register regnode *next;
2473 register I32 n = 0; /* initialize to shut up compiler warning */
2474 register char *locinput = PL_reginput;
95b24440 2475
5d9a96ca
DM
2476 /* these variables are NOT saved during a recusive RFEGMATCH: */
2477 register I32 nextchr; /* is always set to UCHARAT(locinput) */
2478 bool result; /* return value of S_regmatch */
2479 regnode *inner; /* Next node in internal branch. */
2480 int depth = 0; /* depth of recursion */
aa283a38
DM
2481 regmatch_state *newst; /* when pushing a state, this is the new one */
2482 regmatch_state *cur_eval = NULL; /* most recent (??{}) state */
95b24440
DM
2483
2484#ifdef DEBUGGING
ab74612d 2485 SV *re_debug_flags = NULL;
a3621e74 2486 GET_RE_DEBUG_FLAGS;
d6a28714
JH
2487 PL_regindent++;
2488#endif
2489
5d9a96ca
DM
2490 /* on first ever call to regmatch, allocate first slab */
2491 if (!PL_regmatch_slab) {
2492 Newx(PL_regmatch_slab, 1, regmatch_slab);
2493 PL_regmatch_slab->prev = NULL;
2494 PL_regmatch_slab->next = NULL;
2495 PL_regmatch_state = &PL_regmatch_slab->states[0] - 1;
2496 }
2497
2498 /* remember current high-water mark for exit */
2499 /* XXX this should be done with SAVE* instead */
2500 orig_slab = PL_regmatch_slab;
2501 orig_state = PL_regmatch_state;
2502
2503 /* grab next free state slot */
2504 st = ++PL_regmatch_state;
2505 if (st > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]))
2506 st = PL_regmatch_state = S_push_slab(aTHX);
2507
2508 st->minmod = 0;
2509 st->sw = 0;
2510 st->logical = 0;
2511 st->unwind = 0;
2512 st->cc = NULL;
d6a28714
JH
2513 /* Note that nextchr is a byte even in UTF */
2514 nextchr = UCHARAT(locinput);
2515 scan = prog;
2516 while (scan != NULL) {
8ba1375e 2517
a3621e74 2518 DEBUG_EXECUTE_r( {
6136c704 2519 SV * const prop = sv_newmortal();
1df70142
AL
2520 const int docolor = *PL_colors[0];
2521 const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2522 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2523 /* The part of the string before starttry has one color
2524 (pref0_len chars), between starttry and current
2525 position another one (pref_len - pref0_len chars),
2526 after the current position the third one.
2527 We assume that pref0_len <= pref_len, otherwise we
2528 decrease pref0_len. */
9041c2e3 2529 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2530 ? (5 + taill) - l : locinput - PL_bostr;
2531 int pref0_len;
d6a28714 2532
df1ffd02 2533 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2534 pref_len++;
2535 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2536 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2537 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2538 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2539 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2540 l--;
d6a28714
JH
2541 if (pref0_len < 0)
2542 pref0_len = 0;
2543 if (pref0_len > pref_len)
2544 pref0_len = pref_len;
32fc9b6a 2545 regprop(rex, prop, scan);
2a782b5b 2546 {
1df70142 2547 const char * const s0 =
f14c76ed 2548 do_utf8 && OP(scan) != CANY ?
95b24440 2549 pv_uni_display(PERL_DEBUG_PAD(0), (U8*)(locinput - pref_len),
c728cb41 2550 pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2551 locinput - pref_len;
1df70142
AL
2552 const int len0 = do_utf8 ? strlen(s0) : pref0_len;
2553 const char * const s1 = do_utf8 && OP(scan) != CANY ?
95b24440
DM
2554 pv_uni_display(PERL_DEBUG_PAD(1),
2555 (U8*)(locinput - pref_len + pref0_len),
c728cb41 2556 pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2557 locinput - pref_len + pref0_len;
1df70142
AL
2558 const int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2559 const char * const s2 = do_utf8 && OP(scan) != CANY ?
95b24440 2560 pv_uni_display(PERL_DEBUG_PAD(2), (U8*)locinput,
c728cb41 2561 PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2562 locinput;
1df70142 2563 const int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2564 PerlIO_printf(Perl_debug_log,
2565 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2566 (IV)(locinput - PL_bostr),
2567 PL_colors[4],
2568 len0, s0,
2569 PL_colors[5],
2570 PL_colors[2],
2571 len1, s1,
2572 PL_colors[3],
2573 (docolor ? "" : "> <"),
2574 PL_colors[0],
2575 len2, s2,
2576 PL_colors[1],
2577 15 - l - pref_len + 1,
2578 "",
4f639d21 2579 (IV)(scan - rex->program), PL_regindent*2, "",
3f7c398e 2580 SvPVX_const(prop));
2a782b5b
JH
2581 }
2582 });
d6a28714
JH
2583
2584 next = scan + NEXT_OFF(scan);
2585 if (next == scan)
2586 next = NULL;
2587
2588 switch (OP(scan)) {
2589 case BOL:
7fba1cd6 2590 if (locinput == PL_bostr)
d6a28714
JH
2591 {
2592 /* regtill = regbol; */
b8c5462f
JH
2593 break;
2594 }
d6a28714
JH
2595 sayNO;
2596 case MBOL:
12d33761
HS
2597 if (locinput == PL_bostr ||
2598 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2599 {
b8c5462f
JH
2600 break;
2601 }
d6a28714
JH
2602 sayNO;
2603 case SBOL:
c2a73568 2604 if (locinput == PL_bostr)
b8c5462f 2605 break;
d6a28714
JH
2606 sayNO;
2607 case GPOS:
2608 if (locinput == PL_reg_ganch)
2609 break;
2610 sayNO;
2611 case EOL:
d6a28714
JH
2612 goto seol;
2613 case MEOL:
d6a28714 2614 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2615 sayNO;
b8c5462f 2616 break;
d6a28714
JH
2617 case SEOL:
2618 seol:
2619 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2620 sayNO;
d6a28714 2621 if (PL_regeol - locinput > 1)
b8c5462f 2622 sayNO;
b8c5462f 2623 break;
d6a28714
JH
2624 case EOS:
2625 if (PL_regeol != locinput)
b8c5462f 2626 sayNO;
d6a28714 2627 break;
ffc61ed2 2628 case SANY:
d6a28714 2629 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2630 sayNO;
f33976b4
DB
2631 if (do_utf8) {
2632 locinput += PL_utf8skip[nextchr];
2633 if (locinput > PL_regeol)
2634 sayNO;
2635 nextchr = UCHARAT(locinput);
2636 }
2637 else
2638 nextchr = UCHARAT(++locinput);
2639 break;
2640 case CANY:
2641 if (!nextchr && locinput >= PL_regeol)
2642 sayNO;
b8c5462f 2643 nextchr = UCHARAT(++locinput);
a0d0e21e 2644 break;
ffc61ed2 2645 case REG_ANY:
1aa99e6b
IH
2646 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2647 sayNO;
2648 if (do_utf8) {
b8c5462f 2649 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2650 if (locinput > PL_regeol)
2651 sayNO;
a0ed51b3 2652 nextchr = UCHARAT(locinput);
a0ed51b3 2653 }
1aa99e6b
IH
2654 else
2655 nextchr = UCHARAT(++locinput);
a0ed51b3 2656 break;
a3621e74
YO
2657
2658
2659
2660 /*
2661 traverse the TRIE keeping track of all accepting states
2662 we transition through until we get to a failing node.
2663
a3621e74
YO
2664
2665 */
5b47454d 2666 case TRIE:
a3621e74
YO
2667 case TRIEF:
2668 case TRIEFL:
2669 {
a3621e74
YO
2670 U8 *uc = ( U8* )locinput;
2671 U32 state = 1;
2672 U16 charid = 0;
2673 U32 base = 0;
2674 UV uvc = 0;
2675 STRLEN len = 0;
2676 STRLEN foldlen = 0;
a3621e74
YO
2677 U8 *uscan = (U8*)NULL;
2678 STRLEN bufflen=0;
95b24440 2679 SV *sv_accept_buff = NULL;
5b47454d
DM
2680 const enum { trie_plain, trie_utf8, trie_uft8_fold }
2681 trie_type = do_utf8 ?
2682 (OP(scan) == TRIE ? trie_utf8 : trie_uft8_fold)
2683 : trie_plain;
2684
7087a21c
NC
2685 /* what trie are we using right now */
2686 reg_trie_data *trie
32fc9b6a 2687 = (reg_trie_data*)rex->data->data[ ARG( scan ) ];
d8319b27 2688 st->u.trie.accepted = 0; /* how many accepting states we have seen */
95b24440 2689 result = 0;
a3621e74
YO
2690
2691 while ( state && uc <= (U8*)PL_regeol ) {
2692
5b47454d 2693 if (trie->states[ state ].wordnum) {
d8319b27 2694 if (!st->u.trie.accepted ) {
5b47454d
DM
2695 ENTER;
2696 SAVETMPS;
2697 bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
2698 sv_accept_buff=newSV(bufflen *
2699 sizeof(reg_trie_accepted) - 1);
2700 SvCUR_set(sv_accept_buff,
2701 sizeof(reg_trie_accepted));
2702 SvPOK_on(sv_accept_buff);
2703 sv_2mortal(sv_accept_buff);
d8319b27 2704 st->u.trie.accept_buff =
5b47454d
DM
2705 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
2706 }
2707 else {
d8319b27 2708 if (st->u.trie.accepted >= bufflen) {
5b47454d 2709 bufflen *= 2;
d8319b27 2710 st->u.trie.accept_buff =(reg_trie_accepted*)
5b47454d
DM
2711 SvGROW(sv_accept_buff,
2712 bufflen * sizeof(reg_trie_accepted));
2713 }
2714 SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
2715 + sizeof(reg_trie_accepted));
2716 }
d8319b27
DM
2717 st->u.trie.accept_buff[st->u.trie.accepted].wordnum = trie->states[state].wordnum;
2718 st->u.trie.accept_buff[st->u.trie.accepted].endpos = uc;
2719 ++st->u.trie.accepted;
5b47454d 2720 }
a3621e74
YO
2721
2722 base = trie->states[ state ].trans.base;
2723
2724 DEBUG_TRIE_EXECUTE_r(
2725 PerlIO_printf( Perl_debug_log,
e4584336 2726 "%*s %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ",
a3621e74 2727 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
d8319b27 2728 (UV)state, (UV)base, (UV)st->u.trie.accepted );
a3621e74
YO
2729 );
2730
2731 if ( base ) {
5b47454d
DM
2732 switch (trie_type) {
2733 case trie_uft8_fold:
a3621e74
YO
2734 if ( foldlen>0 ) {
2735 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
2736 foldlen -= len;
2737 uscan += len;
2738 len=0;
2739 } else {
1df70142 2740 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
a3621e74
YO
2741 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2742 uvc = to_uni_fold( uvc, foldbuf, &foldlen );
2743 foldlen -= UNISKIP( uvc );
2744 uscan = foldbuf + UNISKIP( uvc );
2745 }
5b47454d
DM
2746 break;
2747 case trie_utf8:
2748 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN,
2749 &len, uniflags );
2750 break;
2751 case trie_plain:
e4584336 2752 uvc = (UV)*uc;
a3621e74
YO
2753 len = 1;
2754 }
2755
5b47454d
DM
2756 if (uvc < 256) {
2757 charid = trie->charmap[ uvc ];
2758 }
2759 else {
2760 charid = 0;
2761 if (trie->widecharmap) {
2762 SV** svpp = (SV**)NULL;
2763 svpp = hv_fetch(trie->widecharmap,
2764 (char*)&uvc, sizeof(UV), 0);
2765 if (svpp)
2766 charid = (U16)SvIV(*svpp);
2767 }
2768 }
a3621e74 2769
5b47454d
DM
2770 if (charid &&
2771 (base + charid > trie->uniquecharcount )
2772 && (base + charid - 1 - trie->uniquecharcount
2773 < trie->lasttrans)
2774 && trie->trans[base + charid - 1 -
2775 trie->uniquecharcount].check == state)
2776 {
2777 state = trie->trans[base + charid - 1 -
2778 trie->uniquecharcount ].next;
2779 }
2780 else {
2781 state = 0;
2782 }
2783 uc += len;
2784
2785 }
2786 else {
a3621e74
YO
2787 state = 0;
2788 }
2789 DEBUG_TRIE_EXECUTE_r(
e4584336
RB
2790 PerlIO_printf( Perl_debug_log,
2791 "Charid:%3x CV:%4"UVxf" After State: %4"UVxf"%s\n",
2792 charid, uvc, (UV)state, PL_colors[5] );
a3621e74
YO
2793 );
2794 }
d8319b27 2795 if (!st->u.trie.accepted )
a3621e74 2796 sayNO;
a3621e74
YO
2797
2798 /*
2799 There was at least one accepting state that we
2800 transitioned through. Presumably the number of accepting
2801 states is going to be low, typically one or two. So we
2802 simply scan through to find the one with lowest wordnum.
2803 Once we find it, we swap the last state into its place
2804 and decrement the size. We then try to match the rest of
2805 the pattern at the point where the word ends, if we
2806 succeed then we end the loop, otherwise the loop
2807 eventually terminates once all of the accepting states
2808 have been tried.
2809 */
a3621e74 2810
d8319b27 2811 if ( st->u.trie.accepted == 1 ) {
a3621e74 2812 DEBUG_EXECUTE_r({
097eb12c 2813 SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ 0 ].wordnum-1, 0 );
a3621e74
YO
2814 PerlIO_printf( Perl_debug_log,
2815 "%*s %sonly one match : #%d <%s>%s\n",
e4584336 2816 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
d8319b27 2817 st->u.trie.accept_buff[ 0 ].wordnum,
cfd0369c 2818 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",
a3621e74
YO
2819 PL_colors[5] );
2820 });
d8319b27 2821 PL_reginput = (char *)st->u.trie.accept_buff[ 0 ].endpos;
a3621e74
YO
2822 /* in this case we free tmps/leave before we call regmatch
2823 as we wont be using accept_buff again. */
2824 FREETMPS;
2825 LEAVE;
95b24440
DM
2826 REGMATCH(scan + NEXT_OFF(scan), TRIE1);
2827 /*** all unsaved local vars undefined at this point */
a3621e74
YO
2828 } else {
2829 DEBUG_EXECUTE_r(
e4584336 2830 PerlIO_printf( Perl_debug_log,"%*s %sgot %"IVdf" possible matches%s\n",
d8319b27 2831 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], (IV)st->u.trie.accepted,
a3621e74
YO
2832 PL_colors[5] );
2833 );
d8319b27 2834 while ( !result && st->u.trie.accepted-- ) {
a3621e74
YO
2835 U32 best = 0;
2836 U32 cur;
d8319b27 2837 for( cur = 1 ; cur <= st->u.trie.accepted ; cur++ ) {
e4584336
RB
2838 DEBUG_TRIE_EXECUTE_r(
2839 PerlIO_printf( Perl_debug_log,
2840 "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
2841 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
d8319b27
DM
2842 (IV)best, st->u.trie.accept_buff[ best ].wordnum, (IV)cur,
2843 st->u.trie.accept_buff[ cur ].wordnum, PL_colors[5] );
e4584336 2844 );
a3621e74 2845
d8319b27
DM
2846 if (st->u.trie.accept_buff[cur].wordnum <
2847 st->u.trie.accept_buff[best].wordnum)
e822a8b4 2848 best = cur;
a3621e74
YO
2849 }
2850 DEBUG_EXECUTE_r({
87830502 2851 reg_trie_data * const trie = (reg_trie_data*)
32fc9b6a 2852 rex->data->data[ARG(scan)];
d8319b27 2853 SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ best ].wordnum - 1, 0 );
a3621e74 2854 PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at 0x%p%s\n",
e4584336 2855 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
d8319b27 2856 st->u.trie.accept_buff[best].wordnum,
cfd0369c 2857 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",scan,
a3621e74
YO
2858 PL_colors[5] );
2859 });
d8319b27
DM
2860 if ( best<st->u.trie.accepted ) {
2861 reg_trie_accepted tmp = st->u.trie.accept_buff[ best ];
2862 st->u.trie.accept_buff[ best ] = st->u.trie.accept_buff[ st->u.trie.accepted ];
2863 st->u.trie.accept_buff[ st->u.trie.accepted ] = tmp;
2864 best = st->u.trie.accepted;
a3621e74 2865 }
d8319b27 2866 PL_reginput = (char *)st->u.trie.accept_buff[ best ].endpos;
a3621e74
YO
2867
2868 /*
2869 as far as I can tell we only need the SAVETMPS/FREETMPS
2870 for re's with EVAL in them but I'm leaving them in for
2871 all until I can be sure.
2872 */
2873 SAVETMPS;
95b24440
DM
2874 REGMATCH(scan + NEXT_OFF(scan), TRIE2);
2875 /*** all unsaved local vars undefined at this point */
a3621e74
YO
2876 FREETMPS;
2877 }
2878 FREETMPS;
2879 LEAVE;
2880 }
2881
95b24440 2882 if (result) {
a3621e74
YO
2883 sayYES;
2884 } else {
2885 sayNO;
2886 }
2887 }
2888 /* unreached codepoint */
95b24440
DM
2889 case EXACT: {
2890 char *s = STRING(scan);
5d9a96ca 2891 st->ln = STR_LEN(scan);
eb160463 2892 if (do_utf8 != UTF) {
bc517b45 2893 /* The target and the pattern have differing utf8ness. */
1aa99e6b 2894 char *l = locinput;
5d9a96ca 2895 const char *e = s + st->ln;
a72c7584 2896
5ff6fc6d
JH
2897 if (do_utf8) {
2898 /* The target is utf8, the pattern is not utf8. */
1aa99e6b 2899 while (s < e) {
a3b680e6 2900 STRLEN ulen;
1aa99e6b 2901 if (l >= PL_regeol)
5ff6fc6d
JH
2902 sayNO;
2903 if (NATIVE_TO_UNI(*(U8*)s) !=
89ebb4a3 2904 utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
041457d9 2905 uniflags))
5ff6fc6d 2906 sayNO;
bc517b45 2907 l += ulen;
5ff6fc6d 2908 s ++;
1aa99e6b 2909 }
5ff6fc6d
JH
2910 }
2911 else {
2912 /* The target is not utf8, the pattern is utf8. */
1aa99e6b 2913 while (s < e) {
a3b680e6 2914 STRLEN ulen;
1aa99e6b
IH
2915 if (l >= PL_regeol)
2916 sayNO;
5ff6fc6d 2917 if (NATIVE_TO_UNI(*((U8*)l)) !=
89ebb4a3 2918 utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
041457d9 2919 uniflags))
1aa99e6b 2920 sayNO;
bc517b45 2921 s += ulen;
a72c7584 2922 l ++;
1aa99e6b 2923 }
5ff6fc6d 2924 }
1aa99e6b
IH
2925 locinput = l;
2926 nextchr = UCHARAT(locinput);
2927 break;
2928 }
bc517b45 2929 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2930 /* Inline the first character, for speed. */
2931 if (UCHARAT(s) != nextchr)
2932 sayNO;
5d9a96ca 2933 if (PL_regeol - locinput < st->ln)
d6a28714 2934 sayNO;
5d9a96ca 2935 if (st->ln > 1 && memNE(s, locinput, st->ln))
d6a28714 2936 sayNO;
5d9a96ca 2937 locinput += st->ln;
d6a28714
JH
2938 nextchr = UCHARAT(locinput);
2939 break;
95b24440 2940 }
d6a28714 2941 case EXACTFL:
b8c5462f
JH
2942 PL_reg_flags |= RF_tainted;
2943 /* FALL THROUGH */
95b24440
DM
2944 case EXACTF: {
2945 char *s = STRING(scan);
5d9a96ca 2946 st->ln = STR_LEN(scan);
d6a28714 2947
d07ddd77
JH
2948 if (do_utf8 || UTF) {
2949 /* Either target or the pattern are utf8. */
d6a28714 2950 char *l = locinput;
d07ddd77 2951 char *e = PL_regeol;
bc517b45 2952
5d9a96ca 2953 if (ibcmp_utf8(s, 0, st->ln, (bool)UTF,
1feea2c7 2954 l, &e, 0, do_utf8)) {
5486206c
JH
2955 /* One more case for the sharp s:
2956 * pack("U0U*", 0xDF) =~ /ss/i,
2957 * the 0xC3 0x9F are the UTF-8
2958 * byte sequence for the U+00DF. */
2959 if (!(do_utf8 &&
2960 toLOWER(s[0]) == 's' &&
5d9a96ca 2961 st->ln >= 2 &&
5486206c
JH
2962 toLOWER(s[1]) == 's' &&
2963 (U8)l[0] == 0xC3 &&
2964 e - l >= 2 &&
2965 (U8)l[1] == 0x9F))
2966 sayNO;
2967 }
d07ddd77
JH
2968 locinput = e;
2969 nextchr = UCHARAT(locinput);
2970 break;
a0ed51b3 2971 }
d6a28714 2972
bc517b45
JH
2973 /* Neither the target and the pattern are utf8. */
2974
d6a28714
JH
2975 /* Inline the first character, for speed. */
2976 if (UCHARAT(s) != nextchr &&
2977 UCHARAT(s) != ((OP(scan) == EXACTF)
2978 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2979 sayNO;
5d9a96ca 2980 if (PL_regeol - locinput < st->ln)
b8c5462f 2981 sayNO;
5d9a96ca
DM
2982 if (st->ln > 1 && (OP(scan) == EXACTF
2983 ? ibcmp(s, locinput, st->ln)
2984 : ibcmp_locale(s, locinput, st->ln)))
4633a7c4 2985 sayNO;
5d9a96ca 2986 locinput += st->ln;
d6a28714 2987 nextchr = UCHARAT(locinput);
a0d0e21e 2988 break;
95b24440 2989 }
d6a28714 2990 case ANYOF:
ffc61ed2 2991 if (do_utf8) {
9e55ce06
JH
2992 STRLEN inclasslen = PL_regeol - locinput;
2993
32fc9b6a 2994 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
e0f9d4a8 2995 sayNO_ANYOF;
ffc61ed2
JH
2996 if (locinput >= PL_regeol)
2997 sayNO;
0f0076b4 2998 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
b8c5462f 2999 nextchr = UCHARAT(locinput);
e0f9d4a8 3000 break;
ffc61ed2
JH
3001 }
3002 else {
3003 if (nextchr < 0)
3004 nextchr = UCHARAT(locinput);
32fc9b6a 3005 if (!REGINCLASS(rex, scan, (U8*)locinput))
e0f9d4a8 3006 sayNO_ANYOF;
ffc61ed2
JH
3007 if (!nextchr && locinput >= PL_regeol)
3008 sayNO;
3009 nextchr = UCHARAT(++locinput);
e0f9d4a8
JH
3010 break;
3011 }
3012 no_anyof:
3013 /* If we might have the case of the German sharp s
3014 * in a casefolding Unicode character class. */
3015
ebc501f0
JH
3016 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3017 locinput += SHARP_S_SKIP;
e0f9d4a8 3018 nextchr = UCHARAT(locinput);
ffc61ed2 3019 }
e0f9d4a8
JH
3020 else
3021 sayNO;
b8c5462f 3022 break;
d6a28714 3023 case ALNUML:
b8c5462f
JH
3024 PL_reg_flags |= RF_tainted;
3025 /* FALL THROUGH */
d6a28714 3026 case ALNUM:
b8c5462f 3027 if (!nextchr)
4633a7c4 3028 sayNO;
ffc61ed2 3029 if (do_utf8) {
1a4fad37 3030 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3031 if (!(OP(scan) == ALNUM
3568d838 3032 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 3033 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
3034 {
3035 sayNO;
a0ed51b3 3036 }
b8c5462f 3037 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3038 nextchr = UCHARAT(locinput);
3039 break;
3040 }
ffc61ed2 3041 if (!(OP(scan) == ALNUM
d6a28714 3042 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 3043 sayNO;
b8c5462f 3044 nextchr = UCHARAT(++locinput);
a0d0e21e 3045 break;
d6a28714 3046 case NALNUML:
b8c5462f
JH
3047 PL_reg_flags |= RF_tainted;
3048 /* FALL THROUGH */
d6a28714
JH
3049 case NALNUM:
3050 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 3051 sayNO;
ffc61ed2 3052 if (do_utf8) {
1a4fad37 3053 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3054 if (OP(scan) == NALNUM
3568d838 3055 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
3056 : isALNUM_LC_utf8((U8*)locinput))
3057 {
b8c5462f 3058 sayNO;
d6a28714 3059 }
b8c5462f
JH
3060 locinput += PL_utf8skip[nextchr];
3061 nextchr = UCHARAT(locinput);
3062 break;
3063 }
ffc61ed2 3064 if (OP(scan) == NALNUM
d6a28714 3065 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 3066 sayNO;
76e3520e 3067 nextchr = UCHARAT(++locinput);
a0d0e21e 3068 break;
d6a28714
JH
3069 case BOUNDL:
3070 case NBOUNDL:
3280af22 3071 PL_reg_flags |= RF_tainted;
bbce6d69 3072 /* FALL THROUGH */
d6a28714
JH
3073 case BOUND:
3074 case NBOUND:
3075 /* was last char in word? */
ffc61ed2 3076 if (do_utf8) {
12d33761 3077 if (locinput == PL_bostr)
5d9a96ca 3078 st->ln = '\n';
ffc61ed2 3079 else {
a3b680e6 3080 const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
9041c2e3 3081
4ad0818d 3082 st->ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
ffc61ed2
JH
3083 }
3084 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
5d9a96ca 3085 st->ln = isALNUM_uni(st->ln);
1a4fad37 3086 LOAD_UTF8_CHARCLASS_ALNUM();
3568d838 3087 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
3088 }
3089 else {
5d9a96ca 3090 st->ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(st->ln));
ffc61ed2
JH
3091 n = isALNUM_LC_utf8((U8*)locinput);
3092 }
a0ed51b3 3093 }
d6a28714 3094 else {
5d9a96ca 3095 st->ln = (locinput != PL_bostr) ?
12d33761 3096 UCHARAT(locinput - 1) : '\n';
ffc61ed2 3097 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
5d9a96ca 3098 st->ln = isALNUM(st->ln);
ffc61ed2
JH
3099 n = isALNUM(nextchr);
3100 }
3101 else {
5d9a96ca 3102 st->ln = isALNUM_LC(st->ln);
ffc61ed2
JH
3103 n = isALNUM_LC(nextchr);
3104 }
d6a28714 3105 }
5d9a96ca 3106 if (((!st->ln) == (!n)) == (OP(scan) == BOUND ||
ffc61ed2
JH
3107 OP(scan) == BOUNDL))
3108 sayNO;
a0ed51b3 3109 break;
d6a28714 3110 case SPACEL:
3280af22 3111 PL_reg_flags |= RF_tainted;
bbce6d69 3112 /* FALL THROUGH */
d6a28714 3113 case SPACE:
9442cb0e 3114 if (!nextchr)
4633a7c4 3115 sayNO;
1aa99e6b 3116 if (do_utf8) {
fd400ab9 3117 if (UTF8_IS_CONTINUED(nextchr)) {
1a4fad37 3118 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3119 if (!(OP(scan) == SPACE
3568d838 3120 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
3121 : isSPACE_LC_utf8((U8*)locinput)))
3122 {
3123 sayNO;
3124 }
3125 locinput += PL_utf8skip[nextchr];
3126 nextchr = UCHARAT(locinput);
3127 break;
d6a28714 3128 }
ffc61ed2
JH
3129 if (!(OP(scan) == SPACE
3130 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3131 sayNO;
3132 nextchr = UCHARAT(++locinput);
3133 }
3134 else {
3135 if (!(OP(scan) == SPACE
3136 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3137 sayNO;
3138 nextchr = UCHARAT(++locinput);
a0ed51b3 3139 }
a0ed51b3 3140 break;
d6a28714 3141 case NSPACEL:
3280af22 3142 PL_reg_flags |= RF_tainted;
bbce6d69 3143 /* FALL THROUGH */
d6a28714 3144 case NSPACE:
9442cb0e 3145 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3146 sayNO;
1aa99e6b 3147 if (do_utf8) {
1a4fad37 3148 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3149 if (OP(scan) == NSPACE
3568d838 3150 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 3151 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
3152 {
3153 sayNO;
3154 }
3155 locinput += PL_utf8skip[nextchr];
3156 nextchr = UCHARAT(locinput);
3157 break;
a0ed51b3 3158 }
ffc61ed2 3159 if (OP(scan) == NSPACE
d6a28714 3160 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 3161 sayNO;
76e3520e 3162 nextchr = UCHARAT(++locinput);
a0d0e21e 3163 break;
d6a28714 3164 case DIGITL:
a0ed51b3
LW
3165 PL_reg_flags |= RF_tainted;
3166 /* FALL THROUGH */
d6a28714 3167 case DIGIT:
9442cb0e 3168 if (!nextchr)
a0ed51b3 3169 sayNO;
1aa99e6b 3170 if (do_utf8) {
1a4fad37 3171 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3172 if (!(OP(scan) == DIGIT
3568d838 3173 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 3174 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 3175 {
a0ed51b3 3176 sayNO;
dfe13c55 3177 }
6f06b55f 3178 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3179 nextchr = UCHARAT(locinput);
3180 break;
3181 }
ffc61ed2 3182 if (!(OP(scan) == DIGIT
9442cb0e 3183 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
3184 sayNO;
3185 nextchr = UCHARAT(++locinput);
3186 break;
d6a28714 3187 case NDIGITL:
b8c5462f
JH
3188 PL_reg_flags |= RF_tainted;
3189 /* FALL THROUGH */
d6a28714 3190 case NDIGIT:
9442cb0e 3191 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3192 sayNO;
1aa99e6b 3193 if (do_utf8) {
1a4fad37 3194 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3195 if (OP(scan) == NDIGIT
3568d838 3196 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
3197 : isDIGIT_LC_utf8((U8*)locinput))
3198 {
a0ed51b3 3199 sayNO;
9442cb0e 3200 }
6f06b55f 3201 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3202 nextchr = UCHARAT(locinput);
3203 break;
3204 }
ffc61ed2 3205 if (OP(scan) == NDIGIT
9442cb0e 3206 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
3207 sayNO;
3208 nextchr = UCHARAT(++locinput);
3209 break;
3210 case CLUMP:
b7c83a7e 3211 if (locinput >= PL_regeol)
a0ed51b3 3212 sayNO;
b7c83a7e 3213 if (do_utf8) {
1a4fad37 3214 LOAD_UTF8_CHARCLASS_MARK();
b7c83a7e
JH
3215 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3216 sayNO;
3217 locinput += PL_utf8skip[nextchr];
3218 while (locinput < PL_regeol &&
3219 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3220 locinput += UTF8SKIP(locinput);
3221 if (locinput > PL_regeol)
3222 sayNO;
eb08e2da
JH
3223 }
3224 else
3225 locinput++;
a0ed51b3
LW
3226 nextchr = UCHARAT(locinput);
3227 break;
c8756f30 3228 case REFFL:
3280af22 3229 PL_reg_flags |= RF_tainted;
c8756f30 3230 /* FALL THROUGH */
c277df42 3231 case REF:
95b24440
DM
3232 case REFF: {
3233 char *s;
c277df42 3234 n = ARG(scan); /* which paren pair */
5d9a96ca 3235 st->ln = PL_regstartp[n];
2c2d71f5 3236 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
5d9a96ca 3237 if ((I32)*PL_reglastparen < n || st->ln == -1)
af3f8c16 3238 sayNO; /* Do not match unless seen CLOSEn. */
5d9a96ca 3239 if (st->ln == PL_regendp[n])
a0d0e21e 3240 break;
a0ed51b3 3241
5d9a96ca 3242 s = PL_bostr + st->ln;
1aa99e6b 3243 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 3244 char *l = locinput;
a3b680e6 3245 const char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
3246 /*
3247 * Note that we can't do the "other character" lookup trick as
3248 * in the 8-bit case (no pun intended) because in Unicode we
3249 * have to map both upper and title case to lower case.
3250 */
3251 if (OP(scan) == REFF) {
3252 while (s < e) {
a3b680e6
AL
3253 STRLEN ulen1, ulen2;
3254 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3255 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3256
a0ed51b3
LW
3257 if (l >= PL_regeol)
3258 sayNO;
a2a2844f
JH
3259 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3260 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 3261 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 3262 sayNO;
a2a2844f
JH
3263 s += ulen1;
3264 l += ulen2;
a0ed51b3
LW
3265 }
3266 }
3267 locinput = l;
3268 nextchr = UCHARAT(locinput);
3269 break;
3270 }
3271
a0d0e21e 3272 /* Inline the first character, for speed. */
76e3520e 3273 if (UCHARAT(s) != nextchr &&
c8756f30
AK
3274 (OP(scan) == REF ||
3275 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 3276 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 3277 sayNO;
5d9a96ca
DM
3278 st->ln = PL_regendp[n] - st->ln;
3279 if (locinput + st->ln > PL_regeol)
4633a7c4 3280 sayNO;
5d9a96ca
DM
3281 if (st->ln > 1 && (OP(scan) == REF
3282 ? memNE(s, locinput, st->ln)
c8756f30 3283 : (OP(scan) == REFF
5d9a96ca
DM
3284 ? ibcmp(s, locinput, st->ln)
3285 : ibcmp_locale(s, locinput, st->ln))))
4633a7c4 3286 sayNO;
5d9a96ca 3287 locinput += st->ln;
76e3520e 3288 nextchr = UCHARAT(locinput);
a0d0e21e 3289 break;
95b24440 3290 }
a0d0e21e
LW
3291
3292 case NOTHING:
c277df42 3293 case TAIL:
a0d0e21e
LW
3294 break;
3295 case BACK:
3296 break;
c277df42
IZ
3297 case EVAL:
3298 {
c277df42 3299 SV *ret;
8e5e9ebe 3300 {
4aabdb9b
DM
3301 /* execute the code in the {...} */
3302 dSP;
6136c704 3303 SV ** const before = SP;
4aabdb9b
DM
3304 OP_4tree * const oop = PL_op;
3305 COP * const ocurcop = PL_curcop;
3306 PAD *old_comppad;
4aabdb9b
DM
3307
3308 n = ARG(scan);
32fc9b6a 3309 PL_op = (OP_4tree*)rex->data->data[n];
4aabdb9b 3310 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
32fc9b6a 3311 PAD_SAVE_LOCAL(old_comppad, (PAD*)rex->data->data[n + 2]);
4aabdb9b
DM
3312 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
3313
8e5e9ebe
RGS
3314 CALLRUNOPS(aTHX); /* Scalar context. */
3315 SPAGAIN;
3316 if (SP == before)
075aa684 3317 ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
8e5e9ebe
RGS
3318 else {
3319 ret = POPs;
3320 PUTBACK;
3321 }
4aabdb9b
DM
3322
3323 PL_op = oop;
3324 PAD_RESTORE_LOCAL(old_comppad);
3325 PL_curcop = ocurcop;
3326 if (!st->logical) {
3327 /* /(?{...})/ */
3328 sv_setsv(save_scalar(PL_replgv), ret);
4aabdb9b
DM
3329 break;
3330 }
8e5e9ebe 3331 }
4aabdb9b
DM
3332 if (st->logical == 2) { /* Postponed subexpression: /(??{...})/ */
3333 regexp *re;
4aabdb9b 3334 {
4f639d21
DM
3335 /* extract RE object from returned value; compiling if
3336 * necessary */
3337
6136c704 3338 MAGIC *mg = NULL;
4aabdb9b 3339 SV *sv;
faf82a0b
AE
3340 if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3341 mg = mg_find(sv, PERL_MAGIC_qr);
3342 else if (SvSMAGICAL(ret)) {
3343 if (SvGMAGICAL(ret))
3344 sv_unmagic(ret, PERL_MAGIC_qr);
3345 else
3346 mg = mg_find(ret, PERL_MAGIC_qr);
0f5d15d6 3347 }
faf82a0b 3348
0f5d15d6
IZ
3349 if (mg) {
3350 re = (regexp *)mg->mg_obj;
df0003d4 3351 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
3352 }
3353 else {
3354 STRLEN len;
6136c704 3355 const char * const t = SvPV_const(ret, len);
0f5d15d6 3356 PMOP pm;
a3b680e6 3357 const I32 osize = PL_regsize;
0f5d15d6 3358
5fcd1c1b 3359 Zero(&pm, 1, PMOP);
4aabdb9b 3360 if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
83003860 3361 re = CALLREGCOMP(aTHX_ (char*)t, (char*)t + len, &pm);
9041c2e3 3362 if (!(SvFLAGS(ret)
faf82a0b
AE
3363 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3364 | SVs_GMG)))
14befaf4
DM
3365 sv_magic(ret,(SV*)ReREFCNT_inc(re),
3366 PERL_MAGIC_qr,0,0);
0f5d15d6 3367 PL_regsize = osize;
0f5d15d6 3368 }
4aabdb9b 3369 }
aa283a38
DM
3370
3371 /* run the pattern returned from (??{...}) */
3372
4aabdb9b
DM
3373 DEBUG_EXECUTE_r(
3374 PerlIO_printf(Perl_debug_log,
3375 "Entering embedded \"%s%.60s%s%s\"\n",
3376 PL_colors[0],
3377 re->precomp,
3378 PL_colors[1],
3379 (strlen(re->precomp) > 60 ? "..." : ""))
3380 );
2c2d71f5 3381
4aabdb9b
DM
3382 st->u.eval.cp = regcppush(0); /* Save *all* the positions. */
3383 REGCP_SET(st->u.eval.lastcp);
4aabdb9b
DM
3384 *PL_reglastparen = 0;
3385 *PL_reglastcloseparen = 0;
4aabdb9b 3386 PL_reginput = locinput;
4aabdb9b
DM
3387
3388 /* XXXX This is too dramatic a measure... */
3389 PL_reg_maxiter = 0;
3390
5d9a96ca 3391 st->logical = 0;
aa283a38
DM
3392 st->u.eval.toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
3393 ((re->reganch & ROPT_UTF8) != 0);
3394 if (st->u.eval.toggleutf) PL_reg_flags ^= RF_utf8;
3395 st->u.eval.prev_rex = rex;
aa283a38 3396 rex = re;
aa283a38
DM
3397
3398 st->u.eval.prev_eval = cur_eval;
3399 st->u.eval.prev_slab = PL_regmatch_slab;
3400 st->u.eval.depth = depth;
3401 cur_eval = st;
3402 PUSH_STATE(newst, resume_EVAL);
3403 st = newst;
3404
3405 /* now continue from first node in postoned RE */
3406 next = re->program + 1;
3407 break;
4aabdb9b 3408 /* NOTREACHED */
a0ed51b3 3409 }
4aabdb9b
DM
3410 /* /(?(?{...})X|Y)/ */
3411 st->sw = SvTRUE(ret);
3412 st->logical = 0;
c277df42
IZ
3413 break;
3414 }
a0d0e21e 3415 case OPEN:
c277df42 3416 n = ARG(scan); /* which paren pair */
3280af22
NIS
3417 PL_reg_start_tmp[n] = locinput;
3418 if (n > PL_regsize)
3419 PL_regsize = n;
a0d0e21e
LW
3420 break;
3421 case CLOSE:
c277df42 3422 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
3423 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3424 PL_regendp[n] = locinput - PL_bostr;
eb160463 3425 if (n > (I32)*PL_reglastparen)
3280af22 3426 *PL_reglastparen = n;
a01268b5 3427 *PL_reglastcloseparen = n;
a0d0e21e 3428 break;
c277df42
IZ
3429 case GROUPP:
3430 n = ARG(scan); /* which paren pair */
5d9a96ca 3431 st->sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
3432 break;
3433 case IFTHEN:
2c2d71f5 3434 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
5d9a96ca 3435 if (st->sw)
c277df42
IZ
3436 next = NEXTOPER(NEXTOPER(scan));
3437 else {
3438 next = scan + ARG(scan);
3439 if (OP(next) == IFTHEN) /* Fake one. */
3440 next = NEXTOPER(NEXTOPER(next));
3441 }
3442 break;
3443 case LOGICAL:
5d9a96ca 3444 st->logical = scan->flags;
c277df42 3445 break;
2ab05381 3446/*******************************************************************
a0374537
DM
3447 cc points to the regmatch_state associated with the most recent CURLYX.
3448 This struct contains info about the innermost (...)* loop (an
3449 "infoblock"), and a pointer to the next outer cc.
2ab05381
IZ
3450
3451 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3452
95b24440 3453 1) After matching Y, regnode for CURLYX is processed;
2ab05381 3454
a0374537 3455 2) This regnode populates cc, and calls regmatch() recursively
95b24440 3456 with the starting point at WHILEM node;
2ab05381
IZ
3457
3458 3) Each hit of WHILEM node tries to match A and Z (in the order
3459 depending on the current iteration, min/max of {min,max} and
3460 greediness). The information about where are nodes for "A"
a0374537 3461 and "Z" is read from cc, as is info on how many times "A"
2ab05381
IZ
3462 was already matched, and greediness.
3463
3464 4) After A matches, the same WHILEM node is hit again.
3465
95b24440 3466 5) Each time WHILEM is hit, cc is the infoblock created by CURLYX
2ab05381 3467 of the same pair. Thus when WHILEM tries to match Z, it temporarily
95b24440 3468 resets cc, since this Y(A)*Z can be a part of some other loop:
2ab05381
IZ
3469 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
3470 of the external loop.
3471
a0374537 3472 Currently present infoblocks form a tree with a stem formed by st->cc
2ab05381
IZ
3473 and whatever it mentions via ->next, and additional attached trees
3474 corresponding to temporarily unset infoblocks as in "5" above.
3475
95b24440 3476 In the following picture, infoblocks for outer loop of
2ab05381
IZ
3477 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
3478 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
3479 infoblocks are drawn below the "reset" infoblock.
3480
3481 In fact in the picture below we do not show failed matches for Z and T
3482 by WHILEM blocks. [We illustrate minimal matches, since for them it is
3483 more obvious *why* one needs to *temporary* unset infoblocks.]
3484
3485 Matched REx position InfoBlocks Comment
3486 (Y(A)*?Z)*?T x
3487 Y(A)*?Z)*?T x <- O
3488 Y (A)*?Z)*?T x <- O
3489 Y A)*?Z)*?T x <- O <- I
3490 YA )*?Z)*?T x <- O <- I
3491 YA A)*?Z)*?T x <- O <- I
3492 YAA )*?Z)*?T x <- O <- I
3493 YAA Z)*?T x <- O # Temporary unset I
3494 I
3495
3496 YAAZ Y(A)*?Z)*?T x <- O
3497 I
3498
3499 YAAZY (A)*?Z)*?T x <- O
3500 I
3501
3502 YAAZY A)*?Z)*?T x <- O <- I
3503 I
3504
3505 YAAZYA )*?Z)*?T x <- O <- I
3506 I
3507
3508 YAAZYA Z)*?T x <- O # Temporary unset I
3509 I,I
3510
3511 YAAZYAZ )*?T x <- O
3512 I,I
3513
3514 YAAZYAZ T x # Temporary unset O
3515 O
3516 I,I
3517
3518 YAAZYAZT x
3519 O
3520 I,I
3521 *******************************************************************/
95b24440 3522
a0d0e21e 3523 case CURLYX: {
cb434fcc
IZ
3524 /* No need to save/restore up to this paren */
3525 I32 parenfloor = scan->flags;
c277df42 3526
4ae2f981 3527 if (next && (OP(PREVOPER(next)) == NOTHING)) /* LONGJMP */
c277df42 3528 next += ARG(next);
cb434fcc
IZ
3529 /* XXXX Probably it is better to teach regpush to support
3530 parenfloor > PL_regsize... */
eb160463 3531 if (parenfloor > (I32)*PL_reglastparen)
cb434fcc 3532 parenfloor = *PL_reglastparen; /* Pessimization... */
a0374537 3533
d8319b27
DM
3534 st->u.curlyx.cp = PL_savestack_ix;
3535 st->u.curlyx.outercc = st->cc;
a0374537
DM
3536 st->cc = st;
3537 /* these fields contain the state of the current curly.
3538 * they are accessed by subsequent WHILEMs;
3539 * cur and lastloc are also updated by WHILEM */
d8319b27
DM
3540 st->u.curlyx.parenfloor = parenfloor;
3541 st->u.curlyx.cur = -1; /* this will be updated by WHILEM */
3542 st->u.curlyx.min = ARG1(scan);
3543 st->u.curlyx.max = ARG2(scan);
3544 st->u.curlyx.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3545 st->u.curlyx.lastloc = 0;
a0374537
DM
3546 /* st->next and st->minmod are also read by WHILEM */
3547
3280af22 3548 PL_reginput = locinput;
95b24440
DM
3549 REGMATCH(PREVOPER(next), CURLYX); /* start on the WHILEM */
3550 /*** all unsaved local vars undefined at this point */
d8319b27
DM
3551 regcpblow(st->u.curlyx.cp);
3552 st->cc = st->u.curlyx.outercc;
95b24440 3553 saySAME(result);
a0d0e21e 3554 }
5f66b61c 3555 /* NOTREACHED */
a0d0e21e
LW
3556 case WHILEM: {
3557 /*
3558 * This is really hard to understand, because after we match
3559 * what we're trying to match, we must make sure the rest of
2c2d71f5 3560 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
3561 * to go back UP the parse tree by recursing ever deeper. And
3562 * if it fails, we have to reset our parent's current state
3563 * that we can try again after backing off.
3564 */
3565
d8319b27
DM
3566 st->u.whilem.lastloc = st->cc->u.curlyx.lastloc; /* Detection of 0-len. */
3567 st->u.whilem.cache_offset = 0;
3568 st->u.whilem.cache_bit = 0;
c277df42 3569
d8319b27 3570 n = st->cc->u.curlyx.cur + 1; /* how many we know we matched */
3280af22 3571 PL_reginput = locinput;
a0d0e21e 3572
a3621e74 3573 DEBUG_EXECUTE_r(
9041c2e3 3574 PerlIO_printf(Perl_debug_log,
91f3b821 3575 "%*s %ld out of %ld..%ld cc=%"UVxf"\n",
3280af22 3576 REPORT_CODE_OFF+PL_regindent*2, "",
d8319b27
DM
3577 (long)n, (long)st->cc->u.curlyx.min,
3578 (long)st->cc->u.curlyx.max, PTR2UV(st->cc))
c277df42 3579 );
4633a7c4 3580
a0d0e21e
LW
3581 /* If degenerate scan matches "", assume scan done. */
3582
d8319b27
DM
3583 if (locinput == st->cc->u.curlyx.lastloc && n >= st->cc->u.curlyx.min) {
3584 st->u.whilem.savecc = st->cc;
3585 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3586 if (st->cc)
d8319b27 3587 st->ln = st->cc->u.curlyx.cur;
a3621e74 3588 DEBUG_EXECUTE_r(
c3464db5
DD
3589 PerlIO_printf(Perl_debug_log,
3590 "%*s empty match detected, try continuation...\n",
3280af22 3591 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3592 );
d8319b27 3593 REGMATCH(st->u.whilem.savecc->next, WHILEM1);
95b24440 3594 /*** all unsaved local vars undefined at this point */
d8319b27 3595 st->cc = st->u.whilem.savecc;
95b24440 3596 if (result)
4633a7c4 3597 sayYES;
d8319b27
DM
3598 if (st->cc->u.curlyx.outercc)
3599 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
4633a7c4 3600 sayNO;
a0d0e21e
LW
3601 }
3602
3603 /* First just match a string of min scans. */
3604
d8319b27
DM
3605 if (n < st->cc->u.curlyx.min) {
3606 st->cc->u.curlyx.cur = n;
3607 st->cc->u.curlyx.lastloc = locinput;
3608 REGMATCH(st->cc->u.curlyx.scan, WHILEM2);
95b24440
DM
3609 /*** all unsaved local vars undefined at this point */
3610 if (result)
4633a7c4 3611 sayYES;
d8319b27
DM
3612 st->cc->u.curlyx.cur = n - 1;
3613 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
4633a7c4 3614 sayNO;
a0d0e21e
LW
3615 }
3616
2c2d71f5
JH
3617 if (scan->flags) {
3618 /* Check whether we already were at this position.
3619 Postpone detection until we know the match is not
3620 *that* much linear. */
3621 if (!PL_reg_maxiter) {
3622 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3623 PL_reg_leftiter = PL_reg_maxiter;
3624 }
3625 if (PL_reg_leftiter-- == 0) {
a3b680e6 3626 const I32 size = (PL_reg_maxiter + 7 + POSCACHE_START)/8;
2c2d71f5 3627 if (PL_reg_poscache) {
eb160463 3628 if ((I32)PL_reg_poscache_size < size) {
2c2d71f5
JH
3629 Renew(PL_reg_poscache, size, char);
3630 PL_reg_poscache_size = size;
3631 }
3632 Zero(PL_reg_poscache, size, char);
3633 }
3634 else {
3635 PL_reg_poscache_size = size;
a02a5408 3636 Newxz(PL_reg_poscache, size, char);
2c2d71f5 3637 }
a3621e74 3638 DEBUG_EXECUTE_r(
2c2d71f5
JH
3639 PerlIO_printf(Perl_debug_log,
3640 "%sDetected a super-linear match, switching on caching%s...\n",
3641 PL_colors[4], PL_colors[5])
3642 );
3643 }
3644 if (PL_reg_leftiter < 0) {
d8319b27 3645 st->u.whilem.cache_offset = locinput - PL_bostr;
2c2d71f5 3646
d8319b27
DM
3647 st->u.whilem.cache_offset = (scan->flags & 0xf) - 1 + POSCACHE_START
3648 + st->u.whilem.cache_offset * (scan->flags>>4);
3649 st->u.whilem.cache_bit = st->u.whilem.cache_offset % 8;
3650 st->u.whilem.cache_offset /= 8;
3651 if (PL_reg_poscache[st->u.whilem.cache_offset] & (1<<st->u.whilem.cache_bit)) {
a3621e74 3652 DEBUG_EXECUTE_r(
2c2d71f5
JH
3653 PerlIO_printf(Perl_debug_log,
3654 "%*s already tried at this position...\n",
3655 REPORT_CODE_OFF+PL_regindent*2, "")
3656 );
3ab3c9b4
HS
3657 if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))
3658 /* cache records success */
c2b0868c
HS
3659 sayYES;
3660 else
3ab3c9b4 3661 /* cache records failure */
c2b0868c 3662 sayNO_SILENT;
2c2d71f5 3663 }
d8319b27 3664 PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit);
2c2d71f5
JH
3665 }
3666 }
3667
a0d0e21e
LW
3668 /* Prefer next over scan for minimal matching. */
3669
5d9a96ca 3670 if (st->cc->minmod) {
d8319b27
DM
3671 st->u.whilem.savecc = st->cc;
3672 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3673 if (st->cc)
d8319b27
DM
3674 st->ln = st->cc->u.curlyx.cur;
3675 st->u.whilem.cp = regcppush(st->u.whilem.savecc->u.curlyx.parenfloor);
3676 REGCP_SET(st->u.whilem.lastcp);
3677 REGMATCH(st->u.whilem.savecc->next, WHILEM3);
95b24440 3678 /*** all unsaved local vars undefined at this point */
d8319b27 3679 st->cc = st->u.whilem.savecc;
95b24440 3680 if (result) {
d8319b27 3681 regcpblow(st->u.whilem.cp);
3ab3c9b4 3682 CACHEsayYES; /* All done. */
5f05dabc 3683 }
d8319b27 3684 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3685 regcppop(rex);
d8319b27
DM
3686 if (st->cc->u.curlyx.outercc)
3687 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
a0d0e21e 3688
d8319b27 3689 if (n >= st->cc->u.curlyx.max) { /* Maximum greed exceeded? */
9041c2e3 3690 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3691 && !(PL_reg_flags & RF_warned)) {
3692 PL_reg_flags |= RF_warned;
9014280d 3693 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
2f3ca594
GS
3694 "Complex regular subexpression recursion",
3695 REG_INFTY - 1);
c277df42 3696 }
3ab3c9b4 3697 CACHEsayNO;
c277df42 3698 }
a687059c 3699
a3621e74 3700 DEBUG_EXECUTE_r(
c3464db5
DD
3701 PerlIO_printf(Perl_debug_log,
3702 "%*s trying longer...\n",
3280af22 3703 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3704 );
a0d0e21e 3705 /* Try scanning more and see if it helps. */
3280af22 3706 PL_reginput = locinput;
d8319b27
DM
3707 st->cc->u.curlyx.cur = n;
3708 st->cc->u.curlyx.lastloc = locinput;
3709 st->u.whilem.cp = regcppush(st->cc->u.curlyx.parenfloor);
3710 REGCP_SET(st->u.whilem.lastcp);
3711 REGMATCH(st->cc->u.curlyx.scan, WHILEM4);
95b24440
DM
3712 /*** all unsaved local vars undefined at this point */
3713 if (result) {
d8319b27 3714 regcpblow(st->u.whilem.cp);
3ab3c9b4 3715 CACHEsayYES;
5f05dabc 3716 }
d8319b27 3717 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3718 regcppop(rex);
d8319b27
DM
3719 st->cc->u.curlyx.cur = n - 1;
3720 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
3ab3c9b4 3721 CACHEsayNO;
a0d0e21e
LW
3722 }
3723
3724 /* Prefer scan over next for maximal matching. */
3725
d8319b27
DM
3726 if (n < st->cc->u.curlyx.max) { /* More greed allowed? */
3727 st->u.whilem.cp = regcppush(st->cc->u.curlyx.parenfloor);
3728 st->cc->u.curlyx.cur = n;
3729 st->cc->u.curlyx.lastloc = locinput;
3730 REGCP_SET(st->u.whilem.lastcp);
3731 REGMATCH(st->cc->u.curlyx.scan, WHILEM5);
95b24440
DM
3732 /*** all unsaved local vars undefined at this point */
3733 if (result) {
d8319b27 3734 regcpblow(st->u.whilem.cp);
3ab3c9b4 3735 CACHEsayYES;
5f05dabc 3736 }
d8319b27 3737 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3738 regcppop(rex); /* Restore some previous $<digit>s? */
3280af22 3739 PL_reginput = locinput;
a3621e74 3740 DEBUG_EXECUTE_r(
c3464db5
DD
3741 PerlIO_printf(Perl_debug_log,
3742 "%*s failed, try continuation...\n",
3280af22 3743 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3744 );
3745 }
9041c2e3 3746 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3747 && !(PL_reg_flags & RF_warned)) {
3280af22 3748 PL_reg_flags |= RF_warned;
9014280d 3749 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
cb5d145d
GS
3750 "Complex regular subexpression recursion",
3751 REG_INFTY - 1);
a0d0e21e
LW
3752 }
3753
3754 /* Failed deeper matches of scan, so see if this one works. */
d8319b27
DM
3755 st->u.whilem.savecc = st->cc;
3756 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3757 if (st->cc)
d8319b27
DM
3758 st->ln = st->cc->u.curlyx.cur;
3759 REGMATCH(st->u.whilem.savecc->next, WHILEM6);
95b24440 3760 /*** all unsaved local vars undefined at this point */
d8319b27 3761 st->cc = st->u.whilem.savecc;
95b24440 3762 if (result)
3ab3c9b4 3763 CACHEsayYES;
d8319b27
DM
3764 if (st->cc->u.curlyx.outercc)
3765 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
3766 st->cc->u.curlyx.cur = n - 1;
3767 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
3ab3c9b4 3768 CACHEsayNO;
a0d0e21e 3769 }
5f66b61c 3770 /* NOTREACHED */
9041c2e3 3771 case BRANCHJ:
c277df42
IZ
3772 next = scan + ARG(scan);
3773 if (next == scan)
3774 next = NULL;
3775 inner = NEXTOPER(NEXTOPER(scan));
3776 goto do_branch;
9041c2e3 3777 case BRANCH:
c277df42
IZ
3778 inner = NEXTOPER(scan);
3779 do_branch:
3780 {
e822a8b4
DM
3781 I32 type;
3782 type = OP(scan);
ae5031b3 3783 if (!next || OP(next) != type) /* No choice. */
c277df42 3784 next = inner; /* Avoid recursion. */
a0d0e21e 3785 else {
a3b680e6 3786 const I32 lastparen = *PL_reglastparen;
02db2b7b 3787 /* Put unwinding data on stack */
6136c704
AL
3788 const I32 unwind1 = SSNEWt(1,re_unwind_branch_t);
3789 re_unwind_branch_t * const uw = SSPTRt(unwind1,re_unwind_branch_t);
3790
5d9a96ca
DM
3791 uw->prev = st->unwind;
3792 st->unwind = unwind1;
e822a8b4 3793 uw->type = ((type == BRANCH)
02db2b7b
IZ
3794 ? RE_UNWIND_BRANCH
3795 : RE_UNWIND_BRANCHJ);
3796 uw->lastparen = lastparen;
3797 uw->next = next;
3798 uw->locinput = locinput;
3799 uw->nextchr = nextchr;
3a2830be 3800 uw->minmod = st->minmod;
02db2b7b
IZ
3801#ifdef DEBUGGING
3802 uw->regindent = ++PL_regindent;
3803#endif
c277df42 3804
02db2b7b
IZ
3805 REGCP_SET(uw->lastcp);
3806
3807 /* Now go into the first branch */
3808 next = inner;
a687059c 3809 }
a0d0e21e
LW
3810 }
3811 break;
3812 case MINMOD:
5d9a96ca 3813 st->minmod = 1;
a0d0e21e 3814 break;
c277df42
IZ
3815 case CURLYM:
3816 {
d8319b27 3817 st->u.curlym.l = st->u.curlym.matches = 0;
9041c2e3 3818
c277df42 3819 /* We suppose that the next guy does not need
0e788c72 3820 backtracking: in particular, it is of constant non-zero length,
c277df42 3821 and has no parenths to influence future backrefs. */
5d9a96ca 3822 st->ln = ARG1(scan); /* min to match */
c277df42 3823 n = ARG2(scan); /* max to match */
d8319b27
DM
3824 st->u.curlym.paren = scan->flags;
3825 if (st->u.curlym.paren) {
3826 if (st->u.curlym.paren > PL_regsize)
3827 PL_regsize = st->u.curlym.paren;
3828 if (st->u.curlym.paren > (I32)*PL_reglastparen)
3829 *PL_reglastparen = st->u.curlym.paren;
c277df42 3830 }
dc45a647 3831 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
d8319b27 3832 if (st->u.curlym.paren)
c277df42 3833 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3834 PL_reginput = locinput;
d8319b27
DM
3835 st->u.curlym.maxwanted = st->minmod ? st->ln : n;
3836 if (st->u.curlym.maxwanted) {
3837 while (PL_reginput < PL_regeol && st->u.curlym.matches < st->u.curlym.maxwanted) {
95b24440
DM
3838 REGMATCH(scan, CURLYM1);
3839 /*** all unsaved local vars undefined at this point */
3840 if (!result)
6407bf3b 3841 break;
d8319b27
DM
3842 /* on first match, determine length, u.curlym.l */
3843 if (!st->u.curlym.matches++) {
6407bf3b
DM
3844 if (PL_reg_match_utf8) {
3845 char *s = locinput;
3846 while (s < PL_reginput) {
d8319b27 3847 st->u.curlym.l++;
6407bf3b
DM
3848 s += UTF8SKIP(s);
3849 }
3850 }
3851 else {
d8319b27 3852 st->u.curlym.l = PL_reginput - locinput;
6407bf3b 3853 }
d8319b27
DM
3854 if (st->u.curlym.l == 0) {
3855 st->u.curlym.matches = st->u.curlym.maxwanted;
6407bf3b
DM
3856 break;
3857 }
3858 }
3859 locinput = PL_reginput;
3860 }
3861 }
3862
3863 PL_reginput = locinput;
3864
5d9a96ca
DM
3865 if (st->minmod) {
3866 st->minmod = 0;
d8319b27 3867 if (st->ln && st->u.curlym.matches < st->ln)
c277df42 3868 sayNO;
cca55fe3 3869 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3870 regnode *text_node = next;
3871
cca55fe3 3872 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3873
d8319b27 3874 if (! HAS_TEXT(text_node)) st->u.curlym.c1 = st->u.curlym.c2 = -1000;
5f80c4cf 3875 else {
cca55fe3 3876 if (PL_regkind[(U8)OP(text_node)] == REF) {
d8319b27 3877 st->u.curlym.c1 = st->u.curlym.c2 = -1000;
44a68960 3878 goto assume_ok_MM;
cca55fe3 3879 }
d8319b27 3880 else { st->u.curlym.c1 = (U8)*STRING(text_node); }
af5decee 3881 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
d8319b27 3882 st->u.curlym.c2 = PL_fold[st->u.curlym.c1];
af5decee 3883 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
d8319b27 3884 st->u.curlym.c2 = PL_fold_locale[st->u.curlym.c1];
5f80c4cf 3885 else
d8319b27 3886 st->u.curlym.c2 = st->u.curlym.c1;
5f80c4cf 3887 }
a0ed51b3
LW
3888 }
3889 else
d8319b27 3890 st->u.curlym.c1 = st->u.curlym.c2 = -1000;
cca55fe3 3891 assume_ok_MM:
d8319b27 3892 REGCP_SET(st->u.curlym.lastcp);
5d9a96ca 3893 while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */
c277df42 3894 /* If it could work, try it. */
d8319b27
DM
3895 if (st->u.curlym.c1 == -1000 ||
3896 UCHARAT(PL_reginput) == st->u.curlym.c1 ||
3897 UCHARAT(PL_reginput) == st->u.curlym.c2)
c277df42 3898 {
d8319b27 3899 if (st->u.curlym.paren) {
5d9a96ca 3900 if (st->ln) {
d8319b27
DM
3901 PL_regstartp[st->u.curlym.paren] =
3902 HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr;
3903 PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3904 }
3905 else
d8319b27 3906 PL_regendp[st->u.curlym.paren] = -1;
c277df42 3907 }
95b24440
DM
3908 REGMATCH(next, CURLYM2);
3909 /*** all unsaved local vars undefined at this point */
3910 if (result)
c277df42 3911 sayYES;
d8319b27 3912 REGCP_UNWIND(st->u.curlym.lastcp);
c277df42
IZ
3913 }
3914 /* Couldn't or didn't -- move forward. */
3280af22 3915 PL_reginput = locinput;
95b24440
DM
3916 REGMATCH(scan, CURLYM3);
3917 /*** all unsaved local vars undefined at this point */
3918 if (result) {
5d9a96ca 3919 st->ln++;
3280af22 3920 locinput = PL_reginput;
c277df42
IZ
3921 }
3922 else
3923 sayNO;
3924 }
a0ed51b3
LW
3925 }
3926 else {
a3621e74 3927 DEBUG_EXECUTE_r(
5c0ca799 3928 PerlIO_printf(Perl_debug_log,
95b24440
DM
3929 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
3930 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
d8319b27 3931 (IV) st->u.curlym.matches, (IV)st->u.curlym.l)
c277df42 3932 );
d8319b27 3933 if (st->u.curlym.matches >= st->ln) {
cca55fe3 3934 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3935 regnode *text_node = next;
3936
cca55fe3 3937 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3938
d8319b27 3939 if (! HAS_TEXT(text_node)) st->u.curlym.c1 = st->u.curlym.c2 = -1000;
5f80c4cf 3940 else {
cca55fe3 3941 if (PL_regkind[(U8)OP(text_node)] == REF) {
d8319b27 3942 st->u.curlym.c1 = st->u.curlym.c2 = -1000;
44a68960 3943 goto assume_ok_REG;
cca55fe3 3944 }
d8319b27 3945 else { st->u.curlym.c1 = (U8)*STRING(text_node); }
cca55fe3 3946
af5decee 3947 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
d8319b27 3948 st->u.curlym.c2 = PL_fold[st->u.curlym.c1];
af5decee 3949 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
d8319b27 3950 st->u.curlym.c2 = PL_fold_locale[st->u.curlym.c1];
5f80c4cf 3951 else
d8319b27 3952 st->u.curlym.c2 = st->u.curlym.c1;
5f80c4cf 3953 }
a0ed51b3
LW
3954 }
3955 else
d8319b27 3956 st->u.curlym.c1 = st->u.curlym.c2 = -1000;
c277df42 3957 }
cca55fe3 3958 assume_ok_REG:
d8319b27
DM
3959 REGCP_SET(st->u.curlym.lastcp);
3960 while (st->u.curlym.matches >= st->ln) {
c277df42 3961 /* If it could work, try it. */
d8319b27
DM
3962 if (st->u.curlym.c1 == -1000 ||
3963 UCHARAT(PL_reginput) == st->u.curlym.c1 ||
3964 UCHARAT(PL_reginput) == st->u.curlym.c2)
a0ed51b3 3965 {
a3621e74 3966 DEBUG_EXECUTE_r(
6407bf3b
DM
3967 PerlIO_printf(Perl_debug_log,
3968 "%*s trying tail with matches=%"IVdf"...\n",
3969 (int)(REPORT_CODE_OFF+PL_regindent*2),
d8319b27 3970 "", (IV)st->u.curlym.matches)
a0ed51b3 3971 );
d8319b27
DM
3972 if (st->u.curlym.paren) {
3973 if (st->u.curlym.matches) {
3974 PL_regstartp[st->u.curlym.paren]
3975 = HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr;
3976 PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr;
c277df42 3977 }
a0ed51b3 3978 else
d8319b27 3979 PL_regendp[st->u.curlym.paren] = -1;
c277df42 3980 }
95b24440
DM
3981 REGMATCH(next, CURLYM4);
3982 /*** all unsaved local vars undefined at this point */
3983 if (result)
a0ed51b3 3984 sayYES;
d8319b27 3985 REGCP_UNWIND(st->u.curlym.lastcp);
a0ed51b3 3986 }
c277df42 3987 /* Couldn't or didn't -- back up. */
d8319b27
DM
3988 st->u.curlym.matches--;
3989 locinput = HOPc(locinput, -st->u.curlym.l);
3280af22 3990 PL_reginput = locinput;
c277df42
IZ
3991 }
3992 }
3993 sayNO;
5f66b61c 3994 /* NOTREACHED */
c277df42
IZ
3995 break;
3996 }
3997 case CURLYN:
d8319b27
DM
3998 st->u.plus.paren = scan->flags; /* Which paren to set */
3999 if (st->u.plus.paren > PL_regsize)
4000 PL_regsize = st->u.plus.paren;
4001 if (st->u.plus.paren > (I32)*PL_reglastparen)
4002 *PL_reglastparen = st->u.plus.paren;
5d9a96ca 4003 st->ln = ARG1(scan); /* min to match */
c277df42 4004 n = ARG2(scan); /* max to match */
dc45a647 4005 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 4006 goto repeat;
a0d0e21e 4007 case CURLY:
d8319b27 4008 st->u.plus.paren = 0;
5d9a96ca 4009 st->ln = ARG1(scan); /* min to match */
a0d0e21e 4010 n = ARG2(scan); /* max to match */
dc45a647 4011 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
4012 goto repeat;
4013 case STAR:
5d9a96ca 4014 st->ln = 0;
c277df42 4015 n = REG_INFTY;
a0d0e21e 4016 scan = NEXTOPER(scan);
d8319b27 4017 st->u.plus.paren = 0;
a0d0e21e
LW
4018 goto repeat;
4019 case PLUS:
5d9a96ca 4020 st->ln = 1;
c277df42
IZ
4021 n = REG_INFTY;
4022 scan = NEXTOPER(scan);
d8319b27 4023 st->u.plus.paren = 0;
c277df42 4024 repeat:
a0d0e21e
LW
4025 /*
4026 * Lookahead to avoid useless match attempts
4027 * when we know what character comes next.
4028 */
5f80c4cf
JP
4029
4030 /*
4031 * Used to only do .*x and .*?x, but now it allows
4032 * for )'s, ('s and (?{ ... })'s to be in the way
4033 * of the quantifier and the EXACT-like node. -- japhy
4034 */
4035
cca55fe3 4036 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
4037 U8 *s;
4038 regnode *text_node = next;
4039
cca55fe3 4040 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 4041
d8319b27 4042 if (! HAS_TEXT(text_node)) st->u.plus.c1 = st->u.plus.c2 = -1000;
5f80c4cf 4043 else {
cca55fe3 4044 if (PL_regkind[(U8)OP(text_node)] == REF) {
d8319b27 4045 st->u.plus.c1 = st->u.plus.c2 = -1000;
44a68960 4046 goto assume_ok_easy;
cca55fe3
JP
4047 }
4048 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
4049
4050 if (!UTF) {
d8319b27 4051 st->u.plus.c2 = st->u.plus.c1 = *s;
f65d3ee7 4052 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
d8319b27 4053 st->u.plus.c2 = PL_fold[st->u.plus.c1];
f65d3ee7 4054 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
d8319b27 4055 st->u.plus.c2 = PL_fold_locale[st->u.plus.c1];
1aa99e6b 4056 }
5f80c4cf 4057 else { /* UTF */
f65d3ee7 4058 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 4059 STRLEN ulen1, ulen2;
89ebb4a3
JH
4060 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4061 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
a2a2844f
JH
4062
4063 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4064 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4065
d8319b27 4066 st->u.plus.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
041457d9 4067 uniflags);
d8319b27 4068 st->u.plus.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
041457d9 4069 uniflags);
5f80c4cf
JP
4070 }
4071 else {
d8319b27 4072 st->u.plus.c2 = st->u.plus.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
041457d9 4073 uniflags);
5f80c4cf 4074 }
1aa99e6b
IH
4075 }
4076 }
bbce6d69 4077 }
a0d0e21e 4078 else
d8319b27 4079 st->u.plus.c1 = st->u.plus.c2 = -1000;
cca55fe3 4080 assume_ok_easy:
3280af22 4081 PL_reginput = locinput;
5d9a96ca
DM
4082 if (st->minmod) {
4083 st->minmod = 0;
32fc9b6a 4084 if (st->ln && regrepeat(rex, scan, st->ln) < st->ln)
4633a7c4 4085 sayNO;
a0ed51b3 4086 locinput = PL_reginput;
d8319b27
DM
4087 REGCP_SET(st->u.plus.lastcp);
4088 if (st->u.plus.c1 != -1000) {
4089 st->u.plus.old = locinput;
4090 st->u.plus.count = 0;
0fe9bf95 4091
1aa99e6b 4092 if (n == REG_INFTY) {
d8319b27 4093 st->u.plus.e = PL_regeol - 1;
1aa99e6b 4094 if (do_utf8)
d8319b27
DM
4095 while (UTF8_IS_CONTINUATION(*(U8*)st->u.plus.e))
4096 st->u.plus.e--;
1aa99e6b
IH
4097 }
4098 else if (do_utf8) {
5d9a96ca 4099 int m = n - st->ln;
d8319b27
DM
4100 for (st->u.plus.e = locinput;
4101 m >0 && st->u.plus.e + UTF8SKIP(st->u.plus.e) <= PL_regeol; m--)
4102 st->u.plus.e += UTF8SKIP(st->u.plus.e);
1aa99e6b
IH
4103 }
4104 else {
d8319b27
DM
4105 st->u.plus.e = locinput + n - st->ln;
4106 if (st->u.plus.e >= PL_regeol)
4107 st->u.plus.e = PL_regeol - 1;
1aa99e6b 4108 }
0fe9bf95
IZ
4109 while (1) {
4110 /* Find place 'next' could work */
1aa99e6b 4111 if (!do_utf8) {
d8319b27
DM
4112 if (st->u.plus.c1 == st->u.plus.c2) {
4113 while (locinput <= st->u.plus.e &&
4114 UCHARAT(locinput) != st->u.plus.c1)
1aa99e6b
IH
4115 locinput++;
4116 } else {
d8319b27
DM
4117 while (locinput <= st->u.plus.e
4118 && UCHARAT(locinput) != st->u.plus.c1
4119 && UCHARAT(locinput) != st->u.plus.c2)
1aa99e6b
IH
4120 locinput++;
4121 }
d8319b27 4122 st->u.plus.count = locinput - st->u.plus.old;
1aa99e6b
IH
4123 }
4124 else {
d8319b27 4125 if (st->u.plus.c1 == st->u.plus.c2) {
a3b680e6 4126 STRLEN len;
872c91ae
JH
4127 /* count initialised to
4128 * utf8_distance(old, locinput) */
d8319b27 4129 while (locinput <= st->u.plus.e &&
872c91ae 4130 utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4131 UTF8_MAXBYTES, &len,
d8319b27 4132 uniflags) != (UV)st->u.plus.c1) {
1aa99e6b 4133 locinput += len;
d8319b27 4134 st->u.plus.count++;
b2f2f093 4135 }
1aa99e6b 4136 } else {
a3b680e6 4137 STRLEN len;
872c91ae
JH
4138 /* count initialised to
4139 * utf8_distance(old, locinput) */
d8319b27 4140 while (locinput <= st->u.plus.e) {
872c91ae 4141 UV c = utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4142 UTF8_MAXBYTES, &len,
041457d9 4143 uniflags);
d8319b27 4144 if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
1aa99e6b 4145 break;
b2f2f093 4146 locinput += len;
d8319b27 4147 st->u.plus.count++;
1aa99e6b
IH
4148 }
4149 }
0fe9bf95 4150 }
d8319b27 4151 if (locinput > st->u.plus.e)
0fe9bf95
IZ
4152 sayNO;
4153 /* PL_reginput == old now */
d8319b27 4154 if (locinput != st->u.plus.old) {
5d9a96ca 4155 st->ln = 1; /* Did some */
32fc9b6a 4156 if (regrepeat(rex, scan, st->u.plus.count) < st->u.plus.count)
0fe9bf95
IZ
4157 sayNO;
4158 }
4159 /* PL_reginput == locinput now */
d8319b27 4160 TRYPAREN(st->u.plus.paren, st->ln, locinput, PLUS1);
95b24440 4161 /*** all unsaved local vars undefined at this point */
0fe9bf95 4162 PL_reginput = locinput; /* Could be reset... */
d8319b27 4163 REGCP_UNWIND(st->u.plus.lastcp);
0fe9bf95 4164 /* Couldn't or didn't -- move forward. */
d8319b27 4165 st->u.plus.old = locinput;
1aa99e6b
IH
4166 if (do_utf8)
4167 locinput += UTF8SKIP(locinput);
4168 else
4169 locinput++;
d8319b27 4170 st->u.plus.count = 1;
0fe9bf95
IZ
4171 }
4172 }
4173 else
5d9a96ca 4174 while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */
1aa99e6b 4175 UV c;
d8319b27 4176 if (st->u.plus.c1 != -1000) {
1aa99e6b 4177 if (do_utf8)
872c91ae 4178 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4179 UTF8_MAXBYTES, 0,
041457d9 4180 uniflags);
1aa99e6b 4181 else
9041c2e3 4182 c = UCHARAT(PL_reginput);
2390ecbc 4183 /* If it could work, try it. */
d8319b27 4184 if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
2390ecbc 4185 {
d8319b27 4186 TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS2);
95b24440 4187 /*** all unsaved local vars undefined at this point */
d8319b27 4188 REGCP_UNWIND(st->u.plus.lastcp);
2390ecbc 4189 }
1aa99e6b 4190 }
a0d0e21e 4191 /* If it could work, try it. */
d8319b27 4192 else if (st->u.plus.c1 == -1000)
bbce6d69 4193 {
d8319b27 4194 TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS3);
95b24440 4195 /*** all unsaved local vars undefined at this point */
d8319b27 4196 REGCP_UNWIND(st->u.plus.lastcp);
bbce6d69 4197 }
c277df42 4198 /* Couldn't or didn't -- move forward. */
a0ed51b3 4199 PL_reginput = locinput;
32fc9b6a 4200 if (regrepeat(rex, scan, 1)) {
5d9a96ca 4201 st->ln++;
a0ed51b3
LW
4202 locinput = PL_reginput;
4203 }
4204 else
4633a7c4 4205 sayNO;
a0d0e21e
LW
4206 }
4207 }
4208 else {
32fc9b6a 4209 n = regrepeat(rex, scan, n);
a0ed51b3 4210 locinput = PL_reginput;
5d9a96ca 4211 if (st->ln < n && PL_regkind[(U8)OP(next)] == EOL &&
7fba1cd6 4212 (OP(next) != MEOL ||
15272685
HS
4213 OP(next) == SEOL || OP(next) == EOS))
4214 {
5d9a96ca 4215 st->ln = n; /* why back off? */
1aeab75a
GS
4216 /* ...because $ and \Z can match before *and* after
4217 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
4218 We should back off by one in this case. */
4219 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
5d9a96ca 4220 st->ln--;
1aeab75a 4221 }
d8319b27 4222 REGCP_SET(st->u.plus.lastcp);
1d5c262f 4223 {
8fa7f367 4224 UV c = 0;
5d9a96ca 4225 while (n >= st->ln) {
d8319b27 4226 if (st->u.plus.c1 != -1000) {
1aa99e6b 4227 if (do_utf8)
872c91ae 4228 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4229 UTF8_MAXBYTES, 0,
041457d9 4230 uniflags);
1aa99e6b 4231 else
9041c2e3 4232 c = UCHARAT(PL_reginput);
1aa99e6b 4233 }
c277df42 4234 /* If it could work, try it. */
d8319b27 4235 if (st->u.plus.c1 == -1000 || c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
c277df42 4236 {
d8319b27 4237 TRYPAREN(st->u.plus.paren, n, PL_reginput, PLUS4);
95b24440 4238 /*** all unsaved local vars undefined at this point */
d8319b27 4239 REGCP_UNWIND(st->u.plus.lastcp);
c277df42
IZ
4240 }
4241 /* Couldn't or didn't -- back up. */
4242 n--;
dfe13c55 4243 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 4244 }
a0d0e21e
LW
4245 }
4246 }
4633a7c4 4247 sayNO;
c277df42 4248 break;
a0d0e21e 4249 case END:
aa283a38
DM
4250 if (cur_eval) {
4251 /* we have successfully completed the execution of a
4252 * postponed re. Pop all states back to the last EVAL
4253 * then continue with the node following the (??{...})
4254 */
4255
4256 /* this simulates a POP_STATE, except that it pops several
4257 * levels, and doesn't restore locinput */
6136c704 4258
aa283a38
DM
4259 st = cur_eval;
4260 PL_regmatch_slab = st->u.eval.prev_slab;
4261 cur_eval = st->u.eval.prev_eval;
4262 depth = st->u.eval.depth;
6136c704 4263
aa283a38
DM
4264 PL_regmatch_state = st;
4265 scan = st->scan;
4266 next = st->next;
4267 n = st->n;
4268
4269 if (st->u.eval.toggleutf)
4270 PL_reg_flags ^= RF_utf8;
4271 ReREFCNT_dec(rex);
4272 rex = st->u.eval.prev_rex;
aa283a38
DM
4273 /* XXXX This is too dramatic a measure... */
4274 PL_reg_maxiter = 0;
4275
4276 /* Restore parens of the caller without popping the
4277 * savestack */
95b24440
DM
4278 {
4279 I32 tmp = PL_savestack_ix;
aa283a38 4280 PL_savestack_ix = st->u.eval.lastcp;
4f639d21 4281 regcppop(rex);
95b24440
DM
4282 PL_savestack_ix = tmp;
4283 }
6136c704 4284
6136c704 4285
aa283a38
DM
4286 PL_reginput = locinput;
4287 /* resume at node following the (??{...}) */
4288 break;
0f5d15d6 4289
0f5d15d6 4290 }
aa283a38 4291
7821416a 4292 if (locinput < PL_regtill) {
a3621e74 4293 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
7821416a
IZ
4294 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4295 PL_colors[4],
4296 (long)(locinput - PL_reg_starttry),
4297 (long)(PL_regtill - PL_reg_starttry),
4298 PL_colors[5]));
4299 sayNO_FINAL; /* Cannot match: too short. */
4300 }
4301 PL_reginput = locinput; /* put where regtry can find it */
4302 sayYES_FINAL; /* Success! */
7e5428c5 4303 case SUCCEED:
3280af22 4304 PL_reginput = locinput; /* put where regtry can find it */
7821416a 4305 sayYES_LOUD; /* Success! */
c277df42
IZ
4306 case SUSPEND:
4307 n = 1;
9fe1d20c 4308 PL_reginput = locinput;
9041c2e3 4309 goto do_ifmatch;
a0d0e21e 4310 case UNLESSM:
c277df42 4311 n = 0;
a0ed51b3 4312 if (scan->flags) {
52657f30 4313 char * const s = HOPBACKc(locinput, scan->flags);
efb30f32
HS
4314 if (!s)
4315 goto say_yes;
4316 PL_reginput = s;
a0ed51b3
LW
4317 }
4318 else
4319 PL_reginput = locinput;
c277df42
IZ
4320 goto do_ifmatch;
4321 case IFMATCH:
4322 n = 1;
a0ed51b3 4323 if (scan->flags) {
52657f30 4324 char * const s = HOPBACKc(locinput, scan->flags);
efb30f32
HS
4325 if (!s)
4326 goto say_no;
4327 PL_reginput = s;
a0ed51b3
LW
4328 }
4329 else
4330 PL_reginput = locinput;
4331
c277df42 4332 do_ifmatch:
95b24440
DM
4333 REGMATCH(NEXTOPER(NEXTOPER(scan)), IFMATCH);
4334 /*** all unsaved local vars undefined at this point */
4335 if (result != n) {
c277df42 4336 say_no:
5d9a96ca
DM
4337 if (st->logical) {
4338 st->logical = 0;
4339 st->sw = 0;
c277df42 4340 goto do_longjump;
a0ed51b3
LW
4341 }
4342 else
c277df42
IZ
4343 sayNO;
4344 }
4345 say_yes:
5d9a96ca
DM
4346 if (st->logical) {
4347 st->logical = 0;
4348 st->sw = 1;
c277df42 4349 }
fe44a5e8 4350 if (OP(scan) == SUSPEND) {
3280af22 4351 locinput = PL_reginput;
565764a8 4352 nextchr = UCHARAT(locinput);
fe44a5e8 4353 }
c277df42
IZ
4354 /* FALL THROUGH. */
4355 case LONGJMP:
4356 do_longjump:
4357 next = scan + ARG(scan);
4358 if (next == scan)
4359 next = NULL;
a0d0e21e
LW
4360 break;
4361 default:
b900a521 4362 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 4363 PTR2UV(scan), OP(scan));
cea2e8a9 4364 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 4365 }
95b24440 4366
02db2b7b 4367 reenter:
a0d0e21e 4368 scan = next;
95b24440
DM
4369 continue;
4370 /* NOTREACHED */
4371
4372 /* simulate recursively calling regmatch(), but without actually
4373 * recursing - ie save the current state on the heap rather than on
4374 * the stack, then re-enter the loop. This avoids complex regexes
4375 * blowing the processor stack */
4376
4377 start_recurse:
4378 {
5d9a96ca
DM
4379 /* push new state */
4380 regmatch_state *oldst = st;
4381
4382 depth++;
4383
4384 /* grab the next free state slot */
4385 st++;
4386 if (st > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]))
4387 st = S_push_slab(aTHX);
4388 PL_regmatch_state = st;
4389
4390 oldst->next = next;
4391 oldst->n = n;
4392 oldst->locinput = locinput;
5d9a96ca
DM
4393
4394 st->cc = oldst->cc;
95b24440
DM
4395 locinput = PL_reginput;
4396 nextchr = UCHARAT(locinput);
5d9a96ca
DM
4397 st->minmod = 0;
4398 st->sw = 0;
4399 st->logical = 0;
4400 st->unwind = 0;
95b24440
DM
4401#ifdef DEBUGGING
4402 PL_regindent++;
4403#endif
4404 }
a0d0e21e 4405 }
a687059c 4406
aa283a38
DM
4407
4408
a0d0e21e
LW
4409 /*
4410 * We get here only if there's trouble -- normally "case END" is
4411 * the terminating point.
4412 */
cea2e8a9 4413 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 4414 /*NOTREACHED*/
4633a7c4
LW
4415 sayNO;
4416
7821416a 4417yes_loud:
a3621e74 4418 DEBUG_EXECUTE_r(
7821416a
IZ
4419 PerlIO_printf(Perl_debug_log,
4420 "%*s %scould match...%s\n",
e4584336 4421 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
7821416a
IZ
4422 );
4423 goto yes;
4424yes_final:
a3621e74 4425 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
e4584336 4426 PL_colors[4], PL_colors[5]));
4633a7c4
LW
4427yes:
4428#ifdef DEBUGGING
3280af22 4429 PL_regindent--;
4633a7c4 4430#endif
02db2b7b 4431
95b24440 4432 result = 1;
aa283a38
DM
4433 /* XXX this is duplicate(ish) code to that in the do_no section.
4434 * eventually a yes should just pop the whole stack */
4435 if (depth) {
4436 /* restore previous state and re-enter */
4437 POP_STATE;
4438
4439 switch (st->resume_state) {
4440 case resume_TRIE1:
4441 goto resume_point_TRIE1;
4442 case resume_TRIE2:
4443 goto resume_point_TRIE2;
4444 case resume_EVAL:
4445 break;
4446 case resume_CURLYX:
4447 goto resume_point_CURLYX;
4448 case resume_WHILEM1:
4449 goto resume_point_WHILEM1;
4450 case resume_WHILEM2:
4451 goto resume_point_WHILEM2;
4452 case resume_WHILEM3:
4453 goto resume_point_WHILEM3;
4454 case resume_WHILEM4:
4455 goto resume_point_WHILEM4;
4456 case resume_WHILEM5:
4457 goto resume_point_WHILEM5;
4458 case resume_WHILEM6:
4459 goto resume_point_WHILEM6;
4460 case resume_CURLYM1:
4461 goto resume_point_CURLYM1;
4462 case resume_CURLYM2:
4463 goto resume_point_CURLYM2;
4464 case resume_CURLYM3:
4465 goto resume_point_CURLYM3;
4466 case resume_CURLYM4:
4467 goto resume_point_CURLYM4;
4468 case resume_IFMATCH:
4469 goto resume_point_IFMATCH;
4470 case resume_PLUS1:
4471 goto resume_point_PLUS1;
4472 case resume_PLUS2:
4473 goto resume_point_PLUS2;
4474 case resume_PLUS3:
4475 goto resume_point_PLUS3;
4476 case resume_PLUS4:
4477 goto resume_point_PLUS4;
4478 default:
4479 Perl_croak(aTHX_ "regexp resume memory corruption");
4480 }
4481 }
4482 goto final_exit;
4633a7c4
LW
4483
4484no:
a3621e74 4485 DEBUG_EXECUTE_r(
7821416a
IZ
4486 PerlIO_printf(Perl_debug_log,
4487 "%*s %sfailed...%s\n",
e4584336 4488 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
7821416a
IZ
4489 );
4490 goto do_no;
4491no_final:
4492do_no:
5d9a96ca
DM
4493 if (st->unwind) {
4494 re_unwind_t * const uw = SSPTRt(st->unwind,re_unwind_t);
02db2b7b
IZ
4495
4496 switch (uw->type) {
4497 case RE_UNWIND_BRANCH:
4498 case RE_UNWIND_BRANCHJ:
4499 {
6136c704 4500 re_unwind_branch_t * const uwb = &(uw->branch);
a3b680e6 4501 const I32 lastparen = uwb->lastparen;
9041c2e3 4502
02db2b7b
IZ
4503 REGCP_UNWIND(uwb->lastcp);
4504 for (n = *PL_reglastparen; n > lastparen; n--)
4505 PL_regendp[n] = -1;
4506 *PL_reglastparen = n;
4507 scan = next = uwb->next;
3a2830be 4508 st->minmod = uwb->minmod;
9041c2e3
NIS
4509 if ( !scan ||
4510 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b 4511 ? BRANCH : BRANCHJ) ) { /* Failure */
5d9a96ca 4512 st->unwind = uwb->prev;
02db2b7b
IZ
4513#ifdef DEBUGGING
4514 PL_regindent--;
4515#endif
4516 goto do_no;
4517 }
4518 /* Have more choice yet. Reuse the same uwb. */
02db2b7b
IZ
4519 if ((n = (uwb->type == RE_UNWIND_BRANCH
4520 ? NEXT_OFF(next) : ARG(next))))
4521 next += n;
4522 else
4523 next = NULL; /* XXXX Needn't unwinding in this case... */
4524 uwb->next = next;
4525 next = NEXTOPER(scan);
4526 if (uwb->type == RE_UNWIND_BRANCHJ)
4527 next = NEXTOPER(next);
4528 locinput = uwb->locinput;
4529 nextchr = uwb->nextchr;
4530#ifdef DEBUGGING
4531 PL_regindent = uwb->regindent;
4532#endif
4533
4534 goto reenter;
4535 }
5f66b61c 4536 /* NOTREACHED */
02db2b7b
IZ
4537 default:
4538 Perl_croak(aTHX_ "regexp unwind memory corruption");
4539 }
5f66b61c 4540 /* NOTREACHED */
02db2b7b 4541 }
aa283a38 4542
4633a7c4 4543#ifdef DEBUGGING
3280af22 4544 PL_regindent--;
4633a7c4 4545#endif
95b24440 4546 result = 0;
5d9a96ca 4547
aa283a38
DM
4548 if (depth) {
4549 /* there's a previous state to backtrack to */
4550 POP_STATE;
5d9a96ca 4551 switch (st->resume_state) {
95b24440
DM
4552 case resume_TRIE1:
4553 goto resume_point_TRIE1;
4554 case resume_TRIE2:
4555 goto resume_point_TRIE2;
aa283a38
DM
4556 case resume_EVAL:
4557 /* we have failed an (??{...}). Restore state to the outer re
4558 * then re-throw the failure */
4559 if (st->u.eval.toggleutf)
4560 PL_reg_flags ^= RF_utf8;
4561 ReREFCNT_dec(rex);
4562 rex = st->u.eval.prev_rex;
aa283a38
DM
4563 cur_eval = st->u.eval.prev_eval;
4564
4565 /* XXXX This is too dramatic a measure... */
4566 PL_reg_maxiter = 0;
4567
4568 PL_reginput = locinput;
4569 REGCP_UNWIND(st->u.eval.lastcp);
4570 regcppop(rex);
4571 goto do_no;
4572
95b24440
DM
4573 case resume_CURLYX:
4574 goto resume_point_CURLYX;
4575 case resume_WHILEM1:
4576 goto resume_point_WHILEM1;
4577 case resume_WHILEM2:
4578 goto resume_point_WHILEM2;
4579 case resume_WHILEM3:
4580 goto resume_point_WHILEM3;
4581 case resume_WHILEM4:
4582 goto resume_point_WHILEM4;
4583 case resume_WHILEM5:
4584 goto resume_point_WHILEM5;
4585 case resume_WHILEM6:
4586 goto resume_point_WHILEM6;
4587 case resume_CURLYM1:
4588 goto resume_point_CURLYM1;
4589 case resume_CURLYM2:
4590 goto resume_point_CURLYM2;
4591 case resume_CURLYM3:
4592 goto resume_point_CURLYM3;
4593 case resume_CURLYM4:
4594 goto resume_point_CURLYM4;
4595 case resume_IFMATCH:
4596 goto resume_point_IFMATCH;
4597 case resume_PLUS1:
4598 goto resume_point_PLUS1;
4599 case resume_PLUS2:
4600 goto resume_point_PLUS2;
4601 case resume_PLUS3:
4602 goto resume_point_PLUS3;
4603 case resume_PLUS4:
4604 goto resume_point_PLUS4;
95b24440
DM
4605 default:
4606 Perl_croak(aTHX_ "regexp resume memory corruption");
4607 }
95b24440 4608 }
aa283a38
DM
4609
4610final_exit:
4611
5d9a96ca
DM
4612 /* restore original high-water mark */
4613 PL_regmatch_slab = orig_slab;
4614 PL_regmatch_state = orig_state;
4615
4616 /* free all slabs above current one */
4617 if (orig_slab->next) {
ad65c075 4618 regmatch_slab *osl, *sl = orig_slab->next;
5d9a96ca
DM
4619 orig_slab->next = NULL;
4620 while (sl) {
ad65c075 4621 osl = sl;
5d9a96ca 4622 sl = sl->next;
ad65c075 4623 Safefree(osl);
5d9a96ca
DM
4624 }
4625 }
4626
95b24440
DM
4627 return result;
4628
a687059c
LW
4629}
4630
4631/*
4632 - regrepeat - repeatedly match something simple, report how many
4633 */
4634/*
4635 * [This routine now assumes that it will only match on things of length 1.
4636 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 4637 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 4638 */
76e3520e 4639STATIC I32
32fc9b6a 4640S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max)
a687059c 4641{
27da23d5 4642 dVAR;
a0d0e21e 4643 register char *scan;
a0d0e21e 4644 register I32 c;
3280af22 4645 register char *loceol = PL_regeol;
a0ed51b3 4646 register I32 hardcount = 0;
53c4c00c 4647 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 4648
3280af22 4649 scan = PL_reginput;
faf11cac
HS
4650 if (max == REG_INFTY)
4651 max = I32_MAX;
4652 else if (max < loceol - scan)
7f596f4c 4653 loceol = scan + max;
a0d0e21e 4654 switch (OP(p)) {
22c35a8c 4655 case REG_ANY:
1aa99e6b 4656 if (do_utf8) {
ffc61ed2 4657 loceol = PL_regeol;
1aa99e6b 4658 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
4659 scan += UTF8SKIP(scan);
4660 hardcount++;
4661 }
4662 } else {
4663 while (scan < loceol && *scan != '\n')
4664 scan++;
a0ed51b3
LW
4665 }
4666 break;
ffc61ed2 4667 case SANY:
def8e4ea
JH
4668 if (do_utf8) {
4669 loceol = PL_regeol;
a0804c9e 4670 while (scan < loceol && hardcount < max) {
def8e4ea
JH
4671 scan += UTF8SKIP(scan);
4672 hardcount++;
4673 }
4674 }
4675 else
4676 scan = loceol;
a0ed51b3 4677 break;
f33976b4
DB
4678 case CANY:
4679 scan = loceol;
4680 break;
090f7165
JH
4681 case EXACT: /* length of string is 1 */
4682 c = (U8)*STRING(p);
4683 while (scan < loceol && UCHARAT(scan) == c)
4684 scan++;
bbce6d69 4685 break;
4686 case EXACTF: /* length of string is 1 */
cd439c50 4687 c = (U8)*STRING(p);
bbce6d69 4688 while (scan < loceol &&
22c35a8c 4689 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 4690 scan++;
4691 break;
4692 case EXACTFL: /* length of string is 1 */
3280af22 4693 PL_reg_flags |= RF_tainted;
cd439c50 4694 c = (U8)*STRING(p);
bbce6d69 4695 while (scan < loceol &&
22c35a8c 4696 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
4697 scan++;
4698 break;
4699 case ANYOF:
ffc61ed2
JH
4700 if (do_utf8) {
4701 loceol = PL_regeol;
cfc92286 4702 while (hardcount < max && scan < loceol &&
32fc9b6a 4703 reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
ffc61ed2
JH
4704 scan += UTF8SKIP(scan);
4705 hardcount++;
4706 }
4707 } else {
32fc9b6a 4708 while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
ffc61ed2
JH
4709 scan++;
4710 }
a0d0e21e
LW
4711 break;
4712 case ALNUM:
1aa99e6b 4713 if (do_utf8) {
ffc61ed2 4714 loceol = PL_regeol;
1a4fad37 4715 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4716 while (hardcount < max && scan < loceol &&
3568d838 4717 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4718 scan += UTF8SKIP(scan);
4719 hardcount++;
4720 }
4721 } else {
4722 while (scan < loceol && isALNUM(*scan))
4723 scan++;
a0ed51b3
LW
4724 }
4725 break;
bbce6d69 4726 case ALNUML:
3280af22 4727 PL_reg_flags |= RF_tainted;
1aa99e6b 4728 if (do_utf8) {
ffc61ed2 4729 loceol = PL_regeol;
1aa99e6b
IH
4730 while (hardcount < max && scan < loceol &&
4731 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4732 scan += UTF8SKIP(scan);
4733 hardcount++;
4734 }
4735 } else {
4736 while (scan < loceol && isALNUM_LC(*scan))
4737 scan++;
a0ed51b3
LW
4738 }
4739 break;
a0d0e21e 4740 case NALNUM:
1aa99e6b 4741 if (do_utf8) {
ffc61ed2 4742 loceol = PL_regeol;
1a4fad37 4743 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4744 while (hardcount < max && scan < loceol &&
3568d838 4745 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4746 scan += UTF8SKIP(scan);
4747 hardcount++;
4748 }
4749 } else {
4750 while (scan < loceol && !isALNUM(*scan))
4751 scan++;
a0ed51b3
LW
4752 }
4753 break;
bbce6d69 4754 case NALNUML:
3280af22 4755 PL_reg_flags |= RF_tainted;
1aa99e6b 4756 if (do_utf8) {
ffc61ed2 4757 loceol = PL_regeol;
1aa99e6b
IH
4758 while (hardcount < max && scan < loceol &&
4759 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4760 scan += UTF8SKIP(scan);
4761 hardcount++;
4762 }
4763 } else {
4764 while (scan < loceol && !isALNUM_LC(*scan))
4765 scan++;
a0ed51b3
LW
4766 }
4767 break;
a0d0e21e 4768 case SPACE:
1aa99e6b 4769 if (do_utf8) {
ffc61ed2 4770 loceol = PL_regeol;
1a4fad37 4771 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4772 while (hardcount < max && scan < loceol &&
3568d838
JH
4773 (*scan == ' ' ||
4774 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4775 scan += UTF8SKIP(scan);
4776 hardcount++;
4777 }
4778 } else {
4779 while (scan < loceol && isSPACE(*scan))
4780 scan++;
a0ed51b3
LW
4781 }
4782 break;
bbce6d69 4783 case SPACEL:
3280af22 4784 PL_reg_flags |= RF_tainted;
1aa99e6b 4785 if (do_utf8) {
ffc61ed2 4786 loceol = PL_regeol;
1aa99e6b 4787 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4788 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4789 scan += UTF8SKIP(scan);
4790 hardcount++;
4791 }
4792 } else {
4793 while (scan < loceol && isSPACE_LC(*scan))
4794 scan++;
a0ed51b3
LW
4795 }
4796 break;
a0d0e21e 4797 case NSPACE:
1aa99e6b 4798 if (do_utf8) {
ffc61ed2 4799 loceol = PL_regeol;
1a4fad37 4800 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4801 while (hardcount < max && scan < loceol &&
3568d838
JH
4802 !(*scan == ' ' ||
4803 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4804 scan += UTF8SKIP(scan);
4805 hardcount++;
4806 }
4807 } else {
4808 while (scan < loceol && !isSPACE(*scan))
4809 scan++;
4810 break;
a0ed51b3 4811 }
bbce6d69 4812 case NSPACEL:
3280af22 4813 PL_reg_flags |= RF_tainted;
1aa99e6b 4814 if (do_utf8) {
ffc61ed2 4815 loceol = PL_regeol;
1aa99e6b 4816 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4817 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4818 scan += UTF8SKIP(scan);
4819 hardcount++;
4820 }
4821 } else {
4822 while (scan < loceol && !isSPACE_LC(*scan))
4823 scan++;
a0ed51b3
LW
4824 }
4825 break;
a0d0e21e 4826 case DIGIT:
1aa99e6b 4827 if (do_utf8) {
ffc61ed2 4828 loceol = PL_regeol;
1a4fad37 4829 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4830 while (hardcount < max && scan < loceol &&
3568d838 4831 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4832 scan += UTF8SKIP(scan);
4833 hardcount++;
4834 }
4835 } else {
4836 while (scan < loceol && isDIGIT(*scan))
4837 scan++;
a0ed51b3
LW
4838 }
4839 break;
a0d0e21e 4840 case NDIGIT:
1aa99e6b 4841 if (do_utf8) {
ffc61ed2 4842 loceol = PL_regeol;
1a4fad37 4843 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4844 while (hardcount < max && scan < loceol &&
3568d838 4845 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4846 scan += UTF8SKIP(scan);
4847 hardcount++;
4848 }
4849 } else {
4850 while (scan < loceol && !isDIGIT(*scan))
4851 scan++;
a0ed51b3
LW
4852 }
4853 break;
a0d0e21e
LW
4854 default: /* Called on something of 0 width. */
4855 break; /* So match right here or not at all. */
4856 }
a687059c 4857
a0ed51b3
LW
4858 if (hardcount)
4859 c = hardcount;
4860 else
4861 c = scan - PL_reginput;
3280af22 4862 PL_reginput = scan;
a687059c 4863
a3621e74 4864 DEBUG_r({
ab74612d 4865 SV *re_debug_flags = NULL;
6136c704 4866 SV * const prop = sv_newmortal();
a3621e74
YO
4867 GET_RE_DEBUG_FLAGS;
4868 DEBUG_EXECUTE_r({
32fc9b6a 4869 regprop(prog, prop, p);
9041c2e3
NIS
4870 PerlIO_printf(Perl_debug_log,
4871 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3f7c398e 4872 REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max);
c277df42 4873 });
a3621e74 4874 });
9041c2e3 4875
a0d0e21e 4876 return(c);
a687059c
LW
4877}
4878
c277df42
IZ
4879
4880/*
ffc61ed2
JH
4881- regclass_swash - prepare the utf8 swash
4882*/
4883
4884SV *
32fc9b6a 4885Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
ffc61ed2 4886{
97aff369 4887 dVAR;
9e55ce06
JH
4888 SV *sw = NULL;
4889 SV *si = NULL;
4890 SV *alt = NULL;
32fc9b6a 4891 const struct reg_data *data = prog ? prog->data : NULL;
ffc61ed2 4892
4f639d21 4893 if (data && data->count) {
a3b680e6 4894 const U32 n = ARG(node);
ffc61ed2 4895
4f639d21
DM
4896 if (data->what[n] == 's') {
4897 SV * const rv = (SV*)data->data[n];
890ce7af 4898 AV * const av = (AV*)SvRV((SV*)rv);
2d03de9c 4899 SV **const ary = AvARRAY(av);
9e55ce06 4900 SV **a, **b;
9041c2e3 4901
711a919c 4902 /* See the end of regcomp.c:S_regclass() for
9e55ce06
JH
4903 * documentation of these array elements. */
4904
b11f357e 4905 si = *ary;
8f7f7219 4906 a = SvROK(ary[1]) ? &ary[1] : 0;
b11f357e
JH
4907 b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4908
ffc61ed2
JH
4909 if (a)
4910 sw = *a;
4911 else if (si && doinit) {
4912 sw = swash_init("utf8", "", si, 1, 0);
4913 (void)av_store(av, 1, sw);
4914 }
9e55ce06
JH
4915 if (b)
4916 alt = *b;
ffc61ed2
JH
4917 }
4918 }
4919
9e55ce06
JH
4920 if (listsvp)
4921 *listsvp = si;
4922 if (altsvp)
4923 *altsvp = alt;
ffc61ed2
JH
4924
4925 return sw;
4926}
4927
4928/*
ba7b4546 4929 - reginclass - determine if a character falls into a character class
832705d4
JH
4930
4931 The n is the ANYOF regnode, the p is the target string, lenp
4932 is pointer to the maximum length of how far to go in the p
4933 (if the lenp is zero, UTF8SKIP(p) is used),
4934 do_utf8 tells whether the target string is in UTF-8.
4935
bbce6d69 4936 */
4937
76e3520e 4938STATIC bool
32fc9b6a 4939S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
bbce6d69 4940{
27da23d5 4941 dVAR;
a3b680e6 4942 const char flags = ANYOF_FLAGS(n);
bbce6d69 4943 bool match = FALSE;
cc07378b 4944 UV c = *p;
ae9ddab8 4945 STRLEN len = 0;
9e55ce06 4946 STRLEN plen;
1aa99e6b 4947
19f67299
TS
4948 if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
4949 c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
4ad0818d
DM
4950 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
4951 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
19f67299
TS
4952 if (len == (STRLEN)-1)
4953 Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
4954 }
bbce6d69 4955
0f0076b4 4956 plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
ffc61ed2 4957 if (do_utf8 || (flags & ANYOF_UNICODE)) {
9e55ce06
JH
4958 if (lenp)
4959 *lenp = 0;
ffc61ed2 4960 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
4961 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4962 match = TRUE;
bbce6d69 4963 }
3568d838 4964 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 4965 match = TRUE;
ffc61ed2 4966 if (!match) {
9e55ce06 4967 AV *av;
32fc9b6a 4968 SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
ffc61ed2
JH
4969
4970 if (sw) {
3568d838 4971 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
4972 match = TRUE;
4973 else if (flags & ANYOF_FOLD) {
9e55ce06
JH
4974 if (!match && lenp && av) {
4975 I32 i;
9e55ce06 4976 for (i = 0; i <= av_len(av); i++) {
890ce7af 4977 SV* const sv = *av_fetch(av, i, FALSE);
9e55ce06 4978 STRLEN len;
890ce7af 4979 const char * const s = SvPV_const(sv, len);
9e55ce06 4980
061b10df 4981 if (len <= plen && memEQ(s, (char*)p, len)) {
9e55ce06
JH
4982 *lenp = len;
4983 match = TRUE;
4984 break;
4985 }
4986 }
4987 }
4988 if (!match) {
89ebb4a3 4989 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4a623e43
JH
4990 STRLEN tmplen;
4991
9e55ce06
JH
4992 to_utf8_fold(p, tmpbuf, &tmplen);
4993 if (swash_fetch(sw, tmpbuf, do_utf8))
4994 match = TRUE;
4995 }
ffc61ed2
JH
4996 }
4997 }
bbce6d69 4998 }
9e55ce06 4999 if (match && lenp && *lenp == 0)
0f0076b4 5000 *lenp = UNISKIP(NATIVE_TO_UNI(c));
bbce6d69 5001 }
1aa99e6b 5002 if (!match && c < 256) {
ffc61ed2
JH
5003 if (ANYOF_BITMAP_TEST(n, c))
5004 match = TRUE;
5005 else if (flags & ANYOF_FOLD) {
eb160463 5006 U8 f;
a0ed51b3 5007
ffc61ed2
JH
5008 if (flags & ANYOF_LOCALE) {
5009 PL_reg_flags |= RF_tainted;
5010 f = PL_fold_locale[c];
5011 }
5012 else
5013 f = PL_fold[c];
5014 if (f != c && ANYOF_BITMAP_TEST(n, f))
5015 match = TRUE;
5016 }
5017
5018 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 5019 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
5020 if (
5021 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
5022 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
5023 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
5024 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
5025 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
5026 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
5027 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
5028 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5029 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
5030 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
5031 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
5032 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
5033 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
5034 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
5035 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
5036 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
5037 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
5038 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
5039 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
5040 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
5041 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
5042 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
5043 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
5044 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
5045 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
5046 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
5047 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
5048 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
5049 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
5050 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
5051 ) /* How's that for a conditional? */
5052 {
5053 match = TRUE;
5054 }
a0ed51b3 5055 }
a0ed51b3
LW
5056 }
5057
a0ed51b3
LW
5058 return (flags & ANYOF_INVERT) ? !match : match;
5059}
161b471a 5060
dfe13c55 5061STATIC U8 *
5f66b61c 5062S_reghop3(U8 *s, I32 off, U8* lim)
9041c2e3 5063{
97aff369 5064 dVAR;
a0ed51b3 5065 if (off >= 0) {
1aa99e6b 5066 while (off-- && s < lim) {
ffc61ed2 5067 /* XXX could check well-formedness here */
a0ed51b3 5068 s += UTF8SKIP(s);
ffc61ed2 5069 }
a0ed51b3
LW
5070 }
5071 else {
5072 while (off++) {
1aa99e6b 5073 if (s > lim) {
a0ed51b3 5074 s--;
ffc61ed2 5075 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 5076 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 5077 s--;
ffc61ed2
JH
5078 }
5079 /* XXX could check well-formedness here */
a0ed51b3
LW
5080 }
5081 }
5082 }
5083 return s;
5084}
161b471a 5085
dfe13c55 5086STATIC U8 *
5f66b61c 5087S_reghopmaybe3(U8* s, I32 off, U8* lim)
a0ed51b3 5088{
97aff369 5089 dVAR;
a0ed51b3 5090 if (off >= 0) {
1aa99e6b 5091 while (off-- && s < lim) {
ffc61ed2 5092 /* XXX could check well-formedness here */
a0ed51b3 5093 s += UTF8SKIP(s);
ffc61ed2 5094 }
a0ed51b3
LW
5095 if (off >= 0)
5096 return 0;
5097 }
5098 else {
5099 while (off++) {
1aa99e6b 5100 if (s > lim) {
a0ed51b3 5101 s--;
ffc61ed2 5102 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 5103 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 5104 s--;
ffc61ed2
JH
5105 }
5106 /* XXX could check well-formedness here */
a0ed51b3
LW
5107 }
5108 else
5109 break;
5110 }
5111 if (off <= 0)
5112 return 0;
5113 }
5114 return s;
5115}
51371543 5116
51371543 5117static void
acfe0abc 5118restore_pos(pTHX_ void *arg)
51371543 5119{
97aff369 5120 dVAR;
097eb12c 5121 regexp * const rex = (regexp *)arg;
51371543
GS
5122 if (PL_reg_eval_set) {
5123 if (PL_reg_oldsaved) {
4f639d21
DM
5124 rex->subbeg = PL_reg_oldsaved;
5125 rex->sublen = PL_reg_oldsavedlen;
f8c7b90f 5126#ifdef PERL_OLD_COPY_ON_WRITE
4f639d21 5127 rex->saved_copy = PL_nrs;
ed252734 5128#endif
4f639d21 5129 RX_MATCH_COPIED_on(rex);
51371543
GS
5130 }
5131 PL_reg_magic->mg_len = PL_reg_oldpos;
5132 PL_reg_eval_set = 0;
5133 PL_curpm = PL_reg_oldcurpm;
5134 }
5135}
33b8afdf
JH
5136
5137STATIC void
5138S_to_utf8_substr(pTHX_ register regexp *prog)
5139{
33b8afdf 5140 if (prog->float_substr && !prog->float_utf8) {
097eb12c
AL
5141 SV* const sv = newSVsv(prog->float_substr);
5142 prog->float_utf8 = sv;
33b8afdf
JH
5143 sv_utf8_upgrade(sv);
5144 if (SvTAIL(prog->float_substr))
5145 SvTAIL_on(sv);
5146 if (prog->float_substr == prog->check_substr)
5147 prog->check_utf8 = sv;
5148 }
5149 if (prog->anchored_substr && !prog->anchored_utf8) {
097eb12c
AL
5150 SV* const sv = newSVsv(prog->anchored_substr);
5151 prog->anchored_utf8 = sv;
33b8afdf
JH
5152 sv_utf8_upgrade(sv);
5153 if (SvTAIL(prog->anchored_substr))
5154 SvTAIL_on(sv);
5155 if (prog->anchored_substr == prog->check_substr)
5156 prog->check_utf8 = sv;
5157 }
5158}
5159
5160STATIC void
5161S_to_byte_substr(pTHX_ register regexp *prog)
5162{
97aff369 5163 dVAR;
33b8afdf 5164 if (prog->float_utf8 && !prog->float_substr) {
097eb12c
AL
5165 SV* sv = newSVsv(prog->float_utf8);
5166 prog->float_substr = sv;
33b8afdf
JH
5167 if (sv_utf8_downgrade(sv, TRUE)) {
5168 if (SvTAIL(prog->float_utf8))
5169 SvTAIL_on(sv);
5170 } else {
5171 SvREFCNT_dec(sv);
5172 prog->float_substr = sv = &PL_sv_undef;
5173 }
5174 if (prog->float_utf8 == prog->check_utf8)
5175 prog->check_substr = sv;
5176 }
5177 if (prog->anchored_utf8 && !prog->anchored_substr) {
097eb12c
AL
5178 SV* sv = newSVsv(prog->anchored_utf8);
5179 prog->anchored_substr = sv;
33b8afdf
JH
5180 if (sv_utf8_downgrade(sv, TRUE)) {
5181 if (SvTAIL(prog->anchored_utf8))
5182 SvTAIL_on(sv);
5183 } else {
5184 SvREFCNT_dec(sv);
5185 prog->anchored_substr = sv = &PL_sv_undef;
5186 }
5187 if (prog->anchored_utf8 == prog->check_utf8)
5188 prog->check_substr = sv;
5189 }
5190}
66610fdd
RGS
5191
5192/*
5193 * Local variables:
5194 * c-indentation-style: bsd
5195 * c-basic-offset: 4
5196 * indent-tabs-mode: t
5197 * End:
5198 *
37442d52
RGS
5199 * ex: set ts=8 sts=4 sw=4 noet:
5200 */