This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Test that we can do embedded 0 bytes in hints.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
61296642
DM
8/* This file contains functions for executing a regular expression. See
9 * also regcomp.c which funnily enough, contains functions for compiling
166f8a29 10 * a regular expression.
e4a054ea
DM
11 *
12 * This file is also copied at build time to ext/re/re_exec.c, where
13 * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14 * This causes the main functions to be compiled under new names and with
15 * debugging support added, which makes "use re 'debug'" work.
16
166f8a29
DM
17 */
18
a687059c
LW
19/* NOTE: this is derived from Henry Spencer's regexp code, and should not
20 * confused with the original package (see point 3 below). Thanks, Henry!
21 */
22
23/* Additional note: this code is very heavily munged from Henry's version
24 * in places. In some spots I've traded clarity for efficiency, so don't
25 * blame Henry for some of the lack of readability.
26 */
27
e50aee73
AD
28/* The names of the functions have been changed from regcomp and
29 * regexec to pregcomp and pregexec in order to avoid conflicts
30 * with the POSIX routines of the same names.
31*/
32
b9d5759e
AD
33#ifdef PERL_EXT_RE_BUILD
34/* need to replace pregcomp et al, so enable that */
35# ifndef PERL_IN_XSUB_RE
36# define PERL_IN_XSUB_RE
37# endif
38/* need access to debugger hooks */
cad2e5aa 39# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
40# define DEBUGGING
41# endif
42#endif
43
44#ifdef PERL_IN_XSUB_RE
d06ea78c 45/* We *really* need to overwrite these symbols: */
56953603
IZ
46# define Perl_regexec_flags my_regexec
47# define Perl_regdump my_regdump
48# define Perl_regprop my_regprop
cad2e5aa 49# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
50/* *These* symbols are masked to allow static link. */
51# define Perl_pregexec my_pregexec
9041c2e3 52# define Perl_reginitcolors my_reginitcolors
490a3f88 53# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
54
55# define PERL_NO_GET_CONTEXT
9041c2e3 56#endif
56953603 57
a687059c 58/*
e50aee73 59 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
60 *
61 * Copyright (c) 1986 by University of Toronto.
62 * Written by Henry Spencer. Not derived from licensed software.
63 *
64 * Permission is granted to anyone to use this software for any
65 * purpose on any computer system, and to redistribute it freely,
66 * subject to the following restrictions:
67 *
68 * 1. The author is not responsible for the consequences of use of
69 * this software, no matter how awful, even if they arise
70 * from defects in it.
71 *
72 * 2. The origin of this software must not be misrepresented, either
73 * by explicit claim or by omission.
74 *
75 * 3. Altered versions must be plainly marked as such, and must not
76 * be misrepresented as being the original software.
77 *
78 **** Alterations to Henry's code are...
79 ****
4bb101f2 80 **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 81 **** 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 82 ****
9ef589d8
LW
83 **** You may distribute under the terms of either the GNU General Public
84 **** License or the Artistic License, as specified in the README file.
a687059c
LW
85 *
86 * Beware that some of this code is subtly aware of the way operator
87 * precedence is structured in regular expressions. Serious changes in
88 * regular-expression syntax might require a total rethink.
89 */
90#include "EXTERN.h"
864dbfa3 91#define PERL_IN_REGEXEC_C
a687059c 92#include "perl.h"
0f5d15d6 93
a687059c
LW
94#include "regcomp.h"
95
c277df42
IZ
96#define RF_tainted 1 /* tainted information used? */
97#define RF_warned 2 /* warned about big count? */
ce862d02 98#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
99#define RF_utf8 8 /* String contains multibyte chars? */
100
eb160463 101#define UTF ((PL_reg_flags & RF_utf8) != 0)
ce862d02
IZ
102
103#define RS_init 1 /* eval environment created */
104#define RS_set 2 /* replsv value is set */
c277df42 105
a687059c
LW
106#ifndef STATIC
107#define STATIC static
108#endif
109
ba7b4546 110#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
7d3e948e 111
c277df42
IZ
112/*
113 * Forwards.
114 */
115
33b8afdf 116#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 117#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 118
52657f30
AL
119#define HOPc(pos,off) ((char *)(PL_reg_match_utf8 \
120 ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
121 : (U8*)(pos + off)))
122#define HOPBACKc(pos, off) ((char*) \
123 ((PL_reg_match_utf8) \
124 ? reghopmaybe3((U8*)pos, -off, ((U8*)(off < 0 ? PL_regeol : PL_bostr))) \
efb30f32
HS
125 : (pos - off >= PL_bostr) \
126 ? (U8*)(pos - off) \
52657f30 127 : (U8*)NULL) \
efb30f32 128)
efb30f32 129
1aa99e6b 130#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c 131#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b 132#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
1aa99e6b 133
1a4fad37
AL
134#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
135 if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
136#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
137#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
138#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
139#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
51371543 140
5f80c4cf 141/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
142#define JUMPABLE(rn) ( \
143 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
144 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
145 OP(rn) == PLUS || OP(rn) == MINMOD || \
146 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
147)
148
cca55fe3
JP
149#define HAS_TEXT(rn) ( \
150 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
151)
e2d8ce26 152
a84d97b6
HS
153/*
154 Search for mandatory following text node; for lookahead, the text must
155 follow but for lookbehind (rn->flags != 0) we skip to the next step.
156*/
cca55fe3 157#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 158 while (JUMPABLE(rn)) \
a84d97b6 159 if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 160 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
161 else if (OP(rn) == PLUS) \
162 rn = NEXTOPER(rn); \
a84d97b6
HS
163 else if (OP(rn) == IFMATCH) \
164 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
e2d8ce26 165 else rn += NEXT_OFF(rn); \
5f80c4cf 166} STMT_END
74750237 167
acfe0abc 168static void restore_pos(pTHX_ void *arg);
51371543 169
76e3520e 170STATIC CHECKPOINT
cea2e8a9 171S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 172{
97aff369 173 dVAR;
a3b680e6 174 const int retval = PL_savestack_ix;
b1ce53c5 175#define REGCP_PAREN_ELEMS 4
a3b680e6 176 const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
177 int p;
178
e49a9654
IH
179 if (paren_elems_to_push < 0)
180 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
181
a01268b5 182#define REGCP_OTHER_ELEMS 6
4b3c1a47 183 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 184 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 185/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
186 SSPUSHINT(PL_regendp[p]);
187 SSPUSHINT(PL_regstartp[p]);
3280af22 188 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
189 SSPUSHINT(p);
190 }
b1ce53c5 191/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
192 SSPUSHINT(PL_regsize);
193 SSPUSHINT(*PL_reglastparen);
a01268b5 194 SSPUSHINT(*PL_reglastcloseparen);
3280af22 195 SSPUSHPTR(PL_reginput);
41123dfd
JH
196#define REGCP_FRAME_ELEMS 2
197/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
198 * are needed for the regexp context stack bookkeeping. */
199 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 200 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 201
a0d0e21e
LW
202 return retval;
203}
204
c277df42 205/* These are needed since we do not localize EVAL nodes: */
a3621e74 206# define REGCP_SET(cp) DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, \
faccc32b 207 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 208 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 209
a3621e74 210# define REGCP_UNWIND(cp) DEBUG_EXECUTE_r(cp != PL_savestack_ix ? \
c3464db5 211 PerlIO_printf(Perl_debug_log, \
faccc32b 212 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 213 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 214
76e3520e 215STATIC char *
097eb12c 216S_regcppop(pTHX_ const regexp *rex)
a0d0e21e 217{
97aff369 218 dVAR;
b1ce53c5 219 I32 i;
a0d0e21e 220 char *input;
b1ce53c5 221
a3621e74
YO
222 GET_RE_DEBUG_FLAGS_DECL;
223
b1ce53c5 224 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 225 i = SSPOPINT;
b1ce53c5
JH
226 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
227 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 228 input = (char *) SSPOPPTR;
a01268b5 229 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
230 *PL_reglastparen = SSPOPINT;
231 PL_regsize = SSPOPINT;
b1ce53c5
JH
232
233 /* Now restore the parentheses context. */
41123dfd
JH
234 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
235 i > 0; i -= REGCP_PAREN_ELEMS) {
1df70142 236 I32 tmps;
097eb12c 237 U32 paren = (U32)SSPOPINT;
3280af22 238 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
239 PL_regstartp[paren] = SSPOPINT;
240 tmps = SSPOPINT;
3280af22
NIS
241 if (paren <= *PL_reglastparen)
242 PL_regendp[paren] = tmps;
a3621e74 243 DEBUG_EXECUTE_r(
c3464db5 244 PerlIO_printf(Perl_debug_log,
b900a521 245 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 246 (UV)paren, (IV)PL_regstartp[paren],
b900a521 247 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 248 (IV)PL_regendp[paren],
3280af22 249 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 250 );
a0d0e21e 251 }
a3621e74 252 DEBUG_EXECUTE_r(
4f639d21 253 if ((I32)(*PL_reglastparen + 1) <= rex->nparens) {
c3464db5 254 PerlIO_printf(Perl_debug_log,
faccc32b 255 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
4f639d21 256 (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
c277df42
IZ
257 }
258 );
daf18116 259#if 1
dafc8851
JH
260 /* It would seem that the similar code in regtry()
261 * already takes care of this, and in fact it is in
262 * a better location to since this code can #if 0-ed out
263 * but the code in regtry() is needed or otherwise tests
264 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
265 * (as of patchlevel 7877) will fail. Then again,
266 * this code seems to be necessary or otherwise
267 * building DynaLoader will fail:
268 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
269 * --jhi */
097eb12c
AL
270 for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
271 if (i > PL_regsize)
272 PL_regstartp[i] = -1;
273 PL_regendp[i] = -1;
a0d0e21e 274 }
dafc8851 275#endif
a0d0e21e
LW
276 return input;
277}
278
02db2b7b 279#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 280
95b24440 281#define TRYPAREN(paren, n, input, where) { \
29d1e993
HS
282 if (paren) { \
283 if (n) { \
284 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
285 PL_regendp[paren] = input - PL_bostr; \
286 } \
287 else \
288 PL_regendp[paren] = -1; \
289 } \
95b24440
DM
290 REGMATCH(next, where); \
291 if (result) \
29d1e993
HS
292 sayYES; \
293 if (paren && n) \
294 PL_regendp[paren] = -1; \
295}
296
297
a687059c 298/*
e50aee73 299 * pregexec and friends
a687059c
LW
300 */
301
302/*
c277df42 303 - pregexec - match a regexp against a string
a687059c 304 */
c277df42 305I32
864dbfa3 306Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 307 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
308/* strend: pointer to null at end of string */
309/* strbeg: real beginning of string */
310/* minend: end of match must be >=minend after stringarg. */
311/* nosave: For optimizations. */
312{
313 return
9041c2e3 314 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
315 nosave ? 0 : REXEC_COPY_STR);
316}
0f5d15d6 317
22e551b9 318
9041c2e3 319/*
cad2e5aa
JH
320 * Need to implement the following flags for reg_anch:
321 *
322 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
323 * USE_INTUIT_ML
324 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
325 * INTUIT_AUTORITATIVE_ML
326 * INTUIT_ONCE_NOML - Intuit can match in one location only.
327 * INTUIT_ONCE_ML
328 *
329 * Another flag for this function: SECOND_TIME (so that float substrs
330 * with giant delta may be not rechecked).
331 */
332
333/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
334
3f7c398e 335/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
cad2e5aa
JH
336 Otherwise, only SvCUR(sv) is used to get strbeg. */
337
338/* XXXX We assume that strpos is strbeg unless sv. */
339
6eb5f6b9
JH
340/* XXXX Some places assume that there is a fixed substring.
341 An update may be needed if optimizer marks as "INTUITable"
342 RExen without fixed substrings. Similarly, it is assumed that
343 lengths of all the strings are no more than minlen, thus they
344 cannot come from lookahead.
345 (Or minlen should take into account lookahead.) */
346
2c2d71f5
JH
347/* A failure to find a constant substring means that there is no need to make
348 an expensive call to REx engine, thus we celebrate a failure. Similarly,
349 finding a substring too deep into the string means that less calls to
30944b6d
IZ
350 regtry() should be needed.
351
352 REx compiler's optimizer found 4 possible hints:
353 a) Anchored substring;
354 b) Fixed substring;
355 c) Whether we are anchored (beginning-of-line or \G);
356 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 357 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
358 string which does not contradict any of them.
359 */
2c2d71f5 360
6eb5f6b9
JH
361/* Most of decisions we do here should have been done at compile time.
362 The nodes of the REx which we used for the search should have been
363 deleted from the finite automaton. */
364
cad2e5aa
JH
365char *
366Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
367 char *strend, U32 flags, re_scream_pos_data *data)
368{
97aff369 369 dVAR;
b7953727 370 register I32 start_shift = 0;
cad2e5aa 371 /* Should be nonnegative! */
b7953727 372 register I32 end_shift = 0;
2c2d71f5
JH
373 register char *s;
374 register SV *check;
a1933d95 375 char *strbeg;
cad2e5aa 376 char *t;
a3b680e6 377 const int do_utf8 = sv ? SvUTF8(sv) : 0; /* if no sv we have to assume bytes */
cad2e5aa 378 I32 ml_anch;
bd61b366
SS
379 register char *other_last = NULL; /* other substr checked before this */
380 char *check_at = NULL; /* check substr found at this pos */
1df70142 381 const I32 multiline = prog->reganch & PMf_MULTILINE;
30944b6d 382#ifdef DEBUGGING
890ce7af
AL
383 const char * const i_strpos = strpos;
384 SV * const dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 385#endif
a3621e74
YO
386
387 GET_RE_DEBUG_FLAGS_DECL;
388
a30b2f1f 389 RX_MATCH_UTF8_set(prog,do_utf8);
cad2e5aa 390
b8d68ded 391 if (prog->reganch & ROPT_UTF8) {
a3621e74 392 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded
JH
393 "UTF-8 regex...\n"));
394 PL_reg_flags |= RF_utf8;
395 }
396
a3621e74 397 DEBUG_EXECUTE_r({
1df70142 398 const char *s = PL_reg_match_utf8 ?
c728cb41
JH
399 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
400 strpos;
1df70142 401 const int len = PL_reg_match_utf8 ?
b8d68ded 402 strlen(s) : strend - strpos;
2a782b5b
JH
403 if (!PL_colorset)
404 reginitcolors();
b8d68ded 405 if (PL_reg_match_utf8)
a3621e74 406 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded 407 "UTF-8 target...\n"));
2a782b5b 408 PerlIO_printf(Perl_debug_log,
a0288114 409 "%sGuessing start of match, REx%s \"%s%.60s%s%s\" against \"%s%.*s%s%s\"...\n",
e4584336 410 PL_colors[4], PL_colors[5], PL_colors[0],
2a782b5b
JH
411 prog->precomp,
412 PL_colors[1],
413 (strlen(prog->precomp) > 60 ? "..." : ""),
414 PL_colors[0],
415 (int)(len > 60 ? 60 : len),
416 s, PL_colors[1],
417 (len > 60 ? "..." : "")
418 );
419 });
cad2e5aa 420
c344f387
JH
421 /* CHR_DIST() would be more correct here but it makes things slow. */
422 if (prog->minlen > strend - strpos) {
a3621e74 423 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584 424 "String too short... [re_intuit_start]\n"));
cad2e5aa 425 goto fail;
2c2d71f5 426 }
a1933d95 427 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 428 PL_regeol = strend;
33b8afdf
JH
429 if (do_utf8) {
430 if (!prog->check_utf8 && prog->check_substr)
431 to_utf8_substr(prog);
432 check = prog->check_utf8;
433 } else {
434 if (!prog->check_substr && prog->check_utf8)
435 to_byte_substr(prog);
436 check = prog->check_substr;
437 }
438 if (check == &PL_sv_undef) {
a3621e74 439 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
440 "Non-utf string cannot match utf check string\n"));
441 goto fail;
442 }
2c2d71f5 443 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
444 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
445 || ( (prog->reganch & ROPT_ANCH_BOL)
7fba1cd6 446 && !multiline ) ); /* Check after \n? */
cad2e5aa 447
7e25d62c
JH
448 if (!ml_anch) {
449 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
450 | ROPT_IMPLICIT)) /* not a real BOL */
3f7c398e 451 /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
7e25d62c
JH
452 && sv && !SvROK(sv)
453 && (strpos != strbeg)) {
a3621e74 454 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
7e25d62c
JH
455 goto fail;
456 }
457 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 458 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 459 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
460 I32 slen;
461
1aa99e6b 462 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
463 if (SvTAIL(check)) {
464 slen = SvCUR(check); /* >= 1 */
cad2e5aa 465
9041c2e3 466 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5 467 || (strend - s == slen && strend[-1] != '\n')) {
a3621e74 468 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
2c2d71f5 469 goto fail_finish;
cad2e5aa
JH
470 }
471 /* Now should match s[0..slen-2] */
472 slen--;
3f7c398e 473 if (slen && (*SvPVX_const(check) != *s
cad2e5aa 474 || (slen > 1
3f7c398e 475 && memNE(SvPVX_const(check), s, slen)))) {
2c2d71f5 476 report_neq:
a3621e74 477 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
2c2d71f5
JH
478 goto fail_finish;
479 }
cad2e5aa 480 }
3f7c398e 481 else if (*SvPVX_const(check) != *s
653099ff 482 || ((slen = SvCUR(check)) > 1
3f7c398e 483 && memNE(SvPVX_const(check), s, slen)))
2c2d71f5 484 goto report_neq;
c315bfe8 485 check_at = s;
2c2d71f5 486 goto success_at_start;
7e25d62c 487 }
cad2e5aa 488 }
2c2d71f5 489 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 490 s = strpos;
2c2d71f5 491 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 492 end_shift = prog->minlen - start_shift -
653099ff 493 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 494 if (!ml_anch) {
a3b680e6 495 const I32 end = prog->check_offset_max + CHR_SVLEN(check)
653099ff 496 - (SvTAIL(check) != 0);
a3b680e6 497 const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
498
499 if (end_shift < eshift)
500 end_shift = eshift;
501 }
cad2e5aa 502 }
2c2d71f5 503 else { /* Can match at random position */
cad2e5aa
JH
504 ml_anch = 0;
505 s = strpos;
2c2d71f5
JH
506 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
507 /* Should be nonnegative! */
508 end_shift = prog->minlen - start_shift -
653099ff 509 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
510 }
511
2c2d71f5 512#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 513 if (end_shift < 0)
6bbae5e6 514 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
515#endif
516
2c2d71f5
JH
517 restart:
518 /* Find a possible match in the region s..strend by looking for
519 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 520 if (flags & REXEC_SCREAM) {
cad2e5aa 521 I32 p = -1; /* Internal iterator of scream. */
a3b680e6 522 I32 * const pp = data ? data->scream_pos : &p;
cad2e5aa 523
2c2d71f5
JH
524 if (PL_screamfirst[BmRARE(check)] >= 0
525 || ( BmRARE(check) == '\n'
526 && (BmPREVIOUS(check) == SvCUR(check) - 1)
527 && SvTAIL(check) ))
9041c2e3 528 s = screaminstr(sv, check,
2c2d71f5 529 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 530 else
2c2d71f5 531 goto fail_finish;
4addbd3b
HS
532 /* we may be pointing at the wrong string */
533 if (s && RX_MATCH_COPIED(prog))
3f7c398e 534 s = strbeg + (s - SvPVX_const(sv));
cad2e5aa
JH
535 if (data)
536 *data->scream_olds = s;
537 }
f33976b4 538 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
539 s = fbm_instr((U8*)(s + start_shift),
540 (U8*)(strend - end_shift),
7fba1cd6 541 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 542 else
1aa99e6b
IH
543 s = fbm_instr(HOP3(s, start_shift, strend),
544 HOP3(strend, -end_shift, strbeg),
7fba1cd6 545 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
546
547 /* Update the count-of-usability, remove useless subpatterns,
548 unshift s. */
2c2d71f5 549
a0288114 550 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s %s substr \"%s%.*s%s\"%s%s",
2c2d71f5 551 (s ? "Found" : "Did not find"),
33b8afdf 552 (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
2c2d71f5 553 PL_colors[0],
7b0972df 554 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
3f7c398e 555 SvPVX_const(check),
2c2d71f5
JH
556 PL_colors[1], (SvTAIL(check) ? "$" : ""),
557 (s ? " at offset " : "...\n") ) );
558
559 if (!s)
560 goto fail_finish;
561
6eb5f6b9
JH
562 check_at = s;
563
2c2d71f5 564 /* Finish the diagnostic message */
a3621e74 565 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
566
567 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
568 Start with the other substr.
569 XXXX no SCREAM optimization yet - and a very coarse implementation
a0288114 570 XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
2c2d71f5
JH
571 *always* match. Probably should be marked during compile...
572 Probably it is right to do no SCREAM here...
573 */
574
33b8afdf 575 if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
30944b6d 576 /* Take into account the "other" substring. */
2c2d71f5
JH
577 /* XXXX May be hopelessly wrong for UTF... */
578 if (!other_last)
6eb5f6b9 579 other_last = strpos;
33b8afdf 580 if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
30944b6d
IZ
581 do_other_anchored:
582 {
890ce7af
AL
583 char * const last = HOP3c(s, -start_shift, strbeg);
584 char *last1, *last2;
2c2d71f5 585 char *s1 = s;
33b8afdf 586 SV* must;
2c2d71f5 587
2c2d71f5
JH
588 t = s - prog->check_offset_max;
589 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 590 && (!do_utf8
1aa99e6b 591 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 592 && t > strpos)))
30944b6d 593 /* EMPTY */;
2c2d71f5
JH
594 else
595 t = strpos;
1aa99e6b 596 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
597 if (t < other_last) /* These positions already checked */
598 t = other_last;
1aa99e6b 599 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
600 if (last < last1)
601 last1 = last;
602 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
603 /* On end-of-str: see comment below. */
33b8afdf
JH
604 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
605 if (must == &PL_sv_undef) {
606 s = (char*)NULL;
a3621e74 607 DEBUG_EXECUTE_r(must = prog->anchored_utf8); /* for debug */
33b8afdf
JH
608 }
609 else
610 s = fbm_instr(
611 (unsigned char*)t,
612 HOP3(HOP3(last1, prog->anchored_offset, strend)
613 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
614 must,
7fba1cd6 615 multiline ? FBMrf_MULTILINE : 0
33b8afdf 616 );
a3621e74 617 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a0288114 618 "%s anchored substr \"%s%.*s%s\"%s",
2c2d71f5
JH
619 (s ? "Found" : "Contradicts"),
620 PL_colors[0],
33b8afdf
JH
621 (int)(SvCUR(must)
622 - (SvTAIL(must)!=0)),
3f7c398e 623 SvPVX_const(must),
33b8afdf 624 PL_colors[1], (SvTAIL(must) ? "$" : "")));
2c2d71f5
JH
625 if (!s) {
626 if (last1 >= last2) {
a3621e74 627 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5
JH
628 ", giving up...\n"));
629 goto fail_finish;
630 }
a3621e74 631 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 632 ", trying floating at offset %ld...\n",
1aa99e6b
IH
633 (long)(HOP3c(s1, 1, strend) - i_strpos)));
634 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
635 s = HOP3c(last, 1, strend);
2c2d71f5
JH
636 goto restart;
637 }
638 else {
a3621e74 639 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 640 (long)(s - i_strpos)));
1aa99e6b
IH
641 t = HOP3c(s, -prog->anchored_offset, strbeg);
642 other_last = HOP3c(s, 1, strend);
30944b6d 643 s = s1;
2c2d71f5
JH
644 if (t == strpos)
645 goto try_at_start;
2c2d71f5
JH
646 goto try_at_offset;
647 }
30944b6d 648 }
2c2d71f5
JH
649 }
650 else { /* Take into account the floating substring. */
33b8afdf
JH
651 char *last, *last1;
652 char *s1 = s;
653 SV* must;
654
655 t = HOP3c(s, -start_shift, strbeg);
656 last1 = last =
657 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
658 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
659 last = HOP3c(t, prog->float_max_offset, strend);
660 s = HOP3c(t, prog->float_min_offset, strend);
661 if (s < other_last)
662 s = other_last;
2c2d71f5 663 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
33b8afdf
JH
664 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
665 /* fbm_instr() takes into account exact value of end-of-str
666 if the check is SvTAIL(ed). Since false positives are OK,
667 and end-of-str is not later than strend we are OK. */
668 if (must == &PL_sv_undef) {
669 s = (char*)NULL;
a3621e74 670 DEBUG_EXECUTE_r(must = prog->float_utf8); /* for debug message */
33b8afdf
JH
671 }
672 else
2c2d71f5 673 s = fbm_instr((unsigned char*)s,
33b8afdf
JH
674 (unsigned char*)last + SvCUR(must)
675 - (SvTAIL(must)!=0),
7fba1cd6 676 must, multiline ? FBMrf_MULTILINE : 0);
a0288114 677 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s floating substr \"%s%.*s%s\"%s",
33b8afdf
JH
678 (s ? "Found" : "Contradicts"),
679 PL_colors[0],
680 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 681 SvPVX_const(must),
33b8afdf
JH
682 PL_colors[1], (SvTAIL(must) ? "$" : "")));
683 if (!s) {
684 if (last1 == last) {
a3621e74 685 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
686 ", giving up...\n"));
687 goto fail_finish;
2c2d71f5 688 }
a3621e74 689 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
690 ", trying anchored starting at offset %ld...\n",
691 (long)(s1 + 1 - i_strpos)));
692 other_last = last;
693 s = HOP3c(t, 1, strend);
694 goto restart;
695 }
696 else {
a3621e74 697 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
33b8afdf
JH
698 (long)(s - i_strpos)));
699 other_last = s; /* Fix this later. --Hugo */
700 s = s1;
701 if (t == strpos)
702 goto try_at_start;
703 goto try_at_offset;
704 }
2c2d71f5 705 }
cad2e5aa 706 }
2c2d71f5
JH
707
708 t = s - prog->check_offset_max;
2c2d71f5 709 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 710 && (!do_utf8
1aa99e6b
IH
711 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
712 && t > strpos))) {
2c2d71f5
JH
713 /* Fixed substring is found far enough so that the match
714 cannot start at strpos. */
715 try_at_offset:
cad2e5aa 716 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
717 /* Eventually fbm_*() should handle this, but often
718 anchored_offset is not 0, so this check will not be wasted. */
719 /* XXXX In the code below we prefer to look for "^" even in
720 presence of anchored substrings. And we search even
721 beyond the found float position. These pessimizations
722 are historical artefacts only. */
723 find_anchor:
2c2d71f5 724 while (t < strend - prog->minlen) {
cad2e5aa 725 if (*t == '\n') {
4ee3650e 726 if (t < check_at - prog->check_offset_min) {
33b8afdf 727 if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
4ee3650e
GS
728 /* Since we moved from the found position,
729 we definitely contradict the found anchored
30944b6d
IZ
730 substr. Due to the above check we do not
731 contradict "check" substr.
732 Thus we can arrive here only if check substr
733 is float. Redo checking for "other"=="fixed".
734 */
9041c2e3 735 strpos = t + 1;
a3621e74 736 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
e4584336 737 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
30944b6d
IZ
738 goto do_other_anchored;
739 }
4ee3650e
GS
740 /* We don't contradict the found floating substring. */
741 /* XXXX Why not check for STCLASS? */
cad2e5aa 742 s = t + 1;
a3621e74 743 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
e4584336 744 PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
745 goto set_useful;
746 }
4ee3650e
GS
747 /* Position contradicts check-string */
748 /* XXXX probably better to look for check-string
749 than for "\n", so one should lower the limit for t? */
a3621e74 750 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
e4584336 751 PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 752 other_last = strpos = s = t + 1;
cad2e5aa
JH
753 goto restart;
754 }
755 t++;
756 }
a3621e74 757 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
e4584336 758 PL_colors[0], PL_colors[1]));
2c2d71f5 759 goto fail_finish;
cad2e5aa 760 }
f5952150 761 else {
a3621e74 762 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
e4584336 763 PL_colors[0], PL_colors[1]));
f5952150 764 }
cad2e5aa
JH
765 s = t;
766 set_useful:
33b8afdf 767 ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
cad2e5aa
JH
768 }
769 else {
f5952150 770 /* The found string does not prohibit matching at strpos,
2c2d71f5 771 - no optimization of calling REx engine can be performed,
f5952150
GS
772 unless it was an MBOL and we are not after MBOL,
773 or a future STCLASS check will fail this. */
2c2d71f5
JH
774 try_at_start:
775 /* Even in this situation we may use MBOL flag if strpos is offset
776 wrt the start of the string. */
05b4157f 777 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 778 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
779 /* May be due to an implicit anchor of m{.*foo} */
780 && !(prog->reganch & ROPT_IMPLICIT))
781 {
cad2e5aa
JH
782 t = strpos;
783 goto find_anchor;
784 }
a3621e74 785 DEBUG_EXECUTE_r( if (ml_anch)
f5952150 786 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
e4584336 787 (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
30944b6d 788 );
2c2d71f5 789 success_at_start:
30944b6d 790 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
33b8afdf
JH
791 && (do_utf8 ? (
792 prog->check_utf8 /* Could be deleted already */
793 && --BmUSEFUL(prog->check_utf8) < 0
794 && (prog->check_utf8 == prog->float_utf8)
795 ) : (
796 prog->check_substr /* Could be deleted already */
797 && --BmUSEFUL(prog->check_substr) < 0
798 && (prog->check_substr == prog->float_substr)
799 )))
66e933ab 800 {
cad2e5aa 801 /* If flags & SOMETHING - do not do it many times on the same match */
a3621e74 802 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
33b8afdf
JH
803 SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
804 if (do_utf8 ? prog->check_substr : prog->check_utf8)
805 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
a0714e2c
SS
806 prog->check_substr = prog->check_utf8 = NULL; /* disable */
807 prog->float_substr = prog->float_utf8 = NULL; /* clear */
808 check = NULL; /* abort */
cad2e5aa 809 s = strpos;
3cf5c195
IZ
810 /* XXXX This is a remnant of the old implementation. It
811 looks wasteful, since now INTUIT can use many
6eb5f6b9 812 other heuristics. */
cad2e5aa
JH
813 prog->reganch &= ~RE_USE_INTUIT;
814 }
815 else
816 s = strpos;
817 }
818
6eb5f6b9
JH
819 /* Last resort... */
820 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
821 if (prog->regstclass) {
822 /* minlen == 0 is possible if regstclass is \b or \B,
823 and the fixed substr is ''$.
824 Since minlen is already taken into account, s+1 is before strend;
825 accidentally, minlen >= 1 guaranties no false positives at s + 1
826 even for \b or \B. But (minlen? 1 : 0) below assumes that
827 regstclass does not come from lookahead... */
828 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
829 This leaves EXACTF only, which is dealt with in find_byclass(). */
890ce7af 830 const U8* const str = (U8*)STRING(prog->regstclass);
06b5626a 831 const int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 832 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 833 : 1);
a3b680e6 834 const char * const endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
1aa99e6b 835 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
33b8afdf 836 : (prog->float_substr || prog->float_utf8
1aa99e6b
IH
837 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
838 cl_l, strend)
839 : strend);
6eb5f6b9
JH
840
841 t = s;
4f639d21 842 PL_reg_re = prog;
06b5626a 843 s = find_byclass(prog, prog->regstclass, s, endpos, 1);
6eb5f6b9
JH
844 if (!s) {
845#ifdef DEBUGGING
cbbf8932 846 const char *what = NULL;
6eb5f6b9
JH
847#endif
848 if (endpos == strend) {
a3621e74 849 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
850 "Could not match STCLASS...\n") );
851 goto fail;
852 }
a3621e74 853 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab 854 "This position contradicts STCLASS...\n") );
653099ff
GS
855 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
856 goto fail;
6eb5f6b9 857 /* Contradict one of substrings */
33b8afdf
JH
858 if (prog->anchored_substr || prog->anchored_utf8) {
859 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
a3621e74 860 DEBUG_EXECUTE_r( what = "anchored" );
6eb5f6b9 861 hop_and_restart:
1aa99e6b 862 s = HOP3c(t, 1, strend);
66e933ab
GS
863 if (s + start_shift + end_shift > strend) {
864 /* XXXX Should be taken into account earlier? */
a3621e74 865 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab
GS
866 "Could not match STCLASS...\n") );
867 goto fail;
868 }
5e39e1e5
HS
869 if (!check)
870 goto giveup;
a3621e74 871 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 872 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
873 what, (long)(s + start_shift - i_strpos)) );
874 goto restart;
875 }
66e933ab 876 /* Have both, check_string is floating */
6eb5f6b9
JH
877 if (t + start_shift >= check_at) /* Contradicts floating=check */
878 goto retry_floating_check;
879 /* Recheck anchored substring, but not floating... */
9041c2e3 880 s = check_at;
5e39e1e5
HS
881 if (!check)
882 goto giveup;
a3621e74 883 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 884 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
885 (long)(other_last - i_strpos)) );
886 goto do_other_anchored;
887 }
60e71179
GS
888 /* Another way we could have checked stclass at the
889 current position only: */
890 if (ml_anch) {
891 s = t = t + 1;
5e39e1e5
HS
892 if (!check)
893 goto giveup;
a3621e74 894 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 895 "Looking for /%s^%s/m starting at offset %ld...\n",
e4584336 896 PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
60e71179 897 goto try_at_offset;
66e933ab 898 }
33b8afdf 899 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
60e71179 900 goto fail;
6eb5f6b9
JH
901 /* Check is floating subtring. */
902 retry_floating_check:
903 t = check_at - start_shift;
a3621e74 904 DEBUG_EXECUTE_r( what = "floating" );
6eb5f6b9
JH
905 goto hop_and_restart;
906 }
b7953727 907 if (t != s) {
a3621e74 908 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 909 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
910 (long)(t - i_strpos), (long)(s - i_strpos))
911 );
912 }
913 else {
a3621e74 914 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b7953727
JH
915 "Does not contradict STCLASS...\n");
916 );
917 }
6eb5f6b9 918 }
5e39e1e5 919 giveup:
a3621e74 920 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
5e39e1e5
HS
921 PL_colors[4], (check ? "Guessed" : "Giving up"),
922 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 923 return s;
2c2d71f5
JH
924
925 fail_finish: /* Substring not found */
33b8afdf
JH
926 if (prog->check_substr || prog->check_utf8) /* could be removed already */
927 BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
cad2e5aa 928 fail:
a3621e74 929 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
e4584336 930 PL_colors[4], PL_colors[5]));
bd61b366 931 return NULL;
cad2e5aa 932}
9661b544 933
6eb5f6b9 934/* We know what class REx starts with. Try to find this position... */
3c3eec57 935STATIC char *
097eb12c 936S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *strend, I32 norun)
a687059c 937{
27da23d5 938 dVAR;
1df70142 939 const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
6eb5f6b9 940 char *m;
d8093b23 941 STRLEN ln;
5dab1207 942 STRLEN lnc;
078c425b 943 register STRLEN uskip;
d8093b23
G
944 unsigned int c1;
945 unsigned int c2;
6eb5f6b9
JH
946 char *e;
947 register I32 tmp = 1; /* Scratch variable? */
a3b680e6 948 register const bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 949
6eb5f6b9
JH
950 /* We know what class it must start with. */
951 switch (OP(c)) {
6eb5f6b9 952 case ANYOF:
388cc4de 953 if (do_utf8) {
078c425b 954 while (s + (uskip = UTF8SKIP(s)) <= strend) {
388cc4de
HS
955 if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
956 !UTF8_IS_INVARIANT((U8)s[0]) ?
957 reginclass(c, (U8*)s, 0, do_utf8) :
958 REGINCLASS(c, (U8*)s)) {
959 if (tmp && (norun || regtry(prog, s)))
960 goto got_it;
961 else
962 tmp = doevery;
963 }
964 else
965 tmp = 1;
078c425b 966 s += uskip;
388cc4de
HS
967 }
968 }
969 else {
970 while (s < strend) {
971 STRLEN skip = 1;
972
973 if (REGINCLASS(c, (U8*)s) ||
974 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
975 /* The assignment of 2 is intentional:
976 * for the folded sharp s, the skip is 2. */
977 (skip = SHARP_S_SKIP))) {
978 if (tmp && (norun || regtry(prog, s)))
979 goto got_it;
980 else
981 tmp = doevery;
982 }
983 else
984 tmp = 1;
985 s += skip;
986 }
a0d0e21e 987 }
6eb5f6b9 988 break;
f33976b4
DB
989 case CANY:
990 while (s < strend) {
991 if (tmp && (norun || regtry(prog, s)))
992 goto got_it;
993 else
994 tmp = doevery;
995 s++;
996 }
997 break;
6eb5f6b9 998 case EXACTF:
5dab1207
NIS
999 m = STRING(c);
1000 ln = STR_LEN(c); /* length to match in octets/bytes */
1001 lnc = (I32) ln; /* length to match in characters */
1aa99e6b 1002 if (UTF) {
a2a2844f 1003 STRLEN ulen1, ulen2;
5dab1207 1004 U8 *sm = (U8 *) m;
89ebb4a3
JH
1005 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1006 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4ad0818d 1007 const U32 uniflags = UTF8_ALLOW_DEFAULT;
a2a2844f
JH
1008
1009 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1010 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1011
89ebb4a3 1012 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
041457d9 1013 0, uniflags);
89ebb4a3 1014 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
041457d9 1015 0, uniflags);
5dab1207
NIS
1016 lnc = 0;
1017 while (sm < ((U8 *) m + ln)) {
1018 lnc++;
1019 sm += UTF8SKIP(sm);
1020 }
1aa99e6b
IH
1021 }
1022 else {
1023 c1 = *(U8*)m;
1024 c2 = PL_fold[c1];
1025 }
6eb5f6b9
JH
1026 goto do_exactf;
1027 case EXACTFL:
5dab1207
NIS
1028 m = STRING(c);
1029 ln = STR_LEN(c);
1030 lnc = (I32) ln;
d8093b23 1031 c1 = *(U8*)m;
6eb5f6b9
JH
1032 c2 = PL_fold_locale[c1];
1033 do_exactf:
db12adc6 1034 e = HOP3c(strend, -((I32)lnc), s);
b3c9acc1 1035
6eb5f6b9
JH
1036 if (norun && e < s)
1037 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 1038
60a8b682
JH
1039 /* The idea in the EXACTF* cases is to first find the
1040 * first character of the EXACTF* node and then, if
1041 * necessary, case-insensitively compare the full
1042 * text of the node. The c1 and c2 are the first
1043 * characters (though in Unicode it gets a bit
1044 * more complicated because there are more cases
7f16dd3d
JH
1045 * than just upper and lower: one needs to use
1046 * the so-called folding case for case-insensitive
1047 * matching (called "loose matching" in Unicode).
1048 * ibcmp_utf8() will do just that. */
60a8b682 1049
1aa99e6b 1050 if (do_utf8) {
575cac57 1051 UV c, f;
89ebb4a3 1052 U8 tmpbuf [UTF8_MAXBYTES+1];
575cac57 1053 STRLEN len, foldlen;
4ad0818d 1054 const U32 uniflags = UTF8_ALLOW_DEFAULT;
09091399 1055 if (c1 == c2) {
5dab1207
NIS
1056 /* Upper and lower of 1st char are equal -
1057 * probably not a "letter". */
1aa99e6b 1058 while (s <= e) {
89ebb4a3 1059 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1060 uniflags);
80aecb99
JH
1061 if ( c == c1
1062 && (ln == len ||
66423254 1063 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1064 m, (char **)0, ln, (bool)UTF))
55da9344 1065 && (norun || regtry(prog, s)) )
1aa99e6b 1066 goto got_it;
80aecb99 1067 else {
1df70142 1068 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1069 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1070 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1071 if ( f != c
1072 && (f == c1 || f == c2)
1073 && (ln == foldlen ||
66423254
JH
1074 !ibcmp_utf8((char *) foldbuf,
1075 (char **)0, foldlen, do_utf8,
d07ddd77 1076 m,
eb160463 1077 (char **)0, ln, (bool)UTF))
80aecb99
JH
1078 && (norun || regtry(prog, s)) )
1079 goto got_it;
1080 }
1aa99e6b
IH
1081 s += len;
1082 }
09091399
JH
1083 }
1084 else {
1aa99e6b 1085 while (s <= e) {
89ebb4a3 1086 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1087 uniflags);
80aecb99 1088
60a8b682 1089 /* Handle some of the three Greek sigmas cases.
8c01da3c
JH
1090 * Note that not all the possible combinations
1091 * are handled here: some of them are handled
1092 * by the standard folding rules, and some of
1093 * them (the character class or ANYOF cases)
1094 * are handled during compiletime in
1095 * regexec.c:S_regclass(). */
880bd946
JH
1096 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1097 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1098 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99
JH
1099
1100 if ( (c == c1 || c == c2)
1101 && (ln == len ||
66423254 1102 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1103 m, (char **)0, ln, (bool)UTF))
55da9344 1104 && (norun || regtry(prog, s)) )
1aa99e6b 1105 goto got_it;
80aecb99 1106 else {
1df70142 1107 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1108 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1109 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1110 if ( f != c
1111 && (f == c1 || f == c2)
1112 && (ln == foldlen ||
a6872d42 1113 !ibcmp_utf8((char *) foldbuf,
66423254 1114 (char **)0, foldlen, do_utf8,
d07ddd77 1115 m,
eb160463 1116 (char **)0, ln, (bool)UTF))
80aecb99
JH
1117 && (norun || regtry(prog, s)) )
1118 goto got_it;
1119 }
1aa99e6b
IH
1120 s += len;
1121 }
09091399 1122 }
1aa99e6b
IH
1123 }
1124 else {
1125 if (c1 == c2)
1126 while (s <= e) {
1127 if ( *(U8*)s == c1
1128 && (ln == 1 || !(OP(c) == EXACTF
1129 ? ibcmp(s, m, ln)
1130 : ibcmp_locale(s, m, ln)))
1131 && (norun || regtry(prog, s)) )
1132 goto got_it;
1133 s++;
1134 }
1135 else
1136 while (s <= e) {
1137 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1138 && (ln == 1 || !(OP(c) == EXACTF
1139 ? ibcmp(s, m, ln)
1140 : ibcmp_locale(s, m, ln)))
1141 && (norun || regtry(prog, s)) )
1142 goto got_it;
1143 s++;
1144 }
b3c9acc1
IZ
1145 }
1146 break;
bbce6d69 1147 case BOUNDL:
3280af22 1148 PL_reg_flags |= RF_tainted;
bbce6d69 1149 /* FALL THROUGH */
a0d0e21e 1150 case BOUND:
ffc61ed2 1151 if (do_utf8) {
12d33761 1152 if (s == PL_bostr)
ffc61ed2
JH
1153 tmp = '\n';
1154 else {
6136c704 1155 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1156 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1157 }
1158 tmp = ((OP(c) == BOUND ?
9041c2e3 1159 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1160 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1161 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1162 if (tmp == !(OP(c) == BOUND ?
3568d838 1163 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1164 isALNUM_LC_utf8((U8*)s)))
1165 {
1166 tmp = !tmp;
1167 if ((norun || regtry(prog, s)))
1168 goto got_it;
1169 }
078c425b 1170 s += uskip;
a687059c 1171 }
a0d0e21e 1172 }
667bb95a 1173 else {
12d33761 1174 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1175 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1176 while (s < strend) {
1177 if (tmp ==
1178 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1179 tmp = !tmp;
1180 if ((norun || regtry(prog, s)))
1181 goto got_it;
1182 }
1183 s++;
a0ed51b3 1184 }
a0ed51b3 1185 }
6eb5f6b9 1186 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1187 goto got_it;
1188 break;
bbce6d69 1189 case NBOUNDL:
3280af22 1190 PL_reg_flags |= RF_tainted;
bbce6d69 1191 /* FALL THROUGH */
a0d0e21e 1192 case NBOUND:
ffc61ed2 1193 if (do_utf8) {
12d33761 1194 if (s == PL_bostr)
ffc61ed2
JH
1195 tmp = '\n';
1196 else {
6136c704 1197 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1198 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1199 }
1200 tmp = ((OP(c) == NBOUND ?
9041c2e3 1201 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1202 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1203 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1204 if (tmp == !(OP(c) == NBOUND ?
3568d838 1205 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1206 isALNUM_LC_utf8((U8*)s)))
1207 tmp = !tmp;
1208 else if ((norun || regtry(prog, s)))
1209 goto got_it;
078c425b 1210 s += uskip;
ffc61ed2 1211 }
a0d0e21e 1212 }
667bb95a 1213 else {
12d33761 1214 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1215 tmp = ((OP(c) == NBOUND ?
1216 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1217 while (s < strend) {
1218 if (tmp ==
1219 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1220 tmp = !tmp;
1221 else if ((norun || regtry(prog, s)))
1222 goto got_it;
1223 s++;
1224 }
a0ed51b3 1225 }
6eb5f6b9 1226 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1227 goto got_it;
1228 break;
a0d0e21e 1229 case ALNUM:
ffc61ed2 1230 if (do_utf8) {
1a4fad37 1231 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1232 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1233 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1234 if (tmp && (norun || regtry(prog, s)))
1235 goto got_it;
1236 else
1237 tmp = doevery;
1238 }
bbce6d69 1239 else
ffc61ed2 1240 tmp = 1;
078c425b 1241 s += uskip;
bbce6d69 1242 }
bbce6d69 1243 }
ffc61ed2
JH
1244 else {
1245 while (s < strend) {
1246 if (isALNUM(*s)) {
1247 if (tmp && (norun || regtry(prog, s)))
1248 goto got_it;
1249 else
1250 tmp = doevery;
1251 }
a0ed51b3 1252 else
ffc61ed2
JH
1253 tmp = 1;
1254 s++;
a0ed51b3 1255 }
a0ed51b3
LW
1256 }
1257 break;
bbce6d69 1258 case ALNUML:
3280af22 1259 PL_reg_flags |= RF_tainted;
ffc61ed2 1260 if (do_utf8) {
078c425b 1261 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1262 if (isALNUM_LC_utf8((U8*)s)) {
1263 if (tmp && (norun || regtry(prog, s)))
1264 goto got_it;
1265 else
1266 tmp = doevery;
1267 }
a687059c 1268 else
ffc61ed2 1269 tmp = 1;
078c425b 1270 s += uskip;
a0d0e21e 1271 }
a0d0e21e 1272 }
ffc61ed2
JH
1273 else {
1274 while (s < strend) {
1275 if (isALNUM_LC(*s)) {
1276 if (tmp && (norun || regtry(prog, s)))
1277 goto got_it;
1278 else
1279 tmp = doevery;
1280 }
a0ed51b3 1281 else
ffc61ed2
JH
1282 tmp = 1;
1283 s++;
a0ed51b3 1284 }
a0ed51b3
LW
1285 }
1286 break;
a0d0e21e 1287 case NALNUM:
ffc61ed2 1288 if (do_utf8) {
1a4fad37 1289 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1290 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1291 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1292 if (tmp && (norun || regtry(prog, s)))
1293 goto got_it;
1294 else
1295 tmp = doevery;
1296 }
bbce6d69 1297 else
ffc61ed2 1298 tmp = 1;
078c425b 1299 s += uskip;
bbce6d69 1300 }
bbce6d69 1301 }
ffc61ed2
JH
1302 else {
1303 while (s < strend) {
1304 if (!isALNUM(*s)) {
1305 if (tmp && (norun || regtry(prog, s)))
1306 goto got_it;
1307 else
1308 tmp = doevery;
1309 }
a0ed51b3 1310 else
ffc61ed2
JH
1311 tmp = 1;
1312 s++;
a0ed51b3 1313 }
a0ed51b3
LW
1314 }
1315 break;
bbce6d69 1316 case NALNUML:
3280af22 1317 PL_reg_flags |= RF_tainted;
ffc61ed2 1318 if (do_utf8) {
078c425b 1319 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1320 if (!isALNUM_LC_utf8((U8*)s)) {
1321 if (tmp && (norun || regtry(prog, s)))
1322 goto got_it;
1323 else
1324 tmp = doevery;
1325 }
a687059c 1326 else
ffc61ed2 1327 tmp = 1;
078c425b 1328 s += uskip;
a687059c 1329 }
a0d0e21e 1330 }
ffc61ed2
JH
1331 else {
1332 while (s < strend) {
1333 if (!isALNUM_LC(*s)) {
1334 if (tmp && (norun || regtry(prog, s)))
1335 goto got_it;
1336 else
1337 tmp = doevery;
1338 }
a0ed51b3 1339 else
ffc61ed2
JH
1340 tmp = 1;
1341 s++;
a0ed51b3 1342 }
a0ed51b3
LW
1343 }
1344 break;
a0d0e21e 1345 case SPACE:
ffc61ed2 1346 if (do_utf8) {
1a4fad37 1347 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1348 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1349 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1350 if (tmp && (norun || regtry(prog, s)))
1351 goto got_it;
1352 else
1353 tmp = doevery;
1354 }
a0d0e21e 1355 else
ffc61ed2 1356 tmp = 1;
078c425b 1357 s += uskip;
2304df62 1358 }
a0d0e21e 1359 }
ffc61ed2
JH
1360 else {
1361 while (s < strend) {
1362 if (isSPACE(*s)) {
1363 if (tmp && (norun || regtry(prog, s)))
1364 goto got_it;
1365 else
1366 tmp = doevery;
1367 }
a0ed51b3 1368 else
ffc61ed2
JH
1369 tmp = 1;
1370 s++;
a0ed51b3 1371 }
a0ed51b3
LW
1372 }
1373 break;
bbce6d69 1374 case SPACEL:
3280af22 1375 PL_reg_flags |= RF_tainted;
ffc61ed2 1376 if (do_utf8) {
078c425b 1377 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1378 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1379 if (tmp && (norun || regtry(prog, s)))
1380 goto got_it;
1381 else
1382 tmp = doevery;
1383 }
bbce6d69 1384 else
ffc61ed2 1385 tmp = 1;
078c425b 1386 s += uskip;
bbce6d69 1387 }
bbce6d69 1388 }
ffc61ed2
JH
1389 else {
1390 while (s < strend) {
1391 if (isSPACE_LC(*s)) {
1392 if (tmp && (norun || regtry(prog, s)))
1393 goto got_it;
1394 else
1395 tmp = doevery;
1396 }
a0ed51b3 1397 else
ffc61ed2
JH
1398 tmp = 1;
1399 s++;
a0ed51b3 1400 }
a0ed51b3
LW
1401 }
1402 break;
a0d0e21e 1403 case NSPACE:
ffc61ed2 1404 if (do_utf8) {
1a4fad37 1405 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1406 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1407 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1408 if (tmp && (norun || regtry(prog, s)))
1409 goto got_it;
1410 else
1411 tmp = doevery;
1412 }
a0d0e21e 1413 else
ffc61ed2 1414 tmp = 1;
078c425b 1415 s += uskip;
a687059c 1416 }
a0d0e21e 1417 }
ffc61ed2
JH
1418 else {
1419 while (s < strend) {
1420 if (!isSPACE(*s)) {
1421 if (tmp && (norun || regtry(prog, s)))
1422 goto got_it;
1423 else
1424 tmp = doevery;
1425 }
a0ed51b3 1426 else
ffc61ed2
JH
1427 tmp = 1;
1428 s++;
a0ed51b3 1429 }
a0ed51b3
LW
1430 }
1431 break;
bbce6d69 1432 case NSPACEL:
3280af22 1433 PL_reg_flags |= RF_tainted;
ffc61ed2 1434 if (do_utf8) {
078c425b 1435 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1436 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1437 if (tmp && (norun || regtry(prog, s)))
1438 goto got_it;
1439 else
1440 tmp = doevery;
1441 }
bbce6d69 1442 else
ffc61ed2 1443 tmp = 1;
078c425b 1444 s += uskip;
bbce6d69 1445 }
bbce6d69 1446 }
ffc61ed2
JH
1447 else {
1448 while (s < strend) {
1449 if (!isSPACE_LC(*s)) {
1450 if (tmp && (norun || regtry(prog, s)))
1451 goto got_it;
1452 else
1453 tmp = doevery;
1454 }
a0ed51b3 1455 else
ffc61ed2
JH
1456 tmp = 1;
1457 s++;
a0ed51b3 1458 }
a0ed51b3
LW
1459 }
1460 break;
a0d0e21e 1461 case DIGIT:
ffc61ed2 1462 if (do_utf8) {
1a4fad37 1463 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1464 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1465 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1466 if (tmp && (norun || regtry(prog, s)))
1467 goto got_it;
1468 else
1469 tmp = doevery;
1470 }
a0d0e21e 1471 else
ffc61ed2 1472 tmp = 1;
078c425b 1473 s += uskip;
2b69d0c2 1474 }
a0d0e21e 1475 }
ffc61ed2
JH
1476 else {
1477 while (s < strend) {
1478 if (isDIGIT(*s)) {
1479 if (tmp && (norun || regtry(prog, s)))
1480 goto got_it;
1481 else
1482 tmp = doevery;
1483 }
a0ed51b3 1484 else
ffc61ed2
JH
1485 tmp = 1;
1486 s++;
a0ed51b3 1487 }
a0ed51b3
LW
1488 }
1489 break;
b8c5462f
JH
1490 case DIGITL:
1491 PL_reg_flags |= RF_tainted;
ffc61ed2 1492 if (do_utf8) {
078c425b 1493 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1494 if (isDIGIT_LC_utf8((U8*)s)) {
1495 if (tmp && (norun || regtry(prog, s)))
1496 goto got_it;
1497 else
1498 tmp = doevery;
1499 }
b8c5462f 1500 else
ffc61ed2 1501 tmp = 1;
078c425b 1502 s += uskip;
b8c5462f 1503 }
b8c5462f 1504 }
ffc61ed2
JH
1505 else {
1506 while (s < strend) {
1507 if (isDIGIT_LC(*s)) {
1508 if (tmp && (norun || regtry(prog, s)))
1509 goto got_it;
1510 else
1511 tmp = doevery;
1512 }
b8c5462f 1513 else
ffc61ed2
JH
1514 tmp = 1;
1515 s++;
b8c5462f 1516 }
b8c5462f
JH
1517 }
1518 break;
a0d0e21e 1519 case NDIGIT:
ffc61ed2 1520 if (do_utf8) {
1a4fad37 1521 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1522 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1523 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1524 if (tmp && (norun || regtry(prog, s)))
1525 goto got_it;
1526 else
1527 tmp = doevery;
1528 }
a0d0e21e 1529 else
ffc61ed2 1530 tmp = 1;
078c425b 1531 s += uskip;
a687059c 1532 }
a0d0e21e 1533 }
ffc61ed2
JH
1534 else {
1535 while (s < strend) {
1536 if (!isDIGIT(*s)) {
1537 if (tmp && (norun || regtry(prog, s)))
1538 goto got_it;
1539 else
1540 tmp = doevery;
1541 }
a0ed51b3 1542 else
ffc61ed2
JH
1543 tmp = 1;
1544 s++;
a0ed51b3 1545 }
a0ed51b3
LW
1546 }
1547 break;
b8c5462f
JH
1548 case NDIGITL:
1549 PL_reg_flags |= RF_tainted;
ffc61ed2 1550 if (do_utf8) {
078c425b 1551 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1552 if (!isDIGIT_LC_utf8((U8*)s)) {
1553 if (tmp && (norun || regtry(prog, s)))
1554 goto got_it;
1555 else
1556 tmp = doevery;
1557 }
b8c5462f 1558 else
ffc61ed2 1559 tmp = 1;
078c425b 1560 s += uskip;
b8c5462f 1561 }
a0ed51b3 1562 }
ffc61ed2
JH
1563 else {
1564 while (s < strend) {
1565 if (!isDIGIT_LC(*s)) {
1566 if (tmp && (norun || regtry(prog, s)))
1567 goto got_it;
1568 else
1569 tmp = doevery;
1570 }
cf93c79d 1571 else
ffc61ed2
JH
1572 tmp = 1;
1573 s++;
b8c5462f 1574 }
b8c5462f
JH
1575 }
1576 break;
b3c9acc1 1577 default:
3c3eec57
GS
1578 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1579 break;
d6a28714 1580 }
6eb5f6b9
JH
1581 return 0;
1582 got_it:
1583 return s;
1584}
1585
1586/*
1587 - regexec_flags - match a regexp against a string
1588 */
1589I32
1590Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1591 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1592/* strend: pointer to null at end of string */
1593/* strbeg: real beginning of string */
1594/* minend: end of match must be >=minend after stringarg. */
1595/* data: May be used for some additional optimizations. */
1596/* nosave: For optimizations. */
1597{
97aff369 1598 dVAR;
6eb5f6b9
JH
1599 register char *s;
1600 register regnode *c;
1601 register char *startpos = stringarg;
6eb5f6b9
JH
1602 I32 minlen; /* must match at least this many chars */
1603 I32 dontbother = 0; /* how many characters not to try at end */
6eb5f6b9
JH
1604 I32 end_shift = 0; /* Same for the end. */ /* CC */
1605 I32 scream_pos = -1; /* Internal iterator of scream. */
ccac19ea 1606 char *scream_olds = NULL;
6eb5f6b9 1607 SV* oreplsv = GvSV(PL_replgv);
1df70142 1608 const bool do_utf8 = DO_UTF8(sv);
a3b680e6 1609 const I32 multiline = prog->reganch & PMf_MULTILINE;
2a782b5b 1610#ifdef DEBUGGING
6136c704
AL
1611 SV * const dsv0 = PERL_DEBUG_PAD_ZERO(0);
1612 SV * const dsv1 = PERL_DEBUG_PAD_ZERO(1);
2a782b5b 1613#endif
a3621e74
YO
1614
1615 GET_RE_DEBUG_FLAGS_DECL;
1616
9d4ba2ae 1617 PERL_UNUSED_ARG(data);
6eb5f6b9
JH
1618
1619 /* Be paranoid... */
1620 if (prog == NULL || startpos == NULL) {
1621 Perl_croak(aTHX_ "NULL regexp parameter");
1622 return 0;
1623 }
1624
bac06658
JH
1625 PL_reg_re = prog;
1626#ifdef DEBUGGING
1627 PL_regnarrate = DEBUG_r_TEST;
1628#endif
1629
1630 RX_MATCH_UTF8_set(prog, do_utf8);
1631
6eb5f6b9 1632 minlen = prog->minlen;
61a36c01 1633 if (strend - startpos < minlen) {
a3621e74 1634 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584
JH
1635 "String too short [regexec_flags]...\n"));
1636 goto phooey;
1aa99e6b 1637 }
6eb5f6b9 1638
6eb5f6b9
JH
1639 /* Check validity of program. */
1640 if (UCHARAT(prog->program) != REG_MAGIC) {
1641 Perl_croak(aTHX_ "corrupted regexp program");
1642 }
1643
1644 PL_reg_flags = 0;
1645 PL_reg_eval_set = 0;
1646 PL_reg_maxiter = 0;
1647
1648 if (prog->reganch & ROPT_UTF8)
1649 PL_reg_flags |= RF_utf8;
1650
1651 /* Mark beginning of line for ^ and lookbehind. */
1652 PL_regbol = startpos;
1653 PL_bostr = strbeg;
1654 PL_reg_sv = sv;
1655
1656 /* Mark end of line for $ (and such) */
1657 PL_regeol = strend;
1658
1659 /* see how far we have to get to not match where we matched before */
1660 PL_regtill = startpos+minend;
1661
1662 /* We start without call_cc context. */
1663 PL_reg_call_cc = 0;
1664
1665 /* If there is a "must appear" string, look for it. */
1666 s = startpos;
1667
1668 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1669 MAGIC *mg;
1670
1671 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1672 PL_reg_ganch = startpos;
1673 else if (sv && SvTYPE(sv) >= SVt_PVMG
1674 && SvMAGIC(sv)
14befaf4
DM
1675 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1676 && mg->mg_len >= 0) {
6eb5f6b9
JH
1677 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1678 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1679 if (s > PL_reg_ganch)
6eb5f6b9
JH
1680 goto phooey;
1681 s = PL_reg_ganch;
1682 }
1683 }
1684 else /* pos() not defined */
1685 PL_reg_ganch = strbeg;
1686 }
1687
a0714e2c 1688 if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
6eb5f6b9
JH
1689 re_scream_pos_data d;
1690
1691 d.scream_olds = &scream_olds;
1692 d.scream_pos = &scream_pos;
1693 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7 1694 if (!s) {
a3621e74 1695 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1696 goto phooey; /* not present */
3fa9c3d7 1697 }
6eb5f6b9
JH
1698 }
1699
a3621e74 1700 DEBUG_EXECUTE_r({
1df70142
AL
1701 const char * const s0 = UTF
1702 ? pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1703 UNI_DISPLAY_REGEX)
1704 : prog->precomp;
1705 const int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1706 const char * const s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
c728cb41 1707 UNI_DISPLAY_REGEX) : startpos;
1df70142 1708 const int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
2a782b5b
JH
1709 if (!PL_colorset)
1710 reginitcolors();
1711 PerlIO_printf(Perl_debug_log,
a0288114 1712 "%sMatching REx%s \"%s%*.*s%s%s\" against \"%s%.*s%s%s\"\n",
e4584336 1713 PL_colors[4], PL_colors[5], PL_colors[0],
9e55ce06 1714 len0, len0, s0,
2a782b5b 1715 PL_colors[1],
9e55ce06 1716 len0 > 60 ? "..." : "",
2a782b5b 1717 PL_colors[0],
9e55ce06
JH
1718 (int)(len1 > 60 ? 60 : len1),
1719 s1, PL_colors[1],
1720 (len1 > 60 ? "..." : "")
2a782b5b
JH
1721 );
1722 });
6eb5f6b9
JH
1723
1724 /* Simplest case: anchored match need be tried only once. */
1725 /* [unless only anchor is BOL and multiline is set] */
1726 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1727 if (s == startpos && regtry(prog, startpos))
1728 goto got_it;
7fba1cd6 1729 else if (multiline || (prog->reganch & ROPT_IMPLICIT)
6eb5f6b9
JH
1730 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1731 {
1732 char *end;
1733
1734 if (minlen)
1735 dontbother = minlen - 1;
1aa99e6b 1736 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9 1737 /* for multiline we only have to try after newlines */
33b8afdf 1738 if (prog->check_substr || prog->check_utf8) {
6eb5f6b9
JH
1739 if (s == startpos)
1740 goto after_try;
1741 while (1) {
1742 if (regtry(prog, s))
1743 goto got_it;
1744 after_try:
1745 if (s >= end)
1746 goto phooey;
1747 if (prog->reganch & RE_USE_INTUIT) {
1748 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1749 if (!s)
1750 goto phooey;
1751 }
1752 else
1753 s++;
1754 }
1755 } else {
1756 if (s > startpos)
1757 s--;
1758 while (s < end) {
1759 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1760 if (regtry(prog, s))
1761 goto got_it;
1762 }
1763 }
1764 }
1765 }
1766 goto phooey;
1767 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1768 if (regtry(prog, PL_reg_ganch))
1769 goto got_it;
1770 goto phooey;
1771 }
1772
1773 /* Messy cases: unanchored match. */
33b8afdf 1774 if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1775 /* we have /x+whatever/ */
1776 /* it must be a one character string (XXXX Except UTF?) */
33b8afdf 1777 char ch;
bf93d4cc
GS
1778#ifdef DEBUGGING
1779 int did_match = 0;
1780#endif
33b8afdf
JH
1781 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1782 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
3f7c398e 1783 ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
bf93d4cc 1784
1aa99e6b 1785 if (do_utf8) {
6eb5f6b9
JH
1786 while (s < strend) {
1787 if (*s == ch) {
a3621e74 1788 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1789 if (regtry(prog, s)) goto got_it;
1790 s += UTF8SKIP(s);
1791 while (s < strend && *s == ch)
1792 s += UTF8SKIP(s);
1793 }
1794 s += UTF8SKIP(s);
1795 }
1796 }
1797 else {
1798 while (s < strend) {
1799 if (*s == ch) {
a3621e74 1800 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1801 if (regtry(prog, s)) goto got_it;
1802 s++;
1803 while (s < strend && *s == ch)
1804 s++;
1805 }
1806 s++;
1807 }
1808 }
a3621e74 1809 DEBUG_EXECUTE_r(if (!did_match)
bf93d4cc 1810 PerlIO_printf(Perl_debug_log,
b7953727
JH
1811 "Did not find anchored character...\n")
1812 );
6eb5f6b9 1813 }
a0714e2c
SS
1814 else if (prog->anchored_substr != NULL
1815 || prog->anchored_utf8 != NULL
1816 || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
33b8afdf
JH
1817 && prog->float_max_offset < strend - s)) {
1818 SV *must;
1819 I32 back_max;
1820 I32 back_min;
1821 char *last;
6eb5f6b9 1822 char *last1; /* Last position checked before */
bf93d4cc
GS
1823#ifdef DEBUGGING
1824 int did_match = 0;
1825#endif
33b8afdf
JH
1826 if (prog->anchored_substr || prog->anchored_utf8) {
1827 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1828 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1829 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1830 back_max = back_min = prog->anchored_offset;
1831 } else {
1832 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1833 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1834 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1835 back_max = prog->float_max_offset;
1836 back_min = prog->float_min_offset;
1837 }
1838 if (must == &PL_sv_undef)
1839 /* could not downgrade utf8 check substring, so must fail */
1840 goto phooey;
1841
1842 last = HOP3c(strend, /* Cannot start after this */
1843 -(I32)(CHR_SVLEN(must)
1844 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9
JH
1845
1846 if (s > PL_bostr)
1847 last1 = HOPc(s, -1);
1848 else
1849 last1 = s - 1; /* bogus */
1850
a0288114 1851 /* XXXX check_substr already used to find "s", can optimize if
6eb5f6b9
JH
1852 check_substr==must. */
1853 scream_pos = -1;
1854 dontbother = end_shift;
1855 strend = HOPc(strend, -dontbother);
1856 while ( (s <= last) &&
9041c2e3 1857 ((flags & REXEC_SCREAM)
1aa99e6b 1858 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1859 end_shift, &scream_pos, 0))
1aa99e6b 1860 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1861 (unsigned char*)strend, must,
7fba1cd6 1862 multiline ? FBMrf_MULTILINE : 0))) ) {
4addbd3b
HS
1863 /* we may be pointing at the wrong string */
1864 if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
3f7c398e 1865 s = strbeg + (s - SvPVX_const(sv));
a3621e74 1866 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1867 if (HOPc(s, -back_max) > last1) {
1868 last1 = HOPc(s, -back_min);
1869 s = HOPc(s, -back_max);
1870 }
1871 else {
52657f30 1872 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
6eb5f6b9
JH
1873
1874 last1 = HOPc(s, -back_min);
52657f30 1875 s = t;
6eb5f6b9 1876 }
1aa99e6b 1877 if (do_utf8) {
6eb5f6b9
JH
1878 while (s <= last1) {
1879 if (regtry(prog, s))
1880 goto got_it;
1881 s += UTF8SKIP(s);
1882 }
1883 }
1884 else {
1885 while (s <= last1) {
1886 if (regtry(prog, s))
1887 goto got_it;
1888 s++;
1889 }
1890 }
1891 }
a3621e74 1892 DEBUG_EXECUTE_r(if (!did_match)
b7953727 1893 PerlIO_printf(Perl_debug_log,
a0288114 1894 "Did not find %s substr \"%s%.*s%s\"%s...\n",
33b8afdf 1895 ((must == prog->anchored_substr || must == prog->anchored_utf8)
bf93d4cc
GS
1896 ? "anchored" : "floating"),
1897 PL_colors[0],
1898 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 1899 SvPVX_const(must),
b7953727
JH
1900 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1901 );
6eb5f6b9
JH
1902 goto phooey;
1903 }
155aba94 1904 else if ((c = prog->regstclass)) {
f14c76ed
RGS
1905 if (minlen) {
1906 I32 op = (U8)OP(prog->regstclass);
66e933ab 1907 /* don't bother with what can't match */
f14c76ed
RGS
1908 if (PL_regkind[op] != EXACT && op != CANY)
1909 strend = HOPc(strend, -(minlen - 1));
1910 }
a3621e74 1911 DEBUG_EXECUTE_r({
ffc61ed2 1912 SV *prop = sv_newmortal();
cfd0369c
NC
1913 const char *s0;
1914 const char *s1;
9e55ce06
JH
1915 int len0;
1916 int len1;
1917
ffc61ed2 1918 regprop(prop, c);
9e55ce06 1919 s0 = UTF ?
3f7c398e 1920 pv_uni_display(dsv0, (U8*)SvPVX_const(prop), SvCUR(prop), 60,
c728cb41 1921 UNI_DISPLAY_REGEX) :
cfd0369c 1922 SvPVX_const(prop);
9e55ce06
JH
1923 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1924 s1 = UTF ?
c728cb41 1925 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
9e55ce06
JH
1926 len1 = UTF ? SvCUR(dsv1) : strend - s;
1927 PerlIO_printf(Perl_debug_log,
a0288114 1928 "Matching stclass \"%*.*s\" against \"%*.*s\"\n",
9e55ce06
JH
1929 len0, len0, s0,
1930 len1, len1, s1);
ffc61ed2 1931 });
06b5626a 1932 if (find_byclass(prog, c, s, strend, 0))
6eb5f6b9 1933 goto got_it;
a3621e74 1934 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1935 }
1936 else {
1937 dontbother = 0;
a0714e2c 1938 if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
33b8afdf 1939 /* Trim the end. */
d6a28714 1940 char *last;
33b8afdf
JH
1941 SV* float_real;
1942
1943 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1944 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1945 float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
d6a28714
JH
1946
1947 if (flags & REXEC_SCREAM) {
33b8afdf 1948 last = screaminstr(sv, float_real, s - strbeg,
d6a28714
JH
1949 end_shift, &scream_pos, 1); /* last one */
1950 if (!last)
ffc61ed2 1951 last = scream_olds; /* Only one occurrence. */
4addbd3b
HS
1952 /* we may be pointing at the wrong string */
1953 else if (RX_MATCH_COPIED(prog))
3f7c398e 1954 s = strbeg + (s - SvPVX_const(sv));
b8c5462f 1955 }
d6a28714
JH
1956 else {
1957 STRLEN len;
cfd0369c 1958 const char * const little = SvPV_const(float_real, len);
d6a28714 1959
33b8afdf 1960 if (SvTAIL(float_real)) {
d6a28714
JH
1961 if (memEQ(strend - len + 1, little, len - 1))
1962 last = strend - len + 1;
7fba1cd6 1963 else if (!multiline)
9041c2e3 1964 last = memEQ(strend - len, little, len)
bd61b366 1965 ? strend - len : NULL;
b8c5462f 1966 else
d6a28714
JH
1967 goto find_last;
1968 } else {
1969 find_last:
9041c2e3 1970 if (len)
d6a28714 1971 last = rninstr(s, strend, little, little + len);
b8c5462f 1972 else
a0288114 1973 last = strend; /* matching "$" */
b8c5462f 1974 }
b8c5462f 1975 }
bf93d4cc 1976 if (last == NULL) {
a3621e74 1977 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
bf93d4cc 1978 "%sCan't trim the tail, match fails (should not happen)%s\n",
e4584336 1979 PL_colors[4], PL_colors[5]));
bf93d4cc
GS
1980 goto phooey; /* Should not happen! */
1981 }
d6a28714
JH
1982 dontbother = strend - last + prog->float_min_offset;
1983 }
1984 if (minlen && (dontbother < minlen))
1985 dontbother = minlen - 1;
1986 strend -= dontbother; /* this one's always in bytes! */
1987 /* We don't know much -- general case. */
1aa99e6b 1988 if (do_utf8) {
d6a28714
JH
1989 for (;;) {
1990 if (regtry(prog, s))
1991 goto got_it;
1992 if (s >= strend)
1993 break;
b8c5462f 1994 s += UTF8SKIP(s);
d6a28714
JH
1995 };
1996 }
1997 else {
1998 do {
1999 if (regtry(prog, s))
2000 goto got_it;
2001 } while (s++ < strend);
2002 }
2003 }
2004
2005 /* Failure. */
2006 goto phooey;
2007
2008got_it:
2009 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2010
2011 if (PL_reg_eval_set) {
2012 /* Preserve the current value of $^R */
2013 if (oreplsv != GvSV(PL_replgv))
2014 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2015 restored, the value remains
2016 the same. */
4f639d21 2017 restore_pos(aTHX_ prog);
d6a28714
JH
2018 }
2019
2020 /* make sure $`, $&, $', and $digit will work later */
2021 if ( !(flags & REXEC_NOT_FIRST) ) {
ed252734 2022 RX_MATCH_COPY_FREE(prog);
d6a28714
JH
2023 if (flags & REXEC_COPY_STR) {
2024 I32 i = PL_regeol - startpos + (stringarg - strbeg);
f8c7b90f 2025#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2026 if ((SvIsCOW(sv)
2027 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2028 if (DEBUG_C_TEST) {
2029 PerlIO_printf(Perl_debug_log,
2030 "Copy on write: regexp capture, type %d\n",
2031 (int) SvTYPE(sv));
2032 }
2033 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
d5263905 2034 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
ed252734
NC
2035 assert (SvPOKp(prog->saved_copy));
2036 } else
2037#endif
2038 {
2039 RX_MATCH_COPIED_on(prog);
2040 s = savepvn(strbeg, i);
2041 prog->subbeg = s;
2042 }
d6a28714 2043 prog->sublen = i;
d6a28714
JH
2044 }
2045 else {
2046 prog->subbeg = strbeg;
2047 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2048 }
2049 }
9041c2e3 2050
d6a28714
JH
2051 return 1;
2052
2053phooey:
a3621e74 2054 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
e4584336 2055 PL_colors[4], PL_colors[5]));
d6a28714 2056 if (PL_reg_eval_set)
4f639d21 2057 restore_pos(aTHX_ prog);
d6a28714
JH
2058 return 0;
2059}
2060
2061/*
2062 - regtry - try match at specific point
2063 */
2064STATIC I32 /* 0 failure, 1 success */
2065S_regtry(pTHX_ regexp *prog, char *startpos)
2066{
97aff369 2067 dVAR;
d6a28714
JH
2068 register I32 *sp;
2069 register I32 *ep;
2070 CHECKPOINT lastcp;
a3621e74 2071 GET_RE_DEBUG_FLAGS_DECL;
d6a28714 2072
02db2b7b
IZ
2073#ifdef DEBUGGING
2074 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
2075#endif
d6a28714
JH
2076 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2077 MAGIC *mg;
2078
2079 PL_reg_eval_set = RS_init;
a3621e74 2080 DEBUG_EXECUTE_r(DEBUG_s(
b900a521
JH
2081 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2082 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 2083 ));
e8347627 2084 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
2085 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2086 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2087 SAVETMPS;
2088 /* Apparently this is not needed, judging by wantarray. */
e8347627 2089 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
2090 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2091
2092 if (PL_reg_sv) {
2093 /* Make $_ available to executed code. */
2094 if (PL_reg_sv != DEFSV) {
59f00321 2095 SAVE_DEFSV;
d6a28714 2096 DEFSV = PL_reg_sv;
b8c5462f 2097 }
d6a28714 2098
9041c2e3 2099 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 2100 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 2101 /* prepare for quick setting of pos */
d300d9fa
NC
2102#ifdef PERL_OLD_COPY_ON_WRITE
2103 if (SvIsCOW(sv))
2104 sv_force_normal_flags(sv, 0);
2105#endif
2106 mg = sv_magicext(PL_reg_sv, (SV*)0, PERL_MAGIC_regex_global,
2107 &PL_vtbl_mglob, NULL, 0);
d6a28714 2108 mg->mg_len = -1;
b8c5462f 2109 }
d6a28714
JH
2110 PL_reg_magic = mg;
2111 PL_reg_oldpos = mg->mg_len;
4f639d21 2112 SAVEDESTRUCTOR_X(restore_pos, prog);
d6a28714 2113 }
09687e5a 2114 if (!PL_reg_curpm) {
a02a5408 2115 Newxz(PL_reg_curpm, 1, PMOP);
09687e5a
AB
2116#ifdef USE_ITHREADS
2117 {
2118 SV* repointer = newSViv(0);
577e12cc 2119 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2120 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2121 av_push(PL_regex_padav,repointer);
2122 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2123 PL_regex_pad = AvARRAY(PL_regex_padav);
2124 }
2125#endif
2126 }
aaa362c4 2127 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2128 PL_reg_oldcurpm = PL_curpm;
2129 PL_curpm = PL_reg_curpm;
2130 if (RX_MATCH_COPIED(prog)) {
2131 /* Here is a serious problem: we cannot rewrite subbeg,
2132 since it may be needed if this match fails. Thus
2133 $` inside (?{}) could fail... */
2134 PL_reg_oldsaved = prog->subbeg;
2135 PL_reg_oldsavedlen = prog->sublen;
f8c7b90f 2136#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2137 PL_nrs = prog->saved_copy;
2138#endif
d6a28714
JH
2139 RX_MATCH_COPIED_off(prog);
2140 }
2141 else
bd61b366 2142 PL_reg_oldsaved = NULL;
d6a28714
JH
2143 prog->subbeg = PL_bostr;
2144 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2145 }
973dddac 2146 prog->startp[0] = startpos - PL_bostr;
d6a28714
JH
2147 PL_reginput = startpos;
2148 PL_regstartp = prog->startp;
2149 PL_regendp = prog->endp;
2150 PL_reglastparen = &prog->lastparen;
a01268b5 2151 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714 2152 prog->lastparen = 0;
03994de8 2153 prog->lastcloseparen = 0;
d6a28714 2154 PL_regsize = 0;
a3621e74 2155 DEBUG_EXECUTE_r(PL_reg_starttry = startpos);
d6a28714
JH
2156 if (PL_reg_start_tmpl <= prog->nparens) {
2157 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2158 if(PL_reg_start_tmp)
2159 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2160 else
a02a5408 2161 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
d6a28714
JH
2162 }
2163
2164 /* XXXX What this code is doing here?!!! There should be no need
2165 to do this again and again, PL_reglastparen should take care of
3dd2943c 2166 this! --ilya*/
dafc8851
JH
2167
2168 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2169 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2170 * PL_reglastparen), is not needed at all by the test suite
2171 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2172 * enough, for building DynaLoader, or otherwise this
2173 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2174 * will happen. Meanwhile, this code *is* needed for the
2175 * above-mentioned test suite tests to succeed. The common theme
2176 * on those tests seems to be returning null fields from matches.
2177 * --jhi */
dafc8851 2178#if 1
d6a28714
JH
2179 sp = prog->startp;
2180 ep = prog->endp;
2181 if (prog->nparens) {
097eb12c 2182 register I32 i;
eb160463 2183 for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
d6a28714
JH
2184 *++sp = -1;
2185 *++ep = -1;
2186 }
2187 }
dafc8851 2188#endif
02db2b7b 2189 REGCP_SET(lastcp);
4f639d21 2190 if (regmatch(prog, prog->program + 1)) {
d6a28714
JH
2191 prog->endp[0] = PL_reginput - PL_bostr;
2192 return 1;
2193 }
02db2b7b 2194 REGCP_UNWIND(lastcp);
d6a28714
JH
2195 return 0;
2196}
2197
02db2b7b
IZ
2198#define RE_UNWIND_BRANCH 1
2199#define RE_UNWIND_BRANCHJ 2
2200
2201union re_unwind_t;
2202
2203typedef struct { /* XX: makes sense to enlarge it... */
2204 I32 type;
2205 I32 prev;
2206 CHECKPOINT lastcp;
2207} re_unwind_generic_t;
2208
2209typedef struct {
2210 I32 type;
2211 I32 prev;
2212 CHECKPOINT lastcp;
2213 I32 lastparen;
2214 regnode *next;
2215 char *locinput;
2216 I32 nextchr;
2217#ifdef DEBUGGING
2218 int regindent;
2219#endif
2220} re_unwind_branch_t;
2221
2222typedef union re_unwind_t {
2223 I32 type;
2224 re_unwind_generic_t generic;
2225 re_unwind_branch_t branch;
2226} re_unwind_t;
2227
8ba1375e
MJD
2228#define sayYES goto yes
2229#define sayNO goto no
e0f9d4a8 2230#define sayNO_ANYOF goto no_anyof
8ba1375e
MJD
2231#define sayYES_FINAL goto yes_final
2232#define sayYES_LOUD goto yes_loud
2233#define sayNO_FINAL goto no_final
2234#define sayNO_SILENT goto do_no
2235#define saySAME(x) if (x) goto yes; else goto no
2236
3ab3c9b4
HS
2237#define POSCACHE_SUCCESS 0 /* caching success rather than failure */
2238#define POSCACHE_SEEN 1 /* we know what we're caching */
2239#define POSCACHE_START 2 /* the real cache: this bit maps to pos 0 */
2240#define CACHEsayYES STMT_START { \
d8319b27 2241 if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \
3ab3c9b4
HS
2242 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2243 PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) || (1<<POSCACHE_SEEN); \
2244 else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2245 /* cache records failure, but this is success */ \
2246 DEBUG_r( \
2247 PerlIO_printf(Perl_debug_log, \
2248 "%*s (remove success from failure cache)\n", \
2249 REPORT_CODE_OFF+PL_regindent*2, "") \
2250 ); \
d8319b27 2251 PL_reg_poscache[st->u.whilem.cache_offset] &= ~(1<<st->u.whilem.cache_bit); \
3ab3c9b4
HS
2252 } \
2253 } \
2254 sayYES; \
2255} STMT_END
2256#define CACHEsayNO STMT_START { \
d8319b27 2257 if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \
3ab3c9b4
HS
2258 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2259 PL_reg_poscache[0] |= (1<<POSCACHE_SEEN); \
2260 else if ((PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2261 /* cache records success, but this is failure */ \
2262 DEBUG_r( \
2263 PerlIO_printf(Perl_debug_log, \
2264 "%*s (remove failure from success cache)\n", \
2265 REPORT_CODE_OFF+PL_regindent*2, "") \
2266 ); \
d8319b27 2267 PL_reg_poscache[st->u.whilem.cache_offset] &= ~(1<<st->u.whilem.cache_bit); \
3ab3c9b4
HS
2268 } \
2269 } \
2270 sayNO; \
2271} STMT_END
2272
a3621e74
YO
2273/* this is used to determine how far from the left messages like
2274 'failed...' are printed. Currently 29 makes these messages line
2275 up with the opcode they refer to. Earlier perls used 25 which
2276 left these messages outdented making reviewing a debug output
2277 quite difficult.
2278*/
2279#define REPORT_CODE_OFF 29
2280
2281
2282/* Make sure there is a test for this +1 options in re_tests */
2283#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2284
5d9a96ca
DM
2285/* grab a new slab and return the first slot in it */
2286
2287STATIC regmatch_state *
2288S_push_slab(pTHX)
2289{
2290 regmatch_slab *s = PL_regmatch_slab->next;
2291 if (!s) {
2292 Newx(s, 1, regmatch_slab);
2293 s->prev = PL_regmatch_slab;
2294 s->next = NULL;
2295 PL_regmatch_slab->next = s;
2296 }
2297 PL_regmatch_slab = s;
2298 return &s->states[0];
2299}
5b47454d 2300
95b24440
DM
2301/* simulate a recursive call to regmatch */
2302
2303#define REGMATCH(ns, where) \
5d9a96ca
DM
2304 st->scan = scan; \
2305 scan = (ns); \
2306 st->resume_state = resume_##where; \
95b24440
DM
2307 goto start_recurse; \
2308 resume_point_##where:
2309
d6a28714
JH
2310/*
2311 - regmatch - main matching routine
2312 *
2313 * Conceptually the strategy is simple: check to see whether the current
2314 * node matches, call self recursively to see whether the rest matches,
2315 * and then act accordingly. In practice we make some effort to avoid
2316 * recursion, in particular by going through "ordinary" nodes (that don't
2317 * need to know whether the rest of the match failed) by a loop instead of
2318 * by recursion.
2319 */
2320/* [lwall] I've hoisted the register declarations to the outer block in order to
2321 * maybe save a little bit of pushing and popping on the stack. It also takes
2322 * advantage of machines that use a register save mask on subroutine entry.
95b24440
DM
2323 *
2324 * This function used to be heavily recursive, but since this had the
2325 * effect of blowing the CPU stack on complex regexes, it has been
2326 * restructured to be iterative, and to save state onto the heap rather
2327 * than the stack. Essentially whereever regmatch() used to be called, it
2328 * pushes the current state, notes where to return, then jumps back into
2329 * the main loop.
2330 *
2331 * Originally the structure of this function used to look something like
2332
2333 S_regmatch() {
2334 int a = 1, b = 2;
2335 ...
2336 while (scan != NULL) {
5d9a96ca 2337 a++; // do stuff with a and b
95b24440
DM
2338 ...
2339 switch (OP(scan)) {
2340 case FOO: {
2341 int local = 3;
2342 ...
2343 if (regmatch(...)) // recurse
2344 goto yes;
2345 }
2346 ...
2347 }
2348 }
2349 yes:
2350 return 1;
2351 }
2352
2353 * Now it looks something like this:
2354
5d9a96ca 2355 typedef struct {
95b24440
DM
2356 int a, b, local;
2357 int resume_state;
5d9a96ca 2358 } regmatch_state;
95b24440
DM
2359
2360 S_regmatch() {
5d9a96ca
DM
2361 regmatch_state *st = new();
2362 int depth=0;
2363 st->a++; // do stuff with a and b
95b24440
DM
2364 ...
2365 while (scan != NULL) {
2366 ...
2367 switch (OP(scan)) {
2368 case FOO: {
5d9a96ca 2369 st->local = 3;
95b24440 2370 ...
5d9a96ca
DM
2371 st->scan = scan;
2372 scan = ...;
2373 st->resume_state = resume_FOO;
2374 goto start_recurse; // recurse
95b24440 2375
5d9a96ca
DM
2376 resume_point_FOO:
2377 if (result)
95b24440
DM
2378 goto yes;
2379 }
2380 ...
2381 }
5d9a96ca
DM
2382 start_recurse:
2383 st = new(); push a new state
2384 st->a = 1; st->b = 2;
2385 depth++;
95b24440 2386 }
5d9a96ca 2387 yes:
95b24440 2388 result = 1;
5d9a96ca
DM
2389 if (depth--) {
2390 st = pop();
95b24440
DM
2391 switch (resume_state) {
2392 case resume_FOO:
2393 goto resume_point_FOO;
2394 ...
2395 }
2396 }
2397 return result
2398 }
2399
2400 * WARNING: this means that any line in this function that contains a
2401 * REGMATCH() or TRYPAREN() is actually simulating a recursive call to
2402 * regmatch() using gotos instead. Thus the values of any local variables
2403 * not saved in the regmatch_state structure will have been lost when
2404 * execution resumes on the next line .
5d9a96ca
DM
2405 *
2406 * States (ie the st pointer) are allocated in slabs of about 4K in size.
2407 * PL_regmatch_state always points to the currently active state, and
2408 * PL_regmatch_slab points to the slab currently containing PL_regmatch_state.
2409 * The first time regmatch is called, the first slab is allocated, and is
2410 * never freed until interpreter desctruction. When the slab is full,
2411 * a new one is allocated chained to the end. At exit from regmatch, slabs
2412 * allocated since entry are freed.
d6a28714 2413 */
95b24440
DM
2414
2415
d6a28714 2416STATIC I32 /* 0 failure, 1 success */
4f639d21 2417S_regmatch(pTHX_ regexp *rex, regnode *prog)
d6a28714 2418{
27da23d5 2419 dVAR;
95b24440 2420 register const bool do_utf8 = PL_reg_match_utf8;
4ad0818d 2421 const U32 uniflags = UTF8_ALLOW_DEFAULT;
95b24440 2422
5d9a96ca
DM
2423 regmatch_slab *orig_slab;
2424 regmatch_state *orig_state;
a3621e74 2425
5d9a96ca
DM
2426 /* the current state. This is a cached copy of PL_regmatch_state */
2427 register regmatch_state *st;
95b24440 2428
5d9a96ca
DM
2429 /* cache heavy used fields of st in registers */
2430 register regnode *scan;
2431 register regnode *next;
2432 register I32 n = 0; /* initialize to shut up compiler warning */
2433 register char *locinput = PL_reginput;
95b24440 2434
5d9a96ca
DM
2435 /* these variables are NOT saved during a recusive RFEGMATCH: */
2436 register I32 nextchr; /* is always set to UCHARAT(locinput) */
2437 bool result; /* return value of S_regmatch */
2438 regnode *inner; /* Next node in internal branch. */
2439 int depth = 0; /* depth of recursion */
95b24440
DM
2440
2441#ifdef DEBUGGING
ab74612d 2442 SV *re_debug_flags = NULL;
a3621e74 2443 GET_RE_DEBUG_FLAGS;
d6a28714
JH
2444 PL_regindent++;
2445#endif
2446
5d9a96ca
DM
2447 /* on first ever call to regmatch, allocate first slab */
2448 if (!PL_regmatch_slab) {
2449 Newx(PL_regmatch_slab, 1, regmatch_slab);
2450 PL_regmatch_slab->prev = NULL;
2451 PL_regmatch_slab->next = NULL;
2452 PL_regmatch_state = &PL_regmatch_slab->states[0] - 1;
2453 }
2454
2455 /* remember current high-water mark for exit */
2456 /* XXX this should be done with SAVE* instead */
2457 orig_slab = PL_regmatch_slab;
2458 orig_state = PL_regmatch_state;
2459
2460 /* grab next free state slot */
2461 st = ++PL_regmatch_state;
2462 if (st > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]))
2463 st = PL_regmatch_state = S_push_slab(aTHX);
2464
2465 st->minmod = 0;
2466 st->sw = 0;
2467 st->logical = 0;
2468 st->unwind = 0;
2469 st->cc = NULL;
d6a28714
JH
2470 /* Note that nextchr is a byte even in UTF */
2471 nextchr = UCHARAT(locinput);
2472 scan = prog;
2473 while (scan != NULL) {
8ba1375e 2474
a3621e74 2475 DEBUG_EXECUTE_r( {
6136c704 2476 SV * const prop = sv_newmortal();
1df70142
AL
2477 const int docolor = *PL_colors[0];
2478 const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2479 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2480 /* The part of the string before starttry has one color
2481 (pref0_len chars), between starttry and current
2482 position another one (pref_len - pref0_len chars),
2483 after the current position the third one.
2484 We assume that pref0_len <= pref_len, otherwise we
2485 decrease pref0_len. */
9041c2e3 2486 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2487 ? (5 + taill) - l : locinput - PL_bostr;
2488 int pref0_len;
d6a28714 2489
df1ffd02 2490 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2491 pref_len++;
2492 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2493 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2494 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2495 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2496 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2497 l--;
d6a28714
JH
2498 if (pref0_len < 0)
2499 pref0_len = 0;
2500 if (pref0_len > pref_len)
2501 pref0_len = pref_len;
2502 regprop(prop, scan);
2a782b5b 2503 {
1df70142 2504 const char * const s0 =
f14c76ed 2505 do_utf8 && OP(scan) != CANY ?
95b24440 2506 pv_uni_display(PERL_DEBUG_PAD(0), (U8*)(locinput - pref_len),
c728cb41 2507 pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2508 locinput - pref_len;
1df70142
AL
2509 const int len0 = do_utf8 ? strlen(s0) : pref0_len;
2510 const char * const s1 = do_utf8 && OP(scan) != CANY ?
95b24440
DM
2511 pv_uni_display(PERL_DEBUG_PAD(1),
2512 (U8*)(locinput - pref_len + pref0_len),
c728cb41 2513 pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2514 locinput - pref_len + pref0_len;
1df70142
AL
2515 const int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2516 const char * const s2 = do_utf8 && OP(scan) != CANY ?
95b24440 2517 pv_uni_display(PERL_DEBUG_PAD(2), (U8*)locinput,
c728cb41 2518 PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2519 locinput;
1df70142 2520 const int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2521 PerlIO_printf(Perl_debug_log,
2522 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2523 (IV)(locinput - PL_bostr),
2524 PL_colors[4],
2525 len0, s0,
2526 PL_colors[5],
2527 PL_colors[2],
2528 len1, s1,
2529 PL_colors[3],
2530 (docolor ? "" : "> <"),
2531 PL_colors[0],
2532 len2, s2,
2533 PL_colors[1],
2534 15 - l - pref_len + 1,
2535 "",
4f639d21 2536 (IV)(scan - rex->program), PL_regindent*2, "",
3f7c398e 2537 SvPVX_const(prop));
2a782b5b
JH
2538 }
2539 });
d6a28714
JH
2540
2541 next = scan + NEXT_OFF(scan);
2542 if (next == scan)
2543 next = NULL;
2544
2545 switch (OP(scan)) {
2546 case BOL:
7fba1cd6 2547 if (locinput == PL_bostr)
d6a28714
JH
2548 {
2549 /* regtill = regbol; */
b8c5462f
JH
2550 break;
2551 }
d6a28714
JH
2552 sayNO;
2553 case MBOL:
12d33761
HS
2554 if (locinput == PL_bostr ||
2555 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2556 {
b8c5462f
JH
2557 break;
2558 }
d6a28714
JH
2559 sayNO;
2560 case SBOL:
c2a73568 2561 if (locinput == PL_bostr)
b8c5462f 2562 break;
d6a28714
JH
2563 sayNO;
2564 case GPOS:
2565 if (locinput == PL_reg_ganch)
2566 break;
2567 sayNO;
2568 case EOL:
d6a28714
JH
2569 goto seol;
2570 case MEOL:
d6a28714 2571 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2572 sayNO;
b8c5462f 2573 break;
d6a28714
JH
2574 case SEOL:
2575 seol:
2576 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2577 sayNO;
d6a28714 2578 if (PL_regeol - locinput > 1)
b8c5462f 2579 sayNO;
b8c5462f 2580 break;
d6a28714
JH
2581 case EOS:
2582 if (PL_regeol != locinput)
b8c5462f 2583 sayNO;
d6a28714 2584 break;
ffc61ed2 2585 case SANY:
d6a28714 2586 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2587 sayNO;
f33976b4
DB
2588 if (do_utf8) {
2589 locinput += PL_utf8skip[nextchr];
2590 if (locinput > PL_regeol)
2591 sayNO;
2592 nextchr = UCHARAT(locinput);
2593 }
2594 else
2595 nextchr = UCHARAT(++locinput);
2596 break;
2597 case CANY:
2598 if (!nextchr && locinput >= PL_regeol)
2599 sayNO;
b8c5462f 2600 nextchr = UCHARAT(++locinput);
a0d0e21e 2601 break;
ffc61ed2 2602 case REG_ANY:
1aa99e6b
IH
2603 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2604 sayNO;
2605 if (do_utf8) {
b8c5462f 2606 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2607 if (locinput > PL_regeol)
2608 sayNO;
a0ed51b3 2609 nextchr = UCHARAT(locinput);
a0ed51b3 2610 }
1aa99e6b
IH
2611 else
2612 nextchr = UCHARAT(++locinput);
a0ed51b3 2613 break;
a3621e74
YO
2614
2615
2616
2617 /*
2618 traverse the TRIE keeping track of all accepting states
2619 we transition through until we get to a failing node.
2620
a3621e74
YO
2621
2622 */
5b47454d 2623 case TRIE:
a3621e74
YO
2624 case TRIEF:
2625 case TRIEFL:
2626 {
a3621e74
YO
2627 U8 *uc = ( U8* )locinput;
2628 U32 state = 1;
2629 U16 charid = 0;
2630 U32 base = 0;
2631 UV uvc = 0;
2632 STRLEN len = 0;
2633 STRLEN foldlen = 0;
a3621e74
YO
2634 U8 *uscan = (U8*)NULL;
2635 STRLEN bufflen=0;
95b24440 2636 SV *sv_accept_buff = NULL;
5b47454d
DM
2637 const enum { trie_plain, trie_utf8, trie_uft8_fold }
2638 trie_type = do_utf8 ?
2639 (OP(scan) == TRIE ? trie_utf8 : trie_uft8_fold)
2640 : trie_plain;
2641
7087a21c
NC
2642 /* what trie are we using right now */
2643 reg_trie_data *trie
4f639d21 2644 = (reg_trie_data*)PL_reg_re->data->data[ ARG( scan ) ];
d8319b27 2645 st->u.trie.accepted = 0; /* how many accepting states we have seen */
95b24440 2646 result = 0;
a3621e74
YO
2647
2648 while ( state && uc <= (U8*)PL_regeol ) {
2649
5b47454d 2650 if (trie->states[ state ].wordnum) {
d8319b27 2651 if (!st->u.trie.accepted ) {
5b47454d
DM
2652 ENTER;
2653 SAVETMPS;
2654 bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
2655 sv_accept_buff=newSV(bufflen *
2656 sizeof(reg_trie_accepted) - 1);
2657 SvCUR_set(sv_accept_buff,
2658 sizeof(reg_trie_accepted));
2659 SvPOK_on(sv_accept_buff);
2660 sv_2mortal(sv_accept_buff);
d8319b27 2661 st->u.trie.accept_buff =
5b47454d
DM
2662 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
2663 }
2664 else {
d8319b27 2665 if (st->u.trie.accepted >= bufflen) {
5b47454d 2666 bufflen *= 2;
d8319b27 2667 st->u.trie.accept_buff =(reg_trie_accepted*)
5b47454d
DM
2668 SvGROW(sv_accept_buff,
2669 bufflen * sizeof(reg_trie_accepted));
2670 }
2671 SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
2672 + sizeof(reg_trie_accepted));
2673 }
d8319b27
DM
2674 st->u.trie.accept_buff[st->u.trie.accepted].wordnum = trie->states[state].wordnum;
2675 st->u.trie.accept_buff[st->u.trie.accepted].endpos = uc;
2676 ++st->u.trie.accepted;
5b47454d 2677 }
a3621e74
YO
2678
2679 base = trie->states[ state ].trans.base;
2680
2681 DEBUG_TRIE_EXECUTE_r(
2682 PerlIO_printf( Perl_debug_log,
e4584336 2683 "%*s %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ",
a3621e74 2684 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
d8319b27 2685 (UV)state, (UV)base, (UV)st->u.trie.accepted );
a3621e74
YO
2686 );
2687
2688 if ( base ) {
5b47454d
DM
2689 switch (trie_type) {
2690 case trie_uft8_fold:
a3621e74
YO
2691 if ( foldlen>0 ) {
2692 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
2693 foldlen -= len;
2694 uscan += len;
2695 len=0;
2696 } else {
1df70142 2697 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
a3621e74
YO
2698 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2699 uvc = to_uni_fold( uvc, foldbuf, &foldlen );
2700 foldlen -= UNISKIP( uvc );
2701 uscan = foldbuf + UNISKIP( uvc );
2702 }
5b47454d
DM
2703 break;
2704 case trie_utf8:
2705 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN,
2706 &len, uniflags );
2707 break;
2708 case trie_plain:
e4584336 2709 uvc = (UV)*uc;
a3621e74
YO
2710 len = 1;
2711 }
2712
5b47454d
DM
2713 if (uvc < 256) {
2714 charid = trie->charmap[ uvc ];
2715 }
2716 else {
2717 charid = 0;
2718 if (trie->widecharmap) {
2719 SV** svpp = (SV**)NULL;
2720 svpp = hv_fetch(trie->widecharmap,
2721 (char*)&uvc, sizeof(UV), 0);
2722 if (svpp)
2723 charid = (U16)SvIV(*svpp);
2724 }
2725 }
a3621e74 2726
5b47454d
DM
2727 if (charid &&
2728 (base + charid > trie->uniquecharcount )
2729 && (base + charid - 1 - trie->uniquecharcount
2730 < trie->lasttrans)
2731 && trie->trans[base + charid - 1 -
2732 trie->uniquecharcount].check == state)
2733 {
2734 state = trie->trans[base + charid - 1 -
2735 trie->uniquecharcount ].next;
2736 }
2737 else {
2738 state = 0;
2739 }
2740 uc += len;
2741
2742 }
2743 else {
a3621e74
YO
2744 state = 0;
2745 }
2746 DEBUG_TRIE_EXECUTE_r(
e4584336
RB
2747 PerlIO_printf( Perl_debug_log,
2748 "Charid:%3x CV:%4"UVxf" After State: %4"UVxf"%s\n",
2749 charid, uvc, (UV)state, PL_colors[5] );
a3621e74
YO
2750 );
2751 }
d8319b27 2752 if (!st->u.trie.accepted )
a3621e74 2753 sayNO;
a3621e74
YO
2754
2755 /*
2756 There was at least one accepting state that we
2757 transitioned through. Presumably the number of accepting
2758 states is going to be low, typically one or two. So we
2759 simply scan through to find the one with lowest wordnum.
2760 Once we find it, we swap the last state into its place
2761 and decrement the size. We then try to match the rest of
2762 the pattern at the point where the word ends, if we
2763 succeed then we end the loop, otherwise the loop
2764 eventually terminates once all of the accepting states
2765 have been tried.
2766 */
a3621e74 2767
d8319b27 2768 if ( st->u.trie.accepted == 1 ) {
a3621e74 2769 DEBUG_EXECUTE_r({
097eb12c 2770 SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ 0 ].wordnum-1, 0 );
a3621e74
YO
2771 PerlIO_printf( Perl_debug_log,
2772 "%*s %sonly one match : #%d <%s>%s\n",
e4584336 2773 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
d8319b27 2774 st->u.trie.accept_buff[ 0 ].wordnum,
cfd0369c 2775 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",
a3621e74
YO
2776 PL_colors[5] );
2777 });
d8319b27 2778 PL_reginput = (char *)st->u.trie.accept_buff[ 0 ].endpos;
a3621e74
YO
2779 /* in this case we free tmps/leave before we call regmatch
2780 as we wont be using accept_buff again. */
2781 FREETMPS;
2782 LEAVE;
95b24440
DM
2783 REGMATCH(scan + NEXT_OFF(scan), TRIE1);
2784 /*** all unsaved local vars undefined at this point */
a3621e74
YO
2785 } else {
2786 DEBUG_EXECUTE_r(
e4584336 2787 PerlIO_printf( Perl_debug_log,"%*s %sgot %"IVdf" possible matches%s\n",
d8319b27 2788 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], (IV)st->u.trie.accepted,
a3621e74
YO
2789 PL_colors[5] );
2790 );
d8319b27 2791 while ( !result && st->u.trie.accepted-- ) {
a3621e74
YO
2792 U32 best = 0;
2793 U32 cur;
d8319b27 2794 for( cur = 1 ; cur <= st->u.trie.accepted ; cur++ ) {
e4584336
RB
2795 DEBUG_TRIE_EXECUTE_r(
2796 PerlIO_printf( Perl_debug_log,
2797 "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
2798 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
d8319b27
DM
2799 (IV)best, st->u.trie.accept_buff[ best ].wordnum, (IV)cur,
2800 st->u.trie.accept_buff[ cur ].wordnum, PL_colors[5] );
e4584336 2801 );
a3621e74 2802
d8319b27
DM
2803 if (st->u.trie.accept_buff[cur].wordnum <
2804 st->u.trie.accept_buff[best].wordnum)
e822a8b4 2805 best = cur;
a3621e74
YO
2806 }
2807 DEBUG_EXECUTE_r({
87830502
DM
2808 reg_trie_data * const trie = (reg_trie_data*)
2809 PL_reg_re->data->data[ARG(scan)];
d8319b27 2810 SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ best ].wordnum - 1, 0 );
a3621e74 2811 PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at 0x%p%s\n",
e4584336 2812 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
d8319b27 2813 st->u.trie.accept_buff[best].wordnum,
cfd0369c 2814 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",scan,
a3621e74
YO
2815 PL_colors[5] );
2816 });
d8319b27
DM
2817 if ( best<st->u.trie.accepted ) {
2818 reg_trie_accepted tmp = st->u.trie.accept_buff[ best ];
2819 st->u.trie.accept_buff[ best ] = st->u.trie.accept_buff[ st->u.trie.accepted ];
2820 st->u.trie.accept_buff[ st->u.trie.accepted ] = tmp;
2821 best = st->u.trie.accepted;
a3621e74 2822 }
d8319b27 2823 PL_reginput = (char *)st->u.trie.accept_buff[ best ].endpos;
a3621e74
YO
2824
2825 /*
2826 as far as I can tell we only need the SAVETMPS/FREETMPS
2827 for re's with EVAL in them but I'm leaving them in for
2828 all until I can be sure.
2829 */
2830 SAVETMPS;
95b24440
DM
2831 REGMATCH(scan + NEXT_OFF(scan), TRIE2);
2832 /*** all unsaved local vars undefined at this point */
a3621e74
YO
2833 FREETMPS;
2834 }
2835 FREETMPS;
2836 LEAVE;
2837 }
2838
95b24440 2839 if (result) {
a3621e74
YO
2840 sayYES;
2841 } else {
2842 sayNO;
2843 }
2844 }
2845 /* unreached codepoint */
95b24440
DM
2846 case EXACT: {
2847 char *s = STRING(scan);
5d9a96ca 2848 st->ln = STR_LEN(scan);
eb160463 2849 if (do_utf8 != UTF) {
bc517b45 2850 /* The target and the pattern have differing utf8ness. */
1aa99e6b 2851 char *l = locinput;
5d9a96ca 2852 const char *e = s + st->ln;
a72c7584 2853
5ff6fc6d
JH
2854 if (do_utf8) {
2855 /* The target is utf8, the pattern is not utf8. */
1aa99e6b 2856 while (s < e) {
a3b680e6 2857 STRLEN ulen;
1aa99e6b 2858 if (l >= PL_regeol)
5ff6fc6d
JH
2859 sayNO;
2860 if (NATIVE_TO_UNI(*(U8*)s) !=
89ebb4a3 2861 utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
041457d9 2862 uniflags))
5ff6fc6d 2863 sayNO;
bc517b45 2864 l += ulen;
5ff6fc6d 2865 s ++;
1aa99e6b 2866 }
5ff6fc6d
JH
2867 }
2868 else {
2869 /* The target is not utf8, the pattern is utf8. */
1aa99e6b 2870 while (s < e) {
a3b680e6 2871 STRLEN ulen;
1aa99e6b
IH
2872 if (l >= PL_regeol)
2873 sayNO;
5ff6fc6d 2874 if (NATIVE_TO_UNI(*((U8*)l)) !=
89ebb4a3 2875 utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
041457d9 2876 uniflags))
1aa99e6b 2877 sayNO;
bc517b45 2878 s += ulen;
a72c7584 2879 l ++;
1aa99e6b 2880 }
5ff6fc6d 2881 }
1aa99e6b
IH
2882 locinput = l;
2883 nextchr = UCHARAT(locinput);
2884 break;
2885 }
bc517b45 2886 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2887 /* Inline the first character, for speed. */
2888 if (UCHARAT(s) != nextchr)
2889 sayNO;
5d9a96ca 2890 if (PL_regeol - locinput < st->ln)
d6a28714 2891 sayNO;
5d9a96ca 2892 if (st->ln > 1 && memNE(s, locinput, st->ln))
d6a28714 2893 sayNO;
5d9a96ca 2894 locinput += st->ln;
d6a28714
JH
2895 nextchr = UCHARAT(locinput);
2896 break;
95b24440 2897 }
d6a28714 2898 case EXACTFL:
b8c5462f
JH
2899 PL_reg_flags |= RF_tainted;
2900 /* FALL THROUGH */
95b24440
DM
2901 case EXACTF: {
2902 char *s = STRING(scan);
5d9a96ca 2903 st->ln = STR_LEN(scan);
d6a28714 2904
d07ddd77
JH
2905 if (do_utf8 || UTF) {
2906 /* Either target or the pattern are utf8. */
d6a28714 2907 char *l = locinput;
d07ddd77 2908 char *e = PL_regeol;
bc517b45 2909
5d9a96ca 2910 if (ibcmp_utf8(s, 0, st->ln, (bool)UTF,
1feea2c7 2911 l, &e, 0, do_utf8)) {
5486206c
JH
2912 /* One more case for the sharp s:
2913 * pack("U0U*", 0xDF) =~ /ss/i,
2914 * the 0xC3 0x9F are the UTF-8
2915 * byte sequence for the U+00DF. */
2916 if (!(do_utf8 &&
2917 toLOWER(s[0]) == 's' &&
5d9a96ca 2918 st->ln >= 2 &&
5486206c
JH
2919 toLOWER(s[1]) == 's' &&
2920 (U8)l[0] == 0xC3 &&
2921 e - l >= 2 &&
2922 (U8)l[1] == 0x9F))
2923 sayNO;
2924 }
d07ddd77
JH
2925 locinput = e;
2926 nextchr = UCHARAT(locinput);
2927 break;
a0ed51b3 2928 }
d6a28714 2929
bc517b45
JH
2930 /* Neither the target and the pattern are utf8. */
2931
d6a28714
JH
2932 /* Inline the first character, for speed. */
2933 if (UCHARAT(s) != nextchr &&
2934 UCHARAT(s) != ((OP(scan) == EXACTF)
2935 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2936 sayNO;
5d9a96ca 2937 if (PL_regeol - locinput < st->ln)
b8c5462f 2938 sayNO;
5d9a96ca
DM
2939 if (st->ln > 1 && (OP(scan) == EXACTF
2940 ? ibcmp(s, locinput, st->ln)
2941 : ibcmp_locale(s, locinput, st->ln)))
4633a7c4 2942 sayNO;
5d9a96ca 2943 locinput += st->ln;
d6a28714 2944 nextchr = UCHARAT(locinput);
a0d0e21e 2945 break;
95b24440 2946 }
d6a28714 2947 case ANYOF:
ffc61ed2 2948 if (do_utf8) {
9e55ce06
JH
2949 STRLEN inclasslen = PL_regeol - locinput;
2950
ba7b4546 2951 if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
e0f9d4a8 2952 sayNO_ANYOF;
ffc61ed2
JH
2953 if (locinput >= PL_regeol)
2954 sayNO;
0f0076b4 2955 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
b8c5462f 2956 nextchr = UCHARAT(locinput);
e0f9d4a8 2957 break;
ffc61ed2
JH
2958 }
2959 else {
2960 if (nextchr < 0)
2961 nextchr = UCHARAT(locinput);
7d3e948e 2962 if (!REGINCLASS(scan, (U8*)locinput))
e0f9d4a8 2963 sayNO_ANYOF;
ffc61ed2
JH
2964 if (!nextchr && locinput >= PL_regeol)
2965 sayNO;
2966 nextchr = UCHARAT(++locinput);
e0f9d4a8
JH
2967 break;
2968 }
2969 no_anyof:
2970 /* If we might have the case of the German sharp s
2971 * in a casefolding Unicode character class. */
2972
ebc501f0
JH
2973 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2974 locinput += SHARP_S_SKIP;
e0f9d4a8 2975 nextchr = UCHARAT(locinput);
ffc61ed2 2976 }
e0f9d4a8
JH
2977 else
2978 sayNO;
b8c5462f 2979 break;
d6a28714 2980 case ALNUML:
b8c5462f
JH
2981 PL_reg_flags |= RF_tainted;
2982 /* FALL THROUGH */
d6a28714 2983 case ALNUM:
b8c5462f 2984 if (!nextchr)
4633a7c4 2985 sayNO;
ffc61ed2 2986 if (do_utf8) {
1a4fad37 2987 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 2988 if (!(OP(scan) == ALNUM
3568d838 2989 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2990 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2991 {
2992 sayNO;
a0ed51b3 2993 }
b8c5462f 2994 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2995 nextchr = UCHARAT(locinput);
2996 break;
2997 }
ffc61ed2 2998 if (!(OP(scan) == ALNUM
d6a28714 2999 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 3000 sayNO;
b8c5462f 3001 nextchr = UCHARAT(++locinput);
a0d0e21e 3002 break;
d6a28714 3003 case NALNUML:
b8c5462f
JH
3004 PL_reg_flags |= RF_tainted;
3005 /* FALL THROUGH */
d6a28714
JH
3006 case NALNUM:
3007 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 3008 sayNO;
ffc61ed2 3009 if (do_utf8) {
1a4fad37 3010 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3011 if (OP(scan) == NALNUM
3568d838 3012 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
3013 : isALNUM_LC_utf8((U8*)locinput))
3014 {
b8c5462f 3015 sayNO;
d6a28714 3016 }
b8c5462f
JH
3017 locinput += PL_utf8skip[nextchr];
3018 nextchr = UCHARAT(locinput);
3019 break;
3020 }
ffc61ed2 3021 if (OP(scan) == NALNUM
d6a28714 3022 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 3023 sayNO;
76e3520e 3024 nextchr = UCHARAT(++locinput);
a0d0e21e 3025 break;
d6a28714
JH
3026 case BOUNDL:
3027 case NBOUNDL:
3280af22 3028 PL_reg_flags |= RF_tainted;
bbce6d69 3029 /* FALL THROUGH */
d6a28714
JH
3030 case BOUND:
3031 case NBOUND:
3032 /* was last char in word? */
ffc61ed2 3033 if (do_utf8) {
12d33761 3034 if (locinput == PL_bostr)
5d9a96ca 3035 st->ln = '\n';
ffc61ed2 3036 else {
a3b680e6 3037 const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
9041c2e3 3038
4ad0818d 3039 st->ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
ffc61ed2
JH
3040 }
3041 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
5d9a96ca 3042 st->ln = isALNUM_uni(st->ln);
1a4fad37 3043 LOAD_UTF8_CHARCLASS_ALNUM();
3568d838 3044 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
3045 }
3046 else {
5d9a96ca 3047 st->ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(st->ln));
ffc61ed2
JH
3048 n = isALNUM_LC_utf8((U8*)locinput);
3049 }
a0ed51b3 3050 }
d6a28714 3051 else {
5d9a96ca 3052 st->ln = (locinput != PL_bostr) ?
12d33761 3053 UCHARAT(locinput - 1) : '\n';
ffc61ed2 3054 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
5d9a96ca 3055 st->ln = isALNUM(st->ln);
ffc61ed2
JH
3056 n = isALNUM(nextchr);
3057 }
3058 else {
5d9a96ca 3059 st->ln = isALNUM_LC(st->ln);
ffc61ed2
JH
3060 n = isALNUM_LC(nextchr);
3061 }
d6a28714 3062 }
5d9a96ca 3063 if (((!st->ln) == (!n)) == (OP(scan) == BOUND ||
ffc61ed2
JH
3064 OP(scan) == BOUNDL))
3065 sayNO;
a0ed51b3 3066 break;
d6a28714 3067 case SPACEL:
3280af22 3068 PL_reg_flags |= RF_tainted;
bbce6d69 3069 /* FALL THROUGH */
d6a28714 3070 case SPACE:
9442cb0e 3071 if (!nextchr)
4633a7c4 3072 sayNO;
1aa99e6b 3073 if (do_utf8) {
fd400ab9 3074 if (UTF8_IS_CONTINUED(nextchr)) {
1a4fad37 3075 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3076 if (!(OP(scan) == SPACE
3568d838 3077 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
3078 : isSPACE_LC_utf8((U8*)locinput)))
3079 {
3080 sayNO;
3081 }
3082 locinput += PL_utf8skip[nextchr];
3083 nextchr = UCHARAT(locinput);
3084 break;
d6a28714 3085 }
ffc61ed2
JH
3086 if (!(OP(scan) == SPACE
3087 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3088 sayNO;
3089 nextchr = UCHARAT(++locinput);
3090 }
3091 else {
3092 if (!(OP(scan) == SPACE
3093 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3094 sayNO;
3095 nextchr = UCHARAT(++locinput);
a0ed51b3 3096 }
a0ed51b3 3097 break;
d6a28714 3098 case NSPACEL:
3280af22 3099 PL_reg_flags |= RF_tainted;
bbce6d69 3100 /* FALL THROUGH */
d6a28714 3101 case NSPACE:
9442cb0e 3102 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3103 sayNO;
1aa99e6b 3104 if (do_utf8) {
1a4fad37 3105 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3106 if (OP(scan) == NSPACE
3568d838 3107 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 3108 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
3109 {
3110 sayNO;
3111 }
3112 locinput += PL_utf8skip[nextchr];
3113 nextchr = UCHARAT(locinput);
3114 break;
a0ed51b3 3115 }
ffc61ed2 3116 if (OP(scan) == NSPACE
d6a28714 3117 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 3118 sayNO;
76e3520e 3119 nextchr = UCHARAT(++locinput);
a0d0e21e 3120 break;
d6a28714 3121 case DIGITL:
a0ed51b3
LW
3122 PL_reg_flags |= RF_tainted;
3123 /* FALL THROUGH */
d6a28714 3124 case DIGIT:
9442cb0e 3125 if (!nextchr)
a0ed51b3 3126 sayNO;
1aa99e6b 3127 if (do_utf8) {
1a4fad37 3128 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3129 if (!(OP(scan) == DIGIT
3568d838 3130 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 3131 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 3132 {
a0ed51b3 3133 sayNO;
dfe13c55 3134 }
6f06b55f 3135 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3136 nextchr = UCHARAT(locinput);
3137 break;
3138 }
ffc61ed2 3139 if (!(OP(scan) == DIGIT
9442cb0e 3140 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
3141 sayNO;
3142 nextchr = UCHARAT(++locinput);
3143 break;
d6a28714 3144 case NDIGITL:
b8c5462f
JH
3145 PL_reg_flags |= RF_tainted;
3146 /* FALL THROUGH */
d6a28714 3147 case NDIGIT:
9442cb0e 3148 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3149 sayNO;
1aa99e6b 3150 if (do_utf8) {
1a4fad37 3151 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3152 if (OP(scan) == NDIGIT
3568d838 3153 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
3154 : isDIGIT_LC_utf8((U8*)locinput))
3155 {
a0ed51b3 3156 sayNO;
9442cb0e 3157 }
6f06b55f 3158 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3159 nextchr = UCHARAT(locinput);
3160 break;
3161 }
ffc61ed2 3162 if (OP(scan) == NDIGIT
9442cb0e 3163 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
3164 sayNO;
3165 nextchr = UCHARAT(++locinput);
3166 break;
3167 case CLUMP:
b7c83a7e 3168 if (locinput >= PL_regeol)
a0ed51b3 3169 sayNO;
b7c83a7e 3170 if (do_utf8) {
1a4fad37 3171 LOAD_UTF8_CHARCLASS_MARK();
b7c83a7e
JH
3172 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3173 sayNO;
3174 locinput += PL_utf8skip[nextchr];
3175 while (locinput < PL_regeol &&
3176 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3177 locinput += UTF8SKIP(locinput);
3178 if (locinput > PL_regeol)
3179 sayNO;
eb08e2da
JH
3180 }
3181 else
3182 locinput++;
a0ed51b3
LW
3183 nextchr = UCHARAT(locinput);
3184 break;
c8756f30 3185 case REFFL:
3280af22 3186 PL_reg_flags |= RF_tainted;
c8756f30 3187 /* FALL THROUGH */
c277df42 3188 case REF:
95b24440
DM
3189 case REFF: {
3190 char *s;
c277df42 3191 n = ARG(scan); /* which paren pair */
5d9a96ca 3192 st->ln = PL_regstartp[n];
2c2d71f5 3193 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
5d9a96ca 3194 if ((I32)*PL_reglastparen < n || st->ln == -1)
af3f8c16 3195 sayNO; /* Do not match unless seen CLOSEn. */
5d9a96ca 3196 if (st->ln == PL_regendp[n])
a0d0e21e 3197 break;
a0ed51b3 3198
5d9a96ca 3199 s = PL_bostr + st->ln;
1aa99e6b 3200 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 3201 char *l = locinput;
a3b680e6 3202 const char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
3203 /*
3204 * Note that we can't do the "other character" lookup trick as
3205 * in the 8-bit case (no pun intended) because in Unicode we
3206 * have to map both upper and title case to lower case.
3207 */
3208 if (OP(scan) == REFF) {
3209 while (s < e) {
a3b680e6
AL
3210 STRLEN ulen1, ulen2;
3211 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3212 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3213
a0ed51b3
LW
3214 if (l >= PL_regeol)
3215 sayNO;
a2a2844f
JH
3216 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3217 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 3218 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 3219 sayNO;
a2a2844f
JH
3220 s += ulen1;
3221 l += ulen2;
a0ed51b3
LW
3222 }
3223 }
3224 locinput = l;
3225 nextchr = UCHARAT(locinput);
3226 break;
3227 }
3228
a0d0e21e 3229 /* Inline the first character, for speed. */
76e3520e 3230 if (UCHARAT(s) != nextchr &&
c8756f30
AK
3231 (OP(scan) == REF ||
3232 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 3233 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 3234 sayNO;
5d9a96ca
DM
3235 st->ln = PL_regendp[n] - st->ln;
3236 if (locinput + st->ln > PL_regeol)
4633a7c4 3237 sayNO;
5d9a96ca
DM
3238 if (st->ln > 1 && (OP(scan) == REF
3239 ? memNE(s, locinput, st->ln)
c8756f30 3240 : (OP(scan) == REFF
5d9a96ca
DM
3241 ? ibcmp(s, locinput, st->ln)
3242 : ibcmp_locale(s, locinput, st->ln))))
4633a7c4 3243 sayNO;
5d9a96ca 3244 locinput += st->ln;
76e3520e 3245 nextchr = UCHARAT(locinput);
a0d0e21e 3246 break;
95b24440 3247 }
a0d0e21e
LW
3248
3249 case NOTHING:
c277df42 3250 case TAIL:
a0d0e21e
LW
3251 break;
3252 case BACK:
3253 break;
c277df42
IZ
3254 case EVAL:
3255 {
c277df42 3256 SV *ret;
8e5e9ebe 3257 {
4aabdb9b
DM
3258 /* execute the code in the {...} */
3259 dSP;
6136c704 3260 SV ** const before = SP;
4aabdb9b
DM
3261 OP_4tree * const oop = PL_op;
3262 COP * const ocurcop = PL_curcop;
3263 PAD *old_comppad;
3264 struct regexp * const oreg = PL_reg_re;
3265
3266 n = ARG(scan);
4f639d21 3267 PL_op = (OP_4tree*)PL_reg_re->data->data[n];
4aabdb9b 3268 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
4f639d21 3269 PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_reg_re->data->data[n + 2]);
4aabdb9b
DM
3270 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
3271
8e5e9ebe
RGS
3272 CALLRUNOPS(aTHX); /* Scalar context. */
3273 SPAGAIN;
3274 if (SP == before)
075aa684 3275 ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
8e5e9ebe
RGS
3276 else {
3277 ret = POPs;
3278 PUTBACK;
3279 }
4aabdb9b
DM
3280
3281 PL_op = oop;
3282 PAD_RESTORE_LOCAL(old_comppad);
3283 PL_curcop = ocurcop;
87830502 3284 PL_reg_re = oreg;
4aabdb9b
DM
3285 if (!st->logical) {
3286 /* /(?{...})/ */
3287 sv_setsv(save_scalar(PL_replgv), ret);
4aabdb9b
DM
3288 break;
3289 }
8e5e9ebe 3290 }
4aabdb9b
DM
3291 if (st->logical == 2) { /* Postponed subexpression: /(??{...})/ */
3292 regexp *re;
3293 re_cc_state state;
3294 int toggleutf;
8e5e9ebe 3295
4aabdb9b 3296 {
4f639d21
DM
3297 /* extract RE object from returned value; compiling if
3298 * necessary */
3299
6136c704 3300 MAGIC *mg = NULL;
4aabdb9b 3301 SV *sv;
faf82a0b
AE
3302 if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3303 mg = mg_find(sv, PERL_MAGIC_qr);
3304 else if (SvSMAGICAL(ret)) {
3305 if (SvGMAGICAL(ret))
3306 sv_unmagic(ret, PERL_MAGIC_qr);
3307 else
3308 mg = mg_find(ret, PERL_MAGIC_qr);
0f5d15d6 3309 }
faf82a0b 3310
0f5d15d6
IZ
3311 if (mg) {
3312 re = (regexp *)mg->mg_obj;
df0003d4 3313 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
3314 }
3315 else {
3316 STRLEN len;
6136c704 3317 const char * const t = SvPV_const(ret, len);
0f5d15d6 3318 PMOP pm;
a3b680e6 3319 const I32 osize = PL_regsize;
0f5d15d6 3320
5fcd1c1b 3321 Zero(&pm, 1, PMOP);
4aabdb9b 3322 if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
83003860 3323 re = CALLREGCOMP(aTHX_ (char*)t, (char*)t + len, &pm);
9041c2e3 3324 if (!(SvFLAGS(ret)
faf82a0b
AE
3325 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3326 | SVs_GMG)))
14befaf4
DM
3327 sv_magic(ret,(SV*)ReREFCNT_inc(re),
3328 PERL_MAGIC_qr,0,0);
0f5d15d6 3329 PL_regsize = osize;
0f5d15d6 3330 }
4aabdb9b
DM
3331 }
3332 DEBUG_EXECUTE_r(
3333 PerlIO_printf(Perl_debug_log,
3334 "Entering embedded \"%s%.60s%s%s\"\n",
3335 PL_colors[0],
3336 re->precomp,
3337 PL_colors[1],
3338 (strlen(re->precomp) > 60 ? "..." : ""))
3339 );
3340 state.node = next;
3341 state.prev = PL_reg_call_cc;
3342 state.cc = st->cc;
3343 state.re = PL_reg_re;
2c2d71f5 3344
4aabdb9b
DM
3345 st->cc = 0;
3346
3347 st->u.eval.cp = regcppush(0); /* Save *all* the positions. */
3348 REGCP_SET(st->u.eval.lastcp);
4f639d21 3349 PL_reg_re = re;
4aabdb9b
DM
3350 state.ss = PL_savestack_ix;
3351 *PL_reglastparen = 0;
3352 *PL_reglastcloseparen = 0;
3353 PL_reg_call_cc = &state;
3354 PL_reginput = locinput;
3355 toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
3356 ((re->reganch & ROPT_UTF8) != 0);
3357 if (toggleutf) PL_reg_flags ^= RF_utf8;
3358
3359 /* XXXX This is too dramatic a measure... */
3360 PL_reg_maxiter = 0;
3361
3362 /* XXX the only recursion left in regmatch() */
4f639d21 3363 if (regmatch(re, re->program + 1)) {
4aabdb9b
DM
3364 /* Even though we succeeded, we need to restore
3365 global variables, since we may be wrapped inside
3366 SUSPEND, thus the match may be not finished yet. */
3367
3368 /* XXXX Do this only if SUSPENDed? */
0f5d15d6 3369 PL_reg_call_cc = state.prev;
5d9a96ca 3370 st->cc = state.cc;
0f5d15d6 3371 PL_reg_re = state.re;
4f639d21 3372
cb50f42d 3373 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c2d71f5
JH
3374
3375 /* XXXX This is too dramatic a measure... */
3376 PL_reg_maxiter = 0;
3377
4aabdb9b
DM
3378 /* These are needed even if not SUSPEND. */
3379 ReREFCNT_dec(re);
3380 regcpblow(st->u.eval.cp);
3381 sayYES;
0f5d15d6 3382 }
4aabdb9b
DM
3383 ReREFCNT_dec(re);
3384 REGCP_UNWIND(st->u.eval.lastcp);
4f639d21 3385 regcppop(rex);
4aabdb9b
DM
3386 PL_reg_call_cc = state.prev;
3387 st->cc = state.cc;
3388 PL_reg_re = state.re;
4aabdb9b
DM
3389 if (toggleutf) PL_reg_flags ^= RF_utf8;
3390
3391 /* XXXX This is too dramatic a measure... */
3392 PL_reg_maxiter = 0;
3393
5d9a96ca 3394 st->logical = 0;
4aabdb9b
DM
3395 sayNO;
3396 /* NOTREACHED */
a0ed51b3 3397 }
4aabdb9b
DM
3398 /* /(?(?{...})X|Y)/ */
3399 st->sw = SvTRUE(ret);
3400 st->logical = 0;
c277df42
IZ
3401 break;
3402 }
a0d0e21e 3403 case OPEN:
c277df42 3404 n = ARG(scan); /* which paren pair */
3280af22
NIS
3405 PL_reg_start_tmp[n] = locinput;
3406 if (n > PL_regsize)
3407 PL_regsize = n;
a0d0e21e
LW
3408 break;
3409 case CLOSE:
c277df42 3410 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
3411 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3412 PL_regendp[n] = locinput - PL_bostr;
eb160463 3413 if (n > (I32)*PL_reglastparen)
3280af22 3414 *PL_reglastparen = n;
a01268b5 3415 *PL_reglastcloseparen = n;
a0d0e21e 3416 break;
c277df42
IZ
3417 case GROUPP:
3418 n = ARG(scan); /* which paren pair */
5d9a96ca 3419 st->sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
3420 break;
3421 case IFTHEN:
2c2d71f5 3422 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
5d9a96ca 3423 if (st->sw)
c277df42
IZ
3424 next = NEXTOPER(NEXTOPER(scan));
3425 else {
3426 next = scan + ARG(scan);
3427 if (OP(next) == IFTHEN) /* Fake one. */
3428 next = NEXTOPER(NEXTOPER(next));
3429 }
3430 break;
3431 case LOGICAL:
5d9a96ca 3432 st->logical = scan->flags;
c277df42 3433 break;
2ab05381 3434/*******************************************************************
a0374537
DM
3435 cc points to the regmatch_state associated with the most recent CURLYX.
3436 This struct contains info about the innermost (...)* loop (an
3437 "infoblock"), and a pointer to the next outer cc.
2ab05381
IZ
3438
3439 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3440
95b24440 3441 1) After matching Y, regnode for CURLYX is processed;
2ab05381 3442
a0374537 3443 2) This regnode populates cc, and calls regmatch() recursively
95b24440 3444 with the starting point at WHILEM node;
2ab05381
IZ
3445
3446 3) Each hit of WHILEM node tries to match A and Z (in the order
3447 depending on the current iteration, min/max of {min,max} and
3448 greediness). The information about where are nodes for "A"
a0374537 3449 and "Z" is read from cc, as is info on how many times "A"
2ab05381
IZ
3450 was already matched, and greediness.
3451
3452 4) After A matches, the same WHILEM node is hit again.
3453
95b24440 3454 5) Each time WHILEM is hit, cc is the infoblock created by CURLYX
2ab05381 3455 of the same pair. Thus when WHILEM tries to match Z, it temporarily
95b24440 3456 resets cc, since this Y(A)*Z can be a part of some other loop:
2ab05381
IZ
3457 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
3458 of the external loop.
3459
a0374537 3460 Currently present infoblocks form a tree with a stem formed by st->cc
2ab05381
IZ
3461 and whatever it mentions via ->next, and additional attached trees
3462 corresponding to temporarily unset infoblocks as in "5" above.
3463
95b24440 3464 In the following picture, infoblocks for outer loop of
2ab05381
IZ
3465 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
3466 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
3467 infoblocks are drawn below the "reset" infoblock.
3468
3469 In fact in the picture below we do not show failed matches for Z and T
3470 by WHILEM blocks. [We illustrate minimal matches, since for them it is
3471 more obvious *why* one needs to *temporary* unset infoblocks.]
3472
3473 Matched REx position InfoBlocks Comment
3474 (Y(A)*?Z)*?T x
3475 Y(A)*?Z)*?T x <- O
3476 Y (A)*?Z)*?T x <- O
3477 Y A)*?Z)*?T x <- O <- I
3478 YA )*?Z)*?T x <- O <- I
3479 YA A)*?Z)*?T x <- O <- I
3480 YAA )*?Z)*?T x <- O <- I
3481 YAA Z)*?T x <- O # Temporary unset I
3482 I
3483
3484 YAAZ Y(A)*?Z)*?T x <- O
3485 I
3486
3487 YAAZY (A)*?Z)*?T x <- O
3488 I
3489
3490 YAAZY A)*?Z)*?T x <- O <- I
3491 I
3492
3493 YAAZYA )*?Z)*?T x <- O <- I
3494 I
3495
3496 YAAZYA Z)*?T x <- O # Temporary unset I
3497 I,I
3498
3499 YAAZYAZ )*?T x <- O
3500 I,I
3501
3502 YAAZYAZ T x # Temporary unset O
3503 O
3504 I,I
3505
3506 YAAZYAZT x
3507 O
3508 I,I
3509 *******************************************************************/
95b24440 3510
a0d0e21e 3511 case CURLYX: {
cb434fcc
IZ
3512 /* No need to save/restore up to this paren */
3513 I32 parenfloor = scan->flags;
c277df42
IZ
3514
3515 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3516 next += ARG(next);
cb434fcc
IZ
3517 /* XXXX Probably it is better to teach regpush to support
3518 parenfloor > PL_regsize... */
eb160463 3519 if (parenfloor > (I32)*PL_reglastparen)
cb434fcc 3520 parenfloor = *PL_reglastparen; /* Pessimization... */
a0374537 3521
d8319b27
DM
3522 st->u.curlyx.cp = PL_savestack_ix;
3523 st->u.curlyx.outercc = st->cc;
a0374537
DM
3524 st->cc = st;
3525 /* these fields contain the state of the current curly.
3526 * they are accessed by subsequent WHILEMs;
3527 * cur and lastloc are also updated by WHILEM */
d8319b27
DM
3528 st->u.curlyx.parenfloor = parenfloor;
3529 st->u.curlyx.cur = -1; /* this will be updated by WHILEM */
3530 st->u.curlyx.min = ARG1(scan);
3531 st->u.curlyx.max = ARG2(scan);
3532 st->u.curlyx.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3533 st->u.curlyx.lastloc = 0;
a0374537
DM
3534 /* st->next and st->minmod are also read by WHILEM */
3535
3280af22 3536 PL_reginput = locinput;
95b24440
DM
3537 REGMATCH(PREVOPER(next), CURLYX); /* start on the WHILEM */
3538 /*** all unsaved local vars undefined at this point */
d8319b27
DM
3539 regcpblow(st->u.curlyx.cp);
3540 st->cc = st->u.curlyx.outercc;
95b24440 3541 saySAME(result);
a0d0e21e 3542 }
5f66b61c 3543 /* NOTREACHED */
a0d0e21e
LW
3544 case WHILEM: {
3545 /*
3546 * This is really hard to understand, because after we match
3547 * what we're trying to match, we must make sure the rest of
2c2d71f5 3548 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
3549 * to go back UP the parse tree by recursing ever deeper. And
3550 * if it fails, we have to reset our parent's current state
3551 * that we can try again after backing off.
3552 */
3553
d8319b27
DM
3554 st->u.whilem.lastloc = st->cc->u.curlyx.lastloc; /* Detection of 0-len. */
3555 st->u.whilem.cache_offset = 0;
3556 st->u.whilem.cache_bit = 0;
c277df42 3557
d8319b27 3558 n = st->cc->u.curlyx.cur + 1; /* how many we know we matched */
3280af22 3559 PL_reginput = locinput;
a0d0e21e 3560
a3621e74 3561 DEBUG_EXECUTE_r(
9041c2e3 3562 PerlIO_printf(Perl_debug_log,
91f3b821 3563 "%*s %ld out of %ld..%ld cc=%"UVxf"\n",
3280af22 3564 REPORT_CODE_OFF+PL_regindent*2, "",
d8319b27
DM
3565 (long)n, (long)st->cc->u.curlyx.min,
3566 (long)st->cc->u.curlyx.max, PTR2UV(st->cc))
c277df42 3567 );
4633a7c4 3568
a0d0e21e
LW
3569 /* If degenerate scan matches "", assume scan done. */
3570
d8319b27
DM
3571 if (locinput == st->cc->u.curlyx.lastloc && n >= st->cc->u.curlyx.min) {
3572 st->u.whilem.savecc = st->cc;
3573 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3574 if (st->cc)
d8319b27 3575 st->ln = st->cc->u.curlyx.cur;
a3621e74 3576 DEBUG_EXECUTE_r(
c3464db5
DD
3577 PerlIO_printf(Perl_debug_log,
3578 "%*s empty match detected, try continuation...\n",
3280af22 3579 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3580 );
d8319b27 3581 REGMATCH(st->u.whilem.savecc->next, WHILEM1);
95b24440 3582 /*** all unsaved local vars undefined at this point */
d8319b27 3583 st->cc = st->u.whilem.savecc;
95b24440 3584 if (result)
4633a7c4 3585 sayYES;
d8319b27
DM
3586 if (st->cc->u.curlyx.outercc)
3587 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
4633a7c4 3588 sayNO;
a0d0e21e
LW
3589 }
3590
3591 /* First just match a string of min scans. */
3592
d8319b27
DM
3593 if (n < st->cc->u.curlyx.min) {
3594 st->cc->u.curlyx.cur = n;
3595 st->cc->u.curlyx.lastloc = locinput;
3596 REGMATCH(st->cc->u.curlyx.scan, WHILEM2);
95b24440
DM
3597 /*** all unsaved local vars undefined at this point */
3598 if (result)
4633a7c4 3599 sayYES;
d8319b27
DM
3600 st->cc->u.curlyx.cur = n - 1;
3601 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
4633a7c4 3602 sayNO;
a0d0e21e
LW
3603 }
3604
2c2d71f5
JH
3605 if (scan->flags) {
3606 /* Check whether we already were at this position.
3607 Postpone detection until we know the match is not
3608 *that* much linear. */
3609 if (!PL_reg_maxiter) {
3610 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3611 PL_reg_leftiter = PL_reg_maxiter;
3612 }
3613 if (PL_reg_leftiter-- == 0) {
a3b680e6 3614 const I32 size = (PL_reg_maxiter + 7 + POSCACHE_START)/8;
2c2d71f5 3615 if (PL_reg_poscache) {
eb160463 3616 if ((I32)PL_reg_poscache_size < size) {
2c2d71f5
JH
3617 Renew(PL_reg_poscache, size, char);
3618 PL_reg_poscache_size = size;
3619 }
3620 Zero(PL_reg_poscache, size, char);
3621 }
3622 else {
3623 PL_reg_poscache_size = size;
a02a5408 3624 Newxz(PL_reg_poscache, size, char);
2c2d71f5 3625 }
a3621e74 3626 DEBUG_EXECUTE_r(
2c2d71f5
JH
3627 PerlIO_printf(Perl_debug_log,
3628 "%sDetected a super-linear match, switching on caching%s...\n",
3629 PL_colors[4], PL_colors[5])
3630 );
3631 }
3632 if (PL_reg_leftiter < 0) {
d8319b27 3633 st->u.whilem.cache_offset = locinput - PL_bostr;
2c2d71f5 3634
d8319b27
DM
3635 st->u.whilem.cache_offset = (scan->flags & 0xf) - 1 + POSCACHE_START
3636 + st->u.whilem.cache_offset * (scan->flags>>4);
3637 st->u.whilem.cache_bit = st->u.whilem.cache_offset % 8;
3638 st->u.whilem.cache_offset /= 8;
3639 if (PL_reg_poscache[st->u.whilem.cache_offset] & (1<<st->u.whilem.cache_bit)) {
a3621e74 3640 DEBUG_EXECUTE_r(
2c2d71f5
JH
3641 PerlIO_printf(Perl_debug_log,
3642 "%*s already tried at this position...\n",
3643 REPORT_CODE_OFF+PL_regindent*2, "")
3644 );
3ab3c9b4
HS
3645 if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))
3646 /* cache records success */
c2b0868c
HS
3647 sayYES;
3648 else
3ab3c9b4 3649 /* cache records failure */
c2b0868c 3650 sayNO_SILENT;
2c2d71f5 3651 }
d8319b27 3652 PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit);
2c2d71f5
JH
3653 }
3654 }
3655
a0d0e21e
LW
3656 /* Prefer next over scan for minimal matching. */
3657
5d9a96ca 3658 if (st->cc->minmod) {
d8319b27
DM
3659 st->u.whilem.savecc = st->cc;
3660 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3661 if (st->cc)
d8319b27
DM
3662 st->ln = st->cc->u.curlyx.cur;
3663 st->u.whilem.cp = regcppush(st->u.whilem.savecc->u.curlyx.parenfloor);
3664 REGCP_SET(st->u.whilem.lastcp);
3665 REGMATCH(st->u.whilem.savecc->next, WHILEM3);
95b24440 3666 /*** all unsaved local vars undefined at this point */
d8319b27 3667 st->cc = st->u.whilem.savecc;
95b24440 3668 if (result) {
d8319b27 3669 regcpblow(st->u.whilem.cp);
3ab3c9b4 3670 CACHEsayYES; /* All done. */
5f05dabc 3671 }
d8319b27 3672 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3673 regcppop(rex);
d8319b27
DM
3674 if (st->cc->u.curlyx.outercc)
3675 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
a0d0e21e 3676
d8319b27 3677 if (n >= st->cc->u.curlyx.max) { /* Maximum greed exceeded? */
9041c2e3 3678 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3679 && !(PL_reg_flags & RF_warned)) {
3680 PL_reg_flags |= RF_warned;
9014280d 3681 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
2f3ca594
GS
3682 "Complex regular subexpression recursion",
3683 REG_INFTY - 1);
c277df42 3684 }
3ab3c9b4 3685 CACHEsayNO;
c277df42 3686 }
a687059c 3687
a3621e74 3688 DEBUG_EXECUTE_r(
c3464db5
DD
3689 PerlIO_printf(Perl_debug_log,
3690 "%*s trying longer...\n",
3280af22 3691 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3692 );
a0d0e21e 3693 /* Try scanning more and see if it helps. */
3280af22 3694 PL_reginput = locinput;
d8319b27
DM
3695 st->cc->u.curlyx.cur = n;
3696 st->cc->u.curlyx.lastloc = locinput;
3697 st->u.whilem.cp = regcppush(st->cc->u.curlyx.parenfloor);
3698 REGCP_SET(st->u.whilem.lastcp);
3699 REGMATCH(st->cc->u.curlyx.scan, WHILEM4);
95b24440
DM
3700 /*** all unsaved local vars undefined at this point */
3701 if (result) {
d8319b27 3702 regcpblow(st->u.whilem.cp);
3ab3c9b4 3703 CACHEsayYES;
5f05dabc 3704 }
d8319b27 3705 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3706 regcppop(rex);
d8319b27
DM
3707 st->cc->u.curlyx.cur = n - 1;
3708 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
3ab3c9b4 3709 CACHEsayNO;
a0d0e21e
LW
3710 }
3711
3712 /* Prefer scan over next for maximal matching. */
3713
d8319b27
DM
3714 if (n < st->cc->u.curlyx.max) { /* More greed allowed? */
3715 st->u.whilem.cp = regcppush(st->cc->u.curlyx.parenfloor);
3716 st->cc->u.curlyx.cur = n;
3717 st->cc->u.curlyx.lastloc = locinput;
3718 REGCP_SET(st->u.whilem.lastcp);
3719 REGMATCH(st->cc->u.curlyx.scan, WHILEM5);
95b24440
DM
3720 /*** all unsaved local vars undefined at this point */
3721 if (result) {
d8319b27 3722 regcpblow(st->u.whilem.cp);
3ab3c9b4 3723 CACHEsayYES;
5f05dabc 3724 }
d8319b27 3725 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3726 regcppop(rex); /* Restore some previous $<digit>s? */
3280af22 3727 PL_reginput = locinput;
a3621e74 3728 DEBUG_EXECUTE_r(
c3464db5
DD
3729 PerlIO_printf(Perl_debug_log,
3730 "%*s failed, try continuation...\n",
3280af22 3731 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3732 );
3733 }
9041c2e3 3734 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3735 && !(PL_reg_flags & RF_warned)) {
3280af22 3736 PL_reg_flags |= RF_warned;
9014280d 3737 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
cb5d145d
GS
3738 "Complex regular subexpression recursion",
3739 REG_INFTY - 1);
a0d0e21e
LW
3740 }
3741
3742 /* Failed deeper matches of scan, so see if this one works. */
d8319b27
DM
3743 st->u.whilem.savecc = st->cc;
3744 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3745 if (st->cc)
d8319b27
DM
3746 st->ln = st->cc->u.curlyx.cur;
3747 REGMATCH(st->u.whilem.savecc->next, WHILEM6);
95b24440 3748 /*** all unsaved local vars undefined at this point */
d8319b27 3749 st->cc = st->u.whilem.savecc;
95b24440 3750 if (result)
3ab3c9b4 3751 CACHEsayYES;
d8319b27
DM
3752 if (st->cc->u.curlyx.outercc)
3753 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
3754 st->cc->u.curlyx.cur = n - 1;
3755 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
3ab3c9b4 3756 CACHEsayNO;
a0d0e21e 3757 }
5f66b61c 3758 /* NOTREACHED */
9041c2e3 3759 case BRANCHJ:
c277df42
IZ
3760 next = scan + ARG(scan);
3761 if (next == scan)
3762 next = NULL;
3763 inner = NEXTOPER(NEXTOPER(scan));
3764 goto do_branch;
9041c2e3 3765 case BRANCH:
c277df42
IZ
3766 inner = NEXTOPER(scan);
3767 do_branch:
3768 {
e822a8b4
DM
3769 I32 type;
3770 type = OP(scan);
3771 if (OP(next) != type) /* No choice. */
c277df42 3772 next = inner; /* Avoid recursion. */
a0d0e21e 3773 else {
a3b680e6 3774 const I32 lastparen = *PL_reglastparen;
02db2b7b 3775 /* Put unwinding data on stack */
6136c704
AL
3776 const I32 unwind1 = SSNEWt(1,re_unwind_branch_t);
3777 re_unwind_branch_t * const uw = SSPTRt(unwind1,re_unwind_branch_t);
3778
5d9a96ca
DM
3779 uw->prev = st->unwind;
3780 st->unwind = unwind1;
e822a8b4 3781 uw->type = ((type == BRANCH)
02db2b7b
IZ
3782 ? RE_UNWIND_BRANCH
3783 : RE_UNWIND_BRANCHJ);
3784 uw->lastparen = lastparen;
3785 uw->next = next;
3786 uw->locinput = locinput;
3787 uw->nextchr = nextchr;
3788#ifdef DEBUGGING
3789 uw->regindent = ++PL_regindent;
3790#endif
c277df42 3791
02db2b7b
IZ
3792 REGCP_SET(uw->lastcp);
3793
3794 /* Now go into the first branch */
3795 next = inner;
a687059c 3796 }
a0d0e21e
LW
3797 }
3798 break;
3799 case MINMOD:
5d9a96ca 3800 st->minmod = 1;
a0d0e21e 3801 break;
c277df42
IZ
3802 case CURLYM:
3803 {
d8319b27 3804 st->u.curlym.l = st->u.curlym.matches = 0;
9041c2e3 3805
c277df42 3806 /* We suppose that the next guy does not need
0e788c72 3807 backtracking: in particular, it is of constant non-zero length,
c277df42 3808 and has no parenths to influence future backrefs. */
5d9a96ca 3809 st->ln = ARG1(scan); /* min to match */
c277df42 3810 n = ARG2(scan); /* max to match */
d8319b27
DM
3811 st->u.curlym.paren = scan->flags;
3812 if (st->u.curlym.paren) {
3813 if (st->u.curlym.paren > PL_regsize)
3814 PL_regsize = st->u.curlym.paren;
3815 if (st->u.curlym.paren > (I32)*PL_reglastparen)
3816 *PL_reglastparen = st->u.curlym.paren;
c277df42 3817 }
dc45a647 3818 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
d8319b27 3819 if (st->u.curlym.paren)
c277df42 3820 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3821 PL_reginput = locinput;
d8319b27
DM
3822 st->u.curlym.maxwanted = st->minmod ? st->ln : n;
3823 if (st->u.curlym.maxwanted) {
3824 while (PL_reginput < PL_regeol && st->u.curlym.matches < st->u.curlym.maxwanted) {
95b24440
DM
3825 REGMATCH(scan, CURLYM1);
3826 /*** all unsaved local vars undefined at this point */
3827 if (!result)
6407bf3b 3828 break;
d8319b27
DM
3829 /* on first match, determine length, u.curlym.l */
3830 if (!st->u.curlym.matches++) {
6407bf3b
DM
3831 if (PL_reg_match_utf8) {
3832 char *s = locinput;
3833 while (s < PL_reginput) {
d8319b27 3834 st->u.curlym.l++;
6407bf3b
DM
3835 s += UTF8SKIP(s);
3836 }
3837 }
3838 else {
d8319b27 3839 st->u.curlym.l = PL_reginput - locinput;
6407bf3b 3840 }
d8319b27
DM
3841 if (st->u.curlym.l == 0) {
3842 st->u.curlym.matches = st->u.curlym.maxwanted;
6407bf3b
DM
3843 break;
3844 }
3845 }
3846 locinput = PL_reginput;
3847 }
3848 }
3849
3850 PL_reginput = locinput;
3851
5d9a96ca
DM
3852 if (st->minmod) {
3853 st->minmod = 0;
d8319b27 3854 if (st->ln && st->u.curlym.matches < st->ln)
c277df42 3855 sayNO;
cca55fe3 3856 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3857 regnode *text_node = next;
3858
cca55fe3 3859 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3860
d8319b27 3861 if (! HAS_TEXT(text_node)) st->u.curlym.c1 = st->u.curlym.c2 = -1000;
5f80c4cf 3862 else {
cca55fe3 3863 if (PL_regkind[(U8)OP(text_node)] == REF) {
d8319b27 3864 st->u.curlym.c1 = st->u.curlym.c2 = -1000;
44a68960 3865 goto assume_ok_MM;
cca55fe3 3866 }
d8319b27 3867 else { st->u.curlym.c1 = (U8)*STRING(text_node); }
af5decee 3868 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
d8319b27 3869 st->u.curlym.c2 = PL_fold[st->u.curlym.c1];
af5decee 3870 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
d8319b27 3871 st->u.curlym.c2 = PL_fold_locale[st->u.curlym.c1];
5f80c4cf 3872 else
d8319b27 3873 st->u.curlym.c2 = st->u.curlym.c1;
5f80c4cf 3874 }
a0ed51b3
LW
3875 }
3876 else
d8319b27 3877 st->u.curlym.c1 = st->u.curlym.c2 = -1000;
cca55fe3 3878 assume_ok_MM:
d8319b27 3879 REGCP_SET(st->u.curlym.lastcp);
5d9a96ca 3880 while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */
c277df42 3881 /* If it could work, try it. */
d8319b27
DM
3882 if (st->u.curlym.c1 == -1000 ||
3883 UCHARAT(PL_reginput) == st->u.curlym.c1 ||
3884 UCHARAT(PL_reginput) == st->u.curlym.c2)
c277df42 3885 {
d8319b27 3886 if (st->u.curlym.paren) {
5d9a96ca 3887 if (st->ln) {
d8319b27
DM
3888 PL_regstartp[st->u.curlym.paren] =
3889 HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr;
3890 PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3891 }
3892 else
d8319b27 3893 PL_regendp[st->u.curlym.paren] = -1;
c277df42 3894 }
95b24440
DM
3895 REGMATCH(next, CURLYM2);
3896 /*** all unsaved local vars undefined at this point */
3897 if (result)
c277df42 3898 sayYES;
d8319b27 3899 REGCP_UNWIND(st->u.curlym.lastcp);
c277df42
IZ
3900 }
3901 /* Couldn't or didn't -- move forward. */
3280af22 3902 PL_reginput = locinput;
95b24440
DM
3903 REGMATCH(scan, CURLYM3);
3904 /*** all unsaved local vars undefined at this point */
3905 if (result) {
5d9a96ca 3906 st->ln++;
3280af22 3907 locinput = PL_reginput;
c277df42
IZ
3908 }
3909 else
3910 sayNO;
3911 }
a0ed51b3
LW
3912 }
3913 else {
a3621e74 3914 DEBUG_EXECUTE_r(
5c0ca799 3915 PerlIO_printf(Perl_debug_log,
95b24440
DM
3916 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
3917 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
d8319b27 3918 (IV) st->u.curlym.matches, (IV)st->u.curlym.l)
c277df42 3919 );
d8319b27 3920 if (st->u.curlym.matches >= st->ln) {
cca55fe3 3921 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3922 regnode *text_node = next;
3923
cca55fe3 3924 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3925
d8319b27 3926 if (! HAS_TEXT(text_node)) st->u.curlym.c1 = st->u.curlym.c2 = -1000;
5f80c4cf 3927 else {
cca55fe3 3928 if (PL_regkind[(U8)OP(text_node)] == REF) {
d8319b27 3929 st->u.curlym.c1 = st->u.curlym.c2 = -1000;
44a68960 3930 goto assume_ok_REG;
cca55fe3 3931 }
d8319b27 3932 else { st->u.curlym.c1 = (U8)*STRING(text_node); }
cca55fe3 3933
af5decee 3934 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
d8319b27 3935 st->u.curlym.c2 = PL_fold[st->u.curlym.c1];
af5decee 3936 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
d8319b27 3937 st->u.curlym.c2 = PL_fold_locale[st->u.curlym.c1];
5f80c4cf 3938 else
d8319b27 3939 st->u.curlym.c2 = st->u.curlym.c1;
5f80c4cf 3940 }
a0ed51b3
LW
3941 }
3942 else
d8319b27 3943 st->u.curlym.c1 = st->u.curlym.c2 = -1000;
c277df42 3944 }
cca55fe3 3945 assume_ok_REG:
d8319b27
DM
3946 REGCP_SET(st->u.curlym.lastcp);
3947 while (st->u.curlym.matches >= st->ln) {
c277df42 3948 /* If it could work, try it. */
d8319b27
DM
3949 if (st->u.curlym.c1 == -1000 ||
3950 UCHARAT(PL_reginput) == st->u.curlym.c1 ||
3951 UCHARAT(PL_reginput) == st->u.curlym.c2)
a0ed51b3 3952 {
a3621e74 3953 DEBUG_EXECUTE_r(
6407bf3b
DM
3954 PerlIO_printf(Perl_debug_log,
3955 "%*s trying tail with matches=%"IVdf"...\n",
3956 (int)(REPORT_CODE_OFF+PL_regindent*2),
d8319b27 3957 "", (IV)st->u.curlym.matches)
a0ed51b3 3958 );
d8319b27
DM
3959 if (st->u.curlym.paren) {
3960 if (st->u.curlym.matches) {
3961 PL_regstartp[st->u.curlym.paren]
3962 = HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr;
3963 PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr;
c277df42 3964 }
a0ed51b3 3965 else
d8319b27 3966 PL_regendp[st->u.curlym.paren] = -1;
c277df42 3967 }
95b24440
DM
3968 REGMATCH(next, CURLYM4);
3969 /*** all unsaved local vars undefined at this point */
3970 if (result)
a0ed51b3 3971 sayYES;
d8319b27 3972 REGCP_UNWIND(st->u.curlym.lastcp);
a0ed51b3 3973 }
c277df42 3974 /* Couldn't or didn't -- back up. */
d8319b27
DM
3975 st->u.curlym.matches--;
3976 locinput = HOPc(locinput, -st->u.curlym.l);
3280af22 3977 PL_reginput = locinput;
c277df42
IZ
3978 }
3979 }
3980 sayNO;
5f66b61c 3981 /* NOTREACHED */
c277df42
IZ
3982 break;
3983 }
3984 case CURLYN:
d8319b27
DM
3985 st->u.plus.paren = scan->flags; /* Which paren to set */
3986 if (st->u.plus.paren > PL_regsize)
3987 PL_regsize = st->u.plus.paren;
3988 if (st->u.plus.paren > (I32)*PL_reglastparen)
3989 *PL_reglastparen = st->u.plus.paren;
5d9a96ca 3990 st->ln = ARG1(scan); /* min to match */
c277df42 3991 n = ARG2(scan); /* max to match */
dc45a647 3992 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3993 goto repeat;
a0d0e21e 3994 case CURLY:
d8319b27 3995 st->u.plus.paren = 0;
5d9a96ca 3996 st->ln = ARG1(scan); /* min to match */
a0d0e21e 3997 n = ARG2(scan); /* max to match */
dc45a647 3998 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3999 goto repeat;
4000 case STAR:
5d9a96ca 4001 st->ln = 0;
c277df42 4002 n = REG_INFTY;
a0d0e21e 4003 scan = NEXTOPER(scan);
d8319b27 4004 st->u.plus.paren = 0;
a0d0e21e
LW
4005 goto repeat;
4006 case PLUS:
5d9a96ca 4007 st->ln = 1;
c277df42
IZ
4008 n = REG_INFTY;
4009 scan = NEXTOPER(scan);
d8319b27 4010 st->u.plus.paren = 0;
c277df42 4011 repeat:
a0d0e21e
LW
4012 /*
4013 * Lookahead to avoid useless match attempts
4014 * when we know what character comes next.
4015 */
5f80c4cf
JP
4016
4017 /*
4018 * Used to only do .*x and .*?x, but now it allows
4019 * for )'s, ('s and (?{ ... })'s to be in the way
4020 * of the quantifier and the EXACT-like node. -- japhy
4021 */
4022
cca55fe3 4023 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
4024 U8 *s;
4025 regnode *text_node = next;
4026
cca55fe3 4027 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 4028
d8319b27 4029 if (! HAS_TEXT(text_node)) st->u.plus.c1 = st->u.plus.c2 = -1000;
5f80c4cf 4030 else {
cca55fe3 4031 if (PL_regkind[(U8)OP(text_node)] == REF) {
d8319b27 4032 st->u.plus.c1 = st->u.plus.c2 = -1000;
44a68960 4033 goto assume_ok_easy;
cca55fe3
JP
4034 }
4035 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
4036
4037 if (!UTF) {
d8319b27 4038 st->u.plus.c2 = st->u.plus.c1 = *s;
f65d3ee7 4039 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
d8319b27 4040 st->u.plus.c2 = PL_fold[st->u.plus.c1];
f65d3ee7 4041 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
d8319b27 4042 st->u.plus.c2 = PL_fold_locale[st->u.plus.c1];
1aa99e6b 4043 }
5f80c4cf 4044 else { /* UTF */
f65d3ee7 4045 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 4046 STRLEN ulen1, ulen2;
89ebb4a3
JH
4047 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4048 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
a2a2844f
JH
4049
4050 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4051 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4052
d8319b27 4053 st->u.plus.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
041457d9 4054 uniflags);
d8319b27 4055 st->u.plus.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
041457d9 4056 uniflags);
5f80c4cf
JP
4057 }
4058 else {
d8319b27 4059 st->u.plus.c2 = st->u.plus.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
041457d9 4060 uniflags);
5f80c4cf 4061 }
1aa99e6b
IH
4062 }
4063 }
bbce6d69 4064 }
a0d0e21e 4065 else
d8319b27 4066 st->u.plus.c1 = st->u.plus.c2 = -1000;
cca55fe3 4067 assume_ok_easy:
3280af22 4068 PL_reginput = locinput;
5d9a96ca
DM
4069 if (st->minmod) {
4070 st->minmod = 0;
4071 if (st->ln && regrepeat(scan, st->ln) < st->ln)
4633a7c4 4072 sayNO;
a0ed51b3 4073 locinput = PL_reginput;
d8319b27
DM
4074 REGCP_SET(st->u.plus.lastcp);
4075 if (st->u.plus.c1 != -1000) {
4076 st->u.plus.old = locinput;
4077 st->u.plus.count = 0;
0fe9bf95 4078
1aa99e6b 4079 if (n == REG_INFTY) {
d8319b27 4080 st->u.plus.e = PL_regeol - 1;
1aa99e6b 4081 if (do_utf8)
d8319b27
DM
4082 while (UTF8_IS_CONTINUATION(*(U8*)st->u.plus.e))
4083 st->u.plus.e--;
1aa99e6b
IH
4084 }
4085 else if (do_utf8) {
5d9a96ca 4086 int m = n - st->ln;
d8319b27
DM
4087 for (st->u.plus.e = locinput;
4088 m >0 && st->u.plus.e + UTF8SKIP(st->u.plus.e) <= PL_regeol; m--)
4089 st->u.plus.e += UTF8SKIP(st->u.plus.e);
1aa99e6b
IH
4090 }
4091 else {
d8319b27
DM
4092 st->u.plus.e = locinput + n - st->ln;
4093 if (st->u.plus.e >= PL_regeol)
4094 st->u.plus.e = PL_regeol - 1;
1aa99e6b 4095 }
0fe9bf95
IZ
4096 while (1) {
4097 /* Find place 'next' could work */
1aa99e6b 4098 if (!do_utf8) {
d8319b27
DM
4099 if (st->u.plus.c1 == st->u.plus.c2) {
4100 while (locinput <= st->u.plus.e &&
4101 UCHARAT(locinput) != st->u.plus.c1)
1aa99e6b
IH
4102 locinput++;
4103 } else {
d8319b27
DM
4104 while (locinput <= st->u.plus.e
4105 && UCHARAT(locinput) != st->u.plus.c1
4106 && UCHARAT(locinput) != st->u.plus.c2)
1aa99e6b
IH
4107 locinput++;
4108 }
d8319b27 4109 st->u.plus.count = locinput - st->u.plus.old;
1aa99e6b
IH
4110 }
4111 else {
d8319b27 4112 if (st->u.plus.c1 == st->u.plus.c2) {
a3b680e6 4113 STRLEN len;
872c91ae
JH
4114 /* count initialised to
4115 * utf8_distance(old, locinput) */
d8319b27 4116 while (locinput <= st->u.plus.e &&
872c91ae 4117 utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4118 UTF8_MAXBYTES, &len,
d8319b27 4119 uniflags) != (UV)st->u.plus.c1) {
1aa99e6b 4120 locinput += len;
d8319b27 4121 st->u.plus.count++;
b2f2f093 4122 }
1aa99e6b 4123 } else {
a3b680e6 4124 STRLEN len;
872c91ae
JH
4125 /* count initialised to
4126 * utf8_distance(old, locinput) */
d8319b27 4127 while (locinput <= st->u.plus.e) {
872c91ae 4128 UV c = utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4129 UTF8_MAXBYTES, &len,
041457d9 4130 uniflags);
d8319b27 4131 if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
1aa99e6b 4132 break;
b2f2f093 4133 locinput += len;
d8319b27 4134 st->u.plus.count++;
1aa99e6b
IH
4135 }
4136 }
0fe9bf95 4137 }
d8319b27 4138 if (locinput > st->u.plus.e)
0fe9bf95
IZ
4139 sayNO;
4140 /* PL_reginput == old now */
d8319b27 4141 if (locinput != st->u.plus.old) {
5d9a96ca 4142 st->ln = 1; /* Did some */
d8319b27 4143 if (regrepeat(scan, st->u.plus.count) < st->u.plus.count)
0fe9bf95
IZ
4144 sayNO;
4145 }
4146 /* PL_reginput == locinput now */
d8319b27 4147 TRYPAREN(st->u.plus.paren, st->ln, locinput, PLUS1);
95b24440 4148 /*** all unsaved local vars undefined at this point */
0fe9bf95 4149 PL_reginput = locinput; /* Could be reset... */
d8319b27 4150 REGCP_UNWIND(st->u.plus.lastcp);
0fe9bf95 4151 /* Couldn't or didn't -- move forward. */
d8319b27 4152 st->u.plus.old = locinput;
1aa99e6b
IH
4153 if (do_utf8)
4154 locinput += UTF8SKIP(locinput);
4155 else
4156 locinput++;
d8319b27 4157 st->u.plus.count = 1;
0fe9bf95
IZ
4158 }
4159 }
4160 else
5d9a96ca 4161 while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */
1aa99e6b 4162 UV c;
d8319b27 4163 if (st->u.plus.c1 != -1000) {
1aa99e6b 4164 if (do_utf8)
872c91ae 4165 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4166 UTF8_MAXBYTES, 0,
041457d9 4167 uniflags);
1aa99e6b 4168 else
9041c2e3 4169 c = UCHARAT(PL_reginput);
2390ecbc 4170 /* If it could work, try it. */
d8319b27 4171 if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
2390ecbc 4172 {
d8319b27 4173 TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS2);
95b24440 4174 /*** all unsaved local vars undefined at this point */
d8319b27 4175 REGCP_UNWIND(st->u.plus.lastcp);
2390ecbc 4176 }
1aa99e6b 4177 }
a0d0e21e 4178 /* If it could work, try it. */
d8319b27 4179 else if (st->u.plus.c1 == -1000)
bbce6d69 4180 {
d8319b27 4181 TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS3);
95b24440 4182 /*** all unsaved local vars undefined at this point */
d8319b27 4183 REGCP_UNWIND(st->u.plus.lastcp);
bbce6d69 4184 }
c277df42 4185 /* Couldn't or didn't -- move forward. */
a0ed51b3 4186 PL_reginput = locinput;
a0d0e21e 4187 if (regrepeat(scan, 1)) {
5d9a96ca 4188 st->ln++;
a0ed51b3
LW
4189 locinput = PL_reginput;
4190 }
4191 else
4633a7c4 4192 sayNO;
a0d0e21e
LW
4193 }
4194 }
4195 else {
4196 n = regrepeat(scan, n);
a0ed51b3 4197 locinput = PL_reginput;
5d9a96ca 4198 if (st->ln < n && PL_regkind[(U8)OP(next)] == EOL &&
7fba1cd6 4199 (OP(next) != MEOL ||
15272685
HS
4200 OP(next) == SEOL || OP(next) == EOS))
4201 {
5d9a96ca 4202 st->ln = n; /* why back off? */
1aeab75a
GS
4203 /* ...because $ and \Z can match before *and* after
4204 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
4205 We should back off by one in this case. */
4206 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
5d9a96ca 4207 st->ln--;
1aeab75a 4208 }
d8319b27 4209 REGCP_SET(st->u.plus.lastcp);
1d5c262f 4210 {
8fa7f367 4211 UV c = 0;
5d9a96ca 4212 while (n >= st->ln) {
d8319b27 4213 if (st->u.plus.c1 != -1000) {
1aa99e6b 4214 if (do_utf8)
872c91ae 4215 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4216 UTF8_MAXBYTES, 0,
041457d9 4217 uniflags);
1aa99e6b 4218 else
9041c2e3 4219 c = UCHARAT(PL_reginput);
1aa99e6b 4220 }
c277df42 4221 /* If it could work, try it. */
d8319b27 4222 if (st->u.plus.c1 == -1000 || c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
c277df42 4223 {
d8319b27 4224 TRYPAREN(st->u.plus.paren, n, PL_reginput, PLUS4);
95b24440 4225 /*** all unsaved local vars undefined at this point */
d8319b27 4226 REGCP_UNWIND(st->u.plus.lastcp);
c277df42
IZ
4227 }
4228 /* Couldn't or didn't -- back up. */
4229 n--;
dfe13c55 4230 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 4231 }
a0d0e21e
LW
4232 }
4233 }
4633a7c4 4234 sayNO;
c277df42 4235 break;
a0d0e21e 4236 case END:
0f5d15d6 4237 if (PL_reg_call_cc) {
d8319b27
DM
4238 st->u.end.cur_call_cc = PL_reg_call_cc;
4239 st->u.end.end_re = PL_reg_re;
6136c704
AL
4240
4241 /* Save *all* the positions. */
d8319b27
DM
4242 st->u.end.cp = regcppush(0);
4243 REGCP_SET(st->u.end.lastcp);
6136c704
AL
4244
4245 /* Restore parens of the caller. */
95b24440
DM
4246 {
4247 I32 tmp = PL_savestack_ix;
4248 PL_savestack_ix = PL_reg_call_cc->ss;
4f639d21 4249 regcppop(rex);
95b24440
DM
4250 PL_savestack_ix = tmp;
4251 }
6136c704
AL
4252
4253 /* Make position available to the callcc. */
4254 PL_reginput = locinput;
4255
4f639d21 4256 PL_reg_re = PL_reg_call_cc->re;
d8319b27 4257 st->u.end.savecc = st->cc;
5d9a96ca 4258 st->cc = PL_reg_call_cc->cc;
0f5d15d6 4259 PL_reg_call_cc = PL_reg_call_cc->prev;
d8319b27 4260 REGMATCH(st->u.end.cur_call_cc->node, END);
95b24440
DM
4261 /*** all unsaved local vars undefined at this point */
4262 if (result) {
d8319b27
DM
4263 PL_reg_call_cc = st->u.end.cur_call_cc;
4264 regcpblow(st->u.end.cp);
0f5d15d6
IZ
4265 sayYES;
4266 }
d8319b27 4267 REGCP_UNWIND(st->u.end.lastcp);
4f639d21 4268 regcppop(rex);
d8319b27
DM
4269 PL_reg_call_cc = st->u.end.cur_call_cc;
4270 st->cc = st->u.end.savecc;
4271 PL_reg_re = st->u.end.end_re;
0f5d15d6 4272
a3621e74 4273 DEBUG_EXECUTE_r(
0f5d15d6
IZ
4274 PerlIO_printf(Perl_debug_log,
4275 "%*s continuation failed...\n",
4276 REPORT_CODE_OFF+PL_regindent*2, "")
4277 );
7821416a 4278 sayNO_SILENT;
0f5d15d6 4279 }
7821416a 4280 if (locinput < PL_regtill) {
a3621e74 4281 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
7821416a
IZ
4282 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4283 PL_colors[4],
4284 (long)(locinput - PL_reg_starttry),
4285 (long)(PL_regtill - PL_reg_starttry),
4286 PL_colors[5]));
4287 sayNO_FINAL; /* Cannot match: too short. */
4288 }
4289 PL_reginput = locinput; /* put where regtry can find it */
4290 sayYES_FINAL; /* Success! */
7e5428c5 4291 case SUCCEED:
3280af22 4292 PL_reginput = locinput; /* put where regtry can find it */
7821416a 4293 sayYES_LOUD; /* Success! */
c277df42
IZ
4294 case SUSPEND:
4295 n = 1;
9fe1d20c 4296 PL_reginput = locinput;
9041c2e3 4297 goto do_ifmatch;
a0d0e21e 4298 case UNLESSM:
c277df42 4299 n = 0;
a0ed51b3 4300 if (scan->flags) {
52657f30 4301 char * const s = HOPBACKc(locinput, scan->flags);
efb30f32
HS
4302 if (!s)
4303 goto say_yes;
4304 PL_reginput = s;
a0ed51b3
LW
4305 }
4306 else
4307 PL_reginput = locinput;
c277df42
IZ
4308 goto do_ifmatch;
4309 case IFMATCH:
4310 n = 1;
a0ed51b3 4311 if (scan->flags) {
52657f30 4312 char * const s = HOPBACKc(locinput, scan->flags);
efb30f32
HS
4313 if (!s)
4314 goto say_no;
4315 PL_reginput = s;
a0ed51b3
LW
4316 }
4317 else
4318 PL_reginput = locinput;
4319
c277df42 4320 do_ifmatch:
95b24440
DM
4321 REGMATCH(NEXTOPER(NEXTOPER(scan)), IFMATCH);
4322 /*** all unsaved local vars undefined at this point */
4323 if (result != n) {
c277df42 4324 say_no:
5d9a96ca
DM
4325 if (st->logical) {
4326 st->logical = 0;
4327 st->sw = 0;
c277df42 4328 goto do_longjump;
a0ed51b3
LW
4329 }
4330 else
c277df42
IZ
4331 sayNO;
4332 }
4333 say_yes:
5d9a96ca
DM
4334 if (st->logical) {
4335 st->logical = 0;
4336 st->sw = 1;
c277df42 4337 }
fe44a5e8 4338 if (OP(scan) == SUSPEND) {
3280af22 4339 locinput = PL_reginput;
565764a8 4340 nextchr = UCHARAT(locinput);
fe44a5e8 4341 }
c277df42
IZ
4342 /* FALL THROUGH. */
4343 case LONGJMP:
4344 do_longjump:
4345 next = scan + ARG(scan);
4346 if (next == scan)
4347 next = NULL;
a0d0e21e
LW
4348 break;
4349 default:
b900a521 4350 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 4351 PTR2UV(scan), OP(scan));
cea2e8a9 4352 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 4353 }
95b24440 4354
02db2b7b 4355 reenter:
a0d0e21e 4356 scan = next;
95b24440
DM
4357 continue;
4358 /* NOTREACHED */
4359
4360 /* simulate recursively calling regmatch(), but without actually
4361 * recursing - ie save the current state on the heap rather than on
4362 * the stack, then re-enter the loop. This avoids complex regexes
4363 * blowing the processor stack */
4364
4365 start_recurse:
4366 {
5d9a96ca
DM
4367 /* push new state */
4368 regmatch_state *oldst = st;
4369
4370 depth++;
4371
4372 /* grab the next free state slot */
4373 st++;
4374 if (st > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]))
4375 st = S_push_slab(aTHX);
4376 PL_regmatch_state = st;
4377
4378 oldst->next = next;
4379 oldst->n = n;
4380 oldst->locinput = locinput;
4381 oldst->reg_call_cc = PL_reg_call_cc;
4382
4383 st->cc = oldst->cc;
95b24440
DM
4384 locinput = PL_reginput;
4385 nextchr = UCHARAT(locinput);
5d9a96ca
DM
4386 st->minmod = 0;
4387 st->sw = 0;
4388 st->logical = 0;
4389 st->unwind = 0;
95b24440
DM
4390#ifdef DEBUGGING
4391 PL_regindent++;
4392#endif
4393 }
a0d0e21e 4394 }
a687059c 4395
a0d0e21e
LW
4396 /*
4397 * We get here only if there's trouble -- normally "case END" is
4398 * the terminating point.
4399 */
cea2e8a9 4400 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 4401 /*NOTREACHED*/
4633a7c4
LW
4402 sayNO;
4403
7821416a 4404yes_loud:
a3621e74 4405 DEBUG_EXECUTE_r(
7821416a
IZ
4406 PerlIO_printf(Perl_debug_log,
4407 "%*s %scould match...%s\n",
e4584336 4408 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
7821416a
IZ
4409 );
4410 goto yes;
4411yes_final:
a3621e74 4412 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
e4584336 4413 PL_colors[4], PL_colors[5]));
4633a7c4
LW
4414yes:
4415#ifdef DEBUGGING
3280af22 4416 PL_regindent--;
4633a7c4 4417#endif
02db2b7b 4418
95b24440
DM
4419 result = 1;
4420 goto exit_level;
4633a7c4
LW
4421
4422no:
a3621e74 4423 DEBUG_EXECUTE_r(
7821416a
IZ
4424 PerlIO_printf(Perl_debug_log,
4425 "%*s %sfailed...%s\n",
e4584336 4426 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
7821416a
IZ
4427 );
4428 goto do_no;
4429no_final:
4430do_no:
5d9a96ca
DM
4431 if (st->unwind) {
4432 re_unwind_t * const uw = SSPTRt(st->unwind,re_unwind_t);
02db2b7b
IZ
4433
4434 switch (uw->type) {
4435 case RE_UNWIND_BRANCH:
4436 case RE_UNWIND_BRANCHJ:
4437 {
6136c704 4438 re_unwind_branch_t * const uwb = &(uw->branch);
a3b680e6 4439 const I32 lastparen = uwb->lastparen;
9041c2e3 4440
02db2b7b
IZ
4441 REGCP_UNWIND(uwb->lastcp);
4442 for (n = *PL_reglastparen; n > lastparen; n--)
4443 PL_regendp[n] = -1;
4444 *PL_reglastparen = n;
4445 scan = next = uwb->next;
9041c2e3
NIS
4446 if ( !scan ||
4447 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b 4448 ? BRANCH : BRANCHJ) ) { /* Failure */
5d9a96ca 4449 st->unwind = uwb->prev;
02db2b7b
IZ
4450#ifdef DEBUGGING
4451 PL_regindent--;
4452#endif
4453 goto do_no;
4454 }
4455 /* Have more choice yet. Reuse the same uwb. */
02db2b7b
IZ
4456 if ((n = (uwb->type == RE_UNWIND_BRANCH
4457 ? NEXT_OFF(next) : ARG(next))))
4458 next += n;
4459 else
4460 next = NULL; /* XXXX Needn't unwinding in this case... */
4461 uwb->next = next;
4462 next = NEXTOPER(scan);
4463 if (uwb->type == RE_UNWIND_BRANCHJ)
4464 next = NEXTOPER(next);
4465 locinput = uwb->locinput;
4466 nextchr = uwb->nextchr;
4467#ifdef DEBUGGING
4468 PL_regindent = uwb->regindent;
4469#endif
4470
4471 goto reenter;
4472 }
5f66b61c 4473 /* NOTREACHED */
02db2b7b
IZ
4474 default:
4475 Perl_croak(aTHX_ "regexp unwind memory corruption");
4476 }
5f66b61c 4477 /* NOTREACHED */
02db2b7b 4478 }
4633a7c4 4479#ifdef DEBUGGING
3280af22 4480 PL_regindent--;
4633a7c4 4481#endif
95b24440
DM
4482 result = 0;
4483exit_level:
5d9a96ca
DM
4484
4485 if (depth--) {
95b24440 4486 /* restore previous state and re-enter */
5d9a96ca
DM
4487 st--;
4488 if (st < &PL_regmatch_slab->states[0]) {
4489 PL_regmatch_slab = PL_regmatch_slab->prev;
4490 st = &PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1];
4491 }
4492 PL_regmatch_state = st;
95b24440 4493
5d9a96ca
DM
4494 PL_reg_call_cc = st->reg_call_cc;
4495 scan = st->scan;
4496 next = st->next;
4497 n = st->n;
4498 locinput = st->locinput;
95b24440 4499 nextchr = UCHARAT(locinput);
5d9a96ca
DM
4500
4501 switch (st->resume_state) {
95b24440
DM
4502 case resume_TRIE1:
4503 goto resume_point_TRIE1;
4504 case resume_TRIE2:
4505 goto resume_point_TRIE2;
4506 case resume_CURLYX:
4507 goto resume_point_CURLYX;
4508 case resume_WHILEM1:
4509 goto resume_point_WHILEM1;
4510 case resume_WHILEM2:
4511 goto resume_point_WHILEM2;
4512 case resume_WHILEM3:
4513 goto resume_point_WHILEM3;
4514 case resume_WHILEM4:
4515 goto resume_point_WHILEM4;
4516 case resume_WHILEM5:
4517 goto resume_point_WHILEM5;
4518 case resume_WHILEM6:
4519 goto resume_point_WHILEM6;
4520 case resume_CURLYM1:
4521 goto resume_point_CURLYM1;
4522 case resume_CURLYM2:
4523 goto resume_point_CURLYM2;
4524 case resume_CURLYM3:
4525 goto resume_point_CURLYM3;
4526 case resume_CURLYM4:
4527 goto resume_point_CURLYM4;
4528 case resume_IFMATCH:
4529 goto resume_point_IFMATCH;
4530 case resume_PLUS1:
4531 goto resume_point_PLUS1;
4532 case resume_PLUS2:
4533 goto resume_point_PLUS2;
4534 case resume_PLUS3:
4535 goto resume_point_PLUS3;
4536 case resume_PLUS4:
4537 goto resume_point_PLUS4;
4538 case resume_END:
4539 goto resume_point_END;
4540 default:
4541 Perl_croak(aTHX_ "regexp resume memory corruption");
4542 }
4543 /* NOTREACHED */
4544 }
5d9a96ca
DM
4545 /* restore original high-water mark */
4546 PL_regmatch_slab = orig_slab;
4547 PL_regmatch_state = orig_state;
4548
4549 /* free all slabs above current one */
4550 if (orig_slab->next) {
ad65c075 4551 regmatch_slab *osl, *sl = orig_slab->next;
5d9a96ca
DM
4552 orig_slab->next = NULL;
4553 while (sl) {
ad65c075 4554 osl = sl;
5d9a96ca 4555 sl = sl->next;
ad65c075 4556 Safefree(osl);
5d9a96ca
DM
4557 }
4558 }
4559
95b24440
DM
4560 return result;
4561
a687059c
LW
4562}
4563
4564/*
4565 - regrepeat - repeatedly match something simple, report how many
4566 */
4567/*
4568 * [This routine now assumes that it will only match on things of length 1.
4569 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 4570 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 4571 */
76e3520e 4572STATIC I32
a3b680e6 4573S_regrepeat(pTHX_ const regnode *p, I32 max)
a687059c 4574{
27da23d5 4575 dVAR;
a0d0e21e 4576 register char *scan;
a0d0e21e 4577 register I32 c;
3280af22 4578 register char *loceol = PL_regeol;
a0ed51b3 4579 register I32 hardcount = 0;
53c4c00c 4580 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 4581
3280af22 4582 scan = PL_reginput;
faf11cac
HS
4583 if (max == REG_INFTY)
4584 max = I32_MAX;
4585 else if (max < loceol - scan)
7f596f4c 4586 loceol = scan + max;
a0d0e21e 4587 switch (OP(p)) {
22c35a8c 4588 case REG_ANY:
1aa99e6b 4589 if (do_utf8) {
ffc61ed2 4590 loceol = PL_regeol;
1aa99e6b 4591 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
4592 scan += UTF8SKIP(scan);
4593 hardcount++;
4594 }
4595 } else {
4596 while (scan < loceol && *scan != '\n')
4597 scan++;
a0ed51b3
LW
4598 }
4599 break;
ffc61ed2 4600 case SANY:
def8e4ea
JH
4601 if (do_utf8) {
4602 loceol = PL_regeol;
a0804c9e 4603 while (scan < loceol && hardcount < max) {
def8e4ea
JH
4604 scan += UTF8SKIP(scan);
4605 hardcount++;
4606 }
4607 }
4608 else
4609 scan = loceol;
a0ed51b3 4610 break;
f33976b4
DB
4611 case CANY:
4612 scan = loceol;
4613 break;
090f7165
JH
4614 case EXACT: /* length of string is 1 */
4615 c = (U8)*STRING(p);
4616 while (scan < loceol && UCHARAT(scan) == c)
4617 scan++;
bbce6d69 4618 break;
4619 case EXACTF: /* length of string is 1 */
cd439c50 4620 c = (U8)*STRING(p);
bbce6d69 4621 while (scan < loceol &&
22c35a8c 4622 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 4623 scan++;
4624 break;
4625 case EXACTFL: /* length of string is 1 */
3280af22 4626 PL_reg_flags |= RF_tainted;
cd439c50 4627 c = (U8)*STRING(p);
bbce6d69 4628 while (scan < loceol &&
22c35a8c 4629 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
4630 scan++;
4631 break;
4632 case ANYOF:
ffc61ed2
JH
4633 if (do_utf8) {
4634 loceol = PL_regeol;
cfc92286
JH
4635 while (hardcount < max && scan < loceol &&
4636 reginclass(p, (U8*)scan, 0, do_utf8)) {
ffc61ed2
JH
4637 scan += UTF8SKIP(scan);
4638 hardcount++;
4639 }
4640 } else {
7d3e948e 4641 while (scan < loceol && REGINCLASS(p, (U8*)scan))
ffc61ed2
JH
4642 scan++;
4643 }
a0d0e21e
LW
4644 break;
4645 case ALNUM:
1aa99e6b 4646 if (do_utf8) {
ffc61ed2 4647 loceol = PL_regeol;
1a4fad37 4648 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4649 while (hardcount < max && scan < loceol &&
3568d838 4650 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4651 scan += UTF8SKIP(scan);
4652 hardcount++;
4653 }
4654 } else {
4655 while (scan < loceol && isALNUM(*scan))
4656 scan++;
a0ed51b3
LW
4657 }
4658 break;
bbce6d69 4659 case ALNUML:
3280af22 4660 PL_reg_flags |= RF_tainted;
1aa99e6b 4661 if (do_utf8) {
ffc61ed2 4662 loceol = PL_regeol;
1aa99e6b
IH
4663 while (hardcount < max && scan < loceol &&
4664 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4665 scan += UTF8SKIP(scan);
4666 hardcount++;
4667 }
4668 } else {
4669 while (scan < loceol && isALNUM_LC(*scan))
4670 scan++;
a0ed51b3
LW
4671 }
4672 break;
a0d0e21e 4673 case NALNUM:
1aa99e6b 4674 if (do_utf8) {
ffc61ed2 4675 loceol = PL_regeol;
1a4fad37 4676 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4677 while (hardcount < max && scan < loceol &&
3568d838 4678 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4679 scan += UTF8SKIP(scan);
4680 hardcount++;
4681 }
4682 } else {
4683 while (scan < loceol && !isALNUM(*scan))
4684 scan++;
a0ed51b3
LW
4685 }
4686 break;
bbce6d69 4687 case NALNUML:
3280af22 4688 PL_reg_flags |= RF_tainted;
1aa99e6b 4689 if (do_utf8) {
ffc61ed2 4690 loceol = PL_regeol;
1aa99e6b
IH
4691 while (hardcount < max && scan < loceol &&
4692 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4693 scan += UTF8SKIP(scan);
4694 hardcount++;
4695 }
4696 } else {
4697 while (scan < loceol && !isALNUM_LC(*scan))
4698 scan++;
a0ed51b3
LW
4699 }
4700 break;
a0d0e21e 4701 case SPACE:
1aa99e6b 4702 if (do_utf8) {
ffc61ed2 4703 loceol = PL_regeol;
1a4fad37 4704 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4705 while (hardcount < max && scan < loceol &&
3568d838
JH
4706 (*scan == ' ' ||
4707 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4708 scan += UTF8SKIP(scan);
4709 hardcount++;
4710 }
4711 } else {
4712 while (scan < loceol && isSPACE(*scan))
4713 scan++;
a0ed51b3
LW
4714 }
4715 break;
bbce6d69 4716 case SPACEL:
3280af22 4717 PL_reg_flags |= RF_tainted;
1aa99e6b 4718 if (do_utf8) {
ffc61ed2 4719 loceol = PL_regeol;
1aa99e6b 4720 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4721 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4722 scan += UTF8SKIP(scan);
4723 hardcount++;
4724 }
4725 } else {
4726 while (scan < loceol && isSPACE_LC(*scan))
4727 scan++;
a0ed51b3
LW
4728 }
4729 break;
a0d0e21e 4730 case NSPACE:
1aa99e6b 4731 if (do_utf8) {
ffc61ed2 4732 loceol = PL_regeol;
1a4fad37 4733 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4734 while (hardcount < max && scan < loceol &&
3568d838
JH
4735 !(*scan == ' ' ||
4736 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4737 scan += UTF8SKIP(scan);
4738 hardcount++;
4739 }
4740 } else {
4741 while (scan < loceol && !isSPACE(*scan))
4742 scan++;
4743 break;
a0ed51b3 4744 }
bbce6d69 4745 case NSPACEL:
3280af22 4746 PL_reg_flags |= RF_tainted;
1aa99e6b 4747 if (do_utf8) {
ffc61ed2 4748 loceol = PL_regeol;
1aa99e6b 4749 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4750 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4751 scan += UTF8SKIP(scan);
4752 hardcount++;
4753 }
4754 } else {
4755 while (scan < loceol && !isSPACE_LC(*scan))
4756 scan++;
a0ed51b3
LW
4757 }
4758 break;
a0d0e21e 4759 case DIGIT:
1aa99e6b 4760 if (do_utf8) {
ffc61ed2 4761 loceol = PL_regeol;
1a4fad37 4762 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4763 while (hardcount < max && scan < loceol &&
3568d838 4764 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4765 scan += UTF8SKIP(scan);
4766 hardcount++;
4767 }
4768 } else {
4769 while (scan < loceol && isDIGIT(*scan))
4770 scan++;
a0ed51b3
LW
4771 }
4772 break;
a0d0e21e 4773 case NDIGIT:
1aa99e6b 4774 if (do_utf8) {
ffc61ed2 4775 loceol = PL_regeol;
1a4fad37 4776 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4777 while (hardcount < max && scan < loceol &&
3568d838 4778 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4779 scan += UTF8SKIP(scan);
4780 hardcount++;
4781 }
4782 } else {
4783 while (scan < loceol && !isDIGIT(*scan))
4784 scan++;
a0ed51b3
LW
4785 }
4786 break;
a0d0e21e
LW
4787 default: /* Called on something of 0 width. */
4788 break; /* So match right here or not at all. */
4789 }
a687059c 4790
a0ed51b3
LW
4791 if (hardcount)
4792 c = hardcount;
4793 else
4794 c = scan - PL_reginput;
3280af22 4795 PL_reginput = scan;
a687059c 4796
a3621e74 4797 DEBUG_r({
ab74612d 4798 SV *re_debug_flags = NULL;
6136c704 4799 SV * const prop = sv_newmortal();
a3621e74
YO
4800 GET_RE_DEBUG_FLAGS;
4801 DEBUG_EXECUTE_r({
c277df42 4802 regprop(prop, p);
9041c2e3
NIS
4803 PerlIO_printf(Perl_debug_log,
4804 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3f7c398e 4805 REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max);
c277df42 4806 });
a3621e74 4807 });
9041c2e3 4808
a0d0e21e 4809 return(c);
a687059c
LW
4810}
4811
c277df42
IZ
4812
4813/*
ffc61ed2
JH
4814- regclass_swash - prepare the utf8 swash
4815*/
4816
4817SV *
a3b680e6 4818Perl_regclass_swash(pTHX_ register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
ffc61ed2 4819{
97aff369 4820 dVAR;
9e55ce06
JH
4821 SV *sw = NULL;
4822 SV *si = NULL;
4823 SV *alt = NULL;
a8079bba 4824 const struct reg_data *data = PL_reg_re ? PL_reg_re->data : NULL;
ffc61ed2 4825
4f639d21 4826 if (data && data->count) {
a3b680e6 4827 const U32 n = ARG(node);
ffc61ed2 4828
4f639d21
DM
4829 if (data->what[n] == 's') {
4830 SV * const rv = (SV*)data->data[n];
890ce7af 4831 AV * const av = (AV*)SvRV((SV*)rv);
2d03de9c 4832 SV **const ary = AvARRAY(av);
9e55ce06 4833 SV **a, **b;
9041c2e3 4834
711a919c 4835 /* See the end of regcomp.c:S_regclass() for
9e55ce06
JH
4836 * documentation of these array elements. */
4837
b11f357e 4838 si = *ary;
8f7f7219 4839 a = SvROK(ary[1]) ? &ary[1] : 0;
b11f357e
JH
4840 b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4841
ffc61ed2
JH
4842 if (a)
4843 sw = *a;
4844 else if (si && doinit) {
4845 sw = swash_init("utf8", "", si, 1, 0);
4846 (void)av_store(av, 1, sw);
4847 }
9e55ce06
JH
4848 if (b)
4849 alt = *b;
ffc61ed2
JH
4850 }
4851 }
4852
9e55ce06
JH
4853 if (listsvp)
4854 *listsvp = si;
4855 if (altsvp)
4856 *altsvp = alt;
ffc61ed2
JH
4857
4858 return sw;
4859}
4860
4861/*
ba7b4546 4862 - reginclass - determine if a character falls into a character class
832705d4
JH
4863
4864 The n is the ANYOF regnode, the p is the target string, lenp
4865 is pointer to the maximum length of how far to go in the p
4866 (if the lenp is zero, UTF8SKIP(p) is used),
4867 do_utf8 tells whether the target string is in UTF-8.
4868
bbce6d69 4869 */
4870
76e3520e 4871STATIC bool
a3b680e6 4872S_reginclass(pTHX_ register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
bbce6d69 4873{
27da23d5 4874 dVAR;
a3b680e6 4875 const char flags = ANYOF_FLAGS(n);
bbce6d69 4876 bool match = FALSE;
cc07378b 4877 UV c = *p;
ae9ddab8 4878 STRLEN len = 0;
9e55ce06 4879 STRLEN plen;
1aa99e6b 4880
19f67299
TS
4881 if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
4882 c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
4ad0818d
DM
4883 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
4884 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
19f67299
TS
4885 if (len == (STRLEN)-1)
4886 Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
4887 }
bbce6d69 4888
0f0076b4 4889 plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
ffc61ed2 4890 if (do_utf8 || (flags & ANYOF_UNICODE)) {
9e55ce06
JH
4891 if (lenp)
4892 *lenp = 0;
ffc61ed2 4893 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
4894 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4895 match = TRUE;
bbce6d69 4896 }
3568d838 4897 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 4898 match = TRUE;
ffc61ed2 4899 if (!match) {
9e55ce06 4900 AV *av;
890ce7af 4901 SV * const sw = regclass_swash(n, TRUE, 0, (SV**)&av);
ffc61ed2
JH
4902
4903 if (sw) {
3568d838 4904 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
4905 match = TRUE;
4906 else if (flags & ANYOF_FOLD) {
9e55ce06
JH
4907 if (!match && lenp && av) {
4908 I32 i;
9e55ce06 4909 for (i = 0; i <= av_len(av); i++) {
890ce7af 4910 SV* const sv = *av_fetch(av, i, FALSE);
9e55ce06 4911 STRLEN len;
890ce7af 4912 const char * const s = SvPV_const(sv, len);
9e55ce06 4913
061b10df 4914 if (len <= plen && memEQ(s, (char*)p, len)) {
9e55ce06
JH
4915 *lenp = len;
4916 match = TRUE;
4917 break;
4918 }
4919 }
4920 }
4921 if (!match) {
89ebb4a3 4922 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4a623e43
JH
4923 STRLEN tmplen;
4924
9e55ce06
JH
4925 to_utf8_fold(p, tmpbuf, &tmplen);
4926 if (swash_fetch(sw, tmpbuf, do_utf8))
4927 match = TRUE;
4928 }
ffc61ed2
JH
4929 }
4930 }
bbce6d69 4931 }
9e55ce06 4932 if (match && lenp && *lenp == 0)
0f0076b4 4933 *lenp = UNISKIP(NATIVE_TO_UNI(c));
bbce6d69 4934 }
1aa99e6b 4935 if (!match && c < 256) {
ffc61ed2
JH
4936 if (ANYOF_BITMAP_TEST(n, c))
4937 match = TRUE;
4938 else if (flags & ANYOF_FOLD) {
eb160463 4939 U8 f;
a0ed51b3 4940
ffc61ed2
JH
4941 if (flags & ANYOF_LOCALE) {
4942 PL_reg_flags |= RF_tainted;
4943 f = PL_fold_locale[c];
4944 }
4945 else
4946 f = PL_fold[c];
4947 if (f != c && ANYOF_BITMAP_TEST(n, f))
4948 match = TRUE;
4949 }
4950
4951 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 4952 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
4953 if (
4954 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4955 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4956 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
4957 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4958 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4959 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4960 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4961 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4962 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4963 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4964 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
4965 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
4966 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4967 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4968 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4969 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4970 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
4971 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4972 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
4973 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4974 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4975 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4976 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
4977 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4978 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4979 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
4980 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
4981 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
4982 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
4983 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
4984 ) /* How's that for a conditional? */
4985 {
4986 match = TRUE;
4987 }
a0ed51b3 4988 }
a0ed51b3
LW
4989 }
4990
a0ed51b3
LW
4991 return (flags & ANYOF_INVERT) ? !match : match;
4992}
161b471a 4993
dfe13c55 4994STATIC U8 *
5f66b61c 4995S_reghop3(U8 *s, I32 off, U8* lim)
9041c2e3 4996{
97aff369 4997 dVAR;
a0ed51b3 4998 if (off >= 0) {
1aa99e6b 4999 while (off-- && s < lim) {
ffc61ed2 5000 /* XXX could check well-formedness here */
a0ed51b3 5001 s += UTF8SKIP(s);
ffc61ed2 5002 }
a0ed51b3
LW
5003 }
5004 else {
5005 while (off++) {
1aa99e6b 5006 if (s > lim) {
a0ed51b3 5007 s--;
ffc61ed2 5008 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 5009 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 5010 s--;
ffc61ed2
JH
5011 }
5012 /* XXX could check well-formedness here */
a0ed51b3
LW
5013 }
5014 }
5015 }
5016 return s;
5017}
161b471a 5018
dfe13c55 5019STATIC U8 *
5f66b61c 5020S_reghopmaybe3(U8* s, I32 off, U8* lim)
a0ed51b3 5021{
97aff369 5022 dVAR;
a0ed51b3 5023 if (off >= 0) {
1aa99e6b 5024 while (off-- && s < lim) {
ffc61ed2 5025 /* XXX could check well-formedness here */
a0ed51b3 5026 s += UTF8SKIP(s);
ffc61ed2 5027 }
a0ed51b3
LW
5028 if (off >= 0)
5029 return 0;
5030 }
5031 else {
5032 while (off++) {
1aa99e6b 5033 if (s > lim) {
a0ed51b3 5034 s--;
ffc61ed2 5035 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 5036 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 5037 s--;
ffc61ed2
JH
5038 }
5039 /* XXX could check well-formedness here */
a0ed51b3
LW
5040 }
5041 else
5042 break;
5043 }
5044 if (off <= 0)
5045 return 0;
5046 }
5047 return s;
5048}
51371543 5049
51371543 5050static void
acfe0abc 5051restore_pos(pTHX_ void *arg)
51371543 5052{
97aff369 5053 dVAR;
097eb12c 5054 regexp * const rex = (regexp *)arg;
51371543
GS
5055 if (PL_reg_eval_set) {
5056 if (PL_reg_oldsaved) {
4f639d21
DM
5057 rex->subbeg = PL_reg_oldsaved;
5058 rex->sublen = PL_reg_oldsavedlen;
f8c7b90f 5059#ifdef PERL_OLD_COPY_ON_WRITE
4f639d21 5060 rex->saved_copy = PL_nrs;
ed252734 5061#endif
4f639d21 5062 RX_MATCH_COPIED_on(rex);
51371543
GS
5063 }
5064 PL_reg_magic->mg_len = PL_reg_oldpos;
5065 PL_reg_eval_set = 0;
5066 PL_curpm = PL_reg_oldcurpm;
5067 }
5068}
33b8afdf
JH
5069
5070STATIC void
5071S_to_utf8_substr(pTHX_ register regexp *prog)
5072{
33b8afdf 5073 if (prog->float_substr && !prog->float_utf8) {
097eb12c
AL
5074 SV* const sv = newSVsv(prog->float_substr);
5075 prog->float_utf8 = sv;
33b8afdf
JH
5076 sv_utf8_upgrade(sv);
5077 if (SvTAIL(prog->float_substr))
5078 SvTAIL_on(sv);
5079 if (prog->float_substr == prog->check_substr)
5080 prog->check_utf8 = sv;
5081 }
5082 if (prog->anchored_substr && !prog->anchored_utf8) {
097eb12c
AL
5083 SV* const sv = newSVsv(prog->anchored_substr);
5084 prog->anchored_utf8 = sv;
33b8afdf
JH
5085 sv_utf8_upgrade(sv);
5086 if (SvTAIL(prog->anchored_substr))
5087 SvTAIL_on(sv);
5088 if (prog->anchored_substr == prog->check_substr)
5089 prog->check_utf8 = sv;
5090 }
5091}
5092
5093STATIC void
5094S_to_byte_substr(pTHX_ register regexp *prog)
5095{
97aff369 5096 dVAR;
33b8afdf 5097 if (prog->float_utf8 && !prog->float_substr) {
097eb12c
AL
5098 SV* sv = newSVsv(prog->float_utf8);
5099 prog->float_substr = sv;
33b8afdf
JH
5100 if (sv_utf8_downgrade(sv, TRUE)) {
5101 if (SvTAIL(prog->float_utf8))
5102 SvTAIL_on(sv);
5103 } else {
5104 SvREFCNT_dec(sv);
5105 prog->float_substr = sv = &PL_sv_undef;
5106 }
5107 if (prog->float_utf8 == prog->check_utf8)
5108 prog->check_substr = sv;
5109 }
5110 if (prog->anchored_utf8 && !prog->anchored_substr) {
097eb12c
AL
5111 SV* sv = newSVsv(prog->anchored_utf8);
5112 prog->anchored_substr = sv;
33b8afdf
JH
5113 if (sv_utf8_downgrade(sv, TRUE)) {
5114 if (SvTAIL(prog->anchored_utf8))
5115 SvTAIL_on(sv);
5116 } else {
5117 SvREFCNT_dec(sv);
5118 prog->anchored_substr = sv = &PL_sv_undef;
5119 }
5120 if (prog->anchored_utf8 == prog->check_utf8)
5121 prog->check_substr = sv;
5122 }
5123}
66610fdd
RGS
5124
5125/*
5126 * Local variables:
5127 * c-indentation-style: bsd
5128 * c-basic-offset: 4
5129 * indent-tabs-mode: t
5130 * End:
5131 *
37442d52
RGS
5132 * ex: set ts=8 sts=4 sw=4 noet:
5133 */