This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Remove a redundant test on mg->mg_ptr. (It had to be non-NULL to get
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
61296642
DM
8/* This file contains functions for executing a regular expression. See
9 * also regcomp.c which funnily enough, contains functions for compiling
166f8a29 10 * a regular expression.
e4a054ea
DM
11 *
12 * This file is also copied at build time to ext/re/re_exec.c, where
13 * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14 * This causes the main functions to be compiled under new names and with
15 * debugging support added, which makes "use re 'debug'" work.
16
166f8a29
DM
17 */
18
a687059c
LW
19/* NOTE: this is derived from Henry Spencer's regexp code, and should not
20 * confused with the original package (see point 3 below). Thanks, Henry!
21 */
22
23/* Additional note: this code is very heavily munged from Henry's version
24 * in places. In some spots I've traded clarity for efficiency, so don't
25 * blame Henry for some of the lack of readability.
26 */
27
e50aee73
AD
28/* The names of the functions have been changed from regcomp and
29 * regexec to pregcomp and pregexec in order to avoid conflicts
30 * with the POSIX routines of the same names.
31*/
32
b9d5759e
AD
33#ifdef PERL_EXT_RE_BUILD
34/* need to replace pregcomp et al, so enable that */
35# ifndef PERL_IN_XSUB_RE
36# define PERL_IN_XSUB_RE
37# endif
38/* need access to debugger hooks */
cad2e5aa 39# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
40# define DEBUGGING
41# endif
42#endif
43
44#ifdef PERL_IN_XSUB_RE
d06ea78c 45/* We *really* need to overwrite these symbols: */
56953603
IZ
46# define Perl_regexec_flags my_regexec
47# define Perl_regdump my_regdump
48# define Perl_regprop my_regprop
cad2e5aa 49# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
50/* *These* symbols are masked to allow static link. */
51# define Perl_pregexec my_pregexec
9041c2e3 52# define Perl_reginitcolors my_reginitcolors
490a3f88 53# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
54
55# define PERL_NO_GET_CONTEXT
9041c2e3 56#endif
56953603 57
a687059c 58/*
e50aee73 59 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
60 *
61 * Copyright (c) 1986 by University of Toronto.
62 * Written by Henry Spencer. Not derived from licensed software.
63 *
64 * Permission is granted to anyone to use this software for any
65 * purpose on any computer system, and to redistribute it freely,
66 * subject to the following restrictions:
67 *
68 * 1. The author is not responsible for the consequences of use of
69 * this software, no matter how awful, even if they arise
70 * from defects in it.
71 *
72 * 2. The origin of this software must not be misrepresented, either
73 * by explicit claim or by omission.
74 *
75 * 3. Altered versions must be plainly marked as such, and must not
76 * be misrepresented as being the original software.
77 *
78 **** Alterations to Henry's code are...
79 ****
4bb101f2 80 **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 81 **** 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 82 ****
9ef589d8
LW
83 **** You may distribute under the terms of either the GNU General Public
84 **** License or the Artistic License, as specified in the README file.
a687059c
LW
85 *
86 * Beware that some of this code is subtly aware of the way operator
87 * precedence is structured in regular expressions. Serious changes in
88 * regular-expression syntax might require a total rethink.
89 */
90#include "EXTERN.h"
864dbfa3 91#define PERL_IN_REGEXEC_C
a687059c 92#include "perl.h"
0f5d15d6 93
a687059c
LW
94#include "regcomp.h"
95
c277df42
IZ
96#define RF_tainted 1 /* tainted information used? */
97#define RF_warned 2 /* warned about big count? */
ce862d02 98#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
99#define RF_utf8 8 /* String contains multibyte chars? */
100
eb160463 101#define UTF ((PL_reg_flags & RF_utf8) != 0)
ce862d02
IZ
102
103#define RS_init 1 /* eval environment created */
104#define RS_set 2 /* replsv value is set */
c277df42 105
a687059c
LW
106#ifndef STATIC
107#define STATIC static
108#endif
109
ba7b4546 110#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
7d3e948e 111
c277df42
IZ
112/*
113 * Forwards.
114 */
115
33b8afdf 116#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 117#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 118
dfe13c55
GS
119#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
120#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
121#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
122#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
123#define HOPc(pos,off) ((char*)HOP(pos,off))
124#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 125
efb30f32 126#define HOPBACK(pos, off) ( \
e54858b0 127 (PL_reg_match_utf8) \
efb30f32
HS
128 ? reghopmaybe((U8*)pos, -off) \
129 : (pos - off >= PL_bostr) \
130 ? (U8*)(pos - off) \
131 : (U8*)NULL \
132)
133#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
134
1aa99e6b
IH
135#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
136#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
137#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
138#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
139#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
140#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
141
1a4fad37
AL
142#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
143 if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
144#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
145#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
146#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
147#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
51371543 148
5f80c4cf 149/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
150#define JUMPABLE(rn) ( \
151 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
152 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
153 OP(rn) == PLUS || OP(rn) == MINMOD || \
154 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
155)
156
cca55fe3
JP
157#define HAS_TEXT(rn) ( \
158 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
159)
e2d8ce26 160
a84d97b6
HS
161/*
162 Search for mandatory following text node; for lookahead, the text must
163 follow but for lookbehind (rn->flags != 0) we skip to the next step.
164*/
cca55fe3 165#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 166 while (JUMPABLE(rn)) \
a84d97b6 167 if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 168 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
169 else if (OP(rn) == PLUS) \
170 rn = NEXTOPER(rn); \
a84d97b6
HS
171 else if (OP(rn) == IFMATCH) \
172 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
e2d8ce26 173 else rn += NEXT_OFF(rn); \
5f80c4cf 174} STMT_END
74750237 175
acfe0abc 176static void restore_pos(pTHX_ void *arg);
51371543 177
76e3520e 178STATIC CHECKPOINT
cea2e8a9 179S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 180{
97aff369 181 dVAR;
a3b680e6 182 const int retval = PL_savestack_ix;
b1ce53c5 183#define REGCP_PAREN_ELEMS 4
a3b680e6 184 const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
185 int p;
186
e49a9654
IH
187 if (paren_elems_to_push < 0)
188 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
189
a01268b5 190#define REGCP_OTHER_ELEMS 6
4b3c1a47 191 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 192 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 193/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
194 SSPUSHINT(PL_regendp[p]);
195 SSPUSHINT(PL_regstartp[p]);
3280af22 196 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
197 SSPUSHINT(p);
198 }
b1ce53c5 199/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
200 SSPUSHINT(PL_regsize);
201 SSPUSHINT(*PL_reglastparen);
a01268b5 202 SSPUSHINT(*PL_reglastcloseparen);
3280af22 203 SSPUSHPTR(PL_reginput);
41123dfd
JH
204#define REGCP_FRAME_ELEMS 2
205/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
206 * are needed for the regexp context stack bookkeeping. */
207 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 208 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 209
a0d0e21e
LW
210 return retval;
211}
212
c277df42 213/* These are needed since we do not localize EVAL nodes: */
a3621e74 214# define REGCP_SET(cp) DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, \
faccc32b 215 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 216 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 217
a3621e74 218# define REGCP_UNWIND(cp) DEBUG_EXECUTE_r(cp != PL_savestack_ix ? \
c3464db5 219 PerlIO_printf(Perl_debug_log, \
faccc32b 220 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 221 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 222
76e3520e 223STATIC char *
cea2e8a9 224S_regcppop(pTHX)
a0d0e21e 225{
97aff369 226 dVAR;
b1ce53c5 227 I32 i;
a0d0e21e
LW
228 U32 paren = 0;
229 char *input;
b1ce53c5 230
a3621e74
YO
231 GET_RE_DEBUG_FLAGS_DECL;
232
b1ce53c5 233 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 234 i = SSPOPINT;
b1ce53c5
JH
235 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
236 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 237 input = (char *) SSPOPPTR;
a01268b5 238 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
239 *PL_reglastparen = SSPOPINT;
240 PL_regsize = SSPOPINT;
b1ce53c5
JH
241
242 /* Now restore the parentheses context. */
41123dfd
JH
243 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
244 i > 0; i -= REGCP_PAREN_ELEMS) {
1df70142 245 I32 tmps;
a0d0e21e 246 paren = (U32)SSPOPINT;
3280af22 247 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
248 PL_regstartp[paren] = SSPOPINT;
249 tmps = SSPOPINT;
3280af22
NIS
250 if (paren <= *PL_reglastparen)
251 PL_regendp[paren] = tmps;
a3621e74 252 DEBUG_EXECUTE_r(
c3464db5 253 PerlIO_printf(Perl_debug_log,
b900a521 254 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 255 (UV)paren, (IV)PL_regstartp[paren],
b900a521 256 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 257 (IV)PL_regendp[paren],
3280af22 258 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 259 );
a0d0e21e 260 }
a3621e74 261 DEBUG_EXECUTE_r(
eb160463 262 if ((I32)(*PL_reglastparen + 1) <= PL_regnpar) {
c3464db5 263 PerlIO_printf(Perl_debug_log,
faccc32b
JH
264 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
265 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
266 }
267 );
daf18116 268#if 1
dafc8851
JH
269 /* It would seem that the similar code in regtry()
270 * already takes care of this, and in fact it is in
271 * a better location to since this code can #if 0-ed out
272 * but the code in regtry() is needed or otherwise tests
273 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
274 * (as of patchlevel 7877) will fail. Then again,
275 * this code seems to be necessary or otherwise
276 * building DynaLoader will fail:
277 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
278 * --jhi */
eb160463
GS
279 for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) {
280 if ((I32)paren > PL_regsize)
cf93c79d
IZ
281 PL_regstartp[paren] = -1;
282 PL_regendp[paren] = -1;
a0d0e21e 283 }
dafc8851 284#endif
a0d0e21e
LW
285 return input;
286}
287
0f5d15d6
IZ
288typedef struct re_cc_state
289{
290 I32 ss;
291 regnode *node;
292 struct re_cc_state *prev;
293 CURCUR *cc;
294 regexp *re;
295} re_cc_state;
296
02db2b7b 297#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 298
29d1e993
HS
299#define TRYPAREN(paren, n, input) { \
300 if (paren) { \
301 if (n) { \
302 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
303 PL_regendp[paren] = input - PL_bostr; \
304 } \
305 else \
306 PL_regendp[paren] = -1; \
307 } \
308 if (regmatch(next)) \
309 sayYES; \
310 if (paren && n) \
311 PL_regendp[paren] = -1; \
312}
313
314
a687059c 315/*
e50aee73 316 * pregexec and friends
a687059c
LW
317 */
318
319/*
c277df42 320 - pregexec - match a regexp against a string
a687059c 321 */
c277df42 322I32
864dbfa3 323Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 324 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
325/* strend: pointer to null at end of string */
326/* strbeg: real beginning of string */
327/* minend: end of match must be >=minend after stringarg. */
328/* nosave: For optimizations. */
329{
330 return
9041c2e3 331 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
332 nosave ? 0 : REXEC_COPY_STR);
333}
0f5d15d6
IZ
334
335STATIC void
cea2e8a9 336S_cache_re(pTHX_ regexp *prog)
0f5d15d6 337{
97aff369 338 dVAR;
0f5d15d6
IZ
339 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
340#ifdef DEBUGGING
341 PL_regprogram = prog->program;
342#endif
343 PL_regnpar = prog->nparens;
9041c2e3
NIS
344 PL_regdata = prog->data;
345 PL_reg_re = prog;
0f5d15d6 346}
22e551b9 347
9041c2e3 348/*
cad2e5aa
JH
349 * Need to implement the following flags for reg_anch:
350 *
351 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
352 * USE_INTUIT_ML
353 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
354 * INTUIT_AUTORITATIVE_ML
355 * INTUIT_ONCE_NOML - Intuit can match in one location only.
356 * INTUIT_ONCE_ML
357 *
358 * Another flag for this function: SECOND_TIME (so that float substrs
359 * with giant delta may be not rechecked).
360 */
361
362/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
363
3f7c398e 364/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
cad2e5aa
JH
365 Otherwise, only SvCUR(sv) is used to get strbeg. */
366
367/* XXXX We assume that strpos is strbeg unless sv. */
368
6eb5f6b9
JH
369/* XXXX Some places assume that there is a fixed substring.
370 An update may be needed if optimizer marks as "INTUITable"
371 RExen without fixed substrings. Similarly, it is assumed that
372 lengths of all the strings are no more than minlen, thus they
373 cannot come from lookahead.
374 (Or minlen should take into account lookahead.) */
375
2c2d71f5
JH
376/* A failure to find a constant substring means that there is no need to make
377 an expensive call to REx engine, thus we celebrate a failure. Similarly,
378 finding a substring too deep into the string means that less calls to
30944b6d
IZ
379 regtry() should be needed.
380
381 REx compiler's optimizer found 4 possible hints:
382 a) Anchored substring;
383 b) Fixed substring;
384 c) Whether we are anchored (beginning-of-line or \G);
385 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 386 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
387 string which does not contradict any of them.
388 */
2c2d71f5 389
6eb5f6b9
JH
390/* Most of decisions we do here should have been done at compile time.
391 The nodes of the REx which we used for the search should have been
392 deleted from the finite automaton. */
393
cad2e5aa
JH
394char *
395Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
396 char *strend, U32 flags, re_scream_pos_data *data)
397{
97aff369 398 dVAR;
b7953727 399 register I32 start_shift = 0;
cad2e5aa 400 /* Should be nonnegative! */
b7953727 401 register I32 end_shift = 0;
2c2d71f5
JH
402 register char *s;
403 register SV *check;
a1933d95 404 char *strbeg;
cad2e5aa 405 char *t;
a3b680e6 406 const int do_utf8 = sv ? SvUTF8(sv) : 0; /* if no sv we have to assume bytes */
cad2e5aa 407 I32 ml_anch;
bd61b366
SS
408 register char *other_last = NULL; /* other substr checked before this */
409 char *check_at = NULL; /* check substr found at this pos */
1df70142 410 const I32 multiline = prog->reganch & PMf_MULTILINE;
30944b6d 411#ifdef DEBUGGING
890ce7af
AL
412 const char * const i_strpos = strpos;
413 SV * const dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 414#endif
a3621e74
YO
415
416 GET_RE_DEBUG_FLAGS_DECL;
417
a30b2f1f 418 RX_MATCH_UTF8_set(prog,do_utf8);
cad2e5aa 419
b8d68ded 420 if (prog->reganch & ROPT_UTF8) {
a3621e74 421 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded
JH
422 "UTF-8 regex...\n"));
423 PL_reg_flags |= RF_utf8;
424 }
425
a3621e74 426 DEBUG_EXECUTE_r({
1df70142 427 const char *s = PL_reg_match_utf8 ?
c728cb41
JH
428 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
429 strpos;
1df70142 430 const int len = PL_reg_match_utf8 ?
b8d68ded 431 strlen(s) : strend - strpos;
2a782b5b
JH
432 if (!PL_colorset)
433 reginitcolors();
b8d68ded 434 if (PL_reg_match_utf8)
a3621e74 435 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded 436 "UTF-8 target...\n"));
2a782b5b 437 PerlIO_printf(Perl_debug_log,
a0288114 438 "%sGuessing start of match, REx%s \"%s%.60s%s%s\" against \"%s%.*s%s%s\"...\n",
e4584336 439 PL_colors[4], PL_colors[5], PL_colors[0],
2a782b5b
JH
440 prog->precomp,
441 PL_colors[1],
442 (strlen(prog->precomp) > 60 ? "..." : ""),
443 PL_colors[0],
444 (int)(len > 60 ? 60 : len),
445 s, PL_colors[1],
446 (len > 60 ? "..." : "")
447 );
448 });
cad2e5aa 449
c344f387
JH
450 /* CHR_DIST() would be more correct here but it makes things slow. */
451 if (prog->minlen > strend - strpos) {
a3621e74 452 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584 453 "String too short... [re_intuit_start]\n"));
cad2e5aa 454 goto fail;
2c2d71f5 455 }
a1933d95 456 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 457 PL_regeol = strend;
33b8afdf
JH
458 if (do_utf8) {
459 if (!prog->check_utf8 && prog->check_substr)
460 to_utf8_substr(prog);
461 check = prog->check_utf8;
462 } else {
463 if (!prog->check_substr && prog->check_utf8)
464 to_byte_substr(prog);
465 check = prog->check_substr;
466 }
467 if (check == &PL_sv_undef) {
a3621e74 468 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
469 "Non-utf string cannot match utf check string\n"));
470 goto fail;
471 }
2c2d71f5 472 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
473 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
474 || ( (prog->reganch & ROPT_ANCH_BOL)
7fba1cd6 475 && !multiline ) ); /* Check after \n? */
cad2e5aa 476
7e25d62c
JH
477 if (!ml_anch) {
478 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
479 | ROPT_IMPLICIT)) /* not a real BOL */
3f7c398e 480 /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
7e25d62c
JH
481 && sv && !SvROK(sv)
482 && (strpos != strbeg)) {
a3621e74 483 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
7e25d62c
JH
484 goto fail;
485 }
486 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 487 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 488 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
489 I32 slen;
490
1aa99e6b 491 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
492 if (SvTAIL(check)) {
493 slen = SvCUR(check); /* >= 1 */
cad2e5aa 494
9041c2e3 495 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5 496 || (strend - s == slen && strend[-1] != '\n')) {
a3621e74 497 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
2c2d71f5 498 goto fail_finish;
cad2e5aa
JH
499 }
500 /* Now should match s[0..slen-2] */
501 slen--;
3f7c398e 502 if (slen && (*SvPVX_const(check) != *s
cad2e5aa 503 || (slen > 1
3f7c398e 504 && memNE(SvPVX_const(check), s, slen)))) {
2c2d71f5 505 report_neq:
a3621e74 506 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
2c2d71f5
JH
507 goto fail_finish;
508 }
cad2e5aa 509 }
3f7c398e 510 else if (*SvPVX_const(check) != *s
653099ff 511 || ((slen = SvCUR(check)) > 1
3f7c398e 512 && memNE(SvPVX_const(check), s, slen)))
2c2d71f5 513 goto report_neq;
c315bfe8 514 check_at = s;
2c2d71f5 515 goto success_at_start;
7e25d62c 516 }
cad2e5aa 517 }
2c2d71f5 518 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 519 s = strpos;
2c2d71f5 520 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 521 end_shift = prog->minlen - start_shift -
653099ff 522 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 523 if (!ml_anch) {
a3b680e6 524 const I32 end = prog->check_offset_max + CHR_SVLEN(check)
653099ff 525 - (SvTAIL(check) != 0);
a3b680e6 526 const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
527
528 if (end_shift < eshift)
529 end_shift = eshift;
530 }
cad2e5aa 531 }
2c2d71f5 532 else { /* Can match at random position */
cad2e5aa
JH
533 ml_anch = 0;
534 s = strpos;
2c2d71f5
JH
535 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
536 /* Should be nonnegative! */
537 end_shift = prog->minlen - start_shift -
653099ff 538 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
539 }
540
2c2d71f5 541#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 542 if (end_shift < 0)
6bbae5e6 543 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
544#endif
545
2c2d71f5
JH
546 restart:
547 /* Find a possible match in the region s..strend by looking for
548 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 549 if (flags & REXEC_SCREAM) {
cad2e5aa 550 I32 p = -1; /* Internal iterator of scream. */
a3b680e6 551 I32 * const pp = data ? data->scream_pos : &p;
cad2e5aa 552
2c2d71f5
JH
553 if (PL_screamfirst[BmRARE(check)] >= 0
554 || ( BmRARE(check) == '\n'
555 && (BmPREVIOUS(check) == SvCUR(check) - 1)
556 && SvTAIL(check) ))
9041c2e3 557 s = screaminstr(sv, check,
2c2d71f5 558 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 559 else
2c2d71f5 560 goto fail_finish;
4addbd3b
HS
561 /* we may be pointing at the wrong string */
562 if (s && RX_MATCH_COPIED(prog))
3f7c398e 563 s = strbeg + (s - SvPVX_const(sv));
cad2e5aa
JH
564 if (data)
565 *data->scream_olds = s;
566 }
f33976b4 567 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
568 s = fbm_instr((U8*)(s + start_shift),
569 (U8*)(strend - end_shift),
7fba1cd6 570 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 571 else
1aa99e6b
IH
572 s = fbm_instr(HOP3(s, start_shift, strend),
573 HOP3(strend, -end_shift, strbeg),
7fba1cd6 574 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
575
576 /* Update the count-of-usability, remove useless subpatterns,
577 unshift s. */
2c2d71f5 578
a0288114 579 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s %s substr \"%s%.*s%s\"%s%s",
2c2d71f5 580 (s ? "Found" : "Did not find"),
33b8afdf 581 (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
2c2d71f5 582 PL_colors[0],
7b0972df 583 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
3f7c398e 584 SvPVX_const(check),
2c2d71f5
JH
585 PL_colors[1], (SvTAIL(check) ? "$" : ""),
586 (s ? " at offset " : "...\n") ) );
587
588 if (!s)
589 goto fail_finish;
590
6eb5f6b9
JH
591 check_at = s;
592
2c2d71f5 593 /* Finish the diagnostic message */
a3621e74 594 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
595
596 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
597 Start with the other substr.
598 XXXX no SCREAM optimization yet - and a very coarse implementation
a0288114 599 XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
2c2d71f5
JH
600 *always* match. Probably should be marked during compile...
601 Probably it is right to do no SCREAM here...
602 */
603
33b8afdf 604 if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
30944b6d 605 /* Take into account the "other" substring. */
2c2d71f5
JH
606 /* XXXX May be hopelessly wrong for UTF... */
607 if (!other_last)
6eb5f6b9 608 other_last = strpos;
33b8afdf 609 if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
30944b6d
IZ
610 do_other_anchored:
611 {
890ce7af
AL
612 char * const last = HOP3c(s, -start_shift, strbeg);
613 char *last1, *last2;
2c2d71f5 614 char *s1 = s;
33b8afdf 615 SV* must;
2c2d71f5 616
2c2d71f5
JH
617 t = s - prog->check_offset_max;
618 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 619 && (!do_utf8
1aa99e6b 620 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 621 && t > strpos)))
30944b6d 622 /* EMPTY */;
2c2d71f5
JH
623 else
624 t = strpos;
1aa99e6b 625 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
626 if (t < other_last) /* These positions already checked */
627 t = other_last;
1aa99e6b 628 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
629 if (last < last1)
630 last1 = last;
631 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
632 /* On end-of-str: see comment below. */
33b8afdf
JH
633 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
634 if (must == &PL_sv_undef) {
635 s = (char*)NULL;
a3621e74 636 DEBUG_EXECUTE_r(must = prog->anchored_utf8); /* for debug */
33b8afdf
JH
637 }
638 else
639 s = fbm_instr(
640 (unsigned char*)t,
641 HOP3(HOP3(last1, prog->anchored_offset, strend)
642 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
643 must,
7fba1cd6 644 multiline ? FBMrf_MULTILINE : 0
33b8afdf 645 );
a3621e74 646 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a0288114 647 "%s anchored substr \"%s%.*s%s\"%s",
2c2d71f5
JH
648 (s ? "Found" : "Contradicts"),
649 PL_colors[0],
33b8afdf
JH
650 (int)(SvCUR(must)
651 - (SvTAIL(must)!=0)),
3f7c398e 652 SvPVX_const(must),
33b8afdf 653 PL_colors[1], (SvTAIL(must) ? "$" : "")));
2c2d71f5
JH
654 if (!s) {
655 if (last1 >= last2) {
a3621e74 656 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5
JH
657 ", giving up...\n"));
658 goto fail_finish;
659 }
a3621e74 660 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 661 ", trying floating at offset %ld...\n",
1aa99e6b
IH
662 (long)(HOP3c(s1, 1, strend) - i_strpos)));
663 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
664 s = HOP3c(last, 1, strend);
2c2d71f5
JH
665 goto restart;
666 }
667 else {
a3621e74 668 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 669 (long)(s - i_strpos)));
1aa99e6b
IH
670 t = HOP3c(s, -prog->anchored_offset, strbeg);
671 other_last = HOP3c(s, 1, strend);
30944b6d 672 s = s1;
2c2d71f5
JH
673 if (t == strpos)
674 goto try_at_start;
2c2d71f5
JH
675 goto try_at_offset;
676 }
30944b6d 677 }
2c2d71f5
JH
678 }
679 else { /* Take into account the floating substring. */
33b8afdf
JH
680 char *last, *last1;
681 char *s1 = s;
682 SV* must;
683
684 t = HOP3c(s, -start_shift, strbeg);
685 last1 = last =
686 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
687 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
688 last = HOP3c(t, prog->float_max_offset, strend);
689 s = HOP3c(t, prog->float_min_offset, strend);
690 if (s < other_last)
691 s = other_last;
2c2d71f5 692 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
33b8afdf
JH
693 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
694 /* fbm_instr() takes into account exact value of end-of-str
695 if the check is SvTAIL(ed). Since false positives are OK,
696 and end-of-str is not later than strend we are OK. */
697 if (must == &PL_sv_undef) {
698 s = (char*)NULL;
a3621e74 699 DEBUG_EXECUTE_r(must = prog->float_utf8); /* for debug message */
33b8afdf
JH
700 }
701 else
2c2d71f5 702 s = fbm_instr((unsigned char*)s,
33b8afdf
JH
703 (unsigned char*)last + SvCUR(must)
704 - (SvTAIL(must)!=0),
7fba1cd6 705 must, multiline ? FBMrf_MULTILINE : 0);
a0288114 706 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s floating substr \"%s%.*s%s\"%s",
33b8afdf
JH
707 (s ? "Found" : "Contradicts"),
708 PL_colors[0],
709 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 710 SvPVX_const(must),
33b8afdf
JH
711 PL_colors[1], (SvTAIL(must) ? "$" : "")));
712 if (!s) {
713 if (last1 == last) {
a3621e74 714 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
715 ", giving up...\n"));
716 goto fail_finish;
2c2d71f5 717 }
a3621e74 718 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
719 ", trying anchored starting at offset %ld...\n",
720 (long)(s1 + 1 - i_strpos)));
721 other_last = last;
722 s = HOP3c(t, 1, strend);
723 goto restart;
724 }
725 else {
a3621e74 726 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
33b8afdf
JH
727 (long)(s - i_strpos)));
728 other_last = s; /* Fix this later. --Hugo */
729 s = s1;
730 if (t == strpos)
731 goto try_at_start;
732 goto try_at_offset;
733 }
2c2d71f5 734 }
cad2e5aa 735 }
2c2d71f5
JH
736
737 t = s - prog->check_offset_max;
2c2d71f5 738 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 739 && (!do_utf8
1aa99e6b
IH
740 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
741 && t > strpos))) {
2c2d71f5
JH
742 /* Fixed substring is found far enough so that the match
743 cannot start at strpos. */
744 try_at_offset:
cad2e5aa 745 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
746 /* Eventually fbm_*() should handle this, but often
747 anchored_offset is not 0, so this check will not be wasted. */
748 /* XXXX In the code below we prefer to look for "^" even in
749 presence of anchored substrings. And we search even
750 beyond the found float position. These pessimizations
751 are historical artefacts only. */
752 find_anchor:
2c2d71f5 753 while (t < strend - prog->minlen) {
cad2e5aa 754 if (*t == '\n') {
4ee3650e 755 if (t < check_at - prog->check_offset_min) {
33b8afdf 756 if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
4ee3650e
GS
757 /* Since we moved from the found position,
758 we definitely contradict the found anchored
30944b6d
IZ
759 substr. Due to the above check we do not
760 contradict "check" substr.
761 Thus we can arrive here only if check substr
762 is float. Redo checking for "other"=="fixed".
763 */
9041c2e3 764 strpos = t + 1;
a3621e74 765 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
e4584336 766 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
30944b6d
IZ
767 goto do_other_anchored;
768 }
4ee3650e
GS
769 /* We don't contradict the found floating substring. */
770 /* XXXX Why not check for STCLASS? */
cad2e5aa 771 s = t + 1;
a3621e74 772 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
e4584336 773 PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
774 goto set_useful;
775 }
4ee3650e
GS
776 /* Position contradicts check-string */
777 /* XXXX probably better to look for check-string
778 than for "\n", so one should lower the limit for t? */
a3621e74 779 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
e4584336 780 PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 781 other_last = strpos = s = t + 1;
cad2e5aa
JH
782 goto restart;
783 }
784 t++;
785 }
a3621e74 786 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
e4584336 787 PL_colors[0], PL_colors[1]));
2c2d71f5 788 goto fail_finish;
cad2e5aa 789 }
f5952150 790 else {
a3621e74 791 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
e4584336 792 PL_colors[0], PL_colors[1]));
f5952150 793 }
cad2e5aa
JH
794 s = t;
795 set_useful:
33b8afdf 796 ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
cad2e5aa
JH
797 }
798 else {
f5952150 799 /* The found string does not prohibit matching at strpos,
2c2d71f5 800 - no optimization of calling REx engine can be performed,
f5952150
GS
801 unless it was an MBOL and we are not after MBOL,
802 or a future STCLASS check will fail this. */
2c2d71f5
JH
803 try_at_start:
804 /* Even in this situation we may use MBOL flag if strpos is offset
805 wrt the start of the string. */
05b4157f 806 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 807 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
808 /* May be due to an implicit anchor of m{.*foo} */
809 && !(prog->reganch & ROPT_IMPLICIT))
810 {
cad2e5aa
JH
811 t = strpos;
812 goto find_anchor;
813 }
a3621e74 814 DEBUG_EXECUTE_r( if (ml_anch)
f5952150 815 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
e4584336 816 (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
30944b6d 817 );
2c2d71f5 818 success_at_start:
30944b6d 819 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
33b8afdf
JH
820 && (do_utf8 ? (
821 prog->check_utf8 /* Could be deleted already */
822 && --BmUSEFUL(prog->check_utf8) < 0
823 && (prog->check_utf8 == prog->float_utf8)
824 ) : (
825 prog->check_substr /* Could be deleted already */
826 && --BmUSEFUL(prog->check_substr) < 0
827 && (prog->check_substr == prog->float_substr)
828 )))
66e933ab 829 {
cad2e5aa 830 /* If flags & SOMETHING - do not do it many times on the same match */
a3621e74 831 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
33b8afdf
JH
832 SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
833 if (do_utf8 ? prog->check_substr : prog->check_utf8)
834 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
a0714e2c
SS
835 prog->check_substr = prog->check_utf8 = NULL; /* disable */
836 prog->float_substr = prog->float_utf8 = NULL; /* clear */
837 check = NULL; /* abort */
cad2e5aa 838 s = strpos;
3cf5c195
IZ
839 /* XXXX This is a remnant of the old implementation. It
840 looks wasteful, since now INTUIT can use many
6eb5f6b9 841 other heuristics. */
cad2e5aa
JH
842 prog->reganch &= ~RE_USE_INTUIT;
843 }
844 else
845 s = strpos;
846 }
847
6eb5f6b9
JH
848 /* Last resort... */
849 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
850 if (prog->regstclass) {
851 /* minlen == 0 is possible if regstclass is \b or \B,
852 and the fixed substr is ''$.
853 Since minlen is already taken into account, s+1 is before strend;
854 accidentally, minlen >= 1 guaranties no false positives at s + 1
855 even for \b or \B. But (minlen? 1 : 0) below assumes that
856 regstclass does not come from lookahead... */
857 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
858 This leaves EXACTF only, which is dealt with in find_byclass(). */
890ce7af 859 const U8* const str = (U8*)STRING(prog->regstclass);
06b5626a 860 const int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 861 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 862 : 1);
a3b680e6 863 const char * const endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
1aa99e6b 864 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
33b8afdf 865 : (prog->float_substr || prog->float_utf8
1aa99e6b
IH
866 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
867 cl_l, strend)
868 : strend);
6eb5f6b9
JH
869
870 t = s;
9926ca43 871 cache_re(prog);
06b5626a 872 s = find_byclass(prog, prog->regstclass, s, endpos, 1);
6eb5f6b9
JH
873 if (!s) {
874#ifdef DEBUGGING
cbbf8932 875 const char *what = NULL;
6eb5f6b9
JH
876#endif
877 if (endpos == strend) {
a3621e74 878 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
879 "Could not match STCLASS...\n") );
880 goto fail;
881 }
a3621e74 882 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab 883 "This position contradicts STCLASS...\n") );
653099ff
GS
884 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
885 goto fail;
6eb5f6b9 886 /* Contradict one of substrings */
33b8afdf
JH
887 if (prog->anchored_substr || prog->anchored_utf8) {
888 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
a3621e74 889 DEBUG_EXECUTE_r( what = "anchored" );
6eb5f6b9 890 hop_and_restart:
1aa99e6b 891 s = HOP3c(t, 1, strend);
66e933ab
GS
892 if (s + start_shift + end_shift > strend) {
893 /* XXXX Should be taken into account earlier? */
a3621e74 894 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab
GS
895 "Could not match STCLASS...\n") );
896 goto fail;
897 }
5e39e1e5
HS
898 if (!check)
899 goto giveup;
a3621e74 900 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 901 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
902 what, (long)(s + start_shift - i_strpos)) );
903 goto restart;
904 }
66e933ab 905 /* Have both, check_string is floating */
6eb5f6b9
JH
906 if (t + start_shift >= check_at) /* Contradicts floating=check */
907 goto retry_floating_check;
908 /* Recheck anchored substring, but not floating... */
9041c2e3 909 s = check_at;
5e39e1e5
HS
910 if (!check)
911 goto giveup;
a3621e74 912 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 913 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
914 (long)(other_last - i_strpos)) );
915 goto do_other_anchored;
916 }
60e71179
GS
917 /* Another way we could have checked stclass at the
918 current position only: */
919 if (ml_anch) {
920 s = t = t + 1;
5e39e1e5
HS
921 if (!check)
922 goto giveup;
a3621e74 923 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 924 "Looking for /%s^%s/m starting at offset %ld...\n",
e4584336 925 PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
60e71179 926 goto try_at_offset;
66e933ab 927 }
33b8afdf 928 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
60e71179 929 goto fail;
6eb5f6b9
JH
930 /* Check is floating subtring. */
931 retry_floating_check:
932 t = check_at - start_shift;
a3621e74 933 DEBUG_EXECUTE_r( what = "floating" );
6eb5f6b9
JH
934 goto hop_and_restart;
935 }
b7953727 936 if (t != s) {
a3621e74 937 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 938 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
939 (long)(t - i_strpos), (long)(s - i_strpos))
940 );
941 }
942 else {
a3621e74 943 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b7953727
JH
944 "Does not contradict STCLASS...\n");
945 );
946 }
6eb5f6b9 947 }
5e39e1e5 948 giveup:
a3621e74 949 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
5e39e1e5
HS
950 PL_colors[4], (check ? "Guessed" : "Giving up"),
951 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 952 return s;
2c2d71f5
JH
953
954 fail_finish: /* Substring not found */
33b8afdf
JH
955 if (prog->check_substr || prog->check_utf8) /* could be removed already */
956 BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
cad2e5aa 957 fail:
a3621e74 958 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
e4584336 959 PL_colors[4], PL_colors[5]));
bd61b366 960 return NULL;
cad2e5aa 961}
9661b544 962
6eb5f6b9 963/* We know what class REx starts with. Try to find this position... */
3c3eec57 964STATIC char *
a3b680e6 965S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32 norun)
a687059c 966{
27da23d5 967 dVAR;
1df70142 968 const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
6eb5f6b9 969 char *m;
d8093b23 970 STRLEN ln;
5dab1207 971 STRLEN lnc;
078c425b 972 register STRLEN uskip;
d8093b23
G
973 unsigned int c1;
974 unsigned int c2;
6eb5f6b9
JH
975 char *e;
976 register I32 tmp = 1; /* Scratch variable? */
a3b680e6 977 register const bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 978
6eb5f6b9
JH
979 /* We know what class it must start with. */
980 switch (OP(c)) {
6eb5f6b9 981 case ANYOF:
388cc4de 982 if (do_utf8) {
078c425b 983 while (s + (uskip = UTF8SKIP(s)) <= strend) {
388cc4de
HS
984 if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
985 !UTF8_IS_INVARIANT((U8)s[0]) ?
986 reginclass(c, (U8*)s, 0, do_utf8) :
987 REGINCLASS(c, (U8*)s)) {
988 if (tmp && (norun || regtry(prog, s)))
989 goto got_it;
990 else
991 tmp = doevery;
992 }
993 else
994 tmp = 1;
078c425b 995 s += uskip;
388cc4de
HS
996 }
997 }
998 else {
999 while (s < strend) {
1000 STRLEN skip = 1;
1001
1002 if (REGINCLASS(c, (U8*)s) ||
1003 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1004 /* The assignment of 2 is intentional:
1005 * for the folded sharp s, the skip is 2. */
1006 (skip = SHARP_S_SKIP))) {
1007 if (tmp && (norun || regtry(prog, s)))
1008 goto got_it;
1009 else
1010 tmp = doevery;
1011 }
1012 else
1013 tmp = 1;
1014 s += skip;
1015 }
a0d0e21e 1016 }
6eb5f6b9 1017 break;
f33976b4
DB
1018 case CANY:
1019 while (s < strend) {
1020 if (tmp && (norun || regtry(prog, s)))
1021 goto got_it;
1022 else
1023 tmp = doevery;
1024 s++;
1025 }
1026 break;
6eb5f6b9 1027 case EXACTF:
5dab1207
NIS
1028 m = STRING(c);
1029 ln = STR_LEN(c); /* length to match in octets/bytes */
1030 lnc = (I32) ln; /* length to match in characters */
1aa99e6b 1031 if (UTF) {
a2a2844f 1032 STRLEN ulen1, ulen2;
5dab1207 1033 U8 *sm = (U8 *) m;
89ebb4a3
JH
1034 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1035 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
041457d9 1036 const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
a2a2844f
JH
1037
1038 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1039 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1040
89ebb4a3 1041 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
041457d9 1042 0, uniflags);
89ebb4a3 1043 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
041457d9 1044 0, uniflags);
5dab1207
NIS
1045 lnc = 0;
1046 while (sm < ((U8 *) m + ln)) {
1047 lnc++;
1048 sm += UTF8SKIP(sm);
1049 }
1aa99e6b
IH
1050 }
1051 else {
1052 c1 = *(U8*)m;
1053 c2 = PL_fold[c1];
1054 }
6eb5f6b9
JH
1055 goto do_exactf;
1056 case EXACTFL:
5dab1207
NIS
1057 m = STRING(c);
1058 ln = STR_LEN(c);
1059 lnc = (I32) ln;
d8093b23 1060 c1 = *(U8*)m;
6eb5f6b9
JH
1061 c2 = PL_fold_locale[c1];
1062 do_exactf:
db12adc6 1063 e = HOP3c(strend, -((I32)lnc), s);
b3c9acc1 1064
6eb5f6b9
JH
1065 if (norun && e < s)
1066 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 1067
60a8b682
JH
1068 /* The idea in the EXACTF* cases is to first find the
1069 * first character of the EXACTF* node and then, if
1070 * necessary, case-insensitively compare the full
1071 * text of the node. The c1 and c2 are the first
1072 * characters (though in Unicode it gets a bit
1073 * more complicated because there are more cases
7f16dd3d
JH
1074 * than just upper and lower: one needs to use
1075 * the so-called folding case for case-insensitive
1076 * matching (called "loose matching" in Unicode).
1077 * ibcmp_utf8() will do just that. */
60a8b682 1078
1aa99e6b 1079 if (do_utf8) {
575cac57 1080 UV c, f;
89ebb4a3 1081 U8 tmpbuf [UTF8_MAXBYTES+1];
575cac57 1082 STRLEN len, foldlen;
041457d9 1083 const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
09091399 1084 if (c1 == c2) {
5dab1207
NIS
1085 /* Upper and lower of 1st char are equal -
1086 * probably not a "letter". */
1aa99e6b 1087 while (s <= e) {
89ebb4a3 1088 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1089 uniflags);
80aecb99
JH
1090 if ( c == c1
1091 && (ln == len ||
66423254 1092 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1093 m, (char **)0, ln, (bool)UTF))
55da9344 1094 && (norun || regtry(prog, s)) )
1aa99e6b 1095 goto got_it;
80aecb99 1096 else {
1df70142 1097 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1098 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1099 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1100 if ( f != c
1101 && (f == c1 || f == c2)
1102 && (ln == foldlen ||
66423254
JH
1103 !ibcmp_utf8((char *) foldbuf,
1104 (char **)0, foldlen, do_utf8,
d07ddd77 1105 m,
eb160463 1106 (char **)0, ln, (bool)UTF))
80aecb99
JH
1107 && (norun || regtry(prog, s)) )
1108 goto got_it;
1109 }
1aa99e6b
IH
1110 s += len;
1111 }
09091399
JH
1112 }
1113 else {
1aa99e6b 1114 while (s <= e) {
89ebb4a3 1115 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1116 uniflags);
80aecb99 1117
60a8b682 1118 /* Handle some of the three Greek sigmas cases.
8c01da3c
JH
1119 * Note that not all the possible combinations
1120 * are handled here: some of them are handled
1121 * by the standard folding rules, and some of
1122 * them (the character class or ANYOF cases)
1123 * are handled during compiletime in
1124 * regexec.c:S_regclass(). */
880bd946
JH
1125 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1126 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1127 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99
JH
1128
1129 if ( (c == c1 || c == c2)
1130 && (ln == len ||
66423254 1131 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1132 m, (char **)0, ln, (bool)UTF))
55da9344 1133 && (norun || regtry(prog, s)) )
1aa99e6b 1134 goto got_it;
80aecb99 1135 else {
1df70142 1136 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1137 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1138 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1139 if ( f != c
1140 && (f == c1 || f == c2)
1141 && (ln == foldlen ||
a6872d42 1142 !ibcmp_utf8((char *) foldbuf,
66423254 1143 (char **)0, foldlen, do_utf8,
d07ddd77 1144 m,
eb160463 1145 (char **)0, ln, (bool)UTF))
80aecb99
JH
1146 && (norun || regtry(prog, s)) )
1147 goto got_it;
1148 }
1aa99e6b
IH
1149 s += len;
1150 }
09091399 1151 }
1aa99e6b
IH
1152 }
1153 else {
1154 if (c1 == c2)
1155 while (s <= e) {
1156 if ( *(U8*)s == c1
1157 && (ln == 1 || !(OP(c) == EXACTF
1158 ? ibcmp(s, m, ln)
1159 : ibcmp_locale(s, m, ln)))
1160 && (norun || regtry(prog, s)) )
1161 goto got_it;
1162 s++;
1163 }
1164 else
1165 while (s <= e) {
1166 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1167 && (ln == 1 || !(OP(c) == EXACTF
1168 ? ibcmp(s, m, ln)
1169 : ibcmp_locale(s, m, ln)))
1170 && (norun || regtry(prog, s)) )
1171 goto got_it;
1172 s++;
1173 }
b3c9acc1
IZ
1174 }
1175 break;
bbce6d69 1176 case BOUNDL:
3280af22 1177 PL_reg_flags |= RF_tainted;
bbce6d69 1178 /* FALL THROUGH */
a0d0e21e 1179 case BOUND:
ffc61ed2 1180 if (do_utf8) {
12d33761 1181 if (s == PL_bostr)
ffc61ed2
JH
1182 tmp = '\n';
1183 else {
6136c704 1184 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
b4f7163a 1185 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
ffc61ed2
JH
1186 }
1187 tmp = ((OP(c) == BOUND ?
9041c2e3 1188 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1189 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1190 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1191 if (tmp == !(OP(c) == BOUND ?
3568d838 1192 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1193 isALNUM_LC_utf8((U8*)s)))
1194 {
1195 tmp = !tmp;
1196 if ((norun || regtry(prog, s)))
1197 goto got_it;
1198 }
078c425b 1199 s += uskip;
a687059c 1200 }
a0d0e21e 1201 }
667bb95a 1202 else {
12d33761 1203 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1204 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1205 while (s < strend) {
1206 if (tmp ==
1207 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1208 tmp = !tmp;
1209 if ((norun || regtry(prog, s)))
1210 goto got_it;
1211 }
1212 s++;
a0ed51b3 1213 }
a0ed51b3 1214 }
6eb5f6b9 1215 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1216 goto got_it;
1217 break;
bbce6d69 1218 case NBOUNDL:
3280af22 1219 PL_reg_flags |= RF_tainted;
bbce6d69 1220 /* FALL THROUGH */
a0d0e21e 1221 case NBOUND:
ffc61ed2 1222 if (do_utf8) {
12d33761 1223 if (s == PL_bostr)
ffc61ed2
JH
1224 tmp = '\n';
1225 else {
6136c704 1226 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
b4f7163a 1227 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
ffc61ed2
JH
1228 }
1229 tmp = ((OP(c) == NBOUND ?
9041c2e3 1230 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1231 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1232 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1233 if (tmp == !(OP(c) == NBOUND ?
3568d838 1234 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1235 isALNUM_LC_utf8((U8*)s)))
1236 tmp = !tmp;
1237 else if ((norun || regtry(prog, s)))
1238 goto got_it;
078c425b 1239 s += uskip;
ffc61ed2 1240 }
a0d0e21e 1241 }
667bb95a 1242 else {
12d33761 1243 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1244 tmp = ((OP(c) == NBOUND ?
1245 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1246 while (s < strend) {
1247 if (tmp ==
1248 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1249 tmp = !tmp;
1250 else if ((norun || regtry(prog, s)))
1251 goto got_it;
1252 s++;
1253 }
a0ed51b3 1254 }
6eb5f6b9 1255 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1256 goto got_it;
1257 break;
a0d0e21e 1258 case ALNUM:
ffc61ed2 1259 if (do_utf8) {
1a4fad37 1260 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1261 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1262 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1263 if (tmp && (norun || regtry(prog, s)))
1264 goto got_it;
1265 else
1266 tmp = doevery;
1267 }
bbce6d69 1268 else
ffc61ed2 1269 tmp = 1;
078c425b 1270 s += uskip;
bbce6d69 1271 }
bbce6d69 1272 }
ffc61ed2
JH
1273 else {
1274 while (s < strend) {
1275 if (isALNUM(*s)) {
1276 if (tmp && (norun || regtry(prog, s)))
1277 goto got_it;
1278 else
1279 tmp = doevery;
1280 }
a0ed51b3 1281 else
ffc61ed2
JH
1282 tmp = 1;
1283 s++;
a0ed51b3 1284 }
a0ed51b3
LW
1285 }
1286 break;
bbce6d69 1287 case ALNUML:
3280af22 1288 PL_reg_flags |= RF_tainted;
ffc61ed2 1289 if (do_utf8) {
078c425b 1290 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1291 if (isALNUM_LC_utf8((U8*)s)) {
1292 if (tmp && (norun || regtry(prog, s)))
1293 goto got_it;
1294 else
1295 tmp = doevery;
1296 }
a687059c 1297 else
ffc61ed2 1298 tmp = 1;
078c425b 1299 s += uskip;
a0d0e21e 1300 }
a0d0e21e 1301 }
ffc61ed2
JH
1302 else {
1303 while (s < strend) {
1304 if (isALNUM_LC(*s)) {
1305 if (tmp && (norun || regtry(prog, s)))
1306 goto got_it;
1307 else
1308 tmp = doevery;
1309 }
a0ed51b3 1310 else
ffc61ed2
JH
1311 tmp = 1;
1312 s++;
a0ed51b3 1313 }
a0ed51b3
LW
1314 }
1315 break;
a0d0e21e 1316 case NALNUM:
ffc61ed2 1317 if (do_utf8) {
1a4fad37 1318 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1319 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1320 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1321 if (tmp && (norun || regtry(prog, s)))
1322 goto got_it;
1323 else
1324 tmp = doevery;
1325 }
bbce6d69 1326 else
ffc61ed2 1327 tmp = 1;
078c425b 1328 s += uskip;
bbce6d69 1329 }
bbce6d69 1330 }
ffc61ed2
JH
1331 else {
1332 while (s < strend) {
1333 if (!isALNUM(*s)) {
1334 if (tmp && (norun || regtry(prog, s)))
1335 goto got_it;
1336 else
1337 tmp = doevery;
1338 }
a0ed51b3 1339 else
ffc61ed2
JH
1340 tmp = 1;
1341 s++;
a0ed51b3 1342 }
a0ed51b3
LW
1343 }
1344 break;
bbce6d69 1345 case NALNUML:
3280af22 1346 PL_reg_flags |= RF_tainted;
ffc61ed2 1347 if (do_utf8) {
078c425b 1348 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1349 if (!isALNUM_LC_utf8((U8*)s)) {
1350 if (tmp && (norun || regtry(prog, s)))
1351 goto got_it;
1352 else
1353 tmp = doevery;
1354 }
a687059c 1355 else
ffc61ed2 1356 tmp = 1;
078c425b 1357 s += uskip;
a687059c 1358 }
a0d0e21e 1359 }
ffc61ed2
JH
1360 else {
1361 while (s < strend) {
1362 if (!isALNUM_LC(*s)) {
1363 if (tmp && (norun || regtry(prog, s)))
1364 goto got_it;
1365 else
1366 tmp = doevery;
1367 }
a0ed51b3 1368 else
ffc61ed2
JH
1369 tmp = 1;
1370 s++;
a0ed51b3 1371 }
a0ed51b3
LW
1372 }
1373 break;
a0d0e21e 1374 case SPACE:
ffc61ed2 1375 if (do_utf8) {
1a4fad37 1376 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1377 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1378 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1379 if (tmp && (norun || regtry(prog, s)))
1380 goto got_it;
1381 else
1382 tmp = doevery;
1383 }
a0d0e21e 1384 else
ffc61ed2 1385 tmp = 1;
078c425b 1386 s += uskip;
2304df62 1387 }
a0d0e21e 1388 }
ffc61ed2
JH
1389 else {
1390 while (s < strend) {
1391 if (isSPACE(*s)) {
1392 if (tmp && (norun || regtry(prog, s)))
1393 goto got_it;
1394 else
1395 tmp = doevery;
1396 }
a0ed51b3 1397 else
ffc61ed2
JH
1398 tmp = 1;
1399 s++;
a0ed51b3 1400 }
a0ed51b3
LW
1401 }
1402 break;
bbce6d69 1403 case SPACEL:
3280af22 1404 PL_reg_flags |= RF_tainted;
ffc61ed2 1405 if (do_utf8) {
078c425b 1406 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1407 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1408 if (tmp && (norun || regtry(prog, s)))
1409 goto got_it;
1410 else
1411 tmp = doevery;
1412 }
bbce6d69 1413 else
ffc61ed2 1414 tmp = 1;
078c425b 1415 s += uskip;
bbce6d69 1416 }
bbce6d69 1417 }
ffc61ed2
JH
1418 else {
1419 while (s < strend) {
1420 if (isSPACE_LC(*s)) {
1421 if (tmp && (norun || regtry(prog, s)))
1422 goto got_it;
1423 else
1424 tmp = doevery;
1425 }
a0ed51b3 1426 else
ffc61ed2
JH
1427 tmp = 1;
1428 s++;
a0ed51b3 1429 }
a0ed51b3
LW
1430 }
1431 break;
a0d0e21e 1432 case NSPACE:
ffc61ed2 1433 if (do_utf8) {
1a4fad37 1434 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1435 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1436 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1437 if (tmp && (norun || regtry(prog, s)))
1438 goto got_it;
1439 else
1440 tmp = doevery;
1441 }
a0d0e21e 1442 else
ffc61ed2 1443 tmp = 1;
078c425b 1444 s += uskip;
a687059c 1445 }
a0d0e21e 1446 }
ffc61ed2
JH
1447 else {
1448 while (s < strend) {
1449 if (!isSPACE(*s)) {
1450 if (tmp && (norun || regtry(prog, s)))
1451 goto got_it;
1452 else
1453 tmp = doevery;
1454 }
a0ed51b3 1455 else
ffc61ed2
JH
1456 tmp = 1;
1457 s++;
a0ed51b3 1458 }
a0ed51b3
LW
1459 }
1460 break;
bbce6d69 1461 case NSPACEL:
3280af22 1462 PL_reg_flags |= RF_tainted;
ffc61ed2 1463 if (do_utf8) {
078c425b 1464 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1465 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1466 if (tmp && (norun || regtry(prog, s)))
1467 goto got_it;
1468 else
1469 tmp = doevery;
1470 }
bbce6d69 1471 else
ffc61ed2 1472 tmp = 1;
078c425b 1473 s += uskip;
bbce6d69 1474 }
bbce6d69 1475 }
ffc61ed2
JH
1476 else {
1477 while (s < strend) {
1478 if (!isSPACE_LC(*s)) {
1479 if (tmp && (norun || regtry(prog, s)))
1480 goto got_it;
1481 else
1482 tmp = doevery;
1483 }
a0ed51b3 1484 else
ffc61ed2
JH
1485 tmp = 1;
1486 s++;
a0ed51b3 1487 }
a0ed51b3
LW
1488 }
1489 break;
a0d0e21e 1490 case DIGIT:
ffc61ed2 1491 if (do_utf8) {
1a4fad37 1492 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1493 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1494 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1495 if (tmp && (norun || regtry(prog, s)))
1496 goto got_it;
1497 else
1498 tmp = doevery;
1499 }
a0d0e21e 1500 else
ffc61ed2 1501 tmp = 1;
078c425b 1502 s += uskip;
2b69d0c2 1503 }
a0d0e21e 1504 }
ffc61ed2
JH
1505 else {
1506 while (s < strend) {
1507 if (isDIGIT(*s)) {
1508 if (tmp && (norun || regtry(prog, s)))
1509 goto got_it;
1510 else
1511 tmp = doevery;
1512 }
a0ed51b3 1513 else
ffc61ed2
JH
1514 tmp = 1;
1515 s++;
a0ed51b3 1516 }
a0ed51b3
LW
1517 }
1518 break;
b8c5462f
JH
1519 case DIGITL:
1520 PL_reg_flags |= RF_tainted;
ffc61ed2 1521 if (do_utf8) {
078c425b 1522 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1523 if (isDIGIT_LC_utf8((U8*)s)) {
1524 if (tmp && (norun || regtry(prog, s)))
1525 goto got_it;
1526 else
1527 tmp = doevery;
1528 }
b8c5462f 1529 else
ffc61ed2 1530 tmp = 1;
078c425b 1531 s += uskip;
b8c5462f 1532 }
b8c5462f 1533 }
ffc61ed2
JH
1534 else {
1535 while (s < strend) {
1536 if (isDIGIT_LC(*s)) {
1537 if (tmp && (norun || regtry(prog, s)))
1538 goto got_it;
1539 else
1540 tmp = doevery;
1541 }
b8c5462f 1542 else
ffc61ed2
JH
1543 tmp = 1;
1544 s++;
b8c5462f 1545 }
b8c5462f
JH
1546 }
1547 break;
a0d0e21e 1548 case NDIGIT:
ffc61ed2 1549 if (do_utf8) {
1a4fad37 1550 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1551 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1552 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1553 if (tmp && (norun || regtry(prog, s)))
1554 goto got_it;
1555 else
1556 tmp = doevery;
1557 }
a0d0e21e 1558 else
ffc61ed2 1559 tmp = 1;
078c425b 1560 s += uskip;
a687059c 1561 }
a0d0e21e 1562 }
ffc61ed2
JH
1563 else {
1564 while (s < strend) {
1565 if (!isDIGIT(*s)) {
1566 if (tmp && (norun || regtry(prog, s)))
1567 goto got_it;
1568 else
1569 tmp = doevery;
1570 }
a0ed51b3 1571 else
ffc61ed2
JH
1572 tmp = 1;
1573 s++;
a0ed51b3 1574 }
a0ed51b3
LW
1575 }
1576 break;
b8c5462f
JH
1577 case NDIGITL:
1578 PL_reg_flags |= RF_tainted;
ffc61ed2 1579 if (do_utf8) {
078c425b 1580 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1581 if (!isDIGIT_LC_utf8((U8*)s)) {
1582 if (tmp && (norun || regtry(prog, s)))
1583 goto got_it;
1584 else
1585 tmp = doevery;
1586 }
b8c5462f 1587 else
ffc61ed2 1588 tmp = 1;
078c425b 1589 s += uskip;
b8c5462f 1590 }
a0ed51b3 1591 }
ffc61ed2
JH
1592 else {
1593 while (s < strend) {
1594 if (!isDIGIT_LC(*s)) {
1595 if (tmp && (norun || regtry(prog, s)))
1596 goto got_it;
1597 else
1598 tmp = doevery;
1599 }
cf93c79d 1600 else
ffc61ed2
JH
1601 tmp = 1;
1602 s++;
b8c5462f 1603 }
b8c5462f
JH
1604 }
1605 break;
b3c9acc1 1606 default:
3c3eec57
GS
1607 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1608 break;
d6a28714 1609 }
6eb5f6b9
JH
1610 return 0;
1611 got_it:
1612 return s;
1613}
1614
1615/*
1616 - regexec_flags - match a regexp against a string
1617 */
1618I32
1619Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1620 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1621/* strend: pointer to null at end of string */
1622/* strbeg: real beginning of string */
1623/* minend: end of match must be >=minend after stringarg. */
1624/* data: May be used for some additional optimizations. */
1625/* nosave: For optimizations. */
1626{
97aff369 1627 dVAR;
6eb5f6b9
JH
1628 register char *s;
1629 register regnode *c;
1630 register char *startpos = stringarg;
6eb5f6b9
JH
1631 I32 minlen; /* must match at least this many chars */
1632 I32 dontbother = 0; /* how many characters not to try at end */
6eb5f6b9
JH
1633 I32 end_shift = 0; /* Same for the end. */ /* CC */
1634 I32 scream_pos = -1; /* Internal iterator of scream. */
ccac19ea 1635 char *scream_olds = NULL;
6eb5f6b9 1636 SV* oreplsv = GvSV(PL_replgv);
1df70142 1637 const bool do_utf8 = DO_UTF8(sv);
a3b680e6 1638 const I32 multiline = prog->reganch & PMf_MULTILINE;
2a782b5b 1639#ifdef DEBUGGING
6136c704
AL
1640 SV * const dsv0 = PERL_DEBUG_PAD_ZERO(0);
1641 SV * const dsv1 = PERL_DEBUG_PAD_ZERO(1);
2a782b5b 1642#endif
a3621e74
YO
1643
1644 GET_RE_DEBUG_FLAGS_DECL;
1645
9d4ba2ae 1646 PERL_UNUSED_ARG(data);
a30b2f1f 1647 RX_MATCH_UTF8_set(prog,do_utf8);
6eb5f6b9
JH
1648
1649 PL_regcc = 0;
1650
1651 cache_re(prog);
1652#ifdef DEBUGGING
aea4f609 1653 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1654#endif
1655
1656 /* Be paranoid... */
1657 if (prog == NULL || startpos == NULL) {
1658 Perl_croak(aTHX_ "NULL regexp parameter");
1659 return 0;
1660 }
1661
1662 minlen = prog->minlen;
61a36c01 1663 if (strend - startpos < minlen) {
a3621e74 1664 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584
JH
1665 "String too short [regexec_flags]...\n"));
1666 goto phooey;
1aa99e6b 1667 }
6eb5f6b9 1668
6eb5f6b9
JH
1669 /* Check validity of program. */
1670 if (UCHARAT(prog->program) != REG_MAGIC) {
1671 Perl_croak(aTHX_ "corrupted regexp program");
1672 }
1673
1674 PL_reg_flags = 0;
1675 PL_reg_eval_set = 0;
1676 PL_reg_maxiter = 0;
1677
1678 if (prog->reganch & ROPT_UTF8)
1679 PL_reg_flags |= RF_utf8;
1680
1681 /* Mark beginning of line for ^ and lookbehind. */
1682 PL_regbol = startpos;
1683 PL_bostr = strbeg;
1684 PL_reg_sv = sv;
1685
1686 /* Mark end of line for $ (and such) */
1687 PL_regeol = strend;
1688
1689 /* see how far we have to get to not match where we matched before */
1690 PL_regtill = startpos+minend;
1691
1692 /* We start without call_cc context. */
1693 PL_reg_call_cc = 0;
1694
1695 /* If there is a "must appear" string, look for it. */
1696 s = startpos;
1697
1698 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1699 MAGIC *mg;
1700
1701 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1702 PL_reg_ganch = startpos;
1703 else if (sv && SvTYPE(sv) >= SVt_PVMG
1704 && SvMAGIC(sv)
14befaf4
DM
1705 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1706 && mg->mg_len >= 0) {
6eb5f6b9
JH
1707 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1708 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1709 if (s > PL_reg_ganch)
6eb5f6b9
JH
1710 goto phooey;
1711 s = PL_reg_ganch;
1712 }
1713 }
1714 else /* pos() not defined */
1715 PL_reg_ganch = strbeg;
1716 }
1717
a0714e2c 1718 if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
6eb5f6b9
JH
1719 re_scream_pos_data d;
1720
1721 d.scream_olds = &scream_olds;
1722 d.scream_pos = &scream_pos;
1723 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7 1724 if (!s) {
a3621e74 1725 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1726 goto phooey; /* not present */
3fa9c3d7 1727 }
6eb5f6b9
JH
1728 }
1729
a3621e74 1730 DEBUG_EXECUTE_r({
1df70142
AL
1731 const char * const s0 = UTF
1732 ? pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1733 UNI_DISPLAY_REGEX)
1734 : prog->precomp;
1735 const int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1736 const char * const s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
c728cb41 1737 UNI_DISPLAY_REGEX) : startpos;
1df70142 1738 const int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
2a782b5b
JH
1739 if (!PL_colorset)
1740 reginitcolors();
1741 PerlIO_printf(Perl_debug_log,
a0288114 1742 "%sMatching REx%s \"%s%*.*s%s%s\" against \"%s%.*s%s%s\"\n",
e4584336 1743 PL_colors[4], PL_colors[5], PL_colors[0],
9e55ce06 1744 len0, len0, s0,
2a782b5b 1745 PL_colors[1],
9e55ce06 1746 len0 > 60 ? "..." : "",
2a782b5b 1747 PL_colors[0],
9e55ce06
JH
1748 (int)(len1 > 60 ? 60 : len1),
1749 s1, PL_colors[1],
1750 (len1 > 60 ? "..." : "")
2a782b5b
JH
1751 );
1752 });
6eb5f6b9
JH
1753
1754 /* Simplest case: anchored match need be tried only once. */
1755 /* [unless only anchor is BOL and multiline is set] */
1756 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1757 if (s == startpos && regtry(prog, startpos))
1758 goto got_it;
7fba1cd6 1759 else if (multiline || (prog->reganch & ROPT_IMPLICIT)
6eb5f6b9
JH
1760 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1761 {
1762 char *end;
1763
1764 if (minlen)
1765 dontbother = minlen - 1;
1aa99e6b 1766 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9 1767 /* for multiline we only have to try after newlines */
33b8afdf 1768 if (prog->check_substr || prog->check_utf8) {
6eb5f6b9
JH
1769 if (s == startpos)
1770 goto after_try;
1771 while (1) {
1772 if (regtry(prog, s))
1773 goto got_it;
1774 after_try:
1775 if (s >= end)
1776 goto phooey;
1777 if (prog->reganch & RE_USE_INTUIT) {
1778 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1779 if (!s)
1780 goto phooey;
1781 }
1782 else
1783 s++;
1784 }
1785 } else {
1786 if (s > startpos)
1787 s--;
1788 while (s < end) {
1789 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1790 if (regtry(prog, s))
1791 goto got_it;
1792 }
1793 }
1794 }
1795 }
1796 goto phooey;
1797 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1798 if (regtry(prog, PL_reg_ganch))
1799 goto got_it;
1800 goto phooey;
1801 }
1802
1803 /* Messy cases: unanchored match. */
33b8afdf 1804 if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1805 /* we have /x+whatever/ */
1806 /* it must be a one character string (XXXX Except UTF?) */
33b8afdf 1807 char ch;
bf93d4cc
GS
1808#ifdef DEBUGGING
1809 int did_match = 0;
1810#endif
33b8afdf
JH
1811 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1812 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
3f7c398e 1813 ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
bf93d4cc 1814
1aa99e6b 1815 if (do_utf8) {
6eb5f6b9
JH
1816 while (s < strend) {
1817 if (*s == ch) {
a3621e74 1818 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1819 if (regtry(prog, s)) goto got_it;
1820 s += UTF8SKIP(s);
1821 while (s < strend && *s == ch)
1822 s += UTF8SKIP(s);
1823 }
1824 s += UTF8SKIP(s);
1825 }
1826 }
1827 else {
1828 while (s < strend) {
1829 if (*s == ch) {
a3621e74 1830 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1831 if (regtry(prog, s)) goto got_it;
1832 s++;
1833 while (s < strend && *s == ch)
1834 s++;
1835 }
1836 s++;
1837 }
1838 }
a3621e74 1839 DEBUG_EXECUTE_r(if (!did_match)
bf93d4cc 1840 PerlIO_printf(Perl_debug_log,
b7953727
JH
1841 "Did not find anchored character...\n")
1842 );
6eb5f6b9 1843 }
a0714e2c
SS
1844 else if (prog->anchored_substr != NULL
1845 || prog->anchored_utf8 != NULL
1846 || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
33b8afdf
JH
1847 && prog->float_max_offset < strend - s)) {
1848 SV *must;
1849 I32 back_max;
1850 I32 back_min;
1851 char *last;
6eb5f6b9 1852 char *last1; /* Last position checked before */
bf93d4cc
GS
1853#ifdef DEBUGGING
1854 int did_match = 0;
1855#endif
33b8afdf
JH
1856 if (prog->anchored_substr || prog->anchored_utf8) {
1857 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1858 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1859 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1860 back_max = back_min = prog->anchored_offset;
1861 } else {
1862 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1863 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1864 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1865 back_max = prog->float_max_offset;
1866 back_min = prog->float_min_offset;
1867 }
1868 if (must == &PL_sv_undef)
1869 /* could not downgrade utf8 check substring, so must fail */
1870 goto phooey;
1871
1872 last = HOP3c(strend, /* Cannot start after this */
1873 -(I32)(CHR_SVLEN(must)
1874 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9
JH
1875
1876 if (s > PL_bostr)
1877 last1 = HOPc(s, -1);
1878 else
1879 last1 = s - 1; /* bogus */
1880
a0288114 1881 /* XXXX check_substr already used to find "s", can optimize if
6eb5f6b9
JH
1882 check_substr==must. */
1883 scream_pos = -1;
1884 dontbother = end_shift;
1885 strend = HOPc(strend, -dontbother);
1886 while ( (s <= last) &&
9041c2e3 1887 ((flags & REXEC_SCREAM)
1aa99e6b 1888 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1889 end_shift, &scream_pos, 0))
1aa99e6b 1890 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1891 (unsigned char*)strend, must,
7fba1cd6 1892 multiline ? FBMrf_MULTILINE : 0))) ) {
4addbd3b
HS
1893 /* we may be pointing at the wrong string */
1894 if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
3f7c398e 1895 s = strbeg + (s - SvPVX_const(sv));
a3621e74 1896 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1897 if (HOPc(s, -back_max) > last1) {
1898 last1 = HOPc(s, -back_min);
1899 s = HOPc(s, -back_max);
1900 }
1901 else {
1902 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1903
1904 last1 = HOPc(s, -back_min);
1905 s = t;
1906 }
1aa99e6b 1907 if (do_utf8) {
6eb5f6b9
JH
1908 while (s <= last1) {
1909 if (regtry(prog, s))
1910 goto got_it;
1911 s += UTF8SKIP(s);
1912 }
1913 }
1914 else {
1915 while (s <= last1) {
1916 if (regtry(prog, s))
1917 goto got_it;
1918 s++;
1919 }
1920 }
1921 }
a3621e74 1922 DEBUG_EXECUTE_r(if (!did_match)
b7953727 1923 PerlIO_printf(Perl_debug_log,
a0288114 1924 "Did not find %s substr \"%s%.*s%s\"%s...\n",
33b8afdf 1925 ((must == prog->anchored_substr || must == prog->anchored_utf8)
bf93d4cc
GS
1926 ? "anchored" : "floating"),
1927 PL_colors[0],
1928 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 1929 SvPVX_const(must),
b7953727
JH
1930 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1931 );
6eb5f6b9
JH
1932 goto phooey;
1933 }
155aba94 1934 else if ((c = prog->regstclass)) {
f14c76ed
RGS
1935 if (minlen) {
1936 I32 op = (U8)OP(prog->regstclass);
66e933ab 1937 /* don't bother with what can't match */
f14c76ed
RGS
1938 if (PL_regkind[op] != EXACT && op != CANY)
1939 strend = HOPc(strend, -(minlen - 1));
1940 }
a3621e74 1941 DEBUG_EXECUTE_r({
ffc61ed2 1942 SV *prop = sv_newmortal();
cfd0369c
NC
1943 const char *s0;
1944 const char *s1;
9e55ce06
JH
1945 int len0;
1946 int len1;
1947
ffc61ed2 1948 regprop(prop, c);
9e55ce06 1949 s0 = UTF ?
3f7c398e 1950 pv_uni_display(dsv0, (U8*)SvPVX_const(prop), SvCUR(prop), 60,
c728cb41 1951 UNI_DISPLAY_REGEX) :
cfd0369c 1952 SvPVX_const(prop);
9e55ce06
JH
1953 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1954 s1 = UTF ?
c728cb41 1955 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
9e55ce06
JH
1956 len1 = UTF ? SvCUR(dsv1) : strend - s;
1957 PerlIO_printf(Perl_debug_log,
a0288114 1958 "Matching stclass \"%*.*s\" against \"%*.*s\"\n",
9e55ce06
JH
1959 len0, len0, s0,
1960 len1, len1, s1);
ffc61ed2 1961 });
06b5626a 1962 if (find_byclass(prog, c, s, strend, 0))
6eb5f6b9 1963 goto got_it;
a3621e74 1964 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1965 }
1966 else {
1967 dontbother = 0;
a0714e2c 1968 if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
33b8afdf 1969 /* Trim the end. */
d6a28714 1970 char *last;
33b8afdf
JH
1971 SV* float_real;
1972
1973 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1974 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1975 float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
d6a28714
JH
1976
1977 if (flags & REXEC_SCREAM) {
33b8afdf 1978 last = screaminstr(sv, float_real, s - strbeg,
d6a28714
JH
1979 end_shift, &scream_pos, 1); /* last one */
1980 if (!last)
ffc61ed2 1981 last = scream_olds; /* Only one occurrence. */
4addbd3b
HS
1982 /* we may be pointing at the wrong string */
1983 else if (RX_MATCH_COPIED(prog))
3f7c398e 1984 s = strbeg + (s - SvPVX_const(sv));
b8c5462f 1985 }
d6a28714
JH
1986 else {
1987 STRLEN len;
cfd0369c 1988 const char * const little = SvPV_const(float_real, len);
d6a28714 1989
33b8afdf 1990 if (SvTAIL(float_real)) {
d6a28714
JH
1991 if (memEQ(strend - len + 1, little, len - 1))
1992 last = strend - len + 1;
7fba1cd6 1993 else if (!multiline)
9041c2e3 1994 last = memEQ(strend - len, little, len)
bd61b366 1995 ? strend - len : NULL;
b8c5462f 1996 else
d6a28714
JH
1997 goto find_last;
1998 } else {
1999 find_last:
9041c2e3 2000 if (len)
d6a28714 2001 last = rninstr(s, strend, little, little + len);
b8c5462f 2002 else
a0288114 2003 last = strend; /* matching "$" */
b8c5462f 2004 }
b8c5462f 2005 }
bf93d4cc 2006 if (last == NULL) {
a3621e74 2007 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
bf93d4cc 2008 "%sCan't trim the tail, match fails (should not happen)%s\n",
e4584336 2009 PL_colors[4], PL_colors[5]));
bf93d4cc
GS
2010 goto phooey; /* Should not happen! */
2011 }
d6a28714
JH
2012 dontbother = strend - last + prog->float_min_offset;
2013 }
2014 if (minlen && (dontbother < minlen))
2015 dontbother = minlen - 1;
2016 strend -= dontbother; /* this one's always in bytes! */
2017 /* We don't know much -- general case. */
1aa99e6b 2018 if (do_utf8) {
d6a28714
JH
2019 for (;;) {
2020 if (regtry(prog, s))
2021 goto got_it;
2022 if (s >= strend)
2023 break;
b8c5462f 2024 s += UTF8SKIP(s);
d6a28714
JH
2025 };
2026 }
2027 else {
2028 do {
2029 if (regtry(prog, s))
2030 goto got_it;
2031 } while (s++ < strend);
2032 }
2033 }
2034
2035 /* Failure. */
2036 goto phooey;
2037
2038got_it:
2039 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2040
2041 if (PL_reg_eval_set) {
2042 /* Preserve the current value of $^R */
2043 if (oreplsv != GvSV(PL_replgv))
2044 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2045 restored, the value remains
2046 the same. */
acfe0abc 2047 restore_pos(aTHX_ 0);
d6a28714
JH
2048 }
2049
2050 /* make sure $`, $&, $', and $digit will work later */
2051 if ( !(flags & REXEC_NOT_FIRST) ) {
ed252734 2052 RX_MATCH_COPY_FREE(prog);
d6a28714
JH
2053 if (flags & REXEC_COPY_STR) {
2054 I32 i = PL_regeol - startpos + (stringarg - strbeg);
f8c7b90f 2055#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2056 if ((SvIsCOW(sv)
2057 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2058 if (DEBUG_C_TEST) {
2059 PerlIO_printf(Perl_debug_log,
2060 "Copy on write: regexp capture, type %d\n",
2061 (int) SvTYPE(sv));
2062 }
2063 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
d5263905 2064 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
ed252734
NC
2065 assert (SvPOKp(prog->saved_copy));
2066 } else
2067#endif
2068 {
2069 RX_MATCH_COPIED_on(prog);
2070 s = savepvn(strbeg, i);
2071 prog->subbeg = s;
2072 }
d6a28714 2073 prog->sublen = i;
d6a28714
JH
2074 }
2075 else {
2076 prog->subbeg = strbeg;
2077 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2078 }
2079 }
9041c2e3 2080
d6a28714
JH
2081 return 1;
2082
2083phooey:
a3621e74 2084 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
e4584336 2085 PL_colors[4], PL_colors[5]));
d6a28714 2086 if (PL_reg_eval_set)
acfe0abc 2087 restore_pos(aTHX_ 0);
d6a28714
JH
2088 return 0;
2089}
2090
2091/*
2092 - regtry - try match at specific point
2093 */
2094STATIC I32 /* 0 failure, 1 success */
2095S_regtry(pTHX_ regexp *prog, char *startpos)
2096{
97aff369 2097 dVAR;
d6a28714
JH
2098 register I32 i;
2099 register I32 *sp;
2100 register I32 *ep;
2101 CHECKPOINT lastcp;
a3621e74 2102 GET_RE_DEBUG_FLAGS_DECL;
d6a28714 2103
02db2b7b
IZ
2104#ifdef DEBUGGING
2105 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
2106#endif
d6a28714
JH
2107 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2108 MAGIC *mg;
2109
2110 PL_reg_eval_set = RS_init;
a3621e74 2111 DEBUG_EXECUTE_r(DEBUG_s(
b900a521
JH
2112 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2113 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 2114 ));
e8347627 2115 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
2116 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2117 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2118 SAVETMPS;
2119 /* Apparently this is not needed, judging by wantarray. */
e8347627 2120 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
2121 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2122
2123 if (PL_reg_sv) {
2124 /* Make $_ available to executed code. */
2125 if (PL_reg_sv != DEFSV) {
59f00321 2126 SAVE_DEFSV;
d6a28714 2127 DEFSV = PL_reg_sv;
b8c5462f 2128 }
d6a28714 2129
9041c2e3 2130 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 2131 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 2132 /* prepare for quick setting of pos */
d300d9fa
NC
2133#ifdef PERL_OLD_COPY_ON_WRITE
2134 if (SvIsCOW(sv))
2135 sv_force_normal_flags(sv, 0);
2136#endif
2137 mg = sv_magicext(PL_reg_sv, (SV*)0, PERL_MAGIC_regex_global,
2138 &PL_vtbl_mglob, NULL, 0);
d6a28714 2139 mg->mg_len = -1;
b8c5462f 2140 }
d6a28714
JH
2141 PL_reg_magic = mg;
2142 PL_reg_oldpos = mg->mg_len;
c76ac1ee 2143 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 2144 }
09687e5a 2145 if (!PL_reg_curpm) {
a02a5408 2146 Newxz(PL_reg_curpm, 1, PMOP);
09687e5a
AB
2147#ifdef USE_ITHREADS
2148 {
2149 SV* repointer = newSViv(0);
577e12cc 2150 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2151 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2152 av_push(PL_regex_padav,repointer);
2153 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2154 PL_regex_pad = AvARRAY(PL_regex_padav);
2155 }
2156#endif
2157 }
aaa362c4 2158 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2159 PL_reg_oldcurpm = PL_curpm;
2160 PL_curpm = PL_reg_curpm;
2161 if (RX_MATCH_COPIED(prog)) {
2162 /* Here is a serious problem: we cannot rewrite subbeg,
2163 since it may be needed if this match fails. Thus
2164 $` inside (?{}) could fail... */
2165 PL_reg_oldsaved = prog->subbeg;
2166 PL_reg_oldsavedlen = prog->sublen;
f8c7b90f 2167#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2168 PL_nrs = prog->saved_copy;
2169#endif
d6a28714
JH
2170 RX_MATCH_COPIED_off(prog);
2171 }
2172 else
bd61b366 2173 PL_reg_oldsaved = NULL;
d6a28714
JH
2174 prog->subbeg = PL_bostr;
2175 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2176 }
973dddac 2177 prog->startp[0] = startpos - PL_bostr;
d6a28714
JH
2178 PL_reginput = startpos;
2179 PL_regstartp = prog->startp;
2180 PL_regendp = prog->endp;
2181 PL_reglastparen = &prog->lastparen;
a01268b5 2182 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714 2183 prog->lastparen = 0;
03994de8 2184 prog->lastcloseparen = 0;
d6a28714 2185 PL_regsize = 0;
a3621e74 2186 DEBUG_EXECUTE_r(PL_reg_starttry = startpos);
d6a28714
JH
2187 if (PL_reg_start_tmpl <= prog->nparens) {
2188 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2189 if(PL_reg_start_tmp)
2190 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2191 else
a02a5408 2192 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
d6a28714
JH
2193 }
2194
2195 /* XXXX What this code is doing here?!!! There should be no need
2196 to do this again and again, PL_reglastparen should take care of
3dd2943c 2197 this! --ilya*/
dafc8851
JH
2198
2199 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2200 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2201 * PL_reglastparen), is not needed at all by the test suite
2202 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2203 * enough, for building DynaLoader, or otherwise this
2204 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2205 * will happen. Meanwhile, this code *is* needed for the
2206 * above-mentioned test suite tests to succeed. The common theme
2207 * on those tests seems to be returning null fields from matches.
2208 * --jhi */
dafc8851 2209#if 1
d6a28714
JH
2210 sp = prog->startp;
2211 ep = prog->endp;
2212 if (prog->nparens) {
eb160463 2213 for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
d6a28714
JH
2214 *++sp = -1;
2215 *++ep = -1;
2216 }
2217 }
dafc8851 2218#endif
02db2b7b 2219 REGCP_SET(lastcp);
d6a28714
JH
2220 if (regmatch(prog->program + 1)) {
2221 prog->endp[0] = PL_reginput - PL_bostr;
2222 return 1;
2223 }
02db2b7b 2224 REGCP_UNWIND(lastcp);
d6a28714
JH
2225 return 0;
2226}
2227
02db2b7b
IZ
2228#define RE_UNWIND_BRANCH 1
2229#define RE_UNWIND_BRANCHJ 2
2230
2231union re_unwind_t;
2232
2233typedef struct { /* XX: makes sense to enlarge it... */
2234 I32 type;
2235 I32 prev;
2236 CHECKPOINT lastcp;
2237} re_unwind_generic_t;
2238
2239typedef struct {
2240 I32 type;
2241 I32 prev;
2242 CHECKPOINT lastcp;
2243 I32 lastparen;
2244 regnode *next;
2245 char *locinput;
2246 I32 nextchr;
2247#ifdef DEBUGGING
2248 int regindent;
2249#endif
2250} re_unwind_branch_t;
2251
2252typedef union re_unwind_t {
2253 I32 type;
2254 re_unwind_generic_t generic;
2255 re_unwind_branch_t branch;
2256} re_unwind_t;
2257
8ba1375e
MJD
2258#define sayYES goto yes
2259#define sayNO goto no
e0f9d4a8 2260#define sayNO_ANYOF goto no_anyof
8ba1375e
MJD
2261#define sayYES_FINAL goto yes_final
2262#define sayYES_LOUD goto yes_loud
2263#define sayNO_FINAL goto no_final
2264#define sayNO_SILENT goto do_no
2265#define saySAME(x) if (x) goto yes; else goto no
2266
3ab3c9b4
HS
2267#define POSCACHE_SUCCESS 0 /* caching success rather than failure */
2268#define POSCACHE_SEEN 1 /* we know what we're caching */
2269#define POSCACHE_START 2 /* the real cache: this bit maps to pos 0 */
2270#define CACHEsayYES STMT_START { \
2271 if (cache_offset | cache_bit) { \
2272 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2273 PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) || (1<<POSCACHE_SEEN); \
2274 else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2275 /* cache records failure, but this is success */ \
2276 DEBUG_r( \
2277 PerlIO_printf(Perl_debug_log, \
2278 "%*s (remove success from failure cache)\n", \
2279 REPORT_CODE_OFF+PL_regindent*2, "") \
2280 ); \
2281 PL_reg_poscache[cache_offset] &= ~(1<<cache_bit); \
2282 } \
2283 } \
2284 sayYES; \
2285} STMT_END
2286#define CACHEsayNO STMT_START { \
2287 if (cache_offset | cache_bit) { \
2288 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2289 PL_reg_poscache[0] |= (1<<POSCACHE_SEEN); \
2290 else if ((PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2291 /* cache records success, but this is failure */ \
2292 DEBUG_r( \
2293 PerlIO_printf(Perl_debug_log, \
2294 "%*s (remove failure from success cache)\n", \
2295 REPORT_CODE_OFF+PL_regindent*2, "") \
2296 ); \
2297 PL_reg_poscache[cache_offset] &= ~(1<<cache_bit); \
2298 } \
2299 } \
2300 sayNO; \
2301} STMT_END
2302
a3621e74
YO
2303/* this is used to determine how far from the left messages like
2304 'failed...' are printed. Currently 29 makes these messages line
2305 up with the opcode they refer to. Earlier perls used 25 which
2306 left these messages outdented making reviewing a debug output
2307 quite difficult.
2308*/
2309#define REPORT_CODE_OFF 29
2310
2311
2312/* Make sure there is a test for this +1 options in re_tests */
2313#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2314
2315#define TRIE_CHECK_STATE_IS_ACCEPTING STMT_START { \
2316 if ( trie->states[ state ].wordnum ) { \
2317 if ( !accepted ) { \
2318 ENTER; \
2319 SAVETMPS; \
2320 bufflen = TRIE_INITAL_ACCEPT_BUFFLEN ; \
561b68a9 2321 sv_accept_buff=newSV(bufflen * sizeof(reg_trie_accepted) - 1 );\
a3621e74
YO
2322 SvCUR_set( sv_accept_buff, sizeof(reg_trie_accepted) ); \
2323 SvPOK_on( sv_accept_buff ); \
2324 sv_2mortal( sv_accept_buff ); \
2325 accept_buff = (reg_trie_accepted*)SvPV_nolen( sv_accept_buff );\
2326 } else { \
2327 if ( accepted >= bufflen ) { \
2328 bufflen *= 2; \
2329 accept_buff =(reg_trie_accepted*)SvGROW( sv_accept_buff, \
2330 bufflen * sizeof(reg_trie_accepted) ); \
2331 } \
2332 SvCUR_set( sv_accept_buff,SvCUR( sv_accept_buff ) \
2333 + sizeof( reg_trie_accepted ) ); \
2334 } \
2335 accept_buff[ accepted ].wordnum = trie->states[ state ].wordnum; \
2336 accept_buff[ accepted ].endpos = uc; \
2337 ++accepted; \
2338 } } STMT_END
2339
2340#define TRIE_HANDLE_CHAR STMT_START { \
2341 if ( uvc < 256 ) { \
2342 charid = trie->charmap[ uvc ]; \
2343 } else { \
2344 charid = 0; \
2345 if( trie->widecharmap ) { \
2346 SV** svpp = (SV**)NULL; \
2347 svpp = hv_fetch( trie->widecharmap, (char*)&uvc, \
2348 sizeof( UV ), 0 ); \
2349 if ( svpp ) { \
2350 charid = (U16)SvIV( *svpp ); \
2351 } \
2352 } \
2353 } \
2354 if ( charid && \
cc601c31
YO
2355 ( base + charid > trie->uniquecharcount ) && \
2356 ( base + charid - 1 - trie->uniquecharcount < trie->lasttrans) && \
a3621e74
YO
2357 trie->trans[ base + charid - 1 - trie->uniquecharcount ].check == state ) \
2358 { \
2359 state = trie->trans[ base + charid - 1 - trie->uniquecharcount ].next; \
2360 } else { \
2361 state = 0; \
2362 } \
2363 uc += len; \
2364 } STMT_END
8ba1375e 2365
d6a28714
JH
2366/*
2367 - regmatch - main matching routine
2368 *
2369 * Conceptually the strategy is simple: check to see whether the current
2370 * node matches, call self recursively to see whether the rest matches,
2371 * and then act accordingly. In practice we make some effort to avoid
2372 * recursion, in particular by going through "ordinary" nodes (that don't
2373 * need to know whether the rest of the match failed) by a loop instead of
2374 * by recursion.
2375 */
2376/* [lwall] I've hoisted the register declarations to the outer block in order to
2377 * maybe save a little bit of pushing and popping on the stack. It also takes
2378 * advantage of machines that use a register save mask on subroutine entry.
2379 */
2380STATIC I32 /* 0 failure, 1 success */
2381S_regmatch(pTHX_ regnode *prog)
2382{
27da23d5 2383 dVAR;
d6a28714
JH
2384 register regnode *scan; /* Current node. */
2385 regnode *next; /* Next node. */
2386 regnode *inner; /* Next node in internal branch. */
2387 register I32 nextchr; /* renamed nextchr - nextchar colides with
2388 function of same name */
2389 register I32 n; /* no or next */
b7953727 2390 register I32 ln = 0; /* len or last */
bd61b366 2391 register char *s = NULL; /* operand or save */
d6a28714 2392 register char *locinput = PL_reginput;
b7953727 2393 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2394 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2395 I32 unwind = 0;
a3621e74
YO
2396
2397 /* used by the trie code */
cbbf8932
AL
2398 SV *sv_accept_buff = NULL; /* accepting states we have traversed */
2399 reg_trie_accepted *accept_buff = NULL; /* "" */
ab74612d
NC
2400 reg_trie_data *trie; /* what trie are we using right now */
2401 U32 accepted = 0; /* how many accepting states we have seen*/
a3621e74 2402
b7953727 2403#if 0
02db2b7b 2404 I32 firstcp = PL_savestack_ix;
b7953727 2405#endif
0d46e09a 2406 register const bool do_utf8 = PL_reg_match_utf8;
2a782b5b 2407#ifdef DEBUGGING
6136c704
AL
2408 SV * const dsv0 = PERL_DEBUG_PAD_ZERO(0);
2409 SV * const dsv1 = PERL_DEBUG_PAD_ZERO(1);
2410 SV * const dsv2 = PERL_DEBUG_PAD_ZERO(2);
a3621e74 2411
ab74612d 2412 SV *re_debug_flags = NULL;
2a782b5b 2413#endif
041457d9 2414 U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
02db2b7b 2415
a3621e74
YO
2416 GET_RE_DEBUG_FLAGS;
2417
d6a28714
JH
2418#ifdef DEBUGGING
2419 PL_regindent++;
2420#endif
2421
a3621e74 2422
d6a28714
JH
2423 /* Note that nextchr is a byte even in UTF */
2424 nextchr = UCHARAT(locinput);
2425 scan = prog;
2426 while (scan != NULL) {
8ba1375e 2427
a3621e74 2428 DEBUG_EXECUTE_r( {
6136c704 2429 SV * const prop = sv_newmortal();
1df70142
AL
2430 const int docolor = *PL_colors[0];
2431 const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2432 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2433 /* The part of the string before starttry has one color
2434 (pref0_len chars), between starttry and current
2435 position another one (pref_len - pref0_len chars),
2436 after the current position the third one.
2437 We assume that pref0_len <= pref_len, otherwise we
2438 decrease pref0_len. */
9041c2e3 2439 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2440 ? (5 + taill) - l : locinput - PL_bostr;
2441 int pref0_len;
d6a28714 2442
df1ffd02 2443 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2444 pref_len++;
2445 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2446 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2447 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2448 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2449 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2450 l--;
d6a28714
JH
2451 if (pref0_len < 0)
2452 pref0_len = 0;
2453 if (pref0_len > pref_len)
2454 pref0_len = pref_len;
2455 regprop(prop, scan);
2a782b5b 2456 {
1df70142 2457 const char * const s0 =
f14c76ed 2458 do_utf8 && OP(scan) != CANY ?
2a782b5b 2459 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
c728cb41 2460 pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2461 locinput - pref_len;
1df70142
AL
2462 const int len0 = do_utf8 ? strlen(s0) : pref0_len;
2463 const char * const s1 = do_utf8 && OP(scan) != CANY ?
2a782b5b 2464 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
c728cb41 2465 pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2466 locinput - pref_len + pref0_len;
1df70142
AL
2467 const int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2468 const char * const s2 = do_utf8 && OP(scan) != CANY ?
2a782b5b 2469 pv_uni_display(dsv2, (U8*)locinput,
c728cb41 2470 PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2471 locinput;
1df70142 2472 const int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2473 PerlIO_printf(Perl_debug_log,
2474 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2475 (IV)(locinput - PL_bostr),
2476 PL_colors[4],
2477 len0, s0,
2478 PL_colors[5],
2479 PL_colors[2],
2480 len1, s1,
2481 PL_colors[3],
2482 (docolor ? "" : "> <"),
2483 PL_colors[0],
2484 len2, s2,
2485 PL_colors[1],
2486 15 - l - pref_len + 1,
2487 "",
2488 (IV)(scan - PL_regprogram), PL_regindent*2, "",
3f7c398e 2489 SvPVX_const(prop));
2a782b5b
JH
2490 }
2491 });
d6a28714
JH
2492
2493 next = scan + NEXT_OFF(scan);
2494 if (next == scan)
2495 next = NULL;
2496
2497 switch (OP(scan)) {
2498 case BOL:
7fba1cd6 2499 if (locinput == PL_bostr)
d6a28714
JH
2500 {
2501 /* regtill = regbol; */
b8c5462f
JH
2502 break;
2503 }
d6a28714
JH
2504 sayNO;
2505 case MBOL:
12d33761
HS
2506 if (locinput == PL_bostr ||
2507 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2508 {
b8c5462f
JH
2509 break;
2510 }
d6a28714
JH
2511 sayNO;
2512 case SBOL:
c2a73568 2513 if (locinput == PL_bostr)
b8c5462f 2514 break;
d6a28714
JH
2515 sayNO;
2516 case GPOS:
2517 if (locinput == PL_reg_ganch)
2518 break;
2519 sayNO;
2520 case EOL:
d6a28714
JH
2521 goto seol;
2522 case MEOL:
d6a28714 2523 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2524 sayNO;
b8c5462f 2525 break;
d6a28714
JH
2526 case SEOL:
2527 seol:
2528 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2529 sayNO;
d6a28714 2530 if (PL_regeol - locinput > 1)
b8c5462f 2531 sayNO;
b8c5462f 2532 break;
d6a28714
JH
2533 case EOS:
2534 if (PL_regeol != locinput)
b8c5462f 2535 sayNO;
d6a28714 2536 break;
ffc61ed2 2537 case SANY:
d6a28714 2538 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2539 sayNO;
f33976b4
DB
2540 if (do_utf8) {
2541 locinput += PL_utf8skip[nextchr];
2542 if (locinput > PL_regeol)
2543 sayNO;
2544 nextchr = UCHARAT(locinput);
2545 }
2546 else
2547 nextchr = UCHARAT(++locinput);
2548 break;
2549 case CANY:
2550 if (!nextchr && locinput >= PL_regeol)
2551 sayNO;
b8c5462f 2552 nextchr = UCHARAT(++locinput);
a0d0e21e 2553 break;
ffc61ed2 2554 case REG_ANY:
1aa99e6b
IH
2555 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2556 sayNO;
2557 if (do_utf8) {
b8c5462f 2558 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2559 if (locinput > PL_regeol)
2560 sayNO;
a0ed51b3 2561 nextchr = UCHARAT(locinput);
a0ed51b3 2562 }
1aa99e6b
IH
2563 else
2564 nextchr = UCHARAT(++locinput);
a0ed51b3 2565 break;
a3621e74
YO
2566
2567
2568
2569 /*
2570 traverse the TRIE keeping track of all accepting states
2571 we transition through until we get to a failing node.
2572
2573 we use two slightly different pieces of code to handle
2574 the traversal depending on whether its case sensitive or
2575 not. we reuse the accept code however. (this should probably
2576 be turned into a macro.)
2577
2578 */
2579 case TRIEF:
2580 case TRIEFL:
2581 {
a3621e74
YO
2582 U8 *uc = ( U8* )locinput;
2583 U32 state = 1;
2584 U16 charid = 0;
2585 U32 base = 0;
2586 UV uvc = 0;
2587 STRLEN len = 0;
2588 STRLEN foldlen = 0;
a3621e74
YO
2589 U8 *uscan = (U8*)NULL;
2590 STRLEN bufflen=0;
2591 accepted = 0;
2592
2593 trie = (reg_trie_data*)PL_regdata->data[ ARG( scan ) ];
2594
2595 while ( state && uc <= (U8*)PL_regeol ) {
2596
2597 TRIE_CHECK_STATE_IS_ACCEPTING;
2598
2599 base = trie->states[ state ].trans.base;
2600
2601 DEBUG_TRIE_EXECUTE_r(
2602 PerlIO_printf( Perl_debug_log,
e4584336 2603 "%*s %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ",
a3621e74 2604 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
e4584336 2605 (UV)state, (UV)base, (UV)accepted );
a3621e74
YO
2606 );
2607
2608 if ( base ) {
2609
108bb1ad 2610 if ( do_utf8 ) {
a3621e74
YO
2611 if ( foldlen>0 ) {
2612 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
2613 foldlen -= len;
2614 uscan += len;
2615 len=0;
2616 } else {
1df70142 2617 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
a3621e74
YO
2618 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2619 uvc = to_uni_fold( uvc, foldbuf, &foldlen );
2620 foldlen -= UNISKIP( uvc );
2621 uscan = foldbuf + UNISKIP( uvc );
2622 }
2623 } else {
e4584336 2624 uvc = (UV)*uc;
a3621e74
YO
2625 len = 1;
2626 }
2627
2628 TRIE_HANDLE_CHAR;
2629
2630 } else {
2631 state = 0;
2632 }
2633 DEBUG_TRIE_EXECUTE_r(
e4584336
RB
2634 PerlIO_printf( Perl_debug_log,
2635 "Charid:%3x CV:%4"UVxf" After State: %4"UVxf"%s\n",
2636 charid, uvc, (UV)state, PL_colors[5] );
a3621e74
YO
2637 );
2638 }
2639 if ( !accepted ) {
2640 sayNO;
2641 } else {
2642 goto TrieAccept;
2643 }
2644 }
2645 /* unreached codepoint: we jump into the middle of the next case
2646 from previous if blocks */
2647 case TRIE:
2648 {
a3621e74
YO
2649 U8 *uc = (U8*)locinput;
2650 U32 state = 1;
2651 U16 charid = 0;
2652 U32 base = 0;
2653 UV uvc = 0;
2654 STRLEN len = 0;
2655 STRLEN bufflen = 0;
2656 accepted = 0;
2657
2658 trie = (reg_trie_data*)PL_regdata->data[ ARG( scan ) ];
2659
2660 while ( state && uc <= (U8*)PL_regeol ) {
2661
2662 TRIE_CHECK_STATE_IS_ACCEPTING;
2663
2664 base = trie->states[ state ].trans.base;
2665
2666 DEBUG_TRIE_EXECUTE_r(
2667 PerlIO_printf( Perl_debug_log,
e4584336 2668 "%*s %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ",
a3621e74 2669 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
e4584336 2670 (UV)state, (UV)base, (UV)accepted );
a3621e74
YO
2671 );
2672
2673 if ( base ) {
2674
108bb1ad 2675 if ( do_utf8 ) {
a3621e74
YO
2676 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2677 } else {
2678 uvc = (U32)*uc;
2679 len = 1;
2680 }
2681
2682 TRIE_HANDLE_CHAR;
2683
2684 } else {
2685 state = 0;
2686 }
2687 DEBUG_TRIE_EXECUTE_r(
2688 PerlIO_printf( Perl_debug_log,
e4584336
RB
2689 "Charid:%3x CV:%4"UVxf" After State: %4"UVxf"%s\n",
2690 charid, uvc, (UV)state, PL_colors[5] );
a3621e74
YO
2691 );
2692 }
2693 if ( !accepted ) {
2694 sayNO;
2695 }
2696 }
2697
2698
2699 /*
2700 There was at least one accepting state that we
2701 transitioned through. Presumably the number of accepting
2702 states is going to be low, typically one or two. So we
2703 simply scan through to find the one with lowest wordnum.
2704 Once we find it, we swap the last state into its place
2705 and decrement the size. We then try to match the rest of
2706 the pattern at the point where the word ends, if we
2707 succeed then we end the loop, otherwise the loop
2708 eventually terminates once all of the accepting states
2709 have been tried.
2710 */
2711 TrieAccept:
2712 {
2713 int gotit = 0;
2714
2715 if ( accepted == 1 ) {
2716 DEBUG_EXECUTE_r({
2717 SV **tmp = av_fetch( trie->words, accept_buff[ 0 ].wordnum-1, 0 );
2718 PerlIO_printf( Perl_debug_log,
2719 "%*s %sonly one match : #%d <%s>%s\n",
e4584336 2720 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
a3621e74 2721 accept_buff[ 0 ].wordnum,
cfd0369c 2722 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",
a3621e74
YO
2723 PL_colors[5] );
2724 });
cc601c31 2725 PL_reginput = (char *)accept_buff[ 0 ].endpos;
a3621e74
YO
2726 /* in this case we free tmps/leave before we call regmatch
2727 as we wont be using accept_buff again. */
2728 FREETMPS;
2729 LEAVE;
2730 gotit = regmatch( scan + NEXT_OFF( scan ) );
2731 } else {
2732 DEBUG_EXECUTE_r(
e4584336
RB
2733 PerlIO_printf( Perl_debug_log,"%*s %sgot %"IVdf" possible matches%s\n",
2734 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], (IV)accepted,
a3621e74
YO
2735 PL_colors[5] );
2736 );
2737 while ( !gotit && accepted-- ) {
2738 U32 best = 0;
2739 U32 cur;
2740 for( cur = 1 ; cur <= accepted ; cur++ ) {
e4584336
RB
2741 DEBUG_TRIE_EXECUTE_r(
2742 PerlIO_printf( Perl_debug_log,
2743 "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
2744 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
2745 (IV)best, accept_buff[ best ].wordnum, (IV)cur,
2746 accept_buff[ cur ].wordnum, PL_colors[5] );
2747 );
a3621e74
YO
2748
2749 if ( accept_buff[ cur ].wordnum < accept_buff[ best ].wordnum )
2750 best = cur;
2751 }
2752 DEBUG_EXECUTE_r({
6136c704 2753 SV ** const tmp = av_fetch( trie->words, accept_buff[ best ].wordnum - 1, 0 );
a3621e74 2754 PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at 0x%p%s\n",
e4584336 2755 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
a3621e74 2756 accept_buff[best].wordnum,
cfd0369c 2757 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",scan,
a3621e74
YO
2758 PL_colors[5] );
2759 });
2760 if ( best<accepted ) {
2761 reg_trie_accepted tmp = accept_buff[ best ];
2762 accept_buff[ best ] = accept_buff[ accepted ];
2763 accept_buff[ accepted ] = tmp;
2764 best = accepted;
2765 }
cc601c31 2766 PL_reginput = (char *)accept_buff[ best ].endpos;
a3621e74
YO
2767
2768 /*
2769 as far as I can tell we only need the SAVETMPS/FREETMPS
2770 for re's with EVAL in them but I'm leaving them in for
2771 all until I can be sure.
2772 */
2773 SAVETMPS;
2774 gotit = regmatch( scan + NEXT_OFF( scan ) ) ;
2775 FREETMPS;
2776 }
2777 FREETMPS;
2778 LEAVE;
2779 }
2780
2781 if ( gotit ) {
2782 sayYES;
2783 } else {
2784 sayNO;
2785 }
2786 }
2787 /* unreached codepoint */
d6a28714 2788 case EXACT:
cd439c50
IZ
2789 s = STRING(scan);
2790 ln = STR_LEN(scan);
eb160463 2791 if (do_utf8 != UTF) {
bc517b45 2792 /* The target and the pattern have differing utf8ness. */
1aa99e6b 2793 char *l = locinput;
a3b680e6 2794 const char *e = s + ln;
a72c7584 2795
5ff6fc6d
JH
2796 if (do_utf8) {
2797 /* The target is utf8, the pattern is not utf8. */
1aa99e6b 2798 while (s < e) {
a3b680e6 2799 STRLEN ulen;
1aa99e6b 2800 if (l >= PL_regeol)
5ff6fc6d
JH
2801 sayNO;
2802 if (NATIVE_TO_UNI(*(U8*)s) !=
89ebb4a3 2803 utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
041457d9 2804 uniflags))
5ff6fc6d 2805 sayNO;
bc517b45 2806 l += ulen;
5ff6fc6d 2807 s ++;
1aa99e6b 2808 }
5ff6fc6d
JH
2809 }
2810 else {
2811 /* The target is not utf8, the pattern is utf8. */
1aa99e6b 2812 while (s < e) {
a3b680e6 2813 STRLEN ulen;
1aa99e6b
IH
2814 if (l >= PL_regeol)
2815 sayNO;
5ff6fc6d 2816 if (NATIVE_TO_UNI(*((U8*)l)) !=
89ebb4a3 2817 utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
041457d9 2818 uniflags))
1aa99e6b 2819 sayNO;
bc517b45 2820 s += ulen;
a72c7584 2821 l ++;
1aa99e6b 2822 }
5ff6fc6d 2823 }
1aa99e6b
IH
2824 locinput = l;
2825 nextchr = UCHARAT(locinput);
2826 break;
2827 }
bc517b45 2828 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2829 /* Inline the first character, for speed. */
2830 if (UCHARAT(s) != nextchr)
2831 sayNO;
2832 if (PL_regeol - locinput < ln)
2833 sayNO;
2834 if (ln > 1 && memNE(s, locinput, ln))
2835 sayNO;
2836 locinput += ln;
2837 nextchr = UCHARAT(locinput);
2838 break;
2839 case EXACTFL:
b8c5462f
JH
2840 PL_reg_flags |= RF_tainted;
2841 /* FALL THROUGH */
d6a28714 2842 case EXACTF:
cd439c50
IZ
2843 s = STRING(scan);
2844 ln = STR_LEN(scan);
d6a28714 2845
d07ddd77
JH
2846 if (do_utf8 || UTF) {
2847 /* Either target or the pattern are utf8. */
d6a28714 2848 char *l = locinput;
d07ddd77 2849 char *e = PL_regeol;
bc517b45 2850
eb160463 2851 if (ibcmp_utf8(s, 0, ln, (bool)UTF,
1feea2c7 2852 l, &e, 0, do_utf8)) {
5486206c
JH
2853 /* One more case for the sharp s:
2854 * pack("U0U*", 0xDF) =~ /ss/i,
2855 * the 0xC3 0x9F are the UTF-8
2856 * byte sequence for the U+00DF. */
2857 if (!(do_utf8 &&
2858 toLOWER(s[0]) == 's' &&
2859 ln >= 2 &&
2860 toLOWER(s[1]) == 's' &&
2861 (U8)l[0] == 0xC3 &&
2862 e - l >= 2 &&
2863 (U8)l[1] == 0x9F))
2864 sayNO;
2865 }
d07ddd77
JH
2866 locinput = e;
2867 nextchr = UCHARAT(locinput);
2868 break;
a0ed51b3 2869 }
d6a28714 2870
bc517b45
JH
2871 /* Neither the target and the pattern are utf8. */
2872
d6a28714
JH
2873 /* Inline the first character, for speed. */
2874 if (UCHARAT(s) != nextchr &&
2875 UCHARAT(s) != ((OP(scan) == EXACTF)
2876 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2877 sayNO;
d6a28714 2878 if (PL_regeol - locinput < ln)
b8c5462f 2879 sayNO;
d6a28714
JH
2880 if (ln > 1 && (OP(scan) == EXACTF
2881 ? ibcmp(s, locinput, ln)
2882 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2883 sayNO;
d6a28714
JH
2884 locinput += ln;
2885 nextchr = UCHARAT(locinput);
a0d0e21e 2886 break;
d6a28714 2887 case ANYOF:
ffc61ed2 2888 if (do_utf8) {
9e55ce06
JH
2889 STRLEN inclasslen = PL_regeol - locinput;
2890
ba7b4546 2891 if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
e0f9d4a8 2892 sayNO_ANYOF;
ffc61ed2
JH
2893 if (locinput >= PL_regeol)
2894 sayNO;
0f0076b4 2895 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
b8c5462f 2896 nextchr = UCHARAT(locinput);
e0f9d4a8 2897 break;
ffc61ed2
JH
2898 }
2899 else {
2900 if (nextchr < 0)
2901 nextchr = UCHARAT(locinput);
7d3e948e 2902 if (!REGINCLASS(scan, (U8*)locinput))
e0f9d4a8 2903 sayNO_ANYOF;
ffc61ed2
JH
2904 if (!nextchr && locinput >= PL_regeol)
2905 sayNO;
2906 nextchr = UCHARAT(++locinput);
e0f9d4a8
JH
2907 break;
2908 }
2909 no_anyof:
2910 /* If we might have the case of the German sharp s
2911 * in a casefolding Unicode character class. */
2912
ebc501f0
JH
2913 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2914 locinput += SHARP_S_SKIP;
e0f9d4a8 2915 nextchr = UCHARAT(locinput);
ffc61ed2 2916 }
e0f9d4a8
JH
2917 else
2918 sayNO;
b8c5462f 2919 break;
d6a28714 2920 case ALNUML:
b8c5462f
JH
2921 PL_reg_flags |= RF_tainted;
2922 /* FALL THROUGH */
d6a28714 2923 case ALNUM:
b8c5462f 2924 if (!nextchr)
4633a7c4 2925 sayNO;
ffc61ed2 2926 if (do_utf8) {
1a4fad37 2927 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 2928 if (!(OP(scan) == ALNUM
3568d838 2929 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2930 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2931 {
2932 sayNO;
a0ed51b3 2933 }
b8c5462f 2934 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2935 nextchr = UCHARAT(locinput);
2936 break;
2937 }
ffc61ed2 2938 if (!(OP(scan) == ALNUM
d6a28714 2939 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2940 sayNO;
b8c5462f 2941 nextchr = UCHARAT(++locinput);
a0d0e21e 2942 break;
d6a28714 2943 case NALNUML:
b8c5462f
JH
2944 PL_reg_flags |= RF_tainted;
2945 /* FALL THROUGH */
d6a28714
JH
2946 case NALNUM:
2947 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2948 sayNO;
ffc61ed2 2949 if (do_utf8) {
1a4fad37 2950 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 2951 if (OP(scan) == NALNUM
3568d838 2952 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2953 : isALNUM_LC_utf8((U8*)locinput))
2954 {
b8c5462f 2955 sayNO;
d6a28714 2956 }
b8c5462f
JH
2957 locinput += PL_utf8skip[nextchr];
2958 nextchr = UCHARAT(locinput);
2959 break;
2960 }
ffc61ed2 2961 if (OP(scan) == NALNUM
d6a28714 2962 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2963 sayNO;
76e3520e 2964 nextchr = UCHARAT(++locinput);
a0d0e21e 2965 break;
d6a28714
JH
2966 case BOUNDL:
2967 case NBOUNDL:
3280af22 2968 PL_reg_flags |= RF_tainted;
bbce6d69 2969 /* FALL THROUGH */
d6a28714
JH
2970 case BOUND:
2971 case NBOUND:
2972 /* was last char in word? */
ffc61ed2 2973 if (do_utf8) {
12d33761
HS
2974 if (locinput == PL_bostr)
2975 ln = '\n';
ffc61ed2 2976 else {
a3b680e6 2977 const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
9041c2e3 2978
b4f7163a 2979 ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
ffc61ed2
JH
2980 }
2981 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2982 ln = isALNUM_uni(ln);
1a4fad37 2983 LOAD_UTF8_CHARCLASS_ALNUM();
3568d838 2984 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2985 }
2986 else {
9041c2e3 2987 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2988 n = isALNUM_LC_utf8((U8*)locinput);
2989 }
a0ed51b3 2990 }
d6a28714 2991 else {
12d33761
HS
2992 ln = (locinput != PL_bostr) ?
2993 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2994 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2995 ln = isALNUM(ln);
2996 n = isALNUM(nextchr);
2997 }
2998 else {
2999 ln = isALNUM_LC(ln);
3000 n = isALNUM_LC(nextchr);
3001 }
d6a28714 3002 }
ffc61ed2
JH
3003 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3004 OP(scan) == BOUNDL))
3005 sayNO;
a0ed51b3 3006 break;
d6a28714 3007 case SPACEL:
3280af22 3008 PL_reg_flags |= RF_tainted;
bbce6d69 3009 /* FALL THROUGH */
d6a28714 3010 case SPACE:
9442cb0e 3011 if (!nextchr)
4633a7c4 3012 sayNO;
1aa99e6b 3013 if (do_utf8) {
fd400ab9 3014 if (UTF8_IS_CONTINUED(nextchr)) {
1a4fad37 3015 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3016 if (!(OP(scan) == SPACE
3568d838 3017 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
3018 : isSPACE_LC_utf8((U8*)locinput)))
3019 {
3020 sayNO;
3021 }
3022 locinput += PL_utf8skip[nextchr];
3023 nextchr = UCHARAT(locinput);
3024 break;
d6a28714 3025 }
ffc61ed2
JH
3026 if (!(OP(scan) == SPACE
3027 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3028 sayNO;
3029 nextchr = UCHARAT(++locinput);
3030 }
3031 else {
3032 if (!(OP(scan) == SPACE
3033 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3034 sayNO;
3035 nextchr = UCHARAT(++locinput);
a0ed51b3 3036 }
a0ed51b3 3037 break;
d6a28714 3038 case NSPACEL:
3280af22 3039 PL_reg_flags |= RF_tainted;
bbce6d69 3040 /* FALL THROUGH */
d6a28714 3041 case NSPACE:
9442cb0e 3042 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3043 sayNO;
1aa99e6b 3044 if (do_utf8) {
1a4fad37 3045 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3046 if (OP(scan) == NSPACE
3568d838 3047 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 3048 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
3049 {
3050 sayNO;
3051 }
3052 locinput += PL_utf8skip[nextchr];
3053 nextchr = UCHARAT(locinput);
3054 break;
a0ed51b3 3055 }
ffc61ed2 3056 if (OP(scan) == NSPACE
d6a28714 3057 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 3058 sayNO;
76e3520e 3059 nextchr = UCHARAT(++locinput);
a0d0e21e 3060 break;
d6a28714 3061 case DIGITL:
a0ed51b3
LW
3062 PL_reg_flags |= RF_tainted;
3063 /* FALL THROUGH */
d6a28714 3064 case DIGIT:
9442cb0e 3065 if (!nextchr)
a0ed51b3 3066 sayNO;
1aa99e6b 3067 if (do_utf8) {
1a4fad37 3068 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3069 if (!(OP(scan) == DIGIT
3568d838 3070 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 3071 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 3072 {
a0ed51b3 3073 sayNO;
dfe13c55 3074 }
6f06b55f 3075 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3076 nextchr = UCHARAT(locinput);
3077 break;
3078 }
ffc61ed2 3079 if (!(OP(scan) == DIGIT
9442cb0e 3080 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
3081 sayNO;
3082 nextchr = UCHARAT(++locinput);
3083 break;
d6a28714 3084 case NDIGITL:
b8c5462f
JH
3085 PL_reg_flags |= RF_tainted;
3086 /* FALL THROUGH */
d6a28714 3087 case NDIGIT:
9442cb0e 3088 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3089 sayNO;
1aa99e6b 3090 if (do_utf8) {
1a4fad37 3091 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3092 if (OP(scan) == NDIGIT
3568d838 3093 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
3094 : isDIGIT_LC_utf8((U8*)locinput))
3095 {
a0ed51b3 3096 sayNO;
9442cb0e 3097 }
6f06b55f 3098 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3099 nextchr = UCHARAT(locinput);
3100 break;
3101 }
ffc61ed2 3102 if (OP(scan) == NDIGIT
9442cb0e 3103 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
3104 sayNO;
3105 nextchr = UCHARAT(++locinput);
3106 break;
3107 case CLUMP:
b7c83a7e 3108 if (locinput >= PL_regeol)
a0ed51b3 3109 sayNO;
b7c83a7e 3110 if (do_utf8) {
1a4fad37 3111 LOAD_UTF8_CHARCLASS_MARK();
b7c83a7e
JH
3112 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3113 sayNO;
3114 locinput += PL_utf8skip[nextchr];
3115 while (locinput < PL_regeol &&
3116 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3117 locinput += UTF8SKIP(locinput);
3118 if (locinput > PL_regeol)
3119 sayNO;
eb08e2da
JH
3120 }
3121 else
3122 locinput++;
a0ed51b3
LW
3123 nextchr = UCHARAT(locinput);
3124 break;
c8756f30 3125 case REFFL:
3280af22 3126 PL_reg_flags |= RF_tainted;
c8756f30 3127 /* FALL THROUGH */
c277df42 3128 case REF:
c8756f30 3129 case REFF:
c277df42 3130 n = ARG(scan); /* which paren pair */
cf93c79d 3131 ln = PL_regstartp[n];
2c2d71f5 3132 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
eb160463 3133 if ((I32)*PL_reglastparen < n || ln == -1)
af3f8c16 3134 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 3135 if (ln == PL_regendp[n])
a0d0e21e 3136 break;
a0ed51b3 3137
cf93c79d 3138 s = PL_bostr + ln;
1aa99e6b 3139 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 3140 char *l = locinput;
a3b680e6 3141 const char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
3142 /*
3143 * Note that we can't do the "other character" lookup trick as
3144 * in the 8-bit case (no pun intended) because in Unicode we
3145 * have to map both upper and title case to lower case.
3146 */
3147 if (OP(scan) == REFF) {
3148 while (s < e) {
a3b680e6
AL
3149 STRLEN ulen1, ulen2;
3150 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3151 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3152
a0ed51b3
LW
3153 if (l >= PL_regeol)
3154 sayNO;
a2a2844f
JH
3155 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3156 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 3157 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 3158 sayNO;
a2a2844f
JH
3159 s += ulen1;
3160 l += ulen2;
a0ed51b3
LW
3161 }
3162 }
3163 locinput = l;
3164 nextchr = UCHARAT(locinput);
3165 break;
3166 }
3167
a0d0e21e 3168 /* Inline the first character, for speed. */
76e3520e 3169 if (UCHARAT(s) != nextchr &&
c8756f30
AK
3170 (OP(scan) == REF ||
3171 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 3172 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 3173 sayNO;
cf93c79d 3174 ln = PL_regendp[n] - ln;
3280af22 3175 if (locinput + ln > PL_regeol)
4633a7c4 3176 sayNO;
c8756f30
AK
3177 if (ln > 1 && (OP(scan) == REF
3178 ? memNE(s, locinput, ln)
3179 : (OP(scan) == REFF
3180 ? ibcmp(s, locinput, ln)
3181 : ibcmp_locale(s, locinput, ln))))
4633a7c4 3182 sayNO;
a0d0e21e 3183 locinput += ln;
76e3520e 3184 nextchr = UCHARAT(locinput);
a0d0e21e
LW
3185 break;
3186
3187 case NOTHING:
c277df42 3188 case TAIL:
a0d0e21e
LW
3189 break;
3190 case BACK:
3191 break;
c277df42
IZ
3192 case EVAL:
3193 {
3194 dSP;
6136c704
AL
3195 OP_4tree * const oop = PL_op;
3196 COP * const ocurcop = PL_curcop;
f3548bdc 3197 PAD *old_comppad;
c277df42 3198 SV *ret;
6136c704 3199 struct regexp * const oreg = PL_reg_re;
9041c2e3 3200
c277df42 3201 n = ARG(scan);
533c011a 3202 PL_op = (OP_4tree*)PL_regdata->data[n];
a3621e74 3203 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
f3548bdc 3204 PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_regdata->data[n + 2]);
cf93c79d 3205 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 3206
8e5e9ebe 3207 {
6136c704 3208 SV ** const before = SP;
8e5e9ebe
RGS
3209 CALLRUNOPS(aTHX); /* Scalar context. */
3210 SPAGAIN;
3211 if (SP == before)
075aa684 3212 ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
8e5e9ebe
RGS
3213 else {
3214 ret = POPs;
3215 PUTBACK;
3216 }
3217 }
3218
0f5d15d6 3219 PL_op = oop;
f3548bdc 3220 PAD_RESTORE_LOCAL(old_comppad);
0f5d15d6 3221 PL_curcop = ocurcop;
c277df42 3222 if (logical) {
0f5d15d6
IZ
3223 if (logical == 2) { /* Postponed subexpression. */
3224 regexp *re;
6136c704 3225 MAGIC *mg = NULL;
0f5d15d6 3226 re_cc_state state;
0f5d15d6 3227 CHECKPOINT cp, lastcp;
cb50f42d 3228 int toggleutf;
faf82a0b 3229 register SV *sv;
0f5d15d6 3230
faf82a0b
AE
3231 if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3232 mg = mg_find(sv, PERL_MAGIC_qr);
3233 else if (SvSMAGICAL(ret)) {
3234 if (SvGMAGICAL(ret))
3235 sv_unmagic(ret, PERL_MAGIC_qr);
3236 else
3237 mg = mg_find(ret, PERL_MAGIC_qr);
0f5d15d6 3238 }
faf82a0b 3239
0f5d15d6
IZ
3240 if (mg) {
3241 re = (regexp *)mg->mg_obj;
df0003d4 3242 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
3243 }
3244 else {
3245 STRLEN len;
6136c704 3246 const char * const t = SvPV_const(ret, len);
0f5d15d6 3247 PMOP pm;
a3b680e6
AL
3248 char * const oprecomp = PL_regprecomp;
3249 const I32 osize = PL_regsize;
3250 const I32 onpar = PL_regnpar;
0f5d15d6 3251
5fcd1c1b 3252 Zero(&pm, 1, PMOP);
cb50f42d 3253 if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
83003860 3254 re = CALLREGCOMP(aTHX_ (char*)t, (char*)t + len, &pm);
9041c2e3 3255 if (!(SvFLAGS(ret)
faf82a0b
AE
3256 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3257 | SVs_GMG)))
14befaf4
DM
3258 sv_magic(ret,(SV*)ReREFCNT_inc(re),
3259 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
3260 PL_regprecomp = oprecomp;
3261 PL_regsize = osize;
3262 PL_regnpar = onpar;
3263 }
a3621e74 3264 DEBUG_EXECUTE_r(
9041c2e3 3265 PerlIO_printf(Perl_debug_log,
a0288114 3266 "Entering embedded \"%s%.60s%s%s\"\n",
0f5d15d6
IZ
3267 PL_colors[0],
3268 re->precomp,
3269 PL_colors[1],
3270 (strlen(re->precomp) > 60 ? "..." : ""))
3271 );
3272 state.node = next;
3273 state.prev = PL_reg_call_cc;
3274 state.cc = PL_regcc;
3275 state.re = PL_reg_re;
3276
2ab05381 3277 PL_regcc = 0;
9041c2e3 3278
0f5d15d6 3279 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3280 REGCP_SET(lastcp);
0f5d15d6
IZ
3281 cache_re(re);
3282 state.ss = PL_savestack_ix;
3283 *PL_reglastparen = 0;
a01268b5 3284 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
3285 PL_reg_call_cc = &state;
3286 PL_reginput = locinput;
cb50f42d
YST
3287 toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
3288 ((re->reganch & ROPT_UTF8) != 0);
3289 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c2d71f5
JH
3290
3291 /* XXXX This is too dramatic a measure... */
3292 PL_reg_maxiter = 0;
3293
0f5d15d6 3294 if (regmatch(re->program + 1)) {
2c914db6
IZ
3295 /* Even though we succeeded, we need to restore
3296 global variables, since we may be wrapped inside
3297 SUSPEND, thus the match may be not finished yet. */
3298
3299 /* XXXX Do this only if SUSPENDed? */
3300 PL_reg_call_cc = state.prev;
3301 PL_regcc = state.cc;
3302 PL_reg_re = state.re;
3303 cache_re(PL_reg_re);
cb50f42d 3304 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c914db6
IZ
3305
3306 /* XXXX This is too dramatic a measure... */
3307 PL_reg_maxiter = 0;
3308
3309 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
3310 ReREFCNT_dec(re);
3311 regcpblow(cp);
3312 sayYES;
3313 }
0f5d15d6 3314 ReREFCNT_dec(re);
02db2b7b 3315 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3316 regcppop();
3317 PL_reg_call_cc = state.prev;
3318 PL_regcc = state.cc;
3319 PL_reg_re = state.re;
d3790889 3320 cache_re(PL_reg_re);
cb50f42d 3321 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c2d71f5
JH
3322
3323 /* XXXX This is too dramatic a measure... */
3324 PL_reg_maxiter = 0;
3325
8e514ae6 3326 logical = 0;
0f5d15d6
IZ
3327 sayNO;
3328 }
c277df42 3329 sw = SvTRUE(ret);
0f5d15d6 3330 logical = 0;
a0ed51b3 3331 }
080c2dec 3332 else {
3280af22 3333 sv_setsv(save_scalar(PL_replgv), ret);
080c2dec
AE
3334 cache_re(oreg);
3335 }
c277df42
IZ
3336 break;
3337 }
a0d0e21e 3338 case OPEN:
c277df42 3339 n = ARG(scan); /* which paren pair */
3280af22
NIS
3340 PL_reg_start_tmp[n] = locinput;
3341 if (n > PL_regsize)
3342 PL_regsize = n;
a0d0e21e
LW
3343 break;
3344 case CLOSE:
c277df42 3345 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
3346 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3347 PL_regendp[n] = locinput - PL_bostr;
eb160463 3348 if (n > (I32)*PL_reglastparen)
3280af22 3349 *PL_reglastparen = n;
a01268b5 3350 *PL_reglastcloseparen = n;
a0d0e21e 3351 break;
c277df42
IZ
3352 case GROUPP:
3353 n = ARG(scan); /* which paren pair */
eb160463 3354 sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
3355 break;
3356 case IFTHEN:
2c2d71f5 3357 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
3358 if (sw)
3359 next = NEXTOPER(NEXTOPER(scan));
3360 else {
3361 next = scan + ARG(scan);
3362 if (OP(next) == IFTHEN) /* Fake one. */
3363 next = NEXTOPER(NEXTOPER(next));
3364 }
3365 break;
3366 case LOGICAL:
0f5d15d6 3367 logical = scan->flags;
c277df42 3368 break;
2ab05381
IZ
3369/*******************************************************************
3370 PL_regcc contains infoblock about the innermost (...)* loop, and
3371 a pointer to the next outer infoblock.
3372
3373 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3374
3375 1) After matching X, regnode for CURLYX is processed;
3376
9041c2e3 3377 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
3378 regmatch() recursively with the starting point at WHILEM node;
3379
3380 3) Each hit of WHILEM node tries to match A and Z (in the order
3381 depending on the current iteration, min/max of {min,max} and
3382 greediness). The information about where are nodes for "A"
3383 and "Z" is read from the infoblock, as is info on how many times "A"
3384 was already matched, and greediness.
3385
3386 4) After A matches, the same WHILEM node is hit again.
3387
3388 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
3389 of the same pair. Thus when WHILEM tries to match Z, it temporarily
3390 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
3391 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
3392 of the external loop.
3393
3394 Currently present infoblocks form a tree with a stem formed by PL_curcc
3395 and whatever it mentions via ->next, and additional attached trees
3396 corresponding to temporarily unset infoblocks as in "5" above.
3397
9041c2e3 3398 In the following picture infoblocks for outer loop of
2ab05381
IZ
3399 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
3400 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
3401 infoblocks are drawn below the "reset" infoblock.
3402
3403 In fact in the picture below we do not show failed matches for Z and T
3404 by WHILEM blocks. [We illustrate minimal matches, since for them it is
3405 more obvious *why* one needs to *temporary* unset infoblocks.]
3406
3407 Matched REx position InfoBlocks Comment
3408 (Y(A)*?Z)*?T x
3409 Y(A)*?Z)*?T x <- O
3410 Y (A)*?Z)*?T x <- O
3411 Y A)*?Z)*?T x <- O <- I
3412 YA )*?Z)*?T x <- O <- I
3413 YA A)*?Z)*?T x <- O <- I
3414 YAA )*?Z)*?T x <- O <- I
3415 YAA Z)*?T x <- O # Temporary unset I
3416 I
3417
3418 YAAZ Y(A)*?Z)*?T x <- O
3419 I
3420
3421 YAAZY (A)*?Z)*?T x <- O
3422 I
3423
3424 YAAZY A)*?Z)*?T x <- O <- I
3425 I
3426
3427 YAAZYA )*?Z)*?T x <- O <- I
3428 I
3429
3430 YAAZYA Z)*?T x <- O # Temporary unset I
3431 I,I
3432
3433 YAAZYAZ )*?T x <- O
3434 I,I
3435
3436 YAAZYAZ T x # Temporary unset O
3437 O
3438 I,I
3439
3440 YAAZYAZT x
3441 O
3442 I,I
3443 *******************************************************************/
a0d0e21e
LW
3444 case CURLYX: {
3445 CURCUR cc;
3280af22 3446 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
3447 /* No need to save/restore up to this paren */
3448 I32 parenfloor = scan->flags;
c277df42
IZ
3449
3450 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3451 next += ARG(next);
3280af22
NIS
3452 cc.oldcc = PL_regcc;
3453 PL_regcc = &cc;
cb434fcc
IZ
3454 /* XXXX Probably it is better to teach regpush to support
3455 parenfloor > PL_regsize... */
eb160463 3456 if (parenfloor > (I32)*PL_reglastparen)
cb434fcc
IZ
3457 parenfloor = *PL_reglastparen; /* Pessimization... */
3458 cc.parenfloor = parenfloor;
a0d0e21e
LW
3459 cc.cur = -1;
3460 cc.min = ARG1(scan);
3461 cc.max = ARG2(scan);
c277df42 3462 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
3463 cc.next = next;
3464 cc.minmod = minmod;
3465 cc.lastloc = 0;
3280af22 3466 PL_reginput = locinput;
a0d0e21e
LW
3467 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
3468 regcpblow(cp);
3280af22 3469 PL_regcc = cc.oldcc;
4633a7c4 3470 saySAME(n);
a0d0e21e 3471 }
5f66b61c 3472 /* NOTREACHED */
a0d0e21e
LW
3473 case WHILEM: {
3474 /*
3475 * This is really hard to understand, because after we match
3476 * what we're trying to match, we must make sure the rest of
2c2d71f5 3477 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
3478 * to go back UP the parse tree by recursing ever deeper. And
3479 * if it fails, we have to reset our parent's current state
3480 * that we can try again after backing off.
3481 */
3482
c277df42 3483 CHECKPOINT cp, lastcp;
3280af22 3484 CURCUR* cc = PL_regcc;
6136c704 3485 char * const lastloc = cc->lastloc; /* Detection of 0-len. */
3ab3c9b4 3486 I32 cache_offset = 0, cache_bit = 0;
c277df42 3487
4633a7c4 3488 n = cc->cur + 1; /* how many we know we matched */
3280af22 3489 PL_reginput = locinput;
a0d0e21e 3490
a3621e74 3491 DEBUG_EXECUTE_r(
9041c2e3 3492 PerlIO_printf(Perl_debug_log,
91f3b821 3493 "%*s %ld out of %ld..%ld cc=%"UVxf"\n",
3280af22 3494 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 3495 (long)n, (long)cc->min,
2797576d 3496 (long)cc->max, PTR2UV(cc))
c277df42 3497 );
4633a7c4 3498
a0d0e21e
LW
3499 /* If degenerate scan matches "", assume scan done. */
3500
579cf2c3 3501 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 3502 PL_regcc = cc->oldcc;
2ab05381
IZ
3503 if (PL_regcc)
3504 ln = PL_regcc->cur;
a3621e74 3505 DEBUG_EXECUTE_r(
c3464db5
DD
3506 PerlIO_printf(Perl_debug_log,
3507 "%*s empty match detected, try continuation...\n",
3280af22 3508 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3509 );
a0d0e21e 3510 if (regmatch(cc->next))
4633a7c4 3511 sayYES;
2ab05381
IZ
3512 if (PL_regcc)
3513 PL_regcc->cur = ln;
3280af22 3514 PL_regcc = cc;
4633a7c4 3515 sayNO;
a0d0e21e
LW
3516 }
3517
3518 /* First just match a string of min scans. */
3519
3520 if (n < cc->min) {
3521 cc->cur = n;
3522 cc->lastloc = locinput;
4633a7c4
LW
3523 if (regmatch(cc->scan))
3524 sayYES;
3525 cc->cur = n - 1;
c277df42 3526 cc->lastloc = lastloc;
4633a7c4 3527 sayNO;
a0d0e21e
LW
3528 }
3529
2c2d71f5
JH
3530 if (scan->flags) {
3531 /* Check whether we already were at this position.
3532 Postpone detection until we know the match is not
3533 *that* much linear. */
3534 if (!PL_reg_maxiter) {
3535 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3536 PL_reg_leftiter = PL_reg_maxiter;
3537 }
3538 if (PL_reg_leftiter-- == 0) {
a3b680e6 3539 const I32 size = (PL_reg_maxiter + 7 + POSCACHE_START)/8;
2c2d71f5 3540 if (PL_reg_poscache) {
eb160463 3541 if ((I32)PL_reg_poscache_size < size) {
2c2d71f5
JH
3542 Renew(PL_reg_poscache, size, char);
3543 PL_reg_poscache_size = size;
3544 }
3545 Zero(PL_reg_poscache, size, char);
3546 }
3547 else {
3548 PL_reg_poscache_size = size;
a02a5408 3549 Newxz(PL_reg_poscache, size, char);
2c2d71f5 3550 }
a3621e74 3551 DEBUG_EXECUTE_r(
2c2d71f5
JH
3552 PerlIO_printf(Perl_debug_log,
3553 "%sDetected a super-linear match, switching on caching%s...\n",
3554 PL_colors[4], PL_colors[5])
3555 );
3556 }
3557 if (PL_reg_leftiter < 0) {
3ab3c9b4 3558 cache_offset = locinput - PL_bostr;
2c2d71f5 3559
3ab3c9b4
HS
3560 cache_offset = (scan->flags & 0xf) - 1 + POSCACHE_START
3561 + cache_offset * (scan->flags>>4);
3562 cache_bit = cache_offset % 8;
3563 cache_offset /= 8;
3564 if (PL_reg_poscache[cache_offset] & (1<<cache_bit)) {
a3621e74 3565 DEBUG_EXECUTE_r(
2c2d71f5
JH
3566 PerlIO_printf(Perl_debug_log,
3567 "%*s already tried at this position...\n",
3568 REPORT_CODE_OFF+PL_regindent*2, "")
3569 );
3ab3c9b4
HS
3570 if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))
3571 /* cache records success */
c2b0868c
HS
3572 sayYES;
3573 else
3ab3c9b4 3574 /* cache records failure */
c2b0868c 3575 sayNO_SILENT;
2c2d71f5 3576 }
3ab3c9b4 3577 PL_reg_poscache[cache_offset] |= (1<<cache_bit);
2c2d71f5
JH
3578 }
3579 }
3580
a0d0e21e
LW
3581 /* Prefer next over scan for minimal matching. */
3582
3583 if (cc->minmod) {
3280af22 3584 PL_regcc = cc->oldcc;
2ab05381
IZ
3585 if (PL_regcc)
3586 ln = PL_regcc->cur;
5f05dabc 3587 cp = regcppush(cc->parenfloor);
02db2b7b 3588 REGCP_SET(lastcp);
5f05dabc 3589 if (regmatch(cc->next)) {
c277df42 3590 regcpblow(cp);
3ab3c9b4 3591 CACHEsayYES; /* All done. */
5f05dabc 3592 }
02db2b7b 3593 REGCP_UNWIND(lastcp);
5f05dabc 3594 regcppop();
2ab05381
IZ
3595 if (PL_regcc)
3596 PL_regcc->cur = ln;
3280af22 3597 PL_regcc = cc;
a0d0e21e 3598
c277df42 3599 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 3600 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3601 && !(PL_reg_flags & RF_warned)) {
3602 PL_reg_flags |= RF_warned;
9014280d 3603 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
2f3ca594
GS
3604 "Complex regular subexpression recursion",
3605 REG_INFTY - 1);
c277df42 3606 }
3ab3c9b4 3607 CACHEsayNO;
c277df42 3608 }
a687059c 3609
a3621e74 3610 DEBUG_EXECUTE_r(
c3464db5
DD
3611 PerlIO_printf(Perl_debug_log,
3612 "%*s trying longer...\n",
3280af22 3613 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3614 );
a0d0e21e 3615 /* Try scanning more and see if it helps. */
3280af22 3616 PL_reginput = locinput;
a0d0e21e
LW
3617 cc->cur = n;
3618 cc->lastloc = locinput;
5f05dabc 3619 cp = regcppush(cc->parenfloor);
02db2b7b 3620 REGCP_SET(lastcp);
5f05dabc 3621 if (regmatch(cc->scan)) {
c277df42 3622 regcpblow(cp);
3ab3c9b4 3623 CACHEsayYES;
5f05dabc 3624 }
02db2b7b 3625 REGCP_UNWIND(lastcp);
5f05dabc 3626 regcppop();
4633a7c4 3627 cc->cur = n - 1;
c277df42 3628 cc->lastloc = lastloc;
3ab3c9b4 3629 CACHEsayNO;
a0d0e21e
LW
3630 }
3631
3632 /* Prefer scan over next for maximal matching. */
3633
3634 if (n < cc->max) { /* More greed allowed? */
5f05dabc 3635 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
3636 cc->cur = n;
3637 cc->lastloc = locinput;
02db2b7b 3638 REGCP_SET(lastcp);
5f05dabc 3639 if (regmatch(cc->scan)) {
c277df42 3640 regcpblow(cp);
3ab3c9b4 3641 CACHEsayYES;
5f05dabc 3642 }
02db2b7b 3643 REGCP_UNWIND(lastcp);
a0d0e21e 3644 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3645 PL_reginput = locinput;
a3621e74 3646 DEBUG_EXECUTE_r(
c3464db5
DD
3647 PerlIO_printf(Perl_debug_log,
3648 "%*s failed, try continuation...\n",
3280af22 3649 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3650 );
3651 }
9041c2e3 3652 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3653 && !(PL_reg_flags & RF_warned)) {
3280af22 3654 PL_reg_flags |= RF_warned;
9014280d 3655 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
cb5d145d
GS
3656 "Complex regular subexpression recursion",
3657 REG_INFTY - 1);
a0d0e21e
LW
3658 }
3659
3660 /* Failed deeper matches of scan, so see if this one works. */
3280af22 3661 PL_regcc = cc->oldcc;
2ab05381
IZ
3662 if (PL_regcc)
3663 ln = PL_regcc->cur;
a0d0e21e 3664 if (regmatch(cc->next))
3ab3c9b4 3665 CACHEsayYES;
2ab05381
IZ
3666 if (PL_regcc)
3667 PL_regcc->cur = ln;
3280af22 3668 PL_regcc = cc;
4633a7c4 3669 cc->cur = n - 1;
c277df42 3670 cc->lastloc = lastloc;
3ab3c9b4 3671 CACHEsayNO;
a0d0e21e 3672 }
5f66b61c 3673 /* NOTREACHED */
9041c2e3 3674 case BRANCHJ:
c277df42
IZ
3675 next = scan + ARG(scan);
3676 if (next == scan)
3677 next = NULL;
3678 inner = NEXTOPER(NEXTOPER(scan));
3679 goto do_branch;
9041c2e3 3680 case BRANCH:
c277df42
IZ
3681 inner = NEXTOPER(scan);
3682 do_branch:
3683 {
c277df42
IZ
3684 c1 = OP(scan);
3685 if (OP(next) != c1) /* No choice. */
3686 next = inner; /* Avoid recursion. */
a0d0e21e 3687 else {
a3b680e6 3688 const I32 lastparen = *PL_reglastparen;
02db2b7b 3689 /* Put unwinding data on stack */
6136c704
AL
3690 const I32 unwind1 = SSNEWt(1,re_unwind_branch_t);
3691 re_unwind_branch_t * const uw = SSPTRt(unwind1,re_unwind_branch_t);
3692
02db2b7b
IZ
3693 uw->prev = unwind;
3694 unwind = unwind1;
3695 uw->type = ((c1 == BRANCH)
3696 ? RE_UNWIND_BRANCH
3697 : RE_UNWIND_BRANCHJ);
3698 uw->lastparen = lastparen;
3699 uw->next = next;
3700 uw->locinput = locinput;
3701 uw->nextchr = nextchr;
3702#ifdef DEBUGGING
3703 uw->regindent = ++PL_regindent;
3704#endif
c277df42 3705
02db2b7b
IZ
3706 REGCP_SET(uw->lastcp);
3707
3708 /* Now go into the first branch */
3709 next = inner;
a687059c 3710 }
a0d0e21e
LW
3711 }
3712 break;
3713 case MINMOD:
3714 minmod = 1;
3715 break;
c277df42
IZ
3716 case CURLYM:
3717 {
00db4c45 3718 I32 l = 0;
6407bf3b 3719 I32 matches = 0;
c277df42 3720 CHECKPOINT lastcp;
6407bf3b 3721 I32 maxwanted;
9041c2e3 3722
c277df42 3723 /* We suppose that the next guy does not need
0e788c72 3724 backtracking: in particular, it is of constant non-zero length,
c277df42
IZ
3725 and has no parenths to influence future backrefs. */
3726 ln = ARG1(scan); /* min to match */
3727 n = ARG2(scan); /* max to match */
c277df42
IZ
3728 paren = scan->flags;
3729 if (paren) {
3280af22
NIS
3730 if (paren > PL_regsize)
3731 PL_regsize = paren;
eb160463 3732 if (paren > (I32)*PL_reglastparen)
3280af22 3733 *PL_reglastparen = paren;
c277df42 3734 }
dc45a647 3735 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3736 if (paren)
3737 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3738 PL_reginput = locinput;
6407bf3b
DM
3739 maxwanted = minmod ? ln : n;
3740 if (maxwanted) {
3741 while (PL_reginput < PL_regeol && matches < maxwanted) {
3742 if (!regmatch(scan))
3743 break;
3744 /* on first match, determine length, l */
3745 if (!matches++) {
3746 if (PL_reg_match_utf8) {
3747 char *s = locinput;
3748 while (s < PL_reginput) {
3749 l++;
3750 s += UTF8SKIP(s);
3751 }
3752 }
3753 else {
3754 l = PL_reginput - locinput;
3755 }
3756 if (l == 0) {
3757 matches = maxwanted;
3758 break;
3759 }
3760 }
3761 locinput = PL_reginput;
3762 }
3763 }
3764
3765 PL_reginput = locinput;
3766
c277df42
IZ
3767 if (minmod) {
3768 minmod = 0;
6407bf3b 3769 if (ln && matches < ln)
c277df42 3770 sayNO;
cca55fe3 3771 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3772 regnode *text_node = next;
3773
cca55fe3 3774 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3775
cca55fe3 3776 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3777 else {
cca55fe3 3778 if (PL_regkind[(U8)OP(text_node)] == REF) {
44a68960
JH
3779 c1 = c2 = -1000;
3780 goto assume_ok_MM;
cca55fe3
JP
3781 }
3782 else { c1 = (U8)*STRING(text_node); }
af5decee 3783 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3784 c2 = PL_fold[c1];
af5decee 3785 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3786 c2 = PL_fold_locale[c1];
3787 else
3788 c2 = c1;
3789 }
a0ed51b3
LW
3790 }
3791 else
c277df42 3792 c1 = c2 = -1000;
cca55fe3 3793 assume_ok_MM:
02db2b7b 3794 REGCP_SET(lastcp);
0e788c72 3795 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
c277df42
IZ
3796 /* If it could work, try it. */
3797 if (c1 == -1000 ||
3280af22
NIS
3798 UCHARAT(PL_reginput) == c1 ||
3799 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3800 {
3801 if (paren) {
f31a99c8 3802 if (ln) {
cf93c79d
IZ
3803 PL_regstartp[paren] =
3804 HOPc(PL_reginput, -l) - PL_bostr;
3805 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3806 }
3807 else
cf93c79d 3808 PL_regendp[paren] = -1;
c277df42
IZ
3809 }
3810 if (regmatch(next))
3811 sayYES;
02db2b7b 3812 REGCP_UNWIND(lastcp);
c277df42
IZ
3813 }
3814 /* Couldn't or didn't -- move forward. */
3280af22 3815 PL_reginput = locinput;
d83d022c 3816 if (regmatch(scan)) {
c277df42 3817 ln++;
3280af22 3818 locinput = PL_reginput;
c277df42
IZ
3819 }
3820 else
3821 sayNO;
3822 }
a0ed51b3
LW
3823 }
3824 else {
a3621e74 3825 DEBUG_EXECUTE_r(
5c0ca799 3826 PerlIO_printf(Perl_debug_log,
faccc32b 3827 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3828 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
6407bf3b 3829 (IV) matches, (IV)l)
c277df42 3830 );
6407bf3b 3831 if (matches >= ln) {
cca55fe3 3832 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3833 regnode *text_node = next;
3834
cca55fe3 3835 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3836
cca55fe3 3837 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3838 else {
cca55fe3 3839 if (PL_regkind[(U8)OP(text_node)] == REF) {
44a68960
JH
3840 c1 = c2 = -1000;
3841 goto assume_ok_REG;
cca55fe3
JP
3842 }
3843 else { c1 = (U8)*STRING(text_node); }
3844
af5decee 3845 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3846 c2 = PL_fold[c1];
af5decee 3847 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3848 c2 = PL_fold_locale[c1];
3849 else
3850 c2 = c1;
3851 }
a0ed51b3
LW
3852 }
3853 else
c277df42
IZ
3854 c1 = c2 = -1000;
3855 }
cca55fe3 3856 assume_ok_REG:
02db2b7b 3857 REGCP_SET(lastcp);
6407bf3b 3858 while (matches >= ln) {
c277df42
IZ
3859 /* If it could work, try it. */
3860 if (c1 == -1000 ||
3280af22
NIS
3861 UCHARAT(PL_reginput) == c1 ||
3862 UCHARAT(PL_reginput) == c2)
a0ed51b3 3863 {
a3621e74 3864 DEBUG_EXECUTE_r(
6407bf3b
DM
3865 PerlIO_printf(Perl_debug_log,
3866 "%*s trying tail with matches=%"IVdf"...\n",
3867 (int)(REPORT_CODE_OFF+PL_regindent*2),
3868 "", (IV)matches)
a0ed51b3
LW
3869 );
3870 if (paren) {
6407bf3b 3871 if (matches) {
cf93c79d
IZ
3872 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3873 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3874 }
a0ed51b3 3875 else
cf93c79d 3876 PL_regendp[paren] = -1;
c277df42 3877 }
a0ed51b3
LW
3878 if (regmatch(next))
3879 sayYES;
02db2b7b 3880 REGCP_UNWIND(lastcp);
a0ed51b3 3881 }
c277df42 3882 /* Couldn't or didn't -- back up. */
6407bf3b 3883 matches--;
dfe13c55 3884 locinput = HOPc(locinput, -l);
3280af22 3885 PL_reginput = locinput;
c277df42
IZ
3886 }
3887 }
3888 sayNO;
5f66b61c 3889 /* NOTREACHED */
c277df42
IZ
3890 break;
3891 }
3892 case CURLYN:
3893 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3894 if (paren > PL_regsize)
3895 PL_regsize = paren;
eb160463 3896 if (paren > (I32)*PL_reglastparen)
3280af22 3897 *PL_reglastparen = paren;
c277df42
IZ
3898 ln = ARG1(scan); /* min to match */
3899 n = ARG2(scan); /* max to match */
dc45a647 3900 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3901 goto repeat;
a0d0e21e 3902 case CURLY:
c277df42 3903 paren = 0;
a0d0e21e
LW
3904 ln = ARG1(scan); /* min to match */
3905 n = ARG2(scan); /* max to match */
dc45a647 3906 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3907 goto repeat;
3908 case STAR:
3909 ln = 0;
c277df42 3910 n = REG_INFTY;
a0d0e21e 3911 scan = NEXTOPER(scan);
c277df42 3912 paren = 0;
a0d0e21e
LW
3913 goto repeat;
3914 case PLUS:
c277df42
IZ
3915 ln = 1;
3916 n = REG_INFTY;
3917 scan = NEXTOPER(scan);
3918 paren = 0;
3919 repeat:
a0d0e21e
LW
3920 /*
3921 * Lookahead to avoid useless match attempts
3922 * when we know what character comes next.
3923 */
5f80c4cf
JP
3924
3925 /*
3926 * Used to only do .*x and .*?x, but now it allows
3927 * for )'s, ('s and (?{ ... })'s to be in the way
3928 * of the quantifier and the EXACT-like node. -- japhy
3929 */
3930
cca55fe3 3931 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3932 U8 *s;
3933 regnode *text_node = next;
3934
cca55fe3 3935 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3936
cca55fe3 3937 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3938 else {
cca55fe3 3939 if (PL_regkind[(U8)OP(text_node)] == REF) {
44a68960
JH
3940 c1 = c2 = -1000;
3941 goto assume_ok_easy;
cca55fe3
JP
3942 }
3943 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
3944
3945 if (!UTF) {
3946 c2 = c1 = *s;
f65d3ee7 3947 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3948 c2 = PL_fold[c1];
f65d3ee7 3949 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf 3950 c2 = PL_fold_locale[c1];
1aa99e6b 3951 }
5f80c4cf 3952 else { /* UTF */
f65d3ee7 3953 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 3954 STRLEN ulen1, ulen2;
89ebb4a3
JH
3955 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3956 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
a2a2844f
JH
3957
3958 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3959 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3960
89ebb4a3 3961 c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
041457d9 3962 uniflags);
89ebb4a3 3963 c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
041457d9 3964 uniflags);
5f80c4cf
JP
3965 }
3966 else {
89ebb4a3 3967 c2 = c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
041457d9 3968 uniflags);
5f80c4cf 3969 }
1aa99e6b
IH
3970 }
3971 }
bbce6d69 3972 }
a0d0e21e 3973 else
bbce6d69 3974 c1 = c2 = -1000;
cca55fe3 3975 assume_ok_easy:
3280af22 3976 PL_reginput = locinput;
a0d0e21e 3977 if (minmod) {
c277df42 3978 CHECKPOINT lastcp;
a0d0e21e
LW
3979 minmod = 0;
3980 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3981 sayNO;
a0ed51b3 3982 locinput = PL_reginput;
02db2b7b 3983 REGCP_SET(lastcp);
0fe9bf95 3984 if (c1 != -1000) {
1aa99e6b 3985 char *e; /* Should not check after this */
0fe9bf95 3986 char *old = locinput;
b2f2f093 3987 int count = 0;
0fe9bf95 3988
1aa99e6b 3989 if (n == REG_INFTY) {
0fe9bf95 3990 e = PL_regeol - 1;
1aa99e6b
IH
3991 if (do_utf8)
3992 while (UTF8_IS_CONTINUATION(*(U8*)e))
3993 e--;
3994 }
3995 else if (do_utf8) {
3996 int m = n - ln;
3997 for (e = locinput;
3998 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3999 e += UTF8SKIP(e);
4000 }
4001 else {
4002 e = locinput + n - ln;
4003 if (e >= PL_regeol)
4004 e = PL_regeol - 1;
4005 }
0fe9bf95
IZ
4006 while (1) {
4007 /* Find place 'next' could work */
1aa99e6b
IH
4008 if (!do_utf8) {
4009 if (c1 == c2) {
a8e8ab15
JH
4010 while (locinput <= e &&
4011 UCHARAT(locinput) != c1)
1aa99e6b
IH
4012 locinput++;
4013 } else {
9041c2e3 4014 while (locinput <= e
a8e8ab15
JH
4015 && UCHARAT(locinput) != c1
4016 && UCHARAT(locinput) != c2)
1aa99e6b
IH
4017 locinput++;
4018 }
4019 count = locinput - old;
4020 }
4021 else {
1aa99e6b 4022 if (c1 == c2) {
a3b680e6 4023 STRLEN len;
872c91ae
JH
4024 /* count initialised to
4025 * utf8_distance(old, locinput) */
b2f2f093 4026 while (locinput <= e &&
872c91ae 4027 utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4028 UTF8_MAXBYTES, &len,
041457d9 4029 uniflags) != (UV)c1) {
1aa99e6b 4030 locinput += len;
b2f2f093
JH
4031 count++;
4032 }
1aa99e6b 4033 } else {
a3b680e6 4034 STRLEN len;
872c91ae
JH
4035 /* count initialised to
4036 * utf8_distance(old, locinput) */
b2f2f093 4037 while (locinput <= e) {
872c91ae 4038 UV c = utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4039 UTF8_MAXBYTES, &len,
041457d9 4040 uniflags);
eb160463 4041 if (c == (UV)c1 || c == (UV)c2)
1aa99e6b 4042 break;
b2f2f093
JH
4043 locinput += len;
4044 count++;
1aa99e6b
IH
4045 }
4046 }
0fe9bf95 4047 }
9041c2e3 4048 if (locinput > e)
0fe9bf95
IZ
4049 sayNO;
4050 /* PL_reginput == old now */
4051 if (locinput != old) {
4052 ln = 1; /* Did some */
1aa99e6b 4053 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
4054 sayNO;
4055 }
4056 /* PL_reginput == locinput now */
29d1e993 4057 TRYPAREN(paren, ln, locinput);
0fe9bf95 4058 PL_reginput = locinput; /* Could be reset... */
02db2b7b 4059 REGCP_UNWIND(lastcp);
0fe9bf95 4060 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
4061 old = locinput;
4062 if (do_utf8)
4063 locinput += UTF8SKIP(locinput);
4064 else
4065 locinput++;
b2f2f093 4066 count = 1;
0fe9bf95
IZ
4067 }
4068 }
4069 else
c277df42 4070 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
4071 UV c;
4072 if (c1 != -1000) {
4073 if (do_utf8)
872c91ae 4074 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4075 UTF8_MAXBYTES, 0,
041457d9 4076 uniflags);
1aa99e6b 4077 else
9041c2e3 4078 c = UCHARAT(PL_reginput);
2390ecbc 4079 /* If it could work, try it. */
eb160463 4080 if (c == (UV)c1 || c == (UV)c2)
2390ecbc 4081 {
ecc99935 4082 TRYPAREN(paren, ln, PL_reginput);
2390ecbc
PP
4083 REGCP_UNWIND(lastcp);
4084 }
1aa99e6b 4085 }
a0d0e21e 4086 /* If it could work, try it. */
2390ecbc 4087 else if (c1 == -1000)
bbce6d69 4088 {
ecc99935 4089 TRYPAREN(paren, ln, PL_reginput);
02db2b7b 4090 REGCP_UNWIND(lastcp);
bbce6d69 4091 }
c277df42 4092 /* Couldn't or didn't -- move forward. */
a0ed51b3 4093 PL_reginput = locinput;
a0d0e21e
LW
4094 if (regrepeat(scan, 1)) {
4095 ln++;
a0ed51b3
LW
4096 locinput = PL_reginput;
4097 }
4098 else
4633a7c4 4099 sayNO;
a0d0e21e
LW
4100 }
4101 }
4102 else {
c277df42 4103 CHECKPOINT lastcp;
a0d0e21e 4104 n = regrepeat(scan, n);
a0ed51b3 4105 locinput = PL_reginput;
22c35a8c 4106 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
7fba1cd6 4107 (OP(next) != MEOL ||
15272685
HS
4108 OP(next) == SEOL || OP(next) == EOS))
4109 {
a0d0e21e 4110 ln = n; /* why back off? */
1aeab75a
GS
4111 /* ...because $ and \Z can match before *and* after
4112 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
4113 We should back off by one in this case. */
4114 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
4115 ln--;
4116 }
02db2b7b 4117 REGCP_SET(lastcp);
c277df42 4118 if (paren) {
8fa7f367 4119 UV c = 0;
c277df42 4120 while (n >= ln) {
1aa99e6b
IH
4121 if (c1 != -1000) {
4122 if (do_utf8)
872c91ae 4123 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4124 UTF8_MAXBYTES, 0,
041457d9 4125 uniflags);
1aa99e6b 4126 else
9041c2e3 4127 c = UCHARAT(PL_reginput);
1aa99e6b 4128 }
c277df42 4129 /* If it could work, try it. */
eb160463 4130 if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
c277df42 4131 {
29d1e993 4132 TRYPAREN(paren, n, PL_reginput);
02db2b7b 4133 REGCP_UNWIND(lastcp);
c277df42
IZ
4134 }
4135 /* Couldn't or didn't -- back up. */
4136 n--;
dfe13c55 4137 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 4138 }
a0ed51b3
LW
4139 }
4140 else {
8fa7f367 4141 UV c = 0;
c277df42 4142 while (n >= ln) {
1aa99e6b
IH
4143 if (c1 != -1000) {
4144 if (do_utf8)
872c91ae 4145 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4146 UTF8_MAXBYTES, 0,
041457d9 4147 uniflags);
1aa99e6b 4148 else
9041c2e3 4149 c = UCHARAT(PL_reginput);
1aa99e6b 4150 }
c277df42 4151 /* If it could work, try it. */
eb160463 4152 if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
c277df42 4153 {
29d1e993 4154 TRYPAREN(paren, n, PL_reginput);
02db2b7b 4155 REGCP_UNWIND(lastcp);
c277df42
IZ
4156 }
4157 /* Couldn't or didn't -- back up. */
4158 n--;
dfe13c55 4159 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 4160 }
a0d0e21e
LW
4161 }
4162 }
4633a7c4 4163 sayNO;
c277df42 4164 break;
a0d0e21e 4165 case END:
0f5d15d6
IZ
4166 if (PL_reg_call_cc) {
4167 re_cc_state *cur_call_cc = PL_reg_call_cc;
4168 CURCUR *cctmp = PL_regcc;
4169 regexp *re = PL_reg_re;
6136c704
AL
4170 CHECKPOINT lastcp;
4171 I32 tmp;
4172
4173 /* Save *all* the positions. */
4174 const CHECKPOINT cp = regcppush(0);
02db2b7b 4175 REGCP_SET(lastcp);
6136c704
AL
4176
4177 /* Restore parens of the caller. */
4178 tmp = PL_savestack_ix;
4179 PL_savestack_ix = PL_reg_call_cc->ss;
4180 regcppop();
4181 PL_savestack_ix = tmp;
4182
4183 /* Make position available to the callcc. */
4184 PL_reginput = locinput;
4185
0f5d15d6
IZ
4186 cache_re(PL_reg_call_cc->re);
4187 PL_regcc = PL_reg_call_cc->cc;
4188 PL_reg_call_cc = PL_reg_call_cc->prev;
4189 if (regmatch(cur_call_cc->node)) {
4190 PL_reg_call_cc = cur_call_cc;
4191 regcpblow(cp);
4192 sayYES;
4193 }
02db2b7b 4194 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
4195 regcppop();
4196 PL_reg_call_cc = cur_call_cc;
4197 PL_regcc = cctmp;
4198 PL_reg_re = re;
4199 cache_re(re);
4200
a3621e74 4201 DEBUG_EXECUTE_r(
0f5d15d6
IZ
4202 PerlIO_printf(Perl_debug_log,
4203 "%*s continuation failed...\n",
4204 REPORT_CODE_OFF+PL_regindent*2, "")
4205 );
7821416a 4206 sayNO_SILENT;
0f5d15d6 4207 }
7821416a 4208 if (locinput < PL_regtill) {
a3621e74 4209 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
7821416a
IZ
4210 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4211 PL_colors[4],
4212 (long)(locinput - PL_reg_starttry),
4213 (long)(PL_regtill - PL_reg_starttry),
4214 PL_colors[5]));
4215 sayNO_FINAL; /* Cannot match: too short. */
4216 }
4217 PL_reginput = locinput; /* put where regtry can find it */
4218 sayYES_FINAL; /* Success! */
7e5428c5 4219 case SUCCEED:
3280af22 4220 PL_reginput = locinput; /* put where regtry can find it */
7821416a 4221 sayYES_LOUD; /* Success! */
c277df42
IZ
4222 case SUSPEND:
4223 n = 1;
9fe1d20c 4224 PL_reginput = locinput;
9041c2e3 4225 goto do_ifmatch;
a0d0e21e 4226 case UNLESSM:
c277df42 4227 n = 0;
a0ed51b3 4228 if (scan->flags) {
efb30f32
HS
4229 s = HOPBACKc(locinput, scan->flags);
4230 if (!s)
4231 goto say_yes;
4232 PL_reginput = s;
a0ed51b3
LW
4233 }
4234 else
4235 PL_reginput = locinput;
c277df42
IZ
4236 goto do_ifmatch;
4237 case IFMATCH:
4238 n = 1;
a0ed51b3 4239 if (scan->flags) {
efb30f32
HS
4240 s = HOPBACKc(locinput, scan->flags);
4241 if (!s)
4242 goto say_no;
4243 PL_reginput = s;
a0ed51b3
LW
4244 }
4245 else
4246 PL_reginput = locinput;
4247
c277df42 4248 do_ifmatch:
c277df42
IZ
4249 inner = NEXTOPER(NEXTOPER(scan));
4250 if (regmatch(inner) != n) {
4251 say_no:
4252 if (logical) {
4253 logical = 0;
4254 sw = 0;
4255 goto do_longjump;
a0ed51b3
LW
4256 }
4257 else
c277df42
IZ
4258 sayNO;
4259 }
4260 say_yes:
4261 if (logical) {
4262 logical = 0;
4263 sw = 1;
4264 }
fe44a5e8 4265 if (OP(scan) == SUSPEND) {
3280af22 4266 locinput = PL_reginput;
565764a8 4267 nextchr = UCHARAT(locinput);
fe44a5e8 4268 }
c277df42
IZ
4269 /* FALL THROUGH. */
4270 case LONGJMP:
4271 do_longjump:
4272 next = scan + ARG(scan);
4273 if (next == scan)
4274 next = NULL;
a0d0e21e
LW
4275 break;
4276 default:
b900a521 4277 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 4278 PTR2UV(scan), OP(scan));
cea2e8a9 4279 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 4280 }
02db2b7b 4281 reenter:
a0d0e21e
LW
4282 scan = next;
4283 }
a687059c 4284
a0d0e21e
LW
4285 /*
4286 * We get here only if there's trouble -- normally "case END" is
4287 * the terminating point.
4288 */
cea2e8a9 4289 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 4290 /*NOTREACHED*/
4633a7c4
LW
4291 sayNO;
4292
7821416a 4293yes_loud:
a3621e74 4294 DEBUG_EXECUTE_r(
7821416a
IZ
4295 PerlIO_printf(Perl_debug_log,
4296 "%*s %scould match...%s\n",
e4584336 4297 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
7821416a
IZ
4298 );
4299 goto yes;
4300yes_final:
a3621e74 4301 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
e4584336 4302 PL_colors[4], PL_colors[5]));
4633a7c4
LW
4303yes:
4304#ifdef DEBUGGING
3280af22 4305 PL_regindent--;
4633a7c4 4306#endif
02db2b7b
IZ
4307
4308#if 0 /* Breaks $^R */
4309 if (unwind)
4310 regcpblow(firstcp);
4311#endif
4633a7c4
LW
4312 return 1;
4313
4314no:
a3621e74 4315 DEBUG_EXECUTE_r(
7821416a
IZ
4316 PerlIO_printf(Perl_debug_log,
4317 "%*s %sfailed...%s\n",
e4584336 4318 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
7821416a
IZ
4319 );
4320 goto do_no;
4321no_final:
4322do_no:
02db2b7b 4323 if (unwind) {
6136c704 4324 re_unwind_t * const uw = SSPTRt(unwind,re_unwind_t);
02db2b7b
IZ
4325
4326 switch (uw->type) {
4327 case RE_UNWIND_BRANCH:
4328 case RE_UNWIND_BRANCHJ:
4329 {
6136c704 4330 re_unwind_branch_t * const uwb = &(uw->branch);
a3b680e6 4331 const I32 lastparen = uwb->lastparen;
9041c2e3 4332
02db2b7b
IZ
4333 REGCP_UNWIND(uwb->lastcp);
4334 for (n = *PL_reglastparen; n > lastparen; n--)
4335 PL_regendp[n] = -1;
4336 *PL_reglastparen = n;
4337 scan = next = uwb->next;
9041c2e3
NIS
4338 if ( !scan ||
4339 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
4340 ? BRANCH : BRANCHJ) ) { /* Failure */
4341 unwind = uwb->prev;
4342#ifdef DEBUGGING
4343 PL_regindent--;
4344#endif
4345 goto do_no;
4346 }
4347 /* Have more choice yet. Reuse the same uwb. */
02db2b7b
IZ
4348 if ((n = (uwb->type == RE_UNWIND_BRANCH
4349 ? NEXT_OFF(next) : ARG(next))))
4350 next += n;
4351 else
4352 next = NULL; /* XXXX Needn't unwinding in this case... */
4353 uwb->next = next;
4354 next = NEXTOPER(scan);
4355 if (uwb->type == RE_UNWIND_BRANCHJ)
4356 next = NEXTOPER(next);
4357 locinput = uwb->locinput;
4358 nextchr = uwb->nextchr;
4359#ifdef DEBUGGING
4360 PL_regindent = uwb->regindent;
4361#endif
4362
4363 goto reenter;
4364 }
5f66b61c 4365 /* NOTREACHED */
02db2b7b
IZ
4366 default:
4367 Perl_croak(aTHX_ "regexp unwind memory corruption");
4368 }
5f66b61c 4369 /* NOTREACHED */
02db2b7b 4370 }
4633a7c4 4371#ifdef DEBUGGING
3280af22 4372 PL_regindent--;
4633a7c4 4373#endif
a0d0e21e 4374 return 0;
a687059c
LW
4375}
4376
4377/*
4378 - regrepeat - repeatedly match something simple, report how many
4379 */
4380/*
4381 * [This routine now assumes that it will only match on things of length 1.
4382 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 4383 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 4384 */
76e3520e 4385STATIC I32
a3b680e6 4386S_regrepeat(pTHX_ const regnode *p, I32 max)
a687059c 4387{
27da23d5 4388 dVAR;
a0d0e21e 4389 register char *scan;
a0d0e21e 4390 register I32 c;
3280af22 4391 register char *loceol = PL_regeol;
a0ed51b3 4392 register I32 hardcount = 0;
53c4c00c 4393 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 4394
3280af22 4395 scan = PL_reginput;
faf11cac
HS
4396 if (max == REG_INFTY)
4397 max = I32_MAX;
4398 else if (max < loceol - scan)
a0d0e21e 4399 loceol = scan + max;
a0d0e21e 4400 switch (OP(p)) {
22c35a8c 4401 case REG_ANY:
1aa99e6b 4402 if (do_utf8) {
ffc61ed2 4403 loceol = PL_regeol;
1aa99e6b 4404 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
4405 scan += UTF8SKIP(scan);
4406 hardcount++;
4407 }
4408 } else {
4409 while (scan < loceol && *scan != '\n')
4410 scan++;
a0ed51b3
LW
4411 }
4412 break;
ffc61ed2 4413 case SANY:
def8e4ea
JH
4414 if (do_utf8) {
4415 loceol = PL_regeol;
a0804c9e 4416 while (scan < loceol && hardcount < max) {
def8e4ea
JH
4417 scan += UTF8SKIP(scan);
4418 hardcount++;
4419 }
4420 }
4421 else
4422 scan = loceol;
a0ed51b3 4423 break;
f33976b4
DB
4424 case CANY:
4425 scan = loceol;
4426 break;
090f7165
JH
4427 case EXACT: /* length of string is 1 */
4428 c = (U8)*STRING(p);
4429 while (scan < loceol && UCHARAT(scan) == c)
4430 scan++;
bbce6d69 4431 break;
4432 case EXACTF: /* length of string is 1 */
cd439c50 4433 c = (U8)*STRING(p);
bbce6d69 4434 while (scan < loceol &&
22c35a8c 4435 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 4436 scan++;
4437 break;
4438 case EXACTFL: /* length of string is 1 */
3280af22 4439 PL_reg_flags |= RF_tainted;
cd439c50 4440 c = (U8)*STRING(p);
bbce6d69 4441 while (scan < loceol &&
22c35a8c 4442 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
4443 scan++;
4444 break;
4445 case ANYOF:
ffc61ed2
JH
4446 if (do_utf8) {
4447 loceol = PL_regeol;
cfc92286
JH
4448 while (hardcount < max && scan < loceol &&
4449 reginclass(p, (U8*)scan, 0, do_utf8)) {
ffc61ed2
JH
4450 scan += UTF8SKIP(scan);
4451 hardcount++;
4452 }
4453 } else {
7d3e948e 4454 while (scan < loceol && REGINCLASS(p, (U8*)scan))
ffc61ed2
JH
4455 scan++;
4456 }
a0d0e21e
LW
4457 break;
4458 case ALNUM:
1aa99e6b 4459 if (do_utf8) {
ffc61ed2 4460 loceol = PL_regeol;
1a4fad37 4461 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4462 while (hardcount < max && scan < loceol &&
3568d838 4463 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4464 scan += UTF8SKIP(scan);
4465 hardcount++;
4466 }
4467 } else {
4468 while (scan < loceol && isALNUM(*scan))
4469 scan++;
a0ed51b3
LW
4470 }
4471 break;
bbce6d69 4472 case ALNUML:
3280af22 4473 PL_reg_flags |= RF_tainted;
1aa99e6b 4474 if (do_utf8) {
ffc61ed2 4475 loceol = PL_regeol;
1aa99e6b
IH
4476 while (hardcount < max && scan < loceol &&
4477 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4478 scan += UTF8SKIP(scan);
4479 hardcount++;
4480 }
4481 } else {
4482 while (scan < loceol && isALNUM_LC(*scan))
4483 scan++;
a0ed51b3
LW
4484 }
4485 break;
a0d0e21e 4486 case NALNUM:
1aa99e6b 4487 if (do_utf8) {
ffc61ed2 4488 loceol = PL_regeol;
1a4fad37 4489 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4490 while (hardcount < max && scan < loceol &&
3568d838 4491 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4492 scan += UTF8SKIP(scan);
4493 hardcount++;
4494 }
4495 } else {
4496 while (scan < loceol && !isALNUM(*scan))
4497 scan++;
a0ed51b3
LW
4498 }
4499 break;
bbce6d69 4500 case NALNUML:
3280af22 4501 PL_reg_flags |= RF_tainted;
1aa99e6b 4502 if (do_utf8) {
ffc61ed2 4503 loceol = PL_regeol;
1aa99e6b
IH
4504 while (hardcount < max && scan < loceol &&
4505 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4506 scan += UTF8SKIP(scan);
4507 hardcount++;
4508 }
4509 } else {
4510 while (scan < loceol && !isALNUM_LC(*scan))
4511 scan++;
a0ed51b3
LW
4512 }
4513 break;
a0d0e21e 4514 case SPACE:
1aa99e6b 4515 if (do_utf8) {
ffc61ed2 4516 loceol = PL_regeol;
1a4fad37 4517 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4518 while (hardcount < max && scan < loceol &&
3568d838
JH
4519 (*scan == ' ' ||
4520 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4521 scan += UTF8SKIP(scan);
4522 hardcount++;
4523 }
4524 } else {
4525 while (scan < loceol && isSPACE(*scan))
4526 scan++;
a0ed51b3
LW
4527 }
4528 break;
bbce6d69 4529 case SPACEL:
3280af22 4530 PL_reg_flags |= RF_tainted;
1aa99e6b 4531 if (do_utf8) {
ffc61ed2 4532 loceol = PL_regeol;
1aa99e6b 4533 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4534 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4535 scan += UTF8SKIP(scan);
4536 hardcount++;
4537 }
4538 } else {
4539 while (scan < loceol && isSPACE_LC(*scan))
4540 scan++;
a0ed51b3
LW
4541 }
4542 break;
a0d0e21e 4543 case NSPACE:
1aa99e6b 4544 if (do_utf8) {
ffc61ed2 4545 loceol = PL_regeol;
1a4fad37 4546 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4547 while (hardcount < max && scan < loceol &&
3568d838
JH
4548 !(*scan == ' ' ||
4549 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4550 scan += UTF8SKIP(scan);
4551 hardcount++;
4552 }
4553 } else {
4554 while (scan < loceol && !isSPACE(*scan))
4555 scan++;
4556 break;
a0ed51b3 4557 }
bbce6d69 4558 case NSPACEL:
3280af22 4559 PL_reg_flags |= RF_tainted;
1aa99e6b 4560 if (do_utf8) {
ffc61ed2 4561 loceol = PL_regeol;
1aa99e6b 4562 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4563 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4564 scan += UTF8SKIP(scan);
4565 hardcount++;
4566 }
4567 } else {
4568 while (scan < loceol && !isSPACE_LC(*scan))
4569 scan++;
a0ed51b3
LW
4570 }
4571 break;
a0d0e21e 4572 case DIGIT:
1aa99e6b 4573 if (do_utf8) {
ffc61ed2 4574 loceol = PL_regeol;
1a4fad37 4575 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4576 while (hardcount < max && scan < loceol &&
3568d838 4577 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4578 scan += UTF8SKIP(scan);
4579 hardcount++;
4580 }
4581 } else {
4582 while (scan < loceol && isDIGIT(*scan))
4583 scan++;
a0ed51b3
LW
4584 }
4585 break;
a0d0e21e 4586 case NDIGIT:
1aa99e6b 4587 if (do_utf8) {
ffc61ed2 4588 loceol = PL_regeol;
1a4fad37 4589 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4590 while (hardcount < max && scan < loceol &&
3568d838 4591 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4592 scan += UTF8SKIP(scan);
4593 hardcount++;
4594 }
4595 } else {
4596 while (scan < loceol && !isDIGIT(*scan))
4597 scan++;
a0ed51b3
LW
4598 }
4599 break;
a0d0e21e
LW
4600 default: /* Called on something of 0 width. */
4601 break; /* So match right here or not at all. */
4602 }
a687059c 4603
a0ed51b3
LW
4604 if (hardcount)
4605 c = hardcount;
4606 else
4607 c = scan - PL_reginput;
3280af22 4608 PL_reginput = scan;
a687059c 4609
a3621e74 4610 DEBUG_r({
ab74612d 4611 SV *re_debug_flags = NULL;
6136c704 4612 SV * const prop = sv_newmortal();
a3621e74
YO
4613 GET_RE_DEBUG_FLAGS;
4614 DEBUG_EXECUTE_r({
c277df42 4615 regprop(prop, p);
9041c2e3
NIS
4616 PerlIO_printf(Perl_debug_log,
4617 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3f7c398e 4618 REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max);
c277df42 4619 });
a3621e74 4620 });
9041c2e3 4621
a0d0e21e 4622 return(c);
a687059c
LW
4623}
4624
c277df42
IZ
4625
4626/*
ffc61ed2
JH
4627- regclass_swash - prepare the utf8 swash
4628*/
4629
4630SV *
a3b680e6 4631Perl_regclass_swash(pTHX_ register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
ffc61ed2 4632{
97aff369 4633 dVAR;
9e55ce06
JH
4634 SV *sw = NULL;
4635 SV *si = NULL;
4636 SV *alt = NULL;
ffc61ed2
JH
4637
4638 if (PL_regdata && PL_regdata->count) {
a3b680e6 4639 const U32 n = ARG(node);
ffc61ed2
JH
4640
4641 if (PL_regdata->what[n] == 's') {
890ce7af
AL
4642 SV * const rv = (SV*)PL_regdata->data[n];
4643 AV * const av = (AV*)SvRV((SV*)rv);
2d03de9c 4644 SV **const ary = AvARRAY(av);
9e55ce06 4645 SV **a, **b;
9041c2e3 4646
711a919c 4647 /* See the end of regcomp.c:S_regclass() for
9e55ce06
JH
4648 * documentation of these array elements. */
4649
b11f357e 4650 si = *ary;
8f7f7219 4651 a = SvROK(ary[1]) ? &ary[1] : 0;
b11f357e
JH
4652 b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4653
ffc61ed2
JH
4654 if (a)
4655 sw = *a;
4656 else if (si && doinit) {
4657 sw = swash_init("utf8", "", si, 1, 0);
4658 (void)av_store(av, 1, sw);
4659 }
9e55ce06
JH
4660 if (b)
4661 alt = *b;
ffc61ed2
JH
4662 }
4663 }
4664
9e55ce06
JH
4665 if (listsvp)
4666 *listsvp = si;
4667 if (altsvp)
4668 *altsvp = alt;
ffc61ed2
JH
4669
4670 return sw;
4671}
4672
4673/*
ba7b4546 4674 - reginclass - determine if a character falls into a character class
832705d4
JH
4675
4676 The n is the ANYOF regnode, the p is the target string, lenp
4677 is pointer to the maximum length of how far to go in the p
4678 (if the lenp is zero, UTF8SKIP(p) is used),
4679 do_utf8 tells whether the target string is in UTF-8.
4680
bbce6d69 4681 */
4682
76e3520e 4683STATIC bool
a3b680e6 4684S_reginclass(pTHX_ register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
bbce6d69 4685{
27da23d5 4686 dVAR;
a3b680e6 4687 const char flags = ANYOF_FLAGS(n);
bbce6d69 4688 bool match = FALSE;
cc07378b 4689 UV c = *p;
ae9ddab8 4690 STRLEN len = 0;
9e55ce06 4691 STRLEN plen;
1aa99e6b 4692
19f67299
TS
4693 if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
4694 c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
4695 ckWARN(WARN_UTF8) ? UTF8_CHECK_ONLY :
4696 UTF8_ALLOW_ANYUV|UTF8_CHECK_ONLY);
4697 if (len == (STRLEN)-1)
4698 Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
4699 }
bbce6d69 4700
0f0076b4 4701 plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
ffc61ed2 4702 if (do_utf8 || (flags & ANYOF_UNICODE)) {
9e55ce06
JH
4703 if (lenp)
4704 *lenp = 0;
ffc61ed2 4705 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
4706 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4707 match = TRUE;
bbce6d69 4708 }
3568d838 4709 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 4710 match = TRUE;
ffc61ed2 4711 if (!match) {
9e55ce06 4712 AV *av;
890ce7af 4713 SV * const sw = regclass_swash(n, TRUE, 0, (SV**)&av);
ffc61ed2
JH
4714
4715 if (sw) {
3568d838 4716 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
4717 match = TRUE;
4718 else if (flags & ANYOF_FOLD) {
9e55ce06
JH
4719 if (!match && lenp && av) {
4720 I32 i;
9e55ce06 4721 for (i = 0; i <= av_len(av); i++) {
890ce7af 4722 SV* const sv = *av_fetch(av, i, FALSE);
9e55ce06 4723 STRLEN len;
890ce7af 4724 const char * const s = SvPV_const(sv, len);
9e55ce06 4725
061b10df 4726 if (len <= plen && memEQ(s, (char*)p, len)) {
9e55ce06
JH
4727 *lenp = len;
4728 match = TRUE;
4729 break;
4730 }
4731 }
4732 }
4733 if (!match) {
89ebb4a3 4734 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4a623e43
JH
4735 STRLEN tmplen;
4736
9e55ce06
JH
4737 to_utf8_fold(p, tmpbuf, &tmplen);
4738 if (swash_fetch(sw, tmpbuf, do_utf8))
4739 match = TRUE;
4740 }
ffc61ed2
JH
4741 }
4742 }
bbce6d69 4743 }
9e55ce06 4744 if (match && lenp && *lenp == 0)
0f0076b4 4745 *lenp = UNISKIP(NATIVE_TO_UNI(c));
bbce6d69 4746 }
1aa99e6b 4747 if (!match && c < 256) {
ffc61ed2
JH
4748 if (ANYOF_BITMAP_TEST(n, c))
4749 match = TRUE;
4750 else if (flags & ANYOF_FOLD) {
eb160463 4751 U8 f;
a0ed51b3 4752
ffc61ed2
JH
4753 if (flags & ANYOF_LOCALE) {
4754 PL_reg_flags |= RF_tainted;
4755 f = PL_fold_locale[c];
4756 }
4757 else
4758 f = PL_fold[c];
4759 if (f != c && ANYOF_BITMAP_TEST(n, f))
4760 match = TRUE;
4761 }
4762
4763 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 4764 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
4765 if (
4766 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4767 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4768 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
4769 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4770 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4771 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4772 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4773 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4774 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4775 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4776 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
4777 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
4778 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4779 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4780 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4781 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4782 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
4783 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4784 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
4785 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4786 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4787 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4788 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
4789 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4790 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4791 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
4792 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
4793 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
4794 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
4795 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
4796 ) /* How's that for a conditional? */
4797 {
4798 match = TRUE;
4799 }
a0ed51b3 4800 }
a0ed51b3
LW
4801 }
4802
a0ed51b3
LW
4803 return (flags & ANYOF_INVERT) ? !match : match;
4804}
161b471a 4805
dfe13c55 4806STATIC U8 *
cea2e8a9 4807S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 4808{
97aff369 4809 dVAR;
5f66b61c 4810 return S_reghop3(s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
1aa99e6b
IH
4811}
4812
4813STATIC U8 *
5f66b61c 4814S_reghop3(U8 *s, I32 off, U8* lim)
9041c2e3 4815{
97aff369 4816 dVAR;
a0ed51b3 4817 if (off >= 0) {
1aa99e6b 4818 while (off-- && s < lim) {
ffc61ed2 4819 /* XXX could check well-formedness here */
a0ed51b3 4820 s += UTF8SKIP(s);
ffc61ed2 4821 }
a0ed51b3
LW
4822 }
4823 else {
4824 while (off++) {
1aa99e6b 4825 if (s > lim) {
a0ed51b3 4826 s--;
ffc61ed2 4827 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4828 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4829 s--;
ffc61ed2
JH
4830 }
4831 /* XXX could check well-formedness here */
a0ed51b3
LW
4832 }
4833 }
4834 }
4835 return s;
4836}
161b471a 4837
dfe13c55 4838STATIC U8 *
1aa99e6b 4839S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 4840{
97aff369 4841 dVAR;
5f66b61c 4842 return S_reghopmaybe3(s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
1aa99e6b
IH
4843}
4844
4845STATIC U8 *
5f66b61c 4846S_reghopmaybe3(U8* s, I32 off, U8* lim)
a0ed51b3 4847{
97aff369 4848 dVAR;
a0ed51b3 4849 if (off >= 0) {
1aa99e6b 4850 while (off-- && s < lim) {
ffc61ed2 4851 /* XXX could check well-formedness here */
a0ed51b3 4852 s += UTF8SKIP(s);
ffc61ed2 4853 }
a0ed51b3
LW
4854 if (off >= 0)
4855 return 0;
4856 }
4857 else {
4858 while (off++) {
1aa99e6b 4859 if (s > lim) {
a0ed51b3 4860 s--;
ffc61ed2 4861 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4862 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4863 s--;
ffc61ed2
JH
4864 }
4865 /* XXX could check well-formedness here */
a0ed51b3
LW
4866 }
4867 else
4868 break;
4869 }
4870 if (off <= 0)
4871 return 0;
4872 }
4873 return s;
4874}
51371543 4875
51371543 4876static void
acfe0abc 4877restore_pos(pTHX_ void *arg)
51371543 4878{
97aff369 4879 dVAR;
9d4ba2ae 4880 PERL_UNUSED_ARG(arg);
51371543
GS
4881 if (PL_reg_eval_set) {
4882 if (PL_reg_oldsaved) {
4883 PL_reg_re->subbeg = PL_reg_oldsaved;
4884 PL_reg_re->sublen = PL_reg_oldsavedlen;
f8c7b90f 4885#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
4886 PL_reg_re->saved_copy = PL_nrs;
4887#endif
51371543
GS
4888 RX_MATCH_COPIED_on(PL_reg_re);
4889 }
4890 PL_reg_magic->mg_len = PL_reg_oldpos;
4891 PL_reg_eval_set = 0;
4892 PL_curpm = PL_reg_oldcurpm;
4893 }
4894}
33b8afdf
JH
4895
4896STATIC void
4897S_to_utf8_substr(pTHX_ register regexp *prog)
4898{
33b8afdf 4899 if (prog->float_substr && !prog->float_utf8) {
9d4ba2ae 4900 SV* sv;
f2b990bf 4901 prog->float_utf8 = sv = newSVsv(prog->float_substr);
33b8afdf
JH
4902 sv_utf8_upgrade(sv);
4903 if (SvTAIL(prog->float_substr))
4904 SvTAIL_on(sv);
4905 if (prog->float_substr == prog->check_substr)
4906 prog->check_utf8 = sv;
4907 }
4908 if (prog->anchored_substr && !prog->anchored_utf8) {
9d4ba2ae 4909 SV* sv;
f2b990bf 4910 prog->anchored_utf8 = sv = newSVsv(prog->anchored_substr);
33b8afdf
JH
4911 sv_utf8_upgrade(sv);
4912 if (SvTAIL(prog->anchored_substr))
4913 SvTAIL_on(sv);
4914 if (prog->anchored_substr == prog->check_substr)
4915 prog->check_utf8 = sv;
4916 }
4917}
4918
4919STATIC void
4920S_to_byte_substr(pTHX_ register regexp *prog)
4921{
97aff369 4922 dVAR;
33b8afdf 4923 if (prog->float_utf8 && !prog->float_substr) {
9d4ba2ae 4924 SV* sv;
f2b990bf 4925 prog->float_substr = sv = newSVsv(prog->float_utf8);
33b8afdf
JH
4926 if (sv_utf8_downgrade(sv, TRUE)) {
4927 if (SvTAIL(prog->float_utf8))
4928 SvTAIL_on(sv);
4929 } else {
4930 SvREFCNT_dec(sv);
4931 prog->float_substr = sv = &PL_sv_undef;
4932 }
4933 if (prog->float_utf8 == prog->check_utf8)
4934 prog->check_substr = sv;
4935 }
4936 if (prog->anchored_utf8 && !prog->anchored_substr) {
9d4ba2ae 4937 SV* sv;
f2b990bf 4938 prog->anchored_substr = sv = newSVsv(prog->anchored_utf8);
33b8afdf
JH
4939 if (sv_utf8_downgrade(sv, TRUE)) {
4940 if (SvTAIL(prog->anchored_utf8))
4941 SvTAIL_on(sv);
4942 } else {
4943 SvREFCNT_dec(sv);
4944 prog->anchored_substr = sv = &PL_sv_undef;
4945 }
4946 if (prog->anchored_utf8 == prog->check_utf8)
4947 prog->check_substr = sv;
4948 }
4949}
66610fdd
RGS
4950
4951/*
4952 * Local variables:
4953 * c-indentation-style: bsd
4954 * c-basic-offset: 4
4955 * indent-tabs-mode: t
4956 * End:
4957 *
37442d52
RGS
4958 * ex: set ts=8 sts=4 sw=4 noet:
4959 */