This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Remove stale date.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
61296642
DM
8/* This file contains functions for executing a regular expression. See
9 * also regcomp.c which funnily enough, contains functions for compiling
166f8a29 10 * a regular expression.
e4a054ea
DM
11 *
12 * This file is also copied at build time to ext/re/re_exec.c, where
13 * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14 * This causes the main functions to be compiled under new names and with
15 * debugging support added, which makes "use re 'debug'" work.
16
166f8a29
DM
17 */
18
a687059c
LW
19/* NOTE: this is derived from Henry Spencer's regexp code, and should not
20 * confused with the original package (see point 3 below). Thanks, Henry!
21 */
22
23/* Additional note: this code is very heavily munged from Henry's version
24 * in places. In some spots I've traded clarity for efficiency, so don't
25 * blame Henry for some of the lack of readability.
26 */
27
e50aee73
AD
28/* The names of the functions have been changed from regcomp and
29 * regexec to pregcomp and pregexec in order to avoid conflicts
30 * with the POSIX routines of the same names.
31*/
32
b9d5759e
AD
33#ifdef PERL_EXT_RE_BUILD
34/* need to replace pregcomp et al, so enable that */
35# ifndef PERL_IN_XSUB_RE
36# define PERL_IN_XSUB_RE
37# endif
38/* need access to debugger hooks */
cad2e5aa 39# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
40# define DEBUGGING
41# endif
42#endif
43
44#ifdef PERL_IN_XSUB_RE
d06ea78c 45/* We *really* need to overwrite these symbols: */
56953603
IZ
46# define Perl_regexec_flags my_regexec
47# define Perl_regdump my_regdump
48# define Perl_regprop my_regprop
cad2e5aa 49# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
50/* *These* symbols are masked to allow static link. */
51# define Perl_pregexec my_pregexec
9041c2e3 52# define Perl_reginitcolors my_reginitcolors
490a3f88 53# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
54
55# define PERL_NO_GET_CONTEXT
9041c2e3 56#endif
56953603 57
a687059c 58/*
e50aee73 59 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
60 *
61 * Copyright (c) 1986 by University of Toronto.
62 * Written by Henry Spencer. Not derived from licensed software.
63 *
64 * Permission is granted to anyone to use this software for any
65 * purpose on any computer system, and to redistribute it freely,
66 * subject to the following restrictions:
67 *
68 * 1. The author is not responsible for the consequences of use of
69 * this software, no matter how awful, even if they arise
70 * from defects in it.
71 *
72 * 2. The origin of this software must not be misrepresented, either
73 * by explicit claim or by omission.
74 *
75 * 3. Altered versions must be plainly marked as such, and must not
76 * be misrepresented as being the original software.
77 *
78 **** Alterations to Henry's code are...
79 ****
4bb101f2 80 **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 81 **** 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 82 ****
9ef589d8
LW
83 **** You may distribute under the terms of either the GNU General Public
84 **** License or the Artistic License, as specified in the README file.
a687059c
LW
85 *
86 * Beware that some of this code is subtly aware of the way operator
87 * precedence is structured in regular expressions. Serious changes in
88 * regular-expression syntax might require a total rethink.
89 */
90#include "EXTERN.h"
864dbfa3 91#define PERL_IN_REGEXEC_C
a687059c 92#include "perl.h"
0f5d15d6 93
a687059c
LW
94#include "regcomp.h"
95
c277df42
IZ
96#define RF_tainted 1 /* tainted information used? */
97#define RF_warned 2 /* warned about big count? */
ce862d02 98#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
99#define RF_utf8 8 /* String contains multibyte chars? */
100
eb160463 101#define UTF ((PL_reg_flags & RF_utf8) != 0)
ce862d02
IZ
102
103#define RS_init 1 /* eval environment created */
104#define RS_set 2 /* replsv value is set */
c277df42 105
a687059c
LW
106#ifndef STATIC
107#define STATIC static
108#endif
109
ba7b4546 110#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
7d3e948e 111
c277df42
IZ
112/*
113 * Forwards.
114 */
115
33b8afdf 116#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 117#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 118
dfe13c55
GS
119#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
120#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
121#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
122#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
123#define HOPc(pos,off) ((char*)HOP(pos,off))
124#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 125
efb30f32 126#define HOPBACK(pos, off) ( \
e54858b0 127 (PL_reg_match_utf8) \
efb30f32
HS
128 ? reghopmaybe((U8*)pos, -off) \
129 : (pos - off >= PL_bostr) \
130 ? (U8*)(pos - off) \
131 : (U8*)NULL \
132)
133#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
134
1aa99e6b
IH
135#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
136#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
137#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
138#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
139#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
140#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
141
1a4fad37
AL
142#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
143 if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
144#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
145#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
146#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
147#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
51371543 148
5f80c4cf 149/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
150#define JUMPABLE(rn) ( \
151 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
152 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
153 OP(rn) == PLUS || OP(rn) == MINMOD || \
154 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
155)
156
cca55fe3
JP
157#define HAS_TEXT(rn) ( \
158 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
159)
e2d8ce26 160
a84d97b6
HS
161/*
162 Search for mandatory following text node; for lookahead, the text must
163 follow but for lookbehind (rn->flags != 0) we skip to the next step.
164*/
cca55fe3 165#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 166 while (JUMPABLE(rn)) \
a84d97b6 167 if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 168 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
169 else if (OP(rn) == PLUS) \
170 rn = NEXTOPER(rn); \
a84d97b6
HS
171 else if (OP(rn) == IFMATCH) \
172 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
e2d8ce26 173 else rn += NEXT_OFF(rn); \
5f80c4cf 174} STMT_END
74750237 175
acfe0abc 176static void restore_pos(pTHX_ void *arg);
51371543 177
76e3520e 178STATIC CHECKPOINT
cea2e8a9 179S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 180{
97aff369 181 dVAR;
a3b680e6 182 const int retval = PL_savestack_ix;
b1ce53c5 183#define REGCP_PAREN_ELEMS 4
a3b680e6 184 const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
185 int p;
186
e49a9654
IH
187 if (paren_elems_to_push < 0)
188 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
189
a01268b5 190#define REGCP_OTHER_ELEMS 6
4b3c1a47 191 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 192 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 193/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
194 SSPUSHINT(PL_regendp[p]);
195 SSPUSHINT(PL_regstartp[p]);
3280af22 196 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
197 SSPUSHINT(p);
198 }
b1ce53c5 199/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
200 SSPUSHINT(PL_regsize);
201 SSPUSHINT(*PL_reglastparen);
a01268b5 202 SSPUSHINT(*PL_reglastcloseparen);
3280af22 203 SSPUSHPTR(PL_reginput);
41123dfd
JH
204#define REGCP_FRAME_ELEMS 2
205/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
206 * are needed for the regexp context stack bookkeeping. */
207 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 208 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 209
a0d0e21e
LW
210 return retval;
211}
212
c277df42 213/* These are needed since we do not localize EVAL nodes: */
a3621e74 214# define REGCP_SET(cp) DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, \
faccc32b 215 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 216 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 217
a3621e74 218# define REGCP_UNWIND(cp) DEBUG_EXECUTE_r(cp != PL_savestack_ix ? \
c3464db5 219 PerlIO_printf(Perl_debug_log, \
faccc32b 220 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 221 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 222
76e3520e 223STATIC char *
cea2e8a9 224S_regcppop(pTHX)
a0d0e21e 225{
97aff369 226 dVAR;
b1ce53c5 227 I32 i;
a0d0e21e
LW
228 U32 paren = 0;
229 char *input;
b1ce53c5 230
a3621e74
YO
231 GET_RE_DEBUG_FLAGS_DECL;
232
b1ce53c5 233 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 234 i = SSPOPINT;
b1ce53c5
JH
235 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
236 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 237 input = (char *) SSPOPPTR;
a01268b5 238 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
239 *PL_reglastparen = SSPOPINT;
240 PL_regsize = SSPOPINT;
b1ce53c5
JH
241
242 /* Now restore the parentheses context. */
41123dfd
JH
243 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
244 i > 0; i -= REGCP_PAREN_ELEMS) {
1df70142 245 I32 tmps;
a0d0e21e 246 paren = (U32)SSPOPINT;
3280af22 247 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
248 PL_regstartp[paren] = SSPOPINT;
249 tmps = SSPOPINT;
3280af22
NIS
250 if (paren <= *PL_reglastparen)
251 PL_regendp[paren] = tmps;
a3621e74 252 DEBUG_EXECUTE_r(
c3464db5 253 PerlIO_printf(Perl_debug_log,
b900a521 254 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 255 (UV)paren, (IV)PL_regstartp[paren],
b900a521 256 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 257 (IV)PL_regendp[paren],
3280af22 258 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 259 );
a0d0e21e 260 }
a3621e74 261 DEBUG_EXECUTE_r(
eb160463 262 if ((I32)(*PL_reglastparen + 1) <= PL_regnpar) {
c3464db5 263 PerlIO_printf(Perl_debug_log,
faccc32b
JH
264 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
265 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
266 }
267 );
daf18116 268#if 1
dafc8851
JH
269 /* It would seem that the similar code in regtry()
270 * already takes care of this, and in fact it is in
271 * a better location to since this code can #if 0-ed out
272 * but the code in regtry() is needed or otherwise tests
273 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
274 * (as of patchlevel 7877) will fail. Then again,
275 * this code seems to be necessary or otherwise
276 * building DynaLoader will fail:
277 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
278 * --jhi */
eb160463
GS
279 for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) {
280 if ((I32)paren > PL_regsize)
cf93c79d
IZ
281 PL_regstartp[paren] = -1;
282 PL_regendp[paren] = -1;
a0d0e21e 283 }
dafc8851 284#endif
a0d0e21e
LW
285 return input;
286}
287
0f5d15d6 288STATIC char *
cea2e8a9 289S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 290{
97aff369 291 dVAR;
a3b680e6 292 const I32 tmp = PL_savestack_ix;
0f5d15d6
IZ
293
294 PL_savestack_ix = ss;
295 regcppop();
296 PL_savestack_ix = tmp;
942e002e 297 return Nullch;
0f5d15d6
IZ
298}
299
300typedef struct re_cc_state
301{
302 I32 ss;
303 regnode *node;
304 struct re_cc_state *prev;
305 CURCUR *cc;
306 regexp *re;
307} re_cc_state;
308
02db2b7b 309#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 310
29d1e993
HS
311#define TRYPAREN(paren, n, input) { \
312 if (paren) { \
313 if (n) { \
314 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
315 PL_regendp[paren] = input - PL_bostr; \
316 } \
317 else \
318 PL_regendp[paren] = -1; \
319 } \
320 if (regmatch(next)) \
321 sayYES; \
322 if (paren && n) \
323 PL_regendp[paren] = -1; \
324}
325
326
a687059c 327/*
e50aee73 328 * pregexec and friends
a687059c
LW
329 */
330
331/*
c277df42 332 - pregexec - match a regexp against a string
a687059c 333 */
c277df42 334I32
864dbfa3 335Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 336 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
337/* strend: pointer to null at end of string */
338/* strbeg: real beginning of string */
339/* minend: end of match must be >=minend after stringarg. */
340/* nosave: For optimizations. */
341{
342 return
9041c2e3 343 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
344 nosave ? 0 : REXEC_COPY_STR);
345}
0f5d15d6
IZ
346
347STATIC void
cea2e8a9 348S_cache_re(pTHX_ regexp *prog)
0f5d15d6 349{
97aff369 350 dVAR;
0f5d15d6
IZ
351 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
352#ifdef DEBUGGING
353 PL_regprogram = prog->program;
354#endif
355 PL_regnpar = prog->nparens;
9041c2e3
NIS
356 PL_regdata = prog->data;
357 PL_reg_re = prog;
0f5d15d6 358}
22e551b9 359
9041c2e3 360/*
cad2e5aa
JH
361 * Need to implement the following flags for reg_anch:
362 *
363 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
364 * USE_INTUIT_ML
365 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
366 * INTUIT_AUTORITATIVE_ML
367 * INTUIT_ONCE_NOML - Intuit can match in one location only.
368 * INTUIT_ONCE_ML
369 *
370 * Another flag for this function: SECOND_TIME (so that float substrs
371 * with giant delta may be not rechecked).
372 */
373
374/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
375
3f7c398e 376/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
cad2e5aa
JH
377 Otherwise, only SvCUR(sv) is used to get strbeg. */
378
379/* XXXX We assume that strpos is strbeg unless sv. */
380
6eb5f6b9
JH
381/* XXXX Some places assume that there is a fixed substring.
382 An update may be needed if optimizer marks as "INTUITable"
383 RExen without fixed substrings. Similarly, it is assumed that
384 lengths of all the strings are no more than minlen, thus they
385 cannot come from lookahead.
386 (Or minlen should take into account lookahead.) */
387
2c2d71f5
JH
388/* A failure to find a constant substring means that there is no need to make
389 an expensive call to REx engine, thus we celebrate a failure. Similarly,
390 finding a substring too deep into the string means that less calls to
30944b6d
IZ
391 regtry() should be needed.
392
393 REx compiler's optimizer found 4 possible hints:
394 a) Anchored substring;
395 b) Fixed substring;
396 c) Whether we are anchored (beginning-of-line or \G);
397 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 398 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
399 string which does not contradict any of them.
400 */
2c2d71f5 401
6eb5f6b9
JH
402/* Most of decisions we do here should have been done at compile time.
403 The nodes of the REx which we used for the search should have been
404 deleted from the finite automaton. */
405
cad2e5aa
JH
406char *
407Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
408 char *strend, U32 flags, re_scream_pos_data *data)
409{
97aff369 410 dVAR;
b7953727 411 register I32 start_shift = 0;
cad2e5aa 412 /* Should be nonnegative! */
b7953727 413 register I32 end_shift = 0;
2c2d71f5
JH
414 register char *s;
415 register SV *check;
a1933d95 416 char *strbeg;
cad2e5aa 417 char *t;
a3b680e6 418 const int do_utf8 = sv ? SvUTF8(sv) : 0; /* if no sv we have to assume bytes */
cad2e5aa 419 I32 ml_anch;
6eb5f6b9 420 register char *other_last = Nullch; /* other substr checked before this */
b7953727 421 char *check_at = Nullch; /* check substr found at this pos */
1df70142 422 const I32 multiline = prog->reganch & PMf_MULTILINE;
30944b6d 423#ifdef DEBUGGING
890ce7af
AL
424 const char * const i_strpos = strpos;
425 SV * const dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 426#endif
a3621e74
YO
427
428 GET_RE_DEBUG_FLAGS_DECL;
429
a30b2f1f 430 RX_MATCH_UTF8_set(prog,do_utf8);
cad2e5aa 431
b8d68ded 432 if (prog->reganch & ROPT_UTF8) {
a3621e74 433 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded
JH
434 "UTF-8 regex...\n"));
435 PL_reg_flags |= RF_utf8;
436 }
437
a3621e74 438 DEBUG_EXECUTE_r({
1df70142 439 const char *s = PL_reg_match_utf8 ?
c728cb41
JH
440 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
441 strpos;
1df70142 442 const int len = PL_reg_match_utf8 ?
b8d68ded 443 strlen(s) : strend - strpos;
2a782b5b
JH
444 if (!PL_colorset)
445 reginitcolors();
b8d68ded 446 if (PL_reg_match_utf8)
a3621e74 447 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded 448 "UTF-8 target...\n"));
2a782b5b 449 PerlIO_printf(Perl_debug_log,
a0288114 450 "%sGuessing start of match, REx%s \"%s%.60s%s%s\" against \"%s%.*s%s%s\"...\n",
e4584336 451 PL_colors[4], PL_colors[5], PL_colors[0],
2a782b5b
JH
452 prog->precomp,
453 PL_colors[1],
454 (strlen(prog->precomp) > 60 ? "..." : ""),
455 PL_colors[0],
456 (int)(len > 60 ? 60 : len),
457 s, PL_colors[1],
458 (len > 60 ? "..." : "")
459 );
460 });
cad2e5aa 461
c344f387
JH
462 /* CHR_DIST() would be more correct here but it makes things slow. */
463 if (prog->minlen > strend - strpos) {
a3621e74 464 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584 465 "String too short... [re_intuit_start]\n"));
cad2e5aa 466 goto fail;
2c2d71f5 467 }
a1933d95 468 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 469 PL_regeol = strend;
33b8afdf
JH
470 if (do_utf8) {
471 if (!prog->check_utf8 && prog->check_substr)
472 to_utf8_substr(prog);
473 check = prog->check_utf8;
474 } else {
475 if (!prog->check_substr && prog->check_utf8)
476 to_byte_substr(prog);
477 check = prog->check_substr;
478 }
479 if (check == &PL_sv_undef) {
a3621e74 480 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
481 "Non-utf string cannot match utf check string\n"));
482 goto fail;
483 }
2c2d71f5 484 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
485 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
486 || ( (prog->reganch & ROPT_ANCH_BOL)
7fba1cd6 487 && !multiline ) ); /* Check after \n? */
cad2e5aa 488
7e25d62c
JH
489 if (!ml_anch) {
490 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
491 | ROPT_IMPLICIT)) /* not a real BOL */
3f7c398e 492 /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
7e25d62c
JH
493 && sv && !SvROK(sv)
494 && (strpos != strbeg)) {
a3621e74 495 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
7e25d62c
JH
496 goto fail;
497 }
498 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 499 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 500 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
501 I32 slen;
502
1aa99e6b 503 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
504 if (SvTAIL(check)) {
505 slen = SvCUR(check); /* >= 1 */
cad2e5aa 506
9041c2e3 507 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5 508 || (strend - s == slen && strend[-1] != '\n')) {
a3621e74 509 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
2c2d71f5 510 goto fail_finish;
cad2e5aa
JH
511 }
512 /* Now should match s[0..slen-2] */
513 slen--;
3f7c398e 514 if (slen && (*SvPVX_const(check) != *s
cad2e5aa 515 || (slen > 1
3f7c398e 516 && memNE(SvPVX_const(check), s, slen)))) {
2c2d71f5 517 report_neq:
a3621e74 518 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
2c2d71f5
JH
519 goto fail_finish;
520 }
cad2e5aa 521 }
3f7c398e 522 else if (*SvPVX_const(check) != *s
653099ff 523 || ((slen = SvCUR(check)) > 1
3f7c398e 524 && memNE(SvPVX_const(check), s, slen)))
2c2d71f5 525 goto report_neq;
c315bfe8 526 check_at = s;
2c2d71f5 527 goto success_at_start;
7e25d62c 528 }
cad2e5aa 529 }
2c2d71f5 530 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 531 s = strpos;
2c2d71f5 532 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 533 end_shift = prog->minlen - start_shift -
653099ff 534 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 535 if (!ml_anch) {
a3b680e6 536 const I32 end = prog->check_offset_max + CHR_SVLEN(check)
653099ff 537 - (SvTAIL(check) != 0);
a3b680e6 538 const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
539
540 if (end_shift < eshift)
541 end_shift = eshift;
542 }
cad2e5aa 543 }
2c2d71f5 544 else { /* Can match at random position */
cad2e5aa
JH
545 ml_anch = 0;
546 s = strpos;
2c2d71f5
JH
547 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
548 /* Should be nonnegative! */
549 end_shift = prog->minlen - start_shift -
653099ff 550 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
551 }
552
2c2d71f5 553#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 554 if (end_shift < 0)
6bbae5e6 555 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
556#endif
557
2c2d71f5
JH
558 restart:
559 /* Find a possible match in the region s..strend by looking for
560 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 561 if (flags & REXEC_SCREAM) {
cad2e5aa 562 I32 p = -1; /* Internal iterator of scream. */
a3b680e6 563 I32 * const pp = data ? data->scream_pos : &p;
cad2e5aa 564
2c2d71f5
JH
565 if (PL_screamfirst[BmRARE(check)] >= 0
566 || ( BmRARE(check) == '\n'
567 && (BmPREVIOUS(check) == SvCUR(check) - 1)
568 && SvTAIL(check) ))
9041c2e3 569 s = screaminstr(sv, check,
2c2d71f5 570 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 571 else
2c2d71f5 572 goto fail_finish;
4addbd3b
HS
573 /* we may be pointing at the wrong string */
574 if (s && RX_MATCH_COPIED(prog))
3f7c398e 575 s = strbeg + (s - SvPVX_const(sv));
cad2e5aa
JH
576 if (data)
577 *data->scream_olds = s;
578 }
f33976b4 579 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
580 s = fbm_instr((U8*)(s + start_shift),
581 (U8*)(strend - end_shift),
7fba1cd6 582 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 583 else
1aa99e6b
IH
584 s = fbm_instr(HOP3(s, start_shift, strend),
585 HOP3(strend, -end_shift, strbeg),
7fba1cd6 586 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
587
588 /* Update the count-of-usability, remove useless subpatterns,
589 unshift s. */
2c2d71f5 590
a0288114 591 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s %s substr \"%s%.*s%s\"%s%s",
2c2d71f5 592 (s ? "Found" : "Did not find"),
33b8afdf 593 (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
2c2d71f5 594 PL_colors[0],
7b0972df 595 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
3f7c398e 596 SvPVX_const(check),
2c2d71f5
JH
597 PL_colors[1], (SvTAIL(check) ? "$" : ""),
598 (s ? " at offset " : "...\n") ) );
599
600 if (!s)
601 goto fail_finish;
602
6eb5f6b9
JH
603 check_at = s;
604
2c2d71f5 605 /* Finish the diagnostic message */
a3621e74 606 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
607
608 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
609 Start with the other substr.
610 XXXX no SCREAM optimization yet - and a very coarse implementation
a0288114 611 XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
2c2d71f5
JH
612 *always* match. Probably should be marked during compile...
613 Probably it is right to do no SCREAM here...
614 */
615
33b8afdf 616 if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
30944b6d 617 /* Take into account the "other" substring. */
2c2d71f5
JH
618 /* XXXX May be hopelessly wrong for UTF... */
619 if (!other_last)
6eb5f6b9 620 other_last = strpos;
33b8afdf 621 if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
30944b6d
IZ
622 do_other_anchored:
623 {
890ce7af
AL
624 char * const last = HOP3c(s, -start_shift, strbeg);
625 char *last1, *last2;
2c2d71f5 626 char *s1 = s;
33b8afdf 627 SV* must;
2c2d71f5 628
2c2d71f5
JH
629 t = s - prog->check_offset_max;
630 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 631 && (!do_utf8
1aa99e6b 632 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 633 && t > strpos)))
30944b6d 634 /* EMPTY */;
2c2d71f5
JH
635 else
636 t = strpos;
1aa99e6b 637 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
638 if (t < other_last) /* These positions already checked */
639 t = other_last;
1aa99e6b 640 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
641 if (last < last1)
642 last1 = last;
643 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
644 /* On end-of-str: see comment below. */
33b8afdf
JH
645 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
646 if (must == &PL_sv_undef) {
647 s = (char*)NULL;
a3621e74 648 DEBUG_EXECUTE_r(must = prog->anchored_utf8); /* for debug */
33b8afdf
JH
649 }
650 else
651 s = fbm_instr(
652 (unsigned char*)t,
653 HOP3(HOP3(last1, prog->anchored_offset, strend)
654 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
655 must,
7fba1cd6 656 multiline ? FBMrf_MULTILINE : 0
33b8afdf 657 );
a3621e74 658 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a0288114 659 "%s anchored substr \"%s%.*s%s\"%s",
2c2d71f5
JH
660 (s ? "Found" : "Contradicts"),
661 PL_colors[0],
33b8afdf
JH
662 (int)(SvCUR(must)
663 - (SvTAIL(must)!=0)),
3f7c398e 664 SvPVX_const(must),
33b8afdf 665 PL_colors[1], (SvTAIL(must) ? "$" : "")));
2c2d71f5
JH
666 if (!s) {
667 if (last1 >= last2) {
a3621e74 668 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5
JH
669 ", giving up...\n"));
670 goto fail_finish;
671 }
a3621e74 672 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 673 ", trying floating at offset %ld...\n",
1aa99e6b
IH
674 (long)(HOP3c(s1, 1, strend) - i_strpos)));
675 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
676 s = HOP3c(last, 1, strend);
2c2d71f5
JH
677 goto restart;
678 }
679 else {
a3621e74 680 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 681 (long)(s - i_strpos)));
1aa99e6b
IH
682 t = HOP3c(s, -prog->anchored_offset, strbeg);
683 other_last = HOP3c(s, 1, strend);
30944b6d 684 s = s1;
2c2d71f5
JH
685 if (t == strpos)
686 goto try_at_start;
2c2d71f5
JH
687 goto try_at_offset;
688 }
30944b6d 689 }
2c2d71f5
JH
690 }
691 else { /* Take into account the floating substring. */
33b8afdf
JH
692 char *last, *last1;
693 char *s1 = s;
694 SV* must;
695
696 t = HOP3c(s, -start_shift, strbeg);
697 last1 = last =
698 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
699 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
700 last = HOP3c(t, prog->float_max_offset, strend);
701 s = HOP3c(t, prog->float_min_offset, strend);
702 if (s < other_last)
703 s = other_last;
2c2d71f5 704 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
33b8afdf
JH
705 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
706 /* fbm_instr() takes into account exact value of end-of-str
707 if the check is SvTAIL(ed). Since false positives are OK,
708 and end-of-str is not later than strend we are OK. */
709 if (must == &PL_sv_undef) {
710 s = (char*)NULL;
a3621e74 711 DEBUG_EXECUTE_r(must = prog->float_utf8); /* for debug message */
33b8afdf
JH
712 }
713 else
2c2d71f5 714 s = fbm_instr((unsigned char*)s,
33b8afdf
JH
715 (unsigned char*)last + SvCUR(must)
716 - (SvTAIL(must)!=0),
7fba1cd6 717 must, multiline ? FBMrf_MULTILINE : 0);
a0288114 718 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s floating substr \"%s%.*s%s\"%s",
33b8afdf
JH
719 (s ? "Found" : "Contradicts"),
720 PL_colors[0],
721 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 722 SvPVX_const(must),
33b8afdf
JH
723 PL_colors[1], (SvTAIL(must) ? "$" : "")));
724 if (!s) {
725 if (last1 == last) {
a3621e74 726 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
727 ", giving up...\n"));
728 goto fail_finish;
2c2d71f5 729 }
a3621e74 730 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
731 ", trying anchored starting at offset %ld...\n",
732 (long)(s1 + 1 - i_strpos)));
733 other_last = last;
734 s = HOP3c(t, 1, strend);
735 goto restart;
736 }
737 else {
a3621e74 738 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
33b8afdf
JH
739 (long)(s - i_strpos)));
740 other_last = s; /* Fix this later. --Hugo */
741 s = s1;
742 if (t == strpos)
743 goto try_at_start;
744 goto try_at_offset;
745 }
2c2d71f5 746 }
cad2e5aa 747 }
2c2d71f5
JH
748
749 t = s - prog->check_offset_max;
2c2d71f5 750 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 751 && (!do_utf8
1aa99e6b
IH
752 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
753 && t > strpos))) {
2c2d71f5
JH
754 /* Fixed substring is found far enough so that the match
755 cannot start at strpos. */
756 try_at_offset:
cad2e5aa 757 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
758 /* Eventually fbm_*() should handle this, but often
759 anchored_offset is not 0, so this check will not be wasted. */
760 /* XXXX In the code below we prefer to look for "^" even in
761 presence of anchored substrings. And we search even
762 beyond the found float position. These pessimizations
763 are historical artefacts only. */
764 find_anchor:
2c2d71f5 765 while (t < strend - prog->minlen) {
cad2e5aa 766 if (*t == '\n') {
4ee3650e 767 if (t < check_at - prog->check_offset_min) {
33b8afdf 768 if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
4ee3650e
GS
769 /* Since we moved from the found position,
770 we definitely contradict the found anchored
30944b6d
IZ
771 substr. Due to the above check we do not
772 contradict "check" substr.
773 Thus we can arrive here only if check substr
774 is float. Redo checking for "other"=="fixed".
775 */
9041c2e3 776 strpos = t + 1;
a3621e74 777 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
e4584336 778 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
30944b6d
IZ
779 goto do_other_anchored;
780 }
4ee3650e
GS
781 /* We don't contradict the found floating substring. */
782 /* XXXX Why not check for STCLASS? */
cad2e5aa 783 s = t + 1;
a3621e74 784 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
e4584336 785 PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
786 goto set_useful;
787 }
4ee3650e
GS
788 /* Position contradicts check-string */
789 /* XXXX probably better to look for check-string
790 than for "\n", so one should lower the limit for t? */
a3621e74 791 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
e4584336 792 PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 793 other_last = strpos = s = t + 1;
cad2e5aa
JH
794 goto restart;
795 }
796 t++;
797 }
a3621e74 798 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
e4584336 799 PL_colors[0], PL_colors[1]));
2c2d71f5 800 goto fail_finish;
cad2e5aa 801 }
f5952150 802 else {
a3621e74 803 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
e4584336 804 PL_colors[0], PL_colors[1]));
f5952150 805 }
cad2e5aa
JH
806 s = t;
807 set_useful:
33b8afdf 808 ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
cad2e5aa
JH
809 }
810 else {
f5952150 811 /* The found string does not prohibit matching at strpos,
2c2d71f5 812 - no optimization of calling REx engine can be performed,
f5952150
GS
813 unless it was an MBOL and we are not after MBOL,
814 or a future STCLASS check will fail this. */
2c2d71f5
JH
815 try_at_start:
816 /* Even in this situation we may use MBOL flag if strpos is offset
817 wrt the start of the string. */
05b4157f 818 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 819 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
820 /* May be due to an implicit anchor of m{.*foo} */
821 && !(prog->reganch & ROPT_IMPLICIT))
822 {
cad2e5aa
JH
823 t = strpos;
824 goto find_anchor;
825 }
a3621e74 826 DEBUG_EXECUTE_r( if (ml_anch)
f5952150 827 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
e4584336 828 (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
30944b6d 829 );
2c2d71f5 830 success_at_start:
30944b6d 831 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
33b8afdf
JH
832 && (do_utf8 ? (
833 prog->check_utf8 /* Could be deleted already */
834 && --BmUSEFUL(prog->check_utf8) < 0
835 && (prog->check_utf8 == prog->float_utf8)
836 ) : (
837 prog->check_substr /* Could be deleted already */
838 && --BmUSEFUL(prog->check_substr) < 0
839 && (prog->check_substr == prog->float_substr)
840 )))
66e933ab 841 {
cad2e5aa 842 /* If flags & SOMETHING - do not do it many times on the same match */
a3621e74 843 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
33b8afdf
JH
844 SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
845 if (do_utf8 ? prog->check_substr : prog->check_utf8)
846 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
847 prog->check_substr = prog->check_utf8 = Nullsv; /* disable */
848 prog->float_substr = prog->float_utf8 = Nullsv; /* clear */
5e39e1e5 849 check = Nullsv; /* abort */
cad2e5aa 850 s = strpos;
3cf5c195
IZ
851 /* XXXX This is a remnant of the old implementation. It
852 looks wasteful, since now INTUIT can use many
6eb5f6b9 853 other heuristics. */
cad2e5aa
JH
854 prog->reganch &= ~RE_USE_INTUIT;
855 }
856 else
857 s = strpos;
858 }
859
6eb5f6b9
JH
860 /* Last resort... */
861 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
862 if (prog->regstclass) {
863 /* minlen == 0 is possible if regstclass is \b or \B,
864 and the fixed substr is ''$.
865 Since minlen is already taken into account, s+1 is before strend;
866 accidentally, minlen >= 1 guaranties no false positives at s + 1
867 even for \b or \B. But (minlen? 1 : 0) below assumes that
868 regstclass does not come from lookahead... */
869 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
870 This leaves EXACTF only, which is dealt with in find_byclass(). */
890ce7af 871 const U8* const str = (U8*)STRING(prog->regstclass);
06b5626a 872 const int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 873 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 874 : 1);
a3b680e6 875 const char * const endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
1aa99e6b 876 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
33b8afdf 877 : (prog->float_substr || prog->float_utf8
1aa99e6b
IH
878 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
879 cl_l, strend)
880 : strend);
6eb5f6b9
JH
881
882 t = s;
9926ca43 883 cache_re(prog);
06b5626a 884 s = find_byclass(prog, prog->regstclass, s, endpos, 1);
6eb5f6b9
JH
885 if (!s) {
886#ifdef DEBUGGING
cbbf8932 887 const char *what = NULL;
6eb5f6b9
JH
888#endif
889 if (endpos == strend) {
a3621e74 890 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
891 "Could not match STCLASS...\n") );
892 goto fail;
893 }
a3621e74 894 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab 895 "This position contradicts STCLASS...\n") );
653099ff
GS
896 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
897 goto fail;
6eb5f6b9 898 /* Contradict one of substrings */
33b8afdf
JH
899 if (prog->anchored_substr || prog->anchored_utf8) {
900 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
a3621e74 901 DEBUG_EXECUTE_r( what = "anchored" );
6eb5f6b9 902 hop_and_restart:
1aa99e6b 903 s = HOP3c(t, 1, strend);
66e933ab
GS
904 if (s + start_shift + end_shift > strend) {
905 /* XXXX Should be taken into account earlier? */
a3621e74 906 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab
GS
907 "Could not match STCLASS...\n") );
908 goto fail;
909 }
5e39e1e5
HS
910 if (!check)
911 goto giveup;
a3621e74 912 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 913 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
914 what, (long)(s + start_shift - i_strpos)) );
915 goto restart;
916 }
66e933ab 917 /* Have both, check_string is floating */
6eb5f6b9
JH
918 if (t + start_shift >= check_at) /* Contradicts floating=check */
919 goto retry_floating_check;
920 /* Recheck anchored substring, but not floating... */
9041c2e3 921 s = check_at;
5e39e1e5
HS
922 if (!check)
923 goto giveup;
a3621e74 924 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 925 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
926 (long)(other_last - i_strpos)) );
927 goto do_other_anchored;
928 }
60e71179
GS
929 /* Another way we could have checked stclass at the
930 current position only: */
931 if (ml_anch) {
932 s = t = t + 1;
5e39e1e5
HS
933 if (!check)
934 goto giveup;
a3621e74 935 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 936 "Looking for /%s^%s/m starting at offset %ld...\n",
e4584336 937 PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
60e71179 938 goto try_at_offset;
66e933ab 939 }
33b8afdf 940 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
60e71179 941 goto fail;
6eb5f6b9
JH
942 /* Check is floating subtring. */
943 retry_floating_check:
944 t = check_at - start_shift;
a3621e74 945 DEBUG_EXECUTE_r( what = "floating" );
6eb5f6b9
JH
946 goto hop_and_restart;
947 }
b7953727 948 if (t != s) {
a3621e74 949 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 950 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
951 (long)(t - i_strpos), (long)(s - i_strpos))
952 );
953 }
954 else {
a3621e74 955 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b7953727
JH
956 "Does not contradict STCLASS...\n");
957 );
958 }
6eb5f6b9 959 }
5e39e1e5 960 giveup:
a3621e74 961 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
5e39e1e5
HS
962 PL_colors[4], (check ? "Guessed" : "Giving up"),
963 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 964 return s;
2c2d71f5
JH
965
966 fail_finish: /* Substring not found */
33b8afdf
JH
967 if (prog->check_substr || prog->check_utf8) /* could be removed already */
968 BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
cad2e5aa 969 fail:
a3621e74 970 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
e4584336 971 PL_colors[4], PL_colors[5]));
cad2e5aa
JH
972 return Nullch;
973}
9661b544 974
6eb5f6b9 975/* We know what class REx starts with. Try to find this position... */
3c3eec57 976STATIC char *
a3b680e6 977S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32 norun)
a687059c 978{
27da23d5 979 dVAR;
1df70142 980 const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
6eb5f6b9 981 char *m;
d8093b23 982 STRLEN ln;
5dab1207 983 STRLEN lnc;
078c425b 984 register STRLEN uskip;
d8093b23
G
985 unsigned int c1;
986 unsigned int c2;
6eb5f6b9
JH
987 char *e;
988 register I32 tmp = 1; /* Scratch variable? */
a3b680e6 989 register const bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 990
6eb5f6b9
JH
991 /* We know what class it must start with. */
992 switch (OP(c)) {
6eb5f6b9 993 case ANYOF:
388cc4de 994 if (do_utf8) {
078c425b 995 while (s + (uskip = UTF8SKIP(s)) <= strend) {
388cc4de
HS
996 if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
997 !UTF8_IS_INVARIANT((U8)s[0]) ?
998 reginclass(c, (U8*)s, 0, do_utf8) :
999 REGINCLASS(c, (U8*)s)) {
1000 if (tmp && (norun || regtry(prog, s)))
1001 goto got_it;
1002 else
1003 tmp = doevery;
1004 }
1005 else
1006 tmp = 1;
078c425b 1007 s += uskip;
388cc4de
HS
1008 }
1009 }
1010 else {
1011 while (s < strend) {
1012 STRLEN skip = 1;
1013
1014 if (REGINCLASS(c, (U8*)s) ||
1015 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1016 /* The assignment of 2 is intentional:
1017 * for the folded sharp s, the skip is 2. */
1018 (skip = SHARP_S_SKIP))) {
1019 if (tmp && (norun || regtry(prog, s)))
1020 goto got_it;
1021 else
1022 tmp = doevery;
1023 }
1024 else
1025 tmp = 1;
1026 s += skip;
1027 }
a0d0e21e 1028 }
6eb5f6b9 1029 break;
f33976b4
DB
1030 case CANY:
1031 while (s < strend) {
1032 if (tmp && (norun || regtry(prog, s)))
1033 goto got_it;
1034 else
1035 tmp = doevery;
1036 s++;
1037 }
1038 break;
6eb5f6b9 1039 case EXACTF:
5dab1207
NIS
1040 m = STRING(c);
1041 ln = STR_LEN(c); /* length to match in octets/bytes */
1042 lnc = (I32) ln; /* length to match in characters */
1aa99e6b 1043 if (UTF) {
a2a2844f 1044 STRLEN ulen1, ulen2;
5dab1207 1045 U8 *sm = (U8 *) m;
89ebb4a3
JH
1046 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1047 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
041457d9 1048 const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
a2a2844f
JH
1049
1050 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1051 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1052
89ebb4a3 1053 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
041457d9 1054 0, uniflags);
89ebb4a3 1055 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
041457d9 1056 0, uniflags);
5dab1207
NIS
1057 lnc = 0;
1058 while (sm < ((U8 *) m + ln)) {
1059 lnc++;
1060 sm += UTF8SKIP(sm);
1061 }
1aa99e6b
IH
1062 }
1063 else {
1064 c1 = *(U8*)m;
1065 c2 = PL_fold[c1];
1066 }
6eb5f6b9
JH
1067 goto do_exactf;
1068 case EXACTFL:
5dab1207
NIS
1069 m = STRING(c);
1070 ln = STR_LEN(c);
1071 lnc = (I32) ln;
d8093b23 1072 c1 = *(U8*)m;
6eb5f6b9
JH
1073 c2 = PL_fold_locale[c1];
1074 do_exactf:
db12adc6 1075 e = HOP3c(strend, -((I32)lnc), s);
b3c9acc1 1076
6eb5f6b9
JH
1077 if (norun && e < s)
1078 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 1079
60a8b682
JH
1080 /* The idea in the EXACTF* cases is to first find the
1081 * first character of the EXACTF* node and then, if
1082 * necessary, case-insensitively compare the full
1083 * text of the node. The c1 and c2 are the first
1084 * characters (though in Unicode it gets a bit
1085 * more complicated because there are more cases
7f16dd3d
JH
1086 * than just upper and lower: one needs to use
1087 * the so-called folding case for case-insensitive
1088 * matching (called "loose matching" in Unicode).
1089 * ibcmp_utf8() will do just that. */
60a8b682 1090
1aa99e6b 1091 if (do_utf8) {
575cac57 1092 UV c, f;
89ebb4a3 1093 U8 tmpbuf [UTF8_MAXBYTES+1];
575cac57 1094 STRLEN len, foldlen;
041457d9 1095 const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
09091399 1096 if (c1 == c2) {
5dab1207
NIS
1097 /* Upper and lower of 1st char are equal -
1098 * probably not a "letter". */
1aa99e6b 1099 while (s <= e) {
89ebb4a3 1100 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1101 uniflags);
80aecb99
JH
1102 if ( c == c1
1103 && (ln == len ||
66423254 1104 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1105 m, (char **)0, ln, (bool)UTF))
55da9344 1106 && (norun || regtry(prog, s)) )
1aa99e6b 1107 goto got_it;
80aecb99 1108 else {
1df70142 1109 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1110 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1111 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1112 if ( f != c
1113 && (f == c1 || f == c2)
1114 && (ln == foldlen ||
66423254
JH
1115 !ibcmp_utf8((char *) foldbuf,
1116 (char **)0, foldlen, do_utf8,
d07ddd77 1117 m,
eb160463 1118 (char **)0, ln, (bool)UTF))
80aecb99
JH
1119 && (norun || regtry(prog, s)) )
1120 goto got_it;
1121 }
1aa99e6b
IH
1122 s += len;
1123 }
09091399
JH
1124 }
1125 else {
1aa99e6b 1126 while (s <= e) {
89ebb4a3 1127 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1128 uniflags);
80aecb99 1129
60a8b682 1130 /* Handle some of the three Greek sigmas cases.
8c01da3c
JH
1131 * Note that not all the possible combinations
1132 * are handled here: some of them are handled
1133 * by the standard folding rules, and some of
1134 * them (the character class or ANYOF cases)
1135 * are handled during compiletime in
1136 * regexec.c:S_regclass(). */
880bd946
JH
1137 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1138 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1139 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99
JH
1140
1141 if ( (c == c1 || c == c2)
1142 && (ln == len ||
66423254 1143 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1144 m, (char **)0, ln, (bool)UTF))
55da9344 1145 && (norun || regtry(prog, s)) )
1aa99e6b 1146 goto got_it;
80aecb99 1147 else {
1df70142 1148 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1149 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1150 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1151 if ( f != c
1152 && (f == c1 || f == c2)
1153 && (ln == foldlen ||
a6872d42 1154 !ibcmp_utf8((char *) foldbuf,
66423254 1155 (char **)0, foldlen, do_utf8,
d07ddd77 1156 m,
eb160463 1157 (char **)0, ln, (bool)UTF))
80aecb99
JH
1158 && (norun || regtry(prog, s)) )
1159 goto got_it;
1160 }
1aa99e6b
IH
1161 s += len;
1162 }
09091399 1163 }
1aa99e6b
IH
1164 }
1165 else {
1166 if (c1 == c2)
1167 while (s <= e) {
1168 if ( *(U8*)s == c1
1169 && (ln == 1 || !(OP(c) == EXACTF
1170 ? ibcmp(s, m, ln)
1171 : ibcmp_locale(s, m, ln)))
1172 && (norun || regtry(prog, s)) )
1173 goto got_it;
1174 s++;
1175 }
1176 else
1177 while (s <= e) {
1178 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1179 && (ln == 1 || !(OP(c) == EXACTF
1180 ? ibcmp(s, m, ln)
1181 : ibcmp_locale(s, m, ln)))
1182 && (norun || regtry(prog, s)) )
1183 goto got_it;
1184 s++;
1185 }
b3c9acc1
IZ
1186 }
1187 break;
bbce6d69 1188 case BOUNDL:
3280af22 1189 PL_reg_flags |= RF_tainted;
bbce6d69 1190 /* FALL THROUGH */
a0d0e21e 1191 case BOUND:
ffc61ed2 1192 if (do_utf8) {
12d33761 1193 if (s == PL_bostr)
ffc61ed2
JH
1194 tmp = '\n';
1195 else {
b4f7163a 1196 U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
9041c2e3 1197
b4f7163a 1198 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
ffc61ed2
JH
1199 }
1200 tmp = ((OP(c) == BOUND ?
9041c2e3 1201 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1202 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1203 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1204 if (tmp == !(OP(c) == BOUND ?
3568d838 1205 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1206 isALNUM_LC_utf8((U8*)s)))
1207 {
1208 tmp = !tmp;
1209 if ((norun || regtry(prog, s)))
1210 goto got_it;
1211 }
078c425b 1212 s += uskip;
a687059c 1213 }
a0d0e21e 1214 }
667bb95a 1215 else {
12d33761 1216 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1217 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1218 while (s < strend) {
1219 if (tmp ==
1220 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1221 tmp = !tmp;
1222 if ((norun || regtry(prog, s)))
1223 goto got_it;
1224 }
1225 s++;
a0ed51b3 1226 }
a0ed51b3 1227 }
6eb5f6b9 1228 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1229 goto got_it;
1230 break;
bbce6d69 1231 case NBOUNDL:
3280af22 1232 PL_reg_flags |= RF_tainted;
bbce6d69 1233 /* FALL THROUGH */
a0d0e21e 1234 case NBOUND:
ffc61ed2 1235 if (do_utf8) {
12d33761 1236 if (s == PL_bostr)
ffc61ed2
JH
1237 tmp = '\n';
1238 else {
b4f7163a 1239 U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
9041c2e3 1240
b4f7163a 1241 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
ffc61ed2
JH
1242 }
1243 tmp = ((OP(c) == NBOUND ?
9041c2e3 1244 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1245 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1246 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1247 if (tmp == !(OP(c) == NBOUND ?
3568d838 1248 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1249 isALNUM_LC_utf8((U8*)s)))
1250 tmp = !tmp;
1251 else if ((norun || regtry(prog, s)))
1252 goto got_it;
078c425b 1253 s += uskip;
ffc61ed2 1254 }
a0d0e21e 1255 }
667bb95a 1256 else {
12d33761 1257 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1258 tmp = ((OP(c) == NBOUND ?
1259 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1260 while (s < strend) {
1261 if (tmp ==
1262 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1263 tmp = !tmp;
1264 else if ((norun || regtry(prog, s)))
1265 goto got_it;
1266 s++;
1267 }
a0ed51b3 1268 }
6eb5f6b9 1269 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1270 goto got_it;
1271 break;
a0d0e21e 1272 case ALNUM:
ffc61ed2 1273 if (do_utf8) {
1a4fad37 1274 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1275 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1276 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1277 if (tmp && (norun || regtry(prog, s)))
1278 goto got_it;
1279 else
1280 tmp = doevery;
1281 }
bbce6d69 1282 else
ffc61ed2 1283 tmp = 1;
078c425b 1284 s += uskip;
bbce6d69 1285 }
bbce6d69 1286 }
ffc61ed2
JH
1287 else {
1288 while (s < strend) {
1289 if (isALNUM(*s)) {
1290 if (tmp && (norun || regtry(prog, s)))
1291 goto got_it;
1292 else
1293 tmp = doevery;
1294 }
a0ed51b3 1295 else
ffc61ed2
JH
1296 tmp = 1;
1297 s++;
a0ed51b3 1298 }
a0ed51b3
LW
1299 }
1300 break;
bbce6d69 1301 case ALNUML:
3280af22 1302 PL_reg_flags |= RF_tainted;
ffc61ed2 1303 if (do_utf8) {
078c425b 1304 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1305 if (isALNUM_LC_utf8((U8*)s)) {
1306 if (tmp && (norun || regtry(prog, s)))
1307 goto got_it;
1308 else
1309 tmp = doevery;
1310 }
a687059c 1311 else
ffc61ed2 1312 tmp = 1;
078c425b 1313 s += uskip;
a0d0e21e 1314 }
a0d0e21e 1315 }
ffc61ed2
JH
1316 else {
1317 while (s < strend) {
1318 if (isALNUM_LC(*s)) {
1319 if (tmp && (norun || regtry(prog, s)))
1320 goto got_it;
1321 else
1322 tmp = doevery;
1323 }
a0ed51b3 1324 else
ffc61ed2
JH
1325 tmp = 1;
1326 s++;
a0ed51b3 1327 }
a0ed51b3
LW
1328 }
1329 break;
a0d0e21e 1330 case NALNUM:
ffc61ed2 1331 if (do_utf8) {
1a4fad37 1332 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1333 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1334 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1335 if (tmp && (norun || regtry(prog, s)))
1336 goto got_it;
1337 else
1338 tmp = doevery;
1339 }
bbce6d69 1340 else
ffc61ed2 1341 tmp = 1;
078c425b 1342 s += uskip;
bbce6d69 1343 }
bbce6d69 1344 }
ffc61ed2
JH
1345 else {
1346 while (s < strend) {
1347 if (!isALNUM(*s)) {
1348 if (tmp && (norun || regtry(prog, s)))
1349 goto got_it;
1350 else
1351 tmp = doevery;
1352 }
a0ed51b3 1353 else
ffc61ed2
JH
1354 tmp = 1;
1355 s++;
a0ed51b3 1356 }
a0ed51b3
LW
1357 }
1358 break;
bbce6d69 1359 case NALNUML:
3280af22 1360 PL_reg_flags |= RF_tainted;
ffc61ed2 1361 if (do_utf8) {
078c425b 1362 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1363 if (!isALNUM_LC_utf8((U8*)s)) {
1364 if (tmp && (norun || regtry(prog, s)))
1365 goto got_it;
1366 else
1367 tmp = doevery;
1368 }
a687059c 1369 else
ffc61ed2 1370 tmp = 1;
078c425b 1371 s += uskip;
a687059c 1372 }
a0d0e21e 1373 }
ffc61ed2
JH
1374 else {
1375 while (s < strend) {
1376 if (!isALNUM_LC(*s)) {
1377 if (tmp && (norun || regtry(prog, s)))
1378 goto got_it;
1379 else
1380 tmp = doevery;
1381 }
a0ed51b3 1382 else
ffc61ed2
JH
1383 tmp = 1;
1384 s++;
a0ed51b3 1385 }
a0ed51b3
LW
1386 }
1387 break;
a0d0e21e 1388 case SPACE:
ffc61ed2 1389 if (do_utf8) {
1a4fad37 1390 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1391 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1392 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1393 if (tmp && (norun || regtry(prog, s)))
1394 goto got_it;
1395 else
1396 tmp = doevery;
1397 }
a0d0e21e 1398 else
ffc61ed2 1399 tmp = 1;
078c425b 1400 s += uskip;
2304df62 1401 }
a0d0e21e 1402 }
ffc61ed2
JH
1403 else {
1404 while (s < strend) {
1405 if (isSPACE(*s)) {
1406 if (tmp && (norun || regtry(prog, s)))
1407 goto got_it;
1408 else
1409 tmp = doevery;
1410 }
a0ed51b3 1411 else
ffc61ed2
JH
1412 tmp = 1;
1413 s++;
a0ed51b3 1414 }
a0ed51b3
LW
1415 }
1416 break;
bbce6d69 1417 case SPACEL:
3280af22 1418 PL_reg_flags |= RF_tainted;
ffc61ed2 1419 if (do_utf8) {
078c425b 1420 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1421 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1422 if (tmp && (norun || regtry(prog, s)))
1423 goto got_it;
1424 else
1425 tmp = doevery;
1426 }
bbce6d69 1427 else
ffc61ed2 1428 tmp = 1;
078c425b 1429 s += uskip;
bbce6d69 1430 }
bbce6d69 1431 }
ffc61ed2
JH
1432 else {
1433 while (s < strend) {
1434 if (isSPACE_LC(*s)) {
1435 if (tmp && (norun || regtry(prog, s)))
1436 goto got_it;
1437 else
1438 tmp = doevery;
1439 }
a0ed51b3 1440 else
ffc61ed2
JH
1441 tmp = 1;
1442 s++;
a0ed51b3 1443 }
a0ed51b3
LW
1444 }
1445 break;
a0d0e21e 1446 case NSPACE:
ffc61ed2 1447 if (do_utf8) {
1a4fad37 1448 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1449 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1450 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1451 if (tmp && (norun || regtry(prog, s)))
1452 goto got_it;
1453 else
1454 tmp = doevery;
1455 }
a0d0e21e 1456 else
ffc61ed2 1457 tmp = 1;
078c425b 1458 s += uskip;
a687059c 1459 }
a0d0e21e 1460 }
ffc61ed2
JH
1461 else {
1462 while (s < strend) {
1463 if (!isSPACE(*s)) {
1464 if (tmp && (norun || regtry(prog, s)))
1465 goto got_it;
1466 else
1467 tmp = doevery;
1468 }
a0ed51b3 1469 else
ffc61ed2
JH
1470 tmp = 1;
1471 s++;
a0ed51b3 1472 }
a0ed51b3
LW
1473 }
1474 break;
bbce6d69 1475 case NSPACEL:
3280af22 1476 PL_reg_flags |= RF_tainted;
ffc61ed2 1477 if (do_utf8) {
078c425b 1478 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1479 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1480 if (tmp && (norun || regtry(prog, s)))
1481 goto got_it;
1482 else
1483 tmp = doevery;
1484 }
bbce6d69 1485 else
ffc61ed2 1486 tmp = 1;
078c425b 1487 s += uskip;
bbce6d69 1488 }
bbce6d69 1489 }
ffc61ed2
JH
1490 else {
1491 while (s < strend) {
1492 if (!isSPACE_LC(*s)) {
1493 if (tmp && (norun || regtry(prog, s)))
1494 goto got_it;
1495 else
1496 tmp = doevery;
1497 }
a0ed51b3 1498 else
ffc61ed2
JH
1499 tmp = 1;
1500 s++;
a0ed51b3 1501 }
a0ed51b3
LW
1502 }
1503 break;
a0d0e21e 1504 case DIGIT:
ffc61ed2 1505 if (do_utf8) {
1a4fad37 1506 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1507 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1508 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1509 if (tmp && (norun || regtry(prog, s)))
1510 goto got_it;
1511 else
1512 tmp = doevery;
1513 }
a0d0e21e 1514 else
ffc61ed2 1515 tmp = 1;
078c425b 1516 s += uskip;
2b69d0c2 1517 }
a0d0e21e 1518 }
ffc61ed2
JH
1519 else {
1520 while (s < strend) {
1521 if (isDIGIT(*s)) {
1522 if (tmp && (norun || regtry(prog, s)))
1523 goto got_it;
1524 else
1525 tmp = doevery;
1526 }
a0ed51b3 1527 else
ffc61ed2
JH
1528 tmp = 1;
1529 s++;
a0ed51b3 1530 }
a0ed51b3
LW
1531 }
1532 break;
b8c5462f
JH
1533 case DIGITL:
1534 PL_reg_flags |= RF_tainted;
ffc61ed2 1535 if (do_utf8) {
078c425b 1536 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1537 if (isDIGIT_LC_utf8((U8*)s)) {
1538 if (tmp && (norun || regtry(prog, s)))
1539 goto got_it;
1540 else
1541 tmp = doevery;
1542 }
b8c5462f 1543 else
ffc61ed2 1544 tmp = 1;
078c425b 1545 s += uskip;
b8c5462f 1546 }
b8c5462f 1547 }
ffc61ed2
JH
1548 else {
1549 while (s < strend) {
1550 if (isDIGIT_LC(*s)) {
1551 if (tmp && (norun || regtry(prog, s)))
1552 goto got_it;
1553 else
1554 tmp = doevery;
1555 }
b8c5462f 1556 else
ffc61ed2
JH
1557 tmp = 1;
1558 s++;
b8c5462f 1559 }
b8c5462f
JH
1560 }
1561 break;
a0d0e21e 1562 case NDIGIT:
ffc61ed2 1563 if (do_utf8) {
1a4fad37 1564 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1565 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1566 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1567 if (tmp && (norun || regtry(prog, s)))
1568 goto got_it;
1569 else
1570 tmp = doevery;
1571 }
a0d0e21e 1572 else
ffc61ed2 1573 tmp = 1;
078c425b 1574 s += uskip;
a687059c 1575 }
a0d0e21e 1576 }
ffc61ed2
JH
1577 else {
1578 while (s < strend) {
1579 if (!isDIGIT(*s)) {
1580 if (tmp && (norun || regtry(prog, s)))
1581 goto got_it;
1582 else
1583 tmp = doevery;
1584 }
a0ed51b3 1585 else
ffc61ed2
JH
1586 tmp = 1;
1587 s++;
a0ed51b3 1588 }
a0ed51b3
LW
1589 }
1590 break;
b8c5462f
JH
1591 case NDIGITL:
1592 PL_reg_flags |= RF_tainted;
ffc61ed2 1593 if (do_utf8) {
078c425b 1594 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1595 if (!isDIGIT_LC_utf8((U8*)s)) {
1596 if (tmp && (norun || regtry(prog, s)))
1597 goto got_it;
1598 else
1599 tmp = doevery;
1600 }
b8c5462f 1601 else
ffc61ed2 1602 tmp = 1;
078c425b 1603 s += uskip;
b8c5462f 1604 }
a0ed51b3 1605 }
ffc61ed2
JH
1606 else {
1607 while (s < strend) {
1608 if (!isDIGIT_LC(*s)) {
1609 if (tmp && (norun || regtry(prog, s)))
1610 goto got_it;
1611 else
1612 tmp = doevery;
1613 }
cf93c79d 1614 else
ffc61ed2
JH
1615 tmp = 1;
1616 s++;
b8c5462f 1617 }
b8c5462f
JH
1618 }
1619 break;
b3c9acc1 1620 default:
3c3eec57
GS
1621 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1622 break;
d6a28714 1623 }
6eb5f6b9
JH
1624 return 0;
1625 got_it:
1626 return s;
1627}
1628
1629/*
1630 - regexec_flags - match a regexp against a string
1631 */
1632I32
1633Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1634 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1635/* strend: pointer to null at end of string */
1636/* strbeg: real beginning of string */
1637/* minend: end of match must be >=minend after stringarg. */
1638/* data: May be used for some additional optimizations. */
1639/* nosave: For optimizations. */
1640{
97aff369 1641 dVAR;
6eb5f6b9
JH
1642 register char *s;
1643 register regnode *c;
1644 register char *startpos = stringarg;
6eb5f6b9
JH
1645 I32 minlen; /* must match at least this many chars */
1646 I32 dontbother = 0; /* how many characters not to try at end */
6eb5f6b9
JH
1647 I32 end_shift = 0; /* Same for the end. */ /* CC */
1648 I32 scream_pos = -1; /* Internal iterator of scream. */
1649 char *scream_olds;
1650 SV* oreplsv = GvSV(PL_replgv);
1df70142 1651 const bool do_utf8 = DO_UTF8(sv);
a3b680e6 1652 const I32 multiline = prog->reganch & PMf_MULTILINE;
2a782b5b 1653#ifdef DEBUGGING
9e55ce06
JH
1654 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
1655 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2a782b5b 1656#endif
a3621e74
YO
1657
1658 GET_RE_DEBUG_FLAGS_DECL;
1659
9d4ba2ae 1660 PERL_UNUSED_ARG(data);
a30b2f1f 1661 RX_MATCH_UTF8_set(prog,do_utf8);
6eb5f6b9
JH
1662
1663 PL_regcc = 0;
1664
1665 cache_re(prog);
1666#ifdef DEBUGGING
aea4f609 1667 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1668#endif
1669
1670 /* Be paranoid... */
1671 if (prog == NULL || startpos == NULL) {
1672 Perl_croak(aTHX_ "NULL regexp parameter");
1673 return 0;
1674 }
1675
1676 minlen = prog->minlen;
61a36c01 1677 if (strend - startpos < minlen) {
a3621e74 1678 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584
JH
1679 "String too short [regexec_flags]...\n"));
1680 goto phooey;
1aa99e6b 1681 }
6eb5f6b9 1682
6eb5f6b9
JH
1683 /* Check validity of program. */
1684 if (UCHARAT(prog->program) != REG_MAGIC) {
1685 Perl_croak(aTHX_ "corrupted regexp program");
1686 }
1687
1688 PL_reg_flags = 0;
1689 PL_reg_eval_set = 0;
1690 PL_reg_maxiter = 0;
1691
1692 if (prog->reganch & ROPT_UTF8)
1693 PL_reg_flags |= RF_utf8;
1694
1695 /* Mark beginning of line for ^ and lookbehind. */
1696 PL_regbol = startpos;
1697 PL_bostr = strbeg;
1698 PL_reg_sv = sv;
1699
1700 /* Mark end of line for $ (and such) */
1701 PL_regeol = strend;
1702
1703 /* see how far we have to get to not match where we matched before */
1704 PL_regtill = startpos+minend;
1705
1706 /* We start without call_cc context. */
1707 PL_reg_call_cc = 0;
1708
1709 /* If there is a "must appear" string, look for it. */
1710 s = startpos;
1711
1712 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1713 MAGIC *mg;
1714
1715 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1716 PL_reg_ganch = startpos;
1717 else if (sv && SvTYPE(sv) >= SVt_PVMG
1718 && SvMAGIC(sv)
14befaf4
DM
1719 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1720 && mg->mg_len >= 0) {
6eb5f6b9
JH
1721 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1722 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1723 if (s > PL_reg_ganch)
6eb5f6b9
JH
1724 goto phooey;
1725 s = PL_reg_ganch;
1726 }
1727 }
1728 else /* pos() not defined */
1729 PL_reg_ganch = strbeg;
1730 }
1731
33b8afdf 1732 if (!(flags & REXEC_CHECKED) && (prog->check_substr != Nullsv || prog->check_utf8 != Nullsv)) {
6eb5f6b9
JH
1733 re_scream_pos_data d;
1734
1735 d.scream_olds = &scream_olds;
1736 d.scream_pos = &scream_pos;
1737 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7 1738 if (!s) {
a3621e74 1739 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1740 goto phooey; /* not present */
3fa9c3d7 1741 }
6eb5f6b9
JH
1742 }
1743
a3621e74 1744 DEBUG_EXECUTE_r({
1df70142
AL
1745 const char * const s0 = UTF
1746 ? pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1747 UNI_DISPLAY_REGEX)
1748 : prog->precomp;
1749 const int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1750 const char * const s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
c728cb41 1751 UNI_DISPLAY_REGEX) : startpos;
1df70142 1752 const int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
2a782b5b
JH
1753 if (!PL_colorset)
1754 reginitcolors();
1755 PerlIO_printf(Perl_debug_log,
a0288114 1756 "%sMatching REx%s \"%s%*.*s%s%s\" against \"%s%.*s%s%s\"\n",
e4584336 1757 PL_colors[4], PL_colors[5], PL_colors[0],
9e55ce06 1758 len0, len0, s0,
2a782b5b 1759 PL_colors[1],
9e55ce06 1760 len0 > 60 ? "..." : "",
2a782b5b 1761 PL_colors[0],
9e55ce06
JH
1762 (int)(len1 > 60 ? 60 : len1),
1763 s1, PL_colors[1],
1764 (len1 > 60 ? "..." : "")
2a782b5b
JH
1765 );
1766 });
6eb5f6b9
JH
1767
1768 /* Simplest case: anchored match need be tried only once. */
1769 /* [unless only anchor is BOL and multiline is set] */
1770 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1771 if (s == startpos && regtry(prog, startpos))
1772 goto got_it;
7fba1cd6 1773 else if (multiline || (prog->reganch & ROPT_IMPLICIT)
6eb5f6b9
JH
1774 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1775 {
1776 char *end;
1777
1778 if (minlen)
1779 dontbother = minlen - 1;
1aa99e6b 1780 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9 1781 /* for multiline we only have to try after newlines */
33b8afdf 1782 if (prog->check_substr || prog->check_utf8) {
6eb5f6b9
JH
1783 if (s == startpos)
1784 goto after_try;
1785 while (1) {
1786 if (regtry(prog, s))
1787 goto got_it;
1788 after_try:
1789 if (s >= end)
1790 goto phooey;
1791 if (prog->reganch & RE_USE_INTUIT) {
1792 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1793 if (!s)
1794 goto phooey;
1795 }
1796 else
1797 s++;
1798 }
1799 } else {
1800 if (s > startpos)
1801 s--;
1802 while (s < end) {
1803 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1804 if (regtry(prog, s))
1805 goto got_it;
1806 }
1807 }
1808 }
1809 }
1810 goto phooey;
1811 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1812 if (regtry(prog, PL_reg_ganch))
1813 goto got_it;
1814 goto phooey;
1815 }
1816
1817 /* Messy cases: unanchored match. */
33b8afdf 1818 if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1819 /* we have /x+whatever/ */
1820 /* it must be a one character string (XXXX Except UTF?) */
33b8afdf 1821 char ch;
bf93d4cc
GS
1822#ifdef DEBUGGING
1823 int did_match = 0;
1824#endif
33b8afdf
JH
1825 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1826 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
3f7c398e 1827 ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
bf93d4cc 1828
1aa99e6b 1829 if (do_utf8) {
6eb5f6b9
JH
1830 while (s < strend) {
1831 if (*s == ch) {
a3621e74 1832 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1833 if (regtry(prog, s)) goto got_it;
1834 s += UTF8SKIP(s);
1835 while (s < strend && *s == ch)
1836 s += UTF8SKIP(s);
1837 }
1838 s += UTF8SKIP(s);
1839 }
1840 }
1841 else {
1842 while (s < strend) {
1843 if (*s == ch) {
a3621e74 1844 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1845 if (regtry(prog, s)) goto got_it;
1846 s++;
1847 while (s < strend && *s == ch)
1848 s++;
1849 }
1850 s++;
1851 }
1852 }
a3621e74 1853 DEBUG_EXECUTE_r(if (!did_match)
bf93d4cc 1854 PerlIO_printf(Perl_debug_log,
b7953727
JH
1855 "Did not find anchored character...\n")
1856 );
6eb5f6b9 1857 }
33b8afdf
JH
1858 else if (prog->anchored_substr != Nullsv
1859 || prog->anchored_utf8 != Nullsv
1860 || ((prog->float_substr != Nullsv || prog->float_utf8 != Nullsv)
1861 && prog->float_max_offset < strend - s)) {
1862 SV *must;
1863 I32 back_max;
1864 I32 back_min;
1865 char *last;
6eb5f6b9 1866 char *last1; /* Last position checked before */
bf93d4cc
GS
1867#ifdef DEBUGGING
1868 int did_match = 0;
1869#endif
33b8afdf
JH
1870 if (prog->anchored_substr || prog->anchored_utf8) {
1871 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1872 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1873 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1874 back_max = back_min = prog->anchored_offset;
1875 } else {
1876 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1877 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1878 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1879 back_max = prog->float_max_offset;
1880 back_min = prog->float_min_offset;
1881 }
1882 if (must == &PL_sv_undef)
1883 /* could not downgrade utf8 check substring, so must fail */
1884 goto phooey;
1885
1886 last = HOP3c(strend, /* Cannot start after this */
1887 -(I32)(CHR_SVLEN(must)
1888 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9
JH
1889
1890 if (s > PL_bostr)
1891 last1 = HOPc(s, -1);
1892 else
1893 last1 = s - 1; /* bogus */
1894
a0288114 1895 /* XXXX check_substr already used to find "s", can optimize if
6eb5f6b9
JH
1896 check_substr==must. */
1897 scream_pos = -1;
1898 dontbother = end_shift;
1899 strend = HOPc(strend, -dontbother);
1900 while ( (s <= last) &&
9041c2e3 1901 ((flags & REXEC_SCREAM)
1aa99e6b 1902 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1903 end_shift, &scream_pos, 0))
1aa99e6b 1904 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1905 (unsigned char*)strend, must,
7fba1cd6 1906 multiline ? FBMrf_MULTILINE : 0))) ) {
4addbd3b
HS
1907 /* we may be pointing at the wrong string */
1908 if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
3f7c398e 1909 s = strbeg + (s - SvPVX_const(sv));
a3621e74 1910 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1911 if (HOPc(s, -back_max) > last1) {
1912 last1 = HOPc(s, -back_min);
1913 s = HOPc(s, -back_max);
1914 }
1915 else {
1916 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1917
1918 last1 = HOPc(s, -back_min);
1919 s = t;
1920 }
1aa99e6b 1921 if (do_utf8) {
6eb5f6b9
JH
1922 while (s <= last1) {
1923 if (regtry(prog, s))
1924 goto got_it;
1925 s += UTF8SKIP(s);
1926 }
1927 }
1928 else {
1929 while (s <= last1) {
1930 if (regtry(prog, s))
1931 goto got_it;
1932 s++;
1933 }
1934 }
1935 }
a3621e74 1936 DEBUG_EXECUTE_r(if (!did_match)
b7953727 1937 PerlIO_printf(Perl_debug_log,
a0288114 1938 "Did not find %s substr \"%s%.*s%s\"%s...\n",
33b8afdf 1939 ((must == prog->anchored_substr || must == prog->anchored_utf8)
bf93d4cc
GS
1940 ? "anchored" : "floating"),
1941 PL_colors[0],
1942 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 1943 SvPVX_const(must),
b7953727
JH
1944 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1945 );
6eb5f6b9
JH
1946 goto phooey;
1947 }
155aba94 1948 else if ((c = prog->regstclass)) {
f14c76ed
RGS
1949 if (minlen) {
1950 I32 op = (U8)OP(prog->regstclass);
66e933ab 1951 /* don't bother with what can't match */
f14c76ed
RGS
1952 if (PL_regkind[op] != EXACT && op != CANY)
1953 strend = HOPc(strend, -(minlen - 1));
1954 }
a3621e74 1955 DEBUG_EXECUTE_r({
ffc61ed2 1956 SV *prop = sv_newmortal();
cfd0369c
NC
1957 const char *s0;
1958 const char *s1;
9e55ce06
JH
1959 int len0;
1960 int len1;
1961
ffc61ed2 1962 regprop(prop, c);
9e55ce06 1963 s0 = UTF ?
3f7c398e 1964 pv_uni_display(dsv0, (U8*)SvPVX_const(prop), SvCUR(prop), 60,
c728cb41 1965 UNI_DISPLAY_REGEX) :
cfd0369c 1966 SvPVX_const(prop);
9e55ce06
JH
1967 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1968 s1 = UTF ?
c728cb41 1969 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
9e55ce06
JH
1970 len1 = UTF ? SvCUR(dsv1) : strend - s;
1971 PerlIO_printf(Perl_debug_log,
a0288114 1972 "Matching stclass \"%*.*s\" against \"%*.*s\"\n",
9e55ce06
JH
1973 len0, len0, s0,
1974 len1, len1, s1);
ffc61ed2 1975 });
06b5626a 1976 if (find_byclass(prog, c, s, strend, 0))
6eb5f6b9 1977 goto got_it;
a3621e74 1978 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1979 }
1980 else {
1981 dontbother = 0;
33b8afdf
JH
1982 if (prog->float_substr != Nullsv || prog->float_utf8 != Nullsv) {
1983 /* Trim the end. */
d6a28714 1984 char *last;
33b8afdf
JH
1985 SV* float_real;
1986
1987 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1988 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1989 float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
d6a28714
JH
1990
1991 if (flags & REXEC_SCREAM) {
33b8afdf 1992 last = screaminstr(sv, float_real, s - strbeg,
d6a28714
JH
1993 end_shift, &scream_pos, 1); /* last one */
1994 if (!last)
ffc61ed2 1995 last = scream_olds; /* Only one occurrence. */
4addbd3b
HS
1996 /* we may be pointing at the wrong string */
1997 else if (RX_MATCH_COPIED(prog))
3f7c398e 1998 s = strbeg + (s - SvPVX_const(sv));
b8c5462f 1999 }
d6a28714
JH
2000 else {
2001 STRLEN len;
cfd0369c 2002 const char * const little = SvPV_const(float_real, len);
d6a28714 2003
33b8afdf 2004 if (SvTAIL(float_real)) {
d6a28714
JH
2005 if (memEQ(strend - len + 1, little, len - 1))
2006 last = strend - len + 1;
7fba1cd6 2007 else if (!multiline)
9041c2e3 2008 last = memEQ(strend - len, little, len)
d6a28714 2009 ? strend - len : Nullch;
b8c5462f 2010 else
d6a28714
JH
2011 goto find_last;
2012 } else {
2013 find_last:
9041c2e3 2014 if (len)
d6a28714 2015 last = rninstr(s, strend, little, little + len);
b8c5462f 2016 else
a0288114 2017 last = strend; /* matching "$" */
b8c5462f 2018 }
b8c5462f 2019 }
bf93d4cc 2020 if (last == NULL) {
a3621e74 2021 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
bf93d4cc 2022 "%sCan't trim the tail, match fails (should not happen)%s\n",
e4584336 2023 PL_colors[4], PL_colors[5]));
bf93d4cc
GS
2024 goto phooey; /* Should not happen! */
2025 }
d6a28714
JH
2026 dontbother = strend - last + prog->float_min_offset;
2027 }
2028 if (minlen && (dontbother < minlen))
2029 dontbother = minlen - 1;
2030 strend -= dontbother; /* this one's always in bytes! */
2031 /* We don't know much -- general case. */
1aa99e6b 2032 if (do_utf8) {
d6a28714
JH
2033 for (;;) {
2034 if (regtry(prog, s))
2035 goto got_it;
2036 if (s >= strend)
2037 break;
b8c5462f 2038 s += UTF8SKIP(s);
d6a28714
JH
2039 };
2040 }
2041 else {
2042 do {
2043 if (regtry(prog, s))
2044 goto got_it;
2045 } while (s++ < strend);
2046 }
2047 }
2048
2049 /* Failure. */
2050 goto phooey;
2051
2052got_it:
2053 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2054
2055 if (PL_reg_eval_set) {
2056 /* Preserve the current value of $^R */
2057 if (oreplsv != GvSV(PL_replgv))
2058 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2059 restored, the value remains
2060 the same. */
acfe0abc 2061 restore_pos(aTHX_ 0);
d6a28714
JH
2062 }
2063
2064 /* make sure $`, $&, $', and $digit will work later */
2065 if ( !(flags & REXEC_NOT_FIRST) ) {
ed252734 2066 RX_MATCH_COPY_FREE(prog);
d6a28714
JH
2067 if (flags & REXEC_COPY_STR) {
2068 I32 i = PL_regeol - startpos + (stringarg - strbeg);
f8c7b90f 2069#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2070 if ((SvIsCOW(sv)
2071 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2072 if (DEBUG_C_TEST) {
2073 PerlIO_printf(Perl_debug_log,
2074 "Copy on write: regexp capture, type %d\n",
2075 (int) SvTYPE(sv));
2076 }
2077 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
d5263905 2078 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
ed252734
NC
2079 assert (SvPOKp(prog->saved_copy));
2080 } else
2081#endif
2082 {
2083 RX_MATCH_COPIED_on(prog);
2084 s = savepvn(strbeg, i);
2085 prog->subbeg = s;
2086 }
d6a28714 2087 prog->sublen = i;
d6a28714
JH
2088 }
2089 else {
2090 prog->subbeg = strbeg;
2091 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2092 }
2093 }
9041c2e3 2094
d6a28714
JH
2095 return 1;
2096
2097phooey:
a3621e74 2098 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
e4584336 2099 PL_colors[4], PL_colors[5]));
d6a28714 2100 if (PL_reg_eval_set)
acfe0abc 2101 restore_pos(aTHX_ 0);
d6a28714
JH
2102 return 0;
2103}
2104
2105/*
2106 - regtry - try match at specific point
2107 */
2108STATIC I32 /* 0 failure, 1 success */
2109S_regtry(pTHX_ regexp *prog, char *startpos)
2110{
97aff369 2111 dVAR;
d6a28714
JH
2112 register I32 i;
2113 register I32 *sp;
2114 register I32 *ep;
2115 CHECKPOINT lastcp;
a3621e74 2116 GET_RE_DEBUG_FLAGS_DECL;
d6a28714 2117
02db2b7b
IZ
2118#ifdef DEBUGGING
2119 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
2120#endif
d6a28714
JH
2121 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2122 MAGIC *mg;
2123
2124 PL_reg_eval_set = RS_init;
a3621e74 2125 DEBUG_EXECUTE_r(DEBUG_s(
b900a521
JH
2126 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2127 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 2128 ));
e8347627 2129 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
2130 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2131 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2132 SAVETMPS;
2133 /* Apparently this is not needed, judging by wantarray. */
e8347627 2134 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
2135 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2136
2137 if (PL_reg_sv) {
2138 /* Make $_ available to executed code. */
2139 if (PL_reg_sv != DEFSV) {
59f00321 2140 SAVE_DEFSV;
d6a28714 2141 DEFSV = PL_reg_sv;
b8c5462f 2142 }
d6a28714 2143
9041c2e3 2144 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 2145 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 2146 /* prepare for quick setting of pos */
14befaf4
DM
2147 sv_magic(PL_reg_sv, (SV*)0,
2148 PERL_MAGIC_regex_global, Nullch, 0);
2149 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 2150 mg->mg_len = -1;
b8c5462f 2151 }
d6a28714
JH
2152 PL_reg_magic = mg;
2153 PL_reg_oldpos = mg->mg_len;
c76ac1ee 2154 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 2155 }
09687e5a 2156 if (!PL_reg_curpm) {
a02a5408 2157 Newxz(PL_reg_curpm, 1, PMOP);
09687e5a
AB
2158#ifdef USE_ITHREADS
2159 {
2160 SV* repointer = newSViv(0);
577e12cc 2161 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2162 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2163 av_push(PL_regex_padav,repointer);
2164 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2165 PL_regex_pad = AvARRAY(PL_regex_padav);
2166 }
2167#endif
2168 }
aaa362c4 2169 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2170 PL_reg_oldcurpm = PL_curpm;
2171 PL_curpm = PL_reg_curpm;
2172 if (RX_MATCH_COPIED(prog)) {
2173 /* Here is a serious problem: we cannot rewrite subbeg,
2174 since it may be needed if this match fails. Thus
2175 $` inside (?{}) could fail... */
2176 PL_reg_oldsaved = prog->subbeg;
2177 PL_reg_oldsavedlen = prog->sublen;
f8c7b90f 2178#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2179 PL_nrs = prog->saved_copy;
2180#endif
d6a28714
JH
2181 RX_MATCH_COPIED_off(prog);
2182 }
2183 else
2184 PL_reg_oldsaved = Nullch;
2185 prog->subbeg = PL_bostr;
2186 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2187 }
973dddac 2188 prog->startp[0] = startpos - PL_bostr;
d6a28714
JH
2189 PL_reginput = startpos;
2190 PL_regstartp = prog->startp;
2191 PL_regendp = prog->endp;
2192 PL_reglastparen = &prog->lastparen;
a01268b5 2193 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714 2194 prog->lastparen = 0;
03994de8 2195 prog->lastcloseparen = 0;
d6a28714 2196 PL_regsize = 0;
a3621e74 2197 DEBUG_EXECUTE_r(PL_reg_starttry = startpos);
d6a28714
JH
2198 if (PL_reg_start_tmpl <= prog->nparens) {
2199 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2200 if(PL_reg_start_tmp)
2201 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2202 else
a02a5408 2203 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
d6a28714
JH
2204 }
2205
2206 /* XXXX What this code is doing here?!!! There should be no need
2207 to do this again and again, PL_reglastparen should take care of
3dd2943c 2208 this! --ilya*/
dafc8851
JH
2209
2210 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2211 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2212 * PL_reglastparen), is not needed at all by the test suite
2213 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2214 * enough, for building DynaLoader, or otherwise this
2215 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2216 * will happen. Meanwhile, this code *is* needed for the
2217 * above-mentioned test suite tests to succeed. The common theme
2218 * on those tests seems to be returning null fields from matches.
2219 * --jhi */
dafc8851 2220#if 1
d6a28714
JH
2221 sp = prog->startp;
2222 ep = prog->endp;
2223 if (prog->nparens) {
eb160463 2224 for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
d6a28714
JH
2225 *++sp = -1;
2226 *++ep = -1;
2227 }
2228 }
dafc8851 2229#endif
02db2b7b 2230 REGCP_SET(lastcp);
d6a28714
JH
2231 if (regmatch(prog->program + 1)) {
2232 prog->endp[0] = PL_reginput - PL_bostr;
2233 return 1;
2234 }
02db2b7b 2235 REGCP_UNWIND(lastcp);
d6a28714
JH
2236 return 0;
2237}
2238
02db2b7b
IZ
2239#define RE_UNWIND_BRANCH 1
2240#define RE_UNWIND_BRANCHJ 2
2241
2242union re_unwind_t;
2243
2244typedef struct { /* XX: makes sense to enlarge it... */
2245 I32 type;
2246 I32 prev;
2247 CHECKPOINT lastcp;
2248} re_unwind_generic_t;
2249
2250typedef struct {
2251 I32 type;
2252 I32 prev;
2253 CHECKPOINT lastcp;
2254 I32 lastparen;
2255 regnode *next;
2256 char *locinput;
2257 I32 nextchr;
2258#ifdef DEBUGGING
2259 int regindent;
2260#endif
2261} re_unwind_branch_t;
2262
2263typedef union re_unwind_t {
2264 I32 type;
2265 re_unwind_generic_t generic;
2266 re_unwind_branch_t branch;
2267} re_unwind_t;
2268
8ba1375e
MJD
2269#define sayYES goto yes
2270#define sayNO goto no
e0f9d4a8 2271#define sayNO_ANYOF goto no_anyof
8ba1375e
MJD
2272#define sayYES_FINAL goto yes_final
2273#define sayYES_LOUD goto yes_loud
2274#define sayNO_FINAL goto no_final
2275#define sayNO_SILENT goto do_no
2276#define saySAME(x) if (x) goto yes; else goto no
2277
3ab3c9b4
HS
2278#define POSCACHE_SUCCESS 0 /* caching success rather than failure */
2279#define POSCACHE_SEEN 1 /* we know what we're caching */
2280#define POSCACHE_START 2 /* the real cache: this bit maps to pos 0 */
2281#define CACHEsayYES STMT_START { \
2282 if (cache_offset | cache_bit) { \
2283 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2284 PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) || (1<<POSCACHE_SEEN); \
2285 else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2286 /* cache records failure, but this is success */ \
2287 DEBUG_r( \
2288 PerlIO_printf(Perl_debug_log, \
2289 "%*s (remove success from failure cache)\n", \
2290 REPORT_CODE_OFF+PL_regindent*2, "") \
2291 ); \
2292 PL_reg_poscache[cache_offset] &= ~(1<<cache_bit); \
2293 } \
2294 } \
2295 sayYES; \
2296} STMT_END
2297#define CACHEsayNO STMT_START { \
2298 if (cache_offset | cache_bit) { \
2299 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2300 PL_reg_poscache[0] |= (1<<POSCACHE_SEEN); \
2301 else if ((PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2302 /* cache records success, but this is failure */ \
2303 DEBUG_r( \
2304 PerlIO_printf(Perl_debug_log, \
2305 "%*s (remove failure from success cache)\n", \
2306 REPORT_CODE_OFF+PL_regindent*2, "") \
2307 ); \
2308 PL_reg_poscache[cache_offset] &= ~(1<<cache_bit); \
2309 } \
2310 } \
2311 sayNO; \
2312} STMT_END
2313
a3621e74
YO
2314/* this is used to determine how far from the left messages like
2315 'failed...' are printed. Currently 29 makes these messages line
2316 up with the opcode they refer to. Earlier perls used 25 which
2317 left these messages outdented making reviewing a debug output
2318 quite difficult.
2319*/
2320#define REPORT_CODE_OFF 29
2321
2322
2323/* Make sure there is a test for this +1 options in re_tests */
2324#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2325
2326#define TRIE_CHECK_STATE_IS_ACCEPTING STMT_START { \
2327 if ( trie->states[ state ].wordnum ) { \
2328 if ( !accepted ) { \
2329 ENTER; \
2330 SAVETMPS; \
2331 bufflen = TRIE_INITAL_ACCEPT_BUFFLEN ; \
2332 sv_accept_buff=NEWSV( 1234, \
2333 bufflen * sizeof(reg_trie_accepted) - 1 ); \
2334 SvCUR_set( sv_accept_buff, sizeof(reg_trie_accepted) ); \
2335 SvPOK_on( sv_accept_buff ); \
2336 sv_2mortal( sv_accept_buff ); \
2337 accept_buff = (reg_trie_accepted*)SvPV_nolen( sv_accept_buff );\
2338 } else { \
2339 if ( accepted >= bufflen ) { \
2340 bufflen *= 2; \
2341 accept_buff =(reg_trie_accepted*)SvGROW( sv_accept_buff, \
2342 bufflen * sizeof(reg_trie_accepted) ); \
2343 } \
2344 SvCUR_set( sv_accept_buff,SvCUR( sv_accept_buff ) \
2345 + sizeof( reg_trie_accepted ) ); \
2346 } \
2347 accept_buff[ accepted ].wordnum = trie->states[ state ].wordnum; \
2348 accept_buff[ accepted ].endpos = uc; \
2349 ++accepted; \
2350 } } STMT_END
2351
2352#define TRIE_HANDLE_CHAR STMT_START { \
2353 if ( uvc < 256 ) { \
2354 charid = trie->charmap[ uvc ]; \
2355 } else { \
2356 charid = 0; \
2357 if( trie->widecharmap ) { \
2358 SV** svpp = (SV**)NULL; \
2359 svpp = hv_fetch( trie->widecharmap, (char*)&uvc, \
2360 sizeof( UV ), 0 ); \
2361 if ( svpp ) { \
2362 charid = (U16)SvIV( *svpp ); \
2363 } \
2364 } \
2365 } \
2366 if ( charid && \
cc601c31
YO
2367 ( base + charid > trie->uniquecharcount ) && \
2368 ( base + charid - 1 - trie->uniquecharcount < trie->lasttrans) && \
a3621e74
YO
2369 trie->trans[ base + charid - 1 - trie->uniquecharcount ].check == state ) \
2370 { \
2371 state = trie->trans[ base + charid - 1 - trie->uniquecharcount ].next; \
2372 } else { \
2373 state = 0; \
2374 } \
2375 uc += len; \
2376 } STMT_END
8ba1375e 2377
d6a28714
JH
2378/*
2379 - regmatch - main matching routine
2380 *
2381 * Conceptually the strategy is simple: check to see whether the current
2382 * node matches, call self recursively to see whether the rest matches,
2383 * and then act accordingly. In practice we make some effort to avoid
2384 * recursion, in particular by going through "ordinary" nodes (that don't
2385 * need to know whether the rest of the match failed) by a loop instead of
2386 * by recursion.
2387 */
2388/* [lwall] I've hoisted the register declarations to the outer block in order to
2389 * maybe save a little bit of pushing and popping on the stack. It also takes
2390 * advantage of machines that use a register save mask on subroutine entry.
2391 */
2392STATIC I32 /* 0 failure, 1 success */
2393S_regmatch(pTHX_ regnode *prog)
2394{
27da23d5 2395 dVAR;
d6a28714
JH
2396 register regnode *scan; /* Current node. */
2397 regnode *next; /* Next node. */
2398 regnode *inner; /* Next node in internal branch. */
2399 register I32 nextchr; /* renamed nextchr - nextchar colides with
2400 function of same name */
2401 register I32 n; /* no or next */
b7953727
JH
2402 register I32 ln = 0; /* len or last */
2403 register char *s = Nullch; /* operand or save */
d6a28714 2404 register char *locinput = PL_reginput;
b7953727 2405 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2406 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2407 I32 unwind = 0;
a3621e74
YO
2408
2409 /* used by the trie code */
cbbf8932
AL
2410 SV *sv_accept_buff = NULL; /* accepting states we have traversed */
2411 reg_trie_accepted *accept_buff = NULL; /* "" */
ab74612d
NC
2412 reg_trie_data *trie; /* what trie are we using right now */
2413 U32 accepted = 0; /* how many accepting states we have seen*/
a3621e74 2414
b7953727 2415#if 0
02db2b7b 2416 I32 firstcp = PL_savestack_ix;
b7953727 2417#endif
0d46e09a 2418 register const bool do_utf8 = PL_reg_match_utf8;
2a782b5b 2419#ifdef DEBUGGING
ce333219
JH
2420 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2421 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2422 SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
a3621e74 2423
ab74612d 2424 SV *re_debug_flags = NULL;
2a782b5b 2425#endif
041457d9 2426 U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
02db2b7b 2427
a3621e74
YO
2428 GET_RE_DEBUG_FLAGS;
2429
d6a28714
JH
2430#ifdef DEBUGGING
2431 PL_regindent++;
2432#endif
2433
a3621e74 2434
d6a28714
JH
2435 /* Note that nextchr is a byte even in UTF */
2436 nextchr = UCHARAT(locinput);
2437 scan = prog;
2438 while (scan != NULL) {
8ba1375e 2439
a3621e74 2440 DEBUG_EXECUTE_r( {
d6a28714 2441 SV *prop = sv_newmortal();
1df70142
AL
2442 const int docolor = *PL_colors[0];
2443 const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2444 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2445 /* The part of the string before starttry has one color
2446 (pref0_len chars), between starttry and current
2447 position another one (pref_len - pref0_len chars),
2448 after the current position the third one.
2449 We assume that pref0_len <= pref_len, otherwise we
2450 decrease pref0_len. */
9041c2e3 2451 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2452 ? (5 + taill) - l : locinput - PL_bostr;
2453 int pref0_len;
d6a28714 2454
df1ffd02 2455 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2456 pref_len++;
2457 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2458 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2459 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2460 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2461 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2462 l--;
d6a28714
JH
2463 if (pref0_len < 0)
2464 pref0_len = 0;
2465 if (pref0_len > pref_len)
2466 pref0_len = pref_len;
2467 regprop(prop, scan);
2a782b5b 2468 {
1df70142 2469 const char * const s0 =
f14c76ed 2470 do_utf8 && OP(scan) != CANY ?
2a782b5b 2471 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
c728cb41 2472 pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2473 locinput - pref_len;
1df70142
AL
2474 const int len0 = do_utf8 ? strlen(s0) : pref0_len;
2475 const char * const s1 = do_utf8 && OP(scan) != CANY ?
2a782b5b 2476 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
c728cb41 2477 pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2478 locinput - pref_len + pref0_len;
1df70142
AL
2479 const int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2480 const char * const s2 = do_utf8 && OP(scan) != CANY ?
2a782b5b 2481 pv_uni_display(dsv2, (U8*)locinput,
c728cb41 2482 PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2483 locinput;
1df70142 2484 const int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2485 PerlIO_printf(Perl_debug_log,
2486 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2487 (IV)(locinput - PL_bostr),
2488 PL_colors[4],
2489 len0, s0,
2490 PL_colors[5],
2491 PL_colors[2],
2492 len1, s1,
2493 PL_colors[3],
2494 (docolor ? "" : "> <"),
2495 PL_colors[0],
2496 len2, s2,
2497 PL_colors[1],
2498 15 - l - pref_len + 1,
2499 "",
2500 (IV)(scan - PL_regprogram), PL_regindent*2, "",
3f7c398e 2501 SvPVX_const(prop));
2a782b5b
JH
2502 }
2503 });
d6a28714
JH
2504
2505 next = scan + NEXT_OFF(scan);
2506 if (next == scan)
2507 next = NULL;
2508
2509 switch (OP(scan)) {
2510 case BOL:
7fba1cd6 2511 if (locinput == PL_bostr)
d6a28714
JH
2512 {
2513 /* regtill = regbol; */
b8c5462f
JH
2514 break;
2515 }
d6a28714
JH
2516 sayNO;
2517 case MBOL:
12d33761
HS
2518 if (locinput == PL_bostr ||
2519 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2520 {
b8c5462f
JH
2521 break;
2522 }
d6a28714
JH
2523 sayNO;
2524 case SBOL:
c2a73568 2525 if (locinput == PL_bostr)
b8c5462f 2526 break;
d6a28714
JH
2527 sayNO;
2528 case GPOS:
2529 if (locinput == PL_reg_ganch)
2530 break;
2531 sayNO;
2532 case EOL:
d6a28714
JH
2533 goto seol;
2534 case MEOL:
d6a28714 2535 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2536 sayNO;
b8c5462f 2537 break;
d6a28714
JH
2538 case SEOL:
2539 seol:
2540 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2541 sayNO;
d6a28714 2542 if (PL_regeol - locinput > 1)
b8c5462f 2543 sayNO;
b8c5462f 2544 break;
d6a28714
JH
2545 case EOS:
2546 if (PL_regeol != locinput)
b8c5462f 2547 sayNO;
d6a28714 2548 break;
ffc61ed2 2549 case SANY:
d6a28714 2550 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2551 sayNO;
f33976b4
DB
2552 if (do_utf8) {
2553 locinput += PL_utf8skip[nextchr];
2554 if (locinput > PL_regeol)
2555 sayNO;
2556 nextchr = UCHARAT(locinput);
2557 }
2558 else
2559 nextchr = UCHARAT(++locinput);
2560 break;
2561 case CANY:
2562 if (!nextchr && locinput >= PL_regeol)
2563 sayNO;
b8c5462f 2564 nextchr = UCHARAT(++locinput);
a0d0e21e 2565 break;
ffc61ed2 2566 case REG_ANY:
1aa99e6b
IH
2567 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2568 sayNO;
2569 if (do_utf8) {
b8c5462f 2570 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2571 if (locinput > PL_regeol)
2572 sayNO;
a0ed51b3 2573 nextchr = UCHARAT(locinput);
a0ed51b3 2574 }
1aa99e6b
IH
2575 else
2576 nextchr = UCHARAT(++locinput);
a0ed51b3 2577 break;
a3621e74
YO
2578
2579
2580
2581 /*
2582 traverse the TRIE keeping track of all accepting states
2583 we transition through until we get to a failing node.
2584
2585 we use two slightly different pieces of code to handle
2586 the traversal depending on whether its case sensitive or
2587 not. we reuse the accept code however. (this should probably
2588 be turned into a macro.)
2589
2590 */
2591 case TRIEF:
2592 case TRIEFL:
2593 {
a3621e74
YO
2594 U8 *uc = ( U8* )locinput;
2595 U32 state = 1;
2596 U16 charid = 0;
2597 U32 base = 0;
2598 UV uvc = 0;
2599 STRLEN len = 0;
2600 STRLEN foldlen = 0;
a3621e74
YO
2601 U8 *uscan = (U8*)NULL;
2602 STRLEN bufflen=0;
2603 accepted = 0;
2604
2605 trie = (reg_trie_data*)PL_regdata->data[ ARG( scan ) ];
2606
2607 while ( state && uc <= (U8*)PL_regeol ) {
2608
2609 TRIE_CHECK_STATE_IS_ACCEPTING;
2610
2611 base = trie->states[ state ].trans.base;
2612
2613 DEBUG_TRIE_EXECUTE_r(
2614 PerlIO_printf( Perl_debug_log,
e4584336 2615 "%*s %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ",
a3621e74 2616 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
e4584336 2617 (UV)state, (UV)base, (UV)accepted );
a3621e74
YO
2618 );
2619
2620 if ( base ) {
2621
108bb1ad 2622 if ( do_utf8 ) {
a3621e74
YO
2623 if ( foldlen>0 ) {
2624 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
2625 foldlen -= len;
2626 uscan += len;
2627 len=0;
2628 } else {
1df70142 2629 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
a3621e74
YO
2630 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2631 uvc = to_uni_fold( uvc, foldbuf, &foldlen );
2632 foldlen -= UNISKIP( uvc );
2633 uscan = foldbuf + UNISKIP( uvc );
2634 }
2635 } else {
e4584336 2636 uvc = (UV)*uc;
a3621e74
YO
2637 len = 1;
2638 }
2639
2640 TRIE_HANDLE_CHAR;
2641
2642 } else {
2643 state = 0;
2644 }
2645 DEBUG_TRIE_EXECUTE_r(
e4584336
RB
2646 PerlIO_printf( Perl_debug_log,
2647 "Charid:%3x CV:%4"UVxf" After State: %4"UVxf"%s\n",
2648 charid, uvc, (UV)state, PL_colors[5] );
a3621e74
YO
2649 );
2650 }
2651 if ( !accepted ) {
2652 sayNO;
2653 } else {
2654 goto TrieAccept;
2655 }
2656 }
2657 /* unreached codepoint: we jump into the middle of the next case
2658 from previous if blocks */
2659 case TRIE:
2660 {
a3621e74
YO
2661 U8 *uc = (U8*)locinput;
2662 U32 state = 1;
2663 U16 charid = 0;
2664 U32 base = 0;
2665 UV uvc = 0;
2666 STRLEN len = 0;
2667 STRLEN bufflen = 0;
2668 accepted = 0;
2669
2670 trie = (reg_trie_data*)PL_regdata->data[ ARG( scan ) ];
2671
2672 while ( state && uc <= (U8*)PL_regeol ) {
2673
2674 TRIE_CHECK_STATE_IS_ACCEPTING;
2675
2676 base = trie->states[ state ].trans.base;
2677
2678 DEBUG_TRIE_EXECUTE_r(
2679 PerlIO_printf( Perl_debug_log,
e4584336 2680 "%*s %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ",
a3621e74 2681 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
e4584336 2682 (UV)state, (UV)base, (UV)accepted );
a3621e74
YO
2683 );
2684
2685 if ( base ) {
2686
108bb1ad 2687 if ( do_utf8 ) {
a3621e74
YO
2688 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2689 } else {
2690 uvc = (U32)*uc;
2691 len = 1;
2692 }
2693
2694 TRIE_HANDLE_CHAR;
2695
2696 } else {
2697 state = 0;
2698 }
2699 DEBUG_TRIE_EXECUTE_r(
2700 PerlIO_printf( Perl_debug_log,
e4584336
RB
2701 "Charid:%3x CV:%4"UVxf" After State: %4"UVxf"%s\n",
2702 charid, uvc, (UV)state, PL_colors[5] );
a3621e74
YO
2703 );
2704 }
2705 if ( !accepted ) {
2706 sayNO;
2707 }
2708 }
2709
2710
2711 /*
2712 There was at least one accepting state that we
2713 transitioned through. Presumably the number of accepting
2714 states is going to be low, typically one or two. So we
2715 simply scan through to find the one with lowest wordnum.
2716 Once we find it, we swap the last state into its place
2717 and decrement the size. We then try to match the rest of
2718 the pattern at the point where the word ends, if we
2719 succeed then we end the loop, otherwise the loop
2720 eventually terminates once all of the accepting states
2721 have been tried.
2722 */
2723 TrieAccept:
2724 {
2725 int gotit = 0;
2726
2727 if ( accepted == 1 ) {
2728 DEBUG_EXECUTE_r({
2729 SV **tmp = av_fetch( trie->words, accept_buff[ 0 ].wordnum-1, 0 );
2730 PerlIO_printf( Perl_debug_log,
2731 "%*s %sonly one match : #%d <%s>%s\n",
e4584336 2732 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
a3621e74 2733 accept_buff[ 0 ].wordnum,
cfd0369c 2734 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",
a3621e74
YO
2735 PL_colors[5] );
2736 });
cc601c31 2737 PL_reginput = (char *)accept_buff[ 0 ].endpos;
a3621e74
YO
2738 /* in this case we free tmps/leave before we call regmatch
2739 as we wont be using accept_buff again. */
2740 FREETMPS;
2741 LEAVE;
2742 gotit = regmatch( scan + NEXT_OFF( scan ) );
2743 } else {
2744 DEBUG_EXECUTE_r(
e4584336
RB
2745 PerlIO_printf( Perl_debug_log,"%*s %sgot %"IVdf" possible matches%s\n",
2746 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], (IV)accepted,
a3621e74
YO
2747 PL_colors[5] );
2748 );
2749 while ( !gotit && accepted-- ) {
2750 U32 best = 0;
2751 U32 cur;
2752 for( cur = 1 ; cur <= accepted ; cur++ ) {
e4584336
RB
2753 DEBUG_TRIE_EXECUTE_r(
2754 PerlIO_printf( Perl_debug_log,
2755 "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
2756 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
2757 (IV)best, accept_buff[ best ].wordnum, (IV)cur,
2758 accept_buff[ cur ].wordnum, PL_colors[5] );
2759 );
a3621e74
YO
2760
2761 if ( accept_buff[ cur ].wordnum < accept_buff[ best ].wordnum )
2762 best = cur;
2763 }
2764 DEBUG_EXECUTE_r({
2765 SV **tmp = av_fetch( trie->words, accept_buff[ best ].wordnum - 1, 0 );
2766 PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at 0x%p%s\n",
e4584336 2767 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
a3621e74 2768 accept_buff[best].wordnum,
cfd0369c 2769 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",scan,
a3621e74
YO
2770 PL_colors[5] );
2771 });
2772 if ( best<accepted ) {
2773 reg_trie_accepted tmp = accept_buff[ best ];
2774 accept_buff[ best ] = accept_buff[ accepted ];
2775 accept_buff[ accepted ] = tmp;
2776 best = accepted;
2777 }
cc601c31 2778 PL_reginput = (char *)accept_buff[ best ].endpos;
a3621e74
YO
2779
2780 /*
2781 as far as I can tell we only need the SAVETMPS/FREETMPS
2782 for re's with EVAL in them but I'm leaving them in for
2783 all until I can be sure.
2784 */
2785 SAVETMPS;
2786 gotit = regmatch( scan + NEXT_OFF( scan ) ) ;
2787 FREETMPS;
2788 }
2789 FREETMPS;
2790 LEAVE;
2791 }
2792
2793 if ( gotit ) {
2794 sayYES;
2795 } else {
2796 sayNO;
2797 }
2798 }
2799 /* unreached codepoint */
d6a28714 2800 case EXACT:
cd439c50
IZ
2801 s = STRING(scan);
2802 ln = STR_LEN(scan);
eb160463 2803 if (do_utf8 != UTF) {
bc517b45 2804 /* The target and the pattern have differing utf8ness. */
1aa99e6b 2805 char *l = locinput;
a3b680e6 2806 const char *e = s + ln;
a72c7584 2807
5ff6fc6d
JH
2808 if (do_utf8) {
2809 /* The target is utf8, the pattern is not utf8. */
1aa99e6b 2810 while (s < e) {
a3b680e6 2811 STRLEN ulen;
1aa99e6b 2812 if (l >= PL_regeol)
5ff6fc6d
JH
2813 sayNO;
2814 if (NATIVE_TO_UNI(*(U8*)s) !=
89ebb4a3 2815 utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
041457d9 2816 uniflags))
5ff6fc6d 2817 sayNO;
bc517b45 2818 l += ulen;
5ff6fc6d 2819 s ++;
1aa99e6b 2820 }
5ff6fc6d
JH
2821 }
2822 else {
2823 /* The target is not utf8, the pattern is utf8. */
1aa99e6b 2824 while (s < e) {
a3b680e6 2825 STRLEN ulen;
1aa99e6b
IH
2826 if (l >= PL_regeol)
2827 sayNO;
5ff6fc6d 2828 if (NATIVE_TO_UNI(*((U8*)l)) !=
89ebb4a3 2829 utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
041457d9 2830 uniflags))
1aa99e6b 2831 sayNO;
bc517b45 2832 s += ulen;
a72c7584 2833 l ++;
1aa99e6b 2834 }
5ff6fc6d 2835 }
1aa99e6b
IH
2836 locinput = l;
2837 nextchr = UCHARAT(locinput);
2838 break;
2839 }
bc517b45 2840 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2841 /* Inline the first character, for speed. */
2842 if (UCHARAT(s) != nextchr)
2843 sayNO;
2844 if (PL_regeol - locinput < ln)
2845 sayNO;
2846 if (ln > 1 && memNE(s, locinput, ln))
2847 sayNO;
2848 locinput += ln;
2849 nextchr = UCHARAT(locinput);
2850 break;
2851 case EXACTFL:
b8c5462f
JH
2852 PL_reg_flags |= RF_tainted;
2853 /* FALL THROUGH */
d6a28714 2854 case EXACTF:
cd439c50
IZ
2855 s = STRING(scan);
2856 ln = STR_LEN(scan);
d6a28714 2857
d07ddd77
JH
2858 if (do_utf8 || UTF) {
2859 /* Either target or the pattern are utf8. */
d6a28714 2860 char *l = locinput;
d07ddd77 2861 char *e = PL_regeol;
bc517b45 2862
eb160463 2863 if (ibcmp_utf8(s, 0, ln, (bool)UTF,
1feea2c7 2864 l, &e, 0, do_utf8)) {
5486206c
JH
2865 /* One more case for the sharp s:
2866 * pack("U0U*", 0xDF) =~ /ss/i,
2867 * the 0xC3 0x9F are the UTF-8
2868 * byte sequence for the U+00DF. */
2869 if (!(do_utf8 &&
2870 toLOWER(s[0]) == 's' &&
2871 ln >= 2 &&
2872 toLOWER(s[1]) == 's' &&
2873 (U8)l[0] == 0xC3 &&
2874 e - l >= 2 &&
2875 (U8)l[1] == 0x9F))
2876 sayNO;
2877 }
d07ddd77
JH
2878 locinput = e;
2879 nextchr = UCHARAT(locinput);
2880 break;
a0ed51b3 2881 }
d6a28714 2882
bc517b45
JH
2883 /* Neither the target and the pattern are utf8. */
2884
d6a28714
JH
2885 /* Inline the first character, for speed. */
2886 if (UCHARAT(s) != nextchr &&
2887 UCHARAT(s) != ((OP(scan) == EXACTF)
2888 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2889 sayNO;
d6a28714 2890 if (PL_regeol - locinput < ln)
b8c5462f 2891 sayNO;
d6a28714
JH
2892 if (ln > 1 && (OP(scan) == EXACTF
2893 ? ibcmp(s, locinput, ln)
2894 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2895 sayNO;
d6a28714
JH
2896 locinput += ln;
2897 nextchr = UCHARAT(locinput);
a0d0e21e 2898 break;
d6a28714 2899 case ANYOF:
ffc61ed2 2900 if (do_utf8) {
9e55ce06
JH
2901 STRLEN inclasslen = PL_regeol - locinput;
2902
ba7b4546 2903 if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
e0f9d4a8 2904 sayNO_ANYOF;
ffc61ed2
JH
2905 if (locinput >= PL_regeol)
2906 sayNO;
0f0076b4 2907 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
b8c5462f 2908 nextchr = UCHARAT(locinput);
e0f9d4a8 2909 break;
ffc61ed2
JH
2910 }
2911 else {
2912 if (nextchr < 0)
2913 nextchr = UCHARAT(locinput);
7d3e948e 2914 if (!REGINCLASS(scan, (U8*)locinput))
e0f9d4a8 2915 sayNO_ANYOF;
ffc61ed2
JH
2916 if (!nextchr && locinput >= PL_regeol)
2917 sayNO;
2918 nextchr = UCHARAT(++locinput);
e0f9d4a8
JH
2919 break;
2920 }
2921 no_anyof:
2922 /* If we might have the case of the German sharp s
2923 * in a casefolding Unicode character class. */
2924
ebc501f0
JH
2925 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2926 locinput += SHARP_S_SKIP;
e0f9d4a8 2927 nextchr = UCHARAT(locinput);
ffc61ed2 2928 }
e0f9d4a8
JH
2929 else
2930 sayNO;
b8c5462f 2931 break;
d6a28714 2932 case ALNUML:
b8c5462f
JH
2933 PL_reg_flags |= RF_tainted;
2934 /* FALL THROUGH */
d6a28714 2935 case ALNUM:
b8c5462f 2936 if (!nextchr)
4633a7c4 2937 sayNO;
ffc61ed2 2938 if (do_utf8) {
1a4fad37 2939 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 2940 if (!(OP(scan) == ALNUM
3568d838 2941 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2942 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2943 {
2944 sayNO;
a0ed51b3 2945 }
b8c5462f 2946 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2947 nextchr = UCHARAT(locinput);
2948 break;
2949 }
ffc61ed2 2950 if (!(OP(scan) == ALNUM
d6a28714 2951 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2952 sayNO;
b8c5462f 2953 nextchr = UCHARAT(++locinput);
a0d0e21e 2954 break;
d6a28714 2955 case NALNUML:
b8c5462f
JH
2956 PL_reg_flags |= RF_tainted;
2957 /* FALL THROUGH */
d6a28714
JH
2958 case NALNUM:
2959 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2960 sayNO;
ffc61ed2 2961 if (do_utf8) {
1a4fad37 2962 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 2963 if (OP(scan) == NALNUM
3568d838 2964 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2965 : isALNUM_LC_utf8((U8*)locinput))
2966 {
b8c5462f 2967 sayNO;
d6a28714 2968 }
b8c5462f
JH
2969 locinput += PL_utf8skip[nextchr];
2970 nextchr = UCHARAT(locinput);
2971 break;
2972 }
ffc61ed2 2973 if (OP(scan) == NALNUM
d6a28714 2974 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2975 sayNO;
76e3520e 2976 nextchr = UCHARAT(++locinput);
a0d0e21e 2977 break;
d6a28714
JH
2978 case BOUNDL:
2979 case NBOUNDL:
3280af22 2980 PL_reg_flags |= RF_tainted;
bbce6d69 2981 /* FALL THROUGH */
d6a28714
JH
2982 case BOUND:
2983 case NBOUND:
2984 /* was last char in word? */
ffc61ed2 2985 if (do_utf8) {
12d33761
HS
2986 if (locinput == PL_bostr)
2987 ln = '\n';
ffc61ed2 2988 else {
a3b680e6 2989 const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
9041c2e3 2990
b4f7163a 2991 ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
ffc61ed2
JH
2992 }
2993 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2994 ln = isALNUM_uni(ln);
1a4fad37 2995 LOAD_UTF8_CHARCLASS_ALNUM();
3568d838 2996 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2997 }
2998 else {
9041c2e3 2999 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
3000 n = isALNUM_LC_utf8((U8*)locinput);
3001 }
a0ed51b3 3002 }
d6a28714 3003 else {
12d33761
HS
3004 ln = (locinput != PL_bostr) ?
3005 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
3006 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3007 ln = isALNUM(ln);
3008 n = isALNUM(nextchr);
3009 }
3010 else {
3011 ln = isALNUM_LC(ln);
3012 n = isALNUM_LC(nextchr);
3013 }
d6a28714 3014 }
ffc61ed2
JH
3015 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3016 OP(scan) == BOUNDL))
3017 sayNO;
a0ed51b3 3018 break;
d6a28714 3019 case SPACEL:
3280af22 3020 PL_reg_flags |= RF_tainted;
bbce6d69 3021 /* FALL THROUGH */
d6a28714 3022 case SPACE:
9442cb0e 3023 if (!nextchr)
4633a7c4 3024 sayNO;
1aa99e6b 3025 if (do_utf8) {
fd400ab9 3026 if (UTF8_IS_CONTINUED(nextchr)) {
1a4fad37 3027 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3028 if (!(OP(scan) == SPACE
3568d838 3029 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
3030 : isSPACE_LC_utf8((U8*)locinput)))
3031 {
3032 sayNO;
3033 }
3034 locinput += PL_utf8skip[nextchr];
3035 nextchr = UCHARAT(locinput);
3036 break;
d6a28714 3037 }
ffc61ed2
JH
3038 if (!(OP(scan) == SPACE
3039 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3040 sayNO;
3041 nextchr = UCHARAT(++locinput);
3042 }
3043 else {
3044 if (!(OP(scan) == SPACE
3045 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3046 sayNO;
3047 nextchr = UCHARAT(++locinput);
a0ed51b3 3048 }
a0ed51b3 3049 break;
d6a28714 3050 case NSPACEL:
3280af22 3051 PL_reg_flags |= RF_tainted;
bbce6d69 3052 /* FALL THROUGH */
d6a28714 3053 case NSPACE:
9442cb0e 3054 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3055 sayNO;
1aa99e6b 3056 if (do_utf8) {
1a4fad37 3057 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3058 if (OP(scan) == NSPACE
3568d838 3059 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 3060 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
3061 {
3062 sayNO;
3063 }
3064 locinput += PL_utf8skip[nextchr];
3065 nextchr = UCHARAT(locinput);
3066 break;
a0ed51b3 3067 }
ffc61ed2 3068 if (OP(scan) == NSPACE
d6a28714 3069 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 3070 sayNO;
76e3520e 3071 nextchr = UCHARAT(++locinput);
a0d0e21e 3072 break;
d6a28714 3073 case DIGITL:
a0ed51b3
LW
3074 PL_reg_flags |= RF_tainted;
3075 /* FALL THROUGH */
d6a28714 3076 case DIGIT:
9442cb0e 3077 if (!nextchr)
a0ed51b3 3078 sayNO;
1aa99e6b 3079 if (do_utf8) {
1a4fad37 3080 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3081 if (!(OP(scan) == DIGIT
3568d838 3082 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 3083 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 3084 {
a0ed51b3 3085 sayNO;
dfe13c55 3086 }
6f06b55f 3087 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3088 nextchr = UCHARAT(locinput);
3089 break;
3090 }
ffc61ed2 3091 if (!(OP(scan) == DIGIT
9442cb0e 3092 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
3093 sayNO;
3094 nextchr = UCHARAT(++locinput);
3095 break;
d6a28714 3096 case NDIGITL:
b8c5462f
JH
3097 PL_reg_flags |= RF_tainted;
3098 /* FALL THROUGH */
d6a28714 3099 case NDIGIT:
9442cb0e 3100 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3101 sayNO;
1aa99e6b 3102 if (do_utf8) {
1a4fad37 3103 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3104 if (OP(scan) == NDIGIT
3568d838 3105 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
3106 : isDIGIT_LC_utf8((U8*)locinput))
3107 {
a0ed51b3 3108 sayNO;
9442cb0e 3109 }
6f06b55f 3110 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3111 nextchr = UCHARAT(locinput);
3112 break;
3113 }
ffc61ed2 3114 if (OP(scan) == NDIGIT
9442cb0e 3115 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
3116 sayNO;
3117 nextchr = UCHARAT(++locinput);
3118 break;
3119 case CLUMP:
b7c83a7e 3120 if (locinput >= PL_regeol)
a0ed51b3 3121 sayNO;
b7c83a7e 3122 if (do_utf8) {
1a4fad37 3123 LOAD_UTF8_CHARCLASS_MARK();
b7c83a7e
JH
3124 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3125 sayNO;
3126 locinput += PL_utf8skip[nextchr];
3127 while (locinput < PL_regeol &&
3128 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3129 locinput += UTF8SKIP(locinput);
3130 if (locinput > PL_regeol)
3131 sayNO;
eb08e2da
JH
3132 }
3133 else
3134 locinput++;
a0ed51b3
LW
3135 nextchr = UCHARAT(locinput);
3136 break;
c8756f30 3137 case REFFL:
3280af22 3138 PL_reg_flags |= RF_tainted;
c8756f30 3139 /* FALL THROUGH */
c277df42 3140 case REF:
c8756f30 3141 case REFF:
c277df42 3142 n = ARG(scan); /* which paren pair */
cf93c79d 3143 ln = PL_regstartp[n];
2c2d71f5 3144 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
eb160463 3145 if ((I32)*PL_reglastparen < n || ln == -1)
af3f8c16 3146 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 3147 if (ln == PL_regendp[n])
a0d0e21e 3148 break;
a0ed51b3 3149
cf93c79d 3150 s = PL_bostr + ln;
1aa99e6b 3151 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 3152 char *l = locinput;
a3b680e6 3153 const char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
3154 /*
3155 * Note that we can't do the "other character" lookup trick as
3156 * in the 8-bit case (no pun intended) because in Unicode we
3157 * have to map both upper and title case to lower case.
3158 */
3159 if (OP(scan) == REFF) {
3160 while (s < e) {
a3b680e6
AL
3161 STRLEN ulen1, ulen2;
3162 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3163 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3164
a0ed51b3
LW
3165 if (l >= PL_regeol)
3166 sayNO;
a2a2844f
JH
3167 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3168 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 3169 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 3170 sayNO;
a2a2844f
JH
3171 s += ulen1;
3172 l += ulen2;
a0ed51b3
LW
3173 }
3174 }
3175 locinput = l;
3176 nextchr = UCHARAT(locinput);
3177 break;
3178 }
3179
a0d0e21e 3180 /* Inline the first character, for speed. */
76e3520e 3181 if (UCHARAT(s) != nextchr &&
c8756f30
AK
3182 (OP(scan) == REF ||
3183 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 3184 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 3185 sayNO;
cf93c79d 3186 ln = PL_regendp[n] - ln;
3280af22 3187 if (locinput + ln > PL_regeol)
4633a7c4 3188 sayNO;
c8756f30
AK
3189 if (ln > 1 && (OP(scan) == REF
3190 ? memNE(s, locinput, ln)
3191 : (OP(scan) == REFF
3192 ? ibcmp(s, locinput, ln)
3193 : ibcmp_locale(s, locinput, ln))))
4633a7c4 3194 sayNO;
a0d0e21e 3195 locinput += ln;
76e3520e 3196 nextchr = UCHARAT(locinput);
a0d0e21e
LW
3197 break;
3198
3199 case NOTHING:
c277df42 3200 case TAIL:
a0d0e21e
LW
3201 break;
3202 case BACK:
3203 break;
c277df42
IZ
3204 case EVAL:
3205 {
3206 dSP;
533c011a 3207 OP_4tree *oop = PL_op;
3280af22 3208 COP *ocurcop = PL_curcop;
f3548bdc 3209 PAD *old_comppad;
c277df42 3210 SV *ret;
080c2dec 3211 struct regexp *oreg = PL_reg_re;
9041c2e3 3212
c277df42 3213 n = ARG(scan);
533c011a 3214 PL_op = (OP_4tree*)PL_regdata->data[n];
a3621e74 3215 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
f3548bdc 3216 PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_regdata->data[n + 2]);
cf93c79d 3217 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 3218
8e5e9ebe
RGS
3219 {
3220 SV **before = SP;
3221 CALLRUNOPS(aTHX); /* Scalar context. */
3222 SPAGAIN;
3223 if (SP == before)
075aa684 3224 ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
8e5e9ebe
RGS
3225 else {
3226 ret = POPs;
3227 PUTBACK;
3228 }
3229 }
3230
0f5d15d6 3231 PL_op = oop;
f3548bdc 3232 PAD_RESTORE_LOCAL(old_comppad);
0f5d15d6 3233 PL_curcop = ocurcop;
c277df42 3234 if (logical) {
0f5d15d6
IZ
3235 if (logical == 2) { /* Postponed subexpression. */
3236 regexp *re;
22c35a8c 3237 MAGIC *mg = Null(MAGIC*);
0f5d15d6 3238 re_cc_state state;
0f5d15d6 3239 CHECKPOINT cp, lastcp;
cb50f42d 3240 int toggleutf;
faf82a0b 3241 register SV *sv;
0f5d15d6 3242
faf82a0b
AE
3243 if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3244 mg = mg_find(sv, PERL_MAGIC_qr);
3245 else if (SvSMAGICAL(ret)) {
3246 if (SvGMAGICAL(ret))
3247 sv_unmagic(ret, PERL_MAGIC_qr);
3248 else
3249 mg = mg_find(ret, PERL_MAGIC_qr);
0f5d15d6 3250 }
faf82a0b 3251
0f5d15d6
IZ
3252 if (mg) {
3253 re = (regexp *)mg->mg_obj;
df0003d4 3254 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
3255 }
3256 else {
3257 STRLEN len;
83003860 3258 const char *t = SvPV_const(ret, len);
0f5d15d6 3259 PMOP pm;
a3b680e6
AL
3260 char * const oprecomp = PL_regprecomp;
3261 const I32 osize = PL_regsize;
3262 const I32 onpar = PL_regnpar;
0f5d15d6 3263
5fcd1c1b 3264 Zero(&pm, 1, PMOP);
cb50f42d 3265 if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
83003860 3266 re = CALLREGCOMP(aTHX_ (char*)t, (char*)t + len, &pm);
9041c2e3 3267 if (!(SvFLAGS(ret)
faf82a0b
AE
3268 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3269 | SVs_GMG)))
14befaf4
DM
3270 sv_magic(ret,(SV*)ReREFCNT_inc(re),
3271 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
3272 PL_regprecomp = oprecomp;
3273 PL_regsize = osize;
3274 PL_regnpar = onpar;
3275 }
a3621e74 3276 DEBUG_EXECUTE_r(
9041c2e3 3277 PerlIO_printf(Perl_debug_log,
a0288114 3278 "Entering embedded \"%s%.60s%s%s\"\n",
0f5d15d6
IZ
3279 PL_colors[0],
3280 re->precomp,
3281 PL_colors[1],
3282 (strlen(re->precomp) > 60 ? "..." : ""))
3283 );
3284 state.node = next;
3285 state.prev = PL_reg_call_cc;
3286 state.cc = PL_regcc;
3287 state.re = PL_reg_re;
3288
2ab05381 3289 PL_regcc = 0;
9041c2e3 3290
0f5d15d6 3291 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3292 REGCP_SET(lastcp);
0f5d15d6
IZ
3293 cache_re(re);
3294 state.ss = PL_savestack_ix;
3295 *PL_reglastparen = 0;
a01268b5 3296 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
3297 PL_reg_call_cc = &state;
3298 PL_reginput = locinput;
cb50f42d
YST
3299 toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
3300 ((re->reganch & ROPT_UTF8) != 0);
3301 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c2d71f5
JH
3302
3303 /* XXXX This is too dramatic a measure... */
3304 PL_reg_maxiter = 0;
3305
0f5d15d6 3306 if (regmatch(re->program + 1)) {
2c914db6
IZ
3307 /* Even though we succeeded, we need to restore
3308 global variables, since we may be wrapped inside
3309 SUSPEND, thus the match may be not finished yet. */
3310
3311 /* XXXX Do this only if SUSPENDed? */
3312 PL_reg_call_cc = state.prev;
3313 PL_regcc = state.cc;
3314 PL_reg_re = state.re;
3315 cache_re(PL_reg_re);
cb50f42d 3316 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c914db6
IZ
3317
3318 /* XXXX This is too dramatic a measure... */
3319 PL_reg_maxiter = 0;
3320
3321 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
3322 ReREFCNT_dec(re);
3323 regcpblow(cp);
3324 sayYES;
3325 }
0f5d15d6 3326 ReREFCNT_dec(re);
02db2b7b 3327 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3328 regcppop();
3329 PL_reg_call_cc = state.prev;
3330 PL_regcc = state.cc;
3331 PL_reg_re = state.re;
d3790889 3332 cache_re(PL_reg_re);
cb50f42d 3333 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c2d71f5
JH
3334
3335 /* XXXX This is too dramatic a measure... */
3336 PL_reg_maxiter = 0;
3337
8e514ae6 3338 logical = 0;
0f5d15d6
IZ
3339 sayNO;
3340 }
c277df42 3341 sw = SvTRUE(ret);
0f5d15d6 3342 logical = 0;
a0ed51b3 3343 }
080c2dec 3344 else {
3280af22 3345 sv_setsv(save_scalar(PL_replgv), ret);
080c2dec
AE
3346 cache_re(oreg);
3347 }
c277df42
IZ
3348 break;
3349 }
a0d0e21e 3350 case OPEN:
c277df42 3351 n = ARG(scan); /* which paren pair */
3280af22
NIS
3352 PL_reg_start_tmp[n] = locinput;
3353 if (n > PL_regsize)
3354 PL_regsize = n;
a0d0e21e
LW
3355 break;
3356 case CLOSE:
c277df42 3357 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
3358 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3359 PL_regendp[n] = locinput - PL_bostr;
eb160463 3360 if (n > (I32)*PL_reglastparen)
3280af22 3361 *PL_reglastparen = n;
a01268b5 3362 *PL_reglastcloseparen = n;
a0d0e21e 3363 break;
c277df42
IZ
3364 case GROUPP:
3365 n = ARG(scan); /* which paren pair */
eb160463 3366 sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
3367 break;
3368 case IFTHEN:
2c2d71f5 3369 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
3370 if (sw)
3371 next = NEXTOPER(NEXTOPER(scan));
3372 else {
3373 next = scan + ARG(scan);
3374 if (OP(next) == IFTHEN) /* Fake one. */
3375 next = NEXTOPER(NEXTOPER(next));
3376 }
3377 break;
3378 case LOGICAL:
0f5d15d6 3379 logical = scan->flags;
c277df42 3380 break;
2ab05381
IZ
3381/*******************************************************************
3382 PL_regcc contains infoblock about the innermost (...)* loop, and
3383 a pointer to the next outer infoblock.
3384
3385 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3386
3387 1) After matching X, regnode for CURLYX is processed;
3388
9041c2e3 3389 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
3390 regmatch() recursively with the starting point at WHILEM node;
3391
3392 3) Each hit of WHILEM node tries to match A and Z (in the order
3393 depending on the current iteration, min/max of {min,max} and
3394 greediness). The information about where are nodes for "A"
3395 and "Z" is read from the infoblock, as is info on how many times "A"
3396 was already matched, and greediness.
3397
3398 4) After A matches, the same WHILEM node is hit again.
3399
3400 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
3401 of the same pair. Thus when WHILEM tries to match Z, it temporarily
3402 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
3403 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
3404 of the external loop.
3405
3406 Currently present infoblocks form a tree with a stem formed by PL_curcc
3407 and whatever it mentions via ->next, and additional attached trees
3408 corresponding to temporarily unset infoblocks as in "5" above.
3409
9041c2e3 3410 In the following picture infoblocks for outer loop of
2ab05381
IZ
3411 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
3412 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
3413 infoblocks are drawn below the "reset" infoblock.
3414
3415 In fact in the picture below we do not show failed matches for Z and T
3416 by WHILEM blocks. [We illustrate minimal matches, since for them it is
3417 more obvious *why* one needs to *temporary* unset infoblocks.]
3418
3419 Matched REx position InfoBlocks Comment
3420 (Y(A)*?Z)*?T x
3421 Y(A)*?Z)*?T x <- O
3422 Y (A)*?Z)*?T x <- O
3423 Y A)*?Z)*?T x <- O <- I
3424 YA )*?Z)*?T x <- O <- I
3425 YA A)*?Z)*?T x <- O <- I
3426 YAA )*?Z)*?T x <- O <- I
3427 YAA Z)*?T x <- O # Temporary unset I
3428 I
3429
3430 YAAZ Y(A)*?Z)*?T x <- O
3431 I
3432
3433 YAAZY (A)*?Z)*?T x <- O
3434 I
3435
3436 YAAZY A)*?Z)*?T x <- O <- I
3437 I
3438
3439 YAAZYA )*?Z)*?T x <- O <- I
3440 I
3441
3442 YAAZYA Z)*?T x <- O # Temporary unset I
3443 I,I
3444
3445 YAAZYAZ )*?T x <- O
3446 I,I
3447
3448 YAAZYAZ T x # Temporary unset O
3449 O
3450 I,I
3451
3452 YAAZYAZT x
3453 O
3454 I,I
3455 *******************************************************************/
a0d0e21e
LW
3456 case CURLYX: {
3457 CURCUR cc;
3280af22 3458 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
3459 /* No need to save/restore up to this paren */
3460 I32 parenfloor = scan->flags;
c277df42
IZ
3461
3462 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3463 next += ARG(next);
3280af22
NIS
3464 cc.oldcc = PL_regcc;
3465 PL_regcc = &cc;
cb434fcc
IZ
3466 /* XXXX Probably it is better to teach regpush to support
3467 parenfloor > PL_regsize... */
eb160463 3468 if (parenfloor > (I32)*PL_reglastparen)
cb434fcc
IZ
3469 parenfloor = *PL_reglastparen; /* Pessimization... */
3470 cc.parenfloor = parenfloor;
a0d0e21e
LW
3471 cc.cur = -1;
3472 cc.min = ARG1(scan);
3473 cc.max = ARG2(scan);
c277df42 3474 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
3475 cc.next = next;
3476 cc.minmod = minmod;
3477 cc.lastloc = 0;
3280af22 3478 PL_reginput = locinput;
a0d0e21e
LW
3479 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
3480 regcpblow(cp);
3280af22 3481 PL_regcc = cc.oldcc;
4633a7c4 3482 saySAME(n);
a0d0e21e
LW
3483 }
3484 /* NOT REACHED */
3485 case WHILEM: {
3486 /*
3487 * This is really hard to understand, because after we match
3488 * what we're trying to match, we must make sure the rest of
2c2d71f5 3489 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
3490 * to go back UP the parse tree by recursing ever deeper. And
3491 * if it fails, we have to reset our parent's current state
3492 * that we can try again after backing off.
3493 */
3494
c277df42 3495 CHECKPOINT cp, lastcp;
3280af22 3496 CURCUR* cc = PL_regcc;
c277df42 3497 char *lastloc = cc->lastloc; /* Detection of 0-len. */
3ab3c9b4 3498 I32 cache_offset = 0, cache_bit = 0;
c277df42 3499
4633a7c4 3500 n = cc->cur + 1; /* how many we know we matched */
3280af22 3501 PL_reginput = locinput;
a0d0e21e 3502
a3621e74 3503 DEBUG_EXECUTE_r(
9041c2e3 3504 PerlIO_printf(Perl_debug_log,
91f3b821 3505 "%*s %ld out of %ld..%ld cc=%"UVxf"\n",
3280af22 3506 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 3507 (long)n, (long)cc->min,
2797576d 3508 (long)cc->max, PTR2UV(cc))
c277df42 3509 );
4633a7c4 3510
a0d0e21e
LW
3511 /* If degenerate scan matches "", assume scan done. */
3512
579cf2c3 3513 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 3514 PL_regcc = cc->oldcc;
2ab05381
IZ
3515 if (PL_regcc)
3516 ln = PL_regcc->cur;
a3621e74 3517 DEBUG_EXECUTE_r(
c3464db5
DD
3518 PerlIO_printf(Perl_debug_log,
3519 "%*s empty match detected, try continuation...\n",
3280af22 3520 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3521 );
a0d0e21e 3522 if (regmatch(cc->next))
4633a7c4 3523 sayYES;
2ab05381
IZ
3524 if (PL_regcc)
3525 PL_regcc->cur = ln;
3280af22 3526 PL_regcc = cc;
4633a7c4 3527 sayNO;
a0d0e21e
LW
3528 }
3529
3530 /* First just match a string of min scans. */
3531
3532 if (n < cc->min) {
3533 cc->cur = n;
3534 cc->lastloc = locinput;
4633a7c4
LW
3535 if (regmatch(cc->scan))
3536 sayYES;
3537 cc->cur = n - 1;
c277df42 3538 cc->lastloc = lastloc;
4633a7c4 3539 sayNO;
a0d0e21e
LW
3540 }
3541
2c2d71f5
JH
3542 if (scan->flags) {
3543 /* Check whether we already were at this position.
3544 Postpone detection until we know the match is not
3545 *that* much linear. */
3546 if (!PL_reg_maxiter) {
3547 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3548 PL_reg_leftiter = PL_reg_maxiter;
3549 }
3550 if (PL_reg_leftiter-- == 0) {
a3b680e6 3551 const I32 size = (PL_reg_maxiter + 7 + POSCACHE_START)/8;
2c2d71f5 3552 if (PL_reg_poscache) {
eb160463 3553 if ((I32)PL_reg_poscache_size < size) {
2c2d71f5
JH
3554 Renew(PL_reg_poscache, size, char);
3555 PL_reg_poscache_size = size;
3556 }
3557 Zero(PL_reg_poscache, size, char);
3558 }
3559 else {
3560 PL_reg_poscache_size = size;
a02a5408 3561 Newxz(PL_reg_poscache, size, char);
2c2d71f5 3562 }
a3621e74 3563 DEBUG_EXECUTE_r(
2c2d71f5
JH
3564 PerlIO_printf(Perl_debug_log,
3565 "%sDetected a super-linear match, switching on caching%s...\n",
3566 PL_colors[4], PL_colors[5])
3567 );
3568 }
3569 if (PL_reg_leftiter < 0) {
3ab3c9b4 3570 cache_offset = locinput - PL_bostr;
2c2d71f5 3571
3ab3c9b4
HS
3572 cache_offset = (scan->flags & 0xf) - 1 + POSCACHE_START
3573 + cache_offset * (scan->flags>>4);
3574 cache_bit = cache_offset % 8;
3575 cache_offset /= 8;
3576 if (PL_reg_poscache[cache_offset] & (1<<cache_bit)) {
a3621e74 3577 DEBUG_EXECUTE_r(
2c2d71f5
JH
3578 PerlIO_printf(Perl_debug_log,
3579 "%*s already tried at this position...\n",
3580 REPORT_CODE_OFF+PL_regindent*2, "")
3581 );
3ab3c9b4
HS
3582 if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))
3583 /* cache records success */
c2b0868c
HS
3584 sayYES;
3585 else
3ab3c9b4 3586 /* cache records failure */
c2b0868c 3587 sayNO_SILENT;
2c2d71f5 3588 }
3ab3c9b4 3589 PL_reg_poscache[cache_offset] |= (1<<cache_bit);
2c2d71f5
JH
3590 }
3591 }
3592
a0d0e21e
LW
3593 /* Prefer next over scan for minimal matching. */
3594
3595 if (cc->minmod) {
3280af22 3596 PL_regcc = cc->oldcc;
2ab05381
IZ
3597 if (PL_regcc)
3598 ln = PL_regcc->cur;
5f05dabc 3599 cp = regcppush(cc->parenfloor);
02db2b7b 3600 REGCP_SET(lastcp);
5f05dabc 3601 if (regmatch(cc->next)) {
c277df42 3602 regcpblow(cp);
3ab3c9b4 3603 CACHEsayYES; /* All done. */
5f05dabc 3604 }
02db2b7b 3605 REGCP_UNWIND(lastcp);
5f05dabc 3606 regcppop();
2ab05381
IZ
3607 if (PL_regcc)
3608 PL_regcc->cur = ln;
3280af22 3609 PL_regcc = cc;
a0d0e21e 3610
c277df42 3611 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 3612 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3613 && !(PL_reg_flags & RF_warned)) {
3614 PL_reg_flags |= RF_warned;
9014280d 3615 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
2f3ca594
GS
3616 "Complex regular subexpression recursion",
3617 REG_INFTY - 1);
c277df42 3618 }
3ab3c9b4 3619 CACHEsayNO;
c277df42 3620 }
a687059c 3621
a3621e74 3622 DEBUG_EXECUTE_r(
c3464db5
DD
3623 PerlIO_printf(Perl_debug_log,
3624 "%*s trying longer...\n",
3280af22 3625 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3626 );
a0d0e21e 3627 /* Try scanning more and see if it helps. */
3280af22 3628 PL_reginput = locinput;
a0d0e21e
LW
3629 cc->cur = n;
3630 cc->lastloc = locinput;
5f05dabc 3631 cp = regcppush(cc->parenfloor);
02db2b7b 3632 REGCP_SET(lastcp);
5f05dabc 3633 if (regmatch(cc->scan)) {
c277df42 3634 regcpblow(cp);
3ab3c9b4 3635 CACHEsayYES;
5f05dabc 3636 }
02db2b7b 3637 REGCP_UNWIND(lastcp);
5f05dabc 3638 regcppop();
4633a7c4 3639 cc->cur = n - 1;
c277df42 3640 cc->lastloc = lastloc;
3ab3c9b4 3641 CACHEsayNO;
a0d0e21e
LW
3642 }
3643
3644 /* Prefer scan over next for maximal matching. */
3645
3646 if (n < cc->max) { /* More greed allowed? */
5f05dabc 3647 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
3648 cc->cur = n;
3649 cc->lastloc = locinput;
02db2b7b 3650 REGCP_SET(lastcp);
5f05dabc 3651 if (regmatch(cc->scan)) {
c277df42 3652 regcpblow(cp);
3ab3c9b4 3653 CACHEsayYES;
5f05dabc 3654 }
02db2b7b 3655 REGCP_UNWIND(lastcp);
a0d0e21e 3656 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3657 PL_reginput = locinput;
a3621e74 3658 DEBUG_EXECUTE_r(
c3464db5
DD
3659 PerlIO_printf(Perl_debug_log,
3660 "%*s failed, try continuation...\n",
3280af22 3661 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3662 );
3663 }
9041c2e3 3664 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3665 && !(PL_reg_flags & RF_warned)) {
3280af22 3666 PL_reg_flags |= RF_warned;
9014280d 3667 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
cb5d145d
GS
3668 "Complex regular subexpression recursion",
3669 REG_INFTY - 1);
a0d0e21e
LW
3670 }
3671
3672 /* Failed deeper matches of scan, so see if this one works. */
3280af22 3673 PL_regcc = cc->oldcc;
2ab05381
IZ
3674 if (PL_regcc)
3675 ln = PL_regcc->cur;
a0d0e21e 3676 if (regmatch(cc->next))
3ab3c9b4 3677 CACHEsayYES;
2ab05381
IZ
3678 if (PL_regcc)
3679 PL_regcc->cur = ln;
3280af22 3680 PL_regcc = cc;
4633a7c4 3681 cc->cur = n - 1;
c277df42 3682 cc->lastloc = lastloc;
3ab3c9b4 3683 CACHEsayNO;
a0d0e21e
LW
3684 }
3685 /* NOT REACHED */
9041c2e3 3686 case BRANCHJ:
c277df42
IZ
3687 next = scan + ARG(scan);
3688 if (next == scan)
3689 next = NULL;
3690 inner = NEXTOPER(NEXTOPER(scan));
3691 goto do_branch;
9041c2e3 3692 case BRANCH:
c277df42
IZ
3693 inner = NEXTOPER(scan);
3694 do_branch:
3695 {
c277df42
IZ
3696 c1 = OP(scan);
3697 if (OP(next) != c1) /* No choice. */
3698 next = inner; /* Avoid recursion. */
a0d0e21e 3699 else {
a3b680e6 3700 const I32 lastparen = *PL_reglastparen;
02db2b7b
IZ
3701 I32 unwind1;
3702 re_unwind_branch_t *uw;
3703
3704 /* Put unwinding data on stack */
3705 unwind1 = SSNEWt(1,re_unwind_branch_t);
3706 uw = SSPTRt(unwind1,re_unwind_branch_t);
3707 uw->prev = unwind;
3708 unwind = unwind1;
3709 uw->type = ((c1 == BRANCH)
3710 ? RE_UNWIND_BRANCH
3711 : RE_UNWIND_BRANCHJ);
3712 uw->lastparen = lastparen;
3713 uw->next = next;
3714 uw->locinput = locinput;
3715 uw->nextchr = nextchr;
3716#ifdef DEBUGGING
3717 uw->regindent = ++PL_regindent;
3718#endif
c277df42 3719
02db2b7b
IZ
3720 REGCP_SET(uw->lastcp);
3721
3722 /* Now go into the first branch */
3723 next = inner;
a687059c 3724 }
a0d0e21e
LW
3725 }
3726 break;
3727 case MINMOD:
3728 minmod = 1;
3729 break;
c277df42
IZ
3730 case CURLYM:
3731 {
00db4c45 3732 I32 l = 0;
c277df42 3733 CHECKPOINT lastcp;
9041c2e3 3734
c277df42 3735 /* We suppose that the next guy does not need
0e788c72 3736 backtracking: in particular, it is of constant non-zero length,
c277df42
IZ
3737 and has no parenths to influence future backrefs. */
3738 ln = ARG1(scan); /* min to match */
3739 n = ARG2(scan); /* max to match */
c277df42
IZ
3740 paren = scan->flags;
3741 if (paren) {
3280af22
NIS
3742 if (paren > PL_regsize)
3743 PL_regsize = paren;
eb160463 3744 if (paren > (I32)*PL_reglastparen)
3280af22 3745 *PL_reglastparen = paren;
c277df42 3746 }
dc45a647 3747 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3748 if (paren)
3749 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3750 PL_reginput = locinput;
c277df42
IZ
3751 if (minmod) {
3752 minmod = 0;
3753 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3754 sayNO;
3280af22 3755 locinput = PL_reginput;
cca55fe3 3756 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3757 regnode *text_node = next;
3758
cca55fe3 3759 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3760
cca55fe3 3761 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3762 else {
cca55fe3 3763 if (PL_regkind[(U8)OP(text_node)] == REF) {
44a68960
JH
3764 c1 = c2 = -1000;
3765 goto assume_ok_MM;
cca55fe3
JP
3766 }
3767 else { c1 = (U8)*STRING(text_node); }
af5decee 3768 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3769 c2 = PL_fold[c1];
af5decee 3770 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3771 c2 = PL_fold_locale[c1];
3772 else
3773 c2 = c1;
3774 }
a0ed51b3
LW
3775 }
3776 else
c277df42 3777 c1 = c2 = -1000;
cca55fe3 3778 assume_ok_MM:
02db2b7b 3779 REGCP_SET(lastcp);
0e788c72 3780 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
c277df42
IZ
3781 /* If it could work, try it. */
3782 if (c1 == -1000 ||
3280af22
NIS
3783 UCHARAT(PL_reginput) == c1 ||
3784 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3785 {
3786 if (paren) {
f31a99c8 3787 if (ln) {
cf93c79d
IZ
3788 PL_regstartp[paren] =
3789 HOPc(PL_reginput, -l) - PL_bostr;
3790 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3791 }
3792 else
cf93c79d 3793 PL_regendp[paren] = -1;
c277df42
IZ
3794 }
3795 if (regmatch(next))
3796 sayYES;
02db2b7b 3797 REGCP_UNWIND(lastcp);
c277df42
IZ
3798 }
3799 /* Couldn't or didn't -- move forward. */
3280af22 3800 PL_reginput = locinput;
c277df42
IZ
3801 if (regrepeat_hard(scan, 1, &l)) {
3802 ln++;
3280af22 3803 locinput = PL_reginput;
c277df42
IZ
3804 }
3805 else
3806 sayNO;
3807 }
a0ed51b3
LW
3808 }
3809 else {
c277df42 3810 n = regrepeat_hard(scan, n, &l);
3280af22 3811 locinput = PL_reginput;
a3621e74 3812 DEBUG_EXECUTE_r(
5c0ca799 3813 PerlIO_printf(Perl_debug_log,
faccc32b 3814 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3815 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3816 (IV) n, (IV)l)
c277df42
IZ
3817 );
3818 if (n >= ln) {
cca55fe3 3819 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3820 regnode *text_node = next;
3821
cca55fe3 3822 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3823
cca55fe3 3824 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3825 else {
cca55fe3 3826 if (PL_regkind[(U8)OP(text_node)] == REF) {
44a68960
JH
3827 c1 = c2 = -1000;
3828 goto assume_ok_REG;
cca55fe3
JP
3829 }
3830 else { c1 = (U8)*STRING(text_node); }
3831
af5decee 3832 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3833 c2 = PL_fold[c1];
af5decee 3834 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3835 c2 = PL_fold_locale[c1];
3836 else
3837 c2 = c1;
3838 }
a0ed51b3
LW
3839 }
3840 else
c277df42
IZ
3841 c1 = c2 = -1000;
3842 }
cca55fe3 3843 assume_ok_REG:
02db2b7b 3844 REGCP_SET(lastcp);
c277df42
IZ
3845 while (n >= ln) {
3846 /* If it could work, try it. */
3847 if (c1 == -1000 ||
3280af22
NIS
3848 UCHARAT(PL_reginput) == c1 ||
3849 UCHARAT(PL_reginput) == c2)
a0ed51b3 3850 {
a3621e74 3851 DEBUG_EXECUTE_r(
c3464db5 3852 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3853 "%*s trying tail with n=%"IVdf"...\n",
3854 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3855 );
3856 if (paren) {
3857 if (n) {
cf93c79d
IZ
3858 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3859 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3860 }
a0ed51b3 3861 else
cf93c79d 3862 PL_regendp[paren] = -1;
c277df42 3863 }
a0ed51b3
LW
3864 if (regmatch(next))
3865 sayYES;
02db2b7b 3866 REGCP_UNWIND(lastcp);
a0ed51b3 3867 }
c277df42
IZ
3868 /* Couldn't or didn't -- back up. */
3869 n--;
dfe13c55 3870 locinput = HOPc(locinput, -l);
3280af22 3871 PL_reginput = locinput;
c277df42
IZ
3872 }
3873 }
3874 sayNO;
3875 break;
3876 }
3877 case CURLYN:
3878 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3879 if (paren > PL_regsize)
3880 PL_regsize = paren;
eb160463 3881 if (paren > (I32)*PL_reglastparen)
3280af22 3882 *PL_reglastparen = paren;
c277df42
IZ
3883 ln = ARG1(scan); /* min to match */
3884 n = ARG2(scan); /* max to match */
dc45a647 3885 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3886 goto repeat;
a0d0e21e 3887 case CURLY:
c277df42 3888 paren = 0;
a0d0e21e
LW
3889 ln = ARG1(scan); /* min to match */
3890 n = ARG2(scan); /* max to match */
dc45a647 3891 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3892 goto repeat;
3893 case STAR:
3894 ln = 0;
c277df42 3895 n = REG_INFTY;
a0d0e21e 3896 scan = NEXTOPER(scan);
c277df42 3897 paren = 0;
a0d0e21e
LW
3898 goto repeat;
3899 case PLUS:
c277df42
IZ
3900 ln = 1;
3901 n = REG_INFTY;
3902 scan = NEXTOPER(scan);
3903 paren = 0;
3904 repeat:
a0d0e21e
LW
3905 /*
3906 * Lookahead to avoid useless match attempts
3907 * when we know what character comes next.
3908 */
5f80c4cf
JP
3909
3910 /*
3911 * Used to only do .*x and .*?x, but now it allows
3912 * for )'s, ('s and (?{ ... })'s to be in the way
3913 * of the quantifier and the EXACT-like node. -- japhy
3914 */
3915
cca55fe3 3916 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3917 U8 *s;
3918 regnode *text_node = next;
3919
cca55fe3 3920 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3921
cca55fe3 3922 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3923 else {
cca55fe3 3924 if (PL_regkind[(U8)OP(text_node)] == REF) {
44a68960
JH
3925 c1 = c2 = -1000;
3926 goto assume_ok_easy;
cca55fe3
JP
3927 }
3928 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
3929
3930 if (!UTF) {
3931 c2 = c1 = *s;
f65d3ee7 3932 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3933 c2 = PL_fold[c1];
f65d3ee7 3934 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf 3935 c2 = PL_fold_locale[c1];
1aa99e6b 3936 }
5f80c4cf 3937 else { /* UTF */
f65d3ee7 3938 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 3939 STRLEN ulen1, ulen2;
89ebb4a3
JH
3940 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3941 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
a2a2844f
JH
3942
3943 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3944 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3945
89ebb4a3 3946 c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
041457d9 3947 uniflags);
89ebb4a3 3948 c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
041457d9 3949 uniflags);
5f80c4cf
JP
3950 }
3951 else {
89ebb4a3 3952 c2 = c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
041457d9 3953 uniflags);
5f80c4cf 3954 }
1aa99e6b
IH
3955 }
3956 }
bbce6d69 3957 }
a0d0e21e 3958 else
bbce6d69 3959 c1 = c2 = -1000;
cca55fe3 3960 assume_ok_easy:
3280af22 3961 PL_reginput = locinput;
a0d0e21e 3962 if (minmod) {
c277df42 3963 CHECKPOINT lastcp;
a0d0e21e
LW
3964 minmod = 0;
3965 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3966 sayNO;
a0ed51b3 3967 locinput = PL_reginput;
02db2b7b 3968 REGCP_SET(lastcp);
0fe9bf95 3969 if (c1 != -1000) {
1aa99e6b 3970 char *e; /* Should not check after this */
0fe9bf95 3971 char *old = locinput;
b2f2f093 3972 int count = 0;
0fe9bf95 3973
1aa99e6b 3974 if (n == REG_INFTY) {
0fe9bf95 3975 e = PL_regeol - 1;
1aa99e6b
IH
3976 if (do_utf8)
3977 while (UTF8_IS_CONTINUATION(*(U8*)e))
3978 e--;
3979 }
3980 else if (do_utf8) {
3981 int m = n - ln;
3982 for (e = locinput;
3983 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3984 e += UTF8SKIP(e);
3985 }
3986 else {
3987 e = locinput + n - ln;
3988 if (e >= PL_regeol)
3989 e = PL_regeol - 1;
3990 }
0fe9bf95
IZ
3991 while (1) {
3992 /* Find place 'next' could work */
1aa99e6b
IH
3993 if (!do_utf8) {
3994 if (c1 == c2) {
a8e8ab15
JH
3995 while (locinput <= e &&
3996 UCHARAT(locinput) != c1)
1aa99e6b
IH
3997 locinput++;
3998 } else {
9041c2e3 3999 while (locinput <= e
a8e8ab15
JH
4000 && UCHARAT(locinput) != c1
4001 && UCHARAT(locinput) != c2)
1aa99e6b
IH
4002 locinput++;
4003 }
4004 count = locinput - old;
4005 }
4006 else {
1aa99e6b 4007 if (c1 == c2) {
a3b680e6 4008 STRLEN len;
872c91ae
JH
4009 /* count initialised to
4010 * utf8_distance(old, locinput) */
b2f2f093 4011 while (locinput <= e &&
872c91ae 4012 utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4013 UTF8_MAXBYTES, &len,
041457d9 4014 uniflags) != (UV)c1) {
1aa99e6b 4015 locinput += len;
b2f2f093
JH
4016 count++;
4017 }
1aa99e6b 4018 } else {
a3b680e6 4019 STRLEN len;
872c91ae
JH
4020 /* count initialised to
4021 * utf8_distance(old, locinput) */
b2f2f093 4022 while (locinput <= e) {
872c91ae 4023 UV c = utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4024 UTF8_MAXBYTES, &len,
041457d9 4025 uniflags);
eb160463 4026 if (c == (UV)c1 || c == (UV)c2)
1aa99e6b 4027 break;
b2f2f093
JH
4028 locinput += len;
4029 count++;
1aa99e6b
IH
4030 }
4031 }
0fe9bf95 4032 }
9041c2e3 4033 if (locinput > e)
0fe9bf95
IZ
4034 sayNO;
4035 /* PL_reginput == old now */
4036 if (locinput != old) {
4037 ln = 1; /* Did some */
1aa99e6b 4038 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
4039 sayNO;
4040 }
4041 /* PL_reginput == locinput now */
29d1e993 4042 TRYPAREN(paren, ln, locinput);
0fe9bf95 4043 PL_reginput = locinput; /* Could be reset... */
02db2b7b 4044 REGCP_UNWIND(lastcp);
0fe9bf95 4045 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
4046 old = locinput;
4047 if (do_utf8)
4048 locinput += UTF8SKIP(locinput);
4049 else
4050 locinput++;
b2f2f093 4051 count = 1;
0fe9bf95
IZ
4052 }
4053 }
4054 else
c277df42 4055 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
4056 UV c;
4057 if (c1 != -1000) {
4058 if (do_utf8)
872c91ae 4059 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4060 UTF8_MAXBYTES, 0,
041457d9 4061 uniflags);
1aa99e6b 4062 else
9041c2e3 4063 c = UCHARAT(PL_reginput);
2390ecbc 4064 /* If it could work, try it. */
eb160463 4065 if (c == (UV)c1 || c == (UV)c2)
2390ecbc 4066 {
ecc99935 4067 TRYPAREN(paren, ln, PL_reginput);
2390ecbc
PP
4068 REGCP_UNWIND(lastcp);
4069 }
1aa99e6b 4070 }
a0d0e21e 4071 /* If it could work, try it. */
2390ecbc 4072 else if (c1 == -1000)
bbce6d69 4073 {
ecc99935 4074 TRYPAREN(paren, ln, PL_reginput);
02db2b7b 4075 REGCP_UNWIND(lastcp);
bbce6d69 4076 }
c277df42 4077 /* Couldn't or didn't -- move forward. */
a0ed51b3 4078 PL_reginput = locinput;
a0d0e21e
LW
4079 if (regrepeat(scan, 1)) {
4080 ln++;
a0ed51b3
LW
4081 locinput = PL_reginput;
4082 }
4083 else
4633a7c4 4084 sayNO;
a0d0e21e
LW
4085 }
4086 }
4087 else {
c277df42 4088 CHECKPOINT lastcp;
a0d0e21e 4089 n = regrepeat(scan, n);
a0ed51b3 4090 locinput = PL_reginput;
22c35a8c 4091 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
7fba1cd6 4092 (OP(next) != MEOL ||
15272685
HS
4093 OP(next) == SEOL || OP(next) == EOS))
4094 {
a0d0e21e 4095 ln = n; /* why back off? */
1aeab75a
GS
4096 /* ...because $ and \Z can match before *and* after
4097 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
4098 We should back off by one in this case. */
4099 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
4100 ln--;
4101 }
02db2b7b 4102 REGCP_SET(lastcp);
c277df42 4103 if (paren) {
8fa7f367 4104 UV c = 0;
c277df42 4105 while (n >= ln) {
1aa99e6b
IH
4106 if (c1 != -1000) {
4107 if (do_utf8)
872c91ae 4108 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4109 UTF8_MAXBYTES, 0,
041457d9 4110 uniflags);
1aa99e6b 4111 else
9041c2e3 4112 c = UCHARAT(PL_reginput);
1aa99e6b 4113 }
c277df42 4114 /* If it could work, try it. */
eb160463 4115 if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
c277df42 4116 {
29d1e993 4117 TRYPAREN(paren, n, PL_reginput);
02db2b7b 4118 REGCP_UNWIND(lastcp);
c277df42
IZ
4119 }
4120 /* Couldn't or didn't -- back up. */
4121 n--;
dfe13c55 4122 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 4123 }
a0ed51b3
LW
4124 }
4125 else {
8fa7f367 4126 UV c = 0;
c277df42 4127 while (n >= ln) {
1aa99e6b
IH
4128 if (c1 != -1000) {
4129 if (do_utf8)
872c91ae 4130 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4131 UTF8_MAXBYTES, 0,
041457d9 4132 uniflags);
1aa99e6b 4133 else
9041c2e3 4134 c = UCHARAT(PL_reginput);
1aa99e6b 4135 }
c277df42 4136 /* If it could work, try it. */
eb160463 4137 if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
c277df42 4138 {
29d1e993 4139 TRYPAREN(paren, n, PL_reginput);
02db2b7b 4140 REGCP_UNWIND(lastcp);
c277df42
IZ
4141 }
4142 /* Couldn't or didn't -- back up. */
4143 n--;
dfe13c55 4144 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 4145 }
a0d0e21e
LW
4146 }
4147 }
4633a7c4 4148 sayNO;
c277df42 4149 break;
a0d0e21e 4150 case END:
0f5d15d6
IZ
4151 if (PL_reg_call_cc) {
4152 re_cc_state *cur_call_cc = PL_reg_call_cc;
4153 CURCUR *cctmp = PL_regcc;
4154 regexp *re = PL_reg_re;
4155 CHECKPOINT cp, lastcp;
4156
4157 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 4158 REGCP_SET(lastcp);
0f5d15d6
IZ
4159 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
4160 the caller. */
4161 PL_reginput = locinput; /* Make position available to
4162 the callcc. */
4163 cache_re(PL_reg_call_cc->re);
4164 PL_regcc = PL_reg_call_cc->cc;
4165 PL_reg_call_cc = PL_reg_call_cc->prev;
4166 if (regmatch(cur_call_cc->node)) {
4167 PL_reg_call_cc = cur_call_cc;
4168 regcpblow(cp);
4169 sayYES;
4170 }
02db2b7b 4171 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
4172 regcppop();
4173 PL_reg_call_cc = cur_call_cc;
4174 PL_regcc = cctmp;
4175 PL_reg_re = re;
4176 cache_re(re);
4177
a3621e74 4178 DEBUG_EXECUTE_r(
0f5d15d6
IZ
4179 PerlIO_printf(Perl_debug_log,
4180 "%*s continuation failed...\n",
4181 REPORT_CODE_OFF+PL_regindent*2, "")
4182 );
7821416a 4183 sayNO_SILENT;
0f5d15d6 4184 }
7821416a 4185 if (locinput < PL_regtill) {
a3621e74 4186 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
7821416a
IZ
4187 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4188 PL_colors[4],
4189 (long)(locinput - PL_reg_starttry),
4190 (long)(PL_regtill - PL_reg_starttry),
4191 PL_colors[5]));
4192 sayNO_FINAL; /* Cannot match: too short. */
4193 }
4194 PL_reginput = locinput; /* put where regtry can find it */
4195 sayYES_FINAL; /* Success! */
7e5428c5 4196 case SUCCEED:
3280af22 4197 PL_reginput = locinput; /* put where regtry can find it */
7821416a 4198 sayYES_LOUD; /* Success! */
c277df42
IZ
4199 case SUSPEND:
4200 n = 1;
9fe1d20c 4201 PL_reginput = locinput;
9041c2e3 4202 goto do_ifmatch;
a0d0e21e 4203 case UNLESSM:
c277df42 4204 n = 0;
a0ed51b3 4205 if (scan->flags) {
efb30f32
HS
4206 s = HOPBACKc(locinput, scan->flags);
4207 if (!s)
4208 goto say_yes;
4209 PL_reginput = s;
a0ed51b3
LW
4210 }
4211 else
4212 PL_reginput = locinput;
c277df42
IZ
4213 goto do_ifmatch;
4214 case IFMATCH:
4215 n = 1;
a0ed51b3 4216 if (scan->flags) {
efb30f32
HS
4217 s = HOPBACKc(locinput, scan->flags);
4218 if (!s)
4219 goto say_no;
4220 PL_reginput = s;
a0ed51b3
LW
4221 }
4222 else
4223 PL_reginput = locinput;
4224
c277df42 4225 do_ifmatch:
c277df42
IZ
4226 inner = NEXTOPER(NEXTOPER(scan));
4227 if (regmatch(inner) != n) {
4228 say_no:
4229 if (logical) {
4230 logical = 0;
4231 sw = 0;
4232 goto do_longjump;
a0ed51b3
LW
4233 }
4234 else
c277df42
IZ
4235 sayNO;
4236 }
4237 say_yes:
4238 if (logical) {
4239 logical = 0;
4240 sw = 1;
4241 }
fe44a5e8 4242 if (OP(scan) == SUSPEND) {
3280af22 4243 locinput = PL_reginput;
565764a8 4244 nextchr = UCHARAT(locinput);
fe44a5e8 4245 }
c277df42
IZ
4246 /* FALL THROUGH. */
4247 case LONGJMP:
4248 do_longjump:
4249 next = scan + ARG(scan);
4250 if (next == scan)
4251 next = NULL;
a0d0e21e
LW
4252 break;
4253 default:
b900a521 4254 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 4255 PTR2UV(scan), OP(scan));
cea2e8a9 4256 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 4257 }
02db2b7b 4258 reenter:
a0d0e21e
LW
4259 scan = next;
4260 }
a687059c 4261
a0d0e21e
LW
4262 /*
4263 * We get here only if there's trouble -- normally "case END" is
4264 * the terminating point.
4265 */
cea2e8a9 4266 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 4267 /*NOTREACHED*/
4633a7c4
LW
4268 sayNO;
4269
7821416a 4270yes_loud:
a3621e74 4271 DEBUG_EXECUTE_r(
7821416a
IZ
4272 PerlIO_printf(Perl_debug_log,
4273 "%*s %scould match...%s\n",
e4584336 4274 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
7821416a
IZ
4275 );
4276 goto yes;
4277yes_final:
a3621e74 4278 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
e4584336 4279 PL_colors[4], PL_colors[5]));
4633a7c4
LW
4280yes:
4281#ifdef DEBUGGING
3280af22 4282 PL_regindent--;
4633a7c4 4283#endif
02db2b7b
IZ
4284
4285#if 0 /* Breaks $^R */
4286 if (unwind)
4287 regcpblow(firstcp);
4288#endif
4633a7c4
LW
4289 return 1;
4290
4291no:
a3621e74 4292 DEBUG_EXECUTE_r(
7821416a
IZ
4293 PerlIO_printf(Perl_debug_log,
4294 "%*s %sfailed...%s\n",
e4584336 4295 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
7821416a
IZ
4296 );
4297 goto do_no;
4298no_final:
4299do_no:
02db2b7b
IZ
4300 if (unwind) {
4301 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
4302
4303 switch (uw->type) {
4304 case RE_UNWIND_BRANCH:
4305 case RE_UNWIND_BRANCHJ:
4306 {
4307 re_unwind_branch_t *uwb = &(uw->branch);
a3b680e6 4308 const I32 lastparen = uwb->lastparen;
9041c2e3 4309
02db2b7b
IZ
4310 REGCP_UNWIND(uwb->lastcp);
4311 for (n = *PL_reglastparen; n > lastparen; n--)
4312 PL_regendp[n] = -1;
4313 *PL_reglastparen = n;
4314 scan = next = uwb->next;
9041c2e3
NIS
4315 if ( !scan ||
4316 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
4317 ? BRANCH : BRANCHJ) ) { /* Failure */
4318 unwind = uwb->prev;
4319#ifdef DEBUGGING
4320 PL_regindent--;
4321#endif
4322 goto do_no;
4323 }
4324 /* Have more choice yet. Reuse the same uwb. */
02db2b7b
IZ
4325 if ((n = (uwb->type == RE_UNWIND_BRANCH
4326 ? NEXT_OFF(next) : ARG(next))))
4327 next += n;
4328 else
4329 next = NULL; /* XXXX Needn't unwinding in this case... */
4330 uwb->next = next;
4331 next = NEXTOPER(scan);
4332 if (uwb->type == RE_UNWIND_BRANCHJ)
4333 next = NEXTOPER(next);
4334 locinput = uwb->locinput;
4335 nextchr = uwb->nextchr;
4336#ifdef DEBUGGING
4337 PL_regindent = uwb->regindent;
4338#endif
4339
4340 goto reenter;
4341 }
4342 /* NOT REACHED */
4343 default:
4344 Perl_croak(aTHX_ "regexp unwind memory corruption");
4345 }
4346 /* NOT REACHED */
4347 }
4633a7c4 4348#ifdef DEBUGGING
3280af22 4349 PL_regindent--;
4633a7c4 4350#endif
a0d0e21e 4351 return 0;
a687059c
LW
4352}
4353
4354/*
4355 - regrepeat - repeatedly match something simple, report how many
4356 */
4357/*
4358 * [This routine now assumes that it will only match on things of length 1.
4359 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 4360 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 4361 */
76e3520e 4362STATIC I32
a3b680e6 4363S_regrepeat(pTHX_ const regnode *p, I32 max)
a687059c 4364{
27da23d5 4365 dVAR;
a0d0e21e 4366 register char *scan;
a0d0e21e 4367 register I32 c;
3280af22 4368 register char *loceol = PL_regeol;
a0ed51b3 4369 register I32 hardcount = 0;
53c4c00c 4370 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 4371
3280af22 4372 scan = PL_reginput;
faf11cac
HS
4373 if (max == REG_INFTY)
4374 max = I32_MAX;
4375 else if (max < loceol - scan)
a0d0e21e 4376 loceol = scan + max;
a0d0e21e 4377 switch (OP(p)) {
22c35a8c 4378 case REG_ANY:
1aa99e6b 4379 if (do_utf8) {
ffc61ed2 4380 loceol = PL_regeol;
1aa99e6b 4381 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
4382 scan += UTF8SKIP(scan);
4383 hardcount++;
4384 }
4385 } else {
4386 while (scan < loceol && *scan != '\n')
4387 scan++;
a0ed51b3
LW
4388 }
4389 break;
ffc61ed2 4390 case SANY:
def8e4ea
JH
4391 if (do_utf8) {
4392 loceol = PL_regeol;
a0804c9e 4393 while (scan < loceol && hardcount < max) {
def8e4ea
JH
4394 scan += UTF8SKIP(scan);
4395 hardcount++;
4396 }
4397 }
4398 else
4399 scan = loceol;
a0ed51b3 4400 break;
f33976b4
DB
4401 case CANY:
4402 scan = loceol;
4403 break;
090f7165
JH
4404 case EXACT: /* length of string is 1 */
4405 c = (U8)*STRING(p);
4406 while (scan < loceol && UCHARAT(scan) == c)
4407 scan++;
bbce6d69 4408 break;
4409 case EXACTF: /* length of string is 1 */
cd439c50 4410 c = (U8)*STRING(p);
bbce6d69 4411 while (scan < loceol &&
22c35a8c 4412 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 4413 scan++;
4414 break;
4415 case EXACTFL: /* length of string is 1 */
3280af22 4416 PL_reg_flags |= RF_tainted;
cd439c50 4417 c = (U8)*STRING(p);
bbce6d69 4418 while (scan < loceol &&
22c35a8c 4419 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
4420 scan++;
4421 break;
4422 case ANYOF:
ffc61ed2
JH
4423 if (do_utf8) {
4424 loceol = PL_regeol;
cfc92286
JH
4425 while (hardcount < max && scan < loceol &&
4426 reginclass(p, (U8*)scan, 0, do_utf8)) {
ffc61ed2
JH
4427 scan += UTF8SKIP(scan);
4428 hardcount++;
4429 }
4430 } else {
7d3e948e 4431 while (scan < loceol && REGINCLASS(p, (U8*)scan))
ffc61ed2
JH
4432 scan++;
4433 }
a0d0e21e
LW
4434 break;
4435 case ALNUM:
1aa99e6b 4436 if (do_utf8) {
ffc61ed2 4437 loceol = PL_regeol;
1a4fad37 4438 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4439 while (hardcount < max && scan < loceol &&
3568d838 4440 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4441 scan += UTF8SKIP(scan);
4442 hardcount++;
4443 }
4444 } else {
4445 while (scan < loceol && isALNUM(*scan))
4446 scan++;
a0ed51b3
LW
4447 }
4448 break;
bbce6d69 4449 case ALNUML:
3280af22 4450 PL_reg_flags |= RF_tainted;
1aa99e6b 4451 if (do_utf8) {
ffc61ed2 4452 loceol = PL_regeol;
1aa99e6b
IH
4453 while (hardcount < max && scan < loceol &&
4454 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4455 scan += UTF8SKIP(scan);
4456 hardcount++;
4457 }
4458 } else {
4459 while (scan < loceol && isALNUM_LC(*scan))
4460 scan++;
a0ed51b3
LW
4461 }
4462 break;
a0d0e21e 4463 case NALNUM:
1aa99e6b 4464 if (do_utf8) {
ffc61ed2 4465 loceol = PL_regeol;
1a4fad37 4466 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4467 while (hardcount < max && scan < loceol &&
3568d838 4468 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4469 scan += UTF8SKIP(scan);
4470 hardcount++;
4471 }
4472 } else {
4473 while (scan < loceol && !isALNUM(*scan))
4474 scan++;
a0ed51b3
LW
4475 }
4476 break;
bbce6d69 4477 case NALNUML:
3280af22 4478 PL_reg_flags |= RF_tainted;
1aa99e6b 4479 if (do_utf8) {
ffc61ed2 4480 loceol = PL_regeol;
1aa99e6b
IH
4481 while (hardcount < max && scan < loceol &&
4482 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4483 scan += UTF8SKIP(scan);
4484 hardcount++;
4485 }
4486 } else {
4487 while (scan < loceol && !isALNUM_LC(*scan))
4488 scan++;
a0ed51b3
LW
4489 }
4490 break;
a0d0e21e 4491 case SPACE:
1aa99e6b 4492 if (do_utf8) {
ffc61ed2 4493 loceol = PL_regeol;
1a4fad37 4494 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4495 while (hardcount < max && scan < loceol &&
3568d838
JH
4496 (*scan == ' ' ||
4497 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4498 scan += UTF8SKIP(scan);
4499 hardcount++;
4500 }
4501 } else {
4502 while (scan < loceol && isSPACE(*scan))
4503 scan++;
a0ed51b3
LW
4504 }
4505 break;
bbce6d69 4506 case SPACEL:
3280af22 4507 PL_reg_flags |= RF_tainted;
1aa99e6b 4508 if (do_utf8) {
ffc61ed2 4509 loceol = PL_regeol;
1aa99e6b 4510 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4511 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4512 scan += UTF8SKIP(scan);
4513 hardcount++;
4514 }
4515 } else {
4516 while (scan < loceol && isSPACE_LC(*scan))
4517 scan++;
a0ed51b3
LW
4518 }
4519 break;
a0d0e21e 4520 case NSPACE:
1aa99e6b 4521 if (do_utf8) {
ffc61ed2 4522 loceol = PL_regeol;
1a4fad37 4523 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4524 while (hardcount < max && scan < loceol &&
3568d838
JH
4525 !(*scan == ' ' ||
4526 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4527 scan += UTF8SKIP(scan);
4528 hardcount++;
4529 }
4530 } else {
4531 while (scan < loceol && !isSPACE(*scan))
4532 scan++;
4533 break;
a0ed51b3 4534 }
bbce6d69 4535 case NSPACEL:
3280af22 4536 PL_reg_flags |= RF_tainted;
1aa99e6b 4537 if (do_utf8) {
ffc61ed2 4538 loceol = PL_regeol;
1aa99e6b 4539 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4540 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4541 scan += UTF8SKIP(scan);
4542 hardcount++;
4543 }
4544 } else {
4545 while (scan < loceol && !isSPACE_LC(*scan))
4546 scan++;
a0ed51b3
LW
4547 }
4548 break;
a0d0e21e 4549 case DIGIT:
1aa99e6b 4550 if (do_utf8) {
ffc61ed2 4551 loceol = PL_regeol;
1a4fad37 4552 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4553 while (hardcount < max && scan < loceol &&
3568d838 4554 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4555 scan += UTF8SKIP(scan);
4556 hardcount++;
4557 }
4558 } else {
4559 while (scan < loceol && isDIGIT(*scan))
4560 scan++;
a0ed51b3
LW
4561 }
4562 break;
a0d0e21e 4563 case NDIGIT:
1aa99e6b 4564 if (do_utf8) {
ffc61ed2 4565 loceol = PL_regeol;
1a4fad37 4566 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4567 while (hardcount < max && scan < loceol &&
3568d838 4568 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4569 scan += UTF8SKIP(scan);
4570 hardcount++;
4571 }
4572 } else {
4573 while (scan < loceol && !isDIGIT(*scan))
4574 scan++;
a0ed51b3
LW
4575 }
4576 break;
a0d0e21e
LW
4577 default: /* Called on something of 0 width. */
4578 break; /* So match right here or not at all. */
4579 }
a687059c 4580
a0ed51b3
LW
4581 if (hardcount)
4582 c = hardcount;
4583 else
4584 c = scan - PL_reginput;
3280af22 4585 PL_reginput = scan;
a687059c 4586
a3621e74 4587 DEBUG_r({
ab74612d 4588 SV *re_debug_flags = NULL;
c277df42 4589 SV *prop = sv_newmortal();
a3621e74
YO
4590 GET_RE_DEBUG_FLAGS;
4591 DEBUG_EXECUTE_r({
c277df42 4592 regprop(prop, p);
9041c2e3
NIS
4593 PerlIO_printf(Perl_debug_log,
4594 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3f7c398e 4595 REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max);
c277df42 4596 });
a3621e74 4597 });
9041c2e3 4598
a0d0e21e 4599 return(c);
a687059c
LW
4600}
4601
4602/*
c277df42 4603 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 4604 *
0e788c72 4605 * The repeater is supposed to have constant non-zero length.
c277df42
IZ
4606 */
4607
76e3520e 4608STATIC I32
cea2e8a9 4609S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 4610{
97aff369 4611 dVAR;
b7953727 4612 register char *scan = Nullch;
c277df42 4613 register char *start;
3280af22 4614 register char *loceol = PL_regeol;
a0ed51b3 4615 I32 l = 0;
708e3b05 4616 I32 count = 0, res = 1;
a0ed51b3
LW
4617
4618 if (!max)
4619 return 0;
c277df42 4620
3280af22 4621 start = PL_reginput;
53c4c00c 4622 if (PL_reg_match_utf8) {
708e3b05 4623 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4624 if (!count++) {
4625 l = 0;
4626 while (start < PL_reginput) {
4627 l++;
4628 start += UTF8SKIP(start);
4629 }
4630 *lp = l;
4631 if (l == 0)
4632 return max;
4633 }
4634 if (count == max)
4635 return count;
4636 }
4637 }
4638 else {
708e3b05 4639 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4640 if (!count++) {
4641 *lp = l = PL_reginput - start;
4642 if (max != REG_INFTY && l*max < loceol - scan)
4643 loceol = scan + l*max;
4644 if (l == 0)
4645 return max;
c277df42
IZ
4646 }
4647 }
4648 }
708e3b05 4649 if (!res)
3280af22 4650 PL_reginput = scan;
9041c2e3 4651
a0ed51b3 4652 return count;
c277df42
IZ
4653}
4654
4655/*
ffc61ed2
JH
4656- regclass_swash - prepare the utf8 swash
4657*/
4658
4659SV *
a3b680e6 4660Perl_regclass_swash(pTHX_ register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
ffc61ed2 4661{
97aff369 4662 dVAR;
9e55ce06
JH
4663 SV *sw = NULL;
4664 SV *si = NULL;
4665 SV *alt = NULL;
ffc61ed2
JH
4666
4667 if (PL_regdata && PL_regdata->count) {
a3b680e6 4668 const U32 n = ARG(node);
ffc61ed2
JH
4669
4670 if (PL_regdata->what[n] == 's') {
890ce7af
AL
4671 SV * const rv = (SV*)PL_regdata->data[n];
4672 AV * const av = (AV*)SvRV((SV*)rv);
2d03de9c 4673 SV **const ary = AvARRAY(av);
9e55ce06 4674 SV **a, **b;
9041c2e3 4675
711a919c 4676 /* See the end of regcomp.c:S_regclass() for
9e55ce06
JH
4677 * documentation of these array elements. */
4678
b11f357e 4679 si = *ary;
8f7f7219 4680 a = SvROK(ary[1]) ? &ary[1] : 0;
b11f357e
JH
4681 b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4682
ffc61ed2
JH
4683 if (a)
4684 sw = *a;
4685 else if (si && doinit) {
4686 sw = swash_init("utf8", "", si, 1, 0);
4687 (void)av_store(av, 1, sw);
4688 }
9e55ce06
JH
4689 if (b)
4690 alt = *b;
ffc61ed2
JH
4691 }
4692 }
4693
9e55ce06
JH
4694 if (listsvp)
4695 *listsvp = si;
4696 if (altsvp)
4697 *altsvp = alt;
ffc61ed2
JH
4698
4699 return sw;
4700}
4701
4702/*
ba7b4546 4703 - reginclass - determine if a character falls into a character class
832705d4
JH
4704
4705 The n is the ANYOF regnode, the p is the target string, lenp
4706 is pointer to the maximum length of how far to go in the p
4707 (if the lenp is zero, UTF8SKIP(p) is used),
4708 do_utf8 tells whether the target string is in UTF-8.
4709
bbce6d69 4710 */
4711
76e3520e 4712STATIC bool
a3b680e6 4713S_reginclass(pTHX_ register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
bbce6d69 4714{
27da23d5 4715 dVAR;
a3b680e6 4716 const char flags = ANYOF_FLAGS(n);
bbce6d69 4717 bool match = FALSE;
cc07378b 4718 UV c = *p;
ae9ddab8 4719 STRLEN len = 0;
9e55ce06 4720 STRLEN plen;
1aa99e6b 4721
19f67299
TS
4722 if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
4723 c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
4724 ckWARN(WARN_UTF8) ? UTF8_CHECK_ONLY :
4725 UTF8_ALLOW_ANYUV|UTF8_CHECK_ONLY);
4726 if (len == (STRLEN)-1)
4727 Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
4728 }
bbce6d69 4729
0f0076b4 4730 plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
ffc61ed2 4731 if (do_utf8 || (flags & ANYOF_UNICODE)) {
9e55ce06
JH
4732 if (lenp)
4733 *lenp = 0;
ffc61ed2 4734 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
4735 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4736 match = TRUE;
bbce6d69 4737 }
3568d838 4738 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 4739 match = TRUE;
ffc61ed2 4740 if (!match) {
9e55ce06 4741 AV *av;
890ce7af 4742 SV * const sw = regclass_swash(n, TRUE, 0, (SV**)&av);
ffc61ed2
JH
4743
4744 if (sw) {
3568d838 4745 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
4746 match = TRUE;
4747 else if (flags & ANYOF_FOLD) {
9e55ce06
JH
4748 if (!match && lenp && av) {
4749 I32 i;
9e55ce06 4750 for (i = 0; i <= av_len(av); i++) {
890ce7af 4751 SV* const sv = *av_fetch(av, i, FALSE);
9e55ce06 4752 STRLEN len;
890ce7af 4753 const char * const s = SvPV_const(sv, len);
9e55ce06 4754
061b10df 4755 if (len <= plen && memEQ(s, (char*)p, len)) {
9e55ce06
JH
4756 *lenp = len;
4757 match = TRUE;
4758 break;
4759 }
4760 }
4761 }
4762 if (!match) {
89ebb4a3 4763 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4a623e43
JH
4764 STRLEN tmplen;
4765
9e55ce06
JH
4766 to_utf8_fold(p, tmpbuf, &tmplen);
4767 if (swash_fetch(sw, tmpbuf, do_utf8))
4768 match = TRUE;
4769 }
ffc61ed2
JH
4770 }
4771 }
bbce6d69 4772 }
9e55ce06 4773 if (match && lenp && *lenp == 0)
0f0076b4 4774 *lenp = UNISKIP(NATIVE_TO_UNI(c));
bbce6d69 4775 }
1aa99e6b 4776 if (!match && c < 256) {
ffc61ed2
JH
4777 if (ANYOF_BITMAP_TEST(n, c))
4778 match = TRUE;
4779 else if (flags & ANYOF_FOLD) {
eb160463 4780 U8 f;
a0ed51b3 4781
ffc61ed2
JH
4782 if (flags & ANYOF_LOCALE) {
4783 PL_reg_flags |= RF_tainted;
4784 f = PL_fold_locale[c];
4785 }
4786 else
4787 f = PL_fold[c];
4788 if (f != c && ANYOF_BITMAP_TEST(n, f))
4789 match = TRUE;
4790 }
4791
4792 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 4793 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
4794 if (
4795 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4796 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4797 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
4798 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4799 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4800 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4801 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4802 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4803 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4804 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4805 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
4806 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
4807 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4808 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4809 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4810 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4811 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
4812 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4813 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
4814 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4815 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4816 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4817 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
4818 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4819 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4820 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
4821 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
4822 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
4823 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
4824 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
4825 ) /* How's that for a conditional? */
4826 {
4827 match = TRUE;
4828 }
a0ed51b3 4829 }
a0ed51b3
LW
4830 }
4831
a0ed51b3
LW
4832 return (flags & ANYOF_INVERT) ? !match : match;
4833}
161b471a 4834
dfe13c55 4835STATIC U8 *
cea2e8a9 4836S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 4837{
97aff369 4838 dVAR;
1aa99e6b
IH
4839 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4840}
4841
4842STATIC U8 *
4843S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 4844{
97aff369 4845 dVAR;
a0ed51b3 4846 if (off >= 0) {
1aa99e6b 4847 while (off-- && s < lim) {
ffc61ed2 4848 /* XXX could check well-formedness here */
a0ed51b3 4849 s += UTF8SKIP(s);
ffc61ed2 4850 }
a0ed51b3
LW
4851 }
4852 else {
4853 while (off++) {
1aa99e6b 4854 if (s > lim) {
a0ed51b3 4855 s--;
ffc61ed2 4856 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4857 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4858 s--;
ffc61ed2
JH
4859 }
4860 /* XXX could check well-formedness here */
a0ed51b3
LW
4861 }
4862 }
4863 }
4864 return s;
4865}
161b471a 4866
dfe13c55 4867STATIC U8 *
1aa99e6b 4868S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 4869{
97aff369 4870 dVAR;
1aa99e6b
IH
4871 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4872}
4873
4874STATIC U8 *
4875S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3 4876{
97aff369 4877 dVAR;
a0ed51b3 4878 if (off >= 0) {
1aa99e6b 4879 while (off-- && s < lim) {
ffc61ed2 4880 /* XXX could check well-formedness here */
a0ed51b3 4881 s += UTF8SKIP(s);
ffc61ed2 4882 }
a0ed51b3
LW
4883 if (off >= 0)
4884 return 0;
4885 }
4886 else {
4887 while (off++) {
1aa99e6b 4888 if (s > lim) {
a0ed51b3 4889 s--;
ffc61ed2 4890 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4891 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4892 s--;
ffc61ed2
JH
4893 }
4894 /* XXX could check well-formedness here */
a0ed51b3
LW
4895 }
4896 else
4897 break;
4898 }
4899 if (off <= 0)
4900 return 0;
4901 }
4902 return s;
4903}
51371543 4904
51371543 4905static void
acfe0abc 4906restore_pos(pTHX_ void *arg)
51371543 4907{
97aff369 4908 dVAR;
9d4ba2ae 4909 PERL_UNUSED_ARG(arg);
51371543
GS
4910 if (PL_reg_eval_set) {
4911 if (PL_reg_oldsaved) {
4912 PL_reg_re->subbeg = PL_reg_oldsaved;
4913 PL_reg_re->sublen = PL_reg_oldsavedlen;
f8c7b90f 4914#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
4915 PL_reg_re->saved_copy = PL_nrs;
4916#endif
51371543
GS
4917 RX_MATCH_COPIED_on(PL_reg_re);
4918 }
4919 PL_reg_magic->mg_len = PL_reg_oldpos;
4920 PL_reg_eval_set = 0;
4921 PL_curpm = PL_reg_oldcurpm;
4922 }
4923}
33b8afdf
JH
4924
4925STATIC void
4926S_to_utf8_substr(pTHX_ register regexp *prog)
4927{
33b8afdf 4928 if (prog->float_substr && !prog->float_utf8) {
9d4ba2ae 4929 SV* sv;
f2b990bf 4930 prog->float_utf8 = sv = newSVsv(prog->float_substr);
33b8afdf
JH
4931 sv_utf8_upgrade(sv);
4932 if (SvTAIL(prog->float_substr))
4933 SvTAIL_on(sv);
4934 if (prog->float_substr == prog->check_substr)
4935 prog->check_utf8 = sv;
4936 }
4937 if (prog->anchored_substr && !prog->anchored_utf8) {
9d4ba2ae 4938 SV* sv;
f2b990bf 4939 prog->anchored_utf8 = sv = newSVsv(prog->anchored_substr);
33b8afdf
JH
4940 sv_utf8_upgrade(sv);
4941 if (SvTAIL(prog->anchored_substr))
4942 SvTAIL_on(sv);
4943 if (prog->anchored_substr == prog->check_substr)
4944 prog->check_utf8 = sv;
4945 }
4946}
4947
4948STATIC void
4949S_to_byte_substr(pTHX_ register regexp *prog)
4950{
97aff369 4951 dVAR;
33b8afdf 4952 if (prog->float_utf8 && !prog->float_substr) {
9d4ba2ae 4953 SV* sv;
f2b990bf 4954 prog->float_substr = sv = newSVsv(prog->float_utf8);
33b8afdf
JH
4955 if (sv_utf8_downgrade(sv, TRUE)) {
4956 if (SvTAIL(prog->float_utf8))
4957 SvTAIL_on(sv);
4958 } else {
4959 SvREFCNT_dec(sv);
4960 prog->float_substr = sv = &PL_sv_undef;
4961 }
4962 if (prog->float_utf8 == prog->check_utf8)
4963 prog->check_substr = sv;
4964 }
4965 if (prog->anchored_utf8 && !prog->anchored_substr) {
9d4ba2ae 4966 SV* sv;
f2b990bf 4967 prog->anchored_substr = sv = newSVsv(prog->anchored_utf8);
33b8afdf
JH
4968 if (sv_utf8_downgrade(sv, TRUE)) {
4969 if (SvTAIL(prog->anchored_utf8))
4970 SvTAIL_on(sv);
4971 } else {
4972 SvREFCNT_dec(sv);
4973 prog->anchored_substr = sv = &PL_sv_undef;
4974 }
4975 if (prog->anchored_utf8 == prog->check_utf8)
4976 prog->check_substr = sv;
4977 }
4978}
66610fdd
RGS
4979
4980/*
4981 * Local variables:
4982 * c-indentation-style: bsd
4983 * c-basic-offset: 4
4984 * indent-tabs-mode: t
4985 * End:
4986 *
37442d52
RGS
4987 * ex: set ts=8 sts=4 sw=4 noet:
4988 */