This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Retract #12824, doesn't help and is ignored
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
a687059c
LW
83#include "regcomp.h"
84
c277df42
IZ
85#define RF_tainted 1 /* tainted information used? */
86#define RF_warned 2 /* warned about big count? */
ce862d02 87#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
88#define RF_utf8 8 /* String contains multibyte chars? */
89
90#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
91
92#define RS_init 1 /* eval environment created */
93#define RS_set 2 /* replsv value is set */
c277df42 94
a687059c
LW
95#ifndef STATIC
96#define STATIC static
97#endif
98
c277df42
IZ
99/*
100 * Forwards.
101 */
102
a0ed51b3 103#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 104#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 105
dfe13c55
GS
106#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
108#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
109#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
110#define HOPc(pos,off) ((char*)HOP(pos,off))
111#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 112
efb30f32 113#define HOPBACK(pos, off) ( \
53c4c00c 114 (UTF && PL_reg_match_utf8) \
efb30f32
HS
115 ? reghopmaybe((U8*)pos, -off) \
116 : (pos - off >= PL_bostr) \
117 ? (U8*)(pos - off) \
118 : (U8*)NULL \
119)
120#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
121
1aa99e6b
IH
122#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
123#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
124#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
125#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
126#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
127#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
128
8269fa76 129#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 130
5f80c4cf 131/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
132#define JUMPABLE(rn) ( \
133 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
134 OP(rn) == SUSPEND || OP(rn) == IFMATCH \
135)
136
137#define NEAR_EXACT(rn) (PL_regkind[(U8)OP(rn)] == EXACT || JUMPABLE(rn))
138
139#define NEXT_IMPT(rn) STMT_START { \
140 while (JUMPABLE(rn)) \
141 if (OP(rn) == SUSPEND || OP(rn) == IFMATCH) \
142 rn = NEXTOPER(NEXTOPER(rn)); \
143 else rn += NEXT_OFF(rn); \
5f80c4cf 144} STMT_END
74750237 145
acfe0abc 146static void restore_pos(pTHX_ void *arg);
51371543 147
76e3520e 148STATIC CHECKPOINT
cea2e8a9 149S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 150{
3280af22 151 int retval = PL_savestack_ix;
b1ce53c5
JH
152#define REGCP_PAREN_ELEMS 4
153 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
154 int p;
155
e49a9654
IH
156 if (paren_elems_to_push < 0)
157 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
158
a01268b5 159#define REGCP_OTHER_ELEMS 6
b1ce53c5 160 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 161 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 162/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
163 SSPUSHINT(PL_regendp[p]);
164 SSPUSHINT(PL_regstartp[p]);
3280af22 165 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
166 SSPUSHINT(p);
167 }
b1ce53c5 168/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
169 SSPUSHINT(PL_regsize);
170 SSPUSHINT(*PL_reglastparen);
a01268b5 171 SSPUSHINT(*PL_reglastcloseparen);
3280af22 172 SSPUSHPTR(PL_reginput);
41123dfd
JH
173#define REGCP_FRAME_ELEMS 2
174/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
175 * are needed for the regexp context stack bookkeeping. */
176 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 177 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 178
a0d0e21e
LW
179 return retval;
180}
181
c277df42 182/* These are needed since we do not localize EVAL nodes: */
02db2b7b 183# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 184 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 185 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 186
02db2b7b 187# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 188 PerlIO_printf(Perl_debug_log, \
faccc32b 189 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 190 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 191
76e3520e 192STATIC char *
cea2e8a9 193S_regcppop(pTHX)
a0d0e21e 194{
b1ce53c5 195 I32 i;
a0d0e21e
LW
196 U32 paren = 0;
197 char *input;
cf93c79d 198 I32 tmps;
b1ce53c5
JH
199
200 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 201 i = SSPOPINT;
b1ce53c5
JH
202 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
203 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 204 input = (char *) SSPOPPTR;
a01268b5 205 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
206 *PL_reglastparen = SSPOPINT;
207 PL_regsize = SSPOPINT;
b1ce53c5
JH
208
209 /* Now restore the parentheses context. */
41123dfd
JH
210 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
211 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 212 paren = (U32)SSPOPINT;
3280af22 213 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
214 PL_regstartp[paren] = SSPOPINT;
215 tmps = SSPOPINT;
3280af22
NIS
216 if (paren <= *PL_reglastparen)
217 PL_regendp[paren] = tmps;
c277df42 218 DEBUG_r(
c3464db5 219 PerlIO_printf(Perl_debug_log,
b900a521 220 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 221 (UV)paren, (IV)PL_regstartp[paren],
b900a521 222 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 223 (IV)PL_regendp[paren],
3280af22 224 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 225 );
a0d0e21e 226 }
c277df42 227 DEBUG_r(
3280af22 228 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 229 PerlIO_printf(Perl_debug_log,
faccc32b
JH
230 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
231 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
232 }
233 );
daf18116 234#if 1
dafc8851
JH
235 /* It would seem that the similar code in regtry()
236 * already takes care of this, and in fact it is in
237 * a better location to since this code can #if 0-ed out
238 * but the code in regtry() is needed or otherwise tests
239 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
240 * (as of patchlevel 7877) will fail. Then again,
241 * this code seems to be necessary or otherwise
242 * building DynaLoader will fail:
243 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
244 * --jhi */
3280af22
NIS
245 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
246 if (paren > PL_regsize)
cf93c79d
IZ
247 PL_regstartp[paren] = -1;
248 PL_regendp[paren] = -1;
a0d0e21e 249 }
dafc8851 250#endif
a0d0e21e
LW
251 return input;
252}
253
0f5d15d6 254STATIC char *
cea2e8a9 255S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
256{
257 I32 tmp = PL_savestack_ix;
258
259 PL_savestack_ix = ss;
260 regcppop();
261 PL_savestack_ix = tmp;
942e002e 262 return Nullch;
0f5d15d6
IZ
263}
264
265typedef struct re_cc_state
266{
267 I32 ss;
268 regnode *node;
269 struct re_cc_state *prev;
270 CURCUR *cc;
271 regexp *re;
272} re_cc_state;
273
02db2b7b 274#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 275
29d1e993
HS
276#define TRYPAREN(paren, n, input) { \
277 if (paren) { \
278 if (n) { \
279 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
280 PL_regendp[paren] = input - PL_bostr; \
281 } \
282 else \
283 PL_regendp[paren] = -1; \
284 } \
285 if (regmatch(next)) \
286 sayYES; \
287 if (paren && n) \
288 PL_regendp[paren] = -1; \
289}
290
291
a687059c 292/*
e50aee73 293 * pregexec and friends
a687059c
LW
294 */
295
296/*
c277df42 297 - pregexec - match a regexp against a string
a687059c 298 */
c277df42 299I32
864dbfa3 300Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 301 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
302/* strend: pointer to null at end of string */
303/* strbeg: real beginning of string */
304/* minend: end of match must be >=minend after stringarg. */
305/* nosave: For optimizations. */
306{
307 return
9041c2e3 308 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
309 nosave ? 0 : REXEC_COPY_STR);
310}
0f5d15d6
IZ
311
312STATIC void
cea2e8a9 313S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
314{
315 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
316#ifdef DEBUGGING
317 PL_regprogram = prog->program;
318#endif
319 PL_regnpar = prog->nparens;
9041c2e3
NIS
320 PL_regdata = prog->data;
321 PL_reg_re = prog;
0f5d15d6 322}
22e551b9 323
9041c2e3 324/*
cad2e5aa
JH
325 * Need to implement the following flags for reg_anch:
326 *
327 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
328 * USE_INTUIT_ML
329 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
330 * INTUIT_AUTORITATIVE_ML
331 * INTUIT_ONCE_NOML - Intuit can match in one location only.
332 * INTUIT_ONCE_ML
333 *
334 * Another flag for this function: SECOND_TIME (so that float substrs
335 * with giant delta may be not rechecked).
336 */
337
338/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
339
2c2d71f5 340/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
341 Otherwise, only SvCUR(sv) is used to get strbeg. */
342
343/* XXXX We assume that strpos is strbeg unless sv. */
344
6eb5f6b9
JH
345/* XXXX Some places assume that there is a fixed substring.
346 An update may be needed if optimizer marks as "INTUITable"
347 RExen without fixed substrings. Similarly, it is assumed that
348 lengths of all the strings are no more than minlen, thus they
349 cannot come from lookahead.
350 (Or minlen should take into account lookahead.) */
351
2c2d71f5
JH
352/* A failure to find a constant substring means that there is no need to make
353 an expensive call to REx engine, thus we celebrate a failure. Similarly,
354 finding a substring too deep into the string means that less calls to
30944b6d
IZ
355 regtry() should be needed.
356
357 REx compiler's optimizer found 4 possible hints:
358 a) Anchored substring;
359 b) Fixed substring;
360 c) Whether we are anchored (beginning-of-line or \G);
361 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 362 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
363 string which does not contradict any of them.
364 */
2c2d71f5 365
6eb5f6b9
JH
366/* Most of decisions we do here should have been done at compile time.
367 The nodes of the REx which we used for the search should have been
368 deleted from the finite automaton. */
369
cad2e5aa
JH
370char *
371Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
372 char *strend, U32 flags, re_scream_pos_data *data)
373{
b7953727 374 register I32 start_shift = 0;
cad2e5aa 375 /* Should be nonnegative! */
b7953727 376 register I32 end_shift = 0;
2c2d71f5
JH
377 register char *s;
378 register SV *check;
a1933d95 379 char *strbeg;
cad2e5aa
JH
380 char *t;
381 I32 ml_anch;
6eb5f6b9 382 register char *other_last = Nullch; /* other substr checked before this */
b7953727 383 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
384#ifdef DEBUGGING
385 char *i_strpos = strpos;
2a782b5b 386 SV *dsv = sv_2mortal(newSVpvn("", 0));
30944b6d 387#endif
cad2e5aa 388
2a782b5b 389 DEBUG_r({
40eddc46
JH
390 char*s = UTF ? sv_uni_display(dsv, sv, 60, 0) : strpos;
391 int len = UTF ? strlen(s) : strend - strpos;
2a782b5b
JH
392 if (!PL_colorset)
393 reginitcolors();
394 PerlIO_printf(Perl_debug_log,
395 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
396 PL_colors[4],PL_colors[5],PL_colors[0],
397 prog->precomp,
398 PL_colors[1],
399 (strlen(prog->precomp) > 60 ? "..." : ""),
400 PL_colors[0],
401 (int)(len > 60 ? 60 : len),
402 s, PL_colors[1],
403 (len > 60 ? "..." : "")
404 );
405 });
cad2e5aa 406
1aa99e6b
IH
407 if (prog->reganch & ROPT_UTF8)
408 PL_reg_flags |= RF_utf8;
409
410 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
a72c7584
JH
411 DEBUG_r(PerlIO_printf(Perl_debug_log,
412 "String too short... [re_intuit_start]\n"));
cad2e5aa 413 goto fail;
2c2d71f5 414 }
a1933d95 415 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 416 PL_regeol = strend;
653099ff 417 check = prog->check_substr;
2c2d71f5 418 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
419 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
420 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 421 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 422
7e25d62c
JH
423 if (!ml_anch) {
424 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
425 | ROPT_IMPLICIT)) /* not a real BOL */
426 /* SvCUR is not set on references: SvRV and SvPVX overlap */
427 && sv && !SvROK(sv)
428 && (strpos != strbeg)) {
429 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
430 goto fail;
431 }
432 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 433 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 434 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
435 I32 slen;
436
1aa99e6b 437 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
438 if (SvTAIL(check)) {
439 slen = SvCUR(check); /* >= 1 */
cad2e5aa 440
9041c2e3 441 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
442 || (strend - s == slen && strend[-1] != '\n')) {
443 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
444 goto fail_finish;
cad2e5aa
JH
445 }
446 /* Now should match s[0..slen-2] */
447 slen--;
653099ff 448 if (slen && (*SvPVX(check) != *s
cad2e5aa 449 || (slen > 1
653099ff 450 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
451 report_neq:
452 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
453 goto fail_finish;
454 }
cad2e5aa 455 }
653099ff
GS
456 else if (*SvPVX(check) != *s
457 || ((slen = SvCUR(check)) > 1
458 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
459 goto report_neq;
460 goto success_at_start;
7e25d62c 461 }
cad2e5aa 462 }
2c2d71f5 463 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 464 s = strpos;
2c2d71f5 465 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 466 end_shift = prog->minlen - start_shift -
653099ff 467 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 468 if (!ml_anch) {
653099ff
GS
469 I32 end = prog->check_offset_max + CHR_SVLEN(check)
470 - (SvTAIL(check) != 0);
1aa99e6b 471 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
472
473 if (end_shift < eshift)
474 end_shift = eshift;
475 }
cad2e5aa 476 }
2c2d71f5 477 else { /* Can match at random position */
cad2e5aa
JH
478 ml_anch = 0;
479 s = strpos;
2c2d71f5
JH
480 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
481 /* Should be nonnegative! */
482 end_shift = prog->minlen - start_shift -
653099ff 483 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
484 }
485
2c2d71f5 486#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 487 if (end_shift < 0)
6bbae5e6 488 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
489#endif
490
2c2d71f5
JH
491 restart:
492 /* Find a possible match in the region s..strend by looking for
493 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 494 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
495 I32 p = -1; /* Internal iterator of scream. */
496 I32 *pp = data ? data->scream_pos : &p;
497
2c2d71f5
JH
498 if (PL_screamfirst[BmRARE(check)] >= 0
499 || ( BmRARE(check) == '\n'
500 && (BmPREVIOUS(check) == SvCUR(check) - 1)
501 && SvTAIL(check) ))
9041c2e3 502 s = screaminstr(sv, check,
2c2d71f5 503 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 504 else
2c2d71f5 505 goto fail_finish;
cad2e5aa
JH
506 if (data)
507 *data->scream_olds = s;
508 }
f33976b4 509 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
510 s = fbm_instr((U8*)(s + start_shift),
511 (U8*)(strend - end_shift),
512 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 513 else
1aa99e6b
IH
514 s = fbm_instr(HOP3(s, start_shift, strend),
515 HOP3(strend, -end_shift, strbeg),
2c2d71f5 516 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
517
518 /* Update the count-of-usability, remove useless subpatterns,
519 unshift s. */
2c2d71f5
JH
520
521 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
522 (s ? "Found" : "Did not find"),
523 ((check == prog->anchored_substr) ? "anchored" : "floating"),
524 PL_colors[0],
7b0972df
JH
525 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
526 SvPVX(check),
2c2d71f5
JH
527 PL_colors[1], (SvTAIL(check) ? "$" : ""),
528 (s ? " at offset " : "...\n") ) );
529
530 if (!s)
531 goto fail_finish;
532
6eb5f6b9
JH
533 check_at = s;
534
2c2d71f5 535 /* Finish the diagnostic message */
30944b6d 536 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
537
538 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
539 Start with the other substr.
540 XXXX no SCREAM optimization yet - and a very coarse implementation
541 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
542 *always* match. Probably should be marked during compile...
543 Probably it is right to do no SCREAM here...
544 */
545
546 if (prog->float_substr && prog->anchored_substr) {
30944b6d 547 /* Take into account the "other" substring. */
2c2d71f5
JH
548 /* XXXX May be hopelessly wrong for UTF... */
549 if (!other_last)
6eb5f6b9 550 other_last = strpos;
2c2d71f5 551 if (check == prog->float_substr) {
30944b6d
IZ
552 do_other_anchored:
553 {
1aa99e6b 554 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
555 char *s1 = s;
556
2c2d71f5
JH
557 t = s - prog->check_offset_max;
558 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
559 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 560 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 561 && t > strpos)))
30944b6d 562 /* EMPTY */;
2c2d71f5
JH
563 else
564 t = strpos;
1aa99e6b 565 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
566 if (t < other_last) /* These positions already checked */
567 t = other_last;
1aa99e6b 568 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
569 if (last < last1)
570 last1 = last;
571 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
572 /* On end-of-str: see comment below. */
573 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
574 HOP3(HOP3(last1, prog->anchored_offset, strend)
575 + SvCUR(prog->anchored_substr),
576 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
577 prog->anchored_substr,
578 PL_multiline ? FBMrf_MULTILINE : 0);
579 DEBUG_r(PerlIO_printf(Perl_debug_log,
580 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
581 (s ? "Found" : "Contradicts"),
582 PL_colors[0],
7b0972df
JH
583 (int)(SvCUR(prog->anchored_substr)
584 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
585 SvPVX(prog->anchored_substr),
586 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
587 if (!s) {
588 if (last1 >= last2) {
589 DEBUG_r(PerlIO_printf(Perl_debug_log,
590 ", giving up...\n"));
591 goto fail_finish;
592 }
593 DEBUG_r(PerlIO_printf(Perl_debug_log,
594 ", trying floating at offset %ld...\n",
1aa99e6b
IH
595 (long)(HOP3c(s1, 1, strend) - i_strpos)));
596 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
597 s = HOP3c(last, 1, strend);
2c2d71f5
JH
598 goto restart;
599 }
600 else {
601 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 602 (long)(s - i_strpos)));
1aa99e6b
IH
603 t = HOP3c(s, -prog->anchored_offset, strbeg);
604 other_last = HOP3c(s, 1, strend);
30944b6d 605 s = s1;
2c2d71f5
JH
606 if (t == strpos)
607 goto try_at_start;
2c2d71f5
JH
608 goto try_at_offset;
609 }
30944b6d 610 }
2c2d71f5
JH
611 }
612 else { /* Take into account the floating substring. */
613 char *last, *last1;
614 char *s1 = s;
615
1aa99e6b
IH
616 t = HOP3c(s, -start_shift, strbeg);
617 last1 = last =
618 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
619 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
620 last = HOP3c(t, prog->float_max_offset, strend);
621 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
622 if (s < other_last)
623 s = other_last;
2c2d71f5
JH
624 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
625 /* fbm_instr() takes into account exact value of end-of-str
626 if the check is SvTAIL(ed). Since false positives are OK,
627 and end-of-str is not later than strend we are OK. */
628 s = fbm_instr((unsigned char*)s,
629 (unsigned char*)last + SvCUR(prog->float_substr)
630 - (SvTAIL(prog->float_substr)!=0),
631 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
632 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
633 (s ? "Found" : "Contradicts"),
634 PL_colors[0],
7b0972df
JH
635 (int)(SvCUR(prog->float_substr)
636 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
637 SvPVX(prog->float_substr),
638 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
639 if (!s) {
640 if (last1 == last) {
641 DEBUG_r(PerlIO_printf(Perl_debug_log,
642 ", giving up...\n"));
643 goto fail_finish;
644 }
645 DEBUG_r(PerlIO_printf(Perl_debug_log,
646 ", trying anchored starting at offset %ld...\n",
30944b6d 647 (long)(s1 + 1 - i_strpos)));
803ff556 648 other_last = last;
1aa99e6b 649 s = HOP3c(t, 1, strend);
2c2d71f5
JH
650 goto restart;
651 }
652 else {
653 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 654 (long)(s - i_strpos)));
803ff556 655 other_last = s; /* Fix this later. --Hugo */
30944b6d 656 s = s1;
2c2d71f5
JH
657 if (t == strpos)
658 goto try_at_start;
2c2d71f5
JH
659 goto try_at_offset;
660 }
661 }
cad2e5aa 662 }
2c2d71f5
JH
663
664 t = s - prog->check_offset_max;
2c2d71f5
JH
665 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
666 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
667 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
668 && t > strpos))) {
2c2d71f5
JH
669 /* Fixed substring is found far enough so that the match
670 cannot start at strpos. */
671 try_at_offset:
cad2e5aa 672 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
673 /* Eventually fbm_*() should handle this, but often
674 anchored_offset is not 0, so this check will not be wasted. */
675 /* XXXX In the code below we prefer to look for "^" even in
676 presence of anchored substrings. And we search even
677 beyond the found float position. These pessimizations
678 are historical artefacts only. */
679 find_anchor:
2c2d71f5 680 while (t < strend - prog->minlen) {
cad2e5aa 681 if (*t == '\n') {
4ee3650e 682 if (t < check_at - prog->check_offset_min) {
30944b6d 683 if (prog->anchored_substr) {
4ee3650e
GS
684 /* Since we moved from the found position,
685 we definitely contradict the found anchored
30944b6d
IZ
686 substr. Due to the above check we do not
687 contradict "check" substr.
688 Thus we can arrive here only if check substr
689 is float. Redo checking for "other"=="fixed".
690 */
9041c2e3 691 strpos = t + 1;
30944b6d
IZ
692 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
693 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
694 goto do_other_anchored;
695 }
4ee3650e
GS
696 /* We don't contradict the found floating substring. */
697 /* XXXX Why not check for STCLASS? */
cad2e5aa 698 s = t + 1;
2c2d71f5 699 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 700 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
701 goto set_useful;
702 }
4ee3650e
GS
703 /* Position contradicts check-string */
704 /* XXXX probably better to look for check-string
705 than for "\n", so one should lower the limit for t? */
706 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 707 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 708 other_last = strpos = s = t + 1;
cad2e5aa
JH
709 goto restart;
710 }
711 t++;
712 }
2c2d71f5
JH
713 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
714 PL_colors[0],PL_colors[1]));
715 goto fail_finish;
cad2e5aa 716 }
f5952150
GS
717 else {
718 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
719 PL_colors[0],PL_colors[1]));
720 }
cad2e5aa
JH
721 s = t;
722 set_useful:
2c2d71f5 723 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
724 }
725 else {
f5952150 726 /* The found string does not prohibit matching at strpos,
2c2d71f5 727 - no optimization of calling REx engine can be performed,
f5952150
GS
728 unless it was an MBOL and we are not after MBOL,
729 or a future STCLASS check will fail this. */
2c2d71f5
JH
730 try_at_start:
731 /* Even in this situation we may use MBOL flag if strpos is offset
732 wrt the start of the string. */
05b4157f 733 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 734 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
735 /* May be due to an implicit anchor of m{.*foo} */
736 && !(prog->reganch & ROPT_IMPLICIT))
737 {
cad2e5aa
JH
738 t = strpos;
739 goto find_anchor;
740 }
30944b6d 741 DEBUG_r( if (ml_anch)
f5952150
GS
742 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
743 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 744 );
2c2d71f5 745 success_at_start:
30944b6d 746 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 747 && prog->check_substr /* Could be deleted already */
cad2e5aa 748 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
749 && prog->check_substr == prog->float_substr)
750 {
cad2e5aa 751 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 752 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
753 SvREFCNT_dec(prog->check_substr);
754 prog->check_substr = Nullsv; /* disable */
755 prog->float_substr = Nullsv; /* clear */
5e39e1e5 756 check = Nullsv; /* abort */
cad2e5aa 757 s = strpos;
3cf5c195
IZ
758 /* XXXX This is a remnant of the old implementation. It
759 looks wasteful, since now INTUIT can use many
6eb5f6b9 760 other heuristics. */
cad2e5aa
JH
761 prog->reganch &= ~RE_USE_INTUIT;
762 }
763 else
764 s = strpos;
765 }
766
6eb5f6b9
JH
767 /* Last resort... */
768 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
769 if (prog->regstclass) {
770 /* minlen == 0 is possible if regstclass is \b or \B,
771 and the fixed substr is ''$.
772 Since minlen is already taken into account, s+1 is before strend;
773 accidentally, minlen >= 1 guaranties no false positives at s + 1
774 even for \b or \B. But (minlen? 1 : 0) below assumes that
775 regstclass does not come from lookahead... */
776 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
777 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 778 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 779 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 780 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 781 : 1);
6eb5f6b9 782 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
783 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
784 : (prog->float_substr
785 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
786 cl_l, strend)
787 : strend);
a1933d95 788 char *startpos = strbeg;
6eb5f6b9
JH
789
790 t = s;
76384e4a 791 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 792 PL_regdata = prog->data;
76384e4a
GS
793 PL_bostr = startpos;
794 }
f33976b4 795 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
6eb5f6b9
JH
796 if (!s) {
797#ifdef DEBUGGING
b7953727 798 char *what = 0;
6eb5f6b9
JH
799#endif
800 if (endpos == strend) {
801 DEBUG_r( PerlIO_printf(Perl_debug_log,
802 "Could not match STCLASS...\n") );
803 goto fail;
804 }
66e933ab
GS
805 DEBUG_r( PerlIO_printf(Perl_debug_log,
806 "This position contradicts STCLASS...\n") );
653099ff
GS
807 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
808 goto fail;
6eb5f6b9
JH
809 /* Contradict one of substrings */
810 if (prog->anchored_substr) {
6eb5f6b9
JH
811 if (prog->anchored_substr == check) {
812 DEBUG_r( what = "anchored" );
813 hop_and_restart:
1aa99e6b 814 s = HOP3c(t, 1, strend);
66e933ab
GS
815 if (s + start_shift + end_shift > strend) {
816 /* XXXX Should be taken into account earlier? */
817 DEBUG_r( PerlIO_printf(Perl_debug_log,
818 "Could not match STCLASS...\n") );
819 goto fail;
820 }
5e39e1e5
HS
821 if (!check)
822 goto giveup;
6eb5f6b9 823 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 824 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
825 what, (long)(s + start_shift - i_strpos)) );
826 goto restart;
827 }
66e933ab 828 /* Have both, check_string is floating */
6eb5f6b9
JH
829 if (t + start_shift >= check_at) /* Contradicts floating=check */
830 goto retry_floating_check;
831 /* Recheck anchored substring, but not floating... */
9041c2e3 832 s = check_at;
5e39e1e5
HS
833 if (!check)
834 goto giveup;
6eb5f6b9 835 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 836 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
837 (long)(other_last - i_strpos)) );
838 goto do_other_anchored;
839 }
60e71179
GS
840 /* Another way we could have checked stclass at the
841 current position only: */
842 if (ml_anch) {
843 s = t = t + 1;
5e39e1e5
HS
844 if (!check)
845 goto giveup;
60e71179 846 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
847 "Looking for /%s^%s/m starting at offset %ld...\n",
848 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 849 goto try_at_offset;
66e933ab 850 }
60e71179
GS
851 if (!prog->float_substr) /* Could have been deleted */
852 goto fail;
6eb5f6b9
JH
853 /* Check is floating subtring. */
854 retry_floating_check:
855 t = check_at - start_shift;
856 DEBUG_r( what = "floating" );
857 goto hop_and_restart;
858 }
b7953727
JH
859 if (t != s) {
860 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 861 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
862 (long)(t - i_strpos), (long)(s - i_strpos))
863 );
864 }
865 else {
866 DEBUG_r(PerlIO_printf(Perl_debug_log,
867 "Does not contradict STCLASS...\n");
868 );
869 }
6eb5f6b9 870 }
5e39e1e5
HS
871 giveup:
872 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
873 PL_colors[4], (check ? "Guessed" : "Giving up"),
874 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 875 return s;
2c2d71f5
JH
876
877 fail_finish: /* Substring not found */
66e933ab
GS
878 if (prog->check_substr) /* could be removed already */
879 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 880 fail:
2c2d71f5 881 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
882 PL_colors[4],PL_colors[5]));
883 return Nullch;
884}
9661b544 885
6eb5f6b9 886/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
887STATIC char *
888S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 889{
6eb5f6b9
JH
890 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
891 char *m;
d8093b23 892 STRLEN ln;
893 unsigned int c1;
894 unsigned int c2;
6eb5f6b9
JH
895 char *e;
896 register I32 tmp = 1; /* Scratch variable? */
53c4c00c 897 register bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 898
6eb5f6b9
JH
899 /* We know what class it must start with. */
900 switch (OP(c)) {
6eb5f6b9 901 case ANYOF:
a0ed51b3 902 while (s < strend) {
ffc61ed2 903 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
904 if (tmp && (norun || regtry(prog, s)))
905 goto got_it;
906 else
907 tmp = doevery;
a0ed51b3 908 }
6eb5f6b9
JH
909 else
910 tmp = 1;
ffc61ed2 911 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 912 }
6eb5f6b9 913 break;
f33976b4
DB
914 case CANY:
915 while (s < strend) {
916 if (tmp && (norun || regtry(prog, s)))
917 goto got_it;
918 else
919 tmp = doevery;
920 s++;
921 }
922 break;
6eb5f6b9
JH
923 case EXACTF:
924 m = STRING(c);
925 ln = STR_LEN(c);
1aa99e6b 926 if (UTF) {
a2a2844f
JH
927 STRLEN ulen1, ulen2;
928 U8 tmpbuf1[UTF8_MAXLEN*2+1];
929 U8 tmpbuf2[UTF8_MAXLEN*2+1];
930
931 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
932 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
933
934 c1 = utf8_to_uvuni(tmpbuf1, 0);
935 c2 = utf8_to_uvuni(tmpbuf2, 0);
1aa99e6b
IH
936 }
937 else {
938 c1 = *(U8*)m;
939 c2 = PL_fold[c1];
940 }
6eb5f6b9
JH
941 goto do_exactf;
942 case EXACTFL:
943 m = STRING(c);
944 ln = STR_LEN(c);
d8093b23 945 c1 = *(U8*)m;
6eb5f6b9
JH
946 c2 = PL_fold_locale[c1];
947 do_exactf:
948 e = strend - ln;
b3c9acc1 949
6eb5f6b9
JH
950 if (norun && e < s)
951 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
952
953 if (do_utf8) {
954 STRLEN len;
955 if (c1 == c2)
956 while (s <= e) {
9041c2e3 957 if ( utf8_to_uvchr((U8*)s, &len) == c1
1aa99e6b
IH
958 && regtry(prog, s) )
959 goto got_it;
960 s += len;
961 }
962 else
963 while (s <= e) {
9041c2e3 964 UV c = utf8_to_uvchr((U8*)s, &len);
1aa99e6b
IH
965 if ( (c == c1 || c == c2) && regtry(prog, s) )
966 goto got_it;
967 s += len;
968 }
969 }
970 else {
971 if (c1 == c2)
972 while (s <= e) {
973 if ( *(U8*)s == c1
974 && (ln == 1 || !(OP(c) == EXACTF
975 ? ibcmp(s, m, ln)
976 : ibcmp_locale(s, m, ln)))
977 && (norun || regtry(prog, s)) )
978 goto got_it;
979 s++;
980 }
981 else
982 while (s <= e) {
983 if ( (*(U8*)s == c1 || *(U8*)s == c2)
984 && (ln == 1 || !(OP(c) == EXACTF
985 ? ibcmp(s, m, ln)
986 : ibcmp_locale(s, m, ln)))
987 && (norun || regtry(prog, s)) )
988 goto got_it;
989 s++;
990 }
b3c9acc1
IZ
991 }
992 break;
bbce6d69 993 case BOUNDL:
3280af22 994 PL_reg_flags |= RF_tainted;
bbce6d69 995 /* FALL THROUGH */
a0d0e21e 996 case BOUND:
ffc61ed2 997 if (do_utf8) {
12d33761 998 if (s == PL_bostr)
ffc61ed2
JH
999 tmp = '\n';
1000 else {
1aa99e6b 1001 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1002
0064a8a9
JH
1003 if (s > (char*)r)
1004 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1005 }
1006 tmp = ((OP(c) == BOUND ?
9041c2e3 1007 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1008 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1009 while (s < strend) {
1010 if (tmp == !(OP(c) == BOUND ?
3568d838 1011 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1012 isALNUM_LC_utf8((U8*)s)))
1013 {
1014 tmp = !tmp;
1015 if ((norun || regtry(prog, s)))
1016 goto got_it;
1017 }
1018 s += UTF8SKIP(s);
a687059c 1019 }
a0d0e21e 1020 }
667bb95a 1021 else {
12d33761 1022 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1023 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1024 while (s < strend) {
1025 if (tmp ==
1026 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1027 tmp = !tmp;
1028 if ((norun || regtry(prog, s)))
1029 goto got_it;
1030 }
1031 s++;
a0ed51b3 1032 }
a0ed51b3 1033 }
6eb5f6b9 1034 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1035 goto got_it;
1036 break;
bbce6d69 1037 case NBOUNDL:
3280af22 1038 PL_reg_flags |= RF_tainted;
bbce6d69 1039 /* FALL THROUGH */
a0d0e21e 1040 case NBOUND:
ffc61ed2 1041 if (do_utf8) {
12d33761 1042 if (s == PL_bostr)
ffc61ed2
JH
1043 tmp = '\n';
1044 else {
1aa99e6b 1045 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1046
0064a8a9
JH
1047 if (s > (char*)r)
1048 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1049 }
1050 tmp = ((OP(c) == NBOUND ?
9041c2e3 1051 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1052 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1053 while (s < strend) {
1054 if (tmp == !(OP(c) == NBOUND ?
3568d838 1055 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1056 isALNUM_LC_utf8((U8*)s)))
1057 tmp = !tmp;
1058 else if ((norun || regtry(prog, s)))
1059 goto got_it;
1060 s += UTF8SKIP(s);
1061 }
a0d0e21e 1062 }
667bb95a 1063 else {
12d33761 1064 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1065 tmp = ((OP(c) == NBOUND ?
1066 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1067 while (s < strend) {
1068 if (tmp ==
1069 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1070 tmp = !tmp;
1071 else if ((norun || regtry(prog, s)))
1072 goto got_it;
1073 s++;
1074 }
a0ed51b3 1075 }
6eb5f6b9 1076 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1077 goto got_it;
1078 break;
a0d0e21e 1079 case ALNUM:
ffc61ed2 1080 if (do_utf8) {
8269fa76 1081 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1082 while (s < strend) {
3568d838 1083 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1084 if (tmp && (norun || regtry(prog, s)))
1085 goto got_it;
1086 else
1087 tmp = doevery;
1088 }
bbce6d69 1089 else
ffc61ed2
JH
1090 tmp = 1;
1091 s += UTF8SKIP(s);
bbce6d69 1092 }
bbce6d69 1093 }
ffc61ed2
JH
1094 else {
1095 while (s < strend) {
1096 if (isALNUM(*s)) {
1097 if (tmp && (norun || regtry(prog, s)))
1098 goto got_it;
1099 else
1100 tmp = doevery;
1101 }
a0ed51b3 1102 else
ffc61ed2
JH
1103 tmp = 1;
1104 s++;
a0ed51b3 1105 }
a0ed51b3
LW
1106 }
1107 break;
bbce6d69 1108 case ALNUML:
3280af22 1109 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1110 if (do_utf8) {
1111 while (s < strend) {
1112 if (isALNUM_LC_utf8((U8*)s)) {
1113 if (tmp && (norun || regtry(prog, s)))
1114 goto got_it;
1115 else
1116 tmp = doevery;
1117 }
a687059c 1118 else
ffc61ed2
JH
1119 tmp = 1;
1120 s += UTF8SKIP(s);
a0d0e21e 1121 }
a0d0e21e 1122 }
ffc61ed2
JH
1123 else {
1124 while (s < strend) {
1125 if (isALNUM_LC(*s)) {
1126 if (tmp && (norun || regtry(prog, s)))
1127 goto got_it;
1128 else
1129 tmp = doevery;
1130 }
a0ed51b3 1131 else
ffc61ed2
JH
1132 tmp = 1;
1133 s++;
a0ed51b3 1134 }
a0ed51b3
LW
1135 }
1136 break;
a0d0e21e 1137 case NALNUM:
ffc61ed2 1138 if (do_utf8) {
8269fa76 1139 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1140 while (s < strend) {
3568d838 1141 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1142 if (tmp && (norun || regtry(prog, s)))
1143 goto got_it;
1144 else
1145 tmp = doevery;
1146 }
bbce6d69 1147 else
ffc61ed2
JH
1148 tmp = 1;
1149 s += UTF8SKIP(s);
bbce6d69 1150 }
bbce6d69 1151 }
ffc61ed2
JH
1152 else {
1153 while (s < strend) {
1154 if (!isALNUM(*s)) {
1155 if (tmp && (norun || regtry(prog, s)))
1156 goto got_it;
1157 else
1158 tmp = doevery;
1159 }
a0ed51b3 1160 else
ffc61ed2
JH
1161 tmp = 1;
1162 s++;
a0ed51b3 1163 }
a0ed51b3
LW
1164 }
1165 break;
bbce6d69 1166 case NALNUML:
3280af22 1167 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1168 if (do_utf8) {
1169 while (s < strend) {
1170 if (!isALNUM_LC_utf8((U8*)s)) {
1171 if (tmp && (norun || regtry(prog, s)))
1172 goto got_it;
1173 else
1174 tmp = doevery;
1175 }
a687059c 1176 else
ffc61ed2
JH
1177 tmp = 1;
1178 s += UTF8SKIP(s);
a687059c 1179 }
a0d0e21e 1180 }
ffc61ed2
JH
1181 else {
1182 while (s < strend) {
1183 if (!isALNUM_LC(*s)) {
1184 if (tmp && (norun || regtry(prog, s)))
1185 goto got_it;
1186 else
1187 tmp = doevery;
1188 }
a0ed51b3 1189 else
ffc61ed2
JH
1190 tmp = 1;
1191 s++;
a0ed51b3 1192 }
a0ed51b3
LW
1193 }
1194 break;
a0d0e21e 1195 case SPACE:
ffc61ed2 1196 if (do_utf8) {
8269fa76 1197 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1198 while (s < strend) {
3568d838 1199 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1200 if (tmp && (norun || regtry(prog, s)))
1201 goto got_it;
1202 else
1203 tmp = doevery;
1204 }
a0d0e21e 1205 else
ffc61ed2
JH
1206 tmp = 1;
1207 s += UTF8SKIP(s);
2304df62 1208 }
a0d0e21e 1209 }
ffc61ed2
JH
1210 else {
1211 while (s < strend) {
1212 if (isSPACE(*s)) {
1213 if (tmp && (norun || regtry(prog, s)))
1214 goto got_it;
1215 else
1216 tmp = doevery;
1217 }
a0ed51b3 1218 else
ffc61ed2
JH
1219 tmp = 1;
1220 s++;
a0ed51b3 1221 }
a0ed51b3
LW
1222 }
1223 break;
bbce6d69 1224 case SPACEL:
3280af22 1225 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1226 if (do_utf8) {
1227 while (s < strend) {
1228 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1229 if (tmp && (norun || regtry(prog, s)))
1230 goto got_it;
1231 else
1232 tmp = doevery;
1233 }
bbce6d69 1234 else
ffc61ed2
JH
1235 tmp = 1;
1236 s += UTF8SKIP(s);
bbce6d69 1237 }
bbce6d69 1238 }
ffc61ed2
JH
1239 else {
1240 while (s < strend) {
1241 if (isSPACE_LC(*s)) {
1242 if (tmp && (norun || regtry(prog, s)))
1243 goto got_it;
1244 else
1245 tmp = doevery;
1246 }
a0ed51b3 1247 else
ffc61ed2
JH
1248 tmp = 1;
1249 s++;
a0ed51b3 1250 }
a0ed51b3
LW
1251 }
1252 break;
a0d0e21e 1253 case NSPACE:
ffc61ed2 1254 if (do_utf8) {
8269fa76 1255 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1256 while (s < strend) {
3568d838 1257 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1258 if (tmp && (norun || regtry(prog, s)))
1259 goto got_it;
1260 else
1261 tmp = doevery;
1262 }
a0d0e21e 1263 else
ffc61ed2
JH
1264 tmp = 1;
1265 s += UTF8SKIP(s);
a687059c 1266 }
a0d0e21e 1267 }
ffc61ed2
JH
1268 else {
1269 while (s < strend) {
1270 if (!isSPACE(*s)) {
1271 if (tmp && (norun || regtry(prog, s)))
1272 goto got_it;
1273 else
1274 tmp = doevery;
1275 }
a0ed51b3 1276 else
ffc61ed2
JH
1277 tmp = 1;
1278 s++;
a0ed51b3 1279 }
a0ed51b3
LW
1280 }
1281 break;
bbce6d69 1282 case NSPACEL:
3280af22 1283 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1284 if (do_utf8) {
1285 while (s < strend) {
1286 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1287 if (tmp && (norun || regtry(prog, s)))
1288 goto got_it;
1289 else
1290 tmp = doevery;
1291 }
bbce6d69 1292 else
ffc61ed2
JH
1293 tmp = 1;
1294 s += UTF8SKIP(s);
bbce6d69 1295 }
bbce6d69 1296 }
ffc61ed2
JH
1297 else {
1298 while (s < strend) {
1299 if (!isSPACE_LC(*s)) {
1300 if (tmp && (norun || regtry(prog, s)))
1301 goto got_it;
1302 else
1303 tmp = doevery;
1304 }
a0ed51b3 1305 else
ffc61ed2
JH
1306 tmp = 1;
1307 s++;
a0ed51b3 1308 }
a0ed51b3
LW
1309 }
1310 break;
a0d0e21e 1311 case DIGIT:
ffc61ed2 1312 if (do_utf8) {
8269fa76 1313 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1314 while (s < strend) {
3568d838 1315 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1316 if (tmp && (norun || regtry(prog, s)))
1317 goto got_it;
1318 else
1319 tmp = doevery;
1320 }
a0d0e21e 1321 else
ffc61ed2
JH
1322 tmp = 1;
1323 s += UTF8SKIP(s);
2b69d0c2 1324 }
a0d0e21e 1325 }
ffc61ed2
JH
1326 else {
1327 while (s < strend) {
1328 if (isDIGIT(*s)) {
1329 if (tmp && (norun || regtry(prog, s)))
1330 goto got_it;
1331 else
1332 tmp = doevery;
1333 }
a0ed51b3 1334 else
ffc61ed2
JH
1335 tmp = 1;
1336 s++;
a0ed51b3 1337 }
a0ed51b3
LW
1338 }
1339 break;
b8c5462f
JH
1340 case DIGITL:
1341 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1342 if (do_utf8) {
1343 while (s < strend) {
1344 if (isDIGIT_LC_utf8((U8*)s)) {
1345 if (tmp && (norun || regtry(prog, s)))
1346 goto got_it;
1347 else
1348 tmp = doevery;
1349 }
b8c5462f 1350 else
ffc61ed2
JH
1351 tmp = 1;
1352 s += UTF8SKIP(s);
b8c5462f 1353 }
b8c5462f 1354 }
ffc61ed2
JH
1355 else {
1356 while (s < strend) {
1357 if (isDIGIT_LC(*s)) {
1358 if (tmp && (norun || regtry(prog, s)))
1359 goto got_it;
1360 else
1361 tmp = doevery;
1362 }
b8c5462f 1363 else
ffc61ed2
JH
1364 tmp = 1;
1365 s++;
b8c5462f 1366 }
b8c5462f
JH
1367 }
1368 break;
a0d0e21e 1369 case NDIGIT:
ffc61ed2 1370 if (do_utf8) {
8269fa76 1371 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1372 while (s < strend) {
3568d838 1373 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1374 if (tmp && (norun || regtry(prog, s)))
1375 goto got_it;
1376 else
1377 tmp = doevery;
1378 }
a0d0e21e 1379 else
ffc61ed2
JH
1380 tmp = 1;
1381 s += UTF8SKIP(s);
a687059c 1382 }
a0d0e21e 1383 }
ffc61ed2
JH
1384 else {
1385 while (s < strend) {
1386 if (!isDIGIT(*s)) {
1387 if (tmp && (norun || regtry(prog, s)))
1388 goto got_it;
1389 else
1390 tmp = doevery;
1391 }
a0ed51b3 1392 else
ffc61ed2
JH
1393 tmp = 1;
1394 s++;
a0ed51b3 1395 }
a0ed51b3
LW
1396 }
1397 break;
b8c5462f
JH
1398 case NDIGITL:
1399 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1400 if (do_utf8) {
1401 while (s < strend) {
1402 if (!isDIGIT_LC_utf8((U8*)s)) {
1403 if (tmp && (norun || regtry(prog, s)))
1404 goto got_it;
1405 else
1406 tmp = doevery;
1407 }
b8c5462f 1408 else
ffc61ed2
JH
1409 tmp = 1;
1410 s += UTF8SKIP(s);
b8c5462f 1411 }
a0ed51b3 1412 }
ffc61ed2
JH
1413 else {
1414 while (s < strend) {
1415 if (!isDIGIT_LC(*s)) {
1416 if (tmp && (norun || regtry(prog, s)))
1417 goto got_it;
1418 else
1419 tmp = doevery;
1420 }
cf93c79d 1421 else
ffc61ed2
JH
1422 tmp = 1;
1423 s++;
b8c5462f 1424 }
b8c5462f
JH
1425 }
1426 break;
b3c9acc1 1427 default:
3c3eec57
GS
1428 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1429 break;
d6a28714 1430 }
6eb5f6b9
JH
1431 return 0;
1432 got_it:
1433 return s;
1434}
1435
1436/*
1437 - regexec_flags - match a regexp against a string
1438 */
1439I32
1440Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1441 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1442/* strend: pointer to null at end of string */
1443/* strbeg: real beginning of string */
1444/* minend: end of match must be >=minend after stringarg. */
1445/* data: May be used for some additional optimizations. */
1446/* nosave: For optimizations. */
1447{
6eb5f6b9
JH
1448 register char *s;
1449 register regnode *c;
1450 register char *startpos = stringarg;
6eb5f6b9
JH
1451 I32 minlen; /* must match at least this many chars */
1452 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1453 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1454 constant substr. */ /* CC */
1455 I32 end_shift = 0; /* Same for the end. */ /* CC */
1456 I32 scream_pos = -1; /* Internal iterator of scream. */
1457 char *scream_olds;
1458 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1459 bool do_utf8 = DO_UTF8(sv);
2a782b5b
JH
1460#ifdef DEBUGGING
1461 SV *dsv = sv_2mortal(newSVpvn("", 0));
1462#endif
6eb5f6b9
JH
1463
1464 PL_regcc = 0;
1465
1466 cache_re(prog);
1467#ifdef DEBUGGING
aea4f609 1468 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1469#endif
1470
1471 /* Be paranoid... */
1472 if (prog == NULL || startpos == NULL) {
1473 Perl_croak(aTHX_ "NULL regexp parameter");
1474 return 0;
1475 }
1476
1477 minlen = prog->minlen;
a72c7584
JH
1478 if (strend - startpos < minlen) {
1479 DEBUG_r(PerlIO_printf(Perl_debug_log,
1480 "String too short [regexec_flags]...\n"));
1481 goto phooey;
1aa99e6b 1482 }
6eb5f6b9 1483
6eb5f6b9
JH
1484 /* Check validity of program. */
1485 if (UCHARAT(prog->program) != REG_MAGIC) {
1486 Perl_croak(aTHX_ "corrupted regexp program");
1487 }
1488
1489 PL_reg_flags = 0;
1490 PL_reg_eval_set = 0;
1491 PL_reg_maxiter = 0;
1492
1493 if (prog->reganch & ROPT_UTF8)
1494 PL_reg_flags |= RF_utf8;
1495
1496 /* Mark beginning of line for ^ and lookbehind. */
1497 PL_regbol = startpos;
1498 PL_bostr = strbeg;
1499 PL_reg_sv = sv;
1500
1501 /* Mark end of line for $ (and such) */
1502 PL_regeol = strend;
1503
1504 /* see how far we have to get to not match where we matched before */
1505 PL_regtill = startpos+minend;
1506
1507 /* We start without call_cc context. */
1508 PL_reg_call_cc = 0;
1509
1510 /* If there is a "must appear" string, look for it. */
1511 s = startpos;
1512
1513 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1514 MAGIC *mg;
1515
1516 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1517 PL_reg_ganch = startpos;
1518 else if (sv && SvTYPE(sv) >= SVt_PVMG
1519 && SvMAGIC(sv)
14befaf4
DM
1520 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1521 && mg->mg_len >= 0) {
6eb5f6b9
JH
1522 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1523 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1524 if (s > PL_reg_ganch)
6eb5f6b9
JH
1525 goto phooey;
1526 s = PL_reg_ganch;
1527 }
1528 }
1529 else /* pos() not defined */
1530 PL_reg_ganch = strbeg;
1531 }
1532
699c3c34
JH
1533 if (do_utf8 == (UTF!=0) &&
1534 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1535 re_scream_pos_data d;
1536
1537 d.scream_olds = &scream_olds;
1538 d.scream_pos = &scream_pos;
1539 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7
JH
1540 if (!s) {
1541 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1542 goto phooey; /* not present */
3fa9c3d7 1543 }
6eb5f6b9
JH
1544 }
1545
2a782b5b 1546 DEBUG_r({
df1ffd02
JH
1547 char *s = do_utf8 ? sv_uni_display(dsv, sv, 60, 0) : startpos;
1548 int len = do_utf8 ? strlen(s) : strend - startpos;
2a782b5b
JH
1549 if (!PL_colorset)
1550 reginitcolors();
1551 PerlIO_printf(Perl_debug_log,
1552 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1553 PL_colors[4],PL_colors[5],PL_colors[0],
1554 prog->precomp,
1555 PL_colors[1],
1556 (strlen(prog->precomp) > 60 ? "..." : ""),
1557 PL_colors[0],
1558 (int)(len > 60 ? 60 : len),
1559 s, PL_colors[1],
1560 (len > 60 ? "..." : "")
1561 );
1562 });
6eb5f6b9
JH
1563
1564 /* Simplest case: anchored match need be tried only once. */
1565 /* [unless only anchor is BOL and multiline is set] */
1566 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1567 if (s == startpos && regtry(prog, startpos))
1568 goto got_it;
1569 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1570 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1571 {
1572 char *end;
1573
1574 if (minlen)
1575 dontbother = minlen - 1;
1aa99e6b 1576 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1577 /* for multiline we only have to try after newlines */
1578 if (prog->check_substr) {
1579 if (s == startpos)
1580 goto after_try;
1581 while (1) {
1582 if (regtry(prog, s))
1583 goto got_it;
1584 after_try:
1585 if (s >= end)
1586 goto phooey;
1587 if (prog->reganch & RE_USE_INTUIT) {
1588 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1589 if (!s)
1590 goto phooey;
1591 }
1592 else
1593 s++;
1594 }
1595 } else {
1596 if (s > startpos)
1597 s--;
1598 while (s < end) {
1599 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1600 if (regtry(prog, s))
1601 goto got_it;
1602 }
1603 }
1604 }
1605 }
1606 goto phooey;
1607 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1608 if (regtry(prog, PL_reg_ganch))
1609 goto got_it;
1610 goto phooey;
1611 }
1612
1613 /* Messy cases: unanchored match. */
9041c2e3 1614 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1615 /* we have /x+whatever/ */
1616 /* it must be a one character string (XXXX Except UTF?) */
1617 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1618#ifdef DEBUGGING
1619 int did_match = 0;
1620#endif
1621
1aa99e6b 1622 if (do_utf8) {
6eb5f6b9
JH
1623 while (s < strend) {
1624 if (*s == ch) {
bf93d4cc 1625 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1626 if (regtry(prog, s)) goto got_it;
1627 s += UTF8SKIP(s);
1628 while (s < strend && *s == ch)
1629 s += UTF8SKIP(s);
1630 }
1631 s += UTF8SKIP(s);
1632 }
1633 }
1634 else {
1635 while (s < strend) {
1636 if (*s == ch) {
bf93d4cc 1637 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1638 if (regtry(prog, s)) goto got_it;
1639 s++;
1640 while (s < strend && *s == ch)
1641 s++;
1642 }
1643 s++;
1644 }
1645 }
b7953727 1646 DEBUG_r(if (!did_match)
bf93d4cc 1647 PerlIO_printf(Perl_debug_log,
b7953727
JH
1648 "Did not find anchored character...\n")
1649 );
6eb5f6b9
JH
1650 }
1651 /*SUPPRESS 560*/
1aa99e6b
IH
1652 else if (do_utf8 == (UTF!=0) &&
1653 (prog->anchored_substr != Nullsv
9041c2e3 1654 || (prog->float_substr != Nullsv
1aa99e6b 1655 && prog->float_max_offset < strend - s))) {
9041c2e3 1656 SV *must = prog->anchored_substr
6eb5f6b9 1657 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1658 I32 back_max =
6eb5f6b9 1659 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1660 I32 back_min =
6eb5f6b9 1661 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1662 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1663 -(I32)(CHR_SVLEN(must)
1aa99e6b 1664 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1665 char *last1; /* Last position checked before */
bf93d4cc
GS
1666#ifdef DEBUGGING
1667 int did_match = 0;
1668#endif
6eb5f6b9
JH
1669
1670 if (s > PL_bostr)
1671 last1 = HOPc(s, -1);
1672 else
1673 last1 = s - 1; /* bogus */
1674
1675 /* XXXX check_substr already used to find `s', can optimize if
1676 check_substr==must. */
1677 scream_pos = -1;
1678 dontbother = end_shift;
1679 strend = HOPc(strend, -dontbother);
1680 while ( (s <= last) &&
9041c2e3 1681 ((flags & REXEC_SCREAM)
1aa99e6b 1682 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1683 end_shift, &scream_pos, 0))
1aa99e6b 1684 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1685 (unsigned char*)strend, must,
6eb5f6b9 1686 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1687 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1688 if (HOPc(s, -back_max) > last1) {
1689 last1 = HOPc(s, -back_min);
1690 s = HOPc(s, -back_max);
1691 }
1692 else {
1693 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1694
1695 last1 = HOPc(s, -back_min);
1696 s = t;
1697 }
1aa99e6b 1698 if (do_utf8) {
6eb5f6b9
JH
1699 while (s <= last1) {
1700 if (regtry(prog, s))
1701 goto got_it;
1702 s += UTF8SKIP(s);
1703 }
1704 }
1705 else {
1706 while (s <= last1) {
1707 if (regtry(prog, s))
1708 goto got_it;
1709 s++;
1710 }
1711 }
1712 }
b7953727
JH
1713 DEBUG_r(if (!did_match)
1714 PerlIO_printf(Perl_debug_log,
1715 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1716 ((must == prog->anchored_substr)
1717 ? "anchored" : "floating"),
1718 PL_colors[0],
1719 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1720 SvPVX(must),
b7953727
JH
1721 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1722 );
6eb5f6b9
JH
1723 goto phooey;
1724 }
155aba94 1725 else if ((c = prog->regstclass)) {
66e933ab
GS
1726 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1727 /* don't bother with what can't match */
6eb5f6b9 1728 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1729 DEBUG_r({
1730 SV *prop = sv_newmortal();
1731 regprop(prop, c);
2a782b5b 1732 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), UTF ? sv_uni_display(dsv, sv, 60, 0) : s);
ffc61ed2 1733 });
6eb5f6b9
JH
1734 if (find_byclass(prog, c, s, strend, startpos, 0))
1735 goto got_it;
bf93d4cc 1736 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1737 }
1738 else {
1739 dontbother = 0;
1740 if (prog->float_substr != Nullsv) { /* Trim the end. */
1741 char *last;
d6a28714
JH
1742
1743 if (flags & REXEC_SCREAM) {
1744 last = screaminstr(sv, prog->float_substr, s - strbeg,
1745 end_shift, &scream_pos, 1); /* last one */
1746 if (!last)
ffc61ed2 1747 last = scream_olds; /* Only one occurrence. */
b8c5462f 1748 }
d6a28714
JH
1749 else {
1750 STRLEN len;
1751 char *little = SvPV(prog->float_substr, len);
1752
1753 if (SvTAIL(prog->float_substr)) {
1754 if (memEQ(strend - len + 1, little, len - 1))
1755 last = strend - len + 1;
1756 else if (!PL_multiline)
9041c2e3 1757 last = memEQ(strend - len, little, len)
d6a28714 1758 ? strend - len : Nullch;
b8c5462f 1759 else
d6a28714
JH
1760 goto find_last;
1761 } else {
1762 find_last:
9041c2e3 1763 if (len)
d6a28714 1764 last = rninstr(s, strend, little, little + len);
b8c5462f 1765 else
d6a28714 1766 last = strend; /* matching `$' */
b8c5462f 1767 }
b8c5462f 1768 }
bf93d4cc
GS
1769 if (last == NULL) {
1770 DEBUG_r(PerlIO_printf(Perl_debug_log,
1771 "%sCan't trim the tail, match fails (should not happen)%s\n",
1772 PL_colors[4],PL_colors[5]));
1773 goto phooey; /* Should not happen! */
1774 }
d6a28714
JH
1775 dontbother = strend - last + prog->float_min_offset;
1776 }
1777 if (minlen && (dontbother < minlen))
1778 dontbother = minlen - 1;
1779 strend -= dontbother; /* this one's always in bytes! */
1780 /* We don't know much -- general case. */
1aa99e6b 1781 if (do_utf8) {
d6a28714
JH
1782 for (;;) {
1783 if (regtry(prog, s))
1784 goto got_it;
1785 if (s >= strend)
1786 break;
b8c5462f 1787 s += UTF8SKIP(s);
d6a28714
JH
1788 };
1789 }
1790 else {
1791 do {
1792 if (regtry(prog, s))
1793 goto got_it;
1794 } while (s++ < strend);
1795 }
1796 }
1797
1798 /* Failure. */
1799 goto phooey;
1800
1801got_it:
1802 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1803
1804 if (PL_reg_eval_set) {
1805 /* Preserve the current value of $^R */
1806 if (oreplsv != GvSV(PL_replgv))
1807 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1808 restored, the value remains
1809 the same. */
acfe0abc 1810 restore_pos(aTHX_ 0);
d6a28714
JH
1811 }
1812
1813 /* make sure $`, $&, $', and $digit will work later */
1814 if ( !(flags & REXEC_NOT_FIRST) ) {
1815 if (RX_MATCH_COPIED(prog)) {
1816 Safefree(prog->subbeg);
1817 RX_MATCH_COPIED_off(prog);
1818 }
1819 if (flags & REXEC_COPY_STR) {
1820 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1821
1822 s = savepvn(strbeg, i);
1823 prog->subbeg = s;
1824 prog->sublen = i;
1825 RX_MATCH_COPIED_on(prog);
1826 }
1827 else {
1828 prog->subbeg = strbeg;
1829 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1830 }
1831 }
9041c2e3 1832
d6a28714
JH
1833 return 1;
1834
1835phooey:
bf93d4cc
GS
1836 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1837 PL_colors[4],PL_colors[5]));
d6a28714 1838 if (PL_reg_eval_set)
acfe0abc 1839 restore_pos(aTHX_ 0);
d6a28714
JH
1840 return 0;
1841}
1842
1843/*
1844 - regtry - try match at specific point
1845 */
1846STATIC I32 /* 0 failure, 1 success */
1847S_regtry(pTHX_ regexp *prog, char *startpos)
1848{
d6a28714
JH
1849 register I32 i;
1850 register I32 *sp;
1851 register I32 *ep;
1852 CHECKPOINT lastcp;
1853
02db2b7b
IZ
1854#ifdef DEBUGGING
1855 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1856#endif
d6a28714
JH
1857 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1858 MAGIC *mg;
1859
1860 PL_reg_eval_set = RS_init;
1861 DEBUG_r(DEBUG_s(
b900a521
JH
1862 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1863 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1864 ));
e8347627 1865 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1866 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1867 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1868 SAVETMPS;
1869 /* Apparently this is not needed, judging by wantarray. */
e8347627 1870 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1871 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1872
1873 if (PL_reg_sv) {
1874 /* Make $_ available to executed code. */
1875 if (PL_reg_sv != DEFSV) {
4d1ff10f 1876 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
d6a28714
JH
1877 SAVESPTR(DEFSV);
1878 DEFSV = PL_reg_sv;
b8c5462f 1879 }
d6a28714 1880
9041c2e3 1881 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1882 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1883 /* prepare for quick setting of pos */
14befaf4
DM
1884 sv_magic(PL_reg_sv, (SV*)0,
1885 PERL_MAGIC_regex_global, Nullch, 0);
1886 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1887 mg->mg_len = -1;
b8c5462f 1888 }
d6a28714
JH
1889 PL_reg_magic = mg;
1890 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1891 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 1892 }
09687e5a 1893 if (!PL_reg_curpm) {
0f79a09d 1894 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
1895#ifdef USE_ITHREADS
1896 {
1897 SV* repointer = newSViv(0);
577e12cc 1898 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 1899 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
1900 av_push(PL_regex_padav,repointer);
1901 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
1902 PL_regex_pad = AvARRAY(PL_regex_padav);
1903 }
1904#endif
1905 }
aaa362c4 1906 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
1907 PL_reg_oldcurpm = PL_curpm;
1908 PL_curpm = PL_reg_curpm;
1909 if (RX_MATCH_COPIED(prog)) {
1910 /* Here is a serious problem: we cannot rewrite subbeg,
1911 since it may be needed if this match fails. Thus
1912 $` inside (?{}) could fail... */
1913 PL_reg_oldsaved = prog->subbeg;
1914 PL_reg_oldsavedlen = prog->sublen;
1915 RX_MATCH_COPIED_off(prog);
1916 }
1917 else
1918 PL_reg_oldsaved = Nullch;
1919 prog->subbeg = PL_bostr;
1920 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1921 }
1922 prog->startp[0] = startpos - PL_bostr;
1923 PL_reginput = startpos;
1924 PL_regstartp = prog->startp;
1925 PL_regendp = prog->endp;
1926 PL_reglastparen = &prog->lastparen;
a01268b5 1927 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
1928 prog->lastparen = 0;
1929 PL_regsize = 0;
1930 DEBUG_r(PL_reg_starttry = startpos);
1931 if (PL_reg_start_tmpl <= prog->nparens) {
1932 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1933 if(PL_reg_start_tmp)
1934 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1935 else
1936 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1937 }
1938
1939 /* XXXX What this code is doing here?!!! There should be no need
1940 to do this again and again, PL_reglastparen should take care of
3dd2943c 1941 this! --ilya*/
dafc8851
JH
1942
1943 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1944 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1945 * PL_reglastparen), is not needed at all by the test suite
1946 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1947 * enough, for building DynaLoader, or otherwise this
1948 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1949 * will happen. Meanwhile, this code *is* needed for the
1950 * above-mentioned test suite tests to succeed. The common theme
1951 * on those tests seems to be returning null fields from matches.
1952 * --jhi */
dafc8851 1953#if 1
d6a28714
JH
1954 sp = prog->startp;
1955 ep = prog->endp;
1956 if (prog->nparens) {
09e8ae3b 1957 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1958 *++sp = -1;
1959 *++ep = -1;
1960 }
1961 }
dafc8851 1962#endif
02db2b7b 1963 REGCP_SET(lastcp);
d6a28714
JH
1964 if (regmatch(prog->program + 1)) {
1965 prog->endp[0] = PL_reginput - PL_bostr;
1966 return 1;
1967 }
02db2b7b 1968 REGCP_UNWIND(lastcp);
d6a28714
JH
1969 return 0;
1970}
1971
02db2b7b
IZ
1972#define RE_UNWIND_BRANCH 1
1973#define RE_UNWIND_BRANCHJ 2
1974
1975union re_unwind_t;
1976
1977typedef struct { /* XX: makes sense to enlarge it... */
1978 I32 type;
1979 I32 prev;
1980 CHECKPOINT lastcp;
1981} re_unwind_generic_t;
1982
1983typedef struct {
1984 I32 type;
1985 I32 prev;
1986 CHECKPOINT lastcp;
1987 I32 lastparen;
1988 regnode *next;
1989 char *locinput;
1990 I32 nextchr;
1991#ifdef DEBUGGING
1992 int regindent;
1993#endif
1994} re_unwind_branch_t;
1995
1996typedef union re_unwind_t {
1997 I32 type;
1998 re_unwind_generic_t generic;
1999 re_unwind_branch_t branch;
2000} re_unwind_t;
2001
8ba1375e
MJD
2002#define sayYES goto yes
2003#define sayNO goto no
2004#define sayYES_FINAL goto yes_final
2005#define sayYES_LOUD goto yes_loud
2006#define sayNO_FINAL goto no_final
2007#define sayNO_SILENT goto do_no
2008#define saySAME(x) if (x) goto yes; else goto no
2009
2010#define REPORT_CODE_OFF 24
2011
d6a28714
JH
2012/*
2013 - regmatch - main matching routine
2014 *
2015 * Conceptually the strategy is simple: check to see whether the current
2016 * node matches, call self recursively to see whether the rest matches,
2017 * and then act accordingly. In practice we make some effort to avoid
2018 * recursion, in particular by going through "ordinary" nodes (that don't
2019 * need to know whether the rest of the match failed) by a loop instead of
2020 * by recursion.
2021 */
2022/* [lwall] I've hoisted the register declarations to the outer block in order to
2023 * maybe save a little bit of pushing and popping on the stack. It also takes
2024 * advantage of machines that use a register save mask on subroutine entry.
2025 */
2026STATIC I32 /* 0 failure, 1 success */
2027S_regmatch(pTHX_ regnode *prog)
2028{
d6a28714
JH
2029 register regnode *scan; /* Current node. */
2030 regnode *next; /* Next node. */
2031 regnode *inner; /* Next node in internal branch. */
2032 register I32 nextchr; /* renamed nextchr - nextchar colides with
2033 function of same name */
2034 register I32 n; /* no or next */
b7953727
JH
2035 register I32 ln = 0; /* len or last */
2036 register char *s = Nullch; /* operand or save */
d6a28714 2037 register char *locinput = PL_reginput;
b7953727 2038 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2039 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2040 I32 unwind = 0;
b7953727 2041#if 0
02db2b7b 2042 I32 firstcp = PL_savestack_ix;
b7953727 2043#endif
53c4c00c 2044 register bool do_utf8 = PL_reg_match_utf8;
2a782b5b
JH
2045#ifdef DEBUGGING
2046 SV *dsv0 = sv_2mortal(newSVpvn("", 0));
2047 SV *dsv1 = sv_2mortal(newSVpvn("", 0));
2048 SV *dsv2 = sv_2mortal(newSVpvn("", 0));
2049#endif
02db2b7b 2050
d6a28714
JH
2051#ifdef DEBUGGING
2052 PL_regindent++;
2053#endif
2054
2055 /* Note that nextchr is a byte even in UTF */
2056 nextchr = UCHARAT(locinput);
2057 scan = prog;
2058 while (scan != NULL) {
8ba1375e 2059
2a782b5b 2060 DEBUG_r( {
d6a28714
JH
2061 SV *prop = sv_newmortal();
2062 int docolor = *PL_colors[0];
2063 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2064 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2065 /* The part of the string before starttry has one color
2066 (pref0_len chars), between starttry and current
2067 position another one (pref_len - pref0_len chars),
2068 after the current position the third one.
2069 We assume that pref0_len <= pref_len, otherwise we
2070 decrease pref0_len. */
9041c2e3 2071 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2072 ? (5 + taill) - l : locinput - PL_bostr;
2073 int pref0_len;
d6a28714 2074
df1ffd02 2075 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2076 pref_len++;
2077 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2078 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2079 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2080 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2081 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2082 l--;
d6a28714
JH
2083 if (pref0_len < 0)
2084 pref0_len = 0;
2085 if (pref0_len > pref_len)
2086 pref0_len = pref_len;
2087 regprop(prop, scan);
2a782b5b
JH
2088 {
2089 char *s0 =
df1ffd02 2090 do_utf8 ?
2a782b5b
JH
2091 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
2092 pref0_len, 60, 0) :
2093 locinput - pref_len;
df1ffd02
JH
2094 int len0 = do_utf8 ? strlen(s0) : pref0_len;
2095 char *s1 = do_utf8 ?
2a782b5b
JH
2096 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
2097 pref_len - pref0_len, 60, 0) :
2098 locinput - pref_len + pref0_len;
df1ffd02
JH
2099 int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2100 char *s2 = do_utf8 ?
2a782b5b
JH
2101 pv_uni_display(dsv2, (U8*)locinput,
2102 PL_regeol - locinput, 60, 0) :
2103 locinput;
df1ffd02 2104 int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2105 PerlIO_printf(Perl_debug_log,
2106 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2107 (IV)(locinput - PL_bostr),
2108 PL_colors[4],
2109 len0, s0,
2110 PL_colors[5],
2111 PL_colors[2],
2112 len1, s1,
2113 PL_colors[3],
2114 (docolor ? "" : "> <"),
2115 PL_colors[0],
2116 len2, s2,
2117 PL_colors[1],
2118 15 - l - pref_len + 1,
2119 "",
2120 (IV)(scan - PL_regprogram), PL_regindent*2, "",
2121 SvPVX(prop));
2122 }
2123 });
d6a28714
JH
2124
2125 next = scan + NEXT_OFF(scan);
2126 if (next == scan)
2127 next = NULL;
2128
2129 switch (OP(scan)) {
2130 case BOL:
12d33761
HS
2131 if (locinput == PL_bostr || (PL_multiline &&
2132 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2133 {
2134 /* regtill = regbol; */
b8c5462f
JH
2135 break;
2136 }
d6a28714
JH
2137 sayNO;
2138 case MBOL:
12d33761
HS
2139 if (locinput == PL_bostr ||
2140 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2141 {
b8c5462f
JH
2142 break;
2143 }
d6a28714
JH
2144 sayNO;
2145 case SBOL:
c2a73568 2146 if (locinput == PL_bostr)
b8c5462f 2147 break;
d6a28714
JH
2148 sayNO;
2149 case GPOS:
2150 if (locinput == PL_reg_ganch)
2151 break;
2152 sayNO;
2153 case EOL:
2154 if (PL_multiline)
2155 goto meol;
2156 else
2157 goto seol;
2158 case MEOL:
2159 meol:
2160 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2161 sayNO;
b8c5462f 2162 break;
d6a28714
JH
2163 case SEOL:
2164 seol:
2165 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2166 sayNO;
d6a28714 2167 if (PL_regeol - locinput > 1)
b8c5462f 2168 sayNO;
b8c5462f 2169 break;
d6a28714
JH
2170 case EOS:
2171 if (PL_regeol != locinput)
b8c5462f 2172 sayNO;
d6a28714 2173 break;
ffc61ed2 2174 case SANY:
d6a28714 2175 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2176 sayNO;
f33976b4
DB
2177 if (do_utf8) {
2178 locinput += PL_utf8skip[nextchr];
2179 if (locinput > PL_regeol)
2180 sayNO;
2181 nextchr = UCHARAT(locinput);
2182 }
2183 else
2184 nextchr = UCHARAT(++locinput);
2185 break;
2186 case CANY:
2187 if (!nextchr && locinput >= PL_regeol)
2188 sayNO;
b8c5462f 2189 nextchr = UCHARAT(++locinput);
a0d0e21e 2190 break;
ffc61ed2 2191 case REG_ANY:
1aa99e6b
IH
2192 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2193 sayNO;
2194 if (do_utf8) {
b8c5462f 2195 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2196 if (locinput > PL_regeol)
2197 sayNO;
a0ed51b3 2198 nextchr = UCHARAT(locinput);
a0ed51b3 2199 }
1aa99e6b
IH
2200 else
2201 nextchr = UCHARAT(++locinput);
a0ed51b3 2202 break;
d6a28714 2203 case EXACT:
cd439c50
IZ
2204 s = STRING(scan);
2205 ln = STR_LEN(scan);
1aa99e6b
IH
2206 if (do_utf8 != (UTF!=0)) {
2207 char *l = locinput;
2208 char *e = s + ln;
2209 STRLEN len;
a72c7584 2210
1aa99e6b
IH
2211 if (do_utf8)
2212 while (s < e) {
a72c7584
JH
2213 UV uv;
2214
1aa99e6b
IH
2215 if (l >= PL_regeol)
2216 sayNO;
a72c7584
JH
2217 uv = NATIVE_TO_UNI(*(U8*)s);
2218 if (UTF8_IS_START(uv)) {
2219 len = UTF8SKIP(s);
2220 if (memNE(s, l, len))
2221 sayNO;
2222 l += len;
2223 s += len;
2224 } else {
2225 if (uv != utf8_to_uvchr((U8*)l, &len))
2226 sayNO;
2227 l += len;
2228 s ++;
2229 }
1aa99e6b
IH
2230 }
2231 else
2232 while (s < e) {
2233 if (l >= PL_regeol)
2234 sayNO;
9041c2e3 2235 if (*((U8*)l) != utf8_to_uvchr((U8*)s, &len))
1aa99e6b 2236 sayNO;
a72c7584
JH
2237 s += len;
2238 l ++;
1aa99e6b
IH
2239 }
2240 locinput = l;
2241 nextchr = UCHARAT(locinput);
2242 break;
2243 }
d6a28714
JH
2244 /* Inline the first character, for speed. */
2245 if (UCHARAT(s) != nextchr)
2246 sayNO;
2247 if (PL_regeol - locinput < ln)
2248 sayNO;
2249 if (ln > 1 && memNE(s, locinput, ln))
2250 sayNO;
2251 locinput += ln;
2252 nextchr = UCHARAT(locinput);
2253 break;
2254 case EXACTFL:
b8c5462f
JH
2255 PL_reg_flags |= RF_tainted;
2256 /* FALL THROUGH */
d6a28714 2257 case EXACTF:
cd439c50
IZ
2258 s = STRING(scan);
2259 ln = STR_LEN(scan);
d6a28714 2260
1aa99e6b 2261 if (do_utf8) {
d6a28714 2262 char *l = locinput;
1aa99e6b 2263 char *e;
a2a2844f
JH
2264 STRLEN ulen;
2265 U8 tmpbuf[UTF8_MAXLEN*2+1];
1aa99e6b 2266 e = s + ln;
d6a28714 2267 while (s < e) {
a2a2844f 2268 if (l >= PL_regeol)
d6a28714 2269 sayNO;
a2a2844f 2270 toLOWER_utf8((U8*)l, tmpbuf, &ulen);
7114a2d2 2271 if (memNE(s, (char*)tmpbuf, ulen))
a2a2844f
JH
2272 sayNO;
2273 s += UTF8SKIP(s);
2274 l += ulen;
b8c5462f 2275 }
d6a28714 2276 locinput = l;
a0ed51b3
LW
2277 nextchr = UCHARAT(locinput);
2278 break;
2279 }
d6a28714
JH
2280
2281 /* Inline the first character, for speed. */
2282 if (UCHARAT(s) != nextchr &&
2283 UCHARAT(s) != ((OP(scan) == EXACTF)
2284 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2285 sayNO;
d6a28714 2286 if (PL_regeol - locinput < ln)
b8c5462f 2287 sayNO;
d6a28714
JH
2288 if (ln > 1 && (OP(scan) == EXACTF
2289 ? ibcmp(s, locinput, ln)
2290 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2291 sayNO;
d6a28714
JH
2292 locinput += ln;
2293 nextchr = UCHARAT(locinput);
a0d0e21e 2294 break;
d6a28714 2295 case ANYOF:
ffc61ed2
JH
2296 if (do_utf8) {
2297 if (!reginclass(scan, (U8*)locinput, do_utf8))
2298 sayNO;
2299 if (locinput >= PL_regeol)
2300 sayNO;
2301 locinput += PL_utf8skip[nextchr];
b8c5462f 2302 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2303 }
2304 else {
2305 if (nextchr < 0)
2306 nextchr = UCHARAT(locinput);
2307 if (!reginclass(scan, (U8*)locinput, do_utf8))
2308 sayNO;
2309 if (!nextchr && locinput >= PL_regeol)
2310 sayNO;
2311 nextchr = UCHARAT(++locinput);
2312 }
b8c5462f 2313 break;
d6a28714 2314 case ALNUML:
b8c5462f
JH
2315 PL_reg_flags |= RF_tainted;
2316 /* FALL THROUGH */
d6a28714 2317 case ALNUM:
b8c5462f 2318 if (!nextchr)
4633a7c4 2319 sayNO;
ffc61ed2 2320 if (do_utf8) {
ad24be35 2321 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2322 if (!(OP(scan) == ALNUM
3568d838 2323 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2324 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2325 {
2326 sayNO;
a0ed51b3 2327 }
b8c5462f 2328 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2329 nextchr = UCHARAT(locinput);
2330 break;
2331 }
ffc61ed2 2332 if (!(OP(scan) == ALNUM
d6a28714 2333 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2334 sayNO;
b8c5462f 2335 nextchr = UCHARAT(++locinput);
a0d0e21e 2336 break;
d6a28714 2337 case NALNUML:
b8c5462f
JH
2338 PL_reg_flags |= RF_tainted;
2339 /* FALL THROUGH */
d6a28714
JH
2340 case NALNUM:
2341 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2342 sayNO;
ffc61ed2 2343 if (do_utf8) {
8269fa76 2344 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2345 if (OP(scan) == NALNUM
3568d838 2346 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2347 : isALNUM_LC_utf8((U8*)locinput))
2348 {
b8c5462f 2349 sayNO;
d6a28714 2350 }
b8c5462f
JH
2351 locinput += PL_utf8skip[nextchr];
2352 nextchr = UCHARAT(locinput);
2353 break;
2354 }
ffc61ed2 2355 if (OP(scan) == NALNUM
d6a28714 2356 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2357 sayNO;
76e3520e 2358 nextchr = UCHARAT(++locinput);
a0d0e21e 2359 break;
d6a28714
JH
2360 case BOUNDL:
2361 case NBOUNDL:
3280af22 2362 PL_reg_flags |= RF_tainted;
bbce6d69 2363 /* FALL THROUGH */
d6a28714
JH
2364 case BOUND:
2365 case NBOUND:
2366 /* was last char in word? */
ffc61ed2 2367 if (do_utf8) {
12d33761
HS
2368 if (locinput == PL_bostr)
2369 ln = '\n';
ffc61ed2
JH
2370 else {
2371 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2372
2b9d42f0 2373 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2374 }
2375 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2376 ln = isALNUM_uni(ln);
8269fa76 2377 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2378 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2379 }
2380 else {
9041c2e3 2381 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2382 n = isALNUM_LC_utf8((U8*)locinput);
2383 }
a0ed51b3 2384 }
d6a28714 2385 else {
12d33761
HS
2386 ln = (locinput != PL_bostr) ?
2387 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2388 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2389 ln = isALNUM(ln);
2390 n = isALNUM(nextchr);
2391 }
2392 else {
2393 ln = isALNUM_LC(ln);
2394 n = isALNUM_LC(nextchr);
2395 }
d6a28714 2396 }
ffc61ed2
JH
2397 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2398 OP(scan) == BOUNDL))
2399 sayNO;
a0ed51b3 2400 break;
d6a28714 2401 case SPACEL:
3280af22 2402 PL_reg_flags |= RF_tainted;
bbce6d69 2403 /* FALL THROUGH */
d6a28714 2404 case SPACE:
9442cb0e 2405 if (!nextchr)
4633a7c4 2406 sayNO;
1aa99e6b 2407 if (do_utf8) {
fd400ab9 2408 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2409 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2410 if (!(OP(scan) == SPACE
3568d838 2411 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2412 : isSPACE_LC_utf8((U8*)locinput)))
2413 {
2414 sayNO;
2415 }
2416 locinput += PL_utf8skip[nextchr];
2417 nextchr = UCHARAT(locinput);
2418 break;
d6a28714 2419 }
ffc61ed2
JH
2420 if (!(OP(scan) == SPACE
2421 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2422 sayNO;
2423 nextchr = UCHARAT(++locinput);
2424 }
2425 else {
2426 if (!(OP(scan) == SPACE
2427 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2428 sayNO;
2429 nextchr = UCHARAT(++locinput);
a0ed51b3 2430 }
a0ed51b3 2431 break;
d6a28714 2432 case NSPACEL:
3280af22 2433 PL_reg_flags |= RF_tainted;
bbce6d69 2434 /* FALL THROUGH */
d6a28714 2435 case NSPACE:
9442cb0e 2436 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2437 sayNO;
1aa99e6b 2438 if (do_utf8) {
8269fa76 2439 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2440 if (OP(scan) == NSPACE
3568d838 2441 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2442 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2443 {
2444 sayNO;
2445 }
2446 locinput += PL_utf8skip[nextchr];
2447 nextchr = UCHARAT(locinput);
2448 break;
a0ed51b3 2449 }
ffc61ed2 2450 if (OP(scan) == NSPACE
d6a28714 2451 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2452 sayNO;
76e3520e 2453 nextchr = UCHARAT(++locinput);
a0d0e21e 2454 break;
d6a28714 2455 case DIGITL:
a0ed51b3
LW
2456 PL_reg_flags |= RF_tainted;
2457 /* FALL THROUGH */
d6a28714 2458 case DIGIT:
9442cb0e 2459 if (!nextchr)
a0ed51b3 2460 sayNO;
1aa99e6b 2461 if (do_utf8) {
8269fa76 2462 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2463 if (!(OP(scan) == DIGIT
3568d838 2464 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2465 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2466 {
a0ed51b3 2467 sayNO;
dfe13c55 2468 }
6f06b55f 2469 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2470 nextchr = UCHARAT(locinput);
2471 break;
2472 }
ffc61ed2 2473 if (!(OP(scan) == DIGIT
9442cb0e 2474 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2475 sayNO;
2476 nextchr = UCHARAT(++locinput);
2477 break;
d6a28714 2478 case NDIGITL:
b8c5462f
JH
2479 PL_reg_flags |= RF_tainted;
2480 /* FALL THROUGH */
d6a28714 2481 case NDIGIT:
9442cb0e 2482 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2483 sayNO;
1aa99e6b 2484 if (do_utf8) {
8269fa76 2485 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2486 if (OP(scan) == NDIGIT
3568d838 2487 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2488 : isDIGIT_LC_utf8((U8*)locinput))
2489 {
a0ed51b3 2490 sayNO;
9442cb0e 2491 }
6f06b55f 2492 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2493 nextchr = UCHARAT(locinput);
2494 break;
2495 }
ffc61ed2 2496 if (OP(scan) == NDIGIT
9442cb0e 2497 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2498 sayNO;
2499 nextchr = UCHARAT(++locinput);
2500 break;
2501 case CLUMP:
8269fa76 2502 LOAD_UTF8_CHARCLASS(mark,"~");
3568d838
JH
2503 if (locinput >= PL_regeol ||
2504 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3 2505 sayNO;
6f06b55f 2506 locinput += PL_utf8skip[nextchr];
3568d838
JH
2507 while (locinput < PL_regeol &&
2508 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3
LW
2509 locinput += UTF8SKIP(locinput);
2510 if (locinput > PL_regeol)
2511 sayNO;
2512 nextchr = UCHARAT(locinput);
2513 break;
c8756f30 2514 case REFFL:
3280af22 2515 PL_reg_flags |= RF_tainted;
c8756f30 2516 /* FALL THROUGH */
c277df42 2517 case REF:
c8756f30 2518 case REFF:
c277df42 2519 n = ARG(scan); /* which paren pair */
cf93c79d 2520 ln = PL_regstartp[n];
2c2d71f5 2521 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2522 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2523 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2524 if (ln == PL_regendp[n])
a0d0e21e 2525 break;
a0ed51b3 2526
cf93c79d 2527 s = PL_bostr + ln;
1aa99e6b 2528 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2529 char *l = locinput;
cf93c79d 2530 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2531 /*
2532 * Note that we can't do the "other character" lookup trick as
2533 * in the 8-bit case (no pun intended) because in Unicode we
2534 * have to map both upper and title case to lower case.
2535 */
2536 if (OP(scan) == REFF) {
a2a2844f
JH
2537 STRLEN ulen1, ulen2;
2538 U8 tmpbuf1[UTF8_MAXLEN*2+1];
2539 U8 tmpbuf2[UTF8_MAXLEN*2+1];
a0ed51b3
LW
2540 while (s < e) {
2541 if (l >= PL_regeol)
2542 sayNO;
a2a2844f
JH
2543 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2544 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 2545 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 2546 sayNO;
a2a2844f
JH
2547 s += ulen1;
2548 l += ulen2;
a0ed51b3
LW
2549 }
2550 }
2551 locinput = l;
2552 nextchr = UCHARAT(locinput);
2553 break;
2554 }
2555
a0d0e21e 2556 /* Inline the first character, for speed. */
76e3520e 2557 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2558 (OP(scan) == REF ||
2559 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2560 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2561 sayNO;
cf93c79d 2562 ln = PL_regendp[n] - ln;
3280af22 2563 if (locinput + ln > PL_regeol)
4633a7c4 2564 sayNO;
c8756f30
AK
2565 if (ln > 1 && (OP(scan) == REF
2566 ? memNE(s, locinput, ln)
2567 : (OP(scan) == REFF
2568 ? ibcmp(s, locinput, ln)
2569 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2570 sayNO;
a0d0e21e 2571 locinput += ln;
76e3520e 2572 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2573 break;
2574
2575 case NOTHING:
c277df42 2576 case TAIL:
a0d0e21e
LW
2577 break;
2578 case BACK:
2579 break;
c277df42
IZ
2580 case EVAL:
2581 {
2582 dSP;
533c011a 2583 OP_4tree *oop = PL_op;
3280af22
NIS
2584 COP *ocurcop = PL_curcop;
2585 SV **ocurpad = PL_curpad;
c277df42 2586 SV *ret;
9041c2e3 2587
c277df42 2588 n = ARG(scan);
533c011a 2589 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2590 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2591 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2592 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2593
8e5e9ebe
RGS
2594 {
2595 SV **before = SP;
2596 CALLRUNOPS(aTHX); /* Scalar context. */
2597 SPAGAIN;
2598 if (SP == before)
2599 ret = Nullsv; /* protect against empty (?{}) blocks. */
2600 else {
2601 ret = POPs;
2602 PUTBACK;
2603 }
2604 }
2605
0f5d15d6
IZ
2606 PL_op = oop;
2607 PL_curpad = ocurpad;
2608 PL_curcop = ocurcop;
c277df42 2609 if (logical) {
0f5d15d6
IZ
2610 if (logical == 2) { /* Postponed subexpression. */
2611 regexp *re;
22c35a8c 2612 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2613 re_cc_state state;
0f5d15d6
IZ
2614 CHECKPOINT cp, lastcp;
2615
2616 if(SvROK(ret) || SvRMAGICAL(ret)) {
2617 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2618
2619 if(SvMAGICAL(sv))
14befaf4 2620 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2621 }
2622 if (mg) {
2623 re = (regexp *)mg->mg_obj;
df0003d4 2624 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2625 }
2626 else {
2627 STRLEN len;
2628 char *t = SvPV(ret, len);
2629 PMOP pm;
2630 char *oprecomp = PL_regprecomp;
2631 I32 osize = PL_regsize;
2632 I32 onpar = PL_regnpar;
2633
5fcd1c1b 2634 Zero(&pm, 1, PMOP);
cea2e8a9 2635 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2636 if (!(SvFLAGS(ret)
0f5d15d6 2637 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2638 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2639 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2640 PL_regprecomp = oprecomp;
2641 PL_regsize = osize;
2642 PL_regnpar = onpar;
2643 }
2644 DEBUG_r(
9041c2e3 2645 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2646 "Entering embedded `%s%.60s%s%s'\n",
2647 PL_colors[0],
2648 re->precomp,
2649 PL_colors[1],
2650 (strlen(re->precomp) > 60 ? "..." : ""))
2651 );
2652 state.node = next;
2653 state.prev = PL_reg_call_cc;
2654 state.cc = PL_regcc;
2655 state.re = PL_reg_re;
2656
2ab05381 2657 PL_regcc = 0;
9041c2e3 2658
0f5d15d6 2659 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2660 REGCP_SET(lastcp);
0f5d15d6
IZ
2661 cache_re(re);
2662 state.ss = PL_savestack_ix;
2663 *PL_reglastparen = 0;
a01268b5 2664 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2665 PL_reg_call_cc = &state;
2666 PL_reginput = locinput;
2c2d71f5
JH
2667
2668 /* XXXX This is too dramatic a measure... */
2669 PL_reg_maxiter = 0;
2670
0f5d15d6 2671 if (regmatch(re->program + 1)) {
2c914db6
IZ
2672 /* Even though we succeeded, we need to restore
2673 global variables, since we may be wrapped inside
2674 SUSPEND, thus the match may be not finished yet. */
2675
2676 /* XXXX Do this only if SUSPENDed? */
2677 PL_reg_call_cc = state.prev;
2678 PL_regcc = state.cc;
2679 PL_reg_re = state.re;
2680 cache_re(PL_reg_re);
2681
2682 /* XXXX This is too dramatic a measure... */
2683 PL_reg_maxiter = 0;
2684
2685 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2686 ReREFCNT_dec(re);
2687 regcpblow(cp);
2688 sayYES;
2689 }
0f5d15d6 2690 ReREFCNT_dec(re);
02db2b7b 2691 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2692 regcppop();
2693 PL_reg_call_cc = state.prev;
2694 PL_regcc = state.cc;
2695 PL_reg_re = state.re;
d3790889 2696 cache_re(PL_reg_re);
2c2d71f5
JH
2697
2698 /* XXXX This is too dramatic a measure... */
2699 PL_reg_maxiter = 0;
2700
8e514ae6 2701 logical = 0;
0f5d15d6
IZ
2702 sayNO;
2703 }
c277df42 2704 sw = SvTRUE(ret);
0f5d15d6 2705 logical = 0;
a0ed51b3
LW
2706 }
2707 else
3280af22 2708 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2709 break;
2710 }
a0d0e21e 2711 case OPEN:
c277df42 2712 n = ARG(scan); /* which paren pair */
3280af22
NIS
2713 PL_reg_start_tmp[n] = locinput;
2714 if (n > PL_regsize)
2715 PL_regsize = n;
a0d0e21e
LW
2716 break;
2717 case CLOSE:
c277df42 2718 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2719 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2720 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2721 if (n > *PL_reglastparen)
2722 *PL_reglastparen = n;
a01268b5 2723 *PL_reglastcloseparen = n;
a0d0e21e 2724 break;
c277df42
IZ
2725 case GROUPP:
2726 n = ARG(scan); /* which paren pair */
cf93c79d 2727 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2728 break;
2729 case IFTHEN:
2c2d71f5 2730 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2731 if (sw)
2732 next = NEXTOPER(NEXTOPER(scan));
2733 else {
2734 next = scan + ARG(scan);
2735 if (OP(next) == IFTHEN) /* Fake one. */
2736 next = NEXTOPER(NEXTOPER(next));
2737 }
2738 break;
2739 case LOGICAL:
0f5d15d6 2740 logical = scan->flags;
c277df42 2741 break;
2ab05381
IZ
2742/*******************************************************************
2743 PL_regcc contains infoblock about the innermost (...)* loop, and
2744 a pointer to the next outer infoblock.
2745
2746 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2747
2748 1) After matching X, regnode for CURLYX is processed;
2749
9041c2e3 2750 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2751 regmatch() recursively with the starting point at WHILEM node;
2752
2753 3) Each hit of WHILEM node tries to match A and Z (in the order
2754 depending on the current iteration, min/max of {min,max} and
2755 greediness). The information about where are nodes for "A"
2756 and "Z" is read from the infoblock, as is info on how many times "A"
2757 was already matched, and greediness.
2758
2759 4) After A matches, the same WHILEM node is hit again.
2760
2761 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2762 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2763 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2764 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2765 of the external loop.
2766
2767 Currently present infoblocks form a tree with a stem formed by PL_curcc
2768 and whatever it mentions via ->next, and additional attached trees
2769 corresponding to temporarily unset infoblocks as in "5" above.
2770
9041c2e3 2771 In the following picture infoblocks for outer loop of
2ab05381
IZ
2772 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2773 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2774 infoblocks are drawn below the "reset" infoblock.
2775
2776 In fact in the picture below we do not show failed matches for Z and T
2777 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2778 more obvious *why* one needs to *temporary* unset infoblocks.]
2779
2780 Matched REx position InfoBlocks Comment
2781 (Y(A)*?Z)*?T x
2782 Y(A)*?Z)*?T x <- O
2783 Y (A)*?Z)*?T x <- O
2784 Y A)*?Z)*?T x <- O <- I
2785 YA )*?Z)*?T x <- O <- I
2786 YA A)*?Z)*?T x <- O <- I
2787 YAA )*?Z)*?T x <- O <- I
2788 YAA Z)*?T x <- O # Temporary unset I
2789 I
2790
2791 YAAZ Y(A)*?Z)*?T x <- O
2792 I
2793
2794 YAAZY (A)*?Z)*?T x <- O
2795 I
2796
2797 YAAZY A)*?Z)*?T x <- O <- I
2798 I
2799
2800 YAAZYA )*?Z)*?T x <- O <- I
2801 I
2802
2803 YAAZYA Z)*?T x <- O # Temporary unset I
2804 I,I
2805
2806 YAAZYAZ )*?T x <- O
2807 I,I
2808
2809 YAAZYAZ T x # Temporary unset O
2810 O
2811 I,I
2812
2813 YAAZYAZT x
2814 O
2815 I,I
2816 *******************************************************************/
a0d0e21e
LW
2817 case CURLYX: {
2818 CURCUR cc;
3280af22 2819 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2820 /* No need to save/restore up to this paren */
2821 I32 parenfloor = scan->flags;
c277df42
IZ
2822
2823 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2824 next += ARG(next);
3280af22
NIS
2825 cc.oldcc = PL_regcc;
2826 PL_regcc = &cc;
cb434fcc
IZ
2827 /* XXXX Probably it is better to teach regpush to support
2828 parenfloor > PL_regsize... */
2829 if (parenfloor > *PL_reglastparen)
2830 parenfloor = *PL_reglastparen; /* Pessimization... */
2831 cc.parenfloor = parenfloor;
a0d0e21e
LW
2832 cc.cur = -1;
2833 cc.min = ARG1(scan);
2834 cc.max = ARG2(scan);
c277df42 2835 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2836 cc.next = next;
2837 cc.minmod = minmod;
2838 cc.lastloc = 0;
3280af22 2839 PL_reginput = locinput;
a0d0e21e
LW
2840 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2841 regcpblow(cp);
3280af22 2842 PL_regcc = cc.oldcc;
4633a7c4 2843 saySAME(n);
a0d0e21e
LW
2844 }
2845 /* NOT REACHED */
2846 case WHILEM: {
2847 /*
2848 * This is really hard to understand, because after we match
2849 * what we're trying to match, we must make sure the rest of
2c2d71f5 2850 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2851 * to go back UP the parse tree by recursing ever deeper. And
2852 * if it fails, we have to reset our parent's current state
2853 * that we can try again after backing off.
2854 */
2855
c277df42 2856 CHECKPOINT cp, lastcp;
3280af22 2857 CURCUR* cc = PL_regcc;
c277df42
IZ
2858 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2859
4633a7c4 2860 n = cc->cur + 1; /* how many we know we matched */
3280af22 2861 PL_reginput = locinput;
a0d0e21e 2862
c277df42 2863 DEBUG_r(
9041c2e3
NIS
2864 PerlIO_printf(Perl_debug_log,
2865 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2866 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2867 (long)n, (long)cc->min,
c277df42
IZ
2868 (long)cc->max, (long)cc)
2869 );
4633a7c4 2870
a0d0e21e
LW
2871 /* If degenerate scan matches "", assume scan done. */
2872
579cf2c3 2873 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2874 PL_regcc = cc->oldcc;
2ab05381
IZ
2875 if (PL_regcc)
2876 ln = PL_regcc->cur;
c277df42 2877 DEBUG_r(
c3464db5
DD
2878 PerlIO_printf(Perl_debug_log,
2879 "%*s empty match detected, try continuation...\n",
3280af22 2880 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2881 );
a0d0e21e 2882 if (regmatch(cc->next))
4633a7c4 2883 sayYES;
2ab05381
IZ
2884 if (PL_regcc)
2885 PL_regcc->cur = ln;
3280af22 2886 PL_regcc = cc;
4633a7c4 2887 sayNO;
a0d0e21e
LW
2888 }
2889
2890 /* First just match a string of min scans. */
2891
2892 if (n < cc->min) {
2893 cc->cur = n;
2894 cc->lastloc = locinput;
4633a7c4
LW
2895 if (regmatch(cc->scan))
2896 sayYES;
2897 cc->cur = n - 1;
c277df42 2898 cc->lastloc = lastloc;
4633a7c4 2899 sayNO;
a0d0e21e
LW
2900 }
2901
2c2d71f5
JH
2902 if (scan->flags) {
2903 /* Check whether we already were at this position.
2904 Postpone detection until we know the match is not
2905 *that* much linear. */
2906 if (!PL_reg_maxiter) {
2907 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2908 PL_reg_leftiter = PL_reg_maxiter;
2909 }
2910 if (PL_reg_leftiter-- == 0) {
2911 I32 size = (PL_reg_maxiter + 7)/8;
2912 if (PL_reg_poscache) {
2913 if (PL_reg_poscache_size < size) {
2914 Renew(PL_reg_poscache, size, char);
2915 PL_reg_poscache_size = size;
2916 }
2917 Zero(PL_reg_poscache, size, char);
2918 }
2919 else {
2920 PL_reg_poscache_size = size;
2921 Newz(29, PL_reg_poscache, size, char);
2922 }
2923 DEBUG_r(
2924 PerlIO_printf(Perl_debug_log,
2925 "%sDetected a super-linear match, switching on caching%s...\n",
2926 PL_colors[4], PL_colors[5])
2927 );
2928 }
2929 if (PL_reg_leftiter < 0) {
2930 I32 o = locinput - PL_bostr, b;
2931
2932 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2933 b = o % 8;
2934 o /= 8;
2935 if (PL_reg_poscache[o] & (1<<b)) {
2936 DEBUG_r(
2937 PerlIO_printf(Perl_debug_log,
2938 "%*s already tried at this position...\n",
2939 REPORT_CODE_OFF+PL_regindent*2, "")
2940 );
7821416a 2941 sayNO_SILENT;
2c2d71f5
JH
2942 }
2943 PL_reg_poscache[o] |= (1<<b);
2944 }
2945 }
2946
a0d0e21e
LW
2947 /* Prefer next over scan for minimal matching. */
2948
2949 if (cc->minmod) {
3280af22 2950 PL_regcc = cc->oldcc;
2ab05381
IZ
2951 if (PL_regcc)
2952 ln = PL_regcc->cur;
5f05dabc 2953 cp = regcppush(cc->parenfloor);
02db2b7b 2954 REGCP_SET(lastcp);
5f05dabc 2955 if (regmatch(cc->next)) {
c277df42 2956 regcpblow(cp);
4633a7c4 2957 sayYES; /* All done. */
5f05dabc 2958 }
02db2b7b 2959 REGCP_UNWIND(lastcp);
5f05dabc 2960 regcppop();
2ab05381
IZ
2961 if (PL_regcc)
2962 PL_regcc->cur = ln;
3280af22 2963 PL_regcc = cc;
a0d0e21e 2964
c277df42 2965 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 2966 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2967 && !(PL_reg_flags & RF_warned)) {
2968 PL_reg_flags |= RF_warned;
e476b1b5 2969 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2970 "Complex regular subexpression recursion",
2971 REG_INFTY - 1);
c277df42 2972 }
4633a7c4 2973 sayNO;
c277df42 2974 }
a687059c 2975
c277df42 2976 DEBUG_r(
c3464db5
DD
2977 PerlIO_printf(Perl_debug_log,
2978 "%*s trying longer...\n",
3280af22 2979 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2980 );
a0d0e21e 2981 /* Try scanning more and see if it helps. */
3280af22 2982 PL_reginput = locinput;
a0d0e21e
LW
2983 cc->cur = n;
2984 cc->lastloc = locinput;
5f05dabc 2985 cp = regcppush(cc->parenfloor);
02db2b7b 2986 REGCP_SET(lastcp);
5f05dabc 2987 if (regmatch(cc->scan)) {
c277df42 2988 regcpblow(cp);
4633a7c4 2989 sayYES;
5f05dabc 2990 }
02db2b7b 2991 REGCP_UNWIND(lastcp);
5f05dabc 2992 regcppop();
4633a7c4 2993 cc->cur = n - 1;
c277df42 2994 cc->lastloc = lastloc;
4633a7c4 2995 sayNO;
a0d0e21e
LW
2996 }
2997
2998 /* Prefer scan over next for maximal matching. */
2999
3000 if (n < cc->max) { /* More greed allowed? */
5f05dabc 3001 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
3002 cc->cur = n;
3003 cc->lastloc = locinput;
02db2b7b 3004 REGCP_SET(lastcp);
5f05dabc 3005 if (regmatch(cc->scan)) {
c277df42 3006 regcpblow(cp);
4633a7c4 3007 sayYES;
5f05dabc 3008 }
02db2b7b 3009 REGCP_UNWIND(lastcp);
a0d0e21e 3010 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3011 PL_reginput = locinput;
c277df42 3012 DEBUG_r(
c3464db5
DD
3013 PerlIO_printf(Perl_debug_log,
3014 "%*s failed, try continuation...\n",
3280af22 3015 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3016 );
3017 }
9041c2e3 3018 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3019 && !(PL_reg_flags & RF_warned)) {
3280af22 3020 PL_reg_flags |= RF_warned;
e476b1b5 3021 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
3022 "Complex regular subexpression recursion",
3023 REG_INFTY - 1);
a0d0e21e
LW
3024 }
3025
3026 /* Failed deeper matches of scan, so see if this one works. */
3280af22 3027 PL_regcc = cc->oldcc;
2ab05381
IZ
3028 if (PL_regcc)
3029 ln = PL_regcc->cur;
a0d0e21e 3030 if (regmatch(cc->next))
4633a7c4 3031 sayYES;
2ab05381
IZ
3032 if (PL_regcc)
3033 PL_regcc->cur = ln;
3280af22 3034 PL_regcc = cc;
4633a7c4 3035 cc->cur = n - 1;
c277df42 3036 cc->lastloc = lastloc;
4633a7c4 3037 sayNO;
a0d0e21e
LW
3038 }
3039 /* NOT REACHED */
9041c2e3 3040 case BRANCHJ:
c277df42
IZ
3041 next = scan + ARG(scan);
3042 if (next == scan)
3043 next = NULL;
3044 inner = NEXTOPER(NEXTOPER(scan));
3045 goto do_branch;
9041c2e3 3046 case BRANCH:
c277df42
IZ
3047 inner = NEXTOPER(scan);
3048 do_branch:
3049 {
c277df42
IZ
3050 c1 = OP(scan);
3051 if (OP(next) != c1) /* No choice. */
3052 next = inner; /* Avoid recursion. */
a0d0e21e 3053 else {
02db2b7b
IZ
3054 I32 lastparen = *PL_reglastparen;
3055 I32 unwind1;
3056 re_unwind_branch_t *uw;
3057
3058 /* Put unwinding data on stack */
3059 unwind1 = SSNEWt(1,re_unwind_branch_t);
3060 uw = SSPTRt(unwind1,re_unwind_branch_t);
3061 uw->prev = unwind;
3062 unwind = unwind1;
3063 uw->type = ((c1 == BRANCH)
3064 ? RE_UNWIND_BRANCH
3065 : RE_UNWIND_BRANCHJ);
3066 uw->lastparen = lastparen;
3067 uw->next = next;
3068 uw->locinput = locinput;
3069 uw->nextchr = nextchr;
3070#ifdef DEBUGGING
3071 uw->regindent = ++PL_regindent;
3072#endif
c277df42 3073
02db2b7b
IZ
3074 REGCP_SET(uw->lastcp);
3075
3076 /* Now go into the first branch */
3077 next = inner;
a687059c 3078 }
a0d0e21e
LW
3079 }
3080 break;
3081 case MINMOD:
3082 minmod = 1;
3083 break;
c277df42
IZ
3084 case CURLYM:
3085 {
00db4c45 3086 I32 l = 0;
c277df42 3087 CHECKPOINT lastcp;
9041c2e3 3088
c277df42
IZ
3089 /* We suppose that the next guy does not need
3090 backtracking: in particular, it is of constant length,
3091 and has no parenths to influence future backrefs. */
3092 ln = ARG1(scan); /* min to match */
3093 n = ARG2(scan); /* max to match */
c277df42
IZ
3094 paren = scan->flags;
3095 if (paren) {
3280af22
NIS
3096 if (paren > PL_regsize)
3097 PL_regsize = paren;
3098 if (paren > *PL_reglastparen)
3099 *PL_reglastparen = paren;
c277df42 3100 }
dc45a647 3101 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3102 if (paren)
3103 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3104 PL_reginput = locinput;
c277df42
IZ
3105 if (minmod) {
3106 minmod = 0;
3107 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3108 sayNO;
f31a99c8
HS
3109 /* if we matched something zero-length we don't need to
3110 backtrack - capturing parens are already defined, so
3111 the caveat in the maximal case doesn't apply
3112
3113 XXXX if ln == 0, we can redo this check first time
3114 through the following loop
3115 */
3116 if (ln && l == 0)
3117 n = ln; /* don't backtrack */
3280af22 3118 locinput = PL_reginput;
e2d8ce26 3119 if (NEAR_EXACT(next)) {
5f80c4cf
JP
3120 regnode *text_node = next;
3121
3122 if (PL_regkind[(U8)OP(next)] != EXACT)
74750237 3123 NEXT_IMPT(text_node);
5f80c4cf
JP
3124
3125 if (PL_regkind[(U8)OP(text_node)] != EXACT) {
3126 c1 = c2 = -1000;
3127 }
3128 else {
3129 c1 = (U8)*STRING(text_node);
3130 if (OP(next) == EXACTF)
3131 c2 = PL_fold[c1];
3132 else if (OP(text_node) == EXACTFL)
3133 c2 = PL_fold_locale[c1];
3134 else
3135 c2 = c1;
3136 }
a0ed51b3
LW
3137 }
3138 else
c277df42 3139 c1 = c2 = -1000;
02db2b7b 3140 REGCP_SET(lastcp);
5f4b28b2 3141 /* This may be improved if l == 0. */
c277df42
IZ
3142 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3143 /* If it could work, try it. */
3144 if (c1 == -1000 ||
3280af22
NIS
3145 UCHARAT(PL_reginput) == c1 ||
3146 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3147 {
3148 if (paren) {
f31a99c8 3149 if (ln) {
cf93c79d
IZ
3150 PL_regstartp[paren] =
3151 HOPc(PL_reginput, -l) - PL_bostr;
3152 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3153 }
3154 else
cf93c79d 3155 PL_regendp[paren] = -1;
c277df42
IZ
3156 }
3157 if (regmatch(next))
3158 sayYES;
02db2b7b 3159 REGCP_UNWIND(lastcp);
c277df42
IZ
3160 }
3161 /* Couldn't or didn't -- move forward. */
3280af22 3162 PL_reginput = locinput;
c277df42
IZ
3163 if (regrepeat_hard(scan, 1, &l)) {
3164 ln++;
3280af22 3165 locinput = PL_reginput;
c277df42
IZ
3166 }
3167 else
3168 sayNO;
3169 }
a0ed51b3
LW
3170 }
3171 else {
c277df42 3172 n = regrepeat_hard(scan, n, &l);
f31a99c8
HS
3173 /* if we matched something zero-length we don't need to
3174 backtrack, unless the minimum count is zero and we
3175 are capturing the result - in that case the capture
3176 being defined or not may affect later execution
3177 */
3178 if (n != 0 && l == 0 && !(paren && ln == 0))
3179 ln = n; /* don't backtrack */
3280af22 3180 locinput = PL_reginput;
c277df42 3181 DEBUG_r(
5c0ca799 3182 PerlIO_printf(Perl_debug_log,
faccc32b 3183 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3184 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3185 (IV) n, (IV)l)
c277df42
IZ
3186 );
3187 if (n >= ln) {
e2d8ce26 3188 if (NEAR_EXACT(next)) {
5f80c4cf
JP
3189 regnode *text_node = next;
3190
3191 if (PL_regkind[(U8)OP(next)] != EXACT)
74750237 3192 NEXT_IMPT(text_node);
5f80c4cf
JP
3193
3194 if (PL_regkind[(U8)OP(text_node)] != EXACT) {
3195 c1 = c2 = -1000;
3196 }
3197 else {
3198 c1 = (U8)*STRING(text_node);
3199 if (OP(text_node) == EXACTF)
3200 c2 = PL_fold[c1];
3201 else if (OP(text_node) == EXACTFL)
3202 c2 = PL_fold_locale[c1];
3203 else
3204 c2 = c1;
3205 }
a0ed51b3
LW
3206 }
3207 else
c277df42
IZ
3208 c1 = c2 = -1000;
3209 }
02db2b7b 3210 REGCP_SET(lastcp);
c277df42
IZ
3211 while (n >= ln) {
3212 /* If it could work, try it. */
3213 if (c1 == -1000 ||
3280af22
NIS
3214 UCHARAT(PL_reginput) == c1 ||
3215 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3216 {
3217 DEBUG_r(
c3464db5 3218 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3219 "%*s trying tail with n=%"IVdf"...\n",
3220 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3221 );
3222 if (paren) {
3223 if (n) {
cf93c79d
IZ
3224 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3225 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3226 }
a0ed51b3 3227 else
cf93c79d 3228 PL_regendp[paren] = -1;
c277df42 3229 }
a0ed51b3
LW
3230 if (regmatch(next))
3231 sayYES;
02db2b7b 3232 REGCP_UNWIND(lastcp);
a0ed51b3 3233 }
c277df42
IZ
3234 /* Couldn't or didn't -- back up. */
3235 n--;
dfe13c55 3236 locinput = HOPc(locinput, -l);
3280af22 3237 PL_reginput = locinput;
c277df42
IZ
3238 }
3239 }
3240 sayNO;
3241 break;
3242 }
3243 case CURLYN:
3244 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3245 if (paren > PL_regsize)
3246 PL_regsize = paren;
3247 if (paren > *PL_reglastparen)
3248 *PL_reglastparen = paren;
c277df42
IZ
3249 ln = ARG1(scan); /* min to match */
3250 n = ARG2(scan); /* max to match */
dc45a647 3251 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3252 goto repeat;
a0d0e21e 3253 case CURLY:
c277df42 3254 paren = 0;
a0d0e21e
LW
3255 ln = ARG1(scan); /* min to match */
3256 n = ARG2(scan); /* max to match */
dc45a647 3257 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3258 goto repeat;
3259 case STAR:
3260 ln = 0;
c277df42 3261 n = REG_INFTY;
a0d0e21e 3262 scan = NEXTOPER(scan);
c277df42 3263 paren = 0;
a0d0e21e
LW
3264 goto repeat;
3265 case PLUS:
c277df42
IZ
3266 ln = 1;
3267 n = REG_INFTY;
3268 scan = NEXTOPER(scan);
3269 paren = 0;
3270 repeat:
a0d0e21e
LW
3271 /*
3272 * Lookahead to avoid useless match attempts
3273 * when we know what character comes next.
3274 */
5f80c4cf
JP
3275
3276 /*
3277 * Used to only do .*x and .*?x, but now it allows
3278 * for )'s, ('s and (?{ ... })'s to be in the way
3279 * of the quantifier and the EXACT-like node. -- japhy
3280 */
3281
e2d8ce26 3282 if (NEAR_EXACT(next)) {
5f80c4cf
JP
3283 U8 *s;
3284 regnode *text_node = next;
3285
3286 if (PL_regkind[(U8)OP(next)] != EXACT)
74750237 3287 NEXT_IMPT(text_node);
5f80c4cf
JP
3288
3289 if (PL_regkind[(U8)OP(text_node)] != EXACT) {
3290 c1 = c2 = -1000;
3291 }
3292 else {
3293 s = (U8*)STRING(text_node);
3294
3295 if (!UTF) {
3296 c2 = c1 = *s;
3297 if (OP(text_node) == EXACTF)
3298 c2 = PL_fold[c1];
3299 else if (OP(text_node) == EXACTFL)
3300 c2 = PL_fold_locale[c1];
1aa99e6b 3301 }
5f80c4cf
JP
3302 else { /* UTF */
3303 if (OP(text_node) == EXACTF) {
a2a2844f
JH
3304 STRLEN ulen1, ulen2;
3305 U8 tmpbuf1[UTF8_MAXLEN*2+1];
3306 U8 tmpbuf2[UTF8_MAXLEN*2+1];
3307
3308 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3309 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3310
3311 c1 = utf8_to_uvuni(tmpbuf1, 0);
3312 c2 = utf8_to_uvuni(tmpbuf2, 0);
5f80c4cf
JP
3313 }
3314 else {
3315 c2 = c1 = utf8_to_uvchr(s, NULL);
3316 }
1aa99e6b
IH
3317 }
3318 }
bbce6d69 3319 }
a0d0e21e 3320 else
bbce6d69 3321 c1 = c2 = -1000;
3280af22 3322 PL_reginput = locinput;
a0d0e21e 3323 if (minmod) {
c277df42 3324 CHECKPOINT lastcp;
a0d0e21e
LW
3325 minmod = 0;
3326 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3327 sayNO;
a0ed51b3 3328 locinput = PL_reginput;
02db2b7b 3329 REGCP_SET(lastcp);
0fe9bf95 3330 if (c1 != -1000) {
1aa99e6b 3331 char *e; /* Should not check after this */
0fe9bf95
IZ
3332 char *old = locinput;
3333
1aa99e6b 3334 if (n == REG_INFTY) {
0fe9bf95 3335 e = PL_regeol - 1;
1aa99e6b
IH
3336 if (do_utf8)
3337 while (UTF8_IS_CONTINUATION(*(U8*)e))
3338 e--;
3339 }
3340 else if (do_utf8) {
3341 int m = n - ln;
3342 for (e = locinput;
3343 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3344 e += UTF8SKIP(e);
3345 }
3346 else {
3347 e = locinput + n - ln;
3348 if (e >= PL_regeol)
3349 e = PL_regeol - 1;
3350 }
0fe9bf95 3351 while (1) {
1aa99e6b 3352 int count;
0fe9bf95 3353 /* Find place 'next' could work */
1aa99e6b
IH
3354 if (!do_utf8) {
3355 if (c1 == c2) {
a8e8ab15
JH
3356 while (locinput <= e &&
3357 UCHARAT(locinput) != c1)
1aa99e6b
IH
3358 locinput++;
3359 } else {
9041c2e3 3360 while (locinput <= e
a8e8ab15
JH
3361 && UCHARAT(locinput) != c1
3362 && UCHARAT(locinput) != c2)
1aa99e6b
IH
3363 locinput++;
3364 }
3365 count = locinput - old;
3366 }
3367 else {
3368 STRLEN len;
3369 if (c1 == c2) {
3370 for (count = 0;
3371 locinput <= e &&
9041c2e3 3372 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3373 count++)
3374 locinput += len;
3375
3376 } else {
3377 for (count = 0; locinput <= e; count++) {
9041c2e3 3378 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3379 if (c == c1 || c == c2)
3380 break;
9041c2e3 3381 locinput += len;
1aa99e6b
IH
3382 }
3383 }
0fe9bf95 3384 }
9041c2e3 3385 if (locinput > e)
0fe9bf95
IZ
3386 sayNO;
3387 /* PL_reginput == old now */
3388 if (locinput != old) {
3389 ln = 1; /* Did some */
1aa99e6b 3390 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3391 sayNO;
3392 }
3393 /* PL_reginput == locinput now */
29d1e993 3394 TRYPAREN(paren, ln, locinput);
0fe9bf95 3395 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3396 REGCP_UNWIND(lastcp);
0fe9bf95 3397 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3398 old = locinput;
3399 if (do_utf8)
3400 locinput += UTF8SKIP(locinput);
3401 else
3402 locinput++;
0fe9bf95
IZ
3403 }
3404 }
3405 else
c277df42 3406 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3407 UV c;
3408 if (c1 != -1000) {
3409 if (do_utf8)
9041c2e3 3410 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3411 else
9041c2e3 3412 c = UCHARAT(PL_reginput);
2390ecbc
PP
3413 /* If it could work, try it. */
3414 if (c == c1 || c == c2)
3415 {
3416 TRYPAREN(paren, n, PL_reginput);
3417 REGCP_UNWIND(lastcp);
3418 }
1aa99e6b 3419 }
a0d0e21e 3420 /* If it could work, try it. */
2390ecbc 3421 else if (c1 == -1000)
bbce6d69 3422 {
29d1e993 3423 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3424 REGCP_UNWIND(lastcp);
bbce6d69 3425 }
c277df42 3426 /* Couldn't or didn't -- move forward. */
a0ed51b3 3427 PL_reginput = locinput;
a0d0e21e
LW
3428 if (regrepeat(scan, 1)) {
3429 ln++;
a0ed51b3
LW
3430 locinput = PL_reginput;
3431 }
3432 else
4633a7c4 3433 sayNO;
a0d0e21e
LW
3434 }
3435 }
3436 else {
c277df42 3437 CHECKPOINT lastcp;
a0d0e21e 3438 n = regrepeat(scan, n);
a0ed51b3 3439 locinput = PL_reginput;
22c35a8c 3440 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3441 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3442 ln = n; /* why back off? */
1aeab75a
GS
3443 /* ...because $ and \Z can match before *and* after
3444 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3445 We should back off by one in this case. */
3446 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3447 ln--;
3448 }
02db2b7b 3449 REGCP_SET(lastcp);
c277df42 3450 if (paren) {
8fa7f367 3451 UV c = 0;
c277df42 3452 while (n >= ln) {
1aa99e6b
IH
3453 if (c1 != -1000) {
3454 if (do_utf8)
9041c2e3 3455 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3456 else
9041c2e3 3457 c = UCHARAT(PL_reginput);
1aa99e6b 3458 }
c277df42 3459 /* If it could work, try it. */
1aa99e6b 3460 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3461 {
29d1e993 3462 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3463 REGCP_UNWIND(lastcp);
c277df42
IZ
3464 }
3465 /* Couldn't or didn't -- back up. */
3466 n--;
dfe13c55 3467 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3468 }
a0ed51b3
LW
3469 }
3470 else {
8fa7f367 3471 UV c = 0;
c277df42 3472 while (n >= ln) {
1aa99e6b
IH
3473 if (c1 != -1000) {
3474 if (do_utf8)
9041c2e3 3475 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3476 else
9041c2e3 3477 c = UCHARAT(PL_reginput);
1aa99e6b 3478 }
c277df42 3479 /* If it could work, try it. */
1aa99e6b 3480 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3481 {
29d1e993 3482 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3483 REGCP_UNWIND(lastcp);
c277df42
IZ
3484 }
3485 /* Couldn't or didn't -- back up. */
3486 n--;
dfe13c55 3487 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3488 }
a0d0e21e
LW
3489 }
3490 }
4633a7c4 3491 sayNO;
c277df42 3492 break;
a0d0e21e 3493 case END:
0f5d15d6
IZ
3494 if (PL_reg_call_cc) {
3495 re_cc_state *cur_call_cc = PL_reg_call_cc;
3496 CURCUR *cctmp = PL_regcc;
3497 regexp *re = PL_reg_re;
3498 CHECKPOINT cp, lastcp;
3499
3500 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3501 REGCP_SET(lastcp);
0f5d15d6
IZ
3502 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3503 the caller. */
3504 PL_reginput = locinput; /* Make position available to
3505 the callcc. */
3506 cache_re(PL_reg_call_cc->re);
3507 PL_regcc = PL_reg_call_cc->cc;
3508 PL_reg_call_cc = PL_reg_call_cc->prev;
3509 if (regmatch(cur_call_cc->node)) {
3510 PL_reg_call_cc = cur_call_cc;
3511 regcpblow(cp);
3512 sayYES;
3513 }
02db2b7b 3514 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3515 regcppop();
3516 PL_reg_call_cc = cur_call_cc;
3517 PL_regcc = cctmp;
3518 PL_reg_re = re;
3519 cache_re(re);
3520
3521 DEBUG_r(
3522 PerlIO_printf(Perl_debug_log,
3523 "%*s continuation failed...\n",
3524 REPORT_CODE_OFF+PL_regindent*2, "")
3525 );
7821416a 3526 sayNO_SILENT;
0f5d15d6 3527 }
7821416a
IZ
3528 if (locinput < PL_regtill) {
3529 DEBUG_r(PerlIO_printf(Perl_debug_log,
3530 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3531 PL_colors[4],
3532 (long)(locinput - PL_reg_starttry),
3533 (long)(PL_regtill - PL_reg_starttry),
3534 PL_colors[5]));
3535 sayNO_FINAL; /* Cannot match: too short. */
3536 }
3537 PL_reginput = locinput; /* put where regtry can find it */
3538 sayYES_FINAL; /* Success! */
7e5428c5 3539 case SUCCEED:
3280af22 3540 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3541 sayYES_LOUD; /* Success! */
c277df42
IZ
3542 case SUSPEND:
3543 n = 1;
9fe1d20c 3544 PL_reginput = locinput;
9041c2e3 3545 goto do_ifmatch;
a0d0e21e 3546 case UNLESSM:
c277df42 3547 n = 0;
a0ed51b3 3548 if (scan->flags) {
efb30f32
HS
3549 s = HOPBACKc(locinput, scan->flags);
3550 if (!s)
3551 goto say_yes;
3552 PL_reginput = s;
a0ed51b3
LW
3553 }
3554 else
3555 PL_reginput = locinput;
c277df42
IZ
3556 goto do_ifmatch;
3557 case IFMATCH:
3558 n = 1;
a0ed51b3 3559 if (scan->flags) {
efb30f32
HS
3560 s = HOPBACKc(locinput, scan->flags);
3561 if (!s)
3562 goto say_no;
3563 PL_reginput = s;
a0ed51b3
LW
3564 }
3565 else
3566 PL_reginput = locinput;
3567
c277df42 3568 do_ifmatch:
c277df42
IZ
3569 inner = NEXTOPER(NEXTOPER(scan));
3570 if (regmatch(inner) != n) {
3571 say_no:
3572 if (logical) {
3573 logical = 0;
3574 sw = 0;
3575 goto do_longjump;
a0ed51b3
LW
3576 }
3577 else
c277df42
IZ
3578 sayNO;
3579 }
3580 say_yes:
3581 if (logical) {
3582 logical = 0;
3583 sw = 1;
3584 }
fe44a5e8 3585 if (OP(scan) == SUSPEND) {
3280af22 3586 locinput = PL_reginput;
565764a8 3587 nextchr = UCHARAT(locinput);
fe44a5e8 3588 }
c277df42
IZ
3589 /* FALL THROUGH. */
3590 case LONGJMP:
3591 do_longjump:
3592 next = scan + ARG(scan);
3593 if (next == scan)
3594 next = NULL;