This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
More UTF8 EXACT fixing.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
a687059c
LW
83#include "regcomp.h"
84
c277df42
IZ
85#define RF_tainted 1 /* tainted information used? */
86#define RF_warned 2 /* warned about big count? */
ce862d02 87#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
88#define RF_utf8 8 /* String contains multibyte chars? */
89
90#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
91
92#define RS_init 1 /* eval environment created */
93#define RS_set 2 /* replsv value is set */
c277df42 94
a687059c
LW
95#ifndef STATIC
96#define STATIC static
97#endif
98
c277df42
IZ
99/*
100 * Forwards.
101 */
102
a0ed51b3 103#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 104#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 105
dfe13c55
GS
106#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
108#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
109#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
110#define HOPc(pos,off) ((char*)HOP(pos,off))
111#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 112
efb30f32 113#define HOPBACK(pos, off) ( \
53c4c00c 114 (UTF && PL_reg_match_utf8) \
efb30f32
HS
115 ? reghopmaybe((U8*)pos, -off) \
116 : (pos - off >= PL_bostr) \
117 ? (U8*)(pos - off) \
118 : (U8*)NULL \
119)
120#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
121
1aa99e6b
IH
122#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
123#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
124#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
125#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
126#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
127#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
128
8269fa76 129#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 130
5f80c4cf 131/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
132#define JUMPABLE(rn) ( \
133 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
134 OP(rn) == SUSPEND || OP(rn) == IFMATCH \
135)
136
137#define NEAR_EXACT(rn) (PL_regkind[(U8)OP(rn)] == EXACT || JUMPABLE(rn))
138
139#define NEXT_IMPT(rn) STMT_START { \
140 while (JUMPABLE(rn)) \
141 if (OP(rn) == SUSPEND || OP(rn) == IFMATCH) \
142 rn = NEXTOPER(NEXTOPER(rn)); \
143 else rn += NEXT_OFF(rn); \
5f80c4cf 144} STMT_END
74750237 145
acfe0abc 146static void restore_pos(pTHX_ void *arg);
51371543 147
76e3520e 148STATIC CHECKPOINT
cea2e8a9 149S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 150{
3280af22 151 int retval = PL_savestack_ix;
b1ce53c5
JH
152#define REGCP_PAREN_ELEMS 4
153 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
154 int p;
155
e49a9654
IH
156 if (paren_elems_to_push < 0)
157 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
158
a01268b5 159#define REGCP_OTHER_ELEMS 6
b1ce53c5 160 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 161 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 162/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
163 SSPUSHINT(PL_regendp[p]);
164 SSPUSHINT(PL_regstartp[p]);
3280af22 165 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
166 SSPUSHINT(p);
167 }
b1ce53c5 168/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
169 SSPUSHINT(PL_regsize);
170 SSPUSHINT(*PL_reglastparen);
a01268b5 171 SSPUSHINT(*PL_reglastcloseparen);
3280af22 172 SSPUSHPTR(PL_reginput);
41123dfd
JH
173#define REGCP_FRAME_ELEMS 2
174/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
175 * are needed for the regexp context stack bookkeeping. */
176 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 177 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 178
a0d0e21e
LW
179 return retval;
180}
181
c277df42 182/* These are needed since we do not localize EVAL nodes: */
02db2b7b 183# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 184 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 185 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 186
02db2b7b 187# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 188 PerlIO_printf(Perl_debug_log, \
faccc32b 189 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 190 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 191
76e3520e 192STATIC char *
cea2e8a9 193S_regcppop(pTHX)
a0d0e21e 194{
b1ce53c5 195 I32 i;
a0d0e21e
LW
196 U32 paren = 0;
197 char *input;
cf93c79d 198 I32 tmps;
b1ce53c5
JH
199
200 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 201 i = SSPOPINT;
b1ce53c5
JH
202 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
203 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 204 input = (char *) SSPOPPTR;
a01268b5 205 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
206 *PL_reglastparen = SSPOPINT;
207 PL_regsize = SSPOPINT;
b1ce53c5
JH
208
209 /* Now restore the parentheses context. */
41123dfd
JH
210 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
211 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 212 paren = (U32)SSPOPINT;
3280af22 213 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
214 PL_regstartp[paren] = SSPOPINT;
215 tmps = SSPOPINT;
3280af22
NIS
216 if (paren <= *PL_reglastparen)
217 PL_regendp[paren] = tmps;
c277df42 218 DEBUG_r(
c3464db5 219 PerlIO_printf(Perl_debug_log,
b900a521 220 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 221 (UV)paren, (IV)PL_regstartp[paren],
b900a521 222 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 223 (IV)PL_regendp[paren],
3280af22 224 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 225 );
a0d0e21e 226 }
c277df42 227 DEBUG_r(
3280af22 228 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 229 PerlIO_printf(Perl_debug_log,
faccc32b
JH
230 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
231 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
232 }
233 );
daf18116 234#if 1
dafc8851
JH
235 /* It would seem that the similar code in regtry()
236 * already takes care of this, and in fact it is in
237 * a better location to since this code can #if 0-ed out
238 * but the code in regtry() is needed or otherwise tests
239 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
240 * (as of patchlevel 7877) will fail. Then again,
241 * this code seems to be necessary or otherwise
242 * building DynaLoader will fail:
243 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
244 * --jhi */
3280af22
NIS
245 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
246 if (paren > PL_regsize)
cf93c79d
IZ
247 PL_regstartp[paren] = -1;
248 PL_regendp[paren] = -1;
a0d0e21e 249 }
dafc8851 250#endif
a0d0e21e
LW
251 return input;
252}
253
0f5d15d6 254STATIC char *
cea2e8a9 255S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
256{
257 I32 tmp = PL_savestack_ix;
258
259 PL_savestack_ix = ss;
260 regcppop();
261 PL_savestack_ix = tmp;
942e002e 262 return Nullch;
0f5d15d6
IZ
263}
264
265typedef struct re_cc_state
266{
267 I32 ss;
268 regnode *node;
269 struct re_cc_state *prev;
270 CURCUR *cc;
271 regexp *re;
272} re_cc_state;
273
02db2b7b 274#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 275
29d1e993
HS
276#define TRYPAREN(paren, n, input) { \
277 if (paren) { \
278 if (n) { \
279 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
280 PL_regendp[paren] = input - PL_bostr; \
281 } \
282 else \
283 PL_regendp[paren] = -1; \
284 } \
285 if (regmatch(next)) \
286 sayYES; \
287 if (paren && n) \
288 PL_regendp[paren] = -1; \
289}
290
291
a687059c 292/*
e50aee73 293 * pregexec and friends
a687059c
LW
294 */
295
296/*
c277df42 297 - pregexec - match a regexp against a string
a687059c 298 */
c277df42 299I32
864dbfa3 300Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 301 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
302/* strend: pointer to null at end of string */
303/* strbeg: real beginning of string */
304/* minend: end of match must be >=minend after stringarg. */
305/* nosave: For optimizations. */
306{
307 return
9041c2e3 308 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
309 nosave ? 0 : REXEC_COPY_STR);
310}
0f5d15d6
IZ
311
312STATIC void
cea2e8a9 313S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
314{
315 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
316#ifdef DEBUGGING
317 PL_regprogram = prog->program;
318#endif
319 PL_regnpar = prog->nparens;
9041c2e3
NIS
320 PL_regdata = prog->data;
321 PL_reg_re = prog;
0f5d15d6 322}
22e551b9 323
9041c2e3 324/*
cad2e5aa
JH
325 * Need to implement the following flags for reg_anch:
326 *
327 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
328 * USE_INTUIT_ML
329 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
330 * INTUIT_AUTORITATIVE_ML
331 * INTUIT_ONCE_NOML - Intuit can match in one location only.
332 * INTUIT_ONCE_ML
333 *
334 * Another flag for this function: SECOND_TIME (so that float substrs
335 * with giant delta may be not rechecked).
336 */
337
338/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
339
2c2d71f5 340/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
341 Otherwise, only SvCUR(sv) is used to get strbeg. */
342
343/* XXXX We assume that strpos is strbeg unless sv. */
344
6eb5f6b9
JH
345/* XXXX Some places assume that there is a fixed substring.
346 An update may be needed if optimizer marks as "INTUITable"
347 RExen without fixed substrings. Similarly, it is assumed that
348 lengths of all the strings are no more than minlen, thus they
349 cannot come from lookahead.
350 (Or minlen should take into account lookahead.) */
351
2c2d71f5
JH
352/* A failure to find a constant substring means that there is no need to make
353 an expensive call to REx engine, thus we celebrate a failure. Similarly,
354 finding a substring too deep into the string means that less calls to
30944b6d
IZ
355 regtry() should be needed.
356
357 REx compiler's optimizer found 4 possible hints:
358 a) Anchored substring;
359 b) Fixed substring;
360 c) Whether we are anchored (beginning-of-line or \G);
361 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 362 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
363 string which does not contradict any of them.
364 */
2c2d71f5 365
6eb5f6b9
JH
366/* Most of decisions we do here should have been done at compile time.
367 The nodes of the REx which we used for the search should have been
368 deleted from the finite automaton. */
369
cad2e5aa
JH
370char *
371Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
372 char *strend, U32 flags, re_scream_pos_data *data)
373{
b7953727 374 register I32 start_shift = 0;
cad2e5aa 375 /* Should be nonnegative! */
b7953727 376 register I32 end_shift = 0;
2c2d71f5
JH
377 register char *s;
378 register SV *check;
a1933d95 379 char *strbeg;
cad2e5aa
JH
380 char *t;
381 I32 ml_anch;
6eb5f6b9 382 register char *other_last = Nullch; /* other substr checked before this */
b7953727 383 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
384#ifdef DEBUGGING
385 char *i_strpos = strpos;
2a782b5b 386 SV *dsv = sv_2mortal(newSVpvn("", 0));
30944b6d 387#endif
cad2e5aa 388
2a782b5b 389 DEBUG_r({
40eddc46
JH
390 char*s = UTF ? sv_uni_display(dsv, sv, 60, 0) : strpos;
391 int len = UTF ? strlen(s) : strend - strpos;
2a782b5b
JH
392 if (!PL_colorset)
393 reginitcolors();
394 PerlIO_printf(Perl_debug_log,
395 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
396 PL_colors[4],PL_colors[5],PL_colors[0],
397 prog->precomp,
398 PL_colors[1],
399 (strlen(prog->precomp) > 60 ? "..." : ""),
400 PL_colors[0],
401 (int)(len > 60 ? 60 : len),
402 s, PL_colors[1],
403 (len > 60 ? "..." : "")
404 );
405 });
cad2e5aa 406
1aa99e6b
IH
407 if (prog->reganch & ROPT_UTF8)
408 PL_reg_flags |= RF_utf8;
409
410 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
2c2d71f5 411 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 412 goto fail;
2c2d71f5 413 }
a1933d95 414 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 415 PL_regeol = strend;
653099ff 416 check = prog->check_substr;
2c2d71f5 417 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
418 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
419 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 420 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 421
7e25d62c
JH
422 if (!ml_anch) {
423 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
424 | ROPT_IMPLICIT)) /* not a real BOL */
425 /* SvCUR is not set on references: SvRV and SvPVX overlap */
426 && sv && !SvROK(sv)
427 && (strpos != strbeg)) {
428 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
429 goto fail;
430 }
431 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 432 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 433 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
434 I32 slen;
435
1aa99e6b 436 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
437 if (SvTAIL(check)) {
438 slen = SvCUR(check); /* >= 1 */
cad2e5aa 439
9041c2e3 440 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
441 || (strend - s == slen && strend[-1] != '\n')) {
442 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
443 goto fail_finish;
cad2e5aa
JH
444 }
445 /* Now should match s[0..slen-2] */
446 slen--;
653099ff 447 if (slen && (*SvPVX(check) != *s
cad2e5aa 448 || (slen > 1
653099ff 449 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
450 report_neq:
451 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
452 goto fail_finish;
453 }
cad2e5aa 454 }
653099ff
GS
455 else if (*SvPVX(check) != *s
456 || ((slen = SvCUR(check)) > 1
457 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
458 goto report_neq;
459 goto success_at_start;
7e25d62c 460 }
cad2e5aa 461 }
2c2d71f5 462 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 463 s = strpos;
2c2d71f5 464 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 465 end_shift = prog->minlen - start_shift -
653099ff 466 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 467 if (!ml_anch) {
653099ff
GS
468 I32 end = prog->check_offset_max + CHR_SVLEN(check)
469 - (SvTAIL(check) != 0);
1aa99e6b 470 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
471
472 if (end_shift < eshift)
473 end_shift = eshift;
474 }
cad2e5aa 475 }
2c2d71f5 476 else { /* Can match at random position */
cad2e5aa
JH
477 ml_anch = 0;
478 s = strpos;
2c2d71f5
JH
479 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
480 /* Should be nonnegative! */
481 end_shift = prog->minlen - start_shift -
653099ff 482 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
483 }
484
2c2d71f5 485#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 486 if (end_shift < 0)
6bbae5e6 487 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
488#endif
489
2c2d71f5
JH
490 restart:
491 /* Find a possible match in the region s..strend by looking for
492 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 493 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
494 I32 p = -1; /* Internal iterator of scream. */
495 I32 *pp = data ? data->scream_pos : &p;
496
2c2d71f5
JH
497 if (PL_screamfirst[BmRARE(check)] >= 0
498 || ( BmRARE(check) == '\n'
499 && (BmPREVIOUS(check) == SvCUR(check) - 1)
500 && SvTAIL(check) ))
9041c2e3 501 s = screaminstr(sv, check,
2c2d71f5 502 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 503 else
2c2d71f5 504 goto fail_finish;
cad2e5aa
JH
505 if (data)
506 *data->scream_olds = s;
507 }
f33976b4 508 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
509 s = fbm_instr((U8*)(s + start_shift),
510 (U8*)(strend - end_shift),
511 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 512 else
1aa99e6b
IH
513 s = fbm_instr(HOP3(s, start_shift, strend),
514 HOP3(strend, -end_shift, strbeg),
2c2d71f5 515 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
516
517 /* Update the count-of-usability, remove useless subpatterns,
518 unshift s. */
2c2d71f5
JH
519
520 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
521 (s ? "Found" : "Did not find"),
522 ((check == prog->anchored_substr) ? "anchored" : "floating"),
523 PL_colors[0],
7b0972df
JH
524 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
525 SvPVX(check),
2c2d71f5
JH
526 PL_colors[1], (SvTAIL(check) ? "$" : ""),
527 (s ? " at offset " : "...\n") ) );
528
529 if (!s)
530 goto fail_finish;
531
6eb5f6b9
JH
532 check_at = s;
533
2c2d71f5 534 /* Finish the diagnostic message */
30944b6d 535 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
536
537 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
538 Start with the other substr.
539 XXXX no SCREAM optimization yet - and a very coarse implementation
540 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
541 *always* match. Probably should be marked during compile...
542 Probably it is right to do no SCREAM here...
543 */
544
545 if (prog->float_substr && prog->anchored_substr) {
30944b6d 546 /* Take into account the "other" substring. */
2c2d71f5
JH
547 /* XXXX May be hopelessly wrong for UTF... */
548 if (!other_last)
6eb5f6b9 549 other_last = strpos;
2c2d71f5 550 if (check == prog->float_substr) {
30944b6d
IZ
551 do_other_anchored:
552 {
1aa99e6b 553 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
554 char *s1 = s;
555
2c2d71f5
JH
556 t = s - prog->check_offset_max;
557 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
558 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 559 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 560 && t > strpos)))
30944b6d 561 /* EMPTY */;
2c2d71f5
JH
562 else
563 t = strpos;
1aa99e6b 564 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
565 if (t < other_last) /* These positions already checked */
566 t = other_last;
1aa99e6b 567 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
568 if (last < last1)
569 last1 = last;
570 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
571 /* On end-of-str: see comment below. */
572 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
573 HOP3(HOP3(last1, prog->anchored_offset, strend)
574 + SvCUR(prog->anchored_substr),
575 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
576 prog->anchored_substr,
577 PL_multiline ? FBMrf_MULTILINE : 0);
578 DEBUG_r(PerlIO_printf(Perl_debug_log,
579 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
580 (s ? "Found" : "Contradicts"),
581 PL_colors[0],
7b0972df
JH
582 (int)(SvCUR(prog->anchored_substr)
583 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
584 SvPVX(prog->anchored_substr),
585 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
586 if (!s) {
587 if (last1 >= last2) {
588 DEBUG_r(PerlIO_printf(Perl_debug_log,
589 ", giving up...\n"));
590 goto fail_finish;
591 }
592 DEBUG_r(PerlIO_printf(Perl_debug_log,
593 ", trying floating at offset %ld...\n",
1aa99e6b
IH
594 (long)(HOP3c(s1, 1, strend) - i_strpos)));
595 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
596 s = HOP3c(last, 1, strend);
2c2d71f5
JH
597 goto restart;
598 }
599 else {
600 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 601 (long)(s - i_strpos)));
1aa99e6b
IH
602 t = HOP3c(s, -prog->anchored_offset, strbeg);
603 other_last = HOP3c(s, 1, strend);
30944b6d 604 s = s1;
2c2d71f5
JH
605 if (t == strpos)
606 goto try_at_start;
2c2d71f5
JH
607 goto try_at_offset;
608 }
30944b6d 609 }
2c2d71f5
JH
610 }
611 else { /* Take into account the floating substring. */
612 char *last, *last1;
613 char *s1 = s;
614
1aa99e6b
IH
615 t = HOP3c(s, -start_shift, strbeg);
616 last1 = last =
617 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
618 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
619 last = HOP3c(t, prog->float_max_offset, strend);
620 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
621 if (s < other_last)
622 s = other_last;
2c2d71f5
JH
623 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
624 /* fbm_instr() takes into account exact value of end-of-str
625 if the check is SvTAIL(ed). Since false positives are OK,
626 and end-of-str is not later than strend we are OK. */
627 s = fbm_instr((unsigned char*)s,
628 (unsigned char*)last + SvCUR(prog->float_substr)
629 - (SvTAIL(prog->float_substr)!=0),
630 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
631 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
632 (s ? "Found" : "Contradicts"),
633 PL_colors[0],
7b0972df
JH
634 (int)(SvCUR(prog->float_substr)
635 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
636 SvPVX(prog->float_substr),
637 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
638 if (!s) {
639 if (last1 == last) {
640 DEBUG_r(PerlIO_printf(Perl_debug_log,
641 ", giving up...\n"));
642 goto fail_finish;
643 }
644 DEBUG_r(PerlIO_printf(Perl_debug_log,
645 ", trying anchored starting at offset %ld...\n",
30944b6d 646 (long)(s1 + 1 - i_strpos)));
803ff556 647 other_last = last;
1aa99e6b 648 s = HOP3c(t, 1, strend);
2c2d71f5
JH
649 goto restart;
650 }
651 else {
652 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 653 (long)(s - i_strpos)));
803ff556 654 other_last = s; /* Fix this later. --Hugo */
30944b6d 655 s = s1;
2c2d71f5
JH
656 if (t == strpos)
657 goto try_at_start;
2c2d71f5
JH
658 goto try_at_offset;
659 }
660 }
cad2e5aa 661 }
2c2d71f5
JH
662
663 t = s - prog->check_offset_max;
2c2d71f5
JH
664 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
665 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
666 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
667 && t > strpos))) {
2c2d71f5
JH
668 /* Fixed substring is found far enough so that the match
669 cannot start at strpos. */
670 try_at_offset:
cad2e5aa 671 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
672 /* Eventually fbm_*() should handle this, but often
673 anchored_offset is not 0, so this check will not be wasted. */
674 /* XXXX In the code below we prefer to look for "^" even in
675 presence of anchored substrings. And we search even
676 beyond the found float position. These pessimizations
677 are historical artefacts only. */
678 find_anchor:
2c2d71f5 679 while (t < strend - prog->minlen) {
cad2e5aa 680 if (*t == '\n') {
4ee3650e 681 if (t < check_at - prog->check_offset_min) {
30944b6d 682 if (prog->anchored_substr) {
4ee3650e
GS
683 /* Since we moved from the found position,
684 we definitely contradict the found anchored
30944b6d
IZ
685 substr. Due to the above check we do not
686 contradict "check" substr.
687 Thus we can arrive here only if check substr
688 is float. Redo checking for "other"=="fixed".
689 */
9041c2e3 690 strpos = t + 1;
30944b6d
IZ
691 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
692 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
693 goto do_other_anchored;
694 }
4ee3650e
GS
695 /* We don't contradict the found floating substring. */
696 /* XXXX Why not check for STCLASS? */
cad2e5aa 697 s = t + 1;
2c2d71f5 698 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 699 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
700 goto set_useful;
701 }
4ee3650e
GS
702 /* Position contradicts check-string */
703 /* XXXX probably better to look for check-string
704 than for "\n", so one should lower the limit for t? */
705 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 706 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 707 other_last = strpos = s = t + 1;
cad2e5aa
JH
708 goto restart;
709 }
710 t++;
711 }
2c2d71f5
JH
712 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
713 PL_colors[0],PL_colors[1]));
714 goto fail_finish;
cad2e5aa 715 }
f5952150
GS
716 else {
717 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
718 PL_colors[0],PL_colors[1]));
719 }
cad2e5aa
JH
720 s = t;
721 set_useful:
2c2d71f5 722 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
723 }
724 else {
f5952150 725 /* The found string does not prohibit matching at strpos,
2c2d71f5 726 - no optimization of calling REx engine can be performed,
f5952150
GS
727 unless it was an MBOL and we are not after MBOL,
728 or a future STCLASS check will fail this. */
2c2d71f5
JH
729 try_at_start:
730 /* Even in this situation we may use MBOL flag if strpos is offset
731 wrt the start of the string. */
05b4157f 732 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 733 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
734 /* May be due to an implicit anchor of m{.*foo} */
735 && !(prog->reganch & ROPT_IMPLICIT))
736 {
cad2e5aa
JH
737 t = strpos;
738 goto find_anchor;
739 }
30944b6d 740 DEBUG_r( if (ml_anch)
f5952150
GS
741 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
742 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 743 );
2c2d71f5 744 success_at_start:
30944b6d 745 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 746 && prog->check_substr /* Could be deleted already */
cad2e5aa 747 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
748 && prog->check_substr == prog->float_substr)
749 {
cad2e5aa 750 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 751 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
752 SvREFCNT_dec(prog->check_substr);
753 prog->check_substr = Nullsv; /* disable */
754 prog->float_substr = Nullsv; /* clear */
5e39e1e5 755 check = Nullsv; /* abort */
cad2e5aa 756 s = strpos;
3cf5c195
IZ
757 /* XXXX This is a remnant of the old implementation. It
758 looks wasteful, since now INTUIT can use many
6eb5f6b9 759 other heuristics. */
cad2e5aa
JH
760 prog->reganch &= ~RE_USE_INTUIT;
761 }
762 else
763 s = strpos;
764 }
765
6eb5f6b9
JH
766 /* Last resort... */
767 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
768 if (prog->regstclass) {
769 /* minlen == 0 is possible if regstclass is \b or \B,
770 and the fixed substr is ''$.
771 Since minlen is already taken into account, s+1 is before strend;
772 accidentally, minlen >= 1 guaranties no false positives at s + 1
773 even for \b or \B. But (minlen? 1 : 0) below assumes that
774 regstclass does not come from lookahead... */
775 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
776 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 777 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 778 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 779 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 780 : 1);
6eb5f6b9 781 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
782 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
783 : (prog->float_substr
784 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
785 cl_l, strend)
786 : strend);
a1933d95 787 char *startpos = strbeg;
6eb5f6b9
JH
788
789 t = s;
76384e4a 790 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 791 PL_regdata = prog->data;
76384e4a
GS
792 PL_bostr = startpos;
793 }
f33976b4 794 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
6eb5f6b9
JH
795 if (!s) {
796#ifdef DEBUGGING
b7953727 797 char *what = 0;
6eb5f6b9
JH
798#endif
799 if (endpos == strend) {
800 DEBUG_r( PerlIO_printf(Perl_debug_log,
801 "Could not match STCLASS...\n") );
802 goto fail;
803 }
66e933ab
GS
804 DEBUG_r( PerlIO_printf(Perl_debug_log,
805 "This position contradicts STCLASS...\n") );
653099ff
GS
806 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
807 goto fail;
6eb5f6b9
JH
808 /* Contradict one of substrings */
809 if (prog->anchored_substr) {
6eb5f6b9
JH
810 if (prog->anchored_substr == check) {
811 DEBUG_r( what = "anchored" );
812 hop_and_restart:
1aa99e6b 813 s = HOP3c(t, 1, strend);
66e933ab
GS
814 if (s + start_shift + end_shift > strend) {
815 /* XXXX Should be taken into account earlier? */
816 DEBUG_r( PerlIO_printf(Perl_debug_log,
817 "Could not match STCLASS...\n") );
818 goto fail;
819 }
5e39e1e5
HS
820 if (!check)
821 goto giveup;
6eb5f6b9 822 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 823 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
824 what, (long)(s + start_shift - i_strpos)) );
825 goto restart;
826 }
66e933ab 827 /* Have both, check_string is floating */
6eb5f6b9
JH
828 if (t + start_shift >= check_at) /* Contradicts floating=check */
829 goto retry_floating_check;
830 /* Recheck anchored substring, but not floating... */
9041c2e3 831 s = check_at;
5e39e1e5
HS
832 if (!check)
833 goto giveup;
6eb5f6b9 834 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 835 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
836 (long)(other_last - i_strpos)) );
837 goto do_other_anchored;
838 }
60e71179
GS
839 /* Another way we could have checked stclass at the
840 current position only: */
841 if (ml_anch) {
842 s = t = t + 1;
5e39e1e5
HS
843 if (!check)
844 goto giveup;
60e71179 845 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
846 "Looking for /%s^%s/m starting at offset %ld...\n",
847 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 848 goto try_at_offset;
66e933ab 849 }
60e71179
GS
850 if (!prog->float_substr) /* Could have been deleted */
851 goto fail;
6eb5f6b9
JH
852 /* Check is floating subtring. */
853 retry_floating_check:
854 t = check_at - start_shift;
855 DEBUG_r( what = "floating" );
856 goto hop_and_restart;
857 }
b7953727
JH
858 if (t != s) {
859 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 860 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
861 (long)(t - i_strpos), (long)(s - i_strpos))
862 );
863 }
864 else {
865 DEBUG_r(PerlIO_printf(Perl_debug_log,
866 "Does not contradict STCLASS...\n");
867 );
868 }
6eb5f6b9 869 }
5e39e1e5
HS
870 giveup:
871 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
872 PL_colors[4], (check ? "Guessed" : "Giving up"),
873 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 874 return s;
2c2d71f5
JH
875
876 fail_finish: /* Substring not found */
66e933ab
GS
877 if (prog->check_substr) /* could be removed already */
878 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 879 fail:
2c2d71f5 880 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
881 PL_colors[4],PL_colors[5]));
882 return Nullch;
883}
9661b544 884
6eb5f6b9 885/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
886STATIC char *
887S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 888{
6eb5f6b9
JH
889 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
890 char *m;
d8093b23
G
891 STRLEN ln;
892 unsigned int c1;
893 unsigned int c2;
6eb5f6b9
JH
894 char *e;
895 register I32 tmp = 1; /* Scratch variable? */
53c4c00c 896 register bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 897
6eb5f6b9
JH
898 /* We know what class it must start with. */
899 switch (OP(c)) {
6eb5f6b9 900 case ANYOF:
a0ed51b3 901 while (s < strend) {
ffc61ed2 902 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
903 if (tmp && (norun || regtry(prog, s)))
904 goto got_it;
905 else
906 tmp = doevery;
a0ed51b3 907 }
6eb5f6b9
JH
908 else
909 tmp = 1;
ffc61ed2 910 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 911 }
6eb5f6b9 912 break;
f33976b4
DB
913 case CANY:
914 while (s < strend) {
915 if (tmp && (norun || regtry(prog, s)))
916 goto got_it;
917 else
918 tmp = doevery;
919 s++;
920 }
921 break;
6eb5f6b9
JH
922 case EXACTF:
923 m = STRING(c);
924 ln = STR_LEN(c);
1aa99e6b 925 if (UTF) {
a2a2844f
JH
926 STRLEN ulen1, ulen2;
927 U8 tmpbuf1[UTF8_MAXLEN*2+1];
928 U8 tmpbuf2[UTF8_MAXLEN*2+1];
929
930 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
931 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
932
933 c1 = utf8_to_uvuni(tmpbuf1, 0);
934 c2 = utf8_to_uvuni(tmpbuf2, 0);
1aa99e6b
IH
935 }
936 else {
937 c1 = *(U8*)m;
938 c2 = PL_fold[c1];
939 }
6eb5f6b9
JH
940 goto do_exactf;
941 case EXACTFL:
942 m = STRING(c);
943 ln = STR_LEN(c);
d8093b23 944 c1 = *(U8*)m;
6eb5f6b9
JH
945 c2 = PL_fold_locale[c1];
946 do_exactf:
947 e = strend - ln;
b3c9acc1 948
6eb5f6b9
JH
949 if (norun && e < s)
950 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
951
952 if (do_utf8) {
953 STRLEN len;
954 if (c1 == c2)
955 while (s <= e) {
9041c2e3 956 if ( utf8_to_uvchr((U8*)s, &len) == c1
1aa99e6b
IH
957 && regtry(prog, s) )
958 goto got_it;
959 s += len;
960 }
961 else
962 while (s <= e) {
9041c2e3 963 UV c = utf8_to_uvchr((U8*)s, &len);
1aa99e6b
IH
964 if ( (c == c1 || c == c2) && regtry(prog, s) )
965 goto got_it;
966 s += len;
967 }
968 }
969 else {
970 if (c1 == c2)
971 while (s <= e) {
972 if ( *(U8*)s == c1
973 && (ln == 1 || !(OP(c) == EXACTF
974 ? ibcmp(s, m, ln)
975 : ibcmp_locale(s, m, ln)))
976 && (norun || regtry(prog, s)) )
977 goto got_it;
978 s++;
979 }
980 else
981 while (s <= e) {
982 if ( (*(U8*)s == c1 || *(U8*)s == c2)
983 && (ln == 1 || !(OP(c) == EXACTF
984 ? ibcmp(s, m, ln)
985 : ibcmp_locale(s, m, ln)))
986 && (norun || regtry(prog, s)) )
987 goto got_it;
988 s++;
989 }
b3c9acc1
IZ
990 }
991 break;
bbce6d69 992 case BOUNDL:
3280af22 993 PL_reg_flags |= RF_tainted;
bbce6d69 994 /* FALL THROUGH */
a0d0e21e 995 case BOUND:
ffc61ed2 996 if (do_utf8) {
12d33761 997 if (s == PL_bostr)
ffc61ed2
JH
998 tmp = '\n';
999 else {
1aa99e6b 1000 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1001
0064a8a9
JH
1002 if (s > (char*)r)
1003 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1004 }
1005 tmp = ((OP(c) == BOUND ?
9041c2e3 1006 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1007 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1008 while (s < strend) {
1009 if (tmp == !(OP(c) == BOUND ?
3568d838 1010 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1011 isALNUM_LC_utf8((U8*)s)))
1012 {
1013 tmp = !tmp;
1014 if ((norun || regtry(prog, s)))
1015 goto got_it;
1016 }
1017 s += UTF8SKIP(s);
a687059c 1018 }
a0d0e21e 1019 }
667bb95a 1020 else {
12d33761 1021 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1022 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1023 while (s < strend) {
1024 if (tmp ==
1025 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1026 tmp = !tmp;
1027 if ((norun || regtry(prog, s)))
1028 goto got_it;
1029 }
1030 s++;
a0ed51b3 1031 }
a0ed51b3 1032 }
6eb5f6b9 1033 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1034 goto got_it;
1035 break;
bbce6d69 1036 case NBOUNDL:
3280af22 1037 PL_reg_flags |= RF_tainted;
bbce6d69 1038 /* FALL THROUGH */
a0d0e21e 1039 case NBOUND:
ffc61ed2 1040 if (do_utf8) {
12d33761 1041 if (s == PL_bostr)
ffc61ed2
JH
1042 tmp = '\n';
1043 else {
1aa99e6b 1044 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1045
0064a8a9
JH
1046 if (s > (char*)r)
1047 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1048 }
1049 tmp = ((OP(c) == NBOUND ?
9041c2e3 1050 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1051 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1052 while (s < strend) {
1053 if (tmp == !(OP(c) == NBOUND ?
3568d838 1054 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1055 isALNUM_LC_utf8((U8*)s)))
1056 tmp = !tmp;
1057 else if ((norun || regtry(prog, s)))
1058 goto got_it;
1059 s += UTF8SKIP(s);
1060 }
a0d0e21e 1061 }
667bb95a 1062 else {
12d33761 1063 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1064 tmp = ((OP(c) == NBOUND ?
1065 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1066 while (s < strend) {
1067 if (tmp ==
1068 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1069 tmp = !tmp;
1070 else if ((norun || regtry(prog, s)))
1071 goto got_it;
1072 s++;
1073 }
a0ed51b3 1074 }
6eb5f6b9 1075 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1076 goto got_it;
1077 break;
a0d0e21e 1078 case ALNUM:
ffc61ed2 1079 if (do_utf8) {
8269fa76 1080 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1081 while (s < strend) {
3568d838 1082 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1083 if (tmp && (norun || regtry(prog, s)))
1084 goto got_it;
1085 else
1086 tmp = doevery;
1087 }
bbce6d69 1088 else
ffc61ed2
JH
1089 tmp = 1;
1090 s += UTF8SKIP(s);
bbce6d69 1091 }
bbce6d69 1092 }
ffc61ed2
JH
1093 else {
1094 while (s < strend) {
1095 if (isALNUM(*s)) {
1096 if (tmp && (norun || regtry(prog, s)))
1097 goto got_it;
1098 else
1099 tmp = doevery;
1100 }
a0ed51b3 1101 else
ffc61ed2
JH
1102 tmp = 1;
1103 s++;
a0ed51b3 1104 }
a0ed51b3
LW
1105 }
1106 break;
bbce6d69 1107 case ALNUML:
3280af22 1108 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1109 if (do_utf8) {
1110 while (s < strend) {
1111 if (isALNUM_LC_utf8((U8*)s)) {
1112 if (tmp && (norun || regtry(prog, s)))
1113 goto got_it;
1114 else
1115 tmp = doevery;
1116 }
a687059c 1117 else
ffc61ed2
JH
1118 tmp = 1;
1119 s += UTF8SKIP(s);
a0d0e21e 1120 }
a0d0e21e 1121 }
ffc61ed2
JH
1122 else {
1123 while (s < strend) {
1124 if (isALNUM_LC(*s)) {
1125 if (tmp && (norun || regtry(prog, s)))
1126 goto got_it;
1127 else
1128 tmp = doevery;
1129 }
a0ed51b3 1130 else
ffc61ed2
JH
1131 tmp = 1;
1132 s++;
a0ed51b3 1133 }
a0ed51b3
LW
1134 }
1135 break;
a0d0e21e 1136 case NALNUM:
ffc61ed2 1137 if (do_utf8) {
8269fa76 1138 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1139 while (s < strend) {
3568d838 1140 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1141 if (tmp && (norun || regtry(prog, s)))
1142 goto got_it;
1143 else
1144 tmp = doevery;
1145 }
bbce6d69 1146 else
ffc61ed2
JH
1147 tmp = 1;
1148 s += UTF8SKIP(s);
bbce6d69 1149 }
bbce6d69 1150 }
ffc61ed2
JH
1151 else {
1152 while (s < strend) {
1153 if (!isALNUM(*s)) {
1154 if (tmp && (norun || regtry(prog, s)))
1155 goto got_it;
1156 else
1157 tmp = doevery;
1158 }
a0ed51b3 1159 else
ffc61ed2
JH
1160 tmp = 1;
1161 s++;
a0ed51b3 1162 }
a0ed51b3
LW
1163 }
1164 break;
bbce6d69 1165 case NALNUML:
3280af22 1166 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1167 if (do_utf8) {
1168 while (s < strend) {
1169 if (!isALNUM_LC_utf8((U8*)s)) {
1170 if (tmp && (norun || regtry(prog, s)))
1171 goto got_it;
1172 else
1173 tmp = doevery;
1174 }
a687059c 1175 else
ffc61ed2
JH
1176 tmp = 1;
1177 s += UTF8SKIP(s);
a687059c 1178 }
a0d0e21e 1179 }
ffc61ed2
JH
1180 else {
1181 while (s < strend) {
1182 if (!isALNUM_LC(*s)) {
1183 if (tmp && (norun || regtry(prog, s)))
1184 goto got_it;
1185 else
1186 tmp = doevery;
1187 }
a0ed51b3 1188 else
ffc61ed2
JH
1189 tmp = 1;
1190 s++;
a0ed51b3 1191 }
a0ed51b3
LW
1192 }
1193 break;
a0d0e21e 1194 case SPACE:
ffc61ed2 1195 if (do_utf8) {
8269fa76 1196 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1197 while (s < strend) {
3568d838 1198 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1199 if (tmp && (norun || regtry(prog, s)))
1200 goto got_it;
1201 else
1202 tmp = doevery;
1203 }
a0d0e21e 1204 else
ffc61ed2
JH
1205 tmp = 1;
1206 s += UTF8SKIP(s);
2304df62 1207 }
a0d0e21e 1208 }
ffc61ed2
JH
1209 else {
1210 while (s < strend) {
1211 if (isSPACE(*s)) {
1212 if (tmp && (norun || regtry(prog, s)))
1213 goto got_it;
1214 else
1215 tmp = doevery;
1216 }
a0ed51b3 1217 else
ffc61ed2
JH
1218 tmp = 1;
1219 s++;
a0ed51b3 1220 }
a0ed51b3
LW
1221 }
1222 break;
bbce6d69 1223 case SPACEL:
3280af22 1224 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1225 if (do_utf8) {
1226 while (s < strend) {
1227 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1228 if (tmp && (norun || regtry(prog, s)))
1229 goto got_it;
1230 else
1231 tmp = doevery;
1232 }
bbce6d69 1233 else
ffc61ed2
JH
1234 tmp = 1;
1235 s += UTF8SKIP(s);
bbce6d69 1236 }
bbce6d69 1237 }
ffc61ed2
JH
1238 else {
1239 while (s < strend) {
1240 if (isSPACE_LC(*s)) {
1241 if (tmp && (norun || regtry(prog, s)))
1242 goto got_it;
1243 else
1244 tmp = doevery;
1245 }
a0ed51b3 1246 else
ffc61ed2
JH
1247 tmp = 1;
1248 s++;
a0ed51b3 1249 }
a0ed51b3
LW
1250 }
1251 break;
a0d0e21e 1252 case NSPACE:
ffc61ed2 1253 if (do_utf8) {
8269fa76 1254 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1255 while (s < strend) {
3568d838 1256 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1257 if (tmp && (norun || regtry(prog, s)))
1258 goto got_it;
1259 else
1260 tmp = doevery;
1261 }
a0d0e21e 1262 else
ffc61ed2
JH
1263 tmp = 1;
1264 s += UTF8SKIP(s);
a687059c 1265 }
a0d0e21e 1266 }
ffc61ed2
JH
1267 else {
1268 while (s < strend) {
1269 if (!isSPACE(*s)) {
1270 if (tmp && (norun || regtry(prog, s)))
1271 goto got_it;
1272 else
1273 tmp = doevery;
1274 }
a0ed51b3 1275 else
ffc61ed2
JH
1276 tmp = 1;
1277 s++;
a0ed51b3 1278 }
a0ed51b3
LW
1279 }
1280 break;
bbce6d69 1281 case NSPACEL:
3280af22 1282 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1283 if (do_utf8) {
1284 while (s < strend) {
1285 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1286 if (tmp && (norun || regtry(prog, s)))
1287 goto got_it;
1288 else
1289 tmp = doevery;
1290 }
bbce6d69 1291 else
ffc61ed2
JH
1292 tmp = 1;
1293 s += UTF8SKIP(s);
bbce6d69 1294 }
bbce6d69 1295 }
ffc61ed2
JH
1296 else {
1297 while (s < strend) {
1298 if (!isSPACE_LC(*s)) {
1299 if (tmp && (norun || regtry(prog, s)))
1300 goto got_it;
1301 else
1302 tmp = doevery;
1303 }
a0ed51b3 1304 else
ffc61ed2
JH
1305 tmp = 1;
1306 s++;
a0ed51b3 1307 }
a0ed51b3
LW
1308 }
1309 break;
a0d0e21e 1310 case DIGIT:
ffc61ed2 1311 if (do_utf8) {
8269fa76 1312 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1313 while (s < strend) {
3568d838 1314 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1315 if (tmp && (norun || regtry(prog, s)))
1316 goto got_it;
1317 else
1318 tmp = doevery;
1319 }
a0d0e21e 1320 else
ffc61ed2
JH
1321 tmp = 1;
1322 s += UTF8SKIP(s);
2b69d0c2 1323 }
a0d0e21e 1324 }
ffc61ed2
JH
1325 else {
1326 while (s < strend) {
1327 if (isDIGIT(*s)) {
1328 if (tmp && (norun || regtry(prog, s)))
1329 goto got_it;
1330 else
1331 tmp = doevery;
1332 }
a0ed51b3 1333 else
ffc61ed2
JH
1334 tmp = 1;
1335 s++;
a0ed51b3 1336 }
a0ed51b3
LW
1337 }
1338 break;
b8c5462f
JH
1339 case DIGITL:
1340 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1341 if (do_utf8) {
1342 while (s < strend) {
1343 if (isDIGIT_LC_utf8((U8*)s)) {
1344 if (tmp && (norun || regtry(prog, s)))
1345 goto got_it;
1346 else
1347 tmp = doevery;
1348 }
b8c5462f 1349 else
ffc61ed2
JH
1350 tmp = 1;
1351 s += UTF8SKIP(s);
b8c5462f 1352 }
b8c5462f 1353 }
ffc61ed2
JH
1354 else {
1355 while (s < strend) {
1356 if (isDIGIT_LC(*s)) {
1357 if (tmp && (norun || regtry(prog, s)))
1358 goto got_it;
1359 else
1360 tmp = doevery;
1361 }
b8c5462f 1362 else
ffc61ed2
JH
1363 tmp = 1;
1364 s++;
b8c5462f 1365 }
b8c5462f
JH
1366 }
1367 break;
a0d0e21e 1368 case NDIGIT:
ffc61ed2 1369 if (do_utf8) {
8269fa76 1370 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1371 while (s < strend) {
3568d838 1372 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1373 if (tmp && (norun || regtry(prog, s)))
1374 goto got_it;
1375 else
1376 tmp = doevery;
1377 }
a0d0e21e 1378 else
ffc61ed2
JH
1379 tmp = 1;
1380 s += UTF8SKIP(s);
a687059c 1381 }
a0d0e21e 1382 }
ffc61ed2
JH
1383 else {
1384 while (s < strend) {
1385 if (!isDIGIT(*s)) {
1386 if (tmp && (norun || regtry(prog, s)))
1387 goto got_it;
1388 else
1389 tmp = doevery;
1390 }
a0ed51b3 1391 else
ffc61ed2
JH
1392 tmp = 1;
1393 s++;
a0ed51b3 1394 }
a0ed51b3
LW
1395 }
1396 break;
b8c5462f
JH
1397 case NDIGITL:
1398 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1399 if (do_utf8) {
1400 while (s < strend) {
1401 if (!isDIGIT_LC_utf8((U8*)s)) {
1402 if (tmp && (norun || regtry(prog, s)))
1403 goto got_it;
1404 else
1405 tmp = doevery;
1406 }
b8c5462f 1407 else
ffc61ed2
JH
1408 tmp = 1;
1409 s += UTF8SKIP(s);
b8c5462f 1410 }
a0ed51b3 1411 }
ffc61ed2
JH
1412 else {
1413 while (s < strend) {
1414 if (!isDIGIT_LC(*s)) {
1415 if (tmp && (norun || regtry(prog, s)))
1416 goto got_it;
1417 else
1418 tmp = doevery;
1419 }
cf93c79d 1420 else
ffc61ed2
JH
1421 tmp = 1;
1422 s++;
b8c5462f 1423 }
b8c5462f
JH
1424 }
1425 break;
b3c9acc1 1426 default:
3c3eec57
GS
1427 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1428 break;
d6a28714 1429 }
6eb5f6b9
JH
1430 return 0;
1431 got_it:
1432 return s;
1433}
1434
1435/*
1436 - regexec_flags - match a regexp against a string
1437 */
1438I32
1439Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1440 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1441/* strend: pointer to null at end of string */
1442/* strbeg: real beginning of string */
1443/* minend: end of match must be >=minend after stringarg. */
1444/* data: May be used for some additional optimizations. */
1445/* nosave: For optimizations. */
1446{
6eb5f6b9
JH
1447 register char *s;
1448 register regnode *c;
1449 register char *startpos = stringarg;
6eb5f6b9
JH
1450 I32 minlen; /* must match at least this many chars */
1451 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1452 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1453 constant substr. */ /* CC */
1454 I32 end_shift = 0; /* Same for the end. */ /* CC */
1455 I32 scream_pos = -1; /* Internal iterator of scream. */
1456 char *scream_olds;
1457 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1458 bool do_utf8 = DO_UTF8(sv);
2a782b5b
JH
1459#ifdef DEBUGGING
1460 SV *dsv = sv_2mortal(newSVpvn("", 0));
1461#endif
6eb5f6b9
JH
1462
1463 PL_regcc = 0;
1464
1465 cache_re(prog);
1466#ifdef DEBUGGING
aea4f609 1467 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1468#endif
1469
1470 /* Be paranoid... */
1471 if (prog == NULL || startpos == NULL) {
1472 Perl_croak(aTHX_ "NULL regexp parameter");
1473 return 0;
1474 }
1475
1476 minlen = prog->minlen;
f33976b4 1477 if (do_utf8 && !(prog->reganch & ROPT_CANY_SEEN)) {
3fa9c3d7
JH
1478 if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) {
1479 DEBUG_r(PerlIO_printf(Perl_debug_log,
1480 "Too short (in characters)...\n"));
1481 goto phooey;
1482 }
1aa99e6b
IH
1483 }
1484 else {
3fa9c3d7
JH
1485 if (strend - startpos < minlen) {
1486 DEBUG_r(PerlIO_printf(Perl_debug_log,
1487 "Too short (in bytes)...\n"));
1488 goto phooey;
1489 }
1aa99e6b 1490 }
6eb5f6b9 1491
6eb5f6b9
JH
1492 /* Check validity of program. */
1493 if (UCHARAT(prog->program) != REG_MAGIC) {
1494 Perl_croak(aTHX_ "corrupted regexp program");
1495 }
1496
1497 PL_reg_flags = 0;
1498 PL_reg_eval_set = 0;
1499 PL_reg_maxiter = 0;
1500
1501 if (prog->reganch & ROPT_UTF8)
1502 PL_reg_flags |= RF_utf8;
1503
1504 /* Mark beginning of line for ^ and lookbehind. */
1505 PL_regbol = startpos;
1506 PL_bostr = strbeg;
1507 PL_reg_sv = sv;
1508
1509 /* Mark end of line for $ (and such) */
1510 PL_regeol = strend;
1511
1512 /* see how far we have to get to not match where we matched before */
1513 PL_regtill = startpos+minend;
1514
1515 /* We start without call_cc context. */
1516 PL_reg_call_cc = 0;
1517
1518 /* If there is a "must appear" string, look for it. */
1519 s = startpos;
1520
1521 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1522 MAGIC *mg;
1523
1524 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1525 PL_reg_ganch = startpos;
1526 else if (sv && SvTYPE(sv) >= SVt_PVMG
1527 && SvMAGIC(sv)
14befaf4
DM
1528 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1529 && mg->mg_len >= 0) {
6eb5f6b9
JH
1530 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1531 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1532 if (s > PL_reg_ganch)
6eb5f6b9
JH
1533 goto phooey;
1534 s = PL_reg_ganch;
1535 }
1536 }
1537 else /* pos() not defined */
1538 PL_reg_ganch = strbeg;
1539 }
1540
699c3c34
JH
1541 if (do_utf8 == (UTF!=0) &&
1542 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1543 re_scream_pos_data d;
1544
1545 d.scream_olds = &scream_olds;
1546 d.scream_pos = &scream_pos;
1547 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7
JH
1548 if (!s) {
1549 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1550 goto phooey; /* not present */
3fa9c3d7 1551 }
6eb5f6b9
JH
1552 }
1553
2a782b5b 1554 DEBUG_r({
df1ffd02
JH
1555 char *s = do_utf8 ? sv_uni_display(dsv, sv, 60, 0) : startpos;
1556 int len = do_utf8 ? strlen(s) : strend - startpos;
2a782b5b
JH
1557 if (!PL_colorset)
1558 reginitcolors();
1559 PerlIO_printf(Perl_debug_log,
1560 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1561 PL_colors[4],PL_colors[5],PL_colors[0],
1562 prog->precomp,
1563 PL_colors[1],
1564 (strlen(prog->precomp) > 60 ? "..." : ""),
1565 PL_colors[0],
1566 (int)(len > 60 ? 60 : len),
1567 s, PL_colors[1],
1568 (len > 60 ? "..." : "")
1569 );
1570 });
6eb5f6b9
JH
1571
1572 /* Simplest case: anchored match need be tried only once. */
1573 /* [unless only anchor is BOL and multiline is set] */
1574 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1575 if (s == startpos && regtry(prog, startpos))
1576 goto got_it;
1577 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1578 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1579 {
1580 char *end;
1581
1582 if (minlen)
1583 dontbother = minlen - 1;
1aa99e6b 1584 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1585 /* for multiline we only have to try after newlines */
1586 if (prog->check_substr) {
1587 if (s == startpos)
1588 goto after_try;
1589 while (1) {
1590 if (regtry(prog, s))
1591 goto got_it;
1592 after_try:
1593 if (s >= end)
1594 goto phooey;
1595 if (prog->reganch & RE_USE_INTUIT) {
1596 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1597 if (!s)
1598 goto phooey;
1599 }
1600 else
1601 s++;
1602 }
1603 } else {
1604 if (s > startpos)
1605 s--;
1606 while (s < end) {
1607 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1608 if (regtry(prog, s))
1609 goto got_it;
1610 }
1611 }
1612 }
1613 }
1614 goto phooey;
1615 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1616 if (regtry(prog, PL_reg_ganch))
1617 goto got_it;
1618 goto phooey;
1619 }
1620
1621 /* Messy cases: unanchored match. */
9041c2e3 1622 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1623 /* we have /x+whatever/ */
1624 /* it must be a one character string (XXXX Except UTF?) */
1625 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1626#ifdef DEBUGGING
1627 int did_match = 0;
1628#endif
1629
1aa99e6b 1630 if (do_utf8) {
6eb5f6b9
JH
1631 while (s < strend) {
1632 if (*s == ch) {
bf93d4cc 1633 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1634 if (regtry(prog, s)) goto got_it;
1635 s += UTF8SKIP(s);
1636 while (s < strend && *s == ch)
1637 s += UTF8SKIP(s);
1638 }
1639 s += UTF8SKIP(s);
1640 }
1641 }
1642 else {
1643 while (s < strend) {
1644 if (*s == ch) {
bf93d4cc 1645 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1646 if (regtry(prog, s)) goto got_it;
1647 s++;
1648 while (s < strend && *s == ch)
1649 s++;
1650 }
1651 s++;
1652 }
1653 }
b7953727 1654 DEBUG_r(if (!did_match)
bf93d4cc 1655 PerlIO_printf(Perl_debug_log,
b7953727
JH
1656 "Did not find anchored character...\n")
1657 );
6eb5f6b9
JH
1658 }
1659 /*SUPPRESS 560*/
1aa99e6b
IH
1660 else if (do_utf8 == (UTF!=0) &&
1661 (prog->anchored_substr != Nullsv
9041c2e3 1662 || (prog->float_substr != Nullsv
1aa99e6b 1663 && prog->float_max_offset < strend - s))) {
9041c2e3 1664 SV *must = prog->anchored_substr
6eb5f6b9 1665 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1666 I32 back_max =
6eb5f6b9 1667 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1668 I32 back_min =
6eb5f6b9 1669 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1670 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1671 -(I32)(CHR_SVLEN(must)
1aa99e6b 1672 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1673 char *last1; /* Last position checked before */
bf93d4cc
GS
1674#ifdef DEBUGGING
1675 int did_match = 0;
1676#endif
6eb5f6b9
JH
1677
1678 if (s > PL_bostr)
1679 last1 = HOPc(s, -1);
1680 else
1681 last1 = s - 1; /* bogus */
1682
1683 /* XXXX check_substr already used to find `s', can optimize if
1684 check_substr==must. */
1685 scream_pos = -1;
1686 dontbother = end_shift;
1687 strend = HOPc(strend, -dontbother);
1688 while ( (s <= last) &&
9041c2e3 1689 ((flags & REXEC_SCREAM)
1aa99e6b 1690 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1691 end_shift, &scream_pos, 0))
1aa99e6b 1692 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1693 (unsigned char*)strend, must,
6eb5f6b9 1694 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1695 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1696 if (HOPc(s, -back_max) > last1) {
1697 last1 = HOPc(s, -back_min);
1698 s = HOPc(s, -back_max);
1699 }
1700 else {
1701 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1702
1703 last1 = HOPc(s, -back_min);
1704 s = t;
1705 }
1aa99e6b 1706 if (do_utf8) {
6eb5f6b9
JH
1707 while (s <= last1) {
1708 if (regtry(prog, s))
1709 goto got_it;
1710 s += UTF8SKIP(s);
1711 }
1712 }
1713 else {
1714 while (s <= last1) {
1715 if (regtry(prog, s))
1716 goto got_it;
1717 s++;
1718 }
1719 }
1720 }
b7953727
JH
1721 DEBUG_r(if (!did_match)
1722 PerlIO_printf(Perl_debug_log,
1723 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1724 ((must == prog->anchored_substr)
1725 ? "anchored" : "floating"),
1726 PL_colors[0],
1727 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1728 SvPVX(must),
b7953727
JH
1729 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1730 );
6eb5f6b9
JH
1731 goto phooey;
1732 }
155aba94 1733 else if ((c = prog->regstclass)) {
66e933ab
GS
1734 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1735 /* don't bother with what can't match */
6eb5f6b9 1736 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1737 DEBUG_r({
1738 SV *prop = sv_newmortal();
1739 regprop(prop, c);
2a782b5b 1740 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), UTF ? sv_uni_display(dsv, sv, 60, 0) : s);
ffc61ed2 1741 });
6eb5f6b9
JH
1742 if (find_byclass(prog, c, s, strend, startpos, 0))
1743 goto got_it;
bf93d4cc 1744 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1745 }
1746 else {
1747 dontbother = 0;
1748 if (prog->float_substr != Nullsv) { /* Trim the end. */
1749 char *last;
d6a28714
JH
1750
1751 if (flags & REXEC_SCREAM) {
1752 last = screaminstr(sv, prog->float_substr, s - strbeg,
1753 end_shift, &scream_pos, 1); /* last one */
1754 if (!last)
ffc61ed2 1755 last = scream_olds; /* Only one occurrence. */
b8c5462f 1756 }
d6a28714
JH
1757 else {
1758 STRLEN len;
1759 char *little = SvPV(prog->float_substr, len);
1760
1761 if (SvTAIL(prog->float_substr)) {
1762 if (memEQ(strend - len + 1, little, len - 1))
1763 last = strend - len + 1;
1764 else if (!PL_multiline)
9041c2e3 1765 last = memEQ(strend - len, little, len)
d6a28714 1766 ? strend - len : Nullch;
b8c5462f 1767 else
d6a28714
JH
1768 goto find_last;
1769 } else {
1770 find_last:
9041c2e3 1771 if (len)
d6a28714 1772 last = rninstr(s, strend, little, little + len);
b8c5462f 1773 else
d6a28714 1774 last = strend; /* matching `$' */
b8c5462f 1775 }
b8c5462f 1776 }
bf93d4cc
GS
1777 if (last == NULL) {
1778 DEBUG_r(PerlIO_printf(Perl_debug_log,
1779 "%sCan't trim the tail, match fails (should not happen)%s\n",
1780 PL_colors[4],PL_colors[5]));
1781 goto phooey; /* Should not happen! */
1782 }
d6a28714
JH
1783 dontbother = strend - last + prog->float_min_offset;
1784 }
1785 if (minlen && (dontbother < minlen))
1786 dontbother = minlen - 1;
1787 strend -= dontbother; /* this one's always in bytes! */
1788 /* We don't know much -- general case. */
1aa99e6b 1789 if (do_utf8) {
d6a28714
JH
1790 for (;;) {
1791 if (regtry(prog, s))
1792 goto got_it;
1793 if (s >= strend)
1794 break;
b8c5462f 1795 s += UTF8SKIP(s);
d6a28714
JH
1796 };
1797 }
1798 else {
1799 do {
1800 if (regtry(prog, s))
1801 goto got_it;
1802 } while (s++ < strend);
1803 }
1804 }
1805
1806 /* Failure. */
1807 goto phooey;
1808
1809got_it:
1810 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1811
1812 if (PL_reg_eval_set) {
1813 /* Preserve the current value of $^R */
1814 if (oreplsv != GvSV(PL_replgv))
1815 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1816 restored, the value remains
1817 the same. */
acfe0abc 1818 restore_pos(aTHX_ 0);
d6a28714
JH
1819 }
1820
1821 /* make sure $`, $&, $', and $digit will work later */
1822 if ( !(flags & REXEC_NOT_FIRST) ) {
1823 if (RX_MATCH_COPIED(prog)) {
1824 Safefree(prog->subbeg);
1825 RX_MATCH_COPIED_off(prog);
1826 }
1827 if (flags & REXEC_COPY_STR) {
1828 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1829
1830 s = savepvn(strbeg, i);
1831 prog->subbeg = s;
1832 prog->sublen = i;
1833 RX_MATCH_COPIED_on(prog);
1834 }
1835 else {
1836 prog->subbeg = strbeg;
1837 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1838 }
1839 }
9041c2e3 1840
d6a28714
JH
1841 return 1;
1842
1843phooey:
bf93d4cc
GS
1844 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1845 PL_colors[4],PL_colors[5]));
d6a28714 1846 if (PL_reg_eval_set)
acfe0abc 1847 restore_pos(aTHX_ 0);
d6a28714
JH
1848 return 0;
1849}
1850
1851/*
1852 - regtry - try match at specific point
1853 */
1854STATIC I32 /* 0 failure, 1 success */
1855S_regtry(pTHX_ regexp *prog, char *startpos)
1856{
d6a28714
JH
1857 register I32 i;
1858 register I32 *sp;
1859 register I32 *ep;
1860 CHECKPOINT lastcp;
1861
02db2b7b
IZ
1862#ifdef DEBUGGING
1863 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1864#endif
d6a28714
JH
1865 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1866 MAGIC *mg;
1867
1868 PL_reg_eval_set = RS_init;
1869 DEBUG_r(DEBUG_s(
b900a521
JH
1870 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1871 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1872 ));
e8347627 1873 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1874 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1875 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1876 SAVETMPS;
1877 /* Apparently this is not needed, judging by wantarray. */
e8347627 1878 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1879 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1880
1881 if (PL_reg_sv) {
1882 /* Make $_ available to executed code. */
1883 if (PL_reg_sv != DEFSV) {
4d1ff10f 1884 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
d6a28714
JH
1885 SAVESPTR(DEFSV);
1886 DEFSV = PL_reg_sv;
b8c5462f 1887 }
d6a28714 1888
9041c2e3 1889 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1890 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1891 /* prepare for quick setting of pos */
14befaf4
DM
1892 sv_magic(PL_reg_sv, (SV*)0,
1893 PERL_MAGIC_regex_global, Nullch, 0);
1894 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1895 mg->mg_len = -1;
b8c5462f 1896 }
d6a28714
JH
1897 PL_reg_magic = mg;
1898 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1899 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 1900 }
09687e5a 1901 if (!PL_reg_curpm) {
0f79a09d 1902 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
1903#ifdef USE_ITHREADS
1904 {
1905 SV* repointer = newSViv(0);
577e12cc 1906 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 1907 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
1908 av_push(PL_regex_padav,repointer);
1909 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
1910 PL_regex_pad = AvARRAY(PL_regex_padav);
1911 }
1912#endif
1913 }
aaa362c4 1914 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
1915 PL_reg_oldcurpm = PL_curpm;
1916 PL_curpm = PL_reg_curpm;
1917 if (RX_MATCH_COPIED(prog)) {
1918 /* Here is a serious problem: we cannot rewrite subbeg,
1919 since it may be needed if this match fails. Thus
1920 $` inside (?{}) could fail... */
1921 PL_reg_oldsaved = prog->subbeg;
1922 PL_reg_oldsavedlen = prog->sublen;
1923 RX_MATCH_COPIED_off(prog);
1924 }
1925 else
1926 PL_reg_oldsaved = Nullch;
1927 prog->subbeg = PL_bostr;
1928 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1929 }
1930 prog->startp[0] = startpos - PL_bostr;
1931 PL_reginput = startpos;
1932 PL_regstartp = prog->startp;
1933 PL_regendp = prog->endp;
1934 PL_reglastparen = &prog->lastparen;
a01268b5 1935 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
1936 prog->lastparen = 0;
1937 PL_regsize = 0;
1938 DEBUG_r(PL_reg_starttry = startpos);
1939 if (PL_reg_start_tmpl <= prog->nparens) {
1940 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1941 if(PL_reg_start_tmp)
1942 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1943 else
1944 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1945 }
1946
1947 /* XXXX What this code is doing here?!!! There should be no need
1948 to do this again and again, PL_reglastparen should take care of
3dd2943c 1949 this! --ilya*/
dafc8851
JH
1950
1951 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1952 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1953 * PL_reglastparen), is not needed at all by the test suite
1954 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1955 * enough, for building DynaLoader, or otherwise this
1956 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1957 * will happen. Meanwhile, this code *is* needed for the
1958 * above-mentioned test suite tests to succeed. The common theme
1959 * on those tests seems to be returning null fields from matches.
1960 * --jhi */
dafc8851 1961#if 1
d6a28714
JH
1962 sp = prog->startp;
1963 ep = prog->endp;
1964 if (prog->nparens) {
09e8ae3b 1965 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1966 *++sp = -1;
1967 *++ep = -1;
1968 }
1969 }
dafc8851 1970#endif
02db2b7b 1971 REGCP_SET(lastcp);
d6a28714
JH
1972 if (regmatch(prog->program + 1)) {
1973 prog->endp[0] = PL_reginput - PL_bostr;
1974 return 1;
1975 }
02db2b7b 1976 REGCP_UNWIND(lastcp);
d6a28714
JH
1977 return 0;
1978}
1979
02db2b7b
IZ
1980#define RE_UNWIND_BRANCH 1
1981#define RE_UNWIND_BRANCHJ 2
1982
1983union re_unwind_t;
1984
1985typedef struct { /* XX: makes sense to enlarge it... */
1986 I32 type;
1987 I32 prev;
1988 CHECKPOINT lastcp;
1989} re_unwind_generic_t;
1990
1991typedef struct {
1992 I32 type;
1993 I32 prev;
1994 CHECKPOINT lastcp;
1995 I32 lastparen;
1996 regnode *next;
1997 char *locinput;
1998 I32 nextchr;
1999#ifdef DEBUGGING
2000 int regindent;
2001#endif
2002} re_unwind_branch_t;
2003
2004typedef union re_unwind_t {
2005 I32 type;
2006 re_unwind_generic_t generic;
2007 re_unwind_branch_t branch;
2008} re_unwind_t;
2009
8ba1375e
MJD
2010#define sayYES goto yes
2011#define sayNO goto no
2012#define sayYES_FINAL goto yes_final
2013#define sayYES_LOUD goto yes_loud
2014#define sayNO_FINAL goto no_final
2015#define sayNO_SILENT goto do_no
2016#define saySAME(x) if (x) goto yes; else goto no
2017
2018#define REPORT_CODE_OFF 24
2019
d6a28714
JH
2020/*
2021 - regmatch - main matching routine
2022 *
2023 * Conceptually the strategy is simple: check to see whether the current
2024 * node matches, call self recursively to see whether the rest matches,
2025 * and then act accordingly. In practice we make some effort to avoid
2026 * recursion, in particular by going through "ordinary" nodes (that don't
2027 * need to know whether the rest of the match failed) by a loop instead of
2028 * by recursion.
2029 */
2030/* [lwall] I've hoisted the register declarations to the outer block in order to
2031 * maybe save a little bit of pushing and popping on the stack. It also takes
2032 * advantage of machines that use a register save mask on subroutine entry.
2033 */
2034STATIC I32 /* 0 failure, 1 success */
2035S_regmatch(pTHX_ regnode *prog)
2036{
d6a28714
JH
2037 register regnode *scan; /* Current node. */
2038 regnode *next; /* Next node. */
2039 regnode *inner; /* Next node in internal branch. */
2040 register I32 nextchr; /* renamed nextchr - nextchar colides with
2041 function of same name */
2042 register I32 n; /* no or next */
b7953727
JH
2043 register I32 ln = 0; /* len or last */
2044 register char *s = Nullch; /* operand or save */
d6a28714 2045 register char *locinput = PL_reginput;
b7953727 2046 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2047 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2048 I32 unwind = 0;
b7953727 2049#if 0
02db2b7b 2050 I32 firstcp = PL_savestack_ix;
b7953727 2051#endif
53c4c00c 2052 register bool do_utf8 = PL_reg_match_utf8;
2a782b5b
JH
2053#ifdef DEBUGGING
2054 SV *dsv0 = sv_2mortal(newSVpvn("", 0));
2055 SV *dsv1 = sv_2mortal(newSVpvn("", 0));
2056 SV *dsv2 = sv_2mortal(newSVpvn("", 0));
2057#endif
02db2b7b 2058
d6a28714
JH
2059#ifdef DEBUGGING
2060 PL_regindent++;
2061#endif
2062
2063 /* Note that nextchr is a byte even in UTF */
2064 nextchr = UCHARAT(locinput);
2065 scan = prog;
2066 while (scan != NULL) {
8ba1375e 2067
2a782b5b 2068 DEBUG_r( {
d6a28714
JH
2069 SV *prop = sv_newmortal();
2070 int docolor = *PL_colors[0];
2071 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2072 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2073 /* The part of the string before starttry has one color
2074 (pref0_len chars), between starttry and current
2075 position another one (pref_len - pref0_len chars),
2076 after the current position the third one.
2077 We assume that pref0_len <= pref_len, otherwise we
2078 decrease pref0_len. */
9041c2e3 2079 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2080 ? (5 + taill) - l : locinput - PL_bostr;
2081 int pref0_len;
d6a28714 2082
df1ffd02 2083 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2084 pref_len++;
2085 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2086 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2087 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2088 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2089 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2090 l--;
d6a28714
JH
2091 if (pref0_len < 0)
2092 pref0_len = 0;
2093 if (pref0_len > pref_len)
2094 pref0_len = pref_len;
2095 regprop(prop, scan);
2a782b5b
JH
2096 {
2097 char *s0 =
df1ffd02 2098 do_utf8 ?
2a782b5b
JH
2099 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
2100 pref0_len, 60, 0) :
2101 locinput - pref_len;
df1ffd02
JH
2102 int len0 = do_utf8 ? strlen(s0) : pref0_len;
2103 char *s1 = do_utf8 ?
2a782b5b
JH
2104 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
2105 pref_len - pref0_len, 60, 0) :
2106 locinput - pref_len + pref0_len;
df1ffd02
JH
2107 int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2108 char *s2 = do_utf8 ?
2a782b5b
JH
2109 pv_uni_display(dsv2, (U8*)locinput,
2110 PL_regeol - locinput, 60, 0) :
2111 locinput;
df1ffd02 2112 int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2113 PerlIO_printf(Perl_debug_log,
2114 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2115 (IV)(locinput - PL_bostr),
2116 PL_colors[4],
2117 len0, s0,
2118 PL_colors[5],
2119 PL_colors[2],
2120 len1, s1,
2121 PL_colors[3],
2122 (docolor ? "" : "> <"),
2123 PL_colors[0],
2124 len2, s2,
2125 PL_colors[1],
2126 15 - l - pref_len + 1,
2127 "",
2128 (IV)(scan - PL_regprogram), PL_regindent*2, "",
2129 SvPVX(prop));
2130 }
2131 });
d6a28714
JH
2132
2133 next = scan + NEXT_OFF(scan);
2134 if (next == scan)
2135 next = NULL;
2136
2137 switch (OP(scan)) {
2138 case BOL:
12d33761
HS
2139 if (locinput == PL_bostr || (PL_multiline &&
2140 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2141 {
2142 /* regtill = regbol; */
b8c5462f
JH
2143 break;
2144 }
d6a28714
JH
2145 sayNO;
2146 case MBOL:
12d33761
HS
2147 if (locinput == PL_bostr ||
2148 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2149 {
b8c5462f
JH
2150 break;
2151 }
d6a28714
JH
2152 sayNO;
2153 case SBOL:
c2a73568 2154 if (locinput == PL_bostr)
b8c5462f 2155 break;
d6a28714
JH
2156 sayNO;
2157 case GPOS:
2158 if (locinput == PL_reg_ganch)
2159 break;
2160 sayNO;
2161 case EOL:
2162 if (PL_multiline)
2163 goto meol;
2164 else
2165 goto seol;
2166 case MEOL:
2167 meol:
2168 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2169 sayNO;
b8c5462f 2170 break;
d6a28714
JH
2171 case SEOL:
2172 seol:
2173 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2174 sayNO;
d6a28714 2175 if (PL_regeol - locinput > 1)
b8c5462f 2176 sayNO;
b8c5462f 2177 break;
d6a28714
JH
2178 case EOS:
2179 if (PL_regeol != locinput)
b8c5462f 2180 sayNO;
d6a28714 2181 break;
ffc61ed2 2182 case SANY:
d6a28714 2183 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2184 sayNO;
f33976b4
DB
2185 if (do_utf8) {
2186 locinput += PL_utf8skip[nextchr];
2187 if (locinput > PL_regeol)
2188 sayNO;
2189 nextchr = UCHARAT(locinput);
2190 }
2191 else
2192 nextchr = UCHARAT(++locinput);
2193 break;
2194 case CANY:
2195 if (!nextchr && locinput >= PL_regeol)
2196 sayNO;
b8c5462f 2197 nextchr = UCHARAT(++locinput);
a0d0e21e 2198 break;
ffc61ed2 2199 case REG_ANY:
1aa99e6b
IH
2200 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2201 sayNO;
2202 if (do_utf8) {
b8c5462f 2203 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2204 if (locinput > PL_regeol)
2205 sayNO;
a0ed51b3 2206 nextchr = UCHARAT(locinput);
a0ed51b3 2207 }
1aa99e6b
IH
2208 else
2209 nextchr = UCHARAT(++locinput);
a0ed51b3 2210 break;
d6a28714 2211 case EXACT:
cd439c50
IZ
2212 s = STRING(scan);
2213 ln = STR_LEN(scan);
1aa99e6b
IH
2214 if (do_utf8 != (UTF!=0)) {
2215 char *l = locinput;
2216 char *e = s + ln;
2217 STRLEN len;
2218 if (do_utf8)
2219 while (s < e) {
2220 if (l >= PL_regeol)
2221 sayNO;
9041c2e3 2222 if (*((U8*)s) != utf8_to_uvchr((U8*)l, &len))
1aa99e6b 2223 sayNO;
cf27e83c 2224 s += len;
1aa99e6b
IH
2225 l += len;
2226 }
2227 else
2228 while (s < e) {
2229 if (l >= PL_regeol)
2230 sayNO;
9041c2e3 2231 if (*((U8*)l) != utf8_to_uvchr((U8*)s, &len))
1aa99e6b 2232 sayNO;
545666db 2233 s+=len;
1aa99e6b
IH
2234 l++;
2235 }
2236 locinput = l;
2237 nextchr = UCHARAT(locinput);
2238 break;
2239 }
d6a28714
JH
2240 /* Inline the first character, for speed. */
2241 if (UCHARAT(s) != nextchr)
2242 sayNO;
2243 if (PL_regeol - locinput < ln)
2244 sayNO;
2245 if (ln > 1 && memNE(s, locinput, ln))
2246 sayNO;
2247 locinput += ln;
2248 nextchr = UCHARAT(locinput);
2249 break;
2250 case EXACTFL:
b8c5462f
JH
2251 PL_reg_flags |= RF_tainted;
2252 /* FALL THROUGH */
d6a28714 2253 case EXACTF:
cd439c50
IZ
2254 s = STRING(scan);
2255 ln = STR_LEN(scan);
d6a28714 2256
1aa99e6b 2257 if (do_utf8) {
d6a28714 2258 char *l = locinput;
1aa99e6b 2259 char *e;
a2a2844f
JH
2260 STRLEN ulen;
2261 U8 tmpbuf[UTF8_MAXLEN*2+1];
1aa99e6b 2262 e = s + ln;
d6a28714 2263 while (s < e) {
a2a2844f 2264 if (l >= PL_regeol)
d6a28714 2265 sayNO;
a2a2844f 2266 toLOWER_utf8((U8*)l, tmpbuf, &ulen);
7114a2d2 2267 if (memNE(s, (char*)tmpbuf, ulen))
a2a2844f
JH
2268 sayNO;
2269 s += UTF8SKIP(s);
2270 l += ulen;
b8c5462f 2271 }
d6a28714 2272 locinput = l;
a0ed51b3
LW
2273 nextchr = UCHARAT(locinput);
2274 break;
2275 }
d6a28714
JH
2276
2277 /* Inline the first character, for speed. */
2278 if (UCHARAT(s) != nextchr &&
2279 UCHARAT(s) != ((OP(scan) == EXACTF)
2280 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2281 sayNO;
d6a28714 2282 if (PL_regeol - locinput < ln)
b8c5462f 2283 sayNO;
d6a28714
JH
2284 if (ln > 1 && (OP(scan) == EXACTF
2285 ? ibcmp(s, locinput, ln)
2286 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2287 sayNO;
d6a28714
JH
2288 locinput += ln;
2289 nextchr = UCHARAT(locinput);
a0d0e21e 2290 break;
d6a28714 2291 case ANYOF:
ffc61ed2
JH
2292 if (do_utf8) {
2293 if (!reginclass(scan, (U8*)locinput, do_utf8))
2294 sayNO;
2295 if (locinput >= PL_regeol)
2296 sayNO;
2297 locinput += PL_utf8skip[nextchr];
b8c5462f 2298 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2299 }
2300 else {
2301 if (nextchr < 0)
2302 nextchr = UCHARAT(locinput);
2303 if (!reginclass(scan, (U8*)locinput, do_utf8))
2304 sayNO;
2305 if (!nextchr && locinput >= PL_regeol)
2306 sayNO;
2307 nextchr = UCHARAT(++locinput);
2308 }
b8c5462f 2309 break;
d6a28714 2310 case ALNUML:
b8c5462f
JH
2311 PL_reg_flags |= RF_tainted;
2312 /* FALL THROUGH */
d6a28714 2313 case ALNUM:
b8c5462f 2314 if (!nextchr)
4633a7c4 2315 sayNO;
ffc61ed2 2316 if (do_utf8) {
ad24be35 2317 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2318 if (!(OP(scan) == ALNUM
3568d838 2319 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2320 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2321 {
2322 sayNO;
a0ed51b3 2323 }
b8c5462f 2324 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2325 nextchr = UCHARAT(locinput);
2326 break;
2327 }
ffc61ed2 2328 if (!(OP(scan) == ALNUM
d6a28714 2329 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2330 sayNO;
b8c5462f 2331 nextchr = UCHARAT(++locinput);
a0d0e21e 2332 break;
d6a28714 2333 case NALNUML:
b8c5462f
JH
2334 PL_reg_flags |= RF_tainted;
2335 /* FALL THROUGH */
d6a28714
JH
2336 case NALNUM:
2337 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2338 sayNO;
ffc61ed2 2339 if (do_utf8) {
8269fa76 2340 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2341 if (OP(scan) == NALNUM
3568d838 2342 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2343 : isALNUM_LC_utf8((U8*)locinput))
2344 {
b8c5462f 2345 sayNO;
d6a28714 2346 }
b8c5462f
JH
2347 locinput += PL_utf8skip[nextchr];
2348 nextchr = UCHARAT(locinput);
2349 break;
2350 }
ffc61ed2 2351 if (OP(scan) == NALNUM
d6a28714 2352 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2353 sayNO;
76e3520e 2354 nextchr = UCHARAT(++locinput);
a0d0e21e 2355 break;
d6a28714
JH
2356 case BOUNDL:
2357 case NBOUNDL:
3280af22 2358 PL_reg_flags |= RF_tainted;
bbce6d69 2359 /* FALL THROUGH */
d6a28714
JH
2360 case BOUND:
2361 case NBOUND:
2362 /* was last char in word? */
ffc61ed2 2363 if (do_utf8) {
12d33761
HS
2364 if (locinput == PL_bostr)
2365 ln = '\n';
ffc61ed2
JH
2366 else {
2367 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2368
2b9d42f0 2369 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2370 }
2371 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2372 ln = isALNUM_uni(ln);
8269fa76 2373 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2374 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2375 }
2376 else {
9041c2e3 2377 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2378 n = isALNUM_LC_utf8((U8*)locinput);
2379 }
a0ed51b3 2380 }
d6a28714 2381 else {
12d33761
HS
2382 ln = (locinput != PL_bostr) ?
2383 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2384 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2385 ln = isALNUM(ln);
2386 n = isALNUM(nextchr);
2387 }
2388 else {
2389 ln = isALNUM_LC(ln);
2390 n = isALNUM_LC(nextchr);
2391 }
d6a28714 2392 }
ffc61ed2
JH
2393 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2394 OP(scan) == BOUNDL))
2395 sayNO;
a0ed51b3 2396 break;
d6a28714 2397 case SPACEL:
3280af22 2398 PL_reg_flags |= RF_tainted;
bbce6d69 2399 /* FALL THROUGH */
d6a28714 2400 case SPACE:
9442cb0e 2401 if (!nextchr)
4633a7c4 2402 sayNO;
1aa99e6b 2403 if (do_utf8) {
fd400ab9 2404 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2405 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2406 if (!(OP(scan) == SPACE
3568d838 2407 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2408 : isSPACE_LC_utf8((U8*)locinput)))
2409 {
2410 sayNO;
2411 }
2412 locinput += PL_utf8skip[nextchr];
2413 nextchr = UCHARAT(locinput);
2414 break;
d6a28714 2415 }
ffc61ed2
JH
2416 if (!(OP(scan) == SPACE
2417 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2418 sayNO;
2419 nextchr = UCHARAT(++locinput);
2420 }
2421 else {
2422 if (!(OP(scan) == SPACE
2423 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2424 sayNO;
2425 nextchr = UCHARAT(++locinput);
a0ed51b3 2426 }
a0ed51b3 2427 break;
d6a28714 2428 case NSPACEL:
3280af22 2429 PL_reg_flags |= RF_tainted;
bbce6d69 2430 /* FALL THROUGH */
d6a28714 2431 case NSPACE:
9442cb0e 2432 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2433 sayNO;
1aa99e6b 2434 if (do_utf8) {
8269fa76 2435 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2436 if (OP(scan) == NSPACE
3568d838 2437 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2438 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2439 {
2440 sayNO;
2441 }
2442 locinput += PL_utf8skip[nextchr];
2443 nextchr = UCHARAT(locinput);
2444 break;
a0ed51b3 2445 }
ffc61ed2 2446 if (OP(scan) == NSPACE
d6a28714 2447 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2448 sayNO;
76e3520e 2449 nextchr = UCHARAT(++locinput);
a0d0e21e 2450 break;
d6a28714 2451 case DIGITL:
a0ed51b3
LW
2452 PL_reg_flags |= RF_tainted;
2453 /* FALL THROUGH */
d6a28714 2454 case DIGIT:
9442cb0e 2455 if (!nextchr)
a0ed51b3 2456 sayNO;
1aa99e6b 2457 if (do_utf8) {
8269fa76 2458 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2459 if (!(OP(scan) == DIGIT
3568d838 2460 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2461 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2462 {
a0ed51b3 2463 sayNO;
dfe13c55 2464 }
6f06b55f 2465 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2466 nextchr = UCHARAT(locinput);
2467 break;
2468 }
ffc61ed2 2469 if (!(OP(scan) == DIGIT
9442cb0e 2470 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2471 sayNO;
2472 nextchr = UCHARAT(++locinput);
2473 break;
d6a28714 2474 case NDIGITL:
b8c5462f
JH
2475 PL_reg_flags |= RF_tainted;
2476 /* FALL THROUGH */
d6a28714 2477 case NDIGIT:
9442cb0e 2478 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2479 sayNO;
1aa99e6b 2480 if (do_utf8) {
8269fa76 2481 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2482 if (OP(scan) == NDIGIT
3568d838 2483 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2484 : isDIGIT_LC_utf8((U8*)locinput))
2485 {
a0ed51b3 2486 sayNO;
9442cb0e 2487 }
6f06b55f 2488 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2489 nextchr = UCHARAT(locinput);
2490 break;
2491 }
ffc61ed2 2492 if (OP(scan) == NDIGIT
9442cb0e 2493 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2494 sayNO;
2495 nextchr = UCHARAT(++locinput);
2496 break;
2497 case CLUMP:
8269fa76 2498 LOAD_UTF8_CHARCLASS(mark,"~");
3568d838
JH
2499 if (locinput >= PL_regeol ||
2500 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3 2501 sayNO;
6f06b55f 2502 locinput += PL_utf8skip[nextchr];
3568d838
JH
2503 while (locinput < PL_regeol &&
2504 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3
LW
2505 locinput += UTF8SKIP(locinput);
2506 if (locinput > PL_regeol)
2507 sayNO;
2508 nextchr = UCHARAT(locinput);
2509 break;
c8756f30 2510 case REFFL:
3280af22 2511 PL_reg_flags |= RF_tainted;
c8756f30 2512 /* FALL THROUGH */
c277df42 2513 case REF:
c8756f30 2514 case REFF:
c277df42 2515 n = ARG(scan); /* which paren pair */
cf93c79d 2516 ln = PL_regstartp[n];
2c2d71f5 2517 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2518 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2519 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2520 if (ln == PL_regendp[n])
a0d0e21e 2521 break;
a0ed51b3 2522
cf93c79d 2523 s = PL_bostr + ln;
1aa99e6b 2524 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2525 char *l = locinput;
cf93c79d 2526 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2527 /*
2528 * Note that we can't do the "other character" lookup trick as
2529 * in the 8-bit case (no pun intended) because in Unicode we
2530 * have to map both upper and title case to lower case.
2531 */
2532 if (OP(scan) == REFF) {
a2a2844f
JH
2533 STRLEN ulen1, ulen2;
2534 U8 tmpbuf1[UTF8_MAXLEN*2+1];
2535 U8 tmpbuf2[UTF8_MAXLEN*2+1];
a0ed51b3
LW
2536 while (s < e) {
2537 if (l >= PL_regeol)
2538 sayNO;
a2a2844f
JH
2539 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2540 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 2541 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 2542 sayNO;
a2a2844f
JH
2543 s += ulen1;
2544 l += ulen2;
a0ed51b3
LW
2545 }
2546 }
2547 locinput = l;
2548 nextchr = UCHARAT(locinput);
2549 break;
2550 }
2551
a0d0e21e 2552 /* Inline the first character, for speed. */
76e3520e 2553 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2554 (OP(scan) == REF ||
2555 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2556 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2557 sayNO;
cf93c79d 2558 ln = PL_regendp[n] - ln;
3280af22 2559 if (locinput + ln > PL_regeol)
4633a7c4 2560 sayNO;
c8756f30
AK
2561 if (ln > 1 && (OP(scan) == REF
2562 ? memNE(s, locinput, ln)
2563 : (OP(scan) == REFF
2564 ? ibcmp(s, locinput, ln)
2565 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2566 sayNO;
a0d0e21e 2567 locinput += ln;
76e3520e 2568 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2569 break;
2570
2571 case NOTHING:
c277df42 2572 case TAIL:
a0d0e21e
LW
2573 break;
2574 case BACK:
2575 break;
c277df42
IZ
2576 case EVAL:
2577 {
2578 dSP;
533c011a 2579 OP_4tree *oop = PL_op;
3280af22
NIS
2580 COP *ocurcop = PL_curcop;
2581 SV **ocurpad = PL_curpad;
c277df42 2582 SV *ret;
9041c2e3 2583
c277df42 2584 n = ARG(scan);
533c011a 2585 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2586 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2587 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2588 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2589
8e5e9ebe
RGS
2590 {
2591 SV **before = SP;
2592 CALLRUNOPS(aTHX); /* Scalar context. */
2593 SPAGAIN;
2594 if (SP == before)
2595 ret = Nullsv; /* protect against empty (?{}) blocks. */
2596 else {
2597 ret = POPs;
2598 PUTBACK;
2599 }
2600 }
2601
0f5d15d6
IZ
2602 PL_op = oop;
2603 PL_curpad = ocurpad;
2604 PL_curcop = ocurcop;
c277df42 2605 if (logical) {
0f5d15d6
IZ
2606 if (logical == 2) { /* Postponed subexpression. */
2607 regexp *re;
22c35a8c 2608 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2609 re_cc_state state;
0f5d15d6
IZ
2610 CHECKPOINT cp, lastcp;
2611
2612 if(SvROK(ret) || SvRMAGICAL(ret)) {
2613 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2614
2615 if(SvMAGICAL(sv))
14befaf4 2616 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2617 }
2618 if (mg) {
2619 re = (regexp *)mg->mg_obj;
df0003d4 2620 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2621 }
2622 else {
2623 STRLEN len;
2624 char *t = SvPV(ret, len);
2625 PMOP pm;
2626 char *oprecomp = PL_regprecomp;
2627 I32 osize = PL_regsize;
2628 I32 onpar = PL_regnpar;
2629
5fcd1c1b 2630 Zero(&pm, 1, PMOP);
cea2e8a9 2631 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2632 if (!(SvFLAGS(ret)
0f5d15d6 2633 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2634 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2635 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2636 PL_regprecomp = oprecomp;
2637 PL_regsize = osize;
2638 PL_regnpar = onpar;
2639 }
2640 DEBUG_r(
9041c2e3 2641 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2642 "Entering embedded `%s%.60s%s%s'\n",
2643 PL_colors[0],
2644 re->precomp,
2645 PL_colors[1],
2646 (strlen(re->precomp) > 60 ? "..." : ""))
2647 );
2648 state.node = next;
2649 state.prev = PL_reg_call_cc;
2650 state.cc = PL_regcc;
2651 state.re = PL_reg_re;
2652
2ab05381 2653 PL_regcc = 0;
9041c2e3 2654
0f5d15d6 2655 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2656 REGCP_SET(lastcp);
0f5d15d6
IZ
2657 cache_re(re);
2658 state.ss = PL_savestack_ix;
2659 *PL_reglastparen = 0;
a01268b5 2660 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2661 PL_reg_call_cc = &state;
2662 PL_reginput = locinput;
2c2d71f5
JH
2663
2664 /* XXXX This is too dramatic a measure... */
2665 PL_reg_maxiter = 0;
2666
0f5d15d6 2667 if (regmatch(re->program + 1)) {
2c914db6
IZ
2668 /* Even though we succeeded, we need to restore
2669 global variables, since we may be wrapped inside
2670 SUSPEND, thus the match may be not finished yet. */
2671
2672 /* XXXX Do this only if SUSPENDed? */
2673 PL_reg_call_cc = state.prev;
2674 PL_regcc = state.cc;
2675 PL_reg_re = state.re;
2676 cache_re(PL_reg_re);
2677
2678 /* XXXX This is too dramatic a measure... */
2679 PL_reg_maxiter = 0;
2680
2681 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2682 ReREFCNT_dec(re);
2683 regcpblow(cp);
2684 sayYES;
2685 }
0f5d15d6 2686 ReREFCNT_dec(re);
02db2b7b 2687 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2688 regcppop();
2689 PL_reg_call_cc = state.prev;
2690 PL_regcc = state.cc;
2691 PL_reg_re = state.re;
d3790889 2692 cache_re(PL_reg_re);
2c2d71f5
JH
2693
2694 /* XXXX This is too dramatic a measure... */
2695 PL_reg_maxiter = 0;
2696
8e514ae6 2697 logical = 0;
0f5d15d6
IZ
2698 sayNO;
2699 }
c277df42 2700 sw = SvTRUE(ret);
0f5d15d6 2701 logical = 0;
a0ed51b3
LW
2702 }
2703 else
3280af22 2704 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2705 break;
2706 }
a0d0e21e 2707 case OPEN:
c277df42 2708 n = ARG(scan); /* which paren pair */
3280af22
NIS
2709 PL_reg_start_tmp[n] = locinput;
2710 if (n > PL_regsize)
2711 PL_regsize = n;
a0d0e21e
LW
2712 break;
2713 case CLOSE:
c277df42 2714 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2715 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2716 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2717 if (n > *PL_reglastparen)
2718 *PL_reglastparen = n;
a01268b5 2719 *PL_reglastcloseparen = n;
a0d0e21e 2720 break;
c277df42
IZ
2721 case GROUPP:
2722 n = ARG(scan); /* which paren pair */
cf93c79d 2723 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2724 break;
2725 case IFTHEN:
2c2d71f5 2726 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2727 if (sw)
2728 next = NEXTOPER(NEXTOPER(scan));
2729 else {
2730 next = scan + ARG(scan);
2731 if (OP(next) == IFTHEN) /* Fake one. */
2732 next = NEXTOPER(NEXTOPER(next));
2733 }
2734 break;
2735 case LOGICAL:
0f5d15d6 2736 logical = scan->flags;
c277df42 2737 break;
2ab05381
IZ
2738/*******************************************************************
2739 PL_regcc contains infoblock about the innermost (...)* loop, and
2740 a pointer to the next outer infoblock.
2741
2742 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2743
2744 1) After matching X, regnode for CURLYX is processed;
2745
9041c2e3 2746 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2747 regmatch() recursively with the starting point at WHILEM node;
2748
2749 3) Each hit of WHILEM node tries to match A and Z (in the order
2750 depending on the current iteration, min/max of {min,max} and
2751 greediness). The information about where are nodes for "A"
2752 and "Z" is read from the infoblock, as is info on how many times "A"
2753 was already matched, and greediness.
2754
2755 4) After A matches, the same WHILEM node is hit again.
2756
2757 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2758 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2759 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2760 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2761 of the external loop.
2762
2763 Currently present infoblocks form a tree with a stem formed by PL_curcc
2764 and whatever it mentions via ->next, and additional attached trees
2765 corresponding to temporarily unset infoblocks as in "5" above.
2766
9041c2e3 2767 In the following picture infoblocks for outer loop of
2ab05381
IZ
2768 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2769 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2770 infoblocks are drawn below the "reset" infoblock.
2771
2772 In fact in the picture below we do not show failed matches for Z and T
2773 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2774 more obvious *why* one needs to *temporary* unset infoblocks.]
2775
2776 Matched REx position InfoBlocks Comment
2777 (Y(A)*?Z)*?T x
2778 Y(A)*?Z)*?T x <- O
2779 Y (A)*?Z)*?T x <- O
2780 Y A)*?Z)*?T x <- O <- I
2781 YA )*?Z)*?T x <- O <- I
2782 YA A)*?Z)*?T x <- O <- I
2783 YAA )*?Z)*?T x <- O <- I
2784 YAA Z)*?T x <- O # Temporary unset I
2785 I
2786
2787 YAAZ Y(A)*?Z)*?T x <- O
2788 I
2789
2790 YAAZY (A)*?Z)*?T x <- O
2791 I
2792
2793 YAAZY A)*?Z)*?T x <- O <- I
2794 I
2795
2796 YAAZYA )*?Z)*?T x <- O <- I
2797 I
2798
2799 YAAZYA Z)*?T x <- O # Temporary unset I
2800 I,I
2801
2802 YAAZYAZ )*?T x <- O
2803 I,I
2804
2805 YAAZYAZ T x # Temporary unset O
2806 O
2807 I,I
2808
2809 YAAZYAZT x
2810 O
2811 I,I
2812 *******************************************************************/
a0d0e21e
LW
2813 case CURLYX: {
2814 CURCUR cc;
3280af22 2815 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2816 /* No need to save/restore up to this paren */
2817 I32 parenfloor = scan->flags;
c277df42
IZ
2818
2819 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2820 next += ARG(next);
3280af22
NIS
2821 cc.oldcc = PL_regcc;
2822 PL_regcc = &cc;
cb434fcc
IZ
2823 /* XXXX Probably it is better to teach regpush to support
2824 parenfloor > PL_regsize... */
2825 if (parenfloor > *PL_reglastparen)
2826 parenfloor = *PL_reglastparen; /* Pessimization... */
2827 cc.parenfloor = parenfloor;
a0d0e21e
LW
2828 cc.cur = -1;
2829 cc.min = ARG1(scan);
2830 cc.max = ARG2(scan);
c277df42 2831 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2832 cc.next = next;
2833 cc.minmod = minmod;
2834 cc.lastloc = 0;
3280af22 2835 PL_reginput = locinput;
a0d0e21e
LW
2836 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2837 regcpblow(cp);
3280af22 2838 PL_regcc = cc.oldcc;
4633a7c4 2839 saySAME(n);
a0d0e21e
LW
2840 }
2841 /* NOT REACHED */
2842 case WHILEM: {
2843 /*
2844 * This is really hard to understand, because after we match
2845 * what we're trying to match, we must make sure the rest of
2c2d71f5 2846 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2847 * to go back UP the parse tree by recursing ever deeper. And
2848 * if it fails, we have to reset our parent's current state
2849 * that we can try again after backing off.
2850 */
2851
c277df42 2852 CHECKPOINT cp, lastcp;
3280af22 2853 CURCUR* cc = PL_regcc;
c277df42
IZ
2854 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2855
4633a7c4 2856 n = cc->cur + 1; /* how many we know we matched */
3280af22 2857 PL_reginput = locinput;
a0d0e21e 2858
c277df42 2859 DEBUG_r(
9041c2e3
NIS
2860 PerlIO_printf(Perl_debug_log,
2861 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2862 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2863 (long)n, (long)cc->min,
c277df42
IZ
2864 (long)cc->max, (long)cc)
2865 );
4633a7c4 2866
a0d0e21e
LW
2867 /* If degenerate scan matches "", assume scan done. */
2868
579cf2c3 2869 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2870 PL_regcc = cc->oldcc;
2ab05381
IZ
2871 if (PL_regcc)
2872 ln = PL_regcc->cur;
c277df42 2873 DEBUG_r(
c3464db5
DD
2874 PerlIO_printf(Perl_debug_log,
2875 "%*s empty match detected, try continuation...\n",
3280af22 2876 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2877 );
a0d0e21e 2878 if (regmatch(cc->next))
4633a7c4 2879 sayYES;
2ab05381
IZ
2880 if (PL_regcc)
2881 PL_regcc->cur = ln;
3280af22 2882 PL_regcc = cc;
4633a7c4 2883 sayNO;
a0d0e21e
LW
2884 }
2885
2886 /* First just match a string of min scans. */
2887
2888 if (n < cc->min) {
2889 cc->cur = n;
2890 cc->lastloc = locinput;
4633a7c4
LW
2891 if (regmatch(cc->scan))
2892 sayYES;
2893 cc->cur = n - 1;
c277df42 2894 cc->lastloc = lastloc;
4633a7c4 2895 sayNO;
a0d0e21e
LW
2896 }
2897
2c2d71f5
JH
2898 if (scan->flags) {
2899 /* Check whether we already were at this position.
2900 Postpone detection until we know the match is not
2901 *that* much linear. */
2902 if (!PL_reg_maxiter) {
2903 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2904 PL_reg_leftiter = PL_reg_maxiter;
2905 }
2906 if (PL_reg_leftiter-- == 0) {
2907 I32 size = (PL_reg_maxiter + 7)/8;
2908 if (PL_reg_poscache) {
2909 if (PL_reg_poscache_size < size) {
2910 Renew(PL_reg_poscache, size, char);
2911 PL_reg_poscache_size = size;
2912 }
2913 Zero(PL_reg_poscache, size, char);
2914 }
2915 else {
2916 PL_reg_poscache_size = size;
2917 Newz(29, PL_reg_poscache, size, char);
2918 }
2919 DEBUG_r(
2920 PerlIO_printf(Perl_debug_log,
2921 "%sDetected a super-linear match, switching on caching%s...\n",
2922 PL_colors[4], PL_colors[5])
2923 );
2924 }
2925 if (PL_reg_leftiter < 0) {
2926 I32 o = locinput - PL_bostr, b;
2927
2928 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2929 b = o % 8;
2930 o /= 8;
2931 if (PL_reg_poscache[o] & (1<<b)) {
2932 DEBUG_r(
2933 PerlIO_printf(Perl_debug_log,
2934 "%*s already tried at this position...\n",
2935 REPORT_CODE_OFF+PL_regindent*2, "")
2936 );
7821416a 2937 sayNO_SILENT;
2c2d71f5
JH
2938 }
2939 PL_reg_poscache[o] |= (1<<b);
2940 }
2941 }
2942
a0d0e21e
LW
2943 /* Prefer next over scan for minimal matching. */
2944
2945 if (cc->minmod) {
3280af22 2946 PL_regcc = cc->oldcc;
2ab05381
IZ
2947 if (PL_regcc)
2948 ln = PL_regcc->cur;
5f05dabc 2949 cp = regcppush(cc->parenfloor);
02db2b7b 2950 REGCP_SET(lastcp);
5f05dabc 2951 if (regmatch(cc->next)) {
c277df42 2952 regcpblow(cp);
4633a7c4 2953 sayYES; /* All done. */
5f05dabc 2954 }
02db2b7b 2955 REGCP_UNWIND(lastcp);
5f05dabc 2956 regcppop();
2ab05381
IZ
2957 if (PL_regcc)
2958 PL_regcc->cur = ln;
3280af22 2959 PL_regcc = cc;
a0d0e21e 2960
c277df42 2961 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 2962 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2963 && !(PL_reg_flags & RF_warned)) {
2964 PL_reg_flags |= RF_warned;
e476b1b5 2965 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2966 "Complex regular subexpression recursion",
2967 REG_INFTY - 1);
c277df42 2968 }
4633a7c4 2969 sayNO;
c277df42 2970 }
a687059c 2971
c277df42 2972 DEBUG_r(
c3464db5
DD
2973 PerlIO_printf(Perl_debug_log,
2974 "%*s trying longer...\n",
3280af22 2975 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2976 );
a0d0e21e 2977 /* Try scanning more and see if it helps. */
3280af22 2978 PL_reginput = locinput;
a0d0e21e
LW
2979 cc->cur = n;
2980 cc->lastloc = locinput;
5f05dabc 2981 cp = regcppush(cc->parenfloor);
02db2b7b 2982 REGCP_SET(lastcp);
5f05dabc 2983 if (regmatch(cc->scan)) {
c277df42 2984 regcpblow(cp);
4633a7c4 2985 sayYES;
5f05dabc 2986 }
02db2b7b 2987 REGCP_UNWIND(lastcp);
5f05dabc 2988 regcppop();
4633a7c4 2989 cc->cur = n - 1;
c277df42 2990 cc->lastloc = lastloc;
4633a7c4 2991 sayNO;
a0d0e21e
LW
2992 }
2993
2994 /* Prefer scan over next for maximal matching. */
2995
2996 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2997 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2998 cc->cur = n;
2999 cc->lastloc = locinput;
02db2b7b 3000 REGCP_SET(lastcp);
5f05dabc 3001 if (regmatch(cc->scan)) {
c277df42 3002 regcpblow(cp);
4633a7c4 3003 sayYES;
5f05dabc 3004 }
02db2b7b 3005 REGCP_UNWIND(lastcp);
a0d0e21e 3006 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3007 PL_reginput = locinput;
c277df42 3008 DEBUG_r(
c3464db5
DD
3009 PerlIO_printf(Perl_debug_log,
3010 "%*s failed, try continuation...\n",
3280af22 3011 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3012 );
3013 }
9041c2e3 3014 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3015 && !(PL_reg_flags & RF_warned)) {
3280af22 3016 PL_reg_flags |= RF_warned;
e476b1b5 3017 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
3018 "Complex regular subexpression recursion",
3019 REG_INFTY - 1);
a0d0e21e
LW
3020 }
3021
3022 /* Failed deeper matches of scan, so see if this one works. */
3280af22 3023 PL_regcc = cc->oldcc;
2ab05381
IZ
3024 if (PL_regcc)
3025 ln = PL_regcc->cur;
a0d0e21e 3026 if (regmatch(cc->next))
4633a7c4 3027 sayYES;
2ab05381
IZ
3028 if (PL_regcc)
3029 PL_regcc->cur = ln;
3280af22 3030 PL_regcc = cc;
4633a7c4 3031 cc->cur = n - 1;
c277df42 3032 cc->lastloc = lastloc;
4633a7c4 3033 sayNO;
a0d0e21e
LW
3034 }
3035 /* NOT REACHED */
9041c2e3 3036 case BRANCHJ:
c277df42
IZ
3037 next = scan + ARG(scan);
3038 if (next == scan)
3039 next = NULL;
3040 inner = NEXTOPER(NEXTOPER(scan));
3041 goto do_branch;
9041c2e3 3042 case BRANCH:
c277df42
IZ
3043 inner = NEXTOPER(scan);
3044 do_branch:
3045 {
c277df42
IZ
3046 c1 = OP(scan);
3047 if (OP(next) != c1) /* No choice. */
3048 next = inner; /* Avoid recursion. */
a0d0e21e 3049 else {
02db2b7b
IZ
3050 I32 lastparen = *PL_reglastparen;
3051 I32 unwind1;
3052 re_unwind_branch_t *uw;
3053
3054 /* Put unwinding data on stack */
3055 unwind1 = SSNEWt(1,re_unwind_branch_t);
3056 uw = SSPTRt(unwind1,re_unwind_branch_t);
3057 uw->prev = unwind;
3058 unwind = unwind1;
3059 uw->type = ((c1 == BRANCH)
3060 ? RE_UNWIND_BRANCH
3061 : RE_UNWIND_BRANCHJ);
3062 uw->lastparen = lastparen;
3063 uw->next = next;
3064 uw->locinput = locinput;
3065 uw->nextchr = nextchr;
3066#ifdef DEBUGGING
3067 uw->regindent = ++PL_regindent;
3068#endif
c277df42 3069
02db2b7b
IZ
3070 REGCP_SET(uw->lastcp);
3071
3072 /* Now go into the first branch */
3073 next = inner;
a687059c 3074 }
a0d0e21e
LW
3075 }
3076 break;
3077 case MINMOD:
3078 minmod = 1;
3079 break;
c277df42
IZ
3080 case CURLYM:
3081 {
00db4c45 3082 I32 l = 0;
c277df42 3083 CHECKPOINT lastcp;
9041c2e3 3084
c277df42
IZ
3085 /* We suppose that the next guy does not need
3086 backtracking: in particular, it is of constant length,
3087 and has no parenths to influence future backrefs. */
3088 ln = ARG1(scan); /* min to match */
3089 n = ARG2(scan); /* max to match */
c277df42
IZ
3090 paren = scan->flags;
3091 if (paren) {
3280af22
NIS
3092 if (paren > PL_regsize)
3093 PL_regsize = paren;
3094 if (paren > *PL_reglastparen)
3095 *PL_reglastparen = paren;
c277df42 3096 }
dc45a647 3097 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3098 if (paren)
3099 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3100 PL_reginput = locinput;
c277df42
IZ
3101 if (minmod) {
3102 minmod = 0;
3103 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3104 sayNO;
f31a99c8
HS
3105 /* if we matched something zero-length we don't need to
3106 backtrack - capturing parens are already defined, so
3107 the caveat in the maximal case doesn't apply
3108
3109 XXXX if ln == 0, we can redo this check first time
3110 through the following loop
3111 */
3112 if (ln && l == 0)
3113 n = ln; /* don't backtrack */
3280af22 3114 locinput = PL_reginput;
e2d8ce26 3115 if (NEAR_EXACT(next)) {
5f80c4cf
JP
3116 regnode *text_node = next;
3117
3118 if (PL_regkind[(U8)OP(next)] != EXACT)
74750237 3119 NEXT_IMPT(text_node);
5f80c4cf
JP
3120
3121 if (PL_regkind[(U8)OP(text_node)] != EXACT) {
3122 c1 = c2 = -1000;
3123 }
3124 else {
3125 c1 = (U8)*STRING(text_node);
3126 if (OP(next) == EXACTF)
3127 c2 = PL_fold[c1];
3128 else if (OP(text_node) == EXACTFL)
3129 c2 = PL_fold_locale[c1];
3130 else
3131 c2 = c1;
3132 }
a0ed51b3
LW
3133 }
3134 else
c277df42 3135 c1 = c2 = -1000;
02db2b7b 3136 REGCP_SET(lastcp);
5f4b28b2 3137 /* This may be improved if l == 0. */
c277df42
IZ
3138 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3139 /* If it could work, try it. */
3140 if (c1 == -1000 ||
3280af22
NIS
3141 UCHARAT(PL_reginput) == c1 ||
3142 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3143 {
3144 if (paren) {
f31a99c8 3145 if (ln) {
cf93c79d
IZ
3146 PL_regstartp[paren] =
3147 HOPc(PL_reginput, -l) - PL_bostr;
3148 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3149 }
3150 else
cf93c79d 3151 PL_regendp[paren] = -1;
c277df42
IZ
3152 }
3153 if (regmatch(next))
3154 sayYES;
02db2b7b 3155 REGCP_UNWIND(lastcp);
c277df42
IZ
3156 }
3157 /* Couldn't or didn't -- move forward. */
3280af22 3158 PL_reginput = locinput;
c277df42
IZ
3159 if (regrepeat_hard(scan, 1, &l)) {
3160 ln++;
3280af22 3161 locinput = PL_reginput;
c277df42
IZ
3162 }
3163 else
3164 sayNO;
3165 }
a0ed51b3
LW
3166 }
3167 else {
c277df42 3168 n = regrepeat_hard(scan, n, &l);
f31a99c8
HS
3169 /* if we matched something zero-length we don't need to
3170 backtrack, unless the minimum count is zero and we
3171 are capturing the result - in that case the capture
3172 being defined or not may affect later execution
3173 */
3174 if (n != 0 && l == 0 && !(paren && ln == 0))
3175 ln = n; /* don't backtrack */
3280af22 3176 locinput = PL_reginput;
c277df42 3177 DEBUG_r(
5c0ca799 3178 PerlIO_printf(Perl_debug_log,
faccc32b 3179 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3180 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3181 (IV) n, (IV)l)
c277df42
IZ
3182 );
3183 if (n >= ln) {
e2d8ce26 3184 if (NEAR_EXACT(next)) {
5f80c4cf
JP
3185 regnode *text_node = next;
3186
3187 if (PL_regkind[(U8)OP(next)] != EXACT)
74750237 3188 NEXT_IMPT(text_node);
5f80c4cf
JP
3189
3190 if (PL_regkind[(U8)OP(text_node)] != EXACT) {
3191 c1 = c2 = -1000;
3192 }
3193 else {
3194 c1 = (U8)*STRING(text_node);
3195 if (OP(text_node) == EXACTF)
3196 c2 = PL_fold[c1];
3197 else if (OP(text_node) == EXACTFL)
3198 c2 = PL_fold_locale[c1];
3199 else
3200 c2 = c1;
3201 }
a0ed51b3
LW
3202 }
3203 else
c277df42
IZ
3204 c1 = c2 = -1000;
3205 }
02db2b7b 3206 REGCP_SET(lastcp);
c277df42
IZ
3207 while (n >= ln) {
3208 /* If it could work, try it. */
3209 if (c1 == -1000 ||
3280af22
NIS
3210 UCHARAT(PL_reginput) == c1 ||
3211 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3212 {
3213 DEBUG_r(
c3464db5 3214 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3215 "%*s trying tail with n=%"IVdf"...\n",
3216 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3217 );
3218 if (paren) {
3219 if (n) {
cf93c79d
IZ
3220 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3221 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3222 }
a0ed51b3 3223 else
cf93c79d 3224 PL_regendp[paren] = -1;
c277df42 3225 }
a0ed51b3
LW
3226 if (regmatch(next))
3227 sayYES;
02db2b7b 3228 REGCP_UNWIND(lastcp);
a0ed51b3 3229 }
c277df42
IZ
3230 /* Couldn't or didn't -- back up. */
3231 n--;
dfe13c55 3232 locinput = HOPc(locinput, -l);
3280af22 3233 PL_reginput = locinput;
c277df42
IZ
3234 }
3235 }
3236 sayNO;
3237 break;
3238 }
3239 case CURLYN:
3240 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3241 if (paren > PL_regsize)
3242 PL_regsize = paren;
3243 if (paren > *PL_reglastparen)
3244 *PL_reglastparen = paren;
c277df42
IZ
3245 ln = ARG1(scan); /* min to match */
3246 n = ARG2(scan); /* max to match */
dc45a647 3247 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3248 goto repeat;
a0d0e21e 3249 case CURLY:
c277df42 3250 paren = 0;
a0d0e21e
LW
3251 ln = ARG1(scan); /* min to match */
3252 n = ARG2(scan); /* max to match */
dc45a647 3253 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3254 goto repeat;
3255 case STAR:
3256 ln = 0;
c277df42 3257 n = REG_INFTY;
a0d0e21e 3258 scan = NEXTOPER(scan);
c277df42 3259 paren = 0;
a0d0e21e
LW
3260 goto repeat;
3261 case PLUS:
c277df42
IZ
3262 ln = 1;
3263 n = REG_INFTY;
3264 scan = NEXTOPER(scan);
3265 paren = 0;
3266 repeat:
a0d0e21e
LW
3267 /*
3268 * Lookahead to avoid useless match attempts
3269 * when we know what character comes next.
3270 */
5f80c4cf
JP
3271
3272 /*
3273 * Used to only do .*x and .*?x, but now it allows
3274 * for )'s, ('s and (?{ ... })'s to be in the way
3275 * of the quantifier and the EXACT-like node. -- japhy
3276 */
3277
e2d8ce26 3278 if (NEAR_EXACT(next)) {
5f80c4cf
JP
3279 U8 *s;
3280 regnode *text_node = next;
3281
3282 if (PL_regkind[(U8)OP(next)] != EXACT)
74750237 3283 NEXT_IMPT(text_node);
5f80c4cf
JP
3284
3285 if (PL_regkind[(U8)OP(text_node)] != EXACT) {
3286 c1 = c2 = -1000;
3287 }
3288 else {
3289 s = (U8*)STRING(text_node);
3290
3291 if (!UTF) {
3292 c2 = c1 = *s;
3293 if (OP(text_node) == EXACTF)
3294 c2 = PL_fold[c1];
3295 else if (OP(text_node) == EXACTFL)
3296 c2 = PL_fold_locale[c1];
1aa99e6b 3297 }
5f80c4cf
JP
3298 else { /* UTF */
3299 if (OP(text_node) == EXACTF) {
a2a2844f
JH
3300 STRLEN ulen1, ulen2;
3301 U8 tmpbuf1[UTF8_MAXLEN*2+1];
3302 U8 tmpbuf2[UTF8_MAXLEN*2+1];
3303
3304 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3305 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3306
3307 c1 = utf8_to_uvuni(tmpbuf1, 0);
3308 c2 = utf8_to_uvuni(tmpbuf2, 0);
5f80c4cf
JP
3309 }
3310 else {
3311 c2 = c1 = utf8_to_uvchr(s, NULL);
3312 }
1aa99e6b
IH
3313 }
3314 }
bbce6d69 3315 }
a0d0e21e 3316 else
bbce6d69 3317 c1 = c2 = -1000;
3280af22 3318 PL_reginput = locinput;
a0d0e21e 3319 if (minmod) {
c277df42 3320 CHECKPOINT lastcp;
a0d0e21e
LW
3321 minmod = 0;
3322 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3323 sayNO;
a0ed51b3 3324 locinput = PL_reginput;
02db2b7b 3325 REGCP_SET(lastcp);
0fe9bf95 3326 if (c1 != -1000) {
1aa99e6b 3327 char *e; /* Should not check after this */
0fe9bf95
IZ
3328 char *old = locinput;
3329
1aa99e6b 3330 if (n == REG_INFTY) {
0fe9bf95 3331 e = PL_regeol - 1;
1aa99e6b
IH
3332 if (do_utf8)
3333 while (UTF8_IS_CONTINUATION(*(U8*)e))
3334 e--;
3335 }
3336 else if (do_utf8) {
3337 int m = n - ln;
3338 for (e = locinput;
3339 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3340 e += UTF8SKIP(e);
3341 }
3342 else {
3343 e = locinput + n - ln;
3344 if (e >= PL_regeol)
3345 e = PL_regeol - 1;
3346 }
0fe9bf95 3347 while (1) {
1aa99e6b 3348 int count;
0fe9bf95 3349 /* Find place 'next' could work */
1aa99e6b
IH
3350 if (!do_utf8) {
3351 if (c1 == c2) {
a8e8ab15
JH
3352 while (locinput <= e &&
3353 UCHARAT(locinput) != c1)
1aa99e6b
IH
3354 locinput++;
3355 } else {
9041c2e3 3356 while (locinput <= e
a8e8ab15
JH
3357 && UCHARAT(locinput) != c1
3358 && UCHARAT(locinput) != c2)
1aa99e6b
IH
3359 locinput++;
3360 }
3361 count = locinput - old;
3362 }
3363 else {
3364 STRLEN len;
3365 if (c1 == c2) {
3366 for (count = 0;
3367 locinput <= e &&
9041c2e3 3368 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3369 count++)
3370 locinput += len;
3371
3372 } else {
3373 for (count = 0; locinput <= e; count++) {
9041c2e3 3374 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3375 if (c == c1 || c == c2)
3376 break;
9041c2e3 3377 locinput += len;
1aa99e6b
IH
3378 }
3379 }
0fe9bf95 3380 }
9041c2e3 3381 if (locinput > e)
0fe9bf95
IZ
3382 sayNO;
3383 /* PL_reginput == old now */
3384 if (locinput != old) {
3385 ln = 1; /* Did some */
1aa99e6b 3386 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3387 sayNO;
3388 }
3389 /* PL_reginput == locinput now */
29d1e993 3390 TRYPAREN(paren, ln, locinput);
0fe9bf95 3391 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3392 REGCP_UNWIND(lastcp);
0fe9bf95 3393 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3394 old = locinput;
3395 if (do_utf8)
3396 locinput += UTF8SKIP(locinput);
3397 else
3398 locinput++;
0fe9bf95
IZ
3399 }
3400 }
3401 else
c277df42 3402 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3403 UV c;
3404 if (c1 != -1000) {
3405 if (do_utf8)
9041c2e3 3406 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3407 else
9041c2e3 3408 c = UCHARAT(PL_reginput);
2390ecbc
PP
3409 /* If it could work, try it. */
3410 if (c == c1 || c == c2)
3411 {
3412 TRYPAREN(paren, n, PL_reginput);
3413 REGCP_UNWIND(lastcp);
3414 }
1aa99e6b 3415 }
a0d0e21e 3416 /* If it could work, try it. */
2390ecbc 3417 else if (c1 == -1000)
bbce6d69 3418 {
29d1e993 3419 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3420 REGCP_UNWIND(lastcp);
bbce6d69 3421 }
c277df42 3422 /* Couldn't or didn't -- move forward. */
a0ed51b3 3423 PL_reginput = locinput;
a0d0e21e
LW
3424 if (regrepeat(scan, 1)) {
3425 ln++;
a0ed51b3
LW
3426 locinput = PL_reginput;
3427 }
3428 else
4633a7c4 3429 sayNO;
a0d0e21e
LW
3430 }
3431 }
3432 else {
c277df42 3433 CHECKPOINT lastcp;
a0d0e21e 3434 n = regrepeat(scan, n);
a0ed51b3 3435 locinput = PL_reginput;
22c35a8c 3436 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3437 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3438 ln = n; /* why back off? */
1aeab75a
GS
3439 /* ...because $ and \Z can match before *and* after
3440 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3441 We should back off by one in this case. */
3442 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3443 ln--;
3444 }
02db2b7b 3445 REGCP_SET(lastcp);
c277df42 3446 if (paren) {
8fa7f367 3447 UV c = 0;
c277df42 3448 while (n >= ln) {
1aa99e6b
IH
3449 if (c1 != -1000) {
3450 if (do_utf8)
9041c2e3 3451 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3452 else
9041c2e3 3453 c = UCHARAT(PL_reginput);
1aa99e6b 3454 }
c277df42 3455 /* If it could work, try it. */
1aa99e6b 3456 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3457 {
29d1e993 3458 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3459 REGCP_UNWIND(lastcp);
c277df42
IZ
3460 }
3461 /* Couldn't or didn't -- back up. */
3462 n--;
dfe13c55 3463 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3464 }
a0ed51b3
LW
3465 }
3466 else {
8fa7f367 3467 UV c = 0;
c277df42 3468 while (n >= ln) {
1aa99e6b
IH
3469 if (c1 != -1000) {
3470 if (do_utf8)
9041c2e3 3471 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3472 else
9041c2e3 3473 c = UCHARAT(PL_reginput);
1aa99e6b 3474 }
c277df42 3475 /* If it could work, try it. */
1aa99e6b 3476 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3477 {
29d1e993 3478 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3479 REGCP_UNWIND(lastcp);
c277df42
IZ
3480 }
3481 /* Couldn't or didn't -- back up. */
3482 n--;
dfe13c55 3483 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3484 }
a0d0e21e
LW
3485 }
3486 }
4633a7c4 3487 sayNO;
c277df42 3488 break;
a0d0e21e 3489 case END:
0f5d15d6
IZ
3490 if (PL_reg_call_cc) {
3491 re_cc_state *cur_call_cc = PL_reg_call_cc;
3492 CURCUR *cctmp = PL_regcc;
3493 regexp *re = PL_reg_re;
3494 CHECKPOINT cp, lastcp;
3495
3496 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3497 REGCP_SET(lastcp);
0f5d15d6
IZ
3498 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3499 the caller. */
3500 PL_reginput = locinput; /* Make position available to
3501 the callcc. */
3502 cache_re(PL_reg_call_cc->re);
3503 PL_regcc = PL_reg_call_cc->cc;
3504 PL_reg_call_cc = PL_reg_call_cc->prev;
3505 if (regmatch(cur_call_cc->node)) {
3506 PL_reg_call_cc = cur_call_cc;
3507 regcpblow(cp);
3508 sayYES;
3509 }
02db2b7b 3510 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3511 regcppop();
3512 PL_reg_call_cc = cur_call_cc;
3513 PL_regcc = cctmp;
3514 PL_reg_re = re;
3515 cache_re(re);
3516
3517 DEBUG_r(
3518 PerlIO_printf(Perl_debug_log,
3519 "%*s continuation failed...\n",
3520 REPORT_CODE_OFF+PL_regindent*2, "")
3521 );
7821416a 3522 sayNO_SILENT;
0f5d15d6 3523 }
7821416a
IZ
3524 if (locinput < PL_regtill) {
3525 DEBUG_r(PerlIO_printf(Perl_debug_log,
3526 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3527 PL_colors[4],
3528 (long)(locinput - PL_reg_starttry),
3529 (long)(PL_regtill - PL_reg_starttry),
3530 PL_colors[5]));
3531 sayNO_FINAL; /* Cannot match: too short. */
3532 }
3533 PL_reginput = locinput; /* put where regtry can find it */
3534 sayYES_FINAL; /* Success! */
7e5428c5 3535 case SUCCEED:
3280af22 3536 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3537 sayYES_LOUD; /* Success! */
c277df42
IZ
3538 case SUSPEND:
3539 n = 1;
9fe1d20c 3540 PL_reginput = locinput;
9041c2e3 3541 goto do_ifmatch;
a0d0e21e 3542 case UNLESSM:
c277df42 3543 n = 0;
a0ed51b3 3544 if (scan->flags) {
efb30f32
HS
3545 s = HOPBACKc(locinput, scan->flags);
3546 if (!s)
3547 goto say_yes;
3548 PL_reginput = s;
a0ed51b3
LW
3549 }
3550 else
3551 PL_reginput = locinput;
c277df42
IZ
3552 goto do_ifmatch;
3553 case IFMATCH:
3554 n = 1;
a0ed51b3 3555 if (scan->flags) {
efb30f32
HS
3556 s = HOPBACKc(locinput, scan->flags);
3557 if (!s)
3558 goto say_no;
3559 PL_reginput = s;
a0ed51b3
LW
3560 }
3561 else
3562 PL_reginput = locinput;
3563
c277df42 3564 do_ifmatch:
c277df42
IZ
3565 inner = NEXTOPER(NEXTOPER(scan));
3566 if (regmatch(inner) != n) {
3567 say_no:
3568 if (logical) {
3569 logical = 0;
3570 sw = 0;
3571 goto do_longjump;
a0ed51b3
LW
3572 }
3573 else
c277df42
IZ
3574 sayNO;
3575 }
3576 say_yes:
3577 if (logical) {
3578 logical = 0;
3579 sw = 1;
3580 }
fe44a5e8 3581 if (OP(scan) == SUSPEND) {
3280af22 3582 locinput = PL_reginput;
565764a8 3583 nextchr = UCHARAT(locinput);
fe44a5e8 3584 }
c277df42
IZ
3585 /* FALL THROUGH. */
3586 case LONGJMP:
3587 do_longjump:
3588 next = scan + ARG(scan);
3589 if (next == scan)
3590 next = NULL;
a0d0e21e
LW
3591 break;
3592 default:
b900a521 3593 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3594 PTR2UV(scan), OP(scan));
cea2e8a9 3595 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3596 }
02db2b7b 3597 reenter:
a0d0e21e
LW
3598 scan = next;
3599 }
a687059c 3600
a0d0e21e
LW
3601 /*
3602 * We get here only if there's trouble -- normally "case END" is
3603 * the terminating point.
3604 */
cea2e8a9 3605 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3606 /*NOTREACHED*/
4633a7c4
LW
3607 sayNO;
3608
7821416a
IZ
3609yes_loud:
3610 DEBUG_r(
3611 PerlIO_printf(Perl_debug_log,
3612 "%*s %scould match...%s\n",
3613 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3614 );
3615 goto yes;
3616yes_final:
3617 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3618 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3619yes:
3620#ifdef DEBUGGING
3280af22 3621 PL_regindent--;
4633a7c4 3622#endif
02db2b7b
IZ
3623
3624#if 0 /* Breaks $^R */
3625 if (unwind)
3626 regcpblow(firstcp);
3627#endif
4633a7c4
LW
3628 return 1;
3629
3630no:
7821416a
IZ
3631 DEBUG_r(
3632 PerlIO_printf(Perl_debug_log,
3633 "%*s %sfailed...%s\n",
3634 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3635 );
3636 goto do_no;
3637no_final:
3638do_no:
02db2b7b
IZ
3639 if (unwind) {
3640 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3641
3642 switch (uw->type) {
3643 case RE_UNWIND_BRANCH:
3644 case RE_UNWIND_BRANCHJ:
3645 {
3646 re_unwind_branch_t *uwb = &(uw->branch);
3647 I32 lastparen = uwb->lastparen;
9041c2e3 3648
02db2b7b
IZ
3649 REGCP_UNWIND(uwb->lastcp);
3650 for (n = *PL_reglastparen; n > lastparen; n--)
3651 PL_regendp[n] = -1;
3652 *PL_reglastparen = n;
3653 scan = next = uwb->next;
9041c2e3
NIS
3654 if ( !scan ||
3655 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
3656 ? BRANCH : BRANCHJ) ) { /* Failure */
3657 unwind = uwb->prev;
3658#ifdef DEBUGGING
3659 PL_regindent--;
3660#endif
3661 goto do_no;
3662 }
3663 /* Have more choice yet. Reuse the same uwb. */
3664 /*SUPPRESS 560*/
3665 if ((n = (uwb->type == RE_UNWIND_BRANCH
3666 ? NEXT_OFF(next) : ARG(next))))
3667 next += n;
3668 else
3669 next = NULL; /* XXXX Needn't unwinding in this case... */
3670 uwb->next = next;
3671 next = NEXTOPER(scan);
3672 if (uwb->type == RE_UNWIND_BRANCHJ)
3673 next = NEXTOPER(next);
3674 locinput = uwb->locinput;
3675 nextchr = uwb->nextchr;
3676#ifdef DEBUGGING
3677 PL_regindent = uwb->regindent;
3678#endif
3679
3680 goto reenter;
3681 }
3682 /* NOT REACHED */
3683 default:
3684 Perl_croak(aTHX_ "regexp unwind memory corruption");
3685 }
3686 /* NOT REACHED */
3687 }
4633a7c4 3688#ifdef DEBUGGING
3280af22 3689 PL_regindent--;
4633a7c4 3690#endif
a0d0e21e 3691 return 0;
a687059c
LW
3692}
3693
3694/*
3695 - regrepeat - repeatedly match something simple, report how many
3696 */
3697/*
3698 * [This routine now assumes that it will only match on things of length 1.
3699 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3700 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3701 */
76e3520e 3702STATIC I32
cea2e8a9 3703S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3704{
a0d0e21e 3705 register char *scan;
a0d0e21e 3706 register I32 c;
3280af22 3707 register char *loceol = PL_regeol;
a0ed51b3 3708 register I32 hardcount = 0;
53c4c00c 3709 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 3710
3280af22 3711 scan = PL_reginput;
c277df42 3712 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3713 loceol = scan + max;
a0d0e21e 3714 switch (OP(p)) {
22c35a8c 3715 case REG_ANY:
1aa99e6b 3716 if (do_utf8) {
ffc61ed2 3717 loceol = PL_regeol;
1aa99e6b 3718 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3719 scan += UTF8SKIP(scan);
3720 hardcount++;
3721 }
3722 } else {
3723 while (scan < loceol && *scan != '\n')
3724 scan++;
a0ed51b3
LW
3725 }
3726 break;
ffc61ed2 3727 case SANY:
3baa4c62 3728 scan = loceol;
a0ed51b3 3729 break;
f33976b4
DB
3730 case CANY:
3731 scan = loceol;
3732 break;
bbce6d69 3733 case EXACT: /* length of string is 1 */
cd439c50 3734 c = (U8)*STRING(p);
bbce6d69 3735 while (scan < loceol && UCHARAT(scan) == c)
3736 scan++;
3737 break;
3738 case EXACTF: /* length of string is 1 */
cd439c50 3739 c = (U8)*STRING(p);
bbce6d69 3740 while (scan < loceol &&
22c35a8c 3741 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3742 scan++;
3743 break;
3744 case EXACTFL: /* length of string is 1 */
3280af22 3745 PL_reg_flags |= RF_tainted;
cd439c50 3746 c = (U8)*STRING(p);
bbce6d69 3747 while (scan < loceol &&
22c35a8c 3748 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3749 scan++;
3750 break;
3751 case ANYOF:
ffc61ed2
JH
3752 if (do_utf8) {
3753 loceol = PL_regeol;
1aa99e6b
IH
3754 while (hardcount < max && scan < loceol &&
3755 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3756 scan += UTF8SKIP(scan);
3757 hardcount++;
3758 }
3759 } else {
3760 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3761 scan++;
3762 }
a0d0e21e
LW
3763 break;
3764 case ALNUM:
1aa99e6b 3765 if (do_utf8) {
ffc61ed2 3766 loceol = PL_regeol;
8269fa76 3767 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3768 while (hardcount < max && scan < loceol &&
3568d838 3769 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3770 scan += UTF8SKIP(scan);
3771 hardcount++;
3772 }
3773 } else {
3774 while (scan < loceol && isALNUM(*scan))
3775 scan++;
a0ed51b3
LW
3776 }
3777 break;
bbce6d69 3778 case ALNUML:
3280af22 3779 PL_reg_flags |= RF_tainted;
1aa99e6b 3780 if (do_utf8) {
ffc61ed2 3781 loceol = PL_regeol;
1aa99e6b
IH
3782 while (hardcount < max && scan < loceol &&
3783 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3784 scan += UTF8SKIP(scan);
3785 hardcount++;
3786 }
3787 } else {
3788 while (scan < loceol && isALNUM_LC(*scan))
3789 scan++;
a0ed51b3
LW
3790 }
3791 break;
a0d0e21e 3792 case NALNUM:
1aa99e6b 3793 if (do_utf8) {
ffc61ed2 3794 loceol = PL_regeol;
8269fa76 3795 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3796 while (hardcount < max && scan < loceol &&
3568d838 3797 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3798 scan += UTF8SKIP(scan);
3799 hardcount++;
3800 }
3801 } else {
3802 while (scan < loceol && !isALNUM(*scan))
3803 scan++;
a0ed51b3
LW
3804 }
3805 break;
bbce6d69 3806 case NALNUML:
3280af22 3807 PL_reg_flags |= RF_tainted;
1aa99e6b 3808 if (do_utf8) {
ffc61ed2 3809 loceol = PL_regeol;
1aa99e6b
IH
3810 while (hardcount < max && scan < loceol &&
3811 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3812 scan += UTF8SKIP(scan);
3813 hardcount++;
3814 }
3815 } else {
3816 while (scan < loceol && !isALNUM_LC(*scan))
3817 scan++;
a0ed51b3
LW
3818 }
3819 break;
a0d0e21e 3820 case SPACE:
1aa99e6b 3821 if (do_utf8) {
ffc61ed2 3822 loceol = PL_regeol;
8269fa76 3823 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3824 while (hardcount < max && scan < loceol &&
3568d838
JH
3825 (*scan == ' ' ||
3826 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3827 scan += UTF8SKIP(scan);
3828 hardcount++;
3829 }
3830 } else {
3831 while (scan < loceol && isSPACE(*scan))
3832 scan++;
a0ed51b3
LW
3833 }
3834 break;
bbce6d69 3835 case SPACEL:
3280af22 3836 PL_reg_flags |= RF_tainted;
1aa99e6b 3837 if (do_utf8) {
ffc61ed2 3838 loceol = PL_regeol;
1aa99e6b 3839 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3840 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3841 scan += UTF8SKIP(scan);
3842 hardcount++;
3843 }
3844 } else {
3845 while (scan < loceol && isSPACE_LC(*scan))
3846 scan++;
a0ed51b3
LW
3847 }
3848 break;
a0d0e21e 3849 case NSPACE:
1aa99e6b 3850 if (do_utf8) {
ffc61ed2 3851 loceol = PL_regeol;
8269fa76 3852 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3853 while (hardcount < max && scan < loceol &&
3568d838
JH
3854 !(*scan == ' ' ||
3855 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3856 scan += UTF8SKIP(scan);
3857 hardcount++;
3858 }
3859 } else {
3860 while (scan < loceol && !isSPACE(*scan))
3861 scan++;
3862 break;
a0ed51b3 3863 }
bbce6d69 3864 case NSPACEL:
3280af22 3865 PL_reg_flags |= RF_tainted;
1aa99e6b 3866 if (do_utf8) {
ffc61ed2 3867 loceol = PL_regeol;
1aa99e6b 3868 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3869 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3870 scan += UTF8SKIP(scan);
3871 hardcount++;
3872 }
3873 } else {
3874 while (scan < loceol && !isSPACE_LC(*scan))
3875 scan++;
a0ed51b3
LW
3876 }
3877 break;
a0d0e21e 3878 case DIGIT:
1aa99e6b 3879 if (do_utf8) {
ffc61ed2 3880 loceol = PL_regeol;
8269fa76 3881 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3882 while (hardcount < max && scan < loceol &&
3568d838 3883 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3884 scan += UTF8SKIP(scan);
3885 hardcount++;
3886 }
3887 } else {
3888 while (scan < loceol && isDIGIT(*scan))
3889 scan++;
a0ed51b3
LW
3890 }
3891 break;
a0d0e21e 3892 case NDIGIT:
1aa99e6b 3893 if (do_utf8) {
ffc61ed2 3894 loceol = PL_regeol;
8269fa76 3895 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3896 while (hardcount < max && scan < loceol &&
3568d838 3897 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3898 scan += UTF8SKIP(scan);
3899 hardcount++;
3900 }
3901 } else {
3902 while (scan < loceol && !isDIGIT(*scan))
3903 scan++;
a0ed51b3
LW
3904 }
3905 break;
a0d0e21e
LW
3906 default: /* Called on something of 0 width. */
3907 break; /* So match right here or not at all. */
3908 }
a687059c 3909
a0ed51b3
LW
3910 if (hardcount)
3911 c = hardcount;
3912 else
3913 c = scan - PL_reginput;
3280af22 3914 PL_reginput = scan;
a687059c 3915
9041c2e3 3916 DEBUG_r(
c277df42
IZ
3917 {
3918 SV *prop = sv_newmortal();
3919
3920 regprop(prop, p);
9041c2e3
NIS
3921 PerlIO_printf(Perl_debug_log,
3922 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
7b0972df 3923 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42 3924 });
9041c2e3 3925
a0d0e21e 3926 return(c);
a687059c
LW
3927}
3928
3929/*
c277df42 3930 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 3931 *
c277df42
IZ
3932 * The repeater is supposed to have constant length.
3933 */
3934
76e3520e 3935STATIC I32
cea2e8a9 3936S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3937{
b7953727 3938 register char *scan = Nullch;
c277df42 3939 register char *start;
3280af22 3940 register char *loceol = PL_regeol;
a0ed51b3 3941 I32 l = 0;
708e3b05 3942 I32 count = 0, res = 1;
a0ed51b3
LW
3943
3944 if (!max)
3945 return 0;
c277df42 3946
3280af22 3947 start = PL_reginput;
53c4c00c 3948 if (PL_reg_match_utf8) {
708e3b05 3949 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3950 if (!count++) {
3951 l = 0;
3952 while (start < PL_reginput) {
3953 l++;
3954 start += UTF8SKIP(start);
3955 }
3956 *lp = l;
3957 if (l == 0)
3958 return max;
3959 }
3960 if (count == max)
3961 return count;
3962 }
3963 }
3964 else {
708e3b05 3965 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3966 if (!count++) {
3967 *lp = l = PL_reginput - start;
3968 if (max != REG_INFTY && l*max < loceol - scan)
3969 loceol = scan + l*max;
3970 if (l == 0)
3971 return max;
c277df42
IZ
3972 }
3973 }
3974 }
708e3b05 3975 if (!res)
3280af22 3976 PL_reginput = scan;
9041c2e3 3977
a0ed51b3 3978 return count;
c277df42
IZ
3979}
3980
3981/*
ffc61ed2
JH
3982- regclass_swash - prepare the utf8 swash
3983*/
3984
3985SV *
3986Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
3987{
3988 SV *sw = NULL;
3989 SV *si = NULL;
3990
3991 if (PL_regdata && PL_regdata->count) {
3992 U32 n = ARG(node);
3993
3994 if (PL_regdata->what[n] == 's') {
3995 SV *rv = (SV*)PL_regdata->data[n];
3996 AV *av = (AV*)SvRV((SV*)rv);
3997 SV **a;
9041c2e3 3998
ffc61ed2
JH
3999 si = *av_fetch(av, 0, FALSE);
4000 a = av_fetch(av, 1, FALSE);
9041c2e3 4001
ffc61ed2
JH
4002 if (a)
4003 sw = *a;
4004 else if (si && doinit) {
4005 sw = swash_init("utf8", "", si, 1, 0);
4006 (void)av_store(av, 1, sw);
4007 }
4008 }
4009 }
4010
4011 if (initsvp)
4012 *initsvp = si;
4013
4014 return sw;
4015}
4016
4017/*
cb8d8820 4018 - reginclass - determine if a character falls into a character class
bbce6d69 4019 */
4020
76e3520e 4021STATIC bool
ffc61ed2 4022S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
bbce6d69 4023{
ffc61ed2 4024 char flags = ANYOF_FLAGS(n);
bbce6d69 4025 bool match = FALSE;
1aa99e6b 4026 UV c;
3568d838 4027 STRLEN len = 0;
1aa99e6b 4028
3568d838 4029 c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
bbce6d69 4030
ffc61ed2
JH
4031 if (do_utf8 || (flags & ANYOF_UNICODE)) {
4032 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
4033 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4034 match = TRUE;
bbce6d69 4035 }
3568d838 4036 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 4037 match = TRUE;
ffc61ed2
JH
4038 if (!match) {
4039 SV *sw = regclass_swash(n, TRUE, 0);
4040
4041 if (sw) {
3568d838 4042 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
4043 match = TRUE;
4044 else if (flags & ANYOF_FOLD) {
a2a2844f
JH
4045 STRLEN ulen;
4046 U8 tmpbuf[UTF8_MAXLEN*2+1];
4047
4048 toLOWER_utf8(p, tmpbuf, &ulen);
3568d838 4049 if (swash_fetch(sw, tmpbuf, do_utf8))
ffc61ed2
JH
4050 match = TRUE;
4051 }
4052 }
bbce6d69 4053 }
4054 }
1aa99e6b 4055 if (!match && c < 256) {
ffc61ed2
JH
4056 if (ANYOF_BITMAP_TEST(n, c))
4057 match = TRUE;
4058 else if (flags & ANYOF_FOLD) {
3568d838 4059 I32 f;
a0ed51b3 4060
ffc61ed2
JH
4061 if (flags & ANYOF_LOCALE) {
4062 PL_reg_flags |= RF_tainted;
4063 f = PL_fold_locale[c];
4064 }
4065 else
4066 f = PL_fold[c];
4067 if (f != c && ANYOF_BITMAP_TEST(n, f))
4068 match = TRUE;
4069 }
4070
4071 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 4072 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
4073 if (
4074 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4075 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4076 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
4077 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4078 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4079 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4080 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4081 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4082 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4083 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4084 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
4085 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
4086 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4087 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4088 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4089 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4090 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
4091 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4092 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
4093 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4094 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4095 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4096 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
4097 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4098 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4099 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
4100 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
4101 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
4102 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
4103 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
4104 ) /* How's that for a conditional? */
4105 {
4106 match = TRUE;
4107 }
a0ed51b3 4108 }
a0ed51b3
LW
4109 }
4110
a0ed51b3
LW
4111 return (flags & ANYOF_INVERT) ? !match : match;
4112}
161b471a 4113
dfe13c55 4114STATIC U8 *
cea2e8a9 4115S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 4116{
1aa99e6b
IH
4117 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4118}
4119
4120STATIC U8 *
4121S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 4122{
a0ed51b3 4123 if (off >= 0) {
1aa99e6b 4124 while (off-- && s < lim) {
ffc61ed2 4125 /* XXX could check well-formedness here */
a0ed51b3 4126 s += UTF8SKIP(s);
ffc61ed2 4127 }
a0ed51b3
LW
4128 }
4129 else {
4130 while (off++) {
1aa99e6b 4131 if (s > lim) {
a0ed51b3 4132 s--;
ffc61ed2 4133 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4134 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4135 s--;
ffc61ed2
JH
4136 }
4137 /* XXX could check well-formedness here */
a0ed51b3
LW
4138 }
4139 }
4140 }
4141 return s;
4142}
161b471a 4143
dfe13c55 4144STATIC U8 *
1aa99e6b 4145S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 4146{
1aa99e6b
IH
4147 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4148}
4149
4150STATIC U8 *
4151S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
4152{
4153 if (off >= 0) {
1aa99e6b 4154 while (off-- && s < lim) {
ffc61ed2 4155 /* XXX could check well-formedness here */
a0ed51b3 4156 s += UTF8SKIP(s);
ffc61ed2 4157 }
a0ed51b3
LW
4158 if (off >= 0)
4159 return 0;
4160 }
4161 else {
4162 while (off++) {
1aa99e6b 4163 if (s > lim) {
a0ed51b3 4164 s--;
ffc61ed2 4165 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4166 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4167 s--;
ffc61ed2
JH
4168 }
4169 /* XXX could check well-formedness here */
a0ed51b3
LW
4170 }
4171 else
4172 break;
4173 }
4174 if (off <= 0)
4175 return 0;
4176 }
4177 return s;
4178}
51371543 4179
51371543 4180static void
acfe0abc 4181restore_pos(pTHX_ void *arg)
51371543 4182{
51371543
GS
4183 if (PL_reg_eval_set) {
4184 if (PL_reg_oldsaved) {
4185 PL_reg_re->subbeg = PL_reg_oldsaved;
4186 PL_reg_re->sublen = PL_reg_oldsavedlen;
4187 RX_MATCH_COPIED_on(PL_reg_re);
4188 }
4189 PL_reg_magic->mg_len = PL_reg_oldpos;
4190 PL_reg_eval_set = 0;
4191 PL_curpm = PL_reg_oldcurpm;
4192 }
4193}