This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
EXACT correction attempt. Continued in #12863.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
a687059c
LW
83#include "regcomp.h"
84
c277df42
IZ
85#define RF_tainted 1 /* tainted information used? */
86#define RF_warned 2 /* warned about big count? */
ce862d02 87#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
88#define RF_utf8 8 /* String contains multibyte chars? */
89
90#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
91
92#define RS_init 1 /* eval environment created */
93#define RS_set 2 /* replsv value is set */
c277df42 94
a687059c
LW
95#ifndef STATIC
96#define STATIC static
97#endif
98
c277df42
IZ
99/*
100 * Forwards.
101 */
102
a0ed51b3 103#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 104#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 105
dfe13c55
GS
106#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
108#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
109#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
110#define HOPc(pos,off) ((char*)HOP(pos,off))
111#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 112
efb30f32 113#define HOPBACK(pos, off) ( \
53c4c00c 114 (UTF && PL_reg_match_utf8) \
efb30f32
HS
115 ? reghopmaybe((U8*)pos, -off) \
116 : (pos - off >= PL_bostr) \
117 ? (U8*)(pos - off) \
118 : (U8*)NULL \
119)
120#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
121
1aa99e6b
IH
122#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
123#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
124#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
125#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
126#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
127#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
128
8269fa76 129#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 130
5f80c4cf 131/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
132#define JUMPABLE(rn) ( \
133 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
134 OP(rn) == SUSPEND || OP(rn) == IFMATCH \
135)
136
137#define NEAR_EXACT(rn) (PL_regkind[(U8)OP(rn)] == EXACT || JUMPABLE(rn))
138
139#define NEXT_IMPT(rn) STMT_START { \
140 while (JUMPABLE(rn)) \
141 if (OP(rn) == SUSPEND || OP(rn) == IFMATCH) \
142 rn = NEXTOPER(NEXTOPER(rn)); \
143 else rn += NEXT_OFF(rn); \
5f80c4cf 144} STMT_END
74750237 145
acfe0abc 146static void restore_pos(pTHX_ void *arg);
51371543 147
76e3520e 148STATIC CHECKPOINT
cea2e8a9 149S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 150{
3280af22 151 int retval = PL_savestack_ix;
b1ce53c5
JH
152#define REGCP_PAREN_ELEMS 4
153 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
154 int p;
155
e49a9654
IH
156 if (paren_elems_to_push < 0)
157 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
158
a01268b5 159#define REGCP_OTHER_ELEMS 6
b1ce53c5 160 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 161 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 162/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
163 SSPUSHINT(PL_regendp[p]);
164 SSPUSHINT(PL_regstartp[p]);
3280af22 165 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
166 SSPUSHINT(p);
167 }
b1ce53c5 168/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
169 SSPUSHINT(PL_regsize);
170 SSPUSHINT(*PL_reglastparen);
a01268b5 171 SSPUSHINT(*PL_reglastcloseparen);
3280af22 172 SSPUSHPTR(PL_reginput);
41123dfd
JH
173#define REGCP_FRAME_ELEMS 2
174/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
175 * are needed for the regexp context stack bookkeeping. */
176 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 177 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 178
a0d0e21e
LW
179 return retval;
180}
181
c277df42 182/* These are needed since we do not localize EVAL nodes: */
02db2b7b 183# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 184 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 185 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 186
02db2b7b 187# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 188 PerlIO_printf(Perl_debug_log, \
faccc32b 189 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 190 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 191
76e3520e 192STATIC char *
cea2e8a9 193S_regcppop(pTHX)
a0d0e21e 194{
b1ce53c5 195 I32 i;
a0d0e21e
LW
196 U32 paren = 0;
197 char *input;
cf93c79d 198 I32 tmps;
b1ce53c5
JH
199
200 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 201 i = SSPOPINT;
b1ce53c5
JH
202 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
203 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 204 input = (char *) SSPOPPTR;
a01268b5 205 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
206 *PL_reglastparen = SSPOPINT;
207 PL_regsize = SSPOPINT;
b1ce53c5
JH
208
209 /* Now restore the parentheses context. */
41123dfd
JH
210 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
211 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 212 paren = (U32)SSPOPINT;
3280af22 213 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
214 PL_regstartp[paren] = SSPOPINT;
215 tmps = SSPOPINT;
3280af22
NIS
216 if (paren <= *PL_reglastparen)
217 PL_regendp[paren] = tmps;
c277df42 218 DEBUG_r(
c3464db5 219 PerlIO_printf(Perl_debug_log,
b900a521 220 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 221 (UV)paren, (IV)PL_regstartp[paren],
b900a521 222 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 223 (IV)PL_regendp[paren],
3280af22 224 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 225 );
a0d0e21e 226 }
c277df42 227 DEBUG_r(
3280af22 228 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 229 PerlIO_printf(Perl_debug_log,
faccc32b
JH
230 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
231 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
232 }
233 );
daf18116 234#if 1
dafc8851
JH
235 /* It would seem that the similar code in regtry()
236 * already takes care of this, and in fact it is in
237 * a better location to since this code can #if 0-ed out
238 * but the code in regtry() is needed or otherwise tests
239 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
240 * (as of patchlevel 7877) will fail. Then again,
241 * this code seems to be necessary or otherwise
242 * building DynaLoader will fail:
243 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
244 * --jhi */
3280af22
NIS
245 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
246 if (paren > PL_regsize)
cf93c79d
IZ
247 PL_regstartp[paren] = -1;
248 PL_regendp[paren] = -1;
a0d0e21e 249 }
dafc8851 250#endif
a0d0e21e
LW
251 return input;
252}
253
0f5d15d6 254STATIC char *
cea2e8a9 255S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
256{
257 I32 tmp = PL_savestack_ix;
258
259 PL_savestack_ix = ss;
260 regcppop();
261 PL_savestack_ix = tmp;
942e002e 262 return Nullch;
0f5d15d6
IZ
263}
264
265typedef struct re_cc_state
266{
267 I32 ss;
268 regnode *node;
269 struct re_cc_state *prev;
270 CURCUR *cc;
271 regexp *re;
272} re_cc_state;
273
02db2b7b 274#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 275
29d1e993
HS
276#define TRYPAREN(paren, n, input) { \
277 if (paren) { \
278 if (n) { \
279 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
280 PL_regendp[paren] = input - PL_bostr; \
281 } \
282 else \
283 PL_regendp[paren] = -1; \
284 } \
285 if (regmatch(next)) \
286 sayYES; \
287 if (paren && n) \
288 PL_regendp[paren] = -1; \
289}
290
291
a687059c 292/*
e50aee73 293 * pregexec and friends
a687059c
LW
294 */
295
296/*
c277df42 297 - pregexec - match a regexp against a string
a687059c 298 */
c277df42 299I32
864dbfa3 300Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 301 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
302/* strend: pointer to null at end of string */
303/* strbeg: real beginning of string */
304/* minend: end of match must be >=minend after stringarg. */
305/* nosave: For optimizations. */
306{
307 return
9041c2e3 308 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
309 nosave ? 0 : REXEC_COPY_STR);
310}
0f5d15d6
IZ
311
312STATIC void
cea2e8a9 313S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
314{
315 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
316#ifdef DEBUGGING
317 PL_regprogram = prog->program;
318#endif
319 PL_regnpar = prog->nparens;
9041c2e3
NIS
320 PL_regdata = prog->data;
321 PL_reg_re = prog;
0f5d15d6 322}
22e551b9 323
9041c2e3 324/*
cad2e5aa
JH
325 * Need to implement the following flags for reg_anch:
326 *
327 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
328 * USE_INTUIT_ML
329 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
330 * INTUIT_AUTORITATIVE_ML
331 * INTUIT_ONCE_NOML - Intuit can match in one location only.
332 * INTUIT_ONCE_ML
333 *
334 * Another flag for this function: SECOND_TIME (so that float substrs
335 * with giant delta may be not rechecked).
336 */
337
338/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
339
2c2d71f5 340/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
341 Otherwise, only SvCUR(sv) is used to get strbeg. */
342
343/* XXXX We assume that strpos is strbeg unless sv. */
344
6eb5f6b9
JH
345/* XXXX Some places assume that there is a fixed substring.
346 An update may be needed if optimizer marks as "INTUITable"
347 RExen without fixed substrings. Similarly, it is assumed that
348 lengths of all the strings are no more than minlen, thus they
349 cannot come from lookahead.
350 (Or minlen should take into account lookahead.) */
351
2c2d71f5
JH
352/* A failure to find a constant substring means that there is no need to make
353 an expensive call to REx engine, thus we celebrate a failure. Similarly,
354 finding a substring too deep into the string means that less calls to
30944b6d
IZ
355 regtry() should be needed.
356
357 REx compiler's optimizer found 4 possible hints:
358 a) Anchored substring;
359 b) Fixed substring;
360 c) Whether we are anchored (beginning-of-line or \G);
361 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 362 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
363 string which does not contradict any of them.
364 */
2c2d71f5 365
6eb5f6b9
JH
366/* Most of decisions we do here should have been done at compile time.
367 The nodes of the REx which we used for the search should have been
368 deleted from the finite automaton. */
369
cad2e5aa
JH
370char *
371Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
372 char *strend, U32 flags, re_scream_pos_data *data)
373{
b7953727 374 register I32 start_shift = 0;
cad2e5aa 375 /* Should be nonnegative! */
b7953727 376 register I32 end_shift = 0;
2c2d71f5
JH
377 register char *s;
378 register SV *check;
a1933d95 379 char *strbeg;
cad2e5aa
JH
380 char *t;
381 I32 ml_anch;
6eb5f6b9 382 register char *other_last = Nullch; /* other substr checked before this */
b7953727 383 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
384#ifdef DEBUGGING
385 char *i_strpos = strpos;
2a782b5b 386 SV *dsv = sv_2mortal(newSVpvn("", 0));
30944b6d 387#endif
cad2e5aa 388
2a782b5b 389 DEBUG_r({
40eddc46
JH
390 char*s = UTF ? sv_uni_display(dsv, sv, 60, 0) : strpos;
391 int len = UTF ? strlen(s) : strend - strpos;
2a782b5b
JH
392 if (!PL_colorset)
393 reginitcolors();
394 PerlIO_printf(Perl_debug_log,
395 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
396 PL_colors[4],PL_colors[5],PL_colors[0],
397 prog->precomp,
398 PL_colors[1],
399 (strlen(prog->precomp) > 60 ? "..." : ""),
400 PL_colors[0],
401 (int)(len > 60 ? 60 : len),
402 s, PL_colors[1],
403 (len > 60 ? "..." : "")
404 );
405 });
cad2e5aa 406
1aa99e6b
IH
407 if (prog->reganch & ROPT_UTF8)
408 PL_reg_flags |= RF_utf8;
409
410 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
2c2d71f5 411 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 412 goto fail;
2c2d71f5 413 }
a1933d95 414 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 415 PL_regeol = strend;
653099ff 416 check = prog->check_substr;
2c2d71f5 417 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
418 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
419 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 420 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 421
7e25d62c
JH
422 if (!ml_anch) {
423 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
424 | ROPT_IMPLICIT)) /* not a real BOL */
425 /* SvCUR is not set on references: SvRV and SvPVX overlap */
426 && sv && !SvROK(sv)
427 && (strpos != strbeg)) {
428 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
429 goto fail;
430 }
431 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 432 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 433 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
434 I32 slen;
435
1aa99e6b 436 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
437 if (SvTAIL(check)) {
438 slen = SvCUR(check); /* >= 1 */
cad2e5aa 439
9041c2e3 440 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
441 || (strend - s == slen && strend[-1] != '\n')) {
442 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
443 goto fail_finish;
cad2e5aa
JH
444 }
445 /* Now should match s[0..slen-2] */
446 slen--;
653099ff 447 if (slen && (*SvPVX(check) != *s
cad2e5aa 448 || (slen > 1
653099ff 449 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
450 report_neq:
451 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
452 goto fail_finish;
453 }
cad2e5aa 454 }
653099ff
GS
455 else if (*SvPVX(check) != *s
456 || ((slen = SvCUR(check)) > 1
457 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
458 goto report_neq;
459 goto success_at_start;
7e25d62c 460 }
cad2e5aa 461 }
2c2d71f5 462 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 463 s = strpos;
2c2d71f5 464 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 465 end_shift = prog->minlen - start_shift -
653099ff 466 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 467 if (!ml_anch) {
653099ff
GS
468 I32 end = prog->check_offset_max + CHR_SVLEN(check)
469 - (SvTAIL(check) != 0);
1aa99e6b 470 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
471
472 if (end_shift < eshift)
473 end_shift = eshift;
474 }
cad2e5aa 475 }
2c2d71f5 476 else { /* Can match at random position */
cad2e5aa
JH
477 ml_anch = 0;
478 s = strpos;
2c2d71f5
JH
479 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
480 /* Should be nonnegative! */
481 end_shift = prog->minlen - start_shift -
653099ff 482 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
483 }
484
2c2d71f5 485#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 486 if (end_shift < 0)
6bbae5e6 487 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
488#endif
489
2c2d71f5
JH
490 restart:
491 /* Find a possible match in the region s..strend by looking for
492 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 493 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
494 I32 p = -1; /* Internal iterator of scream. */
495 I32 *pp = data ? data->scream_pos : &p;
496
2c2d71f5
JH
497 if (PL_screamfirst[BmRARE(check)] >= 0
498 || ( BmRARE(check) == '\n'
499 && (BmPREVIOUS(check) == SvCUR(check) - 1)
500 && SvTAIL(check) ))
9041c2e3 501 s = screaminstr(sv, check,
2c2d71f5 502 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 503 else
2c2d71f5 504 goto fail_finish;
cad2e5aa
JH
505 if (data)
506 *data->scream_olds = s;
507 }
f33976b4 508 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
509 s = fbm_instr((U8*)(s + start_shift),
510 (U8*)(strend - end_shift),
511 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 512 else
1aa99e6b
IH
513 s = fbm_instr(HOP3(s, start_shift, strend),
514 HOP3(strend, -end_shift, strbeg),
2c2d71f5 515 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
516
517 /* Update the count-of-usability, remove useless subpatterns,
518 unshift s. */
2c2d71f5
JH
519
520 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
521 (s ? "Found" : "Did not find"),
522 ((check == prog->anchored_substr) ? "anchored" : "floating"),
523 PL_colors[0],
7b0972df
JH
524 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
525 SvPVX(check),
2c2d71f5
JH
526 PL_colors[1], (SvTAIL(check) ? "$" : ""),
527 (s ? " at offset " : "...\n") ) );
528
529 if (!s)
530 goto fail_finish;
531
6eb5f6b9
JH
532 check_at = s;
533
2c2d71f5 534 /* Finish the diagnostic message */
30944b6d 535 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
536
537 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
538 Start with the other substr.
539 XXXX no SCREAM optimization yet - and a very coarse implementation
540 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
541 *always* match. Probably should be marked during compile...
542 Probably it is right to do no SCREAM here...
543 */
544
545 if (prog->float_substr && prog->anchored_substr) {
30944b6d 546 /* Take into account the "other" substring. */
2c2d71f5
JH
547 /* XXXX May be hopelessly wrong for UTF... */
548 if (!other_last)
6eb5f6b9 549 other_last = strpos;
2c2d71f5 550 if (check == prog->float_substr) {
30944b6d
IZ
551 do_other_anchored:
552 {
1aa99e6b 553 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
554 char *s1 = s;
555
2c2d71f5
JH
556 t = s - prog->check_offset_max;
557 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
558 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 559 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 560 && t > strpos)))
30944b6d 561 /* EMPTY */;
2c2d71f5
JH
562 else
563 t = strpos;
1aa99e6b 564 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
565 if (t < other_last) /* These positions already checked */
566 t = other_last;
1aa99e6b 567 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
568 if (last < last1)
569 last1 = last;
570 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
571 /* On end-of-str: see comment below. */
572 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
573 HOP3(HOP3(last1, prog->anchored_offset, strend)
574 + SvCUR(prog->anchored_substr),
575 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
576 prog->anchored_substr,
577 PL_multiline ? FBMrf_MULTILINE : 0);
578 DEBUG_r(PerlIO_printf(Perl_debug_log,
579 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
580 (s ? "Found" : "Contradicts"),
581 PL_colors[0],
7b0972df
JH
582 (int)(SvCUR(prog->anchored_substr)
583 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
584 SvPVX(prog->anchored_substr),
585 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
586 if (!s) {
587 if (last1 >= last2) {
588 DEBUG_r(PerlIO_printf(Perl_debug_log,
589 ", giving up...\n"));
590 goto fail_finish;
591 }
592 DEBUG_r(PerlIO_printf(Perl_debug_log,
593 ", trying floating at offset %ld...\n",
1aa99e6b
IH
594 (long)(HOP3c(s1, 1, strend) - i_strpos)));
595 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
596 s = HOP3c(last, 1, strend);
2c2d71f5
JH
597 goto restart;
598 }
599 else {
600 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 601 (long)(s - i_strpos)));
1aa99e6b
IH
602 t = HOP3c(s, -prog->anchored_offset, strbeg);
603 other_last = HOP3c(s, 1, strend);
30944b6d 604 s = s1;
2c2d71f5
JH
605 if (t == strpos)
606 goto try_at_start;
2c2d71f5
JH
607 goto try_at_offset;
608 }
30944b6d 609 }
2c2d71f5
JH
610 }
611 else { /* Take into account the floating substring. */
612 char *last, *last1;
613 char *s1 = s;
614
1aa99e6b
IH
615 t = HOP3c(s, -start_shift, strbeg);
616 last1 = last =
617 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
618 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
619 last = HOP3c(t, prog->float_max_offset, strend);
620 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
621 if (s < other_last)
622 s = other_last;
2c2d71f5
JH
623 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
624 /* fbm_instr() takes into account exact value of end-of-str
625 if the check is SvTAIL(ed). Since false positives are OK,
626 and end-of-str is not later than strend we are OK. */
627 s = fbm_instr((unsigned char*)s,
628 (unsigned char*)last + SvCUR(prog->float_substr)
629 - (SvTAIL(prog->float_substr)!=0),
630 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
631 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
632 (s ? "Found" : "Contradicts"),
633 PL_colors[0],
7b0972df
JH
634 (int)(SvCUR(prog->float_substr)
635 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
636 SvPVX(prog->float_substr),
637 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
638 if (!s) {
639 if (last1 == last) {
640 DEBUG_r(PerlIO_printf(Perl_debug_log,
641 ", giving up...\n"));
642 goto fail_finish;
643 }
644 DEBUG_r(PerlIO_printf(Perl_debug_log,
645 ", trying anchored starting at offset %ld...\n",
30944b6d 646 (long)(s1 + 1 - i_strpos)));
803ff556 647 other_last = last;
1aa99e6b 648 s = HOP3c(t, 1, strend);
2c2d71f5
JH
649 goto restart;
650 }
651 else {
652 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 653 (long)(s - i_strpos)));
803ff556 654 other_last = s; /* Fix this later. --Hugo */
30944b6d 655 s = s1;
2c2d71f5
JH
656 if (t == strpos)
657 goto try_at_start;
2c2d71f5
JH
658 goto try_at_offset;
659 }
660 }
cad2e5aa 661 }
2c2d71f5
JH
662
663 t = s - prog->check_offset_max;
2c2d71f5
JH
664 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
665 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
666 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
667 && t > strpos))) {
2c2d71f5
JH
668 /* Fixed substring is found far enough so that the match
669 cannot start at strpos. */
670 try_at_offset:
cad2e5aa 671 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
672 /* Eventually fbm_*() should handle this, but often
673 anchored_offset is not 0, so this check will not be wasted. */
674 /* XXXX In the code below we prefer to look for "^" even in
675 presence of anchored substrings. And we search even
676 beyond the found float position. These pessimizations
677 are historical artefacts only. */
678 find_anchor:
2c2d71f5 679 while (t < strend - prog->minlen) {
cad2e5aa 680 if (*t == '\n') {
4ee3650e 681 if (t < check_at - prog->check_offset_min) {
30944b6d 682 if (prog->anchored_substr) {
4ee3650e
GS
683 /* Since we moved from the found position,
684 we definitely contradict the found anchored
30944b6d
IZ
685 substr. Due to the above check we do not
686 contradict "check" substr.
687 Thus we can arrive here only if check substr
688 is float. Redo checking for "other"=="fixed".
689 */
9041c2e3 690 strpos = t + 1;
30944b6d
IZ
691 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
692 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
693 goto do_other_anchored;
694 }
4ee3650e
GS
695 /* We don't contradict the found floating substring. */
696 /* XXXX Why not check for STCLASS? */
cad2e5aa 697 s = t + 1;
2c2d71f5 698 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 699 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
700 goto set_useful;
701 }
4ee3650e
GS
702 /* Position contradicts check-string */
703 /* XXXX probably better to look for check-string
704 than for "\n", so one should lower the limit for t? */
705 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 706 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 707 other_last = strpos = s = t + 1;
cad2e5aa
JH
708 goto restart;
709 }
710 t++;
711 }
2c2d71f5
JH
712 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
713 PL_colors[0],PL_colors[1]));
714 goto fail_finish;
cad2e5aa 715 }
f5952150
GS
716 else {
717 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
718 PL_colors[0],PL_colors[1]));
719 }
cad2e5aa
JH
720 s = t;
721 set_useful:
2c2d71f5 722 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
723 }
724 else {
f5952150 725 /* The found string does not prohibit matching at strpos,
2c2d71f5 726 - no optimization of calling REx engine can be performed,
f5952150
GS
727 unless it was an MBOL and we are not after MBOL,
728 or a future STCLASS check will fail this. */
2c2d71f5
JH
729 try_at_start:
730 /* Even in this situation we may use MBOL flag if strpos is offset
731 wrt the start of the string. */
05b4157f 732 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 733 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
734 /* May be due to an implicit anchor of m{.*foo} */
735 && !(prog->reganch & ROPT_IMPLICIT))
736 {
cad2e5aa
JH
737 t = strpos;
738 goto find_anchor;
739 }
30944b6d 740 DEBUG_r( if (ml_anch)
f5952150
GS
741 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
742 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 743 );
2c2d71f5 744 success_at_start:
30944b6d 745 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 746 && prog->check_substr /* Could be deleted already */
cad2e5aa 747 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
748 && prog->check_substr == prog->float_substr)
749 {
cad2e5aa 750 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 751 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
752 SvREFCNT_dec(prog->check_substr);
753 prog->check_substr = Nullsv; /* disable */
754 prog->float_substr = Nullsv; /* clear */
5e39e1e5 755 check = Nullsv; /* abort */
cad2e5aa 756 s = strpos;
3cf5c195
IZ
757 /* XXXX This is a remnant of the old implementation. It
758 looks wasteful, since now INTUIT can use many
6eb5f6b9 759 other heuristics. */
cad2e5aa
JH
760 prog->reganch &= ~RE_USE_INTUIT;
761 }
762 else
763 s = strpos;
764 }
765
6eb5f6b9
JH
766 /* Last resort... */
767 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
768 if (prog->regstclass) {
769 /* minlen == 0 is possible if regstclass is \b or \B,
770 and the fixed substr is ''$.
771 Since minlen is already taken into account, s+1 is before strend;
772 accidentally, minlen >= 1 guaranties no false positives at s + 1
773 even for \b or \B. But (minlen? 1 : 0) below assumes that
774 regstclass does not come from lookahead... */
775 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
776 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 777 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 778 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 779 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 780 : 1);
6eb5f6b9 781 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
782 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
783 : (prog->float_substr
784 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
785 cl_l, strend)
786 : strend);
a1933d95 787 char *startpos = strbeg;
6eb5f6b9
JH
788
789 t = s;
76384e4a 790 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 791 PL_regdata = prog->data;
76384e4a
GS
792 PL_bostr = startpos;
793 }
f33976b4 794 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
6eb5f6b9
JH
795 if (!s) {
796#ifdef DEBUGGING
b7953727 797 char *what = 0;
6eb5f6b9
JH
798#endif
799 if (endpos == strend) {
800 DEBUG_r( PerlIO_printf(Perl_debug_log,
801 "Could not match STCLASS...\n") );
802 goto fail;
803 }
66e933ab
GS
804 DEBUG_r( PerlIO_printf(Perl_debug_log,
805 "This position contradicts STCLASS...\n") );
653099ff
GS
806 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
807 goto fail;
6eb5f6b9
JH
808 /* Contradict one of substrings */
809 if (prog->anchored_substr) {
6eb5f6b9
JH
810 if (prog->anchored_substr == check) {
811 DEBUG_r( what = "anchored" );
812 hop_and_restart:
1aa99e6b 813 s = HOP3c(t, 1, strend);
66e933ab
GS
814 if (s + start_shift + end_shift > strend) {
815 /* XXXX Should be taken into account earlier? */
816 DEBUG_r( PerlIO_printf(Perl_debug_log,
817 "Could not match STCLASS...\n") );
818 goto fail;
819 }
5e39e1e5
HS
820 if (!check)
821 goto giveup;
6eb5f6b9 822 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 823 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
824 what, (long)(s + start_shift - i_strpos)) );
825 goto restart;
826 }
66e933ab 827 /* Have both, check_string is floating */
6eb5f6b9
JH
828 if (t + start_shift >= check_at) /* Contradicts floating=check */
829 goto retry_floating_check;
830 /* Recheck anchored substring, but not floating... */
9041c2e3 831 s = check_at;
5e39e1e5
HS
832 if (!check)
833 goto giveup;
6eb5f6b9 834 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 835 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
836 (long)(other_last - i_strpos)) );
837 goto do_other_anchored;
838 }
60e71179
GS
839 /* Another way we could have checked stclass at the
840 current position only: */
841 if (ml_anch) {
842 s = t = t + 1;
5e39e1e5
HS
843 if (!check)
844 goto giveup;
60e71179 845 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
846 "Looking for /%s^%s/m starting at offset %ld...\n",
847 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 848 goto try_at_offset;
66e933ab 849 }
60e71179
GS
850 if (!prog->float_substr) /* Could have been deleted */
851 goto fail;
6eb5f6b9
JH
852 /* Check is floating subtring. */
853 retry_floating_check:
854 t = check_at - start_shift;
855 DEBUG_r( what = "floating" );
856 goto hop_and_restart;
857 }
b7953727
JH
858 if (t != s) {
859 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 860 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
861 (long)(t - i_strpos), (long)(s - i_strpos))
862 );
863 }
864 else {
865 DEBUG_r(PerlIO_printf(Perl_debug_log,
866 "Does not contradict STCLASS...\n");
867 );
868 }
6eb5f6b9 869 }
5e39e1e5
HS
870 giveup:
871 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
872 PL_colors[4], (check ? "Guessed" : "Giving up"),
873 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 874 return s;
2c2d71f5
JH
875
876 fail_finish: /* Substring not found */
66e933ab
GS
877 if (prog->check_substr) /* could be removed already */
878 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 879 fail:
2c2d71f5 880 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
881 PL_colors[4],PL_colors[5]));
882 return Nullch;
883}
9661b544 884
6eb5f6b9 885/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
886STATIC char *
887S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 888{
6eb5f6b9
JH
889 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
890 char *m;
d8093b23
G
891 STRLEN ln;
892 unsigned int c1;
893 unsigned int c2;
6eb5f6b9
JH
894 char *e;
895 register I32 tmp = 1; /* Scratch variable? */
53c4c00c 896 register bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 897
6eb5f6b9
JH
898 /* We know what class it must start with. */
899 switch (OP(c)) {
6eb5f6b9 900 case ANYOF:
a0ed51b3 901 while (s < strend) {
ffc61ed2 902 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
903 if (tmp && (norun || regtry(prog, s)))
904 goto got_it;
905 else
906 tmp = doevery;
a0ed51b3 907 }
6eb5f6b9
JH
908 else
909 tmp = 1;
ffc61ed2 910 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 911 }
6eb5f6b9 912 break;
f33976b4
DB
913 case CANY:
914 while (s < strend) {
915 if (tmp && (norun || regtry(prog, s)))
916 goto got_it;
917 else
918 tmp = doevery;
919 s++;
920 }
921 break;
6eb5f6b9
JH
922 case EXACTF:
923 m = STRING(c);
924 ln = STR_LEN(c);
1aa99e6b 925 if (UTF) {
a2a2844f
JH
926 STRLEN ulen1, ulen2;
927 U8 tmpbuf1[UTF8_MAXLEN*2+1];
928 U8 tmpbuf2[UTF8_MAXLEN*2+1];
929
930 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
931 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
932
933 c1 = utf8_to_uvuni(tmpbuf1, 0);
934 c2 = utf8_to_uvuni(tmpbuf2, 0);
1aa99e6b
IH
935 }
936 else {
937 c1 = *(U8*)m;
938 c2 = PL_fold[c1];
939 }
6eb5f6b9
JH
940 goto do_exactf;
941 case EXACTFL:
942 m = STRING(c);
943 ln = STR_LEN(c);
d8093b23 944 c1 = *(U8*)m;
6eb5f6b9
JH
945 c2 = PL_fold_locale[c1];
946 do_exactf:
947 e = strend - ln;
b3c9acc1 948
6eb5f6b9
JH
949 if (norun && e < s)
950 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
951
952 if (do_utf8) {
953 STRLEN len;
954 if (c1 == c2)
955 while (s <= e) {
9041c2e3 956 if ( utf8_to_uvchr((U8*)s, &len) == c1
1aa99e6b
IH
957 && regtry(prog, s) )
958 goto got_it;
959 s += len;
960 }
961 else
962 while (s <= e) {
9041c2e3 963 UV c = utf8_to_uvchr((U8*)s, &len);
1aa99e6b
IH
964 if ( (c == c1 || c == c2) && regtry(prog, s) )
965 goto got_it;
966 s += len;
967 }
968 }
969 else {
970 if (c1 == c2)
971 while (s <= e) {
972 if ( *(U8*)s == c1
973 && (ln == 1 || !(OP(c) == EXACTF
974 ? ibcmp(s, m, ln)
975 : ibcmp_locale(s, m, ln)))
976 && (norun || regtry(prog, s)) )
977 goto got_it;
978 s++;
979 }
980 else
981 while (s <= e) {
982 if ( (*(U8*)s == c1 || *(U8*)s == c2)
983 && (ln == 1 || !(OP(c) == EXACTF
984 ? ibcmp(s, m, ln)
985 : ibcmp_locale(s, m, ln)))
986 && (norun || regtry(prog, s)) )
987 goto got_it;
988 s++;
989 }
b3c9acc1
IZ
990 }
991 break;
bbce6d69 992 case BOUNDL:
3280af22 993 PL_reg_flags |= RF_tainted;
bbce6d69 994 /* FALL THROUGH */
a0d0e21e 995 case BOUND:
ffc61ed2 996 if (do_utf8) {
12d33761 997 if (s == PL_bostr)
ffc61ed2
JH
998 tmp = '\n';
999 else {
1aa99e6b 1000 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1001
0064a8a9
JH
1002 if (s > (char*)r)
1003 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1004 }
1005 tmp = ((OP(c) == BOUND ?
9041c2e3 1006 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1007 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1008 while (s < strend) {
1009 if (tmp == !(OP(c) == BOUND ?
3568d838 1010 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1011 isALNUM_LC_utf8((U8*)s)))
1012 {
1013 tmp = !tmp;
1014 if ((norun || regtry(prog, s)))
1015 goto got_it;
1016 }
1017 s += UTF8SKIP(s);
a687059c 1018 }
a0d0e21e 1019 }
667bb95a 1020 else {
12d33761 1021 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1022 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1023 while (s < strend) {
1024 if (tmp ==
1025 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1026 tmp = !tmp;
1027 if ((norun || regtry(prog, s)))
1028 goto got_it;
1029 }
1030 s++;
a0ed51b3 1031 }
a0ed51b3 1032 }
6eb5f6b9 1033 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1034 goto got_it;
1035 break;
bbce6d69 1036 case NBOUNDL:
3280af22 1037 PL_reg_flags |= RF_tainted;
bbce6d69 1038 /* FALL THROUGH */
a0d0e21e 1039 case NBOUND:
ffc61ed2 1040 if (do_utf8) {
12d33761 1041 if (s == PL_bostr)
ffc61ed2
JH
1042 tmp = '\n';
1043 else {
1aa99e6b 1044 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1045
0064a8a9
JH
1046 if (s > (char*)r)
1047 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1048 }
1049 tmp = ((OP(c) == NBOUND ?
9041c2e3 1050 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1051 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1052 while (s < strend) {
1053 if (tmp == !(OP(c) == NBOUND ?
3568d838 1054 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1055 isALNUM_LC_utf8((U8*)s)))
1056 tmp = !tmp;
1057 else if ((norun || regtry(prog, s)))
1058 goto got_it;
1059 s += UTF8SKIP(s);
1060 }
a0d0e21e 1061 }
667bb95a 1062 else {
12d33761 1063 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1064 tmp = ((OP(c) == NBOUND ?
1065 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1066 while (s < strend) {
1067 if (tmp ==
1068 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1069 tmp = !tmp;
1070 else if ((norun || regtry(prog, s)))
1071 goto got_it;
1072 s++;
1073 }
a0ed51b3 1074 }
6eb5f6b9 1075 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1076 goto got_it;
1077 break;
a0d0e21e 1078 case ALNUM:
ffc61ed2 1079 if (do_utf8) {
8269fa76 1080 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1081 while (s < strend) {
3568d838 1082 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1083 if (tmp && (norun || regtry(prog, s)))
1084 goto got_it;
1085 else
1086 tmp = doevery;
1087 }
bbce6d69 1088 else
ffc61ed2
JH
1089 tmp = 1;
1090 s += UTF8SKIP(s);
bbce6d69 1091 }
bbce6d69 1092 }
ffc61ed2
JH
1093 else {
1094 while (s < strend) {
1095 if (isALNUM(*s)) {
1096 if (tmp && (norun || regtry(prog, s)))
1097 goto got_it;
1098 else
1099 tmp = doevery;
1100 }
a0ed51b3 1101 else
ffc61ed2
JH
1102 tmp = 1;
1103 s++;
a0ed51b3 1104 }
a0ed51b3
LW
1105 }
1106 break;
bbce6d69 1107 case ALNUML:
3280af22 1108 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1109 if (do_utf8) {
1110 while (s < strend) {
1111 if (isALNUM_LC_utf8((U8*)s)) {
1112 if (tmp && (norun || regtry(prog, s)))
1113 goto got_it;
1114 else
1115 tmp = doevery;
1116 }
a687059c 1117 else
ffc61ed2
JH
1118 tmp = 1;
1119 s += UTF8SKIP(s);
a0d0e21e 1120 }
a0d0e21e 1121 }
ffc61ed2
JH
1122 else {
1123 while (s < strend) {
1124 if (isALNUM_LC(*s)) {
1125 if (tmp && (norun || regtry(prog, s)))
1126 goto got_it;
1127 else
1128 tmp = doevery;
1129 }
a0ed51b3 1130 else
ffc61ed2
JH
1131 tmp = 1;
1132 s++;
a0ed51b3 1133 }
a0ed51b3
LW
1134 }
1135 break;
a0d0e21e 1136 case NALNUM:
ffc61ed2 1137 if (do_utf8) {
8269fa76 1138 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1139 while (s < strend) {
3568d838 1140 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1141 if (tmp && (norun || regtry(prog, s)))
1142 goto got_it;
1143 else
1144 tmp = doevery;
1145 }
bbce6d69 1146 else
ffc61ed2
JH
1147 tmp = 1;
1148 s += UTF8SKIP(s);
bbce6d69 1149 }
bbce6d69 1150 }
ffc61ed2
JH
1151 else {
1152 while (s < strend) {
1153 if (!isALNUM(*s)) {
1154 if (tmp && (norun || regtry(prog, s)))
1155 goto got_it;
1156 else
1157 tmp = doevery;
1158 }
a0ed51b3 1159 else
ffc61ed2
JH
1160 tmp = 1;
1161 s++;
a0ed51b3 1162 }
a0ed51b3
LW
1163 }
1164 break;
bbce6d69 1165 case NALNUML:
3280af22 1166 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1167 if (do_utf8) {
1168 while (s < strend) {
1169 if (!isALNUM_LC_utf8((U8*)s)) {
1170 if (tmp && (norun || regtry(prog, s)))
1171 goto got_it;
1172 else
1173 tmp = doevery;
1174 }
a687059c 1175 else
ffc61ed2
JH
1176 tmp = 1;
1177 s += UTF8SKIP(s);
a687059c 1178 }
a0d0e21e 1179 }
ffc61ed2
JH
1180 else {
1181 while (s < strend) {
1182 if (!isALNUM_LC(*s)) {
1183 if (tmp && (norun || regtry(prog, s)))
1184 goto got_it;
1185 else
1186 tmp = doevery;
1187 }
a0ed51b3 1188 else
ffc61ed2
JH
1189 tmp = 1;
1190 s++;
a0ed51b3 1191 }
a0ed51b3
LW
1192 }
1193 break;
a0d0e21e 1194 case SPACE:
ffc61ed2 1195 if (do_utf8) {
8269fa76 1196 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1197 while (s < strend) {
3568d838 1198 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1199 if (tmp && (norun || regtry(prog, s)))
1200 goto got_it;
1201 else
1202 tmp = doevery;
1203 }
a0d0e21e 1204 else
ffc61ed2
JH
1205 tmp = 1;
1206 s += UTF8SKIP(s);
2304df62 1207 }
a0d0e21e 1208 }
ffc61ed2
JH
1209 else {
1210 while (s < strend) {
1211 if (isSPACE(*s)) {
1212 if (tmp && (norun || regtry(prog, s)))
1213 goto got_it;
1214 else
1215 tmp = doevery;
1216 }
a0ed51b3 1217 else
ffc61ed2
JH
1218 tmp = 1;
1219 s++;
a0ed51b3 1220 }
a0ed51b3
LW
1221 }
1222 break;
bbce6d69 1223 case SPACEL:
3280af22 1224 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1225 if (do_utf8) {
1226 while (s < strend) {
1227 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1228 if (tmp && (norun || regtry(prog, s)))
1229 goto got_it;
1230 else
1231 tmp = doevery;
1232 }
bbce6d69 1233 else
ffc61ed2
JH
1234 tmp = 1;
1235 s += UTF8SKIP(s);
bbce6d69 1236 }
bbce6d69 1237 }
ffc61ed2
JH
1238 else {
1239 while (s < strend) {
1240 if (isSPACE_LC(*s)) {
1241 if (tmp && (norun || regtry(prog, s)))
1242 goto got_it;
1243 else
1244 tmp = doevery;
1245 }
a0ed51b3 1246 else
ffc61ed2
JH
1247 tmp = 1;
1248 s++;
a0ed51b3 1249 }
a0ed51b3
LW
1250 }
1251 break;
a0d0e21e 1252 case NSPACE:
ffc61ed2 1253 if (do_utf8) {
8269fa76 1254 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1255 while (s < strend) {
3568d838 1256 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1257 if (tmp && (norun || regtry(prog, s)))
1258 goto got_it;
1259 else
1260 tmp = doevery;
1261 }
a0d0e21e 1262 else
ffc61ed2
JH
1263 tmp = 1;
1264 s += UTF8SKIP(s);
a687059c 1265 }
a0d0e21e 1266 }
ffc61ed2
JH
1267 else {
1268 while (s < strend) {
1269 if (!isSPACE(*s)) {
1270 if (tmp && (norun || regtry(prog, s)))
1271 goto got_it;
1272 else
1273 tmp = doevery;
1274 }
a0ed51b3 1275 else
ffc61ed2
JH
1276 tmp = 1;
1277 s++;
a0ed51b3 1278 }
a0ed51b3
LW
1279 }
1280 break;
bbce6d69 1281 case NSPACEL:
3280af22 1282 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1283 if (do_utf8) {
1284 while (s < strend) {
1285 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1286 if (tmp && (norun || regtry(prog, s)))
1287 goto got_it;
1288 else
1289 tmp = doevery;
1290 }
bbce6d69 1291 else
ffc61ed2
JH
1292 tmp = 1;
1293 s += UTF8SKIP(s);
bbce6d69 1294 }
bbce6d69 1295 }
ffc61ed2
JH
1296 else {
1297 while (s < strend) {
1298 if (!isSPACE_LC(*s)) {
1299 if (tmp && (norun || regtry(prog, s)))
1300 goto got_it;
1301 else
1302 tmp = doevery;
1303 }
a0ed51b3 1304 else
ffc61ed2
JH
1305 tmp = 1;
1306 s++;
a0ed51b3 1307 }
a0ed51b3
LW
1308 }
1309 break;
a0d0e21e 1310 case DIGIT:
ffc61ed2 1311 if (do_utf8) {
8269fa76 1312 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1313 while (s < strend) {
3568d838 1314 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1315 if (tmp && (norun || regtry(prog, s)))
1316 goto got_it;
1317 else
1318 tmp = doevery;
1319 }
a0d0e21e 1320 else
ffc61ed2
JH
1321 tmp = 1;
1322 s += UTF8SKIP(s);
2b69d0c2 1323 }
a0d0e21e 1324 }
ffc61ed2
JH
1325 else {
1326 while (s < strend) {
1327 if (isDIGIT(*s)) {
1328 if (tmp && (norun || regtry(prog, s)))
1329 goto got_it;
1330 else
1331 tmp = doevery;
1332 }
a0ed51b3 1333 else
ffc61ed2
JH
1334 tmp = 1;
1335 s++;
a0ed51b3 1336 }
a0ed51b3
LW
1337 }
1338 break;
b8c5462f
JH
1339 case DIGITL:
1340 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1341 if (do_utf8) {
1342 while (s < strend) {
1343 if (isDIGIT_LC_utf8((U8*)s)) {
1344 if (tmp && (norun || regtry(prog, s)))
1345 goto got_it;
1346 else
1347 tmp = doevery;
1348 }
b8c5462f 1349 else
ffc61ed2
JH
1350 tmp = 1;
1351 s += UTF8SKIP(s);
b8c5462f 1352 }
b8c5462f 1353 }
ffc61ed2
JH
1354 else {
1355 while (s < strend) {
1356 if (isDIGIT_LC(*s)) {
1357 if (tmp && (norun || regtry(prog, s)))
1358 goto got_it;
1359 else
1360 tmp = doevery;
1361 }
b8c5462f 1362 else
ffc61ed2
JH
1363 tmp = 1;
1364 s++;
b8c5462f 1365 }
b8c5462f
JH
1366 }
1367 break;
a0d0e21e 1368 case NDIGIT:
ffc61ed2 1369 if (do_utf8) {
8269fa76 1370 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1371 while (s < strend) {
3568d838 1372 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1373 if (tmp && (norun || regtry(prog, s)))
1374 goto got_it;
1375 else
1376 tmp = doevery;
1377 }
a0d0e21e 1378 else
ffc61ed2
JH
1379 tmp = 1;
1380 s += UTF8SKIP(s);
a687059c 1381 }
a0d0e21e 1382 }
ffc61ed2
JH
1383 else {
1384 while (s < strend) {
1385 if (!isDIGIT(*s)) {
1386 if (tmp && (norun || regtry(prog, s)))
1387 goto got_it;
1388 else
1389 tmp = doevery;
1390 }
a0ed51b3 1391 else
ffc61ed2
JH
1392 tmp = 1;
1393 s++;
a0ed51b3 1394 }
a0ed51b3
LW
1395 }
1396 break;
b8c5462f
JH
1397 case NDIGITL:
1398 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1399 if (do_utf8) {
1400 while (s < strend) {
1401 if (!isDIGIT_LC_utf8((U8*)s)) {
1402 if (tmp && (norun || regtry(prog, s)))
1403 goto got_it;
1404 else
1405 tmp = doevery;
1406 }
b8c5462f 1407 else
ffc61ed2
JH
1408 tmp = 1;
1409 s += UTF8SKIP(s);
b8c5462f 1410 }
a0ed51b3 1411 }
ffc61ed2
JH
1412 else {
1413 while (s < strend) {
1414 if (!isDIGIT_LC(*s)) {
1415 if (tmp && (norun || regtry(prog, s)))
1416 goto got_it;
1417 else
1418 tmp = doevery;
1419 }
cf93c79d 1420 else
ffc61ed2
JH
1421 tmp = 1;
1422 s++;
b8c5462f 1423 }
b8c5462f
JH
1424 }
1425 break;
b3c9acc1 1426 default:
3c3eec57
GS
1427 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1428 break;
d6a28714 1429 }
6eb5f6b9
JH
1430 return 0;
1431 got_it:
1432 return s;
1433}
1434
1435/*
1436 - regexec_flags - match a regexp against a string
1437 */
1438I32
1439Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1440 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1441/* strend: pointer to null at end of string */
1442/* strbeg: real beginning of string */
1443/* minend: end of match must be >=minend after stringarg. */
1444/* data: May be used for some additional optimizations. */
1445/* nosave: For optimizations. */
1446{
6eb5f6b9
JH
1447 register char *s;
1448 register regnode *c;
1449 register char *startpos = stringarg;
6eb5f6b9
JH
1450 I32 minlen; /* must match at least this many chars */
1451 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1452 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1453 constant substr. */ /* CC */
1454 I32 end_shift = 0; /* Same for the end. */ /* CC */
1455 I32 scream_pos = -1; /* Internal iterator of scream. */
1456 char *scream_olds;
1457 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1458 bool do_utf8 = DO_UTF8(sv);
2a782b5b
JH
1459#ifdef DEBUGGING
1460 SV *dsv = sv_2mortal(newSVpvn("", 0));
1461#endif
6eb5f6b9
JH
1462
1463 PL_regcc = 0;
1464
1465 cache_re(prog);
1466#ifdef DEBUGGING
aea4f609 1467 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1468#endif
1469
1470 /* Be paranoid... */
1471 if (prog == NULL || startpos == NULL) {
1472 Perl_croak(aTHX_ "NULL regexp parameter");
1473 return 0;
1474 }
1475
1476 minlen = prog->minlen;
f33976b4 1477 if (do_utf8 && !(prog->reganch & ROPT_CANY_SEEN)) {
3baa4c62 1478 if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
1aa99e6b
IH
1479 }
1480 else {
f33976b4 1481 if (strend - startpos < minlen) goto phooey;
1aa99e6b 1482 }
6eb5f6b9 1483
6eb5f6b9
JH
1484 /* Check validity of program. */
1485 if (UCHARAT(prog->program) != REG_MAGIC) {
1486 Perl_croak(aTHX_ "corrupted regexp program");
1487 }
1488
1489 PL_reg_flags = 0;
1490 PL_reg_eval_set = 0;
1491 PL_reg_maxiter = 0;
1492
1493 if (prog->reganch & ROPT_UTF8)
1494 PL_reg_flags |= RF_utf8;
1495
1496 /* Mark beginning of line for ^ and lookbehind. */
1497 PL_regbol = startpos;
1498 PL_bostr = strbeg;
1499 PL_reg_sv = sv;
1500
1501 /* Mark end of line for $ (and such) */
1502 PL_regeol = strend;
1503
1504 /* see how far we have to get to not match where we matched before */
1505 PL_regtill = startpos+minend;
1506
1507 /* We start without call_cc context. */
1508 PL_reg_call_cc = 0;
1509
1510 /* If there is a "must appear" string, look for it. */
1511 s = startpos;
1512
1513 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1514 MAGIC *mg;
1515
1516 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1517 PL_reg_ganch = startpos;
1518 else if (sv && SvTYPE(sv) >= SVt_PVMG
1519 && SvMAGIC(sv)
14befaf4
DM
1520 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1521 && mg->mg_len >= 0) {
6eb5f6b9
JH
1522 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1523 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1524 if (s > PL_reg_ganch)
6eb5f6b9
JH
1525 goto phooey;
1526 s = PL_reg_ganch;
1527 }
1528 }
1529 else /* pos() not defined */
1530 PL_reg_ganch = strbeg;
1531 }
1532
699c3c34
JH
1533 if (do_utf8 == (UTF!=0) &&
1534 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1535 re_scream_pos_data d;
1536
1537 d.scream_olds = &scream_olds;
1538 d.scream_pos = &scream_pos;
1539 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1540 if (!s)
1541 goto phooey; /* not present */
1542 }
1543
2a782b5b 1544 DEBUG_r({
df1ffd02
JH
1545 char *s = do_utf8 ? sv_uni_display(dsv, sv, 60, 0) : startpos;
1546 int len = do_utf8 ? strlen(s) : strend - startpos;
2a782b5b
JH
1547 if (!PL_colorset)
1548 reginitcolors();
1549 PerlIO_printf(Perl_debug_log,
1550 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1551 PL_colors[4],PL_colors[5],PL_colors[0],
1552 prog->precomp,
1553 PL_colors[1],
1554 (strlen(prog->precomp) > 60 ? "..." : ""),
1555 PL_colors[0],
1556 (int)(len > 60 ? 60 : len),
1557 s, PL_colors[1],
1558 (len > 60 ? "..." : "")
1559 );
1560 });
6eb5f6b9
JH
1561
1562 /* Simplest case: anchored match need be tried only once. */
1563 /* [unless only anchor is BOL and multiline is set] */
1564 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1565 if (s == startpos && regtry(prog, startpos))
1566 goto got_it;
1567 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1568 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1569 {
1570 char *end;
1571
1572 if (minlen)
1573 dontbother = minlen - 1;
1aa99e6b 1574 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1575 /* for multiline we only have to try after newlines */
1576 if (prog->check_substr) {
1577 if (s == startpos)
1578 goto after_try;
1579 while (1) {
1580 if (regtry(prog, s))
1581 goto got_it;
1582 after_try:
1583 if (s >= end)
1584 goto phooey;
1585 if (prog->reganch & RE_USE_INTUIT) {
1586 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1587 if (!s)
1588 goto phooey;
1589 }
1590 else
1591 s++;
1592 }
1593 } else {
1594 if (s > startpos)
1595 s--;
1596 while (s < end) {
1597 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1598 if (regtry(prog, s))
1599 goto got_it;
1600 }
1601 }
1602 }
1603 }
1604 goto phooey;
1605 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1606 if (regtry(prog, PL_reg_ganch))
1607 goto got_it;
1608 goto phooey;
1609 }
1610
1611 /* Messy cases: unanchored match. */
9041c2e3 1612 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1613 /* we have /x+whatever/ */
1614 /* it must be a one character string (XXXX Except UTF?) */
1615 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1616#ifdef DEBUGGING
1617 int did_match = 0;
1618#endif
1619
1aa99e6b 1620 if (do_utf8) {
6eb5f6b9
JH
1621 while (s < strend) {
1622 if (*s == ch) {
bf93d4cc 1623 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1624 if (regtry(prog, s)) goto got_it;
1625 s += UTF8SKIP(s);
1626 while (s < strend && *s == ch)
1627 s += UTF8SKIP(s);
1628 }
1629 s += UTF8SKIP(s);
1630 }
1631 }
1632 else {
1633 while (s < strend) {
1634 if (*s == ch) {
bf93d4cc 1635 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1636 if (regtry(prog, s)) goto got_it;
1637 s++;
1638 while (s < strend && *s == ch)
1639 s++;
1640 }
1641 s++;
1642 }
1643 }
b7953727 1644 DEBUG_r(if (!did_match)
bf93d4cc 1645 PerlIO_printf(Perl_debug_log,
b7953727
JH
1646 "Did not find anchored character...\n")
1647 );
6eb5f6b9
JH
1648 }
1649 /*SUPPRESS 560*/
1aa99e6b
IH
1650 else if (do_utf8 == (UTF!=0) &&
1651 (prog->anchored_substr != Nullsv
9041c2e3 1652 || (prog->float_substr != Nullsv
1aa99e6b 1653 && prog->float_max_offset < strend - s))) {
9041c2e3 1654 SV *must = prog->anchored_substr
6eb5f6b9 1655 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1656 I32 back_max =
6eb5f6b9 1657 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1658 I32 back_min =
6eb5f6b9 1659 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1660 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1661 -(I32)(CHR_SVLEN(must)
1aa99e6b 1662 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1663 char *last1; /* Last position checked before */
bf93d4cc
GS
1664#ifdef DEBUGGING
1665 int did_match = 0;
1666#endif
6eb5f6b9
JH
1667
1668 if (s > PL_bostr)
1669 last1 = HOPc(s, -1);
1670 else
1671 last1 = s - 1; /* bogus */
1672
1673 /* XXXX check_substr already used to find `s', can optimize if
1674 check_substr==must. */
1675 scream_pos = -1;
1676 dontbother = end_shift;
1677 strend = HOPc(strend, -dontbother);
1678 while ( (s <= last) &&
9041c2e3 1679 ((flags & REXEC_SCREAM)
1aa99e6b 1680 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1681 end_shift, &scream_pos, 0))
1aa99e6b 1682 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1683 (unsigned char*)strend, must,
6eb5f6b9 1684 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1685 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1686 if (HOPc(s, -back_max) > last1) {
1687 last1 = HOPc(s, -back_min);
1688 s = HOPc(s, -back_max);
1689 }
1690 else {
1691 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1692
1693 last1 = HOPc(s, -back_min);
1694 s = t;
1695 }
1aa99e6b 1696 if (do_utf8) {
6eb5f6b9
JH
1697 while (s <= last1) {
1698 if (regtry(prog, s))
1699 goto got_it;
1700 s += UTF8SKIP(s);
1701 }
1702 }
1703 else {
1704 while (s <= last1) {
1705 if (regtry(prog, s))
1706 goto got_it;
1707 s++;
1708 }
1709 }
1710 }
b7953727
JH
1711 DEBUG_r(if (!did_match)
1712 PerlIO_printf(Perl_debug_log,
1713 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1714 ((must == prog->anchored_substr)
1715 ? "anchored" : "floating"),
1716 PL_colors[0],
1717 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1718 SvPVX(must),
b7953727
JH
1719 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1720 );
6eb5f6b9
JH
1721 goto phooey;
1722 }
155aba94 1723 else if ((c = prog->regstclass)) {
66e933ab
GS
1724 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1725 /* don't bother with what can't match */
6eb5f6b9 1726 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1727 DEBUG_r({
1728 SV *prop = sv_newmortal();
1729 regprop(prop, c);
2a782b5b 1730 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), UTF ? sv_uni_display(dsv, sv, 60, 0) : s);
ffc61ed2 1731 });
6eb5f6b9
JH
1732 if (find_byclass(prog, c, s, strend, startpos, 0))
1733 goto got_it;
bf93d4cc 1734 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1735 }
1736 else {
1737 dontbother = 0;
1738 if (prog->float_substr != Nullsv) { /* Trim the end. */
1739 char *last;
d6a28714
JH
1740
1741 if (flags & REXEC_SCREAM) {
1742 last = screaminstr(sv, prog->float_substr, s - strbeg,
1743 end_shift, &scream_pos, 1); /* last one */
1744 if (!last)
ffc61ed2 1745 last = scream_olds; /* Only one occurrence. */
b8c5462f 1746 }
d6a28714
JH
1747 else {
1748 STRLEN len;
1749 char *little = SvPV(prog->float_substr, len);
1750
1751 if (SvTAIL(prog->float_substr)) {
1752 if (memEQ(strend - len + 1, little, len - 1))
1753 last = strend - len + 1;
1754 else if (!PL_multiline)
9041c2e3 1755 last = memEQ(strend - len, little, len)
d6a28714 1756 ? strend - len : Nullch;
b8c5462f 1757 else
d6a28714
JH
1758 goto find_last;
1759 } else {
1760 find_last:
9041c2e3 1761 if (len)
d6a28714 1762 last = rninstr(s, strend, little, little + len);
b8c5462f 1763 else
d6a28714 1764 last = strend; /* matching `$' */
b8c5462f 1765 }
b8c5462f 1766 }
bf93d4cc
GS
1767 if (last == NULL) {
1768 DEBUG_r(PerlIO_printf(Perl_debug_log,
1769 "%sCan't trim the tail, match fails (should not happen)%s\n",
1770 PL_colors[4],PL_colors[5]));
1771 goto phooey; /* Should not happen! */
1772 }
d6a28714
JH
1773 dontbother = strend - last + prog->float_min_offset;
1774 }
1775 if (minlen && (dontbother < minlen))
1776 dontbother = minlen - 1;
1777 strend -= dontbother; /* this one's always in bytes! */
1778 /* We don't know much -- general case. */
1aa99e6b 1779 if (do_utf8) {
d6a28714
JH
1780 for (;;) {
1781 if (regtry(prog, s))
1782 goto got_it;
1783 if (s >= strend)
1784 break;
b8c5462f 1785 s += UTF8SKIP(s);
d6a28714
JH
1786 };
1787 }
1788 else {
1789 do {
1790 if (regtry(prog, s))
1791 goto got_it;
1792 } while (s++ < strend);
1793 }
1794 }
1795
1796 /* Failure. */
1797 goto phooey;
1798
1799got_it:
1800 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1801
1802 if (PL_reg_eval_set) {
1803 /* Preserve the current value of $^R */
1804 if (oreplsv != GvSV(PL_replgv))
1805 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1806 restored, the value remains
1807 the same. */
acfe0abc 1808 restore_pos(aTHX_ 0);
d6a28714
JH
1809 }
1810
1811 /* make sure $`, $&, $', and $digit will work later */
1812 if ( !(flags & REXEC_NOT_FIRST) ) {
1813 if (RX_MATCH_COPIED(prog)) {
1814 Safefree(prog->subbeg);
1815 RX_MATCH_COPIED_off(prog);
1816 }
1817 if (flags & REXEC_COPY_STR) {
1818 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1819
1820 s = savepvn(strbeg, i);
1821 prog->subbeg = s;
1822 prog->sublen = i;
1823 RX_MATCH_COPIED_on(prog);
1824 }
1825 else {
1826 prog->subbeg = strbeg;
1827 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1828 }
1829 }
9041c2e3 1830
d6a28714
JH
1831 return 1;
1832
1833phooey:
bf93d4cc
GS
1834 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1835 PL_colors[4],PL_colors[5]));
d6a28714 1836 if (PL_reg_eval_set)
acfe0abc 1837 restore_pos(aTHX_ 0);
d6a28714
JH
1838 return 0;
1839}
1840
1841/*
1842 - regtry - try match at specific point
1843 */
1844STATIC I32 /* 0 failure, 1 success */
1845S_regtry(pTHX_ regexp *prog, char *startpos)
1846{
d6a28714
JH
1847 register I32 i;
1848 register I32 *sp;
1849 register I32 *ep;
1850 CHECKPOINT lastcp;
1851
02db2b7b
IZ
1852#ifdef DEBUGGING
1853 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1854#endif
d6a28714
JH
1855 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1856 MAGIC *mg;
1857
1858 PL_reg_eval_set = RS_init;
1859 DEBUG_r(DEBUG_s(
b900a521
JH
1860 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1861 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1862 ));
e8347627 1863 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1864 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1865 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1866 SAVETMPS;
1867 /* Apparently this is not needed, judging by wantarray. */
e8347627 1868 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1869 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1870
1871 if (PL_reg_sv) {
1872 /* Make $_ available to executed code. */
1873 if (PL_reg_sv != DEFSV) {
4d1ff10f 1874 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
d6a28714
JH
1875 SAVESPTR(DEFSV);
1876 DEFSV = PL_reg_sv;
b8c5462f 1877 }
d6a28714 1878
9041c2e3 1879 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1880 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1881 /* prepare for quick setting of pos */
14befaf4
DM
1882 sv_magic(PL_reg_sv, (SV*)0,
1883 PERL_MAGIC_regex_global, Nullch, 0);
1884 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1885 mg->mg_len = -1;
b8c5462f 1886 }
d6a28714
JH
1887 PL_reg_magic = mg;
1888 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1889 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 1890 }
09687e5a 1891 if (!PL_reg_curpm) {
0f79a09d 1892 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
1893#ifdef USE_ITHREADS
1894 {
1895 SV* repointer = newSViv(0);
577e12cc 1896 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 1897 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
1898 av_push(PL_regex_padav,repointer);
1899 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
1900 PL_regex_pad = AvARRAY(PL_regex_padav);
1901 }
1902#endif
1903 }
aaa362c4 1904 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
1905 PL_reg_oldcurpm = PL_curpm;
1906 PL_curpm = PL_reg_curpm;
1907 if (RX_MATCH_COPIED(prog)) {
1908 /* Here is a serious problem: we cannot rewrite subbeg,
1909 since it may be needed if this match fails. Thus
1910 $` inside (?{}) could fail... */
1911 PL_reg_oldsaved = prog->subbeg;
1912 PL_reg_oldsavedlen = prog->sublen;
1913 RX_MATCH_COPIED_off(prog);
1914 }
1915 else
1916 PL_reg_oldsaved = Nullch;
1917 prog->subbeg = PL_bostr;
1918 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1919 }
1920 prog->startp[0] = startpos - PL_bostr;
1921 PL_reginput = startpos;
1922 PL_regstartp = prog->startp;
1923 PL_regendp = prog->endp;
1924 PL_reglastparen = &prog->lastparen;
a01268b5 1925 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
1926 prog->lastparen = 0;
1927 PL_regsize = 0;
1928 DEBUG_r(PL_reg_starttry = startpos);
1929 if (PL_reg_start_tmpl <= prog->nparens) {
1930 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1931 if(PL_reg_start_tmp)
1932 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1933 else
1934 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1935 }
1936
1937 /* XXXX What this code is doing here?!!! There should be no need
1938 to do this again and again, PL_reglastparen should take care of
3dd2943c 1939 this! --ilya*/
dafc8851
JH
1940
1941 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1942 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1943 * PL_reglastparen), is not needed at all by the test suite
1944 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1945 * enough, for building DynaLoader, or otherwise this
1946 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1947 * will happen. Meanwhile, this code *is* needed for the
1948 * above-mentioned test suite tests to succeed. The common theme
1949 * on those tests seems to be returning null fields from matches.
1950 * --jhi */
dafc8851 1951#if 1
d6a28714
JH
1952 sp = prog->startp;
1953 ep = prog->endp;
1954 if (prog->nparens) {
09e8ae3b 1955 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1956 *++sp = -1;
1957 *++ep = -1;
1958 }
1959 }
dafc8851 1960#endif
02db2b7b 1961 REGCP_SET(lastcp);
d6a28714
JH
1962 if (regmatch(prog->program + 1)) {
1963 prog->endp[0] = PL_reginput - PL_bostr;
1964 return 1;
1965 }
02db2b7b 1966 REGCP_UNWIND(lastcp);
d6a28714
JH
1967 return 0;
1968}
1969
02db2b7b
IZ
1970#define RE_UNWIND_BRANCH 1
1971#define RE_UNWIND_BRANCHJ 2
1972
1973union re_unwind_t;
1974
1975typedef struct { /* XX: makes sense to enlarge it... */
1976 I32 type;
1977 I32 prev;
1978 CHECKPOINT lastcp;
1979} re_unwind_generic_t;
1980
1981typedef struct {
1982 I32 type;
1983 I32 prev;
1984 CHECKPOINT lastcp;
1985 I32 lastparen;
1986 regnode *next;
1987 char *locinput;
1988 I32 nextchr;
1989#ifdef DEBUGGING
1990 int regindent;
1991#endif
1992} re_unwind_branch_t;
1993
1994typedef union re_unwind_t {
1995 I32 type;
1996 re_unwind_generic_t generic;
1997 re_unwind_branch_t branch;
1998} re_unwind_t;
1999
8ba1375e
MJD
2000#define sayYES goto yes
2001#define sayNO goto no
2002#define sayYES_FINAL goto yes_final
2003#define sayYES_LOUD goto yes_loud
2004#define sayNO_FINAL goto no_final
2005#define sayNO_SILENT goto do_no
2006#define saySAME(x) if (x) goto yes; else goto no
2007
2008#define REPORT_CODE_OFF 24
2009
d6a28714
JH
2010/*
2011 - regmatch - main matching routine
2012 *
2013 * Conceptually the strategy is simple: check to see whether the current
2014 * node matches, call self recursively to see whether the rest matches,
2015 * and then act accordingly. In practice we make some effort to avoid
2016 * recursion, in particular by going through "ordinary" nodes (that don't
2017 * need to know whether the rest of the match failed) by a loop instead of
2018 * by recursion.
2019 */
2020/* [lwall] I've hoisted the register declarations to the outer block in order to
2021 * maybe save a little bit of pushing and popping on the stack. It also takes
2022 * advantage of machines that use a register save mask on subroutine entry.
2023 */
2024STATIC I32 /* 0 failure, 1 success */
2025S_regmatch(pTHX_ regnode *prog)
2026{
d6a28714
JH
2027 register regnode *scan; /* Current node. */
2028 regnode *next; /* Next node. */
2029 regnode *inner; /* Next node in internal branch. */
2030 register I32 nextchr; /* renamed nextchr - nextchar colides with
2031 function of same name */
2032 register I32 n; /* no or next */
b7953727
JH
2033 register I32 ln = 0; /* len or last */
2034 register char *s = Nullch; /* operand or save */
d6a28714 2035 register char *locinput = PL_reginput;
b7953727 2036 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2037 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2038 I32 unwind = 0;
b7953727 2039#if 0
02db2b7b 2040 I32 firstcp = PL_savestack_ix;
b7953727 2041#endif
53c4c00c 2042 register bool do_utf8 = PL_reg_match_utf8;
2a782b5b
JH
2043#ifdef DEBUGGING
2044 SV *dsv0 = sv_2mortal(newSVpvn("", 0));
2045 SV *dsv1 = sv_2mortal(newSVpvn("", 0));
2046 SV *dsv2 = sv_2mortal(newSVpvn("", 0));
2047#endif
02db2b7b 2048
d6a28714
JH
2049#ifdef DEBUGGING
2050 PL_regindent++;
2051#endif
2052
2053 /* Note that nextchr is a byte even in UTF */
2054 nextchr = UCHARAT(locinput);
2055 scan = prog;
2056 while (scan != NULL) {
8ba1375e 2057
2a782b5b 2058 DEBUG_r( {
d6a28714
JH
2059 SV *prop = sv_newmortal();
2060 int docolor = *PL_colors[0];
2061 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2062 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2063 /* The part of the string before starttry has one color
2064 (pref0_len chars), between starttry and current
2065 position another one (pref_len - pref0_len chars),
2066 after the current position the third one.
2067 We assume that pref0_len <= pref_len, otherwise we
2068 decrease pref0_len. */
9041c2e3 2069 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2070 ? (5 + taill) - l : locinput - PL_bostr;
2071 int pref0_len;
d6a28714 2072
df1ffd02 2073 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2074 pref_len++;
2075 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2076 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2077 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2078 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2079 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2080 l--;
d6a28714
JH
2081 if (pref0_len < 0)
2082 pref0_len = 0;
2083 if (pref0_len > pref_len)
2084 pref0_len = pref_len;
2085 regprop(prop, scan);
2a782b5b
JH
2086 {
2087 char *s0 =
df1ffd02 2088 do_utf8 ?
2a782b5b
JH
2089 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
2090 pref0_len, 60, 0) :
2091 locinput - pref_len;
df1ffd02
JH
2092 int len0 = do_utf8 ? strlen(s0) : pref0_len;
2093 char *s1 = do_utf8 ?
2a782b5b
JH
2094 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
2095 pref_len - pref0_len, 60, 0) :
2096 locinput - pref_len + pref0_len;
df1ffd02
JH
2097 int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2098 char *s2 = do_utf8 ?
2a782b5b
JH
2099 pv_uni_display(dsv2, (U8*)locinput,
2100 PL_regeol - locinput, 60, 0) :
2101 locinput;
df1ffd02 2102 int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2103 PerlIO_printf(Perl_debug_log,
2104 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2105 (IV)(locinput - PL_bostr),
2106 PL_colors[4],
2107 len0, s0,
2108 PL_colors[5],
2109 PL_colors[2],
2110 len1, s1,
2111 PL_colors[3],
2112 (docolor ? "" : "> <"),
2113 PL_colors[0],
2114 len2, s2,
2115 PL_colors[1],
2116 15 - l - pref_len + 1,
2117 "",
2118 (IV)(scan - PL_regprogram), PL_regindent*2, "",
2119 SvPVX(prop));
2120 }
2121 });
d6a28714
JH
2122
2123 next = scan + NEXT_OFF(scan);
2124 if (next == scan)
2125 next = NULL;
2126
2127 switch (OP(scan)) {
2128 case BOL:
12d33761
HS
2129 if (locinput == PL_bostr || (PL_multiline &&
2130 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2131 {
2132 /* regtill = regbol; */
b8c5462f
JH
2133 break;
2134 }
d6a28714
JH
2135 sayNO;
2136 case MBOL:
12d33761
HS
2137 if (locinput == PL_bostr ||
2138 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2139 {
b8c5462f
JH
2140 break;
2141 }
d6a28714
JH
2142 sayNO;
2143 case SBOL:
c2a73568 2144 if (locinput == PL_bostr)
b8c5462f 2145 break;
d6a28714
JH
2146 sayNO;
2147 case GPOS:
2148 if (locinput == PL_reg_ganch)
2149 break;
2150 sayNO;
2151 case EOL:
2152 if (PL_multiline)
2153 goto meol;
2154 else
2155 goto seol;
2156 case MEOL:
2157 meol:
2158 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2159 sayNO;
b8c5462f 2160 break;
d6a28714
JH
2161 case SEOL:
2162 seol:
2163 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2164 sayNO;
d6a28714 2165 if (PL_regeol - locinput > 1)
b8c5462f 2166 sayNO;
b8c5462f 2167 break;
d6a28714
JH
2168 case EOS:
2169 if (PL_regeol != locinput)
b8c5462f 2170 sayNO;
d6a28714 2171 break;
ffc61ed2 2172 case SANY:
d6a28714 2173 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2174 sayNO;
f33976b4
DB
2175 if (do_utf8) {
2176 locinput += PL_utf8skip[nextchr];
2177 if (locinput > PL_regeol)
2178 sayNO;
2179 nextchr = UCHARAT(locinput);
2180 }
2181 else
2182 nextchr = UCHARAT(++locinput);
2183 break;
2184 case CANY:
2185 if (!nextchr && locinput >= PL_regeol)
2186 sayNO;
b8c5462f 2187 nextchr = UCHARAT(++locinput);
a0d0e21e 2188 break;
ffc61ed2 2189 case REG_ANY:
1aa99e6b
IH
2190 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2191 sayNO;
2192 if (do_utf8) {
b8c5462f 2193 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2194 if (locinput > PL_regeol)
2195 sayNO;
a0ed51b3 2196 nextchr = UCHARAT(locinput);
a0ed51b3 2197 }
1aa99e6b
IH
2198 else
2199 nextchr = UCHARAT(++locinput);
a0ed51b3 2200 break;
d6a28714 2201 case EXACT:
cd439c50
IZ
2202 s = STRING(scan);
2203 ln = STR_LEN(scan);
1aa99e6b
IH
2204 if (do_utf8 != (UTF!=0)) {
2205 char *l = locinput;
2206 char *e = s + ln;
2207 STRLEN len;
2208 if (do_utf8)
2209 while (s < e) {
2210 if (l >= PL_regeol)
2211 sayNO;
9041c2e3 2212 if (*((U8*)s) != utf8_to_uvchr((U8*)l, &len))
1aa99e6b 2213 sayNO;
cf27e83c 2214 s += len;
1aa99e6b
IH
2215 l += len;
2216 }
2217 else
2218 while (s < e) {
2219 if (l >= PL_regeol)
2220 sayNO;
9041c2e3 2221 if (*((U8*)l) != utf8_to_uvchr((U8*)s, &len))
1aa99e6b 2222 sayNO;
cf27e83c 2223 s++;
1aa99e6b
IH
2224 l++;
2225 }
2226 locinput = l;
2227 nextchr = UCHARAT(locinput);
2228 break;
2229 }
d6a28714
JH
2230 /* Inline the first character, for speed. */
2231 if (UCHARAT(s) != nextchr)
2232 sayNO;
2233 if (PL_regeol - locinput < ln)
2234 sayNO;
2235 if (ln > 1 && memNE(s, locinput, ln))
2236 sayNO;
2237 locinput += ln;
2238 nextchr = UCHARAT(locinput);
2239 break;
2240 case EXACTFL:
b8c5462f
JH
2241 PL_reg_flags |= RF_tainted;
2242 /* FALL THROUGH */
d6a28714 2243 case EXACTF:
cd439c50
IZ
2244 s = STRING(scan);
2245 ln = STR_LEN(scan);
d6a28714 2246
1aa99e6b 2247 if (do_utf8) {
d6a28714 2248 char *l = locinput;
1aa99e6b 2249 char *e;
a2a2844f
JH
2250 STRLEN ulen;
2251 U8 tmpbuf[UTF8_MAXLEN*2+1];
1aa99e6b 2252 e = s + ln;
d6a28714 2253 while (s < e) {
a2a2844f 2254 if (l >= PL_regeol)
d6a28714 2255 sayNO;
a2a2844f 2256 toLOWER_utf8((U8*)l, tmpbuf, &ulen);
7114a2d2 2257 if (memNE(s, (char*)tmpbuf, ulen))
a2a2844f
JH
2258 sayNO;
2259 s += UTF8SKIP(s);
2260 l += ulen;
b8c5462f 2261 }
d6a28714 2262 locinput = l;
a0ed51b3
LW
2263 nextchr = UCHARAT(locinput);
2264 break;
2265 }
d6a28714
JH
2266
2267 /* Inline the first character, for speed. */
2268 if (UCHARAT(s) != nextchr &&
2269 UCHARAT(s) != ((OP(scan) == EXACTF)
2270 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2271 sayNO;
d6a28714 2272 if (PL_regeol - locinput < ln)
b8c5462f 2273 sayNO;
d6a28714
JH
2274 if (ln > 1 && (OP(scan) == EXACTF
2275 ? ibcmp(s, locinput, ln)
2276 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2277 sayNO;
d6a28714
JH
2278 locinput += ln;
2279 nextchr = UCHARAT(locinput);
a0d0e21e 2280 break;
d6a28714 2281 case ANYOF:
ffc61ed2
JH
2282 if (do_utf8) {
2283 if (!reginclass(scan, (U8*)locinput, do_utf8))
2284 sayNO;
2285 if (locinput >= PL_regeol)
2286 sayNO;
2287 locinput += PL_utf8skip[nextchr];
b8c5462f 2288 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2289 }
2290 else {
2291 if (nextchr < 0)
2292 nextchr = UCHARAT(locinput);
2293 if (!reginclass(scan, (U8*)locinput, do_utf8))
2294 sayNO;
2295 if (!nextchr && locinput >= PL_regeol)
2296 sayNO;
2297 nextchr = UCHARAT(++locinput);
2298 }
b8c5462f 2299 break;
d6a28714 2300 case ALNUML:
b8c5462f
JH
2301 PL_reg_flags |= RF_tainted;
2302 /* FALL THROUGH */
d6a28714 2303 case ALNUM:
b8c5462f 2304 if (!nextchr)
4633a7c4 2305 sayNO;
ffc61ed2 2306 if (do_utf8) {
ad24be35 2307 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2308 if (!(OP(scan) == ALNUM
3568d838 2309 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2310 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2311 {
2312 sayNO;
a0ed51b3 2313 }
b8c5462f 2314 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2315 nextchr = UCHARAT(locinput);
2316 break;
2317 }
ffc61ed2 2318 if (!(OP(scan) == ALNUM
d6a28714 2319 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2320 sayNO;
b8c5462f 2321 nextchr = UCHARAT(++locinput);
a0d0e21e 2322 break;
d6a28714 2323 case NALNUML:
b8c5462f
JH
2324 PL_reg_flags |= RF_tainted;
2325 /* FALL THROUGH */
d6a28714
JH
2326 case NALNUM:
2327 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2328 sayNO;
ffc61ed2 2329 if (do_utf8) {
8269fa76 2330 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2331 if (OP(scan) == NALNUM
3568d838 2332 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2333 : isALNUM_LC_utf8((U8*)locinput))
2334 {
b8c5462f 2335 sayNO;
d6a28714 2336 }
b8c5462f
JH
2337 locinput += PL_utf8skip[nextchr];
2338 nextchr = UCHARAT(locinput);
2339 break;
2340 }
ffc61ed2 2341 if (OP(scan) == NALNUM
d6a28714 2342 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2343 sayNO;
76e3520e 2344 nextchr = UCHARAT(++locinput);
a0d0e21e 2345 break;
d6a28714
JH
2346 case BOUNDL:
2347 case NBOUNDL:
3280af22 2348 PL_reg_flags |= RF_tainted;
bbce6d69 2349 /* FALL THROUGH */
d6a28714
JH
2350 case BOUND:
2351 case NBOUND:
2352 /* was last char in word? */
ffc61ed2 2353 if (do_utf8) {
12d33761
HS
2354 if (locinput == PL_bostr)
2355 ln = '\n';
ffc61ed2
JH
2356 else {
2357 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2358
2b9d42f0 2359 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2360 }
2361 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2362 ln = isALNUM_uni(ln);
8269fa76 2363 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2364 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2365 }
2366 else {
9041c2e3 2367 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2368 n = isALNUM_LC_utf8((U8*)locinput);
2369 }
a0ed51b3 2370 }
d6a28714 2371 else {
12d33761
HS
2372 ln = (locinput != PL_bostr) ?
2373 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2374 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2375 ln = isALNUM(ln);
2376 n = isALNUM(nextchr);
2377 }
2378 else {
2379 ln = isALNUM_LC(ln);
2380 n = isALNUM_LC(nextchr);
2381 }
d6a28714 2382 }
ffc61ed2
JH
2383 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2384 OP(scan) == BOUNDL))
2385 sayNO;
a0ed51b3 2386 break;
d6a28714 2387 case SPACEL:
3280af22 2388 PL_reg_flags |= RF_tainted;
bbce6d69 2389 /* FALL THROUGH */
d6a28714 2390 case SPACE:
9442cb0e 2391 if (!nextchr)
4633a7c4 2392 sayNO;
1aa99e6b 2393 if (do_utf8) {
fd400ab9 2394 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2395 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2396 if (!(OP(scan) == SPACE
3568d838 2397 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2398 : isSPACE_LC_utf8((U8*)locinput)))
2399 {
2400 sayNO;
2401 }
2402 locinput += PL_utf8skip[nextchr];
2403 nextchr = UCHARAT(locinput);
2404 break;
d6a28714 2405 }
ffc61ed2
JH
2406 if (!(OP(scan) == SPACE
2407 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2408 sayNO;
2409 nextchr = UCHARAT(++locinput);
2410 }
2411 else {
2412 if (!(OP(scan) == SPACE
2413 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2414 sayNO;
2415 nextchr = UCHARAT(++locinput);
a0ed51b3 2416 }
a0ed51b3 2417 break;
d6a28714 2418 case NSPACEL:
3280af22 2419 PL_reg_flags |= RF_tainted;
bbce6d69 2420 /* FALL THROUGH */
d6a28714 2421 case NSPACE:
9442cb0e 2422 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2423 sayNO;
1aa99e6b 2424 if (do_utf8) {
8269fa76 2425 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2426 if (OP(scan) == NSPACE
3568d838 2427 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2428 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2429 {
2430 sayNO;
2431 }
2432 locinput += PL_utf8skip[nextchr];
2433 nextchr = UCHARAT(locinput);
2434 break;
a0ed51b3 2435 }
ffc61ed2 2436 if (OP(scan) == NSPACE
d6a28714 2437 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2438 sayNO;
76e3520e 2439 nextchr = UCHARAT(++locinput);
a0d0e21e 2440 break;
d6a28714 2441 case DIGITL:
a0ed51b3
LW
2442 PL_reg_flags |= RF_tainted;
2443 /* FALL THROUGH */
d6a28714 2444 case DIGIT:
9442cb0e 2445 if (!nextchr)
a0ed51b3 2446 sayNO;
1aa99e6b 2447 if (do_utf8) {
8269fa76 2448 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2449 if (!(OP(scan) == DIGIT
3568d838 2450 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2451 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2452 {
a0ed51b3 2453 sayNO;
dfe13c55 2454 }
6f06b55f 2455 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2456 nextchr = UCHARAT(locinput);
2457 break;
2458 }
ffc61ed2 2459 if (!(OP(scan) == DIGIT
9442cb0e 2460 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2461 sayNO;
2462 nextchr = UCHARAT(++locinput);
2463 break;
d6a28714 2464 case NDIGITL:
b8c5462f
JH
2465 PL_reg_flags |= RF_tainted;
2466 /* FALL THROUGH */
d6a28714 2467 case NDIGIT:
9442cb0e 2468 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2469 sayNO;
1aa99e6b 2470 if (do_utf8) {
8269fa76 2471 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2472 if (OP(scan) == NDIGIT
3568d838 2473 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2474 : isDIGIT_LC_utf8((U8*)locinput))
2475 {
a0ed51b3 2476 sayNO;
9442cb0e 2477 }
6f06b55f 2478 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2479 nextchr = UCHARAT(locinput);
2480 break;
2481 }
ffc61ed2 2482 if (OP(scan) == NDIGIT
9442cb0e 2483 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2484 sayNO;
2485 nextchr = UCHARAT(++locinput);
2486 break;
2487 case CLUMP:
8269fa76 2488 LOAD_UTF8_CHARCLASS(mark,"~");
3568d838
JH
2489 if (locinput >= PL_regeol ||
2490 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3 2491 sayNO;
6f06b55f 2492 locinput += PL_utf8skip[nextchr];
3568d838
JH
2493 while (locinput < PL_regeol &&
2494 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3
LW
2495 locinput += UTF8SKIP(locinput);
2496 if (locinput > PL_regeol)
2497 sayNO;
2498 nextchr = UCHARAT(locinput);
2499 break;
c8756f30 2500 case REFFL:
3280af22 2501 PL_reg_flags |= RF_tainted;
c8756f30 2502 /* FALL THROUGH */
c277df42 2503 case REF:
c8756f30 2504 case REFF:
c277df42 2505 n = ARG(scan); /* which paren pair */
cf93c79d 2506 ln = PL_regstartp[n];
2c2d71f5 2507 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2508 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2509 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2510 if (ln == PL_regendp[n])
a0d0e21e 2511 break;
a0ed51b3 2512
cf93c79d 2513 s = PL_bostr + ln;
1aa99e6b 2514 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2515 char *l = locinput;
cf93c79d 2516 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2517 /*
2518 * Note that we can't do the "other character" lookup trick as
2519 * in the 8-bit case (no pun intended) because in Unicode we
2520 * have to map both upper and title case to lower case.
2521 */
2522 if (OP(scan) == REFF) {
a2a2844f
JH
2523 STRLEN ulen1, ulen2;
2524 U8 tmpbuf1[UTF8_MAXLEN*2+1];
2525 U8 tmpbuf2[UTF8_MAXLEN*2+1];
a0ed51b3
LW
2526 while (s < e) {
2527 if (l >= PL_regeol)
2528 sayNO;
a2a2844f
JH
2529 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2530 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 2531 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 2532 sayNO;
a2a2844f
JH
2533 s += ulen1;
2534 l += ulen2;
a0ed51b3
LW
2535 }
2536 }
2537 locinput = l;
2538 nextchr = UCHARAT(locinput);
2539 break;
2540 }
2541
a0d0e21e 2542 /* Inline the first character, for speed. */
76e3520e 2543 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2544 (OP(scan) == REF ||
2545 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2546 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2547 sayNO;
cf93c79d 2548 ln = PL_regendp[n] - ln;
3280af22 2549 if (locinput + ln > PL_regeol)
4633a7c4 2550 sayNO;
c8756f30
AK
2551 if (ln > 1 && (OP(scan) == REF
2552 ? memNE(s, locinput, ln)
2553 : (OP(scan) == REFF
2554 ? ibcmp(s, locinput, ln)
2555 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2556 sayNO;
a0d0e21e 2557 locinput += ln;
76e3520e 2558 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2559 break;
2560
2561 case NOTHING:
c277df42 2562 case TAIL:
a0d0e21e
LW
2563 break;
2564 case BACK:
2565 break;
c277df42
IZ
2566 case EVAL:
2567 {
2568 dSP;
533c011a 2569 OP_4tree *oop = PL_op;
3280af22
NIS
2570 COP *ocurcop = PL_curcop;
2571 SV **ocurpad = PL_curpad;
c277df42 2572 SV *ret;
9041c2e3 2573
c277df42 2574 n = ARG(scan);
533c011a 2575 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2576 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2577 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2578 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2579
8e5e9ebe
RGS
2580 {
2581 SV **before = SP;
2582 CALLRUNOPS(aTHX); /* Scalar context. */
2583 SPAGAIN;
2584 if (SP == before)
2585 ret = Nullsv; /* protect against empty (?{}) blocks. */
2586 else {
2587 ret = POPs;
2588 PUTBACK;
2589 }
2590 }
2591
0f5d15d6
IZ
2592 PL_op = oop;
2593 PL_curpad = ocurpad;
2594 PL_curcop = ocurcop;
c277df42 2595 if (logical) {
0f5d15d6
IZ
2596 if (logical == 2) { /* Postponed subexpression. */
2597 regexp *re;
22c35a8c 2598 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2599 re_cc_state state;
0f5d15d6
IZ
2600 CHECKPOINT cp, lastcp;
2601
2602 if(SvROK(ret) || SvRMAGICAL(ret)) {
2603 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2604
2605 if(SvMAGICAL(sv))
14befaf4 2606 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2607 }
2608 if (mg) {
2609 re = (regexp *)mg->mg_obj;
df0003d4 2610 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2611 }
2612 else {
2613 STRLEN len;
2614 char *t = SvPV(ret, len);
2615 PMOP pm;
2616 char *oprecomp = PL_regprecomp;
2617 I32 osize = PL_regsize;
2618 I32 onpar = PL_regnpar;
2619
5fcd1c1b 2620 Zero(&pm, 1, PMOP);
cea2e8a9 2621 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2622 if (!(SvFLAGS(ret)
0f5d15d6 2623 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2624 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2625 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2626 PL_regprecomp = oprecomp;
2627 PL_regsize = osize;
2628 PL_regnpar = onpar;
2629 }
2630 DEBUG_r(
9041c2e3 2631 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2632 "Entering embedded `%s%.60s%s%s'\n",
2633 PL_colors[0],
2634 re->precomp,
2635 PL_colors[1],
2636 (strlen(re->precomp) > 60 ? "..." : ""))
2637 );
2638 state.node = next;
2639 state.prev = PL_reg_call_cc;
2640 state.cc = PL_regcc;
2641 state.re = PL_reg_re;
2642
2ab05381 2643 PL_regcc = 0;
9041c2e3 2644
0f5d15d6 2645 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2646 REGCP_SET(lastcp);
0f5d15d6
IZ
2647 cache_re(re);
2648 state.ss = PL_savestack_ix;
2649 *PL_reglastparen = 0;
a01268b5 2650 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2651 PL_reg_call_cc = &state;
2652 PL_reginput = locinput;
2c2d71f5
JH
2653
2654 /* XXXX This is too dramatic a measure... */
2655 PL_reg_maxiter = 0;
2656
0f5d15d6 2657 if (regmatch(re->program + 1)) {
2c914db6
IZ
2658 /* Even though we succeeded, we need to restore
2659 global variables, since we may be wrapped inside
2660 SUSPEND, thus the match may be not finished yet. */
2661
2662 /* XXXX Do this only if SUSPENDed? */
2663 PL_reg_call_cc = state.prev;
2664 PL_regcc = state.cc;
2665 PL_reg_re = state.re;
2666 cache_re(PL_reg_re);
2667
2668 /* XXXX This is too dramatic a measure... */
2669 PL_reg_maxiter = 0;
2670
2671 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2672 ReREFCNT_dec(re);
2673 regcpblow(cp);
2674 sayYES;
2675 }
0f5d15d6 2676 ReREFCNT_dec(re);
02db2b7b 2677 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2678 regcppop();
2679 PL_reg_call_cc = state.prev;
2680 PL_regcc = state.cc;
2681 PL_reg_re = state.re;
d3790889 2682 cache_re(PL_reg_re);
2c2d71f5
JH
2683
2684 /* XXXX This is too dramatic a measure... */
2685 PL_reg_maxiter = 0;
2686
8e514ae6 2687 logical = 0;
0f5d15d6
IZ
2688 sayNO;
2689 }
c277df42 2690 sw = SvTRUE(ret);
0f5d15d6 2691 logical = 0;
a0ed51b3
LW
2692 }
2693 else
3280af22 2694 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2695 break;
2696 }
a0d0e21e 2697 case OPEN:
c277df42 2698 n = ARG(scan); /* which paren pair */
3280af22
NIS
2699 PL_reg_start_tmp[n] = locinput;
2700 if (n > PL_regsize)
2701 PL_regsize = n;
a0d0e21e
LW
2702 break;
2703 case CLOSE:
c277df42 2704 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2705 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2706 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2707 if (n > *PL_reglastparen)
2708 *PL_reglastparen = n;
a01268b5 2709 *PL_reglastcloseparen = n;
a0d0e21e 2710 break;
c277df42
IZ
2711 case GROUPP:
2712 n = ARG(scan); /* which paren pair */
cf93c79d 2713 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2714 break;
2715 case IFTHEN:
2c2d71f5 2716 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2717 if (sw)
2718 next = NEXTOPER(NEXTOPER(scan));
2719 else {
2720 next = scan + ARG(scan);
2721 if (OP(next) == IFTHEN) /* Fake one. */
2722 next = NEXTOPER(NEXTOPER(next));
2723 }
2724 break;
2725 case LOGICAL:
0f5d15d6 2726 logical = scan->flags;
c277df42 2727 break;
2ab05381
IZ
2728/*******************************************************************
2729 PL_regcc contains infoblock about the innermost (...)* loop, and
2730 a pointer to the next outer infoblock.
2731
2732 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2733
2734 1) After matching X, regnode for CURLYX is processed;
2735
9041c2e3 2736 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2737 regmatch() recursively with the starting point at WHILEM node;
2738
2739 3) Each hit of WHILEM node tries to match A and Z (in the order
2740 depending on the current iteration, min/max of {min,max} and
2741 greediness). The information about where are nodes for "A"
2742 and "Z" is read from the infoblock, as is info on how many times "A"
2743 was already matched, and greediness.
2744
2745 4) After A matches, the same WHILEM node is hit again.
2746
2747 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2748 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2749 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2750 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2751 of the external loop.
2752
2753 Currently present infoblocks form a tree with a stem formed by PL_curcc
2754 and whatever it mentions via ->next, and additional attached trees
2755 corresponding to temporarily unset infoblocks as in "5" above.
2756
9041c2e3 2757 In the following picture infoblocks for outer loop of
2ab05381
IZ
2758 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2759 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2760 infoblocks are drawn below the "reset" infoblock.
2761
2762 In fact in the picture below we do not show failed matches for Z and T
2763 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2764 more obvious *why* one needs to *temporary* unset infoblocks.]
2765
2766 Matched REx position InfoBlocks Comment
2767 (Y(A)*?Z)*?T x
2768 Y(A)*?Z)*?T x <- O
2769 Y (A)*?Z)*?T x <- O
2770 Y A)*?Z)*?T x <- O <- I
2771 YA )*?Z)*?T x <- O <- I
2772 YA A)*?Z)*?T x <- O <- I
2773 YAA )*?Z)*?T x <- O <- I
2774 YAA Z)*?T x <- O # Temporary unset I
2775 I
2776
2777 YAAZ Y(A)*?Z)*?T x <- O
2778 I
2779
2780 YAAZY (A)*?Z)*?T x <- O
2781 I
2782
2783 YAAZY A)*?Z)*?T x <- O <- I
2784 I
2785
2786 YAAZYA )*?Z)*?T x <- O <- I
2787 I
2788
2789 YAAZYA Z)*?T x <- O # Temporary unset I
2790 I,I
2791
2792 YAAZYAZ )*?T x <- O
2793 I,I
2794
2795 YAAZYAZ T x # Temporary unset O
2796 O
2797 I,I
2798
2799 YAAZYAZT x
2800 O
2801 I,I
2802 *******************************************************************/
a0d0e21e
LW
2803 case CURLYX: {
2804 CURCUR cc;
3280af22 2805 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2806 /* No need to save/restore up to this paren */
2807 I32 parenfloor = scan->flags;
c277df42
IZ
2808
2809 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2810 next += ARG(next);
3280af22
NIS
2811 cc.oldcc = PL_regcc;
2812 PL_regcc = &cc;
cb434fcc
IZ
2813 /* XXXX Probably it is better to teach regpush to support
2814 parenfloor > PL_regsize... */
2815 if (parenfloor > *PL_reglastparen)
2816 parenfloor = *PL_reglastparen; /* Pessimization... */
2817 cc.parenfloor = parenfloor;
a0d0e21e
LW
2818 cc.cur = -1;
2819 cc.min = ARG1(scan);
2820 cc.max = ARG2(scan);
c277df42 2821 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2822 cc.next = next;
2823 cc.minmod = minmod;
2824 cc.lastloc = 0;
3280af22 2825 PL_reginput = locinput;
a0d0e21e
LW
2826 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2827 regcpblow(cp);
3280af22 2828 PL_regcc = cc.oldcc;
4633a7c4 2829 saySAME(n);
a0d0e21e
LW
2830 }
2831 /* NOT REACHED */
2832 case WHILEM: {
2833 /*
2834 * This is really hard to understand, because after we match
2835 * what we're trying to match, we must make sure the rest of
2c2d71f5 2836 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2837 * to go back UP the parse tree by recursing ever deeper. And
2838 * if it fails, we have to reset our parent's current state
2839 * that we can try again after backing off.
2840 */
2841
c277df42 2842 CHECKPOINT cp, lastcp;
3280af22 2843 CURCUR* cc = PL_regcc;
c277df42
IZ
2844 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2845
4633a7c4 2846 n = cc->cur + 1; /* how many we know we matched */
3280af22 2847 PL_reginput = locinput;
a0d0e21e 2848
c277df42 2849 DEBUG_r(
9041c2e3
NIS
2850 PerlIO_printf(Perl_debug_log,
2851 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2852 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2853 (long)n, (long)cc->min,
c277df42
IZ
2854 (long)cc->max, (long)cc)
2855 );
4633a7c4 2856
a0d0e21e
LW
2857 /* If degenerate scan matches "", assume scan done. */
2858
579cf2c3 2859 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2860 PL_regcc = cc->oldcc;
2ab05381
IZ
2861 if (PL_regcc)
2862 ln = PL_regcc->cur;
c277df42 2863 DEBUG_r(
c3464db5
DD
2864 PerlIO_printf(Perl_debug_log,
2865 "%*s empty match detected, try continuation...\n",
3280af22 2866 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2867 );
a0d0e21e 2868 if (regmatch(cc->next))
4633a7c4 2869 sayYES;
2ab05381
IZ
2870 if (PL_regcc)
2871 PL_regcc->cur = ln;
3280af22 2872 PL_regcc = cc;
4633a7c4 2873 sayNO;
a0d0e21e
LW
2874 }
2875
2876 /* First just match a string of min scans. */
2877
2878 if (n < cc->min) {
2879 cc->cur = n;
2880 cc->lastloc = locinput;
4633a7c4
LW
2881 if (regmatch(cc->scan))
2882 sayYES;
2883 cc->cur = n - 1;
c277df42 2884 cc->lastloc = lastloc;
4633a7c4 2885 sayNO;
a0d0e21e
LW
2886 }
2887
2c2d71f5
JH
2888 if (scan->flags) {
2889 /* Check whether we already were at this position.
2890 Postpone detection until we know the match is not
2891 *that* much linear. */
2892 if (!PL_reg_maxiter) {
2893 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2894 PL_reg_leftiter = PL_reg_maxiter;
2895 }
2896 if (PL_reg_leftiter-- == 0) {
2897 I32 size = (PL_reg_maxiter + 7)/8;
2898 if (PL_reg_poscache) {
2899 if (PL_reg_poscache_size < size) {
2900 Renew(PL_reg_poscache, size, char);
2901 PL_reg_poscache_size = size;
2902 }
2903 Zero(PL_reg_poscache, size, char);
2904 }
2905 else {
2906 PL_reg_poscache_size = size;
2907 Newz(29, PL_reg_poscache, size, char);
2908 }
2909 DEBUG_r(
2910 PerlIO_printf(Perl_debug_log,
2911 "%sDetected a super-linear match, switching on caching%s...\n",
2912 PL_colors[4], PL_colors[5])
2913 );
2914 }
2915 if (PL_reg_leftiter < 0) {
2916 I32 o = locinput - PL_bostr, b;
2917
2918 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2919 b = o % 8;
2920 o /= 8;
2921 if (PL_reg_poscache[o] & (1<<b)) {
2922 DEBUG_r(
2923 PerlIO_printf(Perl_debug_log,
2924 "%*s already tried at this position...\n",
2925 REPORT_CODE_OFF+PL_regindent*2, "")
2926 );
7821416a 2927 sayNO_SILENT;
2c2d71f5
JH
2928 }
2929 PL_reg_poscache[o] |= (1<<b);
2930 }
2931 }
2932
a0d0e21e
LW
2933 /* Prefer next over scan for minimal matching. */
2934
2935 if (cc->minmod) {
3280af22 2936 PL_regcc = cc->oldcc;
2ab05381
IZ
2937 if (PL_regcc)
2938 ln = PL_regcc->cur;
5f05dabc 2939 cp = regcppush(cc->parenfloor);
02db2b7b 2940 REGCP_SET(lastcp);
5f05dabc 2941 if (regmatch(cc->next)) {
c277df42 2942 regcpblow(cp);
4633a7c4 2943 sayYES; /* All done. */
5f05dabc 2944 }
02db2b7b 2945 REGCP_UNWIND(lastcp);
5f05dabc 2946 regcppop();
2ab05381
IZ
2947 if (PL_regcc)
2948 PL_regcc->cur = ln;
3280af22 2949 PL_regcc = cc;
a0d0e21e 2950
c277df42 2951 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 2952 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2953 && !(PL_reg_flags & RF_warned)) {
2954 PL_reg_flags |= RF_warned;
e476b1b5 2955 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2956 "Complex regular subexpression recursion",
2957 REG_INFTY - 1);
c277df42 2958 }
4633a7c4 2959 sayNO;
c277df42 2960 }
a687059c 2961
c277df42 2962 DEBUG_r(
c3464db5
DD
2963 PerlIO_printf(Perl_debug_log,
2964 "%*s trying longer...\n",
3280af22 2965 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2966 );
a0d0e21e 2967 /* Try scanning more and see if it helps. */
3280af22 2968 PL_reginput = locinput;
a0d0e21e
LW
2969 cc->cur = n;
2970 cc->lastloc = locinput;
5f05dabc 2971 cp = regcppush(cc->parenfloor);
02db2b7b 2972 REGCP_SET(lastcp);
5f05dabc 2973 if (regmatch(cc->scan)) {
c277df42 2974 regcpblow(cp);
4633a7c4 2975 sayYES;
5f05dabc 2976 }
02db2b7b 2977 REGCP_UNWIND(lastcp);
5f05dabc 2978 regcppop();
4633a7c4 2979 cc->cur = n - 1;
c277df42 2980 cc->lastloc = lastloc;
4633a7c4 2981 sayNO;
a0d0e21e
LW
2982 }
2983
2984 /* Prefer scan over next for maximal matching. */
2985
2986 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2987 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2988 cc->cur = n;
2989 cc->lastloc = locinput;
02db2b7b 2990 REGCP_SET(lastcp);
5f05dabc 2991 if (regmatch(cc->scan)) {
c277df42 2992 regcpblow(cp);
4633a7c4 2993 sayYES;
5f05dabc 2994 }
02db2b7b 2995 REGCP_UNWIND(lastcp);
a0d0e21e 2996 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2997 PL_reginput = locinput;
c277df42 2998 DEBUG_r(
c3464db5
DD
2999 PerlIO_printf(Perl_debug_log,
3000 "%*s failed, try continuation...\n",
3280af22 3001 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3002 );
3003 }
9041c2e3 3004 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3005 && !(PL_reg_flags & RF_warned)) {
3280af22 3006 PL_reg_flags |= RF_warned;
e476b1b5 3007 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
3008 "Complex regular subexpression recursion",
3009 REG_INFTY - 1);
a0d0e21e
LW
3010 }
3011
3012 /* Failed deeper matches of scan, so see if this one works. */
3280af22 3013 PL_regcc = cc->oldcc;
2ab05381
IZ
3014 if (PL_regcc)
3015 ln = PL_regcc->cur;
a0d0e21e 3016 if (regmatch(cc->next))
4633a7c4 3017 sayYES;
2ab05381
IZ
3018 if (PL_regcc)
3019 PL_regcc->cur = ln;
3280af22 3020 PL_regcc = cc;
4633a7c4 3021 cc->cur = n - 1;
c277df42 3022 cc->lastloc = lastloc;
4633a7c4 3023 sayNO;
a0d0e21e
LW
3024 }
3025 /* NOT REACHED */
9041c2e3 3026 case BRANCHJ:
c277df42
IZ
3027 next = scan + ARG(scan);
3028 if (next == scan)
3029 next = NULL;
3030 inner = NEXTOPER(NEXTOPER(scan));
3031 goto do_branch;
9041c2e3 3032 case BRANCH:
c277df42
IZ
3033 inner = NEXTOPER(scan);
3034 do_branch:
3035 {
c277df42
IZ
3036 c1 = OP(scan);
3037 if (OP(next) != c1) /* No choice. */
3038 next = inner; /* Avoid recursion. */
a0d0e21e 3039 else {
02db2b7b
IZ
3040 I32 lastparen = *PL_reglastparen;
3041 I32 unwind1;
3042 re_unwind_branch_t *uw;
3043
3044 /* Put unwinding data on stack */
3045 unwind1 = SSNEWt(1,re_unwind_branch_t);
3046 uw = SSPTRt(unwind1,re_unwind_branch_t);
3047 uw->prev = unwind;
3048 unwind = unwind1;
3049 uw->type = ((c1 == BRANCH)
3050 ? RE_UNWIND_BRANCH
3051 : RE_UNWIND_BRANCHJ);
3052 uw->lastparen = lastparen;
3053 uw->next = next;
3054 uw->locinput = locinput;
3055 uw->nextchr = nextchr;
3056#ifdef DEBUGGING
3057 uw->regindent = ++PL_regindent;
3058#endif
c277df42 3059
02db2b7b
IZ
3060 REGCP_SET(uw->lastcp);
3061
3062 /* Now go into the first branch */
3063 next = inner;
a687059c 3064 }
a0d0e21e
LW
3065 }
3066 break;
3067 case MINMOD:
3068 minmod = 1;
3069 break;
c277df42
IZ
3070 case CURLYM:
3071 {
00db4c45 3072 I32 l = 0;
c277df42 3073 CHECKPOINT lastcp;
9041c2e3 3074
c277df42
IZ
3075 /* We suppose that the next guy does not need
3076 backtracking: in particular, it is of constant length,
3077 and has no parenths to influence future backrefs. */
3078 ln = ARG1(scan); /* min to match */
3079 n = ARG2(scan); /* max to match */
c277df42
IZ
3080 paren = scan->flags;
3081 if (paren) {
3280af22
NIS
3082 if (paren > PL_regsize)
3083 PL_regsize = paren;
3084 if (paren > *PL_reglastparen)
3085 *PL_reglastparen = paren;
c277df42 3086 }
dc45a647 3087 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3088 if (paren)
3089 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3090 PL_reginput = locinput;
c277df42
IZ
3091 if (minmod) {
3092 minmod = 0;
3093 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3094 sayNO;
f31a99c8
HS
3095 /* if we matched something zero-length we don't need to
3096 backtrack - capturing parens are already defined, so
3097 the caveat in the maximal case doesn't apply
3098
3099 XXXX if ln == 0, we can redo this check first time
3100 through the following loop
3101 */
3102 if (ln && l == 0)
3103 n = ln; /* don't backtrack */
3280af22 3104 locinput = PL_reginput;
e2d8ce26 3105 if (NEAR_EXACT(next)) {
5f80c4cf
JP
3106 regnode *text_node = next;
3107
3108 if (PL_regkind[(U8)OP(next)] != EXACT)
74750237 3109 NEXT_IMPT(text_node);
5f80c4cf
JP
3110
3111 if (PL_regkind[(U8)OP(text_node)] != EXACT) {
3112 c1 = c2 = -1000;
3113 }
3114 else {
3115 c1 = (U8)*STRING(text_node);
3116 if (OP(next) == EXACTF)
3117 c2 = PL_fold[c1];
3118 else if (OP(text_node) == EXACTFL)
3119 c2 = PL_fold_locale[c1];
3120 else
3121 c2 = c1;
3122 }
a0ed51b3
LW
3123 }
3124 else
c277df42 3125 c1 = c2 = -1000;
02db2b7b 3126 REGCP_SET(lastcp);
5f4b28b2 3127 /* This may be improved if l == 0. */
c277df42
IZ
3128 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3129 /* If it could work, try it. */
3130 if (c1 == -1000 ||
3280af22
NIS
3131 UCHARAT(PL_reginput) == c1 ||
3132 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3133 {
3134 if (paren) {
f31a99c8 3135 if (ln) {
cf93c79d
IZ
3136 PL_regstartp[paren] =
3137 HOPc(PL_reginput, -l) - PL_bostr;
3138 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3139 }
3140 else
cf93c79d 3141 PL_regendp[paren] = -1;
c277df42
IZ
3142 }
3143 if (regmatch(next))
3144 sayYES;
02db2b7b 3145 REGCP_UNWIND(lastcp);
c277df42
IZ
3146 }
3147 /* Couldn't or didn't -- move forward. */
3280af22 3148 PL_reginput = locinput;
c277df42
IZ
3149 if (regrepeat_hard(scan, 1, &l)) {
3150 ln++;
3280af22 3151 locinput = PL_reginput;
c277df42
IZ
3152 }
3153 else
3154 sayNO;
3155 }
a0ed51b3
LW
3156 }
3157 else {
c277df42 3158 n = regrepeat_hard(scan, n, &l);
f31a99c8
HS
3159 /* if we matched something zero-length we don't need to
3160 backtrack, unless the minimum count is zero and we
3161 are capturing the result - in that case the capture
3162 being defined or not may affect later execution
3163 */
3164 if (n != 0 && l == 0 && !(paren && ln == 0))
3165 ln = n; /* don't backtrack */
3280af22 3166 locinput = PL_reginput;
c277df42 3167 DEBUG_r(
5c0ca799 3168 PerlIO_printf(Perl_debug_log,
faccc32b 3169 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3170 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3171 (IV) n, (IV)l)
c277df42
IZ
3172 );
3173 if (n >= ln) {
e2d8ce26 3174 if (NEAR_EXACT(next)) {
5f80c4cf
JP
3175 regnode *text_node = next;
3176
3177 if (PL_regkind[(U8)OP(next)] != EXACT)
74750237 3178 NEXT_IMPT(text_node);
5f80c4cf
JP
3179
3180 if (PL_regkind[(U8)OP(text_node)] != EXACT) {
3181 c1 = c2 = -1000;
3182 }
3183 else {
3184 c1 = (U8)*STRING(text_node);
3185 if (OP(text_node) == EXACTF)
3186 c2 = PL_fold[c1];
3187 else if (OP(text_node) == EXACTFL)
3188 c2 = PL_fold_locale[c1];
3189 else
3190 c2 = c1;
3191 }
a0ed51b3
LW
3192 }
3193 else
c277df42
IZ
3194 c1 = c2 = -1000;
3195 }
02db2b7b 3196 REGCP_SET(lastcp);
c277df42
IZ
3197 while (n >= ln) {
3198 /* If it could work, try it. */
3199 if (c1 == -1000 ||
3280af22
NIS
3200 UCHARAT(PL_reginput) == c1 ||
3201 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3202 {
3203 DEBUG_r(
c3464db5 3204 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3205 "%*s trying tail with n=%"IVdf"...\n",
3206 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3207 );
3208 if (paren) {
3209 if (n) {
cf93c79d
IZ
3210 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3211 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3212 }
a0ed51b3 3213 else
cf93c79d 3214 PL_regendp[paren] = -1;
c277df42 3215 }
a0ed51b3
LW
3216 if (regmatch(next))
3217 sayYES;
02db2b7b 3218 REGCP_UNWIND(lastcp);
a0ed51b3 3219 }
c277df42
IZ
3220 /* Couldn't or didn't -- back up. */
3221 n--;
dfe13c55 3222 locinput = HOPc(locinput, -l);
3280af22 3223 PL_reginput = locinput;
c277df42
IZ
3224 }
3225 }
3226 sayNO;
3227 break;
3228 }
3229 case CURLYN:
3230 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3231 if (paren > PL_regsize)
3232 PL_regsize = paren;
3233 if (paren > *PL_reglastparen)
3234 *PL_reglastparen = paren;
c277df42
IZ
3235 ln = ARG1(scan); /* min to match */
3236 n = ARG2(scan); /* max to match */
dc45a647 3237 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3238 goto repeat;
a0d0e21e 3239 case CURLY:
c277df42 3240 paren = 0;
a0d0e21e
LW
3241 ln = ARG1(scan); /* min to match */
3242 n = ARG2(scan); /* max to match */
dc45a647 3243 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3244 goto repeat;
3245 case STAR:
3246 ln = 0;
c277df42 3247 n = REG_INFTY;
a0d0e21e 3248 scan = NEXTOPER(scan);
c277df42 3249 paren = 0;
a0d0e21e
LW
3250 goto repeat;
3251 case PLUS:
c277df42
IZ
3252 ln = 1;
3253 n = REG_INFTY;
3254 scan = NEXTOPER(scan);
3255 paren = 0;
3256 repeat:
a0d0e21e
LW
3257 /*
3258 * Lookahead to avoid useless match attempts
3259 * when we know what character comes next.
3260 */
5f80c4cf
JP
3261
3262 /*
3263 * Used to only do .*x and .*?x, but now it allows
3264 * for )'s, ('s and (?{ ... })'s to be in the way
3265 * of the quantifier and the EXACT-like node. -- japhy
3266 */
3267
e2d8ce26 3268 if (NEAR_EXACT(next)) {
5f80c4cf
JP
3269 U8 *s;
3270 regnode *text_node = next;
3271
3272 if (PL_regkind[(U8)OP(next)] != EXACT)
74750237 3273 NEXT_IMPT(text_node);
5f80c4cf
JP
3274
3275 if (PL_regkind[(U8)OP(text_node)] != EXACT) {
3276 c1 = c2 = -1000;
3277 }
3278 else {
3279 s = (U8*)STRING(text_node);
3280
3281 if (!UTF) {
3282 c2 = c1 = *s;
3283 if (OP(text_node) == EXACTF)
3284 c2 = PL_fold[c1];
3285 else if (OP(text_node) == EXACTFL)
3286 c2 = PL_fold_locale[c1];
1aa99e6b 3287 }
5f80c4cf
JP
3288 else { /* UTF */
3289 if (OP(text_node) == EXACTF) {
a2a2844f
JH
3290 STRLEN ulen1, ulen2;
3291 U8 tmpbuf1[UTF8_MAXLEN*2+1];
3292 U8 tmpbuf2[UTF8_MAXLEN*2+1];
3293
3294 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3295 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3296
3297 c1 = utf8_to_uvuni(tmpbuf1, 0);
3298 c2 = utf8_to_uvuni(tmpbuf2, 0);
5f80c4cf
JP
3299 }
3300 else {
3301 c2 = c1 = utf8_to_uvchr(s, NULL);
3302 }
1aa99e6b
IH
3303 }
3304 }
bbce6d69 3305 }
a0d0e21e 3306 else
bbce6d69 3307 c1 = c2 = -1000;
3280af22 3308 PL_reginput = locinput;
a0d0e21e 3309 if (minmod) {
c277df42 3310 CHECKPOINT lastcp;
a0d0e21e
LW
3311 minmod = 0;
3312 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3313 sayNO;
a0ed51b3 3314 locinput = PL_reginput;
02db2b7b 3315 REGCP_SET(lastcp);
0fe9bf95 3316 if (c1 != -1000) {
1aa99e6b 3317 char *e; /* Should not check after this */
0fe9bf95
IZ
3318 char *old = locinput;
3319
1aa99e6b 3320 if (n == REG_INFTY) {
0fe9bf95 3321 e = PL_regeol - 1;
1aa99e6b
IH
3322 if (do_utf8)
3323 while (UTF8_IS_CONTINUATION(*(U8*)e))
3324 e--;
3325 }
3326 else if (do_utf8) {
3327 int m = n - ln;
3328 for (e = locinput;
3329 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3330 e += UTF8SKIP(e);
3331 }
3332 else {
3333 e = locinput + n - ln;
3334 if (e >= PL_regeol)
3335 e = PL_regeol - 1;
3336 }
0fe9bf95 3337 while (1) {
1aa99e6b 3338 int count;
0fe9bf95 3339 /* Find place 'next' could work */
1aa99e6b
IH
3340 if (!do_utf8) {
3341 if (c1 == c2) {
a8e8ab15
JH
3342 while (locinput <= e &&
3343 UCHARAT(locinput) != c1)
1aa99e6b
IH
3344 locinput++;
3345 } else {
9041c2e3 3346 while (locinput <= e
a8e8ab15
JH
3347 && UCHARAT(locinput) != c1
3348 && UCHARAT(locinput) != c2)
1aa99e6b
IH
3349 locinput++;
3350 }
3351 count = locinput - old;
3352 }
3353 else {
3354 STRLEN len;
3355 if (c1 == c2) {
3356 for (count = 0;
3357 locinput <= e &&
9041c2e3 3358 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3359 count++)
3360 locinput += len;
3361
3362 } else {
3363 for (count = 0; locinput <= e; count++) {
9041c2e3 3364 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3365 if (c == c1 || c == c2)
3366 break;
9041c2e3 3367 locinput += len;
1aa99e6b
IH
3368 }
3369 }
0fe9bf95 3370 }
9041c2e3 3371 if (locinput > e)
0fe9bf95
IZ
3372 sayNO;
3373 /* PL_reginput == old now */
3374 if (locinput != old) {
3375 ln = 1; /* Did some */
1aa99e6b 3376 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3377 sayNO;
3378 }
3379 /* PL_reginput == locinput now */
29d1e993 3380 TRYPAREN(paren, ln, locinput);
0fe9bf95 3381 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3382 REGCP_UNWIND(lastcp);
0fe9bf95 3383 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3384 old = locinput;
3385 if (do_utf8)
3386 locinput += UTF8SKIP(locinput);
3387 else
3388 locinput++;
0fe9bf95
IZ
3389 }
3390 }
3391 else
c277df42 3392 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3393 UV c;
3394 if (c1 != -1000) {
3395 if (do_utf8)
9041c2e3 3396 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3397 else
9041c2e3 3398 c = UCHARAT(PL_reginput);
2390ecbc
PP
3399 /* If it could work, try it. */
3400 if (c == c1 || c == c2)
3401 {
3402 TRYPAREN(paren, n, PL_reginput);
3403 REGCP_UNWIND(lastcp);
3404 }
1aa99e6b 3405 }
a0d0e21e 3406 /* If it could work, try it. */
2390ecbc 3407 else if (c1 == -1000)
bbce6d69 3408 {
29d1e993 3409 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3410 REGCP_UNWIND(lastcp);
bbce6d69 3411 }
c277df42 3412 /* Couldn't or didn't -- move forward. */
a0ed51b3 3413 PL_reginput = locinput;
a0d0e21e
LW
3414 if (regrepeat(scan, 1)) {
3415 ln++;
a0ed51b3
LW
3416 locinput = PL_reginput;
3417 }
3418 else
4633a7c4 3419 sayNO;
a0d0e21e
LW
3420 }
3421 }
3422 else {
c277df42 3423 CHECKPOINT lastcp;
a0d0e21e 3424 n = regrepeat(scan, n);
a0ed51b3 3425 locinput = PL_reginput;
22c35a8c 3426 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3427 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3428 ln = n; /* why back off? */
1aeab75a
GS
3429 /* ...because $ and \Z can match before *and* after
3430 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3431 We should back off by one in this case. */
3432 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3433 ln--;
3434 }
02db2b7b 3435 REGCP_SET(lastcp);
c277df42 3436 if (paren) {
8fa7f367 3437 UV c = 0;
c277df42 3438 while (n >= ln) {
1aa99e6b
IH
3439 if (c1 != -1000) {
3440 if (do_utf8)
9041c2e3 3441 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3442 else
9041c2e3 3443 c = UCHARAT(PL_reginput);
1aa99e6b 3444 }
c277df42 3445 /* If it could work, try it. */
1aa99e6b 3446 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3447 {
29d1e993 3448 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3449 REGCP_UNWIND(lastcp);
c277df42
IZ
3450 }
3451 /* Couldn't or didn't -- back up. */
3452 n--;
dfe13c55 3453 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3454 }
a0ed51b3
LW
3455 }
3456 else {
8fa7f367 3457 UV c = 0;
c277df42 3458 while (n >= ln) {
1aa99e6b
IH
3459 if (c1 != -1000) {
3460 if (do_utf8)
9041c2e3 3461 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3462 else
9041c2e3 3463 c = UCHARAT(PL_reginput);
1aa99e6b 3464 }
c277df42 3465 /* If it could work, try it. */
1aa99e6b 3466 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3467 {
29d1e993 3468 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3469 REGCP_UNWIND(lastcp);
c277df42
IZ
3470 }
3471 /* Couldn't or didn't -- back up. */
3472 n--;
dfe13c55 3473 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3474 }
a0d0e21e
LW
3475 }
3476 }
4633a7c4 3477 sayNO;
c277df42 3478 break;
a0d0e21e 3479 case END:
0f5d15d6
IZ
3480 if (PL_reg_call_cc) {
3481 re_cc_state *cur_call_cc = PL_reg_call_cc;
3482 CURCUR *cctmp = PL_regcc;
3483 regexp *re = PL_reg_re;
3484 CHECKPOINT cp, lastcp;
3485
3486 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3487 REGCP_SET(lastcp);
0f5d15d6
IZ
3488 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3489 the caller. */
3490 PL_reginput = locinput; /* Make position available to
3491 the callcc. */
3492 cache_re(PL_reg_call_cc->re);
3493 PL_regcc = PL_reg_call_cc->cc;
3494 PL_reg_call_cc = PL_reg_call_cc->prev;
3495 if (regmatch(cur_call_cc->node)) {
3496 PL_reg_call_cc = cur_call_cc;
3497 regcpblow(cp);
3498 sayYES;
3499 }
02db2b7b 3500 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3501 regcppop();
3502 PL_reg_call_cc = cur_call_cc;
3503 PL_regcc = cctmp;
3504 PL_reg_re = re;
3505 cache_re(re);
3506
3507 DEBUG_r(
3508 PerlIO_printf(Perl_debug_log,
3509 "%*s continuation failed...\n",
3510 REPORT_CODE_OFF+PL_regindent*2, "")
3511 );
7821416a 3512 sayNO_SILENT;
0f5d15d6 3513 }
7821416a
IZ
3514 if (locinput < PL_regtill) {
3515 DEBUG_r(PerlIO_printf(Perl_debug_log,
3516 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3517 PL_colors[4],
3518 (long)(locinput - PL_reg_starttry),
3519 (long)(PL_regtill - PL_reg_starttry),
3520 PL_colors[5]));
3521 sayNO_FINAL; /* Cannot match: too short. */
3522 }
3523 PL_reginput = locinput; /* put where regtry can find it */
3524 sayYES_FINAL; /* Success! */
7e5428c5 3525 case SUCCEED:
3280af22 3526 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3527 sayYES_LOUD; /* Success! */
c277df42
IZ
3528 case SUSPEND:
3529 n = 1;
9fe1d20c 3530 PL_reginput = locinput;
9041c2e3 3531 goto do_ifmatch;
a0d0e21e 3532 case UNLESSM:
c277df42 3533 n = 0;
a0ed51b3 3534 if (scan->flags) {
efb30f32
HS
3535 s = HOPBACKc(locinput, scan->flags);
3536 if (!s)
3537 goto say_yes;
3538 PL_reginput = s;
a0ed51b3
LW
3539 }
3540 else
3541 PL_reginput = locinput;
c277df42
IZ
3542 goto do_ifmatch;
3543 case IFMATCH:
3544 n = 1;
a0ed51b3 3545 if (scan->flags) {
efb30f32
HS
3546 s = HOPBACKc(locinput, scan->flags);
3547 if (!s)
3548 goto say_no;
3549 PL_reginput = s;
a0ed51b3
LW
3550 }
3551 else
3552 PL_reginput = locinput;
3553
c277df42 3554 do_ifmatch:
c277df42
IZ
3555 inner = NEXTOPER(NEXTOPER(scan));
3556 if (regmatch(inner) != n) {
3557 say_no:
3558 if (logical) {
3559 logical = 0;
3560 sw = 0;
3561 goto do_longjump;
a0ed51b3
LW
3562 }
3563 else
c277df42
IZ
3564 sayNO;
3565 }
3566 say_yes:
3567 if (logical) {
3568 logical = 0;
3569 sw = 1;
3570 }
fe44a5e8 3571 if (OP(scan) == SUSPEND) {
3280af22 3572 locinput = PL_reginput;
565764a8 3573 nextchr = UCHARAT(locinput);
fe44a5e8 3574 }
c277df42
IZ
3575 /* FALL THROUGH. */
3576 case LONGJMP:
3577 do_longjump:
3578 next = scan + ARG(scan);
3579 if (next == scan)
3580 next = NULL;
a0d0e21e
LW
3581 break;
3582 default:
b900a521 3583 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3584 PTR2UV(scan), OP(scan));
cea2e8a9 3585 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3586 }
02db2b7b 3587 reenter:
a0d0e21e
LW
3588 scan = next;
3589 }
a687059c 3590
a0d0e21e
LW
3591 /*
3592 * We get here only if there's trouble -- normally "case END" is
3593 * the terminating point.
3594 */
cea2e8a9 3595 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3596 /*NOTREACHED*/
4633a7c4
LW
3597 sayNO;
3598
7821416a
IZ
3599yes_loud:
3600 DEBUG_r(
3601 PerlIO_printf(Perl_debug_log,
3602 "%*s %scould match...%s\n",
3603 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3604 );
3605 goto yes;
3606yes_final:
3607 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3608 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3609yes:
3610#ifdef DEBUGGING
3280af22 3611 PL_regindent--;
4633a7c4 3612#endif
02db2b7b
IZ
3613
3614#if 0 /* Breaks $^R */
3615 if (unwind)
3616 regcpblow(firstcp);
3617#endif
4633a7c4
LW
3618 return 1;
3619
3620no:
7821416a
IZ
3621 DEBUG_r(
3622 PerlIO_printf(Perl_debug_log,
3623 "%*s %sfailed...%s\n",
3624 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3625 );
3626 goto do_no;
3627no_final:
3628do_no:
02db2b7b
IZ
3629 if (unwind) {
3630 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3631
3632 switch (uw->type) {
3633 case RE_UNWIND_BRANCH:
3634 case RE_UNWIND_BRANCHJ:
3635 {
3636 re_unwind_branch_t *uwb = &(uw->branch);
3637 I32 lastparen = uwb->lastparen;
9041c2e3 3638
02db2b7b
IZ
3639 REGCP_UNWIND(uwb->lastcp);
3640 for (n = *PL_reglastparen; n > lastparen; n--)
3641 PL_regendp[n] = -1;
3642 *PL_reglastparen = n;
3643 scan = next = uwb->next;
9041c2e3
NIS
3644 if ( !scan ||
3645 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
3646 ? BRANCH : BRANCHJ) ) { /* Failure */
3647 unwind = uwb->prev;
3648#ifdef DEBUGGING
3649 PL_regindent--;
3650#endif
3651 goto do_no;
3652 }
3653 /* Have more choice yet. Reuse the same uwb. */
3654 /*SUPPRESS 560*/
3655 if ((n = (uwb->type == RE_UNWIND_BRANCH
3656 ? NEXT_OFF(next) : ARG(next))))
3657 next += n;
3658 else
3659 next = NULL; /* XXXX Needn't unwinding in this case... */
3660 uwb->next = next;
3661 next = NEXTOPER(scan);
3662 if (uwb->type == RE_UNWIND_BRANCHJ)
3663 next = NEXTOPER(next);
3664 locinput = uwb->locinput;
3665 nextchr = uwb->nextchr;
3666#ifdef DEBUGGING
3667 PL_regindent = uwb->regindent;
3668#endif
3669
3670 goto reenter;
3671 }
3672 /* NOT REACHED */
3673 default:
3674 Perl_croak(aTHX_ "regexp unwind memory corruption");
3675 }
3676 /* NOT REACHED */
3677 }
4633a7c4 3678#ifdef DEBUGGING
3280af22 3679 PL_regindent--;
4633a7c4 3680#endif
a0d0e21e 3681 return 0;
a687059c
LW
3682}
3683
3684/*
3685 - regrepeat - repeatedly match something simple, report how many
3686 */
3687/*
3688 * [This routine now assumes that it will only match on things of length 1.
3689 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3690 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3691 */
76e3520e 3692STATIC I32
cea2e8a9 3693S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3694{
a0d0e21e 3695 register char *scan;
a0d0e21e 3696 register I32 c;
3280af22 3697 register char *loceol = PL_regeol;
a0ed51b3 3698 register I32 hardcount = 0;
53c4c00c 3699 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 3700
3280af22 3701 scan = PL_reginput;
c277df42 3702 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3703 loceol = scan + max;
a0d0e21e 3704 switch (OP(p)) {
22c35a8c 3705 case REG_ANY:
1aa99e6b 3706 if (do_utf8) {
ffc61ed2 3707 loceol = PL_regeol;
1aa99e6b 3708 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3709 scan += UTF8SKIP(scan);
3710 hardcount++;
3711 }
3712 } else {
3713 while (scan < loceol && *scan != '\n')
3714 scan++;
a0ed51b3
LW
3715 }
3716 break;
ffc61ed2 3717 case SANY:
3baa4c62 3718 scan = loceol;
a0ed51b3 3719 break;
f33976b4
DB
3720 case CANY:
3721 scan = loceol;
3722 break;
bbce6d69 3723 case EXACT: /* length of string is 1 */
cd439c50 3724 c = (U8)*STRING(p);
bbce6d69 3725 while (scan < loceol && UCHARAT(scan) == c)
3726 scan++;
3727 break;
3728 case EXACTF: /* length of string is 1 */
cd439c50 3729 c = (U8)*STRING(p);
bbce6d69 3730 while (scan < loceol &&
22c35a8c 3731 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3732 scan++;
3733 break;
3734 case EXACTFL: /* length of string is 1 */
3280af22 3735 PL_reg_flags |= RF_tainted;
cd439c50 3736 c = (U8)*STRING(p);
bbce6d69 3737 while (scan < loceol &&
22c35a8c 3738 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3739 scan++;
3740 break;
3741 case ANYOF:
ffc61ed2
JH
3742 if (do_utf8) {
3743 loceol = PL_regeol;
1aa99e6b
IH
3744 while (hardcount < max && scan < loceol &&
3745 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3746 scan += UTF8SKIP(scan);
3747 hardcount++;
3748 }
3749 } else {
3750 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3751 scan++;
3752 }
a0d0e21e
LW
3753 break;
3754 case ALNUM:
1aa99e6b 3755 if (do_utf8) {
ffc61ed2 3756 loceol = PL_regeol;
8269fa76 3757 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3758 while (hardcount < max && scan < loceol &&
3568d838 3759 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3760 scan += UTF8SKIP(scan);
3761 hardcount++;
3762 }
3763 } else {
3764 while (scan < loceol && isALNUM(*scan))
3765 scan++;
a0ed51b3
LW
3766 }
3767 break;
bbce6d69 3768 case ALNUML:
3280af22 3769 PL_reg_flags |= RF_tainted;
1aa99e6b 3770 if (do_utf8) {
ffc61ed2 3771 loceol = PL_regeol;
1aa99e6b
IH
3772 while (hardcount < max && scan < loceol &&
3773 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3774 scan += UTF8SKIP(scan);
3775 hardcount++;
3776 }
3777 } else {
3778 while (scan < loceol && isALNUM_LC(*scan))
3779 scan++;
a0ed51b3
LW
3780 }
3781 break;
a0d0e21e 3782 case NALNUM:
1aa99e6b 3783 if (do_utf8) {
ffc61ed2 3784 loceol = PL_regeol;
8269fa76 3785 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3786 while (hardcount < max && scan < loceol &&
3568d838 3787 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3788 scan += UTF8SKIP(scan);
3789 hardcount++;
3790 }
3791 } else {
3792 while (scan < loceol && !isALNUM(*scan))
3793 scan++;
a0ed51b3
LW
3794 }
3795 break;
bbce6d69 3796 case NALNUML:
3280af22 3797 PL_reg_flags |= RF_tainted;
1aa99e6b 3798 if (do_utf8) {
ffc61ed2 3799 loceol = PL_regeol;
1aa99e6b
IH
3800 while (hardcount < max && scan < loceol &&
3801 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3802 scan += UTF8SKIP(scan);
3803 hardcount++;
3804 }
3805 } else {
3806 while (scan < loceol && !isALNUM_LC(*scan))
3807 scan++;
a0ed51b3
LW
3808 }
3809 break;
a0d0e21e 3810 case SPACE:
1aa99e6b 3811 if (do_utf8) {
ffc61ed2 3812 loceol = PL_regeol;
8269fa76 3813 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3814 while (hardcount < max && scan < loceol &&
3568d838
JH
3815 (*scan == ' ' ||
3816 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3817 scan += UTF8SKIP(scan);
3818 hardcount++;
3819 }
3820 } else {
3821 while (scan < loceol && isSPACE(*scan))
3822 scan++;
a0ed51b3
LW
3823 }
3824 break;
bbce6d69 3825 case SPACEL:
3280af22 3826 PL_reg_flags |= RF_tainted;
1aa99e6b 3827 if (do_utf8) {
ffc61ed2 3828 loceol = PL_regeol;
1aa99e6b 3829 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3830 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3831 scan += UTF8SKIP(scan);
3832 hardcount++;
3833 }
3834 } else {
3835 while (scan < loceol && isSPACE_LC(*scan))
3836 scan++;
a0ed51b3
LW
3837 }
3838 break;
a0d0e21e 3839 case NSPACE:
1aa99e6b 3840 if (do_utf8) {
ffc61ed2 3841 loceol = PL_regeol;
8269fa76 3842 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3843 while (hardcount < max && scan < loceol &&
3568d838
JH
3844 !(*scan == ' ' ||
3845 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3846 scan += UTF8SKIP(scan);
3847 hardcount++;
3848 }
3849 } else {
3850 while (scan < loceol && !isSPACE(*scan))
3851 scan++;
3852 break;
a0ed51b3 3853 }
bbce6d69 3854 case NSPACEL:
3280af22 3855 PL_reg_flags |= RF_tainted;
1aa99e6b 3856 if (do_utf8) {
ffc61ed2 3857 loceol = PL_regeol;
1aa99e6b 3858 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3859 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3860 scan += UTF8SKIP(scan);
3861 hardcount++;
3862 }
3863 } else {
3864 while (scan < loceol && !isSPACE_LC(*scan))
3865 scan++;
a0ed51b3
LW
3866 }
3867 break;
a0d0e21e 3868 case DIGIT:
1aa99e6b 3869 if (do_utf8) {
ffc61ed2 3870 loceol = PL_regeol;
8269fa76 3871 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3872 while (hardcount < max && scan < loceol &&
3568d838 3873 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3874 scan += UTF8SKIP(scan);
3875 hardcount++;
3876 }
3877 } else {
3878 while (scan < loceol && isDIGIT(*scan))
3879 scan++;
a0ed51b3
LW
3880 }
3881 break;
a0d0e21e 3882 case NDIGIT:
1aa99e6b 3883 if (do_utf8) {
ffc61ed2 3884 loceol = PL_regeol;
8269fa76 3885 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3886 while (hardcount < max && scan < loceol &&
3568d838 3887 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3888 scan += UTF8SKIP(scan);
3889 hardcount++;
3890 }
3891 } else {
3892 while (scan < loceol && !isDIGIT(*scan))
3893 scan++;
a0ed51b3
LW
3894 }
3895 break;
a0d0e21e
LW
3896 default: /* Called on something of 0 width. */
3897 break; /* So match right here or not at all. */
3898 }
a687059c 3899
a0ed51b3
LW
3900 if (hardcount)
3901 c = hardcount;
3902 else
3903 c = scan - PL_reginput;
3280af22 3904 PL_reginput = scan;
a687059c 3905
9041c2e3 3906 DEBUG_r(
c277df42
IZ
3907 {
3908 SV *prop = sv_newmortal();
3909
3910 regprop(prop, p);
9041c2e3
NIS
3911 PerlIO_printf(Perl_debug_log,
3912 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
7b0972df 3913 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42 3914 });
9041c2e3 3915
a0d0e21e 3916 return(c);
a687059c
LW
3917}
3918
3919/*
c277df42 3920 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 3921 *
c277df42
IZ
3922 * The repeater is supposed to have constant length.
3923 */
3924
76e3520e 3925STATIC I32
cea2e8a9 3926S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3927{
b7953727 3928 register char *scan = Nullch;
c277df42 3929 register char *start;
3280af22 3930 register char *loceol = PL_regeol;
a0ed51b3 3931 I32 l = 0;
708e3b05 3932 I32 count = 0, res = 1;
a0ed51b3
LW
3933
3934 if (!max)
3935 return 0;
c277df42 3936
3280af22 3937 start = PL_reginput;
53c4c00c 3938 if (PL_reg_match_utf8) {
708e3b05 3939 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3940 if (!count++) {
3941 l = 0;
3942 while (start < PL_reginput) {
3943 l++;
3944 start += UTF8SKIP(start);
3945 }
3946 *lp = l;
3947 if (l == 0)
3948 return max;
3949 }
3950 if (count == max)
3951 return count;
3952 }
3953 }
3954 else {
708e3b05 3955 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3956 if (!count++) {
3957 *lp = l = PL_reginput - start;
3958 if (max != REG_INFTY && l*max < loceol - scan)
3959 loceol = scan + l*max;
3960 if (l == 0)
3961 return max;
c277df42
IZ
3962 }
3963 }
3964 }
708e3b05 3965 if (!res)
3280af22 3966 PL_reginput = scan;
9041c2e3 3967
a0ed51b3 3968 return count;
c277df42
IZ
3969}
3970
3971/*
ffc61ed2
JH
3972- regclass_swash - prepare the utf8 swash
3973*/
3974
3975SV *
3976Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
3977{
3978 SV *sw = NULL;
3979 SV *si = NULL;
3980
3981 if (PL_regdata && PL_regdata->count) {
3982 U32 n = ARG(node);
3983
3984 if (PL_regdata->what[n] == 's') {
3985 SV *rv = (SV*)PL_regdata->data[n];
3986 AV *av = (AV*)SvRV((SV*)rv);
3987 SV **a;
9041c2e3 3988
ffc61ed2
JH
3989 si = *av_fetch(av, 0, FALSE);
3990 a = av_fetch(av, 1, FALSE);
9041c2e3 3991
ffc61ed2
JH
3992 if (a)
3993 sw = *a;
3994 else if (si && doinit) {
3995 sw = swash_init("utf8", "", si, 1, 0);
3996 (void)av_store(av, 1, sw);
3997 }
3998 }
3999 }
4000
4001 if (initsvp)
4002 *initsvp = si;
4003
4004 return sw;
4005}
4006
4007/*
cb8d8820 4008 - reginclass - determine if a character falls into a character class
bbce6d69 4009 */
4010
76e3520e 4011STATIC bool
ffc61ed2 4012S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
bbce6d69 4013{
ffc61ed2 4014 char flags = ANYOF_FLAGS(n);
bbce6d69 4015 bool match = FALSE;
1aa99e6b 4016 UV c;
3568d838 4017 STRLEN len = 0;
1aa99e6b 4018
3568d838 4019 c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
bbce6d69 4020
ffc61ed2
JH
4021 if (do_utf8 || (flags & ANYOF_UNICODE)) {
4022 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
4023 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4024 match = TRUE;
bbce6d69 4025 }
3568d838 4026 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 4027 match = TRUE;
ffc61ed2
JH
4028 if (!match) {
4029 SV *sw = regclass_swash(n, TRUE, 0);
4030
4031 if (sw) {
3568d838 4032 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
4033 match = TRUE;
4034 else if (flags & ANYOF_FOLD) {
a2a2844f
JH
4035 STRLEN ulen;
4036 U8 tmpbuf[UTF8_MAXLEN*2+1];
4037
4038 toLOWER_utf8(p, tmpbuf, &ulen);
3568d838 4039 if (swash_fetch(sw, tmpbuf, do_utf8))
ffc61ed2
JH
4040 match = TRUE;
4041 }
4042 }
bbce6d69 4043 }
4044 }
1aa99e6b 4045 if (!match && c < 256) {
ffc61ed2
JH
4046 if (ANYOF_BITMAP_TEST(n, c))
4047 match = TRUE;
4048 else if (flags & ANYOF_FOLD) {
3568d838 4049 I32 f;
a0ed51b3 4050
ffc61ed2
JH
4051 if (flags & ANYOF_LOCALE) {
4052 PL_reg_flags |= RF_tainted;
4053 f = PL_fold_locale[c];
4054 }
4055 else
4056 f = PL_fold[c];
4057 if (f != c && ANYOF_BITMAP_TEST(n, f))
4058 match = TRUE;
4059 }
4060
4061 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 4062 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
4063 if (
4064 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4065 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4066 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
4067 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4068 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4069 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4070 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4071 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4072 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4073 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4074 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
4075 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
4076 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4077 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4078 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4079 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4080 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
4081 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4082 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
4083 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4084 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4085 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4086 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
4087 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4088 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4089 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
4090 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
4091 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
4092 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
4093 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
4094 ) /* How's that for a conditional? */
4095 {
4096 match = TRUE;
4097 }
a0ed51b3 4098 }
a0ed51b3
LW
4099 }
4100
a0ed51b3
LW
4101 return (flags & ANYOF_INVERT) ? !match : match;
4102}
161b471a 4103
dfe13c55 4104STATIC U8 *
cea2e8a9 4105S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 4106{
1aa99e6b
IH
4107 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4108}
4109
4110STATIC U8 *
4111S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 4112{
a0ed51b3 4113 if (off >= 0) {
1aa99e6b 4114 while (off-- && s < lim) {
ffc61ed2 4115 /* XXX could check well-formedness here */
a0ed51b3 4116 s += UTF8SKIP(s);
ffc61ed2 4117 }
a0ed51b3
LW
4118 }
4119 else {
4120 while (off++) {
1aa99e6b 4121 if (s > lim) {
a0ed51b3 4122 s--;
ffc61ed2 4123 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4124 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4125 s--;
ffc61ed2
JH
4126 }
4127 /* XXX could check well-formedness here */
a0ed51b3
LW
4128 }
4129 }
4130 }
4131 return s;
4132}
161b471a 4133
dfe13c55 4134STATIC U8 *
1aa99e6b 4135S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 4136{
1aa99e6b
IH
4137 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4138}
4139
4140STATIC U8 *
4141S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
4142{
4143 if (off >= 0) {
1aa99e6b 4144 while (off-- && s < lim) {
ffc61ed2 4145 /* XXX could check well-formedness here */
a0ed51b3 4146 s += UTF8SKIP(s);
ffc61ed2 4147 }
a0ed51b3
LW
4148 if (off >= 0)
4149 return 0;
4150 }
4151 else {
4152 while (off++) {
1aa99e6b 4153 if (s > lim) {
a0ed51b3 4154 s--;
ffc61ed2 4155 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4156 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4157 s--;
ffc61ed2
JH
4158 }
4159 /* XXX could check well-formedness here */
a0ed51b3
LW
4160 }
4161 else
4162 break;
4163 }
4164 if (off <= 0)
4165 return 0;
4166 }
4167 return s;
4168}
51371543 4169
51371543 4170static void
acfe0abc 4171restore_pos(pTHX_ void *arg)
51371543 4172{
51371543
GS
4173 if (PL_reg_eval_set) {
4174 if (PL_reg_oldsaved) {
4175 PL_reg_re->subbeg = PL_reg_oldsaved;
4176 PL_reg_re->sublen = PL_reg_oldsavedlen;
4177 RX_MATCH_COPIED_on(PL_reg_re);
4178 }
4179 PL_reg_magic->mg_len = PL_reg_oldpos;
4180 PL_reg_eval_set = 0;
4181 PL_curpm = PL_reg_oldcurpm;
4182 }
4183}