This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: Why *not* use UNIVERSAL qw( isa can ) ; ??
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
a687059c
LW
83#include "regcomp.h"
84
c277df42
IZ
85#define RF_tainted 1 /* tainted information used? */
86#define RF_warned 2 /* warned about big count? */
ce862d02 87#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
88#define RF_utf8 8 /* String contains multibyte chars? */
89
90#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
91
92#define RS_init 1 /* eval environment created */
93#define RS_set 2 /* replsv value is set */
c277df42 94
a687059c
LW
95#ifndef STATIC
96#define STATIC static
97#endif
98
c277df42
IZ
99/*
100 * Forwards.
101 */
102
a0ed51b3 103#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 104#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 105
dfe13c55
GS
106#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
108#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
109#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
110#define HOPc(pos,off) ((char*)HOP(pos,off))
111#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 112
efb30f32 113#define HOPBACK(pos, off) ( \
53c4c00c 114 (UTF && PL_reg_match_utf8) \
efb30f32
HS
115 ? reghopmaybe((U8*)pos, -off) \
116 : (pos - off >= PL_bostr) \
117 ? (U8*)(pos - off) \
118 : (U8*)NULL \
119)
120#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
121
1aa99e6b
IH
122#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
123#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
124#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
125#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
126#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
127#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
128
8269fa76 129#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 130
5f80c4cf 131/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
132#define JUMPABLE(rn) ( \
133 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
134 OP(rn) == SUSPEND || OP(rn) == IFMATCH \
135)
136
137#define NEAR_EXACT(rn) (PL_regkind[(U8)OP(rn)] == EXACT || JUMPABLE(rn))
138
139#define NEXT_IMPT(rn) STMT_START { \
140 while (JUMPABLE(rn)) \
141 if (OP(rn) == SUSPEND || OP(rn) == IFMATCH) \
142 rn = NEXTOPER(NEXTOPER(rn)); \
143 else rn += NEXT_OFF(rn); \
5f80c4cf 144} STMT_END
74750237 145
acfe0abc 146static void restore_pos(pTHX_ void *arg);
51371543 147
76e3520e 148STATIC CHECKPOINT
cea2e8a9 149S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 150{
3280af22 151 int retval = PL_savestack_ix;
b1ce53c5
JH
152#define REGCP_PAREN_ELEMS 4
153 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
154 int p;
155
e49a9654
IH
156 if (paren_elems_to_push < 0)
157 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
158
a01268b5 159#define REGCP_OTHER_ELEMS 6
b1ce53c5 160 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 161 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 162/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
163 SSPUSHINT(PL_regendp[p]);
164 SSPUSHINT(PL_regstartp[p]);
3280af22 165 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
166 SSPUSHINT(p);
167 }
b1ce53c5 168/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
169 SSPUSHINT(PL_regsize);
170 SSPUSHINT(*PL_reglastparen);
a01268b5 171 SSPUSHINT(*PL_reglastcloseparen);
3280af22 172 SSPUSHPTR(PL_reginput);
41123dfd
JH
173#define REGCP_FRAME_ELEMS 2
174/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
175 * are needed for the regexp context stack bookkeeping. */
176 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 177 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 178
a0d0e21e
LW
179 return retval;
180}
181
c277df42 182/* These are needed since we do not localize EVAL nodes: */
02db2b7b 183# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 184 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 185 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 186
02db2b7b 187# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 188 PerlIO_printf(Perl_debug_log, \
faccc32b 189 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 190 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 191
76e3520e 192STATIC char *
cea2e8a9 193S_regcppop(pTHX)
a0d0e21e 194{
b1ce53c5 195 I32 i;
a0d0e21e
LW
196 U32 paren = 0;
197 char *input;
cf93c79d 198 I32 tmps;
b1ce53c5
JH
199
200 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 201 i = SSPOPINT;
b1ce53c5
JH
202 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
203 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 204 input = (char *) SSPOPPTR;
a01268b5 205 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
206 *PL_reglastparen = SSPOPINT;
207 PL_regsize = SSPOPINT;
b1ce53c5
JH
208
209 /* Now restore the parentheses context. */
41123dfd
JH
210 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
211 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 212 paren = (U32)SSPOPINT;
3280af22 213 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
214 PL_regstartp[paren] = SSPOPINT;
215 tmps = SSPOPINT;
3280af22
NIS
216 if (paren <= *PL_reglastparen)
217 PL_regendp[paren] = tmps;
c277df42 218 DEBUG_r(
c3464db5 219 PerlIO_printf(Perl_debug_log,
b900a521 220 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 221 (UV)paren, (IV)PL_regstartp[paren],
b900a521 222 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 223 (IV)PL_regendp[paren],
3280af22 224 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 225 );
a0d0e21e 226 }
c277df42 227 DEBUG_r(
3280af22 228 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 229 PerlIO_printf(Perl_debug_log,
faccc32b
JH
230 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
231 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
232 }
233 );
daf18116 234#if 1
dafc8851
JH
235 /* It would seem that the similar code in regtry()
236 * already takes care of this, and in fact it is in
237 * a better location to since this code can #if 0-ed out
238 * but the code in regtry() is needed or otherwise tests
239 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
240 * (as of patchlevel 7877) will fail. Then again,
241 * this code seems to be necessary or otherwise
242 * building DynaLoader will fail:
243 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
244 * --jhi */
3280af22
NIS
245 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
246 if (paren > PL_regsize)
cf93c79d
IZ
247 PL_regstartp[paren] = -1;
248 PL_regendp[paren] = -1;
a0d0e21e 249 }
dafc8851 250#endif
a0d0e21e
LW
251 return input;
252}
253
0f5d15d6 254STATIC char *
cea2e8a9 255S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
256{
257 I32 tmp = PL_savestack_ix;
258
259 PL_savestack_ix = ss;
260 regcppop();
261 PL_savestack_ix = tmp;
942e002e 262 return Nullch;
0f5d15d6
IZ
263}
264
265typedef struct re_cc_state
266{
267 I32 ss;
268 regnode *node;
269 struct re_cc_state *prev;
270 CURCUR *cc;
271 regexp *re;
272} re_cc_state;
273
02db2b7b 274#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 275
29d1e993
HS
276#define TRYPAREN(paren, n, input) { \
277 if (paren) { \
278 if (n) { \
279 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
280 PL_regendp[paren] = input - PL_bostr; \
281 } \
282 else \
283 PL_regendp[paren] = -1; \
284 } \
285 if (regmatch(next)) \
286 sayYES; \
287 if (paren && n) \
288 PL_regendp[paren] = -1; \
289}
290
291
a687059c 292/*
e50aee73 293 * pregexec and friends
a687059c
LW
294 */
295
296/*
c277df42 297 - pregexec - match a regexp against a string
a687059c 298 */
c277df42 299I32
864dbfa3 300Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 301 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
302/* strend: pointer to null at end of string */
303/* strbeg: real beginning of string */
304/* minend: end of match must be >=minend after stringarg. */
305/* nosave: For optimizations. */
306{
307 return
9041c2e3 308 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
309 nosave ? 0 : REXEC_COPY_STR);
310}
0f5d15d6
IZ
311
312STATIC void
cea2e8a9 313S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
314{
315 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
316#ifdef DEBUGGING
317 PL_regprogram = prog->program;
318#endif
319 PL_regnpar = prog->nparens;
9041c2e3
NIS
320 PL_regdata = prog->data;
321 PL_reg_re = prog;
0f5d15d6 322}
22e551b9 323
9041c2e3 324/*
cad2e5aa
JH
325 * Need to implement the following flags for reg_anch:
326 *
327 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
328 * USE_INTUIT_ML
329 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
330 * INTUIT_AUTORITATIVE_ML
331 * INTUIT_ONCE_NOML - Intuit can match in one location only.
332 * INTUIT_ONCE_ML
333 *
334 * Another flag for this function: SECOND_TIME (so that float substrs
335 * with giant delta may be not rechecked).
336 */
337
338/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
339
2c2d71f5 340/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
341 Otherwise, only SvCUR(sv) is used to get strbeg. */
342
343/* XXXX We assume that strpos is strbeg unless sv. */
344
6eb5f6b9
JH
345/* XXXX Some places assume that there is a fixed substring.
346 An update may be needed if optimizer marks as "INTUITable"
347 RExen without fixed substrings. Similarly, it is assumed that
348 lengths of all the strings are no more than minlen, thus they
349 cannot come from lookahead.
350 (Or minlen should take into account lookahead.) */
351
2c2d71f5
JH
352/* A failure to find a constant substring means that there is no need to make
353 an expensive call to REx engine, thus we celebrate a failure. Similarly,
354 finding a substring too deep into the string means that less calls to
30944b6d
IZ
355 regtry() should be needed.
356
357 REx compiler's optimizer found 4 possible hints:
358 a) Anchored substring;
359 b) Fixed substring;
360 c) Whether we are anchored (beginning-of-line or \G);
361 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 362 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
363 string which does not contradict any of them.
364 */
2c2d71f5 365
6eb5f6b9
JH
366/* Most of decisions we do here should have been done at compile time.
367 The nodes of the REx which we used for the search should have been
368 deleted from the finite automaton. */
369
cad2e5aa
JH
370char *
371Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
372 char *strend, U32 flags, re_scream_pos_data *data)
373{
b7953727 374 register I32 start_shift = 0;
cad2e5aa 375 /* Should be nonnegative! */
b7953727 376 register I32 end_shift = 0;
2c2d71f5
JH
377 register char *s;
378 register SV *check;
a1933d95 379 char *strbeg;
cad2e5aa
JH
380 char *t;
381 I32 ml_anch;
6eb5f6b9 382 register char *other_last = Nullch; /* other substr checked before this */
b7953727 383 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
384#ifdef DEBUGGING
385 char *i_strpos = strpos;
2a782b5b 386 SV *dsv = sv_2mortal(newSVpvn("", 0));
30944b6d 387#endif
cad2e5aa 388
2a782b5b 389 DEBUG_r({
40eddc46
JH
390 char*s = UTF ? sv_uni_display(dsv, sv, 60, 0) : strpos;
391 int len = UTF ? strlen(s) : strend - strpos;
2a782b5b
JH
392 if (!PL_colorset)
393 reginitcolors();
394 PerlIO_printf(Perl_debug_log,
395 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
396 PL_colors[4],PL_colors[5],PL_colors[0],
397 prog->precomp,
398 PL_colors[1],
399 (strlen(prog->precomp) > 60 ? "..." : ""),
400 PL_colors[0],
401 (int)(len > 60 ? 60 : len),
402 s, PL_colors[1],
403 (len > 60 ? "..." : "")
404 );
405 });
cad2e5aa 406
1aa99e6b
IH
407 if (prog->reganch & ROPT_UTF8)
408 PL_reg_flags |= RF_utf8;
409
410 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
a72c7584
JH
411 DEBUG_r(PerlIO_printf(Perl_debug_log,
412 "String too short... [re_intuit_start]\n"));
cad2e5aa 413 goto fail;
2c2d71f5 414 }
a1933d95 415 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 416 PL_regeol = strend;
653099ff 417 check = prog->check_substr;
2c2d71f5 418 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
419 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
420 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 421 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 422
7e25d62c
JH
423 if (!ml_anch) {
424 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
425 | ROPT_IMPLICIT)) /* not a real BOL */
426 /* SvCUR is not set on references: SvRV and SvPVX overlap */
427 && sv && !SvROK(sv)
428 && (strpos != strbeg)) {
429 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
430 goto fail;
431 }
432 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 433 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 434 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
435 I32 slen;
436
1aa99e6b 437 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
438 if (SvTAIL(check)) {
439 slen = SvCUR(check); /* >= 1 */
cad2e5aa 440
9041c2e3 441 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
442 || (strend - s == slen && strend[-1] != '\n')) {
443 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
444 goto fail_finish;
cad2e5aa
JH
445 }
446 /* Now should match s[0..slen-2] */
447 slen--;
653099ff 448 if (slen && (*SvPVX(check) != *s
cad2e5aa 449 || (slen > 1
653099ff 450 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
451 report_neq:
452 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
453 goto fail_finish;
454 }
cad2e5aa 455 }
653099ff
GS
456 else if (*SvPVX(check) != *s
457 || ((slen = SvCUR(check)) > 1
458 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
459 goto report_neq;
460 goto success_at_start;
7e25d62c 461 }
cad2e5aa 462 }
2c2d71f5 463 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 464 s = strpos;
2c2d71f5 465 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 466 end_shift = prog->minlen - start_shift -
653099ff 467 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 468 if (!ml_anch) {
653099ff
GS
469 I32 end = prog->check_offset_max + CHR_SVLEN(check)
470 - (SvTAIL(check) != 0);
1aa99e6b 471 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
472
473 if (end_shift < eshift)
474 end_shift = eshift;
475 }
cad2e5aa 476 }
2c2d71f5 477 else { /* Can match at random position */
cad2e5aa
JH
478 ml_anch = 0;
479 s = strpos;
2c2d71f5
JH
480 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
481 /* Should be nonnegative! */
482 end_shift = prog->minlen - start_shift -
653099ff 483 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
484 }
485
2c2d71f5 486#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 487 if (end_shift < 0)
6bbae5e6 488 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
489#endif
490
2c2d71f5
JH
491 restart:
492 /* Find a possible match in the region s..strend by looking for
493 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 494 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
495 I32 p = -1; /* Internal iterator of scream. */
496 I32 *pp = data ? data->scream_pos : &p;
497
2c2d71f5
JH
498 if (PL_screamfirst[BmRARE(check)] >= 0
499 || ( BmRARE(check) == '\n'
500 && (BmPREVIOUS(check) == SvCUR(check) - 1)
501 && SvTAIL(check) ))
9041c2e3 502 s = screaminstr(sv, check,
2c2d71f5 503 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 504 else
2c2d71f5 505 goto fail_finish;
cad2e5aa
JH
506 if (data)
507 *data->scream_olds = s;
508 }
f33976b4 509 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
510 s = fbm_instr((U8*)(s + start_shift),
511 (U8*)(strend - end_shift),
512 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 513 else
1aa99e6b
IH
514 s = fbm_instr(HOP3(s, start_shift, strend),
515 HOP3(strend, -end_shift, strbeg),
2c2d71f5 516 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
517
518 /* Update the count-of-usability, remove useless subpatterns,
519 unshift s. */
2c2d71f5
JH
520
521 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
522 (s ? "Found" : "Did not find"),
523 ((check == prog->anchored_substr) ? "anchored" : "floating"),
524 PL_colors[0],
7b0972df
JH
525 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
526 SvPVX(check),
2c2d71f5
JH
527 PL_colors[1], (SvTAIL(check) ? "$" : ""),
528 (s ? " at offset " : "...\n") ) );
529
530 if (!s)
531 goto fail_finish;
532
6eb5f6b9
JH
533 check_at = s;
534
2c2d71f5 535 /* Finish the diagnostic message */
30944b6d 536 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
537
538 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
539 Start with the other substr.
540 XXXX no SCREAM optimization yet - and a very coarse implementation
541 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
542 *always* match. Probably should be marked during compile...
543 Probably it is right to do no SCREAM here...
544 */
545
546 if (prog->float_substr && prog->anchored_substr) {
30944b6d 547 /* Take into account the "other" substring. */
2c2d71f5
JH
548 /* XXXX May be hopelessly wrong for UTF... */
549 if (!other_last)
6eb5f6b9 550 other_last = strpos;
2c2d71f5 551 if (check == prog->float_substr) {
30944b6d
IZ
552 do_other_anchored:
553 {
1aa99e6b 554 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
555 char *s1 = s;
556
2c2d71f5
JH
557 t = s - prog->check_offset_max;
558 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
559 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 560 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 561 && t > strpos)))
30944b6d 562 /* EMPTY */;
2c2d71f5
JH
563 else
564 t = strpos;
1aa99e6b 565 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
566 if (t < other_last) /* These positions already checked */
567 t = other_last;
1aa99e6b 568 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
569 if (last < last1)
570 last1 = last;
571 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
572 /* On end-of-str: see comment below. */
573 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
574 HOP3(HOP3(last1, prog->anchored_offset, strend)
575 + SvCUR(prog->anchored_substr),
576 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
577 prog->anchored_substr,
578 PL_multiline ? FBMrf_MULTILINE : 0);
579 DEBUG_r(PerlIO_printf(Perl_debug_log,
580 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
581 (s ? "Found" : "Contradicts"),
582 PL_colors[0],
7b0972df
JH
583 (int)(SvCUR(prog->anchored_substr)
584 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
585 SvPVX(prog->anchored_substr),
586 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
587 if (!s) {
588 if (last1 >= last2) {
589 DEBUG_r(PerlIO_printf(Perl_debug_log,
590 ", giving up...\n"));
591 goto fail_finish;
592 }
593 DEBUG_r(PerlIO_printf(Perl_debug_log,
594 ", trying floating at offset %ld...\n",
1aa99e6b
IH
595 (long)(HOP3c(s1, 1, strend) - i_strpos)));
596 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
597 s = HOP3c(last, 1, strend);
2c2d71f5
JH
598 goto restart;
599 }
600 else {
601 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 602 (long)(s - i_strpos)));
1aa99e6b
IH
603 t = HOP3c(s, -prog->anchored_offset, strbeg);
604 other_last = HOP3c(s, 1, strend);
30944b6d 605 s = s1;
2c2d71f5
JH
606 if (t == strpos)
607 goto try_at_start;
2c2d71f5
JH
608 goto try_at_offset;
609 }
30944b6d 610 }
2c2d71f5
JH
611 }
612 else { /* Take into account the floating substring. */
613 char *last, *last1;
614 char *s1 = s;
615
1aa99e6b
IH
616 t = HOP3c(s, -start_shift, strbeg);
617 last1 = last =
618 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
619 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
620 last = HOP3c(t, prog->float_max_offset, strend);
621 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
622 if (s < other_last)
623 s = other_last;
2c2d71f5
JH
624 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
625 /* fbm_instr() takes into account exact value of end-of-str
626 if the check is SvTAIL(ed). Since false positives are OK,
627 and end-of-str is not later than strend we are OK. */
628 s = fbm_instr((unsigned char*)s,
629 (unsigned char*)last + SvCUR(prog->float_substr)
630 - (SvTAIL(prog->float_substr)!=0),
631 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
632 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
633 (s ? "Found" : "Contradicts"),
634 PL_colors[0],
7b0972df
JH
635 (int)(SvCUR(prog->float_substr)
636 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
637 SvPVX(prog->float_substr),
638 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
639 if (!s) {
640 if (last1 == last) {
641 DEBUG_r(PerlIO_printf(Perl_debug_log,
642 ", giving up...\n"));
643 goto fail_finish;
644 }
645 DEBUG_r(PerlIO_printf(Perl_debug_log,
646 ", trying anchored starting at offset %ld...\n",
30944b6d 647 (long)(s1 + 1 - i_strpos)));
803ff556 648 other_last = last;
1aa99e6b 649 s = HOP3c(t, 1, strend);
2c2d71f5
JH
650 goto restart;
651 }
652 else {
653 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 654 (long)(s - i_strpos)));
803ff556 655 other_last = s; /* Fix this later. --Hugo */
30944b6d 656 s = s1;
2c2d71f5
JH
657 if (t == strpos)
658 goto try_at_start;
2c2d71f5
JH
659 goto try_at_offset;
660 }
661 }
cad2e5aa 662 }
2c2d71f5
JH
663
664 t = s - prog->check_offset_max;
2c2d71f5
JH
665 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
666 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
667 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
668 && t > strpos))) {
2c2d71f5
JH
669 /* Fixed substring is found far enough so that the match
670 cannot start at strpos. */
671 try_at_offset:
cad2e5aa 672 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
673 /* Eventually fbm_*() should handle this, but often
674 anchored_offset is not 0, so this check will not be wasted. */
675 /* XXXX In the code below we prefer to look for "^" even in
676 presence of anchored substrings. And we search even
677 beyond the found float position. These pessimizations
678 are historical artefacts only. */
679 find_anchor:
2c2d71f5 680 while (t < strend - prog->minlen) {
cad2e5aa 681 if (*t == '\n') {
4ee3650e 682 if (t < check_at - prog->check_offset_min) {
30944b6d 683 if (prog->anchored_substr) {
4ee3650e
GS
684 /* Since we moved from the found position,
685 we definitely contradict the found anchored
30944b6d
IZ
686 substr. Due to the above check we do not
687 contradict "check" substr.
688 Thus we can arrive here only if check substr
689 is float. Redo checking for "other"=="fixed".
690 */
9041c2e3 691 strpos = t + 1;
30944b6d
IZ
692 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
693 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
694 goto do_other_anchored;
695 }
4ee3650e
GS
696 /* We don't contradict the found floating substring. */
697 /* XXXX Why not check for STCLASS? */
cad2e5aa 698 s = t + 1;
2c2d71f5 699 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 700 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
701 goto set_useful;
702 }
4ee3650e
GS
703 /* Position contradicts check-string */
704 /* XXXX probably better to look for check-string
705 than for "\n", so one should lower the limit for t? */
706 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 707 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 708 other_last = strpos = s = t + 1;
cad2e5aa
JH
709 goto restart;
710 }
711 t++;
712 }
2c2d71f5
JH
713 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
714 PL_colors[0],PL_colors[1]));
715 goto fail_finish;
cad2e5aa 716 }
f5952150
GS
717 else {
718 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
719 PL_colors[0],PL_colors[1]));
720 }
cad2e5aa
JH
721 s = t;
722 set_useful:
2c2d71f5 723 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
724 }
725 else {
f5952150 726 /* The found string does not prohibit matching at strpos,
2c2d71f5 727 - no optimization of calling REx engine can be performed,
f5952150
GS
728 unless it was an MBOL and we are not after MBOL,
729 or a future STCLASS check will fail this. */
2c2d71f5
JH
730 try_at_start:
731 /* Even in this situation we may use MBOL flag if strpos is offset
732 wrt the start of the string. */
05b4157f 733 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 734 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
735 /* May be due to an implicit anchor of m{.*foo} */
736 && !(prog->reganch & ROPT_IMPLICIT))
737 {
cad2e5aa
JH
738 t = strpos;
739 goto find_anchor;
740 }
30944b6d 741 DEBUG_r( if (ml_anch)
f5952150
GS
742 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
743 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 744 );
2c2d71f5 745 success_at_start:
30944b6d 746 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 747 && prog->check_substr /* Could be deleted already */
cad2e5aa 748 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
749 && prog->check_substr == prog->float_substr)
750 {
cad2e5aa 751 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 752 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
753 SvREFCNT_dec(prog->check_substr);
754 prog->check_substr = Nullsv; /* disable */
755 prog->float_substr = Nullsv; /* clear */
5e39e1e5 756 check = Nullsv; /* abort */
cad2e5aa 757 s = strpos;
3cf5c195
IZ
758 /* XXXX This is a remnant of the old implementation. It
759 looks wasteful, since now INTUIT can use many
6eb5f6b9 760 other heuristics. */
cad2e5aa
JH
761 prog->reganch &= ~RE_USE_INTUIT;
762 }
763 else
764 s = strpos;
765 }
766
6eb5f6b9
JH
767 /* Last resort... */
768 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
769 if (prog->regstclass) {
770 /* minlen == 0 is possible if regstclass is \b or \B,
771 and the fixed substr is ''$.
772 Since minlen is already taken into account, s+1 is before strend;
773 accidentally, minlen >= 1 guaranties no false positives at s + 1
774 even for \b or \B. But (minlen? 1 : 0) below assumes that
775 regstclass does not come from lookahead... */
776 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
777 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 778 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 779 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 780 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 781 : 1);
6eb5f6b9 782 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
783 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
784 : (prog->float_substr
785 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
786 cl_l, strend)
787 : strend);
a1933d95 788 char *startpos = strbeg;
6eb5f6b9
JH
789
790 t = s;
76384e4a 791 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 792 PL_regdata = prog->data;
76384e4a
GS
793 PL_bostr = startpos;
794 }
f33976b4 795 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
6eb5f6b9
JH
796 if (!s) {
797#ifdef DEBUGGING
b7953727 798 char *what = 0;
6eb5f6b9
JH
799#endif
800 if (endpos == strend) {
801 DEBUG_r( PerlIO_printf(Perl_debug_log,
802 "Could not match STCLASS...\n") );
803 goto fail;
804 }
66e933ab
GS
805 DEBUG_r( PerlIO_printf(Perl_debug_log,
806 "This position contradicts STCLASS...\n") );
653099ff
GS
807 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
808 goto fail;
6eb5f6b9
JH
809 /* Contradict one of substrings */
810 if (prog->anchored_substr) {
6eb5f6b9
JH
811 if (prog->anchored_substr == check) {
812 DEBUG_r( what = "anchored" );
813 hop_and_restart:
1aa99e6b 814 s = HOP3c(t, 1, strend);
66e933ab
GS
815 if (s + start_shift + end_shift > strend) {
816 /* XXXX Should be taken into account earlier? */
817 DEBUG_r( PerlIO_printf(Perl_debug_log,
818 "Could not match STCLASS...\n") );
819 goto fail;
820 }
5e39e1e5
HS
821 if (!check)
822 goto giveup;
6eb5f6b9 823 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 824 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
825 what, (long)(s + start_shift - i_strpos)) );
826 goto restart;
827 }
66e933ab 828 /* Have both, check_string is floating */
6eb5f6b9
JH
829 if (t + start_shift >= check_at) /* Contradicts floating=check */
830 goto retry_floating_check;
831 /* Recheck anchored substring, but not floating... */
9041c2e3 832 s = check_at;
5e39e1e5
HS
833 if (!check)
834 goto giveup;
6eb5f6b9 835 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 836 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
837 (long)(other_last - i_strpos)) );
838 goto do_other_anchored;
839 }
60e71179
GS
840 /* Another way we could have checked stclass at the
841 current position only: */
842 if (ml_anch) {
843 s = t = t + 1;
5e39e1e5
HS
844 if (!check)
845 goto giveup;
60e71179 846 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
847 "Looking for /%s^%s/m starting at offset %ld...\n",
848 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 849 goto try_at_offset;
66e933ab 850 }
60e71179
GS
851 if (!prog->float_substr) /* Could have been deleted */
852 goto fail;
6eb5f6b9
JH
853 /* Check is floating subtring. */
854 retry_floating_check:
855 t = check_at - start_shift;
856 DEBUG_r( what = "floating" );
857 goto hop_and_restart;
858 }
b7953727
JH
859 if (t != s) {
860 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 861 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
862 (long)(t - i_strpos), (long)(s - i_strpos))
863 );
864 }
865 else {
866 DEBUG_r(PerlIO_printf(Perl_debug_log,
867 "Does not contradict STCLASS...\n");
868 );
869 }
6eb5f6b9 870 }
5e39e1e5
HS
871 giveup:
872 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
873 PL_colors[4], (check ? "Guessed" : "Giving up"),
874 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 875 return s;
2c2d71f5
JH
876
877 fail_finish: /* Substring not found */
66e933ab
GS
878 if (prog->check_substr) /* could be removed already */
879 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 880 fail:
2c2d71f5 881 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
882 PL_colors[4],PL_colors[5]));
883 return Nullch;
884}
9661b544 885
6eb5f6b9 886/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
887STATIC char *
888S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 889{
6eb5f6b9
JH
890 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
891 char *m;
d8093b23
G
892 STRLEN ln;
893 unsigned int c1;
894 unsigned int c2;
6eb5f6b9
JH
895 char *e;
896 register I32 tmp = 1; /* Scratch variable? */
53c4c00c 897 register bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 898
6eb5f6b9
JH
899 /* We know what class it must start with. */
900 switch (OP(c)) {
6eb5f6b9 901 case ANYOF:
a0ed51b3 902 while (s < strend) {
ffc61ed2 903 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
904 if (tmp && (norun || regtry(prog, s)))
905 goto got_it;
906 else
907 tmp = doevery;
a0ed51b3 908 }
6eb5f6b9
JH
909 else
910 tmp = 1;
ffc61ed2 911 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 912 }
6eb5f6b9 913 break;
f33976b4
DB
914 case CANY:
915 while (s < strend) {
916 if (tmp && (norun || regtry(prog, s)))
917 goto got_it;
918 else
919 tmp = doevery;
920 s++;
921 }
922 break;
6eb5f6b9
JH
923 case EXACTF:
924 m = STRING(c);
925 ln = STR_LEN(c);
1aa99e6b 926 if (UTF) {
a2a2844f
JH
927 STRLEN ulen1, ulen2;
928 U8 tmpbuf1[UTF8_MAXLEN*2+1];
929 U8 tmpbuf2[UTF8_MAXLEN*2+1];
930
931 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
932 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
933
934 c1 = utf8_to_uvuni(tmpbuf1, 0);
935 c2 = utf8_to_uvuni(tmpbuf2, 0);
1aa99e6b
IH
936 }
937 else {
938 c1 = *(U8*)m;
939 c2 = PL_fold[c1];
940 }
6eb5f6b9
JH
941 goto do_exactf;
942 case EXACTFL:
943 m = STRING(c);
944 ln = STR_LEN(c);
d8093b23 945 c1 = *(U8*)m;
6eb5f6b9
JH
946 c2 = PL_fold_locale[c1];
947 do_exactf:
948 e = strend - ln;
b3c9acc1 949
6eb5f6b9
JH
950 if (norun && e < s)
951 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
952
953 if (do_utf8) {
954 STRLEN len;
955 if (c1 == c2)
956 while (s <= e) {
9041c2e3 957 if ( utf8_to_uvchr((U8*)s, &len) == c1
1aa99e6b
IH
958 && regtry(prog, s) )
959 goto got_it;
960 s += len;
961 }
962 else
963 while (s <= e) {
9041c2e3 964 UV c = utf8_to_uvchr((U8*)s, &len);
1aa99e6b
IH
965 if ( (c == c1 || c == c2) && regtry(prog, s) )
966 goto got_it;
967 s += len;
968 }
969 }
970 else {
971 if (c1 == c2)
972 while (s <= e) {
973 if ( *(U8*)s == c1
974 && (ln == 1 || !(OP(c) == EXACTF
975 ? ibcmp(s, m, ln)
976 : ibcmp_locale(s, m, ln)))
977 && (norun || regtry(prog, s)) )
978 goto got_it;
979 s++;
980 }
981 else
982 while (s <= e) {
983 if ( (*(U8*)s == c1 || *(U8*)s == c2)
984 && (ln == 1 || !(OP(c) == EXACTF
985 ? ibcmp(s, m, ln)
986 : ibcmp_locale(s, m, ln)))
987 && (norun || regtry(prog, s)) )
988 goto got_it;
989 s++;
990 }
b3c9acc1
IZ
991 }
992 break;
bbce6d69 993 case BOUNDL:
3280af22 994 PL_reg_flags |= RF_tainted;
bbce6d69 995 /* FALL THROUGH */
a0d0e21e 996 case BOUND:
ffc61ed2 997 if (do_utf8) {
12d33761 998 if (s == PL_bostr)
ffc61ed2
JH
999 tmp = '\n';
1000 else {
1aa99e6b 1001 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1002
0064a8a9
JH
1003 if (s > (char*)r)
1004 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1005 }
1006 tmp = ((OP(c) == BOUND ?
9041c2e3 1007 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1008 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1009 while (s < strend) {
1010 if (tmp == !(OP(c) == BOUND ?
3568d838 1011 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1012 isALNUM_LC_utf8((U8*)s)))
1013 {
1014 tmp = !tmp;
1015 if ((norun || regtry(prog, s)))
1016 goto got_it;
1017 }
1018 s += UTF8SKIP(s);
a687059c 1019 }
a0d0e21e 1020 }
667bb95a 1021 else {
12d33761 1022 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1023 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1024 while (s < strend) {
1025 if (tmp ==
1026 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1027 tmp = !tmp;
1028 if ((norun || regtry(prog, s)))
1029 goto got_it;
1030 }
1031 s++;
a0ed51b3 1032 }
a0ed51b3 1033 }
6eb5f6b9 1034 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1035 goto got_it;
1036 break;
bbce6d69 1037 case NBOUNDL:
3280af22 1038 PL_reg_flags |= RF_tainted;
bbce6d69 1039 /* FALL THROUGH */
a0d0e21e 1040 case NBOUND:
ffc61ed2 1041 if (do_utf8) {
12d33761 1042 if (s == PL_bostr)
ffc61ed2
JH
1043 tmp = '\n';
1044 else {
1aa99e6b 1045 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1046
0064a8a9
JH
1047 if (s > (char*)r)
1048 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1049 }
1050 tmp = ((OP(c) == NBOUND ?
9041c2e3 1051 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1052 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1053 while (s < strend) {
1054 if (tmp == !(OP(c) == NBOUND ?
3568d838 1055 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1056 isALNUM_LC_utf8((U8*)s)))
1057 tmp = !tmp;
1058 else if ((norun || regtry(prog, s)))
1059 goto got_it;
1060 s += UTF8SKIP(s);
1061 }
a0d0e21e 1062 }
667bb95a 1063 else {
12d33761 1064 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1065 tmp = ((OP(c) == NBOUND ?
1066 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1067 while (s < strend) {
1068 if (tmp ==
1069 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1070 tmp = !tmp;
1071 else if ((norun || regtry(prog, s)))
1072 goto got_it;
1073 s++;
1074 }
a0ed51b3 1075 }
6eb5f6b9 1076 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1077 goto got_it;
1078 break;
a0d0e21e 1079 case ALNUM:
ffc61ed2 1080 if (do_utf8) {
8269fa76 1081 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1082 while (s < strend) {
3568d838 1083 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1084 if (tmp && (norun || regtry(prog, s)))
1085 goto got_it;
1086 else
1087 tmp = doevery;
1088 }
bbce6d69 1089 else
ffc61ed2
JH
1090 tmp = 1;
1091 s += UTF8SKIP(s);
bbce6d69 1092 }
bbce6d69 1093 }
ffc61ed2
JH
1094 else {
1095 while (s < strend) {
1096 if (isALNUM(*s)) {
1097 if (tmp && (norun || regtry(prog, s)))
1098 goto got_it;
1099 else
1100 tmp = doevery;
1101 }
a0ed51b3 1102 else
ffc61ed2
JH
1103 tmp = 1;
1104 s++;
a0ed51b3 1105 }
a0ed51b3
LW
1106 }
1107 break;
bbce6d69 1108 case ALNUML:
3280af22 1109 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1110 if (do_utf8) {
1111 while (s < strend) {
1112 if (isALNUM_LC_utf8((U8*)s)) {
1113 if (tmp && (norun || regtry(prog, s)))
1114 goto got_it;
1115 else
1116 tmp = doevery;
1117 }
a687059c 1118 else
ffc61ed2
JH
1119 tmp = 1;
1120 s += UTF8SKIP(s);
a0d0e21e 1121 }
a0d0e21e 1122 }
ffc61ed2
JH
1123 else {
1124 while (s < strend) {
1125 if (isALNUM_LC(*s)) {
1126 if (tmp && (norun || regtry(prog, s)))
1127 goto got_it;
1128 else
1129 tmp = doevery;
1130 }
a0ed51b3 1131 else
ffc61ed2
JH
1132 tmp = 1;
1133 s++;
a0ed51b3 1134 }
a0ed51b3
LW
1135 }
1136 break;
a0d0e21e 1137 case NALNUM:
ffc61ed2 1138 if (do_utf8) {
8269fa76 1139 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1140 while (s < strend) {
3568d838 1141 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1142 if (tmp && (norun || regtry(prog, s)))
1143 goto got_it;
1144 else
1145 tmp = doevery;
1146 }
bbce6d69 1147 else
ffc61ed2
JH
1148 tmp = 1;
1149 s += UTF8SKIP(s);
bbce6d69 1150 }
bbce6d69 1151 }
ffc61ed2
JH
1152 else {
1153 while (s < strend) {
1154 if (!isALNUM(*s)) {
1155 if (tmp && (norun || regtry(prog, s)))
1156 goto got_it;
1157 else
1158 tmp = doevery;
1159 }
a0ed51b3 1160 else
ffc61ed2
JH
1161 tmp = 1;
1162 s++;
a0ed51b3 1163 }
a0ed51b3
LW
1164 }
1165 break;
bbce6d69 1166 case NALNUML:
3280af22 1167 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1168 if (do_utf8) {
1169 while (s < strend) {
1170 if (!isALNUM_LC_utf8((U8*)s)) {
1171 if (tmp && (norun || regtry(prog, s)))
1172 goto got_it;
1173 else
1174 tmp = doevery;
1175 }
a687059c 1176 else
ffc61ed2
JH
1177 tmp = 1;
1178 s += UTF8SKIP(s);
a687059c 1179 }
a0d0e21e 1180 }
ffc61ed2
JH
1181 else {
1182 while (s < strend) {
1183 if (!isALNUM_LC(*s)) {
1184 if (tmp && (norun || regtry(prog, s)))
1185 goto got_it;
1186 else
1187 tmp = doevery;
1188 }
a0ed51b3 1189 else
ffc61ed2
JH
1190 tmp = 1;
1191 s++;
a0ed51b3 1192 }
a0ed51b3
LW
1193 }
1194 break;
a0d0e21e 1195 case SPACE:
ffc61ed2 1196 if (do_utf8) {
8269fa76 1197 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1198 while (s < strend) {
3568d838 1199 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1200 if (tmp && (norun || regtry(prog, s)))
1201 goto got_it;
1202 else
1203 tmp = doevery;
1204 }
a0d0e21e 1205 else
ffc61ed2
JH
1206 tmp = 1;
1207 s += UTF8SKIP(s);
2304df62 1208 }
a0d0e21e 1209 }
ffc61ed2
JH
1210 else {
1211 while (s < strend) {
1212 if (isSPACE(*s)) {
1213 if (tmp && (norun || regtry(prog, s)))
1214 goto got_it;
1215 else
1216 tmp = doevery;
1217 }
a0ed51b3 1218 else
ffc61ed2
JH
1219 tmp = 1;
1220 s++;
a0ed51b3 1221 }
a0ed51b3
LW
1222 }
1223 break;
bbce6d69 1224 case SPACEL:
3280af22 1225 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1226 if (do_utf8) {
1227 while (s < strend) {
1228 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1229 if (tmp && (norun || regtry(prog, s)))
1230 goto got_it;
1231 else
1232 tmp = doevery;
1233 }
bbce6d69 1234 else
ffc61ed2
JH
1235 tmp = 1;
1236 s += UTF8SKIP(s);
bbce6d69 1237 }
bbce6d69 1238 }
ffc61ed2
JH
1239 else {
1240 while (s < strend) {
1241 if (isSPACE_LC(*s)) {
1242 if (tmp && (norun || regtry(prog, s)))
1243 goto got_it;
1244 else
1245 tmp = doevery;
1246 }
a0ed51b3 1247 else
ffc61ed2
JH
1248 tmp = 1;
1249 s++;
a0ed51b3 1250 }
a0ed51b3
LW
1251 }
1252 break;
a0d0e21e 1253 case NSPACE:
ffc61ed2 1254 if (do_utf8) {
8269fa76 1255 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1256 while (s < strend) {
3568d838 1257 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1258 if (tmp && (norun || regtry(prog, s)))
1259 goto got_it;
1260 else
1261 tmp = doevery;
1262 }
a0d0e21e 1263 else
ffc61ed2
JH
1264 tmp = 1;
1265 s += UTF8SKIP(s);
a687059c 1266 }
a0d0e21e 1267 }
ffc61ed2
JH
1268 else {
1269 while (s < strend) {
1270 if (!isSPACE(*s)) {
1271 if (tmp && (norun || regtry(prog, s)))
1272 goto got_it;
1273 else
1274 tmp = doevery;
1275 }
a0ed51b3 1276 else
ffc61ed2
JH
1277 tmp = 1;
1278 s++;
a0ed51b3 1279 }
a0ed51b3
LW
1280 }
1281 break;
bbce6d69 1282 case NSPACEL:
3280af22 1283 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1284 if (do_utf8) {
1285 while (s < strend) {
1286 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1287 if (tmp && (norun || regtry(prog, s)))
1288 goto got_it;
1289 else
1290 tmp = doevery;
1291 }
bbce6d69 1292 else
ffc61ed2
JH
1293 tmp = 1;
1294 s += UTF8SKIP(s);
bbce6d69 1295 }
bbce6d69 1296 }
ffc61ed2
JH
1297 else {
1298 while (s < strend) {
1299 if (!isSPACE_LC(*s)) {
1300 if (tmp && (norun || regtry(prog, s)))
1301 goto got_it;
1302 else
1303 tmp = doevery;
1304 }
a0ed51b3 1305 else
ffc61ed2
JH
1306 tmp = 1;
1307 s++;
a0ed51b3 1308 }
a0ed51b3
LW
1309 }
1310 break;
a0d0e21e 1311 case DIGIT:
ffc61ed2 1312 if (do_utf8) {
8269fa76 1313 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1314 while (s < strend) {
3568d838 1315 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1316 if (tmp && (norun || regtry(prog, s)))
1317 goto got_it;
1318 else
1319 tmp = doevery;
1320 }
a0d0e21e 1321 else
ffc61ed2
JH
1322 tmp = 1;
1323 s += UTF8SKIP(s);
2b69d0c2 1324 }
a0d0e21e 1325 }
ffc61ed2
JH
1326 else {
1327 while (s < strend) {
1328 if (isDIGIT(*s)) {
1329 if (tmp && (norun || regtry(prog, s)))
1330 goto got_it;
1331 else
1332 tmp = doevery;
1333 }
a0ed51b3 1334 else
ffc61ed2
JH
1335 tmp = 1;
1336 s++;
a0ed51b3 1337 }
a0ed51b3
LW
1338 }
1339 break;
b8c5462f
JH
1340 case DIGITL:
1341 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1342 if (do_utf8) {
1343 while (s < strend) {
1344 if (isDIGIT_LC_utf8((U8*)s)) {
1345 if (tmp && (norun || regtry(prog, s)))
1346 goto got_it;
1347 else
1348 tmp = doevery;
1349 }
b8c5462f 1350 else
ffc61ed2
JH
1351 tmp = 1;
1352 s += UTF8SKIP(s);
b8c5462f 1353 }
b8c5462f 1354 }
ffc61ed2
JH
1355 else {
1356 while (s < strend) {
1357 if (isDIGIT_LC(*s)) {
1358 if (tmp && (norun || regtry(prog, s)))
1359 goto got_it;
1360 else
1361 tmp = doevery;
1362 }
b8c5462f 1363 else
ffc61ed2
JH
1364 tmp = 1;
1365 s++;
b8c5462f 1366 }
b8c5462f
JH
1367 }
1368 break;
a0d0e21e 1369 case NDIGIT:
ffc61ed2 1370 if (do_utf8) {
8269fa76 1371 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1372 while (s < strend) {
3568d838 1373 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1374 if (tmp && (norun || regtry(prog, s)))
1375 goto got_it;
1376 else
1377 tmp = doevery;
1378 }
a0d0e21e 1379 else
ffc61ed2
JH
1380 tmp = 1;
1381 s += UTF8SKIP(s);
a687059c 1382 }
a0d0e21e 1383 }
ffc61ed2
JH
1384 else {
1385 while (s < strend) {
1386 if (!isDIGIT(*s)) {
1387 if (tmp && (norun || regtry(prog, s)))
1388 goto got_it;
1389 else
1390 tmp = doevery;
1391 }
a0ed51b3 1392 else
ffc61ed2
JH
1393 tmp = 1;
1394 s++;
a0ed51b3 1395 }
a0ed51b3
LW
1396 }
1397 break;
b8c5462f
JH
1398 case NDIGITL:
1399 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1400 if (do_utf8) {
1401 while (s < strend) {
1402 if (!isDIGIT_LC_utf8((U8*)s)) {
1403 if (tmp && (norun || regtry(prog, s)))
1404 goto got_it;
1405 else
1406 tmp = doevery;
1407 }
b8c5462f 1408 else
ffc61ed2
JH
1409 tmp = 1;
1410 s += UTF8SKIP(s);
b8c5462f 1411 }
a0ed51b3 1412 }
ffc61ed2
JH
1413 else {
1414 while (s < strend) {
1415 if (!isDIGIT_LC(*s)) {
1416 if (tmp && (norun || regtry(prog, s)))
1417 goto got_it;
1418 else
1419 tmp = doevery;
1420 }
cf93c79d 1421 else
ffc61ed2
JH
1422 tmp = 1;
1423 s++;
b8c5462f 1424 }
b8c5462f
JH
1425 }
1426 break;
b3c9acc1 1427 default:
3c3eec57
GS
1428 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1429 break;
d6a28714 1430 }
6eb5f6b9
JH
1431 return 0;
1432 got_it:
1433 return s;
1434}
1435
1436/*
1437 - regexec_flags - match a regexp against a string
1438 */
1439I32
1440Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1441 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1442/* strend: pointer to null at end of string */
1443/* strbeg: real beginning of string */
1444/* minend: end of match must be >=minend after stringarg. */
1445/* data: May be used for some additional optimizations. */
1446/* nosave: For optimizations. */
1447{
6eb5f6b9
JH
1448 register char *s;
1449 register regnode *c;
1450 register char *startpos = stringarg;
6eb5f6b9
JH
1451 I32 minlen; /* must match at least this many chars */
1452 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1453 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1454 constant substr. */ /* CC */
1455 I32 end_shift = 0; /* Same for the end. */ /* CC */
1456 I32 scream_pos = -1; /* Internal iterator of scream. */
1457 char *scream_olds;
1458 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1459 bool do_utf8 = DO_UTF8(sv);
2a782b5b
JH
1460#ifdef DEBUGGING
1461 SV *dsv = sv_2mortal(newSVpvn("", 0));
1462#endif
6eb5f6b9
JH
1463
1464 PL_regcc = 0;
1465
1466 cache_re(prog);
1467#ifdef DEBUGGING
aea4f609 1468 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1469#endif
1470
1471 /* Be paranoid... */
1472 if (prog == NULL || startpos == NULL) {
1473 Perl_croak(aTHX_ "NULL regexp parameter");
1474 return 0;
1475 }
1476
1477 minlen = prog->minlen;
a72c7584
JH
1478 if (strend - startpos < minlen) {
1479 DEBUG_r(PerlIO_printf(Perl_debug_log,
1480 "String too short [regexec_flags]...\n"));
1481 goto phooey;
1aa99e6b 1482 }
6eb5f6b9 1483
6eb5f6b9
JH
1484 /* Check validity of program. */
1485 if (UCHARAT(prog->program) != REG_MAGIC) {
1486 Perl_croak(aTHX_ "corrupted regexp program");
1487 }
1488
1489 PL_reg_flags = 0;
1490 PL_reg_eval_set = 0;
1491 PL_reg_maxiter = 0;
1492
1493 if (prog->reganch & ROPT_UTF8)
1494 PL_reg_flags |= RF_utf8;
1495
1496 /* Mark beginning of line for ^ and lookbehind. */
1497 PL_regbol = startpos;
1498 PL_bostr = strbeg;
1499 PL_reg_sv = sv;
1500
1501 /* Mark end of line for $ (and such) */
1502 PL_regeol = strend;
1503
1504 /* see how far we have to get to not match where we matched before */
1505 PL_regtill = startpos+minend;
1506
1507 /* We start without call_cc context. */
1508 PL_reg_call_cc = 0;
1509
1510 /* If there is a "must appear" string, look for it. */
1511 s = startpos;
1512
1513 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1514 MAGIC *mg;
1515
1516 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1517 PL_reg_ganch = startpos;
1518 else if (sv && SvTYPE(sv) >= SVt_PVMG
1519 && SvMAGIC(sv)
14befaf4
DM
1520 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1521 && mg->mg_len >= 0) {
6eb5f6b9
JH
1522 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1523 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1524 if (s > PL_reg_ganch)
6eb5f6b9
JH
1525 goto phooey;
1526 s = PL_reg_ganch;
1527 }
1528 }
1529 else /* pos() not defined */
1530 PL_reg_ganch = strbeg;
1531 }
1532
699c3c34
JH
1533 if (do_utf8 == (UTF!=0) &&
1534 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1535 re_scream_pos_data d;
1536
1537 d.scream_olds = &scream_olds;
1538 d.scream_pos = &scream_pos;
1539 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7
JH
1540 if (!s) {
1541 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1542 goto phooey; /* not present */
3fa9c3d7 1543 }
6eb5f6b9
JH
1544 }
1545
2a782b5b 1546 DEBUG_r({
df1ffd02
JH
1547 char *s = do_utf8 ? sv_uni_display(dsv, sv, 60, 0) : startpos;
1548 int len = do_utf8 ? strlen(s) : strend - startpos;
2a782b5b
JH
1549 if (!PL_colorset)
1550 reginitcolors();
1551 PerlIO_printf(Perl_debug_log,
1552 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1553 PL_colors[4],PL_colors[5],PL_colors[0],
1554 prog->precomp,
1555 PL_colors[1],
1556 (strlen(prog->precomp) > 60 ? "..." : ""),
1557 PL_colors[0],
1558 (int)(len > 60 ? 60 : len),
1559 s, PL_colors[1],
1560 (len > 60 ? "..." : "")
1561 );
1562 });
6eb5f6b9
JH
1563
1564 /* Simplest case: anchored match need be tried only once. */
1565 /* [unless only anchor is BOL and multiline is set] */
1566 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1567 if (s == startpos && regtry(prog, startpos))
1568 goto got_it;
1569 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1570 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1571 {
1572 char *end;
1573
1574 if (minlen)
1575 dontbother = minlen - 1;
1aa99e6b 1576 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1577 /* for multiline we only have to try after newlines */
1578 if (prog->check_substr) {
1579 if (s == startpos)
1580 goto after_try;
1581 while (1) {
1582 if (regtry(prog, s))
1583 goto got_it;
1584 after_try:
1585 if (s >= end)
1586 goto phooey;
1587 if (prog->reganch & RE_USE_INTUIT) {
1588 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1589 if (!s)
1590 goto phooey;
1591 }
1592 else
1593 s++;
1594 }
1595 } else {
1596 if (s > startpos)
1597 s--;
1598 while (s < end) {
1599 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1600 if (regtry(prog, s))
1601 goto got_it;
1602 }
1603 }
1604 }
1605 }
1606 goto phooey;
1607 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1608 if (regtry(prog, PL_reg_ganch))
1609 goto got_it;
1610 goto phooey;
1611 }
1612
1613 /* Messy cases: unanchored match. */
9041c2e3 1614 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1615 /* we have /x+whatever/ */
1616 /* it must be a one character string (XXXX Except UTF?) */
1617 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1618#ifdef DEBUGGING
1619 int did_match = 0;
1620#endif
1621
1aa99e6b 1622 if (do_utf8) {
6eb5f6b9
JH
1623 while (s < strend) {
1624 if (*s == ch) {
bf93d4cc 1625 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1626 if (regtry(prog, s)) goto got_it;
1627 s += UTF8SKIP(s);
1628 while (s < strend && *s == ch)
1629 s += UTF8SKIP(s);
1630 }
1631 s += UTF8SKIP(s);
1632 }
1633 }
1634 else {
1635 while (s < strend) {
1636 if (*s == ch) {
bf93d4cc 1637 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1638 if (regtry(prog, s)) goto got_it;
1639 s++;
1640 while (s < strend && *s == ch)
1641 s++;
1642 }
1643 s++;
1644 }
1645 }
b7953727 1646 DEBUG_r(if (!did_match)
bf93d4cc 1647 PerlIO_printf(Perl_debug_log,
b7953727
JH
1648 "Did not find anchored character...\n")
1649 );
6eb5f6b9
JH
1650 }
1651 /*SUPPRESS 560*/
1aa99e6b
IH
1652 else if (do_utf8 == (UTF!=0) &&
1653 (prog->anchored_substr != Nullsv
9041c2e3 1654 || (prog->float_substr != Nullsv
1aa99e6b 1655 && prog->float_max_offset < strend - s))) {
9041c2e3 1656 SV *must = prog->anchored_substr
6eb5f6b9 1657 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1658 I32 back_max =
6eb5f6b9 1659 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1660 I32 back_min =
6eb5f6b9 1661 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1662 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1663 -(I32)(CHR_SVLEN(must)
1aa99e6b 1664 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1665 char *last1; /* Last position checked before */
bf93d4cc
GS
1666#ifdef DEBUGGING
1667 int did_match = 0;
1668#endif
6eb5f6b9
JH
1669
1670 if (s > PL_bostr)
1671 last1 = HOPc(s, -1);
1672 else
1673 last1 = s - 1; /* bogus */
1674
1675 /* XXXX check_substr already used to find `s', can optimize if
1676 check_substr==must. */
1677 scream_pos = -1;
1678 dontbother = end_shift;
1679 strend = HOPc(strend, -dontbother);
1680 while ( (s <= last) &&
9041c2e3 1681 ((flags & REXEC_SCREAM)
1aa99e6b 1682 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1683 end_shift, &scream_pos, 0))
1aa99e6b 1684 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1685 (unsigned char*)strend, must,
6eb5f6b9 1686 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1687 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1688 if (HOPc(s, -back_max) > last1) {
1689 last1 = HOPc(s, -back_min);
1690 s = HOPc(s, -back_max);
1691 }
1692 else {
1693 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1694
1695 last1 = HOPc(s, -back_min);
1696 s = t;
1697 }
1aa99e6b 1698 if (do_utf8) {
6eb5f6b9
JH
1699 while (s <= last1) {
1700 if (regtry(prog, s))
1701 goto got_it;
1702 s += UTF8SKIP(s);
1703 }
1704 }
1705 else {
1706 while (s <= last1) {
1707 if (regtry(prog, s))
1708 goto got_it;
1709 s++;
1710 }
1711 }
1712 }
b7953727
JH
1713 DEBUG_r(if (!did_match)
1714 PerlIO_printf(Perl_debug_log,
1715 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1716 ((must == prog->anchored_substr)
1717 ? "anchored" : "floating"),
1718 PL_colors[0],
1719 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1720 SvPVX(must),
b7953727
JH
1721 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1722 );
6eb5f6b9
JH
1723 goto phooey;
1724 }
155aba94 1725 else if ((c = prog->regstclass)) {
66e933ab
GS
1726 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1727 /* don't bother with what can't match */
6eb5f6b9 1728 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1729 DEBUG_r({
1730 SV *prop = sv_newmortal();
1731 regprop(prop, c);
2a782b5b 1732 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), UTF ? sv_uni_display(dsv, sv, 60, 0) : s);
ffc61ed2 1733 });
6eb5f6b9
JH
1734 if (find_byclass(prog, c, s, strend, startpos, 0))
1735 goto got_it;
bf93d4cc 1736 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1737 }
1738 else {
1739 dontbother = 0;
1740 if (prog->float_substr != Nullsv) { /* Trim the end. */
1741 char *last;
d6a28714
JH
1742
1743 if (flags & REXEC_SCREAM) {
1744 last = screaminstr(sv, prog->float_substr, s - strbeg,
1745 end_shift, &scream_pos, 1); /* last one */
1746 if (!last)
ffc61ed2 1747 last = scream_olds; /* Only one occurrence. */
b8c5462f 1748 }
d6a28714
JH
1749 else {
1750 STRLEN len;
1751 char *little = SvPV(prog->float_substr, len);
1752
1753 if (SvTAIL(prog->float_substr)) {
1754 if (memEQ(strend - len + 1, little, len - 1))
1755 last = strend - len + 1;
1756 else if (!PL_multiline)
9041c2e3 1757 last = memEQ(strend - len, little, len)
d6a28714 1758 ? strend - len : Nullch;
b8c5462f 1759 else
d6a28714
JH
1760 goto find_last;
1761 } else {
1762 find_last:
9041c2e3 1763 if (len)
d6a28714 1764 last = rninstr(s, strend, little, little + len);
b8c5462f 1765 else
d6a28714 1766 last = strend; /* matching `$' */
b8c5462f 1767 }
b8c5462f 1768 }
bf93d4cc
GS
1769 if (last == NULL) {
1770 DEBUG_r(PerlIO_printf(Perl_debug_log,
1771 "%sCan't trim the tail, match fails (should not happen)%s\n",
1772 PL_colors[4],PL_colors[5]));
1773 goto phooey; /* Should not happen! */
1774 }
d6a28714
JH
1775 dontbother = strend - last + prog->float_min_offset;
1776 }
1777 if (minlen && (dontbother < minlen))
1778 dontbother = minlen - 1;
1779 strend -= dontbother; /* this one's always in bytes! */
1780 /* We don't know much -- general case. */
1aa99e6b 1781 if (do_utf8) {
d6a28714
JH
1782 for (;;) {
1783 if (regtry(prog, s))
1784 goto got_it;
1785 if (s >= strend)
1786 break;
b8c5462f 1787 s += UTF8SKIP(s);
d6a28714
JH
1788 };
1789 }
1790 else {
1791 do {
1792 if (regtry(prog, s))
1793 goto got_it;
1794 } while (s++ < strend);
1795 }
1796 }
1797
1798 /* Failure. */
1799 goto phooey;
1800
1801got_it:
1802 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1803
1804 if (PL_reg_eval_set) {
1805 /* Preserve the current value of $^R */
1806 if (oreplsv != GvSV(PL_replgv))
1807 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1808 restored, the value remains
1809 the same. */
acfe0abc 1810 restore_pos(aTHX_ 0);
d6a28714
JH
1811 }
1812
1813 /* make sure $`, $&, $', and $digit will work later */
1814 if ( !(flags & REXEC_NOT_FIRST) ) {
1815 if (RX_MATCH_COPIED(prog)) {
1816 Safefree(prog->subbeg);
1817 RX_MATCH_COPIED_off(prog);
1818 }
1819 if (flags & REXEC_COPY_STR) {
1820 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1821
1822 s = savepvn(strbeg, i);
1823 prog->subbeg = s;
1824 prog->sublen = i;
1825 RX_MATCH_COPIED_on(prog);
1826 }
1827 else {
1828 prog->subbeg = strbeg;
1829 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1830 }
1831 }
9041c2e3 1832
d6a28714
JH
1833 return 1;
1834
1835phooey:
bf93d4cc
GS
1836 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1837 PL_colors[4],PL_colors[5]));
d6a28714 1838 if (PL_reg_eval_set)
acfe0abc 1839 restore_pos(aTHX_ 0);
d6a28714
JH
1840 return 0;
1841}
1842
1843/*
1844 - regtry - try match at specific point
1845 */
1846STATIC I32 /* 0 failure, 1 success */
1847S_regtry(pTHX_ regexp *prog, char *startpos)
1848{
d6a28714
JH
1849 register I32 i;
1850 register I32 *sp;
1851 register I32 *ep;
1852 CHECKPOINT lastcp;
1853
02db2b7b
IZ
1854#ifdef DEBUGGING
1855 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1856#endif
d6a28714
JH
1857 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1858 MAGIC *mg;
1859
1860 PL_reg_eval_set = RS_init;
1861 DEBUG_r(DEBUG_s(
b900a521
JH
1862 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1863 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1864 ));
e8347627 1865 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1866 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1867 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1868 SAVETMPS;
1869 /* Apparently this is not needed, judging by wantarray. */
e8347627 1870 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1871 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1872
1873 if (PL_reg_sv) {
1874 /* Make $_ available to executed code. */
1875 if (PL_reg_sv != DEFSV) {
4d1ff10f 1876 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
d6a28714
JH
1877 SAVESPTR(DEFSV);
1878 DEFSV = PL_reg_sv;
b8c5462f 1879 }
d6a28714 1880
9041c2e3 1881 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1882 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1883 /* prepare for quick setting of pos */
14befaf4
DM
1884 sv_magic(PL_reg_sv, (SV*)0,
1885 PERL_MAGIC_regex_global, Nullch, 0);
1886 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1887 mg->mg_len = -1;
b8c5462f 1888 }
d6a28714
JH
1889 PL_reg_magic = mg;
1890 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1891 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 1892 }
09687e5a 1893 if (!PL_reg_curpm) {
0f79a09d 1894 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
1895#ifdef USE_ITHREADS
1896 {
1897 SV* repointer = newSViv(0);
577e12cc 1898 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 1899 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
1900 av_push(PL_regex_padav,repointer);
1901 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
1902 PL_regex_pad = AvARRAY(PL_regex_padav);
1903 }
1904#endif
1905 }
aaa362c4 1906 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
1907 PL_reg_oldcurpm = PL_curpm;
1908 PL_curpm = PL_reg_curpm;
1909 if (RX_MATCH_COPIED(prog)) {
1910 /* Here is a serious problem: we cannot rewrite subbeg,
1911 since it may be needed if this match fails. Thus
1912 $` inside (?{}) could fail... */
1913 PL_reg_oldsaved = prog->subbeg;
1914 PL_reg_oldsavedlen = prog->sublen;
1915 RX_MATCH_COPIED_off(prog);
1916 }
1917 else
1918 PL_reg_oldsaved = Nullch;
1919 prog->subbeg = PL_bostr;
1920 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1921 }
1922 prog->startp[0] = startpos - PL_bostr;
1923 PL_reginput = startpos;
1924 PL_regstartp = prog->startp;
1925 PL_regendp = prog->endp;
1926 PL_reglastparen = &prog->lastparen;
a01268b5 1927 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
1928 prog->lastparen = 0;
1929 PL_regsize = 0;
1930 DEBUG_r(PL_reg_starttry = startpos);
1931 if (PL_reg_start_tmpl <= prog->nparens) {
1932 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1933 if(PL_reg_start_tmp)
1934 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1935 else
1936 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1937 }
1938
1939 /* XXXX What this code is doing here?!!! There should be no need
1940 to do this again and again, PL_reglastparen should take care of
3dd2943c 1941 this! --ilya*/
dafc8851
JH
1942
1943 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1944 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1945 * PL_reglastparen), is not needed at all by the test suite
1946 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1947 * enough, for building DynaLoader, or otherwise this
1948 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1949 * will happen. Meanwhile, this code *is* needed for the
1950 * above-mentioned test suite tests to succeed. The common theme
1951 * on those tests seems to be returning null fields from matches.
1952 * --jhi */
dafc8851 1953#if 1
d6a28714
JH
1954 sp = prog->startp;
1955 ep = prog->endp;
1956 if (prog->nparens) {
09e8ae3b 1957 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1958 *++sp = -1;
1959 *++ep = -1;
1960 }
1961 }
dafc8851 1962#endif
02db2b7b 1963 REGCP_SET(lastcp);
d6a28714
JH
1964 if (regmatch(prog->program + 1)) {
1965 prog->endp[0] = PL_reginput - PL_bostr;
1966 return 1;
1967 }
02db2b7b 1968 REGCP_UNWIND(lastcp);
d6a28714
JH
1969 return 0;
1970}
1971
02db2b7b
IZ
1972#define RE_UNWIND_BRANCH 1
1973#define RE_UNWIND_BRANCHJ 2
1974
1975union re_unwind_t;
1976
1977typedef struct { /* XX: makes sense to enlarge it... */
1978 I32 type;
1979 I32 prev;
1980 CHECKPOINT lastcp;
1981} re_unwind_generic_t;
1982
1983typedef struct {
1984 I32 type;
1985 I32 prev;
1986 CHECKPOINT lastcp;
1987 I32 lastparen;
1988 regnode *next;
1989 char *locinput;
1990 I32 nextchr;
1991#ifdef DEBUGGING
1992 int regindent;
1993#endif
1994} re_unwind_branch_t;
1995
1996typedef union re_unwind_t {
1997 I32 type;
1998 re_unwind_generic_t generic;
1999 re_unwind_branch_t branch;
2000} re_unwind_t;
2001
8ba1375e
MJD
2002#define sayYES goto yes
2003#define sayNO goto no
2004#define sayYES_FINAL goto yes_final
2005#define sayYES_LOUD goto yes_loud
2006#define sayNO_FINAL goto no_final
2007#define sayNO_SILENT goto do_no
2008#define saySAME(x) if (x) goto yes; else goto no
2009
2010#define REPORT_CODE_OFF 24
2011
d6a28714
JH
2012/*
2013 - regmatch - main matching routine
2014 *
2015 * Conceptually the strategy is simple: check to see whether the current
2016 * node matches, call self recursively to see whether the rest matches,
2017 * and then act accordingly. In practice we make some effort to avoid
2018 * recursion, in particular by going through "ordinary" nodes (that don't
2019 * need to know whether the rest of the match failed) by a loop instead of
2020 * by recursion.
2021 */
2022/* [lwall] I've hoisted the register declarations to the outer block in order to
2023 * maybe save a little bit of pushing and popping on the stack. It also takes
2024 * advantage of machines that use a register save mask on subroutine entry.
2025 */
2026STATIC I32 /* 0 failure, 1 success */
2027S_regmatch(pTHX_ regnode *prog)
2028{
d6a28714
JH
2029 register regnode *scan; /* Current node. */
2030 regnode *next; /* Next node. */
2031 regnode *inner; /* Next node in internal branch. */
2032 register I32 nextchr; /* renamed nextchr - nextchar colides with
2033 function of same name */
2034 register I32 n; /* no or next */
b7953727
JH
2035 register I32 ln = 0; /* len or last */
2036 register char *s = Nullch; /* operand or save */
d6a28714 2037 register char *locinput = PL_reginput;
b7953727 2038 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2039 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2040 I32 unwind = 0;
b7953727 2041#if 0
02db2b7b 2042 I32 firstcp = PL_savestack_ix;
b7953727 2043#endif
53c4c00c 2044 register bool do_utf8 = PL_reg_match_utf8;
2a782b5b
JH
2045#ifdef DEBUGGING
2046 SV *dsv0 = sv_2mortal(newSVpvn("", 0));
2047 SV *dsv1 = sv_2mortal(newSVpvn("", 0));
2048 SV *dsv2 = sv_2mortal(newSVpvn("", 0));
2049#endif
02db2b7b 2050
d6a28714
JH
2051#ifdef DEBUGGING
2052 PL_regindent++;
2053#endif
2054
2055 /* Note that nextchr is a byte even in UTF */
2056 nextchr = UCHARAT(locinput);
2057 scan = prog;
2058 while (scan != NULL) {
8ba1375e 2059
2a782b5b 2060 DEBUG_r( {
d6a28714
JH
2061 SV *prop = sv_newmortal();
2062 int docolor = *PL_colors[0];
2063 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2064 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2065 /* The part of the string before starttry has one color
2066 (pref0_len chars), between starttry and current
2067 position another one (pref_len - pref0_len chars),
2068 after the current position the third one.
2069 We assume that pref0_len <= pref_len, otherwise we
2070 decrease pref0_len. */
9041c2e3 2071 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2072 ? (5 + taill) - l : locinput - PL_bostr;
2073 int pref0_len;
d6a28714 2074
df1ffd02 2075 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2076 pref_len++;
2077 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2078 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2079 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2080 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2081 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2082 l--;
d6a28714
JH
2083 if (pref0_len < 0)
2084 pref0_len = 0;
2085 if (pref0_len > pref_len)
2086 pref0_len = pref_len;
2087 regprop(prop, scan);
2a782b5b
JH
2088 {
2089 char *s0 =
df1ffd02 2090 do_utf8 ?
2a782b5b
JH
2091 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
2092 pref0_len, 60, 0) :
2093 locinput - pref_len;
df1ffd02
JH
2094 int len0 = do_utf8 ? strlen(s0) : pref0_len;
2095 char *s1 = do_utf8 ?
2a782b5b
JH
2096 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
2097 pref_len - pref0_len, 60, 0) :
2098 locinput - pref_len + pref0_len;
df1ffd02
JH
2099 int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2100 char *s2 = do_utf8 ?
2a782b5b
JH
2101 pv_uni_display(dsv2, (U8*)locinput,
2102 PL_regeol - locinput, 60, 0) :
2103 locinput;
df1ffd02 2104 int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2105 PerlIO_printf(Perl_debug_log,
2106 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2107 (IV)(locinput - PL_bostr),
2108 PL_colors[4],
2109 len0, s0,
2110 PL_colors[5],
2111 PL_colors[2],
2112 len1, s1,
2113 PL_colors[3],
2114 (docolor ? "" : "> <"),
2115 PL_colors[0],
2116 len2, s2,
2117 PL_colors[1],
2118 15 - l - pref_len + 1,
2119 "",
2120 (IV)(scan - PL_regprogram), PL_regindent*2, "",
2121 SvPVX(prop));
2122 }
2123 });
d6a28714
JH
2124
2125 next = scan + NEXT_OFF(scan);
2126 if (next == scan)
2127 next = NULL;
2128
2129 switch (OP(scan)) {
2130 case BOL:
12d33761
HS
2131 if (locinput == PL_bostr || (PL_multiline &&
2132 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2133 {
2134 /* regtill = regbol; */
b8c5462f
JH
2135 break;
2136 }
d6a28714
JH
2137 sayNO;
2138 case MBOL:
12d33761
HS
2139 if (locinput == PL_bostr ||
2140 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2141 {
b8c5462f
JH
2142 break;
2143 }
d6a28714
JH
2144 sayNO;
2145 case SBOL:
c2a73568 2146 if (locinput == PL_bostr)
b8c5462f 2147 break;
d6a28714
JH
2148 sayNO;
2149 case GPOS:
2150 if (locinput == PL_reg_ganch)
2151 break;
2152 sayNO;
2153 case EOL:
2154 if (PL_multiline)
2155 goto meol;
2156 else
2157 goto seol;
2158 case MEOL:
2159 meol:
2160 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2161 sayNO;
b8c5462f 2162 break;
d6a28714
JH
2163 case SEOL:
2164 seol:
2165 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2166 sayNO;
d6a28714 2167 if (PL_regeol - locinput > 1)
b8c5462f 2168 sayNO;
b8c5462f 2169 break;
d6a28714
JH
2170 case EOS:
2171 if (PL_regeol != locinput)
b8c5462f 2172 sayNO;
d6a28714 2173 break;
ffc61ed2 2174 case SANY:
d6a28714 2175 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2176 sayNO;
f33976b4
DB
2177 if (do_utf8) {
2178 locinput += PL_utf8skip[nextchr];
2179 if (locinput > PL_regeol)
2180 sayNO;
2181 nextchr = UCHARAT(locinput);
2182 }
2183 else
2184 nextchr = UCHARAT(++locinput);
2185 break;
2186 case CANY:
2187 if (!nextchr && locinput >= PL_regeol)
2188 sayNO;
b8c5462f 2189 nextchr = UCHARAT(++locinput);
a0d0e21e 2190 break;
ffc61ed2 2191 case REG_ANY:
1aa99e6b
IH
2192 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2193 sayNO;
2194 if (do_utf8) {
b8c5462f 2195 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2196 if (locinput > PL_regeol)
2197 sayNO;
a0ed51b3 2198 nextchr = UCHARAT(locinput);
a0ed51b3 2199 }
1aa99e6b
IH
2200 else
2201 nextchr = UCHARAT(++locinput);
a0ed51b3 2202 break;
d6a28714 2203 case EXACT:
cd439c50
IZ
2204 s = STRING(scan);
2205 ln = STR_LEN(scan);
1aa99e6b 2206 if (do_utf8 != (UTF!=0)) {
5ff6fc6d 2207 /* The target and the pattern have differing "utf8ness". */
1aa99e6b
IH
2208 char *l = locinput;
2209 char *e = s + ln;
2210 STRLEN len;
a72c7584 2211
5ff6fc6d
JH
2212 if (do_utf8) {
2213 /* The target is utf8, the pattern is not utf8. */
1aa99e6b
IH
2214 while (s < e) {
2215 if (l >= PL_regeol)
5ff6fc6d
JH
2216 sayNO;
2217 if (NATIVE_TO_UNI(*(U8*)s) !=
2218 utf8_to_uvchr((U8*)l, &len))
2219 sayNO;
2220 l += len;
2221 s ++;
1aa99e6b 2222 }
5ff6fc6d
JH
2223 }
2224 else {
2225 /* The target is not utf8, the pattern is utf8. */
1aa99e6b
IH
2226 while (s < e) {
2227 if (l >= PL_regeol)
2228 sayNO;
5ff6fc6d
JH
2229 if (NATIVE_TO_UNI(*((U8*)l)) !=
2230 utf8_to_uvchr((U8*)s, &len))
1aa99e6b 2231 sayNO;
a72c7584
JH
2232 s += len;
2233 l ++;
1aa99e6b 2234 }
5ff6fc6d 2235 }
1aa99e6b
IH
2236 locinput = l;
2237 nextchr = UCHARAT(locinput);
2238 break;
2239 }
5ff6fc6d 2240 /* The target and the pattern have the same "utf8ness". */
d6a28714
JH
2241 /* Inline the first character, for speed. */
2242 if (UCHARAT(s) != nextchr)
2243 sayNO;
2244 if (PL_regeol - locinput < ln)
2245 sayNO;
2246 if (ln > 1 && memNE(s, locinput, ln))
2247 sayNO;
2248 locinput += ln;
2249 nextchr = UCHARAT(locinput);
2250 break;
2251 case EXACTFL:
b8c5462f
JH
2252 PL_reg_flags |= RF_tainted;
2253 /* FALL THROUGH */
d6a28714 2254 case EXACTF:
cd439c50
IZ
2255 s = STRING(scan);
2256 ln = STR_LEN(scan);
d6a28714 2257
1aa99e6b 2258 if (do_utf8) {
d6a28714 2259 char *l = locinput;
1aa99e6b 2260 char *e;
a2a2844f
JH
2261 STRLEN ulen;
2262 U8 tmpbuf[UTF8_MAXLEN*2+1];
1aa99e6b 2263 e = s + ln;
d6a28714 2264 while (s < e) {
a2a2844f 2265 if (l >= PL_regeol)
d6a28714 2266 sayNO;
a2a2844f 2267 toLOWER_utf8((U8*)l, tmpbuf, &ulen);
7114a2d2 2268 if (memNE(s, (char*)tmpbuf, ulen))
a2a2844f
JH
2269 sayNO;
2270 s += UTF8SKIP(s);
2271 l += ulen;
b8c5462f 2272 }
d6a28714 2273 locinput = l;
a0ed51b3
LW
2274 nextchr = UCHARAT(locinput);
2275 break;
2276 }
d6a28714
JH
2277
2278 /* Inline the first character, for speed. */
2279 if (UCHARAT(s) != nextchr &&
2280 UCHARAT(s) != ((OP(scan) == EXACTF)
2281 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2282 sayNO;
d6a28714 2283 if (PL_regeol - locinput < ln)
b8c5462f 2284 sayNO;
d6a28714
JH
2285 if (ln > 1 && (OP(scan) == EXACTF
2286 ? ibcmp(s, locinput, ln)
2287 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2288 sayNO;
d6a28714
JH
2289 locinput += ln;
2290 nextchr = UCHARAT(locinput);
a0d0e21e 2291 break;
d6a28714 2292 case ANYOF:
ffc61ed2
JH
2293 if (do_utf8) {
2294 if (!reginclass(scan, (U8*)locinput, do_utf8))
2295 sayNO;
2296 if (locinput >= PL_regeol)
2297 sayNO;
2298 locinput += PL_utf8skip[nextchr];
b8c5462f 2299 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2300 }
2301 else {
2302 if (nextchr < 0)
2303 nextchr = UCHARAT(locinput);
2304 if (!reginclass(scan, (U8*)locinput, do_utf8))
2305 sayNO;
2306 if (!nextchr && locinput >= PL_regeol)
2307 sayNO;
2308 nextchr = UCHARAT(++locinput);
2309 }
b8c5462f 2310 break;
d6a28714 2311 case ALNUML:
b8c5462f
JH
2312 PL_reg_flags |= RF_tainted;
2313 /* FALL THROUGH */
d6a28714 2314 case ALNUM:
b8c5462f 2315 if (!nextchr)
4633a7c4 2316 sayNO;
ffc61ed2 2317 if (do_utf8) {
ad24be35 2318 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2319 if (!(OP(scan) == ALNUM
3568d838 2320 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2321 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2322 {
2323 sayNO;
a0ed51b3 2324 }
b8c5462f 2325 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2326 nextchr = UCHARAT(locinput);
2327 break;
2328 }
ffc61ed2 2329 if (!(OP(scan) == ALNUM
d6a28714 2330 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2331 sayNO;
b8c5462f 2332 nextchr = UCHARAT(++locinput);
a0d0e21e 2333 break;
d6a28714 2334 case NALNUML:
b8c5462f
JH
2335 PL_reg_flags |= RF_tainted;
2336 /* FALL THROUGH */
d6a28714
JH
2337 case NALNUM:
2338 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2339 sayNO;
ffc61ed2 2340 if (do_utf8) {
8269fa76 2341 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2342 if (OP(scan) == NALNUM
3568d838 2343 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2344 : isALNUM_LC_utf8((U8*)locinput))
2345 {
b8c5462f 2346 sayNO;
d6a28714 2347 }
b8c5462f
JH
2348 locinput += PL_utf8skip[nextchr];
2349 nextchr = UCHARAT(locinput);
2350 break;
2351 }
ffc61ed2 2352 if (OP(scan) == NALNUM
d6a28714 2353 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2354 sayNO;
76e3520e 2355 nextchr = UCHARAT(++locinput);
a0d0e21e 2356 break;
d6a28714
JH
2357 case BOUNDL:
2358 case NBOUNDL:
3280af22 2359 PL_reg_flags |= RF_tainted;
bbce6d69 2360 /* FALL THROUGH */
d6a28714
JH
2361 case BOUND:
2362 case NBOUND:
2363 /* was last char in word? */
ffc61ed2 2364 if (do_utf8) {
12d33761
HS
2365 if (locinput == PL_bostr)
2366 ln = '\n';
ffc61ed2
JH
2367 else {
2368 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2369
2b9d42f0 2370 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2371 }
2372 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2373 ln = isALNUM_uni(ln);
8269fa76 2374 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2375 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2376 }
2377 else {
9041c2e3 2378 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2379 n = isALNUM_LC_utf8((U8*)locinput);
2380 }
a0ed51b3 2381 }
d6a28714 2382 else {
12d33761
HS
2383 ln = (locinput != PL_bostr) ?
2384 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2385 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2386 ln = isALNUM(ln);
2387 n = isALNUM(nextchr);
2388 }
2389 else {
2390 ln = isALNUM_LC(ln);
2391 n = isALNUM_LC(nextchr);
2392 }
d6a28714 2393 }
ffc61ed2
JH
2394 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2395 OP(scan) == BOUNDL))
2396 sayNO;
a0ed51b3 2397 break;
d6a28714 2398 case SPACEL:
3280af22 2399 PL_reg_flags |= RF_tainted;
bbce6d69 2400 /* FALL THROUGH */
d6a28714 2401 case SPACE:
9442cb0e 2402 if (!nextchr)
4633a7c4 2403 sayNO;
1aa99e6b 2404 if (do_utf8) {
fd400ab9 2405 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2406 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2407 if (!(OP(scan) == SPACE
3568d838 2408 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2409 : isSPACE_LC_utf8((U8*)locinput)))
2410 {
2411 sayNO;
2412 }
2413 locinput += PL_utf8skip[nextchr];
2414 nextchr = UCHARAT(locinput);
2415 break;
d6a28714 2416 }
ffc61ed2
JH
2417 if (!(OP(scan) == SPACE
2418 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2419 sayNO;
2420 nextchr = UCHARAT(++locinput);
2421 }
2422 else {
2423 if (!(OP(scan) == SPACE
2424 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2425 sayNO;
2426 nextchr = UCHARAT(++locinput);
a0ed51b3 2427 }
a0ed51b3 2428 break;
d6a28714 2429 case NSPACEL:
3280af22 2430 PL_reg_flags |= RF_tainted;
bbce6d69 2431 /* FALL THROUGH */
d6a28714 2432 case NSPACE:
9442cb0e 2433 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2434 sayNO;
1aa99e6b 2435 if (do_utf8) {
8269fa76 2436 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2437 if (OP(scan) == NSPACE
3568d838 2438 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2439 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2440 {
2441 sayNO;
2442 }
2443 locinput += PL_utf8skip[nextchr];
2444 nextchr = UCHARAT(locinput);
2445 break;
a0ed51b3 2446 }
ffc61ed2 2447 if (OP(scan) == NSPACE
d6a28714 2448 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2449 sayNO;
76e3520e 2450 nextchr = UCHARAT(++locinput);
a0d0e21e 2451 break;
d6a28714 2452 case DIGITL:
a0ed51b3
LW
2453 PL_reg_flags |= RF_tainted;
2454 /* FALL THROUGH */
d6a28714 2455 case DIGIT:
9442cb0e 2456 if (!nextchr)
a0ed51b3 2457 sayNO;
1aa99e6b 2458 if (do_utf8) {
8269fa76 2459 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2460 if (!(OP(scan) == DIGIT
3568d838 2461 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2462 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2463 {
a0ed51b3 2464 sayNO;
dfe13c55 2465 }
6f06b55f 2466 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2467 nextchr = UCHARAT(locinput);
2468 break;
2469 }
ffc61ed2 2470 if (!(OP(scan) == DIGIT
9442cb0e 2471 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2472 sayNO;
2473 nextchr = UCHARAT(++locinput);
2474 break;
d6a28714 2475 case NDIGITL:
b8c5462f
JH
2476 PL_reg_flags |= RF_tainted;
2477 /* FALL THROUGH */
d6a28714 2478 case NDIGIT:
9442cb0e 2479 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2480 sayNO;
1aa99e6b 2481 if (do_utf8) {
8269fa76 2482 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2483 if (OP(scan) == NDIGIT
3568d838 2484 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2485 : isDIGIT_LC_utf8((U8*)locinput))
2486 {
a0ed51b3 2487 sayNO;
9442cb0e 2488 }
6f06b55f 2489 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2490 nextchr = UCHARAT(locinput);
2491 break;
2492 }
ffc61ed2 2493 if (OP(scan) == NDIGIT
9442cb0e 2494 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2495 sayNO;
2496 nextchr = UCHARAT(++locinput);
2497 break;
2498 case CLUMP:
8269fa76 2499 LOAD_UTF8_CHARCLASS(mark,"~");
3568d838
JH
2500 if (locinput >= PL_regeol ||
2501 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3 2502 sayNO;
6f06b55f 2503 locinput += PL_utf8skip[nextchr];
3568d838
JH
2504 while (locinput < PL_regeol &&
2505 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3
LW
2506 locinput += UTF8SKIP(locinput);
2507 if (locinput > PL_regeol)
2508 sayNO;
2509 nextchr = UCHARAT(locinput);
2510 break;
c8756f30 2511 case REFFL:
3280af22 2512 PL_reg_flags |= RF_tainted;
c8756f30 2513 /* FALL THROUGH */
c277df42 2514 case REF:
c8756f30 2515 case REFF:
c277df42 2516 n = ARG(scan); /* which paren pair */
cf93c79d 2517 ln = PL_regstartp[n];
2c2d71f5 2518 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2519 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2520 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2521 if (ln == PL_regendp[n])
a0d0e21e 2522 break;
a0ed51b3 2523
cf93c79d 2524 s = PL_bostr + ln;
1aa99e6b 2525 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2526 char *l = locinput;
cf93c79d 2527 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2528 /*
2529 * Note that we can't do the "other character" lookup trick as
2530 * in the 8-bit case (no pun intended) because in Unicode we
2531 * have to map both upper and title case to lower case.
2532 */
2533 if (OP(scan) == REFF) {
a2a2844f
JH
2534 STRLEN ulen1, ulen2;
2535 U8 tmpbuf1[UTF8_MAXLEN*2+1];
2536 U8 tmpbuf2[UTF8_MAXLEN*2+1];
a0ed51b3
LW
2537 while (s < e) {
2538 if (l >= PL_regeol)
2539 sayNO;
a2a2844f
JH
2540 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2541 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 2542 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 2543 sayNO;
a2a2844f
JH
2544 s += ulen1;
2545 l += ulen2;
a0ed51b3
LW
2546 }
2547 }
2548 locinput = l;
2549 nextchr = UCHARAT(locinput);
2550 break;
2551 }
2552
a0d0e21e 2553 /* Inline the first character, for speed. */
76e3520e 2554 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2555 (OP(scan) == REF ||
2556 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2557 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2558 sayNO;
cf93c79d 2559 ln = PL_regendp[n] - ln;
3280af22 2560 if (locinput + ln > PL_regeol)
4633a7c4 2561 sayNO;
c8756f30
AK
2562 if (ln > 1 && (OP(scan) == REF
2563 ? memNE(s, locinput, ln)
2564 : (OP(scan) == REFF
2565 ? ibcmp(s, locinput, ln)
2566 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2567 sayNO;
a0d0e21e 2568 locinput += ln;
76e3520e 2569 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2570 break;
2571
2572 case NOTHING:
c277df42 2573 case TAIL:
a0d0e21e
LW
2574 break;
2575 case BACK:
2576 break;
c277df42
IZ
2577 case EVAL:
2578 {
2579 dSP;
533c011a 2580 OP_4tree *oop = PL_op;
3280af22
NIS
2581 COP *ocurcop = PL_curcop;
2582 SV **ocurpad = PL_curpad;
c277df42 2583 SV *ret;
9041c2e3 2584
c277df42 2585 n = ARG(scan);
533c011a 2586 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2587 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2588 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2589 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2590
8e5e9ebe
RGS
2591 {
2592 SV **before = SP;
2593 CALLRUNOPS(aTHX); /* Scalar context. */
2594 SPAGAIN;
2595 if (SP == before)
2596 ret = Nullsv; /* protect against empty (?{}) blocks. */
2597 else {
2598 ret = POPs;
2599 PUTBACK;
2600 }
2601 }
2602
0f5d15d6
IZ
2603 PL_op = oop;
2604 PL_curpad = ocurpad;
2605 PL_curcop = ocurcop;
c277df42 2606 if (logical) {
0f5d15d6
IZ
2607 if (logical == 2) { /* Postponed subexpression. */
2608 regexp *re;
22c35a8c 2609 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2610 re_cc_state state;
0f5d15d6
IZ
2611 CHECKPOINT cp, lastcp;
2612
2613 if(SvROK(ret) || SvRMAGICAL(ret)) {
2614 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2615
2616 if(SvMAGICAL(sv))
14befaf4 2617 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2618 }
2619 if (mg) {
2620 re = (regexp *)mg->mg_obj;
df0003d4 2621 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2622 }
2623 else {
2624 STRLEN len;
2625 char *t = SvPV(ret, len);
2626 PMOP pm;
2627 char *oprecomp = PL_regprecomp;
2628 I32 osize = PL_regsize;
2629 I32 onpar = PL_regnpar;
2630
5fcd1c1b 2631 Zero(&pm, 1, PMOP);
cea2e8a9 2632 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2633 if (!(SvFLAGS(ret)
0f5d15d6 2634 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2635 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2636 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2637 PL_regprecomp = oprecomp;
2638 PL_regsize = osize;
2639 PL_regnpar = onpar;
2640 }
2641 DEBUG_r(
9041c2e3 2642 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2643 "Entering embedded `%s%.60s%s%s'\n",
2644 PL_colors[0],
2645 re->precomp,
2646 PL_colors[1],
2647 (strlen(re->precomp) > 60 ? "..." : ""))
2648 );
2649 state.node = next;
2650 state.prev = PL_reg_call_cc;
2651 state.cc = PL_regcc;
2652 state.re = PL_reg_re;
2653
2ab05381 2654 PL_regcc = 0;
9041c2e3 2655
0f5d15d6 2656 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2657 REGCP_SET(lastcp);
0f5d15d6
IZ
2658 cache_re(re);
2659 state.ss = PL_savestack_ix;
2660 *PL_reglastparen = 0;
a01268b5 2661 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2662 PL_reg_call_cc = &state;
2663 PL_reginput = locinput;
2c2d71f5
JH
2664
2665 /* XXXX This is too dramatic a measure... */
2666 PL_reg_maxiter = 0;
2667
0f5d15d6 2668 if (regmatch(re->program + 1)) {
2c914db6
IZ
2669 /* Even though we succeeded, we need to restore
2670 global variables, since we may be wrapped inside
2671 SUSPEND, thus the match may be not finished yet. */
2672
2673 /* XXXX Do this only if SUSPENDed? */
2674 PL_reg_call_cc = state.prev;
2675 PL_regcc = state.cc;
2676 PL_reg_re = state.re;
2677 cache_re(PL_reg_re);
2678
2679 /* XXXX This is too dramatic a measure... */
2680 PL_reg_maxiter = 0;
2681
2682 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2683 ReREFCNT_dec(re);
2684 regcpblow(cp);
2685 sayYES;
2686 }
0f5d15d6 2687 ReREFCNT_dec(re);
02db2b7b 2688 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2689 regcppop();
2690 PL_reg_call_cc = state.prev;
2691 PL_regcc = state.cc;
2692 PL_reg_re = state.re;
d3790889 2693 cache_re(PL_reg_re);
2c2d71f5
JH
2694
2695 /* XXXX This is too dramatic a measure... */
2696 PL_reg_maxiter = 0;
2697
8e514ae6 2698 logical = 0;
0f5d15d6
IZ
2699 sayNO;
2700 }
c277df42 2701 sw = SvTRUE(ret);
0f5d15d6 2702 logical = 0;
a0ed51b3
LW
2703 }
2704 else
3280af22 2705 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2706 break;
2707 }
a0d0e21e 2708 case OPEN:
c277df42 2709 n = ARG(scan); /* which paren pair */
3280af22
NIS
2710 PL_reg_start_tmp[n] = locinput;
2711 if (n > PL_regsize)
2712 PL_regsize = n;
a0d0e21e
LW
2713 break;
2714 case CLOSE:
c277df42 2715 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2716 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2717 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2718 if (n > *PL_reglastparen)
2719 *PL_reglastparen = n;
a01268b5 2720 *PL_reglastcloseparen = n;
a0d0e21e 2721 break;
c277df42
IZ
2722 case GROUPP:
2723 n = ARG(scan); /* which paren pair */
cf93c79d 2724 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2725 break;
2726 case IFTHEN:
2c2d71f5 2727 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2728 if (sw)
2729 next = NEXTOPER(NEXTOPER(scan));
2730 else {
2731 next = scan + ARG(scan);
2732 if (OP(next) == IFTHEN) /* Fake one. */
2733 next = NEXTOPER(NEXTOPER(next));
2734 }
2735 break;
2736 case LOGICAL:
0f5d15d6 2737 logical = scan->flags;
c277df42 2738 break;
2ab05381
IZ
2739/*******************************************************************
2740 PL_regcc contains infoblock about the innermost (...)* loop, and
2741 a pointer to the next outer infoblock.
2742
2743 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2744
2745 1) After matching X, regnode for CURLYX is processed;
2746
9041c2e3 2747 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2748 regmatch() recursively with the starting point at WHILEM node;
2749
2750 3) Each hit of WHILEM node tries to match A and Z (in the order
2751 depending on the current iteration, min/max of {min,max} and
2752 greediness). The information about where are nodes for "A"
2753 and "Z" is read from the infoblock, as is info on how many times "A"
2754 was already matched, and greediness.
2755
2756 4) After A matches, the same WHILEM node is hit again.
2757
2758 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2759 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2760 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2761 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2762 of the external loop.
2763
2764 Currently present infoblocks form a tree with a stem formed by PL_curcc
2765 and whatever it mentions via ->next, and additional attached trees
2766 corresponding to temporarily unset infoblocks as in "5" above.
2767
9041c2e3 2768 In the following picture infoblocks for outer loop of
2ab05381
IZ
2769 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2770 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2771 infoblocks are drawn below the "reset" infoblock.
2772
2773 In fact in the picture below we do not show failed matches for Z and T
2774 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2775 more obvious *why* one needs to *temporary* unset infoblocks.]
2776
2777 Matched REx position InfoBlocks Comment
2778 (Y(A)*?Z)*?T x
2779 Y(A)*?Z)*?T x <- O
2780 Y (A)*?Z)*?T x <- O
2781 Y A)*?Z)*?T x <- O <- I
2782 YA )*?Z)*?T x <- O <- I
2783 YA A)*?Z)*?T x <- O <- I
2784 YAA )*?Z)*?T x <- O <- I
2785 YAA Z)*?T x <- O # Temporary unset I
2786 I
2787
2788 YAAZ Y(A)*?Z)*?T x <- O
2789 I
2790
2791 YAAZY (A)*?Z)*?T x <- O
2792 I
2793
2794 YAAZY A)*?Z)*?T x <- O <- I
2795 I
2796
2797 YAAZYA )*?Z)*?T x <- O <- I
2798 I
2799
2800 YAAZYA Z)*?T x <- O # Temporary unset I
2801 I,I
2802
2803 YAAZYAZ )*?T x <- O
2804 I,I
2805
2806 YAAZYAZ T x # Temporary unset O
2807 O
2808 I,I
2809
2810 YAAZYAZT x
2811 O
2812 I,I
2813 *******************************************************************/
a0d0e21e
LW
2814 case CURLYX: {
2815 CURCUR cc;
3280af22 2816 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2817 /* No need to save/restore up to this paren */
2818 I32 parenfloor = scan->flags;
c277df42
IZ
2819
2820 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2821 next += ARG(next);
3280af22
NIS
2822 cc.oldcc = PL_regcc;
2823 PL_regcc = &cc;
cb434fcc
IZ
2824 /* XXXX Probably it is better to teach regpush to support
2825 parenfloor > PL_regsize... */
2826 if (parenfloor > *PL_reglastparen)
2827 parenfloor = *PL_reglastparen; /* Pessimization... */
2828 cc.parenfloor = parenfloor;
a0d0e21e
LW
2829 cc.cur = -1;
2830 cc.min = ARG1(scan);
2831 cc.max = ARG2(scan);
c277df42 2832 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2833 cc.next = next;
2834 cc.minmod = minmod;
2835 cc.lastloc = 0;
3280af22 2836 PL_reginput = locinput;
a0d0e21e
LW
2837 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2838 regcpblow(cp);
3280af22 2839 PL_regcc = cc.oldcc;
4633a7c4 2840 saySAME(n);
a0d0e21e
LW
2841 }
2842 /* NOT REACHED */
2843 case WHILEM: {
2844 /*
2845 * This is really hard to understand, because after we match
2846 * what we're trying to match, we must make sure the rest of
2c2d71f5 2847 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2848 * to go back UP the parse tree by recursing ever deeper. And
2849 * if it fails, we have to reset our parent's current state
2850 * that we can try again after backing off.
2851 */
2852
c277df42 2853 CHECKPOINT cp, lastcp;
3280af22 2854 CURCUR* cc = PL_regcc;
c277df42
IZ
2855 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2856
4633a7c4 2857 n = cc->cur + 1; /* how many we know we matched */
3280af22 2858 PL_reginput = locinput;
a0d0e21e 2859
c277df42 2860 DEBUG_r(
9041c2e3
NIS
2861 PerlIO_printf(Perl_debug_log,
2862 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2863 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2864 (long)n, (long)cc->min,
c277df42
IZ
2865 (long)cc->max, (long)cc)
2866 );
4633a7c4 2867
a0d0e21e
LW
2868 /* If degenerate scan matches "", assume scan done. */
2869
579cf2c3 2870 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2871 PL_regcc = cc->oldcc;
2ab05381
IZ
2872 if (PL_regcc)
2873 ln = PL_regcc->cur;
c277df42 2874 DEBUG_r(
c3464db5
DD
2875 PerlIO_printf(Perl_debug_log,
2876 "%*s empty match detected, try continuation...\n",
3280af22 2877 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2878 );
a0d0e21e 2879 if (regmatch(cc->next))
4633a7c4 2880 sayYES;
2ab05381
IZ
2881 if (PL_regcc)
2882 PL_regcc->cur = ln;
3280af22 2883 PL_regcc = cc;
4633a7c4 2884 sayNO;
a0d0e21e
LW
2885 }
2886
2887 /* First just match a string of min scans. */
2888
2889 if (n < cc->min) {
2890 cc->cur = n;
2891 cc->lastloc = locinput;
4633a7c4
LW
2892 if (regmatch(cc->scan))
2893 sayYES;
2894 cc->cur = n - 1;
c277df42 2895 cc->lastloc = lastloc;
4633a7c4 2896 sayNO;
a0d0e21e
LW
2897 }
2898
2c2d71f5
JH
2899 if (scan->flags) {
2900 /* Check whether we already were at this position.
2901 Postpone detection until we know the match is not
2902 *that* much linear. */
2903 if (!PL_reg_maxiter) {
2904 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2905 PL_reg_leftiter = PL_reg_maxiter;
2906 }
2907 if (PL_reg_leftiter-- == 0) {
2908 I32 size = (PL_reg_maxiter + 7)/8;
2909 if (PL_reg_poscache) {
2910 if (PL_reg_poscache_size < size) {
2911 Renew(PL_reg_poscache, size, char);
2912 PL_reg_poscache_size = size;
2913 }
2914 Zero(PL_reg_poscache, size, char);
2915 }
2916 else {
2917 PL_reg_poscache_size = size;
2918 Newz(29, PL_reg_poscache, size, char);
2919 }
2920 DEBUG_r(
2921 PerlIO_printf(Perl_debug_log,
2922 "%sDetected a super-linear match, switching on caching%s...\n",
2923 PL_colors[4], PL_colors[5])
2924 );
2925 }
2926 if (PL_reg_leftiter < 0) {
2927 I32 o = locinput - PL_bostr, b;
2928
2929 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2930 b = o % 8;
2931 o /= 8;
2932 if (PL_reg_poscache[o] & (1<<b)) {
2933 DEBUG_r(
2934 PerlIO_printf(Perl_debug_log,
2935 "%*s already tried at this position...\n",
2936 REPORT_CODE_OFF+PL_regindent*2, "")
2937 );
7821416a 2938 sayNO_SILENT;
2c2d71f5
JH
2939 }
2940 PL_reg_poscache[o] |= (1<<b);
2941 }
2942 }
2943
a0d0e21e
LW
2944 /* Prefer next over scan for minimal matching. */
2945
2946 if (cc->minmod) {
3280af22 2947 PL_regcc = cc->oldcc;
2ab05381
IZ
2948 if (PL_regcc)
2949 ln = PL_regcc->cur;
5f05dabc 2950 cp = regcppush(cc->parenfloor);
02db2b7b 2951 REGCP_SET(lastcp);
5f05dabc 2952 if (regmatch(cc->next)) {
c277df42 2953 regcpblow(cp);
4633a7c4 2954 sayYES; /* All done. */
5f05dabc 2955 }
02db2b7b 2956 REGCP_UNWIND(lastcp);
5f05dabc 2957 regcppop();
2ab05381
IZ
2958 if (PL_regcc)
2959 PL_regcc->cur = ln;
3280af22 2960 PL_regcc = cc;
a0d0e21e 2961
c277df42 2962 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 2963 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2964 && !(PL_reg_flags & RF_warned)) {
2965 PL_reg_flags |= RF_warned;
e476b1b5 2966 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2967 "Complex regular subexpression recursion",
2968 REG_INFTY - 1);
c277df42 2969 }
4633a7c4 2970 sayNO;
c277df42 2971 }
a687059c 2972
c277df42 2973 DEBUG_r(
c3464db5
DD
2974 PerlIO_printf(Perl_debug_log,
2975 "%*s trying longer...\n",
3280af22 2976 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2977 );
a0d0e21e 2978 /* Try scanning more and see if it helps. */
3280af22 2979 PL_reginput = locinput;
a0d0e21e
LW
2980 cc->cur = n;
2981 cc->lastloc = locinput;
5f05dabc 2982 cp = regcppush(cc->parenfloor);
02db2b7b 2983 REGCP_SET(lastcp);
5f05dabc 2984 if (regmatch(cc->scan)) {
c277df42 2985 regcpblow(cp);
4633a7c4 2986 sayYES;
5f05dabc 2987 }
02db2b7b 2988 REGCP_UNWIND(lastcp);
5f05dabc 2989 regcppop();
4633a7c4 2990 cc->cur = n - 1;
c277df42 2991 cc->lastloc = lastloc;
4633a7c4 2992 sayNO;
a0d0e21e
LW
2993 }
2994
2995 /* Prefer scan over next for maximal matching. */
2996
2997 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2998 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2999 cc->cur = n;
3000 cc->lastloc = locinput;
02db2b7b 3001 REGCP_SET(lastcp);
5f05dabc 3002 if (regmatch(cc->scan)) {
c277df42 3003 regcpblow(cp);
4633a7c4 3004 sayYES;
5f05dabc 3005 }
02db2b7b 3006 REGCP_UNWIND(lastcp);
a0d0e21e 3007 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3008 PL_reginput = locinput;
c277df42 3009 DEBUG_r(
c3464db5
DD
3010 PerlIO_printf(Perl_debug_log,
3011 "%*s failed, try continuation...\n",
3280af22 3012 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3013 );
3014 }
9041c2e3 3015 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3016 && !(PL_reg_flags & RF_warned)) {
3280af22 3017 PL_reg_flags |= RF_warned;
e476b1b5 3018 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
3019 "Complex regular subexpression recursion",
3020 REG_INFTY - 1);
a0d0e21e
LW
3021 }
3022
3023 /* Failed deeper matches of scan, so see if this one works. */
3280af22 3024 PL_regcc = cc->oldcc;
2ab05381
IZ
3025 if (PL_regcc)
3026 ln = PL_regcc->cur;
a0d0e21e 3027 if (regmatch(cc->next))
4633a7c4 3028 sayYES;
2ab05381
IZ
3029 if (PL_regcc)
3030 PL_regcc->cur = ln;
3280af22 3031 PL_regcc = cc;
4633a7c4 3032 cc->cur = n - 1;
c277df42 3033 cc->lastloc = lastloc;
4633a7c4 3034 sayNO;
a0d0e21e
LW
3035 }
3036 /* NOT REACHED */
9041c2e3 3037 case BRANCHJ:
c277df42
IZ
3038 next = scan + ARG(scan);
3039 if (next == scan)
3040 next = NULL;
3041 inner = NEXTOPER(NEXTOPER(scan));
3042 goto do_branch;
9041c2e3 3043 case BRANCH:
c277df42
IZ
3044 inner = NEXTOPER(scan);
3045 do_branch:
3046 {
c277df42
IZ
3047 c1 = OP(scan);
3048 if (OP(next) != c1) /* No choice. */
3049 next = inner; /* Avoid recursion. */
a0d0e21e 3050 else {
02db2b7b
IZ
3051 I32 lastparen = *PL_reglastparen;
3052 I32 unwind1;
3053 re_unwind_branch_t *uw;
3054
3055 /* Put unwinding data on stack */
3056 unwind1 = SSNEWt(1,re_unwind_branch_t);
3057 uw = SSPTRt(unwind1,re_unwind_branch_t);
3058 uw->prev = unwind;
3059 unwind = unwind1;
3060 uw->type = ((c1 == BRANCH)
3061 ? RE_UNWIND_BRANCH
3062 : RE_UNWIND_BRANCHJ);
3063 uw->lastparen = lastparen;
3064 uw->next = next;
3065 uw->locinput = locinput;
3066 uw->nextchr = nextchr;
3067#ifdef DEBUGGING
3068 uw->regindent = ++PL_regindent;
3069#endif
c277df42 3070
02db2b7b
IZ
3071 REGCP_SET(uw->lastcp);
3072
3073 /* Now go into the first branch */
3074 next = inner;
a687059c 3075 }
a0d0e21e
LW
3076 }
3077 break;
3078 case MINMOD:
3079 minmod = 1;
3080 break;
c277df42
IZ
3081 case CURLYM:
3082 {
00db4c45 3083 I32 l = 0;
c277df42 3084 CHECKPOINT lastcp;
9041c2e3 3085
c277df42
IZ
3086 /* We suppose that the next guy does not need
3087 backtracking: in particular, it is of constant length,
3088 and has no parenths to influence future backrefs. */
3089 ln = ARG1(scan); /* min to match */
3090 n = ARG2(scan); /* max to match */
c277df42
IZ
3091 paren = scan->flags;
3092 if (paren) {
3280af22
NIS
3093 if (paren > PL_regsize)
3094 PL_regsize = paren;
3095 if (paren > *PL_reglastparen)
3096 *PL_reglastparen = paren;
c277df42 3097 }
dc45a647 3098 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3099 if (paren)
3100 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3101 PL_reginput = locinput;
c277df42
IZ
3102 if (minmod) {
3103 minmod = 0;
3104 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3105 sayNO;
f31a99c8
HS
3106 /* if we matched something zero-length we don't need to
3107 backtrack - capturing parens are already defined, so
3108 the caveat in the maximal case doesn't apply
3109
3110 XXXX if ln == 0, we can redo this check first time
3111 through the following loop
3112 */
3113 if (ln && l == 0)
3114 n = ln; /* don't backtrack */
3280af22 3115 locinput = PL_reginput;
e2d8ce26 3116 if (NEAR_EXACT(next)) {
5f80c4cf
JP
3117 regnode *text_node = next;
3118
3119 if (PL_regkind[(U8)OP(next)] != EXACT)
74750237 3120 NEXT_IMPT(text_node);
5f80c4cf
JP
3121
3122 if (PL_regkind[(U8)OP(text_node)] != EXACT) {
3123 c1 = c2 = -1000;
3124 }
3125 else {
3126 c1 = (U8)*STRING(text_node);
3127 if (OP(next) == EXACTF)
3128 c2 = PL_fold[c1];
3129 else if (OP(text_node) == EXACTFL)
3130 c2 = PL_fold_locale[c1];
3131 else
3132 c2 = c1;
3133 }
a0ed51b3
LW
3134 }
3135 else
c277df42 3136 c1 = c2 = -1000;
02db2b7b 3137 REGCP_SET(lastcp);
5f4b28b2 3138 /* This may be improved if l == 0. */
c277df42
IZ
3139 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3140 /* If it could work, try it. */
3141 if (c1 == -1000 ||
3280af22
NIS
3142 UCHARAT(PL_reginput) == c1 ||
3143 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3144 {
3145 if (paren) {
f31a99c8 3146 if (ln) {
cf93c79d
IZ
3147 PL_regstartp[paren] =
3148 HOPc(PL_reginput, -l) - PL_bostr;
3149 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3150 }
3151 else
cf93c79d 3152 PL_regendp[paren] = -1;
c277df42
IZ
3153 }
3154 if (regmatch(next))
3155 sayYES;
02db2b7b 3156 REGCP_UNWIND(lastcp);
c277df42
IZ
3157 }
3158 /* Couldn't or didn't -- move forward. */
3280af22 3159 PL_reginput = locinput;
c277df42
IZ
3160 if (regrepeat_hard(scan, 1, &l)) {
3161 ln++;
3280af22 3162 locinput = PL_reginput;
c277df42
IZ
3163 }
3164 else
3165 sayNO;
3166 }
a0ed51b3
LW
3167 }
3168 else {
c277df42 3169 n = regrepeat_hard(scan, n, &l);
f31a99c8
HS
3170 /* if we matched something zero-length we don't need to
3171 backtrack, unless the minimum count is zero and we
3172 are capturing the result - in that case the capture
3173 being defined or not may affect later execution
3174 */
3175 if (n != 0 && l == 0 && !(paren && ln == 0))
3176 ln = n; /* don't backtrack */
3280af22 3177 locinput = PL_reginput;
c277df42 3178 DEBUG_r(
5c0ca799 3179 PerlIO_printf(Perl_debug_log,
faccc32b 3180 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3181 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3182 (IV) n, (IV)l)
c277df42
IZ
3183 );
3184 if (n >= ln) {
e2d8ce26 3185 if (NEAR_EXACT(next)) {
5f80c4cf
JP
3186 regnode *text_node = next;
3187
3188 if (PL_regkind[(U8)OP(next)] != EXACT)
74750237 3189 NEXT_IMPT(text_node);
5f80c4cf
JP
3190
3191 if (PL_regkind[(U8)OP(text_node)] != EXACT) {
3192 c1 = c2 = -1000;
3193 }
3194 else {
3195 c1 = (U8)*STRING(text_node);
3196 if (OP(text_node) == EXACTF)
3197 c2 = PL_fold[c1];
3198 else if (OP(text_node) == EXACTFL)
3199 c2 = PL_fold_locale[c1];
3200 else
3201 c2 = c1;
3202 }
a0ed51b3
LW
3203 }
3204 else
c277df42
IZ
3205 c1 = c2 = -1000;
3206 }
02db2b7b 3207 REGCP_SET(lastcp);
c277df42
IZ
3208 while (n >= ln) {
3209 /* If it could work, try it. */
3210 if (c1 == -1000 ||
3280af22
NIS
3211 UCHARAT(PL_reginput) == c1 ||
3212 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3213 {
3214 DEBUG_r(
c3464db5 3215 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3216 "%*s trying tail with n=%"IVdf"...\n",
3217 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3218 );
3219 if (paren) {
3220 if (n) {
cf93c79d
IZ
3221 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3222 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3223 }
a0ed51b3 3224 else
cf93c79d 3225 PL_regendp[paren] = -1;
c277df42 3226 }
a0ed51b3
LW
3227 if (regmatch(next))
3228 sayYES;
02db2b7b 3229 REGCP_UNWIND(lastcp);
a0ed51b3 3230 }
c277df42
IZ
3231 /* Couldn't or didn't -- back up. */
3232 n--;
dfe13c55 3233 locinput = HOPc(locinput, -l);
3280af22 3234 PL_reginput = locinput;
c277df42
IZ
3235 }
3236 }
3237 sayNO;
3238 break;
3239 }
3240 case CURLYN:
3241 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3242 if (paren > PL_regsize)
3243 PL_regsize = paren;
3244 if (paren > *PL_reglastparen)
3245 *PL_reglastparen = paren;
c277df42
IZ
3246 ln = ARG1(scan); /* min to match */
3247 n = ARG2(scan); /* max to match */
dc45a647 3248 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3249 goto repeat;
a0d0e21e 3250 case CURLY:
c277df42 3251 paren = 0;
a0d0e21e
LW
3252 ln = ARG1(scan); /* min to match */
3253 n = ARG2(scan); /* max to match */
dc45a647 3254 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3255 goto repeat;
3256 case STAR:
3257 ln = 0;
c277df42 3258 n = REG_INFTY;
a0d0e21e 3259 scan = NEXTOPER(scan);
c277df42 3260 paren = 0;
a0d0e21e
LW
3261 goto repeat;
3262 case PLUS:
c277df42
IZ
3263 ln = 1;
3264 n = REG_INFTY;
3265 scan = NEXTOPER(scan);
3266 paren = 0;
3267 repeat:
a0d0e21e
LW
3268 /*
3269 * Lookahead to avoid useless match attempts
3270 * when we know what character comes next.
3271 */
5f80c4cf
JP
3272
3273 /*
3274 * Used to only do .*x and .*?x, but now it allows
3275 * for )'s, ('s and (?{ ... })'s to be in the way
3276 * of the quantifier and the EXACT-like node. -- japhy
3277 */
3278
e2d8ce26 3279 if (NEAR_EXACT(next)) {
5f80c4cf
JP
3280 U8 *s;
3281 regnode *text_node = next;
3282
3283 if (PL_regkind[(U8)OP(next)] != EXACT)
74750237 3284 NEXT_IMPT(text_node);
5f80c4cf
JP
3285
3286 if (PL_regkind[(U8)OP(text_node)] != EXACT) {
3287 c1 = c2 = -1000;
3288 }
3289 else {
3290 s = (U8*)STRING(text_node);
3291
3292 if (!UTF) {
3293 c2 = c1 = *s;
3294 if (OP(text_node) == EXACTF)
3295 c2 = PL_fold[c1];
3296 else if (OP(text_node) == EXACTFL)
3297 c2 = PL_fold_locale[c1];
1aa99e6b 3298 }
5f80c4cf
JP
3299 else { /* UTF */
3300 if (OP(text_node) == EXACTF) {
a2a2844f
JH
3301 STRLEN ulen1, ulen2;
3302 U8 tmpbuf1[UTF8_MAXLEN*2+1];
3303 U8 tmpbuf2[UTF8_MAXLEN*2+1];
3304
3305 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3306 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3307
3308 c1 = utf8_to_uvuni(tmpbuf1, 0);
3309 c2 = utf8_to_uvuni(tmpbuf2, 0);
5f80c4cf
JP
3310 }
3311 else {
3312 c2 = c1 = utf8_to_uvchr(s, NULL);
3313 }
1aa99e6b
IH
3314 }
3315 }
bbce6d69 3316 }
a0d0e21e 3317 else
bbce6d69 3318 c1 = c2 = -1000;
3280af22 3319 PL_reginput = locinput;
a0d0e21e 3320 if (minmod) {
c277df42 3321 CHECKPOINT lastcp;
a0d0e21e
LW
3322 minmod = 0;
3323 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3324 sayNO;
a0ed51b3 3325 locinput = PL_reginput;
02db2b7b 3326 REGCP_SET(lastcp);
0fe9bf95 3327 if (c1 != -1000) {
1aa99e6b 3328 char *e; /* Should not check after this */
0fe9bf95
IZ
3329 char *old = locinput;
3330
1aa99e6b 3331 if (n == REG_INFTY) {
0fe9bf95 3332 e = PL_regeol - 1;
1aa99e6b
IH
3333 if (do_utf8)
3334 while (UTF8_IS_CONTINUATION(*(U8*)e))
3335 e--;
3336 }
3337 else if (do_utf8) {
3338 int m = n - ln;
3339 for (e = locinput;
3340 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3341 e += UTF8SKIP(e);
3342 }
3343 else {
3344 e = locinput + n - ln;
3345 if (e >= PL_regeol)
3346 e = PL_regeol - 1;
3347 }
0fe9bf95 3348 while (1) {
1aa99e6b 3349 int count;
0fe9bf95 3350 /* Find place 'next' could work */
1aa99e6b
IH
3351 if (!do_utf8) {
3352 if (c1 == c2) {
a8e8ab15
JH
3353 while (locinput <= e &&
3354 UCHARAT(locinput) != c1)
1aa99e6b
IH
3355 locinput++;
3356 } else {
9041c2e3 3357 while (locinput <= e
a8e8ab15
JH
3358 && UCHARAT(locinput) != c1
3359 && UCHARAT(locinput) != c2)
1aa99e6b
IH
3360 locinput++;
3361 }
3362 count = locinput - old;
3363 }
3364 else {
3365 STRLEN len;
3366 if (c1 == c2) {
3367 for (count = 0;
3368 locinput <= e &&
9041c2e3 3369 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3370 count++)
3371 locinput += len;
3372
3373 } else {
3374 for (count = 0; locinput <= e; count++) {
9041c2e3 3375 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3376 if (c == c1 || c == c2)
3377 break;
9041c2e3 3378 locinput += len;
1aa99e6b
IH
3379 }
3380 }
0fe9bf95 3381 }
9041c2e3 3382 if (locinput > e)
0fe9bf95
IZ
3383 sayNO;
3384 /* PL_reginput == old now */
3385 if (locinput != old) {
3386 ln = 1; /* Did some */
1aa99e6b 3387 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3388 sayNO;
3389 }
3390 /* PL_reginput == locinput now */
29d1e993 3391 TRYPAREN(paren, ln, locinput);
0fe9bf95 3392 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3393 REGCP_UNWIND(lastcp);
0fe9bf95 3394 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3395 old = locinput;
3396 if (do_utf8)
3397 locinput += UTF8SKIP(locinput);
3398 else
3399 locinput++;
0fe9bf95
IZ
3400 }
3401 }
3402 else
c277df42 3403 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3404 UV c;
3405 if (c1 != -1000) {
3406 if (do_utf8)
9041c2e3 3407 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3408 else
9041c2e3 3409 c = UCHARAT(PL_reginput);
2390ecbc
PP
3410 /* If it could work, try it. */
3411 if (c == c1 || c == c2)
3412 {
3413 TRYPAREN(paren, n, PL_reginput);
3414 REGCP_UNWIND(lastcp);
3415 }
1aa99e6b 3416 }
a0d0e21e 3417 /* If it could work, try it. */
2390ecbc 3418 else if (c1 == -1000)
bbce6d69 3419 {
29d1e993 3420 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3421 REGCP_UNWIND(lastcp);
bbce6d69 3422 }
c277df42 3423 /* Couldn't or didn't -- move forward. */
a0ed51b3 3424 PL_reginput = locinput;
a0d0e21e
LW
3425 if (regrepeat(scan, 1)) {
3426 ln++;
a0ed51b3
LW
3427 locinput = PL_reginput;
3428 }
3429 else
4633a7c4 3430 sayNO;
a0d0e21e
LW
3431 }
3432 }
3433 else {
c277df42 3434 CHECKPOINT lastcp;
a0d0e21e 3435 n = regrepeat(scan, n);
a0ed51b3 3436 locinput = PL_reginput;
22c35a8c 3437 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3438 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3439 ln = n; /* why back off? */
1aeab75a
GS
3440 /* ...because $ and \Z can match before *and* after
3441 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3442 We should back off by one in this case. */
3443 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3444 ln--;
3445 }
02db2b7b 3446 REGCP_SET(lastcp);
c277df42 3447 if (paren) {
8fa7f367 3448 UV c = 0;
c277df42 3449 while (n >= ln) {
1aa99e6b
IH
3450 if (c1 != -1000) {
3451 if (do_utf8)
9041c2e3 3452 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3453 else
9041c2e3 3454 c = UCHARAT(PL_reginput);
1aa99e6b 3455 }
c277df42 3456 /* If it could work, try it. */
1aa99e6b 3457 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3458 {
29d1e993 3459 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3460 REGCP_UNWIND(lastcp);
c277df42
IZ
3461 }
3462 /* Couldn't or didn't -- back up. */
3463 n--;
dfe13c55 3464 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3465 }
a0ed51b3
LW
3466 }
3467 else {
8fa7f367 3468 UV c = 0;
c277df42 3469 while (n >= ln) {
1aa99e6b
IH
3470 if (c1 != -1000) {
3471 if (do_utf8)
9041c2e3 3472 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3473 else
9041c2e3 3474 c = UCHARAT(PL_reginput);
1aa99e6b 3475 }
c277df42 3476 /* If it could work, try it. */
1aa99e6b 3477 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3478 {
29d1e993 3479 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3480 REGCP_UNWIND(lastcp);
c277df42
IZ
3481 }
3482 /* Couldn't or didn't -- back up. */
3483 n--;
dfe13c55 3484 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3485 }
a0d0e21e
LW
3486 }
3487 }
4633a7c4 3488 sayNO;
c277df42 3489 break;
a0d0e21e 3490 case END:
0f5d15d6
IZ
3491 if (PL_reg_call_cc) {
3492 re_cc_state *cur_call_cc = PL_reg_call_cc;
3493 CURCUR *cctmp = PL_regcc;
3494 regexp *re = PL_reg_re;
3495 CHECKPOINT cp, lastcp;
3496
3497 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3498 REGCP_SET(lastcp);
0f5d15d6
IZ
3499 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3500 the caller. */
3501 PL_reginput = locinput; /* Make position available to
3502 the callcc. */
3503 cache_re(PL_reg_call_cc->re);
3504 PL_regcc = PL_reg_call_cc->cc;
3505 PL_reg_call_cc = PL_reg_call_cc->prev;
3506 if (regmatch(cur_call_cc->node)) {
3507 PL_reg_call_cc = cur_call_cc;
3508 regcpblow(cp);
3509 sayYES;
3510 }
02db2b7b 3511 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3512 regcppop();
3513 PL_reg_call_cc = cur_call_cc;
3514 PL_regcc = cctmp;
3515 PL_reg_re = re;
3516 cache_re(re);
3517
3518 DEBUG_r(
3519 PerlIO_printf(Perl_debug_log,
3520 "%*s continuation failed...\n",
3521 REPORT_CODE_OFF+PL_regindent*2, "")
3522 );
7821416a 3523 sayNO_SILENT;
0f5d15d6 3524 }
7821416a
IZ
3525 if (locinput < PL_regtill) {
3526 DEBUG_r(PerlIO_printf(Perl_debug_log,
3527 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3528 PL_colors[4],
3529 (long)(locinput - PL_reg_starttry),
3530 (long)(PL_regtill - PL_reg_starttry),
3531 PL_colors[5]));
3532 sayNO_FINAL; /* Cannot match: too short. */
3533 }
3534 PL_reginput = locinput; /* put where regtry can find it */
3535 sayYES_FINAL; /* Success! */
7e5428c5 3536 case SUCCEED:
3280af22 3537 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3538 sayYES_LOUD; /* Success! */
c277df42
IZ
3539 case SUSPEND:
3540 n = 1;
9fe1d20c 3541 PL_reginput = locinput;
9041c2e3 3542 goto do_ifmatch;
a0d0e21e 3543 case UNLESSM:
c277df42 3544 n = 0;
a0ed51b3 3545 if (scan->flags) {
efb30f32
HS
3546 s = HOPBACKc(locinput, scan->flags);
3547 if (!s)
3548 goto say_yes;
3549 PL_reginput = s;
a0ed51b3
LW
3550 }
3551 else
3552 PL_reginput = locinput;
c277df42
IZ
3553 goto do_ifmatch;
3554 case IFMATCH:
3555 n = 1;
a0ed51b3 3556 if (scan->flags) {
efb30f32
HS
3557 s = HOPBACKc(locinput, scan->flags);
3558 if (!s)
3559 goto say_no;
3560 PL_reginput = s;
a0ed51b3
LW
3561 }
3562 else
3563 PL_reginput = locinput;
3564
c277df42 3565 do_ifmatch:
c277df42
IZ
3566 inner = NEXTOPER(NEXTOPER(scan));
3567 if (regmatch(inner) != n) {
3568 say_no:
3569 if (logical) {
3570 logical = 0;
3571 sw = 0;
3572 goto do_longjump;
a0ed51b3
LW
3573 }
3574 else
c277df42
IZ
3575 sayNO;
3576 }
3577 say_yes:
3578 if (logical) {
3579 logical = 0;
3580 sw = 1;
3581 }
fe44a5e8 3582 if (OP(scan) == SUSPEND) {
3280af22 3583 locinput = PL_reginput;
565764a8 3584 nextchr = UCHARAT(locinput);
fe44a5e8 3585 }
c277df42
IZ
3586 /* FALL THROUGH. */
3587 case LONGJMP:
3588 do_longjump:
3589 next = scan + ARG(scan);
3590 if (next == scan)
3591 next = NULL;
a0d0e21e
LW
3592 break;
3593 default:
b900a521 3594 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3595 PTR2UV(scan), OP(scan));
cea2e8a9 3596 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3597 }
02db2b7b 3598 reenter:
a0d0e21e
LW
3599 scan = next;
3600 }
a687059c 3601
a0d0e21e
LW
3602 /*
3603 * We get here only if there's trouble -- normally "case END" is
3604 * the terminating point.
3605 */
cea2e8a9 3606 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3607 /*NOTREACHED*/
4633a7c4
LW
3608 sayNO;
3609
7821416a
IZ
3610yes_loud:
3611 DEBUG_r(
3612 PerlIO_printf(Perl_debug_log,
3613 "%*s %scould match...%s\n",
3614 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3615 );
3616 goto yes;
3617yes_final:
3618 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3619 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3620yes:
3621#ifdef DEBUGGING
3280af22 3622 PL_regindent--;
4633a7c4 3623#endif
02db2b7b
IZ
3624
3625#if 0 /* Breaks $^R */
3626 if (unwind)
3627 regcpblow(firstcp);
3628#endif
4633a7c4
LW
3629 return 1;
3630
3631no:
7821416a
IZ
3632 DEBUG_r(
3633 PerlIO_printf(Perl_debug_log,
3634 "%*s %sfailed...%s\n",
3635 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3636 );
3637 goto do_no;
3638no_final:
3639do_no:
02db2b7b
IZ
3640 if (unwind) {
3641 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3642
3643 switch (uw->type) {
3644 case RE_UNWIND_BRANCH:
3645 case RE_UNWIND_BRANCHJ:
3646 {
3647 re_unwind_branch_t *uwb = &(uw->branch);
3648 I32 lastparen = uwb->lastparen;
9041c2e3 3649
02db2b7b
IZ
3650 REGCP_UNWIND(uwb->lastcp);
3651 for (n = *PL_reglastparen; n > lastparen; n--)
3652 PL_regendp[n] = -1;
3653 *PL_reglastparen = n;
3654 scan = next = uwb->next;
9041c2e3
NIS
3655 if ( !scan ||
3656 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
3657 ? BRANCH : BRANCHJ) ) { /* Failure */
3658 unwind = uwb->prev;
3659#ifdef DEBUGGING
3660 PL_regindent--;
3661#endif
3662 goto do_no;
3663 }
3664 /* Have more choice yet. Reuse the same uwb. */
3665 /*SUPPRESS 560*/
3666 if ((n = (uwb->type == RE_UNWIND_BRANCH
3667 ? NEXT_OFF(next) : ARG(next))))
3668 next += n;
3669 else
3670 next = NULL; /* XXXX Needn't unwinding in this case... */
3671 uwb->next = next;
3672 next = NEXTOPER(scan);
3673 if (uwb->type == RE_UNWIND_BRANCHJ)
3674 next = NEXTOPER(next);
3675 locinput = uwb->locinput;
3676 nextchr = uwb->nextchr;
3677#ifdef DEBUGGING
3678 PL_regindent = uwb->regindent;
3679#endif
3680
3681 goto reenter;
3682 }
3683 /* NOT REACHED */
3684 default:
3685 Perl_croak(aTHX_ "regexp unwind memory corruption");
3686 }
3687 /* NOT REACHED */
3688 }
4633a7c4 3689#ifdef DEBUGGING
3280af22 3690 PL_regindent--;
4633a7c4 3691#endif
a0d0e21e 3692 return 0;
a687059c
LW
3693}
3694
3695/*
3696 - regrepeat - repeatedly match something simple, report how many
3697 */
3698/*
3699 * [This routine now assumes that it will only match on things of length 1.
3700 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3701 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3702 */
76e3520e 3703STATIC I32
cea2e8a9 3704S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3705{
a0d0e21e 3706 register char *scan;
a0d0e21e 3707 register I32 c;
3280af22 3708 register char *loceol = PL_regeol;
a0ed51b3 3709 register I32 hardcount = 0;
53c4c00c 3710 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 3711
3280af22 3712 scan = PL_reginput;
c277df42 3713 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3714 loceol = scan + max;
a0d0e21e 3715 switch (OP(p)) {
22c35a8c 3716 case REG_ANY:
1aa99e6b 3717 if (do_utf8) {
ffc61ed2 3718 loceol = PL_regeol;
1aa99e6b 3719 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3720 scan += UTF8SKIP(scan);
3721 hardcount++;
3722 }
3723 } else {
3724 while (scan < loceol && *scan != '\n')
3725 scan++;
a0ed51b3
LW
3726 }
3727 break;
ffc61ed2 3728 case SANY:
3baa4c62 3729 scan = loceol;
a0ed51b3 3730 break;
f33976b4
DB
3731 case CANY:
3732 scan = loceol;
3733 break;
bbce6d69 3734 case EXACT: /* length of string is 1 */
cd439c50 3735 c = (U8)*STRING(p);
bbce6d69 3736 while (scan < loceol && UCHARAT(scan) == c)
3737 scan++;
3738 break;
3739 case EXACTF: /* length of string is 1 */
cd439c50 3740 c = (U8)*STRING(p);
bbce6d69 3741 while (scan < loceol &&
22c35a8c 3742 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3743 scan++;
3744 break;
3745 case EXACTFL: /* length of string is 1 */
3280af22 3746 PL_reg_flags |= RF_tainted;
cd439c50 3747 c = (U8)*STRING(p);
bbce6d69 3748 while (scan < loceol &&
22c35a8c 3749 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3750 scan++;
3751 break;
3752 case ANYOF:
ffc61ed2
JH
3753 if (do_utf8) {
3754 loceol = PL_regeol;
1aa99e6b
IH
3755 while (hardcount < max && scan < loceol &&
3756 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3757 scan += UTF8SKIP(scan);
3758 hardcount++;
3759 }
3760 } else {
3761 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3762 scan++;
3763 }
a0d0e21e
LW
3764 break;
3765 case ALNUM:
1aa99e6b 3766 if (do_utf8) {
ffc61ed2 3767 loceol = PL_regeol;
8269fa76 3768 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3769 while (hardcount < max && scan < loceol &&
3568d838 3770 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3771 scan += UTF8SKIP(scan);
3772 hardcount++;
3773 }
3774 } else {
3775 while (scan < loceol && isALNUM(*scan))
3776 scan++;
a0ed51b3
LW
3777 }
3778 break;
bbce6d69 3779 case ALNUML:
3280af22 3780 PL_reg_flags |= RF_tainted;
1aa99e6b 3781 if (do_utf8) {
ffc61ed2 3782 loceol = PL_regeol;
1aa99e6b
IH
3783 while (hardcount < max && scan < loceol &&
3784 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3785 scan += UTF8SKIP(scan);
3786 hardcount++;
3787 }
3788 } else {
3789 while (scan < loceol && isALNUM_LC(*scan))
3790 scan++;
a0ed51b3
LW
3791 }
3792 break;
a0d0e21e 3793 case NALNUM:
1aa99e6b 3794 if (do_utf8) {
ffc61ed2 3795 loceol = PL_regeol;
8269fa76 3796 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3797 while (hardcount < max && scan < loceol &&
3568d838 3798 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3799 scan += UTF8SKIP(scan);
3800 hardcount++;
3801 }
3802 } else {
3803 while (scan < loceol && !isALNUM(*scan))
3804 scan++;
a0ed51b3
LW
3805 }
3806 break;
bbce6d69 3807 case NALNUML:
3280af22 3808 PL_reg_flags |= RF_tainted;
1aa99e6b 3809 if (do_utf8) {
ffc61ed2 3810 loceol = PL_regeol;
1aa99e6b
IH
3811 while (hardcount < max && scan < loceol &&
3812 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3813 scan += UTF8SKIP(scan);
3814 hardcount++;
3815 }
3816 } else {
3817 while (scan < loceol && !isALNUM_LC(*scan))
3818 scan++;
a0ed51b3
LW
3819 }
3820 break;
a0d0e21e 3821 case SPACE:
1aa99e6b 3822 if (do_utf8) {
ffc61ed2 3823 loceol = PL_regeol;
8269fa76 3824 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3825 while (hardcount < max && scan < loceol &&
3568d838
JH
3826 (*scan == ' ' ||
3827 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3828 scan += UTF8SKIP(scan);
3829 hardcount++;
3830 }
3831 } else {
3832 while (scan < loceol && isSPACE(*scan))
3833 scan++;
a0ed51b3
LW
3834 }
3835 break;
bbce6d69 3836 case SPACEL:
3280af22 3837 PL_reg_flags |= RF_tainted;
1aa99e6b 3838 if (do_utf8) {
ffc61ed2 3839 loceol = PL_regeol;
1aa99e6b 3840 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3841 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3842 scan += UTF8SKIP(scan);
3843 hardcount++;
3844 }
3845 } else {
3846 while (scan < loceol && isSPACE_LC(*scan))
3847 scan++;
a0ed51b3
LW
3848 }
3849 break;
a0d0e21e 3850 case NSPACE:
1aa99e6b 3851 if (do_utf8) {
ffc61ed2 3852 loceol = PL_regeol;
8269fa76 3853 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3854 while (hardcount < max && scan < loceol &&
3568d838
JH
3855 !(*scan == ' ' ||
3856 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3857 scan += UTF8SKIP(scan);
3858 hardcount++;
3859 }
3860 } else {
3861 while (scan < loceol && !isSPACE(*scan))
3862 scan++;
3863 break;
a0ed51b3 3864 }
bbce6d69 3865 case NSPACEL:
3280af22 3866 PL_reg_flags |= RF_tainted;
1aa99e6b 3867 if (do_utf8) {
ffc61ed2 3868 loceol = PL_regeol;
1aa99e6b 3869 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3870 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3871 scan += UTF8SKIP(scan);
3872 hardcount++;
3873 }
3874 } else {
3875 while (scan < loceol && !isSPACE_LC(*scan))
3876 scan++;
a0ed51b3
LW
3877 }
3878 break;
a0d0e21e 3879 case DIGIT:
1aa99e6b 3880 if (do_utf8) {
ffc61ed2 3881 loceol = PL_regeol;
8269fa76 3882 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3883 while (hardcount < max && scan < loceol &&
3568d838 3884 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3885 scan += UTF8SKIP(scan);
3886 hardcount++;
3887 }
3888 } else {
3889 while (scan < loceol && isDIGIT(*scan))
3890 scan++;
a0ed51b3
LW
3891 }
3892 break;
a0d0e21e 3893 case NDIGIT:
1aa99e6b 3894 if (do_utf8) {
ffc61ed2 3895 loceol = PL_regeol;
8269fa76 3896 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3897 while (hardcount < max && scan < loceol &&
3568d838 3898 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3899 scan += UTF8SKIP(scan);
3900 hardcount++;
3901 }
3902 } else {
3903 while (scan < loceol && !isDIGIT(*scan))
3904 scan++;
a0ed51b3
LW
3905 }
3906 break;
a0d0e21e
LW
3907 default: /* Called on something of 0 width. */
3908 break; /* So match right here or not at all. */
3909 }
a687059c 3910
a0ed51b3
LW
3911 if (hardcount)
3912 c = hardcount;
3913 else
3914 c = scan - PL_reginput;
3280af22 3915 PL_reginput = scan;
a687059c 3916
9041c2e3 3917 DEBUG_r(
c277df42
IZ
3918 {
3919 SV *prop = sv_newmortal();
3920
3921 regprop(prop, p);
9041c2e3
NIS
3922 PerlIO_printf(Perl_debug_log,
3923 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
7b0972df 3924 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42 3925 });
9041c2e3 3926
a0d0e21e 3927 return(c);
a687059c
LW
3928}
3929
3930/*
c277df42 3931 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 3932 *
c277df42
IZ
3933 * The repeater is supposed to have constant length.
3934 */
3935
76e3520e 3936STATIC I32
cea2e8a9 3937S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3938{
b7953727 3939 register char *scan = Nullch;
c277df42 3940 register char *start;
3280af22 3941 register char *loceol = PL_regeol;
a0ed51b3 3942 I32 l = 0;
708e3b05 3943 I32 count = 0, res = 1;
a0ed51b3
LW
3944
3945 if (!max)
3946 return 0;
c277df42 3947
3280af22 3948 start = PL_reginput;
53c4c00c 3949 if (PL_reg_match_utf8) {
708e3b05 3950 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3951 if (!count++) {
3952 l = 0;
3953 while (start < PL_reginput) {
3954 l++;
3955 start += UTF8SKIP(start);
3956 }
3957 *lp = l;
3958 if (l == 0)
3959 return max;
3960 }
3961 if (count == max)
3962 return count;
3963 }
3964 }
3965 else {
708e3b05 3966 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3967 if (!count++) {
3968 *lp = l = PL_reginput - start;
3969 if (max != REG_INFTY && l*max < loceol - scan)
3970 loceol = scan + l*max;
3971 if (l == 0)
3972 return max;
c277df42
IZ
3973 }
3974 }
3975 }
708e3b05 3976 if (!res)
3280af22 3977 PL_reginput = scan;
9041c2e3 3978
a0ed51b3 3979 return count;
c277df42
IZ
3980}
3981
3982/*
ffc61ed2
JH
3983- regclass_swash - prepare the utf8 swash
3984*/
3985
3986SV *
3987Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
3988{
3989 SV *sw = NULL;
3990 SV *si = NULL;
3991
3992 if (PL_regdata && PL_regdata->count) {
3993 U32 n = ARG(node);
3994
3995 if (PL_regdata->what[n] == 's') {
3996 SV *rv = (SV*)PL_regdata->data[n];
3997 AV *av = (AV*)SvRV((SV*)rv);
3998 SV **a;
9041c2e3 3999
ffc61ed2
JH
4000 si = *av_fetch(av, 0, FALSE);
4001 a = av_fetch(av, 1, FALSE);
9041c2e3 4002
ffc61ed2
JH
4003 if (a)
4004 sw = *a;
4005 else if (si && doinit) {
4006 sw = swash_init("utf8", "", si, 1, 0);
4007 (void)av_store(av, 1, sw);
4008 }
4009 }
4010 }
4011
4012 if (initsvp)
4013 *initsvp = si;
4014
4015 return sw;
4016}
4017
4018/*
cb8d8820 4019 - reginclass - determine if a character falls into a character class
bbce6d69 4020 */
4021
76e3520e 4022STATIC bool
ffc61ed2 4023S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
bbce6d69 4024{
ffc61ed2 4025 char flags = ANYOF_FLAGS(n);
bbce6d69 4026 bool match = FALSE;
1aa99e6b 4027 UV c;
3568d838 4028 STRLEN len = 0;
1aa99e6b 4029
3568d838 4030 c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
bbce6d69 4031
ffc61ed2
JH
4032 if (do_utf8 || (flags & ANYOF_UNICODE)) {
4033 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
4034 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4035 match = TRUE;
bbce6d69 4036 }
3568d838 4037 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 4038 match = TRUE;
ffc61ed2
JH
4039 if (!match) {
4040 SV *sw = regclass_swash(n, TRUE, 0);
4041
4042 if (sw) {
3568d838 4043 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
4044 match = TRUE;
4045 else if (flags & ANYOF_FOLD) {
a2a2844f
JH
4046 STRLEN ulen;
4047 U8 tmpbuf[UTF8_MAXLEN*2+1];
4048
4049 toLOWER_utf8(p, tmpbuf, &ulen);
3568d838 4050 if (swash_fetch(sw, tmpbuf, do_utf8))
ffc61ed2
JH
4051 match = TRUE;
4052 }
4053 }
bbce6d69 4054 }
4055 }
1aa99e6b 4056 if (!match && c < 256) {
ffc61ed2
JH
4057 if (ANYOF_BITMAP_TEST(n, c))
4058 match = TRUE;
4059 else if (flags & ANYOF_FOLD) {
3568d838 4060 I32 f;
a0ed51b3 4061
ffc61ed2
JH
4062 if (flags & ANYOF_LOCALE) {
4063 PL_reg_flags |= RF_tainted;
4064 f = PL_fold_locale[c];
4065 }
4066 else
4067 f = PL_fold[c];
4068 if (f != c && ANYOF_BITMAP_TEST(n, f))
4069 match = TRUE;
4070 }
4071
4072 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 4073 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
4074 if (
4075 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4076 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4077 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
4078 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4079 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4080 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4081 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4082 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4083 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4084 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4085 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
4086 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
4087 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4088 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4089 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4090 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4091 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
4092 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4093 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
4094 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4095 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4096 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4097 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
4098 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4099 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4100 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
4101 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
4102 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
4103 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
4104 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
4105 ) /* How's that for a conditional? */
4106 {
4107 match = TRUE;
4108 }
a0ed51b3 4109 }
a0ed51b3
LW
4110 }
4111
a0ed51b3
LW
4112 return (flags & ANYOF_INVERT) ? !match : match;
4113}
161b471a 4114
dfe13c55 4115STATIC U8 *
cea2e8a9 4116S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 4117{
1aa99e6b
IH
4118 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4119}
4120
4121STATIC U8 *
4122S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 4123{
a0ed51b3 4124 if (off >= 0) {
1aa99e6b 4125 while (off-- && s < lim) {
ffc61ed2 4126 /* XXX could check well-formedness here */
a0ed51b3 4127 s += UTF8SKIP(s);
ffc61ed2 4128 }
a0ed51b3
LW
4129 }
4130 else {
4131 while (off++) {
1aa99e6b 4132 if (s > lim) {
a0ed51b3 4133 s--;
ffc61ed2 4134 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4135 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4136 s--;
ffc61ed2
JH
4137 }
4138 /* XXX could check well-formedness here */
a0ed51b3
LW
4139 }
4140 }
4141 }
4142 return s;
4143}
161b471a 4144
dfe13c55 4145STATIC U8 *
1aa99e6b 4146S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 4147{
1aa99e6b
IH
4148 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4149}
4150
4151STATIC U8 *
4152S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
4153{
4154 if (off >= 0) {
1aa99e6b 4155 while (off-- && s < lim) {
ffc61ed2 4156 /* XXX could check well-formedness here */
a0ed51b3 4157 s += UTF8SKIP(s);
ffc61ed2 4158 }
a0ed51b3
LW
4159 if (off >= 0)
4160 return 0;
4161 }
4162 else {
4163 while (off++) {
1aa99e6b 4164 if (s > lim) {
a0ed51b3 4165 s--;
ffc61ed2 4166 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4167 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4168 s--;
ffc61ed2
JH
4169 }
4170 /* XXX could check well-formedness here */
a0ed51b3
LW
4171 }
4172 else
4173 break;
4174 }
4175 if (off <= 0)
4176 return 0;
4177 }
4178 return s;
4179}
51371543 4180
51371543 4181static void
acfe0abc 4182restore_pos(pTHX_ void *arg)
51371543 4183{
51371543
GS
4184 if (PL_reg_eval_set) {
4185 if (PL_reg_oldsaved) {
4186 PL_reg_re->subbeg = PL_reg_oldsaved;
4187 PL_reg_re->sublen = PL_reg_oldsavedlen;
4188 RX_MATCH_COPIED_on(PL_reg_re);
4189 }
4190 PL_reg_magic->mg_len = PL_reg_oldpos;
4191 PL_reg_eval_set = 0;
4192 PL_curpm = PL_reg_oldcurpm;
4193 }
4194}