This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Unused variable.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
a687059c
LW
83#include "regcomp.h"
84
c277df42
IZ
85#define RF_tainted 1 /* tainted information used? */
86#define RF_warned 2 /* warned about big count? */
ce862d02 87#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
88#define RF_utf8 8 /* String contains multibyte chars? */
89
90#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
91
92#define RS_init 1 /* eval environment created */
93#define RS_set 2 /* replsv value is set */
c277df42 94
a687059c
LW
95#ifndef STATIC
96#define STATIC static
97#endif
98
c277df42
IZ
99/*
100 * Forwards.
101 */
102
a0ed51b3 103#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 104#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 105
dfe13c55
GS
106#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
108#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
109#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
110#define HOPc(pos,off) ((char*)HOP(pos,off))
111#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 112
efb30f32 113#define HOPBACK(pos, off) ( \
53c4c00c 114 (UTF && PL_reg_match_utf8) \
efb30f32
HS
115 ? reghopmaybe((U8*)pos, -off) \
116 : (pos - off >= PL_bostr) \
117 ? (U8*)(pos - off) \
118 : (U8*)NULL \
119)
120#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
121
1aa99e6b
IH
122#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
123#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
124#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
125#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
126#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
127#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
128
8269fa76 129#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 130
5f80c4cf 131/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
132#define JUMPABLE(rn) ( \
133 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
134 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
135 OP(rn) == PLUS || OP(rn) == MINMOD || \
136 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
137)
138
cca55fe3
JP
139#define HAS_TEXT(rn) ( \
140 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
141)
e2d8ce26 142
cca55fe3 143#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 144 while (JUMPABLE(rn)) \
cca55fe3
JP
145 if (OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
146 PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 147 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
148 else if (OP(rn) == PLUS) \
149 rn = NEXTOPER(rn); \
e2d8ce26 150 else rn += NEXT_OFF(rn); \
5f80c4cf 151} STMT_END
74750237 152
acfe0abc 153static void restore_pos(pTHX_ void *arg);
51371543 154
76e3520e 155STATIC CHECKPOINT
cea2e8a9 156S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 157{
3280af22 158 int retval = PL_savestack_ix;
b1ce53c5
JH
159#define REGCP_PAREN_ELEMS 4
160 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
161 int p;
162
e49a9654
IH
163 if (paren_elems_to_push < 0)
164 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
165
a01268b5 166#define REGCP_OTHER_ELEMS 6
b1ce53c5 167 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 168 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 169/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
170 SSPUSHINT(PL_regendp[p]);
171 SSPUSHINT(PL_regstartp[p]);
3280af22 172 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
173 SSPUSHINT(p);
174 }
b1ce53c5 175/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
176 SSPUSHINT(PL_regsize);
177 SSPUSHINT(*PL_reglastparen);
a01268b5 178 SSPUSHINT(*PL_reglastcloseparen);
3280af22 179 SSPUSHPTR(PL_reginput);
41123dfd
JH
180#define REGCP_FRAME_ELEMS 2
181/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
182 * are needed for the regexp context stack bookkeeping. */
183 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 184 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 185
a0d0e21e
LW
186 return retval;
187}
188
c277df42 189/* These are needed since we do not localize EVAL nodes: */
02db2b7b 190# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 191 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 192 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 193
02db2b7b 194# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 195 PerlIO_printf(Perl_debug_log, \
faccc32b 196 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 197 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 198
76e3520e 199STATIC char *
cea2e8a9 200S_regcppop(pTHX)
a0d0e21e 201{
b1ce53c5 202 I32 i;
a0d0e21e
LW
203 U32 paren = 0;
204 char *input;
cf93c79d 205 I32 tmps;
b1ce53c5
JH
206
207 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 208 i = SSPOPINT;
b1ce53c5
JH
209 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
210 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 211 input = (char *) SSPOPPTR;
a01268b5 212 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
213 *PL_reglastparen = SSPOPINT;
214 PL_regsize = SSPOPINT;
b1ce53c5
JH
215
216 /* Now restore the parentheses context. */
41123dfd
JH
217 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
218 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 219 paren = (U32)SSPOPINT;
3280af22 220 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
221 PL_regstartp[paren] = SSPOPINT;
222 tmps = SSPOPINT;
3280af22
NIS
223 if (paren <= *PL_reglastparen)
224 PL_regendp[paren] = tmps;
c277df42 225 DEBUG_r(
c3464db5 226 PerlIO_printf(Perl_debug_log,
b900a521 227 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 228 (UV)paren, (IV)PL_regstartp[paren],
b900a521 229 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 230 (IV)PL_regendp[paren],
3280af22 231 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 232 );
a0d0e21e 233 }
c277df42 234 DEBUG_r(
3280af22 235 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 236 PerlIO_printf(Perl_debug_log,
faccc32b
JH
237 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
238 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
239 }
240 );
daf18116 241#if 1
dafc8851
JH
242 /* It would seem that the similar code in regtry()
243 * already takes care of this, and in fact it is in
244 * a better location to since this code can #if 0-ed out
245 * but the code in regtry() is needed or otherwise tests
246 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
247 * (as of patchlevel 7877) will fail. Then again,
248 * this code seems to be necessary or otherwise
249 * building DynaLoader will fail:
250 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
251 * --jhi */
3280af22
NIS
252 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
253 if (paren > PL_regsize)
cf93c79d
IZ
254 PL_regstartp[paren] = -1;
255 PL_regendp[paren] = -1;
a0d0e21e 256 }
dafc8851 257#endif
a0d0e21e
LW
258 return input;
259}
260
0f5d15d6 261STATIC char *
cea2e8a9 262S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
263{
264 I32 tmp = PL_savestack_ix;
265
266 PL_savestack_ix = ss;
267 regcppop();
268 PL_savestack_ix = tmp;
942e002e 269 return Nullch;
0f5d15d6
IZ
270}
271
272typedef struct re_cc_state
273{
274 I32 ss;
275 regnode *node;
276 struct re_cc_state *prev;
277 CURCUR *cc;
278 regexp *re;
279} re_cc_state;
280
02db2b7b 281#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 282
29d1e993
HS
283#define TRYPAREN(paren, n, input) { \
284 if (paren) { \
285 if (n) { \
286 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
287 PL_regendp[paren] = input - PL_bostr; \
288 } \
289 else \
290 PL_regendp[paren] = -1; \
291 } \
292 if (regmatch(next)) \
293 sayYES; \
294 if (paren && n) \
295 PL_regendp[paren] = -1; \
296}
297
298
a687059c 299/*
e50aee73 300 * pregexec and friends
a687059c
LW
301 */
302
303/*
c277df42 304 - pregexec - match a regexp against a string
a687059c 305 */
c277df42 306I32
864dbfa3 307Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 308 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
309/* strend: pointer to null at end of string */
310/* strbeg: real beginning of string */
311/* minend: end of match must be >=minend after stringarg. */
312/* nosave: For optimizations. */
313{
314 return
9041c2e3 315 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
316 nosave ? 0 : REXEC_COPY_STR);
317}
0f5d15d6
IZ
318
319STATIC void
cea2e8a9 320S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
321{
322 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
323#ifdef DEBUGGING
324 PL_regprogram = prog->program;
325#endif
326 PL_regnpar = prog->nparens;
9041c2e3
NIS
327 PL_regdata = prog->data;
328 PL_reg_re = prog;
0f5d15d6 329}
22e551b9 330
9041c2e3 331/*
cad2e5aa
JH
332 * Need to implement the following flags for reg_anch:
333 *
334 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
335 * USE_INTUIT_ML
336 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
337 * INTUIT_AUTORITATIVE_ML
338 * INTUIT_ONCE_NOML - Intuit can match in one location only.
339 * INTUIT_ONCE_ML
340 *
341 * Another flag for this function: SECOND_TIME (so that float substrs
342 * with giant delta may be not rechecked).
343 */
344
345/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
346
2c2d71f5 347/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
348 Otherwise, only SvCUR(sv) is used to get strbeg. */
349
350/* XXXX We assume that strpos is strbeg unless sv. */
351
6eb5f6b9
JH
352/* XXXX Some places assume that there is a fixed substring.
353 An update may be needed if optimizer marks as "INTUITable"
354 RExen without fixed substrings. Similarly, it is assumed that
355 lengths of all the strings are no more than minlen, thus they
356 cannot come from lookahead.
357 (Or minlen should take into account lookahead.) */
358
2c2d71f5
JH
359/* A failure to find a constant substring means that there is no need to make
360 an expensive call to REx engine, thus we celebrate a failure. Similarly,
361 finding a substring too deep into the string means that less calls to
30944b6d
IZ
362 regtry() should be needed.
363
364 REx compiler's optimizer found 4 possible hints:
365 a) Anchored substring;
366 b) Fixed substring;
367 c) Whether we are anchored (beginning-of-line or \G);
368 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 369 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
370 string which does not contradict any of them.
371 */
2c2d71f5 372
6eb5f6b9
JH
373/* Most of decisions we do here should have been done at compile time.
374 The nodes of the REx which we used for the search should have been
375 deleted from the finite automaton. */
376
cad2e5aa
JH
377char *
378Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
379 char *strend, U32 flags, re_scream_pos_data *data)
380{
b7953727 381 register I32 start_shift = 0;
cad2e5aa 382 /* Should be nonnegative! */
b7953727 383 register I32 end_shift = 0;
2c2d71f5
JH
384 register char *s;
385 register SV *check;
a1933d95 386 char *strbeg;
cad2e5aa
JH
387 char *t;
388 I32 ml_anch;
6eb5f6b9 389 register char *other_last = Nullch; /* other substr checked before this */
b7953727 390 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
391#ifdef DEBUGGING
392 char *i_strpos = strpos;
ce333219 393 SV *dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 394#endif
cad2e5aa 395
b8d68ded
JH
396 if (prog->reganch & ROPT_UTF8) {
397 DEBUG_r(PerlIO_printf(Perl_debug_log,
398 "UTF-8 regex...\n"));
399 PL_reg_flags |= RF_utf8;
400 }
401
2a782b5b 402 DEBUG_r({
b8d68ded
JH
403 char *s = PL_reg_match_utf8 ?
404 sv_uni_display(dsv, sv, 60, 0) : strpos;
405 int len = PL_reg_match_utf8 ?
406 strlen(s) : strend - strpos;
2a782b5b
JH
407 if (!PL_colorset)
408 reginitcolors();
b8d68ded
JH
409 if (PL_reg_match_utf8)
410 DEBUG_r(PerlIO_printf(Perl_debug_log,
411 "UTF-8 target...\n"));
2a782b5b
JH
412 PerlIO_printf(Perl_debug_log,
413 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
414 PL_colors[4],PL_colors[5],PL_colors[0],
415 prog->precomp,
416 PL_colors[1],
417 (strlen(prog->precomp) > 60 ? "..." : ""),
418 PL_colors[0],
419 (int)(len > 60 ? 60 : len),
420 s, PL_colors[1],
421 (len > 60 ? "..." : "")
422 );
423 });
cad2e5aa 424
1aa99e6b 425 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
a72c7584
JH
426 DEBUG_r(PerlIO_printf(Perl_debug_log,
427 "String too short... [re_intuit_start]\n"));
cad2e5aa 428 goto fail;
2c2d71f5 429 }
a1933d95 430 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 431 PL_regeol = strend;
653099ff 432 check = prog->check_substr;
2c2d71f5 433 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
434 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
435 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 436 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 437
7e25d62c
JH
438 if (!ml_anch) {
439 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
440 | ROPT_IMPLICIT)) /* not a real BOL */
441 /* SvCUR is not set on references: SvRV and SvPVX overlap */
442 && sv && !SvROK(sv)
443 && (strpos != strbeg)) {
444 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
445 goto fail;
446 }
447 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 448 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 449 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
450 I32 slen;
451
1aa99e6b 452 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
453 if (SvTAIL(check)) {
454 slen = SvCUR(check); /* >= 1 */
cad2e5aa 455
9041c2e3 456 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
457 || (strend - s == slen && strend[-1] != '\n')) {
458 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
459 goto fail_finish;
cad2e5aa
JH
460 }
461 /* Now should match s[0..slen-2] */
462 slen--;
653099ff 463 if (slen && (*SvPVX(check) != *s
cad2e5aa 464 || (slen > 1
653099ff 465 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
466 report_neq:
467 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
468 goto fail_finish;
469 }
cad2e5aa 470 }
653099ff
GS
471 else if (*SvPVX(check) != *s
472 || ((slen = SvCUR(check)) > 1
473 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
474 goto report_neq;
475 goto success_at_start;
7e25d62c 476 }
cad2e5aa 477 }
2c2d71f5 478 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 479 s = strpos;
2c2d71f5 480 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 481 end_shift = prog->minlen - start_shift -
653099ff 482 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 483 if (!ml_anch) {
653099ff
GS
484 I32 end = prog->check_offset_max + CHR_SVLEN(check)
485 - (SvTAIL(check) != 0);
1aa99e6b 486 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
487
488 if (end_shift < eshift)
489 end_shift = eshift;
490 }
cad2e5aa 491 }
2c2d71f5 492 else { /* Can match at random position */
cad2e5aa
JH
493 ml_anch = 0;
494 s = strpos;
2c2d71f5
JH
495 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
496 /* Should be nonnegative! */
497 end_shift = prog->minlen - start_shift -
653099ff 498 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
499 }
500
2c2d71f5 501#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 502 if (end_shift < 0)
6bbae5e6 503 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
504#endif
505
2c2d71f5
JH
506 restart:
507 /* Find a possible match in the region s..strend by looking for
508 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 509 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
510 I32 p = -1; /* Internal iterator of scream. */
511 I32 *pp = data ? data->scream_pos : &p;
512
2c2d71f5
JH
513 if (PL_screamfirst[BmRARE(check)] >= 0
514 || ( BmRARE(check) == '\n'
515 && (BmPREVIOUS(check) == SvCUR(check) - 1)
516 && SvTAIL(check) ))
9041c2e3 517 s = screaminstr(sv, check,
2c2d71f5 518 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 519 else
2c2d71f5 520 goto fail_finish;
cad2e5aa
JH
521 if (data)
522 *data->scream_olds = s;
523 }
f33976b4 524 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
525 s = fbm_instr((U8*)(s + start_shift),
526 (U8*)(strend - end_shift),
527 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 528 else
1aa99e6b
IH
529 s = fbm_instr(HOP3(s, start_shift, strend),
530 HOP3(strend, -end_shift, strbeg),
2c2d71f5 531 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
532
533 /* Update the count-of-usability, remove useless subpatterns,
534 unshift s. */
2c2d71f5
JH
535
536 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
537 (s ? "Found" : "Did not find"),
538 ((check == prog->anchored_substr) ? "anchored" : "floating"),
539 PL_colors[0],
7b0972df
JH
540 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
541 SvPVX(check),
2c2d71f5
JH
542 PL_colors[1], (SvTAIL(check) ? "$" : ""),
543 (s ? " at offset " : "...\n") ) );
544
545 if (!s)
546 goto fail_finish;
547
6eb5f6b9
JH
548 check_at = s;
549
2c2d71f5 550 /* Finish the diagnostic message */
30944b6d 551 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
552
553 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
554 Start with the other substr.
555 XXXX no SCREAM optimization yet - and a very coarse implementation
556 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
557 *always* match. Probably should be marked during compile...
558 Probably it is right to do no SCREAM here...
559 */
560
561 if (prog->float_substr && prog->anchored_substr) {
30944b6d 562 /* Take into account the "other" substring. */
2c2d71f5
JH
563 /* XXXX May be hopelessly wrong for UTF... */
564 if (!other_last)
6eb5f6b9 565 other_last = strpos;
2c2d71f5 566 if (check == prog->float_substr) {
30944b6d
IZ
567 do_other_anchored:
568 {
1aa99e6b 569 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
570 char *s1 = s;
571
2c2d71f5
JH
572 t = s - prog->check_offset_max;
573 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
574 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 575 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 576 && t > strpos)))
30944b6d 577 /* EMPTY */;
2c2d71f5
JH
578 else
579 t = strpos;
1aa99e6b 580 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
581 if (t < other_last) /* These positions already checked */
582 t = other_last;
1aa99e6b 583 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
584 if (last < last1)
585 last1 = last;
586 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
587 /* On end-of-str: see comment below. */
588 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
589 HOP3(HOP3(last1, prog->anchored_offset, strend)
590 + SvCUR(prog->anchored_substr),
591 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
592 prog->anchored_substr,
593 PL_multiline ? FBMrf_MULTILINE : 0);
594 DEBUG_r(PerlIO_printf(Perl_debug_log,
595 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
596 (s ? "Found" : "Contradicts"),
597 PL_colors[0],
7b0972df
JH
598 (int)(SvCUR(prog->anchored_substr)
599 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
600 SvPVX(prog->anchored_substr),
601 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
602 if (!s) {
603 if (last1 >= last2) {
604 DEBUG_r(PerlIO_printf(Perl_debug_log,
605 ", giving up...\n"));
606 goto fail_finish;
607 }
608 DEBUG_r(PerlIO_printf(Perl_debug_log,
609 ", trying floating at offset %ld...\n",
1aa99e6b
IH
610 (long)(HOP3c(s1, 1, strend) - i_strpos)));
611 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
612 s = HOP3c(last, 1, strend);
2c2d71f5
JH
613 goto restart;
614 }
615 else {
616 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 617 (long)(s - i_strpos)));
1aa99e6b
IH
618 t = HOP3c(s, -prog->anchored_offset, strbeg);
619 other_last = HOP3c(s, 1, strend);
30944b6d 620 s = s1;
2c2d71f5
JH
621 if (t == strpos)
622 goto try_at_start;
2c2d71f5
JH
623 goto try_at_offset;
624 }
30944b6d 625 }
2c2d71f5
JH
626 }
627 else { /* Take into account the floating substring. */
628 char *last, *last1;
629 char *s1 = s;
630
1aa99e6b
IH
631 t = HOP3c(s, -start_shift, strbeg);
632 last1 = last =
633 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
634 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
635 last = HOP3c(t, prog->float_max_offset, strend);
636 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
637 if (s < other_last)
638 s = other_last;
2c2d71f5
JH
639 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
640 /* fbm_instr() takes into account exact value of end-of-str
641 if the check is SvTAIL(ed). Since false positives are OK,
642 and end-of-str is not later than strend we are OK. */
643 s = fbm_instr((unsigned char*)s,
644 (unsigned char*)last + SvCUR(prog->float_substr)
645 - (SvTAIL(prog->float_substr)!=0),
646 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
647 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
648 (s ? "Found" : "Contradicts"),
649 PL_colors[0],
7b0972df
JH
650 (int)(SvCUR(prog->float_substr)
651 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
652 SvPVX(prog->float_substr),
653 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
654 if (!s) {
655 if (last1 == last) {
656 DEBUG_r(PerlIO_printf(Perl_debug_log,
657 ", giving up...\n"));
658 goto fail_finish;
659 }
660 DEBUG_r(PerlIO_printf(Perl_debug_log,
661 ", trying anchored starting at offset %ld...\n",
30944b6d 662 (long)(s1 + 1 - i_strpos)));
803ff556 663 other_last = last;
1aa99e6b 664 s = HOP3c(t, 1, strend);
2c2d71f5
JH
665 goto restart;
666 }
667 else {
668 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 669 (long)(s - i_strpos)));
803ff556 670 other_last = s; /* Fix this later. --Hugo */
30944b6d 671 s = s1;
2c2d71f5
JH
672 if (t == strpos)
673 goto try_at_start;
2c2d71f5
JH
674 goto try_at_offset;
675 }
676 }
cad2e5aa 677 }
2c2d71f5
JH
678
679 t = s - prog->check_offset_max;
2c2d71f5
JH
680 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
681 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
682 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
683 && t > strpos))) {
2c2d71f5
JH
684 /* Fixed substring is found far enough so that the match
685 cannot start at strpos. */
686 try_at_offset:
cad2e5aa 687 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
688 /* Eventually fbm_*() should handle this, but often
689 anchored_offset is not 0, so this check will not be wasted. */
690 /* XXXX In the code below we prefer to look for "^" even in
691 presence of anchored substrings. And we search even
692 beyond the found float position. These pessimizations
693 are historical artefacts only. */
694 find_anchor:
2c2d71f5 695 while (t < strend - prog->minlen) {
cad2e5aa 696 if (*t == '\n') {
4ee3650e 697 if (t < check_at - prog->check_offset_min) {
30944b6d 698 if (prog->anchored_substr) {
4ee3650e
GS
699 /* Since we moved from the found position,
700 we definitely contradict the found anchored
30944b6d
IZ
701 substr. Due to the above check we do not
702 contradict "check" substr.
703 Thus we can arrive here only if check substr
704 is float. Redo checking for "other"=="fixed".
705 */
9041c2e3 706 strpos = t + 1;
30944b6d
IZ
707 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
708 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
709 goto do_other_anchored;
710 }
4ee3650e
GS
711 /* We don't contradict the found floating substring. */
712 /* XXXX Why not check for STCLASS? */
cad2e5aa 713 s = t + 1;
2c2d71f5 714 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 715 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
716 goto set_useful;
717 }
4ee3650e
GS
718 /* Position contradicts check-string */
719 /* XXXX probably better to look for check-string
720 than for "\n", so one should lower the limit for t? */
721 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 722 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 723 other_last = strpos = s = t + 1;
cad2e5aa
JH
724 goto restart;
725 }
726 t++;
727 }
2c2d71f5
JH
728 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
729 PL_colors[0],PL_colors[1]));
730 goto fail_finish;
cad2e5aa 731 }
f5952150
GS
732 else {
733 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
734 PL_colors[0],PL_colors[1]));
735 }
cad2e5aa
JH
736 s = t;
737 set_useful:
2c2d71f5 738 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
739 }
740 else {
f5952150 741 /* The found string does not prohibit matching at strpos,
2c2d71f5 742 - no optimization of calling REx engine can be performed,
f5952150
GS
743 unless it was an MBOL and we are not after MBOL,
744 or a future STCLASS check will fail this. */
2c2d71f5
JH
745 try_at_start:
746 /* Even in this situation we may use MBOL flag if strpos is offset
747 wrt the start of the string. */
05b4157f 748 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 749 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
750 /* May be due to an implicit anchor of m{.*foo} */
751 && !(prog->reganch & ROPT_IMPLICIT))
752 {
cad2e5aa
JH
753 t = strpos;
754 goto find_anchor;
755 }
30944b6d 756 DEBUG_r( if (ml_anch)
f5952150
GS
757 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
758 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 759 );
2c2d71f5 760 success_at_start:
30944b6d 761 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 762 && prog->check_substr /* Could be deleted already */
cad2e5aa 763 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
764 && prog->check_substr == prog->float_substr)
765 {
cad2e5aa 766 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 767 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
768 SvREFCNT_dec(prog->check_substr);
769 prog->check_substr = Nullsv; /* disable */
770 prog->float_substr = Nullsv; /* clear */
5e39e1e5 771 check = Nullsv; /* abort */
cad2e5aa 772 s = strpos;
3cf5c195
IZ
773 /* XXXX This is a remnant of the old implementation. It
774 looks wasteful, since now INTUIT can use many
6eb5f6b9 775 other heuristics. */
cad2e5aa
JH
776 prog->reganch &= ~RE_USE_INTUIT;
777 }
778 else
779 s = strpos;
780 }
781
6eb5f6b9
JH
782 /* Last resort... */
783 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
784 if (prog->regstclass) {
785 /* minlen == 0 is possible if regstclass is \b or \B,
786 and the fixed substr is ''$.
787 Since minlen is already taken into account, s+1 is before strend;
788 accidentally, minlen >= 1 guaranties no false positives at s + 1
789 even for \b or \B. But (minlen? 1 : 0) below assumes that
790 regstclass does not come from lookahead... */
791 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
792 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 793 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 794 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 795 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 796 : 1);
6eb5f6b9 797 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
798 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
799 : (prog->float_substr
800 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
801 cl_l, strend)
802 : strend);
a1933d95 803 char *startpos = strbeg;
6eb5f6b9
JH
804
805 t = s;
76384e4a 806 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 807 PL_regdata = prog->data;
76384e4a
GS
808 PL_bostr = startpos;
809 }
f33976b4 810 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
6eb5f6b9
JH
811 if (!s) {
812#ifdef DEBUGGING
b7953727 813 char *what = 0;
6eb5f6b9
JH
814#endif
815 if (endpos == strend) {
816 DEBUG_r( PerlIO_printf(Perl_debug_log,
817 "Could not match STCLASS...\n") );
818 goto fail;
819 }
66e933ab
GS
820 DEBUG_r( PerlIO_printf(Perl_debug_log,
821 "This position contradicts STCLASS...\n") );
653099ff
GS
822 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
823 goto fail;
6eb5f6b9
JH
824 /* Contradict one of substrings */
825 if (prog->anchored_substr) {
6eb5f6b9
JH
826 if (prog->anchored_substr == check) {
827 DEBUG_r( what = "anchored" );
828 hop_and_restart:
1aa99e6b 829 s = HOP3c(t, 1, strend);
66e933ab
GS
830 if (s + start_shift + end_shift > strend) {
831 /* XXXX Should be taken into account earlier? */
832 DEBUG_r( PerlIO_printf(Perl_debug_log,
833 "Could not match STCLASS...\n") );
834 goto fail;
835 }
5e39e1e5
HS
836 if (!check)
837 goto giveup;
6eb5f6b9 838 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 839 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
840 what, (long)(s + start_shift - i_strpos)) );
841 goto restart;
842 }
66e933ab 843 /* Have both, check_string is floating */
6eb5f6b9
JH
844 if (t + start_shift >= check_at) /* Contradicts floating=check */
845 goto retry_floating_check;
846 /* Recheck anchored substring, but not floating... */
9041c2e3 847 s = check_at;
5e39e1e5
HS
848 if (!check)
849 goto giveup;
6eb5f6b9 850 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 851 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
852 (long)(other_last - i_strpos)) );
853 goto do_other_anchored;
854 }
60e71179
GS
855 /* Another way we could have checked stclass at the
856 current position only: */
857 if (ml_anch) {
858 s = t = t + 1;
5e39e1e5
HS
859 if (!check)
860 goto giveup;
60e71179 861 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
862 "Looking for /%s^%s/m starting at offset %ld...\n",
863 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 864 goto try_at_offset;
66e933ab 865 }
60e71179
GS
866 if (!prog->float_substr) /* Could have been deleted */
867 goto fail;
6eb5f6b9
JH
868 /* Check is floating subtring. */
869 retry_floating_check:
870 t = check_at - start_shift;
871 DEBUG_r( what = "floating" );
872 goto hop_and_restart;
873 }
b7953727
JH
874 if (t != s) {
875 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 876 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
877 (long)(t - i_strpos), (long)(s - i_strpos))
878 );
879 }
880 else {
881 DEBUG_r(PerlIO_printf(Perl_debug_log,
882 "Does not contradict STCLASS...\n");
883 );
884 }
6eb5f6b9 885 }
5e39e1e5
HS
886 giveup:
887 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
888 PL_colors[4], (check ? "Guessed" : "Giving up"),
889 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 890 return s;
2c2d71f5
JH
891
892 fail_finish: /* Substring not found */
66e933ab
GS
893 if (prog->check_substr) /* could be removed already */
894 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 895 fail:
2c2d71f5 896 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
897 PL_colors[4],PL_colors[5]));
898 return Nullch;
899}
9661b544 900
6eb5f6b9 901/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
902STATIC char *
903S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 904{
6eb5f6b9
JH
905 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
906 char *m;
d8093b23
G
907 STRLEN ln;
908 unsigned int c1;
909 unsigned int c2;
6eb5f6b9
JH
910 char *e;
911 register I32 tmp = 1; /* Scratch variable? */
53c4c00c 912 register bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 913
6eb5f6b9
JH
914 /* We know what class it must start with. */
915 switch (OP(c)) {
6eb5f6b9 916 case ANYOF:
a0ed51b3 917 while (s < strend) {
ffc61ed2 918 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
919 if (tmp && (norun || regtry(prog, s)))
920 goto got_it;
921 else
922 tmp = doevery;
a0ed51b3 923 }
6eb5f6b9
JH
924 else
925 tmp = 1;
ffc61ed2 926 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 927 }
6eb5f6b9 928 break;
f33976b4
DB
929 case CANY:
930 while (s < strend) {
931 if (tmp && (norun || regtry(prog, s)))
932 goto got_it;
933 else
934 tmp = doevery;
935 s++;
936 }
937 break;
6eb5f6b9
JH
938 case EXACTF:
939 m = STRING(c);
940 ln = STR_LEN(c);
1aa99e6b 941 if (UTF) {
a2a2844f 942 STRLEN ulen1, ulen2;
e7ae6809
JH
943 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
944 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
945
946 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
947 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
948
949 c1 = utf8_to_uvuni(tmpbuf1, 0);
950 c2 = utf8_to_uvuni(tmpbuf2, 0);
1aa99e6b
IH
951 }
952 else {
953 c1 = *(U8*)m;
954 c2 = PL_fold[c1];
955 }
6eb5f6b9
JH
956 goto do_exactf;
957 case EXACTFL:
958 m = STRING(c);
959 ln = STR_LEN(c);
d8093b23 960 c1 = *(U8*)m;
6eb5f6b9
JH
961 c2 = PL_fold_locale[c1];
962 do_exactf:
93c50829 963 e = do_utf8 ? s + ln : strend - ln;
b3c9acc1 964
6eb5f6b9
JH
965 if (norun && e < s)
966 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 967
60a8b682
JH
968 /* The idea in the EXACTF* cases is to first find the
969 * first character of the EXACTF* node and then, if
970 * necessary, case-insensitively compare the full
971 * text of the node. The c1 and c2 are the first
972 * characters (though in Unicode it gets a bit
973 * more complicated because there are more cases
974 * than just upper and lower: one is really supposed
975 * to use the so-called folding case for case-insensitive
976 * matching (called "loose matching" in Unicode). */
977
1aa99e6b 978 if (do_utf8) {
575cac57
JH
979 UV c, f;
980 U8 tmpbuf [UTF8_MAXLEN+1];
981 U8 foldbuf[UTF8_MAXLEN_FOLD+1];
982 STRLEN len, foldlen;
d7f013c8 983
09091399 984 if (c1 == c2) {
1aa99e6b 985 while (s <= e) {
575cac57 986 c = utf8_to_uvchr((U8*)s, &len);
80aecb99
JH
987 if ( c == c1
988 && (ln == len ||
66423254
JH
989 ibcmp_utf8(s, (char **)0, 0, do_utf8,
990 m, (char **)0, ln, UTF))
55da9344 991 && (norun || regtry(prog, s)) )
1aa99e6b 992 goto got_it;
80aecb99
JH
993 else {
994 uvchr_to_utf8(tmpbuf, c);
ac7e0132 995 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
996 if ( f != c
997 && (f == c1 || f == c2)
998 && (ln == foldlen ||
66423254
JH
999 !ibcmp_utf8((char *) foldbuf,
1000 (char **)0, foldlen, do_utf8,
d07ddd77 1001 m,
66423254 1002 (char **)0, ln, UTF))
80aecb99
JH
1003 && (norun || regtry(prog, s)) )
1004 goto got_it;
1005 }
1aa99e6b
IH
1006 s += len;
1007 }
09091399
JH
1008 }
1009 else {
1aa99e6b 1010 while (s <= e) {
575cac57 1011 c = utf8_to_uvchr((U8*)s, &len);
80aecb99 1012
60a8b682
JH
1013 /* Handle some of the three Greek sigmas cases.
1014 * Note that not all the possible combinations
1015 * are handled here: some of them are handled
1016 * handled by the standard folding rules, and
1017 * some of them (the character class or ANYOF
1018 * cases) are handled during compiletime in
1019 * regexec.c:S_regclass(). */
880bd946
JH
1020 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1021 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1022 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99
JH
1023
1024 if ( (c == c1 || c == c2)
1025 && (ln == len ||
66423254
JH
1026 ibcmp_utf8(s, (char **)0, 0, do_utf8,
1027 m, (char **)0, ln, UTF))
55da9344 1028 && (norun || regtry(prog, s)) )
1aa99e6b 1029 goto got_it;
80aecb99
JH
1030 else {
1031 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1032 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1033 if ( f != c
1034 && (f == c1 || f == c2)
1035 && (ln == foldlen ||
1036 !ibcmp_utf8((char *)foldbuf,
66423254 1037 (char **)0, foldlen, do_utf8,
d07ddd77 1038 m,
66423254 1039 (char **)0, ln, UTF))
80aecb99
JH
1040 && (norun || regtry(prog, s)) )
1041 goto got_it;
1042 }
1aa99e6b
IH
1043 s += len;
1044 }
09091399 1045 }
1aa99e6b
IH
1046 }
1047 else {
1048 if (c1 == c2)
1049 while (s <= e) {
1050 if ( *(U8*)s == c1
1051 && (ln == 1 || !(OP(c) == EXACTF
1052 ? ibcmp(s, m, ln)
1053 : ibcmp_locale(s, m, ln)))
1054 && (norun || regtry(prog, s)) )
1055 goto got_it;
1056 s++;
1057 }
1058 else
1059 while (s <= e) {
1060 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1061 && (ln == 1 || !(OP(c) == EXACTF
1062 ? ibcmp(s, m, ln)
1063 : ibcmp_locale(s, m, ln)))
1064 && (norun || regtry(prog, s)) )
1065 goto got_it;
1066 s++;
1067 }
b3c9acc1
IZ
1068 }
1069 break;
bbce6d69 1070 case BOUNDL:
3280af22 1071 PL_reg_flags |= RF_tainted;
bbce6d69 1072 /* FALL THROUGH */
a0d0e21e 1073 case BOUND:
ffc61ed2 1074 if (do_utf8) {
12d33761 1075 if (s == PL_bostr)
ffc61ed2
JH
1076 tmp = '\n';
1077 else {
1aa99e6b 1078 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1079
0064a8a9
JH
1080 if (s > (char*)r)
1081 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1082 }
1083 tmp = ((OP(c) == BOUND ?
9041c2e3 1084 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1085 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1086 while (s < strend) {
1087 if (tmp == !(OP(c) == BOUND ?
3568d838 1088 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1089 isALNUM_LC_utf8((U8*)s)))
1090 {
1091 tmp = !tmp;
1092 if ((norun || regtry(prog, s)))
1093 goto got_it;
1094 }
1095 s += UTF8SKIP(s);
a687059c 1096 }
a0d0e21e 1097 }
667bb95a 1098 else {
12d33761 1099 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1100 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1101 while (s < strend) {
1102 if (tmp ==
1103 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1104 tmp = !tmp;
1105 if ((norun || regtry(prog, s)))
1106 goto got_it;
1107 }
1108 s++;
a0ed51b3 1109 }
a0ed51b3 1110 }
6eb5f6b9 1111 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1112 goto got_it;
1113 break;
bbce6d69 1114 case NBOUNDL:
3280af22 1115 PL_reg_flags |= RF_tainted;
bbce6d69 1116 /* FALL THROUGH */
a0d0e21e 1117 case NBOUND:
ffc61ed2 1118 if (do_utf8) {
12d33761 1119 if (s == PL_bostr)
ffc61ed2
JH
1120 tmp = '\n';
1121 else {
1aa99e6b 1122 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1123
0064a8a9
JH
1124 if (s > (char*)r)
1125 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1126 }
1127 tmp = ((OP(c) == NBOUND ?
9041c2e3 1128 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1129 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1130 while (s < strend) {
1131 if (tmp == !(OP(c) == NBOUND ?
3568d838 1132 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1133 isALNUM_LC_utf8((U8*)s)))
1134 tmp = !tmp;
1135 else if ((norun || regtry(prog, s)))
1136 goto got_it;
1137 s += UTF8SKIP(s);
1138 }
a0d0e21e 1139 }
667bb95a 1140 else {
12d33761 1141 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1142 tmp = ((OP(c) == NBOUND ?
1143 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1144 while (s < strend) {
1145 if (tmp ==
1146 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1147 tmp = !tmp;
1148 else if ((norun || regtry(prog, s)))
1149 goto got_it;
1150 s++;
1151 }
a0ed51b3 1152 }
6eb5f6b9 1153 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1154 goto got_it;
1155 break;
a0d0e21e 1156 case ALNUM:
ffc61ed2 1157 if (do_utf8) {
8269fa76 1158 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1159 while (s < strend) {
3568d838 1160 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1161 if (tmp && (norun || regtry(prog, s)))
1162 goto got_it;
1163 else
1164 tmp = doevery;
1165 }
bbce6d69 1166 else
ffc61ed2
JH
1167 tmp = 1;
1168 s += UTF8SKIP(s);
bbce6d69 1169 }
bbce6d69 1170 }
ffc61ed2
JH
1171 else {
1172 while (s < strend) {
1173 if (isALNUM(*s)) {
1174 if (tmp && (norun || regtry(prog, s)))
1175 goto got_it;
1176 else
1177 tmp = doevery;
1178 }
a0ed51b3 1179 else
ffc61ed2
JH
1180 tmp = 1;
1181 s++;
a0ed51b3 1182 }
a0ed51b3
LW
1183 }
1184 break;
bbce6d69 1185 case ALNUML:
3280af22 1186 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1187 if (do_utf8) {
1188 while (s < strend) {
1189 if (isALNUM_LC_utf8((U8*)s)) {
1190 if (tmp && (norun || regtry(prog, s)))
1191 goto got_it;
1192 else
1193 tmp = doevery;
1194 }
a687059c 1195 else
ffc61ed2
JH
1196 tmp = 1;
1197 s += UTF8SKIP(s);
a0d0e21e 1198 }
a0d0e21e 1199 }
ffc61ed2
JH
1200 else {
1201 while (s < strend) {
1202 if (isALNUM_LC(*s)) {
1203 if (tmp && (norun || regtry(prog, s)))
1204 goto got_it;
1205 else
1206 tmp = doevery;
1207 }
a0ed51b3 1208 else
ffc61ed2
JH
1209 tmp = 1;
1210 s++;
a0ed51b3 1211 }
a0ed51b3
LW
1212 }
1213 break;
a0d0e21e 1214 case NALNUM:
ffc61ed2 1215 if (do_utf8) {
8269fa76 1216 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1217 while (s < strend) {
3568d838 1218 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1219 if (tmp && (norun || regtry(prog, s)))
1220 goto got_it;
1221 else
1222 tmp = doevery;
1223 }
bbce6d69 1224 else
ffc61ed2
JH
1225 tmp = 1;
1226 s += UTF8SKIP(s);
bbce6d69 1227 }
bbce6d69 1228 }
ffc61ed2
JH
1229 else {
1230 while (s < strend) {
1231 if (!isALNUM(*s)) {
1232 if (tmp && (norun || regtry(prog, s)))
1233 goto got_it;
1234 else
1235 tmp = doevery;
1236 }
a0ed51b3 1237 else
ffc61ed2
JH
1238 tmp = 1;
1239 s++;
a0ed51b3 1240 }
a0ed51b3
LW
1241 }
1242 break;
bbce6d69 1243 case NALNUML:
3280af22 1244 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1245 if (do_utf8) {
1246 while (s < strend) {
1247 if (!isALNUM_LC_utf8((U8*)s)) {
1248 if (tmp && (norun || regtry(prog, s)))
1249 goto got_it;
1250 else
1251 tmp = doevery;
1252 }
a687059c 1253 else
ffc61ed2
JH
1254 tmp = 1;
1255 s += UTF8SKIP(s);
a687059c 1256 }
a0d0e21e 1257 }
ffc61ed2
JH
1258 else {
1259 while (s < strend) {
1260 if (!isALNUM_LC(*s)) {
1261 if (tmp && (norun || regtry(prog, s)))
1262 goto got_it;
1263 else
1264 tmp = doevery;
1265 }
a0ed51b3 1266 else
ffc61ed2
JH
1267 tmp = 1;
1268 s++;
a0ed51b3 1269 }
a0ed51b3
LW
1270 }
1271 break;
a0d0e21e 1272 case SPACE:
ffc61ed2 1273 if (do_utf8) {
8269fa76 1274 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1275 while (s < strend) {
3568d838 1276 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1277 if (tmp && (norun || regtry(prog, s)))
1278 goto got_it;
1279 else
1280 tmp = doevery;
1281 }
a0d0e21e 1282 else
ffc61ed2
JH
1283 tmp = 1;
1284 s += UTF8SKIP(s);
2304df62 1285 }
a0d0e21e 1286 }
ffc61ed2
JH
1287 else {
1288 while (s < strend) {
1289 if (isSPACE(*s)) {
1290 if (tmp && (norun || regtry(prog, s)))
1291 goto got_it;
1292 else
1293 tmp = doevery;
1294 }
a0ed51b3 1295 else
ffc61ed2
JH
1296 tmp = 1;
1297 s++;
a0ed51b3 1298 }
a0ed51b3
LW
1299 }
1300 break;
bbce6d69 1301 case SPACEL:
3280af22 1302 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1303 if (do_utf8) {
1304 while (s < strend) {
1305 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1306 if (tmp && (norun || regtry(prog, s)))
1307 goto got_it;
1308 else
1309 tmp = doevery;
1310 }
bbce6d69 1311 else
ffc61ed2
JH
1312 tmp = 1;
1313 s += UTF8SKIP(s);
bbce6d69 1314 }
bbce6d69 1315 }
ffc61ed2
JH
1316 else {
1317 while (s < strend) {
1318 if (isSPACE_LC(*s)) {
1319 if (tmp && (norun || regtry(prog, s)))
1320 goto got_it;
1321 else
1322 tmp = doevery;
1323 }
a0ed51b3 1324 else
ffc61ed2
JH
1325 tmp = 1;
1326 s++;
a0ed51b3 1327 }
a0ed51b3
LW
1328 }
1329 break;
a0d0e21e 1330 case NSPACE:
ffc61ed2 1331 if (do_utf8) {
8269fa76 1332 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1333 while (s < strend) {
3568d838 1334 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1335 if (tmp && (norun || regtry(prog, s)))
1336 goto got_it;
1337 else
1338 tmp = doevery;
1339 }
a0d0e21e 1340 else
ffc61ed2
JH
1341 tmp = 1;
1342 s += UTF8SKIP(s);
a687059c 1343 }
a0d0e21e 1344 }
ffc61ed2
JH
1345 else {
1346 while (s < strend) {
1347 if (!isSPACE(*s)) {
1348 if (tmp && (norun || regtry(prog, s)))
1349 goto got_it;
1350 else
1351 tmp = doevery;
1352 }
a0ed51b3 1353 else
ffc61ed2
JH
1354 tmp = 1;
1355 s++;
a0ed51b3 1356 }
a0ed51b3
LW
1357 }
1358 break;
bbce6d69 1359 case NSPACEL:
3280af22 1360 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1361 if (do_utf8) {
1362 while (s < strend) {
1363 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1364 if (tmp && (norun || regtry(prog, s)))
1365 goto got_it;
1366 else
1367 tmp = doevery;
1368 }
bbce6d69 1369 else
ffc61ed2
JH
1370 tmp = 1;
1371 s += UTF8SKIP(s);
bbce6d69 1372 }
bbce6d69 1373 }
ffc61ed2
JH
1374 else {
1375 while (s < strend) {
1376 if (!isSPACE_LC(*s)) {
1377 if (tmp && (norun || regtry(prog, s)))
1378 goto got_it;
1379 else
1380 tmp = doevery;
1381 }
a0ed51b3 1382 else
ffc61ed2
JH
1383 tmp = 1;
1384 s++;
a0ed51b3 1385 }
a0ed51b3
LW
1386 }
1387 break;
a0d0e21e 1388 case DIGIT:
ffc61ed2 1389 if (do_utf8) {
8269fa76 1390 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1391 while (s < strend) {
3568d838 1392 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1393 if (tmp && (norun || regtry(prog, s)))
1394 goto got_it;
1395 else
1396 tmp = doevery;
1397 }
a0d0e21e 1398 else
ffc61ed2
JH
1399 tmp = 1;
1400 s += UTF8SKIP(s);
2b69d0c2 1401 }
a0d0e21e 1402 }
ffc61ed2
JH
1403 else {
1404 while (s < strend) {
1405 if (isDIGIT(*s)) {
1406 if (tmp && (norun || regtry(prog, s)))
1407 goto got_it;
1408 else
1409 tmp = doevery;
1410 }
a0ed51b3 1411 else
ffc61ed2
JH
1412 tmp = 1;
1413 s++;
a0ed51b3 1414 }
a0ed51b3
LW
1415 }
1416 break;
b8c5462f
JH
1417 case DIGITL:
1418 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1419 if (do_utf8) {
1420 while (s < strend) {
1421 if (isDIGIT_LC_utf8((U8*)s)) {
1422 if (tmp && (norun || regtry(prog, s)))
1423 goto got_it;
1424 else
1425 tmp = doevery;
1426 }
b8c5462f 1427 else
ffc61ed2
JH
1428 tmp = 1;
1429 s += UTF8SKIP(s);
b8c5462f 1430 }
b8c5462f 1431 }
ffc61ed2
JH
1432 else {
1433 while (s < strend) {
1434 if (isDIGIT_LC(*s)) {
1435 if (tmp && (norun || regtry(prog, s)))
1436 goto got_it;
1437 else
1438 tmp = doevery;
1439 }
b8c5462f 1440 else
ffc61ed2
JH
1441 tmp = 1;
1442 s++;
b8c5462f 1443 }
b8c5462f
JH
1444 }
1445 break;
a0d0e21e 1446 case NDIGIT:
ffc61ed2 1447 if (do_utf8) {
8269fa76 1448 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1449 while (s < strend) {
3568d838 1450 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1451 if (tmp && (norun || regtry(prog, s)))
1452 goto got_it;
1453 else
1454 tmp = doevery;
1455 }
a0d0e21e 1456 else
ffc61ed2
JH
1457 tmp = 1;
1458 s += UTF8SKIP(s);
a687059c 1459 }
a0d0e21e 1460 }
ffc61ed2
JH
1461 else {
1462 while (s < strend) {
1463 if (!isDIGIT(*s)) {
1464 if (tmp && (norun || regtry(prog, s)))
1465 goto got_it;
1466 else
1467 tmp = doevery;
1468 }
a0ed51b3 1469 else
ffc61ed2
JH
1470 tmp = 1;
1471 s++;
a0ed51b3 1472 }
a0ed51b3
LW
1473 }
1474 break;
b8c5462f
JH
1475 case NDIGITL:
1476 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1477 if (do_utf8) {
1478 while (s < strend) {
1479 if (!isDIGIT_LC_utf8((U8*)s)) {
1480 if (tmp && (norun || regtry(prog, s)))
1481 goto got_it;
1482 else
1483 tmp = doevery;
1484 }
b8c5462f 1485 else
ffc61ed2
JH
1486 tmp = 1;
1487 s += UTF8SKIP(s);
b8c5462f 1488 }
a0ed51b3 1489 }
ffc61ed2
JH
1490 else {
1491 while (s < strend) {
1492 if (!isDIGIT_LC(*s)) {
1493 if (tmp && (norun || regtry(prog, s)))
1494 goto got_it;
1495 else
1496 tmp = doevery;
1497 }
cf93c79d 1498 else
ffc61ed2
JH
1499 tmp = 1;
1500 s++;
b8c5462f 1501 }
b8c5462f
JH
1502 }
1503 break;
b3c9acc1 1504 default:
3c3eec57
GS
1505 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1506 break;
d6a28714 1507 }
6eb5f6b9
JH
1508 return 0;
1509 got_it:
1510 return s;
1511}
1512
1513/*
1514 - regexec_flags - match a regexp against a string
1515 */
1516I32
1517Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1518 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1519/* strend: pointer to null at end of string */
1520/* strbeg: real beginning of string */
1521/* minend: end of match must be >=minend after stringarg. */
1522/* data: May be used for some additional optimizations. */
1523/* nosave: For optimizations. */
1524{
6eb5f6b9
JH
1525 register char *s;
1526 register regnode *c;
1527 register char *startpos = stringarg;
6eb5f6b9
JH
1528 I32 minlen; /* must match at least this many chars */
1529 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1530 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1531 constant substr. */ /* CC */
1532 I32 end_shift = 0; /* Same for the end. */ /* CC */
1533 I32 scream_pos = -1; /* Internal iterator of scream. */
1534 char *scream_olds;
1535 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1536 bool do_utf8 = DO_UTF8(sv);
2a782b5b 1537#ifdef DEBUGGING
9e55ce06
JH
1538 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
1539 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2a782b5b 1540#endif
6eb5f6b9
JH
1541
1542 PL_regcc = 0;
1543
1544 cache_re(prog);
1545#ifdef DEBUGGING
aea4f609 1546 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1547#endif
1548
1549 /* Be paranoid... */
1550 if (prog == NULL || startpos == NULL) {
1551 Perl_croak(aTHX_ "NULL regexp parameter");
1552 return 0;
1553 }
1554
1555 minlen = prog->minlen;
9e55ce06
JH
1556 if (strend - startpos < minlen &&
1557 !PL_reg_match_utf8 /* ANYOFs can balloon to EXACTFs */
1558 ) {
a72c7584
JH
1559 DEBUG_r(PerlIO_printf(Perl_debug_log,
1560 "String too short [regexec_flags]...\n"));
1561 goto phooey;
1aa99e6b 1562 }
6eb5f6b9 1563
6eb5f6b9
JH
1564 /* Check validity of program. */
1565 if (UCHARAT(prog->program) != REG_MAGIC) {
1566 Perl_croak(aTHX_ "corrupted regexp program");
1567 }
1568
1569 PL_reg_flags = 0;
1570 PL_reg_eval_set = 0;
1571 PL_reg_maxiter = 0;
1572
1573 if (prog->reganch & ROPT_UTF8)
1574 PL_reg_flags |= RF_utf8;
1575
1576 /* Mark beginning of line for ^ and lookbehind. */
1577 PL_regbol = startpos;
1578 PL_bostr = strbeg;
1579 PL_reg_sv = sv;
1580
1581 /* Mark end of line for $ (and such) */
1582 PL_regeol = strend;
1583
1584 /* see how far we have to get to not match where we matched before */
1585 PL_regtill = startpos+minend;
1586
1587 /* We start without call_cc context. */
1588 PL_reg_call_cc = 0;
1589
1590 /* If there is a "must appear" string, look for it. */
1591 s = startpos;
1592
1593 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1594 MAGIC *mg;
1595
1596 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1597 PL_reg_ganch = startpos;
1598 else if (sv && SvTYPE(sv) >= SVt_PVMG
1599 && SvMAGIC(sv)
14befaf4
DM
1600 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1601 && mg->mg_len >= 0) {
6eb5f6b9
JH
1602 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1603 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1604 if (s > PL_reg_ganch)
6eb5f6b9
JH
1605 goto phooey;
1606 s = PL_reg_ganch;
1607 }
1608 }
1609 else /* pos() not defined */
1610 PL_reg_ganch = strbeg;
1611 }
1612
699c3c34
JH
1613 if (do_utf8 == (UTF!=0) &&
1614 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1615 re_scream_pos_data d;
1616
1617 d.scream_olds = &scream_olds;
1618 d.scream_pos = &scream_pos;
1619 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7
JH
1620 if (!s) {
1621 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1622 goto phooey; /* not present */
3fa9c3d7 1623 }
6eb5f6b9
JH
1624 }
1625
2a782b5b 1626 DEBUG_r({
9e55ce06
JH
1627 char *s0 = UTF ?
1628 pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1629 UNI_DISPLAY_ISPRINT) :
1630 prog->precomp;
1631 int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1632 char *s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
1633 UNI_DISPLAY_ISPRINT) : startpos;
1634 int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
2a782b5b
JH
1635 if (!PL_colorset)
1636 reginitcolors();
1637 PerlIO_printf(Perl_debug_log,
9e55ce06 1638 "%sMatching REx%s `%s%*.*s%s%s' against `%s%.*s%s%s'\n",
2a782b5b 1639 PL_colors[4],PL_colors[5],PL_colors[0],
9e55ce06 1640 len0, len0, s0,
2a782b5b 1641 PL_colors[1],
9e55ce06 1642 len0 > 60 ? "..." : "",
2a782b5b 1643 PL_colors[0],
9e55ce06
JH
1644 (int)(len1 > 60 ? 60 : len1),
1645 s1, PL_colors[1],
1646 (len1 > 60 ? "..." : "")
2a782b5b
JH
1647 );
1648 });
6eb5f6b9
JH
1649
1650 /* Simplest case: anchored match need be tried only once. */
1651 /* [unless only anchor is BOL and multiline is set] */
1652 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1653 if (s == startpos && regtry(prog, startpos))
1654 goto got_it;
1655 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1656 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1657 {
1658 char *end;
1659
1660 if (minlen)
1661 dontbother = minlen - 1;
1aa99e6b 1662 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1663 /* for multiline we only have to try after newlines */
1664 if (prog->check_substr) {
1665 if (s == startpos)
1666 goto after_try;
1667 while (1) {
1668 if (regtry(prog, s))
1669 goto got_it;
1670 after_try:
1671 if (s >= end)
1672 goto phooey;
1673 if (prog->reganch & RE_USE_INTUIT) {
1674 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1675 if (!s)
1676 goto phooey;
1677 }
1678 else
1679 s++;
1680 }
1681 } else {
1682 if (s > startpos)
1683 s--;
1684 while (s < end) {
1685 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1686 if (regtry(prog, s))
1687 goto got_it;
1688 }
1689 }
1690 }
1691 }
1692 goto phooey;
1693 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1694 if (regtry(prog, PL_reg_ganch))
1695 goto got_it;
1696 goto phooey;
1697 }
1698
1699 /* Messy cases: unanchored match. */
9041c2e3 1700 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1701 /* we have /x+whatever/ */
1702 /* it must be a one character string (XXXX Except UTF?) */
1703 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1704#ifdef DEBUGGING
1705 int did_match = 0;
1706#endif
1707
1aa99e6b 1708 if (do_utf8) {
6eb5f6b9
JH
1709 while (s < strend) {
1710 if (*s == ch) {
bf93d4cc 1711 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1712 if (regtry(prog, s)) goto got_it;
1713 s += UTF8SKIP(s);
1714 while (s < strend && *s == ch)
1715 s += UTF8SKIP(s);
1716 }
1717 s += UTF8SKIP(s);
1718 }
1719 }
1720 else {
1721 while (s < strend) {
1722 if (*s == ch) {
bf93d4cc 1723 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1724 if (regtry(prog, s)) goto got_it;
1725 s++;
1726 while (s < strend && *s == ch)
1727 s++;
1728 }
1729 s++;
1730 }
1731 }
b7953727 1732 DEBUG_r(if (!did_match)
bf93d4cc 1733 PerlIO_printf(Perl_debug_log,
b7953727
JH
1734 "Did not find anchored character...\n")
1735 );
6eb5f6b9
JH
1736 }
1737 /*SUPPRESS 560*/
1aa99e6b
IH
1738 else if (do_utf8 == (UTF!=0) &&
1739 (prog->anchored_substr != Nullsv
9041c2e3 1740 || (prog->float_substr != Nullsv
1aa99e6b 1741 && prog->float_max_offset < strend - s))) {
9041c2e3 1742 SV *must = prog->anchored_substr
6eb5f6b9 1743 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1744 I32 back_max =
6eb5f6b9 1745 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1746 I32 back_min =
6eb5f6b9 1747 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1748 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1749 -(I32)(CHR_SVLEN(must)
1aa99e6b 1750 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1751 char *last1; /* Last position checked before */
bf93d4cc
GS
1752#ifdef DEBUGGING
1753 int did_match = 0;
1754#endif
6eb5f6b9
JH
1755
1756 if (s > PL_bostr)
1757 last1 = HOPc(s, -1);
1758 else
1759 last1 = s - 1; /* bogus */
1760
1761 /* XXXX check_substr already used to find `s', can optimize if
1762 check_substr==must. */
1763 scream_pos = -1;
1764 dontbother = end_shift;
1765 strend = HOPc(strend, -dontbother);
1766 while ( (s <= last) &&
9041c2e3 1767 ((flags & REXEC_SCREAM)
1aa99e6b 1768 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1769 end_shift, &scream_pos, 0))
1aa99e6b 1770 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1771 (unsigned char*)strend, must,
6eb5f6b9 1772 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1773 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1774 if (HOPc(s, -back_max) > last1) {
1775 last1 = HOPc(s, -back_min);
1776 s = HOPc(s, -back_max);
1777 }
1778 else {
1779 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1780
1781 last1 = HOPc(s, -back_min);
1782 s = t;
1783 }
1aa99e6b 1784 if (do_utf8) {
6eb5f6b9
JH
1785 while (s <= last1) {
1786 if (regtry(prog, s))
1787 goto got_it;
1788 s += UTF8SKIP(s);
1789 }
1790 }
1791 else {
1792 while (s <= last1) {
1793 if (regtry(prog, s))
1794 goto got_it;
1795 s++;
1796 }
1797 }
1798 }
b7953727
JH
1799 DEBUG_r(if (!did_match)
1800 PerlIO_printf(Perl_debug_log,
1801 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1802 ((must == prog->anchored_substr)
1803 ? "anchored" : "floating"),
1804 PL_colors[0],
1805 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1806 SvPVX(must),
b7953727
JH
1807 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1808 );
6eb5f6b9
JH
1809 goto phooey;
1810 }
155aba94 1811 else if ((c = prog->regstclass)) {
66e933ab
GS
1812 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1813 /* don't bother with what can't match */
6eb5f6b9 1814 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1815 DEBUG_r({
1816 SV *prop = sv_newmortal();
9e55ce06
JH
1817 char *s0;
1818 char *s1;
1819 int len0;
1820 int len1;
1821
ffc61ed2 1822 regprop(prop, c);
9e55ce06
JH
1823 s0 = UTF ?
1824 pv_uni_display(dsv0, (U8*)SvPVX(prop), SvCUR(prop), 60,
1825 UNI_DISPLAY_ISPRINT) :
1826 SvPVX(prop);
1827 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1828 s1 = UTF ?
1829 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_ISPRINT) : s;
1830 len1 = UTF ? SvCUR(dsv1) : strend - s;
1831 PerlIO_printf(Perl_debug_log,
1832 "Matching stclass `%*.*s' against `%*.*s'\n",
1833 len0, len0, s0,
1834 len1, len1, s1);
ffc61ed2 1835 });
6eb5f6b9
JH
1836 if (find_byclass(prog, c, s, strend, startpos, 0))
1837 goto got_it;
bf93d4cc 1838 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1839 }
1840 else {
1841 dontbother = 0;
1842 if (prog->float_substr != Nullsv) { /* Trim the end. */
1843 char *last;
d6a28714
JH
1844
1845 if (flags & REXEC_SCREAM) {
1846 last = screaminstr(sv, prog->float_substr, s - strbeg,
1847 end_shift, &scream_pos, 1); /* last one */
1848 if (!last)
ffc61ed2 1849 last = scream_olds; /* Only one occurrence. */
b8c5462f 1850 }
d6a28714
JH
1851 else {
1852 STRLEN len;
1853 char *little = SvPV(prog->float_substr, len);
1854
1855 if (SvTAIL(prog->float_substr)) {
1856 if (memEQ(strend - len + 1, little, len - 1))
1857 last = strend - len + 1;
1858 else if (!PL_multiline)
9041c2e3 1859 last = memEQ(strend - len, little, len)
d6a28714 1860 ? strend - len : Nullch;
b8c5462f 1861 else
d6a28714
JH
1862 goto find_last;
1863 } else {
1864 find_last:
9041c2e3 1865 if (len)
d6a28714 1866 last = rninstr(s, strend, little, little + len);
b8c5462f 1867 else
d6a28714 1868 last = strend; /* matching `$' */
b8c5462f 1869 }
b8c5462f 1870 }
bf93d4cc
GS
1871 if (last == NULL) {
1872 DEBUG_r(PerlIO_printf(Perl_debug_log,
1873 "%sCan't trim the tail, match fails (should not happen)%s\n",
1874 PL_colors[4],PL_colors[5]));
1875 goto phooey; /* Should not happen! */
1876 }
d6a28714
JH
1877 dontbother = strend - last + prog->float_min_offset;
1878 }
1879 if (minlen && (dontbother < minlen))
1880 dontbother = minlen - 1;
1881 strend -= dontbother; /* this one's always in bytes! */
1882 /* We don't know much -- general case. */
1aa99e6b 1883 if (do_utf8) {
d6a28714
JH
1884 for (;;) {
1885 if (regtry(prog, s))
1886 goto got_it;
1887 if (s >= strend)
1888 break;
b8c5462f 1889 s += UTF8SKIP(s);
d6a28714
JH
1890 };
1891 }
1892 else {
1893 do {
1894 if (regtry(prog, s))
1895 goto got_it;
1896 } while (s++ < strend);
1897 }
1898 }
1899
1900 /* Failure. */
1901 goto phooey;
1902
1903got_it:
1904 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1905
1906 if (PL_reg_eval_set) {
1907 /* Preserve the current value of $^R */
1908 if (oreplsv != GvSV(PL_replgv))
1909 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1910 restored, the value remains
1911 the same. */
acfe0abc 1912 restore_pos(aTHX_ 0);
d6a28714
JH
1913 }
1914
1915 /* make sure $`, $&, $', and $digit will work later */
1916 if ( !(flags & REXEC_NOT_FIRST) ) {
1917 if (RX_MATCH_COPIED(prog)) {
1918 Safefree(prog->subbeg);
1919 RX_MATCH_COPIED_off(prog);
1920 }
1921 if (flags & REXEC_COPY_STR) {
1922 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1923
1924 s = savepvn(strbeg, i);
1925 prog->subbeg = s;
1926 prog->sublen = i;
1927 RX_MATCH_COPIED_on(prog);
1928 }
1929 else {
1930 prog->subbeg = strbeg;
1931 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1932 }
1933 }
9041c2e3 1934
d6a28714
JH
1935 return 1;
1936
1937phooey:
bf93d4cc
GS
1938 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1939 PL_colors[4],PL_colors[5]));
d6a28714 1940 if (PL_reg_eval_set)
acfe0abc 1941 restore_pos(aTHX_ 0);
d6a28714
JH
1942 return 0;
1943}
1944
1945/*
1946 - regtry - try match at specific point
1947 */
1948STATIC I32 /* 0 failure, 1 success */
1949S_regtry(pTHX_ regexp *prog, char *startpos)
1950{
d6a28714
JH
1951 register I32 i;
1952 register I32 *sp;
1953 register I32 *ep;
1954 CHECKPOINT lastcp;
1955
02db2b7b
IZ
1956#ifdef DEBUGGING
1957 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1958#endif
d6a28714
JH
1959 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1960 MAGIC *mg;
1961
1962 PL_reg_eval_set = RS_init;
1963 DEBUG_r(DEBUG_s(
b900a521
JH
1964 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1965 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1966 ));
e8347627 1967 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1968 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1969 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1970 SAVETMPS;
1971 /* Apparently this is not needed, judging by wantarray. */
e8347627 1972 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1973 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1974
1975 if (PL_reg_sv) {
1976 /* Make $_ available to executed code. */
1977 if (PL_reg_sv != DEFSV) {
4d1ff10f 1978 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
d6a28714
JH
1979 SAVESPTR(DEFSV);
1980 DEFSV = PL_reg_sv;
b8c5462f 1981 }
d6a28714 1982
9041c2e3 1983 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1984 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1985 /* prepare for quick setting of pos */
14befaf4
DM
1986 sv_magic(PL_reg_sv, (SV*)0,
1987 PERL_MAGIC_regex_global, Nullch, 0);
1988 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1989 mg->mg_len = -1;
b8c5462f 1990 }
d6a28714
JH
1991 PL_reg_magic = mg;
1992 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1993 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 1994 }
09687e5a 1995 if (!PL_reg_curpm) {
0f79a09d 1996 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
1997#ifdef USE_ITHREADS
1998 {
1999 SV* repointer = newSViv(0);
577e12cc 2000 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2001 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2002 av_push(PL_regex_padav,repointer);
2003 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2004 PL_regex_pad = AvARRAY(PL_regex_padav);
2005 }
2006#endif
2007 }
aaa362c4 2008 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2009 PL_reg_oldcurpm = PL_curpm;
2010 PL_curpm = PL_reg_curpm;
2011 if (RX_MATCH_COPIED(prog)) {
2012 /* Here is a serious problem: we cannot rewrite subbeg,
2013 since it may be needed if this match fails. Thus
2014 $` inside (?{}) could fail... */
2015 PL_reg_oldsaved = prog->subbeg;
2016 PL_reg_oldsavedlen = prog->sublen;
2017 RX_MATCH_COPIED_off(prog);
2018 }
2019 else
2020 PL_reg_oldsaved = Nullch;
2021 prog->subbeg = PL_bostr;
2022 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2023 }
2024 prog->startp[0] = startpos - PL_bostr;
2025 PL_reginput = startpos;
2026 PL_regstartp = prog->startp;
2027 PL_regendp = prog->endp;
2028 PL_reglastparen = &prog->lastparen;
a01268b5 2029 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
2030 prog->lastparen = 0;
2031 PL_regsize = 0;
2032 DEBUG_r(PL_reg_starttry = startpos);
2033 if (PL_reg_start_tmpl <= prog->nparens) {
2034 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2035 if(PL_reg_start_tmp)
2036 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2037 else
2038 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2039 }
2040
128e8167
JH
2041#ifdef DEBUGGING
2042 sv_setpvn(PERL_DEBUG_PAD(0), "", 0);
2043 sv_setpvn(PERL_DEBUG_PAD(1), "", 0);
2044 sv_setpvn(PERL_DEBUG_PAD(2), "", 0);
2045#endif
2046
d6a28714
JH
2047 /* XXXX What this code is doing here?!!! There should be no need
2048 to do this again and again, PL_reglastparen should take care of
3dd2943c 2049 this! --ilya*/
dafc8851
JH
2050
2051 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2052 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2053 * PL_reglastparen), is not needed at all by the test suite
2054 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2055 * enough, for building DynaLoader, or otherwise this
2056 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2057 * will happen. Meanwhile, this code *is* needed for the
2058 * above-mentioned test suite tests to succeed. The common theme
2059 * on those tests seems to be returning null fields from matches.
2060 * --jhi */
dafc8851 2061#if 1
d6a28714
JH
2062 sp = prog->startp;
2063 ep = prog->endp;
2064 if (prog->nparens) {
09e8ae3b 2065 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
2066 *++sp = -1;
2067 *++ep = -1;
2068 }
2069 }
dafc8851 2070#endif
02db2b7b 2071 REGCP_SET(lastcp);
d6a28714
JH
2072 if (regmatch(prog->program + 1)) {
2073 prog->endp[0] = PL_reginput - PL_bostr;
2074 return 1;
2075 }
02db2b7b 2076 REGCP_UNWIND(lastcp);
d6a28714
JH
2077 return 0;
2078}
2079
02db2b7b
IZ
2080#define RE_UNWIND_BRANCH 1
2081#define RE_UNWIND_BRANCHJ 2
2082
2083union re_unwind_t;
2084
2085typedef struct { /* XX: makes sense to enlarge it... */
2086 I32 type;
2087 I32 prev;
2088 CHECKPOINT lastcp;
2089} re_unwind_generic_t;
2090
2091typedef struct {
2092 I32 type;
2093 I32 prev;
2094 CHECKPOINT lastcp;
2095 I32 lastparen;
2096 regnode *next;
2097 char *locinput;
2098 I32 nextchr;
2099#ifdef DEBUGGING
2100 int regindent;
2101#endif
2102} re_unwind_branch_t;
2103
2104typedef union re_unwind_t {
2105 I32 type;
2106 re_unwind_generic_t generic;
2107 re_unwind_branch_t branch;
2108} re_unwind_t;
2109
8ba1375e
MJD
2110#define sayYES goto yes
2111#define sayNO goto no
2112#define sayYES_FINAL goto yes_final
2113#define sayYES_LOUD goto yes_loud
2114#define sayNO_FINAL goto no_final
2115#define sayNO_SILENT goto do_no
2116#define saySAME(x) if (x) goto yes; else goto no
2117
2118#define REPORT_CODE_OFF 24
2119
d6a28714
JH
2120/*
2121 - regmatch - main matching routine
2122 *
2123 * Conceptually the strategy is simple: check to see whether the current
2124 * node matches, call self recursively to see whether the rest matches,
2125 * and then act accordingly. In practice we make some effort to avoid
2126 * recursion, in particular by going through "ordinary" nodes (that don't
2127 * need to know whether the rest of the match failed) by a loop instead of
2128 * by recursion.
2129 */
2130/* [lwall] I've hoisted the register declarations to the outer block in order to
2131 * maybe save a little bit of pushing and popping on the stack. It also takes
2132 * advantage of machines that use a register save mask on subroutine entry.
2133 */
2134STATIC I32 /* 0 failure, 1 success */
2135S_regmatch(pTHX_ regnode *prog)
2136{
d6a28714
JH
2137 register regnode *scan; /* Current node. */
2138 regnode *next; /* Next node. */
2139 regnode *inner; /* Next node in internal branch. */
2140 register I32 nextchr; /* renamed nextchr - nextchar colides with
2141 function of same name */
2142 register I32 n; /* no or next */
b7953727
JH
2143 register I32 ln = 0; /* len or last */
2144 register char *s = Nullch; /* operand or save */
d6a28714 2145 register char *locinput = PL_reginput;
b7953727 2146 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2147 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2148 I32 unwind = 0;
b7953727 2149#if 0
02db2b7b 2150 I32 firstcp = PL_savestack_ix;
b7953727 2151#endif
53c4c00c 2152 register bool do_utf8 = PL_reg_match_utf8;
2a782b5b 2153#ifdef DEBUGGING
ce333219
JH
2154 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2155 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2156 SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2a782b5b 2157#endif
02db2b7b 2158
d6a28714
JH
2159#ifdef DEBUGGING
2160 PL_regindent++;
2161#endif
2162
2163 /* Note that nextchr is a byte even in UTF */
2164 nextchr = UCHARAT(locinput);
2165 scan = prog;
2166 while (scan != NULL) {
8ba1375e 2167
2a782b5b 2168 DEBUG_r( {
d6a28714
JH
2169 SV *prop = sv_newmortal();
2170 int docolor = *PL_colors[0];
2171 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2172 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2173 /* The part of the string before starttry has one color
2174 (pref0_len chars), between starttry and current
2175 position another one (pref_len - pref0_len chars),
2176 after the current position the third one.
2177 We assume that pref0_len <= pref_len, otherwise we
2178 decrease pref0_len. */
9041c2e3 2179 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2180 ? (5 + taill) - l : locinput - PL_bostr;
2181 int pref0_len;
d6a28714 2182
df1ffd02 2183 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2184 pref_len++;
2185 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2186 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2187 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2188 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2189 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2190 l--;
d6a28714
JH
2191 if (pref0_len < 0)
2192 pref0_len = 0;
2193 if (pref0_len > pref_len)
2194 pref0_len = pref_len;
2195 regprop(prop, scan);
2a782b5b
JH
2196 {
2197 char *s0 =
df1ffd02 2198 do_utf8 ?
2a782b5b
JH
2199 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
2200 pref0_len, 60, 0) :
2201 locinput - pref_len;
df1ffd02
JH
2202 int len0 = do_utf8 ? strlen(s0) : pref0_len;
2203 char *s1 = do_utf8 ?
2a782b5b
JH
2204 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
2205 pref_len - pref0_len, 60, 0) :
2206 locinput - pref_len + pref0_len;
df1ffd02
JH
2207 int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2208 char *s2 = do_utf8 ?
2a782b5b
JH
2209 pv_uni_display(dsv2, (U8*)locinput,
2210 PL_regeol - locinput, 60, 0) :
2211 locinput;
df1ffd02 2212 int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2213 PerlIO_printf(Perl_debug_log,
2214 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2215 (IV)(locinput - PL_bostr),
2216 PL_colors[4],
2217 len0, s0,
2218 PL_colors[5],
2219 PL_colors[2],
2220 len1, s1,
2221 PL_colors[3],
2222 (docolor ? "" : "> <"),
2223 PL_colors[0],
2224 len2, s2,
2225 PL_colors[1],
2226 15 - l - pref_len + 1,
2227 "",
2228 (IV)(scan - PL_regprogram), PL_regindent*2, "",
2229 SvPVX(prop));
2230 }
2231 });
d6a28714
JH
2232
2233 next = scan + NEXT_OFF(scan);
2234 if (next == scan)
2235 next = NULL;
2236
2237 switch (OP(scan)) {
2238 case BOL:
12d33761
HS
2239 if (locinput == PL_bostr || (PL_multiline &&
2240 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2241 {
2242 /* regtill = regbol; */
b8c5462f
JH
2243 break;
2244 }
d6a28714
JH
2245 sayNO;
2246 case MBOL:
12d33761
HS
2247 if (locinput == PL_bostr ||
2248 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2249 {
b8c5462f
JH
2250 break;
2251 }
d6a28714
JH
2252 sayNO;
2253 case SBOL:
c2a73568 2254 if (locinput == PL_bostr)
b8c5462f 2255 break;
d6a28714
JH
2256 sayNO;
2257 case GPOS:
2258 if (locinput == PL_reg_ganch)
2259 break;
2260 sayNO;
2261 case EOL:
2262 if (PL_multiline)
2263 goto meol;
2264 else
2265 goto seol;
2266 case MEOL:
2267 meol:
2268 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2269 sayNO;
b8c5462f 2270 break;
d6a28714
JH
2271 case SEOL:
2272 seol:
2273 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2274 sayNO;
d6a28714 2275 if (PL_regeol - locinput > 1)
b8c5462f 2276 sayNO;
b8c5462f 2277 break;
d6a28714
JH
2278 case EOS:
2279 if (PL_regeol != locinput)
b8c5462f 2280 sayNO;
d6a28714 2281 break;
ffc61ed2 2282 case SANY:
d6a28714 2283 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2284 sayNO;
f33976b4
DB
2285 if (do_utf8) {
2286 locinput += PL_utf8skip[nextchr];
2287 if (locinput > PL_regeol)
2288 sayNO;
2289 nextchr = UCHARAT(locinput);
2290 }
2291 else
2292 nextchr = UCHARAT(++locinput);
2293 break;
2294 case CANY:
2295 if (!nextchr && locinput >= PL_regeol)
2296 sayNO;
b8c5462f 2297 nextchr = UCHARAT(++locinput);
a0d0e21e 2298 break;
ffc61ed2 2299 case REG_ANY:
1aa99e6b
IH
2300 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2301 sayNO;
2302 if (do_utf8) {
b8c5462f 2303 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2304 if (locinput > PL_regeol)
2305 sayNO;
a0ed51b3 2306 nextchr = UCHARAT(locinput);
a0ed51b3 2307 }
1aa99e6b
IH
2308 else
2309 nextchr = UCHARAT(++locinput);
a0ed51b3 2310 break;
d6a28714 2311 case EXACT:
cd439c50
IZ
2312 s = STRING(scan);
2313 ln = STR_LEN(scan);
1aa99e6b 2314 if (do_utf8 != (UTF!=0)) {
bc517b45 2315 /* The target and the pattern have differing utf8ness. */
1aa99e6b
IH
2316 char *l = locinput;
2317 char *e = s + ln;
bc517b45 2318 STRLEN ulen;
a72c7584 2319
5ff6fc6d
JH
2320 if (do_utf8) {
2321 /* The target is utf8, the pattern is not utf8. */
1aa99e6b
IH
2322 while (s < e) {
2323 if (l >= PL_regeol)
5ff6fc6d
JH
2324 sayNO;
2325 if (NATIVE_TO_UNI(*(U8*)s) !=
bc517b45 2326 utf8_to_uvchr((U8*)l, &ulen))
5ff6fc6d 2327 sayNO;
bc517b45 2328 l += ulen;
5ff6fc6d 2329 s ++;
1aa99e6b 2330 }
5ff6fc6d
JH
2331 }
2332 else {
2333 /* The target is not utf8, the pattern is utf8. */
1aa99e6b
IH
2334 while (s < e) {
2335 if (l >= PL_regeol)
2336 sayNO;
5ff6fc6d 2337 if (NATIVE_TO_UNI(*((U8*)l)) !=
bc517b45 2338 utf8_to_uvchr((U8*)s, &ulen))
1aa99e6b 2339 sayNO;
bc517b45 2340 s += ulen;
a72c7584 2341 l ++;
1aa99e6b 2342 }
5ff6fc6d 2343 }
1aa99e6b
IH
2344 locinput = l;
2345 nextchr = UCHARAT(locinput);
2346 break;
2347 }
bc517b45 2348 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2349 /* Inline the first character, for speed. */
2350 if (UCHARAT(s) != nextchr)
2351 sayNO;
2352 if (PL_regeol - locinput < ln)
2353 sayNO;
2354 if (ln > 1 && memNE(s, locinput, ln))
2355 sayNO;
2356 locinput += ln;
2357 nextchr = UCHARAT(locinput);
2358 break;
2359 case EXACTFL:
b8c5462f
JH
2360 PL_reg_flags |= RF_tainted;
2361 /* FALL THROUGH */
d6a28714 2362 case EXACTF:
cd439c50
IZ
2363 s = STRING(scan);
2364 ln = STR_LEN(scan);
d6a28714 2365
d07ddd77
JH
2366 if (do_utf8 || UTF) {
2367 /* Either target or the pattern are utf8. */
d6a28714 2368 char *l = locinput;
d07ddd77 2369 char *e = PL_regeol;
bc517b45 2370
d07ddd77
JH
2371 if (ibcmp_utf8(s, 0, ln, do_utf8,
2372 l, &e, 0, UTF))
2373 sayNO;
2374 locinput = e;
2375 nextchr = UCHARAT(locinput);
2376 break;
a0ed51b3 2377 }
d6a28714 2378
bc517b45
JH
2379 /* Neither the target and the pattern are utf8. */
2380
d6a28714
JH
2381 /* Inline the first character, for speed. */
2382 if (UCHARAT(s) != nextchr &&
2383 UCHARAT(s) != ((OP(scan) == EXACTF)
2384 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2385 sayNO;
d6a28714 2386 if (PL_regeol - locinput < ln)
b8c5462f 2387 sayNO;
d6a28714
JH
2388 if (ln > 1 && (OP(scan) == EXACTF
2389 ? ibcmp(s, locinput, ln)
2390 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2391 sayNO;
d6a28714
JH
2392 locinput += ln;
2393 nextchr = UCHARAT(locinput);
a0d0e21e 2394 break;
d6a28714 2395 case ANYOF:
ffc61ed2 2396 if (do_utf8) {
9e55ce06
JH
2397 STRLEN inclasslen = PL_regeol - locinput;
2398
2399 if (!reginclasslen(scan, (U8*)locinput, &inclasslen, do_utf8))
ffc61ed2
JH
2400 sayNO;
2401 if (locinput >= PL_regeol)
2402 sayNO;
9e55ce06 2403 locinput += inclasslen;
b8c5462f 2404 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2405 }
2406 else {
2407 if (nextchr < 0)
2408 nextchr = UCHARAT(locinput);
2409 if (!reginclass(scan, (U8*)locinput, do_utf8))
2410 sayNO;
2411 if (!nextchr && locinput >= PL_regeol)
2412 sayNO;
2413 nextchr = UCHARAT(++locinput);
2414 }
b8c5462f 2415 break;
d6a28714 2416 case ALNUML:
b8c5462f
JH
2417 PL_reg_flags |= RF_tainted;
2418 /* FALL THROUGH */
d6a28714 2419 case ALNUM:
b8c5462f 2420 if (!nextchr)
4633a7c4 2421 sayNO;
ffc61ed2 2422 if (do_utf8) {
ad24be35 2423 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2424 if (!(OP(scan) == ALNUM
3568d838 2425 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2426 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2427 {
2428 sayNO;
a0ed51b3 2429 }
b8c5462f 2430 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2431 nextchr = UCHARAT(locinput);
2432 break;
2433 }
ffc61ed2 2434 if (!(OP(scan) == ALNUM
d6a28714 2435 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2436 sayNO;
b8c5462f 2437 nextchr = UCHARAT(++locinput);
a0d0e21e 2438 break;
d6a28714 2439 case NALNUML:
b8c5462f
JH
2440 PL_reg_flags |= RF_tainted;
2441 /* FALL THROUGH */
d6a28714
JH
2442 case NALNUM:
2443 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2444 sayNO;
ffc61ed2 2445 if (do_utf8) {
8269fa76 2446 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2447 if (OP(scan) == NALNUM
3568d838 2448 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2449 : isALNUM_LC_utf8((U8*)locinput))
2450 {
b8c5462f 2451 sayNO;
d6a28714 2452 }
b8c5462f
JH
2453 locinput += PL_utf8skip[nextchr];
2454 nextchr = UCHARAT(locinput);
2455 break;
2456 }
ffc61ed2 2457 if (OP(scan) == NALNUM
d6a28714 2458 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2459 sayNO;
76e3520e 2460 nextchr = UCHARAT(++locinput);
a0d0e21e 2461 break;
d6a28714
JH
2462 case BOUNDL:
2463 case NBOUNDL:
3280af22 2464 PL_reg_flags |= RF_tainted;
bbce6d69 2465 /* FALL THROUGH */
d6a28714
JH
2466 case BOUND:
2467 case NBOUND:
2468 /* was last char in word? */
ffc61ed2 2469 if (do_utf8) {
12d33761
HS
2470 if (locinput == PL_bostr)
2471 ln = '\n';
ffc61ed2
JH
2472 else {
2473 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2474
2b9d42f0 2475 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2476 }
2477 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2478 ln = isALNUM_uni(ln);
8269fa76 2479 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2480 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2481 }
2482 else {
9041c2e3 2483 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2484 n = isALNUM_LC_utf8((U8*)locinput);
2485 }
a0ed51b3 2486 }
d6a28714 2487 else {
12d33761
HS
2488 ln = (locinput != PL_bostr) ?
2489 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2490 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2491 ln = isALNUM(ln);
2492 n = isALNUM(nextchr);
2493 }
2494 else {
2495 ln = isALNUM_LC(ln);
2496 n = isALNUM_LC(nextchr);
2497 }
d6a28714 2498 }
ffc61ed2
JH
2499 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2500 OP(scan) == BOUNDL))
2501 sayNO;
a0ed51b3 2502 break;
d6a28714 2503 case SPACEL:
3280af22 2504 PL_reg_flags |= RF_tainted;
bbce6d69 2505 /* FALL THROUGH */
d6a28714 2506 case SPACE:
9442cb0e 2507 if (!nextchr)
4633a7c4 2508 sayNO;
1aa99e6b 2509 if (do_utf8) {
fd400ab9 2510 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2511 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2512 if (!(OP(scan) == SPACE
3568d838 2513 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2514 : isSPACE_LC_utf8((U8*)locinput)))
2515 {
2516 sayNO;
2517 }
2518 locinput += PL_utf8skip[nextchr];
2519 nextchr = UCHARAT(locinput);
2520 break;
d6a28714 2521 }
ffc61ed2
JH
2522 if (!(OP(scan) == SPACE
2523 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2524 sayNO;
2525 nextchr = UCHARAT(++locinput);
2526 }
2527 else {
2528 if (!(OP(scan) == SPACE
2529 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2530 sayNO;
2531 nextchr = UCHARAT(++locinput);
a0ed51b3 2532 }
a0ed51b3 2533 break;
d6a28714 2534 case NSPACEL:
3280af22 2535 PL_reg_flags |= RF_tainted;
bbce6d69 2536 /* FALL THROUGH */
d6a28714 2537 case NSPACE:
9442cb0e 2538 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2539 sayNO;
1aa99e6b 2540 if (do_utf8) {
8269fa76 2541 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2542 if (OP(scan) == NSPACE
3568d838 2543 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2544 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2545 {
2546 sayNO;
2547 }
2548 locinput += PL_utf8skip[nextchr];
2549 nextchr = UCHARAT(locinput);
2550 break;
a0ed51b3 2551 }
ffc61ed2 2552 if (OP(scan) == NSPACE
d6a28714 2553 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2554 sayNO;
76e3520e 2555 nextchr = UCHARAT(++locinput);
a0d0e21e 2556 break;
d6a28714 2557 case DIGITL:
a0ed51b3
LW
2558 PL_reg_flags |= RF_tainted;
2559 /* FALL THROUGH */
d6a28714 2560 case DIGIT:
9442cb0e 2561 if (!nextchr)
a0ed51b3 2562 sayNO;
1aa99e6b 2563 if (do_utf8) {
8269fa76 2564 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2565 if (!(OP(scan) == DIGIT
3568d838 2566 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2567 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2568 {
a0ed51b3 2569 sayNO;
dfe13c55 2570 }
6f06b55f 2571 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2572 nextchr = UCHARAT(locinput);
2573 break;
2574 }
ffc61ed2 2575 if (!(OP(scan) == DIGIT
9442cb0e 2576 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2577 sayNO;
2578 nextchr = UCHARAT(++locinput);
2579 break;
d6a28714 2580 case NDIGITL:
b8c5462f
JH
2581 PL_reg_flags |= RF_tainted;
2582 /* FALL THROUGH */
d6a28714 2583 case NDIGIT:
9442cb0e 2584 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2585 sayNO;
1aa99e6b 2586 if (do_utf8) {
8269fa76 2587 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2588 if (OP(scan) == NDIGIT
3568d838 2589 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2590 : isDIGIT_LC_utf8((U8*)locinput))
2591 {
a0ed51b3 2592 sayNO;
9442cb0e 2593 }
6f06b55f 2594 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2595 nextchr = UCHARAT(locinput);
2596 break;
2597 }
ffc61ed2 2598 if (OP(scan) == NDIGIT
9442cb0e 2599 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2600 sayNO;
2601 nextchr = UCHARAT(++locinput);
2602 break;
2603 case CLUMP:
b7c83a7e 2604 if (locinput >= PL_regeol)
a0ed51b3 2605 sayNO;
b7c83a7e
JH
2606 if (do_utf8) {
2607 LOAD_UTF8_CHARCLASS(mark,"~");
2608 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2609 sayNO;
2610 locinput += PL_utf8skip[nextchr];
2611 while (locinput < PL_regeol &&
2612 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2613 locinput += UTF8SKIP(locinput);
2614 if (locinput > PL_regeol)
2615 sayNO;
eb08e2da
JH
2616 }
2617 else
2618 locinput++;
a0ed51b3
LW
2619 nextchr = UCHARAT(locinput);
2620 break;
c8756f30 2621 case REFFL:
3280af22 2622 PL_reg_flags |= RF_tainted;
c8756f30 2623 /* FALL THROUGH */
c277df42 2624 case REF:
c8756f30 2625 case REFF:
c277df42 2626 n = ARG(scan); /* which paren pair */
cf93c79d 2627 ln = PL_regstartp[n];
2c2d71f5 2628 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2629 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2630 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2631 if (ln == PL_regendp[n])
a0d0e21e 2632 break;
a0ed51b3 2633
cf93c79d 2634 s = PL_bostr + ln;
1aa99e6b 2635 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2636 char *l = locinput;
cf93c79d 2637 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2638 /*
2639 * Note that we can't do the "other character" lookup trick as
2640 * in the 8-bit case (no pun intended) because in Unicode we
2641 * have to map both upper and title case to lower case.
2642 */
2643 if (OP(scan) == REFF) {
a2a2844f 2644 STRLEN ulen1, ulen2;
e7ae6809
JH
2645 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
2646 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a0ed51b3
LW
2647 while (s < e) {
2648 if (l >= PL_regeol)
2649 sayNO;
a2a2844f
JH
2650 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2651 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 2652 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 2653 sayNO;
a2a2844f
JH
2654 s += ulen1;
2655 l += ulen2;
a0ed51b3
LW
2656 }
2657 }
2658 locinput = l;
2659 nextchr = UCHARAT(locinput);
2660 break;
2661 }
2662
a0d0e21e 2663 /* Inline the first character, for speed. */
76e3520e 2664 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2665 (OP(scan) == REF ||
2666 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2667 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2668 sayNO;
cf93c79d 2669 ln = PL_regendp[n] - ln;
3280af22 2670 if (locinput + ln > PL_regeol)
4633a7c4 2671 sayNO;
c8756f30
AK
2672 if (ln > 1 && (OP(scan) == REF
2673 ? memNE(s, locinput, ln)
2674 : (OP(scan) == REFF
2675 ? ibcmp(s, locinput, ln)
2676 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2677 sayNO;
a0d0e21e 2678 locinput += ln;
76e3520e 2679 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2680 break;
2681
2682 case NOTHING:
c277df42 2683 case TAIL:
a0d0e21e
LW
2684 break;
2685 case BACK:
2686 break;
c277df42
IZ
2687 case EVAL:
2688 {
2689 dSP;
533c011a 2690 OP_4tree *oop = PL_op;
3280af22
NIS
2691 COP *ocurcop = PL_curcop;
2692 SV **ocurpad = PL_curpad;
c277df42 2693 SV *ret;
9041c2e3 2694
c277df42 2695 n = ARG(scan);
533c011a 2696 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2697 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2698 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2699 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2700
8e5e9ebe
RGS
2701 {
2702 SV **before = SP;
2703 CALLRUNOPS(aTHX); /* Scalar context. */
2704 SPAGAIN;
2705 if (SP == before)
2706 ret = Nullsv; /* protect against empty (?{}) blocks. */
2707 else {
2708 ret = POPs;
2709 PUTBACK;
2710 }
2711 }
2712
0f5d15d6
IZ
2713 PL_op = oop;
2714 PL_curpad = ocurpad;
2715 PL_curcop = ocurcop;
c277df42 2716 if (logical) {
0f5d15d6
IZ
2717 if (logical == 2) { /* Postponed subexpression. */
2718 regexp *re;
22c35a8c 2719 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2720 re_cc_state state;
0f5d15d6
IZ
2721 CHECKPOINT cp, lastcp;
2722
2723 if(SvROK(ret) || SvRMAGICAL(ret)) {
2724 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2725
2726 if(SvMAGICAL(sv))
14befaf4 2727 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2728 }
2729 if (mg) {
2730 re = (regexp *)mg->mg_obj;
df0003d4 2731 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2732 }
2733 else {
2734 STRLEN len;
2735 char *t = SvPV(ret, len);
2736 PMOP pm;
2737 char *oprecomp = PL_regprecomp;
2738 I32 osize = PL_regsize;
2739 I32 onpar = PL_regnpar;
2740
5fcd1c1b 2741 Zero(&pm, 1, PMOP);
cea2e8a9 2742 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2743 if (!(SvFLAGS(ret)
0f5d15d6 2744 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2745 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2746 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2747 PL_regprecomp = oprecomp;
2748 PL_regsize = osize;
2749 PL_regnpar = onpar;
2750 }
2751 DEBUG_r(
9041c2e3 2752 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2753 "Entering embedded `%s%.60s%s%s'\n",
2754 PL_colors[0],
2755 re->precomp,
2756 PL_colors[1],
2757 (strlen(re->precomp) > 60 ? "..." : ""))
2758 );
2759 state.node = next;
2760 state.prev = PL_reg_call_cc;
2761 state.cc = PL_regcc;
2762 state.re = PL_reg_re;
2763
2ab05381 2764 PL_regcc = 0;
9041c2e3 2765
0f5d15d6 2766 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2767 REGCP_SET(lastcp);
0f5d15d6
IZ
2768 cache_re(re);
2769 state.ss = PL_savestack_ix;
2770 *PL_reglastparen = 0;
a01268b5 2771 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2772 PL_reg_call_cc = &state;
2773 PL_reginput = locinput;
2c2d71f5
JH
2774
2775 /* XXXX This is too dramatic a measure... */
2776 PL_reg_maxiter = 0;
2777
0f5d15d6 2778 if (regmatch(re->program + 1)) {
2c914db6
IZ
2779 /* Even though we succeeded, we need to restore
2780 global variables, since we may be wrapped inside
2781 SUSPEND, thus the match may be not finished yet. */
2782
2783 /* XXXX Do this only if SUSPENDed? */
2784 PL_reg_call_cc = state.prev;
2785 PL_regcc = state.cc;
2786 PL_reg_re = state.re;
2787 cache_re(PL_reg_re);
2788
2789 /* XXXX This is too dramatic a measure... */
2790 PL_reg_maxiter = 0;
2791
2792 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2793 ReREFCNT_dec(re);
2794 regcpblow(cp);
2795 sayYES;
2796 }
0f5d15d6 2797 ReREFCNT_dec(re);
02db2b7b 2798 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2799 regcppop();
2800 PL_reg_call_cc = state.prev;
2801 PL_regcc = state.cc;
2802 PL_reg_re = state.re;
d3790889 2803 cache_re(PL_reg_re);
2c2d71f5
JH
2804
2805 /* XXXX This is too dramatic a measure... */
2806 PL_reg_maxiter = 0;
2807
8e514ae6 2808 logical = 0;
0f5d15d6
IZ
2809 sayNO;
2810 }
c277df42 2811 sw = SvTRUE(ret);
0f5d15d6 2812 logical = 0;
a0ed51b3
LW
2813 }
2814 else
3280af22 2815 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2816 break;
2817 }
a0d0e21e 2818 case OPEN:
c277df42 2819 n = ARG(scan); /* which paren pair */
3280af22
NIS
2820 PL_reg_start_tmp[n] = locinput;
2821 if (n > PL_regsize)
2822 PL_regsize = n;
a0d0e21e
LW
2823 break;
2824 case CLOSE:
c277df42 2825 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2826 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2827 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2828 if (n > *PL_reglastparen)
2829 *PL_reglastparen = n;
a01268b5 2830 *PL_reglastcloseparen = n;
a0d0e21e 2831 break;
c277df42
IZ
2832 case GROUPP:
2833 n = ARG(scan); /* which paren pair */
cf93c79d 2834 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2835 break;
2836 case IFTHEN:
2c2d71f5 2837 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2838 if (sw)
2839 next = NEXTOPER(NEXTOPER(scan));
2840 else {
2841 next = scan + ARG(scan);
2842 if (OP(next) == IFTHEN) /* Fake one. */
2843 next = NEXTOPER(NEXTOPER(next));
2844 }
2845 break;
2846 case LOGICAL:
0f5d15d6 2847 logical = scan->flags;
c277df42 2848 break;
2ab05381
IZ
2849/*******************************************************************
2850 PL_regcc contains infoblock about the innermost (...)* loop, and
2851 a pointer to the next outer infoblock.
2852
2853 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2854
2855 1) After matching X, regnode for CURLYX is processed;
2856
9041c2e3 2857 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2858 regmatch() recursively with the starting point at WHILEM node;
2859
2860 3) Each hit of WHILEM node tries to match A and Z (in the order
2861 depending on the current iteration, min/max of {min,max} and
2862 greediness). The information about where are nodes for "A"
2863 and "Z" is read from the infoblock, as is info on how many times "A"
2864 was already matched, and greediness.
2865
2866 4) After A matches, the same WHILEM node is hit again.
2867
2868 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2869 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2870 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2871 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2872 of the external loop.
2873
2874 Currently present infoblocks form a tree with a stem formed by PL_curcc
2875 and whatever it mentions via ->next, and additional attached trees
2876 corresponding to temporarily unset infoblocks as in "5" above.
2877
9041c2e3 2878 In the following picture infoblocks for outer loop of
2ab05381
IZ
2879 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2880 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2881 infoblocks are drawn below the "reset" infoblock.
2882
2883 In fact in the picture below we do not show failed matches for Z and T
2884 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2885 more obvious *why* one needs to *temporary* unset infoblocks.]
2886
2887 Matched REx position InfoBlocks Comment
2888 (Y(A)*?Z)*?T x
2889 Y(A)*?Z)*?T x <- O
2890 Y (A)*?Z)*?T x <- O
2891 Y A)*?Z)*?T x <- O <- I
2892 YA )*?Z)*?T x <- O <- I
2893 YA A)*?Z)*?T x <- O <- I
2894 YAA )*?Z)*?T x <- O <- I
2895 YAA Z)*?T x <- O # Temporary unset I
2896 I
2897
2898 YAAZ Y(A)*?Z)*?T x <- O
2899 I
2900
2901 YAAZY (A)*?Z)*?T x <- O
2902 I
2903
2904 YAAZY A)*?Z)*?T x <- O <- I
2905 I
2906
2907 YAAZYA )*?Z)*?T x <- O <- I
2908 I
2909
2910 YAAZYA Z)*?T x <- O # Temporary unset I
2911 I,I
2912
2913 YAAZYAZ )*?T x <- O
2914 I,I
2915
2916 YAAZYAZ T x # Temporary unset O
2917 O
2918 I,I
2919
2920 YAAZYAZT x
2921 O
2922 I,I
2923 *******************************************************************/
a0d0e21e
LW
2924 case CURLYX: {
2925 CURCUR cc;
3280af22 2926 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2927 /* No need to save/restore up to this paren */
2928 I32 parenfloor = scan->flags;
c277df42
IZ
2929
2930 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2931 next += ARG(next);
3280af22
NIS
2932 cc.oldcc = PL_regcc;
2933 PL_regcc = &cc;
cb434fcc
IZ
2934 /* XXXX Probably it is better to teach regpush to support
2935 parenfloor > PL_regsize... */
2936 if (parenfloor > *PL_reglastparen)
2937 parenfloor = *PL_reglastparen; /* Pessimization... */
2938 cc.parenfloor = parenfloor;
a0d0e21e
LW
2939 cc.cur = -1;
2940 cc.min = ARG1(scan);
2941 cc.max = ARG2(scan);
c277df42 2942 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2943 cc.next = next;
2944 cc.minmod = minmod;
2945 cc.lastloc = 0;
3280af22 2946 PL_reginput = locinput;
a0d0e21e
LW
2947 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2948 regcpblow(cp);
3280af22 2949 PL_regcc = cc.oldcc;
4633a7c4 2950 saySAME(n);
a0d0e21e
LW
2951 }
2952 /* NOT REACHED */
2953 case WHILEM: {
2954 /*
2955 * This is really hard to understand, because after we match
2956 * what we're trying to match, we must make sure the rest of
2c2d71f5 2957 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2958 * to go back UP the parse tree by recursing ever deeper. And
2959 * if it fails, we have to reset our parent's current state
2960 * that we can try again after backing off.
2961 */
2962
c277df42 2963 CHECKPOINT cp, lastcp;
3280af22 2964 CURCUR* cc = PL_regcc;
c277df42
IZ
2965 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2966
4633a7c4 2967 n = cc->cur + 1; /* how many we know we matched */
3280af22 2968 PL_reginput = locinput;
a0d0e21e 2969
c277df42 2970 DEBUG_r(
9041c2e3
NIS
2971 PerlIO_printf(Perl_debug_log,
2972 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2973 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2974 (long)n, (long)cc->min,
c277df42
IZ
2975 (long)cc->max, (long)cc)
2976 );
4633a7c4 2977
a0d0e21e
LW
2978 /* If degenerate scan matches "", assume scan done. */
2979
579cf2c3 2980 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2981 PL_regcc = cc->oldcc;
2ab05381
IZ
2982 if (PL_regcc)
2983 ln = PL_regcc->cur;
c277df42 2984 DEBUG_r(
c3464db5
DD
2985 PerlIO_printf(Perl_debug_log,
2986 "%*s empty match detected, try continuation...\n",
3280af22 2987 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2988 );
a0d0e21e 2989 if (regmatch(cc->next))
4633a7c4 2990 sayYES;
2ab05381
IZ
2991 if (PL_regcc)
2992 PL_regcc->cur = ln;
3280af22 2993 PL_regcc = cc;
4633a7c4 2994 sayNO;
a0d0e21e
LW
2995 }
2996
2997 /* First just match a string of min scans. */
2998
2999 if (n < cc->min) {
3000 cc->cur = n;
3001 cc->lastloc = locinput;
4633a7c4
LW
3002 if (regmatch(cc->scan))
3003 sayYES;
3004 cc->cur = n - 1;
c277df42 3005 cc->lastloc = lastloc;
4633a7c4 3006 sayNO;
a0d0e21e
LW
3007 }
3008
2c2d71f5
JH
3009 if (scan->flags) {
3010 /* Check whether we already were at this position.
3011 Postpone detection until we know the match is not
3012 *that* much linear. */
3013 if (!PL_reg_maxiter) {
3014 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3015 PL_reg_leftiter = PL_reg_maxiter;
3016 }
3017 if (PL_reg_leftiter-- == 0) {
3018 I32 size = (PL_reg_maxiter + 7)/8;
3019 if (PL_reg_poscache) {
3020 if (PL_reg_poscache_size < size) {
3021 Renew(PL_reg_poscache, size, char);
3022 PL_reg_poscache_size = size;
3023 }
3024 Zero(PL_reg_poscache, size, char);
3025 }
3026 else {
3027 PL_reg_poscache_size = size;
3028 Newz(29, PL_reg_poscache, size, char);
3029 }
3030 DEBUG_r(
3031 PerlIO_printf(Perl_debug_log,
3032 "%sDetected a super-linear match, switching on caching%s...\n",
3033 PL_colors[4], PL_colors[5])
3034 );
3035 }
3036 if (PL_reg_leftiter < 0) {
3037 I32 o = locinput - PL_bostr, b;
3038
3039 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
3040 b = o % 8;
3041 o /= 8;
3042 if (PL_reg_poscache[o] & (1<<b)) {
3043 DEBUG_r(
3044 PerlIO_printf(Perl_debug_log,
3045 "%*s already tried at this position...\n",
3046 REPORT_CODE_OFF+PL_regindent*2, "")
3047 );
7821416a 3048 sayNO_SILENT;
2c2d71f5
JH
3049 }
3050 PL_reg_poscache[o] |= (1<<b);
3051 }
3052 }
3053
a0d0e21e
LW
3054 /* Prefer next over scan for minimal matching. */
3055
3056 if (cc->minmod) {
3280af22 3057 PL_regcc = cc->oldcc;
2ab05381
IZ
3058 if (PL_regcc)
3059 ln = PL_regcc->cur;
5f05dabc 3060 cp = regcppush(cc->parenfloor);
02db2b7b 3061 REGCP_SET(lastcp);
5f05dabc 3062 if (regmatch(cc->next)) {
c277df42 3063 regcpblow(cp);
4633a7c4 3064 sayYES; /* All done. */
5f05dabc 3065 }
02db2b7b 3066 REGCP_UNWIND(lastcp);
5f05dabc 3067 regcppop();
2ab05381
IZ
3068 if (PL_regcc)
3069 PL_regcc->cur = ln;
3280af22 3070 PL_regcc = cc;
a0d0e21e 3071
c277df42 3072 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 3073 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3074 && !(PL_reg_flags & RF_warned)) {
3075 PL_reg_flags |= RF_warned;
e476b1b5 3076 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
3077 "Complex regular subexpression recursion",
3078 REG_INFTY - 1);
c277df42 3079 }
4633a7c4 3080 sayNO;
c277df42 3081 }
a687059c 3082
c277df42 3083 DEBUG_r(
c3464db5
DD
3084 PerlIO_printf(Perl_debug_log,
3085 "%*s trying longer...\n",
3280af22 3086 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3087 );
a0d0e21e 3088 /* Try scanning more and see if it helps. */
3280af22 3089 PL_reginput = locinput;
a0d0e21e
LW
3090 cc->cur = n;
3091 cc->lastloc = locinput;
5f05dabc 3092 cp = regcppush(cc->parenfloor);
02db2b7b 3093 REGCP_SET(lastcp);
5f05dabc 3094 if (regmatch(cc->scan)) {
c277df42 3095 regcpblow(cp);
4633a7c4 3096 sayYES;
5f05dabc 3097 }
02db2b7b 3098 REGCP_UNWIND(lastcp);
5f05dabc 3099 regcppop();
4633a7c4 3100 cc->cur = n - 1;
c277df42 3101 cc->lastloc = lastloc;
4633a7c4 3102 sayNO;
a0d0e21e
LW
3103 }
3104
3105 /* Prefer scan over next for maximal matching. */
3106
3107 if (n < cc->max) { /* More greed allowed? */
5f05dabc 3108 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
3109 cc->cur = n;
3110 cc->lastloc = locinput;
02db2b7b 3111 REGCP_SET(lastcp);
5f05dabc 3112 if (regmatch(cc->scan)) {
c277df42 3113 regcpblow(cp);
4633a7c4 3114 sayYES;
5f05dabc 3115 }
02db2b7b 3116 REGCP_UNWIND(lastcp);
a0d0e21e 3117 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3118 PL_reginput = locinput;
c277df42 3119 DEBUG_r(
c3464db5
DD
3120 PerlIO_printf(Perl_debug_log,
3121 "%*s failed, try continuation...\n",
3280af22 3122 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3123 );
3124 }
9041c2e3 3125 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3126 && !(PL_reg_flags & RF_warned)) {
3280af22 3127 PL_reg_flags |= RF_warned;
e476b1b5 3128 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
3129 "Complex regular subexpression recursion",
3130 REG_INFTY - 1);
a0d0e21e
LW
3131 }
3132
3133 /* Failed deeper matches of scan, so see if this one works. */
3280af22 3134 PL_regcc = cc->oldcc;
2ab05381
IZ
3135 if (PL_regcc)
3136 ln = PL_regcc->cur;
a0d0e21e 3137 if (regmatch(cc->next))
4633a7c4 3138 sayYES;
2ab05381
IZ
3139 if (PL_regcc)
3140 PL_regcc->cur = ln;
3280af22 3141 PL_regcc = cc;
4633a7c4 3142 cc->cur = n - 1;
c277df42 3143 cc->lastloc = lastloc;
4633a7c4 3144 sayNO;
a0d0e21e
LW
3145 }
3146 /* NOT REACHED */
9041c2e3 3147 case BRANCHJ:
c277df42
IZ
3148 next = scan + ARG(scan);
3149 if (next == scan)
3150 next = NULL;
3151 inner = NEXTOPER(NEXTOPER(scan));
3152 goto do_branch;
9041c2e3 3153 case BRANCH:
c277df42
IZ
3154 inner = NEXTOPER(scan);
3155 do_branch:
3156 {
c277df42
IZ
3157 c1 = OP(scan);
3158 if (OP(next) != c1) /* No choice. */
3159 next = inner; /* Avoid recursion. */
a0d0e21e 3160 else {
02db2b7b
IZ
3161 I32 lastparen = *PL_reglastparen;
3162 I32 unwind1;
3163 re_unwind_branch_t *uw;
3164
3165 /* Put unwinding data on stack */
3166 unwind1 = SSNEWt(1,re_unwind_branch_t);
3167 uw = SSPTRt(unwind1,re_unwind_branch_t);
3168 uw->prev = unwind;
3169 unwind = unwind1;
3170 uw->type = ((c1 == BRANCH)
3171 ? RE_UNWIND_BRANCH
3172 : RE_UNWIND_BRANCHJ);
3173 uw->lastparen = lastparen;
3174 uw->next = next;
3175 uw->locinput = locinput;
3176 uw->nextchr = nextchr;
3177#ifdef DEBUGGING
3178 uw->regindent = ++PL_regindent;
3179#endif
c277df42 3180
02db2b7b
IZ
3181 REGCP_SET(uw->lastcp);
3182
3183 /* Now go into the first branch */
3184 next = inner;
a687059c 3185 }
a0d0e21e
LW
3186 }
3187 break;
3188 case MINMOD:
3189 minmod = 1;
3190 break;
c277df42
IZ
3191 case CURLYM:
3192 {
00db4c45 3193 I32 l = 0;
c277df42 3194 CHECKPOINT lastcp;
9041c2e3 3195
c277df42
IZ
3196 /* We suppose that the next guy does not need
3197 backtracking: in particular, it is of constant length,
3198 and has no parenths to influence future backrefs. */
3199 ln = ARG1(scan); /* min to match */
3200 n = ARG2(scan); /* max to match */
c277df42
IZ
3201 paren = scan->flags;
3202 if (paren) {
3280af22
NIS
3203 if (paren > PL_regsize)
3204 PL_regsize = paren;
3205 if (paren > *PL_reglastparen)
3206 *PL_reglastparen = paren;
c277df42 3207 }
dc45a647 3208 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3209 if (paren)
3210 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3211 PL_reginput = locinput;
c277df42
IZ
3212 if (minmod) {
3213 minmod = 0;
3214 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3215 sayNO;
f31a99c8
HS
3216 /* if we matched something zero-length we don't need to
3217 backtrack - capturing parens are already defined, so
3218 the caveat in the maximal case doesn't apply
3219
3220 XXXX if ln == 0, we can redo this check first time
3221 through the following loop
3222 */
3223 if (ln && l == 0)
3224 n = ln; /* don't backtrack */
3280af22 3225 locinput = PL_reginput;
cca55fe3 3226 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3227 regnode *text_node = next;
3228
cca55fe3 3229 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3230
cca55fe3 3231 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3232 else {
cca55fe3
JP
3233 if (PL_regkind[(U8)OP(text_node)] == REF) {
3234 I32 n, ln;
3235 n = ARG(text_node); /* which paren pair */
3236 ln = PL_regstartp[n];
3237 /* assume yes if we haven't seen CLOSEn */
3238 if (
3239 *PL_reglastparen < n ||
3240 ln == -1 ||
3241 ln == PL_regendp[n]
3242 ) {
3243 c1 = c2 = -1000;
3244 goto assume_ok_MM;
3245 }
3246 c1 = *(PL_bostr + ln);
3247 }
3248 else { c1 = (U8)*STRING(text_node); }
af5decee 3249 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3250 c2 = PL_fold[c1];
af5decee 3251 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3252 c2 = PL_fold_locale[c1];
3253 else
3254 c2 = c1;
3255 }
a0ed51b3
LW
3256 }
3257 else
c277df42 3258 c1 = c2 = -1000;
cca55fe3 3259 assume_ok_MM:
02db2b7b 3260 REGCP_SET(lastcp);
5f4b28b2 3261 /* This may be improved if l == 0. */
c277df42
IZ
3262 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3263 /* If it could work, try it. */
3264 if (c1 == -1000 ||
3280af22
NIS
3265 UCHARAT(PL_reginput) == c1 ||
3266 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3267 {
3268 if (paren) {
f31a99c8 3269 if (ln) {
cf93c79d
IZ
3270 PL_regstartp[paren] =
3271 HOPc(PL_reginput, -l) - PL_bostr;
3272 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3273 }
3274 else
cf93c79d 3275 PL_regendp[paren] = -1;
c277df42
IZ
3276 }
3277 if (regmatch(next))
3278 sayYES;
02db2b7b 3279 REGCP_UNWIND(lastcp);
c277df42
IZ
3280 }
3281 /* Couldn't or didn't -- move forward. */
3280af22 3282 PL_reginput = locinput;
c277df42
IZ
3283 if (regrepeat_hard(scan, 1, &l)) {
3284 ln++;
3280af22 3285 locinput = PL_reginput;
c277df42
IZ
3286 }
3287 else
3288 sayNO;
3289 }
a0ed51b3
LW
3290 }
3291 else {
c277df42 3292 n = regrepeat_hard(scan, n, &l);
f31a99c8
HS
3293 /* if we matched something zero-length we don't need to
3294 backtrack, unless the minimum count is zero and we
3295 are capturing the result - in that case the capture
3296 being defined or not may affect later execution
3297 */
3298 if (n != 0 && l == 0 && !(paren && ln == 0))
3299 ln = n; /* don't backtrack */
3280af22 3300 locinput = PL_reginput;
c277df42 3301 DEBUG_r(
5c0ca799 3302 PerlIO_printf(Perl_debug_log,
faccc32b 3303 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3304 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3305 (IV) n, (IV)l)
c277df42
IZ
3306 );
3307 if (n >= ln) {
cca55fe3 3308 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3309 regnode *text_node = next;
3310
cca55fe3 3311 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3312
cca55fe3 3313 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3314 else {
cca55fe3
JP
3315 if (PL_regkind[(U8)OP(text_node)] == REF) {
3316 I32 n, ln;
3317 n = ARG(text_node); /* which paren pair */
3318 ln = PL_regstartp[n];
3319 /* assume yes if we haven't seen CLOSEn */
3320 if (
3321 *PL_reglastparen < n ||
3322 ln == -1 ||
3323 ln == PL_regendp[n]
3324 ) {
3325 c1 = c2 = -1000;
3326 goto assume_ok_REG;
3327 }
3328 c1 = *(PL_bostr + ln);
3329 }
3330 else { c1 = (U8)*STRING(text_node); }
3331
af5decee 3332 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3333 c2 = PL_fold[c1];
af5decee 3334 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3335 c2 = PL_fold_locale[c1];
3336 else
3337 c2 = c1;
3338 }
a0ed51b3
LW
3339 }
3340 else
c277df42
IZ
3341 c1 = c2 = -1000;
3342 }
cca55fe3 3343 assume_ok_REG:
02db2b7b 3344 REGCP_SET(lastcp);
c277df42
IZ
3345 while (n >= ln) {
3346 /* If it could work, try it. */
3347 if (c1 == -1000 ||
3280af22
NIS
3348 UCHARAT(PL_reginput) == c1 ||
3349 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3350 {
3351 DEBUG_r(
c3464db5 3352 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3353 "%*s trying tail with n=%"IVdf"...\n",
3354 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3355 );
3356 if (paren) {
3357 if (n) {
cf93c79d
IZ
3358 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3359 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3360 }
a0ed51b3 3361 else
cf93c79d 3362 PL_regendp[paren] = -1;
c277df42 3363 }
a0ed51b3
LW
3364 if (regmatch(next))
3365 sayYES;
02db2b7b 3366 REGCP_UNWIND(lastcp);
a0ed51b3 3367 }
c277df42
IZ
3368 /* Couldn't or didn't -- back up. */
3369 n--;
dfe13c55 3370 locinput = HOPc(locinput, -l);
3280af22 3371 PL_reginput = locinput;
c277df42
IZ
3372 }
3373 }
3374 sayNO;
3375 break;
3376 }
3377 case CURLYN:
3378 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3379 if (paren > PL_regsize)
3380 PL_regsize = paren;
3381 if (paren > *PL_reglastparen)
3382 *PL_reglastparen = paren;
c277df42
IZ
3383 ln = ARG1(scan); /* min to match */
3384 n = ARG2(scan); /* max to match */
dc45a647 3385 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3386 goto repeat;
a0d0e21e 3387 case CURLY:
c277df42 3388 paren = 0;
a0d0e21e
LW
3389 ln = ARG1(scan); /* min to match */
3390 n = ARG2(scan); /* max to match */
dc45a647 3391 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3392 goto repeat;
3393 case STAR:
3394 ln = 0;
c277df42 3395 n = REG_INFTY;
a0d0e21e 3396 scan = NEXTOPER(scan);
c277df42 3397 paren = 0;
a0d0e21e
LW
3398 goto repeat;
3399 case PLUS:
c277df42
IZ
3400 ln = 1;
3401 n = REG_INFTY;
3402 scan = NEXTOPER(scan);
3403 paren = 0;
3404 repeat:
a0d0e21e
LW
3405 /*
3406 * Lookahead to avoid useless match attempts
3407 * when we know what character comes next.
3408 */
5f80c4cf
JP
3409
3410 /*
3411 * Used to only do .*x and .*?x, but now it allows
3412 * for )'s, ('s and (?{ ... })'s to be in the way
3413 * of the quantifier and the EXACT-like node. -- japhy
3414 */
3415
cca55fe3 3416 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3417 U8 *s;
3418 regnode *text_node = next;
3419
cca55fe3 3420 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3421
cca55fe3 3422 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3423 else {
cca55fe3
JP
3424 if (PL_regkind[(U8)OP(text_node)] == REF) {
3425 I32 n, ln;
3426 n = ARG(text_node); /* which paren pair */
3427 ln = PL_regstartp[n];
3428 /* assume yes if we haven't seen CLOSEn */
3429 if (
3430 *PL_reglastparen < n ||
3431 ln == -1 ||
3432 ln == PL_regendp[n]
3433 ) {
3434 c1 = c2 = -1000;
3435 goto assume_ok_easy;
3436 }
9246c65e 3437 s = (U8*)PL_bostr + ln;
cca55fe3
JP
3438 }
3439 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
3440
3441 if (!UTF) {
3442 c2 = c1 = *s;
f65d3ee7 3443 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3444 c2 = PL_fold[c1];
f65d3ee7 3445 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf 3446 c2 = PL_fold_locale[c1];
1aa99e6b 3447 }
5f80c4cf 3448 else { /* UTF */
f65d3ee7 3449 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 3450 STRLEN ulen1, ulen2;
e7ae6809
JH
3451 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
3452 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
3453
3454 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3455 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3456
3457 c1 = utf8_to_uvuni(tmpbuf1, 0);
3458 c2 = utf8_to_uvuni(tmpbuf2, 0);
5f80c4cf
JP
3459 }
3460 else {
3461 c2 = c1 = utf8_to_uvchr(s, NULL);
3462 }
1aa99e6b
IH
3463 }
3464 }
bbce6d69 3465 }
a0d0e21e 3466 else
bbce6d69 3467 c1 = c2 = -1000;
cca55fe3 3468 assume_ok_easy:
3280af22 3469 PL_reginput = locinput;
a0d0e21e 3470 if (minmod) {
c277df42 3471 CHECKPOINT lastcp;
a0d0e21e
LW
3472 minmod = 0;
3473 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3474 sayNO;
a0ed51b3 3475 locinput = PL_reginput;
02db2b7b 3476 REGCP_SET(lastcp);
0fe9bf95 3477 if (c1 != -1000) {
1aa99e6b 3478 char *e; /* Should not check after this */
0fe9bf95
IZ
3479 char *old = locinput;
3480
1aa99e6b 3481 if (n == REG_INFTY) {
0fe9bf95 3482 e = PL_regeol - 1;
1aa99e6b
IH
3483 if (do_utf8)
3484 while (UTF8_IS_CONTINUATION(*(U8*)e))
3485 e--;
3486 }
3487 else if (do_utf8) {
3488 int m = n - ln;
3489 for (e = locinput;
3490 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3491 e += UTF8SKIP(e);
3492 }
3493 else {
3494 e = locinput + n - ln;
3495 if (e >= PL_regeol)
3496 e = PL_regeol - 1;
3497 }
0fe9bf95 3498 while (1) {
1aa99e6b 3499 int count;
0fe9bf95 3500 /* Find place 'next' could work */
1aa99e6b
IH
3501 if (!do_utf8) {
3502 if (c1 == c2) {
a8e8ab15
JH
3503 while (locinput <= e &&
3504 UCHARAT(locinput) != c1)
1aa99e6b
IH
3505 locinput++;
3506 } else {
9041c2e3 3507 while (locinput <= e
a8e8ab15
JH
3508 && UCHARAT(locinput) != c1
3509 && UCHARAT(locinput) != c2)
1aa99e6b
IH
3510 locinput++;
3511 }
3512 count = locinput - old;
3513 }
3514 else {
3515 STRLEN len;
3516 if (c1 == c2) {
3517 for (count = 0;
3518 locinput <= e &&
9041c2e3 3519 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3520 count++)
3521 locinput += len;
3522
3523 } else {
3524 for (count = 0; locinput <= e; count++) {
9041c2e3 3525 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3526 if (c == c1 || c == c2)
3527 break;
9041c2e3 3528 locinput += len;
1aa99e6b
IH
3529 }
3530 }
0fe9bf95 3531 }
9041c2e3 3532 if (locinput > e)
0fe9bf95
IZ
3533 sayNO;
3534 /* PL_reginput == old now */
3535 if (locinput != old) {
3536 ln = 1; /* Did some */
1aa99e6b 3537 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3538 sayNO;
3539 }
3540 /* PL_reginput == locinput now */
29d1e993 3541 TRYPAREN(paren, ln, locinput);
0fe9bf95 3542 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3543 REGCP_UNWIND(lastcp);
0fe9bf95 3544 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3545 old = locinput;
3546 if (do_utf8)
3547 locinput += UTF8SKIP(locinput);
3548 else
3549 locinput++;
0fe9bf95
IZ
3550 }
3551 }
3552 else
c277df42 3553 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3554 UV c;
3555 if (c1 != -1000) {
3556 if (do_utf8)
9041c2e3 3557 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3558 else
9041c2e3 3559 c = UCHARAT(PL_reginput);
2390ecbc
PP
3560 /* If it could work, try it. */
3561 if (c == c1 || c == c2)
3562 {
3563 TRYPAREN(paren, n, PL_reginput);
3564 REGCP_UNWIND(lastcp);
3565 }
1aa99e6b 3566 }
a0d0e21e 3567 /* If it could work, try it. */
2390ecbc 3568 else if (c1 == -1000)
bbce6d69 3569 {
29d1e993 3570