This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
{PATCH] Fix: Re: [PATCH] B::C, perlcc.PL, B.xs, B.pm, t/TEST, C.xs
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
a687059c
LW
83#include "regcomp.h"
84
c277df42
IZ
85#define RF_tainted 1 /* tainted information used? */
86#define RF_warned 2 /* warned about big count? */
ce862d02 87#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
88#define RF_utf8 8 /* String contains multibyte chars? */
89
90#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
91
92#define RS_init 1 /* eval environment created */
93#define RS_set 2 /* replsv value is set */
c277df42 94
a687059c
LW
95#ifndef STATIC
96#define STATIC static
97#endif
98
c277df42
IZ
99/*
100 * Forwards.
101 */
102
a0ed51b3 103#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 104#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 105
dfe13c55
GS
106#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
108#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
109#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
110#define HOPc(pos,off) ((char*)HOP(pos,off))
111#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 112
efb30f32 113#define HOPBACK(pos, off) ( \
e54858b0 114 (PL_reg_match_utf8) \
efb30f32
HS
115 ? reghopmaybe((U8*)pos, -off) \
116 : (pos - off >= PL_bostr) \
117 ? (U8*)(pos - off) \
118 : (U8*)NULL \
119)
120#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
121
1aa99e6b
IH
122#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
123#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
124#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
125#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
126#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
127#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
128
8269fa76 129#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 130
5f80c4cf 131/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
132#define JUMPABLE(rn) ( \
133 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
134 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
135 OP(rn) == PLUS || OP(rn) == MINMOD || \
136 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
137)
138
cca55fe3
JP
139#define HAS_TEXT(rn) ( \
140 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
141)
e2d8ce26 142
cca55fe3 143#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 144 while (JUMPABLE(rn)) \
cca55fe3
JP
145 if (OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
146 PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 147 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
148 else if (OP(rn) == PLUS) \
149 rn = NEXTOPER(rn); \
e2d8ce26 150 else rn += NEXT_OFF(rn); \
5f80c4cf 151} STMT_END
74750237 152
acfe0abc 153static void restore_pos(pTHX_ void *arg);
51371543 154
76e3520e 155STATIC CHECKPOINT
cea2e8a9 156S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 157{
3280af22 158 int retval = PL_savestack_ix;
b1ce53c5
JH
159#define REGCP_PAREN_ELEMS 4
160 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
161 int p;
162
e49a9654
IH
163 if (paren_elems_to_push < 0)
164 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
165
a01268b5 166#define REGCP_OTHER_ELEMS 6
b1ce53c5 167 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 168 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 169/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
170 SSPUSHINT(PL_regendp[p]);
171 SSPUSHINT(PL_regstartp[p]);
3280af22 172 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
173 SSPUSHINT(p);
174 }
b1ce53c5 175/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
176 SSPUSHINT(PL_regsize);
177 SSPUSHINT(*PL_reglastparen);
a01268b5 178 SSPUSHINT(*PL_reglastcloseparen);
3280af22 179 SSPUSHPTR(PL_reginput);
41123dfd
JH
180#define REGCP_FRAME_ELEMS 2
181/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
182 * are needed for the regexp context stack bookkeeping. */
183 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 184 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 185
a0d0e21e
LW
186 return retval;
187}
188
c277df42 189/* These are needed since we do not localize EVAL nodes: */
02db2b7b 190# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 191 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 192 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 193
02db2b7b 194# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 195 PerlIO_printf(Perl_debug_log, \
faccc32b 196 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 197 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 198
76e3520e 199STATIC char *
cea2e8a9 200S_regcppop(pTHX)
a0d0e21e 201{
b1ce53c5 202 I32 i;
a0d0e21e
LW
203 U32 paren = 0;
204 char *input;
cf93c79d 205 I32 tmps;
b1ce53c5
JH
206
207 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 208 i = SSPOPINT;
b1ce53c5
JH
209 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
210 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 211 input = (char *) SSPOPPTR;
a01268b5 212 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
213 *PL_reglastparen = SSPOPINT;
214 PL_regsize = SSPOPINT;
b1ce53c5
JH
215
216 /* Now restore the parentheses context. */
41123dfd
JH
217 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
218 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 219 paren = (U32)SSPOPINT;
3280af22 220 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
221 PL_regstartp[paren] = SSPOPINT;
222 tmps = SSPOPINT;
3280af22
NIS
223 if (paren <= *PL_reglastparen)
224 PL_regendp[paren] = tmps;
c277df42 225 DEBUG_r(
c3464db5 226 PerlIO_printf(Perl_debug_log,
b900a521 227 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 228 (UV)paren, (IV)PL_regstartp[paren],
b900a521 229 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 230 (IV)PL_regendp[paren],
3280af22 231 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 232 );
a0d0e21e 233 }
c277df42 234 DEBUG_r(
3280af22 235 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 236 PerlIO_printf(Perl_debug_log,
faccc32b
JH
237 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
238 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
239 }
240 );
daf18116 241#if 1
dafc8851
JH
242 /* It would seem that the similar code in regtry()
243 * already takes care of this, and in fact it is in
244 * a better location to since this code can #if 0-ed out
245 * but the code in regtry() is needed or otherwise tests
246 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
247 * (as of patchlevel 7877) will fail. Then again,
248 * this code seems to be necessary or otherwise
249 * building DynaLoader will fail:
250 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
251 * --jhi */
3280af22
NIS
252 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
253 if (paren > PL_regsize)
cf93c79d
IZ
254 PL_regstartp[paren] = -1;
255 PL_regendp[paren] = -1;
a0d0e21e 256 }
dafc8851 257#endif
a0d0e21e
LW
258 return input;
259}
260
0f5d15d6 261STATIC char *
cea2e8a9 262S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
263{
264 I32 tmp = PL_savestack_ix;
265
266 PL_savestack_ix = ss;
267 regcppop();
268 PL_savestack_ix = tmp;
942e002e 269 return Nullch;
0f5d15d6
IZ
270}
271
272typedef struct re_cc_state
273{
274 I32 ss;
275 regnode *node;
276 struct re_cc_state *prev;
277 CURCUR *cc;
278 regexp *re;
279} re_cc_state;
280
02db2b7b 281#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 282
29d1e993
HS
283#define TRYPAREN(paren, n, input) { \
284 if (paren) { \
285 if (n) { \
286 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
287 PL_regendp[paren] = input - PL_bostr; \
288 } \
289 else \
290 PL_regendp[paren] = -1; \
291 } \
292 if (regmatch(next)) \
293 sayYES; \
294 if (paren && n) \
295 PL_regendp[paren] = -1; \
296}
297
298
a687059c 299/*
e50aee73 300 * pregexec and friends
a687059c
LW
301 */
302
303/*
c277df42 304 - pregexec - match a regexp against a string
a687059c 305 */
c277df42 306I32
864dbfa3 307Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 308 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
309/* strend: pointer to null at end of string */
310/* strbeg: real beginning of string */
311/* minend: end of match must be >=minend after stringarg. */
312/* nosave: For optimizations. */
313{
314 return
9041c2e3 315 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
316 nosave ? 0 : REXEC_COPY_STR);
317}
0f5d15d6
IZ
318
319STATIC void
cea2e8a9 320S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
321{
322 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
323#ifdef DEBUGGING
324 PL_regprogram = prog->program;
325#endif
326 PL_regnpar = prog->nparens;
9041c2e3
NIS
327 PL_regdata = prog->data;
328 PL_reg_re = prog;
0f5d15d6 329}
22e551b9 330
9041c2e3 331/*
cad2e5aa
JH
332 * Need to implement the following flags for reg_anch:
333 *
334 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
335 * USE_INTUIT_ML
336 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
337 * INTUIT_AUTORITATIVE_ML
338 * INTUIT_ONCE_NOML - Intuit can match in one location only.
339 * INTUIT_ONCE_ML
340 *
341 * Another flag for this function: SECOND_TIME (so that float substrs
342 * with giant delta may be not rechecked).
343 */
344
345/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
346
2c2d71f5 347/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
348 Otherwise, only SvCUR(sv) is used to get strbeg. */
349
350/* XXXX We assume that strpos is strbeg unless sv. */
351
6eb5f6b9
JH
352/* XXXX Some places assume that there is a fixed substring.
353 An update may be needed if optimizer marks as "INTUITable"
354 RExen without fixed substrings. Similarly, it is assumed that
355 lengths of all the strings are no more than minlen, thus they
356 cannot come from lookahead.
357 (Or minlen should take into account lookahead.) */
358
2c2d71f5
JH
359/* A failure to find a constant substring means that there is no need to make
360 an expensive call to REx engine, thus we celebrate a failure. Similarly,
361 finding a substring too deep into the string means that less calls to
30944b6d
IZ
362 regtry() should be needed.
363
364 REx compiler's optimizer found 4 possible hints:
365 a) Anchored substring;
366 b) Fixed substring;
367 c) Whether we are anchored (beginning-of-line or \G);
368 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 369 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
370 string which does not contradict any of them.
371 */
2c2d71f5 372
6eb5f6b9
JH
373/* Most of decisions we do here should have been done at compile time.
374 The nodes of the REx which we used for the search should have been
375 deleted from the finite automaton. */
376
cad2e5aa
JH
377char *
378Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
379 char *strend, U32 flags, re_scream_pos_data *data)
380{
b7953727 381 register I32 start_shift = 0;
cad2e5aa 382 /* Should be nonnegative! */
b7953727 383 register I32 end_shift = 0;
2c2d71f5
JH
384 register char *s;
385 register SV *check;
a1933d95 386 char *strbeg;
cad2e5aa
JH
387 char *t;
388 I32 ml_anch;
6eb5f6b9 389 register char *other_last = Nullch; /* other substr checked before this */
b7953727 390 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
391#ifdef DEBUGGING
392 char *i_strpos = strpos;
ce333219 393 SV *dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 394#endif
cad2e5aa 395
b8d68ded
JH
396 if (prog->reganch & ROPT_UTF8) {
397 DEBUG_r(PerlIO_printf(Perl_debug_log,
398 "UTF-8 regex...\n"));
399 PL_reg_flags |= RF_utf8;
400 }
401
2a782b5b 402 DEBUG_r({
b8d68ded 403 char *s = PL_reg_match_utf8 ?
c728cb41
JH
404 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
405 strpos;
b8d68ded
JH
406 int len = PL_reg_match_utf8 ?
407 strlen(s) : strend - strpos;
2a782b5b
JH
408 if (!PL_colorset)
409 reginitcolors();
b8d68ded
JH
410 if (PL_reg_match_utf8)
411 DEBUG_r(PerlIO_printf(Perl_debug_log,
412 "UTF-8 target...\n"));
2a782b5b
JH
413 PerlIO_printf(Perl_debug_log,
414 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
415 PL_colors[4],PL_colors[5],PL_colors[0],
416 prog->precomp,
417 PL_colors[1],
418 (strlen(prog->precomp) > 60 ? "..." : ""),
419 PL_colors[0],
420 (int)(len > 60 ? 60 : len),
421 s, PL_colors[1],
422 (len > 60 ? "..." : "")
423 );
424 });
cad2e5aa 425
1aa99e6b 426 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
a72c7584
JH
427 DEBUG_r(PerlIO_printf(Perl_debug_log,
428 "String too short... [re_intuit_start]\n"));
cad2e5aa 429 goto fail;
2c2d71f5 430 }
a1933d95 431 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 432 PL_regeol = strend;
653099ff 433 check = prog->check_substr;
2c2d71f5 434 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
435 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
436 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 437 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 438
7e25d62c
JH
439 if (!ml_anch) {
440 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
441 | ROPT_IMPLICIT)) /* not a real BOL */
442 /* SvCUR is not set on references: SvRV and SvPVX overlap */
443 && sv && !SvROK(sv)
444 && (strpos != strbeg)) {
445 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
446 goto fail;
447 }
448 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 449 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 450 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
451 I32 slen;
452
1aa99e6b 453 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
454 if (SvTAIL(check)) {
455 slen = SvCUR(check); /* >= 1 */
cad2e5aa 456
9041c2e3 457 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
458 || (strend - s == slen && strend[-1] != '\n')) {
459 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
460 goto fail_finish;
cad2e5aa
JH
461 }
462 /* Now should match s[0..slen-2] */
463 slen--;
653099ff 464 if (slen && (*SvPVX(check) != *s
cad2e5aa 465 || (slen > 1
653099ff 466 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
467 report_neq:
468 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
469 goto fail_finish;
470 }
cad2e5aa 471 }
653099ff
GS
472 else if (*SvPVX(check) != *s
473 || ((slen = SvCUR(check)) > 1
474 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
475 goto report_neq;
476 goto success_at_start;
7e25d62c 477 }
cad2e5aa 478 }
2c2d71f5 479 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 480 s = strpos;
2c2d71f5 481 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 482 end_shift = prog->minlen - start_shift -
653099ff 483 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 484 if (!ml_anch) {
653099ff
GS
485 I32 end = prog->check_offset_max + CHR_SVLEN(check)
486 - (SvTAIL(check) != 0);
1aa99e6b 487 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
488
489 if (end_shift < eshift)
490 end_shift = eshift;
491 }
cad2e5aa 492 }
2c2d71f5 493 else { /* Can match at random position */
cad2e5aa
JH
494 ml_anch = 0;
495 s = strpos;
2c2d71f5
JH
496 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
497 /* Should be nonnegative! */
498 end_shift = prog->minlen - start_shift -
653099ff 499 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
500 }
501
2c2d71f5 502#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 503 if (end_shift < 0)
6bbae5e6 504 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
505#endif
506
2c2d71f5
JH
507 restart:
508 /* Find a possible match in the region s..strend by looking for
509 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 510 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
511 I32 p = -1; /* Internal iterator of scream. */
512 I32 *pp = data ? data->scream_pos : &p;
513
2c2d71f5
JH
514 if (PL_screamfirst[BmRARE(check)] >= 0
515 || ( BmRARE(check) == '\n'
516 && (BmPREVIOUS(check) == SvCUR(check) - 1)
517 && SvTAIL(check) ))
9041c2e3 518 s = screaminstr(sv, check,
2c2d71f5 519 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 520 else
2c2d71f5 521 goto fail_finish;
cad2e5aa
JH
522 if (data)
523 *data->scream_olds = s;
524 }
f33976b4 525 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
526 s = fbm_instr((U8*)(s + start_shift),
527 (U8*)(strend - end_shift),
528 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 529 else
1aa99e6b
IH
530 s = fbm_instr(HOP3(s, start_shift, strend),
531 HOP3(strend, -end_shift, strbeg),
2c2d71f5 532 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
533
534 /* Update the count-of-usability, remove useless subpatterns,
535 unshift s. */
2c2d71f5
JH
536
537 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
538 (s ? "Found" : "Did not find"),
539 ((check == prog->anchored_substr) ? "anchored" : "floating"),
540 PL_colors[0],
7b0972df
JH
541 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
542 SvPVX(check),
2c2d71f5
JH
543 PL_colors[1], (SvTAIL(check) ? "$" : ""),
544 (s ? " at offset " : "...\n") ) );
545
546 if (!s)
547 goto fail_finish;
548
6eb5f6b9
JH
549 check_at = s;
550
2c2d71f5 551 /* Finish the diagnostic message */
30944b6d 552 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
553
554 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
555 Start with the other substr.
556 XXXX no SCREAM optimization yet - and a very coarse implementation
557 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
558 *always* match. Probably should be marked during compile...
559 Probably it is right to do no SCREAM here...
560 */
561
562 if (prog->float_substr && prog->anchored_substr) {
30944b6d 563 /* Take into account the "other" substring. */
2c2d71f5
JH
564 /* XXXX May be hopelessly wrong for UTF... */
565 if (!other_last)
6eb5f6b9 566 other_last = strpos;
2c2d71f5 567 if (check == prog->float_substr) {
30944b6d
IZ
568 do_other_anchored:
569 {
1aa99e6b 570 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
571 char *s1 = s;
572
2c2d71f5
JH
573 t = s - prog->check_offset_max;
574 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
575 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 576 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 577 && t > strpos)))
30944b6d 578 /* EMPTY */;
2c2d71f5
JH
579 else
580 t = strpos;
1aa99e6b 581 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
582 if (t < other_last) /* These positions already checked */
583 t = other_last;
1aa99e6b 584 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
585 if (last < last1)
586 last1 = last;
587 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
588 /* On end-of-str: see comment below. */
589 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
590 HOP3(HOP3(last1, prog->anchored_offset, strend)
591 + SvCUR(prog->anchored_substr),
592 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
593 prog->anchored_substr,
594 PL_multiline ? FBMrf_MULTILINE : 0);
595 DEBUG_r(PerlIO_printf(Perl_debug_log,
596 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
597 (s ? "Found" : "Contradicts"),
598 PL_colors[0],
7b0972df
JH
599 (int)(SvCUR(prog->anchored_substr)
600 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
601 SvPVX(prog->anchored_substr),
602 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
603 if (!s) {
604 if (last1 >= last2) {
605 DEBUG_r(PerlIO_printf(Perl_debug_log,
606 ", giving up...\n"));
607 goto fail_finish;
608 }
609 DEBUG_r(PerlIO_printf(Perl_debug_log,
610 ", trying floating at offset %ld...\n",
1aa99e6b
IH
611 (long)(HOP3c(s1, 1, strend) - i_strpos)));
612 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
613 s = HOP3c(last, 1, strend);
2c2d71f5
JH
614 goto restart;
615 }
616 else {
617 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 618 (long)(s - i_strpos)));
1aa99e6b
IH
619 t = HOP3c(s, -prog->anchored_offset, strbeg);
620 other_last = HOP3c(s, 1, strend);
30944b6d 621 s = s1;
2c2d71f5
JH
622 if (t == strpos)
623 goto try_at_start;
2c2d71f5
JH
624 goto try_at_offset;
625 }
30944b6d 626 }
2c2d71f5
JH
627 }
628 else { /* Take into account the floating substring. */
629 char *last, *last1;
630 char *s1 = s;
631
1aa99e6b
IH
632 t = HOP3c(s, -start_shift, strbeg);
633 last1 = last =
634 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
635 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
636 last = HOP3c(t, prog->float_max_offset, strend);
637 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
638 if (s < other_last)
639 s = other_last;
2c2d71f5
JH
640 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
641 /* fbm_instr() takes into account exact value of end-of-str
642 if the check is SvTAIL(ed). Since false positives are OK,
643 and end-of-str is not later than strend we are OK. */
644 s = fbm_instr((unsigned char*)s,
645 (unsigned char*)last + SvCUR(prog->float_substr)
646 - (SvTAIL(prog->float_substr)!=0),
647 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
648 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
649 (s ? "Found" : "Contradicts"),
650 PL_colors[0],
7b0972df
JH
651 (int)(SvCUR(prog->float_substr)
652 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
653 SvPVX(prog->float_substr),
654 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
655 if (!s) {
656 if (last1 == last) {
657 DEBUG_r(PerlIO_printf(Perl_debug_log,
658 ", giving up...\n"));
659 goto fail_finish;
660 }
661 DEBUG_r(PerlIO_printf(Perl_debug_log,
662 ", trying anchored starting at offset %ld...\n",
30944b6d 663 (long)(s1 + 1 - i_strpos)));
803ff556 664 other_last = last;
1aa99e6b 665 s = HOP3c(t, 1, strend);
2c2d71f5
JH
666 goto restart;
667 }
668 else {
669 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 670 (long)(s - i_strpos)));
803ff556 671 other_last = s; /* Fix this later. --Hugo */
30944b6d 672 s = s1;
2c2d71f5
JH
673 if (t == strpos)
674 goto try_at_start;
2c2d71f5
JH
675 goto try_at_offset;
676 }
677 }
cad2e5aa 678 }
2c2d71f5
JH
679
680 t = s - prog->check_offset_max;
2c2d71f5
JH
681 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
682 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
683 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
684 && t > strpos))) {
2c2d71f5
JH
685 /* Fixed substring is found far enough so that the match
686 cannot start at strpos. */
687 try_at_offset:
cad2e5aa 688 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
689 /* Eventually fbm_*() should handle this, but often
690 anchored_offset is not 0, so this check will not be wasted. */
691 /* XXXX In the code below we prefer to look for "^" even in
692 presence of anchored substrings. And we search even
693 beyond the found float position. These pessimizations
694 are historical artefacts only. */
695 find_anchor:
2c2d71f5 696 while (t < strend - prog->minlen) {
cad2e5aa 697 if (*t == '\n') {
4ee3650e 698 if (t < check_at - prog->check_offset_min) {
30944b6d 699 if (prog->anchored_substr) {
4ee3650e
GS
700 /* Since we moved from the found position,
701 we definitely contradict the found anchored
30944b6d
IZ
702 substr. Due to the above check we do not
703 contradict "check" substr.
704 Thus we can arrive here only if check substr
705 is float. Redo checking for "other"=="fixed".
706 */
9041c2e3 707 strpos = t + 1;
30944b6d
IZ
708 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
709 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
710 goto do_other_anchored;
711 }
4ee3650e
GS
712 /* We don't contradict the found floating substring. */
713 /* XXXX Why not check for STCLASS? */
cad2e5aa 714 s = t + 1;
2c2d71f5 715 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 716 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
717 goto set_useful;
718 }
4ee3650e
GS
719 /* Position contradicts check-string */
720 /* XXXX probably better to look for check-string
721 than for "\n", so one should lower the limit for t? */
722 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 723 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 724 other_last = strpos = s = t + 1;
cad2e5aa
JH
725 goto restart;
726 }
727 t++;
728 }
2c2d71f5
JH
729 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
730 PL_colors[0],PL_colors[1]));
731 goto fail_finish;
cad2e5aa 732 }
f5952150
GS
733 else {
734 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
735 PL_colors[0],PL_colors[1]));
736 }
cad2e5aa
JH
737 s = t;
738 set_useful:
2c2d71f5 739 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
740 }
741 else {
f5952150 742 /* The found string does not prohibit matching at strpos,
2c2d71f5 743 - no optimization of calling REx engine can be performed,
f5952150
GS
744 unless it was an MBOL and we are not after MBOL,
745 or a future STCLASS check will fail this. */
2c2d71f5
JH
746 try_at_start:
747 /* Even in this situation we may use MBOL flag if strpos is offset
748 wrt the start of the string. */
05b4157f 749 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 750 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
751 /* May be due to an implicit anchor of m{.*foo} */
752 && !(prog->reganch & ROPT_IMPLICIT))
753 {
cad2e5aa
JH
754 t = strpos;
755 goto find_anchor;
756 }
30944b6d 757 DEBUG_r( if (ml_anch)
f5952150
GS
758 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
759 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 760 );
2c2d71f5 761 success_at_start:
30944b6d 762 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 763 && prog->check_substr /* Could be deleted already */
cad2e5aa 764 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
765 && prog->check_substr == prog->float_substr)
766 {
cad2e5aa 767 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 768 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
769 SvREFCNT_dec(prog->check_substr);
770 prog->check_substr = Nullsv; /* disable */
771 prog->float_substr = Nullsv; /* clear */
5e39e1e5 772 check = Nullsv; /* abort */
cad2e5aa 773 s = strpos;
3cf5c195
IZ
774 /* XXXX This is a remnant of the old implementation. It
775 looks wasteful, since now INTUIT can use many
6eb5f6b9 776 other heuristics. */
cad2e5aa
JH
777 prog->reganch &= ~RE_USE_INTUIT;
778 }
779 else
780 s = strpos;
781 }
782
6eb5f6b9
JH
783 /* Last resort... */
784 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
785 if (prog->regstclass) {
786 /* minlen == 0 is possible if regstclass is \b or \B,
787 and the fixed substr is ''$.
788 Since minlen is already taken into account, s+1 is before strend;
789 accidentally, minlen >= 1 guaranties no false positives at s + 1
790 even for \b or \B. But (minlen? 1 : 0) below assumes that
791 regstclass does not come from lookahead... */
792 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
793 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 794 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 795 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 796 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 797 : 1);
6eb5f6b9 798 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
799 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
800 : (prog->float_substr
801 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
802 cl_l, strend)
803 : strend);
a1933d95 804 char *startpos = strbeg;
6eb5f6b9
JH
805
806 t = s;
76384e4a 807 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 808 PL_regdata = prog->data;
76384e4a
GS
809 PL_bostr = startpos;
810 }
f33976b4 811 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
6eb5f6b9
JH
812 if (!s) {
813#ifdef DEBUGGING
b7953727 814 char *what = 0;
6eb5f6b9
JH
815#endif
816 if (endpos == strend) {
817 DEBUG_r( PerlIO_printf(Perl_debug_log,
818 "Could not match STCLASS...\n") );
819 goto fail;
820 }
66e933ab
GS
821 DEBUG_r( PerlIO_printf(Perl_debug_log,
822 "This position contradicts STCLASS...\n") );
653099ff
GS
823 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
824 goto fail;
6eb5f6b9
JH
825 /* Contradict one of substrings */
826 if (prog->anchored_substr) {
6eb5f6b9
JH
827 if (prog->anchored_substr == check) {
828 DEBUG_r( what = "anchored" );
829 hop_and_restart:
1aa99e6b 830 s = HOP3c(t, 1, strend);
66e933ab
GS
831 if (s + start_shift + end_shift > strend) {
832 /* XXXX Should be taken into account earlier? */
833 DEBUG_r( PerlIO_printf(Perl_debug_log,
834 "Could not match STCLASS...\n") );
835 goto fail;
836 }
5e39e1e5
HS
837 if (!check)
838 goto giveup;
6eb5f6b9 839 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 840 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
841 what, (long)(s + start_shift - i_strpos)) );
842 goto restart;
843 }
66e933ab 844 /* Have both, check_string is floating */
6eb5f6b9
JH
845 if (t + start_shift >= check_at) /* Contradicts floating=check */
846 goto retry_floating_check;
847 /* Recheck anchored substring, but not floating... */
9041c2e3 848 s = check_at;
5e39e1e5
HS
849 if (!check)
850 goto giveup;
6eb5f6b9 851 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 852 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
853 (long)(other_last - i_strpos)) );
854 goto do_other_anchored;
855 }
60e71179
GS
856 /* Another way we could have checked stclass at the
857 current position only: */
858 if (ml_anch) {
859 s = t = t + 1;
5e39e1e5
HS
860 if (!check)
861 goto giveup;
60e71179 862 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
863 "Looking for /%s^%s/m starting at offset %ld...\n",
864 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 865 goto try_at_offset;
66e933ab 866 }
60e71179
GS
867 if (!prog->float_substr) /* Could have been deleted */
868 goto fail;
6eb5f6b9
JH
869 /* Check is floating subtring. */
870 retry_floating_check:
871 t = check_at - start_shift;
872 DEBUG_r( what = "floating" );
873 goto hop_and_restart;
874 }
b7953727
JH
875 if (t != s) {
876 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 877 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
878 (long)(t - i_strpos), (long)(s - i_strpos))
879 );
880 }
881 else {
882 DEBUG_r(PerlIO_printf(Perl_debug_log,
883 "Does not contradict STCLASS...\n");
884 );
885 }
6eb5f6b9 886 }
5e39e1e5
HS
887 giveup:
888 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
889 PL_colors[4], (check ? "Guessed" : "Giving up"),
890 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 891 return s;
2c2d71f5
JH
892
893 fail_finish: /* Substring not found */
66e933ab
GS
894 if (prog->check_substr) /* could be removed already */
895 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 896 fail:
2c2d71f5 897 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
898 PL_colors[4],PL_colors[5]));
899 return Nullch;
900}
9661b544 901
6eb5f6b9 902/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
903STATIC char *
904S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 905{
6eb5f6b9
JH
906 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
907 char *m;
d8093b23 908 STRLEN ln;
909 unsigned int c1;
910 unsigned int c2;
6eb5f6b9
JH
911 char *e;
912 register I32 tmp = 1; /* Scratch variable? */
53c4c00c 913 register bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 914
6eb5f6b9
JH
915 /* We know what class it must start with. */
916 switch (OP(c)) {
6eb5f6b9 917 case ANYOF:
a0ed51b3 918 while (s < strend) {
ffc61ed2 919 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
920 if (tmp && (norun || regtry(prog, s)))
921 goto got_it;
922 else
923 tmp = doevery;
a0ed51b3 924 }
6eb5f6b9
JH
925 else
926 tmp = 1;
ffc61ed2 927 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 928 }
6eb5f6b9 929 break;
f33976b4
DB
930 case CANY:
931 while (s < strend) {
932 if (tmp && (norun || regtry(prog, s)))
933 goto got_it;
934 else
935 tmp = doevery;
936 s++;
937 }
938 break;
6eb5f6b9
JH
939 case EXACTF:
940 m = STRING(c);
941 ln = STR_LEN(c);
1aa99e6b 942 if (UTF) {
a2a2844f 943 STRLEN ulen1, ulen2;
e7ae6809
JH
944 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
945 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
946
947 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
948 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
949
950 c1 = utf8_to_uvuni(tmpbuf1, 0);
951 c2 = utf8_to_uvuni(tmpbuf2, 0);
1aa99e6b
IH
952 }
953 else {
954 c1 = *(U8*)m;
955 c2 = PL_fold[c1];
956 }
6eb5f6b9
JH
957 goto do_exactf;
958 case EXACTFL:
959 m = STRING(c);
960 ln = STR_LEN(c);
d8093b23 961 c1 = *(U8*)m;
6eb5f6b9
JH
962 c2 = PL_fold_locale[c1];
963 do_exactf:
93c50829 964 e = do_utf8 ? s + ln : strend - ln;
b3c9acc1 965
6eb5f6b9
JH
966 if (norun && e < s)
967 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 968
60a8b682
JH
969 /* The idea in the EXACTF* cases is to first find the
970 * first character of the EXACTF* node and then, if
971 * necessary, case-insensitively compare the full
972 * text of the node. The c1 and c2 are the first
973 * characters (though in Unicode it gets a bit
974 * more complicated because there are more cases
975 * than just upper and lower: one is really supposed
976 * to use the so-called folding case for case-insensitive
977 * matching (called "loose matching" in Unicode). */
978
1aa99e6b 979 if (do_utf8) {
575cac57
JH
980 UV c, f;
981 U8 tmpbuf [UTF8_MAXLEN+1];
982 U8 foldbuf[UTF8_MAXLEN_FOLD+1];
983 STRLEN len, foldlen;
d7f013c8 984
09091399 985 if (c1 == c2) {
1aa99e6b 986 while (s <= e) {
575cac57 987 c = utf8_to_uvchr((U8*)s, &len);
80aecb99
JH
988 if ( c == c1
989 && (ln == len ||
66423254
JH
990 ibcmp_utf8(s, (char **)0, 0, do_utf8,
991 m, (char **)0, ln, UTF))
55da9344 992 && (norun || regtry(prog, s)) )
1aa99e6b 993 goto got_it;
80aecb99
JH
994 else {
995 uvchr_to_utf8(tmpbuf, c);
ac7e0132 996 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
997 if ( f != c
998 && (f == c1 || f == c2)
999 && (ln == foldlen ||
66423254
JH
1000 !ibcmp_utf8((char *) foldbuf,
1001 (char **)0, foldlen, do_utf8,
d07ddd77 1002 m,
66423254 1003 (char **)0, ln, UTF))
80aecb99
JH
1004 && (norun || regtry(prog, s)) )
1005 goto got_it;
1006 }
1aa99e6b
IH
1007 s += len;
1008 }
09091399
JH
1009 }
1010 else {
1aa99e6b 1011 while (s <= e) {
575cac57 1012 c = utf8_to_uvchr((U8*)s, &len);
80aecb99 1013
60a8b682
JH
1014 /* Handle some of the three Greek sigmas cases.
1015 * Note that not all the possible combinations
1016 * are handled here: some of them are handled
1017 * handled by the standard folding rules, and
1018 * some of them (the character class or ANYOF
1019 * cases) are handled during compiletime in
1020 * regexec.c:S_regclass(). */
880bd946
JH
1021 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1022 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1023 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99
JH
1024
1025 if ( (c == c1 || c == c2)
1026 && (ln == len ||
66423254
JH
1027 ibcmp_utf8(s, (char **)0, 0, do_utf8,
1028 m, (char **)0, ln, UTF))
55da9344 1029 && (norun || regtry(prog, s)) )
1aa99e6b 1030 goto got_it;
80aecb99
JH
1031 else {
1032 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1033 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1034 if ( f != c
1035 && (f == c1 || f == c2)
1036 && (ln == foldlen ||
1037 !ibcmp_utf8((char *)foldbuf,
66423254 1038 (char **)0, foldlen, do_utf8,
d07ddd77 1039 m,
66423254 1040 (char **)0, ln, UTF))
80aecb99
JH
1041 && (norun || regtry(prog, s)) )
1042 goto got_it;
1043 }
1aa99e6b
IH
1044 s += len;
1045 }
09091399 1046 }
1aa99e6b
IH
1047 }
1048 else {
1049 if (c1 == c2)
1050 while (s <= e) {
1051 if ( *(U8*)s == c1
1052 && (ln == 1 || !(OP(c) == EXACTF
1053 ? ibcmp(s, m, ln)
1054 : ibcmp_locale(s, m, ln)))
1055 && (norun || regtry(prog, s)) )
1056 goto got_it;
1057 s++;
1058 }
1059 else
1060 while (s <= e) {
1061 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1062 && (ln == 1 || !(OP(c) == EXACTF
1063 ? ibcmp(s, m, ln)
1064 : ibcmp_locale(s, m, ln)))
1065 && (norun || regtry(prog, s)) )
1066 goto got_it;
1067 s++;
1068 }
b3c9acc1
IZ
1069 }
1070 break;
bbce6d69 1071 case BOUNDL:
3280af22 1072 PL_reg_flags |= RF_tainted;
bbce6d69 1073 /* FALL THROUGH */
a0d0e21e 1074 case BOUND:
ffc61ed2 1075 if (do_utf8) {
12d33761 1076 if (s == PL_bostr)
ffc61ed2
JH
1077 tmp = '\n';
1078 else {
1aa99e6b 1079 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1080
0064a8a9
JH
1081 if (s > (char*)r)
1082 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1083 }
1084 tmp = ((OP(c) == BOUND ?
9041c2e3 1085 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1086 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1087 while (s < strend) {
1088 if (tmp == !(OP(c) == BOUND ?
3568d838 1089 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1090 isALNUM_LC_utf8((U8*)s)))
1091 {
1092 tmp = !tmp;
1093 if ((norun || regtry(prog, s)))
1094 goto got_it;
1095 }
1096 s += UTF8SKIP(s);
a687059c 1097 }
a0d0e21e 1098 }
667bb95a 1099 else {
12d33761 1100 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1101 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1102 while (s < strend) {
1103 if (tmp ==
1104 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1105 tmp = !tmp;
1106 if ((norun || regtry(prog, s)))
1107 goto got_it;
1108 }
1109 s++;
a0ed51b3 1110 }
a0ed51b3 1111 }
6eb5f6b9 1112 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1113 goto got_it;
1114 break;
bbce6d69 1115 case NBOUNDL:
3280af22 1116 PL_reg_flags |= RF_tainted;
bbce6d69 1117 /* FALL THROUGH */
a0d0e21e 1118 case NBOUND:
ffc61ed2 1119 if (do_utf8) {
12d33761 1120 if (s == PL_bostr)
ffc61ed2
JH
1121 tmp = '\n';
1122 else {
1aa99e6b 1123 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1124
0064a8a9
JH
1125 if (s > (char*)r)
1126 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1127 }
1128 tmp = ((OP(c) == NBOUND ?
9041c2e3 1129 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1130 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1131 while (s < strend) {
1132 if (tmp == !(OP(c) == NBOUND ?
3568d838 1133 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1134 isALNUM_LC_utf8((U8*)s)))
1135 tmp = !tmp;
1136 else if ((norun || regtry(prog, s)))
1137 goto got_it;
1138 s += UTF8SKIP(s);
1139 }
a0d0e21e 1140 }
667bb95a 1141 else {
12d33761 1142 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1143 tmp = ((OP(c) == NBOUND ?
1144 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1145 while (s < strend) {
1146 if (tmp ==
1147 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1148 tmp = !tmp;
1149 else if ((norun || regtry(prog, s)))
1150 goto got_it;
1151 s++;
1152 }
a0ed51b3 1153 }
6eb5f6b9 1154 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1155 goto got_it;
1156 break;
a0d0e21e 1157 case ALNUM:
ffc61ed2 1158 if (do_utf8) {
8269fa76 1159 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1160 while (s < strend) {
3568d838 1161 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1162 if (tmp && (norun || regtry(prog, s)))
1163 goto got_it;
1164 else
1165 tmp = doevery;
1166 }
bbce6d69 1167 else
ffc61ed2
JH
1168 tmp = 1;
1169 s += UTF8SKIP(s);
bbce6d69 1170 }
bbce6d69 1171 }
ffc61ed2
JH
1172 else {
1173 while (s < strend) {
1174 if (isALNUM(*s)) {
1175 if (tmp && (norun || regtry(prog, s)))
1176 goto got_it;
1177 else
1178 tmp = doevery;
1179 }
a0ed51b3 1180 else
ffc61ed2
JH
1181 tmp = 1;
1182 s++;
a0ed51b3 1183 }
a0ed51b3
LW
1184 }
1185 break;
bbce6d69 1186 case ALNUML:
3280af22 1187 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1188 if (do_utf8) {
1189 while (s < strend) {
1190 if (isALNUM_LC_utf8((U8*)s)) {
1191 if (tmp && (norun || regtry(prog, s)))
1192 goto got_it;
1193 else
1194 tmp = doevery;
1195 }
a687059c 1196 else
ffc61ed2
JH
1197 tmp = 1;
1198 s += UTF8SKIP(s);
a0d0e21e 1199 }
a0d0e21e 1200 }
ffc61ed2
JH
1201 else {
1202 while (s < strend) {
1203 if (isALNUM_LC(*s)) {
1204 if (tmp && (norun || regtry(prog, s)))
1205 goto got_it;
1206 else
1207 tmp = doevery;
1208 }
a0ed51b3 1209 else
ffc61ed2
JH
1210 tmp = 1;
1211 s++;
a0ed51b3 1212 }
a0ed51b3
LW
1213 }
1214 break;
a0d0e21e 1215 case NALNUM:
ffc61ed2 1216 if (do_utf8) {
8269fa76 1217 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1218 while (s < strend) {
3568d838 1219 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1220 if (tmp && (norun || regtry(prog, s)))
1221 goto got_it;
1222 else
1223 tmp = doevery;
1224 }
bbce6d69 1225 else
ffc61ed2
JH
1226 tmp = 1;
1227 s += UTF8SKIP(s);
bbce6d69 1228 }
bbce6d69 1229 }
ffc61ed2
JH
1230 else {
1231 while (s < strend) {
1232 if (!isALNUM(*s)) {
1233 if (tmp && (norun || regtry(prog, s)))
1234 goto got_it;
1235 else
1236 tmp = doevery;
1237 }
a0ed51b3 1238 else
ffc61ed2
JH
1239 tmp = 1;
1240 s++;
a0ed51b3 1241 }
a0ed51b3
LW
1242 }
1243 break;
bbce6d69 1244 case NALNUML:
3280af22 1245 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1246 if (do_utf8) {
1247 while (s < strend) {
1248 if (!isALNUM_LC_utf8((U8*)s)) {
1249 if (tmp && (norun || regtry(prog, s)))
1250 goto got_it;
1251 else
1252 tmp = doevery;
1253 }
a687059c 1254 else
ffc61ed2
JH
1255 tmp = 1;
1256 s += UTF8SKIP(s);
a687059c 1257 }
a0d0e21e 1258 }
ffc61ed2
JH
1259 else {
1260 while (s < strend) {
1261 if (!isALNUM_LC(*s)) {
1262 if (tmp && (norun || regtry(prog, s)))
1263 goto got_it;
1264 else
1265 tmp = doevery;
1266 }
a0ed51b3 1267 else
ffc61ed2
JH
1268 tmp = 1;
1269 s++;
a0ed51b3 1270 }
a0ed51b3
LW
1271 }
1272 break;
a0d0e21e 1273 case SPACE:
ffc61ed2 1274 if (do_utf8) {
8269fa76 1275 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1276 while (s < strend) {
3568d838 1277 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1278 if (tmp && (norun || regtry(prog, s)))
1279 goto got_it;
1280 else
1281 tmp = doevery;
1282 }
a0d0e21e 1283 else
ffc61ed2
JH
1284 tmp = 1;
1285 s += UTF8SKIP(s);
2304df62 1286 }
a0d0e21e 1287 }
ffc61ed2
JH
1288 else {
1289 while (s < strend) {
1290 if (isSPACE(*s)) {
1291 if (tmp && (norun || regtry(prog, s)))
1292 goto got_it;
1293 else
1294 tmp = doevery;
1295 }
a0ed51b3 1296 else
ffc61ed2
JH
1297 tmp = 1;
1298 s++;
a0ed51b3 1299 }
a0ed51b3
LW
1300 }
1301 break;
bbce6d69 1302 case SPACEL:
3280af22 1303 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1304 if (do_utf8) {
1305 while (s < strend) {
1306 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1307 if (tmp && (norun || regtry(prog, s)))
1308 goto got_it;
1309 else
1310 tmp = doevery;
1311 }
bbce6d69 1312 else
ffc61ed2
JH
1313 tmp = 1;
1314 s += UTF8SKIP(s);
bbce6d69 1315 }
bbce6d69 1316 }
ffc61ed2
JH
1317 else {
1318 while (s < strend) {
1319 if (isSPACE_LC(*s)) {
1320 if (tmp && (norun || regtry(prog, s)))
1321 goto got_it;
1322 else
1323 tmp = doevery;
1324 }
a0ed51b3 1325 else
ffc61ed2
JH
1326 tmp = 1;
1327 s++;
a0ed51b3 1328 }
a0ed51b3
LW
1329 }
1330 break;
a0d0e21e 1331 case NSPACE:
ffc61ed2 1332 if (do_utf8) {
8269fa76 1333 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1334 while (s < strend) {
3568d838 1335 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1336 if (tmp && (norun || regtry(prog, s)))
1337 goto got_it;
1338 else
1339 tmp = doevery;
1340 }
a0d0e21e 1341 else
ffc61ed2
JH
1342 tmp = 1;
1343 s += UTF8SKIP(s);
a687059c 1344 }
a0d0e21e 1345 }
ffc61ed2
JH
1346 else {
1347 while (s < strend) {
1348 if (!isSPACE(*s)) {
1349 if (tmp && (norun || regtry(prog, s)))
1350 goto got_it;
1351 else
1352 tmp = doevery;
1353 }
a0ed51b3 1354 else
ffc61ed2
JH
1355 tmp = 1;
1356 s++;
a0ed51b3 1357 }
a0ed51b3
LW
1358 }
1359 break;
bbce6d69 1360 case NSPACEL:
3280af22 1361 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1362 if (do_utf8) {
1363 while (s < strend) {
1364 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1365 if (tmp && (norun || regtry(prog, s)))
1366 goto got_it;
1367 else
1368 tmp = doevery;
1369 }
bbce6d69 1370 else
ffc61ed2
JH
1371 tmp = 1;
1372 s += UTF8SKIP(s);
bbce6d69 1373 }
bbce6d69 1374 }
ffc61ed2
JH
1375 else {
1376 while (s < strend) {
1377 if (!isSPACE_LC(*s)) {
1378 if (tmp && (norun || regtry(prog, s)))
1379 goto got_it;
1380 else
1381 tmp = doevery;
1382 }
a0ed51b3 1383 else
ffc61ed2
JH
1384 tmp = 1;
1385 s++;
a0ed51b3 1386 }
a0ed51b3
LW
1387 }
1388 break;
a0d0e21e 1389 case DIGIT:
ffc61ed2 1390 if (do_utf8) {
8269fa76 1391 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1392 while (s < strend) {
3568d838 1393 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1394 if (tmp && (norun || regtry(prog, s)))
1395 goto got_it;
1396 else
1397 tmp = doevery;
1398 }
a0d0e21e 1399 else
ffc61ed2
JH
1400 tmp = 1;
1401 s += UTF8SKIP(s);
2b69d0c2 1402 }
a0d0e21e 1403 }
ffc61ed2
JH
1404 else {
1405 while (s < strend) {
1406 if (isDIGIT(*s)) {
1407 if (tmp && (norun || regtry(prog, s)))
1408 goto got_it;
1409 else
1410 tmp = doevery;
1411 }
a0ed51b3 1412 else
ffc61ed2
JH
1413 tmp = 1;
1414 s++;
a0ed51b3 1415 }
a0ed51b3
LW
1416 }
1417 break;
b8c5462f
JH
1418 case DIGITL:
1419 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1420 if (do_utf8) {
1421 while (s < strend) {
1422 if (isDIGIT_LC_utf8((U8*)s)) {
1423 if (tmp && (norun || regtry(prog, s)))
1424 goto got_it;
1425 else
1426 tmp = doevery;
1427 }
b8c5462f 1428 else
ffc61ed2
JH
1429 tmp = 1;
1430 s += UTF8SKIP(s);
b8c5462f 1431 }
b8c5462f 1432 }
ffc61ed2
JH
1433 else {
1434 while (s < strend) {
1435 if (isDIGIT_LC(*s)) {
1436 if (tmp && (norun || regtry(prog, s)))
1437 goto got_it;
1438 else
1439 tmp = doevery;
1440 }
b8c5462f 1441 else
ffc61ed2
JH
1442 tmp = 1;
1443 s++;
b8c5462f 1444 }
b8c5462f
JH
1445 }
1446 break;
a0d0e21e 1447 case NDIGIT:
ffc61ed2 1448 if (do_utf8) {
8269fa76 1449 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1450 while (s < strend) {
3568d838 1451 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1452 if (tmp && (norun || regtry(prog, s)))
1453 goto got_it;
1454 else
1455 tmp = doevery;
1456 }
a0d0e21e 1457 else
ffc61ed2
JH
1458 tmp = 1;
1459 s += UTF8SKIP(s);
a687059c 1460 }
a0d0e21e 1461 }
ffc61ed2
JH
1462 else {
1463 while (s < strend) {
1464 if (!isDIGIT(*s)) {
1465 if (tmp && (norun || regtry(prog, s)))
1466 goto got_it;
1467 else
1468 tmp = doevery;
1469 }
a0ed51b3 1470 else
ffc61ed2
JH
1471 tmp = 1;
1472 s++;
a0ed51b3 1473 }
a0ed51b3
LW
1474 }
1475 break;
b8c5462f
JH
1476 case NDIGITL:
1477 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1478 if (do_utf8) {
1479 while (s < strend) {
1480 if (!isDIGIT_LC_utf8((U8*)s)) {
1481 if (tmp && (norun || regtry(prog, s)))
1482 goto got_it;
1483 else
1484 tmp = doevery;
1485 }
b8c5462f 1486 else
ffc61ed2
JH
1487 tmp = 1;
1488 s += UTF8SKIP(s);
b8c5462f 1489 }
a0ed51b3 1490 }
ffc61ed2
JH
1491 else {
1492 while (s < strend) {
1493 if (!isDIGIT_LC(*s)) {
1494 if (tmp && (norun || regtry(prog, s)))
1495 goto got_it;
1496 else
1497 tmp = doevery;
1498 }
cf93c79d 1499 else
ffc61ed2
JH
1500 tmp = 1;
1501 s++;
b8c5462f 1502 }
b8c5462f
JH
1503 }
1504 break;
b3c9acc1 1505 default:
3c3eec57
GS
1506 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1507 break;
d6a28714 1508 }
6eb5f6b9
JH
1509 return 0;
1510 got_it:
1511 return s;
1512}
1513
1514/*
1515 - regexec_flags - match a regexp against a string
1516 */
1517I32
1518Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1519 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1520/* strend: pointer to null at end of string */
1521/* strbeg: real beginning of string */
1522/* minend: end of match must be >=minend after stringarg. */
1523/* data: May be used for some additional optimizations. */
1524/* nosave: For optimizations. */
1525{
6eb5f6b9
JH
1526 register char *s;
1527 register regnode *c;
1528 register char *startpos = stringarg;
6eb5f6b9
JH
1529 I32 minlen; /* must match at least this many chars */
1530 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1531 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1532 constant substr. */ /* CC */
1533 I32 end_shift = 0; /* Same for the end. */ /* CC */
1534 I32 scream_pos = -1; /* Internal iterator of scream. */
1535 char *scream_olds;
1536 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1537 bool do_utf8 = DO_UTF8(sv);
2a782b5b 1538#ifdef DEBUGGING
9e55ce06
JH
1539 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
1540 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2a782b5b 1541#endif
6eb5f6b9
JH
1542
1543 PL_regcc = 0;
1544
1545 cache_re(prog);
1546#ifdef DEBUGGING
aea4f609 1547 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1548#endif
1549
1550 /* Be paranoid... */
1551 if (prog == NULL || startpos == NULL) {
1552 Perl_croak(aTHX_ "NULL regexp parameter");
1553 return 0;
1554 }
1555
1556 minlen = prog->minlen;
61a36c01 1557 if (strend - startpos < minlen) {
a72c7584
JH
1558 DEBUG_r(PerlIO_printf(Perl_debug_log,
1559 "String too short [regexec_flags]...\n"));
1560 goto phooey;
1aa99e6b 1561 }
6eb5f6b9 1562
6eb5f6b9
JH
1563 /* Check validity of program. */
1564 if (UCHARAT(prog->program) != REG_MAGIC) {
1565 Perl_croak(aTHX_ "corrupted regexp program");
1566 }
1567
1568 PL_reg_flags = 0;
1569 PL_reg_eval_set = 0;
1570 PL_reg_maxiter = 0;
1571
1572 if (prog->reganch & ROPT_UTF8)
1573 PL_reg_flags |= RF_utf8;
1574
1575 /* Mark beginning of line for ^ and lookbehind. */
1576 PL_regbol = startpos;
1577 PL_bostr = strbeg;
1578 PL_reg_sv = sv;
1579
1580 /* Mark end of line for $ (and such) */
1581 PL_regeol = strend;
1582
1583 /* see how far we have to get to not match where we matched before */
1584 PL_regtill = startpos+minend;
1585
1586 /* We start without call_cc context. */
1587 PL_reg_call_cc = 0;
1588
1589 /* If there is a "must appear" string, look for it. */
1590 s = startpos;
1591
1592 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1593 MAGIC *mg;
1594
1595 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1596 PL_reg_ganch = startpos;
1597 else if (sv && SvTYPE(sv) >= SVt_PVMG
1598 && SvMAGIC(sv)
14befaf4
DM
1599 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1600 && mg->mg_len >= 0) {
6eb5f6b9
JH
1601 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1602 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1603 if (s > PL_reg_ganch)
6eb5f6b9
JH
1604 goto phooey;
1605 s = PL_reg_ganch;
1606 }
1607 }
1608 else /* pos() not defined */
1609 PL_reg_ganch = strbeg;
1610 }
1611
699c3c34
JH
1612 if (do_utf8 == (UTF!=0) &&
1613 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1614 re_scream_pos_data d;
1615
1616 d.scream_olds = &scream_olds;
1617 d.scream_pos = &scream_pos;
1618 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7
JH
1619 if (!s) {
1620 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1621 goto phooey; /* not present */
3fa9c3d7 1622 }
6eb5f6b9
JH
1623 }
1624
2a782b5b 1625 DEBUG_r({
9e55ce06
JH
1626 char *s0 = UTF ?
1627 pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
c728cb41 1628 UNI_DISPLAY_REGEX) :
9e55ce06
JH
1629 prog->precomp;
1630 int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1631 char *s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
c728cb41 1632 UNI_DISPLAY_REGEX) : startpos;
9e55ce06 1633 int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
2a782b5b
JH
1634 if (!PL_colorset)
1635 reginitcolors();
1636 PerlIO_printf(Perl_debug_log,
9e55ce06 1637 "%sMatching REx%s `%s%*.*s%s%s' against `%s%.*s%s%s'\n",
2a782b5b 1638 PL_colors[4],PL_colors[5],PL_colors[0],
9e55ce06 1639 len0, len0, s0,
2a782b5b 1640 PL_colors[1],
9e55ce06 1641 len0 > 60 ? "..." : "",
2a782b5b 1642 PL_colors[0],
9e55ce06
JH
1643 (int)(len1 > 60 ? 60 : len1),
1644 s1, PL_colors[1],
1645 (len1 > 60 ? "..." : "")
2a782b5b
JH
1646 );
1647 });
6eb5f6b9
JH
1648
1649 /* Simplest case: anchored match need be tried only once. */
1650 /* [unless only anchor is BOL and multiline is set] */
1651 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1652 if (s == startpos && regtry(prog, startpos))
1653 goto got_it;
1654 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1655 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1656 {
1657 char *end;
1658
1659 if (minlen)
1660 dontbother = minlen - 1;
1aa99e6b 1661 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1662 /* for multiline we only have to try after newlines */
1663 if (prog->check_substr) {
1664 if (s == startpos)
1665 goto after_try;
1666 while (1) {
1667 if (regtry(prog, s))
1668 goto got_it;
1669 after_try:
1670 if (s >= end)
1671 goto phooey;
1672 if (prog->reganch & RE_USE_INTUIT) {
1673 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1674 if (!s)
1675 goto phooey;
1676 }
1677 else
1678 s++;
1679 }
1680 } else {
1681 if (s > startpos)
1682 s--;
1683 while (s < end) {
1684 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1685 if (regtry(prog, s))
1686 goto got_it;
1687 }
1688 }
1689 }
1690 }
1691 goto phooey;
1692 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1693 if (regtry(prog, PL_reg_ganch))
1694 goto got_it;
1695 goto phooey;
1696 }
1697
1698 /* Messy cases: unanchored match. */
9041c2e3 1699 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1700 /* we have /x+whatever/ */
1701 /* it must be a one character string (XXXX Except UTF?) */
1702 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1703#ifdef DEBUGGING
1704 int did_match = 0;
1705#endif
1706
1aa99e6b 1707 if (do_utf8) {
6eb5f6b9
JH
1708 while (s < strend) {
1709 if (*s == ch) {
bf93d4cc 1710 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1711 if (regtry(prog, s)) goto got_it;
1712 s += UTF8SKIP(s);
1713 while (s < strend && *s == ch)
1714 s += UTF8SKIP(s);
1715 }
1716 s += UTF8SKIP(s);
1717 }
1718 }
1719 else {
1720 while (s < strend) {
1721 if (*s == ch) {
bf93d4cc 1722 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1723 if (regtry(prog, s)) goto got_it;
1724 s++;
1725 while (s < strend && *s == ch)
1726 s++;
1727 }
1728 s++;
1729 }
1730 }
b7953727 1731 DEBUG_r(if (!did_match)
bf93d4cc 1732 PerlIO_printf(Perl_debug_log,
b7953727
JH
1733 "Did not find anchored character...\n")
1734 );
6eb5f6b9
JH
1735 }
1736 /*SUPPRESS 560*/
1aa99e6b
IH
1737 else if (do_utf8 == (UTF!=0) &&
1738 (prog->anchored_substr != Nullsv
9041c2e3 1739 || (prog->float_substr != Nullsv
1aa99e6b 1740 && prog->float_max_offset < strend - s))) {
9041c2e3 1741 SV *must = prog->anchored_substr
6eb5f6b9 1742 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1743 I32 back_max =
6eb5f6b9 1744 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1745 I32 back_min =
6eb5f6b9 1746 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1747 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1748 -(I32)(CHR_SVLEN(must)
1aa99e6b 1749 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1750 char *last1; /* Last position checked before */
bf93d4cc
GS
1751#ifdef DEBUGGING
1752 int did_match = 0;
1753#endif
6eb5f6b9
JH
1754
1755 if (s > PL_bostr)
1756 last1 = HOPc(s, -1);
1757 else
1758 last1 = s - 1; /* bogus */
1759
1760 /* XXXX check_substr already used to find `s', can optimize if
1761 check_substr==must. */
1762 scream_pos = -1;
1763 dontbother = end_shift;
1764 strend = HOPc(strend, -dontbother);
1765 while ( (s <= last) &&
9041c2e3 1766 ((flags & REXEC_SCREAM)
1aa99e6b 1767 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1768 end_shift, &scream_pos, 0))
1aa99e6b 1769 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1770 (unsigned char*)strend, must,
6eb5f6b9 1771 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1772 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1773 if (HOPc(s, -back_max) > last1) {
1774 last1 = HOPc(s, -back_min);
1775 s = HOPc(s, -back_max);
1776 }
1777 else {
1778 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1779
1780 last1 = HOPc(s, -back_min);
1781 s = t;
1782 }
1aa99e6b 1783 if (do_utf8) {
6eb5f6b9
JH
1784 while (s <= last1) {
1785 if (regtry(prog, s))
1786 goto got_it;
1787 s += UTF8SKIP(s);
1788 }
1789 }
1790 else {
1791 while (s <= last1) {
1792 if (regtry(prog, s))
1793 goto got_it;
1794 s++;
1795 }
1796 }
1797 }
b7953727
JH
1798 DEBUG_r(if (!did_match)
1799 PerlIO_printf(Perl_debug_log,
1800 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1801 ((must == prog->anchored_substr)
1802 ? "anchored" : "floating"),
1803 PL_colors[0],
1804 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1805 SvPVX(must),
b7953727
JH
1806 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1807 );
6eb5f6b9
JH
1808 goto phooey;
1809 }
155aba94 1810 else if ((c = prog->regstclass)) {
66e933ab
GS
1811 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1812 /* don't bother with what can't match */
6eb5f6b9 1813 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1814 DEBUG_r({
1815 SV *prop = sv_newmortal();
9e55ce06
JH
1816 char *s0;
1817 char *s1;
1818 int len0;
1819 int len1;
1820
ffc61ed2 1821 regprop(prop, c);
9e55ce06
JH
1822 s0 = UTF ?
1823 pv_uni_display(dsv0, (U8*)SvPVX(prop), SvCUR(prop), 60,
c728cb41 1824 UNI_DISPLAY_REGEX) :
9e55ce06
JH
1825 SvPVX(prop);
1826 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1827 s1 = UTF ?
c728cb41 1828 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
9e55ce06
JH
1829 len1 = UTF ? SvCUR(dsv1) : strend - s;
1830 PerlIO_printf(Perl_debug_log,
1831 "Matching stclass `%*.*s' against `%*.*s'\n",
1832 len0, len0, s0,
1833 len1, len1, s1);
ffc61ed2 1834 });
6eb5f6b9
JH
1835 if (find_byclass(prog, c, s, strend, startpos, 0))
1836 goto got_it;
bf93d4cc 1837 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1838 }
1839 else {
1840 dontbother = 0;
1841 if (prog->float_substr != Nullsv) { /* Trim the end. */
1842 char *last;
d6a28714
JH
1843
1844 if (flags & REXEC_SCREAM) {
1845 last = screaminstr(sv, prog->float_substr, s - strbeg,
1846 end_shift, &scream_pos, 1); /* last one */
1847 if (!last)
ffc61ed2 1848 last = scream_olds; /* Only one occurrence. */
b8c5462f 1849 }
d6a28714
JH
1850 else {
1851 STRLEN len;
1852 char *little = SvPV(prog->float_substr, len);
1853
1854 if (SvTAIL(prog->float_substr)) {
1855 if (memEQ(strend - len + 1, little, len - 1))
1856 last = strend - len + 1;
1857 else if (!PL_multiline)
9041c2e3 1858 last = memEQ(strend - len, little, len)
d6a28714 1859 ? strend - len : Nullch;
b8c5462f 1860 else
d6a28714
JH
1861 goto find_last;
1862 } else {
1863 find_last:
9041c2e3 1864 if (len)
d6a28714 1865 last = rninstr(s, strend, little, little + len);
b8c5462f 1866 else
d6a28714 1867 last = strend; /* matching `$' */
b8c5462f 1868 }
b8c5462f 1869 }
bf93d4cc
GS
1870 if (last == NULL) {
1871 DEBUG_r(PerlIO_printf(Perl_debug_log,
1872 "%sCan't trim the tail, match fails (should not happen)%s\n",
1873 PL_colors[4],PL_colors[5]));
1874 goto phooey; /* Should not happen! */
1875 }
d6a28714
JH
1876 dontbother = strend - last + prog->float_min_offset;
1877 }
1878 if (minlen && (dontbother < minlen))
1879 dontbother = minlen - 1;
1880 strend -= dontbother; /* this one's always in bytes! */
1881 /* We don't know much -- general case. */
1aa99e6b 1882 if (do_utf8) {
d6a28714
JH
1883 for (;;) {
1884 if (regtry(prog, s))
1885 goto got_it;
1886 if (s >= strend)
1887 break;
b8c5462f 1888 s += UTF8SKIP(s);
d6a28714
JH
1889 };
1890 }
1891 else {
1892 do {
1893 if (regtry(prog, s))
1894 goto got_it;
1895 } while (s++ < strend);
1896 }
1897 }
1898
1899 /* Failure. */
1900 goto phooey;
1901
1902got_it:
1903 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1904
1905 if (PL_reg_eval_set) {
1906 /* Preserve the current value of $^R */
1907 if (oreplsv != GvSV(PL_replgv))
1908 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1909 restored, the value remains
1910 the same. */
acfe0abc 1911 restore_pos(aTHX_ 0);
d6a28714
JH
1912 }
1913
1914 /* make sure $`, $&, $', and $digit will work later */
1915 if ( !(flags & REXEC_NOT_FIRST) ) {
1916 if (RX_MATCH_COPIED(prog)) {
1917 Safefree(prog->subbeg);
1918 RX_MATCH_COPIED_off(prog);
1919 }
1920 if (flags & REXEC_COPY_STR) {
1921 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1922
1923 s = savepvn(strbeg, i);
1924 prog->subbeg = s;
1925 prog->sublen = i;
1926 RX_MATCH_COPIED_on(prog);
1927 }
1928 else {
1929 prog->subbeg = strbeg;
1930 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1931 }
1932 }
9041c2e3 1933
d6a28714
JH
1934 return 1;
1935
1936phooey:
bf93d4cc
GS
1937 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1938 PL_colors[4],PL_colors[5]));
d6a28714 1939 if (PL_reg_eval_set)
acfe0abc 1940 restore_pos(aTHX_ 0);
d6a28714
JH
1941 return 0;
1942}
1943
1944/*
1945 - regtry - try match at specific point
1946 */
1947STATIC I32 /* 0 failure, 1 success */
1948S_regtry(pTHX_ regexp *prog, char *startpos)
1949{
d6a28714
JH
1950 register I32 i;
1951 register I32 *sp;
1952 register I32 *ep;
1953 CHECKPOINT lastcp;
1954
02db2b7b
IZ
1955#ifdef DEBUGGING
1956 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1957#endif
d6a28714
JH
1958 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1959 MAGIC *mg;
1960
1961 PL_reg_eval_set = RS_init;
1962 DEBUG_r(DEBUG_s(
b900a521
JH
1963 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1964 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1965 ));
e8347627 1966 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1967 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1968 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1969 SAVETMPS;
1970 /* Apparently this is not needed, judging by wantarray. */
e8347627 1971 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1972 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1973
1974 if (PL_reg_sv) {
1975 /* Make $_ available to executed code. */
1976 if (PL_reg_sv != DEFSV) {
4d1ff10f 1977 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
d6a28714
JH
1978 SAVESPTR(DEFSV);
1979 DEFSV = PL_reg_sv;
b8c5462f 1980 }
d6a28714 1981
9041c2e3 1982 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1983 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1984 /* prepare for quick setting of pos */
14befaf4
DM
1985 sv_magic(PL_reg_sv, (SV*)0,
1986 PERL_MAGIC_regex_global, Nullch, 0);
1987 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1988 mg->mg_len = -1;
b8c5462f 1989 }
d6a28714
JH
1990 PL_reg_magic = mg;
1991 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1992 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 1993 }
09687e5a 1994 if (!PL_reg_curpm) {
0f79a09d 1995 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
1996#ifdef USE_ITHREADS
1997 {
1998 SV* repointer = newSViv(0);
577e12cc 1999 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2000 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2001 av_push(PL_regex_padav,repointer);
2002 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2003 PL_regex_pad = AvARRAY(PL_regex_padav);
2004 }
2005#endif
2006 }
aaa362c4 2007 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2008 PL_reg_oldcurpm = PL_curpm;
2009 PL_curpm = PL_reg_curpm;
2010 if (RX_MATCH_COPIED(prog)) {
2011 /* Here is a serious problem: we cannot rewrite subbeg,
2012 since it may be needed if this match fails. Thus
2013 $` inside (?{}) could fail... */
2014 PL_reg_oldsaved = prog->subbeg;
2015 PL_reg_oldsavedlen = prog->sublen;
2016 RX_MATCH_COPIED_off(prog);
2017 }
2018 else
2019 PL_reg_oldsaved = Nullch;
2020 prog->subbeg = PL_bostr;
2021 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2022 }
2023 prog->startp[0] = startpos - PL_bostr;
2024 PL_reginput = startpos;
2025 PL_regstartp = prog->startp;
2026 PL_regendp = prog->endp;
2027 PL_reglastparen = &prog->lastparen;
a01268b5 2028 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
2029 prog->lastparen = 0;
2030 PL_regsize = 0;
2031 DEBUG_r(PL_reg_starttry = startpos);
2032 if (PL_reg_start_tmpl <= prog->nparens) {
2033 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2034 if(PL_reg_start_tmp)
2035 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2036 else
2037 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2038 }
2039
128e8167
JH
2040#ifdef DEBUGGING
2041 sv_setpvn(PERL_DEBUG_PAD(0), "", 0);
2042 sv_setpvn(PERL_DEBUG_PAD(1), "", 0);
2043 sv_setpvn(PERL_DEBUG_PAD(2), "", 0);
2044#endif
2045
d6a28714
JH
2046 /* XXXX What this code is doing here?!!! There should be no need
2047 to do this again and again, PL_reglastparen should take care of
3dd2943c 2048 this! --ilya*/
dafc8851
JH
2049
2050 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2051 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2052 * PL_reglastparen), is not needed at all by the test suite
2053 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2054 * enough, for building DynaLoader, or otherwise this
2055 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2056 * will happen. Meanwhile, this code *is* needed for the
2057 * above-mentioned test suite tests to succeed. The common theme
2058 * on those tests seems to be returning null fields from matches.
2059 * --jhi */
dafc8851 2060#if 1
d6a28714
JH
2061 sp = prog->startp;
2062 ep = prog->endp;
2063 if (prog->nparens) {
09e8ae3b 2064 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
2065 *++sp = -1;
2066 *++ep = -1;
2067 }
2068 }
dafc8851 2069#endif
02db2b7b 2070 REGCP_SET(lastcp);
d6a28714
JH
2071 if (regmatch(prog->program + 1)) {
2072 prog->endp[0] = PL_reginput - PL_bostr;
2073 return 1;
2074 }
02db2b7b 2075 REGCP_UNWIND(lastcp);
d6a28714
JH
2076 return 0;
2077}
2078
02db2b7b
IZ
2079#define RE_UNWIND_BRANCH 1
2080#define RE_UNWIND_BRANCHJ 2
2081
2082union re_unwind_t;
2083
2084typedef struct { /* XX: makes sense to enlarge it... */
2085 I32 type;
2086 I32 prev;
2087 CHECKPOINT lastcp;
2088} re_unwind_generic_t;
2089
2090typedef struct {
2091 I32 type;
2092 I32 prev;
2093 CHECKPOINT lastcp;
2094 I32 lastparen;
2095 regnode *next;
2096 char *locinput;
2097 I32 nextchr;
2098#ifdef DEBUGGING
2099 int regindent;
2100#endif
2101} re_unwind_branch_t;
2102
2103typedef union re_unwind_t {
2104 I32 type;
2105 re_unwind_generic_t generic;
2106 re_unwind_branch_t branch;
2107} re_unwind_t;
2108
8ba1375e
MJD
2109#define sayYES goto yes
2110#define sayNO goto no
2111#define sayYES_FINAL goto yes_final
2112#define sayYES_LOUD goto yes_loud
2113#define sayNO_FINAL goto no_final
2114#define sayNO_SILENT goto do_no
2115#define saySAME(x) if (x) goto yes; else goto no
2116
2117#define REPORT_CODE_OFF 24
2118
d6a28714
JH
2119/*
2120 - regmatch - main matching routine
2121 *
2122 * Conceptually the strategy is simple: check to see whether the current
2123 * node matches, call self recursively to see whether the rest matches,
2124 * and then act accordingly. In practice we make some effort to avoid
2125 * recursion, in particular by going through "ordinary" nodes (that don't
2126 * need to know whether the rest of the match failed) by a loop instead of
2127 * by recursion.
2128 */
2129/* [lwall] I've hoisted the register declarations to the outer block in order to
2130 * maybe save a little bit of pushing and popping on the stack. It also takes
2131 * advantage of machines that use a register save mask on subroutine entry.
2132 */
2133STATIC I32 /* 0 failure, 1 success */
2134S_regmatch(pTHX_ regnode *prog)
2135{
d6a28714
JH
2136 register regnode *scan; /* Current node. */
2137 regnode *next; /* Next node. */
2138 regnode *inner; /* Next node in internal branch. */
2139 register I32 nextchr; /* renamed nextchr - nextchar colides with
2140 function of same name */
2141 register I32 n; /* no or next */
b7953727
JH
2142 register I32 ln = 0; /* len or last */
2143 register char *s = Nullch; /* operand or save */
d6a28714 2144 register char *locinput = PL_reginput;
b7953727 2145 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2146 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2147 I32 unwind = 0;
b7953727 2148#if 0
02db2b7b 2149 I32 firstcp = PL_savestack_ix;
b7953727 2150#endif
53c4c00c 2151 register bool do_utf8 = PL_reg_match_utf8;
2a782b5b 2152#ifdef DEBUGGING
ce333219
JH
2153 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2154 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2155 SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2a782b5b 2156#endif
02db2b7b 2157
d6a28714
JH
2158#ifdef DEBUGGING
2159 PL_regindent++;
2160#endif
2161
2162 /* Note that nextchr is a byte even in UTF */
2163 nextchr = UCHARAT(locinput);
2164 scan = prog;
2165 while (scan != NULL) {
8ba1375e 2166
2a782b5b 2167 DEBUG_r( {
d6a28714
JH
2168 SV *prop = sv_newmortal();
2169 int docolor = *PL_colors[0];
2170 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2171 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2172 /* The part of the string before starttry has one color
2173 (pref0_len chars), between starttry and current
2174 position another one (pref_len - pref0_len chars),
2175 after the current position the third one.
2176 We assume that pref0_len <= pref_len, otherwise we
2177 decrease pref0_len. */
9041c2e3 2178 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2179 ? (5 + taill) - l : locinput - PL_bostr;
2180 int pref0_len;
d6a28714 2181
df1ffd02 2182 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2183 pref_len++;
2184 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2185 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2186 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2187 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2188 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2189 l--;
d6a28714
JH
2190 if (pref0_len < 0)
2191 pref0_len = 0;
2192 if (pref0_len > pref_len)
2193 pref0_len = pref_len;
2194 regprop(prop, scan);
2a782b5b
JH
2195 {
2196 char *s0 =
df1ffd02 2197 do_utf8 ?
2a782b5b 2198 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
c728cb41 2199 pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2200 locinput - pref_len;
df1ffd02
JH
2201 int len0 = do_utf8 ? strlen(s0) : pref0_len;
2202 char *s1 = do_utf8 ?
2a782b5b 2203 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
c728cb41 2204 pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2205 locinput - pref_len + pref0_len;
df1ffd02
JH
2206 int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2207 char *s2 = do_utf8 ?
2a782b5b 2208 pv_uni_display(dsv2, (U8*)locinput,
c728cb41 2209 PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2210 locinput;
df1ffd02 2211 int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2212 PerlIO_printf(Perl_debug_log,
2213 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2214 (IV)(locinput - PL_bostr),
2215 PL_colors[4],
2216 len0, s0,
2217 PL_colors[5],
2218 PL_colors[2],
2219 len1, s1,
2220 PL_colors[3],
2221 (docolor ? "" : "> <"),
2222 PL_colors[0],
2223 len2, s2,
2224 PL_colors[1],
2225 15 - l - pref_len + 1,
2226 "",
2227 (IV)(scan - PL_regprogram), PL_regindent*2, "",
2228 SvPVX(prop));
2229 }
2230 });
d6a28714
JH
2231
2232 next = scan + NEXT_OFF(scan);
2233 if (next == scan)
2234 next = NULL;
2235
2236 switch (OP(scan)) {
2237 case BOL:
12d33761
HS
2238 if (locinput == PL_bostr || (PL_multiline &&
2239 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2240 {
2241 /* regtill = regbol; */
b8c5462f
JH
2242 break;
2243 }
d6a28714
JH
2244 sayNO;
2245 case MBOL:
12d33761
HS
2246 if (locinput == PL_bostr ||
2247 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2248 {
b8c5462f
JH
2249 break;
2250 }
d6a28714
JH
2251 sayNO;
2252 case SBOL:
c2a73568 2253 if (locinput == PL_bostr)
b8c5462f 2254 break;
d6a28714
JH
2255 sayNO;
2256 case GPOS:
2257 if (locinput == PL_reg_ganch)
2258 break;
2259 sayNO;
2260 case EOL:
2261 if (PL_multiline)
2262 goto meol;
2263 else
2264 goto seol;
2265 case MEOL:
2266 meol:
2267 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2268 sayNO;
b8c5462f 2269 break;
d6a28714
JH
2270 case SEOL:
2271 seol:
2272 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2273 sayNO;
d6a28714 2274 if (PL_regeol - locinput > 1)
b8c5462f 2275 sayNO;
b8c5462f 2276 break;
d6a28714
JH
2277 case EOS:
2278 if (PL_regeol != locinput)
b8c5462f 2279 sayNO;
d6a28714 2280 break;
ffc61ed2 2281 case SANY:
d6a28714 2282 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2283 sayNO;
f33976b4
DB
2284 if (do_utf8) {
2285 locinput += PL_utf8skip[nextchr];
2286 if (locinput > PL_regeol)
2287 sayNO;
2288 nextchr = UCHARAT(locinput);
2289 }
2290 else
2291 nextchr = UCHARAT(++locinput);
2292 break;
2293 case CANY:
2294 if (!nextchr && locinput >= PL_regeol)
2295 sayNO;
b8c5462f 2296 nextchr = UCHARAT(++locinput);
a0d0e21e 2297 break;
ffc61ed2 2298 case REG_ANY:
1aa99e6b
IH
2299 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2300 sayNO;
2301 if (do_utf8) {
b8c5462f 2302 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2303 if (locinput > PL_regeol)
2304 sayNO;
a0ed51b3 2305 nextchr = UCHARAT(locinput);
a0ed51b3 2306 }
1aa99e6b
IH
2307 else
2308 nextchr = UCHARAT(++locinput);
a0ed51b3 2309 break;
d6a28714 2310 case EXACT:
cd439c50
IZ
2311 s = STRING(scan);
2312 ln = STR_LEN(scan);
1aa99e6b 2313 if (do_utf8 != (UTF!=0)) {
bc517b45 2314 /* The target and the pattern have differing utf8ness. */
1aa99e6b
IH
2315 char *l = locinput;
2316 char *e = s + ln;
bc517b45 2317 STRLEN ulen;
a72c7584 2318
5ff6fc6d
JH
2319 if (do_utf8) {
2320 /* The target is utf8, the pattern is not utf8. */
1aa99e6b
IH
2321 while (s < e) {
2322 if (l >= PL_regeol)
5ff6fc6d
JH
2323 sayNO;
2324 if (NATIVE_TO_UNI(*(U8*)s) !=
bc517b45 2325 utf8_to_uvchr((U8*)l, &ulen))
5ff6fc6d 2326 sayNO;
bc517b45 2327 l += ulen;
5ff6fc6d 2328 s ++;
1aa99e6b 2329 }
5ff6fc6d
JH
2330 }
2331 else {
2332 /* The target is not utf8, the pattern is utf8. */
1aa99e6b
IH
2333 while (s < e) {
2334 if (l >= PL_regeol)
2335 sayNO;
5ff6fc6d 2336 if (NATIVE_TO_UNI(*((U8*)l)) !=
bc517b45 2337 utf8_to_uvchr((U8*)s, &ulen))
1aa99e6b 2338 sayNO;
bc517b45 2339 s += ulen;
a72c7584 2340 l ++;
1aa99e6b 2341 }
5ff6fc6d 2342 }
1aa99e6b
IH
2343 locinput = l;
2344 nextchr = UCHARAT(locinput);
2345 break;
2346 }
bc517b45 2347 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2348 /* Inline the first character, for speed. */
2349 if (UCHARAT(s) != nextchr)
2350 sayNO;
2351 if (PL_regeol - locinput < ln)
2352 sayNO;
2353 if (ln > 1 && memNE(s, locinput, ln))
2354 sayNO;
2355 locinput += ln;
2356 nextchr = UCHARAT(locinput);
2357 break;
2358 case EXACTFL:
b8c5462f
JH
2359 PL_reg_flags |= RF_tainted;
2360 /* FALL THROUGH */
d6a28714 2361 case EXACTF:
cd439c50
IZ
2362 s = STRING(scan);
2363 ln = STR_LEN(scan);
d6a28714 2364
d07ddd77
JH
2365 if (do_utf8 || UTF) {
2366 /* Either target or the pattern are utf8. */
d6a28714 2367 char *l = locinput;
d07ddd77 2368 char *e = PL_regeol;
bc517b45 2369
d07ddd77
JH
2370 if (ibcmp_utf8(s, 0, ln, do_utf8,
2371 l, &e, 0, UTF))
2372 sayNO;
2373 locinput = e;
2374 nextchr = UCHARAT(locinput);
2375 break;
a0ed51b3 2376 }
d6a28714 2377
bc517b45
JH
2378 /* Neither the target and the pattern are utf8. */
2379
d6a28714
JH
2380 /* Inline the first character, for speed. */
2381 if (UCHARAT(s) != nextchr &&
2382 UCHARAT(s) != ((OP(scan) == EXACTF)
2383 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2384 sayNO;
d6a28714 2385 if (PL_regeol - locinput < ln)
b8c5462f 2386 sayNO;
d6a28714
JH
2387 if (ln > 1 && (OP(scan) == EXACTF
2388 ? ibcmp(s, locinput, ln)
2389 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2390 sayNO;
d6a28714
JH
2391 locinput += ln;
2392 nextchr = UCHARAT(locinput);
a0d0e21e 2393 break;
d6a28714 2394 case ANYOF:
ffc61ed2 2395 if (do_utf8) {
9e55ce06
JH
2396 STRLEN inclasslen = PL_regeol - locinput;
2397
2398 if (!reginclasslen(scan, (U8*)locinput, &inclasslen, do_utf8))
ffc61ed2
JH
2399 sayNO;
2400 if (locinput >= PL_regeol)
2401 sayNO;
9e55ce06 2402 locinput += inclasslen;
b8c5462f 2403 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2404 }
2405 else {
2406 if (nextchr < 0)
2407 nextchr = UCHARAT(locinput);
2408 if (!reginclass(scan, (U8*)locinput, do_utf8))
2409 sayNO;
2410 if (!nextchr && locinput >= PL_regeol)
2411 sayNO;
2412 nextchr = UCHARAT(++locinput);
2413 }
b8c5462f 2414 break;
d6a28714 2415 case ALNUML:
b8c5462f
JH
2416 PL_reg_flags |= RF_tainted;
2417 /* FALL THROUGH */
d6a28714 2418 case ALNUM:
b8c5462f 2419 if (!nextchr)
4633a7c4 2420 sayNO;
ffc61ed2 2421 if (do_utf8) {
ad24be35 2422 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2423 if (!(OP(scan) == ALNUM
3568d838 2424 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2425 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2426 {
2427 sayNO;
a0ed51b3 2428 }
b8c5462f 2429 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2430 nextchr = UCHARAT(locinput);
2431 break;
2432 }
ffc61ed2 2433 if (!(OP(scan) == ALNUM
d6a28714 2434 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2435 sayNO;
b8c5462f 2436 nextchr = UCHARAT(++locinput);
a0d0e21e 2437 break;
d6a28714 2438 case NALNUML:
b8c5462f
JH
2439 PL_reg_flags |= RF_tainted;
2440 /* FALL THROUGH */
d6a28714
JH
2441 case NALNUM:
2442 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2443 sayNO;
ffc61ed2 2444 if (do_utf8) {
8269fa76 2445 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2446 if (OP(scan) == NALNUM
3568d838 2447 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2448 : isALNUM_LC_utf8((U8*)locinput))
2449 {
b8c5462f 2450 sayNO;
d6a28714 2451 }
b8c5462f
JH
2452 locinput += PL_utf8skip[nextchr];
2453 nextchr = UCHARAT(locinput);
2454 break;
2455 }
ffc61ed2 2456 if (OP(scan) == NALNUM
d6a28714 2457 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2458 sayNO;
76e3520e 2459 nextchr = UCHARAT(++locinput);
a0d0e21e 2460 break;
d6a28714
JH
2461 case BOUNDL:
2462 case NBOUNDL:
3280af22 2463 PL_reg_flags |= RF_tainted;
bbce6d69 2464 /* FALL THROUGH */
d6a28714
JH
2465 case BOUND:
2466 case NBOUND:
2467 /* was last char in word? */
ffc61ed2 2468 if (do_utf8) {
12d33761
HS
2469 if (locinput == PL_bostr)
2470 ln = '\n';
ffc61ed2
JH
2471 else {
2472 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2473
2b9d42f0 2474 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2475 }
2476 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2477 ln = isALNUM_uni(ln);
8269fa76 2478 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2479 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2480 }
2481 else {
9041c2e3 2482 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2483 n = isALNUM_LC_utf8((U8*)locinput);
2484 }
a0ed51b3 2485 }
d6a28714 2486 else {
12d33761
HS
2487 ln = (locinput != PL_bostr) ?
2488 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2489 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2490 ln = isALNUM(ln);
2491 n = isALNUM(nextchr);
2492 }
2493 else {
2494 ln = isALNUM_LC(ln);
2495 n = isALNUM_LC(nextchr);
2496 }
d6a28714 2497 }
ffc61ed2
JH
2498 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2499 OP(scan) == BOUNDL))
2500 sayNO;
a0ed51b3 2501 break;
d6a28714 2502 case SPACEL:
3280af22 2503 PL_reg_flags |= RF_tainted;
bbce6d69 2504 /* FALL THROUGH */
d6a28714 2505 case SPACE:
9442cb0e 2506 if (!nextchr)
4633a7c4 2507 sayNO;
1aa99e6b 2508 if (do_utf8) {
fd400ab9 2509 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2510 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2511 if (!(OP(scan) == SPACE
3568d838 2512 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2513 : isSPACE_LC_utf8((U8*)locinput)))
2514 {
2515 sayNO;
2516 }
2517 locinput += PL_utf8skip[nextchr];
2518 nextchr = UCHARAT(locinput);
2519 break;
d6a28714 2520 }
ffc61ed2
JH
2521 if (!(OP(scan) == SPACE
2522 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2523 sayNO;
2524 nextchr = UCHARAT(++locinput);
2525 }
2526 else {
2527 if (!(OP(scan) == SPACE
2528 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2529 sayNO;
2530 nextchr = UCHARAT(++locinput);
a0ed51b3 2531 }
a0ed51b3 2532 break;
d6a28714 2533 case NSPACEL:
3280af22 2534 PL_reg_flags |= RF_tainted;
bbce6d69 2535 /* FALL THROUGH */
d6a28714 2536 case NSPACE:
9442cb0e 2537 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2538 sayNO;
1aa99e6b 2539 if (do_utf8) {
8269fa76 2540 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2541 if (OP(scan) == NSPACE
3568d838 2542 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2543 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2544 {
2545 sayNO;
2546 }
2547 locinput += PL_utf8skip[nextchr];
2548 nextchr = UCHARAT(locinput);
2549 break;
a0ed51b3 2550 }
ffc61ed2 2551 if (OP(scan) == NSPACE
d6a28714 2552 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2553 sayNO;
76e3520e 2554 nextchr = UCHARAT(++locinput);
a0d0e21e 2555 break;
d6a28714 2556 case DIGITL:
a0ed51b3
LW
2557 PL_reg_flags |= RF_tainted;
2558 /* FALL THROUGH */
d6a28714 2559 case DIGIT:
9442cb0e 2560 if (!nextchr)
a0ed51b3 2561 sayNO;
1aa99e6b 2562 if (do_utf8) {
8269fa76 2563 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2564 if (!(OP(scan) == DIGIT
3568d838 2565 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2566 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2567 {
a0ed51b3 2568 sayNO;
dfe13c55 2569 }
6f06b55f 2570 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2571 nextchr = UCHARAT(locinput);
2572 break;
2573 }
ffc61ed2 2574 if (!(OP(scan) == DIGIT
9442cb0e 2575 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2576 sayNO;
2577 nextchr = UCHARAT(++locinput);
2578 break;
d6a28714 2579 case NDIGITL:
b8c5462f
JH
2580 PL_reg_flags |= RF_tainted;
2581 /* FALL THROUGH */
d6a28714 2582 case NDIGIT:
9442cb0e 2583 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2584 sayNO;
1aa99e6b 2585 if (do_utf8) {
8269fa76 2586 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2587 if (OP(scan) == NDIGIT
3568d838 2588 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2589 : isDIGIT_LC_utf8((U8*)locinput))
2590 {
a0ed51b3 2591 sayNO;
9442cb0e 2592 }
6f06b55f 2593 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2594 nextchr = UCHARAT(locinput);
2595 break;
2596 }
ffc61ed2 2597 if (OP(scan) == NDIGIT
9442cb0e 2598 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2599 sayNO;
2600 nextchr = UCHARAT(++locinput);
2601 break;
2602 case CLUMP:
b7c83a7e 2603 if (locinput >= PL_regeol)
a0ed51b3 2604 sayNO;
b7c83a7e
JH
2605 if (do_utf8) {
2606 LOAD_UTF8_CHARCLASS(mark,"~");
2607 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2608 sayNO;
2609 locinput += PL_utf8skip[nextchr];
2610 while (locinput < PL_regeol &&
2611 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2612 locinput += UTF8SKIP(locinput);
2613 if (locinput > PL_regeol)
2614 sayNO;
eb08e2da
JH
2615 }
2616 else
2617 locinput++;
a0ed51b3
LW
2618 nextchr = UCHARAT(locinput);
2619 break;
c8756f30 2620 case REFFL:
3280af22 2621 PL_reg_flags |= RF_tainted;
c8756f30 2622 /* FALL THROUGH */
c277df42 2623 case REF:
c8756f30 2624 case REFF:
c277df42 2625 n = ARG(scan); /* which paren pair */
cf93c79d 2626 ln = PL_regstartp[n];
2c2d71f5 2627 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2628 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2629 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2630 if (ln == PL_regendp[n])
a0d0e21e 2631 break;
a0ed51b3 2632
cf93c79d 2633 s = PL_bostr + ln;
1aa99e6b 2634 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2635 char *l = locinput;
cf93c79d 2636 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2637 /*
2638 * Note that we can't do the "other character" lookup trick as
2639 * in the 8-bit case (no pun intended) because in Unicode we
2640 * have to map both upper and title case to lower case.
2641 */
2642 if (OP(scan) == REFF) {
a2a2844f 2643 STRLEN ulen1, ulen2;
e7ae6809
JH
2644 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
2645 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a0ed51b3
LW
2646 while (s < e) {
2647 if (l >= PL_regeol)
2648 sayNO;
a2a2844f
JH
2649 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2650 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 2651 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 2652 sayNO;
a2a2844f
JH
2653 s += ulen1;
2654 l += ulen2;
a0ed51b3
LW
2655 }
2656 }
2657 locinput = l;
2658 nextchr = UCHARAT(locinput);
2659 break;
2660 }
2661
a0d0e21e 2662 /* Inline the first character, for speed. */
76e3520e 2663 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2664 (OP(scan) == REF ||
2665 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2666 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2667 sayNO;
cf93c79d 2668 ln = PL_regendp[n] - ln;
3280af22 2669 if (locinput + ln > PL_regeol)
4633a7c4 2670 sayNO;
c8756f30
AK
2671 if (ln > 1 && (OP(scan) == REF
2672 ? memNE(s, locinput, ln)
2673 : (OP(scan) == REFF
2674 ? ibcmp(s, locinput, ln)
2675 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2676 sayNO;
a0d0e21e 2677 locinput += ln;
76e3520e 2678 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2679 break;
2680
2681 case NOTHING:
c277df42 2682 case TAIL:
a0d0e21e
LW
2683 break;
2684 case BACK:
2685 break;
c277df42
IZ
2686 case EVAL:
2687 {
2688 dSP;
533c011a 2689 OP_4tree *oop = PL_op;
3280af22
NIS
2690 COP *ocurcop = PL_curcop;
2691 SV **ocurpad = PL_curpad;
c277df42 2692 SV *ret;
9041c2e3 2693
c277df42 2694 n = ARG(scan);
533c011a 2695 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2696 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2697 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2698 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2699
8e5e9ebe
RGS
2700 {
2701 SV **before = SP;
2702 CALLRUNOPS(aTHX); /* Scalar context. */
2703 SPAGAIN;
2704 if (SP == before)
2705 ret = Nullsv; /* protect against empty (?{}) blocks. */
2706 else {
2707 ret = POPs;
2708 PUTBACK;
2709 }
2710 }
2711
0f5d15d6
IZ
2712 PL_op = oop;
2713 PL_curpad = ocurpad;
2714 PL_curcop = ocurcop;
c277df42 2715 if (logical) {
0f5d15d6
IZ
2716 if (logical == 2) { /* Postponed subexpression. */
2717 regexp *re;
22c35a8c 2718 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2719 re_cc_state state;
0f5d15d6
IZ
2720 CHECKPOINT cp, lastcp;
2721
2722 if(SvROK(ret) || SvRMAGICAL(ret)) {
2723 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2724
2725 if(SvMAGICAL(sv))
14befaf4 2726 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2727 }
2728 if (mg) {
2729 re = (regexp *)mg->mg_obj;
df0003d4 2730 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2731 }
2732 else {
2733 STRLEN len;
2734 char *t = SvPV(ret, len);
2735 PMOP pm;
2736 char *oprecomp = PL_regprecomp;
2737 I32 osize = PL_regsize;
2738 I32 onpar = PL_regnpar;
2739
5fcd1c1b 2740 Zero(&pm, 1, PMOP);
cea2e8a9 2741 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2742 if (!(SvFLAGS(ret)
0f5d15d6 2743 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2744 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2745 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2746 PL_regprecomp = oprecomp;
2747 PL_regsize = osize;
2748 PL_regnpar = onpar;
2749 }
2750 DEBUG_r(
9041c2e3 2751 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2752 "Entering embedded `%s%.60s%s%s'\n",
2753 PL_colors[0],
2754 re->precomp,
2755 PL_colors[1],
2756 (strlen(re->precomp) > 60 ? "..." : ""))
2757 );
2758 state.node = next;
2759 state.prev = PL_reg_call_cc;
2760 state.cc = PL_regcc;
2761 state.re = PL_reg_re;
2762
2ab05381 2763 PL_regcc = 0;
9041c2e3 2764
0f5d15d6 2765 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2766 REGCP_SET(lastcp);
0f5d15d6
IZ
2767 cache_re(re);
2768 state.ss = PL_savestack_ix;
2769 *PL_reglastparen = 0;
a01268b5 2770 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2771 PL_reg_call_cc = &state;
2772 PL_reginput = locinput;
2c2d71f5
JH
2773
2774 /* XXXX This is too dramatic a measure... */
2775 PL_reg_maxiter = 0;
2776
0f5d15d6 2777 if (regmatch(re->program + 1)) {
2c914db6
IZ
2778 /* Even though we succeeded, we need to restore
2779 global variables, since we may be wrapped inside
2780 SUSPEND, thus the match may be not finished yet. */
2781
2782 /* XXXX Do this only if SUSPENDed? */
2783 PL_reg_call_cc = state.prev;
2784 PL_regcc = state.cc;
2785 PL_reg_re = state.re;
2786 cache_re(PL_reg_re);
2787
2788 /* XXXX This is too dramatic a measure... */
2789 PL_reg_maxiter = 0;
2790
2791 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2792 ReREFCNT_dec(re);
2793 regcpblow(cp);
2794 sayYES;
2795 }
0f5d15d6 2796 ReREFCNT_dec(re);
02db2b7b 2797 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2798 regcppop();
2799 PL_reg_call_cc = state.prev;
2800 PL_regcc = state.cc;
2801 PL_reg_re = state.re;
d3790889 2802 cache_re(PL_reg_re);
2c2d71f5
JH
2803
2804 /* XXXX This is too dramatic a measure... */
2805 PL_reg_maxiter = 0;
2806
8e514ae6 2807 logical = 0;
0f5d15d6
IZ
2808 sayNO;
2809 }
c277df42 2810 sw = SvTRUE(ret);
0f5d15d6 2811 logical = 0;
a0ed51b3
LW
2812 }
2813 else
3280af22 2814 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2815 break;
2816 }
a0d0e21e 2817 case OPEN:
c277df42 2818 n = ARG(scan); /* which paren pair */
3280af22
NIS
2819 PL_reg_start_tmp[n] = locinput;
2820 if (n > PL_regsize)
2821 PL_regsize = n;
a0d0e21e
LW
2822 break;
2823 case CLOSE:
c277df42 2824 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2825 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2826 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2827 if (n > *PL_reglastparen)
2828 *PL_reglastparen = n;
a01268b5 2829 *PL_reglastcloseparen = n;
a0d0e21e 2830 break;
c277df42
IZ
2831 case GROUPP:
2832 n = ARG(scan); /* which paren pair */
cf93c79d 2833 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2834 break;
2835 case IFTHEN:
2c2d71f5 2836 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2837 if (sw)
2838 next = NEXTOPER(NEXTOPER(scan));
2839 else {
2840 next = scan + ARG(scan);
2841 if (OP(next) == IFTHEN) /* Fake one. */
2842 next = NEXTOPER(NEXTOPER(next));
2843 }
2844 break;
2845 case LOGICAL:
0f5d15d6 2846 logical = scan->flags;
c277df42 2847 break;
2ab05381
IZ
2848/*******************************************************************
2849 PL_regcc contains infoblock about the innermost (...)* loop, and
2850 a pointer to the next outer infoblock.
2851
2852 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2853
2854 1) After matching X, regnode for CURLYX is processed;
2855
9041c2e3 2856 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2857 regmatch() recursively with the starting point at WHILEM node;
2858
2859 3) Each hit of WHILEM node tries to match A and Z (in the order
2860 depending on the current iteration, min/max of {min,max} and
2861 greediness). The information about where are nodes for "A"
2862 and "Z" is read from the infoblock, as is info on how many times "A"
2863 was already matched, and greediness.
2864
2865 4) After A matches, the same WHILEM node is hit again.
2866
2867 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2868 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2869 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2870 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2871 of the external loop.
2872
2873 Currently present infoblocks form a tree with a stem formed by PL_curcc
2874 and whatever it mentions via ->next, and additional attached trees
2875 corresponding to temporarily unset infoblocks as in "5" above.
2876
9041c2e3 2877 In the following picture infoblocks for outer loop of
2ab05381
IZ
2878 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2879 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2880 infoblocks are drawn below the "reset" infoblock.
2881
2882 In fact in the picture below we do not show failed matches for Z and T
2883 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2884 more obvious *why* one needs to *temporary* unset infoblocks.]
2885
2886 Matched REx position InfoBlocks Comment
2887 (Y(A)*?Z)*?T x
2888 Y(A)*?Z)*?T x <- O
2889 Y (A)*?Z)*?T x <- O
2890 Y A)*?Z)*?T x <- O <- I
2891 YA )*?Z)*?T x <- O <- I
2892 YA A)*?Z)*?T x <- O <- I
2893 YAA )*?Z)*?T x <- O <- I
2894 YAA Z)*?T x <- O # Temporary unset I
2895 I
2896
2897 YAAZ Y(A)*?Z)*?T x <- O
2898 I
2899
2900 YAAZY (A)*?Z)*?T x <- O
2901 I
2902
2903 YAAZY A)*?Z)*?T x <- O <- I
2904 I
2905
2906 YAAZYA )*?Z)*?T x <- O <- I
2907 I
2908
2909 YAAZYA Z)*?T x <- O # Temporary unset I
2910 I,I
2911
2912 YAAZYAZ )*?T x <- O
2913 I,I
2914
2915 YAAZYAZ T x # Temporary unset O
2916 O
2917 I,I
2918
2919 YAAZYAZT x
2920 O
2921 I,I
2922 *******************************************************************/
a0d0e21e
LW
2923 case CURLYX: {
2924 CURCUR cc;
3280af22 2925 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2926 /* No need to save/restore up to this paren */
2927 I32 parenfloor = scan->flags;
c277df42
IZ
2928
2929 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2930 next += ARG(next);
3280af22
NIS
2931 cc.oldcc = PL_regcc;
2932 PL_regcc = &cc;
cb434fcc
IZ
2933 /* XXXX Probably it is better to teach regpush to support
2934 parenfloor > PL_regsize... */
2935 if (parenfloor > *PL_reglastparen)
2936 parenfloor = *PL_reglastparen; /* Pessimization... */
2937 cc.parenfloor = parenfloor;
a0d0e21e
LW
2938 cc.cur = -1;
2939 cc.min = ARG1(scan);
2940 cc.max = ARG2(scan);
c277df42 2941 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2942 cc.next = next;
2943 cc.minmod = minmod;
2944 cc.lastloc = 0;
3280af22 2945 PL_reginput = locinput;
a0d0e21e
LW
2946 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2947 regcpblow(cp);
3280af22 2948 PL_regcc = cc.oldcc;
4633a7c4 2949 saySAME(n);
a0d0e21e
LW
2950 }
2951 /* NOT REACHED */
2952 case WHILEM: {
2953 /*
2954 * This is really hard to understand, because after we match
2955 * what we're trying to match, we must make sure the rest of
2c2d71f5 2956 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2957 * to go back UP the parse tree by recursing ever deeper. And
2958 * if it fails, we have to reset our parent's current state
2959 * that we can try again after backing off.
2960 */
2961
c277df42 2962 CHECKPOINT cp, lastcp;
3280af22 2963 CURCUR* cc = PL_regcc;
c277df42
IZ
2964 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2965
4633a7c4 2966 n = cc->cur + 1; /* how many we know we matched */
3280af22 2967 PL_reginput = locinput;
a0d0e21e 2968
c277df42 2969 DEBUG_r(
9041c2e3
NIS
2970 PerlIO_printf(Perl_debug_log,
2971 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2972 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2973 (long)n, (long)cc->min,
c277df42
IZ
2974 (long)cc->max, (long)cc)
2975 );
4633a7c4 2976
a0d0e21e
LW
2977 /* If degenerate scan matches "", assume scan done. */
2978
579cf2c3 2979 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2980 PL_regcc = cc->oldcc;
2ab05381
IZ
2981 if (PL_regcc)
2982 ln = PL_regcc->cur;
c277df42 2983 DEBUG_r(
c3464db5
DD
2984 PerlIO_printf(Perl_debug_log,
2985 "%*s empty match detected, try continuation...\n",
3280af22 2986 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2987 );
a0d0e21e 2988 if (regmatch(cc->next))
4633a7c4 2989 sayYES;
2ab05381
IZ
2990 if (PL_regcc)
2991 PL_regcc->cur = ln;
3280af22 2992 PL_regcc = cc;
4633a7c4 2993 sayNO;
a0d0e21e
LW
2994 }
2995
2996 /* First just match a string of min scans. */
2997
2998 if (n < cc->min) {
2999 cc->cur = n;
3000 cc->lastloc = locinput;
4633a7c4
LW
3001 if (regmatch(cc->scan))
3002 sayYES;
3003 cc->cur = n - 1;
c277df42 3004 cc->lastloc = lastloc;
4633a7c4 3005 sayNO;
a0d0e21e
LW
3006 }
3007
2c2d71f5
JH
3008 if (scan->flags) {
3009 /* Check whether we already were at this position.
3010 Postpone detection until we know the match is not
3011 *that* much linear. */
3012 if (!PL_reg_maxiter) {
3013 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3014 PL_reg_leftiter = PL_reg_maxiter;
3015 }
3016 if (PL_reg_leftiter-- == 0) {
3017 I32 size = (PL_reg_maxiter + 7)/8;
3018 if (PL_reg_poscache) {
3019 if (PL_reg_poscache_size < size) {
3020 Renew(PL_reg_poscache, size, char);
3021 PL_reg_poscache_size = size;
3022 }
3023 Zero(PL_reg_poscache, size, char);
3024 }
3025 else {
3026 PL_reg_poscache_size = size;
3027 Newz(29, PL_reg_poscache, size, char);
3028 }
3029 DEBUG_r(
3030 PerlIO_printf(Perl_debug_log,
3031 "%sDetected a super-linear match, switching on caching%s...\n",
3032 PL_colors[4], PL_colors[5])
3033 );
3034 }
3035 if (PL_reg_leftiter < 0) {
3036 I32 o = locinput - PL_bostr, b;
3037
3038 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
3039 b = o % 8;
3040 o /= 8;
3041 if (PL_reg_poscache[o] & (1<<b)) {
3042 DEBUG_r(
3043 PerlIO_printf(Perl_debug_log,
3044 "%*s already tried at this position...\n",
3045 REPORT_CODE_OFF+PL_regindent*2, "")
3046 );
7821416a 3047 sayNO_SILENT;
2c2d71f5
JH
3048 }
3049 PL_reg_poscache[o] |= (1<<b);
3050 }
3051 }
3052
a0d0e21e
LW
3053 /* Prefer next over scan for minimal matching. */
3054
3055 if (cc->minmod) {
3280af22 3056 PL_regcc = cc->oldcc;
2ab05381
IZ
3057 if (PL_regcc)
3058 ln = PL_regcc->cur;
5f05dabc 3059 cp = regcppush(cc->parenfloor);
02db2b7b 3060 REGCP_SET(lastcp);
5f05dabc 3061 if (regmatch(cc->next)) {
c277df42 3062 regcpblow(cp);
4633a7c4 3063 sayYES; /* All done. */
5f05dabc 3064 }
02db2b7b 3065 REGCP_UNWIND(lastcp);
5f05dabc 3066 regcppop();
2ab05381
IZ
3067 if (PL_regcc)
3068 PL_regcc->cur = ln;
3280af22 3069 PL_regcc = cc;
a0d0e21e 3070
c277df42 3071 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 3072 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3073 && !(PL_reg_flags & RF_warned)) {
3074 PL_reg_flags |= RF_warned;
e476b1b5 3075 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
3076 "Complex regular subexpression recursion",
3077 REG_INFTY - 1);
c277df42 3078 }
4633a7c4 3079 sayNO;
c277df42 3080 }
a687059c 3081
c277df42 3082 DEBUG_r(
c3464db5
DD
3083 PerlIO_printf(Perl_debug_log,
3084 "%*s trying longer...\n",
3280af22 3085 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3086 );
a0d0e21e 3087 /* Try scanning more and see if it helps. */
3280af22 3088 PL_reginput = locinput;
a0d0e21e
LW
3089 cc->cur = n;
3090 cc->lastloc = locinput;
5f05dabc 3091 cp = regcppush(cc->parenfloor);
02db2b7b 3092 REGCP_SET(lastcp);
5f05dabc 3093 if (regmatch(cc->scan)) {
c277df42 3094 regcpblow(cp);
4633a7c4 3095 sayYES;
5f05dabc 3096 }
02db2b7b 3097 REGCP_UNWIND(lastcp);
5f05dabc 3098 regcppop();
4633a7c4 3099 cc->cur = n - 1;
c277df42 3100 cc->lastloc = lastloc;
4633a7c4 3101 sayNO;
a0d0e21e
LW
3102 }
3103
3104 /* Prefer scan over next for maximal matching. */
3105
3106 if (n < cc->max) { /* More greed allowed? */
5f05dabc 3107 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
3108 cc->cur = n;
3109 cc->lastloc = locinput;
02db2b7b 3110 REGCP_SET(lastcp);
5f05dabc 3111 if (regmatch(cc->scan)) {
c277df42 3112 regcpblow(cp);
4633a7c4 3113 sayYES;
5f05dabc 3114 }
02db2b7b 3115 REGCP_UNWIND(lastcp);
a0d0e21e 3116 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3117 PL_reginput = locinput;
c277df42 3118 DEBUG_r(
c3464db5
DD
3119 PerlIO_printf(Perl_debug_log,
3120 "%*s failed, try continuation...\n",
3280af22 3121 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3122 );
3123 }
9041c2e3 3124 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3125 && !(PL_reg_flags & RF_warned)) {
3280af22 3126 PL_reg_flags |= RF_warned;
e476b1b5 3127 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
3128 "Complex regular subexpression recursion",
3129 REG_INFTY - 1);
a0d0e21e
LW
3130 }
3131
3132 /* Failed deeper matches of scan, so see if this one works. */
3280af22 3133 PL_regcc = cc->oldcc;
2ab05381
IZ
3134 if (PL_regcc)
3135 ln = PL_regcc->cur;
a0d0e21e 3136 if (regmatch(cc->next))
4633a7c4 3137 sayYES;
2ab05381
IZ
3138 if (PL_regcc)
3139 PL_regcc->cur = ln;
3280af22 3140 PL_regcc = cc;
4633a7c4 3141 cc->cur = n - 1;
c277df42 3142 cc->lastloc = lastloc;
4633a7c4 3143 sayNO;
a0d0e21e
LW
3144 }
3145 /* NOT REACHED */
9041c2e3 3146 case BRANCHJ:
c277df42
IZ
3147 next = scan + ARG(scan);
3148 if (next == scan)
3149 next = NULL;
3150 inner = NEXTOPER(NEXTOPER(scan));
3151 goto do_branch;
9041c2e3 3152 case BRANCH:
c277df42
IZ
3153 inner = NEXTOPER(scan);
3154 do_branch:
3155 {
c277df42
IZ
3156 c1 = OP(scan);
3157 if (OP(next) != c1) /* No choice. */
3158 next = inner; /* Avoid recursion. */
a0d0e21e 3159 else {
02db2b7b
IZ
3160 I32 lastparen = *PL_reglastparen;
3161 I32 unwind1;
3162 re_unwind_branch_t *uw;
3163
3164 /* Put unwinding data on stack */
3165 unwind1 = SSNEWt(1,re_unwind_branch_t);
3166 uw = SSPTRt(unwind1,re_unwind_branch_t);
3167 uw->prev = unwind;
3168 unwind = unwind1;
3169 uw->type = ((c1 == BRANCH)
3170 ? RE_UNWIND_BRANCH
3171 : RE_UNWIND_BRANCHJ);
3172 uw->lastparen = lastparen;
3173 uw->next = next;
3174 uw->locinput = locinput;
3175 uw->nextchr = nextchr;
3176#ifdef DEBUGGING
3177 uw->regindent = ++PL_regindent;
3178#endif
c277df42 3179
02db2b7b
IZ
3180 REGCP_SET(uw->lastcp);
3181
3182 /* Now go into the first branch */
3183 next = inner;
a687059c 3184 }
a0d0e21e
LW
3185 }
3186 break;
3187 case MINMOD:
3188 minmod = 1;
3189 break;
c277df42
IZ
3190 case CURLYM:
3191 {
00db4c45 3192 I32 l = 0;
c277df42 3193 CHECKPOINT lastcp;
9041c2e3 3194
c277df42
IZ
3195 /* We suppose that the next guy does not need
3196 backtracking: in particular, it is of constant length,
3197 and has no parenths to influence future backrefs. */
3198 ln = ARG1(scan); /* min to match */
3199 n = ARG2(scan); /* max to match */
c277df42
IZ
3200 paren = scan->flags;
3201 if (paren) {
3280af22
NIS
3202 if (paren > PL_regsize)
3203 PL_regsize = paren;
3204 if (paren > *PL_reglastparen)
3205 *PL_reglastparen = paren;
c277df42 3206 }
dc45a647 3207 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3208 if (paren)
3209 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3210 PL_reginput = locinput;
c277df42
IZ
3211 if (minmod) {
3212 minmod = 0;
3213 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3214 sayNO;
f31a99c8
HS
3215 /* if we matched something zero-length we don't need to
3216 backtrack - capturing parens are already defined, so
3217 the caveat in the maximal case doesn't apply
3218
3219 XXXX if ln == 0, we can redo this check first time
3220 through the following loop
3221 */
3222 if (ln && l == 0)
3223 n = ln; /* don't backtrack */
3280af22 3224 locinput = PL_reginput;
cca55fe3 3225 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3226 regnode *text_node = next;
3227
cca55fe3 3228 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3229
cca55fe3 3230 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3231 else {
cca55fe3
JP
3232 if (PL_regkind[(U8)OP(text_node)] == REF) {
3233 I32 n, ln;
3234 n = ARG(text_node); /* which paren pair */
3235 ln = PL_regstartp[n];
3236 /* assume yes if we haven't seen CLOSEn */
3237 if (
3238 *PL_reglastparen < n ||
3239 ln == -1 ||
3240 ln == PL_regendp[n]
3241 ) {
3242 c1 = c2 = -1000;
3243 goto assume_ok_MM;
3244 }
3245 c1 = *(PL_bostr + ln);
3246 }
3247 else { c1 = (U8)*STRING(text_node); }
af5decee 3248 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3249 c2 = PL_fold[c1];
af5decee 3250 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3251 c2 = PL_fold_locale[c1];
3252 else
3253 c2 = c1;
3254 }
a0ed51b3
LW
3255 }
3256 else
c277df42 3257 c1 = c2 = -1000;
cca55fe3 3258 assume_ok_MM:
02db2b7b 3259 REGCP_SET(lastcp);
5f4b28b2 3260 /* This may be improved if l == 0. */
c277df42
IZ
3261 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3262 /* If it could work, try it. */
3263 if (c1 == -1000 ||
3280af22
NIS
3264 UCHARAT(PL_reginput) == c1 ||
3265 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3266 {
3267 if (paren) {
f31a99c8 3268 if (ln) {
cf93c79d
IZ
3269 PL_regstartp[paren] =
3270 HOPc(PL_reginput, -l) - PL_bostr;
3271 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3272 }
3273 else
cf93c79d 3274 PL_regendp[paren] = -1;
c277df42
IZ
3275 }
3276 if (regmatch(next))
3277 sayYES;
02db2b7b 3278 REGCP_UNWIND(lastcp);
c277df42
IZ
3279 }
3280 /* Couldn't or didn't -- move forward. */
3280af22 3281 PL_reginput = locinput;
c277df42
IZ
3282 if (regrepeat_hard(scan, 1, &l)) {
3283 ln++;
3280af22 3284 locinput = PL_reginput;
c277df42
IZ
3285 }
3286 else
3287 sayNO;
3288 }
a0ed51b3
LW
3289 }
3290 else {
c277df42 3291 n = regrepeat_hard(scan, n, &l);
f31a99c8
HS
3292 /* if we matched something zero-length we don't need to
3293 backtrack, unless the minimum count is zero and we
3294 are capturing the result - in that case the capture
3295 being defined or not may affect later execution
3296 */
3297 if (n != 0 && l == 0 && !(paren && ln == 0))
3298 ln = n; /* don't backtrack */
3280af22 3299 locinput = PL_reginput;
c277df42 3300 DEBUG_r(
5c0ca799 3301 PerlIO_printf(Perl_debug_log,
faccc32b 3302 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3303 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3304 (IV) n, (IV)l)
c277df42
IZ
3305 );
3306 if (n >= ln) {
cca55fe3 3307 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3308 regnode *text_node = next;
3309
cca55fe3 3310 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3311
cca55fe3 3312 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3313 else {
cca55fe3
JP
3314 if (PL_regkind[(U8)OP(text_node)] == REF) {
3315 I32 n, ln;
3316 n = ARG(text_node); /* which paren pair */
3317 ln = PL_regstartp[n];
3318 /* assume yes if we haven't seen CLOSEn */
3319 if (
3320 *PL_reglastparen < n ||
3321 ln == -1 ||
3322 ln == PL_regendp[n]
3323 ) {
3324 c1 = c2 = -1000;
3325 goto assume_ok_REG;
3326 }
3327 c1 = *(PL_bostr + ln);
3328 }
3329 else { c1 = (U8)*STRING(text_node); }
3330
af5decee 3331 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3332 c2 = PL_fold[c1];
af5decee 3333 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3334 c2 = PL_fold_locale[c1];
3335 else
3336 c2 = c1;
3337 }
a0ed51b3
LW
3338 }
3339 else
c277df42
IZ
3340 c1 = c2 = -1000;
3341 }
cca55fe3 3342 assume_ok_REG:
02db2b7b 3343 REGCP_SET(lastcp);
c277df42
IZ
3344 while (n >= ln) {
3345 /* If it could work, try it. */
3346 if (c1 == -1000 ||
3280af22
NIS
3347 UCHARAT(PL_reginput) == c1 ||
3348 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3349 {
3350 DEBUG_r(
c3464db5 3351 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3352 "%*s trying tail with n=%"IVdf"...\n",
3353 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3354 );
3355 if (paren) {
3356 if (n) {
cf93c79d
IZ
3357 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3358 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3359 }
a0ed51b3 3360 else
cf93c79d 3361 PL_regendp[paren] = -1;
c277df42 3362 }
a0ed51b3
LW
3363 if (regmatch(next))
3364 sayYES;
02db2b7b 3365 REGCP_UNWIND(lastcp);
a0ed51b3 3366 }
c277df42
IZ
3367 /* Couldn't or didn't -- back up. */
3368 n--;
dfe13c55 3369 locinput = HOPc(locinput, -l);
3280af22 3370 PL_reginput = locinput;
c277df42
IZ
3371 }
3372 }
3373 sayNO;
3374 break;
3375 }
3376 case CURLYN:
3377 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3378 if (paren > PL_regsize)
3379 PL_regsize = paren;
3380 if (paren > *PL_reglastparen)
3381 *PL_reglastparen = paren;
c277df42
IZ
3382 ln = ARG1(scan); /* min to match */
3383 n = ARG2(scan); /* max to match */
dc45a647 3384 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3385 goto repeat;
a0d0e21e 3386 case CURLY:
c277df42 3387 paren = 0;
a0d0e21e
LW
3388 ln = ARG1(scan); /* min to match */
3389 n = ARG2(scan); /* max to match */
dc45a647 3390 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3391 goto repeat;
3392 case STAR:
3393 ln = 0;
c277df42 3394 n = REG_INFTY;
a0d0e21e 3395 scan = NEXTOPER(scan);
c277df42 3396 paren = 0;
a0d0e21e
LW
3397 goto repeat;
3398 case PLUS:
c277df42
IZ
3399 ln = 1;
3400 n = REG_INFTY;
3401 scan = NEXTOPER(scan);
3402 paren = 0;
3403 repeat:
a0d0e21e
LW
3404 /*
3405 * Lookahead to avoid useless match attempts
3406 * when we know what character comes next.
3407 */
5f80c4cf
JP
3408
3409 /*
3410 * Used to only do .*x and .*?x, but now it allows
3411 * for )'s, ('s and (?{ ... })'s to be in the way
3412 * of the quantifier and the EXACT-like node. -- japhy
3413 */
3414
cca55fe3 3415 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3416 U8 *s;
3417 regnode *text_node = next;
3418
cca55fe3 3419 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3420
cca55fe3 3421 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3422 else {
cca55fe3
JP
3423 if (PL_regkind[(U8)OP(text_node)] == REF) {
3424 I32 n, ln;
3425 n = ARG(text_node); /* which paren pair */
3426 ln = PL_regstartp[n];
3427 /* assume yes if we haven't seen CLOSEn */
3428 if (
3429 *PL_reglastparen < n ||
3430 ln == -1 ||
3431 ln == PL_regendp[n]
3432 ) {
3433 c1 = c2 = -1000;
3434 goto assume_ok_easy;
3435 }
9246c65e 3436 s = (U8*)PL_bostr + ln;
cca55fe3
JP
3437 }
3438 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
3439
3440 if (!UTF) {
3441 c2 = c1 = *s;
f65d3ee7 3442 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3443 c2 = PL_fold[c1];
f65d3ee7 3444 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf 3445 c2 = PL_fold_locale[c1];
1aa99e6b 3446 }
5f80c4cf 3447 else { /* UTF */
f65d3ee7 3448 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 3449 STRLEN ulen1, ulen2;
e7ae6809
JH
3450 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
3451 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
3452
3453 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3454 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3455
3456 c1 = utf8_to_uvuni(tmpbuf1, 0);
3457 c2 = utf8_to_uvuni(tmpbuf2, 0);
5f80c4cf
JP
3458 }
3459 else {
3460 c2 = c1 = utf8_to_uvchr(s, NULL);
3461 }
1aa99e6b
IH
3462 }
3463 }
bbce6d69 3464 }
a0d0e21e 3465 else
bbce6d69 3466 c1 = c2 = -1000;
cca55fe3 3467 assume_ok_easy:
3280af22 3468 PL_reginput = locinput;
a0d0e21e 3469 if (minmod) {
c277df42 3470 CHECKPOINT lastcp;
a0d0e21e
LW
3471 minmod = 0;
3472 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3473 sayNO;
a0ed51b3 3474 locinput = PL_reginput;
02db2b7b 3475 REGCP_SET(lastcp);
0fe9bf95 3476 if (c1 != -1000) {
1aa99e6b 3477 char *e; /* Should not check after this */
0fe9bf95
IZ
3478 char *old = locinput;
3479
1aa99e6b 3480 if (n == REG_INFTY) {
0fe9bf95 3481 e = PL_regeol - 1;
1aa99e6b
IH
3482 if (do_utf8)
3483 while (UTF8_IS_CONTINUATION(*(U8*)e))
3484 e--;
3485 }
3486 else if (do_utf8) {
3487 int m = n - ln;
3488 for (e = locinput;
3489 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3490 e += UTF8SKIP(e);
3491 }
3492 else {
3493 e = locinput + n - ln;
3494 if (e >= PL_regeol)
3495 e = PL_regeol - 1;
3496 }
0fe9bf95 3497 while (1) {
1aa99e6b 3498 int count;
0fe9bf95 3499 /* Find place 'next' could work */
1aa99e6b
IH
3500 if (!do_utf8) {
3501 if (c1 == c2) {
a8e8ab15
JH
3502 while (locinput <= e &&
3503 UCHARAT(locinput) != c1)
1aa99e6b
IH
3504 locinput++;
3505 } else {
9041c2e3 3506 while (locinput <= e
a8e8ab15
JH
3507 && UCHARAT(locinput) != c1
3508 && UCHARAT(locinput) != c2)
1aa99e6b
IH
3509 locinput++;
3510 }
3511 count = locinput - old;
3512 }
3513 else {
3514 STRLEN len;
3515 if (c1 == c2) {
3516 for (count = 0;
3517 locinput <= e &&
9041c2e3 3518 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3519 count++)
3520 locinput += len;
3521
3522 } else {
3523 for (count = 0; locinput <= e; count++) {
9041c2e3 3524 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3525 if (c == c1 || c == c2)
3526 break;
9041c2e3 3527 locinput += len;
1aa99e6b
IH
3528 }
3529 }
0fe9bf95 3530 }
9041c2e3 3531 if (locinput > e)
0fe9bf95
IZ
3532 sayNO;
3533 /* PL_reginput == old now */
3534 if (locinput != old) {
3535 ln = 1; /* Did some */
1aa99e6b 3536 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3537 sayNO;
3538 }
3539 /* PL_reginput == locinput now */
29d1e993 3540 TRYPAREN(paren, ln, locinput);
0fe9bf95 3541 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3542 REGCP_UNWIND(lastcp);
0fe9bf95 3543 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3544 old = locinput;
3545 if (do_utf8)
3546 locinput += UTF8SKIP(locinput);
3547 else
3548 locinput++;
0fe9bf95
IZ
3549 }
3550 }
3551 else
c277df42 3552 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3553 UV c;
3554 if (c1 != -1000) {
3555 if (do_utf8)
9041c2e3 3556 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3557 else
9041c2e3 3558 c = UCHARAT(PL_reginput);
2390ecbc
PP
3559 /* If it could work, try it. */