This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Unicode lookbehind looked bad.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
a687059c
LW
83#include "regcomp.h"
84
c277df42
IZ
85#define RF_tainted 1 /* tainted information used? */
86#define RF_warned 2 /* warned about big count? */
ce862d02 87#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
88#define RF_utf8 8 /* String contains multibyte chars? */
89
90#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
91
92#define RS_init 1 /* eval environment created */
93#define RS_set 2 /* replsv value is set */
c277df42 94
a687059c
LW
95#ifndef STATIC
96#define STATIC static
97#endif
98
c277df42
IZ
99/*
100 * Forwards.
101 */
102
a0ed51b3 103#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 104#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 105
dfe13c55
GS
106#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
108#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
109#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
110#define HOPc(pos,off) ((char*)HOP(pos,off))
111#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 112
efb30f32 113#define HOPBACK(pos, off) ( \
e54858b0 114 (PL_reg_match_utf8) \
efb30f32
HS
115 ? reghopmaybe((U8*)pos, -off) \
116 : (pos - off >= PL_bostr) \
117 ? (U8*)(pos - off) \
118 : (U8*)NULL \
119)
120#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
121
1aa99e6b
IH
122#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
123#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
124#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
125#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
126#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
127#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
128
8269fa76 129#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 130
5f80c4cf 131/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
132#define JUMPABLE(rn) ( \
133 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
134 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
135 OP(rn) == PLUS || OP(rn) == MINMOD || \
136 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
137)
138
cca55fe3
JP
139#define HAS_TEXT(rn) ( \
140 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
141)
e2d8ce26 142
cca55fe3 143#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 144 while (JUMPABLE(rn)) \
cca55fe3
JP
145 if (OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
146 PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 147 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
148 else if (OP(rn) == PLUS) \
149 rn = NEXTOPER(rn); \
e2d8ce26 150 else rn += NEXT_OFF(rn); \
5f80c4cf 151} STMT_END
74750237 152
acfe0abc 153static void restore_pos(pTHX_ void *arg);
51371543 154
76e3520e 155STATIC CHECKPOINT
cea2e8a9 156S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 157{
3280af22 158 int retval = PL_savestack_ix;
b1ce53c5
JH
159#define REGCP_PAREN_ELEMS 4
160 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
161 int p;
162
e49a9654
IH
163 if (paren_elems_to_push < 0)
164 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
165
a01268b5 166#define REGCP_OTHER_ELEMS 6
b1ce53c5 167 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 168 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 169/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
170 SSPUSHINT(PL_regendp[p]);
171 SSPUSHINT(PL_regstartp[p]);
3280af22 172 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
173 SSPUSHINT(p);
174 }
b1ce53c5 175/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
176 SSPUSHINT(PL_regsize);
177 SSPUSHINT(*PL_reglastparen);
a01268b5 178 SSPUSHINT(*PL_reglastcloseparen);
3280af22 179 SSPUSHPTR(PL_reginput);
41123dfd
JH
180#define REGCP_FRAME_ELEMS 2
181/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
182 * are needed for the regexp context stack bookkeeping. */
183 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 184 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 185
a0d0e21e
LW
186 return retval;
187}
188
c277df42 189/* These are needed since we do not localize EVAL nodes: */
02db2b7b 190# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 191 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 192 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 193
02db2b7b 194# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 195 PerlIO_printf(Perl_debug_log, \
faccc32b 196 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 197 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 198
76e3520e 199STATIC char *
cea2e8a9 200S_regcppop(pTHX)
a0d0e21e 201{
b1ce53c5 202 I32 i;
a0d0e21e
LW
203 U32 paren = 0;
204 char *input;
cf93c79d 205 I32 tmps;
b1ce53c5
JH
206
207 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 208 i = SSPOPINT;
b1ce53c5
JH
209 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
210 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 211 input = (char *) SSPOPPTR;
a01268b5 212 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
213 *PL_reglastparen = SSPOPINT;
214 PL_regsize = SSPOPINT;
b1ce53c5
JH
215
216 /* Now restore the parentheses context. */
41123dfd
JH
217 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
218 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 219 paren = (U32)SSPOPINT;
3280af22 220 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
221 PL_regstartp[paren] = SSPOPINT;
222 tmps = SSPOPINT;
3280af22
NIS
223 if (paren <= *PL_reglastparen)
224 PL_regendp[paren] = tmps;
c277df42 225 DEBUG_r(
c3464db5 226 PerlIO_printf(Perl_debug_log,
b900a521 227 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 228 (UV)paren, (IV)PL_regstartp[paren],
b900a521 229 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 230 (IV)PL_regendp[paren],
3280af22 231 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 232 );
a0d0e21e 233 }
c277df42 234 DEBUG_r(
3280af22 235 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 236 PerlIO_printf(Perl_debug_log,
faccc32b
JH
237 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
238 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
239 }
240 );
daf18116 241#if 1
dafc8851
JH
242 /* It would seem that the similar code in regtry()
243 * already takes care of this, and in fact it is in
244 * a better location to since this code can #if 0-ed out
245 * but the code in regtry() is needed or otherwise tests
246 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
247 * (as of patchlevel 7877) will fail. Then again,
248 * this code seems to be necessary or otherwise
249 * building DynaLoader will fail:
250 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
251 * --jhi */
3280af22
NIS
252 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
253 if (paren > PL_regsize)
cf93c79d
IZ
254 PL_regstartp[paren] = -1;
255 PL_regendp[paren] = -1;
a0d0e21e 256 }
dafc8851 257#endif
a0d0e21e
LW
258 return input;
259}
260
0f5d15d6 261STATIC char *
cea2e8a9 262S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
263{
264 I32 tmp = PL_savestack_ix;
265
266 PL_savestack_ix = ss;
267 regcppop();
268 PL_savestack_ix = tmp;
942e002e 269 return Nullch;
0f5d15d6
IZ
270}
271
272typedef struct re_cc_state
273{
274 I32 ss;
275 regnode *node;
276 struct re_cc_state *prev;
277 CURCUR *cc;
278 regexp *re;
279} re_cc_state;
280
02db2b7b 281#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 282
29d1e993
HS
283#define TRYPAREN(paren, n, input) { \
284 if (paren) { \
285 if (n) { \
286 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
287 PL_regendp[paren] = input - PL_bostr; \
288 } \
289 else \
290 PL_regendp[paren] = -1; \
291 } \
292 if (regmatch(next)) \
293 sayYES; \
294 if (paren && n) \
295 PL_regendp[paren] = -1; \
296}
297
298
a687059c 299/*
e50aee73 300 * pregexec and friends
a687059c
LW
301 */
302
303/*
c277df42 304 - pregexec - match a regexp against a string
a687059c 305 */
c277df42 306I32
864dbfa3 307Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 308 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
309/* strend: pointer to null at end of string */
310/* strbeg: real beginning of string */
311/* minend: end of match must be >=minend after stringarg. */
312/* nosave: For optimizations. */
313{
314 return
9041c2e3 315 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
316 nosave ? 0 : REXEC_COPY_STR);
317}
0f5d15d6
IZ
318
319STATIC void
cea2e8a9 320S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
321{
322 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
323#ifdef DEBUGGING
324 PL_regprogram = prog->program;
325#endif
326 PL_regnpar = prog->nparens;
9041c2e3
NIS
327 PL_regdata = prog->data;
328 PL_reg_re = prog;
0f5d15d6 329}
22e551b9 330
9041c2e3 331/*
cad2e5aa
JH
332 * Need to implement the following flags for reg_anch:
333 *
334 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
335 * USE_INTUIT_ML
336 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
337 * INTUIT_AUTORITATIVE_ML
338 * INTUIT_ONCE_NOML - Intuit can match in one location only.
339 * INTUIT_ONCE_ML
340 *
341 * Another flag for this function: SECOND_TIME (so that float substrs
342 * with giant delta may be not rechecked).
343 */
344
345/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
346
2c2d71f5 347/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
348 Otherwise, only SvCUR(sv) is used to get strbeg. */
349
350/* XXXX We assume that strpos is strbeg unless sv. */
351
6eb5f6b9
JH
352/* XXXX Some places assume that there is a fixed substring.
353 An update may be needed if optimizer marks as "INTUITable"
354 RExen without fixed substrings. Similarly, it is assumed that
355 lengths of all the strings are no more than minlen, thus they
356 cannot come from lookahead.
357 (Or minlen should take into account lookahead.) */
358
2c2d71f5
JH
359/* A failure to find a constant substring means that there is no need to make
360 an expensive call to REx engine, thus we celebrate a failure. Similarly,
361 finding a substring too deep into the string means that less calls to
30944b6d
IZ
362 regtry() should be needed.
363
364 REx compiler's optimizer found 4 possible hints:
365 a) Anchored substring;
366 b) Fixed substring;
367 c) Whether we are anchored (beginning-of-line or \G);
368 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 369 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
370 string which does not contradict any of them.
371 */
2c2d71f5 372
6eb5f6b9
JH
373/* Most of decisions we do here should have been done at compile time.
374 The nodes of the REx which we used for the search should have been
375 deleted from the finite automaton. */
376
cad2e5aa
JH
377char *
378Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
379 char *strend, U32 flags, re_scream_pos_data *data)
380{
b7953727 381 register I32 start_shift = 0;
cad2e5aa 382 /* Should be nonnegative! */
b7953727 383 register I32 end_shift = 0;
2c2d71f5
JH
384 register char *s;
385 register SV *check;
a1933d95 386 char *strbeg;
cad2e5aa
JH
387 char *t;
388 I32 ml_anch;
6eb5f6b9 389 register char *other_last = Nullch; /* other substr checked before this */
b7953727 390 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
391#ifdef DEBUGGING
392 char *i_strpos = strpos;
ce333219 393 SV *dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 394#endif
cad2e5aa 395
b8d68ded
JH
396 if (prog->reganch & ROPT_UTF8) {
397 DEBUG_r(PerlIO_printf(Perl_debug_log,
398 "UTF-8 regex...\n"));
399 PL_reg_flags |= RF_utf8;
400 }
401
2a782b5b 402 DEBUG_r({
b8d68ded 403 char *s = PL_reg_match_utf8 ?
c728cb41
JH
404 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
405 strpos;
b8d68ded
JH
406 int len = PL_reg_match_utf8 ?
407 strlen(s) : strend - strpos;
2a782b5b
JH
408 if (!PL_colorset)
409 reginitcolors();
b8d68ded
JH
410 if (PL_reg_match_utf8)
411 DEBUG_r(PerlIO_printf(Perl_debug_log,
412 "UTF-8 target...\n"));
2a782b5b
JH
413 PerlIO_printf(Perl_debug_log,
414 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
415 PL_colors[4],PL_colors[5],PL_colors[0],
416 prog->precomp,
417 PL_colors[1],
418 (strlen(prog->precomp) > 60 ? "..." : ""),
419 PL_colors[0],
420 (int)(len > 60 ? 60 : len),
421 s, PL_colors[1],
422 (len > 60 ? "..." : "")
423 );
424 });
cad2e5aa 425
1aa99e6b 426 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
a72c7584
JH
427 DEBUG_r(PerlIO_printf(Perl_debug_log,
428 "String too short... [re_intuit_start]\n"));
cad2e5aa 429 goto fail;
2c2d71f5 430 }
a1933d95 431 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 432 PL_regeol = strend;
653099ff 433 check = prog->check_substr;
2c2d71f5 434 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
435 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
436 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 437 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 438
7e25d62c
JH
439 if (!ml_anch) {
440 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
441 | ROPT_IMPLICIT)) /* not a real BOL */
442 /* SvCUR is not set on references: SvRV and SvPVX overlap */
443 && sv && !SvROK(sv)
444 && (strpos != strbeg)) {
445 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
446 goto fail;
447 }
448 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 449 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 450 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
451 I32 slen;
452
1aa99e6b 453 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
454 if (SvTAIL(check)) {
455 slen = SvCUR(check); /* >= 1 */
cad2e5aa 456
9041c2e3 457 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
458 || (strend - s == slen && strend[-1] != '\n')) {
459 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
460 goto fail_finish;
cad2e5aa
JH
461 }
462 /* Now should match s[0..slen-2] */
463 slen--;
653099ff 464 if (slen && (*SvPVX(check) != *s
cad2e5aa 465 || (slen > 1
653099ff 466 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
467 report_neq:
468 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
469 goto fail_finish;
470 }
cad2e5aa 471 }
653099ff
GS
472 else if (*SvPVX(check) != *s
473 || ((slen = SvCUR(check)) > 1
474 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
475 goto report_neq;
476 goto success_at_start;
7e25d62c 477 }
cad2e5aa 478 }
2c2d71f5 479 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 480 s = strpos;
2c2d71f5 481 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 482 end_shift = prog->minlen - start_shift -
653099ff 483 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 484 if (!ml_anch) {
653099ff
GS
485 I32 end = prog->check_offset_max + CHR_SVLEN(check)
486 - (SvTAIL(check) != 0);
1aa99e6b 487 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
488
489 if (end_shift < eshift)
490 end_shift = eshift;
491 }
cad2e5aa 492 }
2c2d71f5 493 else { /* Can match at random position */
cad2e5aa
JH
494 ml_anch = 0;
495 s = strpos;
2c2d71f5
JH
496 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
497 /* Should be nonnegative! */
498 end_shift = prog->minlen - start_shift -
653099ff 499 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
500 }
501
2c2d71f5 502#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 503 if (end_shift < 0)
6bbae5e6 504 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
505#endif
506
2c2d71f5
JH
507 restart:
508 /* Find a possible match in the region s..strend by looking for
509 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 510 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
511 I32 p = -1; /* Internal iterator of scream. */
512 I32 *pp = data ? data->scream_pos : &p;
513
2c2d71f5
JH
514 if (PL_screamfirst[BmRARE(check)] >= 0
515 || ( BmRARE(check) == '\n'
516 && (BmPREVIOUS(check) == SvCUR(check) - 1)
517 && SvTAIL(check) ))
9041c2e3 518 s = screaminstr(sv, check,
2c2d71f5 519 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 520 else
2c2d71f5 521 goto fail_finish;
cad2e5aa
JH
522 if (data)
523 *data->scream_olds = s;
524 }
f33976b4 525 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
526 s = fbm_instr((U8*)(s + start_shift),
527 (U8*)(strend - end_shift),
528 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 529 else
1aa99e6b
IH
530 s = fbm_instr(HOP3(s, start_shift, strend),
531 HOP3(strend, -end_shift, strbeg),
2c2d71f5 532 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
533
534 /* Update the count-of-usability, remove useless subpatterns,
535 unshift s. */
2c2d71f5
JH
536
537 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
538 (s ? "Found" : "Did not find"),
539 ((check == prog->anchored_substr) ? "anchored" : "floating"),
540 PL_colors[0],
7b0972df
JH
541 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
542 SvPVX(check),
2c2d71f5
JH
543 PL_colors[1], (SvTAIL(check) ? "$" : ""),
544 (s ? " at offset " : "...\n") ) );
545
546 if (!s)
547 goto fail_finish;
548
6eb5f6b9
JH
549 check_at = s;
550
2c2d71f5 551 /* Finish the diagnostic message */
30944b6d 552 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
553
554 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
555 Start with the other substr.
556 XXXX no SCREAM optimization yet - and a very coarse implementation
557 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
558 *always* match. Probably should be marked during compile...
559 Probably it is right to do no SCREAM here...
560 */
561
562 if (prog->float_substr && prog->anchored_substr) {
30944b6d 563 /* Take into account the "other" substring. */
2c2d71f5
JH
564 /* XXXX May be hopelessly wrong for UTF... */
565 if (!other_last)
6eb5f6b9 566 other_last = strpos;
2c2d71f5 567 if (check == prog->float_substr) {
30944b6d
IZ
568 do_other_anchored:
569 {
1aa99e6b 570 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
571 char *s1 = s;
572
2c2d71f5
JH
573 t = s - prog->check_offset_max;
574 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
575 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 576 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 577 && t > strpos)))
30944b6d 578 /* EMPTY */;
2c2d71f5
JH
579 else
580 t = strpos;
1aa99e6b 581 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
582 if (t < other_last) /* These positions already checked */
583 t = other_last;
1aa99e6b 584 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
585 if (last < last1)
586 last1 = last;
587 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
588 /* On end-of-str: see comment below. */
589 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
590 HOP3(HOP3(last1, prog->anchored_offset, strend)
591 + SvCUR(prog->anchored_substr),
592 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
593 prog->anchored_substr,
594 PL_multiline ? FBMrf_MULTILINE : 0);
595 DEBUG_r(PerlIO_printf(Perl_debug_log,
596 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
597 (s ? "Found" : "Contradicts"),
598 PL_colors[0],
7b0972df
JH
599 (int)(SvCUR(prog->anchored_substr)
600 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
601 SvPVX(prog->anchored_substr),
602 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
603 if (!s) {
604 if (last1 >= last2) {
605 DEBUG_r(PerlIO_printf(Perl_debug_log,
606 ", giving up...\n"));
607 goto fail_finish;
608 }
609 DEBUG_r(PerlIO_printf(Perl_debug_log,
610 ", trying floating at offset %ld...\n",
1aa99e6b
IH
611 (long)(HOP3c(s1, 1, strend) - i_strpos)));
612 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
613 s = HOP3c(last, 1, strend);
2c2d71f5
JH
614 goto restart;
615 }
616 else {
617 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 618 (long)(s - i_strpos)));
1aa99e6b
IH
619 t = HOP3c(s, -prog->anchored_offset, strbeg);
620 other_last = HOP3c(s, 1, strend);
30944b6d 621 s = s1;
2c2d71f5
JH
622 if (t == strpos)
623 goto try_at_start;
2c2d71f5
JH
624 goto try_at_offset;
625 }
30944b6d 626 }
2c2d71f5
JH
627 }
628 else { /* Take into account the floating substring. */
629 char *last, *last1;
630 char *s1 = s;
631
1aa99e6b
IH
632 t = HOP3c(s, -start_shift, strbeg);
633 last1 = last =
634 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
635 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
636 last = HOP3c(t, prog->float_max_offset, strend);
637 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
638 if (s < other_last)
639 s = other_last;
2c2d71f5
JH
640 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
641 /* fbm_instr() takes into account exact value of end-of-str
642 if the check is SvTAIL(ed). Since false positives are OK,
643 and end-of-str is not later than strend we are OK. */
644 s = fbm_instr((unsigned char*)s,
645 (unsigned char*)last + SvCUR(prog->float_substr)
646 - (SvTAIL(prog->float_substr)!=0),
647 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
648 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
649 (s ? "Found" : "Contradicts"),
650 PL_colors[0],
7b0972df
JH
651 (int)(SvCUR(prog->float_substr)
652 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
653 SvPVX(prog->float_substr),
654 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
655 if (!s) {
656 if (last1 == last) {
657 DEBUG_r(PerlIO_printf(Perl_debug_log,
658 ", giving up...\n"));
659 goto fail_finish;
660 }
661 DEBUG_r(PerlIO_printf(Perl_debug_log,
662 ", trying anchored starting at offset %ld...\n",
30944b6d 663 (long)(s1 + 1 - i_strpos)));
803ff556 664 other_last = last;
1aa99e6b 665 s = HOP3c(t, 1, strend);
2c2d71f5
JH
666 goto restart;
667 }
668 else {
669 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 670 (long)(s - i_strpos)));
803ff556 671 other_last = s; /* Fix this later. --Hugo */
30944b6d 672 s = s1;
2c2d71f5
JH
673 if (t == strpos)
674 goto try_at_start;
2c2d71f5
JH
675 goto try_at_offset;
676 }
677 }
cad2e5aa 678 }
2c2d71f5
JH
679
680 t = s - prog->check_offset_max;
2c2d71f5
JH
681 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
682 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
683 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
684 && t > strpos))) {
2c2d71f5
JH
685 /* Fixed substring is found far enough so that the match
686 cannot start at strpos. */
687 try_at_offset:
cad2e5aa 688 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
689 /* Eventually fbm_*() should handle this, but often
690 anchored_offset is not 0, so this check will not be wasted. */
691 /* XXXX In the code below we prefer to look for "^" even in
692 presence of anchored substrings. And we search even
693 beyond the found float position. These pessimizations
694 are historical artefacts only. */
695 find_anchor:
2c2d71f5 696 while (t < strend - prog->minlen) {
cad2e5aa 697 if (*t == '\n') {
4ee3650e 698 if (t < check_at - prog->check_offset_min) {
30944b6d 699 if (prog->anchored_substr) {
4ee3650e
GS
700 /* Since we moved from the found position,
701 we definitely contradict the found anchored
30944b6d
IZ
702 substr. Due to the above check we do not
703 contradict "check" substr.
704 Thus we can arrive here only if check substr
705 is float. Redo checking for "other"=="fixed".
706 */
9041c2e3 707 strpos = t + 1;
30944b6d
IZ
708 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
709 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
710 goto do_other_anchored;
711 }
4ee3650e
GS
712 /* We don't contradict the found floating substring. */
713 /* XXXX Why not check for STCLASS? */
cad2e5aa 714 s = t + 1;
2c2d71f5 715 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 716 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
717 goto set_useful;
718 }
4ee3650e
GS
719 /* Position contradicts check-string */
720 /* XXXX probably better to look for check-string
721 than for "\n", so one should lower the limit for t? */
722 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 723 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 724 other_last = strpos = s = t + 1;
cad2e5aa
JH
725 goto restart;
726 }
727 t++;
728 }
2c2d71f5
JH
729 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
730 PL_colors[0],PL_colors[1]));
731 goto fail_finish;
cad2e5aa 732 }
f5952150
GS
733 else {
734 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
735 PL_colors[0],PL_colors[1]));
736 }
cad2e5aa
JH
737 s = t;
738 set_useful:
2c2d71f5 739 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
740 }
741 else {
f5952150 742 /* The found string does not prohibit matching at strpos,
2c2d71f5 743 - no optimization of calling REx engine can be performed,
f5952150
GS
744 unless it was an MBOL and we are not after MBOL,
745 or a future STCLASS check will fail this. */
2c2d71f5
JH
746 try_at_start:
747 /* Even in this situation we may use MBOL flag if strpos is offset
748 wrt the start of the string. */
05b4157f 749 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 750 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
751 /* May be due to an implicit anchor of m{.*foo} */
752 && !(prog->reganch & ROPT_IMPLICIT))
753 {
cad2e5aa
JH
754 t = strpos;
755 goto find_anchor;
756 }
30944b6d 757 DEBUG_r( if (ml_anch)
f5952150
GS
758 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
759 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 760 );
2c2d71f5 761 success_at_start:
30944b6d 762 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 763 && prog->check_substr /* Could be deleted already */
cad2e5aa 764 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
765 && prog->check_substr == prog->float_substr)
766 {
cad2e5aa 767 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 768 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
769 SvREFCNT_dec(prog->check_substr);
770 prog->check_substr = Nullsv; /* disable */
771 prog->float_substr = Nullsv; /* clear */
5e39e1e5 772 check = Nullsv; /* abort */
cad2e5aa 773 s = strpos;
3cf5c195
IZ
774 /* XXXX This is a remnant of the old implementation. It
775 looks wasteful, since now INTUIT can use many
6eb5f6b9 776 other heuristics. */
cad2e5aa
JH
777 prog->reganch &= ~RE_USE_INTUIT;
778 }
779 else
780 s = strpos;
781 }
782
6eb5f6b9
JH
783 /* Last resort... */
784 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
785 if (prog->regstclass) {
786 /* minlen == 0 is possible if regstclass is \b or \B,
787 and the fixed substr is ''$.
788 Since minlen is already taken into account, s+1 is before strend;
789 accidentally, minlen >= 1 guaranties no false positives at s + 1
790 even for \b or \B. But (minlen? 1 : 0) below assumes that
791 regstclass does not come from lookahead... */
792 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
793 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 794 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 795 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 796 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 797 : 1);
6eb5f6b9 798 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
799 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
800 : (prog->float_substr
801 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
802 cl_l, strend)
803 : strend);
a1933d95 804 char *startpos = strbeg;
6eb5f6b9
JH
805
806 t = s;
76384e4a 807 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 808 PL_regdata = prog->data;
76384e4a
GS
809 PL_bostr = startpos;
810 }
f33976b4 811 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
6eb5f6b9
JH
812 if (!s) {
813#ifdef DEBUGGING
b7953727 814 char *what = 0;
6eb5f6b9
JH
815#endif
816 if (endpos == strend) {
817 DEBUG_r( PerlIO_printf(Perl_debug_log,
818 "Could not match STCLASS...\n") );
819 goto fail;
820 }
66e933ab
GS
821 DEBUG_r( PerlIO_printf(Perl_debug_log,
822 "This position contradicts STCLASS...\n") );
653099ff
GS
823 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
824 goto fail;
6eb5f6b9
JH
825 /* Contradict one of substrings */
826 if (prog->anchored_substr) {
6eb5f6b9
JH
827 if (prog->anchored_substr == check) {
828 DEBUG_r( what = "anchored" );
829 hop_and_restart:
1aa99e6b 830 s = HOP3c(t, 1, strend);
66e933ab
GS
831 if (s + start_shift + end_shift > strend) {
832 /* XXXX Should be taken into account earlier? */
833 DEBUG_r( PerlIO_printf(Perl_debug_log,
834 "Could not match STCLASS...\n") );
835 goto fail;
836 }
5e39e1e5
HS
837 if (!check)
838 goto giveup;
6eb5f6b9 839 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 840 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
841 what, (long)(s + start_shift - i_strpos)) );
842 goto restart;
843 }
66e933ab 844 /* Have both, check_string is floating */
6eb5f6b9
JH
845 if (t + start_shift >= check_at) /* Contradicts floating=check */
846 goto retry_floating_check;
847 /* Recheck anchored substring, but not floating... */
9041c2e3 848 s = check_at;
5e39e1e5
HS
849 if (!check)
850 goto giveup;
6eb5f6b9 851 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 852 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
853 (long)(other_last - i_strpos)) );
854 goto do_other_anchored;
855 }
60e71179
GS
856 /* Another way we could have checked stclass at the
857 current position only: */
858 if (ml_anch) {
859 s = t = t + 1;
5e39e1e5
HS
860 if (!check)
861 goto giveup;
60e71179 862 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
863 "Looking for /%s^%s/m starting at offset %ld...\n",
864 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 865 goto try_at_offset;
66e933ab 866 }
60e71179
GS
867 if (!prog->float_substr) /* Could have been deleted */
868 goto fail;
6eb5f6b9
JH
869 /* Check is floating subtring. */
870 retry_floating_check:
871 t = check_at - start_shift;
872 DEBUG_r( what = "floating" );
873 goto hop_and_restart;
874 }
b7953727
JH
875 if (t != s) {
876 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 877 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
878 (long)(t - i_strpos), (long)(s - i_strpos))
879 );
880 }
881 else {
882 DEBUG_r(PerlIO_printf(Perl_debug_log,
883 "Does not contradict STCLASS...\n");
884 );
885 }
6eb5f6b9 886 }
5e39e1e5
HS
887 giveup:
888 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
889 PL_colors[4], (check ? "Guessed" : "Giving up"),
890 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 891 return s;
2c2d71f5
JH
892
893 fail_finish: /* Substring not found */
66e933ab
GS
894 if (prog->check_substr) /* could be removed already */
895 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 896 fail:
2c2d71f5 897 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
898 PL_colors[4],PL_colors[5]));
899 return Nullch;
900}
9661b544 901
6eb5f6b9 902/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
903STATIC char *
904S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 905{
6eb5f6b9
JH
906 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
907 char *m;
d8093b23 908 STRLEN ln;
909 unsigned int c1;
910 unsigned int c2;
6eb5f6b9
JH
911 char *e;
912 register I32 tmp = 1; /* Scratch variable? */
53c4c00c 913 register bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 914
6eb5f6b9
JH
915 /* We know what class it must start with. */
916 switch (OP(c)) {
6eb5f6b9 917 case ANYOF:
a0ed51b3 918 while (s < strend) {
ffc61ed2 919 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
920 if (tmp && (norun || regtry(prog, s)))
921 goto got_it;
922 else
923 tmp = doevery;
a0ed51b3 924 }
6eb5f6b9
JH
925 else
926 tmp = 1;
ffc61ed2 927 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 928 }
6eb5f6b9 929 break;
f33976b4
DB
930 case CANY:
931 while (s < strend) {
932 if (tmp && (norun || regtry(prog, s)))
933 goto got_it;
934 else
935 tmp = doevery;
936 s++;
937 }
938 break;
6eb5f6b9
JH
939 case EXACTF:
940 m = STRING(c);
941 ln = STR_LEN(c);
1aa99e6b 942 if (UTF) {
a2a2844f 943 STRLEN ulen1, ulen2;
e7ae6809
JH
944 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
945 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
946
947 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
948 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
949
950 c1 = utf8_to_uvuni(tmpbuf1, 0);
951 c2 = utf8_to_uvuni(tmpbuf2, 0);
1aa99e6b
IH
952 }
953 else {
954 c1 = *(U8*)m;
955 c2 = PL_fold[c1];
956 }
6eb5f6b9
JH
957 goto do_exactf;
958 case EXACTFL:
959 m = STRING(c);
960 ln = STR_LEN(c);
d8093b23 961 c1 = *(U8*)m;
6eb5f6b9
JH
962 c2 = PL_fold_locale[c1];
963 do_exactf:
93c50829 964 e = do_utf8 ? s + ln : strend - ln;
b3c9acc1 965
6eb5f6b9
JH
966 if (norun && e < s)
967 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 968
60a8b682
JH
969 /* The idea in the EXACTF* cases is to first find the
970 * first character of the EXACTF* node and then, if
971 * necessary, case-insensitively compare the full
972 * text of the node. The c1 and c2 are the first
973 * characters (though in Unicode it gets a bit
974 * more complicated because there are more cases
975 * than just upper and lower: one is really supposed
976 * to use the so-called folding case for case-insensitive
977 * matching (called "loose matching" in Unicode). */
978
1aa99e6b 979 if (do_utf8) {
575cac57
JH
980 UV c, f;
981 U8 tmpbuf [UTF8_MAXLEN+1];
982 U8 foldbuf[UTF8_MAXLEN_FOLD+1];
983 STRLEN len, foldlen;
d7f013c8 984
09091399 985 if (c1 == c2) {
1aa99e6b 986 while (s <= e) {
575cac57 987 c = utf8_to_uvchr((U8*)s, &len);
80aecb99
JH
988 if ( c == c1
989 && (ln == len ||
66423254
JH
990 ibcmp_utf8(s, (char **)0, 0, do_utf8,
991 m, (char **)0, ln, UTF))
55da9344 992 && (norun || regtry(prog, s)) )
1aa99e6b 993 goto got_it;
80aecb99
JH
994 else {
995 uvchr_to_utf8(tmpbuf, c);
ac7e0132 996 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
997 if ( f != c
998 && (f == c1 || f == c2)
999 && (ln == foldlen ||
66423254
JH
1000 !ibcmp_utf8((char *) foldbuf,
1001 (char **)0, foldlen, do_utf8,
d07ddd77 1002 m,
66423254 1003 (char **)0, ln, UTF))
80aecb99
JH
1004 && (norun || regtry(prog, s)) )
1005 goto got_it;
1006 }
1aa99e6b
IH
1007 s += len;
1008 }
09091399
JH
1009 }
1010 else {
1aa99e6b 1011 while (s <= e) {
575cac57 1012 c = utf8_to_uvchr((U8*)s, &len);
80aecb99 1013
60a8b682
JH
1014 /* Handle some of the three Greek sigmas cases.
1015 * Note that not all the possible combinations
1016 * are handled here: some of them are handled
1017 * handled by the standard folding rules, and
1018 * some of them (the character class or ANYOF
1019 * cases) are handled during compiletime in
1020 * regexec.c:S_regclass(). */
880bd946
JH
1021 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1022 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1023 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99
JH
1024
1025 if ( (c == c1 || c == c2)
1026 && (ln == len ||
66423254
JH
1027 ibcmp_utf8(s, (char **)0, 0, do_utf8,
1028 m, (char **)0, ln, UTF))
55da9344 1029 && (norun || regtry(prog, s)) )
1aa99e6b 1030 goto got_it;
80aecb99
JH
1031 else {
1032 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1033 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1034 if ( f != c
1035 && (f == c1 || f == c2)
1036 && (ln == foldlen ||
1037 !ibcmp_utf8((char *)foldbuf,
66423254 1038 (char **)0, foldlen, do_utf8,
d07ddd77 1039 m,
66423254 1040 (char **)0, ln, UTF))
80aecb99
JH
1041 && (norun || regtry(prog, s)) )
1042 goto got_it;
1043 }
1aa99e6b
IH
1044 s += len;
1045 }
09091399 1046 }
1aa99e6b
IH
1047 }
1048 else {
1049 if (c1 == c2)
1050 while (s <= e) {
1051 if ( *(U8*)s == c1
1052 && (ln == 1 || !(OP(c) == EXACTF
1053 ? ibcmp(s, m, ln)
1054 : ibcmp_locale(s, m, ln)))
1055 && (norun || regtry(prog, s)) )
1056 goto got_it;
1057 s++;
1058 }
1059 else
1060 while (s <= e) {
1061 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1062 && (ln == 1 || !(OP(c) == EXACTF
1063 ? ibcmp(s, m, ln)
1064 : ibcmp_locale(s, m, ln)))
1065 && (norun || regtry(prog, s)) )
1066 goto got_it;
1067 s++;
1068 }
b3c9acc1
IZ
1069 }
1070 break;
bbce6d69 1071 case BOUNDL:
3280af22 1072 PL_reg_flags |= RF_tainted;
bbce6d69 1073 /* FALL THROUGH */
a0d0e21e 1074 case BOUND:
ffc61ed2 1075 if (do_utf8) {
12d33761 1076 if (s == PL_bostr)
ffc61ed2
JH
1077 tmp = '\n';
1078 else {
1aa99e6b 1079 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1080
0064a8a9
JH
1081 if (s > (char*)r)
1082 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1083 }
1084 tmp = ((OP(c) == BOUND ?
9041c2e3 1085 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1086 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1087 while (s < strend) {
1088 if (tmp == !(OP(c) == BOUND ?
3568d838 1089 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1090 isALNUM_LC_utf8((U8*)s)))
1091 {
1092 tmp = !tmp;
1093 if ((norun || regtry(prog, s)))
1094 goto got_it;
1095 }
1096 s += UTF8SKIP(s);
a687059c 1097 }
a0d0e21e 1098 }
667bb95a 1099 else {
12d33761 1100 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1101 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1102 while (s < strend) {
1103 if (tmp ==
1104 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1105 tmp = !tmp;
1106 if ((norun || regtry(prog, s)))
1107 goto got_it;
1108 }
1109 s++;
a0ed51b3 1110 }
a0ed51b3 1111 }
6eb5f6b9 1112 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1113 goto got_it;
1114 break;
bbce6d69 1115 case NBOUNDL:
3280af22 1116 PL_reg_flags |= RF_tainted;
bbce6d69 1117 /* FALL THROUGH */
a0d0e21e 1118 case NBOUND:
ffc61ed2 1119 if (do_utf8) {
12d33761 1120 if (s == PL_bostr)
ffc61ed2
JH
1121 tmp = '\n';
1122 else {
1aa99e6b 1123 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1124
0064a8a9
JH
1125 if (s > (char*)r)
1126 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1127 }
1128 tmp = ((OP(c) == NBOUND ?
9041c2e3 1129 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1130 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1131 while (s < strend) {
1132 if (tmp == !(OP(c) == NBOUND ?
3568d838 1133 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1134 isALNUM_LC_utf8((U8*)s)))
1135 tmp = !tmp;
1136 else if ((norun || regtry(prog, s)))
1137 goto got_it;
1138 s += UTF8SKIP(s);
1139 }
a0d0e21e 1140 }
667bb95a 1141 else {
12d33761 1142 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1143 tmp = ((OP(c) == NBOUND ?
1144 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1145 while (s < strend) {
1146 if (tmp ==
1147 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1148 tmp = !tmp;
1149 else if ((norun || regtry(prog, s)))
1150 goto got_it;
1151 s++;
1152 }
a0ed51b3 1153 }
6eb5f6b9 1154 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1155 goto got_it;
1156 break;
a0d0e21e 1157 case ALNUM:
ffc61ed2 1158 if (do_utf8) {
8269fa76 1159 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1160 while (s < strend) {
3568d838 1161 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1162 if (tmp && (norun || regtry(prog, s)))
1163 goto got_it;
1164 else
1165 tmp = doevery;
1166 }
bbce6d69 1167 else
ffc61ed2
JH
1168 tmp = 1;
1169 s += UTF8SKIP(s);
bbce6d69 1170 }
bbce6d69 1171 }
ffc61ed2
JH
1172 else {
1173 while (s < strend) {
1174 if (isALNUM(*s)) {
1175 if (tmp && (norun || regtry(prog, s)))
1176 goto got_it;
1177 else
1178 tmp = doevery;
1179 }
a0ed51b3 1180 else
ffc61ed2
JH
1181 tmp = 1;
1182 s++;
a0ed51b3 1183 }
a0ed51b3
LW
1184 }
1185 break;
bbce6d69 1186 case ALNUML:
3280af22 1187 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1188 if (do_utf8) {
1189 while (s < strend) {
1190 if (isALNUM_LC_utf8((U8*)s)) {
1191 if (tmp && (norun || regtry(prog, s)))
1192 goto got_it;
1193 else
1194 tmp = doevery;
1195 }
a687059c 1196 else
ffc61ed2
JH
1197 tmp = 1;
1198 s += UTF8SKIP(s);
a0d0e21e 1199 }
a0d0e21e 1200 }
ffc61ed2
JH
1201 else {
1202 while (s < strend) {
1203 if (isALNUM_LC(*s)) {
1204 if (tmp && (norun || regtry(prog, s)))
1205 goto got_it;
1206 else
1207 tmp = doevery;
1208 }
a0ed51b3 1209 else
ffc61ed2
JH
1210 tmp = 1;
1211 s++;
a0ed51b3 1212 }
a0ed51b3
LW
1213 }
1214 break;
a0d0e21e 1215 case NALNUM:
ffc61ed2 1216 if (do_utf8) {
8269fa76 1217 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1218 while (s < strend) {
3568d838 1219 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1220 if (tmp && (norun || regtry(prog, s)))
1221 goto got_it;
1222 else
1223 tmp = doevery;
1224 }
bbce6d69 1225 else
ffc61ed2
JH
1226 tmp = 1;
1227 s += UTF8SKIP(s);
bbce6d69 1228 }
bbce6d69 1229 }
ffc61ed2
JH
1230 else {
1231 while (s < strend) {
1232 if (!isALNUM(*s)) {
1233 if (tmp && (norun || regtry(prog, s)))
1234 goto got_it;
1235 else
1236 tmp = doevery;
1237 }
a0ed51b3 1238 else
ffc61ed2
JH
1239 tmp = 1;
1240 s++;
a0ed51b3 1241 }
a0ed51b3
LW
1242 }
1243 break;
bbce6d69 1244 case NALNUML:
3280af22 1245 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1246 if (do_utf8) {
1247 while (s < strend) {
1248 if (!isALNUM_LC_utf8((U8*)s)) {
1249 if (tmp && (norun || regtry(prog, s)))
1250 goto got_it;
1251 else
1252 tmp = doevery;
1253 }
a687059c 1254 else
ffc61ed2
JH
1255 tmp = 1;
1256 s += UTF8SKIP(s);
a687059c 1257 }
a0d0e21e 1258 }
ffc61ed2
JH
1259 else {
1260 while (s < strend) {
1261 if (!isALNUM_LC(*s)) {
1262 if (tmp && (norun || regtry(prog, s)))
1263 goto got_it;
1264 else
1265 tmp = doevery;
1266 }
a0ed51b3 1267 else
ffc61ed2
JH
1268 tmp = 1;
1269 s++;
a0ed51b3 1270 }
a0ed51b3
LW
1271 }
1272 break;
a0d0e21e 1273 case SPACE:
ffc61ed2 1274 if (do_utf8) {
8269fa76 1275 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1276 while (s < strend) {
3568d838 1277 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1278 if (tmp && (norun || regtry(prog, s)))
1279 goto got_it;
1280 else
1281 tmp = doevery;
1282 }
a0d0e21e 1283 else
ffc61ed2
JH
1284 tmp = 1;
1285 s += UTF8SKIP(s);
2304df62 1286 }
a0d0e21e 1287 }
ffc61ed2
JH
1288 else {
1289 while (s < strend) {
1290 if (isSPACE(*s)) {
1291 if (tmp && (norun || regtry(prog, s)))
1292 goto got_it;
1293 else
1294 tmp = doevery;
1295 }
a0ed51b3 1296 else
ffc61ed2
JH
1297 tmp = 1;
1298 s++;
a0ed51b3 1299 }
a0ed51b3
LW
1300 }
1301 break;
bbce6d69 1302 case SPACEL:
3280af22 1303 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1304 if (do_utf8) {
1305 while (s < strend) {
1306 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1307 if (tmp && (norun || regtry(prog, s)))
1308 goto got_it;
1309 else
1310 tmp = doevery;
1311 }
bbce6d69 1312 else
ffc61ed2
JH
1313 tmp = 1;
1314 s += UTF8SKIP(s);
bbce6d69 1315 }
bbce6d69 1316 }
ffc61ed2
JH
1317 else {
1318 while (s < strend) {
1319 if (isSPACE_LC(*s)) {
1320 if (tmp && (norun || regtry(prog, s)))
1321 goto got_it;
1322 else
1323 tmp = doevery;
1324 }
a0ed51b3 1325 else
ffc61ed2
JH
1326 tmp = 1;
1327 s++;
a0ed51b3 1328 }
a0ed51b3
LW
1329 }
1330 break;
a0d0e21e 1331 case NSPACE:
ffc61ed2 1332 if (do_utf8) {
8269fa76 1333 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1334 while (s < strend) {
3568d838 1335 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1336 if (tmp && (norun || regtry(prog, s)))
1337 goto got_it;
1338 else
1339 tmp = doevery;
1340 }
a0d0e21e 1341 else
ffc61ed2
JH
1342 tmp = 1;
1343 s += UTF8SKIP(s);
a687059c 1344 }
a0d0e21e 1345 }
ffc61ed2
JH
1346 else {
1347 while (s < strend) {
1348 if (!isSPACE(*s)) {
1349 if (tmp && (norun || regtry(prog, s)))
1350 goto got_it;
1351 else
1352 tmp = doevery;
1353 }
a0ed51b3 1354 else
ffc61ed2
JH
1355 tmp = 1;
1356 s++;
a0ed51b3 1357 }
a0ed51b3
LW
1358 }
1359 break;
bbce6d69 1360 case NSPACEL:
3280af22 1361 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1362 if (do_utf8) {
1363 while (s < strend) {
1364 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1365 if (tmp && (norun || regtry(prog, s)))
1366 goto got_it;
1367 else
1368 tmp = doevery;
1369 }
bbce6d69 1370 else
ffc61ed2
JH
1371 tmp = 1;
1372 s += UTF8SKIP(s);
bbce6d69 1373 }
bbce6d69 1374 }
ffc61ed2
JH
1375 else {
1376 while (s < strend) {
1377 if (!isSPACE_LC(*s)) {
1378 if (tmp && (norun || regtry(prog, s)))
1379 goto got_it;
1380 else
1381 tmp = doevery;
1382 }
a0ed51b3 1383 else
ffc61ed2
JH
1384 tmp = 1;
1385 s++;
a0ed51b3 1386 }
a0ed51b3
LW
1387 }
1388 break;
a0d0e21e 1389 case DIGIT:
ffc61ed2 1390 if (do_utf8) {
8269fa76 1391 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1392 while (s < strend) {
3568d838 1393 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1394 if (tmp && (norun || regtry(prog, s)))
1395 goto got_it;
1396 else
1397 tmp = doevery;
1398 }
a0d0e21e 1399 else
ffc61ed2
JH
1400 tmp = 1;
1401 s += UTF8SKIP(s);
2b69d0c2 1402 }
a0d0e21e 1403 }
ffc61ed2
JH
1404 else {
1405 while (s < strend) {
1406 if (isDIGIT(*s)) {
1407 if (tmp && (norun || regtry(prog, s)))
1408 goto got_it;
1409 else
1410 tmp = doevery;
1411 }
a0ed51b3 1412 else
ffc61ed2
JH
1413 tmp = 1;
1414 s++;
a0ed51b3 1415 }
a0ed51b3
LW
1416 }
1417 break;
b8c5462f
JH
1418 case DIGITL:
1419 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1420 if (do_utf8) {
1421 while (s < strend) {
1422 if (isDIGIT_LC_utf8((U8*)s)) {
1423 if (tmp && (norun || regtry(prog, s)))
1424 goto got_it;
1425 else
1426 tmp = doevery;
1427 }
b8c5462f 1428 else
ffc61ed2
JH
1429 tmp = 1;
1430 s += UTF8SKIP(s);
b8c5462f 1431 }
b8c5462f 1432 }
ffc61ed2
JH
1433 else {
1434 while (s < strend) {
1435 if (isDIGIT_LC(*s)) {
1436 if (tmp && (norun || regtry(prog, s)))
1437 goto got_it;
1438 else
1439 tmp = doevery;
1440 }
b8c5462f 1441 else
ffc61ed2
JH
1442 tmp = 1;
1443 s++;
b8c5462f 1444 }
b8c5462f
JH
1445 }
1446 break;
a0d0e21e 1447 case NDIGIT:
ffc61ed2 1448 if (do_utf8) {
8269fa76 1449 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1450 while (s < strend) {
3568d838 1451 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1452 if (tmp && (norun || regtry(prog, s)))
1453 goto got_it;
1454 else
1455 tmp = doevery;
1456 }
a0d0e21e 1457 else
ffc61ed2
JH
1458 tmp = 1;
1459 s += UTF8SKIP(s);
a687059c 1460 }
a0d0e21e 1461 }
ffc61ed2
JH
1462 else {
1463 while (s < strend) {
1464 if (!isDIGIT(*s)) {
1465 if (tmp && (norun || regtry(prog, s)))
1466 goto got_it;
1467 else
1468 tmp = doevery;
1469 }
a0ed51b3 1470 else
ffc61ed2
JH
1471 tmp = 1;
1472 s++;
a0ed51b3 1473 }
a0ed51b3
LW
1474 }
1475 break;
b8c5462f
JH
1476 case NDIGITL:
1477 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1478 if (do_utf8) {
1479 while (s < strend) {
1480 if (!isDIGIT_LC_utf8((U8*)s)) {
1481 if (tmp && (norun || regtry(prog, s)))
1482 goto got_it;
1483 else
1484 tmp = doevery;
1485 }
b8c5462f 1486 else
ffc61ed2
JH
1487 tmp = 1;
1488 s += UTF8SKIP(s);
b8c5462f 1489 }
a0ed51b3 1490 }
ffc61ed2
JH
1491 else {
1492 while (s < strend) {
1493 if (!isDIGIT_LC(*s)) {
1494 if (tmp && (norun || regtry(prog, s)))
1495 goto got_it;
1496 else
1497 tmp = doevery;
1498 }
cf93c79d 1499 else
ffc61ed2
JH
1500 tmp = 1;
1501 s++;
b8c5462f 1502 }
b8c5462f
JH
1503 }
1504 break;
b3c9acc1 1505 default:
3c3eec57
GS
1506 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1507 break;
d6a28714 1508 }
6eb5f6b9
JH
1509 return 0;
1510 got_it:
1511 return s;
1512}
1513
1514/*
1515 - regexec_flags - match a regexp against a string
1516 */
1517I32
1518Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1519 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1520/* strend: pointer to null at end of string */
1521/* strbeg: real beginning of string */
1522/* minend: end of match must be >=minend after stringarg. */
1523/* data: May be used for some additional optimizations. */
1524/* nosave: For optimizations. */
1525{
6eb5f6b9
JH
1526 register char *s;
1527 register regnode *c;
1528 register char *startpos = stringarg;
6eb5f6b9
JH
1529 I32 minlen; /* must match at least this many chars */
1530 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1531 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1532 constant substr. */ /* CC */
1533 I32 end_shift = 0; /* Same for the end. */ /* CC */
1534 I32 scream_pos = -1; /* Internal iterator of scream. */
1535 char *scream_olds;
1536 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1537 bool do_utf8 = DO_UTF8(sv);
2a782b5b 1538#ifdef DEBUGGING
9e55ce06
JH
1539 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
1540 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2a782b5b 1541#endif
6eb5f6b9
JH
1542
1543 PL_regcc = 0;
1544
1545 cache_re(prog);
1546#ifdef DEBUGGING
aea4f609 1547 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1548#endif
1549
1550 /* Be paranoid... */
1551 if (prog == NULL || startpos == NULL) {
1552 Perl_croak(aTHX_ "NULL regexp parameter");
1553 return 0;
1554 }
1555
1556 minlen = prog->minlen;
9e55ce06
JH
1557 if (strend - startpos < minlen &&
1558 !PL_reg_match_utf8 /* ANYOFs can balloon to EXACTFs */
1559 ) {
a72c7584
JH
1560 DEBUG_r(PerlIO_printf(Perl_debug_log,
1561 "String too short [regexec_flags]...\n"));
1562 goto phooey;
1aa99e6b 1563 }
6eb5f6b9 1564
6eb5f6b9
JH
1565 /* Check validity of program. */
1566 if (UCHARAT(prog->program) != REG_MAGIC) {
1567 Perl_croak(aTHX_ "corrupted regexp program");
1568 }
1569
1570 PL_reg_flags = 0;
1571 PL_reg_eval_set = 0;
1572 PL_reg_maxiter = 0;
1573
1574 if (prog->reganch & ROPT_UTF8)
1575 PL_reg_flags |= RF_utf8;
1576
1577 /* Mark beginning of line for ^ and lookbehind. */
1578 PL_regbol = startpos;
1579 PL_bostr = strbeg;
1580 PL_reg_sv = sv;
1581
1582 /* Mark end of line for $ (and such) */
1583 PL_regeol = strend;
1584
1585 /* see how far we have to get to not match where we matched before */
1586 PL_regtill = startpos+minend;
1587
1588 /* We start without call_cc context. */
1589 PL_reg_call_cc = 0;
1590
1591 /* If there is a "must appear" string, look for it. */
1592 s = startpos;
1593
1594 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1595 MAGIC *mg;
1596
1597 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1598 PL_reg_ganch = startpos;
1599 else if (sv && SvTYPE(sv) >= SVt_PVMG
1600 && SvMAGIC(sv)
14befaf4
DM
1601 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1602 && mg->mg_len >= 0) {
6eb5f6b9
JH
1603 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1604 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1605 if (s > PL_reg_ganch)
6eb5f6b9
JH
1606 goto phooey;
1607 s = PL_reg_ganch;
1608 }
1609 }
1610 else /* pos() not defined */
1611 PL_reg_ganch = strbeg;
1612 }
1613
699c3c34
JH
1614 if (do_utf8 == (UTF!=0) &&
1615 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1616 re_scream_pos_data d;
1617
1618 d.scream_olds = &scream_olds;
1619 d.scream_pos = &scream_pos;
1620 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7
JH
1621 if (!s) {
1622 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1623 goto phooey; /* not present */
3fa9c3d7 1624 }
6eb5f6b9
JH
1625 }
1626
2a782b5b 1627 DEBUG_r({
9e55ce06
JH
1628 char *s0 = UTF ?
1629 pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
c728cb41 1630 UNI_DISPLAY_REGEX) :
9e55ce06
JH
1631 prog->precomp;
1632 int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1633 char *s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
c728cb41 1634 UNI_DISPLAY_REGEX) : startpos;
9e55ce06 1635 int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
2a782b5b
JH
1636 if (!PL_colorset)
1637 reginitcolors();
1638 PerlIO_printf(Perl_debug_log,
9e55ce06 1639 "%sMatching REx%s `%s%*.*s%s%s' against `%s%.*s%s%s'\n",
2a782b5b 1640 PL_colors[4],PL_colors[5],PL_colors[0],
9e55ce06 1641 len0, len0, s0,
2a782b5b 1642 PL_colors[1],
9e55ce06 1643 len0 > 60 ? "..." : "",
2a782b5b 1644 PL_colors[0],
9e55ce06
JH
1645 (int)(len1 > 60 ? 60 : len1),
1646 s1, PL_colors[1],
1647 (len1 > 60 ? "..." : "")
2a782b5b
JH
1648 );
1649 });
6eb5f6b9
JH
1650
1651 /* Simplest case: anchored match need be tried only once. */
1652 /* [unless only anchor is BOL and multiline is set] */
1653 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1654 if (s == startpos && regtry(prog, startpos))
1655 goto got_it;
1656 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1657 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1658 {
1659 char *end;
1660
1661 if (minlen)
1662 dontbother = minlen - 1;
1aa99e6b 1663 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1664 /* for multiline we only have to try after newlines */
1665 if (prog->check_substr) {
1666 if (s == startpos)
1667 goto after_try;
1668 while (1) {
1669 if (regtry(prog, s))
1670 goto got_it;
1671 after_try:
1672 if (s >= end)
1673 goto phooey;
1674 if (prog->reganch & RE_USE_INTUIT) {
1675 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1676 if (!s)
1677 goto phooey;
1678 }
1679 else
1680 s++;
1681 }
1682 } else {
1683 if (s > startpos)
1684 s--;
1685 while (s < end) {
1686 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1687 if (regtry(prog, s))
1688 goto got_it;
1689 }
1690 }
1691 }
1692 }
1693 goto phooey;
1694 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1695 if (regtry(prog, PL_reg_ganch))
1696 goto got_it;
1697 goto phooey;
1698 }
1699
1700 /* Messy cases: unanchored match. */
9041c2e3 1701 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1702 /* we have /x+whatever/ */
1703 /* it must be a one character string (XXXX Except UTF?) */
1704 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1705#ifdef DEBUGGING
1706 int did_match = 0;
1707#endif
1708
1aa99e6b 1709 if (do_utf8) {
6eb5f6b9
JH
1710 while (s < strend) {
1711 if (*s == ch) {
bf93d4cc 1712 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1713 if (regtry(prog, s)) goto got_it;
1714 s += UTF8SKIP(s);
1715 while (s < strend && *s == ch)
1716 s += UTF8SKIP(s);
1717 }
1718 s += UTF8SKIP(s);
1719 }
1720 }
1721 else {
1722 while (s < strend) {
1723 if (*s == ch) {
bf93d4cc 1724 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1725 if (regtry(prog, s)) goto got_it;
1726 s++;
1727 while (s < strend && *s == ch)
1728 s++;
1729 }
1730 s++;
1731 }
1732 }
b7953727 1733 DEBUG_r(if (!did_match)
bf93d4cc 1734 PerlIO_printf(Perl_debug_log,
b7953727
JH
1735 "Did not find anchored character...\n")
1736 );
6eb5f6b9
JH
1737 }
1738 /*SUPPRESS 560*/
1aa99e6b
IH
1739 else if (do_utf8 == (UTF!=0) &&
1740 (prog->anchored_substr != Nullsv
9041c2e3 1741 || (prog->float_substr != Nullsv
1aa99e6b 1742 && prog->float_max_offset < strend - s))) {
9041c2e3 1743 SV *must = prog->anchored_substr
6eb5f6b9 1744 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1745 I32 back_max =
6eb5f6b9 1746 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1747 I32 back_min =
6eb5f6b9 1748 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1749 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1750 -(I32)(CHR_SVLEN(must)
1aa99e6b 1751 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1752 char *last1; /* Last position checked before */
bf93d4cc
GS
1753#ifdef DEBUGGING
1754 int did_match = 0;
1755#endif
6eb5f6b9
JH
1756
1757 if (s > PL_bostr)
1758 last1 = HOPc(s, -1);
1759 else
1760 last1 = s - 1; /* bogus */
1761
1762 /* XXXX check_substr already used to find `s', can optimize if
1763 check_substr==must. */
1764 scream_pos = -1;
1765 dontbother = end_shift;
1766 strend = HOPc(strend, -dontbother);
1767 while ( (s <= last) &&
9041c2e3 1768 ((flags & REXEC_SCREAM)
1aa99e6b 1769 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1770 end_shift, &scream_pos, 0))
1aa99e6b 1771 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1772 (unsigned char*)strend, must,
6eb5f6b9 1773 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1774 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1775 if (HOPc(s, -back_max) > last1) {
1776 last1 = HOPc(s, -back_min);
1777 s = HOPc(s, -back_max);
1778 }
1779 else {
1780 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1781
1782 last1 = HOPc(s, -back_min);
1783 s = t;
1784 }
1aa99e6b 1785 if (do_utf8) {
6eb5f6b9
JH
1786 while (s <= last1) {
1787 if (regtry(prog, s))
1788 goto got_it;
1789 s += UTF8SKIP(s);
1790 }
1791 }
1792 else {
1793 while (s <= last1) {
1794 if (regtry(prog, s))
1795 goto got_it;
1796 s++;
1797 }
1798 }
1799 }
b7953727
JH
1800 DEBUG_r(if (!did_match)
1801 PerlIO_printf(Perl_debug_log,
1802 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1803 ((must == prog->anchored_substr)
1804 ? "anchored" : "floating"),
1805 PL_colors[0],
1806 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1807 SvPVX(must),
b7953727
JH
1808 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1809 );
6eb5f6b9
JH
1810 goto phooey;
1811 }
155aba94 1812 else if ((c = prog->regstclass)) {
66e933ab
GS
1813 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1814 /* don't bother with what can't match */
6eb5f6b9 1815 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1816 DEBUG_r({
1817 SV *prop = sv_newmortal();
9e55ce06
JH
1818 char *s0;
1819 char *s1;
1820 int len0;
1821 int len1;
1822
ffc61ed2 1823 regprop(prop, c);
9e55ce06
JH
1824 s0 = UTF ?
1825 pv_uni_display(dsv0, (U8*)SvPVX(prop), SvCUR(prop), 60,
c728cb41 1826 UNI_DISPLAY_REGEX) :
9e55ce06
JH
1827 SvPVX(prop);
1828 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1829 s1 = UTF ?
c728cb41 1830 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
9e55ce06
JH
1831 len1 = UTF ? SvCUR(dsv1) : strend - s;
1832 PerlIO_printf(Perl_debug_log,
1833 "Matching stclass `%*.*s' against `%*.*s'\n",
1834 len0, len0, s0,
1835 len1, len1, s1);
ffc61ed2 1836 });
6eb5f6b9
JH
1837 if (find_byclass(prog, c, s, strend, startpos, 0))
1838 goto got_it;
bf93d4cc 1839 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1840 }
1841 else {
1842 dontbother = 0;
1843 if (prog->float_substr != Nullsv) { /* Trim the end. */
1844 char *last;
d6a28714
JH
1845
1846 if (flags & REXEC_SCREAM) {
1847 last = screaminstr(sv, prog->float_substr, s - strbeg,
1848 end_shift, &scream_pos, 1); /* last one */
1849 if (!last)
ffc61ed2 1850 last = scream_olds; /* Only one occurrence. */
b8c5462f 1851 }
d6a28714
JH
1852 else {
1853 STRLEN len;
1854 char *little = SvPV(prog->float_substr, len);
1855
1856 if (SvTAIL(prog->float_substr)) {
1857 if (memEQ(strend - len + 1, little, len - 1))
1858 last = strend - len + 1;
1859 else if (!PL_multiline)
9041c2e3 1860 last = memEQ(strend - len, little, len)
d6a28714 1861 ? strend - len : Nullch;
b8c5462f 1862 else
d6a28714
JH
1863 goto find_last;
1864 } else {
1865 find_last:
9041c2e3 1866 if (len)
d6a28714 1867 last = rninstr(s, strend, little, little + len);
b8c5462f 1868 else
d6a28714 1869 last = strend; /* matching `$' */
b8c5462f 1870 }
b8c5462f 1871 }
bf93d4cc
GS
1872 if (last == NULL) {
1873 DEBUG_r(PerlIO_printf(Perl_debug_log,
1874 "%sCan't trim the tail, match fails (should not happen)%s\n",
1875 PL_colors[4],PL_colors[5]));
1876 goto phooey; /* Should not happen! */
1877 }
d6a28714
JH
1878 dontbother = strend - last + prog->float_min_offset;
1879 }
1880 if (minlen && (dontbother < minlen))
1881 dontbother = minlen - 1;
1882 strend -= dontbother; /* this one's always in bytes! */
1883 /* We don't know much -- general case. */
1aa99e6b 1884 if (do_utf8) {
d6a28714
JH
1885 for (;;) {
1886 if (regtry(prog, s))
1887 goto got_it;
1888 if (s >= strend)
1889 break;
b8c5462f 1890 s += UTF8SKIP(s);
d6a28714
JH
1891 };
1892 }
1893 else {
1894 do {
1895 if (regtry(prog, s))
1896 goto got_it;
1897 } while (s++ < strend);
1898 }
1899 }
1900
1901 /* Failure. */
1902 goto phooey;
1903
1904got_it:
1905 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1906
1907 if (PL_reg_eval_set) {
1908 /* Preserve the current value of $^R */
1909 if (oreplsv != GvSV(PL_replgv))
1910 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1911 restored, the value remains
1912 the same. */
acfe0abc 1913 restore_pos(aTHX_ 0);
d6a28714
JH
1914 }
1915
1916 /* make sure $`, $&, $', and $digit will work later */
1917 if ( !(flags & REXEC_NOT_FIRST) ) {
1918 if (RX_MATCH_COPIED(prog)) {
1919 Safefree(prog->subbeg);
1920 RX_MATCH_COPIED_off(prog);
1921 }
1922 if (flags & REXEC_COPY_STR) {
1923 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1924
1925 s = savepvn(strbeg, i);
1926 prog->subbeg = s;
1927 prog->sublen = i;
1928 RX_MATCH_COPIED_on(prog);
1929 }
1930 else {
1931 prog->subbeg = strbeg;
1932 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1933 }
1934 }
9041c2e3 1935
d6a28714
JH
1936 return 1;
1937
1938phooey:
bf93d4cc
GS
1939 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1940 PL_colors[4],PL_colors[5]));
d6a28714 1941 if (PL_reg_eval_set)
acfe0abc 1942 restore_pos(aTHX_ 0);
d6a28714
JH
1943 return 0;
1944}
1945
1946/*
1947 - regtry - try match at specific point
1948 */
1949STATIC I32 /* 0 failure, 1 success */
1950S_regtry(pTHX_ regexp *prog, char *startpos)
1951{
d6a28714
JH
1952 register I32 i;
1953 register I32 *sp;
1954 register I32 *ep;
1955 CHECKPOINT lastcp;
1956
02db2b7b
IZ
1957#ifdef DEBUGGING
1958 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1959#endif
d6a28714
JH
1960 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1961 MAGIC *mg;
1962
1963 PL_reg_eval_set = RS_init;
1964 DEBUG_r(DEBUG_s(
b900a521
JH
1965 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1966 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1967 ));
e8347627 1968 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1969 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1970 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1971 SAVETMPS;
1972 /* Apparently this is not needed, judging by wantarray. */
e8347627 1973 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1974 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1975
1976 if (PL_reg_sv) {
1977 /* Make $_ available to executed code. */
1978 if (PL_reg_sv != DEFSV) {
4d1ff10f 1979 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
d6a28714
JH
1980 SAVESPTR(DEFSV);
1981 DEFSV = PL_reg_sv;
b8c5462f 1982 }
d6a28714 1983
9041c2e3 1984 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1985 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1986 /* prepare for quick setting of pos */
14befaf4
DM
1987 sv_magic(PL_reg_sv, (SV*)0,
1988 PERL_MAGIC_regex_global, Nullch, 0);
1989 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1990 mg->mg_len = -1;
b8c5462f 1991 }
d6a28714
JH
1992 PL_reg_magic = mg;
1993 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1994 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 1995 }
09687e5a 1996 if (!PL_reg_curpm) {
0f79a09d 1997 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
1998#ifdef USE_ITHREADS
1999 {
2000 SV* repointer = newSViv(0);
577e12cc 2001 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2002 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2003 av_push(PL_regex_padav,repointer);
2004 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2005 PL_regex_pad = AvARRAY(PL_regex_padav);
2006 }
2007#endif
2008 }
aaa362c4 2009 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2010 PL_reg_oldcurpm = PL_curpm;
2011 PL_curpm = PL_reg_curpm;
2012 if (RX_MATCH_COPIED(prog)) {
2013 /* Here is a serious problem: we cannot rewrite subbeg,
2014 since it may be needed if this match fails. Thus
2015 $` inside (?{}) could fail... */
2016 PL_reg_oldsaved = prog->subbeg;
2017 PL_reg_oldsavedlen = prog->sublen;
2018 RX_MATCH_COPIED_off(prog);
2019 }
2020 else
2021 PL_reg_oldsaved = Nullch;
2022 prog->subbeg = PL_bostr;
2023 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2024 }
2025 prog->startp[0] = startpos - PL_bostr;
2026 PL_reginput = startpos;
2027 PL_regstartp = prog->startp;
2028 PL_regendp = prog->endp;
2029 PL_reglastparen = &prog->lastparen;
a01268b5 2030 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
2031 prog->lastparen = 0;
2032 PL_regsize = 0;
2033 DEBUG_r(PL_reg_starttry = startpos);
2034 if (PL_reg_start_tmpl <= prog->nparens) {
2035 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2036 if(PL_reg_start_tmp)
2037 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2038 else
2039 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2040 }
2041
128e8167
JH
2042#ifdef DEBUGGING
2043 sv_setpvn(PERL_DEBUG_PAD(0), "", 0);
2044 sv_setpvn(PERL_DEBUG_PAD(1), "", 0);
2045 sv_setpvn(PERL_DEBUG_PAD(2), "", 0);
2046#endif
2047
d6a28714
JH
2048 /* XXXX What this code is doing here?!!! There should be no need
2049 to do this again and again, PL_reglastparen should take care of
3dd2943c 2050 this! --ilya*/
dafc8851
JH
2051
2052 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2053 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2054 * PL_reglastparen), is not needed at all by the test suite
2055 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2056 * enough, for building DynaLoader, or otherwise this
2057 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2058 * will happen. Meanwhile, this code *is* needed for the
2059 * above-mentioned test suite tests to succeed. The common theme
2060 * on those tests seems to be returning null fields from matches.
2061 * --jhi */
dafc8851 2062#if 1
d6a28714
JH
2063 sp = prog->startp;
2064 ep = prog->endp;
2065 if (prog->nparens) {
09e8ae3b 2066 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
2067 *++sp = -1;
2068 *++ep = -1;
2069 }
2070 }
dafc8851 2071#endif
02db2b7b 2072 REGCP_SET(lastcp);
d6a28714
JH
2073 if (regmatch(prog->program + 1)) {
2074 prog->endp[0] = PL_reginput - PL_bostr;
2075 return 1;
2076 }
02db2b7b 2077 REGCP_UNWIND(lastcp);
d6a28714
JH
2078 return 0;
2079}
2080
02db2b7b
IZ
2081#define RE_UNWIND_BRANCH 1
2082#define RE_UNWIND_BRANCHJ 2
2083
2084union re_unwind_t;
2085
2086typedef struct { /* XX: makes sense to enlarge it... */
2087 I32 type;
2088 I32 prev;
2089 CHECKPOINT lastcp;
2090} re_unwind_generic_t;
2091
2092typedef struct {
2093 I32 type;
2094 I32 prev;
2095 CHECKPOINT lastcp;
2096 I32 lastparen;
2097 regnode *next;
2098 char *locinput;
2099 I32 nextchr;
2100#ifdef DEBUGGING
2101 int regindent;
2102#endif
2103} re_unwind_branch_t;
2104
2105typedef union re_unwind_t {
2106 I32 type;
2107 re_unwind_generic_t generic;
2108 re_unwind_branch_t branch;
2109} re_unwind_t;
2110
8ba1375e
MJD
2111#define sayYES goto yes
2112#define sayNO goto no
2113#define sayYES_FINAL goto yes_final
2114#define sayYES_LOUD goto yes_loud
2115#define sayNO_FINAL goto no_final
2116#define sayNO_SILENT goto do_no
2117#define saySAME(x) if (x) goto yes; else goto no
2118
2119#define REPORT_CODE_OFF 24
2120
d6a28714
JH
2121/*
2122 - regmatch - main matching routine
2123 *
2124 * Conceptually the strategy is simple: check to see whether the current
2125 * node matches, call self recursively to see whether the rest matches,
2126 * and then act accordingly. In practice we make some effort to avoid
2127 * recursion, in particular by going through "ordinary" nodes (that don't
2128 * need to know whether the rest of the match failed) by a loop instead of
2129 * by recursion.
2130 */
2131/* [lwall] I've hoisted the register declarations to the outer block in order to
2132 * maybe save a little bit of pushing and popping on the stack. It also takes
2133 * advantage of machines that use a register save mask on subroutine entry.
2134 */
2135STATIC I32 /* 0 failure, 1 success */
2136S_regmatch(pTHX_ regnode *prog)
2137{
d6a28714
JH
2138 register regnode *scan; /* Current node. */
2139 regnode *next; /* Next node. */
2140 regnode *inner; /* Next node in internal branch. */
2141 register I32 nextchr; /* renamed nextchr - nextchar colides with
2142 function of same name */
2143 register I32 n; /* no or next */
b7953727
JH
2144 register I32 ln = 0; /* len or last */
2145 register char *s = Nullch; /* operand or save */
d6a28714 2146 register char *locinput = PL_reginput;
b7953727 2147 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2148 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2149 I32 unwind = 0;
b7953727 2150#if 0
02db2b7b 2151 I32 firstcp = PL_savestack_ix;
b7953727 2152#endif
53c4c00c 2153 register bool do_utf8 = PL_reg_match_utf8;
2a782b5b 2154#ifdef DEBUGGING
ce333219
JH
2155 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2156 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2157 SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2a782b5b 2158#endif
02db2b7b 2159
d6a28714
JH
2160#ifdef DEBUGGING
2161 PL_regindent++;
2162#endif
2163
2164 /* Note that nextchr is a byte even in UTF */
2165 nextchr = UCHARAT(locinput);
2166 scan = prog;
2167 while (scan != NULL) {
8ba1375e 2168
2a782b5b 2169 DEBUG_r( {
d6a28714
JH
2170 SV *prop = sv_newmortal();
2171 int docolor = *PL_colors[0];
2172 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2173 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2174 /* The part of the string before starttry has one color
2175 (pref0_len chars), between starttry and current
2176 position another one (pref_len - pref0_len chars),
2177 after the current position the third one.
2178 We assume that pref0_len <= pref_len, otherwise we
2179 decrease pref0_len. */
9041c2e3 2180 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2181 ? (5 + taill) - l : locinput - PL_bostr;
2182 int pref0_len;
d6a28714 2183
df1ffd02 2184 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2185 pref_len++;
2186 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2187 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2188 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2189 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2190 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2191 l--;
d6a28714
JH
2192 if (pref0_len < 0)
2193 pref0_len = 0;
2194 if (pref0_len > pref_len)
2195 pref0_len = pref_len;
2196 regprop(prop, scan);
2a782b5b
JH
2197 {
2198 char *s0 =
df1ffd02 2199 do_utf8 ?
2a782b5b 2200 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
c728cb41 2201 pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2202 locinput - pref_len;
df1ffd02
JH
2203 int len0 = do_utf8 ? strlen(s0) : pref0_len;
2204 char *s1 = do_utf8 ?
2a782b5b 2205 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
c728cb41 2206 pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2207 locinput - pref_len + pref0_len;
df1ffd02
JH
2208 int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2209 char *s2 = do_utf8 ?
2a782b5b 2210 pv_uni_display(dsv2, (U8*)locinput,
c728cb41 2211 PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2212 locinput;
df1ffd02 2213 int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2214 PerlIO_printf(Perl_debug_log,
2215 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2216 (IV)(locinput - PL_bostr),
2217 PL_colors[4],
2218 len0, s0,
2219 PL_colors[5],
2220 PL_colors[2],
2221 len1, s1,
2222 PL_colors[3],
2223 (docolor ? "" : "> <"),
2224 PL_colors[0],
2225 len2, s2,
2226 PL_colors[1],
2227 15 - l - pref_len + 1,
2228 "",
2229 (IV)(scan - PL_regprogram), PL_regindent*2, "",
2230 SvPVX(prop));
2231 }
2232 });
d6a28714
JH
2233
2234 next = scan + NEXT_OFF(scan);
2235 if (next == scan)
2236 next = NULL;
2237
2238 switch (OP(scan)) {
2239 case BOL:
12d33761
HS
2240 if (locinput == PL_bostr || (PL_multiline &&
2241 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2242 {
2243 /* regtill = regbol; */
b8c5462f
JH
2244 break;
2245 }
d6a28714
JH
2246 sayNO;
2247 case MBOL:
12d33761
HS
2248 if (locinput == PL_bostr ||
2249 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2250 {
b8c5462f
JH
2251 break;
2252 }
d6a28714
JH
2253 sayNO;
2254 case SBOL:
c2a73568 2255 if (locinput == PL_bostr)
b8c5462f 2256 break;
d6a28714
JH
2257 sayNO;
2258 case GPOS:
2259 if (locinput == PL_reg_ganch)
2260 break;
2261 sayNO;
2262 case EOL:
2263 if (PL_multiline)
2264 goto meol;
2265 else
2266 goto seol;
2267 case MEOL:
2268 meol:
2269 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2270 sayNO;
b8c5462f 2271 break;
d6a28714
JH
2272 case SEOL:
2273 seol:
2274 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2275 sayNO;
d6a28714 2276 if (PL_regeol - locinput > 1)
b8c5462f 2277 sayNO;
b8c5462f 2278 break;
d6a28714
JH
2279 case EOS:
2280 if (PL_regeol != locinput)
b8c5462f 2281 sayNO;
d6a28714 2282 break;
ffc61ed2 2283 case SANY:
d6a28714 2284 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2285 sayNO;
f33976b4
DB
2286 if (do_utf8) {
2287 locinput += PL_utf8skip[nextchr];
2288 if (locinput > PL_regeol)
2289 sayNO;
2290 nextchr = UCHARAT(locinput);
2291 }
2292 else
2293 nextchr = UCHARAT(++locinput);
2294 break;
2295 case CANY:
2296 if (!nextchr && locinput >= PL_regeol)
2297 sayNO;
b8c5462f 2298 nextchr = UCHARAT(++locinput);
a0d0e21e 2299 break;
ffc61ed2 2300 case REG_ANY:
1aa99e6b
IH
2301 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2302 sayNO;
2303 if (do_utf8) {
b8c5462f 2304 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2305 if (locinput > PL_regeol)
2306 sayNO;
a0ed51b3 2307 nextchr = UCHARAT(locinput);
a0ed51b3 2308 }
1aa99e6b
IH
2309 else
2310 nextchr = UCHARAT(++locinput);
a0ed51b3 2311 break;
d6a28714 2312 case EXACT:
cd439c50
IZ
2313 s = STRING(scan);
2314 ln = STR_LEN(scan);
1aa99e6b 2315 if (do_utf8 != (UTF!=0)) {
bc517b45 2316 /* The target and the pattern have differing utf8ness. */
1aa99e6b
IH
2317 char *l = locinput;
2318 char *e = s + ln;
bc517b45 2319 STRLEN ulen;
a72c7584 2320
5ff6fc6d
JH
2321 if (do_utf8) {
2322 /* The target is utf8, the pattern is not utf8. */
1aa99e6b
IH
2323 while (s < e) {
2324 if (l >= PL_regeol)
5ff6fc6d
JH
2325 sayNO;
2326 if (NATIVE_TO_UNI(*(U8*)s) !=
bc517b45 2327 utf8_to_uvchr((U8*)l, &ulen))
5ff6fc6d 2328 sayNO;
bc517b45 2329 l += ulen;
5ff6fc6d 2330 s ++;
1aa99e6b 2331 }
5ff6fc6d
JH
2332 }
2333 else {
2334 /* The target is not utf8, the pattern is utf8. */
1aa99e6b
IH
2335 while (s < e) {
2336 if (l >= PL_regeol)
2337 sayNO;
5ff6fc6d 2338 if (NATIVE_TO_UNI(*((U8*)l)) !=
bc517b45 2339 utf8_to_uvchr((U8*)s, &ulen))
1aa99e6b 2340 sayNO;
bc517b45 2341 s += ulen;
a72c7584 2342 l ++;
1aa99e6b 2343 }
5ff6fc6d 2344 }
1aa99e6b
IH
2345 locinput = l;
2346 nextchr = UCHARAT(locinput);
2347 break;
2348 }
bc517b45 2349 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2350 /* Inline the first character, for speed. */
2351 if (UCHARAT(s) != nextchr)
2352 sayNO;
2353 if (PL_regeol - locinput < ln)
2354 sayNO;
2355 if (ln > 1 && memNE(s, locinput, ln))
2356 sayNO;
2357 locinput += ln;
2358 nextchr = UCHARAT(locinput);
2359 break;
2360 case EXACTFL:
b8c5462f
JH
2361 PL_reg_flags |= RF_tainted;
2362 /* FALL THROUGH */
d6a28714 2363 case EXACTF:
cd439c50
IZ
2364 s = STRING(scan);
2365 ln = STR_LEN(scan);
d6a28714 2366
d07ddd77
JH
2367 if (do_utf8 || UTF) {
2368 /* Either target or the pattern are utf8. */
d6a28714 2369 char *l = locinput;
d07ddd77 2370 char *e = PL_regeol;
bc517b45 2371
d07ddd77
JH
2372 if (ibcmp_utf8(s, 0, ln, do_utf8,
2373 l, &e, 0, UTF))
2374 sayNO;
2375 locinput = e;
2376 nextchr = UCHARAT(locinput);
2377 break;
a0ed51b3 2378 }
d6a28714 2379
bc517b45
JH
2380 /* Neither the target and the pattern are utf8. */
2381
d6a28714
JH
2382 /* Inline the first character, for speed. */
2383 if (UCHARAT(s) != nextchr &&
2384 UCHARAT(s) != ((OP(scan) == EXACTF)
2385 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2386 sayNO;
d6a28714 2387 if (PL_regeol - locinput < ln)
b8c5462f 2388 sayNO;
d6a28714
JH
2389 if (ln > 1 && (OP(scan) == EXACTF
2390 ? ibcmp(s, locinput, ln)
2391 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2392 sayNO;
d6a28714
JH
2393 locinput += ln;
2394 nextchr = UCHARAT(locinput);
a0d0e21e 2395 break;
d6a28714 2396 case ANYOF:
ffc61ed2 2397 if (do_utf8) {
9e55ce06
JH
2398 STRLEN inclasslen = PL_regeol - locinput;
2399
2400 if (!reginclasslen(scan, (U8*)locinput, &inclasslen, do_utf8))
ffc61ed2
JH
2401 sayNO;
2402 if (locinput >= PL_regeol)
2403 sayNO;
9e55ce06 2404 locinput += inclasslen;
b8c5462f 2405 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2406 }
2407 else {
2408 if (nextchr < 0)
2409 nextchr = UCHARAT(locinput);
2410 if (!reginclass(scan, (U8*)locinput, do_utf8))
2411 sayNO;
2412 if (!nextchr && locinput >= PL_regeol)
2413 sayNO;
2414 nextchr = UCHARAT(++locinput);
2415 }
b8c5462f 2416 break;
d6a28714 2417 case ALNUML:
b8c5462f
JH
2418 PL_reg_flags |= RF_tainted;
2419 /* FALL THROUGH */
d6a28714 2420 case ALNUM:
b8c5462f 2421 if (!nextchr)
4633a7c4 2422 sayNO;
ffc61ed2 2423 if (do_utf8) {
ad24be35 2424 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2425 if (!(OP(scan) == ALNUM
3568d838 2426 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2427 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2428 {
2429 sayNO;
a0ed51b3 2430 }
b8c5462f 2431 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2432 nextchr = UCHARAT(locinput);
2433 break;
2434 }
ffc61ed2 2435 if (!(OP(scan) == ALNUM
d6a28714 2436 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2437 sayNO;
b8c5462f 2438 nextchr = UCHARAT(++locinput);
a0d0e21e 2439 break;
d6a28714 2440 case NALNUML:
b8c5462f
JH
2441 PL_reg_flags |= RF_tainted;
2442 /* FALL THROUGH */
d6a28714
JH
2443 case NALNUM:
2444 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2445 sayNO;
ffc61ed2 2446 if (do_utf8) {
8269fa76 2447 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2448 if (OP(scan) == NALNUM
3568d838 2449 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2450 : isALNUM_LC_utf8((U8*)locinput))
2451 {
b8c5462f 2452 sayNO;
d6a28714 2453 }
b8c5462f
JH
2454 locinput += PL_utf8skip[nextchr];
2455 nextchr = UCHARAT(locinput);
2456 break;
2457 }
ffc61ed2 2458 if (OP(scan) == NALNUM
d6a28714 2459 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2460 sayNO;
76e3520e 2461 nextchr = UCHARAT(++locinput);
a0d0e21e 2462 break;
d6a28714
JH
2463 case BOUNDL:
2464 case NBOUNDL:
3280af22 2465 PL_reg_flags |= RF_tainted;
bbce6d69 2466 /* FALL THROUGH */
d6a28714
JH
2467 case BOUND:
2468 case NBOUND:
2469 /* was last char in word? */
ffc61ed2 2470 if (do_utf8) {
12d33761
HS
2471 if (locinput == PL_bostr)
2472 ln = '\n';
ffc61ed2
JH
2473 else {
2474 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2475
2b9d42f0 2476 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2477 }
2478 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2479 ln = isALNUM_uni(ln);
8269fa76 2480 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2481 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2482 }
2483 else {
9041c2e3 2484 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2485 n = isALNUM_LC_utf8((U8*)locinput);
2486 }
a0ed51b3 2487 }
d6a28714 2488 else {
12d33761
HS
2489 ln = (locinput != PL_bostr) ?
2490 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2491 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2492 ln = isALNUM(ln);
2493 n = isALNUM(nextchr);
2494 }
2495 else {
2496 ln = isALNUM_LC(ln);
2497 n = isALNUM_LC(nextchr);
2498 }
d6a28714 2499 }
ffc61ed2
JH
2500 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2501 OP(scan) == BOUNDL))
2502 sayNO;
a0ed51b3 2503 break;
d6a28714 2504 case SPACEL:
3280af22 2505 PL_reg_flags |= RF_tainted;
bbce6d69 2506 /* FALL THROUGH */
d6a28714 2507 case SPACE:
9442cb0e 2508 if (!nextchr)
4633a7c4 2509 sayNO;
1aa99e6b 2510 if (do_utf8) {
fd400ab9 2511 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2512 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2513 if (!(OP(scan) == SPACE
3568d838 2514 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2515 : isSPACE_LC_utf8((U8*)locinput)))
2516 {
2517 sayNO;
2518 }
2519 locinput += PL_utf8skip[nextchr];
2520 nextchr = UCHARAT(locinput);
2521 break;
d6a28714 2522 }
ffc61ed2
JH
2523 if (!(OP(scan) == SPACE
2524 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2525 sayNO;
2526 nextchr = UCHARAT(++locinput);
2527 }
2528 else {
2529 if (!(OP(scan) == SPACE
2530 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2531 sayNO;
2532 nextchr = UCHARAT(++locinput);
a0ed51b3 2533 }
a0ed51b3 2534 break;
d6a28714 2535 case NSPACEL:
3280af22 2536 PL_reg_flags |= RF_tainted;
bbce6d69 2537 /* FALL THROUGH */
d6a28714 2538 case NSPACE:
9442cb0e 2539 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2540 sayNO;
1aa99e6b 2541 if (do_utf8) {
8269fa76 2542 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2543 if (OP(scan) == NSPACE
3568d838 2544 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2545 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2546 {
2547 sayNO;
2548 }
2549 locinput += PL_utf8skip[nextchr];
2550 nextchr = UCHARAT(locinput);
2551 break;
a0ed51b3 2552 }
ffc61ed2 2553 if (OP(scan) == NSPACE
d6a28714 2554 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2555 sayNO;
76e3520e 2556 nextchr = UCHARAT(++locinput);
a0d0e21e 2557 break;
d6a28714 2558 case DIGITL:
a0ed51b3
LW
2559 PL_reg_flags |= RF_tainted;
2560 /* FALL THROUGH */
d6a28714 2561 case DIGIT:
9442cb0e 2562 if (!nextchr)
a0ed51b3 2563 sayNO;
1aa99e6b 2564 if (do_utf8) {
8269fa76 2565 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2566 if (!(OP(scan) == DIGIT
3568d838 2567 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2568 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2569 {
a0ed51b3 2570 sayNO;
dfe13c55 2571 }
6f06b55f 2572 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2573 nextchr = UCHARAT(locinput);
2574 break;
2575 }
ffc61ed2 2576 if (!(OP(scan) == DIGIT
9442cb0e 2577 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2578 sayNO;
2579 nextchr = UCHARAT(++locinput);
2580 break;
d6a28714 2581 case NDIGITL:
b8c5462f
JH
2582 PL_reg_flags |= RF_tainted;
2583 /* FALL THROUGH */
d6a28714 2584 case NDIGIT:
9442cb0e 2585 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2586 sayNO;
1aa99e6b 2587 if (do_utf8) {
8269fa76 2588 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2589 if (OP(scan) == NDIGIT
3568d838 2590 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2591 : isDIGIT_LC_utf8((U8*)locinput))
2592 {
a0ed51b3 2593 sayNO;
9442cb0e 2594 }
6f06b55f 2595 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2596 nextchr = UCHARAT(locinput);
2597 break;
2598 }
ffc61ed2 2599 if (OP(scan) == NDIGIT
9442cb0e 2600 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2601 sayNO;
2602 nextchr = UCHARAT(++locinput);
2603 break;
2604 case CLUMP:
b7c83a7e 2605 if (locinput >= PL_regeol)
a0ed51b3 2606 sayNO;
b7c83a7e
JH
2607 if (do_utf8) {
2608 LOAD_UTF8_CHARCLASS(mark,"~");
2609 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2610 sayNO;
2611 locinput += PL_utf8skip[nextchr];
2612 while (locinput < PL_regeol &&
2613 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2614 locinput += UTF8SKIP(locinput);
2615 if (locinput > PL_regeol)
2616 sayNO;
eb08e2da
JH
2617 }
2618 else
2619 locinput++;
a0ed51b3
LW
2620 nextchr = UCHARAT(locinput);
2621 break;
c8756f30 2622 case REFFL:
3280af22 2623 PL_reg_flags |= RF_tainted;
c8756f30 2624 /* FALL THROUGH */
c277df42 2625 case REF:
c8756f30 2626 case REFF:
c277df42 2627 n = ARG(scan); /* which paren pair */
cf93c79d 2628 ln = PL_regstartp[n];
2c2d71f5 2629 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2630 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2631 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2632 if (ln == PL_regendp[n])
a0d0e21e 2633 break;
a0ed51b3 2634
cf93c79d 2635 s = PL_bostr + ln;
1aa99e6b 2636 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2637 char *l = locinput;
cf93c79d 2638 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2639 /*
2640 * Note that we can't do the "other character" lookup trick as
2641 * in the 8-bit case (no pun intended) because in Unicode we
2642 * have to map both upper and title case to lower case.
2643 */
2644 if (OP(scan) == REFF) {
a2a2844f 2645 STRLEN ulen1, ulen2;
e7ae6809
JH
2646 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
2647 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a0ed51b3
LW
2648 while (s < e) {
2649 if (l >= PL_regeol)
2650 sayNO;
a2a2844f
JH
2651 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2652 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 2653 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 2654 sayNO;
a2a2844f
JH
2655 s += ulen1;
2656 l += ulen2;
a0ed51b3
LW
2657 }
2658 }
2659 locinput = l;
2660 nextchr = UCHARAT(locinput);
2661 break;
2662 }
2663
a0d0e21e 2664 /* Inline the first character, for speed. */
76e3520e 2665 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2666 (OP(scan) == REF ||
2667 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2668 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2669 sayNO;
cf93c79d 2670 ln = PL_regendp[n] - ln;
3280af22 2671 if (locinput + ln > PL_regeol)
4633a7c4 2672 sayNO;
c8756f30
AK
2673 if (ln > 1 && (OP(scan) == REF
2674 ? memNE(s, locinput, ln)
2675 : (OP(scan) == REFF
2676 ? ibcmp(s, locinput, ln)
2677 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2678 sayNO;
a0d0e21e 2679 locinput += ln;
76e3520e 2680 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2681 break;
2682
2683 case NOTHING:
c277df42 2684 case TAIL:
a0d0e21e
LW
2685 break;
2686 case BACK:
2687 break;
c277df42
IZ
2688 case EVAL:
2689 {
2690 dSP;
533c011a 2691 OP_4tree *oop = PL_op;
3280af22
NIS
2692 COP *ocurcop = PL_curcop;
2693 SV **ocurpad = PL_curpad;
c277df42 2694 SV *ret;
9041c2e3 2695
c277df42 2696 n = ARG(scan);
533c011a 2697 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2698 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2699 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2700 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2701
8e5e9ebe
RGS
2702 {
2703 SV **before = SP;
2704 CALLRUNOPS(aTHX); /* Scalar context. */
2705 SPAGAIN;
2706 if (SP == before)
2707 ret = Nullsv; /* protect against empty (?{}) blocks. */
2708 else {
2709 ret = POPs;
2710 PUTBACK;
2711 }
2712 }
2713
0f5d15d6
IZ
2714 PL_op = oop;
2715 PL_curpad = ocurpad;
2716 PL_curcop = ocurcop;
c277df42 2717 if (logical) {
0f5d15d6
IZ
2718 if (logical == 2) { /* Postponed subexpression. */
2719 regexp *re;
22c35a8c 2720 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2721 re_cc_state state;
0f5d15d6
IZ
2722 CHECKPOINT cp, lastcp;
2723
2724 if(SvROK(ret) || SvRMAGICAL(ret)) {
2725 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2726
2727 if(SvMAGICAL(sv))
14befaf4 2728 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2729 }
2730 if (mg) {
2731 re = (regexp *)mg->mg_obj;
df0003d4 2732 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2733 }
2734 else {
2735 STRLEN len;
2736 char *t = SvPV(ret, len);
2737 PMOP pm;
2738 char *oprecomp = PL_regprecomp;
2739 I32 osize = PL_regsize;
2740 I32 onpar = PL_regnpar;
2741
5fcd1c1b 2742 Zero(&pm, 1, PMOP);
cea2e8a9 2743 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2744 if (!(SvFLAGS(ret)
0f5d15d6 2745 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2746 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2747 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2748 PL_regprecomp = oprecomp;
2749 PL_regsize = osize;
2750 PL_regnpar = onpar;
2751 }
2752 DEBUG_r(
9041c2e3 2753 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2754 "Entering embedded `%s%.60s%s%s'\n",
2755 PL_colors[0],
2756 re->precomp,
2757 PL_colors[1],
2758 (strlen(re->precomp) > 60 ? "..." : ""))
2759 );
2760 state.node = next;
2761 state.prev = PL_reg_call_cc;
2762 state.cc = PL_regcc;
2763 state.re = PL_reg_re;
2764
2ab05381 2765 PL_regcc = 0;
9041c2e3 2766
0f5d15d6 2767 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2768 REGCP_SET(lastcp);
0f5d15d6
IZ
2769 cache_re(re);
2770 state.ss = PL_savestack_ix;
2771 *PL_reglastparen = 0;
a01268b5 2772 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2773 PL_reg_call_cc = &state;
2774 PL_reginput = locinput;
2c2d71f5
JH
2775
2776 /* XXXX This is too dramatic a measure... */
2777 PL_reg_maxiter = 0;
2778
0f5d15d6 2779 if (regmatch(re->program + 1)) {
2c914db6
IZ
2780 /* Even though we succeeded, we need to restore
2781 global variables, since we may be wrapped inside
2782 SUSPEND, thus the match may be not finished yet. */
2783
2784 /* XXXX Do this only if SUSPENDed? */
2785 PL_reg_call_cc = state.prev;
2786 PL_regcc = state.cc;
2787 PL_reg_re = state.re;
2788 cache_re(PL_reg_re);
2789
2790 /* XXXX This is too dramatic a measure... */
2791 PL_reg_maxiter = 0;
2792
2793 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2794 ReREFCNT_dec(re);
2795 regcpblow(cp);
2796 sayYES;
2797 }
0f5d15d6 2798 ReREFCNT_dec(re);
02db2b7b 2799 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2800 regcppop();
2801 PL_reg_call_cc = state.prev;
2802 PL_regcc = state.cc;
2803 PL_reg_re = state.re;
d3790889 2804 cache_re(PL_reg_re);
2c2d71f5
JH
2805
2806 /* XXXX This is too dramatic a measure... */
2807 PL_reg_maxiter = 0;
2808
8e514ae6 2809 logical = 0;
0f5d15d6
IZ
2810 sayNO;
2811 }
c277df42 2812 sw = SvTRUE(ret);
0f5d15d6 2813 logical = 0;
a0ed51b3
LW
2814 }
2815 else
3280af22 2816 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2817 break;
2818 }
a0d0e21e 2819 case OPEN:
c277df42 2820 n = ARG(scan); /* which paren pair */
3280af22
NIS
2821 PL_reg_start_tmp[n] = locinput;
2822 if (n > PL_regsize)
2823 PL_regsize = n;
a0d0e21e
LW
2824 break;
2825 case CLOSE:
c277df42 2826 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2827 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2828 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2829 if (n > *PL_reglastparen)
2830 *PL_reglastparen = n;
a01268b5 2831 *PL_reglastcloseparen = n;
a0d0e21e 2832 break;
c277df42
IZ
2833 case GROUPP:
2834 n = ARG(scan); /* which paren pair */
cf93c79d 2835 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2836 break;
2837 case IFTHEN:
2c2d71f5 2838 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2839 if (sw)
2840 next = NEXTOPER(NEXTOPER(scan));
2841 else {
2842 next = scan + ARG(scan);
2843 if (OP(next) == IFTHEN) /* Fake one. */
2844 next = NEXTOPER(NEXTOPER(next));
2845 }
2846 break;
2847 case LOGICAL:
0f5d15d6 2848 logical = scan->flags;
c277df42 2849 break;
2ab05381
IZ
2850/*******************************************************************
2851 PL_regcc contains infoblock about the innermost (...)* loop, and
2852 a pointer to the next outer infoblock.
2853
2854 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2855
2856 1) After matching X, regnode for CURLYX is processed;
2857
9041c2e3 2858 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2859 regmatch() recursively with the starting point at WHILEM node;
2860
2861 3) Each hit of WHILEM node tries to match A and Z (in the order
2862 depending on the current iteration, min/max of {min,max} and
2863 greediness). The information about where are nodes for "A"
2864 and "Z" is read from the infoblock, as is info on how many times "A"
2865 was already matched, and greediness.
2866
2867 4) After A matches, the same WHILEM node is hit again.
2868
2869 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2870 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2871 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2872 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2873 of the external loop.
2874
2875 Currently present infoblocks form a tree with a stem formed by PL_curcc
2876 and whatever it mentions via ->next, and additional attached trees
2877 corresponding to temporarily unset infoblocks as in "5" above.
2878
9041c2e3 2879 In the following picture infoblocks for outer loop of
2ab05381
IZ
2880 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2881 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2882 infoblocks are drawn below the "reset" infoblock.
2883
2884 In fact in the picture below we do not show failed matches for Z and T
2885 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2886 more obvious *why* one needs to *temporary* unset infoblocks.]
2887
2888 Matched REx position InfoBlocks Comment
2889 (Y(A)*?Z)*?T x
2890 Y(A)*?Z)*?T x <- O
2891 Y (A)*?Z)*?T x <- O
2892 Y A)*?Z)*?T x <- O <- I
2893 YA )*?Z)*?T x <- O <- I
2894 YA A)*?Z)*?T x <- O <- I
2895 YAA )*?Z)*?T x <- O <- I
2896 YAA Z)*?T x <- O # Temporary unset I
2897 I
2898
2899 YAAZ Y(A)*?Z)*?T x <- O
2900 I
2901
2902 YAAZY (A)*?Z)*?T x <- O
2903 I
2904
2905 YAAZY A)*?Z)*?T x <- O <- I
2906 I
2907
2908 YAAZYA )*?Z)*?T x <- O <- I
2909 I
2910
2911 YAAZYA Z)*?T x <- O # Temporary unset I
2912 I,I
2913
2914 YAAZYAZ )*?T x <- O
2915 I,I
2916
2917 YAAZYAZ T x # Temporary unset O
2918 O
2919 I,I
2920
2921 YAAZYAZT x
2922 O
2923 I,I
2924 *******************************************************************/
a0d0e21e
LW
2925 case CURLYX: {
2926 CURCUR cc;
3280af22 2927 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2928 /* No need to save/restore up to this paren */
2929 I32 parenfloor = scan->flags;
c277df42
IZ
2930
2931 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2932 next += ARG(next);
3280af22
NIS
2933 cc.oldcc = PL_regcc;
2934 PL_regcc = &cc;
cb434fcc
IZ
2935 /* XXXX Probably it is better to teach regpush to support
2936 parenfloor > PL_regsize... */
2937 if (parenfloor > *PL_reglastparen)
2938 parenfloor = *PL_reglastparen; /* Pessimization... */
2939 cc.parenfloor = parenfloor;
a0d0e21e
LW
2940 cc.cur = -1;
2941 cc.min = ARG1(scan);
2942 cc.max = ARG2(scan);
c277df42 2943 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2944 cc.next = next;
2945 cc.minmod = minmod;
2946 cc.lastloc = 0;
3280af22 2947 PL_reginput = locinput;
a0d0e21e
LW
2948 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2949 regcpblow(cp);
3280af22 2950 PL_regcc = cc.oldcc;
4633a7c4 2951 saySAME(n);
a0d0e21e
LW
2952 }
2953 /* NOT REACHED */
2954 case WHILEM: {
2955 /*
2956 * This is really hard to understand, because after we match
2957 * what we're trying to match, we must make sure the rest of
2c2d71f5 2958 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2959 * to go back UP the parse tree by recursing ever deeper. And
2960 * if it fails, we have to reset our parent's current state
2961 * that we can try again after backing off.
2962 */
2963
c277df42 2964 CHECKPOINT cp, lastcp;
3280af22 2965 CURCUR* cc = PL_regcc;
c277df42
IZ
2966 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2967
4633a7c4 2968 n = cc->cur + 1; /* how many we know we matched */
3280af22 2969 PL_reginput = locinput;
a0d0e21e 2970
c277df42 2971 DEBUG_r(
9041c2e3
NIS
2972 PerlIO_printf(Perl_debug_log,
2973 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2974 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2975 (long)n, (long)cc->min,
c277df42
IZ
2976 (long)cc->max, (long)cc)
2977 );
4633a7c4 2978
a0d0e21e
LW
2979 /* If degenerate scan matches "", assume scan done. */
2980
579cf2c3 2981 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2982 PL_regcc = cc->oldcc;
2ab05381
IZ
2983 if (PL_regcc)
2984 ln = PL_regcc->cur;
c277df42 2985 DEBUG_r(
c3464db5
DD
2986 PerlIO_printf(Perl_debug_log,
2987 "%*s empty match detected, try continuation...\n",
3280af22 2988 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2989 );
a0d0e21e 2990 if (regmatch(cc->next))
4633a7c4 2991 sayYES;
2ab05381
IZ
2992 if (PL_regcc)
2993 PL_regcc->cur = ln;
3280af22 2994 PL_regcc = cc;
4633a7c4 2995 sayNO;
a0d0e21e
LW
2996 }
2997
2998 /* First just match a string of min scans. */
2999
3000 if (n < cc->min) {
3001 cc->cur = n;
3002 cc->lastloc = locinput;
4633a7c4
LW
3003 if (regmatch(cc->scan))
3004 sayYES;
3005 cc->cur = n - 1;
c277df42 3006 cc->lastloc = lastloc;
4633a7c4 3007 sayNO;
a0d0e21e
LW
3008 }
3009
2c2d71f5
JH
3010 if (scan->flags) {
3011 /* Check whether we already were at this position.
3012 Postpone detection until we know the match is not
3013 *that* much linear. */
3014 if (!PL_reg_maxiter) {
3015 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3016 PL_reg_leftiter = PL_reg_maxiter;
3017 }
3018 if (PL_reg_leftiter-- == 0) {
3019 I32 size = (PL_reg_maxiter + 7)/8;
3020 if (PL_reg_poscache) {
3021 if (PL_reg_poscache_size < size) {
3022 Renew(PL_reg_poscache, size, char);
3023 PL_reg_poscache_size = size;
3024 }
3025 Zero(PL_reg_poscache, size, char);
3026 }
3027 else {
3028 PL_reg_poscache_size = size;
3029 Newz(29, PL_reg_poscache, size, char);
3030 }
3031 DEBUG_r(
3032 PerlIO_printf(Perl_debug_log,
3033 "%sDetected a super-linear match, switching on caching%s...\n",
3034 PL_colors[4], PL_colors[5])
3035 );
3036 }
3037 if (PL_reg_leftiter < 0) {
3038 I32 o = locinput - PL_bostr, b;
3039
3040 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
3041 b = o % 8;
3042 o /= 8;
3043 if (PL_reg_poscache[o] & (1<<b)) {
3044 DEBUG_r(
3045 PerlIO_printf(Perl_debug_log,
3046 "%*s already tried at this position...\n",
3047 REPORT_CODE_OFF+PL_regindent*2, "")
3048 );
7821416a 3049 sayNO_SILENT;
2c2d71f5
JH
3050 }
3051 PL_reg_poscache[o] |= (1<<b);
3052 }
3053 }
3054
a0d0e21e
LW
3055 /* Prefer next over scan for minimal matching. */
3056
3057 if (cc->minmod) {
3280af22 3058 PL_regcc = cc->oldcc;
2ab05381
IZ
3059 if (PL_regcc)
3060 ln = PL_regcc->cur;
5f05dabc 3061 cp = regcppush(cc->parenfloor);
02db2b7b 3062 REGCP_SET(lastcp);
5f05dabc 3063 if (regmatch(cc->next)) {
c277df42 3064 regcpblow(cp);
4633a7c4 3065 sayYES; /* All done. */
5f05dabc 3066 }
02db2b7b 3067 REGCP_UNWIND(lastcp);
5f05dabc 3068 regcppop();
2ab05381
IZ
3069 if (PL_regcc)
3070 PL_regcc->cur = ln;
3280af22 3071 PL_regcc = cc;
a0d0e21e 3072
c277df42 3073 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 3074 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3075 && !(PL_reg_flags & RF_warned)) {
3076 PL_reg_flags |= RF_warned;
e476b1b5 3077 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
3078 "Complex regular subexpression recursion",
3079 REG_INFTY - 1);
c277df42 3080 }
4633a7c4 3081 sayNO;
c277df42 3082 }
a687059c 3083
c277df42 3084 DEBUG_r(
c3464db5
DD
3085 PerlIO_printf(Perl_debug_log,
3086 "%*s trying longer...\n",
3280af22 3087 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3088 );
a0d0e21e 3089 /* Try scanning more and see if it helps. */
3280af22 3090 PL_reginput = locinput;
a0d0e21e
LW
3091 cc->cur = n;
3092 cc->lastloc = locinput;
5f05dabc 3093 cp = regcppush(cc->parenfloor);
02db2b7b 3094 REGCP_SET(lastcp);
5f05dabc 3095 if (regmatch(cc->scan)) {
c277df42 3096 regcpblow(cp);
4633a7c4 3097 sayYES;
5f05dabc 3098 }
02db2b7b 3099 REGCP_UNWIND(lastcp);
5f05dabc 3100 regcppop();
4633a7c4 3101 cc->cur = n - 1;
c277df42 3102 cc->lastloc = lastloc;
4633a7c4 3103 sayNO;
a0d0e21e
LW
3104 }
3105
3106 /* Prefer scan over next for maximal matching. */
3107
3108 if (n < cc->max) { /* More greed allowed? */
5f05dabc 3109 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
3110 cc->cur = n;
3111 cc->lastloc = locinput;
02db2b7b 3112 REGCP_SET(lastcp);
5f05dabc 3113 if (regmatch(cc->scan)) {
c277df42 3114 regcpblow(cp);
4633a7c4 3115 sayYES;
5f05dabc 3116 }
02db2b7b 3117 REGCP_UNWIND(lastcp);
a0d0e21e 3118 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3119 PL_reginput = locinput;
c277df42 3120 DEBUG_r(
c3464db5
DD
3121 PerlIO_printf(Perl_debug_log,
3122 "%*s failed, try continuation...\n",
3280af22 3123 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3124 );
3125 }
9041c2e3 3126 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3127 && !(PL_reg_flags & RF_warned)) {
3280af22 3128 PL_reg_flags |= RF_warned;
e476b1b5 3129 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
3130 "Complex regular subexpression recursion",
3131 REG_INFTY - 1);
a0d0e21e
LW
3132 }
3133
3134 /* Failed deeper matches of scan, so see if this one works. */
3280af22 3135 PL_regcc = cc->oldcc;
2ab05381
IZ
3136 if (PL_regcc)
3137 ln = PL_regcc->cur;
a0d0e21e 3138 if (regmatch(cc->next))
4633a7c4 3139 sayYES;
2ab05381
IZ
3140 if (PL_regcc)
3141 PL_regcc->cur = ln;
3280af22 3142 PL_regcc = cc;
4633a7c4 3143 cc->cur = n - 1;
c277df42 3144 cc->lastloc = lastloc;
4633a7c4 3145 sayNO;
a0d0e21e
LW
3146 }
3147 /* NOT REACHED */
9041c2e3 3148 case BRANCHJ:
c277df42
IZ
3149 next = scan + ARG(scan);
3150 if (next == scan)
3151 next = NULL;
3152 inner = NEXTOPER(NEXTOPER(scan));
3153 goto do_branch;
9041c2e3 3154 case BRANCH:
c277df42
IZ
3155 inner = NEXTOPER(scan);
3156 do_branch:
3157 {
c277df42
IZ
3158 c1 = OP(scan);
3159 if (OP(next) != c1) /* No choice. */
3160 next = inner; /* Avoid recursion. */
a0d0e21e 3161 else {
02db2b7b
IZ
3162 I32 lastparen = *PL_reglastparen;
3163 I32 unwind1;
3164 re_unwind_branch_t *uw;
3165
3166 /* Put unwinding data on stack */
3167 unwind1 = SSNEWt(1,re_unwind_branch_t);
3168 uw = SSPTRt(unwind1,re_unwind_branch_t);
3169 uw->prev = unwind;
3170 unwind = unwind1;
3171 uw->type = ((c1 == BRANCH)
3172 ? RE_UNWIND_BRANCH
3173 : RE_UNWIND_BRANCHJ);
3174 uw->lastparen = lastparen;
3175 uw->next = next;
3176 uw->locinput = locinput;
3177 uw->nextchr = nextchr;
3178#ifdef DEBUGGING
3179 uw->regindent = ++PL_regindent;
3180#endif
c277df42 3181
02db2b7b
IZ
3182 REGCP_SET(uw->lastcp);
3183
3184 /* Now go into the first branch */
3185 next = inner;
a687059c 3186 }
a0d0e21e
LW
3187 }
3188 break;
3189 case MINMOD:
3190 minmod = 1;
3191 break;
c277df42
IZ
3192 case CURLYM:
3193 {
00db4c45 3194 I32 l = 0;
c277df42 3195 CHECKPOINT lastcp;
9041c2e3 3196
c277df42
IZ
3197 /* We suppose that the next guy does not need
3198 backtracking: in particular, it is of constant length,
3199 and has no parenths to influence future backrefs. */
3200 ln = ARG1(scan); /* min to match */
3201 n = ARG2(scan); /* max to match */
c277df42
IZ
3202 paren = scan->flags;
3203 if (paren) {
3280af22
NIS
3204 if (paren > PL_regsize)
3205 PL_regsize = paren;
3206 if (paren > *PL_reglastparen)
3207 *PL_reglastparen = paren;
c277df42 3208 }
dc45a647 3209 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3210 if (paren)
3211 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3212 PL_reginput = locinput;
c277df42
IZ
3213 if (minmod) {
3214 minmod = 0;
3215 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3216 sayNO;
f31a99c8
HS
3217 /* if we matched something zero-length we don't need to
3218 backtrack - capturing parens are already defined, so
3219 the caveat in the maximal case doesn't apply
3220
3221 XXXX if ln == 0, we can redo this check first time
3222 through the following loop
3223 */
3224 if (ln && l == 0)
3225 n = ln; /* don't backtrack */
3280af22 3226 locinput = PL_reginput;
cca55fe3 3227 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3228 regnode *text_node = next;
3229
cca55fe3 3230 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3231
cca55fe3 3232 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3233 else {
cca55fe3
JP
3234 if (PL_regkind[(U8)OP(text_node)] == REF) {
3235 I32 n, ln;
3236 n = ARG(text_node); /* which paren pair */
3237 ln = PL_regstartp[n];
3238 /* assume yes if we haven't seen CLOSEn */
3239 if (
3240 *PL_reglastparen < n ||
3241 ln == -1 ||
3242 ln == PL_regendp[n]
3243 ) {
3244 c1 = c2 = -1000;
3245 goto assume_ok_MM;
3246 }
3247 c1 = *(PL_bostr + ln);
3248 }
3249 else { c1 = (U8)*STRING(text_node); }
af5decee 3250 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3251 c2 = PL_fold[c1];
af5decee 3252 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3253 c2 = PL_fold_locale[c1];
3254 else
3255 c2 = c1;
3256 }
a0ed51b3
LW
3257 }
3258 else
c277df42 3259 c1 = c2 = -1000;
cca55fe3 3260 assume_ok_MM:
02db2b7b 3261 REGCP_SET(lastcp);
5f4b28b2 3262 /* This may be improved if l == 0. */
c277df42
IZ
3263 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3264 /* If it could work, try it. */
3265 if (c1 == -1000 ||
3280af22
NIS
3266 UCHARAT(PL_reginput) == c1 ||
3267 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3268 {
3269 if (paren) {
f31a99c8 3270 if (ln) {
cf93c79d
IZ
3271 PL_regstartp[paren] =
3272 HOPc(PL_reginput, -l) - PL_bostr;
3273 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3274 }
3275 else
cf93c79d 3276 PL_regendp[paren] = -1;
c277df42
IZ
3277 }
3278 if (regmatch(next))
3279 sayYES;
02db2b7b 3280 REGCP_UNWIND(lastcp);
c277df42
IZ
3281 }
3282 /* Couldn't or didn't -- move forward. */
3280af22 3283 PL_reginput = locinput;
c277df42
IZ
3284 if (regrepeat_hard(scan, 1, &l)) {
3285 ln++;
3280af22 3286 locinput = PL_reginput;
c277df42
IZ
3287 }
3288 else
3289 sayNO;
3290 }
a0ed51b3
LW
3291 }
3292 else {
c277df42 3293 n = regrepeat_hard(scan, n, &l);
f31a99c8
HS
3294 /* if we matched something zero-length we don't need to
3295 backtrack, unless the minimum count is zero and we
3296 are capturing the result - in that case the capture
3297 being defined or not may affect later execution
3298 */
3299 if (n != 0 && l == 0 && !(paren && ln == 0))
3300 ln = n; /* don't backtrack */
3280af22 3301 locinput = PL_reginput;
c277df42 3302 DEBUG_r(
5c0ca799 3303 PerlIO_printf(Perl_debug_log,
faccc32b 3304 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3305 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3306 (IV) n, (IV)l)
c277df42
IZ
3307 );
3308 if (n >= ln) {
cca55fe3 3309 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3310 regnode *text_node = next;
3311
cca55fe3 3312 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3313
cca55fe3 3314 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3315 else {
cca55fe3
JP
3316 if (PL_regkind[(U8)OP(text_node)] == REF) {
3317 I32 n, ln;
3318 n = ARG(text_node); /* which paren pair */
3319 ln = PL_regstartp[n];
3320 /* assume yes if we haven't seen CLOSEn */
3321 if (
3322 *PL_reglastparen < n ||
3323 ln == -1 ||
3324 ln == PL_regendp[n]
3325 ) {
3326 c1 = c2 = -1000;
3327 goto assume_ok_REG;
3328 }
3329 c1 = *(PL_bostr + ln);
3330 }
3331 else { c1 = (U8)*STRING(text_node); }
3332
af5decee 3333 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3334 c2 = PL_fold[c1];
af5decee 3335 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3336 c2 = PL_fold_locale[c1];
3337 else
3338 c2 = c1;
3339 }
a0ed51b3
LW
3340 }
3341 else
c277df42
IZ
3342 c1 = c2 = -1000;
3343 }
cca55fe3 3344 assume_ok_REG:
02db2b7b 3345 REGCP_SET(lastcp);
c277df42
IZ
3346 while (n >= ln) {
3347 /* If it could work, try it. */
3348 if (c1 == -1000 ||
3280af22
NIS
3349 UCHARAT(PL_reginput) == c1 ||
3350 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3351 {
3352 DEBUG_r(
c3464db5 3353 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3354 "%*s trying tail with n=%"IVdf"...\n",
3355 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3356 );
3357 if (paren) {
3358 if (n) {
cf93c79d
IZ
3359 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3360 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3361 }
a0ed51b3 3362 else
cf93c79d 3363 PL_regendp[paren] = -1;
c277df42 3364 }
a0ed51b3
LW
3365 if (regmatch(next))
3366 sayYES;
02db2b7b 3367 REGCP_UNWIND(lastcp);
a0ed51b3 3368 }
c277df42
IZ
3369 /* Couldn't or didn't -- back up. */
3370 n--;
dfe13c55 3371 locinput = HOPc(locinput, -l);
3280af22 3372 PL_reginput = locinput;
c277df42
IZ
3373 }
3374 }
3375 sayNO;
3376 break;
3377 }
3378 case CURLYN:
3379 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3380 if (paren > PL_regsize)
3381 PL_regsize = paren;
3382 if (paren > *PL_reglastparen)
3383 *PL_reglastparen = paren;
c277df42
IZ
3384 ln = ARG1(scan); /* min to match */
3385 n = ARG2(scan); /* max to match */
dc45a647 3386 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3387 goto repeat;
a0d0e21e 3388 case CURLY:
c277df42 3389 paren = 0;
a0d0e21e
LW
3390 ln = ARG1(scan); /* min to match */
3391 n = ARG2(scan); /* max to match */
dc45a647 3392 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3393 goto repeat;
3394 case STAR:
3395 ln = 0;
c277df42 3396 n = REG_INFTY;
a0d0e21e 3397 scan = NEXTOPER(scan);
c277df42 3398 paren = 0;
a0d0e21e
LW
3399 goto repeat;
3400 case PLUS:
c277df42
IZ
3401 ln = 1;
3402 n = REG_INFTY;
3403 scan = NEXTOPER(scan);
3404 paren = 0;
3405 repeat:
a0d0e21e
LW
3406 /*
3407 * Lookahead to avoid useless match attempts
3408 * when we know what character comes next.
3409 */
5f80c4cf
JP
3410
3411 /*
3412 * Used to only do .*x and .*?x, but now it allows
3413 * for )'s, ('s and (?{ ... })'s to be in the way
3414 * of the quantifier and the EXACT-like node. -- japhy
3415 */
3416
cca55fe3 3417 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3418 U8 *s;
3419 regnode *text_node = next;
3420
cca55fe3 3421 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3422
cca55fe3 3423 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3424 else {
cca55fe3
JP
3425 if (PL_regkind[(U8)OP(text_node)] == REF) {
3426 I32 n, ln;
3427 n = ARG(text_node); /* which paren pair */
3428 ln = PL_regstartp[n];
3429 /* assume yes if we haven't seen CLOSEn */
3430 if (
3431 *PL_reglastparen < n ||
3432 ln == -1 ||
3433 ln == PL_regendp[n]
3434 ) {
3435 c1 = c2 = -1000;
3436 goto assume_ok_easy;
3437 }
9246c65e 3438 s = (U8*)PL_bostr + ln;
cca55fe3
JP
3439 }
3440 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
3441
3442 if (!UTF) {
3443 c2 = c1 = *s;
f65d3ee7 3444 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3445 c2 = PL_fold[c1];
f65d3ee7 3446 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf 3447 c2 = PL_fold_locale[c1];
1aa99e6b 3448 }
5f80c4cf 3449 else { /* UTF */
f65d3ee7 3450 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 3451 STRLEN ulen1, ulen2;
e7ae6809
JH
3452 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
3453 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
3454
3455 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3456 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3457
3458 c1 = utf8_to_uvuni(tmpbuf1, 0);
3459 c2 = utf8_to_uvuni(tmpbuf2, 0);
5f80c4cf
JP
3460 }
3461 else {
3462 c2 = c1 = utf8_to_uvchr(s, NULL);
3463 }
1aa99e6b
IH
3464 }
3465 }
bbce6d69 3466 }
a0d0e21e 3467 else
bbce6d69 3468 c1 = c2 = -1000;
cca55fe3 3469 assume_ok_easy:
3280af22 3470 PL_reginput = locinput;
a0d0e21e 3471 if (minmod) {
c277df42 3472 CHECKPOINT lastcp;
a0d0e21e
LW
3473 minmod = 0;
3474 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3475 sayNO;
a0ed51b3 3476 locinput = PL_reginput;
02db2b7b 3477 REGCP_SET(lastcp);
0fe9bf95 3478 if (c1 != -1000) {
1aa99e6b 3479 char *e; /* Should not check after this */
0fe9bf95
IZ
3480 char *old = locinput;
3481
1aa99e6b 3482 if (n == REG_INFTY) {
0fe9bf95 3483 e = PL_regeol - 1;
1aa99e6b
IH
3484 if (do_utf8)
3485 while (UTF8_IS_CONTINUATION(*(U8*)e))
3486 e--;
3487 }
3488 else if (do_utf8) {
3489 int m = n - ln;
3490 for (e = locinput;
3491 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3492 e += UTF8SKIP(e);
3493 }
3494 else {
3495 e = locinput + n - ln;
3496 if (e >= PL_regeol)
3497 e = PL_regeol - 1;
3498 }
0fe9bf95 3499 while (1) {
1aa99e6b 3500 int count;
0fe9bf95 3501 /* Find place 'next' could work */
1aa99e6b
IH
3502 if (!do_utf8) {
3503 if (c1 == c2) {
a8e8ab15
JH
3504 while (locinput <= e &&
3505 UCHARAT(locinput) != c1)
1aa99e6b
IH
3506 locinput++;
3507 } else {
9041c2e3 3508 while (locinput <= e
a8e8ab15
JH
3509 && UCHARAT(locinput) != c1
3510 && UCHARAT(locinput) != c2)
1aa99e6b
IH
3511 locinput++;
3512 }
3513 count = locinput - old;
3514 }
3515 else {
3516 STRLEN len;
3517 if (c1 == c2) {
3518 for (count = 0;
3519 locinput <= e &&
9041c2e3 3520 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3521 count++)
3522 locinput += len;
3523
3524 } else {
3525 for (count = 0; locinput <= e; count++) {
9041c2e3 3526 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3527 if (c == c1 || c == c2)
3528 break;
9041c2e3 3529 locinput += len;
1aa99e6b
IH
3530 }
3531 }
0fe9bf95 3532 }
9041c2e3 3533 if (locinput > e)
0fe9bf95
IZ
3534 sayNO;
3535 /* PL_reginput == old now */
3536 if (locinput != old) {
3537 ln = 1; /* Did some */
1aa99e6b 3538 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3539 sayNO;
3540 }
3541 /* PL_reginput == locinput now */
29d1e993 3542 TRYPAREN(paren, ln, locinput);
0fe9bf95 3543 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3544 REGCP_UNWIND(lastcp);
0fe9bf95 3545 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3546 old = locinput;
3547 if (do_utf8)
3548 locinput += UTF8SKIP(locinput);
3549 else
3550 locinput++;
0fe9bf95
IZ
3551 }
3552 }
3553 else
c277df42 3554 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3555 UV c;
3556 if (c1 != -1000) {
3557 if (do_utf8)
9041c2e3 3558 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3559 else
9041c2e3 3560 c = UCHARAT(PL_reginput);