This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
DLL descriptions on OS/2
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
c5be433b
GS
83#ifdef PERL_IN_XSUB_RE
84# if defined(PERL_CAPI) || defined(PERL_OBJECT)
85# include "XSUB.h"
86# endif
87#endif
88
a687059c
LW
89#include "regcomp.h"
90
c277df42
IZ
91#define RF_tainted 1 /* tainted information used? */
92#define RF_warned 2 /* warned about big count? */
ce862d02 93#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
94#define RF_utf8 8 /* String contains multibyte chars? */
95
96#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
97
98#define RS_init 1 /* eval environment created */
99#define RS_set 2 /* replsv value is set */
c277df42 100
a687059c
LW
101#ifndef STATIC
102#define STATIC static
103#endif
104
c277df42
IZ
105/*
106 * Forwards.
107 */
108
a0ed51b3 109#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
1aa99e6b 110#define CHR_DIST(a,b) (DO_UTF8(PL_reg_sv) ? utf8_distance(a,b) : a - b)
a0ed51b3 111
dfe13c55
GS
112#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
113#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
1aa99e6b
IH
114#define HOP(pos,off) (DO_UTF8(PL_reg_sv) ? reghop((U8*)pos, off) : (U8*)(pos + off))
115#define HOPMAYBE(pos,off) (DO_UTF8(PL_reg_sv) ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
116#define HOPc(pos,off) ((char*)HOP(pos,off))
117#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 118
efb30f32
HS
119#define HOPBACK(pos, off) ( \
120 (UTF && DO_UTF8(PL_reg_sv)) \
121 ? reghopmaybe((U8*)pos, -off) \
122 : (pos - off >= PL_bostr) \
123 ? (U8*)(pos - off) \
124 : (U8*)NULL \
125)
126#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
127
1aa99e6b
IH
128#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
129#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
130#define HOP3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
131#define HOPMAYBE3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
132#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
133#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
134
8269fa76 135#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 136
8269fa76 137static void restore_pos(pTHXo_ void *arg);
51371543 138
76e3520e 139STATIC CHECKPOINT
cea2e8a9 140S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 141{
3280af22 142 int retval = PL_savestack_ix;
b1ce53c5
JH
143#define REGCP_PAREN_ELEMS 4
144 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
145 int p;
146
b1ce53c5
JH
147#define REGCP_OTHER_ELEMS 5
148 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 149 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 150/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
151 SSPUSHINT(PL_regendp[p]);
152 SSPUSHINT(PL_regstartp[p]);
3280af22 153 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
154 SSPUSHINT(p);
155 }
b1ce53c5 156/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
157 SSPUSHINT(PL_regsize);
158 SSPUSHINT(*PL_reglastparen);
159 SSPUSHPTR(PL_reginput);
41123dfd
JH
160#define REGCP_FRAME_ELEMS 2
161/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
162 * are needed for the regexp context stack bookkeeping. */
163 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 164 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 165
a0d0e21e
LW
166 return retval;
167}
168
c277df42 169/* These are needed since we do not localize EVAL nodes: */
02db2b7b 170# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 171 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 172 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 173
02db2b7b 174# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 175 PerlIO_printf(Perl_debug_log, \
faccc32b 176 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 177 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 178
76e3520e 179STATIC char *
cea2e8a9 180S_regcppop(pTHX)
a0d0e21e 181{
b1ce53c5 182 I32 i;
a0d0e21e
LW
183 U32 paren = 0;
184 char *input;
cf93c79d 185 I32 tmps;
b1ce53c5
JH
186
187 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 188 i = SSPOPINT;
b1ce53c5
JH
189 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
190 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 191 input = (char *) SSPOPPTR;
3280af22
NIS
192 *PL_reglastparen = SSPOPINT;
193 PL_regsize = SSPOPINT;
b1ce53c5
JH
194
195 /* Now restore the parentheses context. */
41123dfd
JH
196 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
197 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 198 paren = (U32)SSPOPINT;
3280af22 199 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
200 PL_regstartp[paren] = SSPOPINT;
201 tmps = SSPOPINT;
3280af22
NIS
202 if (paren <= *PL_reglastparen)
203 PL_regendp[paren] = tmps;
c277df42 204 DEBUG_r(
c3464db5 205 PerlIO_printf(Perl_debug_log,
b900a521 206 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 207 (UV)paren, (IV)PL_regstartp[paren],
b900a521 208 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 209 (IV)PL_regendp[paren],
3280af22 210 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 211 );
a0d0e21e 212 }
c277df42 213 DEBUG_r(
3280af22 214 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 215 PerlIO_printf(Perl_debug_log,
faccc32b
JH
216 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
217 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
218 }
219 );
daf18116 220#if 1
dafc8851
JH
221 /* It would seem that the similar code in regtry()
222 * already takes care of this, and in fact it is in
223 * a better location to since this code can #if 0-ed out
224 * but the code in regtry() is needed or otherwise tests
225 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
226 * (as of patchlevel 7877) will fail. Then again,
227 * this code seems to be necessary or otherwise
228 * building DynaLoader will fail:
229 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
230 * --jhi */
3280af22
NIS
231 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
232 if (paren > PL_regsize)
cf93c79d
IZ
233 PL_regstartp[paren] = -1;
234 PL_regendp[paren] = -1;
a0d0e21e 235 }
dafc8851 236#endif
a0d0e21e
LW
237 return input;
238}
239
0f5d15d6 240STATIC char *
cea2e8a9 241S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
242{
243 I32 tmp = PL_savestack_ix;
244
245 PL_savestack_ix = ss;
246 regcppop();
247 PL_savestack_ix = tmp;
942e002e 248 return Nullch;
0f5d15d6
IZ
249}
250
251typedef struct re_cc_state
252{
253 I32 ss;
254 regnode *node;
255 struct re_cc_state *prev;
256 CURCUR *cc;
257 regexp *re;
258} re_cc_state;
259
02db2b7b 260#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 261
29d1e993
HS
262#define TRYPAREN(paren, n, input) { \
263 if (paren) { \
264 if (n) { \
265 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
266 PL_regendp[paren] = input - PL_bostr; \
267 } \
268 else \
269 PL_regendp[paren] = -1; \
270 } \
271 if (regmatch(next)) \
272 sayYES; \
273 if (paren && n) \
274 PL_regendp[paren] = -1; \
275}
276
277
a687059c 278/*
e50aee73 279 * pregexec and friends
a687059c
LW
280 */
281
282/*
c277df42 283 - pregexec - match a regexp against a string
a687059c 284 */
c277df42 285I32
864dbfa3 286Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 287 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
288/* strend: pointer to null at end of string */
289/* strbeg: real beginning of string */
290/* minend: end of match must be >=minend after stringarg. */
291/* nosave: For optimizations. */
292{
293 return
9041c2e3 294 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
295 nosave ? 0 : REXEC_COPY_STR);
296}
0f5d15d6
IZ
297
298STATIC void
cea2e8a9 299S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
300{
301 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
302#ifdef DEBUGGING
303 PL_regprogram = prog->program;
304#endif
305 PL_regnpar = prog->nparens;
9041c2e3
NIS
306 PL_regdata = prog->data;
307 PL_reg_re = prog;
0f5d15d6 308}
22e551b9 309
9041c2e3 310/*
cad2e5aa
JH
311 * Need to implement the following flags for reg_anch:
312 *
313 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
314 * USE_INTUIT_ML
315 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
316 * INTUIT_AUTORITATIVE_ML
317 * INTUIT_ONCE_NOML - Intuit can match in one location only.
318 * INTUIT_ONCE_ML
319 *
320 * Another flag for this function: SECOND_TIME (so that float substrs
321 * with giant delta may be not rechecked).
322 */
323
324/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
325
2c2d71f5 326/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
327 Otherwise, only SvCUR(sv) is used to get strbeg. */
328
329/* XXXX We assume that strpos is strbeg unless sv. */
330
6eb5f6b9
JH
331/* XXXX Some places assume that there is a fixed substring.
332 An update may be needed if optimizer marks as "INTUITable"
333 RExen without fixed substrings. Similarly, it is assumed that
334 lengths of all the strings are no more than minlen, thus they
335 cannot come from lookahead.
336 (Or minlen should take into account lookahead.) */
337
2c2d71f5
JH
338/* A failure to find a constant substring means that there is no need to make
339 an expensive call to REx engine, thus we celebrate a failure. Similarly,
340 finding a substring too deep into the string means that less calls to
30944b6d
IZ
341 regtry() should be needed.
342
343 REx compiler's optimizer found 4 possible hints:
344 a) Anchored substring;
345 b) Fixed substring;
346 c) Whether we are anchored (beginning-of-line or \G);
347 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 348 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
349 string which does not contradict any of them.
350 */
2c2d71f5 351
6eb5f6b9
JH
352/* Most of decisions we do here should have been done at compile time.
353 The nodes of the REx which we used for the search should have been
354 deleted from the finite automaton. */
355
cad2e5aa
JH
356char *
357Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
358 char *strend, U32 flags, re_scream_pos_data *data)
359{
2c2d71f5 360 register I32 start_shift;
cad2e5aa 361 /* Should be nonnegative! */
2c2d71f5
JH
362 register I32 end_shift;
363 register char *s;
364 register SV *check;
a1933d95 365 char *strbeg;
cad2e5aa
JH
366 char *t;
367 I32 ml_anch;
2c2d71f5 368 char *tmp;
6eb5f6b9
JH
369 register char *other_last = Nullch; /* other substr checked before this */
370 char *check_at; /* check substr found at this pos */
30944b6d
IZ
371#ifdef DEBUGGING
372 char *i_strpos = strpos;
373#endif
cad2e5aa
JH
374
375 DEBUG_r( if (!PL_colorset) reginitcolors() );
376 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 377 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
378 PL_colors[4],PL_colors[5],PL_colors[0],
379 prog->precomp,
380 PL_colors[1],
381 (strlen(prog->precomp) > 60 ? "..." : ""),
382 PL_colors[0],
b900a521 383 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
384 strpos, PL_colors[1],
385 (strend - strpos > 60 ? "..." : ""))
386 );
387
1aa99e6b
IH
388 if (prog->reganch & ROPT_UTF8)
389 PL_reg_flags |= RF_utf8;
390
391 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
2c2d71f5 392 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 393 goto fail;
2c2d71f5 394 }
a1933d95 395 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 396 PL_regeol = strend;
653099ff 397 check = prog->check_substr;
2c2d71f5 398 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
399 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
400 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 401 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 402
7e25d62c
JH
403 if (!ml_anch) {
404 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
405 | ROPT_IMPLICIT)) /* not a real BOL */
406 /* SvCUR is not set on references: SvRV and SvPVX overlap */
407 && sv && !SvROK(sv)
408 && (strpos != strbeg)) {
409 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
410 goto fail;
411 }
412 if (prog->check_offset_min == prog->check_offset_max &&
413 !(prog->reganch & ROPT_SANY_SEEN)) {
2c2d71f5 414 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
415 I32 slen;
416
1aa99e6b 417 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
418 if (SvTAIL(check)) {
419 slen = SvCUR(check); /* >= 1 */
cad2e5aa 420
9041c2e3 421 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
422 || (strend - s == slen && strend[-1] != '\n')) {
423 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
424 goto fail_finish;
cad2e5aa
JH
425 }
426 /* Now should match s[0..slen-2] */
427 slen--;
653099ff 428 if (slen && (*SvPVX(check) != *s
cad2e5aa 429 || (slen > 1
653099ff 430 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
431 report_neq:
432 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
433 goto fail_finish;
434 }
cad2e5aa 435 }
653099ff
GS
436 else if (*SvPVX(check) != *s
437 || ((slen = SvCUR(check)) > 1
438 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
439 goto report_neq;
440 goto success_at_start;
7e25d62c 441 }
cad2e5aa 442 }
2c2d71f5 443 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 444 s = strpos;
2c2d71f5 445 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 446 end_shift = prog->minlen - start_shift -
653099ff 447 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 448 if (!ml_anch) {
653099ff
GS
449 I32 end = prog->check_offset_max + CHR_SVLEN(check)
450 - (SvTAIL(check) != 0);
1aa99e6b 451 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
452
453 if (end_shift < eshift)
454 end_shift = eshift;
455 }
cad2e5aa 456 }
2c2d71f5 457 else { /* Can match at random position */
cad2e5aa
JH
458 ml_anch = 0;
459 s = strpos;
2c2d71f5
JH
460 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
461 /* Should be nonnegative! */
462 end_shift = prog->minlen - start_shift -
653099ff 463 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
464 }
465
2c2d71f5 466#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 467 if (end_shift < 0)
6bbae5e6 468 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
469#endif
470
2c2d71f5
JH
471 restart:
472 /* Find a possible match in the region s..strend by looking for
473 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 474 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
475 I32 p = -1; /* Internal iterator of scream. */
476 I32 *pp = data ? data->scream_pos : &p;
477
2c2d71f5
JH
478 if (PL_screamfirst[BmRARE(check)] >= 0
479 || ( BmRARE(check) == '\n'
480 && (BmPREVIOUS(check) == SvCUR(check) - 1)
481 && SvTAIL(check) ))
9041c2e3 482 s = screaminstr(sv, check,
2c2d71f5 483 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 484 else
2c2d71f5 485 goto fail_finish;
cad2e5aa
JH
486 if (data)
487 *data->scream_olds = s;
488 }
3baa4c62
JH
489 else if (prog->reganch & ROPT_SANY_SEEN)
490 s = fbm_instr((U8*)(s + start_shift),
491 (U8*)(strend - end_shift),
492 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 493 else
1aa99e6b
IH
494 s = fbm_instr(HOP3(s, start_shift, strend),
495 HOP3(strend, -end_shift, strbeg),
2c2d71f5 496 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
497
498 /* Update the count-of-usability, remove useless subpatterns,
499 unshift s. */
2c2d71f5
JH
500
501 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
502 (s ? "Found" : "Did not find"),
503 ((check == prog->anchored_substr) ? "anchored" : "floating"),
504 PL_colors[0],
7b0972df
JH
505 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
506 SvPVX(check),
2c2d71f5
JH
507 PL_colors[1], (SvTAIL(check) ? "$" : ""),
508 (s ? " at offset " : "...\n") ) );
509
510 if (!s)
511 goto fail_finish;
512
6eb5f6b9
JH
513 check_at = s;
514
2c2d71f5 515 /* Finish the diagnostic message */
30944b6d 516 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
517
518 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
519 Start with the other substr.
520 XXXX no SCREAM optimization yet - and a very coarse implementation
521 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
522 *always* match. Probably should be marked during compile...
523 Probably it is right to do no SCREAM here...
524 */
525
526 if (prog->float_substr && prog->anchored_substr) {
30944b6d 527 /* Take into account the "other" substring. */
2c2d71f5
JH
528 /* XXXX May be hopelessly wrong for UTF... */
529 if (!other_last)
6eb5f6b9 530 other_last = strpos;
2c2d71f5 531 if (check == prog->float_substr) {
30944b6d
IZ
532 do_other_anchored:
533 {
1aa99e6b 534 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
535 char *s1 = s;
536
2c2d71f5
JH
537 t = s - prog->check_offset_max;
538 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
539 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 540 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 541 && t > strpos)))
30944b6d 542 /* EMPTY */;
2c2d71f5
JH
543 else
544 t = strpos;
1aa99e6b 545 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
546 if (t < other_last) /* These positions already checked */
547 t = other_last;
1aa99e6b 548 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
549 if (last < last1)
550 last1 = last;
551 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
552 /* On end-of-str: see comment below. */
553 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
554 HOP3(HOP3(last1, prog->anchored_offset, strend)
555 + SvCUR(prog->anchored_substr),
556 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
557 prog->anchored_substr,
558 PL_multiline ? FBMrf_MULTILINE : 0);
559 DEBUG_r(PerlIO_printf(Perl_debug_log,
560 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
561 (s ? "Found" : "Contradicts"),
562 PL_colors[0],
7b0972df
JH
563 (int)(SvCUR(prog->anchored_substr)
564 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
565 SvPVX(prog->anchored_substr),
566 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
567 if (!s) {
568 if (last1 >= last2) {
569 DEBUG_r(PerlIO_printf(Perl_debug_log,
570 ", giving up...\n"));
571 goto fail_finish;
572 }
573 DEBUG_r(PerlIO_printf(Perl_debug_log,
574 ", trying floating at offset %ld...\n",
1aa99e6b
IH
575 (long)(HOP3c(s1, 1, strend) - i_strpos)));
576 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
577 s = HOP3c(last, 1, strend);
2c2d71f5
JH
578 goto restart;
579 }
580 else {
581 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 582 (long)(s - i_strpos)));
1aa99e6b
IH
583 t = HOP3c(s, -prog->anchored_offset, strbeg);
584 other_last = HOP3c(s, 1, strend);
30944b6d 585 s = s1;
2c2d71f5
JH
586 if (t == strpos)
587 goto try_at_start;
2c2d71f5
JH
588 goto try_at_offset;
589 }
30944b6d 590 }
2c2d71f5
JH
591 }
592 else { /* Take into account the floating substring. */
593 char *last, *last1;
594 char *s1 = s;
595
1aa99e6b
IH
596 t = HOP3c(s, -start_shift, strbeg);
597 last1 = last =
598 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
599 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
600 last = HOP3c(t, prog->float_max_offset, strend);
601 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
602 if (s < other_last)
603 s = other_last;
2c2d71f5
JH
604 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
605 /* fbm_instr() takes into account exact value of end-of-str
606 if the check is SvTAIL(ed). Since false positives are OK,
607 and end-of-str is not later than strend we are OK. */
608 s = fbm_instr((unsigned char*)s,
609 (unsigned char*)last + SvCUR(prog->float_substr)
610 - (SvTAIL(prog->float_substr)!=0),
611 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
612 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
613 (s ? "Found" : "Contradicts"),
614 PL_colors[0],
7b0972df
JH
615 (int)(SvCUR(prog->float_substr)
616 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
617 SvPVX(prog->float_substr),
618 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
619 if (!s) {
620 if (last1 == last) {
621 DEBUG_r(PerlIO_printf(Perl_debug_log,
622 ", giving up...\n"));
623 goto fail_finish;
624 }
625 DEBUG_r(PerlIO_printf(Perl_debug_log,
626 ", trying anchored starting at offset %ld...\n",
30944b6d 627 (long)(s1 + 1 - i_strpos)));
803ff556 628 other_last = last;
1aa99e6b 629 s = HOP3c(t, 1, strend);
2c2d71f5
JH
630 goto restart;
631 }
632 else {
633 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 634 (long)(s - i_strpos)));
803ff556 635 other_last = s; /* Fix this later. --Hugo */
30944b6d 636 s = s1;
2c2d71f5
JH
637 if (t == strpos)
638 goto try_at_start;
2c2d71f5
JH
639 goto try_at_offset;
640 }
641 }
cad2e5aa 642 }
2c2d71f5
JH
643
644 t = s - prog->check_offset_max;
2c2d71f5
JH
645 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
646 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
647 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
648 && t > strpos))) {
2c2d71f5
JH
649 /* Fixed substring is found far enough so that the match
650 cannot start at strpos. */
651 try_at_offset:
cad2e5aa 652 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
653 /* Eventually fbm_*() should handle this, but often
654 anchored_offset is not 0, so this check will not be wasted. */
655 /* XXXX In the code below we prefer to look for "^" even in
656 presence of anchored substrings. And we search even
657 beyond the found float position. These pessimizations
658 are historical artefacts only. */
659 find_anchor:
2c2d71f5 660 while (t < strend - prog->minlen) {
cad2e5aa 661 if (*t == '\n') {
4ee3650e 662 if (t < check_at - prog->check_offset_min) {
30944b6d 663 if (prog->anchored_substr) {
4ee3650e
GS
664 /* Since we moved from the found position,
665 we definitely contradict the found anchored
30944b6d
IZ
666 substr. Due to the above check we do not
667 contradict "check" substr.
668 Thus we can arrive here only if check substr
669 is float. Redo checking for "other"=="fixed".
670 */
9041c2e3 671 strpos = t + 1;
30944b6d
IZ
672 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
673 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
674 goto do_other_anchored;
675 }
4ee3650e
GS
676 /* We don't contradict the found floating substring. */
677 /* XXXX Why not check for STCLASS? */
cad2e5aa 678 s = t + 1;
2c2d71f5 679 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 680 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
681 goto set_useful;
682 }
4ee3650e
GS
683 /* Position contradicts check-string */
684 /* XXXX probably better to look for check-string
685 than for "\n", so one should lower the limit for t? */
686 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 687 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 688 other_last = strpos = s = t + 1;
cad2e5aa
JH
689 goto restart;
690 }
691 t++;
692 }
2c2d71f5
JH
693 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
694 PL_colors[0],PL_colors[1]));
695 goto fail_finish;
cad2e5aa 696 }
f5952150
GS
697 else {
698 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
699 PL_colors[0],PL_colors[1]));
700 }
cad2e5aa
JH
701 s = t;
702 set_useful:
2c2d71f5 703 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
704 }
705 else {
f5952150 706 /* The found string does not prohibit matching at strpos,
2c2d71f5 707 - no optimization of calling REx engine can be performed,
f5952150
GS
708 unless it was an MBOL and we are not after MBOL,
709 or a future STCLASS check will fail this. */
2c2d71f5
JH
710 try_at_start:
711 /* Even in this situation we may use MBOL flag if strpos is offset
712 wrt the start of the string. */
05b4157f 713 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 714 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
715 /* May be due to an implicit anchor of m{.*foo} */
716 && !(prog->reganch & ROPT_IMPLICIT))
717 {
cad2e5aa
JH
718 t = strpos;
719 goto find_anchor;
720 }
30944b6d 721 DEBUG_r( if (ml_anch)
f5952150
GS
722 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
723 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 724 );
2c2d71f5 725 success_at_start:
30944b6d 726 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 727 && prog->check_substr /* Could be deleted already */
cad2e5aa 728 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
729 && prog->check_substr == prog->float_substr)
730 {
cad2e5aa 731 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 732 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
733 SvREFCNT_dec(prog->check_substr);
734 prog->check_substr = Nullsv; /* disable */
735 prog->float_substr = Nullsv; /* clear */
5e39e1e5 736 check = Nullsv; /* abort */
cad2e5aa 737 s = strpos;
3cf5c195
IZ
738 /* XXXX This is a remnant of the old implementation. It
739 looks wasteful, since now INTUIT can use many
6eb5f6b9 740 other heuristics. */
cad2e5aa
JH
741 prog->reganch &= ~RE_USE_INTUIT;
742 }
743 else
744 s = strpos;
745 }
746
6eb5f6b9
JH
747 /* Last resort... */
748 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
749 if (prog->regstclass) {
750 /* minlen == 0 is possible if regstclass is \b or \B,
751 and the fixed substr is ''$.
752 Since minlen is already taken into account, s+1 is before strend;
753 accidentally, minlen >= 1 guaranties no false positives at s + 1
754 even for \b or \B. But (minlen? 1 : 0) below assumes that
755 regstclass does not come from lookahead... */
756 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
757 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 758 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 759 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 760 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 761 : 1);
6eb5f6b9 762 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
763 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
764 : (prog->float_substr
765 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
766 cl_l, strend)
767 : strend);
a1933d95 768 char *startpos = strbeg;
6eb5f6b9
JH
769
770 t = s;
76384e4a 771 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 772 PL_regdata = prog->data;
76384e4a
GS
773 PL_bostr = startpos;
774 }
6eb5f6b9
JH
775 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
776 if (!s) {
777#ifdef DEBUGGING
778 char *what;
779#endif
780 if (endpos == strend) {
781 DEBUG_r( PerlIO_printf(Perl_debug_log,
782 "Could not match STCLASS...\n") );
783 goto fail;
784 }
66e933ab
GS
785 DEBUG_r( PerlIO_printf(Perl_debug_log,
786 "This position contradicts STCLASS...\n") );
653099ff
GS
787 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
788 goto fail;
6eb5f6b9
JH
789 /* Contradict one of substrings */
790 if (prog->anchored_substr) {
6eb5f6b9
JH
791 if (prog->anchored_substr == check) {
792 DEBUG_r( what = "anchored" );
793 hop_and_restart:
1aa99e6b 794 s = HOP3c(t, 1, strend);
66e933ab
GS
795 if (s + start_shift + end_shift > strend) {
796 /* XXXX Should be taken into account earlier? */
797 DEBUG_r( PerlIO_printf(Perl_debug_log,
798 "Could not match STCLASS...\n") );
799 goto fail;
800 }
5e39e1e5
HS
801 if (!check)
802 goto giveup;
6eb5f6b9 803 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 804 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
805 what, (long)(s + start_shift - i_strpos)) );
806 goto restart;
807 }
66e933ab 808 /* Have both, check_string is floating */
6eb5f6b9
JH
809 if (t + start_shift >= check_at) /* Contradicts floating=check */
810 goto retry_floating_check;
811 /* Recheck anchored substring, but not floating... */
9041c2e3 812 s = check_at;
5e39e1e5
HS
813 if (!check)
814 goto giveup;
6eb5f6b9 815 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 816 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
817 (long)(other_last - i_strpos)) );
818 goto do_other_anchored;
819 }
60e71179
GS
820 /* Another way we could have checked stclass at the
821 current position only: */
822 if (ml_anch) {
823 s = t = t + 1;
5e39e1e5
HS
824 if (!check)
825 goto giveup;
60e71179 826 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
827 "Looking for /%s^%s/m starting at offset %ld...\n",
828 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 829 goto try_at_offset;
66e933ab 830 }
60e71179
GS
831 if (!prog->float_substr) /* Could have been deleted */
832 goto fail;
6eb5f6b9
JH
833 /* Check is floating subtring. */
834 retry_floating_check:
835 t = check_at - start_shift;
836 DEBUG_r( what = "floating" );
837 goto hop_and_restart;
838 }
839 DEBUG_r( if (t != s)
9041c2e3 840 PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
841 "By STCLASS: moving %ld --> %ld\n",
842 (long)(t - i_strpos), (long)(s - i_strpos));
843 else
9041c2e3 844 PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
845 "Does not contradict STCLASS...\n") );
846 }
5e39e1e5
HS
847 giveup:
848 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
849 PL_colors[4], (check ? "Guessed" : "Giving up"),
850 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 851 return s;
2c2d71f5
JH
852
853 fail_finish: /* Substring not found */
66e933ab
GS
854 if (prog->check_substr) /* could be removed already */
855 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 856 fail:
2c2d71f5 857 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
858 PL_colors[4],PL_colors[5]));
859 return Nullch;
860}
9661b544 861
6eb5f6b9 862/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
863STATIC char *
864S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 865{
6eb5f6b9
JH
866 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
867 char *m;
d8093b23
G
868 STRLEN ln;
869 unsigned int c1;
870 unsigned int c2;
6eb5f6b9
JH
871 char *e;
872 register I32 tmp = 1; /* Scratch variable? */
ffc61ed2 873 register bool do_utf8 = DO_UTF8(PL_reg_sv);
cad2e5aa 874
6eb5f6b9
JH
875 /* We know what class it must start with. */
876 switch (OP(c)) {
6eb5f6b9 877 case ANYOF:
a0ed51b3 878 while (s < strend) {
ffc61ed2 879 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
880 if (tmp && (norun || regtry(prog, s)))
881 goto got_it;
882 else
883 tmp = doevery;
a0ed51b3 884 }
6eb5f6b9
JH
885 else
886 tmp = 1;
ffc61ed2 887 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 888 }
6eb5f6b9
JH
889 break;
890 case EXACTF:
891 m = STRING(c);
892 ln = STR_LEN(c);
1aa99e6b
IH
893 if (UTF) {
894 c1 = to_utf8_lower((U8*)m);
895 c2 = to_utf8_upper((U8*)m);
896 }
897 else {
898 c1 = *(U8*)m;
899 c2 = PL_fold[c1];
900 }
6eb5f6b9
JH
901 goto do_exactf;
902 case EXACTFL:
903 m = STRING(c);
904 ln = STR_LEN(c);
d8093b23 905 c1 = *(U8*)m;
6eb5f6b9
JH
906 c2 = PL_fold_locale[c1];
907 do_exactf:
908 e = strend - ln;
b3c9acc1 909
6eb5f6b9
JH
910 if (norun && e < s)
911 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
912
913 if (do_utf8) {
914 STRLEN len;
915 if (c1 == c2)
916 while (s <= e) {
9041c2e3 917 if ( utf8_to_uvchr((U8*)s, &len) == c1
1aa99e6b
IH
918 && regtry(prog, s) )
919 goto got_it;
920 s += len;
921 }
922 else
923 while (s <= e) {
9041c2e3 924 UV c = utf8_to_uvchr((U8*)s, &len);
1aa99e6b
IH
925 if ( (c == c1 || c == c2) && regtry(prog, s) )
926 goto got_it;
927 s += len;
928 }
929 }
930 else {
931 if (c1 == c2)
932 while (s <= e) {
933 if ( *(U8*)s == c1
934 && (ln == 1 || !(OP(c) == EXACTF
935 ? ibcmp(s, m, ln)
936 : ibcmp_locale(s, m, ln)))
937 && (norun || regtry(prog, s)) )
938 goto got_it;
939 s++;
940 }
941 else
942 while (s <= e) {
943 if ( (*(U8*)s == c1 || *(U8*)s == c2)
944 && (ln == 1 || !(OP(c) == EXACTF
945 ? ibcmp(s, m, ln)
946 : ibcmp_locale(s, m, ln)))
947 && (norun || regtry(prog, s)) )
948 goto got_it;
949 s++;
950 }
b3c9acc1
IZ
951 }
952 break;
bbce6d69 953 case BOUNDL:
3280af22 954 PL_reg_flags |= RF_tainted;
bbce6d69 955 /* FALL THROUGH */
a0d0e21e 956 case BOUND:
ffc61ed2 957 if (do_utf8) {
12d33761 958 if (s == PL_bostr)
ffc61ed2
JH
959 tmp = '\n';
960 else {
1aa99e6b 961 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 962
2b9d42f0 963 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
964 }
965 tmp = ((OP(c) == BOUND ?
9041c2e3 966 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 967 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
968 while (s < strend) {
969 if (tmp == !(OP(c) == BOUND ?
3568d838 970 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
971 isALNUM_LC_utf8((U8*)s)))
972 {
973 tmp = !tmp;
974 if ((norun || regtry(prog, s)))
975 goto got_it;
976 }
977 s += UTF8SKIP(s);
a687059c 978 }
a0d0e21e 979 }
667bb95a 980 else {
12d33761 981 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
982 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
983 while (s < strend) {
984 if (tmp ==
985 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
986 tmp = !tmp;
987 if ((norun || regtry(prog, s)))
988 goto got_it;
989 }
990 s++;
a0ed51b3 991 }
a0ed51b3 992 }
6eb5f6b9 993 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
994 goto got_it;
995 break;
bbce6d69 996 case NBOUNDL:
3280af22 997 PL_reg_flags |= RF_tainted;
bbce6d69 998 /* FALL THROUGH */
a0d0e21e 999 case NBOUND:
ffc61ed2 1000 if (do_utf8) {
12d33761 1001 if (s == PL_bostr)
ffc61ed2
JH
1002 tmp = '\n';
1003 else {
1aa99e6b 1004 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1005
2b9d42f0 1006 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1007 }
1008 tmp = ((OP(c) == NBOUND ?
9041c2e3 1009 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1010 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1011 while (s < strend) {
1012 if (tmp == !(OP(c) == NBOUND ?
3568d838 1013 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1014 isALNUM_LC_utf8((U8*)s)))
1015 tmp = !tmp;
1016 else if ((norun || regtry(prog, s)))
1017 goto got_it;
1018 s += UTF8SKIP(s);
1019 }
a0d0e21e 1020 }
667bb95a 1021 else {
12d33761 1022 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1023 tmp = ((OP(c) == NBOUND ?
1024 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1025 while (s < strend) {
1026 if (tmp ==
1027 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1028 tmp = !tmp;
1029 else if ((norun || regtry(prog, s)))
1030 goto got_it;
1031 s++;
1032 }
a0ed51b3 1033 }
6eb5f6b9 1034 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1035 goto got_it;
1036 break;
a0d0e21e 1037 case ALNUM:
ffc61ed2 1038 if (do_utf8) {
8269fa76 1039 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1040 while (s < strend) {
3568d838 1041 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1042 if (tmp && (norun || regtry(prog, s)))
1043 goto got_it;
1044 else
1045 tmp = doevery;
1046 }
bbce6d69 1047 else
ffc61ed2
JH
1048 tmp = 1;
1049 s += UTF8SKIP(s);
bbce6d69 1050 }
bbce6d69 1051 }
ffc61ed2
JH
1052 else {
1053 while (s < strend) {
1054 if (isALNUM(*s)) {
1055 if (tmp && (norun || regtry(prog, s)))
1056 goto got_it;
1057 else
1058 tmp = doevery;
1059 }
a0ed51b3 1060 else
ffc61ed2
JH
1061 tmp = 1;
1062 s++;
a0ed51b3 1063 }
a0ed51b3
LW
1064 }
1065 break;
bbce6d69 1066 case ALNUML:
3280af22 1067 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1068 if (do_utf8) {
1069 while (s < strend) {
1070 if (isALNUM_LC_utf8((U8*)s)) {
1071 if (tmp && (norun || regtry(prog, s)))
1072 goto got_it;
1073 else
1074 tmp = doevery;
1075 }
a687059c 1076 else
ffc61ed2
JH
1077 tmp = 1;
1078 s += UTF8SKIP(s);
a0d0e21e 1079 }
a0d0e21e 1080 }
ffc61ed2
JH
1081 else {
1082 while (s < strend) {
1083 if (isALNUM_LC(*s)) {
1084 if (tmp && (norun || regtry(prog, s)))
1085 goto got_it;
1086 else
1087 tmp = doevery;
1088 }
a0ed51b3 1089 else
ffc61ed2
JH
1090 tmp = 1;
1091 s++;
a0ed51b3 1092 }
a0ed51b3
LW
1093 }
1094 break;
a0d0e21e 1095 case NALNUM:
ffc61ed2 1096 if (do_utf8) {
8269fa76 1097 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1098 while (s < strend) {
3568d838 1099 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1100 if (tmp && (norun || regtry(prog, s)))
1101 goto got_it;
1102 else
1103 tmp = doevery;
1104 }
bbce6d69 1105 else
ffc61ed2
JH
1106 tmp = 1;
1107 s += UTF8SKIP(s);
bbce6d69 1108 }
bbce6d69 1109 }
ffc61ed2
JH
1110 else {
1111 while (s < strend) {
1112 if (!isALNUM(*s)) {
1113 if (tmp && (norun || regtry(prog, s)))
1114 goto got_it;
1115 else
1116 tmp = doevery;
1117 }
a0ed51b3 1118 else
ffc61ed2
JH
1119 tmp = 1;
1120 s++;
a0ed51b3 1121 }
a0ed51b3
LW
1122 }
1123 break;
bbce6d69 1124 case NALNUML:
3280af22 1125 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1126 if (do_utf8) {
1127 while (s < strend) {
1128 if (!isALNUM_LC_utf8((U8*)s)) {
1129 if (tmp && (norun || regtry(prog, s)))
1130 goto got_it;
1131 else
1132 tmp = doevery;
1133 }
a687059c 1134 else
ffc61ed2
JH
1135 tmp = 1;
1136 s += UTF8SKIP(s);
a687059c 1137 }
a0d0e21e 1138 }
ffc61ed2
JH
1139 else {
1140 while (s < strend) {
1141 if (!isALNUM_LC(*s)) {
1142 if (tmp && (norun || regtry(prog, s)))
1143 goto got_it;
1144 else
1145 tmp = doevery;
1146 }
a0ed51b3 1147 else
ffc61ed2
JH
1148 tmp = 1;
1149 s++;
a0ed51b3 1150 }
a0ed51b3
LW
1151 }
1152 break;
a0d0e21e 1153 case SPACE:
ffc61ed2 1154 if (do_utf8) {
8269fa76 1155 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1156 while (s < strend) {
3568d838 1157 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1158 if (tmp && (norun || regtry(prog, s)))
1159 goto got_it;
1160 else
1161 tmp = doevery;
1162 }
a0d0e21e 1163 else
ffc61ed2
JH
1164 tmp = 1;
1165 s += UTF8SKIP(s);
2304df62 1166 }
a0d0e21e 1167 }
ffc61ed2
JH
1168 else {
1169 while (s < strend) {
1170 if (isSPACE(*s)) {
1171 if (tmp && (norun || regtry(prog, s)))
1172 goto got_it;
1173 else
1174 tmp = doevery;
1175 }
a0ed51b3 1176 else
ffc61ed2
JH
1177 tmp = 1;
1178 s++;
a0ed51b3 1179 }
a0ed51b3
LW
1180 }
1181 break;
bbce6d69 1182 case SPACEL:
3280af22 1183 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1184 if (do_utf8) {
1185 while (s < strend) {
1186 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1187 if (tmp && (norun || regtry(prog, s)))
1188 goto got_it;
1189 else
1190 tmp = doevery;
1191 }
bbce6d69 1192 else
ffc61ed2
JH
1193 tmp = 1;
1194 s += UTF8SKIP(s);
bbce6d69 1195 }
bbce6d69 1196 }
ffc61ed2
JH
1197 else {
1198 while (s < strend) {
1199 if (isSPACE_LC(*s)) {
1200 if (tmp && (norun || regtry(prog, s)))
1201 goto got_it;
1202 else
1203 tmp = doevery;
1204 }
a0ed51b3 1205 else
ffc61ed2
JH
1206 tmp = 1;
1207 s++;
a0ed51b3 1208 }
a0ed51b3
LW
1209 }
1210 break;
a0d0e21e 1211 case NSPACE:
ffc61ed2 1212 if (do_utf8) {
8269fa76 1213 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1214 while (s < strend) {
3568d838 1215 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1216 if (tmp && (norun || regtry(prog, s)))
1217 goto got_it;
1218 else
1219 tmp = doevery;
1220 }
a0d0e21e 1221 else
ffc61ed2
JH
1222 tmp = 1;
1223 s += UTF8SKIP(s);
a687059c 1224 }
a0d0e21e 1225 }
ffc61ed2
JH
1226 else {
1227 while (s < strend) {
1228 if (!isSPACE(*s)) {
1229 if (tmp && (norun || regtry(prog, s)))
1230 goto got_it;
1231 else
1232 tmp = doevery;
1233 }
a0ed51b3 1234 else
ffc61ed2
JH
1235 tmp = 1;
1236 s++;
a0ed51b3 1237 }
a0ed51b3
LW
1238 }
1239 break;
bbce6d69 1240 case NSPACEL:
3280af22 1241 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1242 if (do_utf8) {
1243 while (s < strend) {
1244 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1245 if (tmp && (norun || regtry(prog, s)))
1246 goto got_it;
1247 else
1248 tmp = doevery;
1249 }
bbce6d69 1250 else
ffc61ed2
JH
1251 tmp = 1;
1252 s += UTF8SKIP(s);
bbce6d69 1253 }
bbce6d69 1254 }
ffc61ed2
JH
1255 else {
1256 while (s < strend) {
1257 if (!isSPACE_LC(*s)) {
1258 if (tmp && (norun || regtry(prog, s)))
1259 goto got_it;
1260 else
1261 tmp = doevery;
1262 }
a0ed51b3 1263 else
ffc61ed2
JH
1264 tmp = 1;
1265 s++;
a0ed51b3 1266 }
a0ed51b3
LW
1267 }
1268 break;
a0d0e21e 1269 case DIGIT:
ffc61ed2 1270 if (do_utf8) {
8269fa76 1271 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1272 while (s < strend) {
3568d838 1273 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1274 if (tmp && (norun || regtry(prog, s)))
1275 goto got_it;
1276 else
1277 tmp = doevery;
1278 }
a0d0e21e 1279 else
ffc61ed2
JH
1280 tmp = 1;
1281 s += UTF8SKIP(s);
2b69d0c2 1282 }
a0d0e21e 1283 }
ffc61ed2
JH
1284 else {
1285 while (s < strend) {
1286 if (isDIGIT(*s)) {
1287 if (tmp && (norun || regtry(prog, s)))
1288 goto got_it;
1289 else
1290 tmp = doevery;
1291 }
a0ed51b3 1292 else
ffc61ed2
JH
1293 tmp = 1;
1294 s++;
a0ed51b3 1295 }
a0ed51b3
LW
1296 }
1297 break;
b8c5462f
JH
1298 case DIGITL:
1299 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1300 if (do_utf8) {
1301 while (s < strend) {
1302 if (isDIGIT_LC_utf8((U8*)s)) {
1303 if (tmp && (norun || regtry(prog, s)))
1304 goto got_it;
1305 else
1306 tmp = doevery;
1307 }
b8c5462f 1308 else
ffc61ed2
JH
1309 tmp = 1;
1310 s += UTF8SKIP(s);
b8c5462f 1311 }
b8c5462f 1312 }
ffc61ed2
JH
1313 else {
1314 while (s < strend) {
1315 if (isDIGIT_LC(*s)) {
1316 if (tmp && (norun || regtry(prog, s)))
1317 goto got_it;
1318 else
1319 tmp = doevery;
1320 }
b8c5462f 1321 else
ffc61ed2
JH
1322 tmp = 1;
1323 s++;
b8c5462f 1324 }
b8c5462f
JH
1325 }
1326 break;
a0d0e21e 1327 case NDIGIT:
ffc61ed2 1328 if (do_utf8) {
8269fa76 1329 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1330 while (s < strend) {
3568d838 1331 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1332 if (tmp && (norun || regtry(prog, s)))
1333 goto got_it;
1334 else
1335 tmp = doevery;
1336 }
a0d0e21e 1337 else
ffc61ed2
JH
1338 tmp = 1;
1339 s += UTF8SKIP(s);
a687059c 1340 }
a0d0e21e 1341 }
ffc61ed2
JH
1342 else {
1343 while (s < strend) {
1344 if (!isDIGIT(*s)) {
1345 if (tmp && (norun || regtry(prog, s)))
1346 goto got_it;
1347 else
1348 tmp = doevery;
1349 }
a0ed51b3 1350 else
ffc61ed2
JH
1351 tmp = 1;
1352 s++;
a0ed51b3 1353 }
a0ed51b3
LW
1354 }
1355 break;
b8c5462f
JH
1356 case NDIGITL:
1357 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1358 if (do_utf8) {
1359 while (s < strend) {
1360 if (!isDIGIT_LC_utf8((U8*)s)) {
1361 if (tmp && (norun || regtry(prog, s)))
1362 goto got_it;
1363 else
1364 tmp = doevery;
1365 }
b8c5462f 1366 else
ffc61ed2
JH
1367 tmp = 1;
1368 s += UTF8SKIP(s);
b8c5462f 1369 }
a0ed51b3 1370 }
ffc61ed2
JH
1371 else {
1372 while (s < strend) {
1373 if (!isDIGIT_LC(*s)) {
1374 if (tmp && (norun || regtry(prog, s)))
1375 goto got_it;
1376 else
1377 tmp = doevery;
1378 }
cf93c79d 1379 else
ffc61ed2
JH
1380 tmp = 1;
1381 s++;
b8c5462f 1382 }
b8c5462f
JH
1383 }
1384 break;
b3c9acc1 1385 default:
3c3eec57
GS
1386 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1387 break;
d6a28714 1388 }
6eb5f6b9
JH
1389 return 0;
1390 got_it:
1391 return s;
1392}
1393
1394/*
1395 - regexec_flags - match a regexp against a string
1396 */
1397I32
1398Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1399 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1400/* strend: pointer to null at end of string */
1401/* strbeg: real beginning of string */
1402/* minend: end of match must be >=minend after stringarg. */
1403/* data: May be used for some additional optimizations. */
1404/* nosave: For optimizations. */
1405{
6eb5f6b9
JH
1406 register char *s;
1407 register regnode *c;
1408 register char *startpos = stringarg;
6eb5f6b9
JH
1409 I32 minlen; /* must match at least this many chars */
1410 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1411 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1412 constant substr. */ /* CC */
1413 I32 end_shift = 0; /* Same for the end. */ /* CC */
1414 I32 scream_pos = -1; /* Internal iterator of scream. */
1415 char *scream_olds;
1416 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1417 bool do_utf8 = DO_UTF8(sv);
6eb5f6b9
JH
1418
1419 PL_regcc = 0;
1420
1421 cache_re(prog);
1422#ifdef DEBUGGING
aea4f609 1423 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1424#endif
1425
1426 /* Be paranoid... */
1427 if (prog == NULL || startpos == NULL) {
1428 Perl_croak(aTHX_ "NULL regexp parameter");
1429 return 0;
1430 }
1431
1432 minlen = prog->minlen;
1aa99e6b 1433 if (do_utf8) {
3baa4c62
JH
1434 if (!(prog->reganch & ROPT_SANY_SEEN))
1435 if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
1aa99e6b
IH
1436 }
1437 else {
1438 if (strend - startpos < minlen) goto phooey;
1439 }
6eb5f6b9 1440
6eb5f6b9
JH
1441 /* Check validity of program. */
1442 if (UCHARAT(prog->program) != REG_MAGIC) {
1443 Perl_croak(aTHX_ "corrupted regexp program");
1444 }
1445
1446 PL_reg_flags = 0;
1447 PL_reg_eval_set = 0;
1448 PL_reg_maxiter = 0;
1449
1450 if (prog->reganch & ROPT_UTF8)
1451 PL_reg_flags |= RF_utf8;
1452
1453 /* Mark beginning of line for ^ and lookbehind. */
1454 PL_regbol = startpos;
1455 PL_bostr = strbeg;
1456 PL_reg_sv = sv;
1457
1458 /* Mark end of line for $ (and such) */
1459 PL_regeol = strend;
1460
1461 /* see how far we have to get to not match where we matched before */
1462 PL_regtill = startpos+minend;
1463
1464 /* We start without call_cc context. */
1465 PL_reg_call_cc = 0;
1466
1467 /* If there is a "must appear" string, look for it. */
1468 s = startpos;
1469
1470 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1471 MAGIC *mg;
1472
1473 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1474 PL_reg_ganch = startpos;
1475 else if (sv && SvTYPE(sv) >= SVt_PVMG
1476 && SvMAGIC(sv)
1477 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1478 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1479 if (prog->reganch & ROPT_ANCH_GPOS) {
1480 if (s > PL_reg_ganch)
1481 goto phooey;
1482 s = PL_reg_ganch;
1483 }
1484 }
1485 else /* pos() not defined */
1486 PL_reg_ganch = strbeg;
1487 }
1488
699c3c34
JH
1489 if (do_utf8 == (UTF!=0) &&
1490 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1491 re_scream_pos_data d;
1492
1493 d.scream_olds = &scream_olds;
1494 d.scream_pos = &scream_pos;
1495 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1496 if (!s)
1497 goto phooey; /* not present */
1498 }
1499
1500 DEBUG_r( if (!PL_colorset) reginitcolors() );
1501 DEBUG_r(PerlIO_printf(Perl_debug_log,
1502 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1503 PL_colors[4],PL_colors[5],PL_colors[0],
1504 prog->precomp,
1505 PL_colors[1],
1506 (strlen(prog->precomp) > 60 ? "..." : ""),
1507 PL_colors[0],
1508 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1509 startpos, PL_colors[1],
1510 (strend - startpos > 60 ? "..." : ""))
1511 );
1512
1513 /* Simplest case: anchored match need be tried only once. */
1514 /* [unless only anchor is BOL and multiline is set] */
1515 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1516 if (s == startpos && regtry(prog, startpos))
1517 goto got_it;
1518 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1519 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1520 {
1521 char *end;
1522
1523 if (minlen)
1524 dontbother = minlen - 1;
1aa99e6b 1525 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1526 /* for multiline we only have to try after newlines */
1527 if (prog->check_substr) {
1528 if (s == startpos)
1529 goto after_try;
1530 while (1) {
1531 if (regtry(prog, s))
1532 goto got_it;
1533 after_try:
1534 if (s >= end)
1535 goto phooey;
1536 if (prog->reganch & RE_USE_INTUIT) {
1537 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1538 if (!s)
1539 goto phooey;
1540 }
1541 else
1542 s++;
1543 }
1544 } else {
1545 if (s > startpos)
1546 s--;
1547 while (s < end) {
1548 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1549 if (regtry(prog, s))
1550 goto got_it;
1551 }
1552 }
1553 }
1554 }
1555 goto phooey;
1556 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1557 if (regtry(prog, PL_reg_ganch))
1558 goto got_it;
1559 goto phooey;
1560 }
1561
1562 /* Messy cases: unanchored match. */
9041c2e3 1563 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1564 /* we have /x+whatever/ */
1565 /* it must be a one character string (XXXX Except UTF?) */
1566 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1567#ifdef DEBUGGING
1568 int did_match = 0;
1569#endif
1570
1aa99e6b 1571 if (do_utf8) {
6eb5f6b9
JH
1572 while (s < strend) {
1573 if (*s == ch) {
bf93d4cc 1574 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1575 if (regtry(prog, s)) goto got_it;
1576 s += UTF8SKIP(s);
1577 while (s < strend && *s == ch)
1578 s += UTF8SKIP(s);
1579 }
1580 s += UTF8SKIP(s);
1581 }
1582 }
1583 else {
1584 while (s < strend) {
1585 if (*s == ch) {
bf93d4cc 1586 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1587 if (regtry(prog, s)) goto got_it;
1588 s++;
1589 while (s < strend && *s == ch)
1590 s++;
1591 }
1592 s++;
1593 }
1594 }
bf93d4cc
GS
1595 DEBUG_r(did_match ||
1596 PerlIO_printf(Perl_debug_log,
1597 "Did not find anchored character...\n"));
6eb5f6b9
JH
1598 }
1599 /*SUPPRESS 560*/
1aa99e6b
IH
1600 else if (do_utf8 == (UTF!=0) &&
1601 (prog->anchored_substr != Nullsv
9041c2e3 1602 || (prog->float_substr != Nullsv
1aa99e6b 1603 && prog->float_max_offset < strend - s))) {
9041c2e3 1604 SV *must = prog->anchored_substr
6eb5f6b9 1605 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1606 I32 back_max =
6eb5f6b9 1607 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1608 I32 back_min =
6eb5f6b9 1609 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1610 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1611 -(I32)(CHR_SVLEN(must)
1aa99e6b 1612 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1613 char *last1; /* Last position checked before */
bf93d4cc
GS
1614#ifdef DEBUGGING
1615 int did_match = 0;
1616#endif
6eb5f6b9
JH
1617
1618 if (s > PL_bostr)
1619 last1 = HOPc(s, -1);
1620 else
1621 last1 = s - 1; /* bogus */
1622
1623 /* XXXX check_substr already used to find `s', can optimize if
1624 check_substr==must. */
1625 scream_pos = -1;
1626 dontbother = end_shift;
1627 strend = HOPc(strend, -dontbother);
1628 while ( (s <= last) &&
9041c2e3 1629 ((flags & REXEC_SCREAM)
1aa99e6b 1630 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1631 end_shift, &scream_pos, 0))
1aa99e6b 1632 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1633 (unsigned char*)strend, must,
6eb5f6b9 1634 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1635 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1636 if (HOPc(s, -back_max) > last1) {
1637 last1 = HOPc(s, -back_min);
1638 s = HOPc(s, -back_max);
1639 }
1640 else {
1641 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1642
1643 last1 = HOPc(s, -back_min);
1644 s = t;
1645 }
1aa99e6b 1646 if (do_utf8) {
6eb5f6b9
JH
1647 while (s <= last1) {
1648 if (regtry(prog, s))
1649 goto got_it;
1650 s += UTF8SKIP(s);
1651 }
1652 }
1653 else {
1654 while (s <= last1) {
1655 if (regtry(prog, s))
1656 goto got_it;
1657 s++;
1658 }
1659 }
1660 }
bf93d4cc
GS
1661 DEBUG_r(did_match ||
1662 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1663 ((must == prog->anchored_substr)
1664 ? "anchored" : "floating"),
1665 PL_colors[0],
1666 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1667 SvPVX(must),
1668 PL_colors[1], (SvTAIL(must) ? "$" : "")));
6eb5f6b9
JH
1669 goto phooey;
1670 }
155aba94 1671 else if ((c = prog->regstclass)) {
66e933ab
GS
1672 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1673 /* don't bother with what can't match */
6eb5f6b9 1674 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1675 DEBUG_r({
1676 SV *prop = sv_newmortal();
1677 regprop(prop, c);
1678 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s);
1679 });
6eb5f6b9
JH
1680 if (find_byclass(prog, c, s, strend, startpos, 0))
1681 goto got_it;
bf93d4cc 1682 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1683 }
1684 else {
1685 dontbother = 0;
1686 if (prog->float_substr != Nullsv) { /* Trim the end. */
1687 char *last;
d6a28714
JH
1688
1689 if (flags & REXEC_SCREAM) {
1690 last = screaminstr(sv, prog->float_substr, s - strbeg,
1691 end_shift, &scream_pos, 1); /* last one */
1692 if (!last)
ffc61ed2 1693 last = scream_olds; /* Only one occurrence. */
b8c5462f 1694 }
d6a28714
JH
1695 else {
1696 STRLEN len;
1697 char *little = SvPV(prog->float_substr, len);
1698
1699 if (SvTAIL(prog->float_substr)) {
1700 if (memEQ(strend - len + 1, little, len - 1))
1701 last = strend - len + 1;
1702 else if (!PL_multiline)
9041c2e3 1703 last = memEQ(strend - len, little, len)
d6a28714 1704 ? strend - len : Nullch;
b8c5462f 1705 else
d6a28714
JH
1706 goto find_last;
1707 } else {
1708 find_last:
9041c2e3 1709 if (len)
d6a28714 1710 last = rninstr(s, strend, little, little + len);
b8c5462f 1711 else
d6a28714 1712 last = strend; /* matching `$' */
b8c5462f 1713 }
b8c5462f 1714 }
bf93d4cc
GS
1715 if (last == NULL) {
1716 DEBUG_r(PerlIO_printf(Perl_debug_log,
1717 "%sCan't trim the tail, match fails (should not happen)%s\n",
1718 PL_colors[4],PL_colors[5]));
1719 goto phooey; /* Should not happen! */
1720 }
d6a28714
JH
1721 dontbother = strend - last + prog->float_min_offset;
1722 }
1723 if (minlen && (dontbother < minlen))
1724 dontbother = minlen - 1;
1725 strend -= dontbother; /* this one's always in bytes! */
1726 /* We don't know much -- general case. */
1aa99e6b 1727 if (do_utf8) {
d6a28714
JH
1728 for (;;) {
1729 if (regtry(prog, s))
1730 goto got_it;
1731 if (s >= strend)
1732 break;
b8c5462f 1733 s += UTF8SKIP(s);
d6a28714
JH
1734 };
1735 }
1736 else {
1737 do {
1738 if (regtry(prog, s))
1739 goto got_it;
1740 } while (s++ < strend);
1741 }
1742 }
1743
1744 /* Failure. */
1745 goto phooey;
1746
1747got_it:
1748 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1749
1750 if (PL_reg_eval_set) {
1751 /* Preserve the current value of $^R */
1752 if (oreplsv != GvSV(PL_replgv))
1753 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1754 restored, the value remains
1755 the same. */
1756 restore_pos(aTHXo_ 0);
1757 }
1758
1759 /* make sure $`, $&, $', and $digit will work later */
1760 if ( !(flags & REXEC_NOT_FIRST) ) {
1761 if (RX_MATCH_COPIED(prog)) {
1762 Safefree(prog->subbeg);
1763 RX_MATCH_COPIED_off(prog);
1764 }
1765 if (flags & REXEC_COPY_STR) {
1766 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1767
1768 s = savepvn(strbeg, i);
1769 prog->subbeg = s;
1770 prog->sublen = i;
1771 RX_MATCH_COPIED_on(prog);
1772 }
1773 else {
1774 prog->subbeg = strbeg;
1775 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1776 }
1777 }
9041c2e3 1778
d6a28714
JH
1779 return 1;
1780
1781phooey:
bf93d4cc
GS
1782 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1783 PL_colors[4],PL_colors[5]));
d6a28714
JH
1784 if (PL_reg_eval_set)
1785 restore_pos(aTHXo_ 0);
1786 return 0;
1787}
1788
1789/*
1790 - regtry - try match at specific point
1791 */
1792STATIC I32 /* 0 failure, 1 success */
1793S_regtry(pTHX_ regexp *prog, char *startpos)
1794{
d6a28714
JH
1795 register I32 i;
1796 register I32 *sp;
1797 register I32 *ep;
1798 CHECKPOINT lastcp;
1799
02db2b7b
IZ
1800#ifdef DEBUGGING
1801 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1802#endif
d6a28714
JH
1803 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1804 MAGIC *mg;
1805
1806 PL_reg_eval_set = RS_init;
1807 DEBUG_r(DEBUG_s(
b900a521
JH
1808 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1809 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1810 ));
e8347627 1811 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1812 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1813 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1814 SAVETMPS;
1815 /* Apparently this is not needed, judging by wantarray. */
e8347627 1816 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1817 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1818
1819 if (PL_reg_sv) {
1820 /* Make $_ available to executed code. */
1821 if (PL_reg_sv != DEFSV) {
1822 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1823 SAVESPTR(DEFSV);
1824 DEFSV = PL_reg_sv;
b8c5462f 1825 }
d6a28714 1826
9041c2e3 1827 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
d6a28714
JH
1828 && (mg = mg_find(PL_reg_sv, 'g')))) {
1829 /* prepare for quick setting of pos */
1830 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1831 mg = mg_find(PL_reg_sv, 'g');
1832 mg->mg_len = -1;
b8c5462f 1833 }
d6a28714
JH
1834 PL_reg_magic = mg;
1835 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1836 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1837 }
1838 if (!PL_reg_curpm)
0f79a09d 1839 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1840 PL_reg_curpm->op_pmregexp = prog;
1841 PL_reg_oldcurpm = PL_curpm;
1842 PL_curpm = PL_reg_curpm;
1843 if (RX_MATCH_COPIED(prog)) {
1844 /* Here is a serious problem: we cannot rewrite subbeg,
1845 since it may be needed if this match fails. Thus
1846 $` inside (?{}) could fail... */
1847 PL_reg_oldsaved = prog->subbeg;
1848 PL_reg_oldsavedlen = prog->sublen;
1849 RX_MATCH_COPIED_off(prog);
1850 }
1851 else
1852 PL_reg_oldsaved = Nullch;
1853 prog->subbeg = PL_bostr;
1854 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1855 }
1856 prog->startp[0] = startpos - PL_bostr;
1857 PL_reginput = startpos;
1858 PL_regstartp = prog->startp;
1859 PL_regendp = prog->endp;
1860 PL_reglastparen = &prog->lastparen;
1861 prog->lastparen = 0;
1862 PL_regsize = 0;
1863 DEBUG_r(PL_reg_starttry = startpos);
1864 if (PL_reg_start_tmpl <= prog->nparens) {
1865 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1866 if(PL_reg_start_tmp)
1867 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1868 else
1869 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1870 }
1871
1872 /* XXXX What this code is doing here?!!! There should be no need
1873 to do this again and again, PL_reglastparen should take care of
3dd2943c 1874 this! --ilya*/
dafc8851
JH
1875
1876 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1877 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1878 * PL_reglastparen), is not needed at all by the test suite
1879 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1880 * enough, for building DynaLoader, or otherwise this
1881 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1882 * will happen. Meanwhile, this code *is* needed for the
1883 * above-mentioned test suite tests to succeed. The common theme
1884 * on those tests seems to be returning null fields from matches.
1885 * --jhi */
dafc8851 1886#if 1
d6a28714
JH
1887 sp = prog->startp;
1888 ep = prog->endp;
1889 if (prog->nparens) {
09e8ae3b 1890 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1891 *++sp = -1;
1892 *++ep = -1;
1893 }
1894 }
dafc8851 1895#endif
02db2b7b 1896 REGCP_SET(lastcp);
d6a28714
JH
1897 if (regmatch(prog->program + 1)) {
1898 prog->endp[0] = PL_reginput - PL_bostr;
1899 return 1;
1900 }
02db2b7b 1901 REGCP_UNWIND(lastcp);
d6a28714
JH
1902 return 0;
1903}
1904
02db2b7b
IZ
1905#define RE_UNWIND_BRANCH 1
1906#define RE_UNWIND_BRANCHJ 2
1907
1908union re_unwind_t;
1909
1910typedef struct { /* XX: makes sense to enlarge it... */
1911 I32 type;
1912 I32 prev;
1913 CHECKPOINT lastcp;
1914} re_unwind_generic_t;
1915
1916typedef struct {
1917 I32 type;
1918 I32 prev;
1919 CHECKPOINT lastcp;
1920 I32 lastparen;
1921 regnode *next;
1922 char *locinput;
1923 I32 nextchr;
1924#ifdef DEBUGGING
1925 int regindent;
1926#endif
1927} re_unwind_branch_t;
1928
1929typedef union re_unwind_t {
1930 I32 type;
1931 re_unwind_generic_t generic;
1932 re_unwind_branch_t branch;
1933} re_unwind_t;
1934
d6a28714
JH
1935/*
1936 - regmatch - main matching routine
1937 *
1938 * Conceptually the strategy is simple: check to see whether the current
1939 * node matches, call self recursively to see whether the rest matches,
1940 * and then act accordingly. In practice we make some effort to avoid
1941 * recursion, in particular by going through "ordinary" nodes (that don't
1942 * need to know whether the rest of the match failed) by a loop instead of
1943 * by recursion.
1944 */
1945/* [lwall] I've hoisted the register declarations to the outer block in order to
1946 * maybe save a little bit of pushing and popping on the stack. It also takes
1947 * advantage of machines that use a register save mask on subroutine entry.
1948 */
1949STATIC I32 /* 0 failure, 1 success */
1950S_regmatch(pTHX_ regnode *prog)
1951{
d6a28714
JH
1952 register regnode *scan; /* Current node. */
1953 regnode *next; /* Next node. */
1954 regnode *inner; /* Next node in internal branch. */
1955 register I32 nextchr; /* renamed nextchr - nextchar colides with
1956 function of same name */
1957 register I32 n; /* no or next */
1958 register I32 ln; /* len or last */
1959 register char *s; /* operand or save */
1960 register char *locinput = PL_reginput;
1961 register I32 c1, c2, paren; /* case fold search, parenth */
1962 int minmod = 0, sw = 0, logical = 0;
02db2b7b
IZ
1963 I32 unwind = 0;
1964 I32 firstcp = PL_savestack_ix;
ffc61ed2 1965 register bool do_utf8 = DO_UTF8(PL_reg_sv);
02db2b7b 1966
d6a28714
JH
1967#ifdef DEBUGGING
1968 PL_regindent++;
1969#endif
1970
1971 /* Note that nextchr is a byte even in UTF */
1972 nextchr = UCHARAT(locinput);
1973 scan = prog;
1974 while (scan != NULL) {
1975#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
02db2b7b 1976#if 1
d6a28714
JH
1977# define sayYES goto yes
1978# define sayNO goto no
7821416a
IZ
1979# define sayYES_FINAL goto yes_final
1980# define sayYES_LOUD goto yes_loud
1981# define sayNO_FINAL goto no_final
1982# define sayNO_SILENT goto do_no
d6a28714
JH
1983# define saySAME(x) if (x) goto yes; else goto no
1984# define REPORT_CODE_OFF 24
1985#else
1986# define sayYES return 1
1987# define sayNO return 0
7821416a
IZ
1988# define sayYES_FINAL return 1
1989# define sayYES_LOUD return 1
1990# define sayNO_FINAL return 0
1991# define sayNO_SILENT return 0
d6a28714
JH
1992# define saySAME(x) return x
1993#endif
1994 DEBUG_r( {
1995 SV *prop = sv_newmortal();
1996 int docolor = *PL_colors[0];
1997 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 1998 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
1999 /* The part of the string before starttry has one color
2000 (pref0_len chars), between starttry and current
2001 position another one (pref_len - pref0_len chars),
2002 after the current position the third one.
2003 We assume that pref0_len <= pref_len, otherwise we
2004 decrease pref0_len. */
9041c2e3 2005 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2006 ? (5 + taill) - l : locinput - PL_bostr;
2007 int pref0_len;
d6a28714 2008
1aa99e6b
IH
2009 while (UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2010 pref_len++;
2011 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2012 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2013 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2014 ? (5 + taill) - pref_len : PL_regeol - locinput);
1aa99e6b
IH
2015 while (UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2016 l--;
d6a28714
JH
2017 if (pref0_len < 0)
2018 pref0_len = 0;
2019 if (pref0_len > pref_len)
2020 pref0_len = pref_len;
2021 regprop(prop, scan);
9041c2e3 2022 PerlIO_printf(Perl_debug_log,
b900a521 2023 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
9041c2e3
NIS
2024 (IV)(locinput - PL_bostr),
2025 PL_colors[4], pref0_len,
d6a28714 2026 locinput - pref_len, PL_colors[5],
9041c2e3 2027 PL_colors[2], pref_len - pref0_len,
d6a28714
JH
2028 locinput - pref_len + pref0_len, PL_colors[3],
2029 (docolor ? "" : "> <"),
2030 PL_colors[0], l, locinput, PL_colors[1],
2031 15 - l - pref_len + 1,
2032 "",
b900a521 2033 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
2034 SvPVX(prop));
2035 } );
2036
2037 next = scan + NEXT_OFF(scan);
2038 if (next == scan)
2039 next = NULL;
2040
2041 switch (OP(scan)) {
2042 case BOL:
12d33761
HS
2043 if (locinput == PL_bostr || (PL_multiline &&
2044 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2045 {
2046 /* regtill = regbol; */
b8c5462f
JH
2047 break;
2048 }
d6a28714
JH
2049 sayNO;
2050 case MBOL:
12d33761
HS
2051 if (locinput == PL_bostr ||
2052 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2053 {
b8c5462f
JH
2054 break;
2055 }
d6a28714
JH
2056 sayNO;
2057 case SBOL:
c2a73568 2058 if (locinput == PL_bostr)
b8c5462f 2059 break;
d6a28714
JH
2060 sayNO;
2061 case GPOS:
2062 if (locinput == PL_reg_ganch)
2063 break;
2064 sayNO;
2065 case EOL:
2066 if (PL_multiline)
2067 goto meol;
2068 else
2069 goto seol;
2070 case MEOL:
2071 meol:
2072 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2073 sayNO;
b8c5462f 2074 break;
d6a28714
JH
2075 case SEOL:
2076 seol:
2077 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2078 sayNO;
d6a28714 2079 if (PL_regeol - locinput > 1)
b8c5462f 2080 sayNO;
b8c5462f 2081 break;
d6a28714
JH
2082 case EOS:
2083 if (PL_regeol != locinput)
b8c5462f 2084 sayNO;
d6a28714 2085 break;
ffc61ed2 2086 case SANY:
d6a28714 2087 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2088 sayNO;
b8c5462f 2089 nextchr = UCHARAT(++locinput);
a0d0e21e 2090 break;
ffc61ed2 2091 case REG_ANY:
1aa99e6b
IH
2092 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2093 sayNO;
2094 if (do_utf8) {
b8c5462f 2095 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2096 if (locinput > PL_regeol)
2097 sayNO;
a0ed51b3 2098 nextchr = UCHARAT(locinput);
a0ed51b3 2099 }
1aa99e6b
IH
2100 else
2101 nextchr = UCHARAT(++locinput);
a0ed51b3 2102 break;
d6a28714 2103 case EXACT:
cd439c50
IZ
2104 s = STRING(scan);
2105 ln = STR_LEN(scan);
1aa99e6b
IH
2106 if (do_utf8 != (UTF!=0)) {
2107 char *l = locinput;
2108 char *e = s + ln;
2109 STRLEN len;
2110 if (do_utf8)
2111 while (s < e) {
2112 if (l >= PL_regeol)
2113 sayNO;
9041c2e3 2114 if (*((U8*)s) != utf8_to_uvchr((U8*)l, &len))
1aa99e6b
IH
2115 sayNO;
2116 s++;
2117 l += len;
2118 }
2119 else
2120 while (s < e) {
2121 if (l >= PL_regeol)
2122 sayNO;
9041c2e3 2123 if (*((U8*)l) != utf8_to_uvchr((U8*)s, &len))
1aa99e6b
IH
2124 sayNO;
2125 s += len;
2126 l++;
2127 }
2128 locinput = l;
2129 nextchr = UCHARAT(locinput);
2130 break;
2131 }
d6a28714
JH
2132 /* Inline the first character, for speed. */
2133 if (UCHARAT(s) != nextchr)
2134 sayNO;
2135 if (PL_regeol - locinput < ln)
2136 sayNO;
2137 if (ln > 1 && memNE(s, locinput, ln))
2138 sayNO;
2139 locinput += ln;
2140 nextchr = UCHARAT(locinput);
2141 break;
2142 case EXACTFL:
b8c5462f
JH
2143 PL_reg_flags |= RF_tainted;
2144 /* FALL THROUGH */
d6a28714 2145 case EXACTF:
cd439c50
IZ
2146 s = STRING(scan);
2147 ln = STR_LEN(scan);
d6a28714 2148
1aa99e6b 2149 if (do_utf8) {
d6a28714 2150 char *l = locinput;
1aa99e6b
IH
2151 char *e;
2152 e = s + ln;
d6a28714
JH
2153 c1 = OP(scan) == EXACTF;
2154 while (s < e) {
1aa99e6b 2155 if (l >= PL_regeol) {
d6a28714
JH
2156 sayNO;
2157 }
2b9d42f0 2158 if ((UTF ? utf8n_to_uvchr((U8*)s, e - s, 0, 0) : *((U8*)s)) !=
1aa99e6b
IH
2159 (c1 ? toLOWER_utf8((U8*)l) : toLOWER_LC_utf8((U8*)l)))
2160 sayNO;
2161 s += UTF ? UTF8SKIP(s) : 1;
d6a28714 2162 l += UTF8SKIP(l);
b8c5462f 2163 }
d6a28714 2164 locinput = l;
a0ed51b3
LW
2165 nextchr = UCHARAT(locinput);
2166 break;
2167 }
d6a28714
JH
2168
2169 /* Inline the first character, for speed. */
2170 if (UCHARAT(s) != nextchr &&
2171 UCHARAT(s) != ((OP(scan) == EXACTF)
2172 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2173 sayNO;
d6a28714 2174 if (PL_regeol - locinput < ln)
b8c5462f 2175 sayNO;
d6a28714
JH
2176 if (ln > 1 && (OP(scan) == EXACTF
2177 ? ibcmp(s, locinput, ln)
2178 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2179 sayNO;
d6a28714
JH
2180 locinput += ln;
2181 nextchr = UCHARAT(locinput);
a0d0e21e 2182 break;
d6a28714 2183 case ANYOF:
ffc61ed2
JH
2184 if (do_utf8) {
2185 if (!reginclass(scan, (U8*)locinput, do_utf8))
2186 sayNO;
2187 if (locinput >= PL_regeol)
2188 sayNO;
2189 locinput += PL_utf8skip[nextchr];
b8c5462f 2190 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2191 }
2192 else {
2193 if (nextchr < 0)
2194 nextchr = UCHARAT(locinput);
2195 if (!reginclass(scan, (U8*)locinput, do_utf8))
2196 sayNO;
2197 if (!nextchr && locinput >= PL_regeol)
2198 sayNO;
2199 nextchr = UCHARAT(++locinput);
2200 }
b8c5462f 2201 break;
d6a28714 2202 case ALNUML:
b8c5462f
JH
2203 PL_reg_flags |= RF_tainted;
2204 /* FALL THROUGH */
d6a28714 2205 case ALNUM:
b8c5462f 2206 if (!nextchr)
4633a7c4 2207 sayNO;
ffc61ed2
JH
2208 if (do_utf8) {
2209 if (!(OP(scan) == ALNUM
3568d838 2210 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2211 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2212 {
2213 sayNO;
a0ed51b3 2214 }
b8c5462f 2215 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2216 nextchr = UCHARAT(locinput);
2217 break;
2218 }
ffc61ed2 2219 if (!(OP(scan) == ALNUM
d6a28714 2220 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2221 sayNO;
b8c5462f 2222 nextchr = UCHARAT(++locinput);
a0d0e21e 2223 break;
d6a28714 2224 case NALNUML:
b8c5462f
JH
2225 PL_reg_flags |= RF_tainted;
2226 /* FALL THROUGH */
d6a28714
JH
2227 case NALNUM:
2228 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2229 sayNO;
ffc61ed2 2230 if (do_utf8) {
8269fa76 2231 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2232 if (OP(scan) == NALNUM
3568d838 2233 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2234 : isALNUM_LC_utf8((U8*)locinput))
2235 {
b8c5462f 2236 sayNO;
d6a28714 2237 }
b8c5462f
JH
2238 locinput += PL_utf8skip[nextchr];
2239 nextchr = UCHARAT(locinput);
2240 break;
2241 }
ffc61ed2 2242 if (OP(scan) == NALNUM
d6a28714 2243 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2244 sayNO;
76e3520e 2245 nextchr = UCHARAT(++locinput);
a0d0e21e 2246 break;
d6a28714
JH
2247 case BOUNDL:
2248 case NBOUNDL:
3280af22 2249 PL_reg_flags |= RF_tainted;
bbce6d69 2250 /* FALL THROUGH */
d6a28714
JH
2251 case BOUND:
2252 case NBOUND:
2253 /* was last char in word? */
ffc61ed2 2254 if (do_utf8) {
12d33761
HS
2255 if (locinput == PL_bostr)
2256 ln = '\n';
ffc61ed2
JH
2257 else {
2258 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2259
2b9d42f0 2260 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2261 }
2262 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2263 ln = isALNUM_uni(ln);
8269fa76 2264 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2265 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2266 }
2267 else {
9041c2e3 2268 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2269 n = isALNUM_LC_utf8((U8*)locinput);
2270 }
a0ed51b3 2271 }
d6a28714 2272 else {
12d33761
HS
2273 ln = (locinput != PL_bostr) ?
2274 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2275 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2276 ln = isALNUM(ln);
2277 n = isALNUM(nextchr);
2278 }
2279 else {
2280 ln = isALNUM_LC(ln);
2281 n = isALNUM_LC(nextchr);
2282 }
d6a28714 2283 }
ffc61ed2
JH
2284 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2285 OP(scan) == BOUNDL))
2286 sayNO;
a0ed51b3 2287 break;
d6a28714 2288 case SPACEL:
3280af22 2289 PL_reg_flags |= RF_tainted;
bbce6d69 2290 /* FALL THROUGH */
d6a28714 2291 case SPACE:
9442cb0e 2292 if (!nextchr)
4633a7c4 2293 sayNO;
1aa99e6b 2294 if (do_utf8) {
fd400ab9 2295 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2296 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2297 if (!(OP(scan) == SPACE
3568d838 2298 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2299 : isSPACE_LC_utf8((U8*)locinput)))
2300 {
2301 sayNO;
2302 }
2303 locinput += PL_utf8skip[nextchr];
2304 nextchr = UCHARAT(locinput);
2305 break;
d6a28714 2306 }
ffc61ed2
JH
2307 if (!(OP(scan) == SPACE
2308 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2309 sayNO;
2310 nextchr = UCHARAT(++locinput);
2311 }
2312 else {
2313 if (!(OP(scan) == SPACE
2314 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2315 sayNO;
2316 nextchr = UCHARAT(++locinput);
a0ed51b3 2317 }
a0ed51b3 2318 break;
d6a28714 2319 case NSPACEL:
3280af22 2320 PL_reg_flags |= RF_tainted;
bbce6d69 2321 /* FALL THROUGH */
d6a28714 2322 case NSPACE:
9442cb0e 2323 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2324 sayNO;
1aa99e6b 2325 if (do_utf8) {
8269fa76 2326 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2327 if (OP(scan) == NSPACE
3568d838 2328 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2329 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2330 {
2331 sayNO;
2332 }
2333 locinput += PL_utf8skip[nextchr];
2334 nextchr = UCHARAT(locinput);
2335 break;
a0ed51b3 2336 }
ffc61ed2 2337 if (OP(scan) == NSPACE
d6a28714 2338 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2339 sayNO;
76e3520e 2340 nextchr = UCHARAT(++locinput);
a0d0e21e 2341 break;
d6a28714 2342 case DIGITL:
a0ed51b3
LW
2343 PL_reg_flags |= RF_tainted;
2344 /* FALL THROUGH */
d6a28714 2345 case DIGIT:
9442cb0e 2346 if (!nextchr)
a0ed51b3 2347 sayNO;
1aa99e6b 2348 if (do_utf8) {
8269fa76 2349 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2350 if (!(OP(scan) == DIGIT
3568d838 2351 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2352 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2353 {
a0ed51b3 2354 sayNO;
dfe13c55 2355 }
6f06b55f 2356 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2357 nextchr = UCHARAT(locinput);
2358 break;
2359 }
ffc61ed2 2360 if (!(OP(scan) == DIGIT
9442cb0e 2361 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2362 sayNO;
2363 nextchr = UCHARAT(++locinput);
2364 break;
d6a28714 2365 case NDIGITL:
b8c5462f
JH
2366 PL_reg_flags |= RF_tainted;
2367 /* FALL THROUGH */
d6a28714 2368 case NDIGIT:
9442cb0e 2369 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2370 sayNO;
1aa99e6b 2371 if (do_utf8) {
8269fa76 2372 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2373 if (OP(scan) == NDIGIT
3568d838 2374 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2375 : isDIGIT_LC_utf8((U8*)locinput))
2376 {
a0ed51b3 2377 sayNO;
9442cb0e 2378 }
6f06b55f 2379 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2380 nextchr = UCHARAT(locinput);
2381 break;
2382 }
ffc61ed2 2383 if (OP(scan) == NDIGIT
9442cb0e 2384 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2385 sayNO;
2386 nextchr = UCHARAT(++locinput);
2387 break;
2388 case CLUMP:
8269fa76 2389 LOAD_UTF8_CHARCLASS(mark,"~");
3568d838
JH
2390 if (locinput >= PL_regeol ||
2391 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3 2392 sayNO;
6f06b55f 2393 locinput += PL_utf8skip[nextchr];
3568d838
JH
2394 while (locinput < PL_regeol &&
2395 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3
LW
2396 locinput += UTF8SKIP(locinput);
2397 if (locinput > PL_regeol)
2398 sayNO;
2399 nextchr = UCHARAT(locinput);
2400 break;
c8756f30 2401 case REFFL:
3280af22 2402 PL_reg_flags |= RF_tainted;
c8756f30 2403 /* FALL THROUGH */
c277df42 2404 case REF:
c8756f30 2405 case REFF:
c277df42 2406 n = ARG(scan); /* which paren pair */
cf93c79d 2407 ln = PL_regstartp[n];
2c2d71f5 2408 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2409 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2410 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2411 if (ln == PL_regendp[n])
a0d0e21e 2412 break;
a0ed51b3 2413
cf93c79d 2414 s = PL_bostr + ln;
1aa99e6b 2415 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2416 char *l = locinput;
cf93c79d 2417 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2418 /*
2419 * Note that we can't do the "other character" lookup trick as
2420 * in the 8-bit case (no pun intended) because in Unicode we
2421 * have to map both upper and title case to lower case.
2422 */
2423 if (OP(scan) == REFF) {
2424 while (s < e) {
2425 if (l >= PL_regeol)
2426 sayNO;
dfe13c55 2427 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2428 sayNO;
2429 s += UTF8SKIP(s);
2430 l += UTF8SKIP(l);
2431 }
2432 }
2433 else {
2434 while (s < e) {
2435 if (l >= PL_regeol)
2436 sayNO;
dfe13c55 2437 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2438 sayNO;
2439 s += UTF8SKIP(s);
2440 l += UTF8SKIP(l);
2441 }
2442 }
2443 locinput = l;
2444 nextchr = UCHARAT(locinput);
2445 break;
2446 }
2447
a0d0e21e 2448 /* Inline the first character, for speed. */
76e3520e 2449 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2450 (OP(scan) == REF ||
2451 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2452 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2453 sayNO;
cf93c79d 2454 ln = PL_regendp[n] - ln;
3280af22 2455 if (locinput + ln > PL_regeol)
4633a7c4 2456 sayNO;
c8756f30
AK
2457 if (ln > 1 && (OP(scan) == REF
2458 ? memNE(s, locinput, ln)
2459 : (OP(scan) == REFF
2460 ? ibcmp(s, locinput, ln)
2461 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2462 sayNO;
a0d0e21e 2463 locinput += ln;
76e3520e 2464 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2465 break;
2466
2467 case NOTHING:
c277df42 2468 case TAIL:
a0d0e21e
LW
2469 break;
2470 case BACK:
2471 break;
c277df42
IZ
2472 case EVAL:
2473 {
2474 dSP;
533c011a 2475 OP_4tree *oop = PL_op;
3280af22
NIS
2476 COP *ocurcop = PL_curcop;
2477 SV **ocurpad = PL_curpad;
c277df42 2478 SV *ret;
9041c2e3 2479
c277df42 2480 n = ARG(scan);
533c011a 2481 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2482 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2483 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2484 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2485
cea2e8a9 2486 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2487 SPAGAIN;
2488 ret = POPs;
2489 PUTBACK;
9041c2e3 2490
0f5d15d6
IZ
2491 PL_op = oop;
2492 PL_curpad = ocurpad;
2493 PL_curcop = ocurcop;
c277df42 2494 if (logical) {
0f5d15d6
IZ
2495 if (logical == 2) { /* Postponed subexpression. */
2496 regexp *re;
22c35a8c 2497 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2498 re_cc_state state;
0f5d15d6
IZ
2499 CHECKPOINT cp, lastcp;
2500
2501 if(SvROK(ret) || SvRMAGICAL(ret)) {
2502 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2503
2504 if(SvMAGICAL(sv))
2505 mg = mg_find(sv, 'r');
2506 }
2507 if (mg) {
2508 re = (regexp *)mg->mg_obj;
df0003d4 2509 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2510 }
2511 else {
2512 STRLEN len;
2513 char *t = SvPV(ret, len);
2514 PMOP pm;
2515 char *oprecomp = PL_regprecomp;
2516 I32 osize = PL_regsize;
2517 I32 onpar = PL_regnpar;
2518
5fcd1c1b 2519 Zero(&pm, 1, PMOP);
cea2e8a9 2520 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2521 if (!(SvFLAGS(ret)
0f5d15d6
IZ
2522 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2523 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2524 PL_regprecomp = oprecomp;
2525 PL_regsize = osize;
2526 PL_regnpar = onpar;
2527 }
2528 DEBUG_r(
9041c2e3 2529 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2530 "Entering embedded `%s%.60s%s%s'\n",
2531 PL_colors[0],
2532 re->precomp,
2533 PL_colors[1],
2534 (strlen(re->precomp) > 60 ? "..." : ""))
2535 );
2536 state.node = next;
2537 state.prev = PL_reg_call_cc;
2538 state.cc = PL_regcc;
2539 state.re = PL_reg_re;
2540
2ab05381 2541 PL_regcc = 0;
9041c2e3 2542
0f5d15d6 2543 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2544 REGCP_SET(lastcp);
0f5d15d6
IZ
2545 cache_re(re);
2546 state.ss = PL_savestack_ix;
2547 *PL_reglastparen = 0;
2548 PL_reg_call_cc = &state;
2549 PL_reginput = locinput;
2c2d71f5
JH
2550
2551 /* XXXX This is too dramatic a measure... */
2552 PL_reg_maxiter = 0;
2553
0f5d15d6 2554 if (regmatch(re->program + 1)) {
2c914db6
IZ
2555 /* Even though we succeeded, we need to restore
2556 global variables, since we may be wrapped inside
2557 SUSPEND, thus the match may be not finished yet. */
2558
2559 /* XXXX Do this only if SUSPENDed? */
2560 PL_reg_call_cc = state.prev;
2561 PL_regcc = state.cc;
2562 PL_reg_re = state.re;
2563 cache_re(PL_reg_re);
2564
2565 /* XXXX This is too dramatic a measure... */
2566 PL_reg_maxiter = 0;
2567
2568 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2569 ReREFCNT_dec(re);
2570 regcpblow(cp);
2571 sayYES;
2572 }
0f5d15d6 2573 ReREFCNT_dec(re);
02db2b7b 2574 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2575 regcppop();
2576 PL_reg_call_cc = state.prev;
2577 PL_regcc = state.cc;
2578 PL_reg_re = state.re;
d3790889 2579 cache_re(PL_reg_re);
2c2d71f5
JH
2580
2581 /* XXXX This is too dramatic a measure... */
2582 PL_reg_maxiter = 0;
2583
0f5d15d6
IZ
2584 sayNO;
2585 }
c277df42 2586 sw = SvTRUE(ret);
0f5d15d6 2587 logical = 0;
a0ed51b3
LW
2588 }
2589 else
3280af22 2590 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2591 break;
2592 }
a0d0e21e 2593 case OPEN:
c277df42 2594 n = ARG(scan); /* which paren pair */
3280af22
NIS
2595 PL_reg_start_tmp[n] = locinput;
2596 if (n > PL_regsize)
2597 PL_regsize = n;
a0d0e21e
LW
2598 break;
2599 case CLOSE:
c277df42 2600 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2601 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2602 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2603 if (n > *PL_reglastparen)
2604 *PL_reglastparen = n;
a0d0e21e 2605 break;
c277df42
IZ
2606 case GROUPP:
2607 n = ARG(scan); /* which paren pair */
cf93c79d 2608 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2609 break;
2610 case IFTHEN:
2c2d71f5 2611 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2612 if (sw)
2613 next = NEXTOPER(NEXTOPER(scan));
2614 else {
2615 next = scan + ARG(scan);
2616 if (OP(next) == IFTHEN) /* Fake one. */
2617 next = NEXTOPER(NEXTOPER(next));
2618 }
2619 break;
2620 case LOGICAL:
0f5d15d6 2621 logical = scan->flags;
c277df42 2622 break;
2ab05381
IZ
2623/*******************************************************************
2624 PL_regcc contains infoblock about the innermost (...)* loop, and
2625 a pointer to the next outer infoblock.
2626
2627 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2628
2629 1) After matching X, regnode for CURLYX is processed;
2630
9041c2e3 2631 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2632 regmatch() recursively with the starting point at WHILEM node;
2633
2634 3) Each hit of WHILEM node tries to match A and Z (in the order
2635 depending on the current iteration, min/max of {min,max} and
2636 greediness). The information about where are nodes for "A"
2637 and "Z" is read from the infoblock, as is info on how many times "A"
2638 was already matched, and greediness.
2639
2640 4) After A matches, the same WHILEM node is hit again.
2641
2642 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2643 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2644 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2645 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2646 of the external loop.
2647
2648 Currently present infoblocks form a tree with a stem formed by PL_curcc
2649 and whatever it mentions via ->next, and additional attached trees
2650 corresponding to temporarily unset infoblocks as in "5" above.
2651
9041c2e3 2652 In the following picture infoblocks for outer loop of
2ab05381
IZ
2653 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2654 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2655 infoblocks are drawn below the "reset" infoblock.
2656
2657 In fact in the picture below we do not show failed matches for Z and T
2658 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2659 more obvious *why* one needs to *temporary* unset infoblocks.]
2660
2661 Matched REx position InfoBlocks Comment
2662 (Y(A)*?Z)*?T x
2663 Y(A)*?Z)*?T x <- O
2664 Y (A)*?Z)*?T x <- O
2665 Y A)*?Z)*?T x <- O <- I
2666 YA )*?Z)*?T x <- O <- I
2667 YA A)*?Z)*?T x <- O <- I
2668 YAA )*?Z)*?T x <- O <- I
2669 YAA Z)*?T x <- O # Temporary unset I
2670 I
2671
2672 YAAZ Y(A)*?Z)*?T x <- O
2673 I
2674
2675 YAAZY (A)*?Z)*?T x <- O
2676 I
2677
2678 YAAZY A)*?Z)*?T x <- O <- I
2679 I
2680
2681 YAAZYA )*?Z)*?T x <- O <- I
2682 I
2683
2684 YAAZYA Z)*?T x <- O # Temporary unset I
2685 I,I
2686
2687 YAAZYAZ )*?T x <- O
2688 I,I
2689
2690 YAAZYAZ T x # Temporary unset O
2691 O
2692 I,I
2693
2694 YAAZYAZT x
2695 O
2696 I,I
2697 *******************************************************************/
a0d0e21e
LW
2698 case CURLYX: {
2699 CURCUR cc;
3280af22 2700 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2701 /* No need to save/restore up to this paren */
2702 I32 parenfloor = scan->flags;
c277df42
IZ
2703
2704 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2705 next += ARG(next);
3280af22
NIS
2706 cc.oldcc = PL_regcc;
2707 PL_regcc = &cc;
cb434fcc
IZ
2708 /* XXXX Probably it is better to teach regpush to support
2709 parenfloor > PL_regsize... */
2710 if (parenfloor > *PL_reglastparen)
2711 parenfloor = *PL_reglastparen; /* Pessimization... */
2712 cc.parenfloor = parenfloor;
a0d0e21e
LW
2713 cc.cur = -1;
2714 cc.min = ARG1(scan);
2715 cc.max = ARG2(scan);
c277df42 2716 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2717 cc.next = next;
2718 cc.minmod = minmod;
2719 cc.lastloc = 0;
3280af22 2720 PL_reginput = locinput;
a0d0e21e
LW
2721 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2722 regcpblow(cp);
3280af22 2723 PL_regcc = cc.oldcc;
4633a7c4 2724 saySAME(n);
a0d0e21e
LW
2725 }
2726 /* NOT REACHED */
2727 case WHILEM: {
2728 /*
2729 * This is really hard to understand, because after we match
2730 * what we're trying to match, we must make sure the rest of
2c2d71f5 2731 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2732 * to go back UP the parse tree by recursing ever deeper. And
2733 * if it fails, we have to reset our parent's current state
2734 * that we can try again after backing off.
2735 */
2736
c277df42 2737 CHECKPOINT cp, lastcp;
3280af22 2738 CURCUR* cc = PL_regcc;
c277df42
IZ
2739 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2740
4633a7c4 2741 n = cc->cur + 1; /* how many we know we matched */
3280af22 2742 PL_reginput = locinput;
a0d0e21e 2743
c277df42 2744 DEBUG_r(
9041c2e3
NIS
2745 PerlIO_printf(Perl_debug_log,
2746 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2747 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2748 (long)n, (long)cc->min,
c277df42
IZ
2749 (long)cc->max, (long)cc)
2750 );
4633a7c4 2751
a0d0e21e
LW
2752 /* If degenerate scan matches "", assume scan done. */
2753
579cf2c3 2754 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2755 PL_regcc = cc->oldcc;
2ab05381
IZ
2756 if (PL_regcc)
2757 ln = PL_regcc->cur;
c277df42 2758 DEBUG_r(
c3464db5
DD
2759 PerlIO_printf(Perl_debug_log,
2760 "%*s empty match detected, try continuation...\n",
3280af22 2761 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2762 );
a0d0e21e 2763 if (regmatch(cc->next))
4633a7c4 2764 sayYES;
2ab05381
IZ
2765 if (PL_regcc)
2766 PL_regcc->cur = ln;
3280af22 2767 PL_regcc = cc;
4633a7c4 2768 sayNO;
a0d0e21e
LW
2769 }
2770
2771 /* First just match a string of min scans. */
2772
2773 if (n < cc->min) {
2774 cc->cur = n;
2775 cc->lastloc = locinput;
4633a7c4
LW
2776 if (regmatch(cc->scan))
2777 sayYES;
2778 cc->cur = n - 1;
c277df42 2779 cc->lastloc = lastloc;
4633a7c4 2780 sayNO;
a0d0e21e
LW
2781 }
2782
2c2d71f5
JH
2783 if (scan->flags) {
2784 /* Check whether we already were at this position.
2785 Postpone detection until we know the match is not
2786 *that* much linear. */
2787 if (!PL_reg_maxiter) {
2788 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2789 PL_reg_leftiter = PL_reg_maxiter;
2790 }
2791 if (PL_reg_leftiter-- == 0) {
2792 I32 size = (PL_reg_maxiter + 7)/8;
2793 if (PL_reg_poscache) {
2794 if (PL_reg_poscache_size < size) {
2795 Renew(PL_reg_poscache, size, char);
2796 PL_reg_poscache_size = size;
2797 }
2798 Zero(PL_reg_poscache, size, char);
2799 }
2800 else {
2801 PL_reg_poscache_size = size;
2802 Newz(29, PL_reg_poscache, size, char);
2803 }
2804 DEBUG_r(
2805 PerlIO_printf(Perl_debug_log,
2806 "%sDetected a super-linear match, switching on caching%s...\n",
2807 PL_colors[4], PL_colors[5])
2808 );
2809 }
2810 if (PL_reg_leftiter < 0) {
2811 I32 o = locinput - PL_bostr, b;
2812
2813 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2814 b = o % 8;
2815 o /= 8;
2816 if (PL_reg_poscache[o] & (1<<b)) {
2817 DEBUG_r(
2818 PerlIO_printf(Perl_debug_log,
2819 "%*s already tried at this position...\n",
2820 REPORT_CODE_OFF+PL_regindent*2, "")
2821 );
7821416a 2822 sayNO_SILENT;
2c2d71f5
JH
2823 }
2824 PL_reg_poscache[o] |= (1<<b);
2825 }
2826 }
2827
a0d0e21e
LW
2828 /* Prefer next over scan for minimal matching. */
2829
2830 if (cc->minmod) {
3280af22 2831 PL_regcc = cc->oldcc;
2ab05381
IZ
2832 if (PL_regcc)
2833 ln = PL_regcc->cur;
5f05dabc 2834 cp = regcppush(cc->parenfloor);
02db2b7b 2835 REGCP_SET(lastcp);
5f05dabc 2836 if (regmatch(cc->next)) {
c277df42 2837 regcpblow(cp);
4633a7c4 2838 sayYES; /* All done. */
5f05dabc 2839 }
02db2b7b 2840 REGCP_UNWIND(lastcp);
5f05dabc 2841 regcppop();
2ab05381
IZ
2842 if (PL_regcc)
2843 PL_regcc->cur = ln;
3280af22 2844 PL_regcc = cc;
a0d0e21e 2845
c277df42 2846 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 2847 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2848 && !(PL_reg_flags & RF_warned)) {
2849 PL_reg_flags |= RF_warned;
e476b1b5 2850 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2851 "Complex regular subexpression recursion",
2852 REG_INFTY - 1);
c277df42 2853 }
4633a7c4 2854 sayNO;
c277df42 2855 }
a687059c 2856
c277df42 2857 DEBUG_r(
c3464db5
DD
2858 PerlIO_printf(Perl_debug_log,
2859 "%*s trying longer...\n",
3280af22 2860 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2861 );
a0d0e21e 2862 /* Try scanning more and see if it helps. */
3280af22 2863 PL_reginput = locinput;
a0d0e21e
LW
2864 cc->cur = n;
2865 cc->lastloc = locinput;
5f05dabc 2866 cp = regcppush(cc->parenfloor);
02db2b7b 2867 REGCP_SET(lastcp);
5f05dabc 2868 if (regmatch(cc->scan)) {
c277df42 2869 regcpblow(cp);
4633a7c4 2870 sayYES;
5f05dabc 2871 }
02db2b7b 2872 REGCP_UNWIND(lastcp);
5f05dabc 2873 regcppop();
4633a7c4 2874 cc->cur = n - 1;
c277df42 2875 cc->lastloc = lastloc;
4633a7c4 2876 sayNO;
a0d0e21e
LW
2877 }
2878
2879 /* Prefer scan over next for maximal matching. */
2880
2881 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2882 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2883 cc->cur = n;
2884 cc->lastloc = locinput;
02db2b7b 2885 REGCP_SET(lastcp);
5f05dabc 2886 if (regmatch(cc->scan)) {
c277df42 2887 regcpblow(cp);
4633a7c4 2888 sayYES;
5f05dabc 2889 }
02db2b7b 2890 REGCP_UNWIND(lastcp);
a0d0e21e 2891 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2892 PL_reginput = locinput;
c277df42 2893 DEBUG_r(
c3464db5
DD
2894 PerlIO_printf(Perl_debug_log,
2895 "%*s failed, try continuation...\n",
3280af22 2896 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2897 );
2898 }
9041c2e3 2899 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2900 && !(PL_reg_flags & RF_warned)) {
3280af22 2901 PL_reg_flags |= RF_warned;
e476b1b5 2902 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2903 "Complex regular subexpression recursion",
2904 REG_INFTY - 1);
a0d0e21e
LW
2905 }
2906
2907 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2908 PL_regcc = cc->oldcc;
2ab05381
IZ
2909 if (PL_regcc)
2910 ln = PL_regcc->cur;
a0d0e21e 2911 if (regmatch(cc->next))
4633a7c4 2912 sayYES;
2ab05381
IZ
2913 if (PL_regcc)
2914 PL_regcc->cur = ln;
3280af22 2915 PL_regcc = cc;
4633a7c4 2916 cc->cur = n - 1;
c277df42 2917 cc->lastloc = lastloc;
4633a7c4 2918 sayNO;
a0d0e21e
LW
2919 }
2920 /* NOT REACHED */
9041c2e3 2921 case BRANCHJ:
c277df42
IZ
2922 next = scan + ARG(scan);
2923 if (next == scan)
2924 next = NULL;
2925 inner = NEXTOPER(NEXTOPER(scan));
2926 goto do_branch;
9041c2e3 2927 case BRANCH:
c277df42
IZ
2928 inner = NEXTOPER(scan);
2929 do_branch:
2930 {
2931 CHECKPOINT lastcp;
2932 c1 = OP(scan);
2933 if (OP(next) != c1) /* No choice. */
2934 next = inner; /* Avoid recursion. */
a0d0e21e 2935 else {
02db2b7b
IZ
2936 I32 lastparen = *PL_reglastparen;
2937 I32 unwind1;
2938 re_unwind_branch_t *uw;
2939
2940 /* Put unwinding data on stack */
2941 unwind1 = SSNEWt(1,re_unwind_branch_t);
2942 uw = SSPTRt(unwind1,re_unwind_branch_t);
2943 uw->prev = unwind;
2944 unwind = unwind1;
2945 uw->type = ((c1 == BRANCH)
2946 ? RE_UNWIND_BRANCH
2947 : RE_UNWIND_BRANCHJ);
2948 uw->lastparen = lastparen;
2949 uw->next = next;
2950 uw->locinput = locinput;
2951 uw->nextchr = nextchr;
2952#ifdef DEBUGGING
2953 uw->regindent = ++PL_regindent;
2954#endif
c277df42 2955
02db2b7b
IZ
2956 REGCP_SET(uw->lastcp);
2957
2958 /* Now go into the first branch */
2959 next = inner;
a687059c 2960 }
a0d0e21e
LW
2961 }
2962 break;
2963 case MINMOD:
2964 minmod = 1;
2965 break;
c277df42
IZ
2966 case CURLYM:
2967 {
00db4c45 2968 I32 l = 0;
c277df42 2969 CHECKPOINT lastcp;
9041c2e3 2970
c277df42
IZ
2971 /* We suppose that the next guy does not need
2972 backtracking: in particular, it is of constant length,
2973 and has no parenths to influence future backrefs. */
2974 ln = ARG1(scan); /* min to match */
2975 n = ARG2(scan); /* max to match */
c277df42
IZ
2976 paren = scan->flags;
2977 if (paren) {
3280af22
NIS
2978 if (paren > PL_regsize)
2979 PL_regsize = paren;
2980 if (paren > *PL_reglastparen)
2981 *PL_reglastparen = paren;
c277df42 2982 }
dc45a647 2983 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2984 if (paren)
2985 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2986 PL_reginput = locinput;
c277df42
IZ
2987 if (minmod) {
2988 minmod = 0;
2989 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2990 sayNO;
5f4b28b2 2991 if (ln && l == 0 && n >= ln
c277df42
IZ
2992 /* In fact, this is tricky. If paren, then the
2993 fact that we did/didnot match may influence
2994 future execution. */
2995 && !(paren && ln == 0))
2996 ln = n;
3280af22 2997 locinput = PL_reginput;
22c35a8c 2998 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2999 c1 = (U8)*STRING(next);
c277df42 3000 if (OP(next) == EXACTF)
22c35a8c 3001 c2 = PL_fold[c1];
c277df42 3002 else if (OP(next) == EXACTFL)
22c35a8c 3003 c2 = PL_fold_locale[c1];
c277df42
IZ
3004 else
3005 c2 = c1;
a0ed51b3
LW
3006 }
3007 else
c277df42 3008 c1 = c2 = -1000;
02db2b7b 3009 REGCP_SET(lastcp);
5f4b28b2 3010 /* This may be improved if l == 0. */
c277df42
IZ
3011 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3012 /* If it could work, try it. */
3013 if (c1 == -1000 ||
3280af22
NIS
3014 UCHARAT(PL_reginput) == c1 ||
3015 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3016 {
3017 if (paren) {
3018 if (n) {
cf93c79d
IZ
3019 PL_regstartp[paren] =
3020 HOPc(PL_reginput, -l) - PL_bostr;
3021 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3022 }
3023 else
cf93c79d 3024 PL_regendp[paren] = -1;
c277df42
IZ
3025 }
3026 if (regmatch(next))
3027 sayYES;
02db2b7b 3028 REGCP_UNWIND(lastcp);
c277df42
IZ
3029 }
3030 /* Couldn't or didn't -- move forward. */
3280af22 3031 PL_reginput = locinput;
c277df42
IZ
3032 if (regrepeat_hard(scan, 1, &l)) {
3033 ln++;
3280af22 3034 locinput = PL_reginput;
c277df42
IZ
3035 }
3036 else
3037 sayNO;
3038 }
a0ed51b3
LW
3039 }
3040 else {
c277df42
IZ
3041 n = regrepeat_hard(scan, n, &l);
3042 if (n != 0 && l == 0
3043 /* In fact, this is tricky. If paren, then the
3044 fact that we did/didnot match may influence
3045 future execution. */
3046 && !(paren && ln == 0))
3047 ln = n;
3280af22 3048 locinput = PL_reginput;
c277df42 3049 DEBUG_r(
5c0ca799 3050 PerlIO_printf(Perl_debug_log,
faccc32b 3051 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3052 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3053 (IV) n, (IV)l)
c277df42
IZ
3054 );
3055 if (n >= ln) {
22c35a8c 3056 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3057 c1 = (U8)*STRING(next);
c277df42 3058 if (OP(next) == EXACTF)
22c35a8c 3059 c2 = PL_fold[c1];
c277df42 3060 else if (OP(next) == EXACTFL)
22c35a8c 3061 c2 = PL_fold_locale[c1];
c277df42
IZ
3062 else
3063 c2 = c1;
a0ed51b3
LW
3064 }
3065 else
c277df42
IZ
3066 c1 = c2 = -1000;
3067 }
02db2b7b 3068 REGCP_SET(lastcp);
c277df42
IZ
3069 while (n >= ln) {
3070 /* If it could work, try it. */
3071 if (c1 == -1000 ||
3280af22
NIS
3072 UCHARAT(PL_reginput) == c1 ||
3073 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3074 {
3075 DEBUG_r(
c3464db5 3076 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3077 "%*s trying tail with n=%"IVdf"...\n",
3078 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3079 );
3080 if (paren) {
3081 if (n) {
cf93c79d
IZ
3082 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3083 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3084 }
a0ed51b3 3085 else
cf93c79d 3086 PL_regendp[paren] = -1;
c277df42 3087 }
a0ed51b3
LW
3088 if (regmatch(next))
3089 sayYES;
02db2b7b 3090 REGCP_UNWIND(lastcp);
a0ed51b3 3091 }
c277df42
IZ
3092 /* Couldn't or didn't -- back up. */
3093 n--;
dfe13c55 3094 locinput = HOPc(locinput, -l);
3280af22 3095 PL_reginput = locinput;
c277df42
IZ
3096 }
3097 }
3098 sayNO;
3099 break;
3100 }
3101 case CURLYN:
3102 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3103 if (paren > PL_regsize)
3104 PL_regsize = paren;
3105 if (paren > *PL_reglastparen)
3106 *PL_reglastparen = paren;
c277df42
IZ
3107 ln = ARG1(scan); /* min to match */
3108 n = ARG2(scan); /* max to match */
dc45a647 3109 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3110 goto repeat;
a0d0e21e 3111 case CURLY:
c277df42 3112 paren = 0;
a0d0e21e
LW
3113 ln = ARG1(scan); /* min to match */
3114 n = ARG2(scan); /* max to match */
dc45a647 3115 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3116 goto repeat;
3117 case STAR:
3118 ln = 0;
c277df42 3119 n = REG_INFTY;
a0d0e21e 3120 scan = NEXTOPER(scan);
c277df42 3121 paren = 0;
a0d0e21e
LW
3122 goto repeat;
3123 case PLUS:
c277df42
IZ
3124 ln = 1;
3125 n = REG_INFTY;
3126 scan = NEXTOPER(scan);
3127 paren = 0;
3128 repeat:
a0d0e21e
LW
3129 /*
3130 * Lookahead to avoid useless match attempts
3131 * when we know what character comes next.
3132 */
22c35a8c 3133 if (PL_regkind[(U8)OP(next)] == EXACT) {
1aa99e6b
IH
3134 U8 *s = (U8*)STRING(next);
3135 if (!UTF) {
3136 c2 = c1 = *s;
3137 if (OP(next) == EXACTF)
3138 c2 = PL_fold[c1];
3139 else if (OP(next) == EXACTFL)
3140 c2 = PL_fold_locale[c1];
3141 }
3142 else { /* UTF */
3143 if (OP(next) == EXACTF) {
3144 c1 = to_utf8_lower(s);
3145 c2 = to_utf8_upper(s);
3146 }
3147 else {
9041c2e3 3148 c2 = c1 = utf8_to_uvchr(s, NULL);
1aa99e6b
IH
3149 }
3150 }
bbce6d69 3151 }
a0d0e21e 3152 else
bbce6d69 3153 c1 = c2 = -1000;
3280af22 3154 PL_reginput = locinput;
a0d0e21e 3155 if (minmod) {
c277df42 3156 CHECKPOINT lastcp;
a0d0e21e
LW
3157 minmod = 0;
3158 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3159 sayNO;
a0ed51b3 3160 locinput = PL_reginput;
02db2b7b 3161 REGCP_SET(lastcp);
0fe9bf95 3162 if (c1 != -1000) {
1aa99e6b 3163 char *e; /* Should not check after this */
0fe9bf95
IZ
3164 char *old = locinput;
3165
1aa99e6b 3166 if (n == REG_INFTY) {
0fe9bf95 3167 e = PL_regeol - 1;
1aa99e6b
IH
3168 if (do_utf8)
3169 while (UTF8_IS_CONTINUATION(*(U8*)e))
3170 e--;
3171 }
3172 else if (do_utf8) {
3173 int m = n - ln;
3174 for (e = locinput;
3175 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3176 e += UTF8SKIP(e);
3177 }
3178 else {
3179 e = locinput + n - ln;
3180 if (e >= PL_regeol)
3181 e = PL_regeol - 1;
3182 }
0fe9bf95 3183 while (1) {
1aa99e6b 3184 int count;
0fe9bf95 3185 /* Find place 'next' could work */
1aa99e6b
IH
3186 if (!do_utf8) {
3187 if (c1 == c2) {
3188 while (locinput <= e && *locinput != c1)
3189 locinput++;
3190 } else {
9041c2e3 3191 while (locinput <= e
1aa99e6b
IH
3192 && *locinput != c1
3193 && *locinput != c2)
3194 locinput++;
3195 }
3196 count = locinput - old;
3197 }
3198 else {
3199 STRLEN len;
3200 if (c1 == c2) {
3201 for (count = 0;
3202 locinput <= e &&
9041c2e3 3203 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3204 count++)
3205 locinput += len;
3206
3207 } else {
3208 for (count = 0; locinput <= e; count++) {
9041c2e3 3209 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3210 if (c == c1 || c == c2)
3211 break;
9041c2e3 3212 locinput += len;
1aa99e6b
IH
3213 }
3214 }
0fe9bf95 3215 }
9041c2e3 3216 if (locinput > e)
0fe9bf95
IZ
3217 sayNO;
3218 /* PL_reginput == old now */
3219 if (locinput != old) {
3220 ln = 1; /* Did some */
1aa99e6b 3221 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3222 sayNO;
3223 }
3224 /* PL_reginput == locinput now */
29d1e993 3225 TRYPAREN(paren, ln, locinput);
0fe9bf95 3226 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3227 REGCP_UNWIND(lastcp);
0fe9bf95 3228 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3229 old = locinput;
3230 if (do_utf8)
3231 locinput += UTF8SKIP(locinput);
3232 else
3233 locinput++;
0fe9bf95
IZ
3234 }
3235 }
3236 else
c277df42 3237 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3238 UV c;
3239 if (c1 != -1000) {
3240 if (do_utf8)
9041c2e3 3241 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3242 else
9041c2e3 3243 c = UCHARAT(PL_reginput);
2390ecbc
PP
3244 /* If it could work, try it. */
3245 if (c == c1 || c == c2)
3246 {
3247 TRYPAREN(paren, n, PL_reginput);
3248 REGCP_UNWIND(lastcp);
3249 }
1aa99e6b 3250 }
a0d0e21e 3251 /* If it could work, try it. */
2390ecbc 3252 else if (c1 == -1000)
bbce6d69 3253 {
29d1e993 3254 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3255 REGCP_UNWIND(lastcp);
bbce6d69 3256 }
c277df42 3257 /* Couldn't or didn't -- move forward. */
a0ed51b3 3258 PL_reginput = locinput;
a0d0e21e
LW
3259 if (regrepeat(scan, 1)) {
3260 ln++;
a0ed51b3
LW
3261 locinput = PL_reginput;
3262 }
3263 else
4633a7c4 3264 sayNO;
a0d0e21e
LW
3265 }
3266 }
3267 else {
c277df42 3268 CHECKPOINT lastcp;
a0d0e21e 3269 n = regrepeat(scan, n);
a0ed51b3 3270 locinput = PL_reginput;
22c35a8c 3271 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3272 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3273 ln = n; /* why back off? */
1aeab75a
GS
3274 /* ...because $ and \Z can match before *and* after
3275 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3276 We should back off by one in this case. */
3277 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3278 ln--;
3279 }
02db2b7b 3280 REGCP_SET(lastcp);
c277df42 3281 if (paren) {
1aa99e6b 3282 UV c;
c277df42 3283 while (n >= ln) {
1aa99e6b
IH
3284 if (c1 != -1000) {
3285 if (do_utf8)
9041c2e3 3286 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3287 else
9041c2e3 3288 c = UCHARAT(PL_reginput);
1aa99e6b 3289 }
c277df42 3290 /* If it could work, try it. */
1aa99e6b 3291 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3292 {
29d1e993 3293 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3294 REGCP_UNWIND(lastcp);
c277df42
IZ
3295 }
3296 /* Couldn't or didn't -- back up. */
3297 n--;
dfe13c55 3298 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3299 }
a0ed51b3
LW
3300 }
3301 else {
1aa99e6b 3302 UV c;
c277df42 3303 while (n >= ln) {
1aa99e6b
IH
3304 if (c1 != -1000) {
3305 if (do_utf8)
9041c2e3 3306 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3307 else
9041c2e3 3308 c = UCHARAT(PL_reginput);
1aa99e6b 3309 }
c277df42 3310 /* If it could work, try it. */
1aa99e6b 3311 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3312 {
29d1e993 3313 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3314 REGCP_UNWIND(lastcp);
c277df42
IZ
3315 }
3316 /* Couldn't or didn't -- back up. */
3317 n--;
dfe13c55 3318 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3319 }
a0d0e21e
LW
3320 }
3321 }
4633a7c4 3322 sayNO;
c277df42 3323 break;
a0d0e21e 3324 case END:
0f5d15d6
IZ
3325 if (PL_reg_call_cc) {
3326 re_cc_state *cur_call_cc = PL_reg_call_cc;
3327 CURCUR *cctmp = PL_regcc;
3328 regexp *re = PL_reg_re;
3329 CHECKPOINT cp, lastcp;
3330
3331 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3332 REGCP_SET(lastcp);
0f5d15d6
IZ
3333 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3334 the caller. */
3335 PL_reginput = locinput; /* Make position available to
3336 the callcc. */
3337 cache_re(PL_reg_call_cc->re);
3338 PL_regcc = PL_reg_call_cc->cc;
3339 PL_reg_call_cc = PL_reg_call_cc->prev;
3340 if (regmatch(cur_call_cc->node)) {
3341 PL_reg_call_cc = cur_call_cc;
3342 regcpblow(cp);
3343 sayYES;
3344 }
02db2b7b 3345 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3346 regcppop();
3347 PL_reg_call_cc = cur_call_cc;
3348 PL_regcc = cctmp;
3349 PL_reg_re = re;
3350 cache_re(re);
3351
3352 DEBUG_r(
3353 PerlIO_printf(Perl_debug_log,
3354 "%*s continuation failed...\n",
3355 REPORT_CODE_OFF+PL_regindent*2, "")
3356 );
7821416a 3357 sayNO_SILENT;
0f5d15d6 3358 }
7821416a
IZ
3359 if (locinput < PL_regtill) {
3360 DEBUG_r(PerlIO_printf(Perl_debug_log,
3361 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3362 PL_colors[4],
3363 (long)(locinput - PL_reg_starttry),
3364 (long)(PL_regtill - PL_reg_starttry),
3365 PL_colors[5]));
3366 sayNO_FINAL; /* Cannot match: too short. */
3367 }
3368 PL_reginput = locinput; /* put where regtry can find it */
3369 sayYES_FINAL; /* Success! */
7e5428c5 3370 case SUCCEED:
3280af22 3371 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3372 sayYES_LOUD; /* Success! */
c277df42
IZ
3373 case SUSPEND:
3374 n = 1;
9fe1d20c 3375 PL_reginput = locinput;
9041c2e3 3376 goto do_ifmatch;
a0d0e21e 3377 case UNLESSM:
c277df42 3378 n = 0;
a0ed51b3 3379 if (scan->flags) {
efb30f32
HS
3380 s = HOPBACKc(locinput, scan->flags);
3381 if (!s)
3382 goto say_yes;
3383 PL_reginput = s;
a0ed51b3
LW
3384 }
3385 else
3386 PL_reginput = locinput;
c277df42
IZ
3387 goto do_ifmatch;
3388 case IFMATCH:
3389 n = 1;
a0ed51b3 3390 if (scan->flags) {
efb30f32
HS
3391 s = HOPBACKc(locinput, scan->flags);
3392 if (!s)
3393 goto say_no;
3394 PL_reginput = s;
a0ed51b3
LW
3395 }
3396 else
3397 PL_reginput = locinput;
3398
c277df42 3399 do_ifmatch:
c277df42
IZ
3400 inner = NEXTOPER(NEXTOPER(scan));
3401 if (regmatch(inner) != n) {
3402 say_no:
3403 if (logical) {
3404 logical = 0;
3405 sw = 0;
3406 goto do_longjump;
a0ed51b3
LW
3407 }
3408 else
c277df42
IZ
3409 sayNO;
3410 }
3411 say_yes:
3412 if (logical) {
3413 logical = 0;
3414 sw = 1;
3415 }
fe44a5e8 3416 if (OP(scan) == SUSPEND) {
3280af22 3417 locinput = PL_reginput;
565764a8 3418 nextchr = UCHARAT(locinput);
fe44a5e8 3419 }
c277df42
IZ
3420 /* FALL THROUGH. */
3421 case LONGJMP:
3422 do_longjump:
3423 next = scan + ARG(scan);
3424 if (next == scan)
3425 next = NULL;
a0d0e21e
LW
3426 break;
3427 default:
b900a521 3428 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3429 PTR2UV(scan), OP(scan));
cea2e8a9 3430 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3431 }
02db2b7b 3432 reenter:
a0d0e21e
LW
3433 scan = next;
3434 }
a687059c 3435
a0d0e21e
LW
3436 /*
3437 * We get here only if there's trouble -- normally "case END" is
3438 * the terminating point.
3439 */
cea2e8a9 3440 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3441 /*NOTREACHED*/
4633a7c4
LW
3442 sayNO;
3443
7821416a
IZ
3444yes_loud:
3445 DEBUG_r(
3446 PerlIO_printf(Perl_debug_log,
3447 "%*s %scould match...%s\n",
3448 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3449 );
3450 goto yes;
3451yes_final:
3452 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3453 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3454yes:
3455#ifdef DEBUGGING
3280af22 3456 PL_regindent--;
4633a7c4 3457#endif
02db2b7b
IZ
3458
3459#if 0 /* Breaks $^R */
3460 if (unwind)
3461 regcpblow(firstcp);
3462#endif
4633a7c4
LW
3463 return 1;
3464
3465no:
7821416a
IZ
3466 DEBUG_r(
3467 PerlIO_printf(Perl_debug_log,
3468 "%*s %sfailed...%s\n",
3469 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3470 );
3471 goto do_no;
3472no_final:
3473do_no:
02db2b7b
IZ
3474 if (unwind) {
3475 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3476
3477 switch (uw->type) {
3478 case RE_UNWIND_BRANCH:
3479 case RE_UNWIND_BRANCHJ:
3480 {
3481 re_unwind_branch_t *uwb = &(uw->branch);
3482 I32 lastparen = uwb->lastparen;
9041c2e3 3483
02db2b7b
IZ
3484 REGCP_UNWIND(uwb->lastcp);
3485 for (n = *PL_reglastparen; n > lastparen; n--)
3486 PL_regendp[n] = -1;
3487 *PL_reglastparen = n;
3488 scan = next = uwb->next;
9041c2e3
NIS
3489 if ( !scan ||
3490 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
3491 ? BRANCH : BRANCHJ) ) { /* Failure */
3492 unwind = uwb->prev;
3493#ifdef DEBUGGING
3494 PL_regindent--;
3495#endif
3496 goto do_no;
3497 }
3498 /* Have more choice yet. Reuse the same uwb. */
3499 /*SUPPRESS 560*/
3500 if ((n = (uwb->type == RE_UNWIND_BRANCH
3501 ? NEXT_OFF(next) : ARG(next))))
3502 next += n;
3503 else
3504 next = NULL; /* XXXX Needn't unwinding in this case... */
3505 uwb->next = next;
3506 next = NEXTOPER(scan);
3507 if (uwb->type == RE_UNWIND_BRANCHJ)
3508 next = NEXTOPER(next);
3509 locinput = uwb->locinput;
3510 nextchr = uwb->nextchr;
3511#ifdef DEBUGGING
3512 PL_regindent = uwb->regindent;
3513#endif
3514
3515 goto reenter;
3516 }
3517 /* NOT REACHED */
3518 default:
3519 Perl_croak(aTHX_ "regexp unwind memory corruption");
3520 }
3521 /* NOT REACHED */
3522 }
4633a7c4 3523#ifdef DEBUGGING
3280af22 3524 PL_regindent--;
4633a7c4 3525#endif
a0d0e21e 3526 return 0;
a687059c
LW
3527}
3528
3529/*
3530 - regrepeat - repeatedly match something simple, report how many
3531 */
3532/*
3533 * [This routine now assumes that it will only match on things of length 1.
3534 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3535 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3536 */
76e3520e 3537STATIC I32
cea2e8a9 3538S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3539{
a0d0e21e 3540 register char *scan;
a0d0e21e 3541 register I32 c;
3280af22 3542 register char *loceol = PL_regeol;
a0ed51b3 3543 register I32 hardcount = 0;
ffc61ed2 3544 register bool do_utf8 = DO_UTF8(PL_reg_sv);
a0d0e21e 3545
3280af22 3546 scan = PL_reginput;
c277df42 3547 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3548 loceol = scan + max;
a0d0e21e 3549 switch (OP(p)) {
22c35a8c 3550 case REG_ANY:
1aa99e6b 3551 if (do_utf8) {
ffc61ed2 3552 loceol = PL_regeol;
1aa99e6b 3553 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3554 scan += UTF8SKIP(scan);
3555 hardcount++;
3556 }
3557 } else {
3558 while (scan < loceol && *scan != '\n')
3559 scan++;
a0ed51b3
LW
3560 }
3561 break;
ffc61ed2 3562 case SANY:
3baa4c62 3563 scan = loceol;
a0ed51b3 3564 break;
bbce6d69 3565 case EXACT: /* length of string is 1 */
cd439c50 3566 c = (U8)*STRING(p);
bbce6d69 3567 while (scan < loceol && UCHARAT(scan) == c)
3568 scan++;
3569 break;
3570 case EXACTF: /* length of string is 1 */
cd439c50 3571 c = (U8)*STRING(p);
bbce6d69 3572 while (scan < loceol &&
22c35a8c 3573 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3574 scan++;
3575 break;
3576 case EXACTFL: /* length of string is 1 */
3280af22 3577 PL_reg_flags |= RF_tainted;
cd439c50 3578 c = (U8)*STRING(p);
bbce6d69 3579 while (scan < loceol &&
22c35a8c 3580 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3581 scan++;
3582 break;
3583 case ANYOF:
ffc61ed2
JH
3584 if (do_utf8) {
3585 loceol = PL_regeol;
1aa99e6b
IH
3586 while (hardcount < max && scan < loceol &&
3587 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3588 scan += UTF8SKIP(scan);
3589 hardcount++;
3590 }
3591 } else {
3592 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3593 scan++;
3594 }
a0d0e21e
LW
3595 break;
3596 case ALNUM:
1aa99e6b 3597 if (do_utf8) {
ffc61ed2 3598 loceol = PL_regeol;
8269fa76 3599 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3600 while (hardcount < max && scan < loceol &&
3568d838 3601 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3602 scan += UTF8SKIP(scan);
3603 hardcount++;
3604 }
3605 } else {
3606 while (scan < loceol && isALNUM(*scan))
3607 scan++;
a0ed51b3
LW
3608 }
3609 break;
bbce6d69 3610 case ALNUML:
3280af22 3611 PL_reg_flags |= RF_tainted;
1aa99e6b 3612 if (do_utf8) {