This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Clean .exists deeper.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
3818b22b 69 **** Copyright (c) 1991-2000, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
02db2b7b 148# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 149 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 150 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 151
02db2b7b 152# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 153 PerlIO_printf(Perl_debug_log, \
faccc32b 154 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 155 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5 178 PerlIO_printf(Perl_debug_log,
b900a521
JH
179 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
180 (UV)paren, (IV)PL_regstartp[paren],
181 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
182 (IV)PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 188 PerlIO_printf(Perl_debug_log,
faccc32b
JH
189 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
190 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
191 }
192 );
dafc8851
JH
193#if 0
194 /* It would seem that the similar code in regtry()
195 * already takes care of this, and in fact it is in
196 * a better location to since this code can #if 0-ed out
197 * but the code in regtry() is needed or otherwise tests
198 * requiring null fields (pat.t#187 and split.t#{13,14}
09e8ae3b 199 * (as of patchlevel 7877) will fail. --jhi */
3280af22
NIS
200 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
201 if (paren > PL_regsize)
cf93c79d
IZ
202 PL_regstartp[paren] = -1;
203 PL_regendp[paren] = -1;
a0d0e21e 204 }
dafc8851 205#endif
a0d0e21e
LW
206 return input;
207}
208
0f5d15d6 209STATIC char *
cea2e8a9 210S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 211{
46124e9e 212 dTHR;
0f5d15d6
IZ
213 I32 tmp = PL_savestack_ix;
214
215 PL_savestack_ix = ss;
216 regcppop();
217 PL_savestack_ix = tmp;
942e002e 218 return Nullch;
0f5d15d6
IZ
219}
220
221typedef struct re_cc_state
222{
223 I32 ss;
224 regnode *node;
225 struct re_cc_state *prev;
226 CURCUR *cc;
227 regexp *re;
228} re_cc_state;
229
02db2b7b 230#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 231
29d1e993
HS
232#define TRYPAREN(paren, n, input) { \
233 if (paren) { \
234 if (n) { \
235 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
236 PL_regendp[paren] = input - PL_bostr; \
237 } \
238 else \
239 PL_regendp[paren] = -1; \
240 } \
241 if (regmatch(next)) \
242 sayYES; \
243 if (paren && n) \
244 PL_regendp[paren] = -1; \
245}
246
247
a687059c 248/*
e50aee73 249 * pregexec and friends
a687059c
LW
250 */
251
252/*
c277df42 253 - pregexec - match a regexp against a string
a687059c 254 */
c277df42 255I32
864dbfa3 256Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 257 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
258/* strend: pointer to null at end of string */
259/* strbeg: real beginning of string */
260/* minend: end of match must be >=minend after stringarg. */
261/* nosave: For optimizations. */
262{
263 return
264 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
265 nosave ? 0 : REXEC_COPY_STR);
266}
0f5d15d6
IZ
267
268STATIC void
cea2e8a9 269S_cache_re(pTHX_ regexp *prog)
0f5d15d6 270{
46124e9e 271 dTHR;
0f5d15d6
IZ
272 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
273#ifdef DEBUGGING
274 PL_regprogram = prog->program;
275#endif
276 PL_regnpar = prog->nparens;
277 PL_regdata = prog->data;
278 PL_reg_re = prog;
279}
22e551b9 280
cad2e5aa
JH
281/*
282 * Need to implement the following flags for reg_anch:
283 *
284 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
285 * USE_INTUIT_ML
286 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
287 * INTUIT_AUTORITATIVE_ML
288 * INTUIT_ONCE_NOML - Intuit can match in one location only.
289 * INTUIT_ONCE_ML
290 *
291 * Another flag for this function: SECOND_TIME (so that float substrs
292 * with giant delta may be not rechecked).
293 */
294
295/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
296
2c2d71f5 297/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
298 Otherwise, only SvCUR(sv) is used to get strbeg. */
299
300/* XXXX We assume that strpos is strbeg unless sv. */
301
6eb5f6b9
JH
302/* XXXX Some places assume that there is a fixed substring.
303 An update may be needed if optimizer marks as "INTUITable"
304 RExen without fixed substrings. Similarly, it is assumed that
305 lengths of all the strings are no more than minlen, thus they
306 cannot come from lookahead.
307 (Or minlen should take into account lookahead.) */
308
2c2d71f5
JH
309/* A failure to find a constant substring means that there is no need to make
310 an expensive call to REx engine, thus we celebrate a failure. Similarly,
311 finding a substring too deep into the string means that less calls to
30944b6d
IZ
312 regtry() should be needed.
313
314 REx compiler's optimizer found 4 possible hints:
315 a) Anchored substring;
316 b) Fixed substring;
317 c) Whether we are anchored (beginning-of-line or \G);
318 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 319 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
320 string which does not contradict any of them.
321 */
2c2d71f5 322
6eb5f6b9
JH
323/* Most of decisions we do here should have been done at compile time.
324 The nodes of the REx which we used for the search should have been
325 deleted from the finite automaton. */
326
cad2e5aa
JH
327char *
328Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
329 char *strend, U32 flags, re_scream_pos_data *data)
330{
2c2d71f5 331 register I32 start_shift;
cad2e5aa 332 /* Should be nonnegative! */
2c2d71f5
JH
333 register I32 end_shift;
334 register char *s;
335 register SV *check;
a1933d95 336 char *strbeg;
cad2e5aa
JH
337 char *t;
338 I32 ml_anch;
2c2d71f5 339 char *tmp;
6eb5f6b9
JH
340 register char *other_last = Nullch; /* other substr checked before this */
341 char *check_at; /* check substr found at this pos */
30944b6d
IZ
342#ifdef DEBUGGING
343 char *i_strpos = strpos;
344#endif
cad2e5aa
JH
345
346 DEBUG_r( if (!PL_colorset) reginitcolors() );
347 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 348 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
349 PL_colors[4],PL_colors[5],PL_colors[0],
350 prog->precomp,
351 PL_colors[1],
352 (strlen(prog->precomp) > 60 ? "..." : ""),
353 PL_colors[0],
b900a521 354 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
355 strpos, PL_colors[1],
356 (strend - strpos > 60 ? "..." : ""))
357 );
358
2c2d71f5
JH
359 if (prog->minlen > strend - strpos) {
360 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 361 goto fail;
2c2d71f5 362 }
a1933d95 363 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
653099ff 364 check = prog->check_substr;
2c2d71f5 365 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
366 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
367 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 368 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 369
5675c1a6
IZ
370 if (!ml_anch) {
371 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
372 /* SvCUR is not set on references: SvRV and SvPVX overlap */
373 && sv && !SvROK(sv)
a1933d95 374 && (strpos != strbeg)) {
5675c1a6
IZ
375 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
376 goto fail;
377 }
378 if (prog->check_offset_min == prog->check_offset_max) {
2c2d71f5 379 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
380 I32 slen;
381
adac82c7 382 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 383 s = HOPc(strpos, prog->check_offset_min);
653099ff
GS
384 if (SvTAIL(check)) {
385 slen = SvCUR(check); /* >= 1 */
cad2e5aa 386
2c2d71f5
JH
387 if ( strend - s > slen || strend - s < slen - 1
388 || (strend - s == slen && strend[-1] != '\n')) {
389 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
390 goto fail_finish;
cad2e5aa
JH
391 }
392 /* Now should match s[0..slen-2] */
393 slen--;
653099ff 394 if (slen && (*SvPVX(check) != *s
cad2e5aa 395 || (slen > 1
653099ff 396 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
397 report_neq:
398 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
399 goto fail_finish;
400 }
cad2e5aa 401 }
653099ff
GS
402 else if (*SvPVX(check) != *s
403 || ((slen = SvCUR(check)) > 1
404 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
405 goto report_neq;
406 goto success_at_start;
5675c1a6 407 }
cad2e5aa 408 }
2c2d71f5 409 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 410 s = strpos;
2c2d71f5 411 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 412 end_shift = prog->minlen - start_shift -
653099ff 413 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 414 if (!ml_anch) {
653099ff
GS
415 I32 end = prog->check_offset_max + CHR_SVLEN(check)
416 - (SvTAIL(check) != 0);
2c2d71f5
JH
417 I32 eshift = strend - s - end;
418
419 if (end_shift < eshift)
420 end_shift = eshift;
421 }
cad2e5aa 422 }
2c2d71f5 423 else { /* Can match at random position */
cad2e5aa
JH
424 ml_anch = 0;
425 s = strpos;
2c2d71f5
JH
426 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
427 /* Should be nonnegative! */
428 end_shift = prog->minlen - start_shift -
653099ff 429 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
430 }
431
2c2d71f5 432#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 433 if (end_shift < 0)
6bbae5e6 434 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
435#endif
436
2c2d71f5
JH
437 restart:
438 /* Find a possible match in the region s..strend by looking for
439 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 440 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
441 I32 p = -1; /* Internal iterator of scream. */
442 I32 *pp = data ? data->scream_pos : &p;
443
2c2d71f5
JH
444 if (PL_screamfirst[BmRARE(check)] >= 0
445 || ( BmRARE(check) == '\n'
446 && (BmPREVIOUS(check) == SvCUR(check) - 1)
447 && SvTAIL(check) ))
448 s = screaminstr(sv, check,
449 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 450 else
2c2d71f5 451 goto fail_finish;
cad2e5aa
JH
452 if (data)
453 *data->scream_olds = s;
454 }
455 else
456 s = fbm_instr((unsigned char*)s + start_shift,
457 (unsigned char*)strend - end_shift,
2c2d71f5 458 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
459
460 /* Update the count-of-usability, remove useless subpatterns,
461 unshift s. */
2c2d71f5
JH
462
463 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
464 (s ? "Found" : "Did not find"),
465 ((check == prog->anchored_substr) ? "anchored" : "floating"),
466 PL_colors[0],
7b0972df
JH
467 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
468 SvPVX(check),
2c2d71f5
JH
469 PL_colors[1], (SvTAIL(check) ? "$" : ""),
470 (s ? " at offset " : "...\n") ) );
471
472 if (!s)
473 goto fail_finish;
474
6eb5f6b9
JH
475 check_at = s;
476
2c2d71f5 477 /* Finish the diagnostic message */
30944b6d 478 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
479
480 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
481 Start with the other substr.
482 XXXX no SCREAM optimization yet - and a very coarse implementation
483 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
484 *always* match. Probably should be marked during compile...
485 Probably it is right to do no SCREAM here...
486 */
487
488 if (prog->float_substr && prog->anchored_substr) {
30944b6d 489 /* Take into account the "other" substring. */
2c2d71f5
JH
490 /* XXXX May be hopelessly wrong for UTF... */
491 if (!other_last)
6eb5f6b9 492 other_last = strpos;
2c2d71f5 493 if (check == prog->float_substr) {
30944b6d
IZ
494 do_other_anchored:
495 {
2c2d71f5
JH
496 char *last = s - start_shift, *last1, *last2;
497 char *s1 = s;
498
499 tmp = PL_bostr;
500 t = s - prog->check_offset_max;
501 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
502 && (!(prog->reganch & ROPT_UTF8)
503 || (PL_bostr = strpos, /* Used in regcopmaybe() */
504 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
505 && t > strpos)))
30944b6d 506 /* EMPTY */;
2c2d71f5
JH
507 else
508 t = strpos;
509 t += prog->anchored_offset;
6eb5f6b9
JH
510 if (t < other_last) /* These positions already checked */
511 t = other_last;
2c2d71f5
JH
512 PL_bostr = tmp;
513 last2 = last1 = strend - prog->minlen;
514 if (last < last1)
515 last1 = last;
516 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
517 /* On end-of-str: see comment below. */
518 s = fbm_instr((unsigned char*)t,
519 (unsigned char*)last1 + prog->anchored_offset
520 + SvCUR(prog->anchored_substr)
521 - (SvTAIL(prog->anchored_substr)!=0),
522 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
523 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
524 (s ? "Found" : "Contradicts"),
525 PL_colors[0],
7b0972df
JH
526 (int)(SvCUR(prog->anchored_substr)
527 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
528 SvPVX(prog->anchored_substr),
529 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
530 if (!s) {
531 if (last1 >= last2) {
532 DEBUG_r(PerlIO_printf(Perl_debug_log,
533 ", giving up...\n"));
534 goto fail_finish;
535 }
536 DEBUG_r(PerlIO_printf(Perl_debug_log,
537 ", trying floating at offset %ld...\n",
30944b6d 538 (long)(s1 + 1 - i_strpos)));
2c2d71f5 539 PL_regeol = strend; /* Used in HOP() */
6eb5f6b9 540 other_last = last1 + prog->anchored_offset + 1;
2c2d71f5
JH
541 s = HOPc(last, 1);
542 goto restart;
543 }
544 else {
545 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 546 (long)(s - i_strpos)));
2c2d71f5 547 t = s - prog->anchored_offset;
6eb5f6b9 548 other_last = s + 1;
30944b6d 549 s = s1;
2c2d71f5
JH
550 if (t == strpos)
551 goto try_at_start;
2c2d71f5
JH
552 goto try_at_offset;
553 }
30944b6d 554 }
2c2d71f5
JH
555 }
556 else { /* Take into account the floating substring. */
557 char *last, *last1;
558 char *s1 = s;
559
560 t = s - start_shift;
561 last1 = last = strend - prog->minlen + prog->float_min_offset;
562 if (last - t > prog->float_max_offset)
563 last = t + prog->float_max_offset;
564 s = t + prog->float_min_offset;
6eb5f6b9
JH
565 if (s < other_last)
566 s = other_last;
2c2d71f5
JH
567 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
568 /* fbm_instr() takes into account exact value of end-of-str
569 if the check is SvTAIL(ed). Since false positives are OK,
570 and end-of-str is not later than strend we are OK. */
571 s = fbm_instr((unsigned char*)s,
572 (unsigned char*)last + SvCUR(prog->float_substr)
573 - (SvTAIL(prog->float_substr)!=0),
574 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
575 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
576 (s ? "Found" : "Contradicts"),
577 PL_colors[0],
7b0972df
JH
578 (int)(SvCUR(prog->float_substr)
579 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
580 SvPVX(prog->float_substr),
581 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
582 if (!s) {
583 if (last1 == last) {
584 DEBUG_r(PerlIO_printf(Perl_debug_log,
585 ", giving up...\n"));
586 goto fail_finish;
587 }
588 DEBUG_r(PerlIO_printf(Perl_debug_log,
589 ", trying anchored starting at offset %ld...\n",
30944b6d 590 (long)(s1 + 1 - i_strpos)));
6eb5f6b9 591 other_last = last + 1;
2c2d71f5
JH
592 PL_regeol = strend; /* Used in HOP() */
593 s = HOPc(t, 1);
594 goto restart;
595 }
596 else {
597 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 598 (long)(s - i_strpos)));
6eb5f6b9 599 other_last = s + 1;
30944b6d 600 s = s1;
2c2d71f5
JH
601 if (t == strpos)
602 goto try_at_start;
2c2d71f5
JH
603 goto try_at_offset;
604 }
605 }
cad2e5aa 606 }
2c2d71f5
JH
607
608 t = s - prog->check_offset_max;
609 tmp = PL_bostr;
610 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
611 && (!(prog->reganch & ROPT_UTF8)
612 || (PL_bostr = strpos, /* Used in regcopmaybe() */
613 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
614 && t > strpos)))) {
615 PL_bostr = tmp;
616 /* Fixed substring is found far enough so that the match
617 cannot start at strpos. */
618 try_at_offset:
cad2e5aa 619 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
620 /* Eventually fbm_*() should handle this, but often
621 anchored_offset is not 0, so this check will not be wasted. */
622 /* XXXX In the code below we prefer to look for "^" even in
623 presence of anchored substrings. And we search even
624 beyond the found float position. These pessimizations
625 are historical artefacts only. */
626 find_anchor:
2c2d71f5 627 while (t < strend - prog->minlen) {
cad2e5aa 628 if (*t == '\n') {
4ee3650e 629 if (t < check_at - prog->check_offset_min) {
30944b6d 630 if (prog->anchored_substr) {
4ee3650e
GS
631 /* Since we moved from the found position,
632 we definitely contradict the found anchored
30944b6d
IZ
633 substr. Due to the above check we do not
634 contradict "check" substr.
635 Thus we can arrive here only if check substr
636 is float. Redo checking for "other"=="fixed".
637 */
638 strpos = t + 1;
639 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
640 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
641 goto do_other_anchored;
642 }
4ee3650e
GS
643 /* We don't contradict the found floating substring. */
644 /* XXXX Why not check for STCLASS? */
cad2e5aa 645 s = t + 1;
2c2d71f5 646 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 647 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
648 goto set_useful;
649 }
4ee3650e
GS
650 /* Position contradicts check-string */
651 /* XXXX probably better to look for check-string
652 than for "\n", so one should lower the limit for t? */
653 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 654 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 655 other_last = strpos = s = t + 1;
cad2e5aa
JH
656 goto restart;
657 }
658 t++;
659 }
2c2d71f5
JH
660 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
661 PL_colors[0],PL_colors[1]));
662 goto fail_finish;
cad2e5aa 663 }
f5952150
GS
664 else {
665 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
666 PL_colors[0],PL_colors[1]));
667 }
cad2e5aa
JH
668 s = t;
669 set_useful:
2c2d71f5 670 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
671 }
672 else {
2c2d71f5 673 PL_bostr = tmp;
f5952150 674 /* The found string does not prohibit matching at strpos,
2c2d71f5 675 - no optimization of calling REx engine can be performed,
f5952150
GS
676 unless it was an MBOL and we are not after MBOL,
677 or a future STCLASS check will fail this. */
2c2d71f5
JH
678 try_at_start:
679 /* Even in this situation we may use MBOL flag if strpos is offset
680 wrt the start of the string. */
05b4157f 681 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 682 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
683 /* May be due to an implicit anchor of m{.*foo} */
684 && !(prog->reganch & ROPT_IMPLICIT))
685 {
cad2e5aa
JH
686 t = strpos;
687 goto find_anchor;
688 }
30944b6d 689 DEBUG_r( if (ml_anch)
f5952150
GS
690 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
691 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 692 );
2c2d71f5 693 success_at_start:
30944b6d 694 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 695 && prog->check_substr /* Could be deleted already */
cad2e5aa 696 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
697 && prog->check_substr == prog->float_substr)
698 {
cad2e5aa 699 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 700 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
701 SvREFCNT_dec(prog->check_substr);
702 prog->check_substr = Nullsv; /* disable */
703 prog->float_substr = Nullsv; /* clear */
5e39e1e5 704 check = Nullsv; /* abort */
cad2e5aa 705 s = strpos;
3cf5c195
IZ
706 /* XXXX This is a remnant of the old implementation. It
707 looks wasteful, since now INTUIT can use many
6eb5f6b9 708 other heuristics. */
cad2e5aa
JH
709 prog->reganch &= ~RE_USE_INTUIT;
710 }
711 else
712 s = strpos;
713 }
714
6eb5f6b9
JH
715 /* Last resort... */
716 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
717 if (prog->regstclass) {
718 /* minlen == 0 is possible if regstclass is \b or \B,
719 and the fixed substr is ''$.
720 Since minlen is already taken into account, s+1 is before strend;
721 accidentally, minlen >= 1 guaranties no false positives at s + 1
722 even for \b or \B. But (minlen? 1 : 0) below assumes that
723 regstclass does not come from lookahead... */
724 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
725 This leaves EXACTF only, which is dealt with in find_byclass(). */
66e933ab
GS
726 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
727 ? STR_LEN(prog->regstclass)
728 : 1);
6eb5f6b9 729 char *endpos = (prog->anchored_substr || ml_anch)
66e933ab
GS
730 ? s + (prog->minlen? cl_l : 0)
731 : (prog->float_substr ? check_at - start_shift + cl_l
6eb5f6b9 732 : strend) ;
a1933d95 733 char *startpos = strbeg;
6eb5f6b9
JH
734
735 t = s;
76384e4a
GS
736 if (prog->reganch & ROPT_UTF8) {
737 PL_regdata = prog->data; /* Used by REGINCLASS UTF logic */
738 PL_bostr = startpos;
739 }
6eb5f6b9
JH
740 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
741 if (!s) {
742#ifdef DEBUGGING
743 char *what;
744#endif
745 if (endpos == strend) {
746 DEBUG_r( PerlIO_printf(Perl_debug_log,
747 "Could not match STCLASS...\n") );
748 goto fail;
749 }
66e933ab
GS
750 DEBUG_r( PerlIO_printf(Perl_debug_log,
751 "This position contradicts STCLASS...\n") );
653099ff
GS
752 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
753 goto fail;
6eb5f6b9
JH
754 /* Contradict one of substrings */
755 if (prog->anchored_substr) {
6eb5f6b9
JH
756 if (prog->anchored_substr == check) {
757 DEBUG_r( what = "anchored" );
758 hop_and_restart:
759 PL_regeol = strend; /* Used in HOP() */
760 s = HOPc(t, 1);
66e933ab
GS
761 if (s + start_shift + end_shift > strend) {
762 /* XXXX Should be taken into account earlier? */
763 DEBUG_r( PerlIO_printf(Perl_debug_log,
764 "Could not match STCLASS...\n") );
765 goto fail;
766 }
5e39e1e5
HS
767 if (!check)
768 goto giveup;
6eb5f6b9 769 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 770 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
771 what, (long)(s + start_shift - i_strpos)) );
772 goto restart;
773 }
66e933ab 774 /* Have both, check_string is floating */
6eb5f6b9
JH
775 if (t + start_shift >= check_at) /* Contradicts floating=check */
776 goto retry_floating_check;
777 /* Recheck anchored substring, but not floating... */
778 s = check_at;
5e39e1e5
HS
779 if (!check)
780 goto giveup;
6eb5f6b9 781 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 782 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
783 (long)(other_last - i_strpos)) );
784 goto do_other_anchored;
785 }
60e71179
GS
786 /* Another way we could have checked stclass at the
787 current position only: */
788 if (ml_anch) {
789 s = t = t + 1;
5e39e1e5
HS
790 if (!check)
791 goto giveup;
60e71179 792 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
793 "Looking for /%s^%s/m starting at offset %ld...\n",
794 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 795 goto try_at_offset;
66e933ab 796 }
60e71179
GS
797 if (!prog->float_substr) /* Could have been deleted */
798 goto fail;
6eb5f6b9
JH
799 /* Check is floating subtring. */
800 retry_floating_check:
801 t = check_at - start_shift;
802 DEBUG_r( what = "floating" );
803 goto hop_and_restart;
804 }
805 DEBUG_r( if (t != s)
806 PerlIO_printf(Perl_debug_log,
807 "By STCLASS: moving %ld --> %ld\n",
808 (long)(t - i_strpos), (long)(s - i_strpos));
809 else
810 PerlIO_printf(Perl_debug_log,
811 "Does not contradict STCLASS...\n") );
812 }
5e39e1e5
HS
813 giveup:
814 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
815 PL_colors[4], (check ? "Guessed" : "Giving up"),
816 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 817 return s;
2c2d71f5
JH
818
819 fail_finish: /* Substring not found */
66e933ab
GS
820 if (prog->check_substr) /* could be removed already */
821 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 822 fail:
2c2d71f5 823 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
824 PL_colors[4],PL_colors[5]));
825 return Nullch;
826}
9661b544 827
6eb5f6b9 828/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
829STATIC char *
830S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 831{
6eb5f6b9
JH
832 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
833 char *m;
d8093b23
G
834 STRLEN ln;
835 unsigned int c1;
836 unsigned int c2;
6eb5f6b9
JH
837 char *e;
838 register I32 tmp = 1; /* Scratch variable? */
cad2e5aa 839
6eb5f6b9
JH
840 /* We know what class it must start with. */
841 switch (OP(c)) {
842 case ANYOFUTF8:
843 while (s < strend) {
844 if (REGINCLASSUTF8(c, (U8*)s)) {
845 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 846 goto got_it;
3cf5c195 847 else
6eb5f6b9 848 tmp = doevery;
a0ed51b3 849 }
6eb5f6b9
JH
850 else
851 tmp = 1;
a0ed51b3
LW
852 s += UTF8SKIP(s);
853 }
6eb5f6b9
JH
854 break;
855 case ANYOF:
a0ed51b3 856 while (s < strend) {
d8093b23 857 if (REGINCLASS(c, *(U8*)s)) {
6eb5f6b9
JH
858 if (tmp && (norun || regtry(prog, s)))
859 goto got_it;
860 else
861 tmp = doevery;
a0ed51b3 862 }
6eb5f6b9
JH
863 else
864 tmp = 1;
a0ed51b3 865 s++;
a0d0e21e 866 }
6eb5f6b9
JH
867 break;
868 case EXACTF:
869 m = STRING(c);
870 ln = STR_LEN(c);
d8093b23 871 c1 = *(U8*)m;
6eb5f6b9
JH
872 c2 = PL_fold[c1];
873 goto do_exactf;
874 case EXACTFL:
875 m = STRING(c);
876 ln = STR_LEN(c);
d8093b23 877 c1 = *(U8*)m;
6eb5f6b9
JH
878 c2 = PL_fold_locale[c1];
879 do_exactf:
880 e = strend - ln;
b3c9acc1 881
6eb5f6b9
JH
882 if (norun && e < s)
883 e = s; /* Due to minlen logic of intuit() */
b3c9acc1
IZ
884 /* Here it is NOT UTF! */
885 if (c1 == c2) {
886 while (s <= e) {
d8093b23 887 if ( *(U8*)s == c1
66e933ab
GS
888 && (ln == 1 || !(OP(c) == EXACTF
889 ? ibcmp(s, m, ln)
890 : ibcmp_locale(s, m, ln)))
6eb5f6b9 891 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
892 goto got_it;
893 s++;
894 }
895 } else {
896 while (s <= e) {
d8093b23 897 if ( (*(U8*)s == c1 || *(U8*)s == c2)
66e933ab
GS
898 && (ln == 1 || !(OP(c) == EXACTF
899 ? ibcmp(s, m, ln)
900 : ibcmp_locale(s, m, ln)))
6eb5f6b9 901 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
902 goto got_it;
903 s++;
904 }
905 }
906 break;
bbce6d69 907 case BOUNDL:
3280af22 908 PL_reg_flags |= RF_tainted;
bbce6d69 909 /* FALL THROUGH */
a0d0e21e 910 case BOUND:
6eb5f6b9 911 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 912 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 913 while (s < strend) {
95bac841 914 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 915 tmp = !tmp;
6eb5f6b9 916 if ((norun || regtry(prog, s)))
a0d0e21e 917 goto got_it;
a687059c 918 }
a0d0e21e
LW
919 s++;
920 }
6eb5f6b9 921 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
922 goto got_it;
923 break;
a0ed51b3
LW
924 case BOUNDLUTF8:
925 PL_reg_flags |= RF_tainted;
926 /* FALL THROUGH */
927 case BOUNDUTF8:
667bb95a
JH
928 if (s == startpos)
929 tmp = '\n';
930 else {
931 U8 *r = reghop((U8*)s, -1);
932
933 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
934 }
935 tmp = ((OP(c) == BOUNDUTF8 ?
936 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 937 while (s < strend) {
76384e4a 938 if (tmp == !(OP(c) == BOUNDUTF8 ?
dfe13c55
GS
939 swash_fetch(PL_utf8_alnum, (U8*)s) :
940 isALNUM_LC_utf8((U8*)s)))
941 {
a0ed51b3 942 tmp = !tmp;
6eb5f6b9 943 if ((norun || regtry(prog, s)))
a0ed51b3
LW
944 goto got_it;
945 }
946 s += UTF8SKIP(s);
947 }
6eb5f6b9 948 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
949 goto got_it;
950 break;
bbce6d69 951 case NBOUNDL:
3280af22 952 PL_reg_flags |= RF_tainted;
bbce6d69 953 /* FALL THROUGH */
a0d0e21e 954 case NBOUND:
6eb5f6b9 955 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 956 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 957 while (s < strend) {
95bac841 958 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 959 tmp = !tmp;
6eb5f6b9 960 else if ((norun || regtry(prog, s)))
a0d0e21e
LW
961 goto got_it;
962 s++;
963 }
6eb5f6b9 964 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
965 goto got_it;
966 break;
a0ed51b3
LW
967 case NBOUNDLUTF8:
968 PL_reg_flags |= RF_tainted;
969 /* FALL THROUGH */
970 case NBOUNDUTF8:
667bb95a
JH
971 if (s == startpos)
972 tmp = '\n';
973 else {
974 U8 *r = reghop((U8*)s, -1);
975
976 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
977 }
978 tmp = ((OP(c) == NBOUNDUTF8 ?
979 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 980 while (s < strend) {
76384e4a 981 if (tmp == !(OP(c) == NBOUNDUTF8 ?
dfe13c55
GS
982 swash_fetch(PL_utf8_alnum, (U8*)s) :
983 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 984 tmp = !tmp;
6eb5f6b9 985 else if ((norun || regtry(prog, s)))
a0ed51b3
LW
986 goto got_it;
987 s += UTF8SKIP(s);
988 }
6eb5f6b9 989 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
990 goto got_it;
991 break;
a0d0e21e
LW
992 case ALNUM:
993 while (s < strend) {
bbce6d69 994 if (isALNUM(*s)) {
6eb5f6b9 995 if (tmp && (norun || regtry(prog, s)))
bbce6d69 996 goto got_it;
997 else
998 tmp = doevery;
999 }
1000 else
1001 tmp = 1;
1002 s++;
1003 }
1004 break;
a0ed51b3
LW
1005 case ALNUMUTF8:
1006 while (s < strend) {
dfe13c55 1007 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 1008 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1009 goto got_it;
1010 else
1011 tmp = doevery;
1012 }
1013 else
1014 tmp = 1;
1015 s += UTF8SKIP(s);
1016 }
1017 break;
bbce6d69 1018 case ALNUML:
3280af22 1019 PL_reg_flags |= RF_tainted;
bbce6d69 1020 while (s < strend) {
1021 if (isALNUM_LC(*s)) {
6eb5f6b9 1022 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1023 goto got_it;
a687059c 1024 else
a0d0e21e
LW
1025 tmp = doevery;
1026 }
1027 else
1028 tmp = 1;
1029 s++;
1030 }
1031 break;
a0ed51b3
LW
1032 case ALNUMLUTF8:
1033 PL_reg_flags |= RF_tainted;
1034 while (s < strend) {
dfe13c55 1035 if (isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1036 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1037 goto got_it;
1038 else
1039 tmp = doevery;
1040 }
1041 else
1042 tmp = 1;
1043 s += UTF8SKIP(s);
1044 }
1045 break;
a0d0e21e
LW
1046 case NALNUM:
1047 while (s < strend) {
bbce6d69 1048 if (!isALNUM(*s)) {
6eb5f6b9 1049 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1050 goto got_it;
1051 else
1052 tmp = doevery;
1053 }
1054 else
1055 tmp = 1;
1056 s++;
1057 }
1058 break;
a0ed51b3
LW
1059 case NALNUMUTF8:
1060 while (s < strend) {
dfe13c55 1061 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 1062 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1063 goto got_it;
1064 else
1065 tmp = doevery;
1066 }
1067 else
1068 tmp = 1;
1069 s += UTF8SKIP(s);
1070 }
1071 break;
bbce6d69 1072 case NALNUML:
3280af22 1073 PL_reg_flags |= RF_tainted;
bbce6d69 1074 while (s < strend) {
1075 if (!isALNUM_LC(*s)) {
6eb5f6b9 1076 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1077 goto got_it;
a687059c 1078 else
a0d0e21e 1079 tmp = doevery;
a687059c 1080 }
a0d0e21e
LW
1081 else
1082 tmp = 1;
1083 s++;
1084 }
1085 break;
a0ed51b3
LW
1086 case NALNUMLUTF8:
1087 PL_reg_flags |= RF_tainted;
1088 while (s < strend) {
dfe13c55 1089 if (!isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1090 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1091 goto got_it;
1092 else
1093 tmp = doevery;
1094 }
1095 else
1096 tmp = 1;
1097 s += UTF8SKIP(s);
1098 }
1099 break;
a0d0e21e
LW
1100 case SPACE:
1101 while (s < strend) {
1102 if (isSPACE(*s)) {
6eb5f6b9 1103 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1104 goto got_it;
1105 else
1106 tmp = doevery;
2304df62 1107 }
a0d0e21e
LW
1108 else
1109 tmp = 1;
1110 s++;
1111 }
1112 break;
a0ed51b3
LW
1113 case SPACEUTF8:
1114 while (s < strend) {
dfe13c55 1115 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
6eb5f6b9 1116 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1117 goto got_it;
1118 else
1119 tmp = doevery;
1120 }
1121 else
1122 tmp = 1;
1123 s += UTF8SKIP(s);
1124 }
1125 break;
bbce6d69 1126 case SPACEL:
3280af22 1127 PL_reg_flags |= RF_tainted;
bbce6d69 1128 while (s < strend) {
1129 if (isSPACE_LC(*s)) {
6eb5f6b9 1130 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1131 goto got_it;
1132 else
1133 tmp = doevery;
1134 }
1135 else
1136 tmp = 1;
1137 s++;
1138 }
1139 break;
a0ed51b3
LW
1140 case SPACELUTF8:
1141 PL_reg_flags |= RF_tainted;
1142 while (s < strend) {
dfe13c55 1143 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
6eb5f6b9 1144 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1145 goto got_it;
1146 else
1147 tmp = doevery;
1148 }
1149 else
1150 tmp = 1;
1151 s += UTF8SKIP(s);
1152 }
1153 break;
a0d0e21e
LW
1154 case NSPACE:
1155 while (s < strend) {
1156 if (!isSPACE(*s)) {
6eb5f6b9 1157 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1158 goto got_it;
1159 else
1160 tmp = doevery;
a687059c 1161 }
a0d0e21e
LW
1162 else
1163 tmp = 1;
1164 s++;
1165 }
1166 break;
a0ed51b3
LW
1167 case NSPACEUTF8:
1168 while (s < strend) {
dfe13c55 1169 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
6eb5f6b9 1170 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1171 goto got_it;
1172 else
1173 tmp = doevery;
1174 }
1175 else
1176 tmp = 1;
1177 s += UTF8SKIP(s);
1178 }
1179 break;
bbce6d69 1180 case NSPACEL:
3280af22 1181 PL_reg_flags |= RF_tainted;
bbce6d69 1182 while (s < strend) {
1183 if (!isSPACE_LC(*s)) {
6eb5f6b9 1184 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1185 goto got_it;
1186 else
1187 tmp = doevery;
1188 }
1189 else
1190 tmp = 1;
1191 s++;
1192 }
1193 break;
a0ed51b3
LW
1194 case NSPACELUTF8:
1195 PL_reg_flags |= RF_tainted;
1196 while (s < strend) {
dfe13c55 1197 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
6eb5f6b9 1198 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1199 goto got_it;
1200 else
1201 tmp = doevery;
1202 }
1203 else
1204 tmp = 1;
1205 s += UTF8SKIP(s);
1206 }
1207 break;
a0d0e21e
LW
1208 case DIGIT:
1209 while (s < strend) {
1210 if (isDIGIT(*s)) {
6eb5f6b9 1211 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1212 goto got_it;
1213 else
1214 tmp = doevery;
2b69d0c2 1215 }
a0d0e21e
LW
1216 else
1217 tmp = 1;
1218 s++;
1219 }
1220 break;
a0ed51b3
LW
1221 case DIGITUTF8:
1222 while (s < strend) {
dfe13c55 1223 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1224 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1225 goto got_it;
1226 else
1227 tmp = doevery;
1228 }
1229 else
1230 tmp = 1;
1231 s += UTF8SKIP(s);
1232 }
1233 break;
b8c5462f
JH
1234 case DIGITL:
1235 PL_reg_flags |= RF_tainted;
1236 while (s < strend) {
1237 if (isDIGIT_LC(*s)) {
6eb5f6b9 1238 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1239 goto got_it;
1240 else
1241 tmp = doevery;
1242 }
1243 else
1244 tmp = 1;
1245 s++;
1246 }
1247 break;
1248 case DIGITLUTF8:
1249 PL_reg_flags |= RF_tainted;
1250 while (s < strend) {
1251 if (isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1252 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1253 goto got_it;
1254 else
1255 tmp = doevery;
1256 }
1257 else
1258 tmp = 1;
1259 s += UTF8SKIP(s);
1260 }
1261 break;
a0d0e21e
LW
1262 case NDIGIT:
1263 while (s < strend) {
1264 if (!isDIGIT(*s)) {
6eb5f6b9 1265 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1266 goto got_it;
1267 else
1268 tmp = doevery;
a687059c 1269 }
a0d0e21e
LW
1270 else
1271 tmp = 1;
1272 s++;
1273 }
1274 break;
a0ed51b3
LW
1275 case NDIGITUTF8:
1276 while (s < strend) {
dfe13c55 1277 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1278 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1279 goto got_it;
1280 else
1281 tmp = doevery;
1282 }
1283 else
1284 tmp = 1;
1285 s += UTF8SKIP(s);
1286 }
1287 break;
b8c5462f
JH
1288 case NDIGITL:
1289 PL_reg_flags |= RF_tainted;
1290 while (s < strend) {
1291 if (!isDIGIT_LC(*s)) {
6eb5f6b9 1292 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1293 goto got_it;
1294 else
1295 tmp = doevery;
1296 }
1297 else
1298 tmp = 1;
1299 s++;
a0ed51b3 1300 }
b8c5462f
JH
1301 break;
1302 case NDIGITLUTF8:
1303 PL_reg_flags |= RF_tainted;
1304 while (s < strend) {
1305 if (!isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1306 if (tmp && (norun || regtry(prog, s)))
b8c5462f 1307 goto got_it;
cf93c79d 1308 else
b8c5462f
JH
1309 tmp = doevery;
1310 }
1311 else
1312 tmp = 1;
1313 s += UTF8SKIP(s);
1314 }
1315 break;
b3c9acc1 1316 default:
3c3eec57
GS
1317 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1318 break;
d6a28714 1319 }
6eb5f6b9
JH
1320 return 0;
1321 got_it:
1322 return s;
1323}
1324
1325/*
1326 - regexec_flags - match a regexp against a string
1327 */
1328I32
1329Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1330 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1331/* strend: pointer to null at end of string */
1332/* strbeg: real beginning of string */
1333/* minend: end of match must be >=minend after stringarg. */
1334/* data: May be used for some additional optimizations. */
1335/* nosave: For optimizations. */
1336{
1337 dTHR;
1338 register char *s;
1339 register regnode *c;
1340 register char *startpos = stringarg;
6eb5f6b9
JH
1341 I32 minlen; /* must match at least this many chars */
1342 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1343 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1344 constant substr. */ /* CC */
1345 I32 end_shift = 0; /* Same for the end. */ /* CC */
1346 I32 scream_pos = -1; /* Internal iterator of scream. */
1347 char *scream_olds;
1348 SV* oreplsv = GvSV(PL_replgv);
1349
1350 PL_regcc = 0;
1351
1352 cache_re(prog);
1353#ifdef DEBUGGING
1354 PL_regnarrate = PL_debug & 512;
1355#endif
1356
1357 /* Be paranoid... */
1358 if (prog == NULL || startpos == NULL) {
1359 Perl_croak(aTHX_ "NULL regexp parameter");
1360 return 0;
1361 }
1362
1363 minlen = prog->minlen;
1364 if (strend - startpos < minlen) goto phooey;
1365
1366 if (startpos == strbeg) /* is ^ valid at stringarg? */
1367 PL_regprev = '\n';
1368 else {
1369 PL_regprev = (U32)stringarg[-1];
1370 if (!PL_multiline && PL_regprev == '\n')
1371 PL_regprev = '\0'; /* force ^ to NOT match */
1372 }
1373
1374 /* Check validity of program. */
1375 if (UCHARAT(prog->program) != REG_MAGIC) {
1376 Perl_croak(aTHX_ "corrupted regexp program");
1377 }
1378
1379 PL_reg_flags = 0;
1380 PL_reg_eval_set = 0;
1381 PL_reg_maxiter = 0;
1382
1383 if (prog->reganch & ROPT_UTF8)
1384 PL_reg_flags |= RF_utf8;
1385
1386 /* Mark beginning of line for ^ and lookbehind. */
1387 PL_regbol = startpos;
1388 PL_bostr = strbeg;
1389 PL_reg_sv = sv;
1390
1391 /* Mark end of line for $ (and such) */
1392 PL_regeol = strend;
1393
1394 /* see how far we have to get to not match where we matched before */
1395 PL_regtill = startpos+minend;
1396
1397 /* We start without call_cc context. */
1398 PL_reg_call_cc = 0;
1399
1400 /* If there is a "must appear" string, look for it. */
1401 s = startpos;
1402
1403 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1404 MAGIC *mg;
1405
1406 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1407 PL_reg_ganch = startpos;
1408 else if (sv && SvTYPE(sv) >= SVt_PVMG
1409 && SvMAGIC(sv)
1410 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1411 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1412 if (prog->reganch & ROPT_ANCH_GPOS) {
1413 if (s > PL_reg_ganch)
1414 goto phooey;
1415 s = PL_reg_ganch;
1416 }
1417 }
1418 else /* pos() not defined */
1419 PL_reg_ganch = strbeg;
1420 }
1421
1422 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1423 re_scream_pos_data d;
1424
1425 d.scream_olds = &scream_olds;
1426 d.scream_pos = &scream_pos;
1427 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1428 if (!s)
1429 goto phooey; /* not present */
1430 }
1431
1432 DEBUG_r( if (!PL_colorset) reginitcolors() );
1433 DEBUG_r(PerlIO_printf(Perl_debug_log,
1434 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1435 PL_colors[4],PL_colors[5],PL_colors[0],
1436 prog->precomp,
1437 PL_colors[1],
1438 (strlen(prog->precomp) > 60 ? "..." : ""),
1439 PL_colors[0],
1440 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1441 startpos, PL_colors[1],
1442 (strend - startpos > 60 ? "..." : ""))
1443 );
1444
1445 /* Simplest case: anchored match need be tried only once. */
1446 /* [unless only anchor is BOL and multiline is set] */
1447 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1448 if (s == startpos && regtry(prog, startpos))
1449 goto got_it;
1450 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1451 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1452 {
1453 char *end;
1454
1455 if (minlen)
1456 dontbother = minlen - 1;
1457 end = HOPc(strend, -dontbother) - 1;
1458 /* for multiline we only have to try after newlines */
1459 if (prog->check_substr) {
1460 if (s == startpos)
1461 goto after_try;
1462 while (1) {
1463 if (regtry(prog, s))
1464 goto got_it;
1465 after_try:
1466 if (s >= end)
1467 goto phooey;
1468 if (prog->reganch & RE_USE_INTUIT) {
1469 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1470 if (!s)
1471 goto phooey;
1472 }
1473 else
1474 s++;
1475 }
1476 } else {
1477 if (s > startpos)
1478 s--;
1479 while (s < end) {
1480 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1481 if (regtry(prog, s))
1482 goto got_it;
1483 }
1484 }
1485 }
1486 }
1487 goto phooey;
1488 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1489 if (regtry(prog, PL_reg_ganch))
1490 goto got_it;
1491 goto phooey;
1492 }
1493
1494 /* Messy cases: unanchored match. */
1495 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1496 /* we have /x+whatever/ */
1497 /* it must be a one character string (XXXX Except UTF?) */
1498 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1499#ifdef DEBUGGING
1500 int did_match = 0;
1501#endif
1502
6eb5f6b9
JH
1503 if (UTF) {
1504 while (s < strend) {
1505 if (*s == ch) {
bf93d4cc 1506 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1507 if (regtry(prog, s)) goto got_it;
1508 s += UTF8SKIP(s);
1509 while (s < strend && *s == ch)
1510 s += UTF8SKIP(s);
1511 }
1512 s += UTF8SKIP(s);
1513 }
1514 }
1515 else {
1516 while (s < strend) {
1517 if (*s == ch) {
bf93d4cc 1518 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1519 if (regtry(prog, s)) goto got_it;
1520 s++;
1521 while (s < strend && *s == ch)
1522 s++;
1523 }
1524 s++;
1525 }
1526 }
bf93d4cc
GS
1527 DEBUG_r(did_match ||
1528 PerlIO_printf(Perl_debug_log,
1529 "Did not find anchored character...\n"));
6eb5f6b9
JH
1530 }
1531 /*SUPPRESS 560*/
1532 else if (prog->anchored_substr != Nullsv
1533 || (prog->float_substr != Nullsv
1534 && prog->float_max_offset < strend - s)) {
1535 SV *must = prog->anchored_substr
1536 ? prog->anchored_substr : prog->float_substr;
1537 I32 back_max =
1538 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1539 I32 back_min =
1540 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
6eb5f6b9
JH
1541 char *last = HOPc(strend, /* Cannot start after this */
1542 -(I32)(CHR_SVLEN(must)
1543 - (SvTAIL(must) != 0) + back_min));
1544 char *last1; /* Last position checked before */
bf93d4cc
GS
1545#ifdef DEBUGGING
1546 int did_match = 0;
1547#endif
6eb5f6b9
JH
1548
1549 if (s > PL_bostr)
1550 last1 = HOPc(s, -1);
1551 else
1552 last1 = s - 1; /* bogus */
1553
1554 /* XXXX check_substr already used to find `s', can optimize if
1555 check_substr==must. */
1556 scream_pos = -1;
1557 dontbother = end_shift;
1558 strend = HOPc(strend, -dontbother);
1559 while ( (s <= last) &&
1560 ((flags & REXEC_SCREAM)
1561 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1562 end_shift, &scream_pos, 0))
1563 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1564 (unsigned char*)strend, must,
1565 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1566 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1567 if (HOPc(s, -back_max) > last1) {
1568 last1 = HOPc(s, -back_min);
1569 s = HOPc(s, -back_max);
1570 }
1571 else {
1572 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1573
1574 last1 = HOPc(s, -back_min);
1575 s = t;
1576 }
1577 if (UTF) {
1578 while (s <= last1) {
1579 if (regtry(prog, s))
1580 goto got_it;
1581 s += UTF8SKIP(s);
1582 }
1583 }
1584 else {
1585 while (s <= last1) {
1586 if (regtry(prog, s))
1587 goto got_it;
1588 s++;
1589 }
1590 }
1591 }
bf93d4cc
GS
1592 DEBUG_r(did_match ||
1593 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1594 ((must == prog->anchored_substr)
1595 ? "anchored" : "floating"),
1596 PL_colors[0],
1597 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1598 SvPVX(must),
1599 PL_colors[1], (SvTAIL(must) ? "$" : "")));
6eb5f6b9
JH
1600 goto phooey;
1601 }
155aba94 1602 else if ((c = prog->regstclass)) {
66e933ab
GS
1603 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1604 /* don't bother with what can't match */
6eb5f6b9
JH
1605 strend = HOPc(strend, -(minlen - 1));
1606 if (find_byclass(prog, c, s, strend, startpos, 0))
1607 goto got_it;
bf93d4cc 1608 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1609 }
1610 else {
1611 dontbother = 0;
1612 if (prog->float_substr != Nullsv) { /* Trim the end. */
1613 char *last;
d6a28714
JH
1614
1615 if (flags & REXEC_SCREAM) {
1616 last = screaminstr(sv, prog->float_substr, s - strbeg,
1617 end_shift, &scream_pos, 1); /* last one */
1618 if (!last)
1619 last = scream_olds; /* Only one occurence. */
b8c5462f 1620 }
d6a28714
JH
1621 else {
1622 STRLEN len;
1623 char *little = SvPV(prog->float_substr, len);
1624
1625 if (SvTAIL(prog->float_substr)) {
1626 if (memEQ(strend - len + 1, little, len - 1))
1627 last = strend - len + 1;
1628 else if (!PL_multiline)
1629 last = memEQ(strend - len, little, len)
1630 ? strend - len : Nullch;
b8c5462f 1631 else
d6a28714
JH
1632 goto find_last;
1633 } else {
1634 find_last:
1635 if (len)
1636 last = rninstr(s, strend, little, little + len);
b8c5462f 1637 else
d6a28714 1638 last = strend; /* matching `$' */
b8c5462f 1639 }
b8c5462f 1640 }
bf93d4cc
GS
1641 if (last == NULL) {
1642 DEBUG_r(PerlIO_printf(Perl_debug_log,
1643 "%sCan't trim the tail, match fails (should not happen)%s\n",
1644 PL_colors[4],PL_colors[5]));
1645 goto phooey; /* Should not happen! */
1646 }
d6a28714
JH
1647 dontbother = strend - last + prog->float_min_offset;
1648 }
1649 if (minlen && (dontbother < minlen))
1650 dontbother = minlen - 1;
1651 strend -= dontbother; /* this one's always in bytes! */
1652 /* We don't know much -- general case. */
1653 if (UTF) {
1654 for (;;) {
1655 if (regtry(prog, s))
1656 goto got_it;
1657 if (s >= strend)
1658 break;
b8c5462f 1659 s += UTF8SKIP(s);
d6a28714
JH
1660 };
1661 }
1662 else {
1663 do {
1664 if (regtry(prog, s))
1665 goto got_it;
1666 } while (s++ < strend);
1667 }
1668 }
1669
1670 /* Failure. */
1671 goto phooey;
1672
1673got_it:
1674 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1675
1676 if (PL_reg_eval_set) {
1677 /* Preserve the current value of $^R */
1678 if (oreplsv != GvSV(PL_replgv))
1679 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1680 restored, the value remains
1681 the same. */
1682 restore_pos(aTHXo_ 0);
1683 }
1684
1685 /* make sure $`, $&, $', and $digit will work later */
1686 if ( !(flags & REXEC_NOT_FIRST) ) {
1687 if (RX_MATCH_COPIED(prog)) {
1688 Safefree(prog->subbeg);
1689 RX_MATCH_COPIED_off(prog);
1690 }
1691 if (flags & REXEC_COPY_STR) {
1692 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1693
1694 s = savepvn(strbeg, i);
1695 prog->subbeg = s;
1696 prog->sublen = i;
1697 RX_MATCH_COPIED_on(prog);
1698 }
1699 else {
1700 prog->subbeg = strbeg;
1701 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1702 }
1703 }
1704
1705 return 1;
1706
1707phooey:
bf93d4cc
GS
1708 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1709 PL_colors[4],PL_colors[5]));
d6a28714
JH
1710 if (PL_reg_eval_set)
1711 restore_pos(aTHXo_ 0);
1712 return 0;
1713}
1714
1715/*
1716 - regtry - try match at specific point
1717 */
1718STATIC I32 /* 0 failure, 1 success */
1719S_regtry(pTHX_ regexp *prog, char *startpos)
1720{
1721 dTHR;
1722 register I32 i;
1723 register I32 *sp;
1724 register I32 *ep;
1725 CHECKPOINT lastcp;
1726
02db2b7b
IZ
1727#ifdef DEBUGGING
1728 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1729#endif
d6a28714
JH
1730 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1731 MAGIC *mg;
1732
1733 PL_reg_eval_set = RS_init;
1734 DEBUG_r(DEBUG_s(
b900a521
JH
1735 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1736 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1737 ));
e8347627 1738 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1739 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1740 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1741 SAVETMPS;
1742 /* Apparently this is not needed, judging by wantarray. */
e8347627 1743 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1744 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1745
1746 if (PL_reg_sv) {
1747 /* Make $_ available to executed code. */
1748 if (PL_reg_sv != DEFSV) {
1749 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1750 SAVESPTR(DEFSV);
1751 DEFSV = PL_reg_sv;
b8c5462f 1752 }
d6a28714
JH
1753
1754 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1755 && (mg = mg_find(PL_reg_sv, 'g')))) {
1756 /* prepare for quick setting of pos */
1757 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1758 mg = mg_find(PL_reg_sv, 'g');
1759 mg->mg_len = -1;
b8c5462f 1760 }
d6a28714
JH
1761 PL_reg_magic = mg;
1762 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1763 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1764 }
1765 if (!PL_reg_curpm)
0f79a09d 1766 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1767 PL_reg_curpm->op_pmregexp = prog;
1768 PL_reg_oldcurpm = PL_curpm;
1769 PL_curpm = PL_reg_curpm;
1770 if (RX_MATCH_COPIED(prog)) {
1771 /* Here is a serious problem: we cannot rewrite subbeg,
1772 since it may be needed if this match fails. Thus
1773 $` inside (?{}) could fail... */
1774 PL_reg_oldsaved = prog->subbeg;
1775 PL_reg_oldsavedlen = prog->sublen;
1776 RX_MATCH_COPIED_off(prog);
1777 }
1778 else
1779 PL_reg_oldsaved = Nullch;
1780 prog->subbeg = PL_bostr;
1781 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1782 }
1783 prog->startp[0] = startpos - PL_bostr;
1784 PL_reginput = startpos;
1785 PL_regstartp = prog->startp;
1786 PL_regendp = prog->endp;
1787 PL_reglastparen = &prog->lastparen;
1788 prog->lastparen = 0;
1789 PL_regsize = 0;
1790 DEBUG_r(PL_reg_starttry = startpos);
1791 if (PL_reg_start_tmpl <= prog->nparens) {
1792 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1793 if(PL_reg_start_tmp)
1794 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1795 else
1796 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1797 }
1798
1799 /* XXXX What this code is doing here?!!! There should be no need
1800 to do this again and again, PL_reglastparen should take care of
3dd2943c 1801 this! --ilya*/
dafc8851
JH
1802
1803 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1804 * Actually, the code in regcppop() (which Ilya may be meaning by
1805 * PL_reglastparen), does not seem to be needed at all (?!), whereas
1806 * this code *is* needed for the above-mentioned tests to succeed.
1807 * The common theme on those tests seems to be returning null fields
1808 * from matches. --jhi */
1809#if 1
d6a28714
JH
1810 sp = prog->startp;
1811 ep = prog->endp;
1812 if (prog->nparens) {
09e8ae3b 1813 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1814 *++sp = -1;
1815 *++ep = -1;
1816 }
1817 }
dafc8851 1818#endif
02db2b7b 1819 REGCP_SET(lastcp);
d6a28714
JH
1820 if (regmatch(prog->program + 1)) {
1821 prog->endp[0] = PL_reginput - PL_bostr;
1822 return 1;
1823 }
02db2b7b 1824 REGCP_UNWIND(lastcp);
d6a28714
JH
1825 return 0;
1826}
1827
02db2b7b
IZ
1828#define RE_UNWIND_BRANCH 1
1829#define RE_UNWIND_BRANCHJ 2
1830
1831union re_unwind_t;
1832
1833typedef struct { /* XX: makes sense to enlarge it... */
1834 I32 type;
1835 I32 prev;
1836 CHECKPOINT lastcp;
1837} re_unwind_generic_t;
1838
1839typedef struct {
1840 I32 type;
1841 I32 prev;
1842 CHECKPOINT lastcp;
1843 I32 lastparen;
1844 regnode *next;
1845 char *locinput;
1846 I32 nextchr;
1847#ifdef DEBUGGING
1848 int regindent;
1849#endif
1850} re_unwind_branch_t;
1851
1852typedef union re_unwind_t {
1853 I32 type;
1854 re_unwind_generic_t generic;
1855 re_unwind_branch_t branch;
1856} re_unwind_t;
1857
d6a28714
JH
1858/*
1859 - regmatch - main matching routine
1860 *
1861 * Conceptually the strategy is simple: check to see whether the current
1862 * node matches, call self recursively to see whether the rest matches,
1863 * and then act accordingly. In practice we make some effort to avoid
1864 * recursion, in particular by going through "ordinary" nodes (that don't
1865 * need to know whether the rest of the match failed) by a loop instead of
1866 * by recursion.
1867 */
1868/* [lwall] I've hoisted the register declarations to the outer block in order to
1869 * maybe save a little bit of pushing and popping on the stack. It also takes
1870 * advantage of machines that use a register save mask on subroutine entry.
1871 */
1872STATIC I32 /* 0 failure, 1 success */
1873S_regmatch(pTHX_ regnode *prog)
1874{
1875 dTHR;
1876 register regnode *scan; /* Current node. */
1877 regnode *next; /* Next node. */
1878 regnode *inner; /* Next node in internal branch. */
1879 register I32 nextchr; /* renamed nextchr - nextchar colides with
1880 function of same name */
1881 register I32 n; /* no or next */
1882 register I32 ln; /* len or last */
1883 register char *s; /* operand or save */
1884 register char *locinput = PL_reginput;
1885 register I32 c1, c2, paren; /* case fold search, parenth */
1886 int minmod = 0, sw = 0, logical = 0;
02db2b7b
IZ
1887 I32 unwind = 0;
1888 I32 firstcp = PL_savestack_ix;
1889
d6a28714
JH
1890#ifdef DEBUGGING
1891 PL_regindent++;
1892#endif
1893
1894 /* Note that nextchr is a byte even in UTF */
1895 nextchr = UCHARAT(locinput);
1896 scan = prog;
1897 while (scan != NULL) {
1898#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
02db2b7b 1899#if 1
d6a28714
JH
1900# define sayYES goto yes
1901# define sayNO goto no
7821416a
IZ
1902# define sayYES_FINAL goto yes_final
1903# define sayYES_LOUD goto yes_loud
1904# define sayNO_FINAL goto no_final
1905# define sayNO_SILENT goto do_no
d6a28714
JH
1906# define saySAME(x) if (x) goto yes; else goto no
1907# define REPORT_CODE_OFF 24
1908#else
1909# define sayYES return 1
1910# define sayNO return 0
7821416a
IZ
1911# define sayYES_FINAL return 1
1912# define sayYES_LOUD return 1
1913# define sayNO_FINAL return 0
1914# define sayNO_SILENT return 0
d6a28714
JH
1915# define saySAME(x) return x
1916#endif
1917 DEBUG_r( {
1918 SV *prop = sv_newmortal();
1919 int docolor = *PL_colors[0];
1920 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1921 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1922 /* The part of the string before starttry has one color
1923 (pref0_len chars), between starttry and current
1924 position another one (pref_len - pref0_len chars),
1925 after the current position the third one.
1926 We assume that pref0_len <= pref_len, otherwise we
1927 decrease pref0_len. */
1928 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1929 ? (5 + taill) - l : locinput - PL_bostr);
1930 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1931
1932 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1933 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1934 ? (5 + taill) - pref_len : PL_regeol - locinput);
1935 if (pref0_len < 0)
1936 pref0_len = 0;
1937 if (pref0_len > pref_len)
1938 pref0_len = pref_len;
1939 regprop(prop, scan);
1940 PerlIO_printf(Perl_debug_log,
b900a521
JH
1941 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1942 (IV)(locinput - PL_bostr),
d6a28714
JH
1943 PL_colors[4], pref0_len,
1944 locinput - pref_len, PL_colors[5],
1945 PL_colors[2], pref_len - pref0_len,
1946 locinput - pref_len + pref0_len, PL_colors[3],
1947 (docolor ? "" : "> <"),
1948 PL_colors[0], l, locinput, PL_colors[1],
1949 15 - l - pref_len + 1,
1950 "",
b900a521 1951 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
1952 SvPVX(prop));
1953 } );
1954
1955 next = scan + NEXT_OFF(scan);
1956 if (next == scan)
1957 next = NULL;
1958
1959 switch (OP(scan)) {
1960 case BOL:
1961 if (locinput == PL_bostr
1962 ? PL_regprev == '\n'
1963 : (PL_multiline &&
1964 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1965 {
1966 /* regtill = regbol; */
b8c5462f
JH
1967 break;
1968 }
d6a28714
JH
1969 sayNO;
1970 case MBOL:
1971 if (locinput == PL_bostr
1972 ? PL_regprev == '\n'
1973 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1974 {
b8c5462f
JH
1975 break;
1976 }
d6a28714
JH
1977 sayNO;
1978 case SBOL:
c2a73568 1979 if (locinput == PL_bostr)
b8c5462f 1980 break;
d6a28714
JH
1981 sayNO;
1982 case GPOS:
1983 if (locinput == PL_reg_ganch)
1984 break;
1985 sayNO;
1986 case EOL:
1987 if (PL_multiline)
1988 goto meol;
1989 else
1990 goto seol;
1991 case MEOL:
1992 meol:
1993 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1994 sayNO;
b8c5462f 1995 break;
d6a28714
JH
1996 case SEOL:
1997 seol:
1998 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1999 sayNO;
d6a28714 2000 if (PL_regeol - locinput > 1)
b8c5462f 2001 sayNO;
b8c5462f 2002 break;
d6a28714
JH
2003 case EOS:
2004 if (PL_regeol != locinput)
b8c5462f 2005 sayNO;
d6a28714
JH
2006 break;
2007 case SANYUTF8:
b8c5462f 2008 if (nextchr & 0x80) {
b8c5462f 2009 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2010 if (locinput > PL_regeol)
2011 sayNO;
b8c5462f
JH
2012 nextchr = UCHARAT(locinput);
2013 break;
2014 }
d6a28714 2015 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2016 sayNO;
b8c5462f 2017 nextchr = UCHARAT(++locinput);
a0d0e21e 2018 break;
d6a28714
JH
2019 case SANY:
2020 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
2021 sayNO;
2022 nextchr = UCHARAT(++locinput);
b85d18e9 2023 break;
d6a28714 2024 case ANYUTF8:
a0ed51b3 2025 if (nextchr & 0x80) {
b8c5462f 2026 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2027 if (locinput > PL_regeol)
2028 sayNO;
a0ed51b3
LW
2029 nextchr = UCHARAT(locinput);
2030 break;
2031 }
155aba94 2032 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
a0ed51b3
LW
2033 sayNO;
2034 nextchr = UCHARAT(++locinput);
2035 break;
d6a28714 2036 case REG_ANY:
155aba94 2037 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
4633a7c4 2038 sayNO;
76e3520e 2039 nextchr = UCHARAT(++locinput);
a0d0e21e 2040 break;
d6a28714 2041 case EXACT:
cd439c50
IZ
2042 s = STRING(scan);
2043 ln = STR_LEN(scan);
d6a28714
JH
2044 /* Inline the first character, for speed. */
2045 if (UCHARAT(s) != nextchr)
2046 sayNO;
2047 if (PL_regeol - locinput < ln)
2048 sayNO;
2049 if (ln > 1 && memNE(s, locinput, ln))
2050 sayNO;
2051 locinput += ln;
2052 nextchr = UCHARAT(locinput);
2053 break;
2054 case EXACTFL:
b8c5462f
JH
2055 PL_reg_flags |= RF_tainted;
2056 /* FALL THROUGH */
d6a28714 2057 case EXACTF:
cd439c50
IZ
2058 s = STRING(scan);
2059 ln = STR_LEN(scan);
d6a28714
JH
2060
2061 if (UTF) {
2062 char *l = locinput;
2063 char *e = s + ln;
2064 c1 = OP(scan) == EXACTF;
2065 while (s < e) {
2066 if (l >= PL_regeol)
2067 sayNO;
667bb95a
JH
2068 if (utf8_to_uv((U8*)s, e - s, 0, 0) !=
2069 (c1 ?
2070 toLOWER_utf8((U8*)l) :
2071 toLOWER_LC_utf8((U8*)l)))
d6a28714
JH
2072 {
2073 sayNO;
2074 }
2075 s += UTF8SKIP(s);
2076 l += UTF8SKIP(l);
b8c5462f 2077 }
d6a28714 2078 locinput = l;
a0ed51b3
LW
2079 nextchr = UCHARAT(locinput);
2080 break;
2081 }
d6a28714
JH
2082
2083 /* Inline the first character, for speed. */
2084 if (UCHARAT(s) != nextchr &&
2085 UCHARAT(s) != ((OP(scan) == EXACTF)
2086 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2087 sayNO;
d6a28714 2088 if (PL_regeol - locinput < ln)
b8c5462f 2089 sayNO;
d6a28714
JH
2090 if (ln > 1 && (OP(scan) == EXACTF
2091 ? ibcmp(s, locinput, ln)
2092 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2093 sayNO;
d6a28714
JH
2094 locinput += ln;
2095 nextchr = UCHARAT(locinput);
a0d0e21e 2096 break;
d6a28714 2097 case ANYOFUTF8:
d6a28714 2098 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 2099 sayNO;
d6a28714
JH
2100 if (locinput >= PL_regeol)
2101 sayNO;
2102 locinput += PL_utf8skip[nextchr];
2103 nextchr = UCHARAT(locinput);
2104 break;
2105 case ANYOF:
d6a28714 2106 if (nextchr < 0)
b8c5462f 2107 nextchr = UCHARAT(locinput);
936ed897 2108 if (!REGINCLASS(scan, nextchr))
d6a28714
JH
2109 sayNO;
2110 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2111 sayNO;
b8c5462f
JH
2112 nextchr = UCHARAT(++locinput);
2113 break;
d6a28714 2114 case ALNUML:
b8c5462f
JH
2115 PL_reg_flags |= RF_tainted;
2116 /* FALL THROUGH */
d6a28714 2117 case ALNUM:
b8c5462f 2118 if (!nextchr)
4633a7c4 2119 sayNO;
d6a28714
JH
2120 if (!(OP(scan) == ALNUM
2121 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
2122 sayNO;
2123 nextchr = UCHARAT(++locinput);
bbce6d69 2124 break;
d6a28714 2125 case ALNUMLUTF8:
3280af22 2126 PL_reg_flags |= RF_tainted;
bbce6d69 2127 /* FALL THROUGH */
d6a28714 2128 case ALNUMUTF8:
b8c5462f
JH
2129 if (!nextchr)
2130 sayNO;
2131 if (nextchr & 0x80) {
d6a28714
JH
2132 if (!(OP(scan) == ALNUMUTF8
2133 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2134 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2135 {
2136 sayNO;
a0ed51b3 2137 }
b8c5462f 2138 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2139 nextchr = UCHARAT(locinput);
2140 break;
2141 }
d6a28714
JH
2142 if (!(OP(scan) == ALNUMUTF8
2143 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2144 sayNO;
b8c5462f 2145 nextchr = UCHARAT(++locinput);
a0d0e21e 2146 break;
d6a28714 2147 case NALNUML:
b8c5462f
JH
2148 PL_reg_flags |= RF_tainted;
2149 /* FALL THROUGH */
d6a28714
JH
2150 case NALNUM:
2151 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2152 sayNO;
d6a28714
JH
2153 if (OP(scan) == NALNUM
2154 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 2155 sayNO;
b8c5462f 2156 nextchr = UCHARAT(++locinput);
a0ed51b3 2157 break;
d6a28714 2158 case NALNUMLUTF8:
b8c5462f
JH
2159 PL_reg_flags |= RF_tainted;
2160 /* FALL THROUGH */
d6a28714 2161 case NALNUMUTF8:
3280af22 2162 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2163 sayNO;
b8c5462f 2164 if (nextchr & 0x80) {
d6a28714
JH
2165 if (OP(scan) == NALNUMUTF8
2166 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2167 : isALNUM_LC_utf8((U8*)locinput))
2168 {
b8c5462f 2169 sayNO;
d6a28714 2170 }
b8c5462f
JH
2171 locinput += PL_utf8skip[nextchr];
2172 nextchr = UCHARAT(locinput);
2173 break;
2174 }
d6a28714
JH
2175 if (OP(scan) == NALNUMUTF8
2176 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2177 sayNO;
76e3520e 2178 nextchr = UCHARAT(++locinput);
a0d0e21e 2179 break;
d6a28714
JH
2180 case BOUNDL:
2181 case NBOUNDL:
3280af22 2182 PL_reg_flags |= RF_tainted;
bbce6d69 2183 /* FALL THROUGH */
d6a28714
JH
2184 case BOUND:
2185 case NBOUND:
2186 /* was last char in word? */
2187 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2188 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2189 ln = isALNUM(ln);
2190 n = isALNUM(nextchr);
2191 }
2192 else {
2193 ln = isALNUM_LC(ln);
2194 n = isALNUM_LC(nextchr);
2195 }
2196 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 2197 sayNO;
a0d0e21e 2198 break;
d6a28714
JH
2199 case BOUNDLUTF8:
2200 case NBOUNDLUTF8:
a0ed51b3
LW
2201 PL_reg_flags |= RF_tainted;
2202 /* FALL THROUGH */
d6a28714
JH
2203 case BOUNDUTF8:
2204 case NBOUNDUTF8:
2205 /* was last char in word? */
667bb95a
JH
2206 if (locinput == PL_regbol)
2207 ln = PL_regprev;
2208 else {
2209 U8 *r = reghop((U8*)locinput, -1);
2210
2211 ln = utf8_to_uv(r, s - (char*)r, 0, 0);
2212 }
d6a28714
JH
2213 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2214 ln = isALNUM_uni(ln);
2215 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 2216 }
d6a28714
JH
2217 else {
2218 ln = isALNUM_LC_uni(ln);
2219 n = isALNUM_LC_utf8((U8*)locinput);
2220 }
2221 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 2222 sayNO;
a0ed51b3 2223 break;
d6a28714 2224 case SPACEL:
3280af22 2225 PL_reg_flags |= RF_tainted;
bbce6d69 2226 /* FALL THROUGH */
d6a28714 2227 case SPACE:
9442cb0e 2228 if (!nextchr)
4633a7c4 2229 sayNO;
d6a28714
JH
2230 if (!(OP(scan) == SPACE
2231 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 2232 sayNO;
76e3520e 2233 nextchr = UCHARAT(++locinput);
a0d0e21e 2234 break;
d6a28714 2235 case SPACELUTF8:
a0ed51b3
LW
2236 PL_reg_flags |= RF_tainted;
2237 /* FALL THROUGH */
d6a28714 2238 case SPACEUTF8:
9442cb0e 2239 if (!nextchr)
a0ed51b3
LW
2240 sayNO;
2241 if (nextchr & 0x80) {
d6a28714 2242 if (!(OP(scan) == SPACEUTF8
9442cb0e 2243 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714
JH
2244 : isSPACE_LC_utf8((U8*)locinput)))
2245 {
a0ed51b3 2246 sayNO;
d6a28714 2247 }
6f06b55f 2248 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2249 nextchr = UCHARAT(locinput);
2250 break;
2251 }
d6a28714
JH
2252 if (!(OP(scan) == SPACEUTF8
2253 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
2254 sayNO;
2255 nextchr = UCHARAT(++locinput);
2256 break;
d6a28714 2257 case NSPACEL:
3280af22 2258 PL_reg_flags |= RF_tainted;
bbce6d69 2259 /* FALL THROUGH */
d6a28714 2260 case NSPACE:
9442cb0e 2261 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2262 sayNO;
9442cb0e 2263 if (OP(scan) == NSPACE
d6a28714 2264 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2265 sayNO;
b8c5462f 2266 nextchr = UCHARAT(++locinput);
a0d0e21e 2267 break;
d6a28714 2268 case NSPACELUTF8:
a0ed51b3
LW
2269 PL_reg_flags |= RF_tainted;
2270 /* FALL THROUGH */
d6a28714 2271 case NSPACEUTF8:
9442cb0e 2272 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
2273 sayNO;
2274 if (nextchr & 0x80) {
d6a28714 2275 if (OP(scan) == NSPACEUTF8
9442cb0e 2276 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714 2277 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2278 {
2279 sayNO;
2280 }
2281 locinput += PL_utf8skip[nextchr];
2282 nextchr = UCHARAT(locinput);
2283 break;
a0ed51b3 2284 }
d6a28714
JH
2285 if (OP(scan) == NSPACEUTF8
2286 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2287 sayNO;
76e3520e 2288 nextchr = UCHARAT(++locinput);
a0d0e21e 2289 break;
d6a28714 2290 case DIGITL:
a0ed51b3
LW
2291 PL_reg_flags |= RF_tainted;
2292 /* FALL THROUGH */
d6a28714 2293 case DIGIT:
9442cb0e 2294 if (!nextchr)
a0ed51b3 2295 sayNO;
d6a28714
JH
2296 if (!(OP(scan) == DIGIT
2297 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2298 sayNO;
76e3520e 2299 nextchr = UCHARAT(++locinput);
a0d0e21e 2300 break;
d6a28714 2301 case DIGITLUTF8:
a0ed51b3
LW
2302 PL_reg_flags |= RF_tainted;
2303 /* FALL THROUGH */
d6a28714 2304 case DIGITUTF8:
a0ed51b3
LW
2305 if (!nextchr)
2306 sayNO;
2307 if (nextchr & 0x80) {
9442cb0e
GS
2308 if (!(OP(scan) == DIGITUTF8
2309 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2310 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2311 {
a0ed51b3 2312 sayNO;
dfe13c55 2313 }
6f06b55f 2314 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2315 nextchr = UCHARAT(locinput);
2316 break;
2317 }
9442cb0e
GS
2318 if (!(OP(scan) == DIGITUTF8
2319 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2320 sayNO;
2321 nextchr = UCHARAT(++locinput);
2322 break;
d6a28714 2323 case NDIGITL:
b8c5462f
JH
2324 PL_reg_flags |= RF_tainted;
2325 /* FALL THROUGH */
d6a28714 2326 case NDIGIT:
9442cb0e 2327 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2328 sayNO;
9442cb0e 2329 if (OP(scan) == NDIGIT
d6a28714 2330 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2331 sayNO;
76e3520e 2332 nextchr = UCHARAT(++locinput);
a0d0e21e 2333 break;
d6a28714 2334 case NDIGITLUTF8:
b8c5462f
JH
2335 PL_reg_flags |= RF_tainted;
2336 /* FALL THROUGH */
d6a28714 2337 case NDIGITUTF8:
b8c5462f
JH
2338 if (!nextchr && locinput >= PL_regeol)
2339 sayNO;
a0ed51b3 2340 if (nextchr & 0x80) {
9442cb0e
GS
2341 if (OP(scan) == NDIGITUTF8
2342 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2343 : isDIGIT_LC_utf8((U8*)locinput))
2344 {
a0ed51b3 2345 sayNO;
9442cb0e 2346 }
6f06b55f 2347 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2348 nextchr = UCHARAT(locinput);
2349 break;
2350 }
9442cb0e
GS
2351 if (OP(scan) == NDIGITUTF8
2352 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2353 sayNO;
2354 nextchr = UCHARAT(++locinput);
2355 break;
2356 case CLUMP:
dfe13c55 2357 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2358 sayNO;
6f06b55f 2359 locinput += PL_utf8skip[nextchr];
dfe13c55 2360 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2361 locinput += UTF8SKIP(locinput);
2362 if (locinput > PL_regeol)
2363 sayNO;
2364 nextchr = UCHARAT(locinput);
2365 break;
c8756f30 2366 case REFFL:
3280af22 2367 PL_reg_flags |= RF_tainted;
c8756f30 2368 /* FALL THROUGH */
c277df42 2369 case REF:
c8756f30 2370 case REFF:
c277df42 2371 n = ARG(scan); /* which paren pair */
cf93c79d 2372 ln = PL_regstartp[n];
2c2d71f5 2373 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2374 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2375 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2376 if (ln == PL_regendp[n])
a0d0e21e 2377 break;
a0ed51b3 2378
cf93c79d 2379 s = PL_bostr + ln;
a0ed51b3
LW
2380 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2381 char *l = locinput;
cf93c79d 2382 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2383 /*
2384 * Note that we can't do the "other character" lookup trick as
2385 * in the 8-bit case (no pun intended) because in Unicode we
2386 * have to map both upper and title case to lower case.
2387 */
2388 if (OP(scan) == REFF) {
2389 while (s < e) {
2390 if (l >= PL_regeol)
2391 sayNO;
dfe13c55 2392 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2393 sayNO;
2394 s += UTF8SKIP(s);
2395 l += UTF8SKIP(l);
2396 }
2397 }
2398 else {
2399 while (s < e) {
2400 if (l >= PL_regeol)
2401 sayNO;
dfe13c55 2402 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2403 sayNO;
2404 s += UTF8SKIP(s);
2405 l += UTF8SKIP(l);
2406 }
2407 }
2408 locinput = l;
2409 nextchr = UCHARAT(locinput);
2410 break;
2411 }
2412
a0d0e21e 2413 /* Inline the first character, for speed. */
76e3520e 2414 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2415 (OP(scan) == REF ||
2416 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2417 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2418 sayNO;
cf93c79d 2419 ln = PL_regendp[n] - ln;
3280af22 2420 if (locinput + ln > PL_regeol)
4633a7c4 2421 sayNO;
c8756f30
AK
2422 if (ln > 1 && (OP(scan) == REF
2423 ? memNE(s, locinput, ln)
2424 : (OP(scan) == REFF
2425 ? ibcmp(s, locinput, ln)
2426 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2427 sayNO;
a0d0e21e 2428 locinput += ln;
76e3520e 2429 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2430 break;
2431
2432 case NOTHING:
c277df42 2433 case TAIL:
a0d0e21e
LW
2434 break;
2435 case BACK:
2436 break;
c277df42
IZ
2437 case EVAL:
2438 {
2439 dSP;
533c011a 2440 OP_4tree *oop = PL_op;
3280af22
NIS
2441 COP *ocurcop = PL_curcop;
2442 SV **ocurpad = PL_curpad;
c277df42
IZ
2443 SV *ret;
2444
2445 n = ARG(scan);
533c011a 2446 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2447 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2448 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2449 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2450
cea2e8a9 2451 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2452 SPAGAIN;
2453 ret = POPs;
2454 PUTBACK;
2455
0f5d15d6
IZ
2456 PL_op = oop;
2457 PL_curpad = ocurpad;
2458 PL_curcop = ocurcop;
c277df42 2459 if (logical) {
0f5d15d6
IZ
2460 if (logical == 2) { /* Postponed subexpression. */
2461 regexp *re;
22c35a8c 2462 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2463 re_cc_state state;
0f5d15d6
IZ
2464 CHECKPOINT cp, lastcp;
2465
2466 if(SvROK(ret) || SvRMAGICAL(ret)) {
2467 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2468
2469 if(SvMAGICAL(sv))
2470 mg = mg_find(sv, 'r');
2471 }
2472 if (mg) {
2473 re = (regexp *)mg->mg_obj;
df0003d4 2474 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2475 }
2476 else {
2477 STRLEN len;
2478 char *t = SvPV(ret, len);
2479 PMOP pm;
2480 char *oprecomp = PL_regprecomp;
2481 I32 osize = PL_regsize;
2482 I32 onpar = PL_regnpar;
2483
2484 pm.op_pmflags = 0;
393fec97 2485 pm.op_pmdynflags = (UTF||DO_UTF8(ret) ? PMdf_UTF8 : 0);
cea2e8a9 2486 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2487 if (!(SvFLAGS(ret)
2488 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2489 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2490 PL_regprecomp = oprecomp;
2491 PL_regsize = osize;
2492 PL_regnpar = onpar;
2493 }
2494 DEBUG_r(
2495 PerlIO_printf(Perl_debug_log,
2496 "Entering embedded `%s%.60s%s%s'\n",
2497 PL_colors[0],
2498 re->precomp,
2499 PL_colors[1],
2500 (strlen(re->precomp) > 60 ? "..." : ""))
2501 );
2502 state.node = next;
2503 state.prev = PL_reg_call_cc;
2504 state.cc = PL_regcc;
2505 state.re = PL_reg_re;
2506
2ab05381 2507 PL_regcc = 0;
0f5d15d6
IZ
2508
2509 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2510 REGCP_SET(lastcp);
0f5d15d6
IZ
2511 cache_re(re);
2512 state.ss = PL_savestack_ix;
2513 *PL_reglastparen = 0;
2514 PL_reg_call_cc = &state;
2515 PL_reginput = locinput;
2c2d71f5
JH
2516
2517 /* XXXX This is too dramatic a measure... */
2518 PL_reg_maxiter = 0;
2519
0f5d15d6 2520 if (regmatch(re->program + 1)) {
2c914db6
IZ
2521 /* Even though we succeeded, we need to restore
2522 global variables, since we may be wrapped inside
2523 SUSPEND, thus the match may be not finished yet. */
2524
2525 /* XXXX Do this only if SUSPENDed? */
2526 PL_reg_call_cc = state.prev;
2527 PL_regcc = state.cc;
2528 PL_reg_re = state.re;
2529 cache_re(PL_reg_re);
2530
2531 /* XXXX This is too dramatic a measure... */
2532 PL_reg_maxiter = 0;
2533
2534 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2535 ReREFCNT_dec(re);
2536 regcpblow(cp);
2537 sayYES;
2538 }
0f5d15d6 2539 ReREFCNT_dec(re);
02db2b7b 2540 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2541 regcppop();
2542 PL_reg_call_cc = state.prev;
2543 PL_regcc = state.cc;
2544 PL_reg_re = state.re;
d3790889 2545 cache_re(PL_reg_re);
2c2d71f5
JH
2546
2547 /* XXXX This is too dramatic a measure... */
2548 PL_reg_maxiter = 0;
2549
0f5d15d6
IZ
2550 sayNO;
2551 }
c277df42 2552 sw = SvTRUE(ret);
0f5d15d6 2553 logical = 0;
a0ed51b3
LW
2554 }
2555 else
3280af22 2556 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2557 break;
2558 }
a0d0e21e 2559 case OPEN:
c277df42 2560 n = ARG(scan); /* which paren pair */
3280af22
NIS
2561 PL_reg_start_tmp[n] = locinput;
2562 if (n > PL_regsize)
2563 PL_regsize = n;
a0d0e21e
LW
2564 break;
2565 case CLOSE:
c277df42 2566 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2567 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2568 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2569 if (n > *PL_reglastparen)
2570 *PL_reglastparen = n;
a0d0e21e 2571 break;
c277df42
IZ
2572 case GROUPP:
2573 n = ARG(scan); /* which paren pair */
cf93c79d 2574 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2575 break;
2576 case IFTHEN:
2c2d71f5 2577 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2578 if (sw)
2579 next = NEXTOPER(NEXTOPER(scan));
2580 else {
2581 next = scan + ARG(scan);
2582 if (OP(next) == IFTHEN) /* Fake one. */
2583 next = NEXTOPER(NEXTOPER(next));
2584 }
2585 break;
2586 case LOGICAL:
0f5d15d6 2587 logical = scan->flags;
c277df42 2588 break;
2ab05381
IZ
2589/*******************************************************************
2590 PL_regcc contains infoblock about the innermost (...)* loop, and
2591 a pointer to the next outer infoblock.
2592
2593 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2594
2595 1) After matching X, regnode for CURLYX is processed;
2596
2597 2) This regnode creates infoblock on the stack, and calls
2598 regmatch() recursively with the starting point at WHILEM node;
2599
2600 3) Each hit of WHILEM node tries to match A and Z (in the order
2601 depending on the current iteration, min/max of {min,max} and
2602 greediness). The information about where are nodes for "A"
2603 and "Z" is read from the infoblock, as is info on how many times "A"
2604 was already matched, and greediness.
2605
2606 4) After A matches, the same WHILEM node is hit again.
2607
2608 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2609 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2610 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2611 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2612 of the external loop.
2613
2614 Currently present infoblocks form a tree with a stem formed by PL_curcc
2615 and whatever it mentions via ->next, and additional attached trees
2616 corresponding to temporarily unset infoblocks as in "5" above.
2617
2618 In the following picture infoblocks for outer loop of
2619 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2620 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2621 infoblocks are drawn below the "reset" infoblock.
2622
2623 In fact in the picture below we do not show failed matches for Z and T
2624 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2625 more obvious *why* one needs to *temporary* unset infoblocks.]
2626
2627 Matched REx position InfoBlocks Comment
2628 (Y(A)*?Z)*?T x
2629 Y(A)*?Z)*?T x <- O
2630 Y (A)*?Z)*?T x <- O
2631 Y A)*?Z)*?T x <- O <- I
2632 YA )*?Z)*?T x <- O <- I
2633 YA A)*?Z)*?T x <- O <- I
2634 YAA )*?Z)*?T x <- O <- I
2635 YAA Z)*?T x <- O # Temporary unset I
2636 I
2637
2638 YAAZ Y(A)*?Z)*?T x <- O
2639 I
2640
2641 YAAZY (A)*?Z)*?T x <- O
2642 I
2643
2644 YAAZY A)*?Z)*?T x <- O <- I
2645 I
2646
2647 YAAZYA )*?Z)*?T x <- O <- I
2648 I
2649
2650 YAAZYA Z)*?T x <- O # Temporary unset I
2651 I,I
2652
2653 YAAZYAZ )*?T x <- O
2654 I,I
2655
2656 YAAZYAZ T x # Temporary unset O
2657 O
2658 I,I
2659
2660 YAAZYAZT x
2661 O
2662 I,I
2663 *******************************************************************/
a0d0e21e
LW
2664 case CURLYX: {
2665 CURCUR cc;
3280af22 2666 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2667 /* No need to save/restore up to this paren */
2668 I32 parenfloor = scan->flags;
c277df42
IZ
2669
2670 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2671 next += ARG(next);
3280af22
NIS
2672 cc.oldcc = PL_regcc;
2673 PL_regcc = &cc;
cb434fcc
IZ
2674 /* XXXX Probably it is better to teach regpush to support
2675 parenfloor > PL_regsize... */
2676 if (parenfloor > *PL_reglastparen)
2677 parenfloor = *PL_reglastparen; /* Pessimization... */
2678 cc.parenfloor = parenfloor;
a0d0e21e
LW
2679 cc.cur = -1;
2680 cc.min = ARG1(scan);
2681 cc.max = ARG2(scan);
c277df42 2682 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2683 cc.next = next;
2684 cc.minmod = minmod;
2685 cc.lastloc = 0;
3280af22 2686 PL_reginput = locinput;
a0d0e21e
LW
2687 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2688 regcpblow(cp);
3280af22 2689 PL_regcc = cc.oldcc;
4633a7c4 2690 saySAME(n);
a0d0e21e
LW
2691 }
2692 /* NOT REACHED */
2693 case WHILEM: {
2694 /*
2695 * This is really hard to understand, because after we match
2696 * what we're trying to match, we must make sure the rest of
2c2d71f5 2697 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2698 * to go back UP the parse tree by recursing ever deeper. And
2699 * if it fails, we have to reset our parent's current state
2700 * that we can try again after backing off.
2701 */
2702
c277df42 2703 CHECKPOINT cp, lastcp;
3280af22 2704 CURCUR* cc = PL_regcc;
c277df42
IZ
2705 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2706
4633a7c4 2707 n = cc->cur + 1; /* how many we know we matched */
3280af22 2708 PL_reginput = locinput;
a0d0e21e 2709
c277df42
IZ
2710 DEBUG_r(
2711 PerlIO_printf(Perl_debug_log,
2712 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2713 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2714 (long)n, (long)cc->min,
2715 (long)cc->max, (long)cc)
2716 );
4633a7c4 2717
a0d0e21e
LW
2718 /* If degenerate scan matches "", assume scan done. */
2719
579cf2c3 2720 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2721 PL_regcc = cc->oldcc;
2ab05381
IZ
2722 if (PL_regcc)
2723 ln = PL_regcc->cur;
c277df42 2724 DEBUG_r(
c3464db5
DD
2725 PerlIO_printf(Perl_debug_log,
2726 "%*s empty match detected, try continuation...\n",
3280af22 2727 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2728 );
a0d0e21e 2729 if (regmatch(cc->next))
4633a7c4 2730 sayYES;
2ab05381
IZ
2731 if (PL_regcc)
2732 PL_regcc->cur = ln;
3280af22 2733 PL_regcc = cc;
4633a7c4 2734 sayNO;
a0d0e21e
LW
2735 }
2736
2737 /* First just match a string of min scans. */
2738
2739 if (n < cc->min) {
2740 cc->cur = n;
2741 cc->lastloc = locinput;
4633a7c4
LW
2742 if (regmatch(cc->scan))
2743 sayYES;
2744 cc->cur = n - 1;
c277df42 2745 cc->lastloc = lastloc;
4633a7c4 2746 sayNO;
a0d0e21e
LW
2747 }
2748
2c2d71f5
JH
2749 if (scan->flags) {
2750 /* Check whether we already were at this position.
2751 Postpone detection until we know the match is not
2752 *that* much linear. */
2753 if (!PL_reg_maxiter) {
2754 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2755 PL_reg_leftiter = PL_reg_maxiter;
2756 }
2757 if (PL_reg_leftiter-- == 0) {
2758 I32 size = (PL_reg_maxiter + 7)/8;
2759 if (PL_reg_poscache) {
2760 if (PL_reg_poscache_size < size) {
2761 Renew(PL_reg_poscache, size, char);
2762 PL_reg_poscache_size = size;
2763 }
2764 Zero(PL_reg_poscache, size, char);
2765 }
2766 else {
2767 PL_reg_poscache_size = size;
2768 Newz(29, PL_reg_poscache, size, char);
2769 }
2770 DEBUG_r(
2771 PerlIO_printf(Perl_debug_log,
2772 "%sDetected a super-linear match, switching on caching%s...\n",
2773 PL_colors[4], PL_colors[5])
2774 );
2775 }
2776 if (PL_reg_leftiter < 0) {
2777 I32 o = locinput - PL_bostr, b;
2778
2779 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2780 b = o % 8;
2781 o /= 8;
2782 if (PL_reg_poscache[o] & (1<<b)) {
2783 DEBUG_r(
2784 PerlIO_printf(Perl_debug_log,
2785 "%*s already tried at this position...\n",
2786 REPORT_CODE_OFF+PL_regindent*2, "")
2787 );
7821416a 2788 sayNO_SILENT;
2c2d71f5
JH
2789 }
2790 PL_reg_poscache[o] |= (1<<b);
2791 }
2792 }
2793
a0d0e21e
LW
2794 /* Prefer next over scan for minimal matching. */
2795
2796 if (cc->minmod) {
3280af22 2797 PL_regcc = cc->oldcc;
2ab05381
IZ
2798 if (PL_regcc)
2799 ln = PL_regcc->cur;
5f05dabc 2800 cp = regcppush(cc->parenfloor);
02db2b7b 2801 REGCP_SET(lastcp);
5f05dabc 2802 if (regmatch(cc->next)) {
c277df42 2803 regcpblow(cp);
4633a7c4 2804 sayYES; /* All done. */
5f05dabc 2805 }
02db2b7b 2806 REGCP_UNWIND(lastcp);
5f05dabc 2807 regcppop();
2ab05381
IZ
2808 if (PL_regcc)
2809 PL_regcc->cur = ln;
3280af22 2810 PL_regcc = cc;
a0d0e21e 2811
c277df42 2812 if (n >= cc->max) { /* Maximum greed exceeded? */
e476b1b5 2813 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2814 && !(PL_reg_flags & RF_warned)) {
2815 PL_reg_flags |= RF_warned;
e476b1b5 2816 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2817 "Complex regular subexpression recursion",
2818 REG_INFTY - 1);
c277df42 2819 }
4633a7c4 2820 sayNO;
c277df42 2821 }
a687059c 2822
c277df42 2823 DEBUG_r(
c3464db5
DD
2824 PerlIO_printf(Perl_debug_log,
2825 "%*s trying longer...\n",
3280af22 2826 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2827 );
a0d0e21e 2828 /* Try scanning more and see if it helps. */
3280af22 2829 PL_reginput = locinput;
a0d0e21e
LW
2830 cc->cur = n;
2831 cc->lastloc = locinput;
5f05dabc 2832 cp = regcppush(cc->parenfloor);
02db2b7b 2833 REGCP_SET(lastcp);
5f05dabc 2834 if (regmatch(cc->scan)) {
c277df42 2835 regcpblow(cp);
4633a7c4 2836 sayYES;
5f05dabc 2837 }
02db2b7b 2838 REGCP_UNWIND(lastcp);
5f05dabc 2839 regcppop();
4633a7c4 2840 cc->cur = n - 1;
c277df42 2841 cc->lastloc = lastloc;
4633a7c4 2842 sayNO;
a0d0e21e
LW
2843 }
2844
2845 /* Prefer scan over next for maximal matching. */
2846
2847 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2848 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2849 cc->cur = n;
2850 cc->lastloc = locinput;
02db2b7b 2851 REGCP_SET(lastcp);
5f05dabc 2852 if (regmatch(cc->scan)) {
c277df42 2853 regcpblow(cp);
4633a7c4 2854 sayYES;
5f05dabc 2855 }
02db2b7b 2856 REGCP_UNWIND(lastcp);
a0d0e21e 2857 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2858 PL_reginput = locinput;
c277df42 2859 DEBUG_r(
c3464db5
DD
2860 PerlIO_printf(Perl_debug_log,
2861 "%*s failed, try continuation...\n",
3280af22 2862 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2863 );
2864 }
e476b1b5 2865 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2866 && !(PL_reg_flags & RF_warned)) {
3280af22 2867 PL_reg_flags |= RF_warned;
e476b1b5 2868 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2869 "Complex regular subexpression recursion",
2870 REG_INFTY - 1);
a0d0e21e
LW
2871 }
2872
2873 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2874 PL_regcc = cc->oldcc;
2ab05381
IZ
2875 if (PL_regcc)
2876 ln = PL_regcc->cur;
a0d0e21e 2877 if (regmatch(cc->next))
4633a7c4 2878 sayYES;
2ab05381
IZ
2879 if (PL_regcc)
2880 PL_regcc->cur = ln;
3280af22 2881 PL_regcc = cc;
4633a7c4 2882 cc->cur = n - 1;
c277df42 2883 cc->lastloc = lastloc;
4633a7c4 2884 sayNO;
a0d0e21e
LW
2885 }
2886 /* NOT REACHED */
c277df42
IZ
2887 case BRANCHJ:
2888 next = scan + ARG(scan);
2889 if (next == scan)
2890 next = NULL;
2891 inner = NEXTOPER(NEXTOPER(scan));
2892 goto do_branch;
2893 case BRANCH:
2894 inner = NEXTOPER(scan);
2895 do_branch:
2896 {
2897 CHECKPOINT lastcp;
2898 c1 = OP(scan);
2899 if (OP(next) != c1) /* No choice. */
2900 next = inner; /* Avoid recursion. */
a0d0e21e 2901 else {
02db2b7b
IZ
2902 I32 lastparen = *PL_reglastparen;
2903 I32 unwind1;
2904 re_unwind_branch_t *uw;
2905
2906 /* Put unwinding data on stack */
2907 unwind1 = SSNEWt(1,re_unwind_branch_t);
2908 uw = SSPTRt(unwind1,re_unwind_branch_t);
2909 uw->prev = unwind;
2910 unwind = unwind1;
2911 uw->type = ((c1 == BRANCH)
2912 ? RE_UNWIND_BRANCH
2913 : RE_UNWIND_BRANCHJ);
2914 uw->lastparen = lastparen;
2915 uw->next = next;
2916 uw->locinput = locinput;
2917 uw->nextchr = nextchr;
2918#ifdef DEBUGGING
2919 uw->regindent = ++PL_regindent;
2920#endif
c277df42 2921
02db2b7b
IZ
2922 REGCP_SET(uw->lastcp);
2923
2924 /* Now go into the first branch */
2925 next = inner;
a687059c 2926 }
a0d0e21e
LW
2927 }
2928 break;
2929 case MINMOD:
2930 minmod = 1;
2931 break;
c277df42
IZ
2932 case CURLYM:
2933 {
00db4c45 2934 I32 l = 0;
c277df42
IZ
2935 CHECKPOINT lastcp;
2936
2937 /* We suppose that the next guy does not need
2938 backtracking: in particular, it is of constant length,
2939 and has no parenths to influence future backrefs. */
2940 ln = ARG1(scan); /* min to match */
2941 n = ARG2(scan); /* max to match */
c277df42
IZ
2942 paren = scan->flags;
2943 if (paren) {
3280af22
NIS
2944 if (paren > PL_regsize)
2945 PL_regsize = paren;
2946 if (paren > *PL_reglastparen)
2947 *PL_reglastparen = paren;
c277df42 2948 }
dc45a647 2949 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2950 if (paren)
2951 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2952 PL_reginput = locinput;
c277df42
IZ
2953 if (minmod) {
2954 minmod = 0;
2955 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2956 sayNO;
5f4b28b2 2957 if (ln && l == 0 && n >= ln
c277df42
IZ
2958 /* In fact, this is tricky. If paren, then the
2959 fact that we did/didnot match may influence
2960 future execution. */
2961 && !(paren && ln == 0))
2962 ln = n;
3280af22 2963 locinput = PL_reginput;
22c35a8c 2964 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2965 c1 = (U8)*STRING(next);
c277df42 2966 if (OP(next) == EXACTF)
22c35a8c 2967 c2 = PL_fold[c1];
c277df42 2968 else if (OP(next) == EXACTFL)
22c35a8c 2969 c2 = PL_fold_locale[c1];
c277df42
IZ
2970 else
2971 c2 = c1;
a0ed51b3
LW
2972 }
2973 else
c277df42 2974 c1 = c2 = -1000;
02db2b7b 2975 REGCP_SET(lastcp);
5f4b28b2 2976 /* This may be improved if l == 0. */
c277df42
IZ
2977 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2978 /* If it could work, try it. */
2979 if (c1 == -1000 ||
3280af22
NIS
2980 UCHARAT(PL_reginput) == c1 ||
2981 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2982 {
2983 if (paren) {
2984 if (n) {
cf93c79d
IZ
2985 PL_regstartp[paren] =
2986 HOPc(PL_reginput, -l) - PL_bostr;
2987 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2988 }
2989 else
cf93c79d 2990 PL_regendp[paren] = -1;
c277df42
IZ
2991 }
2992 if (regmatch(next))
2993 sayYES;
02db2b7b 2994 REGCP_UNWIND(lastcp);
c277df42
IZ
2995 }
2996 /* Couldn't or didn't -- move forward. */
3280af22 2997 PL_reginput = locinput;
c277df42
IZ
2998 if (regrepeat_hard(scan, 1, &l)) {
2999 ln++;
3280af22 3000 locinput = PL_reginput;
c277df42
IZ
3001 }
3002 else
3003 sayNO;
3004 }
a0ed51b3
LW
3005 }
3006 else {
c277df42
IZ
3007 n = regrepeat_hard(scan, n, &l);
3008 if (n != 0 && l == 0
3009 /* In fact, this is tricky. If paren, then the
3010 fact that we did/didnot match may influence
3011 future execution. */
3012 && !(paren && ln == 0))
3013 ln = n;
3280af22 3014 locinput = PL_reginput;
c277df42 3015 DEBUG_r(
5c0ca799 3016 PerlIO_printf(Perl_debug_log,
faccc32b 3017 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3018 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3019 (IV) n, (IV)l)
c277df42
IZ
3020 );
3021 if (n >= ln) {
22c35a8c 3022 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3023 c1 = (U8)*STRING(next);
c277df42 3024 if (OP(next) == EXACTF)
22c35a8c 3025 c2 = PL_fold[c1];
c277df42 3026 else if (OP(next) == EXACTFL)
22c35a8c 3027 c2 = PL_fold_locale[c1];
c277df42
IZ
3028 else
3029 c2 = c1;
a0ed51b3
LW
3030 }
3031 else
c277df42
IZ
3032 c1 = c2 = -1000;
3033 }
02db2b7b 3034 REGCP_SET(lastcp);
c277df42
IZ
3035 while (n >= ln) {
3036 /* If it could work, try it. */
3037 if (c1 == -1000 ||
3280af22
NIS
3038 UCHARAT(PL_reginput) == c1 ||
3039 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3040 {
3041 DEBUG_r(
c3464db5 3042 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3043 "%*s trying tail with n=%"IVdf"...\n",
3044 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3045 );
3046 if (paren) {
3047 if (n) {
cf93c79d
IZ
3048 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3049 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3050 }
a0ed51b3 3051 else
cf93c79d 3052 PL_regendp[paren] = -1;
c277df42 3053 }
a0ed51b3
LW
3054 if (regmatch(next))
3055 sayYES;
02db2b7b 3056 REGCP_UNWIND(lastcp);
a0ed51b3 3057 }
c277df42
IZ
3058 /* Couldn't or didn't -- back up. */
3059 n--;
dfe13c55 3060 locinput = HOPc(locinput, -l);
3280af22 3061 PL_reginput = locinput;
c277df42
IZ
3062 }
3063 }
3064 sayNO;
3065 break;
3066 }
3067 case CURLYN:
3068 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3069 if (paren > PL_regsize)
3070 PL_regsize = paren;
3071 if (paren > *PL_reglastparen)
3072 *PL_reglastparen = paren;
c277df42
IZ
3073 ln = ARG1(scan); /* min to match */
3074 n = ARG2(scan); /* max to match */
dc45a647 3075 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3076 goto repeat;
a0d0e21e 3077 case CURLY:
c277df42 3078 paren = 0;
a0d0e21e
LW
3079 ln = ARG1(scan); /* min to match */
3080 n = ARG2(scan); /* max to match */
dc45a647 3081 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3082 goto repeat;
3083 case STAR:
3084 ln = 0;
c277df42 3085 n = REG_INFTY;
a0d0e21e 3086 scan = NEXTOPER(scan);
c277df42 3087 paren = 0;
a0d0e21e
LW
3088 goto repeat;
3089 case PLUS:
c277df42
IZ
3090 ln = 1;
3091 n = REG_INFTY;
3092 scan = NEXTOPER(scan);
3093 paren = 0;
3094 repeat:
a0d0e21e
LW
3095 /*
3096 * Lookahead to avoid useless match attempts
3097 * when we know what character comes next.
3098 */
22c35a8c 3099 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3100 c1 = (U8)*STRING(next);
bbce6d69 3101 if (OP(next) == EXACTF)
22c35a8c 3102 c2 = PL_fold[c1];
bbce6d69 3103 else if (OP(next) == EXACTFL)
22c35a8c 3104 c2 = PL_fold_locale[c1];
bbce6d69 3105 else
3106 c2 = c1;
3107 }
a0d0e21e 3108 else
bbce6d69 3109 c1 = c2 = -1000;
3280af22 3110 PL_reginput = locinput;
a0d0e21e 3111 if (minmod) {
c277df42 3112 CHECKPOINT lastcp;
a0d0e21e
LW
3113 minmod = 0;
3114 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3115 sayNO;
a0ed51b3 3116 locinput = PL_reginput;
02db2b7b 3117 REGCP_SET(lastcp);
0fe9bf95
IZ
3118 if (c1 != -1000) {
3119 char *e = locinput + n - ln; /* Should not check after this */
3120 char *old = locinput;
3121
3122 if (e >= PL_regeol || (n == REG_INFTY))
3123 e = PL_regeol - 1;
3124 while (1) {
3125 /* Find place 'next' could work */
3126 if (c1 == c2) {
3127 while (locinput <= e && *locinput != c1)
3128 locinput++;
3129 } else {
3130 while (locinput <= e
3131 && *locinput != c1
3132 && *locinput != c2)
3133 locinput++;
3134 }
3135 if (locinput > e)
3136 sayNO;
3137 /* PL_reginput == old now */
3138 if (locinput != old) {
3139 ln = 1; /* Did some */
3140 if (regrepeat(scan, locinput - old) <
3141 locinput - old)
3142 sayNO;
3143 }
3144 /* PL_reginput == locinput now */
29d1e993 3145 TRYPAREN(paren, ln, locinput);
0fe9bf95 3146 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3147 REGCP_UNWIND(lastcp);
0fe9bf95
IZ
3148 /* Couldn't or didn't -- move forward. */
3149 old = locinput++;
3150 }
3151 }
3152 else
c277df42 3153 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 3154 /* If it could work, try it. */
bbce6d69 3155 if (c1 == -1000 ||
3280af22
NIS
3156 UCHARAT(PL_reginput) == c1 ||
3157 UCHARAT(PL_reginput) == c2)
bbce6d69 3158 {
29d1e993 3159 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3160 REGCP_UNWIND(lastcp);
bbce6d69 3161 }
c277df42 3162 /* Couldn't or didn't -- move forward. */
a0ed51b3 3163 PL_reginput = locinput;
a0d0e21e
LW
3164 if (regrepeat(scan, 1)) {
3165 ln++;
a0ed51b3
LW
3166 locinput = PL_reginput;
3167 }
3168 else
4633a7c4 3169 sayNO;
a0d0e21e
LW
3170 }
3171 }
3172 else {
c277df42 3173 CHECKPOINT lastcp;
a0d0e21e 3174 n = regrepeat(scan, n);
a0ed51b3 3175 locinput = PL_reginput;
22c35a8c 3176 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3177 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3178 ln = n; /* why back off? */
1aeab75a
GS
3179 /* ...because $ and \Z can match before *and* after
3180 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3181 We should back off by one in this case. */
3182 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3183 ln--;
3184 }
02db2b7b 3185 REGCP_SET(lastcp);
c277df42
IZ
3186 if (paren) {
3187 while (n >= ln) {
3188 /* If it could work, try it. */
3189 if (c1 == -1000 ||
3280af22
NIS
3190 UCHARAT(PL_reginput) == c1 ||
3191 UCHARAT(PL_reginput) == c2)
c277df42 3192 {
29d1e993 3193 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3194 REGCP_UNWIND(lastcp);
c277df42
IZ
3195 }
3196 /* Couldn't or didn't -- back up. */
3197 n--;
dfe13c55 3198 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3199 }
a0ed51b3
LW
3200 }
3201 else {
c277df42
IZ
3202 while (n >= ln) {
3203 /* If it could work, try it. */
3204 if (c1 == -1000 ||
3280af22
NIS
3205 UCHARAT(PL_reginput) == c1 ||
3206 UCHARAT(PL_reginput) == c2)
c277df42 3207 {
29d1e993 3208 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3209 REGCP_UNWIND(lastcp);
c277df42
IZ
3210 }
3211 /* Couldn't or didn't -- back up. */
3212 n--;
dfe13c55 3213 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3214 }
a0d0e21e
LW
3215 }
3216 }
4633a7c4 3217 sayNO;
c277df42 3218 break;
a0d0e21e 3219 case END:
0f5d15d6
IZ
3220 if (PL_reg_call_cc) {
3221 re_cc_state *cur_call_cc = PL_reg_call_cc;
3222 CURCUR *cctmp = PL_regcc;
3223 regexp *re = PL_reg_re;
3224 CHECKPOINT cp, lastcp;
3225
3226 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3227 REGCP_SET(lastcp);
0f5d15d6
IZ
3228 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3229 the caller. */
3230 PL_reginput = locinput; /* Make position available to
3231 the callcc. */
3232 cache_re(PL_reg_call_cc->re);
3233 PL_regcc = PL_reg_call_cc->cc;
3234 PL_reg_call_cc = PL_reg_call_cc->prev;
3235 if (regmatch(cur_call_cc->node)) {
3236 PL_reg_call_cc = cur_call_cc;
3237 regcpblow(cp);
3238 sayYES;
3239 }
02db2b7b 3240 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3241 regcppop();
3242 PL_reg_call_cc = cur_call_cc;
3243 PL_regcc = cctmp;
3244 PL_reg_re = re;
3245 cache_re(re);
3246
3247 DEBUG_r(
3248 PerlIO_printf(Perl_debug_log,
3249 "%*s continuation failed...\n",
3250 REPORT_CODE_OFF+PL_regindent*2, "")
3251 );
7821416a 3252 sayNO_SILENT;
0f5d15d6 3253 }
7821416a
IZ
3254 if (locinput < PL_regtill) {
3255 DEBUG_r(PerlIO_printf(Perl_debug_log,
3256 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3257 PL_colors[4],
3258 (long)(locinput - PL_reg_starttry),
3259 (long)(PL_regtill - PL_reg_starttry),
3260 PL_colors[5]));
3261 sayNO_FINAL; /* Cannot match: too short. */
3262 }
3263 PL_reginput = locinput; /* put where regtry can find it */
3264 sayYES_FINAL; /* Success! */
7e5428c5 3265 case SUCCEED:
3280af22 3266 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3267 sayYES_LOUD; /* Success! */
c277df42
IZ
3268 case SUSPEND:
3269 n = 1;
9fe1d20c 3270 PL_reginput = locinput;
c277df42 3271 goto do_ifmatch;
a0d0e21e 3272 case UNLESSM:
c277df42 3273 n = 0;
a0ed51b3 3274 if (scan->flags) {
0fe9bf95
IZ
3275 if (UTF) { /* XXXX This is absolutely
3276 broken, we read before
3277 start of string. */
3278 s = HOPMAYBEc(locinput, -scan->flags);
3279 if (!s)
3280 goto say_yes;
3281 PL_reginput = s;
3282 }
3283 else {
3284 if (locinput < PL_bostr + scan->flags)
3285 goto say_yes;
3286 PL_reginput = locinput - scan->flags;
3287 goto do_ifmatch;
3288 }
a0ed51b3
LW
3289 }
3290 else
3291 PL_reginput = locinput;
c277df42
IZ
3292 goto do_ifmatch;
3293 case IFMATCH:
3294 n = 1;
a0ed51b3 3295 if (scan->flags) {
0fe9bf95
IZ
3296 if (UTF) { /* XXXX This is absolutely
3297 broken, we read before
3298 start of string. */
3299 s = HOPMAYBEc(locinput, -scan->flags);
3300 if (!s || s < PL_bostr)
3301 goto say_no;
3302 PL_reginput = s;
3303 }
3304 else {
3305 if (locinput < PL_bostr + scan->flags)
3306 goto say_no;
3307 PL_reginput = locinput - scan->flags;
3308 goto do_ifmatch;
3309 }
a0ed51b3
LW
3310 }
3311 else
3312 PL_reginput = locinput;
3313
c277df42 3314 do_ifmatch:
c277df42
IZ
3315 inner = NEXTOPER(NEXTOPER(scan));
3316 if (regmatch(inner) != n) {
3317 say_no:
3318 if (logical) {
3319 logical = 0;
3320 sw = 0;
3321 goto do_longjump;
a0ed51b3
LW
3322 }
3323 else
c277df42
IZ
3324 sayNO;
3325 }
3326 say_yes:
3327 if (logical) {
3328 logical = 0;
3329 sw = 1;
3330 }
fe44a5e8 3331 if (OP(scan) == SUSPEND) {
3280af22 3332 locinput = PL_reginput;
565764a8 3333 nextchr = UCHARAT(locinput);
fe44a5e8 3334 }
c277df42
IZ
3335 /* FALL THROUGH. */
3336 case LONGJMP:
3337 do_longjump:
3338 next = scan + ARG(scan);
3339 if (next == scan)
3340 next = NULL;
a0d0e21e
LW
3341 break;
3342 default:
b900a521 3343 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3344 PTR2UV(scan), OP(scan));
cea2e8a9 3345 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3346 }
02db2b7b 3347 reenter:
a0d0e21e
LW
3348 scan = next;
3349 }
a687059c 3350
a0d0e21e
LW
3351 /*
3352 * We get here only if there's trouble -- normally "case END" is
3353 * the terminating point.
3354 */
cea2e8a9 3355 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3356 /*NOTREACHED*/
4633a7c4
LW
3357 sayNO;
3358
7821416a
IZ
3359yes_loud:
3360 DEBUG_r(
3361 PerlIO_printf(Perl_debug_log,
3362 "%*s %scould match...%s\n",
3363 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3364 );
3365 goto yes;
3366yes_final:
3367 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3368 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3369yes:
3370#ifdef DEBUGGING
3280af22 3371 PL_regindent--;
4633a7c4 3372#endif
02db2b7b
IZ
3373
3374#if 0 /* Breaks $^R */
3375 if (unwind)
3376 regcpblow(firstcp);
3377#endif
4633a7c4
LW
3378 return 1;
3379
3380no:
7821416a
IZ
3381 DEBUG_r(
3382 PerlIO_printf(Perl_debug_log,
3383 "%*s %sfailed...%s\n",
3384 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3385 );
3386 goto do_no;
3387no_final:
3388do_no:
02db2b7b
IZ
3389 if (unwind) {
3390 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3391
3392 switch (uw->type) {
3393 case RE_UNWIND_BRANCH:
3394 case RE_UNWIND_BRANCHJ:
3395 {
3396 re_unwind_branch_t *uwb = &(uw->branch);
3397 I32 lastparen = uwb->lastparen;
3398
3399 REGCP_UNWIND(uwb->lastcp);
3400 for (n = *PL_reglastparen; n > lastparen; n--)
3401 PL_regendp[n] = -1;
3402 *PL_reglastparen = n;
3403 scan = next = uwb->next;
3404 if ( !scan ||
3405 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3406 ? BRANCH : BRANCHJ) ) { /* Failure */
3407 unwind = uwb->prev;
3408#ifdef DEBUGGING
3409 PL_regindent--;
3410#endif
3411 goto do_no;
3412 }
3413 /* Have more choice yet. Reuse the same uwb. */
3414 /*SUPPRESS 560*/
3415 if ((n = (uwb->type == RE_UNWIND_BRANCH
3416 ? NEXT_OFF(next) : ARG(next))))
3417 next += n;
3418 else
3419 next = NULL; /* XXXX Needn't unwinding in this case... */
3420 uwb->next = next;
3421 next = NEXTOPER(scan);
3422 if (uwb->type == RE_UNWIND_BRANCHJ)
3423 next = NEXTOPER(next);
3424 locinput = uwb->locinput;
3425 nextchr = uwb->nextchr;
3426#ifdef DEBUGGING
3427 PL_regindent = uwb->regindent;
3428#endif
3429
3430 goto reenter;
3431 }
3432 /* NOT REACHED */
3433 default:
3434 Perl_croak(aTHX_ "regexp unwind memory corruption");
3435 }
3436 /* NOT REACHED */
3437 }
4633a7c4 3438#ifdef DEBUGGING
3280af22 3439 PL_regindent--;
4633a7c4 3440#endif
a0d0e21e 3441 return 0;
a687059c
LW
3442}
3443
3444/*
3445 - regrepeat - repeatedly match something simple, report how many
3446 */
3447/*
3448 * [This routine now assumes that it will only match on things of length 1.
3449 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3450 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3451 */
76e3520e 3452STATIC I32
cea2e8a9 3453S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3454{
5c0ca799 3455 dTHR;
a0d0e21e 3456 register char *scan;
a0d0e21e 3457 register I32 c;
3280af22 3458 register char *loceol = PL_regeol;
a0ed51b3 3459 register I32 hardcount = 0;
a0d0e21e 3460
3280af22 3461 scan = PL_reginput;
c277df42 3462 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3463 loceol = scan + max;
a0d0e21e 3464 switch (OP(p)) {
22c35a8c 3465 case REG_ANY:
a0d0e21e
LW
3466 while (scan < loceol && *scan != '\n')
3467 scan++;
3468 break;
3469 case SANY:
3470 scan = loceol;
3471 break;
a0ed51b3
LW
3472 case ANYUTF8:
3473 loceol = PL_regeol;
3474 while (scan < loceol && *scan != '\n') {
3475 scan += UTF8SKIP(scan);
3476 hardcount++;
3477 }
3478 break;
3479 case SANYUTF8:
3480 loceol = PL_regeol;
3481 while (scan < loceol) {
3482 scan += UTF8SKIP(scan);
3483 hardcount++;
3484 }
3485 break;
bbce6d69 3486 case EXACT: /* length of string is 1 */
cd439c50 3487 c = (U8)*STRING(p);
bbce6d69 3488 while (scan < loceol && UCHARAT(scan) == c)
3489 scan++;
3490 break;
3491 case EXACTF: /* length of string is 1 */
cd439c50 3492 c = (U8)*STRING(p);
bbce6d69 3493 while (scan < loceol &&
22c35a8c 3494 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3495 scan++;
3496 break;
3497 case EXACTFL: /* length of string is 1 */
3280af22 3498 PL_reg_flags |= RF_tainted;
cd439c50 3499 c = (U8)*STRING(p);
bbce6d69 3500 while (scan < loceol &&
22c35a8c 3501 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3502 scan++;
3503 break;
a0ed51b3
LW
3504 case ANYOFUTF8:
3505 loceol = PL_regeol;
3506 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3507 scan += UTF8SKIP(scan);
3508 hardcount++;
3509 }
3510 break;
a0d0e21e 3511 case ANYOF:
936ed897 3512 while (scan < loceol && REGINCLASS(p, *scan))
a0d0e21e 3513 scan++;
a0d0e21e
LW
3514 break;
3515 case ALNUM:
3516 while (scan < loceol && isALNUM(*scan))
3517 scan++;
3518 break;
a0ed51b3
LW
3519 case ALNUMUTF8:
3520 loceol = PL_regeol;
dfe13c55 3521 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3522 scan += UTF8SKIP(scan);
3523 hardcount++;
3524 }
3525 break;
bbce6d69 3526 case ALNUML:
3280af22 3527 PL_reg_flags |= RF_tainted;
bbce6d69 3528 while (scan < loceol && isALNUM_LC(*scan))
3529 scan++;
3530 break;
a0ed51b3
LW
3531 case ALNUMLUTF8:
3532 PL_reg_flags |= RF_tainted;
3533 loceol = PL_regeol;
dfe13c55 3534 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3535 scan += UTF8SKIP(scan);
3536 hardcount++;
3537 }
3538 break;
3539 break;
a0d0e21e
LW
3540 case NALNUM:
3541 while (scan < loceol && !isALNUM(*scan))
3542 scan++;
3543 break;
a0ed51b3
LW
3544 case NALNUMUTF8:
3545 loceol = PL_regeol;
dfe13c55 3546 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3547 scan += UTF8SKIP(scan);
3548 hardcount++;
3549 }
3550 break;
bbce6d69 3551 case NALNUML:
3280af22 3552 PL_reg_flags |= RF_tainted;
bbce6d69 3553 while (scan < loceol && !isALNUM_LC(*scan))
3554 scan++;
3555 break;
a0ed51b3
LW
3556 case NALNUMLUTF8:
3557 PL_reg_flags |= RF_tainted;
3558 loceol = PL_regeol;
dfe13c55 3559 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3560 scan += UTF8SKIP(scan);
3561 hardcount++;
3562 }
3563 break;
a0d0e21e
LW
3564 case SPACE:
3565 while (scan < loceol && isSPACE(*scan))
3566 scan++;
3567 break;
a0ed51b3
LW
3568 case SPACEUTF8:
3569 loceol = PL_regeol;
dfe13c55 3570 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3571 scan += UTF8SKIP(scan);
3572 hardcount++;
3573 }
3574 break;
bbce6d69 3575 case SPACEL:
3280af22 3576 PL_reg_flags |= RF_tainted;
bbce6d69 3577 while (scan < loceol && isSPACE_LC(*scan))
3578 scan++;
3579 break;
a0ed51b3
LW
3580 case SPACELUTF8:
3581 PL_reg_flags |= RF_tainted;
3582 loceol = PL_regeol;
dfe13c55 3583 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3584 scan += UTF8SKIP(scan);
3585 hardcount++;
3586 }
3587 break;
a0d0e21e
LW
3588 case NSPACE:
3589 while (scan < loceol && !isSPACE(*scan))
3590 scan++;
3591 break;
a0ed51b3
LW
3592 case NSPACEUTF8:
3593 loceol = PL_regeol;
dfe13c55 3594 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3595 scan += UTF8SKIP(scan);
3596 hardcount++;
3597 }
3598 break;
bbce6d69 3599 case NSPACEL:
3280af22 3600 PL_reg_flags |= RF_tainted;
bbce6d69 3601 while (scan < loceol && !isSPACE_LC(*scan))
3602 scan++;
3603 break;
a0ed51b3
LW
3604 case NSPACELUTF8:
3605 PL_reg_flags |= RF_tainted;
3606 loceol = PL_regeol;
dfe13c55 3607 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3608 scan += UTF8SKIP(scan);
3609 hardcount++;
3610 }
3611 break;
a0d0e21e
LW
3612 case DIGIT:
3613 while (scan < loceol && isDIGIT(*scan))
3614 scan++;
3615 break;
a0ed51b3
LW
3616 case DIGITUTF8:
3617 loceol = PL_regeol;
dfe13c55 3618 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3619 scan += UTF8SKIP(scan);
3620 hardcount++;
3621 }
3622 break;
3623 break;
a0d0e21e
LW
3624 case NDIGIT:
3625 while (scan < loceol && !isDIGIT(*scan))
3626 scan++;
3627 break;
a0ed51b3
LW
3628 case NDIGITUTF8:
3629 loceol = PL_regeol;
dfe13c55 3630 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3631 scan += UTF8SKIP(scan);
3632 hardcount++;
3633 }
3634 break;
a0d0e21e
LW
3635 default: /* Called on something of 0 width. */
3636 break; /* So match right here or not at all. */
3637 }
a687059c 3638
a0ed51b3
LW
3639 if (hardcount)
3640 c = hardcount;
3641 else
3642 c = scan - PL_reginput;
3280af22 3643 PL_reginput = scan;
a687059c 3644
c277df42
IZ
3645 DEBUG_r(
3646 {
3647 SV *prop = sv_newmortal();
3648
3649 regprop(prop, p);
3650 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3651 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3652 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3653 });
3654
a0d0e21e 3655 return(c);
a687059c
LW
3656}
3657
3658/*
c277df42
IZ
3659 - regrepeat_hard - repeatedly match something, report total lenth and length
3660 *
3661 * The repeater is supposed to have constant length.
3662 */
3663
76e3520e 3664STATIC I32
cea2e8a9 3665S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3666{
5c0ca799 3667 dTHR;
c277df42
IZ
3668 register char *scan;
3669 register char *start;
3280af22 3670 register char *loceol = PL_regeol;
a0ed51b3 3671 I32 l = 0;
708e3b05 3672 I32 count = 0, res = 1;
a0ed51b3
LW
3673
3674 if (!max)
3675 return 0;
c277df42 3676
3280af22 3677 start = PL_reginput;
a0ed51b3 3678 if (UTF) {
708e3b05 3679 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3680 if (!count++) {
3681 l = 0;
3682 while (start < PL_reginput) {
3683 l++;
3684 start += UTF8SKIP(start);
3685 }
3686 *lp = l;
3687 if (l == 0)
3688 return max;
3689 }
3690 if (count == max)
3691 return count;
3692 }
3693 }
3694 else {
708e3b05 3695 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3696 if (!count++) {
3697 *lp = l = PL_reginput - start;
3698 if (max != REG_INFTY && l*max < loceol - scan)
3699 loceol = scan + l*max;
3700 if (l == 0)
3701 return max;
c277df42
IZ
3702 }
3703 }
3704 }
708e3b05 3705 if (!res)
3280af22 3706 PL_reginput = scan;
c277df42 3707
a0ed51b3 3708 return count;
c277df42
IZ
3709}
3710
3711/*
cb8d8820 3712 - reginclass - determine if a character falls into a character class
bbce6d69 3713 */
3714
76e3520e 3715STATIC bool
936ed897 3716S_reginclass(pTHX_ register regnode *p, register I32 c)
bbce6d69 3717{
5c0ca799 3718 dTHR;
b8c5462f 3719 char flags = ANYOF_FLAGS(p);
bbce6d69 3720 bool match = FALSE;
3721
3722 c &= 0xFF;
b8c5462f 3723 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3724 match = TRUE;
3725 else if (flags & ANYOF_FOLD) {
3726 I32 cf;
3727 if (flags & ANYOF_LOCALE) {
3280af22 3728 PL_reg_flags |= RF_tainted;
22c35a8c 3729 cf = PL_fold_locale[c];
bbce6d69 3730 }
3731 else
22c35a8c 3732 cf = PL_fold[c];
b8c5462f 3733 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3734 match = TRUE;
3735 }
3736
b8c5462f 3737 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3738 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3739 if (
3740 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3741 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3742 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3743 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3744 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3745 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3746 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3747 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3748 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3749 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3750 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3751 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3752 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3753 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3754 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3755 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3756 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3757 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3758 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3759 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3760 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3761 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3762 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3763 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3764 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
aaa51d5e
JF
3765 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3766 (ANYOF_CLASS_TEST(p, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3767 (ANYOF_CLASS_TEST(p, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3768 (ANYOF_CLASS_TEST(p, ANYOF_BLANK) && isBLANK(c)) ||
3769 (ANYOF_CLASS_TEST(p, ANYOF_NBLANK) && !isBLANK(c))
b8c5462f 3770 ) /* How's that for a conditional? */
bbce6d69 3771 {
3772 match = TRUE;
3773 }
3774 }
3775
ae5c130c 3776 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3777}
3778
a0ed51b3 3779STATIC bool
cea2e8a9 3780S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
3781{
3782 dTHR;
a0ed51b3
LW
3783 char flags = ARG1(f);
3784 bool match = FALSE;
3785 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3786
3787 if (swash_fetch(sv, p))
3788 match = TRUE;
3789 else if (flags & ANYOF_FOLD) {
806e7201 3790 U8 tmpbuf[UTF8_MAXLEN];
a0ed51b3
LW
3791 if (flags & ANYOF_LOCALE) {
3792 PL_reg_flags |= RF_tainted;
3793 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3794 }
3795 else
3796 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3797 if (swash_fetch(sv, tmpbuf))
3798 match = TRUE;
3799 }
3800
b8c5462f 3801 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3802
3803 return (flags & ANYOF_INVERT) ? !match : match;
3804}
161b471a 3805
dfe13c55 3806STATIC U8 *
cea2e8a9 3807S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
3808{
3809 dTHR;
a0ed51b3
LW
3810 if (off >= 0) {
3811 while (off-- && s < (U8*)PL_regeol)
3812 s += UTF8SKIP(s);
3813 }
3814 else {
3815 while (off++) {
3816 if (s > (U8*)PL_bostr) {
3817 s--;
3818 if (*s & 0x80) {
3819 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3820 s--;
3821 } /* XXX could check well-formedness here */
3822 }
3823 }
3824 }
3825 return s;
3826}
161b471a 3827
dfe13c55 3828STATIC U8 *
cea2e8a9 3829S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3830{
c485e607 3831 dTHR;
a0ed51b3
LW
3832 if (off >= 0) {
3833 while (off-- && s < (U8*)PL_regeol)
3834 s += UTF8SKIP(s);
3835 if (off >= 0)
3836 return 0;
3837 }
3838 else {
3839 while (off++) {
3840 if (s > (U8*)PL_bostr) {
3841 s--;
3842 if (*s & 0x80) {
3843 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3844 s--;
3845 } /* XXX could check well-formedness here */
3846 }
3847 else
3848 break;
3849 }
3850 if (off <= 0)
3851 return 0;
3852 }
3853 return s;
3854}
51371543
GS
3855
3856#ifdef PERL_OBJECT
51371543
GS
3857#include "XSUB.h"
3858#endif
3859
3860static void
3861restore_pos(pTHXo_ void *arg)
3862{
3863 dTHR;
3864 if (PL_reg_eval_set) {
3865 if (PL_reg_oldsaved) {
3866 PL_reg_re->subbeg = PL_reg_oldsaved;
3867 PL_reg_re->sublen = PL_reg_oldsavedlen;
3868 RX_MATCH_COPIED_on(PL_reg_re);
3869 }
3870 PL_reg_magic->mg_len = PL_reg_oldpos;
3871 PL_reg_eval_set = 0;
3872 PL_curpm = PL_reg_oldcurpm;
3873 }
3874}