This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
PERL_IMPLICIT_SYS (almost) works - something odd with "signal"
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
3818b22b 69 **** Copyright (c) 1991-2000, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
02db2b7b 148# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 149 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 150 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 151
02db2b7b 152# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 153 PerlIO_printf(Perl_debug_log, \
faccc32b 154 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 155 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5 178 PerlIO_printf(Perl_debug_log,
b900a521
JH
179 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
180 (UV)paren, (IV)PL_regstartp[paren],
181 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
182 (IV)PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 188 PerlIO_printf(Perl_debug_log,
faccc32b
JH
189 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
190 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
191 }
192 );
daf18116 193#if 1
dafc8851
JH
194 /* It would seem that the similar code in regtry()
195 * already takes care of this, and in fact it is in
196 * a better location to since this code can #if 0-ed out
197 * but the code in regtry() is needed or otherwise tests
198 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
199 * (as of patchlevel 7877) will fail. Then again,
200 * this code seems to be necessary or otherwise
201 * building DynaLoader will fail:
202 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
203 * --jhi */
3280af22
NIS
204 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
205 if (paren > PL_regsize)
cf93c79d
IZ
206 PL_regstartp[paren] = -1;
207 PL_regendp[paren] = -1;
a0d0e21e 208 }
dafc8851 209#endif
a0d0e21e
LW
210 return input;
211}
212
0f5d15d6 213STATIC char *
cea2e8a9 214S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 215{
46124e9e 216 dTHR;
0f5d15d6
IZ
217 I32 tmp = PL_savestack_ix;
218
219 PL_savestack_ix = ss;
220 regcppop();
221 PL_savestack_ix = tmp;
942e002e 222 return Nullch;
0f5d15d6
IZ
223}
224
225typedef struct re_cc_state
226{
227 I32 ss;
228 regnode *node;
229 struct re_cc_state *prev;
230 CURCUR *cc;
231 regexp *re;
232} re_cc_state;
233
02db2b7b 234#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 235
29d1e993
HS
236#define TRYPAREN(paren, n, input) { \
237 if (paren) { \
238 if (n) { \
239 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
240 PL_regendp[paren] = input - PL_bostr; \
241 } \
242 else \
243 PL_regendp[paren] = -1; \
244 } \
245 if (regmatch(next)) \
246 sayYES; \
247 if (paren && n) \
248 PL_regendp[paren] = -1; \
249}
250
251
a687059c 252/*
e50aee73 253 * pregexec and friends
a687059c
LW
254 */
255
256/*
c277df42 257 - pregexec - match a regexp against a string
a687059c 258 */
c277df42 259I32
864dbfa3 260Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 261 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
262/* strend: pointer to null at end of string */
263/* strbeg: real beginning of string */
264/* minend: end of match must be >=minend after stringarg. */
265/* nosave: For optimizations. */
266{
267 return
268 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
269 nosave ? 0 : REXEC_COPY_STR);
270}
0f5d15d6
IZ
271
272STATIC void
cea2e8a9 273S_cache_re(pTHX_ regexp *prog)
0f5d15d6 274{
46124e9e 275 dTHR;
0f5d15d6
IZ
276 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
277#ifdef DEBUGGING
278 PL_regprogram = prog->program;
279#endif
280 PL_regnpar = prog->nparens;
281 PL_regdata = prog->data;
282 PL_reg_re = prog;
283}
22e551b9 284
cad2e5aa
JH
285/*
286 * Need to implement the following flags for reg_anch:
287 *
288 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
289 * USE_INTUIT_ML
290 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
291 * INTUIT_AUTORITATIVE_ML
292 * INTUIT_ONCE_NOML - Intuit can match in one location only.
293 * INTUIT_ONCE_ML
294 *
295 * Another flag for this function: SECOND_TIME (so that float substrs
296 * with giant delta may be not rechecked).
297 */
298
299/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
300
2c2d71f5 301/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
302 Otherwise, only SvCUR(sv) is used to get strbeg. */
303
304/* XXXX We assume that strpos is strbeg unless sv. */
305
6eb5f6b9
JH
306/* XXXX Some places assume that there is a fixed substring.
307 An update may be needed if optimizer marks as "INTUITable"
308 RExen without fixed substrings. Similarly, it is assumed that
309 lengths of all the strings are no more than minlen, thus they
310 cannot come from lookahead.
311 (Or minlen should take into account lookahead.) */
312
2c2d71f5
JH
313/* A failure to find a constant substring means that there is no need to make
314 an expensive call to REx engine, thus we celebrate a failure. Similarly,
315 finding a substring too deep into the string means that less calls to
30944b6d
IZ
316 regtry() should be needed.
317
318 REx compiler's optimizer found 4 possible hints:
319 a) Anchored substring;
320 b) Fixed substring;
321 c) Whether we are anchored (beginning-of-line or \G);
322 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 323 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
324 string which does not contradict any of them.
325 */
2c2d71f5 326
6eb5f6b9
JH
327/* Most of decisions we do here should have been done at compile time.
328 The nodes of the REx which we used for the search should have been
329 deleted from the finite automaton. */
330
cad2e5aa
JH
331char *
332Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
333 char *strend, U32 flags, re_scream_pos_data *data)
334{
2c2d71f5 335 register I32 start_shift;
cad2e5aa 336 /* Should be nonnegative! */
2c2d71f5
JH
337 register I32 end_shift;
338 register char *s;
339 register SV *check;
a1933d95 340 char *strbeg;
cad2e5aa
JH
341 char *t;
342 I32 ml_anch;
2c2d71f5 343 char *tmp;
6eb5f6b9
JH
344 register char *other_last = Nullch; /* other substr checked before this */
345 char *check_at; /* check substr found at this pos */
30944b6d
IZ
346#ifdef DEBUGGING
347 char *i_strpos = strpos;
348#endif
cad2e5aa
JH
349
350 DEBUG_r( if (!PL_colorset) reginitcolors() );
351 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 352 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
353 PL_colors[4],PL_colors[5],PL_colors[0],
354 prog->precomp,
355 PL_colors[1],
356 (strlen(prog->precomp) > 60 ? "..." : ""),
357 PL_colors[0],
b900a521 358 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
359 strpos, PL_colors[1],
360 (strend - strpos > 60 ? "..." : ""))
361 );
362
2c2d71f5
JH
363 if (prog->minlen > strend - strpos) {
364 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 365 goto fail;
2c2d71f5 366 }
a1933d95 367 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
653099ff 368 check = prog->check_substr;
2c2d71f5 369 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
370 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
371 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 372 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 373
5675c1a6
IZ
374 if (!ml_anch) {
375 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
376 /* SvCUR is not set on references: SvRV and SvPVX overlap */
377 && sv && !SvROK(sv)
a1933d95 378 && (strpos != strbeg)) {
5675c1a6
IZ
379 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
380 goto fail;
381 }
382 if (prog->check_offset_min == prog->check_offset_max) {
2c2d71f5 383 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
384 I32 slen;
385
adac82c7 386 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 387 s = HOPc(strpos, prog->check_offset_min);
653099ff
GS
388 if (SvTAIL(check)) {
389 slen = SvCUR(check); /* >= 1 */
cad2e5aa 390
2c2d71f5
JH
391 if ( strend - s > slen || strend - s < slen - 1
392 || (strend - s == slen && strend[-1] != '\n')) {
393 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
394 goto fail_finish;
cad2e5aa
JH
395 }
396 /* Now should match s[0..slen-2] */
397 slen--;
653099ff 398 if (slen && (*SvPVX(check) != *s
cad2e5aa 399 || (slen > 1
653099ff 400 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
401 report_neq:
402 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
403 goto fail_finish;
404 }
cad2e5aa 405 }
653099ff
GS
406 else if (*SvPVX(check) != *s
407 || ((slen = SvCUR(check)) > 1
408 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
409 goto report_neq;
410 goto success_at_start;
5675c1a6 411 }
cad2e5aa 412 }
2c2d71f5 413 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 414 s = strpos;
2c2d71f5 415 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 416 end_shift = prog->minlen - start_shift -
653099ff 417 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 418 if (!ml_anch) {
653099ff
GS
419 I32 end = prog->check_offset_max + CHR_SVLEN(check)
420 - (SvTAIL(check) != 0);
2c2d71f5
JH
421 I32 eshift = strend - s - end;
422
423 if (end_shift < eshift)
424 end_shift = eshift;
425 }
cad2e5aa 426 }
2c2d71f5 427 else { /* Can match at random position */
cad2e5aa
JH
428 ml_anch = 0;
429 s = strpos;
2c2d71f5
JH
430 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
431 /* Should be nonnegative! */
432 end_shift = prog->minlen - start_shift -
653099ff 433 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
434 }
435
2c2d71f5 436#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 437 if (end_shift < 0)
6bbae5e6 438 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
439#endif
440
2c2d71f5
JH
441 restart:
442 /* Find a possible match in the region s..strend by looking for
443 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 444 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
445 I32 p = -1; /* Internal iterator of scream. */
446 I32 *pp = data ? data->scream_pos : &p;
447
2c2d71f5
JH
448 if (PL_screamfirst[BmRARE(check)] >= 0
449 || ( BmRARE(check) == '\n'
450 && (BmPREVIOUS(check) == SvCUR(check) - 1)
451 && SvTAIL(check) ))
452 s = screaminstr(sv, check,
453 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 454 else
2c2d71f5 455 goto fail_finish;
cad2e5aa
JH
456 if (data)
457 *data->scream_olds = s;
458 }
459 else
460 s = fbm_instr((unsigned char*)s + start_shift,
461 (unsigned char*)strend - end_shift,
2c2d71f5 462 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
463
464 /* Update the count-of-usability, remove useless subpatterns,
465 unshift s. */
2c2d71f5
JH
466
467 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
468 (s ? "Found" : "Did not find"),
469 ((check == prog->anchored_substr) ? "anchored" : "floating"),
470 PL_colors[0],
7b0972df
JH
471 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
472 SvPVX(check),
2c2d71f5
JH
473 PL_colors[1], (SvTAIL(check) ? "$" : ""),
474 (s ? " at offset " : "...\n") ) );
475
476 if (!s)
477 goto fail_finish;
478
6eb5f6b9
JH
479 check_at = s;
480
2c2d71f5 481 /* Finish the diagnostic message */
30944b6d 482 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
483
484 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
485 Start with the other substr.
486 XXXX no SCREAM optimization yet - and a very coarse implementation
487 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
488 *always* match. Probably should be marked during compile...
489 Probably it is right to do no SCREAM here...
490 */
491
492 if (prog->float_substr && prog->anchored_substr) {
30944b6d 493 /* Take into account the "other" substring. */
2c2d71f5
JH
494 /* XXXX May be hopelessly wrong for UTF... */
495 if (!other_last)
6eb5f6b9 496 other_last = strpos;
2c2d71f5 497 if (check == prog->float_substr) {
30944b6d
IZ
498 do_other_anchored:
499 {
2c2d71f5
JH
500 char *last = s - start_shift, *last1, *last2;
501 char *s1 = s;
502
503 tmp = PL_bostr;
504 t = s - prog->check_offset_max;
505 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
506 && (!(prog->reganch & ROPT_UTF8)
507 || (PL_bostr = strpos, /* Used in regcopmaybe() */
508 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
509 && t > strpos)))
30944b6d 510 /* EMPTY */;
2c2d71f5
JH
511 else
512 t = strpos;
513 t += prog->anchored_offset;
6eb5f6b9
JH
514 if (t < other_last) /* These positions already checked */
515 t = other_last;
2c2d71f5
JH
516 PL_bostr = tmp;
517 last2 = last1 = strend - prog->minlen;
518 if (last < last1)
519 last1 = last;
520 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
521 /* On end-of-str: see comment below. */
522 s = fbm_instr((unsigned char*)t,
523 (unsigned char*)last1 + prog->anchored_offset
524 + SvCUR(prog->anchored_substr)
525 - (SvTAIL(prog->anchored_substr)!=0),
526 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
527 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
528 (s ? "Found" : "Contradicts"),
529 PL_colors[0],
7b0972df
JH
530 (int)(SvCUR(prog->anchored_substr)
531 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
532 SvPVX(prog->anchored_substr),
533 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
534 if (!s) {
535 if (last1 >= last2) {
536 DEBUG_r(PerlIO_printf(Perl_debug_log,
537 ", giving up...\n"));
538 goto fail_finish;
539 }
540 DEBUG_r(PerlIO_printf(Perl_debug_log,
541 ", trying floating at offset %ld...\n",
30944b6d 542 (long)(s1 + 1 - i_strpos)));
2c2d71f5 543 PL_regeol = strend; /* Used in HOP() */
6eb5f6b9 544 other_last = last1 + prog->anchored_offset + 1;
2c2d71f5
JH
545 s = HOPc(last, 1);
546 goto restart;
547 }
548 else {
549 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 550 (long)(s - i_strpos)));
2c2d71f5 551 t = s - prog->anchored_offset;
6eb5f6b9 552 other_last = s + 1;
30944b6d 553 s = s1;
2c2d71f5
JH
554 if (t == strpos)
555 goto try_at_start;
2c2d71f5
JH
556 goto try_at_offset;
557 }
30944b6d 558 }
2c2d71f5
JH
559 }
560 else { /* Take into account the floating substring. */
561 char *last, *last1;
562 char *s1 = s;
563
564 t = s - start_shift;
565 last1 = last = strend - prog->minlen + prog->float_min_offset;
566 if (last - t > prog->float_max_offset)
567 last = t + prog->float_max_offset;
568 s = t + prog->float_min_offset;
6eb5f6b9
JH
569 if (s < other_last)
570 s = other_last;
2c2d71f5
JH
571 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
572 /* fbm_instr() takes into account exact value of end-of-str
573 if the check is SvTAIL(ed). Since false positives are OK,
574 and end-of-str is not later than strend we are OK. */
575 s = fbm_instr((unsigned char*)s,
576 (unsigned char*)last + SvCUR(prog->float_substr)
577 - (SvTAIL(prog->float_substr)!=0),
578 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
579 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
580 (s ? "Found" : "Contradicts"),
581 PL_colors[0],
7b0972df
JH
582 (int)(SvCUR(prog->float_substr)
583 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
584 SvPVX(prog->float_substr),
585 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
586 if (!s) {
587 if (last1 == last) {
588 DEBUG_r(PerlIO_printf(Perl_debug_log,
589 ", giving up...\n"));
590 goto fail_finish;
591 }
592 DEBUG_r(PerlIO_printf(Perl_debug_log,
593 ", trying anchored starting at offset %ld...\n",
30944b6d 594 (long)(s1 + 1 - i_strpos)));
6eb5f6b9 595 other_last = last + 1;
2c2d71f5
JH
596 PL_regeol = strend; /* Used in HOP() */
597 s = HOPc(t, 1);
598 goto restart;
599 }
600 else {
601 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 602 (long)(s - i_strpos)));
6eb5f6b9 603 other_last = s + 1;
30944b6d 604 s = s1;
2c2d71f5
JH
605 if (t == strpos)
606 goto try_at_start;
2c2d71f5
JH
607 goto try_at_offset;
608 }
609 }
cad2e5aa 610 }
2c2d71f5
JH
611
612 t = s - prog->check_offset_max;
613 tmp = PL_bostr;
614 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
615 && (!(prog->reganch & ROPT_UTF8)
616 || (PL_bostr = strpos, /* Used in regcopmaybe() */
617 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
618 && t > strpos)))) {
619 PL_bostr = tmp;
620 /* Fixed substring is found far enough so that the match
621 cannot start at strpos. */
622 try_at_offset:
cad2e5aa 623 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
624 /* Eventually fbm_*() should handle this, but often
625 anchored_offset is not 0, so this check will not be wasted. */
626 /* XXXX In the code below we prefer to look for "^" even in
627 presence of anchored substrings. And we search even
628 beyond the found float position. These pessimizations
629 are historical artefacts only. */
630 find_anchor:
2c2d71f5 631 while (t < strend - prog->minlen) {
cad2e5aa 632 if (*t == '\n') {
4ee3650e 633 if (t < check_at - prog->check_offset_min) {
30944b6d 634 if (prog->anchored_substr) {
4ee3650e
GS
635 /* Since we moved from the found position,
636 we definitely contradict the found anchored
30944b6d
IZ
637 substr. Due to the above check we do not
638 contradict "check" substr.
639 Thus we can arrive here only if check substr
640 is float. Redo checking for "other"=="fixed".
641 */
642 strpos = t + 1;
643 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
644 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
645 goto do_other_anchored;
646 }
4ee3650e
GS
647 /* We don't contradict the found floating substring. */
648 /* XXXX Why not check for STCLASS? */
cad2e5aa 649 s = t + 1;
2c2d71f5 650 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 651 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
652 goto set_useful;
653 }
4ee3650e
GS
654 /* Position contradicts check-string */
655 /* XXXX probably better to look for check-string
656 than for "\n", so one should lower the limit for t? */
657 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 658 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 659 other_last = strpos = s = t + 1;
cad2e5aa
JH
660 goto restart;
661 }
662 t++;
663 }
2c2d71f5
JH
664 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
665 PL_colors[0],PL_colors[1]));
666 goto fail_finish;
cad2e5aa 667 }
f5952150
GS
668 else {
669 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
670 PL_colors[0],PL_colors[1]));
671 }
cad2e5aa
JH
672 s = t;
673 set_useful:
2c2d71f5 674 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
675 }
676 else {
2c2d71f5 677 PL_bostr = tmp;
f5952150 678 /* The found string does not prohibit matching at strpos,
2c2d71f5 679 - no optimization of calling REx engine can be performed,
f5952150
GS
680 unless it was an MBOL and we are not after MBOL,
681 or a future STCLASS check will fail this. */
2c2d71f5
JH
682 try_at_start:
683 /* Even in this situation we may use MBOL flag if strpos is offset
684 wrt the start of the string. */
05b4157f 685 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 686 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
687 /* May be due to an implicit anchor of m{.*foo} */
688 && !(prog->reganch & ROPT_IMPLICIT))
689 {
cad2e5aa
JH
690 t = strpos;
691 goto find_anchor;
692 }
30944b6d 693 DEBUG_r( if (ml_anch)
f5952150
GS
694 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
695 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 696 );
2c2d71f5 697 success_at_start:
30944b6d 698 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 699 && prog->check_substr /* Could be deleted already */
cad2e5aa 700 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
701 && prog->check_substr == prog->float_substr)
702 {
cad2e5aa 703 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 704 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
705 SvREFCNT_dec(prog->check_substr);
706 prog->check_substr = Nullsv; /* disable */
707 prog->float_substr = Nullsv; /* clear */
5e39e1e5 708 check = Nullsv; /* abort */
cad2e5aa 709 s = strpos;
3cf5c195
IZ
710 /* XXXX This is a remnant of the old implementation. It
711 looks wasteful, since now INTUIT can use many
6eb5f6b9 712 other heuristics. */
cad2e5aa
JH
713 prog->reganch &= ~RE_USE_INTUIT;
714 }
715 else
716 s = strpos;
717 }
718
6eb5f6b9
JH
719 /* Last resort... */
720 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
721 if (prog->regstclass) {
722 /* minlen == 0 is possible if regstclass is \b or \B,
723 and the fixed substr is ''$.
724 Since minlen is already taken into account, s+1 is before strend;
725 accidentally, minlen >= 1 guaranties no false positives at s + 1
726 even for \b or \B. But (minlen? 1 : 0) below assumes that
727 regstclass does not come from lookahead... */
728 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
729 This leaves EXACTF only, which is dealt with in find_byclass(). */
66e933ab
GS
730 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
731 ? STR_LEN(prog->regstclass)
732 : 1);
6eb5f6b9 733 char *endpos = (prog->anchored_substr || ml_anch)
66e933ab
GS
734 ? s + (prog->minlen? cl_l : 0)
735 : (prog->float_substr ? check_at - start_shift + cl_l
6eb5f6b9 736 : strend) ;
a1933d95 737 char *startpos = strbeg;
6eb5f6b9
JH
738
739 t = s;
76384e4a
GS
740 if (prog->reganch & ROPT_UTF8) {
741 PL_regdata = prog->data; /* Used by REGINCLASS UTF logic */
742 PL_bostr = startpos;
743 }
6eb5f6b9
JH
744 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
745 if (!s) {
746#ifdef DEBUGGING
747 char *what;
748#endif
749 if (endpos == strend) {
750 DEBUG_r( PerlIO_printf(Perl_debug_log,
751 "Could not match STCLASS...\n") );
752 goto fail;
753 }
66e933ab
GS
754 DEBUG_r( PerlIO_printf(Perl_debug_log,
755 "This position contradicts STCLASS...\n") );
653099ff
GS
756 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
757 goto fail;
6eb5f6b9
JH
758 /* Contradict one of substrings */
759 if (prog->anchored_substr) {
6eb5f6b9
JH
760 if (prog->anchored_substr == check) {
761 DEBUG_r( what = "anchored" );
762 hop_and_restart:
763 PL_regeol = strend; /* Used in HOP() */
764 s = HOPc(t, 1);
66e933ab
GS
765 if (s + start_shift + end_shift > strend) {
766 /* XXXX Should be taken into account earlier? */
767 DEBUG_r( PerlIO_printf(Perl_debug_log,
768 "Could not match STCLASS...\n") );
769 goto fail;
770 }
5e39e1e5
HS
771 if (!check)
772 goto giveup;
6eb5f6b9 773 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 774 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
775 what, (long)(s + start_shift - i_strpos)) );
776 goto restart;
777 }
66e933ab 778 /* Have both, check_string is floating */
6eb5f6b9
JH
779 if (t + start_shift >= check_at) /* Contradicts floating=check */
780 goto retry_floating_check;
781 /* Recheck anchored substring, but not floating... */
782 s = check_at;
5e39e1e5
HS
783 if (!check)
784 goto giveup;
6eb5f6b9 785 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 786 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
787 (long)(other_last - i_strpos)) );
788 goto do_other_anchored;
789 }
60e71179
GS
790 /* Another way we could have checked stclass at the
791 current position only: */
792 if (ml_anch) {
793 s = t = t + 1;
5e39e1e5
HS
794 if (!check)
795 goto giveup;
60e71179 796 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
797 "Looking for /%s^%s/m starting at offset %ld...\n",
798 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 799 goto try_at_offset;
66e933ab 800 }
60e71179
GS
801 if (!prog->float_substr) /* Could have been deleted */
802 goto fail;
6eb5f6b9
JH
803 /* Check is floating subtring. */
804 retry_floating_check:
805 t = check_at - start_shift;
806 DEBUG_r( what = "floating" );
807 goto hop_and_restart;
808 }
809 DEBUG_r( if (t != s)
810 PerlIO_printf(Perl_debug_log,
811 "By STCLASS: moving %ld --> %ld\n",
812 (long)(t - i_strpos), (long)(s - i_strpos));
813 else
814 PerlIO_printf(Perl_debug_log,
815 "Does not contradict STCLASS...\n") );
816 }
5e39e1e5
HS
817 giveup:
818 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
819 PL_colors[4], (check ? "Guessed" : "Giving up"),
820 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 821 return s;
2c2d71f5
JH
822
823 fail_finish: /* Substring not found */
66e933ab
GS
824 if (prog->check_substr) /* could be removed already */
825 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 826 fail:
2c2d71f5 827 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
828 PL_colors[4],PL_colors[5]));
829 return Nullch;
830}
9661b544 831
6eb5f6b9 832/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
833STATIC char *
834S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 835{
6eb5f6b9
JH
836 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
837 char *m;
d8093b23
G
838 STRLEN ln;
839 unsigned int c1;
840 unsigned int c2;
6eb5f6b9
JH
841 char *e;
842 register I32 tmp = 1; /* Scratch variable? */
cad2e5aa 843
6eb5f6b9
JH
844 /* We know what class it must start with. */
845 switch (OP(c)) {
846 case ANYOFUTF8:
847 while (s < strend) {
848 if (REGINCLASSUTF8(c, (U8*)s)) {
849 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 850 goto got_it;
3cf5c195 851 else
6eb5f6b9 852 tmp = doevery;
a0ed51b3 853 }
6eb5f6b9
JH
854 else
855 tmp = 1;
a0ed51b3
LW
856 s += UTF8SKIP(s);
857 }
6eb5f6b9
JH
858 break;
859 case ANYOF:
a0ed51b3 860 while (s < strend) {
d8093b23 861 if (REGINCLASS(c, *(U8*)s)) {
6eb5f6b9
JH
862 if (tmp && (norun || regtry(prog, s)))
863 goto got_it;
864 else
865 tmp = doevery;
a0ed51b3 866 }
6eb5f6b9
JH
867 else
868 tmp = 1;
a0ed51b3 869 s++;
a0d0e21e 870 }
6eb5f6b9
JH
871 break;
872 case EXACTF:
873 m = STRING(c);
874 ln = STR_LEN(c);
d8093b23 875 c1 = *(U8*)m;
6eb5f6b9
JH
876 c2 = PL_fold[c1];
877 goto do_exactf;
878 case EXACTFL:
879 m = STRING(c);
880 ln = STR_LEN(c);
d8093b23 881 c1 = *(U8*)m;
6eb5f6b9
JH
882 c2 = PL_fold_locale[c1];
883 do_exactf:
884 e = strend - ln;
b3c9acc1 885
6eb5f6b9
JH
886 if (norun && e < s)
887 e = s; /* Due to minlen logic of intuit() */
b3c9acc1
IZ
888 /* Here it is NOT UTF! */
889 if (c1 == c2) {
890 while (s <= e) {
d8093b23 891 if ( *(U8*)s == c1
66e933ab
GS
892 && (ln == 1 || !(OP(c) == EXACTF
893 ? ibcmp(s, m, ln)
894 : ibcmp_locale(s, m, ln)))
6eb5f6b9 895 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
896 goto got_it;
897 s++;
898 }
899 } else {
900 while (s <= e) {
d8093b23 901 if ( (*(U8*)s == c1 || *(U8*)s == c2)
66e933ab
GS
902 && (ln == 1 || !(OP(c) == EXACTF
903 ? ibcmp(s, m, ln)
904 : ibcmp_locale(s, m, ln)))
6eb5f6b9 905 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
906 goto got_it;
907 s++;
908 }
909 }
910 break;
bbce6d69 911 case BOUNDL:
3280af22 912 PL_reg_flags |= RF_tainted;
bbce6d69 913 /* FALL THROUGH */
a0d0e21e 914 case BOUND:
6eb5f6b9 915 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 916 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 917 while (s < strend) {
95bac841 918 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 919 tmp = !tmp;
6eb5f6b9 920 if ((norun || regtry(prog, s)))
a0d0e21e 921 goto got_it;
a687059c 922 }
a0d0e21e
LW
923 s++;
924 }
6eb5f6b9 925 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
926 goto got_it;
927 break;
a0ed51b3
LW
928 case BOUNDLUTF8:
929 PL_reg_flags |= RF_tainted;
930 /* FALL THROUGH */
931 case BOUNDUTF8:
667bb95a
JH
932 if (s == startpos)
933 tmp = '\n';
934 else {
935 U8 *r = reghop((U8*)s, -1);
936
937 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
938 }
939 tmp = ((OP(c) == BOUNDUTF8 ?
940 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 941 while (s < strend) {
76384e4a 942 if (tmp == !(OP(c) == BOUNDUTF8 ?
dfe13c55
GS
943 swash_fetch(PL_utf8_alnum, (U8*)s) :
944 isALNUM_LC_utf8((U8*)s)))
945 {
a0ed51b3 946 tmp = !tmp;
6eb5f6b9 947 if ((norun || regtry(prog, s)))
a0ed51b3
LW
948 goto got_it;
949 }
950 s += UTF8SKIP(s);
951 }
6eb5f6b9 952 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
953 goto got_it;
954 break;
bbce6d69 955 case NBOUNDL:
3280af22 956 PL_reg_flags |= RF_tainted;
bbce6d69 957 /* FALL THROUGH */
a0d0e21e 958 case NBOUND:
6eb5f6b9 959 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 960 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 961 while (s < strend) {
95bac841 962 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 963 tmp = !tmp;
6eb5f6b9 964 else if ((norun || regtry(prog, s)))
a0d0e21e
LW
965 goto got_it;
966 s++;
967 }
6eb5f6b9 968 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
969 goto got_it;
970 break;
a0ed51b3
LW
971 case NBOUNDLUTF8:
972 PL_reg_flags |= RF_tainted;
973 /* FALL THROUGH */
974 case NBOUNDUTF8:
667bb95a
JH
975 if (s == startpos)
976 tmp = '\n';
977 else {
978 U8 *r = reghop((U8*)s, -1);
979
980 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
981 }
982 tmp = ((OP(c) == NBOUNDUTF8 ?
983 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 984 while (s < strend) {
76384e4a 985 if (tmp == !(OP(c) == NBOUNDUTF8 ?
dfe13c55
GS
986 swash_fetch(PL_utf8_alnum, (U8*)s) :
987 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 988 tmp = !tmp;
6eb5f6b9 989 else if ((norun || regtry(prog, s)))
a0ed51b3
LW
990 goto got_it;
991 s += UTF8SKIP(s);
992 }
6eb5f6b9 993 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
994 goto got_it;
995 break;
a0d0e21e
LW
996 case ALNUM:
997 while (s < strend) {
bbce6d69 998 if (isALNUM(*s)) {
6eb5f6b9 999 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1000 goto got_it;
1001 else
1002 tmp = doevery;
1003 }
1004 else
1005 tmp = 1;
1006 s++;
1007 }
1008 break;
a0ed51b3
LW
1009 case ALNUMUTF8:
1010 while (s < strend) {
dfe13c55 1011 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 1012 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1013 goto got_it;
1014 else
1015 tmp = doevery;
1016 }
1017 else
1018 tmp = 1;
1019 s += UTF8SKIP(s);
1020 }
1021 break;
bbce6d69 1022 case ALNUML:
3280af22 1023 PL_reg_flags |= RF_tainted;
bbce6d69 1024 while (s < strend) {
1025 if (isALNUM_LC(*s)) {
6eb5f6b9 1026 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1027 goto got_it;
a687059c 1028 else
a0d0e21e
LW
1029 tmp = doevery;
1030 }
1031 else
1032 tmp = 1;
1033 s++;
1034 }
1035 break;
a0ed51b3
LW
1036 case ALNUMLUTF8:
1037 PL_reg_flags |= RF_tainted;
1038 while (s < strend) {
dfe13c55 1039 if (isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1040 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1041 goto got_it;
1042 else
1043 tmp = doevery;
1044 }
1045 else
1046 tmp = 1;
1047 s += UTF8SKIP(s);
1048 }
1049 break;
a0d0e21e
LW
1050 case NALNUM:
1051 while (s < strend) {
bbce6d69 1052 if (!isALNUM(*s)) {
6eb5f6b9 1053 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1054 goto got_it;
1055 else
1056 tmp = doevery;
1057 }
1058 else
1059 tmp = 1;
1060 s++;
1061 }
1062 break;
a0ed51b3
LW
1063 case NALNUMUTF8:
1064 while (s < strend) {
dfe13c55 1065 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 1066 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1067 goto got_it;
1068 else
1069 tmp = doevery;
1070 }
1071 else
1072 tmp = 1;
1073 s += UTF8SKIP(s);
1074 }
1075 break;
bbce6d69 1076 case NALNUML:
3280af22 1077 PL_reg_flags |= RF_tainted;
bbce6d69 1078 while (s < strend) {
1079 if (!isALNUM_LC(*s)) {
6eb5f6b9 1080 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1081 goto got_it;
a687059c 1082 else
a0d0e21e 1083 tmp = doevery;
a687059c 1084 }
a0d0e21e
LW
1085 else
1086 tmp = 1;
1087 s++;
1088 }
1089 break;
a0ed51b3
LW
1090 case NALNUMLUTF8:
1091 PL_reg_flags |= RF_tainted;
1092 while (s < strend) {
dfe13c55 1093 if (!isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1094 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1095 goto got_it;
1096 else
1097 tmp = doevery;
1098 }
1099 else
1100 tmp = 1;
1101 s += UTF8SKIP(s);
1102 }
1103 break;
a0d0e21e
LW
1104 case SPACE:
1105 while (s < strend) {
1106 if (isSPACE(*s)) {
6eb5f6b9 1107 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1108 goto got_it;
1109 else
1110 tmp = doevery;
2304df62 1111 }
a0d0e21e
LW
1112 else
1113 tmp = 1;
1114 s++;
1115 }
1116 break;
a0ed51b3
LW
1117 case SPACEUTF8:
1118 while (s < strend) {
dfe13c55 1119 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
6eb5f6b9 1120 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1121 goto got_it;
1122 else
1123 tmp = doevery;
1124 }
1125 else
1126 tmp = 1;
1127 s += UTF8SKIP(s);
1128 }
1129 break;
bbce6d69 1130 case SPACEL:
3280af22 1131 PL_reg_flags |= RF_tainted;
bbce6d69 1132 while (s < strend) {
1133 if (isSPACE_LC(*s)) {
6eb5f6b9 1134 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1135 goto got_it;
1136 else
1137 tmp = doevery;
1138 }
1139 else
1140 tmp = 1;
1141 s++;
1142 }
1143 break;
a0ed51b3
LW
1144 case SPACELUTF8:
1145 PL_reg_flags |= RF_tainted;
1146 while (s < strend) {
dfe13c55 1147 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
6eb5f6b9 1148 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1149 goto got_it;
1150 else
1151 tmp = doevery;
1152 }
1153 else
1154 tmp = 1;
1155 s += UTF8SKIP(s);
1156 }
1157 break;
a0d0e21e
LW
1158 case NSPACE:
1159 while (s < strend) {
1160 if (!isSPACE(*s)) {
6eb5f6b9 1161 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1162 goto got_it;
1163 else
1164 tmp = doevery;
a687059c 1165 }
a0d0e21e
LW
1166 else
1167 tmp = 1;
1168 s++;
1169 }
1170 break;
a0ed51b3
LW
1171 case NSPACEUTF8:
1172 while (s < strend) {
dfe13c55 1173 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
6eb5f6b9 1174 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1175 goto got_it;
1176 else
1177 tmp = doevery;
1178 }
1179 else
1180 tmp = 1;
1181 s += UTF8SKIP(s);
1182 }
1183 break;
bbce6d69 1184 case NSPACEL:
3280af22 1185 PL_reg_flags |= RF_tainted;
bbce6d69 1186 while (s < strend) {
1187 if (!isSPACE_LC(*s)) {
6eb5f6b9 1188 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1189 goto got_it;
1190 else
1191 tmp = doevery;
1192 }
1193 else
1194 tmp = 1;
1195 s++;
1196 }
1197 break;
a0ed51b3
LW
1198 case NSPACELUTF8:
1199 PL_reg_flags |= RF_tainted;
1200 while (s < strend) {
dfe13c55 1201 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
6eb5f6b9 1202 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1203 goto got_it;
1204 else
1205 tmp = doevery;
1206 }
1207 else
1208 tmp = 1;
1209 s += UTF8SKIP(s);
1210 }
1211 break;
a0d0e21e
LW
1212 case DIGIT:
1213 while (s < strend) {
1214 if (isDIGIT(*s)) {
6eb5f6b9 1215 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1216 goto got_it;
1217 else
1218 tmp = doevery;
2b69d0c2 1219 }
a0d0e21e
LW
1220 else
1221 tmp = 1;
1222 s++;
1223 }
1224 break;
a0ed51b3
LW
1225 case DIGITUTF8:
1226 while (s < strend) {
dfe13c55 1227 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1228 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1229 goto got_it;
1230 else
1231 tmp = doevery;
1232 }
1233 else
1234 tmp = 1;
1235 s += UTF8SKIP(s);
1236 }
1237 break;
b8c5462f
JH
1238 case DIGITL:
1239 PL_reg_flags |= RF_tainted;
1240 while (s < strend) {
1241 if (isDIGIT_LC(*s)) {
6eb5f6b9 1242 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1243 goto got_it;
1244 else
1245 tmp = doevery;
1246 }
1247 else
1248 tmp = 1;
1249 s++;
1250 }
1251 break;
1252 case DIGITLUTF8:
1253 PL_reg_flags |= RF_tainted;
1254 while (s < strend) {
1255 if (isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1256 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1257 goto got_it;
1258 else
1259 tmp = doevery;
1260 }
1261 else
1262 tmp = 1;
1263 s += UTF8SKIP(s);
1264 }
1265 break;
a0d0e21e
LW
1266 case NDIGIT:
1267 while (s < strend) {
1268 if (!isDIGIT(*s)) {
6eb5f6b9 1269 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1270 goto got_it;
1271 else
1272 tmp = doevery;
a687059c 1273 }
a0d0e21e
LW
1274 else
1275 tmp = 1;
1276 s++;
1277 }
1278 break;
a0ed51b3
LW
1279 case NDIGITUTF8:
1280 while (s < strend) {
dfe13c55 1281 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1282 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1283 goto got_it;
1284 else
1285 tmp = doevery;
1286 }
1287 else
1288 tmp = 1;
1289 s += UTF8SKIP(s);
1290 }
1291 break;
b8c5462f
JH
1292 case NDIGITL:
1293 PL_reg_flags |= RF_tainted;
1294 while (s < strend) {
1295 if (!isDIGIT_LC(*s)) {
6eb5f6b9 1296 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1297 goto got_it;
1298 else
1299 tmp = doevery;
1300 }
1301 else
1302 tmp = 1;
1303 s++;
a0ed51b3 1304 }
b8c5462f
JH
1305 break;
1306 case NDIGITLUTF8:
1307 PL_reg_flags |= RF_tainted;
1308 while (s < strend) {
1309 if (!isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1310 if (tmp && (norun || regtry(prog, s)))
b8c5462f 1311 goto got_it;
cf93c79d 1312 else
b8c5462f
JH
1313 tmp = doevery;
1314 }
1315 else
1316 tmp = 1;
1317 s += UTF8SKIP(s);
1318 }
1319 break;
b3c9acc1 1320 default:
3c3eec57
GS
1321 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1322 break;
d6a28714 1323 }
6eb5f6b9
JH
1324 return 0;
1325 got_it:
1326 return s;
1327}
1328
1329/*
1330 - regexec_flags - match a regexp against a string
1331 */
1332I32
1333Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1334 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1335/* strend: pointer to null at end of string */
1336/* strbeg: real beginning of string */
1337/* minend: end of match must be >=minend after stringarg. */
1338/* data: May be used for some additional optimizations. */
1339/* nosave: For optimizations. */
1340{
1341 dTHR;
1342 register char *s;
1343 register regnode *c;
1344 register char *startpos = stringarg;
6eb5f6b9
JH
1345 I32 minlen; /* must match at least this many chars */
1346 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1347 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1348 constant substr. */ /* CC */
1349 I32 end_shift = 0; /* Same for the end. */ /* CC */
1350 I32 scream_pos = -1; /* Internal iterator of scream. */
1351 char *scream_olds;
1352 SV* oreplsv = GvSV(PL_replgv);
1353
1354 PL_regcc = 0;
1355
1356 cache_re(prog);
1357#ifdef DEBUGGING
1358 PL_regnarrate = PL_debug & 512;
1359#endif
1360
1361 /* Be paranoid... */
1362 if (prog == NULL || startpos == NULL) {
1363 Perl_croak(aTHX_ "NULL regexp parameter");
1364 return 0;
1365 }
1366
1367 minlen = prog->minlen;
1368 if (strend - startpos < minlen) goto phooey;
1369
1370 if (startpos == strbeg) /* is ^ valid at stringarg? */
1371 PL_regprev = '\n';
1372 else {
1373 PL_regprev = (U32)stringarg[-1];
1374 if (!PL_multiline && PL_regprev == '\n')
1375 PL_regprev = '\0'; /* force ^ to NOT match */
1376 }
1377
1378 /* Check validity of program. */
1379 if (UCHARAT(prog->program) != REG_MAGIC) {
1380 Perl_croak(aTHX_ "corrupted regexp program");
1381 }
1382
1383 PL_reg_flags = 0;
1384 PL_reg_eval_set = 0;
1385 PL_reg_maxiter = 0;
1386
1387 if (prog->reganch & ROPT_UTF8)
1388 PL_reg_flags |= RF_utf8;
1389
1390 /* Mark beginning of line for ^ and lookbehind. */
1391 PL_regbol = startpos;
1392 PL_bostr = strbeg;
1393 PL_reg_sv = sv;
1394
1395 /* Mark end of line for $ (and such) */
1396 PL_regeol = strend;
1397
1398 /* see how far we have to get to not match where we matched before */
1399 PL_regtill = startpos+minend;
1400
1401 /* We start without call_cc context. */
1402 PL_reg_call_cc = 0;
1403
1404 /* If there is a "must appear" string, look for it. */
1405 s = startpos;
1406
1407 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1408 MAGIC *mg;
1409
1410 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1411 PL_reg_ganch = startpos;
1412 else if (sv && SvTYPE(sv) >= SVt_PVMG
1413 && SvMAGIC(sv)
1414 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1415 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1416 if (prog->reganch & ROPT_ANCH_GPOS) {
1417 if (s > PL_reg_ganch)
1418 goto phooey;
1419 s = PL_reg_ganch;
1420 }
1421 }
1422 else /* pos() not defined */
1423 PL_reg_ganch = strbeg;
1424 }
1425
1426 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1427 re_scream_pos_data d;
1428
1429 d.scream_olds = &scream_olds;
1430 d.scream_pos = &scream_pos;
1431 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1432 if (!s)
1433 goto phooey; /* not present */
1434 }
1435
1436 DEBUG_r( if (!PL_colorset) reginitcolors() );
1437 DEBUG_r(PerlIO_printf(Perl_debug_log,
1438 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1439 PL_colors[4],PL_colors[5],PL_colors[0],
1440 prog->precomp,
1441 PL_colors[1],
1442 (strlen(prog->precomp) > 60 ? "..." : ""),
1443 PL_colors[0],
1444 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1445 startpos, PL_colors[1],
1446 (strend - startpos > 60 ? "..." : ""))
1447 );
1448
1449 /* Simplest case: anchored match need be tried only once. */
1450 /* [unless only anchor is BOL and multiline is set] */
1451 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1452 if (s == startpos && regtry(prog, startpos))
1453 goto got_it;
1454 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1455 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1456 {
1457 char *end;
1458
1459 if (minlen)
1460 dontbother = minlen - 1;
1461 end = HOPc(strend, -dontbother) - 1;
1462 /* for multiline we only have to try after newlines */
1463 if (prog->check_substr) {
1464 if (s == startpos)
1465 goto after_try;
1466 while (1) {
1467 if (regtry(prog, s))
1468 goto got_it;
1469 after_try:
1470 if (s >= end)
1471 goto phooey;
1472 if (prog->reganch & RE_USE_INTUIT) {
1473 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1474 if (!s)
1475 goto phooey;
1476 }
1477 else
1478 s++;
1479 }
1480 } else {
1481 if (s > startpos)
1482 s--;
1483 while (s < end) {
1484 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1485 if (regtry(prog, s))
1486 goto got_it;
1487 }
1488 }
1489 }
1490 }
1491 goto phooey;
1492 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1493 if (regtry(prog, PL_reg_ganch))
1494 goto got_it;
1495 goto phooey;
1496 }
1497
1498 /* Messy cases: unanchored match. */
1499 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1500 /* we have /x+whatever/ */
1501 /* it must be a one character string (XXXX Except UTF?) */
1502 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1503#ifdef DEBUGGING
1504 int did_match = 0;
1505#endif
1506
6eb5f6b9
JH
1507 if (UTF) {
1508 while (s < strend) {
1509 if (*s == ch) {
bf93d4cc 1510 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1511 if (regtry(prog, s)) goto got_it;
1512 s += UTF8SKIP(s);
1513 while (s < strend && *s == ch)
1514 s += UTF8SKIP(s);
1515 }
1516 s += UTF8SKIP(s);
1517 }
1518 }
1519 else {
1520 while (s < strend) {
1521 if (*s == ch) {
bf93d4cc 1522 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1523 if (regtry(prog, s)) goto got_it;
1524 s++;
1525 while (s < strend && *s == ch)
1526 s++;
1527 }
1528 s++;
1529 }
1530 }
bf93d4cc
GS
1531 DEBUG_r(did_match ||
1532 PerlIO_printf(Perl_debug_log,
1533 "Did not find anchored character...\n"));
6eb5f6b9
JH
1534 }
1535 /*SUPPRESS 560*/
1536 else if (prog->anchored_substr != Nullsv
1537 || (prog->float_substr != Nullsv
1538 && prog->float_max_offset < strend - s)) {
1539 SV *must = prog->anchored_substr
1540 ? prog->anchored_substr : prog->float_substr;
1541 I32 back_max =
1542 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1543 I32 back_min =
1544 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
6eb5f6b9
JH
1545 char *last = HOPc(strend, /* Cannot start after this */
1546 -(I32)(CHR_SVLEN(must)
1547 - (SvTAIL(must) != 0) + back_min));
1548 char *last1; /* Last position checked before */
bf93d4cc
GS
1549#ifdef DEBUGGING
1550 int did_match = 0;
1551#endif
6eb5f6b9
JH
1552
1553 if (s > PL_bostr)
1554 last1 = HOPc(s, -1);
1555 else
1556 last1 = s - 1; /* bogus */
1557
1558 /* XXXX check_substr already used to find `s', can optimize if
1559 check_substr==must. */
1560 scream_pos = -1;
1561 dontbother = end_shift;
1562 strend = HOPc(strend, -dontbother);
1563 while ( (s <= last) &&
1564 ((flags & REXEC_SCREAM)
1565 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1566 end_shift, &scream_pos, 0))
1567 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1568 (unsigned char*)strend, must,
1569 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1570 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1571 if (HOPc(s, -back_max) > last1) {
1572 last1 = HOPc(s, -back_min);
1573 s = HOPc(s, -back_max);
1574 }
1575 else {
1576 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1577
1578 last1 = HOPc(s, -back_min);
1579 s = t;
1580 }
1581 if (UTF) {
1582 while (s <= last1) {
1583 if (regtry(prog, s))
1584 goto got_it;
1585 s += UTF8SKIP(s);
1586 }
1587 }
1588 else {
1589 while (s <= last1) {
1590 if (regtry(prog, s))
1591 goto got_it;
1592 s++;
1593 }
1594 }
1595 }
bf93d4cc
GS
1596 DEBUG_r(did_match ||
1597 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1598 ((must == prog->anchored_substr)
1599 ? "anchored" : "floating"),
1600 PL_colors[0],
1601 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1602 SvPVX(must),
1603 PL_colors[1], (SvTAIL(must) ? "$" : "")));
6eb5f6b9
JH
1604 goto phooey;
1605 }
155aba94 1606 else if ((c = prog->regstclass)) {
66e933ab
GS
1607 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1608 /* don't bother with what can't match */
6eb5f6b9
JH
1609 strend = HOPc(strend, -(minlen - 1));
1610 if (find_byclass(prog, c, s, strend, startpos, 0))
1611 goto got_it;
bf93d4cc 1612 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1613 }
1614 else {
1615 dontbother = 0;
1616 if (prog->float_substr != Nullsv) { /* Trim the end. */
1617 char *last;
d6a28714
JH
1618
1619 if (flags & REXEC_SCREAM) {
1620 last = screaminstr(sv, prog->float_substr, s - strbeg,
1621 end_shift, &scream_pos, 1); /* last one */
1622 if (!last)
1623 last = scream_olds; /* Only one occurence. */
b8c5462f 1624 }
d6a28714
JH
1625 else {
1626 STRLEN len;
1627 char *little = SvPV(prog->float_substr, len);
1628
1629 if (SvTAIL(prog->float_substr)) {
1630 if (memEQ(strend - len + 1, little, len - 1))
1631 last = strend - len + 1;
1632 else if (!PL_multiline)
1633 last = memEQ(strend - len, little, len)
1634 ? strend - len : Nullch;
b8c5462f 1635 else
d6a28714
JH
1636 goto find_last;
1637 } else {
1638 find_last:
1639 if (len)
1640 last = rninstr(s, strend, little, little + len);
b8c5462f 1641 else
d6a28714 1642 last = strend; /* matching `$' */
b8c5462f 1643 }
b8c5462f 1644 }
bf93d4cc
GS
1645 if (last == NULL) {
1646 DEBUG_r(PerlIO_printf(Perl_debug_log,
1647 "%sCan't trim the tail, match fails (should not happen)%s\n",
1648 PL_colors[4],PL_colors[5]));
1649 goto phooey; /* Should not happen! */
1650 }
d6a28714
JH
1651 dontbother = strend - last + prog->float_min_offset;
1652 }
1653 if (minlen && (dontbother < minlen))
1654 dontbother = minlen - 1;
1655 strend -= dontbother; /* this one's always in bytes! */
1656 /* We don't know much -- general case. */
1657 if (UTF) {
1658 for (;;) {
1659 if (regtry(prog, s))
1660 goto got_it;
1661 if (s >= strend)
1662 break;
b8c5462f 1663 s += UTF8SKIP(s);
d6a28714
JH
1664 };
1665 }
1666 else {
1667 do {
1668 if (regtry(prog, s))
1669 goto got_it;
1670 } while (s++ < strend);
1671 }
1672 }
1673
1674 /* Failure. */
1675 goto phooey;
1676
1677got_it:
1678 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1679
1680 if (PL_reg_eval_set) {
1681 /* Preserve the current value of $^R */
1682 if (oreplsv != GvSV(PL_replgv))
1683 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1684 restored, the value remains
1685 the same. */
1686 restore_pos(aTHXo_ 0);
1687 }
1688
1689 /* make sure $`, $&, $', and $digit will work later */
1690 if ( !(flags & REXEC_NOT_FIRST) ) {
1691 if (RX_MATCH_COPIED(prog)) {
1692 Safefree(prog->subbeg);
1693 RX_MATCH_COPIED_off(prog);
1694 }
1695 if (flags & REXEC_COPY_STR) {
1696 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1697
1698 s = savepvn(strbeg, i);
1699 prog->subbeg = s;
1700 prog->sublen = i;
1701 RX_MATCH_COPIED_on(prog);
1702 }
1703 else {
1704 prog->subbeg = strbeg;
1705 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1706 }
1707 }
1708
1709 return 1;
1710
1711phooey:
bf93d4cc
GS
1712 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1713 PL_colors[4],PL_colors[5]));
d6a28714
JH
1714 if (PL_reg_eval_set)
1715 restore_pos(aTHXo_ 0);
1716 return 0;
1717}
1718
1719/*
1720 - regtry - try match at specific point
1721 */
1722STATIC I32 /* 0 failure, 1 success */
1723S_regtry(pTHX_ regexp *prog, char *startpos)
1724{
1725 dTHR;
1726 register I32 i;
1727 register I32 *sp;
1728 register I32 *ep;
1729 CHECKPOINT lastcp;
1730
02db2b7b
IZ
1731#ifdef DEBUGGING
1732 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1733#endif
d6a28714
JH
1734 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1735 MAGIC *mg;
1736
1737 PL_reg_eval_set = RS_init;
1738 DEBUG_r(DEBUG_s(
b900a521
JH
1739 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1740 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1741 ));
e8347627 1742 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1743 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1744 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1745 SAVETMPS;
1746 /* Apparently this is not needed, judging by wantarray. */
e8347627 1747 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1748 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1749
1750 if (PL_reg_sv) {
1751 /* Make $_ available to executed code. */
1752 if (PL_reg_sv != DEFSV) {
1753 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1754 SAVESPTR(DEFSV);
1755 DEFSV = PL_reg_sv;
b8c5462f 1756 }
d6a28714
JH
1757
1758 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1759 && (mg = mg_find(PL_reg_sv, 'g')))) {
1760 /* prepare for quick setting of pos */
1761 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1762 mg = mg_find(PL_reg_sv, 'g');
1763 mg->mg_len = -1;
b8c5462f 1764 }
d6a28714
JH
1765 PL_reg_magic = mg;
1766 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1767 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1768 }
1769 if (!PL_reg_curpm)
0f79a09d 1770 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1771 PL_reg_curpm->op_pmregexp = prog;
1772 PL_reg_oldcurpm = PL_curpm;
1773 PL_curpm = PL_reg_curpm;
1774 if (RX_MATCH_COPIED(prog)) {
1775 /* Here is a serious problem: we cannot rewrite subbeg,
1776 since it may be needed if this match fails. Thus
1777 $` inside (?{}) could fail... */
1778 PL_reg_oldsaved = prog->subbeg;
1779 PL_reg_oldsavedlen = prog->sublen;
1780 RX_MATCH_COPIED_off(prog);
1781 }
1782 else
1783 PL_reg_oldsaved = Nullch;
1784 prog->subbeg = PL_bostr;
1785 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1786 }
1787 prog->startp[0] = startpos - PL_bostr;
1788 PL_reginput = startpos;
1789 PL_regstartp = prog->startp;
1790 PL_regendp = prog->endp;
1791 PL_reglastparen = &prog->lastparen;
1792 prog->lastparen = 0;
1793 PL_regsize = 0;
1794 DEBUG_r(PL_reg_starttry = startpos);
1795 if (PL_reg_start_tmpl <= prog->nparens) {
1796 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1797 if(PL_reg_start_tmp)
1798 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1799 else
1800 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1801 }
1802
1803 /* XXXX What this code is doing here?!!! There should be no need
1804 to do this again and again, PL_reglastparen should take care of
3dd2943c 1805 this! --ilya*/
dafc8851
JH
1806
1807 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1808 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1809 * PL_reglastparen), is not needed at all by the test suite
1810 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1811 * enough, for building DynaLoader, or otherwise this
1812 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1813 * will happen. Meanwhile, this code *is* needed for the
1814 * above-mentioned test suite tests to succeed. The common theme
1815 * on those tests seems to be returning null fields from matches.
1816 * --jhi */
dafc8851 1817#if 1
d6a28714
JH
1818 sp = prog->startp;
1819 ep = prog->endp;
1820 if (prog->nparens) {
09e8ae3b 1821 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1822 *++sp = -1;
1823 *++ep = -1;
1824 }
1825 }
dafc8851 1826#endif
02db2b7b 1827 REGCP_SET(lastcp);
d6a28714
JH
1828 if (regmatch(prog->program + 1)) {
1829 prog->endp[0] = PL_reginput - PL_bostr;
1830 return 1;
1831 }
02db2b7b 1832 REGCP_UNWIND(lastcp);
d6a28714
JH
1833 return 0;
1834}
1835
02db2b7b
IZ
1836#define RE_UNWIND_BRANCH 1
1837#define RE_UNWIND_BRANCHJ 2
1838
1839union re_unwind_t;
1840
1841typedef struct { /* XX: makes sense to enlarge it... */
1842 I32 type;
1843 I32 prev;
1844 CHECKPOINT lastcp;
1845} re_unwind_generic_t;
1846
1847typedef struct {
1848 I32 type;
1849 I32 prev;
1850 CHECKPOINT lastcp;
1851 I32 lastparen;
1852 regnode *next;
1853 char *locinput;
1854 I32 nextchr;
1855#ifdef DEBUGGING
1856 int regindent;
1857#endif
1858} re_unwind_branch_t;
1859
1860typedef union re_unwind_t {
1861 I32 type;
1862 re_unwind_generic_t generic;
1863 re_unwind_branch_t branch;
1864} re_unwind_t;
1865
d6a28714
JH
1866/*
1867 - regmatch - main matching routine
1868 *
1869 * Conceptually the strategy is simple: check to see whether the current
1870 * node matches, call self recursively to see whether the rest matches,
1871 * and then act accordingly. In practice we make some effort to avoid
1872 * recursion, in particular by going through "ordinary" nodes (that don't
1873 * need to know whether the rest of the match failed) by a loop instead of
1874 * by recursion.
1875 */
1876/* [lwall] I've hoisted the register declarations to the outer block in order to
1877 * maybe save a little bit of pushing and popping on the stack. It also takes
1878 * advantage of machines that use a register save mask on subroutine entry.
1879 */
1880STATIC I32 /* 0 failure, 1 success */
1881S_regmatch(pTHX_ regnode *prog)
1882{
1883 dTHR;
1884 register regnode *scan; /* Current node. */
1885 regnode *next; /* Next node. */
1886 regnode *inner; /* Next node in internal branch. */
1887 register I32 nextchr; /* renamed nextchr - nextchar colides with
1888 function of same name */
1889 register I32 n; /* no or next */
1890 register I32 ln; /* len or last */
1891 register char *s; /* operand or save */
1892 register char *locinput = PL_reginput;
1893 register I32 c1, c2, paren; /* case fold search, parenth */
1894 int minmod = 0, sw = 0, logical = 0;
02db2b7b
IZ
1895 I32 unwind = 0;
1896 I32 firstcp = PL_savestack_ix;
1897
d6a28714
JH
1898#ifdef DEBUGGING
1899 PL_regindent++;
1900#endif
1901
1902 /* Note that nextchr is a byte even in UTF */
1903 nextchr = UCHARAT(locinput);
1904 scan = prog;
1905 while (scan != NULL) {
1906#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
02db2b7b 1907#if 1
d6a28714
JH
1908# define sayYES goto yes
1909# define sayNO goto no
7821416a
IZ
1910# define sayYES_FINAL goto yes_final
1911# define sayYES_LOUD goto yes_loud
1912# define sayNO_FINAL goto no_final
1913# define sayNO_SILENT goto do_no
d6a28714
JH
1914# define saySAME(x) if (x) goto yes; else goto no
1915# define REPORT_CODE_OFF 24
1916#else
1917# define sayYES return 1
1918# define sayNO return 0
7821416a
IZ
1919# define sayYES_FINAL return 1
1920# define sayYES_LOUD return 1
1921# define sayNO_FINAL return 0
1922# define sayNO_SILENT return 0
d6a28714
JH
1923# define saySAME(x) return x
1924#endif
1925 DEBUG_r( {
1926 SV *prop = sv_newmortal();
1927 int docolor = *PL_colors[0];
1928 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1929 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1930 /* The part of the string before starttry has one color
1931 (pref0_len chars), between starttry and current
1932 position another one (pref_len - pref0_len chars),
1933 after the current position the third one.
1934 We assume that pref0_len <= pref_len, otherwise we
1935 decrease pref0_len. */
1936 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1937 ? (5 + taill) - l : locinput - PL_bostr);
1938 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1939
1940 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1941 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1942 ? (5 + taill) - pref_len : PL_regeol - locinput);
1943 if (pref0_len < 0)
1944 pref0_len = 0;
1945 if (pref0_len > pref_len)
1946 pref0_len = pref_len;
1947 regprop(prop, scan);
1948 PerlIO_printf(Perl_debug_log,
b900a521
JH
1949 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1950 (IV)(locinput - PL_bostr),
d6a28714
JH
1951 PL_colors[4], pref0_len,
1952 locinput - pref_len, PL_colors[5],
1953 PL_colors[2], pref_len - pref0_len,
1954 locinput - pref_len + pref0_len, PL_colors[3],
1955 (docolor ? "" : "> <"),
1956 PL_colors[0], l, locinput, PL_colors[1],
1957 15 - l - pref_len + 1,
1958 "",
b900a521 1959 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
1960 SvPVX(prop));
1961 } );
1962
1963 next = scan + NEXT_OFF(scan);
1964 if (next == scan)
1965 next = NULL;
1966
1967 switch (OP(scan)) {
1968 case BOL:
1969 if (locinput == PL_bostr
1970 ? PL_regprev == '\n'
1971 : (PL_multiline &&
1972 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1973 {
1974 /* regtill = regbol; */
b8c5462f
JH
1975 break;
1976 }
d6a28714
JH
1977 sayNO;
1978 case MBOL:
1979 if (locinput == PL_bostr
1980 ? PL_regprev == '\n'
1981 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1982 {
b8c5462f
JH
1983 break;
1984 }
d6a28714
JH
1985 sayNO;
1986 case SBOL:
c2a73568 1987 if (locinput == PL_bostr)
b8c5462f 1988 break;
d6a28714
JH
1989 sayNO;
1990 case GPOS:
1991 if (locinput == PL_reg_ganch)
1992 break;
1993 sayNO;
1994 case EOL:
1995 if (PL_multiline)
1996 goto meol;
1997 else
1998 goto seol;
1999 case MEOL:
2000 meol:
2001 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2002 sayNO;
b8c5462f 2003 break;
d6a28714
JH
2004 case SEOL:
2005 seol:
2006 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2007 sayNO;
d6a28714 2008 if (PL_regeol - locinput > 1)
b8c5462f 2009 sayNO;
b8c5462f 2010 break;
d6a28714
JH
2011 case EOS:
2012 if (PL_regeol != locinput)
b8c5462f 2013 sayNO;
d6a28714
JH
2014 break;
2015 case SANYUTF8:
b8c5462f 2016 if (nextchr & 0x80) {
b8c5462f 2017 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2018 if (locinput > PL_regeol)
2019 sayNO;
b8c5462f
JH
2020 nextchr = UCHARAT(locinput);
2021 break;
2022 }
d6a28714 2023 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2024 sayNO;
b8c5462f 2025 nextchr = UCHARAT(++locinput);
a0d0e21e 2026 break;
d6a28714
JH
2027 case SANY:
2028 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
2029 sayNO;
2030 nextchr = UCHARAT(++locinput);
b85d18e9 2031 break;
d6a28714 2032 case ANYUTF8:
a0ed51b3 2033 if (nextchr & 0x80) {
b8c5462f 2034 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2035 if (locinput > PL_regeol)
2036 sayNO;
a0ed51b3
LW
2037 nextchr = UCHARAT(locinput);
2038 break;
2039 }
155aba94 2040 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
a0ed51b3
LW
2041 sayNO;
2042 nextchr = UCHARAT(++locinput);
2043 break;
d6a28714 2044 case REG_ANY:
155aba94 2045 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
4633a7c4 2046 sayNO;
76e3520e 2047 nextchr = UCHARAT(++locinput);
a0d0e21e 2048 break;
d6a28714 2049 case EXACT:
cd439c50
IZ
2050 s = STRING(scan);
2051 ln = STR_LEN(scan);
d6a28714
JH
2052 /* Inline the first character, for speed. */
2053 if (UCHARAT(s) != nextchr)
2054 sayNO;
2055 if (PL_regeol - locinput < ln)
2056 sayNO;
2057 if (ln > 1 && memNE(s, locinput, ln))
2058 sayNO;
2059 locinput += ln;
2060 nextchr = UCHARAT(locinput);
2061 break;
2062 case EXACTFL:
b8c5462f
JH
2063 PL_reg_flags |= RF_tainted;
2064 /* FALL THROUGH */
d6a28714 2065 case EXACTF:
cd439c50
IZ
2066 s = STRING(scan);
2067 ln = STR_LEN(scan);
d6a28714
JH
2068
2069 if (UTF) {
2070 char *l = locinput;
2071 char *e = s + ln;
2072 c1 = OP(scan) == EXACTF;
2073 while (s < e) {
2074 if (l >= PL_regeol)
2075 sayNO;
667bb95a
JH
2076 if (utf8_to_uv((U8*)s, e - s, 0, 0) !=
2077 (c1 ?
2078 toLOWER_utf8((U8*)l) :
2079 toLOWER_LC_utf8((U8*)l)))
d6a28714
JH
2080 {
2081 sayNO;
2082 }
2083 s += UTF8SKIP(s);
2084 l += UTF8SKIP(l);
b8c5462f 2085 }
d6a28714 2086 locinput = l;
a0ed51b3
LW
2087 nextchr = UCHARAT(locinput);
2088 break;
2089 }
d6a28714
JH
2090
2091 /* Inline the first character, for speed. */
2092 if (UCHARAT(s) != nextchr &&
2093 UCHARAT(s) != ((OP(scan) == EXACTF)
2094 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2095 sayNO;
d6a28714 2096 if (PL_regeol - locinput < ln)
b8c5462f 2097 sayNO;
d6a28714
JH
2098 if (ln > 1 && (OP(scan) == EXACTF
2099 ? ibcmp(s, locinput, ln)
2100 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2101 sayNO;
d6a28714
JH
2102 locinput += ln;
2103 nextchr = UCHARAT(locinput);
a0d0e21e 2104 break;
d6a28714 2105 case ANYOFUTF8:
d6a28714 2106 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 2107 sayNO;
d6a28714
JH
2108 if (locinput >= PL_regeol)
2109 sayNO;
2110 locinput += PL_utf8skip[nextchr];
2111 nextchr = UCHARAT(locinput);
2112 break;
2113 case ANYOF:
d6a28714 2114 if (nextchr < 0)
b8c5462f 2115 nextchr = UCHARAT(locinput);
936ed897 2116 if (!REGINCLASS(scan, nextchr))
d6a28714
JH
2117 sayNO;
2118 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2119 sayNO;
b8c5462f
JH
2120 nextchr = UCHARAT(++locinput);
2121 break;
d6a28714 2122 case ALNUML:
b8c5462f
JH
2123 PL_reg_flags |= RF_tainted;
2124 /* FALL THROUGH */
d6a28714 2125 case ALNUM:
b8c5462f 2126 if (!nextchr)
4633a7c4 2127 sayNO;
d6a28714
JH
2128 if (!(OP(scan) == ALNUM
2129 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
2130 sayNO;
2131 nextchr = UCHARAT(++locinput);
bbce6d69 2132 break;
d6a28714 2133 case ALNUMLUTF8:
3280af22 2134 PL_reg_flags |= RF_tainted;
bbce6d69 2135 /* FALL THROUGH */
d6a28714 2136 case ALNUMUTF8:
b8c5462f
JH
2137 if (!nextchr)
2138 sayNO;
2139 if (nextchr & 0x80) {
d6a28714
JH
2140 if (!(OP(scan) == ALNUMUTF8
2141 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2142 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2143 {
2144 sayNO;
a0ed51b3 2145 }
b8c5462f 2146 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2147 nextchr = UCHARAT(locinput);
2148 break;
2149 }
d6a28714
JH
2150 if (!(OP(scan) == ALNUMUTF8
2151 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2152 sayNO;
b8c5462f 2153 nextchr = UCHARAT(++locinput);
a0d0e21e 2154 break;
d6a28714 2155 case NALNUML:
b8c5462f
JH
2156 PL_reg_flags |= RF_tainted;
2157 /* FALL THROUGH */
d6a28714
JH
2158 case NALNUM:
2159 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2160 sayNO;
d6a28714
JH
2161 if (OP(scan) == NALNUM
2162 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 2163 sayNO;
b8c5462f 2164 nextchr = UCHARAT(++locinput);
a0ed51b3 2165 break;
d6a28714 2166 case NALNUMLUTF8:
b8c5462f
JH
2167 PL_reg_flags |= RF_tainted;
2168 /* FALL THROUGH */
d6a28714 2169 case NALNUMUTF8:
3280af22 2170 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2171 sayNO;
b8c5462f 2172 if (nextchr & 0x80) {
d6a28714
JH
2173 if (OP(scan) == NALNUMUTF8
2174 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2175 : isALNUM_LC_utf8((U8*)locinput))
2176 {
b8c5462f 2177 sayNO;
d6a28714 2178 }
b8c5462f
JH
2179 locinput += PL_utf8skip[nextchr];
2180 nextchr = UCHARAT(locinput);
2181 break;
2182 }
d6a28714
JH
2183 if (OP(scan) == NALNUMUTF8
2184 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2185 sayNO;
76e3520e 2186 nextchr = UCHARAT(++locinput);
a0d0e21e 2187 break;
d6a28714
JH
2188 case BOUNDL:
2189 case NBOUNDL:
3280af22 2190 PL_reg_flags |= RF_tainted;
bbce6d69 2191 /* FALL THROUGH */
d6a28714
JH
2192 case BOUND:
2193 case NBOUND:
2194 /* was last char in word? */
2195 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2196 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2197 ln = isALNUM(ln);
2198 n = isALNUM(nextchr);
2199 }
2200 else {
2201 ln = isALNUM_LC(ln);
2202 n = isALNUM_LC(nextchr);
2203 }
2204 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 2205 sayNO;
a0d0e21e 2206 break;
d6a28714
JH
2207 case BOUNDLUTF8:
2208 case NBOUNDLUTF8:
a0ed51b3
LW
2209 PL_reg_flags |= RF_tainted;
2210 /* FALL THROUGH */
d6a28714
JH
2211 case BOUNDUTF8:
2212 case NBOUNDUTF8:
2213 /* was last char in word? */
667bb95a
JH
2214 if (locinput == PL_regbol)
2215 ln = PL_regprev;
2216 else {
2217 U8 *r = reghop((U8*)locinput, -1);
2218
2219 ln = utf8_to_uv(r, s - (char*)r, 0, 0);
2220 }
d6a28714
JH
2221 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2222 ln = isALNUM_uni(ln);
2223 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 2224 }
d6a28714
JH
2225 else {
2226 ln = isALNUM_LC_uni(ln);
2227 n = isALNUM_LC_utf8((U8*)locinput);
2228 }
2229 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 2230 sayNO;
a0ed51b3 2231 break;
d6a28714 2232 case SPACEL:
3280af22 2233 PL_reg_flags |= RF_tainted;
bbce6d69 2234 /* FALL THROUGH */
d6a28714 2235 case SPACE:
9442cb0e 2236 if (!nextchr)
4633a7c4 2237 sayNO;
d6a28714
JH
2238 if (!(OP(scan) == SPACE
2239 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 2240 sayNO;
76e3520e 2241 nextchr = UCHARAT(++locinput);
a0d0e21e 2242 break;
d6a28714 2243 case SPACELUTF8:
a0ed51b3
LW
2244 PL_reg_flags |= RF_tainted;
2245 /* FALL THROUGH */
d6a28714 2246 case SPACEUTF8:
9442cb0e 2247 if (!nextchr)
a0ed51b3
LW
2248 sayNO;
2249 if (nextchr & 0x80) {
d6a28714 2250 if (!(OP(scan) == SPACEUTF8
9442cb0e 2251 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714
JH
2252 : isSPACE_LC_utf8((U8*)locinput)))
2253 {
a0ed51b3 2254 sayNO;
d6a28714 2255 }
6f06b55f 2256 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2257 nextchr = UCHARAT(locinput);
2258 break;
2259 }
d6a28714
JH
2260 if (!(OP(scan) == SPACEUTF8
2261 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
2262 sayNO;
2263 nextchr = UCHARAT(++locinput);
2264 break;
d6a28714 2265 case NSPACEL:
3280af22 2266 PL_reg_flags |= RF_tainted;
bbce6d69 2267 /* FALL THROUGH */
d6a28714 2268 case NSPACE:
9442cb0e 2269 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2270 sayNO;
9442cb0e 2271 if (OP(scan) == NSPACE
d6a28714 2272 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2273 sayNO;
b8c5462f 2274 nextchr = UCHARAT(++locinput);
a0d0e21e 2275 break;
d6a28714 2276 case NSPACELUTF8:
a0ed51b3
LW
2277 PL_reg_flags |= RF_tainted;
2278 /* FALL THROUGH */
d6a28714 2279 case NSPACEUTF8:
9442cb0e 2280 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
2281 sayNO;
2282 if (nextchr & 0x80) {
d6a28714 2283 if (OP(scan) == NSPACEUTF8
9442cb0e 2284 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714 2285 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2286 {
2287 sayNO;
2288 }
2289 locinput += PL_utf8skip[nextchr];
2290 nextchr = UCHARAT(locinput);
2291 break;
a0ed51b3 2292 }
d6a28714
JH
2293 if (OP(scan) == NSPACEUTF8
2294 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2295 sayNO;
76e3520e 2296 nextchr = UCHARAT(++locinput);
a0d0e21e 2297 break;
d6a28714 2298 case DIGITL:
a0ed51b3
LW
2299 PL_reg_flags |= RF_tainted;
2300 /* FALL THROUGH */
d6a28714 2301 case DIGIT:
9442cb0e 2302 if (!nextchr)
a0ed51b3 2303 sayNO;
d6a28714
JH
2304 if (!(OP(scan) == DIGIT
2305 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2306 sayNO;
76e3520e 2307 nextchr = UCHARAT(++locinput);
a0d0e21e 2308 break;
d6a28714 2309 case DIGITLUTF8:
a0ed51b3
LW
2310 PL_reg_flags |= RF_tainted;
2311 /* FALL THROUGH */
d6a28714 2312 case DIGITUTF8:
a0ed51b3
LW
2313 if (!nextchr)
2314 sayNO;
2315 if (nextchr & 0x80) {
9442cb0e
GS
2316 if (!(OP(scan) == DIGITUTF8
2317 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2318 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2319 {
a0ed51b3 2320 sayNO;
dfe13c55 2321 }
6f06b55f 2322 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2323 nextchr = UCHARAT(locinput);
2324 break;
2325 }
9442cb0e
GS
2326 if (!(OP(scan) == DIGITUTF8
2327 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2328 sayNO;
2329 nextchr = UCHARAT(++locinput);
2330 break;
d6a28714 2331 case NDIGITL:
b8c5462f
JH
2332 PL_reg_flags |= RF_tainted;
2333 /* FALL THROUGH */
d6a28714 2334 case NDIGIT:
9442cb0e 2335 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2336 sayNO;
9442cb0e 2337 if (OP(scan) == NDIGIT
d6a28714 2338 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2339 sayNO;
76e3520e 2340 nextchr = UCHARAT(++locinput);
a0d0e21e 2341 break;
d6a28714 2342 case NDIGITLUTF8:
b8c5462f
JH
2343 PL_reg_flags |= RF_tainted;
2344 /* FALL THROUGH */
d6a28714 2345 case NDIGITUTF8:
b8c5462f
JH
2346 if (!nextchr && locinput >= PL_regeol)
2347 sayNO;
a0ed51b3 2348 if (nextchr & 0x80) {
9442cb0e
GS
2349 if (OP(scan) == NDIGITUTF8
2350 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2351 : isDIGIT_LC_utf8((U8*)locinput))
2352 {
a0ed51b3 2353 sayNO;
9442cb0e 2354 }
6f06b55f 2355 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2356 nextchr = UCHARAT(locinput);
2357 break;
2358 }
9442cb0e
GS
2359 if (OP(scan) == NDIGITUTF8
2360 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2361 sayNO;
2362 nextchr = UCHARAT(++locinput);
2363 break;
2364 case CLUMP:
dfe13c55 2365 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2366 sayNO;
6f06b55f 2367 locinput += PL_utf8skip[nextchr];
dfe13c55 2368 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2369 locinput += UTF8SKIP(locinput);
2370 if (locinput > PL_regeol)
2371 sayNO;
2372 nextchr = UCHARAT(locinput);
2373 break;
c8756f30 2374 case REFFL:
3280af22 2375 PL_reg_flags |= RF_tainted;
c8756f30 2376 /* FALL THROUGH */
c277df42 2377 case REF:
c8756f30 2378 case REFF:
c277df42 2379 n = ARG(scan); /* which paren pair */
cf93c79d 2380 ln = PL_regstartp[n];
2c2d71f5 2381 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2382 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2383 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2384 if (ln == PL_regendp[n])
a0d0e21e 2385 break;
a0ed51b3 2386
cf93c79d 2387 s = PL_bostr + ln;
a0ed51b3
LW
2388 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2389 char *l = locinput;
cf93c79d 2390 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2391 /*
2392 * Note that we can't do the "other character" lookup trick as
2393 * in the 8-bit case (no pun intended) because in Unicode we
2394 * have to map both upper and title case to lower case.
2395 */
2396 if (OP(scan) == REFF) {
2397 while (s < e) {
2398 if (l >= PL_regeol)
2399 sayNO;
dfe13c55 2400 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2401 sayNO;
2402 s += UTF8SKIP(s);
2403 l += UTF8SKIP(l);
2404 }
2405 }
2406 else {
2407 while (s < e) {
2408 if (l >= PL_regeol)
2409 sayNO;
dfe13c55 2410 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2411 sayNO;
2412 s += UTF8SKIP(s);
2413 l += UTF8SKIP(l);
2414 }
2415 }
2416 locinput = l;
2417 nextchr = UCHARAT(locinput);
2418 break;
2419 }
2420
a0d0e21e 2421 /* Inline the first character, for speed. */
76e3520e 2422 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2423 (OP(scan) == REF ||
2424 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2425 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2426 sayNO;
cf93c79d 2427 ln = PL_regendp[n] - ln;
3280af22 2428 if (locinput + ln > PL_regeol)
4633a7c4 2429 sayNO;
c8756f30
AK
2430 if (ln > 1 && (OP(scan) == REF
2431 ? memNE(s, locinput, ln)
2432 : (OP(scan) == REFF
2433 ? ibcmp(s, locinput, ln)
2434 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2435 sayNO;
a0d0e21e 2436 locinput += ln;
76e3520e 2437 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2438 break;
2439
2440 case NOTHING:
c277df42 2441 case TAIL:
a0d0e21e
LW
2442 break;
2443 case BACK:
2444 break;
c277df42
IZ
2445 case EVAL:
2446 {
2447 dSP;
533c011a 2448 OP_4tree *oop = PL_op;
3280af22
NIS
2449 COP *ocurcop = PL_curcop;
2450 SV **ocurpad = PL_curpad;
c277df42
IZ
2451 SV *ret;
2452
2453 n = ARG(scan);
533c011a 2454 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2455 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2456 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2457 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2458
cea2e8a9 2459 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2460 SPAGAIN;
2461 ret = POPs;
2462 PUTBACK;
2463
0f5d15d6
IZ
2464 PL_op = oop;
2465 PL_curpad = ocurpad;
2466 PL_curcop = ocurcop;
c277df42 2467 if (logical) {
0f5d15d6
IZ
2468 if (logical == 2) { /* Postponed subexpression. */
2469 regexp *re;
22c35a8c 2470 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2471 re_cc_state state;
0f5d15d6
IZ
2472 CHECKPOINT cp, lastcp;
2473
2474 if(SvROK(ret) || SvRMAGICAL(ret)) {
2475 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2476
2477 if(SvMAGICAL(sv))
2478 mg = mg_find(sv, 'r');
2479 }
2480 if (mg) {
2481 re = (regexp *)mg->mg_obj;
df0003d4 2482 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2483 }
2484 else {
2485 STRLEN len;
2486 char *t = SvPV(ret, len);
2487 PMOP pm;
2488 char *oprecomp = PL_regprecomp;
2489 I32 osize = PL_regsize;
2490 I32 onpar = PL_regnpar;
2491
2492 pm.op_pmflags = 0;
393fec97 2493 pm.op_pmdynflags = (UTF||DO_UTF8(ret) ? PMdf_UTF8 : 0);
cea2e8a9 2494 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2495 if (!(SvFLAGS(ret)
2496 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2497 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2498 PL_regprecomp = oprecomp;
2499 PL_regsize = osize;
2500 PL_regnpar = onpar;
2501 }
2502 DEBUG_r(
2503 PerlIO_printf(Perl_debug_log,
2504 "Entering embedded `%s%.60s%s%s'\n",
2505 PL_colors[0],
2506 re->precomp,
2507 PL_colors[1],
2508 (strlen(re->precomp) > 60 ? "..." : ""))
2509 );
2510 state.node = next;
2511 state.prev = PL_reg_call_cc;
2512 state.cc = PL_regcc;
2513 state.re = PL_reg_re;
2514
2ab05381 2515 PL_regcc = 0;
0f5d15d6
IZ
2516
2517 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2518 REGCP_SET(lastcp);
0f5d15d6
IZ
2519 cache_re(re);
2520 state.ss = PL_savestack_ix;
2521 *PL_reglastparen = 0;
2522 PL_reg_call_cc = &state;
2523 PL_reginput = locinput;
2c2d71f5
JH
2524
2525 /* XXXX This is too dramatic a measure... */
2526 PL_reg_maxiter = 0;
2527
0f5d15d6 2528 if (regmatch(re->program + 1)) {
2c914db6
IZ
2529 /* Even though we succeeded, we need to restore
2530 global variables, since we may be wrapped inside
2531 SUSPEND, thus the match may be not finished yet. */
2532
2533 /* XXXX Do this only if SUSPENDed? */
2534 PL_reg_call_cc = state.prev;
2535 PL_regcc = state.cc;
2536 PL_reg_re = state.re;
2537 cache_re(PL_reg_re);
2538
2539 /* XXXX This is too dramatic a measure... */
2540 PL_reg_maxiter = 0;
2541
2542 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2543 ReREFCNT_dec(re);
2544 regcpblow(cp);
2545 sayYES;
2546 }
0f5d15d6 2547 ReREFCNT_dec(re);
02db2b7b 2548 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2549 regcppop();
2550 PL_reg_call_cc = state.prev;
2551 PL_regcc = state.cc;
2552 PL_reg_re = state.re;
d3790889 2553 cache_re(PL_reg_re);
2c2d71f5
JH
2554
2555 /* XXXX This is too dramatic a measure... */
2556 PL_reg_maxiter = 0;
2557
0f5d15d6
IZ
2558 sayNO;
2559 }
c277df42 2560 sw = SvTRUE(ret);
0f5d15d6 2561 logical = 0;
a0ed51b3
LW
2562 }
2563 else
3280af22 2564 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2565 break;
2566 }
a0d0e21e 2567 case OPEN:
c277df42 2568 n = ARG(scan); /* which paren pair */
3280af22
NIS
2569 PL_reg_start_tmp[n] = locinput;
2570 if (n > PL_regsize)
2571 PL_regsize = n;
a0d0e21e
LW
2572 break;
2573 case CLOSE:
c277df42 2574 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2575 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2576 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2577 if (n > *PL_reglastparen)
2578 *PL_reglastparen = n;
a0d0e21e 2579 break;
c277df42
IZ
2580 case GROUPP:
2581 n = ARG(scan); /* which paren pair */
cf93c79d 2582 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2583 break;
2584 case IFTHEN:
2c2d71f5 2585 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2586 if (sw)
2587 next = NEXTOPER(NEXTOPER(scan));
2588 else {
2589 next = scan + ARG(scan);
2590 if (OP(next) == IFTHEN) /* Fake one. */
2591 next = NEXTOPER(NEXTOPER(next));
2592 }
2593 break;
2594 case LOGICAL:
0f5d15d6 2595 logical = scan->flags;
c277df42 2596 break;
2ab05381
IZ
2597/*******************************************************************
2598 PL_regcc contains infoblock about the innermost (...)* loop, and
2599 a pointer to the next outer infoblock.
2600
2601 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2602
2603 1) After matching X, regnode for CURLYX is processed;
2604
2605 2) This regnode creates infoblock on the stack, and calls
2606 regmatch() recursively with the starting point at WHILEM node;
2607
2608 3) Each hit of WHILEM node tries to match A and Z (in the order
2609 depending on the current iteration, min/max of {min,max} and
2610 greediness). The information about where are nodes for "A"
2611 and "Z" is read from the infoblock, as is info on how many times "A"
2612 was already matched, and greediness.
2613
2614 4) After A matches, the same WHILEM node is hit again.
2615
2616 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2617 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2618 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2619 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2620 of the external loop.
2621
2622 Currently present infoblocks form a tree with a stem formed by PL_curcc
2623 and whatever it mentions via ->next, and additional attached trees
2624 corresponding to temporarily unset infoblocks as in "5" above.
2625
2626 In the following picture infoblocks for outer loop of
2627 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2628 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2629 infoblocks are drawn below the "reset" infoblock.
2630
2631 In fact in the picture below we do not show failed matches for Z and T
2632 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2633 more obvious *why* one needs to *temporary* unset infoblocks.]
2634
2635 Matched REx position InfoBlocks Comment
2636 (Y(A)*?Z)*?T x
2637 Y(A)*?Z)*?T x <- O
2638 Y (A)*?Z)*?T x <- O
2639 Y A)*?Z)*?T x <- O <- I
2640 YA )*?Z)*?T x <- O <- I
2641 YA A)*?Z)*?T x <- O <- I
2642 YAA )*?Z)*?T x <- O <- I
2643 YAA Z)*?T x <- O # Temporary unset I
2644 I
2645
2646 YAAZ Y(A)*?Z)*?T x <- O
2647 I
2648
2649 YAAZY (A)*?Z)*?T x <- O
2650 I
2651
2652 YAAZY A)*?Z)*?T x <- O <- I
2653 I
2654
2655 YAAZYA )*?Z)*?T x <- O <- I
2656 I
2657
2658 YAAZYA Z)*?T x <- O # Temporary unset I
2659 I,I
2660
2661 YAAZYAZ )*?T x <- O
2662 I,I
2663
2664 YAAZYAZ T x # Temporary unset O
2665 O
2666 I,I
2667
2668 YAAZYAZT x
2669 O
2670 I,I
2671 *******************************************************************/
a0d0e21e
LW
2672 case CURLYX: {
2673 CURCUR cc;
3280af22 2674 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2675 /* No need to save/restore up to this paren */
2676 I32 parenfloor = scan->flags;
c277df42
IZ
2677
2678 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2679 next += ARG(next);
3280af22
NIS
2680 cc.oldcc = PL_regcc;
2681 PL_regcc = &cc;
cb434fcc
IZ
2682 /* XXXX Probably it is better to teach regpush to support
2683 parenfloor > PL_regsize... */
2684 if (parenfloor > *PL_reglastparen)
2685 parenfloor = *PL_reglastparen; /* Pessimization... */
2686 cc.parenfloor = parenfloor;
a0d0e21e
LW
2687 cc.cur = -1;
2688 cc.min = ARG1(scan);
2689 cc.max = ARG2(scan);
c277df42 2690 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2691 cc.next = next;
2692 cc.minmod = minmod;
2693 cc.lastloc = 0;
3280af22 2694 PL_reginput = locinput;
a0d0e21e
LW
2695 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2696 regcpblow(cp);
3280af22 2697 PL_regcc = cc.oldcc;
4633a7c4 2698 saySAME(n);
a0d0e21e
LW
2699 }
2700 /* NOT REACHED */
2701 case WHILEM: {
2702 /*
2703 * This is really hard to understand, because after we match
2704 * what we're trying to match, we must make sure the rest of
2c2d71f5 2705 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2706 * to go back UP the parse tree by recursing ever deeper. And
2707 * if it fails, we have to reset our parent's current state
2708 * that we can try again after backing off.
2709 */
2710
c277df42 2711 CHECKPOINT cp, lastcp;
3280af22 2712 CURCUR* cc = PL_regcc;
c277df42
IZ
2713 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2714
4633a7c4 2715 n = cc->cur + 1; /* how many we know we matched */
3280af22 2716 PL_reginput = locinput;
a0d0e21e 2717
c277df42
IZ
2718 DEBUG_r(
2719 PerlIO_printf(Perl_debug_log,
2720 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2721 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2722 (long)n, (long)cc->min,
2723 (long)cc->max, (long)cc)
2724 );
4633a7c4 2725
a0d0e21e
LW
2726 /* If degenerate scan matches "", assume scan done. */
2727
579cf2c3 2728 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2729 PL_regcc = cc->oldcc;
2ab05381
IZ
2730 if (PL_regcc)
2731 ln = PL_regcc->cur;
c277df42 2732 DEBUG_r(
c3464db5
DD
2733 PerlIO_printf(Perl_debug_log,
2734 "%*s empty match detected, try continuation...\n",
3280af22 2735 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2736 );
a0d0e21e 2737 if (regmatch(cc->next))
4633a7c4 2738 sayYES;
2ab05381
IZ
2739 if (PL_regcc)
2740 PL_regcc->cur = ln;
3280af22 2741 PL_regcc = cc;
4633a7c4 2742 sayNO;
a0d0e21e
LW
2743 }
2744
2745 /* First just match a string of min scans. */
2746
2747 if (n < cc->min) {
2748 cc->cur = n;
2749 cc->lastloc = locinput;
4633a7c4
LW
2750 if (regmatch(cc->scan))
2751 sayYES;
2752 cc->cur = n - 1;
c277df42 2753 cc->lastloc = lastloc;
4633a7c4 2754 sayNO;
a0d0e21e
LW
2755 }
2756
2c2d71f5
JH
2757 if (scan->flags) {
2758 /* Check whether we already were at this position.
2759 Postpone detection until we know the match is not
2760 *that* much linear. */
2761 if (!PL_reg_maxiter) {
2762 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2763 PL_reg_leftiter = PL_reg_maxiter;
2764 }
2765 if (PL_reg_leftiter-- == 0) {
2766 I32 size = (PL_reg_maxiter + 7)/8;
2767 if (PL_reg_poscache) {
2768 if (PL_reg_poscache_size < size) {
2769 Renew(PL_reg_poscache, size, char);
2770 PL_reg_poscache_size = size;
2771 }
2772 Zero(PL_reg_poscache, size, char);
2773 }
2774 else {
2775 PL_reg_poscache_size = size;
2776 Newz(29, PL_reg_poscache, size, char);
2777 }
2778 DEBUG_r(
2779 PerlIO_printf(Perl_debug_log,
2780 "%sDetected a super-linear match, switching on caching%s...\n",
2781 PL_colors[4], PL_colors[5])
2782 );
2783 }
2784 if (PL_reg_leftiter < 0) {
2785 I32 o = locinput - PL_bostr, b;
2786
2787 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2788 b = o % 8;
2789 o /= 8;
2790 if (PL_reg_poscache[o] & (1<<b)) {
2791 DEBUG_r(
2792 PerlIO_printf(Perl_debug_log,
2793 "%*s already tried at this position...\n",
2794 REPORT_CODE_OFF+PL_regindent*2, "")
2795 );
7821416a 2796 sayNO_SILENT;
2c2d71f5
JH
2797 }
2798 PL_reg_poscache[o] |= (1<<b);
2799 }
2800 }
2801
a0d0e21e
LW
2802 /* Prefer next over scan for minimal matching. */
2803
2804 if (cc->minmod) {
3280af22 2805 PL_regcc = cc->oldcc;
2ab05381
IZ
2806 if (PL_regcc)
2807 ln = PL_regcc->cur;
5f05dabc 2808 cp = regcppush(cc->parenfloor);
02db2b7b 2809 REGCP_SET(lastcp);
5f05dabc 2810 if (regmatch(cc->next)) {
c277df42 2811 regcpblow(cp);
4633a7c4 2812 sayYES; /* All done. */
5f05dabc 2813 }
02db2b7b 2814 REGCP_UNWIND(lastcp);
5f05dabc 2815 regcppop();
2ab05381
IZ
2816 if (PL_regcc)
2817 PL_regcc->cur = ln;
3280af22 2818 PL_regcc = cc;
a0d0e21e 2819
c277df42 2820 if (n >= cc->max) { /* Maximum greed exceeded? */
e476b1b5 2821 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2822 && !(PL_reg_flags & RF_warned)) {
2823 PL_reg_flags |= RF_warned;
e476b1b5 2824 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2825 "Complex regular subexpression recursion",
2826 REG_INFTY - 1);
c277df42 2827 }
4633a7c4 2828 sayNO;
c277df42 2829 }
a687059c 2830
c277df42 2831 DEBUG_r(
c3464db5
DD
2832 PerlIO_printf(Perl_debug_log,
2833 "%*s trying longer...\n",
3280af22 2834 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2835 );
a0d0e21e 2836 /* Try scanning more and see if it helps. */
3280af22 2837 PL_reginput = locinput;
a0d0e21e
LW
2838 cc->cur = n;
2839 cc->lastloc = locinput;
5f05dabc 2840 cp = regcppush(cc->parenfloor);
02db2b7b 2841 REGCP_SET(lastcp);
5f05dabc 2842 if (regmatch(cc->scan)) {
c277df42 2843 regcpblow(cp);
4633a7c4 2844 sayYES;
5f05dabc 2845 }
02db2b7b 2846 REGCP_UNWIND(lastcp);
5f05dabc 2847 regcppop();
4633a7c4 2848 cc->cur = n - 1;
c277df42 2849 cc->lastloc = lastloc;
4633a7c4 2850 sayNO;
a0d0e21e
LW
2851 }
2852
2853 /* Prefer scan over next for maximal matching. */
2854
2855 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2856 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2857 cc->cur = n;
2858 cc->lastloc = locinput;
02db2b7b 2859 REGCP_SET(lastcp);
5f05dabc 2860 if (regmatch(cc->scan)) {
c277df42 2861 regcpblow(cp);
4633a7c4 2862 sayYES;
5f05dabc 2863 }
02db2b7b 2864 REGCP_UNWIND(lastcp);
a0d0e21e 2865 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2866 PL_reginput = locinput;
c277df42 2867 DEBUG_r(
c3464db5
DD
2868 PerlIO_printf(Perl_debug_log,
2869 "%*s failed, try continuation...\n",
3280af22 2870 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2871 );
2872 }
e476b1b5 2873 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2874 && !(PL_reg_flags & RF_warned)) {
3280af22 2875 PL_reg_flags |= RF_warned;
e476b1b5 2876 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2877 "Complex regular subexpression recursion",
2878 REG_INFTY - 1);
a0d0e21e
LW
2879 }
2880
2881 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2882 PL_regcc = cc->oldcc;
2ab05381
IZ
2883 if (PL_regcc)
2884 ln = PL_regcc->cur;
a0d0e21e 2885 if (regmatch(cc->next))
4633a7c4 2886 sayYES;
2ab05381
IZ
2887 if (PL_regcc)
2888 PL_regcc->cur = ln;
3280af22 2889 PL_regcc = cc;
4633a7c4 2890 cc->cur = n - 1;
c277df42 2891 cc->lastloc = lastloc;
4633a7c4 2892 sayNO;
a0d0e21e
LW
2893 }
2894 /* NOT REACHED */
c277df42
IZ
2895 case BRANCHJ:
2896 next = scan + ARG(scan);
2897 if (next == scan)
2898 next = NULL;
2899 inner = NEXTOPER(NEXTOPER(scan));
2900 goto do_branch;
2901 case BRANCH:
2902 inner = NEXTOPER(scan);
2903 do_branch:
2904 {
2905 CHECKPOINT lastcp;
2906 c1 = OP(scan);
2907 if (OP(next) != c1) /* No choice. */
2908 next = inner; /* Avoid recursion. */
a0d0e21e 2909 else {
02db2b7b
IZ
2910 I32 lastparen = *PL_reglastparen;
2911 I32 unwind1;
2912 re_unwind_branch_t *uw;
2913
2914 /* Put unwinding data on stack */
2915 unwind1 = SSNEWt(1,re_unwind_branch_t);
2916 uw = SSPTRt(unwind1,re_unwind_branch_t);
2917 uw->prev = unwind;
2918 unwind = unwind1;
2919 uw->type = ((c1 == BRANCH)
2920 ? RE_UNWIND_BRANCH
2921 : RE_UNWIND_BRANCHJ);
2922 uw->lastparen = lastparen;
2923 uw->next = next;
2924 uw->locinput = locinput;
2925 uw->nextchr = nextchr;
2926#ifdef DEBUGGING
2927 uw->regindent = ++PL_regindent;
2928#endif
c277df42 2929
02db2b7b
IZ
2930 REGCP_SET(uw->lastcp);
2931
2932 /* Now go into the first branch */
2933 next = inner;
a687059c 2934 }
a0d0e21e
LW
2935 }
2936 break;
2937 case MINMOD:
2938 minmod = 1;
2939 break;
c277df42
IZ
2940 case CURLYM:
2941 {
00db4c45 2942 I32 l = 0;
c277df42
IZ
2943 CHECKPOINT lastcp;
2944
2945 /* We suppose that the next guy does not need
2946 backtracking: in particular, it is of constant length,
2947 and has no parenths to influence future backrefs. */
2948 ln = ARG1(scan); /* min to match */
2949 n = ARG2(scan); /* max to match */
c277df42
IZ
2950 paren = scan->flags;
2951 if (paren) {
3280af22
NIS
2952 if (paren > PL_regsize)
2953 PL_regsize = paren;
2954 if (paren > *PL_reglastparen)
2955 *PL_reglastparen = paren;
c277df42 2956 }
dc45a647 2957 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2958 if (paren)
2959 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2960 PL_reginput = locinput;
c277df42
IZ
2961 if (minmod) {
2962 minmod = 0;
2963 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2964 sayNO;
5f4b28b2 2965 if (ln && l == 0 && n >= ln
c277df42
IZ
2966 /* In fact, this is tricky. If paren, then the
2967 fact that we did/didnot match may influence
2968 future execution. */
2969 && !(paren && ln == 0))
2970 ln = n;
3280af22 2971 locinput = PL_reginput;
22c35a8c 2972 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2973 c1 = (U8)*STRING(next);
c277df42 2974 if (OP(next) == EXACTF)
22c35a8c 2975 c2 = PL_fold[c1];
c277df42 2976 else if (OP(next) == EXACTFL)
22c35a8c 2977 c2 = PL_fold_locale[c1];
c277df42
IZ
2978 else
2979 c2 = c1;
a0ed51b3
LW
2980 }
2981 else
c277df42 2982 c1 = c2 = -1000;
02db2b7b 2983 REGCP_SET(lastcp);
5f4b28b2 2984 /* This may be improved if l == 0. */
c277df42
IZ
2985 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2986 /* If it could work, try it. */
2987 if (c1 == -1000 ||
3280af22
NIS
2988 UCHARAT(PL_reginput) == c1 ||
2989 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2990 {
2991 if (paren) {
2992 if (n) {
cf93c79d
IZ
2993 PL_regstartp[paren] =
2994 HOPc(PL_reginput, -l) - PL_bostr;
2995 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2996 }
2997 else
cf93c79d 2998 PL_regendp[paren] = -1;
c277df42
IZ
2999 }
3000 if (regmatch(next))
3001 sayYES;
02db2b7b 3002 REGCP_UNWIND(lastcp);
c277df42
IZ
3003 }
3004 /* Couldn't or didn't -- move forward. */
3280af22 3005 PL_reginput = locinput;
c277df42
IZ
3006 if (regrepeat_hard(scan, 1, &l)) {
3007 ln++;
3280af22 3008 locinput = PL_reginput;
c277df42
IZ
3009 }
3010 else
3011 sayNO;
3012 }
a0ed51b3
LW
3013 }
3014 else {
c277df42
IZ
3015 n = regrepeat_hard(scan, n, &l);
3016 if (n != 0 && l == 0
3017 /* In fact, this is tricky. If paren, then the
3018 fact that we did/didnot match may influence
3019 future execution. */
3020 && !(paren && ln == 0))
3021 ln = n;
3280af22 3022 locinput = PL_reginput;
c277df42 3023 DEBUG_r(
5c0ca799 3024 PerlIO_printf(Perl_debug_log,
faccc32b 3025 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3026 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3027 (IV) n, (IV)l)
c277df42
IZ
3028 );
3029 if (n >= ln) {
22c35a8c 3030 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3031 c1 = (U8)*STRING(next);
c277df42 3032 if (OP(next) == EXACTF)
22c35a8c 3033 c2 = PL_fold[c1];
c277df42 3034 else if (OP(next) == EXACTFL)
22c35a8c 3035 c2 = PL_fold_locale[c1];
c277df42
IZ
3036 else
3037 c2 = c1;
a0ed51b3
LW
3038 }
3039 else
c277df42
IZ
3040 c1 = c2 = -1000;
3041 }
02db2b7b 3042 REGCP_SET(lastcp);
c277df42
IZ
3043 while (n >= ln) {
3044 /* If it could work, try it. */
3045 if (c1 == -1000 ||
3280af22
NIS
3046 UCHARAT(PL_reginput) == c1 ||
3047 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3048 {
3049 DEBUG_r(
c3464db5 3050 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3051 "%*s trying tail with n=%"IVdf"...\n",
3052 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3053 );
3054 if (paren) {
3055 if (n) {
cf93c79d
IZ
3056 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3057 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3058 }
a0ed51b3 3059 else
cf93c79d 3060 PL_regendp[paren] = -1;
c277df42 3061 }
a0ed51b3
LW
3062 if (regmatch(next))
3063 sayYES;
02db2b7b 3064 REGCP_UNWIND(lastcp);
a0ed51b3 3065 }
c277df42
IZ
3066 /* Couldn't or didn't -- back up. */
3067 n--;
dfe13c55 3068 locinput = HOPc(locinput, -l);
3280af22 3069 PL_reginput = locinput;
c277df42
IZ
3070 }
3071 }
3072 sayNO;
3073 break;
3074 }
3075 case CURLYN:
3076 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3077 if (paren > PL_regsize)
3078 PL_regsize = paren;
3079 if (paren > *PL_reglastparen)
3080 *PL_reglastparen = paren;
c277df42
IZ
3081 ln = ARG1(scan); /* min to match */
3082 n = ARG2(scan); /* max to match */
dc45a647 3083 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3084 goto repeat;
a0d0e21e 3085 case CURLY:
c277df42 3086 paren = 0;
a0d0e21e
LW
3087 ln = ARG1(scan); /* min to match */
3088 n = ARG2(scan); /* max to match */
dc45a647 3089 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3090 goto repeat;
3091 case STAR:
3092 ln = 0;
c277df42 3093 n = REG_INFTY;
a0d0e21e 3094 scan = NEXTOPER(scan);
c277df42 3095 paren = 0;
a0d0e21e
LW
3096 goto repeat;
3097 case PLUS:
c277df42
IZ
3098 ln = 1;
3099 n = REG_INFTY;
3100 scan = NEXTOPER(scan);
3101 paren = 0;
3102 repeat:
a0d0e21e
LW
3103 /*
3104 * Lookahead to avoid useless match attempts
3105 * when we know what character comes next.
3106 */
22c35a8c 3107 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3108 c1 = (U8)*STRING(next);
bbce6d69 3109 if (OP(next) == EXACTF)
22c35a8c 3110 c2 = PL_fold[c1];
bbce6d69 3111 else if (OP(next) == EXACTFL)
22c35a8c 3112 c2 = PL_fold_locale[c1];
bbce6d69 3113 else
3114 c2 = c1;
3115 }
a0d0e21e 3116 else
bbce6d69 3117 c1 = c2 = -1000;
3280af22 3118 PL_reginput = locinput;
a0d0e21e 3119 if (minmod) {
c277df42 3120 CHECKPOINT lastcp;
a0d0e21e
LW
3121 minmod = 0;
3122 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3123 sayNO;
a0ed51b3 3124 locinput = PL_reginput;
02db2b7b 3125 REGCP_SET(lastcp);
0fe9bf95
IZ
3126 if (c1 != -1000) {
3127 char *e = locinput + n - ln; /* Should not check after this */
3128 char *old = locinput;
3129
3130 if (e >= PL_regeol || (n == REG_INFTY))
3131 e = PL_regeol - 1;
3132 while (1) {
3133 /* Find place 'next' could work */
3134 if (c1 == c2) {
3135 while (locinput <= e && *locinput != c1)
3136 locinput++;
3137 } else {
3138 while (locinput <= e
3139 && *locinput != c1
3140 && *locinput != c2)
3141 locinput++;
3142 }
3143 if (locinput > e)
3144 sayNO;
3145 /* PL_reginput == old now */
3146 if (locinput != old) {
3147 ln = 1; /* Did some */
3148 if (regrepeat(scan, locinput - old) <
3149 locinput - old)
3150 sayNO;
3151 }
3152 /* PL_reginput == locinput now */
29d1e993 3153 TRYPAREN(paren, ln, locinput);
0fe9bf95 3154 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3155 REGCP_UNWIND(lastcp);
0fe9bf95
IZ
3156 /* Couldn't or didn't -- move forward. */
3157 old = locinput++;
3158 }
3159 }
3160 else
c277df42 3161 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 3162 /* If it could work, try it. */
bbce6d69 3163 if (c1 == -1000 ||
3280af22
NIS
3164 UCHARAT(PL_reginput) == c1 ||
3165 UCHARAT(PL_reginput) == c2)
bbce6d69 3166 {
29d1e993 3167 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3168 REGCP_UNWIND(lastcp);
bbce6d69 3169 }
c277df42 3170 /* Couldn't or didn't -- move forward. */
a0ed51b3 3171 PL_reginput = locinput;
a0d0e21e
LW
3172 if (regrepeat(scan, 1)) {
3173 ln++;
a0ed51b3
LW
3174 locinput = PL_reginput;
3175 }
3176 else
4633a7c4 3177 sayNO;
a0d0e21e
LW
3178 }
3179 }
3180 else {
c277df42 3181 CHECKPOINT lastcp;
a0d0e21e 3182 n = regrepeat(scan, n);
a0ed51b3 3183 locinput = PL_reginput;
22c35a8c 3184 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3185 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3186 ln = n; /* why back off? */
1aeab75a
GS
3187 /* ...because $ and \Z can match before *and* after
3188 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3189 We should back off by one in this case. */
3190 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3191 ln--;
3192 }
02db2b7b 3193 REGCP_SET(lastcp);
c277df42
IZ
3194 if (paren) {
3195 while (n >= ln) {
3196 /* If it could work, try it. */
3197 if (c1 == -1000 ||
3280af22
NIS
3198 UCHARAT(PL_reginput) == c1 ||
3199 UCHARAT(PL_reginput) == c2)
c277df42 3200 {
29d1e993 3201 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3202 REGCP_UNWIND(lastcp);
c277df42
IZ
3203 }
3204 /* Couldn't or didn't -- back up. */
3205 n--;
dfe13c55 3206 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3207 }
a0ed51b3
LW
3208 }
3209 else {
c277df42
IZ
3210 while (n >= ln) {
3211 /* If it could work, try it. */
3212 if (c1 == -1000 ||
3280af22
NIS
3213 UCHARAT(PL_reginput) == c1 ||
3214 UCHARAT(PL_reginput) == c2)
c277df42 3215 {
29d1e993 3216 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3217 REGCP_UNWIND(lastcp);
c277df42
IZ
3218 }
3219 /* Couldn't or didn't -- back up. */
3220 n--;
dfe13c55 3221 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3222 }
a0d0e21e
LW
3223 }
3224 }
4633a7c4 3225 sayNO;
c277df42 3226 break;
a0d0e21e 3227 case END:
0f5d15d6
IZ
3228 if (PL_reg_call_cc) {
3229 re_cc_state *cur_call_cc = PL_reg_call_cc;
3230 CURCUR *cctmp = PL_regcc;
3231 regexp *re = PL_reg_re;
3232 CHECKPOINT cp, lastcp;
3233
3234 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3235 REGCP_SET(lastcp);
0f5d15d6
IZ
3236 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3237 the caller. */
3238 PL_reginput = locinput; /* Make position available to
3239 the callcc. */
3240 cache_re(PL_reg_call_cc->re);
3241 PL_regcc = PL_reg_call_cc->cc;
3242 PL_reg_call_cc = PL_reg_call_cc->prev;
3243 if (regmatch(cur_call_cc->node)) {
3244 PL_reg_call_cc = cur_call_cc;
3245 regcpblow(cp);
3246 sayYES;
3247 }
02db2b7b 3248 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3249 regcppop();
3250 PL_reg_call_cc = cur_call_cc;
3251 PL_regcc = cctmp;
3252 PL_reg_re = re;
3253 cache_re(re);
3254
3255 DEBUG_r(
3256 PerlIO_printf(Perl_debug_log,
3257 "%*s continuation failed...\n",
3258 REPORT_CODE_OFF+PL_regindent*2, "")
3259 );
7821416a 3260 sayNO_SILENT;
0f5d15d6 3261 }
7821416a
IZ
3262 if (locinput < PL_regtill) {
3263 DEBUG_r(PerlIO_printf(Perl_debug_log,
3264 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3265 PL_colors[4],
3266 (long)(locinput - PL_reg_starttry),
3267 (long)(PL_regtill - PL_reg_starttry),
3268 PL_colors[5]));
3269 sayNO_FINAL; /* Cannot match: too short. */
3270 }
3271 PL_reginput = locinput; /* put where regtry can find it */
3272 sayYES_FINAL; /* Success! */
7e5428c5 3273 case SUCCEED:
3280af22 3274 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3275 sayYES_LOUD; /* Success! */
c277df42
IZ
3276 case SUSPEND:
3277 n = 1;
9fe1d20c 3278 PL_reginput = locinput;
c277df42 3279 goto do_ifmatch;
a0d0e21e 3280 case UNLESSM:
c277df42 3281 n = 0;
a0ed51b3 3282 if (scan->flags) {
0fe9bf95
IZ
3283 if (UTF) { /* XXXX This is absolutely
3284 broken, we read before
3285 start of string. */
3286 s = HOPMAYBEc(locinput, -scan->flags);
3287 if (!s)
3288 goto say_yes;
3289 PL_reginput = s;
3290 }
3291 else {
3292 if (locinput < PL_bostr + scan->flags)
3293 goto say_yes;
3294 PL_reginput = locinput - scan->flags;
3295 goto do_ifmatch;
3296 }
a0ed51b3
LW
3297 }
3298 else
3299 PL_reginput = locinput;
c277df42
IZ
3300 goto do_ifmatch;
3301 case IFMATCH:
3302 n = 1;
a0ed51b3 3303 if (scan->flags) {
0fe9bf95
IZ
3304 if (UTF) { /* XXXX This is absolutely
3305 broken, we read before
3306 start of string. */
3307 s = HOPMAYBEc(locinput, -scan->flags);
3308 if (!s || s < PL_bostr)
3309 goto say_no;
3310 PL_reginput = s;
3311 }
3312 else {
3313 if (locinput < PL_bostr + scan->flags)
3314 goto say_no;
3315 PL_reginput = locinput - scan->flags;
3316 goto do_ifmatch;
3317 }
a0ed51b3
LW
3318 }
3319 else
3320 PL_reginput = locinput;
3321
c277df42 3322 do_ifmatch:
c277df42
IZ
3323 inner = NEXTOPER(NEXTOPER(scan));
3324 if (regmatch(inner) != n) {
3325 say_no:
3326 if (logical) {
3327 logical = 0;
3328 sw = 0;
3329 goto do_longjump;
a0ed51b3
LW
3330 }
3331 else
c277df42
IZ
3332 sayNO;
3333 }
3334 say_yes:
3335 if (logical) {
3336 logical = 0;
3337 sw = 1;
3338 }
fe44a5e8 3339 if (OP(scan) == SUSPEND) {
3280af22 3340 locinput = PL_reginput;
565764a8 3341 nextchr = UCHARAT(locinput);
fe44a5e8 3342 }
c277df42
IZ
3343 /* FALL THROUGH. */
3344 case LONGJMP:
3345 do_longjump:
3346 next = scan + ARG(scan);
3347 if (next == scan)
3348 next = NULL;
a0d0e21e
LW
3349 break;
3350 default:
b900a521 3351 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3352 PTR2UV(scan), OP(scan));
cea2e8a9 3353 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3354 }
02db2b7b 3355 reenter:
a0d0e21e
LW
3356 scan = next;
3357 }
a687059c 3358
a0d0e21e
LW
3359 /*
3360 * We get here only if there's trouble -- normally "case END" is
3361 * the terminating point.
3362 */
cea2e8a9 3363 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3364 /*NOTREACHED*/
4633a7c4
LW
3365 sayNO;
3366
7821416a
IZ
3367yes_loud:
3368 DEBUG_r(
3369 PerlIO_printf(Perl_debug_log,
3370 "%*s %scould match...%s\n",
3371 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3372 );
3373 goto yes;
3374yes_final:
3375 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3376 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3377yes:
3378#ifdef DEBUGGING
3280af22 3379 PL_regindent--;
4633a7c4 3380#endif
02db2b7b
IZ
3381
3382#if 0 /* Breaks $^R */
3383 if (unwind)
3384 regcpblow(firstcp);
3385#endif
4633a7c4
LW
3386 return 1;
3387
3388no:
7821416a
IZ
3389 DEBUG_r(
3390 PerlIO_printf(Perl_debug_log,
3391 "%*s %sfailed...%s\n",
3392 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3393 );
3394 goto do_no;
3395no_final:
3396do_no:
02db2b7b
IZ
3397 if (unwind) {
3398 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3399
3400 switch (uw->type) {
3401 case RE_UNWIND_BRANCH:
3402 case RE_UNWIND_BRANCHJ:
3403 {
3404 re_unwind_branch_t *uwb = &(uw->branch);
3405 I32 lastparen = uwb->lastparen;
3406
3407 REGCP_UNWIND(uwb->lastcp);
3408 for (n = *PL_reglastparen; n > lastparen; n--)
3409 PL_regendp[n] = -1;
3410 *PL_reglastparen = n;
3411 scan = next = uwb->next;
3412 if ( !scan ||
3413 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3414 ? BRANCH : BRANCHJ) ) { /* Failure */
3415 unwind = uwb->prev;
3416#ifdef DEBUGGING
3417 PL_regindent--;
3418#endif
3419 goto do_no;
3420 }
3421 /* Have more choice yet. Reuse the same uwb. */
3422 /*SUPPRESS 560*/
3423 if ((n = (uwb->type == RE_UNWIND_BRANCH
3424 ? NEXT_OFF(next) : ARG(next))))
3425 next += n;
3426 else
3427 next = NULL; /* XXXX Needn't unwinding in this case... */
3428 uwb->next = next;
3429 next = NEXTOPER(scan);
3430 if (uwb->type == RE_UNWIND_BRANCHJ)
3431 next = NEXTOPER(next);
3432 locinput = uwb->locinput;
3433 nextchr = uwb->nextchr;
3434#ifdef DEBUGGING
3435 PL_regindent = uwb->regindent;
3436#endif
3437
3438 goto reenter;
3439 }
3440 /* NOT REACHED */
3441 default:
3442 Perl_croak(aTHX_ "regexp unwind memory corruption");
3443 }
3444 /* NOT REACHED */
3445 }
4633a7c4 3446#ifdef DEBUGGING
3280af22 3447 PL_regindent--;
4633a7c4 3448#endif
a0d0e21e 3449 return 0;
a687059c
LW
3450}
3451
3452/*
3453 - regrepeat - repeatedly match something simple, report how many
3454 */
3455/*
3456 * [This routine now assumes that it will only match on things of length 1.
3457 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3458 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3459 */
76e3520e 3460STATIC I32
cea2e8a9 3461S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3462{
5c0ca799 3463 dTHR;
a0d0e21e 3464 register char *scan;
a0d0e21e 3465 register I32 c;
3280af22 3466 register char *loceol = PL_regeol;
a0ed51b3 3467 register I32 hardcount = 0;
a0d0e21e 3468
3280af22 3469 scan = PL_reginput;
c277df42 3470 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3471 loceol = scan + max;
a0d0e21e 3472 switch (OP(p)) {
22c35a8c 3473 case REG_ANY:
a0d0e21e
LW
3474 while (scan < loceol && *scan != '\n')
3475 scan++;
3476 break;
3477 case SANY:
3478 scan = loceol;
3479 break;
a0ed51b3
LW
3480 case ANYUTF8:
3481 loceol = PL_regeol;
3482 while (scan < loceol && *scan != '\n') {
3483 scan += UTF8SKIP(scan);
3484 hardcount++;
3485 }
3486 break;
3487 case SANYUTF8:
3488 loceol = PL_regeol;
3489 while (scan < loceol) {
3490 scan += UTF8SKIP(scan);
3491 hardcount++;
3492 }
3493 break;
bbce6d69 3494 case EXACT: /* length of string is 1 */
cd439c50 3495 c = (U8)*STRING(p);
bbce6d69 3496 while (scan < loceol && UCHARAT(scan) == c)
3497 scan++;
3498 break;
3499 case EXACTF: /* length of string is 1 */
cd439c50 3500 c = (U8)*STRING(p);
bbce6d69 3501 while (scan < loceol &&
22c35a8c 3502 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3503 scan++;
3504 break;
3505 case EXACTFL: /* length of string is 1 */
3280af22 3506 PL_reg_flags |= RF_tainted;
cd439c50 3507 c = (U8)*STRING(p);
bbce6d69 3508 while (scan < loceol &&
22c35a8c 3509 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3510 scan++;
3511 break;
a0ed51b3
LW
3512 case ANYOFUTF8:
3513 loceol = PL_regeol;
3514 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3515 scan += UTF8SKIP(scan);
3516 hardcount++;
3517 }
3518 break;
a0d0e21e 3519 case ANYOF:
936ed897 3520 while (scan < loceol && REGINCLASS(p, *scan))
a0d0e21e 3521 scan++;
a0d0e21e
LW
3522 break;
3523 case ALNUM:
3524 while (scan < loceol && isALNUM(*scan))
3525 scan++;
3526 break;
a0ed51b3
LW
3527 case ALNUMUTF8:
3528 loceol = PL_regeol;
dfe13c55 3529 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3530 scan += UTF8SKIP(scan);
3531 hardcount++;
3532 }
3533 break;
bbce6d69 3534 case ALNUML:
3280af22 3535 PL_reg_flags |= RF_tainted;
bbce6d69 3536 while (scan < loceol && isALNUM_LC(*scan))
3537 scan++;
3538 break;
a0ed51b3
LW
3539 case ALNUMLUTF8:
3540 PL_reg_flags |= RF_tainted;
3541 loceol = PL_regeol;
dfe13c55 3542 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3543 scan += UTF8SKIP(scan);
3544 hardcount++;
3545 }
3546 break;
3547 break;
a0d0e21e
LW
3548 case NALNUM:
3549 while (scan < loceol && !isALNUM(*scan))
3550 scan++;
3551 break;
a0ed51b3
LW
3552 case NALNUMUTF8:
3553 loceol = PL_regeol;
dfe13c55 3554 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3555 scan += UTF8SKIP(scan);
3556 hardcount++;
3557 }
3558 break;
bbce6d69 3559 case NALNUML:
3280af22 3560 PL_reg_flags |= RF_tainted;
bbce6d69 3561 while (scan < loceol && !isALNUM_LC(*scan))
3562 scan++;
3563 break;
a0ed51b3
LW
3564 case NALNUMLUTF8:
3565 PL_reg_flags |= RF_tainted;
3566 loceol = PL_regeol;
dfe13c55 3567 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3568 scan += UTF8SKIP(scan);
3569 hardcount++;
3570 }
3571 break;
a0d0e21e
LW
3572 case SPACE:
3573 while (scan < loceol && isSPACE(*scan))
3574 scan++;
3575 break;
a0ed51b3
LW
3576 case SPACEUTF8:
3577 loceol = PL_regeol;
dfe13c55 3578 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3579 scan += UTF8SKIP(scan);
3580 hardcount++;
3581 }
3582 break;
bbce6d69 3583 case SPACEL:
3280af22 3584 PL_reg_flags |= RF_tainted;
bbce6d69 3585 while (scan < loceol && isSPACE_LC(*scan))
3586 scan++;
3587 break;
a0ed51b3
LW
3588 case SPACELUTF8:
3589 PL_reg_flags |= RF_tainted;
3590 loceol = PL_regeol;
dfe13c55 3591 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3592 scan += UTF8SKIP(scan);
3593 hardcount++;
3594 }
3595 break;
a0d0e21e
LW
3596 case NSPACE:
3597 while (scan < loceol && !isSPACE(*scan))
3598 scan++;
3599 break;
a0ed51b3
LW
3600 case NSPACEUTF8:
3601 loceol = PL_regeol;
dfe13c55 3602 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3603 scan += UTF8SKIP(scan);
3604 hardcount++;
3605 }
3606 break;
bbce6d69 3607 case NSPACEL:
3280af22 3608 PL_reg_flags |= RF_tainted;
bbce6d69 3609 while (scan < loceol && !isSPACE_LC(*scan))
3610 scan++;
3611 break;
a0ed51b3
LW
3612 case NSPACELUTF8:
3613 PL_reg_flags |= RF_tainted;
3614 loceol = PL_regeol;
dfe13c55 3615 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3616 scan += UTF8SKIP(scan);
3617 hardcount++;
3618 }
3619 break;
a0d0e21e
LW
3620 case DIGIT:
3621 while (scan < loceol && isDIGIT(*scan))
3622 scan++;
3623 break;
a0ed51b3
LW
3624 case DIGITUTF8:
3625 loceol = PL_regeol;
dfe13c55 3626 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3627 scan += UTF8SKIP(scan);
3628 hardcount++;
3629 }
3630 break;
3631 break;
a0d0e21e
LW
3632 case NDIGIT:
3633 while (scan < loceol && !isDIGIT(*scan))
3634 scan++;
3635 break;
a0ed51b3
LW
3636 case NDIGITUTF8:
3637 loceol = PL_regeol;
dfe13c55 3638 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3639 scan += UTF8SKIP(scan);
3640 hardcount++;
3641 }
3642 break;
a0d0e21e
LW
3643 default: /* Called on something of 0 width. */
3644 break; /* So match right here or not at all. */
3645 }
a687059c 3646
a0ed51b3
LW
3647 if (hardcount)
3648 c = hardcount;
3649 else
3650 c = scan - PL_reginput;
3280af22 3651 PL_reginput = scan;
a687059c 3652
c277df42
IZ
3653 DEBUG_r(
3654 {
3655 SV *prop = sv_newmortal();
3656
3657 regprop(prop, p);
3658 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3659 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3660 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3661 });
3662
a0d0e21e 3663 return(c);
a687059c
LW
3664}
3665
3666/*
c277df42
IZ
3667 - regrepeat_hard - repeatedly match something, report total lenth and length
3668 *
3669 * The repeater is supposed to have constant length.
3670 */
3671
76e3520e 3672STATIC I32
cea2e8a9 3673S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3674{
5c0ca799 3675 dTHR;
c277df42
IZ
3676 register char *scan;
3677 register char *start;
3280af22 3678 register char *loceol = PL_regeol;
a0ed51b3 3679 I32 l = 0;
708e3b05 3680 I32 count = 0, res = 1;
a0ed51b3
LW
3681
3682 if (!max)
3683 return 0;
c277df42 3684
3280af22 3685 start = PL_reginput;
a0ed51b3 3686 if (UTF) {
708e3b05 3687 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3688 if (!count++) {
3689 l = 0;
3690 while (start < PL_reginput) {
3691 l++;
3692 start += UTF8SKIP(start);
3693 }
3694 *lp = l;
3695 if (l == 0)
3696 return max;
3697 }
3698 if (count == max)
3699 return count;
3700 }
3701 }
3702 else {
708e3b05 3703 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3704 if (!count++) {
3705 *lp = l = PL_reginput - start;
3706 if (max != REG_INFTY && l*max < loceol - scan)
3707 loceol = scan + l*max;
3708 if (l == 0)
3709 return max;
c277df42
IZ
3710 }
3711 }
3712 }
708e3b05 3713 if (!res)
3280af22 3714 PL_reginput = scan;
c277df42 3715
a0ed51b3 3716 return count;
c277df42
IZ
3717}
3718
3719/*
cb8d8820 3720 - reginclass - determine if a character falls into a character class
bbce6d69 3721 */
3722
76e3520e 3723STATIC bool
936ed897 3724S_reginclass(pTHX_ register regnode *p, register I32 c)
bbce6d69 3725{
5c0ca799 3726 dTHR;
b8c5462f 3727 char flags = ANYOF_FLAGS(p);
bbce6d69 3728 bool match = FALSE;
3729
3730 c &= 0xFF;
b8c5462f 3731 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3732 match = TRUE;
3733 else if (flags & ANYOF_FOLD) {
3734 I32 cf;
3735 if (flags & ANYOF_LOCALE) {
3280af22 3736 PL_reg_flags |= RF_tainted;
22c35a8c 3737 cf = PL_fold_locale[c];
bbce6d69 3738 }
3739 else
22c35a8c 3740 cf = PL_fold[c];
b8c5462f 3741 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3742 match = TRUE;
3743 }
3744
b8c5462f 3745 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3746 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3747 if (
3748 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3749 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3750 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3751 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3752 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3753 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3754 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3755 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3756 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3757 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3758 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3759 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3760 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3761 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3762 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3763 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3764 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3765 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3766 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3767 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3768 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3769 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3770 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3771 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3772 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
aaa51d5e
JF
3773 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3774 (ANYOF_CLASS_TEST(p, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3775 (ANYOF_CLASS_TEST(p, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3bec3564 3776 (ANYOF_CLASS_TEST(p, ANYOF_BLANK) && isBLANK(c)) ||
aaa51d5e 3777 (ANYOF_CLASS_TEST(p, ANYOF_NBLANK) && !isBLANK(c))
b8c5462f 3778 ) /* How's that for a conditional? */
bbce6d69 3779 {
3780 match = TRUE;
3781 }
3782 }
3783
ae5c130c 3784 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3785}
3786
a0ed51b3 3787STATIC bool
cea2e8a9 3788S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
3789{
3790 dTHR;
a0ed51b3
LW
3791 char flags = ARG1(f);
3792 bool match = FALSE;
3793 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3794
3795 if (swash_fetch(sv, p))
3796 match = TRUE;
3797 else if (flags & ANYOF_FOLD) {
806e7201 3798 U8 tmpbuf[UTF8_MAXLEN];
a0ed51b3
LW
3799 if (flags & ANYOF_LOCALE) {
3800 PL_reg_flags |= RF_tainted;
3801 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3802 }
3803 else
3804 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3805 if (swash_fetch(sv, tmpbuf))
3806 match = TRUE;
3807 }
3808
b8c5462f 3809 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3810
3811 return (flags & ANYOF_INVERT) ? !match : match;
3812}
161b471a 3813
dfe13c55 3814STATIC U8 *
cea2e8a9 3815S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
3816{
3817 dTHR;
a0ed51b3
LW
3818 if (off >= 0) {
3819 while (off-- && s < (U8*)PL_regeol)
3820 s += UTF8SKIP(s);
3821 }
3822 else {
3823 while (off++) {
3824 if (s > (U8*)PL_bostr) {
3825 s--;
3826 if (*s & 0x80) {
3827 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3828 s--;
3829 } /* XXX could check well-formedness here */
3830 }
3831 }
3832 }
3833 return s;
3834}
161b471a 3835
dfe13c55 3836STATIC U8 *
cea2e8a9 3837S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3838{
c485e607 3839 dTHR;
a0ed51b3
LW
3840 if (off >= 0) {
3841 while (off-- && s < (U8*)PL_regeol)
3842 s += UTF8SKIP(s);
3843 if (off >= 0)
3844 return 0;
3845 }
3846 else {
3847 while (off++) {
3848 if (s > (U8*)PL_bostr) {
3849 s--;
3850 if (*s & 0x80) {
3851 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3852 s--;
3853 } /* XXX could check well-formedness here */
3854 }
3855 else
3856 break;
3857 }
3858 if (off <= 0)
3859 return 0;
3860 }
3861 return s;
3862}
51371543
GS
3863
3864#ifdef PERL_OBJECT
51371543
GS
3865#include "XSUB.h"
3866#endif
3867
3868static void
3869restore_pos(pTHXo_ void *arg)
3870{
3871 dTHR;
3872 if (PL_reg_eval_set) {
3873 if (PL_reg_oldsaved) {
3874 PL_reg_re->subbeg = PL_reg_oldsaved;
3875 PL_reg_re->sublen = PL_reg_oldsavedlen;
3876 RX_MATCH_COPIED_on(PL_reg_re);
3877 }
3878 PL_reg_magic->mg_len = PL_reg_oldpos;
3879 PL_reg_eval_set = 0;
3880 PL_curpm = PL_reg_oldcurpm;
3881 }
3882}