This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Retract #7404 with a patch from Robin Barker, via Andy Dougherty.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
3818b22b 69 **** Copyright (c) 1991-2000, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
c3464db5 148# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b
JH
149 " Setting an EVAL scope, savestack=%"IVdf"\n", \
150 (IV)PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 151
3280af22 152# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5 153 PerlIO_printf(Perl_debug_log, \
faccc32b
JH
154 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
155 (IV)lastcp, (IV)PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5 178 PerlIO_printf(Perl_debug_log,
b900a521
JH
179 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
180 (UV)paren, (IV)PL_regstartp[paren],
181 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
182 (IV)PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 188 PerlIO_printf(Perl_debug_log,
faccc32b
JH
189 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
190 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
191 }
192 );
3280af22
NIS
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d
IZ
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e
LW
197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6
IZ
205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6
IZ
211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
c277df42 222#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 223
29d1e993
HS
224#define TRYPAREN(paren, n, input) { \
225 if (paren) { \
226 if (n) { \
227 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
228 PL_regendp[paren] = input - PL_bostr; \
229 } \
230 else \
231 PL_regendp[paren] = -1; \
232 } \
233 if (regmatch(next)) \
234 sayYES; \
235 if (paren && n) \
236 PL_regendp[paren] = -1; \
237}
238
239
a687059c 240/*
e50aee73 241 * pregexec and friends
a687059c
LW
242 */
243
244/*
c277df42 245 - pregexec - match a regexp against a string
a687059c 246 */
c277df42 247I32
864dbfa3 248Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 249 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
250/* strend: pointer to null at end of string */
251/* strbeg: real beginning of string */
252/* minend: end of match must be >=minend after stringarg. */
253/* nosave: For optimizations. */
254{
255 return
256 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
257 nosave ? 0 : REXEC_COPY_STR);
258}
0f5d15d6
IZ
259
260STATIC void
cea2e8a9 261S_cache_re(pTHX_ regexp *prog)
0f5d15d6 262{
46124e9e 263 dTHR;
0f5d15d6
IZ
264 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
265#ifdef DEBUGGING
266 PL_regprogram = prog->program;
267#endif
268 PL_regnpar = prog->nparens;
269 PL_regdata = prog->data;
270 PL_reg_re = prog;
271}
22e551b9 272
cad2e5aa
JH
273/*
274 * Need to implement the following flags for reg_anch:
275 *
276 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
277 * USE_INTUIT_ML
278 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
279 * INTUIT_AUTORITATIVE_ML
280 * INTUIT_ONCE_NOML - Intuit can match in one location only.
281 * INTUIT_ONCE_ML
282 *
283 * Another flag for this function: SECOND_TIME (so that float substrs
284 * with giant delta may be not rechecked).
285 */
286
287/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
288
2c2d71f5 289/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
290 Otherwise, only SvCUR(sv) is used to get strbeg. */
291
292/* XXXX We assume that strpos is strbeg unless sv. */
293
6eb5f6b9
JH
294/* XXXX Some places assume that there is a fixed substring.
295 An update may be needed if optimizer marks as "INTUITable"
296 RExen without fixed substrings. Similarly, it is assumed that
297 lengths of all the strings are no more than minlen, thus they
298 cannot come from lookahead.
299 (Or minlen should take into account lookahead.) */
300
2c2d71f5
JH
301/* A failure to find a constant substring means that there is no need to make
302 an expensive call to REx engine, thus we celebrate a failure. Similarly,
303 finding a substring too deep into the string means that less calls to
30944b6d
IZ
304 regtry() should be needed.
305
306 REx compiler's optimizer found 4 possible hints:
307 a) Anchored substring;
308 b) Fixed substring;
309 c) Whether we are anchored (beginning-of-line or \G);
310 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 311 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
312 string which does not contradict any of them.
313 */
2c2d71f5 314
6eb5f6b9
JH
315/* Most of decisions we do here should have been done at compile time.
316 The nodes of the REx which we used for the search should have been
317 deleted from the finite automaton. */
318
cad2e5aa
JH
319char *
320Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
321 char *strend, U32 flags, re_scream_pos_data *data)
322{
2c2d71f5 323 register I32 start_shift;
cad2e5aa 324 /* Should be nonnegative! */
2c2d71f5
JH
325 register I32 end_shift;
326 register char *s;
327 register SV *check;
cad2e5aa
JH
328 char *t;
329 I32 ml_anch;
2c2d71f5 330 char *tmp;
6eb5f6b9
JH
331 register char *other_last = Nullch; /* other substr checked before this */
332 char *check_at; /* check substr found at this pos */
30944b6d
IZ
333#ifdef DEBUGGING
334 char *i_strpos = strpos;
335#endif
cad2e5aa
JH
336
337 DEBUG_r( if (!PL_colorset) reginitcolors() );
338 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 339 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
340 PL_colors[4],PL_colors[5],PL_colors[0],
341 prog->precomp,
342 PL_colors[1],
343 (strlen(prog->precomp) > 60 ? "..." : ""),
344 PL_colors[0],
b900a521 345 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
346 strpos, PL_colors[1],
347 (strend - strpos > 60 ? "..." : ""))
348 );
349
2c2d71f5
JH
350 if (prog->minlen > strend - strpos) {
351 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 352 goto fail;
2c2d71f5 353 }
653099ff 354 check = prog->check_substr;
2c2d71f5 355 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
356 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
357 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 358 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 359
5675c1a6
IZ
360 if (!ml_anch) {
361 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
362 /* SvCUR is not set on references: SvRV and SvPVX overlap */
363 && sv && !SvROK(sv)
364 && (strpos + SvCUR(sv) != strend)) {
365 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
366 goto fail;
367 }
368 if (prog->check_offset_min == prog->check_offset_max) {
2c2d71f5 369 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
370 I32 slen;
371
adac82c7 372 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 373 s = HOPc(strpos, prog->check_offset_min);
653099ff
GS
374 if (SvTAIL(check)) {
375 slen = SvCUR(check); /* >= 1 */
cad2e5aa 376
2c2d71f5
JH
377 if ( strend - s > slen || strend - s < slen - 1
378 || (strend - s == slen && strend[-1] != '\n')) {
379 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
380 goto fail_finish;
cad2e5aa
JH
381 }
382 /* Now should match s[0..slen-2] */
383 slen--;
653099ff 384 if (slen && (*SvPVX(check) != *s
cad2e5aa 385 || (slen > 1
653099ff 386 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
387 report_neq:
388 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
389 goto fail_finish;
390 }
cad2e5aa 391 }
653099ff
GS
392 else if (*SvPVX(check) != *s
393 || ((slen = SvCUR(check)) > 1
394 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
395 goto report_neq;
396 goto success_at_start;
5675c1a6 397 }
cad2e5aa 398 }
2c2d71f5 399 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 400 s = strpos;
2c2d71f5 401 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 402 end_shift = prog->minlen - start_shift -
653099ff 403 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 404 if (!ml_anch) {
653099ff
GS
405 I32 end = prog->check_offset_max + CHR_SVLEN(check)
406 - (SvTAIL(check) != 0);
2c2d71f5
JH
407 I32 eshift = strend - s - end;
408
409 if (end_shift < eshift)
410 end_shift = eshift;
411 }
cad2e5aa 412 }
2c2d71f5 413 else { /* Can match at random position */
cad2e5aa
JH
414 ml_anch = 0;
415 s = strpos;
2c2d71f5
JH
416 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
417 /* Should be nonnegative! */
418 end_shift = prog->minlen - start_shift -
653099ff 419 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
420 }
421
2c2d71f5 422#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 423 if (end_shift < 0)
6bbae5e6 424 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
425#endif
426
2c2d71f5
JH
427 restart:
428 /* Find a possible match in the region s..strend by looking for
429 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 430 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
431 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
432 I32 p = -1; /* Internal iterator of scream. */
433 I32 *pp = data ? data->scream_pos : &p;
434
2c2d71f5
JH
435 if (PL_screamfirst[BmRARE(check)] >= 0
436 || ( BmRARE(check) == '\n'
437 && (BmPREVIOUS(check) == SvCUR(check) - 1)
438 && SvTAIL(check) ))
439 s = screaminstr(sv, check,
440 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 441 else
2c2d71f5 442 goto fail_finish;
cad2e5aa
JH
443 if (data)
444 *data->scream_olds = s;
445 }
446 else
447 s = fbm_instr((unsigned char*)s + start_shift,
448 (unsigned char*)strend - end_shift,
2c2d71f5 449 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
450
451 /* Update the count-of-usability, remove useless subpatterns,
452 unshift s. */
2c2d71f5
JH
453
454 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
455 (s ? "Found" : "Did not find"),
456 ((check == prog->anchored_substr) ? "anchored" : "floating"),
457 PL_colors[0],
7b0972df
JH
458 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
459 SvPVX(check),
2c2d71f5
JH
460 PL_colors[1], (SvTAIL(check) ? "$" : ""),
461 (s ? " at offset " : "...\n") ) );
462
463 if (!s)
464 goto fail_finish;
465
6eb5f6b9
JH
466 check_at = s;
467
2c2d71f5 468 /* Finish the diagnostic message */
30944b6d 469 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
470
471 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
472 Start with the other substr.
473 XXXX no SCREAM optimization yet - and a very coarse implementation
474 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
475 *always* match. Probably should be marked during compile...
476 Probably it is right to do no SCREAM here...
477 */
478
479 if (prog->float_substr && prog->anchored_substr) {
30944b6d 480 /* Take into account the "other" substring. */
2c2d71f5
JH
481 /* XXXX May be hopelessly wrong for UTF... */
482 if (!other_last)
6eb5f6b9 483 other_last = strpos;
2c2d71f5 484 if (check == prog->float_substr) {
30944b6d
IZ
485 do_other_anchored:
486 {
2c2d71f5
JH
487 char *last = s - start_shift, *last1, *last2;
488 char *s1 = s;
489
490 tmp = PL_bostr;
491 t = s - prog->check_offset_max;
492 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
493 && (!(prog->reganch & ROPT_UTF8)
494 || (PL_bostr = strpos, /* Used in regcopmaybe() */
495 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
496 && t > strpos)))
30944b6d 497 /* EMPTY */;
2c2d71f5
JH
498 else
499 t = strpos;
500 t += prog->anchored_offset;
6eb5f6b9
JH
501 if (t < other_last) /* These positions already checked */
502 t = other_last;
2c2d71f5
JH
503 PL_bostr = tmp;
504 last2 = last1 = strend - prog->minlen;
505 if (last < last1)
506 last1 = last;
507 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
508 /* On end-of-str: see comment below. */
509 s = fbm_instr((unsigned char*)t,
510 (unsigned char*)last1 + prog->anchored_offset
511 + SvCUR(prog->anchored_substr)
512 - (SvTAIL(prog->anchored_substr)!=0),
513 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
514 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
515 (s ? "Found" : "Contradicts"),
516 PL_colors[0],
7b0972df
JH
517 (int)(SvCUR(prog->anchored_substr)
518 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
519 SvPVX(prog->anchored_substr),
520 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
521 if (!s) {
522 if (last1 >= last2) {
523 DEBUG_r(PerlIO_printf(Perl_debug_log,
524 ", giving up...\n"));
525 goto fail_finish;
526 }
527 DEBUG_r(PerlIO_printf(Perl_debug_log,
528 ", trying floating at offset %ld...\n",
30944b6d 529 (long)(s1 + 1 - i_strpos)));
2c2d71f5 530 PL_regeol = strend; /* Used in HOP() */
6eb5f6b9 531 other_last = last1 + prog->anchored_offset + 1;
2c2d71f5
JH
532 s = HOPc(last, 1);
533 goto restart;
534 }
535 else {
536 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 537 (long)(s - i_strpos)));
2c2d71f5 538 t = s - prog->anchored_offset;
6eb5f6b9 539 other_last = s + 1;
30944b6d 540 s = s1;
2c2d71f5
JH
541 if (t == strpos)
542 goto try_at_start;
2c2d71f5
JH
543 goto try_at_offset;
544 }
30944b6d 545 }
2c2d71f5
JH
546 }
547 else { /* Take into account the floating substring. */
548 char *last, *last1;
549 char *s1 = s;
550
551 t = s - start_shift;
552 last1 = last = strend - prog->minlen + prog->float_min_offset;
553 if (last - t > prog->float_max_offset)
554 last = t + prog->float_max_offset;
555 s = t + prog->float_min_offset;
6eb5f6b9
JH
556 if (s < other_last)
557 s = other_last;
2c2d71f5
JH
558 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
559 /* fbm_instr() takes into account exact value of end-of-str
560 if the check is SvTAIL(ed). Since false positives are OK,
561 and end-of-str is not later than strend we are OK. */
562 s = fbm_instr((unsigned char*)s,
563 (unsigned char*)last + SvCUR(prog->float_substr)
564 - (SvTAIL(prog->float_substr)!=0),
565 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
566 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
567 (s ? "Found" : "Contradicts"),
568 PL_colors[0],
7b0972df
JH
569 (int)(SvCUR(prog->float_substr)
570 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
571 SvPVX(prog->float_substr),
572 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
573 if (!s) {
574 if (last1 == last) {
575 DEBUG_r(PerlIO_printf(Perl_debug_log,
576 ", giving up...\n"));
577 goto fail_finish;
578 }
579 DEBUG_r(PerlIO_printf(Perl_debug_log,
580 ", trying anchored starting at offset %ld...\n",
30944b6d 581 (long)(s1 + 1 - i_strpos)));
6eb5f6b9 582 other_last = last + 1;
2c2d71f5
JH
583 PL_regeol = strend; /* Used in HOP() */
584 s = HOPc(t, 1);
585 goto restart;
586 }
587 else {
588 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 589 (long)(s - i_strpos)));
6eb5f6b9 590 other_last = s + 1;
30944b6d 591 s = s1;
2c2d71f5
JH
592 if (t == strpos)
593 goto try_at_start;
2c2d71f5
JH
594 goto try_at_offset;
595 }
596 }
cad2e5aa 597 }
2c2d71f5
JH
598
599 t = s - prog->check_offset_max;
600 tmp = PL_bostr;
601 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
602 && (!(prog->reganch & ROPT_UTF8)
603 || (PL_bostr = strpos, /* Used in regcopmaybe() */
604 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
605 && t > strpos)))) {
606 PL_bostr = tmp;
607 /* Fixed substring is found far enough so that the match
608 cannot start at strpos. */
609 try_at_offset:
cad2e5aa 610 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
611 /* Eventually fbm_*() should handle this, but often
612 anchored_offset is not 0, so this check will not be wasted. */
613 /* XXXX In the code below we prefer to look for "^" even in
614 presence of anchored substrings. And we search even
615 beyond the found float position. These pessimizations
616 are historical artefacts only. */
617 find_anchor:
2c2d71f5 618 while (t < strend - prog->minlen) {
cad2e5aa 619 if (*t == '\n') {
4ee3650e 620 if (t < check_at - prog->check_offset_min) {
30944b6d 621 if (prog->anchored_substr) {
4ee3650e
GS
622 /* Since we moved from the found position,
623 we definitely contradict the found anchored
30944b6d
IZ
624 substr. Due to the above check we do not
625 contradict "check" substr.
626 Thus we can arrive here only if check substr
627 is float. Redo checking for "other"=="fixed".
628 */
629 strpos = t + 1;
630 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
631 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
632 goto do_other_anchored;
633 }
4ee3650e
GS
634 /* We don't contradict the found floating substring. */
635 /* XXXX Why not check for STCLASS? */
cad2e5aa 636 s = t + 1;
2c2d71f5 637 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 638 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
639 goto set_useful;
640 }
4ee3650e
GS
641 /* Position contradicts check-string */
642 /* XXXX probably better to look for check-string
643 than for "\n", so one should lower the limit for t? */
644 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 645 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 646 other_last = strpos = s = t + 1;
cad2e5aa
JH
647 goto restart;
648 }
649 t++;
650 }
2c2d71f5
JH
651 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
652 PL_colors[0],PL_colors[1]));
653 goto fail_finish;
cad2e5aa 654 }
f5952150
GS
655 else {
656 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
657 PL_colors[0],PL_colors[1]));
658 }
cad2e5aa
JH
659 s = t;
660 set_useful:
2c2d71f5 661 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
662 }
663 else {
2c2d71f5 664 PL_bostr = tmp;
f5952150 665 /* The found string does not prohibit matching at strpos,
2c2d71f5 666 - no optimization of calling REx engine can be performed,
f5952150
GS
667 unless it was an MBOL and we are not after MBOL,
668 or a future STCLASS check will fail this. */
2c2d71f5
JH
669 try_at_start:
670 /* Even in this situation we may use MBOL flag if strpos is offset
671 wrt the start of the string. */
05b4157f 672 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
d506a20d
IZ
673 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n'
674 /* May be due to an implicit anchor of m{.*foo} */
675 && !(prog->reganch & ROPT_IMPLICIT))
676 {
cad2e5aa
JH
677 t = strpos;
678 goto find_anchor;
679 }
30944b6d 680 DEBUG_r( if (ml_anch)
f5952150
GS
681 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
682 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 683 );
2c2d71f5 684 success_at_start:
30944b6d 685 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 686 && prog->check_substr /* Could be deleted already */
cad2e5aa 687 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
688 && prog->check_substr == prog->float_substr)
689 {
cad2e5aa 690 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 691 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
692 SvREFCNT_dec(prog->check_substr);
693 prog->check_substr = Nullsv; /* disable */
694 prog->float_substr = Nullsv; /* clear */
5e39e1e5 695 check = Nullsv; /* abort */
cad2e5aa 696 s = strpos;
3cf5c195
IZ
697 /* XXXX This is a remnant of the old implementation. It
698 looks wasteful, since now INTUIT can use many
6eb5f6b9 699 other heuristics. */
cad2e5aa
JH
700 prog->reganch &= ~RE_USE_INTUIT;
701 }
702 else
703 s = strpos;
704 }
705
6eb5f6b9
JH
706 /* Last resort... */
707 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
708 if (prog->regstclass) {
709 /* minlen == 0 is possible if regstclass is \b or \B,
710 and the fixed substr is ''$.
711 Since minlen is already taken into account, s+1 is before strend;
712 accidentally, minlen >= 1 guaranties no false positives at s + 1
713 even for \b or \B. But (minlen? 1 : 0) below assumes that
714 regstclass does not come from lookahead... */
715 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
716 This leaves EXACTF only, which is dealt with in find_byclass(). */
66e933ab
GS
717 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
718 ? STR_LEN(prog->regstclass)
719 : 1);
6eb5f6b9 720 char *endpos = (prog->anchored_substr || ml_anch)
66e933ab
GS
721 ? s + (prog->minlen? cl_l : 0)
722 : (prog->float_substr ? check_at - start_shift + cl_l
6eb5f6b9 723 : strend) ;
8fdd422b 724 char *startpos = sv && SvPOK(sv) ? strend - SvCUR(sv) : s;
6eb5f6b9
JH
725
726 t = s;
76384e4a
GS
727 if (prog->reganch & ROPT_UTF8) {
728 PL_regdata = prog->data; /* Used by REGINCLASS UTF logic */
729 PL_bostr = startpos;
730 }
6eb5f6b9
JH
731 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
732 if (!s) {
733#ifdef DEBUGGING
734 char *what;
735#endif
736 if (endpos == strend) {
737 DEBUG_r( PerlIO_printf(Perl_debug_log,
738 "Could not match STCLASS...\n") );
739 goto fail;
740 }
66e933ab
GS
741 DEBUG_r( PerlIO_printf(Perl_debug_log,
742 "This position contradicts STCLASS...\n") );
653099ff
GS
743 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
744 goto fail;
6eb5f6b9
JH
745 /* Contradict one of substrings */
746 if (prog->anchored_substr) {
6eb5f6b9
JH
747 if (prog->anchored_substr == check) {
748 DEBUG_r( what = "anchored" );
749 hop_and_restart:
750 PL_regeol = strend; /* Used in HOP() */
751 s = HOPc(t, 1);
66e933ab
GS
752 if (s + start_shift + end_shift > strend) {
753 /* XXXX Should be taken into account earlier? */
754 DEBUG_r( PerlIO_printf(Perl_debug_log,
755 "Could not match STCLASS...\n") );
756 goto fail;
757 }
5e39e1e5
HS
758 if (!check)
759 goto giveup;
6eb5f6b9 760 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 761 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
762 what, (long)(s + start_shift - i_strpos)) );
763 goto restart;
764 }
66e933ab 765 /* Have both, check_string is floating */
6eb5f6b9
JH
766 if (t + start_shift >= check_at) /* Contradicts floating=check */
767 goto retry_floating_check;
768 /* Recheck anchored substring, but not floating... */
769 s = check_at;
5e39e1e5
HS
770 if (!check)
771 goto giveup;
6eb5f6b9 772 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 773 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
774 (long)(other_last - i_strpos)) );
775 goto do_other_anchored;
776 }
60e71179
GS
777 /* Another way we could have checked stclass at the
778 current position only: */
779 if (ml_anch) {
780 s = t = t + 1;
5e39e1e5
HS
781 if (!check)
782 goto giveup;
60e71179 783 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
784 "Looking for /%s^%s/m starting at offset %ld...\n",
785 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 786 goto try_at_offset;
66e933ab 787 }
60e71179
GS
788 if (!prog->float_substr) /* Could have been deleted */
789 goto fail;
6eb5f6b9
JH
790 /* Check is floating subtring. */
791 retry_floating_check:
792 t = check_at - start_shift;
793 DEBUG_r( what = "floating" );
794 goto hop_and_restart;
795 }
796 DEBUG_r( if (t != s)
797 PerlIO_printf(Perl_debug_log,
798 "By STCLASS: moving %ld --> %ld\n",
799 (long)(t - i_strpos), (long)(s - i_strpos));
800 else
801 PerlIO_printf(Perl_debug_log,
802 "Does not contradict STCLASS...\n") );
803 }
5e39e1e5
HS
804 giveup:
805 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
806 PL_colors[4], (check ? "Guessed" : "Giving up"),
807 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 808 return s;
2c2d71f5
JH
809
810 fail_finish: /* Substring not found */
66e933ab
GS
811 if (prog->check_substr) /* could be removed already */
812 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 813 fail:
2c2d71f5 814 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
815 PL_colors[4],PL_colors[5]));
816 return Nullch;
817}
9661b544 818
6eb5f6b9 819/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
820STATIC char *
821S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 822{
6eb5f6b9
JH
823 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
824 char *m;
d8093b23
G
825 STRLEN ln;
826 unsigned int c1;
827 unsigned int c2;
6eb5f6b9
JH
828 char *e;
829 register I32 tmp = 1; /* Scratch variable? */
cad2e5aa 830
6eb5f6b9
JH
831 /* We know what class it must start with. */
832 switch (OP(c)) {
833 case ANYOFUTF8:
834 while (s < strend) {
835 if (REGINCLASSUTF8(c, (U8*)s)) {
836 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 837 goto got_it;
3cf5c195 838 else
6eb5f6b9 839 tmp = doevery;
a0ed51b3 840 }
6eb5f6b9
JH
841 else
842 tmp = 1;
a0ed51b3
LW
843 s += UTF8SKIP(s);
844 }
6eb5f6b9
JH
845 break;
846 case ANYOF:
a0ed51b3 847 while (s < strend) {
d8093b23 848 if (REGINCLASS(c, *(U8*)s)) {
6eb5f6b9
JH
849 if (tmp && (norun || regtry(prog, s)))
850 goto got_it;
851 else
852 tmp = doevery;
a0ed51b3 853 }
6eb5f6b9
JH
854 else
855 tmp = 1;
a0ed51b3 856 s++;
a0d0e21e 857 }
6eb5f6b9
JH
858 break;
859 case EXACTF:
860 m = STRING(c);
861 ln = STR_LEN(c);
d8093b23 862 c1 = *(U8*)m;
6eb5f6b9
JH
863 c2 = PL_fold[c1];
864 goto do_exactf;
865 case EXACTFL:
866 m = STRING(c);
867 ln = STR_LEN(c);
d8093b23 868 c1 = *(U8*)m;
6eb5f6b9
JH
869 c2 = PL_fold_locale[c1];
870 do_exactf:
871 e = strend - ln;
b3c9acc1 872
6eb5f6b9
JH
873 if (norun && e < s)
874 e = s; /* Due to minlen logic of intuit() */
b3c9acc1
IZ
875 /* Here it is NOT UTF! */
876 if (c1 == c2) {
877 while (s <= e) {
d8093b23 878 if ( *(U8*)s == c1
66e933ab
GS
879 && (ln == 1 || !(OP(c) == EXACTF
880 ? ibcmp(s, m, ln)
881 : ibcmp_locale(s, m, ln)))
6eb5f6b9 882 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
883 goto got_it;
884 s++;
885 }
886 } else {
887 while (s <= e) {
d8093b23 888 if ( (*(U8*)s == c1 || *(U8*)s == c2)
66e933ab
GS
889 && (ln == 1 || !(OP(c) == EXACTF
890 ? ibcmp(s, m, ln)
891 : ibcmp_locale(s, m, ln)))
6eb5f6b9 892 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
893 goto got_it;
894 s++;
895 }
896 }
897 break;
bbce6d69 898 case BOUNDL:
3280af22 899 PL_reg_flags |= RF_tainted;
bbce6d69 900 /* FALL THROUGH */
a0d0e21e 901 case BOUND:
6eb5f6b9 902 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 903 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 904 while (s < strend) {
95bac841 905 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 906 tmp = !tmp;
6eb5f6b9 907 if ((norun || regtry(prog, s)))
a0d0e21e 908 goto got_it;
a687059c 909 }
a0d0e21e
LW
910 s++;
911 }
6eb5f6b9 912 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
913 goto got_it;
914 break;
a0ed51b3
LW
915 case BOUNDLUTF8:
916 PL_reg_flags |= RF_tainted;
917 /* FALL THROUGH */
918 case BOUNDUTF8:
8e84507e 919 tmp = (I32)(s != startpos) ? utf8_to_uv_chk(reghop((U8*)s, -1), 0, 0) : '\n';
76384e4a 920 tmp = ((OP(c) == BOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 921 while (s < strend) {
76384e4a 922 if (tmp == !(OP(c) == BOUNDUTF8 ?
dfe13c55
GS
923 swash_fetch(PL_utf8_alnum, (U8*)s) :
924 isALNUM_LC_utf8((U8*)s)))
925 {
a0ed51b3 926 tmp = !tmp;
6eb5f6b9 927 if ((norun || regtry(prog, s)))
a0ed51b3
LW
928 goto got_it;
929 }
930 s += UTF8SKIP(s);
931 }
6eb5f6b9 932 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
933 goto got_it;
934 break;
bbce6d69 935 case NBOUNDL:
3280af22 936 PL_reg_flags |= RF_tainted;
bbce6d69 937 /* FALL THROUGH */
a0d0e21e 938 case NBOUND:
6eb5f6b9 939 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 940 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 941 while (s < strend) {
95bac841 942 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 943 tmp = !tmp;
6eb5f6b9 944 else if ((norun || regtry(prog, s)))
a0d0e21e
LW
945 goto got_it;
946 s++;
947 }
6eb5f6b9 948 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
949 goto got_it;
950 break;
a0ed51b3
LW
951 case NBOUNDLUTF8:
952 PL_reg_flags |= RF_tainted;
953 /* FALL THROUGH */
954 case NBOUNDUTF8:
8e84507e 955 tmp = (I32)(s != startpos) ? utf8_to_uv_chk(reghop((U8*)s, -1), 0, 0) : '\n';
76384e4a 956 tmp = ((OP(c) == NBOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 957 while (s < strend) {
76384e4a 958 if (tmp == !(OP(c) == NBOUNDUTF8 ?
dfe13c55
GS
959 swash_fetch(PL_utf8_alnum, (U8*)s) :
960 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 961 tmp = !tmp;
6eb5f6b9 962 else if ((norun || regtry(prog, s)))
a0ed51b3
LW
963 goto got_it;
964 s += UTF8SKIP(s);
965 }
6eb5f6b9 966 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
967 goto got_it;
968 break;
a0d0e21e
LW
969 case ALNUM:
970 while (s < strend) {
bbce6d69 971 if (isALNUM(*s)) {
6eb5f6b9 972 if (tmp && (norun || regtry(prog, s)))
bbce6d69 973 goto got_it;
974 else
975 tmp = doevery;
976 }
977 else
978 tmp = 1;
979 s++;
980 }
981 break;
a0ed51b3
LW
982 case ALNUMUTF8:
983 while (s < strend) {
dfe13c55 984 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 985 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
986 goto got_it;
987 else
988 tmp = doevery;
989 }
990 else
991 tmp = 1;
992 s += UTF8SKIP(s);
993 }
994 break;
bbce6d69 995 case ALNUML:
3280af22 996 PL_reg_flags |= RF_tainted;
bbce6d69 997 while (s < strend) {
998 if (isALNUM_LC(*s)) {
6eb5f6b9 999 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1000 goto got_it;
a687059c 1001 else
a0d0e21e
LW
1002 tmp = doevery;
1003 }
1004 else
1005 tmp = 1;
1006 s++;
1007 }
1008 break;
a0ed51b3
LW
1009 case ALNUMLUTF8:
1010 PL_reg_flags |= RF_tainted;
1011 while (s < strend) {
dfe13c55 1012 if (isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1013 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1014 goto got_it;
1015 else
1016 tmp = doevery;
1017 }
1018 else
1019 tmp = 1;
1020 s += UTF8SKIP(s);
1021 }
1022 break;
a0d0e21e
LW
1023 case NALNUM:
1024 while (s < strend) {
bbce6d69 1025 if (!isALNUM(*s)) {
6eb5f6b9 1026 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1027 goto got_it;
1028 else
1029 tmp = doevery;
1030 }
1031 else
1032 tmp = 1;
1033 s++;
1034 }
1035 break;
a0ed51b3
LW
1036 case NALNUMUTF8:
1037 while (s < strend) {
dfe13c55 1038 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 1039 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1040 goto got_it;
1041 else
1042 tmp = doevery;
1043 }
1044 else
1045 tmp = 1;
1046 s += UTF8SKIP(s);
1047 }
1048 break;
bbce6d69 1049 case NALNUML:
3280af22 1050 PL_reg_flags |= RF_tainted;
bbce6d69 1051 while (s < strend) {
1052 if (!isALNUM_LC(*s)) {
6eb5f6b9 1053 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1054 goto got_it;
a687059c 1055 else
a0d0e21e 1056 tmp = doevery;
a687059c 1057 }
a0d0e21e
LW
1058 else
1059 tmp = 1;
1060 s++;
1061 }
1062 break;
a0ed51b3
LW
1063 case NALNUMLUTF8:
1064 PL_reg_flags |= RF_tainted;
1065 while (s < strend) {
dfe13c55 1066 if (!isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1067 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1068 goto got_it;
1069 else
1070 tmp = doevery;
1071 }
1072 else
1073 tmp = 1;
1074 s += UTF8SKIP(s);
1075 }
1076 break;
a0d0e21e
LW
1077 case SPACE:
1078 while (s < strend) {
1079 if (isSPACE(*s)) {
6eb5f6b9 1080 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1081 goto got_it;
1082 else
1083 tmp = doevery;
2304df62 1084 }
a0d0e21e
LW
1085 else
1086 tmp = 1;
1087 s++;
1088 }
1089 break;
a0ed51b3
LW
1090 case SPACEUTF8:
1091 while (s < strend) {
dfe13c55 1092 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
6eb5f6b9 1093 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1094 goto got_it;
1095 else
1096 tmp = doevery;
1097 }
1098 else
1099 tmp = 1;
1100 s += UTF8SKIP(s);
1101 }
1102 break;
bbce6d69 1103 case SPACEL:
3280af22 1104 PL_reg_flags |= RF_tainted;
bbce6d69 1105 while (s < strend) {
1106 if (isSPACE_LC(*s)) {
6eb5f6b9 1107 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1108 goto got_it;
1109 else
1110 tmp = doevery;
1111 }
1112 else
1113 tmp = 1;
1114 s++;
1115 }
1116 break;
a0ed51b3
LW
1117 case SPACELUTF8:
1118 PL_reg_flags |= RF_tainted;
1119 while (s < strend) {
dfe13c55 1120 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
6eb5f6b9 1121 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1122 goto got_it;
1123 else
1124 tmp = doevery;
1125 }
1126 else
1127 tmp = 1;
1128 s += UTF8SKIP(s);
1129 }
1130 break;
a0d0e21e
LW
1131 case NSPACE:
1132 while (s < strend) {
1133 if (!isSPACE(*s)) {
6eb5f6b9 1134 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1135 goto got_it;
1136 else
1137 tmp = doevery;
a687059c 1138 }
a0d0e21e
LW
1139 else
1140 tmp = 1;
1141 s++;
1142 }
1143 break;
a0ed51b3
LW
1144 case NSPACEUTF8:
1145 while (s < strend) {
dfe13c55 1146 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
6eb5f6b9 1147 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1148 goto got_it;
1149 else
1150 tmp = doevery;
1151 }
1152 else
1153 tmp = 1;
1154 s += UTF8SKIP(s);
1155 }
1156 break;
bbce6d69 1157 case NSPACEL:
3280af22 1158 PL_reg_flags |= RF_tainted;
bbce6d69 1159 while (s < strend) {
1160 if (!isSPACE_LC(*s)) {
6eb5f6b9 1161 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1162 goto got_it;
1163 else
1164 tmp = doevery;
1165 }
1166 else
1167 tmp = 1;
1168 s++;
1169 }
1170 break;
a0ed51b3
LW
1171 case NSPACELUTF8:
1172 PL_reg_flags |= RF_tainted;
1173 while (s < strend) {
dfe13c55 1174 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
6eb5f6b9 1175 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1176 goto got_it;
1177 else
1178 tmp = doevery;
1179 }
1180 else
1181 tmp = 1;
1182 s += UTF8SKIP(s);
1183 }
1184 break;
a0d0e21e
LW
1185 case DIGIT:
1186 while (s < strend) {
1187 if (isDIGIT(*s)) {
6eb5f6b9 1188 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1189 goto got_it;
1190 else
1191 tmp = doevery;
2b69d0c2 1192 }
a0d0e21e
LW
1193 else
1194 tmp = 1;
1195 s++;
1196 }
1197 break;
a0ed51b3
LW
1198 case DIGITUTF8:
1199 while (s < strend) {
dfe13c55 1200 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1201 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1202 goto got_it;
1203 else
1204 tmp = doevery;
1205 }
1206 else
1207 tmp = 1;
1208 s += UTF8SKIP(s);
1209 }
1210 break;
b8c5462f
JH
1211 case DIGITL:
1212 PL_reg_flags |= RF_tainted;
1213 while (s < strend) {
1214 if (isDIGIT_LC(*s)) {
6eb5f6b9 1215 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1216 goto got_it;
1217 else
1218 tmp = doevery;
1219 }
1220 else
1221 tmp = 1;
1222 s++;
1223 }
1224 break;
1225 case DIGITLUTF8:
1226 PL_reg_flags |= RF_tainted;
1227 while (s < strend) {
1228 if (isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1229 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1230 goto got_it;
1231 else
1232 tmp = doevery;
1233 }
1234 else
1235 tmp = 1;
1236 s += UTF8SKIP(s);
1237 }
1238 break;
a0d0e21e
LW
1239 case NDIGIT:
1240 while (s < strend) {
1241 if (!isDIGIT(*s)) {
6eb5f6b9 1242 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1243 goto got_it;
1244 else
1245 tmp = doevery;
a687059c 1246 }
a0d0e21e
LW
1247 else
1248 tmp = 1;
1249 s++;
1250 }
1251 break;
a0ed51b3
LW
1252 case NDIGITUTF8:
1253 while (s < strend) {
dfe13c55 1254 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1255 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1256 goto got_it;
1257 else
1258 tmp = doevery;
1259 }
1260 else
1261 tmp = 1;
1262 s += UTF8SKIP(s);
1263 }
1264 break;
b8c5462f
JH
1265 case NDIGITL:
1266 PL_reg_flags |= RF_tainted;
1267 while (s < strend) {
1268 if (!isDIGIT_LC(*s)) {
6eb5f6b9 1269 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1270 goto got_it;
1271 else
1272 tmp = doevery;
1273 }
1274 else
1275 tmp = 1;
1276 s++;
a0ed51b3 1277 }
b8c5462f
JH
1278 break;
1279 case NDIGITLUTF8:
1280 PL_reg_flags |= RF_tainted;
1281 while (s < strend) {
1282 if (!isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1283 if (tmp && (norun || regtry(prog, s)))
b8c5462f 1284 goto got_it;
cf93c79d 1285 else
b8c5462f
JH
1286 tmp = doevery;
1287 }
1288 else
1289 tmp = 1;
1290 s += UTF8SKIP(s);
1291 }
1292 break;
b3c9acc1 1293 default:
3c3eec57
GS
1294 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1295 break;
d6a28714 1296 }
6eb5f6b9
JH
1297 return 0;
1298 got_it:
1299 return s;
1300}
1301
1302/*
1303 - regexec_flags - match a regexp against a string
1304 */
1305I32
1306Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1307 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1308/* strend: pointer to null at end of string */
1309/* strbeg: real beginning of string */
1310/* minend: end of match must be >=minend after stringarg. */
1311/* data: May be used for some additional optimizations. */
1312/* nosave: For optimizations. */
1313{
1314 dTHR;
1315 register char *s;
1316 register regnode *c;
1317 register char *startpos = stringarg;
6eb5f6b9
JH
1318 I32 minlen; /* must match at least this many chars */
1319 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1320 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1321 constant substr. */ /* CC */
1322 I32 end_shift = 0; /* Same for the end. */ /* CC */
1323 I32 scream_pos = -1; /* Internal iterator of scream. */
1324 char *scream_olds;
1325 SV* oreplsv = GvSV(PL_replgv);
1326
1327 PL_regcc = 0;
1328
1329 cache_re(prog);
1330#ifdef DEBUGGING
1331 PL_regnarrate = PL_debug & 512;
1332#endif
1333
1334 /* Be paranoid... */
1335 if (prog == NULL || startpos == NULL) {
1336 Perl_croak(aTHX_ "NULL regexp parameter");
1337 return 0;
1338 }
1339
1340 minlen = prog->minlen;
1341 if (strend - startpos < minlen) goto phooey;
1342
1343 if (startpos == strbeg) /* is ^ valid at stringarg? */
1344 PL_regprev = '\n';
1345 else {
1346 PL_regprev = (U32)stringarg[-1];
1347 if (!PL_multiline && PL_regprev == '\n')
1348 PL_regprev = '\0'; /* force ^ to NOT match */
1349 }
1350
1351 /* Check validity of program. */
1352 if (UCHARAT(prog->program) != REG_MAGIC) {
1353 Perl_croak(aTHX_ "corrupted regexp program");
1354 }
1355
1356 PL_reg_flags = 0;
1357 PL_reg_eval_set = 0;
1358 PL_reg_maxiter = 0;
1359
1360 if (prog->reganch & ROPT_UTF8)
1361 PL_reg_flags |= RF_utf8;
1362
1363 /* Mark beginning of line for ^ and lookbehind. */
1364 PL_regbol = startpos;
1365 PL_bostr = strbeg;
1366 PL_reg_sv = sv;
1367
1368 /* Mark end of line for $ (and such) */
1369 PL_regeol = strend;
1370
1371 /* see how far we have to get to not match where we matched before */
1372 PL_regtill = startpos+minend;
1373
1374 /* We start without call_cc context. */
1375 PL_reg_call_cc = 0;
1376
1377 /* If there is a "must appear" string, look for it. */
1378 s = startpos;
1379
1380 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1381 MAGIC *mg;
1382
1383 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1384 PL_reg_ganch = startpos;
1385 else if (sv && SvTYPE(sv) >= SVt_PVMG
1386 && SvMAGIC(sv)
1387 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1388 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1389 if (prog->reganch & ROPT_ANCH_GPOS) {
1390 if (s > PL_reg_ganch)
1391 goto phooey;
1392 s = PL_reg_ganch;
1393 }
1394 }
1395 else /* pos() not defined */
1396 PL_reg_ganch = strbeg;
1397 }
1398
1399 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1400 re_scream_pos_data d;
1401
1402 d.scream_olds = &scream_olds;
1403 d.scream_pos = &scream_pos;
1404 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1405 if (!s)
1406 goto phooey; /* not present */
1407 }
1408
1409 DEBUG_r( if (!PL_colorset) reginitcolors() );
1410 DEBUG_r(PerlIO_printf(Perl_debug_log,
1411 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1412 PL_colors[4],PL_colors[5],PL_colors[0],
1413 prog->precomp,
1414 PL_colors[1],
1415 (strlen(prog->precomp) > 60 ? "..." : ""),
1416 PL_colors[0],
1417 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1418 startpos, PL_colors[1],
1419 (strend - startpos > 60 ? "..." : ""))
1420 );
1421
1422 /* Simplest case: anchored match need be tried only once. */
1423 /* [unless only anchor is BOL and multiline is set] */
1424 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1425 if (s == startpos && regtry(prog, startpos))
1426 goto got_it;
1427 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1428 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1429 {
1430 char *end;
1431
1432 if (minlen)
1433 dontbother = minlen - 1;
1434 end = HOPc(strend, -dontbother) - 1;
1435 /* for multiline we only have to try after newlines */
1436 if (prog->check_substr) {
1437 if (s == startpos)
1438 goto after_try;
1439 while (1) {
1440 if (regtry(prog, s))
1441 goto got_it;
1442 after_try:
1443 if (s >= end)
1444 goto phooey;
1445 if (prog->reganch & RE_USE_INTUIT) {
1446 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1447 if (!s)
1448 goto phooey;
1449 }
1450 else
1451 s++;
1452 }
1453 } else {
1454 if (s > startpos)
1455 s--;
1456 while (s < end) {
1457 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1458 if (regtry(prog, s))
1459 goto got_it;
1460 }
1461 }
1462 }
1463 }
1464 goto phooey;
1465 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1466 if (regtry(prog, PL_reg_ganch))
1467 goto got_it;
1468 goto phooey;
1469 }
1470
1471 /* Messy cases: unanchored match. */
1472 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1473 /* we have /x+whatever/ */
1474 /* it must be a one character string (XXXX Except UTF?) */
1475 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1476#ifdef DEBUGGING
1477 int did_match = 0;
1478#endif
1479
6eb5f6b9
JH
1480 if (UTF) {
1481 while (s < strend) {
1482 if (*s == ch) {
bf93d4cc 1483 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1484 if (regtry(prog, s)) goto got_it;
1485 s += UTF8SKIP(s);
1486 while (s < strend && *s == ch)
1487 s += UTF8SKIP(s);
1488 }
1489 s += UTF8SKIP(s);
1490 }
1491 }
1492 else {
1493 while (s < strend) {
1494 if (*s == ch) {
bf93d4cc 1495 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1496 if (regtry(prog, s)) goto got_it;
1497 s++;
1498 while (s < strend && *s == ch)
1499 s++;
1500 }
1501 s++;
1502 }
1503 }
bf93d4cc
GS
1504 DEBUG_r(did_match ||
1505 PerlIO_printf(Perl_debug_log,
1506 "Did not find anchored character...\n"));
6eb5f6b9
JH
1507 }
1508 /*SUPPRESS 560*/
1509 else if (prog->anchored_substr != Nullsv
1510 || (prog->float_substr != Nullsv
1511 && prog->float_max_offset < strend - s)) {
1512 SV *must = prog->anchored_substr
1513 ? prog->anchored_substr : prog->float_substr;
1514 I32 back_max =
1515 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1516 I32 back_min =
1517 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
6eb5f6b9
JH
1518 char *last = HOPc(strend, /* Cannot start after this */
1519 -(I32)(CHR_SVLEN(must)
1520 - (SvTAIL(must) != 0) + back_min));
1521 char *last1; /* Last position checked before */
bf93d4cc
GS
1522#ifdef DEBUGGING
1523 int did_match = 0;
1524#endif
6eb5f6b9
JH
1525
1526 if (s > PL_bostr)
1527 last1 = HOPc(s, -1);
1528 else
1529 last1 = s - 1; /* bogus */
1530
1531 /* XXXX check_substr already used to find `s', can optimize if
1532 check_substr==must. */
1533 scream_pos = -1;
1534 dontbother = end_shift;
1535 strend = HOPc(strend, -dontbother);
1536 while ( (s <= last) &&
1537 ((flags & REXEC_SCREAM)
1538 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1539 end_shift, &scream_pos, 0))
1540 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1541 (unsigned char*)strend, must,
1542 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1543 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1544 if (HOPc(s, -back_max) > last1) {
1545 last1 = HOPc(s, -back_min);
1546 s = HOPc(s, -back_max);
1547 }
1548 else {
1549 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1550
1551 last1 = HOPc(s, -back_min);
1552 s = t;
1553 }
1554 if (UTF) {
1555 while (s <= last1) {
1556 if (regtry(prog, s))
1557 goto got_it;
1558 s += UTF8SKIP(s);
1559 }
1560 }
1561 else {
1562 while (s <= last1) {
1563 if (regtry(prog, s))
1564 goto got_it;
1565 s++;
1566 }
1567 }
1568 }
bf93d4cc
GS
1569 DEBUG_r(did_match ||
1570 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1571 ((must == prog->anchored_substr)
1572 ? "anchored" : "floating"),
1573 PL_colors[0],
1574 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1575 SvPVX(must),
1576 PL_colors[1], (SvTAIL(must) ? "$" : "")));
6eb5f6b9
JH
1577 goto phooey;
1578 }
155aba94 1579 else if ((c = prog->regstclass)) {
66e933ab
GS
1580 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1581 /* don't bother with what can't match */
6eb5f6b9
JH
1582 strend = HOPc(strend, -(minlen - 1));
1583 if (find_byclass(prog, c, s, strend, startpos, 0))
1584 goto got_it;
bf93d4cc 1585 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1586 }
1587 else {
1588 dontbother = 0;
1589 if (prog->float_substr != Nullsv) { /* Trim the end. */
1590 char *last;
d6a28714
JH
1591
1592 if (flags & REXEC_SCREAM) {
1593 last = screaminstr(sv, prog->float_substr, s - strbeg,
1594 end_shift, &scream_pos, 1); /* last one */
1595 if (!last)
1596 last = scream_olds; /* Only one occurence. */
b8c5462f 1597 }
d6a28714
JH
1598 else {
1599 STRLEN len;
1600 char *little = SvPV(prog->float_substr, len);
1601
1602 if (SvTAIL(prog->float_substr)) {
1603 if (memEQ(strend - len + 1, little, len - 1))
1604 last = strend - len + 1;
1605 else if (!PL_multiline)
1606 last = memEQ(strend - len, little, len)
1607 ? strend - len : Nullch;
b8c5462f 1608 else
d6a28714
JH
1609 goto find_last;
1610 } else {
1611 find_last:
1612 if (len)
1613 last = rninstr(s, strend, little, little + len);
b8c5462f 1614 else
d6a28714 1615 last = strend; /* matching `$' */
b8c5462f 1616 }
b8c5462f 1617 }
bf93d4cc
GS
1618 if (last == NULL) {
1619 DEBUG_r(PerlIO_printf(Perl_debug_log,
1620 "%sCan't trim the tail, match fails (should not happen)%s\n",
1621 PL_colors[4],PL_colors[5]));
1622 goto phooey; /* Should not happen! */
1623 }
d6a28714
JH
1624 dontbother = strend - last + prog->float_min_offset;
1625 }
1626 if (minlen && (dontbother < minlen))
1627 dontbother = minlen - 1;
1628 strend -= dontbother; /* this one's always in bytes! */
1629 /* We don't know much -- general case. */
1630 if (UTF) {
1631 for (;;) {
1632 if (regtry(prog, s))
1633 goto got_it;
1634 if (s >= strend)
1635 break;
b8c5462f 1636 s += UTF8SKIP(s);
d6a28714
JH
1637 };
1638 }
1639 else {
1640 do {
1641 if (regtry(prog, s))
1642 goto got_it;
1643 } while (s++ < strend);
1644 }
1645 }
1646
1647 /* Failure. */
1648 goto phooey;
1649
1650got_it:
1651 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1652
1653 if (PL_reg_eval_set) {
1654 /* Preserve the current value of $^R */
1655 if (oreplsv != GvSV(PL_replgv))
1656 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1657 restored, the value remains
1658 the same. */
1659 restore_pos(aTHXo_ 0);
1660 }
1661
1662 /* make sure $`, $&, $', and $digit will work later */
1663 if ( !(flags & REXEC_NOT_FIRST) ) {
1664 if (RX_MATCH_COPIED(prog)) {
1665 Safefree(prog->subbeg);
1666 RX_MATCH_COPIED_off(prog);
1667 }
1668 if (flags & REXEC_COPY_STR) {
1669 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1670
1671 s = savepvn(strbeg, i);
1672 prog->subbeg = s;
1673 prog->sublen = i;
1674 RX_MATCH_COPIED_on(prog);
1675 }
1676 else {
1677 prog->subbeg = strbeg;
1678 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1679 }
1680 }
1681
1682 return 1;
1683
1684phooey:
bf93d4cc
GS
1685 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1686 PL_colors[4],PL_colors[5]));
d6a28714
JH
1687 if (PL_reg_eval_set)
1688 restore_pos(aTHXo_ 0);
1689 return 0;
1690}
1691
1692/*
1693 - regtry - try match at specific point
1694 */
1695STATIC I32 /* 0 failure, 1 success */
1696S_regtry(pTHX_ regexp *prog, char *startpos)
1697{
1698 dTHR;
1699 register I32 i;
1700 register I32 *sp;
1701 register I32 *ep;
1702 CHECKPOINT lastcp;
1703
1704 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1705 MAGIC *mg;
1706
1707 PL_reg_eval_set = RS_init;
1708 DEBUG_r(DEBUG_s(
b900a521
JH
1709 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1710 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1711 ));
e8347627 1712 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1713 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1714 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1715 SAVETMPS;
1716 /* Apparently this is not needed, judging by wantarray. */
e8347627 1717 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1718 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1719
1720 if (PL_reg_sv) {
1721 /* Make $_ available to executed code. */
1722 if (PL_reg_sv != DEFSV) {
1723 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1724 SAVESPTR(DEFSV);
1725 DEFSV = PL_reg_sv;
b8c5462f 1726 }
d6a28714
JH
1727
1728 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1729 && (mg = mg_find(PL_reg_sv, 'g')))) {
1730 /* prepare for quick setting of pos */
1731 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1732 mg = mg_find(PL_reg_sv, 'g');
1733 mg->mg_len = -1;
b8c5462f 1734 }
d6a28714
JH
1735 PL_reg_magic = mg;
1736 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1737 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1738 }
1739 if (!PL_reg_curpm)
0f79a09d 1740 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1741 PL_reg_curpm->op_pmregexp = prog;
1742 PL_reg_oldcurpm = PL_curpm;
1743 PL_curpm = PL_reg_curpm;
1744 if (RX_MATCH_COPIED(prog)) {
1745 /* Here is a serious problem: we cannot rewrite subbeg,
1746 since it may be needed if this match fails. Thus
1747 $` inside (?{}) could fail... */
1748 PL_reg_oldsaved = prog->subbeg;
1749 PL_reg_oldsavedlen = prog->sublen;
1750 RX_MATCH_COPIED_off(prog);
1751 }
1752 else
1753 PL_reg_oldsaved = Nullch;
1754 prog->subbeg = PL_bostr;
1755 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1756 }
1757 prog->startp[0] = startpos - PL_bostr;
1758 PL_reginput = startpos;
1759 PL_regstartp = prog->startp;
1760 PL_regendp = prog->endp;
1761 PL_reglastparen = &prog->lastparen;
1762 prog->lastparen = 0;
1763 PL_regsize = 0;
1764 DEBUG_r(PL_reg_starttry = startpos);
1765 if (PL_reg_start_tmpl <= prog->nparens) {
1766 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1767 if(PL_reg_start_tmp)
1768 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1769 else
1770 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1771 }
1772
1773 /* XXXX What this code is doing here?!!! There should be no need
1774 to do this again and again, PL_reglastparen should take care of
1775 this! */
1776 sp = prog->startp;
1777 ep = prog->endp;
1778 if (prog->nparens) {
1779 for (i = prog->nparens; i >= 1; i--) {
1780 *++sp = -1;
1781 *++ep = -1;
1782 }
1783 }
1784 REGCP_SET;
1785 if (regmatch(prog->program + 1)) {
1786 prog->endp[0] = PL_reginput - PL_bostr;
1787 return 1;
1788 }
1789 REGCP_UNWIND;
1790 return 0;
1791}
1792
1793/*
1794 - regmatch - main matching routine
1795 *
1796 * Conceptually the strategy is simple: check to see whether the current
1797 * node matches, call self recursively to see whether the rest matches,
1798 * and then act accordingly. In practice we make some effort to avoid
1799 * recursion, in particular by going through "ordinary" nodes (that don't
1800 * need to know whether the rest of the match failed) by a loop instead of
1801 * by recursion.
1802 */
1803/* [lwall] I've hoisted the register declarations to the outer block in order to
1804 * maybe save a little bit of pushing and popping on the stack. It also takes
1805 * advantage of machines that use a register save mask on subroutine entry.
1806 */
1807STATIC I32 /* 0 failure, 1 success */
1808S_regmatch(pTHX_ regnode *prog)
1809{
1810 dTHR;
1811 register regnode *scan; /* Current node. */
1812 regnode *next; /* Next node. */
1813 regnode *inner; /* Next node in internal branch. */
1814 register I32 nextchr; /* renamed nextchr - nextchar colides with
1815 function of same name */
1816 register I32 n; /* no or next */
1817 register I32 ln; /* len or last */
1818 register char *s; /* operand or save */
1819 register char *locinput = PL_reginput;
1820 register I32 c1, c2, paren; /* case fold search, parenth */
1821 int minmod = 0, sw = 0, logical = 0;
1822#ifdef DEBUGGING
1823 PL_regindent++;
1824#endif
1825
1826 /* Note that nextchr is a byte even in UTF */
1827 nextchr = UCHARAT(locinput);
1828 scan = prog;
1829 while (scan != NULL) {
1830#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1831#ifdef DEBUGGING
1832# define sayYES goto yes
1833# define sayNO goto no
7821416a
IZ
1834# define sayYES_FINAL goto yes_final
1835# define sayYES_LOUD goto yes_loud
1836# define sayNO_FINAL goto no_final
1837# define sayNO_SILENT goto do_no
d6a28714
JH
1838# define saySAME(x) if (x) goto yes; else goto no
1839# define REPORT_CODE_OFF 24
1840#else
1841# define sayYES return 1
1842# define sayNO return 0
7821416a
IZ
1843# define sayYES_FINAL return 1
1844# define sayYES_LOUD return 1
1845# define sayNO_FINAL return 0
1846# define sayNO_SILENT return 0
d6a28714
JH
1847# define saySAME(x) return x
1848#endif
1849 DEBUG_r( {
1850 SV *prop = sv_newmortal();
1851 int docolor = *PL_colors[0];
1852 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1853 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1854 /* The part of the string before starttry has one color
1855 (pref0_len chars), between starttry and current
1856 position another one (pref_len - pref0_len chars),
1857 after the current position the third one.
1858 We assume that pref0_len <= pref_len, otherwise we
1859 decrease pref0_len. */
1860 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1861 ? (5 + taill) - l : locinput - PL_bostr);
1862 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1863
1864 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1865 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1866 ? (5 + taill) - pref_len : PL_regeol - locinput);
1867 if (pref0_len < 0)
1868 pref0_len = 0;
1869 if (pref0_len > pref_len)
1870 pref0_len = pref_len;
1871 regprop(prop, scan);
1872 PerlIO_printf(Perl_debug_log,
b900a521
JH
1873 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1874 (IV)(locinput - PL_bostr),
d6a28714
JH
1875 PL_colors[4], pref0_len,
1876 locinput - pref_len, PL_colors[5],
1877 PL_colors[2], pref_len - pref0_len,
1878 locinput - pref_len + pref0_len, PL_colors[3],
1879 (docolor ? "" : "> <"),
1880 PL_colors[0], l, locinput, PL_colors[1],
1881 15 - l - pref_len + 1,
1882 "",
b900a521 1883 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
1884 SvPVX(prop));
1885 } );
1886
1887 next = scan + NEXT_OFF(scan);
1888 if (next == scan)
1889 next = NULL;
1890
1891 switch (OP(scan)) {
1892 case BOL:
1893 if (locinput == PL_bostr
1894 ? PL_regprev == '\n'
1895 : (PL_multiline &&
1896 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1897 {
1898 /* regtill = regbol; */
b8c5462f
JH
1899 break;
1900 }
d6a28714
JH
1901 sayNO;
1902 case MBOL:
1903 if (locinput == PL_bostr
1904 ? PL_regprev == '\n'
1905 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1906 {
b8c5462f
JH
1907 break;
1908 }
d6a28714
JH
1909 sayNO;
1910 case SBOL:
c2a73568 1911 if (locinput == PL_bostr)
b8c5462f 1912 break;
d6a28714
JH
1913 sayNO;
1914 case GPOS:
1915 if (locinput == PL_reg_ganch)
1916 break;
1917 sayNO;
1918 case EOL:
1919 if (PL_multiline)
1920 goto meol;
1921 else
1922 goto seol;
1923 case MEOL:
1924 meol:
1925 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1926 sayNO;
b8c5462f 1927 break;
d6a28714
JH
1928 case SEOL:
1929 seol:
1930 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1931 sayNO;
d6a28714 1932 if (PL_regeol - locinput > 1)
b8c5462f 1933 sayNO;
b8c5462f 1934 break;
d6a28714
JH
1935 case EOS:
1936 if (PL_regeol != locinput)
b8c5462f 1937 sayNO;
d6a28714
JH
1938 break;
1939 case SANYUTF8:
b8c5462f 1940 if (nextchr & 0x80) {
b8c5462f 1941 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1942 if (locinput > PL_regeol)
1943 sayNO;
b8c5462f
JH
1944 nextchr = UCHARAT(locinput);
1945 break;
1946 }
d6a28714 1947 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1948 sayNO;
b8c5462f 1949 nextchr = UCHARAT(++locinput);
a0d0e21e 1950 break;
d6a28714
JH
1951 case SANY:
1952 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
1953 sayNO;
1954 nextchr = UCHARAT(++locinput);
b85d18e9 1955 break;
d6a28714 1956 case ANYUTF8:
a0ed51b3 1957 if (nextchr & 0x80) {
b8c5462f 1958 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1959 if (locinput > PL_regeol)
1960 sayNO;
a0ed51b3
LW
1961 nextchr = UCHARAT(locinput);
1962 break;
1963 }
155aba94 1964 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
a0ed51b3
LW
1965 sayNO;
1966 nextchr = UCHARAT(++locinput);
1967 break;
d6a28714 1968 case REG_ANY:
155aba94 1969 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
4633a7c4 1970 sayNO;
76e3520e 1971 nextchr = UCHARAT(++locinput);
a0d0e21e 1972 break;
d6a28714 1973 case EXACT:
cd439c50
IZ
1974 s = STRING(scan);
1975 ln = STR_LEN(scan);
d6a28714
JH
1976 /* Inline the first character, for speed. */
1977 if (UCHARAT(s) != nextchr)
1978 sayNO;
1979 if (PL_regeol - locinput < ln)
1980 sayNO;
1981 if (ln > 1 && memNE(s, locinput, ln))
1982 sayNO;
1983 locinput += ln;
1984 nextchr = UCHARAT(locinput);
1985 break;
1986 case EXACTFL:
b8c5462f
JH
1987 PL_reg_flags |= RF_tainted;
1988 /* FALL THROUGH */
d6a28714 1989 case EXACTF:
cd439c50
IZ
1990 s = STRING(scan);
1991 ln = STR_LEN(scan);
d6a28714
JH
1992
1993 if (UTF) {
1994 char *l = locinput;
1995 char *e = s + ln;
1996 c1 = OP(scan) == EXACTF;
1997 while (s < e) {
1998 if (l >= PL_regeol)
1999 sayNO;
8e84507e 2000 if (utf8_to_uv_chk((U8*)s, 0, 0) != (c1 ?
d6a28714
JH
2001 toLOWER_utf8((U8*)l) :
2002 toLOWER_LC_utf8((U8*)l)))
2003 {
2004 sayNO;
2005 }
2006 s += UTF8SKIP(s);
2007 l += UTF8SKIP(l);
b8c5462f 2008 }
d6a28714 2009 locinput = l;
a0ed51b3
LW
2010 nextchr = UCHARAT(locinput);
2011 break;
2012 }
d6a28714
JH
2013
2014 /* Inline the first character, for speed. */
2015 if (UCHARAT(s) != nextchr &&
2016 UCHARAT(s) != ((OP(scan) == EXACTF)
2017 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2018 sayNO;
d6a28714 2019 if (PL_regeol - locinput < ln)
b8c5462f 2020 sayNO;
d6a28714
JH
2021 if (ln > 1 && (OP(scan) == EXACTF
2022 ? ibcmp(s, locinput, ln)
2023 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2024 sayNO;
d6a28714
JH
2025 locinput += ln;
2026 nextchr = UCHARAT(locinput);
a0d0e21e 2027 break;
d6a28714 2028 case ANYOFUTF8:
d6a28714 2029 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 2030 sayNO;
d6a28714
JH
2031 if (locinput >= PL_regeol)
2032 sayNO;
2033 locinput += PL_utf8skip[nextchr];
2034 nextchr = UCHARAT(locinput);
2035 break;
2036 case ANYOF:
d6a28714 2037 if (nextchr < 0)
b8c5462f 2038 nextchr = UCHARAT(locinput);
936ed897 2039 if (!REGINCLASS(scan, nextchr))
d6a28714
JH
2040 sayNO;
2041 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2042 sayNO;
b8c5462f
JH
2043 nextchr = UCHARAT(++locinput);
2044 break;
d6a28714 2045 case ALNUML:
b8c5462f
JH
2046 PL_reg_flags |= RF_tainted;
2047 /* FALL THROUGH */
d6a28714 2048 case ALNUM:
b8c5462f 2049 if (!nextchr)
4633a7c4 2050 sayNO;
d6a28714
JH
2051 if (!(OP(scan) == ALNUM
2052 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
2053 sayNO;
2054 nextchr = UCHARAT(++locinput);
bbce6d69 2055 break;
d6a28714 2056 case ALNUMLUTF8:
3280af22 2057 PL_reg_flags |= RF_tainted;
bbce6d69 2058 /* FALL THROUGH */
d6a28714 2059 case ALNUMUTF8:
b8c5462f
JH
2060 if (!nextchr)
2061 sayNO;
2062 if (nextchr & 0x80) {
d6a28714
JH
2063 if (!(OP(scan) == ALNUMUTF8
2064 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2065 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2066 {
2067 sayNO;
a0ed51b3 2068 }
b8c5462f 2069 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2070 nextchr = UCHARAT(locinput);
2071 break;
2072 }
d6a28714
JH
2073 if (!(OP(scan) == ALNUMUTF8
2074 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2075 sayNO;
b8c5462f 2076 nextchr = UCHARAT(++locinput);
a0d0e21e 2077 break;
d6a28714 2078 case NALNUML:
b8c5462f
JH
2079 PL_reg_flags |= RF_tainted;
2080 /* FALL THROUGH */
d6a28714
JH
2081 case NALNUM:
2082 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2083 sayNO;
d6a28714
JH
2084 if (OP(scan) == NALNUM
2085 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 2086 sayNO;
b8c5462f 2087 nextchr = UCHARAT(++locinput);
a0ed51b3 2088 break;
d6a28714 2089 case NALNUMLUTF8:
b8c5462f
JH
2090 PL_reg_flags |= RF_tainted;
2091 /* FALL THROUGH */
d6a28714 2092 case NALNUMUTF8:
3280af22 2093 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2094 sayNO;
b8c5462f 2095 if (nextchr & 0x80) {
d6a28714
JH
2096 if (OP(scan) == NALNUMUTF8
2097 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2098 : isALNUM_LC_utf8((U8*)locinput))
2099 {
b8c5462f 2100 sayNO;
d6a28714 2101 }
b8c5462f
JH
2102 locinput += PL_utf8skip[nextchr];
2103 nextchr = UCHARAT(locinput);
2104 break;
2105 }
d6a28714
JH
2106 if (OP(scan) == NALNUMUTF8
2107 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2108 sayNO;
76e3520e 2109 nextchr = UCHARAT(++locinput);
a0d0e21e 2110 break;
d6a28714
JH
2111 case BOUNDL:
2112 case NBOUNDL:
3280af22 2113 PL_reg_flags |= RF_tainted;
bbce6d69 2114 /* FALL THROUGH */
d6a28714
JH
2115 case BOUND:
2116 case NBOUND:
2117 /* was last char in word? */
2118 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2119 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2120 ln = isALNUM(ln);
2121 n = isALNUM(nextchr);
2122 }
2123 else {
2124 ln = isALNUM_LC(ln);
2125 n = isALNUM_LC(nextchr);
2126 }
2127 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 2128 sayNO;
a0d0e21e 2129 break;
d6a28714
JH
2130 case BOUNDLUTF8:
2131 case NBOUNDLUTF8:
a0ed51b3
LW
2132 PL_reg_flags |= RF_tainted;
2133 /* FALL THROUGH */
d6a28714
JH
2134 case BOUNDUTF8:
2135 case NBOUNDUTF8:
2136 /* was last char in word? */
2137 ln = (locinput != PL_regbol)
8e84507e 2138 ? utf8_to_uv_chk(reghop((U8*)locinput, -1), 0, 0) : PL_regprev;
d6a28714
JH
2139 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2140 ln = isALNUM_uni(ln);
2141 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 2142 }
d6a28714
JH
2143 else {
2144 ln = isALNUM_LC_uni(ln);
2145 n = isALNUM_LC_utf8((U8*)locinput);
2146 }
2147 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 2148 sayNO;
a0ed51b3 2149 break;
d6a28714 2150 case SPACEL:
3280af22 2151 PL_reg_flags |= RF_tainted;
bbce6d69 2152 /* FALL THROUGH */
d6a28714 2153 case SPACE:
9442cb0e 2154 if (!nextchr)
4633a7c4 2155 sayNO;
d6a28714
JH
2156 if (!(OP(scan) == SPACE
2157 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 2158 sayNO;
76e3520e 2159 nextchr = UCHARAT(++locinput);
a0d0e21e 2160 break;
d6a28714 2161 case SPACELUTF8:
a0ed51b3
LW
2162 PL_reg_flags |= RF_tainted;
2163 /* FALL THROUGH */
d6a28714 2164 case SPACEUTF8:
9442cb0e 2165 if (!nextchr)
a0ed51b3
LW
2166 sayNO;
2167 if (nextchr & 0x80) {
d6a28714 2168 if (!(OP(scan) == SPACEUTF8
9442cb0e 2169 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714
JH
2170 : isSPACE_LC_utf8((U8*)locinput)))
2171 {
a0ed51b3 2172 sayNO;
d6a28714 2173 }
6f06b55f 2174 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2175 nextchr = UCHARAT(locinput);
2176 break;
2177 }
d6a28714
JH
2178 if (!(OP(scan) == SPACEUTF8
2179 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
2180 sayNO;
2181 nextchr = UCHARAT(++locinput);
2182 break;
d6a28714 2183 case NSPACEL:
3280af22 2184 PL_reg_flags |= RF_tainted;
bbce6d69 2185 /* FALL THROUGH */
d6a28714 2186 case NSPACE:
9442cb0e 2187 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2188 sayNO;
9442cb0e 2189 if (OP(scan) == NSPACE
d6a28714 2190 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2191 sayNO;
b8c5462f 2192 nextchr = UCHARAT(++locinput);
a0d0e21e 2193 break;
d6a28714 2194 case NSPACELUTF8:
a0ed51b3
LW
2195 PL_reg_flags |= RF_tainted;
2196 /* FALL THROUGH */
d6a28714 2197 case NSPACEUTF8:
9442cb0e 2198 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
2199 sayNO;
2200 if (nextchr & 0x80) {
d6a28714 2201 if (OP(scan) == NSPACEUTF8
9442cb0e 2202 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714 2203 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2204 {
2205 sayNO;
2206 }
2207 locinput += PL_utf8skip[nextchr];
2208 nextchr = UCHARAT(locinput);
2209 break;
a0ed51b3 2210 }
d6a28714
JH
2211 if (OP(scan) == NSPACEUTF8
2212 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2213 sayNO;
76e3520e 2214 nextchr = UCHARAT(++locinput);
a0d0e21e 2215 break;
d6a28714 2216 case DIGITL:
a0ed51b3
LW
2217 PL_reg_flags |= RF_tainted;
2218 /* FALL THROUGH */
d6a28714 2219 case DIGIT:
9442cb0e 2220 if (!nextchr)
a0ed51b3 2221 sayNO;
d6a28714
JH
2222 if (!(OP(scan) == DIGIT
2223 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2224 sayNO;
76e3520e 2225 nextchr = UCHARAT(++locinput);
a0d0e21e 2226 break;
d6a28714 2227 case DIGITLUTF8:
a0ed51b3
LW
2228 PL_reg_flags |= RF_tainted;
2229 /* FALL THROUGH */
d6a28714 2230 case DIGITUTF8:
a0ed51b3
LW
2231 if (!nextchr)
2232 sayNO;
2233 if (nextchr & 0x80) {
9442cb0e
GS
2234 if (!(OP(scan) == DIGITUTF8
2235 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2236 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2237 {
a0ed51b3 2238 sayNO;
dfe13c55 2239 }
6f06b55f 2240 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2241 nextchr = UCHARAT(locinput);
2242 break;
2243 }
9442cb0e
GS
2244 if (!(OP(scan) == DIGITUTF8
2245 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2246 sayNO;
2247 nextchr = UCHARAT(++locinput);
2248 break;
d6a28714 2249 case NDIGITL:
b8c5462f
JH
2250 PL_reg_flags |= RF_tainted;
2251 /* FALL THROUGH */
d6a28714 2252 case NDIGIT:
9442cb0e 2253 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2254 sayNO;
9442cb0e 2255 if (OP(scan) == NDIGIT
d6a28714 2256 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2257 sayNO;
76e3520e 2258 nextchr = UCHARAT(++locinput);
a0d0e21e 2259 break;
d6a28714 2260 case NDIGITLUTF8:
b8c5462f
JH
2261 PL_reg_flags |= RF_tainted;
2262 /* FALL THROUGH */
d6a28714 2263 case NDIGITUTF8:
b8c5462f
JH
2264 if (!nextchr && locinput >= PL_regeol)
2265 sayNO;
a0ed51b3 2266 if (nextchr & 0x80) {
9442cb0e
GS
2267 if (OP(scan) == NDIGITUTF8
2268 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2269 : isDIGIT_LC_utf8((U8*)locinput))
2270 {
a0ed51b3 2271 sayNO;
9442cb0e 2272 }
6f06b55f 2273 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2274 nextchr = UCHARAT(locinput);
2275 break;
2276 }
9442cb0e
GS
2277 if (OP(scan) == NDIGITUTF8
2278 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2279 sayNO;
2280 nextchr = UCHARAT(++locinput);
2281 break;
2282 case CLUMP:
dfe13c55 2283 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2284 sayNO;
6f06b55f 2285 locinput += PL_utf8skip[nextchr];
dfe13c55 2286 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2287 locinput += UTF8SKIP(locinput);
2288 if (locinput > PL_regeol)
2289 sayNO;
2290 nextchr = UCHARAT(locinput);
2291 break;
c8756f30 2292 case REFFL:
3280af22 2293 PL_reg_flags |= RF_tainted;
c8756f30 2294 /* FALL THROUGH */
c277df42 2295 case REF:
c8756f30 2296 case REFF:
c277df42 2297 n = ARG(scan); /* which paren pair */
cf93c79d 2298 ln = PL_regstartp[n];
2c2d71f5 2299 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2300 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2301 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2302 if (ln == PL_regendp[n])
a0d0e21e 2303 break;
a0ed51b3 2304
cf93c79d 2305 s = PL_bostr + ln;
a0ed51b3
LW
2306 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2307 char *l = locinput;
cf93c79d 2308 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2309 /*
2310 * Note that we can't do the "other character" lookup trick as
2311 * in the 8-bit case (no pun intended) because in Unicode we
2312 * have to map both upper and title case to lower case.
2313 */
2314 if (OP(scan) == REFF) {
2315 while (s < e) {
2316 if (l >= PL_regeol)
2317 sayNO;
dfe13c55 2318 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2319 sayNO;
2320 s += UTF8SKIP(s);
2321 l += UTF8SKIP(l);
2322 }
2323 }
2324 else {
2325 while (s < e) {
2326 if (l >= PL_regeol)
2327 sayNO;
dfe13c55 2328 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2329 sayNO;
2330 s += UTF8SKIP(s);
2331 l += UTF8SKIP(l);
2332 }
2333 }
2334 locinput = l;
2335 nextchr = UCHARAT(locinput);
2336 break;
2337 }
2338
a0d0e21e 2339 /* Inline the first character, for speed. */
76e3520e 2340 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2341 (OP(scan) == REF ||
2342 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2343 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2344 sayNO;
cf93c79d 2345 ln = PL_regendp[n] - ln;
3280af22 2346 if (locinput + ln > PL_regeol)
4633a7c4 2347 sayNO;
c8756f30
AK
2348 if (ln > 1 && (OP(scan) == REF
2349 ? memNE(s, locinput, ln)
2350 : (OP(scan) == REFF
2351 ? ibcmp(s, locinput, ln)
2352 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2353 sayNO;
a0d0e21e 2354 locinput += ln;
76e3520e 2355 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2356 break;
2357
2358 case NOTHING:
c277df42 2359 case TAIL:
a0d0e21e
LW
2360 break;
2361 case BACK:
2362 break;
c277df42
IZ
2363 case EVAL:
2364 {
2365 dSP;
533c011a 2366 OP_4tree *oop = PL_op;
3280af22
NIS
2367 COP *ocurcop = PL_curcop;
2368 SV **ocurpad = PL_curpad;
c277df42
IZ
2369 SV *ret;
2370
2371 n = ARG(scan);
533c011a 2372 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2373 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2374 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2375 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2376
cea2e8a9 2377 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2378 SPAGAIN;
2379 ret = POPs;
2380 PUTBACK;
2381
0f5d15d6
IZ
2382 PL_op = oop;
2383 PL_curpad = ocurpad;
2384 PL_curcop = ocurcop;
c277df42 2385 if (logical) {
0f5d15d6
IZ
2386 if (logical == 2) { /* Postponed subexpression. */
2387 regexp *re;
22c35a8c 2388 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2389 re_cc_state state;
0f5d15d6
IZ
2390 CHECKPOINT cp, lastcp;
2391
2392 if(SvROK(ret) || SvRMAGICAL(ret)) {
2393 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2394
2395 if(SvMAGICAL(sv))
2396 mg = mg_find(sv, 'r');
2397 }
2398 if (mg) {
2399 re = (regexp *)mg->mg_obj;
df0003d4 2400 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2401 }
2402 else {
2403 STRLEN len;
2404 char *t = SvPV(ret, len);
2405 PMOP pm;
2406 char *oprecomp = PL_regprecomp;
2407 I32 osize = PL_regsize;
2408 I32 onpar = PL_regnpar;
2409
2410 pm.op_pmflags = 0;
393fec97 2411 pm.op_pmdynflags = (UTF||DO_UTF8(ret) ? PMdf_UTF8 : 0);
cea2e8a9 2412 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2413 if (!(SvFLAGS(ret)
2414 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2415 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2416 PL_regprecomp = oprecomp;
2417 PL_regsize = osize;
2418 PL_regnpar = onpar;
2419 }
2420 DEBUG_r(
2421 PerlIO_printf(Perl_debug_log,
2422 "Entering embedded `%s%.60s%s%s'\n",
2423 PL_colors[0],
2424 re->precomp,
2425 PL_colors[1],
2426 (strlen(re->precomp) > 60 ? "..." : ""))
2427 );
2428 state.node = next;
2429 state.prev = PL_reg_call_cc;
2430 state.cc = PL_regcc;
2431 state.re = PL_reg_re;
2432
2ab05381 2433 PL_regcc = 0;
0f5d15d6
IZ
2434
2435 cp = regcppush(0); /* Save *all* the positions. */
2436 REGCP_SET;
2437 cache_re(re);
2438 state.ss = PL_savestack_ix;
2439 *PL_reglastparen = 0;
2440 PL_reg_call_cc = &state;
2441 PL_reginput = locinput;
2c2d71f5
JH
2442
2443 /* XXXX This is too dramatic a measure... */
2444 PL_reg_maxiter = 0;
2445
0f5d15d6 2446 if (regmatch(re->program + 1)) {
2c914db6
IZ
2447 /* Even though we succeeded, we need to restore
2448 global variables, since we may be wrapped inside
2449 SUSPEND, thus the match may be not finished yet. */
2450
2451 /* XXXX Do this only if SUSPENDed? */
2452 PL_reg_call_cc = state.prev;
2453 PL_regcc = state.cc;
2454 PL_reg_re = state.re;
2455 cache_re(PL_reg_re);
2456
2457 /* XXXX This is too dramatic a measure... */
2458 PL_reg_maxiter = 0;
2459
2460 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2461 ReREFCNT_dec(re);
2462 regcpblow(cp);
2463 sayYES;
2464 }
0f5d15d6
IZ
2465 ReREFCNT_dec(re);
2466 REGCP_UNWIND;
2467 regcppop();
2468 PL_reg_call_cc = state.prev;
2469 PL_regcc = state.cc;
2470 PL_reg_re = state.re;
d3790889 2471 cache_re(PL_reg_re);
2c2d71f5
JH
2472
2473 /* XXXX This is too dramatic a measure... */
2474 PL_reg_maxiter = 0;
2475
0f5d15d6
IZ
2476 sayNO;
2477 }
c277df42 2478 sw = SvTRUE(ret);
0f5d15d6 2479 logical = 0;
a0ed51b3
LW
2480 }
2481 else
3280af22 2482 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2483 break;
2484 }
a0d0e21e 2485 case OPEN:
c277df42 2486 n = ARG(scan); /* which paren pair */
3280af22
NIS
2487 PL_reg_start_tmp[n] = locinput;
2488 if (n > PL_regsize)
2489 PL_regsize = n;
a0d0e21e
LW
2490 break;
2491 case CLOSE:
c277df42 2492 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2493 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2494 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2495 if (n > *PL_reglastparen)
2496 *PL_reglastparen = n;
a0d0e21e 2497 break;
c277df42
IZ
2498 case GROUPP:
2499 n = ARG(scan); /* which paren pair */
cf93c79d 2500 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2501 break;
2502 case IFTHEN:
2c2d71f5 2503 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2504 if (sw)
2505 next = NEXTOPER(NEXTOPER(scan));
2506 else {
2507 next = scan + ARG(scan);
2508 if (OP(next) == IFTHEN) /* Fake one. */
2509 next = NEXTOPER(NEXTOPER(next));
2510 }
2511 break;
2512 case LOGICAL:
0f5d15d6 2513 logical = scan->flags;
c277df42 2514 break;
2ab05381
IZ
2515/*******************************************************************
2516 PL_regcc contains infoblock about the innermost (...)* loop, and
2517 a pointer to the next outer infoblock.
2518
2519 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2520
2521 1) After matching X, regnode for CURLYX is processed;
2522
2523 2) This regnode creates infoblock on the stack, and calls
2524 regmatch() recursively with the starting point at WHILEM node;
2525
2526 3) Each hit of WHILEM node tries to match A and Z (in the order
2527 depending on the current iteration, min/max of {min,max} and
2528 greediness). The information about where are nodes for "A"
2529 and "Z" is read from the infoblock, as is info on how many times "A"
2530 was already matched, and greediness.
2531
2532 4) After A matches, the same WHILEM node is hit again.
2533
2534 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2535 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2536 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2537 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2538 of the external loop.
2539
2540 Currently present infoblocks form a tree with a stem formed by PL_curcc
2541 and whatever it mentions via ->next, and additional attached trees
2542 corresponding to temporarily unset infoblocks as in "5" above.
2543
2544 In the following picture infoblocks for outer loop of
2545 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2546 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2547 infoblocks are drawn below the "reset" infoblock.
2548
2549 In fact in the picture below we do not show failed matches for Z and T
2550 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2551 more obvious *why* one needs to *temporary* unset infoblocks.]
2552
2553 Matched REx position InfoBlocks Comment
2554 (Y(A)*?Z)*?T x
2555 Y(A)*?Z)*?T x <- O
2556 Y (A)*?Z)*?T x <- O
2557 Y A)*?Z)*?T x <- O <- I
2558 YA )*?Z)*?T x <- O <- I
2559 YA A)*?Z)*?T x <- O <- I
2560 YAA )*?Z)*?T x <- O <- I
2561 YAA Z)*?T x <- O # Temporary unset I
2562 I
2563
2564 YAAZ Y(A)*?Z)*?T x <- O
2565 I
2566
2567 YAAZY (A)*?Z)*?T x <- O
2568 I
2569
2570 YAAZY A)*?Z)*?T x <- O <- I
2571 I
2572
2573 YAAZYA )*?Z)*?T x <- O <- I
2574 I
2575
2576 YAAZYA Z)*?T x <- O # Temporary unset I
2577 I,I
2578
2579 YAAZYAZ )*?T x <- O
2580 I,I
2581
2582 YAAZYAZ T x # Temporary unset O
2583 O
2584 I,I
2585
2586 YAAZYAZT x
2587 O
2588 I,I
2589 *******************************************************************/
a0d0e21e
LW
2590 case CURLYX: {
2591 CURCUR cc;
3280af22 2592 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
2593
2594 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2595 next += ARG(next);
3280af22
NIS
2596 cc.oldcc = PL_regcc;
2597 PL_regcc = &cc;
2598 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
2599 cc.cur = -1;
2600 cc.min = ARG1(scan);
2601 cc.max = ARG2(scan);
c277df42 2602 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2603 cc.next = next;
2604 cc.minmod = minmod;
2605 cc.lastloc = 0;
3280af22 2606 PL_reginput = locinput;
a0d0e21e
LW
2607 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2608 regcpblow(cp);
3280af22 2609 PL_regcc = cc.oldcc;
4633a7c4 2610 saySAME(n);
a0d0e21e
LW
2611 }
2612 /* NOT REACHED */
2613 case WHILEM: {
2614 /*
2615 * This is really hard to understand, because after we match
2616 * what we're trying to match, we must make sure the rest of
2c2d71f5 2617 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2618 * to go back UP the parse tree by recursing ever deeper. And
2619 * if it fails, we have to reset our parent's current state
2620 * that we can try again after backing off.
2621 */
2622
c277df42 2623 CHECKPOINT cp, lastcp;
3280af22 2624 CURCUR* cc = PL_regcc;
c277df42
IZ
2625 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2626
4633a7c4 2627 n = cc->cur + 1; /* how many we know we matched */
3280af22 2628 PL_reginput = locinput;
a0d0e21e 2629
c277df42
IZ
2630 DEBUG_r(
2631 PerlIO_printf(Perl_debug_log,
2632 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2633 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2634 (long)n, (long)cc->min,
2635 (long)cc->max, (long)cc)
2636 );
4633a7c4 2637
a0d0e21e
LW
2638 /* If degenerate scan matches "", assume scan done. */
2639
579cf2c3 2640 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2641 PL_regcc = cc->oldcc;
2ab05381
IZ
2642 if (PL_regcc)
2643 ln = PL_regcc->cur;
c277df42 2644 DEBUG_r(
c3464db5
DD
2645 PerlIO_printf(Perl_debug_log,
2646 "%*s empty match detected, try continuation...\n",
3280af22 2647 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2648 );
a0d0e21e 2649 if (regmatch(cc->next))
4633a7c4 2650 sayYES;
2ab05381
IZ
2651 if (PL_regcc)
2652 PL_regcc->cur = ln;
3280af22 2653 PL_regcc = cc;
4633a7c4 2654 sayNO;
a0d0e21e
LW
2655 }
2656
2657 /* First just match a string of min scans. */
2658
2659 if (n < cc->min) {
2660 cc->cur = n;
2661 cc->lastloc = locinput;
4633a7c4
LW
2662 if (regmatch(cc->scan))
2663 sayYES;
2664 cc->cur = n - 1;
c277df42 2665 cc->lastloc = lastloc;
4633a7c4 2666 sayNO;
a0d0e21e
LW
2667 }
2668
2c2d71f5
JH
2669 if (scan->flags) {
2670 /* Check whether we already were at this position.
2671 Postpone detection until we know the match is not
2672 *that* much linear. */
2673 if (!PL_reg_maxiter) {
2674 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2675 PL_reg_leftiter = PL_reg_maxiter;
2676 }
2677 if (PL_reg_leftiter-- == 0) {
2678 I32 size = (PL_reg_maxiter + 7)/8;
2679 if (PL_reg_poscache) {
2680 if (PL_reg_poscache_size < size) {
2681 Renew(PL_reg_poscache, size, char);
2682 PL_reg_poscache_size = size;
2683 }
2684 Zero(PL_reg_poscache, size, char);
2685 }
2686 else {
2687 PL_reg_poscache_size = size;
2688 Newz(29, PL_reg_poscache, size, char);
2689 }
2690 DEBUG_r(
2691 PerlIO_printf(Perl_debug_log,
2692 "%sDetected a super-linear match, switching on caching%s...\n",
2693 PL_colors[4], PL_colors[5])
2694 );
2695 }
2696 if (PL_reg_leftiter < 0) {
2697 I32 o = locinput - PL_bostr, b;
2698
2699 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2700 b = o % 8;
2701 o /= 8;
2702 if (PL_reg_poscache[o] & (1<<b)) {
2703 DEBUG_r(
2704 PerlIO_printf(Perl_debug_log,
2705 "%*s already tried at this position...\n",
2706 REPORT_CODE_OFF+PL_regindent*2, "")
2707 );
7821416a 2708 sayNO_SILENT;
2c2d71f5
JH
2709 }
2710 PL_reg_poscache[o] |= (1<<b);
2711 }
2712 }
2713
a0d0e21e
LW
2714 /* Prefer next over scan for minimal matching. */
2715
2716 if (cc->minmod) {
3280af22 2717 PL_regcc = cc->oldcc;
2ab05381
IZ
2718 if (PL_regcc)
2719 ln = PL_regcc->cur;
5f05dabc 2720 cp = regcppush(cc->parenfloor);
c277df42 2721 REGCP_SET;
5f05dabc 2722 if (regmatch(cc->next)) {
c277df42 2723 regcpblow(cp);
4633a7c4 2724 sayYES; /* All done. */
5f05dabc 2725 }
c277df42 2726 REGCP_UNWIND;
5f05dabc 2727 regcppop();
2ab05381
IZ
2728 if (PL_regcc)
2729 PL_regcc->cur = ln;
3280af22 2730 PL_regcc = cc;
a0d0e21e 2731
c277df42 2732 if (n >= cc->max) { /* Maximum greed exceeded? */
e476b1b5 2733 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2734 && !(PL_reg_flags & RF_warned)) {
2735 PL_reg_flags |= RF_warned;
e476b1b5 2736 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2737 "Complex regular subexpression recursion",
2738 REG_INFTY - 1);
c277df42 2739 }
4633a7c4 2740 sayNO;
c277df42 2741 }
a687059c 2742
c277df42 2743 DEBUG_r(
c3464db5
DD
2744 PerlIO_printf(Perl_debug_log,
2745 "%*s trying longer...\n",
3280af22 2746 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2747 );
a0d0e21e 2748 /* Try scanning more and see if it helps. */
3280af22 2749 PL_reginput = locinput;
a0d0e21e
LW
2750 cc->cur = n;
2751 cc->lastloc = locinput;
5f05dabc 2752 cp = regcppush(cc->parenfloor);
c277df42 2753 REGCP_SET;
5f05dabc 2754 if (regmatch(cc->scan)) {
c277df42 2755 regcpblow(cp);
4633a7c4 2756 sayYES;
5f05dabc 2757 }
c277df42 2758 REGCP_UNWIND;
5f05dabc 2759 regcppop();
4633a7c4 2760 cc->cur = n - 1;
c277df42 2761 cc->lastloc = lastloc;
4633a7c4 2762 sayNO;
a0d0e21e
LW
2763 }
2764
2765 /* Prefer scan over next for maximal matching. */
2766
2767 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2768 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2769 cc->cur = n;
2770 cc->lastloc = locinput;
c277df42 2771 REGCP_SET;
5f05dabc 2772 if (regmatch(cc->scan)) {
c277df42 2773 regcpblow(cp);
4633a7c4 2774 sayYES;
5f05dabc 2775 }
c277df42 2776 REGCP_UNWIND;
a0d0e21e 2777 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2778 PL_reginput = locinput;
c277df42 2779 DEBUG_r(
c3464db5
DD
2780 PerlIO_printf(Perl_debug_log,
2781 "%*s failed, try continuation...\n",
3280af22 2782 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2783 );
2784 }
e476b1b5 2785 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2786 && !(PL_reg_flags & RF_warned)) {
3280af22 2787 PL_reg_flags |= RF_warned;
e476b1b5 2788 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2789 "Complex regular subexpression recursion",
2790 REG_INFTY - 1);
a0d0e21e
LW
2791 }
2792
2793 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2794 PL_regcc = cc->oldcc;
2ab05381
IZ
2795 if (PL_regcc)
2796 ln = PL_regcc->cur;
a0d0e21e 2797 if (regmatch(cc->next))
4633a7c4 2798 sayYES;
2ab05381
IZ
2799 if (PL_regcc)
2800 PL_regcc->cur = ln;
3280af22 2801 PL_regcc = cc;
4633a7c4 2802 cc->cur = n - 1;
c277df42 2803 cc->lastloc = lastloc;
4633a7c4 2804 sayNO;
a0d0e21e
LW
2805 }
2806 /* NOT REACHED */
c277df42
IZ
2807 case BRANCHJ:
2808 next = scan + ARG(scan);
2809 if (next == scan)
2810 next = NULL;
2811 inner = NEXTOPER(NEXTOPER(scan));
2812 goto do_branch;
2813 case BRANCH:
2814 inner = NEXTOPER(scan);
2815 do_branch:
2816 {
2817 CHECKPOINT lastcp;
2818 c1 = OP(scan);
2819 if (OP(next) != c1) /* No choice. */
2820 next = inner; /* Avoid recursion. */
a0d0e21e 2821 else {
3280af22 2822 int lastparen = *PL_reglastparen;
c277df42
IZ
2823
2824 REGCP_SET;
a0d0e21e 2825 do {
3280af22 2826 PL_reginput = locinput;
c277df42 2827 if (regmatch(inner))
4633a7c4 2828 sayYES;
c277df42 2829 REGCP_UNWIND;
3280af22 2830 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2831 PL_regendp[n] = -1;
3280af22 2832 *PL_reglastparen = n;
c277df42 2833 scan = next;
a0d0e21e 2834 /*SUPPRESS 560*/
155aba94 2835 if ((n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next))))
c277df42 2836 next += n;
a0d0e21e 2837 else
c277df42 2838 next = NULL;
c277df42
IZ
2839 inner = NEXTOPER(scan);
2840 if (c1 == BRANCHJ) {
2841 inner = NEXTOPER(inner);
2842 }
2843 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2844 sayNO;
a0d0e21e 2845 /* NOTREACHED */
a687059c 2846 }
a0d0e21e
LW
2847 }
2848 break;
2849 case MINMOD:
2850 minmod = 1;
2851 break;
c277df42
IZ
2852 case CURLYM:
2853 {
00db4c45 2854 I32 l = 0;
c277df42
IZ
2855 CHECKPOINT lastcp;
2856
2857 /* We suppose that the next guy does not need
2858 backtracking: in particular, it is of constant length,
2859 and has no parenths to influence future backrefs. */
2860 ln = ARG1(scan); /* min to match */
2861 n = ARG2(scan); /* max to match */
c277df42
IZ
2862 paren = scan->flags;
2863 if (paren) {
3280af22
NIS
2864 if (paren > PL_regsize)
2865 PL_regsize = paren;
2866 if (paren > *PL_reglastparen)
2867 *PL_reglastparen = paren;
c277df42 2868 }
dc45a647 2869 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2870 if (paren)
2871 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2872 PL_reginput = locinput;
c277df42
IZ
2873 if (minmod) {
2874 minmod = 0;
2875 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2876 sayNO;
5f4b28b2 2877 if (ln && l == 0 && n >= ln
c277df42
IZ
2878 /* In fact, this is tricky. If paren, then the
2879 fact that we did/didnot match may influence
2880 future execution. */
2881 && !(paren && ln == 0))
2882 ln = n;
3280af22 2883 locinput = PL_reginput;
22c35a8c 2884 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2885 c1 = (U8)*STRING(next);
c277df42 2886 if (OP(next) == EXACTF)
22c35a8c 2887 c2 = PL_fold[c1];
c277df42 2888 else if (OP(next) == EXACTFL)
22c35a8c 2889 c2 = PL_fold_locale[c1];
c277df42
IZ
2890 else
2891 c2 = c1;
a0ed51b3
LW
2892 }
2893 else
c277df42
IZ
2894 c1 = c2 = -1000;
2895 REGCP_SET;
5f4b28b2 2896 /* This may be improved if l == 0. */
c277df42
IZ
2897 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2898 /* If it could work, try it. */
2899 if (c1 == -1000 ||
3280af22
NIS
2900 UCHARAT(PL_reginput) == c1 ||
2901 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2902 {
2903 if (paren) {
2904 if (n) {
cf93c79d
IZ
2905 PL_regstartp[paren] =
2906 HOPc(PL_reginput, -l) - PL_bostr;
2907 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2908 }
2909 else
cf93c79d 2910 PL_regendp[paren] = -1;
c277df42
IZ
2911 }
2912 if (regmatch(next))
2913 sayYES;
2914 REGCP_UNWIND;
2915 }
2916 /* Couldn't or didn't -- move forward. */
3280af22 2917 PL_reginput = locinput;
c277df42
IZ
2918 if (regrepeat_hard(scan, 1, &l)) {
2919 ln++;
3280af22 2920 locinput = PL_reginput;
c277df42
IZ
2921 }
2922 else
2923 sayNO;
2924 }
a0ed51b3
LW
2925 }
2926 else {
c277df42
IZ
2927 n = regrepeat_hard(scan, n, &l);
2928 if (n != 0 && l == 0
2929 /* In fact, this is tricky. If paren, then the
2930 fact that we did/didnot match may influence
2931 future execution. */
2932 && !(paren && ln == 0))
2933 ln = n;
3280af22 2934 locinput = PL_reginput;
c277df42 2935 DEBUG_r(
5c0ca799 2936 PerlIO_printf(Perl_debug_log,
faccc32b 2937 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 2938 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 2939 (IV) n, (IV)l)
c277df42
IZ
2940 );
2941 if (n >= ln) {
22c35a8c 2942 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2943 c1 = (U8)*STRING(next);
c277df42 2944 if (OP(next) == EXACTF)
22c35a8c 2945 c2 = PL_fold[c1];
c277df42 2946 else if (OP(next) == EXACTFL)
22c35a8c 2947 c2 = PL_fold_locale[c1];
c277df42
IZ
2948 else
2949 c2 = c1;
a0ed51b3
LW
2950 }
2951 else
c277df42
IZ
2952 c1 = c2 = -1000;
2953 }
2954 REGCP_SET;
2955 while (n >= ln) {
2956 /* If it could work, try it. */
2957 if (c1 == -1000 ||
3280af22
NIS
2958 UCHARAT(PL_reginput) == c1 ||
2959 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
2960 {
2961 DEBUG_r(
c3464db5 2962 PerlIO_printf(Perl_debug_log,
7b0972df
JH
2963 "%*s trying tail with n=%"IVdf"...\n",
2964 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
2965 );
2966 if (paren) {
2967 if (n) {
cf93c79d
IZ
2968 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2969 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2970 }
a0ed51b3 2971 else
cf93c79d 2972 PL_regendp[paren] = -1;
c277df42 2973 }
a0ed51b3
LW
2974 if (regmatch(next))
2975 sayYES;
2976 REGCP_UNWIND;
2977 }
c277df42
IZ
2978 /* Couldn't or didn't -- back up. */
2979 n--;
dfe13c55 2980 locinput = HOPc(locinput, -l);
3280af22 2981 PL_reginput = locinput;
c277df42
IZ
2982 }
2983 }
2984 sayNO;
2985 break;
2986 }
2987 case CURLYN:
2988 paren = scan->flags; /* Which paren to set */
3280af22
NIS
2989 if (paren > PL_regsize)
2990 PL_regsize = paren;
2991 if (paren > *PL_reglastparen)
2992 *PL_reglastparen = paren;
c277df42
IZ
2993 ln = ARG1(scan); /* min to match */
2994 n = ARG2(scan); /* max to match */
dc45a647 2995 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2996 goto repeat;
a0d0e21e 2997 case CURLY:
c277df42 2998 paren = 0;
a0d0e21e
LW
2999 ln = ARG1(scan); /* min to match */
3000 n = ARG2(scan); /* max to match */
dc45a647 3001 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3002 goto repeat;
3003 case STAR:
3004 ln = 0;
c277df42 3005 n = REG_INFTY;
a0d0e21e 3006 scan = NEXTOPER(scan);
c277df42 3007 paren = 0;
a0d0e21e
LW
3008 goto repeat;
3009 case PLUS:
c277df42
IZ
3010 ln = 1;
3011 n = REG_INFTY;
3012 scan = NEXTOPER(scan);
3013 paren = 0;
3014 repeat:
a0d0e21e
LW
3015 /*
3016 * Lookahead to avoid useless match attempts
3017 * when we know what character comes next.
3018 */
22c35a8c 3019 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3020 c1 = (U8)*STRING(next);
bbce6d69 3021 if (OP(next) == EXACTF)
22c35a8c 3022 c2 = PL_fold[c1];
bbce6d69 3023 else if (OP(next) == EXACTFL)
22c35a8c 3024 c2 = PL_fold_locale[c1];
bbce6d69 3025 else
3026 c2 = c1;
3027 }
a0d0e21e 3028 else
bbce6d69 3029 c1 = c2 = -1000;
3280af22 3030 PL_reginput = locinput;
a0d0e21e 3031 if (minmod) {
c277df42 3032 CHECKPOINT lastcp;
a0d0e21e
LW
3033 minmod = 0;
3034 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3035 sayNO;
a0ed51b3 3036 locinput = PL_reginput;
c277df42 3037 REGCP_SET;
0fe9bf95
IZ
3038 if (c1 != -1000) {
3039 char *e = locinput + n - ln; /* Should not check after this */
3040 char *old = locinput;
3041
3042 if (e >= PL_regeol || (n == REG_INFTY))
3043 e = PL_regeol - 1;
3044 while (1) {
3045 /* Find place 'next' could work */
3046 if (c1 == c2) {
3047 while (locinput <= e && *locinput != c1)
3048 locinput++;
3049 } else {
3050 while (locinput <= e
3051 && *locinput != c1
3052 && *locinput != c2)
3053 locinput++;
3054 }
3055 if (locinput > e)
3056 sayNO;
3057 /* PL_reginput == old now */
3058 if (locinput != old) {
3059 ln = 1; /* Did some */
3060 if (regrepeat(scan, locinput - old) <
3061 locinput - old)
3062 sayNO;
3063 }
3064 /* PL_reginput == locinput now */
29d1e993 3065 TRYPAREN(paren, ln, locinput);
0fe9bf95
IZ
3066 PL_reginput = locinput; /* Could be reset... */
3067 REGCP_UNWIND;
3068 /* Couldn't or didn't -- move forward. */
3069 old = locinput++;
3070 }
3071 }
3072 else
c277df42 3073 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 3074 /* If it could work, try it. */
bbce6d69 3075 if (c1 == -1000 ||
3280af22
NIS
3076 UCHARAT(PL_reginput) == c1 ||
3077 UCHARAT(PL_reginput) == c2)
bbce6d69 3078 {
29d1e993 3079 TRYPAREN(paren, n, PL_reginput);
c277df42 3080 REGCP_UNWIND;
bbce6d69 3081 }
c277df42 3082 /* Couldn't or didn't -- move forward. */
a0ed51b3 3083 PL_reginput = locinput;
a0d0e21e
LW
3084 if (regrepeat(scan, 1)) {
3085 ln++;
a0ed51b3
LW
3086 locinput = PL_reginput;
3087 }
3088 else
4633a7c4 3089 sayNO;
a0d0e21e
LW
3090 }
3091 }
3092 else {
c277df42 3093 CHECKPOINT lastcp;
a0d0e21e 3094 n = regrepeat(scan, n);
a0ed51b3 3095 locinput = PL_reginput;
22c35a8c 3096 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3097 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3098 ln = n; /* why back off? */
1aeab75a
GS
3099 /* ...because $ and \Z can match before *and* after
3100 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3101 We should back off by one in this case. */
3102 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3103 ln--;
3104 }
c277df42
IZ
3105 REGCP_SET;
3106 if (paren) {
3107 while (n >= ln) {
3108 /* If it could work, try it. */
3109 if (c1 == -1000 ||
3280af22
NIS
3110 UCHARAT(PL_reginput) == c1 ||
3111 UCHARAT(PL_reginput) == c2)
c277df42 3112 {
29d1e993 3113 TRYPAREN(paren, n, PL_reginput);
c277df42
IZ
3114 REGCP_UNWIND;
3115 }
3116 /* Couldn't or didn't -- back up. */
3117 n--;
dfe13c55 3118 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3119 }
a0ed51b3
LW
3120 }
3121 else {
c277df42
IZ
3122 while (n >= ln) {
3123 /* If it could work, try it. */
3124 if (c1 == -1000 ||
3280af22
NIS
3125 UCHARAT(PL_reginput) == c1 ||
3126 UCHARAT(PL_reginput) == c2)
c277df42 3127 {
29d1e993 3128 TRYPAREN(paren, n, PL_reginput);
c277df42
IZ
3129 REGCP_UNWIND;
3130 }
3131 /* Couldn't or didn't -- back up. */
3132 n--;
dfe13c55 3133 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3134 }
a0d0e21e
LW
3135 }
3136 }
4633a7c4 3137 sayNO;
c277df42 3138 break;
a0d0e21e 3139 case END:
0f5d15d6
IZ
3140 if (PL_reg_call_cc) {
3141 re_cc_state *cur_call_cc = PL_reg_call_cc;
3142 CURCUR *cctmp = PL_regcc;
3143 regexp *re = PL_reg_re;
3144 CHECKPOINT cp, lastcp;
3145
3146 cp = regcppush(0); /* Save *all* the positions. */
3147 REGCP_SET;
3148 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3149 the caller. */
3150 PL_reginput = locinput; /* Make position available to
3151 the callcc. */
3152 cache_re(PL_reg_call_cc->re);
3153 PL_regcc = PL_reg_call_cc->cc;
3154 PL_reg_call_cc = PL_reg_call_cc->prev;
3155 if (regmatch(cur_call_cc->node)) {
3156 PL_reg_call_cc = cur_call_cc;
3157 regcpblow(cp);
3158 sayYES;
3159 }
3160 REGCP_UNWIND;
3161 regcppop();
3162 PL_reg_call_cc = cur_call_cc;
3163 PL_regcc = cctmp;
3164 PL_reg_re = re;
3165 cache_re(re);
3166
3167 DEBUG_r(
3168 PerlIO_printf(Perl_debug_log,
3169 "%*s continuation failed...\n",
3170 REPORT_CODE_OFF+PL_regindent*2, "")
3171 );
7821416a 3172 sayNO_SILENT;
0f5d15d6 3173 }
7821416a
IZ
3174 if (locinput < PL_regtill) {
3175 DEBUG_r(PerlIO_printf(Perl_debug_log,
3176 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3177 PL_colors[4],
3178 (long)(locinput - PL_reg_starttry),
3179 (long)(PL_regtill - PL_reg_starttry),
3180 PL_colors[5]));
3181 sayNO_FINAL; /* Cannot match: too short. */
3182 }
3183 PL_reginput = locinput; /* put where regtry can find it */
3184 sayYES_FINAL; /* Success! */
7e5428c5 3185 case SUCCEED:
3280af22 3186 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3187 sayYES_LOUD; /* Success! */
c277df42
IZ
3188 case SUSPEND:
3189 n = 1;
9fe1d20c 3190 PL_reginput = locinput;
c277df42 3191 goto do_ifmatch;
a0d0e21e 3192 case UNLESSM:
c277df42 3193 n = 0;
a0ed51b3 3194 if (scan->flags) {
0fe9bf95
IZ
3195 if (UTF) { /* XXXX This is absolutely
3196 broken, we read before
3197 start of string. */
3198 s = HOPMAYBEc(locinput, -scan->flags);
3199 if (!s)
3200 goto say_yes;
3201 PL_reginput = s;
3202 }
3203 else {
3204 if (locinput < PL_bostr + scan->flags)
3205 goto say_yes;
3206 PL_reginput = locinput - scan->flags;
3207 goto do_ifmatch;
3208 }
a0ed51b3
LW
3209 }
3210 else
3211 PL_reginput = locinput;
c277df42
IZ
3212 goto do_ifmatch;
3213 case IFMATCH:
3214 n = 1;
a0ed51b3 3215 if (scan->flags) {
0fe9bf95
IZ
3216 if (UTF) { /* XXXX This is absolutely
3217 broken, we read before
3218 start of string. */
3219 s = HOPMAYBEc(locinput, -scan->flags);
3220 if (!s || s < PL_bostr)
3221 goto say_no;
3222 PL_reginput = s;
3223 }
3224 else {
3225 if (locinput < PL_bostr + scan->flags)
3226 goto say_no;
3227 PL_reginput = locinput - scan->flags;
3228 goto do_ifmatch;
3229 }
a0ed51b3
LW
3230 }
3231 else
3232 PL_reginput = locinput;
3233
c277df42 3234 do_ifmatch:
c277df42
IZ
3235 inner = NEXTOPER(NEXTOPER(scan));
3236 if (regmatch(inner) != n) {
3237 say_no:
3238 if (logical) {
3239 logical = 0;
3240 sw = 0;
3241 goto do_longjump;
a0ed51b3
LW
3242 }
3243 else
c277df42
IZ
3244 sayNO;
3245 }
3246 say_yes:
3247 if (logical) {
3248 logical = 0;
3249 sw = 1;
3250 }
fe44a5e8 3251 if (OP(scan) == SUSPEND) {
3280af22 3252 locinput = PL_reginput;
565764a8 3253 nextchr = UCHARAT(locinput);
fe44a5e8 3254 }
c277df42
IZ
3255 /* FALL THROUGH. */
3256 case LONGJMP:
3257 do_longjump:
3258 next = scan + ARG(scan);
3259 if (next == scan)
3260 next = NULL;
a0d0e21e
LW
3261 break;
3262 default:
b900a521 3263 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3264 PTR2UV(scan), OP(scan));
cea2e8a9 3265 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3266 }
a0d0e21e
LW
3267 scan = next;
3268 }
a687059c 3269
a0d0e21e
LW
3270 /*
3271 * We get here only if there's trouble -- normally "case END" is
3272 * the terminating point.
3273 */
cea2e8a9 3274 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3275 /*NOTREACHED*/
4633a7c4
LW
3276 sayNO;
3277
7821416a
IZ
3278yes_loud:
3279 DEBUG_r(
3280 PerlIO_printf(Perl_debug_log,
3281 "%*s %scould match...%s\n",
3282 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3283 );
3284 goto yes;
3285yes_final:
3286 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3287 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3288yes:
3289#ifdef DEBUGGING
3280af22 3290 PL_regindent--;
4633a7c4
LW
3291#endif
3292 return 1;
3293
3294no:
7821416a
IZ
3295 DEBUG_r(
3296 PerlIO_printf(Perl_debug_log,
3297 "%*s %sfailed...%s\n",
3298 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3299 );
3300 goto do_no;
3301no_final:
3302do_no:
4633a7c4 3303#ifdef DEBUGGING
3280af22 3304 PL_regindent--;
4633a7c4 3305#endif
a0d0e21e 3306 return 0;
a687059c
LW
3307}
3308
3309/*
3310 - regrepeat - repeatedly match something simple, report how many
3311 */
3312/*
3313 * [This routine now assumes that it will only match on things of length 1.
3314 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3315 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3316 */
76e3520e 3317STATIC I32
cea2e8a9 3318S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3319{
5c0ca799 3320 dTHR;
a0d0e21e 3321 register char *scan;
a0d0e21e 3322 register I32 c;
3280af22 3323 register char *loceol = PL_regeol;
a0ed51b3 3324 register I32 hardcount = 0;
a0d0e21e 3325
3280af22 3326 scan = PL_reginput;
c277df42 3327 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3328 loceol = scan + max;
a0d0e21e 3329 switch (OP(p)) {
22c35a8c 3330 case REG_ANY:
a0d0e21e
LW
3331 while (scan < loceol && *scan != '\n')
3332 scan++;
3333 break;
3334 case SANY:
3335 scan = loceol;
3336 break;
a0ed51b3
LW
3337 case ANYUTF8:
3338 loceol = PL_regeol;
3339 while (scan < loceol && *scan != '\n') {
3340 scan += UTF8SKIP(scan);
3341 hardcount++;
3342 }
3343 break;
3344 case SANYUTF8:
3345 loceol = PL_regeol;
3346 while (scan < loceol) {
3347 scan += UTF8SKIP(scan);
3348 hardcount++;
3349 }
3350 break;
bbce6d69 3351 case EXACT: /* length of string is 1 */
cd439c50 3352 c = (U8)*STRING(p);
bbce6d69 3353 while (scan < loceol && UCHARAT(scan) == c)
3354 scan++;
3355 break;
3356 case EXACTF: /* length of string is 1 */
cd439c50 3357 c = (U8)*STRING(p);
bbce6d69 3358 while (scan < loceol &&
22c35a8c 3359 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3360 scan++;
3361 break;
3362 case EXACTFL: /* length of string is 1 */
3280af22 3363 PL_reg_flags |= RF_tainted;
cd439c50 3364 c = (U8)*STRING(p);
bbce6d69 3365 while (scan < loceol &&
22c35a8c 3366 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3367 scan++;
3368 break;
a0ed51b3
LW
3369 case ANYOFUTF8:
3370 loceol = PL_regeol;
3371 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3372 scan += UTF8SKIP(scan);
3373 hardcount++;
3374 }
3375 break;
a0d0e21e 3376 case ANYOF:
936ed897 3377 while (scan < loceol && REGINCLASS(p, *scan))
a0d0e21e 3378 scan++;
a0d0e21e
LW
3379 break;
3380 case ALNUM:
3381 while (scan < loceol && isALNUM(*scan))
3382 scan++;
3383 break;
a0ed51b3
LW
3384 case ALNUMUTF8:
3385 loceol = PL_regeol;
dfe13c55 3386 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3387 scan += UTF8SKIP(scan);
3388 hardcount++;
3389 }
3390 break;
bbce6d69 3391 case ALNUML:
3280af22 3392 PL_reg_flags |= RF_tainted;
bbce6d69 3393 while (scan < loceol && isALNUM_LC(*scan))
3394 scan++;
3395 break;
a0ed51b3
LW
3396 case ALNUMLUTF8:
3397 PL_reg_flags |= RF_tainted;
3398 loceol = PL_regeol;
dfe13c55 3399 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3400 scan += UTF8SKIP(scan);
3401 hardcount++;
3402 }
3403 break;
3404 break;
a0d0e21e
LW
3405 case NALNUM:
3406 while (scan < loceol && !isALNUM(*scan))
3407 scan++;
3408 break;
a0ed51b3
LW
3409 case NALNUMUTF8:
3410 loceol = PL_regeol;
dfe13c55 3411 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3412 scan += UTF8SKIP(scan);
3413 hardcount++;
3414 }
3415 break;
bbce6d69 3416 case NALNUML:
3280af22 3417 PL_reg_flags |= RF_tainted;
bbce6d69 3418 while (scan < loceol && !isALNUM_LC(*scan))
3419 scan++;
3420 break;
a0ed51b3
LW
3421 case NALNUMLUTF8:
3422 PL_reg_flags |= RF_tainted;
3423 loceol = PL_regeol;
dfe13c55 3424 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3425 scan += UTF8SKIP(scan);
3426 hardcount++;
3427 }
3428 break;
a0d0e21e
LW
3429 case SPACE:
3430 while (scan < loceol && isSPACE(*scan))
3431 scan++;
3432 break;
a0ed51b3
LW
3433 case SPACEUTF8:
3434 loceol = PL_regeol;
dfe13c55 3435 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3436 scan += UTF8SKIP(scan);
3437 hardcount++;
3438 }
3439 break;
bbce6d69 3440 case SPACEL:
3280af22 3441 PL_reg_flags |= RF_tainted;
bbce6d69 3442 while (scan < loceol && isSPACE_LC(*scan))
3443 scan++;
3444 break;
a0ed51b3
LW
3445 case SPACELUTF8:
3446 PL_reg_flags |= RF_tainted;
3447 loceol = PL_regeol;
dfe13c55 3448 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3449 scan += UTF8SKIP(scan);
3450 hardcount++;
3451 }
3452 break;
a0d0e21e
LW
3453 case NSPACE:
3454 while (scan < loceol && !isSPACE(*scan))
3455 scan++;
3456 break;
a0ed51b3
LW
3457 case NSPACEUTF8:
3458 loceol = PL_regeol;
dfe13c55 3459 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3460 scan += UTF8SKIP(scan);
3461 hardcount++;
3462 }
3463 break;
bbce6d69 3464 case NSPACEL:
3280af22 3465 PL_reg_flags |= RF_tainted;
bbce6d69 3466 while (scan < loceol && !isSPACE_LC(*scan))
3467 scan++;
3468 break;
a0ed51b3
LW
3469 case NSPACELUTF8:
3470 PL_reg_flags |= RF_tainted;
3471 loceol = PL_regeol;
dfe13c55 3472 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3473 scan += UTF8SKIP(scan);
3474 hardcount++;
3475 }
3476 break;
a0d0e21e
LW
3477 case DIGIT:
3478 while (scan < loceol && isDIGIT(*scan))
3479 scan++;
3480 break;
a0ed51b3
LW
3481 case DIGITUTF8:
3482 loceol = PL_regeol;
dfe13c55 3483 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3484 scan += UTF8SKIP(scan);
3485 hardcount++;
3486 }
3487 break;
3488 break;
a0d0e21e
LW
3489 case NDIGIT:
3490 while (scan < loceol && !isDIGIT(*scan))
3491 scan++;
3492 break;
a0ed51b3
LW
3493 case NDIGITUTF8:
3494 loceol = PL_regeol;
dfe13c55 3495 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3496 scan += UTF8SKIP(scan);
3497 hardcount++;
3498 }
3499 break;
a0d0e21e
LW
3500 default: /* Called on something of 0 width. */
3501 break; /* So match right here or not at all. */
3502 }
a687059c 3503
a0ed51b3
LW
3504 if (hardcount)
3505 c = hardcount;
3506 else
3507 c = scan - PL_reginput;
3280af22 3508 PL_reginput = scan;
a687059c 3509
c277df42
IZ
3510 DEBUG_r(
3511 {
3512 SV *prop = sv_newmortal();
3513
3514 regprop(prop, p);
3515 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3516 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3517 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3518 });
3519
a0d0e21e 3520 return(c);
a687059c
LW
3521}
3522
3523/*
c277df42
IZ
3524 - regrepeat_hard - repeatedly match something, report total lenth and length
3525 *
3526 * The repeater is supposed to have constant length.
3527 */
3528
76e3520e 3529STATIC I32
cea2e8a9 3530S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3531{
5c0ca799 3532 dTHR;
c277df42
IZ
3533 register char *scan;
3534 register char *start;
3280af22 3535 register char *loceol = PL_regeol;
a0ed51b3 3536 I32 l = 0;
708e3b05 3537 I32 count = 0, res = 1;
a0ed51b3
LW
3538
3539 if (!max)
3540 return 0;
c277df42 3541
3280af22 3542 start = PL_reginput;
a0ed51b3 3543 if (UTF) {
708e3b05 3544 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3545 if (!count++) {
3546 l = 0;
3547 while (start < PL_reginput) {
3548 l++;
3549 start += UTF8SKIP(start);
3550 }
3551 *lp = l;
3552 if (l == 0)
3553 return max;
3554 }
3555 if (count == max)
3556 return count;
3557 }
3558 }
3559 else {
708e3b05 3560 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3561 if (!count++) {
3562 *lp = l = PL_reginput - start;
3563 if (max != REG_INFTY && l*max < loceol - scan)
3564 loceol = scan + l*max;
3565 if (l == 0)
3566 return max;
c277df42
IZ
3567 }
3568 }
3569 }
708e3b05 3570 if (!res)
3280af22 3571 PL_reginput = scan;
c277df42 3572
a0ed51b3 3573 return count;
c277df42
IZ
3574}
3575
3576/*
cb8d8820 3577 - reginclass - determine if a character falls into a character class
bbce6d69 3578 */
3579
76e3520e 3580STATIC bool
936ed897 3581S_reginclass(pTHX_ register regnode *p, register I32 c)
bbce6d69 3582{
5c0ca799 3583 dTHR;
b8c5462f 3584 char flags = ANYOF_FLAGS(p);
bbce6d69 3585 bool match = FALSE;
3586
3587 c &= 0xFF;
b8c5462f 3588 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3589 match = TRUE;
3590 else if (flags & ANYOF_FOLD) {
3591 I32 cf;
3592 if (flags & ANYOF_LOCALE) {
3280af22 3593 PL_reg_flags |= RF_tainted;
22c35a8c 3594 cf = PL_fold_locale[c];
bbce6d69 3595 }
3596 else
22c35a8c 3597 cf = PL_fold[c];
b8c5462f 3598 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3599 match = TRUE;
3600 }
3601
b8c5462f 3602 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3603 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3604 if (
3605 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3606 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3607 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3608 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3609 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3610 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3611 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3612 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3613 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3614 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3615 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3616 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3617 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3618 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3619 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3620 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3621 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3622 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3623 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3624 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3625 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3626 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3627 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3628 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3629 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
aaa51d5e
JF
3630 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3631 (ANYOF_CLASS_TEST(p, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3632 (ANYOF_CLASS_TEST(p, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3633 (ANYOF_CLASS_TEST(p, ANYOF_BLANK) && isBLANK(c)) ||
3634 (ANYOF_CLASS_TEST(p, ANYOF_NBLANK) && !isBLANK(c))
b8c5462f 3635 ) /* How's that for a conditional? */
bbce6d69 3636 {
3637 match = TRUE;
3638 }
3639 }
3640
ae5c130c 3641 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3642}
3643
a0ed51b3 3644STATIC bool
cea2e8a9 3645S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
3646{
3647 dTHR;
a0ed51b3
LW
3648 char flags = ARG1(f);
3649 bool match = FALSE;
3650 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3651
3652 if (swash_fetch(sv, p))
3653 match = TRUE;
3654 else if (flags & ANYOF_FOLD) {
806e7201 3655 U8 tmpbuf[UTF8_MAXLEN];
a0ed51b3
LW
3656 if (flags & ANYOF_LOCALE) {
3657 PL_reg_flags |= RF_tainted;
3658 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3659 }
3660 else
3661 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3662 if (swash_fetch(sv, tmpbuf))
3663 match = TRUE;
3664 }
3665
b8c5462f 3666 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3667
3668 return (flags & ANYOF_INVERT) ? !match : match;
3669}
161b471a 3670
dfe13c55 3671STATIC U8 *
cea2e8a9 3672S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
3673{
3674 dTHR;
a0ed51b3
LW
3675 if (off >= 0) {
3676 while (off-- && s < (U8*)PL_regeol)
3677 s += UTF8SKIP(s);
3678 }
3679 else {
3680 while (off++) {
3681 if (s > (U8*)PL_bostr) {
3682 s--;
3683 if (*s & 0x80) {
3684 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3685 s--;
3686 } /* XXX could check well-formedness here */
3687 }
3688 }
3689 }
3690 return s;
3691}
161b471a 3692
dfe13c55 3693STATIC U8 *
cea2e8a9 3694S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3695{
c485e607 3696 dTHR;
a0ed51b3
LW
3697 if (off >= 0) {
3698 while (off-- && s < (U8*)PL_regeol)
3699 s += UTF8SKIP(s);
3700 if (off >= 0)
3701 return 0;
3702 }
3703 else {
3704 while (off++) {
3705 if (s > (U8*)PL_bostr) {
3706 s--;
3707 if (*s & 0x80) {
3708 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3709 s--;
3710 } /* XXX could check well-formedness here */
3711 }
3712 else
3713 break;
3714 }
3715 if (off <= 0)
3716 return 0;
3717 }
3718 return s;
3719}
51371543
GS
3720
3721#ifdef PERL_OBJECT
51371543
GS
3722#include "XSUB.h"
3723#endif
3724
3725static void
3726restore_pos(pTHXo_ void *arg)
3727{
3728 dTHR;
3729 if (PL_reg_eval_set) {
3730 if (PL_reg_oldsaved) {
3731 PL_reg_re->subbeg = PL_reg_oldsaved;
3732 PL_reg_re->sublen = PL_reg_oldsavedlen;
3733 RX_MATCH_COPIED_on(PL_reg_re);
3734 }
3735 PL_reg_magic->mg_len = PL_reg_oldpos;
3736 PL_reg_eval_set = 0;
3737 PL_curpm = PL_reg_oldcurpm;
3738 }
3739}