This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Comment on comment.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
3818b22b 69 **** Copyright (c) 1991-2000, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
02db2b7b 148# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 149 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 150 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 151
02db2b7b 152# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 153 PerlIO_printf(Perl_debug_log, \
faccc32b 154 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 155 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5 178 PerlIO_printf(Perl_debug_log,
b900a521
JH
179 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
180 (UV)paren, (IV)PL_regstartp[paren],
181 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
182 (IV)PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 188 PerlIO_printf(Perl_debug_log,
faccc32b
JH
189 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
190 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
191 }
192 );
3280af22
NIS
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d
IZ
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e
LW
197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6
IZ
205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6
IZ
211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
02db2b7b 222#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 223
29d1e993
HS
224#define TRYPAREN(paren, n, input) { \
225 if (paren) { \
226 if (n) { \
227 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
228 PL_regendp[paren] = input - PL_bostr; \
229 } \
230 else \
231 PL_regendp[paren] = -1; \
232 } \
233 if (regmatch(next)) \
234 sayYES; \
235 if (paren && n) \
236 PL_regendp[paren] = -1; \
237}
238
239
a687059c 240/*
e50aee73 241 * pregexec and friends
a687059c
LW
242 */
243
244/*
c277df42 245 - pregexec - match a regexp against a string
a687059c 246 */
c277df42 247I32
864dbfa3 248Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 249 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
250/* strend: pointer to null at end of string */
251/* strbeg: real beginning of string */
252/* minend: end of match must be >=minend after stringarg. */
253/* nosave: For optimizations. */
254{
255 return
256 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
257 nosave ? 0 : REXEC_COPY_STR);
258}
0f5d15d6
IZ
259
260STATIC void
cea2e8a9 261S_cache_re(pTHX_ regexp *prog)
0f5d15d6 262{
46124e9e 263 dTHR;
0f5d15d6
IZ
264 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
265#ifdef DEBUGGING
266 PL_regprogram = prog->program;
267#endif
268 PL_regnpar = prog->nparens;
269 PL_regdata = prog->data;
270 PL_reg_re = prog;
271}
22e551b9 272
cad2e5aa
JH
273/*
274 * Need to implement the following flags for reg_anch:
275 *
276 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
277 * USE_INTUIT_ML
278 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
279 * INTUIT_AUTORITATIVE_ML
280 * INTUIT_ONCE_NOML - Intuit can match in one location only.
281 * INTUIT_ONCE_ML
282 *
283 * Another flag for this function: SECOND_TIME (so that float substrs
284 * with giant delta may be not rechecked).
285 */
286
287/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
288
2c2d71f5 289/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
290 Otherwise, only SvCUR(sv) is used to get strbeg. */
291
292/* XXXX We assume that strpos is strbeg unless sv. */
293
6eb5f6b9
JH
294/* XXXX Some places assume that there is a fixed substring.
295 An update may be needed if optimizer marks as "INTUITable"
296 RExen without fixed substrings. Similarly, it is assumed that
297 lengths of all the strings are no more than minlen, thus they
298 cannot come from lookahead.
299 (Or minlen should take into account lookahead.) */
300
2c2d71f5
JH
301/* A failure to find a constant substring means that there is no need to make
302 an expensive call to REx engine, thus we celebrate a failure. Similarly,
303 finding a substring too deep into the string means that less calls to
30944b6d
IZ
304 regtry() should be needed.
305
306 REx compiler's optimizer found 4 possible hints:
307 a) Anchored substring;
308 b) Fixed substring;
309 c) Whether we are anchored (beginning-of-line or \G);
310 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 311 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
312 string which does not contradict any of them.
313 */
2c2d71f5 314
6eb5f6b9
JH
315/* Most of decisions we do here should have been done at compile time.
316 The nodes of the REx which we used for the search should have been
317 deleted from the finite automaton. */
318
cad2e5aa
JH
319char *
320Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
321 char *strend, U32 flags, re_scream_pos_data *data)
322{
2c2d71f5 323 register I32 start_shift;
cad2e5aa 324 /* Should be nonnegative! */
2c2d71f5
JH
325 register I32 end_shift;
326 register char *s;
327 register SV *check;
a1933d95 328 char *strbeg;
cad2e5aa
JH
329 char *t;
330 I32 ml_anch;
2c2d71f5 331 char *tmp;
6eb5f6b9
JH
332 register char *other_last = Nullch; /* other substr checked before this */
333 char *check_at; /* check substr found at this pos */
30944b6d
IZ
334#ifdef DEBUGGING
335 char *i_strpos = strpos;
336#endif
cad2e5aa
JH
337
338 DEBUG_r( if (!PL_colorset) reginitcolors() );
339 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 340 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
341 PL_colors[4],PL_colors[5],PL_colors[0],
342 prog->precomp,
343 PL_colors[1],
344 (strlen(prog->precomp) > 60 ? "..." : ""),
345 PL_colors[0],
b900a521 346 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
347 strpos, PL_colors[1],
348 (strend - strpos > 60 ? "..." : ""))
349 );
350
2c2d71f5
JH
351 if (prog->minlen > strend - strpos) {
352 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 353 goto fail;
2c2d71f5 354 }
a1933d95 355 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
653099ff 356 check = prog->check_substr;
2c2d71f5 357 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
358 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
359 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 360 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 361
5675c1a6
IZ
362 if (!ml_anch) {
363 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
364 /* SvCUR is not set on references: SvRV and SvPVX overlap */
365 && sv && !SvROK(sv)
a1933d95 366 && (strpos != strbeg)) {
5675c1a6
IZ
367 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
368 goto fail;
369 }
370 if (prog->check_offset_min == prog->check_offset_max) {
2c2d71f5 371 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
372 I32 slen;
373
adac82c7 374 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 375 s = HOPc(strpos, prog->check_offset_min);
653099ff
GS
376 if (SvTAIL(check)) {
377 slen = SvCUR(check); /* >= 1 */
cad2e5aa 378
2c2d71f5
JH
379 if ( strend - s > slen || strend - s < slen - 1
380 || (strend - s == slen && strend[-1] != '\n')) {
381 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
382 goto fail_finish;
cad2e5aa
JH
383 }
384 /* Now should match s[0..slen-2] */
385 slen--;
653099ff 386 if (slen && (*SvPVX(check) != *s
cad2e5aa 387 || (slen > 1
653099ff 388 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
389 report_neq:
390 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
391 goto fail_finish;
392 }
cad2e5aa 393 }
653099ff
GS
394 else if (*SvPVX(check) != *s
395 || ((slen = SvCUR(check)) > 1
396 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
397 goto report_neq;
398 goto success_at_start;
5675c1a6 399 }
cad2e5aa 400 }
2c2d71f5 401 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 402 s = strpos;
2c2d71f5 403 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 404 end_shift = prog->minlen - start_shift -
653099ff 405 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 406 if (!ml_anch) {
653099ff
GS
407 I32 end = prog->check_offset_max + CHR_SVLEN(check)
408 - (SvTAIL(check) != 0);
2c2d71f5
JH
409 I32 eshift = strend - s - end;
410
411 if (end_shift < eshift)
412 end_shift = eshift;
413 }
cad2e5aa 414 }
2c2d71f5 415 else { /* Can match at random position */
cad2e5aa
JH
416 ml_anch = 0;
417 s = strpos;
2c2d71f5
JH
418 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
419 /* Should be nonnegative! */
420 end_shift = prog->minlen - start_shift -
653099ff 421 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
422 }
423
2c2d71f5 424#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 425 if (end_shift < 0)
6bbae5e6 426 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
427#endif
428
2c2d71f5
JH
429 restart:
430 /* Find a possible match in the region s..strend by looking for
431 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 432 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
433 I32 p = -1; /* Internal iterator of scream. */
434 I32 *pp = data ? data->scream_pos : &p;
435
2c2d71f5
JH
436 if (PL_screamfirst[BmRARE(check)] >= 0
437 || ( BmRARE(check) == '\n'
438 && (BmPREVIOUS(check) == SvCUR(check) - 1)
439 && SvTAIL(check) ))
440 s = screaminstr(sv, check,
441 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 442 else
2c2d71f5 443 goto fail_finish;
cad2e5aa
JH
444 if (data)
445 *data->scream_olds = s;
446 }
447 else
448 s = fbm_instr((unsigned char*)s + start_shift,
449 (unsigned char*)strend - end_shift,
2c2d71f5 450 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
451
452 /* Update the count-of-usability, remove useless subpatterns,
453 unshift s. */
2c2d71f5
JH
454
455 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
456 (s ? "Found" : "Did not find"),
457 ((check == prog->anchored_substr) ? "anchored" : "floating"),
458 PL_colors[0],
7b0972df
JH
459 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
460 SvPVX(check),
2c2d71f5
JH
461 PL_colors[1], (SvTAIL(check) ? "$" : ""),
462 (s ? " at offset " : "...\n") ) );
463
464 if (!s)
465 goto fail_finish;
466
6eb5f6b9
JH
467 check_at = s;
468
2c2d71f5 469 /* Finish the diagnostic message */
30944b6d 470 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
471
472 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
473 Start with the other substr.
474 XXXX no SCREAM optimization yet - and a very coarse implementation
475 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
476 *always* match. Probably should be marked during compile...
477 Probably it is right to do no SCREAM here...
478 */
479
480 if (prog->float_substr && prog->anchored_substr) {
30944b6d 481 /* Take into account the "other" substring. */
2c2d71f5
JH
482 /* XXXX May be hopelessly wrong for UTF... */
483 if (!other_last)
6eb5f6b9 484 other_last = strpos;
2c2d71f5 485 if (check == prog->float_substr) {
30944b6d
IZ
486 do_other_anchored:
487 {
2c2d71f5
JH
488 char *last = s - start_shift, *last1, *last2;
489 char *s1 = s;
490
491 tmp = PL_bostr;
492 t = s - prog->check_offset_max;
493 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
494 && (!(prog->reganch & ROPT_UTF8)
495 || (PL_bostr = strpos, /* Used in regcopmaybe() */
496 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
497 && t > strpos)))
30944b6d 498 /* EMPTY */;
2c2d71f5
JH
499 else
500 t = strpos;
501 t += prog->anchored_offset;
6eb5f6b9
JH
502 if (t < other_last) /* These positions already checked */
503 t = other_last;
2c2d71f5
JH
504 PL_bostr = tmp;
505 last2 = last1 = strend - prog->minlen;
506 if (last < last1)
507 last1 = last;
508 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
509 /* On end-of-str: see comment below. */
510 s = fbm_instr((unsigned char*)t,
511 (unsigned char*)last1 + prog->anchored_offset
512 + SvCUR(prog->anchored_substr)
513 - (SvTAIL(prog->anchored_substr)!=0),
514 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
515 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
516 (s ? "Found" : "Contradicts"),
517 PL_colors[0],
7b0972df
JH
518 (int)(SvCUR(prog->anchored_substr)
519 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
520 SvPVX(prog->anchored_substr),
521 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
522 if (!s) {
523 if (last1 >= last2) {
524 DEBUG_r(PerlIO_printf(Perl_debug_log,
525 ", giving up...\n"));
526 goto fail_finish;
527 }
528 DEBUG_r(PerlIO_printf(Perl_debug_log,
529 ", trying floating at offset %ld...\n",
30944b6d 530 (long)(s1 + 1 - i_strpos)));
2c2d71f5 531 PL_regeol = strend; /* Used in HOP() */
6eb5f6b9 532 other_last = last1 + prog->anchored_offset + 1;
2c2d71f5
JH
533 s = HOPc(last, 1);
534 goto restart;
535 }
536 else {
537 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 538 (long)(s - i_strpos)));
2c2d71f5 539 t = s - prog->anchored_offset;
6eb5f6b9 540 other_last = s + 1;
30944b6d 541 s = s1;
2c2d71f5
JH
542 if (t == strpos)
543 goto try_at_start;
2c2d71f5
JH
544 goto try_at_offset;
545 }
30944b6d 546 }
2c2d71f5
JH
547 }
548 else { /* Take into account the floating substring. */
549 char *last, *last1;
550 char *s1 = s;
551
552 t = s - start_shift;
553 last1 = last = strend - prog->minlen + prog->float_min_offset;
554 if (last - t > prog->float_max_offset)
555 last = t + prog->float_max_offset;
556 s = t + prog->float_min_offset;
6eb5f6b9
JH
557 if (s < other_last)
558 s = other_last;
2c2d71f5
JH
559 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
560 /* fbm_instr() takes into account exact value of end-of-str
561 if the check is SvTAIL(ed). Since false positives are OK,
562 and end-of-str is not later than strend we are OK. */
563 s = fbm_instr((unsigned char*)s,
564 (unsigned char*)last + SvCUR(prog->float_substr)
565 - (SvTAIL(prog->float_substr)!=0),
566 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
567 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
568 (s ? "Found" : "Contradicts"),
569 PL_colors[0],
7b0972df
JH
570 (int)(SvCUR(prog->float_substr)
571 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
572 SvPVX(prog->float_substr),
573 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
574 if (!s) {
575 if (last1 == last) {
576 DEBUG_r(PerlIO_printf(Perl_debug_log,
577 ", giving up...\n"));
578 goto fail_finish;
579 }
580 DEBUG_r(PerlIO_printf(Perl_debug_log,
581 ", trying anchored starting at offset %ld...\n",
30944b6d 582 (long)(s1 + 1 - i_strpos)));
6eb5f6b9 583 other_last = last + 1;
2c2d71f5
JH
584 PL_regeol = strend; /* Used in HOP() */
585 s = HOPc(t, 1);
586 goto restart;
587 }
588 else {
589 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 590 (long)(s - i_strpos)));
6eb5f6b9 591 other_last = s + 1;
30944b6d 592 s = s1;
2c2d71f5
JH
593 if (t == strpos)
594 goto try_at_start;
2c2d71f5
JH
595 goto try_at_offset;
596 }
597 }
cad2e5aa 598 }
2c2d71f5
JH
599
600 t = s - prog->check_offset_max;
601 tmp = PL_bostr;
602 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
603 && (!(prog->reganch & ROPT_UTF8)
604 || (PL_bostr = strpos, /* Used in regcopmaybe() */
605 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
606 && t > strpos)))) {
607 PL_bostr = tmp;
608 /* Fixed substring is found far enough so that the match
609 cannot start at strpos. */
610 try_at_offset:
cad2e5aa 611 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
612 /* Eventually fbm_*() should handle this, but often
613 anchored_offset is not 0, so this check will not be wasted. */
614 /* XXXX In the code below we prefer to look for "^" even in
615 presence of anchored substrings. And we search even
616 beyond the found float position. These pessimizations
617 are historical artefacts only. */
618 find_anchor:
2c2d71f5 619 while (t < strend - prog->minlen) {
cad2e5aa 620 if (*t == '\n') {
4ee3650e 621 if (t < check_at - prog->check_offset_min) {
30944b6d 622 if (prog->anchored_substr) {
4ee3650e
GS
623 /* Since we moved from the found position,
624 we definitely contradict the found anchored
30944b6d
IZ
625 substr. Due to the above check we do not
626 contradict "check" substr.
627 Thus we can arrive here only if check substr
628 is float. Redo checking for "other"=="fixed".
629 */
630 strpos = t + 1;
631 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
632 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
633 goto do_other_anchored;
634 }
4ee3650e
GS
635 /* We don't contradict the found floating substring. */
636 /* XXXX Why not check for STCLASS? */
cad2e5aa 637 s = t + 1;
2c2d71f5 638 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 639 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
640 goto set_useful;
641 }
4ee3650e
GS
642 /* Position contradicts check-string */
643 /* XXXX probably better to look for check-string
644 than for "\n", so one should lower the limit for t? */
645 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 646 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 647 other_last = strpos = s = t + 1;
cad2e5aa
JH
648 goto restart;
649 }
650 t++;
651 }
2c2d71f5
JH
652 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
653 PL_colors[0],PL_colors[1]));
654 goto fail_finish;
cad2e5aa 655 }
f5952150
GS
656 else {
657 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
658 PL_colors[0],PL_colors[1]));
659 }
cad2e5aa
JH
660 s = t;
661 set_useful:
2c2d71f5 662 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
663 }
664 else {
2c2d71f5 665 PL_bostr = tmp;
f5952150 666 /* The found string does not prohibit matching at strpos,
2c2d71f5 667 - no optimization of calling REx engine can be performed,
f5952150
GS
668 unless it was an MBOL and we are not after MBOL,
669 or a future STCLASS check will fail this. */
2c2d71f5
JH
670 try_at_start:
671 /* Even in this situation we may use MBOL flag if strpos is offset
672 wrt the start of the string. */
05b4157f 673 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 674 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
675 /* May be due to an implicit anchor of m{.*foo} */
676 && !(prog->reganch & ROPT_IMPLICIT))
677 {
cad2e5aa
JH
678 t = strpos;
679 goto find_anchor;
680 }
30944b6d 681 DEBUG_r( if (ml_anch)
f5952150
GS
682 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
683 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 684 );
2c2d71f5 685 success_at_start:
30944b6d 686 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 687 && prog->check_substr /* Could be deleted already */
cad2e5aa 688 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
689 && prog->check_substr == prog->float_substr)
690 {
cad2e5aa 691 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 692 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
693 SvREFCNT_dec(prog->check_substr);
694 prog->check_substr = Nullsv; /* disable */
695 prog->float_substr = Nullsv; /* clear */
5e39e1e5 696 check = Nullsv; /* abort */
cad2e5aa 697 s = strpos;
3cf5c195
IZ
698 /* XXXX This is a remnant of the old implementation. It
699 looks wasteful, since now INTUIT can use many
6eb5f6b9 700 other heuristics. */
cad2e5aa
JH
701 prog->reganch &= ~RE_USE_INTUIT;
702 }
703 else
704 s = strpos;
705 }
706
6eb5f6b9
JH
707 /* Last resort... */
708 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
709 if (prog->regstclass) {
710 /* minlen == 0 is possible if regstclass is \b or \B,
711 and the fixed substr is ''$.
712 Since minlen is already taken into account, s+1 is before strend;
713 accidentally, minlen >= 1 guaranties no false positives at s + 1
714 even for \b or \B. But (minlen? 1 : 0) below assumes that
715 regstclass does not come from lookahead... */
716 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
717 This leaves EXACTF only, which is dealt with in find_byclass(). */
66e933ab
GS
718 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
719 ? STR_LEN(prog->regstclass)
720 : 1);
6eb5f6b9 721 char *endpos = (prog->anchored_substr || ml_anch)
66e933ab
GS
722 ? s + (prog->minlen? cl_l : 0)
723 : (prog->float_substr ? check_at - start_shift + cl_l
6eb5f6b9 724 : strend) ;
a1933d95 725 char *startpos = strbeg;
6eb5f6b9
JH
726
727 t = s;
76384e4a
GS
728 if (prog->reganch & ROPT_UTF8) {
729 PL_regdata = prog->data; /* Used by REGINCLASS UTF logic */
730 PL_bostr = startpos;
731 }
6eb5f6b9
JH
732 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
733 if (!s) {
734#ifdef DEBUGGING
735 char *what;
736#endif
737 if (endpos == strend) {
738 DEBUG_r( PerlIO_printf(Perl_debug_log,
739 "Could not match STCLASS...\n") );
740 goto fail;
741 }
66e933ab
GS
742 DEBUG_r( PerlIO_printf(Perl_debug_log,
743 "This position contradicts STCLASS...\n") );
653099ff
GS
744 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
745 goto fail;
6eb5f6b9
JH
746 /* Contradict one of substrings */
747 if (prog->anchored_substr) {
6eb5f6b9
JH
748 if (prog->anchored_substr == check) {
749 DEBUG_r( what = "anchored" );
750 hop_and_restart:
751 PL_regeol = strend; /* Used in HOP() */
752 s = HOPc(t, 1);
66e933ab
GS
753 if (s + start_shift + end_shift > strend) {
754 /* XXXX Should be taken into account earlier? */
755 DEBUG_r( PerlIO_printf(Perl_debug_log,
756 "Could not match STCLASS...\n") );
757 goto fail;
758 }
5e39e1e5
HS
759 if (!check)
760 goto giveup;
6eb5f6b9 761 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 762 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
763 what, (long)(s + start_shift - i_strpos)) );
764 goto restart;
765 }
66e933ab 766 /* Have both, check_string is floating */
6eb5f6b9
JH
767 if (t + start_shift >= check_at) /* Contradicts floating=check */
768 goto retry_floating_check;
769 /* Recheck anchored substring, but not floating... */
770 s = check_at;
5e39e1e5
HS
771 if (!check)
772 goto giveup;
6eb5f6b9 773 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 774 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
775 (long)(other_last - i_strpos)) );
776 goto do_other_anchored;
777 }
60e71179
GS
778 /* Another way we could have checked stclass at the
779 current position only: */
780 if (ml_anch) {
781 s = t = t + 1;
5e39e1e5
HS
782 if (!check)
783 goto giveup;
60e71179 784 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
785 "Looking for /%s^%s/m starting at offset %ld...\n",
786 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 787 goto try_at_offset;
66e933ab 788 }
60e71179
GS
789 if (!prog->float_substr) /* Could have been deleted */
790 goto fail;
6eb5f6b9
JH
791 /* Check is floating subtring. */
792 retry_floating_check:
793 t = check_at - start_shift;
794 DEBUG_r( what = "floating" );
795 goto hop_and_restart;
796 }
797 DEBUG_r( if (t != s)
798 PerlIO_printf(Perl_debug_log,
799 "By STCLASS: moving %ld --> %ld\n",
800 (long)(t - i_strpos), (long)(s - i_strpos));
801 else
802 PerlIO_printf(Perl_debug_log,
803 "Does not contradict STCLASS...\n") );
804 }
5e39e1e5
HS
805 giveup:
806 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
807 PL_colors[4], (check ? "Guessed" : "Giving up"),
808 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 809 return s;
2c2d71f5
JH
810
811 fail_finish: /* Substring not found */
66e933ab
GS
812 if (prog->check_substr) /* could be removed already */
813 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 814 fail:
2c2d71f5 815 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
816 PL_colors[4],PL_colors[5]));
817 return Nullch;
818}
9661b544 819
6eb5f6b9 820/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
821STATIC char *
822S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 823{
6eb5f6b9
JH
824 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
825 char *m;
d8093b23
G
826 STRLEN ln;
827 unsigned int c1;
828 unsigned int c2;
6eb5f6b9
JH
829 char *e;
830 register I32 tmp = 1; /* Scratch variable? */
cad2e5aa 831
6eb5f6b9
JH
832 /* We know what class it must start with. */
833 switch (OP(c)) {
834 case ANYOFUTF8:
835 while (s < strend) {
836 if (REGINCLASSUTF8(c, (U8*)s)) {
837 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 838 goto got_it;
3cf5c195 839 else
6eb5f6b9 840 tmp = doevery;
a0ed51b3 841 }
6eb5f6b9
JH
842 else
843 tmp = 1;
a0ed51b3
LW
844 s += UTF8SKIP(s);
845 }
6eb5f6b9
JH
846 break;
847 case ANYOF:
a0ed51b3 848 while (s < strend) {
d8093b23 849 if (REGINCLASS(c, *(U8*)s)) {
6eb5f6b9
JH
850 if (tmp && (norun || regtry(prog, s)))
851 goto got_it;
852 else
853 tmp = doevery;
a0ed51b3 854 }
6eb5f6b9
JH
855 else
856 tmp = 1;
a0ed51b3 857 s++;
a0d0e21e 858 }
6eb5f6b9
JH
859 break;
860 case EXACTF:
861 m = STRING(c);
862 ln = STR_LEN(c);
d8093b23 863 c1 = *(U8*)m;
6eb5f6b9
JH
864 c2 = PL_fold[c1];
865 goto do_exactf;
866 case EXACTFL:
867 m = STRING(c);
868 ln = STR_LEN(c);
d8093b23 869 c1 = *(U8*)m;
6eb5f6b9
JH
870 c2 = PL_fold_locale[c1];
871 do_exactf:
872 e = strend - ln;
b3c9acc1 873
6eb5f6b9
JH
874 if (norun && e < s)
875 e = s; /* Due to minlen logic of intuit() */
b3c9acc1
IZ
876 /* Here it is NOT UTF! */
877 if (c1 == c2) {
878 while (s <= e) {
d8093b23 879 if ( *(U8*)s == c1
66e933ab
GS
880 && (ln == 1 || !(OP(c) == EXACTF
881 ? ibcmp(s, m, ln)
882 : ibcmp_locale(s, m, ln)))
6eb5f6b9 883 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
884 goto got_it;
885 s++;
886 }
887 } else {
888 while (s <= e) {
d8093b23 889 if ( (*(U8*)s == c1 || *(U8*)s == c2)
66e933ab
GS
890 && (ln == 1 || !(OP(c) == EXACTF
891 ? ibcmp(s, m, ln)
892 : ibcmp_locale(s, m, ln)))
6eb5f6b9 893 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
894 goto got_it;
895 s++;
896 }
897 }
898 break;
bbce6d69 899 case BOUNDL:
3280af22 900 PL_reg_flags |= RF_tainted;
bbce6d69 901 /* FALL THROUGH */
a0d0e21e 902 case BOUND:
6eb5f6b9 903 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 904 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 905 while (s < strend) {
95bac841 906 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 907 tmp = !tmp;
6eb5f6b9 908 if ((norun || regtry(prog, s)))
a0d0e21e 909 goto got_it;
a687059c 910 }
a0d0e21e
LW
911 s++;
912 }
6eb5f6b9 913 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
914 goto got_it;
915 break;
a0ed51b3
LW
916 case BOUNDLUTF8:
917 PL_reg_flags |= RF_tainted;
918 /* FALL THROUGH */
919 case BOUNDUTF8:
667bb95a
JH
920 if (s == startpos)
921 tmp = '\n';
922 else {
923 U8 *r = reghop((U8*)s, -1);
924
925 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
926 }
927 tmp = ((OP(c) == BOUNDUTF8 ?
928 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 929 while (s < strend) {
76384e4a 930 if (tmp == !(OP(c) == BOUNDUTF8 ?
dfe13c55
GS
931 swash_fetch(PL_utf8_alnum, (U8*)s) :
932 isALNUM_LC_utf8((U8*)s)))
933 {
a0ed51b3 934 tmp = !tmp;
6eb5f6b9 935 if ((norun || regtry(prog, s)))
a0ed51b3
LW
936 goto got_it;
937 }
938 s += UTF8SKIP(s);
939 }
6eb5f6b9 940 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
941 goto got_it;
942 break;
bbce6d69 943 case NBOUNDL:
3280af22 944 PL_reg_flags |= RF_tainted;
bbce6d69 945 /* FALL THROUGH */
a0d0e21e 946 case NBOUND:
6eb5f6b9 947 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 948 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 949 while (s < strend) {
95bac841 950 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 951 tmp = !tmp;
6eb5f6b9 952 else if ((norun || regtry(prog, s)))
a0d0e21e
LW
953 goto got_it;
954 s++;
955 }
6eb5f6b9 956 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
957 goto got_it;
958 break;
a0ed51b3
LW
959 case NBOUNDLUTF8:
960 PL_reg_flags |= RF_tainted;
961 /* FALL THROUGH */
962 case NBOUNDUTF8:
667bb95a
JH
963 if (s == startpos)
964 tmp = '\n';
965 else {
966 U8 *r = reghop((U8*)s, -1);
967
968 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
969 }
970 tmp = ((OP(c) == NBOUNDUTF8 ?
971 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 972 while (s < strend) {
76384e4a 973 if (tmp == !(OP(c) == NBOUNDUTF8 ?
dfe13c55
GS
974 swash_fetch(PL_utf8_alnum, (U8*)s) :
975 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 976 tmp = !tmp;
6eb5f6b9 977 else if ((norun || regtry(prog, s)))
a0ed51b3
LW
978 goto got_it;
979 s += UTF8SKIP(s);
980 }
6eb5f6b9 981 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
982 goto got_it;
983 break;
a0d0e21e
LW
984 case ALNUM:
985 while (s < strend) {
bbce6d69 986 if (isALNUM(*s)) {
6eb5f6b9 987 if (tmp && (norun || regtry(prog, s)))
bbce6d69 988 goto got_it;
989 else
990 tmp = doevery;
991 }
992 else
993 tmp = 1;
994 s++;
995 }
996 break;
a0ed51b3
LW
997 case ALNUMUTF8:
998 while (s < strend) {
dfe13c55 999 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 1000 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1001 goto got_it;
1002 else
1003 tmp = doevery;
1004 }
1005 else
1006 tmp = 1;
1007 s += UTF8SKIP(s);
1008 }
1009 break;
bbce6d69 1010 case ALNUML:
3280af22 1011 PL_reg_flags |= RF_tainted;
bbce6d69 1012 while (s < strend) {
1013 if (isALNUM_LC(*s)) {
6eb5f6b9 1014 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1015 goto got_it;
a687059c 1016 else
a0d0e21e
LW
1017 tmp = doevery;
1018 }
1019 else
1020 tmp = 1;
1021 s++;
1022 }
1023 break;
a0ed51b3
LW
1024 case ALNUMLUTF8:
1025 PL_reg_flags |= RF_tainted;
1026 while (s < strend) {
dfe13c55 1027 if (isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1028 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1029 goto got_it;
1030 else
1031 tmp = doevery;
1032 }
1033 else
1034 tmp = 1;
1035 s += UTF8SKIP(s);
1036 }
1037 break;
a0d0e21e
LW
1038 case NALNUM:
1039 while (s < strend) {
bbce6d69 1040 if (!isALNUM(*s)) {
6eb5f6b9 1041 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1042 goto got_it;
1043 else
1044 tmp = doevery;
1045 }
1046 else
1047 tmp = 1;
1048 s++;
1049 }
1050 break;
a0ed51b3
LW
1051 case NALNUMUTF8:
1052 while (s < strend) {
dfe13c55 1053 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 1054 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1055 goto got_it;
1056 else
1057 tmp = doevery;
1058 }
1059 else
1060 tmp = 1;
1061 s += UTF8SKIP(s);
1062 }
1063 break;
bbce6d69 1064 case NALNUML:
3280af22 1065 PL_reg_flags |= RF_tainted;
bbce6d69 1066 while (s < strend) {
1067 if (!isALNUM_LC(*s)) {
6eb5f6b9 1068 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1069 goto got_it;
a687059c 1070 else
a0d0e21e 1071 tmp = doevery;
a687059c 1072 }
a0d0e21e
LW
1073 else
1074 tmp = 1;
1075 s++;
1076 }
1077 break;
a0ed51b3
LW
1078 case NALNUMLUTF8:
1079 PL_reg_flags |= RF_tainted;
1080 while (s < strend) {
dfe13c55 1081 if (!isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1082 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1083 goto got_it;
1084 else
1085 tmp = doevery;
1086 }
1087 else
1088 tmp = 1;
1089 s += UTF8SKIP(s);
1090 }
1091 break;
a0d0e21e
LW
1092 case SPACE:
1093 while (s < strend) {
1094 if (isSPACE(*s)) {
6eb5f6b9 1095 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1096 goto got_it;
1097 else
1098 tmp = doevery;
2304df62 1099 }
a0d0e21e
LW
1100 else
1101 tmp = 1;
1102 s++;
1103 }
1104 break;
a0ed51b3
LW
1105 case SPACEUTF8:
1106 while (s < strend) {
dfe13c55 1107 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
6eb5f6b9 1108 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1109 goto got_it;
1110 else
1111 tmp = doevery;
1112 }
1113 else
1114 tmp = 1;
1115 s += UTF8SKIP(s);
1116 }
1117 break;
bbce6d69 1118 case SPACEL:
3280af22 1119 PL_reg_flags |= RF_tainted;
bbce6d69 1120 while (s < strend) {
1121 if (isSPACE_LC(*s)) {
6eb5f6b9 1122 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1123 goto got_it;
1124 else
1125 tmp = doevery;
1126 }
1127 else
1128 tmp = 1;
1129 s++;
1130 }
1131 break;
a0ed51b3
LW
1132 case SPACELUTF8:
1133 PL_reg_flags |= RF_tainted;
1134 while (s < strend) {
dfe13c55 1135 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
6eb5f6b9 1136 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1137 goto got_it;
1138 else
1139 tmp = doevery;
1140 }
1141 else
1142 tmp = 1;
1143 s += UTF8SKIP(s);
1144 }
1145 break;
a0d0e21e
LW
1146 case NSPACE:
1147 while (s < strend) {
1148 if (!isSPACE(*s)) {
6eb5f6b9 1149 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1150 goto got_it;
1151 else
1152 tmp = doevery;
a687059c 1153 }
a0d0e21e
LW
1154 else
1155 tmp = 1;
1156 s++;
1157 }
1158 break;
a0ed51b3
LW
1159 case NSPACEUTF8:
1160 while (s < strend) {
dfe13c55 1161 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
6eb5f6b9 1162 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1163 goto got_it;
1164 else
1165 tmp = doevery;
1166 }
1167 else
1168 tmp = 1;
1169 s += UTF8SKIP(s);
1170 }
1171 break;
bbce6d69 1172 case NSPACEL:
3280af22 1173 PL_reg_flags |= RF_tainted;
bbce6d69 1174 while (s < strend) {
1175 if (!isSPACE_LC(*s)) {
6eb5f6b9 1176 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1177 goto got_it;
1178 else
1179 tmp = doevery;
1180 }
1181 else
1182 tmp = 1;
1183 s++;
1184 }
1185 break;
a0ed51b3
LW
1186 case NSPACELUTF8:
1187 PL_reg_flags |= RF_tainted;
1188 while (s < strend) {
dfe13c55 1189 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
6eb5f6b9 1190 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1191 goto got_it;
1192 else
1193 tmp = doevery;
1194 }
1195 else
1196 tmp = 1;
1197 s += UTF8SKIP(s);
1198 }
1199 break;
a0d0e21e
LW
1200 case DIGIT:
1201 while (s < strend) {
1202 if (isDIGIT(*s)) {
6eb5f6b9 1203 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1204 goto got_it;
1205 else
1206 tmp = doevery;
2b69d0c2 1207 }
a0d0e21e
LW
1208 else
1209 tmp = 1;
1210 s++;
1211 }
1212 break;
a0ed51b3
LW
1213 case DIGITUTF8:
1214 while (s < strend) {
dfe13c55 1215 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1216 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1217 goto got_it;
1218 else
1219 tmp = doevery;
1220 }
1221 else
1222 tmp = 1;
1223 s += UTF8SKIP(s);
1224 }
1225 break;
b8c5462f
JH
1226 case DIGITL:
1227 PL_reg_flags |= RF_tainted;
1228 while (s < strend) {
1229 if (isDIGIT_LC(*s)) {
6eb5f6b9 1230 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1231 goto got_it;
1232 else
1233 tmp = doevery;
1234 }
1235 else
1236 tmp = 1;
1237 s++;
1238 }
1239 break;
1240 case DIGITLUTF8:
1241 PL_reg_flags |= RF_tainted;
1242 while (s < strend) {
1243 if (isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1244 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1245 goto got_it;
1246 else
1247 tmp = doevery;
1248 }
1249 else
1250 tmp = 1;
1251 s += UTF8SKIP(s);
1252 }
1253 break;
a0d0e21e
LW
1254 case NDIGIT:
1255 while (s < strend) {
1256 if (!isDIGIT(*s)) {
6eb5f6b9 1257 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1258 goto got_it;
1259 else
1260 tmp = doevery;
a687059c 1261 }
a0d0e21e
LW
1262 else
1263 tmp = 1;
1264 s++;
1265 }
1266 break;
a0ed51b3
LW
1267 case NDIGITUTF8:
1268 while (s < strend) {
dfe13c55 1269 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1270 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1271 goto got_it;
1272 else
1273 tmp = doevery;
1274 }
1275 else
1276 tmp = 1;
1277 s += UTF8SKIP(s);
1278 }
1279 break;
b8c5462f
JH
1280 case NDIGITL:
1281 PL_reg_flags |= RF_tainted;
1282 while (s < strend) {
1283 if (!isDIGIT_LC(*s)) {
6eb5f6b9 1284 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1285 goto got_it;
1286 else
1287 tmp = doevery;
1288 }
1289 else
1290 tmp = 1;
1291 s++;
a0ed51b3 1292 }
b8c5462f
JH
1293 break;
1294 case NDIGITLUTF8:
1295 PL_reg_flags |= RF_tainted;
1296 while (s < strend) {
1297 if (!isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1298 if (tmp && (norun || regtry(prog, s)))
b8c5462f 1299 goto got_it;
cf93c79d 1300 else
b8c5462f
JH
1301 tmp = doevery;
1302 }
1303 else
1304 tmp = 1;
1305 s += UTF8SKIP(s);
1306 }
1307 break;
b3c9acc1 1308 default:
3c3eec57
GS
1309 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1310 break;
d6a28714 1311 }
6eb5f6b9
JH
1312 return 0;
1313 got_it:
1314 return s;
1315}
1316
1317/*
1318 - regexec_flags - match a regexp against a string
1319 */
1320I32
1321Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1322 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1323/* strend: pointer to null at end of string */
1324/* strbeg: real beginning of string */
1325/* minend: end of match must be >=minend after stringarg. */
1326/* data: May be used for some additional optimizations. */
1327/* nosave: For optimizations. */
1328{
1329 dTHR;
1330 register char *s;
1331 register regnode *c;
1332 register char *startpos = stringarg;
6eb5f6b9
JH
1333 I32 minlen; /* must match at least this many chars */
1334 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1335 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1336 constant substr. */ /* CC */
1337 I32 end_shift = 0; /* Same for the end. */ /* CC */
1338 I32 scream_pos = -1; /* Internal iterator of scream. */
1339 char *scream_olds;
1340 SV* oreplsv = GvSV(PL_replgv);
1341
1342 PL_regcc = 0;
1343
1344 cache_re(prog);
1345#ifdef DEBUGGING
1346 PL_regnarrate = PL_debug & 512;
1347#endif
1348
1349 /* Be paranoid... */
1350 if (prog == NULL || startpos == NULL) {
1351 Perl_croak(aTHX_ "NULL regexp parameter");
1352 return 0;
1353 }
1354
1355 minlen = prog->minlen;
1356 if (strend - startpos < minlen) goto phooey;
1357
1358 if (startpos == strbeg) /* is ^ valid at stringarg? */
1359 PL_regprev = '\n';
1360 else {
1361 PL_regprev = (U32)stringarg[-1];
1362 if (!PL_multiline && PL_regprev == '\n')
1363 PL_regprev = '\0'; /* force ^ to NOT match */
1364 }
1365
1366 /* Check validity of program. */
1367 if (UCHARAT(prog->program) != REG_MAGIC) {
1368 Perl_croak(aTHX_ "corrupted regexp program");
1369 }
1370
1371 PL_reg_flags = 0;
1372 PL_reg_eval_set = 0;
1373 PL_reg_maxiter = 0;
1374
1375 if (prog->reganch & ROPT_UTF8)
1376 PL_reg_flags |= RF_utf8;
1377
1378 /* Mark beginning of line for ^ and lookbehind. */
1379 PL_regbol = startpos;
1380 PL_bostr = strbeg;
1381 PL_reg_sv = sv;
1382
1383 /* Mark end of line for $ (and such) */
1384 PL_regeol = strend;
1385
1386 /* see how far we have to get to not match where we matched before */
1387 PL_regtill = startpos+minend;
1388
1389 /* We start without call_cc context. */
1390 PL_reg_call_cc = 0;
1391
1392 /* If there is a "must appear" string, look for it. */
1393 s = startpos;
1394
1395 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1396 MAGIC *mg;
1397
1398 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1399 PL_reg_ganch = startpos;
1400 else if (sv && SvTYPE(sv) >= SVt_PVMG
1401 && SvMAGIC(sv)
1402 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1403 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1404 if (prog->reganch & ROPT_ANCH_GPOS) {
1405 if (s > PL_reg_ganch)
1406 goto phooey;
1407 s = PL_reg_ganch;
1408 }
1409 }
1410 else /* pos() not defined */
1411 PL_reg_ganch = strbeg;
1412 }
1413
1414 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1415 re_scream_pos_data d;
1416
1417 d.scream_olds = &scream_olds;
1418 d.scream_pos = &scream_pos;
1419 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1420 if (!s)
1421 goto phooey; /* not present */
1422 }
1423
1424 DEBUG_r( if (!PL_colorset) reginitcolors() );
1425 DEBUG_r(PerlIO_printf(Perl_debug_log,
1426 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1427 PL_colors[4],PL_colors[5],PL_colors[0],
1428 prog->precomp,
1429 PL_colors[1],
1430 (strlen(prog->precomp) > 60 ? "..." : ""),
1431 PL_colors[0],
1432 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1433 startpos, PL_colors[1],
1434 (strend - startpos > 60 ? "..." : ""))
1435 );
1436
1437 /* Simplest case: anchored match need be tried only once. */
1438 /* [unless only anchor is BOL and multiline is set] */
1439 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1440 if (s == startpos && regtry(prog, startpos))
1441 goto got_it;
1442 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1443 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1444 {
1445 char *end;
1446
1447 if (minlen)
1448 dontbother = minlen - 1;
1449 end = HOPc(strend, -dontbother) - 1;
1450 /* for multiline we only have to try after newlines */
1451 if (prog->check_substr) {
1452 if (s == startpos)
1453 goto after_try;
1454 while (1) {
1455 if (regtry(prog, s))
1456 goto got_it;
1457 after_try:
1458 if (s >= end)
1459 goto phooey;
1460 if (prog->reganch & RE_USE_INTUIT) {
1461 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1462 if (!s)
1463 goto phooey;
1464 }
1465 else
1466 s++;
1467 }
1468 } else {
1469 if (s > startpos)
1470 s--;
1471 while (s < end) {
1472 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1473 if (regtry(prog, s))
1474 goto got_it;
1475 }
1476 }
1477 }
1478 }
1479 goto phooey;
1480 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1481 if (regtry(prog, PL_reg_ganch))
1482 goto got_it;
1483 goto phooey;
1484 }
1485
1486 /* Messy cases: unanchored match. */
1487 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1488 /* we have /x+whatever/ */
1489 /* it must be a one character string (XXXX Except UTF?) */
1490 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1491#ifdef DEBUGGING
1492 int did_match = 0;
1493#endif
1494
6eb5f6b9
JH
1495 if (UTF) {
1496 while (s < strend) {
1497 if (*s == ch) {
bf93d4cc 1498 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1499 if (regtry(prog, s)) goto got_it;
1500 s += UTF8SKIP(s);
1501 while (s < strend && *s == ch)
1502 s += UTF8SKIP(s);
1503 }
1504 s += UTF8SKIP(s);
1505 }
1506 }
1507 else {
1508 while (s < strend) {
1509 if (*s == ch) {
bf93d4cc 1510 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1511 if (regtry(prog, s)) goto got_it;
1512 s++;
1513 while (s < strend && *s == ch)
1514 s++;
1515 }
1516 s++;
1517 }
1518 }
bf93d4cc
GS
1519 DEBUG_r(did_match ||
1520 PerlIO_printf(Perl_debug_log,
1521 "Did not find anchored character...\n"));
6eb5f6b9
JH
1522 }
1523 /*SUPPRESS 560*/
1524 else if (prog->anchored_substr != Nullsv
1525 || (prog->float_substr != Nullsv
1526 && prog->float_max_offset < strend - s)) {
1527 SV *must = prog->anchored_substr
1528 ? prog->anchored_substr : prog->float_substr;
1529 I32 back_max =
1530 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1531 I32 back_min =
1532 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
6eb5f6b9
JH
1533 char *last = HOPc(strend, /* Cannot start after this */
1534 -(I32)(CHR_SVLEN(must)
1535 - (SvTAIL(must) != 0) + back_min));
1536 char *last1; /* Last position checked before */
bf93d4cc
GS
1537#ifdef DEBUGGING
1538 int did_match = 0;
1539#endif
6eb5f6b9
JH
1540
1541 if (s > PL_bostr)
1542 last1 = HOPc(s, -1);
1543 else
1544 last1 = s - 1; /* bogus */
1545
1546 /* XXXX check_substr already used to find `s', can optimize if
1547 check_substr==must. */
1548 scream_pos = -1;
1549 dontbother = end_shift;
1550 strend = HOPc(strend, -dontbother);
1551 while ( (s <= last) &&
1552 ((flags & REXEC_SCREAM)
1553 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1554 end_shift, &scream_pos, 0))
1555 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1556 (unsigned char*)strend, must,
1557 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1558 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1559 if (HOPc(s, -back_max) > last1) {
1560 last1 = HOPc(s, -back_min);
1561 s = HOPc(s, -back_max);
1562 }
1563 else {
1564 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1565
1566 last1 = HOPc(s, -back_min);
1567 s = t;
1568 }
1569 if (UTF) {
1570 while (s <= last1) {
1571 if (regtry(prog, s))
1572 goto got_it;
1573 s += UTF8SKIP(s);
1574 }
1575 }
1576 else {
1577 while (s <= last1) {
1578 if (regtry(prog, s))
1579 goto got_it;
1580 s++;
1581 }
1582 }
1583 }
bf93d4cc
GS
1584 DEBUG_r(did_match ||
1585 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1586 ((must == prog->anchored_substr)
1587 ? "anchored" : "floating"),
1588 PL_colors[0],
1589 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1590 SvPVX(must),
1591 PL_colors[1], (SvTAIL(must) ? "$" : "")));
6eb5f6b9
JH
1592 goto phooey;
1593 }
155aba94 1594 else if ((c = prog->regstclass)) {
66e933ab
GS
1595 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1596 /* don't bother with what can't match */
6eb5f6b9
JH
1597 strend = HOPc(strend, -(minlen - 1));
1598 if (find_byclass(prog, c, s, strend, startpos, 0))
1599 goto got_it;
bf93d4cc 1600 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1601 }
1602 else {
1603 dontbother = 0;
1604 if (prog->float_substr != Nullsv) { /* Trim the end. */
1605 char *last;
d6a28714
JH
1606
1607 if (flags & REXEC_SCREAM) {
1608 last = screaminstr(sv, prog->float_substr, s - strbeg,
1609 end_shift, &scream_pos, 1); /* last one */
1610 if (!last)
1611 last = scream_olds; /* Only one occurence. */
b8c5462f 1612 }
d6a28714
JH
1613 else {
1614 STRLEN len;
1615 char *little = SvPV(prog->float_substr, len);
1616
1617 if (SvTAIL(prog->float_substr)) {
1618 if (memEQ(strend - len + 1, little, len - 1))
1619 last = strend - len + 1;
1620 else if (!PL_multiline)
1621 last = memEQ(strend - len, little, len)
1622 ? strend - len : Nullch;
b8c5462f 1623 else
d6a28714
JH
1624 goto find_last;
1625 } else {
1626 find_last:
1627 if (len)
1628 last = rninstr(s, strend, little, little + len);
b8c5462f 1629 else
d6a28714 1630 last = strend; /* matching `$' */
b8c5462f 1631 }
b8c5462f 1632 }
bf93d4cc
GS
1633 if (last == NULL) {
1634 DEBUG_r(PerlIO_printf(Perl_debug_log,
1635 "%sCan't trim the tail, match fails (should not happen)%s\n",
1636 PL_colors[4],PL_colors[5]));
1637 goto phooey; /* Should not happen! */
1638 }
d6a28714
JH
1639 dontbother = strend - last + prog->float_min_offset;
1640 }
1641 if (minlen && (dontbother < minlen))
1642 dontbother = minlen - 1;
1643 strend -= dontbother; /* this one's always in bytes! */
1644 /* We don't know much -- general case. */
1645 if (UTF) {
1646 for (;;) {
1647 if (regtry(prog, s))
1648 goto got_it;
1649 if (s >= strend)
1650 break;
b8c5462f 1651 s += UTF8SKIP(s);
d6a28714
JH
1652 };
1653 }
1654 else {
1655 do {
1656 if (regtry(prog, s))
1657 goto got_it;
1658 } while (s++ < strend);
1659 }
1660 }
1661
1662 /* Failure. */
1663 goto phooey;
1664
1665got_it:
1666 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1667
1668 if (PL_reg_eval_set) {
1669 /* Preserve the current value of $^R */
1670 if (oreplsv != GvSV(PL_replgv))
1671 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1672 restored, the value remains
1673 the same. */
1674 restore_pos(aTHXo_ 0);
1675 }
1676
1677 /* make sure $`, $&, $', and $digit will work later */
1678 if ( !(flags & REXEC_NOT_FIRST) ) {
1679 if (RX_MATCH_COPIED(prog)) {
1680 Safefree(prog->subbeg);
1681 RX_MATCH_COPIED_off(prog);
1682 }
1683 if (flags & REXEC_COPY_STR) {
1684 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1685
1686 s = savepvn(strbeg, i);
1687 prog->subbeg = s;
1688 prog->sublen = i;
1689 RX_MATCH_COPIED_on(prog);
1690 }
1691 else {
1692 prog->subbeg = strbeg;
1693 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1694 }
1695 }
1696
1697 return 1;
1698
1699phooey:
bf93d4cc
GS
1700 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1701 PL_colors[4],PL_colors[5]));
d6a28714
JH
1702 if (PL_reg_eval_set)
1703 restore_pos(aTHXo_ 0);
1704 return 0;
1705}
1706
1707/*
1708 - regtry - try match at specific point
1709 */
1710STATIC I32 /* 0 failure, 1 success */
1711S_regtry(pTHX_ regexp *prog, char *startpos)
1712{
1713 dTHR;
1714 register I32 i;
1715 register I32 *sp;
1716 register I32 *ep;
1717 CHECKPOINT lastcp;
1718
02db2b7b
IZ
1719#ifdef DEBUGGING
1720 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1721#endif
d6a28714
JH
1722 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1723 MAGIC *mg;
1724
1725 PL_reg_eval_set = RS_init;
1726 DEBUG_r(DEBUG_s(
b900a521
JH
1727 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1728 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1729 ));
e8347627 1730 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1731 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1732 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1733 SAVETMPS;
1734 /* Apparently this is not needed, judging by wantarray. */
e8347627 1735 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1736 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1737
1738 if (PL_reg_sv) {
1739 /* Make $_ available to executed code. */
1740 if (PL_reg_sv != DEFSV) {
1741 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1742 SAVESPTR(DEFSV);
1743 DEFSV = PL_reg_sv;
b8c5462f 1744 }
d6a28714
JH
1745
1746 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1747 && (mg = mg_find(PL_reg_sv, 'g')))) {
1748 /* prepare for quick setting of pos */
1749 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1750 mg = mg_find(PL_reg_sv, 'g');
1751 mg->mg_len = -1;
b8c5462f 1752 }
d6a28714
JH
1753 PL_reg_magic = mg;
1754 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1755 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1756 }
1757 if (!PL_reg_curpm)
0f79a09d 1758 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1759 PL_reg_curpm->op_pmregexp = prog;
1760 PL_reg_oldcurpm = PL_curpm;
1761 PL_curpm = PL_reg_curpm;
1762 if (RX_MATCH_COPIED(prog)) {
1763 /* Here is a serious problem: we cannot rewrite subbeg,
1764 since it may be needed if this match fails. Thus
1765 $` inside (?{}) could fail... */
1766 PL_reg_oldsaved = prog->subbeg;
1767 PL_reg_oldsavedlen = prog->sublen;
1768 RX_MATCH_COPIED_off(prog);
1769 }
1770 else
1771 PL_reg_oldsaved = Nullch;
1772 prog->subbeg = PL_bostr;
1773 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1774 }
1775 prog->startp[0] = startpos - PL_bostr;
1776 PL_reginput = startpos;
1777 PL_regstartp = prog->startp;
1778 PL_regendp = prog->endp;
1779 PL_reglastparen = &prog->lastparen;
1780 prog->lastparen = 0;
1781 PL_regsize = 0;
1782 DEBUG_r(PL_reg_starttry = startpos);
1783 if (PL_reg_start_tmpl <= prog->nparens) {
1784 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1785 if(PL_reg_start_tmp)
1786 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1787 else
1788 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1789 }
1790
1791 /* XXXX What this code is doing here?!!! There should be no need
1792 to do this again and again, PL_reglastparen should take care of
3dd2943c
JH
1793 this! --ilya*/
1794 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this. --jhi */
d6a28714
JH
1795 sp = prog->startp;
1796 ep = prog->endp;
1797 if (prog->nparens) {
1798 for (i = prog->nparens; i >= 1; i--) {
1799 *++sp = -1;
1800 *++ep = -1;
1801 }
1802 }
02db2b7b 1803 REGCP_SET(lastcp);
d6a28714
JH
1804 if (regmatch(prog->program + 1)) {
1805 prog->endp[0] = PL_reginput - PL_bostr;
1806 return 1;
1807 }
02db2b7b 1808 REGCP_UNWIND(lastcp);
d6a28714
JH
1809 return 0;
1810}
1811
02db2b7b
IZ
1812#define RE_UNWIND_BRANCH 1
1813#define RE_UNWIND_BRANCHJ 2
1814
1815union re_unwind_t;
1816
1817typedef struct { /* XX: makes sense to enlarge it... */
1818 I32 type;
1819 I32 prev;
1820 CHECKPOINT lastcp;
1821} re_unwind_generic_t;
1822
1823typedef struct {
1824 I32 type;
1825 I32 prev;
1826 CHECKPOINT lastcp;
1827 I32 lastparen;
1828 regnode *next;
1829 char *locinput;
1830 I32 nextchr;
1831#ifdef DEBUGGING
1832 int regindent;
1833#endif
1834} re_unwind_branch_t;
1835
1836typedef union re_unwind_t {
1837 I32 type;
1838 re_unwind_generic_t generic;
1839 re_unwind_branch_t branch;
1840} re_unwind_t;
1841
d6a28714
JH
1842/*
1843 - regmatch - main matching routine
1844 *
1845 * Conceptually the strategy is simple: check to see whether the current
1846 * node matches, call self recursively to see whether the rest matches,
1847 * and then act accordingly. In practice we make some effort to avoid
1848 * recursion, in particular by going through "ordinary" nodes (that don't
1849 * need to know whether the rest of the match failed) by a loop instead of
1850 * by recursion.
1851 */
1852/* [lwall] I've hoisted the register declarations to the outer block in order to
1853 * maybe save a little bit of pushing and popping on the stack. It also takes
1854 * advantage of machines that use a register save mask on subroutine entry.
1855 */
1856STATIC I32 /* 0 failure, 1 success */
1857S_regmatch(pTHX_ regnode *prog)
1858{
1859 dTHR;
1860 register regnode *scan; /* Current node. */
1861 regnode *next; /* Next node. */
1862 regnode *inner; /* Next node in internal branch. */
1863 register I32 nextchr; /* renamed nextchr - nextchar colides with
1864 function of same name */
1865 register I32 n; /* no or next */
1866 register I32 ln; /* len or last */
1867 register char *s; /* operand or save */
1868 register char *locinput = PL_reginput;
1869 register I32 c1, c2, paren; /* case fold search, parenth */
1870 int minmod = 0, sw = 0, logical = 0;
02db2b7b
IZ
1871 I32 unwind = 0;
1872 I32 firstcp = PL_savestack_ix;
1873
d6a28714
JH
1874#ifdef DEBUGGING
1875 PL_regindent++;
1876#endif
1877
1878 /* Note that nextchr is a byte even in UTF */
1879 nextchr = UCHARAT(locinput);
1880 scan = prog;
1881 while (scan != NULL) {
1882#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
02db2b7b 1883#if 1
d6a28714
JH
1884# define sayYES goto yes
1885# define sayNO goto no
7821416a
IZ
1886# define sayYES_FINAL goto yes_final
1887# define sayYES_LOUD goto yes_loud
1888# define sayNO_FINAL goto no_final
1889# define sayNO_SILENT goto do_no
d6a28714
JH
1890# define saySAME(x) if (x) goto yes; else goto no
1891# define REPORT_CODE_OFF 24
1892#else
1893# define sayYES return 1
1894# define sayNO return 0
7821416a
IZ
1895# define sayYES_FINAL return 1
1896# define sayYES_LOUD return 1
1897# define sayNO_FINAL return 0
1898# define sayNO_SILENT return 0
d6a28714
JH
1899# define saySAME(x) return x
1900#endif
1901 DEBUG_r( {
1902 SV *prop = sv_newmortal();
1903 int docolor = *PL_colors[0];
1904 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1905 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1906 /* The part of the string before starttry has one color
1907 (pref0_len chars), between starttry and current
1908 position another one (pref_len - pref0_len chars),
1909 after the current position the third one.
1910 We assume that pref0_len <= pref_len, otherwise we
1911 decrease pref0_len. */
1912 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1913 ? (5 + taill) - l : locinput - PL_bostr);
1914 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1915
1916 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1917 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1918 ? (5 + taill) - pref_len : PL_regeol - locinput);
1919 if (pref0_len < 0)
1920 pref0_len = 0;
1921 if (pref0_len > pref_len)
1922 pref0_len = pref_len;
1923 regprop(prop, scan);
1924 PerlIO_printf(Perl_debug_log,
b900a521
JH
1925 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1926 (IV)(locinput - PL_bostr),
d6a28714
JH
1927 PL_colors[4], pref0_len,
1928 locinput - pref_len, PL_colors[5],
1929 PL_colors[2], pref_len - pref0_len,
1930 locinput - pref_len + pref0_len, PL_colors[3],
1931 (docolor ? "" : "> <"),
1932 PL_colors[0], l, locinput, PL_colors[1],
1933 15 - l - pref_len + 1,
1934 "",
b900a521 1935 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
1936 SvPVX(prop));
1937 } );
1938
1939 next = scan + NEXT_OFF(scan);
1940 if (next == scan)
1941 next = NULL;
1942
1943 switch (OP(scan)) {
1944 case BOL:
1945 if (locinput == PL_bostr
1946 ? PL_regprev == '\n'
1947 : (PL_multiline &&
1948 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1949 {
1950 /* regtill = regbol; */
b8c5462f
JH
1951 break;
1952 }
d6a28714
JH
1953 sayNO;
1954 case MBOL:
1955 if (locinput == PL_bostr
1956 ? PL_regprev == '\n'
1957 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1958 {
b8c5462f
JH
1959 break;
1960 }
d6a28714
JH
1961 sayNO;
1962 case SBOL:
c2a73568 1963 if (locinput == PL_bostr)
b8c5462f 1964 break;
d6a28714
JH
1965 sayNO;
1966 case GPOS:
1967 if (locinput == PL_reg_ganch)
1968 break;
1969 sayNO;
1970 case EOL:
1971 if (PL_multiline)
1972 goto meol;
1973 else
1974 goto seol;
1975 case MEOL:
1976 meol:
1977 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1978 sayNO;
b8c5462f 1979 break;
d6a28714
JH
1980 case SEOL:
1981 seol:
1982 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1983 sayNO;
d6a28714 1984 if (PL_regeol - locinput > 1)
b8c5462f 1985 sayNO;
b8c5462f 1986 break;
d6a28714
JH
1987 case EOS:
1988 if (PL_regeol != locinput)
b8c5462f 1989 sayNO;
d6a28714
JH
1990 break;
1991 case SANYUTF8:
b8c5462f 1992 if (nextchr & 0x80) {
b8c5462f 1993 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1994 if (locinput > PL_regeol)
1995 sayNO;
b8c5462f
JH
1996 nextchr = UCHARAT(locinput);
1997 break;
1998 }
d6a28714 1999 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2000 sayNO;
b8c5462f 2001 nextchr = UCHARAT(++locinput);
a0d0e21e 2002 break;
d6a28714
JH
2003 case SANY:
2004 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
2005 sayNO;
2006 nextchr = UCHARAT(++locinput);
b85d18e9 2007 break;
d6a28714 2008 case ANYUTF8:
a0ed51b3 2009 if (nextchr & 0x80) {
b8c5462f 2010 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2011 if (locinput > PL_regeol)
2012 sayNO;
a0ed51b3
LW
2013 nextchr = UCHARAT(locinput);
2014 break;
2015 }
155aba94 2016 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
a0ed51b3
LW
2017 sayNO;
2018 nextchr = UCHARAT(++locinput);
2019 break;
d6a28714 2020 case REG_ANY:
155aba94 2021 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
4633a7c4 2022 sayNO;
76e3520e 2023 nextchr = UCHARAT(++locinput);
a0d0e21e 2024 break;
d6a28714 2025 case EXACT:
cd439c50
IZ
2026 s = STRING(scan);
2027 ln = STR_LEN(scan);
d6a28714
JH
2028 /* Inline the first character, for speed. */
2029 if (UCHARAT(s) != nextchr)
2030 sayNO;
2031 if (PL_regeol - locinput < ln)
2032 sayNO;
2033 if (ln > 1 && memNE(s, locinput, ln))
2034 sayNO;
2035 locinput += ln;
2036 nextchr = UCHARAT(locinput);
2037 break;
2038 case EXACTFL:
b8c5462f
JH
2039 PL_reg_flags |= RF_tainted;
2040 /* FALL THROUGH */
d6a28714 2041 case EXACTF:
cd439c50
IZ
2042 s = STRING(scan);
2043 ln = STR_LEN(scan);
d6a28714
JH
2044
2045 if (UTF) {
2046 char *l = locinput;
2047 char *e = s + ln;
2048 c1 = OP(scan) == EXACTF;
2049 while (s < e) {
2050 if (l >= PL_regeol)
2051 sayNO;
667bb95a
JH
2052 if (utf8_to_uv((U8*)s, e - s, 0, 0) !=
2053 (c1 ?
2054 toLOWER_utf8((U8*)l) :
2055 toLOWER_LC_utf8((U8*)l)))
d6a28714
JH
2056 {
2057 sayNO;
2058 }
2059 s += UTF8SKIP(s);
2060 l += UTF8SKIP(l);
b8c5462f 2061 }
d6a28714 2062 locinput = l;
a0ed51b3
LW
2063 nextchr = UCHARAT(locinput);
2064 break;
2065 }
d6a28714
JH
2066
2067 /* Inline the first character, for speed. */
2068 if (UCHARAT(s) != nextchr &&
2069 UCHARAT(s) != ((OP(scan) == EXACTF)
2070 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2071 sayNO;
d6a28714 2072 if (PL_regeol - locinput < ln)
b8c5462f 2073 sayNO;
d6a28714
JH
2074 if (ln > 1 && (OP(scan) == EXACTF
2075 ? ibcmp(s, locinput, ln)
2076 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2077 sayNO;
d6a28714
JH
2078 locinput += ln;
2079 nextchr = UCHARAT(locinput);
a0d0e21e 2080 break;
d6a28714 2081 case ANYOFUTF8:
d6a28714 2082 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 2083 sayNO;
d6a28714
JH
2084 if (locinput >= PL_regeol)
2085 sayNO;
2086 locinput += PL_utf8skip[nextchr];
2087 nextchr = UCHARAT(locinput);
2088 break;
2089 case ANYOF:
d6a28714 2090 if (nextchr < 0)
b8c5462f 2091 nextchr = UCHARAT(locinput);
936ed897 2092 if (!REGINCLASS(scan, nextchr))
d6a28714
JH
2093 sayNO;
2094 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2095 sayNO;
b8c5462f
JH
2096 nextchr = UCHARAT(++locinput);
2097 break;
d6a28714 2098 case ALNUML:
b8c5462f
JH
2099 PL_reg_flags |= RF_tainted;
2100 /* FALL THROUGH */
d6a28714 2101 case ALNUM:
b8c5462f 2102 if (!nextchr)
4633a7c4 2103 sayNO;
d6a28714
JH
2104 if (!(OP(scan) == ALNUM
2105 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
2106 sayNO;
2107 nextchr = UCHARAT(++locinput);
bbce6d69 2108 break;
d6a28714 2109 case ALNUMLUTF8:
3280af22 2110 PL_reg_flags |= RF_tainted;
bbce6d69 2111 /* FALL THROUGH */
d6a28714 2112 case ALNUMUTF8:
b8c5462f
JH
2113 if (!nextchr)
2114 sayNO;
2115 if (nextchr & 0x80) {
d6a28714
JH
2116 if (!(OP(scan) == ALNUMUTF8
2117 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2118 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2119 {
2120 sayNO;
a0ed51b3 2121 }
b8c5462f 2122 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2123 nextchr = UCHARAT(locinput);
2124 break;
2125 }
d6a28714
JH
2126 if (!(OP(scan) == ALNUMUTF8
2127 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2128 sayNO;
b8c5462f 2129 nextchr = UCHARAT(++locinput);
a0d0e21e 2130 break;
d6a28714 2131 case NALNUML:
b8c5462f
JH
2132 PL_reg_flags |= RF_tainted;
2133 /* FALL THROUGH */
d6a28714
JH
2134 case NALNUM:
2135 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2136 sayNO;
d6a28714
JH
2137 if (OP(scan) == NALNUM
2138 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 2139 sayNO;
b8c5462f 2140 nextchr = UCHARAT(++locinput);
a0ed51b3 2141 break;
d6a28714 2142 case NALNUMLUTF8:
b8c5462f
JH
2143 PL_reg_flags |= RF_tainted;
2144 /* FALL THROUGH */
d6a28714 2145 case NALNUMUTF8:
3280af22 2146 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2147 sayNO;
b8c5462f 2148 if (nextchr & 0x80) {
d6a28714
JH
2149 if (OP(scan) == NALNUMUTF8
2150 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2151 : isALNUM_LC_utf8((U8*)locinput))
2152 {
b8c5462f 2153 sayNO;
d6a28714 2154 }
b8c5462f
JH
2155 locinput += PL_utf8skip[nextchr];
2156 nextchr = UCHARAT(locinput);
2157 break;
2158 }
d6a28714
JH
2159 if (OP(scan) == NALNUMUTF8
2160 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2161 sayNO;
76e3520e 2162 nextchr = UCHARAT(++locinput);
a0d0e21e 2163 break;
d6a28714
JH
2164 case BOUNDL:
2165 case NBOUNDL:
3280af22 2166 PL_reg_flags |= RF_tainted;
bbce6d69 2167 /* FALL THROUGH */
d6a28714
JH
2168 case BOUND:
2169 case NBOUND:
2170 /* was last char in word? */
2171 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2172 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2173 ln = isALNUM(ln);
2174 n = isALNUM(nextchr);
2175 }
2176 else {
2177 ln = isALNUM_LC(ln);
2178 n = isALNUM_LC(nextchr);
2179 }
2180 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 2181 sayNO;
a0d0e21e 2182 break;
d6a28714
JH
2183 case BOUNDLUTF8:
2184 case NBOUNDLUTF8:
a0ed51b3
LW
2185 PL_reg_flags |= RF_tainted;
2186 /* FALL THROUGH */
d6a28714
JH
2187 case BOUNDUTF8:
2188 case NBOUNDUTF8:
2189 /* was last char in word? */
667bb95a
JH
2190 if (locinput == PL_regbol)
2191 ln = PL_regprev;
2192 else {
2193 U8 *r = reghop((U8*)locinput, -1);
2194
2195 ln = utf8_to_uv(r, s - (char*)r, 0, 0);
2196 }
d6a28714
JH
2197 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2198 ln = isALNUM_uni(ln);
2199 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 2200 }
d6a28714
JH
2201 else {
2202 ln = isALNUM_LC_uni(ln);
2203 n = isALNUM_LC_utf8((U8*)locinput);
2204 }
2205 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 2206 sayNO;
a0ed51b3 2207 break;
d6a28714 2208 case SPACEL:
3280af22 2209 PL_reg_flags |= RF_tainted;
bbce6d69 2210 /* FALL THROUGH */
d6a28714 2211 case SPACE:
9442cb0e 2212 if (!nextchr)
4633a7c4 2213 sayNO;
d6a28714
JH
2214 if (!(OP(scan) == SPACE
2215 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 2216 sayNO;
76e3520e 2217 nextchr = UCHARAT(++locinput);
a0d0e21e 2218 break;
d6a28714 2219 case SPACELUTF8:
a0ed51b3
LW
2220 PL_reg_flags |= RF_tainted;
2221 /* FALL THROUGH */
d6a28714 2222 case SPACEUTF8:
9442cb0e 2223 if (!nextchr)
a0ed51b3
LW
2224 sayNO;
2225 if (nextchr & 0x80) {
d6a28714 2226 if (!(OP(scan) == SPACEUTF8
9442cb0e 2227 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714
JH
2228 : isSPACE_LC_utf8((U8*)locinput)))
2229 {
a0ed51b3 2230 sayNO;
d6a28714 2231 }
6f06b55f 2232 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2233 nextchr = UCHARAT(locinput);
2234 break;
2235 }
d6a28714
JH
2236 if (!(OP(scan) == SPACEUTF8
2237 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
2238 sayNO;
2239 nextchr = UCHARAT(++locinput);
2240 break;
d6a28714 2241 case NSPACEL:
3280af22 2242 PL_reg_flags |= RF_tainted;
bbce6d69 2243 /* FALL THROUGH */
d6a28714 2244 case NSPACE:
9442cb0e 2245 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2246 sayNO;
9442cb0e 2247 if (OP(scan) == NSPACE
d6a28714 2248 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2249 sayNO;
b8c5462f 2250 nextchr = UCHARAT(++locinput);
a0d0e21e 2251 break;
d6a28714 2252 case NSPACELUTF8:
a0ed51b3
LW
2253 PL_reg_flags |= RF_tainted;
2254 /* FALL THROUGH */
d6a28714 2255 case NSPACEUTF8:
9442cb0e 2256 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
2257 sayNO;
2258 if (nextchr & 0x80) {
d6a28714 2259 if (OP(scan) == NSPACEUTF8
9442cb0e 2260 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714 2261 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2262 {
2263 sayNO;
2264 }
2265 locinput += PL_utf8skip[nextchr];
2266 nextchr = UCHARAT(locinput);
2267 break;
a0ed51b3 2268 }
d6a28714
JH
2269 if (OP(scan) == NSPACEUTF8
2270 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2271 sayNO;
76e3520e 2272 nextchr = UCHARAT(++locinput);
a0d0e21e 2273 break;
d6a28714 2274 case DIGITL:
a0ed51b3
LW
2275 PL_reg_flags |= RF_tainted;
2276 /* FALL THROUGH */
d6a28714 2277 case DIGIT:
9442cb0e 2278 if (!nextchr)
a0ed51b3 2279 sayNO;
d6a28714
JH
2280 if (!(OP(scan) == DIGIT
2281 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2282 sayNO;
76e3520e 2283 nextchr = UCHARAT(++locinput);
a0d0e21e 2284 break;
d6a28714 2285 case DIGITLUTF8:
a0ed51b3
LW
2286 PL_reg_flags |= RF_tainted;
2287 /* FALL THROUGH */
d6a28714 2288 case DIGITUTF8:
a0ed51b3
LW
2289 if (!nextchr)
2290 sayNO;
2291 if (nextchr & 0x80) {
9442cb0e
GS
2292 if (!(OP(scan) == DIGITUTF8
2293 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2294 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2295 {
a0ed51b3 2296 sayNO;
dfe13c55 2297 }
6f06b55f 2298 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2299 nextchr = UCHARAT(locinput);
2300 break;
2301 }
9442cb0e
GS
2302 if (!(OP(scan) == DIGITUTF8
2303 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2304 sayNO;
2305 nextchr = UCHARAT(++locinput);
2306 break;
d6a28714 2307 case NDIGITL:
b8c5462f
JH
2308 PL_reg_flags |= RF_tainted;
2309 /* FALL THROUGH */
d6a28714 2310 case NDIGIT:
9442cb0e 2311 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2312 sayNO;
9442cb0e 2313 if (OP(scan) == NDIGIT
d6a28714 2314 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2315 sayNO;
76e3520e 2316 nextchr = UCHARAT(++locinput);
a0d0e21e 2317 break;
d6a28714 2318 case NDIGITLUTF8:
b8c5462f
JH
2319 PL_reg_flags |= RF_tainted;
2320 /* FALL THROUGH */
d6a28714 2321 case NDIGITUTF8:
b8c5462f
JH
2322 if (!nextchr && locinput >= PL_regeol)
2323 sayNO;
a0ed51b3 2324 if (nextchr & 0x80) {
9442cb0e
GS
2325 if (OP(scan) == NDIGITUTF8
2326 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2327 : isDIGIT_LC_utf8((U8*)locinput))
2328 {
a0ed51b3 2329 sayNO;
9442cb0e 2330 }
6f06b55f 2331 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2332 nextchr = UCHARAT(locinput);
2333 break;
2334 }
9442cb0e
GS
2335 if (OP(scan) == NDIGITUTF8
2336 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2337 sayNO;
2338 nextchr = UCHARAT(++locinput);
2339 break;
2340 case CLUMP:
dfe13c55 2341 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2342 sayNO;
6f06b55f 2343 locinput += PL_utf8skip[nextchr];
dfe13c55 2344 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2345 locinput += UTF8SKIP(locinput);
2346 if (locinput > PL_regeol)
2347 sayNO;
2348 nextchr = UCHARAT(locinput);
2349 break;
c8756f30 2350 case REFFL:
3280af22 2351 PL_reg_flags |= RF_tainted;
c8756f30 2352 /* FALL THROUGH */
c277df42 2353 case REF:
c8756f30 2354 case REFF:
c277df42 2355 n = ARG(scan); /* which paren pair */
cf93c79d 2356 ln = PL_regstartp[n];
2c2d71f5 2357 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2358 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2359 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2360 if (ln == PL_regendp[n])
a0d0e21e 2361 break;
a0ed51b3 2362
cf93c79d 2363 s = PL_bostr + ln;
a0ed51b3
LW
2364 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2365 char *l = locinput;
cf93c79d 2366 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2367 /*
2368 * Note that we can't do the "other character" lookup trick as
2369 * in the 8-bit case (no pun intended) because in Unicode we
2370 * have to map both upper and title case to lower case.
2371 */
2372 if (OP(scan) == REFF) {
2373 while (s < e) {
2374 if (l >= PL_regeol)
2375 sayNO;
dfe13c55 2376 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2377 sayNO;
2378 s += UTF8SKIP(s);
2379 l += UTF8SKIP(l);
2380 }
2381 }
2382 else {
2383 while (s < e) {
2384 if (l >= PL_regeol)
2385 sayNO;
dfe13c55 2386 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2387 sayNO;
2388 s += UTF8SKIP(s);
2389 l += UTF8SKIP(l);
2390 }
2391 }
2392 locinput = l;
2393 nextchr = UCHARAT(locinput);
2394 break;
2395 }
2396
a0d0e21e 2397 /* Inline the first character, for speed. */
76e3520e 2398 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2399 (OP(scan) == REF ||
2400 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2401 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2402 sayNO;
cf93c79d 2403 ln = PL_regendp[n] - ln;
3280af22 2404 if (locinput + ln > PL_regeol)
4633a7c4 2405 sayNO;
c8756f30
AK
2406 if (ln > 1 && (OP(scan) == REF
2407 ? memNE(s, locinput, ln)
2408 : (OP(scan) == REFF
2409 ? ibcmp(s, locinput, ln)
2410 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2411 sayNO;
a0d0e21e 2412 locinput += ln;
76e3520e 2413 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2414 break;
2415
2416 case NOTHING:
c277df42 2417 case TAIL:
a0d0e21e
LW
2418 break;
2419 case BACK:
2420 break;
c277df42
IZ
2421 case EVAL:
2422 {
2423 dSP;
533c011a 2424 OP_4tree *oop = PL_op;
3280af22
NIS
2425 COP *ocurcop = PL_curcop;
2426 SV **ocurpad = PL_curpad;
c277df42
IZ
2427 SV *ret;
2428
2429 n = ARG(scan);
533c011a 2430 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2431 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2432 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2433 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2434
cea2e8a9 2435 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2436 SPAGAIN;
2437 ret = POPs;
2438 PUTBACK;
2439
0f5d15d6
IZ
2440 PL_op = oop;
2441 PL_curpad = ocurpad;
2442 PL_curcop = ocurcop;
c277df42 2443 if (logical) {
0f5d15d6
IZ
2444 if (logical == 2) { /* Postponed subexpression. */
2445 regexp *re;
22c35a8c 2446 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2447 re_cc_state state;
0f5d15d6
IZ
2448 CHECKPOINT cp, lastcp;
2449
2450 if(SvROK(ret) || SvRMAGICAL(ret)) {
2451 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2452
2453 if(SvMAGICAL(sv))
2454 mg = mg_find(sv, 'r');
2455 }
2456 if (mg) {
2457 re = (regexp *)mg->mg_obj;
df0003d4 2458 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2459 }
2460 else {
2461 STRLEN len;
2462 char *t = SvPV(ret, len);
2463 PMOP pm;
2464 char *oprecomp = PL_regprecomp;
2465 I32 osize = PL_regsize;
2466 I32 onpar = PL_regnpar;
2467
2468 pm.op_pmflags = 0;
393fec97 2469 pm.op_pmdynflags = (UTF||DO_UTF8(ret) ? PMdf_UTF8 : 0);
cea2e8a9 2470 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2471 if (!(SvFLAGS(ret)
2472 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2473 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2474 PL_regprecomp = oprecomp;
2475 PL_regsize = osize;
2476 PL_regnpar = onpar;
2477 }
2478 DEBUG_r(
2479 PerlIO_printf(Perl_debug_log,
2480 "Entering embedded `%s%.60s%s%s'\n",
2481 PL_colors[0],
2482 re->precomp,
2483 PL_colors[1],
2484 (strlen(re->precomp) > 60 ? "..." : ""))
2485 );
2486 state.node = next;
2487 state.prev = PL_reg_call_cc;
2488 state.cc = PL_regcc;
2489 state.re = PL_reg_re;
2490
2ab05381 2491 PL_regcc = 0;
0f5d15d6
IZ
2492
2493 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2494 REGCP_SET(lastcp);
0f5d15d6
IZ
2495 cache_re(re);
2496 state.ss = PL_savestack_ix;
2497 *PL_reglastparen = 0;
2498 PL_reg_call_cc = &state;
2499 PL_reginput = locinput;
2c2d71f5
JH
2500
2501 /* XXXX This is too dramatic a measure... */
2502 PL_reg_maxiter = 0;
2503
0f5d15d6 2504 if (regmatch(re->program + 1)) {
2c914db6
IZ
2505 /* Even though we succeeded, we need to restore
2506 global variables, since we may be wrapped inside
2507 SUSPEND, thus the match may be not finished yet. */
2508
2509 /* XXXX Do this only if SUSPENDed? */
2510 PL_reg_call_cc = state.prev;
2511 PL_regcc = state.cc;
2512 PL_reg_re = state.re;
2513 cache_re(PL_reg_re);
2514
2515 /* XXXX This is too dramatic a measure... */
2516 PL_reg_maxiter = 0;
2517
2518 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2519 ReREFCNT_dec(re);
2520 regcpblow(cp);
2521 sayYES;
2522 }
0f5d15d6 2523 ReREFCNT_dec(re);
02db2b7b 2524 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2525 regcppop();
2526 PL_reg_call_cc = state.prev;
2527 PL_regcc = state.cc;
2528 PL_reg_re = state.re;
d3790889 2529 cache_re(PL_reg_re);
2c2d71f5
JH
2530
2531 /* XXXX This is too dramatic a measure... */
2532 PL_reg_maxiter = 0;
2533
0f5d15d6
IZ
2534 sayNO;
2535 }
c277df42 2536 sw = SvTRUE(ret);
0f5d15d6 2537 logical = 0;
a0ed51b3
LW
2538 }
2539 else
3280af22 2540 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2541 break;
2542 }
a0d0e21e 2543 case OPEN:
c277df42 2544 n = ARG(scan); /* which paren pair */
3280af22
NIS
2545 PL_reg_start_tmp[n] = locinput;
2546 if (n > PL_regsize)
2547 PL_regsize = n;
a0d0e21e
LW
2548 break;
2549 case CLOSE:
c277df42 2550 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2551 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2552 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2553 if (n > *PL_reglastparen)
2554 *PL_reglastparen = n;
a0d0e21e 2555 break;
c277df42
IZ
2556 case GROUPP:
2557 n = ARG(scan); /* which paren pair */
cf93c79d 2558 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2559 break;
2560 case IFTHEN:
2c2d71f5 2561 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2562 if (sw)
2563 next = NEXTOPER(NEXTOPER(scan));
2564 else {
2565 next = scan + ARG(scan);
2566 if (OP(next) == IFTHEN) /* Fake one. */
2567 next = NEXTOPER(NEXTOPER(next));
2568 }
2569 break;
2570 case LOGICAL:
0f5d15d6 2571 logical = scan->flags;
c277df42 2572 break;
2ab05381
IZ
2573/*******************************************************************
2574 PL_regcc contains infoblock about the innermost (...)* loop, and
2575 a pointer to the next outer infoblock.
2576
2577 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2578
2579 1) After matching X, regnode for CURLYX is processed;
2580
2581 2) This regnode creates infoblock on the stack, and calls
2582 regmatch() recursively with the starting point at WHILEM node;
2583
2584 3) Each hit of WHILEM node tries to match A and Z (in the order
2585 depending on the current iteration, min/max of {min,max} and
2586 greediness). The information about where are nodes for "A"
2587 and "Z" is read from the infoblock, as is info on how many times "A"
2588 was already matched, and greediness.
2589
2590 4) After A matches, the same WHILEM node is hit again.
2591
2592 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2593 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2594 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2595 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2596 of the external loop.
2597
2598 Currently present infoblocks form a tree with a stem formed by PL_curcc
2599 and whatever it mentions via ->next, and additional attached trees
2600 corresponding to temporarily unset infoblocks as in "5" above.
2601
2602 In the following picture infoblocks for outer loop of
2603 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2604 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2605 infoblocks are drawn below the "reset" infoblock.
2606
2607 In fact in the picture below we do not show failed matches for Z and T
2608 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2609 more obvious *why* one needs to *temporary* unset infoblocks.]
2610
2611 Matched REx position InfoBlocks Comment
2612 (Y(A)*?Z)*?T x
2613 Y(A)*?Z)*?T x <- O
2614 Y (A)*?Z)*?T x <- O
2615 Y A)*?Z)*?T x <- O <- I
2616 YA )*?Z)*?T x <- O <- I
2617 YA A)*?Z)*?T x <- O <- I
2618 YAA )*?Z)*?T x <- O <- I
2619 YAA Z)*?T x <- O # Temporary unset I
2620 I
2621
2622 YAAZ Y(A)*?Z)*?T x <- O
2623 I
2624
2625 YAAZY (A)*?Z)*?T x <- O
2626 I
2627
2628 YAAZY A)*?Z)*?T x <- O <- I
2629 I
2630
2631 YAAZYA )*?Z)*?T x <- O <- I
2632 I
2633
2634 YAAZYA Z)*?T x <- O # Temporary unset I
2635 I,I
2636
2637 YAAZYAZ )*?T x <- O
2638 I,I
2639
2640 YAAZYAZ T x # Temporary unset O
2641 O
2642 I,I
2643
2644 YAAZYAZT x
2645 O
2646 I,I
2647 *******************************************************************/
a0d0e21e
LW
2648 case CURLYX: {
2649 CURCUR cc;
3280af22 2650 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2651 /* No need to save/restore up to this paren */
2652 I32 parenfloor = scan->flags;
c277df42
IZ
2653
2654 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2655 next += ARG(next);
3280af22
NIS
2656 cc.oldcc = PL_regcc;
2657 PL_regcc = &cc;
cb434fcc
IZ
2658 /* XXXX Probably it is better to teach regpush to support
2659 parenfloor > PL_regsize... */
2660 if (parenfloor > *PL_reglastparen)
2661 parenfloor = *PL_reglastparen; /* Pessimization... */
2662 cc.parenfloor = parenfloor;
a0d0e21e
LW
2663 cc.cur = -1;
2664 cc.min = ARG1(scan);
2665 cc.max = ARG2(scan);
c277df42 2666 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2667 cc.next = next;
2668 cc.minmod = minmod;
2669 cc.lastloc = 0;
3280af22 2670 PL_reginput = locinput;
a0d0e21e
LW
2671 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2672 regcpblow(cp);
3280af22 2673 PL_regcc = cc.oldcc;
4633a7c4 2674 saySAME(n);
a0d0e21e
LW
2675 }
2676 /* NOT REACHED */
2677 case WHILEM: {
2678 /*
2679 * This is really hard to understand, because after we match
2680 * what we're trying to match, we must make sure the rest of
2c2d71f5 2681 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2682 * to go back UP the parse tree by recursing ever deeper. And
2683 * if it fails, we have to reset our parent's current state
2684 * that we can try again after backing off.
2685 */
2686
c277df42 2687 CHECKPOINT cp, lastcp;
3280af22 2688 CURCUR* cc = PL_regcc;
c277df42
IZ
2689 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2690
4633a7c4 2691 n = cc->cur + 1; /* how many we know we matched */
3280af22 2692 PL_reginput = locinput;
a0d0e21e 2693
c277df42
IZ
2694 DEBUG_r(
2695 PerlIO_printf(Perl_debug_log,
2696 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2697 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2698 (long)n, (long)cc->min,
2699 (long)cc->max, (long)cc)
2700 );
4633a7c4 2701
a0d0e21e
LW
2702 /* If degenerate scan matches "", assume scan done. */
2703
579cf2c3 2704 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2705 PL_regcc = cc->oldcc;
2ab05381
IZ
2706 if (PL_regcc)
2707 ln = PL_regcc->cur;
c277df42 2708 DEBUG_r(
c3464db5
DD
2709 PerlIO_printf(Perl_debug_log,
2710 "%*s empty match detected, try continuation...\n",
3280af22 2711 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2712 );
a0d0e21e 2713 if (regmatch(cc->next))
4633a7c4 2714 sayYES;
2ab05381
IZ
2715 if (PL_regcc)
2716 PL_regcc->cur = ln;
3280af22 2717 PL_regcc = cc;
4633a7c4 2718 sayNO;
a0d0e21e
LW
2719 }
2720
2721 /* First just match a string of min scans. */
2722
2723 if (n < cc->min) {
2724 cc->cur = n;
2725 cc->lastloc = locinput;
4633a7c4
LW
2726 if (regmatch(cc->scan))
2727 sayYES;
2728 cc->cur = n - 1;
c277df42 2729 cc->lastloc = lastloc;
4633a7c4 2730 sayNO;
a0d0e21e
LW
2731 }
2732
2c2d71f5
JH
2733 if (scan->flags) {
2734 /* Check whether we already were at this position.
2735 Postpone detection until we know the match is not
2736 *that* much linear. */
2737 if (!PL_reg_maxiter) {
2738 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2739 PL_reg_leftiter = PL_reg_maxiter;
2740 }
2741 if (PL_reg_leftiter-- == 0) {
2742 I32 size = (PL_reg_maxiter + 7)/8;
2743 if (PL_reg_poscache) {
2744 if (PL_reg_poscache_size < size) {
2745 Renew(PL_reg_poscache, size, char);
2746 PL_reg_poscache_size = size;
2747 }
2748 Zero(PL_reg_poscache, size, char);
2749 }
2750 else {
2751 PL_reg_poscache_size = size;
2752 Newz(29, PL_reg_poscache, size, char);
2753 }
2754 DEBUG_r(
2755 PerlIO_printf(Perl_debug_log,
2756 "%sDetected a super-linear match, switching on caching%s...\n",
2757 PL_colors[4], PL_colors[5])
2758 );
2759 }
2760 if (PL_reg_leftiter < 0) {
2761 I32 o = locinput - PL_bostr, b;
2762
2763 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2764 b = o % 8;
2765 o /= 8;
2766 if (PL_reg_poscache[o] & (1<<b)) {
2767 DEBUG_r(
2768 PerlIO_printf(Perl_debug_log,
2769 "%*s already tried at this position...\n",
2770 REPORT_CODE_OFF+PL_regindent*2, "")
2771 );
7821416a 2772 sayNO_SILENT;
2c2d71f5
JH
2773 }
2774 PL_reg_poscache[o] |= (1<<b);
2775 }
2776 }
2777
a0d0e21e
LW
2778 /* Prefer next over scan for minimal matching. */
2779
2780 if (cc->minmod) {
3280af22 2781 PL_regcc = cc->oldcc;
2ab05381
IZ
2782 if (PL_regcc)
2783 ln = PL_regcc->cur;
5f05dabc 2784 cp = regcppush(cc->parenfloor);
02db2b7b 2785 REGCP_SET(lastcp);
5f05dabc 2786 if (regmatch(cc->next)) {
c277df42 2787 regcpblow(cp);
4633a7c4 2788 sayYES; /* All done. */
5f05dabc 2789 }
02db2b7b 2790 REGCP_UNWIND(lastcp);
5f05dabc 2791 regcppop();
2ab05381
IZ
2792 if (PL_regcc)
2793 PL_regcc->cur = ln;
3280af22 2794 PL_regcc = cc;
a0d0e21e 2795
c277df42 2796 if (n >= cc->max) { /* Maximum greed exceeded? */
e476b1b5 2797 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2798 && !(PL_reg_flags & RF_warned)) {
2799 PL_reg_flags |= RF_warned;
e476b1b5 2800 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2801 "Complex regular subexpression recursion",
2802 REG_INFTY - 1);
c277df42 2803 }
4633a7c4 2804 sayNO;
c277df42 2805 }
a687059c 2806
c277df42 2807 DEBUG_r(
c3464db5
DD
2808 PerlIO_printf(Perl_debug_log,
2809 "%*s trying longer...\n",
3280af22 2810 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2811 );
a0d0e21e 2812 /* Try scanning more and see if it helps. */
3280af22 2813 PL_reginput = locinput;
a0d0e21e
LW
2814 cc->cur = n;
2815 cc->lastloc = locinput;
5f05dabc 2816 cp = regcppush(cc->parenfloor);
02db2b7b 2817 REGCP_SET(lastcp);
5f05dabc 2818 if (regmatch(cc->scan)) {
c277df42 2819 regcpblow(cp);
4633a7c4 2820 sayYES;
5f05dabc 2821 }
02db2b7b 2822 REGCP_UNWIND(lastcp);
5f05dabc 2823 regcppop();
4633a7c4 2824 cc->cur = n - 1;
c277df42 2825 cc->lastloc = lastloc;
4633a7c4 2826 sayNO;
a0d0e21e
LW
2827 }
2828
2829 /* Prefer scan over next for maximal matching. */
2830
2831 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2832 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2833 cc->cur = n;
2834 cc->lastloc = locinput;
02db2b7b 2835 REGCP_SET(lastcp);
5f05dabc 2836 if (regmatch(cc->scan)) {
c277df42 2837 regcpblow(cp);
4633a7c4 2838 sayYES;
5f05dabc 2839 }
02db2b7b 2840 REGCP_UNWIND(lastcp);
a0d0e21e 2841 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2842 PL_reginput = locinput;
c277df42 2843 DEBUG_r(
c3464db5
DD
2844 PerlIO_printf(Perl_debug_log,
2845 "%*s failed, try continuation...\n",
3280af22 2846 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2847 );
2848 }
e476b1b5 2849 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2850 && !(PL_reg_flags & RF_warned)) {
3280af22 2851 PL_reg_flags |= RF_warned;
e476b1b5 2852 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2853 "Complex regular subexpression recursion",
2854 REG_INFTY - 1);
a0d0e21e
LW
2855 }
2856
2857 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2858 PL_regcc = cc->oldcc;
2ab05381
IZ
2859 if (PL_regcc)
2860 ln = PL_regcc->cur;
a0d0e21e 2861 if (regmatch(cc->next))
4633a7c4 2862 sayYES;
2ab05381
IZ
2863 if (PL_regcc)
2864 PL_regcc->cur = ln;
3280af22 2865 PL_regcc = cc;
4633a7c4 2866 cc->cur = n - 1;
c277df42 2867 cc->lastloc = lastloc;
4633a7c4 2868 sayNO;
a0d0e21e
LW
2869 }
2870 /* NOT REACHED */
c277df42
IZ
2871 case BRANCHJ:
2872 next = scan + ARG(scan);
2873 if (next == scan)
2874 next = NULL;
2875 inner = NEXTOPER(NEXTOPER(scan));
2876 goto do_branch;
2877 case BRANCH:
2878 inner = NEXTOPER(scan);
2879 do_branch:
2880 {
2881 CHECKPOINT lastcp;
2882 c1 = OP(scan);
2883 if (OP(next) != c1) /* No choice. */
2884 next = inner; /* Avoid recursion. */
a0d0e21e 2885 else {
02db2b7b
IZ
2886 I32 lastparen = *PL_reglastparen;
2887 I32 unwind1;
2888 re_unwind_branch_t *uw;
2889
2890 /* Put unwinding data on stack */
2891 unwind1 = SSNEWt(1,re_unwind_branch_t);
2892 uw = SSPTRt(unwind1,re_unwind_branch_t);
2893 uw->prev = unwind;
2894 unwind = unwind1;
2895 uw->type = ((c1 == BRANCH)
2896 ? RE_UNWIND_BRANCH
2897 : RE_UNWIND_BRANCHJ);
2898 uw->lastparen = lastparen;
2899 uw->next = next;
2900 uw->locinput = locinput;
2901 uw->nextchr = nextchr;
2902#ifdef DEBUGGING
2903 uw->regindent = ++PL_regindent;
2904#endif
c277df42 2905
02db2b7b
IZ
2906 REGCP_SET(uw->lastcp);
2907
2908 /* Now go into the first branch */
2909 next = inner;
a687059c 2910 }
a0d0e21e
LW
2911 }
2912 break;
2913 case MINMOD:
2914 minmod = 1;
2915 break;
c277df42
IZ
2916 case CURLYM:
2917 {
00db4c45 2918 I32 l = 0;
c277df42
IZ
2919 CHECKPOINT lastcp;
2920
2921 /* We suppose that the next guy does not need
2922 backtracking: in particular, it is of constant length,
2923 and has no parenths to influence future backrefs. */
2924 ln = ARG1(scan); /* min to match */
2925 n = ARG2(scan); /* max to match */
c277df42
IZ
2926 paren = scan->flags;
2927 if (paren) {
3280af22
NIS
2928 if (paren > PL_regsize)
2929 PL_regsize = paren;
2930 if (paren > *PL_reglastparen)
2931 *PL_reglastparen = paren;
c277df42 2932 }
dc45a647 2933 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2934 if (paren)
2935 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2936 PL_reginput = locinput;
c277df42
IZ
2937 if (minmod) {
2938 minmod = 0;
2939 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2940 sayNO;
5f4b28b2 2941 if (ln && l == 0 && n >= ln
c277df42
IZ
2942 /* In fact, this is tricky. If paren, then the
2943 fact that we did/didnot match may influence
2944 future execution. */
2945 && !(paren && ln == 0))
2946 ln = n;
3280af22 2947 locinput = PL_reginput;
22c35a8c 2948 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2949 c1 = (U8)*STRING(next);
c277df42 2950 if (OP(next) == EXACTF)
22c35a8c 2951 c2 = PL_fold[c1];
c277df42 2952 else if (OP(next) == EXACTFL)
22c35a8c 2953 c2 = PL_fold_locale[c1];
c277df42
IZ
2954 else
2955 c2 = c1;
a0ed51b3
LW
2956 }
2957 else
c277df42 2958 c1 = c2 = -1000;
02db2b7b 2959 REGCP_SET(lastcp);
5f4b28b2 2960 /* This may be improved if l == 0. */
c277df42
IZ
2961 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2962 /* If it could work, try it. */
2963 if (c1 == -1000 ||
3280af22
NIS
2964 UCHARAT(PL_reginput) == c1 ||
2965 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2966 {
2967 if (paren) {
2968 if (n) {
cf93c79d
IZ
2969 PL_regstartp[paren] =
2970 HOPc(PL_reginput, -l) - PL_bostr;
2971 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2972 }
2973 else
cf93c79d 2974 PL_regendp[paren] = -1;
c277df42
IZ
2975 }
2976 if (regmatch(next))
2977 sayYES;
02db2b7b 2978 REGCP_UNWIND(lastcp);
c277df42
IZ
2979 }
2980 /* Couldn't or didn't -- move forward. */
3280af22 2981 PL_reginput = locinput;
c277df42
IZ
2982 if (regrepeat_hard(scan, 1, &l)) {
2983 ln++;
3280af22 2984 locinput = PL_reginput;
c277df42
IZ
2985 }
2986 else
2987 sayNO;
2988 }
a0ed51b3
LW
2989 }
2990 else {
c277df42
IZ
2991 n = regrepeat_hard(scan, n, &l);
2992 if (n != 0 && l == 0
2993 /* In fact, this is tricky. If paren, then the
2994 fact that we did/didnot match may influence
2995 future execution. */
2996 && !(paren && ln == 0))
2997 ln = n;
3280af22 2998 locinput = PL_reginput;
c277df42 2999 DEBUG_r(
5c0ca799 3000 PerlIO_printf(Perl_debug_log,
faccc32b 3001 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3002 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3003 (IV) n, (IV)l)
c277df42
IZ
3004 );
3005 if (n >= ln) {
22c35a8c 3006 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3007 c1 = (U8)*STRING(next);
c277df42 3008 if (OP(next) == EXACTF)
22c35a8c 3009 c2 = PL_fold[c1];
c277df42 3010 else if (OP(next) == EXACTFL)
22c35a8c 3011 c2 = PL_fold_locale[c1];
c277df42
IZ
3012 else
3013 c2 = c1;
a0ed51b3
LW
3014 }
3015 else
c277df42
IZ
3016 c1 = c2 = -1000;
3017 }
02db2b7b 3018 REGCP_SET(lastcp);
c277df42
IZ
3019 while (n >= ln) {
3020 /* If it could work, try it. */
3021 if (c1 == -1000 ||
3280af22
NIS
3022 UCHARAT(PL_reginput) == c1 ||
3023 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3024 {
3025 DEBUG_r(
c3464db5 3026 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3027 "%*s trying tail with n=%"IVdf"...\n",
3028 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3029 );
3030 if (paren) {
3031 if (n) {
cf93c79d
IZ
3032 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3033 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3034 }
a0ed51b3 3035 else
cf93c79d 3036 PL_regendp[paren] = -1;
c277df42 3037 }
a0ed51b3
LW
3038 if (regmatch(next))
3039 sayYES;
02db2b7b 3040 REGCP_UNWIND(lastcp);
a0ed51b3 3041 }
c277df42
IZ
3042 /* Couldn't or didn't -- back up. */
3043 n--;
dfe13c55 3044 locinput = HOPc(locinput, -l);
3280af22 3045 PL_reginput = locinput;
c277df42
IZ
3046 }
3047 }
3048 sayNO;
3049 break;
3050 }
3051 case CURLYN:
3052 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3053 if (paren > PL_regsize)
3054 PL_regsize = paren;
3055 if (paren > *PL_reglastparen)
3056 *PL_reglastparen = paren;
c277df42
IZ
3057 ln = ARG1(scan); /* min to match */
3058 n = ARG2(scan); /* max to match */
dc45a647 3059 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3060 goto repeat;
a0d0e21e 3061 case CURLY:
c277df42 3062 paren = 0;
a0d0e21e
LW
3063 ln = ARG1(scan); /* min to match */
3064 n = ARG2(scan); /* max to match */
dc45a647 3065 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3066 goto repeat;
3067 case STAR:
3068 ln = 0;
c277df42 3069 n = REG_INFTY;
a0d0e21e 3070 scan = NEXTOPER(scan);
c277df42 3071 paren = 0;
a0d0e21e
LW
3072 goto repeat;
3073 case PLUS:
c277df42
IZ
3074 ln = 1;
3075 n = REG_INFTY;
3076 scan = NEXTOPER(scan);
3077 paren = 0;
3078 repeat:
a0d0e21e
LW
3079 /*
3080 * Lookahead to avoid useless match attempts
3081 * when we know what character comes next.
3082 */
22c35a8c 3083 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3084 c1 = (U8)*STRING(next);
bbce6d69 3085 if (OP(next) == EXACTF)
22c35a8c 3086 c2 = PL_fold[c1];
bbce6d69 3087 else if (OP(next) == EXACTFL)
22c35a8c 3088 c2 = PL_fold_locale[c1];
bbce6d69 3089 else
3090 c2 = c1;
3091 }
a0d0e21e 3092 else
bbce6d69 3093 c1 = c2 = -1000;
3280af22 3094 PL_reginput = locinput;
a0d0e21e 3095 if (minmod) {
c277df42 3096 CHECKPOINT lastcp;
a0d0e21e
LW
3097 minmod = 0;
3098 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3099 sayNO;
a0ed51b3 3100 locinput = PL_reginput;
02db2b7b 3101 REGCP_SET(lastcp);
0fe9bf95
IZ
3102 if (c1 != -1000) {
3103 char *e = locinput + n - ln; /* Should not check after this */
3104 char *old = locinput;
3105
3106 if (e >= PL_regeol || (n == REG_INFTY))
3107 e = PL_regeol - 1;
3108 while (1) {
3109 /* Find place 'next' could work */
3110 if (c1 == c2) {
3111 while (locinput <= e && *locinput != c1)
3112 locinput++;
3113 } else {
3114 while (locinput <= e
3115 && *locinput != c1
3116 && *locinput != c2)
3117 locinput++;
3118 }
3119 if (locinput > e)
3120 sayNO;
3121 /* PL_reginput == old now */
3122 if (locinput != old) {
3123 ln = 1; /* Did some */
3124 if (regrepeat(scan, locinput - old) <
3125 locinput - old)
3126 sayNO;
3127 }
3128 /* PL_reginput == locinput now */
29d1e993 3129 TRYPAREN(paren, ln, locinput);
0fe9bf95 3130 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3131 REGCP_UNWIND(lastcp);
0fe9bf95
IZ
3132 /* Couldn't or didn't -- move forward. */
3133 old = locinput++;
3134 }
3135 }
3136 else
c277df42 3137 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 3138 /* If it could work, try it. */
bbce6d69 3139 if (c1 == -1000 ||
3280af22
NIS
3140 UCHARAT(PL_reginput) == c1 ||
3141 UCHARAT(PL_reginput) == c2)
bbce6d69 3142 {
29d1e993 3143 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3144 REGCP_UNWIND(lastcp);
bbce6d69 3145 }
c277df42 3146 /* Couldn't or didn't -- move forward. */
a0ed51b3 3147 PL_reginput = locinput;
a0d0e21e
LW
3148 if (regrepeat(scan, 1)) {
3149 ln++;
a0ed51b3
LW
3150 locinput = PL_reginput;
3151 }
3152 else
4633a7c4 3153 sayNO;
a0d0e21e
LW
3154 }
3155 }
3156 else {
c277df42 3157 CHECKPOINT lastcp;
a0d0e21e 3158 n = regrepeat(scan, n);
a0ed51b3 3159 locinput = PL_reginput;
22c35a8c 3160 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3161 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3162 ln = n; /* why back off? */
1aeab75a
GS
3163 /* ...because $ and \Z can match before *and* after
3164 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3165 We should back off by one in this case. */
3166 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3167 ln--;
3168 }
02db2b7b 3169 REGCP_SET(lastcp);
c277df42
IZ
3170 if (paren) {
3171 while (n >= ln) {
3172 /* If it could work, try it. */
3173 if (c1 == -1000 ||
3280af22
NIS
3174 UCHARAT(PL_reginput) == c1 ||
3175 UCHARAT(PL_reginput) == c2)
c277df42 3176 {
29d1e993 3177 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3178 REGCP_UNWIND(lastcp);
c277df42
IZ
3179 }
3180 /* Couldn't or didn't -- back up. */
3181 n--;
dfe13c55 3182 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3183 }
a0ed51b3
LW
3184 }
3185 else {
c277df42
IZ
3186 while (n >= ln) {
3187 /* If it could work, try it. */
3188 if (c1 == -1000 ||
3280af22
NIS
3189 UCHARAT(PL_reginput) == c1 ||
3190 UCHARAT(PL_reginput) == c2)
c277df42 3191 {
29d1e993 3192 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3193 REGCP_UNWIND(lastcp);
c277df42
IZ
3194 }
3195 /* Couldn't or didn't -- back up. */
3196 n--;
dfe13c55 3197 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3198 }
a0d0e21e
LW
3199 }
3200 }
4633a7c4 3201 sayNO;
c277df42 3202 break;
a0d0e21e 3203 case END:
0f5d15d6
IZ
3204 if (PL_reg_call_cc) {
3205 re_cc_state *cur_call_cc = PL_reg_call_cc;
3206 CURCUR *cctmp = PL_regcc;
3207 regexp *re = PL_reg_re;
3208 CHECKPOINT cp, lastcp;
3209
3210 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3211 REGCP_SET(lastcp);
0f5d15d6
IZ
3212 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3213 the caller. */
3214 PL_reginput = locinput; /* Make position available to
3215 the callcc. */
3216 cache_re(PL_reg_call_cc->re);
3217 PL_regcc = PL_reg_call_cc->cc;
3218 PL_reg_call_cc = PL_reg_call_cc->prev;
3219 if (regmatch(cur_call_cc->node)) {
3220 PL_reg_call_cc = cur_call_cc;
3221 regcpblow(cp);
3222 sayYES;
3223 }
02db2b7b 3224 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3225 regcppop();
3226 PL_reg_call_cc = cur_call_cc;
3227 PL_regcc = cctmp;
3228 PL_reg_re = re;
3229 cache_re(re);
3230
3231 DEBUG_r(
3232 PerlIO_printf(Perl_debug_log,
3233 "%*s continuation failed...\n",
3234 REPORT_CODE_OFF+PL_regindent*2, "")
3235 );
7821416a 3236 sayNO_SILENT;
0f5d15d6 3237 }
7821416a
IZ
3238 if (locinput < PL_regtill) {
3239 DEBUG_r(PerlIO_printf(Perl_debug_log,
3240 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3241 PL_colors[4],
3242 (long)(locinput - PL_reg_starttry),
3243 (long)(PL_regtill - PL_reg_starttry),
3244 PL_colors[5]));
3245 sayNO_FINAL; /* Cannot match: too short. */
3246 }
3247 PL_reginput = locinput; /* put where regtry can find it */
3248 sayYES_FINAL; /* Success! */
7e5428c5 3249 case SUCCEED:
3280af22 3250 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3251 sayYES_LOUD; /* Success! */
c277df42
IZ
3252 case SUSPEND:
3253 n = 1;
9fe1d20c 3254 PL_reginput = locinput;
c277df42 3255 goto do_ifmatch;
a0d0e21e 3256 case UNLESSM:
c277df42 3257 n = 0;
a0ed51b3 3258 if (scan->flags) {
0fe9bf95
IZ
3259 if (UTF) { /* XXXX This is absolutely
3260 broken, we read before
3261 start of string. */
3262 s = HOPMAYBEc(locinput, -scan->flags);
3263 if (!s)
3264 goto say_yes;
3265 PL_reginput = s;
3266 }
3267 else {
3268 if (locinput < PL_bostr + scan->flags)
3269 goto say_yes;
3270 PL_reginput = locinput - scan->flags;
3271 goto do_ifmatch;
3272 }
a0ed51b3
LW
3273 }
3274 else
3275 PL_reginput = locinput;
c277df42
IZ
3276 goto do_ifmatch;
3277 case IFMATCH:
3278 n = 1;
a0ed51b3 3279 if (scan->flags) {
0fe9bf95
IZ
3280 if (UTF) { /* XXXX This is absolutely
3281 broken, we read before
3282 start of string. */
3283 s = HOPMAYBEc(locinput, -scan->flags);
3284 if (!s || s < PL_bostr)
3285 goto say_no;
3286 PL_reginput = s;
3287 }
3288 else {
3289 if (locinput < PL_bostr + scan->flags)
3290 goto say_no;
3291 PL_reginput = locinput - scan->flags;
3292 goto do_ifmatch;
3293 }
a0ed51b3
LW
3294 }
3295 else
3296 PL_reginput = locinput;
3297
c277df42 3298 do_ifmatch:
c277df42
IZ
3299 inner = NEXTOPER(NEXTOPER(scan));
3300 if (regmatch(inner) != n) {
3301 say_no:
3302 if (logical) {
3303 logical = 0;
3304 sw = 0;
3305 goto do_longjump;
a0ed51b3
LW
3306 }
3307 else
c277df42
IZ
3308 sayNO;
3309 }
3310 say_yes:
3311 if (logical) {
3312 logical = 0;
3313 sw = 1;
3314 }
fe44a5e8 3315 if (OP(scan) == SUSPEND) {
3280af22 3316 locinput = PL_reginput;
565764a8 3317 nextchr = UCHARAT(locinput);
fe44a5e8 3318 }
c277df42
IZ
3319 /* FALL THROUGH. */
3320 case LONGJMP:
3321 do_longjump:
3322 next = scan + ARG(scan);
3323 if (next == scan)
3324 next = NULL;
a0d0e21e
LW
3325 break;
3326 default:
b900a521 3327 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3328 PTR2UV(scan), OP(scan));
cea2e8a9 3329 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3330 }
02db2b7b 3331 reenter:
a0d0e21e
LW
3332 scan = next;
3333 }
a687059c 3334
a0d0e21e
LW
3335 /*
3336 * We get here only if there's trouble -- normally "case END" is
3337 * the terminating point.
3338 */
cea2e8a9 3339 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3340 /*NOTREACHED*/
4633a7c4
LW
3341 sayNO;
3342
7821416a
IZ
3343yes_loud:
3344 DEBUG_r(
3345 PerlIO_printf(Perl_debug_log,
3346 "%*s %scould match...%s\n",
3347 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3348 );
3349 goto yes;
3350yes_final:
3351 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3352 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3353yes:
3354#ifdef DEBUGGING
3280af22 3355 PL_regindent--;
4633a7c4 3356#endif
02db2b7b
IZ
3357
3358#if 0 /* Breaks $^R */
3359 if (unwind)
3360 regcpblow(firstcp);
3361#endif
4633a7c4
LW
3362 return 1;
3363
3364no:
7821416a
IZ
3365 DEBUG_r(
3366 PerlIO_printf(Perl_debug_log,
3367 "%*s %sfailed...%s\n",
3368 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3369 );
3370 goto do_no;
3371no_final:
3372do_no:
02db2b7b
IZ
3373 if (unwind) {
3374 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3375
3376 switch (uw->type) {
3377 case RE_UNWIND_BRANCH:
3378 case RE_UNWIND_BRANCHJ:
3379 {
3380 re_unwind_branch_t *uwb = &(uw->branch);
3381 I32 lastparen = uwb->lastparen;
3382
3383 REGCP_UNWIND(uwb->lastcp);
3384 for (n = *PL_reglastparen; n > lastparen; n--)
3385 PL_regendp[n] = -1;
3386 *PL_reglastparen = n;
3387 scan = next = uwb->next;
3388 if ( !scan ||
3389 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3390 ? BRANCH : BRANCHJ) ) { /* Failure */
3391 unwind = uwb->prev;
3392#ifdef DEBUGGING
3393 PL_regindent--;
3394#endif
3395 goto do_no;
3396 }
3397 /* Have more choice yet. Reuse the same uwb. */
3398 /*SUPPRESS 560*/
3399 if ((n = (uwb->type == RE_UNWIND_BRANCH
3400 ? NEXT_OFF(next) : ARG(next))))
3401 next += n;
3402 else
3403 next = NULL; /* XXXX Needn't unwinding in this case... */
3404 uwb->next = next;
3405 next = NEXTOPER(scan);
3406 if (uwb->type == RE_UNWIND_BRANCHJ)
3407 next = NEXTOPER(next);
3408 locinput = uwb->locinput;
3409 nextchr = uwb->nextchr;
3410#ifdef DEBUGGING
3411 PL_regindent = uwb->regindent;
3412#endif
3413
3414 goto reenter;
3415 }
3416 /* NOT REACHED */
3417 default:
3418 Perl_croak(aTHX_ "regexp unwind memory corruption");
3419 }
3420 /* NOT REACHED */
3421 }
4633a7c4 3422#ifdef DEBUGGING
3280af22 3423 PL_regindent--;
4633a7c4 3424#endif
a0d0e21e 3425 return 0;
a687059c
LW
3426}
3427
3428/*
3429 - regrepeat - repeatedly match something simple, report how many
3430 */
3431/*
3432 * [This routine now assumes that it will only match on things of length 1.
3433 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3434 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3435 */
76e3520e 3436STATIC I32
cea2e8a9 3437S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3438{
5c0ca799 3439 dTHR;
a0d0e21e 3440 register char *scan;
a0d0e21e 3441 register I32 c;
3280af22 3442 register char *loceol = PL_regeol;
a0ed51b3 3443 register I32 hardcount = 0;
a0d0e21e 3444
3280af22 3445 scan = PL_reginput;
c277df42 3446 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3447 loceol = scan + max;
a0d0e21e 3448 switch (OP(p)) {
22c35a8c 3449 case REG_ANY:
a0d0e21e
LW
3450 while (scan < loceol && *scan != '\n')
3451 scan++;
3452 break;
3453 case SANY:
3454 scan = loceol;
3455 break;
a0ed51b3
LW
3456 case ANYUTF8:
3457 loceol = PL_regeol;
3458 while (scan < loceol && *scan != '\n') {
3459 scan += UTF8SKIP(scan);
3460 hardcount++;
3461 }
3462 break;
3463 case SANYUTF8:
3464 loceol = PL_regeol;
3465 while (scan < loceol) {
3466 scan += UTF8SKIP(scan);
3467 hardcount++;
3468 }
3469 break;
bbce6d69 3470 case EXACT: /* length of string is 1 */
cd439c50 3471 c = (U8)*STRING(p);
bbce6d69 3472 while (scan < loceol && UCHARAT(scan) == c)
3473 scan++;
3474 break;
3475 case EXACTF: /* length of string is 1 */
cd439c50 3476 c = (U8)*STRING(p);
bbce6d69 3477 while (scan < loceol &&
22c35a8c 3478 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3479 scan++;
3480 break;
3481 case EXACTFL: /* length of string is 1 */
3280af22 3482 PL_reg_flags |= RF_tainted;
cd439c50 3483 c = (U8)*STRING(p);
bbce6d69 3484 while (scan < loceol &&
22c35a8c 3485 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3486 scan++;
3487 break;
a0ed51b3
LW
3488 case ANYOFUTF8:
3489 loceol = PL_regeol;
3490 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3491 scan += UTF8SKIP(scan);
3492 hardcount++;
3493 }
3494 break;
a0d0e21e 3495 case ANYOF:
936ed897 3496 while (scan < loceol && REGINCLASS(p, *scan))
a0d0e21e 3497 scan++;
a0d0e21e
LW
3498 break;
3499 case ALNUM:
3500 while (scan < loceol && isALNUM(*scan))
3501 scan++;
3502 break;
a0ed51b3
LW
3503 case ALNUMUTF8:
3504 loceol = PL_regeol;
dfe13c55 3505 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3506 scan += UTF8SKIP(scan);
3507 hardcount++;
3508 }
3509 break;
bbce6d69 3510 case ALNUML:
3280af22 3511 PL_reg_flags |= RF_tainted;
bbce6d69 3512 while (scan < loceol && isALNUM_LC(*scan))
3513 scan++;
3514 break;
a0ed51b3
LW
3515 case ALNUMLUTF8:
3516 PL_reg_flags |= RF_tainted;
3517 loceol = PL_regeol;
dfe13c55 3518 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3519 scan += UTF8SKIP(scan);
3520 hardcount++;
3521 }
3522 break;
3523 break;
a0d0e21e
LW
3524 case NALNUM:
3525 while (scan < loceol && !isALNUM(*scan))
3526 scan++;
3527 break;
a0ed51b3
LW
3528 case NALNUMUTF8:
3529 loceol = PL_regeol;
dfe13c55 3530 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3531 scan += UTF8SKIP(scan);
3532 hardcount++;
3533 }
3534 break;
bbce6d69 3535 case NALNUML:
3280af22 3536 PL_reg_flags |= RF_tainted;
bbce6d69 3537 while (scan < loceol && !isALNUM_LC(*scan))
3538 scan++;
3539 break;
a0ed51b3
LW
3540 case NALNUMLUTF8:
3541 PL_reg_flags |= RF_tainted;
3542 loceol = PL_regeol;
dfe13c55 3543 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3544 scan += UTF8SKIP(scan);
3545 hardcount++;
3546 }
3547 break;
a0d0e21e
LW
3548 case SPACE:
3549 while (scan < loceol && isSPACE(*scan))
3550 scan++;
3551 break;
a0ed51b3
LW
3552 case SPACEUTF8:
3553 loceol = PL_regeol;
dfe13c55 3554 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3555 scan += UTF8SKIP(scan);
3556 hardcount++;
3557 }
3558 break;
bbce6d69 3559 case SPACEL:
3280af22 3560 PL_reg_flags |= RF_tainted;
bbce6d69 3561 while (scan < loceol && isSPACE_LC(*scan))
3562 scan++;
3563 break;
a0ed51b3
LW
3564 case SPACELUTF8:
3565 PL_reg_flags |= RF_tainted;
3566 loceol = PL_regeol;
dfe13c55 3567 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3568 scan += UTF8SKIP(scan);
3569 hardcount++;
3570 }
3571 break;
a0d0e21e
LW
3572 case NSPACE:
3573 while (scan < loceol && !isSPACE(*scan))
3574 scan++;
3575 break;
a0ed51b3
LW
3576 case NSPACEUTF8:
3577 loceol = PL_regeol;
dfe13c55 3578 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3579 scan += UTF8SKIP(scan);
3580 hardcount++;
3581 }
3582 break;
bbce6d69 3583 case NSPACEL:
3280af22 3584 PL_reg_flags |= RF_tainted;
bbce6d69 3585 while (scan < loceol && !isSPACE_LC(*scan))
3586 scan++;
3587 break;
a0ed51b3
LW
3588 case NSPACELUTF8:
3589 PL_reg_flags |= RF_tainted;
3590 loceol = PL_regeol;
dfe13c55 3591 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3592 scan += UTF8SKIP(scan);
3593 hardcount++;
3594 }
3595 break;
a0d0e21e
LW
3596 case DIGIT:
3597 while (scan < loceol && isDIGIT(*scan))
3598 scan++;
3599 break;
a0ed51b3
LW
3600 case DIGITUTF8:
3601 loceol = PL_regeol;
dfe13c55 3602 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3603 scan += UTF8SKIP(scan);
3604 hardcount++;
3605 }
3606 break;
3607 break;
a0d0e21e
LW
3608 case NDIGIT:
3609 while (scan < loceol && !isDIGIT(*scan))
3610 scan++;
3611 break;
a0ed51b3
LW
3612 case NDIGITUTF8:
3613 loceol = PL_regeol;
dfe13c55 3614 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3615 scan += UTF8SKIP(scan);
3616 hardcount++;
3617 }
3618 break;
a0d0e21e
LW
3619 default: /* Called on something of 0 width. */
3620 break; /* So match right here or not at all. */
3621 }
a687059c 3622
a0ed51b3
LW
3623 if (hardcount)
3624 c = hardcount;
3625 else
3626 c = scan - PL_reginput;
3280af22 3627 PL_reginput = scan;
a687059c 3628
c277df42
IZ
3629 DEBUG_r(
3630 {
3631 SV *prop = sv_newmortal();
3632
3633 regprop(prop, p);
3634 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3635 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3636 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3637 });
3638
a0d0e21e 3639 return(c);
a687059c
LW
3640}
3641
3642/*
c277df42
IZ
3643 - regrepeat_hard - repeatedly match something, report total lenth and length
3644 *
3645 * The repeater is supposed to have constant length.
3646 */
3647
76e3520e 3648STATIC I32
cea2e8a9 3649S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3650{
5c0ca799 3651 dTHR;
c277df42
IZ
3652 register char *scan;
3653 register char *start;
3280af22 3654 register char *loceol = PL_regeol;
a0ed51b3 3655 I32 l = 0;
708e3b05 3656 I32 count = 0, res = 1;
a0ed51b3
LW
3657
3658 if (!max)
3659 return 0;
c277df42 3660
3280af22 3661 start = PL_reginput;
a0ed51b3 3662 if (UTF) {
708e3b05 3663 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3664 if (!count++) {
3665 l = 0;
3666 while (start < PL_reginput) {
3667 l++;
3668 start += UTF8SKIP(start);
3669 }
3670 *lp = l;
3671 if (l == 0)
3672 return max;
3673 }
3674 if (count == max)
3675 return count;
3676 }
3677 }
3678 else {
708e3b05 3679 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3680 if (!count++) {
3681 *lp = l = PL_reginput - start;
3682 if (max != REG_INFTY && l*max < loceol - scan)
3683 loceol = scan + l*max;
3684 if (l == 0)
3685 return max;
c277df42
IZ
3686 }
3687 }
3688 }
708e3b05 3689 if (!res)
3280af22 3690 PL_reginput = scan;
c277df42 3691
a0ed51b3 3692 return count;
c277df42
IZ
3693}
3694
3695/*
cb8d8820 3696 - reginclass - determine if a character falls into a character class
bbce6d69 3697 */
3698
76e3520e 3699STATIC bool
936ed897 3700S_reginclass(pTHX_ register regnode *p, register I32 c)
bbce6d69 3701{
5c0ca799 3702 dTHR;
b8c5462f 3703 char flags = ANYOF_FLAGS(p);
bbce6d69 3704 bool match = FALSE;
3705
3706 c &= 0xFF;
b8c5462f 3707 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3708 match = TRUE;
3709 else if (flags & ANYOF_FOLD) {
3710 I32 cf;
3711 if (flags & ANYOF_LOCALE) {
3280af22 3712 PL_reg_flags |= RF_tainted;
22c35a8c 3713 cf = PL_fold_locale[c];
bbce6d69 3714 }
3715 else
22c35a8c 3716 cf = PL_fold[c];
b8c5462f 3717 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3718 match = TRUE;
3719 }
3720
b8c5462f 3721 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3722 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3723 if (
3724 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3725 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3726 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3727 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3728 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3729 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3730 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3731 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3732 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3733 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3734 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3735 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3736 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3737 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3738 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3739 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3740 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3741 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3742 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3743 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3744 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3745 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3746 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3747 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3748 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
aaa51d5e
JF
3749 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3750 (ANYOF_CLASS_TEST(p, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3751 (ANYOF_CLASS_TEST(p, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3752 (ANYOF_CLASS_TEST(p, ANYOF_BLANK) && isBLANK(c)) ||
3753 (ANYOF_CLASS_TEST(p, ANYOF_NBLANK) && !isBLANK(c))
b8c5462f 3754 ) /* How's that for a conditional? */
bbce6d69 3755 {
3756 match = TRUE;
3757 }
3758 }
3759
ae5c130c 3760 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3761}
3762
a0ed51b3 3763STATIC bool
cea2e8a9 3764S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
3765{
3766 dTHR;
a0ed51b3
LW
3767 char flags = ARG1(f);
3768 bool match = FALSE;
3769 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3770
3771 if (swash_fetch(sv, p))
3772 match = TRUE;
3773 else if (flags & ANYOF_FOLD) {
806e7201 3774 U8 tmpbuf[UTF8_MAXLEN];
a0ed51b3
LW
3775 if (flags & ANYOF_LOCALE) {
3776 PL_reg_flags |= RF_tainted;
3777 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3778 }
3779 else
3780 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3781 if (swash_fetch(sv, tmpbuf))
3782 match = TRUE;
3783 }
3784
b8c5462f 3785 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3786
3787 return (flags & ANYOF_INVERT) ? !match : match;
3788}
161b471a 3789
dfe13c55 3790STATIC U8 *
cea2e8a9 3791S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
3792{
3793 dTHR;
a0ed51b3
LW
3794 if (off >= 0) {
3795 while (off-- && s < (U8*)PL_regeol)
3796 s += UTF8SKIP(s);
3797 }
3798 else {
3799 while (off++) {
3800 if (s > (U8*)PL_bostr) {
3801 s--;
3802 if (*s & 0x80) {
3803 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3804 s--;
3805 } /* XXX could check well-formedness here */
3806 }
3807 }
3808 }
3809 return s;
3810}
161b471a 3811
dfe13c55 3812STATIC U8 *
cea2e8a9 3813S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3814{
c485e607 3815 dTHR;
a0ed51b3
LW
3816 if (off >= 0) {
3817 while (off-- && s < (U8*)PL_regeol)
3818 s += UTF8SKIP(s);
3819 if (off >= 0)
3820 return 0;
3821 }
3822 else {
3823 while (off++) {
3824 if (s > (U8*)PL_bostr) {
3825 s--;
3826 if (*s & 0x80) {
3827 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3828 s--;
3829 } /* XXX could check well-formedness here */
3830 }
3831 else
3832 break;
3833 }
3834 if (off <= 0)
3835 return 0;
3836 }
3837 return s;
3838}
51371543
GS
3839
3840#ifdef PERL_OBJECT
51371543
GS
3841#include "XSUB.h"
3842#endif
3843
3844static void
3845restore_pos(pTHXo_ void *arg)
3846{
3847 dTHR;
3848 if (PL_reg_eval_set) {
3849 if (PL_reg_oldsaved) {
3850 PL_reg_re->subbeg = PL_reg_oldsaved;
3851 PL_reg_re->sublen = PL_reg_oldsavedlen;
3852 RX_MATCH_COPIED_on(PL_reg_re);
3853 }
3854 PL_reg_magic->mg_len = PL_reg_oldpos;
3855 PL_reg_eval_set = 0;
3856 PL_curpm = PL_reg_oldcurpm;
3857 }
3858}