This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Adjust the docs to agree with #7875.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
3818b22b 69 **** Copyright (c) 1991-2000, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
02db2b7b 148# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 149 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 150 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 151
02db2b7b 152# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 153 PerlIO_printf(Perl_debug_log, \
faccc32b 154 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 155 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5 178 PerlIO_printf(Perl_debug_log,
b900a521
JH
179 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
180 (UV)paren, (IV)PL_regstartp[paren],
181 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
182 (IV)PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 188 PerlIO_printf(Perl_debug_log,
faccc32b
JH
189 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
190 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
191 }
192 );
3280af22
NIS
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d
IZ
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e
LW
197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6
IZ
205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6
IZ
211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
02db2b7b 222#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 223
29d1e993
HS
224#define TRYPAREN(paren, n, input) { \
225 if (paren) { \
226 if (n) { \
227 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
228 PL_regendp[paren] = input - PL_bostr; \
229 } \
230 else \
231 PL_regendp[paren] = -1; \
232 } \
233 if (regmatch(next)) \
234 sayYES; \
235 if (paren && n) \
236 PL_regendp[paren] = -1; \
237}
238
239
a687059c 240/*
e50aee73 241 * pregexec and friends
a687059c
LW
242 */
243
244/*
c277df42 245 - pregexec - match a regexp against a string
a687059c 246 */
c277df42 247I32
864dbfa3 248Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 249 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
250/* strend: pointer to null at end of string */
251/* strbeg: real beginning of string */
252/* minend: end of match must be >=minend after stringarg. */
253/* nosave: For optimizations. */
254{
255 return
256 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
257 nosave ? 0 : REXEC_COPY_STR);
258}
0f5d15d6
IZ
259
260STATIC void
cea2e8a9 261S_cache_re(pTHX_ regexp *prog)
0f5d15d6 262{
46124e9e 263 dTHR;
0f5d15d6
IZ
264 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
265#ifdef DEBUGGING
266 PL_regprogram = prog->program;
267#endif
268 PL_regnpar = prog->nparens;
269 PL_regdata = prog->data;
270 PL_reg_re = prog;
271}
22e551b9 272
cad2e5aa
JH
273/*
274 * Need to implement the following flags for reg_anch:
275 *
276 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
277 * USE_INTUIT_ML
278 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
279 * INTUIT_AUTORITATIVE_ML
280 * INTUIT_ONCE_NOML - Intuit can match in one location only.
281 * INTUIT_ONCE_ML
282 *
283 * Another flag for this function: SECOND_TIME (so that float substrs
284 * with giant delta may be not rechecked).
285 */
286
287/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
288
2c2d71f5 289/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
290 Otherwise, only SvCUR(sv) is used to get strbeg. */
291
292/* XXXX We assume that strpos is strbeg unless sv. */
293
6eb5f6b9
JH
294/* XXXX Some places assume that there is a fixed substring.
295 An update may be needed if optimizer marks as "INTUITable"
296 RExen without fixed substrings. Similarly, it is assumed that
297 lengths of all the strings are no more than minlen, thus they
298 cannot come from lookahead.
299 (Or minlen should take into account lookahead.) */
300
2c2d71f5
JH
301/* A failure to find a constant substring means that there is no need to make
302 an expensive call to REx engine, thus we celebrate a failure. Similarly,
303 finding a substring too deep into the string means that less calls to
30944b6d
IZ
304 regtry() should be needed.
305
306 REx compiler's optimizer found 4 possible hints:
307 a) Anchored substring;
308 b) Fixed substring;
309 c) Whether we are anchored (beginning-of-line or \G);
310 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 311 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
312 string which does not contradict any of them.
313 */
2c2d71f5 314
6eb5f6b9
JH
315/* Most of decisions we do here should have been done at compile time.
316 The nodes of the REx which we used for the search should have been
317 deleted from the finite automaton. */
318
cad2e5aa
JH
319char *
320Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
321 char *strend, U32 flags, re_scream_pos_data *data)
322{
2c2d71f5 323 register I32 start_shift;
cad2e5aa 324 /* Should be nonnegative! */
2c2d71f5
JH
325 register I32 end_shift;
326 register char *s;
327 register SV *check;
a1933d95 328 char *strbeg;
cad2e5aa
JH
329 char *t;
330 I32 ml_anch;
2c2d71f5 331 char *tmp;
6eb5f6b9
JH
332 register char *other_last = Nullch; /* other substr checked before this */
333 char *check_at; /* check substr found at this pos */
30944b6d
IZ
334#ifdef DEBUGGING
335 char *i_strpos = strpos;
336#endif
cad2e5aa
JH
337
338 DEBUG_r( if (!PL_colorset) reginitcolors() );
339 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 340 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
341 PL_colors[4],PL_colors[5],PL_colors[0],
342 prog->precomp,
343 PL_colors[1],
344 (strlen(prog->precomp) > 60 ? "..." : ""),
345 PL_colors[0],
b900a521 346 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
347 strpos, PL_colors[1],
348 (strend - strpos > 60 ? "..." : ""))
349 );
350
2c2d71f5
JH
351 if (prog->minlen > strend - strpos) {
352 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 353 goto fail;
2c2d71f5 354 }
a1933d95 355 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
653099ff 356 check = prog->check_substr;
2c2d71f5 357 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
358 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
359 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 360 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 361
5675c1a6
IZ
362 if (!ml_anch) {
363 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
364 /* SvCUR is not set on references: SvRV and SvPVX overlap */
365 && sv && !SvROK(sv)
a1933d95 366 && (strpos != strbeg)) {
5675c1a6
IZ
367 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
368 goto fail;
369 }
370 if (prog->check_offset_min == prog->check_offset_max) {
2c2d71f5 371 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
372 I32 slen;
373
adac82c7 374 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 375 s = HOPc(strpos, prog->check_offset_min);
653099ff
GS
376 if (SvTAIL(check)) {
377 slen = SvCUR(check); /* >= 1 */
cad2e5aa 378
2c2d71f5
JH
379 if ( strend - s > slen || strend - s < slen - 1
380 || (strend - s == slen && strend[-1] != '\n')) {
381 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
382 goto fail_finish;
cad2e5aa
JH
383 }
384 /* Now should match s[0..slen-2] */
385 slen--;
653099ff 386 if (slen && (*SvPVX(check) != *s
cad2e5aa 387 || (slen > 1
653099ff 388 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
389 report_neq:
390 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
391 goto fail_finish;
392 }
cad2e5aa 393 }
653099ff
GS
394 else if (*SvPVX(check) != *s
395 || ((slen = SvCUR(check)) > 1
396 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
397 goto report_neq;
398 goto success_at_start;
5675c1a6 399 }
cad2e5aa 400 }
2c2d71f5 401 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 402 s = strpos;
2c2d71f5 403 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 404 end_shift = prog->minlen - start_shift -
653099ff 405 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 406 if (!ml_anch) {
653099ff
GS
407 I32 end = prog->check_offset_max + CHR_SVLEN(check)
408 - (SvTAIL(check) != 0);
2c2d71f5
JH
409 I32 eshift = strend - s - end;
410
411 if (end_shift < eshift)
412 end_shift = eshift;
413 }
cad2e5aa 414 }
2c2d71f5 415 else { /* Can match at random position */
cad2e5aa
JH
416 ml_anch = 0;
417 s = strpos;
2c2d71f5
JH
418 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
419 /* Should be nonnegative! */
420 end_shift = prog->minlen - start_shift -
653099ff 421 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
422 }
423
2c2d71f5 424#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 425 if (end_shift < 0)
6bbae5e6 426 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
427#endif
428
2c2d71f5
JH
429 restart:
430 /* Find a possible match in the region s..strend by looking for
431 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 432 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
433 I32 p = -1; /* Internal iterator of scream. */
434 I32 *pp = data ? data->scream_pos : &p;
435
2c2d71f5
JH
436 if (PL_screamfirst[BmRARE(check)] >= 0
437 || ( BmRARE(check) == '\n'
438 && (BmPREVIOUS(check) == SvCUR(check) - 1)
439 && SvTAIL(check) ))
440 s = screaminstr(sv, check,
441 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 442 else
2c2d71f5 443 goto fail_finish;
cad2e5aa
JH
444 if (data)
445 *data->scream_olds = s;
446 }
447 else
448 s = fbm_instr((unsigned char*)s + start_shift,
449 (unsigned char*)strend - end_shift,
2c2d71f5 450 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
451
452 /* Update the count-of-usability, remove useless subpatterns,
453 unshift s. */
2c2d71f5
JH
454
455 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
456 (s ? "Found" : "Did not find"),
457 ((check == prog->anchored_substr) ? "anchored" : "floating"),
458 PL_colors[0],
7b0972df
JH
459 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
460 SvPVX(check),
2c2d71f5
JH
461 PL_colors[1], (SvTAIL(check) ? "$" : ""),
462 (s ? " at offset " : "...\n") ) );
463
464 if (!s)
465 goto fail_finish;
466
6eb5f6b9
JH
467 check_at = s;
468
2c2d71f5 469 /* Finish the diagnostic message */
30944b6d 470 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
471
472 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
473 Start with the other substr.
474 XXXX no SCREAM optimization yet - and a very coarse implementation
475 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
476 *always* match. Probably should be marked during compile...
477 Probably it is right to do no SCREAM here...
478 */
479
480 if (prog->float_substr && prog->anchored_substr) {
30944b6d 481 /* Take into account the "other" substring. */
2c2d71f5
JH
482 /* XXXX May be hopelessly wrong for UTF... */
483 if (!other_last)
6eb5f6b9 484 other_last = strpos;
2c2d71f5 485 if (check == prog->float_substr) {
30944b6d
IZ
486 do_other_anchored:
487 {
2c2d71f5
JH
488 char *last = s - start_shift, *last1, *last2;
489 char *s1 = s;
490
491 tmp = PL_bostr;
492 t = s - prog->check_offset_max;
493 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
494 && (!(prog->reganch & ROPT_UTF8)
495 || (PL_bostr = strpos, /* Used in regcopmaybe() */
496 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
497 && t > strpos)))
30944b6d 498 /* EMPTY */;
2c2d71f5
JH
499 else
500 t = strpos;
501 t += prog->anchored_offset;
6eb5f6b9
JH
502 if (t < other_last) /* These positions already checked */
503 t = other_last;
2c2d71f5
JH
504 PL_bostr = tmp;
505 last2 = last1 = strend - prog->minlen;
506 if (last < last1)
507 last1 = last;
508 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
509 /* On end-of-str: see comment below. */
510 s = fbm_instr((unsigned char*)t,
511 (unsigned char*)last1 + prog->anchored_offset
512 + SvCUR(prog->anchored_substr)
513 - (SvTAIL(prog->anchored_substr)!=0),
514 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
515 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
516 (s ? "Found" : "Contradicts"),
517 PL_colors[0],
7b0972df
JH
518 (int)(SvCUR(prog->anchored_substr)
519 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
520 SvPVX(prog->anchored_substr),
521 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
522 if (!s) {
523 if (last1 >= last2) {
524 DEBUG_r(PerlIO_printf(Perl_debug_log,
525 ", giving up...\n"));
526 goto fail_finish;
527 }
528 DEBUG_r(PerlIO_printf(Perl_debug_log,
529 ", trying floating at offset %ld...\n",
30944b6d 530 (long)(s1 + 1 - i_strpos)));
2c2d71f5 531 PL_regeol = strend; /* Used in HOP() */
6eb5f6b9 532 other_last = last1 + prog->anchored_offset + 1;
2c2d71f5
JH
533 s = HOPc(last, 1);
534 goto restart;
535 }
536 else {
537 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 538 (long)(s - i_strpos)));
2c2d71f5 539 t = s - prog->anchored_offset;
6eb5f6b9 540 other_last = s + 1;
30944b6d 541 s = s1;
2c2d71f5
JH
542 if (t == strpos)
543 goto try_at_start;
2c2d71f5
JH
544 goto try_at_offset;
545 }
30944b6d 546 }
2c2d71f5
JH
547 }
548 else { /* Take into account the floating substring. */
549 char *last, *last1;
550 char *s1 = s;
551
552 t = s - start_shift;
553 last1 = last = strend - prog->minlen + prog->float_min_offset;
554 if (last - t > prog->float_max_offset)
555 last = t + prog->float_max_offset;
556 s = t + prog->float_min_offset;
6eb5f6b9
JH
557 if (s < other_last)
558 s = other_last;
2c2d71f5
JH
559 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
560 /* fbm_instr() takes into account exact value of end-of-str
561 if the check is SvTAIL(ed). Since false positives are OK,
562 and end-of-str is not later than strend we are OK. */
563 s = fbm_instr((unsigned char*)s,
564 (unsigned char*)last + SvCUR(prog->float_substr)
565 - (SvTAIL(prog->float_substr)!=0),
566 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
567 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
568 (s ? "Found" : "Contradicts"),
569 PL_colors[0],
7b0972df
JH
570 (int)(SvCUR(prog->float_substr)
571 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
572 SvPVX(prog->float_substr),
573 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
574 if (!s) {
575 if (last1 == last) {
576 DEBUG_r(PerlIO_printf(Perl_debug_log,
577 ", giving up...\n"));
578 goto fail_finish;
579 }
580 DEBUG_r(PerlIO_printf(Perl_debug_log,
581 ", trying anchored starting at offset %ld...\n",
30944b6d 582 (long)(s1 + 1 - i_strpos)));
6eb5f6b9 583 other_last = last + 1;
2c2d71f5
JH
584 PL_regeol = strend; /* Used in HOP() */
585 s = HOPc(t, 1);
586 goto restart;
587 }
588 else {
589 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 590 (long)(s - i_strpos)));
6eb5f6b9 591 other_last = s + 1;
30944b6d 592 s = s1;
2c2d71f5
JH
593 if (t == strpos)
594 goto try_at_start;
2c2d71f5
JH
595 goto try_at_offset;
596 }
597 }
cad2e5aa 598 }
2c2d71f5
JH
599
600 t = s - prog->check_offset_max;
601 tmp = PL_bostr;
602 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
603 && (!(prog->reganch & ROPT_UTF8)
604 || (PL_bostr = strpos, /* Used in regcopmaybe() */
605 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
606 && t > strpos)))) {
607 PL_bostr = tmp;
608 /* Fixed substring is found far enough so that the match
609 cannot start at strpos. */
610 try_at_offset:
cad2e5aa 611 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
612 /* Eventually fbm_*() should handle this, but often
613 anchored_offset is not 0, so this check will not be wasted. */
614 /* XXXX In the code below we prefer to look for "^" even in
615 presence of anchored substrings. And we search even
616 beyond the found float position. These pessimizations
617 are historical artefacts only. */
618 find_anchor:
2c2d71f5 619 while (t < strend - prog->minlen) {
cad2e5aa 620 if (*t == '\n') {
4ee3650e 621 if (t < check_at - prog->check_offset_min) {
30944b6d 622 if (prog->anchored_substr) {
4ee3650e
GS
623 /* Since we moved from the found position,
624 we definitely contradict the found anchored
30944b6d
IZ
625 substr. Due to the above check we do not
626 contradict "check" substr.
627 Thus we can arrive here only if check substr
628 is float. Redo checking for "other"=="fixed".
629 */
630 strpos = t + 1;
631 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
632 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
633 goto do_other_anchored;
634 }
4ee3650e
GS
635 /* We don't contradict the found floating substring. */
636 /* XXXX Why not check for STCLASS? */
cad2e5aa 637 s = t + 1;
2c2d71f5 638 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 639 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
640 goto set_useful;
641 }
4ee3650e
GS
642 /* Position contradicts check-string */
643 /* XXXX probably better to look for check-string
644 than for "\n", so one should lower the limit for t? */
645 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 646 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 647 other_last = strpos = s = t + 1;
cad2e5aa
JH
648 goto restart;
649 }
650 t++;
651 }
2c2d71f5
JH
652 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
653 PL_colors[0],PL_colors[1]));
654 goto fail_finish;
cad2e5aa 655 }
f5952150
GS
656 else {
657 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
658 PL_colors[0],PL_colors[1]));
659 }
cad2e5aa
JH
660 s = t;
661 set_useful:
2c2d71f5 662 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
663 }
664 else {
2c2d71f5 665 PL_bostr = tmp;
f5952150 666 /* The found string does not prohibit matching at strpos,
2c2d71f5 667 - no optimization of calling REx engine can be performed,
f5952150
GS
668 unless it was an MBOL and we are not after MBOL,
669 or a future STCLASS check will fail this. */
2c2d71f5
JH
670 try_at_start:
671 /* Even in this situation we may use MBOL flag if strpos is offset
672 wrt the start of the string. */
05b4157f 673 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 674 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
675 /* May be due to an implicit anchor of m{.*foo} */
676 && !(prog->reganch & ROPT_IMPLICIT))
677 {
cad2e5aa
JH
678 t = strpos;
679 goto find_anchor;
680 }
30944b6d 681 DEBUG_r( if (ml_anch)
f5952150
GS
682 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
683 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 684 );
2c2d71f5 685 success_at_start:
30944b6d 686 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 687 && prog->check_substr /* Could be deleted already */
cad2e5aa 688 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
689 && prog->check_substr == prog->float_substr)
690 {
cad2e5aa 691 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 692 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
693 SvREFCNT_dec(prog->check_substr);
694 prog->check_substr = Nullsv; /* disable */
695 prog->float_substr = Nullsv; /* clear */
5e39e1e5 696 check = Nullsv; /* abort */
cad2e5aa 697 s = strpos;
3cf5c195
IZ
698 /* XXXX This is a remnant of the old implementation. It
699 looks wasteful, since now INTUIT can use many
6eb5f6b9 700 other heuristics. */
cad2e5aa
JH
701 prog->reganch &= ~RE_USE_INTUIT;
702 }
703 else
704 s = strpos;
705 }
706
6eb5f6b9
JH
707 /* Last resort... */
708 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
709 if (prog->regstclass) {
710 /* minlen == 0 is possible if regstclass is \b or \B,
711 and the fixed substr is ''$.
712 Since minlen is already taken into account, s+1 is before strend;
713 accidentally, minlen >= 1 guaranties no false positives at s + 1
714 even for \b or \B. But (minlen? 1 : 0) below assumes that
715 regstclass does not come from lookahead... */
716 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
717 This leaves EXACTF only, which is dealt with in find_byclass(). */
66e933ab
GS
718 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
719 ? STR_LEN(prog->regstclass)
720 : 1);
6eb5f6b9 721 char *endpos = (prog->anchored_substr || ml_anch)
66e933ab
GS
722 ? s + (prog->minlen? cl_l : 0)
723 : (prog->float_substr ? check_at - start_shift + cl_l
6eb5f6b9 724 : strend) ;
a1933d95 725 char *startpos = strbeg;
6eb5f6b9
JH
726
727 t = s;
76384e4a
GS
728 if (prog->reganch & ROPT_UTF8) {
729 PL_regdata = prog->data; /* Used by REGINCLASS UTF logic */
730 PL_bostr = startpos;
731 }
6eb5f6b9
JH
732 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
733 if (!s) {
734#ifdef DEBUGGING
735 char *what;
736#endif
737 if (endpos == strend) {
738 DEBUG_r( PerlIO_printf(Perl_debug_log,
739 "Could not match STCLASS...\n") );
740 goto fail;
741 }
66e933ab
GS
742 DEBUG_r( PerlIO_printf(Perl_debug_log,
743 "This position contradicts STCLASS...\n") );
653099ff
GS
744 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
745 goto fail;
6eb5f6b9
JH
746 /* Contradict one of substrings */
747 if (prog->anchored_substr) {
6eb5f6b9
JH
748 if (prog->anchored_substr == check) {
749 DEBUG_r( what = "anchored" );
750 hop_and_restart:
751 PL_regeol = strend; /* Used in HOP() */
752 s = HOPc(t, 1);
66e933ab
GS
753 if (s + start_shift + end_shift > strend) {
754 /* XXXX Should be taken into account earlier? */
755 DEBUG_r( PerlIO_printf(Perl_debug_log,
756 "Could not match STCLASS...\n") );
757 goto fail;
758 }
5e39e1e5
HS
759 if (!check)
760 goto giveup;
6eb5f6b9 761 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 762 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
763 what, (long)(s + start_shift - i_strpos)) );
764 goto restart;
765 }
66e933ab 766 /* Have both, check_string is floating */
6eb5f6b9
JH
767 if (t + start_shift >= check_at) /* Contradicts floating=check */
768 goto retry_floating_check;
769 /* Recheck anchored substring, but not floating... */
770 s = check_at;
5e39e1e5
HS
771 if (!check)
772 goto giveup;
6eb5f6b9 773 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 774 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
775 (long)(other_last - i_strpos)) );
776 goto do_other_anchored;
777 }
60e71179
GS
778 /* Another way we could have checked stclass at the
779 current position only: */
780 if (ml_anch) {
781 s = t = t + 1;
5e39e1e5
HS
782 if (!check)
783 goto giveup;
60e71179 784 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
785 "Looking for /%s^%s/m starting at offset %ld...\n",
786 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 787 goto try_at_offset;
66e933ab 788 }
60e71179
GS
789 if (!prog->float_substr) /* Could have been deleted */
790 goto fail;
6eb5f6b9
JH
791 /* Check is floating subtring. */
792 retry_floating_check:
793 t = check_at - start_shift;
794 DEBUG_r( what = "floating" );
795 goto hop_and_restart;
796 }
797 DEBUG_r( if (t != s)
798 PerlIO_printf(Perl_debug_log,
799 "By STCLASS: moving %ld --> %ld\n",
800 (long)(t - i_strpos), (long)(s - i_strpos));
801 else
802 PerlIO_printf(Perl_debug_log,
803 "Does not contradict STCLASS...\n") );
804 }
5e39e1e5
HS
805 giveup:
806 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
807 PL_colors[4], (check ? "Guessed" : "Giving up"),
808 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 809 return s;
2c2d71f5
JH
810
811 fail_finish: /* Substring not found */
66e933ab
GS
812 if (prog->check_substr) /* could be removed already */
813 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 814 fail:
2c2d71f5 815 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
816 PL_colors[4],PL_colors[5]));
817 return Nullch;
818}
9661b544 819
6eb5f6b9 820/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
821STATIC char *
822S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 823{
6eb5f6b9
JH
824 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
825 char *m;
d8093b23
G
826 STRLEN ln;
827 unsigned int c1;
828 unsigned int c2;
6eb5f6b9
JH
829 char *e;
830 register I32 tmp = 1; /* Scratch variable? */
cad2e5aa 831
6eb5f6b9
JH
832 /* We know what class it must start with. */
833 switch (OP(c)) {
834 case ANYOFUTF8:
835 while (s < strend) {
836 if (REGINCLASSUTF8(c, (U8*)s)) {
837 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 838 goto got_it;
3cf5c195 839 else
6eb5f6b9 840 tmp = doevery;
a0ed51b3 841 }
6eb5f6b9
JH
842 else
843 tmp = 1;
a0ed51b3
LW
844 s += UTF8SKIP(s);
845 }
6eb5f6b9
JH
846 break;
847 case ANYOF:
a0ed51b3 848 while (s < strend) {
d8093b23 849 if (REGINCLASS(c, *(U8*)s)) {
6eb5f6b9
JH
850 if (tmp && (norun || regtry(prog, s)))
851 goto got_it;
852 else
853 tmp = doevery;
a0ed51b3 854 }
6eb5f6b9
JH
855 else
856 tmp = 1;
a0ed51b3 857 s++;
a0d0e21e 858 }
6eb5f6b9
JH
859 break;
860 case EXACTF:
861 m = STRING(c);
862 ln = STR_LEN(c);
d8093b23 863 c1 = *(U8*)m;
6eb5f6b9
JH
864 c2 = PL_fold[c1];
865 goto do_exactf;
866 case EXACTFL:
867 m = STRING(c);
868 ln = STR_LEN(c);
d8093b23 869 c1 = *(U8*)m;
6eb5f6b9
JH
870 c2 = PL_fold_locale[c1];
871 do_exactf:
872 e = strend - ln;
b3c9acc1 873
6eb5f6b9
JH
874 if (norun && e < s)
875 e = s; /* Due to minlen logic of intuit() */
b3c9acc1
IZ
876 /* Here it is NOT UTF! */
877 if (c1 == c2) {
878 while (s <= e) {
d8093b23 879 if ( *(U8*)s == c1
66e933ab
GS
880 && (ln == 1 || !(OP(c) == EXACTF
881 ? ibcmp(s, m, ln)
882 : ibcmp_locale(s, m, ln)))
6eb5f6b9 883 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
884 goto got_it;
885 s++;
886 }
887 } else {
888 while (s <= e) {
d8093b23 889 if ( (*(U8*)s == c1 || *(U8*)s == c2)
66e933ab
GS
890 && (ln == 1 || !(OP(c) == EXACTF
891 ? ibcmp(s, m, ln)
892 : ibcmp_locale(s, m, ln)))
6eb5f6b9 893 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
894 goto got_it;
895 s++;
896 }
897 }
898 break;
bbce6d69 899 case BOUNDL:
3280af22 900 PL_reg_flags |= RF_tainted;
bbce6d69 901 /* FALL THROUGH */
a0d0e21e 902 case BOUND:
6eb5f6b9 903 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 904 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 905 while (s < strend) {
95bac841 906 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 907 tmp = !tmp;
6eb5f6b9 908 if ((norun || regtry(prog, s)))
a0d0e21e 909 goto got_it;
a687059c 910 }
a0d0e21e
LW
911 s++;
912 }
6eb5f6b9 913 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
914 goto got_it;
915 break;
a0ed51b3
LW
916 case BOUNDLUTF8:
917 PL_reg_flags |= RF_tainted;
918 /* FALL THROUGH */
919 case BOUNDUTF8:
667bb95a
JH
920 if (s == startpos)
921 tmp = '\n';
922 else {
923 U8 *r = reghop((U8*)s, -1);
924
925 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
926 }
927 tmp = ((OP(c) == BOUNDUTF8 ?
928 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 929 while (s < strend) {
76384e4a 930 if (tmp == !(OP(c) == BOUNDUTF8 ?
dfe13c55
GS
931 swash_fetch(PL_utf8_alnum, (U8*)s) :
932 isALNUM_LC_utf8((U8*)s)))
933 {
a0ed51b3 934 tmp = !tmp;
6eb5f6b9 935 if ((norun || regtry(prog, s)))
a0ed51b3
LW
936 goto got_it;
937 }
938 s += UTF8SKIP(s);
939 }
6eb5f6b9 940 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
941 goto got_it;
942 break;
bbce6d69 943 case NBOUNDL:
3280af22 944 PL_reg_flags |= RF_tainted;
bbce6d69 945 /* FALL THROUGH */
a0d0e21e 946 case NBOUND:
6eb5f6b9 947 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 948 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 949 while (s < strend) {
95bac841 950 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 951 tmp = !tmp;
6eb5f6b9 952 else if ((norun || regtry(prog, s)))
a0d0e21e
LW
953 goto got_it;
954 s++;
955 }
6eb5f6b9 956 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
957 goto got_it;
958 break;
a0ed51b3
LW
959 case NBOUNDLUTF8:
960 PL_reg_flags |= RF_tainted;
961 /* FALL THROUGH */
962 case NBOUNDUTF8:
667bb95a
JH
963 if (s == startpos)
964 tmp = '\n';
965 else {
966 U8 *r = reghop((U8*)s, -1);
967
968 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
969 }
970 tmp = ((OP(c) == NBOUNDUTF8 ?
971 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 972 while (s < strend) {
76384e4a 973 if (tmp == !(OP(c) == NBOUNDUTF8 ?
dfe13c55
GS
974 swash_fetch(PL_utf8_alnum, (U8*)s) :
975 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 976 tmp = !tmp;
6eb5f6b9 977 else if ((norun || regtry(prog, s)))
a0ed51b3
LW
978 goto got_it;
979 s += UTF8SKIP(s);
980 }
6eb5f6b9 981 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
982 goto got_it;
983 break;
a0d0e21e
LW
984 case ALNUM:
985 while (s < strend) {
bbce6d69 986 if (isALNUM(*s)) {
6eb5f6b9 987 if (tmp && (norun || regtry(prog, s)))
bbce6d69 988 goto got_it;
989 else
990 tmp = doevery;
991 }
992 else
993 tmp = 1;
994 s++;
995 }
996 break;
a0ed51b3
LW
997 case ALNUMUTF8:
998 while (s < strend) {
dfe13c55 999 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 1000 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1001 goto got_it;
1002 else
1003 tmp = doevery;
1004 }
1005 else
1006 tmp = 1;
1007 s += UTF8SKIP(s);
1008 }
1009 break;
bbce6d69 1010 case ALNUML:
3280af22 1011 PL_reg_flags |= RF_tainted;
bbce6d69 1012 while (s < strend) {
1013 if (isALNUM_LC(*s)) {
6eb5f6b9 1014 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1015 goto got_it;
a687059c 1016 else
a0d0e21e
LW
1017 tmp = doevery;
1018 }
1019 else
1020 tmp = 1;
1021 s++;
1022 }
1023 break;
a0ed51b3
LW
1024 case ALNUMLUTF8:
1025 PL_reg_flags |= RF_tainted;
1026 while (s < strend) {
dfe13c55 1027 if (isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1028 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1029 goto got_it;
1030 else
1031 tmp = doevery;
1032 }
1033 else
1034 tmp = 1;
1035 s += UTF8SKIP(s);
1036 }
1037 break;
a0d0e21e
LW
1038 case NALNUM:
1039 while (s < strend) {
bbce6d69 1040 if (!isALNUM(*s)) {
6eb5f6b9 1041 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1042 goto got_it;
1043 else
1044 tmp = doevery;
1045 }
1046 else
1047 tmp = 1;
1048 s++;
1049 }
1050 break;
a0ed51b3
LW
1051 case NALNUMUTF8:
1052 while (s < strend) {
dfe13c55 1053 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 1054 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1055 goto got_it;
1056 else
1057 tmp = doevery;
1058 }
1059 else
1060 tmp = 1;
1061 s += UTF8SKIP(s);
1062 }
1063 break;
bbce6d69 1064 case NALNUML:
3280af22 1065 PL_reg_flags |= RF_tainted;
bbce6d69 1066 while (s < strend) {
1067 if (!isALNUM_LC(*s)) {
6eb5f6b9 1068 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1069 goto got_it;
a687059c 1070 else
a0d0e21e 1071 tmp = doevery;
a687059c 1072 }
a0d0e21e
LW
1073 else
1074 tmp = 1;
1075 s++;
1076 }
1077 break;
a0ed51b3
LW
1078 case NALNUMLUTF8:
1079 PL_reg_flags |= RF_tainted;
1080 while (s < strend) {
dfe13c55 1081 if (!isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1082 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1083 goto got_it;
1084 else
1085 tmp = doevery;
1086 }
1087 else
1088 tmp = 1;
1089 s += UTF8SKIP(s);
1090 }
1091 break;
a0d0e21e
LW
1092 case SPACE:
1093 while (s < strend) {
1094 if (isSPACE(*s)) {
6eb5f6b9 1095 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1096 goto got_it;
1097 else
1098 tmp = doevery;
2304df62 1099 }
a0d0e21e
LW
1100 else
1101 tmp = 1;
1102 s++;
1103 }
1104 break;
a0ed51b3
LW
1105 case SPACEUTF8:
1106 while (s < strend) {
dfe13c55 1107 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
6eb5f6b9 1108 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1109 goto got_it;
1110 else
1111 tmp = doevery;
1112 }
1113 else
1114 tmp = 1;
1115 s += UTF8SKIP(s);
1116 }
1117 break;
bbce6d69 1118 case SPACEL:
3280af22 1119 PL_reg_flags |= RF_tainted;
bbce6d69 1120 while (s < strend) {
1121 if (isSPACE_LC(*s)) {
6eb5f6b9 1122 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1123 goto got_it;
1124 else
1125 tmp = doevery;
1126 }
1127 else
1128 tmp = 1;
1129 s++;
1130 }
1131 break;
a0ed51b3
LW
1132 case SPACELUTF8:
1133 PL_reg_flags |= RF_tainted;
1134 while (s < strend) {
dfe13c55 1135 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
6eb5f6b9 1136 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1137 goto got_it;
1138 else
1139 tmp = doevery;
1140 }
1141 else
1142 tmp = 1;
1143 s += UTF8SKIP(s);
1144 }
1145 break;
a0d0e21e
LW
1146 case NSPACE:
1147 while (s < strend) {
1148 if (!isSPACE(*s)) {
6eb5f6b9 1149 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1150 goto got_it;
1151 else
1152 tmp = doevery;
a687059c 1153 }
a0d0e21e
LW
1154 else
1155 tmp = 1;
1156 s++;
1157 }
1158 break;
a0ed51b3
LW
1159 case NSPACEUTF8:
1160 while (s < strend) {
dfe13c55 1161 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
6eb5f6b9 1162 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1163 goto got_it;
1164 else
1165 tmp = doevery;
1166 }
1167 else
1168 tmp = 1;
1169 s += UTF8SKIP(s);
1170 }
1171 break;
bbce6d69 1172 case NSPACEL:
3280af22 1173 PL_reg_flags |= RF_tainted;
bbce6d69 1174 while (s < strend) {
1175 if (!isSPACE_LC(*s)) {
6eb5f6b9 1176 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1177 goto got_it;
1178 else
1179 tmp = doevery;
1180 }
1181 else
1182 tmp = 1;
1183 s++;
1184 }
1185 break;
a0ed51b3
LW
1186 case NSPACELUTF8:
1187 PL_reg_flags |= RF_tainted;
1188 while (s < strend) {
dfe13c55 1189 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
6eb5f6b9 1190 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1191 goto got_it;
1192 else
1193 tmp = doevery;
1194 }
1195 else
1196 tmp = 1;
1197 s += UTF8SKIP(s);
1198 }
1199 break;
a0d0e21e
LW
1200 case DIGIT:
1201 while (s < strend) {
1202 if (isDIGIT(*s)) {
6eb5f6b9 1203 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1204 goto got_it;
1205 else
1206 tmp = doevery;
2b69d0c2 1207 }
a0d0e21e
LW
1208 else
1209 tmp = 1;
1210 s++;
1211 }
1212 break;
a0ed51b3
LW
1213 case DIGITUTF8:
1214 while (s < strend) {
dfe13c55 1215 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1216 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1217 goto got_it;
1218 else
1219 tmp = doevery;
1220 }
1221 else
1222 tmp = 1;
1223 s += UTF8SKIP(s);
1224 }
1225 break;
b8c5462f
JH
1226 case DIGITL:
1227 PL_reg_flags |= RF_tainted;
1228 while (s < strend) {
1229 if (isDIGIT_LC(*s)) {
6eb5f6b9 1230 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1231 goto got_it;
1232 else
1233 tmp = doevery;
1234 }
1235 else
1236 tmp = 1;
1237 s++;
1238 }
1239 break;
1240 case DIGITLUTF8:
1241 PL_reg_flags |= RF_tainted;
1242 while (s < strend) {
1243 if (isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1244 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1245 goto got_it;
1246 else
1247 tmp = doevery;
1248 }
1249 else
1250 tmp = 1;
1251 s += UTF8SKIP(s);
1252 }
1253 break;
a0d0e21e
LW
1254 case NDIGIT:
1255 while (s < strend) {
1256 if (!isDIGIT(*s)) {
6eb5f6b9 1257 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1258 goto got_it;
1259 else
1260 tmp = doevery;
a687059c 1261 }
a0d0e21e
LW
1262 else
1263 tmp = 1;
1264 s++;
1265 }
1266 break;
a0ed51b3
LW
1267 case NDIGITUTF8:
1268 while (s < strend) {
dfe13c55 1269 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1270 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1271 goto got_it;
1272 else
1273 tmp = doevery;
1274 }
1275 else
1276 tmp = 1;
1277 s += UTF8SKIP(s);
1278 }
1279 break;
b8c5462f
JH
1280 case NDIGITL:
1281 PL_reg_flags |= RF_tainted;
1282 while (s < strend) {
1283 if (!isDIGIT_LC(*s)) {
6eb5f6b9 1284 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1285 goto got_it;
1286 else
1287 tmp = doevery;
1288 }
1289 else
1290 tmp = 1;
1291 s++;
a0ed51b3 1292 }
b8c5462f
JH
1293 break;
1294 case NDIGITLUTF8:
1295 PL_reg_flags |= RF_tainted;
1296 while (s < strend) {
1297 if (!isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1298 if (tmp && (norun || regtry(prog, s)))
b8c5462f 1299 goto got_it;
cf93c79d 1300 else
b8c5462f
JH
1301 tmp = doevery;
1302 }
1303 else
1304 tmp = 1;
1305 s += UTF8SKIP(s);
1306 }
1307 break;
b3c9acc1 1308 default:
3c3eec57
GS
1309 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1310 break;
d6a28714 1311 }
6eb5f6b9
JH
1312 return 0;
1313 got_it:
1314 return s;
1315}
1316
1317/*
1318 - regexec_flags - match a regexp against a string
1319 */
1320I32
1321Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1322 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1323/* strend: pointer to null at end of string */
1324/* strbeg: real beginning of string */
1325/* minend: end of match must be >=minend after stringarg. */
1326/* data: May be used for some additional optimizations. */
1327/* nosave: For optimizations. */
1328{
1329 dTHR;
1330 register char *s;
1331 register regnode *c;
1332 register char *startpos = stringarg;
6eb5f6b9
JH
1333 I32 minlen; /* must match at least this many chars */
1334 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1335 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1336 constant substr. */ /* CC */
1337 I32 end_shift = 0; /* Same for the end. */ /* CC */
1338 I32 scream_pos = -1; /* Internal iterator of scream. */
1339 char *scream_olds;
1340 SV* oreplsv = GvSV(PL_replgv);
1341
1342 PL_regcc = 0;
1343
1344 cache_re(prog);
1345#ifdef DEBUGGING
1346 PL_regnarrate = PL_debug & 512;
1347#endif
1348
1349 /* Be paranoid... */
1350 if (prog == NULL || startpos == NULL) {
1351 Perl_croak(aTHX_ "NULL regexp parameter");
1352 return 0;
1353 }
1354
1355 minlen = prog->minlen;
1356 if (strend - startpos < minlen) goto phooey;
1357
1358 if (startpos == strbeg) /* is ^ valid at stringarg? */
1359 PL_regprev = '\n';
1360 else {
1361 PL_regprev = (U32)stringarg[-1];
1362 if (!PL_multiline && PL_regprev == '\n')
1363 PL_regprev = '\0'; /* force ^ to NOT match */
1364 }
1365
1366 /* Check validity of program. */
1367 if (UCHARAT(prog->program) != REG_MAGIC) {
1368 Perl_croak(aTHX_ "corrupted regexp program");
1369 }
1370
1371 PL_reg_flags = 0;
1372 PL_reg_eval_set = 0;
1373 PL_reg_maxiter = 0;
1374
1375 if (prog->reganch & ROPT_UTF8)
1376 PL_reg_flags |= RF_utf8;
1377
1378 /* Mark beginning of line for ^ and lookbehind. */
1379 PL_regbol = startpos;
1380 PL_bostr = strbeg;
1381 PL_reg_sv = sv;
1382
1383 /* Mark end of line for $ (and such) */
1384 PL_regeol = strend;
1385
1386 /* see how far we have to get to not match where we matched before */
1387 PL_regtill = startpos+minend;
1388
1389 /* We start without call_cc context. */
1390 PL_reg_call_cc = 0;
1391
1392 /* If there is a "must appear" string, look for it. */
1393 s = startpos;
1394
1395 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1396 MAGIC *mg;
1397
1398 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1399 PL_reg_ganch = startpos;
1400 else if (sv && SvTYPE(sv) >= SVt_PVMG
1401 && SvMAGIC(sv)
1402 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1403 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1404 if (prog->reganch & ROPT_ANCH_GPOS) {
1405 if (s > PL_reg_ganch)
1406 goto phooey;
1407 s = PL_reg_ganch;
1408 }
1409 }
1410 else /* pos() not defined */
1411 PL_reg_ganch = strbeg;
1412 }
1413
1414 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1415 re_scream_pos_data d;
1416
1417 d.scream_olds = &scream_olds;
1418 d.scream_pos = &scream_pos;
1419 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1420 if (!s)
1421 goto phooey; /* not present */
1422 }
1423
1424 DEBUG_r( if (!PL_colorset) reginitcolors() );
1425 DEBUG_r(PerlIO_printf(Perl_debug_log,
1426 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1427 PL_colors[4],PL_colors[5],PL_colors[0],
1428 prog->precomp,
1429 PL_colors[1],
1430 (strlen(prog->precomp) > 60 ? "..." : ""),
1431 PL_colors[0],
1432 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1433 startpos, PL_colors[1],
1434 (strend - startpos > 60 ? "..." : ""))
1435 );
1436
1437 /* Simplest case: anchored match need be tried only once. */
1438 /* [unless only anchor is BOL and multiline is set] */
1439 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1440 if (s == startpos && regtry(prog, startpos))
1441 goto got_it;
1442 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1443 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1444 {
1445 char *end;
1446
1447 if (minlen)
1448 dontbother = minlen - 1;
1449 end = HOPc(strend, -dontbother) - 1;
1450 /* for multiline we only have to try after newlines */
1451 if (prog->check_substr) {
1452 if (s == startpos)
1453 goto after_try;
1454 while (1) {
1455 if (regtry(prog, s))
1456 goto got_it;
1457 after_try:
1458 if (s >= end)
1459 goto phooey;
1460 if (prog->reganch & RE_USE_INTUIT) {
1461 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1462 if (!s)
1463 goto phooey;
1464 }
1465 else
1466 s++;
1467 }
1468 } else {
1469 if (s > startpos)
1470 s--;
1471 while (s < end) {
1472 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1473 if (regtry(prog, s))
1474 goto got_it;
1475 }
1476 }
1477 }
1478 }
1479 goto phooey;
1480 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1481 if (regtry(prog, PL_reg_ganch))
1482 goto got_it;
1483 goto phooey;
1484 }
1485
1486 /* Messy cases: unanchored match. */
1487 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1488 /* we have /x+whatever/ */
1489 /* it must be a one character string (XXXX Except UTF?) */
1490 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1491#ifdef DEBUGGING
1492 int did_match = 0;
1493#endif
1494
6eb5f6b9
JH
1495 if (UTF) {
1496 while (s < strend) {
1497 if (*s == ch) {
bf93d4cc 1498 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1499 if (regtry(prog, s)) goto got_it;
1500 s += UTF8SKIP(s);
1501 while (s < strend && *s == ch)
1502 s += UTF8SKIP(s);
1503 }
1504 s += UTF8SKIP(s);
1505 }
1506 }
1507 else {
1508 while (s < strend) {
1509 if (*s == ch) {
bf93d4cc 1510 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1511 if (regtry(prog, s)) goto got_it;
1512 s++;
1513 while (s < strend && *s == ch)
1514 s++;
1515 }
1516 s++;
1517 }
1518 }
bf93d4cc
GS
1519 DEBUG_r(did_match ||
1520 PerlIO_printf(Perl_debug_log,
1521 "Did not find anchored character...\n"));
6eb5f6b9
JH
1522 }
1523 /*SUPPRESS 560*/
1524 else if (prog->anchored_substr != Nullsv
1525 || (prog->float_substr != Nullsv
1526 && prog->float_max_offset < strend - s)) {
1527 SV *must = prog->anchored_substr
1528 ? prog->anchored_substr : prog->float_substr;
1529 I32 back_max =
1530 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1531 I32 back_min =
1532 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
6eb5f6b9
JH
1533 char *last = HOPc(strend, /* Cannot start after this */
1534 -(I32)(CHR_SVLEN(must)
1535 - (SvTAIL(must) != 0) + back_min));
1536 char *last1; /* Last position checked before */
bf93d4cc
GS
1537#ifdef DEBUGGING
1538 int did_match = 0;
1539#endif
6eb5f6b9
JH
1540
1541 if (s > PL_bostr)
1542 last1 = HOPc(s, -1);
1543 else
1544 last1 = s - 1; /* bogus */
1545
1546 /* XXXX check_substr already used to find `s', can optimize if
1547 check_substr==must. */
1548 scream_pos = -1;
1549 dontbother = end_shift;
1550 strend = HOPc(strend, -dontbother);
1551 while ( (s <= last) &&
1552 ((flags & REXEC_SCREAM)
1553 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1554 end_shift, &scream_pos, 0))
1555 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1556 (unsigned char*)strend, must,
1557 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1558 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1559 if (HOPc(s, -back_max) > last1) {
1560 last1 = HOPc(s, -back_min);
1561 s = HOPc(s, -back_max);
1562 }
1563 else {
1564 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1565
1566 last1 = HOPc(s, -back_min);
1567 s = t;
1568 }
1569 if (UTF) {
1570 while (s <= last1) {
1571 if (regtry(prog, s))
1572 goto got_it;
1573 s += UTF8SKIP(s);
1574 }
1575 }
1576 else {
1577 while (s <= last1) {
1578 if (regtry(prog, s))
1579 goto got_it;
1580 s++;
1581 }
1582 }
1583 }
bf93d4cc
GS
1584 DEBUG_r(did_match ||
1585 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1586 ((must == prog->anchored_substr)
1587 ? "anchored" : "floating"),
1588 PL_colors[0],
1589 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1590 SvPVX(must),
1591 PL_colors[1], (SvTAIL(must) ? "$" : "")));
6eb5f6b9
JH
1592 goto phooey;
1593 }
155aba94 1594 else if ((c = prog->regstclass)) {
66e933ab
GS
1595 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1596 /* don't bother with what can't match */
6eb5f6b9
JH
1597 strend = HOPc(strend, -(minlen - 1));
1598 if (find_byclass(prog, c, s, strend, startpos, 0))
1599 goto got_it;
bf93d4cc 1600 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1601 }
1602 else {
1603 dontbother = 0;
1604 if (prog->float_substr != Nullsv) { /* Trim the end. */
1605 char *last;
d6a28714
JH
1606
1607 if (flags & REXEC_SCREAM) {
1608 last = screaminstr(sv, prog->float_substr, s - strbeg,
1609 end_shift, &scream_pos, 1); /* last one */
1610 if (!last)
1611 last = scream_olds; /* Only one occurence. */
b8c5462f 1612 }
d6a28714
JH
1613 else {
1614 STRLEN len;
1615 char *little = SvPV(prog->float_substr, len);
1616
1617 if (SvTAIL(prog->float_substr)) {
1618 if (memEQ(strend - len + 1, little, len - 1))
1619 last = strend - len + 1;
1620 else if (!PL_multiline)
1621 last = memEQ(strend - len, little, len)
1622 ? strend - len : Nullch;
b8c5462f 1623 else
d6a28714
JH
1624 goto find_last;
1625 } else {
1626 find_last:
1627 if (len)
1628 last = rninstr(s, strend, little, little + len);
b8c5462f 1629 else
d6a28714 1630 last = strend; /* matching `$' */
b8c5462f 1631 }
b8c5462f 1632 }
bf93d4cc
GS
1633 if (last == NULL) {
1634 DEBUG_r(PerlIO_printf(Perl_debug_log,
1635 "%sCan't trim the tail, match fails (should not happen)%s\n",
1636 PL_colors[4],PL_colors[5]));
1637 goto phooey; /* Should not happen! */
1638 }
d6a28714
JH
1639 dontbother = strend - last + prog->float_min_offset;
1640 }
1641 if (minlen && (dontbother < minlen))
1642 dontbother = minlen - 1;
1643 strend -= dontbother; /* this one's always in bytes! */
1644 /* We don't know much -- general case. */
1645 if (UTF) {
1646 for (;;) {
1647 if (regtry(prog, s))
1648 goto got_it;
1649 if (s >= strend)
1650 break;
b8c5462f 1651 s += UTF8SKIP(s);
d6a28714
JH
1652 };
1653 }
1654 else {
1655 do {
1656 if (regtry(prog, s))
1657 goto got_it;
1658 } while (s++ < strend);
1659 }
1660 }
1661
1662 /* Failure. */
1663 goto phooey;
1664
1665got_it:
1666 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1667
1668 if (PL_reg_eval_set) {
1669 /* Preserve the current value of $^R */
1670 if (oreplsv != GvSV(PL_replgv))
1671 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1672 restored, the value remains
1673 the same. */
1674 restore_pos(aTHXo_ 0);
1675 }
1676
1677 /* make sure $`, $&, $', and $digit will work later */
1678 if ( !(flags & REXEC_NOT_FIRST) ) {
1679 if (RX_MATCH_COPIED(prog)) {
1680 Safefree(prog->subbeg);
1681 RX_MATCH_COPIED_off(prog);
1682 }
1683 if (flags & REXEC_COPY_STR) {
1684 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1685
1686 s = savepvn(strbeg, i);
1687 prog->subbeg = s;
1688 prog->sublen = i;
1689 RX_MATCH_COPIED_on(prog);
1690 }
1691 else {
1692 prog->subbeg = strbeg;
1693 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1694 }
1695 }
1696
1697 return 1;
1698
1699phooey:
bf93d4cc
GS
1700 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1701 PL_colors[4],PL_colors[5]));
d6a28714
JH
1702 if (PL_reg_eval_set)
1703 restore_pos(aTHXo_ 0);
1704 return 0;
1705}
1706
1707/*
1708 - regtry - try match at specific point
1709 */
1710STATIC I32 /* 0 failure, 1 success */
1711S_regtry(pTHX_ regexp *prog, char *startpos)
1712{
1713 dTHR;
1714 register I32 i;
1715 register I32 *sp;
1716 register I32 *ep;
1717 CHECKPOINT lastcp;
1718
02db2b7b
IZ
1719#ifdef DEBUGGING
1720 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1721#endif
d6a28714
JH
1722 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1723 MAGIC *mg;
1724
1725 PL_reg_eval_set = RS_init;
1726 DEBUG_r(DEBUG_s(
b900a521
JH
1727 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1728 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1729 ));
e8347627 1730 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1731 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1732 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1733 SAVETMPS;
1734 /* Apparently this is not needed, judging by wantarray. */
e8347627 1735 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1736 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1737
1738 if (PL_reg_sv) {
1739 /* Make $_ available to executed code. */
1740 if (PL_reg_sv != DEFSV) {
1741 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1742 SAVESPTR(DEFSV);
1743 DEFSV = PL_reg_sv;
b8c5462f 1744 }
d6a28714
JH
1745
1746 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1747 && (mg = mg_find(PL_reg_sv, 'g')))) {
1748 /* prepare for quick setting of pos */
1749 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1750 mg = mg_find(PL_reg_sv, 'g');
1751 mg->mg_len = -1;
b8c5462f 1752 }
d6a28714
JH
1753 PL_reg_magic = mg;
1754 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1755 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1756 }
1757 if (!PL_reg_curpm)
0f79a09d 1758 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1759 PL_reg_curpm->op_pmregexp = prog;
1760 PL_reg_oldcurpm = PL_curpm;
1761 PL_curpm = PL_reg_curpm;
1762 if (RX_MATCH_COPIED(prog)) {
1763 /* Here is a serious problem: we cannot rewrite subbeg,
1764 since it may be needed if this match fails. Thus
1765 $` inside (?{}) could fail... */
1766 PL_reg_oldsaved = prog->subbeg;
1767 PL_reg_oldsavedlen = prog->sublen;
1768 RX_MATCH_COPIED_off(prog);
1769 }
1770 else
1771 PL_reg_oldsaved = Nullch;
1772 prog->subbeg = PL_bostr;
1773 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1774 }
1775 prog->startp[0] = startpos - PL_bostr;
1776 PL_reginput = startpos;
1777 PL_regstartp = prog->startp;
1778 PL_regendp = prog->endp;
1779 PL_reglastparen = &prog->lastparen;
1780 prog->lastparen = 0;
1781 PL_regsize = 0;
1782 DEBUG_r(PL_reg_starttry = startpos);
1783 if (PL_reg_start_tmpl <= prog->nparens) {
1784 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1785 if(PL_reg_start_tmp)
1786 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1787 else
1788 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1789 }
1790
1791 /* XXXX What this code is doing here?!!! There should be no need
1792 to do this again and again, PL_reglastparen should take care of
1793 this! */
1794 sp = prog->startp;
1795 ep = prog->endp;
1796 if (prog->nparens) {
1797 for (i = prog->nparens; i >= 1; i--) {
1798 *++sp = -1;
1799 *++ep = -1;
1800 }
1801 }
02db2b7b 1802 REGCP_SET(lastcp);
d6a28714
JH
1803 if (regmatch(prog->program + 1)) {
1804 prog->endp[0] = PL_reginput - PL_bostr;
1805 return 1;
1806 }
02db2b7b 1807 REGCP_UNWIND(lastcp);
d6a28714
JH
1808 return 0;
1809}
1810
02db2b7b
IZ
1811#define RE_UNWIND_BRANCH 1
1812#define RE_UNWIND_BRANCHJ 2
1813
1814union re_unwind_t;
1815
1816typedef struct { /* XX: makes sense to enlarge it... */
1817 I32 type;
1818 I32 prev;
1819 CHECKPOINT lastcp;
1820} re_unwind_generic_t;
1821
1822typedef struct {
1823 I32 type;
1824 I32 prev;
1825 CHECKPOINT lastcp;
1826 I32 lastparen;
1827 regnode *next;
1828 char *locinput;
1829 I32 nextchr;
1830#ifdef DEBUGGING
1831 int regindent;
1832#endif
1833} re_unwind_branch_t;
1834
1835typedef union re_unwind_t {
1836 I32 type;
1837 re_unwind_generic_t generic;
1838 re_unwind_branch_t branch;
1839} re_unwind_t;
1840
d6a28714
JH
1841/*
1842 - regmatch - main matching routine
1843 *
1844 * Conceptually the strategy is simple: check to see whether the current
1845 * node matches, call self recursively to see whether the rest matches,
1846 * and then act accordingly. In practice we make some effort to avoid
1847 * recursion, in particular by going through "ordinary" nodes (that don't
1848 * need to know whether the rest of the match failed) by a loop instead of
1849 * by recursion.
1850 */
1851/* [lwall] I've hoisted the register declarations to the outer block in order to
1852 * maybe save a little bit of pushing and popping on the stack. It also takes
1853 * advantage of machines that use a register save mask on subroutine entry.
1854 */
1855STATIC I32 /* 0 failure, 1 success */
1856S_regmatch(pTHX_ regnode *prog)
1857{
1858 dTHR;
1859 register regnode *scan; /* Current node. */
1860 regnode *next; /* Next node. */
1861 regnode *inner; /* Next node in internal branch. */
1862 register I32 nextchr; /* renamed nextchr - nextchar colides with
1863 function of same name */
1864 register I32 n; /* no or next */
1865 register I32 ln; /* len or last */
1866 register char *s; /* operand or save */
1867 register char *locinput = PL_reginput;
1868 register I32 c1, c2, paren; /* case fold search, parenth */
1869 int minmod = 0, sw = 0, logical = 0;
02db2b7b
IZ
1870 I32 unwind = 0;
1871 I32 firstcp = PL_savestack_ix;
1872
d6a28714
JH
1873#ifdef DEBUGGING
1874 PL_regindent++;
1875#endif
1876
1877 /* Note that nextchr is a byte even in UTF */
1878 nextchr = UCHARAT(locinput);
1879 scan = prog;
1880 while (scan != NULL) {
1881#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
02db2b7b 1882#if 1
d6a28714
JH
1883# define sayYES goto yes
1884# define sayNO goto no
7821416a
IZ
1885# define sayYES_FINAL goto yes_final
1886# define sayYES_LOUD goto yes_loud
1887# define sayNO_FINAL goto no_final
1888# define sayNO_SILENT goto do_no
d6a28714
JH
1889# define saySAME(x) if (x) goto yes; else goto no
1890# define REPORT_CODE_OFF 24
1891#else
1892# define sayYES return 1
1893# define sayNO return 0
7821416a
IZ
1894# define sayYES_FINAL return 1
1895# define sayYES_LOUD return 1
1896# define sayNO_FINAL return 0
1897# define sayNO_SILENT return 0
d6a28714
JH
1898# define saySAME(x) return x
1899#endif
1900 DEBUG_r( {
1901 SV *prop = sv_newmortal();
1902 int docolor = *PL_colors[0];
1903 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1904 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1905 /* The part of the string before starttry has one color
1906 (pref0_len chars), between starttry and current
1907 position another one (pref_len - pref0_len chars),
1908 after the current position the third one.
1909 We assume that pref0_len <= pref_len, otherwise we
1910 decrease pref0_len. */
1911 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1912 ? (5 + taill) - l : locinput - PL_bostr);
1913 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1914
1915 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1916 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1917 ? (5 + taill) - pref_len : PL_regeol - locinput);
1918 if (pref0_len < 0)
1919 pref0_len = 0;
1920 if (pref0_len > pref_len)
1921 pref0_len = pref_len;
1922 regprop(prop, scan);
1923 PerlIO_printf(Perl_debug_log,
b900a521
JH
1924 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1925 (IV)(locinput - PL_bostr),
d6a28714
JH
1926 PL_colors[4], pref0_len,
1927 locinput - pref_len, PL_colors[5],
1928 PL_colors[2], pref_len - pref0_len,
1929 locinput - pref_len + pref0_len, PL_colors[3],
1930 (docolor ? "" : "> <"),
1931 PL_colors[0], l, locinput, PL_colors[1],
1932 15 - l - pref_len + 1,
1933 "",
b900a521 1934 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
1935 SvPVX(prop));
1936 } );
1937
1938 next = scan + NEXT_OFF(scan);
1939 if (next == scan)
1940 next = NULL;
1941
1942 switch (OP(scan)) {
1943 case BOL:
1944 if (locinput == PL_bostr
1945 ? PL_regprev == '\n'
1946 : (PL_multiline &&
1947 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1948 {
1949 /* regtill = regbol; */
b8c5462f
JH
1950 break;
1951 }
d6a28714
JH
1952 sayNO;
1953 case MBOL:
1954 if (locinput == PL_bostr
1955 ? PL_regprev == '\n'
1956 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1957 {
b8c5462f
JH
1958 break;
1959 }
d6a28714
JH
1960 sayNO;
1961 case SBOL:
c2a73568 1962 if (locinput == PL_bostr)
b8c5462f 1963 break;
d6a28714
JH
1964 sayNO;
1965 case GPOS:
1966 if (locinput == PL_reg_ganch)
1967 break;
1968 sayNO;
1969 case EOL:
1970 if (PL_multiline)
1971 goto meol;
1972 else
1973 goto seol;
1974 case MEOL:
1975 meol:
1976 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1977 sayNO;
b8c5462f 1978 break;
d6a28714
JH
1979 case SEOL:
1980 seol:
1981 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1982 sayNO;
d6a28714 1983 if (PL_regeol - locinput > 1)
b8c5462f 1984 sayNO;
b8c5462f 1985 break;
d6a28714
JH
1986 case EOS:
1987 if (PL_regeol != locinput)
b8c5462f 1988 sayNO;
d6a28714
JH
1989 break;
1990 case SANYUTF8:
b8c5462f 1991 if (nextchr & 0x80) {
b8c5462f 1992 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1993 if (locinput > PL_regeol)
1994 sayNO;
b8c5462f
JH
1995 nextchr = UCHARAT(locinput);
1996 break;
1997 }
d6a28714 1998 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1999 sayNO;
b8c5462f 2000 nextchr = UCHARAT(++locinput);
a0d0e21e 2001 break;
d6a28714
JH
2002 case SANY:
2003 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
2004 sayNO;
2005 nextchr = UCHARAT(++locinput);
b85d18e9 2006 break;
d6a28714 2007 case ANYUTF8:
a0ed51b3 2008 if (nextchr & 0x80) {
b8c5462f 2009 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2010 if (locinput > PL_regeol)
2011 sayNO;
a0ed51b3
LW
2012 nextchr = UCHARAT(locinput);
2013 break;
2014 }
155aba94 2015 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
a0ed51b3
LW
2016 sayNO;
2017 nextchr = UCHARAT(++locinput);
2018 break;
d6a28714 2019 case REG_ANY:
155aba94 2020 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
4633a7c4 2021 sayNO;
76e3520e 2022 nextchr = UCHARAT(++locinput);
a0d0e21e 2023 break;
d6a28714 2024 case EXACT:
cd439c50
IZ
2025 s = STRING(scan);
2026 ln = STR_LEN(scan);
d6a28714
JH
2027 /* Inline the first character, for speed. */
2028 if (UCHARAT(s) != nextchr)
2029 sayNO;
2030 if (PL_regeol - locinput < ln)
2031 sayNO;
2032 if (ln > 1 && memNE(s, locinput, ln))
2033 sayNO;
2034 locinput += ln;
2035 nextchr = UCHARAT(locinput);
2036 break;
2037 case EXACTFL:
b8c5462f
JH
2038 PL_reg_flags |= RF_tainted;
2039 /* FALL THROUGH */
d6a28714 2040 case EXACTF:
cd439c50
IZ
2041 s = STRING(scan);
2042 ln = STR_LEN(scan);
d6a28714
JH
2043
2044 if (UTF) {
2045 char *l = locinput;
2046 char *e = s + ln;
2047 c1 = OP(scan) == EXACTF;
2048 while (s < e) {
2049 if (l >= PL_regeol)
2050 sayNO;
667bb95a
JH
2051 if (utf8_to_uv((U8*)s, e - s, 0, 0) !=
2052 (c1 ?
2053 toLOWER_utf8((U8*)l) :
2054 toLOWER_LC_utf8((U8*)l)))
d6a28714
JH
2055 {
2056 sayNO;
2057 }
2058 s += UTF8SKIP(s);
2059 l += UTF8SKIP(l);
b8c5462f 2060 }
d6a28714 2061 locinput = l;
a0ed51b3
LW
2062 nextchr = UCHARAT(locinput);
2063 break;
2064 }
d6a28714
JH
2065
2066 /* Inline the first character, for speed. */
2067 if (UCHARAT(s) != nextchr &&
2068 UCHARAT(s) != ((OP(scan) == EXACTF)
2069 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2070 sayNO;
d6a28714 2071 if (PL_regeol - locinput < ln)
b8c5462f 2072 sayNO;
d6a28714
JH
2073 if (ln > 1 && (OP(scan) == EXACTF
2074 ? ibcmp(s, locinput, ln)
2075 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2076 sayNO;
d6a28714
JH
2077 locinput += ln;
2078 nextchr = UCHARAT(locinput);
a0d0e21e 2079 break;
d6a28714 2080 case ANYOFUTF8:
d6a28714 2081 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 2082 sayNO;
d6a28714
JH
2083 if (locinput >= PL_regeol)
2084 sayNO;
2085 locinput += PL_utf8skip[nextchr];
2086 nextchr = UCHARAT(locinput);
2087 break;
2088 case ANYOF:
d6a28714 2089 if (nextchr < 0)
b8c5462f 2090 nextchr = UCHARAT(locinput);
936ed897 2091 if (!REGINCLASS(scan, nextchr))
d6a28714
JH
2092 sayNO;
2093 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2094 sayNO;
b8c5462f
JH
2095 nextchr = UCHARAT(++locinput);
2096 break;
d6a28714 2097 case ALNUML:
b8c5462f
JH
2098 PL_reg_flags |= RF_tainted;
2099 /* FALL THROUGH */
d6a28714 2100 case ALNUM:
b8c5462f 2101 if (!nextchr)
4633a7c4 2102 sayNO;
d6a28714
JH
2103 if (!(OP(scan) == ALNUM
2104 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
2105 sayNO;
2106 nextchr = UCHARAT(++locinput);
bbce6d69 2107 break;
d6a28714 2108 case ALNUMLUTF8:
3280af22 2109 PL_reg_flags |= RF_tainted;
bbce6d69 2110 /* FALL THROUGH */
d6a28714 2111 case ALNUMUTF8:
b8c5462f
JH
2112 if (!nextchr)
2113 sayNO;
2114 if (nextchr & 0x80) {
d6a28714
JH
2115 if (!(OP(scan) == ALNUMUTF8
2116 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2117 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2118 {
2119 sayNO;
a0ed51b3 2120 }
b8c5462f 2121 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2122 nextchr = UCHARAT(locinput);
2123 break;
2124 }
d6a28714
JH
2125 if (!(OP(scan) == ALNUMUTF8
2126 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2127 sayNO;
b8c5462f 2128 nextchr = UCHARAT(++locinput);
a0d0e21e 2129 break;
d6a28714 2130 case NALNUML:
b8c5462f
JH
2131 PL_reg_flags |= RF_tainted;
2132 /* FALL THROUGH */
d6a28714
JH
2133 case NALNUM:
2134 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2135 sayNO;
d6a28714
JH
2136 if (OP(scan) == NALNUM
2137 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 2138 sayNO;
b8c5462f 2139 nextchr = UCHARAT(++locinput);
a0ed51b3 2140 break;
d6a28714 2141 case NALNUMLUTF8:
b8c5462f
JH
2142 PL_reg_flags |= RF_tainted;
2143 /* FALL THROUGH */
d6a28714 2144 case NALNUMUTF8:
3280af22 2145 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2146 sayNO;
b8c5462f 2147 if (nextchr & 0x80) {
d6a28714
JH
2148 if (OP(scan) == NALNUMUTF8
2149 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2150 : isALNUM_LC_utf8((U8*)locinput))
2151 {
b8c5462f 2152 sayNO;
d6a28714 2153 }
b8c5462f
JH
2154 locinput += PL_utf8skip[nextchr];
2155 nextchr = UCHARAT(locinput);
2156 break;
2157 }
d6a28714
JH
2158 if (OP(scan) == NALNUMUTF8
2159 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2160 sayNO;
76e3520e 2161 nextchr = UCHARAT(++locinput);
a0d0e21e 2162 break;
d6a28714
JH
2163 case BOUNDL:
2164 case NBOUNDL:
3280af22 2165 PL_reg_flags |= RF_tainted;
bbce6d69 2166 /* FALL THROUGH */
d6a28714
JH
2167 case BOUND:
2168 case NBOUND:
2169 /* was last char in word? */
2170 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2171 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2172 ln = isALNUM(ln);
2173 n = isALNUM(nextchr);
2174 }
2175 else {
2176 ln = isALNUM_LC(ln);
2177 n = isALNUM_LC(nextchr);
2178 }
2179 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 2180 sayNO;
a0d0e21e 2181 break;
d6a28714
JH
2182 case BOUNDLUTF8:
2183 case NBOUNDLUTF8:
a0ed51b3
LW
2184 PL_reg_flags |= RF_tainted;
2185 /* FALL THROUGH */
d6a28714
JH
2186 case BOUNDUTF8:
2187 case NBOUNDUTF8:
2188 /* was last char in word? */
667bb95a
JH
2189 if (locinput == PL_regbol)
2190 ln = PL_regprev;
2191 else {
2192 U8 *r = reghop((U8*)locinput, -1);
2193
2194 ln = utf8_to_uv(r, s - (char*)r, 0, 0);
2195 }
d6a28714
JH
2196 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2197 ln = isALNUM_uni(ln);
2198 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 2199 }
d6a28714
JH
2200 else {
2201 ln = isALNUM_LC_uni(ln);
2202 n = isALNUM_LC_utf8((U8*)locinput);
2203 }
2204 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 2205 sayNO;
a0ed51b3 2206 break;
d6a28714 2207 case SPACEL:
3280af22 2208 PL_reg_flags |= RF_tainted;
bbce6d69 2209 /* FALL THROUGH */
d6a28714 2210 case SPACE:
9442cb0e 2211 if (!nextchr)
4633a7c4 2212 sayNO;
d6a28714
JH
2213 if (!(OP(scan) == SPACE
2214 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 2215 sayNO;
76e3520e 2216 nextchr = UCHARAT(++locinput);
a0d0e21e 2217 break;
d6a28714 2218 case SPACELUTF8:
a0ed51b3
LW
2219 PL_reg_flags |= RF_tainted;
2220 /* FALL THROUGH */
d6a28714 2221 case SPACEUTF8:
9442cb0e 2222 if (!nextchr)
a0ed51b3
LW
2223 sayNO;
2224 if (nextchr & 0x80) {
d6a28714 2225 if (!(OP(scan) == SPACEUTF8
9442cb0e 2226 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714
JH
2227 : isSPACE_LC_utf8((U8*)locinput)))
2228 {
a0ed51b3 2229 sayNO;
d6a28714 2230 }
6f06b55f 2231 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2232 nextchr = UCHARAT(locinput);
2233 break;
2234 }
d6a28714
JH
2235 if (!(OP(scan) == SPACEUTF8
2236 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
2237 sayNO;
2238 nextchr = UCHARAT(++locinput);
2239 break;
d6a28714 2240 case NSPACEL:
3280af22 2241 PL_reg_flags |= RF_tainted;
bbce6d69 2242 /* FALL THROUGH */
d6a28714 2243 case NSPACE:
9442cb0e 2244 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2245 sayNO;
9442cb0e 2246 if (OP(scan) == NSPACE
d6a28714 2247 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2248 sayNO;
b8c5462f 2249 nextchr = UCHARAT(++locinput);
a0d0e21e 2250 break;
d6a28714 2251 case NSPACELUTF8:
a0ed51b3
LW
2252 PL_reg_flags |= RF_tainted;
2253 /* FALL THROUGH */
d6a28714 2254 case NSPACEUTF8:
9442cb0e 2255 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
2256 sayNO;
2257 if (nextchr & 0x80) {
d6a28714 2258 if (OP(scan) == NSPACEUTF8
9442cb0e 2259 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714 2260 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2261 {
2262 sayNO;
2263 }
2264 locinput += PL_utf8skip[nextchr];
2265 nextchr = UCHARAT(locinput);
2266 break;
a0ed51b3 2267 }
d6a28714
JH
2268 if (OP(scan) == NSPACEUTF8
2269 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2270 sayNO;
76e3520e 2271 nextchr = UCHARAT(++locinput);
a0d0e21e 2272 break;
d6a28714 2273 case DIGITL:
a0ed51b3
LW
2274 PL_reg_flags |= RF_tainted;
2275 /* FALL THROUGH */
d6a28714 2276 case DIGIT:
9442cb0e 2277 if (!nextchr)
a0ed51b3 2278 sayNO;
d6a28714
JH
2279 if (!(OP(scan) == DIGIT
2280 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2281 sayNO;
76e3520e 2282 nextchr = UCHARAT(++locinput);
a0d0e21e 2283 break;
d6a28714 2284 case DIGITLUTF8:
a0ed51b3
LW
2285 PL_reg_flags |= RF_tainted;
2286 /* FALL THROUGH */
d6a28714 2287 case DIGITUTF8:
a0ed51b3
LW
2288 if (!nextchr)
2289 sayNO;
2290 if (nextchr & 0x80) {
9442cb0e
GS
2291 if (!(OP(scan) == DIGITUTF8
2292 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2293 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2294 {
a0ed51b3 2295 sayNO;
dfe13c55 2296 }
6f06b55f 2297 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2298 nextchr = UCHARAT(locinput);
2299 break;
2300 }
9442cb0e
GS
2301 if (!(OP(scan) == DIGITUTF8
2302 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2303 sayNO;
2304 nextchr = UCHARAT(++locinput);
2305 break;
d6a28714 2306 case NDIGITL:
b8c5462f
JH
2307 PL_reg_flags |= RF_tainted;
2308 /* FALL THROUGH */
d6a28714 2309 case NDIGIT:
9442cb0e 2310 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2311 sayNO;
9442cb0e 2312 if (OP(scan) == NDIGIT
d6a28714 2313 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2314 sayNO;
76e3520e 2315 nextchr = UCHARAT(++locinput);
a0d0e21e 2316 break;
d6a28714 2317 case NDIGITLUTF8:
b8c5462f
JH
2318 PL_reg_flags |= RF_tainted;
2319 /* FALL THROUGH */
d6a28714 2320 case NDIGITUTF8:
b8c5462f
JH
2321 if (!nextchr && locinput >= PL_regeol)
2322 sayNO;
a0ed51b3 2323 if (nextchr & 0x80) {
9442cb0e
GS
2324 if (OP(scan) == NDIGITUTF8
2325 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2326 : isDIGIT_LC_utf8((U8*)locinput))
2327 {
a0ed51b3 2328 sayNO;
9442cb0e 2329 }
6f06b55f 2330 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2331 nextchr = UCHARAT(locinput);
2332 break;
2333 }
9442cb0e
GS
2334 if (OP(scan) == NDIGITUTF8
2335 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2336 sayNO;
2337 nextchr = UCHARAT(++locinput);
2338 break;
2339 case CLUMP:
dfe13c55 2340 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2341 sayNO;
6f06b55f 2342 locinput += PL_utf8skip[nextchr];
dfe13c55 2343 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2344 locinput += UTF8SKIP(locinput);
2345 if (locinput > PL_regeol)
2346 sayNO;
2347 nextchr = UCHARAT(locinput);
2348 break;
c8756f30 2349 case REFFL:
3280af22 2350 PL_reg_flags |= RF_tainted;
c8756f30 2351 /* FALL THROUGH */
c277df42 2352 case REF:
c8756f30 2353 case REFF:
c277df42 2354 n = ARG(scan); /* which paren pair */
cf93c79d 2355 ln = PL_regstartp[n];
2c2d71f5 2356 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2357 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2358 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2359 if (ln == PL_regendp[n])
a0d0e21e 2360 break;
a0ed51b3 2361
cf93c79d 2362 s = PL_bostr + ln;
a0ed51b3
LW
2363 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2364 char *l = locinput;
cf93c79d 2365 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2366 /*
2367 * Note that we can't do the "other character" lookup trick as
2368 * in the 8-bit case (no pun intended) because in Unicode we
2369 * have to map both upper and title case to lower case.
2370 */
2371 if (OP(scan) == REFF) {
2372 while (s < e) {
2373 if (l >= PL_regeol)
2374 sayNO;
dfe13c55 2375 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2376 sayNO;
2377 s += UTF8SKIP(s);
2378 l += UTF8SKIP(l);
2379 }
2380 }
2381 else {
2382 while (s < e) {
2383 if (l >= PL_regeol)
2384 sayNO;
dfe13c55 2385 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2386 sayNO;
2387 s += UTF8SKIP(s);
2388 l += UTF8SKIP(l);
2389 }
2390 }
2391 locinput = l;
2392 nextchr = UCHARAT(locinput);
2393 break;
2394 }
2395
a0d0e21e 2396 /* Inline the first character, for speed. */
76e3520e 2397 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2398 (OP(scan) == REF ||
2399 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2400 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2401 sayNO;
cf93c79d 2402 ln = PL_regendp[n] - ln;
3280af22 2403 if (locinput + ln > PL_regeol)
4633a7c4 2404 sayNO;
c8756f30
AK
2405 if (ln > 1 && (OP(scan) == REF
2406 ? memNE(s, locinput, ln)
2407 : (OP(scan) == REFF
2408 ? ibcmp(s, locinput, ln)
2409 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2410 sayNO;
a0d0e21e 2411 locinput += ln;
76e3520e 2412 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2413 break;
2414
2415 case NOTHING:
c277df42 2416 case TAIL:
a0d0e21e
LW
2417 break;
2418 case BACK:
2419 break;
c277df42
IZ
2420 case EVAL:
2421 {
2422 dSP;
533c011a 2423 OP_4tree *oop = PL_op;
3280af22
NIS
2424 COP *ocurcop = PL_curcop;
2425 SV **ocurpad = PL_curpad;
c277df42
IZ
2426 SV *ret;
2427
2428 n = ARG(scan);
533c011a 2429 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2430 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2431 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2432 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2433
cea2e8a9 2434 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2435 SPAGAIN;
2436 ret = POPs;
2437 PUTBACK;
2438
0f5d15d6
IZ
2439 PL_op = oop;
2440 PL_curpad = ocurpad;
2441 PL_curcop = ocurcop;
c277df42 2442 if (logical) {
0f5d15d6
IZ
2443 if (logical == 2) { /* Postponed subexpression. */
2444 regexp *re;
22c35a8c 2445 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2446 re_cc_state state;
0f5d15d6
IZ
2447 CHECKPOINT cp, lastcp;
2448
2449 if(SvROK(ret) || SvRMAGICAL(ret)) {
2450 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2451
2452 if(SvMAGICAL(sv))
2453 mg = mg_find(sv, 'r');
2454 }
2455 if (mg) {
2456 re = (regexp *)mg->mg_obj;
df0003d4 2457 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2458 }
2459 else {
2460 STRLEN len;
2461 char *t = SvPV(ret, len);
2462 PMOP pm;
2463 char *oprecomp = PL_regprecomp;
2464 I32 osize = PL_regsize;
2465 I32 onpar = PL_regnpar;
2466
2467 pm.op_pmflags = 0;
393fec97 2468 pm.op_pmdynflags = (UTF||DO_UTF8(ret) ? PMdf_UTF8 : 0);
cea2e8a9 2469 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2470 if (!(SvFLAGS(ret)
2471 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2472 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2473 PL_regprecomp = oprecomp;
2474 PL_regsize = osize;
2475 PL_regnpar = onpar;
2476 }
2477 DEBUG_r(
2478 PerlIO_printf(Perl_debug_log,
2479 "Entering embedded `%s%.60s%s%s'\n",
2480 PL_colors[0],
2481 re->precomp,
2482 PL_colors[1],
2483 (strlen(re->precomp) > 60 ? "..." : ""))
2484 );
2485 state.node = next;
2486 state.prev = PL_reg_call_cc;
2487 state.cc = PL_regcc;
2488 state.re = PL_reg_re;
2489
2ab05381 2490 PL_regcc = 0;
0f5d15d6
IZ
2491
2492 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2493 REGCP_SET(lastcp);
0f5d15d6
IZ
2494 cache_re(re);
2495 state.ss = PL_savestack_ix;
2496 *PL_reglastparen = 0;
2497 PL_reg_call_cc = &state;
2498 PL_reginput = locinput;
2c2d71f5
JH
2499
2500 /* XXXX This is too dramatic a measure... */
2501 PL_reg_maxiter = 0;
2502
0f5d15d6 2503 if (regmatch(re->program + 1)) {
2c914db6
IZ
2504 /* Even though we succeeded, we need to restore
2505 global variables, since we may be wrapped inside
2506 SUSPEND, thus the match may be not finished yet. */
2507
2508 /* XXXX Do this only if SUSPENDed? */
2509 PL_reg_call_cc = state.prev;
2510 PL_regcc = state.cc;
2511 PL_reg_re = state.re;
2512 cache_re(PL_reg_re);
2513
2514 /* XXXX This is too dramatic a measure... */
2515 PL_reg_maxiter = 0;
2516
2517 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2518 ReREFCNT_dec(re);
2519 regcpblow(cp);
2520 sayYES;
2521 }
0f5d15d6 2522 ReREFCNT_dec(re);
02db2b7b 2523 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2524 regcppop();
2525 PL_reg_call_cc = state.prev;
2526 PL_regcc = state.cc;
2527 PL_reg_re = state.re;
d3790889 2528 cache_re(PL_reg_re);
2c2d71f5
JH
2529
2530 /* XXXX This is too dramatic a measure... */
2531 PL_reg_maxiter = 0;
2532
0f5d15d6
IZ
2533 sayNO;
2534 }
c277df42 2535 sw = SvTRUE(ret);
0f5d15d6 2536 logical = 0;
a0ed51b3
LW
2537 }
2538 else
3280af22 2539 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2540 break;
2541 }
a0d0e21e 2542 case OPEN:
c277df42 2543 n = ARG(scan); /* which paren pair */
3280af22
NIS
2544 PL_reg_start_tmp[n] = locinput;
2545 if (n > PL_regsize)
2546 PL_regsize = n;
a0d0e21e
LW
2547 break;
2548 case CLOSE:
c277df42 2549 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2550 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2551 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2552 if (n > *PL_reglastparen)
2553 *PL_reglastparen = n;
a0d0e21e 2554 break;
c277df42
IZ
2555 case GROUPP:
2556 n = ARG(scan); /* which paren pair */
cf93c79d 2557 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2558 break;
2559 case IFTHEN:
2c2d71f5 2560 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2561 if (sw)
2562 next = NEXTOPER(NEXTOPER(scan));
2563 else {
2564 next = scan + ARG(scan);
2565 if (OP(next) == IFTHEN) /* Fake one. */
2566 next = NEXTOPER(NEXTOPER(next));
2567 }
2568 break;
2569 case LOGICAL:
0f5d15d6 2570 logical = scan->flags;
c277df42 2571 break;
2ab05381
IZ
2572/*******************************************************************
2573 PL_regcc contains infoblock about the innermost (...)* loop, and
2574 a pointer to the next outer infoblock.
2575
2576 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2577
2578 1) After matching X, regnode for CURLYX is processed;
2579
2580 2) This regnode creates infoblock on the stack, and calls
2581 regmatch() recursively with the starting point at WHILEM node;
2582
2583 3) Each hit of WHILEM node tries to match A and Z (in the order
2584 depending on the current iteration, min/max of {min,max} and
2585 greediness). The information about where are nodes for "A"
2586 and "Z" is read from the infoblock, as is info on how many times "A"
2587 was already matched, and greediness.
2588
2589 4) After A matches, the same WHILEM node is hit again.
2590
2591 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2592 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2593 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2594 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2595 of the external loop.
2596
2597 Currently present infoblocks form a tree with a stem formed by PL_curcc
2598 and whatever it mentions via ->next, and additional attached trees
2599 corresponding to temporarily unset infoblocks as in "5" above.
2600
2601 In the following picture infoblocks for outer loop of
2602 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2603 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2604 infoblocks are drawn below the "reset" infoblock.
2605
2606 In fact in the picture below we do not show failed matches for Z and T
2607 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2608 more obvious *why* one needs to *temporary* unset infoblocks.]
2609
2610 Matched REx position InfoBlocks Comment
2611 (Y(A)*?Z)*?T x
2612 Y(A)*?Z)*?T x <- O
2613 Y (A)*?Z)*?T x <- O
2614 Y A)*?Z)*?T x <- O <- I
2615 YA )*?Z)*?T x <- O <- I
2616 YA A)*?Z)*?T x <- O <- I
2617 YAA )*?Z)*?T x <- O <- I
2618 YAA Z)*?T x <- O # Temporary unset I
2619 I
2620
2621 YAAZ Y(A)*?Z)*?T x <- O
2622 I
2623
2624 YAAZY (A)*?Z)*?T x <- O
2625 I
2626
2627 YAAZY A)*?Z)*?T x <- O <- I
2628 I
2629
2630 YAAZYA )*?Z)*?T x <- O <- I
2631 I
2632
2633 YAAZYA Z)*?T x <- O # Temporary unset I
2634 I,I
2635
2636 YAAZYAZ )*?T x <- O
2637 I,I
2638
2639 YAAZYAZ T x # Temporary unset O
2640 O
2641 I,I
2642
2643 YAAZYAZT x
2644 O
2645 I,I
2646 *******************************************************************/
a0d0e21e
LW
2647 case CURLYX: {
2648 CURCUR cc;
3280af22 2649 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2650 /* No need to save/restore up to this paren */
2651 I32 parenfloor = scan->flags;
c277df42
IZ
2652
2653 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2654 next += ARG(next);
3280af22
NIS
2655 cc.oldcc = PL_regcc;
2656 PL_regcc = &cc;
cb434fcc
IZ
2657 /* XXXX Probably it is better to teach regpush to support
2658 parenfloor > PL_regsize... */
2659 if (parenfloor > *PL_reglastparen)
2660 parenfloor = *PL_reglastparen; /* Pessimization... */
2661 cc.parenfloor = parenfloor;
a0d0e21e
LW
2662 cc.cur = -1;
2663 cc.min = ARG1(scan);
2664 cc.max = ARG2(scan);
c277df42 2665 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2666 cc.next = next;
2667 cc.minmod = minmod;
2668 cc.lastloc = 0;
3280af22 2669 PL_reginput = locinput;
a0d0e21e
LW
2670 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2671 regcpblow(cp);
3280af22 2672 PL_regcc = cc.oldcc;
4633a7c4 2673 saySAME(n);
a0d0e21e
LW
2674 }
2675 /* NOT REACHED */
2676 case WHILEM: {
2677 /*
2678 * This is really hard to understand, because after we match
2679 * what we're trying to match, we must make sure the rest of
2c2d71f5 2680 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2681 * to go back UP the parse tree by recursing ever deeper. And
2682 * if it fails, we have to reset our parent's current state
2683 * that we can try again after backing off.
2684 */
2685
c277df42 2686 CHECKPOINT cp, lastcp;
3280af22 2687 CURCUR* cc = PL_regcc;
c277df42
IZ
2688 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2689
4633a7c4 2690 n = cc->cur + 1; /* how many we know we matched */
3280af22 2691 PL_reginput = locinput;
a0d0e21e 2692
c277df42
IZ
2693 DEBUG_r(
2694 PerlIO_printf(Perl_debug_log,
2695 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2696 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2697 (long)n, (long)cc->min,
2698 (long)cc->max, (long)cc)
2699 );
4633a7c4 2700
a0d0e21e
LW
2701 /* If degenerate scan matches "", assume scan done. */
2702
579cf2c3 2703 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2704 PL_regcc = cc->oldcc;
2ab05381
IZ
2705 if (PL_regcc)
2706 ln = PL_regcc->cur;
c277df42 2707 DEBUG_r(
c3464db5
DD
2708 PerlIO_printf(Perl_debug_log,
2709 "%*s empty match detected, try continuation...\n",
3280af22 2710 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2711 );
a0d0e21e 2712 if (regmatch(cc->next))
4633a7c4 2713 sayYES;
2ab05381
IZ
2714 if (PL_regcc)
2715 PL_regcc->cur = ln;
3280af22 2716 PL_regcc = cc;
4633a7c4 2717 sayNO;
a0d0e21e
LW
2718 }
2719
2720 /* First just match a string of min scans. */
2721
2722 if (n < cc->min) {
2723 cc->cur = n;
2724 cc->lastloc = locinput;
4633a7c4
LW
2725 if (regmatch(cc->scan))
2726 sayYES;
2727 cc->cur = n - 1;
c277df42 2728 cc->lastloc = lastloc;
4633a7c4 2729 sayNO;
a0d0e21e
LW
2730 }
2731
2c2d71f5
JH
2732 if (scan->flags) {
2733 /* Check whether we already were at this position.
2734 Postpone detection until we know the match is not
2735 *that* much linear. */
2736 if (!PL_reg_maxiter) {
2737 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2738 PL_reg_leftiter = PL_reg_maxiter;
2739 }
2740 if (PL_reg_leftiter-- == 0) {
2741 I32 size = (PL_reg_maxiter + 7)/8;
2742 if (PL_reg_poscache) {
2743 if (PL_reg_poscache_size < size) {
2744 Renew(PL_reg_poscache, size, char);
2745 PL_reg_poscache_size = size;
2746 }
2747 Zero(PL_reg_poscache, size, char);
2748 }
2749 else {
2750 PL_reg_poscache_size = size;
2751 Newz(29, PL_reg_poscache, size, char);
2752 }
2753 DEBUG_r(
2754 PerlIO_printf(Perl_debug_log,
2755 "%sDetected a super-linear match, switching on caching%s...\n",
2756 PL_colors[4], PL_colors[5])
2757 );
2758 }
2759 if (PL_reg_leftiter < 0) {
2760 I32 o = locinput - PL_bostr, b;
2761
2762 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2763 b = o % 8;
2764 o /= 8;
2765 if (PL_reg_poscache[o] & (1<<b)) {
2766 DEBUG_r(
2767 PerlIO_printf(Perl_debug_log,
2768 "%*s already tried at this position...\n",
2769 REPORT_CODE_OFF+PL_regindent*2, "")
2770 );
7821416a 2771 sayNO_SILENT;
2c2d71f5
JH
2772 }
2773 PL_reg_poscache[o] |= (1<<b);
2774 }
2775 }
2776
a0d0e21e
LW
2777 /* Prefer next over scan for minimal matching. */
2778
2779 if (cc->minmod) {
3280af22 2780 PL_regcc = cc->oldcc;
2ab05381
IZ
2781 if (PL_regcc)
2782 ln = PL_regcc->cur;
5f05dabc 2783 cp = regcppush(cc->parenfloor);
02db2b7b 2784 REGCP_SET(lastcp);
5f05dabc 2785 if (regmatch(cc->next)) {
c277df42 2786 regcpblow(cp);
4633a7c4 2787 sayYES; /* All done. */
5f05dabc 2788 }
02db2b7b 2789 REGCP_UNWIND(lastcp);
5f05dabc 2790 regcppop();
2ab05381
IZ
2791 if (PL_regcc)
2792 PL_regcc->cur = ln;
3280af22 2793 PL_regcc = cc;
a0d0e21e 2794
c277df42 2795 if (n >= cc->max) { /* Maximum greed exceeded? */
e476b1b5 2796 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2797 && !(PL_reg_flags & RF_warned)) {
2798 PL_reg_flags |= RF_warned;
e476b1b5 2799 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2800 "Complex regular subexpression recursion",
2801 REG_INFTY - 1);
c277df42 2802 }
4633a7c4 2803 sayNO;
c277df42 2804 }
a687059c 2805
c277df42 2806 DEBUG_r(
c3464db5
DD
2807 PerlIO_printf(Perl_debug_log,
2808 "%*s trying longer...\n",
3280af22 2809 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2810 );
a0d0e21e 2811 /* Try scanning more and see if it helps. */
3280af22 2812 PL_reginput = locinput;
a0d0e21e
LW
2813 cc->cur = n;
2814 cc->lastloc = locinput;
5f05dabc 2815 cp = regcppush(cc->parenfloor);
02db2b7b 2816 REGCP_SET(lastcp);
5f05dabc 2817 if (regmatch(cc->scan)) {
c277df42 2818 regcpblow(cp);
4633a7c4 2819 sayYES;
5f05dabc 2820 }
02db2b7b 2821 REGCP_UNWIND(lastcp);
5f05dabc 2822 regcppop();
4633a7c4 2823 cc->cur = n - 1;
c277df42 2824 cc->lastloc = lastloc;
4633a7c4 2825 sayNO;
a0d0e21e
LW
2826 }
2827
2828 /* Prefer scan over next for maximal matching. */
2829
2830 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2831 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2832 cc->cur = n;
2833 cc->lastloc = locinput;
02db2b7b 2834 REGCP_SET(lastcp);
5f05dabc 2835 if (regmatch(cc->scan)) {
c277df42 2836 regcpblow(cp);
4633a7c4 2837 sayYES;
5f05dabc 2838 }
02db2b7b 2839 REGCP_UNWIND(lastcp);
a0d0e21e 2840 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2841 PL_reginput = locinput;
c277df42 2842 DEBUG_r(
c3464db5
DD
2843 PerlIO_printf(Perl_debug_log,
2844 "%*s failed, try continuation...\n",
3280af22 2845 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2846 );
2847 }
e476b1b5 2848 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2849 && !(PL_reg_flags & RF_warned)) {
3280af22 2850 PL_reg_flags |= RF_warned;
e476b1b5 2851 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2852 "Complex regular subexpression recursion",
2853 REG_INFTY - 1);
a0d0e21e
LW
2854 }
2855
2856 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2857 PL_regcc = cc->oldcc;
2ab05381
IZ
2858 if (PL_regcc)
2859 ln = PL_regcc->cur;
a0d0e21e 2860 if (regmatch(cc->next))
4633a7c4 2861 sayYES;
2ab05381
IZ
2862 if (PL_regcc)
2863 PL_regcc->cur = ln;
3280af22 2864 PL_regcc = cc;
4633a7c4 2865 cc->cur = n - 1;
c277df42 2866 cc->lastloc = lastloc;
4633a7c4 2867 sayNO;
a0d0e21e
LW
2868 }
2869 /* NOT REACHED */
c277df42
IZ
2870 case BRANCHJ:
2871 next = scan + ARG(scan);
2872 if (next == scan)
2873 next = NULL;
2874 inner = NEXTOPER(NEXTOPER(scan));
2875 goto do_branch;
2876 case BRANCH:
2877 inner = NEXTOPER(scan);
2878 do_branch:
2879 {
2880 CHECKPOINT lastcp;
2881 c1 = OP(scan);
2882 if (OP(next) != c1) /* No choice. */
2883 next = inner; /* Avoid recursion. */
a0d0e21e 2884 else {
02db2b7b
IZ
2885 I32 lastparen = *PL_reglastparen;
2886 I32 unwind1;
2887 re_unwind_branch_t *uw;
2888
2889 /* Put unwinding data on stack */
2890 unwind1 = SSNEWt(1,re_unwind_branch_t);
2891 uw = SSPTRt(unwind1,re_unwind_branch_t);
2892 uw->prev = unwind;
2893 unwind = unwind1;
2894 uw->type = ((c1 == BRANCH)
2895 ? RE_UNWIND_BRANCH
2896 : RE_UNWIND_BRANCHJ);
2897 uw->lastparen = lastparen;
2898 uw->next = next;
2899 uw->locinput = locinput;
2900 uw->nextchr = nextchr;
2901#ifdef DEBUGGING
2902 uw->regindent = ++PL_regindent;
2903#endif
c277df42 2904
02db2b7b
IZ
2905 REGCP_SET(uw->lastcp);
2906
2907 /* Now go into the first branch */
2908 next = inner;
a687059c 2909 }
a0d0e21e
LW
2910 }
2911 break;
2912 case MINMOD:
2913 minmod = 1;
2914 break;
c277df42
IZ
2915 case CURLYM:
2916 {
00db4c45 2917 I32 l = 0;
c277df42
IZ
2918 CHECKPOINT lastcp;
2919
2920 /* We suppose that the next guy does not need
2921 backtracking: in particular, it is of constant length,
2922 and has no parenths to influence future backrefs. */
2923 ln = ARG1(scan); /* min to match */
2924 n = ARG2(scan); /* max to match */
c277df42
IZ
2925 paren = scan->flags;
2926 if (paren) {
3280af22
NIS
2927 if (paren > PL_regsize)
2928 PL_regsize = paren;
2929 if (paren > *PL_reglastparen)
2930 *PL_reglastparen = paren;
c277df42 2931 }
dc45a647 2932 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2933 if (paren)
2934 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2935 PL_reginput = locinput;
c277df42
IZ
2936 if (minmod) {
2937 minmod = 0;
2938 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2939 sayNO;
5f4b28b2 2940 if (ln && l == 0 && n >= ln
c277df42
IZ
2941 /* In fact, this is tricky. If paren, then the
2942 fact that we did/didnot match may influence
2943 future execution. */
2944 && !(paren && ln == 0))
2945 ln = n;
3280af22 2946 locinput = PL_reginput;
22c35a8c 2947 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2948 c1 = (U8)*STRING(next);
c277df42 2949 if (OP(next) == EXACTF)
22c35a8c 2950 c2 = PL_fold[c1];
c277df42 2951 else if (OP(next) == EXACTFL)
22c35a8c 2952 c2 = PL_fold_locale[c1];
c277df42
IZ
2953 else
2954 c2 = c1;
a0ed51b3
LW
2955 }
2956 else
c277df42 2957 c1 = c2 = -1000;
02db2b7b 2958 REGCP_SET(lastcp);
5f4b28b2 2959 /* This may be improved if l == 0. */
c277df42
IZ
2960 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2961 /* If it could work, try it. */
2962 if (c1 == -1000 ||
3280af22
NIS
2963 UCHARAT(PL_reginput) == c1 ||
2964 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2965 {
2966 if (paren) {
2967 if (n) {
cf93c79d
IZ
2968 PL_regstartp[paren] =
2969 HOPc(PL_reginput, -l) - PL_bostr;
2970 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2971 }
2972 else
cf93c79d 2973 PL_regendp[paren] = -1;
c277df42
IZ
2974 }
2975 if (regmatch(next))
2976 sayYES;
02db2b7b 2977 REGCP_UNWIND(lastcp);
c277df42
IZ
2978 }
2979 /* Couldn't or didn't -- move forward. */
3280af22 2980 PL_reginput = locinput;
c277df42
IZ
2981 if (regrepeat_hard(scan, 1, &l)) {
2982 ln++;
3280af22 2983 locinput = PL_reginput;
c277df42
IZ
2984 }
2985 else
2986 sayNO;
2987 }
a0ed51b3
LW
2988 }
2989 else {
c277df42
IZ
2990 n = regrepeat_hard(scan, n, &l);
2991 if (n != 0 && l == 0
2992 /* In fact, this is tricky. If paren, then the
2993 fact that we did/didnot match may influence
2994 future execution. */
2995 && !(paren && ln == 0))
2996 ln = n;
3280af22 2997 locinput = PL_reginput;
c277df42 2998 DEBUG_r(
5c0ca799 2999 PerlIO_printf(Perl_debug_log,
faccc32b 3000 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3001 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3002 (IV) n, (IV)l)
c277df42
IZ
3003 );
3004 if (n >= ln) {
22c35a8c 3005 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3006 c1 = (U8)*STRING(next);
c277df42 3007 if (OP(next) == EXACTF)
22c35a8c 3008 c2 = PL_fold[c1];
c277df42 3009 else if (OP(next) == EXACTFL)
22c35a8c 3010 c2 = PL_fold_locale[c1];
c277df42
IZ
3011 else
3012 c2 = c1;
a0ed51b3
LW
3013 }
3014 else
c277df42
IZ
3015 c1 = c2 = -1000;
3016 }
02db2b7b 3017 REGCP_SET(lastcp);
c277df42
IZ
3018 while (n >= ln) {
3019 /* If it could work, try it. */
3020 if (c1 == -1000 ||
3280af22
NIS
3021 UCHARAT(PL_reginput) == c1 ||
3022 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3023 {
3024 DEBUG_r(
c3464db5 3025 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3026 "%*s trying tail with n=%"IVdf"...\n",
3027 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3028 );
3029 if (paren) {
3030 if (n) {
cf93c79d
IZ
3031 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3032 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3033 }
a0ed51b3 3034 else
cf93c79d 3035 PL_regendp[paren] = -1;
c277df42 3036 }
a0ed51b3
LW
3037 if (regmatch(next))
3038 sayYES;
02db2b7b 3039 REGCP_UNWIND(lastcp);
a0ed51b3 3040 }
c277df42
IZ
3041 /* Couldn't or didn't -- back up. */
3042 n--;
dfe13c55 3043 locinput = HOPc(locinput, -l);
3280af22 3044 PL_reginput = locinput;
c277df42
IZ
3045 }
3046 }
3047 sayNO;
3048 break;
3049 }
3050 case CURLYN:
3051 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3052 if (paren > PL_regsize)
3053 PL_regsize = paren;
3054 if (paren > *PL_reglastparen)
3055 *PL_reglastparen = paren;
c277df42
IZ
3056 ln = ARG1(scan); /* min to match */
3057 n = ARG2(scan); /* max to match */
dc45a647 3058 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3059 goto repeat;
a0d0e21e 3060 case CURLY:
c277df42 3061 paren = 0;
a0d0e21e
LW
3062 ln = ARG1(scan); /* min to match */
3063 n = ARG2(scan); /* max to match */
dc45a647 3064 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3065 goto repeat;
3066 case STAR:
3067 ln = 0;
c277df42 3068 n = REG_INFTY;
a0d0e21e 3069 scan = NEXTOPER(scan);
c277df42 3070 paren = 0;
a0d0e21e
LW
3071 goto repeat;
3072 case PLUS:
c277df42
IZ
3073 ln = 1;
3074 n = REG_INFTY;
3075 scan = NEXTOPER(scan);
3076 paren = 0;
3077 repeat:
a0d0e21e
LW
3078 /*
3079 * Lookahead to avoid useless match attempts
3080 * when we know what character comes next.
3081 */
22c35a8c 3082 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3083 c1 = (U8)*STRING(next);
bbce6d69 3084 if (OP(next) == EXACTF)
22c35a8c 3085 c2 = PL_fold[c1];
bbce6d69 3086 else if (OP(next) == EXACTFL)
22c35a8c 3087 c2 = PL_fold_locale[c1];
bbce6d69 3088 else
3089 c2 = c1;
3090 }
a0d0e21e 3091 else
bbce6d69 3092 c1 = c2 = -1000;
3280af22 3093 PL_reginput = locinput;
a0d0e21e 3094 if (minmod) {
c277df42 3095 CHECKPOINT lastcp;
a0d0e21e
LW
3096 minmod = 0;
3097 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3098 sayNO;
a0ed51b3 3099 locinput = PL_reginput;
02db2b7b 3100 REGCP_SET(lastcp);
0fe9bf95
IZ
3101 if (c1 != -1000) {
3102 char *e = locinput + n - ln; /* Should not check after this */
3103 char *old = locinput;
3104
3105 if (e >= PL_regeol || (n == REG_INFTY))
3106 e = PL_regeol - 1;
3107 while (1) {
3108 /* Find place 'next' could work */
3109 if (c1 == c2) {
3110 while (locinput <= e && *locinput != c1)
3111 locinput++;
3112 } else {
3113 while (locinput <= e
3114 && *locinput != c1
3115 && *locinput != c2)
3116 locinput++;
3117 }
3118 if (locinput > e)
3119 sayNO;
3120 /* PL_reginput == old now */
3121 if (locinput != old) {
3122 ln = 1; /* Did some */
3123 if (regrepeat(scan, locinput - old) <
3124 locinput - old)
3125 sayNO;
3126 }
3127 /* PL_reginput == locinput now */
29d1e993 3128 TRYPAREN(paren, ln, locinput);
0fe9bf95 3129 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3130 REGCP_UNWIND(lastcp);
0fe9bf95
IZ
3131 /* Couldn't or didn't -- move forward. */
3132 old = locinput++;
3133 }
3134 }
3135 else
c277df42 3136 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 3137 /* If it could work, try it. */
bbce6d69 3138 if (c1 == -1000 ||
3280af22
NIS
3139 UCHARAT(PL_reginput) == c1 ||
3140 UCHARAT(PL_reginput) == c2)
bbce6d69 3141 {
29d1e993 3142 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3143 REGCP_UNWIND(lastcp);
bbce6d69 3144 }
c277df42 3145 /* Couldn't or didn't -- move forward. */
a0ed51b3 3146 PL_reginput = locinput;
a0d0e21e
LW
3147 if (regrepeat(scan, 1)) {
3148 ln++;
a0ed51b3
LW
3149 locinput = PL_reginput;
3150 }
3151 else
4633a7c4 3152 sayNO;
a0d0e21e
LW
3153 }
3154 }
3155 else {
c277df42 3156 CHECKPOINT lastcp;
a0d0e21e 3157 n = regrepeat(scan, n);
a0ed51b3 3158 locinput = PL_reginput;
22c35a8c 3159 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3160 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3161 ln = n; /* why back off? */
1aeab75a
GS
3162 /* ...because $ and \Z can match before *and* after
3163 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3164 We should back off by one in this case. */
3165 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3166 ln--;
3167 }
02db2b7b 3168 REGCP_SET(lastcp);
c277df42
IZ
3169 if (paren) {
3170 while (n >= ln) {
3171 /* If it could work, try it. */
3172 if (c1 == -1000 ||
3280af22
NIS
3173 UCHARAT(PL_reginput) == c1 ||
3174 UCHARAT(PL_reginput) == c2)
c277df42 3175 {
29d1e993 3176 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3177 REGCP_UNWIND(lastcp);
c277df42
IZ
3178 }
3179 /* Couldn't or didn't -- back up. */
3180 n--;
dfe13c55 3181 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3182 }
a0ed51b3
LW
3183 }
3184 else {
c277df42
IZ
3185 while (n >= ln) {
3186 /* If it could work, try it. */
3187 if (c1 == -1000 ||
3280af22
NIS
3188 UCHARAT(PL_reginput) == c1 ||
3189 UCHARAT(PL_reginput) == c2)
c277df42 3190 {
29d1e993 3191 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3192 REGCP_UNWIND(lastcp);
c277df42
IZ
3193 }
3194 /* Couldn't or didn't -- back up. */
3195 n--;
dfe13c55 3196 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3197 }
a0d0e21e
LW
3198 }
3199 }
4633a7c4 3200 sayNO;
c277df42 3201 break;
a0d0e21e 3202 case END:
0f5d15d6
IZ
3203 if (PL_reg_call_cc) {
3204 re_cc_state *cur_call_cc = PL_reg_call_cc;
3205 CURCUR *cctmp = PL_regcc;
3206 regexp *re = PL_reg_re;
3207 CHECKPOINT cp, lastcp;
3208
3209 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3210 REGCP_SET(lastcp);
0f5d15d6
IZ
3211 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3212 the caller. */
3213 PL_reginput = locinput; /* Make position available to
3214 the callcc. */
3215 cache_re(PL_reg_call_cc->re);
3216 PL_regcc = PL_reg_call_cc->cc;
3217 PL_reg_call_cc = PL_reg_call_cc->prev;
3218 if (regmatch(cur_call_cc->node)) {
3219 PL_reg_call_cc = cur_call_cc;
3220 regcpblow(cp);
3221 sayYES;
3222 }
02db2b7b 3223 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3224 regcppop();
3225 PL_reg_call_cc = cur_call_cc;
3226 PL_regcc = cctmp;
3227 PL_reg_re = re;
3228 cache_re(re);
3229
3230 DEBUG_r(
3231 PerlIO_printf(Perl_debug_log,
3232 "%*s continuation failed...\n",
3233 REPORT_CODE_OFF+PL_regindent*2, "")
3234 );
7821416a 3235 sayNO_SILENT;
0f5d15d6 3236 }
7821416a
IZ
3237 if (locinput < PL_regtill) {
3238 DEBUG_r(PerlIO_printf(Perl_debug_log,
3239 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3240 PL_colors[4],
3241 (long)(locinput - PL_reg_starttry),
3242 (long)(PL_regtill - PL_reg_starttry),
3243 PL_colors[5]));
3244 sayNO_FINAL; /* Cannot match: too short. */
3245 }
3246 PL_reginput = locinput; /* put where regtry can find it */
3247 sayYES_FINAL; /* Success! */
7e5428c5 3248 case SUCCEED:
3280af22 3249 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3250 sayYES_LOUD; /* Success! */
c277df42
IZ
3251 case SUSPEND:
3252 n = 1;
9fe1d20c 3253 PL_reginput = locinput;
c277df42 3254 goto do_ifmatch;
a0d0e21e 3255 case UNLESSM:
c277df42 3256 n = 0;
a0ed51b3 3257 if (scan->flags) {
0fe9bf95
IZ
3258 if (UTF) { /* XXXX This is absolutely
3259 broken, we read before
3260 start of string. */
3261 s = HOPMAYBEc(locinput, -scan->flags);
3262 if (!s)
3263 goto say_yes;
3264 PL_reginput = s;
3265 }
3266 else {
3267 if (locinput < PL_bostr + scan->flags)
3268 goto say_yes;
3269 PL_reginput = locinput - scan->flags;
3270 goto do_ifmatch;
3271 }
a0ed51b3
LW
3272 }
3273 else
3274 PL_reginput = locinput;
c277df42
IZ
3275 goto do_ifmatch;
3276 case IFMATCH:
3277 n = 1;
a0ed51b3 3278 if (scan->flags) {
0fe9bf95
IZ
3279 if (UTF) { /* XXXX This is absolutely
3280 broken, we read before
3281 start of string. */
3282 s = HOPMAYBEc(locinput, -scan->flags);
3283 if (!s || s < PL_bostr)
3284 goto say_no;
3285 PL_reginput = s;
3286 }
3287 else {
3288 if (locinput < PL_bostr + scan->flags)
3289 goto say_no;
3290 PL_reginput = locinput - scan->flags;
3291 goto do_ifmatch;
3292 }
a0ed51b3
LW
3293 }
3294 else
3295 PL_reginput = locinput;
3296
c277df42 3297 do_ifmatch:
c277df42
IZ
3298 inner = NEXTOPER(NEXTOPER(scan));
3299 if (regmatch(inner) != n) {
3300 say_no:
3301 if (logical) {
3302 logical = 0;
3303 sw = 0;
3304 goto do_longjump;
a0ed51b3
LW
3305 }
3306 else
c277df42
IZ
3307 sayNO;
3308 }
3309 say_yes:
3310 if (logical) {
3311 logical = 0;
3312 sw = 1;
3313 }
fe44a5e8 3314 if (OP(scan) == SUSPEND) {
3280af22 3315 locinput = PL_reginput;
565764a8 3316 nextchr = UCHARAT(locinput);
fe44a5e8 3317 }
c277df42
IZ
3318 /* FALL THROUGH. */
3319 case LONGJMP:
3320 do_longjump:
3321 next = scan + ARG(scan);
3322 if (next == scan)
3323 next = NULL;
a0d0e21e
LW
3324 break;
3325 default:
b900a521 3326 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3327 PTR2UV(scan), OP(scan));
cea2e8a9 3328 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3329 }
02db2b7b 3330 reenter:
a0d0e21e
LW
3331 scan = next;
3332 }
a687059c 3333
a0d0e21e
LW
3334 /*
3335 * We get here only if there's trouble -- normally "case END" is
3336 * the terminating point.
3337 */
cea2e8a9 3338 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3339 /*NOTREACHED*/
4633a7c4
LW
3340 sayNO;
3341
7821416a
IZ
3342yes_loud:
3343 DEBUG_r(
3344 PerlIO_printf(Perl_debug_log,
3345 "%*s %scould match...%s\n",
3346 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3347 );
3348 goto yes;
3349yes_final:
3350 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3351 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3352yes:
3353#ifdef DEBUGGING
3280af22 3354 PL_regindent--;
4633a7c4 3355#endif
02db2b7b
IZ
3356
3357#if 0 /* Breaks $^R */
3358 if (unwind)
3359 regcpblow(firstcp);
3360#endif
4633a7c4
LW
3361 return 1;
3362
3363no:
7821416a
IZ
3364 DEBUG_r(
3365 PerlIO_printf(Perl_debug_log,
3366 "%*s %sfailed...%s\n",
3367 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3368 );
3369 goto do_no;
3370no_final:
3371do_no:
02db2b7b
IZ
3372 if (unwind) {
3373 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3374
3375 switch (uw->type) {
3376 case RE_UNWIND_BRANCH:
3377 case RE_UNWIND_BRANCHJ:
3378 {
3379 re_unwind_branch_t *uwb = &(uw->branch);
3380 I32 lastparen = uwb->lastparen;
3381
3382 REGCP_UNWIND(uwb->lastcp);
3383 for (n = *PL_reglastparen; n > lastparen; n--)
3384 PL_regendp[n] = -1;
3385 *PL_reglastparen = n;
3386 scan = next = uwb->next;
3387 if ( !scan ||
3388 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3389 ? BRANCH : BRANCHJ) ) { /* Failure */
3390 unwind = uwb->prev;
3391#ifdef DEBUGGING
3392 PL_regindent--;
3393#endif
3394 goto do_no;
3395 }
3396 /* Have more choice yet. Reuse the same uwb. */
3397 /*SUPPRESS 560*/
3398 if ((n = (uwb->type == RE_UNWIND_BRANCH
3399 ? NEXT_OFF(next) : ARG(next))))
3400 next += n;
3401 else
3402 next = NULL; /* XXXX Needn't unwinding in this case... */
3403 uwb->next = next;
3404 next = NEXTOPER(scan);
3405 if (uwb->type == RE_UNWIND_BRANCHJ)
3406 next = NEXTOPER(next);
3407 locinput = uwb->locinput;
3408 nextchr = uwb->nextchr;
3409#ifdef DEBUGGING
3410 PL_regindent = uwb->regindent;
3411#endif
3412
3413 goto reenter;
3414 }
3415 /* NOT REACHED */
3416 default:
3417 Perl_croak(aTHX_ "regexp unwind memory corruption");
3418 }
3419 /* NOT REACHED */
3420 }
4633a7c4 3421#ifdef DEBUGGING
3280af22 3422 PL_regindent--;
4633a7c4 3423#endif
a0d0e21e 3424 return 0;
a687059c
LW
3425}
3426
3427/*
3428 - regrepeat - repeatedly match something simple, report how many
3429 */
3430/*
3431 * [This routine now assumes that it will only match on things of length 1.
3432 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3433 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3434 */
76e3520e 3435STATIC I32
cea2e8a9 3436S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3437{
5c0ca799 3438 dTHR;
a0d0e21e 3439 register char *scan;
a0d0e21e 3440 register I32 c;
3280af22 3441 register char *loceol = PL_regeol;
a0ed51b3 3442 register I32 hardcount = 0;
a0d0e21e 3443
3280af22 3444 scan = PL_reginput;
c277df42 3445 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3446 loceol = scan + max;
a0d0e21e 3447 switch (OP(p)) {
22c35a8c 3448 case REG_ANY:
a0d0e21e
LW
3449 while (scan < loceol && *scan != '\n')
3450 scan++;
3451 break;
3452 case SANY:
3453 scan = loceol;
3454 break;
a0ed51b3
LW
3455 case ANYUTF8:
3456 loceol = PL_regeol;
3457 while (scan < loceol && *scan != '\n') {
3458 scan += UTF8SKIP(scan);
3459 hardcount++;
3460 }
3461 break;
3462 case SANYUTF8:
3463 loceol = PL_regeol;
3464 while (scan < loceol) {
3465 scan += UTF8SKIP(scan);
3466 hardcount++;
3467 }
3468 break;
bbce6d69 3469 case EXACT: /* length of string is 1 */
cd439c50 3470 c = (U8)*STRING(p);
bbce6d69 3471 while (scan < loceol && UCHARAT(scan) == c)
3472 scan++;
3473 break;
3474 case EXACTF: /* length of string is 1 */
cd439c50 3475 c = (U8)*STRING(p);
bbce6d69 3476 while (scan < loceol &&
22c35a8c 3477 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3478 scan++;
3479 break;
3480 case EXACTFL: /* length of string is 1 */
3280af22 3481 PL_reg_flags |= RF_tainted;
cd439c50 3482 c = (U8)*STRING(p);
bbce6d69 3483 while (scan < loceol &&
22c35a8c 3484 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3485 scan++;
3486 break;
a0ed51b3
LW
3487 case ANYOFUTF8:
3488 loceol = PL_regeol;
3489 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3490 scan += UTF8SKIP(scan);
3491 hardcount++;
3492 }
3493 break;
a0d0e21e 3494 case ANYOF:
936ed897 3495 while (scan < loceol && REGINCLASS(p, *scan))
a0d0e21e 3496 scan++;
a0d0e21e
LW
3497 break;
3498 case ALNUM:
3499 while (scan < loceol && isALNUM(*scan))
3500 scan++;
3501 break;
a0ed51b3
LW
3502 case ALNUMUTF8:
3503 loceol = PL_regeol;
dfe13c55 3504 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3505 scan += UTF8SKIP(scan);
3506 hardcount++;
3507 }
3508 break;
bbce6d69 3509 case ALNUML:
3280af22 3510 PL_reg_flags |= RF_tainted;
bbce6d69 3511 while (scan < loceol && isALNUM_LC(*scan))
3512 scan++;
3513 break;
a0ed51b3
LW
3514 case ALNUMLUTF8:
3515 PL_reg_flags |= RF_tainted;
3516 loceol = PL_regeol;
dfe13c55 3517 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3518 scan += UTF8SKIP(scan);
3519 hardcount++;
3520 }
3521 break;
3522 break;
a0d0e21e
LW
3523 case NALNUM:
3524 while (scan < loceol && !isALNUM(*scan))
3525 scan++;
3526 break;
a0ed51b3
LW
3527 case NALNUMUTF8:
3528 loceol = PL_regeol;
dfe13c55 3529 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3530 scan += UTF8SKIP(scan);
3531 hardcount++;
3532 }
3533 break;
bbce6d69 3534 case NALNUML:
3280af22 3535 PL_reg_flags |= RF_tainted;
bbce6d69 3536 while (scan < loceol && !isALNUM_LC(*scan))
3537 scan++;
3538 break;
a0ed51b3
LW
3539 case NALNUMLUTF8:
3540 PL_reg_flags |= RF_tainted;
3541 loceol = PL_regeol;
dfe13c55 3542 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3543 scan += UTF8SKIP(scan);
3544 hardcount++;
3545 }
3546 break;
a0d0e21e
LW
3547 case SPACE:
3548 while (scan < loceol && isSPACE(*scan))
3549 scan++;
3550 break;
a0ed51b3
LW
3551 case SPACEUTF8:
3552 loceol = PL_regeol;
dfe13c55 3553 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3554 scan += UTF8SKIP(scan);
3555 hardcount++;
3556 }
3557 break;
bbce6d69 3558 case SPACEL:
3280af22 3559 PL_reg_flags |= RF_tainted;
bbce6d69 3560 while (scan < loceol && isSPACE_LC(*scan))
3561 scan++;
3562 break;
a0ed51b3
LW
3563 case SPACELUTF8:
3564 PL_reg_flags |= RF_tainted;
3565 loceol = PL_regeol;
dfe13c55 3566 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3567 scan += UTF8SKIP(scan);
3568 hardcount++;
3569 }
3570 break;
a0d0e21e
LW
3571 case NSPACE:
3572 while (scan < loceol && !isSPACE(*scan))
3573 scan++;
3574 break;
a0ed51b3
LW
3575 case NSPACEUTF8:
3576 loceol = PL_regeol;
dfe13c55 3577 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3578 scan += UTF8SKIP(scan);
3579 hardcount++;
3580 }
3581 break;
bbce6d69 3582 case NSPACEL:
3280af22 3583 PL_reg_flags |= RF_tainted;
bbce6d69 3584 while (scan < loceol && !isSPACE_LC(*scan))
3585 scan++;
3586 break;
a0ed51b3
LW
3587 case NSPACELUTF8:
3588 PL_reg_flags |= RF_tainted;
3589 loceol = PL_regeol;
dfe13c55 3590 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3591 scan += UTF8SKIP(scan);
3592 hardcount++;
3593 }
3594 break;
a0d0e21e
LW
3595 case DIGIT:
3596 while (scan < loceol && isDIGIT(*scan))
3597 scan++;
3598 break;
a0ed51b3
LW
3599 case DIGITUTF8:
3600 loceol = PL_regeol;
dfe13c55 3601 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3602 scan += UTF8SKIP(scan);
3603 hardcount++;
3604 }
3605 break;
3606 break;
a0d0e21e
LW
3607 case NDIGIT:
3608 while (scan < loceol && !isDIGIT(*scan))
3609 scan++;
3610 break;
a0ed51b3
LW
3611 case NDIGITUTF8:
3612 loceol = PL_regeol;
dfe13c55 3613 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3614 scan += UTF8SKIP(scan);
3615 hardcount++;
3616 }
3617 break;
a0d0e21e
LW
3618 default: /* Called on something of 0 width. */
3619 break; /* So match right here or not at all. */
3620 }
a687059c 3621
a0ed51b3
LW
3622 if (hardcount)
3623 c = hardcount;
3624 else
3625 c = scan - PL_reginput;
3280af22 3626 PL_reginput = scan;
a687059c 3627
c277df42
IZ
3628 DEBUG_r(
3629 {
3630 SV *prop = sv_newmortal();
3631
3632 regprop(prop, p);
3633 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3634 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3635 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3636 });
3637
a0d0e21e 3638 return(c);
a687059c
LW
3639}
3640
3641/*
c277df42
IZ
3642 - regrepeat_hard - repeatedly match something, report total lenth and length
3643 *
3644 * The repeater is supposed to have constant length.
3645 */
3646
76e3520e 3647STATIC I32
cea2e8a9 3648S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3649{
5c0ca799 3650 dTHR;
c277df42
IZ
3651 register char *scan;
3652 register char *start;
3280af22 3653 register char *loceol = PL_regeol;
a0ed51b3 3654 I32 l = 0;
708e3b05 3655 I32 count = 0, res = 1;
a0ed51b3
LW
3656
3657 if (!max)
3658 return 0;
c277df42 3659
3280af22 3660 start = PL_reginput;
a0ed51b3 3661 if (UTF) {
708e3b05 3662 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3663 if (!count++) {
3664 l = 0;
3665 while (start < PL_reginput) {
3666 l++;
3667 start += UTF8SKIP(start);
3668 }
3669 *lp = l;
3670 if (l == 0)
3671 return max;
3672 }
3673 if (count == max)
3674 return count;
3675 }
3676 }
3677 else {
708e3b05 3678 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3679 if (!count++) {
3680 *lp = l = PL_reginput - start;
3681 if (max != REG_INFTY && l*max < loceol - scan)
3682 loceol = scan + l*max;
3683 if (l == 0)
3684 return max;
c277df42
IZ
3685 }
3686 }
3687 }
708e3b05 3688 if (!res)
3280af22 3689 PL_reginput = scan;
c277df42 3690
a0ed51b3 3691 return count;
c277df42
IZ
3692}
3693
3694/*
cb8d8820 3695 - reginclass - determine if a character falls into a character class
bbce6d69 3696 */
3697
76e3520e 3698STATIC bool
936ed897 3699S_reginclass(pTHX_ register regnode *p, register I32 c)
bbce6d69 3700{
5c0ca799 3701 dTHR;
b8c5462f 3702 char flags = ANYOF_FLAGS(p);
bbce6d69 3703 bool match = FALSE;
3704
3705 c &= 0xFF;
b8c5462f 3706 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3707 match = TRUE;
3708 else if (flags & ANYOF_FOLD) {
3709 I32 cf;
3710 if (flags & ANYOF_LOCALE) {
3280af22 3711 PL_reg_flags |= RF_tainted;
22c35a8c 3712 cf = PL_fold_locale[c];
bbce6d69 3713 }
3714 else
22c35a8c 3715 cf = PL_fold[c];
b8c5462f 3716 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3717 match = TRUE;
3718 }
3719
b8c5462f 3720 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3721 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3722 if (
3723 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3724 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3725 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3726 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3727 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3728 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3729 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3730 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3731 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3732 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3733 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3734 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3735 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3736 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3737 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3738 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3739 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3740 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3741 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3742 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3743 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3744 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3745 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3746 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3747 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
aaa51d5e
JF
3748 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3749 (ANYOF_CLASS_TEST(p, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3750 (ANYOF_CLASS_TEST(p, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3751 (ANYOF_CLASS_TEST(p, ANYOF_BLANK) && isBLANK(c)) ||
3752 (ANYOF_CLASS_TEST(p, ANYOF_NBLANK) && !isBLANK(c))
b8c5462f 3753 ) /* How's that for a conditional? */
bbce6d69 3754 {
3755 match = TRUE;
3756 }
3757 }
3758
ae5c130c 3759 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3760}
3761
a0ed51b3 3762STATIC bool
cea2e8a9 3763S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
3764{
3765 dTHR;
a0ed51b3
LW
3766 char flags = ARG1(f);
3767 bool match = FALSE;
3768 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3769
3770 if (swash_fetch(sv, p))
3771 match = TRUE;
3772 else if (flags & ANYOF_FOLD) {
806e7201 3773 U8 tmpbuf[UTF8_MAXLEN];
a0ed51b3
LW
3774 if (flags & ANYOF_LOCALE) {
3775 PL_reg_flags |= RF_tainted;
3776 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3777 }
3778 else
3779 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3780 if (swash_fetch(sv, tmpbuf))
3781 match = TRUE;
3782 }
3783
b8c5462f 3784 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3785
3786 return (flags & ANYOF_INVERT) ? !match : match;
3787}
161b471a 3788
dfe13c55 3789STATIC U8 *
cea2e8a9 3790S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
3791{
3792 dTHR;
a0ed51b3
LW
3793 if (off >= 0) {
3794 while (off-- && s < (U8*)PL_regeol)
3795 s += UTF8SKIP(s);
3796 }
3797 else {
3798 while (off++) {
3799 if (s > (U8*)PL_bostr) {
3800 s--;
3801 if (*s & 0x80) {
3802 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3803 s--;
3804 } /* XXX could check well-formedness here */
3805 }
3806 }
3807 }
3808 return s;
3809}
161b471a 3810
dfe13c55 3811STATIC U8 *
cea2e8a9 3812S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3813{
c485e607 3814 dTHR;
a0ed51b3
LW
3815 if (off >= 0) {
3816 while (off-- && s < (U8*)PL_regeol)
3817 s += UTF8SKIP(s);
3818 if (off >= 0)
3819 return 0;
3820 }
3821 else {
3822 while (off++) {
3823 if (s > (U8*)PL_bostr) {
3824 s--;
3825 if (*s & 0x80) {
3826 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3827 s--;
3828 } /* XXX could check well-formedness here */
3829 }
3830 else
3831 break;
3832 }
3833 if (off <= 0)
3834 return 0;
3835 }
3836 return s;
3837}
51371543
GS
3838
3839#ifdef PERL_OBJECT
51371543
GS
3840#include "XSUB.h"
3841#endif
3842
3843static void
3844restore_pos(pTHXo_ void *arg)
3845{
3846 dTHR;
3847 if (PL_reg_eval_set) {
3848 if (PL_reg_oldsaved) {
3849 PL_reg_re->subbeg = PL_reg_oldsaved;
3850 PL_reg_re->sublen = PL_reg_oldsavedlen;
3851 RX_MATCH_COPIED_on(PL_reg_re);
3852 }
3853 PL_reg_magic->mg_len = PL_reg_oldpos;
3854 PL_reg_eval_set = 0;
3855 PL_curpm = PL_reg_oldcurpm;
3856 }
3857}