This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: [ID 20000809.005] trouble with long string and /m modifier - uninitialized value
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
3818b22b 69 **** Copyright (c) 1991-2000, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
c3464db5 148# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b
JH
149 " Setting an EVAL scope, savestack=%"IVdf"\n", \
150 (IV)PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 151
3280af22 152# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5 153 PerlIO_printf(Perl_debug_log, \
faccc32b
JH
154 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
155 (IV)lastcp, (IV)PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5 178 PerlIO_printf(Perl_debug_log,
b900a521
JH
179 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
180 (UV)paren, (IV)PL_regstartp[paren],
181 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
182 (IV)PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 188 PerlIO_printf(Perl_debug_log,
faccc32b
JH
189 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
190 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
191 }
192 );
3280af22
NIS
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d
IZ
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e
LW
197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6
IZ
205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6
IZ
211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
c277df42 222#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 223
29d1e993
HS
224#define TRYPAREN(paren, n, input) { \
225 if (paren) { \
226 if (n) { \
227 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
228 PL_regendp[paren] = input - PL_bostr; \
229 } \
230 else \
231 PL_regendp[paren] = -1; \
232 } \
233 if (regmatch(next)) \
234 sayYES; \
235 if (paren && n) \
236 PL_regendp[paren] = -1; \
237}
238
239
a687059c 240/*
e50aee73 241 * pregexec and friends
a687059c
LW
242 */
243
244/*
c277df42 245 - pregexec - match a regexp against a string
a687059c 246 */
c277df42 247I32
864dbfa3 248Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 249 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
250/* strend: pointer to null at end of string */
251/* strbeg: real beginning of string */
252/* minend: end of match must be >=minend after stringarg. */
253/* nosave: For optimizations. */
254{
255 return
256 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
257 nosave ? 0 : REXEC_COPY_STR);
258}
0f5d15d6
IZ
259
260STATIC void
cea2e8a9 261S_cache_re(pTHX_ regexp *prog)
0f5d15d6 262{
46124e9e 263 dTHR;
0f5d15d6
IZ
264 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
265#ifdef DEBUGGING
266 PL_regprogram = prog->program;
267#endif
268 PL_regnpar = prog->nparens;
269 PL_regdata = prog->data;
270 PL_reg_re = prog;
271}
22e551b9 272
cad2e5aa
JH
273/*
274 * Need to implement the following flags for reg_anch:
275 *
276 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
277 * USE_INTUIT_ML
278 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
279 * INTUIT_AUTORITATIVE_ML
280 * INTUIT_ONCE_NOML - Intuit can match in one location only.
281 * INTUIT_ONCE_ML
282 *
283 * Another flag for this function: SECOND_TIME (so that float substrs
284 * with giant delta may be not rechecked).
285 */
286
287/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
288
2c2d71f5 289/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
290 Otherwise, only SvCUR(sv) is used to get strbeg. */
291
292/* XXXX We assume that strpos is strbeg unless sv. */
293
6eb5f6b9
JH
294/* XXXX Some places assume that there is a fixed substring.
295 An update may be needed if optimizer marks as "INTUITable"
296 RExen without fixed substrings. Similarly, it is assumed that
297 lengths of all the strings are no more than minlen, thus they
298 cannot come from lookahead.
299 (Or minlen should take into account lookahead.) */
300
2c2d71f5
JH
301/* A failure to find a constant substring means that there is no need to make
302 an expensive call to REx engine, thus we celebrate a failure. Similarly,
303 finding a substring too deep into the string means that less calls to
30944b6d
IZ
304 regtry() should be needed.
305
306 REx compiler's optimizer found 4 possible hints:
307 a) Anchored substring;
308 b) Fixed substring;
309 c) Whether we are anchored (beginning-of-line or \G);
310 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 311 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
312 string which does not contradict any of them.
313 */
2c2d71f5 314
6eb5f6b9
JH
315/* Most of decisions we do here should have been done at compile time.
316 The nodes of the REx which we used for the search should have been
317 deleted from the finite automaton. */
318
cad2e5aa
JH
319char *
320Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
321 char *strend, U32 flags, re_scream_pos_data *data)
322{
2c2d71f5 323 register I32 start_shift;
cad2e5aa 324 /* Should be nonnegative! */
2c2d71f5
JH
325 register I32 end_shift;
326 register char *s;
327 register SV *check;
cad2e5aa
JH
328 char *t;
329 I32 ml_anch;
2c2d71f5 330 char *tmp;
6eb5f6b9
JH
331 register char *other_last = Nullch; /* other substr checked before this */
332 char *check_at; /* check substr found at this pos */
30944b6d
IZ
333#ifdef DEBUGGING
334 char *i_strpos = strpos;
335#endif
cad2e5aa
JH
336
337 DEBUG_r( if (!PL_colorset) reginitcolors() );
338 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 339 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
340 PL_colors[4],PL_colors[5],PL_colors[0],
341 prog->precomp,
342 PL_colors[1],
343 (strlen(prog->precomp) > 60 ? "..." : ""),
344 PL_colors[0],
b900a521 345 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
346 strpos, PL_colors[1],
347 (strend - strpos > 60 ? "..." : ""))
348 );
349
2c2d71f5
JH
350 if (prog->minlen > strend - strpos) {
351 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 352 goto fail;
2c2d71f5 353 }
653099ff 354 check = prog->check_substr;
2c2d71f5 355 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
356 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
357 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 358 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa
JH
359
360 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
2c2d71f5 361 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
362 I32 slen;
363
364 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
05b4157f
GS
365 /* SvCUR is not set on references: SvRV and SvPVX overlap */
366 && sv && !SvROK(sv)
367 && (strpos + SvCUR(sv) != strend)) {
2c2d71f5 368 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
cad2e5aa 369 goto fail;
2c2d71f5 370 }
adac82c7 371 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 372 s = HOPc(strpos, prog->check_offset_min);
653099ff
GS
373 if (SvTAIL(check)) {
374 slen = SvCUR(check); /* >= 1 */
cad2e5aa 375
2c2d71f5
JH
376 if ( strend - s > slen || strend - s < slen - 1
377 || (strend - s == slen && strend[-1] != '\n')) {
378 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
379 goto fail_finish;
cad2e5aa
JH
380 }
381 /* Now should match s[0..slen-2] */
382 slen--;
653099ff 383 if (slen && (*SvPVX(check) != *s
cad2e5aa 384 || (slen > 1
653099ff 385 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
386 report_neq:
387 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
388 goto fail_finish;
389 }
cad2e5aa 390 }
653099ff
GS
391 else if (*SvPVX(check) != *s
392 || ((slen = SvCUR(check)) > 1
393 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
394 goto report_neq;
395 goto success_at_start;
cad2e5aa 396 }
2c2d71f5 397 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 398 s = strpos;
2c2d71f5 399 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 400 end_shift = prog->minlen - start_shift -
653099ff 401 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 402 if (!ml_anch) {
653099ff
GS
403 I32 end = prog->check_offset_max + CHR_SVLEN(check)
404 - (SvTAIL(check) != 0);
2c2d71f5
JH
405 I32 eshift = strend - s - end;
406
407 if (end_shift < eshift)
408 end_shift = eshift;
409 }
cad2e5aa 410 }
2c2d71f5 411 else { /* Can match at random position */
cad2e5aa
JH
412 ml_anch = 0;
413 s = strpos;
2c2d71f5
JH
414 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
415 /* Should be nonnegative! */
416 end_shift = prog->minlen - start_shift -
653099ff 417 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
418 }
419
2c2d71f5 420#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 421 if (end_shift < 0)
6bbae5e6 422 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
423#endif
424
2c2d71f5
JH
425 restart:
426 /* Find a possible match in the region s..strend by looking for
427 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 428 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
429 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
430 I32 p = -1; /* Internal iterator of scream. */
431 I32 *pp = data ? data->scream_pos : &p;
432
2c2d71f5
JH
433 if (PL_screamfirst[BmRARE(check)] >= 0
434 || ( BmRARE(check) == '\n'
435 && (BmPREVIOUS(check) == SvCUR(check) - 1)
436 && SvTAIL(check) ))
437 s = screaminstr(sv, check,
438 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 439 else
2c2d71f5 440 goto fail_finish;
cad2e5aa
JH
441 if (data)
442 *data->scream_olds = s;
443 }
444 else
445 s = fbm_instr((unsigned char*)s + start_shift,
446 (unsigned char*)strend - end_shift,
2c2d71f5 447 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
448
449 /* Update the count-of-usability, remove useless subpatterns,
450 unshift s. */
2c2d71f5
JH
451
452 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
453 (s ? "Found" : "Did not find"),
454 ((check == prog->anchored_substr) ? "anchored" : "floating"),
455 PL_colors[0],
7b0972df
JH
456 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
457 SvPVX(check),
2c2d71f5
JH
458 PL_colors[1], (SvTAIL(check) ? "$" : ""),
459 (s ? " at offset " : "...\n") ) );
460
461 if (!s)
462 goto fail_finish;
463
6eb5f6b9
JH
464 check_at = s;
465
2c2d71f5 466 /* Finish the diagnostic message */
30944b6d 467 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
468
469 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
470 Start with the other substr.
471 XXXX no SCREAM optimization yet - and a very coarse implementation
472 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
473 *always* match. Probably should be marked during compile...
474 Probably it is right to do no SCREAM here...
475 */
476
477 if (prog->float_substr && prog->anchored_substr) {
30944b6d 478 /* Take into account the "other" substring. */
2c2d71f5
JH
479 /* XXXX May be hopelessly wrong for UTF... */
480 if (!other_last)
6eb5f6b9 481 other_last = strpos;
2c2d71f5 482 if (check == prog->float_substr) {
30944b6d
IZ
483 do_other_anchored:
484 {
2c2d71f5
JH
485 char *last = s - start_shift, *last1, *last2;
486 char *s1 = s;
487
488 tmp = PL_bostr;
489 t = s - prog->check_offset_max;
490 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
491 && (!(prog->reganch & ROPT_UTF8)
492 || (PL_bostr = strpos, /* Used in regcopmaybe() */
493 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
494 && t > strpos)))
30944b6d 495 /* EMPTY */;
2c2d71f5
JH
496 else
497 t = strpos;
498 t += prog->anchored_offset;
6eb5f6b9
JH
499 if (t < other_last) /* These positions already checked */
500 t = other_last;
2c2d71f5
JH
501 PL_bostr = tmp;
502 last2 = last1 = strend - prog->minlen;
503 if (last < last1)
504 last1 = last;
505 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
506 /* On end-of-str: see comment below. */
507 s = fbm_instr((unsigned char*)t,
508 (unsigned char*)last1 + prog->anchored_offset
509 + SvCUR(prog->anchored_substr)
510 - (SvTAIL(prog->anchored_substr)!=0),
511 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
512 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
513 (s ? "Found" : "Contradicts"),
514 PL_colors[0],
7b0972df
JH
515 (int)(SvCUR(prog->anchored_substr)
516 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
517 SvPVX(prog->anchored_substr),
518 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
519 if (!s) {
520 if (last1 >= last2) {
521 DEBUG_r(PerlIO_printf(Perl_debug_log,
522 ", giving up...\n"));
523 goto fail_finish;
524 }
525 DEBUG_r(PerlIO_printf(Perl_debug_log,
526 ", trying floating at offset %ld...\n",
30944b6d 527 (long)(s1 + 1 - i_strpos)));
2c2d71f5 528 PL_regeol = strend; /* Used in HOP() */
6eb5f6b9 529 other_last = last1 + prog->anchored_offset + 1;
2c2d71f5
JH
530 s = HOPc(last, 1);
531 goto restart;
532 }
533 else {
534 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 535 (long)(s - i_strpos)));
2c2d71f5 536 t = s - prog->anchored_offset;
6eb5f6b9 537 other_last = s + 1;
30944b6d 538 s = s1;
2c2d71f5
JH
539 if (t == strpos)
540 goto try_at_start;
2c2d71f5
JH
541 goto try_at_offset;
542 }
30944b6d 543 }
2c2d71f5
JH
544 }
545 else { /* Take into account the floating substring. */
546 char *last, *last1;
547 char *s1 = s;
548
549 t = s - start_shift;
550 last1 = last = strend - prog->minlen + prog->float_min_offset;
551 if (last - t > prog->float_max_offset)
552 last = t + prog->float_max_offset;
553 s = t + prog->float_min_offset;
6eb5f6b9
JH
554 if (s < other_last)
555 s = other_last;
2c2d71f5
JH
556 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
557 /* fbm_instr() takes into account exact value of end-of-str
558 if the check is SvTAIL(ed). Since false positives are OK,
559 and end-of-str is not later than strend we are OK. */
560 s = fbm_instr((unsigned char*)s,
561 (unsigned char*)last + SvCUR(prog->float_substr)
562 - (SvTAIL(prog->float_substr)!=0),
563 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
564 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
565 (s ? "Found" : "Contradicts"),
566 PL_colors[0],
7b0972df
JH
567 (int)(SvCUR(prog->float_substr)
568 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
569 SvPVX(prog->float_substr),
570 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
571 if (!s) {
572 if (last1 == last) {
573 DEBUG_r(PerlIO_printf(Perl_debug_log,
574 ", giving up...\n"));
575 goto fail_finish;
576 }
577 DEBUG_r(PerlIO_printf(Perl_debug_log,
578 ", trying anchored starting at offset %ld...\n",
30944b6d 579 (long)(s1 + 1 - i_strpos)));
6eb5f6b9 580 other_last = last + 1;
2c2d71f5
JH
581 PL_regeol = strend; /* Used in HOP() */
582 s = HOPc(t, 1);
583 goto restart;
584 }
585 else {
586 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 587 (long)(s - i_strpos)));
6eb5f6b9 588 other_last = s + 1;
30944b6d 589 s = s1;
2c2d71f5
JH
590 if (t == strpos)
591 goto try_at_start;
2c2d71f5
JH
592 goto try_at_offset;
593 }
594 }
cad2e5aa 595 }
2c2d71f5
JH
596
597 t = s - prog->check_offset_max;
598 tmp = PL_bostr;
599 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
600 && (!(prog->reganch & ROPT_UTF8)
601 || (PL_bostr = strpos, /* Used in regcopmaybe() */
602 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
603 && t > strpos)))) {
604 PL_bostr = tmp;
605 /* Fixed substring is found far enough so that the match
606 cannot start at strpos. */
607 try_at_offset:
cad2e5aa 608 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
609 /* Eventually fbm_*() should handle this, but often
610 anchored_offset is not 0, so this check will not be wasted. */
611 /* XXXX In the code below we prefer to look for "^" even in
612 presence of anchored substrings. And we search even
613 beyond the found float position. These pessimizations
614 are historical artefacts only. */
615 find_anchor:
2c2d71f5 616 while (t < strend - prog->minlen) {
cad2e5aa 617 if (*t == '\n') {
4ee3650e 618 if (t < check_at - prog->check_offset_min) {
30944b6d 619 if (prog->anchored_substr) {
4ee3650e
GS
620 /* Since we moved from the found position,
621 we definitely contradict the found anchored
30944b6d
IZ
622 substr. Due to the above check we do not
623 contradict "check" substr.
624 Thus we can arrive here only if check substr
625 is float. Redo checking for "other"=="fixed".
626 */
627 strpos = t + 1;
628 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
629 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
630 goto do_other_anchored;
631 }
4ee3650e
GS
632 /* We don't contradict the found floating substring. */
633 /* XXXX Why not check for STCLASS? */
cad2e5aa 634 s = t + 1;
2c2d71f5 635 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 636 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
637 goto set_useful;
638 }
4ee3650e
GS
639 /* Position contradicts check-string */
640 /* XXXX probably better to look for check-string
641 than for "\n", so one should lower the limit for t? */
642 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 643 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 644 other_last = strpos = s = t + 1;
cad2e5aa
JH
645 goto restart;
646 }
647 t++;
648 }
2c2d71f5
JH
649 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
650 PL_colors[0],PL_colors[1]));
651 goto fail_finish;
cad2e5aa 652 }
f5952150
GS
653 else {
654 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
655 PL_colors[0],PL_colors[1]));
656 }
cad2e5aa
JH
657 s = t;
658 set_useful:
2c2d71f5 659 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
660 }
661 else {
2c2d71f5 662 PL_bostr = tmp;
f5952150 663 /* The found string does not prohibit matching at strpos,
2c2d71f5 664 - no optimization of calling REx engine can be performed,
f5952150
GS
665 unless it was an MBOL and we are not after MBOL,
666 or a future STCLASS check will fail this. */
2c2d71f5
JH
667 try_at_start:
668 /* Even in this situation we may use MBOL flag if strpos is offset
669 wrt the start of the string. */
05b4157f 670 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
d506a20d
IZ
671 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n'
672 /* May be due to an implicit anchor of m{.*foo} */
673 && !(prog->reganch & ROPT_IMPLICIT))
674 {
cad2e5aa
JH
675 t = strpos;
676 goto find_anchor;
677 }
30944b6d 678 DEBUG_r( if (ml_anch)
f5952150
GS
679 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
680 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 681 );
2c2d71f5 682 success_at_start:
30944b6d 683 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 684 && prog->check_substr /* Could be deleted already */
cad2e5aa 685 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
686 && prog->check_substr == prog->float_substr)
687 {
cad2e5aa 688 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 689 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
690 SvREFCNT_dec(prog->check_substr);
691 prog->check_substr = Nullsv; /* disable */
692 prog->float_substr = Nullsv; /* clear */
5e39e1e5 693 check = Nullsv; /* abort */
cad2e5aa 694 s = strpos;
3cf5c195
IZ
695 /* XXXX This is a remnant of the old implementation. It
696 looks wasteful, since now INTUIT can use many
6eb5f6b9 697 other heuristics. */
cad2e5aa
JH
698 prog->reganch &= ~RE_USE_INTUIT;
699 }
700 else
701 s = strpos;
702 }
703
6eb5f6b9
JH
704 /* Last resort... */
705 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
706 if (prog->regstclass) {
707 /* minlen == 0 is possible if regstclass is \b or \B,
708 and the fixed substr is ''$.
709 Since minlen is already taken into account, s+1 is before strend;
710 accidentally, minlen >= 1 guaranties no false positives at s + 1
711 even for \b or \B. But (minlen? 1 : 0) below assumes that
712 regstclass does not come from lookahead... */
713 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
714 This leaves EXACTF only, which is dealt with in find_byclass(). */
66e933ab
GS
715 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
716 ? STR_LEN(prog->regstclass)
717 : 1);
6eb5f6b9 718 char *endpos = (prog->anchored_substr || ml_anch)
66e933ab
GS
719 ? s + (prog->minlen? cl_l : 0)
720 : (prog->float_substr ? check_at - start_shift + cl_l
6eb5f6b9 721 : strend) ;
8fdd422b 722 char *startpos = sv && SvPOK(sv) ? strend - SvCUR(sv) : s;
6eb5f6b9
JH
723
724 t = s;
76384e4a
GS
725 if (prog->reganch & ROPT_UTF8) {
726 PL_regdata = prog->data; /* Used by REGINCLASS UTF logic */
727 PL_bostr = startpos;
728 }
6eb5f6b9
JH
729 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
730 if (!s) {
731#ifdef DEBUGGING
732 char *what;
733#endif
734 if (endpos == strend) {
735 DEBUG_r( PerlIO_printf(Perl_debug_log,
736 "Could not match STCLASS...\n") );
737 goto fail;
738 }
66e933ab
GS
739 DEBUG_r( PerlIO_printf(Perl_debug_log,
740 "This position contradicts STCLASS...\n") );
653099ff
GS
741 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
742 goto fail;
6eb5f6b9
JH
743 /* Contradict one of substrings */
744 if (prog->anchored_substr) {
6eb5f6b9
JH
745 if (prog->anchored_substr == check) {
746 DEBUG_r( what = "anchored" );
747 hop_and_restart:
748 PL_regeol = strend; /* Used in HOP() */
749 s = HOPc(t, 1);
66e933ab
GS
750 if (s + start_shift + end_shift > strend) {
751 /* XXXX Should be taken into account earlier? */
752 DEBUG_r( PerlIO_printf(Perl_debug_log,
753 "Could not match STCLASS...\n") );
754 goto fail;
755 }
5e39e1e5
HS
756 if (!check)
757 goto giveup;
6eb5f6b9 758 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 759 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
760 what, (long)(s + start_shift - i_strpos)) );
761 goto restart;
762 }
66e933ab 763 /* Have both, check_string is floating */
6eb5f6b9
JH
764 if (t + start_shift >= check_at) /* Contradicts floating=check */
765 goto retry_floating_check;
766 /* Recheck anchored substring, but not floating... */
767 s = check_at;
5e39e1e5
HS
768 if (!check)
769 goto giveup;
6eb5f6b9 770 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 771 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
772 (long)(other_last - i_strpos)) );
773 goto do_other_anchored;
774 }
60e71179
GS
775 /* Another way we could have checked stclass at the
776 current position only: */
777 if (ml_anch) {
778 s = t = t + 1;
5e39e1e5
HS
779 if (!check)
780 goto giveup;
60e71179 781 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
782 "Looking for /%s^%s/m starting at offset %ld...\n",
783 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 784 goto try_at_offset;
66e933ab 785 }
60e71179
GS
786 if (!prog->float_substr) /* Could have been deleted */
787 goto fail;
6eb5f6b9
JH
788 /* Check is floating subtring. */
789 retry_floating_check:
790 t = check_at - start_shift;
791 DEBUG_r( what = "floating" );
792 goto hop_and_restart;
793 }
794 DEBUG_r( if (t != s)
795 PerlIO_printf(Perl_debug_log,
796 "By STCLASS: moving %ld --> %ld\n",
797 (long)(t - i_strpos), (long)(s - i_strpos));
798 else
799 PerlIO_printf(Perl_debug_log,
800 "Does not contradict STCLASS...\n") );
801 }
5e39e1e5
HS
802 giveup:
803 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
804 PL_colors[4], (check ? "Guessed" : "Giving up"),
805 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 806 return s;
2c2d71f5
JH
807
808 fail_finish: /* Substring not found */
66e933ab
GS
809 if (prog->check_substr) /* could be removed already */
810 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 811 fail:
2c2d71f5 812 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
813 PL_colors[4],PL_colors[5]));
814 return Nullch;
815}
9661b544 816
6eb5f6b9 817/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
818STATIC char *
819S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 820{
6eb5f6b9
JH
821 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
822 char *m;
d8093b23 823 STRLEN ln;
824 unsigned int c1;
825 unsigned int c2;
6eb5f6b9
JH
826 char *e;
827 register I32 tmp = 1; /* Scratch variable? */
cad2e5aa 828
6eb5f6b9
JH
829 /* We know what class it must start with. */
830 switch (OP(c)) {
831 case ANYOFUTF8:
832 while (s < strend) {
833 if (REGINCLASSUTF8(c, (U8*)s)) {
834 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 835 goto got_it;
3cf5c195 836 else
6eb5f6b9 837 tmp = doevery;
a0ed51b3 838 }
6eb5f6b9
JH
839 else
840 tmp = 1;
a0ed51b3
LW
841 s += UTF8SKIP(s);
842 }
6eb5f6b9
JH
843 break;
844 case ANYOF:
a0ed51b3 845 while (s < strend) {
d8093b23 846 if (REGINCLASS(c, *(U8*)s)) {
6eb5f6b9
JH
847 if (tmp && (norun || regtry(prog, s)))
848 goto got_it;
849 else
850 tmp = doevery;
a0ed51b3 851 }
6eb5f6b9
JH
852 else
853 tmp = 1;
a0ed51b3 854 s++;
a0d0e21e 855 }
6eb5f6b9
JH
856 break;
857 case EXACTF:
858 m = STRING(c);
859 ln = STR_LEN(c);
d8093b23 860 c1 = *(U8*)m;
6eb5f6b9
JH
861 c2 = PL_fold[c1];
862 goto do_exactf;
863 case EXACTFL:
864 m = STRING(c);
865 ln = STR_LEN(c);
d8093b23 866 c1 = *(U8*)m;
6eb5f6b9
JH
867 c2 = PL_fold_locale[c1];
868 do_exactf:
869 e = strend - ln;
b3c9acc1 870
6eb5f6b9
JH
871 if (norun && e < s)
872 e = s; /* Due to minlen logic of intuit() */
b3c9acc1
IZ
873 /* Here it is NOT UTF! */
874 if (c1 == c2) {
875 while (s <= e) {
d8093b23 876 if ( *(U8*)s == c1
66e933ab
GS
877 && (ln == 1 || !(OP(c) == EXACTF
878 ? ibcmp(s, m, ln)
879 : ibcmp_locale(s, m, ln)))
6eb5f6b9 880 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
881 goto got_it;
882 s++;
883 }
884 } else {
885 while (s <= e) {
d8093b23 886 if ( (*(U8*)s == c1 || *(U8*)s == c2)
66e933ab
GS
887 && (ln == 1 || !(OP(c) == EXACTF
888 ? ibcmp(s, m, ln)
889 : ibcmp_locale(s, m, ln)))
6eb5f6b9 890 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
891 goto got_it;
892 s++;
893 }
894 }
895 break;
bbce6d69 896 case BOUNDL:
3280af22 897 PL_reg_flags |= RF_tainted;
bbce6d69 898 /* FALL THROUGH */
a0d0e21e 899 case BOUND:
6eb5f6b9 900 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 901 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 902 while (s < strend) {
95bac841 903 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 904 tmp = !tmp;
6eb5f6b9 905 if ((norun || regtry(prog, s)))
a0d0e21e 906 goto got_it;
a687059c 907 }
a0d0e21e
LW
908 s++;
909 }
6eb5f6b9 910 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
911 goto got_it;
912 break;
a0ed51b3
LW
913 case BOUNDLUTF8:
914 PL_reg_flags |= RF_tainted;
915 /* FALL THROUGH */
916 case BOUNDUTF8:
6eb5f6b9 917 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
76384e4a 918 tmp = ((OP(c) == BOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 919 while (s < strend) {
76384e4a 920 if (tmp == !(OP(c) == BOUNDUTF8 ?
dfe13c55
GS
921 swash_fetch(PL_utf8_alnum, (U8*)s) :
922 isALNUM_LC_utf8((U8*)s)))
923 {
a0ed51b3 924 tmp = !tmp;
6eb5f6b9 925 if ((norun || regtry(prog, s)))
a0ed51b3
LW
926 goto got_it;
927 }
928 s += UTF8SKIP(s);
929 }
6eb5f6b9 930 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
931 goto got_it;
932 break;
bbce6d69 933 case NBOUNDL:
3280af22 934 PL_reg_flags |= RF_tainted;
bbce6d69 935 /* FALL THROUGH */
a0d0e21e 936 case NBOUND:
6eb5f6b9 937 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 938 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 939 while (s < strend) {
95bac841 940 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 941 tmp = !tmp;
6eb5f6b9 942 else if ((norun || regtry(prog, s)))
a0d0e21e
LW
943 goto got_it;
944 s++;
945 }
6eb5f6b9 946 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
947 goto got_it;
948 break;
a0ed51b3
LW
949 case NBOUNDLUTF8:
950 PL_reg_flags |= RF_tainted;
951 /* FALL THROUGH */
952 case NBOUNDUTF8:
6eb5f6b9 953 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
76384e4a 954 tmp = ((OP(c) == NBOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 955 while (s < strend) {
76384e4a 956 if (tmp == !(OP(c) == NBOUNDUTF8 ?
dfe13c55
GS
957 swash_fetch(PL_utf8_alnum, (U8*)s) :
958 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 959 tmp = !tmp;
6eb5f6b9 960 else if ((norun || regtry(prog, s)))
a0ed51b3
LW
961 goto got_it;
962 s += UTF8SKIP(s);
963 }
6eb5f6b9 964 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
965 goto got_it;
966 break;
a0d0e21e
LW
967 case ALNUM:
968 while (s < strend) {
bbce6d69 969 if (isALNUM(*s)) {
6eb5f6b9 970 if (tmp && (norun || regtry(prog, s)))
bbce6d69
PP
971 goto got_it;
972 else
973 tmp = doevery;
974 }
975 else
976 tmp = 1;
977 s++;
978 }
979 break;
a0ed51b3
LW
980 case ALNUMUTF8:
981 while (s < strend) {
dfe13c55 982 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 983 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
984 goto got_it;
985 else
986 tmp = doevery;
987 }
988 else
989 tmp = 1;
990 s += UTF8SKIP(s);
991 }
992 break;
bbce6d69 993 case ALNUML:
3280af22 994 PL_reg_flags |= RF_tainted;
bbce6d69
PP
995 while (s < strend) {
996 if (isALNUM_LC(*s)) {
6eb5f6b9 997 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 998 goto got_it;
a687059c 999 else
a0d0e21e
LW
1000 tmp = doevery;
1001 }
1002 else
1003 tmp = 1;
1004 s++;
1005 }
1006 break;
a0ed51b3
LW
1007 case ALNUMLUTF8:
1008 PL_reg_flags |= RF_tainted;
1009 while (s < strend) {
dfe13c55 1010 if (isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1011 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1012 goto got_it;
1013 else
1014 tmp = doevery;
1015 }
1016 else
1017 tmp = 1;
1018 s += UTF8SKIP(s);
1019 }
1020 break;
a0d0e21e
LW
1021 case NALNUM:
1022 while (s < strend) {
bbce6d69 1023 if (!isALNUM(*s)) {
6eb5f6b9 1024 if (tmp && (norun || regtry(prog, s)))
bbce6d69
PP
1025 goto got_it;
1026 else
1027 tmp = doevery;
1028 }
1029 else
1030 tmp = 1;
1031 s++;
1032 }
1033 break;
a0ed51b3
LW
1034 case NALNUMUTF8:
1035 while (s < strend) {
dfe13c55 1036 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 1037 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1038 goto got_it;
1039 else
1040 tmp = doevery;
1041 }
1042 else
1043 tmp = 1;
1044 s += UTF8SKIP(s);
1045 }
1046 break;
bbce6d69 1047 case NALNUML:
3280af22 1048 PL_reg_flags |= RF_tainted;
bbce6d69
PP
1049 while (s < strend) {
1050 if (!isALNUM_LC(*s)) {
6eb5f6b9 1051 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1052 goto got_it;
a687059c 1053 else
a0d0e21e 1054 tmp = doevery;
a687059c 1055 }
a0d0e21e
LW
1056 else
1057 tmp = 1;
1058 s++;
1059 }
1060 break;
a0ed51b3
LW
1061 case NALNUMLUTF8:
1062 PL_reg_flags |= RF_tainted;
1063 while (s < strend) {
dfe13c55 1064 if (!isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1065 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1066 goto got_it;
1067 else
1068 tmp = doevery;
1069 }
1070 else
1071 tmp = 1;
1072 s += UTF8SKIP(s);
1073 }
1074 break;
a0d0e21e
LW
1075 case SPACE:
1076 while (s < strend) {
1077 if (isSPACE(*s)) {
6eb5f6b9 1078 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1079 goto got_it;
1080 else
1081 tmp = doevery;
2304df62 1082 }
a0d0e21e
LW
1083 else
1084 tmp = 1;
1085 s++;
1086 }
1087 break;
a0ed51b3
LW
1088 case SPACEUTF8:
1089 while (s < strend) {
dfe13c55 1090 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
6eb5f6b9 1091 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1092 goto got_it;
1093 else
1094 tmp = doevery;
1095 }
1096 else
1097 tmp = 1;
1098 s += UTF8SKIP(s);
1099 }
1100 break;
bbce6d69 1101 case SPACEL:
3280af22 1102 PL_reg_flags |= RF_tainted;
bbce6d69
PP
1103 while (s < strend) {
1104 if (isSPACE_LC(*s)) {
6eb5f6b9 1105 if (tmp && (norun || regtry(prog, s)))
bbce6d69
PP
1106 goto got_it;
1107 else
1108 tmp = doevery;
1109 }
1110 else
1111 tmp = 1;
1112 s++;
1113 }
1114 break;
a0ed51b3
LW
1115 case SPACELUTF8:
1116 PL_reg_flags |= RF_tainted;
1117 while (s < strend) {
dfe13c55 1118 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
6eb5f6b9 1119 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1120 goto got_it;
1121 else
1122 tmp = doevery;
1123 }
1124 else
1125 tmp = 1;
1126 s += UTF8SKIP(s);
1127 }
1128 break;
a0d0e21e
LW
1129 case NSPACE:
1130 while (s < strend) {
1131 if (!isSPACE(*s)) {
6eb5f6b9 1132 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1133 goto got_it;
1134 else
1135 tmp = doevery;
a687059c 1136 }
a0d0e21e
LW
1137 else
1138 tmp = 1;
1139 s++;
1140 }
1141 break;
a0ed51b3
LW
1142 case NSPACEUTF8:
1143 while (s < strend) {
dfe13c55 1144 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
6eb5f6b9 1145 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1146 goto got_it;
1147 else
1148 tmp = doevery;
1149 }
1150 else
1151 tmp = 1;
1152 s += UTF8SKIP(s);
1153 }
1154 break;
bbce6d69 1155 case NSPACEL:
3280af22 1156 PL_reg_flags |= RF_tainted;
bbce6d69
PP
1157 while (s < strend) {
1158 if (!isSPACE_LC(*s)) {
6eb5f6b9 1159 if (tmp && (norun || regtry(prog, s)))
bbce6d69
PP
1160 goto got_it;
1161 else
1162 tmp = doevery;
1163 }
1164 else
1165 tmp = 1;
1166 s++;
1167 }
1168 break;
a0ed51b3
LW
1169 case NSPACELUTF8:
1170 PL_reg_flags |= RF_tainted;
1171 while (s < strend) {
dfe13c55 1172 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
6eb5f6b9 1173 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1174 goto got_it;
1175 else
1176 tmp = doevery;
1177 }
1178 else
1179 tmp = 1;
1180 s += UTF8SKIP(s);
1181 }
1182 break;
a0d0e21e
LW
1183 case DIGIT:
1184 while (s < strend) {
1185 if (isDIGIT(*s)) {
6eb5f6b9 1186 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1187 goto got_it;
1188 else
1189 tmp = doevery;
2b69d0c2 1190 }
a0d0e21e
LW
1191 else
1192 tmp = 1;
1193 s++;
1194 }
1195 break;
a0ed51b3
LW
1196 case DIGITUTF8:
1197 while (s < strend) {
dfe13c55 1198 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1199 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1200 goto got_it;
1201 else
1202 tmp = doevery;
1203 }
1204 else
1205 tmp = 1;
1206 s += UTF8SKIP(s);
1207 }
1208 break;
b8c5462f
JH
1209 case DIGITL:
1210 PL_reg_flags |= RF_tainted;
1211 while (s < strend) {
1212 if (isDIGIT_LC(*s)) {
6eb5f6b9 1213 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1214 goto got_it;
1215 else
1216 tmp = doevery;
1217 }
1218 else
1219 tmp = 1;
1220 s++;
1221 }
1222 break;
1223 case DIGITLUTF8:
1224 PL_reg_flags |= RF_tainted;
1225 while (s < strend) {
1226 if (isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1227 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1228 goto got_it;
1229 else
1230 tmp = doevery;
1231 }
1232 else
1233 tmp = 1;
1234 s += UTF8SKIP(s);
1235 }
1236 break;
a0d0e21e
LW
1237 case NDIGIT:
1238 while (s < strend) {
1239 if (!isDIGIT(*s)) {
6eb5f6b9 1240 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1241 goto got_it;
1242 else
1243 tmp = doevery;
a687059c 1244 }
a0d0e21e
LW
1245 else
1246 tmp = 1;
1247 s++;
1248 }
1249 break;
a0ed51b3
LW
1250 case NDIGITUTF8:
1251 while (s < strend) {
dfe13c55 1252 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1253 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1254 goto got_it;
1255 else
1256 tmp = doevery;
1257 }
1258 else
1259 tmp = 1;
1260 s += UTF8SKIP(s);
1261 }
1262 break;
b8c5462f
JH
1263 case NDIGITL:
1264 PL_reg_flags |= RF_tainted;
1265 while (s < strend) {
1266 if (!isDIGIT_LC(*s)) {
6eb5f6b9 1267 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1268 goto got_it;
1269 else
1270 tmp = doevery;
1271 }
1272 else
1273 tmp = 1;
1274 s++;
a0ed51b3 1275 }
b8c5462f
JH
1276 break;
1277 case NDIGITLUTF8:
1278 PL_reg_flags |= RF_tainted;
1279 while (s < strend) {
1280 if (!isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1281 if (tmp && (norun || regtry(prog, s)))
b8c5462f 1282 goto got_it;
cf93c79d 1283 else
b8c5462f
JH
1284 tmp = doevery;
1285 }
1286 else
1287 tmp = 1;
1288 s += UTF8SKIP(s);
1289 }
1290 break;
b3c9acc1 1291 default:
3c3eec57
GS
1292 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1293 break;
d6a28714 1294 }
6eb5f6b9
JH
1295 return 0;
1296 got_it:
1297 return s;
1298}
1299
1300/*
1301 - regexec_flags - match a regexp against a string
1302 */
1303I32
1304Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1305 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1306/* strend: pointer to null at end of string */
1307/* strbeg: real beginning of string */
1308/* minend: end of match must be >=minend after stringarg. */
1309/* data: May be used for some additional optimizations. */
1310/* nosave: For optimizations. */
1311{
1312 dTHR;
1313 register char *s;
1314 register regnode *c;
1315 register char *startpos = stringarg;
6eb5f6b9
JH
1316 I32 minlen; /* must match at least this many chars */
1317 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1318 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1319 constant substr. */ /* CC */
1320 I32 end_shift = 0; /* Same for the end. */ /* CC */
1321 I32 scream_pos = -1; /* Internal iterator of scream. */
1322 char *scream_olds;
1323 SV* oreplsv = GvSV(PL_replgv);
1324
1325 PL_regcc = 0;
1326
1327 cache_re(prog);
1328#ifdef DEBUGGING
1329 PL_regnarrate = PL_debug & 512;
1330#endif
1331
1332 /* Be paranoid... */
1333 if (prog == NULL || startpos == NULL) {
1334 Perl_croak(aTHX_ "NULL regexp parameter");
1335 return 0;
1336 }
1337
1338 minlen = prog->minlen;
1339 if (strend - startpos < minlen) goto phooey;
1340
1341 if (startpos == strbeg) /* is ^ valid at stringarg? */
1342 PL_regprev = '\n';
1343 else {
1344 PL_regprev = (U32)stringarg[-1];
1345 if (!PL_multiline && PL_regprev == '\n')
1346 PL_regprev = '\0'; /* force ^ to NOT match */
1347 }
1348
1349 /* Check validity of program. */
1350 if (UCHARAT(prog->program) != REG_MAGIC) {
1351 Perl_croak(aTHX_ "corrupted regexp program");
1352 }
1353
1354 PL_reg_flags = 0;
1355 PL_reg_eval_set = 0;
1356 PL_reg_maxiter = 0;
1357
1358 if (prog->reganch & ROPT_UTF8)
1359 PL_reg_flags |= RF_utf8;
1360
1361 /* Mark beginning of line for ^ and lookbehind. */
1362 PL_regbol = startpos;
1363 PL_bostr = strbeg;
1364 PL_reg_sv = sv;
1365
1366 /* Mark end of line for $ (and such) */
1367 PL_regeol = strend;
1368
1369 /* see how far we have to get to not match where we matched before */
1370 PL_regtill = startpos+minend;
1371
1372 /* We start without call_cc context. */
1373 PL_reg_call_cc = 0;
1374
1375 /* If there is a "must appear" string, look for it. */
1376 s = startpos;
1377
1378 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1379 MAGIC *mg;
1380
1381 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1382 PL_reg_ganch = startpos;
1383 else if (sv && SvTYPE(sv) >= SVt_PVMG
1384 && SvMAGIC(sv)
1385 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1386 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1387 if (prog->reganch & ROPT_ANCH_GPOS) {
1388 if (s > PL_reg_ganch)
1389 goto phooey;
1390 s = PL_reg_ganch;
1391 }
1392 }
1393 else /* pos() not defined */
1394 PL_reg_ganch = strbeg;
1395 }
1396
1397 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1398 re_scream_pos_data d;
1399
1400 d.scream_olds = &scream_olds;
1401 d.scream_pos = &scream_pos;
1402 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1403 if (!s)
1404 goto phooey; /* not present */
1405 }
1406
1407 DEBUG_r( if (!PL_colorset) reginitcolors() );
1408 DEBUG_r(PerlIO_printf(Perl_debug_log,
1409 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1410 PL_colors[4],PL_colors[5],PL_colors[0],
1411 prog->precomp,
1412 PL_colors[1],
1413 (strlen(prog->precomp) > 60 ? "..." : ""),
1414 PL_colors[0],
1415 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1416 startpos, PL_colors[1],
1417 (strend - startpos > 60 ? "..." : ""))
1418 );
1419
1420 /* Simplest case: anchored match need be tried only once. */
1421 /* [unless only anchor is BOL and multiline is set] */
1422 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1423 if (s == startpos && regtry(prog, startpos))
1424 goto got_it;
1425 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1426 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1427 {
1428 char *end;
1429
1430 if (minlen)
1431 dontbother = minlen - 1;
1432 end = HOPc(strend, -dontbother) - 1;
1433 /* for multiline we only have to try after newlines */
1434 if (prog->check_substr) {
1435 if (s == startpos)
1436 goto after_try;
1437 while (1) {
1438 if (regtry(prog, s))
1439 goto got_it;
1440 after_try:
1441 if (s >= end)
1442 goto phooey;
1443 if (prog->reganch & RE_USE_INTUIT) {
1444 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1445 if (!s)
1446 goto phooey;
1447 }
1448 else
1449 s++;
1450 }
1451 } else {
1452 if (s > startpos)
1453 s--;
1454 while (s < end) {
1455 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1456 if (regtry(prog, s))
1457 goto got_it;
1458 }
1459 }
1460 }
1461 }
1462 goto phooey;
1463 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1464 if (regtry(prog, PL_reg_ganch))
1465 goto got_it;
1466 goto phooey;
1467 }
1468
1469 /* Messy cases: unanchored match. */
1470 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1471 /* we have /x+whatever/ */
1472 /* it must be a one character string (XXXX Except UTF?) */
1473 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1474#ifdef DEBUGGING
1475 int did_match = 0;
1476#endif
1477
6eb5f6b9
JH
1478 if (UTF) {
1479 while (s < strend) {
1480 if (*s == ch) {
bf93d4cc 1481 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1482 if (regtry(prog, s)) goto got_it;
1483 s += UTF8SKIP(s);
1484 while (s < strend && *s == ch)
1485 s += UTF8SKIP(s);
1486 }
1487 s += UTF8SKIP(s);
1488 }
1489 }
1490 else {
1491 while (s < strend) {
1492 if (*s == ch) {
bf93d4cc 1493 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1494 if (regtry(prog, s)) goto got_it;
1495 s++;
1496 while (s < strend && *s == ch)
1497 s++;
1498 }
1499 s++;
1500 }
1501 }
bf93d4cc
GS
1502 DEBUG_r(did_match ||
1503 PerlIO_printf(Perl_debug_log,
1504 "Did not find anchored character...\n"));
6eb5f6b9
JH
1505 }
1506 /*SUPPRESS 560*/
1507 else if (prog->anchored_substr != Nullsv
1508 || (prog->float_substr != Nullsv
1509 && prog->float_max_offset < strend - s)) {
1510 SV *must = prog->anchored_substr
1511 ? prog->anchored_substr : prog->float_substr;
1512 I32 back_max =
1513 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1514 I32 back_min =
1515 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
6eb5f6b9
JH
1516 char *last = HOPc(strend, /* Cannot start after this */
1517 -(I32)(CHR_SVLEN(must)
1518 - (SvTAIL(must) != 0) + back_min));
1519 char *last1; /* Last position checked before */
bf93d4cc
GS
1520#ifdef DEBUGGING
1521 int did_match = 0;
1522#endif
6eb5f6b9
JH
1523
1524 if (s > PL_bostr)
1525 last1 = HOPc(s, -1);
1526 else
1527 last1 = s - 1; /* bogus */
1528
1529 /* XXXX check_substr already used to find `s', can optimize if
1530 check_substr==must. */
1531 scream_pos = -1;
1532 dontbother = end_shift;
1533 strend = HOPc(strend, -dontbother);
1534 while ( (s <= last) &&
1535 ((flags & REXEC_SCREAM)
1536 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1537 end_shift, &scream_pos, 0))
1538 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1539 (unsigned char*)strend, must,
1540 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1541 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1542 if (HOPc(s, -back_max) > last1) {
1543 last1 = HOPc(s, -back_min);
1544 s = HOPc(s, -back_max);
1545 }
1546 else {
1547 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1548
1549 last1 = HOPc(s, -back_min);
1550 s = t;
1551 }
1552 if (UTF) {
1553 while (s <= last1) {
1554 if (regtry(prog, s))
1555 goto got_it;
1556 s += UTF8SKIP(s);
1557 }
1558 }
1559 else {
1560 while (s <= last1) {
1561 if (regtry(prog, s))
1562 goto got_it;
1563 s++;
1564 }
1565 }
1566 }
bf93d4cc
GS
1567 DEBUG_r(did_match ||
1568 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1569 ((must == prog->anchored_substr)
1570 ? "anchored" : "floating"),
1571 PL_colors[0],
1572 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1573 SvPVX(must),
1574 PL_colors[1], (SvTAIL(must) ? "$" : "")));
6eb5f6b9
JH
1575 goto phooey;
1576 }
155aba94 1577 else if ((c = prog->regstclass)) {
66e933ab
GS
1578 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1579 /* don't bother with what can't match */
6eb5f6b9
JH
1580 strend = HOPc(strend, -(minlen - 1));
1581 if (find_byclass(prog, c, s, strend, startpos, 0))
1582 goto got_it;
bf93d4cc 1583 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1584 }
1585 else {
1586 dontbother = 0;
1587 if (prog->float_substr != Nullsv) { /* Trim the end. */
1588 char *last;
d6a28714
JH
1589
1590 if (flags & REXEC_SCREAM) {
1591 last = screaminstr(sv, prog->float_substr, s - strbeg,
1592 end_shift, &scream_pos, 1); /* last one */
1593 if (!last)
1594 last = scream_olds; /* Only one occurence. */
b8c5462f 1595 }
d6a28714
JH
1596 else {
1597 STRLEN len;
1598 char *little = SvPV(prog->float_substr, len);
1599
1600 if (SvTAIL(prog->float_substr)) {
1601 if (memEQ(strend - len + 1, little, len - 1))
1602 last = strend - len + 1;
1603 else if (!PL_multiline)
1604 last = memEQ(strend - len, little, len)
1605 ? strend - len : Nullch;
b8c5462f 1606 else
d6a28714
JH
1607 goto find_last;
1608 } else {
1609 find_last:
1610 if (len)
1611 last = rninstr(s, strend, little, little + len);
b8c5462f 1612 else
d6a28714 1613 last = strend; /* matching `$' */
b8c5462f 1614 }
b8c5462f 1615 }
bf93d4cc
GS
1616 if (last == NULL) {
1617 DEBUG_r(PerlIO_printf(Perl_debug_log,
1618 "%sCan't trim the tail, match fails (should not happen)%s\n",
1619 PL_colors[4],PL_colors[5]));
1620 goto phooey; /* Should not happen! */
1621 }
d6a28714
JH
1622 dontbother = strend - last + prog->float_min_offset;
1623 }
1624 if (minlen && (dontbother < minlen))
1625 dontbother = minlen - 1;
1626 strend -= dontbother; /* this one's always in bytes! */
1627 /* We don't know much -- general case. */
1628 if (UTF) {
1629 for (;;) {
1630 if (regtry(prog, s))
1631 goto got_it;
1632 if (s >= strend)
1633 break;
b8c5462f 1634 s += UTF8SKIP(s);
d6a28714
JH
1635 };
1636 }
1637 else {
1638 do {
1639 if (regtry(prog, s))
1640 goto got_it;
1641 } while (s++ < strend);
1642 }
1643 }
1644
1645 /* Failure. */
1646 goto phooey;
1647
1648got_it:
1649 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1650
1651 if (PL_reg_eval_set) {
1652 /* Preserve the current value of $^R */
1653 if (oreplsv != GvSV(PL_replgv))
1654 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1655 restored, the value remains
1656 the same. */
1657 restore_pos(aTHXo_ 0);
1658 }
1659
1660 /* make sure $`, $&, $', and $digit will work later */
1661 if ( !(flags & REXEC_NOT_FIRST) ) {
1662 if (RX_MATCH_COPIED(prog)) {
1663 Safefree(prog->subbeg);
1664 RX_MATCH_COPIED_off(prog);
1665 }
1666 if (flags & REXEC_COPY_STR) {
1667 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1668
1669 s = savepvn(strbeg, i);
1670 prog->subbeg = s;
1671 prog->sublen = i;
1672 RX_MATCH_COPIED_on(prog);
1673 }
1674 else {
1675 prog->subbeg = strbeg;
1676 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1677 }
1678 }
1679
1680 return 1;
1681
1682phooey:
bf93d4cc
GS
1683 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1684 PL_colors[4],PL_colors[5]));
d6a28714
JH
1685 if (PL_reg_eval_set)
1686 restore_pos(aTHXo_ 0);
1687 return 0;
1688}
1689
1690/*
1691 - regtry - try match at specific point
1692 */
1693STATIC I32 /* 0 failure, 1 success */
1694S_regtry(pTHX_ regexp *prog, char *startpos)
1695{
1696 dTHR;
1697 register I32 i;
1698 register I32 *sp;
1699 register I32 *ep;
1700 CHECKPOINT lastcp;
1701
1702 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1703 MAGIC *mg;
1704
1705 PL_reg_eval_set = RS_init;
1706 DEBUG_r(DEBUG_s(
b900a521
JH
1707 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1708 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1709 ));
e8347627 1710 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1711 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1712 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1713 SAVETMPS;
1714 /* Apparently this is not needed, judging by wantarray. */
e8347627 1715 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1716 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1717
1718 if (PL_reg_sv) {
1719 /* Make $_ available to executed code. */
1720 if (PL_reg_sv != DEFSV) {
1721 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1722 SAVESPTR(DEFSV);
1723 DEFSV = PL_reg_sv;
b8c5462f 1724 }
d6a28714
JH
1725
1726 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1727 && (mg = mg_find(PL_reg_sv, 'g')))) {
1728 /* prepare for quick setting of pos */
1729 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1730 mg = mg_find(PL_reg_sv, 'g');
1731 mg->mg_len = -1;
b8c5462f 1732 }
d6a28714
JH
1733 PL_reg_magic = mg;
1734 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1735 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1736 }
1737 if (!PL_reg_curpm)
0f79a09d 1738 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1739 PL_reg_curpm->op_pmregexp = prog;
1740 PL_reg_oldcurpm = PL_curpm;
1741 PL_curpm = PL_reg_curpm;
1742 if (RX_MATCH_COPIED(prog)) {
1743 /* Here is a serious problem: we cannot rewrite subbeg,
1744 since it may be needed if this match fails. Thus
1745 $` inside (?{}) could fail... */
1746 PL_reg_oldsaved = prog->subbeg;
1747 PL_reg_oldsavedlen = prog->sublen;
1748 RX_MATCH_COPIED_off(prog);
1749 }
1750 else
1751 PL_reg_oldsaved = Nullch;
1752 prog->subbeg = PL_bostr;
1753 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1754 }
1755 prog->startp[0] = startpos - PL_bostr;
1756 PL_reginput = startpos;
1757 PL_regstartp = prog->startp;
1758 PL_regendp = prog->endp;
1759 PL_reglastparen = &prog->lastparen;
1760 prog->lastparen = 0;
1761 PL_regsize = 0;
1762 DEBUG_r(PL_reg_starttry = startpos);
1763 if (PL_reg_start_tmpl <= prog->nparens) {
1764 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1765 if(PL_reg_start_tmp)
1766 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1767 else
1768 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1769 }
1770
1771 /* XXXX What this code is doing here?!!! There should be no need
1772 to do this again and again, PL_reglastparen should take care of
1773 this! */
1774 sp = prog->startp;
1775 ep = prog->endp;
1776 if (prog->nparens) {
1777 for (i = prog->nparens; i >= 1; i--) {
1778 *++sp = -1;
1779 *++ep = -1;
1780 }
1781 }
1782 REGCP_SET;
1783 if (regmatch(prog->program + 1)) {
1784 prog->endp[0] = PL_reginput - PL_bostr;
1785 return 1;
1786 }
1787 REGCP_UNWIND;
1788 return 0;
1789}
1790
1791/*
1792 - regmatch - main matching routine
1793 *
1794 * Conceptually the strategy is simple: check to see whether the current
1795 * node matches, call self recursively to see whether the rest matches,
1796 * and then act accordingly. In practice we make some effort to avoid
1797 * recursion, in particular by going through "ordinary" nodes (that don't
1798 * need to know whether the rest of the match failed) by a loop instead of
1799 * by recursion.
1800 */
1801/* [lwall] I've hoisted the register declarations to the outer block in order to
1802 * maybe save a little bit of pushing and popping on the stack. It also takes
1803 * advantage of machines that use a register save mask on subroutine entry.
1804 */
1805STATIC I32 /* 0 failure, 1 success */
1806S_regmatch(pTHX_ regnode *prog)
1807{
1808 dTHR;
1809 register regnode *scan; /* Current node. */
1810 regnode *next; /* Next node. */
1811 regnode *inner; /* Next node in internal branch. */
1812 register I32 nextchr; /* renamed nextchr - nextchar colides with
1813 function of same name */
1814 register I32 n; /* no or next */
1815 register I32 ln; /* len or last */
1816 register char *s; /* operand or save */
1817 register char *locinput = PL_reginput;
1818 register I32 c1, c2, paren; /* case fold search, parenth */
1819 int minmod = 0, sw = 0, logical = 0;
1820#ifdef DEBUGGING
1821 PL_regindent++;
1822#endif
1823
1824 /* Note that nextchr is a byte even in UTF */
1825 nextchr = UCHARAT(locinput);
1826 scan = prog;
1827 while (scan != NULL) {
1828#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1829#ifdef DEBUGGING
1830# define sayYES goto yes
1831# define sayNO goto no
7821416a
IZ
1832# define sayYES_FINAL goto yes_final
1833# define sayYES_LOUD goto yes_loud
1834# define sayNO_FINAL goto no_final
1835# define sayNO_SILENT goto do_no
d6a28714
JH
1836# define saySAME(x) if (x) goto yes; else goto no
1837# define REPORT_CODE_OFF 24
1838#else
1839# define sayYES return 1
1840# define sayNO return 0
7821416a
IZ
1841# define sayYES_FINAL return 1
1842# define sayYES_LOUD return 1
1843# define sayNO_FINAL return 0
1844# define sayNO_SILENT return 0
d6a28714
JH
1845# define saySAME(x) return x
1846#endif
1847 DEBUG_r( {
1848 SV *prop = sv_newmortal();
1849 int docolor = *PL_colors[0];
1850 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1851 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1852 /* The part of the string before starttry has one color
1853 (pref0_len chars), between starttry and current
1854 position another one (pref_len - pref0_len chars),
1855 after the current position the third one.
1856 We assume that pref0_len <= pref_len, otherwise we
1857 decrease pref0_len. */
1858 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1859 ? (5 + taill) - l : locinput - PL_bostr);
1860 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1861
1862 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1863 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1864 ? (5 + taill) - pref_len : PL_regeol - locinput);
1865 if (pref0_len < 0)
1866 pref0_len = 0;
1867 if (pref0_len > pref_len)
1868 pref0_len = pref_len;
1869 regprop(prop, scan);
1870 PerlIO_printf(Perl_debug_log,
b900a521
JH
1871 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1872 (IV)(locinput - PL_bostr),
d6a28714
JH
1873 PL_colors[4], pref0_len,
1874 locinput - pref_len, PL_colors[5],
1875 PL_colors[2], pref_len - pref0_len,
1876 locinput - pref_len + pref0_len, PL_colors[3],
1877 (docolor ? "" : "> <"),
1878 PL_colors[0], l, locinput, PL_colors[1],
1879 15 - l - pref_len + 1,
1880 "",
b900a521 1881 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
1882 SvPVX(prop));
1883 } );
1884
1885 next = scan + NEXT_OFF(scan);
1886 if (next == scan)
1887 next = NULL;
1888
1889 switch (OP(scan)) {
1890 case BOL:
1891 if (locinput == PL_bostr
1892 ? PL_regprev == '\n'
1893 : (PL_multiline &&
1894 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1895 {
1896 /* regtill = regbol; */
b8c5462f
JH
1897 break;
1898 }
d6a28714
JH
1899 sayNO;
1900 case MBOL:
1901 if (locinput == PL_bostr
1902 ? PL_regprev == '\n'
1903 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1904 {
b8c5462f
JH
1905 break;
1906 }
d6a28714
JH
1907 sayNO;
1908 case SBOL:
c2a73568 1909 if (locinput == PL_bostr)
b8c5462f 1910 break;
d6a28714
JH
1911 sayNO;
1912 case GPOS:
1913 if (locinput == PL_reg_ganch)
1914 break;
1915 sayNO;
1916 case EOL:
1917 if (PL_multiline)
1918 goto meol;
1919 else
1920 goto seol;
1921 case MEOL:
1922 meol:
1923 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1924 sayNO;
b8c5462f 1925 break;
d6a28714
JH
1926 case SEOL:
1927 seol:
1928 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1929 sayNO;
d6a28714 1930 if (PL_regeol - locinput > 1)
b8c5462f 1931 sayNO;
b8c5462f 1932 break;
d6a28714
JH
1933 case EOS:
1934 if (PL_regeol != locinput)
b8c5462f 1935 sayNO;
d6a28714
JH
1936 break;
1937 case SANYUTF8:
b8c5462f 1938 if (nextchr & 0x80) {
b8c5462f 1939 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1940 if (locinput > PL_regeol)
1941 sayNO;
b8c5462f
JH
1942 nextchr = UCHARAT(locinput);
1943 break;
1944 }
d6a28714 1945 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1946 sayNO;
b8c5462f 1947 nextchr = UCHARAT(++locinput);
a0d0e21e 1948 break;
d6a28714
JH
1949 case SANY:
1950 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
1951 sayNO;
1952 nextchr = UCHARAT(++locinput);
b85d18e9 1953 break;
d6a28714 1954 case ANYUTF8:
a0ed51b3 1955 if (nextchr & 0x80) {
b8c5462f 1956 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1957 if (locinput > PL_regeol)
1958 sayNO;
a0ed51b3
LW
1959 nextchr = UCHARAT(locinput);
1960 break;
1961 }
155aba94 1962 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
a0ed51b3
LW
1963 sayNO;
1964 nextchr = UCHARAT(++locinput);
1965 break;
d6a28714 1966 case REG_ANY:
155aba94 1967 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
4633a7c4 1968 sayNO;
76e3520e 1969 nextchr = UCHARAT(++locinput);
a0d0e21e 1970 break;
d6a28714 1971 case EXACT:
cd439c50
IZ
1972 s = STRING(scan);
1973 ln = STR_LEN(scan);
d6a28714
JH
1974 /* Inline the first character, for speed. */
1975 if (UCHARAT(s) != nextchr)
1976 sayNO;
1977 if (PL_regeol - locinput < ln)
1978 sayNO;
1979 if (ln > 1 && memNE(s, locinput, ln))
1980 sayNO;
1981 locinput += ln;
1982 nextchr = UCHARAT(locinput);
1983 break;
1984 case EXACTFL:
b8c5462f
JH
1985 PL_reg_flags |= RF_tainted;
1986 /* FALL THROUGH */
d6a28714 1987 case EXACTF:
cd439c50
IZ
1988 s = STRING(scan);
1989 ln = STR_LEN(scan);
d6a28714
JH
1990
1991 if (UTF) {
1992 char *l = locinput;
1993 char *e = s + ln;
1994 c1 = OP(scan) == EXACTF;
1995 while (s < e) {
1996 if (l >= PL_regeol)
1997 sayNO;
1998 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1999 toLOWER_utf8((U8*)l) :
2000 toLOWER_LC_utf8((U8*)l)))
2001 {
2002 sayNO;
2003 }
2004 s += UTF8SKIP(s);
2005 l += UTF8SKIP(l);
b8c5462f 2006 }
d6a28714 2007 locinput = l;
a0ed51b3
LW
2008 nextchr = UCHARAT(locinput);
2009 break;
2010 }
d6a28714
JH
2011
2012 /* Inline the first character, for speed. */
2013 if (UCHARAT(s) != nextchr &&
2014 UCHARAT(s) != ((OP(scan) == EXACTF)
2015 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2016 sayNO;
d6a28714 2017 if (PL_regeol - locinput < ln)
b8c5462f 2018 sayNO;
d6a28714
JH
2019 if (ln > 1 && (OP(scan) == EXACTF
2020 ? ibcmp(s, locinput, ln)
2021 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2022 sayNO;
d6a28714
JH
2023 locinput += ln;
2024 nextchr = UCHARAT(locinput);
a0d0e21e 2025 break;
d6a28714 2026 case ANYOFUTF8:
d6a28714 2027 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 2028 sayNO;
d6a28714
JH
2029 if (locinput >= PL_regeol)
2030 sayNO;
2031 locinput += PL_utf8skip[nextchr];
2032 nextchr = UCHARAT(locinput);
2033 break;
2034 case ANYOF:
d6a28714 2035 if (nextchr < 0)
b8c5462f 2036 nextchr = UCHARAT(locinput);
936ed897 2037 if (!REGINCLASS(scan, nextchr))
d6a28714
JH
2038 sayNO;
2039 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2040 sayNO;
b8c5462f
JH
2041 nextchr = UCHARAT(++locinput);
2042 break;
d6a28714 2043 case ALNUML:
b8c5462f
JH
2044 PL_reg_flags |= RF_tainted;
2045 /* FALL THROUGH */
d6a28714 2046 case ALNUM:
b8c5462f 2047 if (!nextchr)
4633a7c4 2048 sayNO;
d6a28714
JH
2049 if (!(OP(scan) == ALNUM
2050 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
2051 sayNO;
2052 nextchr = UCHARAT(++locinput);
bbce6d69 2053 break;
d6a28714 2054 case ALNUMLUTF8:
3280af22 2055 PL_reg_flags |= RF_tainted;
bbce6d69 2056 /* FALL THROUGH */
d6a28714 2057 case ALNUMUTF8:
b8c5462f
JH
2058 if (!nextchr)
2059 sayNO;
2060 if (nextchr & 0x80) {
d6a28714
JH
2061 if (!(OP(scan) == ALNUMUTF8
2062 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2063 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2064 {
2065 sayNO;
a0ed51b3 2066 }
b8c5462f 2067 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2068 nextchr = UCHARAT(locinput);
2069 break;
2070 }
d6a28714
JH
2071 if (!(OP(scan) == ALNUMUTF8
2072 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2073 sayNO;
b8c5462f 2074 nextchr = UCHARAT(++locinput);
a0d0e21e 2075 break;
d6a28714 2076 case NALNUML:
b8c5462f
JH
2077 PL_reg_flags |= RF_tainted;
2078 /* FALL THROUGH */
d6a28714
JH
2079 case NALNUM:
2080 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2081 sayNO;
d6a28714
JH
2082 if (OP(scan) == NALNUM
2083 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 2084 sayNO;
b8c5462f 2085 nextchr = UCHARAT(++locinput);
a0ed51b3 2086 break;
d6a28714 2087 case NALNUMLUTF8:
b8c5462f
JH
2088 PL_reg_flags |= RF_tainted;
2089 /* FALL THROUGH */
d6a28714 2090 case NALNUMUTF8:
3280af22 2091 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2092 sayNO;
b8c5462f 2093 if (nextchr & 0x80) {
d6a28714
JH
2094 if (OP(scan) == NALNUMUTF8
2095 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2096 : isALNUM_LC_utf8((U8*)locinput))
2097 {
b8c5462f 2098 sayNO;
d6a28714 2099 }
b8c5462f
JH
2100 locinput += PL_utf8skip[nextchr];
2101 nextchr = UCHARAT(locinput);
2102 break;
2103 }
d6a28714
JH
2104 if (OP(scan) == NALNUMUTF8
2105 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2106 sayNO;
76e3520e 2107 nextchr = UCHARAT(++locinput);
a0d0e21e 2108 break;
d6a28714
JH
2109 case BOUNDL:
2110 case NBOUNDL:
3280af22 2111 PL_reg_flags |= RF_tainted;
bbce6d69 2112 /* FALL THROUGH */
d6a28714
JH
2113 case BOUND:
2114 case NBOUND:
2115 /* was last char in word? */
2116 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2117 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2118 ln = isALNUM(ln);
2119 n = isALNUM(nextchr);
2120 }
2121 else {
2122 ln = isALNUM_LC(ln);
2123 n = isALNUM_LC(nextchr);
2124 }
2125 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 2126 sayNO;
a0d0e21e 2127 break;
d6a28714
JH
2128 case BOUNDLUTF8:
2129 case NBOUNDLUTF8:
a0ed51b3
LW
2130 PL_reg_flags |= RF_tainted;
2131 /* FALL THROUGH */
d6a28714
JH
2132 case BOUNDUTF8:
2133 case NBOUNDUTF8:
2134 /* was last char in word? */
2135 ln = (locinput != PL_regbol)
2136 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
2137 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2138 ln = isALNUM_uni(ln);
2139 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 2140 }
d6a28714
JH
2141 else {
2142 ln = isALNUM_LC_uni(ln);
2143 n = isALNUM_LC_utf8((U8*)locinput);
2144 }
2145 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 2146 sayNO;
a0ed51b3 2147 break;
d6a28714 2148 case SPACEL:
3280af22 2149 PL_reg_flags |= RF_tainted;
bbce6d69 2150 /* FALL THROUGH */
d6a28714 2151 case SPACE:
9442cb0e 2152 if (!nextchr)
4633a7c4 2153 sayNO;
d6a28714
JH
2154 if (!(OP(scan) == SPACE
2155 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 2156 sayNO;
76e3520e 2157 nextchr = UCHARAT(++locinput);
a0d0e21e 2158 break;
d6a28714 2159 case SPACELUTF8:
a0ed51b3
LW
2160 PL_reg_flags |= RF_tainted;
2161 /* FALL THROUGH */
d6a28714 2162 case SPACEUTF8:
9442cb0e 2163 if (!nextchr)
a0ed51b3
LW
2164 sayNO;
2165 if (nextchr & 0x80) {
d6a28714 2166 if (!(OP(scan) == SPACEUTF8
9442cb0e 2167 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714
JH
2168 : isSPACE_LC_utf8((U8*)locinput)))
2169 {
a0ed51b3 2170 sayNO;
d6a28714 2171 }
6f06b55f 2172 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2173 nextchr = UCHARAT(locinput);
2174 break;
2175 }
d6a28714
JH
2176 if (!(OP(scan) == SPACEUTF8
2177 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
2178 sayNO;
2179 nextchr = UCHARAT(++locinput);
2180 break;
d6a28714 2181 case NSPACEL:
3280af22 2182 PL_reg_flags |= RF_tainted;
bbce6d69 2183 /* FALL THROUGH */
d6a28714 2184 case NSPACE:
9442cb0e 2185 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2186 sayNO;
9442cb0e 2187 if (OP(scan) == NSPACE
d6a28714 2188 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2189 sayNO;
b8c5462f 2190 nextchr = UCHARAT(++locinput);
a0d0e21e 2191 break;
d6a28714 2192 case NSPACELUTF8:
a0ed51b3
LW
2193 PL_reg_flags |= RF_tainted;
2194 /* FALL THROUGH */
d6a28714 2195 case NSPACEUTF8:
9442cb0e 2196 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
2197 sayNO;
2198 if (nextchr & 0x80) {
d6a28714 2199 if (OP(scan) == NSPACEUTF8
9442cb0e 2200 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714 2201 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2202 {
2203 sayNO;
2204 }
2205 locinput += PL_utf8skip[nextchr];
2206 nextchr = UCHARAT(locinput);
2207 break;
a0ed51b3 2208 }
d6a28714
JH
2209 if (OP(scan) == NSPACEUTF8
2210 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2211 sayNO;
76e3520e 2212 nextchr = UCHARAT(++locinput);
a0d0e21e 2213 break;
d6a28714 2214 case DIGITL:
a0ed51b3
LW
2215 PL_reg_flags |= RF_tainted;
2216 /* FALL THROUGH */
d6a28714 2217 case DIGIT:
9442cb0e 2218 if (!nextchr)
a0ed51b3 2219 sayNO;
d6a28714
JH
2220 if (!(OP(scan) == DIGIT
2221 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2222 sayNO;
76e3520e 2223 nextchr = UCHARAT(++locinput);
a0d0e21e 2224 break;
d6a28714 2225 case DIGITLUTF8:
a0ed51b3
LW
2226 PL_reg_flags |= RF_tainted;
2227 /* FALL THROUGH */
d6a28714 2228 case DIGITUTF8:
a0ed51b3
LW
2229 if (!nextchr)
2230 sayNO;
2231 if (nextchr & 0x80) {
9442cb0e
GS
2232 if (!(OP(scan) == DIGITUTF8
2233 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2234 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2235 {
a0ed51b3 2236 sayNO;
dfe13c55 2237 }
6f06b55f 2238 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2239 nextchr = UCHARAT(locinput);
2240 break;
2241 }
9442cb0e
GS
2242 if (!(OP(scan) == DIGITUTF8
2243 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2244 sayNO;
2245 nextchr = UCHARAT(++locinput);
2246 break;
d6a28714 2247 case NDIGITL:
b8c5462f
JH
2248 PL_reg_flags |= RF_tainted;
2249 /* FALL THROUGH */
d6a28714 2250 case NDIGIT:
9442cb0e 2251 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2252 sayNO;
9442cb0e 2253 if (OP(scan) == NDIGIT
d6a28714 2254 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2255 sayNO;
76e3520e 2256 nextchr = UCHARAT(++locinput);
a0d0e21e 2257 break;
d6a28714 2258 case NDIGITLUTF8:
b8c5462f
JH
2259 PL_reg_flags |= RF_tainted;
2260 /* FALL THROUGH */
d6a28714 2261 case NDIGITUTF8:
b8c5462f
JH
2262 if (!nextchr && locinput >= PL_regeol)
2263 sayNO;
a0ed51b3 2264 if (nextchr & 0x80) {
9442cb0e
GS
2265 if (OP(scan) == NDIGITUTF8
2266 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2267 : isDIGIT_LC_utf8((U8*)locinput))
2268 {
a0ed51b3 2269 sayNO;
9442cb0e 2270 }
6f06b55f 2271 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2272 nextchr = UCHARAT(locinput);
2273 break;
2274 }
9442cb0e
GS
2275 if (OP(scan) == NDIGITUTF8
2276 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2277 sayNO;
2278 nextchr = UCHARAT(++locinput);
2279 break;
2280 case CLUMP:
dfe13c55 2281 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2282 sayNO;
6f06b55f 2283 locinput += PL_utf8skip[nextchr];
dfe13c55 2284 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2285 locinput += UTF8SKIP(locinput);
2286 if (locinput > PL_regeol)
2287 sayNO;
2288 nextchr = UCHARAT(locinput);
2289 break;
c8756f30 2290 case REFFL:
3280af22 2291 PL_reg_flags |= RF_tainted;
c8756f30 2292 /* FALL THROUGH */
c277df42 2293 case REF:
c8756f30 2294 case REFF:
c277df42 2295 n = ARG(scan); /* which paren pair */
cf93c79d 2296 ln = PL_regstartp[n];
2c2d71f5 2297 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2298 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2299 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2300 if (ln == PL_regendp[n])
a0d0e21e 2301 break;
a0ed51b3 2302
cf93c79d 2303 s = PL_bostr + ln;
a0ed51b3
LW
2304 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2305 char *l = locinput;
cf93c79d 2306 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2307 /*
2308 * Note that we can't do the "other character" lookup trick as
2309 * in the 8-bit case (no pun intended) because in Unicode we
2310 * have to map both upper and title case to lower case.
2311 */
2312 if (OP(scan) == REFF) {
2313 while (s < e) {
2314 if (l >= PL_regeol)
2315 sayNO;
dfe13c55 2316 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2317 sayNO;
2318 s += UTF8SKIP(s);
2319 l += UTF8SKIP(l);
2320 }
2321 }
2322 else {
2323 while (s < e) {
2324 if (l >= PL_regeol)
2325 sayNO;
dfe13c55 2326 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2327 sayNO;
2328 s += UTF8SKIP(s);
2329 l += UTF8SKIP(l);
2330 }
2331 }
2332 locinput = l;
2333 nextchr = UCHARAT(locinput);
2334 break;
2335 }
2336
a0d0e21e 2337 /* Inline the first character, for speed. */
76e3520e 2338 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2339 (OP(scan) == REF ||
2340 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2341 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2342 sayNO;
cf93c79d 2343 ln = PL_regendp[n] - ln;
3280af22 2344 if (locinput + ln > PL_regeol)
4633a7c4 2345 sayNO;
c8756f30
AK
2346 if (ln > 1 && (OP(scan) == REF
2347 ? memNE(s, locinput, ln)
2348 : (OP(scan) == REFF
2349 ? ibcmp(s, locinput, ln)
2350 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2351 sayNO;
a0d0e21e 2352 locinput += ln;
76e3520e 2353 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2354 break;
2355
2356 case NOTHING:
c277df42 2357 case TAIL:
a0d0e21e
LW
2358 break;
2359 case BACK:
2360 break;
c277df42
IZ
2361 case EVAL:
2362 {
2363 dSP;
533c011a 2364 OP_4tree *oop = PL_op;
3280af22
NIS
2365 COP *ocurcop = PL_curcop;
2366 SV **ocurpad = PL_curpad;
c277df42
IZ
2367 SV *ret;
2368
2369 n = ARG(scan);
533c011a 2370 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2371 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2372 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2373 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2374
cea2e8a9 2375 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2376 SPAGAIN;
2377 ret = POPs;
2378 PUTBACK;
2379
0f5d15d6
IZ
2380 PL_op = oop;
2381 PL_curpad = ocurpad;
2382 PL_curcop = ocurcop;
c277df42 2383 if (logical) {
0f5d15d6
IZ
2384 if (logical == 2) { /* Postponed subexpression. */
2385 regexp *re;
22c35a8c 2386 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2387 re_cc_state state;
0f5d15d6
IZ
2388 CHECKPOINT cp, lastcp;
2389
2390 if(SvROK(ret) || SvRMAGICAL(ret)) {
2391 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2392
2393 if(SvMAGICAL(sv))
2394 mg = mg_find(sv, 'r');
2395 }
2396 if (mg) {
2397 re = (regexp *)mg->mg_obj;
df0003d4 2398 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2399 }
2400 else {
2401 STRLEN len;
2402 char *t = SvPV(ret, len);
2403 PMOP pm;
2404 char *oprecomp = PL_regprecomp;
2405 I32 osize = PL_regsize;
2406 I32 onpar = PL_regnpar;
2407
2408 pm.op_pmflags = 0;
393fec97 2409 pm.op_pmdynflags = (UTF||DO_UTF8(ret) ? PMdf_UTF8 : 0);
cea2e8a9 2410 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2411 if (!(SvFLAGS(ret)
2412 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2413 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2414 PL_regprecomp = oprecomp;
2415 PL_regsize = osize;
2416 PL_regnpar = onpar;
2417 }
2418 DEBUG_r(
2419 PerlIO_printf(Perl_debug_log,
2420 "Entering embedded `%s%.60s%s%s'\n",
2421 PL_colors[0],
2422 re->precomp,
2423 PL_colors[1],
2424 (strlen(re->precomp) > 60 ? "..." : ""))
2425 );
2426 state.node = next;
2427 state.prev = PL_reg_call_cc;
2428 state.cc = PL_regcc;
2429 state.re = PL_reg_re;
2430
2ab05381 2431 PL_regcc = 0;
0f5d15d6
IZ
2432
2433 cp = regcppush(0); /* Save *all* the positions. */
2434 REGCP_SET;
2435 cache_re(re);
2436 state.ss = PL_savestack_ix;
2437 *PL_reglastparen = 0;
2438 PL_reg_call_cc = &state;
2439 PL_reginput = locinput;
2c2d71f5
JH
2440
2441 /* XXXX This is too dramatic a measure... */
2442 PL_reg_maxiter = 0;
2443
0f5d15d6 2444 if (regmatch(re->program + 1)) {
2c914db6
IZ
2445 /* Even though we succeeded, we need to restore
2446 global variables, since we may be wrapped inside
2447 SUSPEND, thus the match may be not finished yet. */
2448
2449 /* XXXX Do this only if SUSPENDed? */
2450 PL_reg_call_cc = state.prev;
2451 PL_regcc = state.cc;
2452 PL_reg_re = state.re;
2453 cache_re(PL_reg_re);
2454
2455 /* XXXX This is too dramatic a measure... */
2456 PL_reg_maxiter = 0;
2457
2458 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2459 ReREFCNT_dec(re);
2460 regcpblow(cp);
2461 sayYES;
2462 }
0f5d15d6
IZ
2463 ReREFCNT_dec(re);
2464 REGCP_UNWIND;
2465 regcppop();
2466 PL_reg_call_cc = state.prev;
2467 PL_regcc = state.cc;
2468 PL_reg_re = state.re;
d3790889 2469 cache_re(PL_reg_re);
2c2d71f5
JH
2470
2471 /* XXXX This is too dramatic a measure... */
2472 PL_reg_maxiter = 0;
2473
0f5d15d6
IZ
2474 sayNO;
2475 }
c277df42 2476 sw = SvTRUE(ret);
0f5d15d6 2477 logical = 0;
a0ed51b3
LW
2478 }
2479 else
3280af22 2480 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2481 break;
2482 }
a0d0e21e 2483 case OPEN:
c277df42 2484 n = ARG(scan); /* which paren pair */
3280af22
NIS
2485 PL_reg_start_tmp[n] = locinput;
2486 if (n > PL_regsize)
2487 PL_regsize = n;
a0d0e21e
LW
2488 break;
2489 case CLOSE:
c277df42 2490 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2491 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2492 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2493 if (n > *PL_reglastparen)
2494 *PL_reglastparen = n;
a0d0e21e 2495 break;
c277df42
IZ
2496 case GROUPP:
2497 n = ARG(scan); /* which paren pair */
cf93c79d 2498 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2499 break;
2500 case IFTHEN:
2c2d71f5 2501 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2502 if (sw)
2503 next = NEXTOPER(NEXTOPER(scan));
2504 else {
2505 next = scan + ARG(scan);
2506 if (OP(next) == IFTHEN) /* Fake one. */
2507 next = NEXTOPER(NEXTOPER(next));
2508 }
2509 break;
2510 case LOGICAL:
0f5d15d6 2511 logical = scan->flags;
c277df42 2512 break;
2ab05381
IZ
2513/*******************************************************************
2514 PL_regcc contains infoblock about the innermost (...)* loop, and
2515 a pointer to the next outer infoblock.
2516
2517 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2518
2519 1) After matching X, regnode for CURLYX is processed;
2520
2521 2) This regnode creates infoblock on the stack, and calls
2522 regmatch() recursively with the starting point at WHILEM node;
2523
2524 3) Each hit of WHILEM node tries to match A and Z (in the order
2525 depending on the current iteration, min/max of {min,max} and
2526 greediness). The information about where are nodes for "A"
2527 and "Z" is read from the infoblock, as is info on how many times "A"
2528 was already matched, and greediness.
2529
2530 4) After A matches, the same WHILEM node is hit again.
2531
2532 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2533 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2534 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2535 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2536 of the external loop.
2537
2538 Currently present infoblocks form a tree with a stem formed by PL_curcc
2539 and whatever it mentions via ->next, and additional attached trees
2540 corresponding to temporarily unset infoblocks as in "5" above.
2541
2542 In the following picture infoblocks for outer loop of
2543 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2544 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2545 infoblocks are drawn below the "reset" infoblock.
2546
2547 In fact in the picture below we do not show failed matches for Z and T
2548 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2549 more obvious *why* one needs to *temporary* unset infoblocks.]
2550
2551 Matched REx position InfoBlocks Comment
2552 (Y(A)*?Z)*?T x
2553 Y(A)*?Z)*?T x <- O
2554 Y (A)*?Z)*?T x <- O
2555 Y A)*?Z)*?T x <- O <- I
2556 YA )*?Z)*?T x <- O <- I
2557 YA A)*?Z)*?T x <- O <- I
2558 YAA )*?Z)*?T x <- O <- I
2559 YAA Z)*?T x <- O # Temporary unset I
2560 I
2561
2562 YAAZ Y(A)*?Z)*?T x <- O
2563 I
2564
2565 YAAZY (A)*?Z)*?T x <- O
2566 I
2567
2568 YAAZY A)*?Z)*?T x <- O <- I
2569 I
2570
2571 YAAZYA )*?Z)*?T x <- O <- I
2572 I
2573
2574 YAAZYA Z)*?T x <- O # Temporary unset I
2575 I,I
2576
2577 YAAZYAZ )*?T x <- O
2578 I,I
2579
2580 YAAZYAZ T x # Temporary unset O
2581 O
2582 I,I
2583
2584 YAAZYAZT x
2585 O
2586 I,I
2587 *******************************************************************/
a0d0e21e
LW
2588 case CURLYX: {
2589 CURCUR cc;
3280af22 2590 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
2591
2592 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2593 next += ARG(next);
3280af22
NIS
2594 cc.oldcc = PL_regcc;
2595 PL_regcc = &cc;
2596 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
2597 cc.cur = -1;
2598 cc.min = ARG1(scan);
2599 cc.max = ARG2(scan);
c277df42 2600 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2601 cc.next = next;
2602 cc.minmod = minmod;
2603 cc.lastloc = 0;
3280af22 2604 PL_reginput = locinput;
a0d0e21e
LW
2605 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2606 regcpblow(cp);
3280af22 2607 PL_regcc = cc.oldcc;
4633a7c4 2608 saySAME(n);
a0d0e21e
LW
2609 }
2610 /* NOT REACHED */
2611 case WHILEM: {
2612 /*
2613 * This is really hard to understand, because after we match
2614 * what we're trying to match, we must make sure the rest of
2c2d71f5 2615 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2616 * to go back UP the parse tree by recursing ever deeper. And
2617 * if it fails, we have to reset our parent's current state
2618 * that we can try again after backing off.
2619 */
2620
c277df42 2621 CHECKPOINT cp, lastcp;
3280af22 2622 CURCUR* cc = PL_regcc;
c277df42
IZ
2623 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2624
4633a7c4 2625 n = cc->cur + 1; /* how many we know we matched */
3280af22 2626 PL_reginput = locinput;
a0d0e21e 2627
c277df42
IZ
2628 DEBUG_r(
2629 PerlIO_printf(Perl_debug_log,
2630 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2631 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2632 (long)n, (long)cc->min,
2633 (long)cc->max, (long)cc)
2634 );
4633a7c4 2635
a0d0e21e
LW
2636 /* If degenerate scan matches "", assume scan done. */
2637
579cf2c3 2638 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2639 PL_regcc = cc->oldcc;
2ab05381
IZ
2640 if (PL_regcc)
2641 ln = PL_regcc->cur;
c277df42 2642 DEBUG_r(
c3464db5
DD
2643 PerlIO_printf(Perl_debug_log,
2644 "%*s empty match detected, try continuation...\n",
3280af22 2645 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2646 );
a0d0e21e 2647 if (regmatch(cc->next))
4633a7c4 2648 sayYES;
2ab05381
IZ
2649 if (PL_regcc)
2650 PL_regcc->cur = ln;
3280af22 2651 PL_regcc = cc;
4633a7c4 2652 sayNO;
a0d0e21e
LW
2653 }
2654
2655 /* First just match a string of min scans. */
2656
2657 if (n < cc->min) {
2658 cc->cur = n;
2659 cc->lastloc = locinput;
4633a7c4
LW
2660 if (regmatch(cc->scan))
2661 sayYES;
2662 cc->cur = n - 1;
c277df42 2663 cc->lastloc = lastloc;
4633a7c4 2664 sayNO;
a0d0e21e
LW
2665 }
2666
2c2d71f5
JH
2667 if (scan->flags) {
2668 /* Check whether we already were at this position.
2669 Postpone detection until we know the match is not
2670 *that* much linear. */
2671 if (!PL_reg_maxiter) {
2672 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2673 PL_reg_leftiter = PL_reg_maxiter;
2674 }
2675 if (PL_reg_leftiter-- == 0) {
2676 I32 size = (PL_reg_maxiter + 7)/8;
2677 if (PL_reg_poscache) {
2678 if (PL_reg_poscache_size < size) {
2679 Renew(PL_reg_poscache, size, char);
2680 PL_reg_poscache_size = size;
2681 }
2682 Zero(PL_reg_poscache, size, char);
2683 }
2684 else {
2685 PL_reg_poscache_size = size;
2686 Newz(29, PL_reg_poscache, size, char);
2687 }
2688 DEBUG_r(
2689 PerlIO_printf(Perl_debug_log,
2690 "%sDetected a super-linear match, switching on caching%s...\n",
2691 PL_colors[4], PL_colors[5])
2692 );
2693 }
2694 if (PL_reg_leftiter < 0) {
2695 I32 o = locinput - PL_bostr, b;
2696
2697 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2698 b = o % 8;
2699 o /= 8;
2700 if (PL_reg_poscache[o] & (1<<b)) {
2701 DEBUG_r(
2702 PerlIO_printf(Perl_debug_log,
2703 "%*s already tried at this position...\n",
2704 REPORT_CODE_OFF+PL_regindent*2, "")
2705 );
7821416a 2706 sayNO_SILENT;
2c2d71f5
JH
2707 }
2708 PL_reg_poscache[o] |= (1<<b);
2709 }
2710 }
2711
a0d0e21e
LW
2712 /* Prefer next over scan for minimal matching. */
2713
2714 if (cc->minmod) {
3280af22 2715 PL_regcc = cc->oldcc;
2ab05381
IZ
2716 if (PL_regcc)
2717 ln = PL_regcc->cur;
5f05dabc 2718 cp = regcppush(cc->parenfloor);
c277df42 2719 REGCP_SET;
5f05dabc 2720 if (regmatch(cc->next)) {
c277df42 2721 regcpblow(cp);
4633a7c4 2722 sayYES; /* All done. */
5f05dabc 2723 }
c277df42 2724 REGCP_UNWIND;
5f05dabc 2725 regcppop();
2ab05381
IZ
2726 if (PL_regcc)
2727 PL_regcc->cur = ln;
3280af22 2728 PL_regcc = cc;
a0d0e21e 2729
c277df42 2730 if (n >= cc->max) { /* Maximum greed exceeded? */
e476b1b5 2731 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2732 && !(PL_reg_flags & RF_warned)) {
2733 PL_reg_flags |= RF_warned;
e476b1b5 2734 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2735 "Complex regular subexpression recursion",
2736 REG_INFTY - 1);
c277df42 2737 }
4633a7c4 2738 sayNO;
c277df42 2739 }
a687059c 2740
c277df42 2741 DEBUG_r(
c3464db5
DD
2742 PerlIO_printf(Perl_debug_log,
2743 "%*s trying longer...\n",
3280af22 2744 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2745 );
a0d0e21e 2746 /* Try scanning more and see if it helps. */
3280af22 2747 PL_reginput = locinput;
a0d0e21e
LW
2748 cc->cur = n;
2749 cc->lastloc = locinput;
5f05dabc 2750 cp = regcppush(cc->parenfloor);
c277df42 2751 REGCP_SET;
5f05dabc 2752 if (regmatch(cc->scan)) {
c277df42 2753 regcpblow(cp);
4633a7c4 2754 sayYES;
5f05dabc 2755 }
c277df42 2756 REGCP_UNWIND;
5f05dabc 2757 regcppop();
4633a7c4 2758 cc->cur = n - 1;
c277df42 2759 cc->lastloc = lastloc;
4633a7c4 2760 sayNO;
a0d0e21e
LW
2761 }
2762
2763 /* Prefer scan over next for maximal matching. */
2764
2765 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2766 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2767 cc->cur = n;
2768 cc->lastloc = locinput;
c277df42 2769 REGCP_SET;
5f05dabc 2770 if (regmatch(cc->scan)) {
c277df42 2771 regcpblow(cp);
4633a7c4 2772 sayYES;
5f05dabc 2773 }
c277df42 2774 REGCP_UNWIND;
a0d0e21e 2775 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2776 PL_reginput = locinput;
c277df42 2777 DEBUG_r(
c3464db5
DD
2778 PerlIO_printf(Perl_debug_log,
2779 "%*s failed, try continuation...\n",
3280af22 2780 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2781 );
2782 }
e476b1b5 2783 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2784 && !(PL_reg_flags & RF_warned)) {
3280af22 2785 PL_reg_flags |= RF_warned;
e476b1b5 2786 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2787 "Complex regular subexpression recursion",
2788 REG_INFTY - 1);
a0d0e21e
LW
2789 }
2790
2791 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2792 PL_regcc = cc->oldcc;
2ab05381
IZ
2793 if (PL_regcc)
2794 ln = PL_regcc->cur;
a0d0e21e 2795 if (regmatch(cc->next))
4633a7c4 2796 sayYES;
2ab05381
IZ
2797 if (PL_regcc)
2798 PL_regcc->cur = ln;
3280af22 2799 PL_regcc = cc;
4633a7c4 2800 cc->cur = n - 1;
c277df42 2801 cc->lastloc = lastloc;
4633a7c4 2802 sayNO;
a0d0e21e
LW
2803 }
2804 /* NOT REACHED */
c277df42
IZ
2805 case BRANCHJ:
2806 next = scan + ARG(scan);
2807 if (next == scan)
2808 next = NULL;
2809 inner = NEXTOPER(NEXTOPER(scan));
2810 goto do_branch;
2811 case BRANCH:
2812 inner = NEXTOPER(scan);
2813 do_branch:
2814 {
2815 CHECKPOINT lastcp;
2816 c1 = OP(scan);
2817 if (OP(next) != c1) /* No choice. */
2818 next = inner; /* Avoid recursion. */
a0d0e21e 2819 else {
3280af22 2820 int lastparen = *PL_reglastparen;
c277df42
IZ
2821
2822 REGCP_SET;
a0d0e21e 2823 do {
3280af22 2824 PL_reginput = locinput;
c277df42 2825 if (regmatch(inner))
4633a7c4 2826 sayYES;
c277df42 2827 REGCP_UNWIND;
3280af22 2828 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2829 PL_regendp[n] = -1;
3280af22 2830 *PL_reglastparen = n;
c277df42 2831 scan = next;
a0d0e21e 2832 /*SUPPRESS 560*/
155aba94 2833 if ((n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next))))
c277df42 2834 next += n;
a0d0e21e 2835 else
c277df42 2836 next = NULL;
c277df42
IZ
2837 inner = NEXTOPER(scan);
2838 if (c1 == BRANCHJ) {
2839 inner = NEXTOPER(inner);
2840 }
2841 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2842 sayNO;
a0d0e21e 2843 /* NOTREACHED */
a687059c 2844 }
a0d0e21e
LW
2845 }
2846 break;
2847 case MINMOD:
2848 minmod = 1;
2849 break;
c277df42
IZ
2850 case CURLYM:
2851 {
00db4c45 2852 I32 l = 0;
c277df42
IZ
2853 CHECKPOINT lastcp;
2854
2855 /* We suppose that the next guy does not need
2856 backtracking: in particular, it is of constant length,
2857 and has no parenths to influence future backrefs. */
2858 ln = ARG1(scan); /* min to match */
2859 n = ARG2(scan); /* max to match */
c277df42
IZ
2860 paren = scan->flags;
2861 if (paren) {
3280af22
NIS
2862 if (paren > PL_regsize)
2863 PL_regsize = paren;
2864 if (paren > *PL_reglastparen)
2865 *PL_reglastparen = paren;
c277df42 2866 }
dc45a647 2867 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2868 if (paren)
2869 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2870 PL_reginput = locinput;
c277df42
IZ
2871 if (minmod) {
2872 minmod = 0;
2873 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2874 sayNO;
5f4b28b2 2875 if (ln && l == 0 && n >= ln
c277df42
IZ
2876 /* In fact, this is tricky. If paren, then the
2877 fact that we did/didnot match may influence
2878 future execution. */
2879 && !(paren && ln == 0))
2880 ln = n;
3280af22 2881 locinput = PL_reginput;
22c35a8c 2882 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2883 c1 = (U8)*STRING(next);
c277df42 2884 if (OP(next) == EXACTF)
22c35a8c 2885 c2 = PL_fold[c1];
c277df42 2886 else if (OP(next) == EXACTFL)
22c35a8c 2887 c2 = PL_fold_locale[c1];
c277df42
IZ
2888 else
2889 c2 = c1;
a0ed51b3
LW
2890 }
2891 else
c277df42
IZ
2892 c1 = c2 = -1000;
2893 REGCP_SET;
5f4b28b2 2894 /* This may be improved if l == 0. */
c277df42
IZ
2895 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2896 /* If it could work, try it. */
2897 if (c1 == -1000 ||
3280af22
NIS
2898 UCHARAT(PL_reginput) == c1 ||
2899 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2900 {
2901 if (paren) {
2902 if (n) {
cf93c79d
IZ
2903 PL_regstartp[paren] =
2904 HOPc(PL_reginput, -l) - PL_bostr;
2905 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2906 }
2907 else
cf93c79d 2908 PL_regendp[paren] = -1;
c277df42
IZ
2909 }
2910 if (regmatch(next))
2911 sayYES;
2912 REGCP_UNWIND;
2913 }
2914 /* Couldn't or didn't -- move forward. */
3280af22 2915 PL_reginput = locinput;
c277df42
IZ
2916 if (regrepeat_hard(scan, 1, &l)) {
2917 ln++;
3280af22 2918 locinput = PL_reginput;
c277df42
IZ
2919 }
2920 else
2921 sayNO;
2922 }
a0ed51b3
LW
2923 }
2924 else {
c277df42
IZ
2925 n = regrepeat_hard(scan, n, &l);
2926 if (n != 0 && l == 0
2927 /* In fact, this is tricky. If paren, then the
2928 fact that we did/didnot match may influence
2929 future execution. */
2930 && !(paren && ln == 0))
2931 ln = n;
3280af22 2932 locinput = PL_reginput;
c277df42 2933 DEBUG_r(
5c0ca799 2934 PerlIO_printf(Perl_debug_log,
faccc32b 2935 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 2936 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 2937 (IV) n, (IV)l)
c277df42
IZ
2938 );
2939 if (n >= ln) {
22c35a8c 2940 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2941 c1 = (U8)*STRING(next);
c277df42 2942 if (OP(next) == EXACTF)
22c35a8c 2943 c2 = PL_fold[c1];
c277df42 2944 else if (OP(next) == EXACTFL)
22c35a8c 2945 c2 = PL_fold_locale[c1];
c277df42
IZ
2946 else
2947 c2 = c1;
a0ed51b3
LW
2948 }
2949 else
c277df42
IZ
2950 c1 = c2 = -1000;
2951 }
2952 REGCP_SET;
2953 while (n >= ln) {
2954 /* If it could work, try it. */
2955 if (c1 == -1000 ||
3280af22
NIS
2956 UCHARAT(PL_reginput) == c1 ||
2957 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
2958 {
2959 DEBUG_r(
c3464db5 2960 PerlIO_printf(Perl_debug_log,
7b0972df
JH
2961 "%*s trying tail with n=%"IVdf"...\n",
2962 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
2963 );
2964 if (paren) {
2965 if (n) {
cf93c79d
IZ
2966 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2967 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2968 }
a0ed51b3 2969 else
cf93c79d 2970 PL_regendp[paren] = -1;
c277df42 2971 }
a0ed51b3
LW
2972 if (regmatch(next))
2973 sayYES;
2974 REGCP_UNWIND;
2975 }
c277df42
IZ
2976 /* Couldn't or didn't -- back up. */
2977 n--;
dfe13c55 2978 locinput = HOPc(locinput, -l);
3280af22 2979 PL_reginput = locinput;
c277df42
IZ
2980 }
2981 }
2982 sayNO;
2983 break;
2984 }
2985 case CURLYN:
2986 paren = scan->flags; /* Which paren to set */
3280af22
NIS
2987 if (paren > PL_regsize)
2988 PL_regsize = paren;
2989 if (paren > *PL_reglastparen)
2990 *PL_reglastparen = paren;
c277df42
IZ
2991 ln = ARG1(scan); /* min to match */
2992 n = ARG2(scan); /* max to match */
dc45a647 2993 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2994 goto repeat;
a0d0e21e 2995 case CURLY:
c277df42 2996 paren = 0;
a0d0e21e
LW
2997 ln = ARG1(scan); /* min to match */
2998 n = ARG2(scan); /* max to match */
dc45a647 2999 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3000 goto repeat;
3001 case STAR:
3002 ln = 0;
c277df42 3003 n = REG_INFTY;
a0d0e21e 3004 scan = NEXTOPER(scan);
c277df42 3005 paren = 0;
a0d0e21e
LW
3006 goto repeat;
3007 case PLUS:
c277df42
IZ
3008 ln = 1;
3009 n = REG_INFTY;
3010 scan = NEXTOPER(scan);
3011 paren = 0;
3012 repeat:
a0d0e21e
LW
3013 /*
3014 * Lookahead to avoid useless match attempts
3015 * when we know what character comes next.
3016 */
22c35a8c 3017 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3018 c1 = (U8)*STRING(next);
bbce6d69 3019 if (OP(next) == EXACTF)
22c35a8c 3020 c2 = PL_fold[c1];
bbce6d69 3021 else if (OP(next) == EXACTFL)
22c35a8c 3022 c2 = PL_fold_locale[c1];
bbce6d69
PP
3023 else
3024 c2 = c1;
3025 }
a0d0e21e 3026 else
bbce6d69 3027 c1 = c2 = -1000;
3280af22 3028 PL_reginput = locinput;
a0d0e21e 3029 if (minmod) {
c277df42 3030 CHECKPOINT lastcp;
a0d0e21e
LW
3031 minmod = 0;
3032 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3033 sayNO;
a0ed51b3 3034 locinput = PL_reginput;
c277df42 3035 REGCP_SET;
0fe9bf95
IZ
3036 if (c1 != -1000) {
3037 char *e = locinput + n - ln; /* Should not check after this */
3038 char *old = locinput;
3039
3040 if (e >= PL_regeol || (n == REG_INFTY))
3041 e = PL_regeol - 1;
3042 while (1) {
3043 /* Find place 'next' could work */
3044 if (c1 == c2) {
3045 while (locinput <= e && *locinput != c1)
3046 locinput++;
3047 } else {
3048 while (locinput <= e
3049 && *locinput != c1
3050 && *locinput != c2)
3051 locinput++;
3052 }
3053 if (locinput > e)
3054 sayNO;
3055 /* PL_reginput == old now */
3056 if (locinput != old) {
3057 ln = 1; /* Did some */
3058 if (regrepeat(scan, locinput - old) <
3059 locinput - old)
3060 sayNO;
3061 }
3062 /* PL_reginput == locinput now */
29d1e993 3063 TRYPAREN(paren, ln, locinput);
0fe9bf95
IZ
3064 PL_reginput = locinput; /* Could be reset... */
3065 REGCP_UNWIND;
3066 /* Couldn't or didn't -- move forward. */
3067 old = locinput++;
3068 }
3069 }
3070 else
c277df42 3071 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 3072 /* If it could work, try it. */
bbce6d69 3073 if (c1 == -1000 ||
3280af22
NIS
3074 UCHARAT(PL_reginput) == c1 ||
3075 UCHARAT(PL_reginput) == c2)
bbce6d69 3076 {
29d1e993 3077 TRYPAREN(paren, n, PL_reginput);
c277df42 3078 REGCP_UNWIND;
bbce6d69 3079 }
c277df42 3080 /* Couldn't or didn't -- move forward. */
a0ed51b3 3081 PL_reginput = locinput;
a0d0e21e
LW
3082 if (regrepeat(scan, 1)) {
3083 ln++;
a0ed51b3
LW
3084 locinput = PL_reginput;
3085 }
3086 else
4633a7c4 3087 sayNO;
a0d0e21e
LW
3088 }
3089 }
3090 else {
c277df42 3091 CHECKPOINT lastcp;
a0d0e21e 3092 n = regrepeat(scan, n);
a0ed51b3 3093 locinput = PL_reginput;
22c35a8c 3094 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3095 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3096 ln = n; /* why back off? */
1aeab75a
GS
3097 /* ...because $ and \Z can match before *and* after
3098 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3099 We should back off by one in this case. */
3100 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3101 ln--;
3102 }
c277df42
IZ
3103 REGCP_SET;
3104 if (paren) {
3105 while (n >= ln) {
3106 /* If it could work, try it. */
3107 if (c1 == -1000 ||
3280af22
NIS
3108 UCHARAT(PL_reginput) == c1 ||
3109 UCHARAT(PL_reginput) == c2)
c277df42 3110 {
29d1e993 3111 TRYPAREN(paren, n, PL_reginput);
c277df42
IZ
3112 REGCP_UNWIND;
3113 }
3114 /* Couldn't or didn't -- back up. */
3115 n--;
dfe13c55 3116 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3117 }
a0ed51b3
LW
3118 }
3119 else {
c277df42
IZ
3120 while (n >= ln) {
3121 /* If it could work, try it. */
3122 if (c1 == -1000 ||
3280af22
NIS
3123 UCHARAT(PL_reginput) == c1 ||
3124 UCHARAT(PL_reginput) == c2)
c277df42 3125 {
29d1e993 3126 TRYPAREN(paren, n, PL_reginput);
c277df42
IZ
3127 REGCP_UNWIND;
3128 }
3129 /* Couldn't or didn't -- back up. */
3130 n--;
dfe13c55 3131 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3132 }
a0d0e21e
LW
3133 }
3134 }
4633a7c4 3135 sayNO;
c277df42 3136 break;
a0d0e21e 3137 case END:
0f5d15d6
IZ
3138 if (PL_reg_call_cc) {
3139 re_cc_state *cur_call_cc = PL_reg_call_cc;
3140 CURCUR *cctmp = PL_regcc;
3141 regexp *re = PL_reg_re;
3142 CHECKPOINT cp, lastcp;
3143
3144 cp = regcppush(0); /* Save *all* the positions. */
3145 REGCP_SET;
3146 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3147 the caller. */
3148 PL_reginput = locinput; /* Make position available to
3149 the callcc. */
3150 cache_re(PL_reg_call_cc->re);
3151 PL_regcc = PL_reg_call_cc->cc;
3152 PL_reg_call_cc = PL_reg_call_cc->prev;
3153 if (regmatch(cur_call_cc->node)) {
3154 PL_reg_call_cc = cur_call_cc;
3155 regcpblow(cp);
3156 sayYES;
3157 }
3158 REGCP_UNWIND;
3159 regcppop();
3160 PL_reg_call_cc = cur_call_cc;
3161 PL_regcc = cctmp;
3162 PL_reg_re = re;
3163 cache_re(re);
3164
3165 DEBUG_r(
3166 PerlIO_printf(Perl_debug_log,
3167 "%*s continuation failed...\n",
3168 REPORT_CODE_OFF+PL_regindent*2, "")
3169 );
7821416a 3170 sayNO_SILENT;
0f5d15d6 3171 }
7821416a
IZ
3172 if (locinput < PL_regtill) {
3173 DEBUG_r(PerlIO_printf(Perl_debug_log,
3174 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3175 PL_colors[4],
3176 (long)(locinput - PL_reg_starttry),
3177 (long)(PL_regtill - PL_reg_starttry),
3178 PL_colors[5]));
3179 sayNO_FINAL; /* Cannot match: too short. */
3180 }
3181 PL_reginput = locinput; /* put where regtry can find it */
3182 sayYES_FINAL; /* Success! */
7e5428c5 3183 case SUCCEED:
3280af22 3184 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3185 sayYES_LOUD; /* Success! */
c277df42
IZ
3186 case SUSPEND:
3187 n = 1;
9fe1d20c 3188 PL_reginput = locinput;
c277df42 3189 goto do_ifmatch;
a0d0e21e 3190 case UNLESSM:
c277df42 3191 n = 0;
a0ed51b3 3192 if (scan->flags) {
0fe9bf95
IZ
3193 if (UTF) { /* XXXX This is absolutely
3194 broken, we read before
3195 start of string. */
3196 s = HOPMAYBEc(locinput, -scan->flags);
3197 if (!s)
3198 goto say_yes;
3199 PL_reginput = s;
3200 }
3201 else {
3202 if (locinput < PL_bostr + scan->flags)
3203 goto say_yes;
3204 PL_reginput = locinput - scan->flags;
3205 goto do_ifmatch;
3206 }
a0ed51b3
LW
3207 }
3208 else
3209 PL_reginput = locinput;
c277df42
IZ
3210 goto do_ifmatch;
3211 case IFMATCH:
3212 n = 1;
a0ed51b3 3213 if (scan->flags) {
0fe9bf95
IZ
3214 if (UTF) { /* XXXX This is absolutely
3215 broken, we read before
3216 start of string. */
3217 s = HOPMAYBEc(locinput, -scan->flags);
3218 if (!s || s < PL_bostr)
3219 goto say_no;
3220 PL_reginput = s;
3221 }
3222 else {
3223 if (locinput < PL_bostr + scan->flags)
3224 goto say_no;
3225 PL_reginput = locinput - scan->flags;
3226 goto do_ifmatch;
3227 }
a0ed51b3
LW
3228 }
3229 else
3230 PL_reginput = locinput;
3231
c277df42 3232 do_ifmatch:
c277df42
IZ
3233 inner = NEXTOPER(NEXTOPER(scan));
3234 if (regmatch(inner) != n) {
3235 say_no:
3236 if (logical) {
3237 logical = 0;
3238 sw = 0;
3239 goto do_longjump;
a0ed51b3
LW
3240 }
3241 else
c277df42
IZ
3242 sayNO;
3243 }
3244 say_yes:
3245 if (logical) {
3246 logical = 0;
3247 sw = 1;
3248 }
fe44a5e8 3249 if (OP(scan) == SUSPEND) {
3280af22 3250 locinput = PL_reginput;
565764a8 3251 nextchr = UCHARAT(locinput);
fe44a5e8 3252 }
c277df42
IZ
3253 /* FALL THROUGH. */
3254 case LONGJMP:
3255 do_longjump:
3256 next = scan + ARG(scan);
3257 if (next == scan)
3258 next = NULL;
a0d0e21e
LW
3259 break;
3260 default:
b900a521 3261 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3262 PTR2UV(scan), OP(scan));
cea2e8a9 3263 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3264 }
a0d0e21e
LW
3265 scan = next;
3266 }
a687059c 3267
a0d0e21e
LW
3268 /*
3269 * We get here only if there's trouble -- normally "case END" is
3270 * the terminating point.
3271 */
cea2e8a9 3272 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3273 /*NOTREACHED*/
4633a7c4
LW
3274 sayNO;
3275
7821416a
IZ
3276yes_loud:
3277 DEBUG_r(
3278 PerlIO_printf(Perl_debug_log,
3279 "%*s %scould match...%s\n",
3280 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3281 );
3282 goto yes;
3283yes_final:
3284 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3285 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3286yes:
3287#ifdef DEBUGGING
3280af22 3288 PL_regindent--;
4633a7c4
LW
3289#endif
3290 return 1;
3291
3292no:
7821416a
IZ
3293 DEBUG_r(
3294 PerlIO_printf(Perl_debug_log,
3295 "%*s %sfailed...%s\n",
3296 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3297 );
3298 goto do_no;
3299no_final:
3300do_no:
4633a7c4 3301#ifdef DEBUGGING
3280af22 3302 PL_regindent--;
4633a7c4 3303#endif
a0d0e21e 3304 return 0;
a687059c
LW
3305}
3306
3307/*
3308 - regrepeat - repeatedly match something simple, report how many
3309 */
3310/*
3311 * [This routine now assumes that it will only match on things of length 1.
3312 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3313 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3314 */
76e3520e 3315STATIC I32
cea2e8a9 3316S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3317{
5c0ca799 3318 dTHR;
a0d0e21e 3319 register char *scan;
a0d0e21e 3320 register I32 c;
3280af22 3321 register char *loceol = PL_regeol;
a0ed51b3 3322 register I32 hardcount = 0;
a0d0e21e 3323
3280af22 3324 scan = PL_reginput;
c277df42 3325 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3326 loceol = scan + max;
a0d0e21e 3327 switch (OP(p)) {
22c35a8c 3328 case REG_ANY:
a0d0e21e
LW
3329 while (scan < loceol && *scan != '\n')
3330 scan++;
3331 break;
3332 case SANY:
3333 scan = loceol;
3334 break;
a0ed51b3
LW
3335 case ANYUTF8:
3336 loceol = PL_regeol;
3337 while (scan < loceol && *scan != '\n') {
3338 scan += UTF8SKIP(scan);
3339 hardcount++;
3340 }
3341 break;
3342 case SANYUTF8:
3343 loceol = PL_regeol;
3344 while (scan < loceol) {
3345 scan += UTF8SKIP(scan);
3346 hardcount++;
3347 }
3348 break;
bbce6d69 3349 case EXACT: /* length of string is 1 */
cd439c50 3350 c = (U8)*STRING(p);
bbce6d69
PP
3351 while (scan < loceol && UCHARAT(scan) == c)
3352 scan++;
3353 break;
3354 case EXACTF: /* length of string is 1 */
cd439c50 3355 c = (U8)*STRING(p);
bbce6d69 3356 while (scan < loceol &&
22c35a8c 3357 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69
PP
3358 scan++;
3359 break;
3360 case EXACTFL: /* length of string is 1 */
3280af22 3361 PL_reg_flags |= RF_tainted;
cd439c50 3362 c = (U8)*STRING(p);
bbce6d69 3363 while (scan < loceol &&
22c35a8c 3364 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3365 scan++;
3366 break;
a0ed51b3
LW
3367 case ANYOFUTF8:
3368 loceol = PL_regeol;
3369 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3370 scan += UTF8SKIP(scan);
3371 hardcount++;
3372 }
3373 break;
a0d0e21e 3374 case ANYOF:
936ed897 3375 while (scan < loceol && REGINCLASS(p, *scan))
a0d0e21e 3376 scan++;
a0d0e21e
LW
3377 break;
3378 case ALNUM:
3379 while (scan < loceol && isALNUM(*scan))
3380 scan++;
3381 break;
a0ed51b3
LW
3382 case ALNUMUTF8:
3383 loceol = PL_regeol;
dfe13c55 3384 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3385 scan += UTF8SKIP(scan);
3386 hardcount++;
3387 }
3388 break;
bbce6d69 3389 case ALNUML:
3280af22 3390 PL_reg_flags |= RF_tainted;
bbce6d69
PP
3391 while (scan < loceol && isALNUM_LC(*scan))
3392 scan++;
3393 break;
a0ed51b3
LW
3394 case ALNUMLUTF8:
3395 PL_reg_flags |= RF_tainted;
3396 loceol = PL_regeol;
dfe13c55 3397 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3398 scan += UTF8SKIP(scan);
3399 hardcount++;
3400 }
3401 break;
3402 break;
a0d0e21e
LW
3403 case NALNUM:
3404 while (scan < loceol && !isALNUM(*scan))
3405 scan++;
3406 break;
a0ed51b3
LW
3407 case NALNUMUTF8:
3408 loceol = PL_regeol;
dfe13c55 3409 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3410 scan += UTF8SKIP(scan);
3411 hardcount++;
3412 }
3413 break;
bbce6d69 3414 case NALNUML:
3280af22 3415 PL_reg_flags |= RF_tainted;
bbce6d69
PP
3416 while (scan < loceol && !isALNUM_LC(*scan))
3417 scan++;
3418 break;
a0ed51b3
LW
3419 case NALNUMLUTF8:
3420 PL_reg_flags |= RF_tainted;
3421 loceol = PL_regeol;
dfe13c55 3422 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3423 scan += UTF8SKIP(scan);
3424 hardcount++;
3425 }
3426 break;
a0d0e21e
LW
3427 case SPACE:
3428 while (scan < loceol && isSPACE(*scan))
3429 scan++;
3430 break;
a0ed51b3
LW
3431 case SPACEUTF8:
3432 loceol = PL_regeol;
dfe13c55 3433 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3434 scan += UTF8SKIP(scan);
3435 hardcount++;
3436 }
3437 break;
bbce6d69 3438 case SPACEL:
3280af22 3439 PL_reg_flags |= RF_tainted;
bbce6d69
PP
3440 while (scan < loceol && isSPACE_LC(*scan))
3441 scan++;
3442 break;
a0ed51b3
LW
3443 case SPACELUTF8:
3444 PL_reg_flags |= RF_tainted;
3445 loceol = PL_regeol;
dfe13c55 3446 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3447 scan += UTF8SKIP(scan);
3448 hardcount++;
3449 }
3450 break;
a0d0e21e
LW
3451 case NSPACE:
3452 while (scan < loceol && !isSPACE(*scan))
3453 scan++;
3454 break;
a0ed51b3
LW
3455 case NSPACEUTF8:
3456 loceol = PL_regeol;
dfe13c55 3457 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3458 scan += UTF8SKIP(scan);
3459 hardcount++;
3460 }
3461 break;
bbce6d69 3462 case NSPACEL:
3280af22 3463 PL_reg_flags |= RF_tainted;
bbce6d69
PP
3464 while (scan < loceol && !isSPACE_LC(*scan))
3465 scan++;
3466 break;
a0ed51b3
LW
3467 case NSPACELUTF8:
3468 PL_reg_flags |= RF_tainted;
3469 loceol = PL_regeol;
dfe13c55 3470 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3471 scan += UTF8SKIP(scan);
3472 hardcount++;
3473 }
3474 break;
a0d0e21e
LW
3475 case DIGIT:
3476 while (scan < loceol && isDIGIT(*scan))
3477 scan++;
3478 break;
a0ed51b3
LW
3479 case DIGITUTF8:
3480 loceol = PL_regeol;
dfe13c55 3481 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3482 scan += UTF8SKIP(scan);
3483 hardcount++;
3484 }
3485 break;
3486 break;
a0d0e21e
LW
3487 case NDIGIT:
3488 while (scan < loceol && !isDIGIT(*scan))
3489 scan++;
3490 break;
a0ed51b3
LW
3491 case NDIGITUTF8:
3492 loceol = PL_regeol;
dfe13c55 3493 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3494 scan += UTF8SKIP(scan);
3495 hardcount++;
3496 }
3497 break;
a0d0e21e
LW
3498 default: /* Called on something of 0 width. */
3499 break; /* So match right here or not at all. */
3500 }
a687059c 3501
a0ed51b3
LW
3502 if (hardcount)
3503 c = hardcount;
3504 else
3505 c = scan - PL_reginput;
3280af22 3506 PL_reginput = scan;
a687059c 3507
c277df42
IZ
3508 DEBUG_r(
3509 {
3510 SV *prop = sv_newmortal();
3511
3512 regprop(prop, p);
3513 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3514 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3515 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3516 });
3517
a0d0e21e 3518 return(c);
a687059c
LW
3519}
3520
3521/*
c277df42
IZ
3522 - regrepeat_hard - repeatedly match something, report total lenth and length
3523 *
3524 * The repeater is supposed to have constant length.
3525 */
3526
76e3520e 3527STATIC I32
cea2e8a9 3528S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3529{
5c0ca799 3530 dTHR;
c277df42
IZ
3531 register char *scan;
3532 register char *start;
3280af22 3533 register char *loceol = PL_regeol;
a0ed51b3 3534 I32 l = 0;
708e3b05 3535 I32 count = 0, res = 1;
a0ed51b3
LW
3536
3537 if (!max)
3538 return 0;
c277df42 3539
3280af22 3540 start = PL_reginput;
a0ed51b3 3541 if (UTF) {
708e3b05 3542 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3543 if (!count++) {
3544 l = 0;
3545 while (start < PL_reginput) {
3546 l++;
3547 start += UTF8SKIP(start);
3548 }
3549 *lp = l;
3550 if (l == 0)
3551 return max;
3552 }
3553 if (count == max)
3554 return count;
3555 }
3556 }
3557 else {
708e3b05 3558 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3559 if (!count++) {
3560 *lp = l = PL_reginput - start;
3561 if (max != REG_INFTY && l*max < loceol - scan)
3562 loceol = scan + l*max;
3563 if (l == 0)
3564 return max;
c277df42
IZ
3565 }
3566 }
3567 }
708e3b05 3568 if (!res)
3280af22 3569 PL_reginput = scan;
c277df42 3570
a0ed51b3 3571 return count;
c277df42
IZ
3572}
3573
3574/*
cb8d8820 3575 - reginclass - determine if a character falls into a character class
bbce6d69
PP
3576 */
3577
76e3520e 3578STATIC bool
936ed897 3579S_reginclass(pTHX_ register regnode *p, register I32 c)
bbce6d69 3580{
5c0ca799 3581 dTHR;
b8c5462f 3582 char flags = ANYOF_FLAGS(p);
bbce6d69
PP
3583 bool match = FALSE;
3584
3585 c &= 0xFF;
b8c5462f 3586 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69
PP
3587 match = TRUE;
3588 else if (flags & ANYOF_FOLD) {
3589 I32 cf;
3590 if (flags & ANYOF_LOCALE) {
3280af22 3591 PL_reg_flags |= RF_tainted;
22c35a8c 3592 cf = PL_fold_locale[c];
bbce6d69
PP
3593 }
3594 else
22c35a8c 3595 cf = PL_fold[c];
b8c5462f 3596 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69
PP
3597 match = TRUE;
3598 }
3599
b8c5462f 3600 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3601 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3602 if (
3603 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3604 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3605 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3606 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3607 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3608 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3609 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3610 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3611 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3612 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3613 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3614 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3615 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3616 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3617 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3618 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3619 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3620 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3621 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3622 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3623 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3624 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3625 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3626 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3627 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3628 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
3629 ) /* How's that for a conditional? */
bbce6d69
PP
3630 {
3631 match = TRUE;
3632 }
3633 }
3634
ae5c130c 3635 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69
PP
3636}
3637
a0ed51b3 3638STATIC bool
cea2e8a9 3639S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
3640{
3641 dTHR;
a0ed51b3
LW
3642 char flags = ARG1(f);
3643 bool match = FALSE;
3644 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3645
3646 if (swash_fetch(sv, p))
3647 match = TRUE;
3648 else if (flags & ANYOF_FOLD) {
806e7201 3649 U8 tmpbuf[UTF8_MAXLEN];
a0ed51b3
LW
3650 if (flags & ANYOF_LOCALE) {
3651 PL_reg_flags |= RF_tainted;
3652 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3653 }
3654 else
3655 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3656 if (swash_fetch(sv, tmpbuf))
3657 match = TRUE;
3658 }
3659
b8c5462f 3660 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3661
3662 return (flags & ANYOF_INVERT) ? !match : match;
3663}
161b471a 3664
dfe13c55 3665STATIC U8 *
cea2e8a9 3666S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
3667{
3668 dTHR;
a0ed51b3
LW
3669 if (off >= 0) {
3670 while (off-- && s < (U8*)PL_regeol)
3671 s += UTF8SKIP(s);
3672 }
3673 else {
3674 while (off++) {
3675 if (s > (U8*)PL_bostr) {
3676 s--;
3677 if (*s & 0x80) {
3678 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3679 s--;
3680 } /* XXX could check well-formedness here */
3681 }
3682 }
3683 }
3684 return s;
3685}
161b471a 3686
dfe13c55 3687STATIC U8 *
cea2e8a9 3688S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3689{
c485e607 3690 dTHR;
a0ed51b3
LW
3691 if (off >= 0) {
3692 while (off-- && s < (U8*)PL_regeol)
3693 s += UTF8SKIP(s);
3694 if (off >= 0)
3695 return 0;
3696 }
3697 else {
3698 while (off++) {
3699 if (s > (U8*)PL_bostr) {
3700 s--;
3701 if (*s & 0x80) {
3702 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3703 s--;
3704 } /* XXX could check well-formedness here */
3705 }
3706 else
3707 break;
3708 }
3709 if (off <= 0)
3710 return 0;
3711 }
3712 return s;
3713}
51371543
GS
3714
3715#ifdef PERL_OBJECT
51371543
GS
3716#include "XSUB.h"
3717#endif
3718
3719static void
3720restore_pos(pTHXo_ void *arg)
3721{
3722 dTHR;
3723 if (PL_reg_eval_set) {
3724 if (PL_reg_oldsaved) {
3725 PL_reg_re->subbeg = PL_reg_oldsaved;
3726 PL_reg_re->sublen = PL_reg_oldsavedlen;
3727