This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
fix accidental pessimization in RE optimizer (from Ilya Zakharevich)
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
3818b22b 69 **** Copyright (c) 1991-2000, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
c3464db5 148# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b
JH
149 " Setting an EVAL scope, savestack=%"IVdf"\n", \
150 (IV)PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 151
3280af22 152# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5 153 PerlIO_printf(Perl_debug_log, \
faccc32b
JH
154 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
155 (IV)lastcp, (IV)PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5 178 PerlIO_printf(Perl_debug_log,
b900a521
JH
179 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
180 (UV)paren, (IV)PL_regstartp[paren],
181 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
182 (IV)PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 188 PerlIO_printf(Perl_debug_log,
faccc32b
JH
189 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
190 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
191 }
192 );
3280af22
NIS
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d
IZ
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e
LW
197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6
IZ
205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6
IZ
211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
c277df42 222#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 223
a687059c 224/*
e50aee73 225 * pregexec and friends
a687059c
LW
226 */
227
228/*
c277df42 229 - pregexec - match a regexp against a string
a687059c 230 */
c277df42 231I32
864dbfa3 232Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 233 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
234/* strend: pointer to null at end of string */
235/* strbeg: real beginning of string */
236/* minend: end of match must be >=minend after stringarg. */
237/* nosave: For optimizations. */
238{
239 return
240 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
241 nosave ? 0 : REXEC_COPY_STR);
242}
0f5d15d6
IZ
243
244STATIC void
cea2e8a9 245S_cache_re(pTHX_ regexp *prog)
0f5d15d6 246{
46124e9e 247 dTHR;
0f5d15d6
IZ
248 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
249#ifdef DEBUGGING
250 PL_regprogram = prog->program;
251#endif
252 PL_regnpar = prog->nparens;
253 PL_regdata = prog->data;
254 PL_reg_re = prog;
255}
22e551b9 256
cad2e5aa
JH
257/*
258 * Need to implement the following flags for reg_anch:
259 *
260 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
261 * USE_INTUIT_ML
262 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
263 * INTUIT_AUTORITATIVE_ML
264 * INTUIT_ONCE_NOML - Intuit can match in one location only.
265 * INTUIT_ONCE_ML
266 *
267 * Another flag for this function: SECOND_TIME (so that float substrs
268 * with giant delta may be not rechecked).
269 */
270
271/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
272
2c2d71f5 273/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
274 Otherwise, only SvCUR(sv) is used to get strbeg. */
275
276/* XXXX We assume that strpos is strbeg unless sv. */
277
6eb5f6b9
JH
278/* XXXX Some places assume that there is a fixed substring.
279 An update may be needed if optimizer marks as "INTUITable"
280 RExen without fixed substrings. Similarly, it is assumed that
281 lengths of all the strings are no more than minlen, thus they
282 cannot come from lookahead.
283 (Or minlen should take into account lookahead.) */
284
2c2d71f5
JH
285/* A failure to find a constant substring means that there is no need to make
286 an expensive call to REx engine, thus we celebrate a failure. Similarly,
287 finding a substring too deep into the string means that less calls to
30944b6d
IZ
288 regtry() should be needed.
289
290 REx compiler's optimizer found 4 possible hints:
291 a) Anchored substring;
292 b) Fixed substring;
293 c) Whether we are anchored (beginning-of-line or \G);
294 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 295 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
296 string which does not contradict any of them.
297 */
2c2d71f5 298
6eb5f6b9
JH
299/* Most of decisions we do here should have been done at compile time.
300 The nodes of the REx which we used for the search should have been
301 deleted from the finite automaton. */
302
cad2e5aa
JH
303char *
304Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
305 char *strend, U32 flags, re_scream_pos_data *data)
306{
2c2d71f5 307 register I32 start_shift;
cad2e5aa 308 /* Should be nonnegative! */
2c2d71f5
JH
309 register I32 end_shift;
310 register char *s;
311 register SV *check;
cad2e5aa
JH
312 char *t;
313 I32 ml_anch;
2c2d71f5 314 char *tmp;
6eb5f6b9
JH
315 register char *other_last = Nullch; /* other substr checked before this */
316 char *check_at; /* check substr found at this pos */
30944b6d
IZ
317#ifdef DEBUGGING
318 char *i_strpos = strpos;
319#endif
cad2e5aa
JH
320
321 DEBUG_r( if (!PL_colorset) reginitcolors() );
322 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 323 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
324 PL_colors[4],PL_colors[5],PL_colors[0],
325 prog->precomp,
326 PL_colors[1],
327 (strlen(prog->precomp) > 60 ? "..." : ""),
328 PL_colors[0],
b900a521 329 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
330 strpos, PL_colors[1],
331 (strend - strpos > 60 ? "..." : ""))
332 );
333
2c2d71f5
JH
334 if (prog->minlen > strend - strpos) {
335 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 336 goto fail;
2c2d71f5 337 }
653099ff 338 check = prog->check_substr;
2c2d71f5 339 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
340 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
341 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 342 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa
JH
343
344 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
2c2d71f5 345 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
346 I32 slen;
347
348 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
05b4157f
GS
349 /* SvCUR is not set on references: SvRV and SvPVX overlap */
350 && sv && !SvROK(sv)
351 && (strpos + SvCUR(sv) != strend)) {
2c2d71f5 352 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
cad2e5aa 353 goto fail;
2c2d71f5 354 }
adac82c7 355 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 356 s = HOPc(strpos, prog->check_offset_min);
653099ff
GS
357 if (SvTAIL(check)) {
358 slen = SvCUR(check); /* >= 1 */
cad2e5aa 359
2c2d71f5
JH
360 if ( strend - s > slen || strend - s < slen - 1
361 || (strend - s == slen && strend[-1] != '\n')) {
362 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
363 goto fail_finish;
cad2e5aa
JH
364 }
365 /* Now should match s[0..slen-2] */
366 slen--;
653099ff 367 if (slen && (*SvPVX(check) != *s
cad2e5aa 368 || (slen > 1
653099ff 369 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
370 report_neq:
371 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
372 goto fail_finish;
373 }
cad2e5aa 374 }
653099ff
GS
375 else if (*SvPVX(check) != *s
376 || ((slen = SvCUR(check)) > 1
377 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
378 goto report_neq;
379 goto success_at_start;
cad2e5aa 380 }
2c2d71f5 381 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 382 s = strpos;
2c2d71f5 383 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 384 end_shift = prog->minlen - start_shift -
653099ff 385 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 386 if (!ml_anch) {
653099ff
GS
387 I32 end = prog->check_offset_max + CHR_SVLEN(check)
388 - (SvTAIL(check) != 0);
2c2d71f5
JH
389 I32 eshift = strend - s - end;
390
391 if (end_shift < eshift)
392 end_shift = eshift;
393 }
cad2e5aa 394 }
2c2d71f5 395 else { /* Can match at random position */
cad2e5aa
JH
396 ml_anch = 0;
397 s = strpos;
2c2d71f5
JH
398 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
399 /* Should be nonnegative! */
400 end_shift = prog->minlen - start_shift -
653099ff 401 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
402 }
403
2c2d71f5 404#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 405 if (end_shift < 0)
6bbae5e6 406 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
407#endif
408
2c2d71f5
JH
409 restart:
410 /* Find a possible match in the region s..strend by looking for
411 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 412 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
413 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
414 I32 p = -1; /* Internal iterator of scream. */
415 I32 *pp = data ? data->scream_pos : &p;
416
2c2d71f5
JH
417 if (PL_screamfirst[BmRARE(check)] >= 0
418 || ( BmRARE(check) == '\n'
419 && (BmPREVIOUS(check) == SvCUR(check) - 1)
420 && SvTAIL(check) ))
421 s = screaminstr(sv, check,
422 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 423 else
2c2d71f5 424 goto fail_finish;
cad2e5aa
JH
425 if (data)
426 *data->scream_olds = s;
427 }
428 else
429 s = fbm_instr((unsigned char*)s + start_shift,
430 (unsigned char*)strend - end_shift,
2c2d71f5 431 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
432
433 /* Update the count-of-usability, remove useless subpatterns,
434 unshift s. */
2c2d71f5
JH
435
436 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
437 (s ? "Found" : "Did not find"),
438 ((check == prog->anchored_substr) ? "anchored" : "floating"),
439 PL_colors[0],
7b0972df
JH
440 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
441 SvPVX(check),
2c2d71f5
JH
442 PL_colors[1], (SvTAIL(check) ? "$" : ""),
443 (s ? " at offset " : "...\n") ) );
444
445 if (!s)
446 goto fail_finish;
447
6eb5f6b9
JH
448 check_at = s;
449
2c2d71f5 450 /* Finish the diagnostic message */
30944b6d 451 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
452
453 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
454 Start with the other substr.
455 XXXX no SCREAM optimization yet - and a very coarse implementation
456 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
457 *always* match. Probably should be marked during compile...
458 Probably it is right to do no SCREAM here...
459 */
460
461 if (prog->float_substr && prog->anchored_substr) {
30944b6d 462 /* Take into account the "other" substring. */
2c2d71f5
JH
463 /* XXXX May be hopelessly wrong for UTF... */
464 if (!other_last)
6eb5f6b9 465 other_last = strpos;
2c2d71f5 466 if (check == prog->float_substr) {
30944b6d
IZ
467 do_other_anchored:
468 {
2c2d71f5
JH
469 char *last = s - start_shift, *last1, *last2;
470 char *s1 = s;
471
472 tmp = PL_bostr;
473 t = s - prog->check_offset_max;
474 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
475 && (!(prog->reganch & ROPT_UTF8)
476 || (PL_bostr = strpos, /* Used in regcopmaybe() */
477 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
478 && t > strpos)))
30944b6d 479 /* EMPTY */;
2c2d71f5
JH
480 else
481 t = strpos;
482 t += prog->anchored_offset;
6eb5f6b9
JH
483 if (t < other_last) /* These positions already checked */
484 t = other_last;
2c2d71f5
JH
485 PL_bostr = tmp;
486 last2 = last1 = strend - prog->minlen;
487 if (last < last1)
488 last1 = last;
489 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
490 /* On end-of-str: see comment below. */
491 s = fbm_instr((unsigned char*)t,
492 (unsigned char*)last1 + prog->anchored_offset
493 + SvCUR(prog->anchored_substr)
494 - (SvTAIL(prog->anchored_substr)!=0),
495 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
496 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
497 (s ? "Found" : "Contradicts"),
498 PL_colors[0],
7b0972df
JH
499 (int)(SvCUR(prog->anchored_substr)
500 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
501 SvPVX(prog->anchored_substr),
502 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
503 if (!s) {
504 if (last1 >= last2) {
505 DEBUG_r(PerlIO_printf(Perl_debug_log,
506 ", giving up...\n"));
507 goto fail_finish;
508 }
509 DEBUG_r(PerlIO_printf(Perl_debug_log,
510 ", trying floating at offset %ld...\n",
30944b6d 511 (long)(s1 + 1 - i_strpos)));
2c2d71f5 512 PL_regeol = strend; /* Used in HOP() */
6eb5f6b9 513 other_last = last1 + prog->anchored_offset + 1;
2c2d71f5
JH
514 s = HOPc(last, 1);
515 goto restart;
516 }
517 else {
518 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 519 (long)(s - i_strpos)));
2c2d71f5 520 t = s - prog->anchored_offset;
6eb5f6b9 521 other_last = s + 1;
30944b6d 522 s = s1;
2c2d71f5
JH
523 if (t == strpos)
524 goto try_at_start;
2c2d71f5
JH
525 goto try_at_offset;
526 }
30944b6d 527 }
2c2d71f5
JH
528 }
529 else { /* Take into account the floating substring. */
530 char *last, *last1;
531 char *s1 = s;
532
533 t = s - start_shift;
534 last1 = last = strend - prog->minlen + prog->float_min_offset;
535 if (last - t > prog->float_max_offset)
536 last = t + prog->float_max_offset;
537 s = t + prog->float_min_offset;
6eb5f6b9
JH
538 if (s < other_last)
539 s = other_last;
2c2d71f5
JH
540 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
541 /* fbm_instr() takes into account exact value of end-of-str
542 if the check is SvTAIL(ed). Since false positives are OK,
543 and end-of-str is not later than strend we are OK. */
544 s = fbm_instr((unsigned char*)s,
545 (unsigned char*)last + SvCUR(prog->float_substr)
546 - (SvTAIL(prog->float_substr)!=0),
547 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
548 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
549 (s ? "Found" : "Contradicts"),
550 PL_colors[0],
7b0972df
JH
551 (int)(SvCUR(prog->float_substr)
552 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
553 SvPVX(prog->float_substr),
554 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
555 if (!s) {
556 if (last1 == last) {
557 DEBUG_r(PerlIO_printf(Perl_debug_log,
558 ", giving up...\n"));
559 goto fail_finish;
560 }
561 DEBUG_r(PerlIO_printf(Perl_debug_log,
562 ", trying anchored starting at offset %ld...\n",
30944b6d 563 (long)(s1 + 1 - i_strpos)));
6eb5f6b9 564 other_last = last + 1;
2c2d71f5
JH
565 PL_regeol = strend; /* Used in HOP() */
566 s = HOPc(t, 1);
567 goto restart;
568 }
569 else {
570 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 571 (long)(s - i_strpos)));
6eb5f6b9 572 other_last = s + 1;
30944b6d 573 s = s1;
2c2d71f5
JH
574 if (t == strpos)
575 goto try_at_start;
2c2d71f5
JH
576 goto try_at_offset;
577 }
578 }
cad2e5aa 579 }
2c2d71f5
JH
580
581 t = s - prog->check_offset_max;
582 tmp = PL_bostr;
583 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
584 && (!(prog->reganch & ROPT_UTF8)
585 || (PL_bostr = strpos, /* Used in regcopmaybe() */
586 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
587 && t > strpos)))) {
588 PL_bostr = tmp;
589 /* Fixed substring is found far enough so that the match
590 cannot start at strpos. */
591 try_at_offset:
cad2e5aa 592 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
593 /* Eventually fbm_*() should handle this, but often
594 anchored_offset is not 0, so this check will not be wasted. */
595 /* XXXX In the code below we prefer to look for "^" even in
596 presence of anchored substrings. And we search even
597 beyond the found float position. These pessimizations
598 are historical artefacts only. */
599 find_anchor:
2c2d71f5 600 while (t < strend - prog->minlen) {
cad2e5aa 601 if (*t == '\n') {
4ee3650e 602 if (t < check_at - prog->check_offset_min) {
30944b6d 603 if (prog->anchored_substr) {
4ee3650e
GS
604 /* Since we moved from the found position,
605 we definitely contradict the found anchored
30944b6d
IZ
606 substr. Due to the above check we do not
607 contradict "check" substr.
608 Thus we can arrive here only if check substr
609 is float. Redo checking for "other"=="fixed".
610 */
611 strpos = t + 1;
612 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
613 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
614 goto do_other_anchored;
615 }
4ee3650e
GS
616 /* We don't contradict the found floating substring. */
617 /* XXXX Why not check for STCLASS? */
cad2e5aa 618 s = t + 1;
2c2d71f5 619 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 620 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
621 goto set_useful;
622 }
4ee3650e
GS
623 /* Position contradicts check-string */
624 /* XXXX probably better to look for check-string
625 than for "\n", so one should lower the limit for t? */
626 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d
IZ
627 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
628 strpos = s = t + 1;
cad2e5aa
JH
629 goto restart;
630 }
631 t++;
632 }
2c2d71f5
JH
633 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
634 PL_colors[0],PL_colors[1]));
635 goto fail_finish;
cad2e5aa
JH
636 }
637 s = t;
638 set_useful:
2c2d71f5 639 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
640 }
641 else {
2c2d71f5
JH
642 PL_bostr = tmp;
643 /* The found string does not prohibit matching at beg-of-str
644 - no optimization of calling REx engine can be performed,
645 unless it was an MBOL and we are not after MBOL. */
646 try_at_start:
647 /* Even in this situation we may use MBOL flag if strpos is offset
648 wrt the start of the string. */
05b4157f 649 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
d506a20d
IZ
650 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n'
651 /* May be due to an implicit anchor of m{.*foo} */
652 && !(prog->reganch & ROPT_IMPLICIT))
653 {
cad2e5aa
JH
654 t = strpos;
655 goto find_anchor;
656 }
30944b6d
IZ
657 DEBUG_r( if (ml_anch)
658 PerlIO_printf(Perl_debug_log, "Does not contradict /%s^%s/m...\n",
659 PL_colors[0],PL_colors[1]);
660 );
2c2d71f5 661 success_at_start:
30944b6d 662 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 663 && prog->check_substr /* Could be deleted already */
cad2e5aa 664 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
665 && prog->check_substr == prog->float_substr)
666 {
cad2e5aa
JH
667 /* If flags & SOMETHING - do not do it many times on the same match */
668 SvREFCNT_dec(prog->check_substr);
669 prog->check_substr = Nullsv; /* disable */
670 prog->float_substr = Nullsv; /* clear */
671 s = strpos;
3cf5c195
IZ
672 /* XXXX This is a remnant of the old implementation. It
673 looks wasteful, since now INTUIT can use many
6eb5f6b9 674 other heuristics. */
cad2e5aa
JH
675 prog->reganch &= ~RE_USE_INTUIT;
676 }
677 else
678 s = strpos;
679 }
680
6eb5f6b9
JH
681 /* Last resort... */
682 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
683 if (prog->regstclass) {
684 /* minlen == 0 is possible if regstclass is \b or \B,
685 and the fixed substr is ''$.
686 Since minlen is already taken into account, s+1 is before strend;
687 accidentally, minlen >= 1 guaranties no false positives at s + 1
688 even for \b or \B. But (minlen? 1 : 0) below assumes that
689 regstclass does not come from lookahead... */
690 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
691 This leaves EXACTF only, which is dealt with in find_byclass(). */
66e933ab
GS
692 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
693 ? STR_LEN(prog->regstclass)
694 : 1);
6eb5f6b9 695 char *endpos = (prog->anchored_substr || ml_anch)
66e933ab
GS
696 ? s + (prog->minlen? cl_l : 0)
697 : (prog->float_substr ? check_at - start_shift + cl_l
6eb5f6b9 698 : strend) ;
8fdd422b 699 char *startpos = sv && SvPOK(sv) ? strend - SvCUR(sv) : s;
6eb5f6b9
JH
700
701 t = s;
76384e4a
GS
702 if (prog->reganch & ROPT_UTF8) {
703 PL_regdata = prog->data; /* Used by REGINCLASS UTF logic */
704 PL_bostr = startpos;
705 }
6eb5f6b9
JH
706 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
707 if (!s) {
708#ifdef DEBUGGING
709 char *what;
710#endif
711 if (endpos == strend) {
712 DEBUG_r( PerlIO_printf(Perl_debug_log,
713 "Could not match STCLASS...\n") );
714 goto fail;
715 }
66e933ab
GS
716 DEBUG_r( PerlIO_printf(Perl_debug_log,
717 "This position contradicts STCLASS...\n") );
653099ff
GS
718 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
719 goto fail;
6eb5f6b9
JH
720 /* Contradict one of substrings */
721 if (prog->anchored_substr) {
6eb5f6b9
JH
722 if (prog->anchored_substr == check) {
723 DEBUG_r( what = "anchored" );
724 hop_and_restart:
725 PL_regeol = strend; /* Used in HOP() */
726 s = HOPc(t, 1);
66e933ab
GS
727 if (s + start_shift + end_shift > strend) {
728 /* XXXX Should be taken into account earlier? */
729 DEBUG_r( PerlIO_printf(Perl_debug_log,
730 "Could not match STCLASS...\n") );
731 goto fail;
732 }
6eb5f6b9 733 DEBUG_r( PerlIO_printf(Perl_debug_log,
66e933ab 734 "Trying %s substr starting at offset %ld...\n",
6eb5f6b9
JH
735 what, (long)(s + start_shift - i_strpos)) );
736 goto restart;
737 }
66e933ab 738 /* Have both, check_string is floating */
6eb5f6b9
JH
739 if (t + start_shift >= check_at) /* Contradicts floating=check */
740 goto retry_floating_check;
741 /* Recheck anchored substring, but not floating... */
742 s = check_at;
743 DEBUG_r( PerlIO_printf(Perl_debug_log,
66e933ab 744 "Trying anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
745 (long)(other_last - i_strpos)) );
746 goto do_other_anchored;
747 }
60e71179
GS
748 /* Another way we could have checked stclass at the
749 current position only: */
750 if (ml_anch) {
751 s = t = t + 1;
752 DEBUG_r( PerlIO_printf(Perl_debug_log,
753 "Trying /^/m starting at offset %ld...\n",
754 (long)(t - i_strpos)) );
755 goto try_at_offset;
66e933ab 756 }
60e71179
GS
757 if (!prog->float_substr) /* Could have been deleted */
758 goto fail;
6eb5f6b9
JH
759 /* Check is floating subtring. */
760 retry_floating_check:
761 t = check_at - start_shift;
762 DEBUG_r( what = "floating" );
763 goto hop_and_restart;
764 }
765 DEBUG_r( if (t != s)
766 PerlIO_printf(Perl_debug_log,
767 "By STCLASS: moving %ld --> %ld\n",
768 (long)(t - i_strpos), (long)(s - i_strpos));
769 else
770 PerlIO_printf(Perl_debug_log,
771 "Does not contradict STCLASS...\n") );
772 }
2c2d71f5 773 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
30944b6d 774 PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 775 return s;
2c2d71f5
JH
776
777 fail_finish: /* Substring not found */
66e933ab
GS
778 if (prog->check_substr) /* could be removed already */
779 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 780 fail:
2c2d71f5 781 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
782 PL_colors[4],PL_colors[5]));
783 return Nullch;
784}
9661b544 785
6eb5f6b9 786/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
787STATIC char *
788S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 789{
6eb5f6b9
JH
790 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
791 char *m;
d8093b23
G
792 STRLEN ln;
793 unsigned int c1;
794 unsigned int c2;
6eb5f6b9
JH
795 char *e;
796 register I32 tmp = 1; /* Scratch variable? */
cad2e5aa 797
6eb5f6b9
JH
798 /* We know what class it must start with. */
799 switch (OP(c)) {
800 case ANYOFUTF8:
801 while (s < strend) {
802 if (REGINCLASSUTF8(c, (U8*)s)) {
803 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 804 goto got_it;
3cf5c195 805 else
6eb5f6b9 806 tmp = doevery;
a0ed51b3 807 }
6eb5f6b9
JH
808 else
809 tmp = 1;
a0ed51b3
LW
810 s += UTF8SKIP(s);
811 }
6eb5f6b9
JH
812 break;
813 case ANYOF:
a0ed51b3 814 while (s < strend) {
d8093b23 815 if (REGINCLASS(c, *(U8*)s)) {
6eb5f6b9
JH
816 if (tmp && (norun || regtry(prog, s)))
817 goto got_it;
818 else
819 tmp = doevery;
a0ed51b3 820 }
6eb5f6b9
JH
821 else
822 tmp = 1;
a0ed51b3 823 s++;
a0d0e21e 824 }
6eb5f6b9
JH
825 break;
826 case EXACTF:
827 m = STRING(c);
828 ln = STR_LEN(c);
d8093b23 829 c1 = *(U8*)m;
6eb5f6b9
JH
830 c2 = PL_fold[c1];
831 goto do_exactf;
832 case EXACTFL:
833 m = STRING(c);
834 ln = STR_LEN(c);
d8093b23 835 c1 = *(U8*)m;
6eb5f6b9
JH
836 c2 = PL_fold_locale[c1];
837 do_exactf:
838 e = strend - ln;
b3c9acc1 839
6eb5f6b9
JH
840 if (norun && e < s)
841 e = s; /* Due to minlen logic of intuit() */
b3c9acc1
IZ
842 /* Here it is NOT UTF! */
843 if (c1 == c2) {
844 while (s <= e) {
d8093b23 845 if ( *(U8*)s == c1
66e933ab
GS
846 && (ln == 1 || !(OP(c) == EXACTF
847 ? ibcmp(s, m, ln)
848 : ibcmp_locale(s, m, ln)))
6eb5f6b9 849 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
850 goto got_it;
851 s++;
852 }
853 } else {
854 while (s <= e) {
d8093b23 855 if ( (*(U8*)s == c1 || *(U8*)s == c2)
66e933ab
GS
856 && (ln == 1 || !(OP(c) == EXACTF
857 ? ibcmp(s, m, ln)
858 : ibcmp_locale(s, m, ln)))
6eb5f6b9 859 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
860 goto got_it;
861 s++;
862 }
863 }
864 break;
bbce6d69 865 case BOUNDL:
3280af22 866 PL_reg_flags |= RF_tainted;
bbce6d69 867 /* FALL THROUGH */
a0d0e21e 868 case BOUND:
6eb5f6b9 869 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 870 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 871 while (s < strend) {
95bac841 872 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 873 tmp = !tmp;
6eb5f6b9 874 if ((norun || regtry(prog, s)))
a0d0e21e 875 goto got_it;
a687059c 876 }
a0d0e21e
LW
877 s++;
878 }
6eb5f6b9 879 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
880 goto got_it;
881 break;
a0ed51b3
LW
882 case BOUNDLUTF8:
883 PL_reg_flags |= RF_tainted;
884 /* FALL THROUGH */
885 case BOUNDUTF8:
6eb5f6b9 886 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
76384e4a 887 tmp = ((OP(c) == BOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 888 while (s < strend) {
76384e4a 889 if (tmp == !(OP(c) == BOUNDUTF8 ?
dfe13c55
GS
890 swash_fetch(PL_utf8_alnum, (U8*)s) :
891 isALNUM_LC_utf8((U8*)s)))
892 {
a0ed51b3 893 tmp = !tmp;
6eb5f6b9 894 if ((norun || regtry(prog, s)))
a0ed51b3
LW
895 goto got_it;
896 }
897 s += UTF8SKIP(s);
898 }
6eb5f6b9 899 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
900 goto got_it;
901 break;
bbce6d69 902 case NBOUNDL:
3280af22 903 PL_reg_flags |= RF_tainted;
bbce6d69 904 /* FALL THROUGH */
a0d0e21e 905 case NBOUND:
6eb5f6b9 906 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 907 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 908 while (s < strend) {
95bac841 909 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 910 tmp = !tmp;
6eb5f6b9 911 else if ((norun || regtry(prog, s)))
a0d0e21e
LW
912 goto got_it;
913 s++;
914 }
6eb5f6b9 915 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
916 goto got_it;
917 break;
a0ed51b3
LW
918 case NBOUNDLUTF8:
919 PL_reg_flags |= RF_tainted;
920 /* FALL THROUGH */
921 case NBOUNDUTF8:
6eb5f6b9 922 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
76384e4a 923 tmp = ((OP(c) == NBOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 924 while (s < strend) {
76384e4a 925 if (tmp == !(OP(c) == NBOUNDUTF8 ?
dfe13c55
GS
926 swash_fetch(PL_utf8_alnum, (U8*)s) :
927 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 928 tmp = !tmp;
6eb5f6b9 929 else if ((norun || regtry(prog, s)))
a0ed51b3
LW
930 goto got_it;
931 s += UTF8SKIP(s);
932 }
6eb5f6b9 933 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
934 goto got_it;
935 break;
a0d0e21e
LW
936 case ALNUM:
937 while (s < strend) {
bbce6d69 938 if (isALNUM(*s)) {
6eb5f6b9 939 if (tmp && (norun || regtry(prog, s)))
bbce6d69 940 goto got_it;
941 else
942 tmp = doevery;
943 }
944 else
945 tmp = 1;
946 s++;
947 }
948 break;
a0ed51b3
LW
949 case ALNUMUTF8:
950 while (s < strend) {
dfe13c55 951 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 952 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
953 goto got_it;
954 else
955 tmp = doevery;
956 }
957 else
958 tmp = 1;
959 s += UTF8SKIP(s);
960 }
961 break;
bbce6d69 962 case ALNUML:
3280af22 963 PL_reg_flags |= RF_tainted;
bbce6d69 964 while (s < strend) {
965 if (isALNUM_LC(*s)) {
6eb5f6b9 966 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 967 goto got_it;
a687059c 968 else
a0d0e21e
LW
969 tmp = doevery;
970 }
971 else
972 tmp = 1;
973 s++;
974 }
975 break;
a0ed51b3
LW
976 case ALNUMLUTF8:
977 PL_reg_flags |= RF_tainted;
978 while (s < strend) {
dfe13c55 979 if (isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 980 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
981 goto got_it;
982 else
983 tmp = doevery;
984 }
985 else
986 tmp = 1;
987 s += UTF8SKIP(s);
988 }
989 break;
a0d0e21e
LW
990 case NALNUM:
991 while (s < strend) {
bbce6d69 992 if (!isALNUM(*s)) {
6eb5f6b9 993 if (tmp && (norun || regtry(prog, s)))
bbce6d69 994 goto got_it;
995 else
996 tmp = doevery;
997 }
998 else
999 tmp = 1;
1000 s++;
1001 }
1002 break;
a0ed51b3
LW
1003 case NALNUMUTF8:
1004 while (s < strend) {
dfe13c55 1005 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 1006 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1007 goto got_it;
1008 else
1009 tmp = doevery;
1010 }
1011 else
1012 tmp = 1;
1013 s += UTF8SKIP(s);
1014 }
1015 break;
bbce6d69 1016 case NALNUML:
3280af22 1017 PL_reg_flags |= RF_tainted;
bbce6d69 1018 while (s < strend) {
1019 if (!isALNUM_LC(*s)) {
6eb5f6b9 1020 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1021 goto got_it;
a687059c 1022 else
a0d0e21e 1023 tmp = doevery;
a687059c 1024 }
a0d0e21e
LW
1025 else
1026 tmp = 1;
1027 s++;
1028 }
1029 break;
a0ed51b3
LW
1030 case NALNUMLUTF8:
1031 PL_reg_flags |= RF_tainted;
1032 while (s < strend) {
dfe13c55 1033 if (!isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1034 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1035 goto got_it;
1036 else
1037 tmp = doevery;
1038 }
1039 else
1040 tmp = 1;
1041 s += UTF8SKIP(s);
1042 }
1043 break;
a0d0e21e
LW
1044 case SPACE:
1045 while (s < strend) {
1046 if (isSPACE(*s)) {
6eb5f6b9 1047 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1048 goto got_it;
1049 else
1050 tmp = doevery;
2304df62 1051 }
a0d0e21e
LW
1052 else
1053 tmp = 1;
1054 s++;
1055 }
1056 break;
a0ed51b3
LW
1057 case SPACEUTF8:
1058 while (s < strend) {
dfe13c55 1059 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
6eb5f6b9 1060 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1061 goto got_it;
1062 else
1063 tmp = doevery;
1064 }
1065 else
1066 tmp = 1;
1067 s += UTF8SKIP(s);
1068 }
1069 break;
bbce6d69 1070 case SPACEL:
3280af22 1071 PL_reg_flags |= RF_tainted;
bbce6d69 1072 while (s < strend) {
1073 if (isSPACE_LC(*s)) {
6eb5f6b9 1074 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1075 goto got_it;
1076 else
1077 tmp = doevery;
1078 }
1079 else
1080 tmp = 1;
1081 s++;
1082 }
1083 break;
a0ed51b3
LW
1084 case SPACELUTF8:
1085 PL_reg_flags |= RF_tainted;
1086 while (s < strend) {
dfe13c55 1087 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
6eb5f6b9 1088 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1089 goto got_it;
1090 else
1091 tmp = doevery;
1092 }
1093 else
1094 tmp = 1;
1095 s += UTF8SKIP(s);
1096 }
1097 break;
a0d0e21e
LW
1098 case NSPACE:
1099 while (s < strend) {
1100 if (!isSPACE(*s)) {
6eb5f6b9 1101 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1102 goto got_it;
1103 else
1104 tmp = doevery;
a687059c 1105 }
a0d0e21e
LW
1106 else
1107 tmp = 1;
1108 s++;
1109 }
1110 break;
a0ed51b3
LW
1111 case NSPACEUTF8:
1112 while (s < strend) {
dfe13c55 1113 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
6eb5f6b9 1114 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1115 goto got_it;
1116 else
1117 tmp = doevery;
1118 }
1119 else
1120 tmp = 1;
1121 s += UTF8SKIP(s);
1122 }
1123 break;
bbce6d69 1124 case NSPACEL:
3280af22 1125 PL_reg_flags |= RF_tainted;
bbce6d69 1126 while (s < strend) {
1127 if (!isSPACE_LC(*s)) {
6eb5f6b9 1128 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1129 goto got_it;
1130 else
1131 tmp = doevery;
1132 }
1133 else
1134 tmp = 1;
1135 s++;
1136 }
1137 break;
a0ed51b3
LW
1138 case NSPACELUTF8:
1139 PL_reg_flags |= RF_tainted;
1140 while (s < strend) {
dfe13c55 1141 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
6eb5f6b9 1142 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1143 goto got_it;
1144 else
1145 tmp = doevery;
1146 }
1147 else
1148 tmp = 1;
1149 s += UTF8SKIP(s);
1150 }
1151 break;
a0d0e21e
LW
1152 case DIGIT:
1153 while (s < strend) {
1154 if (isDIGIT(*s)) {
6eb5f6b9 1155 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1156 goto got_it;
1157 else
1158 tmp = doevery;
2b69d0c2 1159 }
a0d0e21e
LW
1160 else
1161 tmp = 1;
1162 s++;
1163 }
1164 break;
a0ed51b3
LW
1165 case DIGITUTF8:
1166 while (s < strend) {
dfe13c55 1167 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1168 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1169 goto got_it;
1170 else
1171 tmp = doevery;
1172 }
1173 else
1174 tmp = 1;
1175 s += UTF8SKIP(s);
1176 }
1177 break;
b8c5462f
JH
1178 case DIGITL:
1179 PL_reg_flags |= RF_tainted;
1180 while (s < strend) {
1181 if (isDIGIT_LC(*s)) {
6eb5f6b9 1182 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1183 goto got_it;
1184 else
1185 tmp = doevery;
1186 }
1187 else
1188 tmp = 1;
1189 s++;
1190 }
1191 break;
1192 case DIGITLUTF8:
1193 PL_reg_flags |= RF_tainted;
1194 while (s < strend) {
1195 if (isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1196 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1197 goto got_it;
1198 else
1199 tmp = doevery;
1200 }
1201 else
1202 tmp = 1;
1203 s += UTF8SKIP(s);
1204 }
1205 break;
a0d0e21e
LW
1206 case NDIGIT:
1207 while (s < strend) {
1208 if (!isDIGIT(*s)) {
6eb5f6b9 1209 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1210 goto got_it;
1211 else
1212 tmp = doevery;
a687059c 1213 }
a0d0e21e
LW
1214 else
1215 tmp = 1;
1216 s++;
1217 }
1218 break;
a0ed51b3
LW
1219 case NDIGITUTF8:
1220 while (s < strend) {
dfe13c55 1221 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1222 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1223 goto got_it;
1224 else
1225 tmp = doevery;
1226 }
1227 else
1228 tmp = 1;
1229 s += UTF8SKIP(s);
1230 }
1231 break;
b8c5462f
JH
1232 case NDIGITL:
1233 PL_reg_flags |= RF_tainted;
1234 while (s < strend) {
1235 if (!isDIGIT_LC(*s)) {
6eb5f6b9 1236 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1237 goto got_it;
1238 else
1239 tmp = doevery;
1240 }
1241 else
1242 tmp = 1;
1243 s++;
a0ed51b3 1244 }
b8c5462f
JH
1245 break;
1246 case NDIGITLUTF8:
1247 PL_reg_flags |= RF_tainted;
1248 while (s < strend) {
1249 if (!isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1250 if (tmp && (norun || regtry(prog, s)))
b8c5462f 1251 goto got_it;
cf93c79d 1252 else
b8c5462f
JH
1253 tmp = doevery;
1254 }
1255 else
1256 tmp = 1;
1257 s += UTF8SKIP(s);
1258 }
1259 break;
b3c9acc1 1260 default:
3c3eec57
GS
1261 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1262 break;
d6a28714 1263 }
6eb5f6b9
JH
1264 return 0;
1265 got_it:
1266 return s;
1267}
1268
1269/*
1270 - regexec_flags - match a regexp against a string
1271 */
1272I32
1273Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1274 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1275/* strend: pointer to null at end of string */
1276/* strbeg: real beginning of string */
1277/* minend: end of match must be >=minend after stringarg. */
1278/* data: May be used for some additional optimizations. */
1279/* nosave: For optimizations. */
1280{
1281 dTHR;
1282 register char *s;
1283 register regnode *c;
1284 register char *startpos = stringarg;
6eb5f6b9
JH
1285 I32 minlen; /* must match at least this many chars */
1286 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1287 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1288 constant substr. */ /* CC */
1289 I32 end_shift = 0; /* Same for the end. */ /* CC */
1290 I32 scream_pos = -1; /* Internal iterator of scream. */
1291 char *scream_olds;
1292 SV* oreplsv = GvSV(PL_replgv);
1293
1294 PL_regcc = 0;
1295
1296 cache_re(prog);
1297#ifdef DEBUGGING
1298 PL_regnarrate = PL_debug & 512;
1299#endif
1300
1301 /* Be paranoid... */
1302 if (prog == NULL || startpos == NULL) {
1303 Perl_croak(aTHX_ "NULL regexp parameter");
1304 return 0;
1305 }
1306
1307 minlen = prog->minlen;
1308 if (strend - startpos < minlen) goto phooey;
1309
1310 if (startpos == strbeg) /* is ^ valid at stringarg? */
1311 PL_regprev = '\n';
1312 else {
1313 PL_regprev = (U32)stringarg[-1];
1314 if (!PL_multiline && PL_regprev == '\n')
1315 PL_regprev = '\0'; /* force ^ to NOT match */
1316 }
1317
1318 /* Check validity of program. */
1319 if (UCHARAT(prog->program) != REG_MAGIC) {
1320 Perl_croak(aTHX_ "corrupted regexp program");
1321 }
1322
1323 PL_reg_flags = 0;
1324 PL_reg_eval_set = 0;
1325 PL_reg_maxiter = 0;
1326
1327 if (prog->reganch & ROPT_UTF8)
1328 PL_reg_flags |= RF_utf8;
1329
1330 /* Mark beginning of line for ^ and lookbehind. */
1331 PL_regbol = startpos;
1332 PL_bostr = strbeg;
1333 PL_reg_sv = sv;
1334
1335 /* Mark end of line for $ (and such) */
1336 PL_regeol = strend;
1337
1338 /* see how far we have to get to not match where we matched before */
1339 PL_regtill = startpos+minend;
1340
1341 /* We start without call_cc context. */
1342 PL_reg_call_cc = 0;
1343
1344 /* If there is a "must appear" string, look for it. */
1345 s = startpos;
1346
1347 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1348 MAGIC *mg;
1349
1350 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1351 PL_reg_ganch = startpos;
1352 else if (sv && SvTYPE(sv) >= SVt_PVMG
1353 && SvMAGIC(sv)
1354 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1355 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1356 if (prog->reganch & ROPT_ANCH_GPOS) {
1357 if (s > PL_reg_ganch)
1358 goto phooey;
1359 s = PL_reg_ganch;
1360 }
1361 }
1362 else /* pos() not defined */
1363 PL_reg_ganch = strbeg;
1364 }
1365
1366 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1367 re_scream_pos_data d;
1368
1369 d.scream_olds = &scream_olds;
1370 d.scream_pos = &scream_pos;
1371 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1372 if (!s)
1373 goto phooey; /* not present */
1374 }
1375
1376 DEBUG_r( if (!PL_colorset) reginitcolors() );
1377 DEBUG_r(PerlIO_printf(Perl_debug_log,
1378 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1379 PL_colors[4],PL_colors[5],PL_colors[0],
1380 prog->precomp,
1381 PL_colors[1],
1382 (strlen(prog->precomp) > 60 ? "..." : ""),
1383 PL_colors[0],
1384 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1385 startpos, PL_colors[1],
1386 (strend - startpos > 60 ? "..." : ""))
1387 );
1388
1389 /* Simplest case: anchored match need be tried only once. */
1390 /* [unless only anchor is BOL and multiline is set] */
1391 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1392 if (s == startpos && regtry(prog, startpos))
1393 goto got_it;
1394 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1395 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1396 {
1397 char *end;
1398
1399 if (minlen)
1400 dontbother = minlen - 1;
1401 end = HOPc(strend, -dontbother) - 1;
1402 /* for multiline we only have to try after newlines */
1403 if (prog->check_substr) {
1404 if (s == startpos)
1405 goto after_try;
1406 while (1) {
1407 if (regtry(prog, s))
1408 goto got_it;
1409 after_try:
1410 if (s >= end)
1411 goto phooey;
1412 if (prog->reganch & RE_USE_INTUIT) {
1413 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1414 if (!s)
1415 goto phooey;
1416 }
1417 else
1418 s++;
1419 }
1420 } else {
1421 if (s > startpos)
1422 s--;
1423 while (s < end) {
1424 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1425 if (regtry(prog, s))
1426 goto got_it;
1427 }
1428 }
1429 }
1430 }
1431 goto phooey;
1432 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1433 if (regtry(prog, PL_reg_ganch))
1434 goto got_it;
1435 goto phooey;
1436 }
1437
1438 /* Messy cases: unanchored match. */
1439 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1440 /* we have /x+whatever/ */
1441 /* it must be a one character string (XXXX Except UTF?) */
1442 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1443#ifdef DEBUGGING
1444 int did_match = 0;
1445#endif
1446
6eb5f6b9
JH
1447 if (UTF) {
1448 while (s < strend) {
1449 if (*s == ch) {
bf93d4cc 1450 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1451 if (regtry(prog, s)) goto got_it;
1452 s += UTF8SKIP(s);
1453 while (s < strend && *s == ch)
1454 s += UTF8SKIP(s);
1455 }
1456 s += UTF8SKIP(s);
1457 }
1458 }
1459 else {
1460 while (s < strend) {
1461 if (*s == ch) {
bf93d4cc 1462 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1463 if (regtry(prog, s)) goto got_it;
1464 s++;
1465 while (s < strend && *s == ch)
1466 s++;
1467 }
1468 s++;
1469 }
1470 }
bf93d4cc
GS
1471 DEBUG_r(did_match ||
1472 PerlIO_printf(Perl_debug_log,
1473 "Did not find anchored character...\n"));
6eb5f6b9
JH
1474 }
1475 /*SUPPRESS 560*/
1476 else if (prog->anchored_substr != Nullsv
1477 || (prog->float_substr != Nullsv
1478 && prog->float_max_offset < strend - s)) {
1479 SV *must = prog->anchored_substr
1480 ? prog->anchored_substr : prog->float_substr;
1481 I32 back_max =
1482 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1483 I32 back_min =
1484 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
6eb5f6b9
JH
1485 char *last = HOPc(strend, /* Cannot start after this */
1486 -(I32)(CHR_SVLEN(must)
1487 - (SvTAIL(must) != 0) + back_min));
1488 char *last1; /* Last position checked before */
bf93d4cc
GS
1489#ifdef DEBUGGING
1490 int did_match = 0;
1491#endif
6eb5f6b9
JH
1492
1493 if (s > PL_bostr)
1494 last1 = HOPc(s, -1);
1495 else
1496 last1 = s - 1; /* bogus */
1497
1498 /* XXXX check_substr already used to find `s', can optimize if
1499 check_substr==must. */
1500 scream_pos = -1;
1501 dontbother = end_shift;
1502 strend = HOPc(strend, -dontbother);
1503 while ( (s <= last) &&
1504 ((flags & REXEC_SCREAM)
1505 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1506 end_shift, &scream_pos, 0))
1507 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1508 (unsigned char*)strend, must,
1509 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1510 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1511 if (HOPc(s, -back_max) > last1) {
1512 last1 = HOPc(s, -back_min);
1513 s = HOPc(s, -back_max);
1514 }
1515 else {
1516 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1517
1518 last1 = HOPc(s, -back_min);
1519 s = t;
1520 }
1521 if (UTF) {
1522 while (s <= last1) {
1523 if (regtry(prog, s))
1524 goto got_it;
1525 s += UTF8SKIP(s);
1526 }
1527 }
1528 else {
1529 while (s <= last1) {
1530 if (regtry(prog, s))
1531 goto got_it;
1532 s++;
1533 }
1534 }
1535 }
bf93d4cc
GS
1536 DEBUG_r(did_match ||
1537 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1538 ((must == prog->anchored_substr)
1539 ? "anchored" : "floating"),
1540 PL_colors[0],
1541 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1542 SvPVX(must),
1543 PL_colors[1], (SvTAIL(must) ? "$" : "")));
6eb5f6b9
JH
1544 goto phooey;
1545 }
155aba94 1546 else if ((c = prog->regstclass)) {
66e933ab
GS
1547 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1548 /* don't bother with what can't match */
6eb5f6b9
JH
1549 strend = HOPc(strend, -(minlen - 1));
1550 if (find_byclass(prog, c, s, strend, startpos, 0))
1551 goto got_it;
bf93d4cc 1552 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1553 }
1554 else {
1555 dontbother = 0;
1556 if (prog->float_substr != Nullsv) { /* Trim the end. */
1557 char *last;
d6a28714
JH
1558
1559 if (flags & REXEC_SCREAM) {
1560 last = screaminstr(sv, prog->float_substr, s - strbeg,
1561 end_shift, &scream_pos, 1); /* last one */
1562 if (!last)
1563 last = scream_olds; /* Only one occurence. */
b8c5462f 1564 }
d6a28714
JH
1565 else {
1566 STRLEN len;
1567 char *little = SvPV(prog->float_substr, len);
1568
1569 if (SvTAIL(prog->float_substr)) {
1570 if (memEQ(strend - len + 1, little, len - 1))
1571 last = strend - len + 1;
1572 else if (!PL_multiline)
1573 last = memEQ(strend - len, little, len)
1574 ? strend - len : Nullch;
b8c5462f 1575 else
d6a28714
JH
1576 goto find_last;
1577 } else {
1578 find_last:
1579 if (len)
1580 last = rninstr(s, strend, little, little + len);
b8c5462f 1581 else
d6a28714 1582 last = strend; /* matching `$' */
b8c5462f 1583 }
b8c5462f 1584 }
bf93d4cc
GS
1585 if (last == NULL) {
1586 DEBUG_r(PerlIO_printf(Perl_debug_log,
1587 "%sCan't trim the tail, match fails (should not happen)%s\n",
1588 PL_colors[4],PL_colors[5]));
1589 goto phooey; /* Should not happen! */
1590 }
d6a28714
JH
1591 dontbother = strend - last + prog->float_min_offset;
1592 }
1593 if (minlen && (dontbother < minlen))
1594 dontbother = minlen - 1;
1595 strend -= dontbother; /* this one's always in bytes! */
1596 /* We don't know much -- general case. */
1597 if (UTF) {
1598 for (;;) {
1599 if (regtry(prog, s))
1600 goto got_it;
1601 if (s >= strend)
1602 break;
b8c5462f 1603 s += UTF8SKIP(s);
d6a28714
JH
1604 };
1605 }
1606 else {
1607 do {
1608 if (regtry(prog, s))
1609 goto got_it;
1610 } while (s++ < strend);
1611 }
1612 }
1613
1614 /* Failure. */
1615 goto phooey;
1616
1617got_it:
1618 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1619
1620 if (PL_reg_eval_set) {
1621 /* Preserve the current value of $^R */
1622 if (oreplsv != GvSV(PL_replgv))
1623 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1624 restored, the value remains
1625 the same. */
1626 restore_pos(aTHXo_ 0);
1627 }
1628
1629 /* make sure $`, $&, $', and $digit will work later */
1630 if ( !(flags & REXEC_NOT_FIRST) ) {
1631 if (RX_MATCH_COPIED(prog)) {
1632 Safefree(prog->subbeg);
1633 RX_MATCH_COPIED_off(prog);
1634 }
1635 if (flags & REXEC_COPY_STR) {
1636 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1637
1638 s = savepvn(strbeg, i);
1639 prog->subbeg = s;
1640 prog->sublen = i;
1641 RX_MATCH_COPIED_on(prog);
1642 }
1643 else {
1644 prog->subbeg = strbeg;
1645 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1646 }
1647 }
1648
1649 return 1;
1650
1651phooey:
bf93d4cc
GS
1652 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1653 PL_colors[4],PL_colors[5]));
d6a28714
JH
1654 if (PL_reg_eval_set)
1655 restore_pos(aTHXo_ 0);
1656 return 0;
1657}
1658
1659/*
1660 - regtry - try match at specific point
1661 */
1662STATIC I32 /* 0 failure, 1 success */
1663S_regtry(pTHX_ regexp *prog, char *startpos)
1664{
1665 dTHR;
1666 register I32 i;
1667 register I32 *sp;
1668 register I32 *ep;
1669 CHECKPOINT lastcp;
1670
1671 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1672 MAGIC *mg;
1673
1674 PL_reg_eval_set = RS_init;
1675 DEBUG_r(DEBUG_s(
b900a521
JH
1676 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1677 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1678 ));
e8347627 1679 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1680 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1681 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1682 SAVETMPS;
1683 /* Apparently this is not needed, judging by wantarray. */
e8347627 1684 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1685 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1686
1687 if (PL_reg_sv) {
1688 /* Make $_ available to executed code. */
1689 if (PL_reg_sv != DEFSV) {
1690 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1691 SAVESPTR(DEFSV);
1692 DEFSV = PL_reg_sv;
b8c5462f 1693 }
d6a28714
JH
1694
1695 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1696 && (mg = mg_find(PL_reg_sv, 'g')))) {
1697 /* prepare for quick setting of pos */
1698 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1699 mg = mg_find(PL_reg_sv, 'g');
1700 mg->mg_len = -1;
b8c5462f 1701 }
d6a28714
JH
1702 PL_reg_magic = mg;
1703 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1704 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1705 }
1706 if (!PL_reg_curpm)
0f79a09d 1707 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1708 PL_reg_curpm->op_pmregexp = prog;
1709 PL_reg_oldcurpm = PL_curpm;
1710 PL_curpm = PL_reg_curpm;
1711 if (RX_MATCH_COPIED(prog)) {
1712 /* Here is a serious problem: we cannot rewrite subbeg,
1713 since it may be needed if this match fails. Thus
1714 $` inside (?{}) could fail... */
1715 PL_reg_oldsaved = prog->subbeg;
1716 PL_reg_oldsavedlen = prog->sublen;
1717 RX_MATCH_COPIED_off(prog);
1718 }
1719 else
1720 PL_reg_oldsaved = Nullch;
1721 prog->subbeg = PL_bostr;
1722 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1723 }
1724 prog->startp[0] = startpos - PL_bostr;
1725 PL_reginput = startpos;
1726 PL_regstartp = prog->startp;
1727 PL_regendp = prog->endp;
1728 PL_reglastparen = &prog->lastparen;
1729 prog->lastparen = 0;
1730 PL_regsize = 0;
1731 DEBUG_r(PL_reg_starttry = startpos);
1732 if (PL_reg_start_tmpl <= prog->nparens) {
1733 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1734 if(PL_reg_start_tmp)
1735 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1736 else
1737 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1738 }
1739
1740 /* XXXX What this code is doing here?!!! There should be no need
1741 to do this again and again, PL_reglastparen should take care of
1742 this! */
1743 sp = prog->startp;
1744 ep = prog->endp;
1745 if (prog->nparens) {
1746 for (i = prog->nparens; i >= 1; i--) {
1747 *++sp = -1;
1748 *++ep = -1;
1749 }
1750 }
1751 REGCP_SET;
1752 if (regmatch(prog->program + 1)) {
1753 prog->endp[0] = PL_reginput - PL_bostr;
1754 return 1;
1755 }
1756 REGCP_UNWIND;
1757 return 0;
1758}
1759
1760/*
1761 - regmatch - main matching routine
1762 *
1763 * Conceptually the strategy is simple: check to see whether the current
1764 * node matches, call self recursively to see whether the rest matches,
1765 * and then act accordingly. In practice we make some effort to avoid
1766 * recursion, in particular by going through "ordinary" nodes (that don't
1767 * need to know whether the rest of the match failed) by a loop instead of
1768 * by recursion.
1769 */
1770/* [lwall] I've hoisted the register declarations to the outer block in order to
1771 * maybe save a little bit of pushing and popping on the stack. It also takes
1772 * advantage of machines that use a register save mask on subroutine entry.
1773 */
1774STATIC I32 /* 0 failure, 1 success */
1775S_regmatch(pTHX_ regnode *prog)
1776{
1777 dTHR;
1778 register regnode *scan; /* Current node. */
1779 regnode *next; /* Next node. */
1780 regnode *inner; /* Next node in internal branch. */
1781 register I32 nextchr; /* renamed nextchr - nextchar colides with
1782 function of same name */
1783 register I32 n; /* no or next */
1784 register I32 ln; /* len or last */
1785 register char *s; /* operand or save */
1786 register char *locinput = PL_reginput;
1787 register I32 c1, c2, paren; /* case fold search, parenth */
1788 int minmod = 0, sw = 0, logical = 0;
1789#ifdef DEBUGGING
1790 PL_regindent++;
1791#endif
1792
1793 /* Note that nextchr is a byte even in UTF */
1794 nextchr = UCHARAT(locinput);
1795 scan = prog;
1796 while (scan != NULL) {
1797#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1798#ifdef DEBUGGING
1799# define sayYES goto yes
1800# define sayNO goto no
7821416a
IZ
1801# define sayYES_FINAL goto yes_final
1802# define sayYES_LOUD goto yes_loud
1803# define sayNO_FINAL goto no_final
1804# define sayNO_SILENT goto do_no
d6a28714
JH
1805# define saySAME(x) if (x) goto yes; else goto no
1806# define REPORT_CODE_OFF 24
1807#else
1808# define sayYES return 1
1809# define sayNO return 0
7821416a
IZ
1810# define sayYES_FINAL return 1
1811# define sayYES_LOUD return 1
1812# define sayNO_FINAL return 0
1813# define sayNO_SILENT return 0
d6a28714
JH
1814# define saySAME(x) return x
1815#endif
1816 DEBUG_r( {
1817 SV *prop = sv_newmortal();
1818 int docolor = *PL_colors[0];
1819 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1820 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1821 /* The part of the string before starttry has one color
1822 (pref0_len chars), between starttry and current
1823 position another one (pref_len - pref0_len chars),
1824 after the current position the third one.
1825 We assume that pref0_len <= pref_len, otherwise we
1826 decrease pref0_len. */
1827 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1828 ? (5 + taill) - l : locinput - PL_bostr);
1829 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1830
1831 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1832 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1833 ? (5 + taill) - pref_len : PL_regeol - locinput);
1834 if (pref0_len < 0)
1835 pref0_len = 0;
1836 if (pref0_len > pref_len)
1837 pref0_len = pref_len;
1838 regprop(prop, scan);
1839 PerlIO_printf(Perl_debug_log,
b900a521
JH
1840 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1841 (IV)(locinput - PL_bostr),
d6a28714
JH
1842 PL_colors[4], pref0_len,
1843 locinput - pref_len, PL_colors[5],
1844 PL_colors[2], pref_len - pref0_len,
1845 locinput - pref_len + pref0_len, PL_colors[3],
1846 (docolor ? "" : "> <"),
1847 PL_colors[0], l, locinput, PL_colors[1],
1848 15 - l - pref_len + 1,
1849 "",
b900a521 1850 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
1851 SvPVX(prop));
1852 } );
1853
1854 next = scan + NEXT_OFF(scan);
1855 if (next == scan)
1856 next = NULL;
1857
1858 switch (OP(scan)) {
1859 case BOL:
1860 if (locinput == PL_bostr
1861 ? PL_regprev == '\n'
1862 : (PL_multiline &&
1863 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1864 {
1865 /* regtill = regbol; */
b8c5462f
JH
1866 break;
1867 }
d6a28714
JH
1868 sayNO;
1869 case MBOL:
1870 if (locinput == PL_bostr
1871 ? PL_regprev == '\n'
1872 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1873 {
b8c5462f
JH
1874 break;
1875 }
d6a28714
JH
1876 sayNO;
1877 case SBOL:
1878 if (locinput == PL_regbol && PL_regprev == '\n')
b8c5462f 1879 break;
d6a28714
JH
1880 sayNO;
1881 case GPOS:
1882 if (locinput == PL_reg_ganch)
1883 break;
1884 sayNO;
1885 case EOL:
1886 if (PL_multiline)
1887 goto meol;
1888 else
1889 goto seol;
1890 case MEOL:
1891 meol:
1892 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1893 sayNO;
b8c5462f 1894 break;
d6a28714
JH
1895 case SEOL:
1896 seol:
1897 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1898 sayNO;
d6a28714 1899 if (PL_regeol - locinput > 1)
b8c5462f 1900 sayNO;
b8c5462f 1901 break;
d6a28714
JH
1902 case EOS:
1903 if (PL_regeol != locinput)
b8c5462f 1904 sayNO;
d6a28714
JH
1905 break;
1906 case SANYUTF8:
b8c5462f 1907 if (nextchr & 0x80) {
b8c5462f 1908 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1909 if (locinput > PL_regeol)
1910 sayNO;
b8c5462f
JH
1911 nextchr = UCHARAT(locinput);
1912 break;
1913 }
d6a28714 1914 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1915 sayNO;
b8c5462f 1916 nextchr = UCHARAT(++locinput);
a0d0e21e 1917 break;
d6a28714
JH
1918 case SANY:
1919 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
1920 sayNO;
1921 nextchr = UCHARAT(++locinput);
b85d18e9 1922 break;
d6a28714 1923 case ANYUTF8:
a0ed51b3 1924 if (nextchr & 0x80) {
b8c5462f 1925 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1926 if (locinput > PL_regeol)
1927 sayNO;
a0ed51b3
LW
1928 nextchr = UCHARAT(locinput);
1929 break;
1930 }
155aba94 1931 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
a0ed51b3
LW
1932 sayNO;
1933 nextchr = UCHARAT(++locinput);
1934 break;
d6a28714 1935 case REG_ANY:
155aba94 1936 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
4633a7c4 1937 sayNO;
76e3520e 1938 nextchr = UCHARAT(++locinput);
a0d0e21e 1939 break;
d6a28714 1940 case EXACT:
cd439c50
IZ
1941 s = STRING(scan);
1942 ln = STR_LEN(scan);
d6a28714
JH
1943 /* Inline the first character, for speed. */
1944 if (UCHARAT(s) != nextchr)
1945 sayNO;
1946 if (PL_regeol - locinput < ln)
1947 sayNO;
1948 if (ln > 1 && memNE(s, locinput, ln))
1949 sayNO;
1950 locinput += ln;
1951 nextchr = UCHARAT(locinput);
1952 break;
1953 case EXACTFL:
b8c5462f
JH
1954 PL_reg_flags |= RF_tainted;
1955 /* FALL THROUGH */
d6a28714 1956 case EXACTF:
cd439c50
IZ
1957 s = STRING(scan);
1958 ln = STR_LEN(scan);
d6a28714
JH
1959
1960 if (UTF) {
1961 char *l = locinput;
1962 char *e = s + ln;
1963 c1 = OP(scan) == EXACTF;
1964 while (s < e) {
1965 if (l >= PL_regeol)
1966 sayNO;
1967 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1968 toLOWER_utf8((U8*)l) :
1969 toLOWER_LC_utf8((U8*)l)))
1970 {
1971 sayNO;
1972 }
1973 s += UTF8SKIP(s);
1974 l += UTF8SKIP(l);
b8c5462f 1975 }
d6a28714 1976 locinput = l;
a0ed51b3
LW
1977 nextchr = UCHARAT(locinput);
1978 break;
1979 }
d6a28714
JH
1980
1981 /* Inline the first character, for speed. */
1982 if (UCHARAT(s) != nextchr &&
1983 UCHARAT(s) != ((OP(scan) == EXACTF)
1984 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 1985 sayNO;
d6a28714 1986 if (PL_regeol - locinput < ln)
b8c5462f 1987 sayNO;
d6a28714
JH
1988 if (ln > 1 && (OP(scan) == EXACTF
1989 ? ibcmp(s, locinput, ln)
1990 : ibcmp_locale(s, locinput, ln)))
4633a7c4 1991 sayNO;
d6a28714
JH
1992 locinput += ln;
1993 nextchr = UCHARAT(locinput);
a0d0e21e 1994 break;
d6a28714 1995 case ANYOFUTF8:
d6a28714 1996 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 1997 sayNO;
d6a28714
JH
1998 if (locinput >= PL_regeol)
1999 sayNO;
2000 locinput += PL_utf8skip[nextchr];
2001 nextchr = UCHARAT(locinput);
2002 break;
2003 case ANYOF:
d6a28714 2004 if (nextchr < 0)
b8c5462f 2005 nextchr = UCHARAT(locinput);
936ed897 2006 if (!REGINCLASS(scan, nextchr))
d6a28714
JH
2007 sayNO;
2008 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2009 sayNO;
b8c5462f
JH
2010 nextchr = UCHARAT(++locinput);
2011 break;
d6a28714 2012 case ALNUML:
b8c5462f
JH
2013 PL_reg_flags |= RF_tainted;
2014 /* FALL THROUGH */
d6a28714 2015 case ALNUM:
b8c5462f 2016 if (!nextchr)
4633a7c4 2017 sayNO;
d6a28714
JH
2018 if (!(OP(scan) == ALNUM
2019 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
2020 sayNO;
2021 nextchr = UCHARAT(++locinput);
bbce6d69 2022 break;
d6a28714 2023 case ALNUMLUTF8:
3280af22 2024 PL_reg_flags |= RF_tainted;
bbce6d69 2025 /* FALL THROUGH */
d6a28714 2026 case ALNUMUTF8:
b8c5462f
JH
2027 if (!nextchr)
2028 sayNO;
2029 if (nextchr & 0x80) {
d6a28714
JH
2030 if (!(OP(scan) == ALNUMUTF8
2031 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2032 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2033 {
2034 sayNO;
a0ed51b3 2035 }
b8c5462f 2036 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2037 nextchr = UCHARAT(locinput);
2038 break;
2039 }
d6a28714
JH
2040 if (!(OP(scan) == ALNUMUTF8
2041 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2042 sayNO;
b8c5462f 2043 nextchr = UCHARAT(++locinput);
a0d0e21e 2044 break;
d6a28714 2045 case NALNUML:
b8c5462f
JH
2046 PL_reg_flags |= RF_tainted;
2047 /* FALL THROUGH */
d6a28714
JH
2048 case NALNUM:
2049 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2050 sayNO;
d6a28714
JH
2051 if (OP(scan) == NALNUM
2052 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 2053 sayNO;
b8c5462f 2054 nextchr = UCHARAT(++locinput);
a0ed51b3 2055 break;
d6a28714 2056 case NALNUMLUTF8:
b8c5462f
JH
2057 PL_reg_flags |= RF_tainted;
2058 /* FALL THROUGH */
d6a28714 2059 case NALNUMUTF8:
3280af22 2060 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2061 sayNO;
b8c5462f 2062 if (nextchr & 0x80) {
d6a28714
JH
2063 if (OP(scan) == NALNUMUTF8
2064 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2065 : isALNUM_LC_utf8((U8*)locinput))
2066 {
b8c5462f 2067 sayNO;
d6a28714 2068 }
b8c5462f
JH
2069 locinput += PL_utf8skip[nextchr];
2070 nextchr = UCHARAT(locinput);
2071 break;
2072 }
d6a28714
JH
2073 if (OP(scan) == NALNUMUTF8
2074 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2075 sayNO;
76e3520e 2076 nextchr = UCHARAT(++locinput);
a0d0e21e 2077 break;
d6a28714
JH
2078 case BOUNDL:
2079 case NBOUNDL:
3280af22 2080 PL_reg_flags |= RF_tainted;
bbce6d69 2081 /* FALL THROUGH */
d6a28714
JH
2082 case BOUND:
2083 case NBOUND:
2084 /* was last char in word? */
2085 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2086 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2087 ln = isALNUM(ln);
2088 n = isALNUM(nextchr);
2089 }
2090 else {
2091 ln = isALNUM_LC(ln);
2092 n = isALNUM_LC(nextchr);
2093 }
2094 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 2095 sayNO;
a0d0e21e 2096 break;
d6a28714
JH
2097 case BOUNDLUTF8:
2098 case NBOUNDLUTF8:
a0ed51b3
LW
2099 PL_reg_flags |= RF_tainted;
2100 /* FALL THROUGH */
d6a28714
JH
2101 case BOUNDUTF8:
2102 case NBOUNDUTF8:
2103 /* was last char in word? */
2104 ln = (locinput != PL_regbol)
2105 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
2106 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2107 ln = isALNUM_uni(ln);
2108 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 2109 }
d6a28714
JH
2110 else {
2111 ln = isALNUM_LC_uni(ln);
2112 n = isALNUM_LC_utf8((U8*)locinput);
2113 }
2114 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 2115 sayNO;
a0ed51b3 2116 break;
d6a28714 2117 case SPACEL:
3280af22 2118 PL_reg_flags |= RF_tainted;
bbce6d69 2119 /* FALL THROUGH */
d6a28714 2120 case SPACE:
9442cb0e 2121 if (!nextchr)
4633a7c4 2122 sayNO;
d6a28714
JH
2123 if (!(OP(scan) == SPACE
2124 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 2125 sayNO;
76e3520e 2126 nextchr = UCHARAT(++locinput);
a0d0e21e 2127 break;
d6a28714 2128 case SPACELUTF8:
a0ed51b3
LW
2129 PL_reg_flags |= RF_tainted;
2130 /* FALL THROUGH */
d6a28714 2131 case SPACEUTF8:
9442cb0e 2132 if (!nextchr)
a0ed51b3
LW
2133 sayNO;
2134 if (nextchr & 0x80) {
d6a28714 2135 if (!(OP(scan) == SPACEUTF8
9442cb0e 2136 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714
JH
2137 : isSPACE_LC_utf8((U8*)locinput)))
2138 {
a0ed51b3 2139 sayNO;
d6a28714 2140 }
6f06b55f 2141 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2142 nextchr = UCHARAT(locinput);
2143 break;
2144 }
d6a28714
JH
2145 if (!(OP(scan) == SPACEUTF8
2146 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
2147 sayNO;
2148 nextchr = UCHARAT(++locinput);
2149 break;
d6a28714 2150 case NSPACEL:
3280af22 2151 PL_reg_flags |= RF_tainted;
bbce6d69 2152 /* FALL THROUGH */
d6a28714 2153 case NSPACE:
9442cb0e 2154 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2155 sayNO;
9442cb0e 2156 if (OP(scan) == NSPACE
d6a28714 2157 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2158 sayNO;
b8c5462f 2159 nextchr = UCHARAT(++locinput);
a0d0e21e 2160 break;
d6a28714 2161 case NSPACELUTF8:
a0ed51b3
LW
2162 PL_reg_flags |= RF_tainted;
2163 /* FALL THROUGH */
d6a28714 2164 case NSPACEUTF8:
9442cb0e 2165 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
2166 sayNO;
2167 if (nextchr & 0x80) {
d6a28714 2168 if (OP(scan) == NSPACEUTF8
9442cb0e 2169 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714 2170 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2171 {
2172 sayNO;
2173 }
2174 locinput += PL_utf8skip[nextchr];
2175 nextchr = UCHARAT(locinput);
2176 break;
a0ed51b3 2177 }
d6a28714
JH
2178 if (OP(scan) == NSPACEUTF8
2179 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2180 sayNO;
76e3520e 2181 nextchr = UCHARAT(++locinput);
a0d0e21e 2182 break;
d6a28714 2183 case DIGITL:
a0ed51b3
LW
2184 PL_reg_flags |= RF_tainted;
2185 /* FALL THROUGH */
d6a28714 2186 case DIGIT:
9442cb0e 2187 if (!nextchr)
a0ed51b3 2188 sayNO;
d6a28714
JH
2189 if (!(OP(scan) == DIGIT
2190 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2191 sayNO;
76e3520e 2192 nextchr = UCHARAT(++locinput);
a0d0e21e 2193 break;
d6a28714 2194 case DIGITLUTF8:
a0ed51b3
LW
2195 PL_reg_flags |= RF_tainted;
2196 /* FALL THROUGH */
d6a28714 2197 case DIGITUTF8:
a0ed51b3
LW
2198 if (!nextchr)
2199 sayNO;
2200 if (nextchr & 0x80) {
9442cb0e
GS
2201 if (!(OP(scan) == DIGITUTF8
2202 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2203 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2204 {
a0ed51b3 2205 sayNO;
dfe13c55 2206 }
6f06b55f 2207 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2208 nextchr = UCHARAT(locinput);
2209 break;
2210 }
9442cb0e
GS
2211 if (!(OP(scan) == DIGITUTF8
2212 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2213 sayNO;
2214 nextchr = UCHARAT(++locinput);
2215 break;
d6a28714 2216 case NDIGITL:
b8c5462f
JH
2217 PL_reg_flags |= RF_tainted;
2218 /* FALL THROUGH */
d6a28714 2219 case NDIGIT:
9442cb0e 2220 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2221 sayNO;
9442cb0e 2222 if (OP(scan) == NDIGIT
d6a28714 2223 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2224 sayNO;
76e3520e 2225 nextchr = UCHARAT(++locinput);
a0d0e21e 2226 break;
d6a28714 2227 case NDIGITLUTF8:
b8c5462f
JH
2228 PL_reg_flags |= RF_tainted;
2229 /* FALL THROUGH */
d6a28714 2230 case NDIGITUTF8:
b8c5462f
JH
2231 if (!nextchr && locinput >= PL_regeol)
2232 sayNO;
a0ed51b3 2233 if (nextchr & 0x80) {
9442cb0e
GS
2234 if (OP(scan) == NDIGITUTF8
2235 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2236 : isDIGIT_LC_utf8((U8*)locinput))
2237 {
a0ed51b3 2238 sayNO;
9442cb0e 2239 }
6f06b55f 2240 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2241 nextchr = UCHARAT(locinput);
2242 break;
2243 }
9442cb0e
GS
2244 if (OP(scan) == NDIGITUTF8
2245 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2246 sayNO;
2247 nextchr = UCHARAT(++locinput);
2248 break;
2249 case CLUMP:
dfe13c55 2250 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2251 sayNO;
6f06b55f 2252 locinput += PL_utf8skip[nextchr];
dfe13c55 2253 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2254 locinput += UTF8SKIP(locinput);
2255 if (locinput > PL_regeol)
2256 sayNO;
2257 nextchr = UCHARAT(locinput);
2258 break;
c8756f30 2259 case REFFL:
3280af22 2260 PL_reg_flags |= RF_tainted;
c8756f30 2261 /* FALL THROUGH */
c277df42 2262 case REF:
c8756f30 2263 case REFF:
c277df42 2264 n = ARG(scan); /* which paren pair */
cf93c79d 2265 ln = PL_regstartp[n];
2c2d71f5 2266 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2267 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2268 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2269 if (ln == PL_regendp[n])
a0d0e21e 2270 break;
a0ed51b3 2271
cf93c79d 2272 s = PL_bostr + ln;
a0ed51b3
LW
2273 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2274 char *l = locinput;
cf93c79d 2275 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2276 /*
2277 * Note that we can't do the "other character" lookup trick as
2278 * in the 8-bit case (no pun intended) because in Unicode we
2279 * have to map both upper and title case to lower case.
2280 */
2281 if (OP(scan) == REFF) {
2282 while (s < e) {
2283 if (l >= PL_regeol)
2284 sayNO;
dfe13c55 2285 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2286 sayNO;
2287 s += UTF8SKIP(s);
2288 l += UTF8SKIP(l);
2289 }
2290 }
2291 else {
2292 while (s < e) {
2293 if (l >= PL_regeol)
2294 sayNO;
dfe13c55 2295 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2296 sayNO;
2297 s += UTF8SKIP(s);
2298 l += UTF8SKIP(l);
2299 }
2300 }
2301 locinput = l;
2302 nextchr = UCHARAT(locinput);
2303 break;
2304 }
2305
a0d0e21e 2306 /* Inline the first character, for speed. */
76e3520e 2307 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2308 (OP(scan) == REF ||
2309 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2310 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2311 sayNO;
cf93c79d 2312 ln = PL_regendp[n] - ln;
3280af22 2313 if (locinput + ln > PL_regeol)
4633a7c4 2314 sayNO;
c8756f30
AK
2315 if (ln > 1 && (OP(scan) == REF
2316 ? memNE(s, locinput, ln)
2317 : (OP(scan) == REFF
2318 ? ibcmp(s, locinput, ln)
2319 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2320 sayNO;
a0d0e21e 2321 locinput += ln;
76e3520e 2322 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2323 break;
2324
2325 case NOTHING:
c277df42 2326 case TAIL:
a0d0e21e
LW
2327 break;
2328 case BACK:
2329 break;
c277df42
IZ
2330 case EVAL:
2331 {
2332 dSP;
533c011a 2333 OP_4tree *oop = PL_op;
3280af22
NIS
2334 COP *ocurcop = PL_curcop;
2335 SV **ocurpad = PL_curpad;
c277df42
IZ
2336 SV *ret;
2337
2338 n = ARG(scan);
533c011a 2339 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2340 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2341 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2342 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2343
cea2e8a9 2344 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2345 SPAGAIN;
2346 ret = POPs;
2347 PUTBACK;
2348
0f5d15d6
IZ
2349 PL_op = oop;
2350 PL_curpad = ocurpad;
2351 PL_curcop = ocurcop;
c277df42 2352 if (logical) {
0f5d15d6
IZ
2353 if (logical == 2) { /* Postponed subexpression. */
2354 regexp *re;
22c35a8c 2355 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2356 re_cc_state state;
0f5d15d6
IZ
2357 CHECKPOINT cp, lastcp;
2358
2359 if(SvROK(ret) || SvRMAGICAL(ret)) {
2360 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2361
2362 if(SvMAGICAL(sv))
2363 mg = mg_find(sv, 'r');
2364 }
2365 if (mg) {
2366 re = (regexp *)mg->mg_obj;
df0003d4 2367 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2368 }
2369 else {
2370 STRLEN len;
2371 char *t = SvPV(ret, len);
2372 PMOP pm;
2373 char *oprecomp = PL_regprecomp;
2374 I32 osize = PL_regsize;
2375 I32 onpar = PL_regnpar;
2376
2377 pm.op_pmflags = 0;
393fec97 2378 pm.op_pmdynflags = (UTF||DO_UTF8(ret) ? PMdf_UTF8 : 0);
cea2e8a9 2379 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2380 if (!(SvFLAGS(ret)
2381 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2382 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2383 PL_regprecomp = oprecomp;
2384 PL_regsize = osize;
2385 PL_regnpar = onpar;
2386 }
2387 DEBUG_r(
2388 PerlIO_printf(Perl_debug_log,
2389 "Entering embedded `%s%.60s%s%s'\n",
2390 PL_colors[0],
2391 re->precomp,
2392 PL_colors[1],
2393 (strlen(re->precomp) > 60 ? "..." : ""))
2394 );
2395 state.node = next;
2396 state.prev = PL_reg_call_cc;
2397 state.cc = PL_regcc;
2398 state.re = PL_reg_re;
2399
2ab05381 2400 PL_regcc = 0;
0f5d15d6
IZ
2401
2402 cp = regcppush(0); /* Save *all* the positions. */
2403 REGCP_SET;
2404 cache_re(re);
2405 state.ss = PL_savestack_ix;
2406 *PL_reglastparen = 0;
2407 PL_reg_call_cc = &state;
2408 PL_reginput = locinput;
2c2d71f5
JH
2409
2410 /* XXXX This is too dramatic a measure... */
2411 PL_reg_maxiter = 0;
2412
0f5d15d6 2413 if (regmatch(re->program + 1)) {
2c914db6
IZ
2414 /* Even though we succeeded, we need to restore
2415 global variables, since we may be wrapped inside
2416 SUSPEND, thus the match may be not finished yet. */
2417
2418 /* XXXX Do this only if SUSPENDed? */
2419 PL_reg_call_cc = state.prev;
2420 PL_regcc = state.cc;
2421 PL_reg_re = state.re;
2422 cache_re(PL_reg_re);
2423
2424 /* XXXX This is too dramatic a measure... */
2425 PL_reg_maxiter = 0;
2426
2427 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2428 ReREFCNT_dec(re);
2429 regcpblow(cp);
2430 sayYES;
2431 }
0f5d15d6
IZ
2432 ReREFCNT_dec(re);
2433 REGCP_UNWIND;
2434 regcppop();
2435 PL_reg_call_cc = state.prev;
2436 PL_regcc = state.cc;
2437 PL_reg_re = state.re;
d3790889 2438 cache_re(PL_reg_re);
2c2d71f5
JH
2439
2440 /* XXXX This is too dramatic a measure... */
2441 PL_reg_maxiter = 0;
2442
0f5d15d6
IZ
2443 sayNO;
2444 }
c277df42 2445 sw = SvTRUE(ret);
0f5d15d6 2446 logical = 0;
a0ed51b3
LW
2447 }
2448 else
3280af22 2449 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2450 break;
2451 }
a0d0e21e 2452 case OPEN:
c277df42 2453 n = ARG(scan); /* which paren pair */
3280af22
NIS
2454 PL_reg_start_tmp[n] = locinput;
2455 if (n > PL_regsize)
2456 PL_regsize = n;
a0d0e21e
LW
2457 break;
2458 case CLOSE:
c277df42 2459 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2460 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2461 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2462 if (n > *PL_reglastparen)
2463 *PL_reglastparen = n;
a0d0e21e 2464 break;
c277df42
IZ
2465 case GROUPP:
2466 n = ARG(scan); /* which paren pair */
cf93c79d 2467 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2468 break;
2469 case IFTHEN:
2c2d71f5 2470 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2471 if (sw)
2472 next = NEXTOPER(NEXTOPER(scan));
2473 else {
2474 next = scan + ARG(scan);
2475 if (OP(next) == IFTHEN) /* Fake one. */
2476 next = NEXTOPER(NEXTOPER(next));
2477 }
2478 break;
2479 case LOGICAL:
0f5d15d6 2480 logical = scan->flags;
c277df42 2481 break;
2ab05381
IZ
2482/*******************************************************************
2483 PL_regcc contains infoblock about the innermost (...)* loop, and
2484 a pointer to the next outer infoblock.
2485
2486 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2487
2488 1) After matching X, regnode for CURLYX is processed;
2489
2490 2) This regnode creates infoblock on the stack, and calls
2491 regmatch() recursively with the starting point at WHILEM node;
2492
2493 3) Each hit of WHILEM node tries to match A and Z (in the order
2494 depending on the current iteration, min/max of {min,max} and
2495 greediness). The information about where are nodes for "A"
2496 and "Z" is read from the infoblock, as is info on how many times "A"
2497 was already matched, and greediness.
2498
2499 4) After A matches, the same WHILEM node is hit again.
2500
2501 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2502 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2503 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2504 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2505 of the external loop.
2506
2507 Currently present infoblocks form a tree with a stem formed by PL_curcc
2508 and whatever it mentions via ->next, and additional attached trees
2509 corresponding to temporarily unset infoblocks as in "5" above.
2510
2511 In the following picture infoblocks for outer loop of
2512 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2513 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2514 infoblocks are drawn below the "reset" infoblock.
2515
2516 In fact in the picture below we do not show failed matches for Z and T
2517 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2518 more obvious *why* one needs to *temporary* unset infoblocks.]
2519
2520 Matched REx position InfoBlocks Comment
2521 (Y(A)*?Z)*?T x
2522 Y(A)*?Z)*?T x <- O
2523 Y (A)*?Z)*?T x <- O
2524 Y A)*?Z)*?T x <- O <- I
2525 YA )*?Z)*?T x <- O <- I
2526 YA A)*?Z)*?T x <- O <- I
2527 YAA )*?Z)*?T x <- O <- I
2528 YAA Z)*?T x <- O # Temporary unset I
2529 I
2530
2531 YAAZ Y(A)*?Z)*?T x <- O
2532 I
2533
2534 YAAZY (A)*?Z)*?T x <- O
2535 I
2536
2537 YAAZY A)*?Z)*?T x <- O <- I
2538 I
2539
2540 YAAZYA )*?Z)*?T x <- O <- I
2541 I
2542
2543 YAAZYA Z)*?T x <- O # Temporary unset I
2544 I,I
2545
2546 YAAZYAZ )*?T x <- O
2547 I,I
2548
2549 YAAZYAZ T x # Temporary unset O
2550 O
2551 I,I
2552
2553 YAAZYAZT x
2554 O
2555 I,I
2556 *******************************************************************/
a0d0e21e
LW
2557 case CURLYX: {
2558 CURCUR cc;
3280af22 2559 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
2560
2561 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2562 next += ARG(next);
3280af22
NIS
2563 cc.oldcc = PL_regcc;
2564 PL_regcc = &cc;
2565 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
2566 cc.cur = -1;
2567 cc.min = ARG1(scan);
2568 cc.max = ARG2(scan);
c277df42 2569 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2570 cc.next = next;
2571 cc.minmod = minmod;
2572 cc.lastloc = 0;
3280af22 2573 PL_reginput = locinput;
a0d0e21e
LW
2574 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2575 regcpblow(cp);
3280af22 2576 PL_regcc = cc.oldcc;
4633a7c4 2577 saySAME(n);
a0d0e21e
LW
2578 }
2579 /* NOT REACHED */
2580 case WHILEM: {
2581 /*
2582 * This is really hard to understand, because after we match
2583 * what we're trying to match, we must make sure the rest of
2c2d71f5 2584 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2585 * to go back UP the parse tree by recursing ever deeper. And
2586 * if it fails, we have to reset our parent's current state
2587 * that we can try again after backing off.
2588 */
2589
c277df42 2590 CHECKPOINT cp, lastcp;
3280af22 2591 CURCUR* cc = PL_regcc;
c277df42
IZ
2592 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2593
4633a7c4 2594 n = cc->cur + 1; /* how many we know we matched */
3280af22 2595 PL_reginput = locinput;
a0d0e21e 2596
c277df42
IZ
2597 DEBUG_r(
2598 PerlIO_printf(Perl_debug_log,
2599 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2600 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2601 (long)n, (long)cc->min,
2602 (long)cc->max, (long)cc)
2603 );
4633a7c4 2604
a0d0e21e
LW
2605 /* If degenerate scan matches "", assume scan done. */
2606
579cf2c3 2607 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2608 PL_regcc = cc->oldcc;
2ab05381
IZ
2609 if (PL_regcc)
2610 ln = PL_regcc->cur;
c277df42 2611 DEBUG_r(
c3464db5
DD
2612 PerlIO_printf(Perl_debug_log,
2613 "%*s empty match detected, try continuation...\n",
3280af22 2614 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2615 );
a0d0e21e 2616 if (regmatch(cc->next))
4633a7c4 2617 sayYES;
2ab05381
IZ
2618 if (PL_regcc)
2619 PL_regcc->cur = ln;
3280af22 2620 PL_regcc = cc;
4633a7c4 2621 sayNO;
a0d0e21e
LW
2622 }
2623
2624 /* First just match a string of min scans. */
2625
2626 if (n < cc->min) {
2627 cc->cur = n;
2628 cc->lastloc = locinput;
4633a7c4
LW
2629 if (regmatch(cc->scan))
2630 sayYES;
2631 cc->cur = n - 1;
c277df42 2632 cc->lastloc = lastloc;
4633a7c4 2633 sayNO;
a0d0e21e
LW
2634 }
2635
2c2d71f5
JH
2636 if (scan->flags) {
2637 /* Check whether we already were at this position.
2638 Postpone detection until we know the match is not
2639 *that* much linear. */
2640 if (!PL_reg_maxiter) {
2641 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2642 PL_reg_leftiter = PL_reg_maxiter;
2643 }
2644 if (PL_reg_leftiter-- == 0) {
2645 I32 size = (PL_reg_maxiter + 7)/8;
2646 if (PL_reg_poscache) {
2647 if (PL_reg_poscache_size < size) {
2648 Renew(PL_reg_poscache, size, char);
2649 PL_reg_poscache_size = size;
2650 }
2651 Zero(PL_reg_poscache, size, char);
2652 }
2653 else {
2654 PL_reg_poscache_size = size;
2655 Newz(29, PL_reg_poscache, size, char);
2656 }
2657 DEBUG_r(
2658 PerlIO_printf(Perl_debug_log,
2659 "%sDetected a super-linear match, switching on caching%s...\n",
2660 PL_colors[4], PL_colors[5])
2661 );
2662 }
2663 if (PL_reg_leftiter < 0) {
2664 I32 o = locinput - PL_bostr, b;
2665
2666 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2667 b = o % 8;
2668 o /= 8;
2669 if (PL_reg_poscache[o] & (1<<b)) {
2670 DEBUG_r(
2671 PerlIO_printf(Perl_debug_log,
2672 "%*s already tried at this position...\n",
2673 REPORT_CODE_OFF+PL_regindent*2, "")
2674 );
7821416a 2675 sayNO_SILENT;
2c2d71f5
JH
2676 }
2677 PL_reg_poscache[o] |= (1<<b);
2678 }
2679 }
2680
a0d0e21e
LW
2681 /* Prefer next over scan for minimal matching. */
2682
2683 if (cc->minmod) {
3280af22 2684 PL_regcc = cc->oldcc;
2ab05381
IZ
2685 if (PL_regcc)
2686 ln = PL_regcc->cur;
5f05dabc 2687 cp = regcppush(cc->parenfloor);
c277df42 2688 REGCP_SET;
5f05dabc 2689 if (regmatch(cc->next)) {
c277df42 2690 regcpblow(cp);
4633a7c4 2691 sayYES; /* All done. */
5f05dabc 2692 }
c277df42 2693 REGCP_UNWIND;
5f05dabc 2694 regcppop();
2ab05381
IZ
2695 if (PL_regcc)
2696 PL_regcc->cur = ln;
3280af22 2697 PL_regcc = cc;
a0d0e21e 2698
c277df42 2699 if (n >= cc->max) { /* Maximum greed exceeded? */
e476b1b5 2700 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2701 && !(PL_reg_flags & RF_warned)) {
2702 PL_reg_flags |= RF_warned;
e476b1b5 2703 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2704 "Complex regular subexpression recursion",
2705 REG_INFTY - 1);
c277df42 2706 }
4633a7c4 2707 sayNO;
c277df42 2708 }
a687059c 2709
c277df42 2710 DEBUG_r(
c3464db5
DD
2711 PerlIO_printf(Perl_debug_log,
2712 "%*s trying longer...\n",
3280af22 2713 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2714 );
a0d0e21e 2715 /* Try scanning more and see if it helps. */
3280af22 2716 PL_reginput = locinput;
a0d0e21e
LW
2717 cc->cur = n;
2718 cc->lastloc = locinput;
5f05dabc 2719 cp = regcppush(cc->parenfloor);
c277df42 2720 REGCP_SET;
5f05dabc 2721 if (regmatch(cc->scan)) {
c277df42 2722 regcpblow(cp);
4633a7c4 2723 sayYES;
5f05dabc 2724 }
c277df42 2725 REGCP_UNWIND;
5f05dabc 2726 regcppop();
4633a7c4 2727 cc->cur = n - 1;
c277df42 2728 cc->lastloc = lastloc;
4633a7c4 2729 sayNO;
a0d0e21e
LW
2730 }
2731
2732 /* Prefer scan over next for maximal matching. */
2733
2734 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2735 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2736 cc->cur = n;
2737 cc->lastloc = locinput;
c277df42 2738 REGCP_SET;
5f05dabc 2739 if (regmatch(cc->scan)) {
c277df42 2740 regcpblow(cp);
4633a7c4 2741 sayYES;
5f05dabc 2742 }
c277df42 2743 REGCP_UNWIND;
a0d0e21e 2744 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2745 PL_reginput = locinput;
c277df42 2746 DEBUG_r(
c3464db5
DD
2747 PerlIO_printf(Perl_debug_log,
2748 "%*s failed, try continuation...\n",
3280af22 2749 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2750 );
2751 }
e476b1b5 2752 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2753 && !(PL_reg_flags & RF_warned)) {
3280af22 2754 PL_reg_flags |= RF_warned;
e476b1b5 2755 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2756 "Complex regular subexpression recursion",
2757 REG_INFTY - 1);
a0d0e21e
LW
2758 }
2759
2760 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2761 PL_regcc = cc->oldcc;
2ab05381
IZ
2762 if (PL_regcc)
2763 ln = PL_regcc->cur;
a0d0e21e 2764 if (regmatch(cc->next))
4633a7c4 2765 sayYES;
2ab05381
IZ
2766 if (PL_regcc)
2767 PL_regcc->cur = ln;
3280af22 2768 PL_regcc = cc;
4633a7c4 2769 cc->cur = n - 1;
c277df42 2770 cc->lastloc = lastloc;
4633a7c4 2771 sayNO;
a0d0e21e
LW
2772 }
2773 /* NOT REACHED */
c277df42
IZ
2774 case BRANCHJ:
2775 next = scan + ARG(scan);
2776 if (next == scan)
2777 next = NULL;
2778 inner = NEXTOPER(NEXTOPER(scan));
2779 goto do_branch;
2780 case BRANCH:
2781 inner = NEXTOPER(scan);
2782 do_branch:
2783 {
2784 CHECKPOINT lastcp;
2785 c1 = OP(scan);
2786 if (OP(next) != c1) /* No choice. */
2787 next = inner; /* Avoid recursion. */
a0d0e21e 2788 else {
3280af22 2789 int lastparen = *PL_reglastparen;
c277df42
IZ
2790
2791 REGCP_SET;
a0d0e21e 2792 do {
3280af22 2793 PL_reginput = locinput;
c277df42 2794 if (regmatch(inner))
4633a7c4 2795 sayYES;
c277df42 2796 REGCP_UNWIND;
3280af22 2797 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2798 PL_regendp[n] = -1;
3280af22 2799 *PL_reglastparen = n;
c277df42 2800 scan = next;
a0d0e21e 2801 /*SUPPRESS 560*/
155aba94 2802 if ((n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next))))
c277df42 2803 next += n;
a0d0e21e 2804 else
c277df42 2805 next = NULL;
c277df42
IZ
2806 inner = NEXTOPER(scan);
2807 if (c1 == BRANCHJ) {
2808 inner = NEXTOPER(inner);
2809 }
2810 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2811 sayNO;
a0d0e21e 2812 /* NOTREACHED */
a687059c 2813 }
a0d0e21e
LW
2814 }
2815 break;
2816 case MINMOD:
2817 minmod = 1;
2818 break;
c277df42
IZ
2819 case CURLYM:
2820 {
00db4c45 2821 I32 l = 0;
c277df42
IZ
2822 CHECKPOINT lastcp;
2823
2824 /* We suppose that the next guy does not need
2825 backtracking: in particular, it is of constant length,
2826 and has no parenths to influence future backrefs. */
2827 ln = ARG1(scan); /* min to match */
2828 n = ARG2(scan); /* max to match */
c277df42
IZ
2829 paren = scan->flags;
2830 if (paren) {
3280af22
NIS
2831 if (paren > PL_regsize)
2832 PL_regsize = paren;
2833 if (paren > *PL_reglastparen)
2834 *PL_reglastparen = paren;
c277df42 2835 }
dc45a647 2836 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2837 if (paren)
2838 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2839 PL_reginput = locinput;
c277df42
IZ
2840 if (minmod) {
2841 minmod = 0;
2842 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2843 sayNO;
5f4b28b2 2844 if (ln && l == 0 && n >= ln
c277df42
IZ
2845 /* In fact, this is tricky. If paren, then the
2846 fact that we did/didnot match may influence
2847 future execution. */
2848 && !(paren && ln == 0))
2849 ln = n;
3280af22 2850 locinput = PL_reginput;
22c35a8c 2851 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2852 c1 = (U8)*STRING(next);
c277df42 2853 if (OP(next) == EXACTF)
22c35a8c 2854 c2 = PL_fold[c1];
c277df42 2855 else if (OP(next) == EXACTFL)
22c35a8c 2856 c2 = PL_fold_locale[c1];
c277df42
IZ
2857 else
2858 c2 = c1;
a0ed51b3
LW
2859 }
2860 else
c277df42
IZ
2861 c1 = c2 = -1000;
2862 REGCP_SET;
5f4b28b2 2863 /* This may be improved if l == 0. */
c277df42
IZ
2864 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2865 /* If it could work, try it. */
2866 if (c1 == -1000 ||
3280af22
NIS
2867 UCHARAT(PL_reginput) == c1 ||
2868 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2869 {
2870 if (paren) {
2871 if (n) {
cf93c79d
IZ
2872 PL_regstartp[paren] =
2873 HOPc(PL_reginput, -l) - PL_bostr;
2874 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2875 }
2876 else
cf93c79d 2877 PL_regendp[paren] = -1;
c277df42
IZ
2878 }
2879 if (regmatch(next))
2880 sayYES;
2881 REGCP_UNWIND;
2882 }
2883 /* Couldn't or didn't -- move forward. */
3280af22 2884 PL_reginput = locinput;
c277df42
IZ
2885 if (regrepeat_hard(scan, 1, &l)) {
2886 ln++;
3280af22 2887 locinput = PL_reginput;
c277df42
IZ
2888 }
2889 else
2890 sayNO;
2891 }
a0ed51b3
LW
2892 }
2893 else {
c277df42
IZ
2894 n = regrepeat_hard(scan, n, &l);
2895 if (n != 0 && l == 0
2896 /* In fact, this is tricky. If paren, then the
2897 fact that we did/didnot match may influence
2898 future execution. */
2899 && !(paren && ln == 0))
2900 ln = n;
3280af22 2901 locinput = PL_reginput;
c277df42 2902 DEBUG_r(
5c0ca799 2903 PerlIO_printf(Perl_debug_log,
faccc32b 2904 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 2905 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 2906 (IV) n, (IV)l)
c277df42
IZ
2907 );
2908 if (n >= ln) {
22c35a8c 2909 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2910 c1 = (U8)*STRING(next);
c277df42 2911 if (OP(next) == EXACTF)
22c35a8c 2912 c2 = PL_fold[c1];
c277df42 2913 else if (OP(next) == EXACTFL)
22c35a8c 2914 c2 = PL_fold_locale[c1];
c277df42
IZ
2915 else
2916 c2 = c1;
a0ed51b3
LW
2917 }
2918 else
c277df42
IZ
2919 c1 = c2 = -1000;
2920 }
2921 REGCP_SET;
2922 while (n >= ln) {
2923 /* If it could work, try it. */
2924 if (c1 == -1000 ||
3280af22
NIS
2925 UCHARAT(PL_reginput) == c1 ||
2926 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
2927 {
2928 DEBUG_r(
c3464db5 2929 PerlIO_printf(Perl_debug_log,
7b0972df
JH
2930 "%*s trying tail with n=%"IVdf"...\n",
2931 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
2932 );
2933 if (paren) {
2934 if (n) {
cf93c79d
IZ
2935 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2936 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2937 }
a0ed51b3 2938 else
cf93c79d 2939 PL_regendp[paren] = -1;
c277df42 2940 }
a0ed51b3
LW
2941 if (regmatch(next))
2942 sayYES;
2943 REGCP_UNWIND;
2944 }
c277df42
IZ
2945 /* Couldn't or didn't -- back up. */
2946 n--;
dfe13c55 2947 locinput = HOPc(locinput, -l);
3280af22 2948 PL_reginput = locinput;
c277df42
IZ
2949 }
2950 }
2951 sayNO;
2952 break;
2953 }
2954 case CURLYN:
2955 paren = scan->flags; /* Which paren to set */
3280af22
NIS
2956 if (paren > PL_regsize)
2957 PL_regsize = paren;
2958 if (paren > *PL_reglastparen)
2959 *PL_reglastparen = paren;
c277df42
IZ
2960 ln = ARG1(scan); /* min to match */
2961 n = ARG2(scan); /* max to match */
dc45a647 2962 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2963 goto repeat;
a0d0e21e 2964 case CURLY:
c277df42 2965 paren = 0;
a0d0e21e
LW
2966 ln = ARG1(scan); /* min to match */
2967 n = ARG2(scan); /* max to match */
dc45a647 2968 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
2969 goto repeat;
2970 case STAR:
2971 ln = 0;
c277df42 2972 n = REG_INFTY;
a0d0e21e 2973 scan = NEXTOPER(scan);
c277df42 2974 paren = 0;
a0d0e21e
LW
2975 goto repeat;
2976 case PLUS:
c277df42
IZ
2977 ln = 1;
2978 n = REG_INFTY;
2979 scan = NEXTOPER(scan);
2980 paren = 0;
2981 repeat:
a0d0e21e
LW
2982 /*
2983 * Lookahead to avoid useless match attempts
2984 * when we know what character comes next.
2985 */
22c35a8c 2986 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2987 c1 = (U8)*STRING(next);
bbce6d69 2988 if (OP(next) == EXACTF)
22c35a8c 2989 c2 = PL_fold[c1];
bbce6d69 2990 else if (OP(next) == EXACTFL)
22c35a8c 2991 c2 = PL_fold_locale[c1];
bbce6d69 2992 else
2993 c2 = c1;
2994 }
a0d0e21e 2995 else
bbce6d69 2996 c1 = c2 = -1000;
3280af22 2997 PL_reginput = locinput;
a0d0e21e 2998 if (minmod) {
c277df42 2999 CHECKPOINT lastcp;
a0d0e21e
LW
3000 minmod = 0;
3001 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3002 sayNO;
a0ed51b3 3003 locinput = PL_reginput;
c277df42 3004 REGCP_SET;
0fe9bf95
IZ
3005 if (c1 != -1000) {
3006 char *e = locinput + n - ln; /* Should not check after this */
3007 char *old = locinput;
3008
3009 if (e >= PL_regeol || (n == REG_INFTY))
3010 e = PL_regeol - 1;
3011 while (1) {
3012 /* Find place 'next' could work */
3013 if (c1 == c2) {
3014 while (locinput <= e && *locinput != c1)
3015 locinput++;
3016 } else {
3017 while (locinput <= e
3018 && *locinput != c1
3019 && *locinput != c2)
3020 locinput++;
3021 }
3022 if (locinput > e)
3023 sayNO;
3024 /* PL_reginput == old now */
3025 if (locinput != old) {
3026 ln = 1; /* Did some */
3027 if (regrepeat(scan, locinput - old) <
3028 locinput - old)
3029 sayNO;
3030 }
3031 /* PL_reginput == locinput now */
3032 if (paren) {
3033 if (ln) {
cf93c79d
IZ
3034 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
3035 PL_regendp[paren] = locinput - PL_bostr;
0fe9bf95
IZ
3036 }
3037 else
cf93c79d 3038 PL_regendp[paren] = -1;
0fe9bf95
IZ
3039 }
3040 if (regmatch(next))
3041 sayYES;
3042 PL_reginput = locinput; /* Could be reset... */
3043 REGCP_UNWIND;
3044 /* Couldn't or didn't -- move forward. */
3045 old = locinput++;
3046 }
3047 }
3048 else
c277df42 3049 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 3050 /* If it could work, try it. */
bbce6d69 3051 if (c1 == -1000 ||
3280af22
NIS
3052 UCHARAT(PL_reginput) == c1 ||
3053 UCHARAT(PL_reginput) == c2)
bbce6d69 3054 {
c277df42
IZ
3055 if (paren) {
3056 if (n) {
cf93c79d
IZ
3057 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3058 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3059 }
3060 else
cf93c79d 3061 PL_regendp[paren] = -1;
c277df42 3062 }
a0d0e21e 3063 if (regmatch(next))
4633a7c4 3064 sayYES;
c277df42 3065 REGCP_UNWIND;
bbce6d69 3066 }
c277df42 3067 /* Couldn't or didn't -- move forward. */
a0ed51b3 3068 PL_reginput = locinput;
a0d0e21e
LW
3069 if (regrepeat(scan, 1)) {
3070 ln++;
a0ed51b3
LW
3071 locinput = PL_reginput;
3072 }
3073 else
4633a7c4 3074 sayNO;
a0d0e21e
LW
3075 }
3076 }
3077 else {
c277df42 3078 CHECKPOINT lastcp;
a0d0e21e 3079 n = regrepeat(scan, n);
a0ed51b3 3080 locinput = PL_reginput;
22c35a8c 3081 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3082 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3083 ln = n; /* why back off? */
1aeab75a
GS
3084 /* ...because $ and \Z can match before *and* after
3085 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3086 We should back off by one in this case. */
3087 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3088 ln--;
3089 }
c277df42
IZ
3090 REGCP_SET;
3091 if (paren) {
3092 while (n >= ln) {
3093 /* If it could work, try it. */
3094 if (c1 == -1000 ||
3280af22
NIS
3095 UCHARAT(PL_reginput) == c1 ||
3096 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3097 {
3098 if (paren && n) {
3099 if (n) {
cf93c79d
IZ
3100 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3101 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3102 }
3103 else
cf93c79d 3104 PL_regendp[paren] = -1;
c277df42
IZ
3105 }
3106 if (regmatch(next))
3107 sayYES;
3108 REGCP_UNWIND;
3109 }
3110 /* Couldn't or didn't -- back up. */
3111 n--;
dfe13c55 3112 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3113 }
a0ed51b3
LW
3114 }
3115 else {
c277df42
IZ
3116 while (n >= ln) {
3117 /* If it could work, try it. */
3118 if (c1 == -1000 ||
3280af22
NIS
3119 UCHARAT(PL_reginput) == c1 ||
3120 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3121 {
3122 if (regmatch(next))
3123 sayYES;
3124 REGCP_UNWIND;
3125 }
3126 /* Couldn't or didn't -- back up. */
3127 n--;
dfe13c55 3128 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3129 }
a0d0e21e
LW
3130 }
3131 }
4633a7c4 3132 sayNO;
c277df42 3133 break;
a0d0e21e 3134 case END:
0f5d15d6
IZ
3135 if (PL_reg_call_cc) {
3136 re_cc_state *cur_call_cc = PL_reg_call_cc;
3137 CURCUR *cctmp = PL_regcc;
3138 regexp *re = PL_reg_re;
3139 CHECKPOINT cp, lastcp;
3140
3141 cp = regcppush(0); /* Save *all* the positions. */
3142 REGCP_SET;
3143 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3144 the caller. */
3145 PL_reginput = locinput; /* Make position available to
3146 the callcc. */
3147 cache_re(PL_reg_call_cc->re);
3148 PL_regcc = PL_reg_call_cc->cc;
3149 PL_reg_call_cc = PL_reg_call_cc->prev;
3150 if (regmatch(cur_call_cc->node)) {
3151 PL_reg_call_cc = cur_call_cc;
3152 regcpblow(cp);
3153 sayYES;
3154 }
3155 REGCP_UNWIND;
3156 regcppop();
3157 PL_reg_call_cc = cur_call_cc;
3158 PL_regcc = cctmp;
3159 PL_reg_re = re;
3160 cache_re(re);
3161
3162 DEBUG_r(
3163 PerlIO_printf(Perl_debug_log,
3164 "%*s continuation failed...\n",
3165 REPORT_CODE_OFF+PL_regindent*2, "")
3166 );
7821416a 3167 sayNO_SILENT;
0f5d15d6 3168 }
7821416a
IZ
3169 if (locinput < PL_regtill) {
3170 DEBUG_r(PerlIO_printf(Perl_debug_log,
3171 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3172 PL_colors[4],
3173 (long)(locinput - PL_reg_starttry),
3174 (long)(PL_regtill - PL_reg_starttry),
3175 PL_colors[5]));
3176 sayNO_FINAL; /* Cannot match: too short. */
3177 }
3178 PL_reginput = locinput; /* put where regtry can find it */
3179 sayYES_FINAL; /* Success! */
7e5428c5 3180 case SUCCEED:
3280af22 3181 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3182 sayYES_LOUD; /* Success! */
c277df42
IZ
3183 case SUSPEND:
3184 n = 1;
9fe1d20c 3185 PL_reginput = locinput;
c277df42 3186 goto do_ifmatch;
a0d0e21e 3187 case UNLESSM:
c277df42 3188 n = 0;
a0ed51b3 3189 if (scan->flags) {
0fe9bf95
IZ
3190 if (UTF) { /* XXXX This is absolutely
3191 broken, we read before
3192 start of string. */
3193 s = HOPMAYBEc(locinput, -scan->flags);
3194 if (!s)
3195 goto say_yes;
3196 PL_reginput = s;
3197 }
3198 else {
3199 if (locinput < PL_bostr + scan->flags)
3200 goto say_yes;
3201 PL_reginput = locinput - scan->flags;
3202 goto do_ifmatch;
3203 }
a0ed51b3
LW
3204 }
3205 else
3206 PL_reginput = locinput;
c277df42
IZ
3207 goto do_ifmatch;
3208 case IFMATCH:
3209 n = 1;
a0ed51b3 3210 if (scan->flags) {
0fe9bf95
IZ
3211 if (UTF) { /* XXXX This is absolutely
3212 broken, we read before
3213 start of string. */
3214 s = HOPMAYBEc(locinput, -scan->flags);
3215 if (!s || s < PL_bostr)
3216 goto say_no;
3217 PL_reginput = s;
3218 }
3219 else {
3220 if (locinput < PL_bostr + scan->flags)
3221 goto say_no;
3222 PL_reginput = locinput - scan->flags;
3223 goto do_ifmatch;
3224 }
a0ed51b3
LW
3225 }
3226 else
3227 PL_reginput = locinput;
3228
c277df42 3229 do_ifmatch:
c277df42
IZ
3230 inner = NEXTOPER(NEXTOPER(scan));
3231 if (regmatch(inner) != n) {
3232 say_no:
3233 if (logical) {
3234 logical = 0;
3235 sw = 0;
3236 goto do_longjump;
a0ed51b3
LW
3237 }
3238 else
c277df42
IZ
3239 sayNO;
3240 }
3241 say_yes:
3242 if (logical) {
3243 logical = 0;
3244 sw = 1;
3245 }
fe44a5e8 3246 if (OP(scan) == SUSPEND) {
3280af22 3247 locinput = PL_reginput;
565764a8 3248 nextchr = UCHARAT(locinput);
fe44a5e8 3249 }
c277df42
IZ
3250 /* FALL THROUGH. */
3251 case LONGJMP:
3252 do_longjump:
3253 next = scan + ARG(scan);
3254 if (next == scan)
3255 next = NULL;
a0d0e21e
LW
3256 break;
3257 default:
b900a521 3258 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3259 PTR2UV(scan), OP(scan));
cea2e8a9 3260 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3261 }
a0d0e21e
LW
3262 scan = next;
3263 }
a687059c 3264
a0d0e21e
LW
3265 /*
3266 * We get here only if there's trouble -- normally "case END" is
3267 * the terminating point.
3268 */
cea2e8a9 3269 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3270 /*NOTREACHED*/
4633a7c4
LW
3271 sayNO;
3272
7821416a
IZ
3273yes_loud:
3274 DEBUG_r(
3275 PerlIO_printf(Perl_debug_log,
3276 "%*s %scould match...%s\n",
3277 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3278 );
3279 goto yes;
3280yes_final:
3281 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3282 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3283yes:
3284#ifdef DEBUGGING
3280af22 3285 PL_regindent--;
4633a7c4
LW
3286#endif
3287 return 1;
3288
3289no:
7821416a
IZ
3290 DEBUG_r(
3291 PerlIO_printf(Perl_debug_log,
3292 "%*s %sfailed...%s\n",
3293 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3294 );
3295 goto do_no;
3296no_final:
3297do_no:
4633a7c4 3298#ifdef DEBUGGING
3280af22 3299 PL_regindent--;
4633a7c4 3300#endif
a0d0e21e 3301 return 0;
a687059c
LW
3302}
3303
3304/*
3305 - regrepeat - repeatedly match something simple, report how many
3306 */
3307/*
3308 * [This routine now assumes that it will only match on things of length 1.
3309 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3310 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3311 */
76e3520e 3312STATIC I32
cea2e8a9 3313S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3314{
5c0ca799 3315 dTHR;
a0d0e21e 3316 register char *scan;
a0d0e21e 3317 register I32 c;
3280af22 3318 register char *loceol = PL_regeol;
a0ed51b3 3319 register I32 hardcount = 0;
a0d0e21e 3320
3280af22 3321 scan = PL_reginput;
c277df42 3322 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3323 loceol = scan + max;
a0d0e21e 3324 switch (OP(p)) {
22c35a8c 3325 case REG_ANY:
a0d0e21e
LW
3326 while (scan < loceol && *scan != '\n')
3327 scan++;
3328 break;
3329 case SANY:
3330 scan = loceol;
3331 break;
a0ed51b3
LW
3332 case ANYUTF8:
3333 loceol = PL_regeol;
3334 while (scan < loceol && *scan != '\n') {
3335 scan += UTF8SKIP(scan);
3336 hardcount++;
3337 }
3338 break;
3339 case SANYUTF8:
3340 loceol = PL_regeol;
3341 while (scan < loceol) {
3342 scan += UTF8SKIP(scan);
3343 hardcount++;
3344 }
3345 break;
bbce6d69 3346 case EXACT: /* length of string is 1 */
cd439c50 3347 c = (U8)*STRING(p);
bbce6d69 3348 while (scan < loceol && UCHARAT(scan) == c)
3349 scan++;
3350 break;
3351 case EXACTF: /* length of string is 1 */
cd439c50 3352 c = (U8)*STRING(p);
bbce6d69 3353 while (scan < loceol &&
22c35a8c 3354 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3355 scan++;
3356 break;
3357 case EXACTFL: /* length of string is 1 */
3280af22 3358 PL_reg_flags |= RF_tainted;
cd439c50 3359 c = (U8)*STRING(p);
bbce6d69 3360 while (scan < loceol &&
22c35a8c 3361 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3362 scan++;
3363 break;
a0ed51b3
LW
3364 case ANYOFUTF8:
3365 loceol = PL_regeol;
3366 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3367 scan += UTF8SKIP(scan);
3368 hardcount++;
3369 }
3370 break;
a0d0e21e 3371 case ANYOF:
936ed897 3372 while (scan < loceol && REGINCLASS(p, *scan))
a0d0e21e 3373 scan++;
a0d0e21e
LW
3374 break;
3375 case ALNUM:
3376 while (scan < loceol && isALNUM(*scan))
3377 scan++;
3378 break;
a0ed51b3
LW
3379 case ALNUMUTF8:
3380 loceol = PL_regeol;
dfe13c55 3381 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3382 scan += UTF8SKIP(scan);
3383 hardcount++;
3384 }
3385 break;
bbce6d69 3386 case ALNUML:
3280af22 3387 PL_reg_flags |= RF_tainted;
bbce6d69 3388 while (scan < loceol && isALNUM_LC(*scan))
3389 scan++;
3390 break;
a0ed51b3
LW
3391 case ALNUMLUTF8:
3392 PL_reg_flags |= RF_tainted;
3393 loceol = PL_regeol;
dfe13c55 3394 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3395 scan += UTF8SKIP(scan);
3396 hardcount++;
3397 }
3398 break;
3399 break;
a0d0e21e
LW
3400 case NALNUM:
3401 while (scan < loceol && !isALNUM(*scan))
3402 scan++;
3403 break;
a0ed51b3
LW
3404 case NALNUMUTF8:
3405 loceol = PL_regeol;
dfe13c55 3406 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3407 scan += UTF8SKIP(scan);
3408 hardcount++;
3409 }
3410 break;
bbce6d69 3411 case NALNUML:
3280af22 3412 PL_reg_flags |= RF_tainted;
bbce6d69 3413 while (scan < loceol && !isALNUM_LC(*scan))
3414 scan++;
3415 break;
a0ed51b3
LW
3416 case NALNUMLUTF8:
3417 PL_reg_flags |= RF_tainted;
3418 loceol = PL_regeol;
dfe13c55 3419 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3420 scan += UTF8SKIP(scan);
3421 hardcount++;
3422 }
3423 break;
a0d0e21e
LW
3424 case SPACE:
3425 while (scan < loceol && isSPACE(*scan))
3426 scan++;
3427 break;
a0ed51b3
LW
3428 case SPACEUTF8:
3429 loceol = PL_regeol;
dfe13c55 3430 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3431 scan += UTF8SKIP(scan);
3432 hardcount++;
3433 }
3434 break;
bbce6d69 3435 case SPACEL:
3280af22 3436 PL_reg_flags |= RF_tainted;
bbce6d69 3437 while (scan < loceol && isSPACE_LC(*scan))
3438 scan++;
3439 break;
a0ed51b3
LW
3440 case SPACELUTF8:
3441 PL_reg_flags |= RF_tainted;
3442 loceol = PL_regeol;
dfe13c55 3443 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3444 scan += UTF8SKIP(scan);
3445 hardcount++;
3446 }
3447 break;
a0d0e21e
LW
3448 case NSPACE:
3449 while (scan < loceol && !isSPACE(*scan))
3450 scan++;
3451 break;
a0ed51b3
LW
3452 case NSPACEUTF8:
3453 loceol = PL_regeol;
dfe13c55 3454 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3455 scan += UTF8SKIP(scan);
3456 hardcount++;
3457 }
3458 break;
bbce6d69 3459 case NSPACEL:
3280af22 3460 PL_reg_flags |= RF_tainted;
bbce6d69 3461 while (scan < loceol && !isSPACE_LC(*scan))
3462 scan++;
3463 break;
a0ed51b3
LW
3464 case NSPACELUTF8:
3465 PL_reg_flags |= RF_tainted;
3466 loceol = PL_regeol;
dfe13c55 3467 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3468 scan += UTF8SKIP(scan);
3469 hardcount++;
3470 }
3471 break;
a0d0e21e
LW
3472 case DIGIT:
3473 while (scan < loceol && isDIGIT(*scan))
3474 scan++;
3475 break;
a0ed51b3
LW
3476 case DIGITUTF8:
3477 loceol = PL_regeol;
dfe13c55 3478 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3479 scan += UTF8SKIP(scan);
3480 hardcount++;
3481 }
3482 break;
3483 break;
a0d0e21e
LW
3484 case NDIGIT:
3485 while (scan < loceol && !isDIGIT(*scan))
3486 scan++;
3487 break;
a0ed51b3
LW
3488 case NDIGITUTF8:
3489 loceol = PL_regeol;
dfe13c55 3490 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3491 scan += UTF8SKIP(scan);
3492 hardcount++;
3493 }
3494 break;
a0d0e21e
LW
3495 default: /* Called on something of 0 width. */
3496 break; /* So match right here or not at all. */
3497 }
a687059c 3498
a0ed51b3
LW
3499 if (hardcount)
3500 c = hardcount;
3501 else
3502 c = scan - PL_reginput;
3280af22 3503 PL_reginput = scan;
a687059c 3504
c277df42
IZ
3505 DEBUG_r(
3506 {
3507 SV *prop = sv_newmortal();
3508
3509 regprop(prop, p);
3510 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3511 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3512 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3513 });
3514
a0d0e21e 3515 return(c);
a687059c
LW
3516}
3517
3518/*
c277df42
IZ
3519 - regrepeat_hard - repeatedly match something, report total lenth and length
3520 *
3521 * The repeater is supposed to have constant length.
3522 */
3523
76e3520e 3524STATIC I32
cea2e8a9 3525S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3526{
5c0ca799 3527 dTHR;
c277df42
IZ
3528 register char *scan;
3529 register char *start;
3280af22 3530 register char *loceol = PL_regeol;
a0ed51b3 3531 I32 l = 0;
708e3b05 3532 I32 count = 0, res = 1;
a0ed51b3
LW
3533
3534 if (!max)
3535 return 0;
c277df42 3536
3280af22 3537 start = PL_reginput;
a0ed51b3 3538 if (UTF) {
708e3b05 3539 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3540 if (!count++) {
3541 l = 0;
3542 while (start < PL_reginput) {
3543 l++;
3544 start += UTF8SKIP(start);
3545 }
3546 *lp = l;
3547 if (l == 0)
3548 return max;
3549 }
3550 if (count == max)
3551 return count;
3552 }
3553 }
3554 else {
708e3b05 3555 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3556 if (!count++) {
3557 *lp = l = PL_reginput - start;
3558 if (max != REG_INFTY && l*max < loceol - scan)
3559 loceol = scan + l*max;
3560 if (l == 0)
3561 return max;
c277df42
IZ
3562 }
3563 }
3564 }
708e3b05 3565 if (!res)
3280af22 3566 PL_reginput = scan;
c277df42 3567
a0ed51b3 3568 return count;
c277df42
IZ
3569}
3570
3571/*
cb8d8820 3572 - reginclass - determine if a character falls into a character class
bbce6d69 3573 */
3574
76e3520e 3575STATIC bool
936ed897 3576S_reginclass(pTHX_ register regnode *p, register I32 c)
bbce6d69 3577{
5c0ca799 3578 dTHR;
b8c5462f 3579 char flags = ANYOF_FLAGS(p);
bbce6d69 3580 bool match = FALSE;
3581
3582 c &= 0xFF;
b8c5462f 3583 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3584 match = TRUE;
3585 else if (flags & ANYOF_FOLD) {
3586 I32 cf;
3587 if (flags & ANYOF_LOCALE) {
3280af22 3588 PL_reg_flags |= RF_tainted;
22c35a8c 3589 cf = PL_fold_locale[c];
bbce6d69 3590 }
3591 else
22c35a8c 3592 cf = PL_fold[c];
b8c5462f 3593 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3594 match = TRUE;
3595 }
3596
b8c5462f 3597 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3598 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3599 if (
3600 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3601 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3602 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3603 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3604 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3605 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3606 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3607 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3608 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3609 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3610 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3611 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3612 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3613 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3614 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3615 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3616 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3617 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3618 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3619 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3620 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3621 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3622 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3623 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3624 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3625 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
3626 ) /* How's that for a conditional? */
bbce6d69 3627 {
3628 match = TRUE;
3629 }
3630 }
3631
ae5c130c 3632 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3633}
3634
a0ed51b3 3635STATIC bool
cea2e8a9 3636S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
3637{
3638 dTHR;
a0ed51b3
LW
3639 char flags = ARG1(f);
3640 bool match = FALSE;
3641 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3642
3643 if (swash_fetch(sv, p))
3644 match = TRUE;
3645 else if (flags & ANYOF_FOLD) {
806e7201 3646 U8 tmpbuf[UTF8_MAXLEN];
a0ed51b3
LW
3647 if (flags & ANYOF_LOCALE) {
3648 PL_reg_flags |= RF_tainted;
3649 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3650 }
3651 else
3652 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3653 if (swash_fetch(sv, tmpbuf))
3654 match = TRUE;
3655 }
3656
b8c5462f 3657 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3658
3659 return (flags & ANYOF_INVERT) ? !match : match;
3660}
161b471a 3661
dfe13c55 3662STATIC U8 *
cea2e8a9 3663S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
3664{
3665 dTHR;
a0ed51b3
LW
3666 if (off >= 0) {
3667 while (off-- && s < (U8*)PL_regeol)
3668 s += UTF8SKIP(s);
3669 }
3670 else {
3671 while (off++) {
3672 if (s > (U8*)PL_bostr) {
3673 s--;
3674 if (*s & 0x80) {
3675 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3676 s--;
3677 } /* XXX could check well-formedness here */
3678 }
3679 }
3680 }
3681 return s;
3682}
161b471a 3683
dfe13c55 3684STATIC U8 *
cea2e8a9 3685S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3686{
c485e607 3687 dTHR;
a0ed51b3
LW
3688 if (off >= 0) {
3689 while (off-- && s < (U8*)PL_regeol)
3690 s += UTF8SKIP(s);
3691 if (off >= 0)
3692 return 0;
3693 }
3694 else {
3695 while (off++) {
3696 if (s > (U8*)PL_bostr) {
3697 s--;
3698 if (*s & 0x80) {
3699 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3700 s--;
3701 } /* XXX could check well-formedness here */
3702 }
3703 else
3704 break;
3705 }
3706 if (off <= 0)
3707 return 0;
3708 }
3709 return s;
3710}
51371543
GS
3711
3712#ifdef PERL_OBJECT
51371543
GS
3713#include "XSUB.h"
3714#endif
3715
3716static void
3717restore_pos(pTHXo_ void *arg)
3718{
3719 dTHR;
3720 if (PL_reg_eval_set) {
3721 if (PL_reg_oldsaved) {
3722 PL_reg_re->subbeg = PL_reg_oldsaved;
3723 PL_reg_re->sublen = PL_reg_oldsavedlen;
3724 RX_MATCH_COPIED_on(PL_reg_re);
3725 }
3726 PL_reg_magic->mg_len = PL_reg_oldpos;
3727 PL_reg_eval_set = 0;
3728 PL_curpm = PL_reg_oldcurpm;
3729 }
3730}
3731