This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
The sv_catsv() fix, take two.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
56953603
IZ
45#endif
46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
3818b22b 70 **** Copyright (c) 1991-2000, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
c5be433b
GS
83#ifdef PERL_IN_XSUB_RE
84# if defined(PERL_CAPI) || defined(PERL_OBJECT)
85# include "XSUB.h"
86# endif
87#endif
88
a687059c
LW
89#include "regcomp.h"
90
c277df42
IZ
91#define RF_tainted 1 /* tainted information used? */
92#define RF_warned 2 /* warned about big count? */
ce862d02 93#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
94#define RF_utf8 8 /* String contains multibyte chars? */
95
96#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
97
98#define RS_init 1 /* eval environment created */
99#define RS_set 2 /* replsv value is set */
c277df42 100
a687059c
LW
101#ifndef STATIC
102#define STATIC static
103#endif
104
c277df42
IZ
105/*
106 * Forwards.
107 */
108
a0ed51b3
LW
109#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
110#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
111
dfe13c55
GS
112#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
113#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
114#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
115#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
116#define HOPc(pos,off) ((char*)HOP(pos,off))
117#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 118
51371543
GS
119static void restore_pos(pTHXo_ void *arg);
120
121
76e3520e 122STATIC CHECKPOINT
cea2e8a9 123S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 124{
3280af22
NIS
125 int retval = PL_savestack_ix;
126 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
127 int p;
128
129 SSCHECK(i + 5);
3280af22 130 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
131 SSPUSHINT(PL_regendp[p]);
132 SSPUSHINT(PL_regstartp[p]);
3280af22 133 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
134 SSPUSHINT(p);
135 }
3280af22
NIS
136 SSPUSHINT(PL_regsize);
137 SSPUSHINT(*PL_reglastparen);
138 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
139 SSPUSHINT(i + 3);
140 SSPUSHINT(SAVEt_REGCONTEXT);
141 return retval;
142}
143
c277df42 144/* These are needed since we do not localize EVAL nodes: */
02db2b7b 145# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 146 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 147 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 148
02db2b7b 149# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 150 PerlIO_printf(Perl_debug_log, \
faccc32b 151 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 152 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 153
76e3520e 154STATIC char *
cea2e8a9 155S_regcppop(pTHX)
a0d0e21e
LW
156{
157 I32 i = SSPOPINT;
158 U32 paren = 0;
159 char *input;
cf93c79d 160 I32 tmps;
a0d0e21e
LW
161 assert(i == SAVEt_REGCONTEXT);
162 i = SSPOPINT;
163 input = (char *) SSPOPPTR;
3280af22
NIS
164 *PL_reglastparen = SSPOPINT;
165 PL_regsize = SSPOPINT;
c277df42 166 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 167 paren = (U32)SSPOPINT;
3280af22 168 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
169 PL_regstartp[paren] = SSPOPINT;
170 tmps = SSPOPINT;
3280af22
NIS
171 if (paren <= *PL_reglastparen)
172 PL_regendp[paren] = tmps;
c277df42 173 DEBUG_r(
c3464db5 174 PerlIO_printf(Perl_debug_log,
b900a521
JH
175 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
176 (UV)paren, (IV)PL_regstartp[paren],
177 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
178 (IV)PL_regendp[paren],
3280af22 179 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 180 );
a0d0e21e 181 }
c277df42 182 DEBUG_r(
3280af22 183 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 184 PerlIO_printf(Perl_debug_log,
faccc32b
JH
185 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
186 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
187 }
188 );
daf18116 189#if 1
dafc8851
JH
190 /* It would seem that the similar code in regtry()
191 * already takes care of this, and in fact it is in
192 * a better location to since this code can #if 0-ed out
193 * but the code in regtry() is needed or otherwise tests
194 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
195 * (as of patchlevel 7877) will fail. Then again,
196 * this code seems to be necessary or otherwise
197 * building DynaLoader will fail:
198 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
199 * --jhi */
3280af22
NIS
200 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
201 if (paren > PL_regsize)
cf93c79d
IZ
202 PL_regstartp[paren] = -1;
203 PL_regendp[paren] = -1;
a0d0e21e 204 }
dafc8851 205#endif
a0d0e21e
LW
206 return input;
207}
208
0f5d15d6 209STATIC char *
cea2e8a9 210S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
211{
212 I32 tmp = PL_savestack_ix;
213
214 PL_savestack_ix = ss;
215 regcppop();
216 PL_savestack_ix = tmp;
942e002e 217 return Nullch;
0f5d15d6
IZ
218}
219
220typedef struct re_cc_state
221{
222 I32 ss;
223 regnode *node;
224 struct re_cc_state *prev;
225 CURCUR *cc;
226 regexp *re;
227} re_cc_state;
228
02db2b7b 229#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 230
29d1e993
HS
231#define TRYPAREN(paren, n, input) { \
232 if (paren) { \
233 if (n) { \
234 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
235 PL_regendp[paren] = input - PL_bostr; \
236 } \
237 else \
238 PL_regendp[paren] = -1; \
239 } \
240 if (regmatch(next)) \
241 sayYES; \
242 if (paren && n) \
243 PL_regendp[paren] = -1; \
244}
245
246
a687059c 247/*
e50aee73 248 * pregexec and friends
a687059c
LW
249 */
250
251/*
c277df42 252 - pregexec - match a regexp against a string
a687059c 253 */
c277df42 254I32
864dbfa3 255Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 256 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
257/* strend: pointer to null at end of string */
258/* strbeg: real beginning of string */
259/* minend: end of match must be >=minend after stringarg. */
260/* nosave: For optimizations. */
261{
262 return
263 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
264 nosave ? 0 : REXEC_COPY_STR);
265}
0f5d15d6
IZ
266
267STATIC void
cea2e8a9 268S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
269{
270 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
271#ifdef DEBUGGING
272 PL_regprogram = prog->program;
273#endif
274 PL_regnpar = prog->nparens;
275 PL_regdata = prog->data;
276 PL_reg_re = prog;
277}
22e551b9 278
cad2e5aa
JH
279/*
280 * Need to implement the following flags for reg_anch:
281 *
282 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
283 * USE_INTUIT_ML
284 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
285 * INTUIT_AUTORITATIVE_ML
286 * INTUIT_ONCE_NOML - Intuit can match in one location only.
287 * INTUIT_ONCE_ML
288 *
289 * Another flag for this function: SECOND_TIME (so that float substrs
290 * with giant delta may be not rechecked).
291 */
292
293/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
294
2c2d71f5 295/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
296 Otherwise, only SvCUR(sv) is used to get strbeg. */
297
298/* XXXX We assume that strpos is strbeg unless sv. */
299
6eb5f6b9
JH
300/* XXXX Some places assume that there is a fixed substring.
301 An update may be needed if optimizer marks as "INTUITable"
302 RExen without fixed substrings. Similarly, it is assumed that
303 lengths of all the strings are no more than minlen, thus they
304 cannot come from lookahead.
305 (Or minlen should take into account lookahead.) */
306
2c2d71f5
JH
307/* A failure to find a constant substring means that there is no need to make
308 an expensive call to REx engine, thus we celebrate a failure. Similarly,
309 finding a substring too deep into the string means that less calls to
30944b6d
IZ
310 regtry() should be needed.
311
312 REx compiler's optimizer found 4 possible hints:
313 a) Anchored substring;
314 b) Fixed substring;
315 c) Whether we are anchored (beginning-of-line or \G);
316 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 317 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
318 string which does not contradict any of them.
319 */
2c2d71f5 320
6eb5f6b9
JH
321/* Most of decisions we do here should have been done at compile time.
322 The nodes of the REx which we used for the search should have been
323 deleted from the finite automaton. */
324
cad2e5aa
JH
325char *
326Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
327 char *strend, U32 flags, re_scream_pos_data *data)
328{
2c2d71f5 329 register I32 start_shift;
cad2e5aa 330 /* Should be nonnegative! */
2c2d71f5
JH
331 register I32 end_shift;
332 register char *s;
333 register SV *check;
a1933d95 334 char *strbeg;
cad2e5aa
JH
335 char *t;
336 I32 ml_anch;
2c2d71f5 337 char *tmp;
6eb5f6b9
JH
338 register char *other_last = Nullch; /* other substr checked before this */
339 char *check_at; /* check substr found at this pos */
30944b6d
IZ
340#ifdef DEBUGGING
341 char *i_strpos = strpos;
342#endif
cad2e5aa
JH
343
344 DEBUG_r( if (!PL_colorset) reginitcolors() );
345 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 346 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
347 PL_colors[4],PL_colors[5],PL_colors[0],
348 prog->precomp,
349 PL_colors[1],
350 (strlen(prog->precomp) > 60 ? "..." : ""),
351 PL_colors[0],
b900a521 352 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
353 strpos, PL_colors[1],
354 (strend - strpos > 60 ? "..." : ""))
355 );
356
2c2d71f5
JH
357 if (prog->minlen > strend - strpos) {
358 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 359 goto fail;
2c2d71f5 360 }
a1933d95 361 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
653099ff 362 check = prog->check_substr;
2c2d71f5 363 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
364 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
365 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 366 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 367
5675c1a6
IZ
368 if (!ml_anch) {
369 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
370 /* SvCUR is not set on references: SvRV and SvPVX overlap */
371 && sv && !SvROK(sv)
a1933d95 372 && (strpos != strbeg)) {
5675c1a6
IZ
373 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
374 goto fail;
375 }
376 if (prog->check_offset_min == prog->check_offset_max) {
2c2d71f5 377 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
378 I32 slen;
379
adac82c7 380 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 381 s = HOPc(strpos, prog->check_offset_min);
653099ff
GS
382 if (SvTAIL(check)) {
383 slen = SvCUR(check); /* >= 1 */
cad2e5aa 384
2c2d71f5
JH
385 if ( strend - s > slen || strend - s < slen - 1
386 || (strend - s == slen && strend[-1] != '\n')) {
387 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
388 goto fail_finish;
cad2e5aa
JH
389 }
390 /* Now should match s[0..slen-2] */
391 slen--;
653099ff 392 if (slen && (*SvPVX(check) != *s
cad2e5aa 393 || (slen > 1
653099ff 394 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
395 report_neq:
396 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
397 goto fail_finish;
398 }
cad2e5aa 399 }
653099ff
GS
400 else if (*SvPVX(check) != *s
401 || ((slen = SvCUR(check)) > 1
402 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
403 goto report_neq;
404 goto success_at_start;
5675c1a6 405 }
cad2e5aa 406 }
2c2d71f5 407 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 408 s = strpos;
2c2d71f5 409 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 410 end_shift = prog->minlen - start_shift -
653099ff 411 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 412 if (!ml_anch) {
653099ff
GS
413 I32 end = prog->check_offset_max + CHR_SVLEN(check)
414 - (SvTAIL(check) != 0);
2c2d71f5
JH
415 I32 eshift = strend - s - end;
416
417 if (end_shift < eshift)
418 end_shift = eshift;
419 }
cad2e5aa 420 }
2c2d71f5 421 else { /* Can match at random position */
cad2e5aa
JH
422 ml_anch = 0;
423 s = strpos;
2c2d71f5
JH
424 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
425 /* Should be nonnegative! */
426 end_shift = prog->minlen - start_shift -
653099ff 427 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
428 }
429
2c2d71f5 430#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 431 if (end_shift < 0)
6bbae5e6 432 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
433#endif
434
2c2d71f5
JH
435 restart:
436 /* Find a possible match in the region s..strend by looking for
437 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 438 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
439 I32 p = -1; /* Internal iterator of scream. */
440 I32 *pp = data ? data->scream_pos : &p;
441
2c2d71f5
JH
442 if (PL_screamfirst[BmRARE(check)] >= 0
443 || ( BmRARE(check) == '\n'
444 && (BmPREVIOUS(check) == SvCUR(check) - 1)
445 && SvTAIL(check) ))
446 s = screaminstr(sv, check,
447 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 448 else
2c2d71f5 449 goto fail_finish;
cad2e5aa
JH
450 if (data)
451 *data->scream_olds = s;
452 }
453 else
454 s = fbm_instr((unsigned char*)s + start_shift,
455 (unsigned char*)strend - end_shift,
2c2d71f5 456 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
457
458 /* Update the count-of-usability, remove useless subpatterns,
459 unshift s. */
2c2d71f5
JH
460
461 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
462 (s ? "Found" : "Did not find"),
463 ((check == prog->anchored_substr) ? "anchored" : "floating"),
464 PL_colors[0],
7b0972df
JH
465 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
466 SvPVX(check),
2c2d71f5
JH
467 PL_colors[1], (SvTAIL(check) ? "$" : ""),
468 (s ? " at offset " : "...\n") ) );
469
470 if (!s)
471 goto fail_finish;
472
6eb5f6b9
JH
473 check_at = s;
474
2c2d71f5 475 /* Finish the diagnostic message */
30944b6d 476 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
477
478 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
479 Start with the other substr.
480 XXXX no SCREAM optimization yet - and a very coarse implementation
481 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
482 *always* match. Probably should be marked during compile...
483 Probably it is right to do no SCREAM here...
484 */
485
486 if (prog->float_substr && prog->anchored_substr) {
30944b6d 487 /* Take into account the "other" substring. */
2c2d71f5
JH
488 /* XXXX May be hopelessly wrong for UTF... */
489 if (!other_last)
6eb5f6b9 490 other_last = strpos;
2c2d71f5 491 if (check == prog->float_substr) {
30944b6d
IZ
492 do_other_anchored:
493 {
2c2d71f5
JH
494 char *last = s - start_shift, *last1, *last2;
495 char *s1 = s;
496
497 tmp = PL_bostr;
498 t = s - prog->check_offset_max;
499 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
500 && (!(prog->reganch & ROPT_UTF8)
501 || (PL_bostr = strpos, /* Used in regcopmaybe() */
502 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
503 && t > strpos)))
30944b6d 504 /* EMPTY */;
2c2d71f5
JH
505 else
506 t = strpos;
507 t += prog->anchored_offset;
6eb5f6b9
JH
508 if (t < other_last) /* These positions already checked */
509 t = other_last;
2c2d71f5
JH
510 PL_bostr = tmp;
511 last2 = last1 = strend - prog->minlen;
512 if (last < last1)
513 last1 = last;
514 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
515 /* On end-of-str: see comment below. */
516 s = fbm_instr((unsigned char*)t,
517 (unsigned char*)last1 + prog->anchored_offset
518 + SvCUR(prog->anchored_substr)
519 - (SvTAIL(prog->anchored_substr)!=0),
520 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
521 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
522 (s ? "Found" : "Contradicts"),
523 PL_colors[0],
7b0972df
JH
524 (int)(SvCUR(prog->anchored_substr)
525 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
526 SvPVX(prog->anchored_substr),
527 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
528 if (!s) {
529 if (last1 >= last2) {
530 DEBUG_r(PerlIO_printf(Perl_debug_log,
531 ", giving up...\n"));
532 goto fail_finish;
533 }
534 DEBUG_r(PerlIO_printf(Perl_debug_log,
535 ", trying floating at offset %ld...\n",
30944b6d 536 (long)(s1 + 1 - i_strpos)));
2c2d71f5 537 PL_regeol = strend; /* Used in HOP() */
6eb5f6b9 538 other_last = last1 + prog->anchored_offset + 1;
2c2d71f5
JH
539 s = HOPc(last, 1);
540 goto restart;
541 }
542 else {
543 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 544 (long)(s - i_strpos)));
2c2d71f5 545 t = s - prog->anchored_offset;
6eb5f6b9 546 other_last = s + 1;
30944b6d 547 s = s1;
2c2d71f5
JH
548 if (t == strpos)
549 goto try_at_start;
2c2d71f5
JH
550 goto try_at_offset;
551 }
30944b6d 552 }
2c2d71f5
JH
553 }
554 else { /* Take into account the floating substring. */
555 char *last, *last1;
556 char *s1 = s;
557
558 t = s - start_shift;
559 last1 = last = strend - prog->minlen + prog->float_min_offset;
560 if (last - t > prog->float_max_offset)
561 last = t + prog->float_max_offset;
562 s = t + prog->float_min_offset;
6eb5f6b9
JH
563 if (s < other_last)
564 s = other_last;
2c2d71f5
JH
565 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
566 /* fbm_instr() takes into account exact value of end-of-str
567 if the check is SvTAIL(ed). Since false positives are OK,
568 and end-of-str is not later than strend we are OK. */
569 s = fbm_instr((unsigned char*)s,
570 (unsigned char*)last + SvCUR(prog->float_substr)
571 - (SvTAIL(prog->float_substr)!=0),
572 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
573 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
574 (s ? "Found" : "Contradicts"),
575 PL_colors[0],
7b0972df
JH
576 (int)(SvCUR(prog->float_substr)
577 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
578 SvPVX(prog->float_substr),
579 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
580 if (!s) {
581 if (last1 == last) {
582 DEBUG_r(PerlIO_printf(Perl_debug_log,
583 ", giving up...\n"));
584 goto fail_finish;
585 }
586 DEBUG_r(PerlIO_printf(Perl_debug_log,
587 ", trying anchored starting at offset %ld...\n",
30944b6d 588 (long)(s1 + 1 - i_strpos)));
6eb5f6b9 589 other_last = last + 1;
2c2d71f5
JH
590 PL_regeol = strend; /* Used in HOP() */
591 s = HOPc(t, 1);
592 goto restart;
593 }
594 else {
595 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 596 (long)(s - i_strpos)));
6eb5f6b9 597 other_last = s + 1;
30944b6d 598 s = s1;
2c2d71f5
JH
599 if (t == strpos)
600 goto try_at_start;
2c2d71f5
JH
601 goto try_at_offset;
602 }
603 }
cad2e5aa 604 }
2c2d71f5
JH
605
606 t = s - prog->check_offset_max;
607 tmp = PL_bostr;
608 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
609 && (!(prog->reganch & ROPT_UTF8)
610 || (PL_bostr = strpos, /* Used in regcopmaybe() */
611 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
612 && t > strpos)))) {
613 PL_bostr = tmp;
614 /* Fixed substring is found far enough so that the match
615 cannot start at strpos. */
616 try_at_offset:
cad2e5aa 617 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
618 /* Eventually fbm_*() should handle this, but often
619 anchored_offset is not 0, so this check will not be wasted. */
620 /* XXXX In the code below we prefer to look for "^" even in
621 presence of anchored substrings. And we search even
622 beyond the found float position. These pessimizations
623 are historical artefacts only. */
624 find_anchor:
2c2d71f5 625 while (t < strend - prog->minlen) {
cad2e5aa 626 if (*t == '\n') {
4ee3650e 627 if (t < check_at - prog->check_offset_min) {
30944b6d 628 if (prog->anchored_substr) {
4ee3650e
GS
629 /* Since we moved from the found position,
630 we definitely contradict the found anchored
30944b6d
IZ
631 substr. Due to the above check we do not
632 contradict "check" substr.
633 Thus we can arrive here only if check substr
634 is float. Redo checking for "other"=="fixed".
635 */
636 strpos = t + 1;
637 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
638 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
639 goto do_other_anchored;
640 }
4ee3650e
GS
641 /* We don't contradict the found floating substring. */
642 /* XXXX Why not check for STCLASS? */
cad2e5aa 643 s = t + 1;
2c2d71f5 644 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 645 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
646 goto set_useful;
647 }
4ee3650e
GS
648 /* Position contradicts check-string */
649 /* XXXX probably better to look for check-string
650 than for "\n", so one should lower the limit for t? */
651 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 652 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 653 other_last = strpos = s = t + 1;
cad2e5aa
JH
654 goto restart;
655 }
656 t++;
657 }
2c2d71f5
JH
658 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
659 PL_colors[0],PL_colors[1]));
660 goto fail_finish;
cad2e5aa 661 }
f5952150
GS
662 else {
663 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
664 PL_colors[0],PL_colors[1]));
665 }
cad2e5aa
JH
666 s = t;
667 set_useful:
2c2d71f5 668 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
669 }
670 else {
2c2d71f5 671 PL_bostr = tmp;
f5952150 672 /* The found string does not prohibit matching at strpos,
2c2d71f5 673 - no optimization of calling REx engine can be performed,
f5952150
GS
674 unless it was an MBOL and we are not after MBOL,
675 or a future STCLASS check will fail this. */
2c2d71f5
JH
676 try_at_start:
677 /* Even in this situation we may use MBOL flag if strpos is offset
678 wrt the start of the string. */
05b4157f 679 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 680 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
681 /* May be due to an implicit anchor of m{.*foo} */
682 && !(prog->reganch & ROPT_IMPLICIT))
683 {
cad2e5aa
JH
684 t = strpos;
685 goto find_anchor;
686 }
30944b6d 687 DEBUG_r( if (ml_anch)
f5952150
GS
688 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
689 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 690 );
2c2d71f5 691 success_at_start:
30944b6d 692 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 693 && prog->check_substr /* Could be deleted already */
cad2e5aa 694 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
695 && prog->check_substr == prog->float_substr)
696 {
cad2e5aa 697 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 698 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
699 SvREFCNT_dec(prog->check_substr);
700 prog->check_substr = Nullsv; /* disable */
701 prog->float_substr = Nullsv; /* clear */
5e39e1e5 702 check = Nullsv; /* abort */
cad2e5aa 703 s = strpos;
3cf5c195
IZ
704 /* XXXX This is a remnant of the old implementation. It
705 looks wasteful, since now INTUIT can use many
6eb5f6b9 706 other heuristics. */
cad2e5aa
JH
707 prog->reganch &= ~RE_USE_INTUIT;
708 }
709 else
710 s = strpos;
711 }
712
6eb5f6b9
JH
713 /* Last resort... */
714 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
715 if (prog->regstclass) {
716 /* minlen == 0 is possible if regstclass is \b or \B,
717 and the fixed substr is ''$.
718 Since minlen is already taken into account, s+1 is before strend;
719 accidentally, minlen >= 1 guaranties no false positives at s + 1
720 even for \b or \B. But (minlen? 1 : 0) below assumes that
721 regstclass does not come from lookahead... */
722 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
723 This leaves EXACTF only, which is dealt with in find_byclass(). */
66e933ab
GS
724 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
725 ? STR_LEN(prog->regstclass)
726 : 1);
6eb5f6b9 727 char *endpos = (prog->anchored_substr || ml_anch)
66e933ab
GS
728 ? s + (prog->minlen? cl_l : 0)
729 : (prog->float_substr ? check_at - start_shift + cl_l
6eb5f6b9 730 : strend) ;
a1933d95 731 char *startpos = strbeg;
6eb5f6b9
JH
732
733 t = s;
76384e4a 734 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 735 PL_regdata = prog->data;
76384e4a
GS
736 PL_bostr = startpos;
737 }
6eb5f6b9
JH
738 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
739 if (!s) {
740#ifdef DEBUGGING
741 char *what;
742#endif
743 if (endpos == strend) {
744 DEBUG_r( PerlIO_printf(Perl_debug_log,
745 "Could not match STCLASS...\n") );
746 goto fail;
747 }
66e933ab
GS
748 DEBUG_r( PerlIO_printf(Perl_debug_log,
749 "This position contradicts STCLASS...\n") );
653099ff
GS
750 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
751 goto fail;
6eb5f6b9
JH
752 /* Contradict one of substrings */
753 if (prog->anchored_substr) {
6eb5f6b9
JH
754 if (prog->anchored_substr == check) {
755 DEBUG_r( what = "anchored" );
756 hop_and_restart:
757 PL_regeol = strend; /* Used in HOP() */
758 s = HOPc(t, 1);
66e933ab
GS
759 if (s + start_shift + end_shift > strend) {
760 /* XXXX Should be taken into account earlier? */
761 DEBUG_r( PerlIO_printf(Perl_debug_log,
762 "Could not match STCLASS...\n") );
763 goto fail;
764 }
5e39e1e5
HS
765 if (!check)
766 goto giveup;
6eb5f6b9 767 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 768 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
769 what, (long)(s + start_shift - i_strpos)) );
770 goto restart;
771 }
66e933ab 772 /* Have both, check_string is floating */
6eb5f6b9
JH
773 if (t + start_shift >= check_at) /* Contradicts floating=check */
774 goto retry_floating_check;
775 /* Recheck anchored substring, but not floating... */
776 s = check_at;
5e39e1e5
HS
777 if (!check)
778 goto giveup;
6eb5f6b9 779 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 780 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
781 (long)(other_last - i_strpos)) );
782 goto do_other_anchored;
783 }
60e71179
GS
784 /* Another way we could have checked stclass at the
785 current position only: */
786 if (ml_anch) {
787 s = t = t + 1;
5e39e1e5
HS
788 if (!check)
789 goto giveup;
60e71179 790 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
791 "Looking for /%s^%s/m starting at offset %ld...\n",
792 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 793 goto try_at_offset;
66e933ab 794 }
60e71179
GS
795 if (!prog->float_substr) /* Could have been deleted */
796 goto fail;
6eb5f6b9
JH
797 /* Check is floating subtring. */
798 retry_floating_check:
799 t = check_at - start_shift;
800 DEBUG_r( what = "floating" );
801 goto hop_and_restart;
802 }
803 DEBUG_r( if (t != s)
804 PerlIO_printf(Perl_debug_log,
805 "By STCLASS: moving %ld --> %ld\n",
806 (long)(t - i_strpos), (long)(s - i_strpos));
807 else
808 PerlIO_printf(Perl_debug_log,
809 "Does not contradict STCLASS...\n") );
810 }
5e39e1e5
HS
811 giveup:
812 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
813 PL_colors[4], (check ? "Guessed" : "Giving up"),
814 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 815 return s;
2c2d71f5
JH
816
817 fail_finish: /* Substring not found */
66e933ab
GS
818 if (prog->check_substr) /* could be removed already */
819 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 820 fail:
2c2d71f5 821 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
822 PL_colors[4],PL_colors[5]));
823 return Nullch;
824}
9661b544 825
6eb5f6b9 826/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
827STATIC char *
828S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 829{
6eb5f6b9
JH
830 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
831 char *m;
d8093b23
G
832 STRLEN ln;
833 unsigned int c1;
834 unsigned int c2;
6eb5f6b9
JH
835 char *e;
836 register I32 tmp = 1; /* Scratch variable? */
ffc61ed2 837 register bool do_utf8 = DO_UTF8(PL_reg_sv);
cad2e5aa 838
6eb5f6b9
JH
839 /* We know what class it must start with. */
840 switch (OP(c)) {
6eb5f6b9 841 case ANYOF:
a0ed51b3 842 while (s < strend) {
ffc61ed2 843 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
844 if (tmp && (norun || regtry(prog, s)))
845 goto got_it;
846 else
847 tmp = doevery;
a0ed51b3 848 }
6eb5f6b9
JH
849 else
850 tmp = 1;
ffc61ed2 851 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 852 }
6eb5f6b9
JH
853 break;
854 case EXACTF:
855 m = STRING(c);
856 ln = STR_LEN(c);
d8093b23 857 c1 = *(U8*)m;
6eb5f6b9
JH
858 c2 = PL_fold[c1];
859 goto do_exactf;
860 case EXACTFL:
861 m = STRING(c);
862 ln = STR_LEN(c);
d8093b23 863 c1 = *(U8*)m;
6eb5f6b9
JH
864 c2 = PL_fold_locale[c1];
865 do_exactf:
866 e = strend - ln;
b3c9acc1 867
6eb5f6b9
JH
868 if (norun && e < s)
869 e = s; /* Due to minlen logic of intuit() */
b3c9acc1
IZ
870 /* Here it is NOT UTF! */
871 if (c1 == c2) {
872 while (s <= e) {
d8093b23 873 if ( *(U8*)s == c1
66e933ab
GS
874 && (ln == 1 || !(OP(c) == EXACTF
875 ? ibcmp(s, m, ln)
876 : ibcmp_locale(s, m, ln)))
6eb5f6b9 877 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
878 goto got_it;
879 s++;
880 }
881 } else {
882 while (s <= e) {
d8093b23 883 if ( (*(U8*)s == c1 || *(U8*)s == c2)
66e933ab
GS
884 && (ln == 1 || !(OP(c) == EXACTF
885 ? ibcmp(s, m, ln)
886 : ibcmp_locale(s, m, ln)))
6eb5f6b9 887 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
888 goto got_it;
889 s++;
890 }
891 }
892 break;
bbce6d69 893 case BOUNDL:
3280af22 894 PL_reg_flags |= RF_tainted;
bbce6d69 895 /* FALL THROUGH */
a0d0e21e 896 case BOUND:
ffc61ed2
JH
897 if (do_utf8) {
898 if (s == startpos)
899 tmp = '\n';
900 else {
901 U8 *r = reghop((U8*)s, -1);
902
903 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
904 }
905 tmp = ((OP(c) == BOUND ?
906 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
907 while (s < strend) {
908 if (tmp == !(OP(c) == BOUND ?
909 swash_fetch(PL_utf8_alnum, (U8*)s) :
910 isALNUM_LC_utf8((U8*)s)))
911 {
912 tmp = !tmp;
913 if ((norun || regtry(prog, s)))
914 goto got_it;
915 }
916 s += UTF8SKIP(s);
a687059c 917 }
a0d0e21e 918 }
667bb95a 919 else {
ffc61ed2
JH
920 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
921 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
922 while (s < strend) {
923 if (tmp ==
924 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
925 tmp = !tmp;
926 if ((norun || regtry(prog, s)))
927 goto got_it;
928 }
929 s++;
a0ed51b3 930 }
a0ed51b3 931 }
6eb5f6b9 932 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
933 goto got_it;
934 break;
bbce6d69 935 case NBOUNDL:
3280af22 936 PL_reg_flags |= RF_tainted;
bbce6d69 937 /* FALL THROUGH */
a0d0e21e 938 case NBOUND:
ffc61ed2
JH
939 if (do_utf8) {
940 if (s == startpos)
941 tmp = '\n';
942 else {
943 U8 *r = reghop((U8*)s, -1);
944
945 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
946 }
947 tmp = ((OP(c) == NBOUND ?
948 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
949 while (s < strend) {
950 if (tmp == !(OP(c) == NBOUND ?
951 swash_fetch(PL_utf8_alnum, (U8*)s) :
952 isALNUM_LC_utf8((U8*)s)))
953 tmp = !tmp;
954 else if ((norun || regtry(prog, s)))
955 goto got_it;
956 s += UTF8SKIP(s);
957 }
a0d0e21e 958 }
667bb95a 959 else {
ffc61ed2
JH
960 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
961 tmp = ((OP(c) == NBOUND ?
962 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
963 while (s < strend) {
964 if (tmp ==
965 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
966 tmp = !tmp;
967 else if ((norun || regtry(prog, s)))
968 goto got_it;
969 s++;
970 }
a0ed51b3 971 }
6eb5f6b9 972 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
973 goto got_it;
974 break;
a0d0e21e 975 case ALNUM:
ffc61ed2
JH
976 if (do_utf8) {
977 while (s < strend) {
978 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
979 if (tmp && (norun || regtry(prog, s)))
980 goto got_it;
981 else
982 tmp = doevery;
983 }
bbce6d69 984 else
ffc61ed2
JH
985 tmp = 1;
986 s += UTF8SKIP(s);
bbce6d69 987 }
bbce6d69 988 }
ffc61ed2
JH
989 else {
990 while (s < strend) {
991 if (isALNUM(*s)) {
992 if (tmp && (norun || regtry(prog, s)))
993 goto got_it;
994 else
995 tmp = doevery;
996 }
a0ed51b3 997 else
ffc61ed2
JH
998 tmp = 1;
999 s++;
a0ed51b3 1000 }
a0ed51b3
LW
1001 }
1002 break;
bbce6d69 1003 case ALNUML:
3280af22 1004 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1005 if (do_utf8) {
1006 while (s < strend) {
1007 if (isALNUM_LC_utf8((U8*)s)) {
1008 if (tmp && (norun || regtry(prog, s)))
1009 goto got_it;
1010 else
1011 tmp = doevery;
1012 }
a687059c 1013 else
ffc61ed2
JH
1014 tmp = 1;
1015 s += UTF8SKIP(s);
a0d0e21e 1016 }
a0d0e21e 1017 }
ffc61ed2
JH
1018 else {
1019 while (s < strend) {
1020 if (isALNUM_LC(*s)) {
1021 if (tmp && (norun || regtry(prog, s)))
1022 goto got_it;
1023 else
1024 tmp = doevery;
1025 }
a0ed51b3 1026 else
ffc61ed2
JH
1027 tmp = 1;
1028 s++;
a0ed51b3 1029 }
a0ed51b3
LW
1030 }
1031 break;
a0d0e21e 1032 case NALNUM:
ffc61ed2
JH
1033 if (do_utf8) {
1034 while (s < strend) {
1035 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
1036 if (tmp && (norun || regtry(prog, s)))
1037 goto got_it;
1038 else
1039 tmp = doevery;
1040 }
bbce6d69 1041 else
ffc61ed2
JH
1042 tmp = 1;
1043 s += UTF8SKIP(s);
bbce6d69 1044 }
bbce6d69 1045 }
ffc61ed2
JH
1046 else {
1047 while (s < strend) {
1048 if (!isALNUM(*s)) {
1049 if (tmp && (norun || regtry(prog, s)))
1050 goto got_it;
1051 else
1052 tmp = doevery;
1053 }
a0ed51b3 1054 else
ffc61ed2
JH
1055 tmp = 1;
1056 s++;
a0ed51b3 1057 }
a0ed51b3
LW
1058 }
1059 break;
bbce6d69 1060 case NALNUML:
3280af22 1061 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1062 if (do_utf8) {
1063 while (s < strend) {
1064 if (!isALNUM_LC_utf8((U8*)s)) {
1065 if (tmp && (norun || regtry(prog, s)))
1066 goto got_it;
1067 else
1068 tmp = doevery;
1069 }
a687059c 1070 else
ffc61ed2
JH
1071 tmp = 1;
1072 s += UTF8SKIP(s);
a687059c 1073 }
a0d0e21e 1074 }
ffc61ed2
JH
1075 else {
1076 while (s < strend) {
1077 if (!isALNUM_LC(*s)) {
1078 if (tmp && (norun || regtry(prog, s)))
1079 goto got_it;
1080 else
1081 tmp = doevery;
1082 }
a0ed51b3 1083 else
ffc61ed2
JH
1084 tmp = 1;
1085 s++;
a0ed51b3 1086 }
a0ed51b3
LW
1087 }
1088 break;
a0d0e21e 1089 case SPACE:
ffc61ed2
JH
1090 if (do_utf8) {
1091 while (s < strend) {
1092 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
1093 if (tmp && (norun || regtry(prog, s)))
1094 goto got_it;
1095 else
1096 tmp = doevery;
1097 }
a0d0e21e 1098 else
ffc61ed2
JH
1099 tmp = 1;
1100 s += UTF8SKIP(s);
2304df62 1101 }
a0d0e21e 1102 }
ffc61ed2
JH
1103 else {
1104 while (s < strend) {
1105 if (isSPACE(*s)) {
1106 if (tmp && (norun || regtry(prog, s)))
1107 goto got_it;
1108 else
1109 tmp = doevery;
1110 }
a0ed51b3 1111 else
ffc61ed2
JH
1112 tmp = 1;
1113 s++;
a0ed51b3 1114 }
a0ed51b3
LW
1115 }
1116 break;
bbce6d69 1117 case SPACEL:
3280af22 1118 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1119 if (do_utf8) {
1120 while (s < strend) {
1121 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1122 if (tmp && (norun || regtry(prog, s)))
1123 goto got_it;
1124 else
1125 tmp = doevery;
1126 }
bbce6d69 1127 else
ffc61ed2
JH
1128 tmp = 1;
1129 s += UTF8SKIP(s);
bbce6d69 1130 }
bbce6d69 1131 }
ffc61ed2
JH
1132 else {
1133 while (s < strend) {
1134 if (isSPACE_LC(*s)) {
1135 if (tmp && (norun || regtry(prog, s)))
1136 goto got_it;
1137 else
1138 tmp = doevery;
1139 }
a0ed51b3 1140 else
ffc61ed2
JH
1141 tmp = 1;
1142 s++;
a0ed51b3 1143 }
a0ed51b3
LW
1144 }
1145 break;
a0d0e21e 1146 case NSPACE:
ffc61ed2
JH
1147 if (do_utf8) {
1148 while (s < strend) {
1149 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
1150 if (tmp && (norun || regtry(prog, s)))
1151 goto got_it;
1152 else
1153 tmp = doevery;
1154 }
a0d0e21e 1155 else
ffc61ed2
JH
1156 tmp = 1;
1157 s += UTF8SKIP(s);
a687059c 1158 }
a0d0e21e 1159 }
ffc61ed2
JH
1160 else {
1161 while (s < strend) {
1162 if (!isSPACE(*s)) {
1163 if (tmp && (norun || regtry(prog, s)))
1164 goto got_it;
1165 else
1166 tmp = doevery;
1167 }
a0ed51b3 1168 else
ffc61ed2
JH
1169 tmp = 1;
1170 s++;
a0ed51b3 1171 }
a0ed51b3
LW
1172 }
1173 break;
bbce6d69 1174 case NSPACEL:
3280af22 1175 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1176 if (do_utf8) {
1177 while (s < strend) {
1178 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1179 if (tmp && (norun || regtry(prog, s)))
1180 goto got_it;
1181 else
1182 tmp = doevery;
1183 }
bbce6d69 1184 else
ffc61ed2
JH
1185 tmp = 1;
1186 s += UTF8SKIP(s);
bbce6d69 1187 }
bbce6d69 1188 }
ffc61ed2
JH
1189 else {
1190 while (s < strend) {
1191 if (!isSPACE_LC(*s)) {
1192 if (tmp && (norun || regtry(prog, s)))
1193 goto got_it;
1194 else
1195 tmp = doevery;
1196 }
a0ed51b3 1197 else
ffc61ed2
JH
1198 tmp = 1;
1199 s++;
a0ed51b3 1200 }
a0ed51b3
LW
1201 }
1202 break;
a0d0e21e 1203 case DIGIT:
ffc61ed2
JH
1204 if (do_utf8) {
1205 while (s < strend) {
1206 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
1207 if (tmp && (norun || regtry(prog, s)))
1208 goto got_it;
1209 else
1210 tmp = doevery;
1211 }
a0d0e21e 1212 else
ffc61ed2
JH
1213 tmp = 1;
1214 s += UTF8SKIP(s);
2b69d0c2 1215 }
a0d0e21e 1216 }
ffc61ed2
JH
1217 else {
1218 while (s < strend) {
1219 if (isDIGIT(*s)) {
1220 if (tmp && (norun || regtry(prog, s)))
1221 goto got_it;
1222 else
1223 tmp = doevery;
1224 }
a0ed51b3 1225 else
ffc61ed2
JH
1226 tmp = 1;
1227 s++;
a0ed51b3 1228 }
a0ed51b3
LW
1229 }
1230 break;
b8c5462f
JH
1231 case DIGITL:
1232 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1233 if (do_utf8) {
1234 while (s < strend) {
1235 if (isDIGIT_LC_utf8((U8*)s)) {
1236 if (tmp && (norun || regtry(prog, s)))
1237 goto got_it;
1238 else
1239 tmp = doevery;
1240 }
b8c5462f 1241 else
ffc61ed2
JH
1242 tmp = 1;
1243 s += UTF8SKIP(s);
b8c5462f 1244 }
b8c5462f 1245 }
ffc61ed2
JH
1246 else {
1247 while (s < strend) {
1248 if (isDIGIT_LC(*s)) {
1249 if (tmp && (norun || regtry(prog, s)))
1250 goto got_it;
1251 else
1252 tmp = doevery;
1253 }
b8c5462f 1254 else
ffc61ed2
JH
1255 tmp = 1;
1256 s++;
b8c5462f 1257 }
b8c5462f
JH
1258 }
1259 break;
a0d0e21e 1260 case NDIGIT:
ffc61ed2
JH
1261 if (do_utf8) {
1262 while (s < strend) {
1263 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
1264 if (tmp && (norun || regtry(prog, s)))
1265 goto got_it;
1266 else
1267 tmp = doevery;
1268 }
a0d0e21e 1269 else
ffc61ed2
JH
1270 tmp = 1;
1271 s += UTF8SKIP(s);
a687059c 1272 }
a0d0e21e 1273 }
ffc61ed2
JH
1274 else {
1275 while (s < strend) {
1276 if (!isDIGIT(*s)) {
1277 if (tmp && (norun || regtry(prog, s)))
1278 goto got_it;
1279 else
1280 tmp = doevery;
1281 }
a0ed51b3 1282 else
ffc61ed2
JH
1283 tmp = 1;
1284 s++;
a0ed51b3 1285 }
a0ed51b3
LW
1286 }
1287 break;
b8c5462f
JH
1288 case NDIGITL:
1289 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1290 if (do_utf8) {
1291 while (s < strend) {
1292 if (!isDIGIT_LC_utf8((U8*)s)) {
1293 if (tmp && (norun || regtry(prog, s)))
1294 goto got_it;
1295 else
1296 tmp = doevery;
1297 }
b8c5462f 1298 else
ffc61ed2
JH
1299 tmp = 1;
1300 s += UTF8SKIP(s);
b8c5462f 1301 }
a0ed51b3 1302 }
ffc61ed2
JH
1303 else {
1304 while (s < strend) {
1305 if (!isDIGIT_LC(*s)) {
1306 if (tmp && (norun || regtry(prog, s)))
1307 goto got_it;
1308 else
1309 tmp = doevery;
1310 }
cf93c79d 1311 else
ffc61ed2
JH
1312 tmp = 1;
1313 s++;
b8c5462f 1314 }
b8c5462f
JH
1315 }
1316 break;
b3c9acc1 1317 default:
3c3eec57
GS
1318 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1319 break;
d6a28714 1320 }
6eb5f6b9
JH
1321 return 0;
1322 got_it:
1323 return s;
1324}
1325
1326/*
1327 - regexec_flags - match a regexp against a string
1328 */
1329I32
1330Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1331 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1332/* strend: pointer to null at end of string */
1333/* strbeg: real beginning of string */
1334/* minend: end of match must be >=minend after stringarg. */
1335/* data: May be used for some additional optimizations. */
1336/* nosave: For optimizations. */
1337{
6eb5f6b9
JH
1338 register char *s;
1339 register regnode *c;
1340 register char *startpos = stringarg;
6eb5f6b9
JH
1341 I32 minlen; /* must match at least this many chars */
1342 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1343 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1344 constant substr. */ /* CC */
1345 I32 end_shift = 0; /* Same for the end. */ /* CC */
1346 I32 scream_pos = -1; /* Internal iterator of scream. */
1347 char *scream_olds;
1348 SV* oreplsv = GvSV(PL_replgv);
1349
1350 PL_regcc = 0;
1351
1352 cache_re(prog);
1353#ifdef DEBUGGING
1354 PL_regnarrate = PL_debug & 512;
1355#endif
1356
1357 /* Be paranoid... */
1358 if (prog == NULL || startpos == NULL) {
1359 Perl_croak(aTHX_ "NULL regexp parameter");
1360 return 0;
1361 }
1362
1363 minlen = prog->minlen;
1364 if (strend - startpos < minlen) goto phooey;
1365
1366 if (startpos == strbeg) /* is ^ valid at stringarg? */
1367 PL_regprev = '\n';
1368 else {
1369 PL_regprev = (U32)stringarg[-1];
1370 if (!PL_multiline && PL_regprev == '\n')
1371 PL_regprev = '\0'; /* force ^ to NOT match */
1372 }
1373
1374 /* Check validity of program. */
1375 if (UCHARAT(prog->program) != REG_MAGIC) {
1376 Perl_croak(aTHX_ "corrupted regexp program");
1377 }
1378
1379 PL_reg_flags = 0;
1380 PL_reg_eval_set = 0;
1381 PL_reg_maxiter = 0;
1382
1383 if (prog->reganch & ROPT_UTF8)
1384 PL_reg_flags |= RF_utf8;
1385
1386 /* Mark beginning of line for ^ and lookbehind. */
1387 PL_regbol = startpos;
1388 PL_bostr = strbeg;
1389 PL_reg_sv = sv;
1390
1391 /* Mark end of line for $ (and such) */
1392 PL_regeol = strend;
1393
1394 /* see how far we have to get to not match where we matched before */
1395 PL_regtill = startpos+minend;
1396
1397 /* We start without call_cc context. */
1398 PL_reg_call_cc = 0;
1399
1400 /* If there is a "must appear" string, look for it. */
1401 s = startpos;
1402
1403 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1404 MAGIC *mg;
1405
1406 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1407 PL_reg_ganch = startpos;
1408 else if (sv && SvTYPE(sv) >= SVt_PVMG
1409 && SvMAGIC(sv)
1410 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1411 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1412 if (prog->reganch & ROPT_ANCH_GPOS) {
1413 if (s > PL_reg_ganch)
1414 goto phooey;
1415 s = PL_reg_ganch;
1416 }
1417 }
1418 else /* pos() not defined */
1419 PL_reg_ganch = strbeg;
1420 }
1421
1422 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1423 re_scream_pos_data d;
1424
1425 d.scream_olds = &scream_olds;
1426 d.scream_pos = &scream_pos;
1427 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1428 if (!s)
1429 goto phooey; /* not present */
1430 }
1431
1432 DEBUG_r( if (!PL_colorset) reginitcolors() );
1433 DEBUG_r(PerlIO_printf(Perl_debug_log,
1434 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1435 PL_colors[4],PL_colors[5],PL_colors[0],
1436 prog->precomp,
1437 PL_colors[1],
1438 (strlen(prog->precomp) > 60 ? "..." : ""),
1439 PL_colors[0],
1440 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1441 startpos, PL_colors[1],
1442 (strend - startpos > 60 ? "..." : ""))
1443 );
1444
1445 /* Simplest case: anchored match need be tried only once. */
1446 /* [unless only anchor is BOL and multiline is set] */
1447 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1448 if (s == startpos && regtry(prog, startpos))
1449 goto got_it;
1450 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1451 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1452 {
1453 char *end;
1454
1455 if (minlen)
1456 dontbother = minlen - 1;
1457 end = HOPc(strend, -dontbother) - 1;
1458 /* for multiline we only have to try after newlines */
1459 if (prog->check_substr) {
1460 if (s == startpos)
1461 goto after_try;
1462 while (1) {
1463 if (regtry(prog, s))
1464 goto got_it;
1465 after_try:
1466 if (s >= end)
1467 goto phooey;
1468 if (prog->reganch & RE_USE_INTUIT) {
1469 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1470 if (!s)
1471 goto phooey;
1472 }
1473 else
1474 s++;
1475 }
1476 } else {
1477 if (s > startpos)
1478 s--;
1479 while (s < end) {
1480 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1481 if (regtry(prog, s))
1482 goto got_it;
1483 }
1484 }
1485 }
1486 }
1487 goto phooey;
1488 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1489 if (regtry(prog, PL_reg_ganch))
1490 goto got_it;
1491 goto phooey;
1492 }
1493
1494 /* Messy cases: unanchored match. */
1495 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1496 /* we have /x+whatever/ */
1497 /* it must be a one character string (XXXX Except UTF?) */
1498 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1499#ifdef DEBUGGING
1500 int did_match = 0;
1501#endif
1502
6eb5f6b9
JH
1503 if (UTF) {
1504 while (s < strend) {
1505 if (*s == ch) {
bf93d4cc 1506 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1507 if (regtry(prog, s)) goto got_it;
1508 s += UTF8SKIP(s);
1509 while (s < strend && *s == ch)
1510 s += UTF8SKIP(s);
1511 }
1512 s += UTF8SKIP(s);
1513 }
1514 }
1515 else {
1516 while (s < strend) {
1517 if (*s == ch) {
bf93d4cc 1518 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1519 if (regtry(prog, s)) goto got_it;
1520 s++;
1521 while (s < strend && *s == ch)
1522 s++;
1523 }
1524 s++;
1525 }
1526 }
bf93d4cc
GS
1527 DEBUG_r(did_match ||
1528 PerlIO_printf(Perl_debug_log,
1529 "Did not find anchored character...\n"));
6eb5f6b9
JH
1530 }
1531 /*SUPPRESS 560*/
1532 else if (prog->anchored_substr != Nullsv
1533 || (prog->float_substr != Nullsv
1534 && prog->float_max_offset < strend - s)) {
1535 SV *must = prog->anchored_substr
1536 ? prog->anchored_substr : prog->float_substr;
1537 I32 back_max =
1538 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1539 I32 back_min =
1540 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
6eb5f6b9
JH
1541 char *last = HOPc(strend, /* Cannot start after this */
1542 -(I32)(CHR_SVLEN(must)
1543 - (SvTAIL(must) != 0) + back_min));
1544 char *last1; /* Last position checked before */
bf93d4cc
GS
1545#ifdef DEBUGGING
1546 int did_match = 0;
1547#endif
6eb5f6b9
JH
1548
1549 if (s > PL_bostr)
1550 last1 = HOPc(s, -1);
1551 else
1552 last1 = s - 1; /* bogus */
1553
1554 /* XXXX check_substr already used to find `s', can optimize if
1555 check_substr==must. */
1556 scream_pos = -1;
1557 dontbother = end_shift;
1558 strend = HOPc(strend, -dontbother);
1559 while ( (s <= last) &&
1560 ((flags & REXEC_SCREAM)
1561 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1562 end_shift, &scream_pos, 0))
1563 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1564 (unsigned char*)strend, must,
1565 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1566 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1567 if (HOPc(s, -back_max) > last1) {
1568 last1 = HOPc(s, -back_min);
1569 s = HOPc(s, -back_max);
1570 }
1571 else {
1572 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1573
1574 last1 = HOPc(s, -back_min);
1575 s = t;
1576 }
1577 if (UTF) {
1578 while (s <= last1) {
1579 if (regtry(prog, s))
1580 goto got_it;
1581 s += UTF8SKIP(s);
1582 }
1583 }
1584 else {
1585 while (s <= last1) {
1586 if (regtry(prog, s))
1587 goto got_it;
1588 s++;
1589 }
1590 }
1591 }
bf93d4cc
GS
1592 DEBUG_r(did_match ||
1593 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1594 ((must == prog->anchored_substr)
1595 ? "anchored" : "floating"),
1596 PL_colors[0],
1597 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1598 SvPVX(must),
1599 PL_colors[1], (SvTAIL(must) ? "$" : "")));
6eb5f6b9
JH
1600 goto phooey;
1601 }
155aba94 1602 else if ((c = prog->regstclass)) {
66e933ab
GS
1603 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1604 /* don't bother with what can't match */
6eb5f6b9 1605 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1606 DEBUG_r({
1607 SV *prop = sv_newmortal();
1608 regprop(prop, c);
1609 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s);
1610 });
6eb5f6b9
JH
1611 if (find_byclass(prog, c, s, strend, startpos, 0))
1612 goto got_it;
bf93d4cc 1613 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1614 }
1615 else {
1616 dontbother = 0;
1617 if (prog->float_substr != Nullsv) { /* Trim the end. */
1618 char *last;
d6a28714
JH
1619
1620 if (flags & REXEC_SCREAM) {
1621 last = screaminstr(sv, prog->float_substr, s - strbeg,
1622 end_shift, &scream_pos, 1); /* last one */
1623 if (!last)
ffc61ed2 1624 last = scream_olds; /* Only one occurrence. */
b8c5462f 1625 }
d6a28714
JH
1626 else {
1627 STRLEN len;
1628 char *little = SvPV(prog->float_substr, len);
1629
1630 if (SvTAIL(prog->float_substr)) {
1631 if (memEQ(strend - len + 1, little, len - 1))
1632 last = strend - len + 1;
1633 else if (!PL_multiline)
1634 last = memEQ(strend - len, little, len)
1635 ? strend - len : Nullch;
b8c5462f 1636 else
d6a28714
JH
1637 goto find_last;
1638 } else {
1639 find_last:
1640 if (len)
1641 last = rninstr(s, strend, little, little + len);
b8c5462f 1642 else
d6a28714 1643 last = strend; /* matching `$' */
b8c5462f 1644 }
b8c5462f 1645 }
bf93d4cc
GS
1646 if (last == NULL) {
1647 DEBUG_r(PerlIO_printf(Perl_debug_log,
1648 "%sCan't trim the tail, match fails (should not happen)%s\n",
1649 PL_colors[4],PL_colors[5]));
1650 goto phooey; /* Should not happen! */
1651 }
d6a28714
JH
1652 dontbother = strend - last + prog->float_min_offset;
1653 }
1654 if (minlen && (dontbother < minlen))
1655 dontbother = minlen - 1;
1656 strend -= dontbother; /* this one's always in bytes! */
1657 /* We don't know much -- general case. */
1658 if (UTF) {
1659 for (;;) {
1660 if (regtry(prog, s))
1661 goto got_it;
1662 if (s >= strend)
1663 break;
b8c5462f 1664 s += UTF8SKIP(s);
d6a28714
JH
1665 };
1666 }
1667 else {
1668 do {
1669 if (regtry(prog, s))
1670 goto got_it;
1671 } while (s++ < strend);
1672 }
1673 }
1674
1675 /* Failure. */
1676 goto phooey;
1677
1678got_it:
1679 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1680
1681 if (PL_reg_eval_set) {
1682 /* Preserve the current value of $^R */
1683 if (oreplsv != GvSV(PL_replgv))
1684 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1685 restored, the value remains
1686 the same. */
1687 restore_pos(aTHXo_ 0);
1688 }
1689
1690 /* make sure $`, $&, $', and $digit will work later */
1691 if ( !(flags & REXEC_NOT_FIRST) ) {
1692 if (RX_MATCH_COPIED(prog)) {
1693 Safefree(prog->subbeg);
1694 RX_MATCH_COPIED_off(prog);
1695 }
1696 if (flags & REXEC_COPY_STR) {
1697 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1698
1699 s = savepvn(strbeg, i);
1700 prog->subbeg = s;
1701 prog->sublen = i;
1702 RX_MATCH_COPIED_on(prog);
1703 }
1704 else {
1705 prog->subbeg = strbeg;
1706 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1707 }
1708 }
1709
1710 return 1;
1711
1712phooey:
bf93d4cc
GS
1713 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1714 PL_colors[4],PL_colors[5]));
d6a28714
JH
1715 if (PL_reg_eval_set)
1716 restore_pos(aTHXo_ 0);
1717 return 0;
1718}
1719
1720/*
1721 - regtry - try match at specific point
1722 */
1723STATIC I32 /* 0 failure, 1 success */
1724S_regtry(pTHX_ regexp *prog, char *startpos)
1725{
d6a28714
JH
1726 register I32 i;
1727 register I32 *sp;
1728 register I32 *ep;
1729 CHECKPOINT lastcp;
1730
02db2b7b
IZ
1731#ifdef DEBUGGING
1732 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1733#endif
d6a28714
JH
1734 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1735 MAGIC *mg;
1736
1737 PL_reg_eval_set = RS_init;
1738 DEBUG_r(DEBUG_s(
b900a521
JH
1739 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1740 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1741 ));
e8347627 1742 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1743 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1744 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1745 SAVETMPS;
1746 /* Apparently this is not needed, judging by wantarray. */
e8347627 1747 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1748 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1749
1750 if (PL_reg_sv) {
1751 /* Make $_ available to executed code. */
1752 if (PL_reg_sv != DEFSV) {
1753 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1754 SAVESPTR(DEFSV);
1755 DEFSV = PL_reg_sv;
b8c5462f 1756 }
d6a28714
JH
1757
1758 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1759 && (mg = mg_find(PL_reg_sv, 'g')))) {
1760 /* prepare for quick setting of pos */
1761 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1762 mg = mg_find(PL_reg_sv, 'g');
1763 mg->mg_len = -1;
b8c5462f 1764 }
d6a28714
JH
1765 PL_reg_magic = mg;
1766 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1767 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1768 }
1769 if (!PL_reg_curpm)
0f79a09d 1770 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1771 PL_reg_curpm->op_pmregexp = prog;
1772 PL_reg_oldcurpm = PL_curpm;
1773 PL_curpm = PL_reg_curpm;
1774 if (RX_MATCH_COPIED(prog)) {
1775 /* Here is a serious problem: we cannot rewrite subbeg,
1776 since it may be needed if this match fails. Thus
1777 $` inside (?{}) could fail... */
1778 PL_reg_oldsaved = prog->subbeg;
1779 PL_reg_oldsavedlen = prog->sublen;
1780 RX_MATCH_COPIED_off(prog);
1781 }
1782 else
1783 PL_reg_oldsaved = Nullch;
1784 prog->subbeg = PL_bostr;
1785 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1786 }
1787 prog->startp[0] = startpos - PL_bostr;
1788 PL_reginput = startpos;
1789 PL_regstartp = prog->startp;
1790 PL_regendp = prog->endp;
1791 PL_reglastparen = &prog->lastparen;
1792 prog->lastparen = 0;
1793 PL_regsize = 0;
1794 DEBUG_r(PL_reg_starttry = startpos);
1795 if (PL_reg_start_tmpl <= prog->nparens) {
1796 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1797 if(PL_reg_start_tmp)
1798 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1799 else
1800 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1801 }
1802
1803 /* XXXX What this code is doing here?!!! There should be no need
1804 to do this again and again, PL_reglastparen should take care of
3dd2943c 1805 this! --ilya*/
dafc8851
JH
1806
1807 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1808 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1809 * PL_reglastparen), is not needed at all by the test suite
1810 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1811 * enough, for building DynaLoader, or otherwise this
1812 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1813 * will happen. Meanwhile, this code *is* needed for the
1814 * above-mentioned test suite tests to succeed. The common theme
1815 * on those tests seems to be returning null fields from matches.
1816 * --jhi */
dafc8851 1817#if 1
d6a28714
JH
1818 sp = prog->startp;
1819 ep = prog->endp;
1820 if (prog->nparens) {
09e8ae3b 1821 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1822 *++sp = -1;
1823 *++ep = -1;
1824 }
1825 }
dafc8851 1826#endif
02db2b7b 1827 REGCP_SET(lastcp);
d6a28714
JH
1828 if (regmatch(prog->program + 1)) {
1829 prog->endp[0] = PL_reginput - PL_bostr;
1830 return 1;
1831 }
02db2b7b 1832 REGCP_UNWIND(lastcp);
d6a28714
JH
1833 return 0;
1834}
1835
02db2b7b
IZ
1836#define RE_UNWIND_BRANCH 1
1837#define RE_UNWIND_BRANCHJ 2
1838
1839union re_unwind_t;
1840
1841typedef struct { /* XX: makes sense to enlarge it... */
1842 I32 type;
1843 I32 prev;
1844 CHECKPOINT lastcp;
1845} re_unwind_generic_t;
1846
1847typedef struct {
1848 I32 type;
1849 I32 prev;
1850 CHECKPOINT lastcp;
1851 I32 lastparen;
1852 regnode *next;
1853 char *locinput;
1854 I32 nextchr;
1855#ifdef DEBUGGING
1856 int regindent;
1857#endif
1858} re_unwind_branch_t;
1859
1860typedef union re_unwind_t {
1861 I32 type;
1862 re_unwind_generic_t generic;
1863 re_unwind_branch_t branch;
1864} re_unwind_t;
1865
d6a28714
JH
1866/*
1867 - regmatch - main matching routine
1868 *
1869 * Conceptually the strategy is simple: check to see whether the current
1870 * node matches, call self recursively to see whether the rest matches,
1871 * and then act accordingly. In practice we make some effort to avoid
1872 * recursion, in particular by going through "ordinary" nodes (that don't
1873 * need to know whether the rest of the match failed) by a loop instead of
1874 * by recursion.
1875 */
1876/* [lwall] I've hoisted the register declarations to the outer block in order to
1877 * maybe save a little bit of pushing and popping on the stack. It also takes
1878 * advantage of machines that use a register save mask on subroutine entry.
1879 */
1880STATIC I32 /* 0 failure, 1 success */
1881S_regmatch(pTHX_ regnode *prog)
1882{
d6a28714
JH
1883 register regnode *scan; /* Current node. */
1884 regnode *next; /* Next node. */
1885 regnode *inner; /* Next node in internal branch. */
1886 register I32 nextchr; /* renamed nextchr - nextchar colides with
1887 function of same name */
1888 register I32 n; /* no or next */
1889 register I32 ln; /* len or last */
1890 register char *s; /* operand or save */
1891 register char *locinput = PL_reginput;
1892 register I32 c1, c2, paren; /* case fold search, parenth */
1893 int minmod = 0, sw = 0, logical = 0;
02db2b7b
IZ
1894 I32 unwind = 0;
1895 I32 firstcp = PL_savestack_ix;
ffc61ed2 1896 register bool do_utf8 = DO_UTF8(PL_reg_sv);
02db2b7b 1897
d6a28714
JH
1898#ifdef DEBUGGING
1899 PL_regindent++;
1900#endif
1901
1902 /* Note that nextchr is a byte even in UTF */
1903 nextchr = UCHARAT(locinput);
1904 scan = prog;
1905 while (scan != NULL) {
1906#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
02db2b7b 1907#if 1
d6a28714
JH
1908# define sayYES goto yes
1909# define sayNO goto no
7821416a
IZ
1910# define sayYES_FINAL goto yes_final
1911# define sayYES_LOUD goto yes_loud
1912# define sayNO_FINAL goto no_final
1913# define sayNO_SILENT goto do_no
d6a28714
JH
1914# define saySAME(x) if (x) goto yes; else goto no
1915# define REPORT_CODE_OFF 24
1916#else
1917# define sayYES return 1
1918# define sayNO return 0
7821416a
IZ
1919# define sayYES_FINAL return 1
1920# define sayYES_LOUD return 1
1921# define sayNO_FINAL return 0
1922# define sayNO_SILENT return 0
d6a28714
JH
1923# define saySAME(x) return x
1924#endif
1925 DEBUG_r( {
1926 SV *prop = sv_newmortal();
1927 int docolor = *PL_colors[0];
1928 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1929 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1930 /* The part of the string before starttry has one color
1931 (pref0_len chars), between starttry and current
1932 position another one (pref_len - pref0_len chars),
1933 after the current position the third one.
1934 We assume that pref0_len <= pref_len, otherwise we
1935 decrease pref0_len. */
1936 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1937 ? (5 + taill) - l : locinput - PL_bostr);
1938 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1939
1940 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1941 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1942 ? (5 + taill) - pref_len : PL_regeol - locinput);
1943 if (pref0_len < 0)
1944 pref0_len = 0;
1945 if (pref0_len > pref_len)
1946 pref0_len = pref_len;
1947 regprop(prop, scan);
1948 PerlIO_printf(Perl_debug_log,
b900a521
JH
1949 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1950 (IV)(locinput - PL_bostr),
d6a28714
JH
1951 PL_colors[4], pref0_len,
1952 locinput - pref_len, PL_colors[5],
1953 PL_colors[2], pref_len - pref0_len,
1954 locinput - pref_len + pref0_len, PL_colors[3],
1955 (docolor ? "" : "> <"),
1956 PL_colors[0], l, locinput, PL_colors[1],
1957 15 - l - pref_len + 1,
1958 "",
b900a521 1959 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
1960 SvPVX(prop));
1961 } );
1962
1963 next = scan + NEXT_OFF(scan);
1964 if (next == scan)
1965 next = NULL;
1966
1967 switch (OP(scan)) {
1968 case BOL:
1969 if (locinput == PL_bostr
1970 ? PL_regprev == '\n'
1971 : (PL_multiline &&
1972 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1973 {
1974 /* regtill = regbol; */
b8c5462f
JH
1975 break;
1976 }
d6a28714
JH
1977 sayNO;
1978 case MBOL:
1979 if (locinput == PL_bostr
1980 ? PL_regprev == '\n'
1981 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1982 {
b8c5462f
JH
1983 break;
1984 }
d6a28714
JH
1985 sayNO;
1986 case SBOL:
c2a73568 1987 if (locinput == PL_bostr)
b8c5462f 1988 break;
d6a28714
JH
1989 sayNO;
1990 case GPOS:
1991 if (locinput == PL_reg_ganch)
1992 break;
1993 sayNO;
1994 case EOL:
1995 if (PL_multiline)
1996 goto meol;
1997 else
1998 goto seol;
1999 case MEOL:
2000 meol:
2001 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2002 sayNO;
b8c5462f 2003 break;
d6a28714
JH
2004 case SEOL:
2005 seol:
2006 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2007 sayNO;
d6a28714 2008 if (PL_regeol - locinput > 1)
b8c5462f 2009 sayNO;
b8c5462f 2010 break;
d6a28714
JH
2011 case EOS:
2012 if (PL_regeol != locinput)
b8c5462f 2013 sayNO;
d6a28714 2014 break;
ffc61ed2
JH
2015 case SANY:
2016 if (DO_UTF8(PL_reg_sv)) {
b8c5462f 2017 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2018 if (locinput > PL_regeol)
2019 sayNO;
b8c5462f
JH
2020 nextchr = UCHARAT(locinput);
2021 break;
2022 }
d6a28714 2023 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2024 sayNO;
b8c5462f 2025 nextchr = UCHARAT(++locinput);
a0d0e21e 2026 break;
ffc61ed2
JH
2027 case REG_ANY:
2028 if (DO_UTF8(PL_reg_sv)) {
b8c5462f 2029 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2030 if (locinput > PL_regeol)
2031 sayNO;
a0ed51b3
LW
2032 nextchr = UCHARAT(locinput);
2033 break;
2034 }
155aba94 2035 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
a0ed51b3
LW
2036 sayNO;
2037 nextchr = UCHARAT(++locinput);
2038 break;
d6a28714 2039 case EXACT:
cd439c50
IZ
2040 s = STRING(scan);
2041 ln = STR_LEN(scan);
d6a28714
JH
2042 /* Inline the first character, for speed. */
2043 if (UCHARAT(s) != nextchr)
2044 sayNO;
2045 if (PL_regeol - locinput < ln)
2046 sayNO;
2047 if (ln > 1 && memNE(s, locinput, ln))
2048 sayNO;
2049 locinput += ln;
2050 nextchr = UCHARAT(locinput);
2051 break;
2052 case EXACTFL:
b8c5462f
JH
2053 PL_reg_flags |= RF_tainted;
2054 /* FALL THROUGH */
d6a28714 2055 case EXACTF:
cd439c50
IZ
2056 s = STRING(scan);
2057 ln = STR_LEN(scan);
d6a28714
JH
2058
2059 if (UTF) {
2060 char *l = locinput;
2061 char *e = s + ln;
2062 c1 = OP(scan) == EXACTF;
2063 while (s < e) {
2064 if (l >= PL_regeol)
2065 sayNO;
667bb95a
JH
2066 if (utf8_to_uv((U8*)s, e - s, 0, 0) !=
2067 (c1 ?
2068 toLOWER_utf8((U8*)l) :
2069 toLOWER_LC_utf8((U8*)l)))
d6a28714
JH
2070 {
2071 sayNO;
2072 }
2073 s += UTF8SKIP(s);
2074 l += UTF8SKIP(l);
b8c5462f 2075 }
d6a28714 2076 locinput = l;
a0ed51b3
LW
2077 nextchr = UCHARAT(locinput);
2078 break;
2079 }
d6a28714
JH
2080
2081 /* Inline the first character, for speed. */
2082 if (UCHARAT(s) != nextchr &&
2083 UCHARAT(s) != ((OP(scan) == EXACTF)
2084 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2085 sayNO;
d6a28714 2086 if (PL_regeol - locinput < ln)
b8c5462f 2087 sayNO;
d6a28714
JH
2088 if (ln > 1 && (OP(scan) == EXACTF
2089 ? ibcmp(s, locinput, ln)
2090 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2091 sayNO;
d6a28714
JH
2092 locinput += ln;
2093 nextchr = UCHARAT(locinput);
a0d0e21e 2094 break;
d6a28714 2095 case ANYOF:
ffc61ed2
JH
2096 if (do_utf8) {
2097 if (!reginclass(scan, (U8*)locinput, do_utf8))
2098 sayNO;
2099 if (locinput >= PL_regeol)
2100 sayNO;
2101 locinput += PL_utf8skip[nextchr];
b8c5462f 2102 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2103 }
2104 else {
2105 if (nextchr < 0)
2106 nextchr = UCHARAT(locinput);
2107 if (!reginclass(scan, (U8*)locinput, do_utf8))
2108 sayNO;
2109 if (!nextchr && locinput >= PL_regeol)
2110 sayNO;
2111 nextchr = UCHARAT(++locinput);
2112 }
b8c5462f 2113 break;
d6a28714 2114 case ALNUML:
b8c5462f
JH
2115 PL_reg_flags |= RF_tainted;
2116 /* FALL THROUGH */
d6a28714 2117 case ALNUM:
b8c5462f 2118 if (!nextchr)
4633a7c4 2119 sayNO;
ffc61ed2
JH
2120 if (do_utf8) {
2121 if (!(OP(scan) == ALNUM
d6a28714
JH
2122 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2123 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2124 {
2125 sayNO;
a0ed51b3 2126 }
b8c5462f 2127 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2128 nextchr = UCHARAT(locinput);
2129 break;
2130 }
ffc61ed2 2131 if (!(OP(scan) == ALNUM
d6a28714 2132 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2133 sayNO;
b8c5462f 2134 nextchr = UCHARAT(++locinput);
a0d0e21e 2135 break;
d6a28714 2136 case NALNUML:
b8c5462f
JH
2137 PL_reg_flags |= RF_tainted;
2138 /* FALL THROUGH */
d6a28714
JH
2139 case NALNUM:
2140 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2141 sayNO;
ffc61ed2
JH
2142 if (do_utf8) {
2143 if (OP(scan) == NALNUM
d6a28714
JH
2144 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2145 : isALNUM_LC_utf8((U8*)locinput))
2146 {
b8c5462f 2147 sayNO;
d6a28714 2148 }
b8c5462f
JH
2149 locinput += PL_utf8skip[nextchr];
2150 nextchr = UCHARAT(locinput);
2151 break;
2152 }
ffc61ed2 2153 if (OP(scan) == NALNUM
d6a28714 2154 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2155 sayNO;
76e3520e 2156 nextchr = UCHARAT(++locinput);
a0d0e21e 2157 break;
d6a28714
JH
2158 case BOUNDL:
2159 case NBOUNDL:
3280af22 2160 PL_reg_flags |= RF_tainted;
bbce6d69 2161 /* FALL THROUGH */
d6a28714
JH
2162 case BOUND:
2163 case NBOUND:
2164 /* was last char in word? */
ffc61ed2
JH
2165 if (do_utf8) {
2166 if (locinput == PL_regbol)
2167 ln = PL_regprev;
2168 else {
2169 U8 *r = reghop((U8*)locinput, -1);
2170
2171 ln = utf8_to_uv(r, s - (char*)r, 0, 0);
2172 }
2173 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2174 ln = isALNUM_uni(ln);
2175 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
2176 }
2177 else {
2178 ln = isALNUM_LC_uni(ln);
2179 n = isALNUM_LC_utf8((U8*)locinput);
2180 }
a0ed51b3 2181 }
d6a28714 2182 else {
ffc61ed2
JH
2183 ln = (locinput != PL_regbol) ?
2184 UCHARAT(locinput - 1) : PL_regprev;
2185 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2186 ln = isALNUM(ln);
2187 n = isALNUM(nextchr);
2188 }
2189 else {
2190 ln = isALNUM_LC(ln);
2191 n = isALNUM_LC(nextchr);
2192 }
d6a28714 2193 }
ffc61ed2
JH
2194 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2195 OP(scan) == BOUNDL))
2196 sayNO;
a0ed51b3 2197 break;
d6a28714 2198 case SPACEL:
3280af22 2199 PL_reg_flags |= RF_tainted;
bbce6d69 2200 /* FALL THROUGH */
d6a28714 2201 case SPACE:
9442cb0e 2202 if (!nextchr)
4633a7c4 2203 sayNO;
ffc61ed2
JH
2204 if (DO_UTF8(PL_reg_sv)) {
2205 if (nextchr & 0x80) {
2206 if (!(OP(scan) == SPACE
2207 ? swash_fetch(PL_utf8_space, (U8*)locinput)
2208 : isSPACE_LC_utf8((U8*)locinput)))
2209 {
2210 sayNO;
2211 }
2212 locinput += PL_utf8skip[nextchr];
2213 nextchr = UCHARAT(locinput);
2214 break;
d6a28714 2215 }
ffc61ed2
JH
2216 if (!(OP(scan) == SPACE
2217 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2218 sayNO;
2219 nextchr = UCHARAT(++locinput);
2220 }
2221 else {
2222 if (!(OP(scan) == SPACE
2223 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2224 sayNO;
2225 nextchr = UCHARAT(++locinput);
a0ed51b3 2226 }
a0ed51b3 2227 break;
d6a28714 2228 case NSPACEL:
3280af22 2229 PL_reg_flags |= RF_tainted;
bbce6d69 2230 /* FALL THROUGH */
d6a28714 2231 case NSPACE:
9442cb0e 2232 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2233 sayNO;
ffc61ed2
JH
2234 if (DO_UTF8(PL_reg_sv)) {
2235 if (OP(scan) == NSPACE
9442cb0e 2236 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714 2237 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2238 {
2239 sayNO;
2240 }
2241 locinput += PL_utf8skip[nextchr];
2242 nextchr = UCHARAT(locinput);
2243 break;
a0ed51b3 2244 }
ffc61ed2 2245 if (OP(scan) == NSPACE
d6a28714 2246 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2247 sayNO;
76e3520e 2248 nextchr = UCHARAT(++locinput);
a0d0e21e 2249 break;
d6a28714 2250 case DIGITL:
a0ed51b3
LW
2251 PL_reg_flags |= RF_tainted;
2252 /* FALL THROUGH */
d6a28714 2253 case DIGIT:
9442cb0e 2254 if (!nextchr)
a0ed51b3 2255 sayNO;
ffc61ed2
JH
2256 if (DO_UTF8(PL_reg_sv)) {
2257 if (!(OP(scan) == DIGIT
9442cb0e
GS
2258 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2259 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2260 {
a0ed51b3 2261 sayNO;
dfe13c55 2262 }
6f06b55f 2263 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2264 nextchr = UCHARAT(locinput);
2265 break;
2266 }
ffc61ed2 2267 if (!(OP(scan) == DIGIT
9442cb0e 2268 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2269 sayNO;
2270 nextchr = UCHARAT(++locinput);
2271 break;
d6a28714 2272 case NDIGITL:
b8c5462f
JH
2273 PL_reg_flags |= RF_tainted;
2274 /* FALL THROUGH */
d6a28714 2275 case NDIGIT:
9442cb0e 2276 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2277 sayNO;
ffc61ed2
JH
2278 if (DO_UTF8(PL_reg_sv)) {
2279 if (OP(scan) == NDIGIT
9442cb0e
GS
2280 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2281 : isDIGIT_LC_utf8((U8*)locinput))
2282 {
a0ed51b3 2283 sayNO;
9442cb0e 2284 }
6f06b55f 2285 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2286 nextchr = UCHARAT(locinput);
2287 break;
2288 }
ffc61ed2 2289 if (OP(scan) == NDIGIT
9442cb0e 2290 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2291 sayNO;
2292 nextchr = UCHARAT(++locinput);
2293 break;
2294 case CLUMP:
dfe13c55 2295 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2296 sayNO;
6f06b55f 2297 locinput += PL_utf8skip[nextchr];
dfe13c55 2298 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2299 locinput += UTF8SKIP(locinput);
2300 if (locinput > PL_regeol)
2301 sayNO;
2302 nextchr = UCHARAT(locinput);
2303 break;
c8756f30 2304 case REFFL:
3280af22 2305 PL_reg_flags |= RF_tainted;
c8756f30 2306 /* FALL THROUGH */
c277df42 2307 case REF:
c8756f30 2308 case REFF:
c277df42 2309 n = ARG(scan); /* which paren pair */
cf93c79d 2310 ln = PL_regstartp[n];
2c2d71f5 2311 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2312 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2313 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2314 if (ln == PL_regendp[n])
a0d0e21e 2315 break;
a0ed51b3 2316
cf93c79d 2317 s = PL_bostr + ln;
a0ed51b3
LW
2318 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2319 char *l = locinput;
cf93c79d 2320 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2321 /*
2322 * Note that we can't do the "other character" lookup trick as
2323 * in the 8-bit case (no pun intended) because in Unicode we
2324 * have to map both upper and title case to lower case.
2325 */
2326 if (OP(scan) == REFF) {
2327 while (s < e) {
2328 if (l >= PL_regeol)
2329 sayNO;
dfe13c55 2330 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2331 sayNO;
2332 s += UTF8SKIP(s);
2333 l += UTF8SKIP(l);
2334 }
2335 }
2336 else {
2337 while (s < e) {
2338 if (l >= PL_regeol)
2339 sayNO;
dfe13c55 2340 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2341 sayNO;
2342 s += UTF8SKIP(s);
2343 l += UTF8SKIP(l);
2344 }
2345 }
2346 locinput = l;
2347 nextchr = UCHARAT(locinput);
2348 break;
2349 }
2350
a0d0e21e 2351 /* Inline the first character, for speed. */
76e3520e 2352 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2353 (OP(scan) == REF ||
2354 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2355 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2356 sayNO;
cf93c79d 2357 ln = PL_regendp[n] - ln;
3280af22 2358 if (locinput + ln > PL_regeol)
4633a7c4 2359 sayNO;
c8756f30
AK
2360 if (ln > 1 && (OP(scan) == REF
2361 ? memNE(s, locinput, ln)
2362 : (OP(scan) == REFF
2363 ? ibcmp(s, locinput, ln)
2364 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2365 sayNO;
a0d0e21e 2366 locinput += ln;
76e3520e 2367 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2368 break;
2369
2370 case NOTHING:
c277df42 2371 case TAIL:
a0d0e21e
LW
2372 break;
2373 case BACK:
2374 break;
c277df42
IZ
2375 case EVAL:
2376 {
2377 dSP;
533c011a 2378 OP_4tree *oop = PL_op;
3280af22
NIS
2379 COP *ocurcop = PL_curcop;
2380 SV **ocurpad = PL_curpad;
c277df42
IZ
2381 SV *ret;
2382
2383 n = ARG(scan);
533c011a 2384 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2385 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2386 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2387 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2388
cea2e8a9 2389 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2390 SPAGAIN;
2391 ret = POPs;
2392 PUTBACK;
2393
0f5d15d6
IZ
2394 PL_op = oop;
2395 PL_curpad = ocurpad;
2396 PL_curcop = ocurcop;
c277df42 2397 if (logical) {
0f5d15d6
IZ
2398 if (logical == 2) { /* Postponed subexpression. */
2399 regexp *re;
22c35a8c 2400 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2401 re_cc_state state;
0f5d15d6
IZ
2402 CHECKPOINT cp, lastcp;
2403
2404 if(SvROK(ret) || SvRMAGICAL(ret)) {
2405 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2406
2407 if(SvMAGICAL(sv))
2408 mg = mg_find(sv, 'r');
2409 }
2410 if (mg) {
2411 re = (regexp *)mg->mg_obj;
df0003d4 2412 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2413 }
2414 else {
2415 STRLEN len;
2416 char *t = SvPV(ret, len);
2417 PMOP pm;
2418 char *oprecomp = PL_regprecomp;
2419 I32 osize = PL_regsize;
2420 I32 onpar = PL_regnpar;
2421
2422 pm.op_pmflags = 0;
393fec97 2423 pm.op_pmdynflags = (UTF||DO_UTF8(ret) ? PMdf_UTF8 : 0);
cea2e8a9 2424 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2425 if (!(SvFLAGS(ret)
2426 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2427 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2428 PL_regprecomp = oprecomp;
2429 PL_regsize = osize;
2430 PL_regnpar = onpar;
2431 }
2432 DEBUG_r(
2433 PerlIO_printf(Perl_debug_log,
2434 "Entering embedded `%s%.60s%s%s'\n",
2435 PL_colors[0],
2436 re->precomp,
2437 PL_colors[1],
2438 (strlen(re->precomp) > 60 ? "..." : ""))
2439 );
2440 state.node = next;
2441 state.prev = PL_reg_call_cc;
2442 state.cc = PL_regcc;
2443 state.re = PL_reg_re;
2444
2ab05381 2445 PL_regcc = 0;
0f5d15d6
IZ
2446
2447 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2448 REGCP_SET(lastcp);
0f5d15d6
IZ
2449 cache_re(re);
2450 state.ss = PL_savestack_ix;
2451 *PL_reglastparen = 0;
2452 PL_reg_call_cc = &state;
2453 PL_reginput = locinput;
2c2d71f5
JH
2454
2455 /* XXXX This is too dramatic a measure... */
2456 PL_reg_maxiter = 0;
2457
0f5d15d6 2458 if (regmatch(re->program + 1)) {
2c914db6
IZ
2459 /* Even though we succeeded, we need to restore
2460 global variables, since we may be wrapped inside
2461 SUSPEND, thus the match may be not finished yet. */
2462
2463 /* XXXX Do this only if SUSPENDed? */
2464 PL_reg_call_cc = state.prev;
2465 PL_regcc = state.cc;
2466 PL_reg_re = state.re;
2467 cache_re(PL_reg_re);
2468
2469 /* XXXX This is too dramatic a measure... */
2470 PL_reg_maxiter = 0;
2471
2472 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2473 ReREFCNT_dec(re);
2474 regcpblow(cp);
2475 sayYES;
2476 }
0f5d15d6 2477 ReREFCNT_dec(re);
02db2b7b 2478 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2479 regcppop();
2480 PL_reg_call_cc = state.prev;
2481 PL_regcc = state.cc;
2482 PL_reg_re = state.re;
d3790889 2483 cache_re(PL_reg_re);
2c2d71f5
JH
2484
2485 /* XXXX This is too dramatic a measure... */
2486 PL_reg_maxiter = 0;
2487
0f5d15d6
IZ
2488 sayNO;
2489 }
c277df42 2490 sw = SvTRUE(ret);
0f5d15d6 2491 logical = 0;
a0ed51b3
LW
2492 }
2493 else
3280af22 2494 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2495 break;
2496 }
a0d0e21e 2497 case OPEN:
c277df42 2498 n = ARG(scan); /* which paren pair */
3280af22
NIS
2499 PL_reg_start_tmp[n] = locinput;
2500 if (n > PL_regsize)
2501 PL_regsize = n;
a0d0e21e
LW
2502 break;
2503 case CLOSE:
c277df42 2504 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2505 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2506 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2507 if (n > *PL_reglastparen)
2508 *PL_reglastparen = n;
a0d0e21e 2509 break;
c277df42
IZ
2510 case GROUPP:
2511 n = ARG(scan); /* which paren pair */
cf93c79d 2512 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2513 break;
2514 case IFTHEN:
2c2d71f5 2515 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2516 if (sw)
2517 next = NEXTOPER(NEXTOPER(scan));
2518 else {
2519 next = scan + ARG(scan);
2520 if (OP(next) == IFTHEN) /* Fake one. */
2521 next = NEXTOPER(NEXTOPER(next));
2522 }
2523 break;
2524 case LOGICAL:
0f5d15d6 2525 logical = scan->flags;
c277df42 2526 break;
2ab05381
IZ
2527/*******************************************************************
2528 PL_regcc contains infoblock about the innermost (...)* loop, and
2529 a pointer to the next outer infoblock.
2530
2531 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2532
2533 1) After matching X, regnode for CURLYX is processed;
2534
2535 2) This regnode creates infoblock on the stack, and calls
2536 regmatch() recursively with the starting point at WHILEM node;
2537
2538 3) Each hit of WHILEM node tries to match A and Z (in the order
2539 depending on the current iteration, min/max of {min,max} and
2540 greediness). The information about where are nodes for "A"
2541 and "Z" is read from the infoblock, as is info on how many times "A"
2542 was already matched, and greediness.
2543
2544 4) After A matches, the same WHILEM node is hit again.
2545
2546 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2547 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2548 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2549 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2550 of the external loop.
2551
2552 Currently present infoblocks form a tree with a stem formed by PL_curcc
2553 and whatever it mentions via ->next, and additional attached trees
2554 corresponding to temporarily unset infoblocks as in "5" above.
2555
2556 In the following picture infoblocks for outer loop of
2557 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2558 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2559 infoblocks are drawn below the "reset" infoblock.
2560
2561 In fact in the picture below we do not show failed matches for Z and T
2562 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2563 more obvious *why* one needs to *temporary* unset infoblocks.]
2564
2565 Matched REx position InfoBlocks Comment
2566 (Y(A)*?Z)*?T x
2567 Y(A)*?Z)*?T x <- O
2568 Y (A)*?Z)*?T x <- O
2569 Y A)*?Z)*?T x <- O <- I
2570 YA )*?Z)*?T x <- O <- I
2571 YA A)*?Z)*?T x <- O <- I
2572 YAA )*?Z)*?T x <- O <- I
2573 YAA Z)*?T x <- O # Temporary unset I
2574 I
2575
2576 YAAZ Y(A)*?Z)*?T x <- O
2577 I
2578
2579 YAAZY (A)*?Z)*?T x <- O
2580 I
2581
2582 YAAZY A)*?Z)*?T x <- O <- I
2583 I
2584
2585 YAAZYA )*?Z)*?T x <- O <- I
2586 I
2587
2588 YAAZYA Z)*?T x <- O # Temporary unset I
2589 I,I
2590
2591 YAAZYAZ )*?T x <- O
2592 I,I
2593
2594 YAAZYAZ T x # Temporary unset O
2595 O
2596 I,I
2597
2598 YAAZYAZT x
2599 O
2600 I,I
2601 *******************************************************************/
a0d0e21e
LW
2602 case CURLYX: {
2603 CURCUR cc;
3280af22 2604 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2605 /* No need to save/restore up to this paren */
2606 I32 parenfloor = scan->flags;
c277df42
IZ
2607
2608 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2609 next += ARG(next);
3280af22
NIS
2610 cc.oldcc = PL_regcc;
2611 PL_regcc = &cc;
cb434fcc
IZ
2612 /* XXXX Probably it is better to teach regpush to support
2613 parenfloor > PL_regsize... */
2614 if (parenfloor > *PL_reglastparen)
2615 parenfloor = *PL_reglastparen; /* Pessimization... */
2616 cc.parenfloor = parenfloor;
a0d0e21e
LW
2617 cc.cur = -1;
2618 cc.min = ARG1(scan);
2619 cc.max = ARG2(scan);
c277df42 2620 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2621 cc.next = next;
2622 cc.minmod = minmod;
2623 cc.lastloc = 0;
3280af22 2624 PL_reginput = locinput;
a0d0e21e
LW
2625 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2626 regcpblow(cp);
3280af22 2627 PL_regcc = cc.oldcc;
4633a7c4 2628 saySAME(n);
a0d0e21e
LW
2629 }
2630 /* NOT REACHED */
2631 case WHILEM: {
2632 /*
2633 * This is really hard to understand, because after we match
2634 * what we're trying to match, we must make sure the rest of
2c2d71f5 2635 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2636 * to go back UP the parse tree by recursing ever deeper. And
2637 * if it fails, we have to reset our parent's current state
2638 * that we can try again after backing off.
2639 */
2640
c277df42 2641 CHECKPOINT cp, lastcp;
3280af22 2642 CURCUR* cc = PL_regcc;
c277df42
IZ
2643 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2644
4633a7c4 2645 n = cc->cur + 1; /* how many we know we matched */
3280af22 2646 PL_reginput = locinput;
a0d0e21e 2647
c277df42
IZ
2648 DEBUG_r(
2649 PerlIO_printf(Perl_debug_log,
2650 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2651 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2652 (long)n, (long)cc->min,
2653 (long)cc->max, (long)cc)
2654 );
4633a7c4 2655
a0d0e21e
LW
2656 /* If degenerate scan matches "", assume scan done. */
2657
579cf2c3 2658 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2659 PL_regcc = cc->oldcc;
2ab05381
IZ
2660 if (PL_regcc)
2661 ln = PL_regcc->cur;
c277df42 2662 DEBUG_r(
c3464db5
DD
2663 PerlIO_printf(Perl_debug_log,
2664 "%*s empty match detected, try continuation...\n",
3280af22 2665 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2666 );
a0d0e21e 2667 if (regmatch(cc->next))
4633a7c4 2668 sayYES;
2ab05381
IZ
2669 if (PL_regcc)
2670 PL_regcc->cur = ln;
3280af22 2671 PL_regcc = cc;
4633a7c4 2672 sayNO;
a0d0e21e
LW
2673 }
2674
2675 /* First just match a string of min scans. */
2676
2677 if (n < cc->min) {
2678 cc->cur = n;
2679 cc->lastloc = locinput;
4633a7c4
LW
2680 if (regmatch(cc->scan))
2681 sayYES;
2682 cc->cur = n - 1;
c277df42 2683 cc->lastloc = lastloc;
4633a7c4 2684 sayNO;
a0d0e21e
LW
2685 }
2686
2c2d71f5
JH
2687 if (scan->flags) {
2688 /* Check whether we already were at this position.
2689 Postpone detection until we know the match is not
2690 *that* much linear. */
2691 if (!PL_reg_maxiter) {
2692 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2693 PL_reg_leftiter = PL_reg_maxiter;
2694 }
2695 if (PL_reg_leftiter-- == 0) {
2696 I32 size = (PL_reg_maxiter + 7)/8;
2697 if (PL_reg_poscache) {
2698 if (PL_reg_poscache_size < size) {
2699 Renew(PL_reg_poscache, size, char);
2700 PL_reg_poscache_size = size;
2701 }
2702 Zero(PL_reg_poscache, size, char);
2703 }
2704 else {
2705 PL_reg_poscache_size = size;
2706 Newz(29, PL_reg_poscache, size, char);
2707 }
2708 DEBUG_r(
2709 PerlIO_printf(Perl_debug_log,
2710 "%sDetected a super-linear match, switching on caching%s...\n",
2711 PL_colors[4], PL_colors[5])
2712 );
2713 }
2714 if (PL_reg_leftiter < 0) {
2715 I32 o = locinput - PL_bostr, b;
2716
2717 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2718 b = o % 8;
2719 o /= 8;
2720 if (PL_reg_poscache[o] & (1<<b)) {
2721 DEBUG_r(
2722 PerlIO_printf(Perl_debug_log,
2723 "%*s already tried at this position...\n",
2724 REPORT_CODE_OFF+PL_regindent*2, "")
2725 );
7821416a 2726 sayNO_SILENT;
2c2d71f5
JH
2727 }
2728 PL_reg_poscache[o] |= (1<<b);
2729 }
2730 }
2731
a0d0e21e
LW
2732 /* Prefer next over scan for minimal matching. */
2733
2734 if (cc->minmod) {
3280af22 2735 PL_regcc = cc->oldcc;
2ab05381
IZ
2736 if (PL_regcc)
2737 ln = PL_regcc->cur;
5f05dabc 2738 cp = regcppush(cc->parenfloor);
02db2b7b 2739 REGCP_SET(lastcp);
5f05dabc 2740 if (regmatch(cc->next)) {
c277df42 2741 regcpblow(cp);
4633a7c4 2742 sayYES; /* All done. */
5f05dabc 2743 }
02db2b7b 2744 REGCP_UNWIND(lastcp);
5f05dabc 2745 regcppop();
2ab05381
IZ
2746 if (PL_regcc)
2747 PL_regcc->cur = ln;
3280af22 2748 PL_regcc = cc;
a0d0e21e 2749
c277df42 2750 if (n >= cc->max) { /* Maximum greed exceeded? */
e476b1b5 2751 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2752 && !(PL_reg_flags & RF_warned)) {
2753 PL_reg_flags |= RF_warned;
e476b1b5 2754 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2755 "Complex regular subexpression recursion",
2756 REG_INFTY - 1);
c277df42 2757 }
4633a7c4 2758 sayNO;
c277df42 2759 }
a687059c 2760
c277df42 2761 DEBUG_r(
c3464db5
DD
2762 PerlIO_printf(Perl_debug_log,
2763 "%*s trying longer...\n",
3280af22 2764 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2765 );
a0d0e21e 2766 /* Try scanning more and see if it helps. */
3280af22 2767 PL_reginput = locinput;
a0d0e21e
LW
2768 cc->cur = n;
2769 cc->lastloc = locinput;
5f05dabc 2770 cp = regcppush(cc->parenfloor);
02db2b7b 2771 REGCP_SET(lastcp);
5f05dabc 2772 if (regmatch(cc->scan)) {
c277df42 2773 regcpblow(cp);
4633a7c4 2774 sayYES;
5f05dabc 2775 }
02db2b7b 2776 REGCP_UNWIND(lastcp);
5f05dabc 2777 regcppop();
4633a7c4 2778 cc->cur = n - 1;
c277df42 2779 cc->lastloc = lastloc;
4633a7c4 2780 sayNO;
a0d0e21e
LW
2781 }
2782
2783 /* Prefer scan over next for maximal matching. */
2784
2785 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2786 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2787 cc->cur = n;
2788 cc->lastloc = locinput;
02db2b7b 2789 REGCP_SET(lastcp);
5f05dabc 2790 if (regmatch(cc->scan)) {
c277df42 2791 regcpblow(cp);
4633a7c4 2792 sayYES;
5f05dabc 2793 }
02db2b7b 2794 REGCP_UNWIND(lastcp);
a0d0e21e 2795 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2796 PL_reginput = locinput;
c277df42 2797 DEBUG_r(
c3464db5
DD
2798 PerlIO_printf(Perl_debug_log,
2799 "%*s failed, try continuation...\n",
3280af22 2800 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2801 );
2802 }
e476b1b5 2803 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2804 && !(PL_reg_flags & RF_warned)) {
3280af22 2805 PL_reg_flags |= RF_warned;
e476b1b5 2806 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2807 "Complex regular subexpression recursion",
2808 REG_INFTY - 1);
a0d0e21e
LW
2809 }
2810
2811 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2812 PL_regcc = cc->oldcc;
2ab05381
IZ
2813 if (PL_regcc)
2814 ln = PL_regcc->cur;
a0d0e21e 2815 if (regmatch(cc->next))
4633a7c4 2816 sayYES;
2ab05381
IZ
2817 if (PL_regcc)
2818 PL_regcc->cur = ln;
3280af22 2819 PL_regcc = cc;
4633a7c4 2820 cc->cur = n - 1;
c277df42 2821 cc->lastloc = lastloc;
4633a7c4 2822 sayNO;
a0d0e21e
LW
2823 }
2824 /* NOT REACHED */
c277df42
IZ
2825 case BRANCHJ:
2826 next = scan + ARG(scan);
2827 if (next == scan)
2828 next = NULL;
2829 inner = NEXTOPER(NEXTOPER(scan));
2830 goto do_branch;
2831 case BRANCH:
2832 inner = NEXTOPER(scan);
2833 do_branch:
2834 {
2835 CHECKPOINT lastcp;
2836 c1 = OP(scan);
2837 if (OP(next) != c1) /* No choice. */
2838 next = inner; /* Avoid recursion. */
a0d0e21e 2839 else {
02db2b7b
IZ
2840 I32 lastparen = *PL_reglastparen;
2841 I32 unwind1;
2842 re_unwind_branch_t *uw;
2843
2844 /* Put unwinding data on stack */
2845 unwind1 = SSNEWt(1,re_unwind_branch_t);
2846 uw = SSPTRt(unwind1,re_unwind_branch_t);
2847 uw->prev = unwind;
2848 unwind = unwind1;
2849 uw->type = ((c1 == BRANCH)
2850 ? RE_UNWIND_BRANCH
2851 : RE_UNWIND_BRANCHJ);
2852 uw->lastparen = lastparen;
2853 uw->next = next;
2854 uw->locinput = locinput;
2855 uw->nextchr = nextchr;
2856#ifdef DEBUGGING
2857 uw->regindent = ++PL_regindent;
2858#endif
c277df42 2859
02db2b7b
IZ
2860 REGCP_SET(uw->lastcp);
2861
2862 /* Now go into the first branch */
2863 next = inner;
a687059c 2864 }
a0d0e21e
LW
2865 }
2866 break;
2867 case MINMOD:
2868 minmod = 1;
2869 break;
c277df42
IZ
2870 case CURLYM:
2871 {
00db4c45 2872 I32 l = 0;
c277df42
IZ
2873 CHECKPOINT lastcp;
2874
2875 /* We suppose that the next guy does not need
2876 backtracking: in particular, it is of constant length,
2877 and has no parenths to influence future backrefs. */
2878 ln = ARG1(scan); /* min to match */
2879 n = ARG2(scan); /* max to match */
c277df42
IZ
2880 paren = scan->flags;
2881 if (paren) {
3280af22
NIS
2882 if (paren > PL_regsize)
2883 PL_regsize = paren;
2884 if (paren > *PL_reglastparen)
2885 *PL_reglastparen = paren;
c277df42 2886 }
dc45a647 2887 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2888 if (paren)
2889 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2890 PL_reginput = locinput;
c277df42
IZ
2891 if (minmod) {
2892 minmod = 0;
2893 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2894 sayNO;
5f4b28b2 2895 if (ln && l == 0 && n >= ln
c277df42
IZ
2896 /* In fact, this is tricky. If paren, then the
2897 fact that we did/didnot match may influence
2898 future execution. */
2899 && !(paren && ln == 0))
2900 ln = n;
3280af22 2901 locinput = PL_reginput;
22c35a8c 2902 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2903 c1 = (U8)*STRING(next);
c277df42 2904 if (OP(next) == EXACTF)
22c35a8c 2905 c2 = PL_fold[c1];
c277df42 2906 else if (OP(next) == EXACTFL)
22c35a8c 2907 c2 = PL_fold_locale[c1];
c277df42
IZ
2908 else
2909 c2 = c1;
a0ed51b3
LW
2910 }
2911 else
c277df42 2912 c1 = c2 = -1000;
02db2b7b 2913 REGCP_SET(lastcp);
5f4b28b2 2914 /* This may be improved if l == 0. */
c277df42
IZ
2915 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2916 /* If it could work, try it. */
2917 if (c1 == -1000 ||
3280af22
NIS
2918 UCHARAT(PL_reginput) == c1 ||
2919 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2920 {
2921 if (paren) {
2922 if (n) {
cf93c79d
IZ
2923 PL_regstartp[paren] =
2924 HOPc(PL_reginput, -l) - PL_bostr;
2925 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2926 }
2927 else
cf93c79d 2928 PL_regendp[paren] = -1;
c277df42
IZ
2929 }
2930 if (regmatch(next))
2931 sayYES;
02db2b7b 2932 REGCP_UNWIND(lastcp);
c277df42
IZ
2933 }
2934 /* Couldn't or didn't -- move forward. */
3280af22 2935 PL_reginput = locinput;
c277df42
IZ
2936 if (regrepeat_hard(scan, 1, &l)) {
2937 ln++;
3280af22 2938 locinput = PL_reginput;
c277df42
IZ
2939 }
2940 else
2941 sayNO;
2942 }
a0ed51b3
LW
2943 }
2944 else {
c277df42
IZ
2945 n = regrepeat_hard(scan, n, &l);
2946 if (n != 0 && l == 0
2947 /* In fact, this is tricky. If paren, then the
2948 fact that we did/didnot match may influence
2949 future execution. */
2950 && !(paren && ln == 0))
2951 ln = n;
3280af22 2952 locinput = PL_reginput;
c277df42 2953 DEBUG_r(
5c0ca799 2954 PerlIO_printf(Perl_debug_log,
faccc32b 2955 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 2956 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 2957 (IV) n, (IV)l)
c277df42
IZ
2958 );
2959 if (n >= ln) {
22c35a8c 2960 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2961 c1 = (U8)*STRING(next);
c277df42 2962 if (OP(next) == EXACTF)
22c35a8c 2963 c2 = PL_fold[c1];
c277df42 2964 else if (OP(next) == EXACTFL)
22c35a8c 2965 c2 = PL_fold_locale[c1];
c277df42
IZ
2966 else
2967 c2 = c1;
a0ed51b3
LW
2968 }
2969 else
c277df42
IZ
2970 c1 = c2 = -1000;
2971 }
02db2b7b 2972 REGCP_SET(lastcp);
c277df42
IZ
2973 while (n >= ln) {
2974 /* If it could work, try it. */
2975 if (c1 == -1000 ||
3280af22
NIS
2976 UCHARAT(PL_reginput) == c1 ||
2977 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
2978 {
2979 DEBUG_r(
c3464db5 2980 PerlIO_printf(Perl_debug_log,
7b0972df
JH
2981 "%*s trying tail with n=%"IVdf"...\n",
2982 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
2983 );
2984 if (paren) {
2985 if (n) {
cf93c79d
IZ
2986 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2987 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2988 }
a0ed51b3 2989 else
cf93c79d 2990 PL_regendp[paren] = -1;
c277df42 2991 }
a0ed51b3
LW
2992 if (regmatch(next))
2993 sayYES;
02db2b7b 2994 REGCP_UNWIND(lastcp);
a0ed51b3 2995 }
c277df42
IZ
2996 /* Couldn't or didn't -- back up. */
2997 n--;
dfe13c55 2998 locinput = HOPc(locinput, -l);
3280af22 2999 PL_reginput = locinput;
c277df42
IZ
3000 }
3001 }
3002 sayNO;
3003 break;
3004 }
3005 case CURLYN:
3006 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3007 if (paren > PL_regsize)
3008 PL_regsize = paren;
3009 if (paren > *PL_reglastparen)
3010 *PL_reglastparen = paren;
c277df42
IZ
3011 ln = ARG1(scan); /* min to match */
3012 n = ARG2(scan); /* max to match */
dc45a647 3013 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3014 goto repeat;
a0d0e21e 3015 case CURLY:
c277df42 3016 paren = 0;
a0d0e21e
LW
3017 ln = ARG1(scan); /* min to match */
3018 n = ARG2(scan); /* max to match */
dc45a647 3019 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3020 goto repeat;
3021 case STAR:
3022 ln = 0;
c277df42 3023 n = REG_INFTY;
a0d0e21e 3024 scan = NEXTOPER(scan);
c277df42 3025 paren = 0;
a0d0e21e
LW
3026 goto repeat;
3027 case PLUS:
c277df42
IZ
3028 ln = 1;
3029 n = REG_INFTY;
3030 scan = NEXTOPER(scan);
3031 paren = 0;
3032 repeat:
a0d0e21e
LW
3033 /*
3034 * Lookahead to avoid useless match attempts
3035 * when we know what character comes next.
3036 */
22c35a8c 3037 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3038 c1 = (U8)*STRING(next);
bbce6d69 3039 if (OP(next) == EXACTF)
22c35a8c 3040 c2 = PL_fold[c1];
bbce6d69 3041 else if (OP(next) == EXACTFL)
22c35a8c 3042 c2 = PL_fold_locale[c1];
bbce6d69 3043 else
3044 c2 = c1;
3045 }
a0d0e21e 3046 else
bbce6d69 3047 c1 = c2 = -1000;
3280af22 3048 PL_reginput = locinput;
a0d0e21e 3049 if (minmod) {
c277df42 3050 CHECKPOINT lastcp;
a0d0e21e
LW
3051 minmod = 0;
3052 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3053 sayNO;
a0ed51b3 3054 locinput = PL_reginput;
02db2b7b 3055 REGCP_SET(lastcp);
0fe9bf95
IZ
3056 if (c1 != -1000) {
3057 char *e = locinput + n - ln; /* Should not check after this */
3058 char *old = locinput;
3059
3060 if (e >= PL_regeol || (n == REG_INFTY))
3061 e = PL_regeol - 1;
3062 while (1) {
3063 /* Find place 'next' could work */
3064 if (c1 == c2) {
3065 while (locinput <= e && *locinput != c1)
3066 locinput++;
3067 } else {
3068 while (locinput <= e
3069 && *locinput != c1
3070 && *locinput != c2)
3071 locinput++;
3072 }
3073 if (locinput > e)
3074 sayNO;
3075 /* PL_reginput == old now */
3076 if (locinput != old) {
3077 ln = 1; /* Did some */
3078 if (regrepeat(scan, locinput - old) <
3079 locinput - old)
3080 sayNO;
3081 }
3082 /* PL_reginput == locinput now */
29d1e993 3083 TRYPAREN(paren, ln, locinput);
0fe9bf95 3084 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3085 REGCP_UNWIND(lastcp);
0fe9bf95
IZ
3086 /* Couldn't or didn't -- move forward. */
3087 old = locinput++;
3088 }
3089 }
3090 else
c277df42 3091 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 3092 /* If it could work, try it. */
bbce6d69 3093 if (c1 == -1000 ||
3280af22
NIS
3094 UCHARAT(PL_reginput) == c1 ||
3095 UCHARAT(PL_reginput) == c2)
bbce6d69 3096 {
29d1e993 3097 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3098 REGCP_UNWIND(lastcp);
bbce6d69 3099 }
c277df42 3100 /* Couldn't or didn't -- move forward. */
a0ed51b3 3101 PL_reginput = locinput;
a0d0e21e
LW
3102 if (regrepeat(scan, 1)) {
3103 ln++;
a0ed51b3
LW
3104 locinput = PL_reginput;
3105 }
3106 else
4633a7c4 3107 sayNO;
a0d0e21e
LW
3108 }
3109 }
3110 else {
c277df42 3111 CHECKPOINT lastcp;
a0d0e21e 3112 n = regrepeat(scan, n);
a0ed51b3 3113 locinput = PL_reginput;
22c35a8c 3114 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3115 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3116 ln = n; /* why back off? */
1aeab75a
GS
3117 /* ...because $ and \Z can match before *and* after
3118 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3119 We should back off by one in this case. */
3120 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3121 ln--;
3122 }
02db2b7b 3123 REGCP_SET(lastcp);
c277df42
IZ
3124 if (paren) {
3125 while (n >= ln) {
3126 /* If it could work, try it. */
3127 if (c1 == -1000 ||
3280af22
NIS
3128 UCHARAT(PL_reginput) == c1 ||
3129 UCHARAT(PL_reginput) == c2)
c277df42 3130 {
29d1e993 3131 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3132 REGCP_UNWIND(lastcp);
c277df42
IZ
3133 }
3134 /* Couldn't or didn't -- back up. */
3135 n--;
dfe13c55 3136 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3137 }
a0ed51b3
LW
3138 }
3139 else {
c277df42
IZ
3140 while (n >= ln) {
3141 /* If it could work, try it. */
3142 if (c1 == -1000 ||
3280af22
NIS
3143 UCHARAT(PL_reginput) == c1 ||
3144 UCHARAT(PL_reginput) == c2)
c277df42 3145 {
29d1e993 3146 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3147 REGCP_UNWIND(lastcp);
c277df42
IZ
3148 }
3149 /* Couldn't or didn't -- back up. */
3150 n--;
dfe13c55 3151 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3152 }
a0d0e21e
LW
3153 }
3154 }
4633a7c4 3155 sayNO;
c277df42 3156 break;
a0d0e21e 3157 case END:
0f5d15d6
IZ
3158 if (PL_reg_call_cc) {
3159 re_cc_state *cur_call_cc = PL_reg_call_cc;
3160 CURCUR *cctmp = PL_regcc;
3161 regexp *re = PL_reg_re;
3162 CHECKPOINT cp, lastcp;
3163
3164 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3165 REGCP_SET(lastcp);
0f5d15d6
IZ
3166 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3167 the caller. */
3168 PL_reginput = locinput; /* Make position available to
3169 the callcc. */
3170 cache_re(PL_reg_call_cc->re);
3171 PL_regcc = PL_reg_call_cc->cc;
3172 PL_reg_call_cc = PL_reg_call_cc->prev;
3173 if (regmatch(cur_call_cc->node)) {
3174 PL_reg_call_cc = cur_call_cc;
3175 regcpblow(cp);
3176 sayYES;
3177 }
02db2b7b 3178 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3179 regcppop();
3180 PL_reg_call_cc = cur_call_cc;
3181 PL_regcc = cctmp;
3182 PL_reg_re = re;
3183 cache_re(re);
3184
3185 DEBUG_r(
3186 PerlIO_printf(Perl_debug_log,
3187 "%*s continuation failed...\n",
3188 REPORT_CODE_OFF+PL_regindent*2, "")
3189 );
7821416a 3190 sayNO_SILENT;
0f5d15d6 3191 }
7821416a
IZ
3192 if (locinput < PL_regtill) {
3193 DEBUG_r(PerlIO_printf(Perl_debug_log,
3194 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3195 PL_colors[4],
3196 (long)(locinput - PL_reg_starttry),
3197 (long)(PL_regtill - PL_reg_starttry),
3198 PL_colors[5]));
3199 sayNO_FINAL; /* Cannot match: too short. */
3200 }
3201 PL_reginput = locinput; /* put where regtry can find it */
3202 sayYES_FINAL; /* Success! */
7e5428c5 3203 case SUCCEED:
3280af22 3204 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3205 sayYES_LOUD; /* Success! */
c277df42
IZ
3206 case SUSPEND:
3207 n = 1;
9fe1d20c 3208 PL_reginput = locinput;
c277df42 3209 goto do_ifmatch;
a0d0e21e 3210 case UNLESSM:
c277df42 3211 n = 0;
a0ed51b3 3212 if (scan->flags) {
0fe9bf95
IZ
3213 if (UTF) { /* XXXX This is absolutely
3214 broken, we read before
3215 start of string. */
3216 s = HOPMAYBEc(locinput, -scan->flags);
3217 if (!s)
3218 goto say_yes;
3219 PL_reginput = s;
3220 }
3221 else {
3222 if (locinput < PL_bostr + scan->flags)
3223 goto say_yes;
3224 PL_reginput = locinput - scan->flags;
3225 goto do_ifmatch;
3226 }
a0ed51b3
LW
3227 }
3228 else
3229 PL_reginput = locinput;
c277df42
IZ
3230 goto do_ifmatch;
3231 case IFMATCH:
3232 n = 1;
a0ed51b3 3233 if (scan->flags) {
0fe9bf95
IZ
3234 if (UTF) { /* XXXX This is absolutely
3235 broken, we read before
3236 start of string. */
3237 s = HOPMAYBEc(locinput, -scan->flags);
3238 if (!s || s < PL_bostr)
3239 goto say_no;
3240 PL_reginput = s;
3241 }
3242 else {
3243 if (locinput < PL_bostr + scan->flags)
3244 goto say_no;
3245 PL_reginput = locinput - scan->flags;
3246 goto do_ifmatch;
3247 }
a0ed51b3
LW
3248 }
3249 else
3250 PL_reginput = locinput;
3251
c277df42 3252 do_ifmatch:
c277df42
IZ
3253 inner = NEXTOPER(NEXTOPER(scan));
3254 if (regmatch(inner) != n) {
3255 say_no:
3256 if (logical) {
3257 logical = 0;
3258 sw = 0;
3259 goto do_longjump;
a0ed51b3
LW
3260 }
3261 else
c277df42
IZ
3262 sayNO;
3263 }
3264 say_yes:
3265 if (logical) {
3266 logical = 0;
3267 sw = 1;
3268 }
fe44a5e8 3269 if (OP(scan) == SUSPEND) {
3280af22 3270 locinput = PL_reginput;
565764a8 3271 nextchr = UCHARAT(locinput);
fe44a5e8 3272 }
c277df42
IZ
3273 /* FALL THROUGH. */
3274 case LONGJMP:
3275 do_longjump:
3276 next = scan + ARG(scan);
3277 if (next == scan)
3278 next = NULL;
a0d0e21e
LW
3279 break;
3280 default:
b900a521 3281 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3282 PTR2UV(scan), OP(scan));
cea2e8a9 3283 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3284 }
02db2b7b 3285 reenter:
a0d0e21e
LW
3286 scan = next;
3287 }
a687059c 3288
a0d0e21e
LW
3289 /*
3290 * We get here only if there's trouble -- normally "case END" is
3291 * the terminating point.
3292 */
cea2e8a9 3293 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3294 /*NOTREACHED*/
4633a7c4
LW
3295 sayNO;
3296
7821416a
IZ
3297yes_loud:
3298 DEBUG_r(
3299 PerlIO_printf(Perl_debug_log,
3300 "%*s %scould match...%s\n",
3301 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3302 );
3303 goto yes;
3304yes_final:
3305 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3306 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3307yes:
3308#ifdef DEBUGGING
3280af22 3309 PL_regindent--;
4633a7c4 3310#endif
02db2b7b
IZ
3311
3312#if 0 /* Breaks $^R */
3313 if (unwind)
3314 regcpblow(firstcp);
3315#endif
4633a7c4
LW
3316 return 1;
3317
3318no:
7821416a
IZ
3319 DEBUG_r(
3320 PerlIO_printf(Perl_debug_log,
3321 "%*s %sfailed...%s\n",
3322 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3323 );
3324 goto do_no;
3325no_final:
3326do_no:
02db2b7b
IZ
3327 if (unwind) {
3328 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3329
3330 switch (uw->type) {
3331 case RE_UNWIND_BRANCH:
3332 case RE_UNWIND_BRANCHJ:
3333 {
3334 re_unwind_branch_t *uwb = &(uw->branch);
3335 I32 lastparen = uwb->lastparen;
3336
3337 REGCP_UNWIND(uwb->lastcp);
3338 for (n = *PL_reglastparen; n > lastparen; n--)
3339 PL_regendp[n] = -1;
3340 *PL_reglastparen = n;
3341 scan = next = uwb->next;
3342 if ( !scan ||
3343 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3344 ? BRANCH : BRANCHJ) ) { /* Failure */
3345 unwind = uwb->prev;
3346#ifdef DEBUGGING
3347 PL_regindent--;
3348#endif
3349 goto do_no;
3350 }
3351 /* Have more choice yet. Reuse the same uwb. */
3352 /*SUPPRESS 560*/
3353 if ((n = (uwb->type == RE_UNWIND_BRANCH
3354 ? NEXT_OFF(next) : ARG(next))))
3355 next += n;
3356 else
3357 next = NULL; /* XXXX Needn't unwinding in this case... */
3358 uwb->next = next;
3359 next = NEXTOPER(scan);
3360 if (uwb->type == RE_UNWIND_BRANCHJ)
3361 next = NEXTOPER(next);
3362 locinput = uwb->locinput;
3363 nextchr = uwb->nextchr;
3364#ifdef DEBUGGING
3365 PL_regindent = uwb->regindent;
3366#endif
3367
3368 goto reenter;
3369 }
3370 /* NOT REACHED */
3371 default:
3372 Perl_croak(aTHX_ "regexp unwind memory corruption");
3373 }
3374 /* NOT REACHED */
3375 }
4633a7c4 3376#ifdef DEBUGGING
3280af22 3377 PL_regindent--;
4633a7c4 3378#endif
a0d0e21e 3379 return 0;
a687059c
LW
3380}
3381
3382/*
3383 - regrepeat - repeatedly match something simple, report how many
3384 */
3385/*
3386 * [This routine now assumes that it will only match on things of length 1.
3387 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3388 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3389 */
76e3520e 3390STATIC I32
cea2e8a9 3391S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3392{
a0d0e21e 3393 register char *scan;
a0d0e21e 3394 register I32 c;
3280af22 3395 register char *loceol = PL_regeol;
a0ed51b3 3396 register I32 hardcount = 0;
ffc61ed2 3397 register bool do_utf8 = DO_UTF8(PL_reg_sv);
a0d0e21e 3398
3280af22 3399 scan = PL_reginput;
c277df42 3400 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3401 loceol = scan + max;
a0d0e21e 3402 switch (OP(p)) {
22c35a8c 3403 case REG_ANY:
ffc61ed2
JH
3404 if (DO_UTF8(PL_reg_sv)) {
3405 loceol = PL_regeol;
3406 while (scan < loceol && *scan != '\n') {
3407 scan += UTF8SKIP(scan);
3408 hardcount++;
3409 }
3410 } else {
3411 while (scan < loceol && *scan != '\n')
3412 scan++;
a0ed51b3
LW
3413 }
3414 break;
ffc61ed2
JH
3415 case SANY:
3416 if (DO_UTF8(PL_reg_sv)) {
3417 loceol = PL_regeol;
3418 while (scan < loceol) {
3419 scan += UTF8SKIP(scan);
3420 hardcount++;
3421 }
3422 } else {
3423 scan = loceol;
a0ed51b3
LW
3424 }
3425 break;
bbce6d69 3426 case EXACT: /* length of string is 1 */
cd439c50 3427 c = (U8)*STRING(p);
bbce6d69 3428 while (scan < loceol && UCHARAT(scan) == c)
3429 scan++;
3430 break;
3431 case EXACTF: /* length of string is 1 */
cd439c50 3432 c = (U8)*STRING(p);
bbce6d69 3433 while (scan < loceol &&
22c35a8c 3434 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3435 scan++;
3436 break;
3437 case EXACTFL: /* length of string is 1 */
3280af22 3438 PL_reg_flags |= RF_tainted;
cd439c50 3439 c = (U8)*STRING(p);
bbce6d69 3440 while (scan < loceol &&
22c35a8c 3441 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3442 scan++;
3443 break;
3444 case ANYOF:
ffc61ed2
JH
3445 if (do_utf8) {
3446 loceol = PL_regeol;
3447 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8)) {
3448 scan += UTF8SKIP(scan);
3449 hardcount++;
3450 }
3451 } else {
3452 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3453 scan++;
3454 }
a0d0e21e
LW
3455 break;
3456 case ALNUM:
ffc61ed2
JH
3457 if (DO_UTF8(PL_reg_sv)) {
3458 loceol = PL_regeol;
3459 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
3460 scan += UTF8SKIP(scan);
3461 hardcount++;
3462 }
3463 } else {
3464 while (scan < loceol && isALNUM(*scan))
3465 scan++;
a0ed51b3
LW
3466 }
3467 break;
bbce6d69 3468 case ALNUML:
3280af22 3469 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
3470 if (DO_UTF8(PL_reg_sv)) {
3471 loceol = PL_regeol;
3472 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
3473 scan += UTF8SKIP(scan);
3474 hardcount++;
3475 }
3476 } else {
3477 while (scan < loceol && isALNUM_LC(*scan))
3478 scan++;
a0ed51b3
LW
3479 }
3480 break;
a0d0e21e 3481 case NALNUM:
ffc61ed2
JH
3482 if (DO_UTF8(PL_reg_sv)) {
3483 loceol = PL_regeol;
3484 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
3485 scan += UTF8SKIP(scan);
3486 hardcount++;
3487 }
3488 } else {
3489 while (scan < loceol && !isALNUM(*scan))
3490 scan++;
a0ed51b3
LW
3491 }
3492 break;
bbce6d69 3493 case NALNUML:
3280af22 3494 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
3495 if (DO_UTF8(PL_reg_sv)) {
3496 loceol = PL_regeol;
3497 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
3498 scan += UTF8SKIP(scan);
3499 hardcount++;
3500 }
3501 } else {
3502 while (scan < loceol && !isALNUM_LC(*scan))
3503 scan++;
a0ed51b3
LW
3504 }
3505 break;
a0d0e21e 3506 case SPACE:
ffc61ed2
JH
3507 if (DO_UTF8(PL_reg_sv)) {
3508 loceol = PL_regeol;
3509 while (scan < loceol &&
3510 (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3511 scan += UTF8SKIP(scan);
3512 hardcount++;
3513 }
3514 } else {
3515 while (scan < loceol && isSPACE(*scan))
3516 scan++;
a0ed51b3
LW
3517 }
3518 break;
bbce6d69 3519 case SPACEL:
3280af22 3520 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
3521 if (DO_UTF8(PL_reg_sv)) {
3522 loceol = PL_regeol;
3523 while (scan < loceol &&
3524 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3525 scan += UTF8SKIP(scan);
3526 hardcount++;
3527 }
3528 } else {
3529 while (scan < loceol && isSPACE_LC(*scan))
3530 scan++;
a0ed51b3
LW
3531 }
3532 break;
a0d0e21e 3533 case NSPACE:
ffc61ed2
JH
3534 if (DO_UTF8(PL_reg_sv)) {
3535 loceol = PL_regeol;
3536 while (scan < loceol &&
3537 !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3538 scan += UTF8SKIP(scan);
3539 hardcount++;
3540 }
3541 } else {
3542 while (scan < loceol && !isSPACE(*scan))
3543 scan++;
3544 break;
a0ed51b3 3545 }
bbce6d69 3546 case NSPACEL:
3280af22 3547 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
3548 if (DO_UTF8(PL_reg_sv)) {
3549 loceol = PL_regeol;
3550 while (scan < loceol &&
3551 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3552 scan += UTF8SKIP(scan);
3553 hardcount++;
3554 }
3555 } else {
3556 while (scan < loceol && !isSPACE_LC(*scan))
3557 scan++;
a0ed51b3
LW
3558 }
3559 break;
a0d0e21e 3560 case DIGIT:
ffc61ed2
JH
3561 if (DO_UTF8(PL_reg_sv)) {
3562 loceol = PL_regeol;
3563 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
3564 scan += UTF8SKIP(scan);
3565 hardcount++;
3566 }
3567 } else {
3568 while (scan < loceol && isDIGIT(*scan))
3569 scan++;
a0ed51b3
LW
3570 }
3571 break;
a0d0e21e 3572 case NDIGIT:
ffc61ed2
JH
3573 if (DO_UTF8(PL_reg_sv)) {
3574 loceol = PL_regeol;
3575 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
3576 scan += UTF8SKIP(scan);
3577 hardcount++;
3578 }
3579 } else {
3580 while (scan < loceol && !isDIGIT(*scan))
3581 scan++;
a0ed51b3
LW
3582 }
3583 break;
a0d0e21e
LW
3584 default: /* Called on something of 0 width. */
3585 break; /* So match right here or not at all. */
3586 }
a687059c 3587
a0ed51b3
LW
3588 if (hardcount)
3589 c = hardcount;
3590 else
3591 c = scan - PL_reginput;
3280af22 3592 PL_reginput = scan;
a687059c 3593
c277df42
IZ
3594 DEBUG_r(
3595 {
3596 SV *prop = sv_newmortal();
3597
3598 regprop(prop, p);
3599 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3600 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3601 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3602 });
3603
a0d0e21e 3604 return(c);
a687059c
LW
3605}
3606
3607/*
c277df42
IZ
3608 - regrepeat_hard - repeatedly match something, report total lenth and length
3609 *
3610 * The repeater is supposed to have constant length.
3611 */
3612
76e3520e 3613STATIC I32
cea2e8a9 3614S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42
IZ
3615{
3616 register char *scan;
3617 register char *start;
3280af22 3618 register char *loceol = PL_regeol;
a0ed51b3 3619 I32 l = 0;
708e3b05 3620 I32 count = 0, res = 1;
a0ed51b3
LW
3621
3622 if (!max)
3623 return 0;
c277df42 3624
3280af22 3625 start = PL_reginput;
a0ed51b3 3626 if (UTF) {
708e3b05 3627 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3628 if (!count++) {
3629 l = 0;
3630 while (start < PL_reginput) {
3631 l++;
3632 start += UTF8SKIP(start);
3633 }
3634 *lp = l;
3635 if (l == 0)
3636 return max;
3637 }
3638 if (count == max)
3639 return count;
3640 }
3641 }
3642 else {
708e3b05 3643 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3644 if (!count++) {
3645 *lp = l = PL_reginput - start;
3646 if (max != REG_INFTY && l*max < loceol - scan)
3647 loceol = scan + l*max;
3648 if (l == 0)
3649 return max;
c277df42
IZ
3650 }
3651 }
3652 }
708e3b05 3653 if (!res)
3280af22 3654 PL_reginput = scan;
c277df42 3655
a0ed51b3 3656 return count;
c277df42
IZ
3657}
3658
3659/*
ffc61ed2
JH
3660- regclass_swash - prepare the utf8 swash
3661*/
3662
3663SV *
3664Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
3665{
3666 SV *sw = NULL;
3667 SV *si = NULL;
3668
3669 if (PL_regdata && PL_regdata->count) {
3670 U32 n = ARG(node);
3671
3672 if (PL_regdata->what[n] == 's') {
3673 SV *rv = (SV*)PL_regdata->data[n];
3674 AV *av = (AV*)SvRV((SV*)rv);
3675 SV **a;
3676
3677 si = *av_fetch(av, 0, FALSE);
3678 a = av_fetch(av, 1, FALSE);
3679
3680 if (a)
3681 sw = *a;
3682 else if (si && doinit) {
3683 sw = swash_init("utf8", "", si, 1, 0);
3684 (void)av_store(av, 1, sw);
3685 }
3686 }
3687 }
3688
3689 if (initsvp)
3690 *initsvp = si;
3691
3692 return sw;
3693}
3694
3695/*
cb8d8820 3696 - reginclass - determine if a character falls into a character class
bbce6d69 3697 */
3698
76e3520e 3699STATIC bool
ffc61ed2 3700S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
bbce6d69 3701{
ffc61ed2 3702 char flags = ANYOF_FLAGS(n);
bbce6d69 3703 bool match = FALSE;
3704
ffc61ed2
JH
3705 if (do_utf8 || (flags & ANYOF_UNICODE)) {
3706 if (do_utf8 && !ANYOF_RUNTIME(n)) {
3707 STRLEN len;
3708 UV c = utf8_to_uv_simple(p, &len);
3709
3710 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
3711 match = TRUE;
bbce6d69 3712 }
bbce6d69 3713
ffc61ed2
JH
3714 if (!match) {
3715 SV *sw = regclass_swash(n, TRUE, 0);
3716
3717 if (sw) {
3718 if (swash_fetch(sw, p))
3719 match = TRUE;
3720 else if (flags & ANYOF_FOLD) {
3721 U8 tmpbuf[UTF8_MAXLEN+1];
3722
3723 if (flags & ANYOF_LOCALE) {
3724 PL_reg_flags |= RF_tainted;
3725 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3726 }
3727 else
3728 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3729 if (swash_fetch(sw, tmpbuf))
3730 match = TRUE;
3731 }
3732 }
bbce6d69 3733 }
3734 }
ffc61ed2
JH
3735 else {
3736 U8 c = *p;
bbce6d69 3737
ffc61ed2
JH
3738 if (ANYOF_BITMAP_TEST(n, c))
3739 match = TRUE;
3740 else if (flags & ANYOF_FOLD) {
3741 I32 f;
a0ed51b3 3742
ffc61ed2
JH
3743 if (flags & ANYOF_LOCALE) {
3744 PL_reg_flags |= RF_tainted;
3745 f = PL_fold_locale[c];
3746 }
3747 else
3748 f = PL_fold[c];
3749 if (f != c && ANYOF_BITMAP_TEST(n, f))
3750 match = TRUE;
3751 }
3752
3753 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 3754 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
3755 if (
3756 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3757 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3758 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
3759 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3760 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3761 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3762 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3763 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3764 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3765 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3766 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
3767 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
3768 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3769 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3770 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3771 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3772 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
3773 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3774 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
3775 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3776 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3777 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3778 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
3779 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3780 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3781 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3782 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3783 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3784 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
3785 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
3786 ) /* How's that for a conditional? */
3787 {
3788 match = TRUE;
3789 }
a0ed51b3 3790 }
a0ed51b3
LW
3791 }
3792
a0ed51b3
LW
3793 return (flags & ANYOF_INVERT) ? !match : match;
3794}
161b471a 3795
dfe13c55 3796STATIC U8 *
cea2e8a9 3797S_reghop(pTHX_ U8 *s, I32 off)
c485e607 3798{
a0ed51b3 3799 if (off >= 0) {
ffc61ed2
JH
3800 while (off-- && s < (U8*)PL_regeol) {
3801 /* XXX could check well-formedness here */
a0ed51b3 3802 s += UTF8SKIP(s);
ffc61ed2 3803 }
a0ed51b3
LW
3804 }
3805 else {
3806 while (off++) {
3807 if (s > (U8*)PL_bostr) {
3808 s--;
ffc61ed2
JH
3809 if (UTF8_IS_CONTINUED(*s)) {
3810 while (s > (U8*)PL_bostr && UTF8_IS_CONTINUATION(*s))
a0ed51b3 3811 s--;
ffc61ed2
JH
3812 }
3813 /* XXX could check well-formedness here */
a0ed51b3
LW
3814 }
3815 }
3816 }
3817 return s;
3818}
161b471a 3819
dfe13c55 3820STATIC U8 *
cea2e8a9 3821S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3
LW
3822{
3823 if (off >= 0) {
ffc61ed2
JH
3824 while (off-- && s < (U8*)PL_regeol) {
3825 /* XXX could check well-formedness here */
a0ed51b3 3826 s += UTF8SKIP(s);
ffc61ed2 3827 }
a0ed51b3
LW
3828 if (off >= 0)
3829 return 0;
3830 }
3831 else {
3832 while (off++) {
3833 if (s > (U8*)PL_bostr) {
3834 s--;
ffc61ed2
JH
3835 if (UTF8_IS_CONTINUED(*s)) {
3836 while (s > (U8*)PL_bostr && UTF8_IS_CONTINUATION(*s))
a0ed51b3 3837 s--;
ffc61ed2
JH
3838 }
3839 /* XXX could check well-formedness here */
a0ed51b3
LW
3840 }
3841 else
3842 break;
3843 }
3844 if (off <= 0)
3845 return 0;
3846 }
3847 return s;
3848}
51371543
GS
3849
3850#ifdef PERL_OBJECT
51371543
GS
3851#include "XSUB.h"
3852#endif
3853
3854static void
3855restore_pos(pTHXo_ void *arg)
3856{
51371543
GS
3857 if (PL_reg_eval_set) {
3858 if (PL_reg_oldsaved) {
3859 PL_reg_re->subbeg = PL_reg_oldsaved;
3860 PL_reg_re->sublen = PL_reg_oldsavedlen;
3861 RX_MATCH_COPIED_on(PL_reg_re);
3862 }
3863 PL_reg_magic->mg_len = PL_reg_oldpos;
3864 PL_reg_eval_set = 0;
3865 PL_curpm = PL_reg_oldcurpm;
3866 }
3867}