This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[patch perl@8102] dos/djgpp update
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
3818b22b 69 **** Copyright (c) 1991-2000, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
fde631ed
JH
109#ifdef DEBUGGING
110# define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch(*av_fetch((AV*)SvRV((SV*)PL_regdata->data[ARG2(f)]),0,FALSE),p))
111#else
112# define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
113#endif
a0ed51b3
LW
114
115#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
116#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
117
dfe13c55
GS
118#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
119#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
120#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
121#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
122#define HOPc(pos,off) ((char*)HOP(pos,off))
123#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 124
51371543
GS
125static void restore_pos(pTHXo_ void *arg);
126
127
76e3520e 128STATIC CHECKPOINT
cea2e8a9 129S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 130{
3280af22
NIS
131 int retval = PL_savestack_ix;
132 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
133 int p;
134
135 SSCHECK(i + 5);
3280af22 136 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
137 SSPUSHINT(PL_regendp[p]);
138 SSPUSHINT(PL_regstartp[p]);
3280af22 139 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
140 SSPUSHINT(p);
141 }
3280af22
NIS
142 SSPUSHINT(PL_regsize);
143 SSPUSHINT(*PL_reglastparen);
144 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
145 SSPUSHINT(i + 3);
146 SSPUSHINT(SAVEt_REGCONTEXT);
147 return retval;
148}
149
c277df42 150/* These are needed since we do not localize EVAL nodes: */
02db2b7b 151# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 152 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 153 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 154
02db2b7b 155# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 156 PerlIO_printf(Perl_debug_log, \
faccc32b 157 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 158 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 159
76e3520e 160STATIC char *
cea2e8a9 161S_regcppop(pTHX)
a0d0e21e
LW
162{
163 I32 i = SSPOPINT;
164 U32 paren = 0;
165 char *input;
cf93c79d 166 I32 tmps;
a0d0e21e
LW
167 assert(i == SAVEt_REGCONTEXT);
168 i = SSPOPINT;
169 input = (char *) SSPOPPTR;
3280af22
NIS
170 *PL_reglastparen = SSPOPINT;
171 PL_regsize = SSPOPINT;
c277df42 172 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 173 paren = (U32)SSPOPINT;
3280af22 174 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
175 PL_regstartp[paren] = SSPOPINT;
176 tmps = SSPOPINT;
3280af22
NIS
177 if (paren <= *PL_reglastparen)
178 PL_regendp[paren] = tmps;
c277df42 179 DEBUG_r(
c3464db5 180 PerlIO_printf(Perl_debug_log,
b900a521
JH
181 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
182 (UV)paren, (IV)PL_regstartp[paren],
183 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
184 (IV)PL_regendp[paren],
3280af22 185 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 186 );
a0d0e21e 187 }
c277df42 188 DEBUG_r(
3280af22 189 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 190 PerlIO_printf(Perl_debug_log,
faccc32b
JH
191 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
192 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
193 }
194 );
daf18116 195#if 1
dafc8851
JH
196 /* It would seem that the similar code in regtry()
197 * already takes care of this, and in fact it is in
198 * a better location to since this code can #if 0-ed out
199 * but the code in regtry() is needed or otherwise tests
200 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
201 * (as of patchlevel 7877) will fail. Then again,
202 * this code seems to be necessary or otherwise
203 * building DynaLoader will fail:
204 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
205 * --jhi */
3280af22
NIS
206 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
207 if (paren > PL_regsize)
cf93c79d
IZ
208 PL_regstartp[paren] = -1;
209 PL_regendp[paren] = -1;
a0d0e21e 210 }
dafc8851 211#endif
a0d0e21e
LW
212 return input;
213}
214
0f5d15d6 215STATIC char *
cea2e8a9 216S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
217{
218 I32 tmp = PL_savestack_ix;
219
220 PL_savestack_ix = ss;
221 regcppop();
222 PL_savestack_ix = tmp;
942e002e 223 return Nullch;
0f5d15d6
IZ
224}
225
226typedef struct re_cc_state
227{
228 I32 ss;
229 regnode *node;
230 struct re_cc_state *prev;
231 CURCUR *cc;
232 regexp *re;
233} re_cc_state;
234
02db2b7b 235#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 236
29d1e993
HS
237#define TRYPAREN(paren, n, input) { \
238 if (paren) { \
239 if (n) { \
240 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
241 PL_regendp[paren] = input - PL_bostr; \
242 } \
243 else \
244 PL_regendp[paren] = -1; \
245 } \
246 if (regmatch(next)) \
247 sayYES; \
248 if (paren && n) \
249 PL_regendp[paren] = -1; \
250}
251
252
a687059c 253/*
e50aee73 254 * pregexec and friends
a687059c
LW
255 */
256
257/*
c277df42 258 - pregexec - match a regexp against a string
a687059c 259 */
c277df42 260I32
864dbfa3 261Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 262 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
263/* strend: pointer to null at end of string */
264/* strbeg: real beginning of string */
265/* minend: end of match must be >=minend after stringarg. */
266/* nosave: For optimizations. */
267{
268 return
269 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
270 nosave ? 0 : REXEC_COPY_STR);
271}
0f5d15d6
IZ
272
273STATIC void
cea2e8a9 274S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
275{
276 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
277#ifdef DEBUGGING
278 PL_regprogram = prog->program;
279#endif
280 PL_regnpar = prog->nparens;
281 PL_regdata = prog->data;
282 PL_reg_re = prog;
283}
22e551b9 284
cad2e5aa
JH
285/*
286 * Need to implement the following flags for reg_anch:
287 *
288 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
289 * USE_INTUIT_ML
290 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
291 * INTUIT_AUTORITATIVE_ML
292 * INTUIT_ONCE_NOML - Intuit can match in one location only.
293 * INTUIT_ONCE_ML
294 *
295 * Another flag for this function: SECOND_TIME (so that float substrs
296 * with giant delta may be not rechecked).
297 */
298
299/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
300
2c2d71f5 301/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
302 Otherwise, only SvCUR(sv) is used to get strbeg. */
303
304/* XXXX We assume that strpos is strbeg unless sv. */
305
6eb5f6b9
JH
306/* XXXX Some places assume that there is a fixed substring.
307 An update may be needed if optimizer marks as "INTUITable"
308 RExen without fixed substrings. Similarly, it is assumed that
309 lengths of all the strings are no more than minlen, thus they
310 cannot come from lookahead.
311 (Or minlen should take into account lookahead.) */
312
2c2d71f5
JH
313/* A failure to find a constant substring means that there is no need to make
314 an expensive call to REx engine, thus we celebrate a failure. Similarly,
315 finding a substring too deep into the string means that less calls to
30944b6d
IZ
316 regtry() should be needed.
317
318 REx compiler's optimizer found 4 possible hints:
319 a) Anchored substring;
320 b) Fixed substring;
321 c) Whether we are anchored (beginning-of-line or \G);
322 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 323 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
324 string which does not contradict any of them.
325 */
2c2d71f5 326
6eb5f6b9
JH
327/* Most of decisions we do here should have been done at compile time.
328 The nodes of the REx which we used for the search should have been
329 deleted from the finite automaton. */
330
cad2e5aa
JH
331char *
332Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
333 char *strend, U32 flags, re_scream_pos_data *data)
334{
2c2d71f5 335 register I32 start_shift;
cad2e5aa 336 /* Should be nonnegative! */
2c2d71f5
JH
337 register I32 end_shift;
338 register char *s;
339 register SV *check;
a1933d95 340 char *strbeg;
cad2e5aa
JH
341 char *t;
342 I32 ml_anch;
2c2d71f5 343 char *tmp;
6eb5f6b9
JH
344 register char *other_last = Nullch; /* other substr checked before this */
345 char *check_at; /* check substr found at this pos */
30944b6d
IZ
346#ifdef DEBUGGING
347 char *i_strpos = strpos;
348#endif
cad2e5aa
JH
349
350 DEBUG_r( if (!PL_colorset) reginitcolors() );
351 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 352 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
353 PL_colors[4],PL_colors[5],PL_colors[0],
354 prog->precomp,
355 PL_colors[1],
356 (strlen(prog->precomp) > 60 ? "..." : ""),
357 PL_colors[0],
b900a521 358 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
359 strpos, PL_colors[1],
360 (strend - strpos > 60 ? "..." : ""))
361 );
362
2c2d71f5
JH
363 if (prog->minlen > strend - strpos) {
364 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 365 goto fail;
2c2d71f5 366 }
a1933d95 367 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
653099ff 368 check = prog->check_substr;
2c2d71f5 369 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
370 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
371 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 372 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 373
5675c1a6
IZ
374 if (!ml_anch) {
375 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
376 /* SvCUR is not set on references: SvRV and SvPVX overlap */
377 && sv && !SvROK(sv)
a1933d95 378 && (strpos != strbeg)) {
5675c1a6
IZ
379 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
380 goto fail;
381 }
382 if (prog->check_offset_min == prog->check_offset_max) {
2c2d71f5 383 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
384 I32 slen;
385
adac82c7 386 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 387 s = HOPc(strpos, prog->check_offset_min);
653099ff
GS
388 if (SvTAIL(check)) {
389 slen = SvCUR(check); /* >= 1 */
cad2e5aa 390
2c2d71f5
JH
391 if ( strend - s > slen || strend - s < slen - 1
392 || (strend - s == slen && strend[-1] != '\n')) {
393 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
394 goto fail_finish;
cad2e5aa
JH
395 }
396 /* Now should match s[0..slen-2] */
397 slen--;
653099ff 398 if (slen && (*SvPVX(check) != *s
cad2e5aa 399 || (slen > 1
653099ff 400 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
401 report_neq:
402 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
403 goto fail_finish;
404 }
cad2e5aa 405 }
653099ff
GS
406 else if (*SvPVX(check) != *s
407 || ((slen = SvCUR(check)) > 1
408 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
409 goto report_neq;
410 goto success_at_start;
5675c1a6 411 }
cad2e5aa 412 }
2c2d71f5 413 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 414 s = strpos;
2c2d71f5 415 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 416 end_shift = prog->minlen - start_shift -
653099ff 417 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 418 if (!ml_anch) {
653099ff
GS
419 I32 end = prog->check_offset_max + CHR_SVLEN(check)
420 - (SvTAIL(check) != 0);
2c2d71f5
JH
421 I32 eshift = strend - s - end;
422
423 if (end_shift < eshift)
424 end_shift = eshift;
425 }
cad2e5aa 426 }
2c2d71f5 427 else { /* Can match at random position */
cad2e5aa
JH
428 ml_anch = 0;
429 s = strpos;
2c2d71f5
JH
430 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
431 /* Should be nonnegative! */
432 end_shift = prog->minlen - start_shift -
653099ff 433 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
434 }
435
2c2d71f5 436#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 437 if (end_shift < 0)
6bbae5e6 438 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
439#endif
440
2c2d71f5
JH
441 restart:
442 /* Find a possible match in the region s..strend by looking for
443 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 444 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
445 I32 p = -1; /* Internal iterator of scream. */
446 I32 *pp = data ? data->scream_pos : &p;
447
2c2d71f5
JH
448 if (PL_screamfirst[BmRARE(check)] >= 0
449 || ( BmRARE(check) == '\n'
450 && (BmPREVIOUS(check) == SvCUR(check) - 1)
451 && SvTAIL(check) ))
452 s = screaminstr(sv, check,
453 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 454 else
2c2d71f5 455 goto fail_finish;
cad2e5aa
JH
456 if (data)
457 *data->scream_olds = s;
458 }
459 else
460 s = fbm_instr((unsigned char*)s + start_shift,
461 (unsigned char*)strend - end_shift,
2c2d71f5 462 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
463
464 /* Update the count-of-usability, remove useless subpatterns,
465 unshift s. */
2c2d71f5
JH
466
467 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
468 (s ? "Found" : "Did not find"),
469 ((check == prog->anchored_substr) ? "anchored" : "floating"),
470 PL_colors[0],
7b0972df
JH
471 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
472 SvPVX(check),
2c2d71f5
JH
473 PL_colors[1], (SvTAIL(check) ? "$" : ""),
474 (s ? " at offset " : "...\n") ) );
475
476 if (!s)
477 goto fail_finish;
478
6eb5f6b9
JH
479 check_at = s;
480
2c2d71f5 481 /* Finish the diagnostic message */
30944b6d 482 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
483
484 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
485 Start with the other substr.
486 XXXX no SCREAM optimization yet - and a very coarse implementation
487 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
488 *always* match. Probably should be marked during compile...
489 Probably it is right to do no SCREAM here...
490 */
491
492 if (prog->float_substr && prog->anchored_substr) {
30944b6d 493 /* Take into account the "other" substring. */
2c2d71f5
JH
494 /* XXXX May be hopelessly wrong for UTF... */
495 if (!other_last)
6eb5f6b9 496 other_last = strpos;
2c2d71f5 497 if (check == prog->float_substr) {
30944b6d
IZ
498 do_other_anchored:
499 {
2c2d71f5
JH
500 char *last = s - start_shift, *last1, *last2;
501 char *s1 = s;
502
503 tmp = PL_bostr;
504 t = s - prog->check_offset_max;
505 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
506 && (!(prog->reganch & ROPT_UTF8)
507 || (PL_bostr = strpos, /* Used in regcopmaybe() */
508 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
509 && t > strpos)))
30944b6d 510 /* EMPTY */;
2c2d71f5
JH
511 else
512 t = strpos;
513 t += prog->anchored_offset;
6eb5f6b9
JH
514 if (t < other_last) /* These positions already checked */
515 t = other_last;
2c2d71f5
JH
516 PL_bostr = tmp;
517 last2 = last1 = strend - prog->minlen;
518 if (last < last1)
519 last1 = last;
520 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
521 /* On end-of-str: see comment below. */
522 s = fbm_instr((unsigned char*)t,
523 (unsigned char*)last1 + prog->anchored_offset
524 + SvCUR(prog->anchored_substr)
525 - (SvTAIL(prog->anchored_substr)!=0),
526 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
527 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
528 (s ? "Found" : "Contradicts"),
529 PL_colors[0],
7b0972df
JH
530 (int)(SvCUR(prog->anchored_substr)
531 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
532 SvPVX(prog->anchored_substr),
533 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
534 if (!s) {
535 if (last1 >= last2) {
536 DEBUG_r(PerlIO_printf(Perl_debug_log,
537 ", giving up...\n"));
538 goto fail_finish;
539 }
540 DEBUG_r(PerlIO_printf(Perl_debug_log,
541 ", trying floating at offset %ld...\n",
30944b6d 542 (long)(s1 + 1 - i_strpos)));
2c2d71f5 543 PL_regeol = strend; /* Used in HOP() */
6eb5f6b9 544 other_last = last1 + prog->anchored_offset + 1;
2c2d71f5
JH
545 s = HOPc(last, 1);
546 goto restart;
547 }
548 else {
549 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 550 (long)(s - i_strpos)));
2c2d71f5 551 t = s - prog->anchored_offset;
6eb5f6b9 552 other_last = s + 1;
30944b6d 553 s = s1;
2c2d71f5
JH
554 if (t == strpos)
555 goto try_at_start;
2c2d71f5
JH
556 goto try_at_offset;
557 }
30944b6d 558 }
2c2d71f5
JH
559 }
560 else { /* Take into account the floating substring. */
561 char *last, *last1;
562 char *s1 = s;
563
564 t = s - start_shift;
565 last1 = last = strend - prog->minlen + prog->float_min_offset;
566 if (last - t > prog->float_max_offset)
567 last = t + prog->float_max_offset;
568 s = t + prog->float_min_offset;
6eb5f6b9
JH
569 if (s < other_last)
570 s = other_last;
2c2d71f5
JH
571 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
572 /* fbm_instr() takes into account exact value of end-of-str
573 if the check is SvTAIL(ed). Since false positives are OK,
574 and end-of-str is not later than strend we are OK. */
575 s = fbm_instr((unsigned char*)s,
576 (unsigned char*)last + SvCUR(prog->float_substr)
577 - (SvTAIL(prog->float_substr)!=0),
578 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
579 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
580 (s ? "Found" : "Contradicts"),
581 PL_colors[0],
7b0972df
JH
582 (int)(SvCUR(prog->float_substr)
583 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
584 SvPVX(prog->float_substr),
585 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
586 if (!s) {
587 if (last1 == last) {
588 DEBUG_r(PerlIO_printf(Perl_debug_log,
589 ", giving up...\n"));
590 goto fail_finish;
591 }
592 DEBUG_r(PerlIO_printf(Perl_debug_log,
593 ", trying anchored starting at offset %ld...\n",
30944b6d 594 (long)(s1 + 1 - i_strpos)));
6eb5f6b9 595 other_last = last + 1;
2c2d71f5
JH
596 PL_regeol = strend; /* Used in HOP() */
597 s = HOPc(t, 1);
598 goto restart;
599 }
600 else {
601 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 602 (long)(s - i_strpos)));
6eb5f6b9 603 other_last = s + 1;
30944b6d 604 s = s1;
2c2d71f5
JH
605 if (t == strpos)
606 goto try_at_start;
2c2d71f5
JH
607 goto try_at_offset;
608 }
609 }
cad2e5aa 610 }
2c2d71f5
JH
611
612 t = s - prog->check_offset_max;
613 tmp = PL_bostr;
614 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
615 && (!(prog->reganch & ROPT_UTF8)
616 || (PL_bostr = strpos, /* Used in regcopmaybe() */
617 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
618 && t > strpos)))) {
619 PL_bostr = tmp;
620 /* Fixed substring is found far enough so that the match
621 cannot start at strpos. */
622 try_at_offset:
cad2e5aa 623 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
624 /* Eventually fbm_*() should handle this, but often
625 anchored_offset is not 0, so this check will not be wasted. */
626 /* XXXX In the code below we prefer to look for "^" even in
627 presence of anchored substrings. And we search even
628 beyond the found float position. These pessimizations
629 are historical artefacts only. */
630 find_anchor:
2c2d71f5 631 while (t < strend - prog->minlen) {
cad2e5aa 632 if (*t == '\n') {
4ee3650e 633 if (t < check_at - prog->check_offset_min) {
30944b6d 634 if (prog->anchored_substr) {
4ee3650e
GS
635 /* Since we moved from the found position,
636 we definitely contradict the found anchored
30944b6d
IZ
637 substr. Due to the above check we do not
638 contradict "check" substr.
639 Thus we can arrive here only if check substr
640 is float. Redo checking for "other"=="fixed".
641 */
642 strpos = t + 1;
643 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
644 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
645 goto do_other_anchored;
646 }
4ee3650e
GS
647 /* We don't contradict the found floating substring. */
648 /* XXXX Why not check for STCLASS? */
cad2e5aa 649 s = t + 1;
2c2d71f5 650 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 651 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
652 goto set_useful;
653 }
4ee3650e
GS
654 /* Position contradicts check-string */
655 /* XXXX probably better to look for check-string
656 than for "\n", so one should lower the limit for t? */
657 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 658 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 659 other_last = strpos = s = t + 1;
cad2e5aa
JH
660 goto restart;
661 }
662 t++;
663 }
2c2d71f5
JH
664 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
665 PL_colors[0],PL_colors[1]));
666 goto fail_finish;
cad2e5aa 667 }
f5952150
GS
668 else {
669 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
670 PL_colors[0],PL_colors[1]));
671 }
cad2e5aa
JH
672 s = t;
673 set_useful:
2c2d71f5 674 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
675 }
676 else {
2c2d71f5 677 PL_bostr = tmp;
f5952150 678 /* The found string does not prohibit matching at strpos,
2c2d71f5 679 - no optimization of calling REx engine can be performed,
f5952150
GS
680 unless it was an MBOL and we are not after MBOL,
681 or a future STCLASS check will fail this. */
2c2d71f5
JH
682 try_at_start:
683 /* Even in this situation we may use MBOL flag if strpos is offset
684 wrt the start of the string. */
05b4157f 685 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 686 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
687 /* May be due to an implicit anchor of m{.*foo} */
688 && !(prog->reganch & ROPT_IMPLICIT))
689 {
cad2e5aa
JH
690 t = strpos;
691 goto find_anchor;
692 }
30944b6d 693 DEBUG_r( if (ml_anch)
f5952150
GS
694 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
695 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 696 );
2c2d71f5 697 success_at_start:
30944b6d 698 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 699 && prog->check_substr /* Could be deleted already */
cad2e5aa 700 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
701 && prog->check_substr == prog->float_substr)
702 {
cad2e5aa 703 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 704 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
705 SvREFCNT_dec(prog->check_substr);
706 prog->check_substr = Nullsv; /* disable */
707 prog->float_substr = Nullsv; /* clear */
5e39e1e5 708 check = Nullsv; /* abort */
cad2e5aa 709 s = strpos;
3cf5c195
IZ
710 /* XXXX This is a remnant of the old implementation. It
711 looks wasteful, since now INTUIT can use many
6eb5f6b9 712 other heuristics. */
cad2e5aa
JH
713 prog->reganch &= ~RE_USE_INTUIT;
714 }
715 else
716 s = strpos;
717 }
718
6eb5f6b9
JH
719 /* Last resort... */
720 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
721 if (prog->regstclass) {
722 /* minlen == 0 is possible if regstclass is \b or \B,
723 and the fixed substr is ''$.
724 Since minlen is already taken into account, s+1 is before strend;
725 accidentally, minlen >= 1 guaranties no false positives at s + 1
726 even for \b or \B. But (minlen? 1 : 0) below assumes that
727 regstclass does not come from lookahead... */
728 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
729 This leaves EXACTF only, which is dealt with in find_byclass(). */
66e933ab
GS
730 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
731 ? STR_LEN(prog->regstclass)
732 : 1);
6eb5f6b9 733 char *endpos = (prog->anchored_substr || ml_anch)
66e933ab
GS
734 ? s + (prog->minlen? cl_l : 0)
735 : (prog->float_substr ? check_at - start_shift + cl_l
6eb5f6b9 736 : strend) ;
a1933d95 737 char *startpos = strbeg;
6eb5f6b9
JH
738
739 t = s;
76384e4a
GS
740 if (prog->reganch & ROPT_UTF8) {
741 PL_regdata = prog->data; /* Used by REGINCLASS UTF logic */
742 PL_bostr = startpos;
743 }
6eb5f6b9
JH
744 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
745 if (!s) {
746#ifdef DEBUGGING
747 char *what;
748#endif
749 if (endpos == strend) {
750 DEBUG_r( PerlIO_printf(Perl_debug_log,
751 "Could not match STCLASS...\n") );
752 goto fail;
753 }
66e933ab
GS
754 DEBUG_r( PerlIO_printf(Perl_debug_log,
755 "This position contradicts STCLASS...\n") );
653099ff
GS
756 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
757 goto fail;
6eb5f6b9
JH
758 /* Contradict one of substrings */
759 if (prog->anchored_substr) {
6eb5f6b9
JH
760 if (prog->anchored_substr == check) {
761 DEBUG_r( what = "anchored" );
762 hop_and_restart:
763 PL_regeol = strend; /* Used in HOP() */
764 s = HOPc(t, 1);
66e933ab
GS
765 if (s + start_shift + end_shift > strend) {
766 /* XXXX Should be taken into account earlier? */
767 DEBUG_r( PerlIO_printf(Perl_debug_log,
768 "Could not match STCLASS...\n") );
769 goto fail;
770 }
5e39e1e5
HS
771 if (!check)
772 goto giveup;
6eb5f6b9 773 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 774 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
775 what, (long)(s + start_shift - i_strpos)) );
776 goto restart;
777 }
66e933ab 778 /* Have both, check_string is floating */
6eb5f6b9
JH
779 if (t + start_shift >= check_at) /* Contradicts floating=check */
780 goto retry_floating_check;
781 /* Recheck anchored substring, but not floating... */
782 s = check_at;
5e39e1e5
HS
783 if (!check)
784 goto giveup;
6eb5f6b9 785 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 786 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
787 (long)(other_last - i_strpos)) );
788 goto do_other_anchored;
789 }
60e71179
GS
790 /* Another way we could have checked stclass at the
791 current position only: */
792 if (ml_anch) {
793 s = t = t + 1;
5e39e1e5
HS
794 if (!check)
795 goto giveup;
60e71179 796 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
797 "Looking for /%s^%s/m starting at offset %ld...\n",
798 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 799 goto try_at_offset;
66e933ab 800 }
60e71179
GS
801 if (!prog->float_substr) /* Could have been deleted */
802 goto fail;
6eb5f6b9
JH
803 /* Check is floating subtring. */
804 retry_floating_check:
805 t = check_at - start_shift;
806 DEBUG_r( what = "floating" );
807 goto hop_and_restart;
808 }
809 DEBUG_r( if (t != s)
810 PerlIO_printf(Perl_debug_log,
811 "By STCLASS: moving %ld --> %ld\n",
812 (long)(t - i_strpos), (long)(s - i_strpos));
813 else
814 PerlIO_printf(Perl_debug_log,
815 "Does not contradict STCLASS...\n") );
816 }
5e39e1e5
HS
817 giveup:
818 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
819 PL_colors[4], (check ? "Guessed" : "Giving up"),
820 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 821 return s;
2c2d71f5
JH
822
823 fail_finish: /* Substring not found */
66e933ab
GS
824 if (prog->check_substr) /* could be removed already */
825 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 826 fail:
2c2d71f5 827 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
828 PL_colors[4],PL_colors[5]));
829 return Nullch;
830}
9661b544 831
6eb5f6b9 832/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
833STATIC char *
834S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 835{
6eb5f6b9
JH
836 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
837 char *m;
d8093b23
G
838 STRLEN ln;
839 unsigned int c1;
840 unsigned int c2;
6eb5f6b9
JH
841 char *e;
842 register I32 tmp = 1; /* Scratch variable? */
cad2e5aa 843
6eb5f6b9
JH
844 /* We know what class it must start with. */
845 switch (OP(c)) {
846 case ANYOFUTF8:
847 while (s < strend) {
848 if (REGINCLASSUTF8(c, (U8*)s)) {
849 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 850 goto got_it;
3cf5c195 851 else
6eb5f6b9 852 tmp = doevery;
a0ed51b3 853 }
6eb5f6b9
JH
854 else
855 tmp = 1;
a0ed51b3
LW
856 s += UTF8SKIP(s);
857 }
6eb5f6b9
JH
858 break;
859 case ANYOF:
a0ed51b3 860 while (s < strend) {
d8093b23 861 if (REGINCLASS(c, *(U8*)s)) {
6eb5f6b9
JH
862 if (tmp && (norun || regtry(prog, s)))
863 goto got_it;
864 else
865 tmp = doevery;
a0ed51b3 866 }
6eb5f6b9
JH
867 else
868 tmp = 1;
a0ed51b3 869 s++;
a0d0e21e 870 }
6eb5f6b9
JH
871 break;
872 case EXACTF:
873 m = STRING(c);
874 ln = STR_LEN(c);
d8093b23 875 c1 = *(U8*)m;
6eb5f6b9
JH
876 c2 = PL_fold[c1];
877 goto do_exactf;
878 case EXACTFL:
879 m = STRING(c);
880 ln = STR_LEN(c);
d8093b23 881 c1 = *(U8*)m;
6eb5f6b9
JH
882 c2 = PL_fold_locale[c1];
883 do_exactf:
884 e = strend - ln;
b3c9acc1 885
6eb5f6b9
JH
886 if (norun && e < s)
887 e = s; /* Due to minlen logic of intuit() */
b3c9acc1
IZ
888 /* Here it is NOT UTF! */
889 if (c1 == c2) {
890 while (s <= e) {
d8093b23 891 if ( *(U8*)s == c1
66e933ab
GS
892 && (ln == 1 || !(OP(c) == EXACTF
893 ? ibcmp(s, m, ln)
894 : ibcmp_locale(s, m, ln)))
6eb5f6b9 895 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
896 goto got_it;
897 s++;
898 }
899 } else {
900 while (s <= e) {
d8093b23 901 if ( (*(U8*)s == c1 || *(U8*)s == c2)
66e933ab
GS
902 && (ln == 1 || !(OP(c) == EXACTF
903 ? ibcmp(s, m, ln)
904 : ibcmp_locale(s, m, ln)))
6eb5f6b9 905 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
906 goto got_it;
907 s++;
908 }
909 }
910 break;
bbce6d69 911 case BOUNDL:
3280af22 912 PL_reg_flags |= RF_tainted;
bbce6d69 913 /* FALL THROUGH */
a0d0e21e 914 case BOUND:
6eb5f6b9 915 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 916 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 917 while (s < strend) {
95bac841 918 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 919 tmp = !tmp;
6eb5f6b9 920 if ((norun || regtry(prog, s)))
a0d0e21e 921 goto got_it;
a687059c 922 }
a0d0e21e
LW
923 s++;
924 }
6eb5f6b9 925 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
926 goto got_it;
927 break;
a0ed51b3
LW
928 case BOUNDLUTF8:
929 PL_reg_flags |= RF_tainted;
930 /* FALL THROUGH */
931 case BOUNDUTF8:
667bb95a
JH
932 if (s == startpos)
933 tmp = '\n';
934 else {
935 U8 *r = reghop((U8*)s, -1);
936
937 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
938 }
939 tmp = ((OP(c) == BOUNDUTF8 ?
940 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 941 while (s < strend) {
76384e4a 942 if (tmp == !(OP(c) == BOUNDUTF8 ?
dfe13c55
GS
943 swash_fetch(PL_utf8_alnum, (U8*)s) :
944 isALNUM_LC_utf8((U8*)s)))
945 {
a0ed51b3 946 tmp = !tmp;
6eb5f6b9 947 if ((norun || regtry(prog, s)))
a0ed51b3
LW
948 goto got_it;
949 }
950 s += UTF8SKIP(s);
951 }
6eb5f6b9 952 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
953 goto got_it;
954 break;
bbce6d69 955 case NBOUNDL:
3280af22 956 PL_reg_flags |= RF_tainted;
bbce6d69 957 /* FALL THROUGH */
a0d0e21e 958 case NBOUND:
6eb5f6b9 959 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 960 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 961 while (s < strend) {
95bac841 962 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 963 tmp = !tmp;
6eb5f6b9 964 else if ((norun || regtry(prog, s)))
a0d0e21e
LW
965 goto got_it;
966 s++;
967 }
6eb5f6b9 968 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
969 goto got_it;
970 break;
a0ed51b3
LW
971 case NBOUNDLUTF8:
972 PL_reg_flags |= RF_tainted;
973 /* FALL THROUGH */
974 case NBOUNDUTF8:
667bb95a
JH
975 if (s == startpos)
976 tmp = '\n';
977 else {
978 U8 *r = reghop((U8*)s, -1);
979
980 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
981 }
982 tmp = ((OP(c) == NBOUNDUTF8 ?
983 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 984 while (s < strend) {
76384e4a 985 if (tmp == !(OP(c) == NBOUNDUTF8 ?
dfe13c55
GS
986 swash_fetch(PL_utf8_alnum, (U8*)s) :
987 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 988 tmp = !tmp;
6eb5f6b9 989 else if ((norun || regtry(prog, s)))
a0ed51b3
LW
990 goto got_it;
991 s += UTF8SKIP(s);
992 }
6eb5f6b9 993 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
994 goto got_it;
995 break;
a0d0e21e
LW
996 case ALNUM:
997 while (s < strend) {
bbce6d69 998 if (isALNUM(*s)) {
6eb5f6b9 999 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1000 goto got_it;
1001 else
1002 tmp = doevery;
1003 }
1004 else
1005 tmp = 1;
1006 s++;
1007 }
1008 break;
a0ed51b3
LW
1009 case ALNUMUTF8:
1010 while (s < strend) {
dfe13c55 1011 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 1012 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1013 goto got_it;
1014 else
1015 tmp = doevery;
1016 }
1017 else
1018 tmp = 1;
1019 s += UTF8SKIP(s);
1020 }
1021 break;
bbce6d69 1022 case ALNUML:
3280af22 1023 PL_reg_flags |= RF_tainted;
bbce6d69 1024 while (s < strend) {
1025 if (isALNUM_LC(*s)) {
6eb5f6b9 1026 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1027 goto got_it;
a687059c 1028 else
a0d0e21e
LW
1029 tmp = doevery;
1030 }
1031 else
1032 tmp = 1;
1033 s++;
1034 }
1035 break;
a0ed51b3
LW
1036 case ALNUMLUTF8:
1037 PL_reg_flags |= RF_tainted;
1038 while (s < strend) {
dfe13c55 1039 if (isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1040 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1041 goto got_it;
1042 else
1043 tmp = doevery;
1044 }
1045 else
1046 tmp = 1;
1047 s += UTF8SKIP(s);
1048 }
1049 break;
a0d0e21e
LW
1050 case NALNUM:
1051 while (s < strend) {
bbce6d69 1052 if (!isALNUM(*s)) {
6eb5f6b9 1053 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1054 goto got_it;
1055 else
1056 tmp = doevery;
1057 }
1058 else
1059 tmp = 1;
1060 s++;
1061 }
1062 break;
a0ed51b3
LW
1063 case NALNUMUTF8:
1064 while (s < strend) {
dfe13c55 1065 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 1066 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1067 goto got_it;
1068 else
1069 tmp = doevery;
1070 }
1071 else
1072 tmp = 1;
1073 s += UTF8SKIP(s);
1074 }
1075 break;
bbce6d69 1076 case NALNUML:
3280af22 1077 PL_reg_flags |= RF_tainted;
bbce6d69 1078 while (s < strend) {
1079 if (!isALNUM_LC(*s)) {
6eb5f6b9 1080 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1081 goto got_it;
a687059c 1082 else
a0d0e21e 1083 tmp = doevery;
a687059c 1084 }
a0d0e21e
LW
1085 else
1086 tmp = 1;
1087 s++;
1088 }
1089 break;
a0ed51b3
LW
1090 case NALNUMLUTF8:
1091 PL_reg_flags |= RF_tainted;
1092 while (s < strend) {
dfe13c55 1093 if (!isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1094 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1095 goto got_it;
1096 else
1097 tmp = doevery;
1098 }
1099 else
1100 tmp = 1;
1101 s += UTF8SKIP(s);
1102 }
1103 break;
a0d0e21e
LW
1104 case SPACE:
1105 while (s < strend) {
1106 if (isSPACE(*s)) {
6eb5f6b9 1107 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1108 goto got_it;
1109 else
1110 tmp = doevery;
2304df62 1111 }
a0d0e21e
LW
1112 else
1113 tmp = 1;
1114 s++;
1115 }
1116 break;
a0ed51b3
LW
1117 case SPACEUTF8:
1118 while (s < strend) {
dfe13c55 1119 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
6eb5f6b9 1120 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1121 goto got_it;
1122 else
1123 tmp = doevery;
1124 }
1125 else
1126 tmp = 1;
1127 s += UTF8SKIP(s);
1128 }
1129 break;
bbce6d69 1130 case SPACEL:
3280af22 1131 PL_reg_flags |= RF_tainted;
bbce6d69 1132 while (s < strend) {
1133 if (isSPACE_LC(*s)) {
6eb5f6b9 1134 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1135 goto got_it;
1136 else
1137 tmp = doevery;
1138 }
1139 else
1140 tmp = 1;
1141 s++;
1142 }
1143 break;
a0ed51b3
LW
1144 case SPACELUTF8:
1145 PL_reg_flags |= RF_tainted;
1146 while (s < strend) {
dfe13c55 1147 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
6eb5f6b9 1148 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1149 goto got_it;
1150 else
1151 tmp = doevery;
1152 }
1153 else
1154 tmp = 1;
1155 s += UTF8SKIP(s);
1156 }
1157 break;
a0d0e21e
LW
1158 case NSPACE:
1159 while (s < strend) {
1160 if (!isSPACE(*s)) {
6eb5f6b9 1161 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1162 goto got_it;
1163 else
1164 tmp = doevery;
a687059c 1165 }
a0d0e21e
LW
1166 else
1167 tmp = 1;
1168 s++;
1169 }
1170 break;
a0ed51b3
LW
1171 case NSPACEUTF8:
1172 while (s < strend) {
dfe13c55 1173 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
6eb5f6b9 1174 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1175 goto got_it;
1176 else
1177 tmp = doevery;
1178 }
1179 else
1180 tmp = 1;
1181 s += UTF8SKIP(s);
1182 }
1183 break;
bbce6d69 1184 case NSPACEL:
3280af22 1185 PL_reg_flags |= RF_tainted;
bbce6d69 1186 while (s < strend) {
1187 if (!isSPACE_LC(*s)) {
6eb5f6b9 1188 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1189 goto got_it;
1190 else
1191 tmp = doevery;
1192 }
1193 else
1194 tmp = 1;
1195 s++;
1196 }
1197 break;
a0ed51b3
LW
1198 case NSPACELUTF8:
1199 PL_reg_flags |= RF_tainted;
1200 while (s < strend) {
dfe13c55 1201 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
6eb5f6b9 1202 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1203 goto got_it;
1204 else
1205 tmp = doevery;
1206 }
1207 else
1208 tmp = 1;
1209 s += UTF8SKIP(s);
1210 }
1211 break;
a0d0e21e
LW
1212 case DIGIT:
1213 while (s < strend) {
1214 if (isDIGIT(*s)) {
6eb5f6b9 1215 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1216 goto got_it;
1217 else
1218 tmp = doevery;
2b69d0c2 1219 }
a0d0e21e
LW
1220 else
1221 tmp = 1;
1222 s++;
1223 }
1224 break;
a0ed51b3
LW
1225 case DIGITUTF8:
1226 while (s < strend) {
dfe13c55 1227 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1228 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1229 goto got_it;
1230 else
1231 tmp = doevery;
1232 }
1233 else
1234 tmp = 1;
1235 s += UTF8SKIP(s);
1236 }
1237 break;
b8c5462f
JH
1238 case DIGITL:
1239 PL_reg_flags |= RF_tainted;
1240 while (s < strend) {
1241 if (isDIGIT_LC(*s)) {
6eb5f6b9 1242 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1243 goto got_it;
1244 else
1245 tmp = doevery;
1246 }
1247 else
1248 tmp = 1;
1249 s++;
1250 }
1251 break;
1252 case DIGITLUTF8:
1253 PL_reg_flags |= RF_tainted;
1254 while (s < strend) {
1255 if (isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1256 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1257 goto got_it;
1258 else
1259 tmp = doevery;
1260 }
1261 else
1262 tmp = 1;
1263 s += UTF8SKIP(s);
1264 }
1265 break;
a0d0e21e
LW
1266 case NDIGIT:
1267 while (s < strend) {
1268 if (!isDIGIT(*s)) {
6eb5f6b9 1269 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1270 goto got_it;
1271 else
1272 tmp = doevery;
a687059c 1273 }
a0d0e21e
LW
1274 else
1275 tmp = 1;
1276 s++;
1277 }
1278 break;
a0ed51b3
LW
1279 case NDIGITUTF8:
1280 while (s < strend) {
dfe13c55 1281 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1282 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1283 goto got_it;
1284 else
1285 tmp = doevery;
1286 }
1287 else
1288 tmp = 1;
1289 s += UTF8SKIP(s);
1290 }
1291 break;
b8c5462f
JH
1292 case NDIGITL:
1293 PL_reg_flags |= RF_tainted;
1294 while (s < strend) {
1295 if (!isDIGIT_LC(*s)) {
6eb5f6b9 1296 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1297 goto got_it;
1298 else
1299 tmp = doevery;
1300 }
1301 else
1302 tmp = 1;
1303 s++;
a0ed51b3 1304 }
b8c5462f
JH
1305 break;
1306 case NDIGITLUTF8:
1307 PL_reg_flags |= RF_tainted;
1308 while (s < strend) {
1309 if (!isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1310 if (tmp && (norun || regtry(prog, s)))
b8c5462f 1311 goto got_it;
cf93c79d 1312 else
b8c5462f
JH
1313 tmp = doevery;
1314 }
1315 else
1316 tmp = 1;
1317 s += UTF8SKIP(s);
1318 }
1319 break;
b3c9acc1 1320 default:
3c3eec57
GS
1321 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1322 break;
d6a28714 1323 }
6eb5f6b9
JH
1324 return 0;
1325 got_it:
1326 return s;
1327}
1328
1329/*
1330 - regexec_flags - match a regexp against a string
1331 */
1332I32
1333Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1334 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1335/* strend: pointer to null at end of string */
1336/* strbeg: real beginning of string */
1337/* minend: end of match must be >=minend after stringarg. */
1338/* data: May be used for some additional optimizations. */
1339/* nosave: For optimizations. */
1340{
6eb5f6b9
JH
1341 register char *s;
1342 register regnode *c;
1343 register char *startpos = stringarg;
6eb5f6b9
JH
1344 I32 minlen; /* must match at least this many chars */
1345 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1346 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1347 constant substr. */ /* CC */
1348 I32 end_shift = 0; /* Same for the end. */ /* CC */
1349 I32 scream_pos = -1; /* Internal iterator of scream. */
1350 char *scream_olds;
1351 SV* oreplsv = GvSV(PL_replgv);
1352
1353 PL_regcc = 0;
1354
1355 cache_re(prog);
1356#ifdef DEBUGGING
1357 PL_regnarrate = PL_debug & 512;
1358#endif
1359
1360 /* Be paranoid... */
1361 if (prog == NULL || startpos == NULL) {
1362 Perl_croak(aTHX_ "NULL regexp parameter");
1363 return 0;
1364 }
1365
1366 minlen = prog->minlen;
1367 if (strend - startpos < minlen) goto phooey;
1368
1369 if (startpos == strbeg) /* is ^ valid at stringarg? */
1370 PL_regprev = '\n';
1371 else {
1372 PL_regprev = (U32)stringarg[-1];
1373 if (!PL_multiline && PL_regprev == '\n')
1374 PL_regprev = '\0'; /* force ^ to NOT match */
1375 }
1376
1377 /* Check validity of program. */
1378 if (UCHARAT(prog->program) != REG_MAGIC) {
1379 Perl_croak(aTHX_ "corrupted regexp program");
1380 }
1381
1382 PL_reg_flags = 0;
1383 PL_reg_eval_set = 0;
1384 PL_reg_maxiter = 0;
1385
1386 if (prog->reganch & ROPT_UTF8)
1387 PL_reg_flags |= RF_utf8;
1388
1389 /* Mark beginning of line for ^ and lookbehind. */
1390 PL_regbol = startpos;
1391 PL_bostr = strbeg;
1392 PL_reg_sv = sv;
1393
1394 /* Mark end of line for $ (and such) */
1395 PL_regeol = strend;
1396
1397 /* see how far we have to get to not match where we matched before */
1398 PL_regtill = startpos+minend;
1399
1400 /* We start without call_cc context. */
1401 PL_reg_call_cc = 0;
1402
1403 /* If there is a "must appear" string, look for it. */
1404 s = startpos;
1405
1406 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1407 MAGIC *mg;
1408
1409 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1410 PL_reg_ganch = startpos;
1411 else if (sv && SvTYPE(sv) >= SVt_PVMG
1412 && SvMAGIC(sv)
1413 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1414 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1415 if (prog->reganch & ROPT_ANCH_GPOS) {
1416 if (s > PL_reg_ganch)
1417 goto phooey;
1418 s = PL_reg_ganch;
1419 }
1420 }
1421 else /* pos() not defined */
1422 PL_reg_ganch = strbeg;
1423 }
1424
1425 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1426 re_scream_pos_data d;
1427
1428 d.scream_olds = &scream_olds;
1429 d.scream_pos = &scream_pos;
1430 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1431 if (!s)
1432 goto phooey; /* not present */
1433 }
1434
1435 DEBUG_r( if (!PL_colorset) reginitcolors() );
1436 DEBUG_r(PerlIO_printf(Perl_debug_log,
1437 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1438 PL_colors[4],PL_colors[5],PL_colors[0],
1439 prog->precomp,
1440 PL_colors[1],
1441 (strlen(prog->precomp) > 60 ? "..." : ""),
1442 PL_colors[0],
1443 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1444 startpos, PL_colors[1],
1445 (strend - startpos > 60 ? "..." : ""))
1446 );
1447
1448 /* Simplest case: anchored match need be tried only once. */
1449 /* [unless only anchor is BOL and multiline is set] */
1450 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1451 if (s == startpos && regtry(prog, startpos))
1452 goto got_it;
1453 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1454 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1455 {
1456 char *end;
1457
1458 if (minlen)
1459 dontbother = minlen - 1;
1460 end = HOPc(strend, -dontbother) - 1;
1461 /* for multiline we only have to try after newlines */
1462 if (prog->check_substr) {
1463 if (s == startpos)
1464 goto after_try;
1465 while (1) {
1466 if (regtry(prog, s))
1467 goto got_it;
1468 after_try:
1469 if (s >= end)
1470 goto phooey;
1471 if (prog->reganch & RE_USE_INTUIT) {
1472 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1473 if (!s)
1474 goto phooey;
1475 }
1476 else
1477 s++;
1478 }
1479 } else {
1480 if (s > startpos)
1481 s--;
1482 while (s < end) {
1483 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1484 if (regtry(prog, s))
1485 goto got_it;
1486 }
1487 }
1488 }
1489 }
1490 goto phooey;
1491 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1492 if (regtry(prog, PL_reg_ganch))
1493 goto got_it;
1494 goto phooey;
1495 }
1496
1497 /* Messy cases: unanchored match. */
1498 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1499 /* we have /x+whatever/ */
1500 /* it must be a one character string (XXXX Except UTF?) */
1501 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1502#ifdef DEBUGGING
1503 int did_match = 0;
1504#endif
1505
6eb5f6b9
JH
1506 if (UTF) {
1507 while (s < strend) {
1508 if (*s == ch) {
bf93d4cc 1509 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1510 if (regtry(prog, s)) goto got_it;
1511 s += UTF8SKIP(s);
1512 while (s < strend && *s == ch)
1513 s += UTF8SKIP(s);
1514 }
1515 s += UTF8SKIP(s);
1516 }
1517 }
1518 else {
1519 while (s < strend) {
1520 if (*s == ch) {
bf93d4cc 1521 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1522 if (regtry(prog, s)) goto got_it;
1523 s++;
1524 while (s < strend && *s == ch)
1525 s++;
1526 }
1527 s++;
1528 }
1529 }
bf93d4cc
GS
1530 DEBUG_r(did_match ||
1531 PerlIO_printf(Perl_debug_log,
1532 "Did not find anchored character...\n"));
6eb5f6b9
JH
1533 }
1534 /*SUPPRESS 560*/
1535 else if (prog->anchored_substr != Nullsv
1536 || (prog->float_substr != Nullsv
1537 && prog->float_max_offset < strend - s)) {
1538 SV *must = prog->anchored_substr
1539 ? prog->anchored_substr : prog->float_substr;
1540 I32 back_max =
1541 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1542 I32 back_min =
1543 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
6eb5f6b9
JH
1544 char *last = HOPc(strend, /* Cannot start after this */
1545 -(I32)(CHR_SVLEN(must)
1546 - (SvTAIL(must) != 0) + back_min));
1547 char *last1; /* Last position checked before */
bf93d4cc
GS
1548#ifdef DEBUGGING
1549 int did_match = 0;
1550#endif
6eb5f6b9
JH
1551
1552 if (s > PL_bostr)
1553 last1 = HOPc(s, -1);
1554 else
1555 last1 = s - 1; /* bogus */
1556
1557 /* XXXX check_substr already used to find `s', can optimize if
1558 check_substr==must. */
1559 scream_pos = -1;
1560 dontbother = end_shift;
1561 strend = HOPc(strend, -dontbother);
1562 while ( (s <= last) &&
1563 ((flags & REXEC_SCREAM)
1564 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1565 end_shift, &scream_pos, 0))
1566 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1567 (unsigned char*)strend, must,
1568 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1569 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1570 if (HOPc(s, -back_max) > last1) {
1571 last1 = HOPc(s, -back_min);
1572 s = HOPc(s, -back_max);
1573 }
1574 else {
1575 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1576
1577 last1 = HOPc(s, -back_min);
1578 s = t;
1579 }
1580 if (UTF) {
1581 while (s <= last1) {
1582 if (regtry(prog, s))
1583 goto got_it;
1584 s += UTF8SKIP(s);
1585 }
1586 }
1587 else {
1588 while (s <= last1) {
1589 if (regtry(prog, s))
1590 goto got_it;
1591 s++;
1592 }
1593 }
1594 }
bf93d4cc
GS
1595 DEBUG_r(did_match ||
1596 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1597 ((must == prog->anchored_substr)
1598 ? "anchored" : "floating"),
1599 PL_colors[0],
1600 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1601 SvPVX(must),
1602 PL_colors[1], (SvTAIL(must) ? "$" : "")));
6eb5f6b9
JH
1603 goto phooey;
1604 }
155aba94 1605 else if ((c = prog->regstclass)) {
66e933ab
GS
1606 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1607 /* don't bother with what can't match */
6eb5f6b9
JH
1608 strend = HOPc(strend, -(minlen - 1));
1609 if (find_byclass(prog, c, s, strend, startpos, 0))
1610 goto got_it;
bf93d4cc 1611 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1612 }
1613 else {
1614 dontbother = 0;
1615 if (prog->float_substr != Nullsv) { /* Trim the end. */
1616 char *last;
d6a28714
JH
1617
1618 if (flags & REXEC_SCREAM) {
1619 last = screaminstr(sv, prog->float_substr, s - strbeg,
1620 end_shift, &scream_pos, 1); /* last one */
1621 if (!last)
1622 last = scream_olds; /* Only one occurence. */
b8c5462f 1623 }
d6a28714
JH
1624 else {
1625 STRLEN len;
1626 char *little = SvPV(prog->float_substr, len);
1627
1628 if (SvTAIL(prog->float_substr)) {
1629 if (memEQ(strend - len + 1, little, len - 1))
1630 last = strend - len + 1;
1631 else if (!PL_multiline)
1632 last = memEQ(strend - len, little, len)
1633 ? strend - len : Nullch;
b8c5462f 1634 else
d6a28714
JH
1635 goto find_last;
1636 } else {
1637 find_last:
1638 if (len)
1639 last = rninstr(s, strend, little, little + len);
b8c5462f 1640 else
d6a28714 1641 last = strend; /* matching `$' */
b8c5462f 1642 }
b8c5462f 1643 }
bf93d4cc
GS
1644 if (last == NULL) {
1645 DEBUG_r(PerlIO_printf(Perl_debug_log,
1646 "%sCan't trim the tail, match fails (should not happen)%s\n",
1647 PL_colors[4],PL_colors[5]));
1648 goto phooey; /* Should not happen! */
1649 }
d6a28714
JH
1650 dontbother = strend - last + prog->float_min_offset;
1651 }
1652 if (minlen && (dontbother < minlen))
1653 dontbother = minlen - 1;
1654 strend -= dontbother; /* this one's always in bytes! */
1655 /* We don't know much -- general case. */
1656 if (UTF) {
1657 for (;;) {
1658 if (regtry(prog, s))
1659 goto got_it;
1660 if (s >= strend)
1661 break;
b8c5462f 1662 s += UTF8SKIP(s);
d6a28714
JH
1663 };
1664 }
1665 else {
1666 do {
1667 if (regtry(prog, s))
1668 goto got_it;
1669 } while (s++ < strend);
1670 }
1671 }
1672
1673 /* Failure. */
1674 goto phooey;
1675
1676got_it:
1677 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1678
1679 if (PL_reg_eval_set) {
1680 /* Preserve the current value of $^R */
1681 if (oreplsv != GvSV(PL_replgv))
1682 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1683 restored, the value remains
1684 the same. */
1685 restore_pos(aTHXo_ 0);
1686 }
1687
1688 /* make sure $`, $&, $', and $digit will work later */
1689 if ( !(flags & REXEC_NOT_FIRST) ) {
1690 if (RX_MATCH_COPIED(prog)) {
1691 Safefree(prog->subbeg);
1692 RX_MATCH_COPIED_off(prog);
1693 }
1694 if (flags & REXEC_COPY_STR) {
1695 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1696
1697 s = savepvn(strbeg, i);
1698 prog->subbeg = s;
1699 prog->sublen = i;
1700 RX_MATCH_COPIED_on(prog);
1701 }
1702 else {
1703 prog->subbeg = strbeg;
1704 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1705 }
1706 }
1707
1708 return 1;
1709
1710phooey:
bf93d4cc
GS
1711 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1712 PL_colors[4],PL_colors[5]));
d6a28714
JH
1713 if (PL_reg_eval_set)
1714 restore_pos(aTHXo_ 0);
1715 return 0;
1716}
1717
1718/*
1719 - regtry - try match at specific point
1720 */
1721STATIC I32 /* 0 failure, 1 success */
1722S_regtry(pTHX_ regexp *prog, char *startpos)
1723{
d6a28714
JH
1724 register I32 i;
1725 register I32 *sp;
1726 register I32 *ep;
1727 CHECKPOINT lastcp;
1728
02db2b7b
IZ
1729#ifdef DEBUGGING
1730 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1731#endif
d6a28714
JH
1732 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1733 MAGIC *mg;
1734
1735 PL_reg_eval_set = RS_init;
1736 DEBUG_r(DEBUG_s(
b900a521
JH
1737 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1738 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1739 ));
e8347627 1740 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1741 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1742 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1743 SAVETMPS;
1744 /* Apparently this is not needed, judging by wantarray. */
e8347627 1745 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1746 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1747
1748 if (PL_reg_sv) {
1749 /* Make $_ available to executed code. */
1750 if (PL_reg_sv != DEFSV) {
1751 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1752 SAVESPTR(DEFSV);
1753 DEFSV = PL_reg_sv;
b8c5462f 1754 }
d6a28714
JH
1755
1756 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1757 && (mg = mg_find(PL_reg_sv, 'g')))) {
1758 /* prepare for quick setting of pos */
1759 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1760 mg = mg_find(PL_reg_sv, 'g');
1761 mg->mg_len = -1;
b8c5462f 1762 }
d6a28714
JH
1763 PL_reg_magic = mg;
1764 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1765 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1766 }
1767 if (!PL_reg_curpm)
0f79a09d 1768 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1769 PL_reg_curpm->op_pmregexp = prog;
1770 PL_reg_oldcurpm = PL_curpm;
1771 PL_curpm = PL_reg_curpm;
1772 if (RX_MATCH_COPIED(prog)) {
1773 /* Here is a serious problem: we cannot rewrite subbeg,
1774 since it may be needed if this match fails. Thus
1775 $` inside (?{}) could fail... */
1776 PL_reg_oldsaved = prog->subbeg;
1777 PL_reg_oldsavedlen = prog->sublen;
1778 RX_MATCH_COPIED_off(prog);
1779 }
1780 else
1781 PL_reg_oldsaved = Nullch;
1782 prog->subbeg = PL_bostr;
1783 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1784 }
1785 prog->startp[0] = startpos - PL_bostr;
1786 PL_reginput = startpos;
1787 PL_regstartp = prog->startp;
1788 PL_regendp = prog->endp;
1789 PL_reglastparen = &prog->lastparen;
1790 prog->lastparen = 0;
1791 PL_regsize = 0;
1792 DEBUG_r(PL_reg_starttry = startpos);
1793 if (PL_reg_start_tmpl <= prog->nparens) {
1794 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1795 if(PL_reg_start_tmp)
1796 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1797 else
1798 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1799 }
1800
1801 /* XXXX What this code is doing here?!!! There should be no need
1802 to do this again and again, PL_reglastparen should take care of
3dd2943c 1803 this! --ilya*/
dafc8851
JH
1804
1805 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1806 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1807 * PL_reglastparen), is not needed at all by the test suite
1808 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1809 * enough, for building DynaLoader, or otherwise this
1810 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1811 * will happen. Meanwhile, this code *is* needed for the
1812 * above-mentioned test suite tests to succeed. The common theme
1813 * on those tests seems to be returning null fields from matches.
1814 * --jhi */
dafc8851 1815#if 1
d6a28714
JH
1816 sp = prog->startp;
1817 ep = prog->endp;
1818 if (prog->nparens) {
09e8ae3b 1819 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1820 *++sp = -1;
1821 *++ep = -1;
1822 }
1823 }
dafc8851 1824#endif
02db2b7b 1825 REGCP_SET(lastcp);
d6a28714
JH
1826 if (regmatch(prog->program + 1)) {
1827 prog->endp[0] = PL_reginput - PL_bostr;
1828 return 1;
1829 }
02db2b7b 1830 REGCP_UNWIND(lastcp);
d6a28714
JH
1831 return 0;
1832}
1833
02db2b7b
IZ
1834#define RE_UNWIND_BRANCH 1
1835#define RE_UNWIND_BRANCHJ 2
1836
1837union re_unwind_t;
1838
1839typedef struct { /* XX: makes sense to enlarge it... */
1840 I32 type;
1841 I32 prev;
1842 CHECKPOINT lastcp;
1843} re_unwind_generic_t;
1844
1845typedef struct {
1846 I32 type;
1847 I32 prev;
1848 CHECKPOINT lastcp;
1849 I32 lastparen;
1850 regnode *next;
1851 char *locinput;
1852 I32 nextchr;
1853#ifdef DEBUGGING
1854 int regindent;
1855#endif
1856} re_unwind_branch_t;
1857
1858typedef union re_unwind_t {
1859 I32 type;
1860 re_unwind_generic_t generic;
1861 re_unwind_branch_t branch;
1862} re_unwind_t;
1863
d6a28714
JH
1864/*
1865 - regmatch - main matching routine
1866 *
1867 * Conceptually the strategy is simple: check to see whether the current
1868 * node matches, call self recursively to see whether the rest matches,
1869 * and then act accordingly. In practice we make some effort to avoid
1870 * recursion, in particular by going through "ordinary" nodes (that don't
1871 * need to know whether the rest of the match failed) by a loop instead of
1872 * by recursion.
1873 */
1874/* [lwall] I've hoisted the register declarations to the outer block in order to
1875 * maybe save a little bit of pushing and popping on the stack. It also takes
1876 * advantage of machines that use a register save mask on subroutine entry.
1877 */
1878STATIC I32 /* 0 failure, 1 success */
1879S_regmatch(pTHX_ regnode *prog)
1880{
d6a28714
JH
1881 register regnode *scan; /* Current node. */
1882 regnode *next; /* Next node. */
1883 regnode *inner; /* Next node in internal branch. */
1884 register I32 nextchr; /* renamed nextchr - nextchar colides with
1885 function of same name */
1886 register I32 n; /* no or next */
1887 register I32 ln; /* len or last */
1888 register char *s; /* operand or save */
1889 register char *locinput = PL_reginput;
1890 register I32 c1, c2, paren; /* case fold search, parenth */
1891 int minmod = 0, sw = 0, logical = 0;
02db2b7b
IZ
1892 I32 unwind = 0;
1893 I32 firstcp = PL_savestack_ix;
1894
d6a28714
JH
1895#ifdef DEBUGGING
1896 PL_regindent++;
1897#endif
1898
1899 /* Note that nextchr is a byte even in UTF */
1900 nextchr = UCHARAT(locinput);
1901 scan = prog;
1902 while (scan != NULL) {
1903#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
02db2b7b 1904#if 1
d6a28714
JH
1905# define sayYES goto yes
1906# define sayNO goto no
7821416a
IZ
1907# define sayYES_FINAL goto yes_final
1908# define sayYES_LOUD goto yes_loud
1909# define sayNO_FINAL goto no_final
1910# define sayNO_SILENT goto do_no
d6a28714
JH
1911# define saySAME(x) if (x) goto yes; else goto no
1912# define REPORT_CODE_OFF 24
1913#else
1914# define sayYES return 1
1915# define sayNO return 0
7821416a
IZ
1916# define sayYES_FINAL return 1
1917# define sayYES_LOUD return 1
1918# define sayNO_FINAL return 0
1919# define sayNO_SILENT return 0
d6a28714
JH
1920# define saySAME(x) return x
1921#endif
1922 DEBUG_r( {
1923 SV *prop = sv_newmortal();
1924 int docolor = *PL_colors[0];
1925 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1926 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1927 /* The part of the string before starttry has one color
1928 (pref0_len chars), between starttry and current
1929 position another one (pref_len - pref0_len chars),
1930 after the current position the third one.
1931 We assume that pref0_len <= pref_len, otherwise we
1932 decrease pref0_len. */
1933 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1934 ? (5 + taill) - l : locinput - PL_bostr);
1935 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1936
1937 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1938 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1939 ? (5 + taill) - pref_len : PL_regeol - locinput);
1940 if (pref0_len < 0)
1941 pref0_len = 0;
1942 if (pref0_len > pref_len)
1943 pref0_len = pref_len;
1944 regprop(prop, scan);
1945 PerlIO_printf(Perl_debug_log,
b900a521
JH
1946 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1947 (IV)(locinput - PL_bostr),
d6a28714
JH
1948 PL_colors[4], pref0_len,
1949 locinput - pref_len, PL_colors[5],
1950 PL_colors[2], pref_len - pref0_len,
1951 locinput - pref_len + pref0_len, PL_colors[3],
1952 (docolor ? "" : "> <"),
1953 PL_colors[0], l, locinput, PL_colors[1],
1954 15 - l - pref_len + 1,
1955 "",
b900a521 1956 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
1957 SvPVX(prop));
1958 } );
1959
1960 next = scan + NEXT_OFF(scan);
1961 if (next == scan)
1962 next = NULL;
1963
1964 switch (OP(scan)) {
1965 case BOL:
1966 if (locinput == PL_bostr
1967 ? PL_regprev == '\n'
1968 : (PL_multiline &&
1969 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1970 {
1971 /* regtill = regbol; */
b8c5462f
JH
1972 break;
1973 }
d6a28714
JH
1974 sayNO;
1975 case MBOL:
1976 if (locinput == PL_bostr
1977 ? PL_regprev == '\n'
1978 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1979 {
b8c5462f
JH
1980 break;
1981 }
d6a28714
JH
1982 sayNO;
1983 case SBOL:
c2a73568 1984 if (locinput == PL_bostr)
b8c5462f 1985 break;
d6a28714
JH
1986 sayNO;
1987 case GPOS:
1988 if (locinput == PL_reg_ganch)
1989 break;
1990 sayNO;
1991 case EOL:
1992 if (PL_multiline)
1993 goto meol;
1994 else
1995 goto seol;
1996 case MEOL:
1997 meol:
1998 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1999 sayNO;
b8c5462f 2000 break;
d6a28714
JH
2001 case SEOL:
2002 seol:
2003 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2004 sayNO;
d6a28714 2005 if (PL_regeol - locinput > 1)
b8c5462f 2006 sayNO;
b8c5462f 2007 break;
d6a28714
JH
2008 case EOS:
2009 if (PL_regeol != locinput)
b8c5462f 2010 sayNO;
d6a28714
JH
2011 break;
2012 case SANYUTF8:
b8c5462f 2013 if (nextchr & 0x80) {
b8c5462f 2014 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2015 if (locinput > PL_regeol)
2016 sayNO;
b8c5462f
JH
2017 nextchr = UCHARAT(locinput);
2018 break;
2019 }
d6a28714 2020 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2021 sayNO;
b8c5462f 2022 nextchr = UCHARAT(++locinput);
a0d0e21e 2023 break;
d6a28714
JH
2024 case SANY:
2025 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
2026 sayNO;
2027 nextchr = UCHARAT(++locinput);
b85d18e9 2028 break;
d6a28714 2029 case ANYUTF8:
a0ed51b3 2030 if (nextchr & 0x80) {
b8c5462f 2031 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2032 if (locinput > PL_regeol)
2033 sayNO;
a0ed51b3
LW
2034 nextchr = UCHARAT(locinput);
2035 break;
2036 }
155aba94 2037 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
a0ed51b3
LW
2038 sayNO;
2039 nextchr = UCHARAT(++locinput);
2040 break;
d6a28714 2041 case REG_ANY:
155aba94 2042 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
4633a7c4 2043 sayNO;
76e3520e 2044 nextchr = UCHARAT(++locinput);
a0d0e21e 2045 break;
d6a28714 2046 case EXACT:
cd439c50
IZ
2047 s = STRING(scan);
2048 ln = STR_LEN(scan);
d6a28714
JH
2049 /* Inline the first character, for speed. */
2050 if (UCHARAT(s) != nextchr)
2051 sayNO;
2052 if (PL_regeol - locinput < ln)
2053 sayNO;
2054 if (ln > 1 && memNE(s, locinput, ln))
2055 sayNO;
2056 locinput += ln;
2057 nextchr = UCHARAT(locinput);
2058 break;
2059 case EXACTFL:
b8c5462f
JH
2060 PL_reg_flags |= RF_tainted;
2061 /* FALL THROUGH */
d6a28714 2062 case EXACTF:
cd439c50
IZ
2063 s = STRING(scan);
2064 ln = STR_LEN(scan);
d6a28714
JH
2065
2066 if (UTF) {
2067 char *l = locinput;
2068 char *e = s + ln;
2069 c1 = OP(scan) == EXACTF;
2070 while (s < e) {
2071 if (l >= PL_regeol)
2072 sayNO;
667bb95a
JH
2073 if (utf8_to_uv((U8*)s, e - s, 0, 0) !=
2074 (c1 ?
2075 toLOWER_utf8((U8*)l) :
2076 toLOWER_LC_utf8((U8*)l)))
d6a28714
JH
2077 {
2078 sayNO;
2079 }
2080 s += UTF8SKIP(s);
2081 l += UTF8SKIP(l);
b8c5462f 2082 }
d6a28714 2083 locinput = l;
a0ed51b3
LW
2084 nextchr = UCHARAT(locinput);
2085 break;
2086 }
d6a28714
JH
2087
2088 /* Inline the first character, for speed. */
2089 if (UCHARAT(s) != nextchr &&
2090 UCHARAT(s) != ((OP(scan) == EXACTF)
2091 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2092 sayNO;
d6a28714 2093 if (PL_regeol - locinput < ln)
b8c5462f 2094 sayNO;
d6a28714
JH
2095 if (ln > 1 && (OP(scan) == EXACTF
2096 ? ibcmp(s, locinput, ln)
2097 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2098 sayNO;
d6a28714
JH
2099 locinput += ln;
2100 nextchr = UCHARAT(locinput);
a0d0e21e 2101 break;
d6a28714 2102 case ANYOFUTF8:
d6a28714 2103 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 2104 sayNO;
d6a28714
JH
2105 if (locinput >= PL_regeol)
2106 sayNO;
2107 locinput += PL_utf8skip[nextchr];
2108 nextchr = UCHARAT(locinput);
2109 break;
2110 case ANYOF:
d6a28714 2111 if (nextchr < 0)
b8c5462f 2112 nextchr = UCHARAT(locinput);
936ed897 2113 if (!REGINCLASS(scan, nextchr))
d6a28714
JH
2114 sayNO;
2115 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2116 sayNO;
b8c5462f
JH
2117 nextchr = UCHARAT(++locinput);
2118 break;
d6a28714 2119 case ALNUML:
b8c5462f
JH
2120 PL_reg_flags |= RF_tainted;
2121 /* FALL THROUGH */
d6a28714 2122 case ALNUM:
b8c5462f 2123 if (!nextchr)
4633a7c4 2124 sayNO;
d6a28714
JH
2125 if (!(OP(scan) == ALNUM
2126 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
2127 sayNO;
2128 nextchr = UCHARAT(++locinput);
bbce6d69 2129 break;
d6a28714 2130 case ALNUMLUTF8:
3280af22 2131 PL_reg_flags |= RF_tainted;
bbce6d69 2132 /* FALL THROUGH */
d6a28714 2133 case ALNUMUTF8:
b8c5462f
JH
2134 if (!nextchr)
2135 sayNO;
2136 if (nextchr & 0x80) {
d6a28714
JH
2137 if (!(OP(scan) == ALNUMUTF8
2138 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2139 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2140 {
2141 sayNO;
a0ed51b3 2142 }
b8c5462f 2143 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2144 nextchr = UCHARAT(locinput);
2145 break;
2146 }
d6a28714
JH
2147 if (!(OP(scan) == ALNUMUTF8
2148 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2149 sayNO;
b8c5462f 2150 nextchr = UCHARAT(++locinput);
a0d0e21e 2151 break;
d6a28714 2152 case NALNUML:
b8c5462f
JH
2153 PL_reg_flags |= RF_tainted;
2154 /* FALL THROUGH */
d6a28714
JH
2155 case NALNUM:
2156 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2157 sayNO;
d6a28714
JH
2158 if (OP(scan) == NALNUM
2159 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 2160 sayNO;
b8c5462f 2161 nextchr = UCHARAT(++locinput);
a0ed51b3 2162 break;
d6a28714 2163 case NALNUMLUTF8:
b8c5462f
JH
2164 PL_reg_flags |= RF_tainted;
2165 /* FALL THROUGH */
d6a28714 2166 case NALNUMUTF8:
3280af22 2167 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2168 sayNO;
b8c5462f 2169 if (nextchr & 0x80) {
d6a28714
JH
2170 if (OP(scan) == NALNUMUTF8
2171 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2172 : isALNUM_LC_utf8((U8*)locinput))
2173 {
b8c5462f 2174 sayNO;
d6a28714 2175 }
b8c5462f
JH
2176 locinput += PL_utf8skip[nextchr];
2177 nextchr = UCHARAT(locinput);
2178 break;
2179 }
d6a28714
JH
2180 if (OP(scan) == NALNUMUTF8
2181 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2182 sayNO;
76e3520e 2183 nextchr = UCHARAT(++locinput);
a0d0e21e 2184 break;
d6a28714
JH
2185 case BOUNDL:
2186 case NBOUNDL:
3280af22 2187 PL_reg_flags |= RF_tainted;
bbce6d69 2188 /* FALL THROUGH */
d6a28714
JH
2189 case BOUND:
2190 case NBOUND:
2191 /* was last char in word? */
2192 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2193 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2194 ln = isALNUM(ln);
2195 n = isALNUM(nextchr);
2196 }
2197 else {
2198 ln = isALNUM_LC(ln);
2199 n = isALNUM_LC(nextchr);
2200 }
2201 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 2202 sayNO;
a0d0e21e 2203 break;
d6a28714
JH
2204 case BOUNDLUTF8:
2205 case NBOUNDLUTF8:
a0ed51b3
LW
2206 PL_reg_flags |= RF_tainted;
2207 /* FALL THROUGH */
d6a28714
JH
2208 case BOUNDUTF8:
2209 case NBOUNDUTF8:
2210 /* was last char in word? */
667bb95a
JH
2211 if (locinput == PL_regbol)
2212 ln = PL_regprev;
2213 else {
2214 U8 *r = reghop((U8*)locinput, -1);
2215
2216 ln = utf8_to_uv(r, s - (char*)r, 0, 0);
2217 }
d6a28714
JH
2218 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2219 ln = isALNUM_uni(ln);
2220 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 2221 }
d6a28714
JH
2222 else {
2223 ln = isALNUM_LC_uni(ln);
2224 n = isALNUM_LC_utf8((U8*)locinput);
2225 }
2226 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 2227 sayNO;
a0ed51b3 2228 break;
d6a28714 2229 case SPACEL:
3280af22 2230 PL_reg_flags |= RF_tainted;
bbce6d69 2231 /* FALL THROUGH */
d6a28714 2232 case SPACE:
9442cb0e 2233 if (!nextchr)
4633a7c4 2234 sayNO;
d6a28714
JH
2235 if (!(OP(scan) == SPACE
2236 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 2237 sayNO;
76e3520e 2238 nextchr = UCHARAT(++locinput);
a0d0e21e 2239 break;
d6a28714 2240 case SPACELUTF8:
a0ed51b3
LW
2241 PL_reg_flags |= RF_tainted;
2242 /* FALL THROUGH */
d6a28714 2243 case SPACEUTF8:
9442cb0e 2244 if (!nextchr)
a0ed51b3
LW
2245 sayNO;
2246 if (nextchr & 0x80) {
d6a28714 2247 if (!(OP(scan) == SPACEUTF8
9442cb0e 2248 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714
JH
2249 : isSPACE_LC_utf8((U8*)locinput)))
2250 {
a0ed51b3 2251 sayNO;
d6a28714 2252 }
6f06b55f 2253 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2254 nextchr = UCHARAT(locinput);
2255 break;
2256 }
d6a28714
JH
2257 if (!(OP(scan) == SPACEUTF8
2258 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
2259 sayNO;
2260 nextchr = UCHARAT(++locinput);
2261 break;
d6a28714 2262 case NSPACEL:
3280af22 2263 PL_reg_flags |= RF_tainted;
bbce6d69 2264 /* FALL THROUGH */
d6a28714 2265 case NSPACE:
9442cb0e 2266 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2267 sayNO;
9442cb0e 2268 if (OP(scan) == NSPACE
d6a28714 2269 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2270 sayNO;
b8c5462f 2271 nextchr = UCHARAT(++locinput);
a0d0e21e 2272 break;
d6a28714 2273 case NSPACELUTF8:
a0ed51b3
LW
2274 PL_reg_flags |= RF_tainted;
2275 /* FALL THROUGH */
d6a28714 2276 case NSPACEUTF8:
9442cb0e 2277 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
2278 sayNO;
2279 if (nextchr & 0x80) {
d6a28714 2280 if (OP(scan) == NSPACEUTF8
9442cb0e 2281 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714 2282 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2283 {
2284 sayNO;
2285 }
2286 locinput += PL_utf8skip[nextchr];
2287 nextchr = UCHARAT(locinput);
2288 break;
a0ed51b3 2289 }
d6a28714
JH
2290 if (OP(scan) == NSPACEUTF8
2291 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2292 sayNO;
76e3520e 2293 nextchr = UCHARAT(++locinput);
a0d0e21e 2294 break;
d6a28714 2295 case DIGITL:
a0ed51b3
LW
2296 PL_reg_flags |= RF_tainted;
2297 /* FALL THROUGH */
d6a28714 2298 case DIGIT:
9442cb0e 2299 if (!nextchr)
a0ed51b3 2300 sayNO;
d6a28714
JH
2301 if (!(OP(scan) == DIGIT
2302 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2303 sayNO;
76e3520e 2304 nextchr = UCHARAT(++locinput);
a0d0e21e 2305 break;
d6a28714 2306 case DIGITLUTF8:
a0ed51b3
LW
2307 PL_reg_flags |= RF_tainted;
2308 /* FALL THROUGH */
d6a28714 2309 case DIGITUTF8:
a0ed51b3
LW
2310 if (!nextchr)
2311 sayNO;
2312 if (nextchr & 0x80) {
9442cb0e
GS
2313 if (!(OP(scan) == DIGITUTF8
2314 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2315 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2316 {
a0ed51b3 2317 sayNO;
dfe13c55 2318 }
6f06b55f 2319 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2320 nextchr = UCHARAT(locinput);
2321 break;
2322 }
9442cb0e
GS
2323 if (!(OP(scan) == DIGITUTF8
2324 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2325 sayNO;
2326 nextchr = UCHARAT(++locinput);
2327 break;
d6a28714 2328 case NDIGITL:
b8c5462f
JH
2329 PL_reg_flags |= RF_tainted;
2330 /* FALL THROUGH */
d6a28714 2331 case NDIGIT:
9442cb0e 2332 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2333 sayNO;
9442cb0e 2334 if (OP(scan) == NDIGIT
d6a28714 2335 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2336 sayNO;
76e3520e 2337 nextchr = UCHARAT(++locinput);
a0d0e21e 2338 break;
d6a28714 2339 case NDIGITLUTF8:
b8c5462f
JH
2340 PL_reg_flags |= RF_tainted;
2341 /* FALL THROUGH */
d6a28714 2342 case NDIGITUTF8:
b8c5462f
JH
2343 if (!nextchr && locinput >= PL_regeol)
2344 sayNO;
a0ed51b3 2345 if (nextchr & 0x80) {
9442cb0e
GS
2346 if (OP(scan) == NDIGITUTF8
2347 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2348 : isDIGIT_LC_utf8((U8*)locinput))
2349 {
a0ed51b3 2350 sayNO;
9442cb0e 2351 }
6f06b55f 2352 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2353 nextchr = UCHARAT(locinput);
2354 break;
2355 }
9442cb0e
GS
2356 if (OP(scan) == NDIGITUTF8
2357 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2358 sayNO;
2359 nextchr = UCHARAT(++locinput);
2360 break;
2361 case CLUMP:
dfe13c55 2362 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2363 sayNO;
6f06b55f 2364 locinput += PL_utf8skip[nextchr];
dfe13c55 2365 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2366 locinput += UTF8SKIP(locinput);
2367 if (locinput > PL_regeol)
2368 sayNO;
2369 nextchr = UCHARAT(locinput);
2370 break;
c8756f30 2371 case REFFL:
3280af22 2372 PL_reg_flags |= RF_tainted;
c8756f30 2373 /* FALL THROUGH */
c277df42 2374 case REF:
c8756f30 2375 case REFF:
c277df42 2376 n = ARG(scan); /* which paren pair */
cf93c79d 2377 ln = PL_regstartp[n];
2c2d71f5 2378 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2379 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2380 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2381 if (ln == PL_regendp[n])
a0d0e21e 2382 break;
a0ed51b3 2383
cf93c79d 2384 s = PL_bostr + ln;
a0ed51b3
LW
2385 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2386 char *l = locinput;
cf93c79d 2387 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2388 /*
2389 * Note that we can't do the "other character" lookup trick as
2390 * in the 8-bit case (no pun intended) because in Unicode we
2391 * have to map both upper and title case to lower case.
2392 */
2393 if (OP(scan) == REFF) {
2394 while (s < e) {
2395 if (l >= PL_regeol)
2396 sayNO;
dfe13c55 2397 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2398 sayNO;
2399 s += UTF8SKIP(s);
2400 l += UTF8SKIP(l);
2401 }
2402 }
2403 else {
2404 while (s < e) {
2405 if (l >= PL_regeol)
2406 sayNO;
dfe13c55 2407 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2408 sayNO;
2409 s += UTF8SKIP(s);
2410 l += UTF8SKIP(l);
2411 }
2412 }
2413 locinput = l;
2414 nextchr = UCHARAT(locinput);
2415 break;
2416 }
2417
a0d0e21e 2418 /* Inline the first character, for speed. */
76e3520e 2419 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2420 (OP(scan) == REF ||
2421 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2422 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2423 sayNO;
cf93c79d 2424 ln = PL_regendp[n] - ln;
3280af22 2425 if (locinput + ln > PL_regeol)
4633a7c4 2426 sayNO;
c8756f30
AK
2427 if (ln > 1 && (OP(scan) == REF
2428 ? memNE(s, locinput, ln)
2429 : (OP(scan) == REFF
2430 ? ibcmp(s, locinput, ln)
2431 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2432 sayNO;
a0d0e21e 2433 locinput += ln;
76e3520e 2434 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2435 break;
2436
2437 case NOTHING:
c277df42 2438 case TAIL:
a0d0e21e
LW
2439 break;
2440 case BACK:
2441 break;
c277df42
IZ
2442 case EVAL:
2443 {
2444 dSP;
533c011a 2445 OP_4tree *oop = PL_op;
3280af22
NIS
2446 COP *ocurcop = PL_curcop;
2447 SV **ocurpad = PL_curpad;
c277df42
IZ
2448 SV *ret;
2449
2450 n = ARG(scan);
533c011a 2451 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2452 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2453 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2454 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2455
cea2e8a9 2456 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2457 SPAGAIN;
2458 ret = POPs;
2459 PUTBACK;
2460
0f5d15d6
IZ
2461 PL_op = oop;
2462 PL_curpad = ocurpad;
2463 PL_curcop = ocurcop;
c277df42 2464 if (logical) {
0f5d15d6
IZ
2465 if (logical == 2) { /* Postponed subexpression. */
2466 regexp *re;
22c35a8c 2467 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2468 re_cc_state state;
0f5d15d6
IZ
2469 CHECKPOINT cp, lastcp;
2470
2471 if(SvROK(ret) || SvRMAGICAL(ret)) {
2472 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2473
2474 if(SvMAGICAL(sv))
2475 mg = mg_find(sv, 'r');
2476 }
2477 if (mg) {
2478 re = (regexp *)mg->mg_obj;
df0003d4 2479 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2480 }
2481 else {
2482 STRLEN len;
2483 char *t = SvPV(ret, len);
2484 PMOP pm;
2485 char *oprecomp = PL_regprecomp;
2486 I32 osize = PL_regsize;
2487 I32 onpar = PL_regnpar;
2488
2489 pm.op_pmflags = 0;
393fec97 2490 pm.op_pmdynflags = (UTF||DO_UTF8(ret) ? PMdf_UTF8 : 0);
cea2e8a9 2491 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2492 if (!(SvFLAGS(ret)
2493 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2494 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2495 PL_regprecomp = oprecomp;
2496 PL_regsize = osize;
2497 PL_regnpar = onpar;
2498 }
2499 DEBUG_r(
2500 PerlIO_printf(Perl_debug_log,
2501 "Entering embedded `%s%.60s%s%s'\n",
2502 PL_colors[0],
2503 re->precomp,
2504 PL_colors[1],
2505 (strlen(re->precomp) > 60 ? "..." : ""))
2506 );
2507 state.node = next;
2508 state.prev = PL_reg_call_cc;
2509 state.cc = PL_regcc;
2510 state.re = PL_reg_re;
2511
2ab05381 2512 PL_regcc = 0;
0f5d15d6
IZ
2513
2514 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2515 REGCP_SET(lastcp);
0f5d15d6
IZ
2516 cache_re(re);
2517 state.ss = PL_savestack_ix;
2518 *PL_reglastparen = 0;
2519 PL_reg_call_cc = &state;
2520 PL_reginput = locinput;
2c2d71f5
JH
2521
2522 /* XXXX This is too dramatic a measure... */
2523 PL_reg_maxiter = 0;
2524
0f5d15d6 2525 if (regmatch(re->program + 1)) {
2c914db6
IZ
2526 /* Even though we succeeded, we need to restore
2527 global variables, since we may be wrapped inside
2528 SUSPEND, thus the match may be not finished yet. */
2529
2530 /* XXXX Do this only if SUSPENDed? */
2531 PL_reg_call_cc = state.prev;
2532 PL_regcc = state.cc;
2533 PL_reg_re = state.re;
2534 cache_re(PL_reg_re);
2535
2536 /* XXXX This is too dramatic a measure... */
2537 PL_reg_maxiter = 0;
2538
2539 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2540 ReREFCNT_dec(re);
2541 regcpblow(cp);
2542 sayYES;
2543 }
0f5d15d6 2544 ReREFCNT_dec(re);
02db2b7b 2545 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2546 regcppop();
2547 PL_reg_call_cc = state.prev;
2548 PL_regcc = state.cc;
2549 PL_reg_re = state.re;
d3790889 2550 cache_re(PL_reg_re);
2c2d71f5
JH
2551
2552 /* XXXX This is too dramatic a measure... */
2553 PL_reg_maxiter = 0;
2554
0f5d15d6
IZ
2555 sayNO;
2556 }
c277df42 2557 sw = SvTRUE(ret);
0f5d15d6 2558 logical = 0;
a0ed51b3
LW
2559 }
2560 else
3280af22 2561 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2562 break;
2563 }
a0d0e21e 2564 case OPEN:
c277df42 2565 n = ARG(scan); /* which paren pair */
3280af22
NIS
2566 PL_reg_start_tmp[n] = locinput;
2567 if (n > PL_regsize)
2568 PL_regsize = n;
a0d0e21e
LW
2569 break;
2570 case CLOSE:
c277df42 2571 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2572 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2573 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2574 if (n > *PL_reglastparen)
2575 *PL_reglastparen = n;
a0d0e21e 2576 break;
c277df42
IZ
2577 case GROUPP:
2578 n = ARG(scan); /* which paren pair */
cf93c79d 2579 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2580 break;
2581 case IFTHEN:
2c2d71f5 2582 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2583 if (sw)
2584 next = NEXTOPER(NEXTOPER(scan));
2585 else {
2586 next = scan + ARG(scan);
2587 if (OP(next) == IFTHEN) /* Fake one. */
2588 next = NEXTOPER(NEXTOPER(next));
2589 }
2590 break;
2591 case LOGICAL:
0f5d15d6 2592 logical = scan->flags;
c277df42 2593 break;
2ab05381
IZ
2594/*******************************************************************
2595 PL_regcc contains infoblock about the innermost (...)* loop, and
2596 a pointer to the next outer infoblock.
2597
2598 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2599
2600 1) After matching X, regnode for CURLYX is processed;
2601
2602 2) This regnode creates infoblock on the stack, and calls
2603 regmatch() recursively with the starting point at WHILEM node;
2604
2605 3) Each hit of WHILEM node tries to match A and Z (in the order
2606 depending on the current iteration, min/max of {min,max} and
2607 greediness). The information about where are nodes for "A"
2608 and "Z" is read from the infoblock, as is info on how many times "A"
2609 was already matched, and greediness.
2610
2611 4) After A matches, the same WHILEM node is hit again.
2612
2613 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2614 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2615 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2616 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2617 of the external loop.
2618
2619 Currently present infoblocks form a tree with a stem formed by PL_curcc
2620 and whatever it mentions via ->next, and additional attached trees
2621 corresponding to temporarily unset infoblocks as in "5" above.
2622
2623 In the following picture infoblocks for outer loop of
2624 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2625 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2626 infoblocks are drawn below the "reset" infoblock.
2627
2628 In fact in the picture below we do not show failed matches for Z and T
2629 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2630 more obvious *why* one needs to *temporary* unset infoblocks.]
2631
2632 Matched REx position InfoBlocks Comment
2633 (Y(A)*?Z)*?T x
2634 Y(A)*?Z)*?T x <- O
2635 Y (A)*?Z)*?T x <- O
2636 Y A)*?Z)*?T x <- O <- I
2637 YA )*?Z)*?T x <- O <- I
2638 YA A)*?Z)*?T x <- O <- I
2639 YAA )*?Z)*?T x <- O <- I
2640 YAA Z)*?T x <- O # Temporary unset I
2641 I
2642
2643 YAAZ Y(A)*?Z)*?T x <- O
2644 I
2645
2646 YAAZY (A)*?Z)*?T x <- O
2647 I
2648
2649 YAAZY A)*?Z)*?T x <- O <- I
2650 I
2651
2652 YAAZYA )*?Z)*?T x <- O <- I
2653 I
2654
2655 YAAZYA Z)*?T x <- O # Temporary unset I
2656 I,I
2657
2658 YAAZYAZ )*?T x <- O
2659 I,I
2660
2661 YAAZYAZ T x # Temporary unset O
2662 O
2663 I,I
2664
2665 YAAZYAZT x
2666 O
2667 I,I
2668 *******************************************************************/
a0d0e21e
LW
2669 case CURLYX: {
2670 CURCUR cc;
3280af22 2671 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2672 /* No need to save/restore up to this paren */
2673 I32 parenfloor = scan->flags;
c277df42
IZ
2674
2675 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2676 next += ARG(next);
3280af22
NIS
2677 cc.oldcc = PL_regcc;
2678 PL_regcc = &cc;
cb434fcc
IZ
2679 /* XXXX Probably it is better to teach regpush to support
2680 parenfloor > PL_regsize... */
2681 if (parenfloor > *PL_reglastparen)
2682 parenfloor = *PL_reglastparen; /* Pessimization... */
2683 cc.parenfloor = parenfloor;
a0d0e21e
LW
2684 cc.cur = -1;
2685 cc.min = ARG1(scan);
2686 cc.max = ARG2(scan);
c277df42 2687 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2688 cc.next = next;
2689 cc.minmod = minmod;
2690 cc.lastloc = 0;
3280af22 2691 PL_reginput = locinput;
a0d0e21e
LW
2692 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2693 regcpblow(cp);
3280af22 2694 PL_regcc = cc.oldcc;
4633a7c4 2695 saySAME(n);
a0d0e21e
LW
2696 }
2697 /* NOT REACHED */
2698 case WHILEM: {
2699 /*
2700 * This is really hard to understand, because after we match
2701 * what we're trying to match, we must make sure the rest of
2c2d71f5 2702 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2703 * to go back UP the parse tree by recursing ever deeper. And
2704 * if it fails, we have to reset our parent's current state
2705 * that we can try again after backing off.
2706 */
2707
c277df42 2708 CHECKPOINT cp, lastcp;
3280af22 2709 CURCUR* cc = PL_regcc;
c277df42
IZ
2710 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2711
4633a7c4 2712 n = cc->cur + 1; /* how many we know we matched */
3280af22 2713 PL_reginput = locinput;
a0d0e21e 2714
c277df42
IZ
2715 DEBUG_r(
2716 PerlIO_printf(Perl_debug_log,
2717 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2718 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2719 (long)n, (long)cc->min,
2720 (long)cc->max, (long)cc)
2721 );
4633a7c4 2722
a0d0e21e
LW
2723 /* If degenerate scan matches "", assume scan done. */
2724
579cf2c3 2725 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2726 PL_regcc = cc->oldcc;
2ab05381
IZ
2727 if (PL_regcc)
2728 ln = PL_regcc->cur;
c277df42 2729 DEBUG_r(
c3464db5
DD
2730 PerlIO_printf(Perl_debug_log,
2731 "%*s empty match detected, try continuation...\n",
3280af22 2732 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2733 );
a0d0e21e 2734 if (regmatch(cc->next))
4633a7c4 2735 sayYES;
2ab05381
IZ
2736 if (PL_regcc)
2737 PL_regcc->cur = ln;
3280af22 2738 PL_regcc = cc;
4633a7c4 2739 sayNO;
a0d0e21e
LW
2740 }
2741
2742 /* First just match a string of min scans. */
2743
2744 if (n < cc->min) {
2745 cc->cur = n;
2746 cc->lastloc = locinput;
4633a7c4
LW
2747 if (regmatch(cc->scan))
2748 sayYES;
2749 cc->cur = n - 1;
c277df42 2750 cc->lastloc = lastloc;
4633a7c4 2751 sayNO;
a0d0e21e
LW
2752 }
2753
2c2d71f5
JH
2754 if (scan->flags) {
2755 /* Check whether we already were at this position.
2756 Postpone detection until we know the match is not
2757 *that* much linear. */
2758 if (!PL_reg_maxiter) {
2759 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2760 PL_reg_leftiter = PL_reg_maxiter;
2761 }
2762 if (PL_reg_leftiter-- == 0) {
2763 I32 size = (PL_reg_maxiter + 7)/8;
2764 if (PL_reg_poscache) {
2765 if (PL_reg_poscache_size < size) {
2766 Renew(PL_reg_poscache, size, char);
2767 PL_reg_poscache_size = size;
2768 }
2769 Zero(PL_reg_poscache, size, char);
2770 }
2771 else {
2772 PL_reg_poscache_size = size;
2773 Newz(29, PL_reg_poscache, size, char);
2774 }
2775 DEBUG_r(
2776 PerlIO_printf(Perl_debug_log,
2777 "%sDetected a super-linear match, switching on caching%s...\n",
2778 PL_colors[4], PL_colors[5])
2779 );
2780 }
2781 if (PL_reg_leftiter < 0) {
2782 I32 o = locinput - PL_bostr, b;
2783
2784 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2785 b = o % 8;
2786 o /= 8;
2787 if (PL_reg_poscache[o] & (1<<b)) {
2788 DEBUG_r(
2789 PerlIO_printf(Perl_debug_log,
2790 "%*s already tried at this position...\n",
2791 REPORT_CODE_OFF+PL_regindent*2, "")
2792 );
7821416a 2793 sayNO_SILENT;
2c2d71f5
JH
2794 }
2795 PL_reg_poscache[o] |= (1<<b);
2796 }
2797 }
2798
a0d0e21e
LW
2799 /* Prefer next over scan for minimal matching. */
2800
2801 if (cc->minmod) {
3280af22 2802 PL_regcc = cc->oldcc;
2ab05381
IZ
2803 if (PL_regcc)
2804 ln = PL_regcc->cur;
5f05dabc 2805 cp = regcppush(cc->parenfloor);
02db2b7b 2806 REGCP_SET(lastcp);
5f05dabc 2807 if (regmatch(cc->next)) {
c277df42 2808 regcpblow(cp);
4633a7c4 2809 sayYES; /* All done. */
5f05dabc 2810 }
02db2b7b 2811 REGCP_UNWIND(lastcp);
5f05dabc 2812 regcppop();
2ab05381
IZ
2813 if (PL_regcc)
2814 PL_regcc->cur = ln;
3280af22 2815 PL_regcc = cc;
a0d0e21e 2816
c277df42 2817 if (n >= cc->max) { /* Maximum greed exceeded? */
e476b1b5 2818 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2819 && !(PL_reg_flags & RF_warned)) {
2820 PL_reg_flags |= RF_warned;
e476b1b5 2821 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2822 "Complex regular subexpression recursion",
2823 REG_INFTY - 1);
c277df42 2824 }
4633a7c4 2825 sayNO;
c277df42 2826 }
a687059c 2827
c277df42 2828 DEBUG_r(
c3464db5
DD
2829 PerlIO_printf(Perl_debug_log,
2830 "%*s trying longer...\n",
3280af22 2831 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2832 );
a0d0e21e 2833 /* Try scanning more and see if it helps. */
3280af22 2834 PL_reginput = locinput;
a0d0e21e
LW
2835 cc->cur = n;
2836 cc->lastloc = locinput;
5f05dabc 2837 cp = regcppush(cc->parenfloor);
02db2b7b 2838 REGCP_SET(lastcp);
5f05dabc 2839 if (regmatch(cc->scan)) {
c277df42 2840 regcpblow(cp);
4633a7c4 2841 sayYES;
5f05dabc 2842 }
02db2b7b 2843 REGCP_UNWIND(lastcp);
5f05dabc 2844 regcppop();
4633a7c4 2845 cc->cur = n - 1;
c277df42 2846 cc->lastloc = lastloc;
4633a7c4 2847 sayNO;
a0d0e21e
LW
2848 }
2849
2850 /* Prefer scan over next for maximal matching. */
2851
2852 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2853 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2854 cc->cur = n;
2855 cc->lastloc = locinput;
02db2b7b 2856 REGCP_SET(lastcp);
5f05dabc 2857 if (regmatch(cc->scan)) {
c277df42 2858 regcpblow(cp);
4633a7c4 2859 sayYES;
5f05dabc 2860 }
02db2b7b 2861 REGCP_UNWIND(lastcp);
a0d0e21e 2862 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2863 PL_reginput = locinput;
c277df42 2864 DEBUG_r(
c3464db5
DD
2865 PerlIO_printf(Perl_debug_log,
2866 "%*s failed, try continuation...\n",
3280af22 2867 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2868 );
2869 }
e476b1b5 2870 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2871 && !(PL_reg_flags & RF_warned)) {
3280af22 2872 PL_reg_flags |= RF_warned;
e476b1b5 2873 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2874 "Complex regular subexpression recursion",
2875 REG_INFTY - 1);
a0d0e21e
LW
2876 }
2877
2878 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2879 PL_regcc = cc->oldcc;
2ab05381
IZ
2880 if (PL_regcc)
2881 ln = PL_regcc->cur;
a0d0e21e 2882 if (regmatch(cc->next))
4633a7c4 2883 sayYES;
2ab05381
IZ
2884 if (PL_regcc)
2885 PL_regcc->cur = ln;
3280af22 2886 PL_regcc = cc;
4633a7c4 2887 cc->cur = n - 1;
c277df42 2888 cc->lastloc = lastloc;
4633a7c4 2889 sayNO;
a0d0e21e
LW
2890 }
2891 /* NOT REACHED */
c277df42
IZ
2892 case BRANCHJ:
2893 next = scan + ARG(scan);
2894 if (next == scan)
2895 next = NULL;
2896 inner = NEXTOPER(NEXTOPER(scan));
2897 goto do_branch;
2898 case BRANCH:
2899 inner = NEXTOPER(scan);
2900 do_branch:
2901 {
2902 CHECKPOINT lastcp;
2903 c1 = OP(scan);
2904 if (OP(next) != c1) /* No choice. */
2905 next = inner; /* Avoid recursion. */
a0d0e21e 2906 else {
02db2b7b
IZ
2907 I32 lastparen = *PL_reglastparen;
2908 I32 unwind1;
2909 re_unwind_branch_t *uw;
2910
2911 /* Put unwinding data on stack */
2912 unwind1 = SSNEWt(1,re_unwind_branch_t);
2913 uw = SSPTRt(unwind1,re_unwind_branch_t);
2914 uw->prev = unwind;
2915 unwind = unwind1;
2916 uw->type = ((c1 == BRANCH)
2917 ? RE_UNWIND_BRANCH
2918 : RE_UNWIND_BRANCHJ);
2919 uw->lastparen = lastparen;
2920 uw->next = next;
2921 uw->locinput = locinput;
2922 uw->nextchr = nextchr;
2923#ifdef DEBUGGING
2924 uw->regindent = ++PL_regindent;
2925#endif
c277df42 2926
02db2b7b
IZ
2927 REGCP_SET(uw->lastcp);
2928
2929 /* Now go into the first branch */
2930 next = inner;
a687059c 2931 }
a0d0e21e
LW
2932 }
2933 break;
2934 case MINMOD:
2935 minmod = 1;
2936 break;
c277df42
IZ
2937 case CURLYM:
2938 {
00db4c45 2939 I32 l = 0;
c277df42
IZ
2940 CHECKPOINT lastcp;
2941
2942 /* We suppose that the next guy does not need
2943 backtracking: in particular, it is of constant length,
2944 and has no parenths to influence future backrefs. */
2945 ln = ARG1(scan); /* min to match */
2946 n = ARG2(scan); /* max to match */
c277df42
IZ
2947 paren = scan->flags;
2948 if (paren) {
3280af22
NIS
2949 if (paren > PL_regsize)
2950 PL_regsize = paren;
2951 if (paren > *PL_reglastparen)
2952 *PL_reglastparen = paren;
c277df42 2953 }
dc45a647 2954 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2955 if (paren)
2956 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2957 PL_reginput = locinput;
c277df42
IZ
2958 if (minmod) {
2959 minmod = 0;
2960 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2961 sayNO;
5f4b28b2 2962 if (ln && l == 0 && n >= ln
c277df42
IZ
2963 /* In fact, this is tricky. If paren, then the
2964 fact that we did/didnot match may influence
2965 future execution. */
2966 && !(paren && ln == 0))
2967 ln = n;
3280af22 2968 locinput = PL_reginput;
22c35a8c 2969 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2970 c1 = (U8)*STRING(next);
c277df42 2971 if (OP(next) == EXACTF)
22c35a8c 2972 c2 = PL_fold[c1];
c277df42 2973 else if (OP(next) == EXACTFL)
22c35a8c 2974 c2 = PL_fold_locale[c1];
c277df42
IZ
2975 else
2976 c2 = c1;
a0ed51b3
LW
2977 }
2978 else
c277df42 2979 c1 = c2 = -1000;
02db2b7b 2980 REGCP_SET(lastcp);
5f4b28b2 2981 /* This may be improved if l == 0. */
c277df42
IZ
2982 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2983 /* If it could work, try it. */
2984 if (c1 == -1000 ||
3280af22
NIS
2985 UCHARAT(PL_reginput) == c1 ||
2986 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2987 {
2988 if (paren) {
2989 if (n) {
cf93c79d
IZ
2990 PL_regstartp[paren] =
2991 HOPc(PL_reginput, -l) - PL_bostr;
2992 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2993 }
2994 else
cf93c79d 2995 PL_regendp[paren] = -1;
c277df42
IZ
2996 }
2997 if (regmatch(next))
2998 sayYES;
02db2b7b 2999 REGCP_UNWIND(lastcp);
c277df42
IZ
3000 }
3001 /* Couldn't or didn't -- move forward. */
3280af22 3002 PL_reginput = locinput;
c277df42
IZ
3003 if (regrepeat_hard(scan, 1, &l)) {
3004 ln++;
3280af22 3005 locinput = PL_reginput;
c277df42
IZ
3006 }
3007 else
3008 sayNO;
3009 }
a0ed51b3
LW
3010 }
3011 else {
c277df42
IZ
3012 n = regrepeat_hard(scan, n, &l);
3013 if (n != 0 && l == 0
3014 /* In fact, this is tricky. If paren, then the
3015 fact that we did/didnot match may influence
3016 future execution. */
3017 && !(paren && ln == 0))
3018 ln = n;
3280af22 3019 locinput = PL_reginput;
c277df42 3020 DEBUG_r(
5c0ca799 3021 PerlIO_printf(Perl_debug_log,
faccc32b 3022 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3023 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3024 (IV) n, (IV)l)
c277df42
IZ
3025 );
3026 if (n >= ln) {
22c35a8c 3027 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3028 c1 = (U8)*STRING(next);
c277df42 3029 if (OP(next) == EXACTF)
22c35a8c 3030 c2 = PL_fold[c1];
c277df42 3031 else if (OP(next) == EXACTFL)
22c35a8c 3032 c2 = PL_fold_locale[c1];
c277df42
IZ
3033 else
3034 c2 = c1;
a0ed51b3
LW
3035 }
3036 else
c277df42
IZ
3037 c1 = c2 = -1000;
3038 }
02db2b7b 3039 REGCP_SET(lastcp);
c277df42
IZ
3040 while (n >= ln) {
3041 /* If it could work, try it. */
3042 if (c1 == -1000 ||
3280af22
NIS
3043 UCHARAT(PL_reginput) == c1 ||
3044 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3045 {
3046 DEBUG_r(
c3464db5 3047 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3048 "%*s trying tail with n=%"IVdf"...\n",
3049 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3050 );
3051 if (paren) {
3052 if (n) {
cf93c79d
IZ
3053 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3054 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3055 }
a0ed51b3 3056 else
cf93c79d 3057 PL_regendp[paren] = -1;
c277df42 3058 }
a0ed51b3
LW
3059 if (regmatch(next))
3060 sayYES;
02db2b7b 3061 REGCP_UNWIND(lastcp);
a0ed51b3 3062 }
c277df42
IZ
3063 /* Couldn't or didn't -- back up. */
3064 n--;
dfe13c55 3065 locinput = HOPc(locinput, -l);
3280af22 3066 PL_reginput = locinput;
c277df42
IZ
3067 }
3068 }
3069 sayNO;
3070 break;
3071 }
3072 case CURLYN:
3073 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3074 if (paren > PL_regsize)
3075 PL_regsize = paren;
3076 if (paren > *PL_reglastparen)
3077 *PL_reglastparen = paren;
c277df42
IZ
3078 ln = ARG1(scan); /* min to match */
3079 n = ARG2(scan); /* max to match */
dc45a647 3080 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3081 goto repeat;
a0d0e21e 3082 case CURLY:
c277df42 3083 paren = 0;
a0d0e21e
LW
3084 ln = ARG1(scan); /* min to match */
3085 n = ARG2(scan); /* max to match */
dc45a647 3086 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3087 goto repeat;
3088 case STAR:
3089 ln = 0;
c277df42 3090 n = REG_INFTY;
a0d0e21e 3091 scan = NEXTOPER(scan);
c277df42 3092 paren = 0;
a0d0e21e
LW
3093 goto repeat;
3094 case PLUS:
c277df42
IZ
3095 ln = 1;
3096 n = REG_INFTY;
3097 scan = NEXTOPER(scan);
3098 paren = 0;
3099 repeat:
a0d0e21e
LW
3100 /*
3101 * Lookahead to avoid useless match attempts
3102 * when we know what character comes next.
3103 */
22c35a8c 3104 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3105 c1 = (U8)*STRING(next);
bbce6d69 3106 if (OP(next) == EXACTF)
22c35a8c 3107 c2 = PL_fold[c1];
bbce6d69 3108 else if (OP(next) == EXACTFL)
22c35a8c 3109 c2 = PL_fold_locale[c1];
bbce6d69 3110 else
3111 c2 = c1;
3112 }
a0d0e21e 3113 else
bbce6d69 3114 c1 = c2 = -1000;
3280af22 3115 PL_reginput = locinput;
a0d0e21e 3116 if (minmod) {
c277df42 3117 CHECKPOINT lastcp;
a0d0e21e
LW
3118 minmod = 0;
3119 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3120 sayNO;
a0ed51b3 3121 locinput = PL_reginput;
02db2b7b 3122 REGCP_SET(lastcp);
0fe9bf95
IZ
3123 if (c1 != -1000) {
3124 char *e = locinput + n - ln; /* Should not check after this */
3125 char *old = locinput;
3126
3127 if (e >= PL_regeol || (n == REG_INFTY))
3128 e = PL_regeol - 1;
3129 while (1) {
3130 /* Find place 'next' could work */
3131 if (c1 == c2) {
3132 while (locinput <= e && *locinput != c1)
3133 locinput++;
3134 } else {
3135 while (locinput <= e
3136 && *locinput != c1
3137 && *locinput != c2)
3138 locinput++;
3139 }
3140 if (locinput > e)
3141 sayNO;
3142 /* PL_reginput == old now */
3143 if (locinput != old) {
3144 ln = 1; /* Did some */
3145 if (regrepeat(scan, locinput - old) <
3146 locinput - old)
3147 sayNO;
3148 }
3149 /* PL_reginput == locinput now */
29d1e993 3150 TRYPAREN(paren, ln, locinput);
0fe9bf95 3151 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3152 REGCP_UNWIND(lastcp);
0fe9bf95
IZ
3153 /* Couldn't or didn't -- move forward. */
3154 old = locinput++;
3155 }
3156 }
3157 else
c277df42 3158 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 3159 /* If it could work, try it. */
bbce6d69 3160 if (c1 == -1000 ||
3280af22
NIS
3161 UCHARAT(PL_reginput) == c1 ||
3162 UCHARAT(PL_reginput) == c2)
bbce6d69 3163 {
29d1e993 3164 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3165 REGCP_UNWIND(lastcp);
bbce6d69 3166 }
c277df42 3167 /* Couldn't or didn't -- move forward. */
a0ed51b3 3168 PL_reginput = locinput;
a0d0e21e
LW
3169 if (regrepeat(scan, 1)) {
3170 ln++;
a0ed51b3
LW
3171 locinput = PL_reginput;
3172 }
3173 else
4633a7c4 3174 sayNO;
a0d0e21e
LW
3175 }
3176 }
3177 else {
c277df42 3178 CHECKPOINT lastcp;
a0d0e21e 3179 n = regrepeat(scan, n);
a0ed51b3 3180 locinput = PL_reginput;
22c35a8c 3181 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3182 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3183 ln = n; /* why back off? */
1aeab75a
GS
3184 /* ...because $ and \Z can match before *and* after
3185 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3186 We should back off by one in this case. */
3187 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3188 ln--;
3189 }
02db2b7b 3190 REGCP_SET(lastcp);
c277df42
IZ
3191 if (paren) {
3192 while (n >= ln) {
3193 /* If it could work, try it. */
3194 if (c1 == -1000 ||
3280af22
NIS
3195 UCHARAT(PL_reginput) == c1 ||
3196 UCHARAT(PL_reginput) == c2)
c277df42 3197 {
29d1e993 3198 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3199 REGCP_UNWIND(lastcp);
c277df42
IZ
3200 }
3201 /* Couldn't or didn't -- back up. */
3202 n--;
dfe13c55 3203 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3204 }
a0ed51b3
LW
3205 }
3206 else {
c277df42
IZ
3207 while (n >= ln) {
3208 /* If it could work, try it. */
3209 if (c1 == -1000 ||
3280af22
NIS
3210 UCHARAT(PL_reginput) == c1 ||
3211 UCHARAT(PL_reginput) == c2)
c277df42 3212 {
29d1e993 3213 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3214 REGCP_UNWIND(lastcp);
c277df42
IZ
3215 }
3216 /* Couldn't or didn't -- back up. */
3217 n--;
dfe13c55 3218 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3219 }
a0d0e21e
LW
3220 }
3221 }
4633a7c4 3222 sayNO;
c277df42 3223 break;
a0d0e21e 3224 case END:
0f5d15d6
IZ
3225 if (PL_reg_call_cc) {
3226 re_cc_state *cur_call_cc = PL_reg_call_cc;
3227 CURCUR *cctmp = PL_regcc;
3228 regexp *re = PL_reg_re;
3229 CHECKPOINT cp, lastcp;
3230
3231 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3232 REGCP_SET(lastcp);
0f5d15d6
IZ
3233 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3234 the caller. */
3235 PL_reginput = locinput; /* Make position available to
3236 the callcc. */
3237 cache_re(PL_reg_call_cc->re);
3238 PL_regcc = PL_reg_call_cc->cc;
3239 PL_reg_call_cc = PL_reg_call_cc->prev;
3240 if (regmatch(cur_call_cc->node)) {
3241 PL_reg_call_cc = cur_call_cc;
3242 regcpblow(cp);
3243 sayYES;
3244 }
02db2b7b 3245 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3246 regcppop();
3247 PL_reg_call_cc = cur_call_cc;
3248 PL_regcc = cctmp;
3249 PL_reg_re = re;
3250 cache_re(re);
3251
3252 DEBUG_r(
3253 PerlIO_printf(Perl_debug_log,
3254 "%*s continuation failed...\n",
3255 REPORT_CODE_OFF+PL_regindent*2, "")
3256 );
7821416a 3257 sayNO_SILENT;
0f5d15d6 3258 }
7821416a
IZ
3259 if (locinput < PL_regtill) {
3260 DEBUG_r(PerlIO_printf(Perl_debug_log,
3261 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3262 PL_colors[4],
3263 (long)(locinput - PL_reg_starttry),
3264 (long)(PL_regtill - PL_reg_starttry),
3265 PL_colors[5]));
3266 sayNO_FINAL; /* Cannot match: too short. */
3267 }
3268 PL_reginput = locinput; /* put where regtry can find it */
3269 sayYES_FINAL; /* Success! */
7e5428c5 3270 case SUCCEED:
3280af22 3271 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3272 sayYES_LOUD; /* Success! */
c277df42
IZ
3273 case SUSPEND:
3274 n = 1;
9fe1d20c 3275 PL_reginput = locinput;
c277df42 3276 goto do_ifmatch;
a0d0e21e 3277 case UNLESSM:
c277df42 3278 n = 0;
a0ed51b3 3279 if (scan->flags) {
0fe9bf95
IZ
3280 if (UTF) { /* XXXX This is absolutely
3281 broken, we read before
3282 start of string. */
3283 s = HOPMAYBEc(locinput, -scan->flags);
3284 if (!s)
3285 goto say_yes;
3286 PL_reginput = s;
3287 }
3288 else {
3289 if (locinput < PL_bostr + scan->flags)
3290 goto say_yes;
3291 PL_reginput = locinput - scan->flags;
3292 goto do_ifmatch;
3293 }
a0ed51b3
LW
3294 }
3295 else
3296 PL_reginput = locinput;
c277df42
IZ
3297 goto do_ifmatch;
3298 case IFMATCH:
3299 n = 1;
a0ed51b3 3300 if (scan->flags) {
0fe9bf95
IZ
3301 if (UTF) { /* XXXX This is absolutely
3302 broken, we read before
3303 start of string. */
3304 s = HOPMAYBEc(locinput, -scan->flags);
3305 if (!s || s < PL_bostr)
3306 goto say_no;
3307 PL_reginput = s;
3308 }
3309 else {
3310 if (locinput < PL_bostr + scan->flags)
3311 goto say_no;
3312 PL_reginput = locinput - scan->flags;
3313 goto do_ifmatch;
3314 }
a0ed51b3
LW
3315 }
3316 else
3317 PL_reginput = locinput;
3318
c277df42 3319 do_ifmatch:
c277df42
IZ
3320 inner = NEXTOPER(NEXTOPER(scan));
3321 if (regmatch(inner) != n) {
3322 say_no:
3323 if (logical) {
3324 logical = 0;
3325 sw = 0;
3326 goto do_longjump;
a0ed51b3
LW
3327 }
3328 else
c277df42
IZ
3329 sayNO;
3330 }
3331 say_yes:
3332 if (logical) {
3333 logical = 0;
3334 sw = 1;
3335 }
fe44a5e8 3336 if (OP(scan) == SUSPEND) {
3280af22 3337 locinput = PL_reginput;
565764a8 3338 nextchr = UCHARAT(locinput);
fe44a5e8 3339 }
c277df42
IZ
3340 /* FALL THROUGH. */
3341 case LONGJMP:
3342 do_longjump:
3343 next = scan + ARG(scan);
3344 if (next == scan)
3345 next = NULL;
a0d0e21e
LW
3346 break;
3347 default:
b900a521 3348 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3349 PTR2UV(scan), OP(scan));
cea2e8a9 3350 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3351 }
02db2b7b 3352 reenter:
a0d0e21e
LW
3353 scan = next;
3354 }
a687059c 3355
a0d0e21e
LW
3356 /*
3357 * We get here only if there's trouble -- normally "case END" is
3358 * the terminating point.
3359 */
cea2e8a9 3360 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3361 /*NOTREACHED*/
4633a7c4
LW
3362 sayNO;
3363
7821416a
IZ
3364yes_loud:
3365 DEBUG_r(
3366 PerlIO_printf(Perl_debug_log,
3367 "%*s %scould match...%s\n",
3368 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3369 );
3370 goto yes;
3371yes_final:
3372 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3373 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3374yes:
3375#ifdef DEBUGGING
3280af22 3376 PL_regindent--;
4633a7c4 3377#endif
02db2b7b
IZ
3378
3379#if 0 /* Breaks $^R */
3380 if (unwind)
3381 regcpblow(firstcp);
3382#endif
4633a7c4
LW
3383 return 1;
3384
3385no:
7821416a
IZ
3386 DEBUG_r(
3387 PerlIO_printf(Perl_debug_log,
3388 "%*s %sfailed...%s\n",
3389 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3390 );
3391 goto do_no;
3392no_final:
3393do_no:
02db2b7b
IZ
3394 if (unwind) {
3395 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3396
3397 switch (uw->type) {
3398 case RE_UNWIND_BRANCH:
3399 case RE_UNWIND_BRANCHJ:
3400 {
3401 re_unwind_branch_t *uwb = &(uw->branch);
3402 I32 lastparen = uwb->lastparen;
3403
3404 REGCP_UNWIND(uwb->lastcp);
3405 for (n = *PL_reglastparen; n > lastparen; n--)
3406 PL_regendp[n] = -1;
3407 *PL_reglastparen = n;
3408 scan = next = uwb->next;
3409 if ( !scan ||
3410 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3411 ? BRANCH : BRANCHJ) ) { /* Failure */
3412 unwind = uwb->prev;
3413#ifdef DEBUGGING
3414 PL_regindent--;
3415#endif
3416 goto do_no;
3417 }
3418 /* Have more choice yet. Reuse the same uwb. */
3419 /*SUPPRESS 560*/
3420 if ((n = (uwb->type == RE_UNWIND_BRANCH
3421 ? NEXT_OFF(next) : ARG(next))))
3422 next += n;
3423 else
3424 next = NULL; /* XXXX Needn't unwinding in this case... */
3425 uwb->next = next;
3426 next = NEXTOPER(scan);
3427 if (uwb->type == RE_UNWIND_BRANCHJ)
3428 next = NEXTOPER(next);
3429 locinput = uwb->locinput;
3430 nextchr = uwb->nextchr;
3431#ifdef DEBUGGING
3432 PL_regindent = uwb->regindent;
3433#endif
3434
3435 goto reenter;
3436 }
3437 /* NOT REACHED */
3438 default:
3439 Perl_croak(aTHX_ "regexp unwind memory corruption");
3440 }
3441 /* NOT REACHED */
3442 }
4633a7c4 3443#ifdef DEBUGGING
3280af22 3444 PL_regindent--;
4633a7c4 3445#endif
a0d0e21e 3446 return 0;
a687059c
LW
3447}
3448
3449/*
3450 - regrepeat - repeatedly match something simple, report how many
3451 */
3452/*
3453 * [This routine now assumes that it will only match on things of length 1.
3454 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3455 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3456 */
76e3520e 3457STATIC I32
cea2e8a9 3458S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3459{
a0d0e21e 3460 register char *scan;
a0d0e21e 3461 register I32 c;
3280af22 3462 register char *loceol = PL_regeol;
a0ed51b3 3463 register I32 hardcount = 0;
a0d0e21e 3464
3280af22 3465 scan = PL_reginput;
c277df42 3466 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3467 loceol = scan + max;
a0d0e21e 3468 switch (OP(p)) {
22c35a8c 3469 case REG_ANY:
a0d0e21e
LW
3470 while (scan < loceol && *scan != '\n')
3471 scan++;
3472 break;
3473 case SANY:
3474 scan = loceol;
3475 break;
a0ed51b3
LW
3476 case ANYUTF8:
3477 loceol = PL_regeol;
3478 while (scan < loceol && *scan != '\n') {
3479 scan += UTF8SKIP(scan);
3480 hardcount++;
3481 }
3482 break;
3483 case SANYUTF8:
3484 loceol = PL_regeol;
3485 while (scan < loceol) {
3486 scan += UTF8SKIP(scan);
3487 hardcount++;
3488 }
3489 break;
bbce6d69 3490 case EXACT: /* length of string is 1 */
cd439c50 3491 c = (U8)*STRING(p);
bbce6d69 3492 while (scan < loceol && UCHARAT(scan) == c)
3493 scan++;
3494 break;
3495 case EXACTF: /* length of string is 1 */
cd439c50 3496 c = (U8)*STRING(p);
bbce6d69 3497 while (scan < loceol &&
22c35a8c 3498 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3499 scan++;
3500 break;
3501 case EXACTFL: /* length of string is 1 */
3280af22 3502 PL_reg_flags |= RF_tainted;
cd439c50 3503 c = (U8)*STRING(p);
bbce6d69 3504 while (scan < loceol &&
22c35a8c 3505 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3506 scan++;
3507 break;
a0ed51b3
LW
3508 case ANYOFUTF8:
3509 loceol = PL_regeol;
3510 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3511 scan += UTF8SKIP(scan);
3512 hardcount++;
3513 }
3514 break;
a0d0e21e 3515 case ANYOF:
936ed897 3516 while (scan < loceol && REGINCLASS(p, *scan))
a0d0e21e 3517 scan++;
a0d0e21e
LW
3518 break;
3519 case ALNUM:
3520 while (scan < loceol && isALNUM(*scan))
3521 scan++;
3522 break;
a0ed51b3
LW
3523 case ALNUMUTF8:
3524 loceol = PL_regeol;
dfe13c55 3525 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3526 scan += UTF8SKIP(scan);
3527 hardcount++;
3528 }
3529 break;
bbce6d69 3530 case ALNUML:
3280af22 3531 PL_reg_flags |= RF_tainted;
bbce6d69 3532 while (scan < loceol && isALNUM_LC(*scan))
3533 scan++;
3534 break;
a0ed51b3
LW
3535 case ALNUMLUTF8:
3536 PL_reg_flags |= RF_tainted;
3537 loceol = PL_regeol;
dfe13c55 3538 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3539 scan += UTF8SKIP(scan);
3540 hardcount++;
3541 }
3542 break;
3543 break;
a0d0e21e
LW
3544 case NALNUM:
3545 while (scan < loceol && !isALNUM(*scan))
3546 scan++;
3547 break;
a0ed51b3
LW
3548 case NALNUMUTF8:
3549 loceol = PL_regeol;
dfe13c55 3550 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3551 scan += UTF8SKIP(scan);
3552 hardcount++;
3553 }
3554 break;
bbce6d69 3555 case NALNUML:
3280af22 3556 PL_reg_flags |= RF_tainted;
bbce6d69 3557 while (scan < loceol && !isALNUM_LC(*scan))
3558 scan++;
3559 break;
a0ed51b3
LW
3560 case NALNUMLUTF8:
3561 PL_reg_flags |= RF_tainted;
3562 loceol = PL_regeol;
dfe13c55 3563 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3564 scan += UTF8SKIP(scan);
3565 hardcount++;
3566 }
3567 break;
a0d0e21e
LW
3568 case SPACE:
3569 while (scan < loceol && isSPACE(*scan))
3570 scan++;
3571 break;
a0ed51b3
LW
3572 case SPACEUTF8:
3573 loceol = PL_regeol;
dfe13c55 3574 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3575 scan += UTF8SKIP(scan);
3576 hardcount++;
3577 }
3578 break;
bbce6d69 3579 case SPACEL:
3280af22 3580 PL_reg_flags |= RF_tainted;
bbce6d69 3581 while (scan < loceol && isSPACE_LC(*scan))
3582 scan++;
3583 break;
a0ed51b3
LW
3584 case SPACELUTF8:
3585 PL_reg_flags |= RF_tainted;
3586 loceol = PL_regeol;
dfe13c55 3587 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3588 scan += UTF8SKIP(scan);
3589 hardcount++;
3590 }
3591 break;
a0d0e21e
LW
3592 case NSPACE:
3593 while (scan < loceol && !isSPACE(*scan))
3594 scan++;
3595 break;
a0ed51b3
LW
3596 case NSPACEUTF8:
3597 loceol = PL_regeol;
dfe13c55 3598 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3599 scan += UTF8SKIP(scan);
3600 hardcount++;
3601 }
3602 break;
bbce6d69 3603 case NSPACEL:
3280af22 3604 PL_reg_flags |= RF_tainted;
bbce6d69 3605 while (scan < loceol && !isSPACE_LC(*scan))
3606 scan++;
3607 break;
a0ed51b3
LW
3608 case NSPACELUTF8:
3609 PL_reg_flags |= RF_tainted;
3610 loceol = PL_regeol;
dfe13c55 3611 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3612 scan += UTF8SKIP(scan);
3613 hardcount++;
3614 }
3615 break;
a0d0e21e
LW
3616 case DIGIT:
3617 while (scan < loceol && isDIGIT(*scan))
3618 scan++;
3619 break;
a0ed51b3
LW
3620 case DIGITUTF8:
3621 loceol = PL_regeol;
dfe13c55 3622 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3623 scan += UTF8SKIP(scan);
3624 hardcount++;
3625 }
3626 break;
3627 break;
a0d0e21e
LW
3628 case NDIGIT:
3629 while (scan < loceol && !isDIGIT(*scan))
3630 scan++;
3631 break;
a0ed51b3
LW
3632 case NDIGITUTF8:
3633 loceol = PL_regeol;
dfe13c55 3634 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3635 scan += UTF8SKIP(scan);
3636 hardcount++;
3637 }
3638 break;
a0d0e21e
LW
3639 default: /* Called on something of 0 width. */
3640 break; /* So match right here or not at all. */
3641 }
a687059c 3642
a0ed51b3
LW
3643 if (hardcount)
3644 c = hardcount;
3645 else
3646 c = scan - PL_reginput;
3280af22 3647 PL_reginput = scan;
a687059c 3648
c277df42
IZ
3649 DEBUG_r(
3650 {
3651 SV *prop = sv_newmortal();
3652
3653 regprop(prop, p);
3654 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3655 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3656 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3657 });
3658
a0d0e21e 3659 return(c);
a687059c
LW
3660}
3661
3662/*
c277df42
IZ
3663 - regrepeat_hard - repeatedly match something, report total lenth and length
3664 *
3665 * The repeater is supposed to have constant length.
3666 */
3667
76e3520e 3668STATIC I32
cea2e8a9 3669S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42
IZ
3670{
3671 register char *scan;
3672 register char *start;
3280af22 3673 register char *loceol = PL_regeol;
a0ed51b3 3674 I32 l = 0;
708e3b05 3675 I32 count = 0, res = 1;
a0ed51b3
LW
3676
3677 if (!max)
3678 return 0;
c277df42 3679
3280af22 3680 start = PL_reginput;
a0ed51b3 3681 if (UTF) {
708e3b05 3682 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3683 if (!count++) {
3684 l = 0;
3685 while (start < PL_reginput) {
3686 l++;
3687 start += UTF8SKIP(start);
3688 }
3689 *lp = l;
3690 if (l == 0)
3691 return max;
3692 }
3693 if (count == max)
3694 return count;
3695 }
3696 }
3697 else {
708e3b05 3698 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3699 if (!count++) {
3700 *lp = l = PL_reginput - start;
3701 if (max != REG_INFTY && l*max < loceol - scan)
3702 loceol = scan + l*max;
3703 if (l == 0)
3704 return max;
c277df42
IZ
3705 }
3706 }
3707 }
708e3b05 3708 if (!res)
3280af22 3709 PL_reginput = scan;
c277df42 3710
a0ed51b3 3711 return count;
c277df42
IZ
3712}
3713
3714/*
cb8d8820 3715 - reginclass - determine if a character falls into a character class
bbce6d69 3716 */
3717
76e3520e 3718STATIC bool
936ed897 3719S_reginclass(pTHX_ register regnode *p, register I32 c)
bbce6d69 3720{
b8c5462f 3721 char flags = ANYOF_FLAGS(p);
bbce6d69 3722 bool match = FALSE;
3723
3724 c &= 0xFF;
b8c5462f 3725 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3726 match = TRUE;